polars-df 0.16.0-aarch64-linux-musl → 0.17.1-aarch64-linux-musl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,8 +6,7 @@ module Polars
6
6
  # DataFrames/Series/LazyFrames to concatenate.
7
7
  # @param rechunk [Boolean]
8
8
  # Make sure that all data is in contiguous memory.
9
- # @param how ["vertical", "vertical_relaxed", "diagonal", "horizontal"]
10
- # LazyFrames do not support the `horizontal` strategy.
9
+ # @param how ["vertical", "vertical_relaxed", "diagonal", "diagonal_relaxed", "horizontal", "align"]
11
10
  #
12
11
  # - Vertical: applies multiple `vstack` operations.
13
12
  # - Diagonal: finds a union between the column schemas and fills missing column values with null.
@@ -21,7 +20,7 @@ module Polars
21
20
  # @example
22
21
  # df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
23
22
  # df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
24
- # Polars.concat([df1, df2])
23
+ # Polars.concat([df1, df2]) # default is 'vertical' strategy
25
24
  # # =>
26
25
  # # shape: (2, 2)
27
26
  # # ┌─────┬─────┐
@@ -32,38 +31,168 @@ module Polars
32
31
  # # │ 1 ┆ 3 │
33
32
  # # │ 2 ┆ 4 │
34
33
  # # └─────┴─────┘
34
+ #
35
+ # @example
36
+ # df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
37
+ # df2 = Polars::DataFrame.new({"a" => [2.5], "b" => [4]})
38
+ # Polars.concat([df1, df2], how: "vertical_relaxed") # 'a' coerced into f64
39
+ # # =>
40
+ # # shape: (2, 2)
41
+ # # ┌─────┬─────┐
42
+ # # │ a ┆ b │
43
+ # # │ --- ┆ --- │
44
+ # # │ f64 ┆ i64 │
45
+ # # ╞═════╪═════╡
46
+ # # │ 1.0 ┆ 3 │
47
+ # # │ 2.5 ┆ 4 │
48
+ # # └─────┴─────┘
49
+ #
50
+ # @example
51
+ # df_h1 = Polars::DataFrame.new({"l1" => [1, 2], "l2" => [3, 4]})
52
+ # df_h2 = Polars::DataFrame.new({"r1" => [5, 6], "r2" => [7, 8], "r3" => [9, 10]})
53
+ # Polars.concat([df_h1, df_h2], how: "horizontal")
54
+ # # =>
55
+ # # shape: (2, 5)
56
+ # # ┌─────┬─────┬─────┬─────┬─────┐
57
+ # # │ l1 ┆ l2 ┆ r1 ┆ r2 ┆ r3 │
58
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
59
+ # # │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
60
+ # # ╞═════╪═════╪═════╪═════╪═════╡
61
+ # # │ 1 ┆ 3 ┆ 5 ┆ 7 ┆ 9 │
62
+ # # │ 2 ┆ 4 ┆ 6 ┆ 8 ┆ 10 │
63
+ # # └─────┴─────┴─────┴─────┴─────┘
64
+ #
65
+ # @example
66
+ # df_d1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
67
+ # df_d2 = Polars::DataFrame.new({"a" => [2], "c" => [4]})
68
+ # Polars.concat([df_d1, df_d2], how: "diagonal")
69
+ # # =>
70
+ # # shape: (2, 3)
71
+ # # ┌─────┬──────┬──────┐
72
+ # # │ a ┆ b ┆ c │
73
+ # # │ --- ┆ --- ┆ --- │
74
+ # # │ i64 ┆ i64 ┆ i64 │
75
+ # # ╞═════╪══════╪══════╡
76
+ # # │ 1 ┆ 3 ┆ null │
77
+ # # │ 2 ┆ null ┆ 4 │
78
+ # # └─────┴──────┴──────┘
79
+ #
80
+ # @example
81
+ # df_a1 = Polars::DataFrame.new({"id" => [1, 2], "x" => [3, 4]})
82
+ # df_a2 = Polars::DataFrame.new({"id" => [2, 3], "y" => [5, 6]})
83
+ # df_a3 = Polars::DataFrame.new({"id" => [1, 3], "z" => [7, 8]})
84
+ # Polars.concat([df_a1, df_a2, df_a3], how: "align")
85
+ # # =>
86
+ # # shape: (3, 4)
87
+ # # ┌─────┬──────┬──────┬──────┐
88
+ # # │ id ┆ x ┆ y ┆ z │
89
+ # # │ --- ┆ --- ┆ --- ┆ --- │
90
+ # # │ i64 ┆ i64 ┆ i64 ┆ i64 │
91
+ # # ╞═════╪══════╪══════╪══════╡
92
+ # # │ 1 ┆ 3 ┆ null ┆ 7 │
93
+ # # │ 2 ┆ 4 ┆ 5 ┆ null │
94
+ # # │ 3 ┆ null ┆ 6 ┆ 8 │
95
+ # # └─────┴──────┴──────┴──────┘
35
96
  def concat(items, rechunk: true, how: "vertical", parallel: true)
36
- if items.empty?
97
+ elems = items.to_a
98
+
99
+ if elems.empty?
37
100
  raise ArgumentError, "cannot concat empty list"
38
101
  end
39
102
 
40
- first = items[0]
103
+ if how == "align"
104
+ if !elems[0].is_a?(DataFrame) && !elems[0].is_a?(LazyFrame)
105
+ msg = "'align' strategy is not supported for #{elems[0].class.name}"
106
+ raise TypeError, msg
107
+ end
108
+
109
+ # establish common columns, maintaining the order in which they appear
110
+ all_columns = elems.flat_map { |e| e.collect_schema.names }
111
+ key = all_columns.uniq.map.with_index.to_h
112
+ common_cols = elems.map { |e| e.collect_schema.names }
113
+ .reduce { |x, y| Set.new(x) & Set.new(y) }
114
+ .sort_by { |k| key[k] }
115
+ # we require at least one key column for 'align'
116
+ if common_cols.empty?
117
+ msg = "'align' strategy requires at least one common column"
118
+ raise InvalidOperationError, msg
119
+ end
120
+
121
+ # align the frame data using a full outer join with no suffix-resolution
122
+ # (so we raise an error in case of column collision, like "horizontal")
123
+ lf = elems.map { |df| df.lazy }.reduce do |x, y|
124
+ x.join(
125
+ y,
126
+ how: "full",
127
+ on: common_cols,
128
+ suffix: "_PL_CONCAT_RIGHT",
129
+ maintain_order: "right_left"
130
+ )
131
+ # Coalesce full outer join columns
132
+ .with_columns(
133
+ common_cols.map { |name| F.coalesce([name, "#{name}_PL_CONCAT_RIGHT"]) }
134
+ )
135
+ .drop(common_cols.map { |name| "#{name}_PL_CONCAT_RIGHT" })
136
+ end.sort(common_cols)
137
+
138
+ eager = elems[0].is_a?(DataFrame)
139
+ return eager ? lf.collect : lf
140
+ end
141
+
142
+ first = elems[0]
143
+
41
144
  if first.is_a?(DataFrame)
42
145
  if how == "vertical"
43
- out = Utils.wrap_df(Plr.concat_df(items))
146
+ out = Utils.wrap_df(Plr.concat_df(elems))
147
+ elsif how == "vertical_relaxed"
148
+ out = Utils.wrap_ldf(
149
+ Plr.concat_lf(
150
+ elems.map { |df| df.lazy },
151
+ rechunk,
152
+ parallel,
153
+ true
154
+ )
155
+ ).collect(no_optimization: true)
44
156
  elsif how == "diagonal"
45
- out = Utils.wrap_df(Plr.concat_df_diagonal(items))
157
+ out = Utils.wrap_df(Plr.concat_df_diagonal(elems))
158
+ elsif how == "diagonal_relaxed"
159
+ out = Utils.wrap_ldf(
160
+ Plr.concat_lf_diagonal(
161
+ elems.map { |df| df.lazy },
162
+ rechunk,
163
+ parallel,
164
+ true
165
+ )
166
+ ).collect(no_optimization: true)
46
167
  elsif how == "horizontal"
47
- out = Utils.wrap_df(Plr.concat_df_horizontal(items))
168
+ out = Utils.wrap_df(Plr.concat_df_horizontal(elems))
48
169
  else
49
- raise ArgumentError, "how must be one of {{'vertical', 'diagonal', 'horizontal'}}, got #{how}"
170
+ raise ArgumentError, "how must be one of {{'vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', 'horizontal'}}, got #{how}"
50
171
  end
51
172
  elsif first.is_a?(LazyFrame)
52
173
  if how == "vertical"
53
- return Utils.wrap_ldf(Plr.concat_lf(items, rechunk, parallel, false))
174
+ return Utils.wrap_ldf(Plr.concat_lf(elems, rechunk, parallel, false))
54
175
  elsif how == "vertical_relaxed"
55
- return Utils.wrap_ldf(Plr.concat_lf(items, rechunk, parallel, true))
176
+ return Utils.wrap_ldf(Plr.concat_lf(elems, rechunk, parallel, true))
56
177
  elsif how == "diagonal"
57
- return Utils.wrap_ldf(Plr.concat_lf_diagonal(items, rechunk, parallel, false))
178
+ return Utils.wrap_ldf(Plr.concat_lf_diagonal(elems, rechunk, parallel, false))
179
+ elsif how == "diagonal_relaxed"
180
+ return Utils.wrap_ldf(Plr.concat_lf_diagonal(elems, rechunk, parallel, true))
181
+ elsif how == "horizontal"
182
+ return Utils.wrap_ldf(Plr.concat_lf_horizontal(elems, parallel))
58
183
  else
59
- raise ArgumentError, "Lazy only allows 'vertical', 'vertical_relaxed', and 'diagonal' concat strategy."
184
+ raise ArgumentError, "Lazy only allows 'vertical', 'vertical_relaxed', 'diagonal', and 'diagonal_relaxed' concat strategy."
60
185
  end
61
186
  elsif first.is_a?(Series)
62
- # TODO
63
- out = Utils.wrap_s(Plr.concat_series(items))
187
+ if how == "vertical"
188
+ out = Utils.wrap_s(Plr.concat_series(elems))
189
+ else
190
+ msg = "Series only supports 'vertical' concat strategy"
191
+ raise ArgumentError, msg
192
+ end
64
193
  elsif first.is_a?(Expr)
65
194
  out = first
66
- items[1..-1].each do |e|
195
+ elems[1..-1].each do |e|
67
196
  out = out.append(e)
68
197
  end
69
198
  else
@@ -51,8 +51,25 @@ module Polars
51
51
  when :decimal
52
52
  Decimal
53
53
  when :float
54
+ # TODO uncomment in 0.18.0
55
+ # if column_type.limit && column_type.limit <= 24
56
+ # Float32
57
+ # else
58
+ # Float64
59
+ # end
54
60
  Float64
55
61
  when :integer
62
+ # TODO uncomment in 0.18.0
63
+ # case column_type.limit
64
+ # when 1
65
+ # Int8
66
+ # when 2
67
+ # Int16
68
+ # when 4
69
+ # Int32
70
+ # else
71
+ # Int64
72
+ # end
56
73
  Int64
57
74
  when :string, :text
58
75
  String
@@ -619,6 +619,10 @@ module Polars
619
619
  # Slice pushdown optimization.
620
620
  # @param no_optimization [Boolean]
621
621
  # Turn off (certain) optimizations.
622
+ # @param storage_options [Object]
623
+ # Options that indicate how to connect to a cloud provider.
624
+ # @param retries [Integer]
625
+ # Number of retries if accessing a cloud instance fails.
622
626
  #
623
627
  # @return [DataFrame]
624
628
  #
@@ -646,7 +650,9 @@ module Polars
646
650
  projection_pushdown: true,
647
651
  simplify_expression: true,
648
652
  slice_pushdown: true,
649
- no_optimization: false
653
+ no_optimization: false,
654
+ storage_options: nil,
655
+ retries: 2
650
656
  )
651
657
  Utils._check_arg_is_1byte("separator", separator, false)
652
658
  Utils._check_arg_is_1byte("quote_char", quote_char, false)
@@ -660,6 +666,12 @@ module Polars
660
666
  no_optimization: no_optimization
661
667
  )
662
668
 
669
+ if storage_options&.any?
670
+ storage_options = storage_options.to_a
671
+ else
672
+ storage_options = nil
673
+ end
674
+
663
675
  lf.sink_csv(
664
676
  path,
665
677
  include_bom,
@@ -675,7 +687,9 @@ module Polars
675
687
  float_precision,
676
688
  null_value,
677
689
  quote_style,
678
- maintain_order
690
+ maintain_order,
691
+ storage_options,
692
+ retries
679
693
  )
680
694
  end
681
695
 
@@ -1616,6 +1630,14 @@ module Polars
1616
1630
  # - true: -> Always coalesce join columns.
1617
1631
  # - false: -> Never coalesce join columns.
1618
1632
  # Note that joining on any other expressions than `col` will turn off coalescing.
1633
+ # @param allow_exact_matches [Boolean]
1634
+ # Whether exact matches are valid join predicates.
1635
+ # - If true, allow matching with the same `on` value (i.e., less-than-or-equal-to / greater-than-or-equal-to).
1636
+ # - If false, don't match the same `on` value (i.e., strictly less-than / strictly greater-than).
1637
+ # @param check_sortedness [Boolean]
1638
+ # Check the sortedness of the asof keys. If the keys are not sorted, Polars
1639
+ # raises an error, or, when the 'by' argument is used, only emits a warning.
1640
+ # That warning might become a hard error in the future.
1619
1641
  #
1620
1642
  # @return [LazyFrame]
1621
1643
  #
@@ -1815,7 +1837,9 @@ module Polars
1815
1837
  tolerance: nil,
1816
1838
  allow_parallel: true,
1817
1839
  force_parallel: false,
1818
- coalesce: true
1840
+ coalesce: true,
1841
+ allow_exact_matches: true,
1842
+ check_sortedness: true
1819
1843
  )
1820
1844
  if !other.is_a?(LazyFrame)
1821
1845
  raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
@@ -1871,7 +1895,9 @@ module Polars
1871
1895
  strategy,
1872
1896
  tolerance_num,
1873
1897
  tolerance_str,
1874
- coalesce
1898
+ coalesce,
1899
+ allow_exact_matches,
1900
+ check_sortedness
1875
1901
  )
1876
1902
  )
1877
1903
  end
@@ -1911,6 +1937,24 @@ module Polars
1911
1937
  # - true: -> Always coalesce join columns.
1912
1938
  # - false: -> Never coalesce join columns.
1913
1939
  # Note that joining on any other expressions than `col` will turn off coalescing.
1940
+ # @param maintain_order ['none', 'left', 'right', 'left_right', 'right_left']
1941
+ # Which DataFrame row order to preserve, if any.
1942
+ # Do not rely on any observed ordering without explicitly
1943
+ # setting this parameter, as your code may break in a future release.
1944
+ # Not specifying any ordering can improve performance.
1945
+ # Supported for inner, left, right and full joins.
1946
+ #
1947
+ # * *none*
1948
+ # No specific ordering is desired. The ordering might differ across
1949
+ # Polars versions or even between different runs.
1950
+ # * *left*
1951
+ # Preserves the order of the left DataFrame.
1952
+ # * *right*
1953
+ # Preserves the order of the right DataFrame.
1954
+ # * *left_right*
1955
+ # First preserves the order of the left DataFrame, then the right.
1956
+ # * *right_left*
1957
+ # First preserves the order of the right DataFrame, then the left.
1914
1958
  #
1915
1959
  # @return [LazyFrame]
1916
1960
  #
@@ -2004,18 +2048,33 @@ module Polars
2004
2048
  join_nulls: false,
2005
2049
  allow_parallel: true,
2006
2050
  force_parallel: false,
2007
- coalesce: nil
2051
+ coalesce: nil,
2052
+ maintain_order: nil
2008
2053
  )
2009
2054
  if !other.is_a?(LazyFrame)
2010
2055
  raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
2011
2056
  end
2012
2057
 
2058
+ if maintain_order.nil?
2059
+ maintain_order = "none"
2060
+ end
2061
+
2013
2062
  if how == "outer"
2014
2063
  how = "full"
2015
2064
  elsif how == "cross"
2016
2065
  return _from_rbldf(
2017
2066
  _ldf.join(
2018
- other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix, validate, coalesce
2067
+ other._ldf,
2068
+ [],
2069
+ [],
2070
+ allow_parallel,
2071
+ join_nulls,
2072
+ force_parallel,
2073
+ how,
2074
+ suffix,
2075
+ validate,
2076
+ maintain_order,
2077
+ coalesce
2019
2078
  )
2020
2079
  )
2021
2080
  end
@@ -2042,6 +2101,7 @@ module Polars
2042
2101
  how,
2043
2102
  suffix,
2044
2103
  validate,
2104
+ maintain_order,
2045
2105
  coalesce
2046
2106
  )
2047
2107
  )
@@ -3335,12 +3395,17 @@ module Polars
3335
3395
  _from_rbldf(_ldf.merge_sorted(other._ldf, key))
3336
3396
  end
3337
3397
 
3338
- # Indicate that one or multiple columns are sorted.
3398
+ # Flag a column as sorted.
3399
+ #
3400
+ # This can speed up future operations.
3401
+ #
3402
+ # @note
3403
+ # This can lead to incorrect results if the data is NOT sorted! Use with care!
3339
3404
  #
3340
3405
  # @param column [Object]
3341
- # Columns that are sorted
3406
+ # Column that is sorted.
3342
3407
  # @param descending [Boolean]
3343
- # Whether the columns are sorted in descending order.
3408
+ # Whether the column is sorted in descending order.
3344
3409
  #
3345
3410
  # @return [LazyFrame]
3346
3411
  def set_sorted(
@@ -0,0 +1,29 @@
1
+ module Polars
2
+ class Schema
3
+ def initialize(schema, check_dtypes: true)
4
+ raise Todo if check_dtypes
5
+ @schema = schema.to_h
6
+ end
7
+
8
+ def [](key)
9
+ @schema[key]
10
+ end
11
+
12
+ def names
13
+ @schema.keys
14
+ end
15
+
16
+ def dtypes
17
+ @schema.values
18
+ end
19
+
20
+ def length
21
+ @schema.length
22
+ end
23
+
24
+ def to_s
25
+ "#{self.class.name}(#{@schema})"
26
+ end
27
+ alias_method :inspect, :to_s
28
+ end
29
+ end
data/lib/polars/series.rb CHANGED
@@ -2554,29 +2554,7 @@ module Polars
2554
2554
  # # Numo::Int64#shape=[3]
2555
2555
  # # [1, 2, 3]
2556
2556
  def to_numo
2557
- if !has_validity
2558
- if is_datelike
2559
- Numo::RObject.cast(to_a)
2560
- elsif is_numeric
2561
- # TODO make more efficient
2562
- {
2563
- UInt8 => Numo::UInt8,
2564
- UInt16 => Numo::UInt16,
2565
- UInt32 => Numo::UInt32,
2566
- UInt64 => Numo::UInt64,
2567
- Int8 => Numo::Int8,
2568
- Int16 => Numo::Int16,
2569
- Int32 => Numo::Int32,
2570
- Int64 => Numo::Int64,
2571
- Float32 => Numo::SFloat,
2572
- Float64 => Numo::DFloat
2573
- }.fetch(dtype.class).cast(to_a)
2574
- elsif is_boolean
2575
- Numo::Bit.cast(to_a)
2576
- else
2577
- _s.to_numo
2578
- end
2579
- elsif is_datelike
2557
+ if is_datelike
2580
2558
  Numo::RObject.cast(to_a)
2581
2559
  else
2582
2560
  _s.to_numo
@@ -3815,6 +3793,30 @@ module Polars
3815
3793
  super
3816
3794
  end
3817
3795
 
3796
+ # Fill null values using interpolation based on another column.
3797
+ #
3798
+ # @param by [Expr]
3799
+ # Column to interpolate values based on.
3800
+ #
3801
+ # @return [Series]
3802
+ #
3803
+ # @example Fill null values using linear interpolation.
3804
+ # s = Polars::Series.new("a", [1, nil, nil, 3])
3805
+ # by = Polars::Series.new("b", [1, 2, 7, 8])
3806
+ # s.interpolate_by(by)
3807
+ # # =>
3808
+ # # shape: (4,)
3809
+ # # Series: 'a' [f64]
3810
+ # # [
3811
+ # # 1.0
3812
+ # # 1.285714
3813
+ # # 2.714286
3814
+ # # 3.0
3815
+ # # ]
3816
+ def interpolate_by(by)
3817
+ super
3818
+ end
3819
+
3818
3820
  # Compute absolute values.
3819
3821
  #
3820
3822
  # @return [Series]
@@ -4696,7 +4698,12 @@ module Polars
4696
4698
  end
4697
4699
 
4698
4700
  constructor = polars_type_to_constructor(dtype)
4699
- rbseries = constructor.call(name, values, strict)
4701
+ rbseries =
4702
+ if dtype == Array
4703
+ constructor.call(name, values, strict)
4704
+ else
4705
+ construct_series_with_fallbacks(constructor, name, values, dtype, strict: strict)
4706
+ end
4700
4707
 
4701
4708
  base_type = dtype.is_a?(DataType) ? dtype.class : dtype
4702
4709
  if [Date, Datetime, Duration, Time, Categorical, Boolean, Enum, Decimal].include?(base_type)
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.16.0"
3
+ VERSION = "0.17.1"
4
4
  end
data/lib/polars.rb CHANGED
@@ -61,6 +61,7 @@ require_relative "polars/list_name_space"
61
61
  require_relative "polars/meta_expr"
62
62
  require_relative "polars/name_expr"
63
63
  require_relative "polars/rolling_group_by"
64
+ require_relative "polars/schema"
64
65
  require_relative "polars/selectors"
65
66
  require_relative "polars/series"
66
67
  require_relative "polars/slice"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.16.0
4
+ version: 0.17.1
5
5
  platform: aarch64-linux-musl
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-12-29 00:00:00.000000000 Z
11
+ date: 2025-04-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal
@@ -92,6 +92,7 @@ files:
92
92
  - lib/polars/name_expr.rb
93
93
  - lib/polars/plot.rb
94
94
  - lib/polars/rolling_group_by.rb
95
+ - lib/polars/schema.rb
95
96
  - lib/polars/selectors.rb
96
97
  - lib/polars/series.rb
97
98
  - lib/polars/slice.rb
@@ -130,7 +131,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
130
131
  requirements:
131
132
  - - ">="
132
133
  - !ruby/object:Gem::Version
133
- version: '0'
134
+ version: 3.3.22
134
135
  requirements: []
135
136
  rubygems_version: 3.5.23
136
137
  signing_key: