polars-df 0.5.0-arm64-darwin → 0.6.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,25 @@
1
1
  module Polars
2
2
  # Base class for all Polars data types.
3
3
  class DataType
4
+ def self.base_type
5
+ self
6
+ end
7
+
8
+ def base_type
9
+ is_a?(DataType) ? self.class : self
10
+ end
11
+
12
+ def self.nested?
13
+ false
14
+ end
15
+
16
+ def nested?
17
+ self.class.nested?
18
+ end
19
+
20
+ def self.==(other)
21
+ eql?(other) || other.is_a?(self)
22
+ end
4
23
  end
5
24
 
6
25
  # Base class for numeric data types.
@@ -15,12 +34,19 @@ module Polars
15
34
  class FractionalType < NumericType
16
35
  end
17
36
 
37
+ # Base class for float data types.
38
+ class FloatType < FractionalType
39
+ end
40
+
18
41
  # Base class for temporal data types.
19
42
  class TemporalType < DataType
20
43
  end
21
44
 
22
45
  # Base class for nested data types.
23
46
  class NestedType < DataType
47
+ def self.nested?
48
+ true
49
+ end
24
50
  end
25
51
 
26
52
  # 8-bit signed integer type.
@@ -56,11 +82,37 @@ module Polars
56
82
  end
57
83
 
58
84
  # 32-bit floating point type.
59
- class Float32 < FractionalType
85
+ class Float32 < FloatType
60
86
  end
61
87
 
62
88
  # 64-bit floating point type.
63
- class Float64 < FractionalType
89
+ class Float64 < FloatType
90
+ end
91
+
92
+ # Decimal 128-bit type with an optional precision and non-negative scale.
93
+ #
94
+ # NOTE: this is an experimental work-in-progress feature and may not work as expected.
95
+ class Decimal < FractionalType
96
+ attr_reader :precision, :scale
97
+
98
+ def initialize(precision, scale)
99
+ @precision = precision
100
+ @scale = scale
101
+ end
102
+
103
+ def ==(other)
104
+ if other.eql?(Decimal)
105
+ true
106
+ elsif other.is_a?(Decimal)
107
+ precision == other.precision && scale == other.scale
108
+ else
109
+ false
110
+ end
111
+ end
112
+
113
+ def to_s
114
+ "#{self.class.name}(precision: #{precision.inspect}, scale: #{scale.inspect})"
115
+ end
64
116
  end
65
117
 
66
118
  # Boolean type.
@@ -71,17 +123,18 @@ module Polars
71
123
  class Utf8 < DataType
72
124
  end
73
125
 
74
- # Nested list/array type.
75
- class List < NestedType
76
- def initialize(inner)
77
- @inner = Utils.rb_type_to_dtype(inner)
78
- end
126
+ # Binary type.
127
+ class Binary < DataType
79
128
  end
80
129
 
81
130
  # Calendar date type.
82
131
  class Date < TemporalType
83
132
  end
84
133
 
134
+ # Time of day type.
135
+ class Time < TemporalType
136
+ end
137
+
85
138
  # Calendar date and time type.
86
139
  class Datetime < TemporalType
87
140
  attr_reader :time_unit, :time_zone
@@ -91,6 +144,20 @@ module Polars
91
144
  @time_unit = time_unit || "us"
92
145
  @time_zone = time_zone
93
146
  end
147
+
148
+ def ==(other)
149
+ if other.eql?(Datetime)
150
+ true
151
+ elsif other.is_a?(Datetime)
152
+ time_unit == other.time_unit && time_zone == other.time_zone
153
+ else
154
+ false
155
+ end
156
+ end
157
+
158
+ def to_s
159
+ "#{self.class.name}(time_unit: #{time_unit.inspect}, time_zone: #{time_zone.inspect})"
160
+ end
94
161
  end
95
162
 
96
163
  # Time duration/delta type.
@@ -101,18 +168,85 @@ module Polars
101
168
  def initialize(time_unit = "us")
102
169
  @time_unit = time_unit
103
170
  end
171
+
172
+ def ==(other)
173
+ if other.eql?(Duration)
174
+ true
175
+ elsif other.is_a?(Duration)
176
+ time_unit == other.time_unit
177
+ else
178
+ false
179
+ end
180
+ end
181
+
182
+ def to_s
183
+ "#{self.class.name}(time_unit: #{time_unit.inspect})"
184
+ end
104
185
  end
105
186
 
106
- # Time of day type.
107
- class Time < TemporalType
187
+ # A categorical encoding of a set of strings.
188
+ class Categorical < DataType
108
189
  end
109
190
 
110
191
  # Type for wrapping arbitrary Ruby objects.
111
192
  class Object < DataType
112
193
  end
113
194
 
114
- # A categorical encoding of a set of strings.
115
- class Categorical < DataType
195
+ # Type representing Null / None values.
196
+ class Null < DataType
197
+ end
198
+
199
+ # Type representing Datatype values that could not be determined statically.
200
+ class Unknown < DataType
201
+ end
202
+
203
+ # Nested list/array type.
204
+ class List < NestedType
205
+ attr_reader :inner
206
+
207
+ def initialize(inner)
208
+ @inner = Utils.rb_type_to_dtype(inner)
209
+ end
210
+
211
+ def ==(other)
212
+ if other.eql?(List)
213
+ true
214
+ elsif other.is_a?(List)
215
+ @inner.nil? || other.inner.nil? || @inner == other.inner
216
+ else
217
+ false
218
+ end
219
+ end
220
+
221
+ def to_s
222
+ "#{self.class.name}(#{inner})"
223
+ end
224
+ end
225
+
226
+ # Nested fixed-width array type.
227
+ class Array < NestedType
228
+ attr_reader :width, :inner
229
+
230
+ def initialize(width, inner = nil)
231
+ @width = width
232
+ @inner = Utils.rb_type_to_dtype(inner) if inner
233
+ end
234
+
235
+ # TODO check width?
236
+ def ==(other)
237
+ if other.eql?(Array)
238
+ true
239
+ elsif other.is_a?(Array)
240
+ @inner.nil? || other.inner.nil? || @inner == other.inner
241
+ else
242
+ false
243
+ end
244
+ end
245
+
246
+ # TODO add width?
247
+ def to_s
248
+ "#{self.class.name}(#{inner})"
249
+ end
116
250
  end
117
251
 
118
252
  # Definition of a single field within a `Struct` DataType.
@@ -124,9 +258,12 @@ module Polars
124
258
  @dtype = Utils.rb_type_to_dtype(dtype)
125
259
  end
126
260
 
127
- def inspect
128
- class_name = self.class.name
129
- "#{class_name}(#{@name}: #{@dtype})"
261
+ def ==(other)
262
+ name == other.name && dtype == other.dtype
263
+ end
264
+
265
+ def to_s
266
+ "#{self.class.name}(#{name.inspect}, #{dtype})"
130
267
  end
131
268
  end
132
269
 
@@ -142,25 +279,22 @@ module Polars
142
279
  end
143
280
  end
144
281
 
145
- def inspect
146
- class_name = self.class.name
147
- "#{class_name}(#{@fields})"
282
+ def ==(other)
283
+ if other.eql?(Struct)
284
+ true
285
+ elsif other.is_a?(Struct)
286
+ fields == other.fields
287
+ else
288
+ false
289
+ end
290
+ end
291
+
292
+ def to_s
293
+ "#{self.class.name}([#{fields.map(&:to_s).join("\n")}])"
148
294
  end
149
295
 
150
296
  def to_schema
151
297
  @fields.to_h { |f| [f.name, f.dtype] }
152
298
  end
153
299
  end
154
-
155
- # Binary type.
156
- class Binary < DataType
157
- end
158
-
159
- # Type representing Null / None values.
160
- class Null < DataType
161
- end
162
-
163
- # Type representing Datatype values that could not be determined statically.
164
- class Unknown < DataType
165
- end
166
300
  end
@@ -82,8 +82,15 @@ module Polars
82
82
  # # => 2001-01-02 00:00:00 UTC
83
83
  def median
84
84
  s = Utils.wrap_s(_s)
85
- out = s.median.to_i
86
- Utils._to_ruby_datetime(out, s.dtype, tu: s.time_unit)
85
+ out = s.median
86
+ if !out.nil?
87
+ if s.dtype == Date
88
+ return Utils._to_ruby_date(out.to_i)
89
+ else
90
+ return Utils._to_ruby_datetime(out.to_i, s.time_unit)
91
+ end
92
+ end
93
+ nil
87
94
  end
88
95
 
89
96
  # Return mean as Ruby object.
@@ -107,7 +114,14 @@ module Polars
107
114
  def mean
108
115
  s = Utils.wrap_s(_s)
109
116
  out = s.mean.to_i
110
- Utils._to_ruby_datetime(out, s.dtype, tu: s.time_unit)
117
+ if !out.nil?
118
+ if s.dtype == Date
119
+ return Utils._to_ruby_date(out.to_i)
120
+ else
121
+ return Utils._to_ruby_datetime(out.to_i, s.time_unit)
122
+ end
123
+ end
124
+ nil
111
125
  end
112
126
 
113
127
  # Format Date/datetime with a formatting rule.
data/lib/polars/expr.rb CHANGED
@@ -362,7 +362,7 @@ module Polars
362
362
  if columns.is_a?(String)
363
363
  columns = [columns]
364
364
  return wrap_expr(_rbexpr.exclude(columns))
365
- elsif !columns.is_a?(Array)
365
+ elsif !columns.is_a?(::Array)
366
366
  columns = [columns]
367
367
  return wrap_expr(_rbexpr.exclude_dtype(columns))
368
368
  end
@@ -820,18 +820,18 @@ module Polars
820
820
  # df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
821
821
  # # =>
822
822
  # # shape: (6, 1)
823
- # # ┌─────────┐
824
- # # │ literal
825
- # # │ ---
826
- # # │ i64
827
- # # ╞═════════╡
828
- # # │ null
829
- # # │ null
830
- # # │ null
831
- # # │ 1
832
- # # │ 1
833
- # # │ 2
834
- # # └─────────┘
823
+ # # ┌────────┐
824
+ # # │ repeat
825
+ # # │ ---
826
+ # # │ i64
827
+ # # ╞════════╡
828
+ # # │ null
829
+ # # │ null
830
+ # # │ null
831
+ # # │ 1
832
+ # # │ 1
833
+ # # │ 2
834
+ # # └────────┘
835
835
  def rechunk
836
836
  wrap_expr(_rbexpr.rechunk)
837
837
  end
@@ -1534,10 +1534,10 @@ module Polars
1534
1534
  # # │ two │
1535
1535
  # # └───────┘
1536
1536
  def sort_by(by, reverse: false)
1537
- if !by.is_a?(Array)
1537
+ if !by.is_a?(::Array)
1538
1538
  by = [by]
1539
1539
  end
1540
- if !reverse.is_a?(Array)
1540
+ if !reverse.is_a?(::Array)
1541
1541
  reverse = [reverse]
1542
1542
  end
1543
1543
  by = Utils.selection_to_rbexpr_list(by)
@@ -1578,7 +1578,7 @@ module Polars
1578
1578
  # # │ two ┆ 99 │
1579
1579
  # # └───────┴───────┘
1580
1580
  def take(indices)
1581
- if indices.is_a?(Array)
1581
+ if indices.is_a?(::Array)
1582
1582
  indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
1583
1583
  else
1584
1584
  indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
@@ -2436,14 +2436,14 @@ module Polars
2436
2436
  # ).sort("group_col")
2437
2437
  # # =>
2438
2438
  # # shape: (2, 3)
2439
- # # ┌───────────┬──────┬─────┐
2440
- # # │ group_col ┆ lt ┆ gte │
2441
- # # │ --- ┆ --- ┆ --- │
2442
- # # │ str ┆ i64 ┆ i64 │
2443
- # # ╞═══════════╪══════╪═════╡
2444
- # # │ g1 ┆ 1 ┆ 2 │
2445
- # # │ g2 ┆ null ┆ 3 │
2446
- # # └───────────┴──────┴─────┘
2439
+ # # ┌───────────┬─────┬─────┐
2440
+ # # │ group_col ┆ lt ┆ gte │
2441
+ # # │ --- ┆ --- ┆ --- │
2442
+ # # │ str ┆ i64 ┆ i64 │
2443
+ # # ╞═══════════╪═════╪═════╡
2444
+ # # │ g1 ┆ 1 ┆ 2 │
2445
+ # # │ g2 ┆ 0 ┆ 3 │
2446
+ # # └───────────┴─────┴─────┘
2447
2447
  def filter(predicate)
2448
2448
  wrap_expr(_rbexpr.filter(predicate._rbexpr))
2449
2449
  end
@@ -2474,14 +2474,14 @@ module Polars
2474
2474
  # ).sort("group_col")
2475
2475
  # # =>
2476
2476
  # # shape: (2, 3)
2477
- # # ┌───────────┬──────┬─────┐
2478
- # # │ group_col ┆ lt ┆ gte │
2479
- # # │ --- ┆ --- ┆ --- │
2480
- # # │ str ┆ i64 ┆ i64 │
2481
- # # ╞═══════════╪══════╪═════╡
2482
- # # │ g1 ┆ 1 ┆ 2 │
2483
- # # │ g2 ┆ null ┆ 3 │
2484
- # # └───────────┴──────┴─────┘
2477
+ # # ┌───────────┬─────┬─────┐
2478
+ # # │ group_col ┆ lt ┆ gte │
2479
+ # # │ --- ┆ --- ┆ --- │
2480
+ # # │ str ┆ i64 ┆ i64 │
2481
+ # # ╞═══════════╪═════╪═════╡
2482
+ # # │ g1 ┆ 1 ┆ 2 │
2483
+ # # │ g2 ┆ 0 ┆ 3 │
2484
+ # # └───────────┴─────┴─────┘
2485
2485
  def where(predicate)
2486
2486
  filter(predicate)
2487
2487
  end
@@ -2616,25 +2616,23 @@ module Polars
2616
2616
  # @return [Expr]
2617
2617
  #
2618
2618
  # @example
2619
- # df = Polars::DataFrame.new({"foo" => ["hello", "world"]})
2620
- # df.select(Polars.col("foo").flatten)
2621
- # # =>
2622
- # # shape: (10, 1)
2623
- # # ┌─────┐
2624
- # # │ foo │
2625
- # # │ --- │
2626
- # # │ str │
2627
- # # ╞═════╡
2628
- # # │ h │
2629
- # # │ e
2630
- # # │ l
2631
- # # │ l
2632
- # # │ … │
2633
- # # │ o
2634
- # # │ r
2635
- # # │ l │
2636
- # # │ d │
2637
- # # └─────┘
2619
+ # df = Polars::DataFrame.new(
2620
+ # {
2621
+ # "group" => ["a", "b", "b"],
2622
+ # "values" => [[1, 2], [2, 3], [4]]
2623
+ # }
2624
+ # )
2625
+ # df.groupby("group").agg(Polars.col("values").flatten)
2626
+ # # =>
2627
+ # # shape: (2, 2)
2628
+ # # ┌───────┬───────────┐
2629
+ # # │ group ┆ values
2630
+ # # │ --- ┆ ---
2631
+ # # │ str ┆ list[i64]
2632
+ # # ╞═══════╪═══════════╡
2633
+ # # │ a ┆ [1, 2]
2634
+ # # │ b ┆ [2, 3, 4]
2635
+ # # └───────┴───────────┘
2638
2636
  def flatten
2639
2637
  wrap_expr(_rbexpr.explode)
2640
2638
  end
@@ -2798,7 +2796,7 @@ module Polars
2798
2796
  # # │ false │
2799
2797
  # # └──────────┘
2800
2798
  def is_in(other)
2801
- if other.is_a?(Array)
2799
+ if other.is_a?(::Array)
2802
2800
  if other.length == 0
2803
2801
  other = Polars.lit(nil)
2804
2802
  else
@@ -3502,14 +3500,15 @@ module Polars
3502
3500
  min_periods: nil,
3503
3501
  center: false,
3504
3502
  by: nil,
3505
- closed: "left"
3503
+ closed: "left",
3504
+ ddof: 1
3506
3505
  )
3507
3506
  window_size, min_periods = _prepare_rolling_window_args(
3508
3507
  window_size, min_periods
3509
3508
  )
3510
3509
  wrap_expr(
3511
3510
  _rbexpr.rolling_std(
3512
- window_size, weights, min_periods, center, by, closed
3511
+ window_size, weights, min_periods, center, by, closed, ddof
3513
3512
  )
3514
3513
  )
3515
3514
  end
@@ -3591,14 +3590,15 @@ module Polars
3591
3590
  min_periods: nil,
3592
3591
  center: false,
3593
3592
  by: nil,
3594
- closed: "left"
3593
+ closed: "left",
3594
+ ddof: 1
3595
3595
  )
3596
3596
  window_size, min_periods = _prepare_rolling_window_args(
3597
3597
  window_size, min_periods
3598
3598
  )
3599
3599
  wrap_expr(
3600
3600
  _rbexpr.rolling_var(
3601
- window_size, weights, min_periods, center, by, closed
3601
+ window_size, weights, min_periods, center, by, closed, ddof
3602
3602
  )
3603
3603
  )
3604
3604
  end
@@ -4558,11 +4558,11 @@ module Polars
4558
4558
  # # │ 1 │
4559
4559
  # # │ 3 │
4560
4560
  # # └─────┘
4561
- def shuffle(seed: nil)
4561
+ def shuffle(seed: nil, fixed_seed: false)
4562
4562
  if seed.nil?
4563
4563
  seed = rand(10000)
4564
4564
  end
4565
- wrap_expr(_rbexpr.shuffle(seed))
4565
+ wrap_expr(_rbexpr.shuffle(seed, fixed_seed))
4566
4566
  end
4567
4567
 
4568
4568
  # Sample from this expression.
@@ -4600,21 +4600,22 @@ module Polars
4600
4600
  with_replacement: true,
4601
4601
  shuffle: false,
4602
4602
  seed: nil,
4603
- n: nil
4603
+ n: nil,
4604
+ fixed_seed: false
4604
4605
  )
4605
4606
  if !n.nil? && !frac.nil?
4606
4607
  raise ArgumentError, "cannot specify both `n` and `frac`"
4607
4608
  end
4608
4609
 
4609
4610
  if !n.nil? && frac.nil?
4610
- return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
4611
+ return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed, fixed_seed))
4611
4612
  end
4612
4613
 
4613
4614
  if frac.nil?
4614
4615
  frac = 1.0
4615
4616
  end
4616
4617
  wrap_expr(
4617
- _rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
4618
+ _rbexpr.sample_frac(frac, with_replacement, shuffle, seed, fixed_seed)
4618
4619
  )
4619
4620
  end
4620
4621
 
@@ -4929,8 +4930,8 @@ module Polars
4929
4930
  #
4930
4931
  # Enables downstream code to use fast paths for sorted arrays.
4931
4932
  #
4932
- # @param reverse [Boolean]
4933
- # If the `Series` order is reversed, e.g. descending.
4933
+ # @param descending [Boolean]
4934
+ # Whether the `Series` order is descending.
4934
4935
  #
4935
4936
  # @return [Expr]
4936
4937
  #
@@ -4950,9 +4951,9 @@ module Polars
4950
4951
  # # ╞════════╡
4951
4952
  # # │ 3 │
4952
4953
  # # └────────┘
4953
- # def set_sorted(reverse: false)
4954
- # map { |s| s.set_sorted(reverse) }
4955
- # end
4954
+ def set_sorted(descending: false)
4955
+ wrap_expr(_rbexpr.set_sorted_flag(descending))
4956
+ end
4956
4957
 
4957
4958
  # Aggregate to list.
4958
4959
  #
@@ -4965,7 +4966,7 @@ module Polars
4965
4966
  # "b" => [4, 5, 6]
4966
4967
  # }
4967
4968
  # )
4968
- # df.select(Polars.all.list)
4969
+ # df.select(Polars.all.implode)
4969
4970
  # # =>
4970
4971
  # # shape: (1, 2)
4971
4972
  # # ┌───────────┬───────────┐
@@ -4978,7 +4979,6 @@ module Polars
4978
4979
  def implode
4979
4980
  wrap_expr(_rbexpr.implode)
4980
4981
  end
4981
- alias_method :list, :implode
4982
4982
 
4983
4983
  # Shrink numeric columns to the minimal required datatype.
4984
4984
  #
@@ -5018,10 +5018,17 @@ module Polars
5018
5018
  # Create an object namespace of all list related methods.
5019
5019
  #
5020
5020
  # @return [ListExpr]
5021
- def arr
5021
+ def list
5022
5022
  ListExpr.new(self)
5023
5023
  end
5024
5024
 
5025
+ # Create an object namespace of all array related methods.
5026
+ #
5027
+ # @return [ArrayExpr]
5028
+ def arr
5029
+ ArrayExpr.new(self)
5030
+ end
5031
+
5025
5032
  # Create an object namespace of all binary related methods.
5026
5033
  #
5027
5034
  # @return [BinaryExpr]
@@ -43,7 +43,6 @@ module Polars
43
43
  # # │ i64 ┆ i64 │
44
44
  # # ╞═════╪═════╡
45
45
  # # │ 1 ┆ 3 │
46
- # # ├╌╌╌╌╌┼╌╌╌╌╌┤
47
46
  # # │ 2 ┆ 4 │
48
47
  # # └─────┴─────┘
49
48
  def concat(items, rechunk: true, how: "vertical", parallel: true)
@@ -551,32 +551,11 @@ module Polars
551
551
  agg(Polars.all.median)
552
552
  end
553
553
 
554
- # Aggregate the groups into Series.
555
- #
556
- # @return [DataFrame]
557
- #
558
- # @example
559
- # df = Polars::DataFrame.new({"a" => ["one", "two", "one", "two"], "b" => [1, 2, 3, 4]})
560
- # df.groupby("a", maintain_order: true).agg_list
561
- # # =>
562
- # # shape: (2, 2)
563
- # # ┌─────┬─────────────────┐
564
- # # │ a ┆ b │
565
- # # │ --- ┆ --- │
566
- # # │ str ┆ list[list[i64]] │
567
- # # ╞═════╪═════════════════╡
568
- # # │ one ┆ [[1, 3]] │
569
- # # │ two ┆ [[2, 4]] │
570
- # # └─────┴─────────────────┘
571
- def agg_list
572
- agg(Polars.all.list)
573
- end
574
-
575
554
  # Plot data.
576
555
  #
577
556
  # @return [Vega::LiteChart]
578
557
  def plot(*args, **options)
579
- raise ArgumentError, "Multiple groups not supported" if by.is_a?(Array) && by.size > 1
558
+ raise ArgumentError, "Multiple groups not supported" if by.is_a?(::Array) && by.size > 1
580
559
  # same message as Ruby
581
560
  raise ArgumentError, "unknown keyword: :group" if options.key?(:group)
582
561