polars-df 0.5.0-x86_64-linux → 0.6.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,25 @@
1
1
  module Polars
2
2
  # Base class for all Polars data types.
3
3
  class DataType
4
+ def self.base_type
5
+ self
6
+ end
7
+
8
+ def base_type
9
+ is_a?(DataType) ? self.class : self
10
+ end
11
+
12
+ def self.nested?
13
+ false
14
+ end
15
+
16
+ def nested?
17
+ self.class.nested?
18
+ end
19
+
20
+ def self.==(other)
21
+ eql?(other) || other.is_a?(self)
22
+ end
4
23
  end
5
24
 
6
25
  # Base class for numeric data types.
@@ -15,12 +34,19 @@ module Polars
15
34
  class FractionalType < NumericType
16
35
  end
17
36
 
37
+ # Base class for float data types.
38
+ class FloatType < FractionalType
39
+ end
40
+
18
41
  # Base class for temporal data types.
19
42
  class TemporalType < DataType
20
43
  end
21
44
 
22
45
  # Base class for nested data types.
23
46
  class NestedType < DataType
47
+ def self.nested?
48
+ true
49
+ end
24
50
  end
25
51
 
26
52
  # 8-bit signed integer type.
@@ -56,11 +82,37 @@ module Polars
56
82
  end
57
83
 
58
84
  # 32-bit floating point type.
59
- class Float32 < FractionalType
85
+ class Float32 < FloatType
60
86
  end
61
87
 
62
88
  # 64-bit floating point type.
63
- class Float64 < FractionalType
89
+ class Float64 < FloatType
90
+ end
91
+
92
+ # Decimal 128-bit type with an optional precision and non-negative scale.
93
+ #
94
+ # NOTE: this is an experimental work-in-progress feature and may not work as expected.
95
+ class Decimal < FractionalType
96
+ attr_reader :precision, :scale
97
+
98
+ def initialize(precision, scale)
99
+ @precision = precision
100
+ @scale = scale
101
+ end
102
+
103
+ def ==(other)
104
+ if other.eql?(Decimal)
105
+ true
106
+ elsif other.is_a?(Decimal)
107
+ precision == other.precision && scale == other.scale
108
+ else
109
+ false
110
+ end
111
+ end
112
+
113
+ def to_s
114
+ "#{self.class.name}(precision: #{precision.inspect}, scale: #{scale.inspect})"
115
+ end
64
116
  end
65
117
 
66
118
  # Boolean type.
@@ -71,17 +123,18 @@ module Polars
71
123
  class Utf8 < DataType
72
124
  end
73
125
 
74
- # Nested list/array type.
75
- class List < NestedType
76
- def initialize(inner)
77
- @inner = Utils.rb_type_to_dtype(inner)
78
- end
126
+ # Binary type.
127
+ class Binary < DataType
79
128
  end
80
129
 
81
130
  # Calendar date type.
82
131
  class Date < TemporalType
83
132
  end
84
133
 
134
+ # Time of day type.
135
+ class Time < TemporalType
136
+ end
137
+
85
138
  # Calendar date and time type.
86
139
  class Datetime < TemporalType
87
140
  attr_reader :time_unit, :time_zone
@@ -91,6 +144,20 @@ module Polars
91
144
  @time_unit = time_unit || "us"
92
145
  @time_zone = time_zone
93
146
  end
147
+
148
+ def ==(other)
149
+ if other.eql?(Datetime)
150
+ true
151
+ elsif other.is_a?(Datetime)
152
+ time_unit == other.time_unit && time_zone == other.time_zone
153
+ else
154
+ false
155
+ end
156
+ end
157
+
158
+ def to_s
159
+ "#{self.class.name}(time_unit: #{time_unit.inspect}, time_zone: #{time_zone.inspect})"
160
+ end
94
161
  end
95
162
 
96
163
  # Time duration/delta type.
@@ -101,18 +168,85 @@ module Polars
101
168
  def initialize(time_unit = "us")
102
169
  @time_unit = time_unit
103
170
  end
171
+
172
+ def ==(other)
173
+ if other.eql?(Duration)
174
+ true
175
+ elsif other.is_a?(Duration)
176
+ time_unit == other.time_unit
177
+ else
178
+ false
179
+ end
180
+ end
181
+
182
+ def to_s
183
+ "#{self.class.name}(time_unit: #{time_unit.inspect})"
184
+ end
104
185
  end
105
186
 
106
- # Time of day type.
107
- class Time < TemporalType
187
+ # A categorical encoding of a set of strings.
188
+ class Categorical < DataType
108
189
  end
109
190
 
110
191
  # Type for wrapping arbitrary Ruby objects.
111
192
  class Object < DataType
112
193
  end
113
194
 
114
- # A categorical encoding of a set of strings.
115
- class Categorical < DataType
195
+ # Type representing Null / None values.
196
+ class Null < DataType
197
+ end
198
+
199
+ # Type representing Datatype values that could not be determined statically.
200
+ class Unknown < DataType
201
+ end
202
+
203
+ # Nested list/array type.
204
+ class List < NestedType
205
+ attr_reader :inner
206
+
207
+ def initialize(inner)
208
+ @inner = Utils.rb_type_to_dtype(inner)
209
+ end
210
+
211
+ def ==(other)
212
+ if other.eql?(List)
213
+ true
214
+ elsif other.is_a?(List)
215
+ @inner.nil? || other.inner.nil? || @inner == other.inner
216
+ else
217
+ false
218
+ end
219
+ end
220
+
221
+ def to_s
222
+ "#{self.class.name}(#{inner})"
223
+ end
224
+ end
225
+
226
+ # Nested fixed-width array type.
227
+ class Array < NestedType
228
+ attr_reader :width, :inner
229
+
230
+ def initialize(width, inner = nil)
231
+ @width = width
232
+ @inner = Utils.rb_type_to_dtype(inner) if inner
233
+ end
234
+
235
+ # TODO check width?
236
+ def ==(other)
237
+ if other.eql?(Array)
238
+ true
239
+ elsif other.is_a?(Array)
240
+ @inner.nil? || other.inner.nil? || @inner == other.inner
241
+ else
242
+ false
243
+ end
244
+ end
245
+
246
+ # TODO add width?
247
+ def to_s
248
+ "#{self.class.name}(#{inner})"
249
+ end
116
250
  end
117
251
 
118
252
  # Definition of a single field within a `Struct` DataType.
@@ -124,9 +258,12 @@ module Polars
124
258
  @dtype = Utils.rb_type_to_dtype(dtype)
125
259
  end
126
260
 
127
- def inspect
128
- class_name = self.class.name
129
- "#{class_name}(#{@name}: #{@dtype})"
261
+ def ==(other)
262
+ name == other.name && dtype == other.dtype
263
+ end
264
+
265
+ def to_s
266
+ "#{self.class.name}(#{name.inspect}, #{dtype})"
130
267
  end
131
268
  end
132
269
 
@@ -142,25 +279,22 @@ module Polars
142
279
  end
143
280
  end
144
281
 
145
- def inspect
146
- class_name = self.class.name
147
- "#{class_name}(#{@fields})"
282
+ def ==(other)
283
+ if other.eql?(Struct)
284
+ true
285
+ elsif other.is_a?(Struct)
286
+ fields == other.fields
287
+ else
288
+ false
289
+ end
290
+ end
291
+
292
+ def to_s
293
+ "#{self.class.name}([#{fields.map(&:to_s).join("\n")}])"
148
294
  end
149
295
 
150
296
  def to_schema
151
297
  @fields.to_h { |f| [f.name, f.dtype] }
152
298
  end
153
299
  end
154
-
155
- # Binary type.
156
- class Binary < DataType
157
- end
158
-
159
- # Type representing Null / None values.
160
- class Null < DataType
161
- end
162
-
163
- # Type representing Datatype values that could not be determined statically.
164
- class Unknown < DataType
165
- end
166
300
  end
@@ -82,8 +82,15 @@ module Polars
82
82
  # # => 2001-01-02 00:00:00 UTC
83
83
  def median
84
84
  s = Utils.wrap_s(_s)
85
- out = s.median.to_i
86
- Utils._to_ruby_datetime(out, s.dtype, tu: s.time_unit)
85
+ out = s.median
86
+ if !out.nil?
87
+ if s.dtype == Date
88
+ return Utils._to_ruby_date(out.to_i)
89
+ else
90
+ return Utils._to_ruby_datetime(out.to_i, s.time_unit)
91
+ end
92
+ end
93
+ nil
87
94
  end
88
95
 
89
96
  # Return mean as Ruby object.
@@ -107,7 +114,14 @@ module Polars
107
114
  def mean
108
115
  s = Utils.wrap_s(_s)
109
116
  out = s.mean.to_i
110
- Utils._to_ruby_datetime(out, s.dtype, tu: s.time_unit)
117
+ if !out.nil?
118
+ if s.dtype == Date
119
+ return Utils._to_ruby_date(out.to_i)
120
+ else
121
+ return Utils._to_ruby_datetime(out.to_i, s.time_unit)
122
+ end
123
+ end
124
+ nil
111
125
  end
112
126
 
113
127
  # Format Date/datetime with a formatting rule.
data/lib/polars/expr.rb CHANGED
@@ -362,7 +362,7 @@ module Polars
362
362
  if columns.is_a?(String)
363
363
  columns = [columns]
364
364
  return wrap_expr(_rbexpr.exclude(columns))
365
- elsif !columns.is_a?(Array)
365
+ elsif !columns.is_a?(::Array)
366
366
  columns = [columns]
367
367
  return wrap_expr(_rbexpr.exclude_dtype(columns))
368
368
  end
@@ -820,18 +820,18 @@ module Polars
820
820
  # df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
821
821
  # # =>
822
822
  # # shape: (6, 1)
823
- # # ┌─────────┐
824
- # # │ literal
825
- # # │ ---
826
- # # │ i64
827
- # # ╞═════════╡
828
- # # │ null
829
- # # │ null
830
- # # │ null
831
- # # │ 1
832
- # # │ 1
833
- # # │ 2
834
- # # └─────────┘
823
+ # # ┌────────┐
824
+ # # │ repeat
825
+ # # │ ---
826
+ # # │ i64
827
+ # # ╞════════╡
828
+ # # │ null
829
+ # # │ null
830
+ # # │ null
831
+ # # │ 1
832
+ # # │ 1
833
+ # # │ 2
834
+ # # └────────┘
835
835
  def rechunk
836
836
  wrap_expr(_rbexpr.rechunk)
837
837
  end
@@ -1534,10 +1534,10 @@ module Polars
1534
1534
  # # │ two │
1535
1535
  # # └───────┘
1536
1536
  def sort_by(by, reverse: false)
1537
- if !by.is_a?(Array)
1537
+ if !by.is_a?(::Array)
1538
1538
  by = [by]
1539
1539
  end
1540
- if !reverse.is_a?(Array)
1540
+ if !reverse.is_a?(::Array)
1541
1541
  reverse = [reverse]
1542
1542
  end
1543
1543
  by = Utils.selection_to_rbexpr_list(by)
@@ -1578,7 +1578,7 @@ module Polars
1578
1578
  # # │ two ┆ 99 │
1579
1579
  # # └───────┴───────┘
1580
1580
  def take(indices)
1581
- if indices.is_a?(Array)
1581
+ if indices.is_a?(::Array)
1582
1582
  indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
1583
1583
  else
1584
1584
  indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
@@ -2436,14 +2436,14 @@ module Polars
2436
2436
  # ).sort("group_col")
2437
2437
  # # =>
2438
2438
  # # shape: (2, 3)
2439
- # # ┌───────────┬──────┬─────┐
2440
- # # │ group_col ┆ lt ┆ gte │
2441
- # # │ --- ┆ --- ┆ --- │
2442
- # # │ str ┆ i64 ┆ i64 │
2443
- # # ╞═══════════╪══════╪═════╡
2444
- # # │ g1 ┆ 1 ┆ 2 │
2445
- # # │ g2 ┆ null ┆ 3 │
2446
- # # └───────────┴──────┴─────┘
2439
+ # # ┌───────────┬─────┬─────┐
2440
+ # # │ group_col ┆ lt ┆ gte │
2441
+ # # │ --- ┆ --- ┆ --- │
2442
+ # # │ str ┆ i64 ┆ i64 │
2443
+ # # ╞═══════════╪═════╪═════╡
2444
+ # # │ g1 ┆ 1 ┆ 2 │
2445
+ # # │ g2 ┆ 0 ┆ 3 │
2446
+ # # └───────────┴─────┴─────┘
2447
2447
  def filter(predicate)
2448
2448
  wrap_expr(_rbexpr.filter(predicate._rbexpr))
2449
2449
  end
@@ -2474,14 +2474,14 @@ module Polars
2474
2474
  # ).sort("group_col")
2475
2475
  # # =>
2476
2476
  # # shape: (2, 3)
2477
- # # ┌───────────┬──────┬─────┐
2478
- # # │ group_col ┆ lt ┆ gte │
2479
- # # │ --- ┆ --- ┆ --- │
2480
- # # │ str ┆ i64 ┆ i64 │
2481
- # # ╞═══════════╪══════╪═════╡
2482
- # # │ g1 ┆ 1 ┆ 2 │
2483
- # # │ g2 ┆ null ┆ 3 │
2484
- # # └───────────┴──────┴─────┘
2477
+ # # ┌───────────┬─────┬─────┐
2478
+ # # │ group_col ┆ lt ┆ gte │
2479
+ # # │ --- ┆ --- ┆ --- │
2480
+ # # │ str ┆ i64 ┆ i64 │
2481
+ # # ╞═══════════╪═════╪═════╡
2482
+ # # │ g1 ┆ 1 ┆ 2 │
2483
+ # # │ g2 ┆ 0 ┆ 3 │
2484
+ # # └───────────┴─────┴─────┘
2485
2485
  def where(predicate)
2486
2486
  filter(predicate)
2487
2487
  end
@@ -2616,25 +2616,23 @@ module Polars
2616
2616
  # @return [Expr]
2617
2617
  #
2618
2618
  # @example
2619
- # df = Polars::DataFrame.new({"foo" => ["hello", "world"]})
2620
- # df.select(Polars.col("foo").flatten)
2621
- # # =>
2622
- # # shape: (10, 1)
2623
- # # ┌─────┐
2624
- # # │ foo │
2625
- # # │ --- │
2626
- # # │ str │
2627
- # # ╞═════╡
2628
- # # │ h │
2629
- # # │ e
2630
- # # │ l
2631
- # # │ l
2632
- # # │ … │
2633
- # # │ o
2634
- # # │ r
2635
- # # │ l │
2636
- # # │ d │
2637
- # # └─────┘
2619
+ # df = Polars::DataFrame.new(
2620
+ # {
2621
+ # "group" => ["a", "b", "b"],
2622
+ # "values" => [[1, 2], [2, 3], [4]]
2623
+ # }
2624
+ # )
2625
+ # df.groupby("group").agg(Polars.col("values").flatten)
2626
+ # # =>
2627
+ # # shape: (2, 2)
2628
+ # # ┌───────┬───────────┐
2629
+ # # │ group ┆ values
2630
+ # # │ --- ┆ ---
2631
+ # # │ str ┆ list[i64]
2632
+ # # ╞═══════╪═══════════╡
2633
+ # # │ a ┆ [1, 2]
2634
+ # # │ b ┆ [2, 3, 4]
2635
+ # # └───────┴───────────┘
2638
2636
  def flatten
2639
2637
  wrap_expr(_rbexpr.explode)
2640
2638
  end
@@ -2798,7 +2796,7 @@ module Polars
2798
2796
  # # │ false │
2799
2797
  # # └──────────┘
2800
2798
  def is_in(other)
2801
- if other.is_a?(Array)
2799
+ if other.is_a?(::Array)
2802
2800
  if other.length == 0
2803
2801
  other = Polars.lit(nil)
2804
2802
  else
@@ -3502,14 +3500,15 @@ module Polars
3502
3500
  min_periods: nil,
3503
3501
  center: false,
3504
3502
  by: nil,
3505
- closed: "left"
3503
+ closed: "left",
3504
+ ddof: 1
3506
3505
  )
3507
3506
  window_size, min_periods = _prepare_rolling_window_args(
3508
3507
  window_size, min_periods
3509
3508
  )
3510
3509
  wrap_expr(
3511
3510
  _rbexpr.rolling_std(
3512
- window_size, weights, min_periods, center, by, closed
3511
+ window_size, weights, min_periods, center, by, closed, ddof
3513
3512
  )
3514
3513
  )
3515
3514
  end
@@ -3591,14 +3590,15 @@ module Polars
3591
3590
  min_periods: nil,
3592
3591
  center: false,
3593
3592
  by: nil,
3594
- closed: "left"
3593
+ closed: "left",
3594
+ ddof: 1
3595
3595
  )
3596
3596
  window_size, min_periods = _prepare_rolling_window_args(
3597
3597
  window_size, min_periods
3598
3598
  )
3599
3599
  wrap_expr(
3600
3600
  _rbexpr.rolling_var(
3601
- window_size, weights, min_periods, center, by, closed
3601
+ window_size, weights, min_periods, center, by, closed, ddof
3602
3602
  )
3603
3603
  )
3604
3604
  end
@@ -4558,11 +4558,11 @@ module Polars
4558
4558
  # # │ 1 │
4559
4559
  # # │ 3 │
4560
4560
  # # └─────┘
4561
- def shuffle(seed: nil)
4561
+ def shuffle(seed: nil, fixed_seed: false)
4562
4562
  if seed.nil?
4563
4563
  seed = rand(10000)
4564
4564
  end
4565
- wrap_expr(_rbexpr.shuffle(seed))
4565
+ wrap_expr(_rbexpr.shuffle(seed, fixed_seed))
4566
4566
  end
4567
4567
 
4568
4568
  # Sample from this expression.
@@ -4600,21 +4600,22 @@ module Polars
4600
4600
  with_replacement: true,
4601
4601
  shuffle: false,
4602
4602
  seed: nil,
4603
- n: nil
4603
+ n: nil,
4604
+ fixed_seed: false
4604
4605
  )
4605
4606
  if !n.nil? && !frac.nil?
4606
4607
  raise ArgumentError, "cannot specify both `n` and `frac`"
4607
4608
  end
4608
4609
 
4609
4610
  if !n.nil? && frac.nil?
4610
- return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
4611
+ return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed, fixed_seed))
4611
4612
  end
4612
4613
 
4613
4614
  if frac.nil?
4614
4615
  frac = 1.0
4615
4616
  end
4616
4617
  wrap_expr(
4617
- _rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
4618
+ _rbexpr.sample_frac(frac, with_replacement, shuffle, seed, fixed_seed)
4618
4619
  )
4619
4620
  end
4620
4621
 
@@ -4929,8 +4930,8 @@ module Polars
4929
4930
  #
4930
4931
  # Enables downstream code to use fast paths for sorted arrays.
4931
4932
  #
4932
- # @param reverse [Boolean]
4933
- # If the `Series` order is reversed, e.g. descending.
4933
+ # @param descending [Boolean]
4934
+ # Whether the `Series` order is descending.
4934
4935
  #
4935
4936
  # @return [Expr]
4936
4937
  #
@@ -4950,9 +4951,9 @@ module Polars
4950
4951
  # # ╞════════╡
4951
4952
  # # │ 3 │
4952
4953
  # # └────────┘
4953
- # def set_sorted(reverse: false)
4954
- # map { |s| s.set_sorted(reverse) }
4955
- # end
4954
+ def set_sorted(descending: false)
4955
+ wrap_expr(_rbexpr.set_sorted_flag(descending))
4956
+ end
4956
4957
 
4957
4958
  # Aggregate to list.
4958
4959
  #
@@ -4965,7 +4966,7 @@ module Polars
4965
4966
  # "b" => [4, 5, 6]
4966
4967
  # }
4967
4968
  # )
4968
- # df.select(Polars.all.list)
4969
+ # df.select(Polars.all.implode)
4969
4970
  # # =>
4970
4971
  # # shape: (1, 2)
4971
4972
  # # ┌───────────┬───────────┐
@@ -4978,7 +4979,6 @@ module Polars
4978
4979
  def implode
4979
4980
  wrap_expr(_rbexpr.implode)
4980
4981
  end
4981
- alias_method :list, :implode
4982
4982
 
4983
4983
  # Shrink numeric columns to the minimal required datatype.
4984
4984
  #
@@ -5018,10 +5018,17 @@ module Polars
5018
5018
  # Create an object namespace of all list related methods.
5019
5019
  #
5020
5020
  # @return [ListExpr]
5021
- def arr
5021
+ def list
5022
5022
  ListExpr.new(self)
5023
5023
  end
5024
5024
 
5025
+ # Create an object namespace of all array related methods.
5026
+ #
5027
+ # @return [ArrayExpr]
5028
+ def arr
5029
+ ArrayExpr.new(self)
5030
+ end
5031
+
5025
5032
  # Create an object namespace of all binary related methods.
5026
5033
  #
5027
5034
  # @return [BinaryExpr]
@@ -43,7 +43,6 @@ module Polars
43
43
  # # │ i64 ┆ i64 │
44
44
  # # ╞═════╪═════╡
45
45
  # # │ 1 ┆ 3 │
46
- # # ├╌╌╌╌╌┼╌╌╌╌╌┤
47
46
  # # │ 2 ┆ 4 │
48
47
  # # └─────┴─────┘
49
48
  def concat(items, rechunk: true, how: "vertical", parallel: true)
@@ -551,32 +551,11 @@ module Polars
551
551
  agg(Polars.all.median)
552
552
  end
553
553
 
554
- # Aggregate the groups into Series.
555
- #
556
- # @return [DataFrame]
557
- #
558
- # @example
559
- # df = Polars::DataFrame.new({"a" => ["one", "two", "one", "two"], "b" => [1, 2, 3, 4]})
560
- # df.groupby("a", maintain_order: true).agg_list
561
- # # =>
562
- # # shape: (2, 2)
563
- # # ┌─────┬─────────────────┐
564
- # # │ a ┆ b │
565
- # # │ --- ┆ --- │
566
- # # │ str ┆ list[list[i64]] │
567
- # # ╞═════╪═════════════════╡
568
- # # │ one ┆ [[1, 3]] │
569
- # # │ two ┆ [[2, 4]] │
570
- # # └─────┴─────────────────┘
571
- def agg_list
572
- agg(Polars.all.list)
573
- end
574
-
575
554
  # Plot data.
576
555
  #
577
556
  # @return [Vega::LiteChart]
578
557
  def plot(*args, **options)
579
- raise ArgumentError, "Multiple groups not supported" if by.is_a?(Array) && by.size > 1
558
+ raise ArgumentError, "Multiple groups not supported" if by.is_a?(::Array) && by.size > 1
580
559
  # same message as Ruby
581
560
  raise ArgumentError, "unknown keyword: :group" if options.key?(:group)
582
561