polars-df 0.4.0-arm64-darwin → 0.6.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,25 @@
1
1
  module Polars
2
2
  # Base class for all Polars data types.
3
3
  class DataType
4
+ def self.base_type
5
+ self
6
+ end
7
+
8
+ def base_type
9
+ is_a?(DataType) ? self.class : self
10
+ end
11
+
12
+ def self.nested?
13
+ false
14
+ end
15
+
16
+ def nested?
17
+ self.class.nested?
18
+ end
19
+
20
+ def self.==(other)
21
+ eql?(other) || other.is_a?(self)
22
+ end
4
23
  end
5
24
 
6
25
  # Base class for numeric data types.
@@ -15,12 +34,19 @@ module Polars
15
34
  class FractionalType < NumericType
16
35
  end
17
36
 
37
+ # Base class for float data types.
38
+ class FloatType < FractionalType
39
+ end
40
+
18
41
  # Base class for temporal data types.
19
42
  class TemporalType < DataType
20
43
  end
21
44
 
22
45
  # Base class for nested data types.
23
46
  class NestedType < DataType
47
+ def self.nested?
48
+ true
49
+ end
24
50
  end
25
51
 
26
52
  # 8-bit signed integer type.
@@ -56,11 +82,37 @@ module Polars
56
82
  end
57
83
 
58
84
  # 32-bit floating point type.
59
- class Float32 < FractionalType
85
+ class Float32 < FloatType
60
86
  end
61
87
 
62
88
  # 64-bit floating point type.
63
- class Float64 < FractionalType
89
+ class Float64 < FloatType
90
+ end
91
+
92
+ # Decimal 128-bit type with an optional precision and non-negative scale.
93
+ #
94
+ # NOTE: this is an experimental work-in-progress feature and may not work as expected.
95
class Decimal < FractionalType
  attr_reader :precision, :scale

  # @param precision [Object] stored as-is and exposed via `precision`
  # @param scale [Object] stored as-is and exposed via `scale`
  def initialize(precision, scale)
    @precision, @scale = precision, scale
  end

  # Equal to the `Decimal` class itself, or to another Decimal instance
  # with the same precision and scale.
  def ==(other)
    return true if other.eql?(Decimal)
    return false unless other.is_a?(Decimal)

    precision == other.precision && scale == other.scale
  end

  # Renders the class name followed by the inspected precision and scale.
  def to_s
    klass = self.class.name
    "#{klass}(precision: #{precision.inspect}, scale: #{scale.inspect})"
  end
end
65
117
 
66
118
  # Boolean type.
@@ -71,46 +123,130 @@ module Polars
71
123
  class Utf8 < DataType
72
124
  end
73
125
 
74
- # Nested list/array type.
75
- class List < NestedType
76
- def initialize(inner)
77
- @inner = Utils.rb_type_to_dtype(inner)
78
- end
126
+ # Binary type.
127
+ class Binary < DataType
79
128
  end
80
129
 
81
130
  # Calendar date type.
82
131
  class Date < TemporalType
83
132
  end
84
133
 
134
+ # Time of day type.
135
+ class Time < TemporalType
136
+ end
137
+
85
138
  # Calendar date and time type.
86
139
  class Datetime < TemporalType
87
- attr_reader :tu
140
+ attr_reader :time_unit, :time_zone
141
+ alias_method :tu, :time_unit
88
142
 
89
143
  def initialize(time_unit = "us", time_zone = nil)
90
- @tu = time_unit || "us"
144
+ @time_unit = time_unit || "us"
91
145
  @time_zone = time_zone
92
146
  end
147
+
148
+ def ==(other)
149
+ if other.eql?(Datetime)
150
+ true
151
+ elsif other.is_a?(Datetime)
152
+ time_unit == other.time_unit && time_zone == other.time_zone
153
+ else
154
+ false
155
+ end
156
+ end
157
+
158
+ def to_s
159
+ "#{self.class.name}(time_unit: #{time_unit.inspect}, time_zone: #{time_zone.inspect})"
160
+ end
93
161
  end
94
162
 
95
163
  # Time duration/delta type.
96
164
  class Duration < TemporalType
97
- attr_reader :tu
165
+ attr_reader :time_unit
166
+ alias_method :tu, :time_unit
98
167
 
99
168
  def initialize(time_unit = "us")
100
- @tu = time_unit
169
+ @time_unit = time_unit
170
+ end
171
+
172
+ def ==(other)
173
+ if other.eql?(Duration)
174
+ true
175
+ elsif other.is_a?(Duration)
176
+ time_unit == other.time_unit
177
+ else
178
+ false
179
+ end
180
+ end
181
+
182
+ def to_s
183
+ "#{self.class.name}(time_unit: #{time_unit.inspect})"
101
184
  end
102
185
  end
103
186
 
104
- # Time of day type.
105
- class Time < TemporalType
187
+ # A categorical encoding of a set of strings.
188
+ class Categorical < DataType
106
189
  end
107
190
 
108
191
  # Type for wrapping arbitrary Ruby objects.
109
192
  class Object < DataType
110
193
  end
111
194
 
112
- # A categorical encoding of a set of strings.
113
- class Categorical < DataType
195
+ # Type representing Null / None values.
196
+ class Null < DataType
197
+ end
198
+
199
+ # Type representing Datatype values that could not be determined statically.
200
+ class Unknown < DataType
201
+ end
202
+
203
+ # Nested list/array type.
204
class List < NestedType
  attr_reader :inner

  # @param inner [Object] converted with `Utils.rb_type_to_dtype` before being stored
  def initialize(inner)
    @inner = Utils.rb_type_to_dtype(inner)
  end

  # Equal to the `List` class itself, or to another List instance when either
  # side has no inner type or both inner types compare equal.
  def ==(other)
    return true if other.eql?(List)
    return false unless other.is_a?(List)
    return true if @inner.nil?
    return true if other.inner.nil?

    @inner == other.inner
  end

  # Renders the class name followed by the inner type.
  def to_s
    klass = self.class.name
    "#{klass}(#{inner})"
  end
end
225
+
226
+ # Nested array type with a declared width.
227
class Array < NestedType
  attr_reader :width, :inner

  # @param width [Object] stored as-is
  # @param inner [Object, nil] when given, converted with `Utils.rb_type_to_dtype`
  def initialize(width, inner = nil)
    @width = width
    if inner
      @inner = Utils.rb_type_to_dtype(inner)
    end
  end

  # Equal to the `Array` class itself, or to another Array instance when either
  # side has no inner type or both inner types compare equal.
  # The width is not compared.
  # TODO check width?
  def ==(other)
    return true if other.eql?(Array)
    return false unless other.is_a?(Array)
    return true if @inner.nil?
    return true if other.inner.nil?

    @inner == other.inner
  end

  # Renders the class name followed by the inner type (the width is omitted).
  # TODO add width?
  def to_s
    klass = self.class.name
    "#{klass}(#{inner})"
  end
end
115
251
 
116
252
  # Definition of a single field within a `Struct` DataType.
@@ -122,9 +258,12 @@ module Polars
122
258
  @dtype = Utils.rb_type_to_dtype(dtype)
123
259
  end
124
260
 
125
- def inspect
126
- class_name = self.class.name
127
- "#{class_name}(#{@name}: #{@dtype})"
261
+ def ==(other)
262
+ name == other.name && dtype == other.dtype
263
+ end
264
+
265
# Renders the class name, the inspected field name and the dtype.
def to_s
  klass = self.class.name
  label = name.inspect
  "#{klass}(#{label}, #{dtype})"
end
129
268
  end
130
269
 
@@ -140,25 +279,22 @@ module Polars
140
279
  end
141
280
  end
142
281
 
143
- def inspect
144
- class_name = self.class.name
145
- "#{class_name}(#{@fields})"
282
# Equal to the `Struct` class itself, or to another Struct instance
# whose fields compare equal.
def ==(other)
  return true if other.eql?(Struct)
  return false unless other.is_a?(Struct)

  fields == other.fields
end
291
+
292
# Renders the class name and the string form of every field, one per line.
def to_s
  rendered = fields.map { |field| field.to_s }.join("\n")
  "#{self.class.name}([#{rendered}])"
end
147
295
 
148
296
  def to_schema
149
297
  @fields.to_h { |f| [f.name, f.dtype] }
150
298
  end
151
299
  end
152
-
153
- # Binary type.
154
- class Binary < DataType
155
- end
156
-
157
- # Type representing Null / None values.
158
- class Null < DataType
159
- end
160
-
161
- # Type representing Datatype values that could not be determined statically.
162
- class Unknown < DataType
163
- end
164
300
  end
@@ -218,6 +218,25 @@ module Polars
218
218
  )
219
219
  end
220
220
 
221
+ # Create a naive Datetime from an existing Date/Datetime expression and a Time.
222
+ #
223
+ # If the underlying expression is a Datetime then its time component is replaced,
224
+ # and if it is a Date then a new Datetime is created by combining the two values.
225
+ #
226
+ # @param time [Object]
227
+ # A Ruby time literal or Polars expression/column that resolves to a time.
228
+ # @param time_unit ["ns", "us", "ms"]
229
+ # Unit of time.
230
+ #
231
+ # @return [Expr]
232
def combine(time, time_unit: "us")
  # `::Time` is spelled out on purpose: inside `module Polars` a bare `Time`
  # resolves to the `Polars::Time` data type class, so a genuine Ruby Time
  # would fail the check and be rejected with TypeError.
  unless time.is_a?(::Time) || time.is_a?(Expr)
    raise TypeError, "expected 'time' to be a Ruby time or Polars expression, found #{time}"
  end
  # Normalise the argument to an expression before handing it to the backend.
  time = Utils.expr_to_lit_or_expr(time)
  Utils.wrap_expr(_rbexpr.dt_combine(time._rbexpr, time_unit))
end
239
+
221
240
  # Format Date/datetime with a formatting rule.
222
241
  #
223
242
  # See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
@@ -270,6 +289,34 @@ module Polars
270
289
  Utils.wrap_expr(_rbexpr.year)
271
290
  end
272
291
 
292
+ # Determine whether the year of the underlying date is a leap year.
293
+ #
294
+ # Applies to Date and Datetime columns.
295
+ #
296
+ # @return [Expr]
297
+ #
298
+ # @example
299
+ # start = DateTime.new(2000, 1, 1)
300
+ # stop = DateTime.new(2002, 1, 1)
301
+ # df = Polars::DataFrame.new(
302
+ # {"date" => Polars.date_range(start, stop, "1y")}
303
+ # )
304
+ # df.select(Polars.col("date").dt.is_leap_year)
305
+ # # =>
306
+ # # shape: (3, 1)
307
+ # # ┌───────┐
308
+ # # │ date │
309
+ # # │ --- │
310
+ # # │ bool │
311
+ # # ╞═══════╡
312
+ # # │ true │
313
+ # # │ false │
314
+ # # │ false │
315
+ # # └───────┘
316
def is_leap_year
  # Delegate to the underlying expression, then wrap the result.
  rb = _rbexpr.dt_is_leap_year
  Utils.wrap_expr(rb)
end
319
+
273
320
  # Extract ISO year from underlying Date representation.
274
321
  #
275
322
  # Applies to Date and Datetime columns.
@@ -550,6 +597,27 @@ module Polars
550
597
  Utils.wrap_expr(_rbexpr.ordinal_day)
551
598
  end
552
599
 
600
+ # Time
601
+ #
602
+ # @return [Expr]
603
def time
  # Delegate to the underlying expression, then wrap the result.
  rb = _rbexpr.dt_time
  Utils.wrap_expr(rb)
end
606
+
607
+ # Date
608
+ #
609
+ # @return [Expr]
610
def date
  # Delegate to the underlying expression, then wrap the result.
  rb = _rbexpr.dt_date
  Utils.wrap_expr(rb)
end
613
+
614
+ # Datetime
615
+ #
616
+ # @return [Expr]
617
def datetime
  # Delegate to the underlying expression, then wrap the result.
  rb = _rbexpr.dt_datetime
  Utils.wrap_expr(rb)
end
620
+
553
621
  # Extract hour from underlying DateTime representation.
554
622
  #
555
623
  # Applies to Datetime columns.
@@ -958,8 +1026,8 @@ module Polars
958
1026
  # Time zone for the `Datetime` Series.
959
1027
  #
960
1028
  # @return [Expr]
961
def replace_time_zone(tz, use_earliest: nil)
  # Both arguments are forwarded positionally to the underlying expression.
  rb = _rbexpr.dt_replace_time_zone(tz, use_earliest)
  Utils.wrap_expr(rb)
end
964
1032
 
965
1033
  # Localize tz-naive Datetime Series to tz-aware Datetime Series.
@@ -1282,5 +1350,77 @@ module Polars
1282
1350
  def offset_by(by)
1283
1351
  Utils.wrap_expr(_rbexpr.dt_offset_by(by))
1284
1352
  end
1353
+
1354
+ # Roll backward to the first day of the month.
1355
+ #
1356
+ # @return [Expr]
1357
+ #
1358
+ # @example
1359
+ # df = Polars::DataFrame.new(
1360
+ # {
1361
+ # "dates" => Polars.date_range(
1362
+ # DateTime.new(2000, 1, 15, 2),
1363
+ # DateTime.new(2000, 12, 15, 2),
1364
+ # "1mo"
1365
+ # )
1366
+ # }
1367
+ # )
1368
+ # df.select(Polars.col("dates").dt.month_start)
1369
+ # # =>
1370
+ # # shape: (12, 1)
1371
+ # # ┌─────────────────────┐
1372
+ # # │ dates │
1373
+ # # │ --- │
1374
+ # # │ datetime[μs] │
1375
+ # # ╞═════════════════════╡
1376
+ # # │ 2000-01-01 02:00:00 │
1377
+ # # │ 2000-02-01 02:00:00 │
1378
+ # # │ 2000-03-01 02:00:00 │
1379
+ # # │ 2000-04-01 02:00:00 │
1380
+ # # │ … │
1381
+ # # │ 2000-09-01 02:00:00 │
1382
+ # # │ 2000-10-01 02:00:00 │
1383
+ # # │ 2000-11-01 02:00:00 │
1384
+ # # │ 2000-12-01 02:00:00 │
1385
+ # # └─────────────────────┘
1386
def month_start
  # Delegate to the underlying expression, then wrap the result.
  rb = _rbexpr.dt_month_start
  Utils.wrap_expr(rb)
end
1389
+
1390
+ # Roll forward to the last day of the month.
1391
+ #
1392
+ # @return [Expr]
1393
+ #
1394
+ # @example
1395
+ # df = Polars::DataFrame.new(
1396
+ # {
1397
+ # "dates" => Polars.date_range(
1398
+ # DateTime.new(2000, 1, 15, 2),
1399
+ # DateTime.new(2000, 12, 15, 2),
1400
+ # "1mo"
1401
+ # )
1402
+ # }
1403
+ # )
1404
+ # df.select(Polars.col("dates").dt.month_end)
1405
+ # # =>
1406
+ # # shape: (12, 1)
1407
+ # # ┌─────────────────────┐
1408
+ # # │ dates │
1409
+ # # │ --- │
1410
+ # # │ datetime[μs] │
1411
+ # # ╞═════════════════════╡
1412
+ # # │ 2000-01-31 02:00:00 │
1413
+ # # │ 2000-02-29 02:00:00 │
1414
+ # # │ 2000-03-31 02:00:00 │
1415
+ # # │ 2000-04-30 02:00:00 │
1416
+ # # │ … │
1417
+ # # │ 2000-09-30 02:00:00 │
1418
+ # # │ 2000-10-31 02:00:00 │
1419
+ # # │ 2000-11-30 02:00:00 │
1420
+ # # │ 2000-12-31 02:00:00 │
1421
+ # # └─────────────────────┘
1422
def month_end
  # Delegate to the underlying expression, then wrap the result.
  rb = _rbexpr.dt_month_end
  Utils.wrap_expr(rb)
end
1285
1425
  end
1286
1426
  end
@@ -82,8 +82,15 @@ module Polars
82
82
  # # => 2001-01-02 00:00:00 UTC
83
83
  def median
84
84
  s = Utils.wrap_s(_s)
85
- out = s.median.to_i
86
- Utils._to_ruby_datetime(out, s.dtype, tu: s.time_unit)
85
+ out = s.median
86
+ if !out.nil?
87
+ if s.dtype == Date
88
+ return Utils._to_ruby_date(out.to_i)
89
+ else
90
+ return Utils._to_ruby_datetime(out.to_i, s.time_unit)
91
+ end
92
+ end
93
+ nil
87
94
  end
88
95
 
89
96
  # Return mean as Ruby object.
@@ -107,7 +114,14 @@ module Polars
107
114
  def mean
108
115
  s = Utils.wrap_s(_s)
109
116
  out = s.mean.to_i
110
- Utils._to_ruby_datetime(out, s.dtype, tu: s.time_unit)
117
+ if !out.nil?
118
+ if s.dtype == Date
119
+ return Utils._to_ruby_date(out.to_i)
120
+ else
121
+ return Utils._to_ruby_datetime(out.to_i, s.time_unit)
122
+ end
123
+ end
124
+ nil
111
125
  end
112
126
 
113
127
  # Format Date/datetime with a formatting rule.