polars-df 0.4.0-x86_64-linux → 0.6.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,25 @@
1
1
  module Polars
2
2
  # Base class for all Polars data types.
3
3
  class DataType
4
+ def self.base_type
5
+ self
6
+ end
7
+
8
+ def base_type
9
+ is_a?(DataType) ? self.class : self
10
+ end
11
+
12
+ def self.nested?
13
+ false
14
+ end
15
+
16
+ def nested?
17
+ self.class.nested?
18
+ end
19
+
20
+ def self.==(other)
21
+ eql?(other) || other.is_a?(self)
22
+ end
4
23
  end
5
24
 
6
25
  # Base class for numeric data types.
@@ -15,12 +34,19 @@ module Polars
15
34
  class FractionalType < NumericType
16
35
  end
17
36
 
37
+ # Base class for float data types.
38
+ class FloatType < FractionalType
39
+ end
40
+
18
41
  # Base class for temporal data types.
19
42
  class TemporalType < DataType
20
43
  end
21
44
 
22
45
  # Base class for nested data types.
23
46
  class NestedType < DataType
47
+ def self.nested?
48
+ true
49
+ end
24
50
  end
25
51
 
26
52
  # 8-bit signed integer type.
@@ -56,11 +82,37 @@ module Polars
56
82
  end
57
83
 
58
84
  # 32-bit floating point type.
59
- class Float32 < FractionalType
85
+ class Float32 < FloatType
60
86
  end
61
87
 
62
88
  # 64-bit floating point type.
63
- class Float64 < FractionalType
89
+ class Float64 < FloatType
90
+ end
91
+
92
+ # Decimal 128-bit type with an optional precision and non-negative scale.
93
+ #
94
+ # NOTE: this is an experimental work-in-progress feature and may not work as expected.
95
class Decimal < FractionalType
  # Values exactly as they were handed to the constructor.
  attr_reader :precision, :scale

  # @param precision [Object] stored as-is
  # @param scale [Object] stored as-is
  def initialize(precision, scale)
    @precision = precision
    @scale = scale
  end

  # Equal to the bare Decimal class, or to another Decimal instance with the
  # same precision and scale.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(Decimal)
    return false unless other.is_a?(Decimal)

    precision == other.precision && scale == other.scale
  end

  # @return [String] class name followed by the inspected precision and scale
  def to_s
    format("%s(precision: %s, scale: %s)", self.class.name, precision.inspect, scale.inspect)
  end
end
65
117
 
66
118
  # Boolean type.
@@ -71,46 +123,130 @@ module Polars
71
123
  class Utf8 < DataType
72
124
  end
73
125
 
74
- # Nested list/array type.
75
- class List < NestedType
76
- def initialize(inner)
77
- @inner = Utils.rb_type_to_dtype(inner)
78
- end
126
+ # Binary type.
127
+ class Binary < DataType
79
128
  end
80
129
 
81
130
  # Calendar date type.
82
131
  class Date < TemporalType
83
132
  end
84
133
 
134
+ # Time of day type.
135
+ class Time < TemporalType
136
+ end
137
+
85
138
  # Calendar date and time type.
86
139
  class Datetime < TemporalType
87
- attr_reader :tu
140
+ attr_reader :time_unit, :time_zone
141
+ alias_method :tu, :time_unit
88
142
 
89
143
  def initialize(time_unit = "us", time_zone = nil)
90
- @tu = time_unit || "us"
144
+ @time_unit = time_unit || "us"
91
145
  @time_zone = time_zone
92
146
  end
147
+
148
+ def ==(other)
149
+ if other.eql?(Datetime)
150
+ true
151
+ elsif other.is_a?(Datetime)
152
+ time_unit == other.time_unit && time_zone == other.time_zone
153
+ else
154
+ false
155
+ end
156
+ end
157
+
158
+ def to_s
159
+ "#{self.class.name}(time_unit: #{time_unit.inspect}, time_zone: #{time_zone.inspect})"
160
+ end
93
161
  end
94
162
 
95
163
  # Time duration/delta type.
96
164
  class Duration < TemporalType
97
- attr_reader :tu
165
+ attr_reader :time_unit
166
+ alias_method :tu, :time_unit
98
167
 
99
168
  def initialize(time_unit = "us")
100
- @tu = time_unit
169
+ @time_unit = time_unit
170
+ end
171
+
172
+ def ==(other)
173
+ if other.eql?(Duration)
174
+ true
175
+ elsif other.is_a?(Duration)
176
+ time_unit == other.time_unit
177
+ else
178
+ false
179
+ end
180
+ end
181
+
182
+ def to_s
183
+ "#{self.class.name}(time_unit: #{time_unit.inspect})"
101
184
  end
102
185
  end
103
186
 
104
- # Time of day type.
105
- class Time < TemporalType
187
+ # A categorical encoding of a set of strings.
188
+ class Categorical < DataType
106
189
  end
107
190
 
108
191
  # Type for wrapping arbitrary Ruby objects.
109
192
  class Object < DataType
110
193
  end
111
194
 
112
- # A categorical encoding of a set of strings.
113
- class Categorical < DataType
195
+ # Type representing Null / None values.
196
+ class Null < DataType
197
+ end
198
+
199
+ # Type representing Datatype values that could not be determined statically.
200
+ class Unknown < DataType
201
+ end
202
+
203
+ # Nested list/array type.
204
class List < NestedType
  # Inner data type, as converted by Utils.rb_type_to_dtype.
  attr_reader :inner

  # @param inner [Object] passed through Utils.rb_type_to_dtype
  def initialize(inner)
    @inner = Utils.rb_type_to_dtype(inner)
  end

  # Equal to the bare List class, or to another List instance whose inner
  # type matches; a nil inner type on either side matches anything.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(List)
    return false unless other.is_a?(List)
    return true if @inner.nil? || other.inner.nil?

    @inner == other.inner
  end

  # @return [String] class name with the inner type in parentheses
  def to_s
    format("%s(%s)", self.class.name, inner)
  end
end
225
+
226
+ # Nested array type with a fixed width.
227
class Array < NestedType
  # `width` is stored as given; `inner` is the converted inner data type, or
  # nil when none was supplied.
  attr_reader :width, :inner

  # @param width [Object] stored as-is
  # @param inner [Object, nil] passed through Utils.rb_type_to_dtype when given
  def initialize(width, inner = nil)
    @width = width
    @inner = Utils.rb_type_to_dtype(inner) if inner
  end

  # Equal to the bare Array class, or to another Array instance with the same
  # width whose inner type matches; a nil inner type on either side matches
  # any inner type.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(Array)
    return false unless other.is_a?(Array)
    # Arrays of different widths are different types; previously the width
    # was ignored and e.g. a width-2 and a width-3 array compared equal.
    return false unless @width == other.width

    @inner.nil? || other.inner.nil? || @inner == other.inner
  end

  # TODO add width?
  #
  # @return [String] class name with the inner type in parentheses
  def to_s
    "#{self.class.name}(#{inner})"
  end
end
115
251
 
116
252
  # Definition of a single field within a `Struct` DataType.
@@ -122,9 +258,12 @@ module Polars
122
258
  @dtype = Utils.rb_type_to_dtype(dtype)
123
259
  end
124
260
 
125
- def inspect
126
- class_name = self.class.name
127
- "#{class_name}(#{@name}: #{@dtype})"
261
# Two fields are equal when they are of the same kind and agree on both
# name and dtype.
#
# @param other [Object]
# @return [Boolean] false for any operand that is not a field
def ==(other)
  # Without the type guard, comparing against an object that lacks
  # `name`/`dtype` (nil, for instance) raised NoMethodError instead of
  # answering false.
  other.is_a?(self.class) && name == other.name && dtype == other.dtype
end
264
+
265
+ def to_s
266
+ "#{self.class.name}(#{name.inspect}, #{dtype})"
128
267
  end
129
268
  end
130
269
 
@@ -140,25 +279,22 @@ module Polars
140
279
  end
141
280
  end
142
281
 
143
- def inspect
144
- class_name = self.class.name
145
- "#{class_name}(#{@fields})"
282
+ def ==(other)
283
+ if other.eql?(Struct)
284
+ true
285
+ elsif other.is_a?(Struct)
286
+ fields == other.fields
287
+ else
288
+ false
289
+ end
290
+ end
291
+
292
+ def to_s
293
+ "#{self.class.name}([#{fields.map(&:to_s).join("\n")}])"
146
294
  end
147
295
 
148
296
  def to_schema
149
297
  @fields.to_h { |f| [f.name, f.dtype] }
150
298
  end
151
299
  end
152
-
153
- # Binary type.
154
- class Binary < DataType
155
- end
156
-
157
- # Type representing Null / None values.
158
- class Null < DataType
159
- end
160
-
161
- # Type representing Datatype values that could not be determined statically.
162
- class Unknown < DataType
163
- end
164
300
  end
@@ -218,6 +218,25 @@ module Polars
218
218
  )
219
219
  end
220
220
 
221
+ # Create a naive Datetime from an existing Date/Datetime expression and a Time.
222
+ #
223
+ # If the underlying expression is a Datetime then its time component is replaced,
224
+ # and if it is a Date then a new Datetime is created by combining the two values.
225
+ #
226
+ # @param time [Object]
227
+ # A Ruby time literal or Polars expression/column that resolves to a time.
228
+ # @param time_unit ["ns", "us", "ms"]
229
+ # Unit of time.
230
+ #
231
+ # @return [Expr]
232
def combine(time, time_unit: "us")
  # A bare `Time` inside the Polars namespace resolves to the Polars::Time
  # data type class, so a Ruby time value never passed this check and was
  # rejected with the TypeError below. `::Time` pins the Ruby core class.
  # NOTE(review): confirm Utils.expr_to_lit_or_expr builds the intended
  # literal from a Ruby ::Time.
  unless time.is_a?(::Time) || time.is_a?(Expr)
    raise TypeError, "expected 'time' to be a Ruby time or Polars expression, found #{time}"
  end

  time = Utils.expr_to_lit_or_expr(time)
  Utils.wrap_expr(_rbexpr.dt_combine(time._rbexpr, time_unit))
end
239
+
221
240
  # Format Date/datetime with a formatting rule.
222
241
  #
223
242
  # See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
@@ -270,6 +289,34 @@ module Polars
270
289
  Utils.wrap_expr(_rbexpr.year)
271
290
  end
272
291
 
292
+ # Determine whether the year of the underlying date is a leap year.
293
+ #
294
+ # Applies to Date and Datetime columns.
295
+ #
296
+ # @return [Expr]
297
+ #
298
+ # @example
299
+ # start = DateTime.new(2000, 1, 1)
300
+ # stop = DateTime.new(2002, 1, 1)
301
+ # df = Polars::DataFrame.new(
302
+ # {"date" => Polars.date_range(start, stop, "1y")}
303
+ # )
304
+ # df.select(Polars.col("date").dt.is_leap_year)
305
+ # # =>
306
+ # # shape: (3, 1)
307
+ # # ┌───────┐
308
+ # # │ date │
309
+ # # │ --- │
310
+ # # │ bool │
311
+ # # ╞═══════╡
312
+ # # │ true │
313
+ # # │ false │
314
+ # # │ false │
315
+ # # └───────┘
316
+ def is_leap_year
317
+ Utils.wrap_expr(_rbexpr.dt_is_leap_year)
318
+ end
319
+
273
320
  # Extract ISO year from underlying Date representation.
274
321
  #
275
322
  # Applies to Date and Datetime columns.
@@ -550,6 +597,27 @@ module Polars
550
597
  Utils.wrap_expr(_rbexpr.ordinal_day)
551
598
  end
552
599
 
600
+ # Time
601
+ #
602
+ # @return [Expr]
603
+ def time
604
+ Utils.wrap_expr(_rbexpr.dt_time)
605
+ end
606
+
607
+ # Date
608
+ #
609
+ # @return [Expr]
610
+ def date
611
+ Utils.wrap_expr(_rbexpr.dt_date)
612
+ end
613
+
614
+ # Datetime
615
+ #
616
+ # @return [Expr]
617
+ def datetime
618
+ Utils.wrap_expr(_rbexpr.dt_datetime)
619
+ end
620
+
553
621
  # Extract hour from underlying DateTime representation.
554
622
  #
555
623
  # Applies to Datetime columns.
@@ -958,8 +1026,8 @@ module Polars
958
1026
  # Time zone for the `Datetime` Series.
959
1027
  #
960
1028
  # @return [Expr]
961
- def replace_time_zone(tz)
962
- Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz))
1029
+ def replace_time_zone(tz, use_earliest: nil)
1030
+ Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, use_earliest))
963
1031
  end
964
1032
 
965
1033
  # Localize tz-naive Datetime Series to tz-aware Datetime Series.
@@ -1282,5 +1350,77 @@ module Polars
1282
1350
  def offset_by(by)
1283
1351
  Utils.wrap_expr(_rbexpr.dt_offset_by(by))
1284
1352
  end
1353
+
1354
+ # Roll backward to the first day of the month.
1355
+ #
1356
+ # @return [Expr]
1357
+ #
1358
+ # @example
1359
+ # df = Polars::DataFrame.new(
1360
+ # {
1361
+ # "dates" => Polars.date_range(
1362
+ # DateTime.new(2000, 1, 15, 2),
1363
+ # DateTime.new(2000, 12, 15, 2),
1364
+ # "1mo"
1365
+ # )
1366
+ # }
1367
+ # )
1368
+ # df.select(Polars.col("dates").dt.month_start)
1369
+ # # =>
1370
+ # # shape: (12, 1)
1371
+ # # ┌─────────────────────┐
1372
+ # # │ dates │
1373
+ # # │ --- │
1374
+ # # │ datetime[μs] │
1375
+ # # ╞═════════════════════╡
1376
+ # # │ 2000-01-01 02:00:00 │
1377
+ # # │ 2000-02-01 02:00:00 │
1378
+ # # │ 2000-03-01 02:00:00 │
1379
+ # # │ 2000-04-01 02:00:00 │
1380
+ # # │ … │
1381
+ # # │ 2000-09-01 02:00:00 │
1382
+ # # │ 2000-10-01 02:00:00 │
1383
+ # # │ 2000-11-01 02:00:00 │
1384
+ # # │ 2000-12-01 02:00:00 │
1385
+ # # └─────────────────────┘
1386
+ def month_start
1387
+ Utils.wrap_expr(_rbexpr.dt_month_start)
1388
+ end
1389
+
1390
+ # Roll forward to the last day of the month.
1391
+ #
1392
+ # @return [Expr]
1393
+ #
1394
+ # @example
1395
+ # df = Polars::DataFrame.new(
1396
+ # {
1397
+ # "dates" => Polars.date_range(
1398
+ # DateTime.new(2000, 1, 15, 2),
1399
+ # DateTime.new(2000, 12, 15, 2),
1400
+ # "1mo"
1401
+ # )
1402
+ # }
1403
+ # )
1404
+ # df.select(Polars.col("dates").dt.month_end)
1405
+ # # =>
1406
+ # # shape: (12, 1)
1407
+ # # ┌─────────────────────┐
1408
+ # # │ dates │
1409
+ # # │ --- │
1410
+ # # │ datetime[μs] │
1411
+ # # ╞═════════════════════╡
1412
+ # # │ 2000-01-31 02:00:00 │
1413
+ # # │ 2000-02-29 02:00:00 │
1414
+ # # │ 2000-03-31 02:00:00 │
1415
+ # # │ 2000-04-30 02:00:00 │
1416
+ # # │ … │
1417
+ # # │ 2000-09-30 02:00:00 │
1418
+ # # │ 2000-10-31 02:00:00 │
1419
+ # # │ 2000-11-30 02:00:00 │
1420
+ # # │ 2000-12-31 02:00:00 │
1421
+ # # └─────────────────────┘
1422
+ def month_end
1423
+ Utils.wrap_expr(_rbexpr.dt_month_end)
1424
+ end
1285
1425
  end
1286
1426
  end
@@ -82,8 +82,15 @@ module Polars
82
82
  # # => 2001-01-02 00:00:00 UTC
83
83
  def median
84
84
  s = Utils.wrap_s(_s)
85
- out = s.median.to_i
86
- Utils._to_ruby_datetime(out, s.dtype, tu: s.time_unit)
85
+ out = s.median
86
+ if !out.nil?
87
+ if s.dtype == Date
88
+ return Utils._to_ruby_date(out.to_i)
89
+ else
90
+ return Utils._to_ruby_datetime(out.to_i, s.time_unit)
91
+ end
92
+ end
93
+ nil
87
94
  end
88
95
 
89
96
  # Return mean as Ruby object.
@@ -107,7 +114,14 @@ module Polars
107
114
  def mean
108
115
  s = Utils.wrap_s(_s)
109
116
  out = s.mean.to_i
110
- Utils._to_ruby_datetime(out, s.dtype, tu: s.time_unit)
117
+ if !out.nil?
118
+ if s.dtype == Date
119
+ return Utils._to_ruby_date(out.to_i)
120
+ else
121
+ return Utils._to_ruby_datetime(out.to_i, s.time_unit)
122
+ end
123
+ end
124
+ nil
111
125
  end
112
126
 
113
127
  # Format Date/datetime with a formatting rule.