polars-df 0.4.0-x86_64-darwin → 0.6.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +447 -410
- data/Cargo.toml +0 -1
- data/LICENSE-THIRD-PARTY.txt +2142 -972
- data/README.md +6 -5
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +289 -96
- data/lib/polars/data_types.rb +169 -33
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +145 -78
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +84 -31
- data/lib/polars/lazy_functions.rb +71 -32
- data/lib/polars/list_expr.rb +94 -45
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +249 -87
- data/lib/polars/string_expr.rb +277 -45
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +138 -54
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -2
- metadata +4 -2
data/lib/polars/data_types.rb
CHANGED
@@ -1,6 +1,25 @@
|
|
1
1
|
module Polars
|
2
2
|
# Base class for all Polars data types.
|
3
3
|
class DataType
|
4
|
+
def self.base_type
|
5
|
+
self
|
6
|
+
end
|
7
|
+
|
8
|
+
def base_type
|
9
|
+
is_a?(DataType) ? self.class : self
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.nested?
|
13
|
+
false
|
14
|
+
end
|
15
|
+
|
16
|
+
def nested?
|
17
|
+
self.class.nested?
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.==(other)
|
21
|
+
eql?(other) || other.is_a?(self)
|
22
|
+
end
|
4
23
|
end
|
5
24
|
|
6
25
|
# Base class for numeric data types.
|
@@ -15,12 +34,19 @@ module Polars
|
|
15
34
|
class FractionalType < NumericType
|
16
35
|
end
|
17
36
|
|
37
|
+
# Base class for float data types.
|
38
|
+
class FloatType < FractionalType
|
39
|
+
end
|
40
|
+
|
18
41
|
# Base class for temporal data types.
|
19
42
|
class TemporalType < DataType
|
20
43
|
end
|
21
44
|
|
22
45
|
# Base class for nested data types.
|
23
46
|
class NestedType < DataType
|
47
|
+
def self.nested?
|
48
|
+
true
|
49
|
+
end
|
24
50
|
end
|
25
51
|
|
26
52
|
# 8-bit signed integer type.
|
@@ -56,11 +82,37 @@ module Polars
|
|
56
82
|
end
|
57
83
|
|
58
84
|
# 32-bit floating point type.
|
59
|
-
class Float32 <
|
85
|
+
class Float32 < FloatType
|
60
86
|
end
|
61
87
|
|
62
88
|
# 64-bit floating point type.
|
63
|
-
class Float64 <
|
89
|
+
class Float64 < FloatType
|
90
|
+
end
|
91
|
+
|
92
|
+
# Decimal 128-bit type with an optional precision and non-negative scale.
|
93
|
+
#
|
94
|
+
# NOTE: this is an experimental work-in-progress feature and may not work as expected.
|
95
|
+
class Decimal < FractionalType
|
96
|
+
attr_reader :precision, :scale
|
97
|
+
|
98
|
+
def initialize(precision, scale)
|
99
|
+
@precision = precision
|
100
|
+
@scale = scale
|
101
|
+
end
|
102
|
+
|
103
|
+
def ==(other)
|
104
|
+
if other.eql?(Decimal)
|
105
|
+
true
|
106
|
+
elsif other.is_a?(Decimal)
|
107
|
+
precision == other.precision && scale == other.scale
|
108
|
+
else
|
109
|
+
false
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
def to_s
|
114
|
+
"#{self.class.name}(precision: #{precision.inspect}, scale: #{scale.inspect})"
|
115
|
+
end
|
64
116
|
end
|
65
117
|
|
66
118
|
# Boolean type.
|
@@ -71,46 +123,130 @@ module Polars
|
|
71
123
|
class Utf8 < DataType
|
72
124
|
end
|
73
125
|
|
74
|
-
#
|
75
|
-
class
|
76
|
-
def initialize(inner)
|
77
|
-
@inner = Utils.rb_type_to_dtype(inner)
|
78
|
-
end
|
126
|
+
# Binary type.
|
127
|
+
class Binary < DataType
|
79
128
|
end
|
80
129
|
|
81
130
|
# Calendar date type.
|
82
131
|
class Date < TemporalType
|
83
132
|
end
|
84
133
|
|
134
|
+
# Time of day type.
|
135
|
+
class Time < TemporalType
|
136
|
+
end
|
137
|
+
|
85
138
|
# Calendar date and time type.
|
86
139
|
class Datetime < TemporalType
|
87
|
-
attr_reader :
|
140
|
+
attr_reader :time_unit, :time_zone
|
141
|
+
alias_method :tu, :time_unit
|
88
142
|
|
89
143
|
def initialize(time_unit = "us", time_zone = nil)
|
90
|
-
@
|
144
|
+
@time_unit = time_unit || "us"
|
91
145
|
@time_zone = time_zone
|
92
146
|
end
|
147
|
+
|
148
|
+
def ==(other)
|
149
|
+
if other.eql?(Datetime)
|
150
|
+
true
|
151
|
+
elsif other.is_a?(Datetime)
|
152
|
+
time_unit == other.time_unit && time_zone == other.time_zone
|
153
|
+
else
|
154
|
+
false
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
def to_s
|
159
|
+
"#{self.class.name}(time_unit: #{time_unit.inspect}, time_zone: #{time_zone.inspect})"
|
160
|
+
end
|
93
161
|
end
|
94
162
|
|
95
163
|
# Time duration/delta type.
|
96
164
|
class Duration < TemporalType
|
97
|
-
attr_reader :
|
165
|
+
attr_reader :time_unit
|
166
|
+
alias_method :tu, :time_unit
|
98
167
|
|
99
168
|
def initialize(time_unit = "us")
|
100
|
-
@
|
169
|
+
@time_unit = time_unit
|
170
|
+
end
|
171
|
+
|
172
|
+
def ==(other)
|
173
|
+
if other.eql?(Duration)
|
174
|
+
true
|
175
|
+
elsif other.is_a?(Duration)
|
176
|
+
time_unit == other.time_unit
|
177
|
+
else
|
178
|
+
false
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
def to_s
|
183
|
+
"#{self.class.name}(time_unit: #{time_unit.inspect})"
|
101
184
|
end
|
102
185
|
end
|
103
186
|
|
104
|
-
#
|
105
|
-
class
|
187
|
+
# A categorical encoding of a set of strings.
|
188
|
+
class Categorical < DataType
|
106
189
|
end
|
107
190
|
|
108
191
|
# Type for wrapping arbitrary Ruby objects.
|
109
192
|
class Object < DataType
|
110
193
|
end
|
111
194
|
|
112
|
-
#
|
113
|
-
class
|
195
|
+
# Type representing Null / None values.
|
196
|
+
class Null < DataType
|
197
|
+
end
|
198
|
+
|
199
|
+
# Type representing Datatype values that could not be determined statically.
|
200
|
+
class Unknown < DataType
|
201
|
+
end
|
202
|
+
|
203
|
+
# Nested list/array type.
|
204
|
+
class List < NestedType
|
205
|
+
attr_reader :inner
|
206
|
+
|
207
|
+
def initialize(inner)
|
208
|
+
@inner = Utils.rb_type_to_dtype(inner)
|
209
|
+
end
|
210
|
+
|
211
|
+
def ==(other)
|
212
|
+
if other.eql?(List)
|
213
|
+
true
|
214
|
+
elsif other.is_a?(List)
|
215
|
+
@inner.nil? || other.inner.nil? || @inner == other.inner
|
216
|
+
else
|
217
|
+
false
|
218
|
+
end
|
219
|
+
end
|
220
|
+
|
221
|
+
def to_s
|
222
|
+
"#{self.class.name}(#{inner})"
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
226
|
+
# Nested list/array type.
|
227
|
+
class Array < NestedType
|
228
|
+
attr_reader :width, :inner
|
229
|
+
|
230
|
+
def initialize(width, inner = nil)
|
231
|
+
@width = width
|
232
|
+
@inner = Utils.rb_type_to_dtype(inner) if inner
|
233
|
+
end
|
234
|
+
|
235
|
+
# TODO check width?
|
236
|
+
def ==(other)
|
237
|
+
if other.eql?(Array)
|
238
|
+
true
|
239
|
+
elsif other.is_a?(Array)
|
240
|
+
@inner.nil? || other.inner.nil? || @inner == other.inner
|
241
|
+
else
|
242
|
+
false
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
246
|
+
# TODO add width?
|
247
|
+
def to_s
|
248
|
+
"#{self.class.name}(#{inner})"
|
249
|
+
end
|
114
250
|
end
|
115
251
|
|
116
252
|
# Definition of a single field within a `Struct` DataType.
|
@@ -122,9 +258,12 @@ module Polars
|
|
122
258
|
@dtype = Utils.rb_type_to_dtype(dtype)
|
123
259
|
end
|
124
260
|
|
125
|
-
def
|
126
|
-
|
127
|
-
|
261
|
+
def ==(other)
|
262
|
+
name == other.name && dtype == other.dtype
|
263
|
+
end
|
264
|
+
|
265
|
+
def to_s
|
266
|
+
"#{self.class.name}(#{name.inspect}, #{dtype})"
|
128
267
|
end
|
129
268
|
end
|
130
269
|
|
@@ -140,25 +279,22 @@ module Polars
|
|
140
279
|
end
|
141
280
|
end
|
142
281
|
|
143
|
-
def
|
144
|
-
|
145
|
-
|
282
|
+
def ==(other)
|
283
|
+
if other.eql?(Struct)
|
284
|
+
true
|
285
|
+
elsif other.is_a?(Struct)
|
286
|
+
fields == other.fields
|
287
|
+
else
|
288
|
+
false
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
292
|
+
def to_s
|
293
|
+
"#{self.class.name}([#{fields.map(&:to_s).join("\n")}])"
|
146
294
|
end
|
147
295
|
|
148
296
|
def to_schema
|
149
297
|
@fields.to_h { |f| [f.name, f.dtype] }
|
150
298
|
end
|
151
299
|
end
|
152
|
-
|
153
|
-
# Binary type.
|
154
|
-
class Binary < DataType
|
155
|
-
end
|
156
|
-
|
157
|
-
# Type representing Null / None values.
|
158
|
-
class Null < DataType
|
159
|
-
end
|
160
|
-
|
161
|
-
# Type representing Datatype values that could not be determined statically.
|
162
|
-
class Unknown < DataType
|
163
|
-
end
|
164
300
|
end
|
@@ -218,6 +218,25 @@ module Polars
|
|
218
218
|
)
|
219
219
|
end
|
220
220
|
|
221
|
+
# Create a naive Datetime from an existing Date/Datetime expression and a Time.
|
222
|
+
#
|
223
|
+
# If the underlying expression is a Datetime then its time component is replaced,
|
224
|
+
# and if it is a Date then a new Datetime is created by combining the two values.
|
225
|
+
#
|
226
|
+
# @param time [Object]
|
227
|
+
# A Ruby time literal or Polars expression/column that resolves to a time.
|
228
|
+
# @param time_unit ["ns", "us", "ms"]
|
229
|
+
# Unit of time.
|
230
|
+
#
|
231
|
+
# @return [Expr]
|
232
|
+
def combine(time, time_unit: "us")
|
233
|
+
unless time.is_a?(Time) || time.is_a?(Expr)
|
234
|
+
raise TypeError, "expected 'time' to be a Ruby time or Polars expression, found #{time}"
|
235
|
+
end
|
236
|
+
time = Utils.expr_to_lit_or_expr(time)
|
237
|
+
Utils.wrap_expr(_rbexpr.dt_combine(time._rbexpr, time_unit))
|
238
|
+
end
|
239
|
+
|
221
240
|
# Format Date/datetime with a formatting rule.
|
222
241
|
#
|
223
242
|
# See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
|
@@ -270,6 +289,34 @@ module Polars
|
|
270
289
|
Utils.wrap_expr(_rbexpr.year)
|
271
290
|
end
|
272
291
|
|
292
|
+
# Determine whether the year of the underlying date is a leap year.
|
293
|
+
#
|
294
|
+
# Applies to Date and Datetime columns.
|
295
|
+
#
|
296
|
+
# @return [Expr]
|
297
|
+
#
|
298
|
+
# @example
|
299
|
+
# start = DateTime.new(2000, 1, 1)
|
300
|
+
# stop = DateTime.new(2002, 1, 1)
|
301
|
+
# df = Polars::DataFrame.new(
|
302
|
+
# {"date" => Polars.date_range(start, stop, "1y")}
|
303
|
+
# )
|
304
|
+
# df.select(Polars.col("date").dt.is_leap_year)
|
305
|
+
# # =>
|
306
|
+
# # shape: (3, 1)
|
307
|
+
# # ┌───────┐
|
308
|
+
# # │ date │
|
309
|
+
# # │ --- │
|
310
|
+
# # │ bool │
|
311
|
+
# # ╞═══════╡
|
312
|
+
# # │ true │
|
313
|
+
# # │ false │
|
314
|
+
# # │ false │
|
315
|
+
# # └───────┘
|
316
|
+
def is_leap_year
|
317
|
+
Utils.wrap_expr(_rbexpr.dt_is_leap_year)
|
318
|
+
end
|
319
|
+
|
273
320
|
# Extract ISO year from underlying Date representation.
|
274
321
|
#
|
275
322
|
# Applies to Date and Datetime columns.
|
@@ -550,6 +597,27 @@ module Polars
|
|
550
597
|
Utils.wrap_expr(_rbexpr.ordinal_day)
|
551
598
|
end
|
552
599
|
|
600
|
+
# Time
|
601
|
+
#
|
602
|
+
# @return [Expr]
|
603
|
+
def time
|
604
|
+
Utils.wrap_expr(_rbexpr.dt_time)
|
605
|
+
end
|
606
|
+
|
607
|
+
# Date
|
608
|
+
#
|
609
|
+
# @return [Expr]
|
610
|
+
def date
|
611
|
+
Utils.wrap_expr(_rbexpr.dt_date)
|
612
|
+
end
|
613
|
+
|
614
|
+
# Datetime
|
615
|
+
#
|
616
|
+
# @return [Expr]
|
617
|
+
def datetime
|
618
|
+
Utils.wrap_expr(_rbexpr.dt_datetime)
|
619
|
+
end
|
620
|
+
|
553
621
|
# Extract hour from underlying DateTime representation.
|
554
622
|
#
|
555
623
|
# Applies to Datetime columns.
|
@@ -958,8 +1026,8 @@ module Polars
|
|
958
1026
|
# Time zone for the `Datetime` Series.
|
959
1027
|
#
|
960
1028
|
# @return [Expr]
|
961
|
-
def replace_time_zone(tz)
|
962
|
-
Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz))
|
1029
|
+
def replace_time_zone(tz, use_earliest: nil)
|
1030
|
+
Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, use_earliest))
|
963
1031
|
end
|
964
1032
|
|
965
1033
|
# Localize tz-naive Datetime Series to tz-aware Datetime Series.
|
@@ -1282,5 +1350,77 @@ module Polars
|
|
1282
1350
|
def offset_by(by)
|
1283
1351
|
Utils.wrap_expr(_rbexpr.dt_offset_by(by))
|
1284
1352
|
end
|
1353
|
+
|
1354
|
+
# Roll backward to the first day of the month.
|
1355
|
+
#
|
1356
|
+
# @return [Expr]
|
1357
|
+
#
|
1358
|
+
# @example
|
1359
|
+
# df = Polars::DataFrame.new(
|
1360
|
+
# {
|
1361
|
+
# "dates" => Polars.date_range(
|
1362
|
+
# DateTime.new(2000, 1, 15, 2),
|
1363
|
+
# DateTime.new(2000, 12, 15, 2),
|
1364
|
+
# "1mo"
|
1365
|
+
# )
|
1366
|
+
# }
|
1367
|
+
# )
|
1368
|
+
# df.select(Polars.col("dates").dt.month_start)
|
1369
|
+
# # =>
|
1370
|
+
# # shape: (12, 1)
|
1371
|
+
# # ┌─────────────────────┐
|
1372
|
+
# # │ dates │
|
1373
|
+
# # │ --- │
|
1374
|
+
# # │ datetime[μs] │
|
1375
|
+
# # ╞═════════════════════╡
|
1376
|
+
# # │ 2000-01-01 02:00:00 │
|
1377
|
+
# # │ 2000-02-01 02:00:00 │
|
1378
|
+
# # │ 2000-03-01 02:00:00 │
|
1379
|
+
# # │ 2000-04-01 02:00:00 │
|
1380
|
+
# # │ … │
|
1381
|
+
# # │ 2000-09-01 02:00:00 │
|
1382
|
+
# # │ 2000-10-01 02:00:00 │
|
1383
|
+
# # │ 2000-11-01 02:00:00 │
|
1384
|
+
# # │ 2000-12-01 02:00:00 │
|
1385
|
+
# # └─────────────────────┘
|
1386
|
+
def month_start
|
1387
|
+
Utils.wrap_expr(_rbexpr.dt_month_start)
|
1388
|
+
end
|
1389
|
+
|
1390
|
+
# Roll forward to the last day of the month.
|
1391
|
+
#
|
1392
|
+
# @return [Expr]
|
1393
|
+
#
|
1394
|
+
# @example
|
1395
|
+
# df = Polars::DataFrame.new(
|
1396
|
+
# {
|
1397
|
+
# "dates" => Polars.date_range(
|
1398
|
+
# DateTime.new(2000, 1, 15, 2),
|
1399
|
+
# DateTime.new(2000, 12, 15, 2),
|
1400
|
+
# "1mo"
|
1401
|
+
# )
|
1402
|
+
# }
|
1403
|
+
# )
|
1404
|
+
# df.select(Polars.col("dates").dt.month_end)
|
1405
|
+
# # =>
|
1406
|
+
# # shape: (12, 1)
|
1407
|
+
# # ┌─────────────────────┐
|
1408
|
+
# # │ dates │
|
1409
|
+
# # │ --- │
|
1410
|
+
# # │ datetime[μs] │
|
1411
|
+
# # ╞═════════════════════╡
|
1412
|
+
# # │ 2000-01-31 02:00:00 │
|
1413
|
+
# # │ 2000-02-29 02:00:00 │
|
1414
|
+
# # │ 2000-03-31 02:00:00 │
|
1415
|
+
# # │ 2000-04-30 02:00:00 │
|
1416
|
+
# # │ … │
|
1417
|
+
# # │ 2000-09-30 02:00:00 │
|
1418
|
+
# # │ 2000-10-31 02:00:00 │
|
1419
|
+
# # │ 2000-11-30 02:00:00 │
|
1420
|
+
# # │ 2000-12-31 02:00:00 │
|
1421
|
+
# # └─────────────────────┘
|
1422
|
+
def month_end
|
1423
|
+
Utils.wrap_expr(_rbexpr.dt_month_end)
|
1424
|
+
end
|
1285
1425
|
end
|
1286
1426
|
end
|
@@ -82,8 +82,15 @@ module Polars
|
|
82
82
|
# # => 2001-01-02 00:00:00 UTC
|
83
83
|
def median
|
84
84
|
s = Utils.wrap_s(_s)
|
85
|
-
out = s.median
|
86
|
-
|
85
|
+
out = s.median
|
86
|
+
if !out.nil?
|
87
|
+
if s.dtype == Date
|
88
|
+
return Utils._to_ruby_date(out.to_i)
|
89
|
+
else
|
90
|
+
return Utils._to_ruby_datetime(out.to_i, s.time_unit)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
nil
|
87
94
|
end
|
88
95
|
|
89
96
|
# Return mean as Ruby object.
|
@@ -107,7 +114,14 @@ module Polars
|
|
107
114
|
def mean
|
108
115
|
s = Utils.wrap_s(_s)
|
109
116
|
out = s.mean.to_i
|
110
|
-
|
117
|
+
if !out.nil?
|
118
|
+
if s.dtype == Date
|
119
|
+
return Utils._to_ruby_date(out.to_i)
|
120
|
+
else
|
121
|
+
return Utils._to_ruby_datetime(out.to_i, s.time_unit)
|
122
|
+
end
|
123
|
+
end
|
124
|
+
nil
|
111
125
|
end
|
112
126
|
|
113
127
|
# Format Date/datetime with a formatting rule.
|