polars-df 0.4.0-aarch64-linux → 0.6.0-aarch64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +447 -410
- data/Cargo.toml +0 -1
- data/LICENSE-THIRD-PARTY.txt +2386 -1216
- data/README.md +6 -5
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +289 -96
- data/lib/polars/data_types.rb +169 -33
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +145 -78
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +84 -31
- data/lib/polars/lazy_functions.rb +71 -32
- data/lib/polars/list_expr.rb +94 -45
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +249 -87
- data/lib/polars/string_expr.rb +277 -45
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +138 -54
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -2
- metadata +4 -2
data/lib/polars/data_types.rb
CHANGED
@@ -1,6 +1,25 @@
|
|
1
1
|
module Polars
|
2
2
|
# Base class for all Polars data types.
|
3
3
|
class DataType
|
4
|
+
def self.base_type
|
5
|
+
self
|
6
|
+
end
|
7
|
+
|
8
|
+
def base_type
|
9
|
+
is_a?(DataType) ? self.class : self
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.nested?
|
13
|
+
false
|
14
|
+
end
|
15
|
+
|
16
|
+
def nested?
|
17
|
+
self.class.nested?
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.==(other)
|
21
|
+
eql?(other) || other.is_a?(self)
|
22
|
+
end
|
4
23
|
end
|
5
24
|
|
6
25
|
# Base class for numeric data types.
|
@@ -15,12 +34,19 @@ module Polars
|
|
15
34
|
class FractionalType < NumericType
|
16
35
|
end
|
17
36
|
|
37
|
+
# Base class for float data types.
|
38
|
+
class FloatType < FractionalType
|
39
|
+
end
|
40
|
+
|
18
41
|
# Base class for temporal data types.
|
19
42
|
class TemporalType < DataType
|
20
43
|
end
|
21
44
|
|
22
45
|
# Base class for nested data types.
|
23
46
|
class NestedType < DataType
|
47
|
+
def self.nested?
|
48
|
+
true
|
49
|
+
end
|
24
50
|
end
|
25
51
|
|
26
52
|
# 8-bit signed integer type.
|
@@ -56,11 +82,37 @@ module Polars
|
|
56
82
|
end
|
57
83
|
|
58
84
|
# 32-bit floating point type.
|
59
|
-
class Float32 <
|
85
|
+
class Float32 < FloatType
|
60
86
|
end
|
61
87
|
|
62
88
|
# 64-bit floating point type.
|
63
|
-
class Float64 <
|
89
|
+
class Float64 < FloatType
|
90
|
+
end
|
91
|
+
|
92
|
+
# Decimal 128-bit type with an optional precision and non-negative scale.
|
93
|
+
#
|
94
|
+
# NOTE: this is an experimental work-in-progress feature and may not work as expected.
|
95
|
+
class Decimal < FractionalType
|
96
|
+
attr_reader :precision, :scale
|
97
|
+
|
98
|
+
def initialize(precision, scale)
|
99
|
+
@precision = precision
|
100
|
+
@scale = scale
|
101
|
+
end
|
102
|
+
|
103
|
+
def ==(other)
|
104
|
+
if other.eql?(Decimal)
|
105
|
+
true
|
106
|
+
elsif other.is_a?(Decimal)
|
107
|
+
precision == other.precision && scale == other.scale
|
108
|
+
else
|
109
|
+
false
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
def to_s
|
114
|
+
"#{self.class.name}(precision: #{precision.inspect}, scale: #{scale.inspect})"
|
115
|
+
end
|
64
116
|
end
|
65
117
|
|
66
118
|
# Boolean type.
|
@@ -71,46 +123,130 @@ module Polars
|
|
71
123
|
class Utf8 < DataType
|
72
124
|
end
|
73
125
|
|
74
|
-
#
|
75
|
-
class
|
76
|
-
def initialize(inner)
|
77
|
-
@inner = Utils.rb_type_to_dtype(inner)
|
78
|
-
end
|
126
|
+
# Binary type.
|
127
|
+
class Binary < DataType
|
79
128
|
end
|
80
129
|
|
81
130
|
# Calendar date type.
|
82
131
|
class Date < TemporalType
|
83
132
|
end
|
84
133
|
|
134
|
+
# Time of day type.
|
135
|
+
class Time < TemporalType
|
136
|
+
end
|
137
|
+
|
85
138
|
# Calendar date and time type.
|
86
139
|
class Datetime < TemporalType
|
87
|
-
attr_reader :
|
140
|
+
attr_reader :time_unit, :time_zone
|
141
|
+
alias_method :tu, :time_unit
|
88
142
|
|
89
143
|
def initialize(time_unit = "us", time_zone = nil)
|
90
|
-
@
|
144
|
+
@time_unit = time_unit || "us"
|
91
145
|
@time_zone = time_zone
|
92
146
|
end
|
147
|
+
|
148
|
+
def ==(other)
|
149
|
+
if other.eql?(Datetime)
|
150
|
+
true
|
151
|
+
elsif other.is_a?(Datetime)
|
152
|
+
time_unit == other.time_unit && time_zone == other.time_zone
|
153
|
+
else
|
154
|
+
false
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
def to_s
|
159
|
+
"#{self.class.name}(time_unit: #{time_unit.inspect}, time_zone: #{time_zone.inspect})"
|
160
|
+
end
|
93
161
|
end
|
94
162
|
|
95
163
|
# Time duration/delta type.
|
96
164
|
class Duration < TemporalType
|
97
|
-
attr_reader :
|
165
|
+
attr_reader :time_unit
|
166
|
+
alias_method :tu, :time_unit
|
98
167
|
|
99
168
|
def initialize(time_unit = "us")
|
100
|
-
@
|
169
|
+
@time_unit = time_unit
|
170
|
+
end
|
171
|
+
|
172
|
+
def ==(other)
|
173
|
+
if other.eql?(Duration)
|
174
|
+
true
|
175
|
+
elsif other.is_a?(Duration)
|
176
|
+
time_unit == other.time_unit
|
177
|
+
else
|
178
|
+
false
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
def to_s
|
183
|
+
"#{self.class.name}(time_unit: #{time_unit.inspect})"
|
101
184
|
end
|
102
185
|
end
|
103
186
|
|
104
|
-
#
|
105
|
-
class
|
187
|
+
# A categorical encoding of a set of strings.
|
188
|
+
class Categorical < DataType
|
106
189
|
end
|
107
190
|
|
108
191
|
# Type for wrapping arbitrary Ruby objects.
|
109
192
|
class Object < DataType
|
110
193
|
end
|
111
194
|
|
112
|
-
#
|
113
|
-
class
|
195
|
+
# Type representing Null / None values.
|
196
|
+
class Null < DataType
|
197
|
+
end
|
198
|
+
|
199
|
+
# Type representing Datatype values that could not be determined statically.
|
200
|
+
class Unknown < DataType
|
201
|
+
end
|
202
|
+
|
203
|
+
# Nested list/array type.
|
204
|
+
class List < NestedType
|
205
|
+
attr_reader :inner
|
206
|
+
|
207
|
+
def initialize(inner)
|
208
|
+
@inner = Utils.rb_type_to_dtype(inner)
|
209
|
+
end
|
210
|
+
|
211
|
+
def ==(other)
|
212
|
+
if other.eql?(List)
|
213
|
+
true
|
214
|
+
elsif other.is_a?(List)
|
215
|
+
@inner.nil? || other.inner.nil? || @inner == other.inner
|
216
|
+
else
|
217
|
+
false
|
218
|
+
end
|
219
|
+
end
|
220
|
+
|
221
|
+
def to_s
|
222
|
+
"#{self.class.name}(#{inner})"
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
226
|
+
# Fixed-width nested array type.
|
227
|
+
class Array < NestedType
  attr_reader :width, :inner

  # @param width [Object]
  #   Declared width of the array type.
  # @param inner [Object, nil]
  #   Optional element type; when given it is normalized through
  #   `Utils.rb_type_to_dtype`, otherwise `inner` stays nil.
  def initialize(width, inner = nil)
    @width = width
    @inner = Utils.rb_type_to_dtype(inner) if inner
  end

  # Compare against the `Array` class itself (always equal) or another
  # `Array` instance.
  #
  # Two instances are equal when both their widths and their inner types
  # agree. A nil width or nil inner type on either side acts as a wildcard,
  # so partially specified types still match fully specified ones.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    if other.eql?(Array)
      true
    elsif other.is_a?(Array)
      widths_match = @width.nil? || other.width.nil? || @width == other.width
      inners_match = @inner.nil? || other.inner.nil? || @inner == other.inner
      widths_match && inners_match
    else
      false
    end
  end

  # Render the type with its inner type and width, so that types which
  # compare unequal because of their width also print differently.
  #
  # @return [String]
  def to_s
    "#{self.class.name}(#{inner}, width: #{width.inspect})"
  end
end
|
115
251
|
|
116
252
|
# Definition of a single field within a `Struct` DataType.
|
@@ -122,9 +258,12 @@ module Polars
|
|
122
258
|
@dtype = Utils.rb_type_to_dtype(dtype)
|
123
259
|
end
|
124
260
|
|
125
|
-
def
|
126
|
-
|
127
|
-
|
261
|
+
# Compare two fields by name and dtype.
#
# Objects that are not of this field class are never equal; checking the
# type first avoids calling `name` / `dtype` on nil or unrelated objects,
# which would raise NoMethodError.
#
# @param other [Object]
# @return [Boolean]
def ==(other)
  other.is_a?(self.class) && name == other.name && dtype == other.dtype
end
|
264
|
+
|
265
|
+
def to_s
|
266
|
+
"#{self.class.name}(#{name.inspect}, #{dtype})"
|
128
267
|
end
|
129
268
|
end
|
130
269
|
|
@@ -140,25 +279,22 @@ module Polars
|
|
140
279
|
end
|
141
280
|
end
|
142
281
|
|
143
|
-
def
|
144
|
-
|
145
|
-
|
282
|
+
def ==(other)
|
283
|
+
if other.eql?(Struct)
|
284
|
+
true
|
285
|
+
elsif other.is_a?(Struct)
|
286
|
+
fields == other.fields
|
287
|
+
else
|
288
|
+
false
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
292
|
+
def to_s
|
293
|
+
"#{self.class.name}([#{fields.map(&:to_s).join("\n")}])"
|
146
294
|
end
|
147
295
|
|
148
296
|
def to_schema
|
149
297
|
@fields.to_h { |f| [f.name, f.dtype] }
|
150
298
|
end
|
151
299
|
end
|
152
|
-
|
153
|
-
# Binary type.
|
154
|
-
class Binary < DataType
|
155
|
-
end
|
156
|
-
|
157
|
-
# Type representing Null / None values.
|
158
|
-
class Null < DataType
|
159
|
-
end
|
160
|
-
|
161
|
-
# Type representing Datatype values that could not be determined statically.
|
162
|
-
class Unknown < DataType
|
163
|
-
end
|
164
300
|
end
|
@@ -218,6 +218,25 @@ module Polars
|
|
218
218
|
)
|
219
219
|
end
|
220
220
|
|
221
|
+
# Create a naive Datetime from an existing Date/Datetime expression and a Time.
|
222
|
+
#
|
223
|
+
# If the underlying expression is a Datetime then its time component is replaced,
|
224
|
+
# and if it is a Date then a new Datetime is created by combining the two values.
|
225
|
+
#
|
226
|
+
# @param time [Object]
|
227
|
+
# A Ruby time literal or Polars expression/column that resolves to a time.
|
228
|
+
# @param time_unit ["ns", "us", "ms"]
|
229
|
+
# Unit of time.
|
230
|
+
#
|
231
|
+
# @return [Expr]
|
232
|
+
def combine(time, time_unit: "us")
  # Inside the Polars namespace a bare `Time` resolves to the Polars::Time
  # dtype class, so the leading `::` is required to match Ruby's core Time.
  unless time.is_a?(::Time) || time.is_a?(Expr)
    raise TypeError, "expected 'time' to be a Ruby time or Polars expression, found #{time}"
  end

  # Literals are converted to an expression so both input kinds expose `_rbexpr`.
  time = Utils.expr_to_lit_or_expr(time)
  Utils.wrap_expr(_rbexpr.dt_combine(time._rbexpr, time_unit))
end
|
239
|
+
|
221
240
|
# Format Date/datetime with a formatting rule.
|
222
241
|
#
|
223
242
|
# See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
|
@@ -270,6 +289,34 @@ module Polars
|
|
270
289
|
Utils.wrap_expr(_rbexpr.year)
|
271
290
|
end
|
272
291
|
|
292
|
+
# Determine whether the year of the underlying date is a leap year.
|
293
|
+
#
|
294
|
+
# Applies to Date and Datetime columns.
|
295
|
+
#
|
296
|
+
# @return [Expr]
|
297
|
+
#
|
298
|
+
# @example
|
299
|
+
# start = DateTime.new(2000, 1, 1)
|
300
|
+
# stop = DateTime.new(2002, 1, 1)
|
301
|
+
# df = Polars::DataFrame.new(
|
302
|
+
# {"date" => Polars.date_range(start, stop, "1y")}
|
303
|
+
# )
|
304
|
+
# df.select(Polars.col("date").dt.is_leap_year)
|
305
|
+
# # =>
|
306
|
+
# # shape: (3, 1)
|
307
|
+
# # ┌───────┐
|
308
|
+
# # │ date │
|
309
|
+
# # │ --- │
|
310
|
+
# # │ bool │
|
311
|
+
# # ╞═══════╡
|
312
|
+
# # │ true │
|
313
|
+
# # │ false │
|
314
|
+
# # │ false │
|
315
|
+
# # └───────┘
|
316
|
+
def is_leap_year
|
317
|
+
Utils.wrap_expr(_rbexpr.dt_is_leap_year)
|
318
|
+
end
|
319
|
+
|
273
320
|
# Extract ISO year from underlying Date representation.
|
274
321
|
#
|
275
322
|
# Applies to Date and Datetime columns.
|
@@ -550,6 +597,27 @@ module Polars
|
|
550
597
|
Utils.wrap_expr(_rbexpr.ordinal_day)
|
551
598
|
end
|
552
599
|
|
600
|
+
# Extract the time component.
|
601
|
+
#
|
602
|
+
# @return [Expr]
|
603
|
+
def time
|
604
|
+
Utils.wrap_expr(_rbexpr.dt_time)
|
605
|
+
end
|
606
|
+
|
607
|
+
# Extract the date component.
|
608
|
+
#
|
609
|
+
# @return [Expr]
|
610
|
+
def date
|
611
|
+
Utils.wrap_expr(_rbexpr.dt_date)
|
612
|
+
end
|
613
|
+
|
614
|
+
# Return the value as a datetime.
|
615
|
+
#
|
616
|
+
# @return [Expr]
|
617
|
+
def datetime
|
618
|
+
Utils.wrap_expr(_rbexpr.dt_datetime)
|
619
|
+
end
|
620
|
+
|
553
621
|
# Extract hour from underlying DateTime representation.
|
554
622
|
#
|
555
623
|
# Applies to Datetime columns.
|
@@ -958,8 +1026,8 @@ module Polars
|
|
958
1026
|
# Time zone for the `Datetime` Series.
|
959
1027
|
#
|
960
1028
|
# @return [Expr]
|
961
|
-
def replace_time_zone(tz)
|
962
|
-
Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz))
|
1029
|
+
def replace_time_zone(tz, use_earliest: nil)
|
1030
|
+
Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, use_earliest))
|
963
1031
|
end
|
964
1032
|
|
965
1033
|
# Localize tz-naive Datetime Series to tz-aware Datetime Series.
|
@@ -1282,5 +1350,77 @@ module Polars
|
|
1282
1350
|
def offset_by(by)
|
1283
1351
|
Utils.wrap_expr(_rbexpr.dt_offset_by(by))
|
1284
1352
|
end
|
1353
|
+
|
1354
|
+
# Roll backward to the first day of the month.
|
1355
|
+
#
|
1356
|
+
# @return [Expr]
|
1357
|
+
#
|
1358
|
+
# @example
|
1359
|
+
# df = Polars::DataFrame.new(
|
1360
|
+
# {
|
1361
|
+
# "dates" => Polars.date_range(
|
1362
|
+
# DateTime.new(2000, 1, 15, 2),
|
1363
|
+
# DateTime.new(2000, 12, 15, 2),
|
1364
|
+
# "1mo"
|
1365
|
+
# )
|
1366
|
+
# }
|
1367
|
+
# )
|
1368
|
+
# df.select(Polars.col("dates").dt.month_start)
|
1369
|
+
# # =>
|
1370
|
+
# # shape: (12, 1)
|
1371
|
+
# # ┌─────────────────────┐
|
1372
|
+
# # │ dates │
|
1373
|
+
# # │ --- │
|
1374
|
+
# # │ datetime[μs] │
|
1375
|
+
# # ╞═════════════════════╡
|
1376
|
+
# # │ 2000-01-01 02:00:00 │
|
1377
|
+
# # │ 2000-02-01 02:00:00 │
|
1378
|
+
# # │ 2000-03-01 02:00:00 │
|
1379
|
+
# # │ 2000-04-01 02:00:00 │
|
1380
|
+
# # │ … │
|
1381
|
+
# # │ 2000-09-01 02:00:00 │
|
1382
|
+
# # │ 2000-10-01 02:00:00 │
|
1383
|
+
# # │ 2000-11-01 02:00:00 │
|
1384
|
+
# # │ 2000-12-01 02:00:00 │
|
1385
|
+
# # └─────────────────────┘
|
1386
|
+
def month_start
|
1387
|
+
Utils.wrap_expr(_rbexpr.dt_month_start)
|
1388
|
+
end
|
1389
|
+
|
1390
|
+
# Roll forward to the last day of the month.
|
1391
|
+
#
|
1392
|
+
# @return [Expr]
|
1393
|
+
#
|
1394
|
+
# @example
|
1395
|
+
# df = Polars::DataFrame.new(
|
1396
|
+
# {
|
1397
|
+
# "dates" => Polars.date_range(
|
1398
|
+
# DateTime.new(2000, 1, 15, 2),
|
1399
|
+
# DateTime.new(2000, 12, 15, 2),
|
1400
|
+
# "1mo"
|
1401
|
+
# )
|
1402
|
+
# }
|
1403
|
+
# )
|
1404
|
+
# df.select(Polars.col("dates").dt.month_end)
|
1405
|
+
# # =>
|
1406
|
+
# # shape: (12, 1)
|
1407
|
+
# # ┌─────────────────────┐
|
1408
|
+
# # │ dates │
|
1409
|
+
# # │ --- │
|
1410
|
+
# # │ datetime[μs] │
|
1411
|
+
# # ╞═════════════════════╡
|
1412
|
+
# # │ 2000-01-31 02:00:00 │
|
1413
|
+
# # │ 2000-02-29 02:00:00 │
|
1414
|
+
# # │ 2000-03-31 02:00:00 │
|
1415
|
+
# # │ 2000-04-30 02:00:00 │
|
1416
|
+
# # │ … │
|
1417
|
+
# # │ 2000-09-30 02:00:00 │
|
1418
|
+
# # │ 2000-10-31 02:00:00 │
|
1419
|
+
# # │ 2000-11-30 02:00:00 │
|
1420
|
+
# # │ 2000-12-31 02:00:00 │
|
1421
|
+
# # └─────────────────────┘
|
1422
|
+
def month_end
|
1423
|
+
Utils.wrap_expr(_rbexpr.dt_month_end)
|
1424
|
+
end
|
1285
1425
|
end
|
1286
1426
|
end
|
@@ -82,8 +82,15 @@ module Polars
|
|
82
82
|
# # => 2001-01-02 00:00:00 UTC
|
83
83
|
def median
|
84
84
|
s = Utils.wrap_s(_s)
|
85
|
-
out = s.median
|
86
|
-
|
85
|
+
out = s.median
|
86
|
+
if !out.nil?
|
87
|
+
if s.dtype == Date
|
88
|
+
return Utils._to_ruby_date(out.to_i)
|
89
|
+
else
|
90
|
+
return Utils._to_ruby_datetime(out.to_i, s.time_unit)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
nil
|
87
94
|
end
|
88
95
|
|
89
96
|
# Return mean as Ruby object.
|
@@ -107,7 +114,14 @@ module Polars
|
|
107
114
|
def mean
  s = Utils.wrap_s(_s)
  # Keep the raw mean: converting with `to_i` first would turn a nil mean
  # into 0, so the nil guard below could never fire and the epoch would be
  # returned instead of nil.
  out = s.mean
  return nil if out.nil?

  if s.dtype == Date
    Utils._to_ruby_date(out.to_i)
  else
    Utils._to_ruby_datetime(out.to_i, s.time_unit)
  end
end
|
112
126
|
|
113
127
|
# Format Date/datetime with a formatting rule.
|