polars-df 0.5.0-x86_64-linux → 0.7.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +595 -709
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +3854 -4496
- data/README.md +11 -9
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +206 -131
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +22 -28
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +241 -151
- data/lib/polars/functions.rb +29 -38
- data/lib/polars/group_by.rb +38 -76
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +174 -95
- data/lib/polars/lazy_functions.rb +87 -63
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +40 -36
- data/lib/polars/list_name_space.rb +15 -15
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +6 -4
- data/lib/polars/series.rb +95 -28
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +249 -69
- data/lib/polars/string_name_space.rb +155 -25
- data/lib/polars/utils.rb +119 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +7 -2
data/lib/polars/data_types.rb
CHANGED
@@ -1,6 +1,25 @@
|
|
1
1
|
module Polars
|
2
2
|
# Base class for all Polars data types.
|
3
3
|
class DataType
|
4
|
+
def self.base_type
|
5
|
+
self
|
6
|
+
end
|
7
|
+
|
8
|
+
def base_type
|
9
|
+
is_a?(DataType) ? self.class : self
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.nested?
|
13
|
+
false
|
14
|
+
end
|
15
|
+
|
16
|
+
def nested?
|
17
|
+
self.class.nested?
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.==(other)
|
21
|
+
eql?(other) || other.is_a?(self)
|
22
|
+
end
|
4
23
|
end
|
5
24
|
|
6
25
|
# Base class for numeric data types.
|
@@ -15,12 +34,19 @@ module Polars
|
|
15
34
|
class FractionalType < NumericType
|
16
35
|
end
|
17
36
|
|
37
|
+
# Base class for float data types.
|
38
|
+
class FloatType < FractionalType
|
39
|
+
end
|
40
|
+
|
18
41
|
# Base class for temporal data types.
|
19
42
|
class TemporalType < DataType
|
20
43
|
end
|
21
44
|
|
22
45
|
# Base class for nested data types.
|
23
46
|
class NestedType < DataType
|
47
|
+
def self.nested?
|
48
|
+
true
|
49
|
+
end
|
24
50
|
end
|
25
51
|
|
26
52
|
# 8-bit signed integer type.
|
@@ -56,11 +82,37 @@ module Polars
|
|
56
82
|
end
|
57
83
|
|
58
84
|
# 32-bit floating point type.
|
59
|
-
class Float32 <
|
85
|
+
class Float32 < FloatType
|
60
86
|
end
|
61
87
|
|
62
88
|
# 64-bit floating point type.
|
63
|
-
class Float64 <
|
89
|
+
class Float64 < FloatType
|
90
|
+
end
|
91
|
+
|
92
|
+
# Decimal 128-bit type with an optional precision and non-negative scale.
|
93
|
+
#
|
94
|
+
# NOTE: this is an experimental work-in-progress feature and may not work as expected.
|
95
|
+
class Decimal < FractionalType
|
96
|
+
attr_reader :precision, :scale
|
97
|
+
|
98
|
+
def initialize(precision, scale)
|
99
|
+
@precision = precision
|
100
|
+
@scale = scale
|
101
|
+
end
|
102
|
+
|
103
|
+
def ==(other)
|
104
|
+
if other.eql?(Decimal)
|
105
|
+
true
|
106
|
+
elsif other.is_a?(Decimal)
|
107
|
+
precision == other.precision && scale == other.scale
|
108
|
+
else
|
109
|
+
false
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
def to_s
|
114
|
+
"#{self.class.name}(precision: #{precision.inspect}, scale: #{scale.inspect})"
|
115
|
+
end
|
64
116
|
end
|
65
117
|
|
66
118
|
# Boolean type.
|
@@ -71,17 +123,18 @@ module Polars
|
|
71
123
|
class Utf8 < DataType
|
72
124
|
end
|
73
125
|
|
74
|
-
#
|
75
|
-
class
|
76
|
-
def initialize(inner)
|
77
|
-
@inner = Utils.rb_type_to_dtype(inner)
|
78
|
-
end
|
126
|
+
# Binary type.
|
127
|
+
class Binary < DataType
|
79
128
|
end
|
80
129
|
|
81
130
|
# Calendar date type.
|
82
131
|
class Date < TemporalType
|
83
132
|
end
|
84
133
|
|
134
|
+
# Time of day type.
|
135
|
+
class Time < TemporalType
|
136
|
+
end
|
137
|
+
|
85
138
|
# Calendar date and time type.
|
86
139
|
class Datetime < TemporalType
|
87
140
|
attr_reader :time_unit, :time_zone
|
@@ -91,6 +144,20 @@ module Polars
|
|
91
144
|
@time_unit = time_unit || "us"
|
92
145
|
@time_zone = time_zone
|
93
146
|
end
|
147
|
+
|
148
|
+
def ==(other)
|
149
|
+
if other.eql?(Datetime)
|
150
|
+
true
|
151
|
+
elsif other.is_a?(Datetime)
|
152
|
+
time_unit == other.time_unit && time_zone == other.time_zone
|
153
|
+
else
|
154
|
+
false
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
def to_s
|
159
|
+
"#{self.class.name}(time_unit: #{time_unit.inspect}, time_zone: #{time_zone.inspect})"
|
160
|
+
end
|
94
161
|
end
|
95
162
|
|
96
163
|
# Time duration/delta type.
|
@@ -101,18 +168,85 @@ module Polars
|
|
101
168
|
def initialize(time_unit = "us")
|
102
169
|
@time_unit = time_unit
|
103
170
|
end
|
171
|
+
|
172
|
+
def ==(other)
|
173
|
+
if other.eql?(Duration)
|
174
|
+
true
|
175
|
+
elsif other.is_a?(Duration)
|
176
|
+
time_unit == other.time_unit
|
177
|
+
else
|
178
|
+
false
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
def to_s
|
183
|
+
"#{self.class.name}(time_unit: #{time_unit.inspect})"
|
184
|
+
end
|
104
185
|
end
|
105
186
|
|
106
|
-
#
|
107
|
-
class
|
187
|
+
# A categorical encoding of a set of strings.
|
188
|
+
class Categorical < DataType
|
108
189
|
end
|
109
190
|
|
110
191
|
# Type for wrapping arbitrary Ruby objects.
|
111
192
|
class Object < DataType
|
112
193
|
end
|
113
194
|
|
114
|
-
#
|
115
|
-
class
|
195
|
+
# Type representing Null / None values.
|
196
|
+
class Null < DataType
|
197
|
+
end
|
198
|
+
|
199
|
+
# Type representing Datatype values that could not be determined statically.
|
200
|
+
class Unknown < DataType
|
201
|
+
end
|
202
|
+
|
203
|
+
# Nested list/array type.
|
204
|
+
class List < NestedType
|
205
|
+
attr_reader :inner
|
206
|
+
|
207
|
+
def initialize(inner)
|
208
|
+
@inner = Utils.rb_type_to_dtype(inner)
|
209
|
+
end
|
210
|
+
|
211
|
+
def ==(other)
|
212
|
+
if other.eql?(List)
|
213
|
+
true
|
214
|
+
elsif other.is_a?(List)
|
215
|
+
@inner.nil? || other.inner.nil? || @inner == other.inner
|
216
|
+
else
|
217
|
+
false
|
218
|
+
end
|
219
|
+
end
|
220
|
+
|
221
|
+
def to_s
|
222
|
+
"#{self.class.name}(#{inner})"
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
226
|
+
# Nested list/array type.
|
227
|
+
class Array < NestedType
|
228
|
+
attr_reader :width, :inner
|
229
|
+
|
230
|
+
def initialize(width, inner = nil)
|
231
|
+
@width = width
|
232
|
+
@inner = Utils.rb_type_to_dtype(inner) if inner
|
233
|
+
end
|
234
|
+
|
235
|
+
# TODO check width?
|
236
|
+
def ==(other)
|
237
|
+
if other.eql?(Array)
|
238
|
+
true
|
239
|
+
elsif other.is_a?(Array)
|
240
|
+
@inner.nil? || other.inner.nil? || @inner == other.inner
|
241
|
+
else
|
242
|
+
false
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
246
|
+
# TODO add width?
|
247
|
+
def to_s
|
248
|
+
"#{self.class.name}(#{inner})"
|
249
|
+
end
|
116
250
|
end
|
117
251
|
|
118
252
|
# Definition of a single field within a `Struct` DataType.
|
@@ -124,9 +258,12 @@ module Polars
|
|
124
258
|
@dtype = Utils.rb_type_to_dtype(dtype)
|
125
259
|
end
|
126
260
|
|
127
|
-
def
|
128
|
-
|
129
|
-
|
261
|
+
def ==(other)
|
262
|
+
name == other.name && dtype == other.dtype
|
263
|
+
end
|
264
|
+
|
265
|
+
def to_s
|
266
|
+
"#{self.class.name}(#{name.inspect}, #{dtype})"
|
130
267
|
end
|
131
268
|
end
|
132
269
|
|
@@ -142,25 +279,22 @@ module Polars
|
|
142
279
|
end
|
143
280
|
end
|
144
281
|
|
145
|
-
def
|
146
|
-
|
147
|
-
|
282
|
+
def ==(other)
|
283
|
+
if other.eql?(Struct)
|
284
|
+
true
|
285
|
+
elsif other.is_a?(Struct)
|
286
|
+
fields == other.fields
|
287
|
+
else
|
288
|
+
false
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
292
|
+
def to_s
|
293
|
+
"#{self.class.name}([#{fields.map(&:to_s).join("\n")}])"
|
148
294
|
end
|
149
295
|
|
150
296
|
def to_schema
|
151
297
|
@fields.to_h { |f| [f.name, f.dtype] }
|
152
298
|
end
|
153
299
|
end
|
154
|
-
|
155
|
-
# Binary type.
|
156
|
-
class Binary < DataType
|
157
|
-
end
|
158
|
-
|
159
|
-
# Type representing Null / None values.
|
160
|
-
class Null < DataType
|
161
|
-
end
|
162
|
-
|
163
|
-
# Type representing Datatype values that could not be determined statically.
|
164
|
-
class Unknown < DataType
|
165
|
-
end
|
166
300
|
end
|
@@ -97,15 +97,20 @@ module Polars
|
|
97
97
|
# # │ 2001-01-01 00:50:00 ┆ 2001-01-01 00:30:00 │
|
98
98
|
# # │ 2001-01-01 01:00:00 ┆ 2001-01-01 01:00:00 │
|
99
99
|
# # └─────────────────────┴─────────────────────┘
|
100
|
-
def truncate(every, offset: nil)
|
100
|
+
def truncate(every, offset: nil, use_earliest: nil)
|
101
101
|
if offset.nil?
|
102
102
|
offset = "0ns"
|
103
103
|
end
|
104
104
|
|
105
|
+
if !every.is_a?(Expr)
|
106
|
+
every = Utils._timedelta_to_pl_duration(every)
|
107
|
+
end
|
108
|
+
every = Utils.parse_as_expression(every, str_as_lit: true)
|
109
|
+
|
105
110
|
Utils.wrap_expr(
|
106
111
|
_rbexpr.dt_truncate(
|
107
|
-
|
108
|
-
Utils._timedelta_to_pl_duration(offset)
|
112
|
+
every,
|
113
|
+
Utils._timedelta_to_pl_duration(offset),
|
109
114
|
)
|
110
115
|
)
|
111
116
|
end
|
@@ -1026,21 +1031,10 @@ module Polars
|
|
1026
1031
|
# Time zone for the `Datetime` Series.
|
1027
1032
|
#
|
1028
1033
|
# @return [Expr]
|
1029
|
-
def replace_time_zone(tz, use_earliest: nil)
|
1030
|
-
Utils.
|
1031
|
-
|
1032
|
-
|
1033
|
-
# Localize tz-naive Datetime Series to tz-aware Datetime Series.
|
1034
|
-
#
|
1035
|
-
# This method takes a naive Datetime Series and makes this time zone aware.
|
1036
|
-
# It does not move the time to another time zone.
|
1037
|
-
#
|
1038
|
-
# @param tz [String]
|
1039
|
-
# Time zone for the `Datetime` Series.
|
1040
|
-
#
|
1041
|
-
# @return [Expr]
|
1042
|
-
def tz_localize(tz)
|
1043
|
-
Utils.wrap_expr(_rbexpr.dt_tz_localize(tz))
|
1034
|
+
def replace_time_zone(tz, use_earliest: nil, ambiguous: "raise")
|
1035
|
+
ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
|
1036
|
+
ambiguous = Polars.lit(ambiguous) unless ambiguous.is_a?(Expr)
|
1037
|
+
Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, ambiguous._rbexpr))
|
1044
1038
|
end
|
1045
1039
|
|
1046
1040
|
# Extract the days from a Duration type.
|
@@ -1348,6 +1342,7 @@ module Polars
|
|
1348
1342
|
# # │ 2006-01-01 00:00:00 ┆ 2003-11-01 00:00:00 │
|
1349
1343
|
# # └─────────────────────┴─────────────────────┘
|
1350
1344
|
def offset_by(by)
|
1345
|
+
by = Utils.parse_as_expression(by, str_as_lit: true)
|
1351
1346
|
Utils.wrap_expr(_rbexpr.dt_offset_by(by))
|
1352
1347
|
end
|
1353
1348
|
|
@@ -23,18 +23,8 @@ module Polars
|
|
23
23
|
# @return [Object]
|
24
24
|
#
|
25
25
|
# @example
|
26
|
-
#
|
27
|
-
#
|
28
|
-
# # shape: (3,)
|
29
|
-
# # Series: '' [datetime[μs]]
|
30
|
-
# # [
|
31
|
-
# # 2001-01-01 00:00:00
|
32
|
-
# # 2001-01-02 00:00:00
|
33
|
-
# # 2001-01-03 00:00:00
|
34
|
-
# # ]
|
35
|
-
#
|
36
|
-
# @example
|
37
|
-
# date.dt.min
|
26
|
+
# s = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
|
27
|
+
# s.dt.min
|
38
28
|
# # => 2001-01-01 00:00:00 UTC
|
39
29
|
def min
|
40
30
|
Utils.wrap_s(_s).min
|
@@ -45,18 +35,8 @@ module Polars
|
|
45
35
|
# @return [Object]
|
46
36
|
#
|
47
37
|
# @example
|
48
|
-
#
|
49
|
-
#
|
50
|
-
# # shape: (3,)
|
51
|
-
# # Series: '' [datetime[μs]]
|
52
|
-
# # [
|
53
|
-
# # 2001-01-01 00:00:00
|
54
|
-
# # 2001-01-02 00:00:00
|
55
|
-
# # 2001-01-03 00:00:00
|
56
|
-
# # ]
|
57
|
-
#
|
58
|
-
# @example
|
59
|
-
# date.dt.max
|
38
|
+
# s = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
|
39
|
+
# s.dt.max
|
60
40
|
# # => 2001-01-03 00:00:00 UTC
|
61
41
|
def max
|
62
42
|
Utils.wrap_s(_s).max
|
@@ -82,8 +62,15 @@ module Polars
|
|
82
62
|
# # => 2001-01-02 00:00:00 UTC
|
83
63
|
def median
|
84
64
|
s = Utils.wrap_s(_s)
|
85
|
-
out = s.median
|
86
|
-
|
65
|
+
out = s.median
|
66
|
+
if !out.nil?
|
67
|
+
if s.dtype == Date
|
68
|
+
return Utils._to_ruby_date(out.to_i)
|
69
|
+
else
|
70
|
+
return Utils._to_ruby_datetime(out.to_i, s.time_unit)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
nil
|
87
74
|
end
|
88
75
|
|
89
76
|
# Return mean as Ruby object.
|
@@ -107,7 +94,14 @@ module Polars
|
|
107
94
|
def mean
|
108
95
|
s = Utils.wrap_s(_s)
|
109
96
|
out = s.mean.to_i
|
110
|
-
|
97
|
+
if !out.nil?
|
98
|
+
if s.dtype == Date
|
99
|
+
return Utils._to_ruby_date(out.to_i)
|
100
|
+
else
|
101
|
+
return Utils._to_ruby_datetime(out.to_i, s.time_unit)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
nil
|
111
105
|
end
|
112
106
|
|
113
107
|
# Format Date/datetime with a formatting rule.
|
@@ -1386,7 +1380,7 @@ module Polars
|
|
1386
1380
|
# # 2001-01-01 00:30:00
|
1387
1381
|
# # 2001-01-01 01:00:00
|
1388
1382
|
# # ]
|
1389
|
-
def truncate(every, offset: nil)
|
1383
|
+
def truncate(every, offset: nil, use_earliest: nil)
|
1390
1384
|
super
|
1391
1385
|
end
|
1392
1386
|
|
@@ -2,7 +2,7 @@ module Polars
|
|
2
2
|
# A dynamic grouper.
|
3
3
|
#
|
4
4
|
# This has an `.agg` method which allows you to run all polars expressions in a
|
5
|
-
#
|
5
|
+
# group by context.
|
6
6
|
class DynamicGroupBy
|
7
7
|
def initialize(
|
8
8
|
df,
|
@@ -34,7 +34,7 @@ module Polars
|
|
34
34
|
|
35
35
|
def agg(aggs)
|
36
36
|
@df.lazy
|
37
|
-
.
|
37
|
+
.group_by_dynamic(
|
38
38
|
@time_column,
|
39
39
|
every: @every,
|
40
40
|
period: @period,
|