polars-df 0.5.0-x86_64-linux → 0.7.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +595 -709
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +3854 -4496
- data/README.md +11 -9
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +206 -131
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +22 -28
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +241 -151
- data/lib/polars/functions.rb +29 -38
- data/lib/polars/group_by.rb +38 -76
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +174 -95
- data/lib/polars/lazy_functions.rb +87 -63
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +40 -36
- data/lib/polars/list_name_space.rb +15 -15
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +6 -4
- data/lib/polars/series.rb +95 -28
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +249 -69
- data/lib/polars/string_name_space.rb +155 -25
- data/lib/polars/utils.rb +119 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +7 -2
data/lib/polars/data_types.rb
CHANGED
@@ -1,6 +1,25 @@
|
|
1
1
|
module Polars
|
2
2
|
# Base class for all Polars data types.
|
3
3
|
class DataType
|
4
|
+
def self.base_type
|
5
|
+
self
|
6
|
+
end
|
7
|
+
|
8
|
+
def base_type
|
9
|
+
is_a?(DataType) ? self.class : self
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.nested?
|
13
|
+
false
|
14
|
+
end
|
15
|
+
|
16
|
+
def nested?
|
17
|
+
self.class.nested?
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.==(other)
|
21
|
+
eql?(other) || other.is_a?(self)
|
22
|
+
end
|
4
23
|
end
|
5
24
|
|
6
25
|
# Base class for numeric data types.
|
@@ -15,12 +34,19 @@ module Polars
|
|
15
34
|
class FractionalType < NumericType
|
16
35
|
end
|
17
36
|
|
37
|
+
# Base class for float data types.
|
38
|
+
class FloatType < FractionalType
|
39
|
+
end
|
40
|
+
|
18
41
|
# Base class for temporal data types.
|
19
42
|
class TemporalType < DataType
|
20
43
|
end
|
21
44
|
|
22
45
|
# Base class for nested data types.
|
23
46
|
class NestedType < DataType
|
47
|
+
def self.nested?
|
48
|
+
true
|
49
|
+
end
|
24
50
|
end
|
25
51
|
|
26
52
|
# 8-bit signed integer type.
|
@@ -56,11 +82,37 @@ module Polars
|
|
56
82
|
end
|
57
83
|
|
58
84
|
# 32-bit floating point type.
|
59
|
-
class Float32 <
|
85
|
+
class Float32 < FloatType
|
60
86
|
end
|
61
87
|
|
62
88
|
# 64-bit floating point type.
|
63
|
-
class Float64 <
|
89
|
+
class Float64 < FloatType
|
90
|
+
end
|
91
|
+
|
92
|
+
# Decimal 128-bit type with an optional precision and non-negative scale.
|
93
|
+
#
|
94
|
+
# NOTE: this is an experimental work-in-progress feature and may not work as expected.
|
95
|
+
class Decimal < FractionalType
|
96
|
+
attr_reader :precision, :scale
|
97
|
+
|
98
|
+
def initialize(precision, scale)
|
99
|
+
@precision = precision
|
100
|
+
@scale = scale
|
101
|
+
end
|
102
|
+
|
103
|
+
def ==(other)
|
104
|
+
if other.eql?(Decimal)
|
105
|
+
true
|
106
|
+
elsif other.is_a?(Decimal)
|
107
|
+
precision == other.precision && scale == other.scale
|
108
|
+
else
|
109
|
+
false
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
def to_s
|
114
|
+
"#{self.class.name}(precision: #{precision.inspect}, scale: #{scale.inspect})"
|
115
|
+
end
|
64
116
|
end
|
65
117
|
|
66
118
|
# Boolean type.
|
@@ -71,17 +123,18 @@ module Polars
|
|
71
123
|
class Utf8 < DataType
|
72
124
|
end
|
73
125
|
|
74
|
-
#
|
75
|
-
class
|
76
|
-
def initialize(inner)
|
77
|
-
@inner = Utils.rb_type_to_dtype(inner)
|
78
|
-
end
|
126
|
+
# Binary type.
|
127
|
+
class Binary < DataType
|
79
128
|
end
|
80
129
|
|
81
130
|
# Calendar date type.
|
82
131
|
class Date < TemporalType
|
83
132
|
end
|
84
133
|
|
134
|
+
# Time of day type.
|
135
|
+
class Time < TemporalType
|
136
|
+
end
|
137
|
+
|
85
138
|
# Calendar date and time type.
|
86
139
|
class Datetime < TemporalType
|
87
140
|
attr_reader :time_unit, :time_zone
|
@@ -91,6 +144,20 @@ module Polars
|
|
91
144
|
@time_unit = time_unit || "us"
|
92
145
|
@time_zone = time_zone
|
93
146
|
end
|
147
|
+
|
148
|
+
def ==(other)
|
149
|
+
if other.eql?(Datetime)
|
150
|
+
true
|
151
|
+
elsif other.is_a?(Datetime)
|
152
|
+
time_unit == other.time_unit && time_zone == other.time_zone
|
153
|
+
else
|
154
|
+
false
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
def to_s
|
159
|
+
"#{self.class.name}(time_unit: #{time_unit.inspect}, time_zone: #{time_zone.inspect})"
|
160
|
+
end
|
94
161
|
end
|
95
162
|
|
96
163
|
# Time duration/delta type.
|
@@ -101,18 +168,85 @@ module Polars
|
|
101
168
|
def initialize(time_unit = "us")
|
102
169
|
@time_unit = time_unit
|
103
170
|
end
|
171
|
+
|
172
|
+
def ==(other)
|
173
|
+
if other.eql?(Duration)
|
174
|
+
true
|
175
|
+
elsif other.is_a?(Duration)
|
176
|
+
time_unit == other.time_unit
|
177
|
+
else
|
178
|
+
false
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
def to_s
|
183
|
+
"#{self.class.name}(time_unit: #{time_unit.inspect})"
|
184
|
+
end
|
104
185
|
end
|
105
186
|
|
106
|
-
#
|
107
|
-
class
|
187
|
+
# A categorical encoding of a set of strings.
|
188
|
+
class Categorical < DataType
|
108
189
|
end
|
109
190
|
|
110
191
|
# Type for wrapping arbitrary Ruby objects.
|
111
192
|
class Object < DataType
|
112
193
|
end
|
113
194
|
|
114
|
-
#
|
115
|
-
class
|
195
|
+
# Type representing Null / None values.
|
196
|
+
class Null < DataType
|
197
|
+
end
|
198
|
+
|
199
|
+
# Type representing Datatype values that could not be determined statically.
|
200
|
+
class Unknown < DataType
|
201
|
+
end
|
202
|
+
|
203
|
+
# Nested list/array type.
|
204
|
+
class List < NestedType
|
205
|
+
attr_reader :inner
|
206
|
+
|
207
|
+
def initialize(inner)
|
208
|
+
@inner = Utils.rb_type_to_dtype(inner)
|
209
|
+
end
|
210
|
+
|
211
|
+
def ==(other)
|
212
|
+
if other.eql?(List)
|
213
|
+
true
|
214
|
+
elsif other.is_a?(List)
|
215
|
+
@inner.nil? || other.inner.nil? || @inner == other.inner
|
216
|
+
else
|
217
|
+
false
|
218
|
+
end
|
219
|
+
end
|
220
|
+
|
221
|
+
def to_s
|
222
|
+
"#{self.class.name}(#{inner})"
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
226
|
+
# Nested array type with a declared width.
|
227
|
+
class Array < NestedType
|
228
|
+
attr_reader :width, :inner
|
229
|
+
|
230
|
+
def initialize(width, inner = nil)
|
231
|
+
@width = width
|
232
|
+
@inner = Utils.rb_type_to_dtype(inner) if inner
|
233
|
+
end
|
234
|
+
|
235
|
+
# TODO check width?
|
236
|
+
def ==(other)
|
237
|
+
if other.eql?(Array)
|
238
|
+
true
|
239
|
+
elsif other.is_a?(Array)
|
240
|
+
@inner.nil? || other.inner.nil? || @inner == other.inner
|
241
|
+
else
|
242
|
+
false
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
246
|
+
# TODO add width?
|
247
|
+
def to_s
|
248
|
+
"#{self.class.name}(#{inner})"
|
249
|
+
end
|
116
250
|
end
|
117
251
|
|
118
252
|
# Definition of a single field within a `Struct` DataType.
|
@@ -124,9 +258,12 @@ module Polars
|
|
124
258
|
@dtype = Utils.rb_type_to_dtype(dtype)
|
125
259
|
end
|
126
260
|
|
127
|
-
def
|
128
|
-
|
129
|
-
|
261
|
+
def ==(other)
|
262
|
+
name == other.name && dtype == other.dtype
|
263
|
+
end
|
264
|
+
|
265
|
+
def to_s
|
266
|
+
"#{self.class.name}(#{name.inspect}, #{dtype})"
|
130
267
|
end
|
131
268
|
end
|
132
269
|
|
@@ -142,25 +279,22 @@ module Polars
|
|
142
279
|
end
|
143
280
|
end
|
144
281
|
|
145
|
-
def
|
146
|
-
|
147
|
-
|
282
|
+
def ==(other)
|
283
|
+
if other.eql?(Struct)
|
284
|
+
true
|
285
|
+
elsif other.is_a?(Struct)
|
286
|
+
fields == other.fields
|
287
|
+
else
|
288
|
+
false
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
292
|
+
def to_s
|
293
|
+
"#{self.class.name}([#{fields.map(&:to_s).join("\n")}])"
|
148
294
|
end
|
149
295
|
|
150
296
|
def to_schema
|
151
297
|
@fields.to_h { |f| [f.name, f.dtype] }
|
152
298
|
end
|
153
299
|
end
|
154
|
-
|
155
|
-
# Binary type.
|
156
|
-
class Binary < DataType
|
157
|
-
end
|
158
|
-
|
159
|
-
# Type representing Null / None values.
|
160
|
-
class Null < DataType
|
161
|
-
end
|
162
|
-
|
163
|
-
# Type representing Datatype values that could not be determined statically.
|
164
|
-
class Unknown < DataType
|
165
|
-
end
|
166
300
|
end
|
@@ -97,15 +97,20 @@ module Polars
|
|
97
97
|
# # │ 2001-01-01 00:50:00 ┆ 2001-01-01 00:30:00 │
|
98
98
|
# # │ 2001-01-01 01:00:00 ┆ 2001-01-01 01:00:00 │
|
99
99
|
# # └─────────────────────┴─────────────────────┘
|
100
|
-
def truncate(every, offset: nil)
|
100
|
+
def truncate(every, offset: nil, use_earliest: nil)
|
101
101
|
if offset.nil?
|
102
102
|
offset = "0ns"
|
103
103
|
end
|
104
104
|
|
105
|
+
if !every.is_a?(Expr)
|
106
|
+
every = Utils._timedelta_to_pl_duration(every)
|
107
|
+
end
|
108
|
+
every = Utils.parse_as_expression(every, str_as_lit: true)
|
109
|
+
|
105
110
|
Utils.wrap_expr(
|
106
111
|
_rbexpr.dt_truncate(
|
107
|
-
|
108
|
-
Utils._timedelta_to_pl_duration(offset)
|
112
|
+
every,
|
113
|
+
Utils._timedelta_to_pl_duration(offset),
|
109
114
|
)
|
110
115
|
)
|
111
116
|
end
|
@@ -1026,21 +1031,10 @@ module Polars
|
|
1026
1031
|
# Time zone for the `Datetime` Series.
|
1027
1032
|
#
|
1028
1033
|
# @return [Expr]
|
1029
|
-
def replace_time_zone(tz, use_earliest: nil)
|
1030
|
-
Utils.
|
1031
|
-
|
1032
|
-
|
1033
|
-
# Localize tz-naive Datetime Series to tz-aware Datetime Series.
|
1034
|
-
#
|
1035
|
-
# This method takes a naive Datetime Series and makes this time zone aware.
|
1036
|
-
# It does not move the time to another time zone.
|
1037
|
-
#
|
1038
|
-
# @param tz [String]
|
1039
|
-
# Time zone for the `Datetime` Series.
|
1040
|
-
#
|
1041
|
-
# @return [Expr]
|
1042
|
-
def tz_localize(tz)
|
1043
|
-
Utils.wrap_expr(_rbexpr.dt_tz_localize(tz))
|
1034
|
+
def replace_time_zone(tz, use_earliest: nil, ambiguous: "raise")
|
1035
|
+
ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
|
1036
|
+
ambiguous = Polars.lit(ambiguous) unless ambiguous.is_a?(Expr)
|
1037
|
+
Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, ambiguous._rbexpr))
|
1044
1038
|
end
|
1045
1039
|
|
1046
1040
|
# Extract the days from a Duration type.
|
@@ -1348,6 +1342,7 @@ module Polars
|
|
1348
1342
|
# # │ 2006-01-01 00:00:00 ┆ 2003-11-01 00:00:00 │
|
1349
1343
|
# # └─────────────────────┴─────────────────────┘
|
1350
1344
|
def offset_by(by)
|
1345
|
+
by = Utils.parse_as_expression(by, str_as_lit: true)
|
1351
1346
|
Utils.wrap_expr(_rbexpr.dt_offset_by(by))
|
1352
1347
|
end
|
1353
1348
|
|
@@ -23,18 +23,8 @@ module Polars
|
|
23
23
|
# @return [Object]
|
24
24
|
#
|
25
25
|
# @example
|
26
|
-
#
|
27
|
-
#
|
28
|
-
# # shape: (3,)
|
29
|
-
# # Series: '' [datetime[μs]]
|
30
|
-
# # [
|
31
|
-
# # 2001-01-01 00:00:00
|
32
|
-
# # 2001-01-02 00:00:00
|
33
|
-
# # 2001-01-03 00:00:00
|
34
|
-
# # ]
|
35
|
-
#
|
36
|
-
# @example
|
37
|
-
# date.dt.min
|
26
|
+
# s = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
|
27
|
+
# s.dt.min
|
38
28
|
# # => 2001-01-01 00:00:00 UTC
|
39
29
|
def min
|
40
30
|
Utils.wrap_s(_s).min
|
@@ -45,18 +35,8 @@ module Polars
|
|
45
35
|
# @return [Object]
|
46
36
|
#
|
47
37
|
# @example
|
48
|
-
#
|
49
|
-
#
|
50
|
-
# # shape: (3,)
|
51
|
-
# # Series: '' [datetime[μs]]
|
52
|
-
# # [
|
53
|
-
# # 2001-01-01 00:00:00
|
54
|
-
# # 2001-01-02 00:00:00
|
55
|
-
# # 2001-01-03 00:00:00
|
56
|
-
# # ]
|
57
|
-
#
|
58
|
-
# @example
|
59
|
-
# date.dt.max
|
38
|
+
# s = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
|
39
|
+
# s.dt.max
|
60
40
|
# # => 2001-01-03 00:00:00 UTC
|
61
41
|
def max
|
62
42
|
Utils.wrap_s(_s).max
|
@@ -82,8 +62,15 @@ module Polars
|
|
82
62
|
# # => 2001-01-02 00:00:00 UTC
|
83
63
|
def median
|
84
64
|
s = Utils.wrap_s(_s)
|
85
|
-
out = s.median
|
86
|
-
|
65
|
+
out = s.median
|
66
|
+
if !out.nil?
|
67
|
+
if s.dtype == Date
|
68
|
+
return Utils._to_ruby_date(out.to_i)
|
69
|
+
else
|
70
|
+
return Utils._to_ruby_datetime(out.to_i, s.time_unit)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
nil
|
87
74
|
end
|
88
75
|
|
89
76
|
# Return mean as Ruby object.
|
@@ -107,7 +94,14 @@ module Polars
|
|
107
94
|
def mean
|
108
95
|
s = Utils.wrap_s(_s)
|
109
96
|
out = s.mean.to_i
|
110
|
-
|
97
|
+
if !out.nil?
|
98
|
+
if s.dtype == Date
|
99
|
+
return Utils._to_ruby_date(out.to_i)
|
100
|
+
else
|
101
|
+
return Utils._to_ruby_datetime(out.to_i, s.time_unit)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
nil
|
111
105
|
end
|
112
106
|
|
113
107
|
# Format Date/datetime with a formatting rule.
|
@@ -1386,7 +1380,7 @@ module Polars
|
|
1386
1380
|
# # 2001-01-01 00:30:00
|
1387
1381
|
# # 2001-01-01 01:00:00
|
1388
1382
|
# # ]
|
1389
|
-
def truncate(every, offset: nil)
|
1383
|
+
def truncate(every, offset: nil, use_earliest: nil)
|
1390
1384
|
super
|
1391
1385
|
end
|
1392
1386
|
|
@@ -2,7 +2,7 @@ module Polars
|
|
2
2
|
# A dynamic grouper.
|
3
3
|
#
|
4
4
|
# This has an `.agg` method which allows you to run all polars expressions in a
|
5
|
-
#
|
5
|
+
# group by context.
|
6
6
|
class DynamicGroupBy
|
7
7
|
def initialize(
|
8
8
|
df,
|
@@ -34,7 +34,7 @@ module Polars
|
|
34
34
|
|
35
35
|
def agg(aggs)
|
36
36
|
@df.lazy
|
37
|
-
.
|
37
|
+
.group_by_dynamic(
|
38
38
|
@time_column,
|
39
39
|
every: @every,
|
40
40
|
period: @period,
|