polars-df 0.4.0-x86_64-linux → 0.6.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +447 -410
- data/Cargo.toml +0 -1
- data/LICENSE-THIRD-PARTY.txt +2386 -1216
- data/README.md +6 -5
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +289 -96
- data/lib/polars/data_types.rb +169 -33
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +145 -78
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +84 -31
- data/lib/polars/lazy_functions.rb +71 -32
- data/lib/polars/list_expr.rb +94 -45
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +249 -87
- data/lib/polars/string_expr.rb +277 -45
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +138 -54
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -2
- metadata +4 -2
data/lib/polars/utils.rb
CHANGED
@@ -23,52 +23,98 @@ module Polars
|
|
23
23
|
Polars.col(name)
|
24
24
|
end
|
25
25
|
|
26
|
+
# Best-effort element count for an arbitrary object.
#
# @param obj [Object] value to measure
# @return [Integer, nil] the number of elements, or nil when obj is a String
#   or when determining the length raises any StandardError
def self.arrlen(obj)
  case obj
  when Range
    # Range#size only works for numeric ranges, so materialize the range
    obj.to_a.length
  when String
    # strings are deliberately reported as having no length
    nil
  else
    obj.length
  end
rescue
  nil
end
|
38
|
+
|
26
39
|
# Pass-through conversion: the given duration value is returned unchanged.
#
# @param td [Object] duration value
# @return [Object] the same object that was passed in
def self._timedelta_to_pl_duration(td)
  td
end
|
29
42
|
|
30
|
-
def self._datetime_to_pl_timestamp(dt,
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
elsif
|
36
|
-
|
37
|
-
|
38
|
-
|
43
|
+
# Converts a date/time object into an integer timestamp counted from the
# Unix epoch in the requested unit.
#
# @param dt [#to_datetime] value to convert (normalized via to_datetime.to_time)
# @param time_unit [String, nil] "ns", "us" or "ms"; nil is treated as "ns"
#   because Ruby times carry nanosecond precision
# @return [Integer] whole seconds scaled to the unit plus the sub-second part
# @raise [ArgumentError] if time_unit is not one of the supported units
def self._datetime_to_pl_timestamp(dt, time_unit)
  dt = dt.to_datetime.to_time
  case time_unit
  when "ns", nil
    dt.to_i * 1_000_000_000 + dt.nsec
  when "us"
    dt.to_i * 1_000_000 + dt.usec
  when "ms"
    dt.to_i * 1_000 + dt.usec / 1000
  else
    # interpolate the argument itself; the previous code referenced an
    # undefined local and raised NameError instead of this ArgumentError
    raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
  end
end
|
62
|
+
|
63
|
+
# Converts a date into the number of whole days since the Unix epoch.
#
# @param d [#to_datetime] date to convert
# @return [Integer] epoch seconds of the converted value, integer-divided by
#   the number of seconds in a day
def self._date_to_pl_date(d)
  seconds_per_day = 3600 * 24
  epoch_seconds = d.to_datetime.to_time.to_i
  epoch_seconds / seconds_per_day
end
|
67
|
+
|
68
|
+
# Converts a count of nanoseconds since midnight into a Ruby Time on the
# placeholder date 2000-01-01 in UTC.
#
# @param value [Integer] nanoseconds since midnight
# @return [Time] UTC time carrying the hour, minute, second and sub-second parts
def self._to_ruby_time(value)
  seconds, nanoseconds = value.divmod(1_000_000_000)
  minutes, seconds = seconds.divmod(60)
  hours, minutes = minutes.divmod(60)
  # Pass the microsecond argument as an exact Rational. Dividing by 1000.0
  # yields a Float whose binary value can sit just below the intended
  # amount, which drops one nanosecond (e.g. 9 ns became 8 ns).
  # A zero value needs no special case: all components come out as zero.
  ::Time.utc(2000, 1, 1, hours, minutes, seconds, Rational(nanoseconds, 1000))
end
|
78
|
+
|
79
|
+
# Converts a duration expressed in the given unit into seconds.
#
# @param value [Numeric] duration count
# @param time_unit [String] "ns", "us" or "ms"
# @return [Float] value divided by the unit's per-second factor
# @raise [ArgumentError] if time_unit is not one of the supported units
def self._to_ruby_duration(value, time_unit = "ns")
  units_per_second =
    case time_unit
    when "ns" then 1e9
    when "us" then 1e6
    when "ms" then 1e3
    else
      raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
    end

  value / units_per_second
end
|
90
|
+
|
91
|
+
# Converts a count of days since the Unix epoch into a Ruby Date.
#
# @param value [Integer] days since 1970-01-01
# @return [Date]
def self._to_ruby_date(value)
  epoch_seconds = value * 86400
  # Build the date from a UTC time so the result does not depend on the
  # local timezone of the machine.
  utc_time = ::Time.at(epoch_seconds).utc
  utc_time.to_date
end
|
97
|
+
|
98
|
+
# Converts an epoch timestamp in the given unit into a UTC Ruby Time.
#
# @param value [Integer] timestamp counted from the Unix epoch
# @param time_unit [String] "ns", "us" or "ms"
# @param time_zone [String, nil] only nil or "" is handled; anything else
#   raises Todo
# @return [Time] UTC time
# @raise [ArgumentError] if time_unit is not one of the supported units
def self._to_ruby_datetime(value, time_unit = "ns", time_zone = nil)
  # time zone aware conversion is not implemented
  raise Todo unless time_zone.nil? || time_zone == ""

  case time_unit
  when "ns"
    ::Time.at(value / 1000000000, value % 1000000000, :nsec).utc
  when "us"
    ::Time.at(value / 1000000, value % 1000000, :usec).utc
  when "ms"
    ::Time.at(value / 1000, value % 1000, :millisecond).utc
  else
    raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
  end
end
|
71
113
|
|
114
|
+
# Builds a BigDecimal from a digit string and a power-of-ten exponent.
#
# @param digits [#to_s] the significant digits
# @param scale [#to_s] exponent placed after "e" in the parsed literal
# @return [BigDecimal] the value of "<digits>e<scale>"
def self._to_ruby_decimal(digits, scale)
  scientific_literal = "#{digits}e#{scale}"
  BigDecimal(scientific_literal)
end
|
117
|
+
|
72
118
|
def self.selection_to_rbexpr_list(exprs)
|
73
119
|
if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
74
120
|
exprs = [exprs]
|
@@ -102,19 +148,34 @@ module Polars
|
|
102
148
|
end
|
103
149
|
|
104
150
|
# TODO fix
|
105
|
-
def self.is_polars_dtype(data_type)
|
151
|
+
# Checks whether a value can be treated as a Polars data type.
#
# @param data_type [Object] candidate value
# @param include_unknown [Boolean] result to return when data_type is Unknown
# @return [Boolean, nil] truthy for a Symbol, String, DataType instance or a
#   DataType subclass; nil for a Class unrelated to DataType
def self.is_polars_dtype(data_type, include_unknown: false)
  return include_unknown if data_type == Unknown

  [Symbol, String, DataType].any? { |klass| data_type.is_a?(klass) } ||
    (data_type.is_a?(Class) && data_type < DataType)
end
|
108
157
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
158
|
+
# Maps a Ruby class to the corresponding Polars data type.
#
# @param ruby_dtype [Class] Ruby class such as Float, Integer or String
# @return [Object] the matching Polars data type
# @raise [TypeError] if no mapping exists for the class
def self.map_rb_type_to_dtype(ruby_dtype)
  return Float64 if ruby_dtype == Float
  return Int64 if ruby_dtype == Integer
  return Utf8 if ruby_dtype == String
  return Boolean if ruby_dtype == TrueClass || ruby_dtype == FalseClass

  # ActiveSupport is optional, so only compare against it when it is loaded
  time_like = ruby_dtype == DateTime || ruby_dtype == ::Time ||
    (defined?(ActiveSupport::TimeWithZone) && ruby_dtype == ActiveSupport::TimeWithZone)
  return Datetime.new("ns") if time_like

  return Date if ruby_dtype == ::Date
  return List if ruby_dtype == ::Array
  return Null if ruby_dtype == NilClass

  raise TypeError, "Invalid type"
end
|
118
179
|
|
119
180
|
# TODO fix
|
120
181
|
def self.rb_type_to_dtype(data_type)
|
@@ -124,8 +185,8 @@ module Polars
|
|
124
185
|
end
|
125
186
|
|
126
187
|
begin
|
127
|
-
|
128
|
-
rescue
|
188
|
+
map_rb_type_to_dtype(data_type)
|
189
|
+
rescue TypeError
|
129
190
|
raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
|
130
191
|
end
|
131
192
|
end
|
@@ -178,7 +239,7 @@ module Polars
|
|
178
239
|
end
|
179
240
|
|
180
241
|
# Reports whether the value is exactly one of the two boolean objects.
#
# @param value [Object] value to inspect
# @return [Boolean] true for true or false, otherwise false
def self.bool?(value)
  [TrueClass, FalseClass].any? { |klass| value.is_a?(klass) }
end
|
183
244
|
|
184
245
|
def self.strlike?(value)
|
@@ -194,35 +255,58 @@ module Polars
|
|
194
255
|
end
|
195
256
|
|
196
257
|
# Checks whether the value is a Ruby Array containing only booleans.
#
# @param val [Object] value to inspect
# @return [Boolean] true for an Array whose elements all equal true or false
#   (an empty Array qualifies)
def self.is_bool_sequence(val)
  return false unless val.is_a?(::Array)

  val.all? { |item| item == true || item == false }
end
|
199
260
|
|
200
261
|
# Checks whether the value is a Ruby Array whose elements all pass
# is_polars_dtype.
#
# @param val [Object] value to inspect
# @return [Boolean]
def self.is_dtype_sequence(val)
  return false unless val.is_a?(::Array)

  val.all? { |item| is_polars_dtype(item) }
end
|
203
264
|
|
204
265
|
# Checks whether the value is a Ruby Array accepted by
# _is_iterable_of(val, Integer).
#
# @param val [Object] value to inspect
# @return [Boolean] false for anything that is not a Ruby Array
def self.is_int_sequence(val)
  return false unless val.is_a?(::Array)

  _is_iterable_of(val, Integer)
end
|
207
268
|
|
208
269
|
# Checks whether the value is a Ruby Array accepted by
# _is_iterable_of(val, Expr).
#
# @param val [Object] value to inspect
# @return [Boolean] false for anything that is not a Ruby Array
def self.is_expr_sequence(val)
  return false unless val.is_a?(::Array)

  _is_iterable_of(val, Expr)
end
|
211
272
|
|
212
273
|
# Checks whether the value is a Ruby Array accepted by
# _is_iterable_of(val, RbExpr).
#
# @param val [Object] value to inspect
# @return [Boolean] false for anything that is not a Ruby Array
def self.is_rbexpr_sequence(val)
  return false unless val.is_a?(::Array)

  _is_iterable_of(val, RbExpr)
end
|
215
276
|
|
216
277
|
# Checks whether the value is a Ruby Array accepted by
# _is_iterable_of(val, String).
#
# NOTE(review): as written, allow_str has no observable effect. A String is
# never a ::Array, so a lone String yields false on either path. Confirm
# whether allow_str: true was meant to accept a single String.
#
# @param val [Object] value to inspect
# @param allow_str [Boolean] when false, a String is rejected up front
# @return [Boolean]
def self.is_str_sequence(val, allow_str: false)
  return false if allow_str == false && val.is_a?(String)

  val.is_a?(::Array) && _is_iterable_of(val, String)
end
|
223
284
|
|
224
285
|
# Reports whether the given path or glob pattern matches at least one entry
# on the local filesystem.
#
# @param file [String] path or glob pattern
# @return [Boolean]
def self.local_file?(file)
  matches = Dir.glob(file)
  !matches.empty?
end
|
288
|
+
|
289
|
+
# Turns a user-supplied value into the underlying expression object.
#
# @param input [Object] an Expr, a column name, a literal value, or a Ruby Array
# @param str_as_lit [Boolean] when true a String becomes a literal instead of
#   a column reference
# @param structify [Boolean] only honored for Expr input; not implemented and
#   raises Todo when it stays true
# @return [Object] the `_rbexpr` of the resulting expression
# @raise [TypeError] if input is of an unsupported type
def self.parse_as_expression(input, str_as_lit: false, structify: false)
  if input.is_a?(Expr)
    expr = input
  elsif input.is_a?(String) && !str_as_lit
    expr = Polars.col(input)
    structify = false
  elsif [Integer, Float, String, Series, ::Date, ::Time, ::DateTime].any? { |cls| input.is_a?(cls) } || input.nil?
    expr = Polars.lit(input)
    structify = false
  elsif input.is_a?(::Array)
    # ::Array pins the check to Ruby's core Array, matching the other helpers
    # in this file; a bare Array can resolve to a constant of the same name
    # in the enclosing namespace and would reject plain Ruby arrays.
    expr = Polars.lit(Polars::Series.new("", [input]))
    structify = false
  else
    raise TypeError, "did not expect value #{input} of type #{input.class.name}, maybe disambiguate with pl.lit or pl.col"
  end

  raise Todo if structify

  expr._rbexpr
end
|
227
311
|
end
|
228
312
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -1,16 +1,19 @@
|
|
1
1
|
# ext
|
2
2
|
begin
|
3
|
-
|
3
|
+
require "polars/#{RUBY_VERSION.to_f}/polars"
|
4
4
|
rescue LoadError
|
5
|
-
|
5
|
+
require "polars/polars"
|
6
6
|
end
|
7
7
|
|
8
8
|
# stdlib
|
9
|
+
require "bigdecimal"
|
9
10
|
require "date"
|
10
11
|
require "stringio"
|
11
12
|
|
12
13
|
# modules
|
13
14
|
require_relative "polars/expr_dispatch"
|
15
|
+
require_relative "polars/array_expr"
|
16
|
+
require_relative "polars/array_name_space"
|
14
17
|
require_relative "polars/batched_csv_reader"
|
15
18
|
require_relative "polars/binary_expr"
|
16
19
|
require_relative "polars/binary_name_space"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: x86_64-linux
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-07-24 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|
@@ -28,6 +28,8 @@ files:
|
|
28
28
|
- lib/polars/3.0/polars.so
|
29
29
|
- lib/polars/3.1/polars.so
|
30
30
|
- lib/polars/3.2/polars.so
|
31
|
+
- lib/polars/array_expr.rb
|
32
|
+
- lib/polars/array_name_space.rb
|
31
33
|
- lib/polars/batched_csv_reader.rb
|
32
34
|
- lib/polars/binary_expr.rb
|
33
35
|
- lib/polars/binary_name_space.rb
|