polars-df 0.11.0 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +360 -361
- data/ext/polars/Cargo.toml +10 -7
- data/ext/polars/src/batched_csv.rs +1 -1
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +51 -10
- data/ext/polars/src/dataframe/construction.rs +6 -8
- data/ext/polars/src/dataframe/general.rs +19 -29
- data/ext/polars/src/dataframe/io.rs +43 -33
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -12
- data/ext/polars/src/expr/general.rs +123 -110
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +17 -9
- data/ext/polars/src/expr/string.rs +2 -6
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +21 -21
- data/ext/polars/src/functions/range.rs +6 -12
- data/ext/polars/src/interop/numo/to_numo_series.rs +2 -1
- data/ext/polars/src/lazyframe/mod.rs +81 -98
- data/ext/polars/src/lib.rs +55 -45
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +4 -2
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/mod.rs +31 -10
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +3 -3
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +5 -4
- data/lib/polars/lazy_frame.rb +120 -106
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +105 -189
- data/lib/polars/string_expr.rb +42 -67
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +9 -4
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
data/lib/polars/utils.rb
CHANGED
@@ -3,150 +3,6 @@ module Polars
|
|
3
3
|
module Utils
|
4
4
|
DTYPE_TEMPORAL_UNITS = ["ns", "us", "ms"]
|
5
5
|
|
6
|
-
def self.wrap_s(s)
|
7
|
-
Series._from_rbseries(s)
|
8
|
-
end
|
9
|
-
|
10
|
-
def self.wrap_df(df)
|
11
|
-
DataFrame._from_rbdf(df)
|
12
|
-
end
|
13
|
-
|
14
|
-
def self.wrap_ldf(ldf)
|
15
|
-
LazyFrame._from_rbldf(ldf)
|
16
|
-
end
|
17
|
-
|
18
|
-
def self.wrap_expr(rbexpr)
|
19
|
-
Expr._from_rbexpr(rbexpr)
|
20
|
-
end
|
21
|
-
|
22
|
-
def self.col(name)
|
23
|
-
Polars.col(name)
|
24
|
-
end
|
25
|
-
|
26
|
-
def self.arrlen(obj)
|
27
|
-
if obj.is_a?(Range)
|
28
|
-
# size only works for numeric ranges
|
29
|
-
obj.to_a.length
|
30
|
-
elsif obj.is_a?(::String)
|
31
|
-
nil
|
32
|
-
else
|
33
|
-
obj.length
|
34
|
-
end
|
35
|
-
rescue
|
36
|
-
nil
|
37
|
-
end
|
38
|
-
|
39
|
-
def self._timedelta_to_pl_duration(td)
|
40
|
-
td
|
41
|
-
end
|
42
|
-
|
43
|
-
def self._datetime_to_pl_timestamp(dt, time_unit)
|
44
|
-
dt = dt.to_datetime.to_time
|
45
|
-
if time_unit == "ns"
|
46
|
-
nanos = dt.nsec
|
47
|
-
dt.to_i * 1_000_000_000 + nanos
|
48
|
-
elsif time_unit == "us"
|
49
|
-
micros = dt.usec
|
50
|
-
dt.to_i * 1_000_000 + micros
|
51
|
-
elsif time_unit == "ms"
|
52
|
-
millis = dt.usec / 1000
|
53
|
-
dt.to_i * 1_000 + millis
|
54
|
-
elsif time_unit.nil?
|
55
|
-
# Ruby has ns precision
|
56
|
-
nanos = dt.nsec
|
57
|
-
dt.to_i * 1_000_000_000 + nanos
|
58
|
-
else
|
59
|
-
raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
def self._date_to_pl_date(d)
|
64
|
-
dt = d.to_datetime.to_time
|
65
|
-
dt.to_i / (3600 * 24)
|
66
|
-
end
|
67
|
-
|
68
|
-
def self._to_ruby_time(value)
|
69
|
-
if value == 0
|
70
|
-
::Time.utc(2000, 1, 1)
|
71
|
-
else
|
72
|
-
seconds, nanoseconds = value.divmod(1_000_000_000)
|
73
|
-
minutes, seconds = seconds.divmod(60)
|
74
|
-
hours, minutes = minutes.divmod(60)
|
75
|
-
::Time.utc(2000, 1, 1, hours, minutes, seconds, nanoseconds / 1000.0)
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
def self._to_ruby_duration(value, time_unit = "ns")
|
80
|
-
if time_unit == "ns"
|
81
|
-
value / 1e9
|
82
|
-
elsif time_unit == "us"
|
83
|
-
value / 1e6
|
84
|
-
elsif time_unit == "ms"
|
85
|
-
value / 1e3
|
86
|
-
else
|
87
|
-
raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
def self._to_ruby_date(value)
|
92
|
-
# days to seconds
|
93
|
-
# important to create from utc. Not doing this leads
|
94
|
-
# to inconsistencies dependent on the timezone you are in.
|
95
|
-
::Time.at(value * 86400).utc.to_date
|
96
|
-
end
|
97
|
-
|
98
|
-
def self._to_ruby_datetime(value, time_unit = "ns", time_zone = nil)
|
99
|
-
if time_zone.nil? || time_zone == ""
|
100
|
-
if time_unit == "ns"
|
101
|
-
return ::Time.at(value / 1000000000, value % 1000000000, :nsec).utc
|
102
|
-
elsif time_unit == "us"
|
103
|
-
return ::Time.at(value / 1000000, value % 1000000, :usec).utc
|
104
|
-
elsif time_unit == "ms"
|
105
|
-
return ::Time.at(value / 1000, value % 1000, :millisecond).utc
|
106
|
-
else
|
107
|
-
raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
|
108
|
-
end
|
109
|
-
else
|
110
|
-
raise Todo
|
111
|
-
end
|
112
|
-
end
|
113
|
-
|
114
|
-
def self._to_ruby_decimal(digits, scale)
|
115
|
-
BigDecimal("#{digits}e#{scale}")
|
116
|
-
end
|
117
|
-
|
118
|
-
def self.selection_to_rbexpr_list(exprs)
|
119
|
-
if exprs.is_a?(::String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
120
|
-
exprs = [exprs]
|
121
|
-
end
|
122
|
-
|
123
|
-
exprs.map { |e| expr_to_lit_or_expr(e, str_to_lit: false)._rbexpr }
|
124
|
-
end
|
125
|
-
|
126
|
-
def self.expr_to_lit_or_expr(expr, str_to_lit: true)
|
127
|
-
if (expr.is_a?(::String) || expr.is_a?(Symbol)) && !str_to_lit
|
128
|
-
col(expr)
|
129
|
-
elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(::String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
|
130
|
-
lit(expr)
|
131
|
-
elsif expr.is_a?(Expr)
|
132
|
-
expr
|
133
|
-
else
|
134
|
-
raise ArgumentError, "did not expect value #{expr} of type #{expr.class.name}, maybe disambiguate with Polars.lit or Polars.col"
|
135
|
-
end
|
136
|
-
end
|
137
|
-
|
138
|
-
def self.lit(value)
|
139
|
-
Polars.lit(value)
|
140
|
-
end
|
141
|
-
|
142
|
-
def self.normalize_filepath(path, check_not_directory: true)
|
143
|
-
path = File.expand_path(path)
|
144
|
-
if check_not_directory && File.exist?(path) && Dir.exist?(path)
|
145
|
-
raise ArgumentError, "Expected a file path; #{path} is a directory"
|
146
|
-
end
|
147
|
-
path
|
148
|
-
end
|
149
|
-
|
150
6
|
# TODO fix
|
151
7
|
def self.is_polars_dtype(data_type, include_unknown: false)
|
152
8
|
if data_type == Unknown
|
@@ -191,19 +47,11 @@ module Polars
|
|
191
47
|
end
|
192
48
|
end
|
193
49
|
|
194
|
-
def self._process_null_values(null_values)
|
195
|
-
if null_values.is_a?(Hash)
|
196
|
-
null_values.to_a
|
197
|
-
else
|
198
|
-
null_values
|
199
|
-
end
|
200
|
-
end
|
201
|
-
|
202
|
-
def self._prepare_row_count_args(row_count_name = nil, row_count_offset = 0)
|
203
|
-
if !row_count_name.nil?
|
204
|
-
[row_count_name, row_count_offset]
|
205
|
-
else
|
50
|
+
def self.parse_row_index_args(row_index_name = nil, row_index_offset = 0)
|
51
|
+
if row_index_name.nil?
|
206
52
|
nil
|
53
|
+
else
|
54
|
+
[row_index_name, row_index_offset]
|
207
55
|
end
|
208
56
|
end
|
209
57
|
|
@@ -223,21 +71,6 @@ module Polars
|
|
223
71
|
[projection, columns]
|
224
72
|
end
|
225
73
|
|
226
|
-
def self.scale_bytes(sz, to:)
|
227
|
-
scaling_factor = {
|
228
|
-
"b" => 1,
|
229
|
-
"k" => 1024,
|
230
|
-
"m" => 1024 ** 2,
|
231
|
-
"g" => 1024 ** 3,
|
232
|
-
"t" => 1024 ** 4
|
233
|
-
}[to[0]]
|
234
|
-
if scaling_factor > 1
|
235
|
-
sz / scaling_factor.to_f
|
236
|
-
else
|
237
|
-
sz
|
238
|
-
end
|
239
|
-
end
|
240
|
-
|
241
74
|
def self.bool?(value)
|
242
75
|
value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
243
76
|
end
|
@@ -250,108 +83,10 @@ module Polars
|
|
250
83
|
value.is_a?(::String) || (defined?(Pathname) && value.is_a?(Pathname))
|
251
84
|
end
|
252
85
|
|
253
|
-
def self._is_iterable_of(val, eltype)
|
254
|
-
val.all? { |x| x.is_a?(eltype) }
|
255
|
-
end
|
256
|
-
|
257
|
-
def self.is_bool_sequence(val)
|
258
|
-
val.is_a?(::Array) && val.all? { |x| x == true || x == false }
|
259
|
-
end
|
260
|
-
|
261
|
-
def self.is_dtype_sequence(val)
|
262
|
-
val.is_a?(::Array) && val.all? { |x| is_polars_dtype(x) }
|
263
|
-
end
|
264
|
-
|
265
|
-
def self.is_int_sequence(val)
|
266
|
-
val.is_a?(::Array) && _is_iterable_of(val, Integer)
|
267
|
-
end
|
268
|
-
|
269
|
-
def self.is_expr_sequence(val)
|
270
|
-
val.is_a?(::Array) && _is_iterable_of(val, Expr)
|
271
|
-
end
|
272
|
-
|
273
|
-
def self.is_rbexpr_sequence(val)
|
274
|
-
val.is_a?(::Array) && _is_iterable_of(val, RbExpr)
|
275
|
-
end
|
276
|
-
|
277
|
-
def self.is_str_sequence(val, allow_str: false)
|
278
|
-
if allow_str == false && val.is_a?(::String)
|
279
|
-
false
|
280
|
-
else
|
281
|
-
val.is_a?(::Array) && _is_iterable_of(val, ::String)
|
282
|
-
end
|
283
|
-
end
|
284
|
-
|
285
86
|
def self.local_file?(file)
|
286
87
|
Dir.glob(file).any?
|
287
88
|
end
|
288
89
|
|
289
|
-
def self.parse_as_list_of_expressions(*inputs, __structify: false, **named_inputs)
|
290
|
-
exprs = _parse_positional_inputs(inputs, structify: __structify)
|
291
|
-
if named_inputs.any?
|
292
|
-
named_exprs = _parse_named_inputs(named_inputs, structify: __structify)
|
293
|
-
exprs.concat(named_exprs)
|
294
|
-
end
|
295
|
-
|
296
|
-
exprs
|
297
|
-
end
|
298
|
-
|
299
|
-
def self._parse_positional_inputs(inputs, structify: false)
|
300
|
-
inputs_iter = _parse_inputs_as_iterable(inputs)
|
301
|
-
inputs_iter.map { |e| parse_as_expression(e, structify: structify) }
|
302
|
-
end
|
303
|
-
|
304
|
-
def self._parse_inputs_as_iterable(inputs)
|
305
|
-
if inputs.empty?
|
306
|
-
return []
|
307
|
-
end
|
308
|
-
|
309
|
-
if inputs.length == 1 && inputs[0].is_a?(::Array)
|
310
|
-
return inputs[0]
|
311
|
-
end
|
312
|
-
|
313
|
-
inputs
|
314
|
-
end
|
315
|
-
|
316
|
-
def self._parse_named_inputs(named_inputs, structify: false)
|
317
|
-
named_inputs.map do |name, input|
|
318
|
-
parse_as_expression(input, structify: structify)._alias(name.to_s)
|
319
|
-
end
|
320
|
-
end
|
321
|
-
|
322
|
-
def self.parse_as_expression(input, str_as_lit: false, list_as_lit: true, structify: false, dtype: nil)
|
323
|
-
if input.is_a?(Expr)
|
324
|
-
expr = input
|
325
|
-
elsif input.is_a?(::String) && !str_as_lit
|
326
|
-
expr = Polars.col(input)
|
327
|
-
structify = false
|
328
|
-
elsif input.is_a?(::Array) && !list_as_lit
|
329
|
-
expr = Polars.lit(Series.new(input), dtype: dtype)
|
330
|
-
structify = false
|
331
|
-
else
|
332
|
-
expr = Polars.lit(input, dtype: dtype)
|
333
|
-
structify = false
|
334
|
-
end
|
335
|
-
|
336
|
-
if structify
|
337
|
-
raise Todo
|
338
|
-
end
|
339
|
-
|
340
|
-
expr._rbexpr
|
341
|
-
end
|
342
|
-
|
343
|
-
USE_EARLIEST_TO_AMBIGUOUS = {
|
344
|
-
true => "earliest",
|
345
|
-
false => "latest"
|
346
|
-
}
|
347
|
-
|
348
|
-
def self.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
|
349
|
-
unless use_earliest.nil?
|
350
|
-
ambiguous = USE_EARLIEST_TO_AMBIGUOUS.fetch(use_earliest)
|
351
|
-
end
|
352
|
-
ambiguous
|
353
|
-
end
|
354
|
-
|
355
90
|
def self._check_arg_is_1byte(arg_name, arg, can_be_empty = false)
|
356
91
|
if arg.is_a?(::String)
|
357
92
|
arg_byte_length = arg.bytesize
|
@@ -385,72 +120,11 @@ module Polars
|
|
385
120
|
false
|
386
121
|
end
|
387
122
|
|
388
|
-
def self.parse_predicates_constraints_as_expression(*predicates, **constraints)
|
389
|
-
all_predicates = _parse_positional_inputs(predicates)
|
390
|
-
|
391
|
-
if constraints.any?
|
392
|
-
constraint_predicates = _parse_constraints(constraints)
|
393
|
-
all_predicates.concat(constraint_predicates)
|
394
|
-
end
|
395
|
-
|
396
|
-
_combine_predicates(all_predicates)
|
397
|
-
end
|
398
|
-
|
399
|
-
def self._parse_constraints(constraints)
|
400
|
-
constraints.map do |name, value|
|
401
|
-
Polars.col(name).eq(value)._rbexpr
|
402
|
-
end
|
403
|
-
end
|
404
|
-
|
405
|
-
def self._combine_predicates(predicates)
|
406
|
-
if !predicates.any?
|
407
|
-
msg = "at least one predicate or constraint must be provided"
|
408
|
-
raise TypeError, msg
|
409
|
-
end
|
410
|
-
|
411
|
-
if predicates.length == 1
|
412
|
-
return predicates[0]
|
413
|
-
end
|
414
|
-
|
415
|
-
Plr.all_horizontal(predicates)
|
416
|
-
end
|
417
|
-
|
418
|
-
def self.parse_when_inputs(*predicates, **constraints)
|
419
|
-
parse_predicates_constraints_as_expression(*predicates, **constraints)
|
420
|
-
end
|
421
|
-
|
422
123
|
def self.parse_interval_argument(interval)
|
423
124
|
if interval.include?(" ")
|
424
125
|
interval = interval.gsub(" ", "")
|
425
126
|
end
|
426
127
|
interval.downcase
|
427
128
|
end
|
428
|
-
|
429
|
-
def self.validate_rolling_by_aggs_arguments(weights, center:)
|
430
|
-
if !weights.nil?
|
431
|
-
msg = "`weights` is not supported in `rolling_*(..., by=...)` expression"
|
432
|
-
raise InvalidOperationError, msg
|
433
|
-
end
|
434
|
-
if center
|
435
|
-
msg = "`center=True` is not supported in `rolling_*(..., by=...)` expression"
|
436
|
-
raise InvalidOperationError, msg
|
437
|
-
end
|
438
|
-
end
|
439
|
-
|
440
|
-
def self.validate_rolling_aggs_arguments(window_size, closed)
|
441
|
-
if window_size.is_a?(::String)
|
442
|
-
begin
|
443
|
-
window_size = window_size.delete_suffix("i").to_i
|
444
|
-
rescue
|
445
|
-
msg = "Expected a string of the form 'ni', where `n` is a positive integer, got: #{window_size}"
|
446
|
-
raise InvalidOperationError, msg
|
447
|
-
end
|
448
|
-
end
|
449
|
-
if !closed.nil?
|
450
|
-
msg = "`closed` is not supported in `rolling_*(...)` expression"
|
451
|
-
raise InvalidOperationError, msg
|
452
|
-
end
|
453
|
-
window_size
|
454
|
-
end
|
455
129
|
end
|
456
130
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars/whenthen.rb
CHANGED
@@ -8,7 +8,7 @@ module Polars
|
|
8
8
|
end
|
9
9
|
|
10
10
|
def then(statement)
|
11
|
-
statement_rbexpr = Utils.parse_as_expression(statement)
|
11
|
+
statement_rbexpr = Utils.parse_into_expression(statement)
|
12
12
|
Then.new(_when.then(statement_rbexpr))
|
13
13
|
end
|
14
14
|
end
|
@@ -30,12 +30,12 @@ module Polars
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def when(*predicates, **constraints)
|
33
|
-
condition_rbexpr = Utils.parse_when_inputs(*predicates, **constraints)
|
33
|
+
condition_rbexpr = Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
|
34
34
|
ChainedWhen.new(_then.when(condition_rbexpr))
|
35
35
|
end
|
36
36
|
|
37
37
|
def otherwise(statement)
|
38
|
-
statement_rbexpr = Utils.parse_as_expression(statement)
|
38
|
+
statement_rbexpr = Utils.parse_into_expression(statement)
|
39
39
|
Utils.wrap_expr(_then.otherwise(statement_rbexpr))
|
40
40
|
end
|
41
41
|
end
|
@@ -49,7 +49,7 @@ module Polars
|
|
49
49
|
end
|
50
50
|
|
51
51
|
def then(statement)
|
52
|
-
statement_rbexpr = Utils.parse_as_expression(statement)
|
52
|
+
statement_rbexpr = Utils.parse_into_expression(statement)
|
53
53
|
ChainedThen.new(_chained_when.then(statement_rbexpr))
|
54
54
|
end
|
55
55
|
end
|
@@ -71,12 +71,12 @@ module Polars
|
|
71
71
|
end
|
72
72
|
|
73
73
|
def when(*predicates, **constraints)
|
74
|
-
condition_rbexpr = Utils.parse_when_inputs(*predicates, **constraints)
|
74
|
+
condition_rbexpr = Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
|
75
75
|
ChainedWhen.new(_chained_then.when(condition_rbexpr))
|
76
76
|
end
|
77
77
|
|
78
78
|
def otherwise(statement)
|
79
|
-
statement_rbexpr = Utils.parse_as_expression(statement)
|
79
|
+
statement_rbexpr = Utils.parse_into_expression(statement)
|
80
80
|
Utils.wrap_expr(_chained_then.otherwise(statement_rbexpr))
|
81
81
|
end
|
82
82
|
end
|
data/lib/polars.rb
CHANGED
@@ -70,6 +70,11 @@ require_relative "polars/struct_expr"
|
|
70
70
|
require_relative "polars/struct_name_space"
|
71
71
|
require_relative "polars/testing"
|
72
72
|
require_relative "polars/utils"
|
73
|
+
require_relative "polars/utils/constants"
|
74
|
+
require_relative "polars/utils/convert"
|
75
|
+
require_relative "polars/utils/parse"
|
76
|
+
require_relative "polars/utils/various"
|
77
|
+
require_relative "polars/utils/wrap"
|
73
78
|
require_relative "polars/version"
|
74
79
|
require_relative "polars/whenthen"
|
75
80
|
|
@@ -77,4 +82,10 @@ module Polars
|
|
77
82
|
extend Convert
|
78
83
|
extend Functions
|
79
84
|
extend IO
|
85
|
+
|
86
|
+
# @private
|
87
|
+
F = self
|
88
|
+
|
89
|
+
# @private
|
90
|
+
N_INFER_DEFAULT = 100
|
80
91
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.11.0
|
4
|
+
version: 0.12.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-07-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bigdecimal
|
@@ -54,7 +54,7 @@ files:
|
|
54
54
|
- ext/polars/Cargo.toml
|
55
55
|
- ext/polars/extconf.rb
|
56
56
|
- ext/polars/src/batched_csv.rs
|
57
|
-
- ext/polars/src/conversion/anyvalue.rs
|
57
|
+
- ext/polars/src/conversion/any_value.rs
|
58
58
|
- ext/polars/src/conversion/chunked_array.rs
|
59
59
|
- ext/polars/src/conversion/mod.rs
|
60
60
|
- ext/polars/src/dataframe/construction.rs
|
@@ -172,6 +172,11 @@ files:
|
|
172
172
|
- lib/polars/struct_name_space.rb
|
173
173
|
- lib/polars/testing.rb
|
174
174
|
- lib/polars/utils.rb
|
175
|
+
- lib/polars/utils/constants.rb
|
176
|
+
- lib/polars/utils/convert.rb
|
177
|
+
- lib/polars/utils/parse.rb
|
178
|
+
- lib/polars/utils/various.rb
|
179
|
+
- lib/polars/utils/wrap.rb
|
175
180
|
- lib/polars/version.rb
|
176
181
|
- lib/polars/whenthen.rb
|
177
182
|
homepage: https://github.com/ankane/ruby-polars
|
@@ -193,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
193
198
|
- !ruby/object:Gem::Version
|
194
199
|
version: '0'
|
195
200
|
requirements: []
|
196
|
-
rubygems_version: 3.5.
|
201
|
+
rubygems_version: 3.5.11
|
197
202
|
signing_key:
|
198
203
|
specification_version: 4
|
199
204
|
summary: Blazingly fast DataFrames for Ruby
|
data/ext/polars/src/conversion/anyvalue.rs
DELETED
@@ -1,186 +0,0 @@
|
|
1
|
-
use magnus::encoding::{EncodingCapable, Index};
|
2
|
-
use magnus::{
|
3
|
-
class, prelude::*, r_hash::ForEach, Float, Integer, IntoValue, RArray, RHash, RString, Ruby,
|
4
|
-
TryConvert, Value,
|
5
|
-
};
|
6
|
-
use polars::prelude::*;
|
7
|
-
use polars_core::utils::any_values_to_supertype_and_n_dtypes;
|
8
|
-
|
9
|
-
use super::{struct_dict, ObjectValue, Wrap};
|
10
|
-
|
11
|
-
use crate::rb_modules::utils;
|
12
|
-
use crate::{RbPolarsErr, RbResult, RbSeries};
|
13
|
-
|
14
|
-
impl IntoValue for Wrap<AnyValue<'_>> {
|
15
|
-
fn into_value_with(self, ruby: &Ruby) -> Value {
|
16
|
-
match self.0 {
|
17
|
-
AnyValue::UInt8(v) => ruby.into_value(v),
|
18
|
-
AnyValue::UInt16(v) => ruby.into_value(v),
|
19
|
-
AnyValue::UInt32(v) => ruby.into_value(v),
|
20
|
-
AnyValue::UInt64(v) => ruby.into_value(v),
|
21
|
-
AnyValue::Int8(v) => ruby.into_value(v),
|
22
|
-
AnyValue::Int16(v) => ruby.into_value(v),
|
23
|
-
AnyValue::Int32(v) => ruby.into_value(v),
|
24
|
-
AnyValue::Int64(v) => ruby.into_value(v),
|
25
|
-
AnyValue::Float32(v) => ruby.into_value(v),
|
26
|
-
AnyValue::Float64(v) => ruby.into_value(v),
|
27
|
-
AnyValue::Null => ruby.qnil().as_value(),
|
28
|
-
AnyValue::Boolean(v) => ruby.into_value(v),
|
29
|
-
AnyValue::String(v) => ruby.into_value(v),
|
30
|
-
AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
|
31
|
-
AnyValue::Categorical(idx, rev, arr) | AnyValue::Enum(idx, rev, arr) => {
|
32
|
-
let s = if arr.is_null() {
|
33
|
-
rev.get(idx)
|
34
|
-
} else {
|
35
|
-
unsafe { arr.deref_unchecked().value(idx as usize) }
|
36
|
-
};
|
37
|
-
s.into_value()
|
38
|
-
}
|
39
|
-
AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
|
40
|
-
AnyValue::Datetime(v, time_unit, time_zone) => {
|
41
|
-
let time_unit = time_unit.to_ascii();
|
42
|
-
utils()
|
43
|
-
.funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
|
44
|
-
.unwrap()
|
45
|
-
}
|
46
|
-
AnyValue::Duration(v, time_unit) => {
|
47
|
-
let time_unit = time_unit.to_ascii();
|
48
|
-
utils()
|
49
|
-
.funcall("_to_ruby_duration", (v, time_unit))
|
50
|
-
.unwrap()
|
51
|
-
}
|
52
|
-
AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
|
53
|
-
AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
54
|
-
ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
|
55
|
-
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
56
|
-
AnyValue::Object(v) => {
|
57
|
-
let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
|
58
|
-
object.to_object()
|
59
|
-
}
|
60
|
-
AnyValue::ObjectOwned(v) => {
|
61
|
-
let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
|
62
|
-
object.to_object()
|
63
|
-
}
|
64
|
-
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
65
|
-
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
66
|
-
AnyValue::Decimal(v, scale) => utils()
|
67
|
-
.funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
|
68
|
-
.unwrap(),
|
69
|
-
}
|
70
|
-
}
|
71
|
-
}
|
72
|
-
|
73
|
-
impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
74
|
-
fn try_convert(ob: Value) -> RbResult<Self> {
|
75
|
-
if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
|
76
|
-
Ok(AnyValue::Boolean(bool::try_convert(ob)?).into())
|
77
|
-
} else if let Some(v) = Integer::from_value(ob) {
|
78
|
-
Ok(AnyValue::Int64(v.to_i64()?).into())
|
79
|
-
} else if let Some(v) = Float::from_value(ob) {
|
80
|
-
Ok(AnyValue::Float64(v.to_f64()).into())
|
81
|
-
} else if let Some(v) = RString::from_value(ob) {
|
82
|
-
if v.enc_get() == Index::utf8() {
|
83
|
-
Ok(AnyValue::StringOwned(v.to_string()?.into()).into())
|
84
|
-
} else {
|
85
|
-
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
|
86
|
-
}
|
87
|
-
// call is_a? for ActiveSupport::TimeWithZone
|
88
|
-
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
89
|
-
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
90
|
-
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
91
|
-
let v = sec * 1_000_000_000 + nsec;
|
92
|
-
// TODO support time zone when possible
|
93
|
-
// https://github.com/pola-rs/polars/issues/9103
|
94
|
-
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
|
95
|
-
} else if ob.is_nil() {
|
96
|
-
Ok(AnyValue::Null.into())
|
97
|
-
} else if let Some(dict) = RHash::from_value(ob) {
|
98
|
-
let len = dict.len();
|
99
|
-
let mut keys = Vec::with_capacity(len);
|
100
|
-
let mut vals = Vec::with_capacity(len);
|
101
|
-
dict.foreach(|k: Value, v: Value| {
|
102
|
-
let key = String::try_convert(k)?;
|
103
|
-
let val = Wrap::<AnyValue>::try_convert(v)?.0;
|
104
|
-
let dtype = DataType::from(&val);
|
105
|
-
keys.push(Field::new(&key, dtype));
|
106
|
-
vals.push(val);
|
107
|
-
Ok(ForEach::Continue)
|
108
|
-
})?;
|
109
|
-
Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
|
110
|
-
} else if let Some(v) = RArray::from_value(ob) {
|
111
|
-
if v.is_empty() {
|
112
|
-
Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
|
113
|
-
} else {
|
114
|
-
let list = v;
|
115
|
-
|
116
|
-
let mut avs = Vec::with_capacity(25);
|
117
|
-
let mut iter = list.each();
|
118
|
-
|
119
|
-
for item in (&mut iter).take(25) {
|
120
|
-
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
121
|
-
}
|
122
|
-
|
123
|
-
let (dtype, _n_types) =
|
124
|
-
any_values_to_supertype_and_n_dtypes(&avs).map_err(RbPolarsErr::from)?;
|
125
|
-
|
126
|
-
// push the rest
|
127
|
-
avs.reserve(list.len());
|
128
|
-
for item in iter {
|
129
|
-
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
130
|
-
}
|
131
|
-
|
132
|
-
let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
|
133
|
-
.map_err(RbPolarsErr::from)?;
|
134
|
-
Ok(Wrap(AnyValue::List(s)))
|
135
|
-
}
|
136
|
-
} else if ob.is_kind_of(crate::rb_modules::datetime()) {
|
137
|
-
let sec: i64 = ob.funcall("to_i", ())?;
|
138
|
-
let nsec: i64 = ob.funcall("nsec", ())?;
|
139
|
-
Ok(Wrap(AnyValue::Datetime(
|
140
|
-
sec * 1_000_000_000 + nsec,
|
141
|
-
TimeUnit::Nanoseconds,
|
142
|
-
&None,
|
143
|
-
)))
|
144
|
-
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
145
|
-
// convert to DateTime for UTC
|
146
|
-
let v = ob
|
147
|
-
.funcall::<_, _, Value>("to_datetime", ())?
|
148
|
-
.funcall::<_, _, Value>("to_time", ())?
|
149
|
-
.funcall::<_, _, i64>("to_i", ())?;
|
150
|
-
Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
|
151
|
-
} else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
|
152
|
-
let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
|
153
|
-
let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
|
154
|
-
RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
|
155
|
-
})?;
|
156
|
-
if sign < 0 {
|
157
|
-
// TODO better error
|
158
|
-
v = v.checked_neg().unwrap();
|
159
|
-
}
|
160
|
-
Ok(Wrap(AnyValue::Decimal(v, scale)))
|
161
|
-
} else {
|
162
|
-
Err(RbPolarsErr::other(format!(
|
163
|
-
"object type not supported {:?}",
|
164
|
-
ob
|
165
|
-
)))
|
166
|
-
}
|
167
|
-
}
|
168
|
-
}
|
169
|
-
|
170
|
-
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
|
171
|
-
let exp = exp - (digits.len() as i32);
|
172
|
-
match digits.parse::<i128>() {
|
173
|
-
Ok(mut v) => {
|
174
|
-
let scale = if exp > 0 {
|
175
|
-
v = 10_i128
|
176
|
-
.checked_pow(exp as u32)
|
177
|
-
.and_then(|factor| v.checked_mul(factor))?;
|
178
|
-
0
|
179
|
-
} else {
|
180
|
-
(-exp) as usize
|
181
|
-
};
|
182
|
-
Some((v, scale))
|
183
|
-
}
|
184
|
-
Err(_) => None,
|
185
|
-
}
|
186
|
-
}
|