polars-df 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +360 -361
- data/ext/polars/Cargo.toml +10 -7
- data/ext/polars/src/batched_csv.rs +1 -1
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +51 -10
- data/ext/polars/src/dataframe/construction.rs +6 -8
- data/ext/polars/src/dataframe/general.rs +19 -29
- data/ext/polars/src/dataframe/io.rs +43 -33
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -12
- data/ext/polars/src/expr/general.rs +123 -110
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +17 -9
- data/ext/polars/src/expr/string.rs +2 -6
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +21 -21
- data/ext/polars/src/functions/range.rs +6 -12
- data/ext/polars/src/interop/numo/to_numo_series.rs +2 -1
- data/ext/polars/src/lazyframe/mod.rs +81 -98
- data/ext/polars/src/lib.rs +55 -45
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +4 -2
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/mod.rs +31 -10
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +3 -3
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +5 -4
- data/lib/polars/lazy_frame.rb +120 -106
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +105 -189
- data/lib/polars/string_expr.rb +42 -67
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +9 -4
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
data/lib/polars/utils.rb
CHANGED
@@ -3,150 +3,6 @@ module Polars
|
|
3
3
|
module Utils
|
4
4
|
DTYPE_TEMPORAL_UNITS = ["ns", "us", "ms"]
|
5
5
|
|
6
|
-
def self.wrap_s(s)
|
7
|
-
Series._from_rbseries(s)
|
8
|
-
end
|
9
|
-
|
10
|
-
def self.wrap_df(df)
|
11
|
-
DataFrame._from_rbdf(df)
|
12
|
-
end
|
13
|
-
|
14
|
-
def self.wrap_ldf(ldf)
|
15
|
-
LazyFrame._from_rbldf(ldf)
|
16
|
-
end
|
17
|
-
|
18
|
-
def self.wrap_expr(rbexpr)
|
19
|
-
Expr._from_rbexpr(rbexpr)
|
20
|
-
end
|
21
|
-
|
22
|
-
def self.col(name)
|
23
|
-
Polars.col(name)
|
24
|
-
end
|
25
|
-
|
26
|
-
def self.arrlen(obj)
|
27
|
-
if obj.is_a?(Range)
|
28
|
-
# size only works for numeric ranges
|
29
|
-
obj.to_a.length
|
30
|
-
elsif obj.is_a?(::String)
|
31
|
-
nil
|
32
|
-
else
|
33
|
-
obj.length
|
34
|
-
end
|
35
|
-
rescue
|
36
|
-
nil
|
37
|
-
end
|
38
|
-
|
39
|
-
def self._timedelta_to_pl_duration(td)
|
40
|
-
td
|
41
|
-
end
|
42
|
-
|
43
|
-
def self._datetime_to_pl_timestamp(dt, time_unit)
|
44
|
-
dt = dt.to_datetime.to_time
|
45
|
-
if time_unit == "ns"
|
46
|
-
nanos = dt.nsec
|
47
|
-
dt.to_i * 1_000_000_000 + nanos
|
48
|
-
elsif time_unit == "us"
|
49
|
-
micros = dt.usec
|
50
|
-
dt.to_i * 1_000_000 + micros
|
51
|
-
elsif time_unit == "ms"
|
52
|
-
millis = dt.usec / 1000
|
53
|
-
dt.to_i * 1_000 + millis
|
54
|
-
elsif time_unit.nil?
|
55
|
-
# Ruby has ns precision
|
56
|
-
nanos = dt.nsec
|
57
|
-
dt.to_i * 1_000_000_000 + nanos
|
58
|
-
else
|
59
|
-
raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
def self._date_to_pl_date(d)
|
64
|
-
dt = d.to_datetime.to_time
|
65
|
-
dt.to_i / (3600 * 24)
|
66
|
-
end
|
67
|
-
|
68
|
-
def self._to_ruby_time(value)
|
69
|
-
if value == 0
|
70
|
-
::Time.utc(2000, 1, 1)
|
71
|
-
else
|
72
|
-
seconds, nanoseconds = value.divmod(1_000_000_000)
|
73
|
-
minutes, seconds = seconds.divmod(60)
|
74
|
-
hours, minutes = minutes.divmod(60)
|
75
|
-
::Time.utc(2000, 1, 1, hours, minutes, seconds, nanoseconds / 1000.0)
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
def self._to_ruby_duration(value, time_unit = "ns")
|
80
|
-
if time_unit == "ns"
|
81
|
-
value / 1e9
|
82
|
-
elsif time_unit == "us"
|
83
|
-
value / 1e6
|
84
|
-
elsif time_unit == "ms"
|
85
|
-
value / 1e3
|
86
|
-
else
|
87
|
-
raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
def self._to_ruby_date(value)
|
92
|
-
# days to seconds
|
93
|
-
# important to create from utc. Not doing this leads
|
94
|
-
# to inconsistencies dependent on the timezone you are in.
|
95
|
-
::Time.at(value * 86400).utc.to_date
|
96
|
-
end
|
97
|
-
|
98
|
-
def self._to_ruby_datetime(value, time_unit = "ns", time_zone = nil)
|
99
|
-
if time_zone.nil? || time_zone == ""
|
100
|
-
if time_unit == "ns"
|
101
|
-
return ::Time.at(value / 1000000000, value % 1000000000, :nsec).utc
|
102
|
-
elsif time_unit == "us"
|
103
|
-
return ::Time.at(value / 1000000, value % 1000000, :usec).utc
|
104
|
-
elsif time_unit == "ms"
|
105
|
-
return ::Time.at(value / 1000, value % 1000, :millisecond).utc
|
106
|
-
else
|
107
|
-
raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
|
108
|
-
end
|
109
|
-
else
|
110
|
-
raise Todo
|
111
|
-
end
|
112
|
-
end
|
113
|
-
|
114
|
-
def self._to_ruby_decimal(digits, scale)
|
115
|
-
BigDecimal("#{digits}e#{scale}")
|
116
|
-
end
|
117
|
-
|
118
|
-
def self.selection_to_rbexpr_list(exprs)
|
119
|
-
if exprs.is_a?(::String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
120
|
-
exprs = [exprs]
|
121
|
-
end
|
122
|
-
|
123
|
-
exprs.map { |e| expr_to_lit_or_expr(e, str_to_lit: false)._rbexpr }
|
124
|
-
end
|
125
|
-
|
126
|
-
def self.expr_to_lit_or_expr(expr, str_to_lit: true)
|
127
|
-
if (expr.is_a?(::String) || expr.is_a?(Symbol)) && !str_to_lit
|
128
|
-
col(expr)
|
129
|
-
elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(::String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
|
130
|
-
lit(expr)
|
131
|
-
elsif expr.is_a?(Expr)
|
132
|
-
expr
|
133
|
-
else
|
134
|
-
raise ArgumentError, "did not expect value #{expr} of type #{expr.class.name}, maybe disambiguate with Polars.lit or Polars.col"
|
135
|
-
end
|
136
|
-
end
|
137
|
-
|
138
|
-
def self.lit(value)
|
139
|
-
Polars.lit(value)
|
140
|
-
end
|
141
|
-
|
142
|
-
def self.normalize_filepath(path, check_not_directory: true)
|
143
|
-
path = File.expand_path(path)
|
144
|
-
if check_not_directory && File.exist?(path) && Dir.exist?(path)
|
145
|
-
raise ArgumentError, "Expected a file path; #{path} is a directory"
|
146
|
-
end
|
147
|
-
path
|
148
|
-
end
|
149
|
-
|
150
6
|
# TODO fix
|
151
7
|
def self.is_polars_dtype(data_type, include_unknown: false)
|
152
8
|
if data_type == Unknown
|
@@ -191,19 +47,11 @@ module Polars
|
|
191
47
|
end
|
192
48
|
end
|
193
49
|
|
194
|
-
def self.
|
195
|
-
if
|
196
|
-
null_values.to_a
|
197
|
-
else
|
198
|
-
null_values
|
199
|
-
end
|
200
|
-
end
|
201
|
-
|
202
|
-
def self._prepare_row_count_args(row_count_name = nil, row_count_offset = 0)
|
203
|
-
if !row_count_name.nil?
|
204
|
-
[row_count_name, row_count_offset]
|
205
|
-
else
|
50
|
+
def self.parse_row_index_args(row_index_name = nil, row_index_offset = 0)
|
51
|
+
if row_index_name.nil?
|
206
52
|
nil
|
53
|
+
else
|
54
|
+
[row_index_name, row_index_offset]
|
207
55
|
end
|
208
56
|
end
|
209
57
|
|
@@ -223,21 +71,6 @@ module Polars
|
|
223
71
|
[projection, columns]
|
224
72
|
end
|
225
73
|
|
226
|
-
def self.scale_bytes(sz, to:)
|
227
|
-
scaling_factor = {
|
228
|
-
"b" => 1,
|
229
|
-
"k" => 1024,
|
230
|
-
"m" => 1024 ** 2,
|
231
|
-
"g" => 1024 ** 3,
|
232
|
-
"t" => 1024 ** 4
|
233
|
-
}[to[0]]
|
234
|
-
if scaling_factor > 1
|
235
|
-
sz / scaling_factor.to_f
|
236
|
-
else
|
237
|
-
sz
|
238
|
-
end
|
239
|
-
end
|
240
|
-
|
241
74
|
def self.bool?(value)
|
242
75
|
value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
243
76
|
end
|
@@ -250,108 +83,10 @@ module Polars
|
|
250
83
|
value.is_a?(::String) || (defined?(Pathname) && value.is_a?(Pathname))
|
251
84
|
end
|
252
85
|
|
253
|
-
def self._is_iterable_of(val, eltype)
|
254
|
-
val.all? { |x| x.is_a?(eltype) }
|
255
|
-
end
|
256
|
-
|
257
|
-
def self.is_bool_sequence(val)
|
258
|
-
val.is_a?(::Array) && val.all? { |x| x == true || x == false }
|
259
|
-
end
|
260
|
-
|
261
|
-
def self.is_dtype_sequence(val)
|
262
|
-
val.is_a?(::Array) && val.all? { |x| is_polars_dtype(x) }
|
263
|
-
end
|
264
|
-
|
265
|
-
def self.is_int_sequence(val)
|
266
|
-
val.is_a?(::Array) && _is_iterable_of(val, Integer)
|
267
|
-
end
|
268
|
-
|
269
|
-
def self.is_expr_sequence(val)
|
270
|
-
val.is_a?(::Array) && _is_iterable_of(val, Expr)
|
271
|
-
end
|
272
|
-
|
273
|
-
def self.is_rbexpr_sequence(val)
|
274
|
-
val.is_a?(::Array) && _is_iterable_of(val, RbExpr)
|
275
|
-
end
|
276
|
-
|
277
|
-
def self.is_str_sequence(val, allow_str: false)
|
278
|
-
if allow_str == false && val.is_a?(::String)
|
279
|
-
false
|
280
|
-
else
|
281
|
-
val.is_a?(::Array) && _is_iterable_of(val, ::String)
|
282
|
-
end
|
283
|
-
end
|
284
|
-
|
285
86
|
def self.local_file?(file)
|
286
87
|
Dir.glob(file).any?
|
287
88
|
end
|
288
89
|
|
289
|
-
def self.parse_as_list_of_expressions(*inputs, __structify: false, **named_inputs)
|
290
|
-
exprs = _parse_positional_inputs(inputs, structify: __structify)
|
291
|
-
if named_inputs.any?
|
292
|
-
named_exprs = _parse_named_inputs(named_inputs, structify: __structify)
|
293
|
-
exprs.concat(named_exprs)
|
294
|
-
end
|
295
|
-
|
296
|
-
exprs
|
297
|
-
end
|
298
|
-
|
299
|
-
def self._parse_positional_inputs(inputs, structify: false)
|
300
|
-
inputs_iter = _parse_inputs_as_iterable(inputs)
|
301
|
-
inputs_iter.map { |e| parse_as_expression(e, structify: structify) }
|
302
|
-
end
|
303
|
-
|
304
|
-
def self._parse_inputs_as_iterable(inputs)
|
305
|
-
if inputs.empty?
|
306
|
-
return []
|
307
|
-
end
|
308
|
-
|
309
|
-
if inputs.length == 1 && inputs[0].is_a?(::Array)
|
310
|
-
return inputs[0]
|
311
|
-
end
|
312
|
-
|
313
|
-
inputs
|
314
|
-
end
|
315
|
-
|
316
|
-
def self._parse_named_inputs(named_inputs, structify: false)
|
317
|
-
named_inputs.map do |name, input|
|
318
|
-
parse_as_expression(input, structify: structify)._alias(name.to_s)
|
319
|
-
end
|
320
|
-
end
|
321
|
-
|
322
|
-
def self.parse_as_expression(input, str_as_lit: false, list_as_lit: true, structify: false, dtype: nil)
|
323
|
-
if input.is_a?(Expr)
|
324
|
-
expr = input
|
325
|
-
elsif input.is_a?(::String) && !str_as_lit
|
326
|
-
expr = Polars.col(input)
|
327
|
-
structify = false
|
328
|
-
elsif input.is_a?(::Array) && !list_as_lit
|
329
|
-
expr = Polars.lit(Series.new(input), dtype: dtype)
|
330
|
-
structify = false
|
331
|
-
else
|
332
|
-
expr = Polars.lit(input, dtype: dtype)
|
333
|
-
structify = false
|
334
|
-
end
|
335
|
-
|
336
|
-
if structify
|
337
|
-
raise Todo
|
338
|
-
end
|
339
|
-
|
340
|
-
expr._rbexpr
|
341
|
-
end
|
342
|
-
|
343
|
-
USE_EARLIEST_TO_AMBIGUOUS = {
|
344
|
-
true => "earliest",
|
345
|
-
false => "latest"
|
346
|
-
}
|
347
|
-
|
348
|
-
def self.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
|
349
|
-
unless use_earliest.nil?
|
350
|
-
ambiguous = USE_EARLIEST_TO_AMBIGUOUS.fetch(use_earliest)
|
351
|
-
end
|
352
|
-
ambiguous
|
353
|
-
end
|
354
|
-
|
355
90
|
def self._check_arg_is_1byte(arg_name, arg, can_be_empty = false)
|
356
91
|
if arg.is_a?(::String)
|
357
92
|
arg_byte_length = arg.bytesize
|
@@ -385,72 +120,11 @@ module Polars
|
|
385
120
|
false
|
386
121
|
end
|
387
122
|
|
388
|
-
def self.parse_predicates_constraints_as_expression(*predicates, **constraints)
|
389
|
-
all_predicates = _parse_positional_inputs(predicates)
|
390
|
-
|
391
|
-
if constraints.any?
|
392
|
-
constraint_predicates = _parse_constraints(constraints)
|
393
|
-
all_predicates.concat(constraint_predicates)
|
394
|
-
end
|
395
|
-
|
396
|
-
_combine_predicates(all_predicates)
|
397
|
-
end
|
398
|
-
|
399
|
-
def self._parse_constraints(constraints)
|
400
|
-
constraints.map do |name, value|
|
401
|
-
Polars.col(name).eq(value)._rbexpr
|
402
|
-
end
|
403
|
-
end
|
404
|
-
|
405
|
-
def self._combine_predicates(predicates)
|
406
|
-
if !predicates.any?
|
407
|
-
msg = "at least one predicate or constraint must be provided"
|
408
|
-
raise TypeError, msg
|
409
|
-
end
|
410
|
-
|
411
|
-
if predicates.length == 1
|
412
|
-
return predicates[0]
|
413
|
-
end
|
414
|
-
|
415
|
-
Plr.all_horizontal(predicates)
|
416
|
-
end
|
417
|
-
|
418
|
-
def self.parse_when_inputs(*predicates, **constraints)
|
419
|
-
parse_predicates_constraints_as_expression(*predicates, **constraints)
|
420
|
-
end
|
421
|
-
|
422
123
|
def self.parse_interval_argument(interval)
|
423
124
|
if interval.include?(" ")
|
424
125
|
interval = interval.gsub(" ", "")
|
425
126
|
end
|
426
127
|
interval.downcase
|
427
128
|
end
|
428
|
-
|
429
|
-
def self.validate_rolling_by_aggs_arguments(weights, center:)
|
430
|
-
if !weights.nil?
|
431
|
-
msg = "`weights` is not supported in `rolling_*(..., by=...)` expression"
|
432
|
-
raise InvalidOperationError, msg
|
433
|
-
end
|
434
|
-
if center
|
435
|
-
msg = "`center=True` is not supported in `rolling_*(..., by=...)` expression"
|
436
|
-
raise InvalidOperationError, msg
|
437
|
-
end
|
438
|
-
end
|
439
|
-
|
440
|
-
def self.validate_rolling_aggs_arguments(window_size, closed)
|
441
|
-
if window_size.is_a?(::String)
|
442
|
-
begin
|
443
|
-
window_size = window_size.delete_suffix("i").to_i
|
444
|
-
rescue
|
445
|
-
msg = "Expected a string of the form 'ni', where `n` is a positive integer, got: #{window_size}"
|
446
|
-
raise InvalidOperationError, msg
|
447
|
-
end
|
448
|
-
end
|
449
|
-
if !closed.nil?
|
450
|
-
msg = "`closed` is not supported in `rolling_*(...)` expression"
|
451
|
-
raise InvalidOperationError, msg
|
452
|
-
end
|
453
|
-
window_size
|
454
|
-
end
|
455
129
|
end
|
456
130
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars/whenthen.rb
CHANGED
@@ -8,7 +8,7 @@ module Polars
|
|
8
8
|
end
|
9
9
|
|
10
10
|
def then(statement)
|
11
|
-
statement_rbexpr = Utils.
|
11
|
+
statement_rbexpr = Utils.parse_into_expression(statement)
|
12
12
|
Then.new(_when.then(statement_rbexpr))
|
13
13
|
end
|
14
14
|
end
|
@@ -30,12 +30,12 @@ module Polars
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def when(*predicates, **constraints)
|
33
|
-
condition_rbexpr = Utils.
|
33
|
+
condition_rbexpr = Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
|
34
34
|
ChainedWhen.new(_then.when(condition_rbexpr))
|
35
35
|
end
|
36
36
|
|
37
37
|
def otherwise(statement)
|
38
|
-
statement_rbexpr = Utils.
|
38
|
+
statement_rbexpr = Utils.parse_into_expression(statement)
|
39
39
|
Utils.wrap_expr(_then.otherwise(statement_rbexpr))
|
40
40
|
end
|
41
41
|
end
|
@@ -49,7 +49,7 @@ module Polars
|
|
49
49
|
end
|
50
50
|
|
51
51
|
def then(statement)
|
52
|
-
statement_rbexpr = Utils.
|
52
|
+
statement_rbexpr = Utils.parse_into_expression(statement)
|
53
53
|
ChainedThen.new(_chained_when.then(statement_rbexpr))
|
54
54
|
end
|
55
55
|
end
|
@@ -71,12 +71,12 @@ module Polars
|
|
71
71
|
end
|
72
72
|
|
73
73
|
def when(*predicates, **constraints)
|
74
|
-
condition_rbexpr = Utils.
|
74
|
+
condition_rbexpr = Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
|
75
75
|
ChainedWhen.new(_chained_then.when(condition_rbexpr))
|
76
76
|
end
|
77
77
|
|
78
78
|
def otherwise(statement)
|
79
|
-
statement_rbexpr = Utils.
|
79
|
+
statement_rbexpr = Utils.parse_into_expression(statement)
|
80
80
|
Utils.wrap_expr(_chained_then.otherwise(statement_rbexpr))
|
81
81
|
end
|
82
82
|
end
|
data/lib/polars.rb
CHANGED
@@ -70,6 +70,11 @@ require_relative "polars/struct_expr"
|
|
70
70
|
require_relative "polars/struct_name_space"
|
71
71
|
require_relative "polars/testing"
|
72
72
|
require_relative "polars/utils"
|
73
|
+
require_relative "polars/utils/constants"
|
74
|
+
require_relative "polars/utils/convert"
|
75
|
+
require_relative "polars/utils/parse"
|
76
|
+
require_relative "polars/utils/various"
|
77
|
+
require_relative "polars/utils/wrap"
|
73
78
|
require_relative "polars/version"
|
74
79
|
require_relative "polars/whenthen"
|
75
80
|
|
@@ -77,4 +82,10 @@ module Polars
|
|
77
82
|
extend Convert
|
78
83
|
extend Functions
|
79
84
|
extend IO
|
85
|
+
|
86
|
+
# @private
|
87
|
+
F = self
|
88
|
+
|
89
|
+
# @private
|
90
|
+
N_INFER_DEFAULT = 100
|
80
91
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.12.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-07-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bigdecimal
|
@@ -54,7 +54,7 @@ files:
|
|
54
54
|
- ext/polars/Cargo.toml
|
55
55
|
- ext/polars/extconf.rb
|
56
56
|
- ext/polars/src/batched_csv.rs
|
57
|
-
- ext/polars/src/conversion/
|
57
|
+
- ext/polars/src/conversion/any_value.rs
|
58
58
|
- ext/polars/src/conversion/chunked_array.rs
|
59
59
|
- ext/polars/src/conversion/mod.rs
|
60
60
|
- ext/polars/src/dataframe/construction.rs
|
@@ -172,6 +172,11 @@ files:
|
|
172
172
|
- lib/polars/struct_name_space.rb
|
173
173
|
- lib/polars/testing.rb
|
174
174
|
- lib/polars/utils.rb
|
175
|
+
- lib/polars/utils/constants.rb
|
176
|
+
- lib/polars/utils/convert.rb
|
177
|
+
- lib/polars/utils/parse.rb
|
178
|
+
- lib/polars/utils/various.rb
|
179
|
+
- lib/polars/utils/wrap.rb
|
175
180
|
- lib/polars/version.rb
|
176
181
|
- lib/polars/whenthen.rb
|
177
182
|
homepage: https://github.com/ankane/ruby-polars
|
@@ -193,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
193
198
|
- !ruby/object:Gem::Version
|
194
199
|
version: '0'
|
195
200
|
requirements: []
|
196
|
-
rubygems_version: 3.5.
|
201
|
+
rubygems_version: 3.5.11
|
197
202
|
signing_key:
|
198
203
|
specification_version: 4
|
199
204
|
summary: Blazingly fast DataFrames for Ruby
|
@@ -1,186 +0,0 @@
|
|
1
|
-
use magnus::encoding::{EncodingCapable, Index};
|
2
|
-
use magnus::{
|
3
|
-
class, prelude::*, r_hash::ForEach, Float, Integer, IntoValue, RArray, RHash, RString, Ruby,
|
4
|
-
TryConvert, Value,
|
5
|
-
};
|
6
|
-
use polars::prelude::*;
|
7
|
-
use polars_core::utils::any_values_to_supertype_and_n_dtypes;
|
8
|
-
|
9
|
-
use super::{struct_dict, ObjectValue, Wrap};
|
10
|
-
|
11
|
-
use crate::rb_modules::utils;
|
12
|
-
use crate::{RbPolarsErr, RbResult, RbSeries};
|
13
|
-
|
14
|
-
impl IntoValue for Wrap<AnyValue<'_>> {
|
15
|
-
fn into_value_with(self, ruby: &Ruby) -> Value {
|
16
|
-
match self.0 {
|
17
|
-
AnyValue::UInt8(v) => ruby.into_value(v),
|
18
|
-
AnyValue::UInt16(v) => ruby.into_value(v),
|
19
|
-
AnyValue::UInt32(v) => ruby.into_value(v),
|
20
|
-
AnyValue::UInt64(v) => ruby.into_value(v),
|
21
|
-
AnyValue::Int8(v) => ruby.into_value(v),
|
22
|
-
AnyValue::Int16(v) => ruby.into_value(v),
|
23
|
-
AnyValue::Int32(v) => ruby.into_value(v),
|
24
|
-
AnyValue::Int64(v) => ruby.into_value(v),
|
25
|
-
AnyValue::Float32(v) => ruby.into_value(v),
|
26
|
-
AnyValue::Float64(v) => ruby.into_value(v),
|
27
|
-
AnyValue::Null => ruby.qnil().as_value(),
|
28
|
-
AnyValue::Boolean(v) => ruby.into_value(v),
|
29
|
-
AnyValue::String(v) => ruby.into_value(v),
|
30
|
-
AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
|
31
|
-
AnyValue::Categorical(idx, rev, arr) | AnyValue::Enum(idx, rev, arr) => {
|
32
|
-
let s = if arr.is_null() {
|
33
|
-
rev.get(idx)
|
34
|
-
} else {
|
35
|
-
unsafe { arr.deref_unchecked().value(idx as usize) }
|
36
|
-
};
|
37
|
-
s.into_value()
|
38
|
-
}
|
39
|
-
AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
|
40
|
-
AnyValue::Datetime(v, time_unit, time_zone) => {
|
41
|
-
let time_unit = time_unit.to_ascii();
|
42
|
-
utils()
|
43
|
-
.funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
|
44
|
-
.unwrap()
|
45
|
-
}
|
46
|
-
AnyValue::Duration(v, time_unit) => {
|
47
|
-
let time_unit = time_unit.to_ascii();
|
48
|
-
utils()
|
49
|
-
.funcall("_to_ruby_duration", (v, time_unit))
|
50
|
-
.unwrap()
|
51
|
-
}
|
52
|
-
AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
|
53
|
-
AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
54
|
-
ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
|
55
|
-
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
56
|
-
AnyValue::Object(v) => {
|
57
|
-
let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
|
58
|
-
object.to_object()
|
59
|
-
}
|
60
|
-
AnyValue::ObjectOwned(v) => {
|
61
|
-
let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
|
62
|
-
object.to_object()
|
63
|
-
}
|
64
|
-
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
65
|
-
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
66
|
-
AnyValue::Decimal(v, scale) => utils()
|
67
|
-
.funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
|
68
|
-
.unwrap(),
|
69
|
-
}
|
70
|
-
}
|
71
|
-
}
|
72
|
-
|
73
|
-
impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
74
|
-
fn try_convert(ob: Value) -> RbResult<Self> {
|
75
|
-
if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
|
76
|
-
Ok(AnyValue::Boolean(bool::try_convert(ob)?).into())
|
77
|
-
} else if let Some(v) = Integer::from_value(ob) {
|
78
|
-
Ok(AnyValue::Int64(v.to_i64()?).into())
|
79
|
-
} else if let Some(v) = Float::from_value(ob) {
|
80
|
-
Ok(AnyValue::Float64(v.to_f64()).into())
|
81
|
-
} else if let Some(v) = RString::from_value(ob) {
|
82
|
-
if v.enc_get() == Index::utf8() {
|
83
|
-
Ok(AnyValue::StringOwned(v.to_string()?.into()).into())
|
84
|
-
} else {
|
85
|
-
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
|
86
|
-
}
|
87
|
-
// call is_a? for ActiveSupport::TimeWithZone
|
88
|
-
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
89
|
-
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
90
|
-
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
91
|
-
let v = sec * 1_000_000_000 + nsec;
|
92
|
-
// TODO support time zone when possible
|
93
|
-
// https://github.com/pola-rs/polars/issues/9103
|
94
|
-
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
|
95
|
-
} else if ob.is_nil() {
|
96
|
-
Ok(AnyValue::Null.into())
|
97
|
-
} else if let Some(dict) = RHash::from_value(ob) {
|
98
|
-
let len = dict.len();
|
99
|
-
let mut keys = Vec::with_capacity(len);
|
100
|
-
let mut vals = Vec::with_capacity(len);
|
101
|
-
dict.foreach(|k: Value, v: Value| {
|
102
|
-
let key = String::try_convert(k)?;
|
103
|
-
let val = Wrap::<AnyValue>::try_convert(v)?.0;
|
104
|
-
let dtype = DataType::from(&val);
|
105
|
-
keys.push(Field::new(&key, dtype));
|
106
|
-
vals.push(val);
|
107
|
-
Ok(ForEach::Continue)
|
108
|
-
})?;
|
109
|
-
Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
|
110
|
-
} else if let Some(v) = RArray::from_value(ob) {
|
111
|
-
if v.is_empty() {
|
112
|
-
Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
|
113
|
-
} else {
|
114
|
-
let list = v;
|
115
|
-
|
116
|
-
let mut avs = Vec::with_capacity(25);
|
117
|
-
let mut iter = list.each();
|
118
|
-
|
119
|
-
for item in (&mut iter).take(25) {
|
120
|
-
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
121
|
-
}
|
122
|
-
|
123
|
-
let (dtype, _n_types) =
|
124
|
-
any_values_to_supertype_and_n_dtypes(&avs).map_err(RbPolarsErr::from)?;
|
125
|
-
|
126
|
-
// push the rest
|
127
|
-
avs.reserve(list.len());
|
128
|
-
for item in iter {
|
129
|
-
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
130
|
-
}
|
131
|
-
|
132
|
-
let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
|
133
|
-
.map_err(RbPolarsErr::from)?;
|
134
|
-
Ok(Wrap(AnyValue::List(s)))
|
135
|
-
}
|
136
|
-
} else if ob.is_kind_of(crate::rb_modules::datetime()) {
|
137
|
-
let sec: i64 = ob.funcall("to_i", ())?;
|
138
|
-
let nsec: i64 = ob.funcall("nsec", ())?;
|
139
|
-
Ok(Wrap(AnyValue::Datetime(
|
140
|
-
sec * 1_000_000_000 + nsec,
|
141
|
-
TimeUnit::Nanoseconds,
|
142
|
-
&None,
|
143
|
-
)))
|
144
|
-
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
145
|
-
// convert to DateTime for UTC
|
146
|
-
let v = ob
|
147
|
-
.funcall::<_, _, Value>("to_datetime", ())?
|
148
|
-
.funcall::<_, _, Value>("to_time", ())?
|
149
|
-
.funcall::<_, _, i64>("to_i", ())?;
|
150
|
-
Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
|
151
|
-
} else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
|
152
|
-
let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
|
153
|
-
let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
|
154
|
-
RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
|
155
|
-
})?;
|
156
|
-
if sign < 0 {
|
157
|
-
// TODO better error
|
158
|
-
v = v.checked_neg().unwrap();
|
159
|
-
}
|
160
|
-
Ok(Wrap(AnyValue::Decimal(v, scale)))
|
161
|
-
} else {
|
162
|
-
Err(RbPolarsErr::other(format!(
|
163
|
-
"object type not supported {:?}",
|
164
|
-
ob
|
165
|
-
)))
|
166
|
-
}
|
167
|
-
}
|
168
|
-
}
|
169
|
-
|
170
|
-
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
|
171
|
-
let exp = exp - (digits.len() as i32);
|
172
|
-
match digits.parse::<i128>() {
|
173
|
-
Ok(mut v) => {
|
174
|
-
let scale = if exp > 0 {
|
175
|
-
v = 10_i128
|
176
|
-
.checked_pow(exp as u32)
|
177
|
-
.and_then(|factor| v.checked_mul(factor))?;
|
178
|
-
0
|
179
|
-
} else {
|
180
|
-
(-exp) as usize
|
181
|
-
};
|
182
|
-
Some((v, scale))
|
183
|
-
}
|
184
|
-
Err(_) => None,
|
185
|
-
}
|
186
|
-
}
|