polars-df 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -1
- data/Cargo.lock +72 -58
- data/README.md +31 -27
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +35 -39
- data/ext/polars/src/c_api/allocator.rs +7 -0
- data/ext/polars/src/c_api/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +123 -101
- data/ext/polars/src/conversion/any_value.rs +13 -17
- data/ext/polars/src/conversion/chunked_array.rs +5 -5
- data/ext/polars/src/conversion/datetime.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +50 -45
- data/ext/polars/src/dataframe/export.rs +13 -13
- data/ext/polars/src/dataframe/general.rs +223 -223
- data/ext/polars/src/dataframe/io.rs +27 -141
- data/ext/polars/src/dataframe/mod.rs +13 -5
- data/ext/polars/src/dataframe/serde.rs +1 -1
- data/ext/polars/src/error.rs +44 -7
- data/ext/polars/src/exceptions.rs +45 -12
- data/ext/polars/src/expr/array.rs +12 -0
- data/ext/polars/src/expr/datatype.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +4 -5
- data/ext/polars/src/expr/general.rs +49 -13
- data/ext/polars/src/expr/list.rs +4 -0
- data/ext/polars/src/expr/meta.rs +8 -3
- data/ext/polars/src/expr/mod.rs +22 -6
- data/ext/polars/src/expr/name.rs +19 -8
- data/ext/polars/src/expr/rolling.rs +50 -1
- data/ext/polars/src/expr/string.rs +0 -1
- data/ext/polars/src/expr/struct.rs +7 -2
- data/ext/polars/src/file.rs +136 -103
- data/ext/polars/src/functions/aggregation.rs +9 -8
- data/ext/polars/src/functions/io.rs +81 -10
- data/ext/polars/src/functions/lazy.rs +95 -21
- data/ext/polars/src/functions/mod.rs +2 -0
- data/ext/polars/src/functions/range.rs +19 -3
- data/ext/polars/src/functions/strings.rs +6 -0
- data/ext/polars/src/functions/utils.rs +6 -0
- data/ext/polars/src/interop/arrow/mod.rs +50 -1
- data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
- data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/exitable.rs +39 -0
- data/ext/polars/src/lazyframe/general.rs +340 -236
- data/ext/polars/src/lazyframe/mod.rs +46 -10
- data/ext/polars/src/lazyframe/optflags.rs +5 -4
- data/ext/polars/src/lazyframe/serde.rs +11 -3
- data/ext/polars/src/lazyframe/sink.rs +10 -5
- data/ext/polars/src/lazygroupby.rs +6 -7
- data/ext/polars/src/lib.rs +141 -76
- data/ext/polars/src/map/dataframe.rs +12 -12
- data/ext/polars/src/map/lazy.rs +7 -5
- data/ext/polars/src/map/mod.rs +15 -8
- data/ext/polars/src/map/series.rs +3 -3
- data/ext/polars/src/on_startup.rs +16 -8
- data/ext/polars/src/prelude.rs +1 -0
- data/ext/polars/src/rb_modules.rs +19 -49
- data/ext/polars/src/series/aggregation.rs +79 -140
- data/ext/polars/src/series/arithmetic.rs +16 -22
- data/ext/polars/src/series/comparison.rs +101 -222
- data/ext/polars/src/series/construction.rs +17 -18
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +254 -289
- data/ext/polars/src/series/import.rs +17 -0
- data/ext/polars/src/series/map.rs +178 -160
- data/ext/polars/src/series/mod.rs +28 -12
- data/ext/polars/src/series/scatter.rs +12 -9
- data/ext/polars/src/sql.rs +16 -9
- data/ext/polars/src/testing/frame.rs +31 -0
- data/ext/polars/src/testing/mod.rs +5 -0
- data/ext/polars/src/testing/series.rs +31 -0
- data/ext/polars/src/timeout.rs +105 -0
- data/ext/polars/src/utils.rs +159 -1
- data/lib/polars/array_expr.rb +81 -12
- data/lib/polars/array_name_space.rb +74 -7
- data/lib/polars/batched_csv_reader.rb +21 -21
- data/lib/polars/binary_name_space.rb +1 -1
- data/lib/polars/cat_expr.rb +7 -7
- data/lib/polars/config.rb +1 -1
- data/lib/polars/convert.rb +189 -34
- data/lib/polars/data_frame.rb +1066 -831
- data/lib/polars/data_frame_plot.rb +173 -0
- data/lib/polars/data_type_group.rb +1 -0
- data/lib/polars/data_types.rb +31 -12
- data/lib/polars/date_time_expr.rb +51 -69
- data/lib/polars/date_time_name_space.rb +80 -112
- data/lib/polars/dynamic_group_by.rb +7 -7
- data/lib/polars/exceptions.rb +50 -10
- data/lib/polars/expr.rb +470 -517
- data/lib/polars/functions/aggregation/horizontal.rb +0 -1
- data/lib/polars/functions/aggregation/vertical.rb +2 -3
- data/lib/polars/functions/as_datatype.rb +290 -8
- data/lib/polars/functions/eager.rb +204 -10
- data/lib/polars/functions/escape_regex.rb +21 -0
- data/lib/polars/functions/lazy.rb +409 -169
- data/lib/polars/functions/lit.rb +17 -1
- data/lib/polars/functions/range/int_range.rb +74 -2
- data/lib/polars/functions/range/linear_space.rb +77 -0
- data/lib/polars/functions/range/time_range.rb +1 -1
- data/lib/polars/functions/repeat.rb +3 -12
- data/lib/polars/functions/whenthen.rb +2 -2
- data/lib/polars/group_by.rb +72 -20
- data/lib/polars/iceberg_dataset.rb +1 -6
- data/lib/polars/in_process_query.rb +37 -0
- data/lib/polars/io/cloud.rb +18 -0
- data/lib/polars/io/csv.rb +265 -126
- data/lib/polars/io/database.rb +0 -1
- data/lib/polars/io/delta.rb +15 -7
- data/lib/polars/io/ipc.rb +24 -17
- data/lib/polars/io/ndjson.rb +161 -24
- data/lib/polars/io/parquet.rb +101 -38
- data/lib/polars/lazy_frame.rb +849 -558
- data/lib/polars/lazy_group_by.rb +327 -2
- data/lib/polars/list_expr.rb +94 -16
- data/lib/polars/list_name_space.rb +88 -24
- data/lib/polars/meta_expr.rb +42 -1
- data/lib/polars/name_expr.rb +41 -4
- data/lib/polars/query_opt_flags.rb +198 -2
- data/lib/polars/rolling_group_by.rb +3 -3
- data/lib/polars/schema.rb +21 -3
- data/lib/polars/selector.rb +37 -2
- data/lib/polars/selectors.rb +45 -9
- data/lib/polars/series.rb +1156 -728
- data/lib/polars/series_plot.rb +72 -0
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/sql_context.rb +11 -4
- data/lib/polars/string_expr.rb +59 -68
- data/lib/polars/string_name_space.rb +51 -87
- data/lib/polars/struct_expr.rb +36 -18
- data/lib/polars/testing.rb +24 -273
- data/lib/polars/utils/constants.rb +2 -0
- data/lib/polars/utils/construction/data_frame.rb +410 -0
- data/lib/polars/utils/construction/series.rb +364 -0
- data/lib/polars/utils/construction/utils.rb +9 -0
- data/lib/polars/utils/deprecation.rb +11 -0
- data/lib/polars/utils/serde.rb +8 -3
- data/lib/polars/utils/unstable.rb +19 -0
- data/lib/polars/utils/various.rb +59 -0
- data/lib/polars/utils.rb +46 -47
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +47 -1
- metadata +25 -6
- data/ext/polars/src/allocator.rs +0 -13
- data/lib/polars/plot.rb +0 -109
data/lib/polars/testing.rb
CHANGED
|
@@ -40,50 +40,22 @@ module Polars
|
|
|
40
40
|
categorical_as_str: false
|
|
41
41
|
)
|
|
42
42
|
lazy = _assert_correct_input_type(left, right)
|
|
43
|
-
objects = lazy ? "LazyFrames" : "DataFrames"
|
|
44
|
-
|
|
45
|
-
_assert_frame_schema_equal(
|
|
46
|
-
left,
|
|
47
|
-
right,
|
|
48
|
-
check_column_order: check_column_order,
|
|
49
|
-
check_dtype: check_dtype,
|
|
50
|
-
objects: objects,
|
|
51
|
-
)
|
|
52
43
|
|
|
53
44
|
if lazy
|
|
54
45
|
left, right = left.collect, right.collect
|
|
55
46
|
end
|
|
56
47
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
s_left, s_right = left.get_column(c), right.get_column(c)
|
|
69
|
-
begin
|
|
70
|
-
_assert_series_values_equal(
|
|
71
|
-
s_left,
|
|
72
|
-
s_right,
|
|
73
|
-
check_exact: check_exact,
|
|
74
|
-
rtol: rtol,
|
|
75
|
-
atol: atol,
|
|
76
|
-
categorical_as_str: categorical_as_str
|
|
77
|
-
)
|
|
78
|
-
rescue AssertionError
|
|
79
|
-
raise_assertion_error(
|
|
80
|
-
objects,
|
|
81
|
-
"value mismatch for column #{c.inspect}",
|
|
82
|
-
s_left.to_a,
|
|
83
|
-
s_right.to_a
|
|
84
|
-
)
|
|
85
|
-
end
|
|
86
|
-
end
|
|
48
|
+
Plr.assert_dataframe_equal_rb(
|
|
49
|
+
left._df,
|
|
50
|
+
right._df,
|
|
51
|
+
check_row_order,
|
|
52
|
+
check_column_order,
|
|
53
|
+
check_dtype,
|
|
54
|
+
check_exact,
|
|
55
|
+
rtol,
|
|
56
|
+
atol,
|
|
57
|
+
categorical_as_str,
|
|
58
|
+
)
|
|
87
59
|
end
|
|
88
60
|
|
|
89
61
|
# Assert that the left and right frame are **not** equal.
|
|
@@ -157,6 +129,8 @@ module Polars
|
|
|
157
129
|
# Require data types to match.
|
|
158
130
|
# @param check_names [Boolean]
|
|
159
131
|
# Require names to match.
|
|
132
|
+
# @param check_order [Boolean]
|
|
133
|
+
# Requires elements to appear in the same order.
|
|
160
134
|
# @param check_exact [Boolean]
|
|
161
135
|
# Require float values to match exactly. If set to `false`, values are considered
|
|
162
136
|
# equal when within tolerance of each other (see `rtol` and `atol`).
|
|
@@ -176,6 +150,7 @@ module Polars
|
|
|
176
150
|
right,
|
|
177
151
|
check_dtype: true,
|
|
178
152
|
check_names: true,
|
|
153
|
+
check_order: true,
|
|
179
154
|
check_exact: false,
|
|
180
155
|
rtol: 1e-5,
|
|
181
156
|
atol: 1e-8,
|
|
@@ -190,25 +165,16 @@ module Polars
|
|
|
190
165
|
)
|
|
191
166
|
end
|
|
192
167
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
end
|
|
204
|
-
|
|
205
|
-
_assert_series_values_equal(
|
|
206
|
-
left,
|
|
207
|
-
right,
|
|
208
|
-
check_exact: check_exact,
|
|
209
|
-
rtol: rtol,
|
|
210
|
-
atol: atol,
|
|
211
|
-
categorical_as_str: categorical_as_str
|
|
168
|
+
Plr.assert_series_equal_rb(
|
|
169
|
+
left._s,
|
|
170
|
+
right._s,
|
|
171
|
+
check_dtype,
|
|
172
|
+
check_names,
|
|
173
|
+
check_order,
|
|
174
|
+
check_exact,
|
|
175
|
+
rtol,
|
|
176
|
+
atol,
|
|
177
|
+
categorical_as_str
|
|
212
178
|
)
|
|
213
179
|
end
|
|
214
180
|
|
|
@@ -284,221 +250,6 @@ module Polars
|
|
|
284
250
|
end
|
|
285
251
|
end
|
|
286
252
|
|
|
287
|
-
def _assert_frame_schema_equal(
|
|
288
|
-
left,
|
|
289
|
-
right,
|
|
290
|
-
check_dtype:,
|
|
291
|
-
check_column_order:,
|
|
292
|
-
objects:
|
|
293
|
-
)
|
|
294
|
-
left_schema, right_schema = left.schema, right.schema
|
|
295
|
-
|
|
296
|
-
# Fast path for equal frames
|
|
297
|
-
if left_schema == right_schema
|
|
298
|
-
return
|
|
299
|
-
end
|
|
300
|
-
|
|
301
|
-
# Special error message for when column names do not match
|
|
302
|
-
if left_schema.keys != right_schema.keys
|
|
303
|
-
if (left_not_right = right_schema.keys - left_schema.keys).any?
|
|
304
|
-
msg = "columns #{left_not_right.inspect} in left #{objects[..-1]}, but not in right"
|
|
305
|
-
raise AssertionError, msg
|
|
306
|
-
else
|
|
307
|
-
right_not_left = right_schema.keys - left_schema.keys
|
|
308
|
-
msg = "columns #{right_not_left.inspect} in right #{objects[..-1]}, but not in left"
|
|
309
|
-
raise AssertionError, msg
|
|
310
|
-
end
|
|
311
|
-
end
|
|
312
|
-
|
|
313
|
-
if check_column_order
|
|
314
|
-
left_columns, right_columns = left_schema.keys, right_schema.keys
|
|
315
|
-
if left_columns != right_columns
|
|
316
|
-
detail = "columns are not in the same order"
|
|
317
|
-
raise_assertion_error(objects, detail, left_columns, right_columns)
|
|
318
|
-
end
|
|
319
|
-
end
|
|
320
|
-
|
|
321
|
-
if check_dtype
|
|
322
|
-
left_schema_dict, right_schema_dict = left_schema.to_h, right_schema.to_h
|
|
323
|
-
if check_column_order || left_schema_dict != right_schema_dict
|
|
324
|
-
detail = "dtypes do not match"
|
|
325
|
-
raise_assertion_error(objects, detail, left_schema_dict, right_schema_dict)
|
|
326
|
-
end
|
|
327
|
-
end
|
|
328
|
-
end
|
|
329
|
-
|
|
330
|
-
def _sort_dataframes(left, right)
|
|
331
|
-
by = left.columns
|
|
332
|
-
begin
|
|
333
|
-
left = left.sort(by)
|
|
334
|
-
right = right.sort(by)
|
|
335
|
-
rescue
|
|
336
|
-
msg = "cannot set `check_row_order: false` on frame with unsortable columns"
|
|
337
|
-
raise InvalidAssert, msg
|
|
338
|
-
end
|
|
339
|
-
[left, right]
|
|
340
|
-
end
|
|
341
|
-
|
|
342
|
-
def _assert_series_values_equal(
|
|
343
|
-
left,
|
|
344
|
-
right,
|
|
345
|
-
check_exact:,
|
|
346
|
-
rtol:,
|
|
347
|
-
atol:,
|
|
348
|
-
categorical_as_str:
|
|
349
|
-
)
|
|
350
|
-
if categorical_as_str
|
|
351
|
-
if left.dtype == Categorical
|
|
352
|
-
left = left.cast(String)
|
|
353
|
-
end
|
|
354
|
-
if right.dtype == Categorical
|
|
355
|
-
right = right.cast(String)
|
|
356
|
-
end
|
|
357
|
-
end
|
|
358
|
-
|
|
359
|
-
# Determine unequal elements
|
|
360
|
-
begin
|
|
361
|
-
unequal = left.ne_missing(right)
|
|
362
|
-
rescue
|
|
363
|
-
raise_assertion_error(
|
|
364
|
-
"Series",
|
|
365
|
-
"incompatible data types",
|
|
366
|
-
left.dtype,
|
|
367
|
-
right.dtype
|
|
368
|
-
)
|
|
369
|
-
end
|
|
370
|
-
|
|
371
|
-
# Check nested dtypes in separate function
|
|
372
|
-
if _comparing_nested_floats(left.dtype, right.dtype)
|
|
373
|
-
begin
|
|
374
|
-
_assert_series_nested_values_equal(
|
|
375
|
-
left: left.filter(unequal),
|
|
376
|
-
right: right.filter(unequal),
|
|
377
|
-
check_exact: check_exact,
|
|
378
|
-
rtol: rtol,
|
|
379
|
-
atol: atol,
|
|
380
|
-
categorical_as_str: categorical_as_str
|
|
381
|
-
)
|
|
382
|
-
rescue AssertionError
|
|
383
|
-
raise_assertion_error(
|
|
384
|
-
"Series",
|
|
385
|
-
"nested value mismatch",
|
|
386
|
-
left.to_a,
|
|
387
|
-
right.to_a
|
|
388
|
-
)
|
|
389
|
-
else
|
|
390
|
-
return
|
|
391
|
-
end
|
|
392
|
-
end
|
|
393
|
-
|
|
394
|
-
# If no differences found during exact checking, we're done
|
|
395
|
-
if !unequal.any
|
|
396
|
-
return
|
|
397
|
-
end
|
|
398
|
-
|
|
399
|
-
# Only do inexact checking for float types
|
|
400
|
-
if check_exact || !left.dtype.float? || !right.dtype.float?
|
|
401
|
-
raise_assertion_error(
|
|
402
|
-
"Series", "exact value mismatch", left.to_a, right.to_a
|
|
403
|
-
)
|
|
404
|
-
end
|
|
405
|
-
|
|
406
|
-
_assert_series_null_values_match(left, right)
|
|
407
|
-
_assert_series_nan_values_match(left, right)
|
|
408
|
-
_assert_series_values_within_tolerance(
|
|
409
|
-
left,
|
|
410
|
-
right,
|
|
411
|
-
unequal,
|
|
412
|
-
rtol: rtol,
|
|
413
|
-
atol: atol
|
|
414
|
-
)
|
|
415
|
-
end
|
|
416
|
-
|
|
417
|
-
def _assert_series_nested_values_equal(
|
|
418
|
-
left,
|
|
419
|
-
right,
|
|
420
|
-
check_exact:,
|
|
421
|
-
rtol:,
|
|
422
|
-
atol:,
|
|
423
|
-
categorical_as_str:
|
|
424
|
-
)
|
|
425
|
-
# compare nested lists element-wise
|
|
426
|
-
if _comparing_lists(left.dtype, right.dtype)
|
|
427
|
-
left.zip(right) do |s1, s2|
|
|
428
|
-
if s1.nil? || s2.nil?
|
|
429
|
-
raise_assertion_error("Series", "nested value mismatch", s1, s2)
|
|
430
|
-
end
|
|
431
|
-
|
|
432
|
-
_assert_series_values_equal(
|
|
433
|
-
s1,
|
|
434
|
-
s2,
|
|
435
|
-
check_exact: check_exact,
|
|
436
|
-
rtol: rtol,
|
|
437
|
-
atol: atol,
|
|
438
|
-
categorical_as_str: categorical_as_str
|
|
439
|
-
)
|
|
440
|
-
end
|
|
441
|
-
|
|
442
|
-
# unnest structs as series and compare
|
|
443
|
-
else
|
|
444
|
-
ls, rs = left.struct.unnest, right.struct.unnest
|
|
445
|
-
ls.zip(rs) do |s1, s2|
|
|
446
|
-
_assert_series_values_equal(
|
|
447
|
-
s1,
|
|
448
|
-
s2,
|
|
449
|
-
check_exact: check_exact,
|
|
450
|
-
rtol: rtol,
|
|
451
|
-
atol: atol,
|
|
452
|
-
categorical_as_str: categorical_as_str
|
|
453
|
-
)
|
|
454
|
-
end
|
|
455
|
-
end
|
|
456
|
-
end
|
|
457
|
-
|
|
458
|
-
def _assert_series_null_values_match(left, right)
|
|
459
|
-
null_value_mismatch = left.is_null != right.is_null
|
|
460
|
-
if null_value_mismatch.any
|
|
461
|
-
raise_assertion_error(
|
|
462
|
-
"Series", "null value mismatch", left.to_a, right.to_a
|
|
463
|
-
)
|
|
464
|
-
end
|
|
465
|
-
end
|
|
466
|
-
|
|
467
|
-
def _assert_series_nan_values_match(left, right)
|
|
468
|
-
if !_comparing_floats(left.dtype, right.dtype)
|
|
469
|
-
return
|
|
470
|
-
end
|
|
471
|
-
nan_value_mismatch = left.is_nan != right.is_nan
|
|
472
|
-
if nan_value_mismatch.any
|
|
473
|
-
raise_assertion_error(
|
|
474
|
-
"Series",
|
|
475
|
-
"nan value mismatch",
|
|
476
|
-
left.to_a,
|
|
477
|
-
right.to_a
|
|
478
|
-
)
|
|
479
|
-
end
|
|
480
|
-
end
|
|
481
|
-
|
|
482
|
-
def _comparing_floats(left, right)
|
|
483
|
-
left.is_float && right.is_float
|
|
484
|
-
end
|
|
485
|
-
|
|
486
|
-
def _comparing_lists(left, right)
|
|
487
|
-
[List, Array].include?(left) && [List, Array].include?(right)
|
|
488
|
-
end
|
|
489
|
-
|
|
490
|
-
def _comparing_structs(left, right)
|
|
491
|
-
left == Struct && right == Struct
|
|
492
|
-
end
|
|
493
|
-
|
|
494
|
-
def _comparing_nested_floats(left, right)
|
|
495
|
-
if !(_comparing_lists(left, right) || _comparing_structs(left, right))
|
|
496
|
-
return false
|
|
497
|
-
end
|
|
498
|
-
|
|
499
|
-
left.float? && right.float?
|
|
500
|
-
end
|
|
501
|
-
|
|
502
253
|
def raise_assertion_error(objects, detail, left, right)
|
|
503
254
|
msg = "#{objects} are different (#{detail})\n[left]: #{left}\n[right]: #{right}"
|
|
504
255
|
raise AssertionError, msg
|