polars-df 0.8.0-aarch64-linux → 0.10.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +42 -1
- data/Cargo.lock +159 -66
- data/Cargo.toml +0 -3
- data/LICENSE-THIRD-PARTY.txt +3112 -1613
- data/LICENSE.txt +1 -1
- data/README.md +3 -2
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +453 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/batched_csv_reader.rb +4 -2
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +306 -96
- data/lib/polars/data_types.rb +191 -28
- data/lib/polars/date_time_expr.rb +41 -18
- data/lib/polars/date_time_name_space.rb +9 -3
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +898 -215
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +2 -2
- data/lib/polars/io.rb +36 -31
- data/lib/polars/lazy_frame.rb +405 -88
- data/lib/polars/list_expr.rb +158 -8
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +282 -41
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +413 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +106 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +16 -4
- metadata +34 -6
- data/lib/polars/lazy_functions.rb +0 -1181
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
@@ -0,0 +1,507 @@
|
|
1
|
+
module Polars
|
2
|
+
module Testing
|
3
|
+
# Check that two frames hold the same data.
#
# The schemas are compared first, then the row counts, then every column of
# `left` against the column of the same name in `right`. The first difference
# found is reported by raising a detailed `AssertionError`.
# This function is intended for use in unit tests.
#
# @param left [Object]
#   The first DataFrame or LazyFrame to compare.
# @param right [Object]
#   The second DataFrame or LazyFrame to compare.
# @param check_row_order [Boolean]
#   Require row order to match. When `false`, both frames are sorted by all
#   columns before the values are compared.
# @param check_column_order [Boolean]
#   Require column order to match.
# @param check_dtype [Boolean]
#   Require data types to match.
# @param check_exact [Boolean]
#   Require float values to match exactly. If set to `false`, values are considered
#   equal when within tolerance of each other (see `rtol` and `atol`).
#   Only affects columns with a Float data type.
# @param rtol [Float]
#   Relative tolerance for inexact checking. Fraction of values in `right`.
# @param atol [Float]
#   Absolute tolerance for inexact checking.
# @param categorical_as_str [Boolean]
#   Cast categorical columns to string before comparing. Enabling this helps
#   compare columns that do not share the same string cache.
#
# @return [Object] no meaningful value; a mismatch is signalled by raising
# @raise [AssertionError] if the frames differ
def assert_frame_equal(
  left,
  right,
  check_row_order: true,
  check_column_order: true,
  check_dtype: true,
  check_exact: false,
  rtol: 1e-5,
  atol: 1e-8,
  categorical_as_str: false
)
  lazy = _assert_correct_input_type(left, right)
  objects = lazy ? "LazyFrames" : "DataFrames"

  _assert_frame_schema_equal(
    left,
    right,
    check_column_order: check_column_order,
    check_dtype: check_dtype,
    objects: objects
  )

  # Lazy inputs are only collected once the schema check has passed
  left, right = left.collect, right.collect if lazy

  left_rows = left.height
  right_rows = right.height
  if left_rows != right_rows
    raise_assertion_error(objects, "number of rows does not match", left.height, right.height)
  end

  left, right = _sort_dataframes(left, right) unless check_row_order

  left.columns.each do |name|
    left_col = left.get_column(name)
    right_col = right.get_column(name)

    begin
      _assert_series_values_equal(
        left_col,
        right_col,
        check_exact: check_exact,
        rtol: rtol,
        atol: atol,
        categorical_as_str: categorical_as_str
      )
    rescue AssertionError
      # Re-report the failure at frame level, naming the offending column
      raise_assertion_error(
        objects,
        "value mismatch for column #{name.inspect}",
        left_col.to_a,
        right_col.to_a
      )
    end
  end
end
|
88
|
+
|
89
|
+
# Check that two frames are **not** equal.
#
# Runs `assert_frame_equal` with the same options and inverts the outcome:
# a failed equality assertion means success here, while a passing one raises.
# This function is intended for use in unit tests.
#
# @param left [Object]
#   The first DataFrame or LazyFrame to compare.
# @param right [Object]
#   The second DataFrame or LazyFrame to compare.
# @param check_row_order [Boolean]
#   Require row order to match.
# @param check_column_order [Boolean]
#   Require column order to match.
# @param check_dtype [Boolean]
#   Require data types to match.
# @param check_exact [Boolean]
#   Require float values to match exactly. If set to `false`, values are considered
#   equal when within tolerance of each other (see `rtol` and `atol`).
#   Only affects columns with a Float data type.
# @param rtol [Float]
#   Relative tolerance for inexact checking. Fraction of values in `right`.
# @param atol [Float]
#   Absolute tolerance for inexact checking.
# @param categorical_as_str [Boolean]
#   Cast categorical columns to string before comparing. Enabling this helps
#   compare columns that do not share the same string cache.
#
# @return [nil]
# @raise [AssertionError] if the frames are equal
def assert_frame_not_equal(
  left,
  right,
  check_row_order: true,
  check_column_order: true,
  check_dtype: true,
  check_exact: false,
  rtol: 1e-5,
  atol: 1e-8,
  categorical_as_str: false
)
  assert_frame_equal(
    left,
    right,
    check_column_order: check_column_order,
    check_row_order: check_row_order,
    check_dtype: check_dtype,
    check_exact: check_exact,
    rtol: rtol,
    atol: atol,
    categorical_as_str: categorical_as_str
  )
rescue AssertionError
  # The frames differ, which is exactly what the caller asserted
  nil
else
  raise AssertionError, "frames are equal"
end
|
146
|
+
|
147
|
+
# Check that two Series are equal.
#
# Validates, in order: input types, length, name (optional), dtype (optional)
# and finally the values. The first difference found is reported by raising a
# detailed `AssertionError`.
# This function is intended for use in unit tests.
#
# @param left [Object]
#   The first Series to compare.
# @param right [Object]
#   The second Series to compare.
# @param check_dtype [Boolean]
#   Require data types to match.
# @param check_names [Boolean]
#   Require names to match.
# @param check_exact [Boolean]
#   Require float values to match exactly. If set to `false`, values are considered
#   equal when within tolerance of each other (see `rtol` and `atol`).
#   Only affects columns with a Float data type.
# @param rtol [Float]
#   Relative tolerance for inexact checking, given as a fraction of the values in
#   `right`.
# @param atol [Float]
#   Absolute tolerance for inexact checking.
# @param categorical_as_str [Boolean]
#   Cast categorical columns to string before comparing. Enabling this helps
#   compare columns that do not share the same string cache.
#
# @return [Object] no meaningful value; a mismatch is signalled by raising
# @raise [AssertionError] if the Series differ
def assert_series_equal(
  left,
  right,
  check_dtype: true,
  check_names: true,
  check_exact: false,
  rtol: 1e-5,
  atol: 1e-8,
  categorical_as_str: false
)
  both_series = left.is_a?(Series) && right.is_a?(Series)
  unless both_series
    raise_assertion_error("inputs", "unexpected input types", left.class.name, right.class.name)
  end

  raise_assertion_error("Series", "length mismatch", left.len, right.len) if left.len != right.len

  if check_names && left.name != right.name
    raise_assertion_error("Series", "name mismatch", left.name, right.name)
  end

  if check_dtype && left.dtype != right.dtype
    raise_assertion_error("Series", "dtype mismatch", left.dtype, right.dtype)
  end

  _assert_series_values_equal(
    left,
    right,
    check_exact: check_exact,
    rtol: rtol,
    atol: atol,
    categorical_as_str: categorical_as_str
  )
end
|
214
|
+
|
215
|
+
# Check that two Series are **not** equal.
#
# Runs `assert_series_equal` with the same options and inverts the outcome:
# a failed equality assertion means success here, while a passing one raises.
# This function is intended for use in unit tests.
#
# @param left [Object]
#   The first Series to compare.
# @param right [Object]
#   The second Series to compare.
# @param check_dtype [Boolean]
#   Require data types to match.
# @param check_names [Boolean]
#   Require names to match.
# @param check_exact [Boolean]
#   Require float values to match exactly. If set to `false`, values are considered
#   equal when within tolerance of each other (see `rtol` and `atol`).
#   Only affects columns with a Float data type.
# @param rtol [Float]
#   Relative tolerance for inexact checking, given as a fraction of the values in
#   `right`.
# @param atol [Float]
#   Absolute tolerance for inexact checking.
# @param categorical_as_str [Boolean]
#   Cast categorical columns to string before comparing. Enabling this helps
#   compare columns that do not share the same string cache.
#
# @return [nil]
# @raise [AssertionError] if the Series are equal
def assert_series_not_equal(
  left,
  right,
  check_dtype: true,
  check_names: true,
  check_exact: false,
  rtol: 1e-5,
  atol: 1e-8,
  categorical_as_str: false
)
  assert_series_equal(
    left,
    right,
    check_dtype: check_dtype,
    check_names: check_names,
    check_exact: check_exact,
    rtol: rtol,
    atol: atol,
    categorical_as_str: categorical_as_str
  )
rescue AssertionError
  # The Series differ, which is exactly what the caller asserted
  nil
else
  raise AssertionError, "Series are equal"
end
|
269
|
+
|
270
|
+
private
|
271
|
+
|
272
|
+
# Verify that both inputs are frames of the same kind.
#
# @param left [Object]
#   The first object passed to the frame assertion.
# @param right [Object]
#   The second object passed to the frame assertion.
#
# @return [Boolean]
#   `false` when both inputs are DataFrames, `true` when both are LazyFrames.
# @raise [AssertionError]
#   for any other combination, including one eager and one lazy frame.
def _assert_correct_input_type(left, right)
  return false if left.is_a?(DataFrame) && right.is_a?(DataFrame)

  # Both sides must be lazy: the caller calls `collect` on left AND right
  # whenever this method returns true.
  return true if left.is_a?(LazyFrame) && right.is_a?(LazyFrame)

  raise_assertion_error(
    "inputs",
    "unexpected input types",
    left.class.name,
    right.class.name
  )
end
|
286
|
+
|
287
|
+
# Compare the schemas of two frames and raise on the first difference.
#
# Checks, in order: that both frames have the same set of column names, that
# the names are in the same order (only when `check_column_order`), and that
# the dtypes agree (only when `check_dtype`).
#
# @param left [Object]
#   The first frame; must respond to `schema`.
# @param right [Object]
#   The second frame; must respond to `schema`.
# @param check_dtype [Boolean]
#   Require data types to match.
# @param check_column_order [Boolean]
#   Require column order to match.
# @param objects [String]
#   Plural label used in error messages ("DataFrames" or "LazyFrames").
#
# @return [nil]
# @raise [AssertionError] if the schemas differ
def _assert_frame_schema_equal(
  left,
  right,
  check_dtype:,
  check_column_order:,
  objects:
)
  left_schema, right_schema = left.schema, right.schema
  left_columns, right_columns = left_schema.keys, right_schema.keys

  # Fast path for equal frames. The key order is compared explicitly because
  # Hash equality ignores insertion order and would hide a column reordering.
  return if left_schema == right_schema && left_columns == right_columns

  # Special error message for when column names do not match. Set differences
  # are used so that a pure reordering is not reported as a missing column.
  left_not_right = left_columns - right_columns
  right_not_left = right_columns - left_columns
  singular = objects.delete_suffix("s") # "DataFrames" -> "DataFrame"
  if left_not_right.any?
    raise AssertionError, "columns #{left_not_right.inspect} in left #{singular}, but not in right"
  elsif right_not_left.any?
    raise AssertionError, "columns #{right_not_left.inspect} in right #{singular}, but not in left"
  end

  if check_column_order && left_columns != right_columns
    raise_assertion_error(objects, "columns are not in the same order", left_columns, right_columns)
  end

  return unless check_dtype

  left_schema_dict, right_schema_dict = left_schema.to_h, right_schema.to_h
  # With column order enforced, names and order already match at this point,
  # so the only remaining way for the schemas to differ is the dtypes.
  if check_column_order || left_schema_dict != right_schema_dict
    raise_assertion_error(objects, "dtypes do not match", left_schema_dict, right_schema_dict)
  end
end
|
329
|
+
|
330
|
+
# Sort both frames by all columns of `left` so rows can be compared
# independently of their original order.
#
# @param left [Object]
#   The first frame; its `columns` are used as the sort keys for both frames.
# @param right [Object]
#   The second frame.
#
# @return [Array]
#   Two-element array holding the sorted left and right frames.
# @raise [InvalidAssert] if sorting either frame raises an error
def _sort_dataframes(left, right)
  sort_keys = left.columns

  begin
    [left.sort(sort_keys), right.sort(sort_keys)]
  rescue
    raise InvalidAssert, "cannot set `check_row_order: false` on frame with unsortable columns"
  end
end
|
341
|
+
|
342
|
+
# Compare the values of two Series and raise on the first kind of mismatch.
#
# Steps: optionally cast categoricals to strings, compute the mask of unequal
# elements, delegate nested list/struct float data to
# `_assert_series_nested_values_equal`, and otherwise fall back to
# tolerance-based checks for float data.
#
# @param left [Object]
#   The first Series.
# @param right [Object]
#   The second Series.
# @param check_exact [Boolean]
#   Require values to match exactly instead of within tolerance.
# @param rtol [Float]
#   Relative tolerance for inexact checking.
# @param atol [Float]
#   Absolute tolerance for inexact checking.
# @param categorical_as_str [Boolean]
#   Cast categorical Series to string before comparing.
#
# @return [Object] no meaningful value; a mismatch is signalled by raising
# @raise [AssertionError] if the values differ
def _assert_series_values_equal(
  left,
  right,
  check_exact:,
  rtol:,
  atol:,
  categorical_as_str:
)
  if categorical_as_str
    left = left.cast(String) if left.dtype == Categorical
    right = right.cast(String) if right.dtype == Categorical
  end

  # Determine unequal elements
  begin
    unequal = left.ne_missing(right)
  rescue
    raise_assertion_error(
      "Series",
      "incompatible data types",
      left.dtype,
      right.dtype
    )
  end

  # Check nested dtypes in separate function
  if _comparing_nested_floats(left.dtype, right.dtype)
    begin
      # The helper declares `left` and `right` as positional parameters, so
      # they must not be passed as keywords (that raises ArgumentError).
      _assert_series_nested_values_equal(
        left.filter(unequal),
        right.filter(unequal),
        check_exact: check_exact,
        rtol: rtol,
        atol: atol,
        categorical_as_str: categorical_as_str
      )
    rescue AssertionError
      raise_assertion_error(
        "Series",
        "nested value mismatch",
        left.to_a,
        right.to_a
      )
    else
      return
    end
  end

  # If no differences found during exact checking, we're done
  return unless unequal.any

  # Only do inexact checking for float types
  if check_exact || !left.dtype.float? || !right.dtype.float?
    raise_assertion_error(
      "Series", "exact value mismatch", left.to_a, right.to_a
    )
  end

  _assert_series_null_values_match(left, right)
  _assert_series_nan_values_match(left, right)
  _assert_series_values_within_tolerance(
    left,
    right,
    unequal,
    rtol: rtol,
    atol: atol
  )
end
|
416
|
+
|
417
|
+
# Compare nested (list-like or struct) Series piece by piece.
#
# When `_comparing_lists` accepts both dtypes, the two Series are walked in
# parallel and every pair of elements is compared; a `nil` element on either
# side is reported as a mismatch. Otherwise both Series are unnested through
# their `struct` namespace and the resulting pieces are compared pairwise.
#
# @param left [Object]
#   The first nested Series.
# @param right [Object]
#   The second nested Series.
# @param check_exact [Boolean]
#   Require values to match exactly instead of within tolerance.
# @param rtol [Float]
#   Relative tolerance for inexact checking.
# @param atol [Float]
#   Absolute tolerance for inexact checking.
# @param categorical_as_str [Boolean]
#   Cast categorical Series to string before comparing.
#
# @return [Object] no meaningful value; a mismatch is signalled by raising
# @raise [AssertionError] if any nested value differs
def _assert_series_nested_values_equal(
  left,
  right,
  check_exact:,
  rtol:,
  atol:,
  categorical_as_str:
)
  options = {
    check_exact: check_exact,
    rtol: rtol,
    atol: atol,
    categorical_as_str: categorical_as_str
  }

  unless _comparing_lists(left.dtype, right.dtype)
    # unnest structs as series and compare
    left_fields = left.struct.unnest
    right_fields = right.struct.unnest
    return left_fields.zip(right_fields) do |l_field, r_field|
      _assert_series_values_equal(l_field, r_field, **options)
    end
  end

  # compare nested lists element-wise
  left.zip(right) do |l_elem, r_elem|
    if l_elem.nil? || r_elem.nil?
      raise_assertion_error("Series", "nested value mismatch", l_elem, r_elem)
    end

    _assert_series_values_equal(l_elem, r_elem, **options)
  end
end
|
457
|
+
|
458
|
+
# Raise if the `is_null` masks of the two Series differ anywhere.
#
# @param left [Object]
#   The first Series.
# @param right [Object]
#   The second Series.
#
# @return [nil]
# @raise [AssertionError] if the null masks differ
def _assert_series_null_values_match(left, right)
  mismatch_mask = left.is_null != right.is_null
  return unless mismatch_mask.any

  raise_assertion_error("Series", "null value mismatch", left.to_a, right.to_a)
end
|
466
|
+
|
467
|
+
# Raise if the `is_nan` masks of the two Series differ anywhere.
#
# The check is skipped unless `_comparing_floats` accepts both dtypes.
#
# @param left [Object]
#   The first Series.
# @param right [Object]
#   The second Series.
#
# @return [nil]
# @raise [AssertionError] if the NaN masks differ
def _assert_series_nan_values_match(left, right)
  return unless _comparing_floats(left.dtype, right.dtype)

  mismatch_mask = left.is_nan != right.is_nan
  return unless mismatch_mask.any

  raise_assertion_error("Series", "nan value mismatch", left.to_a, right.to_a)
end
|
481
|
+
|
482
|
+
# Tell whether both data types report themselves as float.
#
# NOTE(review): other checks in this file call `float?` on dtypes while this
# one calls `is_float` - confirm the dtype objects respond to `is_float`.
#
# @param left [Object]
#   The first data type.
# @param right [Object]
#   The second data type.
#
# @return [Boolean]
def _comparing_floats(left, right)
  left_is_float = left.is_float
  left_is_float && right.is_float
end
|
485
|
+
|
486
|
+
# Tell whether both data types are one of `List` or `Array`.
#
# @param left [Object]
#   The first data type.
# @param right [Object]
#   The second data type.
#
# @return [Boolean]
def _comparing_lists(left, right)
  [left, right].all? { |dtype| [List, Array].include?(dtype) }
end
|
489
|
+
|
490
|
+
# Tell whether both data types compare equal to `Struct`.
#
# @param left [Object]
#   The first data type.
# @param right [Object]
#   The second data type.
#
# @return [Boolean]
def _comparing_structs(left, right)
  left_is_struct = left == Struct
  left_is_struct && right == Struct
end
|
493
|
+
|
494
|
+
# Tell whether both data types are nested (both list-like or both structs)
# and both answer true to `float?`.
#
# @param left [Object]
#   The first data type.
# @param right [Object]
#   The second data type.
#
# @return [Boolean]
def _comparing_nested_floats(left, right)
  both_nested = _comparing_lists(left, right) || _comparing_structs(left, right)
  return false unless both_nested

  left.float? && right.float?
end
|
501
|
+
|
502
|
+
# Raise an `AssertionError` with the standard "are different" message layout.
#
# @param objects [String]
#   Label for what was compared, placed at the start of the message.
# @param detail [String]
#   Short description of the mismatch, shown in parentheses.
# @param left [Object]
#   Left-hand value, interpolated after `[left]:`.
# @param right [Object]
#   Right-hand value, interpolated after `[right]:`.
#
# @raise [AssertionError] always
def raise_assertion_error(objects, detail, left, right)
  raise AssertionError, "#{objects} are different (#{detail})\n[left]: #{left}\n[right]: #{right}"
end
|
506
|
+
end
|
507
|
+
end
|
data/lib/polars/utils.rb
CHANGED
@@ -187,7 +187,7 @@ module Polars
|
|
187
187
|
begin
|
188
188
|
map_rb_type_to_dtype(data_type)
|
189
189
|
rescue TypeError
|
190
|
-
raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
|
190
|
+
raise ArgumentError, "Conversion of Ruby data type #{data_type.inspect} to Polars data type not implemented."
|
191
191
|
end
|
192
192
|
end
|
193
193
|
|
@@ -286,20 +286,51 @@ module Polars
|
|
286
286
|
Dir.glob(file).any?
|
287
287
|
end
|
288
288
|
|
289
|
-
def self.
|
289
|
+
# Parse positional and named inputs into a single list of expressions.
#
# Positional inputs come first, followed by the named inputs, each of which
# is aliased to its keyword.
#
# @param inputs [Array<Object>]
#   Positional inputs; a single array argument is treated as the input list.
# @param __structify [Boolean]
#   Forwarded as `structify` to the parsing helpers.
# @param named_inputs [Hash]
#   Inputs given as keywords.
#
# @return [Array]
def self.parse_as_list_of_expressions(*inputs, __structify: false, **named_inputs)
  parsed = _parse_positional_inputs(inputs, structify: __structify)
  return parsed unless named_inputs.any?

  parsed.concat(_parse_named_inputs(named_inputs, structify: __structify))
end
|
298
|
+
|
299
|
+
# Parse each positional input with `parse_as_expression`.
#
# @param inputs [Array<Object>]
#   The positional inputs, normalized first by `_parse_inputs_as_iterable`.
# @param structify [Boolean]
#   Forwarded to `parse_as_expression`.
#
# @return [Array]
def self._parse_positional_inputs(inputs, structify: false)
  _parse_inputs_as_iterable(inputs).map do |input|
    parse_as_expression(input, structify: structify)
  end
end
|
303
|
+
|
304
|
+
# Normalize a splatted argument list.
#
# A lone array argument is unwrapped so that `f([a, b])` and `f(a, b)` are
# treated alike; any other argument list is returned unchanged.
#
# @param inputs [Array<Object>]
#   The collected positional arguments.
#
# @return [Array]
def self._parse_inputs_as_iterable(inputs)
  return [] if inputs.empty?

  lone_array = inputs.length == 1 && inputs[0].is_a?(::Array)
  lone_array ? inputs[0] : inputs
end
|
315
|
+
|
316
|
+
# Parse keyword inputs, aliasing each parsed expression to its keyword name.
#
# @param named_inputs [Hash]
#   Mapping of output name to input.
# @param structify [Boolean]
#   Forwarded to `parse_as_expression`.
#
# @return [Array]
def self._parse_named_inputs(named_inputs, structify: false)
  named_inputs.map do |name, input|
    parsed = parse_as_expression(input, structify: structify)
    parsed._alias(name.to_s)
  end
end
|
321
|
+
|
322
|
+
def self.parse_as_expression(input, str_as_lit: false, list_as_lit: true, structify: false, dtype: nil)
|
290
323
|
if input.is_a?(Expr)
|
291
324
|
expr = input
|
292
325
|
elsif input.is_a?(::String) && !str_as_lit
|
293
326
|
expr = Polars.col(input)
|
294
327
|
structify = false
|
295
|
-
elsif
|
296
|
-
expr = Polars.lit(input)
|
297
|
-
structify = false
|
298
|
-
elsif input.is_a?(Array)
|
299
|
-
expr = Polars.lit(Polars::Series.new("", [input]))
|
328
|
+
elsif input.is_a?(::Array) && !list_as_lit
|
329
|
+
expr = Polars.lit(Series.new(input), dtype: dtype)
|
300
330
|
structify = false
|
301
331
|
else
|
302
|
-
|
332
|
+
expr = Polars.lit(input, dtype: dtype)
|
333
|
+
structify = false
|
303
334
|
end
|
304
335
|
|
305
336
|
if structify
|
@@ -320,5 +351,72 @@ module Polars
|
|
320
351
|
end
|
321
352
|
ambiguous
|
322
353
|
end
|
354
|
+
|
355
|
+
# Validate that a string argument is exactly one byte long.
#
# Non-string arguments are not checked.
#
# @param arg_name [String]
#   Name of the argument, used in the error message.
# @param arg [Object]
#   The value to validate.
# @param can_be_empty [Boolean]
#   Also accept the empty string.
#
# @return [nil]
# @raise [ArgumentError] if `arg` is a string of an unacceptable byte length
def self._check_arg_is_1byte(arg_name, arg, can_be_empty = false)
  return unless arg.is_a?(::String)

  byte_count = arg.bytesize

  if can_be_empty
    return if byte_count <= 1

    raise ArgumentError, "#{arg_name} should be a single byte character or empty, but is #{byte_count} bytes long."
  end

  return if byte_count == 1

  raise ArgumentError, "#{arg_name} should be a single byte character, but is #{byte_count} bytes long."
end
|
367
|
+
|
368
|
+
# Replace every selector among `items` with the columns it expands to.
#
# Items for which `is_selector` is false are kept unchanged, in order.
#
# @param frame [Object]
#   Frame passed to `expand_selector` for each selector.
# @param items [Array<Object>]
#   Items to expand; a single array argument is treated as the item list.
#
# @return [Array]
def self._expand_selectors(frame, *items)
  _parse_inputs_as_iterable(items).each_with_object([]) do |item, expanded|
    if is_selector(item)
      expanded.concat(expand_selector(frame, item))
    else
      expanded << item
    end
  end
end
|
382
|
+
|
383
|
+
# TODO
#
# Placeholder selector check: currently reports `false` for every object.
#
# @param obj [Object]
#   The object to test (ignored for now).
#
# @return [Boolean] always `false`
def self.is_selector(obj)
  false
end
|
387
|
+
|
388
|
+
# Build one predicate out of positional predicates and keyword constraints.
#
# Positional predicates are parsed first, constraints are appended as
# equality checks, and the whole list is handed to `_combine_predicates`.
#
# @param predicates [Array<Object>]
#   Positional predicates.
# @param constraints [Hash]
#   Column name to required value.
#
# @return [Object] the result of `_combine_predicates`
# @raise [TypeError] if neither predicates nor constraints are given
def self.parse_predicates_constraints_as_expression(*predicates, **constraints)
  combined = _parse_positional_inputs(predicates)
  combined.concat(_parse_constraints(constraints)) if constraints.any?

  _combine_predicates(combined)
end
|
398
|
+
|
399
|
+
# Turn `name => value` constraints into equality expressions.
#
# @param constraints [Hash]
#   Column name to required value.
#
# @return [Array]
#   One `_rbexpr` per constraint, each taken from `Polars.col(name).eq(value)`.
def self._parse_constraints(constraints)
  constraints.map do |column, expected|
    equality = Polars.col(column).eq(expected)
    equality._rbexpr
  end
end
|
404
|
+
|
405
|
+
# Collapse a list of predicates into a single one.
#
# A lone predicate is returned as is; several predicates are combined with
# `Plr.all_horizontal`.
#
# @param predicates [Array]
#   The predicates to combine.
#
# @return [Object]
# @raise [TypeError] if no predicate is present
def self._combine_predicates(predicates)
  unless predicates.any?
    raise TypeError, "at least one predicate or constraint must be provided"
  end

  predicates.length == 1 ? predicates[0] : Plr.all_horizontal(predicates)
end
|
417
|
+
|
418
|
+
# Parse the arguments of a `when` clause into a single predicate.
#
# Delegates unchanged to `parse_predicates_constraints_as_expression`.
#
# @param predicates [Array<Object>]
#   Positional predicates.
# @param constraints [Hash]
#   Column name to required value.
#
# @return [Object]
def self.parse_when_inputs(*predicates, **constraints)
  parse_predicates_constraints_as_expression(
    *predicates,
    **constraints
  )
end
|
323
421
|
end
|
324
422
|
end
|
data/lib/polars/version.rb
CHANGED