polars-df 0.7.0-x86_64-linux → 0.9.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +41 -0
- data/Cargo.lock +353 -237
- data/Cargo.toml +0 -3
- data/LICENSE-THIRD-PARTY.txt +1978 -1459
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/{3.0 → 3.3}/polars.so +0 -0
- data/lib/polars/array_expr.rb +449 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +248 -108
- data/lib/polars/data_types.rb +195 -29
- data/lib/polars/date_time_expr.rb +41 -24
- data/lib/polars/date_time_name_space.rb +12 -12
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +1080 -195
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +27 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +3 -3
- data/lib/polars/io.rb +21 -28
- data/lib/polars/lazy_frame.rb +390 -76
- data/lib/polars/list_expr.rb +152 -6
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +557 -59
- data/lib/polars/sql_context.rb +1 -1
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +412 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +64 -20
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -2
- metadata +36 -7
- data/lib/polars/lazy_functions.rb +0 -1197
data/lib/polars/testing.rb
ADDED
@@ -0,0 +1,507 @@
|
|
1
|
+
module Polars
  # Assertion helpers for comparing frames and Series, intended for unit tests.
  #
  # The public `assert_*` methods raise `AssertionError` with a message showing
  # both operands when a comparison fails.
  module Testing
    # Assert that the left and right frame are equal.
    #
    # Raises a detailed `AssertionError` if the frames differ.
    # This function is intended for use in unit tests.
    #
    # @param left [Object]
    #   The first DataFrame or LazyFrame to compare.
    # @param right [Object]
    #   The second DataFrame or LazyFrame to compare.
    # @param check_row_order [Boolean]
    #   Require row order to match.
    # @param check_column_order [Boolean]
    #   Require column order to match.
    # @param check_dtype [Boolean]
    #   Require data types to match.
    # @param check_exact [Boolean]
    #   Require float values to match exactly. If set to `false`, values are considered
    #   equal when within tolerance of each other (see `rtol` and `atol`).
    #   Only affects columns with a Float data type.
    # @param rtol [Float]
    #   Relative tolerance for inexact checking. Fraction of values in `right`.
    # @param atol [Float]
    #   Absolute tolerance for inexact checking.
    # @param categorical_as_str [Boolean]
    #   Cast categorical columns to string before comparing. Enabling this helps
    #   compare columns that do not share the same string cache.
    #
    # @return [nil]
    def assert_frame_equal(
      left,
      right,
      check_row_order: true,
      check_column_order: true,
      check_dtype: true,
      check_exact: false,
      rtol: 1e-5,
      atol: 1e-8,
      categorical_as_str: false
    )
      lazy = _assert_correct_input_type(left, right)
      objects = lazy ? "LazyFrames" : "DataFrames"

      _assert_frame_schema_equal(
        left,
        right,
        check_column_order: check_column_order,
        check_dtype: check_dtype,
        objects: objects
      )

      # Values can only be compared on materialized frames.
      if lazy
        left, right = left.collect, right.collect
      end

      if left.height != right.height
        raise_assertion_error(
          objects, "number of rows does not match", left.height, right.height
        )
      end

      unless check_row_order
        left, right = _sort_dataframes(left, right)
      end

      left.columns.each do |c|
        s_left, s_right = left.get_column(c), right.get_column(c)
        begin
          _assert_series_values_equal(
            s_left,
            s_right,
            check_exact: check_exact,
            rtol: rtol,
            atol: atol,
            categorical_as_str: categorical_as_str
          )
        rescue AssertionError
          # Re-raise with the offending column named in the message.
          raise_assertion_error(
            objects,
            "value mismatch for column #{c.inspect}",
            s_left.to_a,
            s_right.to_a
          )
        end
      end

      nil
    end

    # Assert that the left and right frame are **not** equal.
    #
    # This function is intended for use in unit tests.
    #
    # @param left [Object]
    #   The first DataFrame or LazyFrame to compare.
    # @param right [Object]
    #   The second DataFrame or LazyFrame to compare.
    # @param check_row_order [Boolean]
    #   Require row order to match.
    # @param check_column_order [Boolean]
    #   Require column order to match.
    # @param check_dtype [Boolean]
    #   Require data types to match.
    # @param check_exact [Boolean]
    #   Require float values to match exactly. If set to `false`, values are considered
    #   equal when within tolerance of each other (see `rtol` and `atol`).
    #   Only affects columns with a Float data type.
    # @param rtol [Float]
    #   Relative tolerance for inexact checking. Fraction of values in `right`.
    # @param atol [Float]
    #   Absolute tolerance for inexact checking.
    # @param categorical_as_str [Boolean]
    #   Cast categorical columns to string before comparing. Enabling this helps
    #   compare columns that do not share the same string cache.
    #
    # @return [nil]
    def assert_frame_not_equal(
      left,
      right,
      check_row_order: true,
      check_column_order: true,
      check_dtype: true,
      check_exact: false,
      rtol: 1e-5,
      atol: 1e-8,
      categorical_as_str: false
    )
      begin
        assert_frame_equal(
          left,
          right,
          check_column_order: check_column_order,
          check_row_order: check_row_order,
          check_dtype: check_dtype,
          check_exact: check_exact,
          rtol: rtol,
          atol: atol,
          categorical_as_str: categorical_as_str
        )
      rescue AssertionError
        # The frames differ, which is exactly what the caller asserted.
        return
      end

      msg = "frames are equal"
      raise AssertionError, msg
    end

    # Assert that the left and right Series are equal.
    #
    # Raises a detailed `AssertionError` if the Series differ.
    # This function is intended for use in unit tests.
    #
    # @param left [Object]
    #   The first Series to compare.
    # @param right [Object]
    #   The second Series to compare.
    # @param check_dtype [Boolean]
    #   Require data types to match.
    # @param check_names [Boolean]
    #   Require names to match.
    # @param check_exact [Boolean]
    #   Require float values to match exactly. If set to `false`, values are considered
    #   equal when within tolerance of each other (see `rtol` and `atol`).
    #   Only affects columns with a Float data type.
    # @param rtol [Float]
    #   Relative tolerance for inexact checking, given as a fraction of the values in
    #   `right`.
    # @param atol [Float]
    #   Absolute tolerance for inexact checking.
    # @param categorical_as_str [Boolean]
    #   Cast categorical columns to string before comparing. Enabling this helps
    #   compare columns that do not share the same string cache.
    #
    # @return [nil]
    def assert_series_equal(
      left,
      right,
      check_dtype: true,
      check_names: true,
      check_exact: false,
      rtol: 1e-5,
      atol: 1e-8,
      categorical_as_str: false
    )
      unless left.is_a?(Series) && right.is_a?(Series)
        raise_assertion_error(
          "inputs",
          "unexpected input types",
          left.class.name,
          right.class.name
        )
      end

      if left.len != right.len
        raise_assertion_error("Series", "length mismatch", left.len, right.len)
      end

      if check_names && left.name != right.name
        raise_assertion_error("Series", "name mismatch", left.name, right.name)
      end

      if check_dtype && left.dtype != right.dtype
        raise_assertion_error("Series", "dtype mismatch", left.dtype, right.dtype)
      end

      _assert_series_values_equal(
        left,
        right,
        check_exact: check_exact,
        rtol: rtol,
        atol: atol,
        categorical_as_str: categorical_as_str
      )
    end

    # Assert that the left and right Series are **not** equal.
    #
    # This function is intended for use in unit tests.
    #
    # @param left [Object]
    #   The first Series to compare.
    # @param right [Object]
    #   The second Series to compare.
    # @param check_dtype [Boolean]
    #   Require data types to match.
    # @param check_names [Boolean]
    #   Require names to match.
    # @param check_exact [Boolean]
    #   Require float values to match exactly. If set to `false`, values are considered
    #   equal when within tolerance of each other (see `rtol` and `atol`).
    #   Only affects columns with a Float data type.
    # @param rtol [Float]
    #   Relative tolerance for inexact checking, given as a fraction of the values in
    #   `right`.
    # @param atol [Float]
    #   Absolute tolerance for inexact checking.
    # @param categorical_as_str [Boolean]
    #   Cast categorical columns to string before comparing. Enabling this helps
    #   compare columns that do not share the same string cache.
    #
    # @return [nil]
    def assert_series_not_equal(
      left,
      right,
      check_dtype: true,
      check_names: true,
      check_exact: false,
      rtol: 1e-5,
      atol: 1e-8,
      categorical_as_str: false
    )
      begin
        assert_series_equal(
          left,
          right,
          check_dtype: check_dtype,
          check_names: check_names,
          check_exact: check_exact,
          rtol: rtol,
          atol: atol,
          categorical_as_str: categorical_as_str
        )
      rescue AssertionError
        # The Series differ, which is exactly what the caller asserted.
        return
      end

      msg = "Series are equal"
      raise AssertionError, msg
    end

    private

    # Validate that both inputs are frames of the same kind.
    #
    # @return [Boolean]
    #   `false` for two DataFrames, `true` for two LazyFrames.
    # @raise [AssertionError]
    #   For any other combination, including one eager and one lazy frame.
    def _assert_correct_input_type(left, right)
      if left.is_a?(DataFrame) && right.is_a?(DataFrame)
        false
      elsif left.is_a?(LazyFrame) && right.is_a?(LazyFrame)
        # Both sides must be lazy: the caller collects both when this is true.
        true
      else
        raise_assertion_error(
          "inputs",
          "unexpected input types",
          left.class.name,
          right.class.name
        )
      end
    end

    # Compare the schemas (column names, optional order, optional dtypes) of
    # two frames and raise `AssertionError` on the first discrepancy found.
    #
    # @param objects [String]
    #   Plural label used in messages ("DataFrames" or "LazyFrames").
    def _assert_frame_schema_equal(
      left,
      right,
      check_dtype:,
      check_column_order:,
      objects:
    )
      left_schema, right_schema = left.schema, right.schema
      left_columns, right_columns = left_schema.keys, right_schema.keys

      # Fast path for equal frames. Ruby's Hash#== does not compare insertion
      # order, so the column order is compared explicitly as well.
      # NOTE(review): assumes `schema` is a Hash (or Hash-like) - confirm.
      return if left_schema == right_schema && left_columns == right_columns

      # Special error message for when column names do not match.
      # Array difference ignores ordering; order is checked separately below.
      singular = objects[..-2] # "DataFrames" -> "DataFrame"
      if (left_not_right = left_columns - right_columns).any?
        msg = "columns #{left_not_right.inspect} in left #{singular}, but not in right"
        raise AssertionError, msg
      end
      if (right_not_left = right_columns - left_columns).any?
        msg = "columns #{right_not_left.inspect} in right #{singular}, but not in left"
        raise AssertionError, msg
      end

      if check_column_order && left_columns != right_columns
        detail = "columns are not in the same order"
        raise_assertion_error(objects, detail, left_columns, right_columns)
      end

      if check_dtype
        left_schema_dict, right_schema_dict = left_schema.to_h, right_schema.to_h
        if left_schema_dict != right_schema_dict
          detail = "dtypes do not match"
          raise_assertion_error(objects, detail, left_schema_dict, right_schema_dict)
        end
      end
    end

    # Sort both frames by all columns of `left` so rows can be compared
    # irrespective of their original order.
    #
    # @return [Array]
    #   The sorted `[left, right]` pair.
    # @raise [InvalidAssert]
    #   If either sort raises.
    def _sort_dataframes(left, right)
      by = left.columns
      begin
        left = left.sort(by)
        right = right.sort(by)
      rescue
        msg = "cannot set `check_row_order: false` on frame with unsortable columns"
        raise InvalidAssert, msg
      end
      [left, right]
    end

    # Compare the values of two Series, exactly or within tolerance.
    #
    # @raise [AssertionError]
    #   If the values differ.
    def _assert_series_values_equal(
      left,
      right,
      check_exact:,
      rtol:,
      atol:,
      categorical_as_str:
    )
      if categorical_as_str
        if left.dtype == Categorical
          left = left.cast(String)
        end
        if right.dtype == Categorical
          right = right.cast(String)
        end
      end

      # Determine unequal elements
      begin
        unequal = left.ne_missing(right)
      rescue
        raise_assertion_error(
          "Series",
          "incompatible data types",
          left.dtype,
          right.dtype
        )
      end

      # Check nested dtypes in separate function
      if _comparing_nested_floats(left.dtype, right.dtype)
        begin
          # `left` and `right` are positional parameters of the helper.
          _assert_series_nested_values_equal(
            left.filter(unequal),
            right.filter(unequal),
            check_exact: check_exact,
            rtol: rtol,
            atol: atol,
            categorical_as_str: categorical_as_str
          )
        rescue AssertionError
          raise_assertion_error(
            "Series",
            "nested value mismatch",
            left.to_a,
            right.to_a
          )
        else
          return
        end
      end

      # If no differences found during exact checking, we're done
      return unless unequal.any

      # Only do inexact checking for float types
      if check_exact || !left.dtype.float? || !right.dtype.float?
        raise_assertion_error(
          "Series", "exact value mismatch", left.to_a, right.to_a
        )
      end

      _assert_series_null_values_match(left, right)
      _assert_series_nan_values_match(left, right)
      _assert_series_values_within_tolerance(
        left,
        right,
        unequal,
        rtol: rtol,
        atol: atol
      )
    end

    # Raise unless every differing pair of values satisfies
    # `|left - right| <= atol + rtol * |right|`.
    #
    # NOTE(review): relies on Series supporting element-wise `-`, `*`, `+`,
    # `>` and `abs` - confirm against the Series API.
    #
    # @param unequal [Object]
    #   Boolean mask selecting the positions that are not exactly equal.
    def _assert_series_values_within_tolerance(
      left,
      right,
      unequal,
      rtol:,
      atol:
    )
      left_unequal, right_unequal = left.filter(unequal), right.filter(unequal)

      difference = (left_unequal - right_unequal).abs
      tolerance = right_unequal.abs * rtol + atol
      exceeds_tolerance = difference > tolerance

      if exceeds_tolerance.any
        raise_assertion_error(
          "Series",
          "value mismatch",
          left.to_a,
          right.to_a
        )
      end
    end

    # Compare nested (list or struct) Series by descending one level and
    # delegating to `_assert_series_values_equal`.
    def _assert_series_nested_values_equal(
      left,
      right,
      check_exact:,
      rtol:,
      atol:,
      categorical_as_str:
    )
      # compare nested lists element-wise
      if _comparing_lists(left.dtype, right.dtype)
        left.zip(right) do |s1, s2|
          if s1.nil? || s2.nil?
            raise_assertion_error("Series", "nested value mismatch", s1, s2)
          end

          _assert_series_values_equal(
            s1,
            s2,
            check_exact: check_exact,
            rtol: rtol,
            atol: atol,
            categorical_as_str: categorical_as_str
          )
        end

      # unnest structs as series and compare
      else
        ls, rs = left.struct.unnest, right.struct.unnest
        ls.zip(rs) do |s1, s2|
          _assert_series_values_equal(
            s1,
            s2,
            check_exact: check_exact,
            rtol: rtol,
            atol: atol,
            categorical_as_str: categorical_as_str
          )
        end
      end
    end

    # Raise if the two Series have nulls in different positions.
    def _assert_series_null_values_match(left, right)
      null_value_mismatch = left.is_null != right.is_null
      if null_value_mismatch.any
        raise_assertion_error(
          "Series", "null value mismatch", left.to_a, right.to_a
        )
      end
    end

    # Raise if the two Series have NaNs in different positions.
    # Does nothing unless both dtypes are float types.
    def _assert_series_nan_values_match(left, right)
      return unless _comparing_floats(left.dtype, right.dtype)

      nan_value_mismatch = left.is_nan != right.is_nan
      if nan_value_mismatch.any
        raise_assertion_error(
          "Series",
          "nan value mismatch",
          left.to_a,
          right.to_a
        )
      end
    end

    # Whether both dtypes are float types. Uses the same `float?` predicate
    # the rest of this module applies to dtypes.
    def _comparing_floats(left, right)
      left.float? && right.float?
    end

    # Whether both dtypes are list-like (`List` or `Array`).
    def _comparing_lists(left, right)
      [List, Array].include?(left) && [List, Array].include?(right)
    end

    # Whether both dtypes are `Struct`.
    def _comparing_structs(left, right)
      left == Struct && right == Struct
    end

    # Whether both dtypes are nested (list or struct) float types.
    #
    # NOTE(review): `float?` is asked of the outer dtype; whether a list or
    # struct dtype reports the float-ness of its inner type is not visible
    # here - confirm, otherwise the nested path is never taken.
    def _comparing_nested_floats(left, right)
      unless _comparing_lists(left, right) || _comparing_structs(left, right)
        return false
      end

      left.float? && right.float?
    end

    # Raise an `AssertionError` whose message names what differs and shows
    # both operands.
    def raise_assertion_error(objects, detail, left, right)
      msg = "#{objects} are different (#{detail})\n[left]: #{left}\n[right]: #{right}"
      raise AssertionError, msg
    end
  end
end
|
data/lib/polars/utils.rb
CHANGED
@@ -27,7 +27,7 @@ module Polars
|
|
27
27
|
if obj.is_a?(Range)
|
28
28
|
# size only works for numeric ranges
|
29
29
|
obj.to_a.length
|
30
|
-
elsif obj.is_a?(String)
|
30
|
+
elsif obj.is_a?(::String)
|
31
31
|
nil
|
32
32
|
else
|
33
33
|
obj.length
|
@@ -116,7 +116,7 @@ module Polars
|
|
116
116
|
end
|
117
117
|
|
118
118
|
def self.selection_to_rbexpr_list(exprs)
|
119
|
-
if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
119
|
+
if exprs.is_a?(::String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
120
120
|
exprs = [exprs]
|
121
121
|
end
|
122
122
|
|
@@ -124,9 +124,9 @@ module Polars
|
|
124
124
|
end
|
125
125
|
|
126
126
|
def self.expr_to_lit_or_expr(expr, str_to_lit: true)
|
127
|
-
if (expr.is_a?(String) || expr.is_a?(Symbol)) && !str_to_lit
|
127
|
+
if (expr.is_a?(::String) || expr.is_a?(Symbol)) && !str_to_lit
|
128
128
|
col(expr)
|
129
|
-
elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
|
129
|
+
elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(::String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
|
130
130
|
lit(expr)
|
131
131
|
elsif expr.is_a?(Expr)
|
132
132
|
expr
|
@@ -152,7 +152,7 @@ module Polars
|
|
152
152
|
if data_type == Unknown
|
153
153
|
return include_unknown
|
154
154
|
end
|
155
|
-
data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
|
155
|
+
data_type.is_a?(Symbol) || data_type.is_a?(::String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
|
156
156
|
end
|
157
157
|
|
158
158
|
def self.map_rb_type_to_dtype(ruby_dtype)
|
@@ -160,7 +160,7 @@ module Polars
|
|
160
160
|
Float64
|
161
161
|
elsif ruby_dtype == Integer
|
162
162
|
Int64
|
163
|
-
elsif ruby_dtype == String
|
163
|
+
elsif ruby_dtype == ::String
|
164
164
|
Utf8
|
165
165
|
elsif ruby_dtype == TrueClass || ruby_dtype == FalseClass
|
166
166
|
Boolean
|
@@ -187,7 +187,7 @@ module Polars
|
|
187
187
|
begin
|
188
188
|
map_rb_type_to_dtype(data_type)
|
189
189
|
rescue TypeError
|
190
|
-
raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
|
190
|
+
raise ArgumentError, "Conversion of Ruby data type #{data_type.inspect} to Polars data type not implemented."
|
191
191
|
end
|
192
192
|
end
|
193
193
|
|
@@ -211,7 +211,7 @@ module Polars
|
|
211
211
|
projection = nil
|
212
212
|
if columns
|
213
213
|
raise Todo
|
214
|
-
# if columns.is_a?(String) || columns.is_a?(Symbol)
|
214
|
+
# if columns.is_a?(::String) || columns.is_a?(Symbol)
|
215
215
|
# columns = [columns]
|
216
216
|
# elsif is_int_sequence(columns)
|
217
217
|
# projection = columns.to_a
|
@@ -243,11 +243,11 @@ module Polars
|
|
243
243
|
end
|
244
244
|
|
245
245
|
def self.strlike?(value)
|
246
|
-
value.is_a?(String) || value.is_a?(Symbol)
|
246
|
+
value.is_a?(::String) || value.is_a?(Symbol)
|
247
247
|
end
|
248
248
|
|
249
249
|
def self.pathlike?(value)
|
250
|
-
value.is_a?(String) || (defined?(Pathname) && value.is_a?(Pathname))
|
250
|
+
value.is_a?(::String) || (defined?(Pathname) && value.is_a?(Pathname))
|
251
251
|
end
|
252
252
|
|
253
253
|
def self._is_iterable_of(val, eltype)
|
@@ -275,10 +275,10 @@ module Polars
|
|
275
275
|
end
|
276
276
|
|
277
277
|
def self.is_str_sequence(val, allow_str: false)
|
278
|
-
if allow_str == false && val.is_a?(String)
|
278
|
+
if allow_str == false && val.is_a?(::String)
|
279
279
|
false
|
280
280
|
else
|
281
|
-
val.is_a?(::Array) && _is_iterable_of(val, String)
|
281
|
+
val.is_a?(::Array) && _is_iterable_of(val, ::String)
|
282
282
|
end
|
283
283
|
end
|
284
284
|
|
@@ -286,20 +286,51 @@ module Polars
|
|
286
286
|
Dir.glob(file).any?
|
287
287
|
end
|
288
288
|
|
289
|
-
def self.
|
289
|
+
# Parse positional and named inputs into a single flat list of expressions.
#
# Positional inputs are parsed first; named inputs, if any, are parsed and
# appended to that same list.
def self.parse_as_list_of_expressions(*inputs, __structify: false, **named_inputs)
  parsed = _parse_positional_inputs(inputs, structify: __structify)
  return parsed unless named_inputs.any?

  parsed.concat(_parse_named_inputs(named_inputs, structify: __structify))
end
|
298
|
+
|
299
|
+
# Turn positional inputs into expressions, one per input, after normalizing
# them with `_parse_inputs_as_iterable`.
def self._parse_positional_inputs(inputs, structify: false)
  _parse_inputs_as_iterable(inputs).map do |item|
    parse_as_expression(item, structify: structify)
  end
end
|
303
|
+
|
304
|
+
# Normalize positional inputs: no inputs yields a new empty array, a single
# array argument is unwrapped, and anything else is returned unchanged.
def self._parse_inputs_as_iterable(inputs)
  return [] if inputs.empty?

  sole = inputs.length == 1 ? inputs[0] : nil
  sole.is_a?(::Array) ? sole : inputs
end
|
315
|
+
|
316
|
+
# Turn named inputs into expressions, each aliased to its (stringified) name.
def self._parse_named_inputs(named_inputs, structify: false)
  named_inputs.map do |name, input|
    expr = parse_as_expression(input, structify: structify)
    expr._alias(name.to_s)
  end
end
|
321
|
+
|
322
|
+
def self.parse_as_expression(input, str_as_lit: false, list_as_lit: true, structify: false, dtype: nil)
|
290
323
|
if input.is_a?(Expr)
|
291
324
|
expr = input
|
292
|
-
elsif input.is_a?(String) && !str_as_lit
|
325
|
+
elsif input.is_a?(::String) && !str_as_lit
|
293
326
|
expr = Polars.col(input)
|
294
327
|
structify = false
|
295
|
-
elsif
|
296
|
-
expr = Polars.lit(input)
|
297
|
-
structify = false
|
298
|
-
elsif input.is_a?(Array)
|
299
|
-
expr = Polars.lit(Polars::Series.new("", [input]))
|
328
|
+
elsif input.is_a?(::Array) && !list_as_lit
|
329
|
+
expr = Polars.lit(Series.new(input), dtype: dtype)
|
300
330
|
structify = false
|
301
331
|
else
|
302
|
-
|
332
|
+
expr = Polars.lit(input, dtype: dtype)
|
333
|
+
structify = false
|
303
334
|
end
|
304
335
|
|
305
336
|
if structify
|
@@ -320,5 +351,18 @@ module Polars
|
|
320
351
|
end
|
321
352
|
ambiguous
|
322
353
|
end
|
354
|
+
|
355
|
+
# Validate that a String argument is exactly one byte long (or at most one
# byte when `can_be_empty` is set). Non-String arguments are not checked.
#
# @raise [ArgumentError] if the byte length is not acceptable.
def self._check_arg_is_1byte(arg_name, arg, can_be_empty = false)
  return unless arg.is_a?(::String)

  arg_byte_length = arg.bytesize
  if can_be_empty
    return unless arg_byte_length > 1

    raise ArgumentError, "#{arg_name} should be a single byte character or empty, but is #{arg_byte_length} bytes long."
  end
  return if arg_byte_length == 1

  raise ArgumentError, "#{arg_name} should be a single byte character, but is #{arg_byte_length} bytes long."
end
|
323
367
|
end
|
324
368
|
end
|
data/lib/polars/version.rb
CHANGED