polars-df 0.7.0-x86_64-linux → 0.9.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +41 -0
- data/Cargo.lock +353 -237
- data/Cargo.toml +0 -3
- data/LICENSE-THIRD-PARTY.txt +1978 -1459
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/{3.0 → 3.3}/polars.so +0 -0
- data/lib/polars/array_expr.rb +449 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +248 -108
- data/lib/polars/data_types.rb +195 -29
- data/lib/polars/date_time_expr.rb +41 -24
- data/lib/polars/date_time_name_space.rb +12 -12
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +1080 -195
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +27 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +3 -3
- data/lib/polars/io.rb +21 -28
- data/lib/polars/lazy_frame.rb +390 -76
- data/lib/polars/list_expr.rb +152 -6
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +557 -59
- data/lib/polars/sql_context.rb +1 -1
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +412 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +64 -20
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -2
- metadata +36 -7
- data/lib/polars/lazy_functions.rb +0 -1197
data/lib/polars/testing.rb
CHANGED
@@ -0,0 +1,507 @@
|
|
1
|
+
module Polars
|
2
|
+
module Testing
|
3
|
+
# Assert that the left and right frame are equal.
|
4
|
+
#
|
5
|
+
# Raises a detailed `AssertionError` if the frames differ.
|
6
|
+
# This function is intended for use in unit tests.
|
7
|
+
#
|
8
|
+
# @param left [Object]
|
9
|
+
# The first DataFrame or LazyFrame to compare.
|
10
|
+
# @param right [Object]
|
11
|
+
# The second DataFrame or LazyFrame to compare.
|
12
|
+
# @param check_row_order [Boolean]
|
13
|
+
# Require row order to match.
|
14
|
+
# @param check_column_order [Boolean]
|
15
|
+
# Require column order to match.
|
16
|
+
# @param check_dtype [Boolean]
|
17
|
+
# Require data types to match.
|
18
|
+
# @param check_exact [Boolean]
|
19
|
+
# Require float values to match exactly. If set to `false`, values are considered
|
20
|
+
# equal when within tolerance of each other (see `rtol` and `atol`).
|
21
|
+
# Only affects columns with a Float data type.
|
22
|
+
# @param rtol [Float]
|
23
|
+
# Relative tolerance for inexact checking. Fraction of values in `right`.
|
24
|
+
# @param atol [Float]
|
25
|
+
# Absolute tolerance for inexact checking.
|
26
|
+
# @param categorical_as_str [Boolean]
|
27
|
+
# Cast categorical columns to string before comparing. Enabling this helps
|
28
|
+
# compare columns that do not share the same string cache.
|
29
|
+
#
|
30
|
+
# @return [nil]
|
31
|
+
def assert_frame_equal(
  left,
  right,
  check_row_order: true,
  check_column_order: true,
  check_dtype: true,
  check_exact: false,
  rtol: 1e-5,
  atol: 1e-8,
  categorical_as_str: false
)
  # The input-type check also tells us whether we are comparing lazy frames.
  is_lazy = _assert_correct_input_type(left, right)
  kind = is_lazy ? "LazyFrames" : "DataFrames"

  # Schemas are compared first, before any lazy input is collected.
  _assert_frame_schema_equal(
    left,
    right,
    check_column_order: check_column_order,
    check_dtype: check_dtype,
    objects: kind
  )

  # Collect lazy inputs so that heights and column values can be read.
  if is_lazy
    left = left.collect
    right = right.collect
  end

  if left.height != right.height
    raise_assertion_error(kind, "number of rows does not match", left.height, right.height)
  end

  # Without a row-order requirement both frames are sorted the same way first.
  left, right = _sort_dataframes(left, right) unless check_row_order

  # Compare column by column, looking each column up by name in both frames.
  left.columns.each do |name|
    lhs = left.get_column(name)
    rhs = right.get_column(name)
    begin
      _assert_series_values_equal(
        lhs,
        rhs,
        check_exact: check_exact,
        rtol: rtol,
        atol: atol,
        categorical_as_str: categorical_as_str
      )
    rescue AssertionError
      # Re-raise with the column name so the failure points at the offending column.
      raise_assertion_error(kind, "value mismatch for column #{name.inspect}", lhs.to_a, rhs.to_a)
    end
  end
end
|
88
|
+
|
89
|
+
# Assert that the left and right frame are **not** equal.
|
90
|
+
#
|
91
|
+
# This function is intended for use in unit tests.
|
92
|
+
#
|
93
|
+
# @param left [Object]
|
94
|
+
# The first DataFrame or LazyFrame to compare.
|
95
|
+
# @param right [Object]
|
96
|
+
# The second DataFrame or LazyFrame to compare.
|
97
|
+
# @param check_row_order [Boolean]
|
98
|
+
# Require row order to match.
|
99
|
+
# @param check_column_order [Boolean]
|
100
|
+
# Require column order to match.
|
101
|
+
# @param check_dtype [Boolean]
|
102
|
+
# Require data types to match.
|
103
|
+
# @param check_exact [Boolean]
|
104
|
+
# Require float values to match exactly. If set to `false`, values are considered
|
105
|
+
# equal when within tolerance of each other (see `rtol` and `atol`).
|
106
|
+
# Only affects columns with a Float data type.
|
107
|
+
# @param rtol [Float]
|
108
|
+
# Relative tolerance for inexact checking. Fraction of values in `right`.
|
109
|
+
# @param atol [Float]
|
110
|
+
# Absolute tolerance for inexact checking.
|
111
|
+
# @param categorical_as_str [Boolean]
|
112
|
+
# Cast categorical columns to string before comparing. Enabling this helps
|
113
|
+
# compare columns that do not share the same string cache.
|
114
|
+
#
|
115
|
+
# @return [nil]
|
116
|
+
def assert_frame_not_equal(
  left,
  right,
  check_row_order: true,
  check_column_order: true,
  check_dtype: true,
  check_exact: false,
  rtol: 1e-5,
  atol: 1e-8,
  categorical_as_str: false
)
  # Run the equality assertion with the same options; a failure there is
  # exactly the success condition here.
  assert_frame_equal(
    left,
    right,
    check_column_order: check_column_order,
    check_row_order: check_row_order,
    check_dtype: check_dtype,
    check_exact: check_exact,
    rtol: rtol,
    atol: atol,
    categorical_as_str: categorical_as_str
  )
rescue AssertionError
  # The frames differ, which is what the caller asked for.
  nil
else
  # The equality assertion passed, so the frames are equal.
  raise AssertionError, "frames are equal"
end
|
146
|
+
|
147
|
+
# Assert that the left and right Series are equal.
|
148
|
+
#
|
149
|
+
# Raises a detailed `AssertionError` if the Series differ.
|
150
|
+
# This function is intended for use in unit tests.
|
151
|
+
#
|
152
|
+
# @param left [Object]
|
153
|
+
# The first Series to compare.
|
154
|
+
# @param right [Object]
|
155
|
+
# The second Series to compare.
|
156
|
+
# @param check_dtype [Boolean]
|
157
|
+
# Require data types to match.
|
158
|
+
# @param check_names [Boolean]
|
159
|
+
# Require names to match.
|
160
|
+
# @param check_exact [Boolean]
|
161
|
+
# Require float values to match exactly. If set to `false`, values are considered
|
162
|
+
# equal when within tolerance of each other (see `rtol` and `atol`).
|
163
|
+
# Only affects columns with a Float data type.
|
164
|
+
# @param rtol [Float]
|
165
|
+
# Relative tolerance for inexact checking, given as a fraction of the values in
|
166
|
+
# `right`.
|
167
|
+
# @param atol [Float]
|
168
|
+
# Absolute tolerance for inexact checking.
|
169
|
+
# @param categorical_as_str [Boolean]
|
170
|
+
# Cast categorical columns to string before comparing. Enabling this helps
|
171
|
+
# compare columns that do not share the same string cache.
|
172
|
+
#
|
173
|
+
# @return [nil]
|
174
|
+
def assert_series_equal(
  left,
  right,
  check_dtype: true,
  check_names: true,
  check_exact: false,
  rtol: 1e-5,
  atol: 1e-8,
  categorical_as_str: false
)
  # Both inputs must be Series before any of their attributes are read.
  unless left.is_a?(Series) && right.is_a?(Series)
    raise_assertion_error("inputs", "unexpected input types", left.class.name, right.class.name)
  end

  # Cheap structural checks come first: length, then (optionally) name and dtype.
  raise_assertion_error("Series", "length mismatch", left.len, right.len) if left.len != right.len

  if check_names && left.name != right.name
    raise_assertion_error("Series", "name mismatch", left.name, right.name)
  end

  if check_dtype && left.dtype != right.dtype
    raise_assertion_error("Series", "dtype mismatch", left.dtype, right.dtype)
  end

  # Finally compare the values themselves.
  _assert_series_values_equal(
    left,
    right,
    check_exact: check_exact,
    rtol: rtol,
    atol: atol,
    categorical_as_str: categorical_as_str
  )
end
|
214
|
+
|
215
|
+
# Assert that the left and right Series are **not** equal.
|
216
|
+
#
|
217
|
+
# This function is intended for use in unit tests.
|
218
|
+
#
|
219
|
+
# @param left [Object]
|
220
|
+
# The first Series to compare.
|
221
|
+
# @param right [Object]
|
222
|
+
# The second Series to compare.
|
223
|
+
# @param check_dtype [Boolean]
|
224
|
+
# Require data types to match.
|
225
|
+
# @param check_names [Boolean]
|
226
|
+
# Require names to match.
|
227
|
+
# @param check_exact [Boolean]
|
228
|
+
# Require float values to match exactly. If set to `false`, values are considered
|
229
|
+
# equal when within tolerance of each other (see `rtol` and `atol`).
|
230
|
+
# Only affects columns with a Float data type.
|
231
|
+
# @param rtol [Float]
|
232
|
+
# Relative tolerance for inexact checking, given as a fraction of the values in
|
233
|
+
# `right`.
|
234
|
+
# @param atol [Float]
|
235
|
+
# Absolute tolerance for inexact checking.
|
236
|
+
# @param categorical_as_str [Boolean]
|
237
|
+
# Cast categorical columns to string before comparing. Enabling this helps
|
238
|
+
# compare columns that do not share the same string cache.
|
239
|
+
#
|
240
|
+
# @return [nil]
|
241
|
+
def assert_series_not_equal(
  left,
  right,
  check_dtype: true,
  check_names: true,
  check_exact: false,
  rtol: 1e-5,
  atol: 1e-8,
  categorical_as_str: false
)
  # Run the equality assertion with the same options; a failure there is
  # exactly the success condition here.
  assert_series_equal(
    left,
    right,
    check_dtype: check_dtype,
    check_names: check_names,
    check_exact: check_exact,
    rtol: rtol,
    atol: atol,
    categorical_as_str: categorical_as_str
  )
rescue AssertionError
  # The Series differ, which is what the caller asked for.
  nil
else
  # The equality assertion passed, so the Series are equal.
  raise AssertionError, "Series are equal"
end
|
269
|
+
|
270
|
+
private
|
271
|
+
|
272
|
+
# Check that both inputs are frames of the same kind.
#
# @param left [Object]
#   The first input to check.
# @param right [Object]
#   The second input to check.
#
# @return [Boolean]
#   `false` when both inputs are DataFrames, `true` when both are LazyFrames.
#
# Any other combination (including one DataFrame and one LazyFrame) is
# reported through `raise_assertion_error` with the two class names.
def _assert_correct_input_type(left, right)
  if left.is_a?(DataFrame) && right.is_a?(DataFrame)
    false
  elsif left.is_a?(LazyFrame) && right.is_a?(LazyFrame)
    # Both sides must be lazy: the caller calls `collect` on each of them.
    true
  else
    raise_assertion_error(
      "inputs",
      "unexpected input types",
      left.class.name,
      right.class.name
    )
  end
end
|
286
|
+
|
287
|
+
# Assert that the schemas of two frames match.
#
# @param left [Object]
#   First frame; only its `schema` is read.
# @param right [Object]
#   Second frame; only its `schema` is read.
# @param check_dtype [Boolean]
#   Require the column data types to match.
# @param check_column_order [Boolean]
#   Require the columns to appear in the same order.
# @param objects [String]
#   Plural label used in error messages (e.g. "DataFrames").
#
# @return [nil]
def _assert_frame_schema_equal(
  left,
  right,
  check_dtype:,
  check_column_order:,
  objects:
)
  left_schema = left.schema
  right_schema = right.schema
  left_columns = left_schema.keys
  right_columns = right_schema.keys

  # Fast path for equal frames. Column order is compared explicitly because
  # Hash equality does not take key order into account.
  return if left_columns == right_columns && left_schema == right_schema

  # Special error messages for when the sets of column names differ.
  # `objects[..-2]` drops the trailing "s" to get the singular label.
  left_not_right = left_columns - right_columns
  if left_not_right.any?
    msg = "columns #{left_not_right.inspect} in left #{objects[..-2]}, but not in right"
    raise AssertionError, msg
  end

  right_not_left = right_columns - left_columns
  if right_not_left.any?
    msg = "columns #{right_not_left.inspect} in right #{objects[..-2]}, but not in left"
    raise AssertionError, msg
  end

  # From here on both frames have the same column names; only order and dtypes can differ.
  if check_column_order && left_columns != right_columns
    detail = "columns are not in the same order"
    raise_assertion_error(objects, detail, left_columns, right_columns)
  end

  if check_dtype
    left_schema_dict = left_schema.to_h
    right_schema_dict = right_schema.to_h
    # Hash comparison ignores key order, so this only detects dtype differences.
    if left_schema_dict != right_schema_dict
      detail = "dtypes do not match"
      raise_assertion_error(objects, detail, left_schema_dict, right_schema_dict)
    end
  end
end
|
329
|
+
|
330
|
+
# Sort both frames by all columns of `left`, so rows can be compared
# regardless of their original order.
#
# @return [Array]
#   The sorted `[left, right]` pair.
#
# Any error raised while sorting is reported as `InvalidAssert`.
def _sort_dataframes(left, right)
  sort_keys = left.columns
  begin
    [left.sort(sort_keys), right.sort(sort_keys)]
  rescue
    raise InvalidAssert, "cannot set `check_row_order: false` on frame with unsortable columns"
  end
end
|
341
|
+
|
342
|
+
# Assert that the values of two Series are equal.
#
# @param left [Object]
#   The first Series.
# @param right [Object]
#   The second Series.
# @param check_exact [Boolean]
#   When true, any differing element is an error; no tolerance check is done.
# @param rtol [Float]
#   Relative tolerance, forwarded to the tolerance check.
# @param atol [Float]
#   Absolute tolerance, forwarded to the tolerance check.
# @param categorical_as_str [Boolean]
#   Cast Categorical inputs to String before comparing.
#
# @return [nil]
def _assert_series_values_equal(
  left,
  right,
  check_exact:,
  rtol:,
  atol:,
  categorical_as_str:
)
  if categorical_as_str
    left = left.cast(String) if left.dtype == Categorical
    right = right.cast(String) if right.dtype == Categorical
  end

  # Determine unequal elements
  begin
    unequal = left.ne_missing(right)
  rescue
    raise_assertion_error(
      "Series",
      "incompatible data types",
      left.dtype,
      right.dtype
    )
  end

  # Check nested dtypes in separate function
  if _comparing_nested_floats(left.dtype, right.dtype)
    begin
      # The helper takes the two Series positionally; only the differing
      # elements are handed over.
      _assert_series_nested_values_equal(
        left.filter(unequal),
        right.filter(unequal),
        check_exact: check_exact,
        rtol: rtol,
        atol: atol,
        categorical_as_str: categorical_as_str
      )
    rescue AssertionError
      raise_assertion_error(
        "Series",
        "nested value mismatch",
        left.to_a,
        right.to_a
      )
    else
      return
    end
  end

  # If no differences found during exact checking, we're done
  return if !unequal.any

  # Only do inexact checking for float types
  if check_exact || !left.dtype.float? || !right.dtype.float?
    raise_assertion_error(
      "Series", "exact value mismatch", left.to_a, right.to_a
    )
  end

  # Nulls and NaNs must line up before the remaining differences are
  # checked against the tolerances.
  _assert_series_null_values_match(left, right)
  _assert_series_nan_values_match(left, right)
  _assert_series_values_within_tolerance(
    left,
    right,
    unequal,
    rtol: rtol,
    atol: atol
  )
end
|
416
|
+
|
417
|
+
# Compare two Series with nested (list-like or struct) dtypes by comparing
# their inner values with `_assert_series_values_equal`.
#
# The keyword options are forwarded unchanged to each inner comparison.
def _assert_series_nested_values_equal(
  left,
  right,
  check_exact:,
  rtol:,
  atol:,
  categorical_as_str:
)
  # Shared inner comparison used by both branches below.
  compare_pair = lambda do |inner_left, inner_right|
    _assert_series_values_equal(
      inner_left,
      inner_right,
      check_exact: check_exact,
      rtol: rtol,
      atol: atol,
      categorical_as_str: categorical_as_str
    )
  end

  if _comparing_lists(left.dtype, right.dtype)
    # List-like dtypes: walk both Series in lockstep, element by element.
    left.zip(right) do |inner_left, inner_right|
      # A nil on either side cannot be compared any further.
      if inner_left.nil? || inner_right.nil?
        raise_assertion_error("Series", "nested value mismatch", inner_left, inner_right)
      end

      compare_pair.call(inner_left, inner_right)
    end
  else
    # Otherwise treat both as structs: unnest them and compare pairwise.
    left.struct.unnest.zip(right.struct.unnest) do |inner_left, inner_right|
      compare_pair.call(inner_left, inner_right)
    end
  end
end
|
457
|
+
|
458
|
+
# Raise an assertion error when the `is_null` results of the two Series differ
# anywhere.
def _assert_series_null_values_match(left, right)
  mismatch = left.is_null != right.is_null
  return unless mismatch.any

  raise_assertion_error("Series", "null value mismatch", left.to_a, right.to_a)
end
|
466
|
+
|
467
|
+
# Raise an assertion error when the `is_nan` results of the two Series differ
# anywhere. Does nothing unless `_comparing_floats` accepts both dtypes.
def _assert_series_nan_values_match(left, right)
  return if !_comparing_floats(left.dtype, right.dtype)

  mismatch = left.is_nan != right.is_nan
  return unless mismatch.any

  raise_assertion_error("Series", "nan value mismatch", left.to_a, right.to_a)
end
|
481
|
+
|
482
|
+
# Whether both data types report themselves as float types.
#
# Uses the `float?` predicate, the same one the other dtype checks in this
# module call on data types.
#
# @param left [Object]
#   Data type of the left Series.
# @param right [Object]
#   Data type of the right Series.
#
# @return [Boolean]
def _comparing_floats(left, right)
  left.float? && right.float?
end
|
485
|
+
|
486
|
+
# Whether both data types compare equal to `List` or `Array`.
def _comparing_lists(left, right)
  [left, right].all? do |dtype|
    [List, Array].include?(dtype)
  end
end
|
489
|
+
|
490
|
+
# Whether both data types compare equal to `Struct`.
def _comparing_structs(left, right)
  return false unless left == Struct

  right == Struct
end
|
493
|
+
|
494
|
+
# Whether the nested comparison path should be used for the two data types.
#
# Requires both to be list-like or both to be structs, and then asks each
# data type `float?`.
# NOTE(review): `float?` is asked of the outer data types here, not of their
# inner types; confirm that this is the intended check for nested floats.
def _comparing_nested_floats(left, right)
  nested_pair = _comparing_lists(left, right) || _comparing_structs(left, right)
  return false unless nested_pair

  left.float? && right.float?
end
|
501
|
+
|
502
|
+
# Raise an `AssertionError` whose message names the compared objects, the
# kind of difference, and the left and right values.
#
# @param objects [String]
#   Label for what is being compared (e.g. "Series").
# @param detail [String]
#   Short description of the difference.
# @param left [Object]
#   Left-hand value, interpolated into the message.
# @param right [Object]
#   Right-hand value, interpolated into the message.
def raise_assertion_error(objects, detail, left, right)
  raise AssertionError, "#{objects} are different (#{detail})\n[left]: #{left}\n[right]: #{right}"
end
|
506
|
+
end
|
507
|
+
end
|
data/lib/polars/utils.rb
CHANGED
@@ -27,7 +27,7 @@ module Polars
|
|
27
27
|
if obj.is_a?(Range)
|
28
28
|
# size only works for numeric ranges
|
29
29
|
obj.to_a.length
|
30
|
-
elsif obj.is_a?(String)
|
30
|
+
elsif obj.is_a?(::String)
|
31
31
|
nil
|
32
32
|
else
|
33
33
|
obj.length
|
@@ -116,7 +116,7 @@ module Polars
|
|
116
116
|
end
|
117
117
|
|
118
118
|
def self.selection_to_rbexpr_list(exprs)
|
119
|
-
if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
119
|
+
if exprs.is_a?(::String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
120
120
|
exprs = [exprs]
|
121
121
|
end
|
122
122
|
|
@@ -124,9 +124,9 @@ module Polars
|
|
124
124
|
end
|
125
125
|
|
126
126
|
def self.expr_to_lit_or_expr(expr, str_to_lit: true)
|
127
|
-
if (expr.is_a?(String) || expr.is_a?(Symbol)) && !str_to_lit
|
127
|
+
if (expr.is_a?(::String) || expr.is_a?(Symbol)) && !str_to_lit
|
128
128
|
col(expr)
|
129
|
-
elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
|
129
|
+
elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(::String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
|
130
130
|
lit(expr)
|
131
131
|
elsif expr.is_a?(Expr)
|
132
132
|
expr
|
@@ -152,7 +152,7 @@ module Polars
|
|
152
152
|
if data_type == Unknown
|
153
153
|
return include_unknown
|
154
154
|
end
|
155
|
-
data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
|
155
|
+
data_type.is_a?(Symbol) || data_type.is_a?(::String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
|
156
156
|
end
|
157
157
|
|
158
158
|
def self.map_rb_type_to_dtype(ruby_dtype)
|
@@ -160,7 +160,7 @@ module Polars
|
|
160
160
|
Float64
|
161
161
|
elsif ruby_dtype == Integer
|
162
162
|
Int64
|
163
|
-
elsif ruby_dtype == String
|
163
|
+
elsif ruby_dtype == ::String
|
164
164
|
Utf8
|
165
165
|
elsif ruby_dtype == TrueClass || ruby_dtype == FalseClass
|
166
166
|
Boolean
|
@@ -187,7 +187,7 @@ module Polars
|
|
187
187
|
begin
|
188
188
|
map_rb_type_to_dtype(data_type)
|
189
189
|
rescue TypeError
|
190
|
-
raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
|
190
|
+
raise ArgumentError, "Conversion of Ruby data type #{data_type.inspect} to Polars data type not implemented."
|
191
191
|
end
|
192
192
|
end
|
193
193
|
|
@@ -211,7 +211,7 @@ module Polars
|
|
211
211
|
projection = nil
|
212
212
|
if columns
|
213
213
|
raise Todo
|
214
|
-
# if columns.is_a?(String) || columns.is_a?(Symbol)
|
214
|
+
# if columns.is_a?(::String) || columns.is_a?(Symbol)
|
215
215
|
# columns = [columns]
|
216
216
|
# elsif is_int_sequence(columns)
|
217
217
|
# projection = columns.to_a
|
@@ -243,11 +243,11 @@ module Polars
|
|
243
243
|
end
|
244
244
|
|
245
245
|
def self.strlike?(value)
|
246
|
-
value.is_a?(String) || value.is_a?(Symbol)
|
246
|
+
value.is_a?(::String) || value.is_a?(Symbol)
|
247
247
|
end
|
248
248
|
|
249
249
|
def self.pathlike?(value)
|
250
|
-
value.is_a?(String) || (defined?(Pathname) && value.is_a?(Pathname))
|
250
|
+
value.is_a?(::String) || (defined?(Pathname) && value.is_a?(Pathname))
|
251
251
|
end
|
252
252
|
|
253
253
|
def self._is_iterable_of(val, eltype)
|
@@ -275,10 +275,10 @@ module Polars
|
|
275
275
|
end
|
276
276
|
|
277
277
|
def self.is_str_sequence(val, allow_str: false)
|
278
|
-
if allow_str == false && val.is_a?(String)
|
278
|
+
if allow_str == false && val.is_a?(::String)
|
279
279
|
false
|
280
280
|
else
|
281
|
-
val.is_a?(::Array) && _is_iterable_of(val, String)
|
281
|
+
val.is_a?(::Array) && _is_iterable_of(val, ::String)
|
282
282
|
end
|
283
283
|
end
|
284
284
|
|
@@ -286,20 +286,51 @@ module Polars
|
|
286
286
|
Dir.glob(file).any?
|
287
287
|
end
|
288
288
|
|
289
|
-
def self.
|
289
|
+
# Parse positional and named inputs into a single list of expressions.
#
# Positional inputs come first, followed by the named inputs (each aliased
# to its keyword by `_parse_named_inputs`).
def self.parse_as_list_of_expressions(*inputs, __structify: false, **named_inputs)
  parsed = _parse_positional_inputs(inputs, structify: __structify)
  return parsed if named_inputs.empty?

  parsed.concat(_parse_named_inputs(named_inputs, structify: __structify))
end
|
298
|
+
|
299
|
+
# Parse each positional input into an expression.
def self._parse_positional_inputs(inputs, structify: false)
  _parse_inputs_as_iterable(inputs).map do |input|
    parse_as_expression(input, structify: structify)
  end
end
|
303
|
+
|
304
|
+
# Normalize positional inputs to a flat list.
#
# A single Array argument is unwrapped so that `f([a, b])` and `f(a, b)` are
# treated the same; anything else is returned as given.
def self._parse_inputs_as_iterable(inputs)
  return [] if inputs.empty?

  single_array = inputs.length == 1 && inputs[0].is_a?(::Array)
  single_array ? inputs[0] : inputs
end
|
315
|
+
|
316
|
+
# Parse each named input into an expression aliased to its name (as a string).
def self._parse_named_inputs(named_inputs, structify: false)
  named_inputs.map do |name, input|
    parsed = parse_as_expression(input, structify: structify)
    parsed._alias(name.to_s)
  end
end
|
321
|
+
|
322
|
+
def self.parse_as_expression(input, str_as_lit: false, list_as_lit: true, structify: false, dtype: nil)
|
290
323
|
if input.is_a?(Expr)
|
291
324
|
expr = input
|
292
|
-
elsif input.is_a?(String) && !str_as_lit
|
325
|
+
elsif input.is_a?(::String) && !str_as_lit
|
293
326
|
expr = Polars.col(input)
|
294
327
|
structify = false
|
295
|
-
elsif
|
296
|
-
expr = Polars.lit(input)
|
297
|
-
structify = false
|
298
|
-
elsif input.is_a?(Array)
|
299
|
-
expr = Polars.lit(Polars::Series.new("", [input]))
|
328
|
+
elsif input.is_a?(::Array) && !list_as_lit
|
329
|
+
expr = Polars.lit(Series.new(input), dtype: dtype)
|
300
330
|
structify = false
|
301
331
|
else
|
302
|
-
|
332
|
+
expr = Polars.lit(input, dtype: dtype)
|
333
|
+
structify = false
|
303
334
|
end
|
304
335
|
|
305
336
|
if structify
|
@@ -320,5 +351,18 @@ module Polars
|
|
320
351
|
end
|
321
352
|
ambiguous
|
322
353
|
end
|
354
|
+
|
355
|
+
# Validate that a string argument is exactly one byte long.
#
# Non-string arguments are not checked. When `can_be_empty` is true, an empty
# string is accepted as well.
#
# @param arg_name [Object]
#   Name of the argument, used in the error message.
# @param arg [Object]
#   Value to check.
# @param can_be_empty [Boolean]
#   Whether an empty string is allowed.
#
# @return [nil]
def self._check_arg_is_1byte(arg_name, arg, can_be_empty = false)
  return unless arg.is_a?(::String)

  arg_byte_length = arg.bytesize

  if can_be_empty
    return if arg_byte_length <= 1

    raise ArgumentError, "#{arg_name} should be a single byte character or empty, but is #{arg_byte_length} bytes long."
  end

  return if arg_byte_length == 1

  raise ArgumentError, "#{arg_name} should be a single byte character, but is #{arg_byte_length} bytes long."
end
|
323
367
|
end
|
324
368
|
end
|
data/lib/polars/version.rb
CHANGED