polars-df 0.3.1-x86_64-linux → 0.5.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -1
- data/Cargo.lock +486 -380
- data/Cargo.toml +0 -2
- data/LICENSE-THIRD-PARTY.txt +7353 -8473
- data/README.md +31 -2
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +263 -87
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +148 -8
- data/lib/polars/expr.rb +78 -11
- data/lib/polars/io.rb +73 -62
- data/lib/polars/lazy_frame.rb +107 -10
- data/lib/polars/lazy_functions.rb +7 -3
- data/lib/polars/list_expr.rb +70 -21
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/series.rb +190 -74
- data/lib/polars/string_expr.rb +150 -44
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +51 -9
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +4 -2
- metadata +4 -2
data/lib/polars/list_expr.rb
CHANGED
@@ -27,7 +27,7 @@ module Polars
|
|
27
27
|
# # │ 1 │
|
28
28
|
# # └─────┘
|
29
29
|
def lengths
|
30
|
-
Utils.wrap_expr(_rbexpr.
|
30
|
+
Utils.wrap_expr(_rbexpr.list_lengths)
|
31
31
|
end
|
32
32
|
|
33
33
|
# Sum all the lists in the array.
|
@@ -48,7 +48,7 @@ module Polars
|
|
48
48
|
# # │ 5 │
|
49
49
|
# # └────────┘
|
50
50
|
def sum
|
51
|
-
Utils.wrap_expr(_rbexpr.
|
51
|
+
Utils.wrap_expr(_rbexpr.list_sum)
|
52
52
|
end
|
53
53
|
|
54
54
|
# Compute the max value of the lists in the array.
|
@@ -69,7 +69,7 @@ module Polars
|
|
69
69
|
# # │ 3 │
|
70
70
|
# # └────────┘
|
71
71
|
def max
|
72
|
-
Utils.wrap_expr(_rbexpr.
|
72
|
+
Utils.wrap_expr(_rbexpr.list_max)
|
73
73
|
end
|
74
74
|
|
75
75
|
# Compute the min value of the lists in the array.
|
@@ -90,7 +90,7 @@ module Polars
|
|
90
90
|
# # │ 2 │
|
91
91
|
# # └────────┘
|
92
92
|
def min
|
93
|
-
Utils.wrap_expr(_rbexpr.
|
93
|
+
Utils.wrap_expr(_rbexpr.list_min)
|
94
94
|
end
|
95
95
|
|
96
96
|
# Compute the mean value of the lists in the array.
|
@@ -111,7 +111,7 @@ module Polars
|
|
111
111
|
# # │ 2.5 │
|
112
112
|
# # └────────┘
|
113
113
|
def mean
|
114
|
-
Utils.wrap_expr(_rbexpr.
|
114
|
+
Utils.wrap_expr(_rbexpr.list_mean)
|
115
115
|
end
|
116
116
|
|
117
117
|
# Sort the arrays in the list.
|
@@ -136,7 +136,7 @@ module Polars
|
|
136
136
|
# # │ [1, 2, 9] │
|
137
137
|
# # └───────────┘
|
138
138
|
def sort(reverse: false)
|
139
|
-
Utils.wrap_expr(_rbexpr.
|
139
|
+
Utils.wrap_expr(_rbexpr.list_sort(reverse))
|
140
140
|
end
|
141
141
|
|
142
142
|
# Reverse the arrays in the list.
|
@@ -161,7 +161,7 @@ module Polars
|
|
161
161
|
# # │ [2, 1, 9] │
|
162
162
|
# # └───────────┘
|
163
163
|
def reverse
|
164
|
-
Utils.wrap_expr(_rbexpr.
|
164
|
+
Utils.wrap_expr(_rbexpr.list_reverse)
|
165
165
|
end
|
166
166
|
|
167
167
|
# Get the unique/distinct values in the list.
|
@@ -184,8 +184,8 @@ module Polars
|
|
184
184
|
# # ╞═══════════╡
|
185
185
|
# # │ [1, 2] │
|
186
186
|
# # └───────────┘
|
187
|
-
def unique
|
188
|
-
Utils.wrap_expr(_rbexpr.
|
187
|
+
def unique(maintain_order: false)
|
188
|
+
Utils.wrap_expr(_rbexpr.list_unique(maintain_order))
|
189
189
|
end
|
190
190
|
|
191
191
|
# Concat the arrays in a Series dtype List in linear time.
|
@@ -255,7 +255,7 @@ module Polars
|
|
255
255
|
# # └──────┘
|
256
256
|
def get(index)
|
257
257
|
index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
|
258
|
-
Utils.wrap_expr(_rbexpr.
|
258
|
+
Utils.wrap_expr(_rbexpr.list_get(index))
|
259
259
|
end
|
260
260
|
|
261
261
|
# Get the value by index in the sublists.
|
@@ -265,6 +265,28 @@ module Polars
|
|
265
265
|
get(item)
|
266
266
|
end
|
267
267
|
|
268
|
+
# Take sublists by multiple indices.
|
269
|
+
#
|
270
|
+
# The indices may be defined in a single column, or by sublists in another
|
271
|
+
# column of dtype `List`.
|
272
|
+
#
|
273
|
+
# @param index [Object]
|
274
|
+
# Indices to return per sublist
|
275
|
+
# @param null_on_oob [Boolean]
|
276
|
+
# Behavior if an index is out of bounds:
|
277
|
+
# True -> set as null
|
278
|
+
# False -> raise an error
|
279
|
+
# Note that defaulting to raising an error is much cheaper
|
280
|
+
#
|
281
|
+
# @return [Expr]
|
282
|
+
def take(index, null_on_oob: false)
|
283
|
+
if index.is_a?(Array)
|
284
|
+
index = Series.new(index)
|
285
|
+
end
|
286
|
+
index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
|
287
|
+
Utils.wrap_expr(_rbexpr.list_take(index, null_on_oob))
|
288
|
+
end
|
289
|
+
|
268
290
|
# Get the first value of the sublists.
|
269
291
|
#
|
270
292
|
# @return [Expr]
|
@@ -331,7 +353,7 @@ module Polars
|
|
331
353
|
# # │ true │
|
332
354
|
# # └───────┘
|
333
355
|
def contains(item)
|
334
|
-
Utils.wrap_expr(_rbexpr.
|
356
|
+
Utils.wrap_expr(_rbexpr.list_contains(Utils.expr_to_lit_or_expr(item)._rbexpr))
|
335
357
|
end
|
336
358
|
|
337
359
|
# Join all string items in a sublist and place a separator between them.
|
@@ -357,7 +379,7 @@ module Polars
|
|
357
379
|
# # │ x y │
|
358
380
|
# # └───────┘
|
359
381
|
def join(separator)
|
360
|
-
Utils.wrap_expr(_rbexpr.
|
382
|
+
Utils.wrap_expr(_rbexpr.list_join(separator))
|
361
383
|
end
|
362
384
|
|
363
385
|
# Retrieve the index of the minimal value in every sublist.
|
@@ -382,7 +404,7 @@ module Polars
|
|
382
404
|
# # │ 1 │
|
383
405
|
# # └─────┘
|
384
406
|
def arg_min
|
385
|
-
Utils.wrap_expr(_rbexpr.
|
407
|
+
Utils.wrap_expr(_rbexpr.list_arg_min)
|
386
408
|
end
|
387
409
|
|
388
410
|
# Retrieve the index of the maximum value in every sublist.
|
@@ -407,7 +429,7 @@ module Polars
|
|
407
429
|
# # │ 0 │
|
408
430
|
# # └─────┘
|
409
431
|
def arg_max
|
410
|
-
Utils.wrap_expr(_rbexpr.
|
432
|
+
Utils.wrap_expr(_rbexpr.list_arg_max)
|
411
433
|
end
|
412
434
|
|
413
435
|
# Calculate the n-th discrete difference of every sublist.
|
@@ -426,11 +448,11 @@ module Polars
|
|
426
448
|
# # shape: (2,)
|
427
449
|
# # Series: 'a' [list[i64]]
|
428
450
|
# # [
|
429
|
-
# # [null, 1,
|
451
|
+
# # [null, 1, … 1]
|
430
452
|
# # [null, -8, -1]
|
431
453
|
# # ]
|
432
454
|
def diff(n: 1, null_behavior: "ignore")
|
433
|
-
Utils.wrap_expr(_rbexpr.
|
455
|
+
Utils.wrap_expr(_rbexpr.list_diff(n, null_behavior))
|
434
456
|
end
|
435
457
|
|
436
458
|
# Shift values by the given period.
|
@@ -447,11 +469,11 @@ module Polars
|
|
447
469
|
# # shape: (2,)
|
448
470
|
# # Series: 'a' [list[i64]]
|
449
471
|
# # [
|
450
|
-
# # [null, 1,
|
472
|
+
# # [null, 1, … 3]
|
451
473
|
# # [null, 10, 2]
|
452
474
|
# # ]
|
453
475
|
def shift(periods = 1)
|
454
|
-
Utils.wrap_expr(_rbexpr.
|
476
|
+
Utils.wrap_expr(_rbexpr.list_shift(periods))
|
455
477
|
end
|
456
478
|
|
457
479
|
# Slice every sublist.
|
@@ -477,7 +499,7 @@ module Polars
|
|
477
499
|
def slice(offset, length = nil)
|
478
500
|
offset = Utils.expr_to_lit_or_expr(offset, str_to_lit: false)._rbexpr
|
479
501
|
length = Utils.expr_to_lit_or_expr(length, str_to_lit: false)._rbexpr
|
480
|
-
Utils.wrap_expr(_rbexpr.
|
502
|
+
Utils.wrap_expr(_rbexpr.list_slice(offset, length))
|
481
503
|
end
|
482
504
|
|
483
505
|
# Slice the first `n` values of every sublist.
|
@@ -523,6 +545,33 @@ module Polars
|
|
523
545
|
slice(offset, n)
|
524
546
|
end
|
525
547
|
|
548
|
+
# Count how often the value produced by ``element`` occurs.
|
549
|
+
#
|
550
|
+
# @param element [Expr]
|
551
|
+
# An expression that produces a single value
|
552
|
+
#
|
553
|
+
# @return [Expr]
|
554
|
+
#
|
555
|
+
# @example
|
556
|
+
# df = Polars::DataFrame.new({"listcol" => [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]})
|
557
|
+
# df.select(Polars.col("listcol").arr.count_match(2).alias("number_of_twos"))
|
558
|
+
# # =>
|
559
|
+
# # shape: (5, 1)
|
560
|
+
# # ┌────────────────┐
|
561
|
+
# # │ number_of_twos │
|
562
|
+
# # │ --- │
|
563
|
+
# # │ u32 │
|
564
|
+
# # ╞════════════════╡
|
565
|
+
# # │ 0 │
|
566
|
+
# # │ 0 │
|
567
|
+
# # │ 2 │
|
568
|
+
# # │ 1 │
|
569
|
+
# # │ 0 │
|
570
|
+
# # └────────────────┘
|
571
|
+
def count_match(element)
|
572
|
+
Utils.wrap_expr(_rbexpr.list_count_match(Utils.expr_to_lit_or_expr(element)._rbexpr))
|
573
|
+
end
|
574
|
+
|
526
575
|
# Convert the series of type `List` to a series of type `Struct`.
|
527
576
|
#
|
528
577
|
# @param n_field_strategy ["first_non_null", "max_width"]
|
@@ -548,7 +597,7 @@ module Polars
|
|
548
597
|
# # └────────────┘
|
549
598
|
def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
|
550
599
|
raise Todo if name_generator
|
551
|
-
Utils.wrap_expr(_rbexpr.
|
600
|
+
Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, name_generator, 0))
|
552
601
|
end
|
553
602
|
|
554
603
|
# Run any polars expression against the lists' elements.
|
@@ -582,7 +631,7 @@ module Polars
|
|
582
631
|
# # │ 3 ┆ 2 ┆ [2.0, 1.0] │
|
583
632
|
# # └─────┴─────┴────────────┘
|
584
633
|
def eval(expr, parallel: false)
|
585
|
-
|
634
|
+
Utils.wrap_expr(_rbexpr.list_eval(expr._rbexpr, parallel))
|
586
635
|
end
|
587
636
|
end
|
588
637
|
end
|
@@ -185,7 +185,7 @@ module Polars
|
|
185
185
|
# # shape: (2,)
|
186
186
|
# # Series: 'a' [list[i64]]
|
187
187
|
# # [
|
188
|
-
# # [null, 1,
|
188
|
+
# # [null, 1, … 1]
|
189
189
|
# # [null, -8, -1]
|
190
190
|
# # ]
|
191
191
|
def diff(n: 1, null_behavior: "ignore")
|
@@ -206,7 +206,7 @@ module Polars
|
|
206
206
|
# # shape: (2,)
|
207
207
|
# # Series: 'a' [list[i64]]
|
208
208
|
# # [
|
209
|
-
# # [null, 1,
|
209
|
+
# # [null, 1, … 3]
|
210
210
|
# # [null, 10, 2]
|
211
211
|
# # ]
|
212
212
|
def shift(periods = 1)
|
data/lib/polars/series.rb
CHANGED
@@ -67,6 +67,12 @@ module Polars
|
|
67
67
|
._s
|
68
68
|
elsif values.is_a?(Array)
|
69
69
|
self._s = sequence_to_rbseries(name, values, dtype: dtype, strict: strict, dtype_if_empty: dtype_if_empty)
|
70
|
+
elsif defined?(Numo::NArray) && values.is_a?(Numo::NArray)
|
71
|
+
self._s = numo_to_rbseries(name, values, strict: strict, nan_to_null: nan_to_null)
|
72
|
+
|
73
|
+
if !dtype.nil?
|
74
|
+
self._s = self.cast(dtype, strict: true)._s
|
75
|
+
end
|
70
76
|
else
|
71
77
|
raise ArgumentError, "Series constructor called with unsupported type; got #{values.class.name}"
|
72
78
|
end
|
@@ -90,10 +96,14 @@ module Polars
|
|
90
96
|
#
|
91
97
|
# @return [Hash]
|
92
98
|
def flags
|
93
|
-
{
|
99
|
+
out = {
|
94
100
|
"SORTED_ASC" => _s.is_sorted_flag,
|
95
101
|
"SORTED_DESC" => _s.is_sorted_reverse_flag
|
96
102
|
}
|
103
|
+
if dtype.is_a?(List)
|
104
|
+
out["FAST_EXPLODE"] = _s.can_fast_explode_flag
|
105
|
+
end
|
106
|
+
out
|
97
107
|
end
|
98
108
|
|
99
109
|
# Get the inner dtype in of a List typed Series.
|
@@ -222,14 +232,28 @@ module Polars
|
|
222
232
|
#
|
223
233
|
# @return [Series]
|
224
234
|
def *(other)
|
225
|
-
|
235
|
+
if is_temporal
|
236
|
+
raise ArgumentError, "first cast to integer before multiplying datelike dtypes"
|
237
|
+
elsif other.is_a?(DataFrame)
|
238
|
+
other * self
|
239
|
+
else
|
240
|
+
_arithmetic(other, :mul)
|
241
|
+
end
|
226
242
|
end
|
227
243
|
|
228
244
|
# Performs division.
|
229
245
|
#
|
230
246
|
# @return [Series]
|
231
247
|
def /(other)
|
232
|
-
|
248
|
+
if is_temporal
|
249
|
+
raise ArgumentError, "first cast to integer before dividing datelike dtypes"
|
250
|
+
end
|
251
|
+
|
252
|
+
if is_float
|
253
|
+
return _arithmetic(other, :div)
|
254
|
+
end
|
255
|
+
|
256
|
+
cast(Float64) / other
|
233
257
|
end
|
234
258
|
|
235
259
|
# Returns the modulo.
|
@@ -252,6 +276,16 @@ module Polars
|
|
252
276
|
to_frame.select(Polars.col(name).pow(power)).to_series
|
253
277
|
end
|
254
278
|
|
279
|
+
# Performs boolean not.
|
280
|
+
#
|
281
|
+
# @return [Series]
|
282
|
+
def !
|
283
|
+
if dtype == Boolean
|
284
|
+
return Utils.wrap_s(_s.not)
|
285
|
+
end
|
286
|
+
raise NotImplementedError
|
287
|
+
end
|
288
|
+
|
255
289
|
# Performs negation.
|
256
290
|
#
|
257
291
|
# @return [Series]
|
@@ -278,6 +312,10 @@ module Polars
|
|
278
312
|
return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s))
|
279
313
|
end
|
280
314
|
|
315
|
+
if item.is_a?(Series) && item.bool?
|
316
|
+
return filter(item)
|
317
|
+
end
|
318
|
+
|
281
319
|
if item.is_a?(Integer)
|
282
320
|
return _s.get_idx(item)
|
283
321
|
end
|
@@ -369,16 +407,26 @@ module Polars
|
|
369
407
|
# Check if any boolean value in the column is `true`.
|
370
408
|
#
|
371
409
|
# @return [Boolean]
|
372
|
-
def any
|
373
|
-
|
410
|
+
def any?(&block)
|
411
|
+
if block_given?
|
412
|
+
apply(&block).any?
|
413
|
+
else
|
414
|
+
to_frame.select(Polars.col(name).any).to_series[0]
|
415
|
+
end
|
374
416
|
end
|
417
|
+
alias_method :any, :any?
|
375
418
|
|
376
419
|
# Check if all boolean values in the column are `true`.
|
377
420
|
#
|
378
421
|
# @return [Boolean]
|
379
|
-
def all
|
380
|
-
|
422
|
+
def all?(&block)
|
423
|
+
if block_given?
|
424
|
+
apply(&block).all?
|
425
|
+
else
|
426
|
+
to_frame.select(Polars.col(name).all).to_series[0]
|
427
|
+
end
|
381
428
|
end
|
429
|
+
alias_method :all, :all?
|
382
430
|
|
383
431
|
# Compute the logarithm to a given base.
|
384
432
|
#
|
@@ -1314,6 +1362,7 @@ module Polars
|
|
1314
1362
|
def unique(maintain_order: false)
|
1315
1363
|
super
|
1316
1364
|
end
|
1365
|
+
alias_method :uniq, :unique
|
1317
1366
|
|
1318
1367
|
# Take values by index.
|
1319
1368
|
#
|
@@ -1535,6 +1584,7 @@ module Polars
|
|
1535
1584
|
def is_in(other)
|
1536
1585
|
super
|
1537
1586
|
end
|
1587
|
+
alias_method :in?, :is_in
|
1538
1588
|
|
1539
1589
|
# Get index values where Boolean Series evaluate `true`.
|
1540
1590
|
#
|
@@ -1660,6 +1710,7 @@ module Polars
|
|
1660
1710
|
end
|
1661
1711
|
alias_method :count, :len
|
1662
1712
|
alias_method :length, :len
|
1713
|
+
alias_method :size, :len
|
1663
1714
|
|
1664
1715
|
# Cast between data types.
|
1665
1716
|
#
|
@@ -1779,6 +1830,8 @@ module Polars
|
|
1779
1830
|
[Date, Time].include?(dtype) || dtype.is_a?(Datetime) || dtype.is_a?(Duration)
|
1780
1831
|
end
|
1781
1832
|
alias_method :datelike?, :is_datelike
|
1833
|
+
alias_method :is_temporal, :is_datelike
|
1834
|
+
alias_method :temporal?, :is_datelike
|
1782
1835
|
|
1783
1836
|
# Check if this Series has floating point numbers.
|
1784
1837
|
#
|
@@ -2432,6 +2485,7 @@ module Polars
|
|
2432
2485
|
end
|
2433
2486
|
Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls))
|
2434
2487
|
end
|
2488
|
+
alias_method :map, :apply
|
2435
2489
|
|
2436
2490
|
# Shift the values by a given period.
|
2437
2491
|
#
|
@@ -3483,7 +3537,7 @@ module Polars
|
|
3483
3537
|
# # 99
|
3484
3538
|
# # ]
|
3485
3539
|
def extend_constant(value, n)
|
3486
|
-
|
3540
|
+
Utils.wrap_s(_s.extend_constant(value, n))
|
3487
3541
|
end
|
3488
3542
|
|
3489
3543
|
# Flags the Series as sorted.
|
@@ -3531,6 +3585,13 @@ module Polars
|
|
3531
3585
|
ListNameSpace.new(self)
|
3532
3586
|
end
|
3533
3587
|
|
3588
|
+
# Create an object namespace of all binary related methods.
|
3589
|
+
#
|
3590
|
+
# @return [BinaryNameSpace]
|
3591
|
+
def bin
|
3592
|
+
BinaryNameSpace.new(self)
|
3593
|
+
end
|
3594
|
+
|
3534
3595
|
# Create an object namespace of all categorical related methods.
|
3535
3596
|
#
|
3536
3597
|
# @return [CatNameSpace]
|
@@ -3630,14 +3691,39 @@ module Polars
|
|
3630
3691
|
end
|
3631
3692
|
|
3632
3693
|
def _comp(other, op)
|
3694
|
+
if dtype == Boolean && Utils.bool?(other) && [:eq, :neq].include?(op)
|
3695
|
+
if (other == true && op == :eq) || (other == false && op == :neq)
|
3696
|
+
return clone
|
3697
|
+
elsif (other == false && op == :eq) || (other == true && op == :neq)
|
3698
|
+
return !self
|
3699
|
+
end
|
3700
|
+
end
|
3701
|
+
|
3702
|
+
if other.is_a?(::Time) && dtype.is_a?(Datetime)
|
3703
|
+
ts = Utils._datetime_to_pl_timestamp(other, time_unit)
|
3704
|
+
f = ffi_func("#{op}_<>", Int64, _s)
|
3705
|
+
fail if f.nil?
|
3706
|
+
return Utils.wrap_s(f.call(ts))
|
3707
|
+
elsif other.is_a?(::Date) && dtype == Date
|
3708
|
+
d = Utils._date_to_pl_date(other)
|
3709
|
+
f = ffi_func("#{op}_<>", Int32, _s)
|
3710
|
+
fail if f.nil?
|
3711
|
+
return Utils.wrap_s(f.call(d))
|
3712
|
+
end
|
3713
|
+
|
3633
3714
|
if other.is_a?(Series)
|
3634
3715
|
return Utils.wrap_s(_s.send(op, other._s))
|
3635
3716
|
end
|
3636
3717
|
|
3637
|
-
|
3638
|
-
|
3718
|
+
f = ffi_func("#{op}_<>", dtype, _s)
|
3719
|
+
if f.nil?
|
3720
|
+
raise NotImplementedError
|
3639
3721
|
end
|
3640
|
-
Utils.wrap_s(
|
3722
|
+
Utils.wrap_s(f.call(other))
|
3723
|
+
end
|
3724
|
+
|
3725
|
+
def ffi_func(name, dtype, _s)
|
3726
|
+
_s.method(name.sub("<>", DTYPE_TO_FFINAME.fetch(dtype))) if DTYPE_TO_FFINAME.key?(dtype)
|
3641
3727
|
end
|
3642
3728
|
|
3643
3729
|
def _arithmetic(other, op)
|
@@ -3648,14 +3734,16 @@ module Polars
|
|
3648
3734
|
return Utils.wrap_s(_s.send(op, other._s))
|
3649
3735
|
end
|
3650
3736
|
|
3651
|
-
if other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(String)
|
3652
|
-
|
3653
|
-
|
3654
|
-
if other.is_a?(Float) && !is_float
|
3655
|
-
raise Todo
|
3737
|
+
if (other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(String)) && !is_float
|
3738
|
+
_s2 = sequence_to_rbseries(name, [other])
|
3739
|
+
return Utils.wrap_s(_s.send(op, _s2))
|
3656
3740
|
end
|
3657
3741
|
|
3658
|
-
|
3742
|
+
f = ffi_func("#{op}_<>", dtype, _s)
|
3743
|
+
if f.nil?
|
3744
|
+
raise ArgumentError, "cannot do arithmetic with series of dtype: #{dtype} and argument of type: #{other.class.name}"
|
3745
|
+
end
|
3746
|
+
Utils.wrap_s(f.call(other))
|
3659
3747
|
end
|
3660
3748
|
|
3661
3749
|
DTYPE_TO_FFINAME = {
|
@@ -3688,25 +3776,57 @@ module Polars
|
|
3688
3776
|
values._s
|
3689
3777
|
end
|
3690
3778
|
|
3779
|
+
def numo_to_rbseries(name, values, strict: true, nan_to_null: false)
|
3780
|
+
# not needed yet
|
3781
|
+
# if !values.contiguous?
|
3782
|
+
# end
|
3783
|
+
|
3784
|
+
if values.shape.length == 1
|
3785
|
+
values, dtype = numo_values_and_dtype(values)
|
3786
|
+
strict = nan_to_null if [Numo::SFloat, Numo::DFloat].include?(dtype)
|
3787
|
+
if dtype == Numo::RObject
|
3788
|
+
sequence_to_rbseries(name, values.to_a, strict: strict)
|
3789
|
+
else
|
3790
|
+
constructor = numo_type_to_constructor(dtype)
|
3791
|
+
# TODO improve performance
|
3792
|
+
constructor.call(name, values.to_a, strict)
|
3793
|
+
end
|
3794
|
+
elsif values.shape.length == 2
|
3795
|
+
raise Todo
|
3796
|
+
else
|
3797
|
+
raise Todo
|
3798
|
+
end
|
3799
|
+
end
|
3800
|
+
|
3801
|
+
def numo_values_and_dtype(values)
|
3802
|
+
[values, values.class]
|
3803
|
+
end
|
3804
|
+
|
3805
|
+
def numo_type_to_constructor(dtype)
|
3806
|
+
{
|
3807
|
+
Numo::Float32 => RbSeries.method(:new_opt_f32),
|
3808
|
+
Numo::Float64 => RbSeries.method(:new_opt_f64),
|
3809
|
+
Numo::Int8 => RbSeries.method(:new_opt_i8),
|
3810
|
+
Numo::Int16 => RbSeries.method(:new_opt_i16),
|
3811
|
+
Numo::Int32 => RbSeries.method(:new_opt_i32),
|
3812
|
+
Numo::Int64 => RbSeries.method(:new_opt_i64),
|
3813
|
+
Numo::UInt8 => RbSeries.method(:new_opt_u8),
|
3814
|
+
Numo::UInt16 => RbSeries.method(:new_opt_u16),
|
3815
|
+
Numo::UInt32 => RbSeries.method(:new_opt_u32),
|
3816
|
+
Numo::UInt64 => RbSeries.method(:new_opt_u64)
|
3817
|
+
}.fetch(dtype)
|
3818
|
+
rescue KeyError
|
3819
|
+
RbSeries.method(:new_object)
|
3820
|
+
end
|
3821
|
+
|
3691
3822
|
def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
|
3692
3823
|
ruby_dtype = nil
|
3693
|
-
nested_dtype = nil
|
3694
3824
|
|
3695
3825
|
if (values.nil? || values.empty?) && dtype.nil?
|
3696
|
-
|
3697
|
-
# if dtype for empty sequence could be guessed
|
3698
|
-
# (e.g comparisons between self and other)
|
3699
|
-
dtype = dtype_if_empty
|
3700
|
-
else
|
3701
|
-
# default to Float32 type
|
3702
|
-
dtype = :f32
|
3703
|
-
end
|
3826
|
+
dtype = dtype_if_empty || Float32
|
3704
3827
|
end
|
3705
3828
|
|
3706
|
-
rb_temporal_types = []
|
3707
|
-
rb_temporal_types << ::Date if defined?(::Date)
|
3708
|
-
rb_temporal_types << ::DateTime if defined?(::DateTime)
|
3709
|
-
rb_temporal_types << ::Time if defined?(::Time)
|
3829
|
+
rb_temporal_types = [::Date, ::DateTime, ::Time]
|
3710
3830
|
|
3711
3831
|
value = _get_first_non_none(values)
|
3712
3832
|
if !value.nil?
|
@@ -3731,58 +3851,54 @@ module Polars
|
|
3731
3851
|
|
3732
3852
|
# temporal branch
|
3733
3853
|
if rb_temporal_types.include?(ruby_dtype)
|
3734
|
-
|
3735
|
-
|
3736
|
-
|
3737
|
-
|
3738
|
-
# end
|
3739
|
-
|
3740
|
-
if ruby_dtype == ::Date
|
3741
|
-
RbSeries.new_opt_date(name, values, strict)
|
3742
|
-
elsif ruby_dtype == ::Time
|
3743
|
-
RbSeries.new_opt_datetime(name, values, strict)
|
3744
|
-
elsif ruby_dtype == ::DateTime
|
3745
|
-
RbSeries.new_opt_datetime(name, values.map(&:to_time), strict)
|
3746
|
-
else
|
3747
|
-
raise Todo
|
3748
|
-
end
|
3749
|
-
elsif ruby_dtype == Array
|
3750
|
-
if nested_dtype.nil?
|
3751
|
-
nested_value = _get_first_non_none(value)
|
3752
|
-
nested_dtype = nested_value.nil? ? Float : nested_value.class
|
3854
|
+
if dtype.nil?
|
3855
|
+
dtype = Utils.rb_type_to_dtype(ruby_dtype)
|
3856
|
+
elsif rb_temporal_types.include?(dtype)
|
3857
|
+
dtype = Utils.rb_type_to_dtype(dtype)
|
3753
3858
|
end
|
3859
|
+
# TODO
|
3860
|
+
time_unit = nil
|
3754
3861
|
|
3755
|
-
|
3756
|
-
|
3862
|
+
rb_series = RbSeries.new_from_anyvalues(name, values, strict)
|
3863
|
+
if time_unit.nil?
|
3864
|
+
s = Utils.wrap_s(rb_series)
|
3865
|
+
else
|
3866
|
+
s = Utils.wrap_s(rb_series).dt.cast_time_unit(time_unit)
|
3757
3867
|
end
|
3758
|
-
|
3759
|
-
|
3760
|
-
|
3761
|
-
|
3762
|
-
|
3763
|
-
|
3764
|
-
|
3765
|
-
|
3766
|
-
|
3767
|
-
|
3768
|
-
|
3868
|
+
return s._s
|
3869
|
+
elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
|
3870
|
+
raise Todo
|
3871
|
+
elsif ruby_dtype == Array
|
3872
|
+
return sequence_from_anyvalue_or_object(name, values)
|
3873
|
+
elsif ruby_dtype == Series
|
3874
|
+
return RbSeries.new_series_list(name, values.map(&:_s), strict)
|
3875
|
+
elsif ruby_dtype == RbSeries
|
3876
|
+
return RbSeries.new_series_list(name, values, strict)
|
3877
|
+
else
|
3878
|
+
constructor =
|
3879
|
+
if value.is_a?(String)
|
3880
|
+
if value.encoding == Encoding::UTF_8
|
3881
|
+
RbSeries.method(:new_str)
|
3882
|
+
else
|
3883
|
+
RbSeries.method(:new_binary)
|
3769
3884
|
end
|
3885
|
+
elsif value.is_a?(Integer) && values.any? { |v| v.is_a?(Float) }
|
3886
|
+
# TODO improve performance
|
3887
|
+
RbSeries.method(:new_opt_f64)
|
3888
|
+
else
|
3889
|
+
rb_type_to_constructor(value.class)
|
3770
3890
|
end
|
3771
|
-
if equal_to_inner
|
3772
|
-
dtype = Utils.rb_type_to_dtype(nested_dtype)
|
3773
|
-
# TODO rescue and fallback to new_object
|
3774
|
-
return RbSeries.new_list(name, values, dtype)
|
3775
|
-
end
|
3776
|
-
end
|
3777
|
-
|
3778
|
-
RbSeries.new_object(name, values, strict)
|
3779
|
-
else
|
3780
|
-
constructor = rb_type_to_constructor(value.class)
|
3781
3891
|
constructor.call(name, values, strict)
|
3782
3892
|
end
|
3783
3893
|
end
|
3784
3894
|
end
|
3785
3895
|
|
3896
|
+
def sequence_from_anyvalue_or_object(name, values)
|
3897
|
+
RbSeries.new_from_anyvalues(name, values, true)
|
3898
|
+
rescue
|
3899
|
+
RbSeries.new_object(name, values, false)
|
3900
|
+
end
|
3901
|
+
|
3786
3902
|
POLARS_TYPE_TO_CONSTRUCTOR = {
|
3787
3903
|
Float32 => RbSeries.method(:new_opt_f32),
|
3788
3904
|
Float64 => RbSeries.method(:new_opt_f64),
|
@@ -3795,7 +3911,8 @@ module Polars
|
|
3795
3911
|
UInt32 => RbSeries.method(:new_opt_u32),
|
3796
3912
|
UInt64 => RbSeries.method(:new_opt_u64),
|
3797
3913
|
Boolean => RbSeries.method(:new_opt_bool),
|
3798
|
-
Utf8 => RbSeries.method(:new_str)
|
3914
|
+
Utf8 => RbSeries.method(:new_str),
|
3915
|
+
Binary => RbSeries.method(:new_binary)
|
3799
3916
|
}
|
3800
3917
|
|
3801
3918
|
SYM_TYPE_TO_CONSTRUCTOR = {
|
@@ -3826,7 +3943,6 @@ module Polars
|
|
3826
3943
|
RB_TYPE_TO_CONSTRUCTOR = {
|
3827
3944
|
Float => RbSeries.method(:new_opt_f64),
|
3828
3945
|
Integer => RbSeries.method(:new_opt_i64),
|
3829
|
-
String => RbSeries.method(:new_str),
|
3830
3946
|
TrueClass => RbSeries.method(:new_opt_bool),
|
3831
3947
|
FalseClass => RbSeries.method(:new_opt_bool)
|
3832
3948
|
}
|