polars-df 0.3.1-x86_64-linux → 0.5.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -1
- data/Cargo.lock +486 -380
- data/Cargo.toml +0 -2
- data/LICENSE-THIRD-PARTY.txt +7353 -8473
- data/README.md +31 -2
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +263 -87
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +148 -8
- data/lib/polars/expr.rb +78 -11
- data/lib/polars/io.rb +73 -62
- data/lib/polars/lazy_frame.rb +107 -10
- data/lib/polars/lazy_functions.rb +7 -3
- data/lib/polars/list_expr.rb +70 -21
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/series.rb +190 -74
- data/lib/polars/string_expr.rb +150 -44
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +51 -9
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +4 -2
- metadata +4 -2
data/lib/polars/list_expr.rb
CHANGED
@@ -27,7 +27,7 @@ module Polars
|
|
27
27
|
# # │ 1 │
|
28
28
|
# # └─────┘
|
29
29
|
def lengths
|
30
|
-
Utils.wrap_expr(_rbexpr.
|
30
|
+
Utils.wrap_expr(_rbexpr.list_lengths)
|
31
31
|
end
|
32
32
|
|
33
33
|
# Sum all the lists in the array.
|
@@ -48,7 +48,7 @@ module Polars
|
|
48
48
|
# # │ 5 │
|
49
49
|
# # └────────┘
|
50
50
|
def sum
|
51
|
-
Utils.wrap_expr(_rbexpr.
|
51
|
+
Utils.wrap_expr(_rbexpr.list_sum)
|
52
52
|
end
|
53
53
|
|
54
54
|
# Compute the max value of the lists in the array.
|
@@ -69,7 +69,7 @@ module Polars
|
|
69
69
|
# # │ 3 │
|
70
70
|
# # └────────┘
|
71
71
|
def max
|
72
|
-
Utils.wrap_expr(_rbexpr.
|
72
|
+
Utils.wrap_expr(_rbexpr.list_max)
|
73
73
|
end
|
74
74
|
|
75
75
|
# Compute the min value of the lists in the array.
|
@@ -90,7 +90,7 @@ module Polars
|
|
90
90
|
# # │ 2 │
|
91
91
|
# # └────────┘
|
92
92
|
def min
|
93
|
-
Utils.wrap_expr(_rbexpr.
|
93
|
+
Utils.wrap_expr(_rbexpr.list_min)
|
94
94
|
end
|
95
95
|
|
96
96
|
# Compute the mean value of the lists in the array.
|
@@ -111,7 +111,7 @@ module Polars
|
|
111
111
|
# # │ 2.5 │
|
112
112
|
# # └────────┘
|
113
113
|
def mean
|
114
|
-
Utils.wrap_expr(_rbexpr.
|
114
|
+
Utils.wrap_expr(_rbexpr.list_mean)
|
115
115
|
end
|
116
116
|
|
117
117
|
# Sort the arrays in the list.
|
@@ -136,7 +136,7 @@ module Polars
|
|
136
136
|
# # │ [1, 2, 9] │
|
137
137
|
# # └───────────┘
|
138
138
|
def sort(reverse: false)
|
139
|
-
Utils.wrap_expr(_rbexpr.
|
139
|
+
Utils.wrap_expr(_rbexpr.list_sort(reverse))
|
140
140
|
end
|
141
141
|
|
142
142
|
# Reverse the arrays in the list.
|
@@ -161,7 +161,7 @@ module Polars
|
|
161
161
|
# # │ [2, 1, 9] │
|
162
162
|
# # └───────────┘
|
163
163
|
def reverse
|
164
|
-
Utils.wrap_expr(_rbexpr.
|
164
|
+
Utils.wrap_expr(_rbexpr.list_reverse)
|
165
165
|
end
|
166
166
|
|
167
167
|
# Get the unique/distinct values in the list.
|
@@ -184,8 +184,8 @@ module Polars
|
|
184
184
|
# # ╞═══════════╡
|
185
185
|
# # │ [1, 2] │
|
186
186
|
# # └───────────┘
|
187
|
-
def unique
|
188
|
-
Utils.wrap_expr(_rbexpr.
|
187
|
+
def unique(maintain_order: false)
|
188
|
+
Utils.wrap_expr(_rbexpr.list_unique(maintain_order))
|
189
189
|
end
|
190
190
|
|
191
191
|
# Concat the arrays in a Series dtype List in linear time.
|
@@ -255,7 +255,7 @@ module Polars
|
|
255
255
|
# # └──────┘
|
256
256
|
def get(index)
|
257
257
|
index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
|
258
|
-
Utils.wrap_expr(_rbexpr.
|
258
|
+
Utils.wrap_expr(_rbexpr.list_get(index))
|
259
259
|
end
|
260
260
|
|
261
261
|
# Get the value by index in the sublists.
|
@@ -265,6 +265,28 @@ module Polars
|
|
265
265
|
get(item)
|
266
266
|
end
|
267
267
|
|
268
|
+
# Take sublists by multiple indices.
|
269
|
+
#
|
270
|
+
# The indices may be defined in a single column, or by sublists in another
|
271
|
+
# column of dtype `List`.
|
272
|
+
#
|
273
|
+
# @param index [Object]
|
274
|
+
# Indices to return per sublist
|
275
|
+
# @param null_on_oob [Boolean]
|
276
|
+
# Behavior if an index is out of bounds:
|
277
|
+
# `true` -> set as null
|
278
|
+
# `false` -> raise an error
|
279
|
+
# Note that defaulting to raising an error is much cheaper
|
280
|
+
#
|
281
|
+
# @return [Expr]
|
282
|
+
def take(index, null_on_oob: false)
|
283
|
+
if index.is_a?(Array)
|
284
|
+
index = Series.new(index)
|
285
|
+
end
|
286
|
+
index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
|
287
|
+
Utils.wrap_expr(_rbexpr.list_take(index, null_on_oob))
|
288
|
+
end
|
289
|
+
|
268
290
|
# Get the first value of the sublists.
|
269
291
|
#
|
270
292
|
# @return [Expr]
|
@@ -331,7 +353,7 @@ module Polars
|
|
331
353
|
# # │ true │
|
332
354
|
# # └───────┘
|
333
355
|
def contains(item)
|
334
|
-
Utils.wrap_expr(_rbexpr.
|
356
|
+
Utils.wrap_expr(_rbexpr.list_contains(Utils.expr_to_lit_or_expr(item)._rbexpr))
|
335
357
|
end
|
336
358
|
|
337
359
|
# Join all string items in a sublist and place a separator between them.
|
@@ -357,7 +379,7 @@ module Polars
|
|
357
379
|
# # │ x y │
|
358
380
|
# # └───────┘
|
359
381
|
def join(separator)
|
360
|
-
Utils.wrap_expr(_rbexpr.
|
382
|
+
Utils.wrap_expr(_rbexpr.list_join(separator))
|
361
383
|
end
|
362
384
|
|
363
385
|
# Retrieve the index of the minimal value in every sublist.
|
@@ -382,7 +404,7 @@ module Polars
|
|
382
404
|
# # │ 1 │
|
383
405
|
# # └─────┘
|
384
406
|
def arg_min
|
385
|
-
Utils.wrap_expr(_rbexpr.
|
407
|
+
Utils.wrap_expr(_rbexpr.list_arg_min)
|
386
408
|
end
|
387
409
|
|
388
410
|
# Retrieve the index of the maximum value in every sublist.
|
@@ -407,7 +429,7 @@ module Polars
|
|
407
429
|
# # │ 0 │
|
408
430
|
# # └─────┘
|
409
431
|
def arg_max
|
410
|
-
Utils.wrap_expr(_rbexpr.
|
432
|
+
Utils.wrap_expr(_rbexpr.list_arg_max)
|
411
433
|
end
|
412
434
|
|
413
435
|
# Calculate the n-th discrete difference of every sublist.
|
@@ -426,11 +448,11 @@ module Polars
|
|
426
448
|
# # shape: (2,)
|
427
449
|
# # Series: 'a' [list[i64]]
|
428
450
|
# # [
|
429
|
-
# # [null, 1,
|
451
|
+
# # [null, 1, … 1]
|
430
452
|
# # [null, -8, -1]
|
431
453
|
# # ]
|
432
454
|
def diff(n: 1, null_behavior: "ignore")
|
433
|
-
Utils.wrap_expr(_rbexpr.
|
455
|
+
Utils.wrap_expr(_rbexpr.list_diff(n, null_behavior))
|
434
456
|
end
|
435
457
|
|
436
458
|
# Shift values by the given period.
|
@@ -447,11 +469,11 @@ module Polars
|
|
447
469
|
# # shape: (2,)
|
448
470
|
# # Series: 'a' [list[i64]]
|
449
471
|
# # [
|
450
|
-
# # [null, 1,
|
472
|
+
# # [null, 1, … 3]
|
451
473
|
# # [null, 10, 2]
|
452
474
|
# # ]
|
453
475
|
def shift(periods = 1)
|
454
|
-
Utils.wrap_expr(_rbexpr.
|
476
|
+
Utils.wrap_expr(_rbexpr.list_shift(periods))
|
455
477
|
end
|
456
478
|
|
457
479
|
# Slice every sublist.
|
@@ -477,7 +499,7 @@ module Polars
|
|
477
499
|
def slice(offset, length = nil)
|
478
500
|
offset = Utils.expr_to_lit_or_expr(offset, str_to_lit: false)._rbexpr
|
479
501
|
length = Utils.expr_to_lit_or_expr(length, str_to_lit: false)._rbexpr
|
480
|
-
Utils.wrap_expr(_rbexpr.
|
502
|
+
Utils.wrap_expr(_rbexpr.list_slice(offset, length))
|
481
503
|
end
|
482
504
|
|
483
505
|
# Slice the first `n` values of every sublist.
|
@@ -523,6 +545,33 @@ module Polars
|
|
523
545
|
slice(offset, n)
|
524
546
|
end
|
525
547
|
|
548
|
+
# Count how often the value produced by `element` occurs.
|
549
|
+
#
|
550
|
+
# @param element [Expr]
|
551
|
+
# An expression that produces a single value
|
552
|
+
#
|
553
|
+
# @return [Expr]
|
554
|
+
#
|
555
|
+
# @example
|
556
|
+
# df = Polars::DataFrame.new({"listcol" => [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]})
|
557
|
+
# df.select(Polars.col("listcol").arr.count_match(2).alias("number_of_twos"))
|
558
|
+
# # =>
|
559
|
+
# # shape: (5, 1)
|
560
|
+
# # ┌────────────────┐
|
561
|
+
# # │ number_of_twos │
|
562
|
+
# # │ --- │
|
563
|
+
# # │ u32 │
|
564
|
+
# # ╞════════════════╡
|
565
|
+
# # │ 0 │
|
566
|
+
# # │ 0 │
|
567
|
+
# # │ 2 │
|
568
|
+
# # │ 1 │
|
569
|
+
# # │ 0 │
|
570
|
+
# # └────────────────┘
|
571
|
+
def count_match(element)
|
572
|
+
Utils.wrap_expr(_rbexpr.list_count_match(Utils.expr_to_lit_or_expr(element)._rbexpr))
|
573
|
+
end
|
574
|
+
|
526
575
|
# Convert the series of type `List` to a series of type `Struct`.
|
527
576
|
#
|
528
577
|
# @param n_field_strategy ["first_non_null", "max_width"]
|
@@ -548,7 +597,7 @@ module Polars
|
|
548
597
|
# # └────────────┘
|
549
598
|
def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
|
550
599
|
raise Todo if name_generator
|
551
|
-
Utils.wrap_expr(_rbexpr.
|
600
|
+
Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, name_generator, 0))
|
552
601
|
end
|
553
602
|
|
554
603
|
# Run any polars expression against the lists' elements.
|
@@ -582,7 +631,7 @@ module Polars
|
|
582
631
|
# # │ 3 ┆ 2 ┆ [2.0, 1.0] │
|
583
632
|
# # └─────┴─────┴────────────┘
|
584
633
|
def eval(expr, parallel: false)
|
585
|
-
|
634
|
+
Utils.wrap_expr(_rbexpr.list_eval(expr._rbexpr, parallel))
|
586
635
|
end
|
587
636
|
end
|
588
637
|
end
|
@@ -185,7 +185,7 @@ module Polars
|
|
185
185
|
# # shape: (2,)
|
186
186
|
# # Series: 'a' [list[i64]]
|
187
187
|
# # [
|
188
|
-
# # [null, 1,
|
188
|
+
# # [null, 1, … 1]
|
189
189
|
# # [null, -8, -1]
|
190
190
|
# # ]
|
191
191
|
def diff(n: 1, null_behavior: "ignore")
|
@@ -206,7 +206,7 @@ module Polars
|
|
206
206
|
# # shape: (2,)
|
207
207
|
# # Series: 'a' [list[i64]]
|
208
208
|
# # [
|
209
|
-
# # [null, 1,
|
209
|
+
# # [null, 1, … 3]
|
210
210
|
# # [null, 10, 2]
|
211
211
|
# # ]
|
212
212
|
def shift(periods = 1)
|
data/lib/polars/series.rb
CHANGED
@@ -67,6 +67,12 @@ module Polars
|
|
67
67
|
._s
|
68
68
|
elsif values.is_a?(Array)
|
69
69
|
self._s = sequence_to_rbseries(name, values, dtype: dtype, strict: strict, dtype_if_empty: dtype_if_empty)
|
70
|
+
elsif defined?(Numo::NArray) && values.is_a?(Numo::NArray)
|
71
|
+
self._s = numo_to_rbseries(name, values, strict: strict, nan_to_null: nan_to_null)
|
72
|
+
|
73
|
+
if !dtype.nil?
|
74
|
+
self._s = self.cast(dtype, strict: true)._s
|
75
|
+
end
|
70
76
|
else
|
71
77
|
raise ArgumentError, "Series constructor called with unsupported type; got #{values.class.name}"
|
72
78
|
end
|
@@ -90,10 +96,14 @@ module Polars
|
|
90
96
|
#
|
91
97
|
# @return [Hash]
|
92
98
|
def flags
|
93
|
-
{
|
99
|
+
out = {
|
94
100
|
"SORTED_ASC" => _s.is_sorted_flag,
|
95
101
|
"SORTED_DESC" => _s.is_sorted_reverse_flag
|
96
102
|
}
|
103
|
+
if dtype.is_a?(List)
|
104
|
+
out["FAST_EXPLODE"] = _s.can_fast_explode_flag
|
105
|
+
end
|
106
|
+
out
|
97
107
|
end
|
98
108
|
|
99
109
|
# Get the inner dtype of a List typed Series.
|
@@ -222,14 +232,28 @@ module Polars
|
|
222
232
|
#
|
223
233
|
# @return [Series]
|
224
234
|
def *(other)
|
225
|
-
|
235
|
+
if is_temporal
|
236
|
+
raise ArgumentError, "first cast to integer before multiplying datelike dtypes"
|
237
|
+
elsif other.is_a?(DataFrame)
|
238
|
+
other * self
|
239
|
+
else
|
240
|
+
_arithmetic(other, :mul)
|
241
|
+
end
|
226
242
|
end
|
227
243
|
|
228
244
|
# Performs division.
|
229
245
|
#
|
230
246
|
# @return [Series]
|
231
247
|
def /(other)
|
232
|
-
|
248
|
+
if is_temporal
|
249
|
+
raise ArgumentError, "first cast to integer before dividing datelike dtypes"
|
250
|
+
end
|
251
|
+
|
252
|
+
if is_float
|
253
|
+
return _arithmetic(other, :div)
|
254
|
+
end
|
255
|
+
|
256
|
+
cast(Float64) / other
|
233
257
|
end
|
234
258
|
|
235
259
|
# Returns the modulo.
|
@@ -252,6 +276,16 @@ module Polars
|
|
252
276
|
to_frame.select(Polars.col(name).pow(power)).to_series
|
253
277
|
end
|
254
278
|
|
279
|
+
# Performs boolean not.
|
280
|
+
#
|
281
|
+
# @return [Series]
|
282
|
+
def !
|
283
|
+
if dtype == Boolean
|
284
|
+
return Utils.wrap_s(_s.not)
|
285
|
+
end
|
286
|
+
raise NotImplementedError
|
287
|
+
end
|
288
|
+
|
255
289
|
# Performs negation.
|
256
290
|
#
|
257
291
|
# @return [Series]
|
@@ -278,6 +312,10 @@ module Polars
|
|
278
312
|
return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s))
|
279
313
|
end
|
280
314
|
|
315
|
+
if item.is_a?(Series) && item.bool?
|
316
|
+
return filter(item)
|
317
|
+
end
|
318
|
+
|
281
319
|
if item.is_a?(Integer)
|
282
320
|
return _s.get_idx(item)
|
283
321
|
end
|
@@ -369,16 +407,26 @@ module Polars
|
|
369
407
|
# Check if any boolean value in the column is `true`.
|
370
408
|
#
|
371
409
|
# @return [Boolean]
|
372
|
-
def any
|
373
|
-
|
410
|
+
def any?(&block)
|
411
|
+
if block_given?
|
412
|
+
apply(&block).any?
|
413
|
+
else
|
414
|
+
to_frame.select(Polars.col(name).any).to_series[0]
|
415
|
+
end
|
374
416
|
end
|
417
|
+
alias_method :any, :any?
|
375
418
|
|
376
419
|
# Check if all boolean values in the column are `true`.
|
377
420
|
#
|
378
421
|
# @return [Boolean]
|
379
|
-
def all
|
380
|
-
|
422
|
+
def all?(&block)
|
423
|
+
if block_given?
|
424
|
+
apply(&block).all?
|
425
|
+
else
|
426
|
+
to_frame.select(Polars.col(name).all).to_series[0]
|
427
|
+
end
|
381
428
|
end
|
429
|
+
alias_method :all, :all?
|
382
430
|
|
383
431
|
# Compute the logarithm to a given base.
|
384
432
|
#
|
@@ -1314,6 +1362,7 @@ module Polars
|
|
1314
1362
|
def unique(maintain_order: false)
|
1315
1363
|
super
|
1316
1364
|
end
|
1365
|
+
alias_method :uniq, :unique
|
1317
1366
|
|
1318
1367
|
# Take values by index.
|
1319
1368
|
#
|
@@ -1535,6 +1584,7 @@ module Polars
|
|
1535
1584
|
def is_in(other)
|
1536
1585
|
super
|
1537
1586
|
end
|
1587
|
+
alias_method :in?, :is_in
|
1538
1588
|
|
1539
1589
|
# Get index values where Boolean Series evaluate `true`.
|
1540
1590
|
#
|
@@ -1660,6 +1710,7 @@ module Polars
|
|
1660
1710
|
end
|
1661
1711
|
alias_method :count, :len
|
1662
1712
|
alias_method :length, :len
|
1713
|
+
alias_method :size, :len
|
1663
1714
|
|
1664
1715
|
# Cast between data types.
|
1665
1716
|
#
|
@@ -1779,6 +1830,8 @@ module Polars
|
|
1779
1830
|
[Date, Time].include?(dtype) || dtype.is_a?(Datetime) || dtype.is_a?(Duration)
|
1780
1831
|
end
|
1781
1832
|
alias_method :datelike?, :is_datelike
|
1833
|
+
alias_method :is_temporal, :is_datelike
|
1834
|
+
alias_method :temporal?, :is_datelike
|
1782
1835
|
|
1783
1836
|
# Check if this Series has floating point numbers.
|
1784
1837
|
#
|
@@ -2432,6 +2485,7 @@ module Polars
|
|
2432
2485
|
end
|
2433
2486
|
Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls))
|
2434
2487
|
end
|
2488
|
+
alias_method :map, :apply
|
2435
2489
|
|
2436
2490
|
# Shift the values by a given period.
|
2437
2491
|
#
|
@@ -3483,7 +3537,7 @@ module Polars
|
|
3483
3537
|
# # 99
|
3484
3538
|
# # ]
|
3485
3539
|
def extend_constant(value, n)
|
3486
|
-
|
3540
|
+
Utils.wrap_s(_s.extend_constant(value, n))
|
3487
3541
|
end
|
3488
3542
|
|
3489
3543
|
# Flags the Series as sorted.
|
@@ -3531,6 +3585,13 @@ module Polars
|
|
3531
3585
|
ListNameSpace.new(self)
|
3532
3586
|
end
|
3533
3587
|
|
3588
|
+
# Create an object namespace of all binary related methods.
|
3589
|
+
#
|
3590
|
+
# @return [BinaryNameSpace]
|
3591
|
+
def bin
|
3592
|
+
BinaryNameSpace.new(self)
|
3593
|
+
end
|
3594
|
+
|
3534
3595
|
# Create an object namespace of all categorical related methods.
|
3535
3596
|
#
|
3536
3597
|
# @return [CatNameSpace]
|
@@ -3630,14 +3691,39 @@ module Polars
|
|
3630
3691
|
end
|
3631
3692
|
|
3632
3693
|
def _comp(other, op)
|
3694
|
+
if dtype == Boolean && Utils.bool?(other) && [:eq, :neq].include?(op)
|
3695
|
+
if (other == true && op == :eq) || (other == false && op == :neq)
|
3696
|
+
return clone
|
3697
|
+
elsif (other == false && op == :eq) || (other == true && op == :neq)
|
3698
|
+
return !self
|
3699
|
+
end
|
3700
|
+
end
|
3701
|
+
|
3702
|
+
if other.is_a?(::Time) && dtype.is_a?(Datetime)
|
3703
|
+
ts = Utils._datetime_to_pl_timestamp(other, time_unit)
|
3704
|
+
f = ffi_func("#{op}_<>", Int64, _s)
|
3705
|
+
fail if f.nil?
|
3706
|
+
return Utils.wrap_s(f.call(ts))
|
3707
|
+
elsif other.is_a?(::Date) && dtype == Date
|
3708
|
+
d = Utils._date_to_pl_date(other)
|
3709
|
+
f = ffi_func("#{op}_<>", Int32, _s)
|
3710
|
+
fail if f.nil?
|
3711
|
+
return Utils.wrap_s(f.call(d))
|
3712
|
+
end
|
3713
|
+
|
3633
3714
|
if other.is_a?(Series)
|
3634
3715
|
return Utils.wrap_s(_s.send(op, other._s))
|
3635
3716
|
end
|
3636
3717
|
|
3637
|
-
|
3638
|
-
|
3718
|
+
f = ffi_func("#{op}_<>", dtype, _s)
|
3719
|
+
if f.nil?
|
3720
|
+
raise NotImplementedError
|
3639
3721
|
end
|
3640
|
-
Utils.wrap_s(
|
3722
|
+
Utils.wrap_s(f.call(other))
|
3723
|
+
end
|
3724
|
+
|
3725
|
+
def ffi_func(name, dtype, _s)
|
3726
|
+
_s.method(name.sub("<>", DTYPE_TO_FFINAME.fetch(dtype))) if DTYPE_TO_FFINAME.key?(dtype)
|
3641
3727
|
end
|
3642
3728
|
|
3643
3729
|
def _arithmetic(other, op)
|
@@ -3648,14 +3734,16 @@ module Polars
|
|
3648
3734
|
return Utils.wrap_s(_s.send(op, other._s))
|
3649
3735
|
end
|
3650
3736
|
|
3651
|
-
if other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(String)
|
3652
|
-
|
3653
|
-
|
3654
|
-
if other.is_a?(Float) && !is_float
|
3655
|
-
raise Todo
|
3737
|
+
if (other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(String)) && !is_float
|
3738
|
+
_s2 = sequence_to_rbseries(name, [other])
|
3739
|
+
return Utils.wrap_s(_s.send(op, _s2))
|
3656
3740
|
end
|
3657
3741
|
|
3658
|
-
|
3742
|
+
f = ffi_func("#{op}_<>", dtype, _s)
|
3743
|
+
if f.nil?
|
3744
|
+
raise ArgumentError, "cannot do arithmetic with series of dtype: #{dtype} and argument of type: #{other.class.name}"
|
3745
|
+
end
|
3746
|
+
Utils.wrap_s(f.call(other))
|
3659
3747
|
end
|
3660
3748
|
|
3661
3749
|
DTYPE_TO_FFINAME = {
|
@@ -3688,25 +3776,57 @@ module Polars
|
|
3688
3776
|
values._s
|
3689
3777
|
end
|
3690
3778
|
|
3779
|
+
def numo_to_rbseries(name, values, strict: true, nan_to_null: false)
|
3780
|
+
# not needed yet
|
3781
|
+
# if !values.contiguous?
|
3782
|
+
# end
|
3783
|
+
|
3784
|
+
if values.shape.length == 1
|
3785
|
+
values, dtype = numo_values_and_dtype(values)
|
3786
|
+
strict = nan_to_null if [Numo::SFloat, Numo::DFloat].include?(dtype)
|
3787
|
+
if dtype == Numo::RObject
|
3788
|
+
sequence_to_rbseries(name, values.to_a, strict: strict)
|
3789
|
+
else
|
3790
|
+
constructor = numo_type_to_constructor(dtype)
|
3791
|
+
# TODO improve performance
|
3792
|
+
constructor.call(name, values.to_a, strict)
|
3793
|
+
end
|
3794
|
+
elsif values.shape.length == 2
|
3795
|
+
raise Todo
|
3796
|
+
else
|
3797
|
+
raise Todo
|
3798
|
+
end
|
3799
|
+
end
|
3800
|
+
|
3801
|
+
def numo_values_and_dtype(values)
|
3802
|
+
[values, values.class]
|
3803
|
+
end
|
3804
|
+
|
3805
|
+
def numo_type_to_constructor(dtype)
|
3806
|
+
{
|
3807
|
+
Numo::Float32 => RbSeries.method(:new_opt_f32),
|
3808
|
+
Numo::Float64 => RbSeries.method(:new_opt_f64),
|
3809
|
+
Numo::Int8 => RbSeries.method(:new_opt_i8),
|
3810
|
+
Numo::Int16 => RbSeries.method(:new_opt_i16),
|
3811
|
+
Numo::Int32 => RbSeries.method(:new_opt_i32),
|
3812
|
+
Numo::Int64 => RbSeries.method(:new_opt_i64),
|
3813
|
+
Numo::UInt8 => RbSeries.method(:new_opt_u8),
|
3814
|
+
Numo::UInt16 => RbSeries.method(:new_opt_u16),
|
3815
|
+
Numo::UInt32 => RbSeries.method(:new_opt_u32),
|
3816
|
+
Numo::UInt64 => RbSeries.method(:new_opt_u64)
|
3817
|
+
}.fetch(dtype)
|
3818
|
+
rescue KeyError
|
3819
|
+
RbSeries.method(:new_object)
|
3820
|
+
end
|
3821
|
+
|
3691
3822
|
def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
|
3692
3823
|
ruby_dtype = nil
|
3693
|
-
nested_dtype = nil
|
3694
3824
|
|
3695
3825
|
if (values.nil? || values.empty?) && dtype.nil?
|
3696
|
-
|
3697
|
-
# if dtype for empty sequence could be guessed
|
3698
|
-
# (e.g comparisons between self and other)
|
3699
|
-
dtype = dtype_if_empty
|
3700
|
-
else
|
3701
|
-
# default to Float32 type
|
3702
|
-
dtype = :f32
|
3703
|
-
end
|
3826
|
+
dtype = dtype_if_empty || Float32
|
3704
3827
|
end
|
3705
3828
|
|
3706
|
-
rb_temporal_types = []
|
3707
|
-
rb_temporal_types << ::Date if defined?(::Date)
|
3708
|
-
rb_temporal_types << ::DateTime if defined?(::DateTime)
|
3709
|
-
rb_temporal_types << ::Time if defined?(::Time)
|
3829
|
+
rb_temporal_types = [::Date, ::DateTime, ::Time]
|
3710
3830
|
|
3711
3831
|
value = _get_first_non_none(values)
|
3712
3832
|
if !value.nil?
|
@@ -3731,58 +3851,54 @@ module Polars
|
|
3731
3851
|
|
3732
3852
|
# temporal branch
|
3733
3853
|
if rb_temporal_types.include?(ruby_dtype)
|
3734
|
-
|
3735
|
-
|
3736
|
-
|
3737
|
-
|
3738
|
-
# end
|
3739
|
-
|
3740
|
-
if ruby_dtype == ::Date
|
3741
|
-
RbSeries.new_opt_date(name, values, strict)
|
3742
|
-
elsif ruby_dtype == ::Time
|
3743
|
-
RbSeries.new_opt_datetime(name, values, strict)
|
3744
|
-
elsif ruby_dtype == ::DateTime
|
3745
|
-
RbSeries.new_opt_datetime(name, values.map(&:to_time), strict)
|
3746
|
-
else
|
3747
|
-
raise Todo
|
3748
|
-
end
|
3749
|
-
elsif ruby_dtype == Array
|
3750
|
-
if nested_dtype.nil?
|
3751
|
-
nested_value = _get_first_non_none(value)
|
3752
|
-
nested_dtype = nested_value.nil? ? Float : nested_value.class
|
3854
|
+
if dtype.nil?
|
3855
|
+
dtype = Utils.rb_type_to_dtype(ruby_dtype)
|
3856
|
+
elsif rb_temporal_types.include?(dtype)
|
3857
|
+
dtype = Utils.rb_type_to_dtype(dtype)
|
3753
3858
|
end
|
3859
|
+
# TODO
|
3860
|
+
time_unit = nil
|
3754
3861
|
|
3755
|
-
|
3756
|
-
|
3862
|
+
rb_series = RbSeries.new_from_anyvalues(name, values, strict)
|
3863
|
+
if time_unit.nil?
|
3864
|
+
s = Utils.wrap_s(rb_series)
|
3865
|
+
else
|
3866
|
+
s = Utils.wrap_s(rb_series).dt.cast_time_unit(time_unit)
|
3757
3867
|
end
|
3758
|
-
|
3759
|
-
|
3760
|
-
|
3761
|
-
|
3762
|
-
|
3763
|
-
|
3764
|
-
|
3765
|
-
|
3766
|
-
|
3767
|
-
|
3768
|
-
|
3868
|
+
return s._s
|
3869
|
+
elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
|
3870
|
+
raise Todo
|
3871
|
+
elsif ruby_dtype == Array
|
3872
|
+
return sequence_from_anyvalue_or_object(name, values)
|
3873
|
+
elsif ruby_dtype == Series
|
3874
|
+
return RbSeries.new_series_list(name, values.map(&:_s), strict)
|
3875
|
+
elsif ruby_dtype == RbSeries
|
3876
|
+
return RbSeries.new_series_list(name, values, strict)
|
3877
|
+
else
|
3878
|
+
constructor =
|
3879
|
+
if value.is_a?(String)
|
3880
|
+
if value.encoding == Encoding::UTF_8
|
3881
|
+
RbSeries.method(:new_str)
|
3882
|
+
else
|
3883
|
+
RbSeries.method(:new_binary)
|
3769
3884
|
end
|
3885
|
+
elsif value.is_a?(Integer) && values.any? { |v| v.is_a?(Float) }
|
3886
|
+
# TODO improve performance
|
3887
|
+
RbSeries.method(:new_opt_f64)
|
3888
|
+
else
|
3889
|
+
rb_type_to_constructor(value.class)
|
3770
3890
|
end
|
3771
|
-
if equal_to_inner
|
3772
|
-
dtype = Utils.rb_type_to_dtype(nested_dtype)
|
3773
|
-
# TODO rescue and fallback to new_object
|
3774
|
-
return RbSeries.new_list(name, values, dtype)
|
3775
|
-
end
|
3776
|
-
end
|
3777
|
-
|
3778
|
-
RbSeries.new_object(name, values, strict)
|
3779
|
-
else
|
3780
|
-
constructor = rb_type_to_constructor(value.class)
|
3781
3891
|
constructor.call(name, values, strict)
|
3782
3892
|
end
|
3783
3893
|
end
|
3784
3894
|
end
|
3785
3895
|
|
3896
|
+
def sequence_from_anyvalue_or_object(name, values)
|
3897
|
+
RbSeries.new_from_anyvalues(name, values, true)
|
3898
|
+
rescue
|
3899
|
+
RbSeries.new_object(name, values, false)
|
3900
|
+
end
|
3901
|
+
|
3786
3902
|
POLARS_TYPE_TO_CONSTRUCTOR = {
|
3787
3903
|
Float32 => RbSeries.method(:new_opt_f32),
|
3788
3904
|
Float64 => RbSeries.method(:new_opt_f64),
|
@@ -3795,7 +3911,8 @@ module Polars
|
|
3795
3911
|
UInt32 => RbSeries.method(:new_opt_u32),
|
3796
3912
|
UInt64 => RbSeries.method(:new_opt_u64),
|
3797
3913
|
Boolean => RbSeries.method(:new_opt_bool),
|
3798
|
-
Utf8 => RbSeries.method(:new_str)
|
3914
|
+
Utf8 => RbSeries.method(:new_str),
|
3915
|
+
Binary => RbSeries.method(:new_binary)
|
3799
3916
|
}
|
3800
3917
|
|
3801
3918
|
SYM_TYPE_TO_CONSTRUCTOR = {
|
@@ -3826,7 +3943,6 @@ module Polars
|
|
3826
3943
|
RB_TYPE_TO_CONSTRUCTOR = {
|
3827
3944
|
Float => RbSeries.method(:new_opt_f64),
|
3828
3945
|
Integer => RbSeries.method(:new_opt_i64),
|
3829
|
-
String => RbSeries.method(:new_str),
|
3830
3946
|
TrueClass => RbSeries.method(:new_opt_bool),
|
3831
3947
|
FalseClass => RbSeries.method(:new_opt_bool)
|
3832
3948
|
}
|