polars-df 0.10.0-x86_64-linux-musl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +175 -0
  4. data/Cargo.lock +2536 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +38726 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +98 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +72 -0
  18. data/lib/polars/cat_name_space.rb +125 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +93 -0
  21. data/lib/polars/data_frame.rb +5418 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1444 -0
  24. data/lib/polars/date_time_name_space.rb +1484 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +31 -0
  27. data/lib/polars/expr.rb +6105 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +248 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1280 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +103 -0
  39. data/lib/polars/functions/range/int_range.rb +51 -0
  40. data/lib/polars/functions/repeat.rb +144 -0
  41. data/lib/polars/functions/whenthen.rb +96 -0
  42. data/lib/polars/functions.rb +57 -0
  43. data/lib/polars/group_by.rb +548 -0
  44. data/lib/polars/io.rb +890 -0
  45. data/lib/polars/lazy_frame.rb +2833 -0
  46. data/lib/polars/lazy_group_by.rb +84 -0
  47. data/lib/polars/list_expr.rb +791 -0
  48. data/lib/polars/list_name_space.rb +445 -0
  49. data/lib/polars/meta_expr.rb +222 -0
  50. data/lib/polars/name_expr.rb +198 -0
  51. data/lib/polars/plot.rb +109 -0
  52. data/lib/polars/rolling_group_by.rb +37 -0
  53. data/lib/polars/series.rb +4527 -0
  54. data/lib/polars/slice.rb +104 -0
  55. data/lib/polars/sql_context.rb +194 -0
  56. data/lib/polars/string_cache.rb +75 -0
  57. data/lib/polars/string_expr.rb +1519 -0
  58. data/lib/polars/string_name_space.rb +810 -0
  59. data/lib/polars/struct_expr.rb +98 -0
  60. data/lib/polars/struct_name_space.rb +96 -0
  61. data/lib/polars/testing.rb +507 -0
  62. data/lib/polars/utils.rb +422 -0
  63. data/lib/polars/version.rb +4 -0
  64. data/lib/polars/whenthen.rb +83 -0
  65. data/lib/polars-df.rb +1 -0
  66. data/lib/polars.rb +72 -0
  67. metadata +125 -0
@@ -0,0 +1,4527 @@
1
+ module Polars
2
+ # A Series represents a single column in a polars DataFrame.
3
+ class Series
4
+ include ExprDispatch
5
+
6
+ # Create a new Series.
7
+ #
8
+ # @param name [String, Array, nil]
9
+ # Name of the series. Will be used as a column name when used in a DataFrame.
10
+ # When not specified, name is set to an empty string.
11
+ # @param values [Array, nil]
12
+ # One-dimensional data in various forms. Supported are: Array, Range, Series, and Numo::NArray.
13
+ # @param dtype [Symbol, nil]
14
+ # Polars dtype of the Series data. If not specified, the dtype is inferred.
15
+ # @param strict [Boolean]
16
+ # Throw error on numeric overflow.
17
+ # @param nan_to_null [Boolean]
18
+ # Only passed along when `values` is a Numo::NArray; otherwise not used.
19
+ # @param dtype_if_empty [Symbol, nil]
20
+ # If no dtype is specified and values contains `nil` or an empty array,
21
+ # set the Polars dtype of the Series data. If not specified, Float32 is used.
22
+ #
23
+ # @example Constructing a Series by specifying name and values positionally:
24
+ # s = Polars::Series.new("a", [1, 2, 3])
25
+ #
26
+ # @example Notice that the dtype is automatically inferred as a polars `Int64`:
27
+ # s.dtype
28
+ # # => Polars::Int64
29
+ #
30
+ # @example Constructing a Series with a specific dtype:
31
+ # s2 = Polars::Series.new("a", [1, 2, 3], dtype: :f32)
32
+ #
33
+ # @example It is possible to construct a Series with values as the first positional argument. This syntax is considered an anti-pattern, but it can be useful in certain scenarios. You must specify any other arguments through keywords.
34
+ # s3 = Polars::Series.new([1, 2, 3])
35
def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false, dtype_if_empty: nil)
  # A non-string first argument is taken to be the values, provided no
  # second positional argument was given.
  unless name.nil? || name.is_a?(::String)
    raise ArgumentError, "Series name must be a string." unless values.nil?

    values = name
    name = nil
  end

  name ||= ""

  # TODO improve
  values = values.to_a if values.is_a?(Range) && values.begin.is_a?(::String)

  case values
  when nil
    self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
  when Series
    self._s = series_to_rbseries(name, values)
  when Range
    # One is added to the last value unless the range excludes its end.
    ranged = Polars.arange(
      values.first,
      values.last + (values.exclude_end? ? 0 : 1),
      step: 1,
      eager: true,
      dtype: dtype
    )
    self._s = ranged.rename(name, in_place: true)._s
  when ::Array
    self._s = sequence_to_rbseries(name, values, dtype: dtype, strict: strict, dtype_if_empty: dtype_if_empty)
  else
    # Numo support is optional, so the constant is only consulted when defined.
    unless defined?(Numo::NArray) && values.is_a?(Numo::NArray)
      raise ArgumentError, "Series constructor called with unsupported type; got #{values.class.name}"
    end

    self._s = numo_to_rbseries(name, values, strict: strict, nan_to_null: nan_to_null)
    # An explicitly requested dtype is applied by casting after conversion.
    self._s = cast(dtype, strict: true)._s unless dtype.nil?
  end
end
80
+
81
+ # @private
82
def self._from_rbseries(s)
  # Skip #initialize and attach the given object directly.
  Series.allocate.tap { |wrapped| wrapped._s = s }
end
87
+
88
+ # Get the data type of this Series.
89
+ #
90
+ # @return [Symbol]
91
def dtype = _s.dtype
94
+
95
+ # Get flags that are set on the Series.
96
+ #
97
+ # @return [Hash]
98
def flags
  # The two sorted flags are always reported.
  result = {
    "SORTED_ASC" => _s.is_sorted_flag,
    "SORTED_DESC" => _s.is_sorted_reverse_flag
  }
  # The fast-explode flag is only added for List dtypes.
  result["FAST_EXPLODE"] = _s.can_fast_explode_flag if dtype.is_a?(List)
  result
end
108
+
109
+ # Get the inner dtype of a List typed Series.
110
+ #
111
+ # @return [Symbol]
112
def inner_dtype = _s.inner_dtype
115
+
116
+ # Get the name of this Series.
117
+ #
118
+ # @return [String]
119
def name = _s.name
122
+
123
+ # Shape of this Series.
124
+ #
125
+ # @return [Array]
126
def shape
  # A Series has a single dimension, so the shape is a one-element array.
  length_of_series = _s.len
  [length_of_series]
end
129
+
130
+ # Get the time unit of underlying Datetime Series as `"ns"`, `"us"`, or `"ms"`.
131
+ #
132
+ # @return [String]
133
def time_unit = _s.time_unit
136
+
137
+ # Returns a string representing the Series.
138
+ #
139
+ # @return [String]
140
def to_s = _s.to_s
alias_method :inspect, :to_s
144
+
145
+ # Bitwise AND.
146
+ #
147
+ # @return [Series]
148
def &(other)
  # A non-Series operand is wrapped in a single-element Series first.
  other = Series.new([other]) unless other.is_a?(Series)
  Utils.wrap_s(_s.bitand(other._s))
end
154
+
155
+ # Bitwise OR.
156
+ #
157
+ # @return [Series]
158
def |(other)
  # A non-Series operand is wrapped in a single-element Series first.
  other = Series.new([other]) unless other.is_a?(Series)
  Utils.wrap_s(_s.bitor(other._s))
end
164
+
165
+ # Bitwise XOR.
166
+ #
167
+ # @return [Series]
168
def ^(other)
  # A non-Series operand is wrapped in a single-element Series first.
  other = Series.new([other]) unless other.is_a?(Series)
  Utils.wrap_s(_s.bitxor(other._s))
end
174
+
175
+ # Equal.
176
+ #
177
+ # @return [Series]
178
def ==(other) = _comp(other, :eq)
181
+
182
+ # Not equal.
183
+ #
184
+ # @return [Series]
185
def !=(other) = _comp(other, :neq)
188
+
189
+ # Greater than.
190
+ #
191
+ # @return [Series]
192
def >(other) = _comp(other, :gt)
195
+
196
+ # Less than.
197
+ #
198
+ # @return [Series]
199
def <(other) = _comp(other, :lt)
202
+
203
+ # Greater than or equal.
204
+ #
205
+ # @return [Series]
206
def >=(other) = _comp(other, :gt_eq)
209
+
210
+ # Less than or equal.
211
+ #
212
+ # @return [Series]
213
def <=(other) = _comp(other, :lt_eq)
216
+
217
+ # Method equivalent of operator expression `series <= other`.
218
+ #
219
+ # @return [Series]
220
def le(other) = self <= other
223
+
224
+ # Method equivalent of operator expression `series < other`.
225
+ #
226
+ # @return [Series]
227
def lt(other) = self < other
230
+
231
+ # Method equivalent of operator expression `series == other`.
232
+ #
233
+ # @return [Series]
234
def eq(other) = self == other
237
+
238
+ # Method equivalent of equality operator `series == other` where `nil == nil`.
239
+ #
240
+ # This differs from the standard `eq` where null values are propagated.
241
+ #
242
+ # @param other [Object]
243
+ # A literal or expression value to compare with.
244
+ #
245
+ # @return [Object]
246
+ #
247
+ # @example
248
+ # s1 = Polars::Series.new("a", [333, 200, nil])
249
+ # s2 = Polars::Series.new("a", [100, 200, nil])
250
+ # s1.eq(s2)
251
+ # # =>
252
+ # # shape: (3,)
253
+ # # Series: 'a' [bool]
254
+ # # [
255
+ # # false
256
+ # # true
257
+ # # null
258
+ # # ]
259
+ #
260
+ # @example
261
+ # s1.eq_missing(s2)
262
+ # # =>
263
+ # # shape: (3,)
264
+ # # Series: 'a' [bool]
265
+ # # [
266
+ # # false
267
+ # # true
268
+ # # true
269
+ # # ]
270
def eq_missing(other)
  # An expression operand is compared against this Series as a literal.
  return Polars.lit(self).eq_missing(other) if other.is_a?(Expr)

  # Otherwise evaluate the comparison on a one-column frame built from self.
  frame = to_frame
  frame.select(Polars.col(name).eq_missing(other)).to_series
end
276
+
277
+ # Method equivalent of operator expression `series != other`.
278
+ #
279
+ # @return [Series]
280
def ne(other) = self != other
283
+
284
+ # Method equivalent of inequality operator `series != other` where `nil == nil`.
285
+ #
286
+ # This differs from the standard `ne` where null values are propagated.
287
+ #
288
+ # @param other [Object]
289
+ # A literal or expression value to compare with.
290
+ #
291
+ # @return [Object]
292
+ #
293
+ # @example
294
+ # s1 = Polars::Series.new("a", [333, 200, nil])
295
+ # s2 = Polars::Series.new("a", [100, 200, nil])
296
+ # s1.ne(s2)
297
+ # # =>
298
+ # # shape: (3,)
299
+ # # Series: 'a' [bool]
300
+ # # [
301
+ # # true
302
+ # # false
303
+ # # null
304
+ # # ]
305
+ #
306
+ # @example
307
+ # s1.ne_missing(s2)
308
+ # # =>
309
+ # # shape: (3,)
310
+ # # Series: 'a' [bool]
311
+ # # [
312
+ # # true
313
+ # # false
314
+ # # false
315
+ # # ]
316
def ne_missing(other)
  # An expression operand is compared against this Series as a literal.
  return Polars.lit(self).ne_missing(other) if other.is_a?(Expr)

  # Otherwise evaluate the comparison on a one-column frame built from self.
  frame = to_frame
  frame.select(Polars.col(name).ne_missing(other)).to_series
end
322
+
323
+ # Method equivalent of operator expression `series >= other`.
324
+ #
325
+ # @return [Series]
326
def ge(other) = self >= other
329
+
330
+ # Method equivalent of operator expression `series > other`.
331
+ #
332
+ # @return [Series]
333
def gt(other) = self > other
336
+
337
+ # Performs addition.
338
+ #
339
+ # @return [Series]
340
def +(other) = _arithmetic(other, :add)
343
+
344
+ # Performs subtraction.
345
+ #
346
+ # @return [Series]
347
def -(other) = _arithmetic(other, :sub)
350
+
351
+ # Performs multiplication.
352
+ #
353
+ # @return [Series]
354
def *(other)
  raise ArgumentError, "first cast to integer before multiplying datelike dtypes" if is_temporal

  # With a DataFrame operand the operands are swapped and DataFrame#* is used.
  return other * self if other.is_a?(DataFrame)

  _arithmetic(other, :mul)
end
363
+
364
+ # Performs division.
365
+ #
366
+ # @return [Series]
367
def /(other)
  raise ArgumentError, "first cast to integer before dividing datelike dtypes" if is_temporal

  # A non-float Series is cast to Float64 and the division is retried on the result.
  is_float ? _arithmetic(other, :div) : cast(Float64) / other
end
378
+
379
+ # Returns the modulo.
380
+ #
381
+ # @return [Series]
382
def %(other)
  raise ArgumentError, "first cast to integer before applying modulo on datelike dtypes" if is_datelike

  _arithmetic(other, :rem)
end
388
+
389
+ # Raises to the power of exponent.
390
+ #
391
+ # @return [Series]
392
def **(power)
  raise ArgumentError, "first cast to integer before raising datelike dtypes to a power" if is_datelike

  # Evaluated as a `pow` expression on a one-column frame built from self.
  frame = to_frame
  frame.select(Polars.col(name).pow(power)).to_series
end
398
+
399
+ # Performs boolean not.
400
+ #
401
+ # @return [Series]
402
def !
  # Only Boolean series can be negated.
  raise NotImplementedError unless dtype == Boolean

  Utils.wrap_s(_s.not)
end
408
+
409
+ # Performs negation.
410
+ #
411
+ # @return [Series]
412
def -@
  # Negation is written as subtraction from zero.
  zero = 0
  zero - self
end
415
+
416
+ # Returns an enumerator.
417
+ #
418
+ # @return [Object]
419
def each
  # Without a block, hand back an enumerator over this same method.
  return to_enum(:each) unless block_given?

  length.times { |position| yield self[position] }
end
426
+
427
+ # Returns elements of the Series.
428
+ #
429
+ # @return [Object]
430
def [](item)
  case item
  when Series
    # Integer-typed Series select by position; boolean Series act as a mask.
    integer_dtypes = [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64]
    return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s)) if integer_dtypes.include?(item.dtype)
    return filter(item) if item.bool?
  when Integer
    # A negative index is offset by the length before lookup.
    position = item < 0 ? len + item : item
    return _s.get_idx(position)
  when Range
    return Slice.new(self).apply(item)
  end

  # Anything left is accepted only if Utils recognises it as an int sequence.
  if Utils.is_int_sequence(item)
    return Utils.wrap_s(_s.take_with_series(_pos_idxs(Series.new("", item))._s))
  end

  raise ArgumentError, "Cannot get item of type: #{item.class.name}"
end
457
+
458
+ # Sets an element of the Series.
459
+ #
460
+ # @return [Object]
461
def []=(key, value)
  # Array values are only accepted for numeric or datelike series.
  if value.is_a?(::Array)
    unless is_numeric || is_datelike
      raise ArgumentError, "cannot set Series of dtype: #{dtype} with list/tuple as value; use a scalar value"
    end

    scatter(key, value)
    return
  end

  case key
  when Series
    # Boolean keys go through `set`; UInt64 keys are cast to UInt32 before `scatter`.
    if key.dtype == Boolean
      self._s = set(key, value)._s
    elsif key.dtype == UInt64
      self._s = scatter(key.cast(UInt32), value)._s
    elsif key.dtype == UInt32
      self._s = scatter(key, value)._s
    else
      raise Todo
    end
  when ::Array
    # Arrays, ranges and integers are turned into a UInt32 Series and
    # re-dispatched through this same method.
    index_series = Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))
    self[index_series] = value
  when Range
    index_series = Series.new("", key, dtype: UInt32)
    self[index_series] = value
  when Integer
    self[[key]] = value
  else
    raise ArgumentError, "cannot use #{key} for indexing"
  end
end
492
+
493
+ # Return an estimation of the total (heap) allocated size of the Series.
494
+ #
495
+ # Estimated size is given in the specified unit (bytes by default).
496
+ #
497
+ # This estimation is the sum of the size of its buffers, validity, including
498
+ # nested arrays. Multiple arrays may share buffers and bitmaps. Therefore, the
499
+ # size of 2 arrays is not the sum of the sizes computed from this function. In
500
+ # particular, StructArray's size is an upper bound.
501
+ #
502
+ # When an array is sliced, its allocated size remains constant because the buffer
503
+ # is unchanged. However, this function will yield a smaller number. This is because
504
+ # this function returns the visible size of the buffer, not its total capacity.
505
+ #
506
+ # FFI buffers are included in this estimation.
507
+ #
508
+ # @param unit ["b", "kb", "mb", "gb", "tb"]
509
+ # Scale the returned size to the given unit.
510
+ #
511
+ # @return [Numeric]
512
+ #
513
+ # @example
514
+ # s = Polars::Series.new("values", 1..1_000_000, dtype: :u32)
515
+ # s.estimated_size
516
+ # # => 4000000
517
+ # s.estimated_size("mb")
518
+ # # => 3.814697265625
519
def estimated_size(unit = "b")
  # The raw size is converted to the requested unit by Utils.scale_bytes.
  Utils.scale_bytes(_s.estimated_size, to: unit)
end
523
+
524
+ # Compute the square root of the elements.
525
+ #
526
+ # @return [Series]
527
def sqrt
  # The square root is taken by raising to the power one half.
  exponent = 0.5
  self**exponent
end
530
+
531
+ # Check if any boolean value in the column is `true`.
532
+ #
533
+ # @return [Boolean]
534
def any?(ignore_nulls: true, &block)
  return _s.any(ignore_nulls) unless block_given?

  # With a block, map the elements through it and test the mapped Series.
  apply(skip_nulls: ignore_nulls, &block).any?
end
alias_method :any, :any?
542
+
543
+ # Check if all boolean values in the column are `true`.
544
+ #
545
+ # @return [Boolean]
546
def all?(ignore_nulls: true, &block)
  return _s.all(ignore_nulls) unless block_given?

  # With a block, map the elements through it and test the mapped Series.
  apply(skip_nulls: ignore_nulls, &block).all?
end
alias_method :all, :all?
554
+
555
+ # Check if all boolean values in the column are `false`.
556
+ #
557
+ # @return [Boolean]
558
def none?(&block)
  # With a block, map the elements through it and test the mapped Series.
  return apply(&block).none? if block_given?

  # Otherwise negate every value and check that all negations hold.
  frame = to_frame
  frame.select(Polars.col(name).is_not.all).to_series[0]
end
alias_method :none, :none?
566
+
567
+ # Compute the logarithm to a given base.
568
+ #
569
+ # @param base [Float]
570
+ # Given base, defaults to `Math::E`.
571
+ #
572
+ # @return [Series]
573
def log(base = Math::E)
  # Forward the base explicitly to the inherited implementation.
  super(base)
end
576
+
577
+ # Compute the base 10 logarithm of the input array, element-wise.
578
+ #
579
+ # @return [Series]
580
def log10
  # No arguments to forward; defer to the inherited implementation.
  super()
end
583
+
584
+ # Compute the exponential, element-wise.
585
+ #
586
+ # @return [Series]
587
def exp
  # No arguments to forward; defer to the inherited implementation.
  super()
end
590
+
591
+ # Create a new Series that copies data from this Series without null values.
592
+ #
593
+ # @return [Series]
594
def drop_nulls
  # No arguments to forward; defer to the inherited implementation.
  super()
end
597
+
598
+ # Drop NaN values.
599
+ #
600
+ # @return [Series]
601
def drop_nans
  # No arguments to forward; defer to the inherited implementation.
  super()
end
604
+
605
+ # Cast this Series to a DataFrame.
606
+ #
607
+ # @return [DataFrame]
608
def to_frame
  # Build a one-column RbDataFrame from the wrapped object, then wrap it.
  rb_frame = RbDataFrame.new([_s])
  Utils.wrap_df(rb_frame)
end
611
+
612
+ # Quick summary statistics of a series.
613
+ #
614
+ # Series with mixed datatypes will return summary statistics for the datatype of
615
+ # the first value.
616
+ #
617
+ # @return [DataFrame]
618
+ #
619
+ # @example
620
+ # series_num = Polars::Series.new([1, 2, 3, 4, 5])
621
+ # series_num.describe
622
+ # # =>
623
+ # # shape: (6, 2)
624
+ # # ┌────────────┬──────────┐
625
+ # # │ statistic ┆ value │
626
+ # # │ --- ┆ --- │
627
+ # # │ str ┆ f64 │
628
+ # # ╞════════════╪══════════╡
629
+ # # │ min ┆ 1.0 │
630
+ # # │ max ┆ 5.0 │
631
+ # # │ null_count ┆ 0.0 │
632
+ # # │ mean ┆ 3.0 │
633
+ # # │ std ┆ 1.581139 │
634
+ # # │ count ┆ 5.0 │
635
+ # # └────────────┴──────────┘
636
+ #
637
+ # @example
638
+ # series_str = Polars::Series.new(["a", "a", nil, "b", "c"])
639
+ # series_str.describe
640
+ # # =>
641
+ # # shape: (3, 2)
642
+ # # ┌────────────┬───────┐
643
+ # # │ statistic ┆ value │
644
+ # # │ --- ┆ --- │
645
+ # # │ str ┆ i64 │
646
+ # # ╞════════════╪═══════╡
647
+ # # │ unique ┆ 4 │
648
+ # # │ null_count ┆ 1 │
649
+ # # │ count ┆ 5 │
650
+ # # └────────────┴───────┘
651
def describe
  raise ArgumentError, "Series must contain at least one value" if len == 0

  # Which statistics are reported depends on the kind of dtype.
  stats =
    if is_numeric
      as_float = cast(:f64)
      {
        "min" => as_float.min,
        "max" => as_float.max,
        "null_count" => as_float.null_count,
        "mean" => as_float.mean,
        "std" => as_float.std,
        "count" => as_float.len
      }
    elsif is_boolean
      {
        "sum" => sum,
        "null_count" => null_count,
        "count" => len
      }
    elsif is_utf8
      {
        "unique" => unique.length,
        "null_count" => null_count,
        "count" => len
      }
    elsif is_datelike
      # we coerce all to string, because a polars column
      # only has a single dtype and dates: datetime and count: int don't match
      {
        "min" => dt.min.to_s,
        "max" => dt.max.to_s,
        "null_count" => null_count.to_s,
        "count" => len.to_s
      }
    else
      raise TypeError, "This type is not supported"
    end

  Polars::DataFrame.new({"statistic" => stats.keys, "value" => stats.values})
end
693
+
694
+ # Reduce this Series to the sum value.
695
+ #
696
+ # @return [Numeric]
697
+ #
698
+ # @note
699
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
700
+ # `:i64` before summing to prevent overflow issues.
701
+ #
702
+ # @example
703
+ # s = Polars::Series.new("a", [1, 2, 3])
704
+ # s.sum
705
+ # # => 6
706
def sum = _s.sum
709
+
710
+ # Reduce this Series to the mean value.
711
+ #
712
+ # @return [Float, nil]
713
+ #
714
+ # @example
715
+ # s = Polars::Series.new("a", [1, 2, 3])
716
+ # s.mean
717
+ # # => 2.0
718
def mean = _s.mean
721
+
722
+ # Reduce this Series to the product value.
723
+ #
724
+ # @return [Numeric]
725
def product
  # Reduce via a `product` expression and take the single resulting value.
  reduced = to_frame.select(Polars.col(name).product)
  reduced.to_series[0]
end
728
+
729
+ # Get the minimal value in this Series.
730
+ #
731
+ # @return [Object]
732
+ #
733
+ # @example
734
+ # s = Polars::Series.new("a", [1, 2, 3])
735
+ # s.min
736
+ # # => 1
737
def min = _s.min
740
+
741
+ # Get the maximum value in this Series.
742
+ #
743
+ # @return [Object]
744
+ #
745
+ # @example
746
+ # s = Polars::Series.new("a", [1, 2, 3])
747
+ # s.max
748
+ # # => 3
749
def max = _s.max
752
+
753
+ # Get maximum value, but propagate/poison encountered NaN values.
754
+ #
755
+ # @return [Object]
756
def nan_max
  # The reduction yields a frame; its top-left cell is the result.
  reduced = to_frame.select(Polars.col(name).nan_max)
  reduced[0, 0]
end
759
+
760
+ # Get minimum value, but propagate/poison encountered NaN values.
761
+ #
762
+ # @return [Object]
763
def nan_min
  # The reduction yields a frame; its top-left cell is the result.
  reduced = to_frame.select(Polars.col(name).nan_min)
  reduced[0, 0]
end
766
+
767
+ # Get the standard deviation of this Series.
768
+ #
769
+ # @param ddof [Integer]
770
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
771
+ # where N represents the number of elements.
772
+ #
773
+ # @return [Float, nil]
774
+ #
775
+ # @example
776
+ # s = Polars::Series.new("a", [1, 2, 3])
777
+ # s.std
778
+ # # => 1.0
779
def std(ddof: 1)
  # Non-numeric series yield nil.
  return nil unless is_numeric

  to_frame.select(Polars.col(name).std(ddof: ddof)).to_series[0]
end
786
+
787
+ # Get variance of this Series.
788
+ #
789
+ # @param ddof [Integer]
790
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
791
+ # where N represents the number of elements.
792
+ #
793
+ # @return [Float, nil]
794
+ #
795
+ # @example
796
+ # s = Polars::Series.new("a", [1, 2, 3])
797
+ # s.var
798
+ # # => 1.0
799
def var(ddof: 1)
  # Non-numeric series yield nil.
  return nil unless is_numeric

  to_frame.select(Polars.col(name).var(ddof: ddof)).to_series[0]
end
806
+
807
+ # Get the median of this Series.
808
+ #
809
+ # @return [Float, nil]
810
+ #
811
+ # @example
812
+ # s = Polars::Series.new("a", [1, 2, 3])
813
+ # s.median
814
+ # # => 2.0
815
def median = _s.median
818
+
819
+ # Get the quantile value of this Series.
820
+ #
821
+ # @param quantile [Float, nil]
822
+ # Quantile between 0.0 and 1.0.
823
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
824
+ # Interpolation method.
825
+ #
826
+ # @return [Float, nil]
827
+ #
828
+ # @example
829
+ # s = Polars::Series.new("a", [1, 2, 3])
830
+ # s.quantile(0.5)
831
+ # # => 2.0
832
def quantile(quantile, interpolation: "nearest")
  # The interpolation keyword is passed positionally to the wrapped object.
  wrapped = _s
  wrapped.quantile(quantile, interpolation)
end
835
+
836
+ # Get dummy variables.
837
+ #
838
+ # @return [DataFrame]
839
+ #
840
+ # @example
841
+ # s = Polars::Series.new("a", [1, 2, 3])
842
+ # s.to_dummies
843
+ # # =>
844
+ # # shape: (3, 3)
845
+ # # ┌─────┬─────┬─────┐
846
+ # # │ a_1 ┆ a_2 ┆ a_3 │
847
+ # # │ --- ┆ --- ┆ --- │
848
+ # # │ u8 ┆ u8 ┆ u8 │
849
+ # # ╞═════╪═════╪═════╡
850
+ # # │ 1 ┆ 0 ┆ 0 │
851
+ # # │ 0 ┆ 1 ┆ 0 │
852
+ # # │ 0 ┆ 0 ┆ 1 │
853
+ # # └─────┴─────┴─────┘
854
def to_dummies(separator: "_", drop_first: false)
  # Both keywords are passed positionally to the wrapped object.
  dummies = _s.to_dummies(separator, drop_first)
  Utils.wrap_df(dummies)
end
857
+
858
+ # Bin continuous values into discrete categories.
859
+ #
860
+ # @param breaks [Array]
861
+ # List of unique cut points.
862
+ # @param labels [Array]
863
+ # Names of the categories. The number of labels must be equal to the number
864
+ # of cut points plus one.
865
+ # @param left_closed [Boolean]
866
+ # Set the intervals to be left-closed instead of right-closed.
867
+ # @param include_breaks [Boolean]
868
+ # Include a column with the right endpoint of the bin each observation falls
869
+ # in. This will change the data type of the output from a
870
+ # `Categorical` to a `Struct`.
871
+ #
872
+ # @return [Series]
873
+ #
874
+ # @example Divide the column into three categories.
875
+ # s = Polars::Series.new("foo", [-2, -1, 0, 1, 2])
876
+ # s.cut([-1, 1], labels: ["a", "b", "c"])
877
+ # # =>
878
+ # # shape: (5,)
879
+ # # Series: 'foo' [cat]
880
+ # # [
881
+ # # "a"
882
+ # # "a"
883
+ # # "b"
884
+ # # "b"
885
+ # # "c"
886
+ # # ]
887
+ #
888
+ # @example Create a DataFrame with the breakpoint and category for each value.
889
+ # cut = s.cut([-1, 1], include_breaks: true).alias("cut")
890
+ # s.to_frame.with_columns(cut).unnest("cut")
891
+ # # =>
892
+ # # shape: (5, 3)
893
+ # # ┌─────┬─────────────┬────────────┐
894
+ # # │ foo ┆ break_point ┆ category │
895
+ # # │ --- ┆ --- ┆ --- │
896
+ # # │ i64 ┆ f64 ┆ cat │
897
+ # # ╞═════╪═════════════╪════════════╡
898
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
899
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
900
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
901
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
902
+ # # │ 2 ┆ inf ┆ (1, inf] │
903
+ # # └─────┴─────────────┴────────────┘
904
def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
  # Evaluate the `cut` expression on a one-column frame built from self.
  frame = to_frame
  binned = frame.select(
    Polars.col(name).cut(
      breaks,
      labels: labels,
      left_closed: left_closed,
      include_breaks: include_breaks
    )
  ).to_series

  return binned unless include_breaks

  # With breaks included, the struct fields are given fixed names.
  binned.struct.rename_fields(["break_point", "category"])
end
924
+
925
+ # Bin continuous values into discrete categories based on their quantiles.
926
+ #
927
+ # @param quantiles [Array]
928
+ # Either a list of quantile probabilities between 0 and 1 or a positive
929
+ # integer determining the number of bins with uniform probability.
930
+ # @param labels [Array]
931
+ # Names of the categories. The number of labels must be equal to the number
932
+ # of cut points plus one.
933
+ # @param left_closed [Boolean]
934
+ # Set the intervals to be left-closed instead of right-closed.
935
+ # @param allow_duplicates [Boolean]
936
+ # If set to `true`, duplicates in the resulting quantiles are dropped,
937
+ # rather than raising a `DuplicateError`. This can happen even with unique
938
+ # probabilities, depending on the data.
939
+ # @param include_breaks [Boolean]
940
+ # Include a column with the right endpoint of the bin each observation falls
941
+ # in. This will change the data type of the output from a
942
+ # `Categorical` to a `Struct`.
943
+ #
944
+ # @return [Series]
945
+ #
946
+ # @example Divide a column into three categories according to pre-defined quantile probabilities.
947
+ # s = Polars::Series.new("foo", [-2, -1, 0, 1, 2])
948
+ # s.qcut([0.25, 0.75], labels: ["a", "b", "c"])
949
+ # # =>
950
+ # # shape: (5,)
951
+ # # Series: 'foo' [cat]
952
+ # # [
953
+ # # "a"
954
+ # # "a"
955
+ # # "b"
956
+ # # "b"
957
+ # # "c"
958
+ # # ]
959
+ #
960
+ # @example Divide a column into two categories using uniform quantile probabilities.
961
+ # s.qcut(2, labels: ["low", "high"], left_closed: true)
962
+ # # =>
963
+ # # shape: (5,)
964
+ # # Series: 'foo' [cat]
965
+ # # [
966
+ # # "low"
967
+ # # "low"
968
+ # # "high"
969
+ # # "high"
970
+ # # "high"
971
+ # # ]
972
+ #
973
+ # @example Create a DataFrame with the breakpoint and category for each value.
974
+ # cut = s.qcut([0.25, 0.75], include_breaks: true).alias("cut")
975
+ # s.to_frame.with_columns(cut).unnest("cut")
976
+ # # =>
977
+ # # shape: (5, 3)
978
+ # # ┌─────┬─────────────┬────────────┐
979
+ # # │ foo ┆ break_point ┆ category │
980
+ # # │ --- ┆ --- ┆ --- │
981
+ # # │ i64 ┆ f64 ┆ cat │
982
+ # # ╞═════╪═════════════╪════════════╡
983
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
984
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
985
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
986
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
987
+ # # │ 2 ┆ inf ┆ (1, inf] │
988
+ # # └─────┴─────────────┴────────────┘
989
def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
  # Evaluate the `qcut` expression on a one-column frame built from self.
  frame = to_frame
  binned = frame.select(
    Polars.col(name).qcut(
      quantiles,
      labels: labels,
      left_closed: left_closed,
      allow_duplicates: allow_duplicates,
      include_breaks: include_breaks
    )
  ).to_series

  return binned unless include_breaks

  # With breaks included, the struct fields are given fixed names.
  binned.struct.rename_fields(["break_point", "category"])
end
1010
+
1011
+ # Get the lengths of runs of identical values.
1012
+ #
1013
+ # @return [Series]
1014
+ #
1015
+ # @example
1016
+ # s = Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3])
1017
+ # s.rle.struct.unnest
1018
+ # # =>
1019
+ # # shape: (6, 2)
1020
+ # # ┌─────────┬────────┐
1021
+ # # │ lengths ┆ values │
1022
+ # # │ --- ┆ --- │
1023
+ # # │ i32 ┆ i64 │
1024
+ # # ╞═════════╪════════╡
1025
+ # # │ 2 ┆ 1 │
1026
+ # # │ 1 ┆ 2 │
1027
+ # # │ 1 ┆ 1 │
1028
+ # # │ 1 ┆ null │
1029
+ # # │ 1 ┆ 1 │
1030
+ # # │ 2 ┆ 3 │
1031
+ # # └─────────┴────────┘
1032
def rle
  # No arguments to forward; defer to the inherited implementation.
  super()
end
1035
+
1036
+ # Map values to run IDs.
1037
+ #
1038
+ # Similar to RLE, but it maps each value to an ID corresponding to the run into
1039
+ # which it falls. This is especially useful when you want to define groups by
1040
+ # runs of identical values rather than the values themselves.
1041
+ #
1042
+ # @return [Series]
1043
+ #
1044
+ # @example
1045
+ # s = Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3])
1046
+ # s.rle_id
1047
+ # # =>
1048
+ # # shape: (8,)
1049
+ # # Series: 's' [u32]
1050
+ # # [
1051
+ # # 0
1052
+ # # 0
1053
+ # # 1
1054
+ # # 2
1055
+ # # 3
1056
+ # # 4
1057
+ # # 5
1058
+ # # 5
1059
+ # # ]
1060
def rle_id
  # No arguments to forward; defer to the inherited implementation.
  super()
end
1063
+
1064
+ # Count the unique values in a Series.
1065
+ #
1066
+ # @param sort [Boolean]
1067
+ # Ensure the output is sorted from most values to least.
1068
+ #
1069
+ # @return [DataFrame]
1070
+ #
1071
+ # @example
1072
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1073
+ # s.value_counts.sort("a")
1074
+ # # =>
1075
+ # # shape: (3, 2)
1076
+ # # ┌─────┬────────┐
1077
+ # # │ a ┆ counts │
1078
+ # # │ --- ┆ --- │
1079
+ # # │ i64 ┆ u32 │
1080
+ # # ╞═════╪════════╡
1081
+ # # │ 1 ┆ 1 │
1082
+ # # │ 2 ┆ 2 │
1083
+ # # │ 3 ┆ 1 │
1084
+ # # └─────┴────────┘
1085
def value_counts(sort: false)
  # The sort keyword is passed positionally to the wrapped object.
  counted = _s.value_counts(sort)
  Utils.wrap_df(counted)
end
1088
+
1089
+ # Return a count of the unique values in the order of appearance.
1090
+ #
1091
+ # @return [Series]
1092
+ #
1093
+ # @example
1094
+ # s = Polars::Series.new("id", ["a", "b", "b", "c", "c", "c"])
1095
+ # s.unique_counts
1096
+ # # =>
1097
+ # # shape: (3,)
1098
+ # # Series: 'id' [u32]
1099
+ # # [
1100
+ # # 1
1101
+ # # 2
1102
+ # # 3
1103
+ # # ]
1104
def unique_counts
  # No arguments to forward; defer to the inherited implementation.
  super()
end
1107
+
1108
+ # Computes the entropy.
1109
+ #
1110
+ # Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities.
1111
+ #
1112
+ # @param base [Float]
1113
+ # Given base, defaults to `e`
1114
+ # @param normalize [Boolean]
1115
+ # Normalize pk if it doesn't sum to 1.
1116
+ #
1117
+ # @return [Float, nil]
1118
+ #
1119
+ # @example
1120
+ # a = Polars::Series.new([0.99, 0.005, 0.005])
1121
+ # a.entropy(normalize: true)
1122
+ # # => 0.06293300616044681
1123
+ #
1124
+ # @example
1125
+ # b = Polars::Series.new([0.65, 0.10, 0.25])
1126
+ # b.entropy(normalize: true)
1127
+ # # => 0.8568409950394724
1128
+ def entropy(base: Math::E, normalize: false)
1129
+ Polars.select(Polars.lit(self).entropy(base: base, normalize: normalize)).to_series[0]
1130
+ end
1131
+
1132
+ # Run an expression over a sliding window that increases `1` slot every iteration.
1133
+ #
1134
+ # @param expr [Expr]
1135
+ # Expression to evaluate
1136
+ # @param min_periods [Integer]
1137
+ # Number of valid values there should be in the window before the expression
1138
+ # is evaluated. valid values = `length - null_count`
1139
+ # @param parallel [Boolean]
1140
+ # Run in parallel. Don't do this in a group by or another operation that
1141
+ # already has much parallelization.
1142
+ #
1143
+ # @return [Series]
1144
+ #
1145
+ # @note
1146
+ # This functionality is experimental and may change without it being considered a
1147
+ # breaking change.
1148
+ #
1149
+ # @note
1150
+ # This can be really slow as it can have `O(n^2)` complexity. Don't use this
1151
+ # for operations that visit all elements.
1152
+ #
1153
+ # @example
1154
+ # s = Polars::Series.new("values", [1, 2, 3, 4, 5])
1155
+ # s.cumulative_eval(Polars.element.first - Polars.element.last ** 2)
1156
+ # # =>
1157
+ # # shape: (5,)
1158
+ # # Series: 'values' [i64]
1159
+ # # [
1160
+ # # 0
1161
+ # # -3
1162
+ # # -8
1163
+ # # -15
1164
+ # # -24
1165
+ # # ]
1166
+ def cumulative_eval(expr, min_periods: 1, parallel: false)
1167
+ super
1168
+ end
1169
+
1170
+ # Return a copy of the Series with a new alias/name.
1171
+ #
1172
+ # @param name [String]
1173
+ # New name.
1174
+ #
1175
+ # @return [Series]
1176
+ #
1177
+ # @example
1178
+ # s = Polars::Series.new("x", [1, 2, 3])
1179
+ # s.alias("y")
1180
+ def alias(name)
1181
+ s = dup
1182
+ s._s.rename(name)
1183
+ s
1184
+ end
1185
+
1186
+ # Rename this Series.
1187
+ #
1188
+ # @param name [String]
1189
+ # New name.
1190
+ # @param in_place [Boolean]
1191
+ # Modify the Series in-place.
1192
+ #
1193
+ # @return [Series]
1194
+ #
1195
+ # @example
1196
+ # s = Polars::Series.new("a", [1, 2, 3])
1197
+ # s.rename("b")
1198
+ def rename(name, in_place: false)
1199
+ if in_place
1200
+ _s.rename(name)
1201
+ self
1202
+ else
1203
+ self.alias(name)
1204
+ end
1205
+ end
1206
+
1207
+ # Get the length of each individual chunk.
1208
+ #
1209
+ # @return [Array]
1210
+ #
1211
+ # @example
1212
+ # s = Polars::Series.new("a", [1, 2, 3])
1213
+ # s2 = Polars::Series.new("b", [4, 5, 6])
1214
+ #
1215
+ # @example Concatenate Series with rechunk: true
1216
+ # Polars.concat([s, s2]).chunk_lengths
1217
+ # # => [6]
1218
+ #
1219
+ # @example Concatenate Series with rechunk: false
1220
+ # Polars.concat([s, s2], rechunk: false).chunk_lengths
1221
+ # # => [3, 3]
1222
+ def chunk_lengths
1223
+ _s.chunk_lengths
1224
+ end
1225
+
1226
+ # Get the number of chunks that this Series contains.
1227
+ #
1228
+ # @return [Integer]
1229
+ #
1230
+ # @example
1231
+ # s = Polars::Series.new("a", [1, 2, 3])
1232
+ # s2 = Polars::Series.new("b", [4, 5, 6])
1233
+ #
1234
+ # @example Concatenate Series with rechunk: true
1235
+ # Polars.concat([s, s2]).n_chunks
1236
+ # # => 1
1237
+ #
1238
+ # @example Concatenate Series with rechunk: false
1239
+ # Polars.concat([s, s2], rechunk: false).n_chunks
1240
+ # # => 2
1241
+ def n_chunks
1242
+ _s.n_chunks
1243
+ end
1244
+
1245
+ # Get an array with the cumulative sum computed at every element.
1246
+ #
1247
+ # @param reverse [Boolean]
1248
+ # reverse the operation.
1249
+ #
1250
+ # @return [Series]
1251
+ #
1252
+ # @note
1253
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
1254
+ # `:i64` before summing to prevent overflow issues.
1255
+ #
1256
+ # @example
1257
+ # s = Polars::Series.new("a", [1, 2, 3])
1258
+ # s.cum_sum
1259
+ # # =>
1260
+ # # shape: (3,)
1261
+ # # Series: 'a' [i64]
1262
+ # # [
1263
+ # # 1
1264
+ # # 3
1265
+ # # 6
1266
+ # # ]
1267
+ def cum_sum(reverse: false)
1268
+ super
1269
+ end
1270
+ alias_method :cumsum, :cum_sum
1271
+
1272
+ # Get an array with the cumulative min computed at every element.
1273
+ #
1274
+ # @param reverse [Boolean]
1275
+ # reverse the operation.
1276
+ #
1277
+ # @return [Series]
1278
+ #
1279
+ # @example
1280
+ # s = Polars::Series.new("a", [3, 5, 1])
1281
+ # s.cum_min
1282
+ # # =>
1283
+ # # shape: (3,)
1284
+ # # Series: 'a' [i64]
1285
+ # # [
1286
+ # # 3
1287
+ # # 3
1288
+ # # 1
1289
+ # # ]
1290
+ def cum_min(reverse: false)
1291
+ super
1292
+ end
1293
+ alias_method :cummin, :cum_min
1294
+
1295
+ # Get an array with the cumulative max computed at every element.
1296
+ #
1297
+ # @param reverse [Boolean]
1298
+ # reverse the operation.
1299
+ #
1300
+ # @return [Series]
1301
+ #
1302
+ # @example
1303
+ # s = Polars::Series.new("a", [3, 5, 1])
1304
+ # s.cum_max
1305
+ # # =>
1306
+ # # shape: (3,)
1307
+ # # Series: 'a' [i64]
1308
+ # # [
1309
+ # # 3
1310
+ # # 5
1311
+ # # 5
1312
+ # # ]
1313
+ def cum_max(reverse: false)
1314
+ super
1315
+ end
1316
+ alias_method :cummax, :cum_max
1317
+
1318
+ # Get an array with the cumulative product computed at every element.
1319
+ #
1320
+ # @param reverse [Boolean]
1321
+ # reverse the operation.
1322
+ #
1323
+ # @return [Series]
1324
+ #
1325
+ # @note
1326
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
1327
+ # `:i64` before multiplying to prevent overflow issues.
1328
+ #
1329
+ # @example
1330
+ # s = Polars::Series.new("a", [1, 2, 3])
1331
+ # s.cum_prod
1332
+ # # =>
1333
+ # # shape: (3,)
1334
+ # # Series: 'a' [i64]
1335
+ # # [
1336
+ # # 1
1337
+ # # 2
1338
+ # # 6
1339
+ # # ]
1340
+ def cum_prod(reverse: false)
1341
+ super
1342
+ end
1343
+ alias_method :cumprod, :cum_prod
1344
+
1345
+ # Get the first `n` rows.
1346
+ #
1347
+ # Alias for {#head}.
1348
+ #
1349
+ # @param n [Integer]
1350
+ # Number of rows to return.
1351
+ #
1352
+ # @return [Series]
1353
+ #
1354
+ # @example
1355
+ # s = Polars::Series.new("a", [1, 2, 3])
1356
+ # s.limit(2)
1357
+ # # =>
1358
+ # # shape: (2,)
1359
+ # # Series: 'a' [i64]
1360
+ # # [
1361
+ # # 1
1362
+ # # 2
1363
+ # # ]
1364
+ def limit(n = 10)
1365
+ to_frame.select(Utils.col(name).limit(n)).to_series
1366
+ end
1367
+
1368
+ # Get a slice of this Series.
1369
+ #
1370
+ # @param offset [Integer]
1371
+ # Start index. Negative indexing is supported.
1372
+ # @param length [Integer, nil]
1373
+ # Length of the slice. If set to `nil`, all rows starting at the offset
1374
+ # will be selected.
1375
+ #
1376
+ # @return [Series]
1377
+ #
1378
+ # @example
1379
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
1380
+ # s.slice(1, 2)
1381
+ # # =>
1382
+ # # shape: (2,)
1383
+ # # Series: 'a' [i64]
1384
+ # # [
1385
+ # # 2
1386
+ # # 3
1387
+ # # ]
1388
+ def slice(offset, length = nil)
1389
+ super
1390
+ end
1391
+
1392
+ # Append a Series to this one.
1393
+ #
1394
+ # @param other [Series]
1395
+ # Series to append.
1396
+ # @param append_chunks [Boolean]
1397
+ # If set to `true` the append operation will add the chunks from `other` to
1398
+ # self. This is super cheap.
1399
+ #
1400
+ # If set to `false` the append operation will do the same as
1401
+ # {DataFrame#extend} which extends the memory backed by this Series with
1402
+ # the values from `other`.
1403
+ #
1404
+ # Different from `append_chunks`, `extend` appends the data from `other` to
1405
+ # the underlying memory locations and thus may cause a reallocation (which is
1406
+ # expensive).
1407
+ #
1408
+ # If this does not cause a reallocation, the resulting data structure will not
1409
+ # have any extra chunks and thus will yield faster queries.
1410
+ #
1411
+ # Prefer `extend` over `append_chunks` when you want to do a query after a
1412
+ # single append. For instance during online operations where you add `n` rows
1413
+ # and rerun a query.
1414
+ #
1415
+ # Prefer `append_chunks` over `extend` when you want to append many times
1416
+ # before doing a query. For instance, when you read in multiple files and want
1417
+ # to store them in a single Series. In the latter case, finish the sequence
1418
+ # of `append_chunks` operations with a `rechunk`.
1419
+ #
1420
+ # @return [Series]
1421
+ #
1422
+ # @example
1423
+ # s = Polars::Series.new("a", [1, 2, 3])
1424
+ # s2 = Polars::Series.new("b", [4, 5, 6])
1425
+ # s.append(s2)
1426
+ # # =>
1427
+ # # shape: (6,)
1428
+ # # Series: 'a' [i64]
1429
+ # # [
1430
+ # # 1
1431
+ # # 2
1432
+ # # 3
1433
+ # # 4
1434
+ # # 5
1435
+ # # 6
1436
+ # # ]
1437
def append(other, append_chunks: true)
  begin
    if append_chunks
      # Attach the chunks of `other` to this Series.
      _s.append(other._s)
    else
      # Extend the memory backing this Series with the values of `other`.
      _s.extend(other._s)
    end
  rescue => e
    # Anything other than the borrow conflict is propagated unchanged.
    raise unless e.message == "Already mutably borrowed"

    # Retry against a copy of `other`. `append_chunks` is keyword-only,
    # so it must be forwarded by name; passing it positionally raises
    # ArgumentError instead of performing the retry.
    append(other.clone, append_chunks: append_chunks)
  end
  # Return the receiver so calls can be chained.
  self
end
1453
+
1454
+ # Filter elements by a boolean mask.
1455
+ #
1456
+ # @param predicate [Series, Array]
1457
+ # Boolean mask.
1458
+ #
1459
+ # @return [Series]
1460
+ #
1461
+ # @example
1462
+ # s = Polars::Series.new("a", [1, 2, 3])
1463
+ # mask = Polars::Series.new("", [true, false, true])
1464
+ # s.filter(mask)
1465
+ # # =>
1466
+ # # shape: (2,)
1467
+ # # Series: 'a' [i64]
1468
+ # # [
1469
+ # # 1
1470
+ # # 3
1471
+ # # ]
1472
+ def filter(predicate)
1473
+ if predicate.is_a?(::Array)
1474
+ predicate = Series.new("", predicate)
1475
+ end
1476
+ Utils.wrap_s(_s.filter(predicate._s))
1477
+ end
1478
+
1479
+ # Get the first `n` rows.
1480
+ #
1481
+ # @param n [Integer]
1482
+ # Number of rows to return.
1483
+ #
1484
+ # @return [Series]
1485
+ #
1486
+ # @example
1487
+ # s = Polars::Series.new("a", [1, 2, 3])
1488
+ # s.head(2)
1489
+ # # =>
1490
+ # # shape: (2,)
1491
+ # # Series: 'a' [i64]
1492
+ # # [
1493
+ # # 1
1494
+ # # 2
1495
+ # # ]
1496
+ def head(n = 10)
1497
+ to_frame.select(Utils.col(name).head(n)).to_series
1498
+ end
1499
+
1500
+ # Get the last `n` rows.
1501
+ #
1502
+ # @param n [Integer]
1503
+ # Number of rows to return.
1504
+ #
1505
+ # @return [Series]
1506
+ #
1507
+ # @example
1508
+ # s = Polars::Series.new("a", [1, 2, 3])
1509
+ # s.tail(2)
1510
+ # # =>
1511
+ # # shape: (2,)
1512
+ # # Series: 'a' [i64]
1513
+ # # [
1514
+ # # 2
1515
+ # # 3
1516
+ # # ]
1517
+ def tail(n = 10)
1518
+ to_frame.select(Utils.col(name).tail(n)).to_series
1519
+ end
1520
+
1521
+ # Take every nth value in the Series and return as new Series.
1522
+ #
1523
+ # @return [Series]
1524
+ #
1525
+ # @example
1526
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
1527
+ # s.take_every(2)
1528
+ # # =>
1529
+ # # shape: (2,)
1530
+ # # Series: 'a' [i64]
1531
+ # # [
1532
+ # # 1
1533
+ # # 3
1534
+ # # ]
1535
+ def take_every(n)
1536
+ super
1537
+ end
1538
+
1539
+ # Sort this Series.
1540
+ #
1541
+ # @param reverse [Boolean]
1542
+ # Reverse sort.
1543
+ # @param in_place [Boolean]
1544
+ # Sort in place.
1545
+ #
1546
+ # @return [Series]
1547
+ #
1548
+ # @example
1549
+ # s = Polars::Series.new("a", [1, 3, 4, 2])
1550
+ # s.sort
1551
+ # # =>
1552
+ # # shape: (4,)
1553
+ # # Series: 'a' [i64]
1554
+ # # [
1555
+ # # 1
1556
+ # # 2
1557
+ # # 3
1558
+ # # 4
1559
+ # # ]
1560
+ # s.sort(reverse: true)
1561
+ # # =>
1562
+ # # shape: (4,)
1563
+ # # Series: 'a' [i64]
1564
+ # # [
1565
+ # # 4
1566
+ # # 3
1567
+ # # 2
1568
+ # # 1
1569
+ # # ]
1570
+ def sort(reverse: false, nulls_last: false, multithreaded: true, in_place: false)
1571
+ if in_place
1572
+ self._s = _s.sort(reverse, nulls_last, multithreaded)
1573
+ self
1574
+ else
1575
+ Utils.wrap_s(_s.sort(reverse, nulls_last, multithreaded))
1576
+ end
1577
+ end
1578
+
1579
+ # Return the `k` largest elements.
1580
+ #
1581
+ # @param k [Integer]
1582
+ # Number of elements to return.
1583
+ #
1584
+ # @return [Series]
1585
+ #
1586
+ # @example
1587
+ # s = Polars::Series.new("a", [2, 5, 1, 4, 3])
1588
+ # s.top_k(k: 3)
1589
+ # # =>
1590
+ # # shape: (3,)
1591
+ # # Series: 'a' [i64]
1592
+ # # [
1593
+ # # 5
1594
+ # # 4
1595
+ # # 3
1596
+ # # ]
1597
+ def top_k(k: 5)
1598
+ super
1599
+ end
1600
+
1601
+ # Return the `k` smallest elements.
1602
+ #
1603
+ # @param k [Integer]
1604
+ # Number of elements to return.
1605
+ #
1606
+ # @return [Series]
1607
+ #
1608
+ # @example
1609
+ # s = Polars::Series.new("a", [2, 5, 1, 4, 3])
1610
+ # s.bottom_k(k: 3)
1611
+ # # =>
1612
+ # # shape: (3,)
1613
+ # # Series: 'a' [i64]
1614
+ # # [
1615
+ # # 1
1616
+ # # 2
1617
+ # # 3
1618
+ # # ]
1619
+ def bottom_k(k: 5)
1620
+ super
1621
+ end
1622
+
1623
+ # Get the index values that would sort this Series.
1624
+ #
1625
+ # @param reverse [Boolean]
1626
+ # Sort in reverse (descending) order.
1627
+ # @param nulls_last [Boolean]
1628
+ # Place null values last instead of first.
1629
+ #
1630
+ # @return [Series]
1631
+ #
1632
+ # @example
1633
+ # s = Polars::Series.new("a", [5, 3, 4, 1, 2])
1634
+ # s.arg_sort
1635
+ # # =>
1636
+ # # shape: (5,)
1637
+ # # Series: 'a' [u32]
1638
+ # # [
1639
+ # # 3
1640
+ # # 4
1641
+ # # 1
1642
+ # # 2
1643
+ # # 0
1644
+ # # ]
1645
+ def arg_sort(reverse: false, nulls_last: false)
1646
+ super
1647
+ end
1648
+
1649
+ # Get the index values that would sort this Series.
1650
+ #
1651
+ # Alias for {#arg_sort}.
1652
+ #
1653
+ # @param reverse [Boolean]
1654
+ # Sort in reverse (descending) order.
1655
+ # @param nulls_last [Boolean]
1656
+ # Place null values last instead of first.
1657
+ #
1658
+ # @return [Series]
1659
+ def argsort(reverse: false, nulls_last: false)
1660
+ super
1661
+ end
1662
+
1663
+ # Get unique index as Series.
1664
+ #
1665
+ # @return [Series]
1666
+ #
1667
+ # @example
1668
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1669
+ # s.arg_unique
1670
+ # # =>
1671
+ # # shape: (3,)
1672
+ # # Series: 'a' [u32]
1673
+ # # [
1674
+ # # 0
1675
+ # # 1
1676
+ # # 3
1677
+ # # ]
1678
+ def arg_unique
1679
+ super
1680
+ end
1681
+
1682
+ # Get the index of the minimal value.
1683
+ #
1684
+ # @return [Integer, nil]
1685
+ #
1686
+ # @example
1687
+ # s = Polars::Series.new("a", [3, 2, 1])
1688
+ # s.arg_min
1689
+ # # => 2
1690
+ def arg_min
1691
+ _s.arg_min
1692
+ end
1693
+
1694
+ # Get the index of the maximal value.
1695
+ #
1696
+ # @return [Integer, nil]
1697
+ #
1698
+ # @example
1699
+ # s = Polars::Series.new("a", [3, 2, 1])
1700
+ # s.arg_max
1701
+ # # => 0
1702
+ def arg_max
1703
+ _s.arg_max
1704
+ end
1705
+
1706
+ # Find indices where elements should be inserted to maintain order.
1707
+ #
1708
+ # @param element [Object]
1709
+ # Expression or scalar value.
1710
+ #
1711
+ # @return [Integer, Series]
1712
+ def search_sorted(element, side: "any")
1713
+ if element.is_a?(Integer) || element.is_a?(Float)
1714
+ return Polars.select(Polars.lit(self).search_sorted(element, side: side)).item
1715
+ end
1716
+ element = Series.new(element)
1717
+ Polars.select(Polars.lit(self).search_sorted(element, side: side)).to_series
1718
+ end
1719
+
1720
+ # Get unique elements in series.
1721
+ #
1722
+ # @param maintain_order [Boolean]
1723
+ # Maintain order of data. This requires more work.
1724
+ #
1725
+ # @return [Series]
1726
+ #
1727
+ # @example
1728
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1729
+ # s.unique.sort
1730
+ # # =>
1731
+ # # shape: (3,)
1732
+ # # Series: 'a' [i64]
1733
+ # # [
1734
+ # # 1
1735
+ # # 2
1736
+ # # 3
1737
+ # # ]
1738
+ def unique(maintain_order: false)
1739
+ super
1740
+ end
1741
+ alias_method :uniq, :unique
1742
+
1743
+ # Take values by index.
1744
+ #
1745
+ # @param indices [Array]
1746
+ # Index location used for selection.
1747
+ #
1748
+ # @return [Series]
1749
+ #
1750
+ # @example
1751
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
1752
+ # s.take([1, 3])
1753
+ # # =>
1754
+ # # shape: (2,)
1755
+ # # Series: 'a' [i64]
1756
+ # # [
1757
+ # # 2
1758
+ # # 4
1759
+ # # ]
1760
+ def take(indices)
1761
+ to_frame.select(Polars.col(name).take(indices)).to_series
1762
+ end
1763
+
1764
+ # Count the null values in this Series.
1765
+ #
1766
+ # @return [Integer]
1767
+ def null_count
1768
+ _s.null_count
1769
+ end
1770
+
1771
+ # Return `true` if the Series has a validity bitmask.
1772
+ #
1773
+ # If there is none, it means that there are no null values.
1774
+ # Use this to swiftly assert a Series does not have null values.
1775
+ #
1776
+ # @return [Boolean]
1777
+ def has_validity
1778
+ _s.has_validity
1779
+ end
1780
+
1781
+ # Check if the Series is empty.
1782
+ #
1783
+ # @return [Boolean]
1784
+ #
1785
+ # @example
1786
+ # s = Polars::Series.new("a", [])
1787
+ # s.is_empty
1788
+ # # => true
1789
+ def is_empty
1790
+ len == 0
1791
+ end
1792
+ alias_method :empty?, :is_empty
1793
+
1794
+ # Returns a boolean Series indicating which values are null.
1795
+ #
1796
+ # @return [Series]
1797
+ #
1798
+ # @example
1799
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, nil])
1800
+ # s.is_null
1801
+ # # =>
1802
+ # # shape: (4,)
1803
+ # # Series: 'a' [bool]
1804
+ # # [
1805
+ # # false
1806
+ # # false
1807
+ # # false
1808
+ # # true
1809
+ # # ]
1810
+ def is_null
1811
+ super
1812
+ end
1813
+
1814
+ # Returns a boolean Series indicating which values are not null.
1815
+ #
1816
+ # @return [Series]
1817
+ #
1818
+ # @example
1819
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, nil])
1820
+ # s.is_not_null
1821
+ # # =>
1822
+ # # shape: (4,)
1823
+ # # Series: 'a' [bool]
1824
+ # # [
1825
+ # # true
1826
+ # # true
1827
+ # # true
1828
+ # # false
1829
+ # # ]
1830
+ def is_not_null
1831
+ super
1832
+ end
1833
+
1834
+ # Returns a boolean Series indicating which values are finite.
1835
+ #
1836
+ # @return [Series]
1837
+ #
1838
+ # @example
1839
+ # s = Polars::Series.new("a", [1.0, 2.0, Float::INFINITY])
1840
+ # s.is_finite
1841
+ # # =>
1842
+ # # shape: (3,)
1843
+ # # Series: 'a' [bool]
1844
+ # # [
1845
+ # # true
1846
+ # # true
1847
+ # # false
1848
+ # # ]
1849
+ def is_finite
1850
+ super
1851
+ end
1852
+
1853
+ # Returns a boolean Series indicating which values are infinite.
1854
+ #
1855
+ # @return [Series]
1856
+ #
1857
+ # @example
1858
+ # s = Polars::Series.new("a", [1.0, 2.0, Float::INFINITY])
1859
+ # s.is_infinite
1860
+ # # =>
1861
+ # # shape: (3,)
1862
+ # # Series: 'a' [bool]
1863
+ # # [
1864
+ # # false
1865
+ # # false
1866
+ # # true
1867
+ # # ]
1868
+ def is_infinite
1869
+ super
1870
+ end
1871
+
1872
+ # Returns a boolean Series indicating which values are NaN.
1873
+ #
1874
+ # @return [Series]
1875
+ #
1876
+ # @example
1877
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
1878
+ # s.is_nan
1879
+ # # =>
1880
+ # # shape: (4,)
1881
+ # # Series: 'a' [bool]
1882
+ # # [
1883
+ # # false
1884
+ # # false
1885
+ # # false
1886
+ # # true
1887
+ # # ]
1888
+ def is_nan
1889
+ super
1890
+ end
1891
+
1892
+ # Returns a boolean Series indicating which values are not NaN.
1893
+ #
1894
+ # @return [Series]
1895
+ #
1896
+ # @example
1897
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
1898
+ # s.is_not_nan
1899
+ # # =>
1900
+ # # shape: (4,)
1901
+ # # Series: 'a' [bool]
1902
+ # # [
1903
+ # # true
1904
+ # # true
1905
+ # # true
1906
+ # # false
1907
+ # # ]
1908
+ def is_not_nan
1909
+ super
1910
+ end
1911
+
1912
+ # Check if elements of this Series are in the other Series.
1913
+ #
1914
+ # @return [Series]
1915
+ #
1916
+ # @example
1917
+ # s = Polars::Series.new("a", [1, 2, 3])
1918
+ # s2 = Polars::Series.new("b", [2, 4])
1919
+ # s2.is_in(s)
1920
+ # # =>
1921
+ # # shape: (2,)
1922
+ # # Series: 'b' [bool]
1923
+ # # [
1924
+ # # true
1925
+ # # false
1926
+ # # ]
1927
+ #
1928
+ # @example
1929
+ # sets = Polars::Series.new("sets", [[1, 2, 3], [1, 2], [9, 10]])
1930
+ # # =>
1931
+ # # shape: (3,)
1932
+ # # Series: 'sets' [list[i64]]
1933
+ # # [
1934
+ # # [1, 2, 3]
1935
+ # # [1, 2]
1936
+ # # [9, 10]
1937
+ # # ]
1938
+ #
1939
+ # @example
1940
+ # optional_members = Polars::Series.new("optional_members", [1, 2, 3])
1941
+ # # =>
1942
+ # # shape: (3,)
1943
+ # # Series: 'optional_members' [i64]
1944
+ # # [
1945
+ # # 1
1946
+ # # 2
1947
+ # # 3
1948
+ # # ]
1949
+ #
1950
+ # @example
1951
+ # optional_members.is_in(sets)
1952
+ # # =>
1953
+ # # shape: (3,)
1954
+ # # Series: 'optional_members' [bool]
1955
+ # # [
1956
+ # # true
1957
+ # # true
1958
+ # # false
1959
+ # # ]
1960
+ def is_in(other)
1961
+ super
1962
+ end
1963
+ alias_method :in?, :is_in
1964
+
1965
+ # Get index values where a Boolean Series evaluates to `true`.
1966
+ #
1967
+ # @return [Series]
1968
+ #
1969
+ # @example
1970
+ # s = Polars::Series.new("a", [1, 2, 3])
1971
+ # (s == 2).arg_true
1972
+ # # =>
1973
+ # # shape: (1,)
1974
+ # # Series: 'a' [u32]
1975
+ # # [
1976
+ # # 1
1977
+ # # ]
1978
+ def arg_true
1979
+ Polars.arg_where(self, eager: true)
1980
+ end
1981
+
1982
+ # Get mask of all unique values.
1983
+ #
1984
+ # @return [Series]
1985
+ #
1986
+ # @example
1987
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1988
+ # s.is_unique
1989
+ # # =>
1990
+ # # shape: (4,)
1991
+ # # Series: 'a' [bool]
1992
+ # # [
1993
+ # # true
1994
+ # # false
1995
+ # # false
1996
+ # # true
1997
+ # # ]
1998
+ def is_unique
1999
+ super
2000
+ end
2001
+
2002
+ # Get a mask of the first unique value.
2003
+ #
2004
+ # @return [Series]
2005
+ def is_first
2006
+ super
2007
+ end
2008
+
2009
+ # Get mask of all duplicated values.
2010
+ #
2011
+ # @return [Series]
2012
+ #
2013
+ # @example
2014
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
2015
+ # s.is_duplicated
2016
+ # # =>
2017
+ # # shape: (4,)
2018
+ # # Series: 'a' [bool]
2019
+ # # [
2020
+ # # false
2021
+ # # true
2022
+ # # true
2023
+ # # false
2024
+ # # ]
2025
+ def is_duplicated
2026
+ super
2027
+ end
2028
+
2029
+ # Explode a list or utf8 Series.
2030
+ #
2031
+ # This means that every item is expanded to a new row.
2032
+ #
2033
+ # @return [Series]
2034
+ #
2035
+ # @example
2036
+ # s = Polars::Series.new("a", [[1, 2], [3, 4], [9, 10]])
2037
+ # s.explode
2038
+ # # =>
2039
+ # # shape: (6,)
2040
+ # # Series: 'a' [i64]
2041
+ # # [
2042
+ # # 1
2043
+ # # 2
2044
+ # # 3
2045
+ # # 4
2046
+ # # 9
2047
+ # # 10
2048
+ # # ]
2049
+ def explode
2050
+ super
2051
+ end
2052
+
2053
+ # Check if series is equal with another Series.
2054
+ #
2055
+ # @param other [Series]
2056
+ # Series to compare with.
2057
+ # @param null_equal [Boolean]
2058
+ # Consider null values as equal.
2059
+ # @param strict [Boolean]
2060
+ # Don't allow different numerical dtypes, e.g. comparing `:u32` with a
2061
+ # `:i64` will return `false`.
2062
+ #
2063
+ # @return [Boolean]
2064
+ #
2065
+ # @example
2066
+ # s = Polars::Series.new("a", [1, 2, 3])
2067
+ # s2 = Polars::Series.new("b", [4, 5, 6])
2068
+ # s.equals(s)
2069
+ # # => true
2070
+ # s.equals(s2)
2071
+ # # => false
2072
+ def equals(other, null_equal: false, strict: false)
2073
+ _s.equals(other._s, null_equal, strict)
2074
+ end
2075
+ alias_method :series_equal, :equals
2076
+
2077
+ # Return the number of non-null elements in the Series.
2078
+ #
2079
+ # @return [Integer]
2080
+ #
2081
+ # @example
2082
+ # s = Polars::Series.new("a", [1, 2, nil])
2083
+ # s.count
2084
+ # # => 2
2085
+ def count
2086
+ len - null_count
2087
+ end
2088
+
2089
+ # Return the number of elements in the Series.
2090
+ #
2091
+ # @return [Integer]
2092
+ #
2093
+ # @example
2094
+ # s = Polars::Series.new("a", [1, 2, nil])
2095
+ # s.len
2096
+ # # => 3
2097
+ def len
2098
+ _s.len
2099
+ end
2100
+ alias_method :length, :len
2101
+ alias_method :size, :len
2102
+
2103
+ # Cast between data types.
2104
+ #
2105
+ # @param dtype [Symbol]
2106
+ # DataType to cast to
2107
+ # @param strict [Boolean]
2108
+ # Throw an error if a cast could not be done, for instance due to an overflow.
2109
+ #
2110
+ # @return [Series]
2111
+ #
2112
+ # @example
2113
+ # s = Polars::Series.new("a", [true, false, true])
2114
+ # s.cast(:u32)
2115
+ # # =>
2116
+ # # shape: (3,)
2117
+ # # Series: 'a' [u32]
2118
+ # # [
2119
+ # # 1
2120
+ # # 0
2121
+ # # 1
2122
+ # # ]
2123
+ def cast(dtype, strict: true)
2124
+ super
2125
+ end
2126
+
2127
+ # Cast to physical representation of the logical dtype.
2128
+ #
2129
+ # - `:date` -> `:i32`
2130
+ # - `:datetime` -> `:i64`
2131
+ # - `:time` -> `:i64`
2132
+ # - `:duration` -> `:i64`
2133
+ # - `:cat` -> `:u32`
2134
+ # - other data types will be left unchanged.
2135
+ #
2136
+ # @return [Series]
2137
+ #
2138
+ # @example
2139
+ # s = Polars::Series.new("values", ["a", nil, "x", "a"])
2140
+ # s.cast(:cat).to_physical
2141
+ # # =>
2142
+ # # shape: (4,)
2143
+ # # Series: 'values' [u32]
2144
+ # # [
2145
+ # # 0
2146
+ # # null
2147
+ # # 1
2148
+ # # 0
2149
+ # # ]
2150
+ def to_physical
2151
+ super
2152
+ end
2153
+
2154
+ # Convert this Series to a Ruby Array. This operation clones data.
2155
+ #
2156
+ # @return [Array]
2157
+ #
2158
+ # @example
2159
+ # s = Polars::Series.new("a", [1, 2, 3])
2160
+ # s.to_a
2161
+ # # => [1, 2, 3]
2162
+ def to_a
2163
+ _s.to_a
2164
+ end
2165
+
2166
+ # Create a single chunk of memory for this Series.
2167
+ #
2168
+ # @param in_place [Boolean]
2169
+ # In place or not.
2170
+ #
2171
+ # @return [Series]
2172
+ def rechunk(in_place: false)
2173
+ opt_s = _s.rechunk(in_place)
2174
+ in_place ? self : Utils.wrap_s(opt_s)
2175
+ end
2176
+
2177
+ # Return Series in reverse order.
2178
+ #
2179
+ # @return [Series]
2180
+ #
2181
+ # @example
2182
+ # s = Polars::Series.new("a", [1, 2, 3], dtype: :i8)
2183
+ # s.reverse
2184
+ # # =>
2185
+ # # shape: (3,)
2186
+ # # Series: 'a' [i8]
2187
+ # # [
2188
+ # # 3
2189
+ # # 2
2190
+ # # 1
2191
+ # # ]
2192
+ def reverse
2193
+ super
2194
+ end
2195
+
2196
+ # Check if this Series datatype is numeric.
2197
+ #
2198
+ # @return [Boolean]
2199
+ #
2200
+ # @example
2201
+ # s = Polars::Series.new("a", [1, 2, 3])
2202
+ # s.is_numeric
2203
+ # # => true
2204
+ def is_numeric
2205
+ [Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64].include?(dtype)
2206
+ end
2207
+ alias_method :numeric?, :is_numeric
2208
+
2209
+ # Check if this Series datatype is datelike.
2210
+ #
2211
+ # @return [Boolean]
2212
+ #
2213
+ # @example
2214
+ # s = Polars::Series.new([Date.new(2021, 1, 1), Date.new(2021, 1, 2), Date.new(2021, 1, 3)])
2215
+ # s.is_datelike
2216
+ # # => true
2217
+ def is_datelike
2218
+ [Date, Time].include?(dtype) || dtype.is_a?(Datetime) || dtype.is_a?(Duration)
2219
+ end
2220
+ alias_method :datelike?, :is_datelike
2221
+ alias_method :is_temporal, :is_datelike
2222
+ alias_method :temporal?, :is_datelike
2223
+
2224
+ # Check if this Series has floating point numbers.
2225
+ #
2226
+ # @return [Boolean]
2227
+ #
2228
+ # @example
2229
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0])
2230
+ # s.is_float
2231
+ # # => true
2232
+ def is_float
2233
+ [Float32, Float64].include?(dtype)
2234
+ end
2235
+ alias_method :float?, :is_float
2236
+
2237
+ # Check if this Series is a Boolean.
2238
+ #
2239
+ # @return [Boolean]
2240
+ #
2241
+ # @example
2242
+ # s = Polars::Series.new("a", [true, false, true])
2243
+ # s.is_boolean
2244
+ # # => true
2245
+ def is_boolean
2246
+ dtype == Boolean
2247
+ end
2248
+ alias_method :boolean?, :is_boolean
2249
+ alias_method :is_bool, :is_boolean
2250
+ alias_method :bool?, :is_boolean
2251
+
2252
+ # Check if this Series datatype is a Utf8.
2253
+ #
2254
+ # @return [Boolean]
2255
+ #
2256
+ # @example
2257
+ # s = Polars::Series.new("x", ["a", "b", "c"])
2258
+ # s.is_utf8
2259
+ # # => true
2260
+ def is_utf8
2261
+ dtype == String
2262
+ end
2263
+ alias_method :utf8?, :is_utf8
2264
+
2265
+ # def view
2266
+ # end
2267
+
2268
+ # Convert this Series to a Numo array. This operation clones data but is completely safe.
2269
+ #
2270
+ # @return [Numo::NArray]
2271
+ #
2272
+ # @example
2273
+ # s = Polars::Series.new("a", [1, 2, 3])
2274
+ # s.to_numo
2275
+ # # =>
2276
+ # # Numo::Int64#shape=[3]
2277
+ # # [1, 2, 3]
2278
+ def to_numo
2279
+ if !has_validity
2280
+ if is_datelike
2281
+ Numo::RObject.cast(to_a)
2282
+ elsif is_numeric
2283
+ # TODO make more efficient
2284
+ {
2285
+ UInt8 => Numo::UInt8,
2286
+ UInt16 => Numo::UInt16,
2287
+ UInt32 => Numo::UInt32,
2288
+ UInt64 => Numo::UInt64,
2289
+ Int8 => Numo::Int8,
2290
+ Int16 => Numo::Int16,
2291
+ Int32 => Numo::Int32,
2292
+ Int64 => Numo::Int64,
2293
+ Float32 => Numo::SFloat,
2294
+ Float64 => Numo::DFloat
2295
+ }.fetch(dtype.class).cast(to_a)
2296
+ elsif is_boolean
2297
+ Numo::Bit.cast(to_a)
2298
+ else
2299
+ _s.to_numo
2300
+ end
2301
+ elsif is_datelike
2302
+ Numo::RObject.cast(to_a)
2303
+ else
2304
+ _s.to_numo
2305
+ end
2306
+ end
2307
+
2308
+ # Set masked values.
2309
+ #
2310
+ # @param filter [Series]
2311
+ # Boolean mask.
2312
+ # @param value [Object]
2313
+ # Value with which to replace the masked values.
2314
+ #
2315
+ # @return [Series]
2316
+ #
2317
+ # @note
2318
+ # Use of this function is frequently an anti-pattern, as it can
2319
+ # block optimization (predicate pushdown, etc). Consider using
2320
+ # `Polars.when(predicate).then(value).otherwise(self)` instead.
2321
+ #
2322
+ # @example
2323
+ # s = Polars::Series.new("a", [1, 2, 3])
2324
+ # s.set(s == 2, 10)
2325
+ # # =>
2326
+ # # shape: (3,)
2327
+ # # Series: 'a' [i64]
2328
+ # # [
2329
+ # # 1
2330
+ # # 10
2331
+ # # 3
2332
+ # # ]
2333
+ def set(filter, value)
2334
+ Utils.wrap_s(_s.send("set_with_mask_#{DTYPE_TO_FFINAME.fetch(dtype.class)}", filter._s, value))
2335
+ end
2336
+
2337
+ # Set values at the index locations.
2338
+ #
2339
+ # @param idx [Object]
2340
+ # Integers representing the index locations.
2341
+ # @param value [Object]
2342
+ # Replacement values.
2343
+ #
2344
+ # @return [Series]
2345
+ #
2346
+ # @example
2347
+ # s = Polars::Series.new("a", [1, 2, 3])
2348
+ # s.set_at_idx(1, 10)
2349
+ # # =>
2350
+ # # shape: (3,)
2351
+ # # Series: 'a' [i64]
2352
+ # # [
2353
+ # # 1
2354
+ # # 10
2355
+ # # 3
2356
+ # # ]
2357
+ def scatter(idx, value)
2358
+ if idx.is_a?(Integer)
2359
+ idx = [idx]
2360
+ end
2361
+ if idx.length == 0
2362
+ return self
2363
+ end
2364
+
2365
+ idx = Series.new("", idx)
2366
+ if value.is_a?(Integer) || value.is_a?(Float) || Utils.bool?(value) || value.is_a?(::String) || value.nil?
2367
+ value = Series.new("", [value])
2368
+
2369
+ # if we need to set more than a single value, we extend it
2370
+ if idx.length > 0
2371
+ value = value.extend_constant(value[0], idx.length - 1)
2372
+ end
2373
+ elsif !value.is_a?(Series)
2374
+ value = Series.new("", value)
2375
+ end
2376
+ _s.scatter(idx._s, value._s)
2377
+ self
2378
+ end
2379
+ alias_method :set_at_idx, :scatter
2380
+
2381
+ # Create an empty copy of the current Series.
2382
+ #
2383
+ # The copy has identical name/dtype but no data.
2384
+ #
2385
+ # @return [Series]
2386
+ #
2387
+ # @example
2388
+ # s = Polars::Series.new("a", [nil, true, false])
2389
+ # s.cleared
2390
+ # # =>
2391
+ # # shape: (0,)
2392
+ # # Series: 'a' [bool]
2393
+ # # [
2394
+ # # ]
2395
+ def cleared
2396
+ len > 0 ? limit(0) : clone
2397
+ end
2398
+
2399
+ # clone handled by initialize_copy
2400
+
2401
+ # Fill floating point NaN value with a fill value.
2402
+ #
2403
+ # @param fill_value [Object]
2404
+ # Value used to fill nan values.
2405
+ #
2406
+ # @return [Series]
2407
+ #
2408
+ # @example
2409
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
2410
+ # s.fill_nan(0)
2411
+ # # =>
2412
+ # # shape: (4,)
2413
+ # # Series: 'a' [f64]
2414
+ # # [
2415
+ # # 1.0
2416
+ # # 2.0
2417
+ # # 3.0
2418
+ # # 0.0
2419
+ # # ]
2420
+ def fill_nan(fill_value) # Series adds no logic of its own here
2421
+ super # bare `super`: re-dispatches `fill_nan` with `fill_value` passed through unchanged
2422
+ end
2423
+
2424
+ # Fill null values using the specified value or strategy.
2425
+ #
2426
+ # @param value [Object]
2427
+ # Value used to fill null values.
2428
+ # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
2429
+ # Strategy used to fill null values.
2430
+ # @param limit [Integer, nil]
2431
+ # Number of consecutive null values to fill when using the "forward" or
2432
+ # "backward" strategy.
2433
+ #
2434
+ # @return [Series]
2435
+ #
2436
+ # @example
2437
+ # s = Polars::Series.new("a", [1, 2, 3, nil])
2438
+ # s.fill_null(strategy: "forward")
2439
+ # # =>
2440
+ # # shape: (4,)
2441
+ # # Series: 'a' [i64]
2442
+ # # [
2443
+ # # 1
2444
+ # # 2
2445
+ # # 3
2446
+ # # 3
2447
+ # # ]
2448
+ #
2449
+ # @example
2450
+ # s.fill_null(strategy: "min")
2451
+ # # =>
2452
+ # # shape: (4,)
2453
+ # # Series: 'a' [i64]
2454
+ # # [
2455
+ # # 1
2456
+ # # 2
2457
+ # # 3
2458
+ # # 1
2459
+ # # ]
2460
+ #
2461
+ # @example
2462
+ # s = Polars::Series.new("b", ["x", nil, "z"])
2463
+ # s.fill_null(Polars.lit(""))
2464
+ # # =>
2465
+ # # shape: (3,)
2466
+ # # Series: 'b' [str]
2467
+ # # [
2468
+ # # "x"
2469
+ # # ""
2470
+ # # "z"
2471
+ # # ]
2472
+ def fill_null(value = nil, strategy: nil, limit: nil) # Series adds no logic of its own here
2473
+ super # bare `super`: forwards `value`, `strategy:` and `limit:` exactly as received
2474
+ end
2475
+
2476
+ # Rounds down to the nearest integer value.
2477
+ #
2478
+ # Only works on floating point Series.
2479
+ #
2480
+ # @return [Series]
2481
+ #
2482
+ # @example
2483
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
2484
+ # s.floor
2485
+ # # =>
2486
+ # # shape: (3,)
2487
+ # # Series: 'a' [f64]
2488
+ # # [
2489
+ # # 1.0
2490
+ # # 2.0
2491
+ # # 3.0
2492
+ # # ]
2493
def floor
  # Round down on the underlying series, then wrap the raw result.
  floored = _s.floor
  Utils.wrap_s(floored)
end
2496
+
2497
+ # Rounds up to the nearest integer value.
2498
+ #
2499
+ # Only works on floating point Series.
2500
+ #
2501
+ # @return [Series]
2502
+ #
2503
+ # @example
2504
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
2505
+ # s.ceil
2506
+ # # =>
2507
+ # # shape: (3,)
2508
+ # # Series: 'a' [f64]
2509
+ # # [
2510
+ # # 2.0
2511
+ # # 3.0
2512
+ # # 4.0
2513
+ # # ]
2514
+ def ceil # Series adds no logic of its own here (unlike `floor`, which calls `_s` directly)
2515
+ super # bare `super`: re-dispatches `ceil` (no arguments) to the next implementation in the ancestor chain
2516
+ end
2517
+
2518
+ # Round underlying floating point data by `decimals` digits.
2519
+ #
2520
+ # @param decimals [Integer]
2521
+ # number of decimals to round by.
2522
+ #
2523
+ # @return [Series]
2524
+ #
2525
+ # @example
2526
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
2527
+ # s.round(2)
2528
+ # # =>
2529
+ # # shape: (3,)
2530
+ # # Series: 'a' [f64]
2531
+ # # [
2532
+ # # 1.12
2533
+ # # 2.57
2534
+ # # 3.9
2535
+ # # ]
2536
+ def round(decimals = 0) # Series adds no logic of its own here
2537
+ super # bare `super`: forwards `decimals` (including the default of 0) unchanged
2538
+ end
2539
+
2540
+ # Compute the dot/inner product between two Series.
2541
+ #
2542
+ # @param other [Object]
2543
+ # Series (or array) to compute dot product with.
2544
+ #
2545
+ # @return [Numeric]
2546
+ #
2547
+ # @example
2548
+ # s = Polars::Series.new("a", [1, 2, 3])
2549
+ # s2 = Polars::Series.new("b", [4.0, 5.0, 6.0])
2550
+ # s.dot(s2)
2551
+ # # => 32.0
2552
def dot(other)
  # Accept plain arrays (or anything Series.new understands) as the operand.
  other = Series.new(other) unless other.is_a?(Series)

  # Both operands must have the same number of elements.
  own_len = len
  other_len = other.len
  unless own_len == other_len
    raise ArgumentError, "Series length mismatch: expected #{own_len}, found #{other_len}"
  end

  _s.dot(other._s)
end
2562
+
2563
+ # Compute the most occurring value(s).
2564
+ #
2565
+ # Can return multiple values.
2566
+ #
2567
+ # @return [Series]
2568
+ #
2569
+ # @example
2570
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
2571
+ # s.mode
2572
+ # # =>
2573
+ # # shape: (1,)
2574
+ # # Series: 'a' [i64]
2575
+ # # [
2576
+ # # 2
2577
+ # # ]
2578
+ def mode # Series adds no logic of its own here
2579
+ super # bare `super`: re-dispatches `mode` (no arguments) to the next implementation in the ancestor chain
2580
+ end
2581
+
2582
+ # Compute the element-wise indication of the sign.
2583
+ #
2584
+ # @return [Series]
2585
+ #
2586
+ # @example
2587
+ # s = Polars::Series.new("a", [-9.0, -0.0, 0.0, 4.0, nil])
2588
+ # s.sign
2589
+ # # =>
2590
+ # # shape: (5,)
2591
+ # # Series: 'a' [i64]
2592
+ # # [
2593
+ # # -1
2594
+ # # 0
2595
+ # # 0
2596
+ # # 1
2597
+ # # null
2598
+ # # ]
2599
+ def sign # Series adds no logic of its own here
2600
+ super # bare `super`: re-dispatches `sign` (no arguments) to the next implementation in the ancestor chain
2601
+ end
2602
+
2603
+ # Compute the element-wise value for the sine.
2604
+ #
2605
+ # @return [Series]
2606
+ #
2607
+ # @example
2608
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
2609
+ # s.sin
2610
+ # # =>
2611
+ # # shape: (3,)
2612
+ # # Series: 'a' [f64]
2613
+ # # [
2614
+ # # 0.0
2615
+ # # 1.0
2616
+ # # 1.2246e-16
2617
+ # # ]
2618
+ def sin # Series adds no logic of its own here
2619
+ super # bare `super`: re-dispatches `sin` (no arguments) to the next implementation in the ancestor chain
2620
+ end
2621
+
2622
+ # Compute the element-wise value for the cosine.
2623
+ #
2624
+ # @return [Series]
2625
+ #
2626
+ # @example
2627
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
2628
+ # s.cos
2629
+ # # =>
2630
+ # # shape: (3,)
2631
+ # # Series: 'a' [f64]
2632
+ # # [
2633
+ # # 1.0
2634
+ # # 6.1232e-17
2635
+ # # -1.0
2636
+ # # ]
2637
+ def cos # Series adds no logic of its own here
2638
+ super # bare `super`: re-dispatches `cos` (no arguments) to the next implementation in the ancestor chain
2639
+ end
2640
+
2641
+ # Compute the element-wise value for the tangent.
2642
+ #
2643
+ # @return [Series]
2644
+ #
2645
+ # @example
2646
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
2647
+ # s.tan
2648
+ # # =>
2649
+ # # shape: (3,)
2650
+ # # Series: 'a' [f64]
2651
+ # # [
2652
+ # # 0.0
2653
+ # # 1.6331e16
2654
+ # # -1.2246e-16
2655
+ # # ]
2656
+ def tan # Series adds no logic of its own here
2657
+ super # bare `super`: re-dispatches `tan` (no arguments) to the next implementation in the ancestor chain
2658
+ end
2659
+
2660
+ # Compute the element-wise value for the inverse sine.
2661
+ #
2662
+ # @return [Series]
2663
+ #
2664
+ # @example
2665
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2666
+ # s.arcsin
2667
+ # # =>
2668
+ # # shape: (3,)
2669
+ # # Series: 'a' [f64]
2670
+ # # [
2671
+ # # 1.570796
2672
+ # # 0.0
2673
+ # # -1.570796
2674
+ # # ]
2675
+ def arcsin # Series adds no logic of its own here; also reachable as `asin` via the alias below
2676
+ super # bare `super`: re-dispatches `arcsin` (no arguments) to the next implementation in the ancestor chain
2677
+ end
2678
+ alias_method :asin, :arcsin
2679
+
2680
+ # Compute the element-wise value for the inverse cosine.
2681
+ #
2682
+ # @return [Series]
2683
+ #
2684
+ # @example
2685
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2686
+ # s.arccos
2687
+ # # =>
2688
+ # # shape: (3,)
2689
+ # # Series: 'a' [f64]
2690
+ # # [
2691
+ # # 0.0
2692
+ # # 1.570796
2693
+ # # 3.141593
2694
+ # # ]
2695
+ def arccos # Series adds no logic of its own here; also reachable as `acos` via the alias below
2696
+ super # bare `super`: re-dispatches `arccos` (no arguments) to the next implementation in the ancestor chain
2697
+ end
2698
+ alias_method :acos, :arccos
2699
+
2700
+ # Compute the element-wise value for the inverse tangent.
2701
+ #
2702
+ # @return [Series]
2703
+ #
2704
+ # @example
2705
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2706
+ # s.arctan
2707
+ # # =>
2708
+ # # shape: (3,)
2709
+ # # Series: 'a' [f64]
2710
+ # # [
2711
+ # # 0.785398
2712
+ # # 0.0
2713
+ # # -0.785398
2714
+ # # ]
2715
+ def arctan # Series adds no logic of its own here; also reachable as `atan` via the alias below
2716
+ super # bare `super`: re-dispatches `arctan` (no arguments) to the next implementation in the ancestor chain
2717
+ end
2718
+ alias_method :atan, :arctan
2719
+
2720
+ # Compute the element-wise value for the inverse hyperbolic sine.
2721
+ #
2722
+ # @return [Series]
2723
+ #
2724
+ # @example
2725
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2726
+ # s.arcsinh
2727
+ # # =>
2728
+ # # shape: (3,)
2729
+ # # Series: 'a' [f64]
2730
+ # # [
2731
+ # # 0.881374
2732
+ # # 0.0
2733
+ # # -0.881374
2734
+ # # ]
2735
+ def arcsinh # Series adds no logic of its own here; also reachable as `asinh` via the alias below
2736
+ super # bare `super`: re-dispatches `arcsinh` (no arguments) to the next implementation in the ancestor chain
2737
+ end
2738
+ alias_method :asinh, :arcsinh
2739
+
2740
+ # Compute the element-wise value for the inverse hyperbolic cosine.
2741
+ #
2742
+ # @return [Series]
2743
+ #
2744
+ # @example
2745
+ # s = Polars::Series.new("a", [5.0, 1.0, 0.0, -1.0])
2746
+ # s.arccosh
2747
+ # # =>
2748
+ # # shape: (4,)
2749
+ # # Series: 'a' [f64]
2750
+ # # [
2751
+ # # 2.292432
2752
+ # # 0.0
2753
+ # # NaN
2754
+ # # NaN
2755
+ # # ]
2756
+ def arccosh # Series adds no logic of its own here; also reachable as `acosh` via the alias below
2757
+ super # bare `super`: re-dispatches `arccosh` (no arguments) to the next implementation in the ancestor chain
2758
+ end
2759
+ alias_method :acosh, :arccosh
2760
+
2761
+ # Compute the element-wise value for the inverse hyperbolic tangent.
2762
+ #
2763
+ # @return [Series]
2764
+ #
2765
+ # @example
2766
+ # s = Polars::Series.new("a", [2.0, 1.0, 0.5, 0.0, -0.5, -1.0, -1.1])
2767
+ # s.arctanh
2768
+ # # =>
2769
+ # # shape: (7,)
2770
+ # # Series: 'a' [f64]
2771
+ # # [
2772
+ # # NaN
2773
+ # # inf
2774
+ # # 0.549306
2775
+ # # 0.0
2776
+ # # -0.549306
2777
+ # # -inf
2778
+ # # NaN
2779
+ # # ]
2780
+ def arctanh # Series adds no logic of its own here; also reachable as `atanh` via the alias below
2781
+ super # bare `super`: re-dispatches `arctanh` (no arguments) to the next implementation in the ancestor chain
2782
+ end
2783
+ alias_method :atanh, :arctanh
2784
+
2785
+ # Compute the element-wise value for the hyperbolic sine.
2786
+ #
2787
+ # @return [Series]
2788
+ #
2789
+ # @example
2790
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2791
+ # s.sinh
2792
+ # # =>
2793
+ # # shape: (3,)
2794
+ # # Series: 'a' [f64]
2795
+ # # [
2796
+ # # 1.175201
2797
+ # # 0.0
2798
+ # # -1.175201
2799
+ # # ]
2800
+ def sinh # Series adds no logic of its own here
2801
+ super # bare `super`: re-dispatches `sinh` (no arguments) to the next implementation in the ancestor chain
2802
+ end
2803
+
2804
+ # Compute the element-wise value for the hyperbolic cosine.
2805
+ #
2806
+ # @return [Series]
2807
+ #
2808
+ # @example
2809
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2810
+ # s.cosh
2811
+ # # =>
2812
+ # # shape: (3,)
2813
+ # # Series: 'a' [f64]
2814
+ # # [
2815
+ # # 1.543081
2816
+ # # 1.0
2817
+ # # 1.543081
2818
+ # # ]
2819
+ def cosh # Series adds no logic of its own here
2820
+ super # bare `super`: re-dispatches `cosh` (no arguments) to the next implementation in the ancestor chain
2821
+ end
2822
+
2823
+ # Compute the element-wise value for the hyperbolic tangent.
2824
+ #
2825
+ # @return [Series]
2826
+ #
2827
+ # @example
2828
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2829
+ # s.tanh
2830
+ # # =>
2831
+ # # shape: (3,)
2832
+ # # Series: 'a' [f64]
2833
+ # # [
2834
+ # # 0.761594
2835
+ # # 0.0
2836
+ # # -0.761594
2837
+ # # ]
2838
+ def tanh # Series adds no logic of its own here
2839
+ super # bare `super`: re-dispatches `tanh` (no arguments) to the next implementation in the ancestor chain
2840
+ end
2841
+
2842
+ # Apply a custom/user-defined function (UDF) over elements in this Series and
2843
+ # return a new Series.
2844
+ #
2845
+ # If the function returns another datatype, the return_dtype arg should be set,
2846
+ # otherwise the method will fail.
2847
+ #
2848
+ # @param return_dtype [Symbol]
2849
+ # Output datatype. If none is given, the same datatype as this Series will be
2850
+ # used.
2851
+ # @param skip_nulls [Boolean]
2852
+ # Nulls will be skipped and not passed to the Ruby function.
2853
+ # This is faster because Ruby can be skipped and because we call
2854
+ # more specialized functions.
2855
+ #
2856
+ # @return [Series]
2857
+ #
2858
+ # @example
2859
+ # s = Polars::Series.new("a", [1, 2, 3])
2860
+ # s.map_elements { |x| x + 10 }
2861
+ # # =>
2862
+ # # shape: (3,)
2863
+ # # Series: 'a' [i64]
2864
+ # # [
2865
+ # # 11
2866
+ # # 12
2867
+ # # 13
2868
+ # # ]
2869
def map_elements(return_dtype: nil, skip_nulls: true, &func)
  # Only translate the requested dtype when one was actually given;
  # nil is passed through as-is.
  pl_return_dtype = return_dtype.nil? ? nil : Utils.rb_type_to_dtype(return_dtype)

  # The block is handed to the underlying series as a Proc.
  mapped = _s.apply_lambda(func, pl_return_dtype, skip_nulls)
  Utils.wrap_s(mapped)
end
2877
+ alias_method :map, :map_elements
2878
+ alias_method :apply, :map_elements
2879
+
2880
+ # Shift the values by a given period.
2881
+ #
2882
+ # @param periods [Integer]
2883
+ # Number of places to shift (may be negative).
2884
+ #
2885
+ # @return [Series]
2886
+ #
2887
+ # @example
2888
+ # s = Polars::Series.new("a", [1, 2, 3])
2889
+ # s.shift(1)
2890
+ # # =>
2891
+ # # shape: (3,)
2892
+ # # Series: 'a' [i64]
2893
+ # # [
2894
+ # # null
2895
+ # # 1
2896
+ # # 2
2897
+ # # ]
2898
+ #
2899
+ # @example
2900
+ # s.shift(-1)
2901
+ # # =>
2902
+ # # shape: (3,)
2903
+ # # Series: 'a' [i64]
2904
+ # # [
2905
+ # # 2
2906
+ # # 3
2907
+ # # null
2908
+ # # ]
2909
+ def shift(periods = 1) # Series adds no logic of its own here
2910
+ super # bare `super`: forwards `periods` (including the default of 1) unchanged
2911
+ end
2912
+
2913
+ # Shift the values by a given period and fill the resulting null values.
2914
+ #
2915
+ # @param periods [Integer]
2916
+ # Number of places to shift (may be negative).
2917
+ # @param fill_value [Object]
2918
+ # Fill the resulting null values with the result of this expression.
2919
+ #
2920
+ # @return [Series]
2921
+ def shift_and_fill(periods, fill_value) # Series adds no logic of its own here
2922
+ super # bare `super`: forwards `periods` and `fill_value` exactly as received
2923
+ end
2924
+
2925
+ # Take values from self or other based on the given mask.
2926
+ #
2927
+ # Where mask evaluates true, take values from self. Where mask evaluates false,
2928
+ # take values from other.
2929
+ #
2930
+ # @param mask [Series]
2931
+ # Boolean Series.
2932
+ # @param other [Series]
2933
+ # Series of same type.
2934
+ #
2935
+ # @return [Series]
2936
+ #
2937
+ # @example
2938
+ # s1 = Polars::Series.new([1, 2, 3, 4, 5])
2939
+ # s2 = Polars::Series.new([5, 4, 3, 2, 1])
2940
+ # s1.zip_with(s1 < s2, s2)
2941
+ # # =>
2942
+ # # shape: (5,)
2943
+ # # Series: '' [i64]
2944
+ # # [
2945
+ # # 1
2946
+ # # 2
2947
+ # # 3
2948
+ # # 2
2949
+ # # 1
2950
+ # # ]
2951
+ #
2952
+ # @example
2953
+ # mask = Polars::Series.new([true, false, true, false, true])
2954
+ # s1.zip_with(mask, s2)
2955
+ # # =>
2956
+ # # shape: (5,)
2957
+ # # Series: '' [i64]
2958
+ # # [
2959
+ # # 1
2960
+ # # 4
2961
+ # # 3
2962
+ # # 2
2963
+ # # 5
2964
+ # # ]
2965
def zip_with(mask, other)
  # Both arguments must expose `_s`; the merge itself happens on the
  # underlying series and the raw result is wrapped afterwards.
  zipped = _s.zip_with(mask._s, other._s)
  Utils.wrap_s(zipped)
end
2968
+
2969
+ # Apply a rolling min (moving min) over the values in this array.
2970
+ #
2971
+ # A window of length `window_size` will traverse the array. The values that fill
2972
+ # this window will (optionally) be multiplied with the weights given by the
2973
+ # `weights` array. The resulting values will be aggregated to their minimum.
2974
+ #
2975
+ # @param window_size [Integer]
2976
+ # The length of the window.
2977
+ # @param weights [Array]
2978
+ # An optional slice with the same length as the window that will be multiplied
2979
+ # elementwise with the values in the window.
2980
+ # @param min_periods [Integer]
2981
+ # The number of values in the window that should be non-null before computing
2982
+ # a result. If nil, it will be set equal to the window size.
2983
+ # @param center [Boolean]
2984
+ # Set the labels at the center of the window
2985
+ #
2986
+ # @return [Series]
2987
+ #
2988
+ # @example
2989
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
2990
+ # s.rolling_min(3)
2991
+ # # =>
2992
+ # # shape: (5,)
2993
+ # # Series: 'a' [i64]
2994
+ # # [
2995
+ # # null
2996
+ # # null
2997
+ # # 100
2998
+ # # 200
2999
+ # # 300
3000
+ # # ]
3001
def rolling_min(window_size, weights: nil, min_periods: nil, center: false)
  # Keyword options are forwarded untouched to the expression-level method.
  options = {weights: weights, min_periods: min_periods, center: center}

  # Run the expression against a frame built from this Series, then pull
  # the result back out as a Series.
  frame = to_frame
  rolled = Polars.col(name).rolling_min(window_size, **options)
  frame.select(rolled).to_series
end
3018
+
3019
+ # Apply a rolling max (moving max) over the values in this array.
3020
+ #
3021
+ # A window of length `window_size` will traverse the array. The values that fill
3022
+ # this window will (optionally) be multiplied with the weights given by the
3023
+ # `weights` array. The resulting values will be aggregated to their maximum.
3024
+ #
3025
+ # @param window_size [Integer]
3026
+ # The length of the window.
3027
+ # @param weights [Array]
3028
+ # An optional slice with the same length as the window that will be multiplied
3029
+ # elementwise with the values in the window.
3030
+ # @param min_periods [Integer]
3031
+ # The number of values in the window that should be non-null before computing
3032
+ # a result. If nil, it will be set equal to the window size.
3033
+ # @param center [Boolean]
3034
+ # Set the labels at the center of the window
3035
+ #
3036
+ # @return [Series]
3037
+ #
3038
+ # @example
3039
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
3040
+ # s.rolling_max(2)
3041
+ # # =>
3042
+ # # shape: (5,)
3043
+ # # Series: 'a' [i64]
3044
+ # # [
3045
+ # # null
3046
+ # # 200
3047
+ # # 300
3048
+ # # 400
3049
+ # # 500
3050
+ # # ]
3051
def rolling_max(window_size, weights: nil, min_periods: nil, center: false)
  # Keyword options are forwarded untouched to the expression-level method.
  options = {weights: weights, min_periods: min_periods, center: center}

  # Run the expression against a frame built from this Series, then pull
  # the result back out as a Series.
  frame = to_frame
  rolled = Polars.col(name).rolling_max(window_size, **options)
  frame.select(rolled).to_series
end
3068
+
3069
+ # Apply a rolling mean (moving mean) over the values in this array.
3070
+ #
3071
+ # A window of length `window_size` will traverse the array. The values that fill
3072
+ # this window will (optionally) be multiplied with the weights given by the
3073
+ # `weights` array. The resulting values will be aggregated to their mean.
3074
+ #
3075
+ # @param window_size [Integer]
3076
+ # The length of the window.
3077
+ # @param weights [Array]
3078
+ # An optional slice with the same length as the window that will be multiplied
3079
+ # elementwise with the values in the window.
3080
+ # @param min_periods [Integer]
3081
+ # The number of values in the window that should be non-null before computing
3082
+ # a result. If nil, it will be set equal to the window size.
3083
+ # @param center [Boolean]
3084
+ # Set the labels at the center of the window
3085
+ #
3086
+ # @return [Series]
3087
+ #
3088
+ # @example
3089
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
3090
+ # s.rolling_mean(2)
3091
+ # # =>
3092
+ # # shape: (5,)
3093
+ # # Series: 'a' [f64]
3094
+ # # [
3095
+ # # null
3096
+ # # 150.0
3097
+ # # 250.0
3098
+ # # 350.0
3099
+ # # 450.0
3100
+ # # ]
3101
def rolling_mean(window_size, weights: nil, min_periods: nil, center: false)
  # Keyword options are forwarded untouched to the expression-level method.
  options = {weights: weights, min_periods: min_periods, center: center}

  # Run the expression against a frame built from this Series, then pull
  # the result back out as a Series.
  frame = to_frame
  rolled = Polars.col(name).rolling_mean(window_size, **options)
  frame.select(rolled).to_series
end
3118
+
3119
+ # Apply a rolling sum (moving sum) over the values in this array.
3120
+ #
3121
+ # A window of length `window_size` will traverse the array. The values that fill
3122
+ # this window will (optionally) be multiplied with the weights given by the
3123
+ # `weight` vector. The resulting values will be aggregated to their sum.
3124
+ #
3125
+ # @param window_size [Integer]
3126
+ # The length of the window.
3127
+ # @param weights [Array]
3128
+ # An optional slice with the same length as the window that will be multiplied
3129
+ # elementwise with the values in the window.
3130
+ # @param min_periods [Integer]
3131
+ # The number of values in the window that should be non-null before computing
3132
+ # a result. If nil, it will be set equal to the window size.
3133
+ # @param center [Boolean]
3134
+ # Set the labels at the center of the window
3135
+ #
3136
+ # @return [Series]
3137
+ #
3138
+ # @example
3139
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
3140
+ # s.rolling_sum(2)
3141
+ # # =>
3142
+ # # shape: (5,)
3143
+ # # Series: 'a' [i64]
3144
+ # # [
3145
+ # # null
3146
+ # # 3
3147
+ # # 5
3148
+ # # 7
3149
+ # # 9
3150
+ # # ]
3151
def rolling_sum(window_size, weights: nil, min_periods: nil, center: false)
  # Keyword options are forwarded untouched to the expression-level method.
  options = {weights: weights, min_periods: min_periods, center: center}

  # Run the expression against a frame built from this Series, then pull
  # the result back out as a Series.
  frame = to_frame
  rolled = Polars.col(name).rolling_sum(window_size, **options)
  frame.select(rolled).to_series
end
3168
+
3169
+ # Compute a rolling std dev.
3170
+ #
3171
+ # A window of length `window_size` will traverse the array. The values that fill
3172
+ # this window will (optionally) be multiplied with the weights given by the
3173
+ # `weights` array. The resulting values will be aggregated to their standard deviation.
3174
+ #
3175
+ # @param window_size [Integer]
3176
+ # The length of the window.
3177
+ # @param weights [Array]
3178
+ # An optional slice with the same length as the window that will be multiplied
3179
+ # elementwise with the values in the window.
3180
+ # @param min_periods [Integer]
3181
+ # The number of values in the window that should be non-null before computing
3182
+ # a result. If nil, it will be set equal to the window size.
3183
+ # @param center [Boolean]
3184
+ # Set the labels at the center of the window
3185
+ #
3186
+ # @return [Series]
3187
+ #
3188
+ # @example
3189
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3190
+ # s.rolling_std(3)
3191
+ # # =>
3192
+ # # shape: (6,)
3193
+ # # Series: 'a' [f64]
3194
+ # # [
3195
+ # # null
3196
+ # # null
3197
+ # # 1.0
3198
+ # # 1.0
3199
+ # # 1.527525
3200
+ # # 2.0
3201
+ # # ]
3202
def rolling_std(window_size, weights: nil, min_periods: nil, center: false, ddof: 1, warn_if_unsorted: true)
  # Keyword options are forwarded untouched to the expression-level method.
  options = {
    weights: weights,
    min_periods: min_periods,
    center: center,
    ddof: ddof,
    warn_if_unsorted: warn_if_unsorted
  }

  # Run the expression against a frame built from this Series, then pull
  # the result back out as a Series.
  frame = to_frame
  rolled = Polars.col(name).rolling_std(window_size, **options)
  frame.select(rolled).to_series
end
3223
+
3224
+ # Compute a rolling variance.
3225
+ #
3226
+ # A window of length `window_size` will traverse the array. The values that fill
3227
+ # this window will (optionally) be multiplied with the weights given by the
3228
+ # `weights` array. The resulting values will be aggregated to their variance.
3229
+ #
3230
+ # @param window_size [Integer]
3231
+ # The length of the window.
3232
+ # @param weights [Array]
3233
+ # An optional slice with the same length as the window that will be multiplied
3234
+ # elementwise with the values in the window.
3235
+ # @param min_periods [Integer]
3236
+ # The number of values in the window that should be non-null before computing
3237
+ # a result. If nil, it will be set equal to the window size.
3238
+ # @param center [Boolean]
3239
+ # Set the labels at the center of the window
3240
+ #
3241
+ # @return [Series]
3242
+ #
3243
+ # @example
3244
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3245
+ # s.rolling_var(3)
3246
+ # # =>
3247
+ # # shape: (6,)
3248
+ # # Series: 'a' [f64]
3249
+ # # [
3250
+ # # null
3251
+ # # null
3252
+ # # 1.0
3253
+ # # 1.0
3254
+ # # 2.333333
3255
+ # # 4.0
3256
+ # # ]
3257
def rolling_var(window_size, weights: nil, min_periods: nil, center: false, ddof: 1, warn_if_unsorted: true)
  # Keyword options are forwarded untouched to the expression-level method.
  options = {
    weights: weights,
    min_periods: min_periods,
    center: center,
    ddof: ddof,
    warn_if_unsorted: warn_if_unsorted
  }

  # Run the expression against a frame built from this Series, then pull
  # the result back out as a Series.
  frame = to_frame
  rolled = Polars.col(name).rolling_var(window_size, **options)
  frame.select(rolled).to_series
end
3278
+
3279
+ # def rolling_apply
3280
+ # end
3281
+
3282
+ # Compute a rolling median.
3283
+ #
3284
+ # @param window_size [Integer]
3285
+ # The length of the window.
3286
+ # @param weights [Array]
3287
+ # An optional slice with the same length as the window that will be multiplied
3288
+ # elementwise with the values in the window.
3289
+ # @param min_periods [Integer]
3290
+ # The number of values in the window that should be non-null before computing
3291
+ # a result. If nil, it will be set equal to the window size.
3292
+ # @param center [Boolean]
3293
+ # Set the labels at the center of the window
3294
+ #
3295
+ # @return [Series]
3296
+ #
3297
+ # @example
3298
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3299
+ # s.rolling_median(3)
3300
+ # # =>
3301
+ # # shape: (6,)
3302
+ # # Series: 'a' [f64]
3303
+ # # [
3304
+ # # null
3305
+ # # null
3306
+ # # 2.0
3307
+ # # 3.0
3308
+ # # 4.0
3309
+ # # 6.0
3310
+ # # ]
3311
def rolling_median(window_size, weights: nil, min_periods: nil, center: false, warn_if_unsorted: true)
  # An unspecified minimum falls back to the full window length.
  min_periods = window_size if min_periods.nil?

  options = {
    weights: weights,
    min_periods: min_periods,
    center: center,
    warn_if_unsorted: warn_if_unsorted
  }

  # Run the expression against a frame built from this Series, then pull
  # the result back out as a Series.
  frame = to_frame
  rolled = Polars.col(name).rolling_median(window_size, **options)
  frame.select(rolled).to_series
end
3334
+
3335
+ # Compute a rolling quantile.
3336
+ #
3337
+ # @param quantile [Float]
3338
+ # Quantile between 0.0 and 1.0.
3339
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
3340
+ # Interpolation method.
3341
+ # @param window_size [Integer]
3342
+ # The length of the window.
3343
+ # @param weights [Array]
3344
+ # An optional slice with the same length as the window that will be multiplied
3345
+ # elementwise with the values in the window.
3346
+ # @param min_periods [Integer]
3347
+ # The number of values in the window that should be non-null before computing
3348
+ # a result. If nil, it will be set equal to the window size.
3349
+ # @param center [Boolean]
3350
+ # Set the labels at the center of the window
3351
+ #
3352
+ # @return [Series]
3353
+ #
3354
+ # @example
3355
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3356
+ # s.rolling_quantile(0.33, window_size: 3)
3357
+ # # =>
3358
+ # # shape: (6,)
3359
+ # # Series: 'a' [f64]
3360
+ # # [
3361
+ # # null
3362
+ # # null
3363
+ # # 1.0
3364
+ # # 2.0
3365
+ # # 3.0
3366
+ # # 4.0
3367
+ # # ]
3368
+ #
3369
+ # @example
3370
+ # s.rolling_quantile(0.33, interpolation: "linear", window_size: 3)
3371
+ # # =>
3372
+ # # shape: (6,)
3373
+ # # Series: 'a' [f64]
3374
+ # # [
3375
+ # # null
3376
+ # # null
3377
+ # # 1.66
3378
+ # # 2.66
3379
+ # # 3.66
3380
+ # # 5.32
3381
+ # # ]
3382
def rolling_quantile(
  quantile,
  interpolation: "nearest",
  window_size: 2,
  weights: nil,
  min_periods: nil,
  center: false,
  warn_if_unsorted: true
)
  # An unspecified minimum falls back to the full window length.
  min_periods = window_size if min_periods.nil?

  options = {
    interpolation: interpolation,
    window_size: window_size,
    weights: weights,
    min_periods: min_periods,
    center: center,
    warn_if_unsorted: warn_if_unsorted
  }

  # Run the expression against a frame built from this Series, then pull
  # the result back out as a Series.
  frame = to_frame
  rolled = Polars.col(name).rolling_quantile(quantile, **options)
  frame.select(rolled).to_series
end
3409
+
3410
+ # Compute a rolling skew.
3411
+ #
3412
+ # @param window_size [Integer]
3413
+ # Integer size of the rolling window.
3414
+ # @param bias [Boolean]
3415
+ # If false, the calculations are corrected for statistical bias.
3416
+ #
3417
+ # @return [Series]
3418
+ #
3419
+ # @example
3420
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3421
+ # s.rolling_skew(3)
3422
+ # # =>
3423
+ # # shape: (6,)
3424
+ # # Series: 'a' [f64]
3425
+ # # [
3426
+ # # null
3427
+ # # null
3428
+ # # 0.0
3429
+ # # 0.0
3430
+ # # 0.381802
3431
+ # # 0.0
3432
+ # # ]
3433
+ def rolling_skew(window_size, bias: true) # Series adds no logic of its own here
3434
+ super # bare `super`: forwards `window_size` and `bias:` exactly as received
3435
+ end
3436
+
3437
+ # Sample from this Series.
3438
+ #
3439
+ # @param n [Integer]
3440
+ # Number of items to return. Cannot be used with `frac`. Defaults to 1 if
3441
+ # `frac` is nil.
3442
+ # @param frac [Float]
3443
+ # Fraction of items to return. Cannot be used with `n`.
3444
+ # @param with_replacement [Boolean]
3445
+ # Allow values to be sampled more than once.
3446
+ # @param shuffle [Boolean]
3447
+ # Shuffle the order of sampled data points.
3448
+ # @param seed [Integer]
3449
+ # Seed for the random number generator. If set to nil (default), a random
3450
+ # seed is used.
3451
+ #
3452
+ # @return [Series]
3453
+ #
3454
+ # @example
3455
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
3456
+ # s.sample(n: 2, seed: 0)
3457
+ # # =>
3458
+ # # shape: (2,)
3459
+ # # Series: 'a' [i64]
3460
+ # # [
3461
+ # # 5
3462
+ # # 3
3463
+ # # ]
3464
def sample(n: nil, frac: nil, with_replacement: false, shuffle: false, seed: nil)
  # `n` and `frac` are mutually exclusive.
  unless n.nil? || frac.nil?
    raise ArgumentError, "cannot specify both `n` and `frac`"
  end

  # Past the guard above at most one of the two is set, so a present
  # `frac` implies `n` is nil: sample by fraction.
  unless frac.nil?
    by_fraction = _s.sample_frac(frac, with_replacement, shuffle, seed)
    return Utils.wrap_s(by_fraction)
  end

  # Neither given: fall back to a single item.
  n = 1 if n.nil?
  by_count = _s.sample_n(n, with_replacement, shuffle, seed)
  Utils.wrap_s(by_count)
end
3484
+
3485
+ # Get a boolean mask of the local maximum peaks.
3486
+ #
3487
+ # @return [Series]
3488
+ #
3489
+ # @example
3490
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
3491
+ # s.peak_max
3492
+ # # =>
3493
+ # # shape: (5,)
3494
+ # # Series: 'a' [bool]
3495
+ # # [
3496
+ # # false
3497
+ # # false
3498
+ # # false
3499
+ # # false
3500
+ # # true
3501
+ # # ]
3502
+ def peak_max # Series adds no logic of its own here
3503
+ super # bare `super`: re-dispatches `peak_max` (no arguments) to the next implementation in the ancestor chain
3504
+ end
3505
+
3506
+ # Get a boolean mask of the local minimum peaks.
3507
+ #
3508
+ # @return [Series]
3509
+ #
3510
+ # @example
3511
+ # s = Polars::Series.new("a", [4, 1, 3, 2, 5])
3512
+ # s.peak_min
3513
+ # # =>
3514
+ # # shape: (5,)
3515
+ # # Series: 'a' [bool]
3516
+ # # [
3517
+ # # false
3518
+ # # true
3519
+ # # false
3520
+ # # true
3521
+ # # false
3522
+ # # ]
3523
+ def peak_min # Series adds no logic of its own here
3524
+ super # bare `super`: re-dispatches `peak_min` (no arguments) to the next implementation in the ancestor chain
3525
+ end
3526
+
3527
+ # Count the number of unique values in this Series.
3528
+ #
3529
+ # @return [Integer]
3530
+ #
3531
+ # @example
3532
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
3533
+ # s.n_unique
3534
+ # # => 3
3535
+ def n_unique # answered directly by the underlying `_s` object, not via `super`
3536
+ _s.n_unique # the raw result is returned as-is (no `Utils.wrap_s`), matching the documented Integer return
3537
+ end
3538
+
3539
+ # Shrink Series memory usage.
3540
+ #
3541
+ # Shrinks the underlying array capacity to exactly fit the actual data.
3542
+ # (Note that this function does not change the Series data type).
3543
+ #
3544
+ # @return [Series]
3545
def shrink_to_fit(in_place: false)
  # Operate on this very object when asked to, otherwise on a clone so
  # the receiver is left alone.
  target = in_place ? self : clone
  target._s.shrink_to_fit
  target
end
3555
+
3556
+ # Hash the Series.
3557
+ #
3558
+ # The hash value is of type `:u64`.
3559
+ #
3560
+ # @param seed [Integer]
3561
+ # Random seed parameter. Defaults to 0.
3562
+ # @param seed_1 [Integer]
3563
+ # Random seed parameter. Defaults to `seed` if not set.
3564
+ # @param seed_2 [Integer]
3565
+ # Random seed parameter. Defaults to `seed` if not set.
3566
+ # @param seed_3 [Integer]
3567
+ # Random seed parameter. Defaults to `seed` if not set.
3568
+ #
3569
+ # @return [Series]
3570
+ #
3571
+ # @example
3572
+ # s = Polars::Series.new("a", [1, 2, 3])
3573
+ # s._hash(42)
3574
+ # # =>
3575
+ # # shape: (3,)
3576
+ # # Series: 'a' [u64]
3577
+ # # [
3578
+ # # 2374023516666777365
3579
+ # # 10386026231460783898
3580
+ # # 17796317186427479491
3581
+ # # ]
3582
+ def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil) # Series adds no logic of its own here
3583
+ super # bare `super`: forwards all four seed arguments (defaults included) unchanged
3584
+ end
3585
+
3586
+ # Reinterpret the underlying bits as a signed/unsigned integer.
3587
+ #
3588
+ # This operation is only allowed for 64bit integers. For lower bits integers,
3589
+ # you can safely use the cast operation instead.
3590
+ #
3591
+ # @param signed [Boolean]
3592
+ # If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`.
3593
+ #
3594
+ # @return [Series]
3595
+ def reinterpret(signed: true) # Series adds no logic of its own here
3596
+ super # bare `super`: forwards `signed:` exactly as received
3597
+ end
3598
+
3599
+ # Interpolate intermediate values. The interpolation method is linear.
3600
+ #
3601
+ # @return [Series]
3602
+ #
3603
+ # @example
3604
+ # s = Polars::Series.new("a", [1, 2, nil, nil, 5])
3605
+ # s.interpolate
3606
+ # # =>
3607
+ # # shape: (5,)
3608
+ # # Series: 'a' [f64]
3609
+ # # [
3610
+ # # 1.0
3611
+ # # 2.0
3612
+ # # 3.0
3613
+ # # 4.0
3614
+ # # 5.0
3615
+ # # ]
3616
+ def interpolate(method: "linear") # Series adds no logic of its own here
3617
+ super # bare `super`: forwards `method:` (default "linear") exactly as received
3618
+ end
3619
+
3620
+ # Compute absolute values.
3621
+ #
3622
+ # @return [Series]
3623
+ def abs # Series adds no logic of its own here
3624
+ super # bare `super`: re-dispatches `abs` (no arguments) to the next implementation in the ancestor chain
3625
+ end
3626
+
3627
+ # Assign ranks to data, dealing with ties appropriately.
3628
+ #
3629
+ # @param method ["average", "min", "max", "dense", "ordinal", "random"]
3630
+ # The method used to assign ranks to tied elements.
3631
+ # The following methods are available (default is 'average'):
3632
+ #
3633
+ # - 'average' : The average of the ranks that would have been assigned to
3634
+ # all the tied values is assigned to each value.
3635
+ # - 'min' : The minimum of the ranks that would have been assigned to all
3636
+ # the tied values is assigned to each value. (This is also referred to
3637
+ # as "competition" ranking.)
3638
+ # - 'max' : The maximum of the ranks that would have been assigned to all
3639
+ # the tied values is assigned to each value.
3640
+ # - 'dense' : Like 'min', but the rank of the next highest element is
3641
+ # assigned the rank immediately after those assigned to the tied
3642
+ # elements.
3643
+ # - 'ordinal' : All values are given a distinct rank, corresponding to
3644
+ # the order that the values occur in the Series.
3645
+ # - 'random' : Like 'ordinal', but the rank for ties is not dependent
3646
+ # on the order that the values occur in the Series.
3647
+ # @param reverse [Boolean]
3648
+ # Reverse the operation.
3649
+ # @param seed [Integer]
3650
+ # If `method: "random"`, use this as seed.
3651
+ #
3652
+ # @return [Series]
3653
+ #
3654
+ # @example The 'average' method:
3655
+ # s = Polars::Series.new("a", [3, 6, 1, 1, 6])
3656
+ # s.rank
3657
+ # # =>
3658
+ # # shape: (5,)
3659
+ # # Series: 'a' [f64]
3660
+ # # [
3661
+ # # 3.0
3662
+ # # 4.5
3663
+ # # 1.5
3664
+ # # 1.5
3665
+ # # 4.5
3666
+ # # ]
3667
+ #
3668
+ # @example The 'ordinal' method:
3669
+ # s = Polars::Series.new("a", [3, 6, 1, 1, 6])
3670
+ # s.rank(method: "ordinal")
3671
+ # # =>
3672
+ # # shape: (5,)
3673
+ # # Series: 'a' [u32]
3674
+ # # [
3675
+ # # 3
3676
+ # # 4
3677
+ # # 1
3678
+ # # 2
3679
+ # # 5
3680
+ # # ]
3681
def rank(method: "average", reverse: false, seed: nil)
  # Hand every keyword to the parent implementation explicitly.
  super(method: method, reverse: reverse, seed: seed)
end
3684
+
3685
+ # Calculate the n-th discrete difference.
3686
+ #
3687
+ # @param n [Integer]
3688
+ # Number of slots to shift.
3689
+ # @param null_behavior ["ignore", "drop"]
3690
+ # How to handle null values.
3691
+ #
3692
+ # @return [Series]
3693
def diff(n: 1, null_behavior: "ignore")
  # Hand every keyword to the parent implementation explicitly.
  super(n: n, null_behavior: null_behavior)
end
3696
+
3697
+ # Computes percentage change between values.
3698
+ #
3699
+ # Percentage change (as fraction) between current element and most-recent
3700
+ # non-null element at least `n` period(s) before the current element.
3701
+ #
3702
+ # Computes the change from the previous row by default.
3703
+ #
3704
+ # @param n [Integer]
3705
+ # periods to shift for forming percent change.
3706
+ #
3707
+ # @return [Series]
3708
+ #
3709
+ # @example
3710
+ # Polars::Series.new(0..9).pct_change
3711
+ # # =>
3712
+ # # shape: (10,)
3713
+ # # Series: '' [f64]
3714
+ # # [
3715
+ # # null
3716
+ # # inf
3717
+ # # 1.0
3718
+ # # 0.5
3719
+ # # 0.333333
3720
+ # # 0.25
3721
+ # # 0.2
3722
+ # # 0.166667
3723
+ # # 0.142857
3724
+ # # 0.125
3725
+ # # ]
3726
+ #
3727
+ # @example
3728
+ # Polars::Series.new([1, 2, 4, 8, 16, 32, 64, 128, 256, 512]).pct_change(n: 2)
3729
+ # # =>
3730
+ # # shape: (10,)
3731
+ # # Series: '' [f64]
3732
+ # # [
3733
+ # # null
3734
+ # # null
3735
+ # # 3.0
3736
+ # # 3.0
3737
+ # # 3.0
3738
+ # # 3.0
3739
+ # # 3.0
3740
+ # # 3.0
3741
+ # # 3.0
3742
+ # # 3.0
3743
+ # # ]
3744
def pct_change(n: 1)
  # Hand the keyword to the parent implementation explicitly.
  super(n: n)
end
3747
+
3748
+ # Compute the sample skewness of a data set.
3749
+ #
3750
+ # For normally distributed data, the skewness should be about zero. For
3751
+ # unimodal continuous distributions, a skewness value greater than zero means
3752
+ # that there is more weight in the right tail of the distribution. The
3753
+ # function `skewtest` can be used to determine if the skewness value
3754
+ # is close enough to zero, statistically speaking.
3755
+ #
3756
+ # @param bias [Boolean]
3757
+ # If `false`, the calculations are corrected for statistical bias.
3758
+ #
3759
+ # @return [Float, nil]
3760
def skew(bias: true)
  # The native series object takes `bias` positionally.
  native = _s
  native.skew(bias)
end
3763
+
3764
+ # Compute the kurtosis (Fisher or Pearson) of a dataset.
3765
+ #
3766
+ # Kurtosis is the fourth central moment divided by the square of the
3767
+ # variance. If Fisher's definition is used, then 3.0 is subtracted from
3768
+ # the result to give 0.0 for a normal distribution.
3769
+ # If bias is false, then the kurtosis is calculated using k statistics to
3770
+ # eliminate bias coming from biased moment estimators
3771
+ #
3772
+ # @param fisher [Boolean]
3773
+ # If `true`, Fisher's definition is used (normal ==> 0.0). If `false`,
3774
+ # Pearson's definition is used (normal ==> 3.0).
3775
+ # @param bias [Boolean]
3776
+ # If `false`, the calculations are corrected for statistical bias.
3777
+ #
3778
+ # @return [Float, nil]
3779
def kurtosis(fisher: true, bias: true)
  # The native series object takes both flags positionally, `fisher` first.
  native = _s
  native.kurtosis(fisher, bias)
end
3782
+
3783
+ # Clip (limit) the values in an array to a `min` and `max` boundary.
3784
+ #
3785
+ # Only works for numerical types.
3786
+ #
3787
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
3788
+ # expression. See {#when} for more information.
3789
+ #
3790
+ # @param min_val [Numeric]
3791
+ # Minimum value.
3792
+ # @param max_val [Numeric]
3793
+ # Maximum value.
3794
+ #
3795
+ # @return [Series]
3796
+ #
3797
+ # @example
3798
+ # s = Polars::Series.new("foo", [-50, 5, nil, 50])
3799
+ # s.clip(1, 10)
3800
+ # # =>
3801
+ # # shape: (4,)
3802
+ # # Series: 'foo' [i64]
3803
+ # # [
3804
+ # # 1
3805
+ # # 5
3806
+ # # null
3807
+ # # 10
3808
+ # # ]
3809
def clip(min_val, max_val)
  # Hand both bounds to the parent implementation explicitly.
  super(min_val, max_val)
end
3812
+
3813
+ # Clip (limit) the values in an array to a `min` boundary.
3814
+ #
3815
+ # Only works for numerical types.
3816
+ #
3817
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
3818
+ # expression. See {#when} for more information.
3819
+ #
3820
+ # @param min_val [Numeric]
3821
+ # Minimum value.
3822
+ #
3823
+ # @return [Series]
3824
def clip_min(min_val)
  # Hand the bound to the parent implementation explicitly.
  super(min_val)
end
3827
+
3828
+ # Clip (limit) the values in an array to a `max` boundary.
3829
+ #
3830
+ # Only works for numerical types.
3831
+ #
3832
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
3833
+ # expression. See {#when} for more information.
3834
+ #
3835
+ # @param max_val [Numeric]
3836
+ # Maximum value.
3837
+ #
3838
+ # @return [Series]
3839
def clip_max(max_val)
  # Hand the bound to the parent implementation explicitly.
  super(max_val)
end
3842
+
3843
+ # Replace values by different values.
3844
+ #
3845
+ # @param old [Object]
3846
+ # Value or sequence of values to replace.
3847
+ # Also accepts a mapping of values to their replacement.
3848
+ # @param new [Object]
3849
+ # Value or sequence of values to replace by.
3850
+ # Length must match the length of `old` or have length 1.
3851
+ # @param default [Object]
3852
+ # Set values that were not replaced to this value.
3853
+ # Defaults to keeping the original value.
3854
+ # Accepts expression input. Non-expression inputs are parsed as literals.
3855
+ # @param return_dtype [Object]
3856
+ # The data type of the resulting Series. If set to `nil` (default),
3857
+ # the data type is determined automatically based on the other inputs.
3858
+ #
3859
+ # @return [Series]
3860
+ #
3861
+ # @example Replace a single value by another value. Values that were not replaced remain unchanged.
3862
+ # s = Polars::Series.new([1, 2, 2, 3])
3863
+ # s.replace(2, 100)
3864
+ # # =>
3865
+ # # shape: (4,)
3866
+ # # Series: '' [i64]
3867
+ # # [
3868
+ # # 1
3869
+ # # 100
3870
+ # # 100
3871
+ # # 3
3872
+ # # ]
3873
+ #
3874
+ # @example Replace multiple values by passing sequences to the `old` and `new` parameters.
3875
+ # s.replace([2, 3], [100, 200])
3876
+ # # =>
3877
+ # # shape: (4,)
3878
+ # # Series: '' [i64]
3879
+ # # [
3880
+ # # 1
3881
+ # # 100
3882
+ # # 100
3883
+ # # 200
3884
+ # # ]
3885
+ #
3886
+ # @example Passing a mapping with replacements is also supported as syntactic sugar. Specify a default to set all values that were not matched.
3887
+ # mapping = {2 => 100, 3 => 200}
3888
+ # s.replace(mapping, default: -1)
3889
+ # # =>
3890
+ # # shape: (4,)
3891
+ # # Series: '' [i64]
3892
+ # # [
3893
+ # # -1
3894
+ # # 100
3895
+ # # 100
3896
+ # # 200
3897
+ # # ]
3898
+ #
3899
+ # @example The default can be another Series.
3900
+ # default = Polars::Series.new([2.5, 5.0, 7.5, 10.0])
3901
+ # s.replace(2, 100, default: default)
3902
+ # # =>
3903
+ # # shape: (4,)
3904
+ # # Series: '' [f64]
3905
+ # # [
3906
+ # # 2.5
3907
+ # # 100.0
3908
+ # # 100.0
3909
+ # # 10.0
3910
+ # # ]
3911
+ #
3912
+ # @example Replacing by values of a different data type sets the return type based on a combination of the `new` data type and either the original data type or the default data type if it was set.
3913
+ # s = Polars::Series.new(["x", "y", "z"])
3914
+ # mapping = {"x" => 1, "y" => 2, "z" => 3}
3915
+ # s.replace(mapping)
3916
+ # # =>
3917
+ # # shape: (3,)
3918
+ # # Series: '' [str]
3919
+ # # [
3920
+ # # "1"
3921
+ # # "2"
3922
+ # # "3"
3923
+ # # ]
3924
+ #
3925
+ # @example
3926
+ # s.replace(mapping, default: nil)
3927
+ # # =>
3928
+ # # shape: (3,)
3929
+ # # Series: '' [i64]
3930
+ # # [
3931
+ # # 1
3932
+ # # 2
3933
+ # # 3
3934
+ # # ]
3935
+ #
3936
+ # @example Set the `return_dtype` parameter to control the resulting data type directly.
3937
+ # s.replace(mapping, return_dtype: Polars::UInt8)
3938
+ # # =>
3939
+ # # shape: (3,)
3940
+ # # Series: '' [u8]
3941
+ # # [
3942
+ # # 1
3943
+ # # 2
3944
+ # # 3
3945
+ # # ]
3946
def replace(old, new = Expr::NO_DEFAULT, default: Expr::NO_DEFAULT, return_dtype: nil)
  # Hand positional and keyword arguments to the parent implementation explicitly.
  super(old, new, default: default, return_dtype: return_dtype)
end
3949
+
3950
+ # Reshape this Series to a flat Series or a Series of Lists.
3951
+ #
3952
+ # @param dims [Array]
3953
+ # Tuple of the dimension sizes. If a -1 is used in any of the dimensions, that
3954
+ # dimension is inferred.
3955
+ #
3956
+ # @return [Series]
3957
def reshape(dims)
  # Hand the dimensions to the parent implementation explicitly.
  super(dims)
end
3960
+
3961
+ # Shuffle the contents of this Series.
3962
+ #
3963
+ # @param seed [Integer, nil]
3964
+ # Seed for the random number generator.
3965
+ #
3966
+ # @return [Series]
3967
+ #
3968
+ # @example
3969
+ # s = Polars::Series.new("a", [1, 2, 3])
3970
+ # s.shuffle(seed: 1)
3971
+ # # =>
3972
+ # # shape: (3,)
3973
+ # # Series: 'a' [i64]
3974
+ # # [
3975
+ # # 2
3976
+ # # 1
3977
+ # # 3
3978
+ # # ]
3979
def shuffle(seed: nil)
  # Hand the keyword to the parent implementation explicitly.
  super(seed: seed)
end
3982
+
3983
+ # Exponentially-weighted moving average.
3984
+ #
3985
+ # @return [Series]
3986
def ewm_mean(com: nil, span: nil, half_life: nil, alpha: nil, adjust: true, min_periods: 1)
  # Hand every keyword to the parent implementation explicitly.
  super(
    com: com,
    span: span,
    half_life: half_life,
    alpha: alpha,
    adjust: adjust,
    min_periods: min_periods
  )
end
3996
+
3997
+ # Exponentially-weighted moving standard deviation.
3998
+ #
3999
+ # @return [Series]
4000
def ewm_std(com: nil, span: nil, half_life: nil, alpha: nil, adjust: true, bias: false, min_periods: 1)
  # Hand every keyword to the parent implementation explicitly.
  super(
    com: com,
    span: span,
    half_life: half_life,
    alpha: alpha,
    adjust: adjust,
    bias: bias,
    min_periods: min_periods
  )
end
4011
+
4012
+ # Exponentially-weighted moving variance.
4013
+ #
4014
+ # @return [Series]
4015
def ewm_var(com: nil, span: nil, half_life: nil, alpha: nil, adjust: true, bias: false, min_periods: 1)
  # Hand every keyword to the parent implementation explicitly.
  super(
    com: com,
    span: span,
    half_life: half_life,
    alpha: alpha,
    adjust: adjust,
    bias: bias,
    min_periods: min_periods
  )
end
4026
+
4027
+ # Extend the Series with given number of values.
4028
+ #
4029
+ # @param value [Object]
4030
+ # The value to extend the Series with. This value may be `nil` to fill with
4031
+ # nulls.
4032
+ # @param n [Integer]
4033
+ # The number of values to extend.
4034
+ #
4035
+ # @return [Series]
4036
+ #
4037
+ # @example
4038
+ # s = Polars::Series.new("a", [1, 2, 3])
4039
+ # s.extend_constant(99, 2)
4040
+ # # =>
4041
+ # # shape: (5,)
4042
+ # # Series: 'a' [i64]
4043
+ # # [
4044
+ # # 1
4045
+ # # 2
4046
+ # # 3
4047
+ # # 99
4048
+ # # 99
4049
+ # # ]
4050
def extend_constant(value, n)
  # Extend on the native series, then wrap the result via Utils.wrap_s.
  extended = _s.extend_constant(value, n)
  Utils.wrap_s(extended)
end
4053
+
4054
+ # Flags the Series as sorted.
4055
+ #
4056
+ # Enables downstream code to use fast paths for sorted arrays.
4057
+ #
4058
+ # @param reverse [Boolean]
4059
+ # If the Series order is reversed, e.g. descending.
4060
+ #
4061
+ # @return [Series]
4062
+ #
4063
+ # @note
4064
+ # This can lead to incorrect results if this Series is not sorted!!
4065
+ # Use with care!
4066
+ #
4067
+ # @example
4068
+ # s = Polars::Series.new("a", [1, 2, 3])
4069
+ # s.set_sorted.max
4070
+ # # => 3
4071
def set_sorted(reverse: false)
  # The native call takes `reverse` positionally; wrap its result via Utils.wrap_s.
  flagged = _s.set_sorted(reverse)
  Utils.wrap_s(flagged)
end
4074
+
4075
+ # Create a new Series filled with values from the given index.
4076
+ #
4077
+ # @return [Series]
4078
def new_from_index(index, length)
  # Build on the native series, then wrap the result via Utils.wrap_s.
  built = _s.new_from_index(index, length)
  Utils.wrap_s(built)
end
4081
+
4082
+ # Shrink numeric columns to the minimal required datatype.
4083
+ #
4084
+ # Shrink to the dtype needed to fit the extrema of this Series.
4085
+ # This can be used to reduce memory pressure.
4086
+ #
4087
+ # @return [Series]
4088
def shrink_dtype
  # No arguments to forward; defer to the parent implementation.
  super()
end
4091
+
4092
+ # Create an object namespace of all list related methods.
4093
+ #
4094
+ # @return [ListNameSpace]
4095
def list
  # Build a ListNameSpace around this object.
  namespace = ListNameSpace.new(self)
  namespace
end
4098
+
4099
+ # Create an object namespace of all array related methods.
4100
+ #
4101
+ # @return [ArrayNameSpace]
4102
def arr
  # Build an ArrayNameSpace around this object.
  namespace = ArrayNameSpace.new(self)
  namespace
end
4105
+
4106
+ # Create an object namespace of all binary related methods.
4107
+ #
4108
+ # @return [BinaryNameSpace]
4109
def bin
  # Build a BinaryNameSpace around this object.
  namespace = BinaryNameSpace.new(self)
  namespace
end
4112
+
4113
+ # Create an object namespace of all categorical related methods.
4114
+ #
4115
+ # @return [CatNameSpace]
4116
def cat
  # Build a CatNameSpace around this object.
  namespace = CatNameSpace.new(self)
  namespace
end
4119
+
4120
+ # Create an object namespace of all datetime related methods.
4121
+ #
4122
+ # @return [DateTimeNameSpace]
4123
def dt
  # Build a DateTimeNameSpace around this object.
  namespace = DateTimeNameSpace.new(self)
  namespace
end
4126
+
4127
+ # Create an object namespace of all string related methods.
4128
+ #
4129
+ # @return [StringNameSpace]
4130
def str
  # Build a StringNameSpace around this object.
  namespace = StringNameSpace.new(self)
  namespace
end
4133
+
4134
+ # Create an object namespace of all struct related methods.
4135
+ #
4136
+ # @return [StructNameSpace]
4137
def struct
  # Build a StructNameSpace around this object.
  namespace = StructNameSpace.new(self)
  namespace
end
4140
+
4141
+ private
4142
+
4143
def initialize_copy(other)
  super(other)
  # Replace the native handle on the copy with the result of `_clone`.
  duplicated = _s._clone
  self._s = duplicated
end
4147
+
4148
def coerce(other)
  # Only numeric left-hand operands can be lifted into a series.
  unless other.is_a?(Numeric)
    raise TypeError, "#{self.class} can't be coerced into #{other.class}"
  end

  # TODO improve
  lifted = to_frame.select(Polars.lit(other)).to_series
  [lifted, self]
end
4157
+
4158
def _pos_idxs(idxs)
  idx_type = Plr.get_index_type

  raise ArgumentError, "Unsupported idxs datatype." unless idxs.is_a?(Series)

  # Already in the index dtype: hand the input back untouched.
  return idxs if idxs.dtype == idx_type

  accepted = [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64]
  raise ArgumentError, "Unsupported idxs datatype." unless accepted.include?(idxs.dtype)

  # With a 32-bit index type, 64-bit inputs are range-checked before casting.
  if idx_type == UInt32
    if [Int64, UInt64].include?(idxs.dtype) && idxs.max >= 2**32
      raise ArgumentError, "Index positions should be smaller than 2^32."
    end
    if idxs.dtype == Int64 && idxs.min < -(2**32)
      raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
    end
  end

  has_negative = [Int8, Int16, Int32, Int64].include?(idxs.dtype) && idxs.min < 0
  return idxs.cast(idx_type) unless has_negative

  # Widen narrow signed inputs before the `len + index` addition below.
  if idx_type == UInt32
    idxs = idxs.cast(Int32) if [Int8, Int16].include?(idxs.dtype)
  elsif [Int8, Int16, Int32].include?(idxs.dtype)
    idxs = idxs.cast(Int64)
  end

  # Update negative indexes to absolute indexes.
  absolute =
    Polars.when(Polars.col(idxs.name) < 0)
      .then(len + Polars.col(idxs.name))
      .otherwise(Polars.col(idxs.name))
      .cast(idx_type)
  idxs.to_frame.select(absolute).to_series(0)
end
4210
+
4211
def _comp(other, op)
  # Boolean series against a boolean literal with :eq/:neq: return a copy or the negation.
  if dtype == Boolean && Utils.bool?(other) && [:eq, :neq].include?(op)
    return clone if (other == true && op == :eq) || (other == false && op == :neq)
    return !self if (other == false && op == :eq) || (other == true && op == :neq)
  end

  # Temporal scalars are converted by Utils helpers and compared through the
  # Int64 / Int32 variants of the native comparison.
  if other.is_a?(::Time) && dtype.is_a?(Datetime)
    converted = Utils._datetime_to_pl_timestamp(other, time_unit)
    compare = ffi_func("#{op}_<>", Int64, _s)
    fail if compare.nil?
    return Utils.wrap_s(compare.call(converted))
  elsif other.is_a?(::Date) && dtype == Date
    converted = Utils._date_to_pl_date(other)
    compare = ffi_func("#{op}_<>", Int32, _s)
    fail if compare.nil?
    return Utils.wrap_s(compare.call(converted))
  end

  # Series against series goes straight to the native operator.
  return Utils.wrap_s(_s.send(op, other._s)) if other.is_a?(Series)

  compare = ffi_func("#{op}_<>", dtype, _s)
  raise NotImplementedError if compare.nil?

  Utils.wrap_s(compare.call(other))
end
4242
+
4243
# Look up the dtype-specific native method on `_s`.
#
# @param name [String] method-name template containing the `<>` placeholder
# @param dtype [Object] a dtype instance or a bare dtype class
# @param _s [Object] native series object the method is looked up on
# @return [Method, nil] bound method, or nil when the dtype has no entry in
#   DTYPE_TO_FFINAME
def ffi_func(name, dtype, _s)
  # Callers pass both dtype instances and bare dtype classes (e.g. `Int64`).
  # Taking `.class` of a class yields `Class`, which never matches the table,
  # so normalise to the dtype class before the lookup.
  dtype_class = dtype.is_a?(Class) ? dtype : dtype.class
  return nil unless DTYPE_TO_FFINAME.key?(dtype_class)

  _s.method(name.sub("<>", DTYPE_TO_FFINAME.fetch(dtype_class)))
end
4246
+
4247
def _arithmetic(other, op)
  # Expressions are evaluated against this series first.
  other = to_frame.select(other).to_series if other.is_a?(Expr)

  return Utils.wrap_s(_s.send(op, other._s)) if other.is_a?(Series)

  # These scalars are turned into a one-element series unless this series is float.
  needs_series =
    other.is_a?(Float) ||
    other.is_a?(::Date) ||
    other.is_a?(::DateTime) ||
    other.is_a?(::Time) ||
    other.is_a?(::String)
  if needs_series && !is_float
    rhs = sequence_to_rbseries(name, [other])
    return Utils.wrap_s(_s.send(op, rhs))
  end

  operation = ffi_func("#{op}_<>", dtype, _s)
  if operation.nil?
    raise ArgumentError, "cannot do arithmetic with series of dtype: #{dtype} and argument of type: #{other.class.name}"
  end

  Utils.wrap_s(operation.call(other))
end
4266
+
4267
# Maps a dtype class to the type suffix substituted for `<>` in native
# method names (see `ffi_func`). Frozen so the lookup table cannot be
# mutated at runtime.
DTYPE_TO_FFINAME = {
  Int8 => "i8",
  Int16 => "i16",
  Int32 => "i32",
  Int64 => "i64",
  UInt8 => "u8",
  UInt16 => "u16",
  UInt32 => "u32",
  UInt64 => "u64",
  Float32 => "f32",
  Float64 => "f64",
  Boolean => "bool",
  Utf8 => "str",
  List => "list",
  Date => "date",
  Datetime => "datetime",
  Duration => "duration",
  Time => "time",
  Object => "object",
  Categorical => "categorical",
  Struct => "struct",
  Binary => "binary"
}.freeze
4290
+
4291
def series_to_rbseries(name, values)
  # should not be in-place?
  # Renames the given series in place, then returns its `_s` object.
  values.tap { |series| series.rename(name, in_place: true) }._s
end
4296
+
4297
def numo_to_rbseries(name, values, strict: true, nan_to_null: false)
  # not needed yet
  # if !values.contiguous?
  # end

  # Only one-dimensional arrays are handled; every other rank raises Todo.
  raise Todo unless values.shape.length == 1

  values, dtype = numo_values_and_dtype(values)
  # For float arrays `strict` takes the value of `nan_to_null`.
  strict = nan_to_null if [Numo::SFloat, Numo::DFloat].include?(dtype)

  return sequence_to_rbseries(name, values.to_a, strict: strict) if dtype == Numo::RObject

  build = numo_type_to_constructor(dtype)
  # TODO improve performance
  build.call(name, values.to_a, strict)
end
4318
+
4319
def numo_values_and_dtype(values)
  # Pair the values with their class.
  dtype = values.class
  [values, dtype]
end
4322
+
4323
def numo_type_to_constructor(dtype)
  constructors = {
    Numo::Float32 => RbSeries.method(:new_opt_f32),
    Numo::Float64 => RbSeries.method(:new_opt_f64),
    Numo::Int8 => RbSeries.method(:new_opt_i8),
    Numo::Int16 => RbSeries.method(:new_opt_i16),
    Numo::Int32 => RbSeries.method(:new_opt_i32),
    Numo::Int64 => RbSeries.method(:new_opt_i64),
    Numo::UInt8 => RbSeries.method(:new_opt_u8),
    Numo::UInt16 => RbSeries.method(:new_opt_u16),
    Numo::UInt32 => RbSeries.method(:new_opt_u32),
    Numo::UInt64 => RbSeries.method(:new_opt_u64)
  }

  # Any class missing from the table falls back to the object constructor.
  constructors.fetch(dtype) { RbSeries.method(:new_object) }
end
4339
+
4340
# Build a native series named `name` from a Ruby sequence.
#
# @param name [String] name of the resulting series
# @param values [Enumerable] source values
# @param dtype [Object, nil] requested dtype; when nil, the constructor is
#   chosen from the class of the first non-nil value
# @param strict [Boolean] forwarded to the native constructors
# @param dtype_if_empty [Object, nil] dtype used when `values` is empty and
#   no dtype was given (Float32 when nil)
# @return [Object] native series object
def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
  ruby_dtype = nil

  if (values.nil? || values.empty?) && dtype.nil?
    dtype = dtype_if_empty || Float32
  elsif dtype == List
    ruby_dtype = ::Array
  end

  rb_temporal_types = [::Date, ::DateTime, ::Time]
  rb_temporal_types << ActiveSupport::TimeWithZone if defined?(ActiveSupport::TimeWithZone)

  # The first non-nil element drives type inference below.
  value = _get_first_non_none(values)
  if !value.nil?
    if value.is_a?(Hash)
      # A sequence of hashes is built through a DataFrame and turned into a struct.
      return DataFrame.new(values).to_struct(name)._s
    end
  end

  if !dtype.nil? && ![List, Struct, Unknown].include?(dtype) && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
    # A bare Array dtype takes its width from the first non-nil value.
    if dtype == Array && !dtype.is_a?(Array) && value.is_a?(::Array)
      dtype = Array.new(nil, value.size)
    end

    constructor = polars_type_to_constructor(dtype)
    rbseries = constructor.call(name, values, strict)

    base_type = dtype.is_a?(DataType) ? dtype.class : dtype
    if [Date, Datetime, Duration, Time, Categorical, Boolean, Enum].include?(base_type)
      if rbseries.dtype != dtype
        rbseries = rbseries.cast(dtype, true)
      end
    end
    return rbseries
  elsif dtype == Struct
    struct_schema = dtype.is_a?(Struct) ? dtype.to_schema : nil
    empty = {}
    return DataFrame.sequence_to_rbdf(
      values.map { |v| v.nil? ? empty : v },
      schema: struct_schema,
      orient: "row",
    ).to_struct(name)
  else
    if ruby_dtype.nil?
      if value.nil?
        # generic default dtype
        ruby_dtype = Float
      else
        ruby_dtype = value.class
      end
    end

    # temporal branch
    if rb_temporal_types.include?(ruby_dtype)
      if dtype.nil?
        dtype = Utils.rb_type_to_dtype(ruby_dtype)
      elsif rb_temporal_types.include?(dtype)
        dtype = Utils.rb_type_to_dtype(dtype)
      end
      # TODO
      time_unit = nil

      rb_series = RbSeries.new_from_anyvalues(name, values, strict)
      if time_unit.nil?
        s = Utils.wrap_s(rb_series)
      else
        s = Utils.wrap_s(rb_series).dt.cast_time_unit(time_unit)
      end
      return s._s
    elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
      raise Todo
    elsif ruby_dtype == ::Array
      if dtype.is_a?(Object)
        return RbSeries.new_object(name, values, strict)
      end
      if dtype
        srs = sequence_from_anyvalue_or_object(name, values)
        if dtype != srs.dtype
          # `srs` is a native series whose `cast` takes `strict` positionally
          # (as in the cast above); a `strict: false` keyword would arrive as
          # a Hash argument instead of requesting a non-strict cast.
          srs = srs.cast(dtype, false)
        end
        return srs
      end
      return sequence_from_anyvalue_or_object(name, values)
    elsif ruby_dtype == Series
      return RbSeries.new_series_list(name, values.map(&:_s), strict)
    elsif ruby_dtype == RbSeries
      return RbSeries.new_series_list(name, values, strict)
    else
      constructor =
        if value.is_a?(::String)
          # UTF-8 strings become a string series; any other encoding is treated as binary.
          if value.encoding == Encoding::UTF_8
            RbSeries.method(:new_str)
          else
            RbSeries.method(:new_binary)
          end
        elsif value.is_a?(Integer) && values.any? { |v| v.is_a?(Float) }
          # TODO improve performance
          RbSeries.method(:new_opt_f64)
        else
          rb_type_to_constructor(value.class)
        end
      constructor.call(name, values, strict)
    end
  end
end
4445
+
4446
def sequence_from_anyvalue_or_object(name, values)
  # Try the any-value constructor first; on any StandardError fall back to
  # the object constructor.
  begin
    RbSeries.new_from_anyvalues(name, values, true)
  rescue StandardError
    RbSeries.new_object(name, values, false)
  end
end
4451
+
4452
# Maps a dtype class to the native constructor used to build a series of
# that type. Frozen so the lookup table cannot be mutated at runtime.
POLARS_TYPE_TO_CONSTRUCTOR = {
  Float32 => RbSeries.method(:new_opt_f32),
  Float64 => RbSeries.method(:new_opt_f64),
  Int8 => RbSeries.method(:new_opt_i8),
  Int16 => RbSeries.method(:new_opt_i16),
  Int32 => RbSeries.method(:new_opt_i32),
  Int64 => RbSeries.method(:new_opt_i64),
  UInt8 => RbSeries.method(:new_opt_u8),
  UInt16 => RbSeries.method(:new_opt_u16),
  UInt32 => RbSeries.method(:new_opt_u32),
  UInt64 => RbSeries.method(:new_opt_u64),
  Decimal => RbSeries.method(:new_decimal),
  Date => RbSeries.method(:new_from_anyvalues),
  Datetime => RbSeries.method(:new_from_anyvalues),
  Duration => RbSeries.method(:new_from_anyvalues),
  Time => RbSeries.method(:new_from_anyvalues),
  Boolean => RbSeries.method(:new_opt_bool),
  Utf8 => RbSeries.method(:new_str),
  Object => RbSeries.method(:new_object),
  Categorical => RbSeries.method(:new_str),
  Enum => RbSeries.method(:new_str),
  Binary => RbSeries.method(:new_binary),
  Null => RbSeries.method(:new_null)
}.freeze
4476
+
4477
# Maps a symbolic dtype name to the native constructor used to build a
# series of that type. Frozen so the lookup table cannot be mutated at runtime.
SYM_TYPE_TO_CONSTRUCTOR = {
  f32: RbSeries.method(:new_opt_f32),
  f64: RbSeries.method(:new_opt_f64),
  i8: RbSeries.method(:new_opt_i8),
  i16: RbSeries.method(:new_opt_i16),
  i32: RbSeries.method(:new_opt_i32),
  i64: RbSeries.method(:new_opt_i64),
  u8: RbSeries.method(:new_opt_u8),
  u16: RbSeries.method(:new_opt_u16),
  u32: RbSeries.method(:new_opt_u32),
  u64: RbSeries.method(:new_opt_u64),
  bool: RbSeries.method(:new_opt_bool),
  str: RbSeries.method(:new_str)
}.freeze
4491
+
4492
def polars_type_to_constructor(dtype)
  # Array dtypes get a lambda that passes their width and inner type to `new_array`.
  if dtype.is_a?(Array)
    return ->(name, values, strict) { RbSeries.new_array(dtype.width, dtype.inner, name, values, strict) }
  end

  # Pick the lookup table and key: dtype class, dtype instance, or symbolic name.
  table, key =
    if dtype.is_a?(Class) && dtype < DataType
      [POLARS_TYPE_TO_CONSTRUCTOR, dtype]
    elsif dtype.is_a?(DataType)
      [POLARS_TYPE_TO_CONSTRUCTOR, dtype.class]
    else
      [SYM_TYPE_TO_CONSTRUCTOR, dtype.to_sym]
    end
  table.fetch(key)
rescue KeyError
  raise ArgumentError, "Cannot construct RbSeries for type #{dtype}."
end
4507
+
4508
# Maps a plain Ruby class to the native constructor used to build a series
# from values of that class. Frozen so the lookup table cannot be mutated
# at runtime.
RB_TYPE_TO_CONSTRUCTOR = {
  Float => RbSeries.method(:new_opt_f64),
  Integer => RbSeries.method(:new_opt_i64),
  TrueClass => RbSeries.method(:new_opt_bool),
  FalseClass => RbSeries.method(:new_opt_bool),
  BigDecimal => RbSeries.method(:new_decimal),
  NilClass => RbSeries.method(:new_null)
}.freeze
4516
+
4517
def rb_type_to_constructor(dtype)
  # Classes missing from the table fall back to the object constructor.
  RB_TYPE_TO_CONSTRUCTOR.fetch(dtype) { RbSeries.method(:new_object) }
end
4522
+
4523
def _get_first_non_none(values)
  # Return the first element that is not nil (`false` counts as a value);
  # nil when there is none.
  values.each { |candidate| return candidate unless candidate.nil? }
  nil
end
4526
+ end
4527
+ end