polars-df 0.10.0-x86_64-linux-musl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +175 -0
  4. data/Cargo.lock +2536 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +38726 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +98 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +72 -0
  18. data/lib/polars/cat_name_space.rb +125 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +93 -0
  21. data/lib/polars/data_frame.rb +5418 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1444 -0
  24. data/lib/polars/date_time_name_space.rb +1484 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +31 -0
  27. data/lib/polars/expr.rb +6105 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +248 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1280 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +103 -0
  39. data/lib/polars/functions/range/int_range.rb +51 -0
  40. data/lib/polars/functions/repeat.rb +144 -0
  41. data/lib/polars/functions/whenthen.rb +96 -0
  42. data/lib/polars/functions.rb +57 -0
  43. data/lib/polars/group_by.rb +548 -0
  44. data/lib/polars/io.rb +890 -0
  45. data/lib/polars/lazy_frame.rb +2833 -0
  46. data/lib/polars/lazy_group_by.rb +84 -0
  47. data/lib/polars/list_expr.rb +791 -0
  48. data/lib/polars/list_name_space.rb +445 -0
  49. data/lib/polars/meta_expr.rb +222 -0
  50. data/lib/polars/name_expr.rb +198 -0
  51. data/lib/polars/plot.rb +109 -0
  52. data/lib/polars/rolling_group_by.rb +37 -0
  53. data/lib/polars/series.rb +4527 -0
  54. data/lib/polars/slice.rb +104 -0
  55. data/lib/polars/sql_context.rb +194 -0
  56. data/lib/polars/string_cache.rb +75 -0
  57. data/lib/polars/string_expr.rb +1519 -0
  58. data/lib/polars/string_name_space.rb +810 -0
  59. data/lib/polars/struct_expr.rb +98 -0
  60. data/lib/polars/struct_name_space.rb +96 -0
  61. data/lib/polars/testing.rb +507 -0
  62. data/lib/polars/utils.rb +422 -0
  63. data/lib/polars/version.rb +4 -0
  64. data/lib/polars/whenthen.rb +83 -0
  65. data/lib/polars-df.rb +1 -0
  66. data/lib/polars.rb +72 -0
  67. metadata +125 -0
@@ -0,0 +1,4527 @@
1
+ module Polars
2
+ # A Series represents a single column in a polars DataFrame.
3
+ class Series
4
+ include ExprDispatch
5
+
6
+ # Create a new Series.
7
+ #
8
+ # @param name [String, Array, nil]
9
+ # Name of the series. Will be used as a column name when used in a DataFrame.
10
+ # When not specified, name is set to an empty string.
11
+ # @param values [Array, nil]
12
+ # One-dimensional data in various forms. Supported are: Array and Series.
13
+ # @param dtype [Symbol, nil]
14
+ # Polars dtype of the Series data. If not specified, the dtype is inferred.
15
+ # @param strict [Boolean]
16
+ # Throw error on numeric overflow.
17
+ # @param nan_to_null [Boolean]
18
+ # Not used.
19
+ # @param dtype_if_empty [Symbol, nil]
20
+ # If no dtype is specified and values contains `nil` or an empty array,
21
+ # set the Polars dtype of the Series data. If not specified, Float32 is used.
22
+ #
23
+ # @example Constructing a Series by specifying name and values positionally:
24
+ # s = Polars::Series.new("a", [1, 2, 3])
25
+ #
26
+ # @example Notice that the dtype is automatically inferred as a polars `Int64`:
27
+ # s.dtype
28
+ # # => Polars::Int64
29
+ #
30
+ # @example Constructing a Series with a specific dtype:
31
+ # s2 = Polars::Series.new("a", [1, 2, 3], dtype: :f32)
32
+ #
33
+ # @example It is possible to construct a Series with values as the first positional argument. This syntax is considered an anti-pattern, but it can be useful in certain scenarios. You must specify any other arguments through keywords.
34
+ # s3 = Polars::Series.new([1, 2, 3])
35
def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false, dtype_if_empty: nil)
  # A non-string first argument is treated as the values, which is only
  # accepted when no second positional argument was supplied.
  unless name.nil? || name.is_a?(::String)
    raise ArgumentError, "Series name must be a string." unless values.nil?

    values = name
    name = nil
  end
  name ||= ""

  # TODO improve
  # Ranges that begin with a string are expanded into a plain array.
  values = values.to_a if values.is_a?(Range) && values.begin.is_a?(::String)

  if values.nil?
    self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
  elsif values.is_a?(Series)
    self._s = series_to_rbseries(name, values)
  elsif values.is_a?(Range)
    # The stop passed to arange is one past the end of an inclusive range.
    start = values.first
    stop = values.last + (values.exclude_end? ? 0 : 1)
    ranged = Polars.arange(start, stop, step: 1, eager: true, dtype: dtype)
    self._s = ranged.rename(name, in_place: true)._s
  elsif values.is_a?(::Array)
    self._s = sequence_to_rbseries(name, values, dtype: dtype, strict: strict, dtype_if_empty: dtype_if_empty)
  elsif defined?(Numo::NArray) && values.is_a?(Numo::NArray)
    self._s = numo_to_rbseries(name, values, strict: strict, nan_to_null: nan_to_null)

    # An explicit dtype is applied afterwards with a strict cast.
    self._s = self.cast(dtype, strict: true)._s unless dtype.nil?
  else
    raise ArgumentError, "Series constructor called with unsupported type; got #{values.class.name}"
  end
end
80
+
81
+ # @private
82
def self._from_rbseries(s)
  # Allocate without running initialize, then attach the native handle.
  Series.allocate.tap { |wrapped| wrapped._s = s }
end
87
+
88
+ # Get the data type of this Series.
89
+ #
90
+ # @return [Symbol]
91
def dtype = _s.dtype
94
+
95
+ # Get flags that are set on the Series.
96
+ #
97
+ # @return [Hash]
98
def flags
  native = _s
  result = {
    "SORTED_ASC" => native.is_sorted_flag,
    "SORTED_DESC" => native.is_sorted_reverse_flag
  }
  # FAST_EXPLODE is only reported when the dtype is a List.
  result["FAST_EXPLODE"] = native.can_fast_explode_flag if dtype.is_a?(List)
  result
end
108
+
109
+ # Get the inner dtype of a List typed Series.
110
+ #
111
+ # @return [Symbol]
112
def inner_dtype = _s.inner_dtype
115
+
116
+ # Get the name of this Series.
117
+ #
118
+ # @return [String]
119
def name = _s.name
122
+
123
+ # Shape of this Series.
124
+ #
125
+ # @return [Array]
126
def shape = [_s.len]
129
+
130
+ # Get the time unit of underlying Datetime Series as `"ns"`, `"us"`, or `"ms"`.
131
+ #
132
+ # @return [String]
133
def time_unit = _s.time_unit
136
+
137
+ # Returns a string representing the Series.
138
+ #
139
+ # @return [String]
140
def to_s = _s.to_s
143
+ alias_method :inspect, :to_s
144
+
145
+ # Bitwise AND.
146
+ #
147
+ # @return [Series]
148
def &(other)
  # Anything that is not a Series is promoted to a one-element Series first.
  rhs = other.is_a?(Series) ? other : Series.new([other])
  Utils.wrap_s(_s.bitand(rhs._s))
end
154
+
155
+ # Bitwise OR.
156
+ #
157
+ # @return [Series]
158
def |(other)
  # Anything that is not a Series is promoted to a one-element Series first.
  rhs = other.is_a?(Series) ? other : Series.new([other])
  Utils.wrap_s(_s.bitor(rhs._s))
end
164
+
165
+ # Bitwise XOR.
166
+ #
167
+ # @return [Series]
168
def ^(other)
  # Anything that is not a Series is promoted to a one-element Series first.
  rhs = other.is_a?(Series) ? other : Series.new([other])
  Utils.wrap_s(_s.bitxor(rhs._s))
end
174
+
175
+ # Equal.
176
+ #
177
+ # @return [Series]
178
def ==(other) = _comp(other, :eq)
181
+
182
+ # Not equal.
183
+ #
184
+ # @return [Series]
185
def !=(other) = _comp(other, :neq)
188
+
189
+ # Greater than.
190
+ #
191
+ # @return [Series]
192
def >(other) = _comp(other, :gt)
195
+
196
+ # Less than.
197
+ #
198
+ # @return [Series]
199
def <(other) = _comp(other, :lt)
202
+
203
+ # Greater than or equal.
204
+ #
205
+ # @return [Series]
206
def >=(other) = _comp(other, :gt_eq)
209
+
210
+ # Less than or equal.
211
+ #
212
+ # @return [Series]
213
def <=(other) = _comp(other, :lt_eq)
216
+
217
+ # Method equivalent of operator expression `series <= other`.
218
+ #
219
+ # @return [Series]
220
def le(other) = self <= other
223
+
224
+ # Method equivalent of operator expression `series < other`.
225
+ #
226
+ # @return [Series]
227
def lt(other) = self < other
230
+
231
+ # Method equivalent of operator expression `series == other`.
232
+ #
233
+ # @return [Series]
234
def eq(other) = self == other
237
+
238
+ # Method equivalent of equality operator `series == other` where `nil == nil`.
239
+ #
240
+ # This differs from the standard `eq` where null values are propagated.
241
+ #
242
+ # @param other [Object]
243
+ # A literal or expression value to compare with.
244
+ #
245
+ # @return [Object]
246
+ #
247
+ # @example
248
+ # s1 = Polars::Series.new("a", [333, 200, nil])
249
+ # s2 = Polars::Series.new("a", [100, 200, nil])
250
+ # s1.eq(s2)
251
+ # # =>
252
+ # # shape: (3,)
253
+ # # Series: 'a' [bool]
254
+ # # [
255
+ # # false
256
+ # # true
257
+ # # null
258
+ # # ]
259
+ #
260
+ # @example
261
+ # s1.eq_missing(s2)
262
+ # # =>
263
+ # # shape: (3,)
264
+ # # Series: 'a' [bool]
265
+ # # [
266
+ # # false
267
+ # # true
268
+ # # true
269
+ # # ]
270
def eq_missing(other)
  # An expression operand is compared against a literal built from this series.
  return Polars.lit(self).eq_missing(other) if other.is_a?(Expr)

  # Otherwise evaluate the comparison eagerly through a one-column frame.
  frame = to_frame
  comparison = Polars.col(name).eq_missing(other)
  frame.select(comparison).to_series
end
276
+
277
+ # Method equivalent of operator expression `series != other`.
278
+ #
279
+ # @return [Series]
280
def ne(other) = self != other
283
+
284
+ # Method equivalent of equality operator `series != other` where `nil == nil`.
285
+ #
286
+ # This differs from the standard `ne` where null values are propagated.
287
+ #
288
+ # @param other [Object]
289
+ # A literal or expression value to compare with.
290
+ #
291
+ # @return [Object]
292
+ #
293
+ # @example
294
+ # s1 = Polars::Series.new("a", [333, 200, nil])
295
+ # s2 = Polars::Series.new("a", [100, 200, nil])
296
+ # s1.ne(s2)
297
+ # # =>
298
+ # # shape: (3,)
299
+ # # Series: 'a' [bool]
300
+ # # [
301
+ # # true
302
+ # # false
303
+ # # null
304
+ # # ]
305
+ #
306
+ # @example
307
+ # s1.ne_missing(s2)
308
+ # # =>
309
+ # # shape: (3,)
310
+ # # Series: 'a' [bool]
311
+ # # [
312
+ # # true
313
+ # # false
314
+ # # false
315
+ # # ]
316
def ne_missing(other)
  # An expression operand is compared against a literal built from this series.
  return Polars.lit(self).ne_missing(other) if other.is_a?(Expr)

  # Otherwise evaluate the comparison eagerly through a one-column frame.
  frame = to_frame
  comparison = Polars.col(name).ne_missing(other)
  frame.select(comparison).to_series
end
322
+
323
+ # Method equivalent of operator expression `series >= other`.
324
+ #
325
+ # @return [Series]
326
def ge(other) = self >= other
329
+
330
+ # Method equivalent of operator expression `series > other`.
331
+ #
332
+ # @return [Series]
333
def gt(other) = self > other
336
+
337
+ # Performs addition.
338
+ #
339
+ # @return [Series]
340
def +(other) = _arithmetic(other, :add)
343
+
344
+ # Performs subtraction.
345
+ #
346
+ # @return [Series]
347
def -(other) = _arithmetic(other, :sub)
350
+
351
+ # Performs multiplication.
352
+ #
353
+ # @return [Series]
354
def *(other)
  raise ArgumentError, "first cast to integer before multiplying datelike dtypes" if is_temporal

  # A DataFrame operand performs the multiplication itself.
  return other * self if other.is_a?(DataFrame)

  _arithmetic(other, :mul)
end
363
+
364
+ # Performs division.
365
+ #
366
+ # @return [Series]
367
def /(other)
  raise ArgumentError, "first cast to integer before dividing datelike dtypes" if is_temporal

  # Float series divide directly; anything else is cast to Float64 and divided again.
  is_float ? _arithmetic(other, :div) : cast(Float64) / other
end
378
+
379
+ # Returns the modulo.
380
+ #
381
+ # @return [Series]
382
def %(other)
  raise ArgumentError, "first cast to integer before applying modulo on datelike dtypes" if is_datelike

  _arithmetic(other, :rem)
end
388
+
389
+ # Raises to the power of exponent.
390
+ #
391
+ # @return [Series]
392
def **(power)
  raise ArgumentError, "first cast to integer before raising datelike dtypes to a power" if is_datelike

  # Evaluated as an expression over a one-column frame.
  frame = to_frame
  raised = Polars.col(name).pow(power)
  frame.select(raised).to_series
end
398
+
399
+ # Performs boolean not.
400
+ #
401
+ # @return [Series]
402
def !
  # Only Boolean series can be negated.
  raise NotImplementedError unless dtype == Boolean

  Utils.wrap_s(_s.not)
end
408
+
409
+ # Performs negation.
410
+ #
411
+ # @return [Series]
412
def -@() = 0 - self
415
+
416
+ # Returns an enumerator.
417
+ #
418
+ # @return [Object]
419
def each
  return to_enum(:each) unless block_given?

  # Walk the series by position, yielding one element at a time.
  length.times { |position| yield self[position] }
end
426
+
427
+ # Returns elements of the Series.
428
+ #
429
+ # @return [Object]
430
def [](item)
  case item
  when Series
    # Integer-typed series select by position; boolean series act as a mask.
    if [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64].include?(item.dtype)
      return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s))
    end
    return filter(item) if item.bool?
  when Integer
    # Negative positions count back from the end.
    position = item < 0 ? len + item : item
    return _s.get_idx(position)
  when Range
    return Slice.new(self).apply(item)
  end

  # Plain integer sequences are converted to a Series of positions.
  if Utils.is_int_sequence(item)
    positions = _pos_idxs(Series.new("", item))
    return Utils.wrap_s(_s.take_with_series(positions._s))
  end

  raise ArgumentError, "Cannot get item of type: #{item.class.name}"
end
457
+
458
+ # Sets an element of the Series.
459
+ #
460
+ # @return [Object]
461
def []=(key, value)
  # Array values are only accepted for numeric or datelike series.
  if value.is_a?(::Array)
    unless is_numeric || is_datelike
      raise ArgumentError, "cannot set Series of dtype: #{dtype} with list/tuple as value; use a scalar value"
    end

    scatter(key, value)
    return
  end

  case key
  when Series
    # Boolean keys act as a mask; UInt32/UInt64 keys are positions.
    updated =
      if key.dtype == Boolean
        set(key, value)
      elsif key.dtype == UInt64
        scatter(key.cast(UInt32), value)
      elsif key.dtype == UInt32
        scatter(key, value)
      else
        raise Todo
      end
    self._s = updated._s
  when ::Array
    # Re-dispatch with the positions wrapped in a UInt32 series.
    self[Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))] = value
  when Range
    self[Series.new("", key, dtype: UInt32)] = value
  when Integer
    self[[key]] = value
  else
    raise ArgumentError, "cannot use #{key} for indexing"
  end
end
492
+
493
+ # Return an estimation of the total (heap) allocated size of the Series.
494
+ #
495
+ # Estimated size is given in the specified unit (bytes by default).
496
+ #
497
+ # This estimation is the sum of the size of its buffers, validity, including
498
+ # nested arrays. Multiple arrays may share buffers and bitmaps. Therefore, the
499
+ # size of 2 arrays is not the sum of the sizes computed from this function. In
500
+ # particular, StructArray's size is an upper bound.
501
+ #
502
+ # When an array is sliced, its allocated size remains constant because the buffer
503
+ # is unchanged. However, this function will yield a smaller number. This is because
504
+ # this function returns the visible size of the buffer, not its total capacity.
505
+ #
506
+ # FFI buffers are included in this estimation.
507
+ #
508
+ # @param unit ["b", "kb", "mb", "gb", "tb"]
509
+ # Scale the returned size to the given unit.
510
+ #
511
+ # @return [Numeric]
512
+ #
513
+ # @example
514
+ # s = Polars::Series.new("values", 1..1_000_000, dtype: :u32)
515
+ # s.estimated_size
516
+ # # => 4000000
517
+ # s.estimated_size("mb")
518
+ # # => 3.814697265625
519
def estimated_size(unit = "b") = Utils.scale_bytes(_s.estimated_size, to: unit)
523
+
524
+ # Compute the square root of the elements.
525
+ #
526
+ # @return [Series]
527
def sqrt = self ** 0.5
530
+
531
+ # Check if any boolean value in the column is `true`.
532
+ #
533
+ # @return [Boolean]
534
def any?(ignore_nulls: true, &block)
  # Without a block the native series answers directly.
  return _s.any(ignore_nulls) unless block_given?

  # With a block, apply it to the elements first and test the result.
  apply(skip_nulls: ignore_nulls, &block).any?
end
541
+ alias_method :any, :any?
542
+
543
+ # Check if all boolean values in the column are `true`.
544
+ #
545
+ # @return [Boolean]
546
def all?(ignore_nulls: true, &block)
  # Without a block the native series answers directly.
  return _s.all(ignore_nulls) unless block_given?

  # With a block, apply it to the elements first and test the result.
  apply(skip_nulls: ignore_nulls, &block).all?
end
553
+ alias_method :all, :all?
554
+
555
+ # Check if all boolean values in the column are `false`.
556
+ #
557
+ # @return [Boolean]
558
def none?(&block)
  # With a block, apply it to the elements first and test the result.
  return apply(&block).none? if block_given?

  # Otherwise negate the column and check that every value holds.
  negated_all = Polars.col(name).is_not.all
  to_frame.select(negated_all).to_series[0]
end
565
+ alias_method :none, :none?
566
+
567
+ # Compute the logarithm to a given base.
568
+ #
569
+ # @param base [Float]
570
+ # Given base, defaults to `Math::E`.
571
+ #
572
+ # @return [Series]
573
def log(base = Math::E)
  # Forward the (possibly defaulted) base explicitly.
  super(base)
end
576
+
577
+ # Compute the base 10 logarithm of the input array, element-wise.
578
+ #
579
+ # @return [Series]
580
def log10
  # This method takes no arguments, so none are forwarded.
  super()
end
583
+
584
+ # Compute the exponential, element-wise.
585
+ #
586
+ # @return [Series]
587
def exp
  # This method takes no arguments, so none are forwarded.
  super()
end
590
+
591
+ # Create a new Series that copies data from this Series without null values.
592
+ #
593
+ # @return [Series]
594
def drop_nulls
  # This method takes no arguments, so none are forwarded.
  super()
end
597
+
598
+ # Drop NaN values.
599
+ #
600
+ # @return [Series]
601
def drop_nans
  # This method takes no arguments, so none are forwarded.
  super()
end
604
+
605
+ # Cast this Series to a DataFrame.
606
+ #
607
+ # @return [DataFrame]
608
def to_frame = Utils.wrap_df(RbDataFrame.new([_s]))
611
+
612
+ # Quick summary statistics of a series.
613
+ #
614
+ # Series with mixed datatypes will return summary statistics for the datatype of
615
+ # the first value.
616
+ #
617
+ # @return [DataFrame]
618
+ #
619
+ # @example
620
+ # series_num = Polars::Series.new([1, 2, 3, 4, 5])
621
+ # series_num.describe
622
+ # # =>
623
+ # # shape: (6, 2)
624
+ # # ┌────────────┬──────────┐
625
+ # # │ statistic ┆ value │
626
+ # # │ --- ┆ --- │
627
+ # # │ str ┆ f64 │
628
+ # # ╞════════════╪══════════╡
629
+ # # │ min ┆ 1.0 │
630
+ # # │ max ┆ 5.0 │
631
+ # # │ null_count ┆ 0.0 │
632
+ # # │ mean ┆ 3.0 │
633
+ # # │ std ┆ 1.581139 │
634
+ # # │ count ┆ 5.0 │
635
+ # # └────────────┴──────────┘
636
+ #
637
+ # @example
638
+ # series_str = Polars::Series.new(["a", "a", nil, "b", "c"])
639
+ # series_str.describe
640
+ # # =>
641
+ # # shape: (3, 2)
642
+ # # ┌────────────┬───────┐
643
+ # # │ statistic ┆ value │
644
+ # # │ --- ┆ --- │
645
+ # # │ str ┆ i64 │
646
+ # # ╞════════════╪═══════╡
647
+ # # │ unique ┆ 4 │
648
+ # # │ null_count ┆ 1 │
649
+ # # │ count ┆ 5 │
650
+ # # └────────────┴───────┘
651
def describe
  raise ArgumentError, "Series must contain at least one value" if len == 0

  # Pick the set of statistics from the kind of data held.
  summary =
    if is_numeric
      as_float = cast(:f64)
      {
        "min" => as_float.min,
        "max" => as_float.max,
        "null_count" => as_float.null_count,
        "mean" => as_float.mean,
        "std" => as_float.std,
        "count" => as_float.len
      }
    elsif is_boolean
      {
        "sum" => sum,
        "null_count" => null_count,
        "count" => len
      }
    elsif is_utf8
      {
        "unique" => unique.length,
        "null_count" => null_count,
        "count" => len
      }
    elsif is_datelike
      # we coerce all to string, because a polars column
      # only has a single dtype and dates: datetime and count: int don't match
      {
        "min" => dt.min.to_s,
        "max" => dt.max.to_s,
        "null_count" => null_count.to_s,
        "count" => len.to_s
      }
    else
      raise TypeError, "This type is not supported"
    end

  Polars::DataFrame.new({"statistic" => summary.keys, "value" => summary.values})
end
693
+
694
+ # Reduce this Series to the sum value.
695
+ #
696
+ # @return [Numeric]
697
+ #
698
+ # @note
699
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
700
+ # `:i64` before summing to prevent overflow issues.
701
+ #
702
+ # @example
703
+ # s = Polars::Series.new("a", [1, 2, 3])
704
+ # s.sum
705
+ # # => 6
706
def sum = _s.sum
709
+
710
+ # Reduce this Series to the mean value.
711
+ #
712
+ # @return [Float, nil]
713
+ #
714
+ # @example
715
+ # s = Polars::Series.new("a", [1, 2, 3])
716
+ # s.mean
717
+ # # => 2.0
718
def mean = _s.mean
721
+
722
+ # Reduce this Series to the product value.
723
+ #
724
+ # @return [Numeric]
725
def product
  # Computed as an expression over a one-column frame; the single result is unwrapped.
  frame = to_frame
  reduced = frame.select(Polars.col(name).product)
  reduced.to_series[0]
end
728
+
729
+ # Get the minimal value in this Series.
730
+ #
731
+ # @return [Object]
732
+ #
733
+ # @example
734
+ # s = Polars::Series.new("a", [1, 2, 3])
735
+ # s.min
736
+ # # => 1
737
def min = _s.min
740
+
741
+ # Get the maximum value in this Series.
742
+ #
743
+ # @return [Object]
744
+ #
745
+ # @example
746
+ # s = Polars::Series.new("a", [1, 2, 3])
747
+ # s.max
748
+ # # => 3
749
def max = _s.max
752
+
753
+ # Get maximum value, but propagate/poison encountered NaN values.
754
+ #
755
+ # @return [Object]
756
def nan_max
  # Evaluated over a one-column frame; the single cell of the result is returned.
  frame = to_frame
  reduced = frame.select(Polars.col(name).nan_max)
  reduced[0, 0]
end
759
+
760
+ # Get minimum value, but propagate/poison encountered NaN values.
761
+ #
762
+ # @return [Object]
763
def nan_min
  # Evaluated over a one-column frame; the single cell of the result is returned.
  frame = to_frame
  reduced = frame.select(Polars.col(name).nan_min)
  reduced[0, 0]
end
766
+
767
+ # Get the standard deviation of this Series.
768
+ #
769
+ # @param ddof [Integer]
770
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
771
+ # where N represents the number of elements.
772
+ #
773
+ # @return [Float, nil]
774
+ #
775
+ # @example
776
+ # s = Polars::Series.new("a", [1, 2, 3])
777
+ # s.std
778
+ # # => 1.0
779
def std(ddof: 1)
  # Non-numeric series have no standard deviation.
  return nil unless is_numeric

  deviation = Polars.col(name).std(ddof: ddof)
  to_frame.select(deviation).to_series[0]
end
786
+
787
+ # Get variance of this Series.
788
+ #
789
+ # @param ddof [Integer]
790
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
791
+ # where N represents the number of elements.
792
+ #
793
+ # @return [Float, nil]
794
+ #
795
+ # @example
796
+ # s = Polars::Series.new("a", [1, 2, 3])
797
+ # s.var
798
+ # # => 1.0
799
def var(ddof: 1)
  # Non-numeric series have no variance.
  return nil unless is_numeric

  variance = Polars.col(name).var(ddof: ddof)
  to_frame.select(variance).to_series[0]
end
806
+
807
+ # Get the median of this Series.
808
+ #
809
+ # @return [Float, nil]
810
+ #
811
+ # @example
812
+ # s = Polars::Series.new("a", [1, 2, 3])
813
+ # s.median
814
+ # # => 2.0
815
def median = _s.median
818
+
819
+ # Get the quantile value of this Series.
820
+ #
821
+ # @param quantile [Float, nil]
822
+ # Quantile between 0.0 and 1.0.
823
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
824
+ # Interpolation method.
825
+ #
826
+ # @return [Float, nil]
827
+ #
828
+ # @example
829
+ # s = Polars::Series.new("a", [1, 2, 3])
830
+ # s.quantile(0.5)
831
+ # # => 2.0
832
def quantile(quantile, interpolation: "nearest") = _s.quantile(quantile, interpolation)
835
+
836
+ # Get dummy variables.
837
+ #
838
+ # @return [DataFrame]
839
+ #
840
+ # @example
841
+ # s = Polars::Series.new("a", [1, 2, 3])
842
+ # s.to_dummies
843
+ # # =>
844
+ # # shape: (3, 3)
845
+ # # ┌─────┬─────┬─────┐
846
+ # # │ a_1 ┆ a_2 ┆ a_3 │
847
+ # # │ --- ┆ --- ┆ --- │
848
+ # # │ u8 ┆ u8 ┆ u8 │
849
+ # # ╞═════╪═════╪═════╡
850
+ # # │ 1 ┆ 0 ┆ 0 │
851
+ # # │ 0 ┆ 1 ┆ 0 │
852
+ # # │ 0 ┆ 0 ┆ 1 │
853
+ # # └─────┴─────┴─────┘
854
def to_dummies(separator: "_", drop_first: false) = Utils.wrap_df(_s.to_dummies(separator, drop_first))
857
+
858
+ # Bin continuous values into discrete categories.
859
+ #
860
+ # @param breaks [Array]
861
+ # List of unique cut points.
862
+ # @param labels [Array]
863
+ # Names of the categories. The number of labels must be equal to the number
864
+ # of cut points plus one.
865
+ # @param left_closed [Boolean]
866
+ # Set the intervals to be left-closed instead of right-closed.
867
+ # @param include_breaks [Boolean]
868
+ # Include a column with the right endpoint of the bin each observation falls
869
+ # in. This will change the data type of the output from a
870
+ # `Categorical` to a `Struct`.
871
+ #
872
+ # @return [Series]
873
+ #
874
+ # @example Divide the column into three categories.
875
+ # s = Polars::Series.new("foo", [-2, -1, 0, 1, 2])
876
+ # s.cut([-1, 1], labels: ["a", "b", "c"])
877
+ # # =>
878
+ # # shape: (5,)
879
+ # # Series: 'foo' [cat]
880
+ # # [
881
+ # # "a"
882
+ # # "a"
883
+ # # "b"
884
+ # # "b"
885
+ # # "c"
886
+ # # ]
887
+ #
888
+ # @example Create a DataFrame with the breakpoint and category for each value.
889
+ # cut = s.cut([-1, 1], include_breaks: true).alias("cut")
890
+ # s.to_frame.with_columns(cut).unnest("cut")
891
+ # # =>
892
+ # # shape: (5, 3)
893
+ # # ┌─────┬─────────────┬────────────┐
894
+ # # │ foo ┆ break_point ┆ category │
895
+ # # │ --- ┆ --- ┆ --- │
896
+ # # │ i64 ┆ f64 ┆ cat │
897
+ # # ╞═════╪═════════════╪════════════╡
898
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
899
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
900
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
901
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
902
+ # # │ 2 ┆ inf ┆ (1, inf] │
903
+ # # └─────┴─────────────┴────────────┘
904
def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
  frame = to_frame
  binning = Polars.col(name).cut(
    breaks,
    labels: labels,
    left_closed: left_closed,
    include_breaks: include_breaks
  )
  binned = frame.select(binning).to_series

  # When breaks are included the struct fields are renamed to fixed names.
  return binned unless include_breaks

  binned.struct.rename_fields(["break_point", "category"])
end
924
+
925
+ # Bin continuous values into discrete categories based on their quantiles.
926
+ #
927
+ # @param quantiles [Array]
928
+ # Either a list of quantile probabilities between 0 and 1 or a positive
929
+ # integer determining the number of bins with uniform probability.
930
+ # @param labels [Array]
931
+ # Names of the categories. The number of labels must be equal to the number
932
+ # of cut points plus one.
933
+ # @param left_closed [Boolean]
934
+ # Set the intervals to be left-closed instead of right-closed.
935
+ # @param allow_duplicates [Boolean]
936
+ # If set to `true`, duplicates in the resulting quantiles are dropped,
937
+ # rather than raising a `DuplicateError`. This can happen even with unique
938
+ # probabilities, depending on the data.
939
+ # @param include_breaks [Boolean]
940
+ # Include a column with the right endpoint of the bin each observation falls
941
+ # in. This will change the data type of the output from a
942
+ # `Categorical` to a `Struct`.
943
+ #
944
+ # @return [Series]
945
+ #
946
+ # @example Divide a column into three categories according to pre-defined quantile probabilities.
947
+ # s = Polars::Series.new("foo", [-2, -1, 0, 1, 2])
948
+ # s.qcut([0.25, 0.75], labels: ["a", "b", "c"])
949
+ # # =>
950
+ # # shape: (5,)
951
+ # # Series: 'foo' [cat]
952
+ # # [
953
+ # # "a"
954
+ # # "a"
955
+ # # "b"
956
+ # # "b"
957
+ # # "c"
958
+ # # ]
959
+ #
960
+ # @example Divide a column into two categories using uniform quantile probabilities.
961
+ # s.qcut(2, labels: ["low", "high"], left_closed: true)
962
+ # # =>
963
+ # # shape: (5,)
964
+ # # Series: 'foo' [cat]
965
+ # # [
966
+ # # "low"
967
+ # # "low"
968
+ # # "high"
969
+ # # "high"
970
+ # # "high"
971
+ # # ]
972
+ #
973
+ # @example Create a DataFrame with the breakpoint and category for each value.
974
+ # cut = s.qcut([0.25, 0.75], include_breaks: true).alias("cut")
975
+ # s.to_frame.with_columns(cut).unnest("cut")
976
+ # # =>
977
+ # # shape: (5, 3)
978
+ # # ┌─────┬─────────────┬────────────┐
979
+ # # │ foo ┆ break_point ┆ category │
980
+ # # │ --- ┆ --- ┆ --- │
981
+ # # │ i64 ┆ f64 ┆ cat │
982
+ # # ╞═════╪═════════════╪════════════╡
983
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
984
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
985
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
986
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
987
+ # # │ 2 ┆ inf ┆ (1, inf] │
988
+ # # └─────┴─────────────┴────────────┘
989
def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
  frame = to_frame
  binning = Polars.col(name).qcut(
    quantiles,
    labels: labels,
    left_closed: left_closed,
    allow_duplicates: allow_duplicates,
    include_breaks: include_breaks
  )
  binned = frame.select(binning).to_series

  # When breaks are included the struct fields are renamed to fixed names.
  return binned unless include_breaks

  binned.struct.rename_fields(["break_point", "category"])
end
1010
+
1011
+ # Get the lengths of runs of identical values.
1012
+ #
1013
+ # @return [Series]
1014
+ #
1015
+ # @example
1016
+ # s = Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3])
1017
+ # s.rle.struct.unnest
1018
+ # # =>
1019
+ # # shape: (6, 2)
1020
+ # # ┌─────────┬────────┐
1021
+ # # │ lengths ┆ values │
1022
+ # # │ --- ┆ --- │
1023
+ # # │ i32 ┆ i64 │
1024
+ # # ╞═════════╪════════╡
1025
+ # # │ 2 ┆ 1 │
1026
+ # # │ 1 ┆ 2 │
1027
+ # # │ 1 ┆ 1 │
1028
+ # # │ 1 ┆ null │
1029
+ # # │ 1 ┆ 1 │
1030
+ # # │ 2 ┆ 3 │
1031
+ # # └─────────┴────────┘
1032
def rle
  # This method takes no arguments, so none are forwarded.
  super()
end
1035
+
1036
+ # Map values to run IDs.
1037
+ #
1038
+ # Similar to RLE, but it maps each value to an ID corresponding to the run into
1039
+ # which it falls. This is especially useful when you want to define groups by
1040
+ # runs of identical values rather than the values themselves.
1041
+ #
1042
+ # @return [Series]
1043
+ #
1044
+ # @example
1045
+ # s = Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3])
1046
+ # s.rle_id
1047
+ # # =>
1048
+ # # shape: (8,)
1049
+ # # Series: 's' [u32]
1050
+ # # [
1051
+ # # 0
1052
+ # # 0
1053
+ # # 1
1054
+ # # 2
1055
+ # # 3
1056
+ # # 4
1057
+ # # 5
1058
+ # # 5
1059
+ # # ]
1060
def rle_id
  # This method takes no arguments, so none are forwarded.
  super()
end
1063
+
1064
+ # Count the unique values in a Series.
1065
+ #
1066
+ # @param sort [Boolean]
1067
+ # Ensure the output is sorted from most values to least.
1068
+ #
1069
+ # @return [DataFrame]
1070
+ #
1071
+ # @example
1072
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1073
+ # s.value_counts.sort("a")
1074
+ # # =>
1075
+ # # shape: (3, 2)
1076
+ # # ┌─────┬────────┐
1077
+ # # │ a ┆ counts │
1078
+ # # │ --- ┆ --- │
1079
+ # # │ i64 ┆ u32 │
1080
+ # # ╞═════╪════════╡
1081
+ # # │ 1 ┆ 1 │
1082
+ # # │ 2 ┆ 2 │
1083
+ # # │ 3 ┆ 1 │
1084
+ # # └─────┴────────┘
1085
def value_counts(sort: false) = Utils.wrap_df(_s.value_counts(sort))
1088
+
1089
+ # Return a count of the unique values in the order of appearance.
1090
+ #
1091
+ # @return [Series]
1092
+ #
1093
+ # @example
1094
+ # s = Polars::Series.new("id", ["a", "b", "b", "c", "c", "c"])
1095
+ # s.unique_counts
1096
+ # # =>
1097
+ # # shape: (3,)
1098
+ # # Series: 'id' [u32]
1099
+ # # [
1100
+ # # 1
1101
+ # # 2
1102
+ # # 3
1103
+ # # ]
1104
def unique_counts
  # Bare `super`: hands the call to the next `unique_counts` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1107
+
1108
+ # Computes the entropy.
1109
+ #
1110
+ # Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities.
1111
+ #
1112
+ # @param base [Float]
1113
+ # Given base, defaults to `e`
1114
+ # @param normalize [Boolean]
1115
+ # Normalize pk if it doesn't sum to 1.
1116
+ #
1117
+ # @return [Float, nil]
1118
+ #
1119
+ # @example
1120
+ # a = Polars::Series.new([0.99, 0.005, 0.005])
1121
+ # a.entropy(normalize: true)
1122
+ # # => 0.06293300616044681
1123
+ #
1124
+ # @example
1125
+ # b = Polars::Series.new([0.65, 0.10, 0.25])
1126
+ # b.entropy(normalize: true)
1127
+ # # => 0.8568409950394724
1128
def entropy(base: Math::E, normalize: false)
  # Build the expression-level `entropy` over this Series as a literal,
  # evaluate it with Polars.select and return element 0 of the result.
  entropy_expr = Polars.lit(self).entropy(base: base, normalize: normalize)
  Polars.select(entropy_expr).to_series[0]
end
1131
+
1132
+ # Run an expression over a sliding window that increases `1` slot every iteration.
1133
+ #
1134
+ # @param expr [Expr]
1135
+ # Expression to evaluate
1136
+ # @param min_periods [Integer]
1137
+ # Number of valid values there should be in the window before the expression
1138
+ # is evaluated. valid values = `length - null_count`
1139
+ # @param parallel [Boolean]
1140
+ # Run in parallel. Don't do this in a group by or another operation that
1141
+ # already has much parallelization.
1142
+ #
1143
+ # @return [Series]
1144
+ #
1145
+ # @note
1146
+ # This functionality is experimental and may change without it being considered a
1147
+ # breaking change.
1148
+ #
1149
+ # @note
1150
+ # This can be really slow as it can have `O(n^2)` complexity. Don't use this
1151
+ # for operations that visit all elements.
1152
+ #
1153
+ # @example
1154
+ # s = Polars::Series.new("values", [1, 2, 3, 4, 5])
1155
+ # s.cumulative_eval(Polars.element.first - Polars.element.last ** 2)
1156
+ # # =>
1157
+ # # shape: (5,)
1158
+ # # Series: 'values' [i64]
1159
+ # # [
1160
+ # # 0
1161
+ # # -3
1162
+ # # -8
1163
+ # # -15
1164
+ # # -24
1165
+ # # ]
1166
def cumulative_eval(expr, min_periods: 1, parallel: false)
  # Zero-argument `super`: re-sends `expr`, `min_periods:` and `parallel:` as
  # received to the next `cumulative_eval` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1169
+
1170
+ # Return a copy of the Series with a new alias/name.
1171
+ #
1172
+ # @param name [String]
1173
+ # New name.
1174
+ #
1175
+ # @return [Series]
1176
+ #
1177
+ # @example
1178
+ # s = Polars::Series.new("x", [1, 2, 3])
1179
+ # s.alias("y")
1180
def alias(name)
  # Rename the wrapped object of a `dup`, not of the receiver, and return that dup.
  dup.tap { |copy| copy._s.rename(name) }
end
1185
+
1186
+ # Rename this Series.
1187
+ #
1188
+ # @param name [String]
1189
+ # New name.
1190
+ # @param in_place [Boolean]
1191
+ # Modify the Series in-place.
1192
+ #
1193
+ # @return [Series]
1194
+ #
1195
+ # @example
1196
+ # s = Polars::Series.new("a", [1, 2, 3])
1197
+ # s.rename("b")
1198
def rename(name, in_place: false)
  # Default path: leave the receiver alone and return a renamed copy via #alias.
  return self.alias(name) unless in_place

  # In-place path: rename the wrapped object and return the receiver itself.
  _s.rename(name)
  self
end
1206
+
1207
+ # Get the length of each individual chunk.
1208
+ #
1209
+ # @return [Array]
1210
+ #
1211
+ # @example
1212
+ # s = Polars::Series.new("a", [1, 2, 3])
1213
+ # s2 = Polars::Series.new("b", [4, 5, 6])
1214
+ #
1215
+ # @example Concatenate Series with rechunk: true
1216
+ # Polars.concat([s, s2]).chunk_lengths
1217
+ # # => [6]
1218
+ #
1219
+ # @example Concatenate Series with rechunk: false
1220
+ # Polars.concat([s, s2], rechunk: false).chunk_lengths
1221
+ # # => [3, 3]
1222
def chunk_lengths
  # Straight delegation to the wrapped `_s` object.
  _s.chunk_lengths
end
1225
+
1226
+ # Get the number of chunks that this Series contains.
1227
+ #
1228
+ # @return [Integer]
1229
+ #
1230
+ # @example
1231
+ # s = Polars::Series.new("a", [1, 2, 3])
1232
+ # s2 = Polars::Series.new("b", [4, 5, 6])
1233
+ #
1234
+ # @example Concatenate Series with rechunk: true
1235
+ # Polars.concat([s, s2]).n_chunks
1236
+ # # => 1
1237
+ #
1238
+ # @example Concatenate Series with rechunk: false
1239
+ # Polars.concat([s, s2], rechunk: false).n_chunks
1240
+ # # => 2
1241
def n_chunks
  # Straight delegation to the wrapped `_s` object.
  _s.n_chunks
end
1244
+
1245
+ # Get an array with the cumulative sum computed at every element.
1246
+ #
1247
+ # @param reverse [Boolean]
1248
+ # reverse the operation.
1249
+ #
1250
+ # @return [Series]
1251
+ #
1252
+ # @note
1253
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
1254
+ # `:i64` before summing to prevent overflow issues.
1255
+ #
1256
+ # @example
1257
+ # s = Polars::Series.new("a", [1, 2, 3])
1258
+ # s.cum_sum
1259
+ # # =>
1260
+ # # shape: (3,)
1261
+ # # Series: 'a' [i64]
1262
+ # # [
1263
+ # # 1
1264
+ # # 3
1265
+ # # 6
1266
+ # # ]
1267
def cum_sum(reverse: false)
  # Zero-argument `super`: re-sends `reverse:` as received to the next
  # `cum_sum` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1270
+ alias_method :cumsum, :cum_sum
1271
+
1272
+ # Get an array with the cumulative min computed at every element.
1273
+ #
1274
+ # @param reverse [Boolean]
1275
+ # reverse the operation.
1276
+ #
1277
+ # @return [Series]
1278
+ #
1279
+ # @example
1280
+ # s = Polars::Series.new("a", [3, 5, 1])
1281
+ # s.cum_min
1282
+ # # =>
1283
+ # # shape: (3,)
1284
+ # # Series: 'a' [i64]
1285
+ # # [
1286
+ # # 3
1287
+ # # 3
1288
+ # # 1
1289
+ # # ]
1290
def cum_min(reverse: false)
  # Zero-argument `super`: re-sends `reverse:` as received to the next
  # `cum_min` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1293
+ alias_method :cummin, :cum_min
1294
+
1295
+ # Get an array with the cumulative max computed at every element.
1296
+ #
1297
+ # @param reverse [Boolean]
1298
+ # reverse the operation.
1299
+ #
1300
+ # @return [Series]
1301
+ #
1302
+ # @example
1303
+ # s = Polars::Series.new("a", [3, 5, 1])
1304
+ # s.cum_max
1305
+ # # =>
1306
+ # # shape: (3,)
1307
+ # # Series: 'a' [i64]
1308
+ # # [
1309
+ # # 3
1310
+ # # 5
1311
+ # # 5
1312
+ # # ]
1313
def cum_max(reverse: false)
  # Zero-argument `super`: re-sends `reverse:` as received to the next
  # `cum_max` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1316
+ alias_method :cummax, :cum_max
1317
+
1318
+ # Get an array with the cumulative product computed at every element.
1319
+ #
1320
+ # @param reverse [Boolean]
1321
+ # reverse the operation.
1322
+ #
1323
+ # @return [Series]
1324
+ #
1325
+ # @note
1326
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
1327
+ # `:i64` before multiplying to prevent overflow issues.
1328
+ #
1329
+ # @example
1330
+ # s = Polars::Series.new("a", [1, 2, 3])
1331
+ # s.cum_prod
1332
+ # # =>
1333
+ # # shape: (3,)
1334
+ # # Series: 'a' [i64]
1335
+ # # [
1336
+ # # 1
1337
+ # # 2
1338
+ # # 6
1339
+ # # ]
1340
def cum_prod(reverse: false)
  # Zero-argument `super`: re-sends `reverse:` as received to the next
  # `cum_prod` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1343
+ alias_method :cumprod, :cum_prod
1344
+
1345
+ # Get the first `n` rows.
1346
+ #
1347
+ # Alias for {#head}.
1348
+ #
1349
+ # @param n [Integer]
1350
+ # Number of rows to return.
1351
+ #
1352
+ # @return [Series]
1353
+ #
1354
+ # @example
1355
+ # s = Polars::Series.new("a", [1, 2, 3])
1356
+ # s.limit(2)
1357
+ # # =>
1358
+ # # shape: (2,)
1359
+ # # Series: 'a' [i64]
1360
+ # # [
1361
+ # # 1
1362
+ # # 2
1363
+ # # ]
1364
def limit(n = 10)
  # Put this Series into a frame, apply the expression-level `limit` to the
  # column carrying its name, and convert the result back to a Series.
  frame = to_frame
  limited = frame.select(Utils.col(name).limit(n))
  limited.to_series
end
1367
+
1368
+ # Get a slice of this Series.
1369
+ #
1370
+ # @param offset [Integer]
1371
+ # Start index. Negative indexing is supported.
1372
+ # @param length [Integer, nil]
1373
+ # Length of the slice. If set to `nil`, all rows starting at the offset
1374
+ # will be selected.
1375
+ #
1376
+ # @return [Series]
1377
+ #
1378
+ # @example
1379
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
1380
+ # s.slice(1, 2)
1381
+ # # =>
1382
+ # # shape: (2,)
1383
+ # # Series: 'a' [i64]
1384
+ # # [
1385
+ # # 2
1386
+ # # 3
1387
+ # # ]
1388
def slice(offset, length = nil)
  # Zero-argument `super`: re-sends `offset` and `length` as received to the
  # next `slice` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1391
+
1392
+ # Append a Series to this one.
1393
+ #
1394
+ # @param other [Series]
1395
+ # Series to append.
1396
+ # @param append_chunks [Boolean]
1397
+ # If set to `true` the append operation will add the chunks from `other` to
1398
+ # self. This is super cheap.
1399
+ #
1400
+ # If set to `false` the append operation will do the same as
1401
+ # {DataFrame#extend} which extends the memory backed by this Series with
1402
+ # the values from `other`.
1403
+ #
1404
+ # Different from `append_chunks`, `extend` appends the data from `other` to
1405
+ # the underlying memory locations and thus may cause a reallocation (which is
1406
+ # expensive).
1407
+ #
1408
+ # If this does not cause a reallocation, the resulting data structure will not
1409
+ # have any extra chunks and thus will yield faster queries.
1410
+ #
1411
+ # Prefer `extend` over `append_chunks` when you want to do a query after a
1412
+ # single append. For instance during online operations where you add `n` rows
1413
+ # and rerun a query.
1414
+ #
1415
+ # Prefer `append_chunks` over `extend` when you want to append many times
1416
+ # before doing a query. For instance, when you read in multiple files and want
1417
+ # to store them in a single Series. In the latter case, finish the sequence
1418
+ # of `append_chunks` operations with a `rechunk`.
1419
+ #
1420
+ # @return [Series]
1421
+ #
1422
+ # @example
1423
+ # s = Polars::Series.new("a", [1, 2, 3])
1424
+ # s2 = Polars::Series.new("b", [4, 5, 6])
1425
+ # s.append(s2)
1426
+ # # =>
1427
+ # # shape: (6,)
1428
+ # # Series: 'a' [i64]
1429
+ # # [
1430
+ # # 1
1431
+ # # 2
1432
+ # # 3
1433
+ # # 4
1434
+ # # 5
1435
+ # # 6
1436
+ # # ]
1437
def append(other, append_chunks: true)
  begin
    # Pick the native operation: `append` when `append_chunks` is true,
    # `extend` otherwise (see the parameter documentation for the trade-off).
    if append_chunks
      _s.append(other._s)
    else
      _s.extend(other._s)
    end
  rescue => e
    # Only the borrow conflict is handled here; every other error is re-raised as is.
    raise unless e.message == "Already mutably borrowed"

    # Retry against a clone of `other`.
    # NOTE(review): presumably this happens when `other` shares its backing
    # object with the receiver (e.g. `s.append(s)`) — confirm.
    # `append_chunks` is keyword-only, so it must be forwarded by name; passing
    # it positionally made this retry raise ArgumentError.
    append(other.clone, append_chunks: append_chunks)
  end
  self
end
1453
+
1454
+ # Filter elements by a boolean mask.
1455
+ #
1456
+ # @param predicate [Series, Array]
1457
+ # Boolean mask.
1458
+ #
1459
+ # @return [Series]
1460
+ #
1461
+ # @example
1462
+ # s = Polars::Series.new("a", [1, 2, 3])
1463
+ # mask = Polars::Series.new("", [true, false, true])
1464
+ # s.filter(mask)
1465
+ # # =>
1466
+ # # shape: (2,)
1467
+ # # Series: 'a' [i64]
1468
+ # # [
1469
+ # # 1
1470
+ # # 3
1471
+ # # ]
1472
def filter(predicate)
  # A plain Ruby array is promoted to an unnamed Series; anything else is used as given.
  mask = predicate.is_a?(::Array) ? Series.new("", predicate) : predicate
  Utils.wrap_s(_s.filter(mask._s))
end
1478
+
1479
+ # Get the first `n` rows.
1480
+ #
1481
+ # @param n [Integer]
1482
+ # Number of rows to return.
1483
+ #
1484
+ # @return [Series]
1485
+ #
1486
+ # @example
1487
+ # s = Polars::Series.new("a", [1, 2, 3])
1488
+ # s.head(2)
1489
+ # # =>
1490
+ # # shape: (2,)
1491
+ # # Series: 'a' [i64]
1492
+ # # [
1493
+ # # 1
1494
+ # # 2
1495
+ # # ]
1496
def head(n = 10)
  # Put this Series into a frame, apply the expression-level `head` to the
  # column carrying its name, and convert the result back to a Series.
  frame = to_frame
  leading = frame.select(Utils.col(name).head(n))
  leading.to_series
end
1499
+
1500
+ # Get the last `n` rows.
1501
+ #
1502
+ # @param n [Integer]
1503
+ # Number of rows to return.
1504
+ #
1505
+ # @return [Series]
1506
+ #
1507
+ # @example
1508
+ # s = Polars::Series.new("a", [1, 2, 3])
1509
+ # s.tail(2)
1510
+ # # =>
1511
+ # # shape: (2,)
1512
+ # # Series: 'a' [i64]
1513
+ # # [
1514
+ # # 2
1515
+ # # 3
1516
+ # # ]
1517
def tail(n = 10)
  # Put this Series into a frame, apply the expression-level `tail` to the
  # column carrying its name, and convert the result back to a Series.
  frame = to_frame
  trailing = frame.select(Utils.col(name).tail(n))
  trailing.to_series
end
1520
+
1521
+ # Take every nth value in the Series and return as new Series.
1522
+ #
1523
+ # @return [Series]
1524
+ #
1525
+ # @example
1526
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
1527
+ # s.take_every(2)
1528
+ # # =>
1529
+ # # shape: (2,)
1530
+ # # Series: 'a' [i64]
1531
+ # # [
1532
+ # # 1
1533
+ # # 3
1534
+ # # ]
1535
def take_every(n)
  # Zero-argument `super`: re-sends `n` as received to the next `take_every`
  # in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1538
+
1539
+ # Sort this Series.
1540
+ #
1541
+ # @param reverse [Boolean]
1542
+ # Reverse sort.
1543
+ # @param in_place [Boolean]
1544
+ # Sort in place.
1545
+ #
1546
+ # @return [Series]
1547
+ #
1548
+ # @example
1549
+ # s = Polars::Series.new("a", [1, 3, 4, 2])
1550
+ # s.sort
1551
+ # # =>
1552
+ # # shape: (4,)
1553
+ # # Series: 'a' [i64]
1554
+ # # [
1555
+ # # 1
1556
+ # # 2
1557
+ # # 3
1558
+ # # 4
1559
+ # # ]
1560
+ # s.sort(reverse: true)
1561
+ # # =>
1562
+ # # shape: (4,)
1563
+ # # Series: 'a' [i64]
1564
+ # # [
1565
+ # # 4
1566
+ # # 3
1567
+ # # 2
1568
+ # # 1
1569
+ # # ]
1570
def sort(reverse: false, nulls_last: false, multithreaded: true, in_place: false)
  # The native sort is the same for both modes; only what is done with its result differs.
  sorted = _s.sort(reverse, nulls_last, multithreaded)

  # Not in place: wrap the sorted result and return that.
  return Utils.wrap_s(sorted) unless in_place

  # In place: replace the wrapped object and return the receiver.
  self._s = sorted
  self
end
1578
+
1579
+ # Return the `k` largest elements.
1580
+ #
1581
+ # @param k [Integer]
1582
+ # Number of elements to return.
1583
+ #
1584
+ # @return [Series]
1585
+ #
1586
+ # @example
1587
+ # s = Polars::Series.new("a", [2, 5, 1, 4, 3])
1588
+ # s.top_k(k: 3)
1589
+ # # =>
1590
+ # # shape: (3,)
1591
+ # # Series: 'a' [i64]
1592
+ # # [
1593
+ # # 5
1594
+ # # 4
1595
+ # # 3
1596
+ # # ]
1597
def top_k(k: 5)
  # Zero-argument `super`: re-sends `k:` as received to the next `top_k`
  # in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1600
+
1601
+ # Return the `k` smallest elements.
1602
+ #
1603
+ # @param k [Integer]
1604
+ # Number of elements to return.
1605
+ #
1606
+ # @return [Series]
1607
+ #
1608
+ # @example
1609
+ # s = Polars::Series.new("a", [2, 5, 1, 4, 3])
1610
+ # s.bottom_k(k: 3)
1611
+ # # =>
1612
+ # # shape: (3,)
1613
+ # # Series: 'a' [i64]
1614
+ # # [
1615
+ # # 1
1616
+ # # 2
1617
+ # # 3
1618
+ # # ]
1619
def bottom_k(k: 5)
  # Zero-argument `super`: re-sends `k:` as received to the next `bottom_k`
  # in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1622
+
1623
+ # Get the index values that would sort this Series.
1624
+ #
1625
+ # @param reverse [Boolean]
1626
+ # Sort in reverse (descending) order.
1627
+ # @param nulls_last [Boolean]
1628
+ # Place null values last instead of first.
1629
+ #
1630
+ # @return [Series]
1631
+ #
1632
+ # @example
1633
+ # s = Polars::Series.new("a", [5, 3, 4, 1, 2])
1634
+ # s.arg_sort
1635
+ # # =>
1636
+ # # shape: (5,)
1637
+ # # Series: 'a' [u32]
1638
+ # # [
1639
+ # # 3
1640
+ # # 4
1641
+ # # 1
1642
+ # # 2
1643
+ # # 0
1644
+ # # ]
1645
def arg_sort(reverse: false, nulls_last: false)
  # Zero-argument `super`: re-sends `reverse:` and `nulls_last:` as received
  # to the next `arg_sort` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1648
+
1649
+ # Get the index values that would sort this Series.
1650
+ #
1651
+ # Alias for {#arg_sort}.
1652
+ #
1653
+ # @param reverse [Boolean]
1654
+ # Sort in reverse (descending) order.
1655
+ # @param nulls_last [Boolean]
1656
+ # Place null values last instead of first.
1657
+ #
1658
+ # @return [Series]
1659
def argsort(reverse: false, nulls_last: false)
  # Zero-argument `super`: re-sends `reverse:` and `nulls_last:` as received
  # to the next `argsort` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1662
+
1663
+ # Get unique index as Series.
1664
+ #
1665
+ # @return [Series]
1666
+ #
1667
+ # @example
1668
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1669
+ # s.arg_unique
1670
+ # # =>
1671
+ # # shape: (3,)
1672
+ # # Series: 'a' [u32]
1673
+ # # [
1674
+ # # 0
1675
+ # # 1
1676
+ # # 3
1677
+ # # ]
1678
def arg_unique
  # Bare `super`: hands the call to the next `arg_unique` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1681
+
1682
+ # Get the index of the minimal value.
1683
+ #
1684
+ # @return [Integer, nil]
1685
+ #
1686
+ # @example
1687
+ # s = Polars::Series.new("a", [3, 2, 1])
1688
+ # s.arg_min
1689
+ # # => 2
1690
def arg_min
  # Straight delegation to the wrapped `_s` object.
  _s.arg_min
end
1693
+
1694
+ # Get the index of the maximal value.
1695
+ #
1696
+ # @return [Integer, nil]
1697
+ #
1698
+ # @example
1699
+ # s = Polars::Series.new("a", [3, 2, 1])
1700
+ # s.arg_max
1701
+ # # => 0
1702
def arg_max
  # Straight delegation to the wrapped `_s` object.
  _s.arg_max
end
1705
+
1706
+ # Find indices where elements should be inserted to maintain order.
1707
+ #
1708
+ # @param element [Object]
1709
+ # Expression or scalar value.
1710
+ #
1711
+ # @return [Integer, Series]
1712
def search_sorted(element, side: "any")
  # Plain numbers are searched for directly; any other input is first turned into a Series.
  scalar = element.is_a?(Integer) || element.is_a?(Float)
  element = Series.new(element) unless scalar

  found = Polars.select(Polars.lit(self).search_sorted(element, side: side))

  # A numeric scalar yields the single item; any other input yields a Series.
  scalar ? found.item : found.to_series
end
1719
+
1720
+ # Get unique elements in series.
1721
+ #
1722
+ # @param maintain_order [Boolean]
1723
+ # Maintain order of data. This requires more work.
1724
+ #
1725
+ # @return [Series]
1726
+ #
1727
+ # @example
1728
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1729
+ # s.unique.sort
1730
+ # # =>
1731
+ # # shape: (3,)
1732
+ # # Series: 'a' [i64]
1733
+ # # [
1734
+ # # 1
1735
+ # # 2
1736
+ # # 3
1737
+ # # ]
1738
def unique(maintain_order: false)
  # Zero-argument `super`: re-sends `maintain_order:` as received to the next
  # `unique` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1741
+ alias_method :uniq, :unique
1742
+
1743
+ # Take values by index.
1744
+ #
1745
+ # @param indices [Array]
1746
+ # Index location used for selection.
1747
+ #
1748
+ # @return [Series]
1749
+ #
1750
+ # @example
1751
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
1752
+ # s.take([1, 3])
1753
+ # # =>
1754
+ # # shape: (2,)
1755
+ # # Series: 'a' [i64]
1756
+ # # [
1757
+ # # 2
1758
+ # # 4
1759
+ # # ]
1760
def take(indices)
  # Put this Series into a frame, apply the expression-level `take` to the
  # column carrying its name, and convert the result back to a Series.
  frame = to_frame
  picked = frame.select(Polars.col(name).take(indices))
  picked.to_series
end
1763
+
1764
+ # Count the null values in this Series.
1765
+ #
1766
+ # @return [Integer]
1767
def null_count
  # Straight delegation to the wrapped `_s` object.
  _s.null_count
end
1770
+
1771
+ # Return `true` if the Series has a validity bitmask.
1772
+ #
1773
+ # If there is none, it means that there are no null values.
1774
+ # Use this to swiftly assert a Series does not have null values.
1775
+ #
1776
+ # @return [Boolean]
1777
def has_validity
  # Straight delegation to the wrapped `_s` object.
  _s.has_validity
end
1780
+
1781
+ # Check if the Series is empty.
1782
+ #
1783
+ # @return [Boolean]
1784
+ #
1785
+ # @example
1786
+ # s = Polars::Series.new("a", [])
1787
+ # s.is_empty
1788
+ # # => true
1789
def is_empty
  # Empty means a length of zero.
  len.zero?
end
1792
+ alias_method :empty?, :is_empty
1793
+
1794
+ # Returns a boolean Series indicating which values are null.
1795
+ #
1796
+ # @return [Series]
1797
+ #
1798
+ # @example
1799
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, nil])
1800
+ # s.is_null
1801
+ # # =>
1802
+ # # shape: (4,)
1803
+ # # Series: 'a' [bool]
1804
+ # # [
1805
+ # # false
1806
+ # # false
1807
+ # # false
1808
+ # # true
1809
+ # # ]
1810
def is_null
  # Bare `super`: hands the call to the next `is_null` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1813
+
1814
+ # Returns a boolean Series indicating which values are not null.
1815
+ #
1816
+ # @return [Series]
1817
+ #
1818
+ # @example
1819
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, nil])
1820
+ # s.is_not_null
1821
+ # # =>
1822
+ # # shape: (4,)
1823
+ # # Series: 'a' [bool]
1824
+ # # [
1825
+ # # true
1826
+ # # true
1827
+ # # true
1828
+ # # false
1829
+ # # ]
1830
def is_not_null
  # Bare `super`: hands the call to the next `is_not_null` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1833
+
1834
+ # Returns a boolean Series indicating which values are finite.
1835
+ #
1836
+ # @return [Series]
1837
+ #
1838
+ # @example
1839
+ # s = Polars::Series.new("a", [1.0, 2.0, Float::INFINITY])
1840
+ # s.is_finite
1841
+ # # =>
1842
+ # # shape: (3,)
1843
+ # # Series: 'a' [bool]
1844
+ # # [
1845
+ # # true
1846
+ # # true
1847
+ # # false
1848
+ # # ]
1849
def is_finite
  # Bare `super`: hands the call to the next `is_finite` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1852
+
1853
+ # Returns a boolean Series indicating which values are infinite.
1854
+ #
1855
+ # @return [Series]
1856
+ #
1857
+ # @example
1858
+ # s = Polars::Series.new("a", [1.0, 2.0, Float::INFINITY])
1859
+ # s.is_infinite
1860
+ # # =>
1861
+ # # shape: (3,)
1862
+ # # Series: 'a' [bool]
1863
+ # # [
1864
+ # # false
1865
+ # # false
1866
+ # # true
1867
+ # # ]
1868
def is_infinite
  # Bare `super`: hands the call to the next `is_infinite` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1871
+
1872
+ # Returns a boolean Series indicating which values are NaN.
1873
+ #
1874
+ # @return [Series]
1875
+ #
1876
+ # @example
1877
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
1878
+ # s.is_nan
1879
+ # # =>
1880
+ # # shape: (4,)
1881
+ # # Series: 'a' [bool]
1882
+ # # [
1883
+ # # false
1884
+ # # false
1885
+ # # false
1886
+ # # true
1887
+ # # ]
1888
def is_nan
  # Bare `super`: hands the call to the next `is_nan` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1891
+
1892
+ # Returns a boolean Series indicating which values are not NaN.
1893
+ #
1894
+ # @return [Series]
1895
+ #
1896
+ # @example
1897
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
1898
+ # s.is_not_nan
1899
+ # # =>
1900
+ # # shape: (4,)
1901
+ # # Series: 'a' [bool]
1902
+ # # [
1903
+ # # true
1904
+ # # true
1905
+ # # true
1906
+ # # false
1907
+ # # ]
1908
def is_not_nan
  # Bare `super`: hands the call to the next `is_not_nan` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1911
+
1912
+ # Check if elements of this Series are in the other Series.
1913
+ #
1914
+ # @return [Series]
1915
+ #
1916
+ # @example
1917
+ # s = Polars::Series.new("a", [1, 2, 3])
1918
+ # s2 = Polars::Series.new("b", [2, 4])
1919
+ # s2.is_in(s)
1920
+ # # =>
1921
+ # # shape: (2,)
1922
+ # # Series: 'b' [bool]
1923
+ # # [
1924
+ # # true
1925
+ # # false
1926
+ # # ]
1927
+ #
1928
+ # @example
1929
+ # sets = Polars::Series.new("sets", [[1, 2, 3], [1, 2], [9, 10]])
1930
+ # # =>
1931
+ # # shape: (3,)
1932
+ # # Series: 'sets' [list[i64]]
1933
+ # # [
1934
+ # # [1, 2, 3]
1935
+ # # [1, 2]
1936
+ # # [9, 10]
1937
+ # # ]
1938
+ #
1939
+ # @example
1940
+ # optional_members = Polars::Series.new("optional_members", [1, 2, 3])
1941
+ # # =>
1942
+ # # shape: (3,)
1943
+ # # Series: 'optional_members' [i64]
1944
+ # # [
1945
+ # # 1
1946
+ # # 2
1947
+ # # 3
1948
+ # # ]
1949
+ #
1950
+ # @example
1951
+ # optional_members.is_in(sets)
1952
+ # # =>
1953
+ # # shape: (3,)
1954
+ # # Series: 'optional_members' [bool]
1955
+ # # [
1956
+ # # true
1957
+ # # true
1958
+ # # false
1959
+ # # ]
1960
def is_in(other)
  # Zero-argument `super`: re-sends `other` as received to the next `is_in`
  # in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
1963
+ alias_method :in?, :is_in
1964
+
1965
+ # Get index values where Boolean Series evaluate `true`.
1966
+ #
1967
+ # @return [Series]
1968
+ #
1969
+ # @example
1970
+ # s = Polars::Series.new("a", [1, 2, 3])
1971
+ # (s == 2).arg_true
1972
+ # # =>
1973
+ # # shape: (1,)
1974
+ # # Series: 'a' [u32]
1975
+ # # [
1976
+ # # 1
1977
+ # # ]
1978
def arg_true
  # Eagerly evaluate Polars.arg_where with this Series as the condition.
  condition = self
  Polars.arg_where(condition, eager: true)
end
1981
+
1982
+ # Get mask of all unique values.
1983
+ #
1984
+ # @return [Series]
1985
+ #
1986
+ # @example
1987
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1988
+ # s.is_unique
1989
+ # # =>
1990
+ # # shape: (4,)
1991
+ # # Series: 'a' [bool]
1992
+ # # [
1993
+ # # true
1994
+ # # false
1995
+ # # false
1996
+ # # true
1997
+ # # ]
1998
def is_unique
  # Bare `super`: hands the call to the next `is_unique` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
2001
+
2002
+ # Get a mask of the first unique value.
2003
+ #
2004
+ # @return [Series]
2005
def is_first
  # Bare `super`: hands the call to the next `is_first` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
2008
+
2009
+ # Get mask of all duplicated values.
2010
+ #
2011
+ # @return [Series]
2012
+ #
2013
+ # @example
2014
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
2015
+ # s.is_duplicated
2016
+ # # =>
2017
+ # # shape: (4,)
2018
+ # # Series: 'a' [bool]
2019
+ # # [
2020
+ # # false
2021
+ # # true
2022
+ # # true
2023
+ # # false
2024
+ # # ]
2025
def is_duplicated
  # Bare `super`: hands the call to the next `is_duplicated` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
2028
+
2029
+ # Explode a list or utf8 Series.
2030
+ #
2031
+ # This means that every item is expanded to a new row.
2032
+ #
2033
+ # @return [Series]
2034
+ #
2035
+ # @example
2036
+ # s = Polars::Series.new("a", [[1, 2], [3, 4], [9, 10]])
2037
+ # s.explode
2038
+ # # =>
2039
+ # # shape: (6,)
2040
+ # # Series: 'a' [i64]
2041
+ # # [
2042
+ # # 1
2043
+ # # 2
2044
+ # # 3
2045
+ # # 4
2046
+ # # 9
2047
+ # # 10
2048
+ # # ]
2049
def explode
  # Bare `super`: hands the call to the next `explode` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
2052
+
2053
+ # Check if series is equal with another Series.
2054
+ #
2055
+ # @param other [Series]
2056
+ # Series to compare with.
2057
+ # @param null_equal [Boolean]
2058
+ # Consider null values as equal.
2059
+ # @param strict [Boolean]
2060
+ # Don't allow different numerical dtypes, e.g. comparing `:u32` with a
2061
+ # `:i64` will return `false`.
2062
+ #
2063
+ # @return [Boolean]
2064
+ #
2065
+ # @example
2066
+ # s = Polars::Series.new("a", [1, 2, 3])
2067
+ # s2 = Polars::Series.new("b", [4, 5, 6])
2068
+ # s.equals(s)
2069
+ # # => true
2070
+ # s.equals(s2)
2071
+ # # => false
2072
def equals(other, null_equal: false, strict: false)
  # The comparison is done by the wrapped object; the keyword flags are
  # passed on positionally in the order (null_equal, strict).
  mine = _s
  theirs = other._s
  mine.equals(theirs, null_equal, strict)
end
2075
+ alias_method :series_equal, :equals
2076
+
2077
+ # Return the number of non-null elements in the Series.
2078
+ #
2079
+ # @return [Integer]
2080
+ #
2081
+ # @example
2082
+ # s = Polars::Series.new("a", [1, 2, nil])
2083
+ # s.count
2084
+ # # => 2
2085
def count
  # Total length minus the nulls, i.e. the number of non-null entries.
  total = len
  total - null_count
end
2088
+
2089
+ # Return the number of elements in the Series.
2090
+ #
2091
+ # @return [Integer]
2092
+ #
2093
+ # @example
2094
+ # s = Polars::Series.new("a", [1, 2, nil])
2095
+ # s.len
2096
+ # # => 3
2097
def len
  # Straight delegation to the wrapped `_s` object.
  _s.len
end
2100
+ alias_method :length, :len
2101
+ alias_method :size, :len
2102
+
2103
+ # Cast between data types.
2104
+ #
2105
+ # @param dtype [Symbol]
2106
+ # DataType to cast to
2107
+ # @param strict [Boolean]
2108
+ # Throw an error if a cast could not be done for instance due to an overflow
2109
+ #
2110
+ # @return [Series]
2111
+ #
2112
+ # @example
2113
+ # s = Polars::Series.new("a", [true, false, true])
2114
+ # s.cast(:u32)
2115
+ # # =>
2116
+ # # shape: (3,)
2117
+ # # Series: 'a' [u32]
2118
+ # # [
2119
+ # # 1
2120
+ # # 0
2121
+ # # 1
2122
+ # # ]
2123
def cast(dtype, strict: true)
  # Zero-argument `super`: re-sends `dtype` and `strict:` as received to the
  # next `cast` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
2126
+
2127
+ # Cast to physical representation of the logical dtype.
2128
+ #
2129
+ # - `:date` -> `:i32`
2130
+ # - `:datetime` -> `:i64`
2131
+ # - `:time` -> `:i64`
2132
+ # - `:duration` -> `:i64`
2133
+ # - `:cat` -> `:u32`
2134
+ # - other data types will be left unchanged.
2135
+ #
2136
+ # @return [Series]
2137
+ #
2138
+ # @example
2139
+ # s = Polars::Series.new("values", ["a", nil, "x", "a"])
2140
+ # s.cast(:cat).to_physical
2141
+ # # =>
2142
+ # # shape: (4,)
2143
+ # # Series: 'values' [u32]
2144
+ # # [
2145
+ # # 0
2146
+ # # null
2147
+ # # 1
2148
+ # # 0
2149
+ # # ]
2150
def to_physical
  # Bare `super`: hands the call to the next `to_physical` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
2153
+
2154
+ # Convert this Series to a Ruby Array. This operation clones data.
2155
+ #
2156
+ # @return [Array]
2157
+ #
2158
+ # @example
2159
+ # s = Polars::Series.new("a", [1, 2, 3])
2160
+ # s.to_a
2161
+ # # => [1, 2, 3]
2162
def to_a
  # Straight delegation to the wrapped `_s` object.
  _s.to_a
end
2165
+
2166
+ # Create a single chunk of memory for this Series.
2167
+ #
2168
+ # @param in_place [Boolean]
2169
+ # In place or not.
2170
+ #
2171
+ # @return [Series]
2172
def rechunk(in_place: false)
  # The native call runs in both modes; its result is only used when not in place.
  rechunked = _s.rechunk(in_place)
  return self if in_place

  Utils.wrap_s(rechunked)
end
2176
+
2177
+ # Return Series in reverse order.
2178
+ #
2179
+ # @return [Series]
2180
+ #
2181
+ # @example
2182
+ # s = Polars::Series.new("a", [1, 2, 3], dtype: :i8)
2183
+ # s.reverse
2184
+ # # =>
2185
+ # # shape: (3,)
2186
+ # # Series: 'a' [i8]
2187
+ # # [
2188
+ # # 3
2189
+ # # 2
2190
+ # # 1
2191
+ # # ]
2192
def reverse
  # Bare `super`: hands the call to the next `reverse` in the ancestor chain.
  # NOTE(review): presumably the ExprDispatch-provided implementation — confirm.
  super
end
2195
+
2196
+ # Check if this Series datatype is numeric.
2197
+ #
2198
+ # @return [Boolean]
2199
+ #
2200
+ # @example
2201
+ # s = Polars::Series.new("a", [1, 2, 3])
2202
+ # s.is_numeric
2203
+ # # => true
2204
def is_numeric
  # Signed integers, unsigned integers and floats count as numeric.
  numeric_types = [
    Int8, Int16, Int32, Int64,
    UInt8, UInt16, UInt32, UInt64,
    Float32, Float64
  ]
  numeric_types.include?(dtype)
end
2207
+ alias_method :numeric?, :is_numeric
2208
+
2209
+ # Check if this Series datatype is datelike.
2210
+ #
2211
+ # @return [Boolean]
2212
+ #
2213
+ # @example
2214
+ # s = Polars::Series.new([Date.new(2021, 1, 1), Date.new(2021, 1, 2), Date.new(2021, 1, 3)])
2215
+ # s.is_datelike
2216
+ # # => true
2217
def is_datelike
  # Date and Time are matched through `include?`; Datetime and Duration
  # are matched with `is_a?`.
  return true if [Date, Time].include?(dtype)

  dtype.is_a?(Datetime) || dtype.is_a?(Duration)
end
2220
+ alias_method :datelike?, :is_datelike
2221
+ alias_method :is_temporal, :is_datelike
2222
+ alias_method :temporal?, :is_datelike
2223
+
2224
+ # Check if this Series has floating point numbers.
2225
+ #
2226
+ # @return [Boolean]
2227
+ #
2228
+ # @example
2229
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0])
2230
+ # s.is_float
2231
+ # # => true
2232
def is_float
  # Both float widths qualify.
  float_types = [Float32, Float64]
  float_types.include?(dtype)
end
2235
+ alias_method :float?, :is_float
2236
+
2237
+ # Check if this Series is a Boolean.
2238
+ #
2239
+ # @return [Boolean]
2240
+ #
2241
+ # @example
2242
+ # s = Polars::Series.new("a", [true, false, true])
2243
+ # s.is_boolean
2244
+ # # => true
2245
def is_boolean
  # True when the dtype compares equal to Boolean.
  series_dtype = dtype
  series_dtype == Boolean
end
2248
+ alias_method :boolean?, :is_boolean
2249
+ alias_method :is_bool, :is_boolean
2250
+ alias_method :bool?, :is_boolean
2251
+
2252
+ # Check if this Series datatype is a Utf8.
2253
+ #
2254
+ # @return [Boolean]
2255
+ #
2256
+ # @example
2257
+ # s = Polars::Series.new("x", ["a", "b", "c"])
2258
+ # s.is_utf8
2259
+ # # => true
2260
def is_utf8
  # `String` is resolved in this file's lexical scope, not forced to the
  # top-level ::String.
  # NOTE(review): presumably that is the Polars String dtype — confirm.
  series_dtype = dtype
  series_dtype == String
end
2263
+ alias_method :utf8?, :is_utf8
2264
+
2265
+ # def view
2266
+ # end
2267
+
2268
+ # Convert this Series to a Numo array. This operation clones data but is completely safe.
2269
+ #
2270
+ # @return [Numo::NArray]
2271
+ #
2272
+ # @example
2273
+ # s = Polars::Series.new("a", [1, 2, 3])
2274
+ # s.to_numo
2275
+ # # =>
2276
+ # # Numo::Int64#shape=[3]
2277
+ # # [1, 2, 3]
2278
def to_numo
  without_validity = !has_validity

  if is_datelike
    # Temporal data goes through an array of Ruby objects whether or not
    # a validity bitmask is present.
    Numo::RObject.cast(to_a)
  elsif without_validity && is_numeric
    # TODO make more efficient
    numo_class = {
      UInt8 => Numo::UInt8,
      UInt16 => Numo::UInt16,
      UInt32 => Numo::UInt32,
      UInt64 => Numo::UInt64,
      Int8 => Numo::Int8,
      Int16 => Numo::Int16,
      Int32 => Numo::Int32,
      Int64 => Numo::Int64,
      Float32 => Numo::SFloat,
      Float64 => Numo::DFloat
    }.fetch(dtype.class)
    numo_class.cast(to_a)
  elsif without_validity && is_boolean
    Numo::Bit.cast(to_a)
  else
    # Everything else, including any non-temporal Series that has a
    # validity bitmask, is converted by the wrapped object.
    _s.to_numo
  end
end
2307
+
2308
+ # Set masked values.
2309
+ #
2310
+ # @param filter [Series]
2311
+ # Boolean mask.
2312
+ # @param value [Object]
2313
+ # Value with which to replace the masked values.
2314
+ #
2315
+ # @return [Series]
2316
+ #
2317
+ # @note
2318
+ # Use of this function is frequently an anti-pattern, as it can
2319
+ # block optimization (predicate pushdown, etc). Consider using
2320
+ # `Polars.when(predicate).then(value).otherwise(self)` instead.
2321
+ #
2322
+ # @example
2323
+ # s = Polars::Series.new("a", [1, 2, 3])
2324
+ # s.set(s == 2, 10)
2325
+ # # =>
2326
+ # # shape: (3,)
2327
+ # # Series: 'a' [i64]
2328
+ # # [
2329
+ # # 1
2330
+ # # 10
2331
+ # # 3
2332
+ # # ]
2333
def set(filter, value)
  # The native setter is selected by name: "set_with_mask_" followed by the
  # DTYPE_TO_FFINAME entry for this Series' dtype class.
  ffi_name = DTYPE_TO_FFINAME.fetch(dtype.class)
  updated = _s.send("set_with_mask_#{ffi_name}", filter._s, value)
  Utils.wrap_s(updated)
end
2336
+
2337
+ # Set values at the index locations.
2338
+ #
2339
+ # @param idx [Object]
2340
+ # Integers representing the index locations.
2341
+ # @param value [Object]
2342
+ # Replacement values.
2343
+ #
2344
+ # @return [Series]
2345
+ #
2346
+ # @example
2347
+ # s = Polars::Series.new("a", [1, 2, 3])
2348
+ # s.set_at_idx(1, 10)
2349
+ # # =>
2350
+ # # shape: (3,)
2351
+ # # Series: 'a' [i64]
2352
+ # # [
2353
+ # # 1
2354
+ # # 10
2355
+ # # 3
2356
+ # # ]
2357
def scatter(idx, value)
  # A single Integer index is shorthand for a one-element list.
  idx = [idx] if idx.is_a?(Integer)
  # No index locations: nothing to write, return the receiver untouched.
  return self if idx.length == 0

  idx = Series.new("", idx)
  if value.is_a?(Integer) || value.is_a?(Float) || Utils.bool?(value) || value.is_a?(::String) || value.nil?
    value = Series.new("", [value])

    # Broadcast the scalar to one replacement per index. This is only needed
    # for more than one index; the previous `> 0` guard was always true here
    # and requested a zero-length extension for a single index.
    if idx.length > 1
      value = value.extend_constant(value[0], idx.length - 1)
    end
  elsif !value.is_a?(Series)
    # Any other non-Series input (e.g. an Array) is wrapped in an unnamed Series.
    value = Series.new("", value)
  end

  # The write is performed by the wrapped object; the receiver is returned.
  _s.scatter(idx._s, value._s)
  self
end
2379
+ alias_method :set_at_idx, :scatter
2380
+
2381
+ # Create an empty copy of the current Series.
2382
+ #
2383
+ # The copy has identical name/dtype but no data.
2384
+ #
2385
+ # @return [Series]
2386
+ #
2387
+ # @example
2388
+ # s = Polars::Series.new("a", [nil, true, false])
2389
+ # s.cleared
2390
+ # # =>
2391
+ # # shape: (0,)
2392
+ # # Series: 'a' [bool]
2393
+ # # [
2394
+ # # ]
2395
def cleared
  # A Series with no rows is simply cloned; otherwise take a zero-row `limit`.
  return clone unless len > 0

  limit(0)
end
2398
+
2399
+ # clone handled by initialize_copy
2400
+
2401
+ # Fill floating point NaN value with a fill value.
2402
+ #
2403
+ # @param fill_value [Object]
2404
+ # Value used to fill nan values.
2405
+ #
2406
+ # @return [Series]
2407
+ #
2408
+ # @example
2409
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
2410
+ # s.fill_nan(0)
2411
+ # # =>
2412
+ # # shape: (4,)
2413
+ # # Series: 'a' [f64]
2414
+ # # [
2415
+ # # 1.0
2416
+ # # 2.0
2417
+ # # 3.0
2418
+ # # 0.0
2419
+ # # ]
2420
def fill_nan(fill_value)
  # No logic here: `fill_value` is passed on to the next `fill_nan` in the
  # ancestor chain (or to `method_missing` when there is none).
  super(fill_value)
end
2423
+
2424
+ # Fill null values using the specified value or strategy.
2425
+ #
2426
+ # @param value [Object]
2427
+ # Value used to fill null values.
2428
+ # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
2429
+ # Strategy used to fill null values.
2430
+ # @param limit
2431
+ # Number of consecutive null values to fill when using the "forward" or
2432
+ # "backward" strategy.
2433
+ #
2434
+ # @return [Series]
2435
+ #
2436
+ # @example
2437
+ # s = Polars::Series.new("a", [1, 2, 3, nil])
2438
+ # s.fill_null(strategy: "forward")
2439
+ # # =>
2440
+ # # shape: (4,)
2441
+ # # Series: 'a' [i64]
2442
+ # # [
2443
+ # # 1
2444
+ # # 2
2445
+ # # 3
2446
+ # # 3
2447
+ # # ]
2448
+ #
2449
+ # @example
2450
+ # s.fill_null(strategy: "min")
2451
+ # # =>
2452
+ # # shape: (4,)
2453
+ # # Series: 'a' [i64]
2454
+ # # [
2455
+ # # 1
2456
+ # # 2
2457
+ # # 3
2458
+ # # 1
2459
+ # # ]
2460
+ #
2461
+ # @example
2462
+ # s = Polars::Series.new("b", ["x", nil, "z"])
2463
+ # s.fill_null(Polars.lit(""))
2464
+ # # =>
2465
+ # # shape: (3,)
2466
+ # # Series: 'b' [str]
2467
+ # # [
2468
+ # # "x"
2469
+ # # ""
2470
+ # # "z"
2471
+ # # ]
2472
def fill_null(value = nil, strategy: nil, limit: nil)
  # Pure pass-through: the positional value and both keywords are handed to the
  # next `fill_null` in the ancestor chain (or to `method_missing`).
  super(value, strategy: strategy, limit: limit)
end
2475
+
2476
+ # Rounds down to the nearest integer value.
2477
+ #
2478
+ # Only works on floating point Series.
2479
+ #
2480
+ # @return [Series]
2481
+ #
2482
+ # @example
2483
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
2484
+ # s.floor
2485
+ # # =>
2486
+ # # shape: (3,)
2487
+ # # Series: 'a' [f64]
2488
+ # # [
2489
+ # # 1.0
2490
+ # # 2.0
2491
+ # # 3.0
2492
+ # # ]
2493
def floor
  # Unlike the neighbouring `super`-based wrappers, this one calls `floor` on
  # `_s` directly and wraps what comes back.
  floored = _s.floor
  Utils.wrap_s(floored)
end
2496
+
2497
+ # Rounds up to the nearest integer value.
2498
+ #
2499
+ # Only works on floating point Series.
2500
+ #
2501
+ # @return [Series]
2502
+ #
2503
+ # @example
2504
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
2505
+ # s.ceil
2506
+ # # =>
2507
+ # # shape: (3,)
2508
+ # # Series: 'a' [f64]
2509
+ # # [
2510
+ # # 2.0
2511
+ # # 3.0
2512
+ # # 4.0
2513
+ # # ]
2514
def ceil
  # Pass-through to the next `ceil` in the ancestor chain (or `method_missing`).
  super()
end
2517
+
2518
+ # Round underlying floating point data by `decimals` digits.
2519
+ #
2520
+ # @param decimals [Integer]
2521
+ # number of decimals to round by.
2522
+ #
2523
+ # @return [Series]
2524
+ #
2525
+ # @example
2526
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
2527
+ # s.round(2)
2528
+ # # =>
2529
+ # # shape: (3,)
2530
+ # # Series: 'a' [f64]
2531
+ # # [
2532
+ # # 1.12
2533
+ # # 2.57
2534
+ # # 3.9
2535
+ # # ]
2536
def round(decimals = 0)
  # Pass-through: `decimals` (0 unless given) goes to the next `round` in the
  # ancestor chain (or to `method_missing`).
  super(decimals)
end
2539
+
2540
+ # Compute the dot/inner product between two Series.
2541
+ #
2542
+ # @param other [Object]
2543
+ # Series (or array) to compute dot product with.
2544
+ #
2545
+ # @return [Numeric]
2546
+ #
2547
+ # @example
2548
+ # s = Polars::Series.new("a", [1, 2, 3])
2549
+ # s2 = Polars::Series.new("b", [4.0, 5.0, 6.0])
2550
+ # s.dot(s2)
2551
+ # # => 32.0
2552
def dot(other)
  # Anything that is not already a Series is converted first.
  other = Series.new(other) unless other.is_a?(Series)

  # Both operands must have the same number of elements.
  own_len = len
  other_len = other.len
  unless own_len == other_len
    raise ArgumentError, "Series length mismatch: expected #{own_len}, found #{other_len}"
  end

  # The raw result of `_s.dot` is returned without wrapping.
  _s.dot(other._s)
end
2562
+
2563
+ # Compute the most occurring value(s).
2564
+ #
2565
+ # Can return multiple values.
2566
+ #
2567
+ # @return [Series]
2568
+ #
2569
+ # @example
2570
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
2571
+ # s.mode
2572
+ # # =>
2573
+ # # shape: (1,)
2574
+ # # Series: 'a' [i64]
2575
+ # # [
2576
+ # # 2
2577
+ # # ]
2578
def mode
  # Pass-through to the next `mode` in the ancestor chain (or `method_missing`).
  super()
end
2581
+
2582
+ # Compute the element-wise indication of the sign.
2583
+ #
2584
+ # @return [Series]
2585
+ #
2586
+ # @example
2587
+ # s = Polars::Series.new("a", [-9.0, -0.0, 0.0, 4.0, nil])
2588
+ # s.sign
2589
+ # # =>
2590
+ # # shape: (5,)
2591
+ # # Series: 'a' [i64]
2592
+ # # [
2593
+ # # -1
2594
+ # # 0
2595
+ # # 0
2596
+ # # 1
2597
+ # # null
2598
+ # # ]
2599
def sign
  # Pass-through to the next `sign` in the ancestor chain (or `method_missing`).
  super()
end
2602
+
2603
+ # Compute the element-wise value for the sine.
2604
+ #
2605
+ # @return [Series]
2606
+ #
2607
+ # @example
2608
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
2609
+ # s.sin
2610
+ # # =>
2611
+ # # shape: (3,)
2612
+ # # Series: 'a' [f64]
2613
+ # # [
2614
+ # # 0.0
2615
+ # # 1.0
2616
+ # # 1.2246e-16
2617
+ # # ]
2618
def sin
  # Pass-through to the next `sin` in the ancestor chain (or `method_missing`).
  super()
end
2621
+
2622
+ # Compute the element-wise value for the cosine.
2623
+ #
2624
+ # @return [Series]
2625
+ #
2626
+ # @example
2627
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
2628
+ # s.cos
2629
+ # # =>
2630
+ # # shape: (3,)
2631
+ # # Series: 'a' [f64]
2632
+ # # [
2633
+ # # 1.0
2634
+ # # 6.1232e-17
2635
+ # # -1.0
2636
+ # # ]
2637
def cos
  # Pass-through to the next `cos` in the ancestor chain (or `method_missing`).
  super()
end
2640
+
2641
+ # Compute the element-wise value for the tangent.
2642
+ #
2643
+ # @return [Series]
2644
+ #
2645
+ # @example
2646
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
2647
+ # s.tan
2648
+ # # =>
2649
+ # # shape: (3,)
2650
+ # # Series: 'a' [f64]
2651
+ # # [
2652
+ # # 0.0
2653
+ # # 1.6331e16
2654
+ # # -1.2246e-16
2655
+ # # ]
2656
def tan
  # Pass-through to the next `tan` in the ancestor chain (or `method_missing`).
  super()
end
2659
+
2660
+ # Compute the element-wise value for the inverse sine.
2661
+ #
2662
+ # @return [Series]
2663
+ #
2664
+ # @example
2665
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2666
+ # s.arcsin
2667
+ # # =>
2668
+ # # shape: (3,)
2669
+ # # Series: 'a' [f64]
2670
+ # # [
2671
+ # # 1.570796
2672
+ # # 0.0
2673
+ # # -1.570796
2674
+ # # ]
2675
def arcsin
  # Pass-through to the next `arcsin` in the ancestor chain (or `method_missing`).
  super()
end
2678
+ alias_method :asin, :arcsin
2679
+
2680
+ # Compute the element-wise value for the inverse cosine.
2681
+ #
2682
+ # @return [Series]
2683
+ #
2684
+ # @example
2685
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2686
+ # s.arccos
2687
+ # # =>
2688
+ # # shape: (3,)
2689
+ # # Series: 'a' [f64]
2690
+ # # [
2691
+ # # 0.0
2692
+ # # 1.570796
2693
+ # # 3.141593
2694
+ # # ]
2695
def arccos
  # Pass-through to the next `arccos` in the ancestor chain (or `method_missing`).
  super()
end
2698
+ alias_method :acos, :arccos
2699
+
2700
+ # Compute the element-wise value for the inverse tangent.
2701
+ #
2702
+ # @return [Series]
2703
+ #
2704
+ # @example
2705
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2706
+ # s.arctan
2707
+ # # =>
2708
+ # # shape: (3,)
2709
+ # # Series: 'a' [f64]
2710
+ # # [
2711
+ # # 0.785398
2712
+ # # 0.0
2713
+ # # -0.785398
2714
+ # # ]
2715
def arctan
  # Pass-through to the next `arctan` in the ancestor chain (or `method_missing`).
  super()
end
2718
+ alias_method :atan, :arctan
2719
+
2720
+ # Compute the element-wise value for the inverse hyperbolic sine.
2721
+ #
2722
+ # @return [Series]
2723
+ #
2724
+ # @example
2725
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2726
+ # s.arcsinh
2727
+ # # =>
2728
+ # # shape: (3,)
2729
+ # # Series: 'a' [f64]
2730
+ # # [
2731
+ # # 0.881374
2732
+ # # 0.0
2733
+ # # -0.881374
2734
+ # # ]
2735
def arcsinh
  # Pass-through to the next `arcsinh` in the ancestor chain (or `method_missing`).
  super()
end
2738
+ alias_method :asinh, :arcsinh
2739
+
2740
+ # Compute the element-wise value for the inverse hyperbolic cosine.
2741
+ #
2742
+ # @return [Series]
2743
+ #
2744
+ # @example
2745
+ # s = Polars::Series.new("a", [5.0, 1.0, 0.0, -1.0])
2746
+ # s.arccosh
2747
+ # # =>
2748
+ # # shape: (4,)
2749
+ # # Series: 'a' [f64]
2750
+ # # [
2751
+ # # 2.292432
2752
+ # # 0.0
2753
+ # # NaN
2754
+ # # NaN
2755
+ # # ]
2756
def arccosh
  # Pass-through to the next `arccosh` in the ancestor chain (or `method_missing`).
  super()
end
2759
+ alias_method :acosh, :arccosh
2760
+
2761
+ # Compute the element-wise value for the inverse hyperbolic tangent.
2762
+ #
2763
+ # @return [Series]
2764
+ #
2765
+ # @example
2766
+ # s = Polars::Series.new("a", [2.0, 1.0, 0.5, 0.0, -0.5, -1.0, -1.1])
2767
+ # s.arctanh
2768
+ # # =>
2769
+ # # shape: (7,)
2770
+ # # Series: 'a' [f64]
2771
+ # # [
2772
+ # # NaN
2773
+ # # inf
2774
+ # # 0.549306
2775
+ # # 0.0
2776
+ # # -0.549306
2777
+ # # -inf
2778
+ # # NaN
2779
+ # # ]
2780
def arctanh
  # Pass-through to the next `arctanh` in the ancestor chain (or `method_missing`).
  super()
end
2783
+ alias_method :atanh, :arctanh
2784
+
2785
+ # Compute the element-wise value for the hyperbolic sine.
2786
+ #
2787
+ # @return [Series]
2788
+ #
2789
+ # @example
2790
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2791
+ # s.sinh
2792
+ # # =>
2793
+ # # shape: (3,)
2794
+ # # Series: 'a' [f64]
2795
+ # # [
2796
+ # # 1.175201
2797
+ # # 0.0
2798
+ # # -1.175201
2799
+ # # ]
2800
def sinh
  # Pass-through to the next `sinh` in the ancestor chain (or `method_missing`).
  super()
end
2803
+
2804
+ # Compute the element-wise value for the hyperbolic cosine.
2805
+ #
2806
+ # @return [Series]
2807
+ #
2808
+ # @example
2809
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2810
+ # s.cosh
2811
+ # # =>
2812
+ # # shape: (3,)
2813
+ # # Series: 'a' [f64]
2814
+ # # [
2815
+ # # 1.543081
2816
+ # # 1.0
2817
+ # # 1.543081
2818
+ # # ]
2819
def cosh
  # Pass-through to the next `cosh` in the ancestor chain (or `method_missing`).
  super()
end
2822
+
2823
+ # Compute the element-wise value for the hyperbolic tangent.
2824
+ #
2825
+ # @return [Series]
2826
+ #
2827
+ # @example
2828
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2829
+ # s.tanh
2830
+ # # =>
2831
+ # # shape: (3,)
2832
+ # # Series: 'a' [f64]
2833
+ # # [
2834
+ # # 0.761594
2835
+ # # 0.0
2836
+ # # -0.761594
2837
+ # # ]
2838
def tanh
  # Pass-through to the next `tanh` in the ancestor chain (or `method_missing`).
  super()
end
2841
+
2842
+ # Apply a custom/user-defined function (UDF) over elements in this Series and
2843
+ # return a new Series.
2844
+ #
2845
+ # If the function returns another datatype, the return_dtype arg should be set,
2846
+ # otherwise the method will fail.
2847
+ #
2848
+ # @param return_dtype [Symbol]
2849
+ # Output datatype. If none is given, the same datatype as this Series will be
2850
+ # used.
2851
+ # @param skip_nulls [Boolean]
2852
+ # Nulls will be skipped and not passed to the Ruby function.
2853
+ # This is faster because Ruby can be skipped and because we call
2854
+ # more specialized functions.
2855
+ #
2856
+ # @return [Series]
2857
+ #
2858
+ # @example
2859
+ # s = Polars::Series.new("a", [1, 2, 3])
2860
+ # s.map_elements { |x| x + 10 }
2861
+ # # =>
2862
+ # # shape: (3,)
2863
+ # # Series: 'a' [i64]
2864
+ # # [
2865
+ # # 11
2866
+ # # 12
2867
+ # # 13
2868
+ # # ]
2869
def map_elements(return_dtype: nil, skip_nulls: true, &func)
  # Without a block `func` is nil and would be passed straight into
  # `_s.apply_lambda`; reject that here with a clear error instead.
  raise ArgumentError, "no block given" if func.nil?

  # Only translate the requested output type when one was supplied; nil is
  # passed through unchanged.
  pl_return_dtype = return_dtype.nil? ? nil : Utils.rb_type_to_dtype(return_dtype)
  Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls))
end
2877
+ alias_method :map, :map_elements
2878
+ alias_method :apply, :map_elements
2879
+
2880
+ # Shift the values by a given period.
2881
+ #
2882
+ # @param periods [Integer]
2883
+ # Number of places to shift (may be negative).
2884
+ #
2885
+ # @return [Series]
2886
+ #
2887
+ # @example
2888
+ # s = Polars::Series.new("a", [1, 2, 3])
2889
+ # s.shift(1)
2890
+ # # =>
2891
+ # # shape: (3,)
2892
+ # # Series: 'a' [i64]
2893
+ # # [
2894
+ # # null
2895
+ # # 1
2896
+ # # 2
2897
+ # # ]
2898
+ #
2899
+ # @example
2900
+ # s.shift(-1)
2901
+ # # =>
2902
+ # # shape: (3,)
2903
+ # # Series: 'a' [i64]
2904
+ # # [
2905
+ # # 2
2906
+ # # 3
2907
+ # # null
2908
+ # # ]
2909
def shift(periods = 1)
  # Pass-through: `periods` (1 unless given) goes to the next `shift` in the
  # ancestor chain (or to `method_missing`).
  super(periods)
end
2912
+
2913
+ # Shift the values by a given period and fill the resulting null values.
2914
+ #
2915
+ # @param periods [Integer]
2916
+ # Number of places to shift (may be negative).
2917
+ # @param fill_value [Object]
2918
+ # Fill the resulting null values with this value.
2919
+ #
2920
+ # @return [Series]
2921
def shift_and_fill(periods, fill_value)
  # Pass-through: both arguments go, in order, to the next `shift_and_fill` in
  # the ancestor chain (or to `method_missing`).
  super(periods, fill_value)
end
2924
+
2925
+ # Take values from self or other based on the given mask.
2926
+ #
2927
+ # Where mask evaluates true, take values from self. Where mask evaluates false,
2928
+ # take values from other.
2929
+ #
2930
+ # @param mask [Series]
2931
+ # Boolean Series.
2932
+ # @param other [Series]
2933
+ # Series of same type.
2934
+ #
2935
+ # @return [Series]
2936
+ #
2937
+ # @example
2938
+ # s1 = Polars::Series.new([1, 2, 3, 4, 5])
2939
+ # s2 = Polars::Series.new([5, 4, 3, 2, 1])
2940
+ # s1.zip_with(s1 < s2, s2)
2941
+ # # =>
2942
+ # # shape: (5,)
2943
+ # # Series: '' [i64]
2944
+ # # [
2945
+ # # 1
2946
+ # # 2
2947
+ # # 3
2948
+ # # 2
2949
+ # # 1
2950
+ # # ]
2951
+ #
2952
+ # @example
2953
+ # mask = Polars::Series.new([true, false, true, false, true])
2954
+ # s1.zip_with(mask, s2)
2955
+ # # =>
2956
+ # # shape: (5,)
2957
+ # # Series: '' [i64]
2958
+ # # [
2959
+ # # 1
2960
+ # # 4
2961
+ # # 3
2962
+ # # 2
2963
+ # # 5
2964
+ # # ]
2965
def zip_with(mask, other)
  # The selection happens on the `_s` objects of all three operands; only the
  # result is wrapped again.
  merged = _s.zip_with(mask._s, other._s)
  Utils.wrap_s(merged)
end
2968
+
2969
+ # Apply a rolling min (moving min) over the values in this array.
2970
+ #
2971
+ # A window of length `window_size` will traverse the array. The values that fill
2972
+ # this window will (optionally) be multiplied with the weights given by the
2973
+ # `weights` vector. The resulting values will be aggregated to their minimum.
2974
+ #
2975
+ # @param window_size [Integer]
2976
+ # The length of the window.
2977
+ # @param weights [Array]
2978
+ # An optional slice with the same length as the window that will be multiplied
2979
+ # elementwise with the values in the window.
2980
+ # @param min_periods [Integer]
2981
+ # The number of values in the window that should be non-null before computing
2982
+ # a result. If nil, it will be set equal to window size.
2983
+ # @param center [Boolean]
2984
+ # Set the labels at the center of the window
2985
+ #
2986
+ # @return [Series]
2987
+ #
2988
+ # @example
2989
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
2990
+ # s.rolling_min(3)
2991
+ # # =>
2992
+ # # shape: (5,)
2993
+ # # Series: 'a' [i64]
2994
+ # # [
2995
+ # # null
2996
+ # # null
2997
+ # # 100
2998
+ # # 200
2999
+ # # 300
3000
+ # # ]
3001
def rolling_min(window_size, weights: nil, min_periods: nil, center: false)
  # Expressed through the expression API: build `rolling_min` on the column
  # named after this Series, select it from `to_frame`, and convert the
  # selection back with `to_series`.
  frame = to_frame
  rolling_expr = Polars.col(name).rolling_min(
    window_size,
    weights: weights,
    min_periods: min_periods,
    center: center
  )
  frame.select(rolling_expr).to_series
end
3018
+
3019
+ # Apply a rolling max (moving max) over the values in this array.
3020
+ #
3021
+ # A window of length `window_size` will traverse the array. The values that fill
3022
+ # this window will (optionally) be multiplied with the weights given by the
3023
+ # `weights` vector. The resulting values will be aggregated to their maximum.
3024
+ #
3025
+ # @param window_size [Integer]
3026
+ # The length of the window.
3027
+ # @param weights [Array]
3028
+ # An optional slice with the same length as the window that will be multiplied
3029
+ # elementwise with the values in the window.
3030
+ # @param min_periods [Integer]
3031
+ # The number of values in the window that should be non-null before computing
3032
+ # a result. If nil, it will be set equal to window size.
3033
+ # @param center [Boolean]
3034
+ # Set the labels at the center of the window
3035
+ #
3036
+ # @return [Series]
3037
+ #
3038
+ # @example
3039
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
3040
+ # s.rolling_max(2)
3041
+ # # =>
3042
+ # # shape: (5,)
3043
+ # # Series: 'a' [i64]
3044
+ # # [
3045
+ # # null
3046
+ # # 200
3047
+ # # 300
3048
+ # # 400
3049
+ # # 500
3050
+ # # ]
3051
def rolling_max(window_size, weights: nil, min_periods: nil, center: false)
  # Expressed through the expression API: build `rolling_max` on the column
  # named after this Series, select it from `to_frame`, and convert the
  # selection back with `to_series`.
  frame = to_frame
  rolling_expr = Polars.col(name).rolling_max(
    window_size,
    weights: weights,
    min_periods: min_periods,
    center: center
  )
  frame.select(rolling_expr).to_series
end
3068
+
3069
+ # Apply a rolling mean (moving mean) over the values in this array.
3070
+ #
3071
+ # A window of length `window_size` will traverse the array. The values that fill
3072
+ # this window will (optionally) be multiplied with the weights given by the
3073
+ # `weights` vector. The resulting values will be aggregated to their mean.
3074
+ #
3075
+ # @param window_size [Integer]
3076
+ # The length of the window.
3077
+ # @param weights [Array]
3078
+ # An optional slice with the same length as the window that will be multiplied
3079
+ # elementwise with the values in the window.
3080
+ # @param min_periods [Integer]
3081
+ # The number of values in the window that should be non-null before computing
3082
+ # a result. If nil, it will be set equal to window size.
3083
+ # @param center [Boolean]
3084
+ # Set the labels at the center of the window
3085
+ #
3086
+ # @return [Series]
3087
+ #
3088
+ # @example
3089
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
3090
+ # s.rolling_mean(2)
3091
+ # # =>
3092
+ # # shape: (5,)
3093
+ # # Series: 'a' [f64]
3094
+ # # [
3095
+ # # null
3096
+ # # 150.0
3097
+ # # 250.0
3098
+ # # 350.0
3099
+ # # 450.0
3100
+ # # ]
3101
def rolling_mean(window_size, weights: nil, min_periods: nil, center: false)
  # Expressed through the expression API: build `rolling_mean` on the column
  # named after this Series, select it from `to_frame`, and convert the
  # selection back with `to_series`.
  frame = to_frame
  rolling_expr = Polars.col(name).rolling_mean(
    window_size,
    weights: weights,
    min_periods: min_periods,
    center: center
  )
  frame.select(rolling_expr).to_series
end
3118
+
3119
+ # Apply a rolling sum (moving sum) over the values in this array.
3120
+ #
3121
+ # A window of length `window_size` will traverse the array. The values that fill
3122
+ # this window will (optionally) be multiplied with the weights given by the
3123
+ # `weight` vector. The resulting values will be aggregated to their sum.
3124
+ #
3125
+ # @param window_size [Integer]
3126
+ # The length of the window.
3127
+ # @param weights [Array]
3128
+ # An optional slice with the same length as the window that will be multiplied
3129
+ # elementwise with the values in the window.
3130
+ # @param min_periods [Integer]
3131
+ # The number of values in the window that should be non-null before computing
3132
+ # a result. If nil, it will be set equal to window size.
3133
+ # @param center [Boolean]
3134
+ # Set the labels at the center of the window
3135
+ #
3136
+ # @return [Series]
3137
+ #
3138
+ # @example
3139
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
3140
+ # s.rolling_sum(2)
3141
+ # # =>
3142
+ # # shape: (5,)
3143
+ # # Series: 'a' [i64]
3144
+ # # [
3145
+ # # null
3146
+ # # 3
3147
+ # # 5
3148
+ # # 7
3149
+ # # 9
3150
+ # # ]
3151
def rolling_sum(window_size, weights: nil, min_periods: nil, center: false)
  # Expressed through the expression API: build `rolling_sum` on the column
  # named after this Series, select it from `to_frame`, and convert the
  # selection back with `to_series`.
  frame = to_frame
  rolling_expr = Polars.col(name).rolling_sum(
    window_size,
    weights: weights,
    min_periods: min_periods,
    center: center
  )
  frame.select(rolling_expr).to_series
end
3168
+
3169
+ # Compute a rolling std dev.
3170
+ #
3171
+ # A window of length `window_size` will traverse the array. The values that fill
3172
+ # this window will (optionally) be multiplied with the weights given by the
3173
+ # `weights` vector. The resulting values will be aggregated to their standard deviation.
3174
+ #
3175
+ # @param window_size [Integer]
3176
+ # The length of the window.
3177
+ # @param weights [Array]
3178
+ # An optional slice with the same length as the window that will be multiplied
3179
+ # elementwise with the values in the window.
3180
+ # @param min_periods [Integer]
3181
+ # The number of values in the window that should be non-null before computing
3182
+ # a result. If nil, it will be set equal to window size.
3183
+ # @param center [Boolean]
3184
+ # Set the labels at the center of the window
3185
+ #
3186
+ # @return [Series]
3187
+ #
3188
+ # @example
3189
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3190
+ # s.rolling_std(3)
3191
+ # # =>
3192
+ # # shape: (6,)
3193
+ # # Series: 'a' [f64]
3194
+ # # [
3195
+ # # null
3196
+ # # null
3197
+ # # 1.0
3198
+ # # 1.0
3199
+ # # 1.527525
3200
+ # # 2.0
3201
+ # # ]
3202
def rolling_std(window_size, weights: nil, min_periods: nil, center: false, ddof: 1, warn_if_unsorted: true)
  # Expressed through the expression API: build `rolling_std` on the column
  # named after this Series, select it from `to_frame`, and convert the
  # selection back with `to_series`. `ddof` and `warn_if_unsorted` are passed
  # on as given.
  frame = to_frame
  rolling_expr = Polars.col(name).rolling_std(
    window_size,
    weights: weights,
    min_periods: min_periods,
    center: center,
    ddof: ddof,
    warn_if_unsorted: warn_if_unsorted
  )
  frame.select(rolling_expr).to_series
end
3223
+
3224
+ # Compute a rolling variance.
3225
+ #
3226
+ # A window of length `window_size` will traverse the array. The values that fill
3227
+ # this window will (optionally) be multiplied with the weights given by the
3228
+ # `weights` vector. The resulting values will be aggregated to their variance.
3229
+ #
3230
+ # @param window_size [Integer]
3231
+ # The length of the window.
3232
+ # @param weights [Array]
3233
+ # An optional slice with the same length as the window that will be multiplied
3234
+ # elementwise with the values in the window.
3235
+ # @param min_periods [Integer]
3236
+ # The number of values in the window that should be non-null before computing
3237
+ # a result. If nil, it will be set equal to window size.
3238
+ # @param center [Boolean]
3239
+ # Set the labels at the center of the window
3240
+ #
3241
+ # @return [Series]
3242
+ #
3243
+ # @example
3244
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3245
+ # s.rolling_var(3)
3246
+ # # =>
3247
+ # # shape: (6,)
3248
+ # # Series: 'a' [f64]
3249
+ # # [
3250
+ # # null
3251
+ # # null
3252
+ # # 1.0
3253
+ # # 1.0
3254
+ # # 2.333333
3255
+ # # 4.0
3256
+ # # ]
3257
def rolling_var(window_size, weights: nil, min_periods: nil, center: false, ddof: 1, warn_if_unsorted: true)
  # Expressed through the expression API: build `rolling_var` on the column
  # named after this Series, select it from `to_frame`, and convert the
  # selection back with `to_series`. `ddof` and `warn_if_unsorted` are passed
  # on as given.
  frame = to_frame
  rolling_expr = Polars.col(name).rolling_var(
    window_size,
    weights: weights,
    min_periods: min_periods,
    center: center,
    ddof: ddof,
    warn_if_unsorted: warn_if_unsorted
  )
  frame.select(rolling_expr).to_series
end
3278
+
3279
+ # def rolling_apply
3280
+ # end
3281
+
3282
+ # Compute a rolling median.
3283
+ #
3284
+ # @param window_size [Integer]
3285
+ # The length of the window.
3286
+ # @param weights [Array]
3287
+ # An optional slice with the same length as the window that will be multiplied
3288
+ # elementwise with the values in the window.
3289
+ # @param min_periods [Integer]
3290
+ # The number of values in the window that should be non-null before computing
3291
+ # a result. If nil, it will be set equal to window size.
3292
+ # @param center [Boolean]
3293
+ # Set the labels at the center of the window
3294
+ #
3295
+ # @return [Series]
3296
+ #
3297
+ # @example
3298
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3299
+ # s.rolling_median(3)
3300
+ # # =>
3301
+ # # shape: (6,)
3302
+ # # Series: 'a' [f64]
3303
+ # # [
3304
+ # # null
3305
+ # # null
3306
+ # # 2.0
3307
+ # # 3.0
3308
+ # # 4.0
3309
+ # # 6.0
3310
+ # # ]
3311
def rolling_median(window_size, weights: nil, min_periods: nil, center: false, warn_if_unsorted: true)
  # An unspecified minimum falls back to the window size before the call is
  # handed to the expression API.
  min_periods = window_size if min_periods.nil?

  frame = to_frame
  rolling_expr = Polars.col(name).rolling_median(
    window_size,
    weights: weights,
    min_periods: min_periods,
    center: center,
    warn_if_unsorted: warn_if_unsorted
  )
  frame.select(rolling_expr).to_series
end
3334
+
3335
+ # Compute a rolling quantile.
3336
+ #
3337
+ # @param quantile [Float]
3338
+ # Quantile between 0.0 and 1.0.
3339
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
3340
+ # Interpolation method.
3341
+ # @param window_size [Integer]
3342
+ # The length of the window.
3343
+ # @param weights [Array]
3344
+ # An optional slice with the same length as the window that will be multiplied
3345
+ # elementwise with the values in the window.
3346
+ # @param min_periods [Integer]
3347
+ # The number of values in the window that should be non-null before computing
3348
+ # a result. If nil, it will be set equal to window size.
3349
+ # @param center [Boolean]
3350
+ # Set the labels at the center of the window
3351
+ #
3352
+ # @return [Series]
3353
+ #
3354
+ # @example
3355
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3356
+ # s.rolling_quantile(0.33, window_size: 3)
3357
+ # # =>
3358
+ # # shape: (6,)
3359
+ # # Series: 'a' [f64]
3360
+ # # [
3361
+ # # null
3362
+ # # null
3363
+ # # 1.0
3364
+ # # 2.0
3365
+ # # 3.0
3366
+ # # 4.0
3367
+ # # ]
3368
+ #
3369
+ # @example
3370
+ # s.rolling_quantile(0.33, interpolation: "linear", window_size: 3)
3371
+ # # =>
3372
+ # # shape: (6,)
3373
+ # # Series: 'a' [f64]
3374
+ # # [
3375
+ # # null
3376
+ # # null
3377
+ # # 1.66
3378
+ # # 2.66
3379
+ # # 3.66
3380
+ # # 5.32
3381
+ # # ]
3382
def rolling_quantile(
  quantile,
  interpolation: "nearest",
  window_size: 2,
  weights: nil,
  min_periods: nil,
  center: false,
  warn_if_unsorted: true
)
  # An unspecified minimum falls back to the window size before the call is
  # handed to the expression API.
  min_periods = window_size if min_periods.nil?

  frame = to_frame
  quantile_expr = Polars.col(name).rolling_quantile(
    quantile,
    interpolation: interpolation,
    window_size: window_size,
    weights: weights,
    min_periods: min_periods,
    center: center,
    warn_if_unsorted: warn_if_unsorted
  )
  frame.select(quantile_expr).to_series
end
3409
+
3410
+ # Compute a rolling skew.
3411
+ #
3412
+ # @param window_size [Integer]
3413
+ # Integer size of the rolling window.
3414
+ # @param bias [Boolean]
3415
+ # If false, the calculations are corrected for statistical bias.
3416
+ #
3417
+ # @return [Series]
3418
+ #
3419
+ # @example
3420
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3421
+ # s.rolling_skew(3)
3422
+ # # =>
3423
+ # # shape: (6,)
3424
+ # # Series: 'a' [f64]
3425
+ # # [
3426
+ # # null
3427
+ # # null
3428
+ # # 0.0
3429
+ # # 0.0
3430
+ # # 0.381802
3431
+ # # 0.0
3432
+ # # ]
3433
def rolling_skew(window_size, bias: true)
  # Pass-through: the window size and `bias` flag go to the next
  # `rolling_skew` in the ancestor chain (or to `method_missing`).
  super(window_size, bias: bias)
end
3436
+
3437
+ # Sample from this Series.
3438
+ #
3439
+ # @param n [Integer]
3440
+ # Number of items to return. Cannot be used with `frac`. Defaults to 1 if
3441
+ # `frac` is nil.
3442
+ # @param frac [Float]
3443
+ # Fraction of items to return. Cannot be used with `n`.
3444
+ # @param with_replacement [Boolean]
3445
+ # Allow values to be sampled more than once.
3446
+ # @param shuffle [Boolean]
3447
+ # Shuffle the order of sampled data points.
3448
+ # @param seed [Integer]
3449
+ # Seed for the random number generator. If set to nil (default), a random
3450
+ # seed is used.
3451
+ #
3452
+ # @return [Series]
3453
+ #
3454
+ # @example
3455
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
3456
+ # s.sample(n: 2, seed: 0)
3457
+ # # =>
3458
+ # # shape: (2,)
3459
+ # # Series: 'a' [i64]
3460
+ # # [
3461
+ # # 5
3462
+ # # 3
3463
+ # # ]
3464
def sample(n: nil, frac: nil, with_replacement: false, shuffle: false, seed: nil)
  by_count = !n.nil?
  by_fraction = !frac.nil?
  # The two ways of sizing the sample are mutually exclusive.
  raise ArgumentError, "cannot specify both `n` and `frac`" if by_count && by_fraction

  drawn =
    if by_fraction
      _s.sample_frac(frac, with_replacement, shuffle, seed)
    else
      # With neither size given, a single item is drawn.
      _s.sample_n(by_count ? n : 1, with_replacement, shuffle, seed)
    end
  Utils.wrap_s(drawn)
end
3484
+
3485
+ # Get a boolean mask of the local maximum peaks.
3486
+ #
3487
+ # @return [Series]
3488
+ #
3489
+ # @example
3490
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
3491
+ # s.peak_max
3492
+ # # =>
3493
+ # # shape: (5,)
3494
+ # # Series: 'a' [bool]
3495
+ # # [
3496
+ # # false
3497
+ # # false
3498
+ # # false
3499
+ # # false
3500
+ # # true
3501
+ # # ]
3502
def peak_max
  # Pass-through to the next `peak_max` in the ancestor chain (or `method_missing`).
  super()
end
3505
+
3506
+ # Get a boolean mask of the local minimum peaks.
3507
+ #
3508
+ # @return [Series]
3509
+ #
3510
+ # @example
3511
+ # s = Polars::Series.new("a", [4, 1, 3, 2, 5])
3512
+ # s.peak_min
3513
+ # # =>
3514
+ # # shape: (5,)
3515
+ # # Series: 'a' [bool]
3516
+ # # [
3517
+ # # false
3518
+ # # true
3519
+ # # false
3520
+ # # true
3521
+ # # false
3522
+ # # ]
3523
def peak_min
  # Pass-through to the next `peak_min` in the ancestor chain (or `method_missing`).
  super()
end
3526
+
3527
+ # Count the number of unique values in this Series.
3528
+ #
3529
+ # @return [Integer]
3530
+ #
3531
+ # @example
3532
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
3533
+ # s.n_unique
3534
+ # # => 3
3535
def n_unique
  # The count comes straight from `_s` and is returned as-is (not wrapped).
  backend = _s
  backend.n_unique
end
3538
+
3539
+ # Shrink Series memory usage.
3540
+ #
3541
+ # Shrinks the underlying array capacity to exactly fit the actual data.
3542
+ # (Note that this function does not change the Series data type).
3543
+ #
3544
+ # @return [Series]
3545
def shrink_to_fit(in_place: false)
  # Shrink either this Series itself or a `clone` of it, and return whichever
  # object was shrunk.
  target = in_place ? self : clone
  target._s.shrink_to_fit
  target
end
3555
+
3556
+ # Hash the Series.
3557
+ #
3558
+ # The hash value is of type `:u64`.
3559
+ #
3560
+ # @param seed [Integer]
3561
+ # Random seed parameter. Defaults to 0.
3562
+ # @param seed_1 [Integer]
3563
+ # Random seed parameter. Defaults to `seed` if not set.
3564
+ # @param seed_2 [Integer]
3565
+ # Random seed parameter. Defaults to `seed` if not set.
3566
+ # @param seed_3 [Integer]
3567
+ # Random seed parameter. Defaults to `seed` if not set.
3568
+ #
3569
+ # @return [Series]
3570
+ #
3571
+ # @example
3572
+ # s = Polars::Series.new("a", [1, 2, 3])
3573
+ # s._hash(42)
3574
+ # # =>
3575
+ # # shape: (3,)
3576
+ # # Series: 'a' [u64]
3577
+ # # [
3578
+ # # 2374023516666777365
3579
+ # # 10386026231460783898
3580
+ # # 17796317186427479491
3581
+ # # ]
3582
def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
  # Pass-through: all four seeds go, in order, to the next `_hash` in the
  # ancestor chain (or to `method_missing`).
  super(seed, seed_1, seed_2, seed_3)
end
3585
+
3586
+ # Reinterpret the underlying bits as a signed/unsigned integer.
3587
+ #
3588
+ # This operation is only allowed for 64bit integers. For lower bits integers,
3589
+ # you can safely use the cast operation.
3590
+ #
3591
+ # @param signed [Boolean]
3592
+ # If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`.
3593
+ #
3594
+ # @return [Series]
3595
def reinterpret(signed: true)
  # Pass-through: the `signed` flag goes to the next `reinterpret` in the
  # ancestor chain (or to `method_missing`).
  super(signed: signed)
end
3598
+
3599
+ # Interpolate intermediate values. The interpolation method is linear.
3600
+ #
3601
+ # @return [Series]
3602
+ #
3603
+ # @example
3604
+ # s = Polars::Series.new("a", [1, 2, nil, nil, 5])
3605
+ # s.interpolate
3606
+ # # =>
3607
+ # # shape: (5,)
3608
+ # # Series: 'a' [f64]
3609
+ # # [
3610
+ # # 1.0
3611
+ # # 2.0
3612
+ # # 3.0
3613
+ # # 4.0
3614
+ # # 5.0
3615
+ # # ]
3616
def interpolate(method: "linear")
  # Pass-through: the `method` keyword ("linear" unless given) goes to the next
  # `interpolate` in the ancestor chain (or to `method_missing`).
  super(method: method)
end
3619
+
3620
+ # Compute absolute values.
3621
+ #
3622
+ # @return [Series]
3623
def abs
  # Pass-through to the next `abs` in the ancestor chain (or `method_missing`).
  super()
end
3626
+
3627
+ # Assign ranks to data, dealing with ties appropriately.
3628
+ #
3629
+ # @param method ["average", "min", "max", "dense", "ordinal", "random"]
3630
+ # The method used to assign ranks to tied elements.
3631
+ # The following methods are available (default is 'average'):
3632
+ #
3633
+ # - 'average' : The average of the ranks that would have been assigned to
3634
+ # all the tied values is assigned to each value.
3635
+ # - 'min' : The minimum of the ranks that would have been assigned to all
3636
+ # the tied values is assigned to each value. (This is also referred to
3637
+ # as "competition" ranking.)
3638
+ # - 'max' : The maximum of the ranks that would have been assigned to all
3639
+ # the tied values is assigned to each value.
3640
+ # - 'dense' : Like 'min', but the rank of the next highest element is
3641
+ # assigned the rank immediately after those assigned to the tied
3642
+ # elements.
3643
+ # - 'ordinal' : All values are given a distinct rank, corresponding to
3644
+ # the order that the values occur in the Series.
3645
+ # - 'random' : Like 'ordinal', but the rank for ties is not dependent
3646
+ # on the order that the values occur in the Series.
3647
+ # @param reverse [Boolean]
3648
+ # Reverse the operation.
3649
+ # @param seed [Integer]
3650
+ # If `method: "random"`, use this as seed.
3651
+ #
3652
+ # @return [Series]
3653
+ #
3654
+ # @example The 'average' method:
3655
+ # s = Polars::Series.new("a", [3, 6, 1, 1, 6])
3656
+ # s.rank
3657
+ # # =>
3658
+ # # shape: (5,)
3659
+ # # Series: 'a' [f64]
3660
+ # # [
3661
+ # # 3.0
3662
+ # # 4.5
3663
+ # # 1.5
3664
+ # # 1.5
3665
+ # # 4.5
3666
+ # # ]
3667
+ #
3668
+ # @example The 'ordinal' method:
3669
+ # s = Polars::Series.new("a", [3, 6, 1, 1, 6])
3670
+ # s.rank(method: "ordinal")
3671
+ # # =>
3672
+ # # shape: (5,)
3673
+ # # Series: 'a' [u32]
3674
+ # # [
3675
+ # # 3
3676
+ # # 4
3677
+ # # 1
3678
+ # # 2
3679
+ # # 5
3680
+ # # ]
3681
+ def rank(method: "average", reverse: false, seed: nil)
3682
+ super
3683
+ end
3684
+
3685
+ # Calculate the n-th discrete difference.
3686
+ #
3687
+ # @param n [Integer]
3688
+ # Number of slots to shift.
3689
+ # @param null_behavior ["ignore", "drop"]
3690
+ # How to handle null values.
3691
+ #
3692
+ # @return [Series]
3693
+ def diff(n: 1, null_behavior: "ignore")
3694
+ super
3695
+ end
3696
+
3697
+ # Computes percentage change between values.
3698
+ #
3699
+ # Percentage change (as fraction) between current element and most-recent
3700
+ # non-null element at least `n` period(s) before the current element.
3701
+ #
3702
+ # Computes the change from the previous row by default.
3703
+ #
3704
+ # @param n [Integer]
3705
+ # periods to shift for forming percent change.
3706
+ #
3707
+ # @return [Series]
3708
+ #
3709
+ # @example
3710
+ # Polars::Series.new(0..9).pct_change
3711
+ # # =>
3712
+ # # shape: (10,)
3713
+ # # Series: '' [f64]
3714
+ # # [
3715
+ # # null
3716
+ # # inf
3717
+ # # 1.0
3718
+ # # 0.5
3719
+ # # 0.333333
3720
+ # # 0.25
3721
+ # # 0.2
3722
+ # # 0.166667
3723
+ # # 0.142857
3724
+ # # 0.125
3725
+ # # ]
3726
+ #
3727
+ # @example
3728
+ # Polars::Series.new([1, 2, 4, 8, 16, 32, 64, 128, 256, 512]).pct_change(n: 2)
3729
+ # # =>
3730
+ # # shape: (10,)
3731
+ # # Series: '' [f64]
3732
+ # # [
3733
+ # # null
3734
+ # # null
3735
+ # # 3.0
3736
+ # # 3.0
3737
+ # # 3.0
3738
+ # # 3.0
3739
+ # # 3.0
3740
+ # # 3.0
3741
+ # # 3.0
3742
+ # # 3.0
3743
+ # # ]
3744
+ def pct_change(n: 1)
3745
+ super
3746
+ end
3747
+
3748
+ # Compute the sample skewness of a data set.
3749
+ #
3750
+ # For normally distributed data, the skewness should be about zero. For
3751
+ # unimodal continuous distributions, a skewness value greater than zero means
3752
+ # that there is more weight in the right tail of the distribution. The
3753
+ # function `skewtest` can be used to determine if the skewness value
3754
+ # is close enough to zero, statistically speaking.
3755
+ #
3756
+ # @param bias [Boolean]
3757
+ # If `false`, the calculations are corrected for statistical bias.
3758
+ #
3759
+ # @return [Float, nil]
3760
+ def skew(bias: true)
3761
+ _s.skew(bias)
3762
+ end
3763
+
3764
+ # Compute the kurtosis (Fisher or Pearson) of a dataset.
3765
+ #
3766
+ # Kurtosis is the fourth central moment divided by the square of the
3767
+ # variance. If Fisher's definition is used, then 3.0 is subtracted from
3768
+ # the result to give 0.0 for a normal distribution.
3769
+ # If bias is false, then the kurtosis is calculated using k statistics to
3770
+ # eliminate bias coming from biased moment estimators
3771
+ #
3772
+ # @param fisher [Boolean]
3773
+ # If `true`, Fisher's definition is used (normal ==> 0.0). If `false`,
3774
+ # Pearson's definition is used (normal ==> 3.0).
3775
+ # @param bias [Boolean]
3776
+ # If `false`, the calculations are corrected for statistical bias.
3777
+ #
3778
+ # @return [Float, nil]
3779
+ def kurtosis(fisher: true, bias: true)
3780
+ _s.kurtosis(fisher, bias)
3781
+ end
3782
+
3783
+ # Clip (limit) the values in an array to a `min` and `max` boundary.
3784
+ #
3785
+ # Only works for numerical types.
3786
+ #
3787
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
3788
+ # expression. See {#when} for more information.
3789
+ #
3790
+ # @param min_val [Numeric]
3791
+ # Minimum value.
3792
+ # @param max_val [Numeric]
3793
+ # Maximum value.
3794
+ #
3795
+ # @return [Series]
3796
+ #
3797
+ # @example
3798
+ # s = Polars::Series.new("foo", [-50, 5, nil, 50])
3799
+ # s.clip(1, 10)
3800
+ # # =>
3801
+ # # shape: (4,)
3802
+ # # Series: 'foo' [i64]
3803
+ # # [
3804
+ # # 1
3805
+ # # 5
3806
+ # # null
3807
+ # # 10
3808
+ # # ]
3809
+ def clip(min_val, max_val)
3810
+ super
3811
+ end
3812
+
3813
+ # Clip (limit) the values in an array to a `min` boundary.
3814
+ #
3815
+ # Only works for numerical types.
3816
+ #
3817
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
3818
+ # expression. See {#when} for more information.
3819
+ #
3820
+ # @param min_val [Numeric]
3821
+ # Minimum value.
3822
+ #
3823
+ # @return [Series]
3824
+ def clip_min(min_val)
3825
+ super
3826
+ end
3827
+
3828
+ # Clip (limit) the values in an array to a `max` boundary.
3829
+ #
3830
+ # Only works for numerical types.
3831
+ #
3832
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
3833
+ # expression. See {#when} for more information.
3834
+ #
3835
+ # @param max_val [Numeric]
3836
+ # Maximum value.
3837
+ #
3838
+ # @return [Series]
3839
+ def clip_max(max_val)
3840
+ super
3841
+ end
3842
+
3843
+ # Replace values by different values.
3844
+ #
3845
+ # @param old [Object]
3846
+ # Value or sequence of values to replace.
3847
+ # Also accepts a mapping of values to their replacement.
3848
+ # @param new [Object]
3849
+ # Value or sequence of values to replace by.
3850
+ # Length must match the length of `old` or have length 1.
3851
+ # @param default [Object]
3852
+ # Set values that were not replaced to this value.
3853
+ # Defaults to keeping the original value.
3854
+ # Accepts expression input. Non-expression inputs are parsed as literals.
3855
+ # @param return_dtype [Object]
3856
+ # The data type of the resulting Series. If set to `nil` (default),
3857
+ # the data type is determined automatically based on the other inputs.
3858
+ #
3859
+ # @return [Series]
3860
+ #
3861
+ # @example Replace a single value by another value. Values that were not replaced remain unchanged.
3862
+ # s = Polars::Series.new([1, 2, 2, 3])
3863
+ # s.replace(2, 100)
3864
+ # # =>
3865
+ # # shape: (4,)
3866
+ # # Series: '' [i64]
3867
+ # # [
3868
+ # # 1
3869
+ # # 100
3870
+ # # 100
3871
+ # # 3
3872
+ # # ]
3873
+ #
3874
+ # @example Replace multiple values by passing sequences to the `old` and `new` parameters.
3875
+ # s.replace([2, 3], [100, 200])
3876
+ # # =>
3877
+ # # shape: (4,)
3878
+ # # Series: '' [i64]
3879
+ # # [
3880
+ # # 1
3881
+ # # 100
3882
+ # # 100
3883
+ # # 200
3884
+ # # ]
3885
+ #
3886
+ # @example Passing a mapping with replacements is also supported as syntactic sugar. Specify a default to set all values that were not matched.
3887
+ # mapping = {2 => 100, 3 => 200}
3888
+ # s.replace(mapping, default: -1)
3889
+ # # =>
3890
+ # # shape: (4,)
3891
+ # # Series: '' [i64]
3892
+ # # [
3893
+ # # -1
3894
+ # # 100
3895
+ # # 100
3896
+ # # 200
3897
+ # # ]
3898
+ #
3899
+ # @example The default can be another Series.
3900
+ # default = Polars::Series.new([2.5, 5.0, 7.5, 10.0])
3901
+ # s.replace(2, 100, default: default)
3902
+ # # =>
3903
+ # # shape: (4,)
3904
+ # # Series: '' [f64]
3905
+ # # [
3906
+ # # 2.5
3907
+ # # 100.0
3908
+ # # 100.0
3909
+ # # 10.0
3910
+ # # ]
3911
+ #
3912
+ # @example Replacing by values of a different data type sets the return type based on a combination of the `new` data type and either the original data type or the default data type if it was set.
3913
+ # s = Polars::Series.new(["x", "y", "z"])
3914
+ # mapping = {"x" => 1, "y" => 2, "z" => 3}
3915
+ # s.replace(mapping)
3916
+ # # =>
3917
+ # # shape: (3,)
3918
+ # # Series: '' [str]
3919
+ # # [
3920
+ # # "1"
3921
+ # # "2"
3922
+ # # "3"
3923
+ # # ]
3924
+ #
3925
+ # @example
3926
+ # s.replace(mapping, default: nil)
3927
+ # # =>
3928
+ # # shape: (3,)
3929
+ # # Series: '' [i64]
3930
+ # # [
3931
+ # # 1
3932
+ # # 2
3933
+ # # 3
3934
+ # # ]
3935
+ #
3936
+ # @example Set the `return_dtype` parameter to control the resulting data type directly.
3937
+ # s.replace(mapping, return_dtype: Polars::UInt8)
3938
+ # # =>
3939
+ # # shape: (3,)
3940
+ # # Series: '' [u8]
3941
+ # # [
3942
+ # # 1
3943
+ # # 2
3944
+ # # 3
3945
+ # # ]
3946
+ def replace(old, new = Expr::NO_DEFAULT, default: Expr::NO_DEFAULT, return_dtype: nil)
3947
+ super
3948
+ end
3949
+
3950
+ # Reshape this Series to a flat Series or a Series of Lists.
3951
+ #
3952
+ # @param dims [Array]
3953
+ # Tuple of the dimension sizes. If a -1 is used in any of the dimensions, that
3954
+ # dimension is inferred.
3955
+ #
3956
+ # @return [Series]
3957
+ def reshape(dims)
3958
+ super
3959
+ end
3960
+
3961
+ # Shuffle the contents of this Series.
3962
+ #
3963
+ # @param seed [Integer, nil]
3964
+ # Seed for the random number generator.
3965
+ #
3966
+ # @return [Series]
3967
+ #
3968
+ # @example
3969
+ # s = Polars::Series.new("a", [1, 2, 3])
3970
+ # s.shuffle(seed: 1)
3971
+ # # =>
3972
+ # # shape: (3,)
3973
+ # # Series: 'a' [i64]
3974
+ # # [
3975
+ # # 2
3976
+ # # 1
3977
+ # # 3
3978
+ # # ]
3979
+ def shuffle(seed: nil)
3980
+ super
3981
+ end
3982
+
3983
+ # Exponentially-weighted moving average.
3984
+ #
3985
+ # @return [Series]
3986
+ def ewm_mean(
3987
+ com: nil,
3988
+ span: nil,
3989
+ half_life: nil,
3990
+ alpha: nil,
3991
+ adjust: true,
3992
+ min_periods: 1
3993
+ )
3994
+ super
3995
+ end
3996
+
3997
+ # Exponentially-weighted moving standard deviation.
3998
+ #
3999
+ # @return [Series]
4000
+ def ewm_std(
4001
+ com: nil,
4002
+ span: nil,
4003
+ half_life: nil,
4004
+ alpha: nil,
4005
+ adjust: true,
4006
+ bias: false,
4007
+ min_periods: 1
4008
+ )
4009
+ super
4010
+ end
4011
+
4012
+ # Exponentially-weighted moving variance.
4013
+ #
4014
+ # @return [Series]
4015
+ def ewm_var(
4016
+ com: nil,
4017
+ span: nil,
4018
+ half_life: nil,
4019
+ alpha: nil,
4020
+ adjust: true,
4021
+ bias: false,
4022
+ min_periods: 1
4023
+ )
4024
+ super
4025
+ end
4026
+
4027
+ # Extend the Series with given number of values.
4028
+ #
4029
+ # @param value [Object]
4030
+ # The value to extend the Series with. This value may be `nil` to fill with
4031
+ # nulls.
4032
+ # @param n [Integer]
4033
+ # The number of values to extend.
4034
+ #
4035
+ # @return [Series]
4036
+ #
4037
+ # @example
4038
+ # s = Polars::Series.new("a", [1, 2, 3])
4039
+ # s.extend_constant(99, 2)
4040
+ # # =>
4041
+ # # shape: (5,)
4042
+ # # Series: 'a' [i64]
4043
+ # # [
4044
+ # # 1
4045
+ # # 2
4046
+ # # 3
4047
+ # # 99
4048
+ # # 99
4049
+ # # ]
4050
+ def extend_constant(value, n)
4051
+ Utils.wrap_s(_s.extend_constant(value, n))
4052
+ end
4053
+
4054
+ # Flags the Series as sorted.
4055
+ #
4056
+ # Enables downstream code to use fast paths for sorted arrays.
4057
+ #
4058
+ # @param reverse [Boolean]
4059
+ # If the Series order is reversed, e.g. descending.
4060
+ #
4061
+ # @return [Series]
4062
+ #
4063
+ # @note
4064
+ # This can lead to incorrect results if this Series is not sorted!!
4065
+ # Use with care!
4066
+ #
4067
+ # @example
4068
+ # s = Polars::Series.new("a", [1, 2, 3])
4069
+ # s.set_sorted.max
4070
+ # # => 3
4071
+ def set_sorted(reverse: false)
4072
+ Utils.wrap_s(_s.set_sorted(reverse))
4073
+ end
4074
+
4075
+ # Create a new Series filled with values from the given index.
4076
+ #
4077
+ # @return [Series]
4078
+ def new_from_index(index, length)
4079
+ Utils.wrap_s(_s.new_from_index(index, length))
4080
+ end
4081
+
4082
+ # Shrink numeric columns to the minimal required datatype.
4083
+ #
4084
+ # Shrink to the dtype needed to fit the extrema of this Series.
4085
+ # This can be used to reduce memory pressure.
4086
+ #
4087
+ # @return [Series]
4088
+ def shrink_dtype
4089
+ super
4090
+ end
4091
+
4092
+ # Create an object namespace of all list related methods.
4093
+ #
4094
+ # @return [ListNameSpace]
4095
+ def list
4096
+ ListNameSpace.new(self)
4097
+ end
4098
+
4099
+ # Create an object namespace of all array related methods.
4100
+ #
4101
+ # @return [ArrayNameSpace]
4102
+ def arr
4103
+ ArrayNameSpace.new(self)
4104
+ end
4105
+
4106
+ # Create an object namespace of all binary related methods.
4107
+ #
4108
+ # @return [BinaryNameSpace]
4109
+ def bin
4110
+ BinaryNameSpace.new(self)
4111
+ end
4112
+
4113
+ # Create an object namespace of all categorical related methods.
4114
+ #
4115
+ # @return [CatNameSpace]
4116
+ def cat
4117
+ CatNameSpace.new(self)
4118
+ end
4119
+
4120
+ # Create an object namespace of all datetime related methods.
4121
+ #
4122
+ # @return [DateTimeNameSpace]
4123
+ def dt
4124
+ DateTimeNameSpace.new(self)
4125
+ end
4126
+
4127
+ # Create an object namespace of all string related methods.
4128
+ #
4129
+ # @return [StringNameSpace]
4130
+ def str
4131
+ StringNameSpace.new(self)
4132
+ end
4133
+
4134
+ # Create an object namespace of all struct related methods.
4135
+ #
4136
+ # @return [StructNameSpace]
4137
+ def struct
4138
+ StructNameSpace.new(self)
4139
+ end
4140
+
4141
+ private
4142
+
4143
+ def initialize_copy(other)
4144
+ super
4145
+ self._s = _s._clone
4146
+ end
4147
+
4148
# Ruby numeric-coercion hook, used when a Numeric appears on the left-hand
# side of an operator whose right-hand side is this object.
#
# @param other [Numeric] the left-hand operand
#
# @return [Array] two-element array `[other_as_series, self]`
#
# @raise [TypeError] if `other` is not a Numeric
def coerce(other)
  unless other.is_a?(Numeric)
    raise TypeError, "#{self.class} can't be coerced into #{other.class}"
  end

  # TODO improve
  literal = Polars.lit(other)
  [to_frame.select(literal).to_series, self]
end
4157
+
4158
# Normalize a Series of index positions to the index dtype reported by
# `Plr.get_index_type`, converting negative positions to absolute ones.
#
# @param idxs [Series] integer index positions (anything else is rejected)
#
# @return [Series] positions with dtype equal to the index type
#
# @raise [ArgumentError] if `idxs` is not a Series of a supported integer
#   dtype, or (for a UInt32 index type) a 64-bit position is out of range
def _pos_idxs(idxs)
  idx_type = Plr.get_index_type

  if idxs.is_a?(Series)
    # Already the index dtype: nothing to do.
    if idxs.dtype == idx_type
      return idxs
    end
    # Every integer dtype other than the index type itself is accepted.
    if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
      if idx_type == UInt32
        # 64-bit inputs are range-checked before being narrowed to UInt32.
        if [Int64, UInt64].include?(idxs.dtype)
          if idxs.max >= 2**32
            raise ArgumentError, "Index positions should be smaller than 2^32."
          end
        end
        if idxs.dtype == Int64
          # NOTE(review): the check allows values down to -2^32 inclusive,
          # while the message states "-2^32 + 1" — confirm the intended bound.
          if idxs.min < -(2**32)
            raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
          end
        end
      end
      if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
        if idxs.min < 0
          # Widen narrow signed dtypes before adding `len` below.
          if idx_type == UInt32
            if [Int8, Int16].include?(idxs.dtype)
              idxs = idxs.cast(Int32)
            end
          else
            if [Int8, Int16, Int32].include?(idxs.dtype)
              idxs = idxs.cast(Int64)
            end
          end

          # Update negative indexes to absolute indexes.
          return (
            idxs.to_frame
              .select(
                Polars.when(Polars.col(idxs.name) < 0)
                  .then(len + Polars.col(idxs.name))
                  .otherwise(Polars.col(idxs.name))
                  .cast(idx_type)
              )
              .to_series(0)
          )
        end
      end

      # No negative positions: a plain cast to the index dtype suffices.
      return idxs.cast(idx_type)
    end
  end

  raise ArgumentError, "Unsupported idxs datatype."
end
4210
+
4211
# Shared implementation of the element-wise comparison operators.
#
# @param other [Object] value or Series to compare against
# @param op [Symbol] native operation name (e.g. `:eq`, `:neq`); it is used
#   both as an RbSeries method name and as the prefix of "<op>_<suffix>"
#
# @return [Series]
#
# @raise [RuntimeError] if no native function is found for a Time/Date operand
# @raise [NotImplementedError] if this Series' dtype has no native function
def _comp(other, op)
  # Boolean Series compared with a literal true/false: the result is either
  # a copy of the Series or its negation, so no native call is needed.
  if dtype == Boolean && Utils.bool?(other) && [:eq, :neq].include?(op)
    if (other == true && op == :eq) || (other == false && op == :neq)
      return clone
    elsif (other == false && op == :eq) || (other == true && op == :neq)
      return !self
    end
  end

  # Temporal operands are converted to their integer representation and
  # compared through the integer-typed native function.
  if other.is_a?(::Time) && dtype.is_a?(Datetime)
    ts = Utils._datetime_to_pl_timestamp(other, time_unit)
    f = ffi_func("#{op}_<>", Int64, _s)
    fail if f.nil?
    return Utils.wrap_s(f.call(ts))
  elsif other.is_a?(::Date) && dtype == Date
    d = Utils._date_to_pl_date(other)
    f = ffi_func("#{op}_<>", Int32, _s)
    fail if f.nil?
    return Utils.wrap_s(f.call(d))
  end

  # Series vs Series: dispatch directly on the underlying RbSeries.
  if other.is_a?(Series)
    return Utils.wrap_s(_s.send(op, other._s))
  end

  # Scalar operand: use the native function specialised for this dtype.
  f = ffi_func("#{op}_<>", dtype, _s)
  if f.nil?
    raise NotImplementedError
  end
  Utils.wrap_s(f.call(other))
end
4242
+
4243
# Look up the dtype-specialised native method on an RbSeries.
#
# `name` is a template such as "eq_<>"; the "<>" placeholder is replaced by
# the suffix registered for the dtype in DTYPE_TO_FFINAME.
#
# @param name [String] method-name template containing "<>"
# @param dtype [Class, Object] a data type class (e.g. `Int64`) or an
#   instance of one
# @param _s [Object] receiver on which the method is looked up
#
# @return [Method, nil] the bound method, or nil when the dtype has no
#   registered suffix
def ffi_func(name, dtype, _s)
  # DTYPE_TO_FFINAME is keyed by dtype classes. Callers pass both dtype
  # instances and bare classes (`Int64`, `Int32`); taking `.class` of a bare
  # class yields `Class` and would never match, so normalise first.
  dtype_class = dtype.is_a?(Class) ? dtype : dtype.class
  suffix = DTYPE_TO_FFINAME[dtype_class]
  _s.method(name.sub("<>", suffix)) if suffix
end
4246
+
4247
# Shared implementation of the arithmetic operators.
#
# @param other [Object] right-hand operand: an Expr, a Series or a scalar
# @param op [Symbol] native operation name; it is used both as an RbSeries
#   method name and as the prefix of "<op>_<suffix>"
#
# @return [Series]
#
# @raise [ArgumentError] if this Series' dtype has no native scalar function
def _arithmetic(other, op)
  # Expressions are evaluated against this Series first.
  if other.is_a?(Expr)
    other = to_frame.select(other).to_series
  end
  if other.is_a?(Series)
    return Utils.wrap_s(_s.send(op, other._s))
  end

  # Float, temporal and String scalars applied to a non-float Series are
  # wrapped in a one-element series and sent through the Series/Series path.
  if (other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(::String)) && !is_float
    _s2 = sequence_to_rbseries(name, [other])
    return Utils.wrap_s(_s.send(op, _s2))
  end

  # Remaining scalars use the native function specialised for this dtype.
  f = ffi_func("#{op}_<>", dtype, _s)
  if f.nil?
    raise ArgumentError, "cannot do arithmetic with series of dtype: #{dtype} and argument of type: #{other.class.name}"
  end
  Utils.wrap_s(f.call(other))
end
4266
+
4267
+ DTYPE_TO_FFINAME = {
4268
+ Int8 => "i8",
4269
+ Int16 => "i16",
4270
+ Int32 => "i32",
4271
+ Int64 => "i64",
4272
+ UInt8 => "u8",
4273
+ UInt16 => "u16",
4274
+ UInt32 => "u32",
4275
+ UInt64 => "u64",
4276
+ Float32 => "f32",
4277
+ Float64 => "f64",
4278
+ Boolean => "bool",
4279
+ Utf8 => "str",
4280
+ List => "list",
4281
+ Date => "date",
4282
+ Datetime => "datetime",
4283
+ Duration => "duration",
4284
+ Time => "time",
4285
+ Object => "object",
4286
+ Categorical => "categorical",
4287
+ Struct => "struct",
4288
+ Binary => "binary"
4289
+ }
4290
+
4291
# Rename an existing Series and hand back its underlying native series.
#
# @param name [String] name to give the series
# @param values [Series] series to rename (it is renamed in place)
#
# @return [Object] the `_s` of `values`
def series_to_rbseries(name, values)
  # should not be in-place?
  values.tap { |series| series.rename(name, in_place: true) }._s
end
4296
+
4297
# Build a native series from a Numo array.
#
# @param name [String] series name
# @param values [Object] Numo array; only one-dimensional input is handled
# @param strict [Boolean] forwarded to the constructor (replaced by
#   `nan_to_null` for SFloat/DFloat input)
# @param nan_to_null [Boolean] used as the `strict` flag for float input
#
# @return [Object] the constructed native series
#
# @raise [Todo] for arrays that are not one-dimensional
def numo_to_rbseries(name, values, strict: true, nan_to_null: false)
  # not needed yet
  # if !values.contiguous?
  # end

  # Multi-dimensional input is not implemented.
  raise Todo unless values.shape.length == 1

  values, dtype = numo_values_and_dtype(values)
  strict = nan_to_null if [Numo::SFloat, Numo::DFloat].include?(dtype)

  return sequence_to_rbseries(name, values.to_a, strict: strict) if dtype == Numo::RObject

  # TODO improve performance
  numo_type_to_constructor(dtype).call(name, values.to_a, strict)
end
4318
+
4319
+ def numo_values_and_dtype(values)
4320
+ [values, values.class]
4321
+ end
4322
+
4323
# Pick the native series constructor for a Numo array class.
#
# @param dtype [Class] Numo array class (e.g. `Numo::Int32`)
#
# @return [Method] matching typed `RbSeries` constructor, or
#   `RbSeries.new_object` when the class has no typed constructor
def numo_type_to_constructor(dtype)
  # Numo names its float classes SFloat (32-bit) and DFloat (64-bit); these
  # are the same names numo_to_rbseries checks. Referencing a non-existent
  # Numo constant here would raise NameError while building the table.
  {
    Numo::SFloat => RbSeries.method(:new_opt_f32),
    Numo::DFloat => RbSeries.method(:new_opt_f64),
    Numo::Int8 => RbSeries.method(:new_opt_i8),
    Numo::Int16 => RbSeries.method(:new_opt_i16),
    Numo::Int32 => RbSeries.method(:new_opt_i32),
    Numo::Int64 => RbSeries.method(:new_opt_i64),
    Numo::UInt8 => RbSeries.method(:new_opt_u8),
    Numo::UInt16 => RbSeries.method(:new_opt_u16),
    Numo::UInt32 => RbSeries.method(:new_opt_u32),
    Numo::UInt64 => RbSeries.method(:new_opt_u64)
  }.fetch(dtype) { RbSeries.method(:new_object) }
end
4339
+
4340
# Build a native series from a Ruby sequence, choosing a constructor from
# the requested dtype or, failing that, from the first non-nil element.
#
# @param name [String] series name
# @param values [Enumerable, nil] the elements
# @param dtype [Object, nil] requested data type
# @param strict [Boolean] forwarded to the native constructors
# @param dtype_if_empty [Object, nil] dtype used when `values` is nil/empty
#   and no `dtype` was given (falls back to Float32)
#
# @return [Object] the constructed native series
def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
  ruby_dtype = nil

  if (values.nil? || values.empty?) && dtype.nil?
    dtype = dtype_if_empty || Float32
  elsif dtype == List
    ruby_dtype = ::Array
  end

  rb_temporal_types = [::Date, ::DateTime, ::Time]
  rb_temporal_types << ActiveSupport::TimeWithZone if defined?(ActiveSupport::TimeWithZone)

  # A sequence of hashes is built as a struct series via DataFrame.
  value = _get_first_non_none(values)
  if !value.nil?
    if value.is_a?(Hash)
      return DataFrame.new(values).to_struct(name)._s
    end
  end

  if !dtype.nil? && ![List, Struct, Unknown].include?(dtype) && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
    # A bare Array dtype gets its width from the first element.
    if dtype == Array && !dtype.is_a?(Array) && value.is_a?(::Array)
      dtype = Array.new(nil, value.size)
    end

    constructor = polars_type_to_constructor(dtype)
    rbseries = constructor.call(name, values, strict)

    # These dtypes are built through a generic constructor, so the result
    # may still need a cast to the exact requested dtype.
    base_type = dtype.is_a?(DataType) ? dtype.class : dtype
    if [Date, Datetime, Duration, Time, Categorical, Boolean, Enum].include?(base_type)
      if rbseries.dtype != dtype
        rbseries = rbseries.cast(dtype, true)
      end
    end
    return rbseries
  elsif dtype == Struct
    struct_schema = dtype.is_a?(Struct) ? dtype.to_schema : nil
    empty = {}
    return DataFrame.sequence_to_rbdf(
      values.map { |v| v.nil? ? empty : v },
      schema: struct_schema,
      orient: "row",
    ).to_struct(name)
  else
    if ruby_dtype.nil?
      if value.nil?
        # generic default dtype
        ruby_dtype = Float
      else
        ruby_dtype = value.class
      end
    end

    # temporal branch
    if rb_temporal_types.include?(ruby_dtype)
      if dtype.nil?
        dtype = Utils.rb_type_to_dtype(ruby_dtype)
      elsif rb_temporal_types.include?(dtype)
        dtype = Utils.rb_type_to_dtype(dtype)
      end
      # TODO
      time_unit = nil

      rb_series = RbSeries.new_from_anyvalues(name, values, strict)
      if time_unit.nil?
        s = Utils.wrap_s(rb_series)
      else
        s = Utils.wrap_s(rb_series).dt.cast_time_unit(time_unit)
      end
      return s._s
    elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
      raise Todo
    elsif ruby_dtype == ::Array
      if dtype.is_a?(Object)
        return RbSeries.new_object(name, values, strict)
      end
      if dtype
        srs = sequence_from_anyvalue_or_object(name, values)
        if dtype != srs.dtype
          # `srs` is a native series whose `cast` takes `strict` positionally
          # (as in the typed-constructor branch above). A `strict: false`
          # keyword would arrive as a truthy Hash, so pass `false` directly.
          srs = srs.cast(dtype, false)
        end
        return srs
      end
      return sequence_from_anyvalue_or_object(name, values)
    elsif ruby_dtype == Series
      return RbSeries.new_series_list(name, values.map(&:_s), strict)
    elsif ruby_dtype == RbSeries
      return RbSeries.new_series_list(name, values, strict)
    else
      constructor =
        if value.is_a?(::String)
          # Non-UTF-8 strings are stored as binary.
          if value.encoding == Encoding::UTF_8
            RbSeries.method(:new_str)
          else
            RbSeries.method(:new_binary)
          end
        elsif value.is_a?(Integer) && values.any? { |v| v.is_a?(Float) }
          # TODO improve performance
          RbSeries.method(:new_opt_f64)
        else
          rb_type_to_constructor(value.class)
        end
      constructor.call(name, values, strict)
    end
  end
end
4445
+
4446
# Build a native series from arbitrary values, falling back to an object
# series when the any-value constructor raises.
#
# @param name [String] series name
# @param values [Enumerable] the elements
#
# @return [Object] the constructed native series
def sequence_from_anyvalue_or_object(name, values)
  begin
    RbSeries.new_from_anyvalues(name, values, true)
  rescue StandardError
    RbSeries.new_object(name, values, false)
  end
end
4451
+
4452
+ POLARS_TYPE_TO_CONSTRUCTOR = {
4453
+ Float32 => RbSeries.method(:new_opt_f32),
4454
+ Float64 => RbSeries.method(:new_opt_f64),
4455
+ Int8 => RbSeries.method(:new_opt_i8),
4456
+ Int16 => RbSeries.method(:new_opt_i16),
4457
+ Int32 => RbSeries.method(:new_opt_i32),
4458
+ Int64 => RbSeries.method(:new_opt_i64),
4459
+ UInt8 => RbSeries.method(:new_opt_u8),
4460
+ UInt16 => RbSeries.method(:new_opt_u16),
4461
+ UInt32 => RbSeries.method(:new_opt_u32),
4462
+ UInt64 => RbSeries.method(:new_opt_u64),
4463
+ Decimal => RbSeries.method(:new_decimal),
4464
+ Date => RbSeries.method(:new_from_anyvalues),
4465
+ Datetime => RbSeries.method(:new_from_anyvalues),
4466
+ Duration => RbSeries.method(:new_from_anyvalues),
4467
+ Time => RbSeries.method(:new_from_anyvalues),
4468
+ Boolean => RbSeries.method(:new_opt_bool),
4469
+ Utf8 => RbSeries.method(:new_str),
4470
+ Object => RbSeries.method(:new_object),
4471
+ Categorical => RbSeries.method(:new_str),
4472
+ Enum => RbSeries.method(:new_str),
4473
+ Binary => RbSeries.method(:new_binary),
4474
+ Null => RbSeries.method(:new_null)
4475
+ }
4476
+
4477
+ SYM_TYPE_TO_CONSTRUCTOR = {
4478
+ f32: RbSeries.method(:new_opt_f32),
4479
+ f64: RbSeries.method(:new_opt_f64),
4480
+ i8: RbSeries.method(:new_opt_i8),
4481
+ i16: RbSeries.method(:new_opt_i16),
4482
+ i32: RbSeries.method(:new_opt_i32),
4483
+ i64: RbSeries.method(:new_opt_i64),
4484
+ u8: RbSeries.method(:new_opt_u8),
4485
+ u16: RbSeries.method(:new_opt_u16),
4486
+ u32: RbSeries.method(:new_opt_u32),
4487
+ u64: RbSeries.method(:new_opt_u64),
4488
+ bool: RbSeries.method(:new_opt_bool),
4489
+ str: RbSeries.method(:new_str)
4490
+ }
4491
+
4492
# Resolve the native series constructor for a requested data type.
#
# @param dtype [Object] an Array dtype instance, a DataType subclass, a
#   DataType instance, or anything responding to `to_sym` (e.g. "i64")
#
# @return [#call] callable taking `(name, values, strict)`
#
# @raise [ArgumentError] if no constructor is registered for `dtype`
def polars_type_to_constructor(dtype)
  # Array dtypes carry their own width/inner type, so they get a closure.
  if dtype.is_a?(Array)
    return ->(name, values, strict) { RbSeries.new_array(dtype.width, dtype.inner, name, values, strict) }
  end

  table, key =
    if dtype.is_a?(Class) && dtype < DataType
      [POLARS_TYPE_TO_CONSTRUCTOR, dtype]
    elsif dtype.is_a?(DataType)
      [POLARS_TYPE_TO_CONSTRUCTOR, dtype.class]
    else
      [SYM_TYPE_TO_CONSTRUCTOR, dtype.to_sym]
    end

  table.fetch(key)
rescue KeyError
  raise ArgumentError, "Cannot construct RbSeries for type #{dtype}."
end
4507
+
4508
+ RB_TYPE_TO_CONSTRUCTOR = {
4509
+ Float => RbSeries.method(:new_opt_f64),
4510
+ Integer => RbSeries.method(:new_opt_i64),
4511
+ TrueClass => RbSeries.method(:new_opt_bool),
4512
+ FalseClass => RbSeries.method(:new_opt_bool),
4513
+ BigDecimal => RbSeries.method(:new_decimal),
4514
+ NilClass => RbSeries.method(:new_null)
4515
+ }
4516
+
4517
# Resolve the native series constructor for a plain Ruby class.
#
# @param dtype [Class] Ruby class of the elements (e.g. `Integer`)
#
# @return [#call] the registered constructor, or `RbSeries.new_object` when
#   the class is not in RB_TYPE_TO_CONSTRUCTOR
def rb_type_to_constructor(dtype)
  RB_TYPE_TO_CONSTRUCTOR.fetch(dtype) { RbSeries.method(:new_object) }
end
4522
+
4523
# Return the first element of `values` that is not nil.
#
# @param values [Enumerable, nil] sequence to scan; nil is tolerated because
#   sequence_to_rbseries accepts nil values and calls this helper with them
#
# @return [Object, nil] the first non-nil element, or nil if there is none
#   (or `values` itself is nil)
def _get_first_non_none(values)
  values&.find { |v| !v.nil? }
end
4526
+ end
4527
+ end