polars-df 0.13.0-aarch64-linux-musl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +208 -0
  4. data/Cargo.lock +2556 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +39059 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +104 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +36 -0
  18. data/lib/polars/cat_name_space.rb +88 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +98 -0
  21. data/lib/polars/data_frame.rb +5191 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1397 -0
  24. data/lib/polars/date_time_name_space.rb +1287 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +38 -0
  27. data/lib/polars/expr.rb +7256 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +271 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1329 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +136 -0
  39. data/lib/polars/functions/range/datetime_range.rb +149 -0
  40. data/lib/polars/functions/range/int_range.rb +51 -0
  41. data/lib/polars/functions/range/time_range.rb +141 -0
  42. data/lib/polars/functions/repeat.rb +144 -0
  43. data/lib/polars/functions/whenthen.rb +96 -0
  44. data/lib/polars/functions.rb +57 -0
  45. data/lib/polars/group_by.rb +613 -0
  46. data/lib/polars/io/avro.rb +24 -0
  47. data/lib/polars/io/csv.rb +696 -0
  48. data/lib/polars/io/database.rb +73 -0
  49. data/lib/polars/io/ipc.rb +275 -0
  50. data/lib/polars/io/json.rb +29 -0
  51. data/lib/polars/io/ndjson.rb +80 -0
  52. data/lib/polars/io/parquet.rb +233 -0
  53. data/lib/polars/lazy_frame.rb +2708 -0
  54. data/lib/polars/lazy_group_by.rb +181 -0
  55. data/lib/polars/list_expr.rb +791 -0
  56. data/lib/polars/list_name_space.rb +449 -0
  57. data/lib/polars/meta_expr.rb +222 -0
  58. data/lib/polars/name_expr.rb +198 -0
  59. data/lib/polars/plot.rb +109 -0
  60. data/lib/polars/rolling_group_by.rb +35 -0
  61. data/lib/polars/series.rb +4444 -0
  62. data/lib/polars/slice.rb +104 -0
  63. data/lib/polars/sql_context.rb +194 -0
  64. data/lib/polars/string_cache.rb +75 -0
  65. data/lib/polars/string_expr.rb +1495 -0
  66. data/lib/polars/string_name_space.rb +811 -0
  67. data/lib/polars/struct_expr.rb +98 -0
  68. data/lib/polars/struct_name_space.rb +96 -0
  69. data/lib/polars/testing.rb +507 -0
  70. data/lib/polars/utils/constants.rb +9 -0
  71. data/lib/polars/utils/convert.rb +97 -0
  72. data/lib/polars/utils/parse.rb +89 -0
  73. data/lib/polars/utils/various.rb +76 -0
  74. data/lib/polars/utils/wrap.rb +19 -0
  75. data/lib/polars/utils.rb +130 -0
  76. data/lib/polars/version.rb +4 -0
  77. data/lib/polars/whenthen.rb +83 -0
  78. data/lib/polars-df.rb +1 -0
  79. data/lib/polars.rb +91 -0
  80. metadata +138 -0
@@ -0,0 +1,4444 @@
1
+ module Polars
2
+ # A Series represents a single column in a polars DataFrame.
3
+ class Series
4
+ include ExprDispatch
5
+
6
+ # Create a new Series.
7
+ #
8
+ # @param name [String, Array, nil]
9
+ # Name of the series. Will be used as a column name when used in a DataFrame.
10
+ # When not specified, name is set to an empty string.
11
+ # @param values [Array, nil]
12
+ # One-dimensional data in various forms. Supported are: Array and Series.
13
+ # @param dtype [Symbol, nil]
14
+ # Polars dtype of the Series data. If not specified, the dtype is inferred.
15
+ # @param strict [Boolean]
16
+ # Throw error on numeric overflow.
17
+ # @param nan_to_null [Boolean]
18
+ # Not used.
19
+ # @param dtype_if_empty [Symbol, nil]
20
+ # If no dtype is specified and values contains `nil` or an empty array,
21
+ # set the Polars dtype of the Series data. If not specified, Float32 is used.
22
+ #
23
+ # @example Constructing a Series by specifying name and values positionally:
24
+ # s = Polars::Series.new("a", [1, 2, 3])
25
+ #
26
+ # @example Notice that the dtype is automatically inferred as a polars `Int64`:
27
+ # s.dtype
28
+ # # => Polars::Int64
29
+ #
30
+ # @example Constructing a Series with a specific dtype:
31
+ # s2 = Polars::Series.new("a", [1, 2, 3], dtype: :f32)
32
+ #
33
+ # @example It is possible to construct a Series with values as the first positional argument. This syntax is considered an anti-pattern, but it can be useful in certain scenarios. You must specify any other arguments through keywords.
34
+ # s3 = Polars::Series.new([1, 2, 3])
35
def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false, dtype_if_empty: nil)
  # A non-string first argument is taken to be the values when no explicit
  # values were supplied; with explicit values it is an invalid name.
  unless name.nil? || name.is_a?(::String)
    raise ArgumentError, "Series name must be a string." unless values.nil?

    values = name
    name = nil
  end

  name = "" if name.nil?

  # TODO improve
  # Ranges that begin with a String are expanded and then take the Array path.
  values = values.to_a if values.is_a?(Range) && values.begin.is_a?(::String)

  case values
  when nil
    self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
  when Series
    self._s = series_to_rbseries(name, values)
  when Range
    lower = values.first
    # The upper bound is bumped by one for inclusive ranges before being
    # handed to Polars.arange.
    upper = values.last + (values.exclude_end? ? 0 : 1)
    ranged = Polars.arange(lower, upper, step: 1, eager: true, dtype: dtype)
    self._s = ranged.rename(name, in_place: true)._s
  when ::Array
    self._s = sequence_to_rbseries(name, values, dtype: dtype, strict: strict, dtype_if_empty: dtype_if_empty)
  else
    # Numo support is optional, so the constant is probed before use.
    unless defined?(Numo::NArray) && values.is_a?(Numo::NArray)
      raise ArgumentError, "Series constructor called with unsupported type; got #{values.class.name}"
    end

    self._s = numo_to_rbseries(name, values, strict: strict, nan_to_null: nan_to_null)
    self._s = cast(dtype, strict: true)._s unless dtype.nil?
  end
end
80
+
81
+ # @private
82
def self._from_rbseries(s)
  # Skips #initialize: the given handle is attached to a bare instance.
  Series.allocate.tap { |wrapped| wrapped._s = s }
end
87
+
88
+ # Get the data type of this Series.
89
+ #
90
+ # @return [Symbol]
91
def dtype
  # Delegates to the wrapped `_s` object.
  self._s.dtype
end
94
+
95
+ # Get flags that are set on the Series.
96
+ #
97
+ # @return [Hash]
98
def flags
  sorted_flags = {
    "SORTED_ASC" => _s.is_sorted_flag,
    "SORTED_DESC" => _s.is_sorted_reverse_flag
  }
  # The fast-explode flag is only reported for List dtypes.
  return sorted_flags unless dtype.is_a?(List)

  sorted_flags.merge("FAST_EXPLODE" => _s.can_fast_explode_flag)
end
108
+
109
+ # Get the inner dtype of a List typed Series.
110
+ #
111
+ # @return [Symbol]
112
def inner_dtype
  # Delegates to the wrapped `_s` object.
  self._s.inner_dtype
end
115
+
116
+ # Get the name of this Series.
117
+ #
118
+ # @return [String]
119
def name
  # Delegates to the wrapped `_s` object.
  self._s.name
end
122
+
123
+ # Shape of this Series.
124
+ #
125
+ # @return [Array]
126
def shape
  # A Series has one dimension, so the shape is a one-element array.
  row_count = _s.len
  [row_count]
end
129
+
130
+ # Get the time unit of underlying Datetime Series as `"ns"`, `"us"`, or `"ms"`.
131
+ #
132
+ # @return [String]
133
def time_unit
  # Delegates to the wrapped `_s` object.
  self._s.time_unit
end
136
+
137
+ # Returns a string representing the Series.
138
+ #
139
+ # @return [String]
140
def to_s
  # The string form is produced by the wrapped `_s` object.
  self._s.to_s
end
alias_method :inspect, :to_s
144
+
145
+ # Bitwise AND.
146
+ #
147
+ # @return [Series]
148
def &(other)
  # Non-Series operands are wrapped in a single-element Series first.
  rhs = other.is_a?(Series) ? other : Series.new([other])
  Utils.wrap_s(_s.bitand(rhs._s))
end
154
+
155
+ # Bitwise OR.
156
+ #
157
+ # @return [Series]
158
def |(other)
  # Non-Series operands are wrapped in a single-element Series first.
  rhs = other.is_a?(Series) ? other : Series.new([other])
  Utils.wrap_s(_s.bitor(rhs._s))
end
164
+
165
+ # Bitwise XOR.
166
+ #
167
+ # @return [Series]
168
def ^(other)
  # Non-Series operands are wrapped in a single-element Series first.
  rhs = other.is_a?(Series) ? other : Series.new([other])
  Utils.wrap_s(_s.bitxor(rhs._s))
end
174
+
175
+ # Equal.
176
+ #
177
+ # @return [Series]
178
def ==(other)
  # Comparison is routed through `_comp` with the `:eq` operation.
  _comp(other, :eq)
end
181
+
182
+ # Not equal.
183
+ #
184
+ # @return [Series]
185
def !=(other)
  # Comparison is routed through `_comp` with the `:neq` operation.
  _comp(other, :neq)
end
188
+
189
+ # Greater than.
190
+ #
191
+ # @return [Series]
192
def >(other)
  # Comparison is routed through `_comp` with the `:gt` operation.
  _comp(other, :gt)
end
195
+
196
+ # Less than.
197
+ #
198
+ # @return [Series]
199
def <(other)
  # Comparison is routed through `_comp` with the `:lt` operation.
  _comp(other, :lt)
end
202
+
203
+ # Greater than or equal.
204
+ #
205
+ # @return [Series]
206
def >=(other)
  # Comparison is routed through `_comp` with the `:gt_eq` operation.
  _comp(other, :gt_eq)
end
209
+
210
+ # Less than or equal.
211
+ #
212
+ # @return [Series]
213
def <=(other)
  # Comparison is routed through `_comp` with the `:lt_eq` operation.
  _comp(other, :lt_eq)
end
216
+
217
+ # Method equivalent of operator expression `series <= other`.
218
+ #
219
+ # @return [Series]
220
def le(other)
  # Named form of the `<=` operator.
  self.<=(other)
end
223
+
224
+ # Method equivalent of operator expression `series < other`.
225
+ #
226
+ # @return [Series]
227
def lt(other)
  # Named form of the `<` operator.
  self.<(other)
end
230
+
231
+ # Method equivalent of operator expression `series == other`.
232
+ #
233
+ # @return [Series]
234
def eq(other)
  # Named form of the `==` operator.
  self.==(other)
end
237
+
238
+ # Method equivalent of equality operator `series == other` where `nil == nil`.
239
+ #
240
+ # This differs from the standard `eq` where null values are propagated.
241
+ #
242
+ # @param other [Object]
243
+ # A literal or expression value to compare with.
244
+ #
245
+ # @return [Object]
246
+ #
247
+ # @example
248
+ # s1 = Polars::Series.new("a", [333, 200, nil])
249
+ # s2 = Polars::Series.new("a", [100, 200, nil])
250
+ # s1.eq(s2)
251
+ # # =>
252
+ # # shape: (3,)
253
+ # # Series: 'a' [bool]
254
+ # # [
255
+ # # false
256
+ # # true
257
+ # # null
258
+ # # ]
259
+ #
260
+ # @example
261
+ # s1.eq_missing(s2)
262
+ # # =>
263
+ # # shape: (3,)
264
+ # # Series: 'a' [bool]
265
+ # # [
266
+ # # false
267
+ # # true
268
+ # # true
269
+ # # ]
270
def eq_missing(other)
  # An Expr operand yields an expression built from this Series as a literal.
  return Polars.lit(self).eq_missing(other) if other.is_a?(Expr)

  # Otherwise evaluate through a one-column frame holding this Series.
  comparison = Polars.col(name).eq_missing(other)
  to_frame.select(comparison).to_series
end
276
+
277
+ # Method equivalent of operator expression `series != other`.
278
+ #
279
+ # @return [Series]
280
def ne(other)
  # Named form of the `!=` operator.
  self.!=(other)
end
283
+
284
+ # Method equivalent of equality operator `series != other` where `nil == nil`.
285
+ #
286
+ # This differs from the standard `ne` where null values are propagated.
287
+ #
288
+ # @param other [Object]
289
+ # A literal or expression value to compare with.
290
+ #
291
+ # @return [Object]
292
+ #
293
+ # @example
294
+ # s1 = Polars::Series.new("a", [333, 200, nil])
295
+ # s2 = Polars::Series.new("a", [100, 200, nil])
296
+ # s1.ne(s2)
297
+ # # =>
298
+ # # shape: (3,)
299
+ # # Series: 'a' [bool]
300
+ # # [
301
+ # # true
302
+ # # false
303
+ # # null
304
+ # # ]
305
+ #
306
+ # @example
307
+ # s1.ne_missing(s2)
308
+ # # =>
309
+ # # shape: (3,)
310
+ # # Series: 'a' [bool]
311
+ # # [
312
+ # # true
313
+ # # false
314
+ # # false
315
+ # # ]
316
def ne_missing(other)
  # An Expr operand yields an expression built from this Series as a literal.
  return Polars.lit(self).ne_missing(other) if other.is_a?(Expr)

  # Otherwise evaluate through a one-column frame holding this Series.
  comparison = Polars.col(name).ne_missing(other)
  to_frame.select(comparison).to_series
end
322
+
323
+ # Method equivalent of operator expression `series >= other`.
324
+ #
325
+ # @return [Series]
326
def ge(other)
  # Named form of the `>=` operator.
  self.>=(other)
end
329
+
330
+ # Method equivalent of operator expression `series > other`.
331
+ #
332
+ # @return [Series]
333
def gt(other)
  # Named form of the `>` operator.
  self.>(other)
end
336
+
337
+ # Performs addition.
338
+ #
339
+ # @return [Series]
340
def +(other)
  # Arithmetic is routed through `_arithmetic` with the `:add` operation.
  _arithmetic(other, :add)
end
343
+
344
+ # Performs subtraction.
345
+ #
346
+ # @return [Series]
347
def -(other)
  # Arithmetic is routed through `_arithmetic` with the `:sub` operation.
  _arithmetic(other, :sub)
end
350
+
351
+ # Performs multiplication.
352
+ #
353
+ # @return [Series]
354
def *(other)
  raise ArgumentError, "first cast to integer before multiplying datelike dtypes" if is_temporal

  # With a DataFrame operand the work is handed to DataFrame#*.
  return other * self if other.is_a?(DataFrame)

  _arithmetic(other, :mul)
end
363
+
364
+ # Performs division.
365
+ #
366
+ # @return [Series]
367
def /(other)
  raise ArgumentError, "first cast to integer before dividing datelike dtypes" if is_temporal

  # Non-float series are cast to Float64 and divided again.
  is_float ? _arithmetic(other, :div) : cast(Float64) / other
end
378
+
379
+ # Returns the modulo.
380
+ #
381
+ # @return [Series]
382
def %(other)
  raise ArgumentError, "first cast to integer before applying modulo on datelike dtypes" if is_datelike

  # Arithmetic is routed through `_arithmetic` with the `:rem` operation.
  _arithmetic(other, :rem)
end
388
+
389
+ # Raises to the power of exponent.
390
+ #
391
+ # @return [Series]
392
def **(power)
  raise ArgumentError, "first cast to integer before raising datelike dtypes to a power" if is_datelike

  # Evaluated as a `pow` expression over a one-column frame.
  powered = Polars.col(name).pow(power)
  to_frame.select(powered).to_series
end
398
+
399
+ # Performs boolean not.
400
+ #
401
+ # @return [Series]
402
def !
  # Only Boolean series can be negated; other dtypes raise.
  raise NotImplementedError unless dtype == Boolean

  Utils.wrap_s(_s.not)
end
408
+
409
+ # Performs negation.
410
+ #
411
+ # @return [Series]
412
def -@
  # Negation is written as subtraction from zero.
  zero = 0
  zero - self
end
415
+
416
+ # Returns an enumerator.
417
+ #
418
+ # @return [Object]
419
def each
  # With no block, hand back an Enumerator over this method.
  return to_enum(:each) unless block_given?

  # Elements are fetched one at a time through #[].
  length.times { |position| yield self[position] }
end
426
+
427
+ # Returns elements of the Series.
428
+ #
429
+ # @return [Object]
430
def [](item)
  if item.is_a?(Series)
    integer_dtypes = [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64]
    # An integer Series selects by position, a boolean Series filters.
    return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s)) if integer_dtypes.include?(item.dtype)
    return filter(item) if item.bool?
  elsif item.is_a?(Integer)
    # Negative positions are offset by the series length.
    position = item < 0 ? len + item : item
    return _s.get_idx(position)
  elsif item.is_a?(Range)
    return Slice.new(self).apply(item)
  end

  unless Utils.is_int_sequence(item)
    raise ArgumentError, "Cannot get item of type: #{item.class.name}"
  end

  positions = Series.new("", item)
  Utils.wrap_s(_s.take_with_series(_pos_idxs(positions)._s))
end
457
+
458
+ # Sets an element of the Series.
459
+ #
460
+ # @return [Object]
461
def []=(key, value)
  if value.is_a?(::Array)
    # Array values are only accepted for numeric and datelike series.
    unless is_numeric || is_datelike
      raise ArgumentError, "cannot set Series of dtype: #{dtype} with list/tuple as value; use a scalar value"
    end

    scatter(key, value)
    return
  end

  case key
  when Series
    key_dtype = key.dtype
    if key_dtype == Boolean
      self._s = set(key, value)._s
    elsif key_dtype == UInt64
      self._s = scatter(key.cast(UInt32), value)._s
    elsif key_dtype == UInt32
      self._s = scatter(key, value)._s
    else
      raise Todo
    end
  when ::Array
    # Array and Range keys become a UInt32 index Series and re-enter this method.
    index = Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))
    self[index] = value
  when Range
    index = Series.new("", key, dtype: UInt32)
    self[index] = value
  when Integer
    self[[key]] = value
  else
    raise ArgumentError, "cannot use #{key} for indexing"
  end
end
492
+
493
+ # Return an estimation of the total (heap) allocated size of the Series.
494
+ #
495
+ # Estimated size is given in the specified unit (bytes by default).
496
+ #
497
+ # This estimation is the sum of the size of its buffers, validity, including
498
+ # nested arrays. Multiple arrays may share buffers and bitmaps. Therefore, the
499
+ # size of 2 arrays is not the sum of the sizes computed from this function. In
500
+ # particular, StructArray's size is an upper bound.
501
+ #
502
+ # When an array is sliced, its allocated size remains constant because the buffer
503
+ # is unchanged. However, this function will yield a smaller number. This is because
504
+ # this function returns the visible size of the buffer, not its total capacity.
505
+ #
506
+ # FFI buffers are included in this estimation.
507
+ #
508
+ # @param unit ["b", "kb", "mb", "gb", "tb"]
509
+ # Scale the returned size to the given unit.
510
+ #
511
+ # @return [Numeric]
512
+ #
513
+ # @example
514
+ # s = Polars::Series.new("values", 1..1_000_000, dtype: :u32)
515
+ # s.estimated_size
516
+ # # => 4000000
517
+ # s.estimated_size("mb")
518
+ # # => 3.814697265625
519
def estimated_size(unit = "b")
  # The raw size from `_s` is scaled to the requested unit.
  Utils.scale_bytes(_s.estimated_size, to: unit)
end
523
+
524
+ # Compute the square root of the elements.
525
+ #
526
+ # @return [Series]
527
def sqrt
  # Square root expressed as the one-half power.
  one_half = 0.5
  self**one_half
end
530
+
531
+ # Check if any boolean value in the column is `true`.
532
+ #
533
+ # @return [Boolean]
534
def any?(ignore_nulls: true, &block)
  return _s.any(ignore_nulls) unless block_given?

  # With a block, map the elements first and test the resulting Series.
  apply(skip_nulls: ignore_nulls, &block).any?
end
alias_method :any, :any?
542
+
543
+ # Check if all boolean values in the column are `true`.
544
+ #
545
+ # @return [Boolean]
546
def all?(ignore_nulls: true, &block)
  return _s.all(ignore_nulls) unless block_given?

  # With a block, map the elements first and test the resulting Series.
  apply(skip_nulls: ignore_nulls, &block).all?
end
alias_method :all, :all?
554
+
555
+ # Check if all boolean values in the column are `false`.
556
+ #
557
+ # @return [Boolean]
558
def none?(&block)
  # With a block, map the elements first and test the resulting Series.
  return apply(&block).none? if block_given?

  # "None true" is evaluated as "all negated values true".
  all_negated = Polars.col(name).is_not.all
  to_frame.select(all_negated).to_series[0]
end
alias_method :none, :none?
566
+
567
+ # Compute the logarithm to a given base.
568
+ #
569
+ # @param base [Float]
570
+ # Given base, defaults to `Math::E`.
571
+ #
572
+ # @return [Series]
573
def log(base = Math::E)
  # Forwards to the inherited implementation with the same base.
  super(base)
end
576
+
577
+ # Compute the base 10 logarithm of the input array, element-wise.
578
+ #
579
+ # @return [Series]
580
def log10
  # Forwards to the inherited implementation.
  super()
end
583
+
584
+ # Compute the exponential, element-wise.
585
+ #
586
+ # @return [Series]
587
def exp
  # Forwards to the inherited implementation.
  super()
end
590
+
591
+ # Create a new Series that copies data from this Series without null values.
592
+ #
593
+ # @return [Series]
594
def drop_nulls
  # Forwards to the inherited implementation.
  super()
end
597
+
598
+ # Drop NaN values.
599
+ #
600
+ # @return [Series]
601
def drop_nans
  # Forwards to the inherited implementation.
  super()
end
604
+
605
+ # Cast this Series to a DataFrame.
606
+ #
607
+ # @return [DataFrame]
608
def to_frame
  # Builds a one-column frame around this Series' `_s` handle.
  rb_frame = RbDataFrame.new([_s])
  Utils.wrap_df(rb_frame)
end
611
+
612
+ # Quick summary statistics of a series.
613
+ #
614
+ # Series with mixed datatypes will return summary statistics for the datatype of
615
+ # the first value.
616
+ #
617
+ # @return [DataFrame]
618
+ #
619
+ # @example
620
+ # series_num = Polars::Series.new([1, 2, 3, 4, 5])
621
+ # series_num.describe
622
+ # # =>
623
+ # # shape: (6, 2)
624
+ # # ┌────────────┬──────────┐
625
+ # # │ statistic ┆ value │
626
+ # # │ --- ┆ --- │
627
+ # # │ str ┆ f64 │
628
+ # # ╞════════════╪══════════╡
629
+ # # │ min ┆ 1.0 │
630
+ # # │ max ┆ 5.0 │
631
+ # # │ null_count ┆ 0.0 │
632
+ # # │ mean ┆ 3.0 │
633
+ # # │ std ┆ 1.581139 │
634
+ # # │ count ┆ 5.0 │
635
+ # # └────────────┴──────────┘
636
+ #
637
+ # @example
638
+ # series_str = Polars::Series.new(["a", "a", nil, "b", "c"])
639
+ # series_str.describe
640
+ # # =>
641
+ # # shape: (3, 2)
642
+ # # ┌────────────┬───────┐
643
+ # # │ statistic ┆ value │
644
+ # # │ --- ┆ --- │
645
+ # # │ str ┆ i64 │
646
+ # # ╞════════════╪═══════╡
647
+ # # │ unique ┆ 4 │
648
+ # # │ null_count ┆ 1 │
649
+ # # │ count ┆ 5 │
650
+ # # └────────────┴───────┘
651
def describe
  raise ArgumentError, "Series must contain at least one value" if len == 0

  # Which statistics are reported depends on the dtype family.
  stats =
    if is_numeric
      floats = cast(:f64)
      {
        "min" => floats.min,
        "max" => floats.max,
        "null_count" => floats.null_count,
        "mean" => floats.mean,
        "std" => floats.std,
        "count" => floats.len
      }
    elsif is_boolean
      {
        "sum" => sum,
        "null_count" => null_count,
        "count" => len
      }
    elsif is_utf8
      {
        "unique" => unique.length,
        "null_count" => null_count,
        "count" => len
      }
    elsif is_datelike
      # we coerce all to string, because a polars column
      # only has a single dtype and dates: datetime and count: int don't match
      {
        "min" => dt.min.to_s,
        "max" => dt.max.to_s,
        "null_count" => null_count.to_s,
        "count" => len.to_s
      }
    else
      raise TypeError, "This type is not supported"
    end

  Polars::DataFrame.new({"statistic" => stats.keys, "value" => stats.values})
end
693
+
694
+ # Reduce this Series to the sum value.
695
+ #
696
+ # @return [Numeric]
697
+ #
698
+ # @note
699
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
700
+ # `:i64` before summing to prevent overflow issues.
701
+ #
702
+ # @example
703
+ # s = Polars::Series.new("a", [1, 2, 3])
704
+ # s.sum
705
+ # # => 6
706
def sum
  # Delegates to the wrapped `_s` object.
  self._s.sum
end
709
+
710
+ # Reduce this Series to the mean value.
711
+ #
712
+ # @return [Float, nil]
713
+ #
714
+ # @example
715
+ # s = Polars::Series.new("a", [1, 2, 3])
716
+ # s.mean
717
+ # # => 2.0
718
def mean
  # Delegates to the wrapped `_s` object.
  self._s.mean
end
721
+
722
+ # Reduce this Series to the product value.
723
+ #
724
+ # @return [Numeric]
725
def product
  # Evaluated as an expression over a one-column frame; the single
  # resulting value is returned.
  product_expr = Polars.col(name).product
  to_frame.select(product_expr).to_series[0]
end
728
+
729
+ # Get the minimal value in this Series.
730
+ #
731
+ # @return [Object]
732
+ #
733
+ # @example
734
+ # s = Polars::Series.new("a", [1, 2, 3])
735
+ # s.min
736
+ # # => 1
737
def min
  # Delegates to the wrapped `_s` object.
  self._s.min
end
740
+
741
+ # Get the maximum value in this Series.
742
+ #
743
+ # @return [Object]
744
+ #
745
+ # @example
746
+ # s = Polars::Series.new("a", [1, 2, 3])
747
+ # s.max
748
+ # # => 3
749
def max
  # Delegates to the wrapped `_s` object.
  self._s.max
end
752
+
753
+ # Get maximum value, but propagate/poison encountered NaN values.
754
+ #
755
+ # @return [Object]
756
def nan_max
  # Evaluated over a one-column frame; the value at [0, 0] is returned.
  reduced = to_frame.select(Polars.col(name).nan_max)
  reduced[0, 0]
end
759
+
760
+ # Get minimum value, but propagate/poison encountered NaN values.
761
+ #
762
+ # @return [Object]
763
def nan_min
  # Evaluated over a one-column frame; the value at [0, 0] is returned.
  reduced = to_frame.select(Polars.col(name).nan_min)
  reduced[0, 0]
end
766
+
767
+ # Get the standard deviation of this Series.
768
+ #
769
+ # @param ddof [Integer]
770
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
771
+ # where N represents the number of elements.
772
+ #
773
+ # @return [Float, nil]
774
+ #
775
+ # @example
776
+ # s = Polars::Series.new("a", [1, 2, 3])
777
+ # s.std
778
+ # # => 1.0
779
def std(ddof: 1)
  # Non-numeric series yield nil.
  return nil unless is_numeric

  std_expr = Polars.col(name).std(ddof: ddof)
  to_frame.select(std_expr).to_series[0]
end
786
+
787
+ # Get variance of this Series.
788
+ #
789
+ # @param ddof [Integer]
790
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
791
+ # where N represents the number of elements.
792
+ #
793
+ # @return [Float, nil]
794
+ #
795
+ # @example
796
+ # s = Polars::Series.new("a", [1, 2, 3])
797
+ # s.var
798
+ # # => 1.0
799
def var(ddof: 1)
  # Non-numeric series yield nil.
  return nil unless is_numeric

  var_expr = Polars.col(name).var(ddof: ddof)
  to_frame.select(var_expr).to_series[0]
end
806
+
807
+ # Get the median of this Series.
808
+ #
809
+ # @return [Float, nil]
810
+ #
811
+ # @example
812
+ # s = Polars::Series.new("a", [1, 2, 3])
813
+ # s.median
814
+ # # => 2.0
815
def median
  # Delegates to the wrapped `_s` object.
  self._s.median
end
818
+
819
+ # Get the quantile value of this Series.
820
+ #
821
+ # @param quantile [Float, nil]
822
+ # Quantile between 0.0 and 1.0.
823
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
824
+ # Interpolation method.
825
+ #
826
+ # @return [Float, nil]
827
+ #
828
+ # @example
829
+ # s = Polars::Series.new("a", [1, 2, 3])
830
+ # s.quantile(0.5)
831
+ # # => 2.0
832
def quantile(quantile, interpolation: "nearest")
  # Both arguments are passed positionally to the wrapped `_s` object.
  self._s.quantile(quantile, interpolation)
end
835
+
836
+ # Get dummy variables.
837
+ #
838
+ # @return [DataFrame]
839
+ #
840
+ # @example
841
+ # s = Polars::Series.new("a", [1, 2, 3])
842
+ # s.to_dummies
843
+ # # =>
844
+ # # shape: (3, 3)
845
+ # # ┌─────┬─────┬─────┐
846
+ # # │ a_1 ┆ a_2 ┆ a_3 │
847
+ # # │ --- ┆ --- ┆ --- │
848
+ # # │ u8 ┆ u8 ┆ u8 │
849
+ # # ╞═════╪═════╪═════╡
850
+ # # │ 1 ┆ 0 ┆ 0 │
851
+ # # │ 0 ┆ 1 ┆ 0 │
852
+ # # │ 0 ┆ 0 ┆ 1 │
853
+ # # └─────┴─────┴─────┘
854
def to_dummies(separator: "_", drop_first: false)
  # `_s` produces the dummy columns; the result is wrapped as a DataFrame.
  dummies = _s.to_dummies(separator, drop_first)
  Utils.wrap_df(dummies)
end
857
+
858
+ # Bin continuous values into discrete categories.
859
+ #
860
+ # @param breaks [Array]
861
+ # List of unique cut points.
862
+ # @param labels [Array]
863
+ # Names of the categories. The number of labels must be equal to the number
864
+ # of cut points plus one.
865
+ # @param left_closed [Boolean]
866
+ # Set the intervals to be left-closed instead of right-closed.
867
+ # @param include_breaks [Boolean]
868
+ # Include a column with the right endpoint of the bin each observation falls
869
+ # in. This will change the data type of the output from a
870
+ # `Categorical` to a `Struct`.
871
+ #
872
+ # @return [Series]
873
+ #
874
+ # @example Divide the column into three categories.
875
+ # s = Polars::Series.new("foo", [-2, -1, 0, 1, 2])
876
+ # s.cut([-1, 1], labels: ["a", "b", "c"])
877
+ # # =>
878
+ # # shape: (5,)
879
+ # # Series: 'foo' [cat]
880
+ # # [
881
+ # # "a"
882
+ # # "a"
883
+ # # "b"
884
+ # # "b"
885
+ # # "c"
886
+ # # ]
887
+ #
888
+ # @example Create a DataFrame with the breakpoint and category for each value.
889
+ # cut = s.cut([-1, 1], include_breaks: true).alias("cut")
890
+ # s.to_frame.with_columns(cut).unnest("cut")
891
+ # # =>
892
+ # # shape: (5, 3)
893
+ # # ┌─────┬─────────────┬────────────┐
894
+ # # │ foo ┆ break_point ┆ category │
895
+ # # │ --- ┆ --- ┆ --- │
896
+ # # │ i64 ┆ f64 ┆ cat │
897
+ # # ╞═════╪═════════════╪════════════╡
898
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
899
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
900
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
901
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
902
+ # # │ 2 ┆ inf ┆ (1, inf] │
903
+ # # └─────┴─────────────┴────────────┘
904
def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
  # Binning is evaluated as an expression over a one-column frame.
  cut_expr = Polars.col(name).cut(
    breaks,
    labels: labels,
    left_closed: left_closed,
    include_breaks: include_breaks
  )
  binned = to_frame.select(cut_expr).to_series
  return binned unless include_breaks

  # With breaks included, the struct fields are given fixed names.
  binned.struct.rename_fields(["break_point", "category"])
end
924
+
925
+ # Bin continuous values into discrete categories based on their quantiles.
926
+ #
927
+ # @param quantiles [Array]
928
+ # Either a list of quantile probabilities between 0 and 1 or a positive
929
+ # integer determining the number of bins with uniform probability.
930
+ # @param labels [Array]
931
+ # Names of the categories. The number of labels must be equal to the number
932
+ # of cut points plus one.
933
+ # @param left_closed [Boolean]
934
+ # Set the intervals to be left-closed instead of right-closed.
935
+ # @param allow_duplicates [Boolean]
936
+ # If set to `true`, duplicates in the resulting quantiles are dropped,
937
+ # rather than raising a `DuplicateError`. This can happen even with unique
938
+ # probabilities, depending on the data.
939
+ # @param include_breaks [Boolean]
940
+ # Include a column with the right endpoint of the bin each observation falls
941
+ # in. This will change the data type of the output from a
942
+ # `Categorical` to a `Struct`.
943
+ #
944
+ # @return [Series]
945
+ #
946
+ # @example Divide a column into three categories according to pre-defined quantile probabilities.
947
+ # s = Polars::Series.new("foo", [-2, -1, 0, 1, 2])
948
+ # s.qcut([0.25, 0.75], labels: ["a", "b", "c"])
949
+ # # =>
950
+ # # shape: (5,)
951
+ # # Series: 'foo' [cat]
952
+ # # [
953
+ # # "a"
954
+ # # "a"
955
+ # # "b"
956
+ # # "b"
957
+ # # "c"
958
+ # # ]
959
+ #
960
+ # @example Divide a column into two categories using uniform quantile probabilities.
961
+ # s.qcut(2, labels: ["low", "high"], left_closed: true)
962
+ # # =>
963
+ # # shape: (5,)
964
+ # # Series: 'foo' [cat]
965
+ # # [
966
+ # # "low"
967
+ # # "low"
968
+ # # "high"
969
+ # # "high"
970
+ # # "high"
971
+ # # ]
972
+ #
973
+ # @example Create a DataFrame with the breakpoint and category for each value.
974
+ # cut = s.qcut([0.25, 0.75], include_breaks: true).alias("cut")
975
+ # s.to_frame.with_columns(cut).unnest("cut")
976
+ # # =>
977
+ # # shape: (5, 3)
978
+ # # ┌─────┬─────────────┬────────────┐
979
+ # # │ foo ┆ break_point ┆ category │
980
+ # # │ --- ┆ --- ┆ --- │
981
+ # # │ i64 ┆ f64 ┆ cat │
982
+ # # ╞═════╪═════════════╪════════════╡
983
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
984
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
985
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
986
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
987
+ # # │ 2 ┆ inf ┆ (1, inf] │
988
+ # # └─────┴─────────────┴────────────┘
989
def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
  # Binning is evaluated as an expression over a one-column frame.
  qcut_expr = Polars.col(name).qcut(
    quantiles,
    labels: labels,
    left_closed: left_closed,
    allow_duplicates: allow_duplicates,
    include_breaks: include_breaks
  )
  binned = to_frame.select(qcut_expr).to_series
  return binned unless include_breaks

  # With breaks included, the struct fields are given fixed names.
  binned.struct.rename_fields(["break_point", "category"])
end
1010
+
1011
+ # Get the lengths of runs of identical values.
1012
+ #
1013
+ # @return [Series]
1014
+ #
1015
+ # @example
1016
+ # s = Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3])
1017
+ # s.rle.struct.unnest
1018
+ # # =>
1019
+ # # shape: (6, 2)
1020
+ # # ┌─────┬───────┐
1021
+ # # │ len ┆ value │
1022
+ # # │ --- ┆ --- │
1023
+ # # │ u32 ┆ i64 │
1024
+ # # ╞═════╪═══════╡
1025
+ # # │ 2 ┆ 1 │
1026
+ # # │ 1 ┆ 2 │
1027
+ # # │ 1 ┆ 1 │
1028
+ # # │ 1 ┆ null │
1029
+ # # │ 1 ┆ 1 │
1030
+ # # │ 2 ┆ 3 │
1031
+ # # └─────┴───────┘
1032
def rle
  # Forwards to the inherited implementation.
  super()
end
1035
+
1036
+ # Map values to run IDs.
1037
+ #
1038
+ # Similar to RLE, but it maps each value to an ID corresponding to the run into
1039
+ # which it falls. This is especially useful when you want to define groups by
1040
+ # runs of identical values rather than the values themselves.
1041
+ #
1042
+ # @return [Series]
1043
+ #
1044
+ # @example
1045
+ # s = Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3])
1046
+ # s.rle_id
1047
+ # # =>
1048
+ # # shape: (8,)
1049
+ # # Series: 's' [u32]
1050
+ # # [
1051
+ # # 0
1052
+ # # 0
1053
+ # # 1
1054
+ # # 2
1055
+ # # 3
1056
+ # # 4
1057
+ # # 5
1058
+ # # 5
1059
+ # # ]
1060
def rle_id
  # Forwards to the inherited implementation.
  super()
end
1063
+
1064
+ # Count the unique values in a Series.
1065
+ #
1066
+ # @param sort [Boolean]
1067
+ # Ensure the output is sorted from most values to least.
1068
+ #
1069
+ # @return [DataFrame]
1070
+ #
1071
+ # @example
1072
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1073
+ # s.value_counts.sort("a")
1074
+ # # =>
1075
+ # # shape: (3, 2)
1076
+ # # ┌─────┬────────┐
1077
+ # # │ a ┆ count │
1078
+ # # │ --- ┆ --- │
1079
+ # # │ i64 ┆ u32 │
1080
+ # # ╞═════╪════════╡
1081
+ # # │ 1 ┆ 1 │
1082
+ # # │ 2 ┆ 2 │
1083
+ # # │ 3 ┆ 1 │
1084
+ # # └─────┴────────┘
1085
+ def value_counts(
1086
+ sort: false,
1087
+ parallel: false,
1088
+ name: nil,
1089
+ normalize: false
1090
+ )
1091
+ if name.nil?
1092
+ if normalize
1093
+ name = "proportion"
1094
+ else
1095
+ name = "count"
1096
+ end
1097
+ end
1098
+ DataFrame._from_rbdf(
1099
+ self._s.value_counts(
1100
+ sort, parallel, name, normalize
1101
+ )
1102
+ )
1103
+ end
1104
+
1105
+ # Return a count of the unique values in the order of appearance.
1106
+ #
1107
+ # @return [Series]
1108
+ #
1109
+ # @example
1110
+ # s = Polars::Series.new("id", ["a", "b", "b", "c", "c", "c"])
1111
+ # s.unique_counts
1112
+ # # =>
1113
+ # # shape: (3,)
1114
+ # # Series: 'id' [u32]
1115
+ # # [
1116
+ # # 1
1117
+ # # 2
1118
+ # # 3
1119
+ # # ]
1120
+ def unique_counts
1121
+ super
1122
+ end
1123
+
1124
+ # Computes the entropy.
1125
+ #
1126
+ # Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities.
1127
+ #
1128
+ # @param base [Float]
1129
+ # Given base, defaults to `e`
1130
+ # @param normalize [Boolean]
1131
+ # Normalize pk if it doesn't sum to 1.
1132
+ #
1133
+ # @return [Float, nil]
1134
+ #
1135
+ # @example
1136
+ # a = Polars::Series.new([0.99, 0.005, 0.005])
1137
+ # a.entropy(normalize: true)
1138
+ # # => 0.06293300616044681
1139
+ #
1140
+ # @example
1141
+ # b = Polars::Series.new([0.65, 0.10, 0.25])
1142
+ # b.entropy(normalize: true)
1143
+ # # => 0.8568409950394724
1144
+ def entropy(base: Math::E, normalize: false)
1145
+ Polars.select(Polars.lit(self).entropy(base: base, normalize: normalize)).to_series[0]
1146
+ end
1147
+
1148
+ # Run an expression over a sliding window that increases `1` slot every iteration.
1149
+ #
1150
+ # @param expr [Expr]
1151
+ # Expression to evaluate
1152
+ # @param min_periods [Integer]
1153
+ # Number of valid values there should be in the window before the expression
1154
+ # is evaluated. valid values = `length - null_count`
1155
+ # @param parallel [Boolean]
1156
+ # Run in parallel. Don't do this in a group by or another operation that
1157
+ # already has much parallelization.
1158
+ #
1159
+ # @return [Series]
1160
+ #
1161
+ # @note
1162
+ # This functionality is experimental and may change without it being considered a
1163
+ # breaking change.
1164
+ #
1165
+ # @note
1166
+ # This can be really slow as it can have `O(n^2)` complexity. Don't use this
1167
+ # for operations that visit all elements.
1168
+ #
1169
+ # @example
1170
+ # s = Polars::Series.new("values", [1, 2, 3, 4, 5])
1171
+ # s.cumulative_eval(Polars.element.first - Polars.element.last ** 2)
1172
+ # # =>
1173
+ # # shape: (5,)
1174
+ # # Series: 'values' [i64]
1175
+ # # [
1176
+ # # 0
1177
+ # # -3
1178
+ # # -8
1179
+ # # -15
1180
+ # # -24
1181
+ # # ]
1182
+ def cumulative_eval(expr, min_periods: 1, parallel: false)
1183
+ super
1184
+ end
1185
+
1186
+ # Return a copy of the Series with a new alias/name.
1187
+ #
1188
+ # @param name [String]
1189
+ # New name.
1190
+ #
1191
+ # @return [Series]
1192
+ #
1193
+ # @example
1194
+ # s = Polars::Series.new("x", [1, 2, 3])
1195
+ # s.alias("y")
1196
+ def alias(name)
1197
+ s = dup
1198
+ s._s.rename(name)
1199
+ s
1200
+ end
1201
+
1202
+ # Rename this Series.
1203
+ #
1204
+ # @param name [String]
1205
+ # New name.
1206
+ # @param in_place [Boolean]
1207
+ # Modify the Series in-place.
1208
+ #
1209
+ # @return [Series]
1210
+ #
1211
+ # @example
1212
+ # s = Polars::Series.new("a", [1, 2, 3])
1213
+ # s.rename("b")
1214
def rename(name, in_place: false)
  # Without in_place, hand back the result of `alias` and leave the receiver alone.
  return self.alias(name) unless in_place

  # In-place: rename the underlying native series and return the receiver.
  _s.rename(name)
  self
end
1222
+
1223
+ # Get the length of each individual chunk.
1224
+ #
1225
+ # @return [Array]
1226
+ #
1227
+ # @example
1228
+ # s = Polars::Series.new("a", [1, 2, 3])
1229
+ # s2 = Polars::Series.new("b", [4, 5, 6])
1230
+ #
1231
+ # @example Concatenate Series with rechunk: true
1232
+ # Polars.concat([s, s2]).chunk_lengths
1233
+ # # => [6]
1234
+ #
1235
+ # @example Concatenate Series with rechunk: false
1236
+ # Polars.concat([s, s2], rechunk: false).chunk_lengths
1237
+ # # => [3, 3]
1238
+ def chunk_lengths
1239
+ _s.chunk_lengths
1240
+ end
1241
+
1242
+ # Get the number of chunks that this Series contains.
1243
+ #
1244
+ # @return [Integer]
1245
+ #
1246
+ # @example
1247
+ # s = Polars::Series.new("a", [1, 2, 3])
1248
+ # s2 = Polars::Series.new("b", [4, 5, 6])
1249
+ #
1250
+ # @example Concatenate Series with rechunk: true
1251
+ # Polars.concat([s, s2]).n_chunks
1252
+ # # => 1
1253
+ #
1254
+ # @example Concatenate Series with rechunk: false
1255
+ # Polars.concat([s, s2], rechunk: false).n_chunks
1256
+ # # => 2
1257
+ def n_chunks
1258
+ _s.n_chunks
1259
+ end
1260
+
1261
+ # Get an array with the cumulative sum computed at every element.
1262
+ #
1263
+ # @param reverse [Boolean]
1264
+ # reverse the operation.
1265
+ #
1266
+ # @return [Series]
1267
+ #
1268
+ # @note
1269
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
1270
+ # `:i64` before summing to prevent overflow issues.
1271
+ #
1272
+ # @example
1273
+ # s = Polars::Series.new("a", [1, 2, 3])
1274
+ # s.cum_sum
1275
+ # # =>
1276
+ # # shape: (3,)
1277
+ # # Series: 'a' [i64]
1278
+ # # [
1279
+ # # 1
1280
+ # # 3
1281
+ # # 6
1282
+ # # ]
1283
+ def cum_sum(reverse: false)
1284
+ super
1285
+ end
1286
+ alias_method :cumsum, :cum_sum
1287
+
1288
+ # Get an array with the cumulative min computed at every element.
1289
+ #
1290
+ # @param reverse [Boolean]
1291
+ # reverse the operation.
1292
+ #
1293
+ # @return [Series]
1294
+ #
1295
+ # @example
1296
+ # s = Polars::Series.new("a", [3, 5, 1])
1297
+ # s.cum_min
1298
+ # # =>
1299
+ # # shape: (3,)
1300
+ # # Series: 'a' [i64]
1301
+ # # [
1302
+ # # 3
1303
+ # # 3
1304
+ # # 1
1305
+ # # ]
1306
+ def cum_min(reverse: false)
1307
+ super
1308
+ end
1309
+ alias_method :cummin, :cum_min
1310
+
1311
+ # Get an array with the cumulative max computed at every element.
1312
+ #
1313
+ # @param reverse [Boolean]
1314
+ # reverse the operation.
1315
+ #
1316
+ # @return [Series]
1317
+ #
1318
+ # @example
1319
+ # s = Polars::Series.new("a", [3, 5, 1])
1320
+ # s.cum_max
1321
+ # # =>
1322
+ # # shape: (3,)
1323
+ # # Series: 'a' [i64]
1324
+ # # [
1325
+ # # 3
1326
+ # # 5
1327
+ # # 5
1328
+ # # ]
1329
+ def cum_max(reverse: false)
1330
+ super
1331
+ end
1332
+ alias_method :cummax, :cum_max
1333
+
1334
+ # Get an array with the cumulative product computed at every element.
1335
+ #
1336
+ # @param reverse [Boolean]
1337
+ # reverse the operation.
1338
+ #
1339
+ # @return [Series]
1340
+ #
1341
+ # @note
1342
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
1343
+ # `:i64` before multiplying to prevent overflow issues.
1344
+ #
1345
+ # @example
1346
+ # s = Polars::Series.new("a", [1, 2, 3])
1347
+ # s.cum_prod
1348
+ # # =>
1349
+ # # shape: (3,)
1350
+ # # Series: 'a' [i64]
1351
+ # # [
1352
+ # # 1
1353
+ # # 2
1354
+ # # 6
1355
+ # # ]
1356
+ def cum_prod(reverse: false)
1357
+ super
1358
+ end
1359
+ alias_method :cumprod, :cum_prod
1360
+
1361
+ # Get the first `n` rows.
1362
+ #
1363
+ # Alias for {#head}.
1364
+ #
1365
+ # @param n [Integer]
1366
+ # Number of rows to return.
1367
+ #
1368
+ # @return [Series]
1369
+ #
1370
+ # @example
1371
+ # s = Polars::Series.new("a", [1, 2, 3])
1372
+ # s.limit(2)
1373
+ # # =>
1374
+ # # shape: (2,)
1375
+ # # Series: 'a' [i64]
1376
+ # # [
1377
+ # # 1
1378
+ # # 2
1379
+ # # ]
1380
+ def limit(n = 10)
1381
+ to_frame.select(F.col(name).limit(n)).to_series
1382
+ end
1383
+
1384
+ # Get a slice of this Series.
1385
+ #
1386
+ # @param offset [Integer]
1387
+ # Start index. Negative indexing is supported.
1388
+ # @param length [Integer, nil]
1389
+ # Length of the slice. If set to `nil`, all rows starting at the offset
1390
+ # will be selected.
1391
+ #
1392
+ # @return [Series]
1393
+ #
1394
+ # @example
1395
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
1396
+ # s.slice(1, 2)
1397
+ # # =>
1398
+ # # shape: (2,)
1399
+ # # Series: 'a' [i64]
1400
+ # # [
1401
+ # # 2
1402
+ # # 3
1403
+ # # ]
1404
+ def slice(offset, length = nil)
1405
+ self.class._from_rbseries(_s.slice(offset, length))
1406
+ end
1407
+
1408
+ # Append a Series to this one.
1409
+ #
1410
+ # @param other [Series]
1411
+ # Series to append.
1412
+ # @param append_chunks [Boolean]
1413
+ # If set to `true` the append operation will add the chunks from `other` to
1414
+ # self. This is super cheap.
1415
+ #
1416
+ # If set to `false` the append operation will do the same as
1417
+ # {DataFrame#extend} which extends the memory backed by this Series with
1418
+ # the values from `other`.
1419
+ #
1420
+ # Different from `append_chunks`, `extend` appends the data from `other` to
1421
+ # the underlying memory locations and thus may cause a reallocation (which is
1422
+ # expensive).
1423
+ #
1424
+ # If this does not cause a reallocation, the resulting data structure will not
1425
+ # have any extra chunks and thus will yield faster queries.
1426
+ #
1427
+ # Prefer `extend` over `append_chunks` when you want to do a query after a
1428
+ # single append. For instance during online operations where you add `n` rows
1429
+ # and rerun a query.
1430
+ #
1431
+ # Prefer `append_chunks` over `extend` when you want to append many times
1432
+ # before doing a query. For instance, when you read in multiple files and want
1433
+ # to store them in a single Series. In the latter case, finish the sequence
1434
+ # of `append_chunks` operations with a `rechunk`.
1435
+ #
1436
+ # @return [Series]
1437
+ #
1438
+ # @example
1439
+ # s = Polars::Series.new("a", [1, 2, 3])
1440
+ # s2 = Polars::Series.new("b", [4, 5, 6])
1441
+ # s.append(s2)
1442
+ # # =>
1443
+ # # shape: (6,)
1444
+ # # Series: 'a' [i64]
1445
+ # # [
1446
+ # # 1
1447
+ # # 2
1448
+ # # 3
1449
+ # # 4
1450
+ # # 5
1451
+ # # 6
1452
+ # # ]
1453
def append(other, append_chunks: true)
  begin
    if append_chunks
      _s.append(other._s)
    else
      _s.extend(other._s)
    end
  rescue => e
    # Anything other than the borrow conflict is a genuine failure.
    raise e unless e.message == "Already mutably borrowed"

    # NOTE(review): presumably raised when `other` shares this Series'
    # underlying native object (e.g. `s.append(s)`); retry against a copy.
    # `append_chunks` is keyword-only, so it has to be forwarded by keyword:
    # passing it positionally raised ArgumentError instead of retrying.
    append(other.clone, append_chunks: append_chunks)
  end
  self
end
1469
+
1470
+ # Filter elements by a boolean mask.
1471
+ #
1472
+ # @param predicate [Series, Array]
1473
+ # Boolean mask.
1474
+ #
1475
+ # @return [Series]
1476
+ #
1477
+ # @example
1478
+ # s = Polars::Series.new("a", [1, 2, 3])
1479
+ # mask = Polars::Series.new("", [true, false, true])
1480
+ # s.filter(mask)
1481
+ # # =>
1482
+ # # shape: (2,)
1483
+ # # Series: 'a' [i64]
1484
+ # # [
1485
+ # # 1
1486
+ # # 3
1487
+ # # ]
1488
def filter(predicate)
  # A plain Ruby array is wrapped in an unnamed Series so it exposes `_s`.
  mask = predicate.is_a?(::Array) ? Series.new("", predicate) : predicate
  Utils.wrap_s(_s.filter(mask._s))
end
1494
+
1495
+ # Get the first `n` rows.
1496
+ #
1497
+ # @param n [Integer]
1498
+ # Number of rows to return.
1499
+ #
1500
+ # @return [Series]
1501
+ #
1502
+ # @example
1503
+ # s = Polars::Series.new("a", [1, 2, 3])
1504
+ # s.head(2)
1505
+ # # =>
1506
+ # # shape: (2,)
1507
+ # # Series: 'a' [i64]
1508
+ # # [
1509
+ # # 1
1510
+ # # 2
1511
+ # # ]
1512
+ def head(n = 10)
1513
+ to_frame.select(F.col(name).head(n)).to_series
1514
+ end
1515
+
1516
+ # Get the last `n` rows.
1517
+ #
1518
+ # @param n [Integer]
1519
+ # Number of rows to return.
1520
+ #
1521
+ # @return [Series]
1522
+ #
1523
+ # @example
1524
+ # s = Polars::Series.new("a", [1, 2, 3])
1525
+ # s.tail(2)
1526
+ # # =>
1527
+ # # shape: (2,)
1528
+ # # Series: 'a' [i64]
1529
+ # # [
1530
+ # # 2
1531
+ # # 3
1532
+ # # ]
1533
+ def tail(n = 10)
1534
+ to_frame.select(F.col(name).tail(n)).to_series
1535
+ end
1536
+
1537
+ # Take every nth value in the Series and return as new Series.
1538
+ #
1539
+ # @return [Series]
1540
+ #
1541
+ # @example
1542
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
1543
+ # s.take_every(2)
1544
+ # # =>
1545
+ # # shape: (2,)
1546
+ # # Series: 'a' [i64]
1547
+ # # [
1548
+ # # 1
1549
+ # # 3
1550
+ # # ]
1551
+ def take_every(n)
1552
+ super
1553
+ end
1554
+
1555
+ # Sort this Series.
1556
+ #
1557
+ # @param reverse [Boolean]
1558
+ # Reverse sort.
1559
+ # @param in_place [Boolean]
1560
+ # Sort in place.
1561
+ #
1562
+ # @return [Series]
1563
+ #
1564
+ # @example
1565
+ # s = Polars::Series.new("a", [1, 3, 4, 2])
1566
+ # s.sort
1567
+ # # =>
1568
+ # # shape: (4,)
1569
+ # # Series: 'a' [i64]
1570
+ # # [
1571
+ # # 1
1572
+ # # 2
1573
+ # # 3
1574
+ # # 4
1575
+ # # ]
1576
+ # s.sort(reverse: true)
1577
+ # # =>
1578
+ # # shape: (4,)
1579
+ # # Series: 'a' [i64]
1580
+ # # [
1581
+ # # 4
1582
+ # # 3
1583
+ # # 2
1584
+ # # 1
1585
+ # # ]
1586
def sort(reverse: false, nulls_last: false, multithreaded: true, in_place: false)
  # The native sort is the same in both modes; only what happens to its result differs.
  sorted = _s.sort(reverse, nulls_last, multithreaded)
  return Utils.wrap_s(sorted) unless in_place

  # In-place: swap the underlying native series and return the receiver.
  self._s = sorted
  self
end
1594
+
1595
+ # Return the `k` largest elements.
1596
+ #
1597
+ # @param k [Integer]
1598
+ # Number of elements to return.
1599
+ #
1600
+ # @return [Series]
1601
+ #
1602
+ # @example
1603
+ # s = Polars::Series.new("a", [2, 5, 1, 4, 3])
1604
+ # s.top_k(k: 3)
1605
+ # # =>
1606
+ # # shape: (3,)
1607
+ # # Series: 'a' [i64]
1608
+ # # [
1609
+ # # 5
1610
+ # # 4
1611
+ # # 3
1612
+ # # ]
1613
+ def top_k(k: 5)
1614
+ super
1615
+ end
1616
+
1617
+ # Return the `k` smallest elements.
1618
+ #
1619
+ # @param k [Integer]
1620
+ # Number of elements to return.
1621
+ #
1622
+ # @return [Series]
1623
+ #
1624
+ # @example
1625
+ # s = Polars::Series.new("a", [2, 5, 1, 4, 3])
1626
+ # s.bottom_k(k: 3)
1627
+ # # =>
1628
+ # # shape: (3,)
1629
+ # # Series: 'a' [i64]
1630
+ # # [
1631
+ # # 1
1632
+ # # 2
1633
+ # # 3
1634
+ # # ]
1635
+ def bottom_k(k: 5)
1636
+ super
1637
+ end
1638
+
1639
+ # Get the index values that would sort this Series.
1640
+ #
1641
+ # @param reverse [Boolean]
1642
+ # Sort in reverse (descending) order.
1643
+ # @param nulls_last [Boolean]
1644
+ # Place null values last instead of first.
1645
+ #
1646
+ # @return [Series]
1647
+ #
1648
+ # @example
1649
+ # s = Polars::Series.new("a", [5, 3, 4, 1, 2])
1650
+ # s.arg_sort
1651
+ # # =>
1652
+ # # shape: (5,)
1653
+ # # Series: 'a' [u32]
1654
+ # # [
1655
+ # # 3
1656
+ # # 4
1657
+ # # 1
1658
+ # # 2
1659
+ # # 0
1660
+ # # ]
1661
+ def arg_sort(reverse: false, nulls_last: false)
1662
+ super
1663
+ end
1664
+
1665
+ # Get the index values that would sort this Series.
1666
+ #
1667
+ # Alias for {#arg_sort}.
1668
+ #
1669
+ # @param reverse [Boolean]
1670
+ # Sort in reverse (descending) order.
1671
+ # @param nulls_last [Boolean]
1672
+ # Place null values last instead of first.
1673
+ #
1674
+ # @return [Series]
1675
+ def argsort(reverse: false, nulls_last: false)
1676
+ super
1677
+ end
1678
+
1679
+ # Get unique index as Series.
1680
+ #
1681
+ # @return [Series]
1682
+ #
1683
+ # @example
1684
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1685
+ # s.arg_unique
1686
+ # # =>
1687
+ # # shape: (3,)
1688
+ # # Series: 'a' [u32]
1689
+ # # [
1690
+ # # 0
1691
+ # # 1
1692
+ # # 3
1693
+ # # ]
1694
+ def arg_unique
1695
+ super
1696
+ end
1697
+
1698
+ # Get the index of the minimal value.
1699
+ #
1700
+ # @return [Integer, nil]
1701
+ #
1702
+ # @example
1703
+ # s = Polars::Series.new("a", [3, 2, 1])
1704
+ # s.arg_min
1705
+ # # => 2
1706
+ def arg_min
1707
+ _s.arg_min
1708
+ end
1709
+
1710
+ # Get the index of the maximal value.
1711
+ #
1712
+ # @return [Integer, nil]
1713
+ #
1714
+ # @example
1715
+ # s = Polars::Series.new("a", [3, 2, 1])
1716
+ # s.arg_max
1717
+ # # => 0
1718
+ def arg_max
1719
+ _s.arg_max
1720
+ end
1721
+
1722
+ # Find indices where elements should be inserted to maintain order.
1723
+ #
1724
+ # @param element [Object]
1725
+ # Expression or scalar value.
1726
+ #
1727
+ # @return [Integer, Series]
1728
+ def search_sorted(element, side: "any")
1729
+ if element.is_a?(Integer) || element.is_a?(Float)
1730
+ return Polars.select(Polars.lit(self).search_sorted(element, side: side)).item
1731
+ end
1732
+ element = Series.new(element)
1733
+ Polars.select(Polars.lit(self).search_sorted(element, side: side)).to_series
1734
+ end
1735
+
1736
+ # Get unique elements in series.
1737
+ #
1738
+ # @param maintain_order [Boolean]
1739
+ # Maintain order of data. This requires more work.
1740
+ #
1741
+ # @return [Series]
1742
+ #
1743
+ # @example
1744
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1745
+ # s.unique.sort
1746
+ # # =>
1747
+ # # shape: (3,)
1748
+ # # Series: 'a' [i64]
1749
+ # # [
1750
+ # # 1
1751
+ # # 2
1752
+ # # 3
1753
+ # # ]
1754
+ def unique(maintain_order: false)
1755
+ super
1756
+ end
1757
+ alias_method :uniq, :unique
1758
+
1759
+ # Take values by index.
1760
+ #
1761
+ # @param indices [Array]
1762
+ # Index location used for selection.
1763
+ #
1764
+ # @return [Series]
1765
+ #
1766
+ # @example
1767
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
1768
+ # s.take([1, 3])
1769
+ # # =>
1770
+ # # shape: (2,)
1771
+ # # Series: 'a' [i64]
1772
+ # # [
1773
+ # # 2
1774
+ # # 4
1775
+ # # ]
1776
+ def take(indices)
1777
+ to_frame.select(Polars.col(name).take(indices)).to_series
1778
+ end
1779
+
1780
+ # Count the null values in this Series.
1781
+ #
1782
+ # @return [Integer]
1783
+ def null_count
1784
+ _s.null_count
1785
+ end
1786
+
1787
+ # Return `true` if the Series has a validity bitmask.
1788
+ #
1789
+ # If there is none, it means that there are no null values.
1790
+ # Use this to swiftly assert a Series does not have null values.
1791
+ #
1792
+ # @return [Boolean]
1793
+ def has_nulls
1794
+ _s.has_nulls
1795
+ end
1796
+ alias_method :has_validity, :has_nulls
1797
+
1798
+ # Check if the Series is empty.
1799
+ #
1800
+ # @return [Boolean]
1801
+ #
1802
+ # @example
1803
+ # s = Polars::Series.new("a", [])
1804
+ # s.is_empty
1805
+ # # => true
1806
+ def is_empty
1807
+ len == 0
1808
+ end
1809
+ alias_method :empty?, :is_empty
1810
+
1811
+ # Returns a boolean Series indicating which values are null.
1812
+ #
1813
+ # @return [Series]
1814
+ #
1815
+ # @example
1816
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, nil])
1817
+ # s.is_null
1818
+ # # =>
1819
+ # # shape: (4,)
1820
+ # # Series: 'a' [bool]
1821
+ # # [
1822
+ # # false
1823
+ # # false
1824
+ # # false
1825
+ # # true
1826
+ # # ]
1827
+ def is_null
1828
+ super
1829
+ end
1830
+
1831
+ # Returns a boolean Series indicating which values are not null.
1832
+ #
1833
+ # @return [Series]
1834
+ #
1835
+ # @example
1836
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, nil])
1837
+ # s.is_not_null
1838
+ # # =>
1839
+ # # shape: (4,)
1840
+ # # Series: 'a' [bool]
1841
+ # # [
1842
+ # # true
1843
+ # # true
1844
+ # # true
1845
+ # # false
1846
+ # # ]
1847
+ def is_not_null
1848
+ super
1849
+ end
1850
+
1851
+ # Returns a boolean Series indicating which values are finite.
1852
+ #
1853
+ # @return [Series]
1854
+ #
1855
+ # @example
1856
+ # s = Polars::Series.new("a", [1.0, 2.0, Float::INFINITY])
1857
+ # s.is_finite
1858
+ # # =>
1859
+ # # shape: (3,)
1860
+ # # Series: 'a' [bool]
1861
+ # # [
1862
+ # # true
1863
+ # # true
1864
+ # # false
1865
+ # # ]
1866
+ def is_finite
1867
+ super
1868
+ end
1869
+
1870
+ # Returns a boolean Series indicating which values are infinite.
1871
+ #
1872
+ # @return [Series]
1873
+ #
1874
+ # @example
1875
+ # s = Polars::Series.new("a", [1.0, 2.0, Float::INFINITY])
1876
+ # s.is_infinite
1877
+ # # =>
1878
+ # # shape: (3,)
1879
+ # # Series: 'a' [bool]
1880
+ # # [
1881
+ # # false
1882
+ # # false
1883
+ # # true
1884
+ # # ]
1885
+ def is_infinite
1886
+ super
1887
+ end
1888
+
1889
+ # Returns a boolean Series indicating which values are NaN.
1890
+ #
1891
+ # @return [Series]
1892
+ #
1893
+ # @example
1894
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
1895
+ # s.is_nan
1896
+ # # =>
1897
+ # # shape: (4,)
1898
+ # # Series: 'a' [bool]
1899
+ # # [
1900
+ # # false
1901
+ # # false
1902
+ # # false
1903
+ # # true
1904
+ # # ]
1905
+ def is_nan
1906
+ super
1907
+ end
1908
+
1909
+ # Returns a boolean Series indicating which values are not NaN.
1910
+ #
1911
+ # @return [Series]
1912
+ #
1913
+ # @example
1914
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
1915
+ # s.is_not_nan
1916
+ # # =>
1917
+ # # shape: (4,)
1918
+ # # Series: 'a' [bool]
1919
+ # # [
1920
+ # # true
1921
+ # # true
1922
+ # # true
1923
+ # # false
1924
+ # # ]
1925
+ def is_not_nan
1926
+ super
1927
+ end
1928
+
1929
+ # Check if elements of this Series are in the other Series.
1930
+ #
1931
+ # @return [Series]
1932
+ #
1933
+ # @example
1934
+ # s = Polars::Series.new("a", [1, 2, 3])
1935
+ # s2 = Polars::Series.new("b", [2, 4])
1936
+ # s2.is_in(s)
1937
+ # # =>
1938
+ # # shape: (2,)
1939
+ # # Series: 'b' [bool]
1940
+ # # [
1941
+ # # true
1942
+ # # false
1943
+ # # ]
1944
+ #
1945
+ # @example
1946
+ # sets = Polars::Series.new("sets", [[1, 2, 3], [1, 2], [9, 10]])
1947
+ # # =>
1948
+ # # shape: (3,)
1949
+ # # Series: 'sets' [list[i64]]
1950
+ # # [
1951
+ # # [1, 2, 3]
1952
+ # # [1, 2]
1953
+ # # [9, 10]
1954
+ # # ]
1955
+ #
1956
+ # @example
1957
+ # optional_members = Polars::Series.new("optional_members", [1, 2, 3])
1958
+ # # =>
1959
+ # # shape: (3,)
1960
+ # # Series: 'optional_members' [i64]
1961
+ # # [
1962
+ # # 1
1963
+ # # 2
1964
+ # # 3
1965
+ # # ]
1966
+ #
1967
+ # @example
1968
+ # optional_members.is_in(sets)
1969
+ # # =>
1970
+ # # shape: (3,)
1971
+ # # Series: 'optional_members' [bool]
1972
+ # # [
1973
+ # # true
1974
+ # # true
1975
+ # # false
1976
+ # # ]
1977
+ def is_in(other)
1978
+ super
1979
+ end
1980
+ alias_method :in?, :is_in
1981
+
1982
+ # Get index values where Boolean Series evaluate `true`.
1983
+ #
1984
+ # @return [Series]
1985
+ #
1986
+ # @example
1987
+ # s = Polars::Series.new("a", [1, 2, 3])
1988
+ # (s == 2).arg_true
1989
+ # # =>
1990
+ # # shape: (1,)
1991
+ # # Series: 'a' [u32]
1992
+ # # [
1993
+ # # 1
1994
+ # # ]
1995
+ def arg_true
1996
+ Polars.arg_where(self, eager: true)
1997
+ end
1998
+
1999
+ # Get mask of all unique values.
2000
+ #
2001
+ # @return [Series]
2002
+ #
2003
+ # @example
2004
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
2005
+ # s.is_unique
2006
+ # # =>
2007
+ # # shape: (4,)
2008
+ # # Series: 'a' [bool]
2009
+ # # [
2010
+ # # true
2011
+ # # false
2012
+ # # false
2013
+ # # true
2014
+ # # ]
2015
+ def is_unique
2016
+ super
2017
+ end
2018
+
2019
+ # Get a mask of the first unique value.
2020
+ #
2021
+ # @return [Series]
2022
+ def is_first
2023
+ super
2024
+ end
2025
+
2026
+ # Get mask of all duplicated values.
2027
+ #
2028
+ # @return [Series]
2029
+ #
2030
+ # @example
2031
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
2032
+ # s.is_duplicated
2033
+ # # =>
2034
+ # # shape: (4,)
2035
+ # # Series: 'a' [bool]
2036
+ # # [
2037
+ # # false
2038
+ # # true
2039
+ # # true
2040
+ # # false
2041
+ # # ]
2042
+ def is_duplicated
2043
+ super
2044
+ end
2045
+
2046
+ # Explode a list or utf8 Series.
2047
+ #
2048
+ # This means that every item is expanded to a new row.
2049
+ #
2050
+ # @return [Series]
2051
+ #
2052
+ # @example
2053
+ # s = Polars::Series.new("a", [[1, 2], [3, 4], [9, 10]])
2054
+ # s.explode
2055
+ # # =>
2056
+ # # shape: (6,)
2057
+ # # Series: 'a' [i64]
2058
+ # # [
2059
+ # # 1
2060
+ # # 2
2061
+ # # 3
2062
+ # # 4
2063
+ # # 9
2064
+ # # 10
2065
+ # # ]
2066
+ def explode
2067
+ super
2068
+ end
2069
+
2070
+ # Check if series is equal with another Series.
2071
+ #
2072
+ # @param other [Series]
2073
+ # Series to compare with.
2074
+ # @param strict [Boolean]
2075
+ # Require data types to match.
2076
+ # @param check_names [Boolean]
2077
+ # Require names to match.
2078
+ # @param null_equal [Boolean]
2079
+ # Consider null values as equal.
2080
+ #
2081
+ # @return [Boolean]
2082
+ #
2083
+ # @example
2084
+ # s = Polars::Series.new("a", [1, 2, 3])
2085
+ # s2 = Polars::Series.new("b", [4, 5, 6])
2086
+ # s.equals(s)
2087
+ # # => true
2088
+ # s.equals(s2)
2089
+ # # => false
2090
+ def equals(other, strict: false, check_names: false, null_equal: false)
2091
+ _s.equals(other._s, strict, check_names, null_equal)
2092
+ end
2093
+ alias_method :series_equal, :equals
2094
+
2095
+ # Return the number of non-null elements in the Series.
2096
+ #
2097
+ # @return [Integer]
2098
+ #
2099
+ # @example
2100
+ # s = Polars::Series.new("a", [1, 2, nil])
2101
+ # s.count
2102
+ # # => 2
2103
+ def count
2104
+ len - null_count
2105
+ end
2106
+
2107
+ # Return the number of elements in the Series.
2108
+ #
2109
+ # @return [Integer]
2110
+ #
2111
+ # @example
2112
+ # s = Polars::Series.new("a", [1, 2, nil])
2113
+ # s.len
2114
+ # # => 3
2115
+ def len
2116
+ _s.len
2117
+ end
2118
+ alias_method :length, :len
2119
+ alias_method :size, :len
2120
+
2121
+ # Cast between data types.
2122
+ #
2123
+ # @param dtype [Symbol]
2124
+ # DataType to cast to
2125
+ # @param strict [Boolean]
2126
+ # Throw an error if a cast could not be done, for instance due to an overflow.
2127
+ #
2128
+ # @return [Series]
2129
+ #
2130
+ # @example
2131
+ # s = Polars::Series.new("a", [true, false, true])
2132
+ # s.cast(:u32)
2133
+ # # =>
2134
+ # # shape: (3,)
2135
+ # # Series: 'a' [u32]
2136
+ # # [
2137
+ # # 1
2138
+ # # 0
2139
+ # # 1
2140
+ # # ]
2141
+ def cast(dtype, strict: true)
2142
+ super
2143
+ end
2144
+
2145
+ # Cast to physical representation of the logical dtype.
2146
+ #
2147
+ # - `:date` -> `:i32`
2148
+ # - `:datetime` -> `:i64`
2149
+ # - `:time` -> `:i64`
2150
+ # - `:duration` -> `:i64`
2151
+ # - `:cat` -> `:u32`
2152
+ # - other data types will be left unchanged.
2153
+ #
2154
+ # @return [Series]
2155
+ #
2156
+ # @example
2157
+ # s = Polars::Series.new("values", ["a", nil, "x", "a"])
2158
+ # s.cast(:cat).to_physical
2159
+ # # =>
2160
+ # # shape: (4,)
2161
+ # # Series: 'values' [u32]
2162
+ # # [
2163
+ # # 0
2164
+ # # null
2165
+ # # 1
2166
+ # # 0
2167
+ # # ]
2168
+ def to_physical
2169
+ super
2170
+ end
2171
+
2172
+ # Convert this Series to a Ruby Array. This operation clones data.
2173
+ #
2174
+ # @return [Array]
2175
+ #
2176
+ # @example
2177
+ # s = Polars::Series.new("a", [1, 2, 3])
2178
+ # s.to_a
2179
+ # # => [1, 2, 3]
2180
+ def to_a
2181
+ _s.to_a
2182
+ end
2183
+
2184
+ # Create a single chunk of memory for this Series.
2185
+ #
2186
+ # @param in_place [Boolean]
2187
+ # In place or not.
2188
+ #
2189
+ # @return [Series]
2190
+ def rechunk(in_place: false)
2191
+ opt_s = _s.rechunk(in_place)
2192
+ in_place ? self : Utils.wrap_s(opt_s)
2193
+ end
2194
+
2195
+ # Return Series in reverse order.
2196
+ #
2197
+ # @return [Series]
2198
+ #
2199
+ # @example
2200
+ # s = Polars::Series.new("a", [1, 2, 3], dtype: :i8)
2201
+ # s.reverse
2202
+ # # =>
2203
+ # # shape: (3,)
2204
+ # # Series: 'a' [i8]
2205
+ # # [
2206
+ # # 3
2207
+ # # 2
2208
+ # # 1
2209
+ # # ]
2210
+ def reverse
2211
+ super
2212
+ end
2213
+
2214
+ # Check if this Series datatype is numeric.
2215
+ #
2216
+ # @return [Boolean]
2217
+ #
2218
+ # @example
2219
+ # s = Polars::Series.new("a", [1, 2, 3])
2220
+ # s.is_numeric
2221
+ # # => true
2222
+ def is_numeric
2223
+ [Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64].include?(dtype)
2224
+ end
2225
+ alias_method :numeric?, :is_numeric
2226
+
2227
+ # Check if this Series datatype is datelike.
2228
+ #
2229
+ # @return [Boolean]
2230
+ #
2231
+ # @example
2232
+ # s = Polars::Series.new([Date.new(2021, 1, 1), Date.new(2021, 1, 2), Date.new(2021, 1, 3)])
2233
+ # s.is_datelike
2234
+ # # => true
2235
+ def is_datelike
2236
+ [Date, Time].include?(dtype) || dtype.is_a?(Datetime) || dtype.is_a?(Duration)
2237
+ end
2238
+ alias_method :datelike?, :is_datelike
2239
+ alias_method :is_temporal, :is_datelike
2240
+ alias_method :temporal?, :is_datelike
2241
+
2242
+ # Check if this Series has floating point numbers.
2243
+ #
2244
+ # @return [Boolean]
2245
+ #
2246
+ # @example
2247
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0])
2248
+ # s.is_float
2249
+ # # => true
2250
+ def is_float
2251
+ [Float32, Float64].include?(dtype)
2252
+ end
2253
+ alias_method :float?, :is_float
2254
+
2255
+ # Check if this Series is a Boolean.
2256
+ #
2257
+ # @return [Boolean]
2258
+ #
2259
+ # @example
2260
+ # s = Polars::Series.new("a", [true, false, true])
2261
+ # s.is_boolean
2262
+ # # => true
2263
+ def is_boolean
2264
+ dtype == Boolean
2265
+ end
2266
+ alias_method :boolean?, :is_boolean
2267
+ alias_method :is_bool, :is_boolean
2268
+ alias_method :bool?, :is_boolean
2269
+
2270
+ # Check if this Series datatype is a Utf8.
2271
+ #
2272
+ # @return [Boolean]
2273
+ #
2274
+ # @example
2275
+ # s = Polars::Series.new("x", ["a", "b", "c"])
2276
+ # s.is_utf8
2277
+ # # => true
2278
+ def is_utf8
2279
+ dtype == String
2280
+ end
2281
+ alias_method :utf8?, :is_utf8
2282
+
2283
+ # def view
2284
+ # end
2285
+
2286
+ # Convert this Series to a Numo array. This operation clones data but is completely safe.
2287
+ #
2288
+ # @return [Numo::NArray]
2289
+ #
2290
+ # @example
2291
+ # s = Polars::Series.new("a", [1, 2, 3])
2292
+ # s.to_numo
2293
+ # # =>
2294
+ # # Numo::Int64#shape=[3]
2295
+ # # [1, 2, 3]
2296
def to_numo
  # Datelike values are always cast through Numo::RObject, nulls or not.
  return Numo::RObject.cast(to_a) if is_datelike
  # Otherwise, when `has_validity` is true, defer to the native conversion.
  return _s.to_numo if has_validity

  if is_numeric
    # TODO make more efficient
    numo_class = {
      UInt8 => Numo::UInt8,
      UInt16 => Numo::UInt16,
      UInt32 => Numo::UInt32,
      UInt64 => Numo::UInt64,
      Int8 => Numo::Int8,
      Int16 => Numo::Int16,
      Int32 => Numo::Int32,
      Int64 => Numo::Int64,
      Float32 => Numo::SFloat,
      Float64 => Numo::DFloat
    }.fetch(dtype.class)
    numo_class.cast(to_a)
  elsif is_boolean
    Numo::Bit.cast(to_a)
  else
    _s.to_numo
  end
end
2325
+
2326
+ # Set masked values.
2327
+ #
2328
+ # @param filter [Series]
2329
+ # Boolean mask.
2330
+ # @param value [Object]
2331
+ # Value with which to replace the masked values.
2332
+ #
2333
+ # @return [Series]
2334
+ #
2335
+ # @note
2336
+ # Use of this function is frequently an anti-pattern, as it can
2337
+ # block optimization (predicate pushdown, etc). Consider using
2338
+ # `Polars.when(predicate).then(value).otherwise(self)` instead.
2339
+ #
2340
+ # @example
2341
+ # s = Polars::Series.new("a", [1, 2, 3])
2342
+ # s.set(s == 2, 10)
2343
+ # # =>
2344
+ # # shape: (3,)
2345
+ # # Series: 'a' [i64]
2346
+ # # [
2347
+ # # 1
2348
+ # # 10
2349
+ # # 3
2350
+ # # ]
2351
+ def set(filter, value)
2352
+ Utils.wrap_s(_s.send("set_with_mask_#{DTYPE_TO_FFINAME.fetch(dtype.class)}", filter._s, value))
2353
+ end
2354
+
2355
+ # Set values at the index locations.
2356
+ #
2357
+ # @param idx [Object]
2358
+ # Integers representing the index locations.
2359
+ # @param value [Object]
2360
+ # Replacement values.
2361
+ #
2362
+ # @return [Series]
2363
+ #
2364
+ # @example
2365
+ # s = Polars::Series.new("a", [1, 2, 3])
2366
+ # s.set_at_idx(1, 10)
2367
+ # # =>
2368
+ # # shape: (3,)
2369
+ # # Series: 'a' [i64]
2370
+ # # [
2371
+ # # 1
2372
+ # # 10
2373
+ # # 3
2374
+ # # ]
2375
def scatter(idx, value)
  idx = [idx] if idx.is_a?(Integer)
  # Nothing to write: return the receiver untouched.
  return self if idx.length == 0

  idx = Series.new("", idx)
  if value.is_a?(Integer) || value.is_a?(Float) || Utils.bool?(value) || value.is_a?(::String) || value.nil?
    # Broadcast the scalar so there is one replacement value per index.
    # `idx` is known to be non-empty here, so no further length check is needed.
    value = Series.new("", [value])
    value = value.extend_constant(value[0], idx.length - 1)
  elsif !value.is_a?(Series)
    value = Series.new("", value)
  end

  # The native call's result is discarded and the receiver is returned, so the
  # update is expected to happen in place on the underlying series.
  _s.scatter(idx._s, value._s)
  self
end
2397
+ alias_method :set_at_idx, :scatter
2398
+
2399
+ # Create an empty copy of the current Series.
2400
+ #
2401
+ # The copy has identical name/dtype but no data.
2402
+ #
2403
+ # @return [Series]
2404
+ #
2405
+ # @example
2406
+ # s = Polars::Series.new("a", [nil, true, false])
2407
+ # s.cleared
2408
+ # # =>
2409
+ # # shape: (0,)
2410
+ # # Series: 'a' [bool]
2411
+ # # [
2412
+ # # ]
2413
def cleared
  # A Series that already has no rows is simply cloned; otherwise the
  # result is the receiver limited to zero rows.
  return clone unless len > 0

  limit(0)
end
2416
+
2417
+ # clone handled by initialize_copy
2418
+
2419
+ # Fill floating point NaN value with a fill value.
2420
+ #
2421
+ # @param fill_value [Object]
2422
+ # Value used to fill nan values.
2423
+ #
2424
+ # @return [Series]
2425
+ #
2426
+ # @example
2427
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
2428
+ # s.fill_nan(0)
2429
+ # # =>
2430
+ # # shape: (4,)
2431
+ # # Series: 'a' [f64]
2432
+ # # [
2433
+ # # 1.0
2434
+ # # 2.0
2435
+ # # 3.0
2436
+ # # 0.0
2437
+ # # ]
2438
+ def fill_nan(fill_value)
2439
+ super # zsuper: passes `fill_value` through unchanged; the receiving implementation is outside this chunk — TODO confirm
2440
+ end
2441
+
2442
+ # Fill null values using the specified value or strategy.
2443
+ #
2444
+ # @param value [Object]
2445
+ # Value used to fill null values.
2446
+ # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
2447
+ # Strategy used to fill null values.
2448
+ # @param limit [Integer, nil]
2449
+ # Number of consecutive null values to fill when using the "forward" or
2450
+ # "backward" strategy.
2451
+ #
2452
+ # @return [Series]
2453
+ #
2454
+ # @example
2455
+ # s = Polars::Series.new("a", [1, 2, 3, nil])
2456
+ # s.fill_null(strategy: "forward")
2457
+ # # =>
2458
+ # # shape: (4,)
2459
+ # # Series: 'a' [i64]
2460
+ # # [
2461
+ # # 1
2462
+ # # 2
2463
+ # # 3
2464
+ # # 3
2465
+ # # ]
2466
+ #
2467
+ # @example
2468
+ # s.fill_null(strategy: "min")
2469
+ # # =>
2470
+ # # shape: (4,)
2471
+ # # Series: 'a' [i64]
2472
+ # # [
2473
+ # # 1
2474
+ # # 2
2475
+ # # 3
2476
+ # # 1
2477
+ # # ]
2478
+ #
2479
+ # @example
2480
+ # s = Polars::Series.new("b", ["x", nil, "z"])
2481
+ # s.fill_null(Polars.lit(""))
2482
+ # # =>
2483
+ # # shape: (3,)
2484
+ # # Series: 'b' [str]
2485
+ # # [
2486
+ # # "x"
2487
+ # # ""
2488
+ # # "z"
2489
+ # # ]
2490
+ def fill_null(value = nil, strategy: nil, limit: nil)
2491
+ super # zsuper: passes `value`, `strategy:` and `limit:` through unchanged; the receiving implementation is outside this chunk — TODO confirm
2492
+ end
2493
+
2494
+ # Rounds down to the nearest integer value.
2495
+ #
2496
+ # Only works on floating point Series.
2497
+ #
2498
+ # @return [Series]
2499
+ #
2500
+ # @example
2501
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
2502
+ # s.floor
2503
+ # # =>
2504
+ # # shape: (3,)
2505
+ # # Series: 'a' [f64]
2506
+ # # [
2507
+ # # 1.0
2508
+ # # 2.0
2509
+ # # 3.0
2510
+ # # ]
2511
def floor
  # Calls `floor` on the underlying `_s` object and re-wraps the result.
  floored = _s.floor
  Utils.wrap_s(floored)
end
2514
+
2515
+ # Rounds up to the nearest integer value.
2516
+ #
2517
+ # Only works on floating point Series.
2518
+ #
2519
+ # @return [Series]
2520
+ #
2521
+ # @example
2522
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
2523
+ # s.ceil
2524
+ # # =>
2525
+ # # shape: (3,)
2526
+ # # Series: 'a' [f64]
2527
+ # # [
2528
+ # # 2.0
2529
+ # # 3.0
2530
+ # # 4.0
2531
+ # # ]
2532
+ def ceil
2533
+ super # zsuper: re-dispatches `ceil` with no arguments; the receiving implementation is outside this chunk — TODO confirm
2534
+ end
2535
+
2536
+ # Round underlying floating point data by `decimals` digits.
2537
+ #
2538
+ # @param decimals [Integer]
2539
+ # number of decimals to round by.
2540
+ #
2541
+ # @return [Series]
2542
+ #
2543
+ # @example
2544
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
2545
+ # s.round(2)
2546
+ # # =>
2547
+ # # shape: (3,)
2548
+ # # Series: 'a' [f64]
2549
+ # # [
2550
+ # # 1.12
2551
+ # # 2.57
2552
+ # # 3.9
2553
+ # # ]
2554
+ def round(decimals = 0)
2555
+ super # zsuper: passes `decimals` (default 0) through unchanged; the receiving implementation is outside this chunk — TODO confirm
2556
+ end
2557
+
2558
+ # Compute the dot/inner product between two Series.
2559
+ #
2560
+ # @param other [Object]
2561
+ # Series (or array) to compute dot product with.
2562
+ #
2563
+ # @return [Numeric]
2564
+ #
2565
+ # @example
2566
+ # s = Polars::Series.new("a", [1, 2, 3])
2567
+ # s2 = Polars::Series.new("b", [4.0, 5.0, 6.0])
2568
+ # s.dot(s2)
2569
+ # # => 32.0
2570
def dot(other)
  # Anything that is not already a Series is wrapped in one first.
  other = Series.new(other) unless other.is_a?(Series)

  # Both operands must have the same length; the message reports the
  # receiver's length first, then the other operand's.
  own_len = len
  other_len = other.len
  unless own_len == other_len
    raise ArgumentError, "Series length mismatch: expected #{own_len}, found #{other_len}"
  end

  # The underlying result is returned as-is (not re-wrapped).
  _s.dot(other._s)
end
2580
+
2581
+ # Compute the most occurring value(s).
2582
+ #
2583
+ # Can return multiple values.
2584
+ #
2585
+ # @return [Series]
2586
+ #
2587
+ # @example
2588
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
2589
+ # s.mode
2590
+ # # =>
2591
+ # # shape: (1,)
2592
+ # # Series: 'a' [i64]
2593
+ # # [
2594
+ # # 2
2595
+ # # ]
2596
+ def mode
2597
+ super # zsuper: re-dispatches `mode` with no arguments; the receiving implementation is outside this chunk — TODO confirm
2598
+ end
2599
+
2600
+ # Compute the element-wise indication of the sign.
2601
+ #
2602
+ # @return [Series]
2603
+ #
2604
+ # @example
2605
+ # s = Polars::Series.new("a", [-9.0, -0.0, 0.0, 4.0, nil])
2606
+ # s.sign
2607
+ # # =>
2608
+ # # shape: (5,)
2609
+ # # Series: 'a' [i64]
2610
+ # # [
2611
+ # # -1
2612
+ # # 0
2613
+ # # 0
2614
+ # # 1
2615
+ # # null
2616
+ # # ]
2617
+ def sign
2618
+ super # zsuper: re-dispatches `sign` with no arguments; the receiving implementation is outside this chunk — TODO confirm
2619
+ end
2620
+
2621
+ # Compute the element-wise value for the sine.
2622
+ #
2623
+ # @return [Series]
2624
+ #
2625
+ # @example
2626
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
2627
+ # s.sin
2628
+ # # =>
2629
+ # # shape: (3,)
2630
+ # # Series: 'a' [f64]
2631
+ # # [
2632
+ # # 0.0
2633
+ # # 1.0
2634
+ # # 1.2246e-16
2635
+ # # ]
2636
+ def sin
2637
+ super # zsuper: re-dispatches `sin` with no arguments; the receiving implementation is outside this chunk — TODO confirm
2638
+ end
2639
+
2640
+ # Compute the element-wise value for the cosine.
2641
+ #
2642
+ # @return [Series]
2643
+ #
2644
+ # @example
2645
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
2646
+ # s.cos
2647
+ # # =>
2648
+ # # shape: (3,)
2649
+ # # Series: 'a' [f64]
2650
+ # # [
2651
+ # # 1.0
2652
+ # # 6.1232e-17
2653
+ # # -1.0
2654
+ # # ]
2655
+ def cos
2656
+ super # zsuper: re-dispatches `cos` with no arguments; the receiving implementation is outside this chunk — TODO confirm
2657
+ end
2658
+
2659
+ # Compute the element-wise value for the tangent.
2660
+ #
2661
+ # @return [Series]
2662
+ #
2663
+ # @example
2664
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
2665
+ # s.tan
2666
+ # # =>
2667
+ # # shape: (3,)
2668
+ # # Series: 'a' [f64]
2669
+ # # [
2670
+ # # 0.0
2671
+ # # 1.6331e16
2672
+ # # -1.2246e-16
2673
+ # # ]
2674
+ def tan
2675
+ super # zsuper: re-dispatches `tan` with no arguments; the receiving implementation is outside this chunk — TODO confirm
2676
+ end
2677
+
2678
+ # Compute the element-wise value for the inverse sine.
2679
+ #
2680
+ # @return [Series]
2681
+ #
2682
+ # @example
2683
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2684
+ # s.arcsin
2685
+ # # =>
2686
+ # # shape: (3,)
2687
+ # # Series: 'a' [f64]
2688
+ # # [
2689
+ # # 1.570796
2690
+ # # 0.0
2691
+ # # -1.570796
2692
+ # # ]
2693
+ def arcsin
2694
+ super # zsuper: re-dispatches `arcsin` with no arguments; the receiving implementation is outside this chunk — TODO confirm
2695
+ end
2696
+ alias_method :asin, :arcsin
2697
+
2698
+ # Compute the element-wise value for the inverse cosine.
2699
+ #
2700
+ # @return [Series]
2701
+ #
2702
+ # @example
2703
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2704
+ # s.arccos
2705
+ # # =>
2706
+ # # shape: (3,)
2707
+ # # Series: 'a' [f64]
2708
+ # # [
2709
+ # # 0.0
2710
+ # # 1.570796
2711
+ # # 3.141593
2712
+ # # ]
2713
+ def arccos
2714
+ super # zsuper: re-dispatches `arccos` with no arguments; the receiving implementation is outside this chunk — TODO confirm
2715
+ end
2716
+ alias_method :acos, :arccos
2717
+
2718
+ # Compute the element-wise value for the inverse tangent.
2719
+ #
2720
+ # @return [Series]
2721
+ #
2722
+ # @example
2723
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2724
+ # s.arctan
2725
+ # # =>
2726
+ # # shape: (3,)
2727
+ # # Series: 'a' [f64]
2728
+ # # [
2729
+ # # 0.785398
2730
+ # # 0.0
2731
+ # # -0.785398
2732
+ # # ]
2733
+ def arctan
2734
+ super # zsuper: re-dispatches `arctan` with no arguments; the receiving implementation is outside this chunk — TODO confirm
2735
+ end
2736
+ alias_method :atan, :arctan
2737
+
2738
+ # Compute the element-wise value for the inverse hyperbolic sine.
2739
+ #
2740
+ # @return [Series]
2741
+ #
2742
+ # @example
2743
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2744
+ # s.arcsinh
2745
+ # # =>
2746
+ # # shape: (3,)
2747
+ # # Series: 'a' [f64]
2748
+ # # [
2749
+ # # 0.881374
2750
+ # # 0.0
2751
+ # # -0.881374
2752
+ # # ]
2753
+ def arcsinh
2754
+ super # zsuper: re-dispatches `arcsinh` with no arguments; the receiving implementation is outside this chunk — TODO confirm
2755
+ end
2756
+ alias_method :asinh, :arcsinh
2757
+
2758
+ # Compute the element-wise value for the inverse hyperbolic cosine.
2759
+ #
2760
+ # @return [Series]
2761
+ #
2762
+ # @example
2763
+ # s = Polars::Series.new("a", [5.0, 1.0, 0.0, -1.0])
2764
+ # s.arccosh
2765
+ # # =>
2766
+ # # shape: (4,)
2767
+ # # Series: 'a' [f64]
2768
+ # # [
2769
+ # # 2.292432
2770
+ # # 0.0
2771
+ # # NaN
2772
+ # # NaN
2773
+ # # ]
2774
+ def arccosh
2775
+ super # zsuper: re-dispatches `arccosh` with no arguments; the receiving implementation is outside this chunk — TODO confirm
2776
+ end
2777
+ alias_method :acosh, :arccosh
2778
+
2779
+ # Compute the element-wise value for the inverse hyperbolic tangent.
2780
+ #
2781
+ # @return [Series]
2782
+ #
2783
+ # @example
2784
+ # s = Polars::Series.new("a", [2.0, 1.0, 0.5, 0.0, -0.5, -1.0, -1.1])
2785
+ # s.arctanh
2786
+ # # =>
2787
+ # # shape: (7,)
2788
+ # # Series: 'a' [f64]
2789
+ # # [
2790
+ # # NaN
2791
+ # # inf
2792
+ # # 0.549306
2793
+ # # 0.0
2794
+ # # -0.549306
2795
+ # # -inf
2796
+ # # NaN
2797
+ # # ]
2798
+ def arctanh
2799
+ super # zsuper: re-dispatches `arctanh` with no arguments; the receiving implementation is outside this chunk — TODO confirm
2800
+ end
2801
+ alias_method :atanh, :arctanh
2802
+
2803
+ # Compute the element-wise value for the hyperbolic sine.
2804
+ #
2805
+ # @return [Series]
2806
+ #
2807
+ # @example
2808
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2809
+ # s.sinh
2810
+ # # =>
2811
+ # # shape: (3,)
2812
+ # # Series: 'a' [f64]
2813
+ # # [
2814
+ # # 1.175201
2815
+ # # 0.0
2816
+ # # -1.175201
2817
+ # # ]
2818
+ def sinh
2819
+ super # zsuper: re-dispatches `sinh` with no arguments; the receiving implementation is outside this chunk — TODO confirm
2820
+ end
2821
+
2822
+ # Compute the element-wise value for the hyperbolic cosine.
2823
+ #
2824
+ # @return [Series]
2825
+ #
2826
+ # @example
2827
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2828
+ # s.cosh
2829
+ # # =>
2830
+ # # shape: (3,)
2831
+ # # Series: 'a' [f64]
2832
+ # # [
2833
+ # # 1.543081
2834
+ # # 1.0
2835
+ # # 1.543081
2836
+ # # ]
2837
+ def cosh
2838
+ super # zsuper: re-dispatches `cosh` with no arguments; the receiving implementation is outside this chunk — TODO confirm
2839
+ end
2840
+
2841
+ # Compute the element-wise value for the hyperbolic tangent.
2842
+ #
2843
+ # @return [Series]
2844
+ #
2845
+ # @example
2846
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2847
+ # s.tanh
2848
+ # # =>
2849
+ # # shape: (3,)
2850
+ # # Series: 'a' [f64]
2851
+ # # [
2852
+ # # 0.761594
2853
+ # # 0.0
2854
+ # # -0.761594
2855
+ # # ]
2856
+ def tanh
2857
+ super # zsuper: re-dispatches `tanh` with no arguments; the receiving implementation is outside this chunk — TODO confirm
2858
+ end
2859
+
2860
+ # Apply a custom/user-defined function (UDF) over elements in this Series and
2861
+ # return a new Series.
2862
+ #
2863
+ # If the function returns another datatype, the return_dtype arg should be set,
2864
+ # otherwise the method will fail.
2865
+ #
2866
+ # @param return_dtype [Symbol]
2867
+ # Output datatype. If none is given, the same datatype as this Series will be
2868
+ # used.
2869
+ # @param skip_nulls [Boolean]
2870
+ # Nulls will be skipped and not passed to the Ruby function.
2871
+ # This is faster because Ruby can be skipped and because we call
2872
+ # more specialized functions.
2873
+ #
2874
+ # @return [Series]
2875
+ #
2876
+ # @example
2877
+ # s = Polars::Series.new("a", [1, 2, 3])
2878
+ # s.map_elements { |x| x + 10 }
2879
+ # # =>
2880
+ # # shape: (3,)
2881
+ # # Series: 'a' [i64]
2882
+ # # [
2883
+ # # 11
2884
+ # # 12
2885
+ # # 13
2886
+ # # ]
2887
def map_elements(return_dtype: nil, skip_nulls: true, &func)
  # A nil return_dtype is handed on as nil; any other value is first
  # converted with Utils.rb_type_to_dtype.
  pl_return_dtype = return_dtype.nil? ? nil : Utils.rb_type_to_dtype(return_dtype)

  # The captured block is passed along as an object, followed by the
  # converted dtype and the skip_nulls flag; the result is re-wrapped.
  mapped = _s.apply_lambda(func, pl_return_dtype, skip_nulls)
  Utils.wrap_s(mapped)
end
2895
+ alias_method :map, :map_elements
2896
+ alias_method :apply, :map_elements
2897
+
2898
+ # Shift the values by a given period.
2899
+ #
2900
+ # @param periods [Integer]
2901
+ # Number of places to shift (may be negative).
2902
+ #
2903
+ # @return [Series]
2904
+ #
2905
+ # @example
2906
+ # s = Polars::Series.new("a", [1, 2, 3])
2907
+ # s.shift(1)
2908
+ # # =>
2909
+ # # shape: (3,)
2910
+ # # Series: 'a' [i64]
2911
+ # # [
2912
+ # # null
2913
+ # # 1
2914
+ # # 2
2915
+ # # ]
2916
+ #
2917
+ # @example
2918
+ # s.shift(-1)
2919
+ # # =>
2920
+ # # shape: (3,)
2921
+ # # Series: 'a' [i64]
2922
+ # # [
2923
+ # # 2
2924
+ # # 3
2925
+ # # null
2926
+ # # ]
2927
+ def shift(periods = 1)
2928
+ super # zsuper: passes `periods` (default 1) through unchanged; the receiving implementation is outside this chunk — TODO confirm
2929
+ end
2930
+
2931
+ # Shift the values by a given period and fill the resulting null values.
2932
+ #
2933
+ # @param periods [Integer]
2934
+ # Number of places to shift (may be negative).
2935
+ # @param fill_value [Object]
2936
+ # Fill the resulting null values with the result of this expression.
2937
+ #
2938
+ # @return [Series]
2939
+ def shift_and_fill(periods, fill_value)
2940
+ super # zsuper: passes `periods` and `fill_value` through unchanged; the receiving implementation is outside this chunk — TODO confirm
2941
+ end
2942
+
2943
+ # Take values from self or other based on the given mask.
2944
+ #
2945
+ # Where mask evaluates true, take values from self. Where mask evaluates false,
2946
+ # take values from other.
2947
+ #
2948
+ # @param mask [Series]
2949
+ # Boolean Series.
2950
+ # @param other [Series]
2951
+ # Series of same type.
2952
+ #
2953
+ # @return [Series]
2954
+ #
2955
+ # @example
2956
+ # s1 = Polars::Series.new([1, 2, 3, 4, 5])
2957
+ # s2 = Polars::Series.new([5, 4, 3, 2, 1])
2958
+ # s1.zip_with(s1 < s2, s2)
2959
+ # # =>
2960
+ # # shape: (5,)
2961
+ # # Series: '' [i64]
2962
+ # # [
2963
+ # # 1
2964
+ # # 2
2965
+ # # 3
2966
+ # # 2
2967
+ # # 1
2968
+ # # ]
2969
+ #
2970
+ # @example
2971
+ # mask = Polars::Series.new([true, false, true, false, true])
2972
+ # s1.zip_with(mask, s2)
2973
+ # # =>
2974
+ # # shape: (5,)
2975
+ # # Series: '' [i64]
2976
+ # # [
2977
+ # # 1
2978
+ # # 4
2979
+ # # 3
2980
+ # # 2
2981
+ # # 5
2982
+ # # ]
2983
def zip_with(mask, other)
  # Both arguments are unwrapped to their `_s` objects before the call,
  # and the result is re-wrapped via Utils.wrap_s.
  zipped = _s.zip_with(mask._s, other._s)
  Utils.wrap_s(zipped)
end
2986
+
2987
+ # Apply a rolling min (moving min) over the values in this array.
2988
+ #
2989
+ # A window of length `window_size` will traverse the array. The values that fill
2990
+ # this window will (optionally) be multiplied with the weights given by the
2991
+ # `weights` vector. The resulting values will be aggregated to their min.
2992
+ #
2993
+ # @param window_size [Integer]
2994
+ # The length of the window.
2995
+ # @param weights [Array]
2996
+ # An optional slice with the same length as the window that will be multiplied
2997
+ # elementwise with the values in the window.
2998
+ # @param min_periods [Integer]
2999
+ # The number of values in the window that should be non-null before computing
3000
+ # a result. If nil, it will be set equal to window size.
3001
+ # @param center [Boolean]
3002
+ # Set the labels at the center of the window
3003
+ #
3004
+ # @return [Series]
3005
+ #
3006
+ # @example
3007
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
3008
+ # s.rolling_min(3)
3009
+ # # =>
3010
+ # # shape: (5,)
3011
+ # # Series: 'a' [i64]
3012
+ # # [
3013
+ # # null
3014
+ # # null
3015
+ # # 100
3016
+ # # 200
3017
+ # # 300
3018
+ # # ]
3019
+ def rolling_min(
3020
+ window_size,
3021
+ weights: nil,
3022
+ min_periods: nil,
3023
+ center: false
3024
+ )
3025
+ super # zsuper: passes `window_size` and all keyword arguments through unchanged; the receiving implementation is outside this chunk — TODO confirm
3026
+ end
3027
+
3028
+ # Apply a rolling max (moving max) over the values in this array.
3029
+ #
3030
+ # A window of length `window_size` will traverse the array. The values that fill
3031
+ # this window will (optionally) be multiplied with the weights given by the
3032
+ # `weights` vector. The resulting values will be aggregated to their max.
3033
+ #
3034
+ # @param window_size [Integer]
3035
+ # The length of the window.
3036
+ # @param weights [Array]
3037
+ # An optional slice with the same length as the window that will be multiplied
3038
+ # elementwise with the values in the window.
3039
+ # @param min_periods [Integer]
3040
+ # The number of values in the window that should be non-null before computing
3041
+ # a result. If nil, it will be set equal to window size.
3042
+ # @param center [Boolean]
3043
+ # Set the labels at the center of the window
3044
+ #
3045
+ # @return [Series]
3046
+ #
3047
+ # @example
3048
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
3049
+ # s.rolling_max(2)
3050
+ # # =>
3051
+ # # shape: (5,)
3052
+ # # Series: 'a' [i64]
3053
+ # # [
3054
+ # # null
3055
+ # # 200
3056
+ # # 300
3057
+ # # 400
3058
+ # # 500
3059
+ # # ]
3060
+ def rolling_max(
3061
+ window_size,
3062
+ weights: nil,
3063
+ min_periods: nil,
3064
+ center: false
3065
+ )
3066
+ super # zsuper: passes `window_size` and all keyword arguments through unchanged; the receiving implementation is outside this chunk — TODO confirm
3067
+ end
3068
+
3069
+ # Apply a rolling mean (moving mean) over the values in this array.
3070
+ #
3071
+ # A window of length `window_size` will traverse the array. The values that fill
3072
+ # this window will (optionally) be multiplied with the weights given by the
3073
+ # `weights` vector. The resulting values will be aggregated to their mean.
3074
+ #
3075
+ # @param window_size [Integer]
3076
+ # The length of the window.
3077
+ # @param weights [Array]
3078
+ # An optional slice with the same length as the window that will be multiplied
3079
+ # elementwise with the values in the window.
3080
+ # @param min_periods [Integer]
3081
+ # The number of values in the window that should be non-null before computing
3082
+ # a result. If nil, it will be set equal to window size.
3083
+ # @param center [Boolean]
3084
+ # Set the labels at the center of the window
3085
+ #
3086
+ # @return [Series]
3087
+ #
3088
+ # @example
3089
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
3090
+ # s.rolling_mean(2)
3091
+ # # =>
3092
+ # # shape: (5,)
3093
+ # # Series: 'a' [f64]
3094
+ # # [
3095
+ # # null
3096
+ # # 150.0
3097
+ # # 250.0
3098
+ # # 350.0
3099
+ # # 450.0
3100
+ # # ]
3101
+ def rolling_mean(
3102
+ window_size,
3103
+ weights: nil,
3104
+ min_periods: nil,
3105
+ center: false
3106
+ )
3107
+ super # zsuper: passes `window_size` and all keyword arguments through unchanged; the receiving implementation is outside this chunk — TODO confirm
3108
+ end
3109
+
3110
+ # Apply a rolling sum (moving sum) over the values in this array.
3111
+ #
3112
+ # A window of length `window_size` will traverse the array. The values that fill
3113
+ # this window will (optionally) be multiplied with the weights given by the
3114
+ # `weights` vector. The resulting values will be aggregated to their sum.
3115
+ #
3116
+ # @param window_size [Integer]
3117
+ # The length of the window.
3118
+ # @param weights [Array]
3119
+ # An optional slice with the same length as the window that will be multiplied
3120
+ # elementwise with the values in the window.
3121
+ # @param min_periods [Integer]
3122
+ # The number of values in the window that should be non-null before computing
3123
+ # a result. If nil, it will be set equal to window size.
3124
+ # @param center [Boolean]
3125
+ # Set the labels at the center of the window
3126
+ #
3127
+ # @return [Series]
3128
+ #
3129
+ # @example
3130
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
3131
+ # s.rolling_sum(2)
3132
+ # # =>
3133
+ # # shape: (5,)
3134
+ # # Series: 'a' [i64]
3135
+ # # [
3136
+ # # null
3137
+ # # 3
3138
+ # # 5
3139
+ # # 7
3140
+ # # 9
3141
+ # # ]
3142
+ def rolling_sum(
3143
+ window_size,
3144
+ weights: nil,
3145
+ min_periods: nil,
3146
+ center: false
3147
+ )
3148
+ super # zsuper: passes `window_size` and all keyword arguments through unchanged; the receiving implementation is outside this chunk — TODO confirm
3149
+ end
3150
+
3151
+ # Compute a rolling std dev.
3152
+ #
3153
+ # A window of length `window_size` will traverse the array. The values that fill
3154
+ # this window will (optionally) be multiplied with the weights given by the
3155
+ # `weights` vector. The resulting values will be aggregated to their standard deviation.
3156
+ #
3157
+ # @param window_size [Integer]
3158
+ # The length of the window.
3159
+ # @param weights [Array]
3160
+ # An optional slice with the same length as the window that will be multiplied
3161
+ # elementwise with the values in the window.
3162
+ # @param min_periods [Integer]
3163
+ # The number of values in the window that should be non-null before computing
3164
+ # a result. If nil, it will be set equal to window size.
3165
+ # @param center [Boolean]
3166
+ # Set the labels at the center of the window
3167
+ #
3168
+ # @return [Series]
3169
+ #
3170
+ # @example
3171
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3172
+ # s.rolling_std(3)
3173
+ # # =>
3174
+ # # shape: (6,)
3175
+ # # Series: 'a' [f64]
3176
+ # # [
3177
+ # # null
3178
+ # # null
3179
+ # # 1.0
3180
+ # # 1.0
3181
+ # # 1.527525
3182
+ # # 2.0
3183
+ # # ]
3184
+ def rolling_std(
3185
+ window_size,
3186
+ weights: nil,
3187
+ min_periods: nil,
3188
+ center: false,
3189
+ ddof: 1
3190
+ )
3191
+ super # zsuper: passes `window_size` and all keyword arguments (including `ddof:`) through unchanged; the receiving implementation is outside this chunk — TODO confirm
3192
+ end
3193
+
3194
+ # Compute a rolling variance.
3195
+ #
3196
+ # A window of length `window_size` will traverse the array. The values that fill
3197
+ # this window will (optionally) be multiplied with the weights given by the
3198
+ # `weights` vector. The resulting values will be aggregated to their variance.
3199
+ #
3200
+ # @param window_size [Integer]
3201
+ # The length of the window.
3202
+ # @param weights [Array]
3203
+ # An optional slice with the same length as the window that will be multiplied
3204
+ # elementwise with the values in the window.
3205
+ # @param min_periods [Integer]
3206
+ # The number of values in the window that should be non-null before computing
3207
+ # a result. If nil, it will be set equal to window size.
3208
+ # @param center [Boolean]
3209
+ # Set the labels at the center of the window
3210
+ #
3211
+ # @return [Series]
3212
+ #
3213
+ # @example
3214
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3215
+ # s.rolling_var(3)
3216
+ # # =>
3217
+ # # shape: (6,)
3218
+ # # Series: 'a' [f64]
3219
+ # # [
3220
+ # # null
3221
+ # # null
3222
+ # # 1.0
3223
+ # # 1.0
3224
+ # # 2.333333
3225
+ # # 4.0
3226
+ # # ]
3227
+ def rolling_var(
3228
+ window_size,
3229
+ weights: nil,
3230
+ min_periods: nil,
3231
+ center: false,
3232
+ ddof: 1
3233
+ )
3234
+ super # zsuper: passes `window_size` and all keyword arguments (including `ddof:`) through unchanged; the receiving implementation is outside this chunk — TODO confirm
3235
+ end
3236
+
3237
+ # def rolling_apply
3238
+ # end
3239
+
3240
+ # Compute a rolling median.
3241
+ #
3242
+ # @param window_size [Integer]
3243
+ # The length of the window.
3244
+ # @param weights [Array]
3245
+ # An optional slice with the same length as the window that will be multiplied
3246
+ # elementwise with the values in the window.
3247
+ # @param min_periods [Integer]
3248
+ # The number of values in the window that should be non-null before computing
3249
+ # a result. If nil, it will be set equal to window size.
3250
+ # @param center [Boolean]
3251
+ # Set the labels at the center of the window
3252
+ #
3253
+ # @return [Series]
3254
+ #
3255
+ # @example
3256
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3257
+ # s.rolling_median(3)
3258
+ # # =>
3259
+ # # shape: (6,)
3260
+ # # Series: 'a' [f64]
3261
+ # # [
3262
+ # # null
3263
+ # # null
3264
+ # # 2.0
3265
+ # # 3.0
3266
+ # # 4.0
3267
+ # # 6.0
3268
+ # # ]
3269
+ def rolling_median(
3270
+ window_size,
3271
+ weights: nil,
3272
+ min_periods: nil,
3273
+ center: false
3274
+ )
3275
+ super # zsuper: passes `window_size` and all keyword arguments through unchanged; the receiving implementation is outside this chunk — TODO confirm
3276
+ end
3277
+
3278
+ # Compute a rolling quantile.
3279
+ #
3280
+ # @param quantile [Float]
3281
+ # Quantile between 0.0 and 1.0.
3282
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
3283
+ # Interpolation method.
3284
+ # @param window_size [Integer]
3285
+ # The length of the window.
3286
+ # @param weights [Array]
3287
+ # An optional slice with the same length as the window that will be multiplied
3288
+ # elementwise with the values in the window.
3289
+ # @param min_periods [Integer]
3290
+ # The number of values in the window that should be non-null before computing
3291
+ # a result. If nil, it will be set equal to window size.
3292
+ # @param center [Boolean]
3293
+ # Set the labels at the center of the window
3294
+ #
3295
+ # @return [Series]
3296
+ #
3297
+ # @example
3298
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3299
+ # s.rolling_quantile(0.33, window_size: 3)
3300
+ # # =>
3301
+ # # shape: (6,)
3302
+ # # Series: 'a' [f64]
3303
+ # # [
3304
+ # # null
3305
+ # # null
3306
+ # # 1.0
3307
+ # # 2.0
3308
+ # # 3.0
3309
+ # # 4.0
3310
+ # # ]
3311
+ #
3312
+ # @example
3313
+ # s.rolling_quantile(0.33, interpolation: "linear", window_size: 3)
3314
+ # # =>
3315
+ # # shape: (6,)
3316
+ # # Series: 'a' [f64]
3317
+ # # [
3318
+ # # null
3319
+ # # null
3320
+ # # 1.66
3321
+ # # 2.66
3322
+ # # 3.66
3323
+ # # 5.32
3324
+ # # ]
3325
+ def rolling_quantile(
3326
+ quantile,
3327
+ interpolation: "nearest",
3328
+ window_size: 2,
3329
+ weights: nil,
3330
+ min_periods: nil,
3331
+ center: false
3332
+ )
3333
+ super # zsuper: passes `quantile` and all keyword arguments through unchanged; the receiving implementation is outside this chunk — TODO confirm
3334
+ end
3335
+
3336
+ # Compute a rolling skew.
3337
+ #
3338
+ # @param window_size [Integer]
3339
+ # Integer size of the rolling window.
3340
+ # @param bias [Boolean]
3341
+ # If false, the calculations are corrected for statistical bias.
3342
+ #
3343
+ # @return [Series]
3344
+ #
3345
+ # @example
3346
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3347
+ # s.rolling_skew(3)
3348
+ # # =>
3349
+ # # shape: (6,)
3350
+ # # Series: 'a' [f64]
3351
+ # # [
3352
+ # # null
3353
+ # # null
3354
+ # # 0.0
3355
+ # # 0.0
3356
+ # # 0.381802
3357
+ # # 0.0
3358
+ # # ]
3359
+ def rolling_skew(window_size, bias: true)
3360
+ super # zsuper: passes `window_size` and `bias:` through unchanged; the receiving implementation is outside this chunk — TODO confirm
3361
+ end
3362
+
3363
+ # Sample from this Series.
3364
+ #
3365
+ # @param n [Integer]
3366
+ # Number of items to return. Cannot be used with `frac`. Defaults to 1 if
3367
+ # `frac` is nil.
3368
+ # @param frac [Float]
3369
+ # Fraction of items to return. Cannot be used with `n`.
3370
+ # @param with_replacement [Boolean]
3371
+ # Allow values to be sampled more than once.
3372
+ # @param shuffle [Boolean]
3373
+ # Shuffle the order of sampled data points.
3374
+ # @param seed [Integer]
3375
+ # Seed for the random number generator. If set to nil (default), a random
3376
+ # seed is used.
3377
+ #
3378
+ # @return [Series]
3379
+ #
3380
+ # @example
3381
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
3382
+ # s.sample(n: 2, seed: 0)
3383
+ # # =>
3384
+ # # shape: (2,)
3385
+ # # Series: 'a' [i64]
3386
+ # # [
3387
+ # # 5
3388
+ # # 3
3389
+ # # ]
3390
def sample(
  n: nil,
  frac: nil,
  with_replacement: false,
  shuffle: false,
  seed: nil
)
  # `n` and `frac` are mutually exclusive.
  raise ArgumentError, "cannot specify both `n` and `frac`" unless n.nil? || frac.nil?

  sampled =
    if frac.nil?
      # No fraction requested: sample a fixed count, which is 1 when `n`
      # was not given either.
      _s.sample_n(n.nil? ? 1 : n, with_replacement, shuffle, seed)
    else
      _s.sample_frac(frac, with_replacement, shuffle, seed)
    end
  Utils.wrap_s(sampled)
end
3410
+
3411
+ # Get a boolean mask of the local maximum peaks.
3412
+ #
3413
+ # @return [Series]
3414
+ #
3415
+ # @example
3416
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
3417
+ # s.peak_max
3418
+ # # =>
3419
+ # # shape: (5,)
3420
+ # # Series: 'a' [bool]
3421
+ # # [
3422
+ # # false
3423
+ # # false
3424
+ # # false
3425
+ # # false
3426
+ # # true
3427
+ # # ]
3428
+ def peak_max
3429
+ super # zsuper: re-dispatches `peak_max` with no arguments; the receiving implementation is outside this chunk — TODO confirm
3430
+ end
3431
+
3432
+ # Get a boolean mask of the local minimum peaks.
3433
+ #
3434
+ # @return [Series]
3435
+ #
3436
+ # @example
3437
+ # s = Polars::Series.new("a", [4, 1, 3, 2, 5])
3438
+ # s.peak_min
3439
+ # # =>
3440
+ # # shape: (5,)
3441
+ # # Series: 'a' [bool]
3442
+ # # [
3443
+ # # false
3444
+ # # true
3445
+ # # false
3446
+ # # true
3447
+ # # false
3448
+ # # ]
3449
+ def peak_min
3450
+ super # zsuper: re-dispatches `peak_min` with no arguments; the receiving implementation is outside this chunk — TODO confirm
3451
+ end
3452
+
3453
+ # Count the number of unique values in this Series.
3454
+ #
3455
+ # @return [Integer]
3456
+ #
3457
+ # @example
3458
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
3459
+ # s.n_unique
3460
+ # # => 3
3461
+ def n_unique
3462
+ _s.n_unique # the underlying result is returned as-is (no Utils.wrap_s re-wrapping here)
3463
+ end
3464
+
3465
+ # Shrink Series memory usage.
3466
+ #
3467
+ # Shrinks the underlying array capacity to exactly fit the actual data.
3468
+ # (Note that this function does not change the Series data type).
3469
+ #
3470
+ # @return [Series]
3471
def shrink_to_fit(in_place: false)
  # Operate on the receiver itself when in_place is truthy, otherwise on a
  # clone; whichever object was shrunk is the one returned.
  target = in_place ? self : clone
  target._s.shrink_to_fit
  target
end
3481
+
3482
+ # Hash the Series.
3483
+ #
3484
+ # The hash value is of type `:u64`.
3485
+ #
3486
+ # @param seed [Integer]
3487
+ # Random seed parameter. Defaults to 0.
3488
+ # @param seed_1 [Integer]
3489
+ # Random seed parameter. Defaults to `seed` if not set.
3490
+ # @param seed_2 [Integer]
3491
+ # Random seed parameter. Defaults to `seed` if not set.
3492
+ # @param seed_3 [Integer]
3493
+ # Random seed parameter. Defaults to `seed` if not set.
3494
+ #
3495
+ # @return [Series]
3496
+ #
3497
+ # @example
3498
+ # s = Polars::Series.new("a", [1, 2, 3])
3499
+ # s._hash(42)
3500
+ # # =>
3501
+ # # shape: (3,)
3502
+ # # Series: 'a' [u64]
3503
+ # # [
3504
+ # # 2374023516666777365
3505
+ # # 10386026231460783898
3506
+ # # 17796317186427479491
3507
+ # # ]
3508
+ def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
3509
+ super # zsuper: passes all four seed arguments through unchanged (nil ones included); the receiving implementation is outside this chunk — TODO confirm
3510
+ end
3511
+
3512
+ # Reinterpret the underlying bits as a signed/unsigned integer.
3513
+ #
3514
+ # This operation is only allowed for 64-bit integers. For integers with fewer bits,
3514
+ # you can safely use the cast operation instead.
3516
+ #
3517
+ # @param signed [Boolean]
3518
+ # If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`.
3519
+ #
3520
+ # @return [Series]
3521
+ def reinterpret(signed: true)
3522
+ super # zsuper: passes `signed:` through unchanged; the receiving implementation is outside this chunk — TODO confirm
3523
+ end
3524
+
3525
+ # Interpolate intermediate values. The interpolation method is linear.
3526
+ #
3527
+ # @return [Series]
3528
+ #
3529
+ # @example
3530
+ # s = Polars::Series.new("a", [1, 2, nil, nil, 5])
3531
+ # s.interpolate
3532
+ # # =>
3533
+ # # shape: (5,)
3534
+ # # Series: 'a' [f64]
3535
+ # # [
3536
+ # # 1.0
3537
+ # # 2.0
3538
+ # # 3.0
3539
+ # # 4.0
3540
+ # # 5.0
3541
+ # # ]
3542
+ def interpolate(method: "linear")
3543
+ super # zsuper: passes `method:` (default "linear") through unchanged; the receiving implementation is outside this chunk — TODO confirm
3544
+ end
3545
+
3546
+ # Compute absolute values.
3547
+ #
3548
+ # @return [Series]
3549
+ def abs
3550
+ super # zsuper: re-dispatches `abs` with no arguments; the receiving implementation is outside this chunk — TODO confirm
3551
+ end
3552
+
3553
+ # Assign ranks to data, dealing with ties appropriately.
3554
+ #
3555
+ # @param method ["average", "min", "max", "dense", "ordinal", "random"]
3556
+ # The method used to assign ranks to tied elements.
3557
+ # The following methods are available (default is 'average'):
3558
+ #
3559
+ # - 'average' : The average of the ranks that would have been assigned to
3560
+ # all the tied values is assigned to each value.
3561
+ # - 'min' : The minimum of the ranks that would have been assigned to all
3562
+ # the tied values is assigned to each value. (This is also referred to
3563
+ # as "competition" ranking.)
3564
+ # - 'max' : The maximum of the ranks that would have been assigned to all
3565
+ # the tied values is assigned to each value.
3566
+ # - 'dense' : Like 'min', but the rank of the next highest element is
3567
+ # assigned the rank immediately after those assigned to the tied
3568
+ # elements.
3569
+ # - 'ordinal' : All values are given a distinct rank, corresponding to
3570
+ # the order that the values occur in the Series.
3571
+ # - 'random' : Like 'ordinal', but the rank for ties is not dependent
3572
+ # on the order that the values occur in the Series.
3573
+ # @param reverse [Boolean]
3574
+ # Reverse the operation.
3575
+ # @param seed [Integer]
3576
+ # If `method: "random"`, use this as seed.
3577
+ #
3578
+ # @return [Series]
3579
+ #
3580
+ # @example The 'average' method:
3581
+ # s = Polars::Series.new("a", [3, 6, 1, 1, 6])
3582
+ # s.rank
3583
+ # # =>
3584
+ # # shape: (5,)
3585
+ # # Series: 'a' [f64]
3586
+ # # [
3587
+ # # 3.0
3588
+ # # 4.5
3589
+ # # 1.5
3590
+ # # 1.5
3591
+ # # 4.5
3592
+ # # ]
3593
+ #
3594
+ # @example The 'ordinal' method:
3595
+ # s = Polars::Series.new("a", [3, 6, 1, 1, 6])
3596
+ # s.rank(method: "ordinal")
3597
+ # # =>
3598
+ # # shape: (5,)
3599
+ # # Series: 'a' [u32]
3600
+ # # [
3601
+ # # 3
3602
+ # # 4
3603
+ # # 1
3604
+ # # 2
3605
+ # # 5
3606
+ # # ]
3607
+ def rank(method: "average", reverse: false, seed: nil)
3608
+ super # zsuper: passes `method:`, `reverse:` and `seed:` through unchanged; the receiving implementation is outside this chunk — TODO confirm
3609
+ end
3610
+
3611
+ # Calculate the n-th discrete difference.
3612
+ #
3613
+ # @param n [Integer]
3614
+ # Number of slots to shift.
3615
+ # @param null_behavior ["ignore", "drop"]
3616
+ # How to handle null values.
3617
+ #
3618
+ # @return [Series]
3619
def diff(n: 1, null_behavior: "ignore")
  # Explicitly forward both keywords to the next implementation in the ancestor chain.
  super(n: n, null_behavior: null_behavior)
end
3622
+
3623
+ # Computes percentage change between values.
3624
+ #
3625
+ # Percentage change (as fraction) between current element and most-recent
3626
+ # non-null element at least `n` period(s) before the current element.
3627
+ #
3628
+ # Computes the change from the previous row by default.
3629
+ #
3630
+ # @param n [Integer]
3631
+ # Periods to shift for forming percent change.
3632
+ #
3633
+ # @return [Series]
3634
+ #
3635
+ # @example
3636
+ # Polars::Series.new(0..9).pct_change
3637
+ # # =>
3638
+ # # shape: (10,)
3639
+ # # Series: '' [f64]
3640
+ # # [
3641
+ # # null
3642
+ # # inf
3643
+ # # 1.0
3644
+ # # 0.5
3645
+ # # 0.333333
3646
+ # # 0.25
3647
+ # # 0.2
3648
+ # # 0.166667
3649
+ # # 0.142857
3650
+ # # 0.125
3651
+ # # ]
3652
+ #
3653
+ # @example
3654
+ # Polars::Series.new([1, 2, 4, 8, 16, 32, 64, 128, 256, 512]).pct_change(n: 2)
3655
+ # # =>
3656
+ # # shape: (10,)
3657
+ # # Series: '' [f64]
3658
+ # # [
3659
+ # # null
3660
+ # # null
3661
+ # # 3.0
3662
+ # # 3.0
3663
+ # # 3.0
3664
+ # # 3.0
3665
+ # # 3.0
3666
+ # # 3.0
3667
+ # # 3.0
3668
+ # # 3.0
3669
+ # # ]
3670
def pct_change(n: 1)
  # Explicitly forward the shift count to the next implementation in the ancestor chain.
  super(n: n)
end
3673
+
3674
+ # Compute the sample skewness of a data set.
3675
+ #
3676
+ # For normally distributed data, the skewness should be about zero. For
3677
+ # unimodal continuous distributions, a skewness value greater than zero means
3678
+ # that there is more weight in the right tail of the distribution. The
3679
+ # function `skewtest` can be used to determine if the skewness value
3680
+ # is close enough to zero, statistically speaking.
3681
+ #
3682
+ # @param bias [Boolean]
3683
+ # If `false`, the calculations are corrected for statistical bias.
3684
+ #
3685
+ # @return [Float, nil]
3686
def skew(bias: true)
  # The underlying series object takes `bias` positionally.
  skewness = _s.skew(bias)
  skewness
end
3689
+
3690
+ # Compute the kurtosis (Fisher or Pearson) of a dataset.
3691
+ #
3692
+ # Kurtosis is the fourth central moment divided by the square of the
3693
+ # variance. If Fisher's definition is used, then 3.0 is subtracted from
3694
+ # the result to give 0.0 for a normal distribution.
3695
+ # If bias is false, then the kurtosis is calculated using k statistics to
3696
+ # eliminate bias coming from biased moment estimators.
3697
+ #
3698
+ # @param fisher [Boolean]
3699
+ # If `true`, Fisher's definition is used (normal ==> 0.0). If `false`,
3700
+ # Pearson's definition is used (normal ==> 3.0).
3701
+ # @param bias [Boolean]
3702
+ # If `false`, the calculations are corrected for statistical bias.
3703
+ #
3704
+ # @return [Float, nil]
3705
def kurtosis(fisher: true, bias: true)
  # The underlying series object takes both flags positionally: fisher first, then bias.
  result = _s.kurtosis(fisher, bias)
  result
end
3708
+
3709
+ # Clip (limit) the values in an array to a `min` and `max` boundary.
3710
+ #
3711
+ # Only works for numerical types.
3712
+ #
3713
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
3714
+ # expression. See {#when} for more information.
3715
+ #
3716
+ # @param min_val [Numeric]
3717
+ # Minimum value.
3718
+ # @param max_val [Numeric]
3719
+ # Maximum value.
3720
+ #
3721
+ # @return [Series]
3722
+ #
3723
+ # @example
3724
+ # s = Polars::Series.new("foo", [-50, 5, nil, 50])
3725
+ # s.clip(1, 10)
3726
+ # # =>
3727
+ # # shape: (4,)
3728
+ # # Series: 'foo' [i64]
3729
+ # # [
3730
+ # # 1
3731
+ # # 5
3732
+ # # null
3733
+ # # 10
3734
+ # # ]
3735
def clip(min_val = nil, max_val = nil)
  # Pass both bounds (including nil defaults) to the next implementation in the ancestor chain.
  super(min_val, max_val)
end
3738
+
3739
+ # Clip (limit) the values in an array to a `min` boundary.
3740
+ #
3741
+ # Only works for numerical types.
3742
+ #
3743
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
3744
+ # expression. See {#when} for more information.
3745
+ #
3746
+ # @param min_val [Numeric]
3747
+ # Minimum value.
3748
+ #
3749
+ # @return [Series]
3750
def clip_min(min_val)
  # Pass the lower bound to the next implementation in the ancestor chain.
  super(min_val)
end
3753
+
3754
+ # Clip (limit) the values in an array to a `max` boundary.
3755
+ #
3756
+ # Only works for numerical types.
3757
+ #
3758
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
3759
+ # expression. See {#when} for more information.
3760
+ #
3761
+ # @param max_val [Numeric]
3762
+ # Maximum value.
3763
+ #
3764
+ # @return [Series]
3765
def clip_max(max_val)
  # Pass the upper bound to the next implementation in the ancestor chain.
  super(max_val)
end
3768
+
3769
+ # Replace values by different values.
3770
+ #
3771
+ # @param old [Object]
3772
+ # Value or sequence of values to replace.
3773
+ # Also accepts a mapping of values to their replacement.
3774
+ # @param new [Object]
3775
+ # Value or sequence of values to replace by.
3776
+ # Length must match the length of `old` or have length 1.
3777
+ # @param default [Object]
3778
+ # Set values that were not replaced to this value.
3779
+ # Defaults to keeping the original value.
3780
+ # Accepts expression input. Non-expression inputs are parsed as literals.
3781
+ # @param return_dtype [Object]
3782
+ # The data type of the resulting Series. If set to `nil` (default),
3783
+ # the data type is determined automatically based on the other inputs.
3784
+ #
3785
+ # @return [Series]
3786
+ #
3787
+ # @example Replace a single value by another value. Values that were not replaced remain unchanged.
3788
+ # s = Polars::Series.new([1, 2, 2, 3])
3789
+ # s.replace(2, 100)
3790
+ # # =>
3791
+ # # shape: (4,)
3792
+ # # Series: '' [i64]
3793
+ # # [
3794
+ # # 1
3795
+ # # 100
3796
+ # # 100
3797
+ # # 3
3798
+ # # ]
3799
+ #
3800
+ # @example Replace multiple values by passing sequences to the `old` and `new` parameters.
3801
+ # s.replace([2, 3], [100, 200])
3802
+ # # =>
3803
+ # # shape: (4,)
3804
+ # # Series: '' [i64]
3805
+ # # [
3806
+ # # 1
3807
+ # # 100
3808
+ # # 100
3809
+ # # 200
3810
+ # # ]
3811
+ #
3812
+ # @example Passing a mapping with replacements is also supported as syntactic sugar.
3813
+ # mapping = {2 => 100, 3 => 200}
3814
+ # s.replace(mapping)
3815
+ # # =>
3816
+ # # shape: (4,)
3817
+ # # Series: '' [i64]
3818
+ # # [
3819
+ # # 1
3820
+ # # 100
3821
+ # # 100
3822
+ # # 200
3823
+ # # ]
3824
+ #
3825
+ # @example The original data type is preserved when replacing by values of a different data type.
3826
+ # s = Polars::Series.new(["x", "y", "z"])
3827
+ # mapping = {"x" => 1, "y" => 2, "z" => 3}
3828
+ # s.replace(mapping)
3829
+ # # =>
3830
+ # # shape: (3,)
3831
+ # # Series: '' [str]
3832
+ # # [
3833
+ # # "1"
3834
+ # # "2"
3835
+ # # "3"
3836
+ # # ]
3837
def replace(old, new = Expr::NO_DEFAULT, default: Expr::NO_DEFAULT, return_dtype: nil)
  # Positional and keyword arguments are forwarded exactly as received,
  # including the NO_DEFAULT sentinels when the caller omitted them.
  super(old, new, default: default, return_dtype: return_dtype)
end
3840
+
3841
+ # Reshape this Series to a flat Series or a Series of Lists.
3842
+ #
3843
+ # @param dims [Array]
3844
+ # Tuple of the dimension sizes. If a -1 is used in any of the dimensions, that
3845
+ # dimension is inferred.
3846
+ #
3847
+ # @return [Series]
3848
def reshape(dims)
  # Pass the requested dimensions to the next implementation in the ancestor chain.
  super(dims)
end
3851
+
3852
+ # Shuffle the contents of this Series.
3853
+ #
3854
+ # @param seed [Integer, nil]
3855
+ # Seed for the random number generator.
3856
+ #
3857
+ # @return [Series]
3858
+ #
3859
+ # @example
3860
+ # s = Polars::Series.new("a", [1, 2, 3])
3861
+ # s.shuffle(seed: 1)
3862
+ # # =>
3863
+ # # shape: (3,)
3864
+ # # Series: 'a' [i64]
3865
+ # # [
3866
+ # # 2
3867
+ # # 1
3868
+ # # 3
3869
+ # # ]
3870
def shuffle(seed: nil)
  # Pass the seed (possibly nil) to the next implementation in the ancestor chain.
  super(seed: seed)
end
3873
+
3874
+ # Exponentially-weighted moving average.
3875
+ #
3876
+ # @return [Series]
3877
def ewm_mean(
  com: nil,
  span: nil,
  half_life: nil,
  alpha: nil,
  adjust: true,
  min_periods: 1,
  ignore_nulls: true
)
  # Forward every option by name to the next implementation in the ancestor chain.
  super(
    com: com,
    span: span,
    half_life: half_life,
    alpha: alpha,
    adjust: adjust,
    min_periods: min_periods,
    ignore_nulls: ignore_nulls
  )
end
3888
+
3889
+ # Exponentially-weighted moving standard deviation.
3890
+ #
3891
+ # @return [Series]
3892
def ewm_std(
  com: nil,
  span: nil,
  half_life: nil,
  alpha: nil,
  adjust: true,
  bias: false,
  min_periods: 1,
  ignore_nulls: true
)
  # Forward every option by name to the next implementation in the ancestor chain.
  super(
    com: com,
    span: span,
    half_life: half_life,
    alpha: alpha,
    adjust: adjust,
    bias: bias,
    min_periods: min_periods,
    ignore_nulls: ignore_nulls
  )
end
3904
+
3905
+ # Exponentially-weighted moving variance.
3906
+ #
3907
+ # @return [Series]
3908
def ewm_var(
  com: nil,
  span: nil,
  half_life: nil,
  alpha: nil,
  adjust: true,
  bias: false,
  min_periods: 1,
  ignore_nulls: true
)
  # Forward every option by name to the next implementation in the ancestor chain.
  super(
    com: com,
    span: span,
    half_life: half_life,
    alpha: alpha,
    adjust: adjust,
    bias: bias,
    min_periods: min_periods,
    ignore_nulls: ignore_nulls
  )
end
3920
+
3921
+ # Extend the Series with given number of values.
3922
+ #
3923
+ # @param value [Object]
3924
+ # The value to extend the Series with. This value may be `nil` to fill with
3925
+ # nulls.
3926
+ # @param n [Integer]
3927
+ # The number of values to extend.
3928
+ #
3929
+ # @return [Series]
3930
+ #
3931
+ # @example
3932
+ # s = Polars::Series.new("a", [1, 2, 3])
3933
+ # s.extend_constant(99, 2)
3934
+ # # =>
3935
+ # # shape: (5,)
3936
+ # # Series: 'a' [i64]
3937
+ # # [
3938
+ # # 1
3939
+ # # 2
3940
+ # # 3
3941
+ # # 99
3942
+ # # 99
3943
+ # # ]
3944
def extend_constant(value, n)
  # Extend on the underlying series object, then wrap the result again.
  extended = _s.extend_constant(value, n)
  Utils.wrap_s(extended)
end
3947
+
3948
+ # Flags the Series as sorted.
3949
+ #
3950
+ # Enables downstream code to use fast paths for sorted arrays.
3951
+ #
3952
+ # @param reverse [Boolean]
3953
+ # If the Series order is reversed, e.g. descending.
3954
+ #
3955
+ # @return [Series]
3956
+ #
3957
+ # @note
3958
+ # This can lead to incorrect results if this Series is not sorted!!
3959
+ # Use with care!
3960
+ #
3961
+ # @example
3962
+ # s = Polars::Series.new("a", [1, 2, 3])
3963
+ # s.set_sorted.max
3964
+ # # => 3
3965
def set_sorted(reverse: false)
  # The underlying series object takes `reverse` positionally; wrap what it returns.
  flagged = _s.set_sorted(reverse)
  Utils.wrap_s(flagged)
end
3968
+
3969
+ # Create a new Series filled with values from the given index.
3970
+ #
3971
+ # @return [Series]
3972
def new_from_index(index, length)
  # Delegate to the underlying series object and wrap the result.
  filled = _s.new_from_index(index, length)
  Utils.wrap_s(filled)
end
3975
+
3976
+ # Shrink numeric columns to the minimal required datatype.
3977
+ #
3978
+ # Shrink to the dtype needed to fit the extrema of this Series.
3979
+ # This can be used to reduce memory pressure.
3980
+ #
3981
+ # @return [Series]
3982
def shrink_dtype
  # No arguments to forward; defer to the next implementation in the ancestor chain.
  super()
end
3985
+
3986
+ # Create an object namespace of all list related methods.
3987
+ #
3988
+ # @return [ListNameSpace]
3989
def list = ListNameSpace.new(self)
3992
+
3993
+ # Create an object namespace of all array related methods.
3994
+ #
3995
+ # @return [ArrayNameSpace]
3996
def arr = ArrayNameSpace.new(self)
3999
+
4000
+ # Create an object namespace of all binary related methods.
4001
+ #
4002
+ # @return [BinaryNameSpace]
4003
def bin = BinaryNameSpace.new(self)
4006
+
4007
+ # Create an object namespace of all categorical related methods.
4008
+ #
4009
+ # @return [CatNameSpace]
4010
def cat = CatNameSpace.new(self)
4013
+
4014
+ # Create an object namespace of all datetime related methods.
4015
+ #
4016
+ # @return [DateTimeNameSpace]
4017
def dt = DateTimeNameSpace.new(self)
4020
+
4021
+ # Create an object namespace of all string related methods.
4022
+ #
4023
+ # @return [StringNameSpace]
4024
def str = StringNameSpace.new(self)
4027
+
4028
+ # Create an object namespace of all struct related methods.
4029
+ #
4030
+ # @return [StructNameSpace]
4031
def struct = StructNameSpace.new(self)
4034
+
4035
+ private
4036
+
4037
# Copy hook: after the default copy, replace the underlying series object
# with its own clone.
def initialize_copy(other)
  super(other)
  cloned = _s._clone
  self._s = cloned
end
4041
+
4042
# Coercion hook: for a Numeric, returns the pair [series built from the
# literal, self]; any other operand raises TypeError.
def coerce(other)
  unless other.is_a?(Numeric)
    raise TypeError, "#{self.class} can't be coerced into #{other.class}"
  end

  # TODO improve
  literal_series = to_frame.select(Polars.lit(other)).to_series
  [literal_series, self]
end
4051
+
4052
# Convert a Series of index positions to the index dtype reported by
# Plr.get_index_type, turning negative positions into absolute ones.
# Raises ArgumentError for out-of-range positions and for unsupported input.
def _pos_idxs(idxs)
  idx_type = Plr.get_index_type

  if idxs.is_a?(Series)
    # Already in the index dtype: nothing to convert.
    return idxs if idxs.dtype == idx_type

    other_unsigned = idx_type == UInt32 ? UInt64 : UInt32
    if [UInt8, UInt16, other_unsigned, Int8, Int16, Int32, Int64].include?(idxs.dtype)
      if idx_type == UInt32
        if [Int64, UInt64].include?(idxs.dtype) && idxs.max >= 2**32
          raise ArgumentError, "Index positions should be smaller than 2^32."
        end
        if idxs.dtype == Int64 && idxs.min < -(2**32)
          raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
        end
      end

      if [Int8, Int16, Int32, Int64].include?(idxs.dtype) && idxs.min < 0
        # Widen the narrow signed dtypes first; the target width depends on the index dtype.
        narrow, wide = idx_type == UInt32 ? [[Int8, Int16], Int32] : [[Int8, Int16, Int32], Int64]
        idxs = idxs.cast(wide) if narrow.include?(idxs.dtype)

        # Update negative indexes to absolute indexes.
        absolute =
          Polars.when(Polars.col(idxs.name) < 0)
            .then(len + Polars.col(idxs.name))
            .otherwise(Polars.col(idxs.name))
            .cast(idx_type)
        return idxs.to_frame.select(absolute).to_series(0)
      end

      return idxs.cast(idx_type)
    end
  end

  raise ArgumentError, "Unsupported idxs datatype."
end
4104
+
4105
# Shared implementation of the comparison operators; `op` is the operation
# name (e.g. :eq, :neq) used both as a method name and as an ffi prefix.
def _comp(other, op)
  # Boolean series against a literal true/false: the answer is either a copy
  # of the series or its negation.
  if dtype == Boolean && Utils.bool?(other) && [:eq, :neq].include?(op)
    return clone if (other == true && op == :eq) || (other == false && op == :neq)
    return !self if (other == false && op == :eq) || (other == true && op == :neq)
  end

  if other.is_a?(::Time) && dtype.is_a?(Datetime)
    timestamp = Utils.datetime_to_int(other, time_unit)
    compare = ffi_func("#{op}_<>", Int64, _s)
    fail if compare.nil?
    return Utils.wrap_s(compare.call(timestamp))
  end

  if other.is_a?(::Date) && dtype == Date
    date_int = Utils.date_to_int(other)
    compare = ffi_func("#{op}_<>", Int32, _s)
    fail if compare.nil?
    return Utils.wrap_s(compare.call(date_int))
  end

  # Series against Series goes straight to the underlying objects.
  return Utils.wrap_s(_s.send(op, other._s)) if other.is_a?(Series)

  compare = ffi_func("#{op}_<>", dtype, _s)
  raise NotImplementedError if compare.nil?

  Utils.wrap_s(compare.call(other))
end
4136
+
4137
# Look up the dtype-specialised method on `_s`.
#
# @param name [String] method-name template containing the "<>" placeholder,
#   which is replaced by the dtype's entry in DTYPE_TO_FFINAME
# @param dtype [Object] either a dtype class (callers in this file pass
#   Int64 and Int32) or a dtype instance (callers also pass `dtype`)
# @param _s [Object] object whose method is looked up
# @return [Method, nil] the bound method, or nil when the dtype has no entry
def ffi_func(name, dtype, _s)
  # Normalise to the class used as the table key. Looking up `dtype.class`
  # unconditionally turned a class argument into `Class`, which never matches.
  dtype_class = dtype.is_a?(Class) ? dtype : dtype.class
  suffix = DTYPE_TO_FFINAME[dtype_class]
  _s.method(name.sub("<>", suffix)) if suffix
end
4140
+
4141
# Shared implementation of the arithmetic operators; `op` is the operation
# name used both as a method name and as an ffi prefix.
def _arithmetic(other, op)
  # An expression operand is first evaluated against this series' frame.
  other = to_frame.select(other).to_series if other.is_a?(Expr)
  return Utils.wrap_s(_s.send(op, other._s)) if other.is_a?(Series)

  scalar_needs_series =
    other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) ||
    other.is_a?(::Time) || other.is_a?(::String)
  if scalar_needs_series && !is_float
    # Wrap the scalar in a one-element series and operate series-to-series.
    scalar_series = sequence_to_rbseries(name, [other])
    return Utils.wrap_s(_s.send(op, scalar_series))
  end

  op_func = ffi_func("#{op}_<>", dtype, _s)
  if op_func.nil?
    raise ArgumentError, "cannot do arithmetic with series of dtype: #{dtype} and argument of type: #{other.class.name}"
  end

  Utils.wrap_s(op_func.call(other))
end
4160
+
4161
# Dtype class => suffix substituted for "<>" in ffi method-name templates.
DTYPE_TO_FFINAME = [
  # signed integers
  [Int8, "i8"],
  [Int16, "i16"],
  [Int32, "i32"],
  [Int64, "i64"],
  # unsigned integers
  [UInt8, "u8"],
  [UInt16, "u16"],
  [UInt32, "u32"],
  [UInt64, "u64"],
  # floats
  [Float32, "f32"],
  [Float64, "f64"],
  # everything else
  [Boolean, "bool"],
  [Utf8, "str"],
  [List, "list"],
  [Date, "date"],
  [Datetime, "datetime"],
  [Duration, "duration"],
  [Time, "time"],
  [Object, "object"],
  [Categorical, "categorical"],
  [Struct, "struct"],
  [Binary, "binary"]
].to_h
4184
+
4185
# Rename the given Series in place and return its underlying series object.
def series_to_rbseries(name, values)
  # should not be in-place?
  values.tap { |series| series.rename(name, in_place: true) }._s
end
4190
+
4191
# Build the underlying series object from a Numo array. One-dimensional
# input is converted directly; other shapes are flattened, converted and
# reshaped back.
def numo_to_rbseries(name, values, strict: true, nan_to_null: false)
  # not needed yet
  # if !values.contiguous?
  # end

  shape = values.shape

  if shape.length == 1
    values, dtype = numo_values_and_dtype(values)
    # For the float classes the strict flag is replaced by nan_to_null.
    strict = nan_to_null if [Numo::SFloat, Numo::DFloat].include?(dtype)
    return sequence_to_rbseries(name, values.to_a, strict: strict) if dtype == Numo::RObject

    constructor = numo_type_to_constructor(dtype)
    # TODO improve performance
    return constructor.call(name, values.to_a, strict)
  end

  raise Todo if shape.sum == 0

  # Flatten to one dimension, convert recursively, then restore the shape.
  flattened = values.reshape(shape.inject(&:*))
  flat_series = numo_to_rbseries(
    name,
    flattened,
    strict: strict,
    nan_to_null: nan_to_null
  )
  Utils.wrap_s(flat_series).reshape(shape)._s
end
4220
+
4221
# Return the values unchanged together with their class.
def numo_values_and_dtype(values)
  value_class = values.class
  [values, value_class]
end
4224
+
4225
# Pick the RbSeries constructor for a Numo array class.
#
# @param dtype [Class] the Numo array class (e.g. Numo::SFloat, Numo::Int32)
# @return [Method] the matching RbSeries constructor; classes without an
#   entry fall back to RbSeries.new_object
def numo_type_to_constructor(dtype)
  # The float keys are Numo::SFloat / Numo::DFloat, the same classes
  # numo_to_rbseries checks for. The previous Numo::Float32 / Numo::Float64
  # keys would raise NameError, which `rescue KeyError` did not catch.
  constructor_name = {
    Numo::SFloat => :new_opt_f32,
    Numo::DFloat => :new_opt_f64,
    Numo::Int8 => :new_opt_i8,
    Numo::Int16 => :new_opt_i16,
    Numo::Int32 => :new_opt_i32,
    Numo::Int64 => :new_opt_i64,
    Numo::UInt8 => :new_opt_u8,
    Numo::UInt16 => :new_opt_u16,
    Numo::UInt32 => :new_opt_u32,
    Numo::UInt64 => :new_opt_u64
  }.fetch(dtype, :new_object)

  RbSeries.method(constructor_name)
end
4241
+
4242
# Build the underlying series object from a Ruby sequence.
#
# The constructor is chosen from the requested dtype when one is given and
# has a direct constructor, otherwise from the class of the first non-nil
# value.
#
# @param name name forwarded to the constructors
# @param values the sequence of values
# @param dtype requested dtype, or nil to choose one from the values
# @param strict [Boolean] forwarded to the constructors
# @param dtype_if_empty dtype used for nil/empty input when `dtype` is nil
#   (Float32 when this is nil as well)
# @return the object produced by the selected constructor
def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
  ruby_dtype = nil

  if (values.nil? || values.empty?) && dtype.nil?
    dtype = dtype_if_empty || Float32
  elsif dtype == List
    ruby_dtype = ::Array
  end

  rb_temporal_types = [::Date, ::DateTime, ::Time]
  rb_temporal_types << ActiveSupport::TimeWithZone if defined?(ActiveSupport::TimeWithZone)

  value = _get_first_non_none(values)
  # A sequence whose first non-nil element is a Hash is built through DataFrame.
  return DataFrame.new(values).to_struct(name)._s if value.is_a?(Hash)

  if !dtype.nil? && ![List, Struct, Unknown].include?(dtype) && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
    # A bare Array dtype takes its width from the first value.
    if dtype == Array && !dtype.is_a?(Array) && value.is_a?(::Array)
      dtype = Array.new(nil, value.size)
    end

    constructor = polars_type_to_constructor(dtype)
    # TODO remove
    strict = false if dtype == Decimal
    rbseries = constructor.call(name, values, strict)

    # These dtypes are built by a generic constructor, so cast when the
    # resulting dtype differs from the requested one.
    base_type = dtype.is_a?(DataType) ? dtype.class : dtype
    if [Date, Datetime, Duration, Time, Categorical, Boolean, Enum].include?(base_type) && rbseries.dtype != dtype
      rbseries = rbseries.cast(dtype, true)
    end
    rbseries
  elsif dtype == Struct
    struct_schema = dtype.is_a?(Struct) ? dtype.to_schema : nil
    empty = {}
    DataFrame.sequence_to_rbdf(
      values.map { |v| v.nil? ? empty : v },
      schema: struct_schema,
      orient: "row",
    ).to_struct(name)
  else
    if ruby_dtype.nil?
      # generic default dtype is Float when every value is nil
      ruby_dtype = value.nil? ? Float : value.class
    end

    # temporal branch
    if rb_temporal_types.include?(ruby_dtype)
      if dtype.nil?
        dtype = Utils.rb_type_to_dtype(ruby_dtype)
      elsif rb_temporal_types.include?(dtype)
        dtype = Utils.rb_type_to_dtype(dtype)
      end

      # TODO: cast to a specific time unit once one is available here
      rb_series = RbSeries.new_from_any_values(name, values, strict)
      Utils.wrap_s(rb_series)._s
    elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
      raise Todo
    elsif ruby_dtype == ::Array
      return RbSeries.new_object(name, values, strict) if dtype.is_a?(Object)

      if dtype
        srs = sequence_from_anyvalue_or_object(name, values)
        # `cast` on this object is called positionally elsewhere in this
        # method (`cast(dtype, true)`); pass the strict flag the same way
        # instead of as a keyword hash.
        srs = srs.cast(dtype, false) if dtype != srs.dtype
        return srs
      end
      sequence_from_anyvalue_or_object(name, values)
    elsif ruby_dtype == Series
      RbSeries.new_series_list(name, values.map(&:_s), strict)
    elsif ruby_dtype == RbSeries
      RbSeries.new_series_list(name, values, strict)
    else
      constructor =
        if value.is_a?(::String)
          # UTF-8 strings become string series, other encodings binary.
          if value.encoding == Encoding::UTF_8
            RbSeries.method(:new_str)
          else
            RbSeries.method(:new_binary)
          end
        elsif value.is_a?(Integer) && values.any? { |v| v.is_a?(Float) }
          # TODO improve performance
          RbSeries.method(:new_opt_f64)
        else
          rb_type_to_constructor(value.class)
        end

      construct_series_with_fallbacks(constructor, name, values, dtype, strict: strict)
    end
  end
end
4350
+
4351
# Try the given constructor first; if it raises, build the series from
# generic values instead (with the dtype when one was given).
def construct_series_with_fallbacks(constructor, name, values, dtype, strict:)
  constructor.call(name, values, strict)
rescue
  return RbSeries.new_from_any_values(name, values, strict) if dtype.nil?

  RbSeries.new_from_any_values_and_dtype(name, values, dtype, strict)
end
4362
+
4363
# Build from generic values in strict mode; if that raises, fall back to an
# object series built non-strictly.
def sequence_from_anyvalue_or_object(name, values)
  begin
    RbSeries.new_from_any_values(name, values, true)
  rescue
    RbSeries.new_object(name, values, false)
  end
end
4368
+
4369
# Dtype class => RbSeries constructor. Entries are listed by constructor
# name and resolved to bound Method objects when the constant is defined.
POLARS_TYPE_TO_CONSTRUCTOR = {
  Float32 => :new_opt_f32,
  Float64 => :new_opt_f64,
  Int8 => :new_opt_i8,
  Int16 => :new_opt_i16,
  Int32 => :new_opt_i32,
  Int64 => :new_opt_i64,
  UInt8 => :new_opt_u8,
  UInt16 => :new_opt_u16,
  UInt32 => :new_opt_u32,
  UInt64 => :new_opt_u64,
  Decimal => :new_decimal,
  Date => :new_from_any_values,
  Datetime => :new_from_any_values,
  Duration => :new_from_any_values,
  Time => :new_from_any_values,
  Boolean => :new_opt_bool,
  Utf8 => :new_str,
  Object => :new_object,
  Categorical => :new_str,
  Enum => :new_str,
  Binary => :new_binary,
  Null => :new_null
}.transform_values { |constructor_name| RbSeries.method(constructor_name) }
4393
+
4394
# Symbolic dtype name => RbSeries constructor, resolved to bound Method
# objects when the constant is defined.
SYM_TYPE_TO_CONSTRUCTOR = {
  f32: :new_opt_f32,
  f64: :new_opt_f64,
  i8: :new_opt_i8,
  i16: :new_opt_i16,
  i32: :new_opt_i32,
  i64: :new_opt_i64,
  u8: :new_opt_u8,
  u16: :new_opt_u16,
  u32: :new_opt_u32,
  u64: :new_opt_u64,
  bool: :new_opt_bool,
  str: :new_str
}.transform_values { |constructor_name| RbSeries.method(constructor_name) }
4408
+
4409
# Resolve the constructor for a dtype given as an Array dtype instance, a
# DataType class, a DataType instance, or anything responding to `to_sym`.
# Raises ArgumentError when the lookup table has no entry.
def polars_type_to_constructor(dtype)
  if dtype.is_a?(Array)
    # Array dtypes carry width and inner type, so close over the dtype.
    return ->(name, values, strict) { RbSeries.new_array(dtype.width, dtype.inner, name, values, strict) }
  end

  table, key =
    if dtype.is_a?(Class) && dtype < DataType
      [POLARS_TYPE_TO_CONSTRUCTOR, dtype]
    elsif dtype.is_a?(DataType)
      [POLARS_TYPE_TO_CONSTRUCTOR, dtype.class]
    else
      [SYM_TYPE_TO_CONSTRUCTOR, dtype.to_sym]
    end
  table.fetch(key)
rescue KeyError
  raise ArgumentError, "Cannot construct RbSeries for type #{dtype}."
end
4424
+
4425
# Ruby class => RbSeries constructor, resolved to bound Method objects when
# the constant is defined.
RB_TYPE_TO_CONSTRUCTOR = {
  Float => :new_opt_f64,
  Integer => :new_opt_i64,
  TrueClass => :new_opt_bool,
  FalseClass => :new_opt_bool,
  BigDecimal => :new_decimal,
  NilClass => :new_null
}.transform_values { |constructor_name| RbSeries.method(constructor_name) }
4433
+
4434
# Constructor for a Ruby class; classes without an entry use RbSeries.new_object.
def rb_type_to_constructor(dtype)
  RB_TYPE_TO_CONSTRUCTOR.fetch(dtype) { RbSeries.method(:new_object) }
end
4439
+
4440
# Return the first element that is not nil, or nil when there is none.
# `false` counts as a value.
def _get_first_non_none(values)
  values.each { |item| return item unless item.nil? }
  nil
end
4443
+ end
4444
+ end