polars-df 0.13.0-x64-mingw-ucrt

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +208 -0
  4. data/Cargo.lock +2556 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +39278 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +104 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +36 -0
  18. data/lib/polars/cat_name_space.rb +88 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +98 -0
  21. data/lib/polars/data_frame.rb +5191 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1397 -0
  24. data/lib/polars/date_time_name_space.rb +1287 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +38 -0
  27. data/lib/polars/expr.rb +7256 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +271 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1329 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +136 -0
  39. data/lib/polars/functions/range/datetime_range.rb +149 -0
  40. data/lib/polars/functions/range/int_range.rb +51 -0
  41. data/lib/polars/functions/range/time_range.rb +141 -0
  42. data/lib/polars/functions/repeat.rb +144 -0
  43. data/lib/polars/functions/whenthen.rb +96 -0
  44. data/lib/polars/functions.rb +57 -0
  45. data/lib/polars/group_by.rb +613 -0
  46. data/lib/polars/io/avro.rb +24 -0
  47. data/lib/polars/io/csv.rb +696 -0
  48. data/lib/polars/io/database.rb +73 -0
  49. data/lib/polars/io/ipc.rb +275 -0
  50. data/lib/polars/io/json.rb +29 -0
  51. data/lib/polars/io/ndjson.rb +80 -0
  52. data/lib/polars/io/parquet.rb +233 -0
  53. data/lib/polars/lazy_frame.rb +2708 -0
  54. data/lib/polars/lazy_group_by.rb +181 -0
  55. data/lib/polars/list_expr.rb +791 -0
  56. data/lib/polars/list_name_space.rb +449 -0
  57. data/lib/polars/meta_expr.rb +222 -0
  58. data/lib/polars/name_expr.rb +198 -0
  59. data/lib/polars/plot.rb +109 -0
  60. data/lib/polars/rolling_group_by.rb +35 -0
  61. data/lib/polars/series.rb +4444 -0
  62. data/lib/polars/slice.rb +104 -0
  63. data/lib/polars/sql_context.rb +194 -0
  64. data/lib/polars/string_cache.rb +75 -0
  65. data/lib/polars/string_expr.rb +1495 -0
  66. data/lib/polars/string_name_space.rb +811 -0
  67. data/lib/polars/struct_expr.rb +98 -0
  68. data/lib/polars/struct_name_space.rb +96 -0
  69. data/lib/polars/testing.rb +507 -0
  70. data/lib/polars/utils/constants.rb +9 -0
  71. data/lib/polars/utils/convert.rb +97 -0
  72. data/lib/polars/utils/parse.rb +89 -0
  73. data/lib/polars/utils/various.rb +76 -0
  74. data/lib/polars/utils/wrap.rb +19 -0
  75. data/lib/polars/utils.rb +130 -0
  76. data/lib/polars/version.rb +4 -0
  77. data/lib/polars/whenthen.rb +83 -0
  78. data/lib/polars-df.rb +1 -0
  79. data/lib/polars.rb +91 -0
  80. metadata +138 -0
@@ -0,0 +1,4444 @@
1
+ module Polars
2
+ # A Series represents a single column in a polars DataFrame.
3
+ class Series
4
+ include ExprDispatch
5
+
6
+ # Create a new Series.
7
+ #
8
+ # @param name [String, Array, nil]
9
+ # Name of the series. Will be used as a column name when used in a DataFrame.
10
+ # When not specified, name is set to an empty string.
11
+ # @param values [Array, nil]
12
+ # One-dimensional data in various forms. Supported are: Array, Range, Series, and Numo::NArray.
13
+ # @param dtype [Symbol, nil]
14
+ # Polars dtype of the Series data. If not specified, the dtype is inferred.
15
+ # @param strict [Boolean]
16
+ # Throw error on numeric overflow.
17
+ # @param nan_to_null [Boolean]
18
+ # Forwarded to the Numo::NArray conversion; not used for other kinds of values.
19
+ # @param dtype_if_empty [Symbol, nil]
20
+ # If no dtype is specified and values contains `nil` or an empty array,
21
+ # set the Polars dtype of the Series data. If not specified, Float32 is used.
22
+ #
23
+ # @example Constructing a Series by specifying name and values positionally:
24
+ # s = Polars::Series.new("a", [1, 2, 3])
25
+ #
26
+ # @example Notice that the dtype is automatically inferred as a polars `Int64`:
27
+ # s.dtype
28
+ # # => Polars::Int64
29
+ #
30
+ # @example Constructing a Series with a specific dtype:
31
+ # s2 = Polars::Series.new("a", [1, 2, 3], dtype: :f32)
32
+ #
33
+ # @example It is possible to construct a Series with values as the first positional argument. This syntax is considered an anti-pattern, but it can be useful in certain scenarios. You must specify any other arguments through keywords.
34
+ # s3 = Polars::Series.new([1, 2, 3])
35
def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false, dtype_if_empty: nil)
  # A non-String first argument is taken to be the values, provided no
  # second positional argument was given; otherwise it is rejected.
  unless name.nil? || name.is_a?(::String)
    raise ArgumentError, "Series name must be a string." unless values.nil?

    values = name
    name = nil
  end

  name ||= ""

  # TODO improve
  # Ranges that begin with a String are expanded and then handled as Arrays.
  values = values.to_a if values.is_a?(Range) && values.begin.is_a?(::String)

  if values.nil?
    self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
  elsif values.is_a?(Series)
    self._s = series_to_rbseries(name, values)
  elsif values.is_a?(Range)
    # Inclusive ranges get their end bumped by one before being handed to
    # `Polars.arange`.
    stop = values.last + (values.exclude_end? ? 0 : 1)
    ranged = Polars.arange(values.first, stop, step: 1, eager: true, dtype: dtype)
    self._s = ranged.rename(name, in_place: true)._s
  elsif values.is_a?(::Array)
    self._s = sequence_to_rbseries(name, values, dtype: dtype, strict: strict, dtype_if_empty: dtype_if_empty)
  elsif defined?(Numo::NArray) && values.is_a?(Numo::NArray)
    self._s = numo_to_rbseries(name, values, strict: strict, nan_to_null: nan_to_null)
    # `cast` reads `_s`, so the requested dtype is applied only after the
    # converted data has been stored.
    self._s = cast(dtype, strict: true)._s unless dtype.nil?
  else
    raise ArgumentError, "Series constructor called with unsupported type; got #{values.class.name}"
  end
end
80
+
81
+ # @private
82
+ def self._from_rbseries(s)
83
+ series = Series.allocate
84
+ series._s = s
85
+ series
86
+ end
87
+
88
+ # Get the data type of this Series.
89
+ #
90
+ # @return [Object] the Polars data type, e.g. `Polars::Int64`.
91
+ def dtype
92
+ _s.dtype
93
+ end
94
+
95
# Report the flags that are set on the Series.
#
# `"SORTED_ASC"` and `"SORTED_DESC"` are always present; `"FAST_EXPLODE"` is
# added only when the dtype is a `List`.
#
# @return [Hash]
def flags
  sorted_flags = {
    "SORTED_ASC" => _s.is_sorted_flag,
    "SORTED_DESC" => _s.is_sorted_reverse_flag
  }
  sorted_flags.tap do |result|
    result["FAST_EXPLODE"] = _s.can_fast_explode_flag if dtype.is_a?(List)
  end
end
108
+
109
+ # Get the inner dtype of a List typed Series.
110
+ #
111
+ # @return [Symbol]
112
+ def inner_dtype
113
+ _s.inner_dtype
114
+ end
115
+
116
+ # Get the name of this Series.
117
+ #
118
+ # @return [String]
119
+ def name
120
+ _s.name
121
+ end
122
+
123
+ # Shape of this Series.
124
+ #
125
+ # @return [Array]
126
+ def shape
127
+ [_s.len]
128
+ end
129
+
130
+ # Get the time unit of underlying Datetime Series as `"ns"`, `"us"`, or `"ms"`.
131
+ #
132
+ # @return [String]
133
+ def time_unit
134
+ _s.time_unit
135
+ end
136
+
137
+ # Returns a string representing the Series.
138
+ #
139
+ # @return [String]
140
+ def to_s
141
+ _s.to_s
142
+ end
143
+ alias_method :inspect, :to_s
144
+
145
# Bitwise AND.
#
# @param other [Object]
#   Another Series, or a value that is first wrapped in a single-element Series.
#
# @return [Series]
def &(other)
  rhs = other.is_a?(Series) ? other : Series.new([other])
  Utils.wrap_s(_s.bitand(rhs._s))
end
154
+
155
# Bitwise OR.
#
# @param other [Object]
#   Another Series, or a value that is first wrapped in a single-element Series.
#
# @return [Series]
def |(other)
  rhs = other.is_a?(Series) ? other : Series.new([other])
  Utils.wrap_s(_s.bitor(rhs._s))
end
164
+
165
# Bitwise XOR.
#
# @param other [Object]
#   Another Series, or a value that is first wrapped in a single-element Series.
#
# @return [Series]
def ^(other)
  rhs = other.is_a?(Series) ? other : Series.new([other])
  Utils.wrap_s(_s.bitxor(rhs._s))
end
174
+
175
+ # Equal.
176
+ #
177
+ # @return [Series]
178
+ def ==(other)
179
+ _comp(other, :eq)
180
+ end
181
+
182
+ # Not equal.
183
+ #
184
+ # @return [Series]
185
+ def !=(other)
186
+ _comp(other, :neq)
187
+ end
188
+
189
+ # Greater than.
190
+ #
191
+ # @return [Series]
192
+ def >(other)
193
+ _comp(other, :gt)
194
+ end
195
+
196
+ # Less than.
197
+ #
198
+ # @return [Series]
199
+ def <(other)
200
+ _comp(other, :lt)
201
+ end
202
+
203
+ # Greater than or equal.
204
+ #
205
+ # @return [Series]
206
+ def >=(other)
207
+ _comp(other, :gt_eq)
208
+ end
209
+
210
+ # Less than or equal.
211
+ #
212
+ # @return [Series]
213
+ def <=(other)
214
+ _comp(other, :lt_eq)
215
+ end
216
+
217
+ # Method equivalent of operator expression `series <= other`.
218
+ #
219
+ # @return [Series]
220
+ def le(other)
221
+ self <= other
222
+ end
223
+
224
+ # Method equivalent of operator expression `series < other`.
225
+ #
226
+ # @return [Series]
227
+ def lt(other)
228
+ self < other
229
+ end
230
+
231
+ # Method equivalent of operator expression `series == other`.
232
+ #
233
+ # @return [Series]
234
+ def eq(other)
235
+ self == other
236
+ end
237
+
238
+ # Method equivalent of equality operator `series == other` where `nil == nil`.
239
+ #
240
+ # This differs from the standard `eq` where null values are propagated.
241
+ #
242
+ # @param other [Object]
243
+ # A literal or expression value to compare with.
244
+ #
245
+ # @return [Object]
246
+ #
247
+ # @example
248
+ # s1 = Polars::Series.new("a", [333, 200, nil])
249
+ # s2 = Polars::Series.new("a", [100, 200, nil])
250
+ # s1.eq(s2)
251
+ # # =>
252
+ # # shape: (3,)
253
+ # # Series: 'a' [bool]
254
+ # # [
255
+ # # false
256
+ # # true
257
+ # # null
258
+ # # ]
259
+ #
260
+ # @example
261
+ # s1.eq_missing(s2)
262
+ # # =>
263
+ # # shape: (3,)
264
+ # # Series: 'a' [bool]
265
+ # # [
266
+ # # false
267
+ # # true
268
+ # # true
269
+ # # ]
270
def eq_missing(other)
  # An Expr operand is handled by wrapping this Series with `Polars.lit`.
  return Polars.lit(self).eq_missing(other) if other.is_a?(Expr)

  # Any other operand is compared through a one-column frame built from
  # this Series.
  comparison = Polars.col(name).eq_missing(other)
  to_frame.select(comparison).to_series
end
276
+
277
+ # Method equivalent of operator expression `series != other`.
278
+ #
279
+ # @return [Series]
280
+ def ne(other)
281
+ self != other
282
+ end
283
+
284
+ # Method equivalent of inequality operator `series != other` where `nil == nil`.
285
+ #
286
+ # This differs from the standard `ne` where null values are propagated.
287
+ #
288
+ # @param other [Object]
289
+ # A literal or expression value to compare with.
290
+ #
291
+ # @return [Object]
292
+ #
293
+ # @example
294
+ # s1 = Polars::Series.new("a", [333, 200, nil])
295
+ # s2 = Polars::Series.new("a", [100, 200, nil])
296
+ # s1.ne(s2)
297
+ # # =>
298
+ # # shape: (3,)
299
+ # # Series: 'a' [bool]
300
+ # # [
301
+ # # true
302
+ # # false
303
+ # # null
304
+ # # ]
305
+ #
306
+ # @example
307
+ # s1.ne_missing(s2)
308
+ # # =>
309
+ # # shape: (3,)
310
+ # # Series: 'a' [bool]
311
+ # # [
312
+ # # true
313
+ # # false
314
+ # # false
315
+ # # ]
316
def ne_missing(other)
  # An Expr operand is handled by wrapping this Series with `Polars.lit`.
  return Polars.lit(self).ne_missing(other) if other.is_a?(Expr)

  # Any other operand is compared through a one-column frame built from
  # this Series.
  comparison = Polars.col(name).ne_missing(other)
  to_frame.select(comparison).to_series
end
322
+
323
+ # Method equivalent of operator expression `series >= other`.
324
+ #
325
+ # @return [Series]
326
+ def ge(other)
327
+ self >= other
328
+ end
329
+
330
+ # Method equivalent of operator expression `series > other`.
331
+ #
332
+ # @return [Series]
333
+ def gt(other)
334
+ self > other
335
+ end
336
+
337
+ # Performs addition.
338
+ #
339
+ # @return [Series]
340
+ def +(other)
341
+ _arithmetic(other, :add)
342
+ end
343
+
344
+ # Performs subtraction.
345
+ #
346
+ # @return [Series]
347
+ def -(other)
348
+ _arithmetic(other, :sub)
349
+ end
350
+
351
# Performs multiplication.
#
# @param other [Object]
#   Right-hand operand. A DataFrame operand is handled by delegating to
#   `other * self`.
#
# @return [Series]
#
# @raise [ArgumentError] if `is_temporal` is truthy for this Series.
def *(other)
  raise ArgumentError, "first cast to integer before multiplying datelike dtypes" if is_temporal
  return other * self if other.is_a?(DataFrame)

  _arithmetic(other, :mul)
end
363
+
364
# Performs division.
#
# @param other [Object]
#   Right-hand operand.
#
# @return [Series]
#
# @raise [ArgumentError] if `is_temporal` is truthy for this Series.
def /(other)
  raise ArgumentError, "first cast to integer before dividing datelike dtypes" if is_temporal

  # A Series that is not already float is cast to Float64 and the division
  # is then performed on the cast Series.
  is_float ? _arithmetic(other, :div) : cast(Float64) / other
end
378
+
379
# Returns the modulo.
#
# @param other [Object]
#   Right-hand operand.
#
# @return [Series]
#
# @raise [ArgumentError] if `is_datelike` is truthy for this Series.
def %(other)
  raise ArgumentError, "first cast to integer before applying modulo on datelike dtypes" if is_datelike

  _arithmetic(other, :rem)
end
388
+
389
# Raises to the power of exponent.
#
# @param power [Object]
#   Exponent, passed on to the expression-level `pow`.
#
# @return [Series]
#
# @raise [ArgumentError] if `is_datelike` is truthy for this Series.
def **(power)
  raise ArgumentError, "first cast to integer before raising datelike dtypes to a power" if is_datelike

  raised = Polars.col(name).pow(power)
  to_frame.select(raised).to_series
end
398
+
399
+ # Performs boolean not.
400
+ #
401
+ # @return [Series]
402
+ def !
403
+ if dtype == Boolean
404
+ return Utils.wrap_s(_s.not)
405
+ end
406
+ raise NotImplementedError
407
+ end
408
+
409
+ # Performs negation.
410
+ #
411
+ # @return [Series]
412
+ def -@
413
+ 0 - self
414
+ end
415
+
416
# Iterates over the elements of the Series in order.
#
# When no block is given, an Enumerator is returned instead. Its `size` is
# computed lazily from `length`, so it can be queried without iterating.
#
# @yieldparam element [Object] each element, as returned by `self[i]`
#
# @return [Enumerator, Integer]
#   An Enumerator when called without a block; otherwise the value of
#   `length.times`, i.e. the number of elements.
def each
  # The block passed to `to_enum` is only evaluated when `Enumerator#size`
  # is requested.
  return to_enum(:each) { length } unless block_given?

  length.times do |i|
    yield self[i]
  end
end
426
+
427
# Returns elements of the Series.
#
# @param item [Object]
#   One of:
#   - a Series with an integer dtype: positions to take
#   - a Series for which `bool?` is true: mask handed to `filter`
#   - an Integer: a single position; negative values have `len` added to them
#   - a Range: resolved through `Slice`
#   - anything accepted by `Utils.is_int_sequence`: positions to take
#
# @return [Object]
#
# @raise [ArgumentError] if `item` matches none of the supported forms.
def [](item)
  if item.is_a?(Series)
    integer_dtypes = [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64]
    return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s)) if integer_dtypes.include?(item.dtype)
    return filter(item) if item.bool?
  end

  case item
  when Integer
    position = item < 0 ? len + item : item
    return _s.get_idx(position)
  when Range
    return Slice.new(self).apply(item)
  end

  unless Utils.is_int_sequence(item)
    raise ArgumentError, "Cannot get item of type: #{item.class.name}"
  end

  positions = Series.new("", item)
  Utils.wrap_s(_s.take_with_series(_pos_idxs(positions)._s))
end
457
+
458
# Sets one or more elements of the Series.
#
# @param key [Series, Array, Range, Integer]
#   Where to write. Array, Range and Integer keys are converted to a UInt32
#   Series and the assignment is retried with that Series.
# @param value [Object]
#   Value to write. An Array value is only accepted when `is_numeric` or
#   `is_datelike` is truthy.
#
# @return [Object]
#
# @raise [ArgumentError] for an Array value on an unsupported dtype, or for
#   an unsupported key type.
def []=(key, value)
  if value.is_a?(::Array)
    unless is_numeric || is_datelike
      raise ArgumentError, "cannot set Series of dtype: #{dtype} with list/tuple as value; use a scalar value"
    end

    scatter(key, value)
    return
  end

  case key
  when Series
    key_dtype = key.dtype
    if key_dtype == Boolean
      self._s = set(key, value)._s
    elsif key_dtype == UInt64
      # UInt64 positions are narrowed to UInt32 before scattering.
      self._s = scatter(key.cast(UInt32), value)._s
    elsif key_dtype == UInt32
      self._s = scatter(key, value)._s
    else
      raise Todo
    end
  when ::Array
    positions = Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))
    self[positions] = value
  when Range
    positions = Series.new("", key, dtype: UInt32)
    self[positions] = value
  when Integer
    self[[key]] = value
  else
    raise ArgumentError, "cannot use #{key} for indexing"
  end
end
492
+
493
+ # Return an estimation of the total (heap) allocated size of the Series.
494
+ #
495
+ # Estimated size is given in the specified unit (bytes by default).
496
+ #
497
+ # This estimation is the sum of the size of its buffers, validity, including
498
+ # nested arrays. Multiple arrays may share buffers and bitmaps. Therefore, the
499
+ # size of 2 arrays is not the sum of the sizes computed from this function. In
500
+ # particular, StructArray's size is an upper bound.
501
+ #
502
+ # When an array is sliced, its allocated size remains constant because the buffer is
503
+ # unchanged. However, this function will yield a smaller number. This is because
504
+ # this function returns the visible size of the buffer, not its total capacity.
505
+ #
506
+ # FFI buffers are included in this estimation.
507
+ #
508
+ # @param unit ["b", "kb", "mb", "gb", "tb"]
509
+ # Scale the returned size to the given unit.
510
+ #
511
+ # @return [Numeric]
512
+ #
513
+ # @example
514
+ # s = Polars::Series.new("values", 1..1_000_000, dtype: :u32)
515
+ # s.estimated_size
516
+ # # => 4000000
517
+ # s.estimated_size("mb")
518
+ # # => 3.814697265625
519
+ def estimated_size(unit = "b")
520
+ sz = _s.estimated_size
521
+ Utils.scale_bytes(sz, to: unit)
522
+ end
523
+
524
+ # Compute the square root of the elements.
525
+ #
526
+ # @return [Series]
527
+ def sqrt
528
+ self**0.5
529
+ end
530
+
531
# Check if any boolean value in the column is `true`.
#
# With a block, the block is first applied to the Series via `apply` and
# the check runs on the result.
#
# @param ignore_nulls [Boolean]
#   Passed to the native `any`; with a block it is passed to `apply` as
#   `skip_nulls`.
#
# @return [Boolean]
def any?(ignore_nulls: true, &block)
  return _s.any(ignore_nulls) unless block_given?

  apply(skip_nulls: ignore_nulls, &block).any?
end
541
+ alias_method :any, :any?
542
+
543
# Check if all boolean values in the column are `true`.
#
# With a block, the block is first applied to the Series via `apply` and
# the check runs on the result.
#
# @param ignore_nulls [Boolean]
#   Passed to the native `all`; with a block it is passed to `apply` as
#   `skip_nulls`.
#
# @return [Boolean]
def all?(ignore_nulls: true, &block)
  return _s.all(ignore_nulls) unless block_given?

  apply(skip_nulls: ignore_nulls, &block).all?
end
553
+ alias_method :all, :all?
554
+
555
# Check if all boolean values in the column are `false`.
#
# With a block, the block is first applied to the Series via `apply` and
# the check runs on the result.
#
# @return [Boolean]
def none?(&block)
  return apply(&block).none? if block_given?

  # Negate every value, then test whether all of the negations hold.
  all_negated = Polars.col(name).is_not.all
  to_frame.select(all_negated).to_series[0]
end
565
+ alias_method :none, :none?
566
+
567
+ # Compute the logarithm to a given base.
568
+ #
569
+ # @param base [Float]
570
+ # Given base, defaults to `Math::E`.
571
+ #
572
+ # @return [Series]
573
+ def log(base = Math::E)
574
+ super
575
+ end
576
+
577
+ # Compute the base 10 logarithm of the input array, element-wise.
578
+ #
579
+ # @return [Series]
580
+ def log10
581
+ super
582
+ end
583
+
584
+ # Compute the exponential, element-wise.
585
+ #
586
+ # @return [Series]
587
+ def exp
588
+ super
589
+ end
590
+
591
+ # Create a new Series that copies data from this Series without null values.
592
+ #
593
+ # @return [Series]
594
+ def drop_nulls
595
+ super
596
+ end
597
+
598
+ # Drop NaN values.
599
+ #
600
+ # @return [Series]
601
+ def drop_nans
602
+ super
603
+ end
604
+
605
+ # Cast this Series to a DataFrame.
606
+ #
607
+ # @return [DataFrame]
608
+ def to_frame
609
+ Utils.wrap_df(RbDataFrame.new([_s]))
610
+ end
611
+
612
+ # Quick summary statistics of a series.
613
+ #
614
+ # Series with mixed datatypes will return summary statistics for the datatype of
615
+ # the first value.
616
+ #
617
+ # @return [DataFrame]
618
+ #
619
+ # @example
620
+ # series_num = Polars::Series.new([1, 2, 3, 4, 5])
621
+ # series_num.describe
622
+ # # =>
623
+ # # shape: (6, 2)
624
+ # # ┌────────────┬──────────┐
625
+ # # │ statistic ┆ value │
626
+ # # │ --- ┆ --- │
627
+ # # │ str ┆ f64 │
628
+ # # ╞════════════╪══════════╡
629
+ # # │ min ┆ 1.0 │
630
+ # # │ max ┆ 5.0 │
631
+ # # │ null_count ┆ 0.0 │
632
+ # # │ mean ┆ 3.0 │
633
+ # # │ std ┆ 1.581139 │
634
+ # # │ count ┆ 5.0 │
635
+ # # └────────────┴──────────┘
636
+ #
637
+ # @example
638
+ # series_str = Polars::Series.new(["a", "a", nil, "b", "c"])
639
+ # series_str.describe
640
+ # # =>
641
+ # # shape: (3, 2)
642
+ # # ┌────────────┬───────┐
643
+ # # │ statistic ┆ value │
644
+ # # │ --- ┆ --- │
645
+ # # │ str ┆ i64 │
646
+ # # ╞════════════╪═══════╡
647
+ # # │ unique ┆ 4 │
648
+ # # │ null_count ┆ 1 │
649
+ # # │ count ┆ 5 │
650
+ # # └────────────┴───────┘
651
def describe
  # An empty Series has nothing to summarise.
  raise ArgumentError, "Series must contain at least one value" if len == 0

  summary =
    if is_numeric
      # Numeric statistics are computed on an f64 cast of the Series.
      floats = cast(:f64)
      {
        "min" => floats.min,
        "max" => floats.max,
        "null_count" => floats.null_count,
        "mean" => floats.mean,
        "std" => floats.std,
        "count" => floats.len
      }
    elsif is_boolean
      {"sum" => sum, "null_count" => null_count, "count" => len}
    elsif is_utf8
      {"unique" => unique.length, "null_count" => null_count, "count" => len}
    elsif is_datelike
      # we coerce all to string, because a polars column
      # only has a single dtype and dates: datetime and count: int don't match
      {
        "min" => dt.min.to_s,
        "max" => dt.max.to_s,
        "null_count" => null_count.to_s,
        "count" => len.to_s
      }
    else
      raise TypeError, "This type is not supported"
    end

  Polars::DataFrame.new({"statistic" => summary.keys, "value" => summary.values})
end
693
+
694
+ # Reduce this Series to the sum value.
695
+ #
696
+ # @return [Numeric]
697
+ #
698
+ # @note
699
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
700
+ # `:i64` before summing to prevent overflow issues.
701
+ #
702
+ # @example
703
+ # s = Polars::Series.new("a", [1, 2, 3])
704
+ # s.sum
705
+ # # => 6
706
+ def sum
707
+ _s.sum
708
+ end
709
+
710
+ # Reduce this Series to the mean value.
711
+ #
712
+ # @return [Float, nil]
713
+ #
714
+ # @example
715
+ # s = Polars::Series.new("a", [1, 2, 3])
716
+ # s.mean
717
+ # # => 2.0
718
+ def mean
719
+ _s.mean
720
+ end
721
+
722
+ # Reduce this Series to the product value.
723
+ #
724
+ # @return [Numeric]
725
+ def product
726
+ to_frame.select(Polars.col(name).product).to_series[0]
727
+ end
728
+
729
+ # Get the minimal value in this Series.
730
+ #
731
+ # @return [Object]
732
+ #
733
+ # @example
734
+ # s = Polars::Series.new("a", [1, 2, 3])
735
+ # s.min
736
+ # # => 1
737
+ def min
738
+ _s.min
739
+ end
740
+
741
+ # Get the maximum value in this Series.
742
+ #
743
+ # @return [Object]
744
+ #
745
+ # @example
746
+ # s = Polars::Series.new("a", [1, 2, 3])
747
+ # s.max
748
+ # # => 3
749
+ def max
750
+ _s.max
751
+ end
752
+
753
+ # Get maximum value, but propagate/poison encountered NaN values.
754
+ #
755
+ # @return [Object]
756
+ def nan_max
757
+ to_frame.select(Polars.col(name).nan_max)[0, 0]
758
+ end
759
+
760
+ # Get minimum value, but propagate/poison encountered NaN values.
761
+ #
762
+ # @return [Object]
763
+ def nan_min
764
+ to_frame.select(Polars.col(name).nan_min)[0, 0]
765
+ end
766
+
767
+ # Get the standard deviation of this Series.
768
+ #
769
+ # @param ddof [Integer]
770
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
771
+ # where N represents the number of elements.
772
+ #
773
+ # @return [Float, nil]
774
+ #
775
+ # @example
776
+ # s = Polars::Series.new("a", [1, 2, 3])
777
+ # s.std
778
+ # # => 1.0
779
def std(ddof: 1)
  # Non-numeric Series have no standard deviation.
  return nil unless is_numeric

  deviation = Polars.col(name).std(ddof: ddof)
  to_frame.select(deviation).to_series[0]
end
786
+
787
+ # Get variance of this Series.
788
+ #
789
+ # @param ddof [Integer]
790
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
791
+ # where N represents the number of elements.
792
+ #
793
+ # @return [Float, nil]
794
+ #
795
+ # @example
796
+ # s = Polars::Series.new("a", [1, 2, 3])
797
+ # s.var
798
+ # # => 1.0
799
def var(ddof: 1)
  # Non-numeric Series have no variance.
  return nil unless is_numeric

  variance = Polars.col(name).var(ddof: ddof)
  to_frame.select(variance).to_series[0]
end
806
+
807
+ # Get the median of this Series.
808
+ #
809
+ # @return [Float, nil]
810
+ #
811
+ # @example
812
+ # s = Polars::Series.new("a", [1, 2, 3])
813
+ # s.median
814
+ # # => 2.0
815
+ def median
816
+ _s.median
817
+ end
818
+
819
+ # Get the quantile value of this Series.
820
+ #
821
+ # @param quantile [Float, nil]
822
+ # Quantile between 0.0 and 1.0.
823
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
824
+ # Interpolation method.
825
+ #
826
+ # @return [Float, nil]
827
+ #
828
+ # @example
829
+ # s = Polars::Series.new("a", [1, 2, 3])
830
+ # s.quantile(0.5)
831
+ # # => 2.0
832
+ def quantile(quantile, interpolation: "nearest")
833
+ _s.quantile(quantile, interpolation)
834
+ end
835
+
836
+ # Get dummy variables.
837
+ #
838
+ # @return [DataFrame]
839
+ #
840
+ # @example
841
+ # s = Polars::Series.new("a", [1, 2, 3])
842
+ # s.to_dummies
843
+ # # =>
844
+ # # shape: (3, 3)
845
+ # # ┌─────┬─────┬─────┐
846
+ # # │ a_1 ┆ a_2 ┆ a_3 │
847
+ # # │ --- ┆ --- ┆ --- │
848
+ # # │ u8 ┆ u8 ┆ u8 │
849
+ # # ╞═════╪═════╪═════╡
850
+ # # │ 1 ┆ 0 ┆ 0 │
851
+ # # │ 0 ┆ 1 ┆ 0 │
852
+ # # │ 0 ┆ 0 ┆ 1 │
853
+ # # └─────┴─────┴─────┘
854
+ def to_dummies(separator: "_", drop_first: false)
855
+ Utils.wrap_df(_s.to_dummies(separator, drop_first))
856
+ end
857
+
858
+ # Bin continuous values into discrete categories.
859
+ #
860
+ # @param breaks [Array]
861
+ # List of unique cut points.
862
+ # @param labels [Array]
863
+ # Names of the categories. The number of labels must be equal to the number
864
+ # of cut points plus one.
865
+ # @param left_closed [Boolean]
866
+ # Set the intervals to be left-closed instead of right-closed.
867
+ # @param include_breaks [Boolean]
868
+ # Include a column with the right endpoint of the bin each observation falls
869
+ # in. This will change the data type of the output from a
870
+ # `Categorical` to a `Struct`.
871
+ #
872
+ # @return [Series]
873
+ #
874
+ # @example Divide the column into three categories.
875
+ # s = Polars::Series.new("foo", [-2, -1, 0, 1, 2])
876
+ # s.cut([-1, 1], labels: ["a", "b", "c"])
877
+ # # =>
878
+ # # shape: (5,)
879
+ # # Series: 'foo' [cat]
880
+ # # [
881
+ # # "a"
882
+ # # "a"
883
+ # # "b"
884
+ # # "b"
885
+ # # "c"
886
+ # # ]
887
+ #
888
+ # @example Create a DataFrame with the breakpoint and category for each value.
889
+ # cut = s.cut([-1, 1], include_breaks: true).alias("cut")
890
+ # s.to_frame.with_columns(cut).unnest("cut")
891
+ # # =>
892
+ # # shape: (5, 3)
893
+ # # ┌─────┬─────────────┬────────────┐
894
+ # # │ foo ┆ break_point ┆ category │
895
+ # # │ --- ┆ --- ┆ --- │
896
+ # # │ i64 ┆ f64 ┆ cat │
897
+ # # ╞═════╪═════════════╪════════════╡
898
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
899
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
900
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
901
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
902
+ # # │ 2 ┆ inf ┆ (1, inf] │
903
+ # # └─────┴─────────────┴────────────┘
904
def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
  binning = Polars.col(name).cut(
    breaks,
    labels: labels,
    left_closed: left_closed,
    include_breaks: include_breaks
  )
  binned = to_frame.select(binning).to_series
  return binned unless include_breaks

  # With breaks included, the struct fields are given fixed names.
  binned.struct.rename_fields(["break_point", "category"])
end
924
+
925
+ # Bin continuous values into discrete categories based on their quantiles.
926
+ #
927
+ # @param quantiles [Array, Integer]
928
+ # Either a list of quantile probabilities between 0 and 1 or a positive
929
+ # integer determining the number of bins with uniform probability.
930
+ # @param labels [Array]
931
+ # Names of the categories. The number of labels must be equal to the number
932
+ # of cut points plus one.
933
+ # @param left_closed [Boolean]
934
+ # Set the intervals to be left-closed instead of right-closed.
935
+ # @param allow_duplicates [Boolean]
936
+ # If set to `true`, duplicates in the resulting quantiles are dropped,
937
+ # rather than raising a `DuplicateError`. This can happen even with unique
938
+ # probabilities, depending on the data.
939
+ # @param include_breaks [Boolean]
940
+ # Include a column with the right endpoint of the bin each observation falls
941
+ # in. This will change the data type of the output from a
942
+ # `Categorical` to a `Struct`.
943
+ #
944
+ # @return [Series]
945
+ #
946
+ # @example Divide a column into three categories according to pre-defined quantile probabilities.
947
+ # s = Polars::Series.new("foo", [-2, -1, 0, 1, 2])
948
+ # s.qcut([0.25, 0.75], labels: ["a", "b", "c"])
949
+ # # =>
950
+ # # shape: (5,)
951
+ # # Series: 'foo' [cat]
952
+ # # [
953
+ # # "a"
954
+ # # "a"
955
+ # # "b"
956
+ # # "b"
957
+ # # "c"
958
+ # # ]
959
+ #
960
+ # @example Divide a column into two categories using uniform quantile probabilities.
961
+ # s.qcut(2, labels: ["low", "high"], left_closed: true)
962
+ # # =>
963
+ # # shape: (5,)
964
+ # # Series: 'foo' [cat]
965
+ # # [
966
+ # # "low"
967
+ # # "low"
968
+ # # "high"
969
+ # # "high"
970
+ # # "high"
971
+ # # ]
972
+ #
973
+ # @example Create a DataFrame with the breakpoint and category for each value.
974
+ # cut = s.qcut([0.25, 0.75], include_breaks: true).alias("cut")
975
+ # s.to_frame.with_columns(cut).unnest("cut")
976
+ # # =>
977
+ # # shape: (5, 3)
978
+ # # ┌─────┬─────────────┬────────────┐
979
+ # # │ foo ┆ break_point ┆ category │
980
+ # # │ --- ┆ --- ┆ --- │
981
+ # # │ i64 ┆ f64 ┆ cat │
982
+ # # ╞═════╪═════════════╪════════════╡
983
+ # # │ -2 ┆ -1.0 ┆ (-inf, -1] │
984
+ # # │ -1 ┆ -1.0 ┆ (-inf, -1] │
985
+ # # │ 0 ┆ 1.0 ┆ (-1, 1] │
986
+ # # │ 1 ┆ 1.0 ┆ (-1, 1] │
987
+ # # │ 2 ┆ inf ┆ (1, inf] │
988
+ # # └─────┴─────────────┴────────────┘
989
def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
  binning = Polars.col(name).qcut(
    quantiles,
    labels: labels,
    left_closed: left_closed,
    allow_duplicates: allow_duplicates,
    include_breaks: include_breaks
  )
  binned = to_frame.select(binning).to_series
  return binned unless include_breaks

  # With breaks included, the struct fields are given fixed names.
  binned.struct.rename_fields(["break_point", "category"])
end
1010
+
1011
+ # Get the lengths of runs of identical values.
1012
+ #
1013
+ # @return [Series]
1014
+ #
1015
+ # @example
1016
+ # s = Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3])
1017
+ # s.rle.struct.unnest
1018
+ # # =>
1019
+ # # shape: (6, 2)
1020
+ # # ┌─────┬───────┐
1021
+ # # │ len ┆ value │
1022
+ # # │ --- ┆ --- │
1023
+ # # │ u32 ┆ i64 │
1024
+ # # ╞═════╪═══════╡
1025
+ # # │ 2 ┆ 1 │
1026
+ # # │ 1 ┆ 2 │
1027
+ # # │ 1 ┆ 1 │
1028
+ # # │ 1 ┆ null │
1029
+ # # │ 1 ┆ 1 │
1030
+ # # │ 2 ┆ 3 │
1031
+ # # └─────┴───────┘
1032
+ def rle
1033
+ super
1034
+ end
1035
+
1036
+ # Map values to run IDs.
1037
+ #
1038
+ # Similar to RLE, but it maps each value to an ID corresponding to the run into
1039
+ # which it falls. This is especially useful when you want to define groups by
1040
+ # runs of identical values rather than the values themselves.
1041
+ #
1042
+ # @return [Series]
1043
+ #
1044
+ # @example
1045
+ # s = Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3])
1046
+ # s.rle_id
1047
+ # # =>
1048
+ # # shape: (8,)
1049
+ # # Series: 's' [u32]
1050
+ # # [
1051
+ # # 0
1052
+ # # 0
1053
+ # # 1
1054
+ # # 2
1055
+ # # 3
1056
+ # # 4
1057
+ # # 5
1058
+ # # 5
1059
+ # # ]
1060
+ def rle_id
1061
+ super
1062
+ end
1063
+
1064
+ # Count the unique values in a Series.
1065
+ #
1066
+ # @param sort [Boolean]
1067
+ # Ensure the output is sorted from most values to least.
1068
+ #
1069
+ # @return [DataFrame]
1070
+ #
1071
+ # @example
1072
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1073
+ # s.value_counts.sort("a")
1074
+ # # =>
1075
+ # # shape: (3, 2)
1076
+ # # ┌─────┬────────┐
1077
+ # # │ a ┆ counts │
1078
+ # # │ --- ┆ --- │
1079
+ # # │ i64 ┆ u32 │
1080
+ # # ╞═════╪════════╡
1081
+ # # │ 1 ┆ 1 │
1082
+ # # │ 2 ┆ 2 │
1083
+ # # │ 3 ┆ 1 │
1084
+ # # └─────┴────────┘
1085
def value_counts(
  sort: false,
  parallel: false,
  name: nil,
  normalize: false
)
  # Only a missing (nil) name is defaulted; the default label depends on
  # whether normalization was requested.
  name = (normalize ? "proportion" : "count") if name.nil?

  # All options are handed to the wrapped object positionally, in this order.
  counted = self._s.value_counts(sort, parallel, name, normalize)
  DataFrame._from_rbdf(counted)
end
1104
+
1105
+ # Return a count of the unique values in the order of appearance.
1106
+ #
1107
+ # @return [Series]
1108
+ #
1109
+ # @example
1110
+ # s = Polars::Series.new("id", ["a", "b", "b", "c", "c", "c"])
1111
+ # s.unique_counts
1112
+ # # =>
1113
+ # # shape: (3,)
1114
+ # # Series: 'id' [u32]
1115
+ # # [
1116
+ # # 1
1117
+ # # 2
1118
+ # # 3
1119
+ # # ]
1120
+ def unique_counts
1121
+ super # bare super: runs the next `unique_counts` implementation in the ancestor chain (none visible in this chunk; presumably expression dispatch — confirm)
1122
+ end
1123
+
1124
+ # Computes the entropy.
1125
+ #
1126
+ # Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities.
1127
+ #
1128
+ # @param base [Float]
1129
+ # Given base, defaults to `e`
1130
+ # @param normalize [Boolean]
1131
+ # Normalize pk if it doesn't sum to 1.
1132
+ #
1133
+ # @return [Float, nil]
1134
+ #
1135
+ # @example
1136
+ # a = Polars::Series.new([0.99, 0.005, 0.005])
1137
+ # a.entropy(normalize: true)
1138
+ # # => 0.06293300616044681
1139
+ #
1140
+ # @example
1141
+ # b = Polars::Series.new([0.65, 0.10, 0.25])
1142
+ # b.entropy(normalize: true)
1143
+ # # => 0.8568409950394724
1144
def entropy(base: Math::E, normalize: false)
  entropy_expr = Polars.lit(self).entropy(base: base, normalize: normalize)
  selected = Polars.select(entropy_expr)
  # The first element of the selected column is the value returned.
  selected.to_series[0]
end
1147
+
1148
+ # Run an expression over a sliding window that increases `1` slot every iteration.
1149
+ #
1150
+ # @param expr [Expr]
1151
+ # Expression to evaluate
1152
+ # @param min_periods [Integer]
1153
+ # Number of valid values there should be in the window before the expression
1154
+ # is evaluated. valid values = `length - null_count`
1155
+ # @param parallel [Boolean]
1156
+ # Run in parallel. Don't do this in a group by or another operation that
1157
+ # already has much parallelization.
1158
+ #
1159
+ # @return [Series]
1160
+ #
1161
+ # @note
1162
+ # This functionality is experimental and may change without it being considered a
1163
+ # breaking change.
1164
+ #
1165
+ # @note
1166
+ # This can be really slow as it can have `O(n^2)` complexity. Don't use this
1167
+ # for operations that visit all elements.
1168
+ #
1169
+ # @example
1170
+ # s = Polars::Series.new("values", [1, 2, 3, 4, 5])
1171
+ # s.cumulative_eval(Polars.element.first - Polars.element.last ** 2)
1172
+ # # =>
1173
+ # # shape: (5,)
1174
+ # # Series: 'values' [i64]
1175
+ # # [
1176
+ # # 0
1177
+ # # -3
1178
+ # # -8
1179
+ # # -15
1180
+ # # -24
1181
+ # # ]
1182
+ def cumulative_eval(expr, min_periods: 1, parallel: false)
1183
+ super # bare super re-sends `expr`, `min_periods:` and `parallel:` as received; the receiving implementation is not visible in this chunk (presumably expression dispatch — confirm)
1184
+ end
1185
+
1186
+ # Return a copy of the Series with a new alias/name.
1187
+ #
1188
+ # @param name [String]
1189
+ # New name.
1190
+ #
1191
+ # @return [Series]
1192
+ #
1193
+ # @example
1194
+ # s = Polars::Series.new("x", [1, 2, 3])
1195
+ # s.alias("y")
1196
def alias(name)
  # The rename is applied to a duplicate, which is what gets returned.
  dup.tap { |copy| copy._s.rename(name) }
end
1201
+
1202
+ # Rename this Series.
1203
+ #
1204
+ # @param name [String]
1205
+ # New name.
1206
+ # @param in_place [Boolean]
1207
+ # Modify the Series in-place.
1208
+ #
1209
+ # @return [Series]
1210
+ #
1211
+ # @example
1212
+ # s = Polars::Series.new("a", [1, 2, 3])
1213
+ # s.rename("b")
1214
def rename(name, in_place: false)
  # The default path hands off to `alias` and returns its result.
  return self.alias(name) unless in_place

  # In-place: rename the wrapped object and hand back the receiver.
  _s.rename(name)
  self
end
1222
+
1223
+ # Get the length of each individual chunk.
1224
+ #
1225
+ # @return [Array]
1226
+ #
1227
+ # @example
1228
+ # s = Polars::Series.new("a", [1, 2, 3])
1229
+ # s2 = Polars::Series.new("b", [4, 5, 6])
1230
+ #
1231
+ # @example Concatenate Series with rechunk: true
1232
+ # Polars.concat([s, s2]).chunk_lengths
1233
+ # # => [6]
1234
+ #
1235
+ # @example Concatenate Series with rechunk: false
1236
+ # Polars.concat([s, s2], rechunk: false).chunk_lengths
1237
+ # # => [3, 3]
1238
+ def chunk_lengths
1239
+ _s.chunk_lengths # straight delegation to the wrapped `_s` object; its result is returned unchanged
1240
+ end
1241
+
1242
+ # Get the number of chunks that this Series contains.
1243
+ #
1244
+ # @return [Integer]
1245
+ #
1246
+ # @example
1247
+ # s = Polars::Series.new("a", [1, 2, 3])
1248
+ # s2 = Polars::Series.new("b", [4, 5, 6])
1249
+ #
1250
+ # @example Concatenate Series with rechunk: true
1251
+ # Polars.concat([s, s2]).n_chunks
1252
+ # # => 1
1253
+ #
1254
+ # @example Concatenate Series with rechunk: false
1255
+ # Polars.concat([s, s2], rechunk: false).n_chunks
1256
+ # # => 2
1257
+ def n_chunks
1258
+ _s.n_chunks # straight delegation to the wrapped `_s` object; its result is returned unchanged
1259
+ end
1260
+
1261
+ # Get an array with the cumulative sum computed at every element.
1262
+ #
1263
+ # @param reverse [Boolean]
1264
+ # reverse the operation.
1265
+ #
1266
+ # @return [Series]
1267
+ #
1268
+ # @note
1269
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
1270
+ # `:i64` before summing to prevent overflow issues.
1271
+ #
1272
+ # @example
1273
+ # s = Polars::Series.new("a", [1, 2, 3])
1274
+ # s.cum_sum
1275
+ # # =>
1276
+ # # shape: (3,)
1277
+ # # Series: 'a' [i64]
1278
+ # # [
1279
+ # # 1
1280
+ # # 3
1281
+ # # 6
1282
+ # # ]
1283
+ def cum_sum(reverse: false)
1284
+ super # bare super re-sends `reverse:` as received; the receiving implementation is not visible in this chunk (presumably expression dispatch — confirm)
1285
+ end
1286
+ alias_method :cumsum, :cum_sum
1287
+
1288
+ # Get an array with the cumulative min computed at every element.
1289
+ #
1290
+ # @param reverse [Boolean]
1291
+ # reverse the operation.
1292
+ #
1293
+ # @return [Series]
1294
+ #
1295
+ # @example
1296
+ # s = Polars::Series.new("a", [3, 5, 1])
1297
+ # s.cum_min
1298
+ # # =>
1299
+ # # shape: (3,)
1300
+ # # Series: 'a' [i64]
1301
+ # # [
1302
+ # # 3
1303
+ # # 3
1304
+ # # 1
1305
+ # # ]
1306
+ def cum_min(reverse: false)
1307
+ super # bare super re-sends `reverse:` as received; the receiving implementation is not visible in this chunk (presumably expression dispatch — confirm)
1308
+ end
1309
+ alias_method :cummin, :cum_min
1310
+
1311
+ # Get an array with the cumulative max computed at every element.
1312
+ #
1313
+ # @param reverse [Boolean]
1314
+ # reverse the operation.
1315
+ #
1316
+ # @return [Series]
1317
+ #
1318
+ # @example
1319
+ # s = Polars::Series.new("a", [3, 5, 1])
1320
+ # s.cum_max
1321
+ # # =>
1322
+ # # shape: (3,)
1323
+ # # Series: 'a' [i64]
1324
+ # # [
1325
+ # # 3
1326
+ # # 5
1327
+ # # 5
1328
+ # # ]
1329
+ def cum_max(reverse: false)
1330
+ super # bare super re-sends `reverse:` as received; the receiving implementation is not visible in this chunk (presumably expression dispatch — confirm)
1331
+ end
1332
+ alias_method :cummax, :cum_max
1333
+
1334
+ # Get an array with the cumulative product computed at every element.
1335
+ #
1336
+ # @param reverse [Boolean]
1337
+ # reverse the operation.
1338
+ #
1339
+ # @return [Series]
1340
+ #
1341
+ # @note
1342
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
1343
+ # `:i64` before multiplying to prevent overflow issues.
1344
+ #
1345
+ # @example
1346
+ # s = Polars::Series.new("a", [1, 2, 3])
1347
+ # s.cum_prod
1348
+ # # =>
1349
+ # # shape: (3,)
1350
+ # # Series: 'a' [i64]
1351
+ # # [
1352
+ # # 1
1353
+ # # 2
1354
+ # # 6
1355
+ # # ]
1356
+ def cum_prod(reverse: false)
1357
+ super # bare super re-sends `reverse:` as received; the receiving implementation is not visible in this chunk (presumably expression dispatch — confirm)
1358
+ end
1359
+ alias_method :cumprod, :cum_prod
1360
+
1361
+ # Get the first `n` rows.
1362
+ #
1363
+ # Alias for {#head}.
1364
+ #
1365
+ # @param n [Integer]
1366
+ # Number of rows to return.
1367
+ #
1368
+ # @return [Series]
1369
+ #
1370
+ # @example
1371
+ # s = Polars::Series.new("a", [1, 2, 3])
1372
+ # s.limit(2)
1373
+ # # =>
1374
+ # # shape: (2,)
1375
+ # # Series: 'a' [i64]
1376
+ # # [
1377
+ # # 1
1378
+ # # 2
1379
+ # # ]
1380
def limit(n = 10)
  frame = to_frame
  # Apply the expression-level `limit` to the column named after this Series.
  limited = frame.select(F.col(name).limit(n))
  limited.to_series
end
1383
+
1384
+ # Get a slice of this Series.
1385
+ #
1386
+ # @param offset [Integer]
1387
+ # Start index. Negative indexing is supported.
1388
+ # @param length [Integer, nil]
1389
+ # Length of the slice. If set to `nil`, all rows starting at the offset
1390
+ # will be selected.
1391
+ #
1392
+ # @return [Series]
1393
+ #
1394
+ # @example
1395
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
1396
+ # s.slice(1, 2)
1397
+ # # =>
1398
+ # # shape: (2,)
1399
+ # # Series: 'a' [i64]
1400
+ # # [
1401
+ # # 2
1402
+ # # 3
1403
+ # # ]
1404
def slice(offset, length = nil)
  wrapper = self.class
  # The wrapped object does the slicing; the result is re-wrapped in the
  # receiver's own class.
  raw_slice = _s.slice(offset, length)
  wrapper._from_rbseries(raw_slice)
end
1407
+
1408
+ # Append a Series to this one.
1409
+ #
1410
+ # @param other [Series]
1411
+ # Series to append.
1412
+ # @param append_chunks [Boolean]
1413
+ # If set to `true` the append operation will add the chunks from `other` to
1414
+ # self. This is super cheap.
1415
+ #
1416
+ # If set to `false` the append operation will do the same as
1417
+ # {DataFrame#extend} which extends the memory backed by this Series with
1418
+ # the values from `other`.
1419
+ #
1420
+ # Different from `append_chunks`, `extend` appends the data from `other` to
1421
+ # the underlying memory locations and thus may cause a reallocation (which is
1422
+ # expensive).
1423
+ #
1424
+ # If this does not cause a reallocation, the resulting data structure will not
1425
+ # have any extra chunks and thus will yield faster queries.
1426
+ #
1427
+ # Prefer `extend` over `append_chunks` when you want to do a query after a
1428
+ # single append. For instance during online operations where you add `n` rows
1429
+ # and rerun a query.
1430
+ #
1431
+ # Prefer `append_chunks` over `extend` when you want to append many times
1432
+ # before doing a query. For instance, when you read in multiple files and want
1433
+ # to store them in a single Series. In the latter case, finish the sequence
1434
+ # of `append_chunks` operations with a `rechunk`.
1435
+ #
1436
+ # @return [Series]
1437
+ #
1438
+ # @example
1439
+ # s = Polars::Series.new("a", [1, 2, 3])
1440
+ # s2 = Polars::Series.new("b", [4, 5, 6])
1441
+ # s.append(s2)
1442
+ # # =>
1443
+ # # shape: (6,)
1444
+ # # Series: 'a' [i64]
1445
+ # # [
1446
+ # # 1
1447
+ # # 2
1448
+ # # 3
1449
+ # # 4
1450
+ # # 5
1451
+ # # 6
1452
+ # # ]
1453
def append(other, append_chunks: true)
  begin
    if append_chunks
      _s.append(other._s)
    else
      _s.extend(other._s)
    end
  rescue => e
    # Anything other than the borrow conflict is re-raised untouched.
    raise unless e.message == "Already mutably borrowed"

    # Retry against a clone of `other`. `append_chunks` is keyword-only, so it
    # has to be forwarded by keyword: passing it positionally raised
    # ArgumentError (given 2, expected 1) and the retry never happened.
    append(other.clone, append_chunks: append_chunks)
  end
  # The receiver itself is modified and returned.
  self
end
1469
+
1470
+ # Filter elements by a boolean mask.
1471
+ #
1472
+ # @param predicate [Series, Array]
1473
+ # Boolean mask.
1474
+ #
1475
+ # @return [Series]
1476
+ #
1477
+ # @example
1478
+ # s = Polars::Series.new("a", [1, 2, 3])
1479
+ # mask = Polars::Series.new("", [true, false, true])
1480
+ # s.filter(mask)
1481
+ # # =>
1482
+ # # shape: (2,)
1483
+ # # Series: 'a' [i64]
1484
+ # # [
1485
+ # # 1
1486
+ # # 3
1487
+ # # ]
1488
def filter(predicate)
  # A plain Ruby array is first wrapped in an unnamed Series.
  mask = predicate.is_a?(::Array) ? Series.new("", predicate) : predicate
  kept = _s.filter(mask._s)
  Utils.wrap_s(kept)
end
1494
+
1495
+ # Get the first `n` rows.
1496
+ #
1497
+ # @param n [Integer]
1498
+ # Number of rows to return.
1499
+ #
1500
+ # @return [Series]
1501
+ #
1502
+ # @example
1503
+ # s = Polars::Series.new("a", [1, 2, 3])
1504
+ # s.head(2)
1505
+ # # =>
1506
+ # # shape: (2,)
1507
+ # # Series: 'a' [i64]
1508
+ # # [
1509
+ # # 1
1510
+ # # 2
1511
+ # # ]
1512
def head(n = 10)
  frame = to_frame
  # Apply the expression-level `head` to the column named after this Series.
  leading = frame.select(F.col(name).head(n))
  leading.to_series
end
1515
+
1516
+ # Get the last `n` rows.
1517
+ #
1518
+ # @param n [Integer]
1519
+ # Number of rows to return.
1520
+ #
1521
+ # @return [Series]
1522
+ #
1523
+ # @example
1524
+ # s = Polars::Series.new("a", [1, 2, 3])
1525
+ # s.tail(2)
1526
+ # # =>
1527
+ # # shape: (2,)
1528
+ # # Series: 'a' [i64]
1529
+ # # [
1530
+ # # 2
1531
+ # # 3
1532
+ # # ]
1533
def tail(n = 10)
  frame = to_frame
  # Apply the expression-level `tail` to the column named after this Series.
  trailing = frame.select(F.col(name).tail(n))
  trailing.to_series
end
1536
+
1537
+ # Take every nth value in the Series and return as new Series.
1538
+ #
1539
+ # @return [Series]
1540
+ #
1541
+ # @example
1542
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
1543
+ # s.take_every(2)
1544
+ # # =>
1545
+ # # shape: (2,)
1546
+ # # Series: 'a' [i64]
1547
+ # # [
1548
+ # # 1
1549
+ # # 3
1550
+ # # ]
1551
+ def take_every(n)
1552
+ super # bare super re-sends `n` as received; the receiving implementation is not visible in this chunk (presumably expression dispatch — confirm)
1553
+ end
1554
+
1555
+ # Sort this Series.
1556
+ #
1557
+ # @param reverse [Boolean]
1558
+ # Reverse sort.
1559
+ # @param in_place [Boolean]
1560
+ # Sort in place.
1561
+ #
1562
+ # @return [Series]
1563
+ #
1564
+ # @example
1565
+ # s = Polars::Series.new("a", [1, 3, 4, 2])
1566
+ # s.sort
1567
+ # # =>
1568
+ # # shape: (4,)
1569
+ # # Series: 'a' [i64]
1570
+ # # [
1571
+ # # 1
1572
+ # # 2
1573
+ # # 3
1574
+ # # 4
1575
+ # # ]
1576
+ # s.sort(reverse: true)
1577
+ # # =>
1578
+ # # shape: (4,)
1579
+ # # Series: 'a' [i64]
1580
+ # # [
1581
+ # # 4
1582
+ # # 3
1583
+ # # 2
1584
+ # # 1
1585
+ # # ]
1586
def sort(reverse: false, nulls_last: false, multithreaded: true, in_place: false)
  # Sorting happens on the wrapped object; the flags are passed positionally.
  sorted = _s.sort(reverse, nulls_last, multithreaded)
  return Utils.wrap_s(sorted) unless in_place

  # In-place: swap the wrapped object and hand back the receiver.
  self._s = sorted
  self
end
1594
+
1595
+ # Return the `k` largest elements.
1596
+ #
1597
+ # @param k [Integer]
1598
+ # Number of elements to return.
1599
+ #
1600
+ # @return [Series]
1601
+ #
1602
+ # @example
1603
+ # s = Polars::Series.new("a", [2, 5, 1, 4, 3])
1604
+ # s.top_k(k: 3)
1605
+ # # =>
1606
+ # # shape: (3,)
1607
+ # # Series: 'a' [i64]
1608
+ # # [
1609
+ # # 5
1610
+ # # 4
1611
+ # # 3
1612
+ # # ]
1613
+ def top_k(k: 5)
1614
+ super # bare super re-sends `k:` as received; the receiving implementation is not visible in this chunk (presumably expression dispatch — confirm)
1615
+ end
1616
+
1617
+ # Return the `k` smallest elements.
1618
+ #
1619
+ # @param k [Integer]
1620
+ # Number of elements to return.
1621
+ #
1622
+ # @return [Series]
1623
+ #
1624
+ # @example
1625
+ # s = Polars::Series.new("a", [2, 5, 1, 4, 3])
1626
+ # s.bottom_k(k: 3)
1627
+ # # =>
1628
+ # # shape: (3,)
1629
+ # # Series: 'a' [i64]
1630
+ # # [
1631
+ # # 1
1632
+ # # 2
1633
+ # # 3
1634
+ # # ]
1635
+ def bottom_k(k: 5)
1636
+ super # bare super re-sends `k:` as received; the receiving implementation is not visible in this chunk (presumably expression dispatch — confirm)
1637
+ end
1638
+
1639
+ # Get the index values that would sort this Series.
1640
+ #
1641
+ # @param reverse [Boolean]
1642
+ # Sort in reverse (descending) order.
1643
+ # @param nulls_last [Boolean]
1644
+ # Place null values last instead of first.
1645
+ #
1646
+ # @return [Series]
1647
+ #
1648
+ # @example
1649
+ # s = Polars::Series.new("a", [5, 3, 4, 1, 2])
1650
+ # s.arg_sort
1651
+ # # =>
1652
+ # # shape: (5,)
1653
+ # # Series: 'a' [u32]
1654
+ # # [
1655
+ # # 3
1656
+ # # 4
1657
+ # # 1
1658
+ # # 2
1659
+ # # 0
1660
+ # # ]
1661
+ def arg_sort(reverse: false, nulls_last: false)
1662
+ super # bare super re-sends `reverse:` and `nulls_last:` as received; the receiving implementation is not visible in this chunk (presumably expression dispatch — confirm)
1663
+ end
1664
+
1665
+ # Get the index values that would sort this Series.
1666
+ #
1667
+ # Alias for {#arg_sort}.
1668
+ #
1669
+ # @param reverse [Boolean]
1670
+ # Sort in reverse (descending) order.
1671
+ # @param nulls_last [Boolean]
1672
+ # Place null values last instead of first.
1673
+ #
1674
+ # @return [Series]
1675
+ def argsort(reverse: false, nulls_last: false)
1676
+ super # bare super looks up the next `argsort` (not `arg_sort`) and re-sends both keywords; that implementation is not visible in this chunk — confirm it exists
1677
+ end
1678
+
1679
+ # Get unique index as Series.
1680
+ #
1681
+ # @return [Series]
1682
+ #
1683
+ # @example
1684
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1685
+ # s.arg_unique
1686
+ # # =>
1687
+ # # shape: (3,)
1688
+ # # Series: 'a' [u32]
1689
+ # # [
1690
+ # # 0
1691
+ # # 1
1692
+ # # 3
1693
+ # # ]
1694
+ def arg_unique
1695
+ super # bare super: runs the next `arg_unique` implementation in the ancestor chain (none visible in this chunk; presumably expression dispatch — confirm)
1696
+ end
1697
+
1698
+ # Get the index of the minimal value.
1699
+ #
1700
+ # @return [Integer, nil]
1701
+ #
1702
+ # @example
1703
+ # s = Polars::Series.new("a", [3, 2, 1])
1704
+ # s.arg_min
1705
+ # # => 2
1706
+ def arg_min
1707
+ _s.arg_min # straight delegation to the wrapped `_s` object; its result is returned unchanged
1708
+ end
1709
+
1710
+ # Get the index of the maximal value.
1711
+ #
1712
+ # @return [Integer, nil]
1713
+ #
1714
+ # @example
1715
+ # s = Polars::Series.new("a", [3, 2, 1])
1716
+ # s.arg_max
1717
+ # # => 0
1718
+ def arg_max
1719
+ _s.arg_max # straight delegation to the wrapped `_s` object; its result is returned unchanged
1720
+ end
1721
+
1722
+ # Find indices where elements should be inserted to maintain order.
1723
+ #
1724
+ # @param element [Object]
1725
+ # Expression or scalar value.
1726
+ #
1727
+ # @return [Integer]
1728
def search_sorted(element, side: "any")
  scalar = element.is_a?(Integer) || element.is_a?(Float)
  # Anything that is not an Integer or Float is wrapped in a Series first.
  needle = scalar ? element : Series.new(element)
  positions = Polars.select(Polars.lit(self).search_sorted(needle, side: side))
  # Integer/Float input returns `.item`; everything else returns `.to_series`.
  scalar ? positions.item : positions.to_series
end
1735
+
1736
+ # Get unique elements in series.
1737
+ #
1738
+ # @param maintain_order [Boolean]
1739
+ # Maintain order of data. This requires more work.
1740
+ #
1741
+ # @return [Series]
1742
+ #
1743
+ # @example
1744
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1745
+ # s.unique.sort
1746
+ # # =>
1747
+ # # shape: (3,)
1748
+ # # Series: 'a' [i64]
1749
+ # # [
1750
+ # # 1
1751
+ # # 2
1752
+ # # 3
1753
+ # # ]
1754
+ def unique(maintain_order: false)
1755
+ super # bare super re-sends `maintain_order:` as received; the receiving implementation is not visible in this chunk (presumably expression dispatch — confirm)
1756
+ end
1757
+ alias_method :uniq, :unique
1758
+
1759
+ # Take values by index.
1760
+ #
1761
+ # @param indices [Array]
1762
+ # Index location used for selection.
1763
+ #
1764
+ # @return [Series]
1765
+ #
1766
+ # @example
1767
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
1768
+ # s.take([1, 3])
1769
+ # # =>
1770
+ # # shape: (2,)
1771
+ # # Series: 'a' [i64]
1772
+ # # [
1773
+ # # 2
1774
+ # # 4
1775
+ # # ]
1776
def take(indices)
  frame = to_frame
  # Apply the expression-level `take` to the column named after this Series.
  picked = frame.select(Polars.col(name).take(indices))
  picked.to_series
end
1779
+
1780
+ # Count the null values in this Series.
1781
+ #
1782
+ # @return [Integer]
1783
+ def null_count
1784
+ _s.null_count # straight delegation to the wrapped `_s` object; its result is returned unchanged
1785
+ end
1786
+
1787
+ # Return `true` if the Series has a validity bitmask.
1788
+ #
1789
+ # If there is none, it means that there are no null values.
1790
+ # Use this to swiftly assert a Series does not have null values.
1791
+ #
1792
+ # @return [Boolean]
1793
+ def has_nulls
1794
+ _s.has_nulls # straight delegation to the wrapped `_s` object; its result is returned unchanged
1795
+ end
1796
+ alias_method :has_validity, :has_nulls
1797
+
1798
+ # Check if the Series is empty.
1799
+ #
1800
+ # @return [Boolean]
1801
+ #
1802
+ # @example
1803
+ # s = Polars::Series.new("a", [])
1804
+ # s.is_empty
1805
+ # # => true
1806
+ def is_empty
1807
+ len == 0 # based on `len`, so null entries still count as elements here
1808
+ end
1809
+ alias_method :empty?, :is_empty
1810
+
1811
+ # Returns a boolean Series indicating which values are null.
1812
+ #
1813
+ # @return [Series]
1814
+ #
1815
+ # @example
1816
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, nil])
1817
+ # s.is_null
1818
+ # # =>
1819
+ # # shape: (4,)
1820
+ # # Series: 'a' [bool]
1821
+ # # [
1822
+ # # false
1823
+ # # false
1824
+ # # false
1825
+ # # true
1826
+ # # ]
1827
+ def is_null
1828
+ super # bare super: runs the next `is_null` implementation in the ancestor chain (none visible in this chunk; presumably expression dispatch — confirm)
1829
+ end
1830
+
1831
+ # Returns a boolean Series indicating which values are not null.
1832
+ #
1833
+ # @return [Series]
1834
+ #
1835
+ # @example
1836
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, nil])
1837
+ # s.is_not_null
1838
+ # # =>
1839
+ # # shape: (4,)
1840
+ # # Series: 'a' [bool]
1841
+ # # [
1842
+ # # true
1843
+ # # true
1844
+ # # true
1845
+ # # false
1846
+ # # ]
1847
+ def is_not_null
1848
+ super # bare super: runs the next `is_not_null` implementation in the ancestor chain (none visible in this chunk; presumably expression dispatch — confirm)
1849
+ end
1850
+
1851
+ # Returns a boolean Series indicating which values are finite.
1852
+ #
1853
+ # @return [Series]
1854
+ #
1855
+ # @example
1856
+ # s = Polars::Series.new("a", [1.0, 2.0, Float::INFINITY])
1857
+ # s.is_finite
1858
+ # # =>
1859
+ # # shape: (3,)
1860
+ # # Series: 'a' [bool]
1861
+ # # [
1862
+ # # true
1863
+ # # true
1864
+ # # false
1865
+ # # ]
1866
+ def is_finite
1867
+ super # bare super: runs the next `is_finite` implementation in the ancestor chain (none visible in this chunk; presumably expression dispatch — confirm)
1868
+ end
1869
+
1870
+ # Returns a boolean Series indicating which values are infinite.
1871
+ #
1872
+ # @return [Series]
1873
+ #
1874
+ # @example
1875
+ # s = Polars::Series.new("a", [1.0, 2.0, Float::INFINITY])
1876
+ # s.is_infinite
1877
+ # # =>
1878
+ # # shape: (3,)
1879
+ # # Series: 'a' [bool]
1880
+ # # [
1881
+ # # false
1882
+ # # false
1883
+ # # true
1884
+ # # ]
1885
+ def is_infinite
1886
+ super # bare super: runs the next `is_infinite` implementation in the ancestor chain (none visible in this chunk; presumably expression dispatch — confirm)
1887
+ end
1888
+
1889
+ # Returns a boolean Series indicating which values are NaN.
1890
+ #
1891
+ # @return [Series]
1892
+ #
1893
+ # @example
1894
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
1895
+ # s.is_nan
1896
+ # # =>
1897
+ # # shape: (4,)
1898
+ # # Series: 'a' [bool]
1899
+ # # [
1900
+ # # false
1901
+ # # false
1902
+ # # false
1903
+ # # true
1904
+ # # ]
1905
+ def is_nan
1906
+ super # bare super: runs the next `is_nan` implementation in the ancestor chain (none visible in this chunk; presumably expression dispatch — confirm)
1907
+ end
1908
+
1909
+ # Returns a boolean Series indicating which values are not NaN.
1910
+ #
1911
+ # @return [Series]
1912
+ #
1913
+ # @example
1914
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
1915
+ # s.is_not_nan
1916
+ # # =>
1917
+ # # shape: (4,)
1918
+ # # Series: 'a' [bool]
1919
+ # # [
1920
+ # # true
1921
+ # # true
1922
+ # # true
1923
+ # # false
1924
+ # # ]
1925
+ def is_not_nan
1926
+ super # bare super: runs the next `is_not_nan` implementation in the ancestor chain (none visible in this chunk; presumably expression dispatch — confirm)
1927
+ end
1928
+
1929
+ # Check if elements of this Series are in the other Series.
1930
+ #
1931
+ # @return [Series]
1932
+ #
1933
+ # @example
1934
+ # s = Polars::Series.new("a", [1, 2, 3])
1935
+ # s2 = Polars::Series.new("b", [2, 4])
1936
+ # s2.is_in(s)
1937
+ # # =>
1938
+ # # shape: (2,)
1939
+ # # Series: 'b' [bool]
1940
+ # # [
1941
+ # # true
1942
+ # # false
1943
+ # # ]
1944
+ #
1945
+ # @example
1946
+ # sets = Polars::Series.new("sets", [[1, 2, 3], [1, 2], [9, 10]])
1947
+ # # =>
1948
+ # # shape: (3,)
1949
+ # # Series: 'sets' [list[i64]]
1950
+ # # [
1951
+ # # [1, 2, 3]
1952
+ # # [1, 2]
1953
+ # # [9, 10]
1954
+ # # ]
1955
+ #
1956
+ # @example
1957
+ # optional_members = Polars::Series.new("optional_members", [1, 2, 3])
1958
+ # # =>
1959
+ # # shape: (3,)
1960
+ # # Series: 'optional_members' [i64]
1961
+ # # [
1962
+ # # 1
1963
+ # # 2
1964
+ # # 3
1965
+ # # ]
1966
+ #
1967
+ # @example
1968
+ # optional_members.is_in(sets)
1969
+ # # =>
1970
+ # # shape: (3,)
1971
+ # # Series: 'optional_members' [bool]
1972
+ # # [
1973
+ # # true
1974
+ # # true
1975
+ # # false
1976
+ # # ]
1977
+ def is_in(other)
1978
+ super # bare super re-sends `other` as received; the receiving implementation is not visible in this chunk (presumably expression dispatch — confirm)
1979
+ end
1980
+ alias_method :in?, :is_in
1981
+
1982
+ # Get index values where a Boolean Series evaluates to `true`.
1983
+ #
1984
+ # @return [Series]
1985
+ #
1986
+ # @example
1987
+ # s = Polars::Series.new("a", [1, 2, 3])
1988
+ # (s == 2).arg_true
1989
+ # # =>
1990
+ # # shape: (1,)
1991
+ # # Series: 'a' [u32]
1992
+ # # [
1993
+ # # 1
1994
+ # # ]
1995
+ def arg_true
1996
+ Polars.arg_where(self, eager: true) # hands this Series to the module-level helper; `eager: true` presumably makes it return a Series rather than an expression — confirm
1997
+ end
1998
+
1999
+ # Get mask of all unique values.
2000
+ #
2001
+ # @return [Series]
2002
+ #
2003
+ # @example
2004
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
2005
+ # s.is_unique
2006
+ # # =>
2007
+ # # shape: (4,)
2008
+ # # Series: 'a' [bool]
2009
+ # # [
2010
+ # # true
2011
+ # # false
2012
+ # # false
2013
+ # # true
2014
+ # # ]
2015
+ def is_unique
2016
+ super # bare super: runs the next `is_unique` implementation in the ancestor chain (none visible in this chunk; presumably expression dispatch — confirm)
2017
+ end
2018
+
2019
+ # Get a mask of the first unique value.
2020
+ #
2021
+ # @return [Series]
2022
+ def is_first
2023
+ super # bare super: runs the next `is_first` implementation in the ancestor chain (none visible in this chunk; presumably expression dispatch — confirm)
2024
+ end
2025
+
2026
+ # Get mask of all duplicated values.
2027
+ #
2028
+ # @return [Series]
2029
+ #
2030
+ # @example
2031
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
2032
+ # s.is_duplicated
2033
+ # # =>
2034
+ # # shape: (4,)
2035
+ # # Series: 'a' [bool]
2036
+ # # [
2037
+ # # false
2038
+ # # true
2039
+ # # true
2040
+ # # false
2041
+ # # ]
2042
+ def is_duplicated
2043
+ super # bare super: runs the next `is_duplicated` implementation in the ancestor chain (none visible in this chunk; presumably expression dispatch — confirm)
2044
+ end
2045
+
2046
+ # Explode a list or utf8 Series.
2047
+ #
2048
+ # This means that every item is expanded to a new row.
2049
+ #
2050
+ # @return [Series]
2051
+ #
2052
+ # @example
2053
+ # s = Polars::Series.new("a", [[1, 2], [3, 4], [9, 10]])
2054
+ # s.explode
2055
+ # # =>
2056
+ # # shape: (6,)
2057
+ # # Series: 'a' [i64]
2058
+ # # [
2059
+ # # 1
2060
+ # # 2
2061
+ # # 3
2062
+ # # 4
2063
+ # # 9
2064
+ # # 10
2065
+ # # ]
2066
+ def explode
2067
+ super # bare super: runs the next `explode` implementation in the ancestor chain (none visible in this chunk; presumably expression dispatch — confirm)
2068
+ end
2069
+
2070
+ # Check if series is equal with another Series.
2071
+ #
2072
+ # @param other [Series]
2073
+ # Series to compare with.
2074
+ # @param strict [Boolean]
2075
+ # Require data types to match.
2076
+ # @param check_names [Boolean]
2077
+ # Require names to match.
2078
+ # @param null_equal [Boolean]
2079
+ # Consider null values as equal.
2080
+ #
2081
+ # @return [Boolean]
2082
+ #
2083
+ # @example
2084
+ # s = Polars::Series.new("a", [1, 2, 3])
2085
+ # s2 = Polars::Series.new("b", [4, 5, 6])
2086
+ # s.equals(s)
2087
+ # # => true
2088
+ # s.equals(s2)
2089
+ # # => false
2090
+ def equals(other, strict: false, check_names: false, null_equal: false)
2091
+ _s.equals(other._s, strict, check_names, null_equal) # compares the wrapped objects; the keyword options are passed positionally in exactly this order
2092
+ end
2093
+ alias_method :series_equal, :equals
2094
+
2095
+ # Return the number of non-null elements in the Series.
2096
+ #
2097
+ # @return [Integer]
2098
+ #
2099
+ # @example
2100
+ # s = Polars::Series.new("a", [1, 2, nil])
2101
+ # s.count
2102
+ # # => 2
2103
def count
  total = len
  missing = null_count
  # Nulls are excluded from the count.
  total - missing
end
2106
+
2107
+ # Return the number of elements in the Series.
2108
+ #
2109
+ # @return [Integer]
2110
+ #
2111
+ # @example
2112
+ # s = Polars::Series.new("a", [1, 2, nil])
2113
+ # s.len
2114
+ # # => 3
2115
+ def len
2116
+ _s.len # straight delegation to the wrapped `_s` object; its result is returned unchanged
2117
+ end
2118
+ alias_method :length, :len
2119
+ alias_method :size, :len
2120
+
2121
+ # Cast between data types.
2122
+ #
2123
+ # @param dtype [Symbol]
2124
+ # DataType to cast to
2125
+ # @param strict [Boolean]
2126
+ # Throw an error if a cast could not be done for instance due to an overflow
2127
+ #
2128
+ # @return [Series]
2129
+ #
2130
+ # @example
2131
+ # s = Polars::Series.new("a", [true, false, true])
2132
+ # s.cast(:u32)
2133
+ # # =>
2134
+ # # shape: (3,)
2135
+ # # Series: 'a' [u32]
2136
+ # # [
2137
+ # # 1
2138
+ # # 0
2139
+ # # 1
2140
+ # # ]
2141
+ def cast(dtype, strict: true)
2142
+ super # bare super re-sends `dtype` and `strict:` as received; the receiving implementation is not visible in this chunk (presumably expression dispatch — confirm)
2143
+ end
2144
+
2145
+ # Cast to physical representation of the logical dtype.
2146
+ #
2147
+ # - `:date` -> `:i32`
2148
+ # - `:datetime` -> `:i64`
2149
+ # - `:time` -> `:i64`
2150
+ # - `:duration` -> `:i64`
2151
+ # - `:cat` -> `:u32`
2152
+ # - other data types will be left unchanged.
2153
+ #
2154
+ # @return [Series]
2155
+ #
2156
+ # @example
2157
+ # s = Polars::Series.new("values", ["a", nil, "x", "a"])
2158
+ # s.cast(:cat).to_physical
2159
+ # # =>
2160
+ # # shape: (4,)
2161
+ # # Series: 'values' [u32]
2162
+ # # [
2163
+ # # 0
2164
+ # # null
2165
+ # # 1
2166
+ # # 0
2167
+ # # ]
2168
+ def to_physical
2169
+ super # bare super: runs the next `to_physical` implementation in the ancestor chain (none visible in this chunk; presumably expression dispatch — confirm)
2170
+ end
2171
+
2172
+ # Convert this Series to a Ruby Array. This operation clones data.
2173
+ #
2174
+ # @return [Array]
2175
+ #
2176
+ # @example
2177
+ # s = Polars::Series.new("a", [1, 2, 3])
2178
+ # s.to_a
2179
+ # # => [1, 2, 3]
2180
+ def to_a
2181
+ _s.to_a # straight delegation to the wrapped `_s` object; its result is returned unchanged
2182
+ end
2183
+
2184
+ # Create a single chunk of memory for this Series.
2185
+ #
2186
+ # @param in_place [Boolean]
2187
+ # In place or not.
2188
+ #
2189
+ # @return [Series]
2190
def rechunk(in_place: false)
  # The wrapped object is always asked to rechunk; `in_place` only decides
  # what is handed back to the caller.
  rechunked = _s.rechunk(in_place)
  return self if in_place

  Utils.wrap_s(rechunked)
end
2194
+
2195
+ # Return Series in reverse order.
2196
+ #
2197
+ # @return [Series]
2198
+ #
2199
+ # @example
2200
+ # s = Polars::Series.new("a", [1, 2, 3], dtype: :i8)
2201
+ # s.reverse
2202
+ # # =>
2203
+ # # shape: (3,)
2204
+ # # Series: 'a' [i8]
2205
+ # # [
2206
+ # # 3
2207
+ # # 2
2208
+ # # 1
2209
+ # # ]
2210
+ def reverse
2211
+ super # bare super: runs the next `reverse` implementation in the ancestor chain (none visible in this chunk; presumably expression dispatch — confirm)
2212
+ end
2213
+
2214
+ # Check if this Series datatype is numeric.
2215
+ #
2216
+ # @return [Boolean]
2217
+ #
2218
+ # @example
2219
+ # s = Polars::Series.new("a", [1, 2, 3])
2220
+ # s.is_numeric
2221
+ # # => true
2222
def is_numeric
  numeric_dtypes = [
    Int8, Int16, Int32, Int64,
    UInt8, UInt16, UInt32, UInt64,
    Float32, Float64
  ]
  current = dtype
  # Same comparison direction as Array#include?: each dtype constant is the
  # receiver of `==`.
  numeric_dtypes.any? { |candidate| candidate == current }
end
2225
+ alias_method :numeric?, :is_numeric
2226
+
2227
+ # Check if this Series datatype is datelike.
2228
+ #
2229
+ # @return [Boolean]
2230
+ #
2231
+ # @example
2232
+ # s = Polars::Series.new([Date.new(2021, 1, 1), Date.new(2021, 1, 2), Date.new(2021, 1, 3)])
2233
+ # s.is_datelike
2234
+ # # => true
2235
def is_datelike
  # Date and Time are matched with `==` against the constants themselves...
  return true if [Date, Time].include?(dtype)
  # ...while Datetime and Duration are matched as instances.
  return true if dtype.is_a?(Datetime)

  dtype.is_a?(Duration)
end
2238
+ alias_method :datelike?, :is_datelike
2239
+ alias_method :is_temporal, :is_datelike
2240
+ alias_method :temporal?, :is_datelike
2241
+
2242
+ # Check if this Series has floating point numbers.
2243
+ #
2244
+ # @return [Boolean]
2245
+ #
2246
+ # @example
2247
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0])
2248
+ # s.is_float
2249
+ # # => true
2250
def is_float
  float_dtypes = [Float32, Float64]
  current = dtype
  # Same comparison direction as Array#include?: each dtype constant is the
  # receiver of `==`.
  float_dtypes.any? { |candidate| candidate == current }
end
2253
+ alias_method :float?, :is_float
2254
+
2255
+ # Check if this Series is a Boolean.
2256
+ #
2257
+ # @return [Boolean]
2258
+ #
2259
+ # @example
2260
+ # s = Polars::Series.new("a", [true, false, true])
2261
+ # s.is_boolean
2262
+ # # => true
2263
+ def is_boolean
2264
+ dtype == Boolean # `Boolean` is not a Ruby core constant; presumably the library's boolean dtype — confirm
2265
+ end
2266
+ alias_method :boolean?, :is_boolean
2267
+ alias_method :is_bool, :is_boolean
2268
+ alias_method :bool?, :is_boolean
2269
+
2270
+ # Check if this Series datatype is a Utf8.
2271
+ #
2272
+ # @return [Boolean]
2273
+ #
2274
+ # @example
2275
+ # s = Polars::Series.new("x", ["a", "b", "c"])
2276
+ # s.is_utf8
2277
+ # # => true
2278
+ def is_utf8
2279
+ dtype == String # unqualified `String` (other methods here write `::String` for Ruby's own class), so this presumably resolves to the library's string dtype — confirm
2280
+ end
2281
+ alias_method :utf8?, :is_utf8
2282
+
2283
+ # def view
2284
+ # end
2285
+
2286
+ # Convert this Series to a Numo array. This operation clones data but is completely safe.
2287
+ #
2288
+ # @return [Numo::NArray]
2289
+ #
2290
+ # @example
2291
+ # s = Polars::Series.new("a", [1, 2, 3])
2292
+ # s.to_numo
2293
+ # # =>
2294
+ # # Numo::Int64#shape=[3]
2295
+ # # [1, 2, 3]
2296
def to_numo
  # Temporal data goes through Numo::RObject whether or not nulls are present.
  return Numo::RObject.cast(to_a) if is_datelike
  # When `has_validity` is true, every other dtype is converted by the wrapped object.
  return _s.to_numo if has_validity

  if is_numeric
    # TODO make more efficient
    numo_class = {
      UInt8 => Numo::UInt8,
      UInt16 => Numo::UInt16,
      UInt32 => Numo::UInt32,
      UInt64 => Numo::UInt64,
      Int8 => Numo::Int8,
      Int16 => Numo::Int16,
      Int32 => Numo::Int32,
      Int64 => Numo::Int64,
      Float32 => Numo::SFloat,
      Float64 => Numo::DFloat
    }.fetch(dtype.class)
    numo_class.cast(to_a)
  elsif is_boolean
    Numo::Bit.cast(to_a)
  else
    _s.to_numo
  end
end
2325
+
2326
+ # Set masked values.
2327
+ #
2328
+ # @param filter [Series]
2329
+ # Boolean mask.
2330
+ # @param value [Object]
2331
+ # Value with which to replace the masked values.
2332
+ #
2333
+ # @return [Series]
2334
+ #
2335
+ # @note
2336
+ # Use of this function is frequently an anti-pattern, as it can
2337
+ # block optimization (predicate pushdown, etc). Consider using
2338
+ # `Polars.when(predicate).then(value).otherwise(self)` instead.
2339
+ #
2340
+ # @example
2341
+ # s = Polars::Series.new("a", [1, 2, 3])
2342
+ # s.set(s == 2, 10)
2343
+ # # =>
2344
+ # # shape: (3,)
2345
+ # # Series: 'a' [i64]
2346
+ # # [
2347
+ # # 1
2348
+ # # 10
2349
+ # # 3
2350
+ # # ]
2351
def set(filter, value)
  # The setter name is built from the DTYPE_TO_FFINAME entry for this
  # dtype's class.
  ffi_suffix = DTYPE_TO_FFINAME.fetch(dtype.class)
  updated = _s.send("set_with_mask_#{ffi_suffix}", filter._s, value)
  Utils.wrap_s(updated)
end
2354
+
2355
+ # Set values at the index locations.
2356
+ #
2357
+ # @param idx [Object]
2358
+ # Integers representing the index locations.
2359
+ # @param value [Object]
2360
+ # Replacement values.
2361
+ #
2362
+ # @return [Series]
2363
+ #
2364
+ # @example
2365
+ # s = Polars::Series.new("a", [1, 2, 3])
2366
+ # s.set_at_idx(1, 10)
2367
+ # # =>
2368
+ # # shape: (3,)
2369
+ # # Series: 'a' [i64]
2370
+ # # [
2371
+ # # 1
2372
+ # # 10
2373
+ # # 3
2374
+ # # ]
2375
def scatter(idx, value)
  idx = [idx] if idx.is_a?(Integer)
  # Nothing to write: hand back the receiver untouched.
  return self if idx.length == 0

  idx = Series.new("", idx)
  scalar = value.is_a?(Integer) || value.is_a?(Float) || Utils.bool?(value) || value.is_a?(::String) || value.nil?
  if scalar
    value = Series.new("", [value])

    # if we need to set more than a single value, we extend it
    value = value.extend_constant(value[0], idx.length - 1) if idx.length > 0
  elsif !value.is_a?(Series)
    value = Series.new("", value)
  end

  # The wrapped object is updated directly and the receiver is returned.
  _s.scatter(idx._s, value._s)
  self
end
2397
+ alias_method :set_at_idx, :scatter
2398
+
2399
+ # Create an empty copy of the current Series.
2400
+ #
2401
+ # The copy has identical name/dtype but no data.
2402
+ #
2403
+ # @return [Series]
2404
+ #
2405
+ # @example
2406
+ # s = Polars::Series.new("a", [nil, true, false])
2407
+ # s.cleared
2408
+ # # =>
2409
+ # # shape: (0,)
2410
+ # # Series: 'a' [bool]
2411
+ # # [
2412
+ # # ]
2413
def cleared
  # An already-empty Series is just copied; otherwise keep zero rows.
  return clone unless len > 0

  limit(0)
end
2416
+
2417
+ # clone handled by initialize_copy
2418
+
2419
+ # Fill floating point NaN value with a fill value.
2420
+ #
2421
+ # @param fill_value [Object]
2422
+ # Value used to fill nan values.
2423
+ #
2424
+ # @return [Series]
2425
+ #
2426
+ # @example
2427
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
2428
+ # s.fill_nan(0)
2429
+ # # =>
2430
+ # # shape: (4,)
2431
+ # # Series: 'a' [f64]
2432
+ # # [
2433
+ # # 1.0
2434
+ # # 2.0
2435
+ # # 3.0
2436
+ # # 0.0
2437
+ # # ]
2438
def fill_nan(fill_value)
  # No Series-specific logic: pass `fill_value` on to the next
  # `fill_nan` in the method lookup chain.
  super(fill_value)
end
2441
+
2442
+ # Fill null values using the specified value or strategy.
2443
+ #
2444
+ # @param value [Object]
2445
+ # Value used to fill null values.
2446
+ # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
2447
+ # Strategy used to fill null values.
2448
+ # @param limit
2449
+ # Number of consecutive null values to fill when using the "forward" or
2450
+ # "backward" strategy.
2451
+ #
2452
+ # @return [Series]
2453
+ #
2454
+ # @example
2455
+ # s = Polars::Series.new("a", [1, 2, 3, nil])
2456
+ # s.fill_null(strategy: "forward")
2457
+ # # =>
2458
+ # # shape: (4,)
2459
+ # # Series: 'a' [i64]
2460
+ # # [
2461
+ # # 1
2462
+ # # 2
2463
+ # # 3
2464
+ # # 3
2465
+ # # ]
2466
+ #
2467
+ # @example
2468
+ # s.fill_null(strategy: "min")
2469
+ # # =>
2470
+ # # shape: (4,)
2471
+ # # Series: 'a' [i64]
2472
+ # # [
2473
+ # # 1
2474
+ # # 2
2475
+ # # 3
2476
+ # # 1
2477
+ # # ]
2478
+ #
2479
+ # @example
2480
+ # s = Polars::Series.new("b", ["x", nil, "z"])
2481
+ # s.fill_null(Polars.lit(""))
2482
+ # # =>
2483
+ # # shape: (3,)
2484
+ # # Series: 'b' [str]
2485
+ # # [
2486
+ # # "x"
2487
+ # # ""
2488
+ # # "z"
2489
+ # # ]
2490
def fill_null(value = nil, strategy: nil, limit: nil)
  # Forward every argument unchanged to the next `fill_null`
  # in the method lookup chain.
  super(value, strategy: strategy, limit: limit)
end
2493
+
2494
+ # Rounds down to the nearest integer value.
2495
+ #
2496
+ # Only works on floating point Series.
2497
+ #
2498
+ # @return [Series]
2499
+ #
2500
+ # @example
2501
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
2502
+ # s.floor
2503
+ # # =>
2504
+ # # shape: (3,)
2505
+ # # Series: 'a' [f64]
2506
+ # # [
2507
+ # # 1.0
2508
+ # # 2.0
2509
+ # # 3.0
2510
+ # # ]
2511
def floor
  # Unlike its sibling `ceil`, this calls the native Series directly
  # and wraps the result.
  floored = _s.floor
  Utils.wrap_s(floored)
end
2514
+
2515
+ # Rounds up to the nearest integer value.
2516
+ #
2517
+ # Only works on floating point Series.
2518
+ #
2519
+ # @return [Series]
2520
+ #
2521
+ # @example
2522
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
2523
+ # s.ceil
2524
+ # # =>
2525
+ # # shape: (3,)
2526
+ # # Series: 'a' [f64]
2527
+ # # [
2528
+ # # 2.0
2529
+ # # 3.0
2530
+ # # 4.0
2531
+ # # ]
2532
def ceil
  # Delegates unchanged to the next `ceil` in the method lookup chain.
  super
end
2535
+
2536
+ # Round underlying floating point data by `decimals` digits.
2537
+ #
2538
+ # @param decimals [Integer]
2539
+ # number of decimals to round by.
2540
+ #
2541
+ # @return [Series]
2542
+ #
2543
+ # @example
2544
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
2545
+ # s.round(2)
2546
+ # # =>
2547
+ # # shape: (3,)
2548
+ # # Series: 'a' [f64]
2549
+ # # [
2550
+ # # 1.12
2551
+ # # 2.57
2552
+ # # 3.9
2553
+ # # ]
2554
def round(decimals = 0)
  # Pass `decimals` straight through to the next `round`
  # in the method lookup chain.
  super(decimals)
end
2557
+
2558
+ # Compute the dot/inner product between two Series.
2559
+ #
2560
+ # @param other [Object]
2561
+ # Series (or array) to compute dot product with.
2562
+ #
2563
+ # @return [Numeric]
2564
+ #
2565
+ # @example
2566
+ # s = Polars::Series.new("a", [1, 2, 3])
2567
+ # s2 = Polars::Series.new("b", [4.0, 5.0, 6.0])
2568
+ # s.dot(s2)
2569
+ # # => 32.0
2570
def dot(other)
  # Anything that is not already a Series is wrapped in one first.
  other = Series.new(other) unless other.is_a?(Series)

  own_len = len
  other_len = other.len
  # Reject mismatched lengths before calling into the native layer.
  unless own_len == other_len
    raise ArgumentError, "Series length mismatch: expected #{own_len}, found #{other_len}"
  end

  _s.dot(other._s)
end
2580
+
2581
+ # Compute the most occurring value(s).
2582
+ #
2583
+ # Can return multiple values.
2584
+ #
2585
+ # @return [Series]
2586
+ #
2587
+ # @example
2588
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
2589
+ # s.mode
2590
+ # # =>
2591
+ # # shape: (1,)
2592
+ # # Series: 'a' [i64]
2593
+ # # [
2594
+ # # 2
2595
+ # # ]
2596
def mode
  # Delegates unchanged to the next `mode` in the method lookup chain.
  super
end
2599
+
2600
+ # Compute the element-wise indication of the sign.
2601
+ #
2602
+ # @return [Series]
2603
+ #
2604
+ # @example
2605
+ # s = Polars::Series.new("a", [-9.0, -0.0, 0.0, 4.0, nil])
2606
+ # s.sign
2607
+ # # =>
2608
+ # # shape: (5,)
2609
+ # # Series: 'a' [i64]
2610
+ # # [
2611
+ # # -1
2612
+ # # 0
2613
+ # # 0
2614
+ # # 1
2615
+ # # null
2616
+ # # ]
2617
def sign
  # Delegates unchanged to the next `sign` in the method lookup chain.
  super
end
2620
+
2621
+ # Compute the element-wise value for the sine.
2622
+ #
2623
+ # @return [Series]
2624
+ #
2625
+ # @example
2626
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
2627
+ # s.sin
2628
+ # # =>
2629
+ # # shape: (3,)
2630
+ # # Series: 'a' [f64]
2631
+ # # [
2632
+ # # 0.0
2633
+ # # 1.0
2634
+ # # 1.2246e-16
2635
+ # # ]
2636
def sin
  # Delegates unchanged to the next `sin` in the method lookup chain.
  super
end
2639
+
2640
+ # Compute the element-wise value for the cosine.
2641
+ #
2642
+ # @return [Series]
2643
+ #
2644
+ # @example
2645
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
2646
+ # s.cos
2647
+ # # =>
2648
+ # # shape: (3,)
2649
+ # # Series: 'a' [f64]
2650
+ # # [
2651
+ # # 1.0
2652
+ # # 6.1232e-17
2653
+ # # -1.0
2654
+ # # ]
2655
def cos
  # Delegates unchanged to the next `cos` in the method lookup chain.
  super
end
2658
+
2659
+ # Compute the element-wise value for the tangent.
2660
+ #
2661
+ # @return [Series]
2662
+ #
2663
+ # @example
2664
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
2665
+ # s.tan
2666
+ # # =>
2667
+ # # shape: (3,)
2668
+ # # Series: 'a' [f64]
2669
+ # # [
2670
+ # # 0.0
2671
+ # # 1.6331e16
2672
+ # # -1.2246e-16
2673
+ # # ]
2674
def tan
  # Delegates unchanged to the next `tan` in the method lookup chain.
  super
end
2677
+
2678
+ # Compute the element-wise value for the inverse sine.
2679
+ #
2680
+ # @return [Series]
2681
+ #
2682
+ # @example
2683
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2684
+ # s.arcsin
2685
+ # # =>
2686
+ # # shape: (3,)
2687
+ # # Series: 'a' [f64]
2688
+ # # [
2689
+ # # 1.570796
2690
+ # # 0.0
2691
+ # # -1.570796
2692
+ # # ]
2693
def arcsin
  # Delegates unchanged to the next `arcsin` in the method lookup chain.
  super
end
2696
+ alias_method :asin, :arcsin
2697
+
2698
+ # Compute the element-wise value for the inverse cosine.
2699
+ #
2700
+ # @return [Series]
2701
+ #
2702
+ # @example
2703
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2704
+ # s.arccos
2705
+ # # =>
2706
+ # # shape: (3,)
2707
+ # # Series: 'a' [f64]
2708
+ # # [
2709
+ # # 0.0
2710
+ # # 1.570796
2711
+ # # 3.141593
2712
+ # # ]
2713
def arccos
  # Delegates unchanged to the next `arccos` in the method lookup chain.
  super
end
2716
+ alias_method :acos, :arccos
2717
+
2718
+ # Compute the element-wise value for the inverse tangent.
2719
+ #
2720
+ # @return [Series]
2721
+ #
2722
+ # @example
2723
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2724
+ # s.arctan
2725
+ # # =>
2726
+ # # shape: (3,)
2727
+ # # Series: 'a' [f64]
2728
+ # # [
2729
+ # # 0.785398
2730
+ # # 0.0
2731
+ # # -0.785398
2732
+ # # ]
2733
def arctan
  # Delegates unchanged to the next `arctan` in the method lookup chain.
  super
end
2736
+ alias_method :atan, :arctan
2737
+
2738
+ # Compute the element-wise value for the inverse hyperbolic sine.
2739
+ #
2740
+ # @return [Series]
2741
+ #
2742
+ # @example
2743
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2744
+ # s.arcsinh
2745
+ # # =>
2746
+ # # shape: (3,)
2747
+ # # Series: 'a' [f64]
2748
+ # # [
2749
+ # # 0.881374
2750
+ # # 0.0
2751
+ # # -0.881374
2752
+ # # ]
2753
def arcsinh
  # Delegates unchanged to the next `arcsinh` in the method lookup chain.
  super
end
2756
+ alias_method :asinh, :arcsinh
2757
+
2758
+ # Compute the element-wise value for the inverse hyperbolic cosine.
2759
+ #
2760
+ # @return [Series]
2761
+ #
2762
+ # @example
2763
+ # s = Polars::Series.new("a", [5.0, 1.0, 0.0, -1.0])
2764
+ # s.arccosh
2765
+ # # =>
2766
+ # # shape: (4,)
2767
+ # # Series: 'a' [f64]
2768
+ # # [
2769
+ # # 2.292432
2770
+ # # 0.0
2771
+ # # NaN
2772
+ # # NaN
2773
+ # # ]
2774
def arccosh
  # Delegates unchanged to the next `arccosh` in the method lookup chain.
  super
end
2777
+ alias_method :acosh, :arccosh
2778
+
2779
+ # Compute the element-wise value for the inverse hyperbolic tangent.
2780
+ #
2781
+ # @return [Series]
2782
+ #
2783
+ # @example
2784
+ # s = Polars::Series.new("a", [2.0, 1.0, 0.5, 0.0, -0.5, -1.0, -1.1])
2785
+ # s.arctanh
2786
+ # # =>
2787
+ # # shape: (7,)
2788
+ # # Series: 'a' [f64]
2789
+ # # [
2790
+ # # NaN
2791
+ # # inf
2792
+ # # 0.549306
2793
+ # # 0.0
2794
+ # # -0.549306
2795
+ # # -inf
2796
+ # # NaN
2797
+ # # ]
2798
def arctanh
  # Delegates unchanged to the next `arctanh` in the method lookup chain.
  super
end
2801
+ alias_method :atanh, :arctanh
2802
+
2803
+ # Compute the element-wise value for the hyperbolic sine.
2804
+ #
2805
+ # @return [Series]
2806
+ #
2807
+ # @example
2808
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2809
+ # s.sinh
2810
+ # # =>
2811
+ # # shape: (3,)
2812
+ # # Series: 'a' [f64]
2813
+ # # [
2814
+ # # 1.175201
2815
+ # # 0.0
2816
+ # # -1.175201
2817
+ # # ]
2818
def sinh
  # Delegates unchanged to the next `sinh` in the method lookup chain.
  super
end
2821
+
2822
+ # Compute the element-wise value for the hyperbolic cosine.
2823
+ #
2824
+ # @return [Series]
2825
+ #
2826
+ # @example
2827
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2828
+ # s.cosh
2829
+ # # =>
2830
+ # # shape: (3,)
2831
+ # # Series: 'a' [f64]
2832
+ # # [
2833
+ # # 1.543081
2834
+ # # 1.0
2835
+ # # 1.543081
2836
+ # # ]
2837
def cosh
  # Delegates unchanged to the next `cosh` in the method lookup chain.
  super
end
2840
+
2841
+ # Compute the element-wise value for the hyperbolic tangent.
2842
+ #
2843
+ # @return [Series]
2844
+ #
2845
+ # @example
2846
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2847
+ # s.tanh
2848
+ # # =>
2849
+ # # shape: (3,)
2850
+ # # Series: 'a' [f64]
2851
+ # # [
2852
+ # # 0.761594
2853
+ # # 0.0
2854
+ # # -0.761594
2855
+ # # ]
2856
def tanh
  # Delegates unchanged to the next `tanh` in the method lookup chain.
  super
end
2859
+
2860
+ # Apply a custom/user-defined function (UDF) over elements in this Series and
2861
+ # return a new Series.
2862
+ #
2863
+ # If the function returns another datatype, the return_dtype arg should be set,
2864
+ # otherwise the method will fail.
2865
+ #
2866
+ # @param return_dtype [Symbol]
2867
+ # Output datatype. If none is given, the same datatype as this Series will be
2868
+ # used.
2869
+ # @param skip_nulls [Boolean]
2870
+ # Nulls will be skipped and not passed to the Ruby function.
2871
+ # This is faster because Ruby can be skipped and because we call
2872
+ # more specialized functions.
2873
+ #
2874
+ # @return [Series]
2875
+ #
2876
+ # @example
2877
+ # s = Polars::Series.new("a", [1, 2, 3])
2878
+ # s.map_elements { |x| x + 10 }
2879
+ # # =>
2880
+ # # shape: (3,)
2881
+ # # Series: 'a' [i64]
2882
+ # # [
2883
+ # # 11
2884
+ # # 12
2885
+ # # 13
2886
+ # # ]
2887
def map_elements(return_dtype: nil, skip_nulls: true, &func)
  # Without a block `func` is nil and would be handed to the native layer
  # as-is; fail early with a clear message instead.
  raise ArgumentError, "map_elements requires a block" if func.nil?

  # Only translate the requested dtype when one was given; nil is passed through.
  pl_return_dtype = return_dtype.nil? ? nil : Utils.rb_type_to_dtype(return_dtype)
  Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls))
end
2895
+ alias_method :map, :map_elements
2896
+ alias_method :apply, :map_elements
2897
+
2898
+ # Shift the values by a given period.
2899
+ #
2900
+ # @param periods [Integer]
2901
+ # Number of places to shift (may be negative).
2902
+ #
2903
+ # @return [Series]
2904
+ #
2905
+ # @example
2906
+ # s = Polars::Series.new("a", [1, 2, 3])
2907
+ # s.shift(1)
2908
+ # # =>
2909
+ # # shape: (3,)
2910
+ # # Series: 'a' [i64]
2911
+ # # [
2912
+ # # null
2913
+ # # 1
2914
+ # # 2
2915
+ # # ]
2916
+ #
2917
+ # @example
2918
+ # s.shift(-1)
2919
+ # # =>
2920
+ # # shape: (3,)
2921
+ # # Series: 'a' [i64]
2922
+ # # [
2923
+ # # 2
2924
+ # # 3
2925
+ # # null
2926
+ # # ]
2927
def shift(periods = 1)
  # Pass `periods` straight through to the next `shift`
  # in the method lookup chain.
  super(periods)
end
2930
+
2931
+ # Shift the values by a given period and fill the resulting null values.
2932
+ #
2933
+ # @param periods [Integer]
2934
+ # Number of places to shift (may be negative).
2935
+ # @param fill_value [Object]
2936
+ # Value used to fill the null values introduced by the shift.
2937
+ #
2938
+ # @return [Series]
2939
def shift_and_fill(periods, fill_value)
  # Forward both arguments unchanged to the next `shift_and_fill`
  # in the method lookup chain.
  super(periods, fill_value)
end
2942
+
2943
+ # Take values from self or other based on the given mask.
2944
+ #
2945
+ # Where mask evaluates true, take values from self. Where mask evaluates false,
2946
+ # take values from other.
2947
+ #
2948
+ # @param mask [Series]
2949
+ # Boolean Series.
2950
+ # @param other [Series]
2951
+ # Series of same type.
2952
+ #
2953
+ # @return [Series]
2954
+ #
2955
+ # @example
2956
+ # s1 = Polars::Series.new([1, 2, 3, 4, 5])
2957
+ # s2 = Polars::Series.new([5, 4, 3, 2, 1])
2958
+ # s1.zip_with(s1 < s2, s2)
2959
+ # # =>
2960
+ # # shape: (5,)
2961
+ # # Series: '' [i64]
2962
+ # # [
2963
+ # # 1
2964
+ # # 2
2965
+ # # 3
2966
+ # # 2
2967
+ # # 1
2968
+ # # ]
2969
+ #
2970
+ # @example
2971
+ # mask = Polars::Series.new([true, false, true, false, true])
2972
+ # s1.zip_with(mask, s2)
2973
+ # # =>
2974
+ # # shape: (5,)
2975
+ # # Series: '' [i64]
2976
+ # # [
2977
+ # # 1
2978
+ # # 4
2979
+ # # 3
2980
+ # # 2
2981
+ # # 5
2982
+ # # ]
2983
def zip_with(mask, other)
  # Unwrap both arguments to their native Series, combine, then re-wrap.
  zipped = _s.zip_with(mask._s, other._s)
  Utils.wrap_s(zipped)
end
2986
+
2987
+ # Apply a rolling min (moving min) over the values in this array.
2988
+ #
2989
+ # A window of length `window_size` will traverse the array. The values that fill
2990
+ # this window will (optionally) be multiplied with the weights given by the
2991
+ # `weights` vector. The resulting values will be aggregated to their minimum.
2992
+ #
2993
+ # @param window_size [Integer]
2994
+ # The length of the window.
2995
+ # @param weights [Array]
2996
+ # An optional slice with the same length as the window that will be multiplied
2997
+ # elementwise with the values in the window.
2998
+ # @param min_periods [Integer]
2999
+ # The number of values in the window that should be non-null before computing
3000
+ # a result. If nil, it will be set equal to window size.
3001
+ # @param center [Boolean]
3002
+ # Set the labels at the center of the window
3003
+ #
3004
+ # @return [Series]
3005
+ #
3006
+ # @example
3007
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
3008
+ # s.rolling_min(3)
3009
+ # # =>
3010
+ # # shape: (5,)
3011
+ # # Series: 'a' [i64]
3012
+ # # [
3013
+ # # null
3014
+ # # null
3015
+ # # 100
3016
+ # # 200
3017
+ # # 300
3018
+ # # ]
3019
def rolling_min(window_size, weights: nil, min_periods: nil, center: false)
  # Forward every argument unchanged to the next `rolling_min`
  # in the method lookup chain.
  super(window_size, weights: weights, min_periods: min_periods, center: center)
end
3027
+
3028
+ # Apply a rolling max (moving max) over the values in this array.
3029
+ #
3030
+ # A window of length `window_size` will traverse the array. The values that fill
3031
+ # this window will (optionally) be multiplied with the weights given by the
3032
+ # `weights` vector. The resulting values will be aggregated to their maximum.
3033
+ #
3034
+ # @param window_size [Integer]
3035
+ # The length of the window.
3036
+ # @param weights [Array]
3037
+ # An optional slice with the same length as the window that will be multiplied
3038
+ # elementwise with the values in the window.
3039
+ # @param min_periods [Integer]
3040
+ # The number of values in the window that should be non-null before computing
3041
+ # a result. If nil, it will be set equal to window size.
3042
+ # @param center [Boolean]
3043
+ # Set the labels at the center of the window
3044
+ #
3045
+ # @return [Series]
3046
+ #
3047
+ # @example
3048
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
3049
+ # s.rolling_max(2)
3050
+ # # =>
3051
+ # # shape: (5,)
3052
+ # # Series: 'a' [i64]
3053
+ # # [
3054
+ # # null
3055
+ # # 200
3056
+ # # 300
3057
+ # # 400
3058
+ # # 500
3059
+ # # ]
3060
def rolling_max(window_size, weights: nil, min_periods: nil, center: false)
  # Forward every argument unchanged to the next `rolling_max`
  # in the method lookup chain.
  super(window_size, weights: weights, min_periods: min_periods, center: center)
end
3068
+
3069
+ # Apply a rolling mean (moving mean) over the values in this array.
3070
+ #
3071
+ # A window of length `window_size` will traverse the array. The values that fill
3072
+ # this window will (optionally) be multiplied with the weights given by the
3073
+ # `weights` vector. The resulting values will be aggregated to their mean.
3074
+ #
3075
+ # @param window_size [Integer]
3076
+ # The length of the window.
3077
+ # @param weights [Array]
3078
+ # An optional slice with the same length as the window that will be multiplied
3079
+ # elementwise with the values in the window.
3080
+ # @param min_periods [Integer]
3081
+ # The number of values in the window that should be non-null before computing
3082
+ # a result. If nil, it will be set equal to window size.
3083
+ # @param center [Boolean]
3084
+ # Set the labels at the center of the window
3085
+ #
3086
+ # @return [Series]
3087
+ #
3088
+ # @example
3089
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
3090
+ # s.rolling_mean(2)
3091
+ # # =>
3092
+ # # shape: (5,)
3093
+ # # Series: 'a' [f64]
3094
+ # # [
3095
+ # # null
3096
+ # # 150.0
3097
+ # # 250.0
3098
+ # # 350.0
3099
+ # # 450.0
3100
+ # # ]
3101
def rolling_mean(window_size, weights: nil, min_periods: nil, center: false)
  # Forward every argument unchanged to the next `rolling_mean`
  # in the method lookup chain.
  super(window_size, weights: weights, min_periods: min_periods, center: center)
end
3109
+
3110
+ # Apply a rolling sum (moving sum) over the values in this array.
3111
+ #
3112
+ # A window of length `window_size` will traverse the array. The values that fill
3113
+ # this window will (optionally) be multiplied with the weights given by the
3114
+ # `weights` vector. The resulting values will be aggregated to their sum.
3115
+ #
3116
+ # @param window_size [Integer]
3117
+ # The length of the window.
3118
+ # @param weights [Array]
3119
+ # An optional slice with the same length as the window that will be multiplied
3120
+ # elementwise with the values in the window.
3121
+ # @param min_periods [Integer]
3122
+ # The number of values in the window that should be non-null before computing
3123
+ # a result. If nil, it will be set equal to window size.
3124
+ # @param center [Boolean]
3125
+ # Set the labels at the center of the window
3126
+ #
3127
+ # @return [Series]
3128
+ #
3129
+ # @example
3130
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
3131
+ # s.rolling_sum(2)
3132
+ # # =>
3133
+ # # shape: (5,)
3134
+ # # Series: 'a' [i64]
3135
+ # # [
3136
+ # # null
3137
+ # # 3
3138
+ # # 5
3139
+ # # 7
3140
+ # # 9
3141
+ # # ]
3142
def rolling_sum(window_size, weights: nil, min_periods: nil, center: false)
  # Forward every argument unchanged to the next `rolling_sum`
  # in the method lookup chain.
  super(window_size, weights: weights, min_periods: min_periods, center: center)
end
3150
+
3151
+ # Compute a rolling std dev.
3152
+ #
3153
+ # A window of length `window_size` will traverse the array. The values that fill
3154
+ # this window will (optionally) be multiplied with the weights given by the
3155
+ # `weights` vector. The resulting values will be aggregated to their standard deviation.
3156
+ #
3157
+ # @param window_size [Integer]
3158
+ # The length of the window.
3159
+ # @param weights [Array]
3160
+ # An optional slice with the same length as the window that will be multiplied
3161
+ # elementwise with the values in the window.
3162
+ # @param min_periods [Integer]
3163
+ # The number of values in the window that should be non-null before computing
3164
+ # a result. If nil, it will be set equal to window size.
3165
+ # @param center [Boolean]
3166
+ # Set the labels at the center of the window
3167
+ #
3168
+ # @return [Series]
3169
+ #
3170
+ # @example
3171
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3172
+ # s.rolling_std(3)
3173
+ # # =>
3174
+ # # shape: (6,)
3175
+ # # Series: 'a' [f64]
3176
+ # # [
3177
+ # # null
3178
+ # # null
3179
+ # # 1.0
3180
+ # # 1.0
3181
+ # # 1.527525
3182
+ # # 2.0
3183
+ # # ]
3184
def rolling_std(window_size, weights: nil, min_periods: nil, center: false, ddof: 1)
  # Forward every argument (including `ddof`) unchanged to the next
  # `rolling_std` in the method lookup chain.
  super(window_size, weights: weights, min_periods: min_periods, center: center, ddof: ddof)
end
3193
+
3194
+ # Compute a rolling variance.
3195
+ #
3196
+ # A window of length `window_size` will traverse the array. The values that fill
3197
+ # this window will (optionally) be multiplied with the weights given by the
3198
+ # `weights` vector. The resulting values will be aggregated to their variance.
3199
+ #
3200
+ # @param window_size [Integer]
3201
+ # The length of the window.
3202
+ # @param weights [Array]
3203
+ # An optional slice with the same length as the window that will be multiplied
3204
+ # elementwise with the values in the window.
3205
+ # @param min_periods [Integer]
3206
+ # The number of values in the window that should be non-null before computing
3207
+ # a result. If nil, it will be set equal to window size.
3208
+ # @param center [Boolean]
3209
+ # Set the labels at the center of the window
3210
+ #
3211
+ # @return [Series]
3212
+ #
3213
+ # @example
3214
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3215
+ # s.rolling_var(3)
3216
+ # # =>
3217
+ # # shape: (6,)
3218
+ # # Series: 'a' [f64]
3219
+ # # [
3220
+ # # null
3221
+ # # null
3222
+ # # 1.0
3223
+ # # 1.0
3224
+ # # 2.333333
3225
+ # # 4.0
3226
+ # # ]
3227
def rolling_var(window_size, weights: nil, min_periods: nil, center: false, ddof: 1)
  # Forward every argument (including `ddof`) unchanged to the next
  # `rolling_var` in the method lookup chain.
  super(window_size, weights: weights, min_periods: min_periods, center: center, ddof: ddof)
end
3236
+
3237
+ # def rolling_apply
3238
+ # end
3239
+
3240
+ # Compute a rolling median.
3241
+ #
3242
+ # @param window_size [Integer]
3243
+ # The length of the window.
3244
+ # @param weights [Array]
3245
+ # An optional slice with the same length as the window that will be multiplied
3246
+ # elementwise with the values in the window.
3247
+ # @param min_periods [Integer]
3248
+ # The number of values in the window that should be non-null before computing
3249
+ # a result. If nil, it will be set equal to window size.
3250
+ # @param center [Boolean]
3251
+ # Set the labels at the center of the window
3252
+ #
3253
+ # @return [Series]
3254
+ #
3255
+ # @example
3256
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3257
+ # s.rolling_median(3)
3258
+ # # =>
3259
+ # # shape: (6,)
3260
+ # # Series: 'a' [f64]
3261
+ # # [
3262
+ # # null
3263
+ # # null
3264
+ # # 2.0
3265
+ # # 3.0
3266
+ # # 4.0
3267
+ # # 6.0
3268
+ # # ]
3269
def rolling_median(window_size, weights: nil, min_periods: nil, center: false)
  # Forward every argument unchanged to the next `rolling_median`
  # in the method lookup chain.
  super(window_size, weights: weights, min_periods: min_periods, center: center)
end
3277
+
3278
+ # Compute a rolling quantile.
3279
+ #
3280
+ # @param quantile [Float]
3281
+ # Quantile between 0.0 and 1.0.
3282
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
3283
+ # Interpolation method.
3284
+ # @param window_size [Integer]
3285
+ # The length of the window.
3286
+ # @param weights [Array]
3287
+ # An optional slice with the same length as the window that will be multiplied
3288
+ # elementwise with the values in the window.
3289
+ # @param min_periods [Integer]
3290
+ # The number of values in the window that should be non-null before computing
3291
+ # a result. If nil, it will be set equal to window size.
3292
+ # @param center [Boolean]
3293
+ # Set the labels at the center of the window
3294
+ #
3295
+ # @return [Series]
3296
+ #
3297
+ # @example
3298
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3299
+ # s.rolling_quantile(0.33, window_size: 3)
3300
+ # # =>
3301
+ # # shape: (6,)
3302
+ # # Series: 'a' [f64]
3303
+ # # [
3304
+ # # null
3305
+ # # null
3306
+ # # 1.0
3307
+ # # 2.0
3308
+ # # 3.0
3309
+ # # 4.0
3310
+ # # ]
3311
+ #
3312
+ # @example
3313
+ # s.rolling_quantile(0.33, interpolation: "linear", window_size: 3)
3314
+ # # =>
3315
+ # # shape: (6,)
3316
+ # # Series: 'a' [f64]
3317
+ # # [
3318
+ # # null
3319
+ # # null
3320
+ # # 1.66
3321
+ # # 2.66
3322
+ # # 3.66
3323
+ # # 5.32
3324
+ # # ]
3325
def rolling_quantile(
  quantile,
  interpolation: "nearest",
  window_size: 2,
  weights: nil,
  min_periods: nil,
  center: false
)
  # Forward every argument unchanged to the next `rolling_quantile`
  # in the method lookup chain.
  super(
    quantile,
    interpolation: interpolation,
    window_size: window_size,
    weights: weights,
    min_periods: min_periods,
    center: center
  )
end
3335
+
3336
+ # Compute a rolling skew.
3337
+ #
3338
+ # @param window_size [Integer]
3339
+ # Integer size of the rolling window.
3340
+ # @param bias [Boolean]
3341
+ # If false, the calculations are corrected for statistical bias.
3342
+ #
3343
+ # @return [Series]
3344
+ #
3345
+ # @example
3346
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3347
+ # s.rolling_skew(3)
3348
+ # # =>
3349
+ # # shape: (6,)
3350
+ # # Series: 'a' [f64]
3351
+ # # [
3352
+ # # null
3353
+ # # null
3354
+ # # 0.0
3355
+ # # 0.0
3356
+ # # 0.381802
3357
+ # # 0.0
3358
+ # # ]
3359
def rolling_skew(window_size, bias: true)
  # Forward both arguments unchanged to the next `rolling_skew`
  # in the method lookup chain.
  super(window_size, bias: bias)
end
3362
+
3363
+ # Sample from this Series.
3364
+ #
3365
+ # @param n [Integer]
3366
+ # Number of items to return. Cannot be used with `frac`. Defaults to 1 if
3367
+ # `frac` is nil.
3368
+ # @param frac [Float]
3369
+ # Fraction of items to return. Cannot be used with `n`.
3370
+ # @param with_replacement [Boolean]
3371
+ # Allow values to be sampled more than once.
3372
+ # @param shuffle [Boolean]
3373
+ # Shuffle the order of sampled data points.
3374
+ # @param seed [Integer]
3375
+ # Seed for the random number generator. If set to nil (default), a random
3376
+ # seed is used.
3377
+ #
3378
+ # @return [Series]
3379
+ #
3380
+ # @example
3381
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
3382
+ # s.sample(n: 2, seed: 0)
3383
+ # # =>
3384
+ # # shape: (2,)
3385
+ # # Series: 'a' [i64]
3386
+ # # [
3387
+ # # 5
3388
+ # # 3
3389
+ # # ]
3390
def sample(
  n: nil,
  frac: nil,
  with_replacement: false,
  shuffle: false,
  seed: nil
)
  # `n` and `frac` are mutually exclusive ways of sizing the sample.
  unless n.nil? || frac.nil?
    raise ArgumentError, "cannot specify both `n` and `frac`"
  end

  sampled =
    if frac.nil?
      # Count-based sampling; with neither option given, take one item.
      _s.sample_n(n.nil? ? 1 : n, with_replacement, shuffle, seed)
    else
      # Past the guard above, a non-nil `frac` implies `n` is nil.
      _s.sample_frac(frac, with_replacement, shuffle, seed)
    end
  Utils.wrap_s(sampled)
end
3410
+
3411
+ # Get a boolean mask of the local maximum peaks.
3412
+ #
3413
+ # @return [Series]
3414
+ #
3415
+ # @example
3416
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
3417
+ # s.peak_max
3418
+ # # =>
3419
+ # # shape: (5,)
3420
+ # # Series: 'a' [bool]
3421
+ # # [
3422
+ # # false
3423
+ # # false
3424
+ # # false
3425
+ # # false
3426
+ # # true
3427
+ # # ]
3428
def peak_max
  # Delegates unchanged to the next `peak_max` in the method lookup chain.
  super
end
3431
+
3432
+ # Get a boolean mask of the local minimum peaks.
3433
+ #
3434
+ # @return [Series]
3435
+ #
3436
+ # @example
3437
+ # s = Polars::Series.new("a", [4, 1, 3, 2, 5])
3438
+ # s.peak_min
3439
+ # # =>
3440
+ # # shape: (5,)
3441
+ # # Series: 'a' [bool]
3442
+ # # [
3443
+ # # false
3444
+ # # true
3445
+ # # false
3446
+ # # true
3447
+ # # false
3448
+ # # ]
3449
def peak_min
  # Delegates unchanged to the next `peak_min` in the method lookup chain.
  super
end
3452
+
3453
+ # Count the number of unique values in this Series.
3454
+ #
3455
+ # @return [Integer]
3456
+ #
3457
+ # @example
3458
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
3459
+ # s.n_unique
3460
+ # # => 3
3461
def n_unique
  # Answered by the native Series; the value is returned as-is (not wrapped).
  unique_count = _s.n_unique
  unique_count
end
3464
+
3465
+ # Shrink Series memory usage.
3466
+ #
3467
+ # Shrinks the underlying array capacity to exactly fit the actual data.
3468
+ # (Note that this function does not change the Series data type).
3469
+ #
3470
+ # @return [Series]
3471
def shrink_to_fit(in_place: false)
  # Shrink either this Series itself or a clone of it, and return
  # whichever object was shrunk.
  target = in_place ? self : clone
  target._s.shrink_to_fit
  target
end
3481
+
3482
+ # Hash the Series.
3483
+ #
3484
+ # The hash value is of type `:u64`.
3485
+ #
3486
+ # @param seed [Integer]
3487
+ # Random seed parameter. Defaults to 0.
3488
+ # @param seed_1 [Integer]
3489
+ # Random seed parameter. Defaults to `seed` if not set.
3490
+ # @param seed_2 [Integer]
3491
+ # Random seed parameter. Defaults to `seed` if not set.
3492
+ # @param seed_3 [Integer]
3493
+ # Random seed parameter. Defaults to `seed` if not set.
3494
+ #
3495
+ # @return [Series]
3496
+ #
3497
+ # @example
3498
+ # s = Polars::Series.new("a", [1, 2, 3])
3499
+ # s._hash(42)
3500
+ # # =>
3501
+ # # shape: (3,)
3502
+ # # Series: 'a' [u64]
3503
+ # # [
3504
+ # # 2374023516666777365
3505
+ # # 10386026231460783898
3506
+ # # 17796317186427479491
3507
+ # # ]
3508
def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
  # Forward all four seeds unchanged to the next `_hash`
  # in the method lookup chain.
  super(seed, seed_1, seed_2, seed_3)
end
3511
+
3512
+ # Reinterpret the underlying bits as a signed/unsigned integer.
3513
+ #
3514
+ # This operation is only allowed for 64bit integers. For lower bits integers,
3515
+ # you can safely use the cast operation.
3516
+ #
3517
+ # @param signed [Boolean]
3518
+ # If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`.
3519
+ #
3520
+ # @return [Series]
3521
def reinterpret(signed: true)
  # Pass `signed` straight through to the next `reinterpret`
  # in the method lookup chain.
  super(signed: signed)
end
3524
+
3525
+ # Interpolate intermediate values. The interpolation method is linear.
3526
+ #
3527
+ # @return [Series]
3528
+ #
3529
+ # @example
3530
+ # s = Polars::Series.new("a", [1, 2, nil, nil, 5])
3531
+ # s.interpolate
3532
+ # # =>
3533
+ # # shape: (5,)
3534
+ # # Series: 'a' [f64]
3535
+ # # [
3536
+ # # 1.0
3537
+ # # 2.0
3538
+ # # 3.0
3539
+ # # 4.0
3540
+ # # 5.0
3541
+ # # ]
3542
def interpolate(method: "linear")
  # Pass `method` straight through to the next `interpolate`
  # in the method lookup chain.
  super(method: method)
end
3545
+
3546
+ # Compute absolute values.
3547
+ #
3548
+ # @return [Series]
3549
def abs
  # Delegates unchanged to the next `abs` in the method lookup chain.
  super
end
3552
+
3553
+ # Assign ranks to data, dealing with ties appropriately.
3554
+ #
3555
+ # @param method ["average", "min", "max", "dense", "ordinal", "random"]
3556
+ # The method used to assign ranks to tied elements.
3557
+ # The following methods are available (default is 'average'):
3558
+ #
3559
+ # - 'average' : The average of the ranks that would have been assigned to
3560
+ # all the tied values is assigned to each value.
3561
+ # - 'min' : The minimum of the ranks that would have been assigned to all
3562
+ # the tied values is assigned to each value. (This is also referred to
3563
+ # as "competition" ranking.)
3564
+ # - 'max' : The maximum of the ranks that would have been assigned to all
3565
+ # the tied values is assigned to each value.
3566
+ # - 'dense' : Like 'min', but the rank of the next highest element is
3567
+ # assigned the rank immediately after those assigned to the tied
3568
+ # elements.
3569
+ # - 'ordinal' : All values are given a distinct rank, corresponding to
3570
+ # the order that the values occur in the Series.
3571
+ # - 'random' : Like 'ordinal', but the rank for ties is not dependent
3572
+ # on the order that the values occur in the Series.
3573
+ # @param reverse [Boolean]
3574
+ # Reverse the operation.
3575
+ # @param seed [Integer]
3576
+ # If `method: "random"`, use this as seed.
3577
+ #
3578
+ # @return [Series]
3579
+ #
3580
+ # @example The 'average' method:
3581
+ # s = Polars::Series.new("a", [3, 6, 1, 1, 6])
3582
+ # s.rank
3583
+ # # =>
3584
+ # # shape: (5,)
3585
+ # # Series: 'a' [f64]
3586
+ # # [
3587
+ # # 3.0
3588
+ # # 4.5
3589
+ # # 1.5
3590
+ # # 1.5
3591
+ # # 4.5
3592
+ # # ]
3593
+ #
3594
+ # @example The 'ordinal' method:
3595
+ # s = Polars::Series.new("a", [3, 6, 1, 1, 6])
3596
+ # s.rank(method: "ordinal")
3597
+ # # =>
3598
+ # # shape: (5,)
3599
+ # # Series: 'a' [u32]
3600
+ # # [
3601
+ # # 3
3602
+ # # 4
3603
+ # # 1
3604
+ # # 2
3605
+ # # 5
3606
+ # # ]
3607
def rank(method: "average", reverse: false, seed: nil)
  # Forward every option unchanged to the next `rank`
  # in the method lookup chain.
  super(method: method, reverse: reverse, seed: seed)
end
3610
+
3611
+ # Calculate the n-th discrete difference.
3612
+ #
3613
+ # @param n [Integer]
3614
+ # Number of slots to shift.
3615
+ # @param null_behavior ["ignore", "drop"]
3616
+ # How to handle null values.
3617
+ #
3618
+ # @return [Series]
3619
def diff(n: 1, null_behavior: "ignore")
  # Explicitly forward both options to `super`.
  super(n: n, null_behavior: null_behavior)
end
3622
+
3623
+ # Computes percentage change between values.
3624
+ #
3625
+ # Percentage change (as fraction) between current element and most-recent
3626
+ # non-null element at least `n` period(s) before the current element.
3627
+ #
3628
+ # Computes the change from the previous row by default.
3629
+ #
3630
+ # @param n [Integer]
3631
+ # periods to shift for forming percent change.
3632
+ #
3633
+ # @return [Series]
3634
+ #
3635
+ # @example
3636
+ # Polars::Series.new(0..9).pct_change
3637
+ # # =>
3638
+ # # shape: (10,)
3639
+ # # Series: '' [f64]
3640
+ # # [
3641
+ # # null
3642
+ # # inf
3643
+ # # 1.0
3644
+ # # 0.5
3645
+ # # 0.333333
3646
+ # # 0.25
3647
+ # # 0.2
3648
+ # # 0.166667
3649
+ # # 0.142857
3650
+ # # 0.125
3651
+ # # ]
3652
+ #
3653
+ # @example
3654
+ # Polars::Series.new([1, 2, 4, 8, 16, 32, 64, 128, 256, 512]).pct_change(n: 2)
3655
+ # # =>
3656
+ # # shape: (10,)
3657
+ # # Series: '' [f64]
3658
+ # # [
3659
+ # # null
3660
+ # # null
3661
+ # # 3.0
3662
+ # # 3.0
3663
+ # # 3.0
3664
+ # # 3.0
3665
+ # # 3.0
3666
+ # # 3.0
3667
+ # # 3.0
3668
+ # # 3.0
3669
+ # # ]
3670
def pct_change(n: 1)
  # Explicitly forward the period count to `super`.
  super(n: n)
end
3673
+
3674
+ # Compute the sample skewness of a data set.
3675
+ #
3676
+ # For normally distributed data, the skewness should be about zero. For
3677
+ # unimodal continuous distributions, a skewness value greater than zero means
3678
+ # that there is more weight in the right tail of the distribution. The
3679
+ # function `skewtest` can be used to determine if the skewness value
3680
+ # is close enough to zero, statistically speaking.
3681
+ #
3682
+ # @param bias [Boolean]
3683
+ # If `false`, the calculations are corrected for statistical bias.
3684
+ #
3685
+ # @return [Float, nil]
3686
def skew(bias: true)
  # Delegate to the underlying `_s` object; `bias` is passed positionally.
  rb_series = _s
  rb_series.skew(bias)
end
3689
+
3690
+ # Compute the kurtosis (Fisher or Pearson) of a dataset.
3691
+ #
3692
+ # Kurtosis is the fourth central moment divided by the square of the
3693
+ # variance. If Fisher's definition is used, then 3.0 is subtracted from
3694
+ # the result to give 0.0 for a normal distribution.
3695
+ # If bias is false, then the kurtosis is calculated using k statistics to
3696
+ # eliminate bias coming from biased moment estimators
3697
+ #
3698
+ # @param fisher [Boolean]
3699
+ # If `true`, Fisher's definition is used (normal ==> 0.0). If `false`,
3700
+ # Pearson's definition is used (normal ==> 3.0).
3701
+ # @param bias [Boolean]
3702
+ # If `false`, the calculations are corrected for statistical bias.
3703
+ #
3704
+ # @return [Float, nil]
3705
def kurtosis(fisher: true, bias: true)
  # Delegate to the underlying `_s` object; both flags are passed positionally.
  rb_series = _s
  rb_series.kurtosis(fisher, bias)
end
3708
+
3709
+ # Clip (limit) the values in an array to a `min` and `max` boundary.
3710
+ #
3711
+ # Only works for numerical types.
3712
+ #
3713
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
3714
+ # expression. See {#when} for more information.
3715
+ #
3716
+ # @param min_val [Numeric]
3717
+ # Minimum value.
3718
+ # @param max_val [Numeric]
3719
+ # Maximum value.
3720
+ #
3721
+ # @return [Series]
3722
+ #
3723
+ # @example
3724
+ # s = Polars::Series.new("foo", [-50, 5, nil, 50])
3725
+ # s.clip(1, 10)
3726
+ # # =>
3727
+ # # shape: (4,)
3728
+ # # Series: 'foo' [i64]
3729
+ # # [
3730
+ # # 1
3731
+ # # 5
3732
+ # # null
3733
+ # # 10
3734
+ # # ]
3735
def clip(min_val = nil, max_val = nil)
  # Explicitly forward both bounds (including nil defaults) to `super`.
  super(min_val, max_val)
end
3738
+
3739
+ # Clip (limit) the values in an array to a `min` boundary.
3740
+ #
3741
+ # Only works for numerical types.
3742
+ #
3743
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
3744
+ # expression. See {#when} for more information.
3745
+ #
3746
+ # @param min_val [Numeric]
3747
+ # Minimum value.
3748
+ #
3749
+ # @return [Series]
3750
def clip_min(min_val)
  # Explicitly forward the lower bound to `super`.
  super(min_val)
end
3753
+
3754
+ # Clip (limit) the values in an array to a `max` boundary.
3755
+ #
3756
+ # Only works for numerical types.
3757
+ #
3758
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
3759
+ # expression. See {#when} for more information.
3760
+ #
3761
+ # @param max_val [Numeric]
3762
+ # Maximum value.
3763
+ #
3764
+ # @return [Series]
3765
def clip_max(max_val)
  # Explicitly forward the upper bound to `super`.
  super(max_val)
end
3768
+
3769
+ # Replace values by different values.
3770
+ #
3771
+ # @param old [Object]
3772
+ # Value or sequence of values to replace.
3773
+ # Also accepts a mapping of values to their replacement.
3774
+ # @param new [Object]
3775
+ # Value or sequence of values to replace by.
3776
+ # Length must match the length of `old` or have length 1.
3777
+ # @param default [Object]
3778
+ # Set values that were not replaced to this value.
3779
+ # Defaults to keeping the original value.
3780
+ # Accepts expression input. Non-expression inputs are parsed as literals.
3781
+ # @param return_dtype [Object]
3782
+ # The data type of the resulting Series. If set to `nil` (default),
3783
+ # the data type is determined automatically based on the other inputs.
3784
+ #
3785
+ # @return [Series]
3786
+ #
3787
+ # @example Replace a single value by another value. Values that were not replaced remain unchanged.
3788
+ # s = Polars::Series.new([1, 2, 2, 3])
3789
+ # s.replace(2, 100)
3790
+ # # =>
3791
+ # # shape: (4,)
3792
+ # # Series: '' [i64]
3793
+ # # [
3794
+ # # 1
3795
+ # # 100
3796
+ # # 100
3797
+ # # 3
3798
+ # # ]
3799
+ #
3800
+ # @example Replace multiple values by passing sequences to the `old` and `new` parameters.
3801
+ # s.replace([2, 3], [100, 200])
3802
+ # # =>
3803
+ # # shape: (4,)
3804
+ # # Series: '' [i64]
3805
+ # # [
3806
+ # # 1
3807
+ # # 100
3808
+ # # 100
3809
+ # # 200
3810
+ # # ]
3811
+ #
3812
+ # @example Passing a mapping with replacements is also supported as syntactic sugar.
3813
+ # mapping = {2 => 100, 3 => 200}
3814
+ # s.replace(mapping)
3815
+ # # =>
3816
+ # # shape: (4,)
3817
+ # # Series: '' [i64]
3818
+ # # [
3819
+ # # 1
3820
+ # # 100
3821
+ # # 100
3822
+ # # 200
3823
+ # # ]
3824
+ #
3825
+ # @example The original data type is preserved when replacing by values of a different data type.
3826
+ # s = Polars::Series.new(["x", "y", "z"])
3827
+ # mapping = {"x" => 1, "y" => 2, "z" => 3}
3828
+ # s.replace(mapping)
3829
+ # # =>
3830
+ # # shape: (3,)
3831
+ # # Series: '' [str]
3832
+ # # [
3833
+ # # "1"
3834
+ # # "2"
3835
+ # # "3"
3836
+ # # ]
3837
def replace(old, new = Expr::NO_DEFAULT, default: Expr::NO_DEFAULT, return_dtype: nil)
  # Explicitly forward positional and keyword arguments to `super`.
  super(old, new, default: default, return_dtype: return_dtype)
end
3840
+
3841
+ # Reshape this Series to a flat Series or a Series of Lists.
3842
+ #
3843
+ # @param dims [Array]
3844
+ # Tuple of the dimension sizes. If a -1 is used in any of the dimensions, that
3845
+ # dimension is inferred.
3846
+ #
3847
+ # @return [Series]
3848
def reshape(dims)
  # Explicitly forward the dimensions to `super`.
  super(dims)
end
3851
+
3852
+ # Shuffle the contents of this Series.
3853
+ #
3854
+ # @param seed [Integer, nil]
3855
+ # Seed for the random number generator.
3856
+ #
3857
+ # @return [Series]
3858
+ #
3859
+ # @example
3860
+ # s = Polars::Series.new("a", [1, 2, 3])
3861
+ # s.shuffle(seed: 1)
3862
+ # # =>
3863
+ # # shape: (3,)
3864
+ # # Series: 'a' [i64]
3865
+ # # [
3866
+ # # 2
3867
+ # # 1
3868
+ # # 3
3869
+ # # ]
3870
def shuffle(seed: nil)
  # Explicitly forward the seed to `super`.
  super(seed: seed)
end
3873
+
3874
+ # Exponentially-weighted moving average.
3875
+ #
3876
+ # @return [Series]
3877
def ewm_mean(
  com: nil,
  span: nil,
  half_life: nil,
  alpha: nil,
  adjust: true,
  min_periods: 1,
  ignore_nulls: true
)
  # Explicitly forward every option to `super`, in declaration order.
  super(
    com: com,
    span: span,
    half_life: half_life,
    alpha: alpha,
    adjust: adjust,
    min_periods: min_periods,
    ignore_nulls: ignore_nulls
  )
end
3888
+
3889
+ # Exponentially-weighted moving standard deviation.
3890
+ #
3891
+ # @return [Series]
3892
def ewm_std(
  com: nil,
  span: nil,
  half_life: nil,
  alpha: nil,
  adjust: true,
  bias: false,
  min_periods: 1,
  ignore_nulls: true
)
  # Explicitly forward every option to `super`, in declaration order.
  super(
    com: com,
    span: span,
    half_life: half_life,
    alpha: alpha,
    adjust: adjust,
    bias: bias,
    min_periods: min_periods,
    ignore_nulls: ignore_nulls
  )
end
3904
+
3905
+ # Exponentially-weighted moving variance.
3906
+ #
3907
+ # @return [Series]
3908
def ewm_var(
  com: nil,
  span: nil,
  half_life: nil,
  alpha: nil,
  adjust: true,
  bias: false,
  min_periods: 1,
  ignore_nulls: true
)
  # Explicitly forward every option to `super`, in declaration order.
  super(
    com: com,
    span: span,
    half_life: half_life,
    alpha: alpha,
    adjust: adjust,
    bias: bias,
    min_periods: min_periods,
    ignore_nulls: ignore_nulls
  )
end
3920
+
3921
+ # Extend the Series with given number of values.
3922
+ #
3923
+ # @param value [Object]
3924
+ # The value to extend the Series with. This value may be `nil` to fill with
3925
+ # nulls.
3926
+ # @param n [Integer]
3927
+ # The number of values to extend.
3928
+ #
3929
+ # @return [Series]
3930
+ #
3931
+ # @example
3932
+ # s = Polars::Series.new("a", [1, 2, 3])
3933
+ # s.extend_constant(99, 2)
3934
+ # # =>
3935
+ # # shape: (5,)
3936
+ # # Series: 'a' [i64]
3937
+ # # [
3938
+ # # 1
3939
+ # # 2
3940
+ # # 3
3941
+ # # 99
3942
+ # # 99
3943
+ # # ]
3944
def extend_constant(value, n)
  # Extend the underlying `_s` object, then wrap the result via Utils.wrap_s.
  extended = _s.extend_constant(value, n)
  Utils.wrap_s(extended)
end
3947
+
3948
+ # Flags the Series as sorted.
3949
+ #
3950
+ # Enables downstream code to use fast paths for sorted arrays.
3951
+ #
3952
+ # @param reverse [Boolean]
3953
+ # If the Series order is reversed, e.g. descending.
3954
+ #
3955
+ # @return [Series]
3956
+ #
3957
+ # @note
3958
+ # This can lead to incorrect results if this Series is not sorted!!
3959
+ # Use with care!
3960
+ #
3961
+ # @example
3962
+ # s = Polars::Series.new("a", [1, 2, 3])
3963
+ # s.set_sorted.max
3964
+ # # => 3
3965
def set_sorted(reverse: false)
  # Flag the underlying `_s` object, then wrap the result via Utils.wrap_s.
  flagged = _s.set_sorted(reverse)
  Utils.wrap_s(flagged)
end
3968
+
3969
+ # Create a new Series filled with values from the given index.
3970
+ #
3971
+ # @return [Series]
3972
def new_from_index(index, length)
  # Build from the underlying `_s` object, then wrap the result via Utils.wrap_s.
  filled = _s.new_from_index(index, length)
  Utils.wrap_s(filled)
end
3975
+
3976
+ # Shrink numeric columns to the minimal required datatype.
3977
+ #
3978
+ # Shrink to the dtype needed to fit the extrema of this Series.
3979
+ # This can be used to reduce memory pressure.
3980
+ #
3981
+ # @return [Series]
3982
def shrink_dtype
  # No arguments to forward; call `super` with an explicit empty argument list.
  super()
end
3985
+
3986
+ # Create an object namespace of all list related methods.
3987
+ #
3988
+ # @return [ListNameSpace]
3989
def list = ListNameSpace.new(self)
3992
+
3993
+ # Create an object namespace of all array related methods.
3994
+ #
3995
+ # @return [ArrayNameSpace]
3996
def arr = ArrayNameSpace.new(self)
3999
+
4000
+ # Create an object namespace of all binary related methods.
4001
+ #
4002
+ # @return [BinaryNameSpace]
4003
def bin = BinaryNameSpace.new(self)
4006
+
4007
+ # Create an object namespace of all categorical related methods.
4008
+ #
4009
+ # @return [CatNameSpace]
4010
def cat = CatNameSpace.new(self)
4013
+
4014
+ # Create an object namespace of all datetime related methods.
4015
+ #
4016
+ # @return [DateTimeNameSpace]
4017
def dt = DateTimeNameSpace.new(self)
4020
+
4021
+ # Create an object namespace of all string related methods.
4022
+ #
4023
+ # @return [StringNameSpace]
4024
def str = StringNameSpace.new(self)
4027
+
4028
+ # Create an object namespace of all struct related methods.
4029
+ #
4030
+ # @return [StructNameSpace]
4031
def struct = StructNameSpace.new(self)
4034
+
4035
+ private
4036
+
4037
def initialize_copy(other)
  super(other)
  # Give this copy its own clone of the underlying `_s` object.
  cloned = _s._clone
  self._s = cloned
end
4041
+
4042
def coerce(other)
  # Only numerics can be coerced; anything else is rejected up front.
  unless other.is_a?(Numeric)
    raise TypeError, "#{self.class} can't be coerced into #{other.class}"
  end

  # TODO improve
  literal_series = to_frame.select(Polars.lit(other)).to_series
  [literal_series, self]
end
4051
+
4052
def _pos_idxs(idxs)
  idx_type = Plr.get_index_type

  # Only a Series of indexes is supported.
  raise ArgumentError, "Unsupported idxs datatype." unless idxs.is_a?(Series)

  # Already in the index dtype: nothing to convert.
  return idxs if idxs.dtype == idx_type

  convertible = [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64]
  raise ArgumentError, "Unsupported idxs datatype." unless convertible.include?(idxs.dtype)

  # With a 32-bit index type, 64-bit inputs are range-checked before casting.
  if idx_type == UInt32
    if [Int64, UInt64].include?(idxs.dtype) && idxs.max >= 2**32
      raise ArgumentError, "Index positions should be smaller than 2^32."
    end
    if idxs.dtype == Int64 && idxs.min < -(2**32)
      raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
    end
  end

  has_negative = [Int8, Int16, Int32, Int64].include?(idxs.dtype) && idxs.min < 0
  return idxs.cast(idx_type) unless has_negative

  # Widen narrow signed dtypes before negative indexes are resolved.
  if idx_type == UInt32
    idxs = idxs.cast(Int32) if [Int8, Int16].include?(idxs.dtype)
  else
    idxs = idxs.cast(Int64) if [Int8, Int16, Int32].include?(idxs.dtype)
  end

  # Update negative indexes to absolute indexes.
  frame = idxs.to_frame
  column = -> { Polars.col(idxs.name) }
  absolute =
    Polars.when(column.call < 0)
      .then(len + column.call)
      .otherwise(column.call)
      .cast(idx_type)
  frame.select(absolute).to_series(0)
end
4104
+
4105
def _comp(other, op)
  # Boolean series compared for (in)equality against a boolean literal
  # resolve to a copy of the series or its negation.
  if dtype == Boolean && Utils.bool?(other) && [:eq, :neq].include?(op)
    keeps_values = (other == true && op == :eq) || (other == false && op == :neq)
    return clone if keeps_values

    negates_values = (other == false && op == :eq) || (other == true && op == :neq)
    return !self if negates_values
  end

  # Temporal literals are converted to integers and compared through the
  # integer-typed native function.
  if other.is_a?(::Time) && dtype.is_a?(Datetime)
    timestamp = Utils.datetime_to_int(other, time_unit)
    native = ffi_func("#{op}_<>", Int64, _s)
    fail if native.nil?
    return Utils.wrap_s(native.call(timestamp))
  end

  if other.is_a?(::Date) && dtype == Date
    days = Utils.date_to_int(other)
    native = ffi_func("#{op}_<>", Int32, _s)
    fail if native.nil?
    return Utils.wrap_s(native.call(days))
  end

  return Utils.wrap_s(_s.send(op, other._s)) if other.is_a?(Series)

  native = ffi_func("#{op}_<>", dtype, _s)
  raise NotImplementedError if native.nil?

  Utils.wrap_s(native.call(other))
end
4136
+
4137
# Look up the dtype-specific native method on `_s`.
#
# `name` contains a `<>` placeholder that is replaced by the suffix
# registered for the dtype in DTYPE_TO_FFINAME (e.g. "eq_<>" -> "eq_i64").
#
# @param name [String] method name template containing "<>"
# @param dtype [Object] a dtype instance or a dtype class; both are accepted,
#   because callers in this file pass instances (`dtype`) as well as classes
#   (`Int64`, `Int32`)
# @param _s [Object] object whose method is looked up
#
# @return [Method, nil] the bound method, or nil when the dtype has no
#   registered suffix
def ffi_func(name, dtype, _s)
  # DTYPE_TO_FFINAME is keyed by class, so normalise an instance to its class.
  dtype_class = dtype.is_a?(Class) ? dtype : dtype.class
  suffix = DTYPE_TO_FFINAME[dtype_class]
  return nil if suffix.nil?

  _s.method(name.sub("<>", suffix))
end
4140
+
4141
def _arithmetic(other, op)
  # An expression operand is first evaluated into a Series.
  other = to_frame.select(other).to_series if other.is_a?(Expr)

  return Utils.wrap_s(_s.send(op, other._s)) if other.is_a?(Series)

  # These literal kinds are turned into a one-element series when this
  # series is not a float series.
  literal_like =
    other.is_a?(Float) ||
    other.is_a?(::Date) ||
    other.is_a?(::DateTime) ||
    other.is_a?(::Time) ||
    other.is_a?(::String)
  if literal_like && !is_float
    literal = sequence_to_rbseries(name, [other])
    return Utils.wrap_s(_s.send(op, literal))
  end

  native = ffi_func("#{op}_<>", dtype, _s)
  if native.nil?
    raise ArgumentError, "cannot do arithmetic with series of dtype: #{dtype} and argument of type: #{other.class.name}"
  end

  Utils.wrap_s(native.call(other))
end
4160
+
4161
# Maps each dtype class to the suffix used in the names of the dtype-specific
# native methods (substituted for "<>" by ffi_func). Frozen so the shared
# lookup table cannot be mutated at runtime.
DTYPE_TO_FFINAME = {
  Int8 => "i8",
  Int16 => "i16",
  Int32 => "i32",
  Int64 => "i64",
  UInt8 => "u8",
  UInt16 => "u16",
  UInt32 => "u32",
  UInt64 => "u64",
  Float32 => "f32",
  Float64 => "f64",
  Boolean => "bool",
  Utf8 => "str",
  List => "list",
  Date => "date",
  Datetime => "datetime",
  Duration => "duration",
  Time => "time",
  Object => "object",
  Categorical => "categorical",
  Struct => "struct",
  Binary => "binary"
}.freeze
4184
+
4185
def series_to_rbseries(name, values)
  # Renames the passed Series itself (in_place: true) before returning its `_s`.
  # should not be in-place?
  values.tap { |series| series.rename(name, in_place: true) }._s
end
4190
+
4191
def numo_to_rbseries(name, values, strict: true, nan_to_null: false)
  # not needed yet
  # if !values.contiguous?
  # end

  if values.shape.length != 1
    raise Todo if values.shape.sum == 0

    # Multi-dimensional input: flatten, convert, then restore the shape.
    full_shape = values.shape
    flat_values = values.reshape(full_shape.inject(&:*))
    flat_series = numo_to_rbseries(name, flat_values, strict: strict, nan_to_null: nan_to_null)
    return Utils.wrap_s(flat_series).reshape(full_shape)._s
  end

  values, dtype = numo_values_and_dtype(values)
  # For float arrays `strict` takes the value of `nan_to_null`.
  strict = nan_to_null if [Numo::SFloat, Numo::DFloat].include?(dtype)
  return sequence_to_rbseries(name, values.to_a, strict: strict) if dtype == Numo::RObject

  # TODO improve performance
  numo_type_to_constructor(dtype).call(name, values.to_a, strict)
end
4220
+
4221
def numo_values_and_dtype(values)
  # The "dtype" of the input is simply its class.
  dtype = values.class
  [values, dtype]
end
4224
+
4225
# Pick the RbSeries constructor for a Numo array class.
#
# Numo names its float classes `SFloat` (single precision) and `DFloat`
# (double precision); there is no `Numo::Float32` / `Numo::Float64`, so
# referencing those raised NameError, which `rescue KeyError` does not catch.
#
# @param dtype [Class] Numo array class
#
# @return [Method] the matching constructor, or `RbSeries.new_object` for
#   classes that have no dedicated constructor
def numo_type_to_constructor(dtype)
  {
    Numo::SFloat => RbSeries.method(:new_opt_f32),
    Numo::DFloat => RbSeries.method(:new_opt_f64),
    Numo::Int8 => RbSeries.method(:new_opt_i8),
    Numo::Int16 => RbSeries.method(:new_opt_i16),
    Numo::Int32 => RbSeries.method(:new_opt_i32),
    Numo::Int64 => RbSeries.method(:new_opt_i64),
    Numo::UInt8 => RbSeries.method(:new_opt_u8),
    Numo::UInt16 => RbSeries.method(:new_opt_u16),
    Numo::UInt32 => RbSeries.method(:new_opt_u32),
    Numo::UInt64 => RbSeries.method(:new_opt_u64)
  }.fetch(dtype)
rescue KeyError
  RbSeries.method(:new_object)
end
4241
+
4242
# Build the native series object for a plain Ruby sequence.
#
# The construction path is chosen from the requested `dtype` and from the
# class of the first non-nil element of `values`.
#
# @param name [String] name passed to the native constructors
# @param values [Array, nil] elements; nil is treated as an empty sequence
# @param dtype [Object, nil] requested data type, if any
# @param strict [Boolean] forwarded to the native constructors
# @param dtype_if_empty [Object, nil] dtype used when `values` is empty and
#   no `dtype` is given (falls back to Float32)
#
# @return [Object] the constructed native series
def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
  ruby_dtype = nil

  # Normalise nil so the element inspection below cannot fail on it.
  values = [] if values.nil?

  if values.empty? && dtype.nil?
    dtype = dtype_if_empty || Float32
  elsif dtype == List
    ruby_dtype = ::Array
  end

  rb_temporal_types = [::Date, ::DateTime, ::Time]
  rb_temporal_types << ActiveSupport::TimeWithZone if defined?(ActiveSupport::TimeWithZone)

  value = _get_first_non_none(values)
  if !value.nil?
    # A sequence of hashes is built as a struct series via a DataFrame.
    if value.is_a?(Hash)
      return DataFrame.new(values).to_struct(name)._s
    end
  end

  if !dtype.nil? && ![List, Struct, Unknown].include?(dtype) && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
    # A bare Array dtype gets its width from the first element.
    if dtype == Array && !dtype.is_a?(Array) && value.is_a?(::Array)
      dtype = Array.new(nil, value.size)
    end

    constructor = polars_type_to_constructor(dtype)
    # TODO remove
    strict = false if dtype == Decimal
    rbseries = constructor.call(name, values, strict)

    # These dtypes may come back from the constructor with a different
    # dtype and are cast to the requested one.
    base_type = dtype.is_a?(DataType) ? dtype.class : dtype
    if [Date, Datetime, Duration, Time, Categorical, Boolean, Enum].include?(base_type)
      if rbseries.dtype != dtype
        rbseries = rbseries.cast(dtype, true)
      end
    end
    rbseries
  elsif dtype == Struct
    struct_schema = dtype.is_a?(Struct) ? dtype.to_schema : nil
    empty = {}
    DataFrame.sequence_to_rbdf(
      values.map { |v| v.nil? ? empty : v },
      schema: struct_schema,
      orient: "row",
    ).to_struct(name)
  else
    if ruby_dtype.nil?
      if value.nil?
        # generic default dtype
        ruby_dtype = Float
      else
        ruby_dtype = value.class
      end
    end

    # temporal branch
    if rb_temporal_types.include?(ruby_dtype)
      if dtype.nil?
        dtype = Utils.rb_type_to_dtype(ruby_dtype)
      elsif rb_temporal_types.include?(dtype)
        dtype = Utils.rb_type_to_dtype(dtype)
      end
      # TODO
      time_unit = nil

      rb_series = RbSeries.new_from_any_values(name, values, strict)
      if time_unit.nil?
        s = Utils.wrap_s(rb_series)
      else
        s = Utils.wrap_s(rb_series).dt.cast_time_unit(time_unit)
      end
      s._s
    elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
      raise Todo
    elsif ruby_dtype == ::Array
      if dtype.is_a?(Object)
        return RbSeries.new_object(name, values, strict)
      end
      if dtype
        srs = sequence_from_anyvalue_or_object(name, values)
        if dtype != srs.dtype
          srs = srs.cast(dtype, strict: false)
        end
        return srs
      end
      sequence_from_anyvalue_or_object(name, values)
    elsif ruby_dtype == Series
      RbSeries.new_series_list(name, values.map(&:_s), strict)
    elsif ruby_dtype == RbSeries
      RbSeries.new_series_list(name, values, strict)
    else
      constructor =
        if value.is_a?(::String)
          # UTF-8 strings become string series; other encodings are binary.
          if value.encoding == Encoding::UTF_8
            RbSeries.method(:new_str)
          else
            RbSeries.method(:new_binary)
          end
        elsif value.is_a?(Integer) && values.any? { |v| v.is_a?(Float) }
          # TODO improve performance
          RbSeries.method(:new_opt_f64)
        else
          rb_type_to_constructor(value.class)
        end

      construct_series_with_fallbacks(constructor, name, values, dtype, strict: strict)
    end
  end
end
4350
+
4351
def construct_series_with_fallbacks(constructor, name, values, dtype, strict:)
  constructor.call(name, values, strict)
rescue
  # The typed constructor failed: fall back to the generic any-value
  # builders, passing the dtype along when one was given.
  return RbSeries.new_from_any_values(name, values, strict) if dtype.nil?

  RbSeries.new_from_any_values_and_dtype(name, values, dtype, strict)
end
4362
+
4363
def sequence_from_anyvalue_or_object(name, values)
  begin
    # First attempt: strict any-value construction.
    RbSeries.new_from_any_values(name, values, true)
  rescue StandardError
    # Otherwise store the values as an object series (non-strict).
    RbSeries.new_object(name, values, false)
  end
end
4368
+
4369
# Maps each dtype class to the RbSeries constructor used to build a series
# of that dtype. Frozen so the shared lookup table cannot be mutated at runtime.
POLARS_TYPE_TO_CONSTRUCTOR = {
  Float32 => RbSeries.method(:new_opt_f32),
  Float64 => RbSeries.method(:new_opt_f64),
  Int8 => RbSeries.method(:new_opt_i8),
  Int16 => RbSeries.method(:new_opt_i16),
  Int32 => RbSeries.method(:new_opt_i32),
  Int64 => RbSeries.method(:new_opt_i64),
  UInt8 => RbSeries.method(:new_opt_u8),
  UInt16 => RbSeries.method(:new_opt_u16),
  UInt32 => RbSeries.method(:new_opt_u32),
  UInt64 => RbSeries.method(:new_opt_u64),
  Decimal => RbSeries.method(:new_decimal),
  Date => RbSeries.method(:new_from_any_values),
  Datetime => RbSeries.method(:new_from_any_values),
  Duration => RbSeries.method(:new_from_any_values),
  Time => RbSeries.method(:new_from_any_values),
  Boolean => RbSeries.method(:new_opt_bool),
  Utf8 => RbSeries.method(:new_str),
  Object => RbSeries.method(:new_object),
  Categorical => RbSeries.method(:new_str),
  Enum => RbSeries.method(:new_str),
  Binary => RbSeries.method(:new_binary),
  Null => RbSeries.method(:new_null)
}.freeze
4393
+
4394
# Maps short symbolic dtype names to RbSeries constructors (used when a dtype
# is neither a dtype class nor a dtype instance). Frozen so the shared lookup
# table cannot be mutated at runtime.
SYM_TYPE_TO_CONSTRUCTOR = {
  f32: RbSeries.method(:new_opt_f32),
  f64: RbSeries.method(:new_opt_f64),
  i8: RbSeries.method(:new_opt_i8),
  i16: RbSeries.method(:new_opt_i16),
  i32: RbSeries.method(:new_opt_i32),
  i64: RbSeries.method(:new_opt_i64),
  u8: RbSeries.method(:new_opt_u8),
  u16: RbSeries.method(:new_opt_u16),
  u32: RbSeries.method(:new_opt_u32),
  u64: RbSeries.method(:new_opt_u64),
  bool: RbSeries.method(:new_opt_bool),
  str: RbSeries.method(:new_str)
}.freeze
4408
+
4409
def polars_type_to_constructor(dtype)
  # Array dtypes need their width and inner dtype bound into the constructor.
  if dtype.is_a?(Array)
    return ->(name, values, strict) { RbSeries.new_array(dtype.width, dtype.inner, name, values, strict) }
  end

  # Choose the lookup table and key: dtype class, dtype instance, or symbol-like name.
  table, key =
    if dtype.is_a?(Class) && dtype < DataType
      [POLARS_TYPE_TO_CONSTRUCTOR, dtype]
    elsif dtype.is_a?(DataType)
      [POLARS_TYPE_TO_CONSTRUCTOR, dtype.class]
    else
      [SYM_TYPE_TO_CONSTRUCTOR, dtype.to_sym]
    end
  table.fetch(key)
rescue KeyError
  raise ArgumentError, "Cannot construct RbSeries for type #{dtype}."
end
4424
+
4425
# Maps plain Ruby classes to the RbSeries constructor used for sequences of
# that class. Frozen so the shared lookup table cannot be mutated at runtime.
RB_TYPE_TO_CONSTRUCTOR = {
  Float => RbSeries.method(:new_opt_f64),
  Integer => RbSeries.method(:new_opt_i64),
  TrueClass => RbSeries.method(:new_opt_bool),
  FalseClass => RbSeries.method(:new_opt_bool),
  BigDecimal => RbSeries.method(:new_decimal),
  NilClass => RbSeries.method(:new_null)
}.freeze
4433
+
4434
def rb_type_to_constructor(dtype)
  # Classes without a dedicated constructor are stored as object series.
  RB_TYPE_TO_CONSTRUCTOR.fetch(dtype) { RbSeries.method(:new_object) }
end
4439
+
4440
def _get_first_non_none(values)
  # First element that is not nil (false counts as a value); nil when there is none.
  values.find { !_1.nil? }
end
4443
+ end
4444
+ end