polars-df 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +8 -0
  4. data/Cargo.lock +2 -1
  5. data/README.md +1 -1
  6. data/ext/polars/Cargo.toml +7 -1
  7. data/ext/polars/src/batched_csv.rs +120 -0
  8. data/ext/polars/src/conversion.rs +139 -6
  9. data/ext/polars/src/dataframe.rs +360 -15
  10. data/ext/polars/src/error.rs +9 -0
  11. data/ext/polars/src/file.rs +8 -7
  12. data/ext/polars/src/lazy/apply.rs +7 -0
  13. data/ext/polars/src/lazy/dataframe.rs +135 -3
  14. data/ext/polars/src/lazy/dsl.rs +97 -2
  15. data/ext/polars/src/lazy/meta.rs +1 -1
  16. data/ext/polars/src/lazy/mod.rs +1 -0
  17. data/ext/polars/src/lib.rs +227 -12
  18. data/ext/polars/src/series.rs +190 -38
  19. data/ext/polars/src/set.rs +91 -0
  20. data/ext/polars/src/utils.rs +19 -0
  21. data/lib/polars/batched_csv_reader.rb +96 -0
  22. data/lib/polars/cat_expr.rb +39 -0
  23. data/lib/polars/data_frame.rb +2813 -100
  24. data/lib/polars/date_time_expr.rb +1282 -7
  25. data/lib/polars/exceptions.rb +20 -0
  26. data/lib/polars/expr.rb +631 -11
  27. data/lib/polars/expr_dispatch.rb +14 -0
  28. data/lib/polars/functions.rb +219 -0
  29. data/lib/polars/group_by.rb +517 -0
  30. data/lib/polars/io.rb +763 -4
  31. data/lib/polars/lazy_frame.rb +1415 -67
  32. data/lib/polars/lazy_functions.rb +430 -9
  33. data/lib/polars/lazy_group_by.rb +79 -0
  34. data/lib/polars/list_expr.rb +5 -0
  35. data/lib/polars/meta_expr.rb +21 -0
  36. data/lib/polars/series.rb +2244 -192
  37. data/lib/polars/slice.rb +104 -0
  38. data/lib/polars/string_expr.rb +663 -2
  39. data/lib/polars/struct_expr.rb +73 -0
  40. data/lib/polars/utils.rb +76 -3
  41. data/lib/polars/version.rb +2 -1
  42. data/lib/polars/when.rb +1 -0
  43. data/lib/polars/when_then.rb +1 -0
  44. data/lib/polars.rb +8 -2
  45. metadata +12 -2
data/lib/polars/series.rb CHANGED
@@ -1,7 +1,40 @@
1
1
  module Polars
2
+ # A Series represents a single column in a polars DataFrame.
2
3
  class Series
4
+ include ExprDispatch
5
+
6
+ # @private
3
7
  attr_accessor :_s
4
8
 
9
+ # Create a new Series.
10
+ #
11
+ # @param name [String, Array, nil]
12
+ # Name of the series. Will be used as a column name when used in a DataFrame.
13
+ # When not specified, name is set to an empty string.
14
+ # @param values [Array, nil]
15
+ # One-dimensional data in various forms. Supported are: Array and Series.
16
+ # @param dtype [Symbol, nil]
17
+ # Polars dtype of the Series data. If not specified, the dtype is inferred.
18
+ # @param strict [Boolean]
19
+ # Throw error on numeric overflow.
20
+ # @param nan_to_null [Boolean]
21
+ # Not used.
22
+ # @param dtype_if_empty [Symbol, nil]
23
+ # If no dtype is specified and values contains `nil` or an empty array,
24
+ # set the Polars dtype of the Series data. If not specified, Float32 is used.
25
+ #
26
+ # @example Constructing a Series by specifying name and values positionally:
27
+ # s = Polars::Series.new("a", [1, 2, 3])
28
+ #
29
+ # @example Notice that the dtype is automatically inferred as a polars Int64:
30
+ # s.dtype
31
+ # # => :i64
32
+ #
33
+ # @example Constructing a Series with a specific dtype:
34
+ # s2 = Polars::Series.new("a", [1, 2, 3], dtype: :f32)
35
+ #
36
+ # @example It is possible to construct a Series with values as the first positional argument. This syntax is considered an anti-pattern, but it can be useful in certain scenarios. You must specify any other arguments through keywords.
37
+ # s3 = Polars::Series.new([1, 2, 3])
5
38
  def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false, dtype_if_empty: nil)
6
39
  # Handle case where values are passed as the first argument
7
40
  if !name.nil? && !name.is_a?(String)
@@ -17,6 +50,8 @@ module Polars
17
50
 
18
51
  if values.nil?
19
52
  self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
53
+ elsif values.is_a?(Series)
54
+ self._s = series_to_rbseries(name, values)
20
55
  elsif values.is_a?(Range)
21
56
  self._s =
22
57
  Polars.arange(
@@ -35,16 +70,23 @@ module Polars
35
70
  end
36
71
  end
37
72
 
73
+ # @private
38
74
  def self._from_rbseries(s)
39
75
  series = Series.allocate
40
76
  series._s = s
41
77
  series
42
78
  end
43
79
 
80
+ # Get the data type of this Series.
81
+ #
82
+ # @return [Symbol]
44
83
  def dtype
45
- _s.dtype.to_sym
84
+ _s.dtype
46
85
  end
47
86
 
87
+ # Get flags that are set on the Series.
88
+ #
89
+ # @return [Hash]
48
90
  def flags
49
91
  {
50
92
  "SORTED_ASC" => _s.is_sorted_flag,
@@ -52,154 +94,491 @@ module Polars
52
94
  }
53
95
  end
54
96
 
97
+ # Get the inner dtype of a List typed Series.
98
+ #
99
+ # @return [Symbol]
55
100
  def inner_dtype
56
- _s.inner_dtype&.to_sym
101
+ _s.inner_dtype
57
102
  end
58
103
 
104
+ # Get the name of this Series.
105
+ #
106
+ # @return [String]
59
107
  def name
60
108
  _s.name
61
109
  end
62
110
 
111
+ # Shape of this Series.
112
+ #
113
+ # @return [Array]
63
114
  def shape
64
115
  [_s.len]
65
116
  end
66
117
 
67
- # def time_unit
68
- # end
118
+ # Get the time unit of underlying Datetime Series as `"ns"`, `"us"`, or `"ms"`.
119
+ #
120
+ # @return [String]
121
+ def time_unit
122
+ _s.time_unit
123
+ end
69
124
 
125
+ # Returns a string representing the Series.
126
+ #
127
+ # @return [String]
70
128
  def to_s
71
129
  _s.to_s
72
130
  end
73
131
  alias_method :inspect, :to_s
74
132
 
133
+ # Bitwise AND.
134
+ #
135
+ # @return [Series]
75
136
  def &(other)
137
+ if !other.is_a?(Series)
138
+ other = Series.new([other])
139
+ end
76
140
  Utils.wrap_s(_s.bitand(other._s))
77
141
  end
78
142
 
143
+ # Bitwise OR.
144
+ #
145
+ # @return [Series]
79
146
  def |(other)
147
+ if !other.is_a?(Series)
148
+ other = Series.new([other])
149
+ end
80
150
  Utils.wrap_s(_s.bitor(other._s))
81
151
  end
82
152
 
153
+ # Bitwise XOR.
154
+ #
155
+ # @return [Series]
83
156
  def ^(other)
157
+ if !other.is_a?(Series)
158
+ other = Series.new([other])
159
+ end
84
160
  Utils.wrap_s(_s.bitxor(other._s))
85
161
  end
86
162
 
87
- # def ==(other)
88
- # end
163
+ # Equal.
164
+ #
165
+ # @return [Series]
166
+ def ==(other)
167
+ _comp(other, :eq)
168
+ end
89
169
 
90
- # def !=(other)
91
- # end
170
+ # Not equal.
171
+ #
172
+ # @return [Series]
173
+ def !=(other)
174
+ _comp(other, :neq)
175
+ end
92
176
 
93
- # def >(other)
94
- # end
177
+ # Greater than.
178
+ #
179
+ # @return [Series]
180
+ def >(other)
181
+ _comp(other, :gt)
182
+ end
95
183
 
96
- # def <(other)
97
- # end
184
+ # Less than.
185
+ #
186
+ # @return [Series]
187
+ def <(other)
188
+ _comp(other, :lt)
189
+ end
98
190
 
99
- # def >=(other)
100
- # end
191
+ # Greater than or equal.
192
+ #
193
+ # @return [Series]
194
+ def >=(other)
195
+ _comp(other, :gt_eq)
196
+ end
101
197
 
102
- # def <=(other)
103
- # end
198
+ # Less than or equal.
199
+ #
200
+ # @return [Series]
201
+ def <=(other)
202
+ _comp(other, :lt_eq)
203
+ end
104
204
 
205
+ # Performs addition.
206
+ #
207
+ # @return [Series]
105
208
  def +(other)
106
- Utils. wrap_s(_s.add(other._s))
209
+ _arithmetic(other, :add)
107
210
  end
108
211
 
212
+ # Performs subtraction.
213
+ #
214
+ # @return [Series]
109
215
  def -(other)
110
- Utils.wrap_s(_s.sub(other._s))
216
+ _arithmetic(other, :sub)
111
217
  end
112
218
 
219
+ # Performs multiplication.
220
+ #
221
+ # @return [Series]
113
222
  def *(other)
114
- Utils.wrap_s(_s.mul(other._s))
223
+ _arithmetic(other, :mul)
115
224
  end
116
225
 
226
+ # Performs division.
227
+ #
228
+ # @return [Series]
117
229
  def /(other)
118
- Utils.wrap_s(_s.div(other._s))
230
+ _arithmetic(other, :div)
119
231
  end
120
232
 
233
+ # Returns the modulo.
234
+ #
235
+ # @return [Series]
236
+ def %(other)
237
+ if is_datelike
238
+ raise ArgumentError, "first cast to integer before applying modulo on datelike dtypes"
239
+ end
240
+ _arithmetic(other, :rem)
241
+ end
242
+
243
+ # Raises to the power of exponent.
244
+ #
245
+ # @return [Series]
121
246
  def **(power)
122
- # if is_datelike
123
- # raise ArgumentError, "first cast to integer before raising datelike dtypes to a power"
124
- # end
247
+ if is_datelike
248
+ raise ArgumentError, "first cast to integer before raising datelike dtypes to a power"
249
+ end
125
250
  to_frame.select(Polars.col(name).pow(power)).to_series
126
251
  end
127
252
 
128
- # def -@(other)
129
- # end
253
+ # Performs negation.
254
+ #
255
+ # @return [Series]
256
+ def -@
257
+ 0 - self
258
+ end
130
259
 
260
+ # Returns elements of the Series.
261
+ #
262
+ # @return [Object]
131
263
  def [](item)
132
- _s.get_idx(item)
264
+ if item.is_a?(Integer)
265
+ return _s.get_idx(item)
266
+ end
267
+
268
+ if item.is_a?(Range)
269
+ return Slice.new(self).apply(item)
270
+ end
271
+
272
+ raise ArgumentError, "Cannot get item of type: #{item.class.name}"
133
273
  end
134
274
 
135
- # def []=(key, value)
136
- # end
275
+ # Sets an element of the Series.
276
+ #
277
+ # @return [Object]
278
+ def []=(key, value)
279
+ if value.is_a?(Array)
280
+ if is_numeric || is_datelike
281
+ set_at_idx(key, value)
282
+ return
283
+ end
284
+ raise ArgumentError, "cannot set Series of dtype: #{dtype} with list/tuple as value; use a scalar value"
285
+ end
286
+
287
+ if key.is_a?(Series)
288
+ if key.dtype == :bool
289
+ self._s = set(key, value)._s
290
+ elsif key.dtype == :u64
291
+ self._s = set_at_idx(key.cast(:u32), value)._s
292
+ elsif key.dtype == :u32
293
+ self._s = set_at_idx(key, value)._s
294
+ else
295
+ raise Todo
296
+ end
297
+ end
137
298
 
299
+ if key.is_a?(Array)
300
+ s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: :u32))
301
+ self[s] = value
302
+ elsif key.is_a?(Integer)
303
+ # TODO fix
304
+ # self[[key]] = value
305
+ set_at_idx(key, value)
306
+ else
307
+ raise ArgumentError, "cannot use #{key} for indexing"
308
+ end
309
+ end
310
+
311
+ # Return an estimation of the total (heap) allocated size of the Series.
312
+ #
313
+ # Estimated size is given in the specified unit (bytes by default).
314
+ #
315
+ # This estimation is the sum of the size of its buffers, validity, including
316
+ # nested arrays. Multiple arrays may share buffers and bitmaps. Therefore, the
317
+ # size of 2 arrays is not the sum of the sizes computed from this function. In
318
+ # particular, StructArray's size is an upper bound.
319
+ #
320
+ # When an array is sliced, its allocated size remains constant because the buffer
321
+ # is unchanged. However, this function will yield a smaller number. This is because
322
+ # this function returns the visible size of the buffer, not its total capacity.
323
+ #
324
+ # FFI buffers are included in this estimation.
325
+ #
326
+ # @param unit ["b", "kb", "mb", "gb", "tb"]
327
+ # Scale the returned size to the given unit.
328
+ #
329
+ # @return [Numeric]
330
+ #
331
+ # @example
332
+ # s = Polars::Series.new("values", 1..1_000_000, dtype: :u32)
333
+ # s.estimated_size
334
+ # # => 4000000
335
+ # s.estimated_size("mb")
336
+ # # => 3.814697265625
138
337
  def estimated_size(unit = "b")
139
338
  sz = _s.estimated_size
140
339
  Utils.scale_bytes(sz, to: unit)
141
340
  end
142
341
 
342
+ # Compute the square root of the elements.
343
+ #
344
+ # @return [Series]
143
345
  def sqrt
144
- self ** 0.5
346
+ self**0.5
145
347
  end
146
348
 
349
+ # Check if any boolean value in the column is `true`.
350
+ #
351
+ # @return [Boolean]
147
352
  def any
148
353
  to_frame.select(Polars.col(name).any).to_series[0]
149
354
  end
150
355
 
356
+ # Check if all boolean values in the column are `true`.
357
+ #
358
+ # @return [Boolean]
151
359
  def all
152
360
  to_frame.select(Polars.col(name).all).to_series[0]
153
361
  end
154
362
 
155
- # def log
156
- # end
363
+ # Compute the logarithm to a given base.
364
+ #
365
+ # @param base [Float]
366
+ # Given base, defaults to `Math::E`.
367
+ #
368
+ # @return [Series]
369
+ def log(base = Math::E)
370
+ super
371
+ end
157
372
 
158
- # def log10
159
- # end
373
+ # Compute the base 10 logarithm of the input array, element-wise.
374
+ #
375
+ # @return [Series]
376
+ def log10
377
+ super
378
+ end
160
379
 
161
- # def exp
162
- # end
380
+ # Compute the exponential, element-wise.
381
+ #
382
+ # @return [Series]
383
+ def exp
384
+ super
385
+ end
163
386
 
164
- # def drop_nulls
165
- # end
387
+ # Create a new Series that copies data from this Series without null values.
388
+ #
389
+ # @return [Series]
390
+ def drop_nulls
391
+ super
392
+ end
166
393
 
167
- # def drop_nans
168
- # end
394
+ # Drop NaN values.
395
+ #
396
+ # @return [Series]
397
+ def drop_nans
398
+ super
399
+ end
169
400
 
401
+ # Cast this Series to a DataFrame.
402
+ #
403
+ # @return [DataFrame]
170
404
  def to_frame
171
405
  Utils.wrap_df(RbDataFrame.new([_s]))
172
406
  end
173
407
 
174
- # def describe
175
- # end
408
+ # Quick summary statistics of a series.
409
+ #
410
+ # Series with mixed datatypes will return summary statistics for the datatype of
411
+ # the first value.
412
+ #
413
+ # @return [DataFrame]
414
+ #
415
+ # @example
416
+ # series_num = Polars::Series.new([1, 2, 3, 4, 5])
417
+ # series_num.describe
418
+ # # =>
419
+ # # shape: (6, 2)
420
+ # # ┌────────────┬──────────┐
421
+ # # │ statistic ┆ value │
422
+ # # │ --- ┆ --- │
423
+ # # │ str ┆ f64 │
424
+ # # ╞════════════╪══════════╡
425
+ # # │ min ┆ 1.0 │
426
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
427
+ # # │ max ┆ 5.0 │
428
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
429
+ # # │ null_count ┆ 0.0 │
430
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
431
+ # # │ mean ┆ 3.0 │
432
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
433
+ # # │ std ┆ 1.581139 │
434
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
435
+ # # │ count ┆ 5.0 │
436
+ # # └────────────┴──────────┘
437
+ #
438
+ # @example
439
+ # series_str = Polars::Series.new(["a", "a", nil, "b", "c"])
440
+ # series_str.describe
441
+ # # =>
442
+ # # shape: (3, 2)
443
+ # # ┌────────────┬───────┐
444
+ # # │ statistic ┆ value │
445
+ # # │ --- ┆ --- │
446
+ # # │ str ┆ i64 │
447
+ # # ╞════════════╪═══════╡
448
+ # # │ unique ┆ 4 │
449
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
450
+ # # │ null_count ┆ 1 │
451
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
452
+ # # │ count ┆ 5 │
453
+ # # └────────────┴───────┘
454
+ def describe
455
+ if len == 0
456
+ raise ArgumentError, "Series must contain at least one value"
457
+ elsif is_numeric
458
+ s = cast(:f64)
459
+ stats = {
460
+ "min" => s.min,
461
+ "max" => s.max,
462
+ "null_count" => s.null_count,
463
+ "mean" => s.mean,
464
+ "std" => s.std,
465
+ "count" => s.len
466
+ }
467
+ elsif is_boolean
468
+ stats = {
469
+ "sum" => sum,
470
+ "null_count" => null_count,
471
+ "count" => len
472
+ }
473
+ elsif is_utf8
474
+ stats = {
475
+ "unique" => unique.length,
476
+ "null_count" => null_count,
477
+ "count" => len
478
+ }
479
+ elsif is_datelike
480
+ # we coerce all to string, because a polars column
481
+ # only has a single dtype and dates: datetime and count: int don't match
482
+ stats = {
483
+ "min" => dt.min.to_s,
484
+ "max" => dt.max.to_s,
485
+ "null_count" => null_count.to_s,
486
+ "count" => len.to_s
487
+ }
488
+ else
489
+ raise TypeError, "This type is not supported"
490
+ end
176
491
 
492
+ Polars::DataFrame.new(
493
+ {"statistic" => stats.keys, "value" => stats.values}
494
+ )
495
+ end
496
+
497
+ # Reduce this Series to the sum value.
498
+ #
499
+ # @return [Numeric]
500
+ #
501
+ # @note
502
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
503
+ # `:i64` before summing to prevent overflow issues.
504
+ #
505
+ # @example
506
+ # s = Polars::Series.new("a", [1, 2, 3])
507
+ # s.sum
508
+ # # => 6
177
509
  def sum
178
510
  _s.sum
179
511
  end
180
512
 
513
+ # Reduce this Series to the mean value.
514
+ #
515
+ # @return [Float, nil]
516
+ #
517
+ # @example
518
+ # s = Polars::Series.new("a", [1, 2, 3])
519
+ # s.mean
520
+ # # => 2.0
181
521
  def mean
182
522
  _s.mean
183
523
  end
184
524
 
525
+ # Reduce this Series to the product value.
526
+ #
527
+ # @return [Numeric]
185
528
  def product
186
529
  to_frame.select(Polars.col(name).product).to_series[0]
187
530
  end
188
531
 
532
+ # Get the minimal value in this Series.
533
+ #
534
+ # @return [Object]
535
+ #
536
+ # @example
537
+ # s = Polars::Series.new("a", [1, 2, 3])
538
+ # s.min
539
+ # # => 1
189
540
  def min
190
541
  _s.min
191
542
  end
192
543
 
544
+ # Get the maximum value in this Series.
545
+ #
546
+ # @return [Object]
547
+ #
548
+ # @example
549
+ # s = Polars::Series.new("a", [1, 2, 3])
550
+ # s.max
551
+ # # => 3
193
552
  def max
194
553
  _s.max
195
554
  end
196
555
 
197
- # def nan_max
198
- # end
199
-
200
- # def nan_min
201
- # end
202
-
556
+ # Get maximum value, but propagate/poison encountered NaN values.
557
+ #
558
+ # @return [Object]
559
+ def nan_max
560
+ to_frame.select(Polars.col(name).nan_max)[0, 0]
561
+ end
562
+
563
+ # Get minimum value, but propagate/poison encountered NaN values.
564
+ #
565
+ # @return [Object]
566
+ def nan_min
567
+ to_frame.select(Polars.col(name).nan_min)[0, 0]
568
+ end
569
+
570
+ # Get the standard deviation of this Series.
571
+ #
572
+ # @param ddof [Integer]
573
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
574
+ # where N represents the number of elements.
575
+ #
576
+ # @return [Float, nil]
577
+ #
578
+ # @example
579
+ # s = Polars::Series.new("a", [1, 2, 3])
580
+ # s.std
581
+ # # => 1.0
203
582
  def std(ddof: 1)
204
583
  if !is_numeric
205
584
  nil
@@ -208,6 +587,18 @@ module Polars
208
587
  end
209
588
  end
210
589
 
590
+ # Get variance of this Series.
591
+ #
592
+ # @param ddof [Integer]
593
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
594
+ # where N represents the number of elements.
595
+ #
596
+ # @return [Float, nil]
597
+ #
598
+ # @example
599
+ # s = Polars::Series.new("a", [1, 2, 3])
600
+ # s.var
601
+ # # => 1.0
211
602
  def var(ddof: 1)
212
603
  if !is_numeric
213
604
  nil
@@ -216,37 +607,160 @@ module Polars
216
607
  end
217
608
  end
218
609
 
610
+ # Get the median of this Series.
611
+ #
612
+ # @return [Float, nil]
613
+ #
614
+ # @example
615
+ # s = Polars::Series.new("a", [1, 2, 3])
616
+ # s.median
617
+ # # => 2.0
219
618
  def median
220
619
  _s.median
221
620
  end
222
621
 
622
+ # Get the quantile value of this Series.
623
+ #
624
+ # @param quantile [Float, nil]
625
+ # Quantile between 0.0 and 1.0.
626
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
627
+ # Interpolation method.
628
+ #
629
+ # @return [Float, nil]
630
+ #
631
+ # @example
632
+ # s = Polars::Series.new("a", [1, 2, 3])
633
+ # s.quantile(0.5)
634
+ # # => 2.0
223
635
  def quantile(quantile, interpolation: "nearest")
224
636
  _s.quantile(quantile, interpolation)
225
637
  end
226
638
 
639
+ # Get dummy variables.
640
+ #
641
+ # @return [DataFrame]
642
+ #
643
+ # @example
644
+ # s = Polars::Series.new("a", [1, 2, 3])
645
+ # s.to_dummies
646
+ # # =>
647
+ # # shape: (3, 3)
648
+ # # ┌─────┬─────┬─────┐
649
+ # # │ a_1 ┆ a_2 ┆ a_3 │
650
+ # # │ --- ┆ --- ┆ --- │
651
+ # # │ u8 ┆ u8 ┆ u8 │
652
+ # # ╞═════╪═════╪═════╡
653
+ # # │ 1 ┆ 0 ┆ 0 │
654
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
655
+ # # │ 0 ┆ 1 ┆ 0 │
656
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
657
+ # # │ 0 ┆ 0 ┆ 1 │
658
+ # # └─────┴─────┴─────┘
227
659
  def to_dummies
228
660
  Utils.wrap_df(_s.to_dummies)
229
661
  end
230
662
 
663
+ # Count the unique values in a Series.
664
+ #
665
+ # @param sort [Boolean]
666
+ # Ensure the output is sorted from most values to least.
667
+ #
668
+ # @return [DataFrame]
669
+ #
670
+ # @example
671
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
672
+ # s.value_counts.sort("a")
673
+ # # =>
674
+ # # shape: (3, 2)
675
+ # # ┌─────┬────────┐
676
+ # # │ a ┆ counts │
677
+ # # │ --- ┆ --- │
678
+ # # │ i64 ┆ u32 │
679
+ # # ╞═════╪════════╡
680
+ # # │ 1 ┆ 1 │
681
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
682
+ # # │ 2 ┆ 2 │
683
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
684
+ # # │ 3 ┆ 1 │
685
+ # # └─────┴────────┘
231
686
  def value_counts(sort: false)
232
687
  Utils.wrap_df(_s.value_counts(sort))
233
688
  end
234
689
 
235
- # def unique_counts
236
- # end
690
+ # Return a count of the unique values in the order of appearance.
691
+ #
692
+ # @return [Series]
693
+ #
694
+ # @example
695
+ # s = Polars::Series.new("id", ["a", "b", "b", "c", "c", "c"])
696
+ # s.unique_counts
697
+ # # =>
698
+ # # shape: (3,)
699
+ # # Series: 'id' [u32]
700
+ # # [
701
+ # # 1
702
+ # # 2
703
+ # # 3
704
+ # # ]
705
+ def unique_counts
706
+ super
707
+ end
237
708
 
238
- # def entropy
239
- # end
709
+ # Computes the entropy.
710
+ #
711
+ # Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities.
712
+ #
713
+ # @param base [Float]
714
+ # Given base, defaults to `e`
715
+ # @param normalize [Boolean]
716
+ # Normalize pk if it doesn't sum to 1.
717
+ #
718
+ # @return [Float, nil]
719
+ #
720
+ # @example
721
+ # a = Polars::Series.new([0.99, 0.005, 0.005])
722
+ # a.entropy(normalize: true)
723
+ # # => 0.06293300616044681
724
+ #
725
+ # @example
726
+ # b = Polars::Series.new([0.65, 0.10, 0.25])
727
+ # b.entropy(normalize: true)
728
+ # # => 0.8568409950394724
729
+ def entropy(base: Math::E, normalize: false)
730
+ Polars.select(Polars.lit(self).entropy(base: base, normalize: normalize)).to_series[0]
731
+ end
240
732
 
241
733
  # def cumulative_eval
242
734
  # end
243
735
 
736
+ # Return a copy of the Series with a new alias/name.
737
+ #
738
+ # @param name [String]
739
+ # New name.
740
+ #
741
+ # @return [Series]
742
+ #
743
+ # @example
744
+ # s = Polars::Series.new("x", [1, 2, 3])
745
+ # s.alias("y")
244
746
  def alias(name)
245
747
  s = dup
246
748
  s._s.rename(name)
247
749
  s
248
750
  end
249
751
 
752
+ # Rename this Series.
753
+ #
754
+ # @param name [String]
755
+ # New name.
756
+ # @param in_place [Boolean]
757
+ # Modify the Series in-place.
758
+ #
759
+ # @return [Series]
760
+ #
761
+ # @example
762
+ # s = Polars::Series.new("a", [1, 2, 3])
763
+ # s.rename("b")
250
764
  def rename(name, in_place: false)
251
765
  if in_place
252
766
  _s.rename(name)
@@ -256,59 +770,365 @@ module Polars
256
770
  end
257
771
  end
258
772
 
773
+ # Get the length of each individual chunk.
774
+ #
775
+ # @return [Array]
776
+ #
777
+ # @example
778
+ # s = Polars::Series.new("a", [1, 2, 3])
779
+ # s2 = Polars::Series.new("b", [4, 5, 6])
780
+ #
781
+ # @example Concatenate Series with rechunk: true
782
+ # Polars.concat([s, s2]).chunk_lengths
783
+ # # => [6]
784
+ #
785
+ # @example Concatenate Series with rechunk: false
786
+ # Polars.concat([s, s2], rechunk: false).chunk_lengths
787
+ # # => [3, 3]
259
788
  def chunk_lengths
260
789
  _s.chunk_lengths
261
790
  end
262
791
 
792
+ # Get the number of chunks that this Series contains.
793
+ #
794
+ # @return [Integer]
795
+ #
796
+ # @example
797
+ # s = Polars::Series.new("a", [1, 2, 3])
798
+ # s2 = Polars::Series.new("b", [4, 5, 6])
799
+ #
800
+ # @example Concatenate Series with rechunk: true
801
+ # Polars.concat([s, s2]).n_chunks
802
+ # # => 1
803
+ #
804
+ # @example Concatenate Series with rechunk: false
805
+ # Polars.concat([s, s2], rechunk: false).n_chunks
806
+ # # => 2
263
807
  def n_chunks
264
808
  _s.n_chunks
265
809
  end
266
810
 
811
+ # Get an array with the cumulative sum computed at every element.
812
+ #
813
+ # @param reverse [Boolean]
814
+ # reverse the operation.
815
+ #
816
+ # @return [Series]
817
+ #
818
+ # @note
819
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
820
+ # `:i64` before summing to prevent overflow issues.
821
+ #
822
+ # @example
823
+ # s = Polars::Series.new("a", [1, 2, 3])
824
+ # s.cumsum
825
+ # # =>
826
+ # # shape: (3,)
827
+ # # Series: 'a' [i64]
828
+ # # [
829
+ # # 1
830
+ # # 3
831
+ # # 6
832
+ # # ]
267
833
  def cumsum(reverse: false)
268
- Utils.wrap_s(_s.cumsum(reverse))
834
+ super
269
835
  end
270
836
 
837
+ # Get an array with the cumulative min computed at every element.
838
+ #
839
+ # @param reverse [Boolean]
840
+ # reverse the operation.
841
+ #
842
+ # @return [Series]
843
+ #
844
+ # @example
845
+ # s = Polars::Series.new("a", [3, 5, 1])
846
+ # s.cummin
847
+ # # =>
848
+ # # shape: (3,)
849
+ # # Series: 'a' [i64]
850
+ # # [
851
+ # # 3
852
+ # # 3
853
+ # # 1
854
+ # # ]
271
855
  def cummin(reverse: false)
272
- Utils.wrap_s(_s.cummin(reverse))
856
+ super
273
857
  end
274
858
 
859
+ # Get an array with the cumulative max computed at every element.
860
+ #
861
+ # @param reverse [Boolean]
862
+ # reverse the operation.
863
+ #
864
+ # @return [Series]
865
+ #
866
+ # @example
867
+ # s = Polars::Series.new("a", [3, 5, 1])
868
+ # s.cummax
869
+ # # =>
870
+ # # shape: (3,)
871
+ # # Series: 'a' [i64]
872
+ # # [
873
+ # # 3
874
+ # # 5
875
+ # # 5
876
+ # # ]
275
877
  def cummax(reverse: false)
276
- Utils.wrap_s(_s.cummax(reverse))
878
+ super
277
879
  end
278
880
 
881
+ # Get an array with the cumulative product computed at every element.
882
+ #
883
+ # @param reverse [Boolean]
884
+ # reverse the operation.
885
+ #
886
+ # @return [Series]
887
+ #
888
+ # @note
889
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
890
+ # `:i64` before multiplying to prevent overflow issues.
891
+ #
892
+ # @example
893
+ # s = Polars::Series.new("a", [1, 2, 3])
894
+ # s.cumprod
895
+ # # =>
896
+ # # shape: (3,)
897
+ # # Series: 'a' [i64]
898
+ # # [
899
+ # # 1
900
+ # # 2
901
+ # # 6
902
+ # # ]
279
903
  def cumprod(reverse: false)
280
- Utils.wrap_s(_s.cumprod(reverse))
904
+ super
281
905
  end
282
906
 
907
+ # Get the first `n` rows.
908
+ #
909
+ # Alias for {#head}.
910
+ #
911
+ # @param n [Integer]
912
+ # Number of rows to return.
913
+ #
914
+ # @return [Series]
915
+ #
916
+ # @example
917
+ # s = Polars::Series.new("a", [1, 2, 3])
918
+ # s.limit(2)
919
+ # # =>
920
+ # # shape: (2,)
921
+ # # Series: 'a' [i64]
922
+ # # [
923
+ # # 1
924
+ # # 2
925
+ # # ]
283
926
  def limit(n = 10)
284
927
  to_frame.select(Utils.col(name).limit(n)).to_series
285
928
  end
286
929
 
930
+ # Get a slice of this Series.
931
+ #
932
+ # @param offset [Integer]
933
+ # Start index. Negative indexing is supported.
934
+ # @param length [Integer, nil]
935
+ # Length of the slice. If set to `nil`, all rows starting at the offset
936
+ # will be selected.
937
+ #
938
+ # @return [Series]
939
+ #
940
+ # @example
941
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
942
+ # s.slice(1, 2)
943
+ # # =>
944
+ # # shape: (2,)
945
+ # # Series: 'a' [i64]
946
+ # # [
947
+ # # 2
948
+ # # 3
949
+ # # ]
287
950
  def slice(offset, length = nil)
288
- length = len if length.nil?
289
- Utils.wrap_s(_s.slice(offset, length))
951
+ super
290
952
  end
291
953
 
292
- def append(other)
293
- _s.append(other._s)
954
+ # Append a Series to this one.
955
+ #
956
+ # @param other [Series]
957
+ # Series to append.
958
+ # @param append_chunks [Boolean]
959
+ # If set to `true` the append operation will add the chunks from `other` to
960
+ # self. This is super cheap.
961
+ #
962
+ # If set to `false` the append operation will do the same as
963
+ # {DataFrame#extend} which extends the memory backed by this Series with
964
+ # the values from `other`.
965
+ #
966
+ # Different from `append_chunks`, `extend` appends the data from `other` to
967
+ # the underlying memory locations and thus may cause a reallocation (which is
968
+ # expensive).
969
+ #
970
+ # If this does not cause a reallocation, the resulting data structure will not
971
+ # have any extra chunks and thus will yield faster queries.
972
+ #
973
+ # Prefer `extend` over `append_chunks` when you want to do a query after a
974
+ # single append. For instance during online operations where you add `n` rows
975
+ # and rerun a query.
976
+ #
977
+ # Prefer `append_chunks` over `extend` when you want to append many times
978
+ # before doing a query. For instance, when you read in multiple files and want
979
+ # to store them in a single Series. In the latter case, finish the sequence
980
+ # of `append_chunks` operations with a `rechunk`.
981
+ #
982
+ # @return [Series]
983
+ #
984
+ # @example
985
+ # s = Polars::Series.new("a", [1, 2, 3])
986
+ # s2 = Polars::Series.new("b", [4, 5, 6])
987
+ # s.append(s2)
988
+ # # =>
989
+ # # shape: (6,)
990
+ # # Series: 'a' [i64]
991
+ # # [
992
+ # # 1
993
+ # # 2
994
+ # # 3
995
+ # # 4
996
+ # # 5
997
+ # # 6
998
+ # # ]
999
+ def append(other, append_chunks: true)
1000
+ begin
1001
+ if append_chunks
1002
+ _s.append(other._s)
1003
+ else
1004
+ _s.extend(other._s)
1005
+ end
1006
+ rescue => e
1007
+ if e.message == "Already mutably borrowed"
1008
+ append(other.clone, append_chunks)
1009
+ else
1010
+ raise e
1011
+ end
1012
+ end
294
1013
  self
295
1014
  end
296
1015
 
1016
+ # Filter elements by a boolean mask.
1017
+ #
1018
+ # @param predicate [Series, Array]
1019
+ # Boolean mask.
1020
+ #
1021
+ # @return [Series]
1022
+ #
1023
+ # @example
1024
+ # s = Polars::Series.new("a", [1, 2, 3])
1025
+ # mask = Polars::Series.new("", [true, false, true])
1026
+ # s.filter(mask)
1027
+ # # =>
1028
+ # # shape: (2,)
1029
+ # # Series: 'a' [i64]
1030
+ # # [
1031
+ # # 1
1032
+ # # 3
1033
+ # # ]
297
1034
  def filter(predicate)
1035
+ if predicate.is_a?(Array)
1036
+ predicate = Series.new("", predicate)
1037
+ end
298
1038
  Utils.wrap_s(_s.filter(predicate._s))
299
1039
  end
300
1040
 
1041
+ # Get the first `n` rows.
1042
+ #
1043
+ # @param n [Integer]
1044
+ # Number of rows to return.
1045
+ #
1046
+ # @return [Series]
1047
+ #
1048
+ # @example
1049
+ # s = Polars::Series.new("a", [1, 2, 3])
1050
+ # s.head(2)
1051
+ # # =>
1052
+ # # shape: (2,)
1053
+ # # Series: 'a' [i64]
1054
+ # # [
1055
+ # # 1
1056
+ # # 2
1057
+ # # ]
301
1058
  def head(n = 10)
302
1059
  to_frame.select(Utils.col(name).head(n)).to_series
303
1060
  end
304
1061
 
1062
+ # Get the last `n` rows.
1063
+ #
1064
+ # @param n [Integer]
1065
+ # Number of rows to return.
1066
+ #
1067
+ # @return [Series]
1068
+ #
1069
+ # @example
1070
+ # s = Polars::Series.new("a", [1, 2, 3])
1071
+ # s.tail(2)
1072
+ # # =>
1073
+ # # shape: (2,)
1074
+ # # Series: 'a' [i64]
1075
+ # # [
1076
+ # # 2
1077
+ # # 3
1078
+ # # ]
305
1079
  def tail(n = 10)
306
1080
  to_frame.select(Utils.col(name).tail(n)).to_series
307
1081
  end
308
1082
 
309
- # def take_every
310
- # end
1083
+ # Take every nth value in the Series and return as new Series.
1084
+ #
1085
+ # @return [Series]
1086
+ #
1087
+ # @example
1088
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
1089
+ # s.take_every(2)
1090
+ # # =>
1091
+ # # shape: (2,)
1092
+ # # Series: 'a' [i64]
1093
+ # # [
1094
+ # # 1
1095
+ # # 3
1096
+ # # ]
1097
+ def take_every(n)
1098
+ super
1099
+ end
311
1100
 
1101
+ # Sort this Series.
1102
+ #
1103
+ # @param reverse [Boolean]
1104
+ # Reverse sort.
1105
+ # @param in_place [Boolean]
1106
+ # Sort in place.
1107
+ #
1108
+ # @return [Series]
1109
+ #
1110
+ # @example
1111
+ # s = Polars::Series.new("a", [1, 3, 4, 2])
1112
+ # s.sort
1113
+ # # =>
1114
+ # # shape: (4,)
1115
+ # # Series: 'a' [i64]
1116
+ # # [
1117
+ # # 1
1118
+ # # 2
1119
+ # # 3
1120
+ # # 4
1121
+ # # ]
1122
+ # s.sort(reverse: true)
1123
+ # # =>
1124
+ # # shape: (4,)
1125
+ # # Series: 'a' [i64]
1126
+ # # [
1127
+ # # 4
1128
+ # # 3
1129
+ # # 2
1130
+ # # 1
1131
+ # # ]
312
1132
  def sort(reverse: false, in_place: false)
313
1133
  if in_place
314
1134
  self._s = _s.sort(reverse)
@@ -318,128 +1138,561 @@ module Polars
318
1138
  end
319
1139
  end
320
1140
 
321
- # def top_k
322
- # end
1141
+ # Return the `k` largest elements.
1142
+ #
1143
+ # If `reverse: true`, the smallest elements will be given.
1144
+ #
1145
+ # @param k [Integer]
1146
+ # Number of elements to return.
1147
+ # @param reverse [Boolean]
1148
+ # Return the smallest elements.
1149
+ #
1150
+ # @return [Series]
1151
+ def top_k(k: 5, reverse: false)
1152
+ super
1153
+ end
323
1154
 
324
- # def arg_sort
325
- # end
1155
+ # Get the index values that would sort this Series.
1156
+ #
1157
+ # @param reverse [Boolean]
1158
+ # Sort in reverse (descending) order.
1159
+ # @param nulls_last [Boolean]
1160
+ # Place null values last instead of first.
1161
+ #
1162
+ # @return [Series]
1163
+ #
1164
+ # @example
1165
+ # s = Polars::Series.new("a", [5, 3, 4, 1, 2])
1166
+ # s.arg_sort
1167
+ # # =>
1168
+ # # shape: (5,)
1169
+ # # Series: 'a' [u32]
1170
+ # # [
1171
+ # # 3
1172
+ # # 4
1173
+ # # 1
1174
+ # # 2
1175
+ # # 0
1176
+ # # ]
1177
+ def arg_sort(reverse: false, nulls_last: false)
1178
+ super
1179
+ end
326
1180
 
327
- # def argsort
328
- # end
1181
+ # Get the index values that would sort this Series.
1182
+ #
1183
+ # Alias for {#arg_sort}.
1184
+ #
1185
+ # @param reverse [Boolean]
1186
+ # Sort in reverse (descending) order.
1187
+ # @param nulls_last [Boolean]
1188
+ # Place null values last instead of first.
1189
+ #
1190
+ # @return [Series]
1191
+ def argsort(reverse: false, nulls_last: false)
1192
+ super
1193
+ end
329
1194
 
330
- # def arg_unique
331
- # end
1195
+ # Get unique index as Series.
1196
+ #
1197
+ # @return [Series]
1198
+ #
1199
+ # @example
1200
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1201
+ # s.arg_unique
1202
+ # # =>
1203
+ # # shape: (3,)
1204
+ # # Series: 'a' [u32]
1205
+ # # [
1206
+ # # 0
1207
+ # # 1
1208
+ # # 3
1209
+ # # ]
1210
+ def arg_unique
1211
+ super
1212
+ end
332
1213
 
1214
+ # Get the index of the minimal value.
1215
+ #
1216
+ # @return [Integer, nil]
1217
+ #
1218
+ # @example
1219
+ # s = Polars::Series.new("a", [3, 2, 1])
1220
+ # s.arg_min
1221
+ # # => 2
333
1222
  def arg_min
334
1223
  _s.arg_min
335
1224
  end
336
1225
 
1226
+ # Get the index of the maximal value.
1227
+ #
1228
+ # @return [Integer, nil]
1229
+ #
1230
+ # @example
1231
+ # s = Polars::Series.new("a", [3, 2, 1])
1232
+ # s.arg_max
1233
+ # # => 0
337
1234
  def arg_max
338
1235
  _s.arg_max
339
1236
  end
340
1237
 
341
- # def search_sorted
342
- # end
343
-
344
- # def unique
345
- # end
346
-
347
- # def take
348
- # end
1238
+ # Find indices where elements should be inserted to maintain order.
1239
+ #
1240
+ # @param element [Object]
1241
+ # Expression or scalar value.
1242
+ #
1243
+ # @return [Integer]
1244
+ def search_sorted(element)
1245
+ Polars.select(Polars.lit(self).search_sorted(element))[0, 0]
1246
+ end
1247
+
1248
+ # Get unique elements in series.
1249
+ #
1250
+ # @param maintain_order [Boolean]
1251
+ # Maintain order of data. This requires more work.
1252
+ #
1253
+ # @return [Series]
1254
+ #
1255
+ # @example
1256
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1257
+ # s.unique.sort
1258
+ # # =>
1259
+ # # shape: (3,)
1260
+ # # Series: 'a' [i64]
1261
+ # # [
1262
+ # # 1
1263
+ # # 2
1264
+ # # 3
1265
+ # # ]
1266
+ def unique(maintain_order: false)
1267
+ super
1268
+ end
349
1269
 
1270
+ # Take values by index.
1271
+ #
1272
+ # @param indices [Array]
1273
+ # Index location used for selection.
1274
+ #
1275
+ # @return [Series]
1276
+ #
1277
+ # @example
1278
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
1279
+ # s.take([1, 3])
1280
+ # # =>
1281
+ # # shape: (2,)
1282
+ # # Series: 'a' [i64]
1283
+ # # [
1284
+ # # 2
1285
+ # # 4
1286
+ # # ]
1287
+ def take(indices)
1288
+ to_frame.select(Polars.col(name).take(indices)).to_series
1289
+ end
1290
+
1291
+ # Count the null values in this Series.
1292
+ #
1293
+ # @return [Integer]
350
1294
  def null_count
351
1295
  _s.null_count
352
1296
  end
353
1297
 
1298
+ # Return `true` if the Series has a validity bitmask.
1299
+ #
1300
+ # If there is none, it means that there are no null values.
1301
+ # Use this to swiftly assert a Series does not have null values.
1302
+ #
1303
+ # @return [Boolean]
354
1304
  def has_validity
355
1305
  _s.has_validity
356
1306
  end
357
1307
 
1308
+ # Check if the Series is empty.
1309
+ #
1310
+ # @return [Boolean]
1311
+ #
1312
+ # @example
1313
+ # s = Polars::Series.new("a", [])
1314
+ # s.is_empty
1315
+ # # => true
358
1316
  def is_empty
359
1317
  len == 0
360
1318
  end
361
1319
  alias_method :empty?, :is_empty
362
1320
 
363
- # def is_null
364
- # end
1321
+ # Returns a boolean Series indicating which values are null.
1322
+ #
1323
+ # @return [Series]
1324
+ #
1325
+ # @example
1326
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, nil])
1327
+ # s.is_null
1328
+ # # =>
1329
+ # # shape: (4,)
1330
+ # # Series: 'a' [bool]
1331
+ # # [
1332
+ # # false
1333
+ # # false
1334
+ # # false
1335
+ # # true
1336
+ # # ]
1337
+ def is_null
1338
+ super
1339
+ end
365
1340
 
366
- # def is_not_null
367
- # end
1341
+ # Returns a boolean Series indicating which values are not null.
1342
+ #
1343
+ # @return [Series]
1344
+ #
1345
+ # @example
1346
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, nil])
1347
+ # s.is_not_null
1348
+ # # =>
1349
+ # # shape: (4,)
1350
+ # # Series: 'a' [bool]
1351
+ # # [
1352
+ # # true
1353
+ # # true
1354
+ # # true
1355
+ # # false
1356
+ # # ]
1357
+ def is_not_null
1358
+ super
1359
+ end
368
1360
 
369
- # def is_finite
370
- # end
1361
+ # Returns a boolean Series indicating which values are finite.
1362
+ #
1363
+ # @return [Series]
1364
+ #
1365
+ # @example
1366
+ # s = Polars::Series.new("a", [1.0, 2.0, Float::INFINITY])
1367
+ # s.is_finite
1368
+ # # =>
1369
+ # # shape: (3,)
1370
+ # # Series: 'a' [bool]
1371
+ # # [
1372
+ # # true
1373
+ # # true
1374
+ # # false
1375
+ # # ]
1376
+ def is_finite
1377
+ super
1378
+ end
371
1379
 
372
- # def is_infinite
373
- # end
1380
+ # Returns a boolean Series indicating which values are infinite.
1381
+ #
1382
+ # @return [Series]
1383
+ #
1384
+ # @example
1385
+ # s = Polars::Series.new("a", [1.0, 2.0, Float::INFINITY])
1386
+ # s.is_infinite
1387
+ # # =>
1388
+ # # shape: (3,)
1389
+ # # Series: 'a' [bool]
1390
+ # # [
1391
+ # # false
1392
+ # # false
1393
+ # # true
1394
+ # # ]
1395
+ def is_infinite
1396
+ super
1397
+ end
374
1398
 
375
- # def is_nan
376
- # end
1399
+ # Returns a boolean Series indicating which values are NaN.
1400
+ #
1401
+ # @return [Series]
1402
+ #
1403
+ # @example
1404
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
1405
+ # s.is_nan
1406
+ # # =>
1407
+ # # shape: (4,)
1408
+ # # Series: 'a' [bool]
1409
+ # # [
1410
+ # # false
1411
+ # # false
1412
+ # # false
1413
+ # # true
1414
+ # # ]
1415
+ def is_nan
1416
+ super
1417
+ end
377
1418
 
378
- # def is_not_nan
379
- # end
1419
+ # Returns a boolean Series indicating which values are not NaN.
1420
+ #
1421
+ # @return [Series]
1422
+ #
1423
+ # @example
1424
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
1425
+ # s.is_not_nan
1426
+ # # =>
1427
+ # # shape: (4,)
1428
+ # # Series: 'a' [bool]
1429
+ # # [
1430
+ # # true
1431
+ # # true
1432
+ # # true
1433
+ # # false
1434
+ # # ]
1435
+ def is_not_nan
1436
+ super
1437
+ end
380
1438
 
381
1439
  # def is_in
382
1440
  # end
383
1441
 
384
- # def arg_true
385
- # end
386
-
387
- # def is_unique
388
- # end
1442
+ # Get index values where a Boolean Series evaluates to `true`.
1443
+ #
1444
+ # @return [Series]
1445
+ #
1446
+ # @example
1447
+ # s = Polars::Series.new("a", [1, 2, 3])
1448
+ # (s == 2).arg_true
1449
+ # # =>
1450
+ # # shape: (1,)
1451
+ # # Series: 'a' [u32]
1452
+ # # [
1453
+ # # 1
1454
+ # # ]
1455
+ def arg_true
1456
+ Polars.arg_where(self, eager: true)
1457
+ end
1458
+
1459
+ # Get mask of all unique values.
1460
+ #
1461
+ # @return [Series]
1462
+ #
1463
+ # @example
1464
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1465
+ # s.is_unique
1466
+ # # =>
1467
+ # # shape: (4,)
1468
+ # # Series: 'a' [bool]
1469
+ # # [
1470
+ # # true
1471
+ # # false
1472
+ # # false
1473
+ # # true
1474
+ # # ]
1475
+ def is_unique
1476
+ super
1477
+ end
389
1478
 
390
- # def is_first
391
- # end
1479
+ # Get a mask of the first unique value.
1480
+ #
1481
+ # @return [Series]
1482
+ def is_first
1483
+ super
1484
+ end
392
1485
 
393
- # def is_duplicated
394
- # end
1486
+ # Get mask of all duplicated values.
1487
+ #
1488
+ # @return [Series]
1489
+ #
1490
+ # @example
1491
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1492
+ # s.is_duplicated
1493
+ # # =>
1494
+ # # shape: (4,)
1495
+ # # Series: 'a' [bool]
1496
+ # # [
1497
+ # # false
1498
+ # # true
1499
+ # # true
1500
+ # # false
1501
+ # # ]
1502
+ def is_duplicated
1503
+ super
1504
+ end
395
1505
 
396
- # def explode
397
- # end
1506
+ # Explode a list or utf8 Series.
1507
+ #
1508
+ # This means that every item is expanded to a new row.
1509
+ #
1510
+ # @return [Series]
1511
+ #
1512
+ # @example
1513
+ # s = Polars::Series.new("a", [[1, 2], [3, 4], [9, 10]])
1514
+ # s.explode
1515
+ # # =>
1516
+ # # shape: (6,)
1517
+ # # Series: 'a' [i64]
1518
+ # # [
1519
+ # # 1
1520
+ # # 2
1521
+ # # 3
1522
+ # # 4
1523
+ # # 9
1524
+ # # 10
1525
+ # # ]
1526
+ def explode
1527
+ super
1528
+ end
398
1529
 
1530
+ # Check if series is equal with another Series.
1531
+ #
1532
+ # @param other [Series]
1533
+ # Series to compare with.
1534
+ # @param null_equal [Boolean]
1535
+ # Consider null values as equal.
1536
+ # @param strict [Boolean]
1537
+ # Don't allow different numerical dtypes, e.g. comparing `:u32` with a
1538
+ # `:i64` will return `false`.
1539
+ #
1540
+ # @return [Boolean]
1541
+ #
1542
+ # @example
1543
+ # s = Polars::Series.new("a", [1, 2, 3])
1544
+ # s2 = Polars::Series.new("b", [4, 5, 6])
1545
+ # s.series_equal(s)
1546
+ # # => true
1547
+ # s.series_equal(s2)
1548
+ # # => false
399
1549
  def series_equal(other, null_equal: false, strict: false)
400
1550
  _s.series_equal(other._s, null_equal, strict)
401
1551
  end
402
1552
 
1553
+ # Length of this Series.
1554
+ #
1555
+ # @return [Integer]
1556
+ #
1557
+ # @example
1558
+ # s = Polars::Series.new("a", [1, 2, 3])
1559
+ # s.len
1560
+ # # => 3
403
1561
  def len
404
1562
  _s.len
405
1563
  end
406
-
407
- # def cast
408
- # end
1564
+ alias_method :length, :len
1565
+
1566
+ # Cast between data types.
1567
+ #
1568
+ # @param dtype [Symbol]
1569
+ # Data type to cast to.
1570
+ # @param strict [Boolean]
1571
+ # Throw an error if a cast could not be done, for instance due to an overflow.
1572
+ #
1573
+ # @return [Series]
1574
+ #
1575
+ # @example
1576
+ # s = Polars::Series.new("a", [true, false, true])
1577
+ # s.cast(:u32)
1578
+ # # =>
1579
+ # # shape: (3,)
1580
+ # # Series: 'a' [u32]
1581
+ # # [
1582
+ # # 1
1583
+ # # 0
1584
+ # # 1
1585
+ # # ]
1586
+ def cast(dtype, strict: true)
1587
+ super
1588
+ end
409
1589
 
410
1590
  # def to_physical
411
1591
  # end
412
1592
 
1593
+ # Convert this Series to a Ruby Array. This operation clones data.
1594
+ #
1595
+ # @return [Array]
1596
+ #
1597
+ # @example
1598
+ # s = Polars::Series.new("a", [1, 2, 3])
1599
+ # s.to_a
1600
+ # # => [1, 2, 3]
413
1601
  def to_a
414
1602
  _s.to_a
415
1603
  end
416
1604
 
1605
+ # Create a single chunk of memory for this Series.
1606
+ #
1607
+ # @param in_place [Boolean]
1608
+ # In place or not.
1609
+ #
1610
+ # @return [Series]
417
1611
  def rechunk(in_place: false)
418
1612
  opt_s = _s.rechunk(in_place)
419
1613
  in_place ? self : Utils.wrap_s(opt_s)
420
1614
  end
421
1615
 
422
- # def reverse
423
- # end
1616
+ # Return Series in reverse order.
1617
+ #
1618
+ # @return [Series]
1619
+ #
1620
+ # @example
1621
+ # s = Polars::Series.new("a", [1, 2, 3], dtype: :i8)
1622
+ # s.reverse
1623
+ # # =>
1624
+ # # shape: (3,)
1625
+ # # Series: 'a' [i8]
1626
+ # # [
1627
+ # # 3
1628
+ # # 2
1629
+ # # 1
1630
+ # # ]
1631
+ def reverse
1632
+ super
1633
+ end
424
1634
 
1635
+ # Check if this Series datatype is numeric.
1636
+ #
1637
+ # @return [Boolean]
1638
+ #
1639
+ # @example
1640
+ # s = Polars::Series.new("a", [1, 2, 3])
1641
+ # s.is_numeric
1642
+ # # => true
425
1643
  def is_numeric
426
1644
  [:i8, :i16, :i32, :i64, :u8, :u16, :u32, :u64, :f32, :f64].include?(dtype)
427
1645
  end
428
1646
  alias_method :numeric?, :is_numeric
429
1647
 
430
- # def is_datelike
431
- # end
432
-
1648
+ # Check if this Series datatype is datelike.
1649
+ #
1650
+ # @return [Boolean]
1651
+ #
1652
+ # @example
1653
+ # s = Polars::Series.new([Date.new(2021, 1, 1), Date.new(2021, 1, 2), Date.new(2021, 1, 3)])
1654
+ # s.is_datelike
1655
+ # # => true
1656
+ def is_datelike
1657
+ [:date, :datetime, :duration, :time].include?(dtype)
1658
+ end
1659
+
1660
+ # Check if this Series has floating point numbers.
1661
+ #
1662
+ # @return [Boolean]
1663
+ #
1664
+ # @example
1665
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0])
1666
+ # s.is_float
1667
+ # # => true
433
1668
  def is_float
434
1669
  [:f32, :f64].include?(dtype)
435
1670
  end
436
1671
  alias_method :float?, :is_float
437
1672
 
438
- def is_bool
1673
+ # Check if this Series is a Boolean.
1674
+ #
1675
+ # @return [Boolean]
1676
+ #
1677
+ # @example
1678
+ # s = Polars::Series.new("a", [true, false, true])
1679
+ # s.is_boolean
1680
+ # # => true
1681
+ def is_boolean
439
1682
  dtype == :bool
440
1683
  end
441
- alias_method :bool?, :is_bool
442
-
1684
+ alias_method :boolean?, :is_boolean
1685
+ alias_method :is_bool, :is_boolean
1686
+ alias_method :bool?, :is_boolean
1687
+
1688
+ # Check if this Series datatype is a Utf8.
1689
+ #
1690
+ # @return [Boolean]
1691
+ #
1692
+ # @example
1693
+ # s = Polars::Series.new("x", ["a", "b", "c"])
1694
+ # s.is_utf8
1695
+ # # => true
443
1696
  def is_utf8
444
1697
  dtype == :str
445
1698
  end
@@ -454,89 +1707,575 @@ module Polars
454
1707
  # def set
455
1708
  # end
456
1709
 
457
- # def set_at_idx
458
- # end
1710
+ # Set values at the index locations.
1711
+ #
1712
+ # @param idx [Object]
1713
+ # Integers representing the index locations.
1714
+ # @param value [Object]
1715
+ # Replacement values.
1716
+ #
1717
+ # @return [Series]
1718
+ #
1719
+ # @example
1720
+ # s = Polars::Series.new("a", [1, 2, 3])
1721
+ # s.set_at_idx(1, 10)
1722
+ # # =>
1723
+ # # shape: (3,)
1724
+ # # Series: 'a' [i64]
1725
+ # # [
1726
+ # # 1
1727
+ # # 10
1728
+ # # 3
1729
+ # # ]
1730
+ def set_at_idx(idx, value)
1731
+ if idx.is_a?(Integer)
1732
+ idx = [idx]
1733
+ end
1734
+ if idx.length == 0
1735
+ return self
1736
+ end
459
1737
 
460
- # def cleared
461
- # end
1738
+ idx = Series.new("", idx)
1739
+ if value.is_a?(Integer) || value.is_a?(Float) || Utils.bool?(value) || value.is_a?(String) || value.nil?
1740
+ value = Series.new("", [value])
1741
+
1742
+ # if we need to set more than a single value, we extend it
1743
+ if idx.length > 0
1744
+ value = value.extend_constant(value[0], idx.length - 1)
1745
+ end
1746
+ elsif !value.is_a?(Series)
1747
+ value = Series.new("", value)
1748
+ end
1749
+ _s.set_at_idx(idx._s, value._s)
1750
+ self
1751
+ end
1752
+
1753
+ # Create an empty copy of the current Series.
1754
+ #
1755
+ # The copy has identical name/dtype but no data.
1756
+ #
1757
+ # @return [Series]
1758
+ #
1759
+ # @example
1760
+ # s = Polars::Series.new("a", [nil, true, false])
1761
+ # s.cleared
1762
+ # # =>
1763
+ # # shape: (0,)
1764
+ # # Series: 'a' [bool]
1765
+ # # [
1766
+ # # ]
1767
+ def cleared
1768
+ len > 0 ? limit(0) : clone
1769
+ end
462
1770
 
463
1771
  # clone handled by initialize_copy
464
1772
 
465
- # def fill_nan
466
- # end
1773
+ # Fill floating point NaN value with a fill value.
1774
+ #
1775
+ # @param fill_value [Object]
1776
+ # Value used to fill nan values.
1777
+ #
1778
+ # @return [Series]
1779
+ #
1780
+ # @example
1781
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
1782
+ # s.fill_nan(0)
1783
+ # # =>
1784
+ # # shape: (4,)
1785
+ # # Series: 'a' [f64]
1786
+ # # [
1787
+ # # 1.0
1788
+ # # 2.0
1789
+ # # 3.0
1790
+ # # 0.0
1791
+ # # ]
1792
+ def fill_nan(fill_value)
1793
+ super
1794
+ end
467
1795
 
468
- # def fill_null
469
- # end
1796
+ # Fill null values using the specified value or strategy.
1797
+ #
1798
+ # @param value [Object]
1799
+ # Value used to fill null values.
1800
+ # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
1801
+ # Strategy used to fill null values.
1802
+ # @param limit [Integer, nil]
1803
+ # Number of consecutive null values to fill when using the "forward" or
1804
+ # "backward" strategy.
1805
+ #
1806
+ # @return [Series]
1807
+ #
1808
+ # @example
1809
+ # s = Polars::Series.new("a", [1, 2, 3, nil])
1810
+ # s.fill_null(strategy: "forward")
1811
+ # # =>
1812
+ # # shape: (4,)
1813
+ # # Series: 'a' [i64]
1814
+ # # [
1815
+ # # 1
1816
+ # # 2
1817
+ # # 3
1818
+ # # 3
1819
+ # # ]
1820
+ #
1821
+ # @example
1822
+ # s.fill_null(strategy: "min")
1823
+ # # =>
1824
+ # # shape: (4,)
1825
+ # # Series: 'a' [i64]
1826
+ # # [
1827
+ # # 1
1828
+ # # 2
1829
+ # # 3
1830
+ # # 1
1831
+ # # ]
1832
+ #
1833
+ # @example
1834
+ # s = Polars::Series.new("b", ["x", nil, "z"])
1835
+ # s.fill_null(Polars.lit(""))
1836
+ # # =>
1837
+ # # shape: (3,)
1838
+ # # Series: 'b' [str]
1839
+ # # [
1840
+ # # "x"
1841
+ # # ""
1842
+ # # "z"
1843
+ # # ]
1844
+ def fill_null(value = nil, strategy: nil, limit: nil)
1845
+ super
1846
+ end
470
1847
 
1848
+ # Rounds down to the nearest integer value.
1849
+ #
1850
+ # Only works on floating point Series.
1851
+ #
1852
+ # @return [Series]
1853
+ #
1854
+ # @example
1855
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
1856
+ # s.floor
1857
+ # # =>
1858
+ # # shape: (3,)
1859
+ # # Series: 'a' [f64]
1860
+ # # [
1861
+ # # 1.0
1862
+ # # 2.0
1863
+ # # 3.0
1864
+ # # ]
471
1865
  def floor
472
1866
  Utils.wrap_s(_s.floor)
473
1867
  end
474
1868
 
1869
+ # Rounds up to the nearest integer value.
1870
+ #
1871
+ # Only works on floating point Series.
1872
+ #
1873
+ # @return [Series]
1874
+ #
1875
+ # @example
1876
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
1877
+ # s.ceil
1878
+ # # =>
1879
+ # # shape: (3,)
1880
+ # # Series: 'a' [f64]
1881
+ # # [
1882
+ # # 2.0
1883
+ # # 3.0
1884
+ # # 4.0
1885
+ # # ]
475
1886
  def ceil
476
- Utils.wrap_s(_s.ceil)
1887
+ super
477
1888
  end
478
1889
 
479
- # default to 0 like Ruby
1890
+ # Round underlying floating point data by `decimals` digits.
1891
+ #
1892
+ # @param decimals [Integer]
1893
+ # Number of decimals to round by.
1894
+ #
1895
+ # @return [Series]
1896
+ #
1897
+ # @example
1898
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
1899
+ # s.round(2)
1900
+ # # =>
1901
+ # # shape: (3,)
1902
+ # # Series: 'a' [f64]
1903
+ # # [
1904
+ # # 1.12
1905
+ # # 2.57
1906
+ # # 3.9
1907
+ # # ]
480
1908
  def round(decimals = 0)
481
- Utils.wrap_s(_s.round(decimals))
1909
+ super
482
1910
  end
483
1911
 
484
1912
  # def dot
485
1913
  # end
486
1914
 
487
- # def mode
488
- # end
1915
+ # Compute the most occurring value(s).
1916
+ #
1917
+ # Can return multiple values.
1918
+ #
1919
+ # @return [Series]
1920
+ #
1921
+ # @example
1922
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1923
+ # s.mode
1924
+ # # =>
1925
+ # # shape: (1,)
1926
+ # # Series: 'a' [i64]
1927
+ # # [
1928
+ # # 2
1929
+ # # ]
1930
+ def mode
1931
+ super
1932
+ end
489
1933
 
490
- # def sign
491
- # end
1934
+ # Compute the element-wise indication of the sign.
1935
+ #
1936
+ # @return [Series]
1937
+ #
1938
+ # @example
1939
+ # s = Polars::Series.new("a", [-9.0, -0.0, 0.0, 4.0, nil])
1940
+ # s.sign
1941
+ # # =>
1942
+ # # shape: (5,)
1943
+ # # Series: 'a' [i64]
1944
+ # # [
1945
+ # # -1
1946
+ # # 0
1947
+ # # 0
1948
+ # # 1
1949
+ # # null
1950
+ # # ]
1951
+ def sign
1952
+ super
1953
+ end
492
1954
 
493
- # def sin
494
- # end
1955
+ # Compute the element-wise value for the sine.
1956
+ #
1957
+ # @return [Series]
1958
+ #
1959
+ # @example
1960
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
1961
+ # s.sin
1962
+ # # =>
1963
+ # # shape: (3,)
1964
+ # # Series: 'a' [f64]
1965
+ # # [
1966
+ # # 0.0
1967
+ # # 1.0
1968
+ # # 1.2246e-16
1969
+ # # ]
1970
+ def sin
1971
+ super
1972
+ end
495
1973
 
496
- # def cos
497
- # end
1974
+ # Compute the element-wise value for the cosine.
1975
+ #
1976
+ # @return [Series]
1977
+ #
1978
+ # @example
1979
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
1980
+ # s.cos
1981
+ # # =>
1982
+ # # shape: (3,)
1983
+ # # Series: 'a' [f64]
1984
+ # # [
1985
+ # # 1.0
1986
+ # # 6.1232e-17
1987
+ # # -1.0
1988
+ # # ]
1989
+ def cos
1990
+ super
1991
+ end
498
1992
 
499
- # def tan
500
- # end
1993
+ # Compute the element-wise value for the tangent.
1994
+ #
1995
+ # @return [Series]
1996
+ #
1997
+ # @example
1998
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
1999
+ # s.tan
2000
+ # # =>
2001
+ # # shape: (3,)
2002
+ # # Series: 'a' [f64]
2003
+ # # [
2004
+ # # 0.0
2005
+ # # 1.6331e16
2006
+ # # -1.2246e-16
2007
+ # # ]
2008
+ def tan
2009
+ super
2010
+ end
501
2011
 
502
- # def arcsin
503
- # end
2012
+ # Compute the element-wise value for the inverse sine.
2013
+ #
2014
+ # @return [Series]
2015
+ #
2016
+ # @example
2017
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2018
+ # s.arcsin
2019
+ # # =>
2020
+ # # shape: (3,)
2021
+ # # Series: 'a' [f64]
2022
+ # # [
2023
+ # # 1.570796
2024
+ # # 0.0
2025
+ # # -1.570796
2026
+ # # ]
2027
+ def arcsin
2028
+ super
2029
+ end
504
2030
 
505
- # def arccos
506
- # end
2031
+ # Compute the element-wise value for the inverse cosine.
2032
+ #
2033
+ # @return [Series]
2034
+ #
2035
+ # @example
2036
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2037
+ # s.arccos
2038
+ # # =>
2039
+ # # shape: (3,)
2040
+ # # Series: 'a' [f64]
2041
+ # # [
2042
+ # # 0.0
2043
+ # # 1.570796
2044
+ # # 3.141593
2045
+ # # ]
2046
+ def arccos
2047
+ super
2048
+ end
507
2049
 
508
- # def arctan
509
- # end
2050
+ # Compute the element-wise value for the inverse tangent.
2051
+ #
2052
+ # @return [Series]
2053
+ #
2054
+ # @example
2055
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2056
+ # s.arctan
2057
+ # # =>
2058
+ # # shape: (3,)
2059
+ # # Series: 'a' [f64]
2060
+ # # [
2061
+ # # 0.785398
2062
+ # # 0.0
2063
+ # # -0.785398
2064
+ # # ]
2065
+ def arctan
2066
+ super
2067
+ end
510
2068
 
511
- # def arcsinh
512
- # end
2069
+ # Compute the element-wise value for the inverse hyperbolic sine.
2070
+ #
2071
+ # @return [Series]
2072
+ #
2073
+ # @example
2074
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2075
+ # s.arcsinh
2076
+ # # =>
2077
+ # # shape: (3,)
2078
+ # # Series: 'a' [f64]
2079
+ # # [
2080
+ # # 0.881374
2081
+ # # 0.0
2082
+ # # -0.881374
2083
+ # # ]
2084
+ def arcsinh
2085
+ super
2086
+ end
513
2087
 
514
- # def arccosh
515
- # end
2088
+ # Compute the element-wise value for the inverse hyperbolic cosine.
2089
+ #
2090
+ # @return [Series]
2091
+ #
2092
+ # @example
2093
+ # s = Polars::Series.new("a", [5.0, 1.0, 0.0, -1.0])
2094
+ # s.arccosh
2095
+ # # =>
2096
+ # # shape: (4,)
2097
+ # # Series: 'a' [f64]
2098
+ # # [
2099
+ # # 2.292432
2100
+ # # 0.0
2101
+ # # NaN
2102
+ # # NaN
2103
+ # # ]
2104
+ def arccosh
2105
+ super
2106
+ end
516
2107
 
517
- # def arctanh
518
- # end
2108
+ # Compute the element-wise value for the inverse hyperbolic tangent.
2109
+ #
2110
+ # @return [Series]
2111
+ #
2112
+ # @example
2113
+ # s = Polars::Series.new("a", [2.0, 1.0, 0.5, 0.0, -0.5, -1.0, -1.1])
2114
+ # s.arctanh
2115
+ # # =>
2116
+ # # shape: (7,)
2117
+ # # Series: 'a' [f64]
2118
+ # # [
2119
+ # # NaN
2120
+ # # inf
2121
+ # # 0.549306
2122
+ # # 0.0
2123
+ # # -0.549306
2124
+ # # -inf
2125
+ # # NaN
2126
+ # # ]
2127
+ def arctanh
2128
+ super
2129
+ end
519
2130
 
520
- # def sinh
521
- # end
2131
+ # Compute the element-wise value for the hyperbolic sine.
2132
+ #
2133
+ # @return [Series]
2134
+ #
2135
+ # @example
2136
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2137
+ # s.sinh
2138
+ # # =>
2139
+ # # shape: (3,)
2140
+ # # Series: 'a' [f64]
2141
+ # # [
2142
+ # # 1.175201
2143
+ # # 0.0
2144
+ # # -1.175201
2145
+ # # ]
2146
+ def sinh
2147
+ super
2148
+ end
522
2149
 
523
- # def cosh
524
- # end
2150
+ # Compute the element-wise value for the hyperbolic cosine.
2151
+ #
2152
+ # @return [Series]
2153
+ #
2154
+ # @example
2155
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2156
+ # s.cosh
2157
+ # # =>
2158
+ # # shape: (3,)
2159
+ # # Series: 'a' [f64]
2160
+ # # [
2161
+ # # 1.543081
2162
+ # # 1.0
2163
+ # # 1.543081
2164
+ # # ]
2165
+ def cosh
2166
+ super
2167
+ end
525
2168
 
526
- # def tanh
527
- # end
2169
+ # Compute the element-wise value for the hyperbolic tangent.
2170
+ #
2171
+ # @return [Series]
2172
+ #
2173
+ # @example
2174
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2175
+ # s.tanh
2176
+ # # =>
2177
+ # # shape: (3,)
2178
+ # # Series: 'a' [f64]
2179
+ # # [
2180
+ # # 0.761594
2181
+ # # 0.0
2182
+ # # -0.761594
2183
+ # # ]
2184
+ def tanh
2185
+ super
2186
+ end
528
2187
 
529
2188
  # def apply
530
2189
  # end
531
2190
 
532
- # def shift
533
- # end
2191
+ # Shift the values by a given period.
2192
+ #
2193
+ # @param periods [Integer]
2194
+ # Number of places to shift (may be negative).
2195
+ #
2196
+ # @return [Series]
2197
+ #
2198
+ # @example
2199
+ # s = Polars::Series.new("a", [1, 2, 3])
2200
+ # s.shift(1)
2201
+ # # =>
2202
+ # # shape: (3,)
2203
+ # # Series: 'a' [i64]
2204
+ # # [
2205
+ # # null
2206
+ # # 1
2207
+ # # 2
2208
+ # # ]
2209
+ #
2210
+ # @example
2211
+ # s.shift(-1)
2212
+ # # =>
2213
+ # # shape: (3,)
2214
+ # # Series: 'a' [i64]
2215
+ # # [
2216
+ # # 2
2217
+ # # 3
2218
+ # # null
2219
+ # # ]
2220
+ def shift(periods = 1)
2221
+ super
2222
+ end
534
2223
 
535
- # def shift_and_fill
536
- # end
2224
+ # Shift the values by a given period and fill the resulting null values.
2225
+ #
2226
+ # @param periods [Integer]
2227
+ # Number of places to shift (may be negative).
2228
+ # @param fill_value [Object]
2229
+ # Fill the resulting null values with the result of this expression.
2230
+ #
2231
+ # @return [Series]
2232
+ def shift_and_fill(periods, fill_value)
2233
+ super
2234
+ end
537
2235
 
538
- # def zip_with
539
- # end
2236
+ # Take values from self or other based on the given mask.
2237
+ #
2238
+ # Where mask evaluates true, take values from self. Where mask evaluates false,
2239
+ # take values from other.
2240
+ #
2241
+ # @param mask [Series]
2242
+ # Boolean Series.
2243
+ # @param other [Series]
2244
+ # Series of same type.
2245
+ #
2246
+ # @return [Series]
2247
+ #
2248
+ # @example
2249
+ # s1 = Polars::Series.new([1, 2, 3, 4, 5])
2250
+ # s2 = Polars::Series.new([5, 4, 3, 2, 1])
2251
+ # s1.zip_with(s1 < s2, s2)
2252
+ # # =>
2253
+ # # shape: (5,)
2254
+ # # Series: '' [i64]
2255
+ # # [
2256
+ # # 1
2257
+ # # 2
2258
+ # # 3
2259
+ # # 2
2260
+ # # 1
2261
+ # # ]
2262
+ #
2263
+ # @example
2264
+ # mask = Polars::Series.new([true, false, true, false, true])
2265
+ # s1.zip_with(mask, s2)
2266
+ # # =>
2267
+ # # shape: (5,)
2268
+ # # Series: '' [i64]
2269
+ # # [
2270
+ # # 1
2271
+ # # 4
2272
+ # # 3
2273
+ # # 2
2274
+ # # 5
2275
+ # # ]
2276
+ def zip_with(mask, other)
2277
+ Utils.wrap_s(_s.zip_with(mask._s, other._s))
2278
+ end
540
2279
 
541
2280
  # def rolling_min
542
2281
  # end
@@ -571,62 +2310,266 @@ module Polars
571
2310
  # def sample
572
2311
  # end
573
2312
 
2313
+ # Get a boolean mask of the local maximum peaks.
2314
+ #
2315
+ # @return [Series]
2316
+ #
2317
+ # @example
2318
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
2319
+ # s.peak_max
2320
+ # # =>
2321
+ # # shape: (5,)
2322
+ # # Series: '' [bool]
2323
+ # # [
2324
+ # # false
2325
+ # # false
2326
+ # # false
2327
+ # # false
2328
+ # # true
2329
+ # # ]
574
2330
  def peak_max
575
2331
  Utils.wrap_s(_s.peak_max)
576
2332
  end
577
2333
 
2334
+ # Get a boolean mask of the local minimum peaks.
2335
+ #
2336
+ # @return [Series]
2337
+ #
2338
+ # @example
2339
+ # s = Polars::Series.new("a", [4, 1, 3, 2, 5])
2340
+ # s.peak_min
2341
+ # # =>
2342
+ # # shape: (5,)
2343
+ # # Series: '' [bool]
2344
+ # # [
2345
+ # # false
2346
+ # # true
2347
+ # # false
2348
+ # # true
2349
+ # # false
2350
+ # # ]
578
2351
  def peak_min
579
2352
  Utils.wrap_s(_s.peak_min)
580
2353
  end
581
2354
 
2355
+ # Count the number of unique values in this Series.
2356
+ #
2357
+ # @return [Integer]
2358
+ #
2359
+ # @example
2360
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
2361
+ # s.n_unique
2362
+ # # => 3
582
2363
  def n_unique
583
2364
  _s.n_unique
584
2365
  end
585
2366
 
586
- # def shrink_to_fit
587
- # end
2367
+ # Shrink Series memory usage.
2368
+ #
2369
+ # Shrinks the underlying array capacity to exactly fit the actual data.
2370
+ # (Note that this function does not change the Series data type).
2371
+ #
2372
+ # @return [Series]
2373
+ def shrink_to_fit(in_place: false)
2374
+ if in_place
2375
+ _s.shrink_to_fit
2376
+ self
2377
+ else
2378
+ series = clone
2379
+ series._s.shrink_to_fit
2380
+ series
2381
+ end
2382
+ end
588
2383
 
589
2384
  # def _hash
590
2385
  # end
591
2386
 
592
- # def reinterpret
593
- # end
2387
+ # Reinterpret the underlying bits as a signed/unsigned integer.
2388
+ #
2389
+ # This operation is only allowed for 64bit integers. For lower bits integers,
2390
+ # you can safely use the cast operation.
2391
+ #
2392
+ # @param signed [Boolean]
2393
+ # If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`.
2394
+ #
2395
+ # @return [Series]
2396
+ def reinterpret(signed: true)
2397
+ super
2398
+ end
594
2399
 
595
- # def interpolate
596
- # end
2400
+ # Interpolate intermediate values. The interpolation method is linear.
2401
+ #
2402
+ # @return [Series]
2403
+ #
2404
+ # @example
2405
+ # s = Polars::Series.new("a", [1, 2, nil, nil, 5])
2406
+ # s.interpolate
2407
+ # # =>
2408
+ # # shape: (5,)
2409
+ # # Series: 'a' [i64]
2410
+ # # [
2411
+ # # 1
2412
+ # # 2
2413
+ # # 3
2414
+ # # 4
2415
+ # # 5
2416
+ # # ]
2417
+ def interpolate
2418
+ super
2419
+ end
597
2420
 
598
- # def abs
599
- # end
2421
+ # Compute absolute values.
2422
+ #
2423
+ # @return [Series]
2424
+ def abs
2425
+ super
2426
+ end
600
2427
 
601
2428
  # def rank
602
2429
  # end
603
2430
 
604
- # def diff
605
- # end
2431
+ # Calculate the n-th discrete difference.
2432
+ #
2433
+ # @param n [Integer]
2434
+ # Number of slots to shift.
2435
+ # @param null_behavior ["ignore", "drop"]
2436
+ # How to handle null values.
2437
+ #
2438
+ # @return [Series]
2439
+ def diff(n: 1, null_behavior: "ignore")
2440
+ super
2441
+ end
606
2442
 
607
2443
  # def pct_change
608
2444
  # end
609
2445
 
610
- # def skew
611
- # end
612
-
613
- # def kurtosis
614
- # end
615
-
616
- # def clip
617
- # end
2446
+ # Compute the sample skewness of a data set.
2447
+ #
2448
+ # For normally distributed data, the skewness should be about zero. For
2449
+ # unimodal continuous distributions, a skewness value greater than zero means
2450
+ # that there is more weight in the right tail of the distribution. The
2451
+ # function `skewtest` can be used to determine if the skewness value
2452
+ # is close enough to zero, statistically speaking.
2453
+ #
2454
+ # @param bias [Boolean]
2455
+ # If `false`, the calculations are corrected for statistical bias.
2456
+ #
2457
+ # @return [Float, nil]
2458
+ def skew(bias: true)
2459
+ _s.skew(bias)
2460
+ end
2461
+
2462
+ # Compute the kurtosis (Fisher or Pearson) of a dataset.
2463
+ #
2464
+ # Kurtosis is the fourth central moment divided by the square of the
2465
+ # variance. If Fisher's definition is used, then 3.0 is subtracted from
2466
+ # the result to give 0.0 for a normal distribution.
2467
+ # If bias is false, then the kurtosis is calculated using k statistics to
2468
+ # eliminate bias coming from biased moment estimators.
2469
+ #
2470
+ # @param fisher [Boolean]
2471
+ # If `true`, Fisher's definition is used (normal ==> 0.0). If `false`,
2472
+ # Pearson's definition is used (normal ==> 3.0).
2473
+ # @param bias [Boolean]
2474
+ # If `false`, the calculations are corrected for statistical bias.
2475
+ #
2476
+ # @return [Float, nil]
2477
+ def kurtosis(fisher: true, bias: true)
2478
+ _s.kurtosis(fisher, bias)
2479
+ end
2480
+
2481
+ # Clip (limit) the values in an array to a `min` and `max` boundary.
2482
+ #
2483
+ # Only works for numerical types.
2484
+ #
2485
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
2486
+ # expression. See {#when} for more information.
2487
+ #
2488
+ # @param min_val [Numeric]
2489
+ # Minimum value.
2490
+ # @param max_val [Numeric]
2491
+ # Maximum value.
2492
+ #
2493
+ # @return [Series]
2494
+ #
2495
+ # @example
2496
+ # s = Polars::Series.new("foo", [-50, 5, nil, 50])
2497
+ # s.clip(1, 10)
2498
+ # # =>
2499
+ # # shape: (4,)
2500
+ # # Series: 'foo' [i64]
2501
+ # # [
2502
+ # # 1
2503
+ # # 5
2504
+ # # null
2505
+ # # 10
2506
+ # # ]
2507
+ def clip(min_val, max_val)
2508
+ super
2509
+ end
618
2510
 
619
- # def clip_min
620
- # end
2511
+ # Clip (limit) the values in an array to a `min` boundary.
2512
+ #
2513
+ # Only works for numerical types.
2514
+ #
2515
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
2516
+ # expression. See {#when} for more information.
2517
+ #
2518
+ # @param min_val [Numeric]
2519
+ # Minimum value.
2520
+ #
2521
+ # @return [Series]
2522
+ def clip_min(min_val)
2523
+ super
2524
+ end
621
2525
 
622
- # def clip_max
623
- # end
2526
+ # Clip (limit) the values in an array to a `max` boundary.
2527
+ #
2528
+ # Only works for numerical types.
2529
+ #
2530
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
2531
+ # expression. See {#when} for more information.
2532
+ #
2533
+ # @param max_val [Numeric]
2534
+ # Maximum value.
2535
+ #
2536
+ # @return [Series]
2537
+ def clip_max(max_val)
2538
+ super
2539
+ end
624
2540
 
625
- # def reshape
626
- # end
2541
+ # Reshape this Series to a flat Series or a Series of Lists.
2542
+ #
2543
+ # @param dims [Array]
2544
+ # Array of the dimension sizes. If a -1 is used in any of the dimensions, that
2545
+ # dimension is inferred.
2546
+ #
2547
+ # @return [Series]
2548
+ def reshape(dims)
2549
+ super
2550
+ end
627
2551
 
628
- # def shuffle
629
- # end
2552
+ # Shuffle the contents of this Series.
2553
+ #
2554
+ # @param seed [Integer, nil]
2555
+ # Seed for the random number generator.
2556
+ #
2557
+ # @return [Series]
2558
+ #
2559
+ # @example
2560
+ # s = Polars::Series.new("a", [1, 2, 3])
2561
+ # s.shuffle(seed: 1)
2562
+ # # =>
2563
+ # # shape: (3,)
2564
+ # # Series: 'a' [i64]
2565
+ # # [
2566
+ # # 2
2567
+ # # 1
2568
+ # # 3
2569
+ # # ]
2570
+ def shuffle(seed: nil)
2571
+ super
2572
+ end
630
2573
 
631
2574
  # def ewm_mean
632
2575
  # end
@@ -637,18 +2580,70 @@ module Polars
637
2580
  # def ewm_var
638
2581
  # end
639
2582
 
640
- # def extend_constant
641
- # end
2583
+ # Extend the Series with a given number of values.
2584
+ #
2585
+ # @param value [Object]
2586
+ # The value to extend the Series with. This value may be `nil` to fill with
2587
+ # nulls.
2588
+ # @param n [Integer]
2589
+ # The number of values to extend.
2590
+ #
2591
+ # @return [Series]
2592
+ #
2593
+ # @example
2594
+ # s = Polars::Series.new("a", [1, 2, 3])
2595
+ # s.extend_constant(99, 2)
2596
+ # # =>
2597
+ # # shape: (5,)
2598
+ # # Series: 'a' [i64]
2599
+ # # [
2600
+ # # 1
2601
+ # # 2
2602
+ # # 3
2603
+ # # 99
2604
+ # # 99
2605
+ # # ]
2606
+ def extend_constant(value, n)
2607
+ super
2608
+ end
642
2609
 
2610
+ # Flags the Series as sorted.
2611
+ #
2612
+ # Enables downstream code to use fast paths for sorted arrays.
2613
+ #
2614
+ # @param reverse [Boolean]
2615
+ # If the Series order is reversed, e.g. descending.
2616
+ #
2617
+ # @return [Series]
2618
+ #
2619
+ # @note
2620
+ # This can lead to incorrect results if this Series is not sorted!!
2621
+ # Use with care!
2622
+ #
2623
+ # @example
2624
+ # s = Polars::Series.new("a", [1, 2, 3])
2625
+ # s.set_sorted.max
2626
+ # # => 3
643
2627
  def set_sorted(reverse: false)
644
2628
  Utils.wrap_s(_s.set_sorted(reverse))
645
2629
  end
646
2630
 
647
- # def new_from_index
648
- # end
2631
+ # Create a new Series filled with values from the given index.
2632
+ #
2633
+ # @return [Series]
2634
+ def new_from_index(index, length)
2635
+ Utils.wrap_s(_s.new_from_index(index, length))
2636
+ end
649
2637
 
650
- # def shrink_dtype
651
- # end
2638
+ # Shrink numeric columns to the minimal required datatype.
2639
+ #
2640
+ # Shrink to the dtype needed to fit the extrema of this Series.
2641
+ # This can be used to reduce memory pressure.
2642
+ #
2643
+ # @return [Series]
2644
+ def shrink_dtype
2645
+ super
2646
+ end
652
2647
 
653
2648
  # def arr
654
2649
  # end
@@ -672,8 +2667,47 @@ module Polars
672
2667
  self._s = _s._clone
673
2668
  end
674
2669
 
2670
+ def coerce(other)
2671
+ if other.is_a?(Numeric)
2672
+ # TODO improve
2673
+ series = to_frame.select(Polars.lit(other)).to_series
2674
+ [series, self]
2675
+ else
2676
+ raise TypeError, "#{self.class} can't be coerced into #{other.class}"
2677
+ end
2678
+ end
2679
+
2680
+ def _comp(other, op)
2681
+ if other.is_a?(Series)
2682
+ return Utils.wrap_s(_s.send(op, other._s))
2683
+ end
2684
+
2685
+ if dtype == :str
2686
+ raise Todo
2687
+ end
2688
+ Utils.wrap_s(_s.send("#{op}_#{dtype}", other))
2689
+ end
2690
+
2691
+ def _arithmetic(other, op)
2692
+ if other.is_a?(Expr)
2693
+ other = to_frame.select(other).to_series
2694
+ end
2695
+ if other.is_a?(Series)
2696
+ return Utils.wrap_s(_s.send(op, other._s))
2697
+ end
2698
+
2699
+ raise Todo
2700
+ end
2701
+
2702
+ def series_to_rbseries(name, values)
2703
+ # should not be in-place?
2704
+ values.rename(name, in_place: true)
2705
+ values._s
2706
+ end
2707
+
675
2708
  def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
676
2709
  ruby_dtype = nil
2710
+ nested_dtype = nil
677
2711
 
678
2712
  if (values.nil? || values.empty?) && dtype.nil?
679
2713
  if dtype_if_empty
@@ -682,7 +2716,7 @@ module Polars
682
2716
  dtype = dtype_if_empty
683
2717
  else
684
2718
  # default to Float32 type
685
- dtype = "f32"
2719
+ dtype = :f32
686
2720
  end
687
2721
  end
688
2722
 
@@ -691,8 +2725,7 @@ module Polars
691
2725
  rb_temporal_types << DateTime if defined?(DateTime)
692
2726
  rb_temporal_types << Time if defined?(Time)
693
2727
 
694
- # _get_first_non_none
695
- value = values.find { |v| !v.nil? }
2728
+ value = _get_first_non_none(values)
696
2729
 
697
2730
  if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
698
2731
  constructor = polars_type_to_constructor(dtype)
@@ -716,7 +2749,22 @@ module Polars
716
2749
  # dtype = rb_type_to_dtype(dtype)
717
2750
  # end
718
2751
 
719
- raise "todo"
2752
+ if ruby_dtype == Date
2753
+ RbSeries.new_opt_date(name, values, strict)
2754
+ else
2755
+ raise Todo
2756
+ end
2757
+ elsif ruby_dtype == Array
2758
+ if nested_dtype.nil?
2759
+ nested_value = _get_first_non_none(value)
2760
+ nested_dtype = nested_value.nil? ? Float : nested_value.class
2761
+ end
2762
+
2763
+ if nested_dtype == Array
2764
+ raise Todo
2765
+ end
2766
+
2767
+ raise Todo
720
2768
  else
721
2769
  constructor = rb_type_to_constructor(value.class)
722
2770
  constructor.call(name, values, strict)
@@ -759,5 +2807,9 @@ module Polars
759
2807
  # RbSeries.method(:new_object)
760
2808
  raise ArgumentError, "Cannot determine type"
761
2809
  end
2810
+
2811
+ def _get_first_non_none(values)
2812
+ values.find { |v| !v.nil? }
2813
+ end
762
2814
  end
763
2815
  end