polars-df 0.1.1 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +8 -0
  4. data/Cargo.lock +2 -1
  5. data/README.md +1 -1
  6. data/ext/polars/Cargo.toml +7 -1
  7. data/ext/polars/src/batched_csv.rs +120 -0
  8. data/ext/polars/src/conversion.rs +139 -6
  9. data/ext/polars/src/dataframe.rs +360 -15
  10. data/ext/polars/src/error.rs +9 -0
  11. data/ext/polars/src/file.rs +8 -7
  12. data/ext/polars/src/lazy/apply.rs +7 -0
  13. data/ext/polars/src/lazy/dataframe.rs +135 -3
  14. data/ext/polars/src/lazy/dsl.rs +97 -2
  15. data/ext/polars/src/lazy/meta.rs +1 -1
  16. data/ext/polars/src/lazy/mod.rs +1 -0
  17. data/ext/polars/src/lib.rs +227 -12
  18. data/ext/polars/src/series.rs +190 -38
  19. data/ext/polars/src/set.rs +91 -0
  20. data/ext/polars/src/utils.rs +19 -0
  21. data/lib/polars/batched_csv_reader.rb +96 -0
  22. data/lib/polars/cat_expr.rb +39 -0
  23. data/lib/polars/data_frame.rb +2813 -100
  24. data/lib/polars/date_time_expr.rb +1282 -7
  25. data/lib/polars/exceptions.rb +20 -0
  26. data/lib/polars/expr.rb +631 -11
  27. data/lib/polars/expr_dispatch.rb +14 -0
  28. data/lib/polars/functions.rb +219 -0
  29. data/lib/polars/group_by.rb +517 -0
  30. data/lib/polars/io.rb +763 -4
  31. data/lib/polars/lazy_frame.rb +1415 -67
  32. data/lib/polars/lazy_functions.rb +430 -9
  33. data/lib/polars/lazy_group_by.rb +79 -0
  34. data/lib/polars/list_expr.rb +5 -0
  35. data/lib/polars/meta_expr.rb +21 -0
  36. data/lib/polars/series.rb +2244 -192
  37. data/lib/polars/slice.rb +104 -0
  38. data/lib/polars/string_expr.rb +663 -2
  39. data/lib/polars/struct_expr.rb +73 -0
  40. data/lib/polars/utils.rb +76 -3
  41. data/lib/polars/version.rb +2 -1
  42. data/lib/polars/when.rb +1 -0
  43. data/lib/polars/when_then.rb +1 -0
  44. data/lib/polars.rb +8 -2
  45. metadata +12 -2
data/lib/polars/series.rb CHANGED
@@ -1,7 +1,40 @@
1
1
  module Polars
2
+ # A Series represents a single column in a polars DataFrame.
2
3
  class Series
4
+ include ExprDispatch
5
+
6
+ # @private
3
7
  attr_accessor :_s
4
8
 
9
+ # Create a new Series.
10
+ #
11
+ # @param name [String, Array, nil]
12
+ # Name of the series. Will be used as a column name when used in a DataFrame.
13
+ # When not specified, name is set to an empty string.
14
+ # @param values [Array, nil]
15
+ # One-dimensional data in various forms. Supported are: Array and Series.
16
+ # @param dtype [Symbol, nil]
17
+ # Polars dtype of the Series data. If not specified, the dtype is inferred.
18
+ # @param strict [Boolean]
19
+ # Throw error on numeric overflow.
20
+ # @param nan_to_null [Boolean]
21
+ # Not used.
22
+ # @param dtype_if_empty [Symbol, nil]
23
+ # If no dtype is specified and values contains `nil` or an empty array,
24
+ # set the Polars dtype of the Series data. If not specified, Float32 is used.
25
+ #
26
+ # @example Constructing a Series by specifying name and values positionally:
27
+ # s = Polars::Series.new("a", [1, 2, 3])
28
+ #
29
+ # @example Notice that the dtype is automatically inferred as a polars Int64:
30
+ # s.dtype
31
+ # # => :i64
32
+ #
33
+ # @example Constructing a Series with a specific dtype:
34
+ # s2 = Polars::Series.new("a", [1, 2, 3], dtype: :f32)
35
+ #
36
+ # @example It is possible to construct a Series with values as the first positional argument. This syntax is considered an anti-pattern, but it can be useful in certain scenarios. You must specify any other arguments through keywords.
37
+ # s3 = Polars::Series.new([1, 2, 3])
5
38
  def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false, dtype_if_empty: nil)
6
39
  # Handle case where values are passed as the first argument
7
40
  if !name.nil? && !name.is_a?(String)
@@ -17,6 +50,8 @@ module Polars
17
50
 
18
51
  if values.nil?
19
52
  self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
53
+ elsif values.is_a?(Series)
54
+ self._s = series_to_rbseries(name, values)
20
55
  elsif values.is_a?(Range)
21
56
  self._s =
22
57
  Polars.arange(
@@ -35,16 +70,23 @@ module Polars
35
70
  end
36
71
  end
37
72
 
73
+ # @private
38
74
  def self._from_rbseries(s)
39
75
  series = Series.allocate
40
76
  series._s = s
41
77
  series
42
78
  end
43
79
 
80
+ # Get the data type of this Series.
81
+ #
82
+ # @return [Symbol]
44
83
  def dtype
45
- _s.dtype.to_sym
84
+ _s.dtype
46
85
  end
47
86
 
87
+ # Get flags that are set on the Series.
88
+ #
89
+ # @return [Hash]
48
90
  def flags
49
91
  {
50
92
  "SORTED_ASC" => _s.is_sorted_flag,
@@ -52,154 +94,491 @@ module Polars
52
94
  }
53
95
  end
54
96
 
97
+ # Get the inner dtype of a List typed Series.
98
+ #
99
+ # @return [Symbol]
55
100
  def inner_dtype
56
- _s.inner_dtype&.to_sym
101
+ _s.inner_dtype
57
102
  end
58
103
 
104
+ # Get the name of this Series.
105
+ #
106
+ # @return [String]
59
107
  def name
60
108
  _s.name
61
109
  end
62
110
 
111
+ # Shape of this Series.
112
+ #
113
+ # @return [Array]
63
114
  def shape
64
115
  [_s.len]
65
116
  end
66
117
 
67
- # def time_unit
68
- # end
118
+ # Get the time unit of underlying Datetime Series as `"ns"`, `"us"`, or `"ms"`.
119
+ #
120
+ # @return [String]
121
+ def time_unit
122
+ _s.time_unit
123
+ end
69
124
 
125
+ # Returns a string representing the Series.
126
+ #
127
+ # @return [String]
70
128
  def to_s
71
129
  _s.to_s
72
130
  end
73
131
  alias_method :inspect, :to_s
74
132
 
133
+ # Bitwise AND.
134
+ #
135
+ # @return [Series]
75
136
  def &(other)
137
+ if !other.is_a?(Series)
138
+ other = Series.new([other])
139
+ end
76
140
  Utils.wrap_s(_s.bitand(other._s))
77
141
  end
78
142
 
143
+ # Bitwise OR.
144
+ #
145
+ # @return [Series]
79
146
  def |(other)
147
+ if !other.is_a?(Series)
148
+ other = Series.new([other])
149
+ end
80
150
  Utils.wrap_s(_s.bitor(other._s))
81
151
  end
82
152
 
153
+ # Bitwise XOR.
154
+ #
155
+ # @return [Series]
83
156
  def ^(other)
157
+ if !other.is_a?(Series)
158
+ other = Series.new([other])
159
+ end
84
160
  Utils.wrap_s(_s.bitxor(other._s))
85
161
  end
86
162
 
87
- # def ==(other)
88
- # end
163
+ # Equal.
164
+ #
165
+ # @return [Series]
166
+ def ==(other)
167
+ _comp(other, :eq)
168
+ end
89
169
 
90
- # def !=(other)
91
- # end
170
+ # Not equal.
171
+ #
172
+ # @return [Series]
173
+ def !=(other)
174
+ _comp(other, :neq)
175
+ end
92
176
 
93
- # def >(other)
94
- # end
177
+ # Greater than.
178
+ #
179
+ # @return [Series]
180
+ def >(other)
181
+ _comp(other, :gt)
182
+ end
95
183
 
96
- # def <(other)
97
- # end
184
+ # Less than.
185
+ #
186
+ # @return [Series]
187
+ def <(other)
188
+ _comp(other, :lt)
189
+ end
98
190
 
99
- # def >=(other)
100
- # end
191
+ # Greater than or equal.
192
+ #
193
+ # @return [Series]
194
+ def >=(other)
195
+ _comp(other, :gt_eq)
196
+ end
101
197
 
102
- # def <=(other)
103
- # end
198
+ # Less than or equal.
199
+ #
200
+ # @return [Series]
201
+ def <=(other)
202
+ _comp(other, :lt_eq)
203
+ end
104
204
 
205
+ # Performs addition.
206
+ #
207
+ # @return [Series]
105
208
  def +(other)
106
- Utils. wrap_s(_s.add(other._s))
209
+ _arithmetic(other, :add)
107
210
  end
108
211
 
212
+ # Performs subtraction.
213
+ #
214
+ # @return [Series]
109
215
  def -(other)
110
- Utils.wrap_s(_s.sub(other._s))
216
+ _arithmetic(other, :sub)
111
217
  end
112
218
 
219
+ # Performs multiplication.
220
+ #
221
+ # @return [Series]
113
222
  def *(other)
114
- Utils.wrap_s(_s.mul(other._s))
223
+ _arithmetic(other, :mul)
115
224
  end
116
225
 
226
+ # Performs division.
227
+ #
228
+ # @return [Series]
117
229
  def /(other)
118
- Utils.wrap_s(_s.div(other._s))
230
+ _arithmetic(other, :div)
119
231
  end
120
232
 
233
+ # Returns the modulo.
234
+ #
235
+ # @return [Series]
236
+ def %(other)
237
+ if is_datelike
238
+ raise ArgumentError, "first cast to integer before applying modulo on datelike dtypes"
239
+ end
240
+ _arithmetic(other, :rem)
241
+ end
242
+
243
+ # Raises to the power of exponent.
244
+ #
245
+ # @return [Series]
121
246
  def **(power)
122
- # if is_datelike
123
- # raise ArgumentError, "first cast to integer before raising datelike dtypes to a power"
124
- # end
247
+ if is_datelike
248
+ raise ArgumentError, "first cast to integer before raising datelike dtypes to a power"
249
+ end
125
250
  to_frame.select(Polars.col(name).pow(power)).to_series
126
251
  end
127
252
 
128
- # def -@(other)
129
- # end
253
+ # Performs negation.
254
+ #
255
+ # @return [Series]
256
+ def -@
257
+ 0 - self
258
+ end
130
259
 
260
+ # Returns elements of the Series.
261
+ #
262
+ # @return [Object]
131
263
  def [](item)
132
- _s.get_idx(item)
264
+ if item.is_a?(Integer)
265
+ return _s.get_idx(item)
266
+ end
267
+
268
+ if item.is_a?(Range)
269
+ return Slice.new(self).apply(item)
270
+ end
271
+
272
+ raise ArgumentError, "Cannot get item of type: #{item.class.name}"
133
273
  end
134
274
 
135
- # def []=(key, value)
136
- # end
275
+ # Sets an element of the Series.
276
+ #
277
+ # @return [Object]
278
+ def []=(key, value)
279
+ if value.is_a?(Array)
280
+ if is_numeric || is_datelike
281
+ set_at_idx(key, value)
282
+ return
283
+ end
284
+ raise ArgumentError, "cannot set Series of dtype: #{dtype} with list/tuple as value; use a scalar value"
285
+ end
286
+
287
+ if key.is_a?(Series)
288
+ if key.dtype == :bool
289
+ self._s = set(key, value)._s
290
+ elsif key.dtype == :u64
291
+ self._s = set_at_idx(key.cast(:u32), value)._s
292
+ elsif key.dtype == :u32
293
+ self._s = set_at_idx(key, value)._s
294
+ else
295
+ raise Todo
296
+ end
297
+ end
137
298
 
299
+ if key.is_a?(Array)
300
+ s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: :u32))
301
+ self[s] = value
302
+ elsif key.is_a?(Integer)
303
+ # TODO fix
304
+ # self[[key]] = value
305
+ set_at_idx(key, value)
306
+ else
307
+ raise ArgumentError, "cannot use #{key} for indexing"
308
+ end
309
+ end
310
+
311
+ # Return an estimation of the total (heap) allocated size of the Series.
312
+ #
313
+ # Estimated size is given in the specified unit (bytes by default).
314
+ #
315
+ # This estimation is the sum of the size of its buffers, validity, including
316
+ # nested arrays. Multiple arrays may share buffers and bitmaps. Therefore, the
317
+ # size of 2 arrays is not the sum of the sizes computed from this function. In
318
+ # particular, StructArray's size is an upper bound.
319
+ #
320
+ # When an array is sliced, its allocated size remains constant because the buffer is
321
+ # unchanged. However, this function will yield a smaller number. This is because
322
+ # this function returns the visible size of the buffer, not its total capacity.
323
+ #
324
+ # FFI buffers are included in this estimation.
325
+ #
326
+ # @param unit ["b", "kb", "mb", "gb", "tb"]
327
+ # Scale the returned size to the given unit.
328
+ #
329
+ # @return [Numeric]
330
+ #
331
+ # @example
332
+ # s = Polars::Series.new("values", 1..1_000_000, dtype: :u32)
333
+ # s.estimated_size
334
+ # # => 4000000
335
+ # s.estimated_size("mb")
336
+ # # => 3.814697265625
138
337
  def estimated_size(unit = "b")
139
338
  sz = _s.estimated_size
140
339
  Utils.scale_bytes(sz, to: unit)
141
340
  end
142
341
 
342
+ # Compute the square root of the elements.
343
+ #
344
+ # @return [Series]
143
345
  def sqrt
144
- self ** 0.5
346
+ self**0.5
145
347
  end
146
348
 
349
+ # Check if any boolean value in the column is `true`.
350
+ #
351
+ # @return [Boolean]
147
352
  def any
148
353
  to_frame.select(Polars.col(name).any).to_series[0]
149
354
  end
150
355
 
356
+ # Check if all boolean values in the column are `true`.
357
+ #
358
+ # @return [Boolean]
151
359
  def all
152
360
  to_frame.select(Polars.col(name).all).to_series[0]
153
361
  end
154
362
 
155
- # def log
156
- # end
363
+ # Compute the logarithm to a given base.
364
+ #
365
+ # @param base [Float]
366
+ # Given base, defaults to `Math::E`.
367
+ #
368
+ # @return [Series]
369
+ def log(base = Math::E)
370
+ super
371
+ end
157
372
 
158
- # def log10
159
- # end
373
+ # Compute the base 10 logarithm of the input array, element-wise.
374
+ #
375
+ # @return [Series]
376
+ def log10
377
+ super
378
+ end
160
379
 
161
- # def exp
162
- # end
380
+ # Compute the exponential, element-wise.
381
+ #
382
+ # @return [Series]
383
+ def exp
384
+ super
385
+ end
163
386
 
164
- # def drop_nulls
165
- # end
387
+ # Create a new Series that copies data from this Series without null values.
388
+ #
389
+ # @return [Series]
390
+ def drop_nulls
391
+ super
392
+ end
166
393
 
167
- # def drop_nans
168
- # end
394
+ # Drop NaN values.
395
+ #
396
+ # @return [Series]
397
+ def drop_nans
398
+ super
399
+ end
169
400
 
401
+ # Cast this Series to a DataFrame.
402
+ #
403
+ # @return [DataFrame]
170
404
  def to_frame
171
405
  Utils.wrap_df(RbDataFrame.new([_s]))
172
406
  end
173
407
 
174
- # def describe
175
- # end
408
+ # Quick summary statistics of a series.
409
+ #
410
+ # Series with mixed datatypes will return summary statistics for the datatype of
411
+ # the first value.
412
+ #
413
+ # @return [DataFrame]
414
+ #
415
+ # @example
416
+ # series_num = Polars::Series.new([1, 2, 3, 4, 5])
417
+ # series_num.describe
418
+ # # =>
419
+ # # shape: (6, 2)
420
+ # # ┌────────────┬──────────┐
421
+ # # │ statistic ┆ value │
422
+ # # │ --- ┆ --- │
423
+ # # │ str ┆ f64 │
424
+ # # ╞════════════╪══════════╡
425
+ # # │ min ┆ 1.0 │
426
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
427
+ # # │ max ┆ 5.0 │
428
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
429
+ # # │ null_count ┆ 0.0 │
430
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
431
+ # # │ mean ┆ 3.0 │
432
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
433
+ # # │ std ┆ 1.581139 │
434
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
435
+ # # │ count ┆ 5.0 │
436
+ # # └────────────┴──────────┘
437
+ #
438
+ # @example
439
+ # series_str = Polars::Series.new(["a", "a", nil, "b", "c"])
440
+ # series_str.describe
441
+ # # =>
442
+ # # shape: (3, 2)
443
+ # # ┌────────────┬───────┐
444
+ # # │ statistic ┆ value │
445
+ # # │ --- ┆ --- │
446
+ # # │ str ┆ i64 │
447
+ # # ╞════════════╪═══════╡
448
+ # # │ unique ┆ 4 │
449
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
450
+ # # │ null_count ┆ 1 │
451
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
452
+ # # │ count ┆ 5 │
453
+ # # └────────────┴───────┘
454
+ def describe
455
+ if len == 0
456
+ raise ArgumentError, "Series must contain at least one value"
457
+ elsif is_numeric
458
+ s = cast(:f64)
459
+ stats = {
460
+ "min" => s.min,
461
+ "max" => s.max,
462
+ "null_count" => s.null_count,
463
+ "mean" => s.mean,
464
+ "std" => s.std,
465
+ "count" => s.len
466
+ }
467
+ elsif is_boolean
468
+ stats = {
469
+ "sum" => sum,
470
+ "null_count" => null_count,
471
+ "count" => len
472
+ }
473
+ elsif is_utf8
474
+ stats = {
475
+ "unique" => unique.length,
476
+ "null_count" => null_count,
477
+ "count" => len
478
+ }
479
+ elsif is_datelike
480
+ # we coerce all to string, because a polars column
481
+ # only has a single dtype and dates: datetime and count: int don't match
482
+ stats = {
483
+ "min" => dt.min.to_s,
484
+ "max" => dt.max.to_s,
485
+ "null_count" => null_count.to_s,
486
+ "count" => len.to_s
487
+ }
488
+ else
489
+ raise TypeError, "This type is not supported"
490
+ end
176
491
 
492
+ Polars::DataFrame.new(
493
+ {"statistic" => stats.keys, "value" => stats.values}
494
+ )
495
+ end
496
+
497
+ # Reduce this Series to the sum value.
498
+ #
499
+ # @return [Numeric]
500
+ #
501
+ # @note
502
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
503
+ # `:i64` before summing to prevent overflow issues.
504
+ #
505
+ # @example
506
+ # s = Polars::Series.new("a", [1, 2, 3])
507
+ # s.sum
508
+ # # => 6
177
509
  def sum
178
510
  _s.sum
179
511
  end
180
512
 
513
+ # Reduce this Series to the mean value.
514
+ #
515
+ # @return [Float, nil]
516
+ #
517
+ # @example
518
+ # s = Polars::Series.new("a", [1, 2, 3])
519
+ # s.mean
520
+ # # => 2.0
181
521
  def mean
182
522
  _s.mean
183
523
  end
184
524
 
525
+ # Reduce this Series to the product value.
526
+ #
527
+ # @return [Numeric]
185
528
  def product
186
529
  to_frame.select(Polars.col(name).product).to_series[0]
187
530
  end
188
531
 
532
+ # Get the minimal value in this Series.
533
+ #
534
+ # @return [Object]
535
+ #
536
+ # @example
537
+ # s = Polars::Series.new("a", [1, 2, 3])
538
+ # s.min
539
+ # # => 1
189
540
  def min
190
541
  _s.min
191
542
  end
192
543
 
544
+ # Get the maximum value in this Series.
545
+ #
546
+ # @return [Object]
547
+ #
548
+ # @example
549
+ # s = Polars::Series.new("a", [1, 2, 3])
550
+ # s.max
551
+ # # => 3
193
552
  def max
194
553
  _s.max
195
554
  end
196
555
 
197
- # def nan_max
198
- # end
199
-
200
- # def nan_min
201
- # end
202
-
556
+ # Get maximum value, but propagate/poison encountered NaN values.
557
+ #
558
+ # @return [Object]
559
+ def nan_max
560
+ to_frame.select(Polars.col(name).nan_max)[0, 0]
561
+ end
562
+
563
+ # Get minimum value, but propagate/poison encountered NaN values.
564
+ #
565
+ # @return [Object]
566
+ def nan_min
567
+ to_frame.select(Polars.col(name).nan_min)[0, 0]
568
+ end
569
+
570
+ # Get the standard deviation of this Series.
571
+ #
572
+ # @param ddof [Integer]
573
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
574
+ # where N represents the number of elements.
575
+ #
576
+ # @return [Float, nil]
577
+ #
578
+ # @example
579
+ # s = Polars::Series.new("a", [1, 2, 3])
580
+ # s.std
581
+ # # => 1.0
203
582
  def std(ddof: 1)
204
583
  if !is_numeric
205
584
  nil
@@ -208,6 +587,18 @@ module Polars
208
587
  end
209
588
  end
210
589
 
590
+ # Get variance of this Series.
591
+ #
592
+ # @param ddof [Integer]
593
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
594
+ # where N represents the number of elements.
595
+ #
596
+ # @return [Float, nil]
597
+ #
598
+ # @example
599
+ # s = Polars::Series.new("a", [1, 2, 3])
600
+ # s.var
601
+ # # => 1.0
211
602
  def var(ddof: 1)
212
603
  if !is_numeric
213
604
  nil
@@ -216,37 +607,160 @@ module Polars
216
607
  end
217
608
  end
218
609
 
610
+ # Get the median of this Series.
611
+ #
612
+ # @return [Float, nil]
613
+ #
614
+ # @example
615
+ # s = Polars::Series.new("a", [1, 2, 3])
616
+ # s.median
617
+ # # => 2.0
219
618
  def median
220
619
  _s.median
221
620
  end
222
621
 
622
+ # Get the quantile value of this Series.
623
+ #
624
+ # @param quantile [Float, nil]
625
+ # Quantile between 0.0 and 1.0.
626
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
627
+ # Interpolation method.
628
+ #
629
+ # @return [Float, nil]
630
+ #
631
+ # @example
632
+ # s = Polars::Series.new("a", [1, 2, 3])
633
+ # s.quantile(0.5)
634
+ # # => 2.0
223
635
  def quantile(quantile, interpolation: "nearest")
224
636
  _s.quantile(quantile, interpolation)
225
637
  end
226
638
 
639
+ # Get dummy variables.
640
+ #
641
+ # @return [DataFrame]
642
+ #
643
+ # @example
644
+ # s = Polars::Series.new("a", [1, 2, 3])
645
+ # s.to_dummies
646
+ # # =>
647
+ # # shape: (3, 3)
648
+ # # ┌─────┬─────┬─────┐
649
+ # # │ a_1 ┆ a_2 ┆ a_3 │
650
+ # # │ --- ┆ --- ┆ --- │
651
+ # # │ u8 ┆ u8 ┆ u8 │
652
+ # # ╞═════╪═════╪═════╡
653
+ # # │ 1 ┆ 0 ┆ 0 │
654
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
655
+ # # │ 0 ┆ 1 ┆ 0 │
656
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
657
+ # # │ 0 ┆ 0 ┆ 1 │
658
+ # # └─────┴─────┴─────┘
227
659
  def to_dummies
228
660
  Utils.wrap_df(_s.to_dummies)
229
661
  end
230
662
 
663
+ # Count the unique values in a Series.
664
+ #
665
+ # @param sort [Boolean]
666
+ # Ensure the output is sorted from most values to least.
667
+ #
668
+ # @return [DataFrame]
669
+ #
670
+ # @example
671
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
672
+ # s.value_counts.sort("a")
673
+ # # =>
674
+ # # shape: (3, 2)
675
+ # # ┌─────┬────────┐
676
+ # # │ a ┆ counts │
677
+ # # │ --- ┆ --- │
678
+ # # │ i64 ┆ u32 │
679
+ # # ╞═════╪════════╡
680
+ # # │ 1 ┆ 1 │
681
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
682
+ # # │ 2 ┆ 2 │
683
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
684
+ # # │ 3 ┆ 1 │
685
+ # # └─────┴────────┘
231
686
  def value_counts(sort: false)
232
687
  Utils.wrap_df(_s.value_counts(sort))
233
688
  end
234
689
 
235
- # def unique_counts
236
- # end
690
+ # Return a count of the unique values in the order of appearance.
691
+ #
692
+ # @return [Series]
693
+ #
694
+ # @example
695
+ # s = Polars::Series.new("id", ["a", "b", "b", "c", "c", "c"])
696
+ # s.unique_counts
697
+ # # =>
698
+ # # shape: (3,)
699
+ # # Series: 'id' [u32]
700
+ # # [
701
+ # # 1
702
+ # # 2
703
+ # # 3
704
+ # # ]
705
+ def unique_counts
706
+ super
707
+ end
237
708
 
238
- # def entropy
239
- # end
709
+ # Computes the entropy.
710
+ #
711
+ # Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities.
712
+ #
713
+ # @param base [Float]
714
+ # Given base, defaults to `e`
715
+ # @param normalize [Boolean]
716
+ # Normalize pk if it doesn't sum to 1.
717
+ #
718
+ # @return [Float, nil]
719
+ #
720
+ # @example
721
+ # a = Polars::Series.new([0.99, 0.005, 0.005])
722
+ # a.entropy(normalize: true)
723
+ # # => 0.06293300616044681
724
+ #
725
+ # @example
726
+ # b = Polars::Series.new([0.65, 0.10, 0.25])
727
+ # b.entropy(normalize: true)
728
+ # # => 0.8568409950394724
729
+ def entropy(base: Math::E, normalize: false)
730
+ Polars.select(Polars.lit(self).entropy(base: base, normalize: normalize)).to_series[0]
731
+ end
240
732
 
241
733
  # def cumulative_eval
242
734
  # end
243
735
 
736
+ # Return a copy of the Series with a new alias/name.
737
+ #
738
+ # @param name [String]
739
+ # New name.
740
+ #
741
+ # @return [Series]
742
+ #
743
+ # @example
744
+ # s = Polars::Series.new("x", [1, 2, 3])
745
+ # s.alias("y")
244
746
  def alias(name)
245
747
  s = dup
246
748
  s._s.rename(name)
247
749
  s
248
750
  end
249
751
 
752
+ # Rename this Series.
753
+ #
754
+ # @param name [String]
755
+ # New name.
756
+ # @param in_place [Boolean]
757
+ # Modify the Series in-place.
758
+ #
759
+ # @return [Series]
760
+ #
761
+ # @example
762
+ # s = Polars::Series.new("a", [1, 2, 3])
763
+ # s.rename("b")
250
764
  def rename(name, in_place: false)
251
765
  if in_place
252
766
  _s.rename(name)
@@ -256,59 +770,365 @@ module Polars
256
770
  end
257
771
  end
258
772
 
773
+ # Get the length of each individual chunk.
774
+ #
775
+ # @return [Array]
776
+ #
777
+ # @example
778
+ # s = Polars::Series.new("a", [1, 2, 3])
779
+ # s2 = Polars::Series.new("b", [4, 5, 6])
780
+ #
781
+ # @example Concatenate Series with rechunk: true
782
+ # Polars.concat([s, s2]).chunk_lengths
783
+ # # => [6]
784
+ #
785
+ # @example Concatenate Series with rechunk: false
786
+ # Polars.concat([s, s2], rechunk: false).chunk_lengths
787
+ # # => [3, 3]
259
788
  def chunk_lengths
260
789
  _s.chunk_lengths
261
790
  end
262
791
 
792
+ # Get the number of chunks that this Series contains.
793
+ #
794
+ # @return [Integer]
795
+ #
796
+ # @example
797
+ # s = Polars::Series.new("a", [1, 2, 3])
798
+ # s2 = Polars::Series.new("b", [4, 5, 6])
799
+ #
800
+ # @example Concatenate Series with rechunk: true
801
+ # Polars.concat([s, s2]).n_chunks
802
+ # # => 1
803
+ #
804
+ # @example Concatenate Series with rechunk: false
805
+ # Polars.concat([s, s2], rechunk: false).n_chunks
806
+ # # => 2
263
807
  def n_chunks
264
808
  _s.n_chunks
265
809
  end
266
810
 
811
+ # Get an array with the cumulative sum computed at every element.
812
+ #
813
+ # @param reverse [Boolean]
814
+ # reverse the operation.
815
+ #
816
+ # @return [Series]
817
+ #
818
+ # @note
819
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
820
+ # `:i64` before summing to prevent overflow issues.
821
+ #
822
+ # @example
823
+ # s = Polars::Series.new("a", [1, 2, 3])
824
+ # s.cumsum
825
+ # # =>
826
+ # # shape: (3,)
827
+ # # Series: 'a' [i64]
828
+ # # [
829
+ # # 1
830
+ # # 3
831
+ # # 6
832
+ # # ]
267
833
  def cumsum(reverse: false)
268
- Utils.wrap_s(_s.cumsum(reverse))
834
+ super
269
835
  end
270
836
 
837
+ # Get an array with the cumulative min computed at every element.
838
+ #
839
+ # @param reverse [Boolean]
840
+ # reverse the operation.
841
+ #
842
+ # @return [Series]
843
+ #
844
+ # @example
845
+ # s = Polars::Series.new("a", [3, 5, 1])
846
+ # s.cummin
847
+ # # =>
848
+ # # shape: (3,)
849
+ # # Series: 'a' [i64]
850
+ # # [
851
+ # # 3
852
+ # # 3
853
+ # # 1
854
+ # # ]
271
855
  def cummin(reverse: false)
272
- Utils.wrap_s(_s.cummin(reverse))
856
+ super
273
857
  end
274
858
 
859
+ # Get an array with the cumulative max computed at every element.
860
+ #
861
+ # @param reverse [Boolean]
862
+ # reverse the operation.
863
+ #
864
+ # @return [Series]
865
+ #
866
+ # @example
867
+ # s = Polars::Series.new("a", [3, 5, 1])
868
+ # s.cummax
869
+ # # =>
870
+ # # shape: (3,)
871
+ # # Series: 'a' [i64]
872
+ # # [
873
+ # # 3
874
+ # # 5
875
+ # # 5
876
+ # # ]
275
877
  def cummax(reverse: false)
276
- Utils.wrap_s(_s.cummax(reverse))
878
+ super
277
879
  end
278
880
 
881
+ # Get an array with the cumulative product computed at every element.
882
+ #
883
+ # @param reverse [Boolean]
884
+ # reverse the operation.
885
+ #
886
+ # @return [Series]
887
+ #
888
+ # @note
889
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
890
+ # `:i64` before multiplying to prevent overflow issues.
891
+ #
892
+ # @example
893
+ # s = Polars::Series.new("a", [1, 2, 3])
894
+ # s.cumprod
895
+ # # =>
896
+ # # shape: (3,)
897
+ # # Series: 'a' [i64]
898
+ # # [
899
+ # # 1
900
+ # # 2
901
+ # # 6
902
+ # # ]
279
903
  def cumprod(reverse: false)
280
- Utils.wrap_s(_s.cumprod(reverse))
904
+ super
281
905
  end
282
906
 
907
+ # Get the first `n` rows.
908
+ #
909
+ # Alias for {#head}.
910
+ #
911
+ # @param n [Integer]
912
+ # Number of rows to return.
913
+ #
914
+ # @return [Series]
915
+ #
916
+ # @example
917
+ # s = Polars::Series.new("a", [1, 2, 3])
918
+ # s.limit(2)
919
+ # # =>
920
+ # # shape: (2,)
921
+ # # Series: 'a' [i64]
922
+ # # [
923
+ # # 1
924
+ # # 2
925
+ # # ]
283
926
  def limit(n = 10)
284
927
  to_frame.select(Utils.col(name).limit(n)).to_series
285
928
  end
286
929
 
930
+ # Get a slice of this Series.
931
+ #
932
+ # @param offset [Integer]
933
+ # Start index. Negative indexing is supported.
934
+ # @param length [Integer, nil]
935
+ # Length of the slice. If set to `nil`, all rows starting at the offset
936
+ # will be selected.
937
+ #
938
+ # @return [Series]
939
+ #
940
+ # @example
941
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
942
+ # s.slice(1, 2)
943
+ # # =>
944
+ # # shape: (2,)
945
+ # # Series: 'a' [i64]
946
+ # # [
947
+ # # 2
948
+ # # 3
949
+ # # ]
287
950
  def slice(offset, length = nil)
288
- length = len if length.nil?
289
- Utils.wrap_s(_s.slice(offset, length))
951
+ super
290
952
  end
291
953
 
292
- def append(other)
293
- _s.append(other._s)
954
+ # Append a Series to this one.
955
+ #
956
+ # @param other [Series]
957
+ # Series to append.
958
+ # @param append_chunks [Boolean]
959
+ # If set to `true` the append operation will add the chunks from `other` to
960
+ # self. This is super cheap.
961
+ #
962
+ # If set to `false` the append operation will do the same as
963
+ # {DataFrame#extend} which extends the memory backed by this Series with
964
+ # the values from `other`.
965
+ #
966
+ # Different from `append_chunks`, `extend` appends the data from `other` to
967
+ # the underlying memory locations and thus may cause a reallocation (which is
968
+ # expensive).
969
+ #
970
+ # If this does not cause a reallocation, the resulting data structure will not
971
+ # have any extra chunks and thus will yield faster queries.
972
+ #
973
+ # Prefer `extend` over `append_chunks` when you want to do a query after a
974
+ # single append. For instance during online operations where you add `n` rows
975
+ # and rerun a query.
976
+ #
977
+ # Prefer `append_chunks` over `extend` when you want to append many times
978
+ # before doing a query. For instance, when you read in multiple files and want
979
+ # to store them in a single Series. In the latter case, finish the sequence
980
+ # of `append_chunks` operations with a `rechunk`.
981
+ #
982
+ # @return [Series]
983
+ #
984
+ # @example
985
+ # s = Polars::Series.new("a", [1, 2, 3])
986
+ # s2 = Polars::Series.new("b", [4, 5, 6])
987
+ # s.append(s2)
988
+ # # =>
989
+ # # shape: (6,)
990
+ # # Series: 'a' [i64]
991
+ # # [
992
+ # # 1
993
+ # # 2
994
+ # # 3
995
+ # # 4
996
+ # # 5
997
+ # # 6
998
+ # # ]
999
+ def append(other, append_chunks: true)
1000
+ begin
1001
+ if append_chunks
1002
+ _s.append(other._s)
1003
+ else
1004
+ _s.extend(other._s)
1005
+ end
1006
+ rescue => e
1007
+ if e.message == "Already mutably borrowed"
1008
+ append(other.clone, append_chunks)
1009
+ else
1010
+ raise e
1011
+ end
1012
+ end
294
1013
  self
295
1014
  end
296
1015
 
1016
+ # Filter elements by a boolean mask.
1017
+ #
1018
+ # @param predicate [Series, Array]
1019
+ # Boolean mask.
1020
+ #
1021
+ # @return [Series]
1022
+ #
1023
+ # @example
1024
+ # s = Polars::Series.new("a", [1, 2, 3])
1025
+ # mask = Polars::Series.new("", [true, false, true])
1026
+ # s.filter(mask)
1027
+ # # =>
1028
+ # # shape: (2,)
1029
+ # # Series: 'a' [i64]
1030
+ # # [
1031
+ # # 1
1032
+ # # 3
1033
+ # # ]
297
1034
  def filter(predicate)
1035
+ if predicate.is_a?(Array)
1036
+ predicate = Series.new("", predicate)
1037
+ end
298
1038
  Utils.wrap_s(_s.filter(predicate._s))
299
1039
  end
300
1040
 
1041
+ # Get the first `n` rows.
1042
+ #
1043
+ # @param n [Integer]
1044
+ # Number of rows to return.
1045
+ #
1046
+ # @return [Series]
1047
+ #
1048
+ # @example
1049
+ # s = Polars::Series.new("a", [1, 2, 3])
1050
+ # s.head(2)
1051
+ # # =>
1052
+ # # shape: (2,)
1053
+ # # Series: 'a' [i64]
1054
+ # # [
1055
+ # # 1
1056
+ # # 2
1057
+ # # ]
301
1058
  def head(n = 10)
302
1059
  to_frame.select(Utils.col(name).head(n)).to_series
303
1060
  end
304
1061
 
1062
+ # Get the last `n` rows.
1063
+ #
1064
+ # @param n [Integer]
1065
+ # Number of rows to return.
1066
+ #
1067
+ # @return [Series]
1068
+ #
1069
+ # @example
1070
+ # s = Polars::Series.new("a", [1, 2, 3])
1071
+ # s.tail(2)
1072
+ # # =>
1073
+ # # shape: (2,)
1074
+ # # Series: 'a' [i64]
1075
+ # # [
1076
+ # # 2
1077
+ # # 3
1078
+ # # ]
305
1079
  def tail(n = 10)
306
1080
  to_frame.select(Utils.col(name).tail(n)).to_series
307
1081
  end
308
1082
 
309
- # def take_every
310
- # end
1083
+ # Take every nth value in the Series and return as new Series.
1084
+ #
1085
+ # @return [Series]
1086
+ #
1087
+ # @example
1088
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
1089
+ # s.take_every(2)
1090
+ # # =>
1091
+ # # shape: (2,)
1092
+ # # Series: 'a' [i64]
1093
+ # # [
1094
+ # # 1
1095
+ # # 3
1096
+ # # ]
1097
+ def take_every(n)
1098
+ super
1099
+ end
311
1100
 
1101
+ # Sort this Series.
1102
+ #
1103
+ # @param reverse [Boolean]
1104
+ # Reverse sort.
1105
+ # @param in_place [Boolean]
1106
+ # Sort in place.
1107
+ #
1108
+ # @return [Series]
1109
+ #
1110
+ # @example
1111
+ # s = Polars::Series.new("a", [1, 3, 4, 2])
1112
+ # s.sort
1113
+ # # =>
1114
+ # # shape: (4,)
1115
+ # # Series: 'a' [i64]
1116
+ # # [
1117
+ # # 1
1118
+ # # 2
1119
+ # # 3
1120
+ # # 4
1121
+ # # ]
1122
+ # s.sort(reverse: true)
1123
+ # # =>
1124
+ # # shape: (4,)
1125
+ # # Series: 'a' [i64]
1126
+ # # [
1127
+ # # 4
1128
+ # # 3
1129
+ # # 2
1130
+ # # 1
1131
+ # # ]
312
1132
  def sort(reverse: false, in_place: false)
313
1133
  if in_place
314
1134
  self._s = _s.sort(reverse)
@@ -318,128 +1138,561 @@ module Polars
318
1138
  end
319
1139
  end
320
1140
 
321
- # def top_k
322
- # end
1141
+ # Return the `k` largest elements.
1142
+ #
1143
+ # If `reverse: true`, the smallest elements will be given.
1144
+ #
1145
+ # @param k [Integer]
1146
+ # Number of elements to return.
1147
+ # @param reverse [Boolean]
1148
+ # Return the smallest elements.
1149
+ #
1150
+ # @return [Series]
1151
+ def top_k(k: 5, reverse: false)
1152
+ super
1153
+ end
323
1154
 
324
- # def arg_sort
325
- # end
1155
+ # Get the index values that would sort this Series.
1156
+ #
1157
+ # @param reverse [Boolean]
1158
+ # Sort in reverse (descending) order.
1159
+ # @param nulls_last [Boolean]
1160
+ # Place null values last instead of first.
1161
+ #
1162
+ # @return [Series]
1163
+ #
1164
+ # @example
1165
+ # s = Polars::Series.new("a", [5, 3, 4, 1, 2])
1166
+ # s.arg_sort
1167
+ # # =>
1168
+ # # shape: (5,)
1169
+ # # Series: 'a' [u32]
1170
+ # # [
1171
+ # # 3
1172
+ # # 4
1173
+ # # 1
1174
+ # # 2
1175
+ # # 0
1176
+ # # ]
1177
+ def arg_sort(reverse: false, nulls_last: false)
1178
+ super
1179
+ end
326
1180
 
327
- # def argsort
328
- # end
1181
+ # Get the index values that would sort this Series.
1182
+ #
1183
+ # Alias for {#arg_sort}.
1184
+ #
1185
+ # @param reverse [Boolean]
1186
+ # Sort in reverse (descending) order.
1187
+ # @param nulls_last [Boolean]
1188
+ # Place null values last instead of first.
1189
+ #
1190
+ # @return [Series]
1191
+ def argsort(reverse: false, nulls_last: false)
1192
+ super
1193
+ end
329
1194
 
330
- # def arg_unique
331
- # end
1195
+ # Get the indices of the unique values as a Series.
1196
+ #
1197
+ # @return [Series]
1198
+ #
1199
+ # @example
1200
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1201
+ # s.arg_unique
1202
+ # # =>
1203
+ # # shape: (3,)
1204
+ # # Series: 'a' [u32]
1205
+ # # [
1206
+ # # 0
1207
+ # # 1
1208
+ # # 3
1209
+ # # ]
1210
+ def arg_unique
1211
+ super
1212
+ end
332
1213
 
1214
+ # Get the index of the minimal value.
1215
+ #
1216
+ # @return [Integer, nil]
1217
+ #
1218
+ # @example
1219
+ # s = Polars::Series.new("a", [3, 2, 1])
1220
+ # s.arg_min
1221
+ # # => 2
333
1222
  def arg_min
334
1223
  _s.arg_min
335
1224
  end
336
1225
 
1226
+ # Get the index of the maximal value.
1227
+ #
1228
+ # @return [Integer, nil]
1229
+ #
1230
+ # @example
1231
+ # s = Polars::Series.new("a", [3, 2, 1])
1232
+ # s.arg_max
1233
+ # # => 0
337
1234
  def arg_max
338
1235
  _s.arg_max
339
1236
  end
340
1237
 
341
- # def search_sorted
342
- # end
343
-
344
- # def unique
345
- # end
346
-
347
- # def take
348
- # end
1238
+ # Find indices where elements should be inserted to maintain order.
1239
+ #
1240
+ # @param element [Object]
1241
+ # Expression or scalar value.
1242
+ #
1243
+ # @return [Integer]
1244
+ def search_sorted(element)
1245
+ Polars.select(Polars.lit(self).search_sorted(element))[0, 0]
1246
+ end
1247
+
1248
+ # Get unique elements in series.
1249
+ #
1250
+ # @param maintain_order [Boolean]
1251
+ # Maintain order of data. This requires more work.
1252
+ #
1253
+ # @return [Series]
1254
+ #
1255
+ # @example
1256
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1257
+ # s.unique.sort
1258
+ # # =>
1259
+ # # shape: (3,)
1260
+ # # Series: 'a' [i64]
1261
+ # # [
1262
+ # # 1
1263
+ # # 2
1264
+ # # 3
1265
+ # # ]
1266
+ def unique(maintain_order: false)
1267
+ super
1268
+ end
349
1269
 
1270
+ # Take values by index.
1271
+ #
1272
+ # @param indices [Array]
1273
+ # Index location used for selection.
1274
+ #
1275
+ # @return [Series]
1276
+ #
1277
+ # @example
1278
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
1279
+ # s.take([1, 3])
1280
+ # # =>
1281
+ # # shape: (2,)
1282
+ # # Series: 'a' [i64]
1283
+ # # [
1284
+ # # 2
1285
+ # # 4
1286
+ # # ]
1287
+ def take(indices)
1288
+ to_frame.select(Polars.col(name).take(indices)).to_series
1289
+ end
1290
+
1291
+ # Count the null values in this Series.
1292
+ #
1293
+ # @return [Integer]
350
1294
  def null_count
351
1295
  _s.null_count
352
1296
  end
353
1297
 
1298
+ # Return `true` if the Series has a validity bitmask.
1299
+ #
1300
+ # If there is none, it means that there are no null values.
1301
+ # Use this to swiftly assert a Series does not have null values.
1302
+ #
1303
+ # @return [Boolean]
354
1304
  def has_validity
355
1305
  _s.has_validity
356
1306
  end
357
1307
 
1308
+ # Check if the Series is empty.
1309
+ #
1310
+ # @return [Boolean]
1311
+ #
1312
+ # @example
1313
+ # s = Polars::Series.new("a", [])
1314
+ # s.is_empty
1315
+ # # => true
358
1316
  def is_empty
359
1317
  len == 0
360
1318
  end
361
1319
  alias_method :empty?, :is_empty
362
1320
 
363
- # def is_null
364
- # end
1321
+ # Returns a boolean Series indicating which values are null.
1322
+ #
1323
+ # @return [Series]
1324
+ #
1325
+ # @example
1326
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, nil])
1327
+ # s.is_null
1328
+ # # =>
1329
+ # # shape: (4,)
1330
+ # # Series: 'a' [bool]
1331
+ # # [
1332
+ # # false
1333
+ # # false
1334
+ # # false
1335
+ # # true
1336
+ # # ]
1337
+ def is_null
1338
+ super
1339
+ end
365
1340
 
366
- # def is_not_null
367
- # end
1341
+ # Returns a boolean Series indicating which values are not null.
1342
+ #
1343
+ # @return [Series]
1344
+ #
1345
+ # @example
1346
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, nil])
1347
+ # s.is_not_null
1348
+ # # =>
1349
+ # # shape: (4,)
1350
+ # # Series: 'a' [bool]
1351
+ # # [
1352
+ # # true
1353
+ # # true
1354
+ # # true
1355
+ # # false
1356
+ # # ]
1357
+ def is_not_null
1358
+ super
1359
+ end
368
1360
 
369
- # def is_finite
370
- # end
1361
+ # Returns a boolean Series indicating which values are finite.
1362
+ #
1363
+ # @return [Series]
1364
+ #
1365
+ # @example
1366
+ # s = Polars::Series.new("a", [1.0, 2.0, Float::INFINITY])
1367
+ # s.is_finite
1368
+ # # =>
1369
+ # # shape: (3,)
1370
+ # # Series: 'a' [bool]
1371
+ # # [
1372
+ # # true
1373
+ # # true
1374
+ # # false
1375
+ # # ]
1376
+ def is_finite
1377
+ super
1378
+ end
371
1379
 
372
- # def is_infinite
373
- # end
1380
+ # Returns a boolean Series indicating which values are infinite.
1381
+ #
1382
+ # @return [Series]
1383
+ #
1384
+ # @example
1385
+ # s = Polars::Series.new("a", [1.0, 2.0, Float::INFINITY])
1386
+ # s.is_infinite
1387
+ # # =>
1388
+ # # shape: (3,)
1389
+ # # Series: 'a' [bool]
1390
+ # # [
1391
+ # # false
1392
+ # # false
1393
+ # # true
1394
+ # # ]
1395
+ def is_infinite
1396
+ super
1397
+ end
374
1398
 
375
- # def is_nan
376
- # end
1399
+ # Returns a boolean Series indicating which values are NaN.
1400
+ #
1401
+ # @return [Series]
1402
+ #
1403
+ # @example
1404
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
1405
+ # s.is_nan
1406
+ # # =>
1407
+ # # shape: (4,)
1408
+ # # Series: 'a' [bool]
1409
+ # # [
1410
+ # # false
1411
+ # # false
1412
+ # # false
1413
+ # # true
1414
+ # # ]
1415
+ def is_nan
1416
+ super
1417
+ end
377
1418
 
378
- # def is_not_nan
379
- # end
1419
+ # Returns a boolean Series indicating which values are not NaN.
1420
+ #
1421
+ # @return [Series]
1422
+ #
1423
+ # @example
1424
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
1425
+ # s.is_not_nan
1426
+ # # =>
1427
+ # # shape: (4,)
1428
+ # # Series: 'a' [bool]
1429
+ # # [
1430
+ # # true
1431
+ # # true
1432
+ # # true
1433
+ # # false
1434
+ # # ]
1435
+ def is_not_nan
1436
+ super
1437
+ end
380
1438
 
381
1439
  # def is_in
382
1440
  # end
383
1441
 
384
- # def arg_true
385
- # end
386
-
387
- # def is_unique
388
- # end
1442
+ # Get the index values where a Boolean Series evaluates to `true`.
1443
+ #
1444
+ # @return [Series]
1445
+ #
1446
+ # @example
1447
+ # s = Polars::Series.new("a", [1, 2, 3])
1448
+ # (s == 2).arg_true
1449
+ # # =>
1450
+ # # shape: (1,)
1451
+ # # Series: 'a' [u32]
1452
+ # # [
1453
+ # # 1
1454
+ # # ]
1455
+ def arg_true
1456
+ Polars.arg_where(self, eager: true)
1457
+ end
1458
+
1459
+ # Get mask of all unique values.
1460
+ #
1461
+ # @return [Series]
1462
+ #
1463
+ # @example
1464
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1465
+ # s.is_unique
1466
+ # # =>
1467
+ # # shape: (4,)
1468
+ # # Series: 'a' [bool]
1469
+ # # [
1470
+ # # true
1471
+ # # false
1472
+ # # false
1473
+ # # true
1474
+ # # ]
1475
+ def is_unique
1476
+ super
1477
+ end
389
1478
 
390
- # def is_first
391
- # end
1479
+ # Get a mask of the first unique value.
1480
+ #
1481
+ # @return [Series]
1482
+ def is_first
1483
+ super
1484
+ end
392
1485
 
393
- # def is_duplicated
394
- # end
1486
+ # Get mask of all duplicated values.
1487
+ #
1488
+ # @return [Series]
1489
+ #
1490
+ # @example
1491
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1492
+ # s.is_duplicated
1493
+ # # =>
1494
+ # # shape: (4,)
1495
+ # # Series: 'a' [bool]
1496
+ # # [
1497
+ # # false
1498
+ # # true
1499
+ # # true
1500
+ # # false
1501
+ # # ]
1502
+ def is_duplicated
1503
+ super
1504
+ end
395
1505
 
396
- # def explode
397
- # end
1506
+ # Explode a list or utf8 Series.
1507
+ #
1508
+ # This means that every item is expanded to a new row.
1509
+ #
1510
+ # @return [Series]
1511
+ #
1512
+ # @example
1513
+ # s = Polars::Series.new("a", [[1, 2], [3, 4], [9, 10]])
1514
+ # s.explode
1515
+ # # =>
1516
+ # # shape: (6,)
1517
+ # # Series: 'a' [i64]
1518
+ # # [
1519
+ # # 1
1520
+ # # 2
1521
+ # # 3
1522
+ # # 4
1523
+ # # 9
1524
+ # # 10
1525
+ # # ]
1526
+ def explode
1527
+ super
1528
+ end
398
1529
 
1530
+ # Check if series is equal with another Series.
1531
+ #
1532
+ # @param other [Series]
1533
+ # Series to compare with.
1534
+ # @param null_equal [Boolean]
1535
+ # Consider null values as equal.
1536
+ # @param strict [Boolean]
1537
+ # Don't allow different numerical dtypes, e.g. comparing `:u32` with a
1538
+ # `:i64` will return `false`.
1539
+ #
1540
+ # @return [Boolean]
1541
+ #
1542
+ # @example
1543
+ # s = Polars::Series.new("a", [1, 2, 3])
1544
+ # s2 = Polars::Series.new("b", [4, 5, 6])
1545
+ # s.series_equal(s)
1546
+ # # => true
1547
+ # s.series_equal(s2)
1548
+ # # => false
399
1549
  def series_equal(other, null_equal: false, strict: false)
400
1550
  _s.series_equal(other._s, null_equal, strict)
401
1551
  end
402
1552
 
1553
+ # Length of this Series.
1554
+ #
1555
+ # @return [Integer]
1556
+ #
1557
+ # @example
1558
+ # s = Polars::Series.new("a", [1, 2, 3])
1559
+ # s.len
1560
+ # # => 3
403
1561
  def len
404
1562
  _s.len
405
1563
  end
406
-
407
- # def cast
408
- # end
1564
+ alias_method :length, :len
1565
+
1566
+ # Cast between data types.
1567
+ #
1568
+ # @param dtype [Symbol]
1569
+ # Data type to cast to.
1570
+ # @param strict [Boolean]
1571
+ # Raise an error if a cast could not be done, for instance due to an overflow.
1572
+ #
1573
+ # @return [Series]
1574
+ #
1575
+ # @example
1576
+ # s = Polars::Series.new("a", [true, false, true])
1577
+ # s.cast(:u32)
1578
+ # # =>
1579
+ # # shape: (3,)
1580
+ # # Series: 'a' [u32]
1581
+ # # [
1582
+ # # 1
1583
+ # # 0
1584
+ # # 1
1585
+ # # ]
1586
+ def cast(dtype, strict: true)
1587
+ super
1588
+ end
409
1589
 
410
1590
  # def to_physical
411
1591
  # end
412
1592
 
1593
+ # Convert this Series to a Ruby Array. This operation clones data.
1594
+ #
1595
+ # @return [Array]
1596
+ #
1597
+ # @example
1598
+ # s = Polars::Series.new("a", [1, 2, 3])
1599
+ # s.to_a
1600
+ # # => [1, 2, 3]
413
1601
  def to_a
414
1602
  _s.to_a
415
1603
  end
416
1604
 
1605
+ # Create a single chunk of memory for this Series.
1606
+ #
1607
+ # @param in_place [Boolean]
1608
+ # In place or not.
1609
+ #
1610
+ # @return [Series]
417
1611
  def rechunk(in_place: false)
418
1612
  opt_s = _s.rechunk(in_place)
419
1613
  in_place ? self : Utils.wrap_s(opt_s)
420
1614
  end
421
1615
 
422
- # def reverse
423
- # end
1616
+ # Return Series in reverse order.
1617
+ #
1618
+ # @return [Series]
1619
+ #
1620
+ # @example
1621
+ # s = Polars::Series.new("a", [1, 2, 3], dtype: :i8)
1622
+ # s.reverse
1623
+ # # =>
1624
+ # # shape: (3,)
1625
+ # # Series: 'a' [i8]
1626
+ # # [
1627
+ # # 3
1628
+ # # 2
1629
+ # # 1
1630
+ # # ]
1631
+ def reverse
1632
+ super
1633
+ end
424
1634
 
1635
+ # Check if this Series datatype is numeric.
1636
+ #
1637
+ # @return [Boolean]
1638
+ #
1639
+ # @example
1640
+ # s = Polars::Series.new("a", [1, 2, 3])
1641
+ # s.is_numeric
1642
+ # # => true
425
1643
  def is_numeric
426
1644
  [:i8, :i16, :i32, :i64, :u8, :u16, :u32, :u64, :f32, :f64].include?(dtype)
427
1645
  end
428
1646
  alias_method :numeric?, :is_numeric
429
1647
 
430
- # def is_datelike
431
- # end
432
-
1648
+ # Check if this Series datatype is datelike.
1649
+ #
1650
+ # @return [Boolean]
1651
+ #
1652
+ # @example
1653
+ # s = Polars::Series.new([Date.new(2021, 1, 1), Date.new(2021, 1, 2), Date.new(2021, 1, 3)])
1654
+ # s.is_datelike
1655
+ # # => true
1656
+ def is_datelike
1657
+ [:date, :datetime, :duration, :time].include?(dtype)
1658
+ end
1659
+
1660
+ # Check if this Series has floating point numbers.
1661
+ #
1662
+ # @return [Boolean]
1663
+ #
1664
+ # @example
1665
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0])
1666
+ # s.is_float
1667
+ # # => true
433
1668
  def is_float
434
1669
  [:f32, :f64].include?(dtype)
435
1670
  end
436
1671
  alias_method :float?, :is_float
437
1672
 
438
- def is_bool
1673
+ # Check if this Series is a Boolean.
1674
+ #
1675
+ # @return [Boolean]
1676
+ #
1677
+ # @example
1678
+ # s = Polars::Series.new("a", [true, false, true])
1679
+ # s.is_boolean
1680
+ # # => true
1681
+ def is_boolean
439
1682
  dtype == :bool
440
1683
  end
441
- alias_method :bool?, :is_bool
442
-
1684
+ alias_method :boolean?, :is_boolean
1685
+ alias_method :is_bool, :is_boolean
1686
+ alias_method :bool?, :is_boolean
1687
+
1688
+ # Check if this Series datatype is a Utf8.
1689
+ #
1690
+ # @return [Boolean]
1691
+ #
1692
+ # @example
1693
+ # s = Polars::Series.new("x", ["a", "b", "c"])
1694
+ # s.is_utf8
1695
+ # # => true
443
1696
  def is_utf8
444
1697
  dtype == :str
445
1698
  end
@@ -454,89 +1707,575 @@ module Polars
454
1707
  # def set
455
1708
  # end
456
1709
 
457
- # def set_at_idx
458
- # end
1710
+ # Set values at the index locations.
1711
+ #
1712
+ # @param idx [Object]
1713
+ # Integers representing the index locations.
1714
+ # @param value [Object]
1715
+ # Replacement values.
1716
+ #
1717
+ # @return [Series]
1718
+ #
1719
+ # @example
1720
+ # s = Polars::Series.new("a", [1, 2, 3])
1721
+ # s.set_at_idx(1, 10)
1722
+ # # =>
1723
+ # # shape: (3,)
1724
+ # # Series: 'a' [i64]
1725
+ # # [
1726
+ # # 1
1727
+ # # 10
1728
+ # # 3
1729
+ # # ]
1730
+ def set_at_idx(idx, value)
1731
+ if idx.is_a?(Integer)
1732
+ idx = [idx]
1733
+ end
1734
+ if idx.length == 0
1735
+ return self
1736
+ end
459
1737
 
460
- # def cleared
461
- # end
1738
+ idx = Series.new("", idx)
1739
+ if value.is_a?(Integer) || value.is_a?(Float) || Utils.bool?(value) || value.is_a?(String) || value.nil?
1740
+ value = Series.new("", [value])
1741
+
1742
+ # if we need to set more than a single value, we extend it
1743
+ if idx.length > 0
1744
+ value = value.extend_constant(value[0], idx.length - 1)
1745
+ end
1746
+ elsif !value.is_a?(Series)
1747
+ value = Series.new("", value)
1748
+ end
1749
+ _s.set_at_idx(idx._s, value._s)
1750
+ self
1751
+ end
1752
+
1753
+ # Create an empty copy of the current Series.
1754
+ #
1755
+ # The copy has identical name/dtype but no data.
1756
+ #
1757
+ # @return [Series]
1758
+ #
1759
+ # @example
1760
+ # s = Polars::Series.new("a", [nil, true, false])
1761
+ # s.cleared
1762
+ # # =>
1763
+ # # shape: (0,)
1764
+ # # Series: 'a' [bool]
1765
+ # # [
1766
+ # # ]
1767
+ def cleared
1768
+ len > 0 ? limit(0) : clone
1769
+ end
462
1770
 
463
1771
  # clone handled by initialize_copy
464
1772
 
465
- # def fill_nan
466
- # end
1773
+ # Fill floating point NaN value with a fill value.
1774
+ #
1775
+ # @param fill_value [Object]
1776
+ # Value used to fill nan values.
1777
+ #
1778
+ # @return [Series]
1779
+ #
1780
+ # @example
1781
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
1782
+ # s.fill_nan(0)
1783
+ # # =>
1784
+ # # shape: (4,)
1785
+ # # Series: 'a' [f64]
1786
+ # # [
1787
+ # # 1.0
1788
+ # # 2.0
1789
+ # # 3.0
1790
+ # # 0.0
1791
+ # # ]
1792
+ def fill_nan(fill_value)
1793
+ super
1794
+ end
467
1795
 
468
- # def fill_null
469
- # end
1796
+ # Fill null values using the specified value or strategy.
1797
+ #
1798
+ # @param value [Object]
1799
+ # Value used to fill null values.
1800
+ # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
1801
+ # Strategy used to fill null values.
1802
+ # @param limit [Integer, nil]
1803
+ # Number of consecutive null values to fill when using the "forward" or
1804
+ # "backward" strategy.
1805
+ #
1806
+ # @return [Series]
1807
+ #
1808
+ # @example
1809
+ # s = Polars::Series.new("a", [1, 2, 3, nil])
1810
+ # s.fill_null(strategy: "forward")
1811
+ # # =>
1812
+ # # shape: (4,)
1813
+ # # Series: 'a' [i64]
1814
+ # # [
1815
+ # # 1
1816
+ # # 2
1817
+ # # 3
1818
+ # # 3
1819
+ # # ]
1820
+ #
1821
+ # @example
1822
+ # s.fill_null(strategy: "min")
1823
+ # # =>
1824
+ # # shape: (4,)
1825
+ # # Series: 'a' [i64]
1826
+ # # [
1827
+ # # 1
1828
+ # # 2
1829
+ # # 3
1830
+ # # 1
1831
+ # # ]
1832
+ #
1833
+ # @example
1834
+ # s = Polars::Series.new("b", ["x", nil, "z"])
1835
+ # s.fill_null(Polars.lit(""))
1836
+ # # =>
1837
+ # # shape: (3,)
1838
+ # # Series: 'b' [str]
1839
+ # # [
1840
+ # # "x"
1841
+ # # ""
1842
+ # # "z"
1843
+ # # ]
1844
+ def fill_null(value = nil, strategy: nil, limit: nil)
1845
+ super
1846
+ end
470
1847
 
1848
+ # Rounds down to the nearest integer value.
1849
+ #
1850
+ # Only works on floating point Series.
1851
+ #
1852
+ # @return [Series]
1853
+ #
1854
+ # @example
1855
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
1856
+ # s.floor
1857
+ # # =>
1858
+ # # shape: (3,)
1859
+ # # Series: 'a' [f64]
1860
+ # # [
1861
+ # # 1.0
1862
+ # # 2.0
1863
+ # # 3.0
1864
+ # # ]
471
1865
  def floor
472
1866
  Utils.wrap_s(_s.floor)
473
1867
  end
474
1868
 
1869
+ # Rounds up to the nearest integer value.
1870
+ #
1871
+ # Only works on floating point Series.
1872
+ #
1873
+ # @return [Series]
1874
+ #
1875
+ # @example
1876
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
1877
+ # s.ceil
1878
+ # # =>
1879
+ # # shape: (3,)
1880
+ # # Series: 'a' [f64]
1881
+ # # [
1882
+ # # 2.0
1883
+ # # 3.0
1884
+ # # 4.0
1885
+ # # ]
475
1886
  def ceil
476
- Utils.wrap_s(_s.ceil)
1887
+ super
477
1888
  end
478
1889
 
479
- # default to 0 like Ruby
1890
+ # Round underlying floating point data by `decimals` digits.
1891
+ #
1892
+ # @param decimals [Integer]
1893
+ # number of decimals to round by.
1894
+ #
1895
+ # @return [Series]
1896
+ #
1897
+ # @example
1898
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
1899
+ # s.round(2)
1900
+ # # =>
1901
+ # # shape: (3,)
1902
+ # # Series: 'a' [f64]
1903
+ # # [
1904
+ # # 1.12
1905
+ # # 2.57
1906
+ # # 3.9
1907
+ # # ]
480
1908
  def round(decimals = 0)
481
- Utils.wrap_s(_s.round(decimals))
1909
+ super
482
1910
  end
483
1911
 
484
1912
  # def dot
485
1913
  # end
486
1914
 
487
- # def mode
488
- # end
1915
+ # Compute the most occurring value(s).
1916
+ #
1917
+ # Can return multiple values.
1918
+ #
1919
+ # @return [Series]
1920
+ #
1921
+ # @example
1922
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1923
+ # s.mode
1924
+ # # =>
1925
+ # # shape: (1,)
1926
+ # # Series: 'a' [i64]
1927
+ # # [
1928
+ # # 2
1929
+ # # ]
1930
+ def mode
1931
+ super
1932
+ end
489
1933
 
490
- # def sign
491
- # end
1934
+ # Compute the element-wise indication of the sign.
1935
+ #
1936
+ # @return [Series]
1937
+ #
1938
+ # @example
1939
+ # s = Polars::Series.new("a", [-9.0, -0.0, 0.0, 4.0, nil])
1940
+ # s.sign
1941
+ # # =>
1942
+ # # shape: (5,)
1943
+ # # Series: 'a' [i64]
1944
+ # # [
1945
+ # # -1
1946
+ # # 0
1947
+ # # 0
1948
+ # # 1
1949
+ # # null
1950
+ # # ]
1951
+ def sign
1952
+ super
1953
+ end
492
1954
 
493
- # def sin
494
- # end
1955
+ # Compute the element-wise value for the sine.
1956
+ #
1957
+ # @return [Series]
1958
+ #
1959
+ # @example
1960
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
1961
+ # s.sin
1962
+ # # =>
1963
+ # # shape: (3,)
1964
+ # # Series: 'a' [f64]
1965
+ # # [
1966
+ # # 0.0
1967
+ # # 1.0
1968
+ # # 1.2246e-16
1969
+ # # ]
1970
+ def sin
1971
+ super
1972
+ end
495
1973
 
496
- # def cos
497
- # end
1974
+ # Compute the element-wise value for the cosine.
1975
+ #
1976
+ # @return [Series]
1977
+ #
1978
+ # @example
1979
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
1980
+ # s.cos
1981
+ # # =>
1982
+ # # shape: (3,)
1983
+ # # Series: 'a' [f64]
1984
+ # # [
1985
+ # # 1.0
1986
+ # # 6.1232e-17
1987
+ # # -1.0
1988
+ # # ]
1989
+ def cos
1990
+ super
1991
+ end
498
1992
 
499
- # def tan
500
- # end
1993
+ # Compute the element-wise value for the tangent.
1994
+ #
1995
+ # @return [Series]
1996
+ #
1997
+ # @example
1998
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
1999
+ # s.tan
2000
+ # # =>
2001
+ # # shape: (3,)
2002
+ # # Series: 'a' [f64]
2003
+ # # [
2004
+ # # 0.0
2005
+ # # 1.6331e16
2006
+ # # -1.2246e-16
2007
+ # # ]
2008
+ def tan
2009
+ super
2010
+ end
501
2011
 
502
- # def arcsin
503
- # end
2012
+ # Compute the element-wise value for the inverse sine.
2013
+ #
2014
+ # @return [Series]
2015
+ #
2016
+ # @example
2017
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2018
+ # s.arcsin
2019
+ # # =>
2020
+ # # shape: (3,)
2021
+ # # Series: 'a' [f64]
2022
+ # # [
2023
+ # # 1.570796
2024
+ # # 0.0
2025
+ # # -1.570796
2026
+ # # ]
2027
+ def arcsin
2028
+ super
2029
+ end
504
2030
 
505
- # def arccos
506
- # end
2031
+ # Compute the element-wise value for the inverse cosine.
2032
+ #
2033
+ # @return [Series]
2034
+ #
2035
+ # @example
2036
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2037
+ # s.arccos
2038
+ # # =>
2039
+ # # shape: (3,)
2040
+ # # Series: 'a' [f64]
2041
+ # # [
2042
+ # # 0.0
2043
+ # # 1.570796
2044
+ # # 3.141593
2045
+ # # ]
2046
+ def arccos
2047
+ super
2048
+ end
507
2049
 
508
- # def arctan
509
- # end
2050
+ # Compute the element-wise value for the inverse tangent.
2051
+ #
2052
+ # @return [Series]
2053
+ #
2054
+ # @example
2055
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2056
+ # s.arctan
2057
+ # # =>
2058
+ # # shape: (3,)
2059
+ # # Series: 'a' [f64]
2060
+ # # [
2061
+ # # 0.785398
2062
+ # # 0.0
2063
+ # # -0.785398
2064
+ # # ]
2065
+ def arctan
2066
+ super
2067
+ end
510
2068
 
511
- # def arcsinh
512
- # end
2069
+ # Compute the element-wise value for the inverse hyperbolic sine.
2070
+ #
2071
+ # @return [Series]
2072
+ #
2073
+ # @example
2074
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2075
+ # s.arcsinh
2076
+ # # =>
2077
+ # # shape: (3,)
2078
+ # # Series: 'a' [f64]
2079
+ # # [
2080
+ # # 0.881374
2081
+ # # 0.0
2082
+ # # -0.881374
2083
+ # # ]
2084
+ def arcsinh
2085
+ super
2086
+ end
513
2087
 
514
- # def arccosh
515
- # end
2088
+ # Compute the element-wise value for the inverse hyperbolic cosine.
2089
+ #
2090
+ # @return [Series]
2091
+ #
2092
+ # @example
2093
+ # s = Polars::Series.new("a", [5.0, 1.0, 0.0, -1.0])
2094
+ # s.arccosh
2095
+ # # =>
2096
+ # # shape: (4,)
2097
+ # # Series: 'a' [f64]
2098
+ # # [
2099
+ # # 2.292432
2100
+ # # 0.0
2101
+ # # NaN
2102
+ # # NaN
2103
+ # # ]
2104
+ def arccosh
2105
+ super
2106
+ end
516
2107
 
517
- # def arctanh
518
- # end
2108
+ # Compute the element-wise value for the inverse hyperbolic tangent.
2109
+ #
2110
+ # @return [Series]
2111
+ #
2112
+ # @example
2113
+ # s = Polars::Series.new("a", [2.0, 1.0, 0.5, 0.0, -0.5, -1.0, -1.1])
2114
+ # s.arctanh
2115
+ # # =>
2116
+ # # shape: (7,)
2117
+ # # Series: 'a' [f64]
2118
+ # # [
2119
+ # # NaN
2120
+ # # inf
2121
+ # # 0.549306
2122
+ # # 0.0
2123
+ # # -0.549306
2124
+ # # -inf
2125
+ # # NaN
2126
+ # # ]
2127
+ def arctanh
2128
+ super
2129
+ end
519
2130
 
520
- # def sinh
521
- # end
2131
+ # Compute the element-wise value for the hyperbolic sine.
2132
+ #
2133
+ # @return [Series]
2134
+ #
2135
+ # @example
2136
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2137
+ # s.sinh
2138
+ # # =>
2139
+ # # shape: (3,)
2140
+ # # Series: 'a' [f64]
2141
+ # # [
2142
+ # # 1.175201
2143
+ # # 0.0
2144
+ # # -1.175201
2145
+ # # ]
2146
+ def sinh
2147
+ super
2148
+ end
522
2149
 
523
- # def cosh
524
- # end
2150
+ # Compute the element-wise value for the hyperbolic cosine.
2151
+ #
2152
+ # @return [Series]
2153
+ #
2154
+ # @example
2155
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2156
+ # s.cosh
2157
+ # # =>
2158
+ # # shape: (3,)
2159
+ # # Series: 'a' [f64]
2160
+ # # [
2161
+ # # 1.543081
2162
+ # # 1.0
2163
+ # # 1.543081
2164
+ # # ]
2165
+ def cosh
2166
+ super
2167
+ end
525
2168
 
526
- # def tanh
527
- # end
2169
+ # Compute the element-wise value for the hyperbolic tangent.
2170
+ #
2171
+ # @return [Series]
2172
+ #
2173
+ # @example
2174
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
2175
+ # s.tanh
2176
+ # # =>
2177
+ # # shape: (3,)
2178
+ # # Series: 'a' [f64]
2179
+ # # [
2180
+ # # 0.761594
2181
+ # # 0.0
2182
+ # # -0.761594
2183
+ # # ]
2184
+ def tanh
2185
+ super
2186
+ end
528
2187
 
529
2188
  # def apply
530
2189
  # end
531
2190
 
532
- # def shift
533
- # end
2191
+ # Shift the values by a given period.
2192
+ #
2193
+ # @param periods [Integer]
2194
+ # Number of places to shift (may be negative).
2195
+ #
2196
+ # @return [Series]
2197
+ #
2198
+ # @example
2199
+ # s = Polars::Series.new("a", [1, 2, 3])
2200
+ # s.shift(1)
2201
+ # # =>
2202
+ # # shape: (3,)
2203
+ # # Series: 'a' [i64]
2204
+ # # [
2205
+ # # null
2206
+ # # 1
2207
+ # # 2
2208
+ # # ]
2209
+ #
2210
+ # @example
2211
+ # s.shift(-1)
2212
+ # # =>
2213
+ # # shape: (3,)
2214
+ # # Series: 'a' [i64]
2215
+ # # [
2216
+ # # 2
2217
+ # # 3
2218
+ # # null
2219
+ # # ]
2220
+ def shift(periods = 1)
2221
+ super
2222
+ end
534
2223
 
535
- # def shift_and_fill
536
- # end
2224
+ # Shift the values by a given period and fill the resulting null values.
2225
+ #
2226
+ # @param periods [Integer]
2227
+ # Number of places to shift (may be negative).
2228
+ # @param fill_value [Object]
2229
+ # Value used to fill the null values that result from the shift.
2230
+ #
2231
+ # @return [Series]
2232
+ def shift_and_fill(periods, fill_value)
2233
+ super
2234
+ end
537
2235
 
538
- # def zip_with
539
- # end
2236
+ # Take values from self or other based on the given mask.
2237
+ #
2238
+ # Where mask evaluates true, take values from self. Where mask evaluates false,
2239
+ # take values from other.
2240
+ #
2241
+ # @param mask [Series]
2242
+ # Boolean Series.
2243
+ # @param other [Series]
2244
+ # Series of same type.
2245
+ #
2246
+ # @return [Series]
2247
+ #
2248
+ # @example
2249
+ # s1 = Polars::Series.new([1, 2, 3, 4, 5])
2250
+ # s2 = Polars::Series.new([5, 4, 3, 2, 1])
2251
+ # s1.zip_with(s1 < s2, s2)
2252
+ # # =>
2253
+ # # shape: (5,)
2254
+ # # Series: '' [i64]
2255
+ # # [
2256
+ # # 1
2257
+ # # 2
2258
+ # # 3
2259
+ # # 2
2260
+ # # 1
2261
+ # # ]
2262
+ #
2263
+ # @example
2264
+ # mask = Polars::Series.new([true, false, true, false, true])
2265
+ # s1.zip_with(mask, s2)
2266
+ # # =>
2267
+ # # shape: (5,)
2268
+ # # Series: '' [i64]
2269
+ # # [
2270
+ # # 1
2271
+ # # 4
2272
+ # # 3
2273
+ # # 2
2274
+ # # 5
2275
+ # # ]
2276
+ def zip_with(mask, other)
2277
+ Utils.wrap_s(_s.zip_with(mask._s, other._s))
2278
+ end
540
2279
 
541
2280
  # def rolling_min
542
2281
  # end
@@ -571,62 +2310,266 @@ module Polars
571
2310
  # def sample
572
2311
  # end
573
2312
 
2313
+ # Get a boolean mask of the local maximum peaks.
2314
+ #
2315
+ # @return [Series]
2316
+ #
2317
+ # @example
2318
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
2319
+ # s.peak_max
2320
+ # # =>
2321
+ # # shape: (5,)
2322
+ # # Series: '' [bool]
2323
+ # # [
2324
+ # # false
2325
+ # # false
2326
+ # # false
2327
+ # # false
2328
+ # # true
2329
+ # # ]
574
2330
  def peak_max
575
2331
  Utils.wrap_s(_s.peak_max)
576
2332
  end
577
2333
 
2334
+ # Get a boolean mask of the local minimum peaks.
2335
+ #
2336
+ # @return [Series]
2337
+ #
2338
+ # @example
2339
+ # s = Polars::Series.new("a", [4, 1, 3, 2, 5])
2340
+ # s.peak_min
2341
+ # # =>
2342
+ # # shape: (5,)
2343
+ # # Series: '' [bool]
2344
+ # # [
2345
+ # # false
2346
+ # # true
2347
+ # # false
2348
+ # # true
2349
+ # # false
2350
+ # # ]
578
2351
  def peak_min
579
2352
  Utils.wrap_s(_s.peak_min)
580
2353
  end
581
2354
 
2355
+ # Count the number of unique values in this Series.
2356
+ #
2357
+ # @return [Integer]
2358
+ #
2359
+ # @example
2360
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
2361
+ # s.n_unique
2362
+ # # => 3
582
2363
  def n_unique
583
2364
  _s.n_unique
584
2365
  end
585
2366
 
586
- # def shrink_to_fit
587
- # end
2367
+ # Shrink Series memory usage.
2368
+ #
2369
+ # Shrinks the underlying array capacity to exactly fit the actual data.
2370
+ # (Note that this function does not change the Series data type).
2371
+ #
2372
+ # @return [Series]
2373
+ def shrink_to_fit(in_place: false)
2374
+ if in_place
2375
+ _s.shrink_to_fit
2376
+ self
2377
+ else
2378
+ series = clone
2379
+ series._s.shrink_to_fit
2380
+ series
2381
+ end
2382
+ end
588
2383
 
589
2384
  # def _hash
590
2385
  # end
591
2386
 
592
- # def reinterpret
593
- # end
2387
+ # Reinterpret the underlying bits as a signed/unsigned integer.
2388
+ #
2389
+ # This operation is only allowed for 64-bit integers. For integers with fewer bits,
2390
+ # you can safely use the cast operation.
2391
+ #
2392
+ # @param signed [Boolean]
2393
+ # If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`.
2394
+ #
2395
+ # @return [Series]
2396
+ def reinterpret(signed: true)
2397
+ super
2398
+ end
594
2399
 
595
- # def interpolate
596
- # end
2400
+ # Interpolate intermediate values. The interpolation method is linear.
2401
+ #
2402
+ # @return [Series]
2403
+ #
2404
+ # @example
2405
+ # s = Polars::Series.new("a", [1, 2, nil, nil, 5])
2406
+ # s.interpolate
2407
+ # # =>
2408
+ # # shape: (5,)
2409
+ # # Series: 'a' [i64]
2410
+ # # [
2411
+ # # 1
2412
+ # # 2
2413
+ # # 3
2414
+ # # 4
2415
+ # # 5
2416
+ # # ]
2417
+ def interpolate
2418
+ super
2419
+ end
597
2420
 
598
- # def abs
599
- # end
2421
+ # Compute absolute values.
2422
+ #
2423
+ # @return [Series]
2424
+ def abs
2425
+ super
2426
+ end
600
2427
 
601
2428
  # def rank
602
2429
  # end
603
2430
 
604
- # def diff
605
- # end
2431
+ # Calculate the n-th discrete difference.
2432
+ #
2433
+ # @param n [Integer]
2434
+ # Number of slots to shift.
2435
+ # @param null_behavior ["ignore", "drop"]
2436
+ # How to handle null values.
2437
+ #
2438
+ # @return [Series]
2439
+ def diff(n: 1, null_behavior: "ignore")
2440
+ super
2441
+ end
606
2442
 
607
2443
  # def pct_change
608
2444
  # end
609
2445
 
610
- # def skew
611
- # end
612
-
613
- # def kurtosis
614
- # end
615
-
616
- # def clip
617
- # end
2446
+ # Compute the sample skewness of a data set.
2447
+ #
2448
+ # For normally distributed data, the skewness should be about zero. For
2449
+ # unimodal continuous distributions, a skewness value greater than zero means
2450
+ # that there is more weight in the right tail of the distribution. The
2451
+ # function `skewtest` can be used to determine if the skewness value
2452
+ # is close enough to zero, statistically speaking.
2453
+ #
2454
+ # @param bias [Boolean]
2455
+ # If `false`, the calculations are corrected for statistical bias.
2456
+ #
2457
+ # @return [Float, nil]
2458
+ def skew(bias: true)
2459
+ _s.skew(bias)
2460
+ end
2461
+
2462
+ # Compute the kurtosis (Fisher or Pearson) of a dataset.
2463
+ #
2464
+ # Kurtosis is the fourth central moment divided by the square of the
2465
+ # variance. If Fisher's definition is used, then 3.0 is subtracted from
2466
+ # the result to give 0.0 for a normal distribution.
2467
+ # If bias is false, then the kurtosis is calculated using k statistics to
2468
+ # eliminate bias coming from biased moment estimators.
2469
+ #
2470
+ # @param fisher [Boolean]
2471
+ # If `true`, Fisher's definition is used (normal ==> 0.0). If `false`,
2472
+ # Pearson's definition is used (normal ==> 3.0).
2473
+ # @param bias [Boolean]
2474
+ # If `false`, the calculations are corrected for statistical bias.
2475
+ #
2476
+ # @return [Float, nil]
2477
+ def kurtosis(fisher: true, bias: true)
2478
+ _s.kurtosis(fisher, bias)
2479
+ end
2480
+
2481
+ # Clip (limit) the values in an array to a `min` and `max` boundary.
2482
+ #
2483
+ # Only works for numerical types.
2484
+ #
2485
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
2486
+ # expression. See {#when} for more information.
2487
+ #
2488
+ # @param min_val [Numeric]
2489
+ # Minimum value.
2490
+ # @param max_val [Numeric]
2491
+ # Maximum value.
2492
+ #
2493
+ # @return [Series]
2494
+ #
2495
+ # @example
2496
+ # s = Polars::Series.new("foo", [-50, 5, nil, 50])
2497
+ # s.clip(1, 10)
2498
+ # # =>
2499
+ # # shape: (4,)
2500
+ # # Series: 'foo' [i64]
2501
+ # # [
2502
+ # # 1
2503
+ # # 5
2504
+ # # null
2505
+ # # 10
2506
+ # # ]
2507
+ def clip(min_val, max_val)
2508
+ super
2509
+ end
618
2510
 
619
- # def clip_min
620
- # end
2511
+ # Clip (limit) the values in an array to a `min` boundary.
2512
+ #
2513
+ # Only works for numerical types.
2514
+ #
2515
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
2516
+ # expression. See {#when} for more information.
2517
+ #
2518
+ # @param min_val [Numeric]
2519
+ # Minimum value.
2520
+ #
2521
+ # @return [Series]
2522
+ def clip_min(min_val)
2523
+ super
2524
+ end
621
2525
 
622
- # def clip_max
623
- # end
2526
+ # Clip (limit) the values in an array to a `max` boundary.
2527
+ #
2528
+ # Only works for numerical types.
2529
+ #
2530
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
2531
+ # expression. See {#when} for more information.
2532
+ #
2533
+ # @param max_val [Numeric]
2534
+ # Maximum value.
2535
+ #
2536
+ # @return [Series]
2537
+ def clip_max(max_val)
2538
+ super
2539
+ end
624
2540
 
625
- # def reshape
626
- # end
2541
+ # Reshape this Series to a flat Series or a Series of Lists.
2542
+ #
2543
+ # @param dims [Array]
2544
+ # Array of the dimension sizes. If a -1 is used in any of the dimensions, that
2545
+ # dimension is inferred.
2546
+ #
2547
+ # @return [Series]
2548
+ def reshape(dims)
2549
+ super
2550
+ end
627
2551
 
628
- # def shuffle
629
- # end
2552
+ # Shuffle the contents of this Series.
2553
+ #
2554
+ # @param seed [Integer, nil]
2555
+ # Seed for the random number generator.
2556
+ #
2557
+ # @return [Series]
2558
+ #
2559
+ # @example
2560
+ # s = Polars::Series.new("a", [1, 2, 3])
2561
+ # s.shuffle(seed: 1)
2562
+ # # =>
2563
+ # # shape: (3,)
2564
+ # # Series: 'a' [i64]
2565
+ # # [
2566
+ # # 2
2567
+ # # 1
2568
+ # # 3
2569
+ # # ]
2570
+ def shuffle(seed: nil)
2571
+ super
2572
+ end
630
2573
 
631
2574
  # def ewm_mean
632
2575
  # end
@@ -637,18 +2580,70 @@ module Polars
637
2580
  # def ewm_var
638
2581
  # end
639
2582
 
640
- # def extend_constant
641
- # end
2583
+ # Extend the Series with a given number of values.
2584
+ #
2585
+ # @param value [Object]
2586
+ # The value to extend the Series with. This value may be `nil` to fill with
2587
+ # nulls.
2588
+ # @param n [Integer]
2589
+ # The number of values to extend.
2590
+ #
2591
+ # @return [Series]
2592
+ #
2593
+ # @example
2594
+ # s = Polars::Series.new("a", [1, 2, 3])
2595
+ # s.extend_constant(99, 2)
2596
+ # # =>
2597
+ # # shape: (5,)
2598
+ # # Series: 'a' [i64]
2599
+ # # [
2600
+ # # 1
2601
+ # # 2
2602
+ # # 3
2603
+ # # 99
2604
+ # # 99
2605
+ # # ]
2606
+ def extend_constant(value, n)
2607
+ super
2608
+ end
642
2609
 
2610
+ # Flags the Series as sorted.
2611
+ #
2612
+ # Enables downstream code to use fast paths for sorted arrays.
2613
+ #
2614
+ # @param reverse [Boolean]
2615
+ # If the Series order is reversed, e.g. descending.
2616
+ #
2617
+ # @return [Series]
2618
+ #
2619
+ # @note
2620
+ # This can lead to incorrect results if this Series is not sorted!!
2621
+ # Use with care!
2622
+ #
2623
+ # @example
2624
+ # s = Polars::Series.new("a", [1, 2, 3])
2625
+ # s.set_sorted.max
2626
+ # # => 3
643
2627
  def set_sorted(reverse: false)
644
2628
  Utils.wrap_s(_s.set_sorted(reverse))
645
2629
  end
646
2630
 
647
- # def new_from_index
648
- # end
2631
+ # Create a new Series filled with values from the given index.
2632
+ #
2633
+ # @return [Series]
2634
+ def new_from_index(index, length)
2635
+ Utils.wrap_s(_s.new_from_index(index, length))
2636
+ end
649
2637
 
650
- # def shrink_dtype
651
- # end
2638
+ # Shrink numeric columns to the minimal required datatype.
2639
+ #
2640
+ # Shrink to the dtype needed to fit the extrema of this Series.
2641
+ # This can be used to reduce memory pressure.
2642
+ #
2643
+ # @return [Series]
2644
+ def shrink_dtype
2645
+ super
2646
+ end
652
2647
 
653
2648
  # def arr
654
2649
  # end
@@ -672,8 +2667,47 @@ module Polars
672
2667
  self._s = _s._clone
673
2668
  end
674
2669
 
2670
+ def coerce(other)
2671
+ if other.is_a?(Numeric)
2672
+ # TODO improve
2673
+ series = to_frame.select(Polars.lit(other)).to_series
2674
+ [series, self]
2675
+ else
2676
+ raise TypeError, "#{self.class} can't be coerced into #{other.class}"
2677
+ end
2678
+ end
2679
+
2680
+ def _comp(other, op)
2681
+ if other.is_a?(Series)
2682
+ return Utils.wrap_s(_s.send(op, other._s))
2683
+ end
2684
+
2685
+ if dtype == :str
2686
+ raise Todo
2687
+ end
2688
+ Utils.wrap_s(_s.send("#{op}_#{dtype}", other))
2689
+ end
2690
+
2691
+ def _arithmetic(other, op)
2692
+ if other.is_a?(Expr)
2693
+ other = to_frame.select(other).to_series
2694
+ end
2695
+ if other.is_a?(Series)
2696
+ return Utils.wrap_s(_s.send(op, other._s))
2697
+ end
2698
+
2699
+ raise Todo
2700
+ end
2701
+
2702
+ def series_to_rbseries(name, values)
2703
+ # should not be in-place?
2704
+ values.rename(name, in_place: true)
2705
+ values._s
2706
+ end
2707
+
675
2708
  def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
676
2709
  ruby_dtype = nil
2710
+ nested_dtype = nil
677
2711
 
678
2712
  if (values.nil? || values.empty?) && dtype.nil?
679
2713
  if dtype_if_empty
@@ -682,7 +2716,7 @@ module Polars
682
2716
  dtype = dtype_if_empty
683
2717
  else
684
2718
  # default to Float32 type
685
- dtype = "f32"
2719
+ dtype = :f32
686
2720
  end
687
2721
  end
688
2722
 
@@ -691,8 +2725,7 @@ module Polars
691
2725
  rb_temporal_types << DateTime if defined?(DateTime)
692
2726
  rb_temporal_types << Time if defined?(Time)
693
2727
 
694
- # _get_first_non_none
695
- value = values.find { |v| !v.nil? }
2728
+ value = _get_first_non_none(values)
696
2729
 
697
2730
  if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
698
2731
  constructor = polars_type_to_constructor(dtype)
@@ -716,7 +2749,22 @@ module Polars
716
2749
  # dtype = rb_type_to_dtype(dtype)
717
2750
  # end
718
2751
 
719
- raise "todo"
2752
+ if ruby_dtype == Date
2753
+ RbSeries.new_opt_date(name, values, strict)
2754
+ else
2755
+ raise Todo
2756
+ end
2757
+ elsif ruby_dtype == Array
2758
+ if nested_dtype.nil?
2759
+ nested_value = _get_first_non_none(value)
2760
+ nested_dtype = nested_value.nil? ? Float : nested_value.class
2761
+ end
2762
+
2763
+ if nested_dtype == Array
2764
+ raise Todo
2765
+ end
2766
+
2767
+ raise Todo
720
2768
  else
721
2769
  constructor = rb_type_to_constructor(value.class)
722
2770
  constructor.call(name, values, strict)
@@ -759,5 +2807,9 @@ module Polars
759
2807
  # RbSeries.method(:new_object)
760
2808
  raise ArgumentError, "Cannot determine type"
761
2809
  end
2810
+
2811
+ def _get_first_non_none(values)
2812
+ values.find { |v| !v.nil? }
2813
+ end
762
2814
  end
763
2815
  end