polars-df 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/lib/polars/series.rb CHANGED
@@ -1,7 +1,38 @@
1
1
  module Polars
2
+ # A Series represents a single column in a polars DataFrame.
2
3
  class Series
4
+ # @private
3
5
  attr_accessor :_s
4
6
 
7
+ # Create a new Series.
8
+ #
9
+ # @param name [String, Array, nil]
10
+ # Name of the series. Will be used as a column name when used in a DataFrame.
11
+ # When not specified, name is set to an empty string.
12
+ # @param values [Array, nil]
13
+ # One-dimensional data in various forms. Supported are: Array and Series.
14
+ # @param dtype [Symbol, nil]
15
+ # Polars dtype of the Series data. If not specified, the dtype is inferred.
16
+ # @param strict [Boolean]
17
+ # Throw error on numeric overflow.
18
+ # @param nan_to_null [Boolean]
19
+ # Not used.
20
+ # @param dtype_if_empty [Symbol, nil]
21
+ # If no dtype is specified and values contains `nil` or an empty array,
22
+ # set the Polars dtype of the Series data. If not specified, Float32 is used.
23
+ #
24
+ # @example Constructing a Series by specifying name and values positionally:
25
+ # s = Polars::Series.new("a", [1, 2, 3])
26
+ #
27
+ # @example Notice that the dtype is automatically inferred as a polars Int64:
28
+ # s.dtype
29
+ # # => :i64
30
+ #
31
+ # @example Constructing a Series with a specific dtype:
32
+ # s2 = Polars::Series.new("a", [1, 2, 3], dtype: :f32)
33
+ #
34
+ # @example It is possible to construct a Series with values as the first positional argument. This syntax is considered an anti-pattern, but it can be useful in certain scenarios. You must specify any other arguments through keywords.
35
+ # s3 = Polars::Series.new([1, 2, 3])
5
36
  def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false, dtype_if_empty: nil)
6
37
  # Handle case where values are passed as the first argument
7
38
  if !name.nil? && !name.is_a?(String)
@@ -35,16 +66,23 @@ module Polars
35
66
  end
36
67
  end
37
68
 
69
+ # @private
38
70
  def self._from_rbseries(s)
39
71
  series = Series.allocate
40
72
  series._s = s
41
73
  series
42
74
  end
43
75
 
76
+ # Get the data type of this Series.
77
+ #
78
+ # @return [Symbol]
44
79
  def dtype
45
- _s.dtype.to_sym
80
+ _s.dtype
46
81
  end
47
82
 
83
+ # Get flags that are set on the Series.
84
+ #
85
+ # @return [Hash]
48
86
  def flags
49
87
  {
50
88
  "SORTED_ASC" => _s.is_sorted_flag,
@@ -52,34 +90,59 @@ module Polars
52
90
  }
53
91
  end
54
92
 
93
+ # Get the inner dtype of a List typed Series.
94
+ #
95
+ # @return [Symbol]
55
96
  def inner_dtype
56
- _s.inner_dtype&.to_sym
97
+ _s.inner_dtype
57
98
  end
58
99
 
100
+ # Get the name of this Series.
101
+ #
102
+ # @return [String]
59
103
  def name
60
104
  _s.name
61
105
  end
62
106
 
107
+ # Shape of this Series.
108
+ #
109
+ # @return [Array]
63
110
  def shape
64
111
  [_s.len]
65
112
  end
66
113
 
67
- # def time_unit
68
- # end
114
+ # Get the time unit of underlying Datetime Series as `"ns"`, `"us"`, or `"ms"`.
115
+ #
116
+ # @return [String]
117
+ def time_unit
118
+ _s.time_unit
119
+ end
69
120
 
121
+ # Returns a string representing the Series.
122
+ #
123
+ # @return [String]
70
124
  def to_s
71
125
  _s.to_s
72
126
  end
73
127
  alias_method :inspect, :to_s
74
128
 
129
+ # Bitwise AND.
130
+ #
131
+ # @return [Series]
75
132
  def &(other)
76
133
  Utils.wrap_s(_s.bitand(other._s))
77
134
  end
78
135
 
136
+ # Bitwise OR.
137
+ #
138
+ # @return [Series]
79
139
  def |(other)
80
140
  Utils.wrap_s(_s.bitor(other._s))
81
141
  end
82
142
 
143
+ # Bitwise XOR.
144
+ #
145
+ # @return [Series]
83
146
  def ^(other)
84
147
  Utils.wrap_s(_s.bitxor(other._s))
85
148
  end
@@ -102,32 +165,50 @@ module Polars
102
165
  # def <=(other)
103
166
  # end
104
167
 
168
+ # Performs addition.
169
+ #
170
+ # @return [Series]
105
171
  def +(other)
106
172
  Utils. wrap_s(_s.add(other._s))
107
173
  end
108
174
 
175
+ # Performs subtraction.
176
+ #
177
+ # @return [Series]
109
178
  def -(other)
110
179
  Utils.wrap_s(_s.sub(other._s))
111
180
  end
112
181
 
182
+ # Performs multiplication.
183
+ #
184
+ # @return [Series]
113
185
  def *(other)
114
186
  Utils.wrap_s(_s.mul(other._s))
115
187
  end
116
188
 
189
+ # Performs division.
190
+ #
191
+ # @return [Series]
117
192
  def /(other)
118
193
  Utils.wrap_s(_s.div(other._s))
119
194
  end
120
195
 
196
+ # Raises to the power of exponent.
197
+ #
198
+ # @return [Series]
121
199
  def **(power)
122
- # if is_datelike
123
- # raise ArgumentError, "first cast to integer before raising datelike dtypes to a power"
124
- # end
200
+ if is_datelike
201
+ raise ArgumentError, "first cast to integer before raising datelike dtypes to a power"
202
+ end
125
203
  to_frame.select(Polars.col(name).pow(power)).to_series
126
204
  end
127
205
 
128
206
  # def -@(other)
129
207
  # end
130
208
 
209
+ # Returns elements of the Series.
210
+ #
211
+ # @return [Object]
131
212
  def [](item)
132
213
  _s.get_idx(item)
133
214
  end
@@ -135,19 +216,54 @@ module Polars
135
216
  # def []=(key, value)
136
217
  # end
137
218
 
219
+ # Return an estimation of the total (heap) allocated size of the Series.
220
+ #
221
+ # Estimated size is given in the specified unit (bytes by default).
222
+ #
223
+ # This estimation is the sum of the size of its buffers, validity, including
224
+ # nested arrays. Multiple arrays may share buffers and bitmaps. Therefore, the
225
+ # size of 2 arrays is not the sum of the sizes computed from this function. In
226
+ # particular, StructArray's size is an upper bound.
227
+ #
228
+ # When an array is sliced, its allocated size remains constant because the buffer
229
+ # is unchanged. However, this function will yield a smaller number. This is because
230
+ # this function returns the visible size of the buffer, not its total capacity.
231
+ #
232
+ # FFI buffers are included in this estimation.
233
+ #
234
+ # @param unit ["b", "kb", "mb", "gb", "tb"]
235
+ # Scale the returned size to the given unit.
236
+ #
237
+ # @return [Numeric]
238
+ #
239
+ # @example
240
+ # s = Polars::Series.new("values", 1..1_000_000, dtype: :u32)
241
+ # s.estimated_size
242
+ # # => 4000000
243
+ # s.estimated_size("mb")
244
+ # # => 3.814697265625
138
245
  def estimated_size(unit = "b")
139
246
  sz = _s.estimated_size
140
247
  Utils.scale_bytes(sz, to: unit)
141
248
  end
142
249
 
250
+ # Compute the square root of the elements.
251
+ #
252
+ # @return [Series]
143
253
  def sqrt
144
- self ** 0.5
254
+ self**0.5
145
255
  end
146
256
 
257
+ # Check if any boolean value in the column is `true`.
258
+ #
259
+ # @return [Boolean]
147
260
  def any
148
261
  to_frame.select(Polars.col(name).any).to_series[0]
149
262
  end
150
263
 
264
+ # Check if all boolean values in the column are `true`.
265
+ #
266
+ # @return [Boolean]
151
267
  def all
152
268
  to_frame.select(Polars.col(name).all).to_series[0]
153
269
  end
@@ -167,6 +283,9 @@ module Polars
167
283
  # def drop_nans
168
284
  # end
169
285
 
286
+ # Cast this Series to a DataFrame.
287
+ #
288
+ # @return [DataFrame]
170
289
  def to_frame
171
290
  Utils.wrap_df(RbDataFrame.new([_s]))
172
291
  end
@@ -174,22 +293,61 @@ module Polars
174
293
  # def describe
175
294
  # end
176
295
 
296
+ # Reduce this Series to the sum value.
297
+ #
298
+ # @return [Numeric]
299
+ #
300
+ # @note
301
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
302
+ # `:i64` before summing to prevent overflow issues.
303
+ #
304
+ # @example
305
+ # s = Polars::Series.new("a", [1, 2, 3])
306
+ # s.sum
307
+ # # => 6
177
308
  def sum
178
309
  _s.sum
179
310
  end
180
311
 
312
+ # Reduce this Series to the mean value.
313
+ #
314
+ # @return [Float, nil]
315
+ #
316
+ # @example
317
+ # s = Polars::Series.new("a", [1, 2, 3])
318
+ # s.mean
319
+ # # => 2.0
181
320
  def mean
182
321
  _s.mean
183
322
  end
184
323
 
324
+ # Reduce this Series to the product value.
325
+ #
326
+ # @return [Numeric]
185
327
  def product
186
328
  to_frame.select(Polars.col(name).product).to_series[0]
187
329
  end
188
330
 
331
+ # Get the minimal value in this Series.
332
+ #
333
+ # @return [Object]
334
+ #
335
+ # @example
336
+ # s = Polars::Series.new("a", [1, 2, 3])
337
+ # s.min
338
+ # # => 1
189
339
  def min
190
340
  _s.min
191
341
  end
192
342
 
343
+ # Get the maximum value in this Series.
344
+ #
345
+ # @return [Object]
346
+ #
347
+ # @example
348
+ # s = Polars::Series.new("a", [1, 2, 3])
349
+ # s.max
350
+ # # => 3
193
351
  def max
194
352
  _s.max
195
353
  end
@@ -200,6 +358,18 @@ module Polars
200
358
  # def nan_min
201
359
  # end
202
360
 
361
+ # Get the standard deviation of this Series.
362
+ #
363
+ # @param ddof [Integer]
364
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
365
+ # where N represents the number of elements.
366
+ #
367
+ # @return [Float, nil]
368
+ #
369
+ # @example
370
+ # s = Polars::Series.new("a", [1, 2, 3])
371
+ # s.std
372
+ # # => 1.0
203
373
  def std(ddof: 1)
204
374
  if !is_numeric
205
375
  nil
@@ -208,6 +378,18 @@ module Polars
208
378
  end
209
379
  end
210
380
 
381
+ # Get variance of this Series.
382
+ #
383
+ # @param ddof [Integer]
384
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
385
+ # where N represents the number of elements.
386
+ #
387
+ # @return [Float, nil]
388
+ #
389
+ # @example
390
+ # s = Polars::Series.new("a", [1, 2, 3])
391
+ # s.var
392
+ # # => 1.0
211
393
  def var(ddof: 1)
212
394
  if !is_numeric
213
395
  nil
@@ -216,18 +398,82 @@ module Polars
216
398
  end
217
399
  end
218
400
 
401
+ # Get the median of this Series.
402
+ #
403
+ # @return [Float, nil]
404
+ #
405
+ # @example
406
+ # s = Polars::Series.new("a", [1, 2, 3])
407
+ # s.median
408
+ # # => 2.0
219
409
  def median
220
410
  _s.median
221
411
  end
222
412
 
413
+ # Get the quantile value of this Series.
414
+ #
415
+ # @param quantile [Float, nil]
416
+ # Quantile between 0.0 and 1.0.
417
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
418
+ # Interpolation method.
419
+ #
420
+ # @return [Float, nil]
421
+ #
422
+ # @example
423
+ # s = Polars::Series.new("a", [1, 2, 3])
424
+ # s.quantile(0.5)
425
+ # # => 2.0
223
426
  def quantile(quantile, interpolation: "nearest")
224
427
  _s.quantile(quantile, interpolation)
225
428
  end
226
429
 
430
+ # Get dummy variables.
431
+ #
432
+ # @return [DataFrame]
433
+ #
434
+ # @example
435
+ # s = Polars::Series.new("a", [1, 2, 3])
436
+ # s.to_dummies
437
+ # # =>
438
+ # # shape: (3, 3)
439
+ # # ┌─────┬─────┬─────┐
440
+ # # │ a_1 ┆ a_2 ┆ a_3 │
441
+ # # │ --- ┆ --- ┆ --- │
442
+ # # │ u8 ┆ u8 ┆ u8 │
443
+ # # ╞═════╪═════╪═════╡
444
+ # # │ 1 ┆ 0 ┆ 0 │
445
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
446
+ # # │ 0 ┆ 1 ┆ 0 │
447
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
448
+ # # │ 0 ┆ 0 ┆ 1 │
449
+ # # └─────┴─────┴─────┘
227
450
  def to_dummies
228
451
  Utils.wrap_df(_s.to_dummies)
229
452
  end
230
453
 
454
+ # Count the unique values in a Series.
455
+ #
456
+ # @param sort [Boolean]
457
+ # Ensure the output is sorted from most values to least.
458
+ #
459
+ # @return [DataFrame]
460
+ #
461
+ # @example
462
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
463
+ # s.value_counts.sort("a")
464
+ # # =>
465
+ # # shape: (3, 2)
466
+ # # ┌─────┬────────┐
467
+ # # │ a ┆ counts │
468
+ # # │ --- ┆ --- │
469
+ # # │ i64 ┆ u32 │
470
+ # # ╞═════╪════════╡
471
+ # # │ 1 ┆ 1 │
472
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
473
+ # # │ 2 ┆ 2 │
474
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
475
+ # # │ 3 ┆ 1 │
476
+ # # └─────┴────────┘
231
477
  def value_counts(sort: false)
232
478
  Utils.wrap_df(_s.value_counts(sort))
233
479
  end
@@ -241,12 +487,34 @@ module Polars
241
487
  # def cumulative_eval
242
488
  # end
243
489
 
490
+ # Return a copy of the Series with a new alias/name.
491
+ #
492
+ # @param name [String]
493
+ # New name.
494
+ #
495
+ # @return [Series]
496
+ #
497
+ # @example
498
+ # s = Polars::Series.new("x", [1, 2, 3])
499
+ # s.alias("y")
244
500
  def alias(name)
245
501
  s = dup
246
502
  s._s.rename(name)
247
503
  s
248
504
  end
249
505
 
506
+ # Rename this Series.
507
+ #
508
+ # @param name [String]
509
+ # New name.
510
+ # @param in_place [Boolean]
511
+ # Modify the Series in-place.
512
+ #
513
+ # @return [Series]
514
+ #
515
+ # @example
516
+ # s = Polars::Series.new("a", [1, 2, 3])
517
+ # s.rename("b")
250
518
  def rename(name, in_place: false)
251
519
  if in_place
252
520
  _s.rename(name)
@@ -256,52 +524,313 @@ module Polars
256
524
  end
257
525
  end
258
526
 
527
+ # Get the length of each individual chunk.
528
+ #
529
+ # @return [Array]
530
+ #
531
+ # @example
532
+ # s = Polars::Series.new("a", [1, 2, 3])
533
+ # s2 = Polars::Series.new("b", [4, 5, 6])
534
+ #
535
+ # @example Concatenate Series with rechunk: true
536
+ # Polars.concat([s, s2]).chunk_lengths
537
+ # # => [6]
538
+ #
539
+ # @example Concatenate Series with rechunk: false
540
+ # Polars.concat([s, s2], rechunk: false).chunk_lengths
541
+ # # => [3, 3]
259
542
  def chunk_lengths
260
543
  _s.chunk_lengths
261
544
  end
262
545
 
546
+ # Get the number of chunks that this Series contains.
547
+ #
548
+ # @return [Integer]
549
+ #
550
+ # @example
551
+ # s = Polars::Series.new("a", [1, 2, 3])
552
+ # s2 = Polars::Series.new("b", [4, 5, 6])
553
+ #
554
+ # @example Concatenate Series with rechunk: true
555
+ # Polars.concat([s, s2]).n_chunks
556
+ # # => 1
557
+ #
558
+ # @example Concatenate Series with rechunk: false
559
+ # Polars.concat([s, s2], rechunk: false).n_chunks
560
+ # # => 2
263
561
  def n_chunks
264
562
  _s.n_chunks
265
563
  end
266
564
 
565
+ # Get an array with the cumulative sum computed at every element.
566
+ #
567
+ # @param reverse [Boolean]
568
+ # reverse the operation.
569
+ #
570
+ # @return [Series]
571
+ #
572
+ # @note
573
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
574
+ # `:i64` before summing to prevent overflow issues.
575
+ #
576
+ # @example
577
+ # s = Polars::Series.new("a", [1, 2, 3])
578
+ # s.cumsum
579
+ # # =>
580
+ # # shape: (3,)
581
+ # # Series: 'a' [i64]
582
+ # # [
583
+ # # 1
584
+ # # 3
585
+ # # 6
586
+ # # ]
267
587
  def cumsum(reverse: false)
268
588
  Utils.wrap_s(_s.cumsum(reverse))
269
589
  end
270
590
 
591
+ # Get an array with the cumulative min computed at every element.
592
+ #
593
+ # @param reverse [Boolean]
594
+ # reverse the operation.
595
+ #
596
+ # @return [Series]
597
+ #
598
+ # @example
599
+ # s = Polars::Series.new("a", [3, 5, 1])
600
+ # s.cummin
601
+ # # =>
602
+ # # shape: (3,)
603
+ # # Series: 'a' [i64]
604
+ # # [
605
+ # # 3
606
+ # # 3
607
+ # # 1
608
+ # # ]
271
609
  def cummin(reverse: false)
272
610
  Utils.wrap_s(_s.cummin(reverse))
273
611
  end
274
612
 
613
+ # Get an array with the cumulative max computed at every element.
614
+ #
615
+ # @param reverse [Boolean]
616
+ # reverse the operation.
617
+ #
618
+ # @return [Series]
619
+ #
620
+ # @example
621
+ # s = Polars::Series.new("a", [3, 5, 1])
622
+ # s.cummax
623
+ # # =>
624
+ # # shape: (3,)
625
+ # # Series: 'a' [i64]
626
+ # # [
627
+ # # 3
628
+ # # 5
629
+ # # 5
630
+ # # ]
275
631
  def cummax(reverse: false)
276
632
  Utils.wrap_s(_s.cummax(reverse))
277
633
  end
278
634
 
635
+ # Get an array with the cumulative product computed at every element.
636
+ #
637
+ # @param reverse [Boolean]
638
+ # reverse the operation.
639
+ #
640
+ # @return [Series]
641
+ #
642
+ # @note
643
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
644
+ # `:i64` before multiplying to prevent overflow issues.
645
+ #
646
+ # @example
647
+ # s = Polars::Series.new("a", [1, 2, 3])
648
+ # s.cumprod
649
+ # # =>
650
+ # # shape: (3,)
651
+ # # Series: 'a' [i64]
652
+ # # [
653
+ # # 1
654
+ # # 2
655
+ # # 6
656
+ # # ]
279
657
  def cumprod(reverse: false)
280
658
  Utils.wrap_s(_s.cumprod(reverse))
281
659
  end
282
660
 
661
+ # Get the first `n` rows.
662
+ #
663
+ # Alias for {#head}.
664
+ #
665
+ # @param n [Integer]
666
+ # Number of rows to return.
667
+ #
668
+ # @return [Series]
669
+ #
670
+ # @example
671
+ # s = Polars::Series.new("a", [1, 2, 3])
672
+ # s.limit(2)
673
+ # # =>
674
+ # # shape: (2,)
675
+ # # Series: 'a' [i64]
676
+ # # [
677
+ # # 1
678
+ # # 2
679
+ # # ]
283
680
  def limit(n = 10)
284
681
  to_frame.select(Utils.col(name).limit(n)).to_series
285
682
  end
286
683
 
684
+ # Get a slice of this Series.
685
+ #
686
+ # @param offset [Integer]
687
+ # Start index. Negative indexing is supported.
688
+ # @param length [Integer, nil]
689
+ # Length of the slice. If set to `nil`, all rows starting at the offset
690
+ # will be selected.
691
+ #
692
+ # @return [Series]
693
+ #
694
+ # @example
695
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
696
+ # s.slice(1, 2)
697
+ # # =>
698
+ # # shape: (2,)
699
+ # # Series: 'a' [i64]
700
+ # # [
701
+ # # 2
702
+ # # 3
703
+ # # ]
287
704
  def slice(offset, length = nil)
288
705
  length = len if length.nil?
289
706
  Utils.wrap_s(_s.slice(offset, length))
290
707
  end
291
708
 
292
- def append(other)
293
- _s.append(other._s)
709
+ # Append a Series to this one.
710
+ #
711
+ # @param other [Series]
712
+ # Series to append.
713
+ # @param append_chunks [Boolean]
714
+ # If set to `true` the append operation will add the chunks from `other` to
715
+ # self. This is super cheap.
716
+ #
717
+ # If set to `false` the append operation will do the same as
718
+ # {DataFrame#extend} which extends the memory backed by this Series with
719
+ # the values from `other`.
720
+ #
721
+ # Different from `append_chunks`, `extend` appends the data from `other` to
722
+ # the underlying memory locations and thus may cause a reallocation (which is
723
+ # expensive).
724
+ #
725
+ # If this does not cause a reallocation, the resulting data structure will not
726
+ # have any extra chunks and thus will yield faster queries.
727
+ #
728
+ # Prefer `extend` over `append_chunks` when you want to do a query after a
729
+ # single append. For instance during online operations where you add `n` rows
730
+ # and rerun a query.
731
+ #
732
+ # Prefer `append_chunks` over `extend` when you want to append many times
733
+ # before doing a query. For instance, when you read in multiple files and want
734
+ # to store them in a single Series. In the latter case, finish the sequence
735
+ # of `append_chunks` operations with a `rechunk`.
736
+ #
737
+ # @return [Series]
738
+ #
739
+ # @example
740
+ # s = Polars::Series.new("a", [1, 2, 3])
741
+ # s2 = Polars::Series.new("b", [4, 5, 6])
742
+ # s.append(s2)
743
+ # # =>
744
+ # # shape: (6,)
745
+ # # Series: 'a' [i64]
746
+ # # [
747
+ # # 1
748
+ # # 2
749
+ # # 3
750
+ # # 4
751
+ # # 5
752
+ # # 6
753
+ # # ]
754
def append(other, append_chunks: true)
  # append_chunks selects between the two native operations: `append`
  # adds other's chunks to this Series, `extend` writes other's values
  # into this Series' existing memory.
  if append_chunks
    _s.append(other._s)
  else
    _s.extend(other._s)
  end
  # Mutates the receiver and returns it so calls can be chained.
  self
rescue => e
  # Only the borrow conflict reported by the native layer is retried;
  # every other error propagates unchanged.
  # NOTE(review): presumably triggered when `other` shares its underlying
  # storage with the receiver (e.g. `s.append(s)`) - confirm.
  raise unless e.message == "Already mutably borrowed"

  # Retry against a copy of `other`. The flag must be forwarded as a
  # keyword: the signature accepts a single positional argument, so passing
  # it positionally raises ArgumentError instead of retrying.
  append(other.clone, append_chunks: append_chunks)
end
296
770
 
771
+ # Filter elements by a boolean mask.
772
+ #
773
+ # @param predicate [Series, Array]
774
+ # Boolean mask.
775
+ #
776
+ # @return [Series]
777
+ #
778
+ # @example
779
+ # s = Polars::Series.new("a", [1, 2, 3])
780
+ # mask = Polars::Series.new("", [true, false, true])
781
+ # s.filter(mask)
782
+ # # =>
783
+ # # shape: (2,)
784
+ # # Series: 'a' [i64]
785
+ # # [
786
+ # # 1
787
+ # # 3
788
+ # # ]
297
789
  def filter(predicate)
790
+ if predicate.is_a?(Array)
791
+ predicate = Series.new("", predicate)
792
+ end
298
793
  Utils.wrap_s(_s.filter(predicate._s))
299
794
  end
300
795
 
796
+ # Get the first `n` rows.
797
+ #
798
+ # @param n [Integer]
799
+ # Number of rows to return.
800
+ #
801
+ # @return [Series]
802
+ #
803
+ # @example
804
+ # s = Polars::Series.new("a", [1, 2, 3])
805
+ # s.head(2)
806
+ # # =>
807
+ # # shape: (2,)
808
+ # # Series: 'a' [i64]
809
+ # # [
810
+ # # 1
811
+ # # 2
812
+ # # ]
301
813
  def head(n = 10)
302
814
  to_frame.select(Utils.col(name).head(n)).to_series
303
815
  end
304
816
 
817
+ # Get the last `n` rows.
818
+ #
819
+ # @param n [Integer]
820
+ # Number of rows to return.
821
+ #
822
+ # @return [Series]
823
+ #
824
+ # @example
825
+ # s = Polars::Series.new("a", [1, 2, 3])
826
+ # s.tail(2)
827
+ # # =>
828
+ # # shape: (2,)
829
+ # # Series: 'a' [i64]
830
+ # # [
831
+ # # 2
832
+ # # 3
833
+ # # ]
305
834
  def tail(n = 10)
306
835
  to_frame.select(Utils.col(name).tail(n)).to_series
307
836
  end
@@ -309,6 +838,37 @@ module Polars
309
838
  # def take_every
310
839
  # end
311
840
 
841
+ # Sort this Series.
842
+ #
843
+ # @param reverse [Boolean]
844
+ # Reverse sort.
845
+ # @param in_place [Boolean]
846
+ # Sort in place.
847
+ #
848
+ # @return [Series]
849
+ #
850
+ # @example
851
+ # s = Polars::Series.new("a", [1, 3, 4, 2])
852
+ # s.sort
853
+ # # =>
854
+ # # shape: (4,)
855
+ # # Series: 'a' [i64]
856
+ # # [
857
+ # # 1
858
+ # # 2
859
+ # # 3
860
+ # # 4
861
+ # # ]
862
+ # s.sort(reverse: true)
863
+ # # =>
864
+ # # shape: (4,)
865
+ # # Series: 'a' [i64]
866
+ # # [
867
+ # # 4
868
+ # # 3
869
+ # # 2
870
+ # # 1
871
+ # # ]
312
872
  def sort(reverse: false, in_place: false)
313
873
  if in_place
314
874
  self._s = _s.sort(reverse)
@@ -330,10 +890,26 @@ module Polars
330
890
  # def arg_unique
331
891
  # end
332
892
 
893
+ # Get the index of the minimal value.
894
+ #
895
+ # @return [Integer, nil]
896
+ #
897
+ # @example
898
+ # s = Polars::Series.new("a", [3, 2, 1])
899
+ # s.arg_min
900
+ # # => 2
333
901
  def arg_min
334
902
  _s.arg_min
335
903
  end
336
904
 
905
+ # Get the index of the maximal value.
906
+ #
907
+ # @return [Integer, nil]
908
+ #
909
+ # @example
910
+ # s = Polars::Series.new("a", [3, 2, 1])
911
+ # s.arg_max
912
+ # # => 0
337
913
  def arg_max
338
914
  _s.arg_max
339
915
  end
@@ -347,14 +923,31 @@ module Polars
347
923
  # def take
348
924
  # end
349
925
 
926
+ # Count the null values in this Series.
927
+ #
928
+ # @return [Integer]
350
929
  def null_count
351
930
  _s.null_count
352
931
  end
353
932
 
933
+ # Return `true` if the Series has a validity bitmask.
934
+ #
935
+ # If there is none, it means that there are no null values.
936
+ # Use this to swiftly assert a Series does not have null values.
937
+ #
938
+ # @return [Boolean]
354
939
  def has_validity
355
940
  _s.has_validity
356
941
  end
357
942
 
943
+ # Check if the Series is empty.
944
+ #
945
+ # @return [Boolean]
946
+ #
947
+ # @example
948
+ # s = Polars::Series.new("a", [])
949
+ # s.is_empty
950
+ # # => true
358
951
  def is_empty
359
952
  len == 0
360
953
  end
@@ -396,13 +989,41 @@ module Polars
396
989
  # def explode
397
990
  # end
398
991
 
992
+ # Check if series is equal with another Series.
993
+ #
994
+ # @param other [Series]
995
+ # Series to compare with.
996
+ # @param null_equal [Boolean]
997
+ # Consider null values as equal.
998
+ # @param strict [Boolean]
999
+ # Don't allow different numerical dtypes, e.g. comparing `:u32` with a
1000
+ # `:i64` will return `false`.
1001
+ #
1002
+ # @return [Boolean]
1003
+ #
1004
+ # @example
1005
+ # s = Polars::Series.new("a", [1, 2, 3])
1006
+ # s2 = Polars::Series.new("b", [4, 5, 6])
1007
+ # s.series_equal(s)
1008
+ # # => true
1009
+ # s.series_equal(s2)
1010
+ # # => false
399
1011
  def series_equal(other, null_equal: false, strict: false)
400
1012
  _s.series_equal(other._s, null_equal, strict)
401
1013
  end
402
1014
 
1015
+ # Length of this Series.
1016
+ #
1017
+ # @return [Integer]
1018
+ #
1019
+ # @example
1020
+ # s = Polars::Series.new("a", [1, 2, 3])
1021
+ # s.len
1022
+ # # => 3
403
1023
  def len
404
1024
  _s.len
405
1025
  end
1026
+ alias_method :length, :len
406
1027
 
407
1028
  # def cast
408
1029
  # end
@@ -410,10 +1031,24 @@ module Polars
410
1031
  # def to_physical
411
1032
  # end
412
1033
 
1034
+ # Convert this Series to a Ruby Array. This operation clones data.
1035
+ #
1036
+ # @return [Array]
1037
+ #
1038
+ # @example
1039
+ # s = Polars::Series.new("a", [1, 2, 3])
1040
+ # s.to_a
1041
+ # # => [1, 2, 3]
413
1042
  def to_a
414
1043
  _s.to_a
415
1044
  end
416
1045
 
1046
+ # Create a single chunk of memory for this Series.
1047
+ #
1048
+ # @param in_place [Boolean]
1049
+ # In place or not.
1050
+ #
1051
+ # @return [Series]
417
1052
  def rechunk(in_place: false)
418
1053
  opt_s = _s.rechunk(in_place)
419
1054
  in_place ? self : Utils.wrap_s(opt_s)
@@ -422,24 +1057,67 @@ module Polars
422
1057
  # def reverse
423
1058
  # end
424
1059
 
1060
+ # Check if this Series datatype is numeric.
1061
+ #
1062
+ # @return [Boolean]
1063
+ #
1064
+ # @example
1065
+ # s = Polars::Series.new("a", [1, 2, 3])
1066
+ # s.is_numeric
1067
+ # # => true
425
1068
  def is_numeric
426
1069
  [:i8, :i16, :i32, :i64, :u8, :u16, :u32, :u64, :f32, :f64].include?(dtype)
427
1070
  end
428
1071
  alias_method :numeric?, :is_numeric
429
1072
 
430
- # def is_datelike
431
- # end
432
-
1073
+ # Check if this Series datatype is datelike.
1074
+ #
1075
+ # @return [Boolean]
1076
+ #
1077
+ # @example
1078
+ # s = Polars::Series.new([Date.new(2021, 1, 1), Date.new(2021, 1, 2), Date.new(2021, 1, 3)])
1079
+ # s.is_datelike
1080
+ # # => true
1081
+ def is_datelike
1082
+ [:date, :datetime, :duration, :time].include?(dtype)
1083
+ end
1084
+
1085
+ # Check if this Series has floating point numbers.
1086
+ #
1087
+ # @return [Boolean]
1088
+ #
1089
+ # @example
1090
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0])
1091
+ # s.is_float
1092
+ # # => true
433
1093
  def is_float
434
1094
  [:f32, :f64].include?(dtype)
435
1095
  end
436
1096
  alias_method :float?, :is_float
437
1097
 
438
- def is_bool
1098
+ # Check if this Series is a Boolean.
1099
+ #
1100
+ # @return [Boolean]
1101
+ #
1102
+ # @example
1103
+ # s = Polars::Series.new("a", [true, false, true])
1104
+ # s.is_boolean
1105
+ # # => true
1106
+ def is_boolean
439
1107
  dtype == :bool
440
1108
  end
441
- alias_method :bool?, :is_bool
442
-
1109
+ alias_method :boolean?, :is_boolean
1110
+ alias_method :is_bool, :is_boolean
1111
+ alias_method :bool?, :is_boolean
1112
+
1113
+ # Check if this Series datatype is a Utf8.
1114
+ #
1115
+ # @return [Boolean]
1116
+ #
1117
+ # @example
1118
+ # s = Polars::Series.new("x", ["a", "b", "c"])
1119
+ # s.is_utf8
1120
+ # # => true
443
1121
  def is_utf8
444
1122
  dtype == :str
445
1123
  end
@@ -468,15 +1146,66 @@ module Polars
468
1146
  # def fill_null
469
1147
  # end
470
1148
 
1149
+ # Rounds down to the nearest integer value.
1150
+ #
1151
+ # Only works on floating point Series.
1152
+ #
1153
+ # @return [Series]
1154
+ #
1155
+ # @example
1156
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
1157
+ # s.floor
1158
+ # # =>
1159
+ # # shape: (3,)
1160
+ # # Series: 'a' [f64]
1161
+ # # [
1162
+ # # 1.0
1163
+ # # 2.0
1164
+ # # 3.0
1165
+ # # ]
471
1166
  def floor
472
1167
  Utils.wrap_s(_s.floor)
473
1168
  end
474
1169
 
1170
+ # Rounds up to the nearest integer value.
1171
+ #
1172
+ # Only works on floating point Series.
1173
+ #
1174
+ # @return [Series]
1175
+ #
1176
+ # @example
1177
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
1178
+ # s.ceil
1179
+ # # =>
1180
+ # # shape: (3,)
1181
+ # # Series: 'a' [f64]
1182
+ # # [
1183
+ # # 2.0
1184
+ # # 3.0
1185
+ # # 4.0
1186
+ # # ]
475
1187
  def ceil
476
1188
  Utils.wrap_s(_s.ceil)
477
1189
  end
478
1190
 
479
- # default to 0 like Ruby
1191
+ # Round underlying floating point data by `decimals` digits.
1192
+ #
1193
+ # @param decimals [Integer]
1194
+ # number of decimals to round by.
1195
+ #
1196
+ # @return [Series]
1197
+ #
1198
+ # @example
1199
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
1200
+ # s.round(2)
1201
+ # # =>
1202
+ # # shape: (3,)
1203
+ # # Series: 'a' [f64]
1204
+ # # [
1205
+ # # 1.12
1206
+ # # 2.57
1207
+ # # 3.9
1208
+ # # ]
480
1209
  def round(decimals = 0)
481
1210
  Utils.wrap_s(_s.round(decimals))
482
1211
  end
@@ -571,14 +1300,56 @@ module Polars
571
1300
  # def sample
572
1301
  # end
573
1302
 
1303
+ # Get a boolean mask of the local maximum peaks.
1304
+ #
1305
+ # @return [Series]
1306
+ #
1307
+ # @example
1308
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
1309
+ # s.peak_max
1310
+ # # =>
1311
+ # # shape: (5,)
1312
+ # # Series: '' [bool]
1313
+ # # [
1314
+ # # false
1315
+ # # false
1316
+ # # false
1317
+ # # false
1318
+ # # true
1319
+ # # ]
574
1320
  def peak_max
575
1321
  Utils.wrap_s(_s.peak_max)
576
1322
  end
577
1323
 
1324
+ # Get a boolean mask of the local minimum peaks.
1325
+ #
1326
+ # @return [Series]
1327
+ #
1328
+ # @example
1329
+ # s = Polars::Series.new("a", [4, 1, 3, 2, 5])
1330
+ # s.peak_min
1331
+ # # =>
1332
+ # # shape: (5,)
1333
+ # # Series: '' [bool]
1334
+ # # [
1335
+ # # false
1336
+ # # true
1337
+ # # false
1338
+ # # true
1339
+ # # false
1340
+ # # ]
578
1341
  def peak_min
579
1342
  Utils.wrap_s(_s.peak_min)
580
1343
  end
581
1344
 
1345
+ # Count the number of unique values in this Series.
1346
+ #
1347
+ # @return [Integer]
1348
+ #
1349
+ # @example
1350
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1351
+ # s.n_unique
1352
+ # # => 3
582
1353
  def n_unique
583
1354
  _s.n_unique
584
1355
  end
@@ -640,6 +1411,23 @@ module Polars
640
1411
  # def extend_constant
641
1412
  # end
642
1413
 
1414
+ # Flags the Series as sorted.
1415
+ #
1416
+ # Enables downstream code to use fast paths for sorted arrays.
1417
+ #
1418
+ # @param reverse [Boolean]
1419
+ # If the Series order is reversed, e.g. descending.
1420
+ #
1421
+ # @return [Series]
1422
+ #
1423
+ # @note
1424
+ # This can lead to incorrect results if this Series is not sorted!!
1425
+ # Use with care!
1426
+ #
1427
+ # @example
1428
+ # s = Polars::Series.new("a", [1, 2, 3])
1429
+ # s.set_sorted.max
1430
+ # # => 3
643
1431
  def set_sorted(reverse: false)
644
1432
  Utils.wrap_s(_s.set_sorted(reverse))
645
1433
  end
@@ -716,7 +1504,7 @@ module Polars
716
1504
  # dtype = rb_type_to_dtype(dtype)
717
1505
  # end
718
1506
 
719
- raise "todo"
1507
+ raise Todo
720
1508
  else
721
1509
  constructor = rb_type_to_constructor(value.class)
722
1510
  constructor.call(name, values, strict)