polars-df 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/polars/series.rb CHANGED
@@ -1,7 +1,38 @@
1
1
  module Polars
2
+ # A Series represents a single column in a polars DataFrame.
2
3
  class Series
4
+ # @private
3
5
  attr_accessor :_s
4
6
 
7
+ # Create a new Series.
8
+ #
9
+ # @param name [String, Array, nil]
10
+ # Name of the series. Will be used as a column name when used in a DataFrame.
11
+ # When not specified, name is set to an empty string.
12
+ # @param values [Array, nil]
13
+ # One-dimensional data in various forms. Supported are: Array and Series.
14
+ # @param dtype [Symbol, nil]
15
+ # Polars dtype of the Series data. If not specified, the dtype is inferred.
16
+ # @param strict [Boolean]
17
+ # Throw error on numeric overflow.
18
+ # @param nan_to_null [Boolean]
19
+ # Not used.
20
+ # @param dtype_if_empty [Symbol, nil]
21
+ # If no dtype is specified and values contains `nil` or an empty array,
22
+ # set the Polars dtype of the Series data. If not specified, Float32 is used.
23
+ #
24
+ # @example Constructing a Series by specifying name and values positionally:
25
+ # s = Polars::Series.new("a", [1, 2, 3])
26
+ #
27
+ # @example Notice that the dtype is automatically inferred as a polars Int64:
28
+ # s.dtype
29
+ # # => :i64
30
+ #
31
+ # @example Constructing a Series with a specific dtype:
32
+ # s2 = Polars::Series.new("a", [1, 2, 3], dtype: :f32)
33
+ #
34
+ # @example It is possible to construct a Series with values as the first positional argument. This syntax is considered an anti-pattern, but it can be useful in certain scenarios. You must specify any other arguments through keywords.
35
+ # s3 = Polars::Series.new([1, 2, 3])
5
36
  def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false, dtype_if_empty: nil)
6
37
  # Handle case where values are passed as the first argument
7
38
  if !name.nil? && !name.is_a?(String)
@@ -35,16 +66,23 @@ module Polars
35
66
  end
36
67
  end
37
68
 
69
+ # @private
38
70
  def self._from_rbseries(s)
39
71
  series = Series.allocate
40
72
  series._s = s
41
73
  series
42
74
  end
43
75
 
76
+ # Get the data type of this Series.
77
+ #
78
+ # @return [Symbol]
44
79
  def dtype
45
- _s.dtype.to_sym
80
+ _s.dtype
46
81
  end
47
82
 
83
+ # Get flags that are set on the Series.
84
+ #
85
+ # @return [Hash]
48
86
  def flags
49
87
  {
50
88
  "SORTED_ASC" => _s.is_sorted_flag,
@@ -52,34 +90,59 @@ module Polars
52
90
  }
53
91
  end
54
92
 
93
+ # Get the inner dtype of a List typed Series.
94
+ #
95
+ # @return [Symbol]
55
96
  def inner_dtype
56
- _s.inner_dtype&.to_sym
97
+ _s.inner_dtype
57
98
  end
58
99
 
100
+ # Get the name of this Series.
101
+ #
102
+ # @return [String]
59
103
  def name
60
104
  _s.name
61
105
  end
62
106
 
107
+ # Shape of this Series.
108
+ #
109
+ # @return [Array]
63
110
  def shape
64
111
  [_s.len]
65
112
  end
66
113
 
67
- # def time_unit
68
- # end
114
+ # Get the time unit of underlying Datetime Series as `"ns"`, `"us"`, or `"ms"`.
115
+ #
116
+ # @return [String]
117
+ def time_unit
118
+ _s.time_unit
119
+ end
69
120
 
121
+ # Returns a string representing the Series.
122
+ #
123
+ # @return [String]
70
124
  def to_s
71
125
  _s.to_s
72
126
  end
73
127
  alias_method :inspect, :to_s
74
128
 
129
+ # Bitwise AND.
130
+ #
131
+ # @return [Series]
75
132
  def &(other)
76
133
  Utils.wrap_s(_s.bitand(other._s))
77
134
  end
78
135
 
136
+ # Bitwise OR.
137
+ #
138
+ # @return [Series]
79
139
  def |(other)
80
140
  Utils.wrap_s(_s.bitor(other._s))
81
141
  end
82
142
 
143
+ # Bitwise XOR.
144
+ #
145
+ # @return [Series]
83
146
  def ^(other)
84
147
  Utils.wrap_s(_s.bitxor(other._s))
85
148
  end
@@ -102,32 +165,50 @@ module Polars
102
165
  # def <=(other)
103
166
  # end
104
167
 
168
+ # Performs addition.
169
+ #
170
+ # @return [Series]
105
171
  def +(other)
106
172
  Utils. wrap_s(_s.add(other._s))
107
173
  end
108
174
 
175
+ # Performs subtraction.
176
+ #
177
+ # @return [Series]
109
178
  def -(other)
110
179
  Utils.wrap_s(_s.sub(other._s))
111
180
  end
112
181
 
182
+ # Performs multiplication.
183
+ #
184
+ # @return [Series]
113
185
  def *(other)
114
186
  Utils.wrap_s(_s.mul(other._s))
115
187
  end
116
188
 
189
+ # Performs division.
190
+ #
191
+ # @return [Series]
117
192
  def /(other)
118
193
  Utils.wrap_s(_s.div(other._s))
119
194
  end
120
195
 
196
+ # Raises to the power of exponent.
197
+ #
198
+ # @return [Series]
121
199
  def **(power)
122
- # if is_datelike
123
- # raise ArgumentError, "first cast to integer before raising datelike dtypes to a power"
124
- # end
200
+ if is_datelike
201
+ raise ArgumentError, "first cast to integer before raising datelike dtypes to a power"
202
+ end
125
203
  to_frame.select(Polars.col(name).pow(power)).to_series
126
204
  end
127
205
 
128
206
  # def -@(other)
129
207
  # end
130
208
 
209
+ # Returns elements of the Series.
210
+ #
211
+ # @return [Object]
131
212
  def [](item)
132
213
  _s.get_idx(item)
133
214
  end
@@ -135,19 +216,54 @@ module Polars
135
216
  # def []=(key, value)
136
217
  # end
137
218
 
219
+ # Return an estimation of the total (heap) allocated size of the Series.
220
+ #
221
+ # Estimated size is given in the specified unit (bytes by default).
222
+ #
223
+ # This estimation is the sum of the size of its buffers, validity, including
224
+ # nested arrays. Multiple arrays may share buffers and bitmaps. Therefore, the
225
+ # size of 2 arrays is not the sum of the sizes computed from this function. In
226
+ # particular, StructArray's size is an upper bound.
227
+ #
228
+ # When an array is sliced, its allocated size remains constant because the buffer
229
+ # is unchanged. However, this function will yield a smaller number. This is because
230
+ # this function returns the visible size of the buffer, not its total capacity.
231
+ #
232
+ # FFI buffers are included in this estimation.
233
+ #
234
+ # @param unit ["b", "kb", "mb", "gb", "tb"]
235
+ # Scale the returned size to the given unit.
236
+ #
237
+ # @return [Numeric]
238
+ #
239
+ # @example
240
+ # s = Polars::Series.new("values", 1..1_000_000, dtype: :u32)
241
+ # s.estimated_size
242
+ # # => 4000000
243
+ # s.estimated_size("mb")
244
+ # # => 3.814697265625
138
245
  def estimated_size(unit = "b")
139
246
  sz = _s.estimated_size
140
247
  Utils.scale_bytes(sz, to: unit)
141
248
  end
142
249
 
250
+ # Compute the square root of the elements.
251
+ #
252
+ # @return [Series]
143
253
  def sqrt
144
- self ** 0.5
254
+ self**0.5
145
255
  end
146
256
 
257
+ # Check if any boolean value in the column is `true`.
258
+ #
259
+ # @return [Boolean]
147
260
  def any
148
261
  to_frame.select(Polars.col(name).any).to_series[0]
149
262
  end
150
263
 
264
+ # Check if all boolean values in the column are `true`.
265
+ #
266
+ # @return [Boolean]
151
267
  def all
152
268
  to_frame.select(Polars.col(name).all).to_series[0]
153
269
  end
@@ -167,6 +283,9 @@ module Polars
167
283
  # def drop_nans
168
284
  # end
169
285
 
286
+ # Cast this Series to a DataFrame.
287
+ #
288
+ # @return [DataFrame]
170
289
  def to_frame
171
290
  Utils.wrap_df(RbDataFrame.new([_s]))
172
291
  end
@@ -174,22 +293,61 @@ module Polars
174
293
  # def describe
175
294
  # end
176
295
 
296
+ # Reduce this Series to the sum value.
297
+ #
298
+ # @return [Numeric]
299
+ #
300
+ # @note
301
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
302
+ # `:i64` before summing to prevent overflow issues.
303
+ #
304
+ # @example
305
+ # s = Polars::Series.new("a", [1, 2, 3])
306
+ # s.sum
307
+ # # => 6
177
308
  def sum
178
309
  _s.sum
179
310
  end
180
311
 
312
+ # Reduce this Series to the mean value.
313
+ #
314
+ # @return [Float, nil]
315
+ #
316
+ # @example
317
+ # s = Polars::Series.new("a", [1, 2, 3])
318
+ # s.mean
319
+ # # => 2.0
181
320
  def mean
182
321
  _s.mean
183
322
  end
184
323
 
324
+ # Reduce this Series to the product value.
325
+ #
326
+ # @return [Numeric]
185
327
  def product
186
328
  to_frame.select(Polars.col(name).product).to_series[0]
187
329
  end
188
330
 
331
+ # Get the minimal value in this Series.
332
+ #
333
+ # @return [Object]
334
+ #
335
+ # @example
336
+ # s = Polars::Series.new("a", [1, 2, 3])
337
+ # s.min
338
+ # # => 1
189
339
  def min
190
340
  _s.min
191
341
  end
192
342
 
343
+ # Get the maximum value in this Series.
344
+ #
345
+ # @return [Object]
346
+ #
347
+ # @example
348
+ # s = Polars::Series.new("a", [1, 2, 3])
349
+ # s.max
350
+ # # => 3
193
351
  def max
194
352
  _s.max
195
353
  end
@@ -200,6 +358,18 @@ module Polars
200
358
  # def nan_min
201
359
  # end
202
360
 
361
+ # Get the standard deviation of this Series.
362
+ #
363
+ # @param ddof [Integer]
364
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
365
+ # where N represents the number of elements.
366
+ #
367
+ # @return [Float, nil]
368
+ #
369
+ # @example
370
+ # s = Polars::Series.new("a", [1, 2, 3])
371
+ # s.std
372
+ # # => 1.0
203
373
  def std(ddof: 1)
204
374
  if !is_numeric
205
375
  nil
@@ -208,6 +378,18 @@ module Polars
208
378
  end
209
379
  end
210
380
 
381
+ # Get variance of this Series.
382
+ #
383
+ # @param ddof [Integer]
384
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
385
+ # where N represents the number of elements.
386
+ #
387
+ # @return [Float, nil]
388
+ #
389
+ # @example
390
+ # s = Polars::Series.new("a", [1, 2, 3])
391
+ # s.var
392
+ # # => 1.0
211
393
  def var(ddof: 1)
212
394
  if !is_numeric
213
395
  nil
@@ -216,18 +398,82 @@ module Polars
216
398
  end
217
399
  end
218
400
 
401
+ # Get the median of this Series.
402
+ #
403
+ # @return [Float, nil]
404
+ #
405
+ # @example
406
+ # s = Polars::Series.new("a", [1, 2, 3])
407
+ # s.median
408
+ # # => 2.0
219
409
  def median
220
410
  _s.median
221
411
  end
222
412
 
413
+ # Get the quantile value of this Series.
414
+ #
415
+ # @param quantile [Float, nil]
416
+ # Quantile between 0.0 and 1.0.
417
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
418
+ # Interpolation method.
419
+ #
420
+ # @return [Float, nil]
421
+ #
422
+ # @example
423
+ # s = Polars::Series.new("a", [1, 2, 3])
424
+ # s.quantile(0.5)
425
+ # # => 2.0
223
426
  def quantile(quantile, interpolation: "nearest")
224
427
  _s.quantile(quantile, interpolation)
225
428
  end
226
429
 
430
+ # Get dummy variables.
431
+ #
432
+ # @return [DataFrame]
433
+ #
434
+ # @example
435
+ # s = Polars::Series.new("a", [1, 2, 3])
436
+ # s.to_dummies
437
+ # # =>
438
+ # # shape: (3, 3)
439
+ # # ┌─────┬─────┬─────┐
440
+ # # │ a_1 ┆ a_2 ┆ a_3 │
441
+ # # │ --- ┆ --- ┆ --- │
442
+ # # │ u8 ┆ u8 ┆ u8 │
443
+ # # ╞═════╪═════╪═════╡
444
+ # # │ 1 ┆ 0 ┆ 0 │
445
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
446
+ # # │ 0 ┆ 1 ┆ 0 │
447
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
448
+ # # │ 0 ┆ 0 ┆ 1 │
449
+ # # └─────┴─────┴─────┘
227
450
  def to_dummies
228
451
  Utils.wrap_df(_s.to_dummies)
229
452
  end
230
453
 
454
+ # Count the unique values in a Series.
455
+ #
456
+ # @param sort [Boolean]
457
+ # Ensure the output is sorted from most values to least.
458
+ #
459
+ # @return [DataFrame]
460
+ #
461
+ # @example
462
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
463
+ # s.value_counts.sort("a")
464
+ # # =>
465
+ # # shape: (3, 2)
466
+ # # ┌─────┬────────┐
467
+ # # │ a ┆ counts │
468
+ # # │ --- ┆ --- │
469
+ # # │ i64 ┆ u32 │
470
+ # # ╞═════╪════════╡
471
+ # # │ 1 ┆ 1 │
472
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
473
+ # # │ 2 ┆ 2 │
474
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
475
+ # # │ 3 ┆ 1 │
476
+ # # └─────┴────────┘
231
477
  def value_counts(sort: false)
232
478
  Utils.wrap_df(_s.value_counts(sort))
233
479
  end
@@ -241,12 +487,34 @@ module Polars
241
487
  # def cumulative_eval
242
488
  # end
243
489
 
490
+ # Return a copy of the Series with a new alias/name.
491
+ #
492
+ # @param name [String]
493
+ # New name.
494
+ #
495
+ # @return [Series]
496
+ #
497
+ # @example
498
+ # s = Polars::Series.new("x", [1, 2, 3])
499
+ # s.alias("y")
244
500
  def alias(name)
245
501
  s = dup
246
502
  s._s.rename(name)
247
503
  s
248
504
  end
249
505
 
506
+ # Rename this Series.
507
+ #
508
+ # @param name [String]
509
+ # New name.
510
+ # @param in_place [Boolean]
511
+ # Modify the Series in-place.
512
+ #
513
+ # @return [Series]
514
+ #
515
+ # @example
516
+ # s = Polars::Series.new("a", [1, 2, 3])
517
+ # s.rename("b")
250
518
  def rename(name, in_place: false)
251
519
  if in_place
252
520
  _s.rename(name)
@@ -256,52 +524,313 @@ module Polars
256
524
  end
257
525
  end
258
526
 
527
+ # Get the length of each individual chunk.
528
+ #
529
+ # @return [Array]
530
+ #
531
+ # @example
532
+ # s = Polars::Series.new("a", [1, 2, 3])
533
+ # s2 = Polars::Series.new("b", [4, 5, 6])
534
+ #
535
+ # @example Concatenate Series with rechunk: true
536
+ # Polars.concat([s, s2]).chunk_lengths
537
+ # # => [6]
538
+ #
539
+ # @example Concatenate Series with rechunk: false
540
+ # Polars.concat([s, s2], rechunk: false).chunk_lengths
541
+ # # => [3, 3]
259
542
  def chunk_lengths
260
543
  _s.chunk_lengths
261
544
  end
262
545
 
546
+ # Get the number of chunks that this Series contains.
547
+ #
548
+ # @return [Integer]
549
+ #
550
+ # @example
551
+ # s = Polars::Series.new("a", [1, 2, 3])
552
+ # s2 = Polars::Series.new("b", [4, 5, 6])
553
+ #
554
+ # @example Concatenate Series with rechunk: true
555
+ # Polars.concat([s, s2]).n_chunks
556
+ # # => 1
557
+ #
558
+ # @example Concatenate Series with rechunk: false
559
+ # Polars.concat([s, s2], rechunk: false).n_chunks
560
+ # # => 2
263
561
  def n_chunks
264
562
  _s.n_chunks
265
563
  end
266
564
 
565
+ # Get an array with the cumulative sum computed at every element.
566
+ #
567
+ # @param reverse [Boolean]
568
+ # reverse the operation.
569
+ #
570
+ # @return [Series]
571
+ #
572
+ # @note
573
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
574
+ # `:i64` before summing to prevent overflow issues.
575
+ #
576
+ # @example
577
+ # s = Polars::Series.new("a", [1, 2, 3])
578
+ # s.cumsum
579
+ # # =>
580
+ # # shape: (3,)
581
+ # # Series: 'a' [i64]
582
+ # # [
583
+ # # 1
584
+ # # 3
585
+ # # 6
586
+ # # ]
267
587
  def cumsum(reverse: false)
268
588
  Utils.wrap_s(_s.cumsum(reverse))
269
589
  end
270
590
 
591
+ # Get an array with the cumulative min computed at every element.
592
+ #
593
+ # @param reverse [Boolean]
594
+ # reverse the operation.
595
+ #
596
+ # @return [Series]
597
+ #
598
+ # @example
599
+ # s = Polars::Series.new("a", [3, 5, 1])
600
+ # s.cummin
601
+ # # =>
602
+ # # shape: (3,)
603
+ # # Series: 'a' [i64]
604
+ # # [
605
+ # # 3
606
+ # # 3
607
+ # # 1
608
+ # # ]
271
609
  def cummin(reverse: false)
272
610
  Utils.wrap_s(_s.cummin(reverse))
273
611
  end
274
612
 
613
+ # Get an array with the cumulative max computed at every element.
614
+ #
615
+ # @param reverse [Boolean]
616
+ # reverse the operation.
617
+ #
618
+ # @return [Series]
619
+ #
620
+ # @example
621
+ # s = Polars::Series.new("a", [3, 5, 1])
622
+ # s.cummax
623
+ # # =>
624
+ # # shape: (3,)
625
+ # # Series: 'a' [i64]
626
+ # # [
627
+ # # 3
628
+ # # 5
629
+ # # 5
630
+ # # ]
275
631
  def cummax(reverse: false)
276
632
  Utils.wrap_s(_s.cummax(reverse))
277
633
  end
278
634
 
635
+ # Get an array with the cumulative product computed at every element.
636
+ #
637
+ # @param reverse [Boolean]
638
+ # reverse the operation.
639
+ #
640
+ # @return [Series]
641
+ #
642
+ # @note
643
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
644
+ # `:i64` before multiplying to prevent overflow issues.
645
+ #
646
+ # @example
647
+ # s = Polars::Series.new("a", [1, 2, 3])
648
+ # s.cumprod
649
+ # # =>
650
+ # # shape: (3,)
651
+ # # Series: 'a' [i64]
652
+ # # [
653
+ # # 1
654
+ # # 2
655
+ # # 6
656
+ # # ]
279
657
  def cumprod(reverse: false)
280
658
  Utils.wrap_s(_s.cumprod(reverse))
281
659
  end
282
660
 
661
+ # Get the first `n` rows.
662
+ #
663
+ # Alias for {#head}.
664
+ #
665
+ # @param n [Integer]
666
+ # Number of rows to return.
667
+ #
668
+ # @return [Series]
669
+ #
670
+ # @example
671
+ # s = Polars::Series.new("a", [1, 2, 3])
672
+ # s.limit(2)
673
+ # # =>
674
+ # # shape: (2,)
675
+ # # Series: 'a' [i64]
676
+ # # [
677
+ # # 1
678
+ # # 2
679
+ # # ]
283
680
  def limit(n = 10)
284
681
  to_frame.select(Utils.col(name).limit(n)).to_series
285
682
  end
286
683
 
684
+ # Get a slice of this Series.
685
+ #
686
+ # @param offset [Integer]
687
+ # Start index. Negative indexing is supported.
688
+ # @param length [Integer, nil]
689
+ # Length of the slice. If set to `nil`, all rows starting at the offset
690
+ # will be selected.
691
+ #
692
+ # @return [Series]
693
+ #
694
+ # @example
695
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
696
+ # s.slice(1, 2)
697
+ # # =>
698
+ # # shape: (2,)
699
+ # # Series: 'a' [i64]
700
+ # # [
701
+ # # 2
702
+ # # 3
703
+ # # ]
287
704
  def slice(offset, length = nil)
288
705
  length = len if length.nil?
289
706
  Utils.wrap_s(_s.slice(offset, length))
290
707
  end
291
708
 
292
- def append(other)
293
- _s.append(other._s)
709
+ # Append a Series to this one.
710
+ #
711
+ # @param other [Series]
712
+ # Series to append.
713
+ # @param append_chunks [Boolean]
714
+ # If set to `true` the append operation will add the chunks from `other` to
715
+ # self. This is super cheap.
716
+ #
717
+ # If set to `false` the append operation will do the same as
718
+ # {DataFrame#extend} which extends the memory backed by this Series with
719
+ # the values from `other`.
720
+ #
721
+ # Different from `append_chunks`, `extend` appends the data from `other` to
722
+ # the underlying memory locations and thus may cause a reallocation (which is
723
+ # expensive).
724
+ #
725
+ # If this does not cause a reallocation, the resulting data structure will not
726
+ # have any extra chunks and thus will yield faster queries.
727
+ #
728
+ # Prefer `extend` over `append_chunks` when you want to do a query after a
729
+ # single append. For instance during online operations where you add `n` rows
730
+ # and rerun a query.
731
+ #
732
+ # Prefer `append_chunks` over `extend` when you want to append many times
733
+ # before doing a query. For instance, when you read in multiple files and want
734
+ # to store them in a single Series. In the latter case, finish the sequence
735
+ # of `append_chunks` operations with a `rechunk`.
736
+ #
737
+ # @return [Series]
738
+ #
739
+ # @example
740
+ # s = Polars::Series.new("a", [1, 2, 3])
741
+ # s2 = Polars::Series.new("b", [4, 5, 6])
742
+ # s.append(s2)
743
+ # # =>
744
+ # # shape: (6,)
745
+ # # Series: 'a' [i64]
746
+ # # [
747
+ # # 1
748
+ # # 2
749
+ # # 3
750
+ # # 4
751
+ # # 5
752
+ # # 6
753
+ # # ]
754
+ def append(other, append_chunks: true)
755
+ begin
756
+ if append_chunks
757
+ _s.append(other._s)
758
+ else
759
+ _s.extend(other._s)
760
+ end
761
+ rescue => e
762
+ if e.message == "Already mutably borrowed"
763
+ append(other.clone, append_chunks)
764
+ else
765
+ raise e
766
+ end
767
+ end
294
768
  self
295
769
  end
296
770
 
771
+ # Filter elements by a boolean mask.
772
+ #
773
+ # @param predicate [Series, Array]
774
+ # Boolean mask.
775
+ #
776
+ # @return [Series]
777
+ #
778
+ # @example
779
+ # s = Polars::Series.new("a", [1, 2, 3])
780
+ # mask = Polars::Series.new("", [true, false, true])
781
+ # s.filter(mask)
782
+ # # =>
783
+ # # shape: (2,)
784
+ # # Series: 'a' [i64]
785
+ # # [
786
+ # # 1
787
+ # # 3
788
+ # # ]
297
789
  def filter(predicate)
790
+ if predicate.is_a?(Array)
791
+ predicate = Series.new("", predicate)
792
+ end
298
793
  Utils.wrap_s(_s.filter(predicate._s))
299
794
  end
300
795
 
796
+ # Get the first `n` rows.
797
+ #
798
+ # @param n [Integer]
799
+ # Number of rows to return.
800
+ #
801
+ # @return [Series]
802
+ #
803
+ # @example
804
+ # s = Polars::Series.new("a", [1, 2, 3])
805
+ # s.head(2)
806
+ # # =>
807
+ # # shape: (2,)
808
+ # # Series: 'a' [i64]
809
+ # # [
810
+ # # 1
811
+ # # 2
812
+ # # ]
301
813
  def head(n = 10)
302
814
  to_frame.select(Utils.col(name).head(n)).to_series
303
815
  end
304
816
 
817
+ # Get the last `n` rows.
818
+ #
819
+ # @param n [Integer]
820
+ # Number of rows to return.
821
+ #
822
+ # @return [Series]
823
+ #
824
+ # @example
825
+ # s = Polars::Series.new("a", [1, 2, 3])
826
+ # s.tail(2)
827
+ # # =>
828
+ # # shape: (2,)
829
+ # # Series: 'a' [i64]
830
+ # # [
831
+ # # 2
832
+ # # 3
833
+ # # ]
305
834
  def tail(n = 10)
306
835
  to_frame.select(Utils.col(name).tail(n)).to_series
307
836
  end
@@ -309,6 +838,37 @@ module Polars
309
838
  # def take_every
310
839
  # end
311
840
 
841
+ # Sort this Series.
842
+ #
843
+ # @param reverse [Boolean]
844
+ # Reverse sort.
845
+ # @param in_place [Boolean]
846
+ # Sort in place.
847
+ #
848
+ # @return [Series]
849
+ #
850
+ # @example
851
+ # s = Polars::Series.new("a", [1, 3, 4, 2])
852
+ # s.sort
853
+ # # =>
854
+ # # shape: (4,)
855
+ # # Series: 'a' [i64]
856
+ # # [
857
+ # # 1
858
+ # # 2
859
+ # # 3
860
+ # # 4
861
+ # # ]
862
+ # s.sort(reverse: true)
863
+ # # =>
864
+ # # shape: (4,)
865
+ # # Series: 'a' [i64]
866
+ # # [
867
+ # # 4
868
+ # # 3
869
+ # # 2
870
+ # # 1
871
+ # # ]
312
872
  def sort(reverse: false, in_place: false)
313
873
  if in_place
314
874
  self._s = _s.sort(reverse)
@@ -330,10 +890,26 @@ module Polars
330
890
  # def arg_unique
331
891
  # end
332
892
 
893
+ # Get the index of the minimal value.
894
+ #
895
+ # @return [Integer, nil]
896
+ #
897
+ # @example
898
+ # s = Polars::Series.new("a", [3, 2, 1])
899
+ # s.arg_min
900
+ # # => 2
333
901
  def arg_min
334
902
  _s.arg_min
335
903
  end
336
904
 
905
+ # Get the index of the maximal value.
906
+ #
907
+ # @return [Integer, nil]
908
+ #
909
+ # @example
910
+ # s = Polars::Series.new("a", [3, 2, 1])
911
+ # s.arg_max
912
+ # # => 0
337
913
  def arg_max
338
914
  _s.arg_max
339
915
  end
@@ -347,14 +923,31 @@ module Polars
347
923
  # def take
348
924
  # end
349
925
 
926
+ # Count the null values in this Series.
927
+ #
928
+ # @return [Integer]
350
929
  def null_count
351
930
  _s.null_count
352
931
  end
353
932
 
933
+ # Return `true` if the Series has a validity bitmask.
934
+ #
935
+ # If there is none, it means that there are no null values.
936
+ # Use this to swiftly assert a Series does not have null values.
937
+ #
938
+ # @return [Boolean]
354
939
  def has_validity
355
940
  _s.has_validity
356
941
  end
357
942
 
943
+ # Check if the Series is empty.
944
+ #
945
+ # @return [Boolean]
946
+ #
947
+ # @example
948
+ # s = Polars::Series.new("a", [])
949
+ # s.is_empty
950
+ # # => true
358
951
  def is_empty
359
952
  len == 0
360
953
  end
@@ -396,13 +989,41 @@ module Polars
396
989
  # def explode
397
990
  # end
398
991
 
992
+ # Check if series is equal with another Series.
993
+ #
994
+ # @param other [Series]
995
+ # Series to compare with.
996
+ # @param null_equal [Boolean]
997
+ # Consider null values as equal.
998
+ # @param strict [Boolean]
999
+ # Don't allow different numerical dtypes, e.g. comparing `:u32` with a
1000
+ # `:i64` will return `false`.
1001
+ #
1002
+ # @return [Boolean]
1003
+ #
1004
+ # @example
1005
+ # s = Polars::Series.new("a", [1, 2, 3])
1006
+ # s2 = Polars::Series.new("b", [4, 5, 6])
1007
+ # s.series_equal(s)
1008
+ # # => true
1009
+ # s.series_equal(s2)
1010
+ # # => false
399
1011
  def series_equal(other, null_equal: false, strict: false)
400
1012
  _s.series_equal(other._s, null_equal, strict)
401
1013
  end
402
1014
 
1015
+ # Length of this Series.
1016
+ #
1017
+ # @return [Integer]
1018
+ #
1019
+ # @example
1020
+ # s = Polars::Series.new("a", [1, 2, 3])
1021
+ # s.len
1022
+ # # => 3
403
1023
  def len
404
1024
  _s.len
405
1025
  end
1026
+ alias_method :length, :len
406
1027
 
407
1028
  # def cast
408
1029
  # end
@@ -410,10 +1031,24 @@ module Polars
410
1031
  # def to_physical
411
1032
  # end
412
1033
 
1034
+ # Convert this Series to a Ruby Array. This operation clones data.
1035
+ #
1036
+ # @return [Array]
1037
+ #
1038
+ # @example
1039
+ # s = Polars::Series.new("a", [1, 2, 3])
1040
+ # s.to_a
1041
+ # # => [1, 2, 3]
413
1042
  def to_a
414
1043
  _s.to_a
415
1044
  end
416
1045
 
1046
+ # Create a single chunk of memory for this Series.
1047
+ #
1048
+ # @param in_place [Boolean]
1049
+ # In place or not.
1050
+ #
1051
+ # @return [Series]
417
1052
  def rechunk(in_place: false)
418
1053
  opt_s = _s.rechunk(in_place)
419
1054
  in_place ? self : Utils.wrap_s(opt_s)
@@ -422,24 +1057,67 @@ module Polars
422
1057
  # def reverse
423
1058
  # end
424
1059
 
1060
+ # Check if this Series datatype is numeric.
1061
+ #
1062
+ # @return [Boolean]
1063
+ #
1064
+ # @example
1065
+ # s = Polars::Series.new("a", [1, 2, 3])
1066
+ # s.is_numeric
1067
+ # # => true
425
1068
  def is_numeric
426
1069
  [:i8, :i16, :i32, :i64, :u8, :u16, :u32, :u64, :f32, :f64].include?(dtype)
427
1070
  end
428
1071
  alias_method :numeric?, :is_numeric
429
1072
 
430
- # def is_datelike
431
- # end
432
-
1073
+ # Check if this Series datatype is datelike.
1074
+ #
1075
+ # @return [Boolean]
1076
+ #
1077
+ # @example
1078
+ # s = Polars::Series.new([Date.new(2021, 1, 1), Date.new(2021, 1, 2), Date.new(2021, 1, 3)])
1079
+ # s.is_datelike
1080
+ # # => true
1081
+ def is_datelike
1082
+ [:date, :datetime, :duration, :time].include?(dtype)
1083
+ end
1084
+
1085
+ # Check if this Series has floating point numbers.
1086
+ #
1087
+ # @return [Boolean]
1088
+ #
1089
+ # @example
1090
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0])
1091
+ # s.is_float
1092
+ # # => true
433
1093
  def is_float
434
1094
  [:f32, :f64].include?(dtype)
435
1095
  end
436
1096
  alias_method :float?, :is_float
437
1097
 
438
- def is_bool
1098
+ # Check if this Series is a Boolean.
1099
+ #
1100
+ # @return [Boolean]
1101
+ #
1102
+ # @example
1103
+ # s = Polars::Series.new("a", [true, false, true])
1104
+ # s.is_boolean
1105
+ # # => true
1106
+ def is_boolean
439
1107
  dtype == :bool
440
1108
  end
441
- alias_method :bool?, :is_bool
442
-
1109
+ alias_method :boolean?, :is_boolean
1110
+ alias_method :is_bool, :is_boolean
1111
+ alias_method :bool?, :is_boolean
1112
+
1113
+ # Check if this Series datatype is a Utf8.
1114
+ #
1115
+ # @return [Boolean]
1116
+ #
1117
+ # @example
1118
+ # s = Polars::Series.new("x", ["a", "b", "c"])
1119
+ # s.is_utf8
1120
+ # # => true
443
1121
  def is_utf8
444
1122
  dtype == :str
445
1123
  end
@@ -468,15 +1146,66 @@ module Polars
468
1146
  # def fill_null
469
1147
  # end
470
1148
 
1149
+ # Rounds down to the nearest integer value.
1150
+ #
1151
+ # Only works on floating point Series.
1152
+ #
1153
+ # @return [Series]
1154
+ #
1155
+ # @example
1156
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
1157
+ # s.floor
1158
+ # # =>
1159
+ # # shape: (3,)
1160
+ # # Series: 'a' [f64]
1161
+ # # [
1162
+ # # 1.0
1163
+ # # 2.0
1164
+ # # 3.0
1165
+ # # ]
471
1166
  def floor
472
1167
  Utils.wrap_s(_s.floor)
473
1168
  end
474
1169
 
1170
+ # Rounds up to the nearest integer value.
1171
+ #
1172
+ # Only works on floating point Series.
1173
+ #
1174
+ # @return [Series]
1175
+ #
1176
+ # @example
1177
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
1178
+ # s.ceil
1179
+ # # =>
1180
+ # # shape: (3,)
1181
+ # # Series: 'a' [f64]
1182
+ # # [
1183
+ # # 2.0
1184
+ # # 3.0
1185
+ # # 4.0
1186
+ # # ]
475
1187
  def ceil
476
1188
  Utils.wrap_s(_s.ceil)
477
1189
  end
478
1190
 
479
- # default to 0 like Ruby
1191
+ # Round underlying floating point data by `decimals` digits.
1192
+ #
1193
+ # @param decimals [Integer]
1194
+ # number of decimals to round by.
1195
+ #
1196
+ # @return [Series]
1197
+ #
1198
+ # @example
1199
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
1200
+ # s.round(2)
1201
+ # # =>
1202
+ # # shape: (3,)
1203
+ # # Series: 'a' [f64]
1204
+ # # [
1205
+ # # 1.12
1206
+ # # 2.57
1207
+ # # 3.9
1208
+ # # ]
480
1209
  def round(decimals = 0)
481
1210
  Utils.wrap_s(_s.round(decimals))
482
1211
  end
@@ -571,14 +1300,56 @@ module Polars
571
1300
  # def sample
572
1301
  # end
573
1302
 
1303
+ # Get a boolean mask of the local maximum peaks.
1304
+ #
1305
+ # @return [Series]
1306
+ #
1307
+ # @example
1308
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
1309
+ # s.peak_max
1310
+ # # =>
1311
+ # # shape: (5,)
1312
+ # # Series: '' [bool]
1313
+ # # [
1314
+ # # false
1315
+ # # false
1316
+ # # false
1317
+ # # false
1318
+ # # true
1319
+ # # ]
574
1320
  def peak_max
575
1321
  Utils.wrap_s(_s.peak_max)
576
1322
  end
577
1323
 
1324
+ # Get a boolean mask of the local minimum peaks.
1325
+ #
1326
+ # @return [Series]
1327
+ #
1328
+ # @example
1329
+ # s = Polars::Series.new("a", [4, 1, 3, 2, 5])
1330
+ # s.peak_min
1331
+ # # =>
1332
+ # # shape: (5,)
1333
+ # # Series: '' [bool]
1334
+ # # [
1335
+ # # false
1336
+ # # true
1337
+ # # false
1338
+ # # true
1339
+ # # false
1340
+ # # ]
578
1341
  def peak_min
579
1342
  Utils.wrap_s(_s.peak_min)
580
1343
  end
581
1344
 
1345
+ # Count the number of unique values in this Series.
1346
+ #
1347
+ # @return [Integer]
1348
+ #
1349
+ # @example
1350
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1351
+ # s.n_unique
1352
+ # # => 3
582
1353
  def n_unique
583
1354
  _s.n_unique
584
1355
  end
@@ -640,6 +1411,23 @@ module Polars
640
1411
  # def extend_constant
641
1412
  # end
642
1413
 
1414
+ # Flags the Series as sorted.
1415
+ #
1416
+ # Enables downstream code to use fast paths for sorted arrays.
1417
+ #
1418
+ # @param reverse [Boolean]
1419
+ # If the Series order is reversed, e.g. descending.
1420
+ #
1421
+ # @return [Series]
1422
+ #
1423
+ # @note
1424
+ # This can lead to incorrect results if this Series is not sorted!!
1425
+ # Use with care!
1426
+ #
1427
+ # @example
1428
+ # s = Polars::Series.new("a", [1, 2, 3])
1429
+ # s.set_sorted.max
1430
+ # # => 3
643
1431
  def set_sorted(reverse: false)
644
1432
  Utils.wrap_s(_s.set_sorted(reverse))
645
1433
  end
@@ -716,7 +1504,7 @@ module Polars
716
1504
  # dtype = rb_type_to_dtype(dtype)
717
1505
  # end
718
1506
 
719
- raise "todo"
1507
+ raise Todo
720
1508
  else
721
1509
  constructor = rb_type_to_constructor(value.class)
722
1510
  constructor.call(name, values, strict)