polars-df 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/polars/series.rb CHANGED
@@ -1,7 +1,38 @@
1
1
  module Polars
2
+ # A Series represents a single column in a polars DataFrame.
2
3
  class Series
4
+ # @private
3
5
  attr_accessor :_s
4
6
 
7
+ # Create a new Series.
8
+ #
9
+ # @param name [String, Array, nil]
10
+ # Name of the series. Will be used as a column name when used in a DataFrame.
11
+ # When not specified, name is set to an empty string.
12
+ # @param values [Array, nil]
13
+ # One-dimensional data in various forms. Supported are: Array and Series.
14
+ # @param dtype [Symbol, nil]
15
+ # Polars dtype of the Series data. If not specified, the dtype is inferred.
16
+ # @param strict [Boolean]
17
+ # Throw error on numeric overflow.
18
+ # @param nan_to_null [Boolean]
19
+ # Not used.
20
+ # @param dtype_if_empty [Symbol, nil]
21
+ # If no dtype is specified and values contains `nil` or an empty array,
22
+ # set the Polars dtype of the Series data. If not specified, Float32 is used.
23
+ #
24
+ # @example Constructing a Series by specifying name and values positionally:
25
+ # s = Polars::Series.new("a", [1, 2, 3])
26
+ #
27
+ # @example Notice that the dtype is automatically inferred as a polars Int64:
28
+ # s.dtype
29
+ # # => :i64
30
+ #
31
+ # @example Constructing a Series with a specific dtype:
32
+ # s2 = Polars::Series.new("a", [1, 2, 3], dtype: :f32)
33
+ #
34
+ # @example It is possible to construct a Series with values as the first positional argument. This syntax is considered an anti-pattern, but it can be useful in certain scenarios. You must specify any other arguments through keywords.
35
+ # s3 = Polars::Series.new([1, 2, 3])
5
36
  def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false, dtype_if_empty: nil)
6
37
  # Handle case where values are passed as the first argument
7
38
  if !name.nil? && !name.is_a?(String)
@@ -35,83 +66,455 @@ module Polars
35
66
  end
36
67
  end
37
68
 
69
+ # @private
38
70
  def self._from_rbseries(s)
39
71
  series = Series.allocate
40
72
  series._s = s
41
73
  series
42
74
  end
43
75
 
76
+ # Get the data type of this Series.
77
+ #
78
+ # @return [Symbol]
44
79
  def dtype
45
- _s.dtype.to_sym
80
+ _s.dtype
46
81
  end
47
82
 
83
+ # Get flags that are set on the Series.
84
+ #
85
+ # @return [Hash]
86
+ def flags
87
+ {
88
+ "SORTED_ASC" => _s.is_sorted_flag,
89
+ "SORTED_DESC" => _s.is_sorted_reverse_flag
90
+ }
91
+ end
92
+
93
+ # Get the inner dtype of a List typed Series.
94
+ #
95
+ # @return [Symbol]
96
+ def inner_dtype
97
+ _s.inner_dtype
98
+ end
99
+
100
+ # Get the name of this Series.
101
+ #
102
+ # @return [String]
48
103
  def name
49
104
  _s.name
50
105
  end
51
106
 
107
+ # Shape of this Series.
108
+ #
109
+ # @return [Array]
52
110
  def shape
53
111
  [_s.len]
54
112
  end
55
113
 
114
+ # Get the time unit of underlying Datetime Series as `"ns"`, `"us"`, or `"ms"`.
115
+ #
116
+ # @return [String]
117
+ def time_unit
118
+ _s.time_unit
119
+ end
120
+
121
+ # Returns a string representing the Series.
122
+ #
123
+ # @return [String]
56
124
  def to_s
57
125
  _s.to_s
58
126
  end
59
127
  alias_method :inspect, :to_s
60
128
 
129
+ # Bitwise AND.
130
+ #
131
+ # @return [Series]
61
132
  def &(other)
62
133
  Utils.wrap_s(_s.bitand(other._s))
63
134
  end
64
135
 
136
+ # Bitwise OR.
137
+ #
138
+ # @return [Series]
65
139
  def |(other)
66
140
  Utils.wrap_s(_s.bitor(other._s))
67
141
  end
68
142
 
143
+ # Bitwise XOR.
144
+ #
145
+ # @return [Series]
69
146
  def ^(other)
70
147
  Utils.wrap_s(_s.bitxor(other._s))
71
148
  end
72
149
 
150
+ # def ==(other)
151
+ # end
152
+
153
+ # def !=(other)
154
+ # end
155
+
156
+ # def >(other)
157
+ # end
158
+
159
+ # def <(other)
160
+ # end
161
+
162
+ # def >=(other)
163
+ # end
164
+
165
+ # def <=(other)
166
+ # end
167
+
168
+ # Performs addition.
169
+ #
170
+ # @return [Series]
73
171
  def +(other)
74
172
  Utils. wrap_s(_s.add(other._s))
75
173
  end
76
174
 
175
+ # Performs subtraction.
176
+ #
177
+ # @return [Series]
77
178
  def -(other)
78
179
  Utils.wrap_s(_s.sub(other._s))
79
180
  end
80
181
 
182
+ # Performs multiplication.
183
+ #
184
+ # @return [Series]
81
185
  def *(other)
82
186
  Utils.wrap_s(_s.mul(other._s))
83
187
  end
84
188
 
189
+ # Performs division.
190
+ #
191
+ # @return [Series]
85
192
  def /(other)
86
193
  Utils.wrap_s(_s.div(other._s))
87
194
  end
88
195
 
196
+ # Raises to the power of exponent.
197
+ #
198
+ # @return [Series]
199
+ def **(power)
200
+ if is_datelike
201
+ raise ArgumentError, "first cast to integer before raising datelike dtypes to a power"
202
+ end
203
+ to_frame.select(Polars.col(name).pow(power)).to_series
204
+ end
205
+
206
+ # def -@(other)
207
+ # end
208
+
209
+ # Returns elements of the Series.
210
+ #
211
+ # @return [Object]
212
+ def [](item)
213
+ _s.get_idx(item)
214
+ end
215
+
216
+ # def []=(key, value)
217
+ # end
218
+
219
+ # Return an estimation of the total (heap) allocated size of the Series.
220
+ #
221
+ # Estimated size is given in the specified unit (bytes by default).
222
+ #
223
+ # This estimation is the sum of the size of its buffers, validity, including
224
+ # nested arrays. Multiple arrays may share buffers and bitmaps. Therefore, the
225
+ # size of 2 arrays is not the sum of the sizes computed from this function. In
226
+ # particular, StructArray's size is an upper bound.
227
+ #
228
+ # When an array is sliced, its allocated size remains constant because the buffer
229
+ # is unchanged. However, this function will yield a smaller number. This is because
230
+ # this function returns the visible size of the buffer, not its total capacity.
231
+ #
232
+ # FFI buffers are included in this estimation.
233
+ #
234
+ # @param unit ["b", "kb", "mb", "gb", "tb"]
235
+ # Scale the returned size to the given unit.
236
+ #
237
+ # @return [Numeric]
238
+ #
239
+ # @example
240
+ # s = Polars::Series.new("values", 1..1_000_000, dtype: :u32)
241
+ # s.estimated_size
242
+ # # => 4000000
243
+ # s.estimated_size("mb")
244
+ # # => 3.814697265625
245
+ def estimated_size(unit = "b")
246
+ sz = _s.estimated_size
247
+ Utils.scale_bytes(sz, to: unit)
248
+ end
249
+
250
+ # Compute the square root of the elements.
251
+ #
252
+ # @return [Series]
253
+ def sqrt
254
+ self**0.5
255
+ end
256
+
257
+ # Check if any boolean value in the column is `true`.
258
+ #
259
+ # @return [Boolean]
260
+ def any
261
+ to_frame.select(Polars.col(name).any).to_series[0]
262
+ end
263
+
264
+ # Check if all boolean values in the column are `true`.
265
+ #
266
+ # @return [Boolean]
267
+ def all
268
+ to_frame.select(Polars.col(name).all).to_series[0]
269
+ end
270
+
271
+ # def log
272
+ # end
273
+
274
+ # def log10
275
+ # end
276
+
277
+ # def exp
278
+ # end
279
+
280
+ # def drop_nulls
281
+ # end
282
+
283
+ # def drop_nans
284
+ # end
285
+
286
+ # Cast this Series to a DataFrame.
287
+ #
288
+ # @return [DataFrame]
89
289
  def to_frame
90
290
  Utils.wrap_df(RbDataFrame.new([_s]))
91
291
  end
92
292
 
293
+ # def describe
294
+ # end
295
+
296
+ # Reduce this Series to the sum value.
297
+ #
298
+ # @return [Numeric]
299
+ #
300
+ # @note
301
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
302
+ # `:i64` before summing to prevent overflow issues.
303
+ #
304
+ # @example
305
+ # s = Polars::Series.new("a", [1, 2, 3])
306
+ # s.sum
307
+ # # => 6
93
308
  def sum
94
309
  _s.sum
95
310
  end
96
311
 
312
+ # Reduce this Series to the mean value.
313
+ #
314
+ # @return [Float, nil]
315
+ #
316
+ # @example
317
+ # s = Polars::Series.new("a", [1, 2, 3])
318
+ # s.mean
319
+ # # => 2.0
97
320
  def mean
98
321
  _s.mean
99
322
  end
100
323
 
324
+ # Reduce this Series to the product value.
325
+ #
326
+ # @return [Numeric]
327
+ def product
328
+ to_frame.select(Polars.col(name).product).to_series[0]
329
+ end
330
+
331
+ # Get the minimal value in this Series.
332
+ #
333
+ # @return [Object]
334
+ #
335
+ # @example
336
+ # s = Polars::Series.new("a", [1, 2, 3])
337
+ # s.min
338
+ # # => 1
101
339
  def min
102
340
  _s.min
103
341
  end
104
342
 
343
+ # Get the maximum value in this Series.
344
+ #
345
+ # @return [Object]
346
+ #
347
+ # @example
348
+ # s = Polars::Series.new("a", [1, 2, 3])
349
+ # s.max
350
+ # # => 3
105
351
  def max
106
352
  _s.max
107
353
  end
108
354
 
355
+ # def nan_max
356
+ # end
357
+
358
+ # def nan_min
359
+ # end
360
+
361
+ # Get the standard deviation of this Series.
362
+ #
363
+ # @param ddof [Integer]
364
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
365
+ # where N represents the number of elements.
366
+ #
367
+ # @return [Float, nil]
368
+ #
369
+ # @example
370
+ # s = Polars::Series.new("a", [1, 2, 3])
371
+ # s.std
372
+ # # => 1.0
373
+ def std(ddof: 1)
374
+ if !is_numeric
375
+ nil
376
+ else
377
+ to_frame.select(Polars.col(name).std(ddof: ddof)).to_series[0]
378
+ end
379
+ end
380
+
381
+ # Get variance of this Series.
382
+ #
383
+ # @param ddof [Integer]
384
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
385
+ # where N represents the number of elements.
386
+ #
387
+ # @return [Float, nil]
388
+ #
389
+ # @example
390
+ # s = Polars::Series.new("a", [1, 2, 3])
391
+ # s.var
392
+ # # => 1.0
393
+ def var(ddof: 1)
394
+ if !is_numeric
395
+ nil
396
+ else
397
+ to_frame.select(Polars.col(name).var(ddof: ddof)).to_series[0]
398
+ end
399
+ end
400
+
401
+ # Get the median of this Series.
402
+ #
403
+ # @return [Float, nil]
404
+ #
405
+ # @example
406
+ # s = Polars::Series.new("a", [1, 2, 3])
407
+ # s.median
408
+ # # => 2.0
409
+ def median
410
+ _s.median
411
+ end
412
+
413
+ # Get the quantile value of this Series.
414
+ #
415
+ # @param quantile [Float, nil]
416
+ # Quantile between 0.0 and 1.0.
417
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
418
+ # Interpolation method.
419
+ #
420
+ # @return [Float, nil]
421
+ #
422
+ # @example
423
+ # s = Polars::Series.new("a", [1, 2, 3])
424
+ # s.quantile(0.5)
425
+ # # => 2.0
426
+ def quantile(quantile, interpolation: "nearest")
427
+ _s.quantile(quantile, interpolation)
428
+ end
429
+
430
+ # Get dummy variables.
431
+ #
432
+ # @return [DataFrame]
433
+ #
434
+ # @example
435
+ # s = Polars::Series.new("a", [1, 2, 3])
436
+ # s.to_dummies
437
+ # # =>
438
+ # # shape: (3, 3)
439
+ # # ┌─────┬─────┬─────┐
440
+ # # │ a_1 ┆ a_2 ┆ a_3 │
441
+ # # │ --- ┆ --- ┆ --- │
442
+ # # │ u8 ┆ u8 ┆ u8 │
443
+ # # ╞═════╪═════╪═════╡
444
+ # # │ 1 ┆ 0 ┆ 0 │
445
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
446
+ # # │ 0 ┆ 1 ┆ 0 │
447
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
448
+ # # │ 0 ┆ 0 ┆ 1 │
449
+ # # └─────┴─────┴─────┘
450
+ def to_dummies
451
+ Utils.wrap_df(_s.to_dummies)
452
+ end
453
+
454
+ # Count the unique values in a Series.
455
+ #
456
+ # @param sort [Boolean]
457
+ # Ensure the output is sorted from most values to least.
458
+ #
459
+ # @return [DataFrame]
460
+ #
461
+ # @example
462
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
463
+ # s.value_counts.sort("a")
464
+ # # =>
465
+ # # shape: (3, 2)
466
+ # # ┌─────┬────────┐
467
+ # # │ a ┆ counts │
468
+ # # │ --- ┆ --- │
469
+ # # │ i64 ┆ u32 │
470
+ # # ╞═════╪════════╡
471
+ # # │ 1 ┆ 1 │
472
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
473
+ # # │ 2 ┆ 2 │
474
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
475
+ # # │ 3 ┆ 1 │
476
+ # # └─────┴────────┘
477
+ def value_counts(sort: false)
478
+ Utils.wrap_df(_s.value_counts(sort))
479
+ end
480
+
481
+ # def unique_counts
482
+ # end
483
+
484
+ # def entropy
485
+ # end
486
+
487
+ # def cumulative_eval
488
+ # end
489
+
490
+ # Return a copy of the Series with a new alias/name.
491
+ #
492
+ # @param name [String]
493
+ # New name.
494
+ #
495
+ # @return [Series]
496
+ #
497
+ # @example
498
+ # s = Polars::Series.new("x", [1, 2, 3])
499
+ # s.alias("y")
109
500
  def alias(name)
110
501
  s = dup
111
502
  s._s.rename(name)
112
503
  s
113
504
  end
114
505
 
506
+ # Rename this Series.
507
+ #
508
+ # @param name [String]
509
+ # New name.
510
+ # @param in_place [Boolean]
511
+ # Modify the Series in-place.
512
+ #
513
+ # @return [Series]
514
+ #
515
+ # @example
516
+ # s = Polars::Series.new("a", [1, 2, 3])
517
+ # s.rename("b")
115
518
  def rename(name, in_place: false)
116
519
  if in_place
117
520
  _s.rename(name)
@@ -121,52 +524,351 @@ module Polars
121
524
  end
122
525
  end
123
526
 
527
+ # Get the length of each individual chunk.
528
+ #
529
+ # @return [Array]
530
+ #
531
+ # @example
532
+ # s = Polars::Series.new("a", [1, 2, 3])
533
+ # s2 = Polars::Series.new("b", [4, 5, 6])
534
+ #
535
+ # @example Concatenate Series with rechunk: true
536
+ # Polars.concat([s, s2]).chunk_lengths
537
+ # # => [6]
538
+ #
539
+ # @example Concatenate Series with rechunk: false
540
+ # Polars.concat([s, s2], rechunk: false).chunk_lengths
541
+ # # => [3, 3]
124
542
  def chunk_lengths
125
543
  _s.chunk_lengths
126
544
  end
127
545
 
546
+ # Get the number of chunks that this Series contains.
547
+ #
548
+ # @return [Integer]
549
+ #
550
+ # @example
551
+ # s = Polars::Series.new("a", [1, 2, 3])
552
+ # s2 = Polars::Series.new("b", [4, 5, 6])
553
+ #
554
+ # @example Concatenate Series with rechunk: true
555
+ # Polars.concat([s, s2]).n_chunks
556
+ # # => 1
557
+ #
558
+ # @example Concatenate Series with rechunk: false
559
+ # Polars.concat([s, s2], rechunk: false).n_chunks
560
+ # # => 2
128
561
  def n_chunks
129
562
  _s.n_chunks
130
563
  end
131
564
 
565
+ # Get an array with the cumulative sum computed at every element.
566
+ #
567
+ # @param reverse [Boolean]
568
+ # reverse the operation.
569
+ #
570
+ # @return [Series]
571
+ #
572
+ # @note
573
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
574
+ # `:i64` before summing to prevent overflow issues.
575
+ #
576
+ # @example
577
+ # s = Polars::Series.new("a", [1, 2, 3])
578
+ # s.cumsum
579
+ # # =>
580
+ # # shape: (3,)
581
+ # # Series: 'a' [i64]
582
+ # # [
583
+ # # 1
584
+ # # 3
585
+ # # 6
586
+ # # ]
132
587
  def cumsum(reverse: false)
133
588
  Utils.wrap_s(_s.cumsum(reverse))
134
589
  end
135
590
 
591
+ # Get an array with the cumulative min computed at every element.
592
+ #
593
+ # @param reverse [Boolean]
594
+ # reverse the operation.
595
+ #
596
+ # @return [Series]
597
+ #
598
+ # @example
599
+ # s = Polars::Series.new("a", [3, 5, 1])
600
+ # s.cummin
601
+ # # =>
602
+ # # shape: (3,)
603
+ # # Series: 'a' [i64]
604
+ # # [
605
+ # # 3
606
+ # # 3
607
+ # # 1
608
+ # # ]
136
609
  def cummin(reverse: false)
137
610
  Utils.wrap_s(_s.cummin(reverse))
138
611
  end
139
612
 
613
+ # Get an array with the cumulative max computed at every element.
614
+ #
615
+ # @param reverse [Boolean]
616
+ # reverse the operation.
617
+ #
618
+ # @return [Series]
619
+ #
620
+ # @example
621
+ # s = Polars::Series.new("a", [3, 5, 1])
622
+ # s.cummax
623
+ # # =>
624
+ # # shape: (3,)
625
+ # # Series: 'a' [i64]
626
+ # # [
627
+ # # 3
628
+ # # 5
629
+ # # 5
630
+ # # ]
140
631
  def cummax(reverse: false)
141
632
  Utils.wrap_s(_s.cummax(reverse))
142
633
  end
143
634
 
635
+ # Get an array with the cumulative product computed at every element.
636
+ #
637
+ # @param reverse [Boolean]
638
+ # reverse the operation.
639
+ #
640
+ # @return [Series]
641
+ #
642
+ # @note
643
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
644
+ # `:i64` before multiplying to prevent overflow issues.
645
+ #
646
+ # @example
647
+ # s = Polars::Series.new("a", [1, 2, 3])
648
+ # s.cumprod
649
+ # # =>
650
+ # # shape: (3,)
651
+ # # Series: 'a' [i64]
652
+ # # [
653
+ # # 1
654
+ # # 2
655
+ # # 6
656
+ # # ]
657
+ def cumprod(reverse: false)
658
+ Utils.wrap_s(_s.cumprod(reverse))
659
+ end
660
+
661
+ # Get the first `n` rows.
662
+ #
663
+ # Alias for {#head}.
664
+ #
665
+ # @param n [Integer]
666
+ # Number of rows to return.
667
+ #
668
+ # @return [Series]
669
+ #
670
+ # @example
671
+ # s = Polars::Series.new("a", [1, 2, 3])
672
+ # s.limit(2)
673
+ # # =>
674
+ # # shape: (2,)
675
+ # # Series: 'a' [i64]
676
+ # # [
677
+ # # 1
678
+ # # 2
679
+ # # ]
144
680
  def limit(n = 10)
145
- to_frame().select(Utils.col(name).limit(n)).to_series
681
+ to_frame.select(Utils.col(name).limit(n)).to_series
146
682
  end
147
683
 
684
+ # Get a slice of this Series.
685
+ #
686
+ # @param offset [Integer]
687
+ # Start index. Negative indexing is supported.
688
+ # @param length [Integer, nil]
689
+ # Length of the slice. If set to `nil`, all rows starting at the offset
690
+ # will be selected.
691
+ #
692
+ # @return [Series]
693
+ #
694
+ # @example
695
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
696
+ # s.slice(1, 2)
697
+ # # =>
698
+ # # shape: (2,)
699
+ # # Series: 'a' [i64]
700
+ # # [
701
+ # # 2
702
+ # # 3
703
+ # # ]
148
704
  def slice(offset, length = nil)
149
705
  length = len if length.nil?
150
706
  Utils.wrap_s(_s.slice(offset, length))
151
707
  end
152
708
 
153
- def append(other)
154
- _s.append(other._s)
709
+ # Append a Series to this one.
710
+ #
711
+ # @param other [Series]
712
+ # Series to append.
713
+ # @param append_chunks [Boolean]
714
+ # If set to `true` the append operation will add the chunks from `other` to
715
+ # self. This is super cheap.
716
+ #
717
+ # If set to `false` the append operation will do the same as
718
+ # {DataFrame#extend} which extends the memory backed by this Series with
719
+ # the values from `other`.
720
+ #
721
+ # Different from `append_chunks`, `extend` appends the data from `other` to
722
+ # the underlying memory locations and thus may cause a reallocation (which is
723
+ # expensive).
724
+ #
725
+ # If this does not cause a reallocation, the resulting data structure will not
726
+ # have any extra chunks and thus will yield faster queries.
727
+ #
728
+ # Prefer `extend` over `append_chunks` when you want to do a query after a
729
+ # single append. For instance during online operations where you add `n` rows
730
+ # and rerun a query.
731
+ #
732
+ # Prefer `append_chunks` over `extend` when you want to append many times
733
+ # before doing a query. For instance, when you read in multiple files and want
734
+ # to store them in a single Series. In the latter case, finish the sequence
735
+ # of `append_chunks` operations with a `rechunk`.
736
+ #
737
+ # @return [Series]
738
+ #
739
+ # @example
740
+ # s = Polars::Series.new("a", [1, 2, 3])
741
+ # s2 = Polars::Series.new("b", [4, 5, 6])
742
+ # s.append(s2)
743
+ # # =>
744
+ # # shape: (6,)
745
+ # # Series: 'a' [i64]
746
+ # # [
747
+ # # 1
748
+ # # 2
749
+ # # 3
750
+ # # 4
751
+ # # 5
752
+ # # 6
753
+ # # ]
754
def append(other, append_chunks: true)
  begin
    # append_chunks: true adds the chunks of `other`; false extends the
    # existing memory instead (see the parameter documentation).
    if append_chunks
      _s.append(other._s)
    else
      _s.extend(other._s)
    end
  rescue => e
    # Any other failure is propagated untouched.
    raise unless e.message == "Already mutably borrowed"

    # NOTE(review): presumably raised by the native layer when `other`
    # shares its underlying series with the receiver (e.g. `s.append(s)`)
    # - confirm. Retry once with a copy of `other`. The flag must be
    # forwarded as a keyword: passing it positionally raised ArgumentError
    # because `append` accepts only one positional argument.
    append(other.clone, append_chunks: append_chunks)
  end
  self
end
157
770
 
771
+ # Filter elements by a boolean mask.
772
+ #
773
+ # @param predicate [Series, Array]
774
+ # Boolean mask.
775
+ #
776
+ # @return [Series]
777
+ #
778
+ # @example
779
+ # s = Polars::Series.new("a", [1, 2, 3])
780
+ # mask = Polars::Series.new("", [true, false, true])
781
+ # s.filter(mask)
782
+ # # =>
783
+ # # shape: (2,)
784
+ # # Series: 'a' [i64]
785
+ # # [
786
+ # # 1
787
+ # # 3
788
+ # # ]
158
789
  def filter(predicate)
790
+ if predicate.is_a?(Array)
791
+ predicate = Series.new("", predicate)
792
+ end
159
793
  Utils.wrap_s(_s.filter(predicate._s))
160
794
  end
161
795
 
796
+ # Get the first `n` rows.
797
+ #
798
+ # @param n [Integer]
799
+ # Number of rows to return.
800
+ #
801
+ # @return [Series]
802
+ #
803
+ # @example
804
+ # s = Polars::Series.new("a", [1, 2, 3])
805
+ # s.head(2)
806
+ # # =>
807
+ # # shape: (2,)
808
+ # # Series: 'a' [i64]
809
+ # # [
810
+ # # 1
811
+ # # 2
812
+ # # ]
162
813
  def head(n = 10)
163
814
  to_frame.select(Utils.col(name).head(n)).to_series
164
815
  end
165
816
 
817
+ # Get the last `n` rows.
818
+ #
819
+ # @param n [Integer]
820
+ # Number of rows to return.
821
+ #
822
+ # @return [Series]
823
+ #
824
+ # @example
825
+ # s = Polars::Series.new("a", [1, 2, 3])
826
+ # s.tail(2)
827
+ # # =>
828
+ # # shape: (2,)
829
+ # # Series: 'a' [i64]
830
+ # # [
831
+ # # 2
832
+ # # 3
833
+ # # ]
166
834
  def tail(n = 10)
167
835
  to_frame.select(Utils.col(name).tail(n)).to_series
168
836
  end
169
837
 
838
+ # def take_every
839
+ # end
840
+
841
+ # Sort this Series.
842
+ #
843
+ # @param reverse [Boolean]
844
+ # Reverse sort.
845
+ # @param in_place [Boolean]
846
+ # Sort in place.
847
+ #
848
+ # @return [Series]
849
+ #
850
+ # @example
851
+ # s = Polars::Series.new("a", [1, 3, 4, 2])
852
+ # s.sort
853
+ # # =>
854
+ # # shape: (4,)
855
+ # # Series: 'a' [i64]
856
+ # # [
857
+ # # 1
858
+ # # 2
859
+ # # 3
860
+ # # 4
861
+ # # ]
862
+ # s.sort(reverse: true)
863
+ # # =>
864
+ # # shape: (4,)
865
+ # # Series: 'a' [i64]
866
+ # # [
867
+ # # 4
868
+ # # 3
869
+ # # 2
870
+ # # 1
871
+ # # ]
170
872
  def sort(reverse: false, in_place: false)
171
873
  if in_place
172
874
  self._s = _s.sort(reverse)
@@ -176,21 +878,588 @@ module Polars
176
878
  end
177
879
  end
178
880
 
179
- def to_a
180
- _s.to_a
881
+ # def top_k
882
+ # end
883
+
884
+ # def arg_sort
885
+ # end
886
+
887
+ # def argsort
888
+ # end
889
+
890
+ # def arg_unique
891
+ # end
892
+
893
+ # Get the index of the minimal value.
894
+ #
895
+ # @return [Integer, nil]
896
+ #
897
+ # @example
898
+ # s = Polars::Series.new("a", [3, 2, 1])
899
+ # s.arg_min
900
+ # # => 2
901
+ def arg_min
902
+ _s.arg_min
181
903
  end
182
904
 
905
+ # Get the index of the maximal value.
906
+ #
907
+ # @return [Integer, nil]
908
+ #
909
+ # @example
910
+ # s = Polars::Series.new("a", [3, 2, 1])
911
+ # s.arg_max
912
+ # # => 0
913
+ def arg_max
914
+ _s.arg_max
915
+ end
916
+
917
+ # def search_sorted
918
+ # end
919
+
920
+ # def unique
921
+ # end
922
+
923
+ # def take
924
+ # end
925
+
926
+ # Count the null values in this Series.
927
+ #
928
+ # @return [Integer]
929
+ def null_count
930
+ _s.null_count
931
+ end
932
+
933
+ # Return `true` if the Series has a validity bitmask.
934
+ #
935
+ # If there is none, it means that there are no null values.
936
+ # Use this to swiftly assert a Series does not have null values.
937
+ #
938
+ # @return [Boolean]
939
+ def has_validity
940
+ _s.has_validity
941
+ end
942
+
943
+ # Check if the Series is empty.
944
+ #
945
+ # @return [Boolean]
946
+ #
947
+ # @example
948
+ # s = Polars::Series.new("a", [])
949
+ # s.is_empty
950
+ # # => true
951
+ def is_empty
952
+ len == 0
953
+ end
954
+ alias_method :empty?, :is_empty
955
+
956
+ # def is_null
957
+ # end
958
+
959
+ # def is_not_null
960
+ # end
961
+
962
+ # def is_finite
963
+ # end
964
+
965
+ # def is_infinite
966
+ # end
967
+
968
+ # def is_nan
969
+ # end
970
+
971
+ # def is_not_nan
972
+ # end
973
+
974
+ # def is_in
975
+ # end
976
+
977
+ # def arg_true
978
+ # end
979
+
980
+ # def is_unique
981
+ # end
982
+
983
+ # def is_first
984
+ # end
985
+
986
+ # def is_duplicated
987
+ # end
988
+
989
+ # def explode
990
+ # end
991
+
992
+ # Check if series is equal with another Series.
993
+ #
994
+ # @param other [Series]
995
+ # Series to compare with.
996
+ # @param null_equal [Boolean]
997
+ # Consider null values as equal.
998
+ # @param strict [Boolean]
999
+ # Don't allow different numerical dtypes, e.g. comparing `:u32` with a
1000
+ # `:i64` will return `false`.
1001
+ #
1002
+ # @return [Boolean]
1003
+ #
1004
+ # @example
1005
+ # s = Polars::Series.new("a", [1, 2, 3])
1006
+ # s2 = Polars::Series.new("b", [4, 5, 6])
1007
+ # s.series_equal(s)
1008
+ # # => true
1009
+ # s.series_equal(s2)
1010
+ # # => false
1011
+ def series_equal(other, null_equal: false, strict: false)
1012
+ _s.series_equal(other._s, null_equal, strict)
1013
+ end
1014
+
1015
+ # Length of this Series.
1016
+ #
1017
+ # @return [Integer]
1018
+ #
1019
+ # @example
1020
+ # s = Polars::Series.new("a", [1, 2, 3])
1021
+ # s.len
1022
+ # # => 3
183
1023
  def len
184
1024
  _s.len
185
1025
  end
1026
+ alias_method :length, :len
1027
+
1028
+ # def cast
1029
+ # end
1030
+
1031
+ # def to_physical
1032
+ # end
1033
+
1034
+ # Convert this Series to a Ruby Array. This operation clones data.
1035
+ #
1036
+ # @return [Array]
1037
+ #
1038
+ # @example
1039
+ # s = Polars::Series.new("a", [1, 2, 3])
1040
+ # s.to_a
1041
+ # # => [1, 2, 3]
1042
+ def to_a
1043
+ _s.to_a
1044
+ end
186
1045
 
1046
+ # Create a single chunk of memory for this Series.
1047
+ #
1048
+ # @param in_place [Boolean]
1049
+ # In place or not.
1050
+ #
1051
+ # @return [Series]
187
1052
  def rechunk(in_place: false)
188
1053
  opt_s = _s.rechunk(in_place)
189
1054
  in_place ? self : Utils.wrap_s(opt_s)
190
1055
  end
191
1056
 
1057
+ # def reverse
1058
+ # end
1059
+
1060
+ # Check if this Series datatype is numeric.
1061
+ #
1062
+ # @return [Boolean]
1063
+ #
1064
+ # @example
1065
+ # s = Polars::Series.new("a", [1, 2, 3])
1066
+ # s.is_numeric
1067
+ # # => true
1068
+ def is_numeric
1069
+ [:i8, :i16, :i32, :i64, :u8, :u16, :u32, :u64, :f32, :f64].include?(dtype)
1070
+ end
1071
+ alias_method :numeric?, :is_numeric
1072
+
1073
+ # Check if this Series datatype is datelike.
1074
+ #
1075
+ # @return [Boolean]
1076
+ #
1077
+ # @example
1078
+ # s = Polars::Series.new([Date.new(2021, 1, 1), Date.new(2021, 1, 2), Date.new(2021, 1, 3)])
1079
+ # s.is_datelike
1080
+ # # => true
1081
+ def is_datelike
1082
+ [:date, :datetime, :duration, :time].include?(dtype)
1083
+ end
1084
+
1085
+ # Check if this Series has floating point numbers.
1086
+ #
1087
+ # @return [Boolean]
1088
+ #
1089
+ # @example
1090
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0])
1091
+ # s.is_float
1092
+ # # => true
1093
+ def is_float
1094
+ [:f32, :f64].include?(dtype)
1095
+ end
1096
+ alias_method :float?, :is_float
1097
+
1098
+ # Check if this Series is a Boolean.
1099
+ #
1100
+ # @return [Boolean]
1101
+ #
1102
+ # @example
1103
+ # s = Polars::Series.new("a", [true, false, true])
1104
+ # s.is_boolean
1105
+ # # => true
1106
+ def is_boolean
1107
+ dtype == :bool
1108
+ end
1109
+ alias_method :boolean?, :is_boolean
1110
+ alias_method :is_bool, :is_boolean
1111
+ alias_method :bool?, :is_boolean
1112
+
1113
+ # Check if this Series datatype is a Utf8.
1114
+ #
1115
+ # @return [Boolean]
1116
+ #
1117
+ # @example
1118
+ # s = Polars::Series.new("x", ["a", "b", "c"])
1119
+ # s.is_utf8
1120
+ # # => true
1121
+ def is_utf8
1122
+ dtype == :str
1123
+ end
1124
+ alias_method :utf8?, :is_utf8
1125
+
1126
+ # def view
1127
+ # end
1128
+
1129
+ # def to_numo
1130
+ # end
1131
+
1132
+ # def set
1133
+ # end
1134
+
1135
+ # def set_at_idx
1136
+ # end
1137
+
1138
+ # def cleared
1139
+ # end
1140
+
1141
+ # clone handled by initialize_copy
1142
+
1143
+ # def fill_nan
1144
+ # end
1145
+
1146
+ # def fill_null
1147
+ # end
1148
+
1149
+ # Rounds down to the nearest integer value.
1150
+ #
1151
+ # Only works on floating point Series.
1152
+ #
1153
+ # @return [Series]
1154
+ #
1155
+ # @example
1156
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
1157
+ # s.floor
1158
+ # # =>
1159
+ # # shape: (3,)
1160
+ # # Series: 'a' [f64]
1161
+ # # [
1162
+ # # 1.0
1163
+ # # 2.0
1164
+ # # 3.0
1165
+ # # ]
1166
+ def floor
1167
+ Utils.wrap_s(_s.floor)
1168
+ end
1169
+
1170
+ # Rounds up to the nearest integer value.
1171
+ #
1172
+ # Only works on floating point Series.
1173
+ #
1174
+ # @return [Series]
1175
+ #
1176
+ # @example
1177
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
1178
+ # s.ceil
1179
+ # # =>
1180
+ # # shape: (3,)
1181
+ # # Series: 'a' [f64]
1182
+ # # [
1183
+ # # 2.0
1184
+ # # 3.0
1185
+ # # 4.0
1186
+ # # ]
1187
+ def ceil
1188
+ Utils.wrap_s(_s.ceil)
1189
+ end
1190
+
1191
+ # Round underlying floating point data by `decimals` digits.
1192
+ #
1193
+ # @param decimals [Integer]
1194
+ # number of decimals to round by.
1195
+ #
1196
+ # @return [Series]
1197
+ #
1198
+ # @example
1199
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
1200
+ # s.round(2)
1201
+ # # =>
1202
+ # # shape: (3,)
1203
+ # # Series: 'a' [f64]
1204
+ # # [
1205
+ # # 1.12
1206
+ # # 2.57
1207
+ # # 3.9
1208
+ # # ]
1209
+ def round(decimals = 0)
1210
+ Utils.wrap_s(_s.round(decimals))
1211
+ end
1212
+
1213
+ # def dot
1214
+ # end
1215
+
1216
+ # def mode
1217
+ # end
1218
+
1219
+ # def sign
1220
+ # end
1221
+
1222
+ # def sin
1223
+ # end
1224
+
1225
+ # def cos
1226
+ # end
1227
+
1228
+ # def tan
1229
+ # end
1230
+
1231
+ # def arcsin
1232
+ # end
1233
+
1234
+ # def arccos
1235
+ # end
1236
+
1237
+ # def arctan
1238
+ # end
1239
+
1240
+ # def arcsinh
1241
+ # end
1242
+
1243
+ # def arccosh
1244
+ # end
1245
+
1246
+ # def arctanh
1247
+ # end
1248
+
1249
+ # def sinh
1250
+ # end
1251
+
1252
+ # def cosh
1253
+ # end
1254
+
1255
+ # def tanh
1256
+ # end
1257
+
1258
+ # def apply
1259
+ # end
1260
+
1261
+ # def shift
1262
+ # end
1263
+
1264
+ # def shift_and_fill
1265
+ # end
1266
+
1267
+ # def zip_with
1268
+ # end
1269
+
1270
+ # def rolling_min
1271
+ # end
1272
+
1273
+ # def rolling_max
1274
+ # end
1275
+
1276
+ # def rolling_mean
1277
+ # end
1278
+
1279
+ # def rolling_sum
1280
+ # end
1281
+
1282
+ # def rolling_std
1283
+ # end
1284
+
1285
+ # def rolling_var
1286
+ # end
1287
+
1288
+ # def rolling_apply
1289
+ # end
1290
+
1291
+ # def rolling_median
1292
+ # end
1293
+
1294
+ # def rolling_quantile
1295
+ # end
1296
+
1297
+ # def rolling_skew
1298
+ # end
1299
+
1300
+ # def sample
1301
+ # end
1302
+
1303
+ # Get a boolean mask of the local maximum peaks.
1304
+ #
1305
+ # @return [Series]
1306
+ #
1307
+ # @example
1308
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
1309
+ # s.peak_max
1310
+ # # =>
1311
+ # # shape: (5,)
1312
+ # # Series: '' [bool]
1313
+ # # [
1314
+ # # false
1315
+ # # false
1316
+ # # false
1317
+ # # false
1318
+ # # true
1319
+ # # ]
1320
+ def peak_max
1321
+ Utils.wrap_s(_s.peak_max)
1322
+ end
1323
+
1324
+ # Get a boolean mask of the local minimum peaks.
1325
+ #
1326
+ # @return [Series]
1327
+ #
1328
+ # @example
1329
+ # s = Polars::Series.new("a", [4, 1, 3, 2, 5])
1330
+ # s.peak_min
1331
+ # # =>
1332
+ # # shape: (5,)
1333
+ # # Series: '' [bool]
1334
+ # # [
1335
+ # # false
1336
+ # # true
1337
+ # # false
1338
+ # # true
1339
+ # # false
1340
+ # # ]
1341
+ def peak_min
1342
+ Utils.wrap_s(_s.peak_min)
1343
+ end
1344
+
1345
+ # Count the number of unique values in this Series.
1346
+ #
1347
+ # @return [Integer]
1348
+ #
1349
+ # @example
1350
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1351
+ # s.n_unique
1352
+ # # => 3
1353
+ def n_unique
1354
+ _s.n_unique
1355
+ end
1356
+
1357
+ # def shrink_to_fit
1358
+ # end
1359
+
1360
+ # def _hash
1361
+ # end
1362
+
1363
+ # def reinterpret
1364
+ # end
1365
+
1366
+ # def interpolate
1367
+ # end
1368
+
1369
+ # def abs
1370
+ # end
1371
+
1372
+ # def rank
1373
+ # end
1374
+
1375
+ # def diff
1376
+ # end
1377
+
1378
+ # def pct_change
1379
+ # end
1380
+
1381
+ # def skew
1382
+ # end
1383
+
1384
+ # def kurtosis
1385
+ # end
1386
+
1387
+ # def clip
1388
+ # end
1389
+
1390
+ # def clip_min
1391
+ # end
1392
+
1393
+ # def clip_max
1394
+ # end
1395
+
1396
+ # def reshape
1397
+ # end
1398
+
1399
+ # def shuffle
1400
+ # end
1401
+
1402
+ # def ewm_mean
1403
+ # end
1404
+
1405
+ # def ewm_std
1406
+ # end
1407
+
1408
+ # def ewm_var
1409
+ # end
1410
+
1411
+ # def extend_constant
1412
+ # end
1413
+
1414
+ # Flags the Series as sorted.
1415
+ #
1416
+ # Enables downstream code to use fast paths for sorted arrays.
1417
+ #
1418
+ # @param reverse [Boolean]
1419
+ # If the Series order is reversed, e.g. descending.
1420
+ #
1421
+ # @return [Series]
1422
+ #
1423
+ # @note
1424
+ # This can lead to incorrect results if this Series is not sorted!!
1425
+ # Use with care!
1426
+ #
1427
+ # @example
1428
+ # s = Polars::Series.new("a", [1, 2, 3])
1429
+ # s.set_sorted.max
1430
+ # # => 3
1431
+ def set_sorted(reverse: false)
1432
+ Utils.wrap_s(_s.set_sorted(reverse))
1433
+ end
1434
+
1435
+ # def new_from_index
1436
+ # end
1437
+
1438
+ # def shrink_dtype
1439
+ # end
1440
+
1441
+ # def arr
1442
+ # end
1443
+
1444
+ # def cat
1445
+ # end
1446
+
1447
+ # def dt
1448
+ # end
1449
+
1450
+ # def str
1451
+ # end
1452
+
1453
+ # def struct
1454
+ # end
1455
+
192
1456
  private
193
1457
 
1458
+ def initialize_copy(other)
1459
+ super
1460
+ self._s = _s._clone
1461
+ end
1462
+
194
1463
  def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
195
1464
  ruby_dtype = nil
196
1465
 
@@ -205,17 +1474,42 @@ module Polars
205
1474
  end
206
1475
  end
207
1476
 
1477
+ rb_temporal_types = []
1478
+ rb_temporal_types << Date if defined?(Date)
1479
+ rb_temporal_types << DateTime if defined?(DateTime)
1480
+ rb_temporal_types << Time if defined?(Time)
1481
+
208
1482
  # _get_first_non_none
209
1483
  value = values.find { |v| !v.nil? }
210
1484
 
211
- if !dtype.nil? && is_polars_dtype(dtype) && ruby_dtype.nil?
1485
+ if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
212
1486
  constructor = polars_type_to_constructor(dtype)
213
1487
  rbseries = constructor.call(name, values, strict)
214
1488
  return rbseries
215
- end
1489
+ else
1490
+ if ruby_dtype.nil?
1491
+ if value.nil?
1492
+ # generic default dtype
1493
+ ruby_dtype = Float
1494
+ else
1495
+ ruby_dtype = value.class
1496
+ end
1497
+ end
216
1498
 
217
- constructor = rb_type_to_constructor(value.class)
218
- constructor.call(name, values, strict)
1499
+ # temporal branch
1500
+ if rb_temporal_types.include?(ruby_dtype)
1501
+ # if dtype.nil?
1502
+ # dtype = rb_type_to_dtype(ruby_dtype)
1503
+ # elsif rb_temporal_types.include?(dtype)
1504
+ # dtype = rb_type_to_dtype(dtype)
1505
+ # end
1506
+
1507
+ raise Todo
1508
+ else
1509
+ constructor = rb_type_to_constructor(value.class)
1510
+ constructor.call(name, values, strict)
1511
+ end
1512
+ end
219
1513
  end
220
1514
 
221
1515
  POLARS_TYPE_TO_CONSTRUCTOR = {
@@ -253,9 +1547,5 @@ module Polars
253
1547
  # RbSeries.method(:new_object)
254
1548
  raise ArgumentError, "Cannot determine type"
255
1549
  end
256
-
257
- def is_polars_dtype(data_type)
258
- true
259
- end
260
1550
  end
261
1551
  end