polars-df 0.1.0 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/lib/polars/series.rb CHANGED
@@ -1,7 +1,38 @@
1
1
  module Polars
2
+ # A Series represents a single column in a polars DataFrame.
2
3
  class Series
4
+ # @private
3
5
  attr_accessor :_s
4
6
 
7
+ # Create a new Series.
8
+ #
9
+ # @param name [String, Array, nil]
10
+ # Name of the series. Will be used as a column name when used in a DataFrame.
11
+ # When not specified, name is set to an empty string.
12
+ # @param values [Array, nil]
13
+ # One-dimensional data in various forms. Supported are: Array and Series.
14
+ # @param dtype [Symbol, nil]
15
+ # Polars dtype of the Series data. If not specified, the dtype is inferred.
16
+ # @param strict [Boolean]
17
+ # Throw error on numeric overflow.
18
+ # @param nan_to_null [Boolean]
19
+ # Not used.
20
+ # @param dtype_if_empty [Symbol, nil]
21
+ # If no dtype is specified and values contains `nil` or an empty array,
22
+ # set the Polars dtype of the Series data. If not specified, Float32 is used.
23
+ #
24
+ # @example Constructing a Series by specifying name and values positionally:
25
+ # s = Polars::Series.new("a", [1, 2, 3])
26
+ #
27
+ # @example Notice that the dtype is automatically inferred as a polars Int64:
28
+ # s.dtype
29
+ # # => :i64
30
+ #
31
+ # @example Constructing a Series with a specific dtype:
32
+ # s2 = Polars::Series.new("a", [1, 2, 3], dtype: :f32)
33
+ #
34
+ # @example It is possible to construct a Series with values as the first positional argument. This syntax is considered an anti-pattern, but it can be useful in certain scenarios. You must specify any other arguments through keywords.
35
+ # s3 = Polars::Series.new([1, 2, 3])
5
36
  def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false, dtype_if_empty: nil)
6
37
  # Handle case where values are passed as the first argument
7
38
  if !name.nil? && !name.is_a?(String)
@@ -35,83 +66,455 @@ module Polars
35
66
  end
36
67
  end
37
68
 
69
+ # @private
38
70
  def self._from_rbseries(s)
39
71
  series = Series.allocate
40
72
  series._s = s
41
73
  series
42
74
  end
43
75
 
76
+ # Get the data type of this Series.
77
+ #
78
+ # @return [Symbol]
44
79
  def dtype
45
- _s.dtype.to_sym
80
+ _s.dtype
46
81
  end
47
82
 
83
+ # Get flags that are set on the Series.
84
+ #
85
+ # @return [Hash]
86
+ def flags
87
+ {
88
+ "SORTED_ASC" => _s.is_sorted_flag,
89
+ "SORTED_DESC" => _s.is_sorted_reverse_flag
90
+ }
91
+ end
92
+
93
+ # Get the inner dtype of a List typed Series.
94
+ #
95
+ # @return [Symbol]
96
+ def inner_dtype
97
+ _s.inner_dtype
98
+ end
99
+
100
+ # Get the name of this Series.
101
+ #
102
+ # @return [String]
48
103
  def name
49
104
  _s.name
50
105
  end
51
106
 
107
+ # Shape of this Series.
108
+ #
109
+ # @return [Array]
52
110
  def shape
53
111
  [_s.len]
54
112
  end
55
113
 
114
+ # Get the time unit of underlying Datetime Series as `"ns"`, `"us"`, or `"ms"`.
115
+ #
116
+ # @return [String]
117
+ def time_unit
118
+ _s.time_unit
119
+ end
120
+
121
+ # Returns a string representing the Series.
122
+ #
123
+ # @return [String]
56
124
  def to_s
57
125
  _s.to_s
58
126
  end
59
127
  alias_method :inspect, :to_s
60
128
 
129
+ # Bitwise AND.
130
+ #
131
+ # @return [Series]
61
132
  def &(other)
62
133
  Utils.wrap_s(_s.bitand(other._s))
63
134
  end
64
135
 
136
+ # Bitwise OR.
137
+ #
138
+ # @return [Series]
65
139
  def |(other)
66
140
  Utils.wrap_s(_s.bitor(other._s))
67
141
  end
68
142
 
143
+ # Bitwise XOR.
144
+ #
145
+ # @return [Series]
69
146
  def ^(other)
70
147
  Utils.wrap_s(_s.bitxor(other._s))
71
148
  end
72
149
 
150
+ # def ==(other)
151
+ # end
152
+
153
+ # def !=(other)
154
+ # end
155
+
156
+ # def >(other)
157
+ # end
158
+
159
+ # def <(other)
160
+ # end
161
+
162
+ # def >=(other)
163
+ # end
164
+
165
+ # def <=(other)
166
+ # end
167
+
168
+ # Performs addition.
169
+ #
170
+ # @return [Series]
73
171
  def +(other)
74
172
  Utils. wrap_s(_s.add(other._s))
75
173
  end
76
174
 
175
+ # Performs subtraction.
176
+ #
177
+ # @return [Series]
77
178
  def -(other)
78
179
  Utils.wrap_s(_s.sub(other._s))
79
180
  end
80
181
 
182
+ # Performs multiplication.
183
+ #
184
+ # @return [Series]
81
185
  def *(other)
82
186
  Utils.wrap_s(_s.mul(other._s))
83
187
  end
84
188
 
189
+ # Performs division.
190
+ #
191
+ # @return [Series]
85
192
  def /(other)
86
193
  Utils.wrap_s(_s.div(other._s))
87
194
  end
88
195
 
196
+ # Raises to the power of exponent.
197
+ #
198
+ # @return [Series]
199
+ def **(power)
200
+ if is_datelike
201
+ raise ArgumentError, "first cast to integer before raising datelike dtypes to a power"
202
+ end
203
+ to_frame.select(Polars.col(name).pow(power)).to_series
204
+ end
205
+
206
+ # def -@(other)
207
+ # end
208
+
209
+ # Returns elements of the Series.
210
+ #
211
+ # @return [Object]
212
+ def [](item)
213
+ _s.get_idx(item)
214
+ end
215
+
216
+ # def []=(key, value)
217
+ # end
218
+
219
+ # Return an estimation of the total (heap) allocated size of the Series.
220
+ #
221
+ # Estimated size is given in the specified unit (bytes by default).
222
+ #
223
+ # This estimation is the sum of the size of its buffers, validity, including
224
+ # nested arrays. Multiple arrays may share buffers and bitmaps. Therefore, the
225
+ # size of 2 arrays is not the sum of the sizes computed from this function. In
226
+ # particular, StructArray's size is an upper bound.
227
+ #
228
+ # When an array is sliced, its allocated size remains constant because the buffer is
229
+ # unchanged. However, this function will yield a smaller number. This is because
230
+ # this function returns the visible size of the buffer, not its total capacity.
231
+ #
232
+ # FFI buffers are included in this estimation.
233
+ #
234
+ # @param unit ["b", "kb", "mb", "gb", "tb"]
235
+ # Scale the returned size to the given unit.
236
+ #
237
+ # @return [Numeric]
238
+ #
239
+ # @example
240
+ # s = Polars::Series.new("values", 1..1_000_000, dtype: :u32)
241
+ # s.estimated_size
242
+ # # => 4000000
243
+ # s.estimated_size("mb")
244
+ # # => 3.814697265625
245
+ def estimated_size(unit = "b")
246
+ sz = _s.estimated_size
247
+ Utils.scale_bytes(sz, to: unit)
248
+ end
249
+
250
+ # Compute the square root of the elements.
251
+ #
252
+ # @return [Series]
253
+ def sqrt
254
+ self**0.5
255
+ end
256
+
257
+ # Check if any boolean value in the column is `true`.
258
+ #
259
+ # @return [Boolean]
260
+ def any
261
+ to_frame.select(Polars.col(name).any).to_series[0]
262
+ end
263
+
264
+ # Check if all boolean values in the column are `true`.
265
+ #
266
+ # @return [Boolean]
267
+ def all
268
+ to_frame.select(Polars.col(name).all).to_series[0]
269
+ end
270
+
271
+ # def log
272
+ # end
273
+
274
+ # def log10
275
+ # end
276
+
277
+ # def exp
278
+ # end
279
+
280
+ # def drop_nulls
281
+ # end
282
+
283
+ # def drop_nans
284
+ # end
285
+
286
+ # Cast this Series to a DataFrame.
287
+ #
288
+ # @return [DataFrame]
89
289
  def to_frame
90
290
  Utils.wrap_df(RbDataFrame.new([_s]))
91
291
  end
92
292
 
293
+ # def describe
294
+ # end
295
+
296
+ # Reduce this Series to the sum value.
297
+ #
298
+ # @return [Numeric]
299
+ #
300
+ # @note
301
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
302
+ # `:i64` before summing to prevent overflow issues.
303
+ #
304
+ # @example
305
+ # s = Polars::Series.new("a", [1, 2, 3])
306
+ # s.sum
307
+ # # => 6
93
308
  def sum
94
309
  _s.sum
95
310
  end
96
311
 
312
+ # Reduce this Series to the mean value.
313
+ #
314
+ # @return [Float, nil]
315
+ #
316
+ # @example
317
+ # s = Polars::Series.new("a", [1, 2, 3])
318
+ # s.mean
319
+ # # => 2.0
97
320
  def mean
98
321
  _s.mean
99
322
  end
100
323
 
324
+ # Reduce this Series to the product value.
325
+ #
326
+ # @return [Numeric]
327
+ def product
328
+ to_frame.select(Polars.col(name).product).to_series[0]
329
+ end
330
+
331
+ # Get the minimal value in this Series.
332
+ #
333
+ # @return [Object]
334
+ #
335
+ # @example
336
+ # s = Polars::Series.new("a", [1, 2, 3])
337
+ # s.min
338
+ # # => 1
101
339
  def min
102
340
  _s.min
103
341
  end
104
342
 
343
+ # Get the maximum value in this Series.
344
+ #
345
+ # @return [Object]
346
+ #
347
+ # @example
348
+ # s = Polars::Series.new("a", [1, 2, 3])
349
+ # s.max
350
+ # # => 3
105
351
  def max
106
352
  _s.max
107
353
  end
108
354
 
355
+ # def nan_max
356
+ # end
357
+
358
+ # def nan_min
359
+ # end
360
+
361
+ # Get the standard deviation of this Series.
362
+ #
363
+ # @param ddof [Integer]
364
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
365
+ # where N represents the number of elements.
366
+ #
367
+ # @return [Float, nil]
368
+ #
369
+ # @example
370
+ # s = Polars::Series.new("a", [1, 2, 3])
371
+ # s.std
372
+ # # => 1.0
373
+ def std(ddof: 1)
374
+ if !is_numeric
375
+ nil
376
+ else
377
+ to_frame.select(Polars.col(name).std(ddof: ddof)).to_series[0]
378
+ end
379
+ end
380
+
381
+ # Get variance of this Series.
382
+ #
383
+ # @param ddof [Integer]
384
+ # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
385
+ # where N represents the number of elements.
386
+ #
387
+ # @return [Float, nil]
388
+ #
389
+ # @example
390
+ # s = Polars::Series.new("a", [1, 2, 3])
391
+ # s.var
392
+ # # => 1.0
393
+ def var(ddof: 1)
394
+ if !is_numeric
395
+ nil
396
+ else
397
+ to_frame.select(Polars.col(name).var(ddof: ddof)).to_series[0]
398
+ end
399
+ end
400
+
401
+ # Get the median of this Series.
402
+ #
403
+ # @return [Float, nil]
404
+ #
405
+ # @example
406
+ # s = Polars::Series.new("a", [1, 2, 3])
407
+ # s.median
408
+ # # => 2.0
409
+ def median
410
+ _s.median
411
+ end
412
+
413
+ # Get the quantile value of this Series.
414
+ #
415
+ # @param quantile [Float, nil]
416
+ # Quantile between 0.0 and 1.0.
417
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
418
+ # Interpolation method.
419
+ #
420
+ # @return [Float, nil]
421
+ #
422
+ # @example
423
+ # s = Polars::Series.new("a", [1, 2, 3])
424
+ # s.quantile(0.5)
425
+ # # => 2.0
426
+ def quantile(quantile, interpolation: "nearest")
427
+ _s.quantile(quantile, interpolation)
428
+ end
429
+
430
+ # Get dummy variables.
431
+ #
432
+ # @return [DataFrame]
433
+ #
434
+ # @example
435
+ # s = Polars::Series.new("a", [1, 2, 3])
436
+ # s.to_dummies
437
+ # # =>
438
+ # # shape: (3, 3)
439
+ # # ┌─────┬─────┬─────┐
440
+ # # │ a_1 ┆ a_2 ┆ a_3 │
441
+ # # │ --- ┆ --- ┆ --- │
442
+ # # │ u8 ┆ u8 ┆ u8 │
443
+ # # ╞═════╪═════╪═════╡
444
+ # # │ 1 ┆ 0 ┆ 0 │
445
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
446
+ # # │ 0 ┆ 1 ┆ 0 │
447
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
448
+ # # │ 0 ┆ 0 ┆ 1 │
449
+ # # └─────┴─────┴─────┘
450
+ def to_dummies
451
+ Utils.wrap_df(_s.to_dummies)
452
+ end
453
+
454
+ # Count the unique values in a Series.
455
+ #
456
+ # @param sort [Boolean]
457
+ # Ensure the output is sorted from most values to least.
458
+ #
459
+ # @return [DataFrame]
460
+ #
461
+ # @example
462
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
463
+ # s.value_counts.sort("a")
464
+ # # =>
465
+ # # shape: (3, 2)
466
+ # # ┌─────┬────────┐
467
+ # # │ a ┆ counts │
468
+ # # │ --- ┆ --- │
469
+ # # │ i64 ┆ u32 │
470
+ # # ╞═════╪════════╡
471
+ # # │ 1 ┆ 1 │
472
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
473
+ # # │ 2 ┆ 2 │
474
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
475
+ # # │ 3 ┆ 1 │
476
+ # # └─────┴────────┘
477
+ def value_counts(sort: false)
478
+ Utils.wrap_df(_s.value_counts(sort))
479
+ end
480
+
481
+ # def unique_counts
482
+ # end
483
+
484
+ # def entropy
485
+ # end
486
+
487
+ # def cumulative_eval
488
+ # end
489
+
490
+ # Return a copy of the Series with a new alias/name.
491
+ #
492
+ # @param name [String]
493
+ # New name.
494
+ #
495
+ # @return [Series]
496
+ #
497
+ # @example
498
+ # s = Polars::Series.new("x", [1, 2, 3])
499
+ # s.alias("y")
109
500
  def alias(name)
110
501
  s = dup
111
502
  s._s.rename(name)
112
503
  s
113
504
  end
114
505
 
506
+ # Rename this Series.
507
+ #
508
+ # @param name [String]
509
+ # New name.
510
+ # @param in_place [Boolean]
511
+ # Modify the Series in-place.
512
+ #
513
+ # @return [Series]
514
+ #
515
+ # @example
516
+ # s = Polars::Series.new("a", [1, 2, 3])
517
+ # s.rename("b")
115
518
  def rename(name, in_place: false)
116
519
  if in_place
117
520
  _s.rename(name)
@@ -121,52 +524,351 @@ module Polars
121
524
  end
122
525
  end
123
526
 
527
+ # Get the length of each individual chunk.
528
+ #
529
+ # @return [Array]
530
+ #
531
+ # @example
532
+ # s = Polars::Series.new("a", [1, 2, 3])
533
+ # s2 = Polars::Series.new("b", [4, 5, 6])
534
+ #
535
+ # @example Concatenate Series with rechunk: true
536
+ # Polars.concat([s, s2]).chunk_lengths
537
+ # # => [6]
538
+ #
539
+ # @example Concatenate Series with rechunk: false
540
+ # Polars.concat([s, s2], rechunk: false).chunk_lengths
541
+ # # => [3, 3]
124
542
  def chunk_lengths
125
543
  _s.chunk_lengths
126
544
  end
127
545
 
546
+ # Get the number of chunks that this Series contains.
547
+ #
548
+ # @return [Integer]
549
+ #
550
+ # @example
551
+ # s = Polars::Series.new("a", [1, 2, 3])
552
+ # s2 = Polars::Series.new("b", [4, 5, 6])
553
+ #
554
+ # @example Concatenate Series with rechunk: true
555
+ # Polars.concat([s, s2]).n_chunks
556
+ # # => 1
557
+ #
558
+ # @example Concatenate Series with rechunk: false
559
+ # Polars.concat([s, s2], rechunk: false).n_chunks
560
+ # # => 2
128
561
  def n_chunks
129
562
  _s.n_chunks
130
563
  end
131
564
 
565
+ # Get an array with the cumulative sum computed at every element.
566
+ #
567
+ # @param reverse [Boolean]
568
+ # reverse the operation.
569
+ #
570
+ # @return [Series]
571
+ #
572
+ # @note
573
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
574
+ # `:i64` before summing to prevent overflow issues.
575
+ #
576
+ # @example
577
+ # s = Polars::Series.new("a", [1, 2, 3])
578
+ # s.cumsum
579
+ # # =>
580
+ # # shape: (3,)
581
+ # # Series: 'a' [i64]
582
+ # # [
583
+ # # 1
584
+ # # 3
585
+ # # 6
586
+ # # ]
132
587
  def cumsum(reverse: false)
133
588
  Utils.wrap_s(_s.cumsum(reverse))
134
589
  end
135
590
 
591
+ # Get an array with the cumulative min computed at every element.
592
+ #
593
+ # @param reverse [Boolean]
594
+ # reverse the operation.
595
+ #
596
+ # @return [Series]
597
+ #
598
+ # @example
599
+ # s = Polars::Series.new("a", [3, 5, 1])
600
+ # s.cummin
601
+ # # =>
602
+ # # shape: (3,)
603
+ # # Series: 'a' [i64]
604
+ # # [
605
+ # # 3
606
+ # # 3
607
+ # # 1
608
+ # # ]
136
609
  def cummin(reverse: false)
137
610
  Utils.wrap_s(_s.cummin(reverse))
138
611
  end
139
612
 
613
+ # Get an array with the cumulative max computed at every element.
614
+ #
615
+ # @param reverse [Boolean]
616
+ # reverse the operation.
617
+ #
618
+ # @return [Series]
619
+ #
620
+ # @example
621
+ # s = Polars::Series.new("a", [3, 5, 1])
622
+ # s.cummax
623
+ # # =>
624
+ # # shape: (3,)
625
+ # # Series: 'a' [i64]
626
+ # # [
627
+ # # 3
628
+ # # 5
629
+ # # 5
630
+ # # ]
140
631
  def cummax(reverse: false)
141
632
  Utils.wrap_s(_s.cummax(reverse))
142
633
  end
143
634
 
635
+ # Get an array with the cumulative product computed at every element.
636
+ #
637
+ # @param reverse [Boolean]
638
+ # reverse the operation.
639
+ #
640
+ # @return [Series]
641
+ #
642
+ # @note
643
+ # Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
644
+ # `:i64` before multiplying to prevent overflow issues.
645
+ #
646
+ # @example
647
+ # s = Polars::Series.new("a", [1, 2, 3])
648
+ # s.cumprod
649
+ # # =>
650
+ # # shape: (3,)
651
+ # # Series: 'a' [i64]
652
+ # # [
653
+ # # 1
654
+ # # 2
655
+ # # 6
656
+ # # ]
657
+ def cumprod(reverse: false)
658
+ Utils.wrap_s(_s.cumprod(reverse))
659
+ end
660
+
661
+ # Get the first `n` rows.
662
+ #
663
+ # Alias for {#head}.
664
+ #
665
+ # @param n [Integer]
666
+ # Number of rows to return.
667
+ #
668
+ # @return [Series]
669
+ #
670
+ # @example
671
+ # s = Polars::Series.new("a", [1, 2, 3])
672
+ # s.limit(2)
673
+ # # =>
674
+ # # shape: (2,)
675
+ # # Series: 'a' [i64]
676
+ # # [
677
+ # # 1
678
+ # # 2
679
+ # # ]
144
680
  def limit(n = 10)
145
- to_frame().select(Utils.col(name).limit(n)).to_series
681
+ to_frame.select(Utils.col(name).limit(n)).to_series
146
682
  end
147
683
 
684
+ # Get a slice of this Series.
685
+ #
686
+ # @param offset [Integer]
687
+ # Start index. Negative indexing is supported.
688
+ # @param length [Integer, nil]
689
+ # Length of the slice. If set to `nil`, all rows starting at the offset
690
+ # will be selected.
691
+ #
692
+ # @return [Series]
693
+ #
694
+ # @example
695
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
696
+ # s.slice(1, 2)
697
+ # # =>
698
+ # # shape: (2,)
699
+ # # Series: 'a' [i64]
700
+ # # [
701
+ # # 2
702
+ # # 3
703
+ # # ]
148
704
  def slice(offset, length = nil)
149
705
  length = len if length.nil?
150
706
  Utils.wrap_s(_s.slice(offset, length))
151
707
  end
152
708
 
153
- def append(other)
154
- _s.append(other._s)
709
+ # Append a Series to this one.
710
+ #
711
+ # @param other [Series]
712
+ # Series to append.
713
+ # @param append_chunks [Boolean]
714
+ # If set to `true` the append operation will add the chunks from `other` to
715
+ # self. This is super cheap.
716
+ #
717
+ # If set to `false` the append operation will do the same as
718
+ # {DataFrame#extend} which extends the memory backed by this Series with
719
+ # the values from `other`.
720
+ #
721
+ # Different from `append_chunks`, `extend` appends the data from `other` to
722
+ # the underlying memory locations and thus may cause a reallocation (which is
723
+ # expensive).
724
+ #
725
+ # If this does not cause a reallocation, the resulting data structure will not
726
+ # have any extra chunks and thus will yield faster queries.
727
+ #
728
+ # Prefer `extend` over `append_chunks` when you want to do a query after a
729
+ # single append. For instance during online operations where you add `n` rows
730
+ # and rerun a query.
731
+ #
732
+ # Prefer `append_chunks` over `extend` when you want to append many times
733
+ # before doing a query. For instance, when you read in multiple files and want
734
+ # to store them in a single Series. In the latter case, finish the sequence
735
+ # of `append_chunks` operations with a `rechunk`.
736
+ #
737
+ # @return [Series]
738
+ #
739
+ # @example
740
+ # s = Polars::Series.new("a", [1, 2, 3])
741
+ # s2 = Polars::Series.new("b", [4, 5, 6])
742
+ # s.append(s2)
743
+ # # =>
744
+ # # shape: (6,)
745
+ # # Series: 'a' [i64]
746
+ # # [
747
+ # # 1
748
+ # # 2
749
+ # # 3
750
+ # # 4
751
+ # # 5
752
+ # # 6
753
+ # # ]
754
+ def append(other, append_chunks: true)
755
+ begin
756
+ if append_chunks
757
+ _s.append(other._s)
758
+ else
759
+ _s.extend(other._s)
760
+ end
761
+ rescue => e
762
+ if e.message == "Already mutably borrowed"
763
+ append(other.clone, append_chunks)
764
+ else
765
+ raise e
766
+ end
767
+ end
155
768
  self
156
769
  end
157
770
 
771
+ # Filter elements by a boolean mask.
772
+ #
773
+ # @param predicate [Series, Array]
774
+ # Boolean mask.
775
+ #
776
+ # @return [Series]
777
+ #
778
+ # @example
779
+ # s = Polars::Series.new("a", [1, 2, 3])
780
+ # mask = Polars::Series.new("", [true, false, true])
781
+ # s.filter(mask)
782
+ # # =>
783
+ # # shape: (2,)
784
+ # # Series: 'a' [i64]
785
+ # # [
786
+ # # 1
787
+ # # 3
788
+ # # ]
158
789
  def filter(predicate)
790
+ if predicate.is_a?(Array)
791
+ predicate = Series.new("", predicate)
792
+ end
159
793
  Utils.wrap_s(_s.filter(predicate._s))
160
794
  end
161
795
 
796
+ # Get the first `n` rows.
797
+ #
798
+ # @param n [Integer]
799
+ # Number of rows to return.
800
+ #
801
+ # @return [Series]
802
+ #
803
+ # @example
804
+ # s = Polars::Series.new("a", [1, 2, 3])
805
+ # s.head(2)
806
+ # # =>
807
+ # # shape: (2,)
808
+ # # Series: 'a' [i64]
809
+ # # [
810
+ # # 1
811
+ # # 2
812
+ # # ]
162
813
  def head(n = 10)
163
814
  to_frame.select(Utils.col(name).head(n)).to_series
164
815
  end
165
816
 
817
+ # Get the last `n` rows.
818
+ #
819
+ # @param n [Integer]
820
+ # Number of rows to return.
821
+ #
822
+ # @return [Series]
823
+ #
824
+ # @example
825
+ # s = Polars::Series.new("a", [1, 2, 3])
826
+ # s.tail(2)
827
+ # # =>
828
+ # # shape: (2,)
829
+ # # Series: 'a' [i64]
830
+ # # [
831
+ # # 2
832
+ # # 3
833
+ # # ]
166
834
  def tail(n = 10)
167
835
  to_frame.select(Utils.col(name).tail(n)).to_series
168
836
  end
169
837
 
838
+ # def take_every
839
+ # end
840
+
841
+ # Sort this Series.
842
+ #
843
+ # @param reverse [Boolean]
844
+ # Reverse sort.
845
+ # @param in_place [Boolean]
846
+ # Sort in place.
847
+ #
848
+ # @return [Series]
849
+ #
850
+ # @example
851
+ # s = Polars::Series.new("a", [1, 3, 4, 2])
852
+ # s.sort
853
+ # # =>
854
+ # # shape: (4,)
855
+ # # Series: 'a' [i64]
856
+ # # [
857
+ # # 1
858
+ # # 2
859
+ # # 3
860
+ # # 4
861
+ # # ]
862
+ # s.sort(reverse: true)
863
+ # # =>
864
+ # # shape: (4,)
865
+ # # Series: 'a' [i64]
866
+ # # [
867
+ # # 4
868
+ # # 3
869
+ # # 2
870
+ # # 1
871
+ # # ]
170
872
  def sort(reverse: false, in_place: false)
171
873
  if in_place
172
874
  self._s = _s.sort(reverse)
@@ -176,21 +878,588 @@ module Polars
176
878
  end
177
879
  end
178
880
 
179
- def to_a
180
- _s.to_a
881
+ # def top_k
882
+ # end
883
+
884
+ # def arg_sort
885
+ # end
886
+
887
+ # def argsort
888
+ # end
889
+
890
+ # def arg_unique
891
+ # end
892
+
893
+ # Get the index of the minimal value.
894
+ #
895
+ # @return [Integer, nil]
896
+ #
897
+ # @example
898
+ # s = Polars::Series.new("a", [3, 2, 1])
899
+ # s.arg_min
900
+ # # => 2
901
+ def arg_min
902
+ _s.arg_min
181
903
  end
182
904
 
905
+ # Get the index of the maximal value.
906
+ #
907
+ # @return [Integer, nil]
908
+ #
909
+ # @example
910
+ # s = Polars::Series.new("a", [3, 2, 1])
911
+ # s.arg_max
912
+ # # => 0
913
+ def arg_max
914
+ _s.arg_max
915
+ end
916
+
917
+ # def search_sorted
918
+ # end
919
+
920
+ # def unique
921
+ # end
922
+
923
+ # def take
924
+ # end
925
+
926
+ # Count the null values in this Series.
927
+ #
928
+ # @return [Integer]
929
+ def null_count
930
+ _s.null_count
931
+ end
932
+
933
+ # Return `true` if the Series has a validity bitmask.
934
+ #
935
+ # If there is none, it means that there are no null values.
936
+ # Use this to swiftly assert a Series does not have null values.
937
+ #
938
+ # @return [Boolean]
939
+ def has_validity
940
+ _s.has_validity
941
+ end
942
+
943
+ # Check if the Series is empty.
944
+ #
945
+ # @return [Boolean]
946
+ #
947
+ # @example
948
+ # s = Polars::Series.new("a", [])
949
+ # s.is_empty
950
+ # # => true
951
+ def is_empty
952
+ len == 0
953
+ end
954
+ alias_method :empty?, :is_empty
955
+
956
+ # def is_null
957
+ # end
958
+
959
+ # def is_not_null
960
+ # end
961
+
962
+ # def is_finite
963
+ # end
964
+
965
+ # def is_infinite
966
+ # end
967
+
968
+ # def is_nan
969
+ # end
970
+
971
+ # def is_not_nan
972
+ # end
973
+
974
+ # def is_in
975
+ # end
976
+
977
+ # def arg_true
978
+ # end
979
+
980
+ # def is_unique
981
+ # end
982
+
983
+ # def is_first
984
+ # end
985
+
986
+ # def is_duplicated
987
+ # end
988
+
989
+ # def explode
990
+ # end
991
+
992
+ # Check if series is equal with another Series.
993
+ #
994
+ # @param other [Series]
995
+ # Series to compare with.
996
+ # @param null_equal [Boolean]
997
+ # Consider null values as equal.
998
+ # @param strict [Boolean]
999
+ # Don't allow different numerical dtypes, e.g. comparing `:u32` with a
1000
+ # `:i64` will return `false`.
1001
+ #
1002
+ # @return [Boolean]
1003
+ #
1004
+ # @example
1005
+ # s = Polars::Series.new("a", [1, 2, 3])
1006
+ # s2 = Polars::Series.new("b", [4, 5, 6])
1007
+ # s.series_equal(s)
1008
+ # # => true
1009
+ # s.series_equal(s2)
1010
+ # # => false
1011
+ def series_equal(other, null_equal: false, strict: false)
1012
+ _s.series_equal(other._s, null_equal, strict)
1013
+ end
1014
+
1015
+ # Length of this Series.
1016
+ #
1017
+ # @return [Integer]
1018
+ #
1019
+ # @example
1020
+ # s = Polars::Series.new("a", [1, 2, 3])
1021
+ # s.len
1022
+ # # => 3
183
1023
  def len
184
1024
  _s.len
185
1025
  end
1026
+ alias_method :length, :len
1027
+
1028
+ # def cast
1029
+ # end
1030
+
1031
+ # def to_physical
1032
+ # end
1033
+
1034
+ # Convert this Series to a Ruby Array. This operation clones data.
1035
+ #
1036
+ # @return [Array]
1037
+ #
1038
+ # @example
1039
+ # s = Polars::Series.new("a", [1, 2, 3])
1040
+ # s.to_a
1041
+ # # => [1, 2, 3]
1042
+ def to_a
1043
+ _s.to_a
1044
+ end
186
1045
 
1046
+ # Create a single chunk of memory for this Series.
1047
+ #
1048
+ # @param in_place [Boolean]
1049
+ # In place or not.
1050
+ #
1051
+ # @return [Series]
187
1052
  def rechunk(in_place: false)
188
1053
  opt_s = _s.rechunk(in_place)
189
1054
  in_place ? self : Utils.wrap_s(opt_s)
190
1055
  end
191
1056
 
1057
+ # def reverse
1058
+ # end
1059
+
1060
+ # Check if this Series datatype is numeric.
1061
+ #
1062
+ # @return [Boolean]
1063
+ #
1064
+ # @example
1065
+ # s = Polars::Series.new("a", [1, 2, 3])
1066
+ # s.is_numeric
1067
+ # # => true
1068
+ def is_numeric
1069
+ [:i8, :i16, :i32, :i64, :u8, :u16, :u32, :u64, :f32, :f64].include?(dtype)
1070
+ end
1071
+ alias_method :numeric?, :is_numeric
1072
+
1073
+ # Check if this Series datatype is datelike.
1074
+ #
1075
+ # @return [Boolean]
1076
+ #
1077
+ # @example
1078
+ # s = Polars::Series.new([Date.new(2021, 1, 1), Date.new(2021, 1, 2), Date.new(2021, 1, 3)])
1079
+ # s.is_datelike
1080
+ # # => true
1081
+ def is_datelike
1082
+ [:date, :datetime, :duration, :time].include?(dtype)
1083
+ end
1084
+
1085
+ # Check if this Series has floating point numbers.
1086
+ #
1087
+ # @return [Boolean]
1088
+ #
1089
+ # @example
1090
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0])
1091
+ # s.is_float
1092
+ # # => true
1093
+ def is_float
1094
+ [:f32, :f64].include?(dtype)
1095
+ end
1096
+ alias_method :float?, :is_float
1097
+
1098
+ # Check if this Series is a Boolean.
1099
+ #
1100
+ # @return [Boolean]
1101
+ #
1102
+ # @example
1103
+ # s = Polars::Series.new("a", [true, false, true])
1104
+ # s.is_boolean
1105
+ # # => true
1106
+ def is_boolean
1107
+ dtype == :bool
1108
+ end
1109
+ alias_method :boolean?, :is_boolean
1110
+ alias_method :is_bool, :is_boolean
1111
+ alias_method :bool?, :is_boolean
1112
+
1113
+ # Check if this Series datatype is a Utf8.
1114
+ #
1115
+ # @return [Boolean]
1116
+ #
1117
+ # @example
1118
+ # s = Polars::Series.new("x", ["a", "b", "c"])
1119
+ # s.is_utf8
1120
+ # # => true
1121
+ def is_utf8
1122
+ dtype == :str
1123
+ end
1124
+ alias_method :utf8?, :is_utf8
1125
+
1126
+ # def view
1127
+ # end
1128
+
1129
+ # def to_numo
1130
+ # end
1131
+
1132
+ # def set
1133
+ # end
1134
+
1135
+ # def set_at_idx
1136
+ # end
1137
+
1138
+ # def cleared
1139
+ # end
1140
+
1141
+ # clone handled by initialize_copy
1142
+
1143
+ # def fill_nan
1144
+ # end
1145
+
1146
+ # def fill_null
1147
+ # end
1148
+
1149
+ # Rounds down to the nearest integer value.
1150
+ #
1151
+ # Only works on floating point Series.
1152
+ #
1153
+ # @return [Series]
1154
+ #
1155
+ # @example
1156
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
1157
+ # s.floor
1158
+ # # =>
1159
+ # # shape: (3,)
1160
+ # # Series: 'a' [f64]
1161
+ # # [
1162
+ # # 1.0
1163
+ # # 2.0
1164
+ # # 3.0
1165
+ # # ]
1166
+ def floor
1167
+ Utils.wrap_s(_s.floor)
1168
+ end
1169
+
1170
+ # Rounds up to the nearest integer value.
1171
+ #
1172
+ # Only works on floating point Series.
1173
+ #
1174
+ # @return [Series]
1175
+ #
1176
+ # @example
1177
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
1178
+ # s.ceil
1179
+ # # =>
1180
+ # # shape: (3,)
1181
+ # # Series: 'a' [f64]
1182
+ # # [
1183
+ # # 2.0
1184
+ # # 3.0
1185
+ # # 4.0
1186
+ # # ]
1187
+ def ceil
1188
+ Utils.wrap_s(_s.ceil)
1189
+ end
1190
+
1191
+ # Round underlying floating point data by `decimals` digits.
1192
+ #
1193
+ # @param decimals [Integer]
1194
+ # number of decimals to round by.
1195
+ #
1196
+ # @return [Series]
1197
+ #
1198
+ # @example
1199
+ # s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
1200
+ # s.round(2)
1201
+ # # =>
1202
+ # # shape: (3,)
1203
+ # # Series: 'a' [f64]
1204
+ # # [
1205
+ # # 1.12
1206
+ # # 2.57
1207
+ # # 3.9
1208
+ # # ]
1209
+ def round(decimals = 0)
1210
+ Utils.wrap_s(_s.round(decimals))
1211
+ end
1212
+
1213
+ # def dot
1214
+ # end
1215
+
1216
+ # def mode
1217
+ # end
1218
+
1219
+ # def sign
1220
+ # end
1221
+
1222
+ # def sin
1223
+ # end
1224
+
1225
+ # def cos
1226
+ # end
1227
+
1228
+ # def tan
1229
+ # end
1230
+
1231
+ # def arcsin
1232
+ # end
1233
+
1234
+ # def arccos
1235
+ # end
1236
+
1237
+ # def arctan
1238
+ # end
1239
+
1240
+ # def arcsinh
1241
+ # end
1242
+
1243
+ # def arccosh
1244
+ # end
1245
+
1246
+ # def arctanh
1247
+ # end
1248
+
1249
+ # def sinh
1250
+ # end
1251
+
1252
+ # def cosh
1253
+ # end
1254
+
1255
+ # def tanh
1256
+ # end
1257
+
1258
+ # def apply
1259
+ # end
1260
+
1261
+ # def shift
1262
+ # end
1263
+
1264
+ # def shift_and_fill
1265
+ # end
1266
+
1267
+ # def zip_with
1268
+ # end
1269
+
1270
+ # def rolling_min
1271
+ # end
1272
+
1273
+ # def rolling_max
1274
+ # end
1275
+
1276
+ # def rolling_mean
1277
+ # end
1278
+
1279
+ # def rolling_sum
1280
+ # end
1281
+
1282
+ # def rolling_std
1283
+ # end
1284
+
1285
+ # def rolling_var
1286
+ # end
1287
+
1288
+ # def rolling_apply
1289
+ # end
1290
+
1291
+ # def rolling_median
1292
+ # end
1293
+
1294
+ # def rolling_quantile
1295
+ # end
1296
+
1297
+ # def rolling_skew
1298
+ # end
1299
+
1300
+ # def sample
1301
+ # end
1302
+
1303
+ # Get a boolean mask of the local maximum peaks.
1304
+ #
1305
+ # @return [Series]
1306
+ #
1307
+ # @example
1308
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
1309
+ # s.peak_max
1310
+ # # =>
1311
+ # # shape: (5,)
1312
+ # # Series: '' [bool]
1313
+ # # [
1314
+ # # false
1315
+ # # false
1316
+ # # false
1317
+ # # false
1318
+ # # true
1319
+ # # ]
1320
+ def peak_max
1321
+ Utils.wrap_s(_s.peak_max)
1322
+ end
1323
+
1324
+ # Get a boolean mask of the local minimum peaks.
1325
+ #
1326
+ # @return [Series]
1327
+ #
1328
+ # @example
1329
+ # s = Polars::Series.new("a", [4, 1, 3, 2, 5])
1330
+ # s.peak_min
1331
+ # # =>
1332
+ # # shape: (5,)
1333
+ # # Series: '' [bool]
1334
+ # # [
1335
+ # # false
1336
+ # # true
1337
+ # # false
1338
+ # # true
1339
+ # # false
1340
+ # # ]
1341
+ def peak_min
1342
+ Utils.wrap_s(_s.peak_min)
1343
+ end
1344
+
1345
+ # Count the number of unique values in this Series.
1346
+ #
1347
+ # @return [Integer]
1348
+ #
1349
+ # @example
1350
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
1351
+ # s.n_unique
1352
+ # # => 3
1353
+ def n_unique
1354
+ _s.n_unique
1355
+ end
1356
+
1357
+ # def shrink_to_fit
1358
+ # end
1359
+
1360
+ # def _hash
1361
+ # end
1362
+
1363
+ # def reinterpret
1364
+ # end
1365
+
1366
+ # def interpolate
1367
+ # end
1368
+
1369
+ # def abs
1370
+ # end
1371
+
1372
+ # def rank
1373
+ # end
1374
+
1375
+ # def diff
1376
+ # end
1377
+
1378
+ # def pct_change
1379
+ # end
1380
+
1381
+ # def skew
1382
+ # end
1383
+
1384
+ # def kurtosis
1385
+ # end
1386
+
1387
+ # def clip
1388
+ # end
1389
+
1390
+ # def clip_min
1391
+ # end
1392
+
1393
+ # def clip_max
1394
+ # end
1395
+
1396
+ # def reshape
1397
+ # end
1398
+
1399
+ # def shuffle
1400
+ # end
1401
+
1402
+ # def ewm_mean
1403
+ # end
1404
+
1405
+ # def ewm_std
1406
+ # end
1407
+
1408
+ # def ewm_var
1409
+ # end
1410
+
1411
+ # def extend_constant
1412
+ # end
1413
+
1414
+ # Flags the Series as sorted.
1415
+ #
1416
+ # Enables downstream code to use fast paths for sorted arrays.
1417
+ #
1418
+ # @param reverse [Boolean]
1419
+ # If the Series order is reversed, e.g. descending.
1420
+ #
1421
+ # @return [Series]
1422
+ #
1423
+ # @note
1424
+ # This can lead to incorrect results if this Series is not sorted!!
1425
+ # Use with care!
1426
+ #
1427
+ # @example
1428
+ # s = Polars::Series.new("a", [1, 2, 3])
1429
+ # s.set_sorted.max
1430
+ # # => 3
1431
+ def set_sorted(reverse: false)
1432
+ Utils.wrap_s(_s.set_sorted(reverse))
1433
+ end
1434
+
1435
+ # def new_from_index
1436
+ # end
1437
+
1438
+ # def shrink_dtype
1439
+ # end
1440
+
1441
+ # def arr
1442
+ # end
1443
+
1444
+ # def cat
1445
+ # end
1446
+
1447
+ # def dt
1448
+ # end
1449
+
1450
+ # def str
1451
+ # end
1452
+
1453
+ # def struct
1454
+ # end
1455
+
192
1456
  private
193
1457
 
1458
+ def initialize_copy(other)
1459
+ super
1460
+ self._s = _s._clone
1461
+ end
1462
+
194
1463
  def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
195
1464
  ruby_dtype = nil
196
1465
 
@@ -205,17 +1474,42 @@ module Polars
205
1474
  end
206
1475
  end
207
1476
 
1477
+ rb_temporal_types = []
1478
+ rb_temporal_types << Date if defined?(Date)
1479
+ rb_temporal_types << DateTime if defined?(DateTime)
1480
+ rb_temporal_types << Time if defined?(Time)
1481
+
208
1482
  # _get_first_non_none
209
1483
  value = values.find { |v| !v.nil? }
210
1484
 
211
- if !dtype.nil? && is_polars_dtype(dtype) && ruby_dtype.nil?
1485
+ if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
212
1486
  constructor = polars_type_to_constructor(dtype)
213
1487
  rbseries = constructor.call(name, values, strict)
214
1488
  return rbseries
215
- end
1489
+ else
1490
+ if ruby_dtype.nil?
1491
+ if value.nil?
1492
+ # generic default dtype
1493
+ ruby_dtype = Float
1494
+ else
1495
+ ruby_dtype = value.class
1496
+ end
1497
+ end
216
1498
 
217
- constructor = rb_type_to_constructor(value.class)
218
- constructor.call(name, values, strict)
1499
+ # temporal branch
1500
+ if rb_temporal_types.include?(ruby_dtype)
1501
+ # if dtype.nil?
1502
+ # dtype = rb_type_to_dtype(ruby_dtype)
1503
+ # elsif rb_temporal_types.include?(dtype)
1504
+ # dtype = rb_type_to_dtype(dtype)
1505
+ # end
1506
+
1507
+ raise Todo
1508
+ else
1509
+ constructor = rb_type_to_constructor(value.class)
1510
+ constructor.call(name, values, strict)
1511
+ end
1512
+ end
219
1513
  end
220
1514
 
221
1515
  POLARS_TYPE_TO_CONSTRUCTOR = {
@@ -253,9 +1547,5 @@ module Polars
253
1547
  # RbSeries.method(:new_object)
254
1548
  raise ArgumentError, "Cannot determine type"
255
1549
  end
256
-
257
- def is_polars_dtype(data_type)
258
- true
259
- end
260
1550
  end
261
1551
  end