polars-df 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/polars/series.rb CHANGED
@@ -45,6 +45,17 @@ module Polars
45
45
  _s.dtype.to_sym
46
46
  end
47
47
 
48
+ def flags
49
+ {
50
+ "SORTED_ASC" => _s.is_sorted_flag,
51
+ "SORTED_DESC" => _s.is_sorted_reverse_flag
52
+ }
53
+ end
54
+
55
+ def inner_dtype
56
+ _s.inner_dtype&.to_sym
57
+ end
58
+
48
59
  def name
49
60
  _s.name
50
61
  end
@@ -53,6 +64,9 @@ module Polars
53
64
  [_s.len]
54
65
  end
55
66
 
67
+ # def time_unit
68
+ # end
69
+
56
70
  def to_s
57
71
  _s.to_s
58
72
  end
@@ -70,6 +84,24 @@ module Polars
70
84
  Utils.wrap_s(_s.bitxor(other._s))
71
85
  end
72
86
 
87
+ # def ==(other)
88
+ # end
89
+
90
+ # def !=(other)
91
+ # end
92
+
93
+ # def >(other)
94
+ # end
95
+
96
+ # def <(other)
97
+ # end
98
+
99
+ # def >=(other)
100
+ # end
101
+
102
+ # def <=(other)
103
+ # end
104
+
73
105
  def +(other)
74
106
  Utils.wrap_s(_s.add(other._s))
75
107
  end
@@ -86,10 +118,62 @@ module Polars
86
118
  Utils.wrap_s(_s.div(other._s))
87
119
  end
88
120
 
121
+ def **(power)
122
+ # if is_datelike
123
+ # raise ArgumentError, "first cast to integer before raising datelike dtypes to a power"
124
+ # end
125
+ to_frame.select(Polars.col(name).pow(power)).to_series
126
+ end
127
+
128
+ # def -@(other)
129
+ # end
130
+
131
+ def [](item)
132
+ _s.get_idx(item)
133
+ end
134
+
135
+ # def []=(key, value)
136
+ # end
137
+
138
+ def estimated_size(unit = "b")
139
+ sz = _s.estimated_size
140
+ Utils.scale_bytes(sz, to: unit)
141
+ end
142
+
143
+ def sqrt
144
+ self ** 0.5
145
+ end
146
+
147
+ def any
148
+ to_frame.select(Polars.col(name).any).to_series[0]
149
+ end
150
+
151
+ def all
152
+ to_frame.select(Polars.col(name).all).to_series[0]
153
+ end
154
+
155
+ # def log
156
+ # end
157
+
158
+ # def log10
159
+ # end
160
+
161
+ # def exp
162
+ # end
163
+
164
+ # def drop_nulls
165
+ # end
166
+
167
+ # def drop_nans
168
+ # end
169
+
89
170
  def to_frame
90
171
  Utils.wrap_df(RbDataFrame.new([_s]))
91
172
  end
92
173
 
174
+ # def describe
175
+ # end
176
+
93
177
  def sum
94
178
  _s.sum
95
179
  end
@@ -98,6 +182,10 @@ module Polars
98
182
  _s.mean
99
183
  end
100
184
 
185
+ def product
186
+ to_frame.select(Polars.col(name).product).to_series[0]
187
+ end
188
+
101
189
  def min
102
190
  _s.min
103
191
  end
@@ -106,6 +194,53 @@ module Polars
106
194
  _s.max
107
195
  end
108
196
 
197
+ # def nan_max
198
+ # end
199
+
200
+ # def nan_min
201
+ # end
202
+
203
+ def std(ddof: 1)
204
+ if !is_numeric
205
+ nil
206
+ else
207
+ to_frame.select(Polars.col(name).std(ddof: ddof)).to_series[0]
208
+ end
209
+ end
210
+
211
+ def var(ddof: 1)
212
+ if !is_numeric
213
+ nil
214
+ else
215
+ to_frame.select(Polars.col(name).var(ddof: ddof)).to_series[0]
216
+ end
217
+ end
218
+
219
+ def median
220
+ _s.median
221
+ end
222
+
223
+ def quantile(quantile, interpolation: "nearest")
224
+ _s.quantile(quantile, interpolation)
225
+ end
226
+
227
+ def to_dummies
228
+ Utils.wrap_df(_s.to_dummies)
229
+ end
230
+
231
+ def value_counts(sort: false)
232
+ Utils.wrap_df(_s.value_counts(sort))
233
+ end
234
+
235
+ # def unique_counts
236
+ # end
237
+
238
+ # def entropy
239
+ # end
240
+
241
+ # def cumulative_eval
242
+ # end
243
+
109
244
  def alias(name)
110
245
  s = dup
111
246
  s._s.rename(name)
@@ -141,8 +276,12 @@ module Polars
141
276
  Utils.wrap_s(_s.cummax(reverse))
142
277
  end
143
278
 
279
+ def cumprod(reverse: false)
280
+ Utils.wrap_s(_s.cumprod(reverse))
281
+ end
282
+
144
283
  def limit(n = 10)
145
- to_frame().select(Utils.col(name).limit(n)).to_series
284
+ to_frame.select(Utils.col(name).limit(n)).to_series
146
285
  end
147
286
 
148
287
  def slice(offset, length = nil)
@@ -167,6 +306,9 @@ module Polars
167
306
  to_frame.select(Utils.col(name).tail(n)).to_series
168
307
  end
169
308
 
309
+ # def take_every
310
+ # end
311
+
170
312
  def sort(reverse: false, in_place: false)
171
313
  if in_place
172
314
  self._s = _s.sort(reverse)
@@ -176,21 +318,360 @@ module Polars
176
318
  end
177
319
  end
178
320
 
179
- def to_a
180
- _s.to_a
321
+ # def top_k
322
+ # end
323
+
324
+ # def arg_sort
325
+ # end
326
+
327
+ # def argsort
328
+ # end
329
+
330
+ # def arg_unique
331
+ # end
332
+
333
+ def arg_min
334
+ _s.arg_min
335
+ end
336
+
337
+ def arg_max
338
+ _s.arg_max
339
+ end
340
+
341
+ # def search_sorted
342
+ # end
343
+
344
+ # def unique
345
+ # end
346
+
347
+ # def take
348
+ # end
349
+
350
+ def null_count
351
+ _s.null_count
352
+ end
353
+
354
+ def has_validity
355
+ _s.has_validity
356
+ end
357
+
358
+ def is_empty
359
+ len == 0
360
+ end
361
+ alias_method :empty?, :is_empty
362
+
363
+ # def is_null
364
+ # end
365
+
366
+ # def is_not_null
367
+ # end
368
+
369
+ # def is_finite
370
+ # end
371
+
372
+ # def is_infinite
373
+ # end
374
+
375
+ # def is_nan
376
+ # end
377
+
378
+ # def is_not_nan
379
+ # end
380
+
381
+ # def is_in
382
+ # end
383
+
384
+ # def arg_true
385
+ # end
386
+
387
+ # def is_unique
388
+ # end
389
+
390
+ # def is_first
391
+ # end
392
+
393
+ # def is_duplicated
394
+ # end
395
+
396
+ # def explode
397
+ # end
398
+
399
+ def series_equal(other, null_equal: false, strict: false)
400
+ _s.series_equal(other._s, null_equal, strict)
181
401
  end
182
402
 
183
403
  def len
184
404
  _s.len
185
405
  end
186
406
 
407
+ # def cast
408
+ # end
409
+
410
+ # def to_physical
411
+ # end
412
+
413
+ def to_a
414
+ _s.to_a
415
+ end
416
+
187
417
  def rechunk(in_place: false)
188
418
  opt_s = _s.rechunk(in_place)
189
419
  in_place ? self : Utils.wrap_s(opt_s)
190
420
  end
191
421
 
422
+ # def reverse
423
+ # end
424
+
425
+ def is_numeric
426
+ [:i8, :i16, :i32, :i64, :u8, :u16, :u32, :u64, :f32, :f64].include?(dtype)
427
+ end
428
+ alias_method :numeric?, :is_numeric
429
+
430
+ # def is_datelike
431
+ # end
432
+
433
+ def is_float
434
+ [:f32, :f64].include?(dtype)
435
+ end
436
+ alias_method :float?, :is_float
437
+
438
+ def is_bool
439
+ dtype == :bool
440
+ end
441
+ alias_method :bool?, :is_bool
442
+
443
+ def is_utf8
444
+ dtype == :str
445
+ end
446
+ alias_method :utf8?, :is_utf8
447
+
448
+ # def view
449
+ # end
450
+
451
+ # def to_numo
452
+ # end
453
+
454
+ # def set
455
+ # end
456
+
457
+ # def set_at_idx
458
+ # end
459
+
460
+ # def cleared
461
+ # end
462
+
463
+ # clone handled by initialize_copy
464
+
465
+ # def fill_nan
466
+ # end
467
+
468
+ # def fill_null
469
+ # end
470
+
471
+ def floor
472
+ Utils.wrap_s(_s.floor)
473
+ end
474
+
475
+ def ceil
476
+ Utils.wrap_s(_s.ceil)
477
+ end
478
+
479
+ # default to 0 like Ruby
480
+ def round(decimals = 0)
481
+ Utils.wrap_s(_s.round(decimals))
482
+ end
483
+
484
+ # def dot
485
+ # end
486
+
487
+ # def mode
488
+ # end
489
+
490
+ # def sign
491
+ # end
492
+
493
+ # def sin
494
+ # end
495
+
496
+ # def cos
497
+ # end
498
+
499
+ # def tan
500
+ # end
501
+
502
+ # def arcsin
503
+ # end
504
+
505
+ # def arccos
506
+ # end
507
+
508
+ # def arctan
509
+ # end
510
+
511
+ # def arcsinh
512
+ # end
513
+
514
+ # def arccosh
515
+ # end
516
+
517
+ # def arctanh
518
+ # end
519
+
520
+ # def sinh
521
+ # end
522
+
523
+ # def cosh
524
+ # end
525
+
526
+ # def tanh
527
+ # end
528
+
529
+ # def apply
530
+ # end
531
+
532
+ # def shift
533
+ # end
534
+
535
+ # def shift_and_fill
536
+ # end
537
+
538
+ # def zip_with
539
+ # end
540
+
541
+ # def rolling_min
542
+ # end
543
+
544
+ # def rolling_max
545
+ # end
546
+
547
+ # def rolling_mean
548
+ # end
549
+
550
+ # def rolling_sum
551
+ # end
552
+
553
+ # def rolling_std
554
+ # end
555
+
556
+ # def rolling_var
557
+ # end
558
+
559
+ # def rolling_apply
560
+ # end
561
+
562
+ # def rolling_median
563
+ # end
564
+
565
+ # def rolling_quantile
566
+ # end
567
+
568
+ # def rolling_skew
569
+ # end
570
+
571
+ # def sample
572
+ # end
573
+
574
+ def peak_max
575
+ Utils.wrap_s(_s.peak_max)
576
+ end
577
+
578
+ def peak_min
579
+ Utils.wrap_s(_s.peak_min)
580
+ end
581
+
582
+ def n_unique
583
+ _s.n_unique
584
+ end
585
+
586
+ # def shrink_to_fit
587
+ # end
588
+
589
+ # def _hash
590
+ # end
591
+
592
+ # def reinterpret
593
+ # end
594
+
595
+ # def interpolate
596
+ # end
597
+
598
+ # def abs
599
+ # end
600
+
601
+ # def rank
602
+ # end
603
+
604
+ # def diff
605
+ # end
606
+
607
+ # def pct_change
608
+ # end
609
+
610
+ # def skew
611
+ # end
612
+
613
+ # def kurtosis
614
+ # end
615
+
616
+ # def clip
617
+ # end
618
+
619
+ # def clip_min
620
+ # end
621
+
622
+ # def clip_max
623
+ # end
624
+
625
+ # def reshape
626
+ # end
627
+
628
+ # def shuffle
629
+ # end
630
+
631
+ # def ewm_mean
632
+ # end
633
+
634
+ # def ewm_std
635
+ # end
636
+
637
+ # def ewm_var
638
+ # end
639
+
640
+ # def extend_constant
641
+ # end
642
+
643
+ def set_sorted(reverse: false)
644
+ Utils.wrap_s(_s.set_sorted(reverse))
645
+ end
646
+
647
+ # def new_from_index
648
+ # end
649
+
650
+ # def shrink_dtype
651
+ # end
652
+
653
+ # def arr
654
+ # end
655
+
656
+ # def cat
657
+ # end
658
+
659
+ # def dt
660
+ # end
661
+
662
+ # def str
663
+ # end
664
+
665
+ # def struct
666
+ # end
667
+
192
668
  private
193
669
 
670
+ def initialize_copy(other)
671
+ super
672
+ self._s = _s._clone
673
+ end
674
+
194
675
  def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
195
676
  ruby_dtype = nil
196
677
 
@@ -205,17 +686,42 @@ module Polars
205
686
  end
206
687
  end
207
688
 
689
+ rb_temporal_types = []
690
+ rb_temporal_types << Date if defined?(Date)
691
+ rb_temporal_types << DateTime if defined?(DateTime)
692
+ rb_temporal_types << Time if defined?(Time)
693
+
208
694
  # _get_first_non_none
209
695
  value = values.find { |v| !v.nil? }
210
696
 
211
- if !dtype.nil? && is_polars_dtype(dtype) && ruby_dtype.nil?
697
+ if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
212
698
  constructor = polars_type_to_constructor(dtype)
213
699
  rbseries = constructor.call(name, values, strict)
214
700
  return rbseries
215
- end
701
+ else
702
+ if ruby_dtype.nil?
703
+ if value.nil?
704
+ # generic default dtype
705
+ ruby_dtype = Float
706
+ else
707
+ ruby_dtype = value.class
708
+ end
709
+ end
216
710
 
217
- constructor = rb_type_to_constructor(value.class)
218
- constructor.call(name, values, strict)
711
+ # temporal branch
712
+ if rb_temporal_types.include?(ruby_dtype)
713
+ # if dtype.nil?
714
+ # dtype = rb_type_to_dtype(ruby_dtype)
715
+ # elsif rb_temporal_types.include?(dtype)
716
+ # dtype = rb_type_to_dtype(dtype)
717
+ # end
718
+
719
+ raise "todo"
720
+ else
721
+ constructor = rb_type_to_constructor(value.class)
722
+ constructor.call(name, values, strict)
723
+ end
724
+ end
219
725
  end
220
726
 
221
727
  POLARS_TYPE_TO_CONSTRUCTOR = {
@@ -253,9 +759,5 @@ module Polars
253
759
  # RbSeries.method(:new_object)
254
760
  raise ArgumentError, "Cannot determine type"
255
761
  end
256
-
257
- def is_polars_dtype(data_type)
258
- true
259
- end
260
762
  end
261
763
  end
@@ -6,12 +6,129 @@ module Polars
6
6
  self._rbexpr = expr._rbexpr
7
7
  end
8
8
 
9
+ # def strptime
10
+ # end
11
+
9
12
  def lengths
10
13
  Utils.wrap_expr(_rbexpr.str_lengths)
11
14
  end
12
15
 
16
+ def n_chars
17
+ Utils.wrap_expr(_rbexpr.str_n_chars)
18
+ end
19
+
20
+ def concat(delimiter = "-")
21
+ Utils.wrap_expr(_rbexpr.str_concat(delimiter))
22
+ end
23
+
24
+ def to_uppercase
25
+ Utils.wrap_expr(_rbexpr.str_to_uppercase)
26
+ end
27
+
28
+ def to_lowercase
29
+ Utils.wrap_expr(_rbexpr.str_to_lowercase)
30
+ end
31
+
32
+ def strip(matches = nil)
33
+ if !matches.nil? && matches.length > 1
34
+ raise ArgumentError, "matches should contain a single character"
35
+ end
36
+ Utils.wrap_expr(_rbexpr.str_strip(matches))
37
+ end
38
+
39
+ def lstrip(matches = nil)
40
+ if !matches.nil? && matches.length > 1
41
+ raise ArgumentError, "matches should contain a single character"
42
+ end
43
+ Utils.wrap_expr(_rbexpr.str_lstrip(matches))
44
+ end
45
+
46
+ def rstrip(matches = nil)
47
+ if !matches.nil? && matches.length > 1
48
+ raise ArgumentError, "matches should contain a single character"
49
+ end
50
+ Utils.wrap_expr(_rbexpr.str_rstrip(matches))
51
+ end
52
+
53
+ def zfill(alignment)
54
+ Utils.wrap_expr(_rbexpr.str_zfill(alignment))
55
+ end
56
+
57
+ def ljust(width, fillchar = " ")
58
+ Utils.wrap_expr(_rbexpr.str_ljust(width, fillchar))
59
+ end
60
+
61
+ def rjust(width, fillchar = " ")
62
+ Utils.wrap_expr(_rbexpr.str_rjust(width, fillchar))
63
+ end
64
+
13
65
  def contains(pattern, literal: false)
14
66
  Utils.wrap_expr(_rbexpr.str_contains(pattern, literal))
15
67
  end
68
+
69
+ def ends_with(sub)
70
+ Utils.wrap_expr(_rbexpr.str_ends_with(sub))
71
+ end
72
+
73
+ def starts_with(sub)
74
+ Utils.wrap_expr(_rbexpr.str_starts_with(sub))
75
+ end
76
+
77
+ # def json_path_match
78
+ # end
79
+
80
+ # def decode
81
+ # end
82
+
83
+ # def encode
84
+ # end
85
+
86
+ def extract(pattern, group_index: 1)
87
+ Utils.wrap_expr(_rbexpr.str_extract(pattern, group_index))
88
+ end
89
+
90
+ def extract_all(pattern)
91
+ Utils.wrap_expr(_rbexpr.str_extract_all(pattern))
92
+ end
93
+
94
+ def count_match(pattern)
95
+ Utils.wrap_expr(_rbexpr.count_match(pattern))
96
+ end
97
+
98
+ def split(by, inclusive: false)
99
+ if inclusive
100
+ Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
101
+ else
102
+ Utils.wrap_expr(_rbexpr.str_split(by))
103
+ end
104
+ end
105
+
106
+ def split_exact(by, n, inclusive: false)
107
+ if inclusive
108
+ Utils.wrap_expr(_rbexpr.str_split_exact_inclusive(by, n))
109
+ else
110
+ Utils.wrap_expr(_rbexpr.str_split_exact(by, n))
111
+ end
112
+ end
113
+
114
+ def splitn(by, n)
115
+ Utils.wrap_expr(_rbexpr.str_splitn(by, n))
116
+ end
117
+
118
+ def replace(pattern, literal: false)
119
+ pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
120
+ value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
121
+ Utils.wrap_expr(_rbexpr.str_replace(pattern._rbexpr, value._rbexpr, literal))
122
+ end
123
+
124
+ def replace_all(pattern, literal: false)
125
+ pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
126
+ value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
127
+ Utils.wrap_expr(_rbexpr.str_replace_all(pattern._rbexpr, value._rbexpr, literal))
128
+ end
129
+
130
+ def slice(offset, length = nil)
131
+ Utils.wrap_expr(_rbexpr.str_slice(offset, length))
132
+ end
16
133
  end
17
134
  end