polars-df 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/polars/series.rb CHANGED
@@ -45,6 +45,17 @@ module Polars
45
45
  _s.dtype.to_sym
46
46
  end
47
47
 
48
# Builds a hash describing the sortedness flags of the underlying series.
# The "SORTED_ASC" entry comes from `_s.is_sorted_flag` and the
# "SORTED_DESC" entry from `_s.is_sorted_reverse_flag`.
def flags
  ascending = _s.is_sorted_flag
  descending = _s.is_sorted_reverse_flag
  { "SORTED_ASC" => ascending, "SORTED_DESC" => descending }
end
54
+
55
# Returns `_s.inner_dtype` converted to a symbol, or nil when the
# underlying series reports no inner dtype.
def inner_dtype
  inner = _s.inner_dtype
  inner.nil? ? nil : inner.to_sym
end
58
+
48
59
  def name
49
60
  _s.name
50
61
  end
@@ -53,6 +64,9 @@ module Polars
53
64
  [_s.len]
54
65
  end
55
66
 
67
+ # def time_unit
68
+ # end
69
+
56
70
  def to_s
57
71
  _s.to_s
58
72
  end
@@ -70,6 +84,24 @@ module Polars
70
84
  Utils.wrap_s(_s.bitxor(other._s))
71
85
  end
72
86
 
87
+ # def ==(other)
88
+ # end
89
+
90
+ # def !=(other)
91
+ # end
92
+
93
+ # def >(other)
94
+ # end
95
+
96
+ # def <(other)
97
+ # end
98
+
99
+ # def >=(other)
100
+ # end
101
+
102
+ # def <=(other)
103
+ # end
104
+
73
105
  def +(other)
74
106
  Utils. wrap_s(_s.add(other._s))
75
107
  end
@@ -86,10 +118,62 @@ module Polars
86
118
  Utils.wrap_s(_s.div(other._s))
87
119
  end
88
120
 
121
# Raises the series to `power` by selecting `Polars.col(name).pow(power)`
# from the frame returned by `to_frame` and converting back to a series.
def **(power)
  # TODO: once `is_datelike` is available, reject datelike dtypes here:
  #   raise ArgumentError, "first cast to integer before raising datelike dtypes to a power"
  frame = to_frame
  powered = Polars.col(name).pow(power)
  frame.select(powered).to_series
end
127
+
128
+ # def -@(other)
129
+ # end
130
+
131
# Looks up `item` through the underlying series' `get_idx` and returns
# whatever that call yields.
def [](item)
  element = _s.get_idx(item)
  element
end
134
+
135
+ # def []=(key, value)
136
+ # end
137
+
138
# Takes the size reported by `_s.estimated_size` and converts it with
# `Utils.scale_bytes`, using `unit` (default "b") as the target unit.
def estimated_size(unit = "b")
  Utils.scale_bytes(_s.estimated_size, to: unit)
end
142
+
143
# Square root, expressed as raising the series to the power 0.5 via `**`.
def sqrt
  exponent = 0.5
  self ** exponent
end
146
+
147
# Evaluates `Polars.col(name).any` against the frame from `to_frame` and
# returns element 0 of the resulting series.
def any
  frame = to_frame
  reduced = frame.select(Polars.col(name).any).to_series
  reduced[0]
end
150
+
151
# Evaluates `Polars.col(name).all` against the frame from `to_frame` and
# returns element 0 of the resulting series.
def all
  frame = to_frame
  reduced = frame.select(Polars.col(name).all).to_series
  reduced[0]
end
154
+
155
+ # def log
156
+ # end
157
+
158
+ # def log10
159
+ # end
160
+
161
+ # def exp
162
+ # end
163
+
164
+ # def drop_nulls
165
+ # end
166
+
167
+ # def drop_nans
168
+ # end
169
+
89
170
  def to_frame
90
171
  Utils.wrap_df(RbDataFrame.new([_s]))
91
172
  end
92
173
 
174
+ # def describe
175
+ # end
176
+
93
177
  def sum
94
178
  _s.sum
95
179
  end
@@ -98,6 +182,10 @@ module Polars
98
182
  _s.mean
99
183
  end
100
184
 
185
# Evaluates `Polars.col(name).product` against the frame from `to_frame`
# and returns element 0 of the resulting series.
def product
  frame = to_frame
  reduced = frame.select(Polars.col(name).product).to_series
  reduced[0]
end
188
+
101
189
  def min
102
190
  _s.min
103
191
  end
@@ -106,6 +194,53 @@ module Polars
106
194
  _s.max
107
195
  end
108
196
 
197
+ # def nan_max
198
+ # end
199
+
200
+ # def nan_min
201
+ # end
202
+
203
# Returns nil when `is_numeric` is false. Otherwise evaluates
# `Polars.col(name).std(ddof: ddof)` against the frame from `to_frame` and
# returns element 0 of the resulting series.
def std(ddof: 1)
  return nil unless is_numeric

  frame = to_frame
  spread = frame.select(Polars.col(name).std(ddof: ddof)).to_series
  spread[0]
end
210
+
211
# Returns nil when `is_numeric` is false. Otherwise evaluates
# `Polars.col(name).var(ddof: ddof)` against the frame from `to_frame` and
# returns element 0 of the resulting series.
def var(ddof: 1)
  return nil unless is_numeric

  frame = to_frame
  spread = frame.select(Polars.col(name).var(ddof: ddof)).to_series
  spread[0]
end
218
+
219
# Returns the value of `median` as computed by the underlying series.
def median
  middle = _s.median
  middle
end
222
+
223
# Forwards `quantile` and `interpolation` (default "nearest") positionally
# to the underlying series' `quantile` and returns its result.
def quantile(quantile, interpolation: "nearest")
  result = _s.quantile(quantile, interpolation)
  result
end
226
+
227
# Passes the result of `_s.to_dummies` through `Utils.wrap_df`.
def to_dummies
  dummies = _s.to_dummies
  Utils.wrap_df(dummies)
end
230
+
231
# Calls `_s.value_counts` with the `sort` flag (default false) and passes
# the result through `Utils.wrap_df`.
def value_counts(sort: false)
  counts = _s.value_counts(sort)
  Utils.wrap_df(counts)
end
234
+
235
+ # def unique_counts
236
+ # end
237
+
238
+ # def entropy
239
+ # end
240
+
241
+ # def cumulative_eval
242
+ # end
243
+
109
244
  def alias(name)
110
245
  s = dup
111
246
  s._s.rename(name)
@@ -141,8 +276,12 @@ module Polars
141
276
  Utils.wrap_s(_s.cummax(reverse))
142
277
  end
143
278
 
279
# Calls `_s.cumprod` with the `reverse` flag (default false) and passes the
# result through `Utils.wrap_s`.
def cumprod(reverse: false)
  running = _s.cumprod(reverse)
  Utils.wrap_s(running)
end
282
+
144
283
  def limit(n = 10)
145
- to_frame().select(Utils.col(name).limit(n)).to_series
284
+ to_frame.select(Utils.col(name).limit(n)).to_series
146
285
  end
147
286
 
148
287
  def slice(offset, length = nil)
@@ -167,6 +306,9 @@ module Polars
167
306
  to_frame.select(Utils.col(name).tail(n)).to_series
168
307
  end
169
308
 
309
+ # def take_every
310
+ # end
311
+
170
312
  def sort(reverse: false, in_place: false)
171
313
  if in_place
172
314
  self._s = _s.sort(reverse)
@@ -176,21 +318,360 @@ module Polars
176
318
  end
177
319
  end
178
320
 
179
- def to_a
180
- _s.to_a
321
+ # def top_k
322
+ # end
323
+
324
+ # def arg_sort
325
+ # end
326
+
327
+ # def argsort
328
+ # end
329
+
330
+ # def arg_unique
331
+ # end
332
+
333
# Returns the value of `arg_min` as reported by the underlying series.
def arg_min
  position = _s.arg_min
  position
end
336
+
337
# Returns the value of `arg_max` as reported by the underlying series.
def arg_max
  position = _s.arg_max
  position
end
340
+
341
+ # def search_sorted
342
+ # end
343
+
344
+ # def unique
345
+ # end
346
+
347
+ # def take
348
+ # end
349
+
350
# Returns the value of `null_count` as reported by the underlying series.
def null_count
  nulls = _s.null_count
  nulls
end
353
+
354
# Returns the value of `has_validity` as reported by the underlying series.
def has_validity
  validity = _s.has_validity
  validity
end
357
+
358
# True when `len` equals 0.
def is_empty
  count = len
  count == 0
end
361
+ alias_method :empty?, :is_empty
362
+
363
+ # def is_null
364
+ # end
365
+
366
+ # def is_not_null
367
+ # end
368
+
369
+ # def is_finite
370
+ # end
371
+
372
+ # def is_infinite
373
+ # end
374
+
375
+ # def is_nan
376
+ # end
377
+
378
+ # def is_not_nan
379
+ # end
380
+
381
+ # def is_in
382
+ # end
383
+
384
+ # def arg_true
385
+ # end
386
+
387
+ # def is_unique
388
+ # end
389
+
390
+ # def is_first
391
+ # end
392
+
393
+ # def is_duplicated
394
+ # end
395
+
396
+ # def explode
397
+ # end
398
+
399
# Compares this series with `other` by calling the underlying series'
# `series_equal` with `other._s` and the `null_equal` / `strict` flags
# (both default to false), passed positionally.
def series_equal(other, null_equal: false, strict: false)
  native = _s
  native.series_equal(other._s, null_equal, strict)
end
182
402
 
183
403
  def len
184
404
  _s.len
185
405
  end
186
406
 
407
+ # def cast
408
+ # end
409
+
410
+ # def to_physical
411
+ # end
412
+
413
# Returns the result of calling `to_a` on the underlying series.
def to_a
  values = _s.to_a
  values
end
416
+
187
417
  def rechunk(in_place: false)
188
418
  opt_s = _s.rechunk(in_place)
189
419
  in_place ? self : Utils.wrap_s(opt_s)
190
420
  end
191
421
 
422
+ # def reverse
423
+ # end
424
+
425
# True when `dtype` is one of the signed/unsigned integer or float symbols
# listed below.
def is_numeric
  %i[i8 i16 i32 i64 u8 u16 u32 u64 f32 f64].include?(dtype)
end
428
+ alias_method :numeric?, :is_numeric
429
+
430
+ # def is_datelike
431
+ # end
432
+
433
# True when `dtype` is :f32 or :f64.
def is_float
  %i[f32 f64].include?(dtype)
end
436
+ alias_method :float?, :is_float
437
+
438
# True when `dtype` is :bool.
def is_bool
  current = dtype
  current == :bool
end
441
+ alias_method :bool?, :is_bool
442
+
443
# True when `dtype` is :str.
def is_utf8
  current = dtype
  current == :str
end
446
+ alias_method :utf8?, :is_utf8
447
+
448
+ # def view
449
+ # end
450
+
451
+ # def to_numo
452
+ # end
453
+
454
+ # def set
455
+ # end
456
+
457
+ # def set_at_idx
458
+ # end
459
+
460
+ # def cleared
461
+ # end
462
+
463
+ # clone handled by initialize_copy
464
+
465
+ # def fill_nan
466
+ # end
467
+
468
+ # def fill_null
469
+ # end
470
+
471
# Passes the result of `_s.floor` through `Utils.wrap_s`.
def floor
  floored = _s.floor
  Utils.wrap_s(floored)
end
474
+
475
# Passes the result of `_s.ceil` through `Utils.wrap_s`.
def ceil
  raised = _s.ceil
  Utils.wrap_s(raised)
end
478
+
479
+ # default to 0 like Ruby
480
# Calls `_s.round` with `decimals` and passes the result through
# `Utils.wrap_s`. `decimals` defaults to 0, mirroring Ruby's own `round`.
def round(decimals = 0)
  rounded = _s.round(decimals)
  Utils.wrap_s(rounded)
end
483
+
484
+ # def dot
485
+ # end
486
+
487
+ # def mode
488
+ # end
489
+
490
+ # def sign
491
+ # end
492
+
493
+ # def sin
494
+ # end
495
+
496
+ # def cos
497
+ # end
498
+
499
+ # def tan
500
+ # end
501
+
502
+ # def arcsin
503
+ # end
504
+
505
+ # def arccos
506
+ # end
507
+
508
+ # def arctan
509
+ # end
510
+
511
+ # def arcsinh
512
+ # end
513
+
514
+ # def arccosh
515
+ # end
516
+
517
+ # def arctanh
518
+ # end
519
+
520
+ # def sinh
521
+ # end
522
+
523
+ # def cosh
524
+ # end
525
+
526
+ # def tanh
527
+ # end
528
+
529
+ # def apply
530
+ # end
531
+
532
+ # def shift
533
+ # end
534
+
535
+ # def shift_and_fill
536
+ # end
537
+
538
+ # def zip_with
539
+ # end
540
+
541
+ # def rolling_min
542
+ # end
543
+
544
+ # def rolling_max
545
+ # end
546
+
547
+ # def rolling_mean
548
+ # end
549
+
550
+ # def rolling_sum
551
+ # end
552
+
553
+ # def rolling_std
554
+ # end
555
+
556
+ # def rolling_var
557
+ # end
558
+
559
+ # def rolling_apply
560
+ # end
561
+
562
+ # def rolling_median
563
+ # end
564
+
565
+ # def rolling_quantile
566
+ # end
567
+
568
+ # def rolling_skew
569
+ # end
570
+
571
+ # def sample
572
+ # end
573
+
574
# Passes the result of `_s.peak_max` through `Utils.wrap_s`.
def peak_max
  peaks = _s.peak_max
  Utils.wrap_s(peaks)
end
577
+
578
# Passes the result of `_s.peak_min` through `Utils.wrap_s`.
def peak_min
  troughs = _s.peak_min
  Utils.wrap_s(troughs)
end
581
+
582
# Returns the value of `n_unique` as reported by the underlying series.
def n_unique
  distinct = _s.n_unique
  distinct
end
585
+
586
+ # def shrink_to_fit
587
+ # end
588
+
589
+ # def _hash
590
+ # end
591
+
592
+ # def reinterpret
593
+ # end
594
+
595
+ # def interpolate
596
+ # end
597
+
598
+ # def abs
599
+ # end
600
+
601
+ # def rank
602
+ # end
603
+
604
+ # def diff
605
+ # end
606
+
607
+ # def pct_change
608
+ # end
609
+
610
+ # def skew
611
+ # end
612
+
613
+ # def kurtosis
614
+ # end
615
+
616
+ # def clip
617
+ # end
618
+
619
+ # def clip_min
620
+ # end
621
+
622
+ # def clip_max
623
+ # end
624
+
625
+ # def reshape
626
+ # end
627
+
628
+ # def shuffle
629
+ # end
630
+
631
+ # def ewm_mean
632
+ # end
633
+
634
+ # def ewm_std
635
+ # end
636
+
637
+ # def ewm_var
638
+ # end
639
+
640
+ # def extend_constant
641
+ # end
642
+
643
# Calls `_s.set_sorted` with the `reverse` flag (default false) and passes
# the result through `Utils.wrap_s`.
def set_sorted(reverse: false)
  flagged = _s.set_sorted(reverse)
  Utils.wrap_s(flagged)
end
646
+
647
+ # def new_from_index
648
+ # end
649
+
650
+ # def shrink_dtype
651
+ # end
652
+
653
+ # def arr
654
+ # end
655
+
656
+ # def cat
657
+ # end
658
+
659
+ # def dt
660
+ # end
661
+
662
+ # def str
663
+ # end
664
+
665
+ # def struct
666
+ # end
667
+
192
668
  private
193
669
 
670
# Copy hook run by Ruby for `dup`/`clone`: after the default copy, swap the
# underlying series for the result of `_s._clone` so the copy no longer
# holds the same `_s` object as the source.
def initialize_copy(other)
  super(other)
  cloned = _s._clone
  self._s = cloned
end
674
+
194
675
  def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
195
676
  ruby_dtype = nil
196
677
 
@@ -205,17 +686,42 @@ module Polars
205
686
  end
206
687
  end
207
688
 
689
+ rb_temporal_types = []
690
+ rb_temporal_types << Date if defined?(Date)
691
+ rb_temporal_types << DateTime if defined?(DateTime)
692
+ rb_temporal_types << Time if defined?(Time)
693
+
208
694
  # _get_first_non_none
209
695
  value = values.find { |v| !v.nil? }
210
696
 
211
- if !dtype.nil? && is_polars_dtype(dtype) && ruby_dtype.nil?
697
+ if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
212
698
  constructor = polars_type_to_constructor(dtype)
213
699
  rbseries = constructor.call(name, values, strict)
214
700
  return rbseries
215
- end
701
+ else
702
+ if ruby_dtype.nil?
703
+ if value.nil?
704
+ # generic default dtype
705
+ ruby_dtype = Float
706
+ else
707
+ ruby_dtype = value.class
708
+ end
709
+ end
216
710
 
217
- constructor = rb_type_to_constructor(value.class)
218
- constructor.call(name, values, strict)
711
+ # temporal branch
712
+ if rb_temporal_types.include?(ruby_dtype)
713
+ # if dtype.nil?
714
+ # dtype = rb_type_to_dtype(ruby_dtype)
715
+ # elsif rb_temporal_types.include?(dtype)
716
+ # dtype = rb_type_to_dtype(dtype)
717
+ # end
718
+
719
+ raise "todo"
720
+ else
721
+ constructor = rb_type_to_constructor(value.class)
722
+ constructor.call(name, values, strict)
723
+ end
724
+ end
219
725
  end
220
726
 
221
727
  POLARS_TYPE_TO_CONSTRUCTOR = {
@@ -253,9 +759,5 @@ module Polars
253
759
  # RbSeries.method(:new_object)
254
760
  raise ArgumentError, "Cannot determine type"
255
761
  end
256
-
257
- def is_polars_dtype(data_type)
258
- true
259
- end
260
762
  end
261
763
  end
@@ -6,12 +6,129 @@ module Polars
6
6
  self._rbexpr = expr._rbexpr
7
7
  end
8
8
 
9
+ # def strptime
10
+ # end
11
+
9
12
  def lengths
10
13
  Utils.wrap_expr(_rbexpr.str_lengths)
11
14
  end
12
15
 
16
# Passes the result of `_rbexpr.str_n_chars` through `Utils.wrap_expr`.
def n_chars
  native = _rbexpr.str_n_chars
  Utils.wrap_expr(native)
end
19
+
20
# Calls `_rbexpr.str_concat` with `delimiter` (default "-") and passes the
# result through `Utils.wrap_expr`.
def concat(delimiter = "-")
  native = _rbexpr.str_concat(delimiter)
  Utils.wrap_expr(native)
end
23
+
24
# Passes the result of `_rbexpr.str_to_uppercase` through `Utils.wrap_expr`.
def to_uppercase
  native = _rbexpr.str_to_uppercase
  Utils.wrap_expr(native)
end
27
+
28
# Passes the result of `_rbexpr.str_to_lowercase` through `Utils.wrap_expr`.
def to_lowercase
  native = _rbexpr.str_to_lowercase
  Utils.wrap_expr(native)
end
31
+
32
# Calls `_rbexpr.str_strip` with `matches` and wraps the result via
# `Utils.wrap_expr`.
#
# Raises ArgumentError when `matches` is given and its length exceeds 1.
def strip(matches = nil)
  too_long = !matches.nil? && matches.length > 1
  raise ArgumentError, "matches should contain a single character" if too_long

  native = _rbexpr.str_strip(matches)
  Utils.wrap_expr(native)
end
38
+
39
# Calls `_rbexpr.str_lstrip` with `matches` and wraps the result via
# `Utils.wrap_expr`.
#
# Raises ArgumentError when `matches` is given and its length exceeds 1.
def lstrip(matches = nil)
  too_long = !matches.nil? && matches.length > 1
  raise ArgumentError, "matches should contain a single character" if too_long

  native = _rbexpr.str_lstrip(matches)
  Utils.wrap_expr(native)
end
45
+
46
# Calls `_rbexpr.str_rstrip` with `matches` and wraps the result via
# `Utils.wrap_expr`.
#
# Raises ArgumentError when `matches` is given and its length exceeds 1.
def rstrip(matches = nil)
  too_long = !matches.nil? && matches.length > 1
  raise ArgumentError, "matches should contain a single character" if too_long

  native = _rbexpr.str_rstrip(matches)
  Utils.wrap_expr(native)
end
52
+
53
# Calls `_rbexpr.str_zfill` with `alignment` and passes the result through
# `Utils.wrap_expr`.
def zfill(alignment)
  native = _rbexpr.str_zfill(alignment)
  Utils.wrap_expr(native)
end
56
+
57
# Calls `_rbexpr.str_ljust` with `width` and `fillchar` (default " ") and
# passes the result through `Utils.wrap_expr`.
def ljust(width, fillchar = " ")
  native = _rbexpr.str_ljust(width, fillchar)
  Utils.wrap_expr(native)
end
60
+
61
# Calls `_rbexpr.str_rjust` with `width` and `fillchar` (default " ") and
# passes the result through `Utils.wrap_expr`.
def rjust(width, fillchar = " ")
  native = _rbexpr.str_rjust(width, fillchar)
  Utils.wrap_expr(native)
end
64
+
13
65
  def contains(pattern, literal: false)
14
66
  Utils.wrap_expr(_rbexpr.str_contains(pattern, literal))
15
67
  end
68
+
69
# Calls `_rbexpr.str_ends_with` with `sub` and passes the result through
# `Utils.wrap_expr`.
def ends_with(sub)
  native = _rbexpr.str_ends_with(sub)
  Utils.wrap_expr(native)
end
72
+
73
# Calls `_rbexpr.str_starts_with` with `sub` and passes the result through
# `Utils.wrap_expr`.
def starts_with(sub)
  native = _rbexpr.str_starts_with(sub)
  Utils.wrap_expr(native)
end
76
+
77
+ # def json_path_match
78
+ # end
79
+
80
+ # def decode
81
+ # end
82
+
83
+ # def encode
84
+ # end
85
+
86
# Calls `_rbexpr.str_extract` with `pattern` and `group_index` (default 1,
# passed positionally) and wraps the result via `Utils.wrap_expr`.
def extract(pattern, group_index: 1)
  native = _rbexpr.str_extract(pattern, group_index)
  Utils.wrap_expr(native)
end
89
+
90
# Calls `_rbexpr.str_extract_all` with `pattern` and passes the result
# through `Utils.wrap_expr`.
def extract_all(pattern)
  native = _rbexpr.str_extract_all(pattern)
  Utils.wrap_expr(native)
end
93
+
94
# Calls `_rbexpr.count_match` with `pattern` and passes the result through
# `Utils.wrap_expr`.
# NOTE(review): unlike the sibling string methods, the native call here has
# no `str_` prefix — confirm this matches the native method name.
def count_match(pattern)
  native = _rbexpr.count_match(pattern)
  Utils.wrap_expr(native)
end
97
+
98
# Splits by `by`, dispatching to `_rbexpr.str_split_inclusive` when
# `inclusive` is truthy and to `_rbexpr.str_split` otherwise; the result is
# passed through `Utils.wrap_expr`.
def split(by, inclusive: false)
  native = inclusive ? _rbexpr.str_split_inclusive(by) : _rbexpr.str_split(by)
  Utils.wrap_expr(native)
end
105
+
106
# Splits by `by` with the count argument `n`, dispatching to
# `_rbexpr.str_split_exact_inclusive` when `inclusive` is truthy and to
# `_rbexpr.str_split_exact` otherwise; the result is passed through
# `Utils.wrap_expr`.
def split_exact(by, n, inclusive: false)
  native =
    if inclusive
      _rbexpr.str_split_exact_inclusive(by, n)
    else
      _rbexpr.str_split_exact(by, n)
    end
  Utils.wrap_expr(native)
end
113
+
114
# Calls `_rbexpr.str_splitn` with `by` and `n` and passes the result through
# `Utils.wrap_expr`.
def splitn(by, n)
  native = _rbexpr.str_splitn(by, n)
  Utils.wrap_expr(native)
end
117
+
118
# Builds a `str_replace` expression from `pattern` and `value`.
#
# Both `pattern` and `value` are converted with
# `Utils.expr_to_lit_or_expr(..., str_to_lit: true)`, and their `_rbexpr`
# objects are handed to `_rbexpr.str_replace` together with `literal`; the
# result is passed through `Utils.wrap_expr`.
#
# pattern - the pattern to look for.
# value   - the replacement. It is the second positional argument; the nil
#           default exists only so calls written with a single argument
#           keep behaving as they did when the replacement could not be
#           supplied at all. Callers should always pass it.
# literal - forwarded positionally to `str_replace` (default false).
def replace(pattern, value = nil, literal: false)
  pattern_expr = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
  value_expr = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
  Utils.wrap_expr(_rbexpr.str_replace(pattern_expr._rbexpr, value_expr._rbexpr, literal))
end
123
+
124
# Builds a `str_replace_all` expression from `pattern` and `value`.
#
# Both `pattern` and `value` are converted with
# `Utils.expr_to_lit_or_expr(..., str_to_lit: true)`, and their `_rbexpr`
# objects are handed to `_rbexpr.str_replace_all` together with `literal`;
# the result is passed through `Utils.wrap_expr`.
#
# pattern - the pattern to look for.
# value   - the replacement. It is the second positional argument; the nil
#           default exists only so calls written with a single argument
#           keep behaving as they did when the replacement could not be
#           supplied at all. Callers should always pass it.
# literal - forwarded positionally to `str_replace_all` (default false).
def replace_all(pattern, value = nil, literal: false)
  pattern_expr = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
  value_expr = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
  Utils.wrap_expr(_rbexpr.str_replace_all(pattern_expr._rbexpr, value_expr._rbexpr, literal))
end
129
+
130
# Calls `_rbexpr.str_slice` with `offset` and `length` (default nil) and
# passes the result through `Utils.wrap_expr`.
def slice(offset, length = nil)
  native = _rbexpr.str_slice(offset, length)
  Utils.wrap_expr(native)
end
16
133
  end
17
134
  end