polars-df 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,143 @@
1
module Polars
  # Namespace object returned by `Expr#dt`.
  #
  # Every public method forwards to the native expression stored in `_rbexpr`
  # and wraps the native result with `Utils.wrap_expr`.
  class DateTimeExpr
    # Native expression that all methods below delegate to.
    attr_accessor :_rbexpr

    # @param expr [Object] object responding to `_rbexpr`; its native
    #   expression is copied onto this instance.
    def initialize(expr)
      self._rbexpr = expr._rbexpr
    end

    # def truncate
    # end

    # def round
    # end

    # Forwards `fmt` to the native `strftime`.
    def strftime(fmt)
      Utils.wrap_expr(_rbexpr.strftime(fmt))
    end

    # Component accessors: each one forwards to the identically named native
    # method and wraps the result.

    def year
      Utils.wrap_expr(_rbexpr.year)
    end

    def iso_year
      Utils.wrap_expr(_rbexpr.iso_year)
    end

    def quarter
      Utils.wrap_expr(_rbexpr.quarter)
    end

    def month
      Utils.wrap_expr(_rbexpr.month)
    end

    def week
      Utils.wrap_expr(_rbexpr.week)
    end

    def weekday
      Utils.wrap_expr(_rbexpr.weekday)
    end

    def day
      Utils.wrap_expr(_rbexpr.day)
    end

    def ordinal_day
      Utils.wrap_expr(_rbexpr.ordinal_day)
    end

    def hour
      Utils.wrap_expr(_rbexpr.hour)
    end

    def minute
      Utils.wrap_expr(_rbexpr.minute)
    end

    def second
      Utils.wrap_expr(_rbexpr.second)
    end

    def millisecond
      Utils.wrap_expr(_rbexpr.millisecond)
    end

    def microsecond
      Utils.wrap_expr(_rbexpr.microsecond)
    end

    def nanosecond
      Utils.wrap_expr(_rbexpr.nanosecond)
    end

    # Dispatches on the unit `tu`:
    # * a member of `Utils::DTYPE_TEMPORAL_UNITS` -> `timestamp(tu)`
    # * "s" -> native `dt_epoch_seconds`
    # * "d" -> wrapped expression cast to `:date`, then to `:i32`
    #
    # @param tu [String] unit selector, "us" by default
    # @raise [ArgumentError] when `tu` matches none of the cases above
    def epoch(tu = "us")
      if Utils::DTYPE_TEMPORAL_UNITS.include?(tu)
        timestamp(tu)
      elsif tu == "s"
        Utils.wrap_expr(_rbexpr.dt_epoch_seconds)
      elsif tu == "d"
        Utils.wrap_expr(_rbexpr).cast(:date).cast(:i32)
      else
        # Ruby interpolates with #{...}; plain braces are literal characters.
        raise ArgumentError, "tu must be one of {'ns', 'us', 'ms', 's', 'd'}, got #{tu}"
      end
    end

    # Forwards `tu` ("us" by default) to the native `timestamp`.
    def timestamp(tu = "us")
      Utils.wrap_expr(_rbexpr.timestamp(tu))
    end

    # Forwards `tu` to the native `dt_with_time_unit`.
    def with_time_unit(tu)
      Utils.wrap_expr(_rbexpr.dt_with_time_unit(tu))
    end

    # Forwards `tu` to the native `dt_cast_time_unit`.
    def cast_time_unit(tu)
      Utils.wrap_expr(_rbexpr.dt_cast_time_unit(tu))
    end

    # Forwards `tz` to the native `dt_with_time_zone`.
    def with_time_zone(tz)
      Utils.wrap_expr(_rbexpr.dt_with_time_zone(tz))
    end

    # Forwards `tz` to the native `dt_cast_time_zone`.
    def cast_time_zone(tz)
      Utils.wrap_expr(_rbexpr.dt_cast_time_zone(tz))
    end

    # Forwards `tz` to the native `dt_tz_localize`.
    def tz_localize(tz)
      Utils.wrap_expr(_rbexpr.dt_tz_localize(tz))
    end

    # Duration accessors: each one forwards to the native `duration_*` method
    # of the same unit and wraps the result.

    def days
      Utils.wrap_expr(_rbexpr.duration_days)
    end

    def hours
      Utils.wrap_expr(_rbexpr.duration_hours)
    end

    def minutes
      Utils.wrap_expr(_rbexpr.duration_minutes)
    end

    def seconds
      Utils.wrap_expr(_rbexpr.duration_seconds)
    end

    def milliseconds
      Utils.wrap_expr(_rbexpr.duration_milliseconds)
    end

    def microseconds
      Utils.wrap_expr(_rbexpr.duration_microseconds)
    end

    def nanoseconds
      Utils.wrap_expr(_rbexpr.duration_nanoseconds)
    end

    # Forwards `by` to the native `dt_offset_by`.
    def offset_by(by)
      Utils.wrap_expr(_rbexpr.dt_offset_by(by))
    end
  end
end
data/lib/polars/expr.rb CHANGED
@@ -25,10 +25,30 @@ module Polars
25
25
  wrap_expr(_rbexpr._or(_to_rbexpr(other)))
26
26
  end
27
27
 
28
# Addition: `other` is converted with `_to_rbexpr` and added natively.
def +(other)
  lhs = _rbexpr
  rhs = _to_rbexpr(other)
  wrap_expr(lhs + rhs)
end

# Subtraction: `other` is converted with `_to_rbexpr` and subtracted natively.
def -(other)
  lhs = _rbexpr
  rhs = _to_rbexpr(other)
  wrap_expr(lhs - rhs)
end

# Multiplication: `other` is converted with `_to_rbexpr` and multiplied natively.
def *(other)
  lhs = _rbexpr
  rhs = _to_rbexpr(other)
  wrap_expr(lhs * rhs)
end

# Division: `other` is converted with `_to_rbexpr` and divided natively.
def /(other)
  lhs = _rbexpr
  rhs = _to_rbexpr(other)
  wrap_expr(lhs / rhs)
end

# Modulo: `other` is converted with `_to_rbexpr` and applied natively.
def %(other)
  lhs = _rbexpr
  rhs = _to_rbexpr(other)
  wrap_expr(lhs % rhs)
end

# Exponentiation operator; simply calls `pow`.
def **(power)
  self.pow(power)
end

# Greater-than-or-equal comparison via the native `gt_eq`.
def >=(other)
  lhs = _rbexpr
  rhs = _to_expr(other)._rbexpr
  wrap_expr(lhs.gt_eq(rhs))
end
@@ -53,14 +73,73 @@ module Polars
53
73
  wrap_expr(_rbexpr.gt(_to_expr(other)._rbexpr))
54
74
  end
55
75
 
76
# Unary minus, expressed as a literal zero minus this expression.
def -@
  zero = Utils.lit(0)
  zero - self
end
79
+
80
+ # def to_physical
81
+ # end
82
+
83
# Wraps the native `any`.
def any
  native = _rbexpr.any
  wrap_expr(native)
end

# Wraps the native `all`.
def all
  native = _rbexpr.all
  wrap_expr(native)
end

# Square root, written as this expression raised to 0.5 through `**`.
def sqrt
  half = 0.5
  self ** half
end

# Calls `log` with base 10.
def log10
  self.log(10)
end

# Wraps the native `exp`.
def exp
  native = _rbexpr.exp
  wrap_expr(native)
end

# Forwards `name` to the native `_alias`.
def alias(name)
  native = _rbexpr._alias(name)
  wrap_expr(native)
end
59
106
 
107
+ # TODO support symbols
108
# Excludes columns either by name or by data type.
#
# * a single String          -> native `exclude` with a one-element array
# * any other non-Array      -> native `exclude_dtype` with a one-element array
# * an Array                 -> must be all Strings or all values accepted by
#                               `Utils.is_polars_dtype`; dispatch is decided by
#                               the first element
#
# @param columns [String, Object, Array] name(s) or dtype(s) to exclude
# @raise [ArgumentError] when an Array mixes kinds or holds unsupported values
def exclude(columns)
  if columns.is_a?(String)
    return wrap_expr(_rbexpr.exclude([columns]))
  elsif !columns.is_a?(Array)
    return wrap_expr(_rbexpr.exclude_dtype([columns]))
  end

  # The input is valid when EITHER predicate holds for every element. The
  # previous check raised unless BOTH held, which rejected every non-empty
  # array.
  all_strings = columns.all? { |a| a.is_a?(String) }
  unless all_strings || columns.all? { |a| Utils.is_polars_dtype(a) }
    raise ArgumentError, "input should be all string or all DataType"
  end

  if columns[0].is_a?(String)
    wrap_expr(_rbexpr.exclude(columns))
  else
    wrap_expr(_rbexpr.exclude_dtype(columns))
  end
end
127
+
128
# Wraps the native `keep_name`.
def keep_name
  native = _rbexpr.keep_name
  wrap_expr(native)
end

# Forwards `prefix` to the native `prefix`.
def prefix(prefix)
  native = _rbexpr.prefix(prefix)
  wrap_expr(native)
end

# Forwards `suffix` to the native `suffix`.
def suffix(suffix)
  native = _rbexpr.suffix(suffix)
  wrap_expr(native)
end

# def map_alias
# end

# Wraps the native `is_not`.
def is_not
  native = _rbexpr.is_not
  wrap_expr(native)
end
@@ -73,6 +152,26 @@ module Polars
73
152
  wrap_expr(_rbexpr.is_not_null)
74
153
  end
75
154
 
155
# Wraps the native `is_finite`.
def is_finite
  native = _rbexpr.is_finite
  wrap_expr(native)
end

# Wraps the native `is_infinite`.
def is_infinite
  native = _rbexpr.is_infinite
  wrap_expr(native)
end

# Wraps the native `is_nan`.
def is_nan
  native = _rbexpr.is_nan
  wrap_expr(native)
end

# Wraps the native `is_not_nan`.
def is_not_nan
  native = _rbexpr.is_not_nan
  wrap_expr(native)
end

# Wraps the native `agg_groups`.
def agg_groups
  native = _rbexpr.agg_groups
  wrap_expr(native)
end

# Wraps the native `count`.
def count
  native = _rbexpr.count
  wrap_expr(native)
end
@@ -81,10 +180,104 @@ module Polars
81
180
  count
82
181
  end
83
182
 
183
# Forwards to the native `slice`.
#
# Arguments that are not already an `Expr` are turned into literals with
# `Polars.lit` first (this includes the default `length` of nil).
#
# @param offset [Expr, Object]
# @param length [Expr, Object, nil]
def slice(offset, length = nil)
  offset = Polars.lit(offset) unless offset.is_a?(Expr)
  length = Polars.lit(length) unless length.is_a?(Expr)

  native = _rbexpr.slice(offset._rbexpr, length._rbexpr)
  wrap_expr(native)
end
192
+
193
# Converts `other` with `Utils.expr_to_lit_or_expr`, then forwards it and
# `upcast` to the native `append`.
def append(other, upcast: true)
  converted = Utils.expr_to_lit_or_expr(other)
  native = _rbexpr.append(converted._rbexpr, upcast)
  wrap_expr(native)
end

# Wraps the native `rechunk`.
def rechunk
  native = _rbexpr.rechunk
  wrap_expr(native)
end

# Wraps the native `drop_nulls`.
def drop_nulls
  native = _rbexpr.drop_nulls
  wrap_expr(native)
end

# Wraps the native `drop_nans`.
def drop_nans
  native = _rbexpr.drop_nans
  wrap_expr(native)
end

# Forwards `reverse` to the native `cumsum`.
def cumsum(reverse: false)
  native = _rbexpr.cumsum(reverse)
  wrap_expr(native)
end

# Forwards `reverse` to the native `cumprod`.
def cumprod(reverse: false)
  native = _rbexpr.cumprod(reverse)
  wrap_expr(native)
end

# Forwards `reverse` to the native `cummin`.
def cummin(reverse: false)
  native = _rbexpr.cummin(reverse)
  wrap_expr(native)
end

# Forwards `reverse` to the native `cummax`.
def cummax(reverse: false)
  native = _rbexpr.cummax(reverse)
  wrap_expr(native)
end

# Forwards `reverse` to the native `cumcount`.
def cumcount(reverse: false)
  native = _rbexpr.cumcount(reverse)
  wrap_expr(native)
end

# Wraps the native `floor`.
def floor
  native = _rbexpr.floor
  wrap_expr(native)
end

# Wraps the native `ceil`.
def ceil
  native = _rbexpr.ceil
  wrap_expr(native)
end

# Forwards `decimals` (0 by default) to the native `round`.
def round(decimals = 0)
  native = _rbexpr.round(decimals)
  wrap_expr(native)
end
241
+
242
# Converts `other` with `Utils.expr_to_lit_or_expr` (`str_to_lit: false`) and
# forwards it to the native `dot`.
def dot(other)
  converted = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
  native = _rbexpr.dot(converted._rbexpr)
  wrap_expr(native)
end

# Wraps the native `mode`.
def mode
  native = _rbexpr.mode
  wrap_expr(native)
end

# Resolves `dtype` through `Utils.rb_type_to_dtype`, then forwards it and
# `strict` to the native `cast`.
def cast(dtype, strict: true)
  resolved = Utils.rb_type_to_dtype(dtype)
  native = _rbexpr.cast(resolved, strict)
  wrap_expr(native)
end

# Forwards both flags to the native `sort_with`.
def sort(reverse: false, nulls_last: false)
  native = _rbexpr.sort_with(reverse, nulls_last)
  wrap_expr(native)
end

# Forwards `k` (5 by default) and `reverse` to the native `top_k`.
def top_k(k: 5, reverse: false)
  native = _rbexpr.top_k(k, reverse)
  wrap_expr(native)
end

# Forwards both flags to the native `arg_sort`.
def arg_sort(reverse: false, nulls_last: false)
  native = _rbexpr.arg_sort(reverse, nulls_last)
  wrap_expr(native)
end

# Wraps the native `arg_max`.
def arg_max
  native = _rbexpr.arg_max
  wrap_expr(native)
end

# Wraps the native `arg_min`.
def arg_min
  native = _rbexpr.arg_min
  wrap_expr(native)
end

# Converts `element` with `Utils.expr_to_lit_or_expr` (`str_to_lit: false`)
# and forwards it to the native `search_sorted`.
def search_sorted(element)
  converted = Utils.expr_to_lit_or_expr(element, str_to_lit: false)
  native = _rbexpr.search_sorted(converted._rbexpr)
  wrap_expr(native)
end
280
+
88
281
  def sort_by(by, reverse: false)
89
282
  if !by.is_a?(Array)
90
283
  by = [by]
@@ -97,6 +290,18 @@ module Polars
97
290
  wrap_expr(_rbexpr.sort_by(by, reverse))
98
291
  end
99
292
 
293
+ # def take
294
+ # end
295
+
296
# Forwards `periods` to the native `shift`.
def shift(periods)
  native = _rbexpr.shift(periods)
  wrap_expr(native)
end

# Converts `fill_value` with `Utils.expr_to_lit_or_expr` (`str_to_lit: true`)
# and forwards it with `periods` to the native `shift_and_fill`.
def shift_and_fill(periods, fill_value)
  filler = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
  native = _rbexpr.shift_and_fill(periods, filler._rbexpr)
  wrap_expr(native)
end
304
+
100
305
  def fill_null(value = nil, strategy: nil, limit: nil)
101
306
  if !value.nil? && !strategy.nil?
102
307
  raise ArgumentError, "cannot specify both 'value' and 'strategy'."
@@ -119,6 +324,14 @@ module Polars
119
324
  wrap_expr(_rbexpr.fill_nan(fill_value._rbexpr))
120
325
  end
121
326
 
327
# Forwards `limit` (nil by default) to the native `forward_fill`.
def forward_fill(limit: nil)
  native = _rbexpr.forward_fill(limit)
  wrap_expr(native)
end

# Forwards `limit` (nil by default) to the native `backward_fill`.
def backward_fill(limit: nil)
  native = _rbexpr.backward_fill(limit)
  wrap_expr(native)
end

# Wraps the native `reverse`.
def reverse
  native = _rbexpr.reverse
  wrap_expr(native)
end
@@ -167,6 +380,14 @@ module Polars
167
380
  wrap_expr(_rbexpr.n_unique)
168
381
  end
169
382
 
383
# Wraps the native `null_count`.
def null_count
  native = _rbexpr.null_count
  wrap_expr(native)
end

# Wraps the native `arg_unique`.
def arg_unique
  native = _rbexpr.arg_unique
  wrap_expr(native)
end
390
+
170
391
  def unique(maintain_order: false)
171
392
  if maintain_order
172
393
  wrap_expr(_rbexpr.unique_stable)
@@ -188,10 +409,48 @@ module Polars
188
409
  wrap_expr(_rbexpr.over(rbexprs))
189
410
  end
190
411
 
412
# Wraps the native `is_unique`.
def is_unique
  native = _rbexpr.is_unique
  wrap_expr(native)
end

# Wraps the native `is_first`.
def is_first
  native = _rbexpr.is_first
  wrap_expr(native)
end

# Wraps the native `is_duplicated`.
def is_duplicated
  native = _rbexpr.is_duplicated
  wrap_expr(native)
end

# Forwards `quantile` and `interpolation` ("nearest" by default) to the
# native `quantile`.
def quantile(quantile, interpolation: "nearest")
  native = _rbexpr.quantile(quantile, interpolation)
  wrap_expr(native)
end

# Forwards the predicate's native expression to the native `filter`.
def filter(predicate)
  condition = predicate._rbexpr
  native = _rbexpr.filter(condition)
  wrap_expr(native)
end

# Alias for `filter`.
def where(predicate)
  self.filter(predicate)
end
435
+
436
+ # def map
437
+ # end
438
+
439
+ # def apply
440
+ # end
441
+
442
# Wraps the native `explode` (same native call as `explode` below).
def flatten
  native = _rbexpr.explode
  wrap_expr(native)
end

# Wraps the native `explode`.
def explode
  native = _rbexpr.explode
  wrap_expr(native)
end

# Forwards `n` to the native `take_every`.
def take_every(n)
  native = _rbexpr.take_every(n)
  wrap_expr(native)
end

# Forwards `n` (10 by default) to the native `head`.
def head(n = 10)
  native = _rbexpr.head(n)
  wrap_expr(native)
end
@@ -204,18 +463,247 @@ module Polars
204
463
  head(n)
205
464
  end
206
465
 
466
# Converts `exponent` with `Utils.expr_to_lit_or_expr` and forwards its native
# expression to the native `pow`.
def pow(exponent)
  converted = Utils.expr_to_lit_or_expr(exponent)
  native = _rbexpr.pow(converted._rbexpr)
  wrap_expr(native)
end
470
+
471
+ # def is_in
472
+ # end
473
+
474
# Converts `by` with `Utils.expr_to_lit_or_expr` and forwards its native
# expression to the native `repeat_by`.
#
# `str_to_lit` is passed as a keyword, matching every other call site of
# `Utils.expr_to_lit_or_expr` in this class (`dot`, `search_sorted`,
# `shift_and_fill`). A bare positional `false` is not accepted by a method
# that declares `str_to_lit:` as a keyword argument.
#
# @param by [Object] value or expression giving the repeat counts
def repeat_by(by)
  by = Utils.expr_to_lit_or_expr(by, str_to_lit: false)
  wrap_expr(_rbexpr.repeat_by(by._rbexpr))
end
478
+
479
+ # def is_between
480
+ # end
481
+
482
+ # def _hash
483
+ # end
484
+
485
# Forwards `signed` (false by default) to the native `reinterpret`.
def reinterpret(signed: false)
  native = _rbexpr.reinterpret(signed)
  wrap_expr(native)
end

# def _inspect
# end

# Wraps the native `interpolate`.
def interpolate
  native = _rbexpr.interpolate
  wrap_expr(native)
end
210
495
 
496
+ # def rolling_min
497
+ # end
498
+
499
+ # def rolling_max
500
+ # end
501
+
502
+ # def rolling_mean
503
+ # end
504
+
505
+ # def rolling_sum
506
+ # end
507
+
508
+ # def rolling_std
509
+ # end
510
+
511
+ # def rolling_var
512
+ # end
513
+
514
+ # def rolling_median
515
+ # end
516
+
517
+ # def rolling_quantile
518
+ # end
519
+
520
+ # def rolling_apply
521
+ # end
522
+
523
# Forwards `window_size` and `bias` to the native `rolling_skew`.
def rolling_skew(window_size, bias: true)
  native = _rbexpr.rolling_skew(window_size, bias)
  wrap_expr(native)
end

# Wraps the native `abs`.
def abs
  native = _rbexpr.abs
  wrap_expr(native)
end

# Alias for `arg_sort` with the same keyword arguments.
def argsort(reverse: false, nulls_last: false)
  self.arg_sort(reverse: reverse, nulls_last: nulls_last)
end

# Forwards `method` ("average" by default) and `reverse` to the native `rank`.
def rank(method: "average", reverse: false)
  native = _rbexpr.rank(method, reverse)
  wrap_expr(native)
end

# Forwards `n` and `null_behavior` ("ignore" by default) to the native `diff`.
def diff(n: 1, null_behavior: "ignore")
  native = _rbexpr.diff(n, null_behavior)
  wrap_expr(native)
end

# Forwards `n` (1 by default) to the native `pct_change`.
def pct_change(n: 1)
  native = _rbexpr.pct_change(n)
  wrap_expr(native)
end

# Forwards `bias` to the native `skew`.
def skew(bias: true)
  native = _rbexpr.skew(bias)
  wrap_expr(native)
end

# Forwards `fisher` and `bias` to the native `kurtosis`.
def kurtosis(fisher: true, bias: true)
  native = _rbexpr.kurtosis(fisher, bias)
  wrap_expr(native)
end

# Forwards both bounds to the native `clip`.
def clip(min_val, max_val)
  native = _rbexpr.clip(min_val, max_val)
  wrap_expr(native)
end

# Forwards `min_val` to the native `clip_min`.
def clip_min(min_val)
  native = _rbexpr.clip_min(min_val)
  wrap_expr(native)
end

# Forwards `max_val` to the native `clip_max`.
def clip_max(max_val)
  native = _rbexpr.clip_max(max_val)
  wrap_expr(native)
end

# Wraps the native `lower_bound`.
def lower_bound
  native = _rbexpr.lower_bound
  wrap_expr(native)
end

# Wraps the native `upper_bound`.
def upper_bound
  native = _rbexpr.upper_bound
  wrap_expr(native)
end

# Wraps the native `sign`.
def sign
  native = _rbexpr.sign
  wrap_expr(native)
end
578
+
579
# Trigonometric and hyperbolic wrappers: each method forwards to the
# identically named native method and wraps the result.

def sin
  native = _rbexpr.sin
  wrap_expr(native)
end

def cos
  native = _rbexpr.cos
  wrap_expr(native)
end

def tan
  native = _rbexpr.tan
  wrap_expr(native)
end

def arcsin
  native = _rbexpr.arcsin
  wrap_expr(native)
end

def arccos
  native = _rbexpr.arccos
  wrap_expr(native)
end

def arctan
  native = _rbexpr.arctan
  wrap_expr(native)
end

def sinh
  native = _rbexpr.sinh
  wrap_expr(native)
end

def cosh
  native = _rbexpr.cosh
  wrap_expr(native)
end

def tanh
  native = _rbexpr.tanh
  wrap_expr(native)
end

def arcsinh
  native = _rbexpr.arcsinh
  wrap_expr(native)
end

def arccosh
  native = _rbexpr.arccosh
  wrap_expr(native)
end

def arctanh
  native = _rbexpr.arctanh
  wrap_expr(native)
end
626
+
627
# Forwards `dims` to the native `reshape`.
def reshape(dims)
  native = _rbexpr.reshape(dims)
  wrap_expr(native)
end

# Forwards a seed to the native `shuffle`.
#
# When `seed` is nil, one is drawn with `rand(10000)`.
def shuffle(seed: nil)
  seed = rand(10000) if seed.nil?
  native = _rbexpr.shuffle(seed)
  wrap_expr(native)
end
637
+
638
+ # def sample
639
+ # end
640
+
641
+ # def ewm_mean
642
+ # end
643
+
644
+ # def ewm_std
645
+ # end
646
+
647
+ # def ewm_var
648
+ # end
649
+
650
+ # def extend_constant
651
+ # end
652
+
653
# Forwards `multithreaded` and `sort` to the native `value_counts`.
def value_counts(multithreaded: false, sort: false)
  native = _rbexpr.value_counts(multithreaded, sort)
  wrap_expr(native)
end

# Wraps the native `unique_counts`.
def unique_counts
  native = _rbexpr.unique_counts
  wrap_expr(native)
end

# Forwards `base` (`Math::E` by default) to the native `log`.
def log(base = Math::E)
  native = _rbexpr.log(base)
  wrap_expr(native)
end

# Forwards `base` (2 by default) and `normalize` to the native `entropy`.
def entropy(base: 2, normalize: false)
  native = _rbexpr.entropy(base, normalize)
  wrap_expr(native)
end

# def cumulative_eval
# end

# def set_sorted
# end

# Wraps the native `list`.
def list
  native = _rbexpr.list
  wrap_expr(native)
end
214
678
 
679
# Wraps the native `shrink_dtype`.
def shrink_dtype
  native = _rbexpr.shrink_dtype
  wrap_expr(native)
end

# Namespace accessors: each builds a new namespace object around this
# expression.

# Returns a `ListExpr` built from this expression.
def arr
  namespace = ListExpr.new(self)
  namespace
end

# Returns a `CatExpr` built from this expression.
def cat
  namespace = CatExpr.new(self)
  namespace
end

# Returns a `DateTimeExpr` built from this expression.
def dt
  namespace = DateTimeExpr.new(self)
  namespace
end

# Returns a `MetaExpr` built from this expression.
def meta
  namespace = MetaExpr.new(self)
  namespace
end

# Returns a `StringExpr` built from this expression.
def str
  namespace = StringExpr.new(self)
  namespace
end

# Returns a `StructExpr` built from this expression.
def struct
  namespace = StructExpr.new(self)
  namespace
end
706
+
219
707
  private
220
708
 
221
709
  def wrap_expr(expr)