polars-df 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,143 @@
1
module Polars
  # Namespace object for date/time and duration operations on an expression.
  #
  # Every method forwards to the native expression stored in `_rbexpr` and
  # passes the result through `Utils.wrap_expr`.
  class DateTimeExpr
    attr_accessor :_rbexpr

    # Reuses the native expression handle of the given expression.
    #
    # @param expr [#_rbexpr] object whose `_rbexpr` this namespace operates on
    def initialize(expr)
      self._rbexpr = expr._rbexpr
    end

    # def truncate
    # end

    # def round
    # end

    # Forwards `fmt` to the native `strftime`.
    def strftime(fmt)
      Utils.wrap_expr(_rbexpr.strftime(fmt))
    end

    # Forwards to the native `year`.
    def year
      Utils.wrap_expr(_rbexpr.year)
    end

    # Forwards to the native `iso_year`.
    def iso_year
      Utils.wrap_expr(_rbexpr.iso_year)
    end

    # Forwards to the native `quarter`.
    def quarter
      Utils.wrap_expr(_rbexpr.quarter)
    end

    # Forwards to the native `month`.
    def month
      Utils.wrap_expr(_rbexpr.month)
    end

    # Forwards to the native `week`.
    def week
      Utils.wrap_expr(_rbexpr.week)
    end

    # Forwards to the native `weekday`.
    def weekday
      Utils.wrap_expr(_rbexpr.weekday)
    end

    # Forwards to the native `day`.
    def day
      Utils.wrap_expr(_rbexpr.day)
    end

    # Forwards to the native `ordinal_day`.
    def ordinal_day
      Utils.wrap_expr(_rbexpr.ordinal_day)
    end

    # Forwards to the native `hour`.
    def hour
      Utils.wrap_expr(_rbexpr.hour)
    end

    # Forwards to the native `minute`.
    def minute
      Utils.wrap_expr(_rbexpr.minute)
    end

    # Forwards to the native `second`.
    def second
      Utils.wrap_expr(_rbexpr.second)
    end

    # Forwards to the native `millisecond`.
    def millisecond
      Utils.wrap_expr(_rbexpr.millisecond)
    end

    # Forwards to the native `microsecond`.
    def microsecond
      Utils.wrap_expr(_rbexpr.microsecond)
    end

    # Forwards to the native `nanosecond`.
    def nanosecond
      Utils.wrap_expr(_rbexpr.nanosecond)
    end

    # Dispatches on the unit string `tu`:
    # units listed in `Utils::DTYPE_TEMPORAL_UNITS` go through `timestamp`,
    # "s" uses the native `dt_epoch_seconds`, and "d" casts the wrapped
    # expression to `:date` and then to `:i32`.
    #
    # @param tu [String] unit selector
    # @raise [ArgumentError] if `tu` matches none of the branches above
    def epoch(tu = "us")
      if Utils::DTYPE_TEMPORAL_UNITS.include?(tu)
        timestamp(tu)
      elsif tu == "s"
        Utils.wrap_expr(_rbexpr.dt_epoch_seconds)
      elsif tu == "d"
        Utils.wrap_expr(_rbexpr).cast(:date).cast(:i32)
      else
        # Ruby interpolation: the earlier text used Python f-string braces, so
        # the message printed "{tu}" literally instead of the rejected value.
        raise ArgumentError, "tu must be one of {'ns', 'us', 'ms', 's', 'd'}, got #{tu}"
      end
    end

    # Forwards `tu` to the native `timestamp`.
    def timestamp(tu = "us")
      Utils.wrap_expr(_rbexpr.timestamp(tu))
    end

    # Forwards `tu` to the native `dt_with_time_unit`.
    def with_time_unit(tu)
      Utils.wrap_expr(_rbexpr.dt_with_time_unit(tu))
    end

    # Forwards `tu` to the native `dt_cast_time_unit`.
    def cast_time_unit(tu)
      Utils.wrap_expr(_rbexpr.dt_cast_time_unit(tu))
    end

    # Forwards `tz` to the native `dt_with_time_zone`.
    def with_time_zone(tz)
      Utils.wrap_expr(_rbexpr.dt_with_time_zone(tz))
    end

    # Forwards `tz` to the native `dt_cast_time_zone`.
    def cast_time_zone(tz)
      Utils.wrap_expr(_rbexpr.dt_cast_time_zone(tz))
    end

    # Forwards `tz` to the native `dt_tz_localize`.
    def tz_localize(tz)
      Utils.wrap_expr(_rbexpr.dt_tz_localize(tz))
    end

    # Forwards to the native `duration_days`.
    def days
      Utils.wrap_expr(_rbexpr.duration_days)
    end

    # Forwards to the native `duration_hours`.
    def hours
      Utils.wrap_expr(_rbexpr.duration_hours)
    end

    # Forwards to the native `duration_minutes`.
    def minutes
      Utils.wrap_expr(_rbexpr.duration_minutes)
    end

    # Forwards to the native `duration_seconds`.
    def seconds
      Utils.wrap_expr(_rbexpr.duration_seconds)
    end

    # Forwards to the native `duration_milliseconds`.
    def milliseconds
      Utils.wrap_expr(_rbexpr.duration_milliseconds)
    end

    # Forwards to the native `duration_microseconds`.
    def microseconds
      Utils.wrap_expr(_rbexpr.duration_microseconds)
    end

    # Forwards to the native `duration_nanoseconds`.
    def nanoseconds
      Utils.wrap_expr(_rbexpr.duration_nanoseconds)
    end

    # Forwards `by` to the native `dt_offset_by`.
    def offset_by(by)
      Utils.wrap_expr(_rbexpr.dt_offset_by(by))
    end
  end
end
data/lib/polars/expr.rb CHANGED
@@ -1,7 +1,10 @@
1
1
  module Polars
2
+ # Expressions that can be used in various contexts.
2
3
  class Expr
4
+ # @private
3
5
  attr_accessor :_rbexpr
4
6
 
7
+ # @private
5
8
  def self._from_rbexpr(rbexpr)
6
9
  expr = Expr.allocate
7
10
  expr._rbexpr = rbexpr
@@ -25,10 +28,30 @@ module Polars
25
28
  wrap_expr(_rbexpr._or(_to_rbexpr(other)))
26
29
  end
27
30
 
31
# Adds `other` (converted by `_to_rbexpr`) to the native expression and passes the result through `wrap_expr`.
+ def +(other)
32
+ wrap_expr(_rbexpr + _to_rbexpr(other))
33
+ end
34
+
35
# Subtracts `other` (converted by `_to_rbexpr`) from the native expression and passes the result through `wrap_expr`.
+ def -(other)
36
+ wrap_expr(_rbexpr - _to_rbexpr(other))
37
+ end
38
+
28
39
  def *(other)
29
40
  wrap_expr(_rbexpr * _to_rbexpr(other))
30
41
  end
31
42
 
43
# Divides the native expression by `other` (converted by `_to_rbexpr`) and passes the result through `wrap_expr`.
+ def /(other)
44
+ wrap_expr(_rbexpr / _to_rbexpr(other))
45
+ end
46
+
47
# Applies `%` between the native expression and `other` (converted by `_to_rbexpr`) and passes the result through `wrap_expr`.
+ def %(other)
48
+ wrap_expr(_rbexpr % _to_rbexpr(other))
49
+ end
50
+
51
# Operator form of `pow`; delegates with the same argument.
+ def **(power)
52
+ pow(power)
53
+ end
54
+
32
55
  def >=(other)
33
56
  wrap_expr(_rbexpr.gt_eq(_to_expr(other)._rbexpr))
34
57
  end
@@ -53,14 +76,77 @@ module Polars
53
76
  wrap_expr(_rbexpr.gt(_to_expr(other)._rbexpr))
54
77
  end
55
78
 
79
# Unary minus, built as `Utils.lit(0) - self`.
+ def -@
80
+ Utils.lit(0) - self
81
+ end
82
+
83
+ # def to_physical
84
+ # end
85
+
86
+ #
87
# Forwards to the native `any` and passes the result through `wrap_expr`.
+ def any
88
+ wrap_expr(_rbexpr.any)
89
+ end
90
+
91
# Forwards to the native `all` and passes the result through `wrap_expr`.
+ def all
92
+ wrap_expr(_rbexpr.all)
93
+ end
94
+
95
# Square root, expressed as `self ** 0.5`.
+ def sqrt
96
+ self ** 0.5
97
+ end
98
+
99
# Calls `log` with base 10.
+ def log10
100
+ log(10)
101
+ end
102
+
103
# Forwards to the native `exp` and passes the result through `wrap_expr`.
+ def exp
104
+ wrap_expr(_rbexpr.exp)
105
+ end
106
+
56
107
  def alias(name)
57
108
  wrap_expr(_rbexpr._alias(name))
58
109
  end
59
110
 
111
+ # TODO support symbols for exclude
112
+
113
+ #
114
# Excludes columns from this expression, either by name or by data type.
#
# A single String is treated as one column name; any other non-Array value
# is treated as one data type. An Array must be homogeneous: all Strings, or
# all values accepted by `Utils.is_polars_dtype`.
#
# @param columns [String, Object, Array] name(s) or data type(s) to exclude
# @return the result of `wrap_expr` around the native `exclude` /
#   `exclude_dtype` call
# @raise [ArgumentError] if an Array is neither all Strings nor all data types
def exclude(columns)
  if columns.is_a?(String)
    return wrap_expr(_rbexpr.exclude([columns]))
  elsif !columns.is_a?(Array)
    return wrap_expr(_rbexpr.exclude_dtype([columns]))
  end

  # Reject only when the array fits neither kind. The earlier check joined the
  # two negated tests with `||`, which rejected every valid non-empty array.
  unless columns.all? { |a| a.is_a?(String) } || columns.all? { |a| Utils.is_polars_dtype(a) }
    raise ArgumentError, "input should be all string or all DataType"
  end

  if columns[0].is_a?(String)
    wrap_expr(_rbexpr.exclude(columns))
  else
    wrap_expr(_rbexpr.exclude_dtype(columns))
  end
end
133
+
134
# Forwards to the native `keep_name` and passes the result through `wrap_expr`.
+ def keep_name
135
+ wrap_expr(_rbexpr.keep_name)
136
+ end
137
+
138
# Forwards `prefix` to the native `prefix` and passes the result through `wrap_expr`.
+ def prefix(prefix)
139
+ wrap_expr(_rbexpr.prefix(prefix))
140
+ end
141
+
60
142
  def suffix(suffix)
61
143
  wrap_expr(_rbexpr.suffix(suffix))
62
144
  end
63
145
 
146
+ # def map_alias
147
+ # end
148
+
149
+ #
64
150
  def is_not
65
151
  wrap_expr(_rbexpr.is_not)
66
152
  end
@@ -73,6 +159,26 @@ module Polars
73
159
  wrap_expr(_rbexpr.is_not_null)
74
160
  end
75
161
 
162
# Forwards to the native `is_finite` and passes the result through `wrap_expr`.
+ def is_finite
163
+ wrap_expr(_rbexpr.is_finite)
164
+ end
165
+
166
# Forwards to the native `is_infinite` and passes the result through `wrap_expr`.
+ def is_infinite
167
+ wrap_expr(_rbexpr.is_infinite)
168
+ end
169
+
170
# Forwards to the native `is_nan` and passes the result through `wrap_expr`.
+ def is_nan
171
+ wrap_expr(_rbexpr.is_nan)
172
+ end
173
+
174
# Forwards to the native `is_not_nan` and passes the result through `wrap_expr`.
+ def is_not_nan
175
+ wrap_expr(_rbexpr.is_not_nan)
176
+ end
177
+
178
# Forwards to the native `agg_groups` and passes the result through `wrap_expr`.
+ def agg_groups
179
+ wrap_expr(_rbexpr.agg_groups)
180
+ end
181
+
76
182
  def count
77
183
  wrap_expr(_rbexpr.count)
78
184
  end
@@ -81,10 +187,104 @@ module Polars
81
187
  count
82
188
  end
83
189
 
190
# Forwards an offset/length pair to the native `slice`.
#
# Arguments that are not already an `Expr` are converted with `Polars.lit`
# first; this includes the default `nil` length.
#
# @param offset [Expr, Object] start of the slice
# @param length [Expr, Object, nil] length of the slice
def slice(offset, length = nil)
  offset_expr = offset.is_a?(Expr) ? offset : Polars.lit(offset)
  length_expr = length.is_a?(Expr) ? length : Polars.lit(length)
  wrap_expr(_rbexpr.slice(offset_expr._rbexpr, length_expr._rbexpr))
end
199
+
200
# Converts `other` with `Utils.expr_to_lit_or_expr` and forwards its native
# handle, together with `upcast`, to the native `append`.
#
# @param other [Object] value or expression to append
# @param upcast [Boolean] forwarded unchanged to the native call
def append(other, upcast: true)
  rhs = Utils.expr_to_lit_or_expr(other)._rbexpr
  wrap_expr(_rbexpr.append(rhs, upcast))
end
204
+
205
# Forwards to the native `rechunk` and passes the result through `wrap_expr`.
+ def rechunk
206
+ wrap_expr(_rbexpr.rechunk)
207
+ end
208
+
209
# Forwards to the native `drop_nulls` and passes the result through `wrap_expr`.
+ def drop_nulls
210
+ wrap_expr(_rbexpr.drop_nulls)
211
+ end
212
+
213
# Forwards to the native `drop_nans` and passes the result through `wrap_expr`.
+ def drop_nans
214
+ wrap_expr(_rbexpr.drop_nans)
215
+ end
216
+
217
# Forwards `reverse` to the native `cumsum` and passes the result through `wrap_expr`.
+ def cumsum(reverse: false)
218
+ wrap_expr(_rbexpr.cumsum(reverse))
219
+ end
220
+
221
# Forwards `reverse` to the native `cumprod` and passes the result through `wrap_expr`.
+ def cumprod(reverse: false)
222
+ wrap_expr(_rbexpr.cumprod(reverse))
223
+ end
224
+
225
# Forwards `reverse` to the native `cummin` and passes the result through `wrap_expr`.
+ def cummin(reverse: false)
226
+ wrap_expr(_rbexpr.cummin(reverse))
227
+ end
228
+
229
# Forwards `reverse` to the native `cummax` and passes the result through `wrap_expr`.
+ def cummax(reverse: false)
230
+ wrap_expr(_rbexpr.cummax(reverse))
231
+ end
232
+
233
# Forwards `reverse` to the native `cumcount` and passes the result through `wrap_expr`.
+ def cumcount(reverse: false)
234
+ wrap_expr(_rbexpr.cumcount(reverse))
235
+ end
236
+
237
# Forwards to the native `floor` and passes the result through `wrap_expr`.
+ def floor
238
+ wrap_expr(_rbexpr.floor)
239
+ end
240
+
241
# Forwards to the native `ceil` and passes the result through `wrap_expr`.
+ def ceil
242
+ wrap_expr(_rbexpr.ceil)
243
+ end
244
+
245
# Forwards `decimals` (default 0) to the native `round` and passes the result through `wrap_expr`.
+ def round(decimals = 0)
246
+ wrap_expr(_rbexpr.round(decimals))
247
+ end
248
+
249
# Converts `other` with `Utils.expr_to_lit_or_expr` (`str_to_lit: false`) and
# forwards its native handle to the native `dot`.
#
# @param other [Object] value or expression used as the other operand
def dot(other)
  rhs = Utils.expr_to_lit_or_expr(other, str_to_lit: false)._rbexpr
  wrap_expr(_rbexpr.dot(rhs))
end
253
+
254
# Forwards to the native `mode` and passes the result through `wrap_expr`.
+ def mode
255
+ wrap_expr(_rbexpr.mode)
256
+ end
257
+
258
# Maps `dtype` through `Utils.rb_type_to_dtype` and forwards it, together
# with `strict`, to the native `cast`.
#
# @param dtype [Object] type specification accepted by `Utils.rb_type_to_dtype`
# @param strict [Boolean] forwarded unchanged to the native call
def cast(dtype, strict: true)
  target = Utils.rb_type_to_dtype(dtype)
  wrap_expr(_rbexpr.cast(target, strict))
end
262
+
84
263
  def sort(reverse: false, nulls_last: false)
85
264
  wrap_expr(_rbexpr.sort_with(reverse, nulls_last))
86
265
  end
87
266
 
267
# Forwards `k` (default 5) and `reverse` to the native `top_k` and passes the result through `wrap_expr`.
+ def top_k(k: 5, reverse: false)
268
+ wrap_expr(_rbexpr.top_k(k, reverse))
269
+ end
270
+
271
# Forwards `reverse` and `nulls_last` to the native `arg_sort` and passes the result through `wrap_expr`.
+ def arg_sort(reverse: false, nulls_last: false)
272
+ wrap_expr(_rbexpr.arg_sort(reverse, nulls_last))
273
+ end
274
+
275
# Forwards to the native `arg_max` and passes the result through `wrap_expr`.
+ def arg_max
276
+ wrap_expr(_rbexpr.arg_max)
277
+ end
278
+
279
# Forwards to the native `arg_min` and passes the result through `wrap_expr`.
+ def arg_min
280
+ wrap_expr(_rbexpr.arg_min)
281
+ end
282
+
283
# Converts `element` with `Utils.expr_to_lit_or_expr` (`str_to_lit: false`)
# and forwards its native handle to the native `search_sorted`.
#
# @param element [Object] value or expression to look up
def search_sorted(element)
  needle = Utils.expr_to_lit_or_expr(element, str_to_lit: false)._rbexpr
  wrap_expr(_rbexpr.search_sorted(needle))
end
287
+
88
288
  def sort_by(by, reverse: false)
89
289
  if !by.is_a?(Array)
90
290
  by = [by]
@@ -97,6 +297,19 @@ module Polars
97
297
  wrap_expr(_rbexpr.sort_by(by, reverse))
98
298
  end
99
299
 
300
+ # def take
301
+ # end
302
+
303
+ #
304
# Forwards `periods` (default 1) to the native `shift` and passes the result through `wrap_expr`.
+ def shift(periods = 1)
305
+ wrap_expr(_rbexpr.shift(periods))
306
+ end
307
+
308
# Converts `fill_value` with `Utils.expr_to_lit_or_expr` (`str_to_lit: true`)
# and forwards `periods` plus the converted handle to the native
# `shift_and_fill`.
#
# @param periods [Object] forwarded unchanged to the native call
# @param fill_value [Object] value or expression passed as the fill argument
def shift_and_fill(periods, fill_value)
  filler = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)._rbexpr
  wrap_expr(_rbexpr.shift_and_fill(periods, filler))
end
312
+
100
313
  def fill_null(value = nil, strategy: nil, limit: nil)
101
314
  if !value.nil? && !strategy.nil?
102
315
  raise ArgumentError, "cannot specify both 'value' and 'strategy'."
@@ -119,6 +332,14 @@ module Polars
119
332
  wrap_expr(_rbexpr.fill_nan(fill_value._rbexpr))
120
333
  end
121
334
 
335
# Forwards `limit` (default nil) to the native `forward_fill` and passes the result through `wrap_expr`.
+ def forward_fill(limit: nil)
336
+ wrap_expr(_rbexpr.forward_fill(limit))
337
+ end
338
+
339
# Forwards `limit` (default nil) to the native `backward_fill` and passes the result through `wrap_expr`.
+ def backward_fill(limit: nil)
340
+ wrap_expr(_rbexpr.backward_fill(limit))
341
+ end
342
+
122
343
  def reverse
123
344
  wrap_expr(_rbexpr.reverse)
124
345
  end
@@ -167,6 +388,14 @@ module Polars
167
388
  wrap_expr(_rbexpr.n_unique)
168
389
  end
169
390
 
391
# Forwards to the native `null_count` and passes the result through `wrap_expr`.
+ def null_count
392
+ wrap_expr(_rbexpr.null_count)
393
+ end
394
+
395
# Forwards to the native `arg_unique` and passes the result through `wrap_expr`.
+ def arg_unique
396
+ wrap_expr(_rbexpr.arg_unique)
397
+ end
398
+
170
399
  def unique(maintain_order: false)
171
400
  if maintain_order
172
401
  wrap_expr(_rbexpr.unique_stable)
@@ -188,10 +417,49 @@ module Polars
188
417
  wrap_expr(_rbexpr.over(rbexprs))
189
418
  end
190
419
 
420
# Forwards to the native `is_unique` and passes the result through `wrap_expr`.
+ def is_unique
421
+ wrap_expr(_rbexpr.is_unique)
422
+ end
423
+
424
# Forwards to the native `is_first` and passes the result through `wrap_expr`.
+ def is_first
425
+ wrap_expr(_rbexpr.is_first)
426
+ end
427
+
428
# Forwards to the native `is_duplicated` and passes the result through `wrap_expr`.
+ def is_duplicated
429
+ wrap_expr(_rbexpr.is_duplicated)
430
+ end
431
+
432
# Forwards `quantile` and `interpolation` (default "nearest") to the native `quantile` and passes the result through `wrap_expr`.
+ def quantile(quantile, interpolation: "nearest")
433
+ wrap_expr(_rbexpr.quantile(quantile, interpolation))
434
+ end
435
+
191
436
  def filter(predicate)
192
437
  wrap_expr(_rbexpr.filter(predicate._rbexpr))
193
438
  end
194
439
 
440
# Delegates to `filter` with the same predicate.
+ def where(predicate)
441
+ filter(predicate)
442
+ end
443
+
444
+ # def map
445
+ # end
446
+
447
+ # def apply
448
+ # end
449
+
450
+ #
451
# Calls the native `explode`, the same native call that `explode` makes.
+ def flatten
452
+ wrap_expr(_rbexpr.explode)
453
+ end
454
+
455
# Forwards to the native `explode` and passes the result through `wrap_expr`.
+ def explode
456
+ wrap_expr(_rbexpr.explode)
457
+ end
458
+
459
# Forwards `n` to the native `take_every` and passes the result through `wrap_expr`.
+ def take_every(n)
460
+ wrap_expr(_rbexpr.take_every(n))
461
+ end
462
+
195
463
  def head(n = 10)
196
464
  wrap_expr(_rbexpr.head(n))
197
465
  end
@@ -204,18 +472,253 @@ module Polars
204
472
  head(n)
205
473
  end
206
474
 
475
# Converts `exponent` with `Utils.expr_to_lit_or_expr` and forwards its
# native handle to the native `pow`.
#
# @param exponent [Object] value or expression used as the exponent
def pow(exponent)
  power = Utils.expr_to_lit_or_expr(exponent)._rbexpr
  wrap_expr(_rbexpr.pow(power))
end
479
+
480
+ # def is_in
481
+ # end
482
+
483
+ #
484
# Converts `by` with `Utils.expr_to_lit_or_expr` (strings are not turned into
# literals) and forwards its native handle to the native `repeat_by`.
#
# @param by [Object] value or expression giving the repeat count
# @return the result of `wrap_expr` around the native `repeat_by` call
def repeat_by(by)
  # `str_to_lit` is passed by keyword, as the other callers of
  # `expr_to_lit_or_expr` in this class do; a bare positional `false` does not
  # bind to a keyword parameter.
  by = Utils.expr_to_lit_or_expr(by, str_to_lit: false)
  wrap_expr(_rbexpr.repeat_by(by._rbexpr))
end
488
+
489
+ # def is_between
490
+ # end
491
+
492
+ # def _hash
493
+ # end
494
+
495
+ #
496
# Forwards `signed` (default false) to the native `reinterpret` and passes the result through `wrap_expr`.
+ def reinterpret(signed: false)
497
+ wrap_expr(_rbexpr.reinterpret(signed))
498
+ end
499
+
500
+ # def _inspect
501
+ # end
502
+
503
+ #
207
504
  def interpolate
208
505
  wrap_expr(_rbexpr.interpolate)
209
506
  end
210
507
 
508
+ # def rolling_min
509
+ # end
510
+
511
+ # def rolling_max
512
+ # end
513
+
514
+ # def rolling_mean
515
+ # end
516
+
517
+ # def rolling_sum
518
+ # end
519
+
520
+ # def rolling_std
521
+ # end
522
+
523
+ # def rolling_var
524
+ # end
525
+
526
+ # def rolling_median
527
+ # end
528
+
529
+ # def rolling_quantile
530
+ # end
531
+
532
+ # def rolling_apply
533
+ # end
534
+
535
+ #
536
# Forwards `window_size` and `bias` (default true) to the native `rolling_skew` and passes the result through `wrap_expr`.
+ def rolling_skew(window_size, bias: true)
537
+ wrap_expr(_rbexpr.rolling_skew(window_size, bias))
538
+ end
539
+
540
# Forwards to the native `abs` and passes the result through `wrap_expr`.
+ def abs
541
+ wrap_expr(_rbexpr.abs)
542
+ end
543
+
544
# Delegates to `arg_sort` with the same keyword arguments.
+ def argsort(reverse: false, nulls_last: false)
545
+ arg_sort(reverse: reverse, nulls_last: nulls_last)
546
+ end
547
+
548
# Forwards `method` (default "average") and `reverse` to the native `rank` and passes the result through `wrap_expr`.
+ def rank(method: "average", reverse: false)
549
+ wrap_expr(_rbexpr.rank(method, reverse))
550
+ end
551
+
552
# Forwards `n` (default 1) and `null_behavior` (default "ignore") to the native `diff` and passes the result through `wrap_expr`.
+ def diff(n: 1, null_behavior: "ignore")
553
+ wrap_expr(_rbexpr.diff(n, null_behavior))
554
+ end
555
+
556
# Forwards `n` (default 1) to the native `pct_change` and passes the result through `wrap_expr`.
+ def pct_change(n: 1)
557
+ wrap_expr(_rbexpr.pct_change(n))
558
+ end
559
+
560
# Forwards `bias` (default true) to the native `skew` and passes the result through `wrap_expr`.
+ def skew(bias: true)
561
+ wrap_expr(_rbexpr.skew(bias))
562
+ end
563
+
564
# Forwards `fisher` and `bias` (both default true) to the native `kurtosis` and passes the result through `wrap_expr`.
+ def kurtosis(fisher: true, bias: true)
565
+ wrap_expr(_rbexpr.kurtosis(fisher, bias))
566
+ end
567
+
568
# Forwards `min_val` and `max_val` to the native `clip` and passes the result through `wrap_expr`.
+ def clip(min_val, max_val)
569
+ wrap_expr(_rbexpr.clip(min_val, max_val))
570
+ end
571
+
572
# Forwards `min_val` to the native `clip_min` and passes the result through `wrap_expr`.
+ def clip_min(min_val)
573
+ wrap_expr(_rbexpr.clip_min(min_val))
574
+ end
575
+
576
# Forwards `max_val` to the native `clip_max` and passes the result through `wrap_expr`.
+ def clip_max(max_val)
577
+ wrap_expr(_rbexpr.clip_max(max_val))
578
+ end
579
+
580
# Forwards to the native `lower_bound` and passes the result through `wrap_expr`.
+ def lower_bound
581
+ wrap_expr(_rbexpr.lower_bound)
582
+ end
583
+
584
# Forwards to the native `upper_bound` and passes the result through `wrap_expr`.
+ def upper_bound
585
+ wrap_expr(_rbexpr.upper_bound)
586
+ end
587
+
588
# Forwards to the native `sign` and passes the result through `wrap_expr`.
+ def sign
589
+ wrap_expr(_rbexpr.sign)
590
+ end
591
+
592
# Forwards to the native `sin` and passes the result through `wrap_expr`.
+ def sin
593
+ wrap_expr(_rbexpr.sin)
594
+ end
595
+
596
# Forwards to the native `cos` and passes the result through `wrap_expr`.
+ def cos
597
+ wrap_expr(_rbexpr.cos)
598
+ end
599
+
600
# Forwards to the native `tan` and passes the result through `wrap_expr`.
+ def tan
601
+ wrap_expr(_rbexpr.tan)
602
+ end
603
+
604
# Forwards to the native `arcsin` and passes the result through `wrap_expr`.
+ def arcsin
605
+ wrap_expr(_rbexpr.arcsin)
606
+ end
607
+
608
# Forwards to the native `arccos` and passes the result through `wrap_expr`.
+ def arccos
609
+ wrap_expr(_rbexpr.arccos)
610
+ end
611
+
612
# Forwards to the native `arctan` and passes the result through `wrap_expr`.
+ def arctan
613
+ wrap_expr(_rbexpr.arctan)
614
+ end
615
+
616
# Forwards to the native `sinh` and passes the result through `wrap_expr`.
+ def sinh
617
+ wrap_expr(_rbexpr.sinh)
618
+ end
619
+
620
# Forwards to the native `cosh` and passes the result through `wrap_expr`.
+ def cosh
621
+ wrap_expr(_rbexpr.cosh)
622
+ end
623
+
624
# Forwards to the native `tanh` and passes the result through `wrap_expr`.
+ def tanh
625
+ wrap_expr(_rbexpr.tanh)
626
+ end
627
+
628
# Forwards to the native `arcsinh` and passes the result through `wrap_expr`.
+ def arcsinh
629
+ wrap_expr(_rbexpr.arcsinh)
630
+ end
631
+
632
# Forwards to the native `arccosh` and passes the result through `wrap_expr`.
+ def arccosh
633
+ wrap_expr(_rbexpr.arccosh)
634
+ end
635
+
636
# Forwards to the native `arctanh` and passes the result through `wrap_expr`.
+ def arctanh
637
+ wrap_expr(_rbexpr.arctanh)
638
+ end
639
+
640
# Forwards `dims` to the native `reshape` and passes the result through `wrap_expr`.
+ def reshape(dims)
641
+ wrap_expr(_rbexpr.reshape(dims))
642
+ end
643
+
644
# Forwards a seed to the native `shuffle`.
#
# When `seed` is nil, one is drawn with `rand(10000)` before the call.
#
# @param seed [Object, nil] seed forwarded to the native call
def shuffle(seed: nil)
  seed = rand(10000) if seed.nil?
  wrap_expr(_rbexpr.shuffle(seed))
end
650
+
651
+ # def sample
652
+ # end
653
+
654
+ # def ewm_mean
655
+ # end
656
+
657
+ # def ewm_std
658
+ # end
659
+
660
+ # def ewm_var
661
+ # end
662
+
663
+ # def extend_constant
664
+ # end
665
+
666
+ #
667
# Forwards `multithreaded` and `sort` (both default false) to the native `value_counts` and passes the result through `wrap_expr`.
+ def value_counts(multithreaded: false, sort: false)
668
+ wrap_expr(_rbexpr.value_counts(multithreaded, sort))
669
+ end
670
+
671
# Forwards to the native `unique_counts` and passes the result through `wrap_expr`.
+ def unique_counts
672
+ wrap_expr(_rbexpr.unique_counts)
673
+ end
674
+
675
# Forwards `base` (default `Math::E`) to the native `log` and passes the result through `wrap_expr`.
+ def log(base = Math::E)
676
+ wrap_expr(self._rbexpr.log(base))
677
+ end
678
+
679
# Forwards `base` (default 2) and `normalize` (default false) to the native `entropy` and passes the result through `wrap_expr`.
+ def entropy(base: 2, normalize: false)
680
+ wrap_expr(_rbexpr.entropy(base, normalize))
681
+ end
682
+
683
+ # def cumulative_eval
684
+ # end
685
+
686
+ # def set_sorted
687
+ # end
688
+
689
+ #
211
690
  def list
212
691
  wrap_expr(_rbexpr.list)
213
692
  end
214
693
 
694
# Forwards to the native `shrink_dtype` and passes the result through `wrap_expr`.
+ def shrink_dtype
695
+ wrap_expr(_rbexpr.shrink_dtype)
696
+ end
697
+
698
# Returns a `ListExpr` built from this expression.
+ def arr
699
+ ListExpr.new(self)
700
+ end
701
+
702
# Returns a `CatExpr` built from this expression.
+ def cat
703
+ CatExpr.new(self)
704
+ end
705
+
706
# Returns a `DateTimeExpr` built from this expression.
+ def dt
707
+ DateTimeExpr.new(self)
708
+ end
709
+
710
# Returns a `MetaExpr` built from this expression.
+ def meta
711
+ MetaExpr.new(self)
712
+ end
713
+
215
714
  def str
216
715
  StringExpr.new(self)
217
716
  end
218
717
 
718
# Returns a `StructExpr` built from this expression.
+ def struct
719
+ StructExpr.new(self)
720
+ end
721
+
219
722
  private
220
723
 
221
724
  def wrap_expr(expr)