polars-df 0.6.0-x86_64-darwin → 0.8.0-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.
@@ -43,7 +43,7 @@ module Polars
43
43
  # # ┌─────┬─────┬────────────┐
44
44
  # # │ a ┆ b ┆ rank │
45
45
  # # │ --- ┆ --- ┆ --- │
46
- # # │ i64 ┆ i64 ┆ list[f32] │
46
+ # # │ i64 ┆ i64 ┆ list[f64] │
47
47
  # # ╞═════╪═════╪════════════╡
48
48
  # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
49
49
  # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
@@ -107,44 +107,28 @@ module Polars
107
107
  # Get the maximum value.
108
108
  #
109
109
  # @param column [Object]
110
- # Column(s) to be used in aggregation. Will lead to different behavior based on
111
- # the input:
112
- #
113
- # - [String, Series] -> aggregate the maximum value of that column.
114
- # - [Array<Expr>] -> aggregate the maximum value horizontally.
110
+ # Column(s) to be used in aggregation.
115
111
  #
116
112
  # @return [Expr, Object]
117
113
  def max(column)
118
114
  if column.is_a?(Series)
119
115
  column.max
120
- elsif Utils.strlike?(column)
121
- col(column).max
122
116
  else
123
- exprs = Utils.selection_to_rbexpr_list(column)
124
- # TODO
125
- Utils.wrap_expr(_max_exprs(exprs))
117
+ col(column).max
126
118
  end
127
119
  end
128
120
 
129
121
  # Get the minimum value.
130
122
  #
131
123
  # @param column [Object]
132
- # Column(s) to be used in aggregation. Will lead to different behavior based on
133
- # the input:
134
- #
135
- # - [String, Series] -> aggregate the minimum value of that column.
136
- # - [Array<Expr>] -> aggregate the minimum value horizontally.
124
+ # Column(s) to be used in aggregation.
137
125
  #
138
126
  # @return [Expr, Object]
139
127
  def min(column)
140
128
  if column.is_a?(Series)
141
129
  column.min
142
- elsif Utils.strlike?(column)
143
- col(column).min
144
130
  else
145
- exprs = Utils.selection_to_rbexpr_list(column)
146
- # TODO
147
- Utils.wrap_expr(_min_exprs(exprs))
131
+ col(column).min
148
132
  end
149
133
  end
150
134
 
@@ -158,7 +142,7 @@ module Polars
158
142
  col(column.to_s).sum
159
143
  elsif column.is_a?(::Array)
160
144
  exprs = Utils.selection_to_rbexpr_list(column)
161
- Utils.wrap_expr(_sum_exprs(exprs))
145
+ Utils.wrap_expr(_sum_horizontal(exprs))
162
146
  else
163
147
  fold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("sum")
164
148
  end
@@ -625,16 +609,16 @@ module Polars
625
609
  # This can be used in a `select`, `with_column`, etc. Be sure that the resulting
626
610
  # range size is equal to the length of the DataFrame you are collecting.
627
611
  #
628
- # @param low [Integer, Expr, Series]
612
+ # @param start [Integer, Expr, Series]
629
613
  # Lower bound of range.
630
- # @param high [Integer, Expr, Series]
614
+ # @param stop [Integer, Expr, Series]
631
615
  # Upper bound of range.
632
616
  # @param step [Integer]
633
617
  # Step size of the range.
634
618
  # @param eager [Boolean]
635
619
  # If eager evaluation is `true`, a Series is returned instead of an Expr.
636
620
  # @param dtype [Symbol]
637
- # Apply an explicit integer dtype to the resulting expression (default is `:i64`).
621
+ # Apply an explicit integer dtype to the resulting expression (default is `Int64`).
638
622
  #
639
623
  # @return [Expr, Series]
640
624
  #
@@ -648,35 +632,20 @@ module Polars
648
632
  # # 1
649
633
  # # 2
650
634
  # # ]
651
- #
652
- # @example
653
- # df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4]})
654
- # df.select(Polars.arange(Polars.col("a"), Polars.col("b")))
655
- # # =>
656
- # # shape: (2, 1)
657
- # # ┌───────────┐
658
- # # │ arange │
659
- # # │ --- │
660
- # # │ list[i64] │
661
- # # ╞═══════════╡
662
- # # │ [1, 2] │
663
- # # │ [2, 3] │
664
- # # └───────────┘
665
- def arange(low, high, step: 1, eager: false, dtype: nil)
666
- low = Utils.expr_to_lit_or_expr(low, str_to_lit: false)
667
- high = Utils.expr_to_lit_or_expr(high, str_to_lit: false)
668
- range_expr = Utils.wrap_expr(RbExpr.arange(low._rbexpr, high._rbexpr, step))
669
-
670
- if !dtype.nil? && !["i64", Int64].include?(dtype)
671
- range_expr = range_expr.cast(dtype)
672
- end
635
+ def int_range(start, stop, step: 1, eager: false, dtype: nil)
636
+ start = Utils.parse_as_expression(start)
637
+ stop = Utils.parse_as_expression(stop)
638
+ dtype ||= Int64
639
+ dtype = dtype.to_s if dtype.is_a?(Symbol)
640
+ result = Utils.wrap_expr(RbExpr.int_range(start, stop, step, dtype)).alias("arange")
673
641
 
674
- if !eager
675
- range_expr
676
- else
677
- DataFrame.new.select(range_expr.alias("arange")).to_series
642
+ if eager
643
+ return select(result).to_series
678
644
  end
645
+
646
+ result
679
647
  end
648
+ alias_method :arange, :int_range
680
649
 
681
650
  # Find the indexes that would sort the columns.
682
651
  #
@@ -735,15 +704,22 @@ module Polars
735
704
  # # │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
736
705
  # # └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘
737
706
  def duration(
707
+ weeks: nil,
738
708
  days: nil,
709
+ hours: nil,
710
+ minutes: nil,
739
711
  seconds: nil,
740
- nanoseconds: nil,
741
- microseconds: nil,
742
712
  milliseconds: nil,
743
- minutes: nil,
744
- hours: nil,
745
- weeks: nil
713
+ microseconds: nil,
714
+ nanoseconds: nil,
715
+ time_unit: "us"
746
716
  )
717
+ if !weeks.nil?
718
+ weeks = Utils.expr_to_lit_or_expr(weeks, str_to_lit: false)._rbexpr
719
+ end
720
+ if !days.nil?
721
+ days = Utils.expr_to_lit_or_expr(days, str_to_lit: false)._rbexpr
722
+ end
747
723
  if !hours.nil?
748
724
  hours = Utils.expr_to_lit_or_expr(hours, str_to_lit: false)._rbexpr
749
725
  end
@@ -762,23 +738,18 @@ module Polars
762
738
  if !nanoseconds.nil?
763
739
  nanoseconds = Utils.expr_to_lit_or_expr(nanoseconds, str_to_lit: false)._rbexpr
764
740
  end
765
- if !days.nil?
766
- days = Utils.expr_to_lit_or_expr(days, str_to_lit: false)._rbexpr
767
- end
768
- if !weeks.nil?
769
- weeks = Utils.expr_to_lit_or_expr(weeks, str_to_lit: false)._rbexpr
770
- end
771
741
 
772
742
  Utils.wrap_expr(
773
743
  _rb_duration(
744
+ weeks,
774
745
  days,
746
+ hours,
747
+ minutes,
775
748
  seconds,
776
- nanoseconds,
777
- microseconds,
778
749
  milliseconds,
779
- minutes,
780
- hours,
781
- weeks
750
+ microseconds,
751
+ nanoseconds,
752
+ time_unit
782
753
  )
783
754
  )
784
755
  end
@@ -944,7 +915,8 @@ module Polars
944
915
  simplify_expression,
945
916
  slice_pushdown,
946
917
  common_subplan_elimination,
947
- allow_streaming
918
+ allow_streaming,
919
+ false
948
920
  )
949
921
  prepared << ldf
950
922
  end
@@ -1,10 +1,9 @@
1
1
  module Polars
2
- # Created by `df.lazy.groupby("foo")`.
2
+ # Created by `df.lazy.group_by("foo")`.
3
3
  class LazyGroupBy
4
4
  # @private
5
- def initialize(lgb, lazyframe_class)
5
+ def initialize(lgb)
6
6
  @lgb = lgb
7
- @lazyframe_class = lazyframe_class
8
7
  end
9
8
 
10
9
  # Describe the aggregations that need to be done on a group.
@@ -12,7 +11,7 @@ module Polars
12
11
  # @return [LazyFrame]
13
12
  def agg(aggs)
14
13
  rbexprs = Utils.selection_to_rbexpr_list(aggs)
15
- @lazyframe_class._from_rbldf(@lgb.agg(rbexprs))
14
+ Utils.wrap_ldf(@lgb.agg(rbexprs))
16
15
  end
17
16
 
18
17
  # Get the first `n` rows of each group.
@@ -29,7 +28,7 @@ module Polars
29
28
  # "nrs" => [1, 2, 3, 4, 5, 6]
30
29
  # }
31
30
  # )
32
- # df.groupby("letters").head(2).sort("letters")
31
+ # df.group_by("letters").head(2).sort("letters")
33
32
  # # =>
34
33
  # # shape: (5, 2)
35
34
  # # ┌─────────┬─────┐
@@ -44,7 +43,7 @@ module Polars
44
43
  # # │ c ┆ 2 │
45
44
  # # └─────────┴─────┘
46
45
  def head(n = 5)
47
- @lazyframe_class._from_rbldf(@lgb.head(n))
46
+ Utils.wrap_ldf(@lgb.head(n))
48
47
  end
49
48
 
50
49
  # Get the last `n` rows of each group.
@@ -61,7 +60,7 @@ module Polars
61
60
  # "nrs" => [1, 2, 3, 4, 5, 6]
62
61
  # }
63
62
  # )
64
- # df.groupby("letters").tail(2).sort("letters")
63
+ # df.group_by("letters").tail(2).sort("letters")
65
64
  # # =>
66
65
  # # shape: (5, 2)
67
66
  # # ┌─────────┬─────┐
@@ -76,7 +75,7 @@ module Polars
76
75
  # # │ c ┆ 4 │
77
76
  # # └─────────┴─────┘
78
77
  def tail(n = 5)
79
- @lazyframe_class._from_rbldf(@lgb.tail(n))
78
+ Utils.wrap_ldf(@lgb.tail(n))
80
79
  end
81
80
 
82
81
  # def apply
@@ -27,8 +27,9 @@ module Polars
27
27
  # # │ 1 │
28
28
  # # └─────┘
29
29
  def lengths
30
- Utils.wrap_expr(_rbexpr.list_lengths)
30
+ Utils.wrap_expr(_rbexpr.list_len)
31
31
  end
32
+ alias_method :len, :lengths
32
33
 
33
34
  # Sum all the lists in the array.
34
35
  #
@@ -379,6 +380,7 @@ module Polars
379
380
  # # │ x y │
380
381
  # # └───────┘
381
382
  def join(separator)
383
+ separator = Utils.parse_as_expression(separator, str_as_lit: true)
382
384
  Utils.wrap_expr(_rbexpr.list_join(separator))
383
385
  end
384
386
 
@@ -457,7 +459,7 @@ module Polars
457
459
 
458
460
  # Shift values by the given period.
459
461
  #
460
- # @param periods [Integer]
462
+ # @param n [Integer]
461
463
  # Number of places to shift (may be negative).
462
464
  #
463
465
  # @return [Expr]
@@ -472,8 +474,9 @@ module Polars
472
474
  # # [null, 1, … 3]
473
475
  # # [null, 10, 2]
474
476
  # # ]
475
- def shift(periods = 1)
476
- Utils.wrap_expr(_rbexpr.list_shift(periods))
477
+ def shift(n = 1)
478
+ n = Utils.parse_as_expression(n)
479
+ Utils.wrap_expr(_rbexpr.list_shift(n))
477
480
  end
478
481
 
479
482
  # Slice every sublist.
@@ -568,9 +571,10 @@ module Polars
568
571
  # # │ 1 │
569
572
  # # │ 0 │
570
573
  # # └────────────────┘
571
- def count_match(element)
572
- Utils.wrap_expr(_rbexpr.list_count_match(Utils.expr_to_lit_or_expr(element)._rbexpr))
574
+ def count_matches(element)
575
+ Utils.wrap_expr(_rbexpr.list_count_matches(Utils.expr_to_lit_or_expr(element)._rbexpr))
573
576
  end
577
+ alias_method :count_match, :count_matches
574
578
 
575
579
  # Convert the series of type `List` to a series of type `Struct`.
576
580
  #
@@ -609,7 +613,7 @@ module Polars
609
613
  # Run all expressions in parallel. Don't activate this blindly.
610
614
  # Parallelism is worth it if there is enough work to do per thread.
611
615
  #
612
- # This likely should not be used in the groupby context, because we already
616
+ # This likely should not be used in the group by context, because we already
613
617
  # have parallel execution per group
614
618
  #
615
619
  # @return [Expr]
@@ -624,7 +628,7 @@ module Polars
624
628
  # # ┌─────┬─────┬────────────┐
625
629
  # # │ a ┆ b ┆ rank │
626
630
  # # │ --- ┆ --- ┆ --- │
627
- # # │ i64 ┆ i64 ┆ list[f32] │
631
+ # # │ i64 ┆ i64 ┆ list[f64] │
628
632
  # # ╞═════╪═════╪════════════╡
629
633
  # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
630
634
  # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
@@ -315,7 +315,7 @@ module Polars
315
315
  # Run all expressions in parallel. Don't activate this blindly.
316
316
  # Parallelism is worth it if there is enough work to do per thread.
317
317
  #
318
- # This likely should not be used in the groupby context, because we already
318
+ # This likely should not be used in the group by context, because we already
319
319
  # have parallel execution per group
320
320
  #
321
321
  # @return [Series]
@@ -330,7 +330,7 @@ module Polars
330
330
  # # ┌─────┬─────┬────────────┐
331
331
  # # │ a ┆ b ┆ rank │
332
332
  # # │ --- ┆ --- ┆ --- │
333
- # # │ i64 ┆ i64 ┆ list[f32] │
333
+ # # │ i64 ┆ i64 ┆ list[f64] │
334
334
  # # ╞═════╪═════╪════════════╡
335
335
  # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
336
336
  # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
@@ -0,0 +1,198 @@
1
+ module Polars
2
+ # Namespace for expressions that operate on expression names.
3
+ class NameExpr
4
+ # @private
5
+ attr_accessor :_rbexpr
6
+
7
+ # @private
8
+ def initialize(expr)
9
+ self._rbexpr = expr._rbexpr
10
+ end
11
+
12
+ # Keep the original root name of the expression.
13
+ #
14
+ # @note
15
+ # Due to implementation constraints, this method can only be called as the last
16
+ # expression in a chain.
17
+ #
18
+ # @return [Expr]
19
+ #
20
+ # @example Prevent errors due to potential duplicate column names.
21
+ # df = Polars::DataFrame.new(
22
+ # {
23
+ # "a" => [1, 2],
24
+ # "b" => [3, 4]
25
+ # }
26
+ # )
27
+ # df.select((Polars.lit(10) / Polars.all).name.keep)
28
+ # # =>
29
+ # # shape: (2, 2)
30
+ # # ┌──────┬──────────┐
31
+ # # │ a ┆ b │
32
+ # # │ --- ┆ --- │
33
+ # # │ f64 ┆ f64 │
34
+ # # ╞══════╪══════════╡
35
+ # # │ 10.0 ┆ 3.333333 │
36
+ # # │ 5.0 ┆ 2.5 │
37
+ # # └──────┴──────────┘
38
+ #
39
+ # @example Undo an alias operation.
40
+ # df.with_columns((Polars.col("a") * 9).alias("c").name.keep)
41
+ # # =>
42
+ # # shape: (2, 2)
43
+ # # ┌─────┬─────┐
44
+ # # │ a ┆ b │
45
+ # # │ --- ┆ --- │
46
+ # # │ i64 ┆ i64 │
47
+ # # ╞═════╪═════╡
48
+ # # │ 9 ┆ 3 │
49
+ # # │ 18 ┆ 4 │
50
+ # # └─────┴─────┘
51
+ def keep
52
+ Utils.wrap_expr(_rbexpr.name_keep)
53
+ end
54
+
55
+ # Rename the output of an expression by mapping a function over the root name.
56
+ #
57
+ # @return [Expr]
58
+ #
59
+ # @example Remove a common suffix and convert to lower case.
60
+ # df = Polars::DataFrame.new(
61
+ # {
62
+ # "A_reverse" => [3, 2, 1],
63
+ # "B_reverse" => ["z", "y", "x"]
64
+ # }
65
+ # )
66
+ # df.with_columns(
67
+ # Polars.all.reverse.name.map { |c| c.delete_suffix("_reverse").downcase }
68
+ # )
69
+ # # =>
70
+ # # shape: (3, 4)
71
+ # # ┌───────────┬───────────┬─────┬─────┐
72
+ # # │ A_reverse ┆ B_reverse ┆ a ┆ b │
73
+ # # │ --- ┆ --- ┆ --- ┆ --- │
74
+ # # │ i64 ┆ str ┆ i64 ┆ str │
75
+ # # ╞═══════════╪═══════════╪═════╪═════╡
76
+ # # │ 3 ┆ z ┆ 1 ┆ x │
77
+ # # │ 2 ┆ y ┆ 2 ┆ y │
78
+ # # │ 1 ┆ x ┆ 3 ┆ z │
79
+ # # └───────────┴───────────┴─────┴─────┘
80
+ def map(&f)
81
+ Utils.wrap_expr(_rbexpr.name_map(f))
82
+ end
83
+
84
+ # Add a prefix to the root column name of the expression.
85
+ #
86
+ # @param prefix [Object]
87
+ # Prefix to add to the root column name.
88
+ #
89
+ # @return [Expr]
90
+ #
91
+ # @example
92
+ # df = Polars::DataFrame.new(
93
+ # {
94
+ # "a" => [1, 2, 3],
95
+ # "b" => ["x", "y", "z"]
96
+ # }
97
+ # )
98
+ # df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
99
+ # # =>
100
+ # # shape: (3, 4)
101
+ # # ┌─────┬─────┬───────────┬───────────┐
102
+ # # │ a ┆ b ┆ reverse_a ┆ reverse_b │
103
+ # # │ --- ┆ --- ┆ --- ┆ --- │
104
+ # # │ i64 ┆ str ┆ i64 ┆ str │
105
+ # # ╞═════╪═════╪═══════════╪═══════════╡
106
+ # # │ 1 ┆ x ┆ 3 ┆ z │
107
+ # # │ 2 ┆ y ┆ 2 ┆ y │
108
+ # # │ 3 ┆ z ┆ 1 ┆ x │
109
+ # # └─────┴─────┴───────────┴───────────┘
110
+ def prefix(prefix)
111
+ Utils.wrap_expr(_rbexpr.name_prefix(prefix))
112
+ end
113
+
114
+ # Add a suffix to the root column name of the expression.
115
+ #
116
+ # @param suffix [Object]
117
+ # Suffix to add to the root column name.
118
+ #
119
+ # @return [Expr]
120
+ #
121
+ # @example
122
+ # df = Polars::DataFrame.new(
123
+ # {
124
+ # "a" => [1, 2, 3],
125
+ # "b" => ["x", "y", "z"]
126
+ # }
127
+ # )
128
+ # df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
129
+ # # =>
130
+ # # shape: (3, 4)
131
+ # # ┌─────┬─────┬───────────┬───────────┐
132
+ # # │ a ┆ b ┆ a_reverse ┆ b_reverse │
133
+ # # │ --- ┆ --- ┆ --- ┆ --- │
134
+ # # │ i64 ┆ str ┆ i64 ┆ str │
135
+ # # ╞═════╪═════╪═══════════╪═══════════╡
136
+ # # │ 1 ┆ x ┆ 3 ┆ z │
137
+ # # │ 2 ┆ y ┆ 2 ┆ y │
138
+ # # │ 3 ┆ z ┆ 1 ┆ x │
139
+ # # └─────┴─────┴───────────┴───────────┘
140
+ def suffix(suffix)
141
+ Utils.wrap_expr(_rbexpr.name_suffix(suffix))
142
+ end
143
+
144
+ # Make the root column name lowercase.
145
+ #
146
+ # @return [Expr]
147
+ #
148
+ # @example
149
+ # df = Polars::DataFrame.new(
150
+ # {
151
+ # "ColX" => [1, 2, 3],
152
+ # "ColY" => ["x", "y", "z"],
153
+ # }
154
+ # )
155
+ # df.with_columns(Polars.all.name.to_lowercase)
156
+ # # =>
157
+ # # shape: (3, 4)
158
+ # # ┌──────┬──────┬──────┬──────┐
159
+ # # │ ColX ┆ ColY ┆ colx ┆ coly │
160
+ # # │ --- ┆ --- ┆ --- ┆ --- │
161
+ # # │ i64 ┆ str ┆ i64 ┆ str │
162
+ # # ╞══════╪══════╪══════╪══════╡
163
+ # # │ 1 ┆ x ┆ 1 ┆ x │
164
+ # # │ 2 ┆ y ┆ 2 ┆ y │
165
+ # # │ 3 ┆ z ┆ 3 ┆ z │
166
+ # # └──────┴──────┴──────┴──────┘
167
+ def to_lowercase
168
+ Utils.wrap_expr(_rbexpr.name_to_lowercase)
169
+ end
170
+
171
+ # Make the root column name uppercase.
172
+ #
173
+ # @return [Expr]
174
+ #
175
+ # @example
176
+ # df = Polars::DataFrame.new(
177
+ # {
178
+ # "ColX" => [1, 2, 3],
179
+ # "ColY" => ["x", "y", "z"]
180
+ # }
181
+ # )
182
+ # df.with_columns(Polars.all.name.to_uppercase)
183
+ # # =>
184
+ # # shape: (3, 4)
185
+ # # ┌──────┬──────┬──────┬──────┐
186
+ # # │ ColX ┆ ColY ┆ COLX ┆ COLY │
187
+ # # │ --- ┆ --- ┆ --- ┆ --- │
188
+ # # │ i64 ┆ str ┆ i64 ┆ str │
189
+ # # ╞══════╪══════╪══════╪══════╡
190
+ # # │ 1 ┆ x ┆ 1 ┆ x │
191
+ # # │ 2 ┆ y ┆ 2 ┆ y │
192
+ # # │ 3 ┆ z ┆ 3 ┆ z │
193
+ # # └──────┴──────┴──────┴──────┘
194
+ def to_uppercase
195
+ Utils.wrap_expr(_rbexpr.name_to_uppercase)
196
+ end
197
+ end
198
+ end
@@ -2,7 +2,7 @@ module Polars
2
2
  # A rolling grouper.
3
3
  #
4
4
  # This has an `.agg` method which will allow you to run all polars expressions in a
5
- # groupby context.
5
+ # group by context.
6
6
  class RollingGroupBy
7
7
  def initialize(
8
8
  df,
@@ -27,7 +27,7 @@ module Polars
27
27
 
28
28
  def agg(aggs)
29
29
  @df.lazy
30
- .groupby_rolling(
30
+ .group_by_rolling(
31
31
  index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by, check_sorted: @check_sorted
32
32
  )
33
33
  .agg(aggs)