polars-df 0.6.0-x86_64-darwin → 0.8.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -43,7 +43,7 @@ module Polars
43
43
  # # ┌─────┬─────┬────────────┐
44
44
  # # │ a ┆ b ┆ rank │
45
45
  # # │ --- ┆ --- ┆ --- │
46
- # # │ i64 ┆ i64 ┆ list[f32] │
46
+ # # │ i64 ┆ i64 ┆ list[f64] │
47
47
  # # ╞═════╪═════╪════════════╡
48
48
  # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
49
49
  # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
@@ -107,44 +107,28 @@ module Polars
107
107
  # Get the maximum value.
108
108
  #
109
109
  # @param column [Object]
110
- # Column(s) to be used in aggregation. Will lead to different behavior based on
111
- # the input:
112
- #
113
- # - [String, Series] -> aggregate the maximum value of that column.
114
- # - [Array<Expr>] -> aggregate the maximum value horizontally.
110
+ # Column(s) to be used in aggregation.
115
111
  #
116
112
  # @return [Expr, Object]
117
113
  def max(column)
118
114
  if column.is_a?(Series)
119
115
  column.max
120
- elsif Utils.strlike?(column)
121
- col(column).max
122
116
  else
123
- exprs = Utils.selection_to_rbexpr_list(column)
124
- # TODO
125
- Utils.wrap_expr(_max_exprs(exprs))
117
+ col(column).max
126
118
  end
127
119
  end
128
120
 
129
121
  # Get the minimum value.
130
122
  #
131
123
  # @param column [Object]
132
- # Column(s) to be used in aggregation. Will lead to different behavior based on
133
- # the input:
134
- #
135
- # - [String, Series] -> aggregate the minimum value of that column.
136
- # - [Array<Expr>] -> aggregate the minimum value horizontally.
124
+ # Column(s) to be used in aggregation.
137
125
  #
138
126
  # @return [Expr, Object]
139
127
  def min(column)
140
128
  if column.is_a?(Series)
141
129
  column.min
142
- elsif Utils.strlike?(column)
143
- col(column).min
144
130
  else
145
- exprs = Utils.selection_to_rbexpr_list(column)
146
- # TODO
147
- Utils.wrap_expr(_min_exprs(exprs))
131
+ col(column).min
148
132
  end
149
133
  end
150
134
 
@@ -158,7 +142,7 @@ module Polars
158
142
  col(column.to_s).sum
159
143
  elsif column.is_a?(::Array)
160
144
  exprs = Utils.selection_to_rbexpr_list(column)
161
- Utils.wrap_expr(_sum_exprs(exprs))
145
+ Utils.wrap_expr(_sum_horizontal(exprs))
162
146
  else
163
147
  fold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("sum")
164
148
  end
@@ -625,16 +609,16 @@ module Polars
625
609
  # This can be used in a `select`, `with_column`, etc. Be sure that the resulting
626
610
  # range size is equal to the length of the DataFrame you are collecting.
627
611
  #
628
- # @param low [Integer, Expr, Series]
612
+ # @param start [Integer, Expr, Series]
629
613
  # Lower bound of range.
630
- # @param high [Integer, Expr, Series]
614
+ # @param stop [Integer, Expr, Series]
631
615
  # Upper bound of range.
632
616
  # @param step [Integer]
633
617
  # Step size of the range.
634
618
  # @param eager [Boolean]
635
619
  # If eager evaluation is `true`, a Series is returned instead of an Expr.
636
620
  # @param dtype [Symbol]
637
- # Apply an explicit integer dtype to the resulting expression (default is `:i64`).
621
+ # Apply an explicit integer dtype to the resulting expression (default is `Int64`).
638
622
  #
639
623
  # @return [Expr, Series]
640
624
  #
@@ -648,35 +632,20 @@ module Polars
648
632
  # # 1
649
633
  # # 2
650
634
  # # ]
651
- #
652
- # @example
653
- # df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4]})
654
- # df.select(Polars.arange(Polars.col("a"), Polars.col("b")))
655
- # # =>
656
- # # shape: (2, 1)
657
- # # ┌───────────┐
658
- # # │ arange │
659
- # # │ --- │
660
- # # │ list[i64] │
661
- # # ╞═══════════╡
662
- # # │ [1, 2] │
663
- # # │ [2, 3] │
664
- # # └───────────┘
665
- def arange(low, high, step: 1, eager: false, dtype: nil)
666
- low = Utils.expr_to_lit_or_expr(low, str_to_lit: false)
667
- high = Utils.expr_to_lit_or_expr(high, str_to_lit: false)
668
- range_expr = Utils.wrap_expr(RbExpr.arange(low._rbexpr, high._rbexpr, step))
669
-
670
- if !dtype.nil? && !["i64", Int64].include?(dtype)
671
- range_expr = range_expr.cast(dtype)
672
- end
635
+ def int_range(start, stop, step: 1, eager: false, dtype: nil)
636
+ start = Utils.parse_as_expression(start)
637
+ stop = Utils.parse_as_expression(stop)
638
+ dtype ||= Int64
639
+ dtype = dtype.to_s if dtype.is_a?(Symbol)
640
+ result = Utils.wrap_expr(RbExpr.int_range(start, stop, step, dtype)).alias("arange")
673
641
 
674
- if !eager
675
- range_expr
676
- else
677
- DataFrame.new.select(range_expr.alias("arange")).to_series
642
+ if eager
643
+ return select(result).to_series
678
644
  end
645
+
646
+ result
679
647
  end
648
+ alias_method :arange, :int_range
680
649
 
681
650
  # Find the indexes that would sort the columns.
682
651
  #
@@ -735,15 +704,22 @@ module Polars
735
704
  # # │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
736
705
  # # └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘
737
706
  def duration(
707
+ weeks: nil,
738
708
  days: nil,
709
+ hours: nil,
710
+ minutes: nil,
739
711
  seconds: nil,
740
- nanoseconds: nil,
741
- microseconds: nil,
742
712
  milliseconds: nil,
743
- minutes: nil,
744
- hours: nil,
745
- weeks: nil
713
+ microseconds: nil,
714
+ nanoseconds: nil,
715
+ time_unit: "us"
746
716
  )
717
+ if !weeks.nil?
718
+ weeks = Utils.expr_to_lit_or_expr(weeks, str_to_lit: false)._rbexpr
719
+ end
720
+ if !days.nil?
721
+ days = Utils.expr_to_lit_or_expr(days, str_to_lit: false)._rbexpr
722
+ end
747
723
  if !hours.nil?
748
724
  hours = Utils.expr_to_lit_or_expr(hours, str_to_lit: false)._rbexpr
749
725
  end
@@ -762,23 +738,18 @@ module Polars
762
738
  if !nanoseconds.nil?
763
739
  nanoseconds = Utils.expr_to_lit_or_expr(nanoseconds, str_to_lit: false)._rbexpr
764
740
  end
765
- if !days.nil?
766
- days = Utils.expr_to_lit_or_expr(days, str_to_lit: false)._rbexpr
767
- end
768
- if !weeks.nil?
769
- weeks = Utils.expr_to_lit_or_expr(weeks, str_to_lit: false)._rbexpr
770
- end
771
741
 
772
742
  Utils.wrap_expr(
773
743
  _rb_duration(
744
+ weeks,
774
745
  days,
746
+ hours,
747
+ minutes,
775
748
  seconds,
776
- nanoseconds,
777
- microseconds,
778
749
  milliseconds,
779
- minutes,
780
- hours,
781
- weeks
750
+ microseconds,
751
+ nanoseconds,
752
+ time_unit
782
753
  )
783
754
  )
784
755
  end
@@ -944,7 +915,8 @@ module Polars
944
915
  simplify_expression,
945
916
  slice_pushdown,
946
917
  common_subplan_elimination,
947
- allow_streaming
918
+ allow_streaming,
919
+ false
948
920
  )
949
921
  prepared << ldf
950
922
  end
@@ -1,10 +1,9 @@
1
1
  module Polars
2
- # Created by `df.lazy.groupby("foo")`.
2
+ # Created by `df.lazy.group_by("foo")`.
3
3
  class LazyGroupBy
4
4
  # @private
5
- def initialize(lgb, lazyframe_class)
5
+ def initialize(lgb)
6
6
  @lgb = lgb
7
- @lazyframe_class = lazyframe_class
8
7
  end
9
8
 
10
9
  # Describe the aggregations that need to be done on a group.
@@ -12,7 +11,7 @@ module Polars
12
11
  # @return [LazyFrame]
13
12
  def agg(aggs)
14
13
  rbexprs = Utils.selection_to_rbexpr_list(aggs)
15
- @lazyframe_class._from_rbldf(@lgb.agg(rbexprs))
14
+ Utils.wrap_ldf(@lgb.agg(rbexprs))
16
15
  end
17
16
 
18
17
  # Get the first `n` rows of each group.
@@ -29,7 +28,7 @@ module Polars
29
28
  # "nrs" => [1, 2, 3, 4, 5, 6]
30
29
  # }
31
30
  # )
32
- # df.groupby("letters").head(2).sort("letters")
31
+ # df.group_by("letters").head(2).sort("letters")
33
32
  # # =>
34
33
  # # shape: (5, 2)
35
34
  # # ┌─────────┬─────┐
@@ -44,7 +43,7 @@ module Polars
44
43
  # # │ c ┆ 2 │
45
44
  # # └─────────┴─────┘
46
45
  def head(n = 5)
47
- @lazyframe_class._from_rbldf(@lgb.head(n))
46
+ Utils.wrap_ldf(@lgb.head(n))
48
47
  end
49
48
 
50
49
  # Get the last `n` rows of each group.
@@ -61,7 +60,7 @@ module Polars
61
60
  # "nrs" => [1, 2, 3, 4, 5, 6]
62
61
  # }
63
62
  # )
64
- # df.groupby("letters").tail(2).sort("letters")
63
+ # df.group_by("letters").tail(2).sort("letters")
65
64
  # # =>
66
65
  # # shape: (5, 2)
67
66
  # # ┌─────────┬─────┐
@@ -76,7 +75,7 @@ module Polars
76
75
  # # │ c ┆ 4 │
77
76
  # # └─────────┴─────┘
78
77
  def tail(n = 5)
79
- @lazyframe_class._from_rbldf(@lgb.tail(n))
78
+ Utils.wrap_ldf(@lgb.tail(n))
80
79
  end
81
80
 
82
81
  # def apply
@@ -27,8 +27,9 @@ module Polars
27
27
  # # │ 1 │
28
28
  # # └─────┘
29
29
  def lengths
30
- Utils.wrap_expr(_rbexpr.list_lengths)
30
+ Utils.wrap_expr(_rbexpr.list_len)
31
31
  end
32
+ alias_method :len, :lengths
32
33
 
33
34
  # Sum all the lists in the array.
34
35
  #
@@ -379,6 +380,7 @@ module Polars
379
380
  # # │ x y │
380
381
  # # └───────┘
381
382
  def join(separator)
383
+ separator = Utils.parse_as_expression(separator, str_as_lit: true)
382
384
  Utils.wrap_expr(_rbexpr.list_join(separator))
383
385
  end
384
386
 
@@ -457,7 +459,7 @@ module Polars
457
459
 
458
460
  # Shift values by the given period.
459
461
  #
460
- # @param periods [Integer]
462
+ # @param n [Integer]
461
463
  # Number of places to shift (may be negative).
462
464
  #
463
465
  # @return [Expr]
@@ -472,8 +474,9 @@ module Polars
472
474
  # # [null, 1, … 3]
473
475
  # # [null, 10, 2]
474
476
  # # ]
475
- def shift(periods = 1)
476
- Utils.wrap_expr(_rbexpr.list_shift(periods))
477
+ def shift(n = 1)
478
+ n = Utils.parse_as_expression(n)
479
+ Utils.wrap_expr(_rbexpr.list_shift(n))
477
480
  end
478
481
 
479
482
  # Slice every sublist.
@@ -568,9 +571,10 @@ module Polars
568
571
  # # │ 1 │
569
572
  # # │ 0 │
570
573
  # # └────────────────┘
571
- def count_match(element)
572
- Utils.wrap_expr(_rbexpr.list_count_match(Utils.expr_to_lit_or_expr(element)._rbexpr))
574
+ def count_matches(element)
575
+ Utils.wrap_expr(_rbexpr.list_count_matches(Utils.expr_to_lit_or_expr(element)._rbexpr))
573
576
  end
577
+ alias_method :count_match, :count_matches
574
578
 
575
579
  # Convert the series of type `List` to a series of type `Struct`.
576
580
  #
@@ -609,7 +613,7 @@ module Polars
609
613
  # Run all expressions in parallel. Don't activate this blindly.
610
614
  # Parallelism is worth it if there is enough work to do per thread.
611
615
  #
612
- # This likely should not be use in the groupby context, because we already
616
+ # This likely should not be used in the group by context, because we already
613
617
  # have parallel execution per group
614
618
  #
615
619
  # @return [Expr]
@@ -624,7 +628,7 @@ module Polars
624
628
  # # ┌─────┬─────┬────────────┐
625
629
  # # │ a ┆ b ┆ rank │
626
630
  # # │ --- ┆ --- ┆ --- │
627
- # # │ i64 ┆ i64 ┆ list[f32] │
631
+ # # │ i64 ┆ i64 ┆ list[f64] │
628
632
  # # ╞═════╪═════╪════════════╡
629
633
  # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
630
634
  # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
@@ -315,7 +315,7 @@ module Polars
315
315
  # Run all expressions in parallel. Don't activate this blindly.
316
316
  # Parallelism is worth it if there is enough work to do per thread.
317
317
  #
318
- # This likely should not be use in the groupby context, because we already
318
+ # This likely should not be used in the group by context, because we already
319
319
  # have parallel execution per group
320
320
  #
321
321
  # @return [Series]
@@ -330,7 +330,7 @@ module Polars
330
330
  # # ┌─────┬─────┬────────────┐
331
331
  # # │ a ┆ b ┆ rank │
332
332
  # # │ --- ┆ --- ┆ --- │
333
- # # │ i64 ┆ i64 ┆ list[f32] │
333
+ # # │ i64 ┆ i64 ┆ list[f64] │
334
334
  # # ╞═════╪═════╪════════════╡
335
335
  # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
336
336
  # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
@@ -0,0 +1,198 @@
1
+ module Polars
2
+ # Namespace for expressions that operate on expression names.
3
+ class NameExpr
4
+ # @private
5
+ attr_accessor :_rbexpr
6
+
7
+ # @private
8
+ def initialize(expr)
9
+ self._rbexpr = expr._rbexpr
10
+ end
11
+
12
+ # Keep the original root name of the expression.
13
+ #
14
+ # @note
15
+ # Due to implementation constraints, this method can only be called as the last
16
+ # expression in a chain.
17
+ #
18
+ # @return [Expr]
19
+ #
20
+ # @example Prevent errors due to potential duplicate column names.
21
+ # df = Polars::DataFrame.new(
22
+ # {
23
+ # "a" => [1, 2],
24
+ # "b" => [3, 4]
25
+ # }
26
+ # )
27
+ # df.select((Polars.lit(10) / Polars.all).name.keep)
28
+ # # =>
29
+ # # shape: (2, 2)
30
+ # # ┌──────┬──────────┐
31
+ # # │ a ┆ b │
32
+ # # │ --- ┆ --- │
33
+ # # │ f64 ┆ f64 │
34
+ # # ╞══════╪══════════╡
35
+ # # │ 10.0 ┆ 3.333333 │
36
+ # # │ 5.0 ┆ 2.5 │
37
+ # # └──────┴──────────┘
38
+ #
39
+ # @example Undo an alias operation.
40
+ # df.with_columns((Polars.col("a") * 9).alias("c").name.keep)
41
+ # # =>
42
+ # # shape: (2, 2)
43
+ # # ┌─────┬─────┐
44
+ # # │ a ┆ b │
45
+ # # │ --- ┆ --- │
46
+ # # │ i64 ┆ i64 │
47
+ # # ╞═════╪═════╡
48
+ # # │ 9 ┆ 3 │
49
+ # # │ 18 ┆ 4 │
50
+ # # └─────┴─────┘
51
+ def keep
52
+ Utils.wrap_expr(_rbexpr.name_keep)
53
+ end
54
+
55
+ # Rename the output of an expression by mapping a function over the root name.
56
+ #
57
+ # @return [Expr]
58
+ #
59
+ # @example Remove a common suffix and convert to lower case.
60
+ # df = Polars::DataFrame.new(
61
+ # {
62
+ # "A_reverse" => [3, 2, 1],
63
+ # "B_reverse" => ["z", "y", "x"]
64
+ # }
65
+ # )
66
+ # df.with_columns(
67
+ # Polars.all.reverse.name.map { |c| c.delete_suffix("_reverse").downcase }
68
+ # )
69
+ # # =>
70
+ # # shape: (3, 4)
71
+ # # ┌───────────┬───────────┬─────┬─────┐
72
+ # # │ A_reverse ┆ B_reverse ┆ a ┆ b │
73
+ # # │ --- ┆ --- ┆ --- ┆ --- │
74
+ # # │ i64 ┆ str ┆ i64 ┆ str │
75
+ # # ╞═══════════╪═══════════╪═════╪═════╡
76
+ # # │ 3 ┆ z ┆ 1 ┆ x │
77
+ # # │ 2 ┆ y ┆ 2 ┆ y │
78
+ # # │ 1 ┆ x ┆ 3 ┆ z │
79
+ # # └───────────┴───────────┴─────┴─────┘
80
+ def map(&f)
81
+ Utils.wrap_expr(_rbexpr.name_map(f))
82
+ end
83
+
84
+ # Add a prefix to the root column name of the expression.
85
+ #
86
+ # @param prefix [Object]
87
+ # Prefix to add to the root column name.
88
+ #
89
+ # @return [Expr]
90
+ #
91
+ # @example
92
+ # df = Polars::DataFrame.new(
93
+ # {
94
+ # "a" => [1, 2, 3],
95
+ # "b" => ["x", "y", "z"]
96
+ # }
97
+ # )
98
+ # df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
99
+ # # =>
100
+ # # shape: (3, 4)
101
+ # # ┌─────┬─────┬───────────┬───────────┐
102
+ # # │ a ┆ b ┆ reverse_a ┆ reverse_b │
103
+ # # │ --- ┆ --- ┆ --- ┆ --- │
104
+ # # │ i64 ┆ str ┆ i64 ┆ str │
105
+ # # ╞═════╪═════╪═══════════╪═══════════╡
106
+ # # │ 1 ┆ x ┆ 3 ┆ z │
107
+ # # │ 2 ┆ y ┆ 2 ┆ y │
108
+ # # │ 3 ┆ z ┆ 1 ┆ x │
109
+ # # └─────┴─────┴───────────┴───────────┘
110
+ def prefix(prefix)
111
+ Utils.wrap_expr(_rbexpr.name_prefix(prefix))
112
+ end
113
+
114
+ # Add a suffix to the root column name of the expression.
115
+ #
116
+ # @param suffix [Object]
117
+ # Suffix to add to the root column name.
118
+ #
119
+ # @return [Expr]
120
+ #
121
+ # @example
122
+ # df = Polars::DataFrame.new(
123
+ # {
124
+ # "a" => [1, 2, 3],
125
+ # "b" => ["x", "y", "z"]
126
+ # }
127
+ # )
128
+ # df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
129
+ # # =>
130
+ # # shape: (3, 4)
131
+ # # ┌─────┬─────┬───────────┬───────────┐
132
+ # # │ a ┆ b ┆ a_reverse ┆ b_reverse │
133
+ # # │ --- ┆ --- ┆ --- ┆ --- │
134
+ # # │ i64 ┆ str ┆ i64 ┆ str │
135
+ # # ╞═════╪═════╪═══════════╪═══════════╡
136
+ # # │ 1 ┆ x ┆ 3 ┆ z │
137
+ # # │ 2 ┆ y ┆ 2 ┆ y │
138
+ # # │ 3 ┆ z ┆ 1 ┆ x │
139
+ # # └─────┴─────┴───────────┴───────────┘
140
+ def suffix(suffix)
141
+ Utils.wrap_expr(_rbexpr.name_suffix(suffix))
142
+ end
143
+
144
+ # Make the root column name lowercase.
145
+ #
146
+ # @return [Expr]
147
+ #
148
+ # @example
149
+ # df = Polars::DataFrame.new(
150
+ # {
151
+ # "ColX" => [1, 2, 3],
152
+ # "ColY" => ["x", "y", "z"],
153
+ # }
154
+ # )
155
+ # df.with_columns(Polars.all.name.to_lowercase)
156
+ # # =>
157
+ # # shape: (3, 4)
158
+ # # ┌──────┬──────┬──────┬──────┐
159
+ # # │ ColX ┆ ColY ┆ colx ┆ coly │
160
+ # # │ --- ┆ --- ┆ --- ┆ --- │
161
+ # # │ i64 ┆ str ┆ i64 ┆ str │
162
+ # # ╞══════╪══════╪══════╪══════╡
163
+ # # │ 1 ┆ x ┆ 1 ┆ x │
164
+ # # │ 2 ┆ y ┆ 2 ┆ y │
165
+ # # │ 3 ┆ z ┆ 3 ┆ z │
166
+ # # └──────┴──────┴──────┴──────┘
167
+ def to_lowercase
168
+ Utils.wrap_expr(_rbexpr.name_to_lowercase)
169
+ end
170
+
171
+ # Make the root column name uppercase.
172
+ #
173
+ # @return [Expr]
174
+ #
175
+ # @example
176
+ # df = Polars::DataFrame.new(
177
+ # {
178
+ # "ColX" => [1, 2, 3],
179
+ # "ColY" => ["x", "y", "z"]
180
+ # }
181
+ # )
182
+ # df.with_columns(Polars.all.name.to_uppercase)
183
+ # # =>
184
+ # # shape: (3, 4)
185
+ # # ┌──────┬──────┬──────┬──────┐
186
+ # # │ ColX ┆ ColY ┆ COLX ┆ COLY │
187
+ # # │ --- ┆ --- ┆ --- ┆ --- │
188
+ # # │ i64 ┆ str ┆ i64 ┆ str │
189
+ # # ╞══════╪══════╪══════╪══════╡
190
+ # # │ 1 ┆ x ┆ 1 ┆ x │
191
+ # # │ 2 ┆ y ┆ 2 ┆ y │
192
+ # # │ 3 ┆ z ┆ 3 ┆ z │
193
+ # # └──────┴──────┴──────┴──────┘
194
+ def to_uppercase
195
+ Utils.wrap_expr(_rbexpr.name_to_uppercase)
196
+ end
197
+ end
198
+ end
@@ -2,7 +2,7 @@ module Polars
2
2
  # A rolling grouper.
3
3
  #
4
4
  # This has an `.agg` method which will allow you to run all polars expressions in a
5
- # groupby context.
5
+ # group by context.
6
6
  class RollingGroupBy
7
7
  def initialize(
8
8
  df,
@@ -27,7 +27,7 @@ module Polars
27
27
 
28
28
  def agg(aggs)
29
29
  @df.lazy
30
- .groupby_rolling(
30
+ .group_by_rolling(
31
31
  index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by, check_sorted: @check_sorted
32
32
  )
33
33
  .agg(aggs)