polars-df 0.8.0-x86_64-linux → 0.9.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -1
  3. data/Cargo.lock +107 -59
  4. data/Cargo.toml +0 -3
  5. data/LICENSE-THIRD-PARTY.txt +1726 -754
  6. data/LICENSE.txt +1 -1
  7. data/README.md +2 -2
  8. data/lib/polars/3.1/polars.so +0 -0
  9. data/lib/polars/3.2/polars.so +0 -0
  10. data/lib/polars/3.3/polars.so +0 -0
  11. data/lib/polars/array_expr.rb +449 -0
  12. data/lib/polars/array_name_space.rb +346 -0
  13. data/lib/polars/cat_expr.rb +24 -0
  14. data/lib/polars/cat_name_space.rb +75 -0
  15. data/lib/polars/config.rb +2 -2
  16. data/lib/polars/data_frame.rb +179 -43
  17. data/lib/polars/data_types.rb +191 -28
  18. data/lib/polars/date_time_expr.rb +31 -14
  19. data/lib/polars/exceptions.rb +12 -1
  20. data/lib/polars/expr.rb +866 -186
  21. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  22. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  23. data/lib/polars/functions/as_datatype.rb +248 -0
  24. data/lib/polars/functions/col.rb +47 -0
  25. data/lib/polars/functions/eager.rb +182 -0
  26. data/lib/polars/functions/lazy.rb +1280 -0
  27. data/lib/polars/functions/len.rb +49 -0
  28. data/lib/polars/functions/lit.rb +35 -0
  29. data/lib/polars/functions/random.rb +16 -0
  30. data/lib/polars/functions/range/date_range.rb +103 -0
  31. data/lib/polars/functions/range/int_range.rb +51 -0
  32. data/lib/polars/functions/repeat.rb +144 -0
  33. data/lib/polars/functions/whenthen.rb +27 -0
  34. data/lib/polars/functions.rb +29 -416
  35. data/lib/polars/group_by.rb +2 -2
  36. data/lib/polars/io.rb +18 -25
  37. data/lib/polars/lazy_frame.rb +367 -53
  38. data/lib/polars/list_expr.rb +152 -6
  39. data/lib/polars/list_name_space.rb +102 -0
  40. data/lib/polars/meta_expr.rb +175 -7
  41. data/lib/polars/series.rb +273 -34
  42. data/lib/polars/string_cache.rb +75 -0
  43. data/lib/polars/string_expr.rb +412 -96
  44. data/lib/polars/string_name_space.rb +4 -4
  45. data/lib/polars/testing.rb +507 -0
  46. data/lib/polars/utils.rb +52 -8
  47. data/lib/polars/version.rb +1 -1
  48. data/lib/polars.rb +15 -2
  49. metadata +33 -4
  50. data/lib/polars/lazy_functions.rb +0 -1181
@@ -0,0 +1,246 @@
1
+ module Polars
2
+ module Functions
3
+ # Compute the bitwise AND horizontally across columns.
4
+ #
5
+ # @param exprs [Array]
6
+ # Column(s) to use in the aggregation. Accepts expression input. Strings are
7
+ # parsed as column names, other non-expression inputs are parsed as literals.
8
+ #
9
+ # @return [Expr]
10
+ #
11
+ # @example
12
+ # df = Polars::DataFrame.new(
13
+ # {
14
+ # "a" => [false, false, true, true, false, nil],
15
+ # "b" => [false, true, true, nil, nil, nil],
16
+ # "c" => ["u", "v", "w", "x", "y", "z"]
17
+ # }
18
+ # )
19
+ # df.with_columns(all: Polars.all_horizontal("a", "b"))
20
+ # # =>
21
+ # # shape: (6, 4)
22
+ # # ┌───────┬───────┬─────┬───────┐
23
+ # # │ a ┆ b ┆ c ┆ all │
24
+ # # │ --- ┆ --- ┆ --- ┆ --- │
25
+ # # │ bool ┆ bool ┆ str ┆ bool │
26
+ # # ╞═══════╪═══════╪═════╪═══════╡
27
+ # # │ false ┆ false ┆ u ┆ false │
28
+ # # │ false ┆ true ┆ v ┆ false │
29
+ # # │ true ┆ true ┆ w ┆ true │
30
+ # # │ true ┆ null ┆ x ┆ null │
31
+ # # │ false ┆ null ┆ y ┆ false │
32
+ # # │ null ┆ null ┆ z ┆ null │
33
+ # # └───────┴───────┴─────┴───────┘
34
def all_horizontal(*exprs)
  # Parse the inputs through Utils, hand the parsed list to
  # Plr.all_horizontal, and wrap whatever it returns.
  parsed = Utils.parse_as_list_of_expressions(*exprs)
  combined = Plr.all_horizontal(parsed)
  Utils.wrap_expr(combined)
end
38
+
39
+ # Compute the bitwise OR horizontally across columns.
40
+ #
41
+ # @param exprs [Array]
42
+ # Column(s) to use in the aggregation. Accepts expression input. Strings are
43
+ # parsed as column names, other non-expression inputs are parsed as literals.
44
+ #
45
+ # @return [Expr]
46
+ #
47
+ # @example
48
+ # df = Polars::DataFrame.new(
49
+ # {
50
+ # "a" => [false, false, true, true, false, nil],
51
+ # "b" => [false, true, true, nil, nil, nil],
52
+ # "c" => ["u", "v", "w", "x", "y", "z"]
53
+ # }
54
+ # )
55
+ # df.with_columns(any: Polars.any_horizontal("a", "b"))
56
+ # # =>
57
+ # # shape: (6, 4)
58
+ # # ┌───────┬───────┬─────┬───────┐
59
+ # # │ a ┆ b ┆ c ┆ any │
60
+ # # │ --- ┆ --- ┆ --- ┆ --- │
61
+ # # │ bool ┆ bool ┆ str ┆ bool │
62
+ # # ╞═══════╪═══════╪═════╪═══════╡
63
+ # # │ false ┆ false ┆ u ┆ false │
64
+ # # │ false ┆ true ┆ v ┆ true │
65
+ # # │ true ┆ true ┆ w ┆ true │
66
+ # # │ true ┆ null ┆ x ┆ true │
67
+ # # │ false ┆ null ┆ y ┆ null │
68
+ # # │ null ┆ null ┆ z ┆ null │
69
+ # # └───────┴───────┴─────┴───────┘
70
def any_horizontal(*exprs)
  # Parse the inputs through Utils, hand the parsed list to
  # Plr.any_horizontal, and wrap whatever it returns.
  parsed = Utils.parse_as_list_of_expressions(*exprs)
  combined = Plr.any_horizontal(parsed)
  Utils.wrap_expr(combined)
end
74
+
75
+ # Get the maximum value horizontally across columns.
76
+ #
77
+ # @param exprs [Array]
78
+ # Column(s) to use in the aggregation. Accepts expression input. Strings are
79
+ # parsed as column names, other non-expression inputs are parsed as literals.
80
+ #
81
+ # @return [Expr]
82
+ #
83
+ # @example
84
+ # df = Polars::DataFrame.new(
85
+ # {
86
+ # "a" => [1, 8, 3],
87
+ # "b" => [4, 5, nil],
88
+ # "c" => ["x", "y", "z"]
89
+ # }
90
+ # )
91
+ # df.with_columns(max: Polars.max_horizontal("a", "b"))
92
+ # # =>
93
+ # # shape: (3, 4)
94
+ # # ┌─────┬──────┬─────┬─────┐
95
+ # # │ a ┆ b ┆ c ┆ max │
96
+ # # │ --- ┆ --- ┆ --- ┆ --- │
97
+ # # │ i64 ┆ i64 ┆ str ┆ i64 │
98
+ # # ╞═════╪══════╪═════╪═════╡
99
+ # # │ 1 ┆ 4 ┆ x ┆ 4 │
100
+ # # │ 8 ┆ 5 ┆ y ┆ 8 │
101
+ # # │ 3 ┆ null ┆ z ┆ 3 │
102
+ # # └─────┴──────┴─────┴─────┘
103
def max_horizontal(*exprs)
  # Parse the inputs through Utils, hand the parsed list to
  # Plr.max_horizontal, and wrap whatever it returns.
  parsed = Utils.parse_as_list_of_expressions(*exprs)
  combined = Plr.max_horizontal(parsed)
  Utils.wrap_expr(combined)
end
107
+
108
+ # Get the minimum value horizontally across columns.
109
+ #
110
+ # @param exprs [Array]
111
+ # Column(s) to use in the aggregation. Accepts expression input. Strings are
112
+ # parsed as column names, other non-expression inputs are parsed as literals.
113
+ #
114
+ # @return [Expr]
115
+ #
116
+ # @example
117
+ # df = Polars::DataFrame.new(
118
+ # {
119
+ # "a" => [1, 8, 3],
120
+ # "b" => [4, 5, nil],
121
+ # "c" => ["x", "y", "z"]
122
+ # }
123
+ # )
124
+ # df.with_columns(min: Polars.min_horizontal("a", "b"))
125
+ # # =>
126
+ # # shape: (3, 4)
127
+ # # ┌─────┬──────┬─────┬─────┐
128
+ # # │ a ┆ b ┆ c ┆ min │
129
+ # # │ --- ┆ --- ┆ --- ┆ --- │
130
+ # # │ i64 ┆ i64 ┆ str ┆ i64 │
131
+ # # ╞═════╪══════╪═════╪═════╡
132
+ # # │ 1 ┆ 4 ┆ x ┆ 1 │
133
+ # # │ 8 ┆ 5 ┆ y ┆ 5 │
134
+ # # │ 3 ┆ null ┆ z ┆ 3 │
135
+ # # └─────┴──────┴─────┴─────┘
136
def min_horizontal(*exprs)
  # Parse the inputs through Utils, hand the parsed list to
  # Plr.min_horizontal, and wrap whatever it returns.
  parsed = Utils.parse_as_list_of_expressions(*exprs)
  combined = Plr.min_horizontal(parsed)
  Utils.wrap_expr(combined)
end
140
+
141
+ # Sum all values horizontally across columns.
142
+ #
143
+ # @param exprs [Array]
144
+ # Column(s) to use in the aggregation. Accepts expression input. Strings are
145
+ # parsed as column names, other non-expression inputs are parsed as literals.
146
+ #
147
+ # @return [Expr]
148
+ #
149
+ # @example
150
+ # df = Polars::DataFrame.new(
151
+ # {
152
+ # "a" => [1, 8, 3],
153
+ # "b" => [4, 5, nil],
154
+ # "c" => ["x", "y", "z"]
155
+ # }
156
+ # )
157
+ # df.with_columns(sum: Polars.sum_horizontal("a", "b"))
158
+ # # =>
159
+ # # shape: (3, 4)
160
+ # # ┌─────┬──────┬─────┬─────┐
161
+ # # │ a ┆ b ┆ c ┆ sum │
162
+ # # │ --- ┆ --- ┆ --- ┆ --- │
163
+ # # │ i64 ┆ i64 ┆ str ┆ i64 │
164
+ # # ╞═════╪══════╪═════╪═════╡
165
+ # # │ 1 ┆ 4 ┆ x ┆ 5 │
166
+ # # │ 8 ┆ 5 ┆ y ┆ 13 │
167
+ # # │ 3 ┆ null ┆ z ┆ 3 │
168
+ # # └─────┴──────┴─────┴─────┘
169
def sum_horizontal(*exprs)
  # Parse the inputs through Utils, hand the parsed list to
  # Plr.sum_horizontal, and wrap whatever it returns.
  parsed = Utils.parse_as_list_of_expressions(*exprs)
  combined = Plr.sum_horizontal(parsed)
  Utils.wrap_expr(combined)
end
173
+
174
+ # Compute the mean of all values horizontally across columns.
175
+ #
176
+ # @param exprs [Array]
177
+ # Column(s) to use in the aggregation. Accepts expression input. Strings are
178
+ # parsed as column names, other non-expression inputs are parsed as literals.
179
+ #
180
+ # @return [Expr]
181
+ #
182
+ # @example
183
+ # df = Polars::DataFrame.new(
184
+ # {
185
+ # "a" => [1, 8, 3],
186
+ # "b" => [4, 5, nil],
187
+ # "c" => ["x", "y", "z"]
188
+ # }
189
+ # )
190
+ # df.with_columns(mean: Polars.mean_horizontal("a", "b"))
191
+ # # =>
192
+ # # shape: (3, 4)
193
+ # # ┌─────┬──────┬─────┬──────┐
194
+ # # │ a ┆ b ┆ c ┆ mean │
195
+ # # │ --- ┆ --- ┆ --- ┆ --- │
196
+ # # │ i64 ┆ i64 ┆ str ┆ f64 │
197
+ # # ╞═════╪══════╪═════╪══════╡
198
+ # # │ 1 ┆ 4 ┆ x ┆ 2.5 │
199
+ # # │ 8 ┆ 5 ┆ y ┆ 6.5 │
200
+ # # │ 3 ┆ null ┆ z ┆ 3.0 │
201
+ # # └─────┴──────┴─────┴──────┘
202
def mean_horizontal(*exprs)
  # Parse the inputs through Utils, hand the parsed list to
  # Plr.mean_horizontal, and wrap whatever it returns.
  parsed = Utils.parse_as_list_of_expressions(*exprs)
  combined = Plr.mean_horizontal(parsed)
  Utils.wrap_expr(combined)
end
206
+
207
+ # Cumulatively sum all values horizontally across columns.
208
+ #
209
+ # @param exprs [Array]
210
+ # Column(s) to use in the aggregation. Accepts expression input. Strings are
211
+ # parsed as column names, other non-expression inputs are parsed as literals.
212
+ #
213
+ # @return [Expr]
214
+ #
215
+ # @example
216
+ # df = Polars::DataFrame.new(
217
+ # {
218
+ # "a" => [1, 8, 3],
219
+ # "b" => [4, 5, nil],
220
+ # "c" => ["x", "y", "z"]
221
+ # }
222
+ # )
223
+ # df.with_columns(Polars.cum_sum_horizontal("a", "b"))
224
+ # # =>
225
+ # # shape: (3, 4)
226
+ # # ┌─────┬──────┬─────┬───────────┐
227
+ # # │ a ┆ b ┆ c ┆ cum_sum │
228
+ # # │ --- ┆ --- ┆ --- ┆ --- │
229
+ # # │ i64 ┆ i64 ┆ str ┆ struct[2] │
230
+ # # ╞═════╪══════╪═════╪═══════════╡
231
+ # # │ 1 ┆ 4 ┆ x ┆ {1,5} │
232
+ # # │ 8 ┆ 5 ┆ y ┆ {8,13} │
233
+ # # │ 3 ┆ null ┆ z ┆ {3,null} │
234
+ # # └─────┴──────┴─────┴───────────┘
235
def cum_sum_horizontal(*exprs)
  # Parse the inputs, then wrap each parsed element individually so the
  # fold below receives wrapped expressions.
  wrapped = Utils.parse_as_list_of_expressions(*exprs).map do |rbexpr|
    Utils.wrap_expr(rbexpr)
  end

  # Seed the fold with a UInt32 zero: u32 will not cast to float as eagerly.
  seed = Polars.lit(0).cast(UInt32)
  add = ->(acc, value) { acc + value }

  # The folded result is always named "cum_sum".
  Polars.cum_fold(seed, add, wrapped).alias("cum_sum")
end
244
+ alias_method :cumsum_horizontal, :cum_sum_horizontal
245
+ end
246
+ end
@@ -0,0 +1,282 @@
1
+ module Polars
2
+ module Functions
3
+ # Either return an expression representing all columns, or evaluate a bitwise AND operation.
4
+ #
5
+ # If no arguments are passed, this function is syntactic sugar for `col("*")`.
6
+ # Otherwise, this function is syntactic sugar for `col(names).all`.
7
+ #
8
+ # @param names [Array]
9
+ # Name(s) of the columns to use in the aggregation.
10
+ # @param ignore_nulls [Boolean]
11
+ # Ignore null values (default).
12
+ #
13
+ # @return [Expr]
14
+ #
15
+ # @example Selecting all columns.
16
+ # df = Polars::DataFrame.new(
17
+ # {
18
+ # "a" => [true, false, true],
19
+ # "b" => [false, false, false]
20
+ # }
21
+ # )
22
+ # df.select(Polars.all.sum)
23
+ # # =>
24
+ # # shape: (1, 2)
25
+ # # ┌─────┬─────┐
26
+ # # │ a ┆ b │
27
+ # # │ --- ┆ --- │
28
+ # # │ u32 ┆ u32 │
29
+ # # ╞═════╪═════╡
30
+ # # │ 2 ┆ 0 │
31
+ # # └─────┴─────┘
32
+ #
33
+ # @example Evaluate bitwise AND for a column.
34
+ # df.select(Polars.all("a"))
35
+ # # =>
36
+ # # shape: (1, 1)
37
+ # # ┌───────┐
38
+ # # │ a │
39
+ # # │ --- │
40
+ # # │ bool │
41
+ # # ╞═══════╡
42
+ # # │ false │
43
+ # # └───────┘
44
def all(*names, ignore_nulls: true)
  # With no names given, return the "*" column expression.
  return col("*") if names.empty?

  # Otherwise aggregate the named columns; `ignore_nulls` is passed on
  # under the `drop_nulls` keyword.
  selected = col(*names)
  selected.all(drop_nulls: ignore_nulls)
end
51
+
52
+ # Evaluate a bitwise OR operation.
53
+ #
54
+ # Syntactic sugar for `col(names).any`.
55
+ #
56
+ # @param names [Array]
57
+ # Name(s) of the columns to use in the aggregation.
58
+ # @param ignore_nulls [Boolean]
59
+ # Ignore null values (default).
60
+ #
61
+ # @return [Expr]
62
+ #
63
+ # @example
64
+ # df = Polars::DataFrame.new(
65
+ # {
66
+ # "a" => [true, false, true],
67
+ # "b" => [false, false, false]
68
+ # }
69
+ # )
70
+ # df.select(Polars.any("a"))
71
+ # # =>
72
+ # # shape: (1, 1)
73
+ # # ┌──────┐
74
+ # # │ a │
75
+ # # │ --- │
76
+ # # │ bool │
77
+ # # ╞══════╡
78
+ # # │ true │
79
+ # # └──────┘
80
def any(*names, ignore_nulls: true)
  # Select the named columns, then call `any` on the selection;
  # `ignore_nulls` is passed on under the `drop_nulls` keyword.
  selected = col(*names)
  selected.any(drop_nulls: ignore_nulls)
end
83
+
84
+ # Get the maximum value.
85
+ #
86
+ # Syntactic sugar for `col(names).max`.
87
+ #
88
+ # @param names [Array]
89
+ # Name(s) of the columns to use in the aggregation.
90
+ #
91
+ # @return [Expr]
92
+ #
93
+ # @example Get the maximum value of a column.
94
+ # df = Polars::DataFrame.new(
95
+ # {
96
+ # "a" => [1, 8, 3],
97
+ # "b" => [4, 5, 2],
98
+ # "c" => ["foo", "bar", "foo"]
99
+ # }
100
+ # )
101
+ # df.select(Polars.max("a"))
102
+ # # =>
103
+ # # shape: (1, 1)
104
+ # # ┌─────┐
105
+ # # │ a │
106
+ # # │ --- │
107
+ # # │ i64 │
108
+ # # ╞═════╡
109
+ # # │ 8 │
110
+ # # └─────┘
111
+ #
112
+ # @example Get the maximum value of multiple columns.
113
+ # df.select(Polars.max("^a|b$"))
114
+ # # =>
115
+ # # shape: (1, 2)
116
+ # # ┌─────┬─────┐
117
+ # # │ a ┆ b │
118
+ # # │ --- ┆ --- │
119
+ # # │ i64 ┆ i64 │
120
+ # # ╞═════╪═════╡
121
+ # # │ 8 ┆ 5 │
122
+ # # └─────┴─────┘
123
+ #
124
+ # @example
125
+ # df.select(Polars.max("a", "b"))
126
+ # # =>
127
+ # # shape: (1, 2)
128
+ # # ┌─────┬─────┐
129
+ # # │ a ┆ b │
130
+ # # │ --- ┆ --- │
131
+ # # │ i64 ┆ i64 │
132
+ # # ╞═════╪═════╡
133
+ # # │ 8 ┆ 5 │
134
+ # # └─────┴─────┘
135
def max(*names)
  # Select the named columns, then call `max` on the selection.
  selected = col(*names)
  selected.max
end
138
+
139
+ # Get the minimum value.
140
+ #
141
+ # Syntactic sugar for `col(names).min`.
142
+ #
143
+ # @param names [Array]
144
+ # Name(s) of the columns to use in the aggregation.
145
+ #
146
+ # @return [Expr]
147
+ #
148
+ # @example Get the minimum value of a column.
149
+ # df = Polars::DataFrame.new(
150
+ # {
151
+ # "a" => [1, 8, 3],
152
+ # "b" => [4, 5, 2],
153
+ # "c" => ["foo", "bar", "foo"]
154
+ # }
155
+ # )
156
+ # df.select(Polars.min("a"))
157
+ # # =>
158
+ # # shape: (1, 1)
159
+ # # ┌─────┐
160
+ # # │ a │
161
+ # # │ --- │
162
+ # # │ i64 │
163
+ # # ╞═════╡
164
+ # # │ 1 │
165
+ # # └─────┘
166
+ #
167
+ # @example Get the minimum value of multiple columns.
168
+ # df.select(Polars.min("^a|b$"))
169
+ # # =>
170
+ # # shape: (1, 2)
171
+ # # ┌─────┬─────┐
172
+ # # │ a ┆ b │
173
+ # # │ --- ┆ --- │
174
+ # # │ i64 ┆ i64 │
175
+ # # ╞═════╪═════╡
176
+ # # │ 1 ┆ 2 │
177
+ # # └─────┴─────┘
178
+ #
179
+ # @example
180
+ # df.select(Polars.min("a", "b"))
181
+ # # =>
182
+ # # shape: (1, 2)
183
+ # # ┌─────┬─────┐
184
+ # # │ a ┆ b │
185
+ # # │ --- ┆ --- │
186
+ # # │ i64 ┆ i64 │
187
+ # # ╞═════╪═════╡
188
+ # # │ 1 ┆ 2 │
189
+ # # └─────┴─────┘
190
def min(*names)
  # Select the named columns, then call `min` on the selection.
  selected = col(*names)
  selected.min
end
193
+
194
+ # Sum all values.
195
+ #
196
+ # Syntactic sugar for `col(names).sum`.
197
+ #
198
+ # @param names [Array]
199
+ # Name(s) of the columns to use in the aggregation.
200
+ #
201
+ # @return [Expr]
202
+ #
203
+ # @example Sum a column.
204
+ # df = Polars::DataFrame.new(
205
+ # {
206
+ # "a" => [1, 2],
207
+ # "b" => [3, 4],
208
+ # "c" => [5, 6]
209
+ # }
210
+ # )
211
+ # df.select(Polars.sum("a"))
212
+ # # =>
213
+ # # shape: (1, 1)
214
+ # # ┌─────┐
215
+ # # │ a │
216
+ # # │ --- │
217
+ # # │ i64 │
218
+ # # ╞═════╡
219
+ # # │ 3 │
220
+ # # └─────┘
221
+ #
222
+ # @example Sum multiple columns.
223
+ # df.select(Polars.sum("a", "c"))
224
+ # # =>
225
+ # # shape: (1, 2)
226
+ # # ┌─────┬─────┐
227
+ # # │ a ┆ c │
228
+ # # │ --- ┆ --- │
229
+ # # │ i64 ┆ i64 │
230
+ # # ╞═════╪═════╡
231
+ # # │ 3 ┆ 11 │
232
+ # # └─────┴─────┘
233
+ #
234
+ # @example
235
+ # df.select(Polars.sum("^.*[bc]$"))
236
+ # # =>
237
+ # # shape: (1, 2)
238
+ # # ┌─────┬─────┐
239
+ # # │ b ┆ c │
240
+ # # │ --- ┆ --- │
241
+ # # │ i64 ┆ i64 │
242
+ # # ╞═════╪═════╡
243
+ # # │ 7 ┆ 11 │
244
+ # # └─────┴─────┘
245
def sum(*names)
  # Select the named columns, then call `sum` on the selection.
  selected = col(*names)
  selected.sum
end
248
+
249
+ # Cumulatively sum all values.
250
+ #
251
+ # Syntactic sugar for `col(names).cum_sum`.
252
+ #
253
+ # @param names [Array]
254
+ # Name(s) of the columns to use in the aggregation.
255
+ #
256
+ # @return [Expr]
257
+ #
258
+ # @example
259
+ # df = Polars::DataFrame.new(
260
+ # {
261
+ # "a" => [1, 2, 3],
262
+ # "b" => [4, 5, 6]
263
+ # }
264
+ # )
265
+ # df.select(Polars.cum_sum("a"))
266
+ # # =>
267
+ # # shape: (3, 1)
268
+ # # ┌─────┐
269
+ # # │ a │
270
+ # # │ --- │
271
+ # # │ i64 │
272
+ # # ╞═════╡
273
+ # # │ 1 │
274
+ # # │ 3 │
275
+ # # │ 6 │
276
+ # # └─────┘
277
def cum_sum(*names)
  # Select the named columns, then call `cum_sum` on the selection.
  selected = col(*names)
  selected.cum_sum
end
280
+ alias_method :cumsum, :cum_sum
281
+ end
282
+ end