polars-df 0.7.0-x86_64-linux → 0.9.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +41 -0
  3. data/Cargo.lock +353 -237
  4. data/Cargo.toml +0 -3
  5. data/LICENSE-THIRD-PARTY.txt +1978 -1459
  6. data/LICENSE.txt +1 -1
  7. data/README.md +2 -2
  8. data/lib/polars/3.1/polars.so +0 -0
  9. data/lib/polars/3.2/polars.so +0 -0
  10. data/lib/polars/{3.0 → 3.3}/polars.so +0 -0
  11. data/lib/polars/array_expr.rb +449 -0
  12. data/lib/polars/array_name_space.rb +346 -0
  13. data/lib/polars/cat_expr.rb +24 -0
  14. data/lib/polars/cat_name_space.rb +75 -0
  15. data/lib/polars/config.rb +2 -2
  16. data/lib/polars/data_frame.rb +248 -108
  17. data/lib/polars/data_types.rb +195 -29
  18. data/lib/polars/date_time_expr.rb +41 -24
  19. data/lib/polars/date_time_name_space.rb +12 -12
  20. data/lib/polars/exceptions.rb +12 -1
  21. data/lib/polars/expr.rb +1080 -195
  22. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  23. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  24. data/lib/polars/functions/as_datatype.rb +248 -0
  25. data/lib/polars/functions/col.rb +47 -0
  26. data/lib/polars/functions/eager.rb +182 -0
  27. data/lib/polars/functions/lazy.rb +1280 -0
  28. data/lib/polars/functions/len.rb +49 -0
  29. data/lib/polars/functions/lit.rb +35 -0
  30. data/lib/polars/functions/random.rb +16 -0
  31. data/lib/polars/functions/range/date_range.rb +103 -0
  32. data/lib/polars/functions/range/int_range.rb +51 -0
  33. data/lib/polars/functions/repeat.rb +144 -0
  34. data/lib/polars/functions/whenthen.rb +27 -0
  35. data/lib/polars/functions.rb +29 -416
  36. data/lib/polars/group_by.rb +3 -3
  37. data/lib/polars/io.rb +21 -28
  38. data/lib/polars/lazy_frame.rb +390 -76
  39. data/lib/polars/list_expr.rb +152 -6
  40. data/lib/polars/list_name_space.rb +102 -0
  41. data/lib/polars/meta_expr.rb +175 -7
  42. data/lib/polars/series.rb +557 -59
  43. data/lib/polars/sql_context.rb +1 -1
  44. data/lib/polars/string_cache.rb +75 -0
  45. data/lib/polars/string_expr.rb +412 -96
  46. data/lib/polars/string_name_space.rb +4 -4
  47. data/lib/polars/struct_expr.rb +1 -1
  48. data/lib/polars/struct_name_space.rb +1 -1
  49. data/lib/polars/testing.rb +507 -0
  50. data/lib/polars/utils.rb +64 -20
  51. data/lib/polars/version.rb +1 -1
  52. data/lib/polars.rb +15 -2
  53. metadata +36 -7
  54. data/lib/polars/lazy_functions.rb +0 -1197
@@ -0,0 +1,246 @@
1
+ module Polars
2
+ module Functions
3
+ # Compute the bitwise AND horizontally across columns.
4
+ #
5
+ # @param exprs [Array]
6
+ # Column(s) to use in the aggregation. Accepts expression input. Strings are
7
+ # parsed as column names, other non-expression inputs are parsed as literals.
8
+ #
9
+ # @return [Expr]
10
+ #
11
+ # @example
12
+ # df = Polars::DataFrame.new(
13
+ # {
14
+ # "a" => [false, false, true, true, false, nil],
15
+ # "b" => [false, true, true, nil, nil, nil],
16
+ # "c" => ["u", "v", "w", "x", "y", "z"]
17
+ # }
18
+ # )
19
+ # df.with_columns(all: Polars.all_horizontal("a", "b"))
20
+ # # =>
21
+ # # shape: (6, 4)
22
+ # # ┌───────┬───────┬─────┬───────┐
23
+ # # │ a ┆ b ┆ c ┆ all │
24
+ # # │ --- ┆ --- ┆ --- ┆ --- │
25
+ # # │ bool ┆ bool ┆ str ┆ bool │
26
+ # # ╞═══════╪═══════╪═════╪═══════╡
27
+ # # │ false ┆ false ┆ u ┆ false │
28
+ # # │ false ┆ true ┆ v ┆ false │
29
+ # # │ true ┆ true ┆ w ┆ true │
30
+ # # │ true ┆ null ┆ x ┆ null │
31
+ # # │ false ┆ null ┆ y ┆ false │
32
+ # # │ null ┆ null ┆ z ┆ null │
33
+ # # └───────┴───────┴─────┴───────┘
34
def all_horizontal(*exprs)
  # Run the inputs through Utils.parse_as_list_of_expressions, hand the
  # result to Plr.all_horizontal, and wrap whatever that call returns.
  parsed = Utils.parse_as_list_of_expressions(*exprs)
  combined = Plr.all_horizontal(parsed)
  Utils.wrap_expr(combined)
end
38
+
39
+ # Compute the bitwise OR horizontally across columns.
40
+ #
41
+ # @param exprs [Array]
42
+ # Column(s) to use in the aggregation. Accepts expression input. Strings are
43
+ # parsed as column names, other non-expression inputs are parsed as literals.
44
+ #
45
+ # @return [Expr]
46
+ #
47
+ # @example
48
+ # df = Polars::DataFrame.new(
49
+ # {
50
+ # "a" => [false, false, true, true, false, nil],
51
+ # "b" => [false, true, true, nil, nil, nil],
52
+ # "c" => ["u", "v", "w", "x", "y", "z"]
53
+ # }
54
+ # )
55
+ # df.with_columns(any: Polars.any_horizontal("a", "b"))
56
+ # # =>
57
+ # # shape: (6, 4)
58
+ # # ┌───────┬───────┬─────┬───────┐
59
+ # # │ a ┆ b ┆ c ┆ any │
60
+ # # │ --- ┆ --- ┆ --- ┆ --- │
61
+ # # │ bool ┆ bool ┆ str ┆ bool │
62
+ # # ╞═══════╪═══════╪═════╪═══════╡
63
+ # # │ false ┆ false ┆ u ┆ false │
64
+ # # │ false ┆ true ┆ v ┆ true │
65
+ # # │ true ┆ true ┆ w ┆ true │
66
+ # # │ true ┆ null ┆ x ┆ true │
67
+ # # │ false ┆ null ┆ y ┆ null │
68
+ # # │ null ┆ null ┆ z ┆ null │
69
+ # # └───────┴───────┴─────┴───────┘
70
def any_horizontal(*exprs)
  # Run the inputs through Utils.parse_as_list_of_expressions, hand the
  # result to Plr.any_horizontal, and wrap whatever that call returns.
  parsed = Utils.parse_as_list_of_expressions(*exprs)
  combined = Plr.any_horizontal(parsed)
  Utils.wrap_expr(combined)
end
74
+
75
+ # Get the maximum value horizontally across columns.
76
+ #
77
+ # @param exprs [Array]
78
+ # Column(s) to use in the aggregation. Accepts expression input. Strings are
79
+ # parsed as column names, other non-expression inputs are parsed as literals.
80
+ #
81
+ # @return [Expr]
82
+ #
83
+ # @example
84
+ # df = Polars::DataFrame.new(
85
+ # {
86
+ # "a" => [1, 8, 3],
87
+ # "b" => [4, 5, nil],
88
+ # "c" => ["x", "y", "z"]
89
+ # }
90
+ # )
91
+ # df.with_columns(max: Polars.max_horizontal("a", "b"))
92
+ # # =>
93
+ # # shape: (3, 4)
94
+ # # ┌─────┬──────┬─────┬─────┐
95
+ # # │ a ┆ b ┆ c ┆ max │
96
+ # # │ --- ┆ --- ┆ --- ┆ --- │
97
+ # # │ i64 ┆ i64 ┆ str ┆ i64 │
98
+ # # ╞═════╪══════╪═════╪═════╡
99
+ # # │ 1 ┆ 4 ┆ x ┆ 4 │
100
+ # # │ 8 ┆ 5 ┆ y ┆ 8 │
101
+ # # │ 3 ┆ null ┆ z ┆ 3 │
102
+ # # └─────┴──────┴─────┴─────┘
103
def max_horizontal(*exprs)
  # Run the inputs through Utils.parse_as_list_of_expressions, hand the
  # result to Plr.max_horizontal, and wrap whatever that call returns.
  parsed = Utils.parse_as_list_of_expressions(*exprs)
  combined = Plr.max_horizontal(parsed)
  Utils.wrap_expr(combined)
end
107
+
108
+ # Get the minimum value horizontally across columns.
109
+ #
110
+ # @param exprs [Array]
111
+ # Column(s) to use in the aggregation. Accepts expression input. Strings are
112
+ # parsed as column names, other non-expression inputs are parsed as literals.
113
+ #
114
+ # @return [Expr]
115
+ #
116
+ # @example
117
+ # df = Polars::DataFrame.new(
118
+ # {
119
+ # "a" => [1, 8, 3],
120
+ # "b" => [4, 5, nil],
121
+ # "c" => ["x", "y", "z"]
122
+ # }
123
+ # )
124
+ # df.with_columns(min: Polars.min_horizontal("a", "b"))
125
+ # # =>
126
+ # # shape: (3, 4)
127
+ # # ┌─────┬──────┬─────┬─────┐
128
+ # # │ a ┆ b ┆ c ┆ min │
129
+ # # │ --- ┆ --- ┆ --- ┆ --- │
130
+ # # │ i64 ┆ i64 ┆ str ┆ i64 │
131
+ # # ╞═════╪══════╪═════╪═════╡
132
+ # # │ 1 ┆ 4 ┆ x ┆ 1 │
133
+ # # │ 8 ┆ 5 ┆ y ┆ 5 │
134
+ # # │ 3 ┆ null ┆ z ┆ 3 │
135
+ # # └─────┴──────┴─────┴─────┘
136
def min_horizontal(*exprs)
  # Run the inputs through Utils.parse_as_list_of_expressions, hand the
  # result to Plr.min_horizontal, and wrap whatever that call returns.
  parsed = Utils.parse_as_list_of_expressions(*exprs)
  combined = Plr.min_horizontal(parsed)
  Utils.wrap_expr(combined)
end
140
+
141
+ # Sum all values horizontally across columns.
142
+ #
143
+ # @param exprs [Array]
144
+ # Column(s) to use in the aggregation. Accepts expression input. Strings are
145
+ # parsed as column names, other non-expression inputs are parsed as literals.
146
+ #
147
+ # @return [Expr]
148
+ #
149
+ # @example
150
+ # df = Polars::DataFrame.new(
151
+ # {
152
+ # "a" => [1, 8, 3],
153
+ # "b" => [4, 5, nil],
154
+ # "c" => ["x", "y", "z"]
155
+ # }
156
+ # )
157
+ # df.with_columns(sum: Polars.sum_horizontal("a", "b"))
158
+ # # =>
159
+ # # shape: (3, 4)
160
+ # # ┌─────┬──────┬─────┬─────┐
161
+ # # │ a ┆ b ┆ c ┆ sum │
162
+ # # │ --- ┆ --- ┆ --- ┆ --- │
163
+ # # │ i64 ┆ i64 ┆ str ┆ i64 │
164
+ # # ╞═════╪══════╪═════╪═════╡
165
+ # # │ 1 ┆ 4 ┆ x ┆ 5 │
166
+ # # │ 8 ┆ 5 ┆ y ┆ 13 │
167
+ # # │ 3 ┆ null ┆ z ┆ 3 │
168
+ # # └─────┴──────┴─────┴─────┘
169
def sum_horizontal(*exprs)
  # Run the inputs through Utils.parse_as_list_of_expressions, hand the
  # result to Plr.sum_horizontal, and wrap whatever that call returns.
  parsed = Utils.parse_as_list_of_expressions(*exprs)
  combined = Plr.sum_horizontal(parsed)
  Utils.wrap_expr(combined)
end
173
+
174
+ # Compute the mean of all values horizontally across columns.
175
+ #
176
+ # @param exprs [Array]
177
+ # Column(s) to use in the aggregation. Accepts expression input. Strings are
178
+ # parsed as column names, other non-expression inputs are parsed as literals.
179
+ #
180
+ # @return [Expr]
181
+ #
182
+ # @example
183
+ # df = Polars::DataFrame.new(
184
+ # {
185
+ # "a" => [1, 8, 3],
186
+ # "b" => [4, 5, nil],
187
+ # "c" => ["x", "y", "z"]
188
+ # }
189
+ # )
190
+ # df.with_columns(mean: Polars.mean_horizontal("a", "b"))
191
+ # # =>
192
+ # # shape: (3, 4)
193
+ # # ┌─────┬──────┬─────┬──────┐
194
+ # # │ a ┆ b ┆ c ┆ mean │
195
+ # # │ --- ┆ --- ┆ --- ┆ --- │
196
+ # # │ i64 ┆ i64 ┆ str ┆ f64 │
197
+ # # ╞═════╪══════╪═════╪══════╡
198
+ # # │ 1 ┆ 4 ┆ x ┆ 2.5 │
199
+ # # │ 8 ┆ 5 ┆ y ┆ 6.5 │
200
+ # # │ 3 ┆ null ┆ z ┆ 3.0 │
201
+ # # └─────┴──────┴─────┴──────┘
202
def mean_horizontal(*exprs)
  # Run the inputs through Utils.parse_as_list_of_expressions, hand the
  # result to Plr.mean_horizontal, and wrap whatever that call returns.
  parsed = Utils.parse_as_list_of_expressions(*exprs)
  combined = Plr.mean_horizontal(parsed)
  Utils.wrap_expr(combined)
end
206
+
207
+ # Cumulatively sum all values horizontally across columns.
208
+ #
209
+ # @param exprs [Array]
210
+ # Column(s) to use in the aggregation. Accepts expression input. Strings are
211
+ # parsed as column names, other non-expression inputs are parsed as literals.
212
+ #
213
+ # @return [Expr]
214
+ #
215
+ # @example
216
+ # df = Polars::DataFrame.new(
217
+ # {
218
+ # "a" => [1, 8, 3],
219
+ # "b" => [4, 5, nil],
220
+ # "c" => ["x", "y", "z"]
221
+ # }
222
+ # )
223
+ # df.with_columns(Polars.cum_sum_horizontal("a", "b"))
224
+ # # =>
225
+ # # shape: (3, 4)
226
+ # # ┌─────┬──────┬─────┬───────────┐
227
+ # # │ a ┆ b ┆ c ┆ cum_sum │
228
+ # # │ --- ┆ --- ┆ --- ┆ --- │
229
+ # # │ i64 ┆ i64 ┆ str ┆ struct[2] │
230
+ # # ╞═════╪══════╪═════╪═══════════╡
231
+ # # │ 1 ┆ 4 ┆ x ┆ {1,5} │
232
+ # # │ 8 ┆ 5 ┆ y ┆ {8,13} │
233
+ # # │ 3 ┆ null ┆ z ┆ {3,null} │
234
+ # # └─────┴──────┴─────┴───────────┘
235
def cum_sum_horizontal(*exprs)
  # Parse the inputs, then wrap each parsed element individually so the
  # fold below receives a list of wrapped expressions.
  columns = Utils.parse_as_list_of_expressions(*exprs).map do |rbexpr|
    Utils.wrap_expr(rbexpr)
  end

  # Seed the fold with a zero literal cast to UInt32. Per the original
  # author's note, u32 "will not cast to float as eagerly".
  seed = Polars.lit(0).cast(UInt32)
  add = ->(acc, value) { acc + value }

  Polars.cum_fold(seed, add, columns).alias("cum_sum")
end
244
+ alias_method :cumsum_horizontal, :cum_sum_horizontal
245
+ end
246
+ end
@@ -0,0 +1,282 @@
1
+ module Polars
2
+ module Functions
3
+ # Either return an expression representing all columns, or evaluate a bitwise AND operation.
4
+ #
5
+ # If no arguments are passed, this function is syntactic sugar for `col("*")`.
6
+ # Otherwise, this function is syntactic sugar for `col(names).all`.
7
+ #
8
+ # @param names [Array]
9
+ # Name(s) of the columns to use in the aggregation.
10
+ # @param ignore_nulls [Boolean]
11
+ # Ignore null values (default).
12
+ #
13
+ # @return [Expr]
14
+ #
15
+ # @example Selecting all columns.
16
+ # df = Polars::DataFrame.new(
17
+ # {
18
+ # "a" => [true, false, true],
19
+ # "b" => [false, false, false]
20
+ # }
21
+ # )
22
+ # df.select(Polars.all.sum)
23
+ # # =>
24
+ # # shape: (1, 2)
25
+ # # ┌─────┬─────┐
26
+ # # │ a ┆ b │
27
+ # # │ --- ┆ --- │
28
+ # # │ u32 ┆ u32 │
29
+ # # ╞═════╪═════╡
30
+ # # │ 2 ┆ 0 │
31
+ # # └─────┴─────┘
32
+ #
33
+ # @example Evaluate bitwise AND for a column.
34
+ # df.select(Polars.all("a"))
35
+ # # =>
36
+ # # shape: (1, 1)
37
+ # # ┌───────┐
38
+ # # │ a │
39
+ # # │ --- │
40
+ # # │ bool │
41
+ # # ╞═══════╡
42
+ # # │ false │
43
+ # # └───────┘
44
def all(*names, ignore_nulls: true)
  # No names given: return the wildcard column selection instead of
  # building an aggregation.
  return col("*") if names.empty?

  # The public keyword is ignore_nulls; it is forwarded to the
  # expression's `all` under the keyword drop_nulls.
  selected = col(*names)
  selected.all(drop_nulls: ignore_nulls)
end
51
+
52
+ # Evaluate a bitwise OR operation.
53
+ #
54
+ # Syntactic sugar for `col(names).any`.
55
+ #
56
+ # @param names [Array]
57
+ # Name(s) of the columns to use in the aggregation.
58
+ # @param ignore_nulls [Boolean]
59
+ # Ignore null values (default).
60
+ #
61
+ # @return [Expr]
62
+ #
63
+ # @example
64
+ # df = Polars::DataFrame.new(
65
+ # {
66
+ # "a" => [true, false, true],
67
+ # "b" => [false, false, false]
68
+ # }
69
+ # )
70
+ # df.select(Polars.any("a"))
71
+ # # =>
72
+ # # shape: (1, 1)
73
+ # # ┌──────┐
74
+ # # │ a │
75
+ # # │ --- │
76
+ # # │ bool │
77
+ # # ╞══════╡
78
+ # # │ true │
79
+ # # └──────┘
80
def any(*names, ignore_nulls: true)
  # The public keyword is ignore_nulls; it is forwarded to the
  # expression's `any` under the keyword drop_nulls.
  selected = col(*names)
  selected.any(drop_nulls: ignore_nulls)
end
83
+
84
+ # Get the maximum value.
85
+ #
86
+ # Syntactic sugar for `col(names).max`.
87
+ #
88
+ # @param names [Array]
89
+ # Name(s) of the columns to use in the aggregation.
90
+ #
91
+ # @return [Expr]
92
+ #
93
+ # @example Get the maximum value of a column.
94
+ # df = Polars::DataFrame.new(
95
+ # {
96
+ # "a" => [1, 8, 3],
97
+ # "b" => [4, 5, 2],
98
+ # "c" => ["foo", "bar", "foo"]
99
+ # }
100
+ # )
101
+ # df.select(Polars.max("a"))
102
+ # # =>
103
+ # # shape: (1, 1)
104
+ # # ┌─────┐
105
+ # # │ a │
106
+ # # │ --- │
107
+ # # │ i64 │
108
+ # # ╞═════╡
109
+ # # │ 8 │
110
+ # # └─────┘
111
+ #
112
+ # @example Get the maximum value of multiple columns.
113
+ # df.select(Polars.max("^a|b$"))
114
+ # # =>
115
+ # # shape: (1, 2)
116
+ # # ┌─────┬─────┐
117
+ # # │ a ┆ b │
118
+ # # │ --- ┆ --- │
119
+ # # │ i64 ┆ i64 │
120
+ # # ╞═════╪═════╡
121
+ # # │ 8 ┆ 5 │
122
+ # # └─────┴─────┘
123
+ #
124
+ # @example
125
+ # df.select(Polars.max("a", "b"))
126
+ # # =>
127
+ # # shape: (1, 2)
128
+ # # ┌─────┬─────┐
129
+ # # │ a ┆ b │
130
+ # # │ --- ┆ --- │
131
+ # # │ i64 ┆ i64 │
132
+ # # ╞═════╪═════╡
133
+ # # │ 8 ┆ 5 │
134
+ # # └─────┴─────┘
135
def max(*names)
  # Build the column selection first, then call `max` on it.
  selected = col(*names)
  selected.max
end
138
+
139
+ # Get the minimum value.
140
+ #
141
+ # Syntactic sugar for `col(names).min`.
142
+ #
143
+ # @param names [Array]
144
+ # Name(s) of the columns to use in the aggregation.
145
+ #
146
+ # @return [Expr]
147
+ #
148
+ # @example Get the minimum value of a column.
149
+ # df = Polars::DataFrame.new(
150
+ # {
151
+ # "a" => [1, 8, 3],
152
+ # "b" => [4, 5, 2],
153
+ # "c" => ["foo", "bar", "foo"]
154
+ # }
155
+ # )
156
+ # df.select(Polars.min("a"))
157
+ # # =>
158
+ # # shape: (1, 1)
159
+ # # ┌─────┐
160
+ # # │ a │
161
+ # # │ --- │
162
+ # # │ i64 │
163
+ # # ╞═════╡
164
+ # # │ 1 │
165
+ # # └─────┘
166
+ #
167
+ # @example Get the minimum value of multiple columns.
168
+ # df.select(Polars.min("^a|b$"))
169
+ # # =>
170
+ # # shape: (1, 2)
171
+ # # ┌─────┬─────┐
172
+ # # │ a ┆ b │
173
+ # # │ --- ┆ --- │
174
+ # # │ i64 ┆ i64 │
175
+ # # ╞═════╪═════╡
176
+ # # │ 1 ┆ 2 │
177
+ # # └─────┴─────┘
178
+ #
179
+ # @example
180
+ # df.select(Polars.min("a", "b"))
181
+ # # =>
182
+ # # shape: (1, 2)
183
+ # # ┌─────┬─────┐
184
+ # # │ a ┆ b │
185
+ # # │ --- ┆ --- │
186
+ # # │ i64 ┆ i64 │
187
+ # # ╞═════╪═════╡
188
+ # # │ 1 ┆ 2 │
189
+ # # └─────┴─────┘
190
def min(*names)
  # Build the column selection first, then call `min` on it.
  selected = col(*names)
  selected.min
end
193
+
194
+ # Sum all values.
195
+ #
196
+ # Syntactic sugar for `col(names).sum`.
197
+ #
198
+ # @param names [Array]
199
+ # Name(s) of the columns to use in the aggregation.
200
+ #
201
+ # @return [Expr]
202
+ #
203
+ # @example Sum a column.
204
+ # df = Polars::DataFrame.new(
205
+ # {
206
+ # "a" => [1, 2],
207
+ # "b" => [3, 4],
208
+ # "c" => [5, 6]
209
+ # }
210
+ # )
211
+ # df.select(Polars.sum("a"))
212
+ # # =>
213
+ # # shape: (1, 1)
214
+ # # ┌─────┐
215
+ # # │ a │
216
+ # # │ --- │
217
+ # # │ i64 │
218
+ # # ╞═════╡
219
+ # # │ 3 │
220
+ # # └─────┘
221
+ #
222
+ # @example Sum multiple columns.
223
+ # df.select(Polars.sum("a", "c"))
224
+ # # =>
225
+ # # shape: (1, 2)
226
+ # # ┌─────┬─────┐
227
+ # # │ a ┆ c │
228
+ # # │ --- ┆ --- │
229
+ # # │ i64 ┆ i64 │
230
+ # # ╞═════╪═════╡
231
+ # # │ 3 ┆ 11 │
232
+ # # └─────┴─────┘
233
+ #
234
+ # @example
235
+ # df.select(Polars.sum("^.*[bc]$"))
236
+ # # =>
237
+ # # shape: (1, 2)
238
+ # # ┌─────┬─────┐
239
+ # # │ b ┆ c │
240
+ # # │ --- ┆ --- │
241
+ # # │ i64 ┆ i64 │
242
+ # # ╞═════╪═════╡
243
+ # # │ 7 ┆ 11 │
244
+ # # └─────┴─────┘
245
def sum(*names)
  # Build the column selection first, then call `sum` on it.
  selected = col(*names)
  selected.sum
end
248
+
249
+ # Cumulatively sum all values.
250
+ #
251
+ # Syntactic sugar for `col(names).cum_sum`.
252
+ #
253
+ # @param names [Array]
254
+ # Name(s) of the columns to use in the aggregation.
255
+ #
256
+ # @return [Expr]
257
+ #
258
+ # @example
259
+ # df = Polars::DataFrame.new(
260
+ # {
261
+ # "a" => [1, 2, 3],
262
+ # "b" => [4, 5, 6]
263
+ # }
264
+ # )
265
+ # df.select(Polars.cum_sum("a"))
266
+ # # =>
267
+ # # shape: (3, 1)
268
+ # # ┌─────┐
269
+ # # │ a │
270
+ # # │ --- │
271
+ # # │ i64 │
272
+ # # ╞═════╡
273
+ # # │ 1 │
274
+ # # │ 3 │
275
+ # # │ 6 │
276
+ # # └─────┘
277
def cum_sum(*names)
  # Build the column selection first, then call `cum_sum` on it.
  selected = col(*names)
  selected.cum_sum
end
280
+ alias_method :cumsum, :cum_sum
281
+ end
282
+ end