polars-df 0.10.0-x86_64-linux-musl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +175 -0
- data/Cargo.lock +2536 -0
- data/Cargo.toml +6 -0
- data/LICENSE-THIRD-PARTY.txt +38726 -0
- data/LICENSE.txt +20 -0
- data/README.md +437 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +537 -0
- data/lib/polars/array_name_space.rb +423 -0
- data/lib/polars/batched_csv_reader.rb +98 -0
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/cat_expr.rb +72 -0
- data/lib/polars/cat_name_space.rb +125 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/convert.rb +93 -0
- data/lib/polars/data_frame.rb +5418 -0
- data/lib/polars/data_types.rb +466 -0
- data/lib/polars/date_time_expr.rb +1444 -0
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/exceptions.rb +31 -0
- data/lib/polars/expr.rb +6105 -0
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +57 -0
- data/lib/polars/group_by.rb +548 -0
- data/lib/polars/io.rb +890 -0
- data/lib/polars/lazy_frame.rb +2833 -0
- data/lib/polars/lazy_group_by.rb +84 -0
- data/lib/polars/list_expr.rb +791 -0
- data/lib/polars/list_name_space.rb +445 -0
- data/lib/polars/meta_expr.rb +222 -0
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/plot.rb +109 -0
- data/lib/polars/rolling_group_by.rb +37 -0
- data/lib/polars/series.rb +4527 -0
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +1519 -0
- data/lib/polars/string_name_space.rb +810 -0
- data/lib/polars/struct_expr.rb +98 -0
- data/lib/polars/struct_name_space.rb +96 -0
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +422 -0
- data/lib/polars/version.rb +4 -0
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +72 -0
- metadata +125 -0
@@ -0,0 +1,537 @@
|
|
1
|
+
module Polars
|
2
|
+
# Namespace for array related expressions.
|
3
|
+
class ArrayExpr
|
4
|
+
# @private
|
5
|
+
attr_accessor :_rbexpr
|
6
|
+
|
7
|
+
# @private
|
8
|
+
def initialize(expr)
  # Reuse the native expression object held by the wrapped expression.
  native = expr._rbexpr
  self._rbexpr = native
end
|
11
|
+
|
12
|
+
# Compute the min values of the sub-arrays.
|
13
|
+
#
|
14
|
+
# @return [Expr]
|
15
|
+
#
|
16
|
+
# @example
|
17
|
+
# df = Polars::DataFrame.new(
|
18
|
+
# {"a" => [[1, 2], [4, 3]]},
|
19
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
20
|
+
# )
|
21
|
+
# df.select(Polars.col("a").arr.min)
|
22
|
+
# # =>
|
23
|
+
# # shape: (2, 1)
|
24
|
+
# # ┌─────┐
|
25
|
+
# # │ a │
|
26
|
+
# # │ --- │
|
27
|
+
# # │ i64 │
|
28
|
+
# # ╞═════╡
|
29
|
+
# # │ 1 │
|
30
|
+
# # │ 3 │
|
31
|
+
# # └─────┘
|
32
|
+
def min
  # Ask the native expression for the sub-array minimum, then wrap the result.
  min_rbexpr = _rbexpr.array_min
  Utils.wrap_expr(min_rbexpr)
end
|
35
|
+
|
36
|
+
# Compute the max values of the sub-arrays.
|
37
|
+
#
|
38
|
+
# @return [Expr]
|
39
|
+
#
|
40
|
+
# @example
|
41
|
+
# df = Polars::DataFrame.new(
|
42
|
+
# {"a" => [[1, 2], [4, 3]]},
|
43
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
44
|
+
# )
|
45
|
+
# df.select(Polars.col("a").arr.max)
|
46
|
+
# # =>
|
47
|
+
# # shape: (2, 1)
|
48
|
+
# # ┌─────┐
|
49
|
+
# # │ a │
|
50
|
+
# # │ --- │
|
51
|
+
# # │ i64 │
|
52
|
+
# # ╞═════╡
|
53
|
+
# # │ 2 │
|
54
|
+
# # │ 4 │
|
55
|
+
# # └─────┘
|
56
|
+
def max
  # Ask the native expression for the sub-array maximum, then wrap the result.
  max_rbexpr = _rbexpr.array_max
  Utils.wrap_expr(max_rbexpr)
end
|
59
|
+
|
60
|
+
# Compute the sum values of the sub-arrays.
|
61
|
+
#
|
62
|
+
# @return [Expr]
|
63
|
+
#
|
64
|
+
# @example
|
65
|
+
# df = Polars::DataFrame.new(
|
66
|
+
# {"a" => [[1, 2], [4, 3]]},
|
67
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
68
|
+
# )
|
69
|
+
# df.select(Polars.col("a").arr.sum)
|
70
|
+
# # =>
|
71
|
+
# # shape: (2, 1)
|
72
|
+
# # ┌─────┐
|
73
|
+
# # │ a │
|
74
|
+
# # │ --- │
|
75
|
+
# # │ i64 │
|
76
|
+
# # ╞═════╡
|
77
|
+
# # │ 3 │
|
78
|
+
# # │ 7 │
|
79
|
+
# # └─────┘
|
80
|
+
def sum
  # Ask the native expression for the sub-array sum, then wrap the result.
  sum_rbexpr = _rbexpr.array_sum
  Utils.wrap_expr(sum_rbexpr)
end
|
83
|
+
|
84
|
+
# Get the unique/distinct values in the array.
|
85
|
+
#
|
86
|
+
# @param maintain_order [Boolean]
|
87
|
+
# Maintain order of data. This requires more work.
|
88
|
+
#
|
89
|
+
# @return [Expr]
|
90
|
+
#
|
91
|
+
# @example
|
92
|
+
# df = Polars::DataFrame.new(
|
93
|
+
# {
|
94
|
+
# "a" => [[1, 1, 2]]
|
95
|
+
# },
|
96
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
97
|
+
# )
|
98
|
+
# df.select(Polars.col("a").arr.unique)
|
99
|
+
# # =>
|
100
|
+
# # shape: (1, 1)
|
101
|
+
# # ┌───────────┐
|
102
|
+
# # │ a │
|
103
|
+
# # │ --- │
|
104
|
+
# # │ list[i64] │
|
105
|
+
# # ╞═══════════╡
|
106
|
+
# # │ [1, 2] │
|
107
|
+
# # └───────────┘
|
108
|
+
def unique(maintain_order: false)
  # The native call takes `maintain_order` positionally.
  unique_rbexpr = _rbexpr.arr_unique(maintain_order)
  Utils.wrap_expr(unique_rbexpr)
end
|
111
|
+
|
112
|
+
# Convert an Array column into a List column with the same inner data type.
|
113
|
+
#
|
114
|
+
# @return [Expr]
|
115
|
+
#
|
116
|
+
# @example
|
117
|
+
# df = Polars::DataFrame.new(
|
118
|
+
# {"a" => [[1, 2], [3, 4]]},
|
119
|
+
# schema: {"a" => Polars::Array.new(Polars::Int8, 2)}
|
120
|
+
# )
|
121
|
+
# df.select(Polars.col("a").arr.to_list)
|
122
|
+
# # =>
|
123
|
+
# # shape: (2, 1)
|
124
|
+
# # ┌──────────┐
|
125
|
+
# # │ a │
|
126
|
+
# # │ --- │
|
127
|
+
# # │ list[i8] │
|
128
|
+
# # ╞══════════╡
|
129
|
+
# # │ [1, 2] │
|
130
|
+
# # │ [3, 4] │
|
131
|
+
# # └──────────┘
|
132
|
+
def to_list
  # Delegate the conversion to the native expression and wrap the result.
  list_rbexpr = _rbexpr.arr_to_list
  Utils.wrap_expr(list_rbexpr)
end
|
135
|
+
|
136
|
+
# Evaluate whether any boolean value is true for every subarray.
|
137
|
+
#
|
138
|
+
# @return [Expr]
|
139
|
+
#
|
140
|
+
# @example
|
141
|
+
# df = Polars::DataFrame.new(
|
142
|
+
# {
|
143
|
+
# "a" => [
|
144
|
+
# [true, true],
|
145
|
+
# [false, true],
|
146
|
+
# [false, false],
|
147
|
+
# [nil, nil],
|
148
|
+
# nil
|
149
|
+
# ]
|
150
|
+
# },
|
151
|
+
# schema: {"a" => Polars::Array.new(Polars::Boolean, 2)}
|
152
|
+
# )
|
153
|
+
# df.with_columns(any: Polars.col("a").arr.any)
|
154
|
+
# # =>
|
155
|
+
# # shape: (5, 2)
|
156
|
+
# # ┌────────────────┬───────┐
|
157
|
+
# # │ a ┆ any │
|
158
|
+
# # │ --- ┆ --- │
|
159
|
+
# # │ array[bool, 2] ┆ bool │
|
160
|
+
# # ╞════════════════╪═══════╡
|
161
|
+
# # │ [true, true] ┆ true │
|
162
|
+
# # │ [false, true] ┆ true │
|
163
|
+
# # │ [false, false] ┆ false │
|
164
|
+
# # │ [null, null] ┆ false │
|
165
|
+
# # │ null ┆ null │
|
166
|
+
# # └────────────────┴───────┘
|
167
|
+
def any
  # Delegate the "any true" reduction to the native expression and wrap the result.
  any_rbexpr = _rbexpr.arr_any
  Utils.wrap_expr(any_rbexpr)
end
|
170
|
+
|
171
|
+
# Evaluate whether all boolean values are true for every subarray.
|
172
|
+
#
|
173
|
+
# @return [Expr]
|
174
|
+
#
|
175
|
+
# @example
|
176
|
+
# df = Polars::DataFrame.new(
|
177
|
+
# {
|
178
|
+
# "a" => [
|
179
|
+
# [true, true],
|
180
|
+
# [false, true],
|
181
|
+
# [false, false],
|
182
|
+
# [nil, nil],
|
183
|
+
# nil
|
184
|
+
# ]
|
185
|
+
# },
|
186
|
+
# schema: {"a" => Polars::Array.new(Polars::Boolean, 2)}
|
187
|
+
# )
|
188
|
+
# df.with_columns(all: Polars.col("a").arr.all)
|
189
|
+
# # =>
|
190
|
+
# # shape: (5, 2)
|
191
|
+
# # ┌────────────────┬───────┐
|
192
|
+
# # │ a ┆ all │
|
193
|
+
# # │ --- ┆ --- │
|
194
|
+
# # │ array[bool, 2] ┆ bool │
|
195
|
+
# # ╞════════════════╪═══════╡
|
196
|
+
# # │ [true, true] ┆ true │
|
197
|
+
# # │ [false, true] ┆ false │
|
198
|
+
# # │ [false, false] ┆ false │
|
199
|
+
# # │ [null, null] ┆ true │
|
200
|
+
# # │ null ┆ null │
|
201
|
+
# # └────────────────┴───────┘
|
202
|
+
def all
  # Delegate the "all true" reduction to the native expression and wrap the result.
  all_rbexpr = _rbexpr.arr_all
  Utils.wrap_expr(all_rbexpr)
end
|
205
|
+
|
206
|
+
# Sort the arrays in this column.
|
207
|
+
#
|
208
|
+
# @param descending [Boolean]
|
209
|
+
# Sort in descending order.
|
210
|
+
# @param nulls_last [Boolean]
|
211
|
+
# Place null values last.
|
212
|
+
#
|
213
|
+
# @return [Expr]
|
214
|
+
#
|
215
|
+
# @example
|
216
|
+
# df = Polars::DataFrame.new(
|
217
|
+
# {
|
218
|
+
# "a" => [[3, 2, 1], [9, 1, 2]],
|
219
|
+
# },
|
220
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
221
|
+
# )
|
222
|
+
# df.with_columns(sort: Polars.col("a").arr.sort)
|
223
|
+
# # =>
|
224
|
+
# # shape: (2, 2)
|
225
|
+
# # ┌───────────────┬───────────────┐
|
226
|
+
# # │ a ┆ sort │
|
227
|
+
# # │ --- ┆ --- │
|
228
|
+
# # │ array[i64, 3] ┆ array[i64, 3] │
|
229
|
+
# # ╞═══════════════╪═══════════════╡
|
230
|
+
# # │ [3, 2, 1] ┆ [1, 2, 3] │
|
231
|
+
# # │ [9, 1, 2] ┆ [1, 2, 9] │
|
232
|
+
# # └───────────────┴───────────────┘
|
233
|
+
#
|
234
|
+
# @example
|
235
|
+
# df.with_columns(sort: Polars.col("a").arr.sort(descending: true))
|
236
|
+
# # =>
|
237
|
+
# # shape: (2, 2)
|
238
|
+
# # ┌───────────────┬───────────────┐
|
239
|
+
# # │ a ┆ sort │
|
240
|
+
# # │ --- ┆ --- │
|
241
|
+
# # │ array[i64, 3] ┆ array[i64, 3] │
|
242
|
+
# # ╞═══════════════╪═══════════════╡
|
243
|
+
# # │ [3, 2, 1] ┆ [3, 2, 1] │
|
244
|
+
# # │ [9, 1, 2] ┆ [9, 2, 1] │
|
245
|
+
# # └───────────────┴───────────────┘
|
246
|
+
def sort(descending: false, nulls_last: false)
  # Both flags are forwarded positionally: descending first, nulls_last second.
  sorted_rbexpr = _rbexpr.arr_sort(descending, nulls_last)
  Utils.wrap_expr(sorted_rbexpr)
end
|
249
|
+
|
250
|
+
# Reverse the arrays in this column.
|
251
|
+
#
|
252
|
+
# @return [Expr]
|
253
|
+
#
|
254
|
+
# @example
|
255
|
+
# df = Polars::DataFrame.new(
|
256
|
+
# {
|
257
|
+
# "a" => [[3, 2, 1], [9, 1, 2]]
|
258
|
+
# },
|
259
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
260
|
+
# )
|
261
|
+
# df.with_columns(reverse: Polars.col("a").arr.reverse)
|
262
|
+
# # =>
|
263
|
+
# # shape: (2, 2)
|
264
|
+
# # ┌───────────────┬───────────────┐
|
265
|
+
# # │ a ┆ reverse │
|
266
|
+
# # │ --- ┆ --- │
|
267
|
+
# # │ array[i64, 3] ┆ array[i64, 3] │
|
268
|
+
# # ╞═══════════════╪═══════════════╡
|
269
|
+
# # │ [3, 2, 1] ┆ [1, 2, 3] │
|
270
|
+
# # │ [9, 1, 2] ┆ [2, 1, 9] │
|
271
|
+
# # └───────────────┴───────────────┘
|
272
|
+
def reverse
  # Delegate the reversal to the native expression and wrap the result.
  reversed_rbexpr = _rbexpr.arr_reverse
  Utils.wrap_expr(reversed_rbexpr)
end
|
275
|
+
|
276
|
+
# Retrieve the index of the minimal value in every sub-array.
|
277
|
+
#
|
278
|
+
# @return [Expr]
|
279
|
+
#
|
280
|
+
# @example
|
281
|
+
# df = Polars::DataFrame.new(
|
282
|
+
# {
|
283
|
+
# "a" => [[1, 2], [2, 1]]
|
284
|
+
# },
|
285
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
286
|
+
# )
|
287
|
+
# df.with_columns(arg_min: Polars.col("a").arr.arg_min)
|
288
|
+
# # =>
|
289
|
+
# # shape: (2, 2)
|
290
|
+
# # ┌───────────────┬─────────┐
|
291
|
+
# # │ a ┆ arg_min │
|
292
|
+
# # │ --- ┆ --- │
|
293
|
+
# # │ array[i64, 2] ┆ u32 │
|
294
|
+
# # ╞═══════════════╪═════════╡
|
295
|
+
# # │ [1, 2] ┆ 0 │
|
296
|
+
# # │ [2, 1] ┆ 1 │
|
297
|
+
# # └───────────────┴─────────┘
|
298
|
+
def arg_min
  # Delegate the arg-min lookup to the native expression and wrap the result.
  arg_min_rbexpr = _rbexpr.arr_arg_min
  Utils.wrap_expr(arg_min_rbexpr)
end
|
301
|
+
|
302
|
+
# Retrieve the index of the maximum value in every sub-array.
|
303
|
+
#
|
304
|
+
# @return [Expr]
|
305
|
+
#
|
306
|
+
# @example
|
307
|
+
# df = Polars::DataFrame.new(
|
308
|
+
# {
|
309
|
+
# "a" => [[1, 2], [2, 1]]
|
310
|
+
# },
|
311
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
312
|
+
# )
|
313
|
+
# df.with_columns(arg_max: Polars.col("a").arr.arg_max)
|
314
|
+
# # =>
|
315
|
+
# # shape: (2, 2)
|
316
|
+
# # ┌───────────────┬─────────┐
|
317
|
+
# # │ a ┆ arg_max │
|
318
|
+
# # │ --- ┆ --- │
|
319
|
+
# # │ array[i64, 2] ┆ u32 │
|
320
|
+
# # ╞═══════════════╪═════════╡
|
321
|
+
# # │ [1, 2] ┆ 1 │
|
322
|
+
# # │ [2, 1] ┆ 0 │
|
323
|
+
# # └───────────────┴─────────┘
|
324
|
+
def arg_max
  # Delegate the arg-max lookup to the native expression and wrap the result.
  arg_max_rbexpr = _rbexpr.arr_arg_max
  Utils.wrap_expr(arg_max_rbexpr)
end
|
327
|
+
|
328
|
+
# Get the value by index in the sub-arrays.
|
329
|
+
#
|
330
|
+
# So index `0` would return the first item of every sub-array
|
331
|
+
# and index `-1` would return the last item of every sub-array.
|
332
|
+
# By default, an out-of-bounds index yields `nil`; see `null_on_oob`.
|
333
|
+
#
|
334
|
+
# @param index [Integer]
|
335
|
+
# Index to return per sub-array
|
336
|
+
# @param null_on_oob [Boolean]
|
337
|
+
# Behavior if an index is out of bounds:
|
338
|
+
# true -> set as null
|
339
|
+
# false -> raise an error
|
340
|
+
#
|
341
|
+
# @return [Expr]
|
342
|
+
#
|
343
|
+
# @example
|
344
|
+
# df = Polars::DataFrame.new(
|
345
|
+
# {"arr" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]], "idx" => [1, -2, 4]},
|
346
|
+
# schema: {"arr" => Polars::Array.new(Polars::Int32, 3), "idx" => Polars::Int32}
|
347
|
+
# )
|
348
|
+
# df.with_columns(get: Polars.col("arr").arr.get("idx"))
|
349
|
+
# # =>
|
350
|
+
# # shape: (3, 3)
|
351
|
+
# # ┌───────────────┬─────┬──────┐
|
352
|
+
# # │ arr ┆ idx ┆ get │
|
353
|
+
# # │ --- ┆ --- ┆ --- │
|
354
|
+
# # │ array[i32, 3] ┆ i32 ┆ i32 │
|
355
|
+
# # ╞═══════════════╪═════╪══════╡
|
356
|
+
# # │ [1, 2, 3] ┆ 1 ┆ 2 │
|
357
|
+
# # │ [4, 5, 6] ┆ -2 ┆ 5 │
|
358
|
+
# # │ [7, 8, 9] ┆ 4 ┆ null │
|
359
|
+
# # └───────────────┴─────┴──────┘
|
360
|
+
def get(index, null_on_oob: true)
  # Normalise `index` through Utils.parse_as_expression before the native call.
  index_expr = Utils.parse_as_expression(index)
  # `null_on_oob` is forwarded positionally as the second argument.
  picked_rbexpr = _rbexpr.arr_get(index_expr, null_on_oob)
  Utils.wrap_expr(picked_rbexpr)
end
|
364
|
+
|
365
|
+
# Get the first value of the sub-arrays.
|
366
|
+
#
|
367
|
+
# @return [Expr]
|
368
|
+
#
|
369
|
+
# @example
|
370
|
+
# df = Polars::DataFrame.new(
|
371
|
+
# {"a" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
|
372
|
+
# schema: {"a" => Polars::Array.new(Polars::Int32, 3)}
|
373
|
+
# )
|
374
|
+
# df.with_columns(first: Polars.col("a").arr.first)
|
375
|
+
# # =>
|
376
|
+
# # shape: (3, 2)
|
377
|
+
# # ┌───────────────┬───────┐
|
378
|
+
# # │ a ┆ first │
|
379
|
+
# # │ --- ┆ --- │
|
380
|
+
# # │ array[i32, 3] ┆ i32 │
|
381
|
+
# # ╞═══════════════╪═══════╡
|
382
|
+
# # │ [1, 2, 3] ┆ 1 │
|
383
|
+
# # │ [4, 5, 6] ┆ 4 │
|
384
|
+
# # │ [7, 8, 9] ┆ 7 │
|
385
|
+
# # └───────────────┴───────┘
|
386
|
+
def first
  # Index 0 selects the leading element; `get` keeps its default `null_on_oob`.
  leading_index = 0
  get(leading_index)
end
|
389
|
+
|
390
|
+
# Get the last value of the sub-arrays.
|
391
|
+
#
|
392
|
+
# @return [Expr]
|
393
|
+
#
|
394
|
+
# @example
|
395
|
+
# df = Polars::DataFrame.new(
|
396
|
+
# {"a" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
|
397
|
+
# schema: {"a" => Polars::Array.new(Polars::Int32, 3)}
|
398
|
+
# )
|
399
|
+
# df.with_columns(last: Polars.col("a").arr.last)
|
400
|
+
# # =>
|
401
|
+
# # shape: (3, 2)
|
402
|
+
# # ┌───────────────┬──────┐
|
403
|
+
# # │ a ┆ last │
|
404
|
+
# # │ --- ┆ --- │
|
405
|
+
# # │ array[i32, 3] ┆ i32 │
|
406
|
+
# # ╞═══════════════╪══════╡
|
407
|
+
# # │ [1, 2, 3] ┆ 3 │
|
408
|
+
# # │ [4, 5, 6] ┆ 6 │
|
409
|
+
# # │ [7, 8, 9] ┆ 9 │
|
410
|
+
# # └───────────────┴──────┘
|
411
|
+
def last
  # Index -1 selects the trailing element; `get` keeps its default `null_on_oob`.
  trailing_index = -1
  get(trailing_index)
end
|
414
|
+
|
415
|
+
# Join all string items in a sub-array and place a separator between them.
|
416
|
+
#
|
417
|
+
# This errors if inner type of array `!= String`.
|
418
|
+
#
|
419
|
+
# @param separator [String]
|
420
|
+
# string to separate the items with
|
421
|
+
# @param ignore_nulls [Boolean]
|
422
|
+
# Ignore null values (default).
|
423
|
+
#
|
424
|
+
# If set to `false`, null values will be propagated.
|
425
|
+
# If the sub-array contains any null values, the output is `nil`.
|
426
|
+
#
|
427
|
+
# @return [Expr]
|
428
|
+
#
|
429
|
+
# @example
|
430
|
+
# df = Polars::DataFrame.new(
|
431
|
+
# {"s" => [["a", "b"], ["x", "y"]], "separator" => ["*", "_"]},
|
432
|
+
# schema: {
|
433
|
+
# "s" => Polars::Array.new(Polars::String, 2),
|
434
|
+
# "separator" => Polars::String
|
435
|
+
# }
|
436
|
+
# )
|
437
|
+
# df.with_columns(join: Polars.col("s").arr.join(Polars.col("separator")))
|
438
|
+
# # =>
|
439
|
+
# # shape: (2, 3)
|
440
|
+
# # ┌───────────────┬───────────┬──────┐
|
441
|
+
# # │ s ┆ separator ┆ join │
|
442
|
+
# # │ --- ┆ --- ┆ --- │
|
443
|
+
# # │ array[str, 2] ┆ str ┆ str │
|
444
|
+
# # ╞═══════════════╪═══════════╪══════╡
|
445
|
+
# # │ ["a", "b"] ┆ * ┆ a*b │
|
446
|
+
# # │ ["x", "y"] ┆ _ ┆ x_y │
|
447
|
+
# # └───────────────┴───────────┴──────┘
|
448
|
+
def join(separator, ignore_nulls: true)
  # `str_as_lit: true` is passed so that a plain String separator is presumably
  # treated as a literal value rather than a column name -- confirm in Utils.
  separator_expr = Utils.parse_as_expression(separator, str_as_lit: true)
  joined_rbexpr = _rbexpr.arr_join(separator_expr, ignore_nulls)
  Utils.wrap_expr(joined_rbexpr)
end
|
452
|
+
|
453
|
+
# Returns a column with a separate row for every array element.
|
454
|
+
#
|
455
|
+
# @return [Expr]
|
456
|
+
#
|
457
|
+
# @example
|
458
|
+
# df = Polars::DataFrame.new(
|
459
|
+
# {"a" => [[1, 2, 3], [4, 5, 6]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
460
|
+
# )
|
461
|
+
# df.select(Polars.col("a").arr.explode)
|
462
|
+
# # =>
|
463
|
+
# # shape: (6, 1)
|
464
|
+
# # ┌─────┐
|
465
|
+
# # │ a │
|
466
|
+
# # │ --- │
|
467
|
+
# # │ i64 │
|
468
|
+
# # ╞═════╡
|
469
|
+
# # │ 1 │
|
470
|
+
# # │ 2 │
|
471
|
+
# # │ 3 │
|
472
|
+
# # │ 4 │
|
473
|
+
# # │ 5 │
|
474
|
+
# # │ 6 │
|
475
|
+
# # └─────┘
|
476
|
+
def explode
  # Uses the native expression's general `explode` (there is no `arr_` prefix here).
  exploded_rbexpr = _rbexpr.explode
  Utils.wrap_expr(exploded_rbexpr)
end
|
479
|
+
|
480
|
+
# Check if sub-arrays contain the given item.
|
481
|
+
#
|
482
|
+
# @param item [Object]
|
483
|
+
# Item that will be checked for membership
|
484
|
+
#
|
485
|
+
# @return [Expr]
|
486
|
+
#
|
487
|
+
# @example
|
488
|
+
# df = Polars::DataFrame.new(
|
489
|
+
# {"a" => [["a", "b"], ["x", "y"], ["a", "c"]]},
|
490
|
+
# schema: {"a" => Polars::Array.new(Polars::String, 2)}
|
491
|
+
# )
|
492
|
+
# df.with_columns(contains: Polars.col("a").arr.contains("a"))
|
493
|
+
# # =>
|
494
|
+
# # shape: (3, 2)
|
495
|
+
# # ┌───────────────┬──────────┐
|
496
|
+
# # │ a ┆ contains │
|
497
|
+
# # │ --- ┆ --- │
|
498
|
+
# # │ array[str, 2] ┆ bool │
|
499
|
+
# # ╞═══════════════╪══════════╡
|
500
|
+
# # │ ["a", "b"] ┆ true │
|
501
|
+
# # │ ["x", "y"] ┆ false │
|
502
|
+
# # │ ["a", "c"] ┆ true │
|
503
|
+
# # └───────────────┴──────────┘
|
504
|
+
def contains(item)
  # Normalise `item` with `str_as_lit: true` before handing it to the native call.
  item_expr = Utils.parse_as_expression(item, str_as_lit: true)
  membership_rbexpr = _rbexpr.arr_contains(item_expr)
  Utils.wrap_expr(membership_rbexpr)
end
|
508
|
+
|
509
|
+
# Count how often the value produced by `element` occurs.
|
510
|
+
#
|
511
|
+
# @param element [Object]
|
512
|
+
# An expression that produces a single value
|
513
|
+
#
|
514
|
+
# @return [Expr]
|
515
|
+
#
|
516
|
+
# @example
|
517
|
+
# df = Polars::DataFrame.new(
|
518
|
+
# {"a" => [[1, 2], [1, 1], [2, 2]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
519
|
+
# )
|
520
|
+
# df.with_columns(number_of_twos: Polars.col("a").arr.count_matches(2))
|
521
|
+
# # =>
|
522
|
+
# # shape: (3, 2)
|
523
|
+
# # ┌───────────────┬────────────────┐
|
524
|
+
# # │ a ┆ number_of_twos │
|
525
|
+
# # │ --- ┆ --- │
|
526
|
+
# # │ array[i64, 2] ┆ u32 │
|
527
|
+
# # ╞═══════════════╪════════════════╡
|
528
|
+
# # │ [1, 2] ┆ 1 │
|
529
|
+
# # │ [1, 1] ┆ 0 │
|
530
|
+
# # │ [2, 2] ┆ 2 │
|
531
|
+
# # └───────────────┴────────────────┘
|
532
|
+
def count_matches(element)
  # Normalise `element` with `str_as_lit: true` before handing it to the native call.
  element_expr = Utils.parse_as_expression(element, str_as_lit: true)
  count_rbexpr = _rbexpr.arr_count_matches(element_expr)
  Utils.wrap_expr(count_rbexpr)
end
|
536
|
+
end
|
537
|
+
end
|