polars-df 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -1
- data/Cargo.lock +107 -59
- data/Cargo.toml +0 -3
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +15 -7
- data/ext/polars/src/batched_csv.rs +4 -4
- data/ext/polars/src/conversion/anyvalue.rs +185 -0
- data/ext/polars/src/conversion/chunked_array.rs +140 -0
- data/ext/polars/src/{conversion.rs → conversion/mod.rs} +260 -340
- data/ext/polars/src/dataframe.rs +69 -53
- data/ext/polars/src/expr/array.rs +74 -0
- data/ext/polars/src/expr/datetime.rs +22 -56
- data/ext/polars/src/expr/general.rs +61 -33
- data/ext/polars/src/expr/list.rs +52 -4
- data/ext/polars/src/expr/meta.rs +48 -0
- data/ext/polars/src/expr/rolling.rs +1 -0
- data/ext/polars/src/expr/string.rs +59 -8
- data/ext/polars/src/expr/struct.rs +8 -4
- data/ext/polars/src/functions/aggregation.rs +6 -0
- data/ext/polars/src/functions/lazy.rs +103 -48
- data/ext/polars/src/functions/meta.rs +45 -1
- data/ext/polars/src/functions/string_cache.rs +14 -0
- data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +138 -22
- data/ext/polars/src/lib.rs +226 -168
- data/ext/polars/src/series/aggregation.rs +20 -0
- data/ext/polars/src/series/mod.rs +25 -4
- data/lib/polars/array_expr.rb +449 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +179 -43
- data/lib/polars/data_types.rb +191 -28
- data/lib/polars/date_time_expr.rb +31 -14
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +866 -186
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +27 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +2 -2
- data/lib/polars/io.rb +18 -25
- data/lib/polars/lazy_frame.rb +367 -53
- data/lib/polars/list_expr.rb +152 -6
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +273 -34
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +412 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +52 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -2
- metadata +35 -5
- data/lib/polars/lazy_functions.rb +0 -1181
@@ -74,6 +74,24 @@ impl RbSeries {
|
|
74
74
|
}
|
75
75
|
}
|
76
76
|
|
77
|
+
pub fn cat_uses_lexical_ordering(&self) -> RbResult<bool> {
|
78
|
+
let binding = self.series.borrow();
|
79
|
+
let ca = binding.categorical().map_err(RbPolarsErr::from)?;
|
80
|
+
Ok(ca.uses_lexical_ordering())
|
81
|
+
}
|
82
|
+
|
83
|
+
pub fn cat_is_local(&self) -> RbResult<bool> {
|
84
|
+
let binding = self.series.borrow();
|
85
|
+
let ca = binding.categorical().map_err(RbPolarsErr::from)?;
|
86
|
+
Ok(ca.get_rev_map().is_local())
|
87
|
+
}
|
88
|
+
|
89
|
+
pub fn cat_to_local(&self) -> RbResult<Self> {
|
90
|
+
let binding = self.series.borrow();
|
91
|
+
let ca = binding.categorical().map_err(RbPolarsErr::from)?;
|
92
|
+
Ok(ca.to_local().into_series().into())
|
93
|
+
}
|
94
|
+
|
77
95
|
pub fn estimated_size(&self) -> usize {
|
78
96
|
self.series.borrow().estimated_size()
|
79
97
|
}
|
@@ -215,8 +233,8 @@ impl RbSeries {
|
|
215
233
|
}
|
216
234
|
}
|
217
235
|
|
218
|
-
pub fn sort(&self,
|
219
|
-
(self.series.borrow_mut().sort(
|
236
|
+
pub fn sort(&self, descending: bool, nulls_last: bool) -> Self {
|
237
|
+
(self.series.borrow_mut().sort(descending, nulls_last)).into()
|
220
238
|
}
|
221
239
|
|
222
240
|
pub fn value_counts(&self, sorted: bool) -> RbResult<RbDataFrame> {
|
@@ -313,7 +331,7 @@ impl RbSeries {
|
|
313
331
|
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
314
332
|
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
315
333
|
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
316
|
-
DataType::Categorical(_, _) => {
|
334
|
+
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
317
335
|
RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
|
318
336
|
}
|
319
337
|
DataType::Object(_, _) => {
|
@@ -415,7 +433,10 @@ impl RbSeries {
|
|
415
433
|
RArray::from_iter(NullIter { iter, n }).into_value()
|
416
434
|
}
|
417
435
|
DataType::Unknown => {
|
418
|
-
panic!("to_a not implemented for
|
436
|
+
panic!("to_a not implemented for unknown")
|
437
|
+
}
|
438
|
+
DataType::BinaryOffset => {
|
439
|
+
unreachable!()
|
419
440
|
}
|
420
441
|
};
|
421
442
|
rblist
|
data/lib/polars/array_expr.rb
CHANGED
@@ -80,5 +80,454 @@ module Polars
|
|
80
80
|
def sum
|
81
81
|
Utils.wrap_expr(_rbexpr.array_sum)
|
82
82
|
end
|
83
|
+
|
84
|
+
# Get the unique/distinct values in the array.
|
85
|
+
#
|
86
|
+
# @param maintain_order [Boolean]
|
87
|
+
# Maintain order of data. This requires more work.
|
88
|
+
#
|
89
|
+
# @return [Expr]
|
90
|
+
#
|
91
|
+
# @example
|
92
|
+
# df = Polars::DataFrame.new(
|
93
|
+
# {
|
94
|
+
# "a" => [[1, 1, 2]]
|
95
|
+
# },
|
96
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
97
|
+
# )
|
98
|
+
# df.select(Polars.col("a").arr.unique)
|
99
|
+
# # =>
|
100
|
+
# # shape: (1, 1)
|
101
|
+
# # ┌───────────┐
|
102
|
+
# # │ a │
|
103
|
+
# # │ --- │
|
104
|
+
# # │ list[i64] │
|
105
|
+
# # ╞═══════════╡
|
106
|
+
# # │ [1, 2] │
|
107
|
+
# # └───────────┘
|
108
|
+
def unique(maintain_order: false)
|
109
|
+
Utils.wrap_expr(_rbexpr.arr_unique(maintain_order))
|
110
|
+
end
|
111
|
+
|
112
|
+
# Convert an Array column into a List column with the same inner data type.
|
113
|
+
#
|
114
|
+
# @return [Expr]
|
115
|
+
#
|
116
|
+
# @example
|
117
|
+
# df = Polars::DataFrame.new(
|
118
|
+
# {"a" => [[1, 2], [3, 4]]},
|
119
|
+
# schema: {"a" => Polars::Array.new(Polars::Int8, 2)}
|
120
|
+
# )
|
121
|
+
# df.select(Polars.col("a").arr.to_list)
|
122
|
+
# # =>
|
123
|
+
# # shape: (2, 1)
|
124
|
+
# # ┌──────────┐
|
125
|
+
# # │ a │
|
126
|
+
# # │ --- │
|
127
|
+
# # │ list[i8] │
|
128
|
+
# # ╞══════════╡
|
129
|
+
# # │ [1, 2] │
|
130
|
+
# # │ [3, 4] │
|
131
|
+
# # └──────────┘
|
132
|
+
def to_list
|
133
|
+
Utils.wrap_expr(_rbexpr.arr_to_list)
|
134
|
+
end
|
135
|
+
|
136
|
+
# Evaluate whether any boolean value is true for every subarray.
|
137
|
+
#
|
138
|
+
# @return [Expr]
|
139
|
+
#
|
140
|
+
# @example
|
141
|
+
# df = Polars::DataFrame.new(
|
142
|
+
# {
|
143
|
+
# "a": [
|
144
|
+
# [true, true],
|
145
|
+
# [false, true],
|
146
|
+
# [false, false],
|
147
|
+
# [nil, nil],
|
148
|
+
# nil
|
149
|
+
# ]
|
150
|
+
# },
|
151
|
+
# schema: {"a" => Polars::Array.new(Polars::Boolean, 2)}
|
152
|
+
# )
|
153
|
+
# df.with_columns(any: Polars.col("a").arr.any)
|
154
|
+
# # =>
|
155
|
+
# # shape: (5, 2)
|
156
|
+
# # ┌────────────────┬───────┐
|
157
|
+
# # │ a ┆ any │
|
158
|
+
# # │ --- ┆ --- │
|
159
|
+
# # │ array[bool, 2] ┆ bool │
|
160
|
+
# # ╞════════════════╪═══════╡
|
161
|
+
# # │ [true, true] ┆ true │
|
162
|
+
# # │ [false, true] ┆ true │
|
163
|
+
# # │ [false, false] ┆ false │
|
164
|
+
# # │ [null, null] ┆ false │
|
165
|
+
# # │ null ┆ null │
|
166
|
+
# # └────────────────┴───────┘
|
167
|
+
def any
|
168
|
+
Utils.wrap_expr(_rbexpr.arr_any)
|
169
|
+
end
|
170
|
+
|
171
|
+
# Evaluate whether all boolean values are true for every subarray.
|
172
|
+
#
|
173
|
+
# @return [Expr]
|
174
|
+
#
|
175
|
+
# @example
|
176
|
+
# df = Polars::DataFrame.new(
|
177
|
+
# {
|
178
|
+
# "a": [
|
179
|
+
# [true, true],
|
180
|
+
# [false, true],
|
181
|
+
# [false, false],
|
182
|
+
# [nil, nil],
|
183
|
+
# nil
|
184
|
+
# ]
|
185
|
+
# },
|
186
|
+
# schema: {"a" => Polars::Array.new(Polars::Boolean, 2)}
|
187
|
+
# )
|
188
|
+
# df.with_columns(all: Polars.col("a").arr.all)
|
189
|
+
# # =>
|
190
|
+
# # shape: (5, 2)
|
191
|
+
# # ┌────────────────┬───────┐
|
192
|
+
# # │ a ┆ all │
|
193
|
+
# # │ --- ┆ --- │
|
194
|
+
# # │ array[bool, 2] ┆ bool │
|
195
|
+
# # ╞════════════════╪═══════╡
|
196
|
+
# # │ [true, true] ┆ true │
|
197
|
+
# # │ [false, true] ┆ false │
|
198
|
+
# # │ [false, false] ┆ false │
|
199
|
+
# # │ [null, null] ┆ true │
|
200
|
+
# # │ null ┆ null │
|
201
|
+
# # └────────────────┴───────┘
|
202
|
+
def all
|
203
|
+
Utils.wrap_expr(_rbexpr.arr_all)
|
204
|
+
end
|
205
|
+
|
206
|
+
# Sort the arrays in this column.
|
207
|
+
#
|
208
|
+
# @param descending [Boolean]
|
209
|
+
# Sort in descending order.
|
210
|
+
# @param nulls_last [Boolean]
|
211
|
+
# Place null values last.
|
212
|
+
#
|
213
|
+
# @return [Expr]
|
214
|
+
#
|
215
|
+
# @example
|
216
|
+
# df = Polars::DataFrame.new(
|
217
|
+
# {
|
218
|
+
# "a" => [[3, 2, 1], [9, 1, 2]],
|
219
|
+
# },
|
220
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
221
|
+
# )
|
222
|
+
# df.with_columns(sort: Polars.col("a").arr.sort)
|
223
|
+
# # =>
|
224
|
+
# # shape: (2, 2)
|
225
|
+
# # ┌───────────────┬───────────────┐
|
226
|
+
# # │ a ┆ sort │
|
227
|
+
# # │ --- ┆ --- │
|
228
|
+
# # │ array[i64, 3] ┆ array[i64, 3] │
|
229
|
+
# # ╞═══════════════╪═══════════════╡
|
230
|
+
# # │ [3, 2, 1] ┆ [1, 2, 3] │
|
231
|
+
# # │ [9, 1, 2] ┆ [1, 2, 9] │
|
232
|
+
# # └───────────────┴───────────────┘
|
233
|
+
#
|
234
|
+
# @example
|
235
|
+
# df.with_columns(sort: Polars.col("a").arr.sort(descending: true))
|
236
|
+
# # =>
|
237
|
+
# # shape: (2, 2)
|
238
|
+
# # ┌───────────────┬───────────────┐
|
239
|
+
# # │ a ┆ sort │
|
240
|
+
# # │ --- ┆ --- │
|
241
|
+
# # │ array[i64, 3] ┆ array[i64, 3] │
|
242
|
+
# # ╞═══════════════╪═══════════════╡
|
243
|
+
# # │ [3, 2, 1] ┆ [3, 2, 1] │
|
244
|
+
# # │ [9, 1, 2] ┆ [9, 2, 1] │
|
245
|
+
# # └───────────────┴───────────────┘
|
246
|
+
def sort(descending: false, nulls_last: false)
|
247
|
+
Utils.wrap_expr(_rbexpr.arr_sort(descending, nulls_last))
|
248
|
+
end
|
249
|
+
|
250
|
+
# Reverse the arrays in this column.
|
251
|
+
#
|
252
|
+
# @return [Expr]
|
253
|
+
#
|
254
|
+
# @example
|
255
|
+
# df = Polars::DataFrame.new(
|
256
|
+
# {
|
257
|
+
# "a" => [[3, 2, 1], [9, 1, 2]]
|
258
|
+
# },
|
259
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
260
|
+
# )
|
261
|
+
# df.with_columns(reverse: Polars.col("a").arr.reverse)
|
262
|
+
# # =>
|
263
|
+
# # shape: (2, 2)
|
264
|
+
# # ┌───────────────┬───────────────┐
|
265
|
+
# # │ a ┆ reverse │
|
266
|
+
# # │ --- ┆ --- │
|
267
|
+
# # │ array[i64, 3] ┆ array[i64, 3] │
|
268
|
+
# # ╞═══════════════╪═══════════════╡
|
269
|
+
# # │ [3, 2, 1] ┆ [1, 2, 3] │
|
270
|
+
# # │ [9, 1, 2] ┆ [2, 1, 9] │
|
271
|
+
# # └───────────────┴───────────────┘
|
272
|
+
def reverse
|
273
|
+
Utils.wrap_expr(_rbexpr.arr_reverse)
|
274
|
+
end
|
275
|
+
|
276
|
+
# Retrieve the index of the minimal value in every sub-array.
|
277
|
+
#
|
278
|
+
# @return [Expr]
|
279
|
+
#
|
280
|
+
# @example
|
281
|
+
# df = Polars::DataFrame.new(
|
282
|
+
# {
|
283
|
+
# "a" => [[1, 2], [2, 1]]
|
284
|
+
# },
|
285
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
286
|
+
# )
|
287
|
+
# df.with_columns(arg_min: Polars.col("a").arr.arg_min)
|
288
|
+
# # =>
|
289
|
+
# # shape: (2, 2)
|
290
|
+
# # ┌───────────────┬─────────┐
|
291
|
+
# # │ a ┆ arg_min │
|
292
|
+
# # │ --- ┆ --- │
|
293
|
+
# # │ array[i64, 2] ┆ u32 │
|
294
|
+
# # ╞═══════════════╪═════════╡
|
295
|
+
# # │ [1, 2] ┆ 0 │
|
296
|
+
# # │ [2, 1] ┆ 1 │
|
297
|
+
# # └───────────────┴─────────┘
|
298
|
+
def arg_min
|
299
|
+
Utils.wrap_expr(_rbexpr.arr_arg_min)
|
300
|
+
end
|
301
|
+
|
302
|
+
# Retrieve the index of the maximum value in every sub-array.
|
303
|
+
#
|
304
|
+
# @return [Expr]
|
305
|
+
#
|
306
|
+
# @example
|
307
|
+
# df = Polars::DataFrame.new(
|
308
|
+
# {
|
309
|
+
# "a" => [[1, 2], [2, 1]]
|
310
|
+
# },
|
311
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
312
|
+
# )
|
313
|
+
# df.with_columns(arg_max: Polars.col("a").arr.arg_max)
|
314
|
+
# # =>
|
315
|
+
# # shape: (2, 2)
|
316
|
+
# # ┌───────────────┬─────────┐
|
317
|
+
# # │ a ┆ arg_max │
|
318
|
+
# # │ --- ┆ --- │
|
319
|
+
# # │ array[i64, 2] ┆ u32 │
|
320
|
+
# # ╞═══════════════╪═════════╡
|
321
|
+
# # │ [1, 2] ┆ 1 │
|
322
|
+
# # │ [2, 1] ┆ 0 │
|
323
|
+
# # └───────────────┴─────────┘
|
324
|
+
def arg_max
|
325
|
+
Utils.wrap_expr(_rbexpr.arr_arg_max)
|
326
|
+
end
|
327
|
+
|
328
|
+
# Get the value by index in the sub-arrays.
|
329
|
+
#
|
330
|
+
# So index `0` would return the first item of every sub-array
|
331
|
+
# and index `-1` would return the last item of every sub-array.
|
332
|
+
# If an index is out of bounds, it will return `nil`.
|
333
|
+
#
|
334
|
+
# @param index [Integer]
|
335
|
+
# Index to return per sub-array
|
336
|
+
#
|
337
|
+
# @return [Expr]
|
338
|
+
#
|
339
|
+
# @example
|
340
|
+
# df = Polars::DataFrame.new(
|
341
|
+
# {"arr" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]], "idx" => [1, -2, 4]},
|
342
|
+
# schema: {"arr" => Polars::Array.new(Polars::Int32, 3), "idx" => Polars::Int32}
|
343
|
+
# )
|
344
|
+
# df.with_columns(get: Polars.col("arr").arr.get("idx"))
|
345
|
+
# # =>
|
346
|
+
# # shape: (3, 3)
|
347
|
+
# # ┌───────────────┬─────┬──────┐
|
348
|
+
# # │ arr ┆ idx ┆ get │
|
349
|
+
# # │ --- ┆ --- ┆ --- │
|
350
|
+
# # │ array[i32, 3] ┆ i32 ┆ i32 │
|
351
|
+
# # ╞═══════════════╪═════╪══════╡
|
352
|
+
# # │ [1, 2, 3] ┆ 1 ┆ 2 │
|
353
|
+
# # │ [4, 5, 6] ┆ -2 ┆ 5 │
|
354
|
+
# # │ [7, 8, 9] ┆ 4 ┆ null │
|
355
|
+
# # └───────────────┴─────┴──────┘
|
356
|
+
def get(index)
|
357
|
+
index = Utils.parse_as_expression(index)
|
358
|
+
Utils.wrap_expr(_rbexpr.arr_get(index))
|
359
|
+
end
|
360
|
+
|
361
|
+
# Get the first value of the sub-arrays.
|
362
|
+
#
|
363
|
+
# @return [Expr]
|
364
|
+
#
|
365
|
+
# @example
|
366
|
+
# df = Polars::DataFrame.new(
|
367
|
+
# {"a" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
|
368
|
+
# schema: {"a" => Polars::Array.new(Polars::Int32, 3)}
|
369
|
+
# )
|
370
|
+
# df.with_columns(first: Polars.col("a").arr.first)
|
371
|
+
# # =>
|
372
|
+
# # shape: (3, 2)
|
373
|
+
# # ┌───────────────┬───────┐
|
374
|
+
# # │ a ┆ first │
|
375
|
+
# # │ --- ┆ --- │
|
376
|
+
# # │ array[i32, 3] ┆ i32 │
|
377
|
+
# # ╞═══════════════╪═══════╡
|
378
|
+
# # │ [1, 2, 3] ┆ 1 │
|
379
|
+
# # │ [4, 5, 6] ┆ 4 │
|
380
|
+
# # │ [7, 8, 9] ┆ 7 │
|
381
|
+
# # └───────────────┴───────┘
|
382
|
+
def first
|
383
|
+
get(0)
|
384
|
+
end
|
385
|
+
|
386
|
+
# Get the last value of the sub-arrays.
|
387
|
+
#
|
388
|
+
# @return [Expr]
|
389
|
+
#
|
390
|
+
# @example
|
391
|
+
# df = Polars::DataFrame.new(
|
392
|
+
# {"a" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
|
393
|
+
# schema: {"a" => Polars::Array.new(Polars::Int32, 3)}
|
394
|
+
# )
|
395
|
+
# df.with_columns(last: Polars.col("a").arr.last)
|
396
|
+
# # =>
|
397
|
+
# # shape: (3, 2)
|
398
|
+
# # ┌───────────────┬──────┐
|
399
|
+
# # │ a ┆ last │
|
400
|
+
# # │ --- ┆ --- │
|
401
|
+
# # │ array[i32, 3] ┆ i32 │
|
402
|
+
# # ╞═══════════════╪══════╡
|
403
|
+
# # │ [1, 2, 3] ┆ 3 │
|
404
|
+
# # │ [4, 5, 6] ┆ 6 │
|
405
|
+
# # │ [7, 8, 9] ┆ 9 │
|
406
|
+
# # └───────────────┴──────┘
|
407
|
+
def last
|
408
|
+
get(-1)
|
409
|
+
end
|
410
|
+
|
411
|
+
# Join all string items in a sub-array and place a separator between them.
|
412
|
+
#
|
413
|
+
# This errors if the inner type of the array is not `String`.
|
414
|
+
#
|
415
|
+
# @param separator [String]
|
416
|
+
# string to separate the items with
|
417
|
+
# @param ignore_nulls [Boolean]
|
418
|
+
# Ignore null values (default).
|
419
|
+
#
|
420
|
+
# If set to `false`, null values will be propagated.
|
421
|
+
# If the sub-array contains any null values, the output is `nil`.
|
422
|
+
#
|
423
|
+
# @return [Expr]
|
424
|
+
#
|
425
|
+
# @example
|
426
|
+
# df = Polars::DataFrame.new(
|
427
|
+
# {"s" => [["a", "b"], ["x", "y"]], "separator" => ["*", "_"]},
|
428
|
+
# schema: {
|
429
|
+
# "s" => Polars::Array.new(Polars::String, 2),
|
430
|
+
# "separator" => Polars::String
|
431
|
+
# }
|
432
|
+
# )
|
433
|
+
# df.with_columns(join: Polars.col("s").arr.join(Polars.col("separator")))
|
434
|
+
# # =>
|
435
|
+
# # shape: (2, 3)
|
436
|
+
# # ┌───────────────┬───────────┬──────┐
|
437
|
+
# # │ s ┆ separator ┆ join │
|
438
|
+
# # │ --- ┆ --- ┆ --- │
|
439
|
+
# # │ array[str, 2] ┆ str ┆ str │
|
440
|
+
# # ╞═══════════════╪═══════════╪══════╡
|
441
|
+
# # │ ["a", "b"] ┆ * ┆ a*b │
|
442
|
+
# # │ ["x", "y"] ┆ _ ┆ x_y │
|
443
|
+
# # └───────────────┴───────────┴──────┘
|
444
|
+
def join(separator, ignore_nulls: true)
|
445
|
+
separator = Utils.parse_as_expression(separator, str_as_lit: true)
|
446
|
+
Utils.wrap_expr(_rbexpr.arr_join(separator, ignore_nulls))
|
447
|
+
end
|
448
|
+
|
449
|
+
# Returns a column with a separate row for every array element.
|
450
|
+
#
|
451
|
+
# @return [Expr]
|
452
|
+
#
|
453
|
+
# @example
|
454
|
+
# df = Polars::DataFrame.new(
|
455
|
+
# {"a" => [[1, 2, 3], [4, 5, 6]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
456
|
+
# )
|
457
|
+
# df.select(Polars.col("a").arr.explode)
|
458
|
+
# # =>
|
459
|
+
# # shape: (6, 1)
|
460
|
+
# # ┌─────┐
|
461
|
+
# # │ a │
|
462
|
+
# # │ --- │
|
463
|
+
# # │ i64 │
|
464
|
+
# # ╞═════╡
|
465
|
+
# # │ 1 │
|
466
|
+
# # │ 2 │
|
467
|
+
# # │ 3 │
|
468
|
+
# # │ 4 │
|
469
|
+
# # │ 5 │
|
470
|
+
# # │ 6 │
|
471
|
+
# # └─────┘
|
472
|
+
def explode
|
473
|
+
Utils.wrap_expr(_rbexpr.explode)
|
474
|
+
end
|
475
|
+
|
476
|
+
# Check if sub-arrays contain the given item.
|
477
|
+
#
|
478
|
+
# @param item [Object]
|
479
|
+
# Item that will be checked for membership
|
480
|
+
#
|
481
|
+
# @return [Expr]
|
482
|
+
#
|
483
|
+
# @example
|
484
|
+
# df = Polars::DataFrame.new(
|
485
|
+
# {"a" => [["a", "b"], ["x", "y"], ["a", "c"]]},
|
486
|
+
# schema: {"a" => Polars::Array.new(Polars::String, 2)}
|
487
|
+
# )
|
488
|
+
# df.with_columns(contains: Polars.col("a").arr.contains("a"))
|
489
|
+
# # =>
|
490
|
+
# # shape: (3, 2)
|
491
|
+
# # ┌───────────────┬──────────┐
|
492
|
+
# # │ a ┆ contains │
|
493
|
+
# # │ --- ┆ --- │
|
494
|
+
# # │ array[str, 2] ┆ bool │
|
495
|
+
# # ╞═══════════════╪══════════╡
|
496
|
+
# # │ ["a", "b"] ┆ true │
|
497
|
+
# # │ ["x", "y"] ┆ false │
|
498
|
+
# # │ ["a", "c"] ┆ true │
|
499
|
+
# # └───────────────┴──────────┘
|
500
|
+
def contains(item)
|
501
|
+
item = Utils.parse_as_expression(item, str_as_lit: true)
|
502
|
+
Utils.wrap_expr(_rbexpr.arr_contains(item))
|
503
|
+
end
|
504
|
+
|
505
|
+
# Count how often the value produced by `element` occurs.
|
506
|
+
#
|
507
|
+
# @param element [Object]
|
508
|
+
# An expression that produces a single value
|
509
|
+
#
|
510
|
+
# @return [Expr]
|
511
|
+
#
|
512
|
+
# @example
|
513
|
+
# df = Polars::DataFrame.new(
|
514
|
+
# {"a" => [[1, 2], [1, 1], [2, 2]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
515
|
+
# )
|
516
|
+
# df.with_columns(number_of_twos: Polars.col("a").arr.count_matches(2))
|
517
|
+
# # =>
|
518
|
+
# # shape: (3, 2)
|
519
|
+
# # ┌───────────────┬────────────────┐
|
520
|
+
# # │ a ┆ number_of_twos │
|
521
|
+
# # │ --- ┆ --- │
|
522
|
+
# # │ array[i64, 2] ┆ u32 │
|
523
|
+
# # ╞═══════════════╪════════════════╡
|
524
|
+
# # │ [1, 2] ┆ 1 │
|
525
|
+
# # │ [1, 1] ┆ 0 │
|
526
|
+
# # │ [2, 2] ┆ 2 │
|
527
|
+
# # └───────────────┴────────────────┘
|
528
|
+
def count_matches(element)
|
529
|
+
element = Utils.parse_as_expression(element, str_as_lit: true)
|
530
|
+
Utils.wrap_expr(_rbexpr.arr_count_matches(element))
|
531
|
+
end
|
83
532
|
end
|
84
533
|
end
|