polars-df 0.8.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +42 -1
  3. data/Cargo.lock +159 -66
  4. data/Cargo.toml +0 -3
  5. data/LICENSE.txt +1 -1
  6. data/README.md +3 -2
  7. data/ext/polars/Cargo.toml +18 -8
  8. data/ext/polars/src/batched_csv.rs +7 -5
  9. data/ext/polars/src/conversion/anyvalue.rs +186 -0
  10. data/ext/polars/src/conversion/chunked_array.rs +140 -0
  11. data/ext/polars/src/{conversion.rs → conversion/mod.rs} +273 -342
  12. data/ext/polars/src/dataframe.rs +108 -66
  13. data/ext/polars/src/expr/array.rs +78 -0
  14. data/ext/polars/src/expr/datetime.rs +29 -58
  15. data/ext/polars/src/expr/general.rs +83 -36
  16. data/ext/polars/src/expr/list.rs +58 -6
  17. data/ext/polars/src/expr/meta.rs +48 -0
  18. data/ext/polars/src/expr/rolling.rs +1 -0
  19. data/ext/polars/src/expr/string.rs +62 -11
  20. data/ext/polars/src/expr/struct.rs +8 -4
  21. data/ext/polars/src/file.rs +158 -11
  22. data/ext/polars/src/functions/aggregation.rs +6 -0
  23. data/ext/polars/src/functions/lazy.rs +120 -50
  24. data/ext/polars/src/functions/meta.rs +45 -1
  25. data/ext/polars/src/functions/string_cache.rs +14 -0
  26. data/ext/polars/src/functions/whenthen.rs +47 -17
  27. data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +195 -40
  28. data/ext/polars/src/lib.rs +246 -179
  29. data/ext/polars/src/map/dataframe.rs +17 -9
  30. data/ext/polars/src/series/aggregation.rs +20 -0
  31. data/ext/polars/src/series/mod.rs +35 -4
  32. data/lib/polars/array_expr.rb +453 -0
  33. data/lib/polars/array_name_space.rb +346 -0
  34. data/lib/polars/batched_csv_reader.rb +4 -2
  35. data/lib/polars/cat_expr.rb +24 -0
  36. data/lib/polars/cat_name_space.rb +75 -0
  37. data/lib/polars/config.rb +2 -2
  38. data/lib/polars/data_frame.rb +306 -96
  39. data/lib/polars/data_types.rb +191 -28
  40. data/lib/polars/date_time_expr.rb +41 -18
  41. data/lib/polars/date_time_name_space.rb +9 -3
  42. data/lib/polars/exceptions.rb +12 -1
  43. data/lib/polars/expr.rb +898 -215
  44. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  45. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  46. data/lib/polars/functions/as_datatype.rb +248 -0
  47. data/lib/polars/functions/col.rb +47 -0
  48. data/lib/polars/functions/eager.rb +182 -0
  49. data/lib/polars/functions/lazy.rb +1280 -0
  50. data/lib/polars/functions/len.rb +49 -0
  51. data/lib/polars/functions/lit.rb +35 -0
  52. data/lib/polars/functions/random.rb +16 -0
  53. data/lib/polars/functions/range/date_range.rb +103 -0
  54. data/lib/polars/functions/range/int_range.rb +51 -0
  55. data/lib/polars/functions/repeat.rb +144 -0
  56. data/lib/polars/functions/whenthen.rb +96 -0
  57. data/lib/polars/functions.rb +29 -416
  58. data/lib/polars/group_by.rb +2 -2
  59. data/lib/polars/io.rb +36 -31
  60. data/lib/polars/lazy_frame.rb +405 -88
  61. data/lib/polars/list_expr.rb +158 -8
  62. data/lib/polars/list_name_space.rb +102 -0
  63. data/lib/polars/meta_expr.rb +175 -7
  64. data/lib/polars/series.rb +282 -41
  65. data/lib/polars/string_cache.rb +75 -0
  66. data/lib/polars/string_expr.rb +413 -96
  67. data/lib/polars/string_name_space.rb +4 -4
  68. data/lib/polars/testing.rb +507 -0
  69. data/lib/polars/utils.rb +106 -8
  70. data/lib/polars/version.rb +1 -1
  71. data/lib/polars/whenthen.rb +83 -0
  72. data/lib/polars.rb +16 -4
  73. metadata +37 -8
  74. data/lib/polars/lazy_functions.rb +0 -1181
  75. data/lib/polars/when.rb +0 -16
  76. data/lib/polars/when_then.rb +0 -19
@@ -265,9 +265,9 @@ pub fn apply_lambda_with_rows_output<'a>(
265
265
  // to the row. Before we mutate the row buf again, the reference is dropped.
266
266
  // we just cannot prove it to the compiler.
267
267
  // we still do this because it saves a Vec allocation in a hot loop.
268
- unsafe { &*ptr }
268
+ Ok(unsafe { &*ptr })
269
269
  }
270
- None => &null_row,
270
+ None => Ok(&null_row),
271
271
  }
272
272
  }
273
273
  Err(e) => panic!("ruby function failed {}", e),
@@ -277,22 +277,30 @@ pub fn apply_lambda_with_rows_output<'a>(
277
277
  // first rows for schema inference
278
278
  let mut buf = Vec::with_capacity(inference_size);
279
279
  buf.push(first_value);
280
- buf.extend((&mut row_iter).take(inference_size).cloned());
281
- let schema = rows_to_schema_first_non_null(&buf, Some(50));
280
+ for v in (&mut row_iter).take(inference_size) {
281
+ buf.push(v?.clone());
282
+ }
283
+
284
+ let schema = rows_to_schema_first_non_null(&buf, Some(50))?;
282
285
 
283
286
  if init_null_count > 0 {
284
287
  // Safety: we know the iterator's size
285
288
  let iter = unsafe {
286
289
  (0..init_null_count)
287
- .map(|_| &null_row)
288
- .chain(buf.iter())
290
+ .map(|_| Ok(&null_row))
291
+ .chain(buf.iter().map(Ok))
289
292
  .chain(row_iter)
290
293
  .trust_my_length(df.height())
291
294
  };
292
- DataFrame::from_rows_iter_and_schema(iter, &schema)
295
+ DataFrame::try_from_rows_iter_and_schema(iter, &schema)
293
296
  } else {
294
297
  // Safety: we know the iterator's size
295
- let iter = unsafe { buf.iter().chain(row_iter).trust_my_length(df.height()) };
296
- DataFrame::from_rows_iter_and_schema(iter, &schema)
298
+ let iter = unsafe {
299
+ buf.iter()
300
+ .map(Ok)
301
+ .chain(row_iter)
302
+ .trust_my_length(df.height())
303
+ };
304
+ DataFrame::try_from_rows_iter_and_schema(iter, &schema)
297
305
  }
298
306
  }
@@ -4,6 +4,26 @@ use crate::{RbResult, RbSeries, RbValueError};
4
4
  use magnus::{IntoValue, Value};
5
5
 
6
6
  impl RbSeries {
7
+ pub fn any(&self, ignore_nulls: bool) -> RbResult<Option<bool>> {
8
+ let binding = self.series.borrow();
9
+ let s = binding.bool().map_err(RbPolarsErr::from)?;
10
+ Ok(if ignore_nulls {
11
+ Some(s.any())
12
+ } else {
13
+ s.any_kleene()
14
+ })
15
+ }
16
+
17
+ pub fn all(&self, ignore_nulls: bool) -> RbResult<Option<bool>> {
18
+ let binding = self.series.borrow();
19
+ let s = binding.bool().map_err(RbPolarsErr::from)?;
20
+ Ok(if ignore_nulls {
21
+ Some(s.all())
22
+ } else {
23
+ s.all_kleene()
24
+ })
25
+ }
26
+
7
27
  pub fn arg_max(&self) -> Option<usize> {
8
28
  self.series.borrow().arg_max()
9
29
  }
@@ -74,6 +74,24 @@ impl RbSeries {
74
74
  }
75
75
  }
76
76
 
77
+ pub fn cat_uses_lexical_ordering(&self) -> RbResult<bool> {
78
+ let binding = self.series.borrow();
79
+ let ca = binding.categorical().map_err(RbPolarsErr::from)?;
80
+ Ok(ca.uses_lexical_ordering())
81
+ }
82
+
83
+ pub fn cat_is_local(&self) -> RbResult<bool> {
84
+ let binding = self.series.borrow();
85
+ let ca = binding.categorical().map_err(RbPolarsErr::from)?;
86
+ Ok(ca.get_rev_map().is_local())
87
+ }
88
+
89
+ pub fn cat_to_local(&self) -> RbResult<Self> {
90
+ let binding = self.series.borrow();
91
+ let ca = binding.categorical().map_err(RbPolarsErr::from)?;
92
+ Ok(ca.to_local().into_series().into())
93
+ }
94
+
77
95
  pub fn estimated_size(&self) -> usize {
78
96
  self.series.borrow().estimated_size()
79
97
  }
@@ -215,8 +233,18 @@ impl RbSeries {
215
233
  }
216
234
  }
217
235
 
218
- pub fn sort(&self, reverse: bool) -> Self {
219
- (self.series.borrow_mut().sort(reverse)).into()
236
+ pub fn sort(&self, descending: bool, nulls_last: bool, multithreaded: bool) -> RbResult<Self> {
237
+ Ok(self
238
+ .series
239
+ .borrow_mut()
240
+ .sort(
241
+ SortOptions::default()
242
+ .with_order_descending(descending)
243
+ .with_nulls_last(nulls_last)
244
+ .with_multithreaded(multithreaded),
245
+ )
246
+ .map_err(RbPolarsErr::from)?
247
+ .into())
220
248
  }
221
249
 
222
250
  pub fn value_counts(&self, sorted: bool) -> RbResult<RbDataFrame> {
@@ -313,7 +341,7 @@ impl RbSeries {
313
341
  DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
314
342
  DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
315
343
  DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
316
- DataType::Categorical(_, _) => {
344
+ DataType::Categorical(_, _) | DataType::Enum(_, _) => {
317
345
  RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
318
346
  }
319
347
  DataType::Object(_, _) => {
@@ -415,7 +443,10 @@ impl RbSeries {
415
443
  RArray::from_iter(NullIter { iter, n }).into_value()
416
444
  }
417
445
  DataType::Unknown => {
418
- panic!("to_a not implemented for null/unknown")
446
+ panic!("to_a not implemented for unknown")
447
+ }
448
+ DataType::BinaryOffset => {
449
+ unreachable!()
419
450
  }
420
451
  };
421
452
  rblist
@@ -80,5 +80,458 @@ module Polars
80
80
  def sum
81
81
  Utils.wrap_expr(_rbexpr.array_sum)
82
82
  end
83
+
84
+ # Get the unique/distinct values in the array.
85
+ #
86
+ # @param maintain_order [Boolean]
87
+ # Maintain order of data. This requires more work.
88
+ #
89
+ # @return [Expr]
90
+ #
91
+ # @example
92
+ # df = Polars::DataFrame.new(
93
+ # {
94
+ # "a" => [[1, 1, 2]]
95
+ # },
96
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
97
+ # )
98
+ # df.select(Polars.col("a").arr.unique)
99
+ # # =>
100
+ # # shape: (1, 1)
101
+ # # ┌───────────┐
102
+ # # │ a │
103
+ # # │ --- │
104
+ # # │ list[i64] │
105
+ # # ╞═══════════╡
106
+ # # │ [1, 2] │
107
+ # # └───────────┘
108
+ def unique(maintain_order: false)
109
+ Utils.wrap_expr(_rbexpr.arr_unique(maintain_order))
110
+ end
111
+
112
+ # Convert an Array column into a List column with the same inner data type.
113
+ #
114
+ # @return [Expr]
115
+ #
116
+ # @example
117
+ # df = Polars::DataFrame.new(
118
+ # {"a" => [[1, 2], [3, 4]]},
119
+ # schema: {"a" => Polars::Array.new(Polars::Int8, 2)}
120
+ # )
121
+ # df.select(Polars.col("a").arr.to_list)
122
+ # # =>
123
+ # # shape: (2, 1)
124
+ # # ┌──────────┐
125
+ # # │ a │
126
+ # # │ --- │
127
+ # # │ list[i8] │
128
+ # # ╞══════════╡
129
+ # # │ [1, 2] │
130
+ # # │ [3, 4] │
131
+ # # └──────────┘
132
+ def to_list
133
+ Utils.wrap_expr(_rbexpr.arr_to_list)
134
+ end
135
+
136
+ # Evaluate whether any boolean value is true for every subarray.
137
+ #
138
+ # @return [Expr]
139
+ #
140
+ # @example
141
+ # df = Polars::DataFrame.new(
142
+ # {
143
+ # "a" => [
144
+ # [true, true],
145
+ # [false, true],
146
+ # [false, false],
147
+ # [nil, nil],
148
+ # nil
149
+ # ]
150
+ # },
151
+ # schema: {"a" => Polars::Array.new(Polars::Boolean, 2)}
152
+ # )
153
+ # df.with_columns(any: Polars.col("a").arr.any)
154
+ # # =>
155
+ # # shape: (5, 2)
156
+ # # ┌────────────────┬───────┐
157
+ # # │ a ┆ any │
158
+ # # │ --- ┆ --- │
159
+ # # │ array[bool, 2] ┆ bool │
160
+ # # ╞════════════════╪═══════╡
161
+ # # │ [true, true] ┆ true │
162
+ # # │ [false, true] ┆ true │
163
+ # # │ [false, false] ┆ false │
164
+ # # │ [null, null] ┆ false │
165
+ # # │ null ┆ null │
166
+ # # └────────────────┴───────┘
167
+ def any
168
+ Utils.wrap_expr(_rbexpr.arr_any)
169
+ end
170
+
171
+ # Evaluate whether all boolean values are true for every subarray.
172
+ #
173
+ # @return [Expr]
174
+ #
175
+ # @example
176
+ # df = Polars::DataFrame.new(
177
+ # {
178
+ # "a" => [
179
+ # [true, true],
180
+ # [false, true],
181
+ # [false, false],
182
+ # [nil, nil],
183
+ # nil
184
+ # ]
185
+ # },
186
+ # schema: {"a" => Polars::Array.new(Polars::Boolean, 2)}
187
+ # )
188
+ # df.with_columns(all: Polars.col("a").arr.all)
189
+ # # =>
190
+ # # shape: (5, 2)
191
+ # # ┌────────────────┬───────┐
192
+ # # │ a ┆ all │
193
+ # # │ --- ┆ --- │
194
+ # # │ array[bool, 2] ┆ bool │
195
+ # # ╞════════════════╪═══════╡
196
+ # # │ [true, true] ┆ true │
197
+ # # │ [false, true] ┆ false │
198
+ # # │ [false, false] ┆ false │
199
+ # # │ [null, null] ┆ true │
200
+ # # │ null ┆ null │
201
+ # # └────────────────┴───────┘
202
+ def all
203
+ Utils.wrap_expr(_rbexpr.arr_all)
204
+ end
205
+
206
+ # Sort the arrays in this column.
207
+ #
208
+ # @param descending [Boolean]
209
+ # Sort in descending order.
210
+ # @param nulls_last [Boolean]
211
+ # Place null values last.
212
+ #
213
+ # @return [Expr]
214
+ #
215
+ # @example
216
+ # df = Polars::DataFrame.new(
217
+ # {
218
+ # "a" => [[3, 2, 1], [9, 1, 2]],
219
+ # },
220
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
221
+ # )
222
+ # df.with_columns(sort: Polars.col("a").arr.sort)
223
+ # # =>
224
+ # # shape: (2, 2)
225
+ # # ┌───────────────┬───────────────┐
226
+ # # │ a ┆ sort │
227
+ # # │ --- ┆ --- │
228
+ # # │ array[i64, 3] ┆ array[i64, 3] │
229
+ # # ╞═══════════════╪═══════════════╡
230
+ # # │ [3, 2, 1] ┆ [1, 2, 3] │
231
+ # # │ [9, 1, 2] ┆ [1, 2, 9] │
232
+ # # └───────────────┴───────────────┘
233
+ #
234
+ # @example
235
+ # df.with_columns(sort: Polars.col("a").arr.sort(descending: true))
236
+ # # =>
237
+ # # shape: (2, 2)
238
+ # # ┌───────────────┬───────────────┐
239
+ # # │ a ┆ sort │
240
+ # # │ --- ┆ --- │
241
+ # # │ array[i64, 3] ┆ array[i64, 3] │
242
+ # # ╞═══════════════╪═══════════════╡
243
+ # # │ [3, 2, 1] ┆ [3, 2, 1] │
244
+ # # │ [9, 1, 2] ┆ [9, 2, 1] │
245
+ # # └───────────────┴───────────────┘
246
+ def sort(descending: false, nulls_last: false)
247
+ Utils.wrap_expr(_rbexpr.arr_sort(descending, nulls_last))
248
+ end
249
+
250
+ # Reverse the arrays in this column.
251
+ #
252
+ # @return [Expr]
253
+ #
254
+ # @example
255
+ # df = Polars::DataFrame.new(
256
+ # {
257
+ # "a" => [[3, 2, 1], [9, 1, 2]]
258
+ # },
259
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
260
+ # )
261
+ # df.with_columns(reverse: Polars.col("a").arr.reverse)
262
+ # # =>
263
+ # # shape: (2, 2)
264
+ # # ┌───────────────┬───────────────┐
265
+ # # │ a ┆ reverse │
266
+ # # │ --- ┆ --- │
267
+ # # │ array[i64, 3] ┆ array[i64, 3] │
268
+ # # ╞═══════════════╪═══════════════╡
269
+ # # │ [3, 2, 1] ┆ [1, 2, 3] │
270
+ # # │ [9, 1, 2] ┆ [2, 1, 9] │
271
+ # # └───────────────┴───────────────┘
272
+ def reverse
273
+ Utils.wrap_expr(_rbexpr.arr_reverse)
274
+ end
275
+
276
+ # Retrieve the index of the minimum value in every sub-array.
277
+ #
278
+ # @return [Expr]
279
+ #
280
+ # @example
281
+ # df = Polars::DataFrame.new(
282
+ # {
283
+ # "a" => [[1, 2], [2, 1]]
284
+ # },
285
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
286
+ # )
287
+ # df.with_columns(arg_min: Polars.col("a").arr.arg_min)
288
+ # # =>
289
+ # # shape: (2, 2)
290
+ # # ┌───────────────┬─────────┐
291
+ # # │ a ┆ arg_min │
292
+ # # │ --- ┆ --- │
293
+ # # │ array[i64, 2] ┆ u32 │
294
+ # # ╞═══════════════╪═════════╡
295
+ # # │ [1, 2] ┆ 0 │
296
+ # # │ [2, 1] ┆ 1 │
297
+ # # └───────────────┴─────────┘
298
+ def arg_min
299
+ Utils.wrap_expr(_rbexpr.arr_arg_min)
300
+ end
301
+
302
+ # Retrieve the index of the maximum value in every sub-array.
303
+ #
304
+ # @return [Expr]
305
+ #
306
+ # @example
307
+ # df = Polars::DataFrame.new(
308
+ # {
309
+ # "a" => [[1, 2], [2, 1]]
310
+ # },
311
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
312
+ # )
313
+ # df.with_columns(arg_max: Polars.col("a").arr.arg_max)
314
+ # # =>
315
+ # # shape: (2, 2)
316
+ # # ┌───────────────┬─────────┐
317
+ # # │ a ┆ arg_max │
318
+ # # │ --- ┆ --- │
319
+ # # │ array[i64, 2] ┆ u32 │
320
+ # # ╞═══════════════╪═════════╡
321
+ # # │ [1, 2] ┆ 1 │
322
+ # # │ [2, 1] ┆ 0 │
323
+ # # └───────────────┴─────────┘
324
+ def arg_max
325
+ Utils.wrap_expr(_rbexpr.arr_arg_max)
326
+ end
327
+
328
+ # Get the value by index in the sub-arrays.
329
+ #
330
+ # So index `0` would return the first item of every sub-array
331
+ # and index `-1` would return the last item of every sub-array.
332
+ # If an index is out of bounds, it will return `nil` (when `null_on_oob` is true).
333
+ #
334
+ # @param index [Integer]
335
+ # Index to return per sub-array
336
+ # @param null_on_oob [Boolean]
337
+ # Behavior if an index is out of bounds:
338
+ # true -> set as null
339
+ # false -> raise an error
340
+ #
341
+ # @return [Expr]
342
+ #
343
+ # @example
344
+ # df = Polars::DataFrame.new(
345
+ # {"arr" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]], "idx" => [1, -2, 4]},
346
+ # schema: {"arr" => Polars::Array.new(Polars::Int32, 3), "idx" => Polars::Int32}
347
+ # )
348
+ # df.with_columns(get: Polars.col("arr").arr.get("idx"))
349
+ # # =>
350
+ # # shape: (3, 3)
351
+ # # ┌───────────────┬─────┬──────┐
352
+ # # │ arr ┆ idx ┆ get │
353
+ # # │ --- ┆ --- ┆ --- │
354
+ # # │ array[i32, 3] ┆ i32 ┆ i32 │
355
+ # # ╞═══════════════╪═════╪══════╡
356
+ # # │ [1, 2, 3] ┆ 1 ┆ 2 │
357
+ # # │ [4, 5, 6] ┆ -2 ┆ 5 │
358
+ # # │ [7, 8, 9] ┆ 4 ┆ null │
359
+ # # └───────────────┴─────┴──────┘
360
+ def get(index, null_on_oob: true)
361
+ index = Utils.parse_as_expression(index)
362
+ Utils.wrap_expr(_rbexpr.arr_get(index, null_on_oob))
363
+ end
364
+
365
+ # Get the first value of the sub-arrays.
366
+ #
367
+ # @return [Expr]
368
+ #
369
+ # @example
370
+ # df = Polars::DataFrame.new(
371
+ # {"a" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
372
+ # schema: {"a" => Polars::Array.new(Polars::Int32, 3)}
373
+ # )
374
+ # df.with_columns(first: Polars.col("a").arr.first)
375
+ # # =>
376
+ # # shape: (3, 2)
377
+ # # ┌───────────────┬───────┐
378
+ # # │ a ┆ first │
379
+ # # │ --- ┆ --- │
380
+ # # │ array[i32, 3] ┆ i32 │
381
+ # # ╞═══════════════╪═══════╡
382
+ # # │ [1, 2, 3] ┆ 1 │
383
+ # # │ [4, 5, 6] ┆ 4 │
384
+ # # │ [7, 8, 9] ┆ 7 │
385
+ # # └───────────────┴───────┘
386
+ def first
387
+ get(0)
388
+ end
389
+
390
+ # Get the last value of the sub-arrays.
391
+ #
392
+ # @return [Expr]
393
+ #
394
+ # @example
395
+ # df = Polars::DataFrame.new(
396
+ # {"a" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
397
+ # schema: {"a" => Polars::Array.new(Polars::Int32, 3)}
398
+ # )
399
+ # df.with_columns(last: Polars.col("a").arr.last)
400
+ # # =>
401
+ # # shape: (3, 2)
402
+ # # ┌───────────────┬──────┐
403
+ # # │ a ┆ last │
404
+ # # │ --- ┆ --- │
405
+ # # │ array[i32, 3] ┆ i32 │
406
+ # # ╞═══════════════╪══════╡
407
+ # # │ [1, 2, 3] ┆ 3 │
408
+ # # │ [4, 5, 6] ┆ 6 │
409
+ # # │ [7, 8, 9] ┆ 9 │
410
+ # # └───────────────┴──────┘
411
+ def last
412
+ get(-1)
413
+ end
414
+
415
+ # Join all string items in a sub-array and place a separator between them.
416
+ #
417
+ # This errors if inner type of array `!= String`.
418
+ #
419
+ # @param separator [String]
420
+ # string to separate the items with
421
+ # @param ignore_nulls [Boolean]
422
+ # Ignore null values (default).
423
+ #
424
+ # If set to `false`, null values will be propagated.
425
+ # If the sub-array contains any null values, the output is `nil`.
426
+ #
427
+ # @return [Expr]
428
+ #
429
+ # @example
430
+ # df = Polars::DataFrame.new(
431
+ # {"s" => [["a", "b"], ["x", "y"]], "separator" => ["*", "_"]},
432
+ # schema: {
433
+ # "s" => Polars::Array.new(Polars::String, 2),
434
+ # "separator" => Polars::String
435
+ # }
436
+ # )
437
+ # df.with_columns(join: Polars.col("s").arr.join(Polars.col("separator")))
438
+ # # =>
439
+ # # shape: (2, 3)
440
+ # # ┌───────────────┬───────────┬──────┐
441
+ # # │ s ┆ separator ┆ join │
442
+ # # │ --- ┆ --- ┆ --- │
443
+ # # │ array[str, 2] ┆ str ┆ str │
444
+ # # ╞═══════════════╪═══════════╪══════╡
445
+ # # │ ["a", "b"] ┆ * ┆ a*b │
446
+ # # │ ["x", "y"] ┆ _ ┆ x_y │
447
+ # # └───────────────┴───────────┴──────┘
448
+ def join(separator, ignore_nulls: true)
449
+ separator = Utils.parse_as_expression(separator, str_as_lit: true)
450
+ Utils.wrap_expr(_rbexpr.arr_join(separator, ignore_nulls))
451
+ end
452
+
453
+ # Returns a column with a separate row for every array element.
454
+ #
455
+ # @return [Expr]
456
+ #
457
+ # @example
458
+ # df = Polars::DataFrame.new(
459
+ # {"a" => [[1, 2, 3], [4, 5, 6]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
460
+ # )
461
+ # df.select(Polars.col("a").arr.explode)
462
+ # # =>
463
+ # # shape: (6, 1)
464
+ # # ┌─────┐
465
+ # # │ a │
466
+ # # │ --- │
467
+ # # │ i64 │
468
+ # # ╞═════╡
469
+ # # │ 1 │
470
+ # # │ 2 │
471
+ # # │ 3 │
472
+ # # │ 4 │
473
+ # # │ 5 │
474
+ # # │ 6 │
475
+ # # └─────┘
476
+ def explode
477
+ Utils.wrap_expr(_rbexpr.explode)
478
+ end
479
+
480
+ # Check if sub-arrays contain the given item.
481
+ #
482
+ # @param item [Object]
483
+ # Item that will be checked for membership
484
+ #
485
+ # @return [Expr]
486
+ #
487
+ # @example
488
+ # df = Polars::DataFrame.new(
489
+ # {"a" => [["a", "b"], ["x", "y"], ["a", "c"]]},
490
+ # schema: {"a" => Polars::Array.new(Polars::String, 2)}
491
+ # )
492
+ # df.with_columns(contains: Polars.col("a").arr.contains("a"))
493
+ # # =>
494
+ # # shape: (3, 2)
495
+ # # ┌───────────────┬──────────┐
496
+ # # │ a ┆ contains │
497
+ # # │ --- ┆ --- │
498
+ # # │ array[str, 2] ┆ bool │
499
+ # # ╞═══════════════╪══════════╡
500
+ # # │ ["a", "b"] ┆ true │
501
+ # # │ ["x", "y"] ┆ false │
502
+ # # │ ["a", "c"] ┆ true │
503
+ # # └───────────────┴──────────┘
504
+ def contains(item)
505
+ item = Utils.parse_as_expression(item, str_as_lit: true)
506
+ Utils.wrap_expr(_rbexpr.arr_contains(item))
507
+ end
508
+
509
+ # Count how often the value produced by `element` occurs.
510
+ #
511
+ # @param element [Object]
512
+ # An expression that produces a single value
513
+ #
514
+ # @return [Expr]
515
+ #
516
+ # @example
517
+ # df = Polars::DataFrame.new(
518
+ # {"a" => [[1, 2], [1, 1], [2, 2]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
519
+ # )
520
+ # df.with_columns(number_of_twos: Polars.col("a").arr.count_matches(2))
521
+ # # =>
522
+ # # shape: (3, 2)
523
+ # # ┌───────────────┬────────────────┐
524
+ # # │ a ┆ number_of_twos │
525
+ # # │ --- ┆ --- │
526
+ # # │ array[i64, 2] ┆ u32 │
527
+ # # ╞═══════════════╪════════════════╡
528
+ # # │ [1, 2] ┆ 1 │
529
+ # # │ [1, 1] ┆ 0 │
530
+ # # │ [2, 2] ┆ 2 │
531
+ # # └───────────────┴────────────────┘
532
+ def count_matches(element)
533
+ element = Utils.parse_as_expression(element, str_as_lit: true)
534
+ Utils.wrap_expr(_rbexpr.arr_count_matches(element))
535
+ end
83
536
  end
84
537
  end