polars-df 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +42 -1
  3. data/Cargo.lock +159 -66
  4. data/Cargo.toml +0 -3
  5. data/LICENSE.txt +1 -1
  6. data/README.md +3 -2
  7. data/ext/polars/Cargo.toml +18 -8
  8. data/ext/polars/src/batched_csv.rs +7 -5
  9. data/ext/polars/src/conversion/anyvalue.rs +186 -0
  10. data/ext/polars/src/conversion/chunked_array.rs +140 -0
  11. data/ext/polars/src/{conversion.rs → conversion/mod.rs} +273 -342
  12. data/ext/polars/src/dataframe.rs +108 -66
  13. data/ext/polars/src/expr/array.rs +78 -0
  14. data/ext/polars/src/expr/datetime.rs +29 -58
  15. data/ext/polars/src/expr/general.rs +83 -36
  16. data/ext/polars/src/expr/list.rs +58 -6
  17. data/ext/polars/src/expr/meta.rs +48 -0
  18. data/ext/polars/src/expr/rolling.rs +1 -0
  19. data/ext/polars/src/expr/string.rs +62 -11
  20. data/ext/polars/src/expr/struct.rs +8 -4
  21. data/ext/polars/src/file.rs +158 -11
  22. data/ext/polars/src/functions/aggregation.rs +6 -0
  23. data/ext/polars/src/functions/lazy.rs +120 -50
  24. data/ext/polars/src/functions/meta.rs +45 -1
  25. data/ext/polars/src/functions/string_cache.rs +14 -0
  26. data/ext/polars/src/functions/whenthen.rs +47 -17
  27. data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +195 -40
  28. data/ext/polars/src/lib.rs +246 -179
  29. data/ext/polars/src/map/dataframe.rs +17 -9
  30. data/ext/polars/src/series/aggregation.rs +20 -0
  31. data/ext/polars/src/series/mod.rs +35 -4
  32. data/lib/polars/array_expr.rb +453 -0
  33. data/lib/polars/array_name_space.rb +346 -0
  34. data/lib/polars/batched_csv_reader.rb +4 -2
  35. data/lib/polars/cat_expr.rb +24 -0
  36. data/lib/polars/cat_name_space.rb +75 -0
  37. data/lib/polars/config.rb +2 -2
  38. data/lib/polars/data_frame.rb +306 -96
  39. data/lib/polars/data_types.rb +191 -28
  40. data/lib/polars/date_time_expr.rb +41 -18
  41. data/lib/polars/date_time_name_space.rb +9 -3
  42. data/lib/polars/exceptions.rb +12 -1
  43. data/lib/polars/expr.rb +898 -215
  44. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  45. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  46. data/lib/polars/functions/as_datatype.rb +248 -0
  47. data/lib/polars/functions/col.rb +47 -0
  48. data/lib/polars/functions/eager.rb +182 -0
  49. data/lib/polars/functions/lazy.rb +1280 -0
  50. data/lib/polars/functions/len.rb +49 -0
  51. data/lib/polars/functions/lit.rb +35 -0
  52. data/lib/polars/functions/random.rb +16 -0
  53. data/lib/polars/functions/range/date_range.rb +103 -0
  54. data/lib/polars/functions/range/int_range.rb +51 -0
  55. data/lib/polars/functions/repeat.rb +144 -0
  56. data/lib/polars/functions/whenthen.rb +96 -0
  57. data/lib/polars/functions.rb +29 -416
  58. data/lib/polars/group_by.rb +2 -2
  59. data/lib/polars/io.rb +36 -31
  60. data/lib/polars/lazy_frame.rb +405 -88
  61. data/lib/polars/list_expr.rb +158 -8
  62. data/lib/polars/list_name_space.rb +102 -0
  63. data/lib/polars/meta_expr.rb +175 -7
  64. data/lib/polars/series.rb +282 -41
  65. data/lib/polars/string_cache.rb +75 -0
  66. data/lib/polars/string_expr.rb +413 -96
  67. data/lib/polars/string_name_space.rb +4 -4
  68. data/lib/polars/testing.rb +507 -0
  69. data/lib/polars/utils.rb +106 -8
  70. data/lib/polars/version.rb +1 -1
  71. data/lib/polars/whenthen.rb +83 -0
  72. data/lib/polars.rb +16 -4
  73. metadata +37 -8
  74. data/lib/polars/lazy_functions.rb +0 -1181
  75. data/lib/polars/when.rb +0 -16
  76. data/lib/polars/when_then.rb +0 -19
@@ -265,9 +265,9 @@ pub fn apply_lambda_with_rows_output<'a>(
265
265
  // to the row. Before we mutate the row buf again, the reference is dropped.
266
266
  // we only cannot prove it to the compiler.
267
267
  // we still do this because it saves a Vec allocation in a hot loop.
268
- unsafe { &*ptr }
268
+ Ok(unsafe { &*ptr })
269
269
  }
270
- None => &null_row,
270
+ None => Ok(&null_row),
271
271
  }
272
272
  }
273
273
  Err(e) => panic!("ruby function failed {}", e),
@@ -277,22 +277,30 @@ pub fn apply_lambda_with_rows_output<'a>(
277
277
  // first rows for schema inference
278
278
  let mut buf = Vec::with_capacity(inference_size);
279
279
  buf.push(first_value);
280
- buf.extend((&mut row_iter).take(inference_size).cloned());
281
- let schema = rows_to_schema_first_non_null(&buf, Some(50));
280
+ for v in (&mut row_iter).take(inference_size) {
281
+ buf.push(v?.clone());
282
+ }
283
+
284
+ let schema = rows_to_schema_first_non_null(&buf, Some(50))?;
282
285
 
283
286
  if init_null_count > 0 {
284
287
  // Safety: we know the iterators size
285
288
  let iter = unsafe {
286
289
  (0..init_null_count)
287
- .map(|_| &null_row)
288
- .chain(buf.iter())
290
+ .map(|_| Ok(&null_row))
291
+ .chain(buf.iter().map(Ok))
289
292
  .chain(row_iter)
290
293
  .trust_my_length(df.height())
291
294
  };
292
- DataFrame::from_rows_iter_and_schema(iter, &schema)
295
+ DataFrame::try_from_rows_iter_and_schema(iter, &schema)
293
296
  } else {
294
297
  // Safety: we know the iterators size
295
- let iter = unsafe { buf.iter().chain(row_iter).trust_my_length(df.height()) };
296
- DataFrame::from_rows_iter_and_schema(iter, &schema)
298
+ let iter = unsafe {
299
+ buf.iter()
300
+ .map(Ok)
301
+ .chain(row_iter)
302
+ .trust_my_length(df.height())
303
+ };
304
+ DataFrame::try_from_rows_iter_and_schema(iter, &schema)
297
305
  }
298
306
  }
@@ -4,6 +4,26 @@ use crate::{RbResult, RbSeries, RbValueError};
4
4
  use magnus::{IntoValue, Value};
5
5
 
6
6
  impl RbSeries {
7
+ pub fn any(&self, ignore_nulls: bool) -> RbResult<Option<bool>> {
8
+ let binding = self.series.borrow();
9
+ let s = binding.bool().map_err(RbPolarsErr::from)?;
10
+ Ok(if ignore_nulls {
11
+ Some(s.any())
12
+ } else {
13
+ s.any_kleene()
14
+ })
15
+ }
16
+
17
+ pub fn all(&self, ignore_nulls: bool) -> RbResult<Option<bool>> {
18
+ let binding = self.series.borrow();
19
+ let s = binding.bool().map_err(RbPolarsErr::from)?;
20
+ Ok(if ignore_nulls {
21
+ Some(s.all())
22
+ } else {
23
+ s.all_kleene()
24
+ })
25
+ }
26
+
7
27
  pub fn arg_max(&self) -> Option<usize> {
8
28
  self.series.borrow().arg_max()
9
29
  }
@@ -74,6 +74,24 @@ impl RbSeries {
74
74
  }
75
75
  }
76
76
 
77
+ pub fn cat_uses_lexical_ordering(&self) -> RbResult<bool> {
78
+ let binding = self.series.borrow();
79
+ let ca = binding.categorical().map_err(RbPolarsErr::from)?;
80
+ Ok(ca.uses_lexical_ordering())
81
+ }
82
+
83
+ pub fn cat_is_local(&self) -> RbResult<bool> {
84
+ let binding = self.series.borrow();
85
+ let ca = binding.categorical().map_err(RbPolarsErr::from)?;
86
+ Ok(ca.get_rev_map().is_local())
87
+ }
88
+
89
+ pub fn cat_to_local(&self) -> RbResult<Self> {
90
+ let binding = self.series.borrow();
91
+ let ca = binding.categorical().map_err(RbPolarsErr::from)?;
92
+ Ok(ca.to_local().into_series().into())
93
+ }
94
+
77
95
  pub fn estimated_size(&self) -> usize {
78
96
  self.series.borrow().estimated_size()
79
97
  }
@@ -215,8 +233,18 @@ impl RbSeries {
215
233
  }
216
234
  }
217
235
 
218
- pub fn sort(&self, reverse: bool) -> Self {
219
- (self.series.borrow_mut().sort(reverse)).into()
236
+ pub fn sort(&self, descending: bool, nulls_last: bool, multithreaded: bool) -> RbResult<Self> {
237
+ Ok(self
238
+ .series
239
+ .borrow_mut()
240
+ .sort(
241
+ SortOptions::default()
242
+ .with_order_descending(descending)
243
+ .with_nulls_last(nulls_last)
244
+ .with_multithreaded(multithreaded),
245
+ )
246
+ .map_err(RbPolarsErr::from)?
247
+ .into())
220
248
  }
221
249
 
222
250
  pub fn value_counts(&self, sorted: bool) -> RbResult<RbDataFrame> {
@@ -313,7 +341,7 @@ impl RbSeries {
313
341
  DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
314
342
  DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
315
343
  DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
316
- DataType::Categorical(_, _) => {
344
+ DataType::Categorical(_, _) | DataType::Enum(_, _) => {
317
345
  RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
318
346
  }
319
347
  DataType::Object(_, _) => {
@@ -415,7 +443,10 @@ impl RbSeries {
415
443
  RArray::from_iter(NullIter { iter, n }).into_value()
416
444
  }
417
445
  DataType::Unknown => {
418
- panic!("to_a not implemented for null/unknown")
446
+ panic!("to_a not implemented for unknown")
447
+ }
448
+ DataType::BinaryOffset => {
449
+ unreachable!()
419
450
  }
420
451
  };
421
452
  rblist
@@ -80,5 +80,458 @@ module Polars
80
80
  def sum
81
81
  Utils.wrap_expr(_rbexpr.array_sum)
82
82
  end
83
+
84
+ # Get the unique/distinct values in the array.
85
+ #
86
+ # @param maintain_order [Boolean]
87
+ # Maintain order of data. This requires more work.
88
+ #
89
+ # @return [Expr]
90
+ #
91
+ # @example
92
+ # df = Polars::DataFrame.new(
93
+ # {
94
+ # "a" => [[1, 1, 2]]
95
+ # },
96
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
97
+ # )
98
+ # df.select(Polars.col("a").arr.unique)
99
+ # # =>
100
+ # # shape: (1, 1)
101
+ # # ┌───────────┐
102
+ # # │ a │
103
+ # # │ --- │
104
+ # # │ list[i64] │
105
+ # # ╞═══════════╡
106
+ # # │ [1, 2] │
107
+ # # └───────────┘
108
+ def unique(maintain_order: false)
109
+ Utils.wrap_expr(_rbexpr.arr_unique(maintain_order))
110
+ end
111
+
112
+ # Convert an Array column into a List column with the same inner data type.
113
+ #
114
+ # @return [Expr]
115
+ #
116
+ # @example
117
+ # df = Polars::DataFrame.new(
118
+ # {"a" => [[1, 2], [3, 4]]},
119
+ # schema: {"a" => Polars::Array.new(Polars::Int8, 2)}
120
+ # )
121
+ # df.select(Polars.col("a").arr.to_list)
122
+ # # =>
123
+ # # shape: (2, 1)
124
+ # # ┌──────────┐
125
+ # # │ a │
126
+ # # │ --- │
127
+ # # │ list[i8] │
128
+ # # ╞══════════╡
129
+ # # │ [1, 2] │
130
+ # # │ [3, 4] │
131
+ # # └──────────┘
132
+ def to_list
133
+ Utils.wrap_expr(_rbexpr.arr_to_list)
134
+ end
135
+
136
+ # Evaluate whether any boolean value is true for every subarray.
137
+ #
138
+ # @return [Expr]
139
+ #
140
+ # @example
141
+ # df = Polars::DataFrame.new(
142
+ # {
143
+ # "a": [
144
+ # [true, true],
145
+ # [false, true],
146
+ # [false, false],
147
+ # [nil, nil],
148
+ # nil
149
+ # ]
150
+ # },
151
+ # schema: {"a" => Polars::Array.new(Polars::Boolean, 2)}
152
+ # )
153
+ # df.with_columns(any: Polars.col("a").arr.any)
154
+ # # =>
155
+ # # shape: (5, 2)
156
+ # # ┌────────────────┬───────┐
157
+ # # │ a ┆ any │
158
+ # # │ --- ┆ --- │
159
+ # # │ array[bool, 2] ┆ bool │
160
+ # # ╞════════════════╪═══════╡
161
+ # # │ [true, true] ┆ true │
162
+ # # │ [false, true] ┆ true │
163
+ # # │ [false, false] ┆ false │
164
+ # # │ [null, null] ┆ false │
165
+ # # │ null ┆ null │
166
+ # # └────────────────┴───────┘
167
+ def any
168
+ Utils.wrap_expr(_rbexpr.arr_any)
169
+ end
170
+
171
+ # Evaluate whether all boolean values are true for every subarray.
172
+ #
173
+ # @return [Expr]
174
+ #
175
+ # @example
176
+ # df = Polars::DataFrame.new(
177
+ # {
178
+ # "a": [
179
+ # [true, true],
180
+ # [false, true],
181
+ # [false, false],
182
+ # [nil, nil],
183
+ # nil
184
+ # ]
185
+ # },
186
+ # schema: {"a" => Polars::Array.new(Polars::Boolean, 2)}
187
+ # )
188
+ # df.with_columns(all: Polars.col("a").arr.all)
189
+ # # =>
190
+ # # shape: (5, 2)
191
+ # # ┌────────────────┬───────┐
192
+ # # │ a ┆ all │
193
+ # # │ --- ┆ --- │
194
+ # # │ array[bool, 2] ┆ bool │
195
+ # # ╞════════════════╪═══════╡
196
+ # # │ [true, true] ┆ true │
197
+ # # │ [false, true] ┆ false │
198
+ # # │ [false, false] ┆ false │
199
+ # # │ [null, null] ┆ true │
200
+ # # │ null ┆ null │
201
+ # # └────────────────┴───────┘
202
+ def all
203
+ Utils.wrap_expr(_rbexpr.arr_all)
204
+ end
205
+
206
+ # Sort the arrays in this column.
207
+ #
208
+ # @param descending [Boolean]
209
+ # Sort in descending order.
210
+ # @param nulls_last [Boolean]
211
+ # Place null values last.
212
+ #
213
+ # @return [Expr]
214
+ #
215
+ # @example
216
+ # df = Polars::DataFrame.new(
217
+ # {
218
+ # "a" => [[3, 2, 1], [9, 1, 2]],
219
+ # },
220
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
221
+ # )
222
+ # df.with_columns(sort: Polars.col("a").arr.sort)
223
+ # # =>
224
+ # # shape: (2, 2)
225
+ # # ┌───────────────┬───────────────┐
226
+ # # │ a ┆ sort │
227
+ # # │ --- ┆ --- │
228
+ # # │ array[i64, 3] ┆ array[i64, 3] │
229
+ # # ╞═══════════════╪═══════════════╡
230
+ # # │ [3, 2, 1] ┆ [1, 2, 3] │
231
+ # # │ [9, 1, 2] ┆ [1, 2, 9] │
232
+ # # └───────────────┴───────────────┘
233
+ #
234
+ # @example
235
+ # df.with_columns(sort: Polars.col("a").arr.sort(descending: true))
236
+ # # =>
237
+ # # shape: (2, 2)
238
+ # # ┌───────────────┬───────────────┐
239
+ # # │ a ┆ sort │
240
+ # # │ --- ┆ --- │
241
+ # # │ array[i64, 3] ┆ array[i64, 3] │
242
+ # # ╞═══════════════╪═══════════════╡
243
+ # # │ [3, 2, 1] ┆ [3, 2, 1] │
244
+ # # │ [9, 1, 2] ┆ [9, 2, 1] │
245
+ # # └───────────────┴───────────────┘
246
+ def sort(descending: false, nulls_last: false)
247
+ Utils.wrap_expr(_rbexpr.arr_sort(descending, nulls_last))
248
+ end
249
+
250
+ # Reverse the arrays in this column.
251
+ #
252
+ # @return [Expr]
253
+ #
254
+ # @example
255
+ # df = Polars::DataFrame.new(
256
+ # {
257
+ # "a" => [[3, 2, 1], [9, 1, 2]]
258
+ # },
259
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
260
+ # )
261
+ # df.with_columns(reverse: Polars.col("a").arr.reverse)
262
+ # # =>
263
+ # # shape: (2, 2)
264
+ # # ┌───────────────┬───────────────┐
265
+ # # │ a ┆ reverse │
266
+ # # │ --- ┆ --- │
267
+ # # │ array[i64, 3] ┆ array[i64, 3] │
268
+ # # ╞═══════════════╪═══════════════╡
269
+ # # │ [3, 2, 1] ┆ [1, 2, 3] │
270
+ # # │ [9, 1, 2] ┆ [2, 1, 9] │
271
+ # # └───────────────┴───────────────┘
272
+ def reverse
273
+ Utils.wrap_expr(_rbexpr.arr_reverse)
274
+ end
275
+
276
+ # Retrieve the index of the minimal value in every sub-array.
277
+ #
278
+ # @return [Expr]
279
+ #
280
+ # @example
281
+ # df = Polars::DataFrame.new(
282
+ # {
283
+ # "a" => [[1, 2], [2, 1]]
284
+ # },
285
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
286
+ # )
287
+ # df.with_columns(arg_min: Polars.col("a").arr.arg_min)
288
+ # # =>
289
+ # # shape: (2, 2)
290
+ # # ┌───────────────┬─────────┐
291
+ # # │ a ┆ arg_min │
292
+ # # │ --- ┆ --- │
293
+ # # │ array[i64, 2] ┆ u32 │
294
+ # # ╞═══════════════╪═════════╡
295
+ # # │ [1, 2] ┆ 0 │
296
+ # # │ [2, 1] ┆ 1 │
297
+ # # └───────────────┴─────────┘
298
+ def arg_min
299
+ Utils.wrap_expr(_rbexpr.arr_arg_min)
300
+ end
301
+
302
+ # Retrieve the index of the maximum value in every sub-array.
303
+ #
304
+ # @return [Expr]
305
+ #
306
+ # @example
307
+ # df = Polars::DataFrame.new(
308
+ # {
309
+ # "a" => [[1, 2], [2, 1]]
310
+ # },
311
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
312
+ # )
313
+ # df.with_columns(arg_max: Polars.col("a").arr.arg_max)
314
+ # # =>
315
+ # # shape: (2, 2)
316
+ # # ┌───────────────┬─────────┐
317
+ # # │ a ┆ arg_max │
318
+ # # │ --- ┆ --- │
319
+ # # │ array[i64, 2] ┆ u32 │
320
+ # # ╞═══════════════╪═════════╡
321
+ # # │ [1, 2] ┆ 1 │
322
+ # # │ [2, 1] ┆ 0 │
323
+ # # └───────────────┴─────────┘
324
+ def arg_max
325
+ Utils.wrap_expr(_rbexpr.arr_arg_max)
326
+ end
327
+
328
+ # Get the value by index in the sub-arrays.
329
+ #
330
+ # So index `0` would return the first item of every sub-array
331
+ # and index `-1` would return the last item of every sub-array.
332
+ # If an index is out of bounds, it will return `nil`.
333
+ #
334
+ # @param index [Integer]
335
+ # Index to return per sub-array
336
+ # @param null_on_oob [Boolean]
337
+ # Behavior if an index is out of bounds:
338
+ # true -> set as null
339
+ # false -> raise an error
340
+ #
341
+ # @return [Expr]
342
+ #
343
+ # @example
344
+ # df = Polars::DataFrame.new(
345
+ # {"arr" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]], "idx" => [1, -2, 4]},
346
+ # schema: {"arr" => Polars::Array.new(Polars::Int32, 3), "idx" => Polars::Int32}
347
+ # )
348
+ # df.with_columns(get: Polars.col("arr").arr.get("idx"))
349
+ # # =>
350
+ # # shape: (3, 3)
351
+ # # ┌───────────────┬─────┬──────┐
352
+ # # │ arr ┆ idx ┆ get │
353
+ # # │ --- ┆ --- ┆ --- │
354
+ # # │ array[i32, 3] ┆ i32 ┆ i32 │
355
+ # # ╞═══════════════╪═════╪══════╡
356
+ # # │ [1, 2, 3] ┆ 1 ┆ 2 │
357
+ # # │ [4, 5, 6] ┆ -2 ┆ 5 │
358
+ # # │ [7, 8, 9] ┆ 4 ┆ null │
359
+ # # └───────────────┴─────┴──────┘
360
+ def get(index, null_on_oob: true)
361
+ index = Utils.parse_as_expression(index)
362
+ Utils.wrap_expr(_rbexpr.arr_get(index, null_on_oob))
363
+ end
364
+
365
+ # Get the first value of the sub-arrays.
366
+ #
367
+ # @return [Expr]
368
+ #
369
+ # @example
370
+ # df = Polars::DataFrame.new(
371
+ # {"a" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
372
+ # schema: {"a" => Polars::Array.new(Polars::Int32, 3)}
373
+ # )
374
+ # df.with_columns(first: Polars.col("a").arr.first)
375
+ # # =>
376
+ # # shape: (3, 2)
377
+ # # ┌───────────────┬───────┐
378
+ # # │ a ┆ first │
379
+ # # │ --- ┆ --- │
380
+ # # │ array[i32, 3] ┆ i32 │
381
+ # # ╞═══════════════╪═══════╡
382
+ # # │ [1, 2, 3] ┆ 1 │
383
+ # # │ [4, 5, 6] ┆ 4 │
384
+ # # │ [7, 8, 9] ┆ 7 │
385
+ # # └───────────────┴───────┘
386
+ def first
387
+ get(0)
388
+ end
389
+
390
+ # Get the last value of the sub-arrays.
391
+ #
392
+ # @return [Expr]
393
+ #
394
+ # @example
395
+ # df = Polars::DataFrame.new(
396
+ # {"a" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
397
+ # schema: {"a" => Polars::Array.new(Polars::Int32, 3)}
398
+ # )
399
+ # df.with_columns(last: Polars.col("a").arr.last)
400
+ # # =>
401
+ # # shape: (3, 2)
402
+ # # ┌───────────────┬──────┐
403
+ # # │ a ┆ last │
404
+ # # │ --- ┆ --- │
405
+ # # │ array[i32, 3] ┆ i32 │
406
+ # # ╞═══════════════╪══════╡
407
+ # # │ [1, 2, 3] ┆ 3 │
408
+ # # │ [4, 5, 6] ┆ 6 │
409
+ # # │ [7, 8, 9] ┆ 9 │
410
+ # # └───────────────┴──────┘
411
+ def last
412
+ get(-1)
413
+ end
414
+
415
+ # Join all string items in a sub-array and place a separator between them.
416
+ #
417
+ # This errors if inner type of array `!= String`.
418
+ #
419
+ # @param separator [String]
420
+ # string to separate the items with
421
+ # @param ignore_nulls [Boolean]
422
+ # Ignore null values (default).
423
+ #
424
+ # If set to `false`, null values will be propagated.
425
+ # If the sub-list contains any null values, the output is `nil`.
426
+ #
427
+ # @return [Expr]
428
+ #
429
+ # @example
430
+ # df = Polars::DataFrame.new(
431
+ # {"s" => [["a", "b"], ["x", "y"]], "separator" => ["*", "_"]},
432
+ # schema: {
433
+ # "s" => Polars::Array.new(Polars::String, 2),
434
+ # "separator" => Polars::String
435
+ # }
436
+ # )
437
+ # df.with_columns(join: Polars.col("s").arr.join(Polars.col("separator")))
438
+ # # =>
439
+ # # shape: (2, 3)
440
+ # # ┌───────────────┬───────────┬──────┐
441
+ # # │ s ┆ separator ┆ join │
442
+ # # │ --- ┆ --- ┆ --- │
443
+ # # │ array[str, 2] ┆ str ┆ str │
444
+ # # ╞═══════════════╪═══════════╪══════╡
445
+ # # │ ["a", "b"] ┆ * ┆ a*b │
446
+ # # │ ["x", "y"] ┆ _ ┆ x_y │
447
+ # # └───────────────┴───────────┴──────┘
448
+ def join(separator, ignore_nulls: true)
449
+ separator = Utils.parse_as_expression(separator, str_as_lit: true)
450
+ Utils.wrap_expr(_rbexpr.arr_join(separator, ignore_nulls))
451
+ end
452
+
453
+ # Returns a column with a separate row for every array element.
454
+ #
455
+ # @return [Expr]
456
+ #
457
+ # @example
458
+ # df = Polars::DataFrame.new(
459
+ # {"a" => [[1, 2, 3], [4, 5, 6]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
460
+ # )
461
+ # df.select(Polars.col("a").arr.explode)
462
+ # # =>
463
+ # # shape: (6, 1)
464
+ # # ┌─────┐
465
+ # # │ a │
466
+ # # │ --- │
467
+ # # │ i64 │
468
+ # # ╞═════╡
469
+ # # │ 1 │
470
+ # # │ 2 │
471
+ # # │ 3 │
472
+ # # │ 4 │
473
+ # # │ 5 │
474
+ # # │ 6 │
475
+ # # └─────┘
476
+ def explode
477
+ Utils.wrap_expr(_rbexpr.explode)
478
+ end
479
+
480
+ # Check if sub-arrays contain the given item.
481
+ #
482
+ # @param item [Object]
483
+ # Item that will be checked for membership
484
+ #
485
+ # @return [Expr]
486
+ #
487
+ # @example
488
+ # df = Polars::DataFrame.new(
489
+ # {"a" => [["a", "b"], ["x", "y"], ["a", "c"]]},
490
+ # schema: {"a" => Polars::Array.new(Polars::String, 2)}
491
+ # )
492
+ # df.with_columns(contains: Polars.col("a").arr.contains("a"))
493
+ # # =>
494
+ # # shape: (3, 2)
495
+ # # ┌───────────────┬──────────┐
496
+ # # │ a ┆ contains │
497
+ # # │ --- ┆ --- │
498
+ # # │ array[str, 2] ┆ bool │
499
+ # # ╞═══════════════╪══════════╡
500
+ # # │ ["a", "b"] ┆ true │
501
+ # # │ ["x", "y"] ┆ false │
502
+ # # │ ["a", "c"] ┆ true │
503
+ # # └───────────────┴──────────┘
504
+ def contains(item)
505
+ item = Utils.parse_as_expression(item, str_as_lit: true)
506
+ Utils.wrap_expr(_rbexpr.arr_contains(item))
507
+ end
508
+
509
+ # Count how often the value produced by `element` occurs.
510
+ #
511
+ # @param element [Object]
512
+ # An expression that produces a single value
513
+ #
514
+ # @return [Expr]
515
+ #
516
+ # @example
517
+ # df = Polars::DataFrame.new(
518
+ # {"a" => [[1, 2], [1, 1], [2, 2]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
519
+ # )
520
+ # df.with_columns(number_of_twos: Polars.col("a").arr.count_matches(2))
521
+ # # =>
522
+ # # shape: (3, 2)
523
+ # # ┌───────────────┬────────────────┐
524
+ # # │ a ┆ number_of_twos │
525
+ # # │ --- ┆ --- │
526
+ # # │ array[i64, 2] ┆ u32 │
527
+ # # ╞═══════════════╪════════════════╡
528
+ # # │ [1, 2] ┆ 1 │
529
+ # # │ [1, 1] ┆ 0 │
530
+ # # │ [2, 2] ┆ 2 │
531
+ # # └───────────────┴────────────────┘
532
+ def count_matches(element)
533
+ element = Utils.parse_as_expression(element, str_as_lit: true)
534
+ Utils.wrap_expr(_rbexpr.arr_count_matches(element))
535
+ end
83
536
  end
84
537
  end