polars-df 0.21.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +55 -48
  4. data/Cargo.toml +3 -0
  5. data/README.md +12 -0
  6. data/ext/polars/Cargo.toml +22 -11
  7. data/ext/polars/src/batched_csv.rs +4 -4
  8. data/ext/polars/src/catalog/unity.rs +96 -94
  9. data/ext/polars/src/conversion/any_value.rs +26 -30
  10. data/ext/polars/src/conversion/chunked_array.rs +32 -28
  11. data/ext/polars/src/conversion/datetime.rs +11 -0
  12. data/ext/polars/src/conversion/mod.rs +307 -34
  13. data/ext/polars/src/dataframe/construction.rs +4 -3
  14. data/ext/polars/src/dataframe/export.rs +17 -15
  15. data/ext/polars/src/dataframe/general.rs +15 -12
  16. data/ext/polars/src/dataframe/io.rs +1 -2
  17. data/ext/polars/src/dataframe/mod.rs +25 -1
  18. data/ext/polars/src/dataframe/serde.rs +23 -8
  19. data/ext/polars/src/exceptions.rs +8 -4
  20. data/ext/polars/src/expr/array.rs +73 -4
  21. data/ext/polars/src/expr/binary.rs +26 -1
  22. data/ext/polars/src/expr/bitwise.rs +39 -0
  23. data/ext/polars/src/expr/categorical.rs +20 -0
  24. data/ext/polars/src/expr/datatype.rs +24 -1
  25. data/ext/polars/src/expr/datetime.rs +58 -14
  26. data/ext/polars/src/expr/general.rs +87 -15
  27. data/ext/polars/src/expr/list.rs +32 -24
  28. data/ext/polars/src/expr/meta.rs +15 -6
  29. data/ext/polars/src/expr/mod.rs +3 -0
  30. data/ext/polars/src/expr/name.rs +19 -14
  31. data/ext/polars/src/expr/rolling.rs +20 -0
  32. data/ext/polars/src/expr/serde.rs +28 -0
  33. data/ext/polars/src/expr/string.rs +64 -10
  34. data/ext/polars/src/expr/struct.rs +9 -1
  35. data/ext/polars/src/file.rs +15 -9
  36. data/ext/polars/src/functions/business.rs +0 -1
  37. data/ext/polars/src/functions/io.rs +25 -3
  38. data/ext/polars/src/functions/lazy.rs +11 -6
  39. data/ext/polars/src/functions/meta.rs +3 -3
  40. data/ext/polars/src/functions/string_cache.rs +3 -3
  41. data/ext/polars/src/interop/arrow/to_ruby.rs +3 -3
  42. data/ext/polars/src/interop/numo/numo_rs.rs +4 -3
  43. data/ext/polars/src/io/mod.rs +6 -0
  44. data/ext/polars/src/lazyframe/general.rs +59 -9
  45. data/ext/polars/src/lazyframe/mod.rs +16 -1
  46. data/ext/polars/src/lazyframe/optflags.rs +58 -0
  47. data/ext/polars/src/lazyframe/serde.rs +27 -3
  48. data/ext/polars/src/lib.rs +261 -19
  49. data/ext/polars/src/map/dataframe.rs +20 -17
  50. data/ext/polars/src/map/lazy.rs +6 -5
  51. data/ext/polars/src/map/series.rs +8 -7
  52. data/ext/polars/src/on_startup.rs +12 -5
  53. data/ext/polars/src/rb_modules.rs +2 -2
  54. data/ext/polars/src/series/aggregation.rs +85 -28
  55. data/ext/polars/src/series/construction.rs +1 -0
  56. data/ext/polars/src/series/export.rs +37 -33
  57. data/ext/polars/src/series/general.rs +120 -21
  58. data/ext/polars/src/series/mod.rs +29 -4
  59. data/lib/polars/array_expr.rb +382 -3
  60. data/lib/polars/array_name_space.rb +281 -0
  61. data/lib/polars/binary_expr.rb +67 -0
  62. data/lib/polars/binary_name_space.rb +43 -0
  63. data/lib/polars/cat_expr.rb +224 -0
  64. data/lib/polars/cat_name_space.rb +138 -0
  65. data/lib/polars/config.rb +2 -2
  66. data/lib/polars/convert.rb +6 -6
  67. data/lib/polars/data_frame.rb +794 -27
  68. data/lib/polars/data_type_expr.rb +52 -0
  69. data/lib/polars/data_types.rb +26 -5
  70. data/lib/polars/date_time_expr.rb +252 -1
  71. data/lib/polars/date_time_name_space.rb +299 -0
  72. data/lib/polars/expr.rb +1248 -206
  73. data/lib/polars/functions/business.rb +95 -0
  74. data/lib/polars/functions/datatype.rb +21 -0
  75. data/lib/polars/functions/lazy.rb +14 -1
  76. data/lib/polars/io/csv.rb +1 -1
  77. data/lib/polars/io/iceberg.rb +27 -0
  78. data/lib/polars/io/json.rb +4 -4
  79. data/lib/polars/io/ndjson.rb +4 -4
  80. data/lib/polars/io/parquet.rb +32 -7
  81. data/lib/polars/io/scan_options.rb +4 -1
  82. data/lib/polars/lazy_frame.rb +1028 -28
  83. data/lib/polars/list_expr.rb +217 -17
  84. data/lib/polars/list_name_space.rb +231 -22
  85. data/lib/polars/meta_expr.rb +89 -0
  86. data/lib/polars/name_expr.rb +36 -0
  87. data/lib/polars/query_opt_flags.rb +50 -0
  88. data/lib/polars/scan_cast_options.rb +20 -1
  89. data/lib/polars/schema.rb +79 -3
  90. data/lib/polars/selector.rb +72 -0
  91. data/lib/polars/selectors.rb +3 -3
  92. data/lib/polars/series.rb +1053 -54
  93. data/lib/polars/string_expr.rb +436 -32
  94. data/lib/polars/string_name_space.rb +736 -50
  95. data/lib/polars/struct_expr.rb +103 -0
  96. data/lib/polars/struct_name_space.rb +19 -1
  97. data/lib/polars/utils/serde.rb +17 -0
  98. data/lib/polars/utils/various.rb +22 -1
  99. data/lib/polars/utils.rb +5 -1
  100. data/lib/polars/version.rb +1 -1
  101. data/lib/polars.rb +6 -0
  102. metadata +11 -1
@@ -1,9 +1,12 @@
1
- use magnus::{Error, IntoValue, Value, exception};
1
+ use magnus::{Error, IntoValue, RArray, Ruby, Value, value::ReprValue};
2
2
  use polars::prelude::*;
3
3
  use polars::series::IsSorted;
4
+ use polars_core::utils::flatten::flatten_series;
4
5
 
5
6
  use crate::conversion::*;
6
- use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries};
7
+ use crate::exceptions::RbIndexError;
8
+ use crate::rb_modules;
9
+ use crate::{RbDataFrame, RbErr, RbPolarsErr, RbResult, RbSeries};
7
10
 
8
11
  impl RbSeries {
9
12
  pub fn struct_unnest(&self) -> RbResult<RbDataFrame> {
@@ -13,7 +16,6 @@ impl RbSeries {
13
16
  Ok(df.into())
14
17
  }
15
18
 
16
- // TODO add to Ruby
17
19
  pub fn struct_fields(&self) -> RbResult<Vec<String>> {
18
20
  let binding = self.series.borrow();
19
21
  let ca = binding.struct_().map_err(RbPolarsErr::from)?;
@@ -84,8 +86,39 @@ impl RbSeries {
84
86
  }
85
87
  }
86
88
 
87
- pub fn get_idx(&self, idx: usize) -> RbResult<Value> {
88
- Ok(Wrap(self.series.borrow().get(idx).map_err(RbPolarsErr::from)?).into_value())
89
+ pub fn get_index(ruby: &Ruby, rb_self: &Self, index: usize) -> RbResult<Value> {
90
+ let binding = rb_self.series.borrow();
91
+ let av = match binding.get(index) {
92
+ Ok(v) => v,
93
+ Err(PolarsError::OutOfBounds(err)) => {
94
+ return Err(RbIndexError::new_err(err.to_string()));
95
+ }
96
+ Err(e) => return Err(RbPolarsErr::from(e).into()),
97
+ };
98
+
99
+ match av {
100
+ AnyValue::List(s) | AnyValue::Array(s, _) => {
101
+ let rbseries = RbSeries::new(s);
102
+ rb_modules::pl_utils().funcall("wrap_s", (rbseries,))
103
+ }
104
+ _ => Ok(Wrap(av).into_value_with(ruby)),
105
+ }
106
+ }
107
+
108
+ pub fn get_index_signed(ruby: &Ruby, rb_self: &Self, index: isize) -> RbResult<Value> {
109
+ let index = if index < 0 {
110
+ match rb_self.len().checked_sub(index.unsigned_abs()) {
111
+ Some(v) => v,
112
+ None => {
113
+ return Err(RbIndexError::new_err(
114
+ polars_err!(oob = index, rb_self.len()).to_string(),
115
+ ));
116
+ }
117
+ }
118
+ } else {
119
+ usize::try_from(index).unwrap()
120
+ };
121
+ Self::get_index(ruby, rb_self, index)
89
122
  }
90
123
 
91
124
  pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
@@ -115,16 +148,17 @@ impl RbSeries {
115
148
  self.series.borrow_mut().rename(name.into());
116
149
  }
117
150
 
118
- pub fn dtype(&self) -> Value {
119
- Wrap(self.series.borrow().dtype().clone()).into_value()
151
+ pub fn dtype(ruby: &Ruby, rb_self: &Self) -> Value {
152
+ Wrap(rb_self.series.borrow().dtype().clone()).into_value_with(ruby)
120
153
  }
121
154
 
122
- pub fn inner_dtype(&self) -> Option<Value> {
123
- self.series
155
+ pub fn inner_dtype(ruby: &Ruby, rb_self: &Self) -> Option<Value> {
156
+ rb_self
157
+ .series
124
158
  .borrow()
125
159
  .dtype()
126
160
  .inner_dtype()
127
- .map(|dt| Wrap(dt.clone()).into_value())
161
+ .map(|dt| Wrap(dt.clone()).into_value_with(ruby))
128
162
  }
129
163
 
130
164
  pub fn set_sorted_flag(&self, descending: bool) -> Self {
@@ -141,11 +175,11 @@ impl RbSeries {
141
175
  self.series.borrow().n_chunks()
142
176
  }
143
177
 
144
- pub fn append(&self, other: &RbSeries) -> RbResult<()> {
145
- let mut binding = self.series.borrow_mut();
178
+ pub fn append(ruby: &Ruby, rb_self: &Self, other: &RbSeries) -> RbResult<()> {
179
+ let mut binding = rb_self.series.borrow_mut();
146
180
  let res = binding.append(&other.series.borrow());
147
181
  if let Err(e) = res {
148
- Err(Error::new(exception::runtime_error(), e.to_string()))
182
+ Err(Error::new(ruby.exception_runtime_error(), e.to_string()))
149
183
  } else {
150
184
  Ok(())
151
185
  }
@@ -159,22 +193,30 @@ impl RbSeries {
159
193
  Ok(())
160
194
  }
161
195
 
162
- pub fn new_from_index(&self, index: usize, length: usize) -> RbResult<Self> {
163
- if index >= self.series.borrow().len() {
164
- Err(Error::new(exception::arg_error(), "index is out of bounds"))
196
+ pub fn new_from_index(
197
+ ruby: &Ruby,
198
+ rb_self: &Self,
199
+ index: usize,
200
+ length: usize,
201
+ ) -> RbResult<Self> {
202
+ if index >= rb_self.series.borrow().len() {
203
+ Err(Error::new(
204
+ ruby.exception_arg_error(),
205
+ "index is out of bounds",
206
+ ))
165
207
  } else {
166
- Ok(self.series.borrow().new_from_index(index, length).into())
208
+ Ok(rb_self.series.borrow().new_from_index(index, length).into())
167
209
  }
168
210
  }
169
211
 
170
- pub fn filter(&self, filter: &RbSeries) -> RbResult<Self> {
212
+ pub fn filter(ruby: &Ruby, rb_self: &Self, filter: &RbSeries) -> RbResult<Self> {
171
213
  let filter_series = &filter.series.borrow();
172
214
  if let Ok(ca) = filter_series.bool() {
173
- let series = self.series.borrow().filter(ca).unwrap();
215
+ let series = rb_self.series.borrow().filter(ca).unwrap();
174
216
  Ok(series.into())
175
217
  } else {
176
218
  Err(Error::new(
177
- exception::runtime_error(),
219
+ ruby.exception_runtime_error(),
178
220
  "Expected a boolean mask".to_string(),
179
221
  ))
180
222
  }
@@ -279,12 +321,42 @@ impl RbSeries {
279
321
  }
280
322
  }
281
323
 
282
- pub fn not(&self) -> RbResult<Self> {
324
+ pub fn not_(&self) -> RbResult<Self> {
283
325
  let binding = self.series.borrow();
284
326
  let bool = binding.bool().map_err(RbPolarsErr::from)?;
285
327
  Ok((!bool).into_series().into())
286
328
  }
287
329
 
330
+ pub fn shrink_dtype(&self) -> RbResult<Self> {
331
+ self.series
332
+ .borrow()
333
+ .shrink_type()
334
+ .map(Into::into)
335
+ .map_err(RbPolarsErr::from)
336
+ .map_err(RbErr::from)
337
+ }
338
+
339
+ pub fn str_to_decimal_infer(&self, inference_length: usize) -> RbResult<Self> {
340
+ let s = self.series.borrow();
341
+ let ca = s.str().map_err(RbPolarsErr::from)?;
342
+ ca.to_decimal_infer(inference_length)
343
+ .map(Series::from)
344
+ .map(Into::into)
345
+ .map_err(RbPolarsErr::from)
346
+ .map_err(RbErr::from)
347
+ }
348
+
349
+ pub fn str_json_decode(&self, infer_schema_length: Option<usize>) -> RbResult<Self> {
350
+ let lock = self.series.borrow();
351
+ lock.str()
352
+ .map_err(RbPolarsErr::from)?
353
+ .json_decode(None, infer_schema_length)
354
+ .map(|s| s.with_name(lock.name().clone()))
355
+ .map(Into::into)
356
+ .map_err(RbPolarsErr::from)
357
+ .map_err(RbErr::from)
358
+ }
359
+
288
360
  pub fn to_s(&self) -> String {
289
361
  format!("{}", self.series.borrow())
290
362
  }
@@ -370,6 +442,33 @@ impl RbSeries {
370
442
  Ok(out.into())
371
443
  }
372
444
 
445
+ pub fn get_chunks(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
446
+ ruby.ary_try_from_iter(
447
+ flatten_series(&rb_self.series.borrow())
448
+ .into_iter()
449
+ .map(|s| rb_modules::pl_utils().funcall::<_, _, Value>("wrap_s", (Self::new(s),))),
450
+ )
451
+ }
452
+
453
+ pub fn is_sorted(&self, descending: bool, nulls_last: bool) -> RbResult<bool> {
454
+ let options = SortOptions {
455
+ descending,
456
+ nulls_last,
457
+ multithreaded: true,
458
+ maintain_order: false,
459
+ limit: None,
460
+ };
461
+ Ok(self
462
+ .series
463
+ .borrow()
464
+ .is_sorted(options)
465
+ .map_err(RbPolarsErr::from)?)
466
+ }
467
+
468
+ pub fn clear(&self) -> Self {
469
+ self.series.borrow().clear().into()
470
+ }
471
+
373
472
  pub fn time_unit(&self) -> Option<String> {
374
473
  if let DataType::Datetime(tu, _) | DataType::Duration(tu) = self.series.borrow().dtype() {
375
474
  Some(
@@ -8,13 +8,14 @@ mod import;
8
8
  mod map;
9
9
  mod scatter;
10
10
 
11
- use magnus::{RArray, prelude::*};
11
+ use magnus::{DataTypeFunctions, RArray, Ruby, TypedData, gc, prelude::*};
12
12
  use polars::prelude::*;
13
13
  use std::cell::RefCell;
14
14
 
15
- use crate::RbResult;
15
+ use crate::{ObjectValue, RbResult};
16
16
 
17
- #[magnus::wrap(class = "Polars::RbSeries")]
17
+ #[derive(TypedData)]
18
+ #[magnus(class = "Polars::RbSeries", mark)]
18
19
  pub struct RbSeries {
19
20
  pub series: RefCell<Series>,
20
21
  }
@@ -42,9 +43,33 @@ pub fn to_series(rs: RArray) -> RbResult<Vec<Series>> {
42
43
  }
43
44
 
44
45
  pub fn to_rbseries(s: Vec<Column>) -> RArray {
45
- RArray::from_iter(
46
+ Ruby::get().unwrap().ary_from_iter(
46
47
  s.into_iter()
47
48
  .map(|c| c.take_materialized_series())
48
49
  .map(RbSeries::new),
49
50
  )
50
51
  }
52
+
53
+ pub fn mark_series(marker: &gc::Marker, series: &Series) {
54
+ if let DataType::Object(_) = series.dtype() {
55
+ for i in 0..series.len() {
56
+ let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
57
+ if let Some(o) = obj {
58
+ marker.mark(o.inner);
59
+ }
60
+ }
61
+ }
62
+ }
63
+
64
+ impl DataTypeFunctions for RbSeries {
65
+ fn mark(&self, marker: &gc::Marker) {
66
+ // this is not ideal, as objects will not be marked if unable to borrow
67
+ // this should never happen, but log for now to avoid panic,
68
+ // as most series will not use Object datatype
69
+ if let Ok(s) = &self.series.try_borrow() {
70
+ mark_series(marker, s);
71
+ } else {
72
+ eprintln!("[polars] Could not borrow!");
73
+ }
74
+ }
75
+ }
@@ -9,6 +9,181 @@ module Polars
9
9
  self._rbexpr = expr._rbexpr
10
10
  end
11
11
 
12
+ # Return the number of elements in each array.
13
+ #
14
+ # @return [Expr]
15
+ #
16
+ # @example
17
+ # df = Polars::DataFrame.new(
18
+ # {"a" => [[1, 2], [4, 3]]},
19
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
20
+ # )
21
+ # df.select(Polars.col("a").arr.len)
22
+ # # =>
23
+ # # shape: (2, 1)
24
+ # # ┌─────┐
25
+ # # │ a │
26
+ # # │ --- │
27
+ # # │ u32 │
28
+ # # ╞═════╡
29
+ # # │ 2 │
30
+ # # │ 2 │
31
+ # # └─────┘
32
+ def len
33
+ Utils.wrap_expr(_rbexpr.arr_len)
34
+ end
35
+
36
+ # Slice every subarray.
37
+ #
38
+ # @param offset [Integer]
39
+ # Start index. Negative indexing is supported.
40
+ # @param length [Integer]
41
+ # Length of the slice. If set to `nil` (default), the slice is taken to the
42
+ # end of the array.
43
+ # @param as_array [Boolean]
44
+ # Return result as a fixed-length `Array`, otherwise as a `List`.
45
+ # If true `length` and `offset` must be constant values.
46
+ #
47
+ # @return [Expr]
48
+ #
49
+ # @example
50
+ # df = Polars::DataFrame.new(
51
+ # {"a" => [[1, 2], [4, 3]]},
52
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
53
+ # )
54
+ # df.select(Polars.col("a").arr.slice(0, 1))
55
+ # # =>
56
+ # # shape: (2, 1)
57
+ # # ┌───────────┐
58
+ # # │ a │
59
+ # # │ --- │
60
+ # # │ list[i64] │
61
+ # # ╞═══════════╡
62
+ # # │ [1] │
63
+ # # │ [4] │
64
+ # # └───────────┘
65
+ #
66
+ # @example
67
+ # df = Polars::DataFrame.new(
68
+ # {"a" => [[1, 2], [4, 3]]},
69
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
70
+ # )
71
+ # df.select(Polars.col("a").arr.slice(0, 1, as_array: true))
72
+ # # =>
73
+ # # shape: (2, 1)
74
+ # # ┌───────────────┐
75
+ # # │ a │
76
+ # # │ --- │
77
+ # # │ array[i64, 1] │
78
+ # # ╞═══════════════╡
79
+ # # │ [1] │
80
+ # # │ [4] │
81
+ # # └───────────────┘
82
+ def slice(
83
+ offset,
84
+ length = nil,
85
+ as_array: false
86
+ )
87
+ offset = Utils.parse_into_expression(offset)
88
+ length = !length.nil? ? Utils.parse_into_expression(length) : nil
89
+ Utils.wrap_expr(_rbexpr.arr_slice(offset, length, as_array))
90
+ end
91
+
92
+ # Get the first `n` elements of the sub-arrays.
93
+ #
94
+ # @param n [Integer]
95
+ # Number of values to return for each sub-array.
96
+ # @param as_array [Boolean]
97
+ # Return result as a fixed-length `Array`, otherwise as a `List`.
98
+ # If true `n` must be a constant value.
99
+ #
100
+ # @return [Expr]
101
+ #
102
+ # @example
103
+ # df = Polars::DataFrame.new(
104
+ # {"a" => [[1, 2], [4, 3]]},
105
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
106
+ # )
107
+ # df.select(Polars.col("a").arr.head(1))
108
+ # # =>
109
+ # # shape: (2, 1)
110
+ # # ┌───────────┐
111
+ # # │ a │
112
+ # # │ --- │
113
+ # # │ list[i64] │
114
+ # # ╞═══════════╡
115
+ # # │ [1] │
116
+ # # │ [4] │
117
+ # # └───────────┘
118
+ #
119
+ # @example
120
+ # df = Polars::DataFrame.new(
121
+ # {"a" => [[1, 2], [4, 3]]},
122
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
123
+ # )
124
+ # df.select(Polars.col("a").arr.head(1, as_array: true))
125
+ # # =>
126
+ # # shape: (2, 1)
127
+ # # ┌───────────────┐
128
+ # # │ a │
129
+ # # │ --- │
130
+ # # │ array[i64, 1] │
131
+ # # ╞═══════════════╡
132
+ # # │ [1] │
133
+ # # │ [4] │
134
+ # # └───────────────┘
135
+ def head(n = 5, as_array: false)
136
+ slice(0, n, as_array: as_array)
137
+ end
138
+
139
+ # Slice the last `n` values of every sub-array.
140
+ #
141
+ # @param n [Integer]
142
+ # Number of values to return for each sub-array.
143
+ # @param as_array [Boolean]
144
+ # Return result as a fixed-length `Array`, otherwise as a `List`.
145
+ # If true `n` must be a constant value.
146
+ #
147
+ # @return [Expr]
148
+ #
149
+ # @example
150
+ # df = Polars::DataFrame.new(
151
+ # {"a" => [[1, 2], [4, 3]]},
152
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
153
+ # )
154
+ # df.select(Polars.col("a").arr.tail(1))
155
+ # # =>
156
+ # # shape: (2, 1)
157
+ # # ┌───────────┐
158
+ # # │ a │
159
+ # # │ --- │
160
+ # # │ list[i64] │
161
+ # # ╞═══════════╡
162
+ # # │ [2] │
163
+ # # │ [3] │
164
+ # # └───────────┘
165
+ #
166
+ # @example
167
+ # df = Polars::DataFrame.new(
168
+ # {"a" => [[1, 2], [4, 3]]},
169
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
170
+ # )
171
+ # df.select(Polars.col("a").arr.tail(1, as_array: true))
172
+ # # =>
173
+ # # shape: (2, 1)
174
+ # # ┌───────────────┐
175
+ # # │ a │
176
+ # # │ --- │
177
+ # # │ array[i64, 1] │
178
+ # # ╞═══════════════╡
179
+ # # │ [2] │
180
+ # # │ [3] │
181
+ # # └───────────────┘
182
+ def tail(n = 5, as_array: false)
183
+ n = Utils.parse_into_expression(n)
184
+ Utils.wrap_expr(_rbexpr.arr_tail(n, as_array))
185
+ end
186
+
12
187
  # Compute the min values of the sub-arrays.
13
188
  #
14
189
  # @return [Expr]
@@ -30,7 +205,7 @@ module Polars
30
205
  # # │ 3 │
31
206
  # # └─────┘
32
207
  def min
33
- Utils.wrap_expr(_rbexpr.array_min)
208
+ Utils.wrap_expr(_rbexpr.arr_min)
34
209
  end
35
210
 
36
211
  # Compute the max values of the sub-arrays.
@@ -54,7 +229,7 @@ module Polars
54
229
  # # │ 4 │
55
230
  # # └─────┘
56
231
  def max
57
- Utils.wrap_expr(_rbexpr.array_max)
232
+ Utils.wrap_expr(_rbexpr.arr_max)
58
233
  end
59
234
 
60
235
  # Compute the sum values of the sub-arrays.
@@ -78,7 +253,103 @@ module Polars
78
253
  # # │ 7 │
79
254
  # # └─────┘
80
255
  def sum
81
- Utils.wrap_expr(_rbexpr.array_sum)
256
+ Utils.wrap_expr(_rbexpr.arr_sum)
257
+ end
258
+
259
+ # Compute the standard deviation of the values of the sub-arrays.
260
+ #
261
+ # @return [Expr]
262
+ #
263
+ # @example
264
+ # df = Polars::DataFrame.new(
265
+ # {"a" => [[1, 2], [4, 3]]},
266
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
267
+ # )
268
+ # df.select(Polars.col("a").arr.std)
269
+ # # =>
270
+ # # shape: (2, 1)
271
+ # # ┌──────────┐
272
+ # # │ a │
273
+ # # │ --- │
274
+ # # │ f64 │
275
+ # # ╞══════════╡
276
+ # # │ 0.707107 │
277
+ # # │ 0.707107 │
278
+ # # └──────────┘
279
+ def std(ddof: 1)
280
+ Utils.wrap_expr(_rbexpr.arr_std(ddof))
281
+ end
282
+
283
+ # Compute the variance of the values of the sub-arrays.
284
+ #
285
+ # @return [Expr]
286
+ #
287
+ # @example
288
+ # df = Polars::DataFrame.new(
289
+ # {"a" => [[1, 2], [4, 3]]},
290
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
291
+ # )
292
+ # df.select(Polars.col("a").arr.var)
293
+ # # =>
294
+ # # shape: (2, 1)
295
+ # # ┌─────┐
296
+ # # │ a │
297
+ # # │ --- │
298
+ # # │ f64 │
299
+ # # ╞═════╡
300
+ # # │ 0.5 │
301
+ # # │ 0.5 │
302
+ # # └─────┘
303
+ def var(ddof: 1)
304
+ Utils.wrap_expr(_rbexpr.arr_var(ddof))
305
+ end
306
+
307
+ # Compute the mean of the values of the sub-arrays.
308
+ #
309
+ # @return [Expr]
310
+ #
311
+ # @example
312
+ # df = Polars::DataFrame.new(
313
+ # {"a" => [[1, 2, 3], [1, 1, 16]]},
314
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
315
+ # )
316
+ # df.select(Polars.col("a").arr.mean)
317
+ # # =>
318
+ # # shape: (2, 1)
319
+ # # ┌─────┐
320
+ # # │ a │
321
+ # # │ --- │
322
+ # # │ f64 │
323
+ # # ╞═════╡
324
+ # # │ 2.0 │
325
+ # # │ 6.0 │
326
+ # # └─────┘
327
+ def mean
328
+ Utils.wrap_expr(_rbexpr.arr_mean)
329
+ end
330
+
331
+ # Compute the median of the values of the sub-arrays.
332
+ #
333
+ # @return [Expr]
334
+ #
335
+ # @example
336
+ # df = Polars::DataFrame.new(
337
+ # {"a" => [[1, 2], [4, 3]]},
338
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
339
+ # )
340
+ # df.select(Polars.col("a").arr.median)
341
+ # # =>
342
+ # # shape: (2, 1)
343
+ # # ┌─────┐
344
+ # # │ a │
345
+ # # │ --- │
346
+ # # │ f64 │
347
+ # # ╞═════╡
348
+ # # │ 1.5 │
349
+ # # │ 3.5 │
350
+ # # └─────┘
351
+ def median
352
+ Utils.wrap_expr(_rbexpr.arr_median)
82
353
  end
83
354
 
84
355
  # Get the unique/distinct values in the array.
@@ -109,6 +380,32 @@ module Polars
109
380
  Utils.wrap_expr(_rbexpr.arr_unique(maintain_order))
110
381
  end
111
382
 
383
+ # Count the number of unique values in every sub-arrays.
384
+ #
385
+ # @return [Expr]
386
+ #
387
+ # @example
388
+ # df = Polars::DataFrame.new(
389
+ # {
390
+ # "a" => [[1, 1, 2], [2, 3, 4]],
391
+ # },
392
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
393
+ # )
394
+ # df.with_columns(n_unique: Polars.col("a").arr.n_unique)
395
+ # # =>
396
+ # # shape: (2, 2)
397
+ # # ┌───────────────┬──────────┐
398
+ # # │ a ┆ n_unique │
399
+ # # │ --- ┆ --- │
400
+ # # │ array[i64, 3] ┆ u32 │
401
+ # # ╞═══════════════╪══════════╡
402
+ # # │ [1, 1, 2] ┆ 2 │
403
+ # # │ [2, 3, 4] ┆ 3 │
404
+ # # └───────────────┴──────────┘
405
+ def n_unique
406
+ Utils.wrap_expr(_rbexpr.arr_n_unique)
407
+ end
408
+
112
409
  # Convert an Array column into a List column with the same inner data type.
113
410
  #
114
411
  # @return [Expr]
@@ -535,5 +832,87 @@ module Polars
535
832
  element = Utils.parse_into_expression(element, str_as_lit: true)
536
833
  Utils.wrap_expr(_rbexpr.arr_count_matches(element))
537
834
  end
835
+
836
+ # Convert the Series of type `Array` to a Series of type `Struct`.
837
+ #
838
+ # @param fields [Object]
839
+ # If the names and number of the desired fields are known in advance
840
+ # a list of field names can be given, which will be assigned by index.
841
+ # Otherwise, to dynamically assign field names, a custom function can be
842
+ # used; if neither are set, fields will be `field_0, field_1 .. field_n`.
843
+ #
844
+ # @return [Expr]
845
+ #
846
+ # @example Convert array to struct with default field name assignment:
847
+ # df = Polars::DataFrame.new(
848
+ # {"n" => [[0, 1, 2], [3, 4, 5]]}, schema: {"n" => Polars::Array.new(Polars::Int8, 3)}
849
+ # )
850
+ # df.with_columns(struct: Polars.col("n").arr.to_struct)
851
+ # # =>
852
+ # # shape: (2, 2)
853
+ # # ┌──────────────┬───────────┐
854
+ # # │ n ┆ struct │
855
+ # # │ --- ┆ --- │
856
+ # # │ array[i8, 3] ┆ struct[3] │
857
+ # # ╞══════════════╪═══════════╡
858
+ # # │ [0, 1, 2] ┆ {0,1,2} │
859
+ # # │ [3, 4, 5] ┆ {3,4,5} │
860
+ # # └──────────────┴───────────┘
861
+ def to_struct(fields: nil)
862
+ raise Todo if fields
863
+ if fields.is_a?(Enumerable)
864
+ field_names = fields.to_a
865
+ rbexpr = _rbexpr.arr_to_struct(nil)
866
+ Utils.wrap_expr(rbexpr).struct.rename_fields(field_names)
867
+ else
868
+ rbexpr = _rbexpr.arr_to_struct(fields)
869
+ Utils.wrap_expr(rbexpr)
870
+ end
871
+ end
872
+
873
+ # Shift array values by the given number of indices.
874
+ #
875
+ # @param n [Integer]
876
+ # Number of indices to shift forward. If a negative value is passed, values
877
+ # are shifted in the opposite direction instead.
878
+ #
879
+ # @return [Expr]
880
+ #
881
+ # @note
882
+ # This method is similar to the `LAG` operation in SQL when the value for `n`
883
+ # is positive. With a negative value for `n`, it is similar to `LEAD`.
884
+ #
885
+ # @example By default, array values are shifted forward by one index.
886
+ # df = Polars::DataFrame.new(
887
+ # {"a" => [[1, 2, 3], [4, 5, 6]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
888
+ # )
889
+ # df.with_columns(shift: Polars.col("a").arr.shift)
890
+ # # =>
891
+ # # shape: (2, 2)
892
+ # # ┌───────────────┬───────────────┐
893
+ # # │ a ┆ shift │
894
+ # # │ --- ┆ --- │
895
+ # # │ array[i64, 3] ┆ array[i64, 3] │
896
+ # # ╞═══════════════╪═══════════════╡
897
+ # # │ [1, 2, 3] ┆ [null, 1, 2] │
898
+ # # │ [4, 5, 6] ┆ [null, 4, 5] │
899
+ # # └───────────────┴───────────────┘
900
+ #
901
+ # @example Pass a negative value to shift in the opposite direction instead.
902
+ # df.with_columns(shift: Polars.col("a").arr.shift(-2))
903
+ # # =>
904
+ # # shape: (2, 2)
905
+ # # ┌───────────────┬─────────────────┐
906
+ # # │ a ┆ shift │
907
+ # # │ --- ┆ --- │
908
+ # # │ array[i64, 3] ┆ array[i64, 3] │
909
+ # # ╞═══════════════╪═════════════════╡
910
+ # # │ [1, 2, 3] ┆ [3, null, null] │
911
+ # # │ [4, 5, 6] ┆ [6, null, null] │
912
+ # # └───────────────┴─────────────────┘
913
+ def shift(n = 1)
914
+ n = Utils.parse_into_expression(n)
915
+ Utils.wrap_expr(_rbexpr.arr_shift(n))
916
+ end
538
917
  end
539
918
  end