polars-df 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +42 -1
- data/Cargo.lock +159 -66
- data/Cargo.toml +0 -3
- data/LICENSE.txt +1 -1
- data/README.md +3 -2
- data/ext/polars/Cargo.toml +18 -8
- data/ext/polars/src/batched_csv.rs +7 -5
- data/ext/polars/src/conversion/anyvalue.rs +186 -0
- data/ext/polars/src/conversion/chunked_array.rs +140 -0
- data/ext/polars/src/{conversion.rs → conversion/mod.rs} +273 -342
- data/ext/polars/src/dataframe.rs +108 -66
- data/ext/polars/src/expr/array.rs +78 -0
- data/ext/polars/src/expr/datetime.rs +29 -58
- data/ext/polars/src/expr/general.rs +83 -36
- data/ext/polars/src/expr/list.rs +58 -6
- data/ext/polars/src/expr/meta.rs +48 -0
- data/ext/polars/src/expr/rolling.rs +1 -0
- data/ext/polars/src/expr/string.rs +62 -11
- data/ext/polars/src/expr/struct.rs +8 -4
- data/ext/polars/src/file.rs +158 -11
- data/ext/polars/src/functions/aggregation.rs +6 -0
- data/ext/polars/src/functions/lazy.rs +120 -50
- data/ext/polars/src/functions/meta.rs +45 -1
- data/ext/polars/src/functions/string_cache.rs +14 -0
- data/ext/polars/src/functions/whenthen.rs +47 -17
- data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +195 -40
- data/ext/polars/src/lib.rs +246 -179
- data/ext/polars/src/map/dataframe.rs +17 -9
- data/ext/polars/src/series/aggregation.rs +20 -0
- data/ext/polars/src/series/mod.rs +35 -4
- data/lib/polars/array_expr.rb +453 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/batched_csv_reader.rb +4 -2
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +306 -96
- data/lib/polars/data_types.rb +191 -28
- data/lib/polars/date_time_expr.rb +41 -18
- data/lib/polars/date_time_name_space.rb +9 -3
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +898 -215
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +2 -2
- data/lib/polars/io.rb +36 -31
- data/lib/polars/lazy_frame.rb +405 -88
- data/lib/polars/list_expr.rb +158 -8
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +282 -41
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +413 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +106 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +16 -4
- metadata +37 -8
- data/lib/polars/lazy_functions.rb +0 -1181
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
@@ -265,9 +265,9 @@ pub fn apply_lambda_with_rows_output<'a>(
|
|
265
265
|
// to the row. Before we mutate the row buf again, the reference is dropped.
|
266
266
|
// we just cannot prove it to the compiler.
|
267
267
|
// we still do this because it saves a Vec allocation in a hot loop.
|
268
|
-
unsafe { &*ptr }
|
268
|
+
Ok(unsafe { &*ptr })
|
269
269
|
}
|
270
|
-
None => &null_row,
|
270
|
+
None => Ok(&null_row),
|
271
271
|
}
|
272
272
|
}
|
273
273
|
Err(e) => panic!("ruby function failed {}", e),
|
@@ -277,22 +277,30 @@ pub fn apply_lambda_with_rows_output<'a>(
|
|
277
277
|
// first rows for schema inference
|
278
278
|
let mut buf = Vec::with_capacity(inference_size);
|
279
279
|
buf.push(first_value);
|
280
|
-
|
281
|
-
|
280
|
+
for v in (&mut row_iter).take(inference_size) {
|
281
|
+
buf.push(v?.clone());
|
282
|
+
}
|
283
|
+
|
284
|
+
let schema = rows_to_schema_first_non_null(&buf, Some(50))?;
|
282
285
|
|
283
286
|
if init_null_count > 0 {
|
284
287
|
// Safety: we know the iterator's size
|
285
288
|
let iter = unsafe {
|
286
289
|
(0..init_null_count)
|
287
|
-
.map(|_| &null_row)
|
288
|
-
.chain(buf.iter())
|
290
|
+
.map(|_| Ok(&null_row))
|
291
|
+
.chain(buf.iter().map(Ok))
|
289
292
|
.chain(row_iter)
|
290
293
|
.trust_my_length(df.height())
|
291
294
|
};
|
292
|
-
DataFrame::
|
295
|
+
DataFrame::try_from_rows_iter_and_schema(iter, &schema)
|
293
296
|
} else {
|
294
297
|
// Safety: we know the iterator's size
|
295
|
-
let iter = unsafe {
|
296
|
-
|
298
|
+
let iter = unsafe {
|
299
|
+
buf.iter()
|
300
|
+
.map(Ok)
|
301
|
+
.chain(row_iter)
|
302
|
+
.trust_my_length(df.height())
|
303
|
+
};
|
304
|
+
DataFrame::try_from_rows_iter_and_schema(iter, &schema)
|
297
305
|
}
|
298
306
|
}
|
@@ -4,6 +4,26 @@ use crate::{RbResult, RbSeries, RbValueError};
|
|
4
4
|
use magnus::{IntoValue, Value};
|
5
5
|
|
6
6
|
impl RbSeries {
|
7
|
+
pub fn any(&self, ignore_nulls: bool) -> RbResult<Option<bool>> {
|
8
|
+
let binding = self.series.borrow();
|
9
|
+
let s = binding.bool().map_err(RbPolarsErr::from)?;
|
10
|
+
Ok(if ignore_nulls {
|
11
|
+
Some(s.any())
|
12
|
+
} else {
|
13
|
+
s.any_kleene()
|
14
|
+
})
|
15
|
+
}
|
16
|
+
|
17
|
+
pub fn all(&self, ignore_nulls: bool) -> RbResult<Option<bool>> {
|
18
|
+
let binding = self.series.borrow();
|
19
|
+
let s = binding.bool().map_err(RbPolarsErr::from)?;
|
20
|
+
Ok(if ignore_nulls {
|
21
|
+
Some(s.all())
|
22
|
+
} else {
|
23
|
+
s.all_kleene()
|
24
|
+
})
|
25
|
+
}
|
26
|
+
|
7
27
|
pub fn arg_max(&self) -> Option<usize> {
|
8
28
|
self.series.borrow().arg_max()
|
9
29
|
}
|
@@ -74,6 +74,24 @@ impl RbSeries {
|
|
74
74
|
}
|
75
75
|
}
|
76
76
|
|
77
|
+
pub fn cat_uses_lexical_ordering(&self) -> RbResult<bool> {
|
78
|
+
let binding = self.series.borrow();
|
79
|
+
let ca = binding.categorical().map_err(RbPolarsErr::from)?;
|
80
|
+
Ok(ca.uses_lexical_ordering())
|
81
|
+
}
|
82
|
+
|
83
|
+
pub fn cat_is_local(&self) -> RbResult<bool> {
|
84
|
+
let binding = self.series.borrow();
|
85
|
+
let ca = binding.categorical().map_err(RbPolarsErr::from)?;
|
86
|
+
Ok(ca.get_rev_map().is_local())
|
87
|
+
}
|
88
|
+
|
89
|
+
pub fn cat_to_local(&self) -> RbResult<Self> {
|
90
|
+
let binding = self.series.borrow();
|
91
|
+
let ca = binding.categorical().map_err(RbPolarsErr::from)?;
|
92
|
+
Ok(ca.to_local().into_series().into())
|
93
|
+
}
|
94
|
+
|
77
95
|
pub fn estimated_size(&self) -> usize {
|
78
96
|
self.series.borrow().estimated_size()
|
79
97
|
}
|
@@ -215,8 +233,18 @@ impl RbSeries {
|
|
215
233
|
}
|
216
234
|
}
|
217
235
|
|
218
|
-
pub fn sort(&self,
|
219
|
-
(self
|
236
|
+
pub fn sort(&self, descending: bool, nulls_last: bool, multithreaded: bool) -> RbResult<Self> {
|
237
|
+
Ok(self
|
238
|
+
.series
|
239
|
+
.borrow_mut()
|
240
|
+
.sort(
|
241
|
+
SortOptions::default()
|
242
|
+
.with_order_descending(descending)
|
243
|
+
.with_nulls_last(nulls_last)
|
244
|
+
.with_multithreaded(multithreaded),
|
245
|
+
)
|
246
|
+
.map_err(RbPolarsErr::from)?
|
247
|
+
.into())
|
220
248
|
}
|
221
249
|
|
222
250
|
pub fn value_counts(&self, sorted: bool) -> RbResult<RbDataFrame> {
|
@@ -313,7 +341,7 @@ impl RbSeries {
|
|
313
341
|
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
314
342
|
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
315
343
|
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
316
|
-
DataType::Categorical(_, _) => {
|
344
|
+
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
317
345
|
RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
|
318
346
|
}
|
319
347
|
DataType::Object(_, _) => {
|
@@ -415,7 +443,10 @@ impl RbSeries {
|
|
415
443
|
RArray::from_iter(NullIter { iter, n }).into_value()
|
416
444
|
}
|
417
445
|
DataType::Unknown => {
|
418
|
-
panic!("to_a not implemented for
|
446
|
+
panic!("to_a not implemented for unknown")
|
447
|
+
}
|
448
|
+
DataType::BinaryOffset => {
|
449
|
+
unreachable!()
|
419
450
|
}
|
420
451
|
};
|
421
452
|
rblist
|
data/lib/polars/array_expr.rb
CHANGED
@@ -80,5 +80,458 @@ module Polars
|
|
80
80
|
def sum
|
81
81
|
Utils.wrap_expr(_rbexpr.array_sum)
|
82
82
|
end
|
83
|
+
|
84
|
+
# Get the unique/distinct values in the array.
|
85
|
+
#
|
86
|
+
# @param maintain_order [Boolean]
|
87
|
+
# Maintain order of data. This requires more work.
|
88
|
+
#
|
89
|
+
# @return [Expr]
|
90
|
+
#
|
91
|
+
# @example
|
92
|
+
# df = Polars::DataFrame.new(
|
93
|
+
# {
|
94
|
+
# "a" => [[1, 1, 2]]
|
95
|
+
# },
|
96
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
97
|
+
# )
|
98
|
+
# df.select(Polars.col("a").arr.unique)
|
99
|
+
# # =>
|
100
|
+
# # shape: (1, 1)
|
101
|
+
# # ┌───────────┐
|
102
|
+
# # │ a │
|
103
|
+
# # │ --- │
|
104
|
+
# # │ list[i64] │
|
105
|
+
# # ╞═══════════╡
|
106
|
+
# # │ [1, 2] │
|
107
|
+
# # └───────────┘
|
108
|
+
def unique(maintain_order: false)
|
109
|
+
Utils.wrap_expr(_rbexpr.arr_unique(maintain_order))
|
110
|
+
end
|
111
|
+
|
112
|
+
# Convert an Array column into a List column with the same inner data type.
|
113
|
+
#
|
114
|
+
# @return [Expr]
|
115
|
+
#
|
116
|
+
# @example
|
117
|
+
# df = Polars::DataFrame.new(
|
118
|
+
# {"a" => [[1, 2], [3, 4]]},
|
119
|
+
# schema: {"a" => Polars::Array.new(Polars::Int8, 2)}
|
120
|
+
# )
|
121
|
+
# df.select(Polars.col("a").arr.to_list)
|
122
|
+
# # =>
|
123
|
+
# # shape: (2, 1)
|
124
|
+
# # ┌──────────┐
|
125
|
+
# # │ a │
|
126
|
+
# # │ --- │
|
127
|
+
# # │ list[i8] │
|
128
|
+
# # ╞══════════╡
|
129
|
+
# # │ [1, 2] │
|
130
|
+
# # │ [3, 4] │
|
131
|
+
# # └──────────┘
|
132
|
+
def to_list
|
133
|
+
Utils.wrap_expr(_rbexpr.arr_to_list)
|
134
|
+
end
|
135
|
+
|
136
|
+
# Evaluate whether any boolean value is true for every subarray.
|
137
|
+
#
|
138
|
+
# @return [Expr]
|
139
|
+
#
|
140
|
+
# @example
|
141
|
+
# df = Polars::DataFrame.new(
|
142
|
+
# {
|
143
|
+
# "a": [
|
144
|
+
# [true, true],
|
145
|
+
# [false, true],
|
146
|
+
# [false, false],
|
147
|
+
# [nil, nil],
|
148
|
+
# nil
|
149
|
+
# ]
|
150
|
+
# },
|
151
|
+
# schema: {"a" => Polars::Array.new(Polars::Boolean, 2)}
|
152
|
+
# )
|
153
|
+
# df.with_columns(any: Polars.col("a").arr.any)
|
154
|
+
# # =>
|
155
|
+
# # shape: (5, 2)
|
156
|
+
# # ┌────────────────┬───────┐
|
157
|
+
# # │ a ┆ any │
|
158
|
+
# # │ --- ┆ --- │
|
159
|
+
# # │ array[bool, 2] ┆ bool │
|
160
|
+
# # ╞════════════════╪═══════╡
|
161
|
+
# # │ [true, true] ┆ true │
|
162
|
+
# # │ [false, true] ┆ true │
|
163
|
+
# # │ [false, false] ┆ false │
|
164
|
+
# # │ [null, null] ┆ false │
|
165
|
+
# # │ null ┆ null │
|
166
|
+
# # └────────────────┴───────┘
|
167
|
+
def any
|
168
|
+
Utils.wrap_expr(_rbexpr.arr_any)
|
169
|
+
end
|
170
|
+
|
171
|
+
# Evaluate whether all boolean values are true for every subarray.
|
172
|
+
#
|
173
|
+
# @return [Expr]
|
174
|
+
#
|
175
|
+
# @example
|
176
|
+
# df = Polars::DataFrame.new(
|
177
|
+
# {
|
178
|
+
# "a": [
|
179
|
+
# [true, true],
|
180
|
+
# [false, true],
|
181
|
+
# [false, false],
|
182
|
+
# [nil, nil],
|
183
|
+
# nil
|
184
|
+
# ]
|
185
|
+
# },
|
186
|
+
# schema: {"a" => Polars::Array.new(Polars::Boolean, 2)}
|
187
|
+
# )
|
188
|
+
# df.with_columns(all: Polars.col("a").arr.all)
|
189
|
+
# # =>
|
190
|
+
# # shape: (5, 2)
|
191
|
+
# # ┌────────────────┬───────┐
|
192
|
+
# # │ a ┆ all │
|
193
|
+
# # │ --- ┆ --- │
|
194
|
+
# # │ array[bool, 2] ┆ bool │
|
195
|
+
# # ╞════════════════╪═══════╡
|
196
|
+
# # │ [true, true] ┆ true │
|
197
|
+
# # │ [false, true] ┆ false │
|
198
|
+
# # │ [false, false] ┆ false │
|
199
|
+
# # │ [null, null] ┆ true │
|
200
|
+
# # │ null ┆ null │
|
201
|
+
# # └────────────────┴───────┘
|
202
|
+
def all
|
203
|
+
Utils.wrap_expr(_rbexpr.arr_all)
|
204
|
+
end
|
205
|
+
|
206
|
+
# Sort the arrays in this column.
|
207
|
+
#
|
208
|
+
# @param descending [Boolean]
|
209
|
+
# Sort in descending order.
|
210
|
+
# @param nulls_last [Boolean]
|
211
|
+
# Place null values last.
|
212
|
+
#
|
213
|
+
# @return [Expr]
|
214
|
+
#
|
215
|
+
# @example
|
216
|
+
# df = Polars::DataFrame.new(
|
217
|
+
# {
|
218
|
+
# "a" => [[3, 2, 1], [9, 1, 2]],
|
219
|
+
# },
|
220
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
221
|
+
# )
|
222
|
+
# df.with_columns(sort: Polars.col("a").arr.sort)
|
223
|
+
# # =>
|
224
|
+
# # shape: (2, 2)
|
225
|
+
# # ┌───────────────┬───────────────┐
|
226
|
+
# # │ a ┆ sort │
|
227
|
+
# # │ --- ┆ --- │
|
228
|
+
# # │ array[i64, 3] ┆ array[i64, 3] │
|
229
|
+
# # ╞═══════════════╪═══════════════╡
|
230
|
+
# # │ [3, 2, 1] ┆ [1, 2, 3] │
|
231
|
+
# # │ [9, 1, 2] ┆ [1, 2, 9] │
|
232
|
+
# # └───────────────┴───────────────┘
|
233
|
+
#
|
234
|
+
# @example
|
235
|
+
# df.with_columns(sort: Polars.col("a").arr.sort(descending: true))
|
236
|
+
# # =>
|
237
|
+
# # shape: (2, 2)
|
238
|
+
# # ┌───────────────┬───────────────┐
|
239
|
+
# # │ a ┆ sort │
|
240
|
+
# # │ --- ┆ --- │
|
241
|
+
# # │ array[i64, 3] ┆ array[i64, 3] │
|
242
|
+
# # ╞═══════════════╪═══════════════╡
|
243
|
+
# # │ [3, 2, 1] ┆ [3, 2, 1] │
|
244
|
+
# # │ [9, 1, 2] ┆ [9, 2, 1] │
|
245
|
+
# # └───────────────┴───────────────┘
|
246
|
+
def sort(descending: false, nulls_last: false)
|
247
|
+
Utils.wrap_expr(_rbexpr.arr_sort(descending, nulls_last))
|
248
|
+
end
|
249
|
+
|
250
|
+
# Reverse the arrays in this column.
|
251
|
+
#
|
252
|
+
# @return [Expr]
|
253
|
+
#
|
254
|
+
# @example
|
255
|
+
# df = Polars::DataFrame.new(
|
256
|
+
# {
|
257
|
+
# "a" => [[3, 2, 1], [9, 1, 2]]
|
258
|
+
# },
|
259
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
260
|
+
# )
|
261
|
+
# df.with_columns(reverse: Polars.col("a").arr.reverse)
|
262
|
+
# # =>
|
263
|
+
# # shape: (2, 2)
|
264
|
+
# # ┌───────────────┬───────────────┐
|
265
|
+
# # │ a ┆ reverse │
|
266
|
+
# # │ --- ┆ --- │
|
267
|
+
# # │ array[i64, 3] ┆ array[i64, 3] │
|
268
|
+
# # ╞═══════════════╪═══════════════╡
|
269
|
+
# # │ [3, 2, 1] ┆ [1, 2, 3] │
|
270
|
+
# # │ [9, 1, 2] ┆ [2, 1, 9] │
|
271
|
+
# # └───────────────┴───────────────┘
|
272
|
+
def reverse
|
273
|
+
Utils.wrap_expr(_rbexpr.arr_reverse)
|
274
|
+
end
|
275
|
+
|
276
|
+
# Retrieve the index of the minimal value in every sub-array.
|
277
|
+
#
|
278
|
+
# @return [Expr]
|
279
|
+
#
|
280
|
+
# @example
|
281
|
+
# df = Polars::DataFrame.new(
|
282
|
+
# {
|
283
|
+
# "a" => [[1, 2], [2, 1]]
|
284
|
+
# },
|
285
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
286
|
+
# )
|
287
|
+
# df.with_columns(arg_min: Polars.col("a").arr.arg_min)
|
288
|
+
# # =>
|
289
|
+
# # shape: (2, 2)
|
290
|
+
# # ┌───────────────┬─────────┐
|
291
|
+
# # │ a ┆ arg_min │
|
292
|
+
# # │ --- ┆ --- │
|
293
|
+
# # │ array[i64, 2] ┆ u32 │
|
294
|
+
# # ╞═══════════════╪═════════╡
|
295
|
+
# # │ [1, 2] ┆ 0 │
|
296
|
+
# # │ [2, 1] ┆ 1 │
|
297
|
+
# # └───────────────┴─────────┘
|
298
|
+
def arg_min
|
299
|
+
Utils.wrap_expr(_rbexpr.arr_arg_min)
|
300
|
+
end
|
301
|
+
|
302
|
+
# Retrieve the index of the maximum value in every sub-array.
|
303
|
+
#
|
304
|
+
# @return [Expr]
|
305
|
+
#
|
306
|
+
# @example
|
307
|
+
# df = Polars::DataFrame.new(
|
308
|
+
# {
|
309
|
+
# "a" => [[1, 2], [2, 1]]
|
310
|
+
# },
|
311
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
312
|
+
# )
|
313
|
+
# df.with_columns(arg_max: Polars.col("a").arr.arg_max)
|
314
|
+
# # =>
|
315
|
+
# # shape: (2, 2)
|
316
|
+
# # ┌───────────────┬─────────┐
|
317
|
+
# # │ a ┆ arg_max │
|
318
|
+
# # │ --- ┆ --- │
|
319
|
+
# # │ array[i64, 2] ┆ u32 │
|
320
|
+
# # ╞═══════════════╪═════════╡
|
321
|
+
# # │ [1, 2] ┆ 1 │
|
322
|
+
# # │ [2, 1] ┆ 0 │
|
323
|
+
# # └───────────────┴─────────┘
|
324
|
+
def arg_max
|
325
|
+
Utils.wrap_expr(_rbexpr.arr_arg_max)
|
326
|
+
end
|
327
|
+
|
328
|
+
# Get the value by index in the sub-arrays.
|
329
|
+
#
|
330
|
+
# So index `0` would return the first item of every sub-array
|
331
|
+
# and index `-1` would return the last item of every sub-array.
|
332
|
+
# If an index is out of bounds, it will return `nil`.
|
333
|
+
#
|
334
|
+
# @param index [Integer]
|
335
|
+
# Index to return per sub-array
|
336
|
+
# @param null_on_oob [Boolean]
|
337
|
+
# Behavior if an index is out of bounds:
|
338
|
+
# true -> set as null
|
339
|
+
# false -> raise an error
|
340
|
+
#
|
341
|
+
# @return [Expr]
|
342
|
+
#
|
343
|
+
# @example
|
344
|
+
# df = Polars::DataFrame.new(
|
345
|
+
# {"arr" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]], "idx" => [1, -2, 4]},
|
346
|
+
# schema: {"arr" => Polars::Array.new(Polars::Int32, 3), "idx" => Polars::Int32}
|
347
|
+
# )
|
348
|
+
# df.with_columns(get: Polars.col("arr").arr.get("idx"))
|
349
|
+
# # =>
|
350
|
+
# # shape: (3, 3)
|
351
|
+
# # ┌───────────────┬─────┬──────┐
|
352
|
+
# # │ arr ┆ idx ┆ get │
|
353
|
+
# # │ --- ┆ --- ┆ --- │
|
354
|
+
# # │ array[i32, 3] ┆ i32 ┆ i32 │
|
355
|
+
# # ╞═══════════════╪═════╪══════╡
|
356
|
+
# # │ [1, 2, 3] ┆ 1 ┆ 2 │
|
357
|
+
# # │ [4, 5, 6] ┆ -2 ┆ 5 │
|
358
|
+
# # │ [7, 8, 9] ┆ 4 ┆ null │
|
359
|
+
# # └───────────────┴─────┴──────┘
|
360
|
+
def get(index, null_on_oob: true)
|
361
|
+
index = Utils.parse_as_expression(index)
|
362
|
+
Utils.wrap_expr(_rbexpr.arr_get(index, null_on_oob))
|
363
|
+
end
|
364
|
+
|
365
|
+
# Get the first value of the sub-arrays.
|
366
|
+
#
|
367
|
+
# @return [Expr]
|
368
|
+
#
|
369
|
+
# @example
|
370
|
+
# df = Polars::DataFrame.new(
|
371
|
+
# {"a" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
|
372
|
+
# schema: {"a" => Polars::Array.new(Polars::Int32, 3)}
|
373
|
+
# )
|
374
|
+
# df.with_columns(first: Polars.col("a").arr.first)
|
375
|
+
# # =>
|
376
|
+
# # shape: (3, 2)
|
377
|
+
# # ┌───────────────┬───────┐
|
378
|
+
# # │ a ┆ first │
|
379
|
+
# # │ --- ┆ --- │
|
380
|
+
# # │ array[i32, 3] ┆ i32 │
|
381
|
+
# # ╞═══════════════╪═══════╡
|
382
|
+
# # │ [1, 2, 3] ┆ 1 │
|
383
|
+
# # │ [4, 5, 6] ┆ 4 │
|
384
|
+
# # │ [7, 8, 9] ┆ 7 │
|
385
|
+
# # └───────────────┴───────┘
|
386
|
+
def first
|
387
|
+
get(0)
|
388
|
+
end
|
389
|
+
|
390
|
+
# Get the last value of the sub-arrays.
|
391
|
+
#
|
392
|
+
# @return [Expr]
|
393
|
+
#
|
394
|
+
# @example
|
395
|
+
# df = Polars::DataFrame.new(
|
396
|
+
# {"a" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
|
397
|
+
# schema: {"a" => Polars::Array.new(Polars::Int32, 3)}
|
398
|
+
# )
|
399
|
+
# df.with_columns(last: Polars.col("a").arr.last)
|
400
|
+
# # =>
|
401
|
+
# # shape: (3, 2)
|
402
|
+
# # ┌───────────────┬──────┐
|
403
|
+
# # │ a ┆ last │
|
404
|
+
# # │ --- ┆ --- │
|
405
|
+
# # │ array[i32, 3] ┆ i32 │
|
406
|
+
# # ╞═══════════════╪══════╡
|
407
|
+
# # │ [1, 2, 3] ┆ 3 │
|
408
|
+
# # │ [4, 5, 6] ┆ 6 │
|
409
|
+
# # │ [7, 8, 9] ┆ 9 │
|
410
|
+
# # └───────────────┴──────┘
|
411
|
+
def last
|
412
|
+
get(-1)
|
413
|
+
end
|
414
|
+
|
415
|
+
# Join all string items in a sub-array and place a separator between them.
|
416
|
+
#
|
417
|
+
# This errors if inner type of array `!= String`.
|
418
|
+
#
|
419
|
+
# @param separator [String]
|
420
|
+
# string to separate the items with
|
421
|
+
# @param ignore_nulls [Boolean]
|
422
|
+
# Ignore null values (default).
|
423
|
+
#
|
424
|
+
# If set to `false`, null values will be propagated.
|
425
|
+
# If the sub-array contains any null values, the output is `nil`.
|
426
|
+
#
|
427
|
+
# @return [Expr]
|
428
|
+
#
|
429
|
+
# @example
|
430
|
+
# df = Polars::DataFrame.new(
|
431
|
+
# {"s" => [["a", "b"], ["x", "y"]], "separator" => ["*", "_"]},
|
432
|
+
# schema: {
|
433
|
+
# "s" => Polars::Array.new(Polars::String, 2),
|
434
|
+
# "separator" => Polars::String
|
435
|
+
# }
|
436
|
+
# )
|
437
|
+
# df.with_columns(join: Polars.col("s").arr.join(Polars.col("separator")))
|
438
|
+
# # =>
|
439
|
+
# # shape: (2, 3)
|
440
|
+
# # ┌───────────────┬───────────┬──────┐
|
441
|
+
# # │ s ┆ separator ┆ join │
|
442
|
+
# # │ --- ┆ --- ┆ --- │
|
443
|
+
# # │ array[str, 2] ┆ str ┆ str │
|
444
|
+
# # ╞═══════════════╪═══════════╪══════╡
|
445
|
+
# # │ ["a", "b"] ┆ * ┆ a*b │
|
446
|
+
# # │ ["x", "y"] ┆ _ ┆ x_y │
|
447
|
+
# # └───────────────┴───────────┴──────┘
|
448
|
+
def join(separator, ignore_nulls: true)
|
449
|
+
separator = Utils.parse_as_expression(separator, str_as_lit: true)
|
450
|
+
Utils.wrap_expr(_rbexpr.arr_join(separator, ignore_nulls))
|
451
|
+
end
|
452
|
+
|
453
|
+
# Returns a column with a separate row for every array element.
|
454
|
+
#
|
455
|
+
# @return [Expr]
|
456
|
+
#
|
457
|
+
# @example
|
458
|
+
# df = Polars::DataFrame.new(
|
459
|
+
# {"a" => [[1, 2, 3], [4, 5, 6]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
460
|
+
# )
|
461
|
+
# df.select(Polars.col("a").arr.explode)
|
462
|
+
# # =>
|
463
|
+
# # shape: (6, 1)
|
464
|
+
# # ┌─────┐
|
465
|
+
# # │ a │
|
466
|
+
# # │ --- │
|
467
|
+
# # │ i64 │
|
468
|
+
# # ╞═════╡
|
469
|
+
# # │ 1 │
|
470
|
+
# # │ 2 │
|
471
|
+
# # │ 3 │
|
472
|
+
# # │ 4 │
|
473
|
+
# # │ 5 │
|
474
|
+
# # │ 6 │
|
475
|
+
# # └─────┘
|
476
|
+
def explode
|
477
|
+
Utils.wrap_expr(_rbexpr.explode)
|
478
|
+
end
|
479
|
+
|
480
|
+
# Check if sub-arrays contain the given item.
|
481
|
+
#
|
482
|
+
# @param item [Object]
|
483
|
+
# Item that will be checked for membership
|
484
|
+
#
|
485
|
+
# @return [Expr]
|
486
|
+
#
|
487
|
+
# @example
|
488
|
+
# df = Polars::DataFrame.new(
|
489
|
+
# {"a" => [["a", "b"], ["x", "y"], ["a", "c"]]},
|
490
|
+
# schema: {"a" => Polars::Array.new(Polars::String, 2)}
|
491
|
+
# )
|
492
|
+
# df.with_columns(contains: Polars.col("a").arr.contains("a"))
|
493
|
+
# # =>
|
494
|
+
# # shape: (3, 2)
|
495
|
+
# # ┌───────────────┬──────────┐
|
496
|
+
# # │ a ┆ contains │
|
497
|
+
# # │ --- ┆ --- │
|
498
|
+
# # │ array[str, 2] ┆ bool │
|
499
|
+
# # ╞═══════════════╪══════════╡
|
500
|
+
# # │ ["a", "b"] ┆ true │
|
501
|
+
# # │ ["x", "y"] ┆ false │
|
502
|
+
# # │ ["a", "c"] ┆ true │
|
503
|
+
# # └───────────────┴──────────┘
|
504
|
+
def contains(item)
|
505
|
+
item = Utils.parse_as_expression(item, str_as_lit: true)
|
506
|
+
Utils.wrap_expr(_rbexpr.arr_contains(item))
|
507
|
+
end
|
508
|
+
|
509
|
+
# Count how often the value produced by `element` occurs.
|
510
|
+
#
|
511
|
+
# @param element [Object]
|
512
|
+
# An expression that produces a single value
|
513
|
+
#
|
514
|
+
# @return [Expr]
|
515
|
+
#
|
516
|
+
# @example
|
517
|
+
# df = Polars::DataFrame.new(
|
518
|
+
# {"a" => [[1, 2], [1, 1], [2, 2]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
519
|
+
# )
|
520
|
+
# df.with_columns(number_of_twos: Polars.col("a").arr.count_matches(2))
|
521
|
+
# # =>
|
522
|
+
# # shape: (3, 2)
|
523
|
+
# # ┌───────────────┬────────────────┐
|
524
|
+
# # │ a ┆ number_of_twos │
|
525
|
+
# # │ --- ┆ --- │
|
526
|
+
# # │ array[i64, 2] ┆ u32 │
|
527
|
+
# # ╞═══════════════╪════════════════╡
|
528
|
+
# # │ [1, 2] ┆ 1 │
|
529
|
+
# # │ [1, 1] ┆ 0 │
|
530
|
+
# # │ [2, 2] ┆ 2 │
|
531
|
+
# # └───────────────┴────────────────┘
|
532
|
+
def count_matches(element)
|
533
|
+
element = Utils.parse_as_expression(element, str_as_lit: true)
|
534
|
+
Utils.wrap_expr(_rbexpr.arr_count_matches(element))
|
535
|
+
end
|
83
536
|
end
|
84
537
|
end
|