polars-df 0.21.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +55 -48
- data/Cargo.toml +3 -0
- data/README.md +12 -0
- data/ext/polars/Cargo.toml +22 -11
- data/ext/polars/src/batched_csv.rs +4 -4
- data/ext/polars/src/catalog/unity.rs +96 -94
- data/ext/polars/src/conversion/any_value.rs +26 -30
- data/ext/polars/src/conversion/chunked_array.rs +32 -28
- data/ext/polars/src/conversion/datetime.rs +11 -0
- data/ext/polars/src/conversion/mod.rs +307 -34
- data/ext/polars/src/dataframe/construction.rs +4 -3
- data/ext/polars/src/dataframe/export.rs +17 -15
- data/ext/polars/src/dataframe/general.rs +15 -12
- data/ext/polars/src/dataframe/io.rs +1 -2
- data/ext/polars/src/dataframe/mod.rs +25 -1
- data/ext/polars/src/dataframe/serde.rs +23 -8
- data/ext/polars/src/exceptions.rs +8 -4
- data/ext/polars/src/expr/array.rs +73 -4
- data/ext/polars/src/expr/binary.rs +26 -1
- data/ext/polars/src/expr/bitwise.rs +39 -0
- data/ext/polars/src/expr/categorical.rs +20 -0
- data/ext/polars/src/expr/datatype.rs +24 -1
- data/ext/polars/src/expr/datetime.rs +58 -14
- data/ext/polars/src/expr/general.rs +87 -15
- data/ext/polars/src/expr/list.rs +32 -24
- data/ext/polars/src/expr/meta.rs +15 -6
- data/ext/polars/src/expr/mod.rs +3 -0
- data/ext/polars/src/expr/name.rs +19 -14
- data/ext/polars/src/expr/rolling.rs +20 -0
- data/ext/polars/src/expr/serde.rs +28 -0
- data/ext/polars/src/expr/string.rs +64 -10
- data/ext/polars/src/expr/struct.rs +9 -1
- data/ext/polars/src/file.rs +15 -9
- data/ext/polars/src/functions/business.rs +0 -1
- data/ext/polars/src/functions/io.rs +25 -3
- data/ext/polars/src/functions/lazy.rs +11 -6
- data/ext/polars/src/functions/meta.rs +3 -3
- data/ext/polars/src/functions/string_cache.rs +3 -3
- data/ext/polars/src/interop/arrow/to_ruby.rs +3 -3
- data/ext/polars/src/interop/numo/numo_rs.rs +4 -3
- data/ext/polars/src/io/mod.rs +6 -0
- data/ext/polars/src/lazyframe/general.rs +59 -9
- data/ext/polars/src/lazyframe/mod.rs +16 -1
- data/ext/polars/src/lazyframe/optflags.rs +58 -0
- data/ext/polars/src/lazyframe/serde.rs +27 -3
- data/ext/polars/src/lib.rs +261 -19
- data/ext/polars/src/map/dataframe.rs +20 -17
- data/ext/polars/src/map/lazy.rs +6 -5
- data/ext/polars/src/map/series.rs +8 -7
- data/ext/polars/src/on_startup.rs +12 -5
- data/ext/polars/src/rb_modules.rs +2 -2
- data/ext/polars/src/series/aggregation.rs +85 -28
- data/ext/polars/src/series/construction.rs +1 -0
- data/ext/polars/src/series/export.rs +37 -33
- data/ext/polars/src/series/general.rs +120 -21
- data/ext/polars/src/series/mod.rs +29 -4
- data/lib/polars/array_expr.rb +382 -3
- data/lib/polars/array_name_space.rb +281 -0
- data/lib/polars/binary_expr.rb +67 -0
- data/lib/polars/binary_name_space.rb +43 -0
- data/lib/polars/cat_expr.rb +224 -0
- data/lib/polars/cat_name_space.rb +138 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/convert.rb +6 -6
- data/lib/polars/data_frame.rb +794 -27
- data/lib/polars/data_type_expr.rb +52 -0
- data/lib/polars/data_types.rb +26 -5
- data/lib/polars/date_time_expr.rb +252 -1
- data/lib/polars/date_time_name_space.rb +299 -0
- data/lib/polars/expr.rb +1248 -206
- data/lib/polars/functions/business.rb +95 -0
- data/lib/polars/functions/datatype.rb +21 -0
- data/lib/polars/functions/lazy.rb +14 -1
- data/lib/polars/io/csv.rb +1 -1
- data/lib/polars/io/iceberg.rb +27 -0
- data/lib/polars/io/json.rb +4 -4
- data/lib/polars/io/ndjson.rb +4 -4
- data/lib/polars/io/parquet.rb +32 -7
- data/lib/polars/io/scan_options.rb +4 -1
- data/lib/polars/lazy_frame.rb +1028 -28
- data/lib/polars/list_expr.rb +217 -17
- data/lib/polars/list_name_space.rb +231 -22
- data/lib/polars/meta_expr.rb +89 -0
- data/lib/polars/name_expr.rb +36 -0
- data/lib/polars/query_opt_flags.rb +50 -0
- data/lib/polars/scan_cast_options.rb +20 -1
- data/lib/polars/schema.rb +79 -3
- data/lib/polars/selector.rb +72 -0
- data/lib/polars/selectors.rb +3 -3
- data/lib/polars/series.rb +1053 -54
- data/lib/polars/string_expr.rb +436 -32
- data/lib/polars/string_name_space.rb +736 -50
- data/lib/polars/struct_expr.rb +103 -0
- data/lib/polars/struct_name_space.rb +19 -1
- data/lib/polars/utils/serde.rb +17 -0
- data/lib/polars/utils/various.rb +22 -1
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +11 -1
@@ -1,9 +1,12 @@
|
|
1
|
-
use magnus::{Error, IntoValue, Value,
|
1
|
+
use magnus::{Error, IntoValue, RArray, Ruby, Value, value::ReprValue};
|
2
2
|
use polars::prelude::*;
|
3
3
|
use polars::series::IsSorted;
|
4
|
+
use polars_core::utils::flatten::flatten_series;
|
4
5
|
|
5
6
|
use crate::conversion::*;
|
6
|
-
use crate::
|
7
|
+
use crate::exceptions::RbIndexError;
|
8
|
+
use crate::rb_modules;
|
9
|
+
use crate::{RbDataFrame, RbErr, RbPolarsErr, RbResult, RbSeries};
|
7
10
|
|
8
11
|
impl RbSeries {
|
9
12
|
pub fn struct_unnest(&self) -> RbResult<RbDataFrame> {
|
@@ -13,7 +16,6 @@ impl RbSeries {
|
|
13
16
|
Ok(df.into())
|
14
17
|
}
|
15
18
|
|
16
|
-
// TODO add to Ruby
|
17
19
|
pub fn struct_fields(&self) -> RbResult<Vec<String>> {
|
18
20
|
let binding = self.series.borrow();
|
19
21
|
let ca = binding.struct_().map_err(RbPolarsErr::from)?;
|
@@ -84,8 +86,39 @@ impl RbSeries {
|
|
84
86
|
}
|
85
87
|
}
|
86
88
|
|
87
|
-
pub fn
|
88
|
-
|
89
|
+
pub fn get_index(ruby: &Ruby, rb_self: &Self, index: usize) -> RbResult<Value> {
|
90
|
+
let binding = rb_self.series.borrow();
|
91
|
+
let av = match binding.get(index) {
|
92
|
+
Ok(v) => v,
|
93
|
+
Err(PolarsError::OutOfBounds(err)) => {
|
94
|
+
return Err(RbIndexError::new_err(err.to_string()));
|
95
|
+
}
|
96
|
+
Err(e) => return Err(RbPolarsErr::from(e).into()),
|
97
|
+
};
|
98
|
+
|
99
|
+
match av {
|
100
|
+
AnyValue::List(s) | AnyValue::Array(s, _) => {
|
101
|
+
let rbseries = RbSeries::new(s);
|
102
|
+
rb_modules::pl_utils().funcall("wrap_s", (rbseries,))
|
103
|
+
}
|
104
|
+
_ => Ok(Wrap(av).into_value_with(ruby)),
|
105
|
+
}
|
106
|
+
}
|
107
|
+
|
108
|
+
pub fn get_index_signed(ruby: &Ruby, rb_self: &Self, index: isize) -> RbResult<Value> {
|
109
|
+
let index = if index < 0 {
|
110
|
+
match rb_self.len().checked_sub(index.unsigned_abs()) {
|
111
|
+
Some(v) => v,
|
112
|
+
None => {
|
113
|
+
return Err(RbIndexError::new_err(
|
114
|
+
polars_err!(oob = index, rb_self.len()).to_string(),
|
115
|
+
));
|
116
|
+
}
|
117
|
+
}
|
118
|
+
} else {
|
119
|
+
usize::try_from(index).unwrap()
|
120
|
+
};
|
121
|
+
Self::get_index(ruby, rb_self, index)
|
89
122
|
}
|
90
123
|
|
91
124
|
pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
|
@@ -115,16 +148,17 @@ impl RbSeries {
|
|
115
148
|
self.series.borrow_mut().rename(name.into());
|
116
149
|
}
|
117
150
|
|
118
|
-
pub fn dtype(&
|
119
|
-
Wrap(
|
151
|
+
pub fn dtype(ruby: &Ruby, rb_self: &Self) -> Value {
|
152
|
+
Wrap(rb_self.series.borrow().dtype().clone()).into_value_with(ruby)
|
120
153
|
}
|
121
154
|
|
122
|
-
pub fn inner_dtype(&
|
123
|
-
|
155
|
+
pub fn inner_dtype(ruby: &Ruby, rb_self: &Self) -> Option<Value> {
|
156
|
+
rb_self
|
157
|
+
.series
|
124
158
|
.borrow()
|
125
159
|
.dtype()
|
126
160
|
.inner_dtype()
|
127
|
-
.map(|dt| Wrap(dt.clone()).
|
161
|
+
.map(|dt| Wrap(dt.clone()).into_value_with(ruby))
|
128
162
|
}
|
129
163
|
|
130
164
|
pub fn set_sorted_flag(&self, descending: bool) -> Self {
|
@@ -141,11 +175,11 @@ impl RbSeries {
|
|
141
175
|
self.series.borrow().n_chunks()
|
142
176
|
}
|
143
177
|
|
144
|
-
pub fn append(&
|
145
|
-
let mut binding =
|
178
|
+
pub fn append(ruby: &Ruby, rb_self: &Self, other: &RbSeries) -> RbResult<()> {
|
179
|
+
let mut binding = rb_self.series.borrow_mut();
|
146
180
|
let res = binding.append(&other.series.borrow());
|
147
181
|
if let Err(e) = res {
|
148
|
-
Err(Error::new(
|
182
|
+
Err(Error::new(ruby.exception_runtime_error(), e.to_string()))
|
149
183
|
} else {
|
150
184
|
Ok(())
|
151
185
|
}
|
@@ -159,22 +193,30 @@ impl RbSeries {
|
|
159
193
|
Ok(())
|
160
194
|
}
|
161
195
|
|
162
|
-
pub fn new_from_index(
|
163
|
-
|
164
|
-
|
196
|
+
pub fn new_from_index(
|
197
|
+
ruby: &Ruby,
|
198
|
+
rb_self: &Self,
|
199
|
+
index: usize,
|
200
|
+
length: usize,
|
201
|
+
) -> RbResult<Self> {
|
202
|
+
if index >= rb_self.series.borrow().len() {
|
203
|
+
Err(Error::new(
|
204
|
+
ruby.exception_arg_error(),
|
205
|
+
"index is out of bounds",
|
206
|
+
))
|
165
207
|
} else {
|
166
|
-
Ok(
|
208
|
+
Ok(rb_self.series.borrow().new_from_index(index, length).into())
|
167
209
|
}
|
168
210
|
}
|
169
211
|
|
170
|
-
pub fn filter(&
|
212
|
+
pub fn filter(ruby: &Ruby, rb_self: &Self, filter: &RbSeries) -> RbResult<Self> {
|
171
213
|
let filter_series = &filter.series.borrow();
|
172
214
|
if let Ok(ca) = filter_series.bool() {
|
173
|
-
let series =
|
215
|
+
let series = rb_self.series.borrow().filter(ca).unwrap();
|
174
216
|
Ok(series.into())
|
175
217
|
} else {
|
176
218
|
Err(Error::new(
|
177
|
-
|
219
|
+
ruby.exception_runtime_error(),
|
178
220
|
"Expected a boolean mask".to_string(),
|
179
221
|
))
|
180
222
|
}
|
@@ -279,12 +321,42 @@ impl RbSeries {
|
|
279
321
|
}
|
280
322
|
}
|
281
323
|
|
282
|
-
pub fn
|
324
|
+
pub fn not_(&self) -> RbResult<Self> {
|
283
325
|
let binding = self.series.borrow();
|
284
326
|
let bool = binding.bool().map_err(RbPolarsErr::from)?;
|
285
327
|
Ok((!bool).into_series().into())
|
286
328
|
}
|
287
329
|
|
330
|
+
pub fn shrink_dtype(&self) -> RbResult<Self> {
|
331
|
+
self.series
|
332
|
+
.borrow()
|
333
|
+
.shrink_type()
|
334
|
+
.map(Into::into)
|
335
|
+
.map_err(RbPolarsErr::from)
|
336
|
+
.map_err(RbErr::from)
|
337
|
+
}
|
338
|
+
|
339
|
+
pub fn str_to_decimal_infer(&self, inference_length: usize) -> RbResult<Self> {
|
340
|
+
let s = self.series.borrow();
|
341
|
+
let ca = s.str().map_err(RbPolarsErr::from)?;
|
342
|
+
ca.to_decimal_infer(inference_length)
|
343
|
+
.map(Series::from)
|
344
|
+
.map(Into::into)
|
345
|
+
.map_err(RbPolarsErr::from)
|
346
|
+
.map_err(RbErr::from)
|
347
|
+
}
|
348
|
+
|
349
|
+
pub fn str_json_decode(&self, infer_schema_length: Option<usize>) -> RbResult<Self> {
|
350
|
+
let lock = self.series.borrow();
|
351
|
+
lock.str()
|
352
|
+
.map_err(RbPolarsErr::from)?
|
353
|
+
.json_decode(None, infer_schema_length)
|
354
|
+
.map(|s| s.with_name(lock.name().clone()))
|
355
|
+
.map(Into::into)
|
356
|
+
.map_err(RbPolarsErr::from)
|
357
|
+
.map_err(RbErr::from)
|
358
|
+
}
|
359
|
+
|
288
360
|
pub fn to_s(&self) -> String {
|
289
361
|
format!("{}", self.series.borrow())
|
290
362
|
}
|
@@ -370,6 +442,33 @@ impl RbSeries {
|
|
370
442
|
Ok(out.into())
|
371
443
|
}
|
372
444
|
|
445
|
+
pub fn get_chunks(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
|
446
|
+
ruby.ary_try_from_iter(
|
447
|
+
flatten_series(&rb_self.series.borrow())
|
448
|
+
.into_iter()
|
449
|
+
.map(|s| rb_modules::pl_utils().funcall::<_, _, Value>("wrap_s", (Self::new(s),))),
|
450
|
+
)
|
451
|
+
}
|
452
|
+
|
453
|
+
pub fn is_sorted(&self, descending: bool, nulls_last: bool) -> RbResult<bool> {
|
454
|
+
let options = SortOptions {
|
455
|
+
descending,
|
456
|
+
nulls_last,
|
457
|
+
multithreaded: true,
|
458
|
+
maintain_order: false,
|
459
|
+
limit: None,
|
460
|
+
};
|
461
|
+
Ok(self
|
462
|
+
.series
|
463
|
+
.borrow()
|
464
|
+
.is_sorted(options)
|
465
|
+
.map_err(RbPolarsErr::from)?)
|
466
|
+
}
|
467
|
+
|
468
|
+
pub fn clear(&self) -> Self {
|
469
|
+
self.series.borrow().clear().into()
|
470
|
+
}
|
471
|
+
|
373
472
|
pub fn time_unit(&self) -> Option<String> {
|
374
473
|
if let DataType::Datetime(tu, _) | DataType::Duration(tu) = self.series.borrow().dtype() {
|
375
474
|
Some(
|
@@ -8,13 +8,14 @@ mod import;
|
|
8
8
|
mod map;
|
9
9
|
mod scatter;
|
10
10
|
|
11
|
-
use magnus::{RArray, prelude::*};
|
11
|
+
use magnus::{DataTypeFunctions, RArray, Ruby, TypedData, gc, prelude::*};
|
12
12
|
use polars::prelude::*;
|
13
13
|
use std::cell::RefCell;
|
14
14
|
|
15
|
-
use crate::RbResult;
|
15
|
+
use crate::{ObjectValue, RbResult};
|
16
16
|
|
17
|
-
#[
|
17
|
+
#[derive(TypedData)]
|
18
|
+
#[magnus(class = "Polars::RbSeries", mark)]
|
18
19
|
pub struct RbSeries {
|
19
20
|
pub series: RefCell<Series>,
|
20
21
|
}
|
@@ -42,9 +43,33 @@ pub fn to_series(rs: RArray) -> RbResult<Vec<Series>> {
|
|
42
43
|
}
|
43
44
|
|
44
45
|
pub fn to_rbseries(s: Vec<Column>) -> RArray {
|
45
|
-
|
46
|
+
Ruby::get().unwrap().ary_from_iter(
|
46
47
|
s.into_iter()
|
47
48
|
.map(|c| c.take_materialized_series())
|
48
49
|
.map(RbSeries::new),
|
49
50
|
)
|
50
51
|
}
|
52
|
+
|
53
|
+
pub fn mark_series(marker: &gc::Marker, series: &Series) {
|
54
|
+
if let DataType::Object(_) = series.dtype() {
|
55
|
+
for i in 0..series.len() {
|
56
|
+
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
57
|
+
if let Some(o) = obj {
|
58
|
+
marker.mark(o.inner);
|
59
|
+
}
|
60
|
+
}
|
61
|
+
}
|
62
|
+
}
|
63
|
+
|
64
|
+
impl DataTypeFunctions for RbSeries {
|
65
|
+
fn mark(&self, marker: &gc::Marker) {
|
66
|
+
// this is not ideal, as objects will not be marked if unable to borrow
|
67
|
+
// this should never happen, but log for now to avoid panic,
|
68
|
+
// as most series will not use Object datatype
|
69
|
+
if let Ok(s) = &self.series.try_borrow() {
|
70
|
+
mark_series(marker, s);
|
71
|
+
} else {
|
72
|
+
eprintln!("[polars] Could not borrow!");
|
73
|
+
}
|
74
|
+
}
|
75
|
+
}
|
data/lib/polars/array_expr.rb
CHANGED
@@ -9,6 +9,181 @@ module Polars
|
|
9
9
|
self._rbexpr = expr._rbexpr
|
10
10
|
end
|
11
11
|
|
12
|
+
# Return the number of elements in each array.
|
13
|
+
#
|
14
|
+
# @return [Expr]
|
15
|
+
#
|
16
|
+
# @example
|
17
|
+
# df = Polars::DataFrame.new(
|
18
|
+
# {"a" => [[1, 2], [4, 3]]},
|
19
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
20
|
+
# )
|
21
|
+
# df.select(Polars.col("a").arr.len)
|
22
|
+
# # =>
|
23
|
+
# # shape: (2, 1)
|
24
|
+
# # ┌─────┐
|
25
|
+
# # │ a │
|
26
|
+
# # │ --- │
|
27
|
+
# # │ u32 │
|
28
|
+
# # ╞═════╡
|
29
|
+
# # │ 2 │
|
30
|
+
# # │ 2 │
|
31
|
+
# # └─────┘
|
32
|
+
def len
|
33
|
+
Utils.wrap_expr(_rbexpr.arr_len)
|
34
|
+
end
|
35
|
+
|
36
|
+
# Slice every subarray.
|
37
|
+
#
|
38
|
+
# @param offset [Integer]
|
39
|
+
# Start index. Negative indexing is supported.
|
40
|
+
# @param length [Integer]
|
41
|
+
# Length of the slice. If set to `nil` (default), the slice is taken to the
|
42
|
+
# end of the array.
|
43
|
+
# @param as_array [Boolean]
|
44
|
+
# Return result as a fixed-length `Array`, otherwise as a `List`.
|
45
|
+
# If true `length` and `offset` must be constant values.
|
46
|
+
#
|
47
|
+
# @return [Expr]
|
48
|
+
#
|
49
|
+
# @example
|
50
|
+
# df = Polars::DataFrame.new(
|
51
|
+
# {"a" => [[1, 2], [4, 3]]},
|
52
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
53
|
+
# )
|
54
|
+
# df.select(Polars.col("a").arr.slice(0, 1))
|
55
|
+
# # =>
|
56
|
+
# # shape: (2, 1)
|
57
|
+
# # ┌───────────┐
|
58
|
+
# # │ a │
|
59
|
+
# # │ --- │
|
60
|
+
# # │ list[i64] │
|
61
|
+
# # ╞═══════════╡
|
62
|
+
# # │ [1] │
|
63
|
+
# # │ [4] │
|
64
|
+
# # └───────────┘
|
65
|
+
#
|
66
|
+
# @example
|
67
|
+
# df = Polars::DataFrame.new(
|
68
|
+
# {"a" => [[1, 2], [4, 3]]},
|
69
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
70
|
+
# )
|
71
|
+
# df.select(Polars.col("a").arr.slice(0, 1, as_array: true))
|
72
|
+
# # =>
|
73
|
+
# # shape: (2, 1)
|
74
|
+
# # ┌───────────────┐
|
75
|
+
# # │ a │
|
76
|
+
# # │ --- │
|
77
|
+
# # │ array[i64, 1] │
|
78
|
+
# # ╞═══════════════╡
|
79
|
+
# # │ [1] │
|
80
|
+
# # │ [4] │
|
81
|
+
# # └───────────────┘
|
82
|
+
def slice(
|
83
|
+
offset,
|
84
|
+
length = nil,
|
85
|
+
as_array: false
|
86
|
+
)
|
87
|
+
offset = Utils.parse_into_expression(offset)
|
88
|
+
length = !length.nil? ? Utils.parse_into_expression(length) : nil
|
89
|
+
Utils.wrap_expr(_rbexpr.arr_slice(offset, length, as_array))
|
90
|
+
end
|
91
|
+
|
92
|
+
# Get the first `n` elements of the sub-arrays.
|
93
|
+
#
|
94
|
+
# @param n [Integer]
|
95
|
+
# Number of values to return for each sublist.
|
96
|
+
# @param as_array [Boolean]
|
97
|
+
# Return result as a fixed-length `Array`, otherwise as a `List`.
|
98
|
+
# If true `n` must be a constant value.
|
99
|
+
#
|
100
|
+
# @return [Expr]
|
101
|
+
#
|
102
|
+
# @example
|
103
|
+
# df = Polars::DataFrame.new(
|
104
|
+
# {"a" => [[1, 2], [4, 3]]},
|
105
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
106
|
+
# )
|
107
|
+
# df.select(Polars.col("a").arr.head(1))
|
108
|
+
# # =>
|
109
|
+
# # shape: (2, 1)
|
110
|
+
# # ┌───────────┐
|
111
|
+
# # │ a │
|
112
|
+
# # │ --- │
|
113
|
+
# # │ list[i64] │
|
114
|
+
# # ╞═══════════╡
|
115
|
+
# # │ [1] │
|
116
|
+
# # │ [4] │
|
117
|
+
# # └───────────┘
|
118
|
+
#
|
119
|
+
# @example
|
120
|
+
# df = Polars::DataFrame.new(
|
121
|
+
# {"a" => [[1, 2], [4, 3]]},
|
122
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
123
|
+
# )
|
124
|
+
# df.select(Polars.col("a").arr.head(1, as_array: true))
|
125
|
+
# # =>
|
126
|
+
# # shape: (2, 1)
|
127
|
+
# # ┌───────────────┐
|
128
|
+
# # │ a │
|
129
|
+
# # │ --- │
|
130
|
+
# # │ array[i64, 1] │
|
131
|
+
# # ╞═══════════════╡
|
132
|
+
# # │ [1] │
|
133
|
+
# # │ [4] │
|
134
|
+
# # └───────────────┘
|
135
|
+
def head(n = 5, as_array: false)
|
136
|
+
slice(0, n, as_array: as_array)
|
137
|
+
end
|
138
|
+
|
139
|
+
# Slice the last `n` values of every sub-array.
|
140
|
+
#
|
141
|
+
# @param n [Integer]
|
142
|
+
# Number of values to return for each sublist.
|
143
|
+
# @param as_array [Boolean]
|
144
|
+
# Return result as a fixed-length `Array`, otherwise as a `List`.
|
145
|
+
# If true `n` must be a constant value.
|
146
|
+
#
|
147
|
+
# @return [Expr]
|
148
|
+
#
|
149
|
+
# @example
|
150
|
+
# df = Polars::DataFrame.new(
|
151
|
+
# {"a" => [[1, 2], [4, 3]]},
|
152
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
153
|
+
# )
|
154
|
+
# df.select(Polars.col("a").arr.tail(1))
|
155
|
+
# # =>
|
156
|
+
# # shape: (2, 1)
|
157
|
+
# # ┌───────────┐
|
158
|
+
# # │ a │
|
159
|
+
# # │ --- │
|
160
|
+
# # │ list[i64] │
|
161
|
+
# # ╞═══════════╡
|
162
|
+
# # │ [2] │
|
163
|
+
# # │ [3] │
|
164
|
+
# # └───────────┘
|
165
|
+
#
|
166
|
+
# @example
|
167
|
+
# df = Polars::DataFrame.new(
|
168
|
+
# {"a" => [[1, 2], [4, 3]]},
|
169
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
170
|
+
# )
|
171
|
+
# df.select(Polars.col("a").arr.tail(1, as_array: true))
|
172
|
+
# # =>
|
173
|
+
# # shape: (2, 1)
|
174
|
+
# # ┌───────────────┐
|
175
|
+
# # │ a │
|
176
|
+
# # │ --- │
|
177
|
+
# # │ array[i64, 1] │
|
178
|
+
# # ╞═══════════════╡
|
179
|
+
# # │ [2] │
|
180
|
+
# # │ [3] │
|
181
|
+
# # └───────────────┘
|
182
|
+
def tail(n = 5, as_array: false)
|
183
|
+
n = Utils.parse_into_expression(n)
|
184
|
+
Utils.wrap_expr(_rbexpr.arr_tail(n, as_array))
|
185
|
+
end
|
186
|
+
|
12
187
|
# Compute the min values of the sub-arrays.
|
13
188
|
#
|
14
189
|
# @return [Expr]
|
@@ -30,7 +205,7 @@ module Polars
|
|
30
205
|
# # │ 3 │
|
31
206
|
# # └─────┘
|
32
207
|
def min
|
33
|
-
Utils.wrap_expr(_rbexpr.
|
208
|
+
Utils.wrap_expr(_rbexpr.arr_min)
|
34
209
|
end
|
35
210
|
|
36
211
|
# Compute the max values of the sub-arrays.
|
@@ -54,7 +229,7 @@ module Polars
|
|
54
229
|
# # │ 4 │
|
55
230
|
# # └─────┘
|
56
231
|
def max
|
57
|
-
Utils.wrap_expr(_rbexpr.
|
232
|
+
Utils.wrap_expr(_rbexpr.arr_max)
|
58
233
|
end
|
59
234
|
|
60
235
|
# Compute the sum values of the sub-arrays.
|
@@ -78,7 +253,103 @@ module Polars
|
|
78
253
|
# # │ 7 │
|
79
254
|
# # └─────┘
|
80
255
|
def sum
|
81
|
-
Utils.wrap_expr(_rbexpr.
|
256
|
+
Utils.wrap_expr(_rbexpr.arr_sum)
|
257
|
+
end
|
258
|
+
|
259
|
+
# Compute the std of the values of the sub-arrays.
|
260
|
+
#
|
261
|
+
# @return [Expr]
|
262
|
+
#
|
263
|
+
# @example
|
264
|
+
# df = Polars::DataFrame.new(
|
265
|
+
# {"a" => [[1, 2], [4, 3]]},
|
266
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
267
|
+
# )
|
268
|
+
# df.select(Polars.col("a").arr.std)
|
269
|
+
# # =>
|
270
|
+
# # shape: (2, 1)
|
271
|
+
# # ┌──────────┐
|
272
|
+
# # │ a │
|
273
|
+
# # │ --- │
|
274
|
+
# # │ f64 │
|
275
|
+
# # ╞══════════╡
|
276
|
+
# # │ 0.707107 │
|
277
|
+
# # │ 0.707107 │
|
278
|
+
# # └──────────┘
|
279
|
+
def std(ddof: 1)
|
280
|
+
Utils.wrap_expr(_rbexpr.arr_std(ddof))
|
281
|
+
end
|
282
|
+
|
283
|
+
# Compute the var of the values of the sub-arrays.
|
284
|
+
#
|
285
|
+
# @return [Expr]
|
286
|
+
#
|
287
|
+
# @example
|
288
|
+
# df = Polars::DataFrame.new(
|
289
|
+
# {"a" => [[1, 2], [4, 3]]},
|
290
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
291
|
+
# )
|
292
|
+
# df.select(Polars.col("a").arr.var)
|
293
|
+
# # =>
|
294
|
+
# # shape: (2, 1)
|
295
|
+
# # ┌─────┐
|
296
|
+
# # │ a │
|
297
|
+
# # │ --- │
|
298
|
+
# # │ f64 │
|
299
|
+
# # ╞═════╡
|
300
|
+
# # │ 0.5 │
|
301
|
+
# # │ 0.5 │
|
302
|
+
# # └─────┘
|
303
|
+
def var(ddof: 1)
|
304
|
+
Utils.wrap_expr(_rbexpr.arr_var(ddof))
|
305
|
+
end
|
306
|
+
|
307
|
+
# Compute the mean of the values of the sub-arrays.
|
308
|
+
#
|
309
|
+
# @return [Expr]
|
310
|
+
#
|
311
|
+
# @example
|
312
|
+
# df = Polars::DataFrame.new(
|
313
|
+
# {"a" => [[1, 2, 3], [1, 1, 16]]},
|
314
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
315
|
+
# )
|
316
|
+
# df.select(Polars.col("a").arr.mean)
|
317
|
+
# # =>
|
318
|
+
# # shape: (2, 1)
|
319
|
+
# # ┌─────┐
|
320
|
+
# # │ a │
|
321
|
+
# # │ --- │
|
322
|
+
# # │ f64 │
|
323
|
+
# # ╞═════╡
|
324
|
+
# # │ 2.0 │
|
325
|
+
# # │ 6.0 │
|
326
|
+
# # └─────┘
|
327
|
+
def mean
|
328
|
+
Utils.wrap_expr(_rbexpr.arr_mean)
|
329
|
+
end
|
330
|
+
|
331
|
+
# Compute the median of the values of the sub-arrays.
|
332
|
+
#
|
333
|
+
# @return [Expr]
|
334
|
+
#
|
335
|
+
# @example
|
336
|
+
# df = Polars::DataFrame.new(
|
337
|
+
# {"a" => [[1, 2], [4, 3]]},
|
338
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
339
|
+
# )
|
340
|
+
# df.select(Polars.col("a").arr.median)
|
341
|
+
# # =>
|
342
|
+
# # shape: (2, 1)
|
343
|
+
# # ┌─────┐
|
344
|
+
# # │ a │
|
345
|
+
# # │ --- │
|
346
|
+
# # │ f64 │
|
347
|
+
# # ╞═════╡
|
348
|
+
# # │ 1.5 │
|
349
|
+
# # │ 3.5 │
|
350
|
+
# # └─────┘
|
351
|
+
def median
|
352
|
+
Utils.wrap_expr(_rbexpr.arr_median)
|
82
353
|
end
|
83
354
|
|
84
355
|
# Get the unique/distinct values in the array.
|
@@ -109,6 +380,32 @@ module Polars
|
|
109
380
|
Utils.wrap_expr(_rbexpr.arr_unique(maintain_order))
|
110
381
|
end
|
111
382
|
|
383
|
+
# Count the number of unique values in every sub-array.
|
384
|
+
#
|
385
|
+
# @return [Expr]
|
386
|
+
#
|
387
|
+
# @example
|
388
|
+
# df = Polars::DataFrame.new(
|
389
|
+
# {
|
390
|
+
# "a" => [[1, 1, 2], [2, 3, 4]],
|
391
|
+
# },
|
392
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
393
|
+
# )
|
394
|
+
# df.with_columns(n_unique: Polars.col("a").arr.n_unique)
|
395
|
+
# # =>
|
396
|
+
# # shape: (2, 2)
|
397
|
+
# # ┌───────────────┬──────────┐
|
398
|
+
# # │ a ┆ n_unique │
|
399
|
+
# # │ --- ┆ --- │
|
400
|
+
# # │ array[i64, 3] ┆ u32 │
|
401
|
+
# # ╞═══════════════╪══════════╡
|
402
|
+
# # │ [1, 1, 2] ┆ 2 │
|
403
|
+
# # │ [2, 3, 4] ┆ 3 │
|
404
|
+
# # └───────────────┴──────────┘
|
405
|
+
def n_unique
|
406
|
+
Utils.wrap_expr(_rbexpr.arr_n_unique)
|
407
|
+
end
|
408
|
+
|
112
409
|
# Convert an Array column into a List column with the same inner data type.
|
113
410
|
#
|
114
411
|
# @return [Expr]
|
@@ -535,5 +832,87 @@ module Polars
|
|
535
832
|
element = Utils.parse_into_expression(element, str_as_lit: true)
|
536
833
|
Utils.wrap_expr(_rbexpr.arr_count_matches(element))
|
537
834
|
end
|
835
|
+
|
836
|
+
# Convert the Series of type `Array` to a Series of type `Struct`.
|
837
|
+
#
|
838
|
+
# @param fields [Object]
|
839
|
+
# If the name and number of the desired fields are known in advance
|
840
|
+
# a list of field names can be given, which will be assigned by index.
|
841
|
+
# Otherwise, to dynamically assign field names, a custom function can be
|
842
|
+
# used; if neither are set, fields will be `field_0, field_1 .. field_n`.
|
843
|
+
#
|
844
|
+
# @return [Expr]
|
845
|
+
#
|
846
|
+
# @example Convert array to struct with default field name assignment:
|
847
|
+
# df = Polars::DataFrame.new(
|
848
|
+
# {"n" => [[0, 1, 2], [3, 4, 5]]}, schema: {"n" => Polars::Array.new(Polars::Int8, 3)}
|
849
|
+
# )
|
850
|
+
# df.with_columns(struct: Polars.col("n").arr.to_struct)
|
851
|
+
# # =>
|
852
|
+
# # shape: (2, 2)
|
853
|
+
# # ┌──────────────┬───────────┐
|
854
|
+
# # │ n ┆ struct │
|
855
|
+
# # │ --- ┆ --- │
|
856
|
+
# # │ array[i8, 3] ┆ struct[3] │
|
857
|
+
# # ╞══════════════╪═══════════╡
|
858
|
+
# # │ [0, 1, 2] ┆ {0,1,2} │
|
859
|
+
# # │ [3, 4, 5] ┆ {3,4,5} │
|
860
|
+
# # └──────────────┴───────────┘
|
861
|
+
def to_struct(fields: nil)
|
862
|
+
raise Todo if fields
|
863
|
+
if fields.is_a?(Enumerable)
|
864
|
+
field_names = fields.to_a
|
865
|
+
rbexpr = _rbexpr.arr_to_struct(nil)
|
866
|
+
Utils.wrap_expr(rbexpr).struct.rename_fields(field_names)
|
867
|
+
else
|
868
|
+
rbexpr = _rbexpr.arr_to_struct(fields)
|
869
|
+
Utils.wrap_expr(rbexpr)
|
870
|
+
end
|
871
|
+
end
|
872
|
+
|
873
|
+
# Shift array values by the given number of indices.
|
874
|
+
#
|
875
|
+
# @param n [Integer]
|
876
|
+
# Number of indices to shift forward. If a negative value is passed, values
|
877
|
+
# are shifted in the opposite direction instead.
|
878
|
+
#
|
879
|
+
# @return [Expr]
|
880
|
+
#
|
881
|
+
# @note
|
882
|
+
# This method is similar to the `LAG` operation in SQL when the value for `n`
|
883
|
+
# is positive. With a negative value for `n`, it is similar to `LEAD`.
|
884
|
+
#
|
885
|
+
# @example By default, array values are shifted forward by one index.
|
886
|
+
# df = Polars::DataFrame.new(
|
887
|
+
# {"a" => [[1, 2, 3], [4, 5, 6]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
888
|
+
# )
|
889
|
+
# df.with_columns(shift: Polars.col("a").arr.shift)
|
890
|
+
# # =>
|
891
|
+
# # shape: (2, 2)
|
892
|
+
# # ┌───────────────┬───────────────┐
|
893
|
+
# # │ a ┆ shift │
|
894
|
+
# # │ --- ┆ --- │
|
895
|
+
# # │ array[i64, 3] ┆ array[i64, 3] │
|
896
|
+
# # ╞═══════════════╪═══════════════╡
|
897
|
+
# # │ [1, 2, 3] ┆ [null, 1, 2] │
|
898
|
+
# # │ [4, 5, 6] ┆ [null, 4, 5] │
|
899
|
+
# # └───────────────┴───────────────┘
|
900
|
+
#
|
901
|
+
# @example Pass a negative value to shift in the opposite direction instead.
|
902
|
+
# df.with_columns(shift: Polars.col("a").arr.shift(-2))
|
903
|
+
# # =>
|
904
|
+
# # shape: (2, 2)
|
905
|
+
# # ┌───────────────┬─────────────────┐
|
906
|
+
# # │ a ┆ shift │
|
907
|
+
# # │ --- ┆ --- │
|
908
|
+
# # │ array[i64, 3] ┆ array[i64, 3] │
|
909
|
+
# # ╞═══════════════╪═════════════════╡
|
910
|
+
# # │ [1, 2, 3] ┆ [3, null, null] │
|
911
|
+
# # │ [4, 5, 6] ┆ [6, null, null] │
|
912
|
+
# # └───────────────┴─────────────────┘
|
913
|
+
def shift(n = 1)
|
914
|
+
n = Utils.parse_into_expression(n)
|
915
|
+
Utils.wrap_expr(_rbexpr.arr_shift(n))
|
916
|
+
end
|
538
917
|
end
|
539
918
|
end
|