polars-df 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +284 -216
- data/ext/polars/Cargo.toml +7 -4
- data/ext/polars/src/batched_csv.rs +2 -3
- data/ext/polars/src/conversion.rs +18 -17
- data/ext/polars/src/dataframe.rs +27 -63
- data/ext/polars/src/expr/categorical.rs +8 -1
- data/ext/polars/src/expr/general.rs +63 -4
- data/ext/polars/src/expr/rolling.rs +15 -10
- data/ext/polars/src/expr/string.rs +9 -9
- data/ext/polars/src/functions/range.rs +5 -10
- data/ext/polars/src/lazyframe.rs +28 -19
- data/ext/polars/src/lib.rs +20 -20
- data/ext/polars/src/map/dataframe.rs +1 -1
- data/ext/polars/src/map/mod.rs +2 -2
- data/ext/polars/src/map/series.rs +6 -6
- data/ext/polars/src/object.rs +0 -30
- data/ext/polars/src/on_startup.rs +32 -0
- data/ext/polars/src/series/aggregation.rs +3 -0
- data/ext/polars/src/series/construction.rs +1 -1
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/{series.rs → series/mod.rs} +21 -18
- data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +18 -18
- data/ext/polars/src/utils.rs +1 -1
- data/lib/polars/data_frame.rb +69 -65
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +10 -10
- data/lib/polars/date_time_name_space.rb +12 -12
- data/lib/polars/expr.rb +223 -18
- data/lib/polars/group_by.rb +1 -1
- data/lib/polars/io.rb +4 -4
- data/lib/polars/lazy_frame.rb +23 -23
- data/lib/polars/lazy_functions.rb +4 -20
- data/lib/polars/series.rb +289 -30
- data/lib/polars/sql_context.rb +1 -1
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +13 -13
- data/lib/polars/version.rb +1 -1
- metadata +7 -6
data/ext/polars/src/lazyframe.rs
CHANGED
@@ -87,7 +87,7 @@ impl RbLazyFrame {
|
|
87
87
|
let cache = bool::try_convert(arguments[6])?;
|
88
88
|
let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[7])?;
|
89
89
|
let low_memory = bool::try_convert(arguments[8])?;
|
90
|
-
let comment_char = Option::<String>::try_convert(arguments[9])?;
|
90
|
+
let comment_prefix = Option::<String>::try_convert(arguments[9])?;
|
91
91
|
let quote_char = Option::<String>::try_convert(arguments[10])?;
|
92
92
|
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[11])?;
|
93
93
|
let infer_schema_length = Option::<usize>::try_convert(arguments[12])?;
|
@@ -101,7 +101,6 @@ impl RbLazyFrame {
|
|
101
101
|
// end arguments
|
102
102
|
|
103
103
|
let null_values = null_values.map(|w| w.0);
|
104
|
-
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
105
104
|
let quote_char = quote_char.map(|s| s.as_bytes()[0]);
|
106
105
|
let separator = separator.as_bytes()[0];
|
107
106
|
let eol_char = eol_char.as_bytes()[0];
|
@@ -124,7 +123,7 @@ impl RbLazyFrame {
|
|
124
123
|
.with_cache(cache)
|
125
124
|
.with_dtype_overwrite(overwrite_dtype.as_ref())
|
126
125
|
.low_memory(low_memory)
|
127
|
-
.
|
126
|
+
.with_comment_prefix(comment_prefix.as_deref())
|
128
127
|
.with_quote_char(quote_char)
|
129
128
|
.with_end_of_line_char(eol_char)
|
130
129
|
.with_rechunk(rechunk)
|
@@ -510,48 +509,58 @@ impl RbLazyFrame {
|
|
510
509
|
ldf.fill_nan(fill_value.inner.clone()).into()
|
511
510
|
}
|
512
511
|
|
513
|
-
pub fn min(&self) -> Self {
|
512
|
+
pub fn min(&self) -> RbResult<Self> {
|
514
513
|
let ldf = self.ldf.clone();
|
515
|
-
ldf.min().into()
|
514
|
+
let out = ldf.min().map_err(RbPolarsErr::from)?;
|
515
|
+
Ok(out.into())
|
516
516
|
}
|
517
517
|
|
518
|
-
pub fn max(&self) -> Self {
|
518
|
+
pub fn max(&self) -> RbResult<Self> {
|
519
519
|
let ldf = self.ldf.clone();
|
520
|
-
ldf.max().into()
|
520
|
+
let out = ldf.max().map_err(RbPolarsErr::from)?;
|
521
|
+
Ok(out.into())
|
521
522
|
}
|
522
523
|
|
523
|
-
pub fn sum(&self) -> Self {
|
524
|
+
pub fn sum(&self) -> RbResult<Self> {
|
524
525
|
let ldf = self.ldf.clone();
|
525
|
-
ldf.sum().into()
|
526
|
+
let out = ldf.sum().map_err(RbPolarsErr::from)?;
|
527
|
+
Ok(out.into())
|
526
528
|
}
|
527
529
|
|
528
|
-
pub fn mean(&self) -> Self {
|
530
|
+
pub fn mean(&self) -> RbResult<Self> {
|
529
531
|
let ldf = self.ldf.clone();
|
530
|
-
ldf.mean().into()
|
532
|
+
let out = ldf.mean().map_err(RbPolarsErr::from)?;
|
533
|
+
Ok(out.into())
|
531
534
|
}
|
532
535
|
|
533
|
-
pub fn std(&self, ddof: u8) -> Self {
|
536
|
+
pub fn std(&self, ddof: u8) -> RbResult<Self> {
|
534
537
|
let ldf = self.ldf.clone();
|
535
|
-
ldf.std(ddof).into()
|
538
|
+
let out = ldf.std(ddof).map_err(RbPolarsErr::from)?;
|
539
|
+
Ok(out.into())
|
536
540
|
}
|
537
541
|
|
538
|
-
pub fn var(&self, ddof: u8) -> Self {
|
542
|
+
pub fn var(&self, ddof: u8) -> RbResult<Self> {
|
539
543
|
let ldf = self.ldf.clone();
|
540
|
-
ldf.var(ddof).into()
|
544
|
+
let out = ldf.var(ddof).map_err(RbPolarsErr::from)?;
|
545
|
+
Ok(out.into())
|
541
546
|
}
|
542
547
|
|
543
|
-
pub fn median(&self) -> Self {
|
548
|
+
pub fn median(&self) -> RbResult<Self> {
|
544
549
|
let ldf = self.ldf.clone();
|
545
|
-
ldf.median().into()
|
550
|
+
let out = ldf.median().map_err(RbPolarsErr::from)?;
|
551
|
+
Ok(out.into())
|
546
552
|
}
|
547
553
|
|
548
554
|
pub fn quantile(
|
549
555
|
&self,
|
550
556
|
quantile: &RbExpr,
|
551
557
|
interpolation: Wrap<QuantileInterpolOptions>,
|
552
|
-
) -> Self {
|
558
|
+
) -> RbResult<Self> {
|
553
559
|
let ldf = self.ldf.clone();
|
554
|
-
ldf
|
560
|
+
let out = ldf
|
561
|
+
.quantile(quantile.inner.clone(), interpolation.0)
|
562
|
+
.map_err(RbPolarsErr::from)?;
|
563
|
+
Ok(out.into())
|
555
564
|
}
|
556
565
|
|
557
566
|
pub fn explode(&self, column: RArray) -> RbResult<Self> {
|
data/ext/polars/src/lib.rs
CHANGED
@@ -8,6 +8,7 @@ mod functions;
|
|
8
8
|
mod lazyframe;
|
9
9
|
mod lazygroupby;
|
10
10
|
mod map;
|
11
|
+
mod on_startup;
|
11
12
|
mod object;
|
12
13
|
mod prelude;
|
13
14
|
pub(crate) mod rb_modules;
|
@@ -224,8 +225,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
224
225
|
class.define_method("drop", method!(RbDataFrame::drop, 1))?;
|
225
226
|
class.define_method("select_at_idx", method!(RbDataFrame::select_at_idx, 1))?;
|
226
227
|
class.define_method(
|
227
|
-
"
|
228
|
-
method!(RbDataFrame::
|
228
|
+
"get_column_index",
|
229
|
+
method!(RbDataFrame::get_column_index, 1),
|
229
230
|
)?;
|
230
231
|
class.define_method("column", method!(RbDataFrame::column, 1))?;
|
231
232
|
class.define_method("select", method!(RbDataFrame::select, 1))?;
|
@@ -235,14 +236,14 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
235
236
|
method!(RbDataFrame::take_with_series, 1),
|
236
237
|
)?;
|
237
238
|
class.define_method("replace", method!(RbDataFrame::replace, 2))?;
|
238
|
-
class.define_method("
|
239
|
-
class.define_method("
|
239
|
+
class.define_method("replace_column", method!(RbDataFrame::replace_column, 2))?;
|
240
|
+
class.define_method("insert_column", method!(RbDataFrame::insert_column, 2))?;
|
240
241
|
class.define_method("slice", method!(RbDataFrame::slice, 2))?;
|
241
242
|
class.define_method("head", method!(RbDataFrame::head, 1))?;
|
242
243
|
class.define_method("tail", method!(RbDataFrame::tail, 1))?;
|
243
244
|
class.define_method("is_unique", method!(RbDataFrame::is_unique, 0))?;
|
244
245
|
class.define_method("is_duplicated", method!(RbDataFrame::is_duplicated, 0))?;
|
245
|
-
class.define_method("
|
246
|
+
class.define_method("equals", method!(RbDataFrame::equals, 2))?;
|
246
247
|
class.define_method("with_row_count", method!(RbDataFrame::with_row_count, 2))?;
|
247
248
|
class.define_method("_clone", method!(RbDataFrame::clone, 0))?;
|
248
249
|
class.define_method("melt", method!(RbDataFrame::melt, 4))?;
|
@@ -250,18 +251,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
250
251
|
class.define_method("partition_by", method!(RbDataFrame::partition_by, 3))?;
|
251
252
|
class.define_method("shift", method!(RbDataFrame::shift, 1))?;
|
252
253
|
class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
|
253
|
-
class.define_method("max", method!(RbDataFrame::max, 0))?;
|
254
|
-
class.define_method("min", method!(RbDataFrame::min, 0))?;
|
255
|
-
class.define_method("sum", method!(RbDataFrame::sum, 0))?;
|
256
|
-
class.define_method("mean", method!(RbDataFrame::mean, 0))?;
|
257
|
-
class.define_method("std", method!(RbDataFrame::std, 1))?;
|
258
|
-
class.define_method("var", method!(RbDataFrame::var, 1))?;
|
259
|
-
class.define_method("median", method!(RbDataFrame::median, 0))?;
|
260
254
|
class.define_method("mean_horizontal", method!(RbDataFrame::mean_horizontal, 1))?;
|
261
255
|
class.define_method("max_horizontal", method!(RbDataFrame::max_horizontal, 0))?;
|
262
256
|
class.define_method("min_horizontal", method!(RbDataFrame::min_horizontal, 0))?;
|
263
257
|
class.define_method("sum_horizontal", method!(RbDataFrame::sum_horizontal, 1))?;
|
264
|
-
class.define_method("quantile", method!(RbDataFrame::quantile, 2))?;
|
265
258
|
class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 3))?;
|
266
259
|
class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
|
267
260
|
class.define_method("apply", method!(RbDataFrame::apply, 3))?;
|
@@ -309,8 +302,14 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
309
302
|
class.define_method("last", method!(RbExpr::last, 0))?;
|
310
303
|
class.define_method("implode", method!(RbExpr::implode, 0))?;
|
311
304
|
class.define_method("quantile", method!(RbExpr::quantile, 2))?;
|
305
|
+
class.define_method("cut", method!(RbExpr::cut, 4))?;
|
306
|
+
class.define_method("qcut", method!(RbExpr::qcut, 5))?;
|
307
|
+
class.define_method("qcut_uniform", method!(RbExpr::qcut_uniform, 5))?;
|
308
|
+
class.define_method("rle", method!(RbExpr::rle, 0))?;
|
309
|
+
class.define_method("rle_id", method!(RbExpr::rle_id, 0))?;
|
312
310
|
class.define_method("agg_groups", method!(RbExpr::agg_groups, 0))?;
|
313
311
|
class.define_method("count", method!(RbExpr::count, 0))?;
|
312
|
+
class.define_method("len", method!(RbExpr::len, 0))?;
|
314
313
|
class.define_method("value_counts", method!(RbExpr::value_counts, 2))?;
|
315
314
|
class.define_method("unique_counts", method!(RbExpr::unique_counts, 0))?;
|
316
315
|
class.define_method("null_count", method!(RbExpr::null_count, 0))?;
|
@@ -346,7 +345,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
346
345
|
class.define_method("is_first_distinct", method!(RbExpr::is_first_distinct, 0))?;
|
347
346
|
class.define_method("is_last_distinct", method!(RbExpr::is_last_distinct, 0))?;
|
348
347
|
class.define_method("explode", method!(RbExpr::explode, 0))?;
|
349
|
-
class.define_method("gather_every", method!(RbExpr::gather_every,
|
348
|
+
class.define_method("gather_every", method!(RbExpr::gather_every, 2))?;
|
350
349
|
class.define_method("tail", method!(RbExpr::tail, 1))?;
|
351
350
|
class.define_method("head", method!(RbExpr::head, 1))?;
|
352
351
|
class.define_method("slice", method!(RbExpr::slice, 2))?;
|
@@ -518,10 +517,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
518
517
|
class.define_method("rolling_min", method!(RbExpr::rolling_min, 6))?;
|
519
518
|
class.define_method("rolling_max", method!(RbExpr::rolling_max, 6))?;
|
520
519
|
class.define_method("rolling_mean", method!(RbExpr::rolling_mean, 6))?;
|
521
|
-
class.define_method("rolling_std", method!(RbExpr::rolling_std,
|
522
|
-
class.define_method("rolling_var", method!(RbExpr::rolling_var,
|
523
|
-
class.define_method("rolling_median", method!(RbExpr::rolling_median,
|
524
|
-
class.define_method("rolling_quantile", method!(RbExpr::rolling_quantile,
|
520
|
+
class.define_method("rolling_std", method!(RbExpr::rolling_std, 8))?;
|
521
|
+
class.define_method("rolling_var", method!(RbExpr::rolling_var, 8))?;
|
522
|
+
class.define_method("rolling_median", method!(RbExpr::rolling_median, 7))?;
|
523
|
+
class.define_method("rolling_quantile", method!(RbExpr::rolling_quantile, 9))?;
|
525
524
|
class.define_method("rolling_skew", method!(RbExpr::rolling_skew, 2))?;
|
526
525
|
class.define_method("lower_bound", method!(RbExpr::lower_bound, 0))?;
|
527
526
|
class.define_method("upper_bound", method!(RbExpr::upper_bound, 0))?;
|
@@ -550,6 +549,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
550
549
|
class.define_method("kurtosis", method!(RbExpr::kurtosis, 2))?;
|
551
550
|
class.define_method("str_concat", method!(RbExpr::str_concat, 2))?;
|
552
551
|
class.define_method("cat_set_ordering", method!(RbExpr::cat_set_ordering, 1))?;
|
552
|
+
class.define_method("cat_get_categories", method!(RbExpr::cat_get_categories, 0))?;
|
553
553
|
class.define_method("reshape", method!(RbExpr::reshape, 1))?;
|
554
554
|
class.define_method("cum_count", method!(RbExpr::cum_count, 1))?;
|
555
555
|
class.define_method("to_physical", method!(RbExpr::to_physical, 0))?;
|
@@ -792,7 +792,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
792
792
|
class.define_method("has_validity", method!(RbSeries::has_validity, 0))?;
|
793
793
|
class.define_method("sample_n", method!(RbSeries::sample_n, 4))?;
|
794
794
|
class.define_method("sample_frac", method!(RbSeries::sample_frac, 4))?;
|
795
|
-
class.define_method("
|
795
|
+
class.define_method("equals", method!(RbSeries::equals, 3))?;
|
796
796
|
class.define_method("eq", method!(RbSeries::eq, 1))?;
|
797
797
|
class.define_method("neq", method!(RbSeries::neq, 1))?;
|
798
798
|
class.define_method("gt", method!(RbSeries::gt, 1))?;
|
@@ -817,7 +817,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
817
817
|
class.define_method("kurtosis", method!(RbSeries::kurtosis, 2))?;
|
818
818
|
class.define_method("cast", method!(RbSeries::cast, 2))?;
|
819
819
|
class.define_method("time_unit", method!(RbSeries::time_unit, 0))?;
|
820
|
-
class.define_method("
|
820
|
+
class.define_method("scatter", method!(RbSeries::scatter, 2))?;
|
821
821
|
|
822
822
|
// set
|
823
823
|
// class.define_method("set_with_mask_str", method!(RbSeries::set_with_mask_str, 2))?;
|
@@ -187,7 +187,7 @@ pub fn apply_lambda_with_utf8_out_type(
|
|
187
187
|
lambda: Value,
|
188
188
|
init_null_count: usize,
|
189
189
|
first_value: Option<&str>,
|
190
|
-
) -> Utf8Chunked {
|
190
|
+
) -> StringChunked {
|
191
191
|
let skip = usize::from(first_value.is_some());
|
192
192
|
if init_null_count == df.height() {
|
193
193
|
ChunkedArray::full_null("apply", df.height())
|
data/ext/polars/src/map/mod.rs
CHANGED
@@ -198,11 +198,11 @@ fn iterator_to_utf8(
|
|
198
198
|
first_value: Option<&str>,
|
199
199
|
name: &str,
|
200
200
|
capacity: usize,
|
201
|
-
) -> Utf8Chunked {
|
201
|
+
) -> StringChunked {
|
202
202
|
let first_value = first_value.map(|v| v.to_string());
|
203
203
|
|
204
204
|
// safety: we know the iterators len
|
205
|
-
let mut ca: Utf8Chunked = unsafe {
|
205
|
+
let mut ca: StringChunked = unsafe {
|
206
206
|
if init_null_count > 0 {
|
207
207
|
(0..init_null_count)
|
208
208
|
.map(|_| None)
|
@@ -101,7 +101,7 @@ pub trait ApplyLambda<'a> {
|
|
101
101
|
lambda: Value,
|
102
102
|
init_null_count: usize,
|
103
103
|
first_value: Option<&str>,
|
104
|
-
) -> RbResult<Utf8Chunked>;
|
104
|
+
) -> RbResult<StringChunked>;
|
105
105
|
|
106
106
|
/// Apply a lambda with list output type
|
107
107
|
fn apply_lambda_with_list_out_type(
|
@@ -285,7 +285,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
285
285
|
lambda: Value,
|
286
286
|
init_null_count: usize,
|
287
287
|
first_value: Option<&str>,
|
288
|
-
) -> RbResult<Utf8Chunked> {
|
288
|
+
) -> RbResult<StringChunked> {
|
289
289
|
let skip = usize::from(first_value.is_some());
|
290
290
|
if init_null_count == self.len() {
|
291
291
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
@@ -562,7 +562,7 @@ where
|
|
562
562
|
lambda: Value,
|
563
563
|
init_null_count: usize,
|
564
564
|
first_value: Option<&str>,
|
565
|
-
) -> RbResult<Utf8Chunked> {
|
565
|
+
) -> RbResult<StringChunked> {
|
566
566
|
let skip = usize::from(first_value.is_some());
|
567
567
|
if init_null_count == self.len() {
|
568
568
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
@@ -705,7 +705,7 @@ where
|
|
705
705
|
}
|
706
706
|
}
|
707
707
|
|
708
|
-
impl<'a> ApplyLambda<'a> for Utf8Chunked {
|
708
|
+
impl<'a> ApplyLambda<'a> for StringChunked {
|
709
709
|
fn apply_lambda_unknown(&'a self, lambda: Value) -> RbResult<RbSeries> {
|
710
710
|
let mut null_count = 0;
|
711
711
|
for opt_v in self.into_iter() {
|
@@ -834,7 +834,7 @@ impl<'a> ApplyLambda<'a> for Utf8Chunked {
|
|
834
834
|
lambda: Value,
|
835
835
|
init_null_count: usize,
|
836
836
|
first_value: Option<&str>,
|
837
|
-
) -> RbResult<Utf8Chunked> {
|
837
|
+
) -> RbResult<StringChunked> {
|
838
838
|
let skip = usize::from(first_value.is_some());
|
839
839
|
if init_null_count == self.len() {
|
840
840
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
@@ -1079,7 +1079,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1079
1079
|
lambda: Value,
|
1080
1080
|
init_null_count: usize,
|
1081
1081
|
first_value: Option<&str>,
|
1082
|
-
) -> RbResult<Utf8Chunked> {
|
1082
|
+
) -> RbResult<StringChunked> {
|
1083
1083
|
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
1084
1084
|
|
1085
1085
|
let skip = usize::from(first_value.is_some());
|
data/ext/polars/src/object.rs
CHANGED
@@ -1,31 +1 @@
|
|
1
|
-
use std::any::Any;
|
2
|
-
use std::sync::Arc;
|
3
|
-
|
4
|
-
use magnus::IntoValue;
|
5
|
-
use polars_core::chunked_array::object::builder::ObjectChunkedBuilder;
|
6
|
-
use polars_core::chunked_array::object::registry;
|
7
|
-
use polars_core::chunked_array::object::registry::AnonymousObjectBuilder;
|
8
|
-
use polars_core::prelude::AnyValue;
|
9
|
-
|
10
|
-
use crate::prelude::ObjectValue;
|
11
|
-
use crate::Wrap;
|
12
|
-
|
13
1
|
pub(crate) const OBJECT_NAME: &str = "object";
|
14
|
-
|
15
|
-
pub(crate) fn register_object_builder() {
|
16
|
-
if !registry::is_object_builder_registered() {
|
17
|
-
let object_builder = Box::new(|name: &str, capacity: usize| {
|
18
|
-
Box::new(ObjectChunkedBuilder::<ObjectValue>::new(name, capacity))
|
19
|
-
as Box<dyn AnonymousObjectBuilder>
|
20
|
-
});
|
21
|
-
|
22
|
-
let object_converter = Arc::new(|av: AnyValue| {
|
23
|
-
let object = ObjectValue {
|
24
|
-
inner: Wrap(av).into_value().into(),
|
25
|
-
};
|
26
|
-
Box::new(object) as Box<dyn Any>
|
27
|
-
});
|
28
|
-
|
29
|
-
registry::register_object_builder(object_builder, object_converter)
|
30
|
-
}
|
31
|
-
}
|
@@ -0,0 +1,32 @@
|
|
1
|
+
use std::any::Any;
|
2
|
+
use std::sync::Arc;
|
3
|
+
|
4
|
+
use magnus::IntoValue;
|
5
|
+
use polars::prelude::*;
|
6
|
+
use polars_core::chunked_array::object::builder::ObjectChunkedBuilder;
|
7
|
+
use polars_core::chunked_array::object::registry;
|
8
|
+
use polars_core::chunked_array::object::registry::AnonymousObjectBuilder;
|
9
|
+
use polars_core::prelude::AnyValue;
|
10
|
+
|
11
|
+
use crate::prelude::ObjectValue;
|
12
|
+
use crate::Wrap;
|
13
|
+
|
14
|
+
pub(crate) fn register_object_builder() {
|
15
|
+
if !registry::is_object_builder_registered() {
|
16
|
+
let object_builder = Box::new(|name: &str, capacity: usize| {
|
17
|
+
Box::new(ObjectChunkedBuilder::<ObjectValue>::new(name, capacity))
|
18
|
+
as Box<dyn AnonymousObjectBuilder>
|
19
|
+
});
|
20
|
+
|
21
|
+
let object_converter = Arc::new(|av: AnyValue| {
|
22
|
+
let object = ObjectValue {
|
23
|
+
inner: Wrap(av).into_value().into(),
|
24
|
+
};
|
25
|
+
Box::new(object) as Box<dyn Any>
|
26
|
+
});
|
27
|
+
|
28
|
+
let object_size = std::mem::size_of::<ObjectValue>();
|
29
|
+
let physical_dtype = ArrowDataType::FixedSizeBinary(object_size);
|
30
|
+
registry::register_object_builder(object_builder, object_converter, physical_dtype)
|
31
|
+
}
|
32
|
+
}
|
@@ -17,6 +17,7 @@ impl RbSeries {
|
|
17
17
|
self.series
|
18
18
|
.borrow()
|
19
19
|
.max_as_series()
|
20
|
+
.map_err(RbPolarsErr::from)?
|
20
21
|
.get(0)
|
21
22
|
.map_err(RbPolarsErr::from)?,
|
22
23
|
)
|
@@ -48,6 +49,7 @@ impl RbSeries {
|
|
48
49
|
self.series
|
49
50
|
.borrow()
|
50
51
|
.min_as_series()
|
52
|
+
.map_err(RbPolarsErr::from)?
|
51
53
|
.get(0)
|
52
54
|
.map_err(RbPolarsErr::from)?,
|
53
55
|
)
|
@@ -75,6 +77,7 @@ impl RbSeries {
|
|
75
77
|
self.series
|
76
78
|
.borrow()
|
77
79
|
.sum_as_series()
|
80
|
+
.map_err(RbPolarsErr::from)?
|
78
81
|
.get(0)
|
79
82
|
.map_err(RbPolarsErr::from)?,
|
80
83
|
)
|
@@ -106,7 +106,7 @@ impl RbSeries {
|
|
106
106
|
Ok(s.into())
|
107
107
|
}
|
108
108
|
|
109
|
-
pub fn new_str(name: String, val: Wrap<
|
109
|
+
pub fn new_str(name: String, val: Wrap<StringChunked>, _strict: bool) -> Self {
|
110
110
|
let mut s = val.0.into_series();
|
111
111
|
s.rename(&name);
|
112
112
|
RbSeries::new(s)
|
@@ -9,8 +9,8 @@ impl RbSeries {
|
|
9
9
|
pub fn to_numo(&self) -> RbResult<Value> {
|
10
10
|
let s = &self.series.borrow();
|
11
11
|
match s.dtype() {
|
12
|
-
DataType::
|
13
|
-
let ca = s.
|
12
|
+
DataType::String => {
|
13
|
+
let ca = s.str().unwrap();
|
14
14
|
|
15
15
|
// TODO make more efficient
|
16
16
|
let np_arr = RArray::from_iter(ca);
|
@@ -3,7 +3,7 @@ mod arithmetic;
|
|
3
3
|
mod comparison;
|
4
4
|
mod construction;
|
5
5
|
mod export;
|
6
|
-
mod set_at_idx;
|
6
|
+
mod scatter;
|
7
7
|
|
8
8
|
use magnus::{exception, prelude::*, value::qnil, Error, IntoValue, RArray, Value};
|
9
9
|
use polars::prelude::*;
|
@@ -80,7 +80,7 @@ impl RbSeries {
|
|
80
80
|
|
81
81
|
pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
|
82
82
|
let val = format!("{}", self.series.borrow().get(index).unwrap());
|
83
|
-
if let DataType::
|
83
|
+
if let DataType::String | DataType::Categorical(_, _) = self.series.borrow().dtype() {
|
84
84
|
let v_trunc = &val[..val
|
85
85
|
.char_indices()
|
86
86
|
.take(str_lengths)
|
@@ -90,7 +90,7 @@ impl RbSeries {
|
|
90
90
|
if val == v_trunc {
|
91
91
|
val
|
92
92
|
} else {
|
93
|
-
format!("{}
|
93
|
+
format!("{}…", v_trunc)
|
94
94
|
}
|
95
95
|
} else {
|
96
96
|
val
|
@@ -273,15 +273,13 @@ impl RbSeries {
|
|
273
273
|
Ok(s.into())
|
274
274
|
}
|
275
275
|
|
276
|
-
pub fn
|
276
|
+
pub fn equals(&self, other: &RbSeries, null_equal: bool, strict: bool) -> bool {
|
277
277
|
if strict {
|
278
278
|
self.series.borrow().eq(&other.series.borrow())
|
279
279
|
} else if null_equal {
|
280
|
-
self.series
|
281
|
-
.borrow()
|
282
|
-
.series_equal_missing(&other.series.borrow())
|
280
|
+
self.series.borrow().equals_missing(&other.series.borrow())
|
283
281
|
} else {
|
284
|
-
self.series.borrow().series_equal(&other.series.borrow())
|
282
|
+
self.series.borrow().equals(&other.series.borrow())
|
285
283
|
}
|
286
284
|
}
|
287
285
|
|
@@ -315,10 +313,10 @@ impl RbSeries {
|
|
315
313
|
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
316
314
|
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
317
315
|
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
318
|
-
DataType::Categorical(_) => {
|
316
|
+
DataType::Categorical(_, _) => {
|
319
317
|
RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
|
320
318
|
}
|
321
|
-
DataType::Object(_) => {
|
319
|
+
DataType::Object(_, _) => {
|
322
320
|
let v = RArray::with_capacity(series.len());
|
323
321
|
for i in 0..series.len() {
|
324
322
|
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
@@ -377,8 +375,8 @@ impl RbSeries {
|
|
377
375
|
let ca = series.decimal().unwrap();
|
378
376
|
return Wrap(ca).into_value();
|
379
377
|
}
|
380
|
-
DataType::
|
381
|
-
let ca = series.
|
378
|
+
DataType::String => {
|
379
|
+
let ca = series.str().unwrap();
|
382
380
|
return Wrap(ca).into_value();
|
383
381
|
}
|
384
382
|
DataType::Struct(_) => {
|
@@ -442,7 +440,7 @@ impl RbSeries {
|
|
442
440
|
|
443
441
|
macro_rules! dispatch_apply {
|
444
442
|
($self:expr, $method:ident, $($args:expr),*) => {
|
445
|
-
if matches!($self.dtype(), DataType::Object(_)) {
|
443
|
+
if matches!($self.dtype(), DataType::Object(_, _)) {
|
446
444
|
// let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
|
447
445
|
// ca.$method($($args),*)
|
448
446
|
todo!()
|
@@ -463,7 +461,7 @@ impl RbSeries {
|
|
463
461
|
DataType::Datetime(_, _)
|
464
462
|
| DataType::Date
|
465
463
|
| DataType::Duration(_)
|
466
|
-
| DataType::Categorical(_)
|
464
|
+
| DataType::Categorical(_, _)
|
467
465
|
| DataType::Time
|
468
466
|
) || !skip_nulls
|
469
467
|
{
|
@@ -604,12 +602,12 @@ impl RbSeries {
|
|
604
602
|
)?;
|
605
603
|
ca.into_datetime(tu, tz).into_series()
|
606
604
|
}
|
607
|
-
Some(DataType::
|
605
|
+
Some(DataType::String) => {
|
608
606
|
let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
|
609
607
|
|
610
608
|
ca.into_series()
|
611
609
|
}
|
612
|
-
Some(DataType::Object(_)) => {
|
610
|
+
Some(DataType::Object(_, _)) => {
|
613
611
|
let ca =
|
614
612
|
dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
|
615
613
|
ca.into_series()
|
@@ -656,8 +654,13 @@ impl RbSeries {
|
|
656
654
|
self.series.borrow_mut().shrink_to_fit();
|
657
655
|
}
|
658
656
|
|
659
|
-
pub fn dot(&self, other: &RbSeries) ->
|
660
|
-
self
|
657
|
+
pub fn dot(&self, other: &RbSeries) -> RbResult<f64> {
|
658
|
+
let out = self
|
659
|
+
.series
|
660
|
+
.borrow()
|
661
|
+
.dot(&other.series.borrow())
|
662
|
+
.map_err(RbPolarsErr::from)?;
|
663
|
+
Ok(out)
|
661
664
|
}
|
662
665
|
|
663
666
|
pub fn skew(&self, bias: bool) -> RbResult<Option<f64>> {
|
@@ -5,9 +5,9 @@ use crate::error::RbPolarsErr;
|
|
5
5
|
use crate::{RbResult, RbSeries};
|
6
6
|
|
7
7
|
impl RbSeries {
|
8
|
-
pub fn
|
8
|
+
pub fn scatter(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
|
9
9
|
let mut s = self.series.borrow_mut();
|
10
|
-
match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
|
10
|
+
match scatter(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
|
11
11
|
Ok(out) => {
|
12
12
|
*s = out;
|
13
13
|
Ok(())
|
@@ -17,7 +17,7 @@ impl RbSeries {
|
|
17
17
|
}
|
18
18
|
}
|
19
19
|
|
20
|
-
fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
|
20
|
+
fn scatter(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
|
21
21
|
let logical_dtype = s.dtype().clone();
|
22
22
|
let idx = idx.cast(&IDX_DTYPE)?;
|
23
23
|
let idx = idx.rechunk();
|
@@ -43,62 +43,62 @@ fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Seri
|
|
43
43
|
DataType::Int8 => {
|
44
44
|
let ca: &mut ChunkedArray<Int8Type> = mutable_s.as_mut();
|
45
45
|
let values = values.i8()?;
|
46
|
-
std::mem::take(ca).
|
46
|
+
std::mem::take(ca).scatter(idx, values)
|
47
47
|
}
|
48
48
|
DataType::Int16 => {
|
49
49
|
let ca: &mut ChunkedArray<Int16Type> = mutable_s.as_mut();
|
50
50
|
let values = values.i16()?;
|
51
|
-
std::mem::take(ca).
|
51
|
+
std::mem::take(ca).scatter(idx, values)
|
52
52
|
}
|
53
53
|
DataType::Int32 => {
|
54
54
|
let ca: &mut ChunkedArray<Int32Type> = mutable_s.as_mut();
|
55
55
|
let values = values.i32()?;
|
56
|
-
std::mem::take(ca).
|
56
|
+
std::mem::take(ca).scatter(idx, values)
|
57
57
|
}
|
58
58
|
DataType::Int64 => {
|
59
59
|
let ca: &mut ChunkedArray<Int64Type> = mutable_s.as_mut();
|
60
60
|
let values = values.i64()?;
|
61
|
-
std::mem::take(ca).
|
61
|
+
std::mem::take(ca).scatter(idx, values)
|
62
62
|
}
|
63
63
|
DataType::UInt8 => {
|
64
64
|
let ca: &mut ChunkedArray<UInt8Type> = mutable_s.as_mut();
|
65
65
|
let values = values.u8()?;
|
66
|
-
std::mem::take(ca).
|
66
|
+
std::mem::take(ca).scatter(idx, values)
|
67
67
|
}
|
68
68
|
DataType::UInt16 => {
|
69
69
|
let ca: &mut ChunkedArray<UInt16Type> = mutable_s.as_mut();
|
70
70
|
let values = values.u16()?;
|
71
|
-
std::mem::take(ca).
|
71
|
+
std::mem::take(ca).scatter(idx, values)
|
72
72
|
}
|
73
73
|
DataType::UInt32 => {
|
74
74
|
let ca: &mut ChunkedArray<UInt32Type> = mutable_s.as_mut();
|
75
75
|
let values = values.u32()?;
|
76
|
-
std::mem::take(ca).
|
76
|
+
std::mem::take(ca).scatter(idx, values)
|
77
77
|
}
|
78
78
|
DataType::UInt64 => {
|
79
79
|
let ca: &mut ChunkedArray<UInt64Type> = mutable_s.as_mut();
|
80
80
|
let values = values.u64()?;
|
81
|
-
std::mem::take(ca).
|
81
|
+
std::mem::take(ca).scatter(idx, values)
|
82
82
|
}
|
83
83
|
DataType::Float32 => {
|
84
84
|
let ca: &mut ChunkedArray<Float32Type> = mutable_s.as_mut();
|
85
85
|
let values = values.f32()?;
|
86
|
-
std::mem::take(ca).
|
86
|
+
std::mem::take(ca).scatter(idx, values)
|
87
87
|
}
|
88
88
|
DataType::Float64 => {
|
89
89
|
let ca: &mut ChunkedArray<Float64Type> = mutable_s.as_mut();
|
90
90
|
let values = values.f64()?;
|
91
|
-
std::mem::take(ca).
|
91
|
+
std::mem::take(ca).scatter(idx, values)
|
92
92
|
}
|
93
93
|
DataType::Boolean => {
|
94
94
|
let ca = s.bool()?;
|
95
95
|
let values = values.bool()?;
|
96
|
-
ca.
|
96
|
+
ca.scatter(idx, values)
|
97
97
|
}
|
98
|
-
DataType::
|
99
|
-
let ca = s.
|
100
|
-
let values = values.
|
101
|
-
ca.
|
98
|
+
DataType::String => {
|
99
|
+
let ca = s.str()?;
|
100
|
+
let values = values.str()?;
|
101
|
+
ca.scatter(idx, values)
|
102
102
|
}
|
103
103
|
_ => panic!("not yet implemented for dtype: {}", logical_dtype),
|
104
104
|
};
|
data/ext/polars/src/utils.rs
CHANGED
@@ -23,7 +23,7 @@ macro_rules! apply_method_all_arrow_series2 {
|
|
23
23
|
($self:expr, $method:ident, $($args:expr),*) => {
|
24
24
|
match $self.dtype() {
|
25
25
|
DataType::Boolean => $self.bool().unwrap().$method($($args),*),
|
26
|
-
DataType::
|
26
|
+
DataType::String => $self.str().unwrap().$method($($args),*),
|
27
27
|
DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
|
28
28
|
DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
|
29
29
|
DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
|