polars-df 0.7.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +284 -216
- data/ext/polars/Cargo.toml +7 -4
- data/ext/polars/src/batched_csv.rs +2 -3
- data/ext/polars/src/conversion.rs +18 -17
- data/ext/polars/src/dataframe.rs +27 -63
- data/ext/polars/src/expr/categorical.rs +8 -1
- data/ext/polars/src/expr/general.rs +63 -4
- data/ext/polars/src/expr/rolling.rs +15 -10
- data/ext/polars/src/expr/string.rs +9 -9
- data/ext/polars/src/functions/range.rs +5 -10
- data/ext/polars/src/lazyframe.rs +28 -19
- data/ext/polars/src/lib.rs +20 -20
- data/ext/polars/src/map/dataframe.rs +1 -1
- data/ext/polars/src/map/mod.rs +2 -2
- data/ext/polars/src/map/series.rs +6 -6
- data/ext/polars/src/object.rs +0 -30
- data/ext/polars/src/on_startup.rs +32 -0
- data/ext/polars/src/series/aggregation.rs +3 -0
- data/ext/polars/src/series/construction.rs +1 -1
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/{series.rs → series/mod.rs} +21 -18
- data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +18 -18
- data/ext/polars/src/utils.rs +1 -1
- data/lib/polars/data_frame.rb +69 -65
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +10 -10
- data/lib/polars/date_time_name_space.rb +12 -12
- data/lib/polars/expr.rb +223 -18
- data/lib/polars/group_by.rb +1 -1
- data/lib/polars/io.rb +4 -4
- data/lib/polars/lazy_frame.rb +23 -23
- data/lib/polars/lazy_functions.rb +4 -20
- data/lib/polars/series.rb +289 -30
- data/lib/polars/sql_context.rb +1 -1
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +13 -13
- data/lib/polars/version.rb +1 -1
- metadata +7 -6
data/ext/polars/src/lazyframe.rs
CHANGED
@@ -87,7 +87,7 @@ impl RbLazyFrame {
|
|
87
87
|
let cache = bool::try_convert(arguments[6])?;
|
88
88
|
let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[7])?;
|
89
89
|
let low_memory = bool::try_convert(arguments[8])?;
|
90
|
-
let comment_char = Option::<String>::try_convert(arguments[9])?;
|
90
|
+
let comment_prefix = Option::<String>::try_convert(arguments[9])?;
|
91
91
|
let quote_char = Option::<String>::try_convert(arguments[10])?;
|
92
92
|
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[11])?;
|
93
93
|
let infer_schema_length = Option::<usize>::try_convert(arguments[12])?;
|
@@ -101,7 +101,6 @@ impl RbLazyFrame {
|
|
101
101
|
// end arguments
|
102
102
|
|
103
103
|
let null_values = null_values.map(|w| w.0);
|
104
|
-
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
105
104
|
let quote_char = quote_char.map(|s| s.as_bytes()[0]);
|
106
105
|
let separator = separator.as_bytes()[0];
|
107
106
|
let eol_char = eol_char.as_bytes()[0];
|
@@ -124,7 +123,7 @@ impl RbLazyFrame {
|
|
124
123
|
.with_cache(cache)
|
125
124
|
.with_dtype_overwrite(overwrite_dtype.as_ref())
|
126
125
|
.low_memory(low_memory)
|
127
|
-
.with_comment_char(comment_char)
|
126
|
+
.with_comment_prefix(comment_prefix.as_deref())
|
128
127
|
.with_quote_char(quote_char)
|
129
128
|
.with_end_of_line_char(eol_char)
|
130
129
|
.with_rechunk(rechunk)
|
@@ -510,48 +509,58 @@ impl RbLazyFrame {
|
|
510
509
|
ldf.fill_nan(fill_value.inner.clone()).into()
|
511
510
|
}
|
512
511
|
|
513
|
-
pub fn min(&self) -> Self {
|
512
|
+
pub fn min(&self) -> RbResult<Self> {
|
514
513
|
let ldf = self.ldf.clone();
|
515
|
-
ldf.min().into()
|
514
|
+
let out = ldf.min().map_err(RbPolarsErr::from)?;
|
515
|
+
Ok(out.into())
|
516
516
|
}
|
517
517
|
|
518
|
-
pub fn max(&self) -> Self {
|
518
|
+
pub fn max(&self) -> RbResult<Self> {
|
519
519
|
let ldf = self.ldf.clone();
|
520
|
-
ldf.max().into()
|
520
|
+
let out = ldf.max().map_err(RbPolarsErr::from)?;
|
521
|
+
Ok(out.into())
|
521
522
|
}
|
522
523
|
|
523
|
-
pub fn sum(&self) -> Self {
|
524
|
+
pub fn sum(&self) -> RbResult<Self> {
|
524
525
|
let ldf = self.ldf.clone();
|
525
|
-
ldf.sum().into()
|
526
|
+
let out = ldf.sum().map_err(RbPolarsErr::from)?;
|
527
|
+
Ok(out.into())
|
526
528
|
}
|
527
529
|
|
528
|
-
pub fn mean(&self) -> Self {
|
530
|
+
pub fn mean(&self) -> RbResult<Self> {
|
529
531
|
let ldf = self.ldf.clone();
|
530
|
-
ldf.mean().into()
|
532
|
+
let out = ldf.mean().map_err(RbPolarsErr::from)?;
|
533
|
+
Ok(out.into())
|
531
534
|
}
|
532
535
|
|
533
|
-
pub fn std(&self, ddof: u8) -> Self {
|
536
|
+
pub fn std(&self, ddof: u8) -> RbResult<Self> {
|
534
537
|
let ldf = self.ldf.clone();
|
535
|
-
ldf.std(ddof).into()
|
538
|
+
let out = ldf.std(ddof).map_err(RbPolarsErr::from)?;
|
539
|
+
Ok(out.into())
|
536
540
|
}
|
537
541
|
|
538
|
-
pub fn var(&self, ddof: u8) -> Self {
|
542
|
+
pub fn var(&self, ddof: u8) -> RbResult<Self> {
|
539
543
|
let ldf = self.ldf.clone();
|
540
|
-
ldf.var(ddof).into()
|
544
|
+
let out = ldf.var(ddof).map_err(RbPolarsErr::from)?;
|
545
|
+
Ok(out.into())
|
541
546
|
}
|
542
547
|
|
543
|
-
pub fn median(&self) -> Self {
|
548
|
+
pub fn median(&self) -> RbResult<Self> {
|
544
549
|
let ldf = self.ldf.clone();
|
545
|
-
ldf.median().into()
|
550
|
+
let out = ldf.median().map_err(RbPolarsErr::from)?;
|
551
|
+
Ok(out.into())
|
546
552
|
}
|
547
553
|
|
548
554
|
pub fn quantile(
|
549
555
|
&self,
|
550
556
|
quantile: &RbExpr,
|
551
557
|
interpolation: Wrap<QuantileInterpolOptions>,
|
552
|
-
) -> Self {
|
558
|
+
) -> RbResult<Self> {
|
553
559
|
let ldf = self.ldf.clone();
|
554
|
-
ldf.quantile(quantile.inner.clone(), interpolation.0).into()
|
560
|
+
let out = ldf
|
561
|
+
.quantile(quantile.inner.clone(), interpolation.0)
|
562
|
+
.map_err(RbPolarsErr::from)?;
|
563
|
+
Ok(out.into())
|
555
564
|
}
|
556
565
|
|
557
566
|
pub fn explode(&self, column: RArray) -> RbResult<Self> {
|
data/ext/polars/src/lib.rs
CHANGED
@@ -8,6 +8,7 @@ mod functions;
|
|
8
8
|
mod lazyframe;
|
9
9
|
mod lazygroupby;
|
10
10
|
mod map;
|
11
|
+
mod on_startup;
|
11
12
|
mod object;
|
12
13
|
mod prelude;
|
13
14
|
pub(crate) mod rb_modules;
|
@@ -224,8 +225,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
224
225
|
class.define_method("drop", method!(RbDataFrame::drop, 1))?;
|
225
226
|
class.define_method("select_at_idx", method!(RbDataFrame::select_at_idx, 1))?;
|
226
227
|
class.define_method(
|
227
|
-
"find_idx_by_name",
|
228
|
-
method!(RbDataFrame::find_idx_by_name, 1),
|
228
|
+
"get_column_index",
|
229
|
+
method!(RbDataFrame::get_column_index, 1),
|
229
230
|
)?;
|
230
231
|
class.define_method("column", method!(RbDataFrame::column, 1))?;
|
231
232
|
class.define_method("select", method!(RbDataFrame::select, 1))?;
|
@@ -235,14 +236,14 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
235
236
|
method!(RbDataFrame::take_with_series, 1),
|
236
237
|
)?;
|
237
238
|
class.define_method("replace", method!(RbDataFrame::replace, 2))?;
|
238
|
-
class.define_method("replace_at_idx", method!(RbDataFrame::replace_at_idx, 2))?;
|
239
|
-
class.define_method("insert_at_idx", method!(RbDataFrame::insert_at_idx, 2))?;
|
239
|
+
class.define_method("replace_column", method!(RbDataFrame::replace_column, 2))?;
|
240
|
+
class.define_method("insert_column", method!(RbDataFrame::insert_column, 2))?;
|
240
241
|
class.define_method("slice", method!(RbDataFrame::slice, 2))?;
|
241
242
|
class.define_method("head", method!(RbDataFrame::head, 1))?;
|
242
243
|
class.define_method("tail", method!(RbDataFrame::tail, 1))?;
|
243
244
|
class.define_method("is_unique", method!(RbDataFrame::is_unique, 0))?;
|
244
245
|
class.define_method("is_duplicated", method!(RbDataFrame::is_duplicated, 0))?;
|
245
|
-
class.define_method("frame_equal", method!(RbDataFrame::frame_equal, 2))?;
|
246
|
+
class.define_method("equals", method!(RbDataFrame::equals, 2))?;
|
246
247
|
class.define_method("with_row_count", method!(RbDataFrame::with_row_count, 2))?;
|
247
248
|
class.define_method("_clone", method!(RbDataFrame::clone, 0))?;
|
248
249
|
class.define_method("melt", method!(RbDataFrame::melt, 4))?;
|
@@ -250,18 +251,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
250
251
|
class.define_method("partition_by", method!(RbDataFrame::partition_by, 3))?;
|
251
252
|
class.define_method("shift", method!(RbDataFrame::shift, 1))?;
|
252
253
|
class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
|
253
|
-
class.define_method("max", method!(RbDataFrame::max, 0))?;
|
254
|
-
class.define_method("min", method!(RbDataFrame::min, 0))?;
|
255
|
-
class.define_method("sum", method!(RbDataFrame::sum, 0))?;
|
256
|
-
class.define_method("mean", method!(RbDataFrame::mean, 0))?;
|
257
|
-
class.define_method("std", method!(RbDataFrame::std, 1))?;
|
258
|
-
class.define_method("var", method!(RbDataFrame::var, 1))?;
|
259
|
-
class.define_method("median", method!(RbDataFrame::median, 0))?;
|
260
254
|
class.define_method("mean_horizontal", method!(RbDataFrame::mean_horizontal, 1))?;
|
261
255
|
class.define_method("max_horizontal", method!(RbDataFrame::max_horizontal, 0))?;
|
262
256
|
class.define_method("min_horizontal", method!(RbDataFrame::min_horizontal, 0))?;
|
263
257
|
class.define_method("sum_horizontal", method!(RbDataFrame::sum_horizontal, 1))?;
|
264
|
-
class.define_method("quantile", method!(RbDataFrame::quantile, 2))?;
|
265
258
|
class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 3))?;
|
266
259
|
class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
|
267
260
|
class.define_method("apply", method!(RbDataFrame::apply, 3))?;
|
@@ -309,8 +302,14 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
309
302
|
class.define_method("last", method!(RbExpr::last, 0))?;
|
310
303
|
class.define_method("implode", method!(RbExpr::implode, 0))?;
|
311
304
|
class.define_method("quantile", method!(RbExpr::quantile, 2))?;
|
305
|
+
class.define_method("cut", method!(RbExpr::cut, 4))?;
|
306
|
+
class.define_method("qcut", method!(RbExpr::qcut, 5))?;
|
307
|
+
class.define_method("qcut_uniform", method!(RbExpr::qcut_uniform, 5))?;
|
308
|
+
class.define_method("rle", method!(RbExpr::rle, 0))?;
|
309
|
+
class.define_method("rle_id", method!(RbExpr::rle_id, 0))?;
|
312
310
|
class.define_method("agg_groups", method!(RbExpr::agg_groups, 0))?;
|
313
311
|
class.define_method("count", method!(RbExpr::count, 0))?;
|
312
|
+
class.define_method("len", method!(RbExpr::len, 0))?;
|
314
313
|
class.define_method("value_counts", method!(RbExpr::value_counts, 2))?;
|
315
314
|
class.define_method("unique_counts", method!(RbExpr::unique_counts, 0))?;
|
316
315
|
class.define_method("null_count", method!(RbExpr::null_count, 0))?;
|
@@ -346,7 +345,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
346
345
|
class.define_method("is_first_distinct", method!(RbExpr::is_first_distinct, 0))?;
|
347
346
|
class.define_method("is_last_distinct", method!(RbExpr::is_last_distinct, 0))?;
|
348
347
|
class.define_method("explode", method!(RbExpr::explode, 0))?;
|
349
|
-
class.define_method("gather_every", method!(RbExpr::gather_every, 1))?;
|
348
|
+
class.define_method("gather_every", method!(RbExpr::gather_every, 2))?;
|
350
349
|
class.define_method("tail", method!(RbExpr::tail, 1))?;
|
351
350
|
class.define_method("head", method!(RbExpr::head, 1))?;
|
352
351
|
class.define_method("slice", method!(RbExpr::slice, 2))?;
|
@@ -518,10 +517,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
518
517
|
class.define_method("rolling_min", method!(RbExpr::rolling_min, 6))?;
|
519
518
|
class.define_method("rolling_max", method!(RbExpr::rolling_max, 6))?;
|
520
519
|
class.define_method("rolling_mean", method!(RbExpr::rolling_mean, 6))?;
|
521
|
-
class.define_method("rolling_std", method!(RbExpr::rolling_std,
|
522
|
-
class.define_method("rolling_var", method!(RbExpr::rolling_var,
|
523
|
-
class.define_method("rolling_median", method!(RbExpr::rolling_median,
|
524
|
-
class.define_method("rolling_quantile", method!(RbExpr::rolling_quantile,
|
520
|
+
class.define_method("rolling_std", method!(RbExpr::rolling_std, 8))?;
|
521
|
+
class.define_method("rolling_var", method!(RbExpr::rolling_var, 8))?;
|
522
|
+
class.define_method("rolling_median", method!(RbExpr::rolling_median, 7))?;
|
523
|
+
class.define_method("rolling_quantile", method!(RbExpr::rolling_quantile, 9))?;
|
525
524
|
class.define_method("rolling_skew", method!(RbExpr::rolling_skew, 2))?;
|
526
525
|
class.define_method("lower_bound", method!(RbExpr::lower_bound, 0))?;
|
527
526
|
class.define_method("upper_bound", method!(RbExpr::upper_bound, 0))?;
|
@@ -550,6 +549,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
550
549
|
class.define_method("kurtosis", method!(RbExpr::kurtosis, 2))?;
|
551
550
|
class.define_method("str_concat", method!(RbExpr::str_concat, 2))?;
|
552
551
|
class.define_method("cat_set_ordering", method!(RbExpr::cat_set_ordering, 1))?;
|
552
|
+
class.define_method("cat_get_categories", method!(RbExpr::cat_get_categories, 0))?;
|
553
553
|
class.define_method("reshape", method!(RbExpr::reshape, 1))?;
|
554
554
|
class.define_method("cum_count", method!(RbExpr::cum_count, 1))?;
|
555
555
|
class.define_method("to_physical", method!(RbExpr::to_physical, 0))?;
|
@@ -792,7 +792,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
792
792
|
class.define_method("has_validity", method!(RbSeries::has_validity, 0))?;
|
793
793
|
class.define_method("sample_n", method!(RbSeries::sample_n, 4))?;
|
794
794
|
class.define_method("sample_frac", method!(RbSeries::sample_frac, 4))?;
|
795
|
-
class.define_method("series_equal", method!(RbSeries::series_equal, 3))?;
|
795
|
+
class.define_method("equals", method!(RbSeries::equals, 3))?;
|
796
796
|
class.define_method("eq", method!(RbSeries::eq, 1))?;
|
797
797
|
class.define_method("neq", method!(RbSeries::neq, 1))?;
|
798
798
|
class.define_method("gt", method!(RbSeries::gt, 1))?;
|
@@ -817,7 +817,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
817
817
|
class.define_method("kurtosis", method!(RbSeries::kurtosis, 2))?;
|
818
818
|
class.define_method("cast", method!(RbSeries::cast, 2))?;
|
819
819
|
class.define_method("time_unit", method!(RbSeries::time_unit, 0))?;
|
820
|
-
class.define_method("set_at_idx", method!(RbSeries::set_at_idx, 2))?;
|
820
|
+
class.define_method("scatter", method!(RbSeries::scatter, 2))?;
|
821
821
|
|
822
822
|
// set
|
823
823
|
// class.define_method("set_with_mask_str", method!(RbSeries::set_with_mask_str, 2))?;
|
data/ext/polars/src/map/dataframe.rs
CHANGED
@@ -187,7 +187,7 @@ pub fn apply_lambda_with_utf8_out_type(
|
|
187
187
|
lambda: Value,
|
188
188
|
init_null_count: usize,
|
189
189
|
first_value: Option<&str>,
|
190
|
-
) -> Utf8Chunked {
|
190
|
+
) -> StringChunked {
|
191
191
|
let skip = usize::from(first_value.is_some());
|
192
192
|
if init_null_count == df.height() {
|
193
193
|
ChunkedArray::full_null("apply", df.height())
|
data/ext/polars/src/map/mod.rs
CHANGED
@@ -198,11 +198,11 @@ fn iterator_to_utf8(
|
|
198
198
|
first_value: Option<&str>,
|
199
199
|
name: &str,
|
200
200
|
capacity: usize,
|
201
|
-
) -> Utf8Chunked {
|
201
|
+
) -> StringChunked {
|
202
202
|
let first_value = first_value.map(|v| v.to_string());
|
203
203
|
|
204
204
|
// safety: we know the iterators len
|
205
|
-
let mut ca: Utf8Chunked = unsafe {
|
205
|
+
let mut ca: StringChunked = unsafe {
|
206
206
|
if init_null_count > 0 {
|
207
207
|
(0..init_null_count)
|
208
208
|
.map(|_| None)
|
data/ext/polars/src/map/series.rs
CHANGED
@@ -101,7 +101,7 @@ pub trait ApplyLambda<'a> {
|
|
101
101
|
lambda: Value,
|
102
102
|
init_null_count: usize,
|
103
103
|
first_value: Option<&str>,
|
104
|
-
) -> RbResult<Utf8Chunked>;
|
104
|
+
) -> RbResult<StringChunked>;
|
105
105
|
|
106
106
|
/// Apply a lambda with list output type
|
107
107
|
fn apply_lambda_with_list_out_type(
|
@@ -285,7 +285,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
285
285
|
lambda: Value,
|
286
286
|
init_null_count: usize,
|
287
287
|
first_value: Option<&str>,
|
288
|
-
) -> RbResult<Utf8Chunked> {
|
288
|
+
) -> RbResult<StringChunked> {
|
289
289
|
let skip = usize::from(first_value.is_some());
|
290
290
|
if init_null_count == self.len() {
|
291
291
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
@@ -562,7 +562,7 @@ where
|
|
562
562
|
lambda: Value,
|
563
563
|
init_null_count: usize,
|
564
564
|
first_value: Option<&str>,
|
565
|
-
) -> RbResult<Utf8Chunked> {
|
565
|
+
) -> RbResult<StringChunked> {
|
566
566
|
let skip = usize::from(first_value.is_some());
|
567
567
|
if init_null_count == self.len() {
|
568
568
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
@@ -705,7 +705,7 @@ where
|
|
705
705
|
}
|
706
706
|
}
|
707
707
|
|
708
|
-
impl<'a> ApplyLambda<'a> for Utf8Chunked {
|
708
|
+
impl<'a> ApplyLambda<'a> for StringChunked {
|
709
709
|
fn apply_lambda_unknown(&'a self, lambda: Value) -> RbResult<RbSeries> {
|
710
710
|
let mut null_count = 0;
|
711
711
|
for opt_v in self.into_iter() {
|
@@ -834,7 +834,7 @@ impl<'a> ApplyLambda<'a> for Utf8Chunked {
|
|
834
834
|
lambda: Value,
|
835
835
|
init_null_count: usize,
|
836
836
|
first_value: Option<&str>,
|
837
|
-
) -> RbResult<Utf8Chunked> {
|
837
|
+
) -> RbResult<StringChunked> {
|
838
838
|
let skip = usize::from(first_value.is_some());
|
839
839
|
if init_null_count == self.len() {
|
840
840
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
@@ -1079,7 +1079,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1079
1079
|
lambda: Value,
|
1080
1080
|
init_null_count: usize,
|
1081
1081
|
first_value: Option<&str>,
|
1082
|
-
) -> RbResult<Utf8Chunked> {
|
1082
|
+
) -> RbResult<StringChunked> {
|
1083
1083
|
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
1084
1084
|
|
1085
1085
|
let skip = usize::from(first_value.is_some());
|
data/ext/polars/src/object.rs
CHANGED
@@ -1,31 +1 @@
|
|
1
|
-
use std::any::Any;
|
2
|
-
use std::sync::Arc;
|
3
|
-
|
4
|
-
use magnus::IntoValue;
|
5
|
-
use polars_core::chunked_array::object::builder::ObjectChunkedBuilder;
|
6
|
-
use polars_core::chunked_array::object::registry;
|
7
|
-
use polars_core::chunked_array::object::registry::AnonymousObjectBuilder;
|
8
|
-
use polars_core::prelude::AnyValue;
|
9
|
-
|
10
|
-
use crate::prelude::ObjectValue;
|
11
|
-
use crate::Wrap;
|
12
|
-
|
13
1
|
pub(crate) const OBJECT_NAME: &str = "object";
|
14
|
-
|
15
|
-
pub(crate) fn register_object_builder() {
|
16
|
-
if !registry::is_object_builder_registered() {
|
17
|
-
let object_builder = Box::new(|name: &str, capacity: usize| {
|
18
|
-
Box::new(ObjectChunkedBuilder::<ObjectValue>::new(name, capacity))
|
19
|
-
as Box<dyn AnonymousObjectBuilder>
|
20
|
-
});
|
21
|
-
|
22
|
-
let object_converter = Arc::new(|av: AnyValue| {
|
23
|
-
let object = ObjectValue {
|
24
|
-
inner: Wrap(av).into_value().into(),
|
25
|
-
};
|
26
|
-
Box::new(object) as Box<dyn Any>
|
27
|
-
});
|
28
|
-
|
29
|
-
registry::register_object_builder(object_builder, object_converter)
|
30
|
-
}
|
31
|
-
}
|
data/ext/polars/src/on_startup.rs
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
use std::any::Any;
|
2
|
+
use std::sync::Arc;
|
3
|
+
|
4
|
+
use magnus::IntoValue;
|
5
|
+
use polars::prelude::*;
|
6
|
+
use polars_core::chunked_array::object::builder::ObjectChunkedBuilder;
|
7
|
+
use polars_core::chunked_array::object::registry;
|
8
|
+
use polars_core::chunked_array::object::registry::AnonymousObjectBuilder;
|
9
|
+
use polars_core::prelude::AnyValue;
|
10
|
+
|
11
|
+
use crate::prelude::ObjectValue;
|
12
|
+
use crate::Wrap;
|
13
|
+
|
14
|
+
pub(crate) fn register_object_builder() {
|
15
|
+
if !registry::is_object_builder_registered() {
|
16
|
+
let object_builder = Box::new(|name: &str, capacity: usize| {
|
17
|
+
Box::new(ObjectChunkedBuilder::<ObjectValue>::new(name, capacity))
|
18
|
+
as Box<dyn AnonymousObjectBuilder>
|
19
|
+
});
|
20
|
+
|
21
|
+
let object_converter = Arc::new(|av: AnyValue| {
|
22
|
+
let object = ObjectValue {
|
23
|
+
inner: Wrap(av).into_value().into(),
|
24
|
+
};
|
25
|
+
Box::new(object) as Box<dyn Any>
|
26
|
+
});
|
27
|
+
|
28
|
+
let object_size = std::mem::size_of::<ObjectValue>();
|
29
|
+
let physical_dtype = ArrowDataType::FixedSizeBinary(object_size);
|
30
|
+
registry::register_object_builder(object_builder, object_converter, physical_dtype)
|
31
|
+
}
|
32
|
+
}
|
data/ext/polars/src/series/aggregation.rs
CHANGED
@@ -17,6 +17,7 @@ impl RbSeries {
|
|
17
17
|
self.series
|
18
18
|
.borrow()
|
19
19
|
.max_as_series()
|
20
|
+
.map_err(RbPolarsErr::from)?
|
20
21
|
.get(0)
|
21
22
|
.map_err(RbPolarsErr::from)?,
|
22
23
|
)
|
@@ -48,6 +49,7 @@ impl RbSeries {
|
|
48
49
|
self.series
|
49
50
|
.borrow()
|
50
51
|
.min_as_series()
|
52
|
+
.map_err(RbPolarsErr::from)?
|
51
53
|
.get(0)
|
52
54
|
.map_err(RbPolarsErr::from)?,
|
53
55
|
)
|
@@ -75,6 +77,7 @@ impl RbSeries {
|
|
75
77
|
self.series
|
76
78
|
.borrow()
|
77
79
|
.sum_as_series()
|
80
|
+
.map_err(RbPolarsErr::from)?
|
78
81
|
.get(0)
|
79
82
|
.map_err(RbPolarsErr::from)?,
|
80
83
|
)
|
data/ext/polars/src/series/construction.rs
CHANGED
@@ -106,7 +106,7 @@ impl RbSeries {
|
|
106
106
|
Ok(s.into())
|
107
107
|
}
|
108
108
|
|
109
|
-
pub fn new_str(name: String, val: Wrap<Utf8Chunked>, _strict: bool) -> Self {
|
109
|
+
pub fn new_str(name: String, val: Wrap<StringChunked>, _strict: bool) -> Self {
|
110
110
|
let mut s = val.0.into_series();
|
111
111
|
s.rename(&name);
|
112
112
|
RbSeries::new(s)
|
data/ext/polars/src/series/export.rs
CHANGED
@@ -9,8 +9,8 @@ impl RbSeries {
|
|
9
9
|
pub fn to_numo(&self) -> RbResult<Value> {
|
10
10
|
let s = &self.series.borrow();
|
11
11
|
match s.dtype() {
|
12
|
-
DataType::Utf8 => {
|
13
|
-
let ca = s.utf8().unwrap();
|
12
|
+
DataType::String => {
|
13
|
+
let ca = s.str().unwrap();
|
14
14
|
|
15
15
|
// TODO make more efficient
|
16
16
|
let np_arr = RArray::from_iter(ca);
|
data/ext/polars/src/{series.rs → series/mod.rs}
CHANGED
@@ -3,7 +3,7 @@ mod arithmetic;
|
|
3
3
|
mod comparison;
|
4
4
|
mod construction;
|
5
5
|
mod export;
|
6
|
-
mod set_at_idx;
|
6
|
+
mod scatter;
|
7
7
|
|
8
8
|
use magnus::{exception, prelude::*, value::qnil, Error, IntoValue, RArray, Value};
|
9
9
|
use polars::prelude::*;
|
@@ -80,7 +80,7 @@ impl RbSeries {
|
|
80
80
|
|
81
81
|
pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
|
82
82
|
let val = format!("{}", self.series.borrow().get(index).unwrap());
|
83
|
-
if let DataType::Utf8 | DataType::Categorical(_) = self.series.borrow().dtype() {
|
83
|
+
if let DataType::String | DataType::Categorical(_, _) = self.series.borrow().dtype() {
|
84
84
|
let v_trunc = &val[..val
|
85
85
|
.char_indices()
|
86
86
|
.take(str_lengths)
|
@@ -90,7 +90,7 @@ impl RbSeries {
|
|
90
90
|
if val == v_trunc {
|
91
91
|
val
|
92
92
|
} else {
|
93
|
-
format!("{}
|
93
|
+
format!("{}…", v_trunc)
|
94
94
|
}
|
95
95
|
} else {
|
96
96
|
val
|
@@ -273,15 +273,13 @@ impl RbSeries {
|
|
273
273
|
Ok(s.into())
|
274
274
|
}
|
275
275
|
|
276
|
-
pub fn series_equal(&self, other: &RbSeries, null_equal: bool, strict: bool) -> bool {
|
276
|
+
pub fn equals(&self, other: &RbSeries, null_equal: bool, strict: bool) -> bool {
|
277
277
|
if strict {
|
278
278
|
self.series.borrow().eq(&other.series.borrow())
|
279
279
|
} else if null_equal {
|
280
|
-
self.series
|
281
|
-
.borrow()
|
282
|
-
.series_equal_missing(&other.series.borrow())
|
280
|
+
self.series.borrow().equals_missing(&other.series.borrow())
|
283
281
|
} else {
|
284
|
-
self.series.borrow().series_equal(&other.series.borrow())
|
282
|
+
self.series.borrow().equals(&other.series.borrow())
|
285
283
|
}
|
286
284
|
}
|
287
285
|
|
@@ -315,10 +313,10 @@ impl RbSeries {
|
|
315
313
|
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
316
314
|
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
317
315
|
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
318
|
-
DataType::Categorical(_) => {
|
316
|
+
DataType::Categorical(_, _) => {
|
319
317
|
RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
|
320
318
|
}
|
321
|
-
DataType::Object(_) => {
|
319
|
+
DataType::Object(_, _) => {
|
322
320
|
let v = RArray::with_capacity(series.len());
|
323
321
|
for i in 0..series.len() {
|
324
322
|
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
@@ -377,8 +375,8 @@ impl RbSeries {
|
|
377
375
|
let ca = series.decimal().unwrap();
|
378
376
|
return Wrap(ca).into_value();
|
379
377
|
}
|
380
|
-
DataType::Utf8 => {
|
381
|
-
let ca = series.utf8().unwrap();
|
378
|
+
DataType::String => {
|
379
|
+
let ca = series.str().unwrap();
|
382
380
|
return Wrap(ca).into_value();
|
383
381
|
}
|
384
382
|
DataType::Struct(_) => {
|
@@ -442,7 +440,7 @@ impl RbSeries {
|
|
442
440
|
|
443
441
|
macro_rules! dispatch_apply {
|
444
442
|
($self:expr, $method:ident, $($args:expr),*) => {
|
445
|
-
if matches!($self.dtype(), DataType::Object(_)) {
|
443
|
+
if matches!($self.dtype(), DataType::Object(_, _)) {
|
446
444
|
// let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
|
447
445
|
// ca.$method($($args),*)
|
448
446
|
todo!()
|
@@ -463,7 +461,7 @@ impl RbSeries {
|
|
463
461
|
DataType::Datetime(_, _)
|
464
462
|
| DataType::Date
|
465
463
|
| DataType::Duration(_)
|
466
|
-
| DataType::Categorical(_)
|
464
|
+
| DataType::Categorical(_, _)
|
467
465
|
| DataType::Time
|
468
466
|
) || !skip_nulls
|
469
467
|
{
|
@@ -604,12 +602,12 @@ impl RbSeries {
|
|
604
602
|
)?;
|
605
603
|
ca.into_datetime(tu, tz).into_series()
|
606
604
|
}
|
607
|
-
Some(DataType::Utf8) => {
|
605
|
+
Some(DataType::String) => {
|
608
606
|
let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
|
609
607
|
|
610
608
|
ca.into_series()
|
611
609
|
}
|
612
|
-
Some(DataType::Object(_)) => {
|
610
|
+
Some(DataType::Object(_, _)) => {
|
613
611
|
let ca =
|
614
612
|
dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
|
615
613
|
ca.into_series()
|
@@ -656,8 +654,13 @@ impl RbSeries {
|
|
656
654
|
self.series.borrow_mut().shrink_to_fit();
|
657
655
|
}
|
658
656
|
|
659
|
-
pub fn dot(&self, other: &RbSeries) ->
|
660
|
-
self
|
657
|
+
pub fn dot(&self, other: &RbSeries) -> RbResult<f64> {
|
658
|
+
let out = self
|
659
|
+
.series
|
660
|
+
.borrow()
|
661
|
+
.dot(&other.series.borrow())
|
662
|
+
.map_err(RbPolarsErr::from)?;
|
663
|
+
Ok(out)
|
661
664
|
}
|
662
665
|
|
663
666
|
pub fn skew(&self, bias: bool) -> RbResult<Option<f64>> {
|
data/ext/polars/src/series/{set_at_idx.rs → scatter.rs}
CHANGED
@@ -5,9 +5,9 @@ use crate::error::RbPolarsErr;
|
|
5
5
|
use crate::{RbResult, RbSeries};
|
6
6
|
|
7
7
|
impl RbSeries {
|
8
|
-
pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
|
8
|
+
pub fn scatter(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
|
9
9
|
let mut s = self.series.borrow_mut();
|
10
|
-
match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
|
10
|
+
match scatter(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
|
11
11
|
Ok(out) => {
|
12
12
|
*s = out;
|
13
13
|
Ok(())
|
@@ -17,7 +17,7 @@ impl RbSeries {
|
|
17
17
|
}
|
18
18
|
}
|
19
19
|
|
20
|
-
fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
|
20
|
+
fn scatter(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
|
21
21
|
let logical_dtype = s.dtype().clone();
|
22
22
|
let idx = idx.cast(&IDX_DTYPE)?;
|
23
23
|
let idx = idx.rechunk();
|
@@ -43,62 +43,62 @@ fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Seri
|
|
43
43
|
DataType::Int8 => {
|
44
44
|
let ca: &mut ChunkedArray<Int8Type> = mutable_s.as_mut();
|
45
45
|
let values = values.i8()?;
|
46
|
-
std::mem::take(ca).
|
46
|
+
std::mem::take(ca).scatter(idx, values)
|
47
47
|
}
|
48
48
|
DataType::Int16 => {
|
49
49
|
let ca: &mut ChunkedArray<Int16Type> = mutable_s.as_mut();
|
50
50
|
let values = values.i16()?;
|
51
|
-
std::mem::take(ca).
|
51
|
+
std::mem::take(ca).scatter(idx, values)
|
52
52
|
}
|
53
53
|
DataType::Int32 => {
|
54
54
|
let ca: &mut ChunkedArray<Int32Type> = mutable_s.as_mut();
|
55
55
|
let values = values.i32()?;
|
56
|
-
std::mem::take(ca).
|
56
|
+
std::mem::take(ca).scatter(idx, values)
|
57
57
|
}
|
58
58
|
DataType::Int64 => {
|
59
59
|
let ca: &mut ChunkedArray<Int64Type> = mutable_s.as_mut();
|
60
60
|
let values = values.i64()?;
|
61
|
-
std::mem::take(ca).
|
61
|
+
std::mem::take(ca).scatter(idx, values)
|
62
62
|
}
|
63
63
|
DataType::UInt8 => {
|
64
64
|
let ca: &mut ChunkedArray<UInt8Type> = mutable_s.as_mut();
|
65
65
|
let values = values.u8()?;
|
66
|
-
std::mem::take(ca).
|
66
|
+
std::mem::take(ca).scatter(idx, values)
|
67
67
|
}
|
68
68
|
DataType::UInt16 => {
|
69
69
|
let ca: &mut ChunkedArray<UInt16Type> = mutable_s.as_mut();
|
70
70
|
let values = values.u16()?;
|
71
|
-
std::mem::take(ca).
|
71
|
+
std::mem::take(ca).scatter(idx, values)
|
72
72
|
}
|
73
73
|
DataType::UInt32 => {
|
74
74
|
let ca: &mut ChunkedArray<UInt32Type> = mutable_s.as_mut();
|
75
75
|
let values = values.u32()?;
|
76
|
-
std::mem::take(ca).
|
76
|
+
std::mem::take(ca).scatter(idx, values)
|
77
77
|
}
|
78
78
|
DataType::UInt64 => {
|
79
79
|
let ca: &mut ChunkedArray<UInt64Type> = mutable_s.as_mut();
|
80
80
|
let values = values.u64()?;
|
81
|
-
std::mem::take(ca).
|
81
|
+
std::mem::take(ca).scatter(idx, values)
|
82
82
|
}
|
83
83
|
DataType::Float32 => {
|
84
84
|
let ca: &mut ChunkedArray<Float32Type> = mutable_s.as_mut();
|
85
85
|
let values = values.f32()?;
|
86
|
-
std::mem::take(ca).
|
86
|
+
std::mem::take(ca).scatter(idx, values)
|
87
87
|
}
|
88
88
|
DataType::Float64 => {
|
89
89
|
let ca: &mut ChunkedArray<Float64Type> = mutable_s.as_mut();
|
90
90
|
let values = values.f64()?;
|
91
|
-
std::mem::take(ca).
|
91
|
+
std::mem::take(ca).scatter(idx, values)
|
92
92
|
}
|
93
93
|
DataType::Boolean => {
|
94
94
|
let ca = s.bool()?;
|
95
95
|
let values = values.bool()?;
|
96
|
-
ca.
|
96
|
+
ca.scatter(idx, values)
|
97
97
|
}
|
98
|
-
DataType::Utf8 => {
|
99
|
-
let ca = s.utf8()?;
|
100
|
-
let values = values.utf8()?;
|
101
|
-
ca.
|
98
|
+
DataType::String => {
|
99
|
+
let ca = s.str()?;
|
100
|
+
let values = values.str()?;
|
101
|
+
ca.scatter(idx, values)
|
102
102
|
}
|
103
103
|
_ => panic!("not yet implemented for dtype: {}", logical_dtype),
|
104
104
|
};
|
data/ext/polars/src/utils.rs
CHANGED
@@ -23,7 +23,7 @@ macro_rules! apply_method_all_arrow_series2 {
|
|
23
23
|
($self:expr, $method:ident, $($args:expr),*) => {
|
24
24
|
match $self.dtype() {
|
25
25
|
DataType::Boolean => $self.bool().unwrap().$method($($args),*),
|
26
|
-
DataType::Utf8 => $self.utf8().unwrap().$method($($args),*),
|
26
|
+
DataType::String => $self.str().unwrap().$method($($args),*),
|
27
27
|
DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
|
28
28
|
DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
|
29
29
|
DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
|