polars-df 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/Cargo.lock +284 -216
  4. data/ext/polars/Cargo.toml +7 -4
  5. data/ext/polars/src/batched_csv.rs +2 -3
  6. data/ext/polars/src/conversion.rs +18 -17
  7. data/ext/polars/src/dataframe.rs +27 -63
  8. data/ext/polars/src/expr/categorical.rs +8 -1
  9. data/ext/polars/src/expr/general.rs +63 -4
  10. data/ext/polars/src/expr/rolling.rs +15 -10
  11. data/ext/polars/src/expr/string.rs +9 -9
  12. data/ext/polars/src/functions/range.rs +5 -10
  13. data/ext/polars/src/lazyframe.rs +28 -19
  14. data/ext/polars/src/lib.rs +20 -20
  15. data/ext/polars/src/map/dataframe.rs +1 -1
  16. data/ext/polars/src/map/mod.rs +2 -2
  17. data/ext/polars/src/map/series.rs +6 -6
  18. data/ext/polars/src/object.rs +0 -30
  19. data/ext/polars/src/on_startup.rs +32 -0
  20. data/ext/polars/src/series/aggregation.rs +3 -0
  21. data/ext/polars/src/series/construction.rs +1 -1
  22. data/ext/polars/src/series/export.rs +2 -2
  23. data/ext/polars/src/{series.rs → series/mod.rs} +21 -18
  24. data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +18 -18
  25. data/ext/polars/src/utils.rs +1 -1
  26. data/lib/polars/data_frame.rb +69 -65
  27. data/lib/polars/data_types.rb +4 -1
  28. data/lib/polars/date_time_expr.rb +10 -10
  29. data/lib/polars/date_time_name_space.rb +12 -12
  30. data/lib/polars/expr.rb +223 -18
  31. data/lib/polars/group_by.rb +1 -1
  32. data/lib/polars/io.rb +4 -4
  33. data/lib/polars/lazy_frame.rb +23 -23
  34. data/lib/polars/lazy_functions.rb +4 -20
  35. data/lib/polars/series.rb +289 -30
  36. data/lib/polars/sql_context.rb +1 -1
  37. data/lib/polars/struct_expr.rb +1 -1
  38. data/lib/polars/struct_name_space.rb +1 -1
  39. data/lib/polars/utils.rb +13 -13
  40. data/lib/polars/version.rb +1 -1
  41. metadata +7 -6
@@ -87,7 +87,7 @@ impl RbLazyFrame {
87
87
  let cache = bool::try_convert(arguments[6])?;
88
88
  let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[7])?;
89
89
  let low_memory = bool::try_convert(arguments[8])?;
90
- let comment_char = Option::<String>::try_convert(arguments[9])?;
90
+ let comment_prefix = Option::<String>::try_convert(arguments[9])?;
91
91
  let quote_char = Option::<String>::try_convert(arguments[10])?;
92
92
  let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[11])?;
93
93
  let infer_schema_length = Option::<usize>::try_convert(arguments[12])?;
@@ -101,7 +101,6 @@ impl RbLazyFrame {
101
101
  // end arguments
102
102
 
103
103
  let null_values = null_values.map(|w| w.0);
104
- let comment_char = comment_char.map(|s| s.as_bytes()[0]);
105
104
  let quote_char = quote_char.map(|s| s.as_bytes()[0]);
106
105
  let separator = separator.as_bytes()[0];
107
106
  let eol_char = eol_char.as_bytes()[0];
@@ -124,7 +123,7 @@ impl RbLazyFrame {
124
123
  .with_cache(cache)
125
124
  .with_dtype_overwrite(overwrite_dtype.as_ref())
126
125
  .low_memory(low_memory)
127
- .with_comment_char(comment_char)
126
+ .with_comment_prefix(comment_prefix.as_deref())
128
127
  .with_quote_char(quote_char)
129
128
  .with_end_of_line_char(eol_char)
130
129
  .with_rechunk(rechunk)
@@ -510,48 +509,58 @@ impl RbLazyFrame {
510
509
  ldf.fill_nan(fill_value.inner.clone()).into()
511
510
  }
512
511
 
513
- pub fn min(&self) -> Self {
512
+ pub fn min(&self) -> RbResult<Self> {
514
513
  let ldf = self.ldf.clone();
515
- ldf.min().into()
514
+ let out = ldf.min().map_err(RbPolarsErr::from)?;
515
+ Ok(out.into())
516
516
  }
517
517
 
518
- pub fn max(&self) -> Self {
518
+ pub fn max(&self) -> RbResult<Self> {
519
519
  let ldf = self.ldf.clone();
520
- ldf.max().into()
520
+ let out = ldf.max().map_err(RbPolarsErr::from)?;
521
+ Ok(out.into())
521
522
  }
522
523
 
523
- pub fn sum(&self) -> Self {
524
+ pub fn sum(&self) -> RbResult<Self> {
524
525
  let ldf = self.ldf.clone();
525
- ldf.sum().into()
526
+ let out = ldf.sum().map_err(RbPolarsErr::from)?;
527
+ Ok(out.into())
526
528
  }
527
529
 
528
- pub fn mean(&self) -> Self {
530
+ pub fn mean(&self) -> RbResult<Self> {
529
531
  let ldf = self.ldf.clone();
530
- ldf.mean().into()
532
+ let out = ldf.mean().map_err(RbPolarsErr::from)?;
533
+ Ok(out.into())
531
534
  }
532
535
 
533
- pub fn std(&self, ddof: u8) -> Self {
536
+ pub fn std(&self, ddof: u8) -> RbResult<Self> {
534
537
  let ldf = self.ldf.clone();
535
- ldf.std(ddof).into()
538
+ let out = ldf.std(ddof).map_err(RbPolarsErr::from)?;
539
+ Ok(out.into())
536
540
  }
537
541
 
538
- pub fn var(&self, ddof: u8) -> Self {
542
+ pub fn var(&self, ddof: u8) -> RbResult<Self> {
539
543
  let ldf = self.ldf.clone();
540
- ldf.var(ddof).into()
544
+ let out = ldf.var(ddof).map_err(RbPolarsErr::from)?;
545
+ Ok(out.into())
541
546
  }
542
547
 
543
- pub fn median(&self) -> Self {
548
+ pub fn median(&self) -> RbResult<Self> {
544
549
  let ldf = self.ldf.clone();
545
- ldf.median().into()
550
+ let out = ldf.median().map_err(RbPolarsErr::from)?;
551
+ Ok(out.into())
546
552
  }
547
553
 
548
554
  pub fn quantile(
549
555
  &self,
550
556
  quantile: &RbExpr,
551
557
  interpolation: Wrap<QuantileInterpolOptions>,
552
- ) -> Self {
558
+ ) -> RbResult<Self> {
553
559
  let ldf = self.ldf.clone();
554
- ldf.quantile(quantile.inner.clone(), interpolation.0).into()
560
+ let out = ldf
561
+ .quantile(quantile.inner.clone(), interpolation.0)
562
+ .map_err(RbPolarsErr::from)?;
563
+ Ok(out.into())
555
564
  }
556
565
 
557
566
  pub fn explode(&self, column: RArray) -> RbResult<Self> {
@@ -8,6 +8,7 @@ mod functions;
8
8
  mod lazyframe;
9
9
  mod lazygroupby;
10
10
  mod map;
11
+ mod on_startup;
11
12
  mod object;
12
13
  mod prelude;
13
14
  pub(crate) mod rb_modules;
@@ -224,8 +225,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
224
225
  class.define_method("drop", method!(RbDataFrame::drop, 1))?;
225
226
  class.define_method("select_at_idx", method!(RbDataFrame::select_at_idx, 1))?;
226
227
  class.define_method(
227
- "find_idx_by_name",
228
- method!(RbDataFrame::find_idx_by_name, 1),
228
+ "get_column_index",
229
+ method!(RbDataFrame::get_column_index, 1),
229
230
  )?;
230
231
  class.define_method("column", method!(RbDataFrame::column, 1))?;
231
232
  class.define_method("select", method!(RbDataFrame::select, 1))?;
@@ -235,14 +236,14 @@ fn init(ruby: &Ruby) -> RbResult<()> {
235
236
  method!(RbDataFrame::take_with_series, 1),
236
237
  )?;
237
238
  class.define_method("replace", method!(RbDataFrame::replace, 2))?;
238
- class.define_method("replace_at_idx", method!(RbDataFrame::replace_at_idx, 2))?;
239
- class.define_method("insert_at_idx", method!(RbDataFrame::insert_at_idx, 2))?;
239
+ class.define_method("replace_column", method!(RbDataFrame::replace_column, 2))?;
240
+ class.define_method("insert_column", method!(RbDataFrame::insert_column, 2))?;
240
241
  class.define_method("slice", method!(RbDataFrame::slice, 2))?;
241
242
  class.define_method("head", method!(RbDataFrame::head, 1))?;
242
243
  class.define_method("tail", method!(RbDataFrame::tail, 1))?;
243
244
  class.define_method("is_unique", method!(RbDataFrame::is_unique, 0))?;
244
245
  class.define_method("is_duplicated", method!(RbDataFrame::is_duplicated, 0))?;
245
- class.define_method("frame_equal", method!(RbDataFrame::frame_equal, 2))?;
246
+ class.define_method("equals", method!(RbDataFrame::equals, 2))?;
246
247
  class.define_method("with_row_count", method!(RbDataFrame::with_row_count, 2))?;
247
248
  class.define_method("_clone", method!(RbDataFrame::clone, 0))?;
248
249
  class.define_method("melt", method!(RbDataFrame::melt, 4))?;
@@ -250,18 +251,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
250
251
  class.define_method("partition_by", method!(RbDataFrame::partition_by, 3))?;
251
252
  class.define_method("shift", method!(RbDataFrame::shift, 1))?;
252
253
  class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
253
- class.define_method("max", method!(RbDataFrame::max, 0))?;
254
- class.define_method("min", method!(RbDataFrame::min, 0))?;
255
- class.define_method("sum", method!(RbDataFrame::sum, 0))?;
256
- class.define_method("mean", method!(RbDataFrame::mean, 0))?;
257
- class.define_method("std", method!(RbDataFrame::std, 1))?;
258
- class.define_method("var", method!(RbDataFrame::var, 1))?;
259
- class.define_method("median", method!(RbDataFrame::median, 0))?;
260
254
  class.define_method("mean_horizontal", method!(RbDataFrame::mean_horizontal, 1))?;
261
255
  class.define_method("max_horizontal", method!(RbDataFrame::max_horizontal, 0))?;
262
256
  class.define_method("min_horizontal", method!(RbDataFrame::min_horizontal, 0))?;
263
257
  class.define_method("sum_horizontal", method!(RbDataFrame::sum_horizontal, 1))?;
264
- class.define_method("quantile", method!(RbDataFrame::quantile, 2))?;
265
258
  class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 3))?;
266
259
  class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
267
260
  class.define_method("apply", method!(RbDataFrame::apply, 3))?;
@@ -309,8 +302,14 @@ fn init(ruby: &Ruby) -> RbResult<()> {
309
302
  class.define_method("last", method!(RbExpr::last, 0))?;
310
303
  class.define_method("implode", method!(RbExpr::implode, 0))?;
311
304
  class.define_method("quantile", method!(RbExpr::quantile, 2))?;
305
+ class.define_method("cut", method!(RbExpr::cut, 4))?;
306
+ class.define_method("qcut", method!(RbExpr::qcut, 5))?;
307
+ class.define_method("qcut_uniform", method!(RbExpr::qcut_uniform, 5))?;
308
+ class.define_method("rle", method!(RbExpr::rle, 0))?;
309
+ class.define_method("rle_id", method!(RbExpr::rle_id, 0))?;
312
310
  class.define_method("agg_groups", method!(RbExpr::agg_groups, 0))?;
313
311
  class.define_method("count", method!(RbExpr::count, 0))?;
312
+ class.define_method("len", method!(RbExpr::len, 0))?;
314
313
  class.define_method("value_counts", method!(RbExpr::value_counts, 2))?;
315
314
  class.define_method("unique_counts", method!(RbExpr::unique_counts, 0))?;
316
315
  class.define_method("null_count", method!(RbExpr::null_count, 0))?;
@@ -346,7 +345,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
346
345
  class.define_method("is_first_distinct", method!(RbExpr::is_first_distinct, 0))?;
347
346
  class.define_method("is_last_distinct", method!(RbExpr::is_last_distinct, 0))?;
348
347
  class.define_method("explode", method!(RbExpr::explode, 0))?;
349
- class.define_method("gather_every", method!(RbExpr::gather_every, 1))?;
348
+ class.define_method("gather_every", method!(RbExpr::gather_every, 2))?;
350
349
  class.define_method("tail", method!(RbExpr::tail, 1))?;
351
350
  class.define_method("head", method!(RbExpr::head, 1))?;
352
351
  class.define_method("slice", method!(RbExpr::slice, 2))?;
@@ -518,10 +517,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
518
517
  class.define_method("rolling_min", method!(RbExpr::rolling_min, 6))?;
519
518
  class.define_method("rolling_max", method!(RbExpr::rolling_max, 6))?;
520
519
  class.define_method("rolling_mean", method!(RbExpr::rolling_mean, 6))?;
521
- class.define_method("rolling_std", method!(RbExpr::rolling_std, 7))?;
522
- class.define_method("rolling_var", method!(RbExpr::rolling_var, 7))?;
523
- class.define_method("rolling_median", method!(RbExpr::rolling_median, 6))?;
524
- class.define_method("rolling_quantile", method!(RbExpr::rolling_quantile, 8))?;
520
+ class.define_method("rolling_std", method!(RbExpr::rolling_std, 8))?;
521
+ class.define_method("rolling_var", method!(RbExpr::rolling_var, 8))?;
522
+ class.define_method("rolling_median", method!(RbExpr::rolling_median, 7))?;
523
+ class.define_method("rolling_quantile", method!(RbExpr::rolling_quantile, 9))?;
525
524
  class.define_method("rolling_skew", method!(RbExpr::rolling_skew, 2))?;
526
525
  class.define_method("lower_bound", method!(RbExpr::lower_bound, 0))?;
527
526
  class.define_method("upper_bound", method!(RbExpr::upper_bound, 0))?;
@@ -550,6 +549,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
550
549
  class.define_method("kurtosis", method!(RbExpr::kurtosis, 2))?;
551
550
  class.define_method("str_concat", method!(RbExpr::str_concat, 2))?;
552
551
  class.define_method("cat_set_ordering", method!(RbExpr::cat_set_ordering, 1))?;
552
+ class.define_method("cat_get_categories", method!(RbExpr::cat_get_categories, 0))?;
553
553
  class.define_method("reshape", method!(RbExpr::reshape, 1))?;
554
554
  class.define_method("cum_count", method!(RbExpr::cum_count, 1))?;
555
555
  class.define_method("to_physical", method!(RbExpr::to_physical, 0))?;
@@ -792,7 +792,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
792
792
  class.define_method("has_validity", method!(RbSeries::has_validity, 0))?;
793
793
  class.define_method("sample_n", method!(RbSeries::sample_n, 4))?;
794
794
  class.define_method("sample_frac", method!(RbSeries::sample_frac, 4))?;
795
- class.define_method("series_equal", method!(RbSeries::series_equal, 3))?;
795
+ class.define_method("equals", method!(RbSeries::equals, 3))?;
796
796
  class.define_method("eq", method!(RbSeries::eq, 1))?;
797
797
  class.define_method("neq", method!(RbSeries::neq, 1))?;
798
798
  class.define_method("gt", method!(RbSeries::gt, 1))?;
@@ -817,7 +817,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
817
817
  class.define_method("kurtosis", method!(RbSeries::kurtosis, 2))?;
818
818
  class.define_method("cast", method!(RbSeries::cast, 2))?;
819
819
  class.define_method("time_unit", method!(RbSeries::time_unit, 0))?;
820
- class.define_method("set_at_idx", method!(RbSeries::set_at_idx, 2))?;
820
+ class.define_method("scatter", method!(RbSeries::scatter, 2))?;
821
821
 
822
822
  // set
823
823
  // class.define_method("set_with_mask_str", method!(RbSeries::set_with_mask_str, 2))?;
@@ -187,7 +187,7 @@ pub fn apply_lambda_with_utf8_out_type(
187
187
  lambda: Value,
188
188
  init_null_count: usize,
189
189
  first_value: Option<&str>,
190
- ) -> Utf8Chunked {
190
+ ) -> StringChunked {
191
191
  let skip = usize::from(first_value.is_some());
192
192
  if init_null_count == df.height() {
193
193
  ChunkedArray::full_null("apply", df.height())
@@ -198,11 +198,11 @@ fn iterator_to_utf8(
198
198
  first_value: Option<&str>,
199
199
  name: &str,
200
200
  capacity: usize,
201
- ) -> Utf8Chunked {
201
+ ) -> StringChunked {
202
202
  let first_value = first_value.map(|v| v.to_string());
203
203
 
204
204
  // safety: we know the iterators len
205
- let mut ca: Utf8Chunked = unsafe {
205
+ let mut ca: StringChunked = unsafe {
206
206
  if init_null_count > 0 {
207
207
  (0..init_null_count)
208
208
  .map(|_| None)
@@ -101,7 +101,7 @@ pub trait ApplyLambda<'a> {
101
101
  lambda: Value,
102
102
  init_null_count: usize,
103
103
  first_value: Option<&str>,
104
- ) -> RbResult<Utf8Chunked>;
104
+ ) -> RbResult<StringChunked>;
105
105
 
106
106
  /// Apply a lambda with list output type
107
107
  fn apply_lambda_with_list_out_type(
@@ -285,7 +285,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
285
285
  lambda: Value,
286
286
  init_null_count: usize,
287
287
  first_value: Option<&str>,
288
- ) -> RbResult<Utf8Chunked> {
288
+ ) -> RbResult<StringChunked> {
289
289
  let skip = usize::from(first_value.is_some());
290
290
  if init_null_count == self.len() {
291
291
  Ok(ChunkedArray::full_null(self.name(), self.len()))
@@ -562,7 +562,7 @@ where
562
562
  lambda: Value,
563
563
  init_null_count: usize,
564
564
  first_value: Option<&str>,
565
- ) -> RbResult<Utf8Chunked> {
565
+ ) -> RbResult<StringChunked> {
566
566
  let skip = usize::from(first_value.is_some());
567
567
  if init_null_count == self.len() {
568
568
  Ok(ChunkedArray::full_null(self.name(), self.len()))
@@ -705,7 +705,7 @@ where
705
705
  }
706
706
  }
707
707
 
708
- impl<'a> ApplyLambda<'a> for Utf8Chunked {
708
+ impl<'a> ApplyLambda<'a> for StringChunked {
709
709
  fn apply_lambda_unknown(&'a self, lambda: Value) -> RbResult<RbSeries> {
710
710
  let mut null_count = 0;
711
711
  for opt_v in self.into_iter() {
@@ -834,7 +834,7 @@ impl<'a> ApplyLambda<'a> for Utf8Chunked {
834
834
  lambda: Value,
835
835
  init_null_count: usize,
836
836
  first_value: Option<&str>,
837
- ) -> RbResult<Utf8Chunked> {
837
+ ) -> RbResult<StringChunked> {
838
838
  let skip = usize::from(first_value.is_some());
839
839
  if init_null_count == self.len() {
840
840
  Ok(ChunkedArray::full_null(self.name(), self.len()))
@@ -1079,7 +1079,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
1079
1079
  lambda: Value,
1080
1080
  init_null_count: usize,
1081
1081
  first_value: Option<&str>,
1082
- ) -> RbResult<Utf8Chunked> {
1082
+ ) -> RbResult<StringChunked> {
1083
1083
  let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
1084
1084
 
1085
1085
  let skip = usize::from(first_value.is_some());
@@ -1,31 +1 @@
1
- use std::any::Any;
2
- use std::sync::Arc;
3
-
4
- use magnus::IntoValue;
5
- use polars_core::chunked_array::object::builder::ObjectChunkedBuilder;
6
- use polars_core::chunked_array::object::registry;
7
- use polars_core::chunked_array::object::registry::AnonymousObjectBuilder;
8
- use polars_core::prelude::AnyValue;
9
-
10
- use crate::prelude::ObjectValue;
11
- use crate::Wrap;
12
-
13
1
  pub(crate) const OBJECT_NAME: &str = "object";
14
-
15
- pub(crate) fn register_object_builder() {
16
- if !registry::is_object_builder_registered() {
17
- let object_builder = Box::new(|name: &str, capacity: usize| {
18
- Box::new(ObjectChunkedBuilder::<ObjectValue>::new(name, capacity))
19
- as Box<dyn AnonymousObjectBuilder>
20
- });
21
-
22
- let object_converter = Arc::new(|av: AnyValue| {
23
- let object = ObjectValue {
24
- inner: Wrap(av).into_value().into(),
25
- };
26
- Box::new(object) as Box<dyn Any>
27
- });
28
-
29
- registry::register_object_builder(object_builder, object_converter)
30
- }
31
- }
@@ -0,0 +1,32 @@
1
+ use std::any::Any;
2
+ use std::sync::Arc;
3
+
4
+ use magnus::IntoValue;
5
+ use polars::prelude::*;
6
+ use polars_core::chunked_array::object::builder::ObjectChunkedBuilder;
7
+ use polars_core::chunked_array::object::registry;
8
+ use polars_core::chunked_array::object::registry::AnonymousObjectBuilder;
9
+ use polars_core::prelude::AnyValue;
10
+
11
+ use crate::prelude::ObjectValue;
12
+ use crate::Wrap;
13
+
14
+ pub(crate) fn register_object_builder() {
15
+ if !registry::is_object_builder_registered() {
16
+ let object_builder = Box::new(|name: &str, capacity: usize| {
17
+ Box::new(ObjectChunkedBuilder::<ObjectValue>::new(name, capacity))
18
+ as Box<dyn AnonymousObjectBuilder>
19
+ });
20
+
21
+ let object_converter = Arc::new(|av: AnyValue| {
22
+ let object = ObjectValue {
23
+ inner: Wrap(av).into_value().into(),
24
+ };
25
+ Box::new(object) as Box<dyn Any>
26
+ });
27
+
28
+ let object_size = std::mem::size_of::<ObjectValue>();
29
+ let physical_dtype = ArrowDataType::FixedSizeBinary(object_size);
30
+ registry::register_object_builder(object_builder, object_converter, physical_dtype)
31
+ }
32
+ }
@@ -17,6 +17,7 @@ impl RbSeries {
17
17
  self.series
18
18
  .borrow()
19
19
  .max_as_series()
20
+ .map_err(RbPolarsErr::from)?
20
21
  .get(0)
21
22
  .map_err(RbPolarsErr::from)?,
22
23
  )
@@ -48,6 +49,7 @@ impl RbSeries {
48
49
  self.series
49
50
  .borrow()
50
51
  .min_as_series()
52
+ .map_err(RbPolarsErr::from)?
51
53
  .get(0)
52
54
  .map_err(RbPolarsErr::from)?,
53
55
  )
@@ -75,6 +77,7 @@ impl RbSeries {
75
77
  self.series
76
78
  .borrow()
77
79
  .sum_as_series()
80
+ .map_err(RbPolarsErr::from)?
78
81
  .get(0)
79
82
  .map_err(RbPolarsErr::from)?,
80
83
  )
@@ -106,7 +106,7 @@ impl RbSeries {
106
106
  Ok(s.into())
107
107
  }
108
108
 
109
- pub fn new_str(name: String, val: Wrap<Utf8Chunked>, _strict: bool) -> Self {
109
+ pub fn new_str(name: String, val: Wrap<StringChunked>, _strict: bool) -> Self {
110
110
  let mut s = val.0.into_series();
111
111
  s.rename(&name);
112
112
  RbSeries::new(s)
@@ -9,8 +9,8 @@ impl RbSeries {
9
9
  pub fn to_numo(&self) -> RbResult<Value> {
10
10
  let s = &self.series.borrow();
11
11
  match s.dtype() {
12
- DataType::Utf8 => {
13
- let ca = s.utf8().unwrap();
12
+ DataType::String => {
13
+ let ca = s.str().unwrap();
14
14
 
15
15
  // TODO make more efficient
16
16
  let np_arr = RArray::from_iter(ca);
@@ -3,7 +3,7 @@ mod arithmetic;
3
3
  mod comparison;
4
4
  mod construction;
5
5
  mod export;
6
- mod set_at_idx;
6
+ mod scatter;
7
7
 
8
8
  use magnus::{exception, prelude::*, value::qnil, Error, IntoValue, RArray, Value};
9
9
  use polars::prelude::*;
@@ -80,7 +80,7 @@ impl RbSeries {
80
80
 
81
81
  pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
82
82
  let val = format!("{}", self.series.borrow().get(index).unwrap());
83
- if let DataType::Utf8 | DataType::Categorical(_) = self.series.borrow().dtype() {
83
+ if let DataType::String | DataType::Categorical(_, _) = self.series.borrow().dtype() {
84
84
  let v_trunc = &val[..val
85
85
  .char_indices()
86
86
  .take(str_lengths)
@@ -90,7 +90,7 @@ impl RbSeries {
90
90
  if val == v_trunc {
91
91
  val
92
92
  } else {
93
- format!("{}...", v_trunc)
93
+ format!("{}…", v_trunc)
94
94
  }
95
95
  } else {
96
96
  val
@@ -273,15 +273,13 @@ impl RbSeries {
273
273
  Ok(s.into())
274
274
  }
275
275
 
276
- pub fn series_equal(&self, other: &RbSeries, null_equal: bool, strict: bool) -> bool {
276
+ pub fn equals(&self, other: &RbSeries, null_equal: bool, strict: bool) -> bool {
277
277
  if strict {
278
278
  self.series.borrow().eq(&other.series.borrow())
279
279
  } else if null_equal {
280
- self.series
281
- .borrow()
282
- .series_equal_missing(&other.series.borrow())
280
+ self.series.borrow().equals_missing(&other.series.borrow())
283
281
  } else {
284
- self.series.borrow().series_equal(&other.series.borrow())
282
+ self.series.borrow().equals(&other.series.borrow())
285
283
  }
286
284
  }
287
285
 
@@ -315,10 +313,10 @@ impl RbSeries {
315
313
  DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
316
314
  DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
317
315
  DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
318
- DataType::Categorical(_) => {
316
+ DataType::Categorical(_, _) => {
319
317
  RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
320
318
  }
321
- DataType::Object(_) => {
319
+ DataType::Object(_, _) => {
322
320
  let v = RArray::with_capacity(series.len());
323
321
  for i in 0..series.len() {
324
322
  let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
@@ -377,8 +375,8 @@ impl RbSeries {
377
375
  let ca = series.decimal().unwrap();
378
376
  return Wrap(ca).into_value();
379
377
  }
380
- DataType::Utf8 => {
381
- let ca = series.utf8().unwrap();
378
+ DataType::String => {
379
+ let ca = series.str().unwrap();
382
380
  return Wrap(ca).into_value();
383
381
  }
384
382
  DataType::Struct(_) => {
@@ -442,7 +440,7 @@ impl RbSeries {
442
440
 
443
441
  macro_rules! dispatch_apply {
444
442
  ($self:expr, $method:ident, $($args:expr),*) => {
445
- if matches!($self.dtype(), DataType::Object(_)) {
443
+ if matches!($self.dtype(), DataType::Object(_, _)) {
446
444
  // let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
447
445
  // ca.$method($($args),*)
448
446
  todo!()
@@ -463,7 +461,7 @@ impl RbSeries {
463
461
  DataType::Datetime(_, _)
464
462
  | DataType::Date
465
463
  | DataType::Duration(_)
466
- | DataType::Categorical(_)
464
+ | DataType::Categorical(_, _)
467
465
  | DataType::Time
468
466
  ) || !skip_nulls
469
467
  {
@@ -604,12 +602,12 @@ impl RbSeries {
604
602
  )?;
605
603
  ca.into_datetime(tu, tz).into_series()
606
604
  }
607
- Some(DataType::Utf8) => {
605
+ Some(DataType::String) => {
608
606
  let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
609
607
 
610
608
  ca.into_series()
611
609
  }
612
- Some(DataType::Object(_)) => {
610
+ Some(DataType::Object(_, _)) => {
613
611
  let ca =
614
612
  dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
615
613
  ca.into_series()
@@ -656,8 +654,13 @@ impl RbSeries {
656
654
  self.series.borrow_mut().shrink_to_fit();
657
655
  }
658
656
 
659
- pub fn dot(&self, other: &RbSeries) -> Option<f64> {
660
- self.series.borrow().dot(&other.series.borrow())
657
+ pub fn dot(&self, other: &RbSeries) -> RbResult<f64> {
658
+ let out = self
659
+ .series
660
+ .borrow()
661
+ .dot(&other.series.borrow())
662
+ .map_err(RbPolarsErr::from)?;
663
+ Ok(out)
661
664
  }
662
665
 
663
666
  pub fn skew(&self, bias: bool) -> RbResult<Option<f64>> {
@@ -5,9 +5,9 @@ use crate::error::RbPolarsErr;
5
5
  use crate::{RbResult, RbSeries};
6
6
 
7
7
  impl RbSeries {
8
- pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
8
+ pub fn scatter(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
9
9
  let mut s = self.series.borrow_mut();
10
- match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
10
+ match scatter(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
11
11
  Ok(out) => {
12
12
  *s = out;
13
13
  Ok(())
@@ -17,7 +17,7 @@ impl RbSeries {
17
17
  }
18
18
  }
19
19
 
20
- fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
20
+ fn scatter(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
21
21
  let logical_dtype = s.dtype().clone();
22
22
  let idx = idx.cast(&IDX_DTYPE)?;
23
23
  let idx = idx.rechunk();
@@ -43,62 +43,62 @@ fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Seri
43
43
  DataType::Int8 => {
44
44
  let ca: &mut ChunkedArray<Int8Type> = mutable_s.as_mut();
45
45
  let values = values.i8()?;
46
- std::mem::take(ca).set_at_idx2(idx, values)
46
+ std::mem::take(ca).scatter(idx, values)
47
47
  }
48
48
  DataType::Int16 => {
49
49
  let ca: &mut ChunkedArray<Int16Type> = mutable_s.as_mut();
50
50
  let values = values.i16()?;
51
- std::mem::take(ca).set_at_idx2(idx, values)
51
+ std::mem::take(ca).scatter(idx, values)
52
52
  }
53
53
  DataType::Int32 => {
54
54
  let ca: &mut ChunkedArray<Int32Type> = mutable_s.as_mut();
55
55
  let values = values.i32()?;
56
- std::mem::take(ca).set_at_idx2(idx, values)
56
+ std::mem::take(ca).scatter(idx, values)
57
57
  }
58
58
  DataType::Int64 => {
59
59
  let ca: &mut ChunkedArray<Int64Type> = mutable_s.as_mut();
60
60
  let values = values.i64()?;
61
- std::mem::take(ca).set_at_idx2(idx, values)
61
+ std::mem::take(ca).scatter(idx, values)
62
62
  }
63
63
  DataType::UInt8 => {
64
64
  let ca: &mut ChunkedArray<UInt8Type> = mutable_s.as_mut();
65
65
  let values = values.u8()?;
66
- std::mem::take(ca).set_at_idx2(idx, values)
66
+ std::mem::take(ca).scatter(idx, values)
67
67
  }
68
68
  DataType::UInt16 => {
69
69
  let ca: &mut ChunkedArray<UInt16Type> = mutable_s.as_mut();
70
70
  let values = values.u16()?;
71
- std::mem::take(ca).set_at_idx2(idx, values)
71
+ std::mem::take(ca).scatter(idx, values)
72
72
  }
73
73
  DataType::UInt32 => {
74
74
  let ca: &mut ChunkedArray<UInt32Type> = mutable_s.as_mut();
75
75
  let values = values.u32()?;
76
- std::mem::take(ca).set_at_idx2(idx, values)
76
+ std::mem::take(ca).scatter(idx, values)
77
77
  }
78
78
  DataType::UInt64 => {
79
79
  let ca: &mut ChunkedArray<UInt64Type> = mutable_s.as_mut();
80
80
  let values = values.u64()?;
81
- std::mem::take(ca).set_at_idx2(idx, values)
81
+ std::mem::take(ca).scatter(idx, values)
82
82
  }
83
83
  DataType::Float32 => {
84
84
  let ca: &mut ChunkedArray<Float32Type> = mutable_s.as_mut();
85
85
  let values = values.f32()?;
86
- std::mem::take(ca).set_at_idx2(idx, values)
86
+ std::mem::take(ca).scatter(idx, values)
87
87
  }
88
88
  DataType::Float64 => {
89
89
  let ca: &mut ChunkedArray<Float64Type> = mutable_s.as_mut();
90
90
  let values = values.f64()?;
91
- std::mem::take(ca).set_at_idx2(idx, values)
91
+ std::mem::take(ca).scatter(idx, values)
92
92
  }
93
93
  DataType::Boolean => {
94
94
  let ca = s.bool()?;
95
95
  let values = values.bool()?;
96
- ca.set_at_idx2(idx, values)
96
+ ca.scatter(idx, values)
97
97
  }
98
- DataType::Utf8 => {
99
- let ca = s.utf8()?;
100
- let values = values.utf8()?;
101
- ca.set_at_idx2(idx, values)
98
+ DataType::String => {
99
+ let ca = s.str()?;
100
+ let values = values.str()?;
101
+ ca.scatter(idx, values)
102
102
  }
103
103
  _ => panic!("not yet implemented for dtype: {}", logical_dtype),
104
104
  };
@@ -23,7 +23,7 @@ macro_rules! apply_method_all_arrow_series2 {
23
23
  ($self:expr, $method:ident, $($args:expr),*) => {
24
24
  match $self.dtype() {
25
25
  DataType::Boolean => $self.bool().unwrap().$method($($args),*),
26
- DataType::Utf8 => $self.utf8().unwrap().$method($($args),*),
26
+ DataType::String => $self.str().unwrap().$method($($args),*),
27
27
  DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
28
28
  DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
29
29
  DataType::UInt32 => $self.u32().unwrap().$method($($args),*),