polars-df 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/Cargo.lock +290 -137
  4. data/Cargo.toml +1 -1
  5. data/README.md +3 -3
  6. data/ext/polars/Cargo.toml +5 -4
  7. data/ext/polars/src/apply/dataframe.rs +6 -6
  8. data/ext/polars/src/apply/series.rs +10 -10
  9. data/ext/polars/src/batched_csv.rs +6 -4
  10. data/ext/polars/src/conversion.rs +40 -13
  11. data/ext/polars/src/dataframe.rs +45 -43
  12. data/ext/polars/src/error.rs +8 -8
  13. data/ext/polars/src/file.rs +5 -4
  14. data/ext/polars/src/lazy/apply.rs +1 -1
  15. data/ext/polars/src/lazy/dataframe.rs +12 -6
  16. data/ext/polars/src/lazy/dsl.rs +99 -45
  17. data/ext/polars/src/lazy/meta.rs +10 -9
  18. data/ext/polars/src/lib.rs +28 -29
  19. data/ext/polars/src/object.rs +2 -1
  20. data/ext/polars/src/series.rs +23 -21
  21. data/lib/polars/batched_csv_reader.rb +1 -1
  22. data/lib/polars/cat_expr.rb +0 -4
  23. data/lib/polars/cat_name_space.rb +0 -4
  24. data/lib/polars/convert.rb +0 -7
  25. data/lib/polars/data_frame.rb +184 -217
  26. data/lib/polars/date_time_expr.rb +19 -151
  27. data/lib/polars/date_time_name_space.rb +17 -17
  28. data/lib/polars/expr.rb +68 -315
  29. data/lib/polars/group_by.rb +68 -51
  30. data/lib/polars/io.rb +7 -7
  31. data/lib/polars/lazy_frame.rb +4 -106
  32. data/lib/polars/lazy_functions.rb +14 -40
  33. data/lib/polars/lazy_group_by.rb +0 -8
  34. data/lib/polars/list_expr.rb +5 -27
  35. data/lib/polars/list_name_space.rb +5 -8
  36. data/lib/polars/series.rb +20 -16
  37. data/lib/polars/string_expr.rb +20 -76
  38. data/lib/polars/string_name_space.rb +5 -15
  39. data/lib/polars/struct_expr.rb +0 -2
  40. data/lib/polars/utils.rb +8 -0
  41. data/lib/polars/version.rb +1 -1
  42. metadata +3 -3
data/Cargo.toml CHANGED
@@ -4,7 +4,7 @@ members = ["ext/polars"]
4
4
  [patch.crates-io]
5
5
  jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
6
6
  halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
7
- arrow2 = { git = "https://github.com/ankane/arrow2", rev = "9f36b2b97446e6dd495473e4361a70d863ac8027" }
7
+ arrow2 = { git = "https://github.com/ankane/arrow2", rev = "ef0270922a217070ba9942567c0ff3263ae8c531" }
8
8
 
9
9
  [profile.release]
10
10
  strip = true
data/README.md CHANGED
@@ -73,9 +73,9 @@ From an array of hashes
73
73
 
74
74
  ```ruby
75
75
  Polars::DataFrame.new([
76
- {"a" => 1, "b" => "one"},
77
- {"a" => 2, "b" => "two"},
78
- {"a" => 3, "b" => "three"}
76
+ {a: 1, b: "one"},
77
+ {a: 2, b: "two"},
78
+ {a: 3, b: "three"}
79
79
  ])
80
80
  ```
81
81
 
data/ext/polars/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.2.4"
3
+ version = "0.3.0"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -11,18 +11,19 @@ crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
13
  ahash = "0.8"
14
- magnus = "0.4"
15
- polars-core = "0.26.1"
14
+ magnus = "0.5"
15
+ polars-core = "0.27.0"
16
16
  serde_json = "1"
17
17
 
18
18
  [dependencies.polars]
19
- version = "0.26.1"
19
+ version = "0.27.0"
20
20
  features = [
21
21
  "abs",
22
22
  "arange",
23
23
  "arg_where",
24
24
  "asof_join",
25
25
  "avro",
26
+ "binary_encoding",
26
27
  "concat_str",
27
28
  "cse",
28
29
  "csv-file",
data/ext/polars/src/apply/dataframe.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::{class, RArray, TryConvert, Value};
1
+ use magnus::{class, IntoValue, RArray, TryConvert, Value};
2
2
  use polars::prelude::*;
3
3
  use polars_core::frame::row::{rows_to_schema_first_non_null, Row};
4
4
  use polars_core::series::SeriesIter;
@@ -27,7 +27,7 @@ pub fn apply_lambda_unknown<'a>(
27
27
 
28
28
  for _ in 0..df.height() {
29
29
  let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
30
- let arg = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
30
+ let arg = (RArray::from_iter(iter),);
31
31
  let out: Value = lambda.funcall("call", arg)?;
32
32
 
33
33
  if out.is_nil() {
@@ -141,7 +141,7 @@ where
141
141
  let mut iters = get_iters_skip(df, init_null_count + skip);
142
142
  ((init_null_count + skip)..df.height()).map(move |_| {
143
143
  let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
144
- let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
144
+ let tpl = (RArray::from_iter(iter),);
145
145
  match lambda.funcall::<_, _, Value>("call", tpl) {
146
146
  Ok(val) => val.try_convert::<T>().ok(),
147
147
  Err(e) => panic!("ruby function failed {}", e),
@@ -158,7 +158,7 @@ pub fn apply_lambda_with_primitive_out_type<D>(
158
158
  ) -> ChunkedArray<D>
159
159
  where
160
160
  D: RbArrowPrimitiveType,
161
- D::Native: Into<Value> + TryConvert,
161
+ D::Native: IntoValue + TryConvert,
162
162
  {
163
163
  let skip = usize::from(first_value.is_some());
164
164
  if init_null_count == df.height() {
@@ -216,7 +216,7 @@ pub fn apply_lambda_with_list_out_type<'a>(
216
216
  let mut iters = get_iters_skip(df, init_null_count + skip);
217
217
  let iter = ((init_null_count + skip)..df.height()).map(|_| {
218
218
  let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
219
- let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
219
+ let tpl = (RArray::from_iter(iter),);
220
220
  match lambda.funcall::<_, _, Value>("call", tpl) {
221
221
  Ok(val) => match val.funcall::<_, _, Value>("_s", ()) {
222
222
  Ok(val) => val
@@ -254,7 +254,7 @@ pub fn apply_lambda_with_rows_output<'a>(
254
254
  let mut iters = get_iters_skip(df, init_null_count + skip);
255
255
  let mut row_iter = ((init_null_count + skip)..df.height()).map(|_| {
256
256
  let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
257
- let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
257
+ let tpl = (RArray::from_iter(iter),);
258
258
  match lambda.funcall::<_, _, Value>("call", tpl) {
259
259
  Ok(val) => {
260
260
  match val.try_convert::<RArray>().ok() {
data/ext/polars/src/apply/series.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::{class, RHash, TryConvert, Value};
1
+ use magnus::{class, IntoValue, RHash, TryConvert, Value};
2
2
  use polars::prelude::*;
3
3
 
4
4
  use super::*;
@@ -85,7 +85,7 @@ pub trait ApplyLambda<'a> {
85
85
  ) -> RbResult<ChunkedArray<D>>
86
86
  where
87
87
  D: RbArrowPrimitiveType,
88
- D::Native: Into<Value> + TryConvert;
88
+ D::Native: IntoValue + TryConvert;
89
89
 
90
90
  /// Apply a lambda with a boolean output type
91
91
  fn apply_lambda_with_bool_out_type(
@@ -130,14 +130,14 @@ pub trait ApplyLambda<'a> {
130
130
 
131
131
  pub fn call_lambda<T>(lambda: Value, in_val: T) -> RbResult<Value>
132
132
  where
133
- T: Into<Value>,
133
+ T: IntoValue,
134
134
  {
135
135
  lambda.funcall("call", (in_val,))
136
136
  }
137
137
 
138
138
  pub(crate) fn call_lambda_and_extract<T, S>(lambda: Value, in_val: T) -> RbResult<S>
139
139
  where
140
- T: Into<Value>,
140
+ T: IntoValue,
141
141
  S: TryConvert,
142
142
  {
143
143
  match call_lambda(lambda, in_val) {
@@ -148,7 +148,7 @@ where
148
148
 
149
149
  fn call_lambda_series_out<T>(lambda: Value, in_val: T) -> RbResult<Series>
150
150
  where
151
- T: Into<Value>,
151
+ T: IntoValue,
152
152
  {
153
153
  let out: Value = lambda.funcall("call", (in_val,))?;
154
154
  let py_series: Value = out.funcall("_s", ())?;
@@ -216,7 +216,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
216
216
  ) -> RbResult<ChunkedArray<D>>
217
217
  where
218
218
  D: RbArrowPrimitiveType,
219
- D::Native: Into<Value> + TryConvert,
219
+ D::Native: IntoValue + TryConvert,
220
220
  {
221
221
  let skip = usize::from(first_value.is_some());
222
222
  if init_null_count == self.len() {
@@ -435,7 +435,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
435
435
  impl<'a, T> ApplyLambda<'a> for ChunkedArray<T>
436
436
  where
437
437
  T: RbArrowPrimitiveType + PolarsNumericType,
438
- T::Native: Into<Value> + TryConvert,
438
+ T::Native: IntoValue + TryConvert,
439
439
  ChunkedArray<T>: IntoSeries,
440
440
  {
441
441
  fn apply_lambda_unknown(&'a self, lambda: Value) -> RbResult<RbSeries> {
@@ -493,7 +493,7 @@ where
493
493
  ) -> RbResult<ChunkedArray<D>>
494
494
  where
495
495
  D: RbArrowPrimitiveType,
496
- D::Native: Into<Value> + TryConvert,
496
+ D::Native: IntoValue + TryConvert,
497
497
  {
498
498
  let skip = usize::from(first_value.is_some());
499
499
  if init_null_count == self.len() {
@@ -765,7 +765,7 @@ impl<'a> ApplyLambda<'a> for Utf8Chunked {
765
765
  ) -> RbResult<ChunkedArray<D>>
766
766
  where
767
767
  D: RbArrowPrimitiveType,
768
- D::Native: Into<Value> + TryConvert,
768
+ D::Native: IntoValue + TryConvert,
769
769
  {
770
770
  let skip = usize::from(first_value.is_some());
771
771
  if init_null_count == self.len() {
@@ -1036,7 +1036,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
1036
1036
  ) -> RbResult<ChunkedArray<D>>
1037
1037
  where
1038
1038
  D: RbArrowPrimitiveType,
1039
- D::Native: Into<Value> + TryConvert,
1039
+ D::Native: IntoValue + TryConvert,
1040
1040
  {
1041
1041
  let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
1042
1042
 
data/ext/polars/src/batched_csv.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::Value;
1
+ use magnus::{RArray, Value};
2
2
  use polars::io::mmap::MmapBytesReader;
3
3
  use polars::io::RowCount;
4
4
  use polars::prelude::read_impl::OwnedBatchedCsvReader;
@@ -84,7 +84,7 @@ impl RbBatchedCsv {
84
84
  .with_n_rows(n_rows)
85
85
  .with_delimiter(sep.as_bytes()[0])
86
86
  .with_skip_rows(skip_rows)
87
- .with_ignore_parser_errors(ignore_errors)
87
+ .with_ignore_errors(ignore_errors)
88
88
  .with_projection(projection)
89
89
  .with_rechunk(rechunk)
90
90
  .with_chunk_size(chunk_size)
@@ -109,12 +109,14 @@ impl RbBatchedCsv {
109
109
  })
110
110
  }
111
111
 
112
- pub fn next_batches(&self, n: usize) -> RbResult<Option<Vec<RbDataFrame>>> {
112
+ pub fn next_batches(&self, n: usize) -> RbResult<Option<RArray>> {
113
113
  let batches = self
114
114
  .reader
115
115
  .borrow_mut()
116
116
  .next_batches(n)
117
117
  .map_err(RbPolarsErr::from)?;
118
- Ok(batches.map(|batches| batches.into_iter().map(|out| out.1.into()).collect()))
118
+ Ok(batches.map(|batches| {
119
+ RArray::from_iter(batches.into_iter().map(|out| RbDataFrame::from(out.1)))
120
+ }))
119
121
  }
120
122
  }
data/ext/polars/src/conversion.rs CHANGED
@@ -1,6 +1,6 @@
1
1
  use magnus::{
2
- class, r_hash::ForEach, Integer, Module, RArray, RFloat, RHash, RString, Symbol, TryConvert,
3
- Value, QNIL,
2
+ class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
3
+ RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
4
4
  };
5
5
  use polars::chunked_array::object::PolarsObjectSafe;
6
6
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
@@ -98,9 +98,9 @@ impl TryConvert for Wrap<NullValues> {
98
98
  }
99
99
  }
100
100
 
101
- impl From<Wrap<AnyValue<'_>>> for Value {
102
- fn from(w: Wrap<AnyValue<'_>>) -> Self {
103
- match w.0 {
101
+ impl IntoValue for Wrap<AnyValue<'_>> {
102
+ fn into_value_with(self, _: &RubyHandle) -> Value {
103
+ match self.0 {
104
104
  AnyValue::UInt8(v) => Value::from(v),
105
105
  AnyValue::UInt16(v) => Value::from(v),
106
106
  AnyValue::UInt32(v) => Value::from(v),
@@ -114,6 +114,8 @@ impl From<Wrap<AnyValue<'_>>> for Value {
114
114
  AnyValue::Null => *QNIL,
115
115
  AnyValue::Boolean(v) => Value::from(v),
116
116
  AnyValue::Utf8(v) => Value::from(v),
117
+ AnyValue::Utf8Owned(_v) => todo!(),
118
+ AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
117
119
  AnyValue::Date(v) => class::time()
118
120
  .funcall::<_, _, Value>("at", (v * 86400,))
119
121
  .unwrap()
@@ -140,16 +142,24 @@ impl From<Wrap<AnyValue<'_>>> for Value {
140
142
  t.funcall::<_, _, Value>("utc", ()).unwrap()
141
143
  }
142
144
  }
143
- _ => todo!(),
145
+ AnyValue::Duration(_v, _tu) => todo!(),
146
+ AnyValue::Time(_v) => todo!(),
147
+ AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
148
+ ref _av @ AnyValue::Struct(_, _, _flds) => todo!(),
149
+ AnyValue::StructOwned(_payload) => todo!(),
150
+ AnyValue::Object(_v) => todo!(),
151
+ AnyValue::ObjectOwned(_v) => todo!(),
152
+ AnyValue::Binary(_v) => todo!(),
153
+ AnyValue::BinaryOwned(_v) => todo!(),
144
154
  }
145
155
  }
146
156
  }
147
157
 
148
- impl From<Wrap<DataType>> for Value {
149
- fn from(w: Wrap<DataType>) -> Self {
158
+ impl IntoValue for Wrap<DataType> {
159
+ fn into_value_with(self, _: &RubyHandle) -> Value {
150
160
  let pl = crate::rb_modules::polars();
151
161
 
152
- match &w.0 {
162
+ match self.0 {
153
163
  DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
154
164
  DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
155
165
  DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
@@ -160,6 +170,7 @@ impl From<Wrap<DataType>> for Value {
160
170
  DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
161
171
  DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
162
172
  DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
173
+ DataType::Decimal128(_) => todo!(),
163
174
  DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
164
175
  DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
165
176
  DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
@@ -423,9 +434,9 @@ impl ObjectValue {
423
434
  }
424
435
  }
425
436
 
426
- impl From<ObjectValue> for Value {
427
- fn from(val: ObjectValue) -> Self {
428
- val.inner
437
+ impl IntoValue for ObjectValue {
438
+ fn into_value_with(self, _: &RubyHandle) -> Value {
439
+ self.inner
429
440
  }
430
441
  }
431
442
 
@@ -767,6 +778,22 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
767
778
  }
768
779
  }
769
780
 
781
+ impl TryConvert for Wrap<SearchSortedSide> {
782
+ fn try_convert(ob: Value) -> RbResult<Self> {
783
+ let parsed = match ob.try_convert::<String>()?.as_str() {
784
+ "any" => SearchSortedSide::Any,
785
+ "left" => SearchSortedSide::Left,
786
+ "right" => SearchSortedSide::Right,
787
+ v => {
788
+ return Err(RbValueError::new_err(format!(
789
+ "side must be one of {{'any', 'left', 'right'}}, got {v}",
790
+ )))
791
+ }
792
+ };
793
+ Ok(Wrap(parsed))
794
+ }
795
+ }
796
+
770
797
  pub fn parse_fill_null_strategy(
771
798
  strategy: &str,
772
799
  limit: FillNullLimit,
@@ -780,7 +807,7 @@ pub fn parse_fill_null_strategy(
780
807
  "zero" => FillNullStrategy::Zero,
781
808
  "one" => FillNullStrategy::One,
782
809
  e => {
783
- return Err(magnus::Error::runtime_error(format!(
810
+ return Err(magnus::Error::new(exception::runtime_error(), format!(
784
811
  "strategy must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {}",
785
812
  e,
786
813
  )))
data/ext/polars/src/dataframe.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
1
+ use magnus::{r_hash::ForEach, IntoValue, RArray, RHash, RString, Value};
2
2
  use polars::frame::row::{rows_to_schema_supertypes, Row};
3
3
  use polars::frame::NullStrategy;
4
4
  use polars::io::avro::AvroCompression;
@@ -68,7 +68,7 @@ impl RbDataFrame {
68
68
  *dtype_ = dtype;
69
69
  }
70
70
  } else {
71
- schema.with_column(name, dtype)
71
+ schema.with_column(name, dtype);
72
72
  }
73
73
  }
74
74
  }
@@ -159,7 +159,7 @@ impl RbDataFrame {
159
159
  .with_n_rows(n_rows)
160
160
  .with_delimiter(sep.as_bytes()[0])
161
161
  .with_skip_rows(skip_rows)
162
- .with_ignore_parser_errors(ignore_errors)
162
+ .with_ignore_errors(ignore_errors)
163
163
  .with_projection(projection)
164
164
  .with_rechunk(rechunk)
165
165
  .with_chunk_size(chunk_size)
@@ -457,7 +457,7 @@ impl RbDataFrame {
457
457
  } else {
458
458
  idx as usize
459
459
  };
460
- RArray::from_vec(
460
+ RArray::from_iter(
461
461
  self.df
462
462
  .borrow()
463
463
  .get_columns()
@@ -467,36 +467,29 @@ impl RbDataFrame {
467
467
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
468
468
  obj.unwrap().to_object()
469
469
  }
470
- _ => Wrap(s.get(idx).unwrap()).into(),
471
- })
472
- .collect(),
470
+ _ => Wrap(s.get(idx).unwrap()).into_value(),
471
+ }),
473
472
  )
474
473
  .into()
475
474
  }
476
475
 
477
476
  pub fn row_tuples(&self) -> Value {
478
477
  let df = &self.df;
479
- RArray::from_vec(
480
- (0..df.borrow().height())
481
- .map(|idx| {
482
- RArray::from_vec(
483
- self.df
484
- .borrow()
485
- .get_columns()
486
- .iter()
487
- .map(|s| match s.dtype() {
488
- DataType::Object(_) => {
489
- let obj: Option<&ObjectValue> =
490
- s.get_object(idx).map(|any| any.into());
491
- obj.unwrap().to_object()
492
- }
493
- _ => Wrap(s.get(idx).unwrap()).into(),
494
- })
495
- .collect(),
496
- )
497
- })
498
- .collect(),
499
- )
478
+ RArray::from_iter((0..df.borrow().height()).map(|idx| {
479
+ RArray::from_iter(
480
+ self.df
481
+ .borrow()
482
+ .get_columns()
483
+ .iter()
484
+ .map(|s| match s.dtype() {
485
+ DataType::Object(_) => {
486
+ let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
487
+ obj.unwrap().to_object()
488
+ }
489
+ _ => Wrap(s.get(idx).unwrap()).into_value(),
490
+ }),
491
+ )
492
+ }))
500
493
  .into()
501
494
  }
502
495
 
@@ -613,7 +606,7 @@ impl RbDataFrame {
613
606
  format!("{}", self.df.borrow())
614
607
  }
615
608
 
616
- pub fn get_columns(&self) -> Vec<RbSeries> {
609
+ pub fn get_columns(&self) -> RArray {
617
610
  let cols = self.df.borrow().get_columns().clone();
618
611
  to_rbseries_collection(cols)
619
612
  }
@@ -635,12 +628,13 @@ impl RbDataFrame {
635
628
  Ok(())
636
629
  }
637
630
 
638
- pub fn dtypes(&self) -> Vec<Value> {
639
- self.df
640
- .borrow()
641
- .iter()
642
- .map(|s| Wrap(s.dtype().clone()).into())
643
- .collect()
631
+ pub fn dtypes(&self) -> RArray {
632
+ RArray::from_iter(
633
+ self.df
634
+ .borrow()
635
+ .iter()
636
+ .map(|s| Wrap(s.dtype().clone()).into_value()),
637
+ )
644
638
  }
645
639
 
646
640
  pub fn n_chunks(&self) -> usize {
@@ -777,6 +771,7 @@ impl RbDataFrame {
777
771
  SortOptions {
778
772
  descending: reverse,
779
773
  nulls_last,
774
+ multithreaded: true,
780
775
  },
781
776
  )
782
777
  .map_err(RbPolarsErr::from)?;
@@ -876,6 +871,7 @@ impl RbDataFrame {
876
871
  Ok(RbDataFrame::new(df))
877
872
  }
878
873
 
874
+ #[allow(clippy::too_many_arguments)]
879
875
  pub fn pivot_expr(
880
876
  &self,
881
877
  values: Vec<String>,
@@ -884,6 +880,7 @@ impl RbDataFrame {
884
880
  aggregate_expr: &RbExpr,
885
881
  maintain_order: bool,
886
882
  sort_columns: bool,
883
+ separator: Option<String>,
887
884
  ) -> RbResult<Self> {
888
885
  let fun = match maintain_order {
889
886
  true => pivot_stable,
@@ -896,19 +893,20 @@ impl RbDataFrame {
896
893
  columns,
897
894
  aggregate_expr.inner.clone(),
898
895
  sort_columns,
896
+ separator.as_deref(),
899
897
  )
900
898
  .map_err(RbPolarsErr::from)?;
901
899
  Ok(RbDataFrame::new(df))
902
900
  }
903
901
 
904
- pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<Vec<Self>> {
902
+ pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<RArray> {
905
903
  let out = if stable {
906
904
  self.df.borrow().partition_by_stable(groups)
907
905
  } else {
908
906
  self.df.borrow().partition_by(groups)
909
907
  }
910
908
  .map_err(RbPolarsErr::from)?;
911
- Ok(out.into_iter().map(RbDataFrame::new).collect())
909
+ Ok(RArray::from_iter(out.into_iter().map(RbDataFrame::new)))
912
910
  }
913
911
 
914
912
  pub fn shift(&self, periods: i64) -> Self {
@@ -1003,13 +1001,17 @@ impl RbDataFrame {
1003
1001
  Ok(df.into())
1004
1002
  }
1005
1003
 
1006
- pub fn to_dummies(&self, columns: Option<Vec<String>>) -> RbResult<Self> {
1004
+ pub fn to_dummies(
1005
+ &self,
1006
+ columns: Option<Vec<String>>,
1007
+ separator: Option<String>,
1008
+ ) -> RbResult<Self> {
1007
1009
  let df = match columns {
1008
- Some(cols) => self
1009
- .df
1010
- .borrow()
1011
- .columns_to_dummies(cols.iter().map(|x| x as &str).collect()),
1012
- None => self.df.borrow().to_dummies(),
1010
+ Some(cols) => self.df.borrow().columns_to_dummies(
1011
+ cols.iter().map(|x| x as &str).collect(),
1012
+ separator.as_deref(),
1013
+ ),
1014
+ None => self.df.borrow().to_dummies(separator.as_deref()),
1013
1015
  }
1014
1016
  .map_err(RbPolarsErr::from)?;
1015
1017
  Ok(df.into())
data/ext/polars/src/error.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::exception::arg_error;
1
+ use magnus::exception;
2
2
  use magnus::Error;
3
3
  use polars::error::ArrowError;
4
4
  use polars::prelude::PolarsError;
@@ -8,23 +8,23 @@ pub struct RbPolarsErr {}
8
8
  impl RbPolarsErr {
9
9
  // convert to Error instead of Self
10
10
  pub fn from(e: PolarsError) -> Error {
11
- Error::runtime_error(e.to_string())
11
+ Error::new(exception::runtime_error(), e.to_string())
12
12
  }
13
13
 
14
14
  pub fn arrow(e: ArrowError) -> Error {
15
- Error::runtime_error(e.to_string())
15
+ Error::new(exception::runtime_error(), e.to_string())
16
16
  }
17
17
 
18
18
  pub fn io(e: std::io::Error) -> Error {
19
- Error::runtime_error(e.to_string())
19
+ Error::new(exception::runtime_error(), e.to_string())
20
20
  }
21
21
 
22
22
  pub fn other(message: String) -> Error {
23
- Error::runtime_error(message)
23
+ Error::new(exception::runtime_error(), message)
24
24
  }
25
25
 
26
26
  pub fn todo() -> Error {
27
- Error::runtime_error("not implemented yet")
27
+ Error::new(exception::runtime_error(), "not implemented yet")
28
28
  }
29
29
  }
30
30
 
@@ -32,7 +32,7 @@ pub struct RbValueError {}
32
32
 
33
33
  impl RbValueError {
34
34
  pub fn new_err(message: String) -> Error {
35
- Error::new(arg_error(), message)
35
+ Error::new(exception::arg_error(), message)
36
36
  }
37
37
  }
38
38
 
@@ -40,6 +40,6 @@ pub struct ComputeError {}
40
40
 
41
41
  impl ComputeError {
42
42
  pub fn new_err(message: String) -> Error {
43
- Error::runtime_error(message)
43
+ Error::new(exception::runtime_error(), message)
44
44
  }
45
45
  }
data/ext/polars/src/file.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::{Error, RString, Value};
1
+ use magnus::{exception, Error, RString, Value};
2
2
  use polars::io::mmap::MmapBytesReader;
3
3
  use std::fs::File;
4
4
  use std::io::Cursor;
@@ -9,9 +9,10 @@ use crate::RbResult;
9
9
  pub fn get_file_like(f: Value, truncate: bool) -> RbResult<File> {
10
10
  let str_slice = f.try_convert::<PathBuf>()?;
11
11
  let f = if truncate {
12
- File::create(str_slice).map_err(|e| Error::runtime_error(e.to_string()))?
12
+ File::create(str_slice)
13
+ .map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?
13
14
  } else {
14
- File::open(str_slice).map_err(|e| Error::runtime_error(e.to_string()))?
15
+ File::open(str_slice).map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?
15
16
  };
16
17
  Ok(f)
17
18
  }
@@ -23,7 +24,7 @@ pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>>
23
24
  Ok(Box::new(Cursor::new(bytes.to_vec())))
24
25
  } else {
25
26
  let p = rb_f.try_convert::<PathBuf>()?;
26
- let f = File::open(p).map_err(|e| Error::runtime_error(e.to_string()))?;
27
+ let f = File::open(p).map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
27
28
  Ok(Box::new(f))
28
29
  }
29
30
  }
data/ext/polars/src/lazy/apply.rs CHANGED
@@ -4,7 +4,7 @@ use polars::prelude::*;
4
4
  use crate::lazy::dsl::RbExpr;
5
5
  use crate::Wrap;
6
6
 
7
- pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Series> {
7
+ pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Option<Series>> {
8
8
  todo!();
9
9
  }
10
10
 
data/ext/polars/src/lazy/dataframe.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::{RArray, RHash, Value};
1
+ use magnus::{IntoValue, RArray, RHash, Value};
2
2
  use polars::io::RowCount;
3
3
  use polars::lazy::frame::{LazyFrame, LazyGroupBy};
4
4
  use polars::prelude::*;
@@ -140,7 +140,7 @@ impl RbLazyFrame {
140
140
  .with_infer_schema_length(infer_schema_length)
141
141
  .with_delimiter(delimiter)
142
142
  .has_header(has_header)
143
- .with_ignore_parser_errors(ignore_errors)
143
+ .with_ignore_errors(ignore_errors)
144
144
  .with_skip_rows(skip_rows)
145
145
  .with_n_rows(n_rows)
146
146
  .with_cache(cache)
@@ -180,6 +180,8 @@ impl RbLazyFrame {
180
180
  rechunk,
181
181
  row_count,
182
182
  low_memory,
183
+ // TODO support cloud options
184
+ cloud_options: None,
183
185
  };
184
186
  let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
185
187
  Ok(lf.into())
@@ -254,6 +256,7 @@ impl RbLazyFrame {
254
256
  SortOptions {
255
257
  descending: reverse,
256
258
  nulls_last,
259
+ multithreaded: true,
257
260
  },
258
261
  )
259
262
  .into()
@@ -597,10 +600,10 @@ impl RbLazyFrame {
597
600
  Ok(self.get_schema()?.iter_names().cloned().collect())
598
601
  }
599
602
 
600
- pub fn dtypes(&self) -> RbResult<Vec<Value>> {
603
+ pub fn dtypes(&self) -> RbResult<RArray> {
601
604
  let schema = self.get_schema()?;
602
- let iter = schema.iter_dtypes().map(|dt| Wrap(dt.clone()).into());
603
- Ok(iter.collect())
605
+ let iter = schema.iter_dtypes().map(|dt| Wrap(dt.clone()).into_value());
606
+ Ok(RArray::from_iter(iter))
604
607
  }
605
608
 
606
609
  pub fn schema(&self) -> RbResult<RHash> {
@@ -610,7 +613,10 @@ impl RbLazyFrame {
610
613
  schema.iter_fields().for_each(|fld| {
611
614
  // TODO remove unwrap
612
615
  schema_dict
613
- .aset::<String, Value>(fld.name().clone(), Wrap(fld.data_type().clone()).into())
616
+ .aset::<String, Value>(
617
+ fld.name().clone(),
618
+ Wrap(fld.data_type().clone()).into_value(),
619
+ )
614
620
  .unwrap();
615
621
  });
616
622
  Ok(schema_dict)