polars-df 0.2.5 → 0.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
data/Cargo.toml CHANGED
@@ -4,7 +4,7 @@ members = ["ext/polars"]
4
4
  [patch.crates-io]
5
5
  jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
6
6
  halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
7
- arrow2 = { git = "https://github.com/ankane/arrow2", rev = "9f36b2b97446e6dd495473e4361a70d863ac8027" }
7
+ arrow2 = { git = "https://github.com/ankane/arrow2", rev = "ef0270922a217070ba9942567c0ff3263ae8c531" }
8
8
 
9
9
  [profile.release]
10
10
  strip = true
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.2.4"
3
+ version = "0.3.0"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -11,18 +11,19 @@ crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
13
  ahash = "0.8"
14
- magnus = "0.4"
15
- polars-core = "0.26.1"
14
+ magnus = "0.5"
15
+ polars-core = "0.27.0"
16
16
  serde_json = "1"
17
17
 
18
18
  [dependencies.polars]
19
- version = "0.26.1"
19
+ version = "0.27.0"
20
20
  features = [
21
21
  "abs",
22
22
  "arange",
23
23
  "arg_where",
24
24
  "asof_join",
25
25
  "avro",
26
+ "binary_encoding",
26
27
  "concat_str",
27
28
  "cse",
28
29
  "csv-file",
@@ -1,4 +1,4 @@
1
- use magnus::{class, RArray, TryConvert, Value};
1
+ use magnus::{class, IntoValue, RArray, TryConvert, Value};
2
2
  use polars::prelude::*;
3
3
  use polars_core::frame::row::{rows_to_schema_first_non_null, Row};
4
4
  use polars_core::series::SeriesIter;
@@ -27,7 +27,7 @@ pub fn apply_lambda_unknown<'a>(
27
27
 
28
28
  for _ in 0..df.height() {
29
29
  let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
30
- let arg = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
30
+ let arg = (RArray::from_iter(iter),);
31
31
  let out: Value = lambda.funcall("call", arg)?;
32
32
 
33
33
  if out.is_nil() {
@@ -141,7 +141,7 @@ where
141
141
  let mut iters = get_iters_skip(df, init_null_count + skip);
142
142
  ((init_null_count + skip)..df.height()).map(move |_| {
143
143
  let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
144
- let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
144
+ let tpl = (RArray::from_iter(iter),);
145
145
  match lambda.funcall::<_, _, Value>("call", tpl) {
146
146
  Ok(val) => val.try_convert::<T>().ok(),
147
147
  Err(e) => panic!("ruby function failed {}", e),
@@ -158,7 +158,7 @@ pub fn apply_lambda_with_primitive_out_type<D>(
158
158
  ) -> ChunkedArray<D>
159
159
  where
160
160
  D: RbArrowPrimitiveType,
161
- D::Native: Into<Value> + TryConvert,
161
+ D::Native: IntoValue + TryConvert,
162
162
  {
163
163
  let skip = usize::from(first_value.is_some());
164
164
  if init_null_count == df.height() {
@@ -216,7 +216,7 @@ pub fn apply_lambda_with_list_out_type<'a>(
216
216
  let mut iters = get_iters_skip(df, init_null_count + skip);
217
217
  let iter = ((init_null_count + skip)..df.height()).map(|_| {
218
218
  let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
219
- let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
219
+ let tpl = (RArray::from_iter(iter),);
220
220
  match lambda.funcall::<_, _, Value>("call", tpl) {
221
221
  Ok(val) => match val.funcall::<_, _, Value>("_s", ()) {
222
222
  Ok(val) => val
@@ -254,7 +254,7 @@ pub fn apply_lambda_with_rows_output<'a>(
254
254
  let mut iters = get_iters_skip(df, init_null_count + skip);
255
255
  let mut row_iter = ((init_null_count + skip)..df.height()).map(|_| {
256
256
  let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
257
- let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
257
+ let tpl = (RArray::from_iter(iter),);
258
258
  match lambda.funcall::<_, _, Value>("call", tpl) {
259
259
  Ok(val) => {
260
260
  match val.try_convert::<RArray>().ok() {
@@ -1,4 +1,4 @@
1
- use magnus::{class, RHash, TryConvert, Value};
1
+ use magnus::{class, IntoValue, RHash, TryConvert, Value};
2
2
  use polars::prelude::*;
3
3
 
4
4
  use super::*;
@@ -85,7 +85,7 @@ pub trait ApplyLambda<'a> {
85
85
  ) -> RbResult<ChunkedArray<D>>
86
86
  where
87
87
  D: RbArrowPrimitiveType,
88
- D::Native: Into<Value> + TryConvert;
88
+ D::Native: IntoValue + TryConvert;
89
89
 
90
90
  /// Apply a lambda with a boolean output type
91
91
  fn apply_lambda_with_bool_out_type(
@@ -130,14 +130,14 @@ pub trait ApplyLambda<'a> {
130
130
 
131
131
  pub fn call_lambda<T>(lambda: Value, in_val: T) -> RbResult<Value>
132
132
  where
133
- T: Into<Value>,
133
+ T: IntoValue,
134
134
  {
135
135
  lambda.funcall("call", (in_val,))
136
136
  }
137
137
 
138
138
  pub(crate) fn call_lambda_and_extract<T, S>(lambda: Value, in_val: T) -> RbResult<S>
139
139
  where
140
- T: Into<Value>,
140
+ T: IntoValue,
141
141
  S: TryConvert,
142
142
  {
143
143
  match call_lambda(lambda, in_val) {
@@ -148,7 +148,7 @@ where
148
148
 
149
149
  fn call_lambda_series_out<T>(lambda: Value, in_val: T) -> RbResult<Series>
150
150
  where
151
- T: Into<Value>,
151
+ T: IntoValue,
152
152
  {
153
153
  let out: Value = lambda.funcall("call", (in_val,))?;
154
154
  let py_series: Value = out.funcall("_s", ())?;
@@ -216,7 +216,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
216
216
  ) -> RbResult<ChunkedArray<D>>
217
217
  where
218
218
  D: RbArrowPrimitiveType,
219
- D::Native: Into<Value> + TryConvert,
219
+ D::Native: IntoValue + TryConvert,
220
220
  {
221
221
  let skip = usize::from(first_value.is_some());
222
222
  if init_null_count == self.len() {
@@ -435,7 +435,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
435
435
  impl<'a, T> ApplyLambda<'a> for ChunkedArray<T>
436
436
  where
437
437
  T: RbArrowPrimitiveType + PolarsNumericType,
438
- T::Native: Into<Value> + TryConvert,
438
+ T::Native: IntoValue + TryConvert,
439
439
  ChunkedArray<T>: IntoSeries,
440
440
  {
441
441
  fn apply_lambda_unknown(&'a self, lambda: Value) -> RbResult<RbSeries> {
@@ -493,7 +493,7 @@ where
493
493
  ) -> RbResult<ChunkedArray<D>>
494
494
  where
495
495
  D: RbArrowPrimitiveType,
496
- D::Native: Into<Value> + TryConvert,
496
+ D::Native: IntoValue + TryConvert,
497
497
  {
498
498
  let skip = usize::from(first_value.is_some());
499
499
  if init_null_count == self.len() {
@@ -765,7 +765,7 @@ impl<'a> ApplyLambda<'a> for Utf8Chunked {
765
765
  ) -> RbResult<ChunkedArray<D>>
766
766
  where
767
767
  D: RbArrowPrimitiveType,
768
- D::Native: Into<Value> + TryConvert,
768
+ D::Native: IntoValue + TryConvert,
769
769
  {
770
770
  let skip = usize::from(first_value.is_some());
771
771
  if init_null_count == self.len() {
@@ -1036,7 +1036,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
1036
1036
  ) -> RbResult<ChunkedArray<D>>
1037
1037
  where
1038
1038
  D: RbArrowPrimitiveType,
1039
- D::Native: Into<Value> + TryConvert,
1039
+ D::Native: IntoValue + TryConvert,
1040
1040
  {
1041
1041
  let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
1042
1042
 
@@ -1,4 +1,4 @@
1
- use magnus::Value;
1
+ use magnus::{RArray, Value};
2
2
  use polars::io::mmap::MmapBytesReader;
3
3
  use polars::io::RowCount;
4
4
  use polars::prelude::read_impl::OwnedBatchedCsvReader;
@@ -84,7 +84,7 @@ impl RbBatchedCsv {
84
84
  .with_n_rows(n_rows)
85
85
  .with_delimiter(sep.as_bytes()[0])
86
86
  .with_skip_rows(skip_rows)
87
- .with_ignore_parser_errors(ignore_errors)
87
+ .with_ignore_errors(ignore_errors)
88
88
  .with_projection(projection)
89
89
  .with_rechunk(rechunk)
90
90
  .with_chunk_size(chunk_size)
@@ -109,12 +109,14 @@ impl RbBatchedCsv {
109
109
  })
110
110
  }
111
111
 
112
- pub fn next_batches(&self, n: usize) -> RbResult<Option<Vec<RbDataFrame>>> {
112
+ pub fn next_batches(&self, n: usize) -> RbResult<Option<RArray>> {
113
113
  let batches = self
114
114
  .reader
115
115
  .borrow_mut()
116
116
  .next_batches(n)
117
117
  .map_err(RbPolarsErr::from)?;
118
- Ok(batches.map(|batches| batches.into_iter().map(|out| out.1.into()).collect()))
118
+ Ok(batches.map(|batches| {
119
+ RArray::from_iter(batches.into_iter().map(|out| RbDataFrame::from(out.1)))
120
+ }))
119
121
  }
120
122
  }
@@ -1,6 +1,6 @@
1
1
  use magnus::{
2
- class, r_hash::ForEach, Integer, Module, RArray, RFloat, RHash, RString, Symbol, TryConvert,
3
- Value, QNIL,
2
+ class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
3
+ RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
4
4
  };
5
5
  use polars::chunked_array::object::PolarsObjectSafe;
6
6
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
@@ -98,9 +98,9 @@ impl TryConvert for Wrap<NullValues> {
98
98
  }
99
99
  }
100
100
 
101
- impl From<Wrap<AnyValue<'_>>> for Value {
102
- fn from(w: Wrap<AnyValue<'_>>) -> Self {
103
- match w.0 {
101
+ impl IntoValue for Wrap<AnyValue<'_>> {
102
+ fn into_value_with(self, _: &RubyHandle) -> Value {
103
+ match self.0 {
104
104
  AnyValue::UInt8(v) => Value::from(v),
105
105
  AnyValue::UInt16(v) => Value::from(v),
106
106
  AnyValue::UInt32(v) => Value::from(v),
@@ -114,6 +114,8 @@ impl From<Wrap<AnyValue<'_>>> for Value {
114
114
  AnyValue::Null => *QNIL,
115
115
  AnyValue::Boolean(v) => Value::from(v),
116
116
  AnyValue::Utf8(v) => Value::from(v),
117
+ AnyValue::Utf8Owned(_v) => todo!(),
118
+ AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
117
119
  AnyValue::Date(v) => class::time()
118
120
  .funcall::<_, _, Value>("at", (v * 86400,))
119
121
  .unwrap()
@@ -140,16 +142,24 @@ impl From<Wrap<AnyValue<'_>>> for Value {
140
142
  t.funcall::<_, _, Value>("utc", ()).unwrap()
141
143
  }
142
144
  }
143
- _ => todo!(),
145
+ AnyValue::Duration(_v, _tu) => todo!(),
146
+ AnyValue::Time(_v) => todo!(),
147
+ AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
148
+ ref _av @ AnyValue::Struct(_, _, _flds) => todo!(),
149
+ AnyValue::StructOwned(_payload) => todo!(),
150
+ AnyValue::Object(_v) => todo!(),
151
+ AnyValue::ObjectOwned(_v) => todo!(),
152
+ AnyValue::Binary(_v) => todo!(),
153
+ AnyValue::BinaryOwned(_v) => todo!(),
144
154
  }
145
155
  }
146
156
  }
147
157
 
148
- impl From<Wrap<DataType>> for Value {
149
- fn from(w: Wrap<DataType>) -> Self {
158
+ impl IntoValue for Wrap<DataType> {
159
+ fn into_value_with(self, _: &RubyHandle) -> Value {
150
160
  let pl = crate::rb_modules::polars();
151
161
 
152
- match &w.0 {
162
+ match self.0 {
153
163
  DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
154
164
  DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
155
165
  DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
@@ -160,6 +170,7 @@ impl From<Wrap<DataType>> for Value {
160
170
  DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
161
171
  DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
162
172
  DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
173
+ DataType::Decimal128(_) => todo!(),
163
174
  DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
164
175
  DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
165
176
  DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
@@ -423,9 +434,9 @@ impl ObjectValue {
423
434
  }
424
435
  }
425
436
 
426
- impl From<ObjectValue> for Value {
427
- fn from(val: ObjectValue) -> Self {
428
- val.inner
437
+ impl IntoValue for ObjectValue {
438
+ fn into_value_with(self, _: &RubyHandle) -> Value {
439
+ self.inner
429
440
  }
430
441
  }
431
442
 
@@ -767,6 +778,22 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
767
778
  }
768
779
  }
769
780
 
781
+ impl TryConvert for Wrap<SearchSortedSide> {
782
+ fn try_convert(ob: Value) -> RbResult<Self> {
783
+ let parsed = match ob.try_convert::<String>()?.as_str() {
784
+ "any" => SearchSortedSide::Any,
785
+ "left" => SearchSortedSide::Left,
786
+ "right" => SearchSortedSide::Right,
787
+ v => {
788
+ return Err(RbValueError::new_err(format!(
789
+ "side must be one of {{'any', 'left', 'right'}}, got {v}",
790
+ )))
791
+ }
792
+ };
793
+ Ok(Wrap(parsed))
794
+ }
795
+ }
796
+
770
797
  pub fn parse_fill_null_strategy(
771
798
  strategy: &str,
772
799
  limit: FillNullLimit,
@@ -780,7 +807,7 @@ pub fn parse_fill_null_strategy(
780
807
  "zero" => FillNullStrategy::Zero,
781
808
  "one" => FillNullStrategy::One,
782
809
  e => {
783
- return Err(magnus::Error::runtime_error(format!(
810
+ return Err(magnus::Error::new(exception::runtime_error(), format!(
784
811
  "strategy must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {}",
785
812
  e,
786
813
  )))
@@ -1,4 +1,4 @@
1
- use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
1
+ use magnus::{r_hash::ForEach, IntoValue, RArray, RHash, RString, Value};
2
2
  use polars::frame::row::{rows_to_schema_supertypes, Row};
3
3
  use polars::frame::NullStrategy;
4
4
  use polars::io::avro::AvroCompression;
@@ -68,7 +68,7 @@ impl RbDataFrame {
68
68
  *dtype_ = dtype;
69
69
  }
70
70
  } else {
71
- schema.with_column(name, dtype)
71
+ schema.with_column(name, dtype);
72
72
  }
73
73
  }
74
74
  }
@@ -159,7 +159,7 @@ impl RbDataFrame {
159
159
  .with_n_rows(n_rows)
160
160
  .with_delimiter(sep.as_bytes()[0])
161
161
  .with_skip_rows(skip_rows)
162
- .with_ignore_parser_errors(ignore_errors)
162
+ .with_ignore_errors(ignore_errors)
163
163
  .with_projection(projection)
164
164
  .with_rechunk(rechunk)
165
165
  .with_chunk_size(chunk_size)
@@ -457,7 +457,7 @@ impl RbDataFrame {
457
457
  } else {
458
458
  idx as usize
459
459
  };
460
- RArray::from_vec(
460
+ RArray::from_iter(
461
461
  self.df
462
462
  .borrow()
463
463
  .get_columns()
@@ -467,36 +467,29 @@ impl RbDataFrame {
467
467
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
468
468
  obj.unwrap().to_object()
469
469
  }
470
- _ => Wrap(s.get(idx).unwrap()).into(),
471
- })
472
- .collect(),
470
+ _ => Wrap(s.get(idx).unwrap()).into_value(),
471
+ }),
473
472
  )
474
473
  .into()
475
474
  }
476
475
 
477
476
  pub fn row_tuples(&self) -> Value {
478
477
  let df = &self.df;
479
- RArray::from_vec(
480
- (0..df.borrow().height())
481
- .map(|idx| {
482
- RArray::from_vec(
483
- self.df
484
- .borrow()
485
- .get_columns()
486
- .iter()
487
- .map(|s| match s.dtype() {
488
- DataType::Object(_) => {
489
- let obj: Option<&ObjectValue> =
490
- s.get_object(idx).map(|any| any.into());
491
- obj.unwrap().to_object()
492
- }
493
- _ => Wrap(s.get(idx).unwrap()).into(),
494
- })
495
- .collect(),
496
- )
497
- })
498
- .collect(),
499
- )
478
+ RArray::from_iter((0..df.borrow().height()).map(|idx| {
479
+ RArray::from_iter(
480
+ self.df
481
+ .borrow()
482
+ .get_columns()
483
+ .iter()
484
+ .map(|s| match s.dtype() {
485
+ DataType::Object(_) => {
486
+ let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
487
+ obj.unwrap().to_object()
488
+ }
489
+ _ => Wrap(s.get(idx).unwrap()).into_value(),
490
+ }),
491
+ )
492
+ }))
500
493
  .into()
501
494
  }
502
495
 
@@ -613,7 +606,7 @@ impl RbDataFrame {
613
606
  format!("{}", self.df.borrow())
614
607
  }
615
608
 
616
- pub fn get_columns(&self) -> Vec<RbSeries> {
609
+ pub fn get_columns(&self) -> RArray {
617
610
  let cols = self.df.borrow().get_columns().clone();
618
611
  to_rbseries_collection(cols)
619
612
  }
@@ -635,12 +628,13 @@ impl RbDataFrame {
635
628
  Ok(())
636
629
  }
637
630
 
638
- pub fn dtypes(&self) -> Vec<Value> {
639
- self.df
640
- .borrow()
641
- .iter()
642
- .map(|s| Wrap(s.dtype().clone()).into())
643
- .collect()
631
+ pub fn dtypes(&self) -> RArray {
632
+ RArray::from_iter(
633
+ self.df
634
+ .borrow()
635
+ .iter()
636
+ .map(|s| Wrap(s.dtype().clone()).into_value()),
637
+ )
644
638
  }
645
639
 
646
640
  pub fn n_chunks(&self) -> usize {
@@ -777,6 +771,7 @@ impl RbDataFrame {
777
771
  SortOptions {
778
772
  descending: reverse,
779
773
  nulls_last,
774
+ multithreaded: true,
780
775
  },
781
776
  )
782
777
  .map_err(RbPolarsErr::from)?;
@@ -876,6 +871,7 @@ impl RbDataFrame {
876
871
  Ok(RbDataFrame::new(df))
877
872
  }
878
873
 
874
+ #[allow(clippy::too_many_arguments)]
879
875
  pub fn pivot_expr(
880
876
  &self,
881
877
  values: Vec<String>,
@@ -884,6 +880,7 @@ impl RbDataFrame {
884
880
  aggregate_expr: &RbExpr,
885
881
  maintain_order: bool,
886
882
  sort_columns: bool,
883
+ separator: Option<String>,
887
884
  ) -> RbResult<Self> {
888
885
  let fun = match maintain_order {
889
886
  true => pivot_stable,
@@ -896,19 +893,20 @@ impl RbDataFrame {
896
893
  columns,
897
894
  aggregate_expr.inner.clone(),
898
895
  sort_columns,
896
+ separator.as_deref(),
899
897
  )
900
898
  .map_err(RbPolarsErr::from)?;
901
899
  Ok(RbDataFrame::new(df))
902
900
  }
903
901
 
904
- pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<Vec<Self>> {
902
+ pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<RArray> {
905
903
  let out = if stable {
906
904
  self.df.borrow().partition_by_stable(groups)
907
905
  } else {
908
906
  self.df.borrow().partition_by(groups)
909
907
  }
910
908
  .map_err(RbPolarsErr::from)?;
911
- Ok(out.into_iter().map(RbDataFrame::new).collect())
909
+ Ok(RArray::from_iter(out.into_iter().map(RbDataFrame::new)))
912
910
  }
913
911
 
914
912
  pub fn shift(&self, periods: i64) -> Self {
@@ -1003,13 +1001,17 @@ impl RbDataFrame {
1003
1001
  Ok(df.into())
1004
1002
  }
1005
1003
 
1006
- pub fn to_dummies(&self, columns: Option<Vec<String>>) -> RbResult<Self> {
1004
+ pub fn to_dummies(
1005
+ &self,
1006
+ columns: Option<Vec<String>>,
1007
+ separator: Option<String>,
1008
+ ) -> RbResult<Self> {
1007
1009
  let df = match columns {
1008
- Some(cols) => self
1009
- .df
1010
- .borrow()
1011
- .columns_to_dummies(cols.iter().map(|x| x as &str).collect()),
1012
- None => self.df.borrow().to_dummies(),
1010
+ Some(cols) => self.df.borrow().columns_to_dummies(
1011
+ cols.iter().map(|x| x as &str).collect(),
1012
+ separator.as_deref(),
1013
+ ),
1014
+ None => self.df.borrow().to_dummies(separator.as_deref()),
1013
1015
  }
1014
1016
  .map_err(RbPolarsErr::from)?;
1015
1017
  Ok(df.into())
@@ -1,4 +1,4 @@
1
- use magnus::exception::arg_error;
1
+ use magnus::exception;
2
2
  use magnus::Error;
3
3
  use polars::error::ArrowError;
4
4
  use polars::prelude::PolarsError;
@@ -8,23 +8,23 @@ pub struct RbPolarsErr {}
8
8
  impl RbPolarsErr {
9
9
  // convert to Error instead of Self
10
10
  pub fn from(e: PolarsError) -> Error {
11
- Error::runtime_error(e.to_string())
11
+ Error::new(exception::runtime_error(), e.to_string())
12
12
  }
13
13
 
14
14
  pub fn arrow(e: ArrowError) -> Error {
15
- Error::runtime_error(e.to_string())
15
+ Error::new(exception::runtime_error(), e.to_string())
16
16
  }
17
17
 
18
18
  pub fn io(e: std::io::Error) -> Error {
19
- Error::runtime_error(e.to_string())
19
+ Error::new(exception::runtime_error(), e.to_string())
20
20
  }
21
21
 
22
22
  pub fn other(message: String) -> Error {
23
- Error::runtime_error(message)
23
+ Error::new(exception::runtime_error(), message)
24
24
  }
25
25
 
26
26
  pub fn todo() -> Error {
27
- Error::runtime_error("not implemented yet")
27
+ Error::new(exception::runtime_error(), "not implemented yet")
28
28
  }
29
29
  }
30
30
 
@@ -32,7 +32,7 @@ pub struct RbValueError {}
32
32
 
33
33
  impl RbValueError {
34
34
  pub fn new_err(message: String) -> Error {
35
- Error::new(arg_error(), message)
35
+ Error::new(exception::arg_error(), message)
36
36
  }
37
37
  }
38
38
 
@@ -40,6 +40,6 @@ pub struct ComputeError {}
40
40
 
41
41
  impl ComputeError {
42
42
  pub fn new_err(message: String) -> Error {
43
- Error::runtime_error(message)
43
+ Error::new(exception::runtime_error(), message)
44
44
  }
45
45
  }
@@ -1,4 +1,4 @@
1
- use magnus::{Error, RString, Value};
1
+ use magnus::{exception, Error, RString, Value};
2
2
  use polars::io::mmap::MmapBytesReader;
3
3
  use std::fs::File;
4
4
  use std::io::Cursor;
@@ -9,9 +9,10 @@ use crate::RbResult;
9
9
  pub fn get_file_like(f: Value, truncate: bool) -> RbResult<File> {
10
10
  let str_slice = f.try_convert::<PathBuf>()?;
11
11
  let f = if truncate {
12
- File::create(str_slice).map_err(|e| Error::runtime_error(e.to_string()))?
12
+ File::create(str_slice)
13
+ .map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?
13
14
  } else {
14
- File::open(str_slice).map_err(|e| Error::runtime_error(e.to_string()))?
15
+ File::open(str_slice).map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?
15
16
  };
16
17
  Ok(f)
17
18
  }
@@ -23,7 +24,7 @@ pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>>
23
24
  Ok(Box::new(Cursor::new(bytes.to_vec())))
24
25
  } else {
25
26
  let p = rb_f.try_convert::<PathBuf>()?;
26
- let f = File::open(p).map_err(|e| Error::runtime_error(e.to_string()))?;
27
+ let f = File::open(p).map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
27
28
  Ok(Box::new(f))
28
29
  }
29
30
  }
@@ -4,7 +4,7 @@ use polars::prelude::*;
4
4
  use crate::lazy::dsl::RbExpr;
5
5
  use crate::Wrap;
6
6
 
7
- pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Series> {
7
+ pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Option<Series>> {
8
8
  todo!();
9
9
  }
10
10
 
@@ -1,4 +1,4 @@
1
- use magnus::{RArray, RHash, Value};
1
+ use magnus::{IntoValue, RArray, RHash, Value};
2
2
  use polars::io::RowCount;
3
3
  use polars::lazy::frame::{LazyFrame, LazyGroupBy};
4
4
  use polars::prelude::*;
@@ -140,7 +140,7 @@ impl RbLazyFrame {
140
140
  .with_infer_schema_length(infer_schema_length)
141
141
  .with_delimiter(delimiter)
142
142
  .has_header(has_header)
143
- .with_ignore_parser_errors(ignore_errors)
143
+ .with_ignore_errors(ignore_errors)
144
144
  .with_skip_rows(skip_rows)
145
145
  .with_n_rows(n_rows)
146
146
  .with_cache(cache)
@@ -180,6 +180,8 @@ impl RbLazyFrame {
180
180
  rechunk,
181
181
  row_count,
182
182
  low_memory,
183
+ // TODO support cloud options
184
+ cloud_options: None,
183
185
  };
184
186
  let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
185
187
  Ok(lf.into())
@@ -254,6 +256,7 @@ impl RbLazyFrame {
254
256
  SortOptions {
255
257
  descending: reverse,
256
258
  nulls_last,
259
+ multithreaded: true,
257
260
  },
258
261
  )
259
262
  .into()
@@ -597,10 +600,10 @@ impl RbLazyFrame {
597
600
  Ok(self.get_schema()?.iter_names().cloned().collect())
598
601
  }
599
602
 
600
- pub fn dtypes(&self) -> RbResult<Vec<Value>> {
603
+ pub fn dtypes(&self) -> RbResult<RArray> {
601
604
  let schema = self.get_schema()?;
602
- let iter = schema.iter_dtypes().map(|dt| Wrap(dt.clone()).into());
603
- Ok(iter.collect())
605
+ let iter = schema.iter_dtypes().map(|dt| Wrap(dt.clone()).into_value());
606
+ Ok(RArray::from_iter(iter))
604
607
  }
605
608
 
606
609
  pub fn schema(&self) -> RbResult<RHash> {
@@ -610,7 +613,10 @@ impl RbLazyFrame {
610
613
  schema.iter_fields().for_each(|fld| {
611
614
  // TODO remove unwrap
612
615
  schema_dict
613
- .aset::<String, Value>(fld.name().clone(), Wrap(fld.data_type().clone()).into())
616
+ .aset::<String, Value>(
617
+ fld.name().clone(),
618
+ Wrap(fld.data_type().clone()).into_value(),
619
+ )
614
620
  .unwrap();
615
621
  });
616
622
  Ok(schema_dict)