polars-df 0.2.5 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +14 -0
  3. data/Cargo.lock +290 -137
  4. data/Cargo.toml +1 -1
  5. data/README.md +40 -2
  6. data/ext/polars/Cargo.toml +5 -4
  7. data/ext/polars/src/apply/dataframe.rs +6 -6
  8. data/ext/polars/src/apply/series.rs +10 -10
  9. data/ext/polars/src/batched_csv.rs +6 -4
  10. data/ext/polars/src/conversion.rs +56 -17
  11. data/ext/polars/src/dataframe.rs +65 -43
  12. data/ext/polars/src/error.rs +16 -8
  13. data/ext/polars/src/file.rs +5 -4
  14. data/ext/polars/src/lazy/apply.rs +1 -1
  15. data/ext/polars/src/lazy/dataframe.rs +12 -6
  16. data/ext/polars/src/lazy/dsl.rs +99 -45
  17. data/ext/polars/src/lazy/meta.rs +10 -9
  18. data/ext/polars/src/lib.rs +33 -29
  19. data/ext/polars/src/numo.rs +57 -0
  20. data/ext/polars/src/object.rs +2 -1
  21. data/ext/polars/src/series.rs +67 -53
  22. data/lib/polars/cat_expr.rb +0 -4
  23. data/lib/polars/cat_name_space.rb +0 -4
  24. data/lib/polars/convert.rb +0 -7
  25. data/lib/polars/data_frame.rb +165 -209
  26. data/lib/polars/data_types.rb +4 -0
  27. data/lib/polars/date_time_expr.rb +19 -151
  28. data/lib/polars/date_time_name_space.rb +17 -17
  29. data/lib/polars/expr.rb +68 -315
  30. data/lib/polars/group_by.rb +79 -51
  31. data/lib/polars/io.rb +1 -1
  32. data/lib/polars/lazy_frame.rb +1 -103
  33. data/lib/polars/lazy_functions.rb +0 -26
  34. data/lib/polars/lazy_group_by.rb +0 -8
  35. data/lib/polars/list_expr.rb +5 -27
  36. data/lib/polars/list_name_space.rb +5 -8
  37. data/lib/polars/plot.rb +109 -0
  38. data/lib/polars/series.rb +61 -19
  39. data/lib/polars/string_expr.rb +20 -76
  40. data/lib/polars/string_name_space.rb +5 -15
  41. data/lib/polars/struct_expr.rb +0 -2
  42. data/lib/polars/version.rb +1 -1
  43. data/lib/polars.rb +1 -0
  44. metadata +5 -3
data/Cargo.toml CHANGED
@@ -4,7 +4,7 @@ members = ["ext/polars"]
4
4
  [patch.crates-io]
5
5
  jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
6
6
  halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
7
- arrow2 = { git = "https://github.com/ankane/arrow2", rev = "9f36b2b97446e6dd495473e4361a70d863ac8027" }
7
+ arrow2 = { git = "https://github.com/ankane/arrow2", rev = "ef0270922a217070ba9942567c0ff3263ae8c531" }
8
8
 
9
9
  [profile.release]
10
10
  strip = true
data/README.md CHANGED
@@ -282,10 +282,10 @@ df.to_dummies
282
282
 
283
283
  ## Conversion
284
284
 
285
- Array of rows
285
+ Array of hashes
286
286
 
287
287
  ```ruby
288
- df.rows
288
+ df.rows(named: true)
289
289
  ```
290
290
 
291
291
  Hash of series
@@ -308,6 +308,12 @@ Parquet
308
308
  df.write_parquet("file.parquet")
309
309
  ```
310
310
 
311
+ Numo array
312
+
313
+ ```ruby
314
+ df.to_numo
315
+ ```
316
+
311
317
  ## Types
312
318
 
313
319
  You can specify column types when creating a data frame
@@ -343,6 +349,38 @@ Cast a column
343
349
  df["a"].cast(Polars::Int32)
344
350
  ```
345
351
 
352
+ ## Visualization
353
+
354
+ Add [Vega](https://github.com/ankane/vega-ruby) to your application’s Gemfile:
355
+
356
+ ```ruby
357
+ gem "vega"
358
+ ```
359
+
360
+ And use:
361
+
362
+ ```ruby
363
+ df.plot("a", "b")
364
+ ```
365
+
366
+ Specify the chart type (`line`, `pie`, `column`, `bar`, `area`, or `scatter`)
367
+
368
+ ```ruby
369
+ df.plot("a", "b", type: "pie")
370
+ ```
371
+
372
+ Group data
373
+
374
+ ```ruby
375
+ df.groupby("c").plot("a", "b")
376
+ ```
377
+
378
+ Stacked columns or bars
379
+
380
+ ```ruby
381
+ df.groupby("c").plot("a", "b", stacked: true)
382
+ ```
383
+
346
384
  ## History
347
385
 
348
386
  View the [changelog](CHANGELOG.md)
data/ext/polars/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.2.4"
3
+ version = "0.3.1"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -11,18 +11,19 @@ crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
13
  ahash = "0.8"
14
- magnus = "0.4"
15
- polars-core = "0.26.1"
14
+ magnus = "0.5"
15
+ polars-core = "0.27.0"
16
16
  serde_json = "1"
17
17
 
18
18
  [dependencies.polars]
19
- version = "0.26.1"
19
+ version = "0.27.0"
20
20
  features = [
21
21
  "abs",
22
22
  "arange",
23
23
  "arg_where",
24
24
  "asof_join",
25
25
  "avro",
26
+ "binary_encoding",
26
27
  "concat_str",
27
28
  "cse",
28
29
  "csv-file",
data/ext/polars/src/apply/dataframe.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::{class, RArray, TryConvert, Value};
1
+ use magnus::{class, IntoValue, RArray, TryConvert, Value};
2
2
  use polars::prelude::*;
3
3
  use polars_core::frame::row::{rows_to_schema_first_non_null, Row};
4
4
  use polars_core::series::SeriesIter;
@@ -27,7 +27,7 @@ pub fn apply_lambda_unknown<'a>(
27
27
 
28
28
  for _ in 0..df.height() {
29
29
  let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
30
- let arg = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
30
+ let arg = (RArray::from_iter(iter),);
31
31
  let out: Value = lambda.funcall("call", arg)?;
32
32
 
33
33
  if out.is_nil() {
@@ -141,7 +141,7 @@ where
141
141
  let mut iters = get_iters_skip(df, init_null_count + skip);
142
142
  ((init_null_count + skip)..df.height()).map(move |_| {
143
143
  let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
144
- let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
144
+ let tpl = (RArray::from_iter(iter),);
145
145
  match lambda.funcall::<_, _, Value>("call", tpl) {
146
146
  Ok(val) => val.try_convert::<T>().ok(),
147
147
  Err(e) => panic!("ruby function failed {}", e),
@@ -158,7 +158,7 @@ pub fn apply_lambda_with_primitive_out_type<D>(
158
158
  ) -> ChunkedArray<D>
159
159
  where
160
160
  D: RbArrowPrimitiveType,
161
- D::Native: Into<Value> + TryConvert,
161
+ D::Native: IntoValue + TryConvert,
162
162
  {
163
163
  let skip = usize::from(first_value.is_some());
164
164
  if init_null_count == df.height() {
@@ -216,7 +216,7 @@ pub fn apply_lambda_with_list_out_type<'a>(
216
216
  let mut iters = get_iters_skip(df, init_null_count + skip);
217
217
  let iter = ((init_null_count + skip)..df.height()).map(|_| {
218
218
  let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
219
- let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
219
+ let tpl = (RArray::from_iter(iter),);
220
220
  match lambda.funcall::<_, _, Value>("call", tpl) {
221
221
  Ok(val) => match val.funcall::<_, _, Value>("_s", ()) {
222
222
  Ok(val) => val
@@ -254,7 +254,7 @@ pub fn apply_lambda_with_rows_output<'a>(
254
254
  let mut iters = get_iters_skip(df, init_null_count + skip);
255
255
  let mut row_iter = ((init_null_count + skip)..df.height()).map(|_| {
256
256
  let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
257
- let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
257
+ let tpl = (RArray::from_iter(iter),);
258
258
  match lambda.funcall::<_, _, Value>("call", tpl) {
259
259
  Ok(val) => {
260
260
  match val.try_convert::<RArray>().ok() {
data/ext/polars/src/apply/series.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::{class, RHash, TryConvert, Value};
1
+ use magnus::{class, IntoValue, RHash, TryConvert, Value};
2
2
  use polars::prelude::*;
3
3
 
4
4
  use super::*;
@@ -85,7 +85,7 @@ pub trait ApplyLambda<'a> {
85
85
  ) -> RbResult<ChunkedArray<D>>
86
86
  where
87
87
  D: RbArrowPrimitiveType,
88
- D::Native: Into<Value> + TryConvert;
88
+ D::Native: IntoValue + TryConvert;
89
89
 
90
90
  /// Apply a lambda with a boolean output type
91
91
  fn apply_lambda_with_bool_out_type(
@@ -130,14 +130,14 @@ pub trait ApplyLambda<'a> {
130
130
 
131
131
  pub fn call_lambda<T>(lambda: Value, in_val: T) -> RbResult<Value>
132
132
  where
133
- T: Into<Value>,
133
+ T: IntoValue,
134
134
  {
135
135
  lambda.funcall("call", (in_val,))
136
136
  }
137
137
 
138
138
  pub(crate) fn call_lambda_and_extract<T, S>(lambda: Value, in_val: T) -> RbResult<S>
139
139
  where
140
- T: Into<Value>,
140
+ T: IntoValue,
141
141
  S: TryConvert,
142
142
  {
143
143
  match call_lambda(lambda, in_val) {
@@ -148,7 +148,7 @@ where
148
148
 
149
149
  fn call_lambda_series_out<T>(lambda: Value, in_val: T) -> RbResult<Series>
150
150
  where
151
- T: Into<Value>,
151
+ T: IntoValue,
152
152
  {
153
153
  let out: Value = lambda.funcall("call", (in_val,))?;
154
154
  let py_series: Value = out.funcall("_s", ())?;
@@ -216,7 +216,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
216
216
  ) -> RbResult<ChunkedArray<D>>
217
217
  where
218
218
  D: RbArrowPrimitiveType,
219
- D::Native: Into<Value> + TryConvert,
219
+ D::Native: IntoValue + TryConvert,
220
220
  {
221
221
  let skip = usize::from(first_value.is_some());
222
222
  if init_null_count == self.len() {
@@ -435,7 +435,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
435
435
  impl<'a, T> ApplyLambda<'a> for ChunkedArray<T>
436
436
  where
437
437
  T: RbArrowPrimitiveType + PolarsNumericType,
438
- T::Native: Into<Value> + TryConvert,
438
+ T::Native: IntoValue + TryConvert,
439
439
  ChunkedArray<T>: IntoSeries,
440
440
  {
441
441
  fn apply_lambda_unknown(&'a self, lambda: Value) -> RbResult<RbSeries> {
@@ -493,7 +493,7 @@ where
493
493
  ) -> RbResult<ChunkedArray<D>>
494
494
  where
495
495
  D: RbArrowPrimitiveType,
496
- D::Native: Into<Value> + TryConvert,
496
+ D::Native: IntoValue + TryConvert,
497
497
  {
498
498
  let skip = usize::from(first_value.is_some());
499
499
  if init_null_count == self.len() {
@@ -765,7 +765,7 @@ impl<'a> ApplyLambda<'a> for Utf8Chunked {
765
765
  ) -> RbResult<ChunkedArray<D>>
766
766
  where
767
767
  D: RbArrowPrimitiveType,
768
- D::Native: Into<Value> + TryConvert,
768
+ D::Native: IntoValue + TryConvert,
769
769
  {
770
770
  let skip = usize::from(first_value.is_some());
771
771
  if init_null_count == self.len() {
@@ -1036,7 +1036,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
1036
1036
  ) -> RbResult<ChunkedArray<D>>
1037
1037
  where
1038
1038
  D: RbArrowPrimitiveType,
1039
- D::Native: Into<Value> + TryConvert,
1039
+ D::Native: IntoValue + TryConvert,
1040
1040
  {
1041
1041
  let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
1042
1042
 
data/ext/polars/src/batched_csv.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::Value;
1
+ use magnus::{RArray, Value};
2
2
  use polars::io::mmap::MmapBytesReader;
3
3
  use polars::io::RowCount;
4
4
  use polars::prelude::read_impl::OwnedBatchedCsvReader;
@@ -84,7 +84,7 @@ impl RbBatchedCsv {
84
84
  .with_n_rows(n_rows)
85
85
  .with_delimiter(sep.as_bytes()[0])
86
86
  .with_skip_rows(skip_rows)
87
- .with_ignore_parser_errors(ignore_errors)
87
+ .with_ignore_errors(ignore_errors)
88
88
  .with_projection(projection)
89
89
  .with_rechunk(rechunk)
90
90
  .with_chunk_size(chunk_size)
@@ -109,12 +109,14 @@ impl RbBatchedCsv {
109
109
  })
110
110
  }
111
111
 
112
- pub fn next_batches(&self, n: usize) -> RbResult<Option<Vec<RbDataFrame>>> {
112
+ pub fn next_batches(&self, n: usize) -> RbResult<Option<RArray>> {
113
113
  let batches = self
114
114
  .reader
115
115
  .borrow_mut()
116
116
  .next_batches(n)
117
117
  .map_err(RbPolarsErr::from)?;
118
- Ok(batches.map(|batches| batches.into_iter().map(|out| out.1.into()).collect()))
118
+ Ok(batches.map(|batches| {
119
+ RArray::from_iter(batches.into_iter().map(|out| RbDataFrame::from(out.1)))
120
+ }))
119
121
  }
120
122
  }
data/ext/polars/src/conversion.rs CHANGED
@@ -1,6 +1,6 @@
1
1
  use magnus::{
2
- class, r_hash::ForEach, Integer, Module, RArray, RFloat, RHash, RString, Symbol, TryConvert,
3
- Value, QNIL,
2
+ class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
3
+ RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
4
4
  };
5
5
  use polars::chunked_array::object::PolarsObjectSafe;
6
6
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
@@ -98,9 +98,9 @@ impl TryConvert for Wrap<NullValues> {
98
98
  }
99
99
  }
100
100
 
101
- impl From<Wrap<AnyValue<'_>>> for Value {
102
- fn from(w: Wrap<AnyValue<'_>>) -> Self {
103
- match w.0 {
101
+ impl IntoValue for Wrap<AnyValue<'_>> {
102
+ fn into_value_with(self, _: &RubyHandle) -> Value {
103
+ match self.0 {
104
104
  AnyValue::UInt8(v) => Value::from(v),
105
105
  AnyValue::UInt16(v) => Value::from(v),
106
106
  AnyValue::UInt32(v) => Value::from(v),
@@ -114,6 +114,8 @@ impl From<Wrap<AnyValue<'_>>> for Value {
114
114
  AnyValue::Null => *QNIL,
115
115
  AnyValue::Boolean(v) => Value::from(v),
116
116
  AnyValue::Utf8(v) => Value::from(v),
117
+ AnyValue::Utf8Owned(_v) => todo!(),
118
+ AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
117
119
  AnyValue::Date(v) => class::time()
118
120
  .funcall::<_, _, Value>("at", (v * 86400,))
119
121
  .unwrap()
@@ -123,7 +125,13 @@ impl From<Wrap<AnyValue<'_>>> for Value {
123
125
  .unwrap(),
124
126
  AnyValue::Datetime(v, tu, tz) => {
125
127
  let t = match tu {
126
- TimeUnit::Nanoseconds => todo!(),
128
+ TimeUnit::Nanoseconds => {
129
+ let sec = v / 1000000000;
130
+ let subsec = v % 1000000000;
131
+ class::time()
132
+ .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("nsec")))
133
+ .unwrap()
134
+ }
127
135
  TimeUnit::Microseconds => {
128
136
  let sec = v / 1000000;
129
137
  let subsec = v % 1000000;
@@ -131,7 +139,13 @@ impl From<Wrap<AnyValue<'_>>> for Value {
131
139
  .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
132
140
  .unwrap()
133
141
  }
134
- TimeUnit::Milliseconds => todo!(),
142
+ TimeUnit::Milliseconds => {
143
+ let sec = v / 1000;
144
+ let subsec = v % 1000;
145
+ class::time()
146
+ .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("millisecond")))
147
+ .unwrap()
148
+ }
135
149
  };
136
150
 
137
151
  if tz.is_some() {
@@ -140,16 +154,24 @@ impl From<Wrap<AnyValue<'_>>> for Value {
140
154
  t.funcall::<_, _, Value>("utc", ()).unwrap()
141
155
  }
142
156
  }
143
- _ => todo!(),
157
+ AnyValue::Duration(_v, _tu) => todo!(),
158
+ AnyValue::Time(_v) => todo!(),
159
+ AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
160
+ ref _av @ AnyValue::Struct(_, _, _flds) => todo!(),
161
+ AnyValue::StructOwned(_payload) => todo!(),
162
+ AnyValue::Object(_v) => todo!(),
163
+ AnyValue::ObjectOwned(_v) => todo!(),
164
+ AnyValue::Binary(_v) => todo!(),
165
+ AnyValue::BinaryOwned(_v) => todo!(),
144
166
  }
145
167
  }
146
168
  }
147
169
 
148
- impl From<Wrap<DataType>> for Value {
149
- fn from(w: Wrap<DataType>) -> Self {
170
+ impl IntoValue for Wrap<DataType> {
171
+ fn into_value_with(self, _: &RubyHandle) -> Value {
150
172
  let pl = crate::rb_modules::polars();
151
173
 
152
- match &w.0 {
174
+ match self.0 {
153
175
  DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
154
176
  DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
155
177
  DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
@@ -160,11 +182,12 @@ impl From<Wrap<DataType>> for Value {
160
182
  DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
161
183
  DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
162
184
  DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
185
+ DataType::Decimal128(_) => todo!(),
163
186
  DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
164
187
  DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
165
188
  DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
166
189
  DataType::List(inner) => {
167
- let inner = Wrap(*inner.clone());
190
+ let inner = Wrap(*inner);
168
191
  let list_class = pl.const_get::<_, Value>("List").unwrap();
169
192
  list_class.funcall::<_, _, Value>("new", (inner,)).unwrap()
170
193
  }
@@ -172,7 +195,7 @@ impl From<Wrap<DataType>> for Value {
172
195
  DataType::Datetime(tu, tz) => {
173
196
  let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
174
197
  datetime_class
175
- .funcall::<_, _, Value>("new", (tu.to_ascii(), tz.clone()))
198
+ .funcall::<_, _, Value>("new", (tu.to_ascii(), tz))
176
199
  .unwrap()
177
200
  }
178
201
  DataType::Duration(tu) => {
@@ -423,9 +446,9 @@ impl ObjectValue {
423
446
  }
424
447
  }
425
448
 
426
- impl From<ObjectValue> for Value {
427
- fn from(val: ObjectValue) -> Self {
428
- val.inner
449
+ impl IntoValue for ObjectValue {
450
+ fn into_value_with(self, _: &RubyHandle) -> Value {
451
+ self.inner
429
452
  }
430
453
  }
431
454
 
@@ -767,6 +790,22 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
767
790
  }
768
791
  }
769
792
 
793
+ impl TryConvert for Wrap<SearchSortedSide> {
794
+ fn try_convert(ob: Value) -> RbResult<Self> {
795
+ let parsed = match ob.try_convert::<String>()?.as_str() {
796
+ "any" => SearchSortedSide::Any,
797
+ "left" => SearchSortedSide::Left,
798
+ "right" => SearchSortedSide::Right,
799
+ v => {
800
+ return Err(RbValueError::new_err(format!(
801
+ "side must be one of {{'any', 'left', 'right'}}, got {v}",
802
+ )))
803
+ }
804
+ };
805
+ Ok(Wrap(parsed))
806
+ }
807
+ }
808
+
770
809
  pub fn parse_fill_null_strategy(
771
810
  strategy: &str,
772
811
  limit: FillNullLimit,
@@ -780,7 +819,7 @@ pub fn parse_fill_null_strategy(
780
819
  "zero" => FillNullStrategy::Zero,
781
820
  "one" => FillNullStrategy::One,
782
821
  e => {
783
- return Err(magnus::Error::runtime_error(format!(
822
+ return Err(magnus::Error::new(exception::runtime_error(), format!(
784
823
  "strategy must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {}",
785
824
  e,
786
825
  )))
data/ext/polars/src/dataframe.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
1
+ use magnus::{r_hash::ForEach, IntoValue, RArray, RHash, RString, Value};
2
2
  use polars::frame::row::{rows_to_schema_supertypes, Row};
3
3
  use polars::frame::NullStrategy;
4
4
  use polars::io::avro::AvroCompression;
@@ -6,6 +6,7 @@ use polars::io::mmap::ReaderBytes;
6
6
  use polars::io::RowCount;
7
7
  use polars::prelude::pivot::{pivot, pivot_stable};
8
8
  use polars::prelude::*;
9
+ use polars_core::utils::try_get_supertype;
9
10
  use std::cell::RefCell;
10
11
  use std::io::{BufWriter, Cursor};
11
12
  use std::ops::Deref;
@@ -68,7 +69,7 @@ impl RbDataFrame {
68
69
  *dtype_ = dtype;
69
70
  }
70
71
  } else {
71
- schema.with_column(name, dtype)
72
+ schema.with_column(name, dtype);
72
73
  }
73
74
  }
74
75
  }
@@ -159,7 +160,7 @@ impl RbDataFrame {
159
160
  .with_n_rows(n_rows)
160
161
  .with_delimiter(sep.as_bytes()[0])
161
162
  .with_skip_rows(skip_rows)
162
- .with_ignore_parser_errors(ignore_errors)
163
+ .with_ignore_errors(ignore_errors)
163
164
  .with_projection(projection)
164
165
  .with_rechunk(rechunk)
165
166
  .with_chunk_size(chunk_size)
@@ -457,7 +458,7 @@ impl RbDataFrame {
457
458
  } else {
458
459
  idx as usize
459
460
  };
460
- RArray::from_vec(
461
+ RArray::from_iter(
461
462
  self.df
462
463
  .borrow()
463
464
  .get_columns()
@@ -467,39 +468,51 @@ impl RbDataFrame {
467
468
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
468
469
  obj.unwrap().to_object()
469
470
  }
470
- _ => Wrap(s.get(idx).unwrap()).into(),
471
- })
472
- .collect(),
471
+ _ => Wrap(s.get(idx).unwrap()).into_value(),
472
+ }),
473
473
  )
474
474
  .into()
475
475
  }
476
476
 
477
477
  pub fn row_tuples(&self) -> Value {
478
478
  let df = &self.df;
479
- RArray::from_vec(
480
- (0..df.borrow().height())
481
- .map(|idx| {
482
- RArray::from_vec(
483
- self.df
484
- .borrow()
485
- .get_columns()
486
- .iter()
487
- .map(|s| match s.dtype() {
488
- DataType::Object(_) => {
489
- let obj: Option<&ObjectValue> =
490
- s.get_object(idx).map(|any| any.into());
491
- obj.unwrap().to_object()
492
- }
493
- _ => Wrap(s.get(idx).unwrap()).into(),
494
- })
495
- .collect(),
496
- )
497
- })
498
- .collect(),
499
- )
479
+ RArray::from_iter((0..df.borrow().height()).map(|idx| {
480
+ RArray::from_iter(
481
+ self.df
482
+ .borrow()
483
+ .get_columns()
484
+ .iter()
485
+ .map(|s| match s.dtype() {
486
+ DataType::Object(_) => {
487
+ let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
488
+ obj.unwrap().to_object()
489
+ }
490
+ _ => Wrap(s.get(idx).unwrap()).into_value(),
491
+ }),
492
+ )
493
+ }))
500
494
  .into()
501
495
  }
502
496
 
497
+ pub fn to_numo(&self) -> Option<Value> {
498
+ let mut st = None;
499
+ for s in self.df.borrow().iter() {
500
+ let dt_i = s.dtype();
501
+ match st {
502
+ None => st = Some(dt_i.clone()),
503
+ Some(ref mut st) => {
504
+ *st = try_get_supertype(st, dt_i).ok()?;
505
+ }
506
+ }
507
+ }
508
+ let st = st?;
509
+
510
+ match st {
511
+ // TODO
512
+ _ => None,
513
+ }
514
+ }
515
+
503
516
  pub fn write_parquet(
504
517
  &self,
505
518
  rb_f: Value,
@@ -613,7 +626,7 @@ impl RbDataFrame {
613
626
  format!("{}", self.df.borrow())
614
627
  }
615
628
 
616
- pub fn get_columns(&self) -> Vec<RbSeries> {
629
+ pub fn get_columns(&self) -> RArray {
617
630
  let cols = self.df.borrow().get_columns().clone();
618
631
  to_rbseries_collection(cols)
619
632
  }
@@ -635,12 +648,13 @@ impl RbDataFrame {
635
648
  Ok(())
636
649
  }
637
650
 
638
- pub fn dtypes(&self) -> Vec<Value> {
639
- self.df
640
- .borrow()
641
- .iter()
642
- .map(|s| Wrap(s.dtype().clone()).into())
643
- .collect()
651
+ pub fn dtypes(&self) -> RArray {
652
+ RArray::from_iter(
653
+ self.df
654
+ .borrow()
655
+ .iter()
656
+ .map(|s| Wrap(s.dtype().clone()).into_value()),
657
+ )
644
658
  }
645
659
 
646
660
  pub fn n_chunks(&self) -> usize {
@@ -777,6 +791,7 @@ impl RbDataFrame {
777
791
  SortOptions {
778
792
  descending: reverse,
779
793
  nulls_last,
794
+ multithreaded: true,
780
795
  },
781
796
  )
782
797
  .map_err(RbPolarsErr::from)?;
@@ -876,6 +891,7 @@ impl RbDataFrame {
876
891
  Ok(RbDataFrame::new(df))
877
892
  }
878
893
 
894
+ #[allow(clippy::too_many_arguments)]
879
895
  pub fn pivot_expr(
880
896
  &self,
881
897
  values: Vec<String>,
@@ -884,6 +900,7 @@ impl RbDataFrame {
884
900
  aggregate_expr: &RbExpr,
885
901
  maintain_order: bool,
886
902
  sort_columns: bool,
903
+ separator: Option<String>,
887
904
  ) -> RbResult<Self> {
888
905
  let fun = match maintain_order {
889
906
  true => pivot_stable,
@@ -896,19 +913,20 @@ impl RbDataFrame {
896
913
  columns,
897
914
  aggregate_expr.inner.clone(),
898
915
  sort_columns,
916
+ separator.as_deref(),
899
917
  )
900
918
  .map_err(RbPolarsErr::from)?;
901
919
  Ok(RbDataFrame::new(df))
902
920
  }
903
921
 
904
- pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<Vec<Self>> {
922
+ pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<RArray> {
905
923
  let out = if stable {
906
924
  self.df.borrow().partition_by_stable(groups)
907
925
  } else {
908
926
  self.df.borrow().partition_by(groups)
909
927
  }
910
928
  .map_err(RbPolarsErr::from)?;
911
- Ok(out.into_iter().map(RbDataFrame::new).collect())
929
+ Ok(RArray::from_iter(out.into_iter().map(RbDataFrame::new)))
912
930
  }
913
931
 
914
932
  pub fn shift(&self, periods: i64) -> Self {
@@ -1003,13 +1021,17 @@ impl RbDataFrame {
1003
1021
  Ok(df.into())
1004
1022
  }
1005
1023
 
1006
- pub fn to_dummies(&self, columns: Option<Vec<String>>) -> RbResult<Self> {
1024
+ pub fn to_dummies(
1025
+ &self,
1026
+ columns: Option<Vec<String>>,
1027
+ separator: Option<String>,
1028
+ ) -> RbResult<Self> {
1007
1029
  let df = match columns {
1008
- Some(cols) => self
1009
- .df
1010
- .borrow()
1011
- .columns_to_dummies(cols.iter().map(|x| x as &str).collect()),
1012
- None => self.df.borrow().to_dummies(),
1030
+ Some(cols) => self.df.borrow().columns_to_dummies(
1031
+ cols.iter().map(|x| x as &str).collect(),
1032
+ separator.as_deref(),
1033
+ ),
1034
+ None => self.df.borrow().to_dummies(separator.as_deref()),
1013
1035
  }
1014
1036
  .map_err(RbPolarsErr::from)?;
1015
1037
  Ok(df.into())