polars-df 0.2.5 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +14 -0
  3. data/Cargo.lock +290 -137
  4. data/Cargo.toml +1 -1
  5. data/README.md +40 -2
  6. data/ext/polars/Cargo.toml +5 -4
  7. data/ext/polars/src/apply/dataframe.rs +6 -6
  8. data/ext/polars/src/apply/series.rs +10 -10
  9. data/ext/polars/src/batched_csv.rs +6 -4
  10. data/ext/polars/src/conversion.rs +56 -17
  11. data/ext/polars/src/dataframe.rs +65 -43
  12. data/ext/polars/src/error.rs +16 -8
  13. data/ext/polars/src/file.rs +5 -4
  14. data/ext/polars/src/lazy/apply.rs +1 -1
  15. data/ext/polars/src/lazy/dataframe.rs +12 -6
  16. data/ext/polars/src/lazy/dsl.rs +99 -45
  17. data/ext/polars/src/lazy/meta.rs +10 -9
  18. data/ext/polars/src/lib.rs +33 -29
  19. data/ext/polars/src/numo.rs +57 -0
  20. data/ext/polars/src/object.rs +2 -1
  21. data/ext/polars/src/series.rs +67 -53
  22. data/lib/polars/cat_expr.rb +0 -4
  23. data/lib/polars/cat_name_space.rb +0 -4
  24. data/lib/polars/convert.rb +0 -7
  25. data/lib/polars/data_frame.rb +165 -209
  26. data/lib/polars/data_types.rb +4 -0
  27. data/lib/polars/date_time_expr.rb +19 -151
  28. data/lib/polars/date_time_name_space.rb +17 -17
  29. data/lib/polars/expr.rb +68 -315
  30. data/lib/polars/group_by.rb +79 -51
  31. data/lib/polars/io.rb +1 -1
  32. data/lib/polars/lazy_frame.rb +1 -103
  33. data/lib/polars/lazy_functions.rb +0 -26
  34. data/lib/polars/lazy_group_by.rb +0 -8
  35. data/lib/polars/list_expr.rb +5 -27
  36. data/lib/polars/list_name_space.rb +5 -8
  37. data/lib/polars/plot.rb +109 -0
  38. data/lib/polars/series.rb +61 -19
  39. data/lib/polars/string_expr.rb +20 -76
  40. data/lib/polars/string_name_space.rb +5 -15
  41. data/lib/polars/struct_expr.rb +0 -2
  42. data/lib/polars/version.rb +1 -1
  43. data/lib/polars.rb +1 -0
  44. metadata +5 -3
data/Cargo.toml CHANGED
@@ -4,7 +4,7 @@ members = ["ext/polars"]
4
4
  [patch.crates-io]
5
5
  jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
6
6
  halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
7
- arrow2 = { git = "https://github.com/ankane/arrow2", rev = "9f36b2b97446e6dd495473e4361a70d863ac8027" }
7
+ arrow2 = { git = "https://github.com/ankane/arrow2", rev = "ef0270922a217070ba9942567c0ff3263ae8c531" }
8
8
 
9
9
  [profile.release]
10
10
  strip = true
data/README.md CHANGED
@@ -282,10 +282,10 @@ df.to_dummies
282
282
 
283
283
  ## Conversion
284
284
 
285
- Array of rows
285
+ Array of hashes
286
286
 
287
287
  ```ruby
288
- df.rows
288
+ df.rows(named: true)
289
289
  ```
290
290
 
291
291
  Hash of series
@@ -308,6 +308,12 @@ Parquet
308
308
  df.write_parquet("file.parquet")
309
309
  ```
310
310
 
311
+ Numo array
312
+
313
+ ```ruby
314
+ df.to_numo
315
+ ```
316
+
311
317
  ## Types
312
318
 
313
319
  You can specify column types when creating a data frame
@@ -343,6 +349,38 @@ Cast a column
343
349
  df["a"].cast(Polars::Int32)
344
350
  ```
345
351
 
352
+ ## Visualization
353
+
354
+ Add [Vega](https://github.com/ankane/vega-ruby) to your application’s Gemfile:
355
+
356
+ ```ruby
357
+ gem "vega"
358
+ ```
359
+
360
+ And use:
361
+
362
+ ```ruby
363
+ df.plot("a", "b")
364
+ ```
365
+
366
+ Specify the chart type (`line`, `pie`, `column`, `bar`, `area`, or `scatter`)
367
+
368
+ ```ruby
369
+ df.plot("a", "b", type: "pie")
370
+ ```
371
+
372
+ Group data
373
+
374
+ ```ruby
375
+ df.groupby("c").plot("a", "b")
376
+ ```
377
+
378
+ Stacked columns or bars
379
+
380
+ ```ruby
381
+ df.groupby("c").plot("a", "b", stacked: true)
382
+ ```
383
+
346
384
  ## History
347
385
 
348
386
  View the [changelog](CHANGELOG.md)
data/ext/polars/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.2.4"
3
+ version = "0.3.1"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -11,18 +11,19 @@ crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
13
  ahash = "0.8"
14
- magnus = "0.4"
15
- polars-core = "0.26.1"
14
+ magnus = "0.5"
15
+ polars-core = "0.27.0"
16
16
  serde_json = "1"
17
17
 
18
18
  [dependencies.polars]
19
- version = "0.26.1"
19
+ version = "0.27.0"
20
20
  features = [
21
21
  "abs",
22
22
  "arange",
23
23
  "arg_where",
24
24
  "asof_join",
25
25
  "avro",
26
+ "binary_encoding",
26
27
  "concat_str",
27
28
  "cse",
28
29
  "csv-file",
data/ext/polars/src/apply/dataframe.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::{class, RArray, TryConvert, Value};
1
+ use magnus::{class, IntoValue, RArray, TryConvert, Value};
2
2
  use polars::prelude::*;
3
3
  use polars_core::frame::row::{rows_to_schema_first_non_null, Row};
4
4
  use polars_core::series::SeriesIter;
@@ -27,7 +27,7 @@ pub fn apply_lambda_unknown<'a>(
27
27
 
28
28
  for _ in 0..df.height() {
29
29
  let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
30
- let arg = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
30
+ let arg = (RArray::from_iter(iter),);
31
31
  let out: Value = lambda.funcall("call", arg)?;
32
32
 
33
33
  if out.is_nil() {
@@ -141,7 +141,7 @@ where
141
141
  let mut iters = get_iters_skip(df, init_null_count + skip);
142
142
  ((init_null_count + skip)..df.height()).map(move |_| {
143
143
  let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
144
- let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
144
+ let tpl = (RArray::from_iter(iter),);
145
145
  match lambda.funcall::<_, _, Value>("call", tpl) {
146
146
  Ok(val) => val.try_convert::<T>().ok(),
147
147
  Err(e) => panic!("ruby function failed {}", e),
@@ -158,7 +158,7 @@ pub fn apply_lambda_with_primitive_out_type<D>(
158
158
  ) -> ChunkedArray<D>
159
159
  where
160
160
  D: RbArrowPrimitiveType,
161
- D::Native: Into<Value> + TryConvert,
161
+ D::Native: IntoValue + TryConvert,
162
162
  {
163
163
  let skip = usize::from(first_value.is_some());
164
164
  if init_null_count == df.height() {
@@ -216,7 +216,7 @@ pub fn apply_lambda_with_list_out_type<'a>(
216
216
  let mut iters = get_iters_skip(df, init_null_count + skip);
217
217
  let iter = ((init_null_count + skip)..df.height()).map(|_| {
218
218
  let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
219
- let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
219
+ let tpl = (RArray::from_iter(iter),);
220
220
  match lambda.funcall::<_, _, Value>("call", tpl) {
221
221
  Ok(val) => match val.funcall::<_, _, Value>("_s", ()) {
222
222
  Ok(val) => val
@@ -254,7 +254,7 @@ pub fn apply_lambda_with_rows_output<'a>(
254
254
  let mut iters = get_iters_skip(df, init_null_count + skip);
255
255
  let mut row_iter = ((init_null_count + skip)..df.height()).map(|_| {
256
256
  let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
257
- let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
257
+ let tpl = (RArray::from_iter(iter),);
258
258
  match lambda.funcall::<_, _, Value>("call", tpl) {
259
259
  Ok(val) => {
260
260
  match val.try_convert::<RArray>().ok() {
data/ext/polars/src/apply/series.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::{class, RHash, TryConvert, Value};
1
+ use magnus::{class, IntoValue, RHash, TryConvert, Value};
2
2
  use polars::prelude::*;
3
3
 
4
4
  use super::*;
@@ -85,7 +85,7 @@ pub trait ApplyLambda<'a> {
85
85
  ) -> RbResult<ChunkedArray<D>>
86
86
  where
87
87
  D: RbArrowPrimitiveType,
88
- D::Native: Into<Value> + TryConvert;
88
+ D::Native: IntoValue + TryConvert;
89
89
 
90
90
  /// Apply a lambda with a boolean output type
91
91
  fn apply_lambda_with_bool_out_type(
@@ -130,14 +130,14 @@ pub trait ApplyLambda<'a> {
130
130
 
131
131
  pub fn call_lambda<T>(lambda: Value, in_val: T) -> RbResult<Value>
132
132
  where
133
- T: Into<Value>,
133
+ T: IntoValue,
134
134
  {
135
135
  lambda.funcall("call", (in_val,))
136
136
  }
137
137
 
138
138
  pub(crate) fn call_lambda_and_extract<T, S>(lambda: Value, in_val: T) -> RbResult<S>
139
139
  where
140
- T: Into<Value>,
140
+ T: IntoValue,
141
141
  S: TryConvert,
142
142
  {
143
143
  match call_lambda(lambda, in_val) {
@@ -148,7 +148,7 @@ where
148
148
 
149
149
  fn call_lambda_series_out<T>(lambda: Value, in_val: T) -> RbResult<Series>
150
150
  where
151
- T: Into<Value>,
151
+ T: IntoValue,
152
152
  {
153
153
  let out: Value = lambda.funcall("call", (in_val,))?;
154
154
  let py_series: Value = out.funcall("_s", ())?;
@@ -216,7 +216,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
216
216
  ) -> RbResult<ChunkedArray<D>>
217
217
  where
218
218
  D: RbArrowPrimitiveType,
219
- D::Native: Into<Value> + TryConvert,
219
+ D::Native: IntoValue + TryConvert,
220
220
  {
221
221
  let skip = usize::from(first_value.is_some());
222
222
  if init_null_count == self.len() {
@@ -435,7 +435,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
435
435
  impl<'a, T> ApplyLambda<'a> for ChunkedArray<T>
436
436
  where
437
437
  T: RbArrowPrimitiveType + PolarsNumericType,
438
- T::Native: Into<Value> + TryConvert,
438
+ T::Native: IntoValue + TryConvert,
439
439
  ChunkedArray<T>: IntoSeries,
440
440
  {
441
441
  fn apply_lambda_unknown(&'a self, lambda: Value) -> RbResult<RbSeries> {
@@ -493,7 +493,7 @@ where
493
493
  ) -> RbResult<ChunkedArray<D>>
494
494
  where
495
495
  D: RbArrowPrimitiveType,
496
- D::Native: Into<Value> + TryConvert,
496
+ D::Native: IntoValue + TryConvert,
497
497
  {
498
498
  let skip = usize::from(first_value.is_some());
499
499
  if init_null_count == self.len() {
@@ -765,7 +765,7 @@ impl<'a> ApplyLambda<'a> for Utf8Chunked {
765
765
  ) -> RbResult<ChunkedArray<D>>
766
766
  where
767
767
  D: RbArrowPrimitiveType,
768
- D::Native: Into<Value> + TryConvert,
768
+ D::Native: IntoValue + TryConvert,
769
769
  {
770
770
  let skip = usize::from(first_value.is_some());
771
771
  if init_null_count == self.len() {
@@ -1036,7 +1036,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
1036
1036
  ) -> RbResult<ChunkedArray<D>>
1037
1037
  where
1038
1038
  D: RbArrowPrimitiveType,
1039
- D::Native: Into<Value> + TryConvert,
1039
+ D::Native: IntoValue + TryConvert,
1040
1040
  {
1041
1041
  let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
1042
1042
 
data/ext/polars/src/batched_csv.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::Value;
1
+ use magnus::{RArray, Value};
2
2
  use polars::io::mmap::MmapBytesReader;
3
3
  use polars::io::RowCount;
4
4
  use polars::prelude::read_impl::OwnedBatchedCsvReader;
@@ -84,7 +84,7 @@ impl RbBatchedCsv {
84
84
  .with_n_rows(n_rows)
85
85
  .with_delimiter(sep.as_bytes()[0])
86
86
  .with_skip_rows(skip_rows)
87
- .with_ignore_parser_errors(ignore_errors)
87
+ .with_ignore_errors(ignore_errors)
88
88
  .with_projection(projection)
89
89
  .with_rechunk(rechunk)
90
90
  .with_chunk_size(chunk_size)
@@ -109,12 +109,14 @@ impl RbBatchedCsv {
109
109
  })
110
110
  }
111
111
 
112
- pub fn next_batches(&self, n: usize) -> RbResult<Option<Vec<RbDataFrame>>> {
112
+ pub fn next_batches(&self, n: usize) -> RbResult<Option<RArray>> {
113
113
  let batches = self
114
114
  .reader
115
115
  .borrow_mut()
116
116
  .next_batches(n)
117
117
  .map_err(RbPolarsErr::from)?;
118
- Ok(batches.map(|batches| batches.into_iter().map(|out| out.1.into()).collect()))
118
+ Ok(batches.map(|batches| {
119
+ RArray::from_iter(batches.into_iter().map(|out| RbDataFrame::from(out.1)))
120
+ }))
119
121
  }
120
122
  }
data/ext/polars/src/conversion.rs CHANGED
@@ -1,6 +1,6 @@
1
1
  use magnus::{
2
- class, r_hash::ForEach, Integer, Module, RArray, RFloat, RHash, RString, Symbol, TryConvert,
3
- Value, QNIL,
2
+ class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
3
+ RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
4
4
  };
5
5
  use polars::chunked_array::object::PolarsObjectSafe;
6
6
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
@@ -98,9 +98,9 @@ impl TryConvert for Wrap<NullValues> {
98
98
  }
99
99
  }
100
100
 
101
- impl From<Wrap<AnyValue<'_>>> for Value {
102
- fn from(w: Wrap<AnyValue<'_>>) -> Self {
103
- match w.0 {
101
+ impl IntoValue for Wrap<AnyValue<'_>> {
102
+ fn into_value_with(self, _: &RubyHandle) -> Value {
103
+ match self.0 {
104
104
  AnyValue::UInt8(v) => Value::from(v),
105
105
  AnyValue::UInt16(v) => Value::from(v),
106
106
  AnyValue::UInt32(v) => Value::from(v),
@@ -114,6 +114,8 @@ impl From<Wrap<AnyValue<'_>>> for Value {
114
114
  AnyValue::Null => *QNIL,
115
115
  AnyValue::Boolean(v) => Value::from(v),
116
116
  AnyValue::Utf8(v) => Value::from(v),
117
+ AnyValue::Utf8Owned(_v) => todo!(),
118
+ AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
117
119
  AnyValue::Date(v) => class::time()
118
120
  .funcall::<_, _, Value>("at", (v * 86400,))
119
121
  .unwrap()
@@ -123,7 +125,13 @@ impl From<Wrap<AnyValue<'_>>> for Value {
123
125
  .unwrap(),
124
126
  AnyValue::Datetime(v, tu, tz) => {
125
127
  let t = match tu {
126
- TimeUnit::Nanoseconds => todo!(),
128
+ TimeUnit::Nanoseconds => {
129
+ let sec = v / 1000000000;
130
+ let subsec = v % 1000000000;
131
+ class::time()
132
+ .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("nsec")))
133
+ .unwrap()
134
+ }
127
135
  TimeUnit::Microseconds => {
128
136
  let sec = v / 1000000;
129
137
  let subsec = v % 1000000;
@@ -131,7 +139,13 @@ impl From<Wrap<AnyValue<'_>>> for Value {
131
139
  .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
132
140
  .unwrap()
133
141
  }
134
- TimeUnit::Milliseconds => todo!(),
142
+ TimeUnit::Milliseconds => {
143
+ let sec = v / 1000;
144
+ let subsec = v % 1000;
145
+ class::time()
146
+ .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("millisecond")))
147
+ .unwrap()
148
+ }
135
149
  };
136
150
 
137
151
  if tz.is_some() {
@@ -140,16 +154,24 @@ impl From<Wrap<AnyValue<'_>>> for Value {
140
154
  t.funcall::<_, _, Value>("utc", ()).unwrap()
141
155
  }
142
156
  }
143
- _ => todo!(),
157
+ AnyValue::Duration(_v, _tu) => todo!(),
158
+ AnyValue::Time(_v) => todo!(),
159
+ AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
160
+ ref _av @ AnyValue::Struct(_, _, _flds) => todo!(),
161
+ AnyValue::StructOwned(_payload) => todo!(),
162
+ AnyValue::Object(_v) => todo!(),
163
+ AnyValue::ObjectOwned(_v) => todo!(),
164
+ AnyValue::Binary(_v) => todo!(),
165
+ AnyValue::BinaryOwned(_v) => todo!(),
144
166
  }
145
167
  }
146
168
  }
147
169
 
148
- impl From<Wrap<DataType>> for Value {
149
- fn from(w: Wrap<DataType>) -> Self {
170
+ impl IntoValue for Wrap<DataType> {
171
+ fn into_value_with(self, _: &RubyHandle) -> Value {
150
172
  let pl = crate::rb_modules::polars();
151
173
 
152
- match &w.0 {
174
+ match self.0 {
153
175
  DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
154
176
  DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
155
177
  DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
@@ -160,11 +182,12 @@ impl From<Wrap<DataType>> for Value {
160
182
  DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
161
183
  DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
162
184
  DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
185
+ DataType::Decimal128(_) => todo!(),
163
186
  DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
164
187
  DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
165
188
  DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
166
189
  DataType::List(inner) => {
167
- let inner = Wrap(*inner.clone());
190
+ let inner = Wrap(*inner);
168
191
  let list_class = pl.const_get::<_, Value>("List").unwrap();
169
192
  list_class.funcall::<_, _, Value>("new", (inner,)).unwrap()
170
193
  }
@@ -172,7 +195,7 @@ impl From<Wrap<DataType>> for Value {
172
195
  DataType::Datetime(tu, tz) => {
173
196
  let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
174
197
  datetime_class
175
- .funcall::<_, _, Value>("new", (tu.to_ascii(), tz.clone()))
198
+ .funcall::<_, _, Value>("new", (tu.to_ascii(), tz))
176
199
  .unwrap()
177
200
  }
178
201
  DataType::Duration(tu) => {
@@ -423,9 +446,9 @@ impl ObjectValue {
423
446
  }
424
447
  }
425
448
 
426
- impl From<ObjectValue> for Value {
427
- fn from(val: ObjectValue) -> Self {
428
- val.inner
449
+ impl IntoValue for ObjectValue {
450
+ fn into_value_with(self, _: &RubyHandle) -> Value {
451
+ self.inner
429
452
  }
430
453
  }
431
454
 
@@ -767,6 +790,22 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
767
790
  }
768
791
  }
769
792
 
793
+ impl TryConvert for Wrap<SearchSortedSide> {
794
+ fn try_convert(ob: Value) -> RbResult<Self> {
795
+ let parsed = match ob.try_convert::<String>()?.as_str() {
796
+ "any" => SearchSortedSide::Any,
797
+ "left" => SearchSortedSide::Left,
798
+ "right" => SearchSortedSide::Right,
799
+ v => {
800
+ return Err(RbValueError::new_err(format!(
801
+ "side must be one of {{'any', 'left', 'right'}}, got {v}",
802
+ )))
803
+ }
804
+ };
805
+ Ok(Wrap(parsed))
806
+ }
807
+ }
808
+
770
809
  pub fn parse_fill_null_strategy(
771
810
  strategy: &str,
772
811
  limit: FillNullLimit,
@@ -780,7 +819,7 @@ pub fn parse_fill_null_strategy(
780
819
  "zero" => FillNullStrategy::Zero,
781
820
  "one" => FillNullStrategy::One,
782
821
  e => {
783
- return Err(magnus::Error::runtime_error(format!(
822
+ return Err(magnus::Error::new(exception::runtime_error(), format!(
784
823
  "strategy must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {}",
785
824
  e,
786
825
  )))
data/ext/polars/src/dataframe.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
1
+ use magnus::{r_hash::ForEach, IntoValue, RArray, RHash, RString, Value};
2
2
  use polars::frame::row::{rows_to_schema_supertypes, Row};
3
3
  use polars::frame::NullStrategy;
4
4
  use polars::io::avro::AvroCompression;
@@ -6,6 +6,7 @@ use polars::io::mmap::ReaderBytes;
6
6
  use polars::io::RowCount;
7
7
  use polars::prelude::pivot::{pivot, pivot_stable};
8
8
  use polars::prelude::*;
9
+ use polars_core::utils::try_get_supertype;
9
10
  use std::cell::RefCell;
10
11
  use std::io::{BufWriter, Cursor};
11
12
  use std::ops::Deref;
@@ -68,7 +69,7 @@ impl RbDataFrame {
68
69
  *dtype_ = dtype;
69
70
  }
70
71
  } else {
71
- schema.with_column(name, dtype)
72
+ schema.with_column(name, dtype);
72
73
  }
73
74
  }
74
75
  }
@@ -159,7 +160,7 @@ impl RbDataFrame {
159
160
  .with_n_rows(n_rows)
160
161
  .with_delimiter(sep.as_bytes()[0])
161
162
  .with_skip_rows(skip_rows)
162
- .with_ignore_parser_errors(ignore_errors)
163
+ .with_ignore_errors(ignore_errors)
163
164
  .with_projection(projection)
164
165
  .with_rechunk(rechunk)
165
166
  .with_chunk_size(chunk_size)
@@ -457,7 +458,7 @@ impl RbDataFrame {
457
458
  } else {
458
459
  idx as usize
459
460
  };
460
- RArray::from_vec(
461
+ RArray::from_iter(
461
462
  self.df
462
463
  .borrow()
463
464
  .get_columns()
@@ -467,39 +468,51 @@ impl RbDataFrame {
467
468
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
468
469
  obj.unwrap().to_object()
469
470
  }
470
- _ => Wrap(s.get(idx).unwrap()).into(),
471
- })
472
- .collect(),
471
+ _ => Wrap(s.get(idx).unwrap()).into_value(),
472
+ }),
473
473
  )
474
474
  .into()
475
475
  }
476
476
 
477
477
  pub fn row_tuples(&self) -> Value {
478
478
  let df = &self.df;
479
- RArray::from_vec(
480
- (0..df.borrow().height())
481
- .map(|idx| {
482
- RArray::from_vec(
483
- self.df
484
- .borrow()
485
- .get_columns()
486
- .iter()
487
- .map(|s| match s.dtype() {
488
- DataType::Object(_) => {
489
- let obj: Option<&ObjectValue> =
490
- s.get_object(idx).map(|any| any.into());
491
- obj.unwrap().to_object()
492
- }
493
- _ => Wrap(s.get(idx).unwrap()).into(),
494
- })
495
- .collect(),
496
- )
497
- })
498
- .collect(),
499
- )
479
+ RArray::from_iter((0..df.borrow().height()).map(|idx| {
480
+ RArray::from_iter(
481
+ self.df
482
+ .borrow()
483
+ .get_columns()
484
+ .iter()
485
+ .map(|s| match s.dtype() {
486
+ DataType::Object(_) => {
487
+ let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
488
+ obj.unwrap().to_object()
489
+ }
490
+ _ => Wrap(s.get(idx).unwrap()).into_value(),
491
+ }),
492
+ )
493
+ }))
500
494
  .into()
501
495
  }
502
496
 
497
+ pub fn to_numo(&self) -> Option<Value> {
498
+ let mut st = None;
499
+ for s in self.df.borrow().iter() {
500
+ let dt_i = s.dtype();
501
+ match st {
502
+ None => st = Some(dt_i.clone()),
503
+ Some(ref mut st) => {
504
+ *st = try_get_supertype(st, dt_i).ok()?;
505
+ }
506
+ }
507
+ }
508
+ let st = st?;
509
+
510
+ match st {
511
+ // TODO
512
+ _ => None,
513
+ }
514
+ }
515
+
503
516
  pub fn write_parquet(
504
517
  &self,
505
518
  rb_f: Value,
@@ -613,7 +626,7 @@ impl RbDataFrame {
613
626
  format!("{}", self.df.borrow())
614
627
  }
615
628
 
616
- pub fn get_columns(&self) -> Vec<RbSeries> {
629
+ pub fn get_columns(&self) -> RArray {
617
630
  let cols = self.df.borrow().get_columns().clone();
618
631
  to_rbseries_collection(cols)
619
632
  }
@@ -635,12 +648,13 @@ impl RbDataFrame {
635
648
  Ok(())
636
649
  }
637
650
 
638
- pub fn dtypes(&self) -> Vec<Value> {
639
- self.df
640
- .borrow()
641
- .iter()
642
- .map(|s| Wrap(s.dtype().clone()).into())
643
- .collect()
651
+ pub fn dtypes(&self) -> RArray {
652
+ RArray::from_iter(
653
+ self.df
654
+ .borrow()
655
+ .iter()
656
+ .map(|s| Wrap(s.dtype().clone()).into_value()),
657
+ )
644
658
  }
645
659
 
646
660
  pub fn n_chunks(&self) -> usize {
@@ -777,6 +791,7 @@ impl RbDataFrame {
777
791
  SortOptions {
778
792
  descending: reverse,
779
793
  nulls_last,
794
+ multithreaded: true,
780
795
  },
781
796
  )
782
797
  .map_err(RbPolarsErr::from)?;
@@ -876,6 +891,7 @@ impl RbDataFrame {
876
891
  Ok(RbDataFrame::new(df))
877
892
  }
878
893
 
894
+ #[allow(clippy::too_many_arguments)]
879
895
  pub fn pivot_expr(
880
896
  &self,
881
897
  values: Vec<String>,
@@ -884,6 +900,7 @@ impl RbDataFrame {
884
900
  aggregate_expr: &RbExpr,
885
901
  maintain_order: bool,
886
902
  sort_columns: bool,
903
+ separator: Option<String>,
887
904
  ) -> RbResult<Self> {
888
905
  let fun = match maintain_order {
889
906
  true => pivot_stable,
@@ -896,19 +913,20 @@ impl RbDataFrame {
896
913
  columns,
897
914
  aggregate_expr.inner.clone(),
898
915
  sort_columns,
916
+ separator.as_deref(),
899
917
  )
900
918
  .map_err(RbPolarsErr::from)?;
901
919
  Ok(RbDataFrame::new(df))
902
920
  }
903
921
 
904
- pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<Vec<Self>> {
922
+ pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<RArray> {
905
923
  let out = if stable {
906
924
  self.df.borrow().partition_by_stable(groups)
907
925
  } else {
908
926
  self.df.borrow().partition_by(groups)
909
927
  }
910
928
  .map_err(RbPolarsErr::from)?;
911
- Ok(out.into_iter().map(RbDataFrame::new).collect())
929
+ Ok(RArray::from_iter(out.into_iter().map(RbDataFrame::new)))
912
930
  }
913
931
 
914
932
  pub fn shift(&self, periods: i64) -> Self {
@@ -1003,13 +1021,17 @@ impl RbDataFrame {
1003
1021
  Ok(df.into())
1004
1022
  }
1005
1023
 
1006
- pub fn to_dummies(&self, columns: Option<Vec<String>>) -> RbResult<Self> {
1024
+ pub fn to_dummies(
1025
+ &self,
1026
+ columns: Option<Vec<String>>,
1027
+ separator: Option<String>,
1028
+ ) -> RbResult<Self> {
1007
1029
  let df = match columns {
1008
- Some(cols) => self
1009
- .df
1010
- .borrow()
1011
- .columns_to_dummies(cols.iter().map(|x| x as &str).collect()),
1012
- None => self.df.borrow().to_dummies(),
1030
+ Some(cols) => self.df.borrow().columns_to_dummies(
1031
+ cols.iter().map(|x| x as &str).collect(),
1032
+ separator.as_deref(),
1033
+ ),
1034
+ None => self.df.borrow().to_dummies(separator.as_deref()),
1013
1035
  }
1014
1036
  .map_err(RbPolarsErr::from)?;
1015
1037
  Ok(df.into())