polars-df 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/Cargo.lock +1296 -283
  4. data/LICENSE.txt +1 -0
  5. data/README.md +1 -2
  6. data/ext/polars/Cargo.toml +15 -5
  7. data/ext/polars/src/batched_csv.rs +7 -10
  8. data/ext/polars/src/conversion/any_value.rs +31 -21
  9. data/ext/polars/src/conversion/mod.rs +125 -28
  10. data/ext/polars/src/dataframe/construction.rs +0 -3
  11. data/ext/polars/src/dataframe/export.rs +9 -2
  12. data/ext/polars/src/dataframe/general.rs +16 -11
  13. data/ext/polars/src/dataframe/io.rs +73 -169
  14. data/ext/polars/src/dataframe/mod.rs +1 -0
  15. data/ext/polars/src/dataframe/serde.rs +15 -0
  16. data/ext/polars/src/error.rs +31 -48
  17. data/ext/polars/src/exceptions.rs +24 -0
  18. data/ext/polars/src/expr/binary.rs +4 -42
  19. data/ext/polars/src/expr/datetime.rs +5 -4
  20. data/ext/polars/src/expr/general.rs +13 -22
  21. data/ext/polars/src/expr/list.rs +18 -11
  22. data/ext/polars/src/expr/rolling.rs +6 -7
  23. data/ext/polars/src/expr/string.rs +9 -36
  24. data/ext/polars/src/file.rs +59 -22
  25. data/ext/polars/src/functions/business.rs +15 -0
  26. data/ext/polars/src/functions/lazy.rs +17 -8
  27. data/ext/polars/src/functions/mod.rs +1 -0
  28. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  29. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  30. data/ext/polars/src/interop/mod.rs +1 -0
  31. data/ext/polars/src/lazyframe/general.rs +877 -0
  32. data/ext/polars/src/lazyframe/mod.rs +3 -827
  33. data/ext/polars/src/lazyframe/serde.rs +31 -0
  34. data/ext/polars/src/lib.rs +45 -14
  35. data/ext/polars/src/map/dataframe.rs +10 -6
  36. data/ext/polars/src/map/lazy.rs +65 -4
  37. data/ext/polars/src/map/mod.rs +9 -8
  38. data/ext/polars/src/on_startup.rs +1 -1
  39. data/ext/polars/src/series/aggregation.rs +1 -5
  40. data/ext/polars/src/series/arithmetic.rs +10 -10
  41. data/ext/polars/src/series/construction.rs +2 -2
  42. data/ext/polars/src/series/export.rs +1 -1
  43. data/ext/polars/src/series/general.rs +643 -0
  44. data/ext/polars/src/series/import.rs +55 -0
  45. data/ext/polars/src/series/mod.rs +11 -638
  46. data/ext/polars/src/series/scatter.rs +2 -2
  47. data/ext/polars/src/utils.rs +0 -20
  48. data/lib/polars/batched_csv_reader.rb +0 -2
  49. data/lib/polars/binary_expr.rb +133 -9
  50. data/lib/polars/binary_name_space.rb +101 -6
  51. data/lib/polars/config.rb +4 -0
  52. data/lib/polars/data_frame.rb +275 -52
  53. data/lib/polars/data_type_group.rb +28 -0
  54. data/lib/polars/data_types.rb +2 -0
  55. data/lib/polars/date_time_expr.rb +244 -0
  56. data/lib/polars/date_time_name_space.rb +87 -0
  57. data/lib/polars/expr.rb +103 -2
  58. data/lib/polars/functions/as_datatype.rb +51 -2
  59. data/lib/polars/functions/col.rb +1 -1
  60. data/lib/polars/functions/eager.rb +1 -3
  61. data/lib/polars/functions/lazy.rb +88 -10
  62. data/lib/polars/functions/range/time_range.rb +21 -21
  63. data/lib/polars/io/csv.rb +14 -16
  64. data/lib/polars/io/database.rb +2 -2
  65. data/lib/polars/io/ipc.rb +14 -4
  66. data/lib/polars/io/ndjson.rb +10 -0
  67. data/lib/polars/io/parquet.rb +168 -111
  68. data/lib/polars/lazy_frame.rb +649 -15
  69. data/lib/polars/list_name_space.rb +169 -0
  70. data/lib/polars/selectors.rb +1144 -0
  71. data/lib/polars/series.rb +465 -35
  72. data/lib/polars/string_cache.rb +27 -1
  73. data/lib/polars/string_expr.rb +0 -1
  74. data/lib/polars/string_name_space.rb +73 -3
  75. data/lib/polars/struct_name_space.rb +31 -7
  76. data/lib/polars/utils/various.rb +5 -1
  77. data/lib/polars/utils.rb +45 -10
  78. data/lib/polars/version.rb +1 -1
  79. data/lib/polars.rb +2 -1
  80. metadata +14 -4
  81. data/lib/polars/functions.rb +0 -57
@@ -9,7 +9,6 @@ use polars_core::series::IsSorted;
9
9
  use crate::conversion::{parse_fill_null_strategy, Wrap};
10
10
  use crate::map::lazy::map_single;
11
11
  use crate::rb_exprs_to_exprs;
12
- use crate::utils::reinterpret;
13
12
  use crate::{RbExpr, RbResult};
14
13
 
15
14
  impl RbExpr {
@@ -165,7 +164,7 @@ impl RbExpr {
165
164
  self.inner.clone().implode().into()
166
165
  }
167
166
 
168
- pub fn quantile(&self, quantile: &Self, interpolation: Wrap<QuantileInterpolOptions>) -> Self {
167
+ pub fn quantile(&self, quantile: &Self, interpolation: Wrap<QuantileMethod>) -> Self {
169
168
  self.inner
170
169
  .clone()
171
170
  .quantile(quantile.inner.clone(), interpolation.0)
@@ -463,14 +462,7 @@ impl RbExpr {
463
462
  }
464
463
 
465
464
  pub fn gather_every(&self, n: usize, offset: usize) -> Self {
466
- self.clone()
467
- .inner
468
- .map(
469
- move |s: Series| Ok(Some(s.gather_every(n, offset))),
470
- GetOutput::same_type(),
471
- )
472
- .with_fmt("gather_every")
473
- .into()
465
+ self.inner.clone().gather_every(n, offset).into()
474
466
  }
475
467
 
476
468
  pub fn tail(&self, n: Option<usize>) -> Self {
@@ -644,8 +636,16 @@ impl RbExpr {
644
636
  output_type: Option<Wrap<DataType>>,
645
637
  agg_list: bool,
646
638
  is_elementwise: bool,
639
+ returns_scalar: bool,
647
640
  ) -> Self {
648
- map_single(self, lambda, output_type, agg_list, is_elementwise)
641
+ map_single(
642
+ self,
643
+ lambda,
644
+ output_type,
645
+ agg_list,
646
+ is_elementwise,
647
+ returns_scalar,
648
+ )
649
649
  }
650
650
 
651
651
  pub fn dot(&self, other: &Self) -> Self {
@@ -653,16 +653,7 @@ impl RbExpr {
653
653
  }
654
654
 
655
655
  pub fn reinterpret(&self, signed: bool) -> Self {
656
- let function = move |s: Series| reinterpret(&s, signed).map(Some);
657
- let dt = if signed {
658
- DataType::Int64
659
- } else {
660
- DataType::UInt64
661
- };
662
- self.clone()
663
- .inner
664
- .map(function, GetOutput::from_type(dt))
665
- .into()
656
+ self.inner.clone().reinterpret(signed).into()
666
657
  }
667
658
 
668
659
  pub fn mode(&self) -> Self {
@@ -717,7 +708,7 @@ impl RbExpr {
717
708
  }
718
709
 
719
710
  pub fn reshape(&self, dims: Vec<i64>) -> Self {
720
- self.inner.clone().reshape(&dims, NestedType::Array).into()
711
+ self.inner.clone().reshape(&dims).into()
721
712
  }
722
713
 
723
714
  pub fn cum_count(&self, reverse: bool) -> Self {
@@ -1,4 +1,4 @@
1
- use magnus::Value;
1
+ use magnus::{prelude::*, value::Opaque, Ruby, Value};
2
2
  use polars::lazy::dsl::lit;
3
3
  use polars::prelude::*;
4
4
  use polars::series::ops::NullBehavior;
@@ -179,23 +179,30 @@ impl RbExpr {
179
179
  pub fn list_to_struct(
180
180
  &self,
181
181
  width_strat: Wrap<ListToStructWidthStrategy>,
182
- _name_gen: Option<Value>,
182
+ name_gen: Option<Value>,
183
183
  upper_bound: usize,
184
184
  ) -> RbResult<Self> {
185
- // TODO fix
186
- let name_gen = None;
187
- // let name_gen = name_gen.map(|lambda| {
188
- // Arc::new(move |idx: usize| {
189
- // let out: Value = lambda.funcall("call", (idx,)).unwrap();
190
- // String::try_convert(out).unwrap()
191
- // }) as NameGenerator
192
- // });
185
+ let name_gen = name_gen.map(|lambda| {
186
+ let lambda = Opaque::from(lambda);
187
+ Arc::new(move |idx: usize| {
188
+ let lambda = Ruby::get().unwrap().get_inner(lambda);
189
+ let out: String = lambda.funcall("call", (idx,)).unwrap();
190
+ PlSmallStr::from_string(out)
191
+ });
192
+
193
+ // non-Ruby thread
194
+ todo!();
195
+ });
193
196
 
194
197
  Ok(self
195
198
  .inner
196
199
  .clone()
197
200
  .list()
198
- .to_struct(width_strat.0, name_gen, upper_bound)
201
+ .to_struct(ListToStructArgs::InferWidth {
202
+ infer_field_strategy: width_strat.0,
203
+ get_index_name: name_gen,
204
+ max_fields: upper_bound,
205
+ })
199
206
  .into())
200
207
  }
201
208
 
@@ -1,5 +1,4 @@
1
1
  use polars::prelude::*;
2
- use std::any::Any;
3
2
 
4
3
  use crate::conversion::Wrap;
5
4
  use crate::RbExpr;
@@ -169,7 +168,7 @@ impl RbExpr {
169
168
  weights,
170
169
  min_periods,
171
170
  center,
172
- fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
171
+ fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
173
172
  };
174
173
 
175
174
  self.inner.clone().rolling_std(options).into()
@@ -187,7 +186,7 @@ impl RbExpr {
187
186
  window_size: Duration::parse(&window_size),
188
187
  min_periods,
189
188
  closed_window: closed.0,
190
- fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
189
+ fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
191
190
  };
192
191
 
193
192
  self.inner
@@ -210,7 +209,7 @@ impl RbExpr {
210
209
  weights,
211
210
  min_periods,
212
211
  center,
213
- fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
212
+ fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
214
213
  };
215
214
 
216
215
  self.inner.clone().rolling_var(options).into()
@@ -228,7 +227,7 @@ impl RbExpr {
228
227
  window_size: Duration::parse(&window_size),
229
228
  min_periods,
230
229
  closed_window: closed.0,
231
- fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
230
+ fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
232
231
  };
233
232
 
234
233
  self.inner
@@ -277,7 +276,7 @@ impl RbExpr {
277
276
  pub fn rolling_quantile(
278
277
  &self,
279
278
  quantile: f64,
280
- interpolation: Wrap<QuantileInterpolOptions>,
279
+ interpolation: Wrap<QuantileMethod>,
281
280
  window_size: usize,
282
281
  weights: Option<Vec<f64>>,
283
282
  min_periods: Option<usize>,
@@ -302,7 +301,7 @@ impl RbExpr {
302
301
  &self,
303
302
  by: &RbExpr,
304
303
  quantile: f64,
305
- interpolation: Wrap<QuantileInterpolOptions>,
304
+ interpolation: Wrap<QuantileMethod>,
306
305
  window_size: String,
307
306
  min_periods: usize,
308
307
  closed: Wrap<ClosedWindow>,
@@ -130,6 +130,11 @@ impl RbExpr {
130
130
  self.inner.clone().str().to_lowercase().into()
131
131
  }
132
132
 
133
+ // requires nightly
134
+ // pub fn str_to_titlecase(&self) -> Self {
135
+ // self.inner.clone().str().to_titlecase().into()
136
+ // }
137
+
133
138
  pub fn str_len_bytes(&self) -> Self {
134
139
  self.inner.clone().str().len_bytes().into()
135
140
  }
@@ -200,51 +205,19 @@ impl RbExpr {
200
205
  }
201
206
 
202
207
  pub fn str_hex_encode(&self) -> Self {
203
- self.clone()
204
- .inner
205
- .map(
206
- move |s| s.str().map(|s| Some(s.hex_encode().into_series())),
207
- GetOutput::same_type(),
208
- )
209
- .with_fmt("str.hex_encode")
210
- .into()
208
+ self.inner.clone().str().hex_encode().into()
211
209
  }
212
210
 
213
211
  pub fn str_hex_decode(&self, strict: bool) -> Self {
214
- self.clone()
215
- .inner
216
- .map(
217
- move |s| s.str()?.hex_decode(strict).map(|s| Some(s.into_series())),
218
- GetOutput::same_type(),
219
- )
220
- .with_fmt("str.hex_decode")
221
- .into()
212
+ self.inner.clone().str().hex_decode(strict).into()
222
213
  }
223
214
 
224
215
  pub fn str_base64_encode(&self) -> Self {
225
- self.clone()
226
- .inner
227
- .map(
228
- move |s| s.str().map(|s| Some(s.base64_encode().into_series())),
229
- GetOutput::same_type(),
230
- )
231
- .with_fmt("str.base64_encode")
232
- .into()
216
+ self.inner.clone().str().base64_encode().into()
233
217
  }
234
218
 
235
219
  pub fn str_base64_decode(&self, strict: bool) -> Self {
236
- self.clone()
237
- .inner
238
- .map(
239
- move |s| {
240
- s.str()?
241
- .base64_decode(strict)
242
- .map(|s| Some(s.into_series()))
243
- },
244
- GetOutput::same_type(),
245
- )
246
- .with_fmt("str.base64_decode")
247
- .into()
220
+ self.inner.clone().str().base64_decode(strict).into()
248
221
  }
249
222
 
250
223
  pub fn str_to_integer(&self, base: &Self, strict: bool) -> Self {
@@ -1,6 +1,6 @@
1
1
  use std::fs::File;
2
2
  use std::io;
3
- use std::io::{BufReader, Cursor, Read, Seek, SeekFrom, Write};
3
+ use std::io::{Cursor, Read, Seek, SeekFrom, Write};
4
4
  use std::path::PathBuf;
5
5
 
6
6
  use magnus::{exception, prelude::*, Error, RString, Value};
@@ -24,9 +24,8 @@ impl RbFileLikeObject {
24
24
  RbFileLikeObject { inner: object }
25
25
  }
26
26
 
27
- pub fn as_buffer(&self) -> std::io::Cursor<Vec<u8>> {
28
- let data = self.as_file_buffer().into_inner();
29
- std::io::Cursor::new(data)
27
+ pub fn as_bytes(&self) -> bytes::Bytes {
28
+ self.as_file_buffer().into_inner().into()
30
29
  }
31
30
 
32
31
  pub fn as_file_buffer(&self) -> Cursor<Vec<u8>> {
@@ -132,7 +131,33 @@ impl FileLike for RbFileLikeObject {}
132
131
 
133
132
  pub enum EitherRustRubyFile {
134
133
  Rb(RbFileLikeObject),
135
- Rust(BufReader<File>),
134
+ Rust(File),
135
+ }
136
+
137
+ impl EitherRustRubyFile {
138
+ pub fn into_dyn(self) -> Box<dyn FileLike> {
139
+ match self {
140
+ EitherRustRubyFile::Rb(f) => Box::new(f),
141
+ EitherRustRubyFile::Rust(f) => Box::new(f),
142
+ }
143
+ }
144
+ }
145
+
146
+ pub enum RubyScanSourceInput {
147
+ Buffer(bytes::Bytes),
148
+ Path(PathBuf),
149
+ #[allow(dead_code)]
150
+ File(File),
151
+ }
152
+
153
+ pub fn get_ruby_scan_source_input(rb_f: Value, write: bool) -> RbResult<RubyScanSourceInput> {
154
+ if let Ok(file_path) = PathBuf::try_convert(rb_f) {
155
+ // TODO resolve_homedir
156
+ Ok(RubyScanSourceInput::Path(file_path))
157
+ } else {
158
+ let f = RbFileLikeObject::with_requirements(rb_f, !write, write, !write)?;
159
+ Ok(RubyScanSourceInput::Buffer(f.as_bytes()))
160
+ }
136
161
  }
137
162
 
138
163
  ///
@@ -144,12 +169,11 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
144
169
  let file_path = std::path::Path::new(&s);
145
170
  let file_path = resolve_homedir(file_path);
146
171
  let f = if truncate {
147
- File::create(file_path).map_err(RbPolarsErr::io)?
172
+ File::create(file_path).map_err(RbPolarsErr::from)?
148
173
  } else {
149
174
  polars_utils::open_file(&file_path).map_err(RbPolarsErr::from)?
150
175
  };
151
- let reader = BufReader::new(f);
152
- Ok(EitherRustRubyFile::Rust(reader))
176
+ Ok(EitherRustRubyFile::Rust(f))
153
177
  } else {
154
178
  let f = RbFileLikeObject::with_requirements(rb_f, !truncate, truncate, !truncate)?;
155
179
  Ok(EitherRustRubyFile::Rb(f))
@@ -157,21 +181,34 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
157
181
  }
158
182
 
159
183
  pub fn get_file_like(f: Value, truncate: bool) -> RbResult<Box<dyn FileLike>> {
160
- use EitherRustRubyFile::*;
161
- match get_either_file(f, truncate)? {
162
- Rb(f) => Ok(Box::new(f)),
163
- Rust(f) => Ok(Box::new(f.into_inner())),
164
- }
184
+ Ok(get_either_file(f, truncate)?.into_dyn())
165
185
  }
166
186
 
167
- pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>> {
168
- if let Ok(bytes) = rb_f.funcall::<_, _, RString>("read", ()) {
169
- let bytes = unsafe { bytes.as_slice() };
170
- // TODO avoid copy
171
- Ok(Box::new(Cursor::new(bytes.to_vec())))
172
- } else {
173
- let p = PathBuf::try_convert(rb_f)?;
174
- let f = File::open(p).map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
175
- Ok(Box::new(f))
187
+ pub enum RbReadBytes {
188
+ Bytes(RString),
189
+ Other(Value),
190
+ }
191
+
192
+ pub fn read_if_bytesio(rb_f: Value) -> RbReadBytes {
193
+ rb_f.funcall("read", ())
194
+ .map(RbReadBytes::Bytes)
195
+ .unwrap_or(RbReadBytes::Other(rb_f))
196
+ }
197
+
198
+ pub fn get_mmap_bytes_reader<'a>(rb_f: &'a RbReadBytes) -> RbResult<Box<dyn MmapBytesReader + 'a>> {
199
+ get_mmap_bytes_reader_and_path(rb_f).map(|t| t.0)
200
+ }
201
+
202
+ pub fn get_mmap_bytes_reader_and_path<'a>(
203
+ rb_f: &'a RbReadBytes,
204
+ ) -> RbResult<(Box<dyn MmapBytesReader + 'a>, Option<PathBuf>)> {
205
+ match rb_f {
206
+ RbReadBytes::Bytes(v) => Ok((Box::new(Cursor::new(unsafe { v.as_slice() })), None)),
207
+ RbReadBytes::Other(v) => {
208
+ let path = PathBuf::try_convert(*v)?;
209
+ let f = File::open(&path)
210
+ .map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
211
+ Ok((Box::new(f), Some(path)))
212
+ }
176
213
  }
177
214
  }
@@ -0,0 +1,15 @@
1
+ use polars::lazy::dsl;
2
+
3
+ use crate::RbExpr;
4
+
5
+ // TODO add to Ruby
6
+ pub fn business_day_count(
7
+ start: &RbExpr,
8
+ end: &RbExpr,
9
+ week_mask: [bool; 7],
10
+ holidays: Vec<i32>,
11
+ ) -> RbExpr {
12
+ let start = start.inner.clone();
13
+ let end = end.inner.clone();
14
+ dsl::business_day_count(start, end, week_mask, holidays).into()
15
+ }
@@ -94,10 +94,7 @@ pub fn col(name: String) -> RbExpr {
94
94
  }
95
95
 
96
96
  pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
97
- let lfs = lfs
98
- .into_iter()
99
- .map(<&RbLazyFrame>::try_convert)
100
- .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
97
+ let lfs = lfs.typecheck::<Obj<RbLazyFrame>>()?;
101
98
 
102
99
  Ok(RArray::from_iter(lfs.iter().map(|lf| {
103
100
  let df = lf.ldf.borrow().clone().collect().unwrap();
@@ -173,8 +170,14 @@ pub fn cum_fold(
173
170
  let exprs = rb_exprs_to_exprs(exprs)?;
174
171
  let lambda = Opaque::from(lambda);
175
172
 
176
- let func =
177
- move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
173
+ let func = move |a: Column, b: Column| {
174
+ binary_lambda(
175
+ Ruby::get().unwrap().get_inner(lambda),
176
+ a.take_materialized_series(),
177
+ b.take_materialized_series(),
178
+ )
179
+ .map(|v| v.map(Column::from))
180
+ };
178
181
  Ok(dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
179
182
  }
180
183
 
@@ -263,8 +266,14 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
263
266
  let exprs = rb_exprs_to_exprs(exprs)?;
264
267
  let lambda = Opaque::from(lambda);
265
268
 
266
- let func =
267
- move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
269
+ let func = move |a: Column, b: Column| {
270
+ binary_lambda(
271
+ Ruby::get().unwrap().get_inner(lambda),
272
+ a.take_materialized_series(),
273
+ b.take_materialized_series(),
274
+ )
275
+ .map(|v| v.map(Column::from))
276
+ };
268
277
  Ok(dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
269
278
  }
270
279
 
@@ -1,4 +1,5 @@
1
1
  pub mod aggregation;
2
+ pub mod business;
2
3
  pub mod eager;
3
4
  pub mod io;
4
5
  pub mod lazy;
@@ -0,0 +1 @@
1
+ pub mod to_ruby;
@@ -0,0 +1,83 @@
1
+ use arrow::datatypes::ArrowDataType;
2
+ use arrow::ffi;
3
+ use magnus::{IntoValue, Value};
4
+ use polars::datatypes::CompatLevel;
5
+ use polars::frame::DataFrame;
6
+ use polars::prelude::{ArrayRef, ArrowField, PlSmallStr, PolarsResult, SchemaExt};
7
+ use polars::series::Series;
8
+ use polars_core::utils::arrow;
9
+
10
+ use crate::RbResult;
11
+
12
+ #[magnus::wrap(class = "Polars::RbArrowArrayStream")]
13
+ pub struct RbArrowArrayStream {
14
+ stream: ffi::ArrowArrayStream,
15
+ }
16
+
17
+ impl RbArrowArrayStream {
18
+ pub fn to_i(&self) -> usize {
19
+ (&self.stream as *const _) as usize
20
+ }
21
+ }
22
+
23
+ pub(crate) fn dataframe_to_stream(df: &DataFrame) -> RbResult<Value> {
24
+ let iter = Box::new(DataFrameStreamIterator::new(df));
25
+ let field = iter.field();
26
+ let stream = ffi::export_iterator(iter, field);
27
+ Ok(RbArrowArrayStream { stream }.into_value())
28
+ }
29
+
30
+ pub struct DataFrameStreamIterator {
31
+ columns: Vec<Series>,
32
+ dtype: ArrowDataType,
33
+ idx: usize,
34
+ n_chunks: usize,
35
+ }
36
+
37
+ impl DataFrameStreamIterator {
38
+ fn new(df: &DataFrame) -> Self {
39
+ let schema = df.schema().to_arrow(CompatLevel::newest());
40
+ let dtype = ArrowDataType::Struct(schema.into_iter_values().collect());
41
+
42
+ Self {
43
+ columns: df
44
+ .get_columns()
45
+ .iter()
46
+ .map(|v| v.as_materialized_series().clone())
47
+ .collect(),
48
+ dtype,
49
+ idx: 0,
50
+ n_chunks: df.n_chunks(),
51
+ }
52
+ }
53
+
54
+ fn field(&self) -> ArrowField {
55
+ ArrowField::new(PlSmallStr::EMPTY, self.dtype.clone(), false)
56
+ }
57
+ }
58
+
59
+ impl Iterator for DataFrameStreamIterator {
60
+ type Item = PolarsResult<ArrayRef>;
61
+
62
+ fn next(&mut self) -> Option<Self::Item> {
63
+ if self.idx >= self.n_chunks {
64
+ None
65
+ } else {
66
+ // create a batch of the columns with the same chunk no.
67
+ let batch_cols = self
68
+ .columns
69
+ .iter()
70
+ .map(|s| s.to_arrow(self.idx, CompatLevel::newest()))
71
+ .collect::<Vec<_>>();
72
+ self.idx += 1;
73
+
74
+ let array = arrow::array::StructArray::new(
75
+ self.dtype.clone(),
76
+ batch_cols[0].len(),
77
+ batch_cols,
78
+ None,
79
+ );
80
+ Some(Ok(Box::new(array)))
81
+ }
82
+ }
83
+ }
@@ -1 +1,2 @@
1
+ pub mod arrow;
1
2
  pub mod numo;