polars-df 0.13.0 → 0.15.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -0
  3. data/Cargo.lock +1368 -319
  4. data/LICENSE.txt +1 -0
  5. data/README.md +1 -2
  6. data/ext/polars/Cargo.toml +15 -6
  7. data/ext/polars/src/batched_csv.rs +10 -13
  8. data/ext/polars/src/conversion/any_value.rs +37 -21
  9. data/ext/polars/src/conversion/chunked_array.rs +3 -3
  10. data/ext/polars/src/conversion/mod.rs +159 -46
  11. data/ext/polars/src/dataframe/construction.rs +4 -7
  12. data/ext/polars/src/dataframe/export.rs +9 -2
  13. data/ext/polars/src/dataframe/general.rs +22 -16
  14. data/ext/polars/src/dataframe/io.rs +78 -174
  15. data/ext/polars/src/dataframe/mod.rs +1 -0
  16. data/ext/polars/src/dataframe/serde.rs +15 -0
  17. data/ext/polars/src/error.rs +31 -48
  18. data/ext/polars/src/exceptions.rs +24 -0
  19. data/ext/polars/src/expr/binary.rs +4 -42
  20. data/ext/polars/src/expr/datetime.rs +16 -7
  21. data/ext/polars/src/expr/general.rs +14 -23
  22. data/ext/polars/src/expr/list.rs +18 -11
  23. data/ext/polars/src/expr/name.rs +3 -2
  24. data/ext/polars/src/expr/rolling.rs +6 -7
  25. data/ext/polars/src/expr/string.rs +17 -37
  26. data/ext/polars/src/file.rs +59 -22
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +6 -6
  29. data/ext/polars/src/functions/lazy.rs +17 -8
  30. data/ext/polars/src/functions/mod.rs +1 -0
  31. data/ext/polars/src/functions/range.rs +4 -2
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +877 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -825
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +44 -13
  39. data/ext/polars/src/map/dataframe.rs +46 -14
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +17 -16
  42. data/ext/polars/src/map/series.rs +106 -64
  43. data/ext/polars/src/on_startup.rs +2 -2
  44. data/ext/polars/src/series/aggregation.rs +1 -5
  45. data/ext/polars/src/series/arithmetic.rs +10 -10
  46. data/ext/polars/src/series/construction.rs +52 -25
  47. data/ext/polars/src/series/export.rs +1 -1
  48. data/ext/polars/src/series/general.rs +643 -0
  49. data/ext/polars/src/series/import.rs +55 -0
  50. data/ext/polars/src/series/mod.rs +11 -638
  51. data/ext/polars/src/series/scatter.rs +2 -2
  52. data/ext/polars/src/utils.rs +0 -20
  53. data/lib/polars/batched_csv_reader.rb +0 -2
  54. data/lib/polars/binary_expr.rb +133 -9
  55. data/lib/polars/binary_name_space.rb +101 -6
  56. data/lib/polars/config.rb +4 -0
  57. data/lib/polars/data_frame.rb +285 -62
  58. data/lib/polars/data_type_group.rb +28 -0
  59. data/lib/polars/data_types.rb +2 -0
  60. data/lib/polars/date_time_expr.rb +244 -0
  61. data/lib/polars/date_time_name_space.rb +87 -0
  62. data/lib/polars/expr.rb +109 -8
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +88 -10
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/ipc.rb +14 -12
  71. data/lib/polars/io/ndjson.rb +10 -0
  72. data/lib/polars/io/parquet.rb +168 -111
  73. data/lib/polars/lazy_frame.rb +649 -15
  74. data/lib/polars/list_name_space.rb +169 -0
  75. data/lib/polars/selectors.rb +1144 -0
  76. data/lib/polars/series.rb +470 -40
  77. data/lib/polars/string_cache.rb +27 -1
  78. data/lib/polars/string_expr.rb +0 -1
  79. data/lib/polars/string_name_space.rb +73 -3
  80. data/lib/polars/struct_name_space.rb +31 -7
  81. data/lib/polars/utils/various.rb +5 -1
  82. data/lib/polars/utils.rb +45 -10
  83. data/lib/polars/version.rb +1 -1
  84. data/lib/polars.rb +2 -1
  85. metadata +14 -4
  86. data/lib/polars/functions.rb +0 -57
@@ -9,7 +9,6 @@ use polars_core::series::IsSorted;
9
9
  use crate::conversion::{parse_fill_null_strategy, Wrap};
10
10
  use crate::map::lazy::map_single;
11
11
  use crate::rb_exprs_to_exprs;
12
- use crate::utils::reinterpret;
13
12
  use crate::{RbExpr, RbResult};
14
13
 
15
14
  impl RbExpr {
@@ -165,7 +164,7 @@ impl RbExpr {
165
164
  self.inner.clone().implode().into()
166
165
  }
167
166
 
168
- pub fn quantile(&self, quantile: &Self, interpolation: Wrap<QuantileInterpolOptions>) -> Self {
167
+ pub fn quantile(&self, quantile: &Self, interpolation: Wrap<QuantileMethod>) -> Self {
169
168
  self.inner
170
169
  .clone()
171
170
  .quantile(quantile.inner.clone(), interpolation.0)
@@ -242,7 +241,7 @@ impl RbExpr {
242
241
  pub fn value_counts(&self, sort: bool, parallel: bool, name: String, normalize: bool) -> Self {
243
242
  self.inner
244
243
  .clone()
245
- .value_counts(sort, parallel, name, normalize)
244
+ .value_counts(sort, parallel, name.as_str(), normalize)
246
245
  .into()
247
246
  }
248
247
 
@@ -463,14 +462,7 @@ impl RbExpr {
463
462
  }
464
463
 
465
464
  pub fn gather_every(&self, n: usize, offset: usize) -> Self {
466
- self.clone()
467
- .inner
468
- .map(
469
- move |s: Series| Ok(Some(s.gather_every(n, offset))),
470
- GetOutput::same_type(),
471
- )
472
- .with_fmt("gather_every")
473
- .into()
465
+ self.inner.clone().gather_every(n, offset).into()
474
466
  }
475
467
 
476
468
  pub fn tail(&self, n: Option<usize>) -> Self {
@@ -644,8 +636,16 @@ impl RbExpr {
644
636
  output_type: Option<Wrap<DataType>>,
645
637
  agg_list: bool,
646
638
  is_elementwise: bool,
639
+ returns_scalar: bool,
647
640
  ) -> Self {
648
- map_single(self, lambda, output_type, agg_list, is_elementwise)
641
+ map_single(
642
+ self,
643
+ lambda,
644
+ output_type,
645
+ agg_list,
646
+ is_elementwise,
647
+ returns_scalar,
648
+ )
649
649
  }
650
650
 
651
651
  pub fn dot(&self, other: &Self) -> Self {
@@ -653,16 +653,7 @@ impl RbExpr {
653
653
  }
654
654
 
655
655
  pub fn reinterpret(&self, signed: bool) -> Self {
656
- let function = move |s: Series| reinterpret(&s, signed).map(Some);
657
- let dt = if signed {
658
- DataType::Int64
659
- } else {
660
- DataType::UInt64
661
- };
662
- self.clone()
663
- .inner
664
- .map(function, GetOutput::from_type(dt))
665
- .into()
656
+ self.inner.clone().reinterpret(signed).into()
666
657
  }
667
658
 
668
659
  pub fn mode(&self) -> Self {
@@ -717,7 +708,7 @@ impl RbExpr {
717
708
  }
718
709
 
719
710
  pub fn reshape(&self, dims: Vec<i64>) -> Self {
720
- self.inner.clone().reshape(&dims, NestedType::Array).into()
711
+ self.inner.clone().reshape(&dims).into()
721
712
  }
722
713
 
723
714
  pub fn cum_count(&self, reverse: bool) -> Self {
@@ -1,4 +1,4 @@
1
- use magnus::Value;
1
+ use magnus::{prelude::*, value::Opaque, Ruby, Value};
2
2
  use polars::lazy::dsl::lit;
3
3
  use polars::prelude::*;
4
4
  use polars::series::ops::NullBehavior;
@@ -179,23 +179,30 @@ impl RbExpr {
179
179
  pub fn list_to_struct(
180
180
  &self,
181
181
  width_strat: Wrap<ListToStructWidthStrategy>,
182
- _name_gen: Option<Value>,
182
+ name_gen: Option<Value>,
183
183
  upper_bound: usize,
184
184
  ) -> RbResult<Self> {
185
- // TODO fix
186
- let name_gen = None;
187
- // let name_gen = name_gen.map(|lambda| {
188
- // Arc::new(move |idx: usize| {
189
- // let out: Value = lambda.funcall("call", (idx,)).unwrap();
190
- // String::try_convert(out).unwrap()
191
- // }) as NameGenerator
192
- // });
185
+ let name_gen = name_gen.map(|lambda| {
186
+ let lambda = Opaque::from(lambda);
187
+ Arc::new(move |idx: usize| {
188
+ let lambda = Ruby::get().unwrap().get_inner(lambda);
189
+ let out: String = lambda.funcall("call", (idx,)).unwrap();
190
+ PlSmallStr::from_string(out)
191
+ });
192
+
193
+ // non-Ruby thread
194
+ todo!();
195
+ });
193
196
 
194
197
  Ok(self
195
198
  .inner
196
199
  .clone()
197
200
  .list()
198
- .to_struct(width_strat.0, name_gen, upper_bound)
201
+ .to_struct(ListToStructArgs::InferWidth {
202
+ infer_field_strategy: width_strat.0,
203
+ get_index_name: name_gen,
204
+ max_fields: upper_bound,
205
+ })
199
206
  .into())
200
207
  }
201
208
 
@@ -1,5 +1,6 @@
1
1
  use magnus::{block::Proc, value::Opaque, Ruby};
2
2
  use polars::prelude::*;
3
+ use polars_utils::format_pl_smallstr;
3
4
 
4
5
  use crate::RbExpr;
5
6
 
@@ -15,9 +16,9 @@ impl RbExpr {
15
16
  .name()
16
17
  .map(move |name| {
17
18
  let lambda = Ruby::get().unwrap().get_inner(lambda);
18
- let out = lambda.call::<_, String>((name,));
19
+ let out = lambda.call::<_, String>((name.as_str(),));
19
20
  match out {
20
- Ok(out) => Ok(out),
21
+ Ok(out) => Ok(format_pl_smallstr!("{}", out)),
21
22
  Err(e) => Err(PolarsError::ComputeError(
22
23
  format!("Ruby function in 'name.map' produced an error: {}.", e).into(),
23
24
  )),
@@ -1,5 +1,4 @@
1
1
  use polars::prelude::*;
2
- use std::any::Any;
3
2
 
4
3
  use crate::conversion::Wrap;
5
4
  use crate::RbExpr;
@@ -169,7 +168,7 @@ impl RbExpr {
169
168
  weights,
170
169
  min_periods,
171
170
  center,
172
- fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
171
+ fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
173
172
  };
174
173
 
175
174
  self.inner.clone().rolling_std(options).into()
@@ -187,7 +186,7 @@ impl RbExpr {
187
186
  window_size: Duration::parse(&window_size),
188
187
  min_periods,
189
188
  closed_window: closed.0,
190
- fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
189
+ fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
191
190
  };
192
191
 
193
192
  self.inner
@@ -210,7 +209,7 @@ impl RbExpr {
210
209
  weights,
211
210
  min_periods,
212
211
  center,
213
- fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
212
+ fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
214
213
  };
215
214
 
216
215
  self.inner.clone().rolling_var(options).into()
@@ -228,7 +227,7 @@ impl RbExpr {
228
227
  window_size: Duration::parse(&window_size),
229
228
  min_periods,
230
229
  closed_window: closed.0,
231
- fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
230
+ fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
232
231
  };
233
232
 
234
233
  self.inner
@@ -277,7 +276,7 @@ impl RbExpr {
277
276
  pub fn rolling_quantile(
278
277
  &self,
279
278
  quantile: f64,
280
- interpolation: Wrap<QuantileInterpolOptions>,
279
+ interpolation: Wrap<QuantileMethod>,
281
280
  window_size: usize,
282
281
  weights: Option<Vec<f64>>,
283
282
  min_periods: Option<usize>,
@@ -302,7 +301,7 @@ impl RbExpr {
302
301
  &self,
303
302
  by: &RbExpr,
304
303
  quantile: f64,
305
- interpolation: Wrap<QuantileInterpolOptions>,
304
+ interpolation: Wrap<QuantileMethod>,
306
305
  window_size: String,
307
306
  min_periods: usize,
308
307
  closed: Wrap<ClosedWindow>,
@@ -19,6 +19,8 @@ impl RbExpr {
19
19
  exact: bool,
20
20
  cache: bool,
21
21
  ) -> Self {
22
+ let format = format.map(|x| x.into());
23
+
22
24
  let options = StrptimeOptions {
23
25
  format,
24
26
  strict,
@@ -33,12 +35,15 @@ impl RbExpr {
33
35
  &self,
34
36
  format: Option<String>,
35
37
  time_unit: Option<Wrap<TimeUnit>>,
36
- time_zone: Option<TimeZone>,
38
+ time_zone: Option<Wrap<TimeZone>>,
37
39
  strict: bool,
38
40
  exact: bool,
39
41
  cache: bool,
40
42
  ambiguous: &Self,
41
43
  ) -> Self {
44
+ let format = format.map(|x| x.into());
45
+ let time_zone = time_zone.map(|x| x.0);
46
+
42
47
  let options = StrptimeOptions {
43
48
  format,
44
49
  strict,
@@ -58,6 +63,8 @@ impl RbExpr {
58
63
  }
59
64
 
60
65
  pub fn str_to_time(&self, format: Option<String>, strict: bool, cache: bool) -> Self {
66
+ let format = format.map(|x| x.into());
67
+
61
68
  let options = StrptimeOptions {
62
69
  format,
63
70
  strict,
@@ -123,6 +130,11 @@ impl RbExpr {
123
130
  self.inner.clone().str().to_lowercase().into()
124
131
  }
125
132
 
133
+ // requires nightly
134
+ // pub fn str_to_titlecase(&self) -> Self {
135
+ // self.inner.clone().str().to_titlecase().into()
136
+ // }
137
+
126
138
  pub fn str_len_bytes(&self) -> Self {
127
139
  self.inner.clone().str().len_bytes().into()
128
140
  }
@@ -193,51 +205,19 @@ impl RbExpr {
193
205
  }
194
206
 
195
207
  pub fn str_hex_encode(&self) -> Self {
196
- self.clone()
197
- .inner
198
- .map(
199
- move |s| s.str().map(|s| Some(s.hex_encode().into_series())),
200
- GetOutput::same_type(),
201
- )
202
- .with_fmt("str.hex_encode")
203
- .into()
208
+ self.inner.clone().str().hex_encode().into()
204
209
  }
205
210
 
206
211
  pub fn str_hex_decode(&self, strict: bool) -> Self {
207
- self.clone()
208
- .inner
209
- .map(
210
- move |s| s.str()?.hex_decode(strict).map(|s| Some(s.into_series())),
211
- GetOutput::same_type(),
212
- )
213
- .with_fmt("str.hex_decode")
214
- .into()
212
+ self.inner.clone().str().hex_decode(strict).into()
215
213
  }
216
214
 
217
215
  pub fn str_base64_encode(&self) -> Self {
218
- self.clone()
219
- .inner
220
- .map(
221
- move |s| s.str().map(|s| Some(s.base64_encode().into_series())),
222
- GetOutput::same_type(),
223
- )
224
- .with_fmt("str.base64_encode")
225
- .into()
216
+ self.inner.clone().str().base64_encode().into()
226
217
  }
227
218
 
228
219
  pub fn str_base64_decode(&self, strict: bool) -> Self {
229
- self.clone()
230
- .inner
231
- .map(
232
- move |s| {
233
- s.str()?
234
- .base64_decode(strict)
235
- .map(|s| Some(s.into_series()))
236
- },
237
- GetOutput::same_type(),
238
- )
239
- .with_fmt("str.base64_decode")
240
- .into()
220
+ self.inner.clone().str().base64_decode(strict).into()
241
221
  }
242
222
 
243
223
  pub fn str_to_integer(&self, base: &Self, strict: bool) -> Self {
@@ -1,6 +1,6 @@
1
1
  use std::fs::File;
2
2
  use std::io;
3
- use std::io::{BufReader, Cursor, Read, Seek, SeekFrom, Write};
3
+ use std::io::{Cursor, Read, Seek, SeekFrom, Write};
4
4
  use std::path::PathBuf;
5
5
 
6
6
  use magnus::{exception, prelude::*, Error, RString, Value};
@@ -24,9 +24,8 @@ impl RbFileLikeObject {
24
24
  RbFileLikeObject { inner: object }
25
25
  }
26
26
 
27
- pub fn as_buffer(&self) -> std::io::Cursor<Vec<u8>> {
28
- let data = self.as_file_buffer().into_inner();
29
- std::io::Cursor::new(data)
27
+ pub fn as_bytes(&self) -> bytes::Bytes {
28
+ self.as_file_buffer().into_inner().into()
30
29
  }
31
30
 
32
31
  pub fn as_file_buffer(&self) -> Cursor<Vec<u8>> {
@@ -132,7 +131,33 @@ impl FileLike for RbFileLikeObject {}
132
131
 
133
132
  pub enum EitherRustRubyFile {
134
133
  Rb(RbFileLikeObject),
135
- Rust(BufReader<File>),
134
+ Rust(File),
135
+ }
136
+
137
+ impl EitherRustRubyFile {
138
+ pub fn into_dyn(self) -> Box<dyn FileLike> {
139
+ match self {
140
+ EitherRustRubyFile::Rb(f) => Box::new(f),
141
+ EitherRustRubyFile::Rust(f) => Box::new(f),
142
+ }
143
+ }
144
+ }
145
+
146
+ pub enum RubyScanSourceInput {
147
+ Buffer(bytes::Bytes),
148
+ Path(PathBuf),
149
+ #[allow(dead_code)]
150
+ File(File),
151
+ }
152
+
153
+ pub fn get_ruby_scan_source_input(rb_f: Value, write: bool) -> RbResult<RubyScanSourceInput> {
154
+ if let Ok(file_path) = PathBuf::try_convert(rb_f) {
155
+ // TODO resolve_homedir
156
+ Ok(RubyScanSourceInput::Path(file_path))
157
+ } else {
158
+ let f = RbFileLikeObject::with_requirements(rb_f, !write, write, !write)?;
159
+ Ok(RubyScanSourceInput::Buffer(f.as_bytes()))
160
+ }
136
161
  }
137
162
 
138
163
  ///
@@ -144,12 +169,11 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
144
169
  let file_path = std::path::Path::new(&s);
145
170
  let file_path = resolve_homedir(file_path);
146
171
  let f = if truncate {
147
- File::create(file_path).map_err(RbPolarsErr::io)?
172
+ File::create(file_path).map_err(RbPolarsErr::from)?
148
173
  } else {
149
174
  polars_utils::open_file(&file_path).map_err(RbPolarsErr::from)?
150
175
  };
151
- let reader = BufReader::new(f);
152
- Ok(EitherRustRubyFile::Rust(reader))
176
+ Ok(EitherRustRubyFile::Rust(f))
153
177
  } else {
154
178
  let f = RbFileLikeObject::with_requirements(rb_f, !truncate, truncate, !truncate)?;
155
179
  Ok(EitherRustRubyFile::Rb(f))
@@ -157,21 +181,34 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
157
181
  }
158
182
 
159
183
  pub fn get_file_like(f: Value, truncate: bool) -> RbResult<Box<dyn FileLike>> {
160
- use EitherRustRubyFile::*;
161
- match get_either_file(f, truncate)? {
162
- Rb(f) => Ok(Box::new(f)),
163
- Rust(f) => Ok(Box::new(f.into_inner())),
164
- }
184
+ Ok(get_either_file(f, truncate)?.into_dyn())
165
185
  }
166
186
 
167
- pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>> {
168
- if let Ok(bytes) = rb_f.funcall::<_, _, RString>("read", ()) {
169
- let bytes = unsafe { bytes.as_slice() };
170
- // TODO avoid copy
171
- Ok(Box::new(Cursor::new(bytes.to_vec())))
172
- } else {
173
- let p = PathBuf::try_convert(rb_f)?;
174
- let f = File::open(p).map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
175
- Ok(Box::new(f))
187
+ pub enum RbReadBytes {
188
+ Bytes(RString),
189
+ Other(Value),
190
+ }
191
+
192
+ pub fn read_if_bytesio(rb_f: Value) -> RbReadBytes {
193
+ rb_f.funcall("read", ())
194
+ .map(RbReadBytes::Bytes)
195
+ .unwrap_or(RbReadBytes::Other(rb_f))
196
+ }
197
+
198
+ pub fn get_mmap_bytes_reader<'a>(rb_f: &'a RbReadBytes) -> RbResult<Box<dyn MmapBytesReader + 'a>> {
199
+ get_mmap_bytes_reader_and_path(rb_f).map(|t| t.0)
200
+ }
201
+
202
+ pub fn get_mmap_bytes_reader_and_path<'a>(
203
+ rb_f: &'a RbReadBytes,
204
+ ) -> RbResult<(Box<dyn MmapBytesReader + 'a>, Option<PathBuf>)> {
205
+ match rb_f {
206
+ RbReadBytes::Bytes(v) => Ok((Box::new(Cursor::new(unsafe { v.as_slice() })), None)),
207
+ RbReadBytes::Other(v) => {
208
+ let path = PathBuf::try_convert(*v)?;
209
+ let f = File::open(&path)
210
+ .map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
211
+ Ok((Box::new(f), Some(path)))
212
+ }
176
213
  }
177
214
  }
@@ -0,0 +1,15 @@
1
+ use polars::lazy::dsl;
2
+
3
+ use crate::RbExpr;
4
+
5
+ // TODO add to Ruby
6
+ pub fn business_day_count(
7
+ start: &RbExpr,
8
+ end: &RbExpr,
9
+ week_mask: [bool; 7],
10
+ holidays: Vec<i32>,
11
+ ) -> RbExpr {
12
+ let start = start.inner.clone();
13
+ let end = end.inner.clone();
14
+ dsl::business_day_count(start, end, week_mask, holidays).into()
15
+ }
@@ -11,9 +11,9 @@ pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
11
11
  let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::from)?;
12
12
 
13
13
  let dict = RHash::new();
14
- for field in &metadata.schema.fields {
15
- let dt: Wrap<DataType> = Wrap((&field.data_type).into());
16
- dict.aset(field.name.clone(), dt)?;
14
+ for field in metadata.schema.iter_values() {
15
+ let dt: Wrap<DataType> = Wrap((&field.dtype).into());
16
+ dict.aset(field.name.as_str(), dt)?;
17
17
  }
18
18
  Ok(dict)
19
19
  }
@@ -26,9 +26,9 @@ pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
26
26
  let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;
27
27
 
28
28
  let dict = RHash::new();
29
- for field in arrow_schema.fields {
30
- let dt: Wrap<DataType> = Wrap((&field.data_type).into());
31
- dict.aset(field.name, dt)?;
29
+ for field in arrow_schema.iter_values() {
30
+ let dt: Wrap<DataType> = Wrap((&field.dtype).into());
31
+ dict.aset(field.name.as_str(), dt)?;
32
32
  }
33
33
  Ok(dict)
34
34
  }
@@ -94,10 +94,7 @@ pub fn col(name: String) -> RbExpr {
94
94
  }
95
95
 
96
96
  pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
97
- let lfs = lfs
98
- .into_iter()
99
- .map(<&RbLazyFrame>::try_convert)
100
- .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
97
+ let lfs = lfs.typecheck::<Obj<RbLazyFrame>>()?;
101
98
 
102
99
  Ok(RArray::from_iter(lfs.iter().map(|lf| {
103
100
  let df = lf.ldf.borrow().clone().collect().unwrap();
@@ -173,8 +170,14 @@ pub fn cum_fold(
173
170
  let exprs = rb_exprs_to_exprs(exprs)?;
174
171
  let lambda = Opaque::from(lambda);
175
172
 
176
- let func =
177
- move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
173
+ let func = move |a: Column, b: Column| {
174
+ binary_lambda(
175
+ Ruby::get().unwrap().get_inner(lambda),
176
+ a.take_materialized_series(),
177
+ b.take_materialized_series(),
178
+ )
179
+ .map(|v| v.map(Column::from))
180
+ };
178
181
  Ok(dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
179
182
  }
180
183
 
@@ -263,8 +266,14 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
263
266
  let exprs = rb_exprs_to_exprs(exprs)?;
264
267
  let lambda = Opaque::from(lambda);
265
268
 
266
- let func =
267
- move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
269
+ let func = move |a: Column, b: Column| {
270
+ binary_lambda(
271
+ Ruby::get().unwrap().get_inner(lambda),
272
+ a.take_materialized_series(),
273
+ b.take_materialized_series(),
274
+ )
275
+ .map(|v| v.map(Column::from))
276
+ };
268
277
  Ok(dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
269
278
  }
270
279
 
@@ -1,4 +1,5 @@
1
1
  pub mod aggregation;
2
+ pub mod business;
2
3
  pub mod eager;
3
4
  pub mod io;
4
5
  pub mod lazy;
@@ -56,13 +56,14 @@ pub fn datetime_range(
56
56
  every: String,
57
57
  closed: Wrap<ClosedWindow>,
58
58
  time_unit: Option<Wrap<TimeUnit>>,
59
- time_zone: Option<TimeZone>,
59
+ time_zone: Option<Wrap<TimeZone>>,
60
60
  ) -> RbExpr {
61
61
  let start = start.inner.clone();
62
62
  let end = end.inner.clone();
63
63
  let every = Duration::parse(&every);
64
64
  let closed = closed.0;
65
65
  let time_unit = time_unit.map(|x| x.0);
66
+ let time_zone = time_zone.map(|x| x.0);
66
67
  dsl::datetime_range(start, end, every, closed, time_unit, time_zone).into()
67
68
  }
68
69
 
@@ -72,13 +73,14 @@ pub fn datetime_ranges(
72
73
  every: String,
73
74
  closed: Wrap<ClosedWindow>,
74
75
  time_unit: Option<Wrap<TimeUnit>>,
75
- time_zone: Option<TimeZone>,
76
+ time_zone: Option<Wrap<TimeZone>>,
76
77
  ) -> RbExpr {
77
78
  let start = start.inner.clone();
78
79
  let end = end.inner.clone();
79
80
  let every = Duration::parse(&every);
80
81
  let closed = closed.0;
81
82
  let time_unit = time_unit.map(|x| x.0);
83
+ let time_zone = time_zone.map(|x| x.0);
82
84
  dsl::datetime_ranges(start, end, every, closed, time_unit, time_zone).into()
83
85
  }
84
86
 
@@ -0,0 +1 @@
1
+ pub mod to_ruby;
@@ -0,0 +1,83 @@
1
+ use arrow::datatypes::ArrowDataType;
2
+ use arrow::ffi;
3
+ use magnus::{IntoValue, Value};
4
+ use polars::datatypes::CompatLevel;
5
+ use polars::frame::DataFrame;
6
+ use polars::prelude::{ArrayRef, ArrowField, PlSmallStr, PolarsResult, SchemaExt};
7
+ use polars::series::Series;
8
+ use polars_core::utils::arrow;
9
+
10
+ use crate::RbResult;
11
+
12
+ #[magnus::wrap(class = "Polars::RbArrowArrayStream")]
13
+ pub struct RbArrowArrayStream {
14
+ stream: ffi::ArrowArrayStream,
15
+ }
16
+
17
+ impl RbArrowArrayStream {
18
+ pub fn to_i(&self) -> usize {
19
+ (&self.stream as *const _) as usize
20
+ }
21
+ }
22
+
23
+ pub(crate) fn dataframe_to_stream(df: &DataFrame) -> RbResult<Value> {
24
+ let iter = Box::new(DataFrameStreamIterator::new(df));
25
+ let field = iter.field();
26
+ let stream = ffi::export_iterator(iter, field);
27
+ Ok(RbArrowArrayStream { stream }.into_value())
28
+ }
29
+
30
+ pub struct DataFrameStreamIterator {
31
+ columns: Vec<Series>,
32
+ dtype: ArrowDataType,
33
+ idx: usize,
34
+ n_chunks: usize,
35
+ }
36
+
37
+ impl DataFrameStreamIterator {
38
+ fn new(df: &DataFrame) -> Self {
39
+ let schema = df.schema().to_arrow(CompatLevel::newest());
40
+ let dtype = ArrowDataType::Struct(schema.into_iter_values().collect());
41
+
42
+ Self {
43
+ columns: df
44
+ .get_columns()
45
+ .iter()
46
+ .map(|v| v.as_materialized_series().clone())
47
+ .collect(),
48
+ dtype,
49
+ idx: 0,
50
+ n_chunks: df.n_chunks(),
51
+ }
52
+ }
53
+
54
+ fn field(&self) -> ArrowField {
55
+ ArrowField::new(PlSmallStr::EMPTY, self.dtype.clone(), false)
56
+ }
57
+ }
58
+
59
+ impl Iterator for DataFrameStreamIterator {
60
+ type Item = PolarsResult<ArrayRef>;
61
+
62
+ fn next(&mut self) -> Option<Self::Item> {
63
+ if self.idx >= self.n_chunks {
64
+ None
65
+ } else {
66
+ // create a batch of the columns with the same chunk no.
67
+ let batch_cols = self
68
+ .columns
69
+ .iter()
70
+ .map(|s| s.to_arrow(self.idx, CompatLevel::newest()))
71
+ .collect::<Vec<_>>();
72
+ self.idx += 1;
73
+
74
+ let array = arrow::array::StructArray::new(
75
+ self.dtype.clone(),
76
+ batch_cols[0].len(),
77
+ batch_cols,
78
+ None,
79
+ );
80
+ Some(Ok(Box::new(array)))
81
+ }
82
+ }
83
+ }
@@ -1 +1,2 @@
1
+ pub mod arrow;
1
2
  pub mod numo;