polars-df 0.14.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE.txt +1 -0
  5. data/README.md +38 -4
  6. data/ext/polars/Cargo.toml +15 -5
  7. data/ext/polars/src/batched_csv.rs +7 -10
  8. data/ext/polars/src/conversion/any_value.rs +31 -21
  9. data/ext/polars/src/conversion/mod.rs +155 -48
  10. data/ext/polars/src/dataframe/construction.rs +0 -3
  11. data/ext/polars/src/dataframe/export.rs +9 -2
  12. data/ext/polars/src/dataframe/general.rs +15 -57
  13. data/ext/polars/src/dataframe/io.rs +77 -169
  14. data/ext/polars/src/dataframe/mod.rs +1 -0
  15. data/ext/polars/src/dataframe/serde.rs +15 -0
  16. data/ext/polars/src/error.rs +31 -48
  17. data/ext/polars/src/exceptions.rs +24 -0
  18. data/ext/polars/src/expr/binary.rs +4 -42
  19. data/ext/polars/src/expr/datetime.rs +5 -4
  20. data/ext/polars/src/expr/general.rs +16 -22
  21. data/ext/polars/src/expr/list.rs +18 -11
  22. data/ext/polars/src/expr/meta.rs +6 -2
  23. data/ext/polars/src/expr/rolling.rs +6 -7
  24. data/ext/polars/src/expr/string.rs +9 -36
  25. data/ext/polars/src/file.rs +78 -23
  26. data/ext/polars/src/functions/aggregation.rs +4 -4
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +34 -13
  29. data/ext/polars/src/functions/lazy.rs +22 -12
  30. data/ext/polars/src/functions/meta.rs +1 -1
  31. data/ext/polars/src/functions/mod.rs +1 -0
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +920 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -827
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +54 -27
  39. data/ext/polars/src/map/dataframe.rs +10 -6
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +9 -8
  42. data/ext/polars/src/on_startup.rs +1 -1
  43. data/ext/polars/src/series/aggregation.rs +1 -5
  44. data/ext/polars/src/series/arithmetic.rs +10 -10
  45. data/ext/polars/src/series/construction.rs +2 -2
  46. data/ext/polars/src/series/export.rs +1 -1
  47. data/ext/polars/src/series/general.rs +631 -0
  48. data/ext/polars/src/series/import.rs +55 -0
  49. data/ext/polars/src/series/mod.rs +11 -638
  50. data/ext/polars/src/series/scatter.rs +2 -2
  51. data/ext/polars/src/utils.rs +0 -20
  52. data/lib/polars/batched_csv_reader.rb +0 -2
  53. data/lib/polars/binary_expr.rb +133 -9
  54. data/lib/polars/binary_name_space.rb +101 -6
  55. data/lib/polars/config.rb +4 -0
  56. data/lib/polars/data_frame.rb +452 -101
  57. data/lib/polars/data_type_group.rb +28 -0
  58. data/lib/polars/data_types.rb +3 -1
  59. data/lib/polars/date_time_expr.rb +244 -0
  60. data/lib/polars/date_time_name_space.rb +87 -0
  61. data/lib/polars/expr.rb +103 -2
  62. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +95 -13
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/delta.rb +126 -0
  71. data/lib/polars/io/ipc.rb +14 -4
  72. data/lib/polars/io/ndjson.rb +10 -0
  73. data/lib/polars/io/parquet.rb +168 -111
  74. data/lib/polars/lazy_frame.rb +684 -20
  75. data/lib/polars/list_name_space.rb +169 -0
  76. data/lib/polars/selectors.rb +1226 -0
  77. data/lib/polars/series.rb +465 -35
  78. data/lib/polars/string_cache.rb +27 -1
  79. data/lib/polars/string_expr.rb +0 -1
  80. data/lib/polars/string_name_space.rb +73 -3
  81. data/lib/polars/struct_name_space.rb +31 -7
  82. data/lib/polars/utils/various.rb +5 -1
  83. data/lib/polars/utils.rb +45 -10
  84. data/lib/polars/version.rb +1 -1
  85. data/lib/polars.rb +17 -1
  86. metadata +16 -9
  87. data/lib/polars/functions.rb +0 -57
@@ -9,7 +9,6 @@ use polars_core::series::IsSorted;
9
9
  use crate::conversion::{parse_fill_null_strategy, Wrap};
10
10
  use crate::map::lazy::map_single;
11
11
  use crate::rb_exprs_to_exprs;
12
- use crate::utils::reinterpret;
13
12
  use crate::{RbExpr, RbResult};
14
13
 
15
14
  impl RbExpr {
@@ -165,7 +164,7 @@ impl RbExpr {
165
164
  self.inner.clone().implode().into()
166
165
  }
167
166
 
168
- pub fn quantile(&self, quantile: &Self, interpolation: Wrap<QuantileInterpolOptions>) -> Self {
167
+ pub fn quantile(&self, quantile: &Self, interpolation: Wrap<QuantileMethod>) -> Self {
169
168
  self.inner
170
169
  .clone()
171
170
  .quantile(quantile.inner.clone(), interpolation.0)
@@ -272,6 +271,7 @@ impl RbExpr {
272
271
  nulls_last,
273
272
  multithreaded: true,
274
273
  maintain_order: false,
274
+ limit: None,
275
275
  })
276
276
  .into()
277
277
  }
@@ -284,6 +284,7 @@ impl RbExpr {
284
284
  nulls_last,
285
285
  multithreaded: true,
286
286
  maintain_order: false,
287
+ limit: None,
287
288
  })
288
289
  .into()
289
290
  }
@@ -364,6 +365,7 @@ impl RbExpr {
364
365
  nulls_last,
365
366
  multithreaded,
366
367
  maintain_order,
368
+ limit: None,
367
369
  },
368
370
  )
369
371
  .into())
@@ -463,14 +465,7 @@ impl RbExpr {
463
465
  }
464
466
 
465
467
  pub fn gather_every(&self, n: usize, offset: usize) -> Self {
466
- self.clone()
467
- .inner
468
- .map(
469
- move |s: Series| Ok(Some(s.gather_every(n, offset))),
470
- GetOutput::same_type(),
471
- )
472
- .with_fmt("gather_every")
473
- .into()
468
+ self.inner.clone().gather_every(n, offset).into()
474
469
  }
475
470
 
476
471
  pub fn tail(&self, n: Option<usize>) -> Self {
@@ -644,8 +639,16 @@ impl RbExpr {
644
639
  output_type: Option<Wrap<DataType>>,
645
640
  agg_list: bool,
646
641
  is_elementwise: bool,
642
+ returns_scalar: bool,
647
643
  ) -> Self {
648
- map_single(self, lambda, output_type, agg_list, is_elementwise)
644
+ map_single(
645
+ self,
646
+ lambda,
647
+ output_type,
648
+ agg_list,
649
+ is_elementwise,
650
+ returns_scalar,
651
+ )
649
652
  }
650
653
 
651
654
  pub fn dot(&self, other: &Self) -> Self {
@@ -653,16 +656,7 @@ impl RbExpr {
653
656
  }
654
657
 
655
658
  pub fn reinterpret(&self, signed: bool) -> Self {
656
- let function = move |s: Series| reinterpret(&s, signed).map(Some);
657
- let dt = if signed {
658
- DataType::Int64
659
- } else {
660
- DataType::UInt64
661
- };
662
- self.clone()
663
- .inner
664
- .map(function, GetOutput::from_type(dt))
665
- .into()
659
+ self.inner.clone().reinterpret(signed).into()
666
660
  }
667
661
 
668
662
  pub fn mode(&self) -> Self {
@@ -717,7 +711,7 @@ impl RbExpr {
717
711
  }
718
712
 
719
713
  pub fn reshape(&self, dims: Vec<i64>) -> Self {
720
- self.inner.clone().reshape(&dims, NestedType::Array).into()
714
+ self.inner.clone().reshape(&dims).into()
721
715
  }
722
716
 
723
717
  pub fn cum_count(&self, reverse: bool) -> Self {
@@ -1,4 +1,4 @@
1
- use magnus::Value;
1
+ use magnus::{prelude::*, value::Opaque, Ruby, Value};
2
2
  use polars::lazy::dsl::lit;
3
3
  use polars::prelude::*;
4
4
  use polars::series::ops::NullBehavior;
@@ -179,23 +179,30 @@ impl RbExpr {
179
179
  pub fn list_to_struct(
180
180
  &self,
181
181
  width_strat: Wrap<ListToStructWidthStrategy>,
182
- _name_gen: Option<Value>,
182
+ name_gen: Option<Value>,
183
183
  upper_bound: usize,
184
184
  ) -> RbResult<Self> {
185
- // TODO fix
186
- let name_gen = None;
187
- // let name_gen = name_gen.map(|lambda| {
188
- // Arc::new(move |idx: usize| {
189
- // let out: Value = lambda.funcall("call", (idx,)).unwrap();
190
- // String::try_convert(out).unwrap()
191
- // }) as NameGenerator
192
- // });
185
+ let name_gen = name_gen.map(|lambda| {
186
+ let lambda = Opaque::from(lambda);
187
+ Arc::new(move |idx: usize| {
188
+ let lambda = Ruby::get().unwrap().get_inner(lambda);
189
+ let out: String = lambda.funcall("call", (idx,)).unwrap();
190
+ PlSmallStr::from_string(out)
191
+ });
192
+
193
+ // non-Ruby thread
194
+ todo!();
195
+ });
193
196
 
194
197
  Ok(self
195
198
  .inner
196
199
  .clone()
197
200
  .list()
198
- .to_struct(width_strat.0, name_gen, upper_bound)
201
+ .to_struct(ListToStructArgs::InferWidth {
202
+ infer_field_strategy: width_strat.0,
203
+ get_index_name: name_gen,
204
+ max_fields: upper_bound,
205
+ })
199
206
  .into())
200
207
  }
201
208
 
@@ -84,13 +84,17 @@ impl RbExpr {
84
84
  self.inner.clone().meta()._into_selector().into()
85
85
  }
86
86
 
87
- pub fn meta_tree_format(&self) -> RbResult<String> {
87
+ fn compute_tree_format(&self, display_as_dot: bool) -> RbResult<String> {
88
88
  let e = self
89
89
  .inner
90
90
  .clone()
91
91
  .meta()
92
- .into_tree_formatter()
92
+ .into_tree_formatter(display_as_dot)
93
93
  .map_err(RbPolarsErr::from)?;
94
94
  Ok(format!("{e}"))
95
95
  }
96
+
97
+ pub fn meta_tree_format(&self) -> RbResult<String> {
98
+ self.compute_tree_format(false)
99
+ }
96
100
  }
@@ -1,5 +1,4 @@
1
1
  use polars::prelude::*;
2
- use std::any::Any;
3
2
 
4
3
  use crate::conversion::Wrap;
5
4
  use crate::RbExpr;
@@ -169,7 +168,7 @@ impl RbExpr {
169
168
  weights,
170
169
  min_periods,
171
170
  center,
172
- fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
171
+ fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
173
172
  };
174
173
 
175
174
  self.inner.clone().rolling_std(options).into()
@@ -187,7 +186,7 @@ impl RbExpr {
187
186
  window_size: Duration::parse(&window_size),
188
187
  min_periods,
189
188
  closed_window: closed.0,
190
- fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
189
+ fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
191
190
  };
192
191
 
193
192
  self.inner
@@ -210,7 +209,7 @@ impl RbExpr {
210
209
  weights,
211
210
  min_periods,
212
211
  center,
213
- fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
212
+ fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
214
213
  };
215
214
 
216
215
  self.inner.clone().rolling_var(options).into()
@@ -228,7 +227,7 @@ impl RbExpr {
228
227
  window_size: Duration::parse(&window_size),
229
228
  min_periods,
230
229
  closed_window: closed.0,
231
- fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
230
+ fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
232
231
  };
233
232
 
234
233
  self.inner
@@ -277,7 +276,7 @@ impl RbExpr {
277
276
  pub fn rolling_quantile(
278
277
  &self,
279
278
  quantile: f64,
280
- interpolation: Wrap<QuantileInterpolOptions>,
279
+ interpolation: Wrap<QuantileMethod>,
281
280
  window_size: usize,
282
281
  weights: Option<Vec<f64>>,
283
282
  min_periods: Option<usize>,
@@ -302,7 +301,7 @@ impl RbExpr {
302
301
  &self,
303
302
  by: &RbExpr,
304
303
  quantile: f64,
305
- interpolation: Wrap<QuantileInterpolOptions>,
304
+ interpolation: Wrap<QuantileMethod>,
306
305
  window_size: String,
307
306
  min_periods: usize,
308
307
  closed: Wrap<ClosedWindow>,
@@ -130,6 +130,11 @@ impl RbExpr {
130
130
  self.inner.clone().str().to_lowercase().into()
131
131
  }
132
132
 
133
+ // requires nightly
134
+ // pub fn str_to_titlecase(&self) -> Self {
135
+ // self.inner.clone().str().to_titlecase().into()
136
+ // }
137
+
133
138
  pub fn str_len_bytes(&self) -> Self {
134
139
  self.inner.clone().str().len_bytes().into()
135
140
  }
@@ -200,51 +205,19 @@ impl RbExpr {
200
205
  }
201
206
 
202
207
  pub fn str_hex_encode(&self) -> Self {
203
- self.clone()
204
- .inner
205
- .map(
206
- move |s| s.str().map(|s| Some(s.hex_encode().into_series())),
207
- GetOutput::same_type(),
208
- )
209
- .with_fmt("str.hex_encode")
210
- .into()
208
+ self.inner.clone().str().hex_encode().into()
211
209
  }
212
210
 
213
211
  pub fn str_hex_decode(&self, strict: bool) -> Self {
214
- self.clone()
215
- .inner
216
- .map(
217
- move |s| s.str()?.hex_decode(strict).map(|s| Some(s.into_series())),
218
- GetOutput::same_type(),
219
- )
220
- .with_fmt("str.hex_decode")
221
- .into()
212
+ self.inner.clone().str().hex_decode(strict).into()
222
213
  }
223
214
 
224
215
  pub fn str_base64_encode(&self) -> Self {
225
- self.clone()
226
- .inner
227
- .map(
228
- move |s| s.str().map(|s| Some(s.base64_encode().into_series())),
229
- GetOutput::same_type(),
230
- )
231
- .with_fmt("str.base64_encode")
232
- .into()
216
+ self.inner.clone().str().base64_encode().into()
233
217
  }
234
218
 
235
219
  pub fn str_base64_decode(&self, strict: bool) -> Self {
236
- self.clone()
237
- .inner
238
- .map(
239
- move |s| {
240
- s.str()?
241
- .base64_decode(strict)
242
- .map(|s| Some(s.into_series()))
243
- },
244
- GetOutput::same_type(),
245
- )
246
- .with_fmt("str.base64_decode")
247
- .into()
220
+ self.inner.clone().str().base64_decode(strict).into()
248
221
  }
249
222
 
250
223
  pub fn str_to_integer(&self, base: &Self, strict: bool) -> Self {
@@ -1,10 +1,12 @@
1
1
  use std::fs::File;
2
2
  use std::io;
3
- use std::io::{BufReader, Cursor, Read, Seek, SeekFrom, Write};
3
+ use std::io::{Cursor, Read, Seek, SeekFrom, Write};
4
4
  use std::path::PathBuf;
5
5
 
6
6
  use magnus::{exception, prelude::*, Error, RString, Value};
7
+ use polars::io::cloud::CloudOptions;
7
8
  use polars::io::mmap::MmapBytesReader;
9
+ use polars_utils::mmap::MemSlice;
8
10
 
9
11
  use crate::error::RbPolarsErr;
10
12
  use crate::prelude::resolve_homedir;
@@ -24,9 +26,8 @@ impl RbFileLikeObject {
24
26
  RbFileLikeObject { inner: object }
25
27
  }
26
28
 
27
- pub fn as_buffer(&self) -> std::io::Cursor<Vec<u8>> {
28
- let data = self.as_file_buffer().into_inner();
29
- std::io::Cursor::new(data)
29
+ pub fn as_bytes(&self) -> bytes::Bytes {
30
+ self.as_file_buffer().into_inner().into()
30
31
  }
31
32
 
32
33
  pub fn as_file_buffer(&self) -> Cursor<Vec<u8>> {
@@ -132,7 +133,42 @@ impl FileLike for RbFileLikeObject {}
132
133
 
133
134
  pub enum EitherRustRubyFile {
134
135
  Rb(RbFileLikeObject),
135
- Rust(BufReader<File>),
136
+ Rust(File),
137
+ }
138
+
139
+ impl EitherRustRubyFile {
140
+ pub fn into_dyn(self) -> Box<dyn FileLike> {
141
+ match self {
142
+ EitherRustRubyFile::Rb(f) => Box::new(f),
143
+ EitherRustRubyFile::Rust(f) => Box::new(f),
144
+ }
145
+ }
146
+
147
+ pub fn into_dyn_writeable(self) -> Box<dyn Write> {
148
+ match self {
149
+ EitherRustRubyFile::Rb(f) => Box::new(f),
150
+ EitherRustRubyFile::Rust(f) => Box::new(f),
151
+ }
152
+ }
153
+ }
154
+
155
+ pub enum RubyScanSourceInput {
156
+ Buffer(MemSlice),
157
+ Path(PathBuf),
158
+ #[allow(dead_code)]
159
+ File(File),
160
+ }
161
+
162
+ pub fn get_ruby_scan_source_input(rb_f: Value, write: bool) -> RbResult<RubyScanSourceInput> {
163
+ if let Ok(file_path) = PathBuf::try_convert(rb_f) {
164
+ // TODO resolve_homedir
165
+ Ok(RubyScanSourceInput::Path(file_path))
166
+ } else {
167
+ let f = RbFileLikeObject::with_requirements(rb_f, !write, write, !write)?;
168
+ Ok(RubyScanSourceInput::Buffer(MemSlice::from_bytes(
169
+ f.as_bytes(),
170
+ )))
171
+ }
136
172
  }
137
173
 
138
174
  ///
@@ -142,14 +178,13 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
142
178
  if let Ok(rstring) = RString::try_convert(rb_f) {
143
179
  let s = unsafe { rstring.as_str() }?;
144
180
  let file_path = std::path::Path::new(&s);
145
- let file_path = resolve_homedir(file_path);
181
+ let file_path = resolve_homedir(&file_path);
146
182
  let f = if truncate {
147
- File::create(file_path).map_err(RbPolarsErr::io)?
183
+ File::create(file_path).map_err(RbPolarsErr::from)?
148
184
  } else {
149
185
  polars_utils::open_file(&file_path).map_err(RbPolarsErr::from)?
150
186
  };
151
- let reader = BufReader::new(f);
152
- Ok(EitherRustRubyFile::Rust(reader))
187
+ Ok(EitherRustRubyFile::Rust(f))
153
188
  } else {
154
189
  let f = RbFileLikeObject::with_requirements(rb_f, !truncate, truncate, !truncate)?;
155
190
  Ok(EitherRustRubyFile::Rb(f))
@@ -157,21 +192,41 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
157
192
  }
158
193
 
159
194
  pub fn get_file_like(f: Value, truncate: bool) -> RbResult<Box<dyn FileLike>> {
160
- use EitherRustRubyFile::*;
161
- match get_either_file(f, truncate)? {
162
- Rb(f) => Ok(Box::new(f)),
163
- Rust(f) => Ok(Box::new(f.into_inner())),
164
- }
195
+ Ok(get_either_file(f, truncate)?.into_dyn())
165
196
  }
166
197
 
167
- pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>> {
168
- if let Ok(bytes) = rb_f.funcall::<_, _, RString>("read", ()) {
169
- let bytes = unsafe { bytes.as_slice() };
170
- // TODO avoid copy
171
- Ok(Box::new(Cursor::new(bytes.to_vec())))
172
- } else {
173
- let p = PathBuf::try_convert(rb_f)?;
174
- let f = File::open(p).map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
175
- Ok(Box::new(f))
198
+ pub enum RbReadBytes {
199
+ Bytes(RString),
200
+ Other(Value),
201
+ }
202
+
203
+ pub fn read_if_bytesio(rb_f: Value) -> RbReadBytes {
204
+ rb_f.funcall("read", ())
205
+ .map(RbReadBytes::Bytes)
206
+ .unwrap_or(RbReadBytes::Other(rb_f))
207
+ }
208
+
209
+ pub fn get_mmap_bytes_reader<'a>(rb_f: &'a RbReadBytes) -> RbResult<Box<dyn MmapBytesReader + 'a>> {
210
+ get_mmap_bytes_reader_and_path(rb_f).map(|t| t.0)
211
+ }
212
+
213
+ pub fn get_mmap_bytes_reader_and_path<'a>(
214
+ rb_f: &'a RbReadBytes,
215
+ ) -> RbResult<(Box<dyn MmapBytesReader + 'a>, Option<PathBuf>)> {
216
+ match rb_f {
217
+ RbReadBytes::Bytes(v) => Ok((Box::new(Cursor::new(unsafe { v.as_slice() })), None)),
218
+ RbReadBytes::Other(v) => {
219
+ let path = PathBuf::try_convert(*v)?;
220
+ let f = File::open(&path)
221
+ .map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
222
+ Ok((Box::new(f), Some(path)))
223
+ }
176
224
  }
177
225
  }
226
+
227
+ pub fn try_get_writeable(
228
+ rb_f: Value,
229
+ _cloud_options: Option<&CloudOptions>,
230
+ ) -> RbResult<Box<dyn Write>> {
231
+ Ok(get_either_file(rb_f, true)?.into_dyn_writeable())
232
+ }
@@ -28,14 +28,14 @@ pub fn min_horizontal(exprs: RArray) -> RbResult<RbExpr> {
28
28
  Ok(e.into())
29
29
  }
30
30
 
31
- pub fn sum_horizontal(exprs: RArray) -> RbResult<RbExpr> {
31
+ pub fn sum_horizontal(exprs: RArray, ignore_nulls: bool) -> RbResult<RbExpr> {
32
32
  let exprs = rb_exprs_to_exprs(exprs)?;
33
- let e = dsl::sum_horizontal(exprs).map_err(RbPolarsErr::from)?;
33
+ let e = dsl::sum_horizontal(exprs, ignore_nulls).map_err(RbPolarsErr::from)?;
34
34
  Ok(e.into())
35
35
  }
36
36
 
37
- pub fn mean_horizontal(exprs: RArray) -> RbResult<RbExpr> {
37
+ pub fn mean_horizontal(exprs: RArray, ignore_nulls: bool) -> RbResult<RbExpr> {
38
38
  let exprs = rb_exprs_to_exprs(exprs)?;
39
- let e = dsl::mean_horizontal(exprs).map_err(RbPolarsErr::from)?;
39
+ let e = dsl::mean_horizontal(exprs, ignore_nulls).map_err(RbPolarsErr::from)?;
40
40
  Ok(e.into())
41
41
  }
@@ -0,0 +1,15 @@
1
+ use polars::lazy::dsl;
2
+
3
+ use crate::RbExpr;
4
+
5
+ // TODO add to Ruby
6
+ pub fn business_day_count(
7
+ start: &RbExpr,
8
+ end: &RbExpr,
9
+ week_mask: [bool; 7],
10
+ holidays: Vec<i32>,
11
+ ) -> RbExpr {
12
+ let start = start.inner.clone();
13
+ let end = end.inner.clone();
14
+ dsl::business_day_count(start, end, week_mask, holidays).into()
15
+ }
@@ -1,34 +1,55 @@
1
+ use std::io::BufReader;
2
+
1
3
  use magnus::{RHash, Value};
4
+ use polars::prelude::ArrowSchema;
5
+ use polars_core::datatypes::create_enum_dtype;
6
+ use polars_core::export::arrow::array::Utf8ViewArray;
2
7
 
3
8
  use crate::conversion::Wrap;
4
- use crate::file::get_file_like;
5
- use crate::prelude::DataType;
9
+ use crate::file::{get_either_file, EitherRustRubyFile};
10
+ use crate::prelude::ArrowDataType;
6
11
  use crate::{RbPolarsErr, RbResult};
7
12
 
8
13
  pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
9
14
  use polars_core::export::arrow::io::ipc::read::read_file_metadata;
10
- let mut r = get_file_like(rb_f, false)?;
11
- let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::from)?;
15
+ let metadata = match get_either_file(rb_f, false)? {
16
+ EitherRustRubyFile::Rust(r) => {
17
+ read_file_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
18
+ }
19
+ EitherRustRubyFile::Rb(mut r) => read_file_metadata(&mut r).map_err(RbPolarsErr::from)?,
20
+ };
12
21
 
13
22
  let dict = RHash::new();
14
- for field in metadata.schema.iter_values() {
15
- let dt: Wrap<DataType> = Wrap((&field.dtype).into());
16
- dict.aset(field.name.as_str(), dt)?;
17
- }
23
+ fields_to_rbdict(&metadata.schema, &dict)?;
18
24
  Ok(dict)
19
25
  }
20
26
 
21
27
  pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
22
28
  use polars_parquet::read::{infer_schema, read_metadata};
23
29
 
24
- let mut r = get_file_like(rb_f, false)?;
25
- let metadata = read_metadata(&mut r).map_err(RbPolarsErr::from)?;
30
+ let metadata = match get_either_file(rb_f, false)? {
31
+ EitherRustRubyFile::Rust(r) => {
32
+ read_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
33
+ }
34
+ EitherRustRubyFile::Rb(mut r) => read_metadata(&mut r).map_err(RbPolarsErr::from)?,
35
+ };
26
36
  let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;
27
37
 
28
38
  let dict = RHash::new();
29
- for field in arrow_schema.iter_values() {
30
- let dt: Wrap<DataType> = Wrap((&field.dtype).into());
39
+ fields_to_rbdict(&arrow_schema, &dict)?;
40
+ Ok(dict)
41
+ }
42
+
43
+ fn fields_to_rbdict(schema: &ArrowSchema, dict: &RHash) -> RbResult<()> {
44
+ for field in schema.iter_values() {
45
+ let dt = if field.is_enum() {
46
+ Wrap(create_enum_dtype(Utf8ViewArray::new_empty(
47
+ ArrowDataType::Utf8View,
48
+ )))
49
+ } else {
50
+ Wrap(polars::prelude::DataType::from_arrow_field(field))
51
+ };
31
52
  dict.aset(field.name.as_str(), dt)?;
32
53
  }
33
- Ok(dict)
54
+ Ok(())
34
55
  }
@@ -70,6 +70,7 @@ pub fn arg_sort_by(
70
70
  nulls_last,
71
71
  multithreaded,
72
72
  maintain_order,
73
+ limit: None,
73
74
  },
74
75
  )
75
76
  .into())
@@ -94,10 +95,7 @@ pub fn col(name: String) -> RbExpr {
94
95
  }
95
96
 
96
97
  pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
97
- let lfs = lfs
98
- .into_iter()
99
- .map(<&RbLazyFrame>::try_convert)
100
- .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
98
+ let lfs = lfs.typecheck::<Obj<RbLazyFrame>>()?;
101
99
 
102
100
  Ok(RArray::from_iter(lfs.iter().map(|lf| {
103
101
  let df = lf.ldf.borrow().clone().collect().unwrap();
@@ -173,8 +171,14 @@ pub fn cum_fold(
173
171
  let exprs = rb_exprs_to_exprs(exprs)?;
174
172
  let lambda = Opaque::from(lambda);
175
173
 
176
- let func =
177
- move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
174
+ let func = move |a: Column, b: Column| {
175
+ binary_lambda(
176
+ Ruby::get().unwrap().get_inner(lambda),
177
+ a.take_materialized_series(),
178
+ b.take_materialized_series(),
179
+ )
180
+ .map(|v| v.map(Column::from))
181
+ };
178
182
  Ok(dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
179
183
  }
180
184
 
@@ -263,8 +267,14 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
263
267
  let exprs = rb_exprs_to_exprs(exprs)?;
264
268
  let lambda = Opaque::from(lambda);
265
269
 
266
- let func =
267
- move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
270
+ let func = move |a: Column, b: Column| {
271
+ binary_lambda(
272
+ Ruby::get().unwrap().get_inner(lambda),
273
+ a.take_materialized_series(),
274
+ b.take_materialized_series(),
275
+ )
276
+ .map(|v| v.map(Column::from))
277
+ };
268
278
  Ok(dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
269
279
  }
270
280
 
@@ -311,8 +321,8 @@ pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
311
321
  }
312
322
  }
313
323
 
314
- pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
315
- dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
324
+ pub fn pearson_corr(a: &RbExpr, b: &RbExpr) -> RbExpr {
325
+ dsl::pearson_corr(a.inner.clone(), b.inner.clone()).into()
316
326
  }
317
327
 
318
328
  pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbResult<RbExpr> {
@@ -336,8 +346,8 @@ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbRe
336
346
  Ok(dsl::repeat(value, n).into())
337
347
  }
338
348
 
339
- pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
340
- dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans).into()
349
+ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, propagate_nans: bool) -> RbExpr {
350
+ dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), propagate_nans).into()
341
351
  }
342
352
 
343
353
  pub fn sql_expr(sql: String) -> RbResult<RbExpr> {
@@ -11,7 +11,7 @@ pub fn get_index_type() -> Value {
11
11
  Wrap(IDX_DTYPE).into_value()
12
12
  }
13
13
 
14
- pub fn threadpool_size() -> usize {
14
+ pub fn thread_pool_size() -> usize {
15
15
  POOL.current_num_threads()
16
16
  }
17
17
 
@@ -1,4 +1,5 @@
1
1
  pub mod aggregation;
2
+ pub mod business;
2
3
  pub mod eager;
3
4
  pub mod io;
4
5
  pub mod lazy;
@@ -0,0 +1 @@
1
+ pub mod to_ruby;