polars-df 0.14.0 → 0.16.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE.txt +1 -0
  5. data/README.md +38 -4
  6. data/ext/polars/Cargo.toml +15 -5
  7. data/ext/polars/src/batched_csv.rs +7 -10
  8. data/ext/polars/src/conversion/any_value.rs +31 -21
  9. data/ext/polars/src/conversion/mod.rs +155 -48
  10. data/ext/polars/src/dataframe/construction.rs +0 -3
  11. data/ext/polars/src/dataframe/export.rs +9 -2
  12. data/ext/polars/src/dataframe/general.rs +15 -57
  13. data/ext/polars/src/dataframe/io.rs +77 -169
  14. data/ext/polars/src/dataframe/mod.rs +1 -0
  15. data/ext/polars/src/dataframe/serde.rs +15 -0
  16. data/ext/polars/src/error.rs +31 -48
  17. data/ext/polars/src/exceptions.rs +24 -0
  18. data/ext/polars/src/expr/binary.rs +4 -42
  19. data/ext/polars/src/expr/datetime.rs +5 -4
  20. data/ext/polars/src/expr/general.rs +16 -22
  21. data/ext/polars/src/expr/list.rs +18 -11
  22. data/ext/polars/src/expr/meta.rs +6 -2
  23. data/ext/polars/src/expr/rolling.rs +6 -7
  24. data/ext/polars/src/expr/string.rs +9 -36
  25. data/ext/polars/src/file.rs +78 -23
  26. data/ext/polars/src/functions/aggregation.rs +4 -4
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +34 -13
  29. data/ext/polars/src/functions/lazy.rs +22 -12
  30. data/ext/polars/src/functions/meta.rs +1 -1
  31. data/ext/polars/src/functions/mod.rs +1 -0
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +920 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -827
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +54 -27
  39. data/ext/polars/src/map/dataframe.rs +10 -6
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +9 -8
  42. data/ext/polars/src/on_startup.rs +1 -1
  43. data/ext/polars/src/series/aggregation.rs +1 -5
  44. data/ext/polars/src/series/arithmetic.rs +10 -10
  45. data/ext/polars/src/series/construction.rs +2 -2
  46. data/ext/polars/src/series/export.rs +1 -1
  47. data/ext/polars/src/series/general.rs +631 -0
  48. data/ext/polars/src/series/import.rs +55 -0
  49. data/ext/polars/src/series/mod.rs +11 -638
  50. data/ext/polars/src/series/scatter.rs +2 -2
  51. data/ext/polars/src/utils.rs +0 -20
  52. data/lib/polars/batched_csv_reader.rb +0 -2
  53. data/lib/polars/binary_expr.rb +133 -9
  54. data/lib/polars/binary_name_space.rb +101 -6
  55. data/lib/polars/config.rb +4 -0
  56. data/lib/polars/data_frame.rb +452 -101
  57. data/lib/polars/data_type_group.rb +28 -0
  58. data/lib/polars/data_types.rb +3 -1
  59. data/lib/polars/date_time_expr.rb +244 -0
  60. data/lib/polars/date_time_name_space.rb +87 -0
  61. data/lib/polars/expr.rb +103 -2
  62. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +95 -13
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/delta.rb +126 -0
  71. data/lib/polars/io/ipc.rb +14 -4
  72. data/lib/polars/io/ndjson.rb +10 -0
  73. data/lib/polars/io/parquet.rb +168 -111
  74. data/lib/polars/lazy_frame.rb +684 -20
  75. data/lib/polars/list_name_space.rb +169 -0
  76. data/lib/polars/selectors.rb +1226 -0
  77. data/lib/polars/series.rb +465 -35
  78. data/lib/polars/string_cache.rb +27 -1
  79. data/lib/polars/string_expr.rb +0 -1
  80. data/lib/polars/string_name_space.rb +73 -3
  81. data/lib/polars/struct_name_space.rb +31 -7
  82. data/lib/polars/utils/various.rb +5 -1
  83. data/lib/polars/utils.rb +45 -10
  84. data/lib/polars/version.rb +1 -1
  85. data/lib/polars.rb +17 -1
  86. metadata +16 -9
  87. data/lib/polars/functions.rb +0 -57
@@ -9,7 +9,6 @@ use polars_core::series::IsSorted;
9
9
  use crate::conversion::{parse_fill_null_strategy, Wrap};
10
10
  use crate::map::lazy::map_single;
11
11
  use crate::rb_exprs_to_exprs;
12
- use crate::utils::reinterpret;
13
12
  use crate::{RbExpr, RbResult};
14
13
 
15
14
  impl RbExpr {
@@ -165,7 +164,7 @@ impl RbExpr {
165
164
  self.inner.clone().implode().into()
166
165
  }
167
166
 
168
- pub fn quantile(&self, quantile: &Self, interpolation: Wrap<QuantileInterpolOptions>) -> Self {
167
+ pub fn quantile(&self, quantile: &Self, interpolation: Wrap<QuantileMethod>) -> Self {
169
168
  self.inner
170
169
  .clone()
171
170
  .quantile(quantile.inner.clone(), interpolation.0)
@@ -272,6 +271,7 @@ impl RbExpr {
272
271
  nulls_last,
273
272
  multithreaded: true,
274
273
  maintain_order: false,
274
+ limit: None,
275
275
  })
276
276
  .into()
277
277
  }
@@ -284,6 +284,7 @@ impl RbExpr {
284
284
  nulls_last,
285
285
  multithreaded: true,
286
286
  maintain_order: false,
287
+ limit: None,
287
288
  })
288
289
  .into()
289
290
  }
@@ -364,6 +365,7 @@ impl RbExpr {
364
365
  nulls_last,
365
366
  multithreaded,
366
367
  maintain_order,
368
+ limit: None,
367
369
  },
368
370
  )
369
371
  .into())
@@ -463,14 +465,7 @@ impl RbExpr {
463
465
  }
464
466
 
465
467
  pub fn gather_every(&self, n: usize, offset: usize) -> Self {
466
- self.clone()
467
- .inner
468
- .map(
469
- move |s: Series| Ok(Some(s.gather_every(n, offset))),
470
- GetOutput::same_type(),
471
- )
472
- .with_fmt("gather_every")
473
- .into()
468
+ self.inner.clone().gather_every(n, offset).into()
474
469
  }
475
470
 
476
471
  pub fn tail(&self, n: Option<usize>) -> Self {
@@ -644,8 +639,16 @@ impl RbExpr {
644
639
  output_type: Option<Wrap<DataType>>,
645
640
  agg_list: bool,
646
641
  is_elementwise: bool,
642
+ returns_scalar: bool,
647
643
  ) -> Self {
648
- map_single(self, lambda, output_type, agg_list, is_elementwise)
644
+ map_single(
645
+ self,
646
+ lambda,
647
+ output_type,
648
+ agg_list,
649
+ is_elementwise,
650
+ returns_scalar,
651
+ )
649
652
  }
650
653
 
651
654
  pub fn dot(&self, other: &Self) -> Self {
@@ -653,16 +656,7 @@ impl RbExpr {
653
656
  }
654
657
 
655
658
  pub fn reinterpret(&self, signed: bool) -> Self {
656
- let function = move |s: Series| reinterpret(&s, signed).map(Some);
657
- let dt = if signed {
658
- DataType::Int64
659
- } else {
660
- DataType::UInt64
661
- };
662
- self.clone()
663
- .inner
664
- .map(function, GetOutput::from_type(dt))
665
- .into()
659
+ self.inner.clone().reinterpret(signed).into()
666
660
  }
667
661
 
668
662
  pub fn mode(&self) -> Self {
@@ -717,7 +711,7 @@ impl RbExpr {
717
711
  }
718
712
 
719
713
  pub fn reshape(&self, dims: Vec<i64>) -> Self {
720
- self.inner.clone().reshape(&dims, NestedType::Array).into()
714
+ self.inner.clone().reshape(&dims).into()
721
715
  }
722
716
 
723
717
  pub fn cum_count(&self, reverse: bool) -> Self {
@@ -1,4 +1,4 @@
1
- use magnus::Value;
1
+ use magnus::{prelude::*, value::Opaque, Ruby, Value};
2
2
  use polars::lazy::dsl::lit;
3
3
  use polars::prelude::*;
4
4
  use polars::series::ops::NullBehavior;
@@ -179,23 +179,30 @@ impl RbExpr {
179
179
  pub fn list_to_struct(
180
180
  &self,
181
181
  width_strat: Wrap<ListToStructWidthStrategy>,
182
- _name_gen: Option<Value>,
182
+ name_gen: Option<Value>,
183
183
  upper_bound: usize,
184
184
  ) -> RbResult<Self> {
185
- // TODO fix
186
- let name_gen = None;
187
- // let name_gen = name_gen.map(|lambda| {
188
- // Arc::new(move |idx: usize| {
189
- // let out: Value = lambda.funcall("call", (idx,)).unwrap();
190
- // String::try_convert(out).unwrap()
191
- // }) as NameGenerator
192
- // });
185
+ let name_gen = name_gen.map(|lambda| {
186
+ let lambda = Opaque::from(lambda);
187
+ Arc::new(move |idx: usize| {
188
+ let lambda = Ruby::get().unwrap().get_inner(lambda);
189
+ let out: String = lambda.funcall("call", (idx,)).unwrap();
190
+ PlSmallStr::from_string(out)
191
+ });
192
+
193
+ // non-Ruby thread
194
+ todo!();
195
+ });
193
196
 
194
197
  Ok(self
195
198
  .inner
196
199
  .clone()
197
200
  .list()
198
- .to_struct(width_strat.0, name_gen, upper_bound)
201
+ .to_struct(ListToStructArgs::InferWidth {
202
+ infer_field_strategy: width_strat.0,
203
+ get_index_name: name_gen,
204
+ max_fields: upper_bound,
205
+ })
199
206
  .into())
200
207
  }
201
208
 
@@ -84,13 +84,17 @@ impl RbExpr {
84
84
  self.inner.clone().meta()._into_selector().into()
85
85
  }
86
86
 
87
- pub fn meta_tree_format(&self) -> RbResult<String> {
87
+ fn compute_tree_format(&self, display_as_dot: bool) -> RbResult<String> {
88
88
  let e = self
89
89
  .inner
90
90
  .clone()
91
91
  .meta()
92
- .into_tree_formatter()
92
+ .into_tree_formatter(display_as_dot)
93
93
  .map_err(RbPolarsErr::from)?;
94
94
  Ok(format!("{e}"))
95
95
  }
96
+
97
+ pub fn meta_tree_format(&self) -> RbResult<String> {
98
+ self.compute_tree_format(false)
99
+ }
96
100
  }
@@ -1,5 +1,4 @@
1
1
  use polars::prelude::*;
2
- use std::any::Any;
3
2
 
4
3
  use crate::conversion::Wrap;
5
4
  use crate::RbExpr;
@@ -169,7 +168,7 @@ impl RbExpr {
169
168
  weights,
170
169
  min_periods,
171
170
  center,
172
- fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
171
+ fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
173
172
  };
174
173
 
175
174
  self.inner.clone().rolling_std(options).into()
@@ -187,7 +186,7 @@ impl RbExpr {
187
186
  window_size: Duration::parse(&window_size),
188
187
  min_periods,
189
188
  closed_window: closed.0,
190
- fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
189
+ fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
191
190
  };
192
191
 
193
192
  self.inner
@@ -210,7 +209,7 @@ impl RbExpr {
210
209
  weights,
211
210
  min_periods,
212
211
  center,
213
- fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
212
+ fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
214
213
  };
215
214
 
216
215
  self.inner.clone().rolling_var(options).into()
@@ -228,7 +227,7 @@ impl RbExpr {
228
227
  window_size: Duration::parse(&window_size),
229
228
  min_periods,
230
229
  closed_window: closed.0,
231
- fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
230
+ fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
232
231
  };
233
232
 
234
233
  self.inner
@@ -277,7 +276,7 @@ impl RbExpr {
277
276
  pub fn rolling_quantile(
278
277
  &self,
279
278
  quantile: f64,
280
- interpolation: Wrap<QuantileInterpolOptions>,
279
+ interpolation: Wrap<QuantileMethod>,
281
280
  window_size: usize,
282
281
  weights: Option<Vec<f64>>,
283
282
  min_periods: Option<usize>,
@@ -302,7 +301,7 @@ impl RbExpr {
302
301
  &self,
303
302
  by: &RbExpr,
304
303
  quantile: f64,
305
- interpolation: Wrap<QuantileInterpolOptions>,
304
+ interpolation: Wrap<QuantileMethod>,
306
305
  window_size: String,
307
306
  min_periods: usize,
308
307
  closed: Wrap<ClosedWindow>,
@@ -130,6 +130,11 @@ impl RbExpr {
130
130
  self.inner.clone().str().to_lowercase().into()
131
131
  }
132
132
 
133
+ // requires nightly
134
+ // pub fn str_to_titlecase(&self) -> Self {
135
+ // self.inner.clone().str().to_titlecase().into()
136
+ // }
137
+
133
138
  pub fn str_len_bytes(&self) -> Self {
134
139
  self.inner.clone().str().len_bytes().into()
135
140
  }
@@ -200,51 +205,19 @@ impl RbExpr {
200
205
  }
201
206
 
202
207
  pub fn str_hex_encode(&self) -> Self {
203
- self.clone()
204
- .inner
205
- .map(
206
- move |s| s.str().map(|s| Some(s.hex_encode().into_series())),
207
- GetOutput::same_type(),
208
- )
209
- .with_fmt("str.hex_encode")
210
- .into()
208
+ self.inner.clone().str().hex_encode().into()
211
209
  }
212
210
 
213
211
  pub fn str_hex_decode(&self, strict: bool) -> Self {
214
- self.clone()
215
- .inner
216
- .map(
217
- move |s| s.str()?.hex_decode(strict).map(|s| Some(s.into_series())),
218
- GetOutput::same_type(),
219
- )
220
- .with_fmt("str.hex_decode")
221
- .into()
212
+ self.inner.clone().str().hex_decode(strict).into()
222
213
  }
223
214
 
224
215
  pub fn str_base64_encode(&self) -> Self {
225
- self.clone()
226
- .inner
227
- .map(
228
- move |s| s.str().map(|s| Some(s.base64_encode().into_series())),
229
- GetOutput::same_type(),
230
- )
231
- .with_fmt("str.base64_encode")
232
- .into()
216
+ self.inner.clone().str().base64_encode().into()
233
217
  }
234
218
 
235
219
  pub fn str_base64_decode(&self, strict: bool) -> Self {
236
- self.clone()
237
- .inner
238
- .map(
239
- move |s| {
240
- s.str()?
241
- .base64_decode(strict)
242
- .map(|s| Some(s.into_series()))
243
- },
244
- GetOutput::same_type(),
245
- )
246
- .with_fmt("str.base64_decode")
247
- .into()
220
+ self.inner.clone().str().base64_decode(strict).into()
248
221
  }
249
222
 
250
223
  pub fn str_to_integer(&self, base: &Self, strict: bool) -> Self {
@@ -1,10 +1,12 @@
1
1
  use std::fs::File;
2
2
  use std::io;
3
- use std::io::{BufReader, Cursor, Read, Seek, SeekFrom, Write};
3
+ use std::io::{Cursor, Read, Seek, SeekFrom, Write};
4
4
  use std::path::PathBuf;
5
5
 
6
6
  use magnus::{exception, prelude::*, Error, RString, Value};
7
+ use polars::io::cloud::CloudOptions;
7
8
  use polars::io::mmap::MmapBytesReader;
9
+ use polars_utils::mmap::MemSlice;
8
10
 
9
11
  use crate::error::RbPolarsErr;
10
12
  use crate::prelude::resolve_homedir;
@@ -24,9 +26,8 @@ impl RbFileLikeObject {
24
26
  RbFileLikeObject { inner: object }
25
27
  }
26
28
 
27
- pub fn as_buffer(&self) -> std::io::Cursor<Vec<u8>> {
28
- let data = self.as_file_buffer().into_inner();
29
- std::io::Cursor::new(data)
29
+ pub fn as_bytes(&self) -> bytes::Bytes {
30
+ self.as_file_buffer().into_inner().into()
30
31
  }
31
32
 
32
33
  pub fn as_file_buffer(&self) -> Cursor<Vec<u8>> {
@@ -132,7 +133,42 @@ impl FileLike for RbFileLikeObject {}
132
133
 
133
134
  pub enum EitherRustRubyFile {
134
135
  Rb(RbFileLikeObject),
135
- Rust(BufReader<File>),
136
+ Rust(File),
137
+ }
138
+
139
+ impl EitherRustRubyFile {
140
+ pub fn into_dyn(self) -> Box<dyn FileLike> {
141
+ match self {
142
+ EitherRustRubyFile::Rb(f) => Box::new(f),
143
+ EitherRustRubyFile::Rust(f) => Box::new(f),
144
+ }
145
+ }
146
+
147
+ pub fn into_dyn_writeable(self) -> Box<dyn Write> {
148
+ match self {
149
+ EitherRustRubyFile::Rb(f) => Box::new(f),
150
+ EitherRustRubyFile::Rust(f) => Box::new(f),
151
+ }
152
+ }
153
+ }
154
+
155
+ pub enum RubyScanSourceInput {
156
+ Buffer(MemSlice),
157
+ Path(PathBuf),
158
+ #[allow(dead_code)]
159
+ File(File),
160
+ }
161
+
162
+ pub fn get_ruby_scan_source_input(rb_f: Value, write: bool) -> RbResult<RubyScanSourceInput> {
163
+ if let Ok(file_path) = PathBuf::try_convert(rb_f) {
164
+ // TODO resolve_homedir
165
+ Ok(RubyScanSourceInput::Path(file_path))
166
+ } else {
167
+ let f = RbFileLikeObject::with_requirements(rb_f, !write, write, !write)?;
168
+ Ok(RubyScanSourceInput::Buffer(MemSlice::from_bytes(
169
+ f.as_bytes(),
170
+ )))
171
+ }
136
172
  }
137
173
 
138
174
  ///
@@ -142,14 +178,13 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
142
178
  if let Ok(rstring) = RString::try_convert(rb_f) {
143
179
  let s = unsafe { rstring.as_str() }?;
144
180
  let file_path = std::path::Path::new(&s);
145
- let file_path = resolve_homedir(file_path);
181
+ let file_path = resolve_homedir(&file_path);
146
182
  let f = if truncate {
147
- File::create(file_path).map_err(RbPolarsErr::io)?
183
+ File::create(file_path).map_err(RbPolarsErr::from)?
148
184
  } else {
149
185
  polars_utils::open_file(&file_path).map_err(RbPolarsErr::from)?
150
186
  };
151
- let reader = BufReader::new(f);
152
- Ok(EitherRustRubyFile::Rust(reader))
187
+ Ok(EitherRustRubyFile::Rust(f))
153
188
  } else {
154
189
  let f = RbFileLikeObject::with_requirements(rb_f, !truncate, truncate, !truncate)?;
155
190
  Ok(EitherRustRubyFile::Rb(f))
@@ -157,21 +192,41 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
157
192
  }
158
193
 
159
194
  pub fn get_file_like(f: Value, truncate: bool) -> RbResult<Box<dyn FileLike>> {
160
- use EitherRustRubyFile::*;
161
- match get_either_file(f, truncate)? {
162
- Rb(f) => Ok(Box::new(f)),
163
- Rust(f) => Ok(Box::new(f.into_inner())),
164
- }
195
+ Ok(get_either_file(f, truncate)?.into_dyn())
165
196
  }
166
197
 
167
- pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>> {
168
- if let Ok(bytes) = rb_f.funcall::<_, _, RString>("read", ()) {
169
- let bytes = unsafe { bytes.as_slice() };
170
- // TODO avoid copy
171
- Ok(Box::new(Cursor::new(bytes.to_vec())))
172
- } else {
173
- let p = PathBuf::try_convert(rb_f)?;
174
- let f = File::open(p).map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
175
- Ok(Box::new(f))
198
+ pub enum RbReadBytes {
199
+ Bytes(RString),
200
+ Other(Value),
201
+ }
202
+
203
+ pub fn read_if_bytesio(rb_f: Value) -> RbReadBytes {
204
+ rb_f.funcall("read", ())
205
+ .map(RbReadBytes::Bytes)
206
+ .unwrap_or(RbReadBytes::Other(rb_f))
207
+ }
208
+
209
+ pub fn get_mmap_bytes_reader<'a>(rb_f: &'a RbReadBytes) -> RbResult<Box<dyn MmapBytesReader + 'a>> {
210
+ get_mmap_bytes_reader_and_path(rb_f).map(|t| t.0)
211
+ }
212
+
213
+ pub fn get_mmap_bytes_reader_and_path<'a>(
214
+ rb_f: &'a RbReadBytes,
215
+ ) -> RbResult<(Box<dyn MmapBytesReader + 'a>, Option<PathBuf>)> {
216
+ match rb_f {
217
+ RbReadBytes::Bytes(v) => Ok((Box::new(Cursor::new(unsafe { v.as_slice() })), None)),
218
+ RbReadBytes::Other(v) => {
219
+ let path = PathBuf::try_convert(*v)?;
220
+ let f = File::open(&path)
221
+ .map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
222
+ Ok((Box::new(f), Some(path)))
223
+ }
176
224
  }
177
225
  }
226
+
227
+ pub fn try_get_writeable(
228
+ rb_f: Value,
229
+ _cloud_options: Option<&CloudOptions>,
230
+ ) -> RbResult<Box<dyn Write>> {
231
+ Ok(get_either_file(rb_f, true)?.into_dyn_writeable())
232
+ }
@@ -28,14 +28,14 @@ pub fn min_horizontal(exprs: RArray) -> RbResult<RbExpr> {
28
28
  Ok(e.into())
29
29
  }
30
30
 
31
- pub fn sum_horizontal(exprs: RArray) -> RbResult<RbExpr> {
31
+ pub fn sum_horizontal(exprs: RArray, ignore_nulls: bool) -> RbResult<RbExpr> {
32
32
  let exprs = rb_exprs_to_exprs(exprs)?;
33
- let e = dsl::sum_horizontal(exprs).map_err(RbPolarsErr::from)?;
33
+ let e = dsl::sum_horizontal(exprs, ignore_nulls).map_err(RbPolarsErr::from)?;
34
34
  Ok(e.into())
35
35
  }
36
36
 
37
- pub fn mean_horizontal(exprs: RArray) -> RbResult<RbExpr> {
37
+ pub fn mean_horizontal(exprs: RArray, ignore_nulls: bool) -> RbResult<RbExpr> {
38
38
  let exprs = rb_exprs_to_exprs(exprs)?;
39
- let e = dsl::mean_horizontal(exprs).map_err(RbPolarsErr::from)?;
39
+ let e = dsl::mean_horizontal(exprs, ignore_nulls).map_err(RbPolarsErr::from)?;
40
40
  Ok(e.into())
41
41
  }
@@ -0,0 +1,15 @@
1
+ use polars::lazy::dsl;
2
+
3
+ use crate::RbExpr;
4
+
5
+ // TODO add to Ruby
6
+ pub fn business_day_count(
7
+ start: &RbExpr,
8
+ end: &RbExpr,
9
+ week_mask: [bool; 7],
10
+ holidays: Vec<i32>,
11
+ ) -> RbExpr {
12
+ let start = start.inner.clone();
13
+ let end = end.inner.clone();
14
+ dsl::business_day_count(start, end, week_mask, holidays).into()
15
+ }
@@ -1,34 +1,55 @@
1
+ use std::io::BufReader;
2
+
1
3
  use magnus::{RHash, Value};
4
+ use polars::prelude::ArrowSchema;
5
+ use polars_core::datatypes::create_enum_dtype;
6
+ use polars_core::export::arrow::array::Utf8ViewArray;
2
7
 
3
8
  use crate::conversion::Wrap;
4
- use crate::file::get_file_like;
5
- use crate::prelude::DataType;
9
+ use crate::file::{get_either_file, EitherRustRubyFile};
10
+ use crate::prelude::ArrowDataType;
6
11
  use crate::{RbPolarsErr, RbResult};
7
12
 
8
13
  pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
9
14
  use polars_core::export::arrow::io::ipc::read::read_file_metadata;
10
- let mut r = get_file_like(rb_f, false)?;
11
- let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::from)?;
15
+ let metadata = match get_either_file(rb_f, false)? {
16
+ EitherRustRubyFile::Rust(r) => {
17
+ read_file_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
18
+ }
19
+ EitherRustRubyFile::Rb(mut r) => read_file_metadata(&mut r).map_err(RbPolarsErr::from)?,
20
+ };
12
21
 
13
22
  let dict = RHash::new();
14
- for field in metadata.schema.iter_values() {
15
- let dt: Wrap<DataType> = Wrap((&field.dtype).into());
16
- dict.aset(field.name.as_str(), dt)?;
17
- }
23
+ fields_to_rbdict(&metadata.schema, &dict)?;
18
24
  Ok(dict)
19
25
  }
20
26
 
21
27
  pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
22
28
  use polars_parquet::read::{infer_schema, read_metadata};
23
29
 
24
- let mut r = get_file_like(rb_f, false)?;
25
- let metadata = read_metadata(&mut r).map_err(RbPolarsErr::from)?;
30
+ let metadata = match get_either_file(rb_f, false)? {
31
+ EitherRustRubyFile::Rust(r) => {
32
+ read_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
33
+ }
34
+ EitherRustRubyFile::Rb(mut r) => read_metadata(&mut r).map_err(RbPolarsErr::from)?,
35
+ };
26
36
  let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;
27
37
 
28
38
  let dict = RHash::new();
29
- for field in arrow_schema.iter_values() {
30
- let dt: Wrap<DataType> = Wrap((&field.dtype).into());
39
+ fields_to_rbdict(&arrow_schema, &dict)?;
40
+ Ok(dict)
41
+ }
42
+
43
+ fn fields_to_rbdict(schema: &ArrowSchema, dict: &RHash) -> RbResult<()> {
44
+ for field in schema.iter_values() {
45
+ let dt = if field.is_enum() {
46
+ Wrap(create_enum_dtype(Utf8ViewArray::new_empty(
47
+ ArrowDataType::Utf8View,
48
+ )))
49
+ } else {
50
+ Wrap(polars::prelude::DataType::from_arrow_field(field))
51
+ };
31
52
  dict.aset(field.name.as_str(), dt)?;
32
53
  }
33
- Ok(dict)
54
+ Ok(())
34
55
  }
@@ -70,6 +70,7 @@ pub fn arg_sort_by(
70
70
  nulls_last,
71
71
  multithreaded,
72
72
  maintain_order,
73
+ limit: None,
73
74
  },
74
75
  )
75
76
  .into())
@@ -94,10 +95,7 @@ pub fn col(name: String) -> RbExpr {
94
95
  }
95
96
 
96
97
  pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
97
- let lfs = lfs
98
- .into_iter()
99
- .map(<&RbLazyFrame>::try_convert)
100
- .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
98
+ let lfs = lfs.typecheck::<Obj<RbLazyFrame>>()?;
101
99
 
102
100
  Ok(RArray::from_iter(lfs.iter().map(|lf| {
103
101
  let df = lf.ldf.borrow().clone().collect().unwrap();
@@ -173,8 +171,14 @@ pub fn cum_fold(
173
171
  let exprs = rb_exprs_to_exprs(exprs)?;
174
172
  let lambda = Opaque::from(lambda);
175
173
 
176
- let func =
177
- move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
174
+ let func = move |a: Column, b: Column| {
175
+ binary_lambda(
176
+ Ruby::get().unwrap().get_inner(lambda),
177
+ a.take_materialized_series(),
178
+ b.take_materialized_series(),
179
+ )
180
+ .map(|v| v.map(Column::from))
181
+ };
178
182
  Ok(dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
179
183
  }
180
184
 
@@ -263,8 +267,14 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
263
267
  let exprs = rb_exprs_to_exprs(exprs)?;
264
268
  let lambda = Opaque::from(lambda);
265
269
 
266
- let func =
267
- move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
270
+ let func = move |a: Column, b: Column| {
271
+ binary_lambda(
272
+ Ruby::get().unwrap().get_inner(lambda),
273
+ a.take_materialized_series(),
274
+ b.take_materialized_series(),
275
+ )
276
+ .map(|v| v.map(Column::from))
277
+ };
268
278
  Ok(dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
269
279
  }
270
280
 
@@ -311,8 +321,8 @@ pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
311
321
  }
312
322
  }
313
323
 
314
- pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
315
- dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
324
+ pub fn pearson_corr(a: &RbExpr, b: &RbExpr) -> RbExpr {
325
+ dsl::pearson_corr(a.inner.clone(), b.inner.clone()).into()
316
326
  }
317
327
 
318
328
  pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbResult<RbExpr> {
@@ -336,8 +346,8 @@ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbRe
336
346
  Ok(dsl::repeat(value, n).into())
337
347
  }
338
348
 
339
- pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
340
- dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans).into()
349
+ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, propagate_nans: bool) -> RbExpr {
350
+ dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), propagate_nans).into()
341
351
  }
342
352
 
343
353
  pub fn sql_expr(sql: String) -> RbResult<RbExpr> {
@@ -11,7 +11,7 @@ pub fn get_index_type() -> Value {
11
11
  Wrap(IDX_DTYPE).into_value()
12
12
  }
13
13
 
14
- pub fn threadpool_size() -> usize {
14
+ pub fn thread_pool_size() -> usize {
15
15
  POOL.current_num_threads()
16
16
  }
17
17
 
@@ -1,4 +1,5 @@
1
1
  pub mod aggregation;
2
+ pub mod business;
2
3
  pub mod eager;
3
4
  pub mod io;
4
5
  pub mod lazy;
@@ -0,0 +1 @@
1
+ pub mod to_ruby;