polars-df 0.6.0 → 0.8.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/Cargo.lock +597 -599
  4. data/Cargo.toml +1 -0
  5. data/README.md +8 -7
  6. data/ext/polars/Cargo.toml +20 -10
  7. data/ext/polars/src/batched_csv.rs +27 -28
  8. data/ext/polars/src/conversion.rs +135 -106
  9. data/ext/polars/src/dataframe.rs +140 -131
  10. data/ext/polars/src/error.rs +0 -5
  11. data/ext/polars/src/expr/binary.rs +18 -6
  12. data/ext/polars/src/expr/categorical.rs +8 -1
  13. data/ext/polars/src/expr/datetime.rs +10 -12
  14. data/ext/polars/src/expr/general.rs +129 -286
  15. data/ext/polars/src/expr/list.rs +17 -9
  16. data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
  17. data/ext/polars/src/expr/name.rs +44 -0
  18. data/ext/polars/src/expr/rolling.rs +201 -0
  19. data/ext/polars/src/expr/string.rs +94 -67
  20. data/ext/polars/src/file.rs +3 -3
  21. data/ext/polars/src/functions/aggregation.rs +35 -0
  22. data/ext/polars/src/functions/eager.rs +7 -31
  23. data/ext/polars/src/functions/io.rs +10 -10
  24. data/ext/polars/src/functions/lazy.rs +66 -41
  25. data/ext/polars/src/functions/meta.rs +30 -0
  26. data/ext/polars/src/functions/misc.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/random.rs +6 -0
  29. data/ext/polars/src/functions/range.rs +41 -0
  30. data/ext/polars/src/functions/string_cache.rs +11 -0
  31. data/ext/polars/src/functions/whenthen.rs +7 -7
  32. data/ext/polars/src/lazyframe.rs +74 -60
  33. data/ext/polars/src/lib.rs +175 -91
  34. data/ext/polars/src/{apply → map}/dataframe.rs +29 -34
  35. data/ext/polars/src/{apply → map}/mod.rs +5 -5
  36. data/ext/polars/src/{apply → map}/series.rs +18 -22
  37. data/ext/polars/src/object.rs +0 -30
  38. data/ext/polars/src/on_startup.rs +32 -0
  39. data/ext/polars/src/rb_modules.rs +22 -7
  40. data/ext/polars/src/series/aggregation.rs +3 -0
  41. data/ext/polars/src/series/construction.rs +5 -5
  42. data/ext/polars/src/series/export.rs +4 -4
  43. data/ext/polars/src/{series.rs → series/mod.rs} +28 -45
  44. data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +38 -22
  45. data/ext/polars/src/sql.rs +46 -0
  46. data/ext/polars/src/utils.rs +1 -1
  47. data/lib/polars/config.rb +530 -0
  48. data/lib/polars/data_frame.rb +182 -145
  49. data/lib/polars/data_types.rb +4 -1
  50. data/lib/polars/date_time_expr.rb +23 -28
  51. data/lib/polars/date_time_name_space.rb +17 -37
  52. data/lib/polars/dynamic_group_by.rb +2 -2
  53. data/lib/polars/expr.rb +398 -110
  54. data/lib/polars/functions.rb +29 -37
  55. data/lib/polars/group_by.rb +38 -55
  56. data/lib/polars/io.rb +40 -5
  57. data/lib/polars/lazy_frame.rb +116 -89
  58. data/lib/polars/lazy_functions.rb +40 -68
  59. data/lib/polars/lazy_group_by.rb +7 -8
  60. data/lib/polars/list_expr.rb +12 -8
  61. data/lib/polars/list_name_space.rb +2 -2
  62. data/lib/polars/name_expr.rb +198 -0
  63. data/lib/polars/rolling_group_by.rb +2 -2
  64. data/lib/polars/series.rb +315 -43
  65. data/lib/polars/sql_context.rb +194 -0
  66. data/lib/polars/string_expr.rb +114 -60
  67. data/lib/polars/string_name_space.rb +19 -4
  68. data/lib/polars/struct_expr.rb +1 -1
  69. data/lib/polars/struct_name_space.rb +1 -1
  70. data/lib/polars/utils.rb +25 -13
  71. data/lib/polars/version.rb +1 -1
  72. data/lib/polars.rb +3 -0
  73. metadata +23 -11
  74. data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -1,10 +1,12 @@
1
1
  use magnus::encoding::{self, EncodingCapable};
2
- use magnus::{class, Float, Integer, RArray, RString, Value};
2
+ use magnus::{
3
+ class, prelude::*, typed_data::Obj, value::Opaque, Float, Integer, RArray, RString, Ruby, Value,
4
+ };
3
5
  use polars::lazy::dsl;
4
6
  use polars::prelude::*;
5
7
 
6
- use crate::apply::lazy::binary_lambda;
7
8
  use crate::conversion::{get_lf, get_rbseq, Wrap};
9
+ use crate::map::lazy::binary_lambda;
8
10
  use crate::prelude::vec_extract_wrapped;
9
11
  use crate::rb_exprs_to_exprs;
10
12
  use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
@@ -15,10 +17,6 @@ macro_rules! set_unwrapped_or_0 {
15
17
  };
16
18
  }
17
19
 
18
- pub fn arange(low: &RbExpr, high: &RbExpr, step: i64) -> RbExpr {
19
- dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
20
- }
21
-
22
20
  pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
23
21
  let by = rb_exprs_to_exprs(by)?;
24
22
  Ok(dsl::arg_sort_by(by, &descending).into())
@@ -30,7 +28,7 @@ pub fn arg_where(condition: &RbExpr) -> RbExpr {
30
28
 
31
29
  pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
32
30
  let exprs = rb_exprs_to_exprs(exprs)?;
33
- Ok(dsl::as_struct(&exprs).into())
31
+ Ok(dsl::as_struct(exprs).into())
34
32
  }
35
33
 
36
34
  pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
@@ -45,7 +43,7 @@ pub fn col(name: String) -> RbExpr {
45
43
  pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
46
44
  let lfs = lfs
47
45
  .each()
48
- .map(|v| v?.try_convert::<&RbLazyFrame>())
46
+ .map(|v| <&RbLazyFrame>::try_convert(v?))
49
47
  .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
50
48
 
51
49
  Ok(RArray::from_iter(lfs.iter().map(|lf| {
@@ -85,36 +83,65 @@ pub fn concat_lf(
85
83
  Ok(lf.into())
86
84
  }
87
85
 
86
+ pub fn concat_lf_diagonal(
87
+ lfs: RArray,
88
+ rechunk: bool,
89
+ parallel: bool,
90
+ to_supertypes: bool,
91
+ ) -> RbResult<RbLazyFrame> {
92
+ let iter = lfs.each();
93
+
94
+ let lfs = iter
95
+ .map(|item| {
96
+ let item = item?;
97
+ get_lf(item)
98
+ })
99
+ .collect::<RbResult<Vec<_>>>()?;
100
+
101
+ let lf = dsl::functions::concat_lf_diagonal(
102
+ lfs,
103
+ UnionArgs {
104
+ rechunk,
105
+ parallel,
106
+ to_supertypes,
107
+ },
108
+ )
109
+ .map_err(RbPolarsErr::from)?;
110
+ Ok(lf.into())
111
+ }
112
+
88
113
  #[allow(clippy::too_many_arguments)]
89
114
  pub fn duration(
115
+ weeks: Option<&RbExpr>,
90
116
  days: Option<&RbExpr>,
117
+ hours: Option<&RbExpr>,
118
+ minutes: Option<&RbExpr>,
91
119
  seconds: Option<&RbExpr>,
92
- nanoseconds: Option<&RbExpr>,
93
- microseconds: Option<&RbExpr>,
94
120
  milliseconds: Option<&RbExpr>,
95
- minutes: Option<&RbExpr>,
96
- hours: Option<&RbExpr>,
97
- weeks: Option<&RbExpr>,
121
+ microseconds: Option<&RbExpr>,
122
+ nanoseconds: Option<&RbExpr>,
123
+ time_unit: Wrap<TimeUnit>,
98
124
  ) -> RbExpr {
99
125
  set_unwrapped_or_0!(
126
+ weeks,
100
127
  days,
128
+ hours,
129
+ minutes,
101
130
  seconds,
102
- nanoseconds,
103
- microseconds,
104
131
  milliseconds,
105
- minutes,
106
- hours,
107
- weeks,
132
+ microseconds,
133
+ nanoseconds,
108
134
  );
109
135
  let args = DurationArgs {
136
+ weeks,
110
137
  days,
138
+ hours,
139
+ minutes,
111
140
  seconds,
112
- nanoseconds,
113
- microseconds,
114
141
  milliseconds,
115
- minutes,
116
- hours,
117
- weeks,
142
+ microseconds,
143
+ nanoseconds,
144
+ time_unit: time_unit.0,
118
145
  };
119
146
  dsl::duration(args).into()
120
147
  }
@@ -137,23 +164,27 @@ pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
137
164
 
138
165
  pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
139
166
  let exprs = rb_exprs_to_exprs(exprs)?;
167
+ let lambda = Opaque::from(lambda);
140
168
 
141
- let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
169
+ let func =
170
+ move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
142
171
  Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
143
172
  }
144
173
 
145
174
  pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
146
175
  let exprs = rb_exprs_to_exprs(exprs)?;
176
+ let lambda = Opaque::from(lambda);
147
177
 
148
- let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
149
- Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
178
+ let func =
179
+ move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
180
+ Ok(polars::lazy::dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
150
181
  }
151
182
 
152
183
  pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
153
184
  if value.is_kind_of(class::true_class()) || value.is_kind_of(class::false_class()) {
154
- Ok(dsl::lit(value.try_convert::<bool>()?).into())
185
+ Ok(dsl::lit(bool::try_convert(value)?).into())
155
186
  } else if let Some(v) = Integer::from_value(value) {
156
- match v.try_convert::<i64>() {
187
+ match v.to_i64() {
157
188
  Ok(val) => {
158
189
  if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
159
190
  Ok(dsl::lit(val as i32).into())
@@ -162,19 +193,19 @@ pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
162
193
  }
163
194
  }
164
195
  _ => {
165
- let val = value.try_convert::<u64>()?;
196
+ let val = v.to_u64()?;
166
197
  Ok(dsl::lit(val).into())
167
198
  }
168
199
  }
169
200
  } else if let Some(v) = Float::from_value(value) {
170
- Ok(dsl::lit(v.try_convert::<f64>()?).into())
201
+ Ok(dsl::lit(v.to_f64()).into())
171
202
  } else if let Some(v) = RString::from_value(value) {
172
203
  if v.enc_get() == encoding::Index::utf8() {
173
- Ok(dsl::lit(v.try_convert::<String>()?).into())
204
+ Ok(dsl::lit(v.to_string()?).into())
174
205
  } else {
175
206
  Ok(dsl::lit(unsafe { v.as_slice() }).into())
176
207
  }
177
- } else if let Ok(series) = value.try_convert::<&RbSeries>() {
208
+ } else if let Ok(series) = Obj::<RbSeries>::try_convert(value) {
178
209
  Ok(dsl::lit(series.series.borrow().clone()).into())
179
210
  } else if value.is_nil() {
180
211
  Ok(dsl::lit(Null {}).into())
@@ -218,8 +249,8 @@ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool
218
249
  .into()
219
250
  }
220
251
 
221
- pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
222
- polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
252
+ pub fn cov(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
253
+ polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone(), ddof).into()
223
254
  }
224
255
 
225
256
  pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
@@ -236,14 +267,8 @@ pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
236
267
  pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
237
268
  let dtypes = dtypes
238
269
  .each()
239
- .map(|v| v?.try_convert::<Wrap<DataType>>())
270
+ .map(|v| Wrap::<DataType>::try_convert(v?))
240
271
  .collect::<RbResult<Vec<Wrap<DataType>>>>()?;
241
272
  let dtypes = vec_extract_wrapped(dtypes);
242
273
  Ok(crate::functions::lazy::dtype_cols(dtypes))
243
274
  }
244
-
245
- // TODO rename to sum_horizontal
246
- pub fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
247
- let exprs = rb_exprs_to_exprs(exprs)?;
248
- Ok(polars::lazy::dsl::sum_horizontal(exprs).into())
249
- }
@@ -1,8 +1,38 @@
1
1
  use magnus::{IntoValue, Value};
2
+ use polars_core;
3
+ use polars_core::fmt::FloatFmt;
2
4
  use polars_core::prelude::IDX_DTYPE;
5
+ use polars_core::POOL;
3
6
 
4
7
  use crate::conversion::Wrap;
8
+ use crate::{RbResult, RbValueError};
5
9
 
6
10
  pub fn get_idx_type() -> Value {
7
11
  Wrap(IDX_DTYPE).into_value()
8
12
  }
13
+
14
+ pub fn threadpool_size() -> usize {
15
+ POOL.current_num_threads()
16
+ }
17
+
18
+ pub fn set_float_fmt(fmt: String) -> RbResult<()> {
19
+ let fmt = match fmt.as_str() {
20
+ "full" => FloatFmt::Full,
21
+ "mixed" => FloatFmt::Mixed,
22
+ e => {
23
+ return Err(RbValueError::new_err(format!(
24
+ "fmt must be one of {{'full', 'mixed'}}, got {e}",
25
+ )))
26
+ }
27
+ };
28
+ polars_core::fmt::set_float_fmt(fmt);
29
+ Ok(())
30
+ }
31
+
32
+ pub fn get_float_fmt() -> RbResult<String> {
33
+ let strfmt = match polars_core::fmt::get_float_fmt() {
34
+ FloatFmt::Full => "full",
35
+ FloatFmt::Mixed => "mixed",
36
+ };
37
+ Ok(strfmt.to_string())
38
+ }
@@ -0,0 +1,8 @@
1
+ use crate::conversion::Wrap;
2
+ use crate::prelude::DataType;
3
+ use crate::RbResult;
4
+
5
+ pub fn dtype_str_repr(dtype: Wrap<DataType>) -> RbResult<String> {
6
+ let dtype = dtype.0;
7
+ Ok(dtype.to_string())
8
+ }
@@ -1,5 +1,10 @@
1
+ pub mod aggregation;
1
2
  pub mod eager;
2
3
  pub mod io;
3
4
  pub mod lazy;
4
5
  pub mod meta;
6
+ pub mod misc;
7
+ pub mod random;
8
+ pub mod range;
9
+ pub mod string_cache;
5
10
  pub mod whenthen;
@@ -0,0 +1,6 @@
1
+ use crate::RbResult;
2
+
3
+ pub fn set_random_seed(seed: u64) -> RbResult<()> {
4
+ polars_core::random::set_global_random_seed(seed);
5
+ Ok(())
6
+ }
@@ -0,0 +1,41 @@
1
+ use polars::lazy::dsl;
2
+ use polars_core::datatypes::{TimeUnit, TimeZone};
3
+
4
+ use crate::conversion::Wrap;
5
+ use crate::prelude::*;
6
+ use crate::RbExpr;
7
+
8
+ pub fn int_range(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap<DataType>) -> RbExpr {
9
+ let start = start.inner.clone();
10
+ let end = end.inner.clone();
11
+ let dtype = dtype.0;
12
+ dsl::int_range(start, end, step, dtype).into()
13
+ }
14
+
15
+ pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: &RbExpr, dtype: Wrap<DataType>) -> RbExpr {
16
+ let dtype = dtype.0;
17
+
18
+ let mut result = dsl::int_ranges(start.inner.clone(), end.inner.clone(), step.inner.clone());
19
+
20
+ if dtype != DataType::Int64 {
21
+ result = result.cast(DataType::List(Box::new(dtype)))
22
+ }
23
+
24
+ result.into()
25
+ }
26
+
27
+ pub fn date_range(
28
+ start: &RbExpr,
29
+ end: &RbExpr,
30
+ every: String,
31
+ closed: Wrap<ClosedWindow>,
32
+ time_unit: Option<Wrap<TimeUnit>>,
33
+ time_zone: Option<TimeZone>,
34
+ ) -> RbExpr {
35
+ let start = start.inner.clone();
36
+ let end = end.inner.clone();
37
+ let every = Duration::parse(&every);
38
+ let closed = closed.0;
39
+ let time_unit = time_unit.map(|x| x.0);
40
+ dsl::date_range(start, end, every, closed, time_unit, time_zone).into()
41
+ }
@@ -0,0 +1,11 @@
1
+ pub fn enable_string_cache() {
2
+ polars_core::enable_string_cache()
3
+ }
4
+
5
+ pub fn disable_string_cache() {
6
+ polars_core::disable_string_cache()
7
+ }
8
+
9
+ pub fn using_string_cache() -> bool {
10
+ polars_core::using_string_cache()
11
+ }
@@ -16,23 +16,23 @@ impl From<dsl::When> for RbWhen {
16
16
 
17
17
  #[magnus::wrap(class = "Polars::RbWhenThen")]
18
18
  #[derive(Clone)]
19
- pub struct RbWhenThen {
20
- pub inner: dsl::WhenThen,
19
+ pub struct RbThen {
20
+ pub inner: dsl::Then,
21
21
  }
22
22
 
23
- impl From<dsl::WhenThen> for RbWhenThen {
24
- fn from(inner: dsl::WhenThen) -> Self {
25
- RbWhenThen { inner }
23
+ impl From<dsl::Then> for RbThen {
24
+ fn from(inner: dsl::Then) -> Self {
25
+ RbThen { inner }
26
26
  }
27
27
  }
28
28
 
29
29
  impl RbWhen {
30
- pub fn then(&self, expr: &RbExpr) -> RbWhenThen {
30
+ pub fn then(&self, expr: &RbExpr) -> RbThen {
31
31
  self.inner.clone().then(expr.inner.clone()).into()
32
32
  }
33
33
  }
34
34
 
35
- impl RbWhenThen {
35
+ impl RbThen {
36
36
  pub fn overwise(&self, expr: &RbExpr) -> RbExpr {
37
37
  self.inner.clone().otherwise(expr.inner.clone()).into()
38
38
  }
@@ -1,4 +1,4 @@
1
- use magnus::{IntoValue, RArray, RHash, Value};
1
+ use magnus::{IntoValue, RArray, RHash, TryConvert, Value};
2
2
  use polars::io::RowCount;
3
3
  use polars::lazy::frame::LazyFrame;
4
4
  use polars::prelude::*;
@@ -78,32 +78,31 @@ impl RbLazyFrame {
78
78
  pub fn new_from_csv(arguments: &[Value]) -> RbResult<Self> {
79
79
  // start arguments
80
80
  // this pattern is needed for more than 16
81
- let path: String = arguments[0].try_convert()?;
82
- let sep: String = arguments[1].try_convert()?;
83
- let has_header: bool = arguments[2].try_convert()?;
84
- let ignore_errors: bool = arguments[3].try_convert()?;
85
- let skip_rows: usize = arguments[4].try_convert()?;
86
- let n_rows: Option<usize> = arguments[5].try_convert()?;
87
- let cache: bool = arguments[6].try_convert()?;
88
- let overwrite_dtype: Option<Vec<(String, Wrap<DataType>)>> = arguments[7].try_convert()?;
89
- let low_memory: bool = arguments[8].try_convert()?;
90
- let comment_char: Option<String> = arguments[9].try_convert()?;
91
- let quote_char: Option<String> = arguments[10].try_convert()?;
92
- let null_values: Option<Wrap<NullValues>> = arguments[11].try_convert()?;
93
- let infer_schema_length: Option<usize> = arguments[12].try_convert()?;
94
- let with_schema_modify: Option<Value> = arguments[13].try_convert()?;
95
- let rechunk: bool = arguments[14].try_convert()?;
96
- let skip_rows_after_header: usize = arguments[15].try_convert()?;
97
- let encoding: Wrap<CsvEncoding> = arguments[16].try_convert()?;
98
- let row_count: Option<(String, IdxSize)> = arguments[17].try_convert()?;
99
- let try_parse_dates: bool = arguments[18].try_convert()?;
100
- let eol_char: String = arguments[19].try_convert()?;
81
+ let path = String::try_convert(arguments[0])?;
82
+ let separator = String::try_convert(arguments[1])?;
83
+ let has_header = bool::try_convert(arguments[2])?;
84
+ let ignore_errors = bool::try_convert(arguments[3])?;
85
+ let skip_rows = usize::try_convert(arguments[4])?;
86
+ let n_rows = Option::<usize>::try_convert(arguments[5])?;
87
+ let cache = bool::try_convert(arguments[6])?;
88
+ let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[7])?;
89
+ let low_memory = bool::try_convert(arguments[8])?;
90
+ let comment_prefix = Option::<String>::try_convert(arguments[9])?;
91
+ let quote_char = Option::<String>::try_convert(arguments[10])?;
92
+ let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[11])?;
93
+ let infer_schema_length = Option::<usize>::try_convert(arguments[12])?;
94
+ let with_schema_modify = Option::<Value>::try_convert(arguments[13])?;
95
+ let rechunk = bool::try_convert(arguments[14])?;
96
+ let skip_rows_after_header = usize::try_convert(arguments[15])?;
97
+ let encoding = Wrap::<CsvEncoding>::try_convert(arguments[16])?;
98
+ let row_count = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
99
+ let try_parse_dates = bool::try_convert(arguments[18])?;
100
+ let eol_char = String::try_convert(arguments[19])?;
101
101
  // end arguments
102
102
 
103
103
  let null_values = null_values.map(|w| w.0);
104
- let comment_char = comment_char.map(|s| s.as_bytes()[0]);
105
104
  let quote_char = quote_char.map(|s| s.as_bytes()[0]);
106
- let delimiter = sep.as_bytes()[0];
105
+ let separator = separator.as_bytes()[0];
107
106
  let eol_char = eol_char.as_bytes()[0];
108
107
 
109
108
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
@@ -116,7 +115,7 @@ impl RbLazyFrame {
116
115
  });
117
116
  let r = LazyCsvReader::new(path)
118
117
  .with_infer_schema_length(infer_schema_length)
119
- .with_delimiter(delimiter)
118
+ .with_separator(separator)
120
119
  .has_header(has_header)
121
120
  .with_ignore_errors(ignore_errors)
122
121
  .with_skip_rows(skip_rows)
@@ -124,7 +123,7 @@ impl RbLazyFrame {
124
123
  .with_cache(cache)
125
124
  .with_dtype_overwrite(overwrite_dtype.as_ref())
126
125
  .low_memory(low_memory)
127
- .with_comment_char(comment_char)
126
+ .with_comment_prefix(comment_prefix.as_deref())
128
127
  .with_quote_char(quote_char)
129
128
  .with_end_of_line_char(eol_char)
130
129
  .with_rechunk(rechunk)
@@ -151,6 +150,7 @@ impl RbLazyFrame {
151
150
  row_count: Option<(String, IdxSize)>,
152
151
  low_memory: bool,
153
152
  use_statistics: bool,
153
+ hive_partitioning: bool,
154
154
  ) -> RbResult<Self> {
155
155
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
156
156
  let args = ScanArgsParquet {
@@ -163,6 +163,7 @@ impl RbLazyFrame {
163
163
  // TODO support cloud options
164
164
  cloud_options: None,
165
165
  use_statistics,
166
+ hive_partitioning,
166
167
  };
167
168
  let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
168
169
  Ok(lf.into())
@@ -217,6 +218,7 @@ impl RbLazyFrame {
217
218
  slice_pushdown: bool,
218
219
  cse: bool,
219
220
  allow_streaming: bool,
221
+ _eager: bool,
220
222
  ) -> RbLazyFrame {
221
223
  let ldf = self.ldf.clone();
222
224
  let ldf = ldf
@@ -224,8 +226,9 @@ impl RbLazyFrame {
224
226
  .with_predicate_pushdown(predicate_pushdown)
225
227
  .with_simplify_expr(simplify_expr)
226
228
  .with_slice_pushdown(slice_pushdown)
227
- .with_common_subplan_elimination(cse)
229
+ .with_comm_subplan_elim(cse)
228
230
  .with_streaming(allow_streaming)
231
+ ._with_eager(_eager)
229
232
  .with_projection_pushdown(projection_pushdown);
230
233
  ldf.into()
231
234
  }
@@ -318,20 +321,20 @@ impl RbLazyFrame {
318
321
  Ok(ldf.select(exprs).into())
319
322
  }
320
323
 
321
- pub fn groupby(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
324
+ pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
322
325
  let ldf = self.ldf.clone();
323
326
  let by = rb_exprs_to_exprs(by)?;
324
327
  let lazy_gb = if maintain_order {
325
- ldf.groupby_stable(by)
328
+ ldf.group_by_stable(by)
326
329
  } else {
327
- ldf.groupby(by)
330
+ ldf.group_by(by)
328
331
  };
329
332
  Ok(RbLazyGroupBy {
330
333
  lgb: RefCell::new(Some(lazy_gb)),
331
334
  })
332
335
  }
333
336
 
334
- pub fn groupby_rolling(
337
+ pub fn group_by_rolling(
335
338
  &self,
336
339
  index_column: &RbExpr,
337
340
  period: String,
@@ -343,7 +346,7 @@ impl RbLazyFrame {
343
346
  let closed_window = closed.0;
344
347
  let ldf = self.ldf.clone();
345
348
  let by = rb_exprs_to_exprs(by)?;
346
- let lazy_gb = ldf.groupby_rolling(
349
+ let lazy_gb = ldf.group_by_rolling(
347
350
  index_column.inner.clone(),
348
351
  by,
349
352
  RollingGroupOptions {
@@ -361,32 +364,34 @@ impl RbLazyFrame {
361
364
  }
362
365
 
363
366
  #[allow(clippy::too_many_arguments)]
364
- pub fn groupby_dynamic(
367
+ pub fn group_by_dynamic(
365
368
  &self,
366
369
  index_column: &RbExpr,
367
370
  every: String,
368
371
  period: String,
369
372
  offset: String,
370
- truncate: bool,
373
+ label: Wrap<Label>,
371
374
  include_boundaries: bool,
372
375
  closed: Wrap<ClosedWindow>,
373
376
  by: RArray,
374
377
  start_by: Wrap<StartBy>,
378
+ check_sorted: bool,
375
379
  ) -> RbResult<RbLazyGroupBy> {
376
380
  let closed_window = closed.0;
377
381
  let by = rb_exprs_to_exprs(by)?;
378
382
  let ldf = self.ldf.clone();
379
- let lazy_gb = ldf.groupby_dynamic(
383
+ let lazy_gb = ldf.group_by_dynamic(
380
384
  index_column.inner.clone(),
381
385
  by,
382
386
  DynamicGroupOptions {
383
387
  every: Duration::parse(&every),
384
388
  period: Duration::parse(&period),
385
389
  offset: Duration::parse(&offset),
386
- truncate,
390
+ label: label.0,
387
391
  include_boundaries,
388
392
  closed_window,
389
393
  start_by: start_by.0,
394
+ check_sorted,
390
395
  ..Default::default()
391
396
  },
392
397
  );
@@ -399,7 +404,7 @@ impl RbLazyFrame {
399
404
  pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
400
405
  let contexts = contexts
401
406
  .each()
402
- .map(|v| v.unwrap().try_convert())
407
+ .map(|v| TryConvert::try_convert(v.unwrap()))
403
408
  .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
404
409
  let contexts = contexts
405
410
  .into_iter()
@@ -490,14 +495,13 @@ impl RbLazyFrame {
490
495
  ldf.reverse().into()
491
496
  }
492
497
 
493
- pub fn shift(&self, periods: i64) -> Self {
494
- let ldf = self.ldf.clone();
495
- ldf.shift(periods).into()
496
- }
497
-
498
- pub fn shift_and_fill(&self, periods: i64, fill_value: &RbExpr) -> Self {
499
- let ldf = self.ldf.clone();
500
- ldf.shift_and_fill(periods, fill_value.inner.clone()).into()
498
+ pub fn shift(&self, n: &RbExpr, fill_value: Option<&RbExpr>) -> Self {
499
+ let lf = self.ldf.clone();
500
+ let out = match fill_value {
501
+ Some(v) => lf.shift_and_fill(n.inner.clone(), v.inner.clone()),
502
+ None => lf.shift(n.inner.clone()),
503
+ };
504
+ out.into()
501
505
  }
502
506
 
503
507
  pub fn fill_nan(&self, fill_value: &RbExpr) -> Self {
@@ -505,48 +509,58 @@ impl RbLazyFrame {
505
509
  ldf.fill_nan(fill_value.inner.clone()).into()
506
510
  }
507
511
 
508
- pub fn min(&self) -> Self {
512
+ pub fn min(&self) -> RbResult<Self> {
509
513
  let ldf = self.ldf.clone();
510
- ldf.min().into()
514
+ let out = ldf.min().map_err(RbPolarsErr::from)?;
515
+ Ok(out.into())
511
516
  }
512
517
 
513
- pub fn max(&self) -> Self {
518
+ pub fn max(&self) -> RbResult<Self> {
514
519
  let ldf = self.ldf.clone();
515
- ldf.max().into()
520
+ let out = ldf.max().map_err(RbPolarsErr::from)?;
521
+ Ok(out.into())
516
522
  }
517
523
 
518
- pub fn sum(&self) -> Self {
524
+ pub fn sum(&self) -> RbResult<Self> {
519
525
  let ldf = self.ldf.clone();
520
- ldf.sum().into()
526
+ let out = ldf.sum().map_err(RbPolarsErr::from)?;
527
+ Ok(out.into())
521
528
  }
522
529
 
523
- pub fn mean(&self) -> Self {
530
+ pub fn mean(&self) -> RbResult<Self> {
524
531
  let ldf = self.ldf.clone();
525
- ldf.mean().into()
532
+ let out = ldf.mean().map_err(RbPolarsErr::from)?;
533
+ Ok(out.into())
526
534
  }
527
535
 
528
- pub fn std(&self, ddof: u8) -> Self {
536
+ pub fn std(&self, ddof: u8) -> RbResult<Self> {
529
537
  let ldf = self.ldf.clone();
530
- ldf.std(ddof).into()
538
+ let out = ldf.std(ddof).map_err(RbPolarsErr::from)?;
539
+ Ok(out.into())
531
540
  }
532
541
 
533
- pub fn var(&self, ddof: u8) -> Self {
542
+ pub fn var(&self, ddof: u8) -> RbResult<Self> {
534
543
  let ldf = self.ldf.clone();
535
- ldf.var(ddof).into()
544
+ let out = ldf.var(ddof).map_err(RbPolarsErr::from)?;
545
+ Ok(out.into())
536
546
  }
537
547
 
538
- pub fn median(&self) -> Self {
548
+ pub fn median(&self) -> RbResult<Self> {
539
549
  let ldf = self.ldf.clone();
540
- ldf.median().into()
550
+ let out = ldf.median().map_err(RbPolarsErr::from)?;
551
+ Ok(out.into())
541
552
  }
542
553
 
543
554
  pub fn quantile(
544
555
  &self,
545
556
  quantile: &RbExpr,
546
557
  interpolation: Wrap<QuantileInterpolOptions>,
547
- ) -> Self {
558
+ ) -> RbResult<Self> {
548
559
  let ldf = self.ldf.clone();
549
- ldf.quantile(quantile.inner.clone(), interpolation.0).into()
560
+ let out = ldf
561
+ .quantile(quantile.inner.clone(), interpolation.0)
562
+ .map_err(RbPolarsErr::from)?;
563
+ Ok(out.into())
550
564
  }
551
565
 
552
566
  pub fn explode(&self, column: RArray) -> RbResult<Self> {