polars-df 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/Cargo.lock +468 -538
  4. data/Cargo.toml +1 -0
  5. data/README.md +8 -7
  6. data/ext/polars/Cargo.toml +17 -10
  7. data/ext/polars/src/batched_csv.rs +26 -26
  8. data/ext/polars/src/conversion.rs +121 -93
  9. data/ext/polars/src/dataframe.rs +116 -71
  10. data/ext/polars/src/error.rs +0 -5
  11. data/ext/polars/src/expr/binary.rs +18 -6
  12. data/ext/polars/src/expr/datetime.rs +10 -12
  13. data/ext/polars/src/expr/general.rs +68 -284
  14. data/ext/polars/src/expr/list.rs +17 -9
  15. data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
  16. data/ext/polars/src/expr/name.rs +44 -0
  17. data/ext/polars/src/expr/rolling.rs +196 -0
  18. data/ext/polars/src/expr/string.rs +85 -58
  19. data/ext/polars/src/file.rs +3 -3
  20. data/ext/polars/src/functions/aggregation.rs +35 -0
  21. data/ext/polars/src/functions/eager.rs +7 -31
  22. data/ext/polars/src/functions/io.rs +10 -10
  23. data/ext/polars/src/functions/lazy.rs +66 -41
  24. data/ext/polars/src/functions/meta.rs +30 -0
  25. data/ext/polars/src/functions/misc.rs +8 -0
  26. data/ext/polars/src/functions/mod.rs +5 -0
  27. data/ext/polars/src/functions/random.rs +6 -0
  28. data/ext/polars/src/functions/range.rs +46 -0
  29. data/ext/polars/src/functions/string_cache.rs +11 -0
  30. data/ext/polars/src/functions/whenthen.rs +7 -7
  31. data/ext/polars/src/lazyframe.rs +47 -42
  32. data/ext/polars/src/lib.rs +156 -72
  33. data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
  34. data/ext/polars/src/{apply → map}/mod.rs +3 -3
  35. data/ext/polars/src/{apply → map}/series.rs +12 -16
  36. data/ext/polars/src/object.rs +1 -1
  37. data/ext/polars/src/rb_modules.rs +22 -7
  38. data/ext/polars/src/series/construction.rs +4 -4
  39. data/ext/polars/src/series/export.rs +2 -2
  40. data/ext/polars/src/series/set_at_idx.rs +33 -17
  41. data/ext/polars/src/series.rs +7 -27
  42. data/ext/polars/src/sql.rs +46 -0
  43. data/lib/polars/config.rb +530 -0
  44. data/lib/polars/data_frame.rb +115 -82
  45. data/lib/polars/date_time_expr.rb +13 -18
  46. data/lib/polars/date_time_name_space.rb +5 -25
  47. data/lib/polars/dynamic_group_by.rb +2 -2
  48. data/lib/polars/expr.rb +177 -94
  49. data/lib/polars/functions.rb +29 -37
  50. data/lib/polars/group_by.rb +38 -55
  51. data/lib/polars/io.rb +37 -2
  52. data/lib/polars/lazy_frame.rb +93 -66
  53. data/lib/polars/lazy_functions.rb +36 -48
  54. data/lib/polars/lazy_group_by.rb +7 -8
  55. data/lib/polars/list_expr.rb +12 -8
  56. data/lib/polars/list_name_space.rb +2 -2
  57. data/lib/polars/name_expr.rb +198 -0
  58. data/lib/polars/rolling_group_by.rb +2 -2
  59. data/lib/polars/series.rb +26 -13
  60. data/lib/polars/sql_context.rb +194 -0
  61. data/lib/polars/string_expr.rb +114 -60
  62. data/lib/polars/string_name_space.rb +19 -4
  63. data/lib/polars/utils.rb +12 -0
  64. data/lib/polars/version.rb +1 -1
  65. data/lib/polars.rb +3 -0
  66. metadata +18 -7
  67. data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -1,10 +1,12 @@
1
1
  use magnus::encoding::{self, EncodingCapable};
2
- use magnus::{class, Float, Integer, RArray, RString, Value};
2
+ use magnus::{
3
+ class, prelude::*, typed_data::Obj, value::Opaque, Float, Integer, RArray, RString, Ruby, Value,
4
+ };
3
5
  use polars::lazy::dsl;
4
6
  use polars::prelude::*;
5
7
 
6
- use crate::apply::lazy::binary_lambda;
7
8
  use crate::conversion::{get_lf, get_rbseq, Wrap};
9
+ use crate::map::lazy::binary_lambda;
8
10
  use crate::prelude::vec_extract_wrapped;
9
11
  use crate::rb_exprs_to_exprs;
10
12
  use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
@@ -15,10 +17,6 @@ macro_rules! set_unwrapped_or_0 {
15
17
  };
16
18
  }
17
19
 
18
- pub fn arange(low: &RbExpr, high: &RbExpr, step: i64) -> RbExpr {
19
- dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
20
- }
21
-
22
20
  pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
23
21
  let by = rb_exprs_to_exprs(by)?;
24
22
  Ok(dsl::arg_sort_by(by, &descending).into())
@@ -30,7 +28,7 @@ pub fn arg_where(condition: &RbExpr) -> RbExpr {
30
28
 
31
29
  pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
32
30
  let exprs = rb_exprs_to_exprs(exprs)?;
33
- Ok(dsl::as_struct(&exprs).into())
31
+ Ok(dsl::as_struct(exprs).into())
34
32
  }
35
33
 
36
34
  pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
@@ -45,7 +43,7 @@ pub fn col(name: String) -> RbExpr {
45
43
  pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
46
44
  let lfs = lfs
47
45
  .each()
48
- .map(|v| v?.try_convert::<&RbLazyFrame>())
46
+ .map(|v| <&RbLazyFrame>::try_convert(v?))
49
47
  .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
50
48
 
51
49
  Ok(RArray::from_iter(lfs.iter().map(|lf| {
@@ -85,36 +83,65 @@ pub fn concat_lf(
85
83
  Ok(lf.into())
86
84
  }
87
85
 
86
+ pub fn concat_lf_diagonal(
87
+ lfs: RArray,
88
+ rechunk: bool,
89
+ parallel: bool,
90
+ to_supertypes: bool,
91
+ ) -> RbResult<RbLazyFrame> {
92
+ let iter = lfs.each();
93
+
94
+ let lfs = iter
95
+ .map(|item| {
96
+ let item = item?;
97
+ get_lf(item)
98
+ })
99
+ .collect::<RbResult<Vec<_>>>()?;
100
+
101
+ let lf = dsl::functions::concat_lf_diagonal(
102
+ lfs,
103
+ UnionArgs {
104
+ rechunk,
105
+ parallel,
106
+ to_supertypes,
107
+ },
108
+ )
109
+ .map_err(RbPolarsErr::from)?;
110
+ Ok(lf.into())
111
+ }
112
+
88
113
  #[allow(clippy::too_many_arguments)]
89
114
  pub fn duration(
115
+ weeks: Option<&RbExpr>,
90
116
  days: Option<&RbExpr>,
117
+ hours: Option<&RbExpr>,
118
+ minutes: Option<&RbExpr>,
91
119
  seconds: Option<&RbExpr>,
92
- nanoseconds: Option<&RbExpr>,
93
- microseconds: Option<&RbExpr>,
94
120
  milliseconds: Option<&RbExpr>,
95
- minutes: Option<&RbExpr>,
96
- hours: Option<&RbExpr>,
97
- weeks: Option<&RbExpr>,
121
+ microseconds: Option<&RbExpr>,
122
+ nanoseconds: Option<&RbExpr>,
123
+ time_unit: Wrap<TimeUnit>,
98
124
  ) -> RbExpr {
99
125
  set_unwrapped_or_0!(
126
+ weeks,
100
127
  days,
128
+ hours,
129
+ minutes,
101
130
  seconds,
102
- nanoseconds,
103
- microseconds,
104
131
  milliseconds,
105
- minutes,
106
- hours,
107
- weeks,
132
+ microseconds,
133
+ nanoseconds,
108
134
  );
109
135
  let args = DurationArgs {
136
+ weeks,
110
137
  days,
138
+ hours,
139
+ minutes,
111
140
  seconds,
112
- nanoseconds,
113
- microseconds,
114
141
  milliseconds,
115
- minutes,
116
- hours,
117
- weeks,
142
+ microseconds,
143
+ nanoseconds,
144
+ time_unit: time_unit.0,
118
145
  };
119
146
  dsl::duration(args).into()
120
147
  }
@@ -137,23 +164,27 @@ pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
137
164
 
138
165
  pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
139
166
  let exprs = rb_exprs_to_exprs(exprs)?;
167
+ let lambda = Opaque::from(lambda);
140
168
 
141
- let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
169
+ let func =
170
+ move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
142
171
  Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
143
172
  }
144
173
 
145
174
  pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
146
175
  let exprs = rb_exprs_to_exprs(exprs)?;
176
+ let lambda = Opaque::from(lambda);
147
177
 
148
- let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
149
- Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
178
+ let func =
179
+ move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
180
+ Ok(polars::lazy::dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
150
181
  }
151
182
 
152
183
  pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
153
184
  if value.is_kind_of(class::true_class()) || value.is_kind_of(class::false_class()) {
154
- Ok(dsl::lit(value.try_convert::<bool>()?).into())
185
+ Ok(dsl::lit(bool::try_convert(value)?).into())
155
186
  } else if let Some(v) = Integer::from_value(value) {
156
- match v.try_convert::<i64>() {
187
+ match v.to_i64() {
157
188
  Ok(val) => {
158
189
  if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
159
190
  Ok(dsl::lit(val as i32).into())
@@ -162,19 +193,19 @@ pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
162
193
  }
163
194
  }
164
195
  _ => {
165
- let val = value.try_convert::<u64>()?;
196
+ let val = v.to_u64()?;
166
197
  Ok(dsl::lit(val).into())
167
198
  }
168
199
  }
169
200
  } else if let Some(v) = Float::from_value(value) {
170
- Ok(dsl::lit(v.try_convert::<f64>()?).into())
201
+ Ok(dsl::lit(v.to_f64()).into())
171
202
  } else if let Some(v) = RString::from_value(value) {
172
203
  if v.enc_get() == encoding::Index::utf8() {
173
- Ok(dsl::lit(v.try_convert::<String>()?).into())
204
+ Ok(dsl::lit(v.to_string()?).into())
174
205
  } else {
175
206
  Ok(dsl::lit(unsafe { v.as_slice() }).into())
176
207
  }
177
- } else if let Ok(series) = value.try_convert::<&RbSeries>() {
208
+ } else if let Ok(series) = Obj::<RbSeries>::try_convert(value) {
178
209
  Ok(dsl::lit(series.series.borrow().clone()).into())
179
210
  } else if value.is_nil() {
180
211
  Ok(dsl::lit(Null {}).into())
@@ -218,8 +249,8 @@ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool
218
249
  .into()
219
250
  }
220
251
 
221
- pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
222
- polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
252
+ pub fn cov(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
253
+ polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone(), ddof).into()
223
254
  }
224
255
 
225
256
  pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
@@ -236,14 +267,8 @@ pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
236
267
  pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
237
268
  let dtypes = dtypes
238
269
  .each()
239
- .map(|v| v?.try_convert::<Wrap<DataType>>())
270
+ .map(|v| Wrap::<DataType>::try_convert(v?))
240
271
  .collect::<RbResult<Vec<Wrap<DataType>>>>()?;
241
272
  let dtypes = vec_extract_wrapped(dtypes);
242
273
  Ok(crate::functions::lazy::dtype_cols(dtypes))
243
274
  }
244
-
245
- // TODO rename to sum_horizontal
246
- pub fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
247
- let exprs = rb_exprs_to_exprs(exprs)?;
248
- Ok(polars::lazy::dsl::sum_horizontal(exprs).into())
249
- }
@@ -1,8 +1,38 @@
1
1
  use magnus::{IntoValue, Value};
2
+ use polars_core;
3
+ use polars_core::fmt::FloatFmt;
2
4
  use polars_core::prelude::IDX_DTYPE;
5
+ use polars_core::POOL;
3
6
 
4
7
  use crate::conversion::Wrap;
8
+ use crate::{RbResult, RbValueError};
5
9
 
6
10
  pub fn get_idx_type() -> Value {
7
11
  Wrap(IDX_DTYPE).into_value()
8
12
  }
13
+
14
+ pub fn threadpool_size() -> usize {
15
+ POOL.current_num_threads()
16
+ }
17
+
18
+ pub fn set_float_fmt(fmt: String) -> RbResult<()> {
19
+ let fmt = match fmt.as_str() {
20
+ "full" => FloatFmt::Full,
21
+ "mixed" => FloatFmt::Mixed,
22
+ e => {
23
+ return Err(RbValueError::new_err(format!(
24
+ "fmt must be one of {{'full', 'mixed'}}, got {e}",
25
+ )))
26
+ }
27
+ };
28
+ polars_core::fmt::set_float_fmt(fmt);
29
+ Ok(())
30
+ }
31
+
32
+ pub fn get_float_fmt() -> RbResult<String> {
33
+ let strfmt = match polars_core::fmt::get_float_fmt() {
34
+ FloatFmt::Full => "full",
35
+ FloatFmt::Mixed => "mixed",
36
+ };
37
+ Ok(strfmt.to_string())
38
+ }
@@ -0,0 +1,8 @@
1
+ use crate::conversion::Wrap;
2
+ use crate::prelude::DataType;
3
+ use crate::RbResult;
4
+
5
+ pub fn dtype_str_repr(dtype: Wrap<DataType>) -> RbResult<String> {
6
+ let dtype = dtype.0;
7
+ Ok(dtype.to_string())
8
+ }
@@ -1,5 +1,10 @@
1
+ pub mod aggregation;
1
2
  pub mod eager;
2
3
  pub mod io;
3
4
  pub mod lazy;
4
5
  pub mod meta;
6
+ pub mod misc;
7
+ pub mod random;
8
+ pub mod range;
9
+ pub mod string_cache;
5
10
  pub mod whenthen;
@@ -0,0 +1,6 @@
1
+ use crate::RbResult;
2
+
3
+ pub fn set_random_seed(seed: u64) -> RbResult<()> {
4
+ polars_core::random::set_global_random_seed(seed);
5
+ Ok(())
6
+ }
@@ -0,0 +1,46 @@
1
+ use polars::lazy::dsl;
2
+ use polars_core::datatypes::{TimeUnit, TimeZone};
3
+
4
+ use crate::conversion::Wrap;
5
+ use crate::prelude::*;
6
+ use crate::RbExpr;
7
+
8
+ pub fn int_range(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap<DataType>) -> RbExpr {
9
+ let dtype = dtype.0;
10
+
11
+ let mut result = dsl::int_range(start.inner.clone(), end.inner.clone(), step);
12
+
13
+ if dtype != DataType::Int64 {
14
+ result = result.cast(dtype)
15
+ }
16
+
17
+ result.into()
18
+ }
19
+
20
+ pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap<DataType>) -> RbExpr {
21
+ let dtype = dtype.0;
22
+
23
+ let mut result = dsl::int_ranges(start.inner.clone(), end.inner.clone(), step);
24
+
25
+ if dtype != DataType::Int64 {
26
+ result = result.cast(DataType::List(Box::new(dtype)))
27
+ }
28
+
29
+ result.into()
30
+ }
31
+
32
+ pub fn date_range(
33
+ start: &RbExpr,
34
+ end: &RbExpr,
35
+ every: String,
36
+ closed: Wrap<ClosedWindow>,
37
+ time_unit: Option<Wrap<TimeUnit>>,
38
+ time_zone: Option<TimeZone>,
39
+ ) -> RbExpr {
40
+ let start = start.inner.clone();
41
+ let end = end.inner.clone();
42
+ let every = Duration::parse(&every);
43
+ let closed = closed.0;
44
+ let time_unit = time_unit.map(|x| x.0);
45
+ dsl::date_range(start, end, every, closed, time_unit, time_zone).into()
46
+ }
@@ -0,0 +1,11 @@
1
+ pub fn enable_string_cache() {
2
+ polars_core::enable_string_cache()
3
+ }
4
+
5
+ pub fn disable_string_cache() {
6
+ polars_core::disable_string_cache()
7
+ }
8
+
9
+ pub fn using_string_cache() -> bool {
10
+ polars_core::using_string_cache()
11
+ }
@@ -16,23 +16,23 @@ impl From<dsl::When> for RbWhen {
16
16
 
17
17
  #[magnus::wrap(class = "Polars::RbWhenThen")]
18
18
  #[derive(Clone)]
19
- pub struct RbWhenThen {
20
- pub inner: dsl::WhenThen,
19
+ pub struct RbThen {
20
+ pub inner: dsl::Then,
21
21
  }
22
22
 
23
- impl From<dsl::WhenThen> for RbWhenThen {
24
- fn from(inner: dsl::WhenThen) -> Self {
25
- RbWhenThen { inner }
23
+ impl From<dsl::Then> for RbThen {
24
+ fn from(inner: dsl::Then) -> Self {
25
+ RbThen { inner }
26
26
  }
27
27
  }
28
28
 
29
29
  impl RbWhen {
30
- pub fn then(&self, expr: &RbExpr) -> RbWhenThen {
30
+ pub fn then(&self, expr: &RbExpr) -> RbThen {
31
31
  self.inner.clone().then(expr.inner.clone()).into()
32
32
  }
33
33
  }
34
34
 
35
- impl RbWhenThen {
35
+ impl RbThen {
36
36
  pub fn overwise(&self, expr: &RbExpr) -> RbExpr {
37
37
  self.inner.clone().otherwise(expr.inner.clone()).into()
38
38
  }
@@ -1,4 +1,4 @@
1
- use magnus::{IntoValue, RArray, RHash, Value};
1
+ use magnus::{IntoValue, RArray, RHash, TryConvert, Value};
2
2
  use polars::io::RowCount;
3
3
  use polars::lazy::frame::LazyFrame;
4
4
  use polars::prelude::*;
@@ -78,32 +78,32 @@ impl RbLazyFrame {
78
78
  pub fn new_from_csv(arguments: &[Value]) -> RbResult<Self> {
79
79
  // start arguments
80
80
  // this pattern is needed for more than 16
81
- let path: String = arguments[0].try_convert()?;
82
- let sep: String = arguments[1].try_convert()?;
83
- let has_header: bool = arguments[2].try_convert()?;
84
- let ignore_errors: bool = arguments[3].try_convert()?;
85
- let skip_rows: usize = arguments[4].try_convert()?;
86
- let n_rows: Option<usize> = arguments[5].try_convert()?;
87
- let cache: bool = arguments[6].try_convert()?;
88
- let overwrite_dtype: Option<Vec<(String, Wrap<DataType>)>> = arguments[7].try_convert()?;
89
- let low_memory: bool = arguments[8].try_convert()?;
90
- let comment_char: Option<String> = arguments[9].try_convert()?;
91
- let quote_char: Option<String> = arguments[10].try_convert()?;
92
- let null_values: Option<Wrap<NullValues>> = arguments[11].try_convert()?;
93
- let infer_schema_length: Option<usize> = arguments[12].try_convert()?;
94
- let with_schema_modify: Option<Value> = arguments[13].try_convert()?;
95
- let rechunk: bool = arguments[14].try_convert()?;
96
- let skip_rows_after_header: usize = arguments[15].try_convert()?;
97
- let encoding: Wrap<CsvEncoding> = arguments[16].try_convert()?;
98
- let row_count: Option<(String, IdxSize)> = arguments[17].try_convert()?;
99
- let try_parse_dates: bool = arguments[18].try_convert()?;
100
- let eol_char: String = arguments[19].try_convert()?;
81
+ let path = String::try_convert(arguments[0])?;
82
+ let separator = String::try_convert(arguments[1])?;
83
+ let has_header = bool::try_convert(arguments[2])?;
84
+ let ignore_errors = bool::try_convert(arguments[3])?;
85
+ let skip_rows = usize::try_convert(arguments[4])?;
86
+ let n_rows = Option::<usize>::try_convert(arguments[5])?;
87
+ let cache = bool::try_convert(arguments[6])?;
88
+ let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[7])?;
89
+ let low_memory = bool::try_convert(arguments[8])?;
90
+ let comment_char = Option::<String>::try_convert(arguments[9])?;
91
+ let quote_char = Option::<String>::try_convert(arguments[10])?;
92
+ let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[11])?;
93
+ let infer_schema_length = Option::<usize>::try_convert(arguments[12])?;
94
+ let with_schema_modify = Option::<Value>::try_convert(arguments[13])?;
95
+ let rechunk = bool::try_convert(arguments[14])?;
96
+ let skip_rows_after_header = usize::try_convert(arguments[15])?;
97
+ let encoding = Wrap::<CsvEncoding>::try_convert(arguments[16])?;
98
+ let row_count = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
99
+ let try_parse_dates = bool::try_convert(arguments[18])?;
100
+ let eol_char = String::try_convert(arguments[19])?;
101
101
  // end arguments
102
102
 
103
103
  let null_values = null_values.map(|w| w.0);
104
104
  let comment_char = comment_char.map(|s| s.as_bytes()[0]);
105
105
  let quote_char = quote_char.map(|s| s.as_bytes()[0]);
106
- let delimiter = sep.as_bytes()[0];
106
+ let separator = separator.as_bytes()[0];
107
107
  let eol_char = eol_char.as_bytes()[0];
108
108
 
109
109
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
@@ -116,7 +116,7 @@ impl RbLazyFrame {
116
116
  });
117
117
  let r = LazyCsvReader::new(path)
118
118
  .with_infer_schema_length(infer_schema_length)
119
- .with_delimiter(delimiter)
119
+ .with_separator(separator)
120
120
  .has_header(has_header)
121
121
  .with_ignore_errors(ignore_errors)
122
122
  .with_skip_rows(skip_rows)
@@ -151,6 +151,7 @@ impl RbLazyFrame {
151
151
  row_count: Option<(String, IdxSize)>,
152
152
  low_memory: bool,
153
153
  use_statistics: bool,
154
+ hive_partitioning: bool,
154
155
  ) -> RbResult<Self> {
155
156
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
156
157
  let args = ScanArgsParquet {
@@ -163,6 +164,7 @@ impl RbLazyFrame {
163
164
  // TODO support cloud options
164
165
  cloud_options: None,
165
166
  use_statistics,
167
+ hive_partitioning,
166
168
  };
167
169
  let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
168
170
  Ok(lf.into())
@@ -217,6 +219,7 @@ impl RbLazyFrame {
217
219
  slice_pushdown: bool,
218
220
  cse: bool,
219
221
  allow_streaming: bool,
222
+ _eager: bool,
220
223
  ) -> RbLazyFrame {
221
224
  let ldf = self.ldf.clone();
222
225
  let ldf = ldf
@@ -224,8 +227,9 @@ impl RbLazyFrame {
224
227
  .with_predicate_pushdown(predicate_pushdown)
225
228
  .with_simplify_expr(simplify_expr)
226
229
  .with_slice_pushdown(slice_pushdown)
227
- .with_common_subplan_elimination(cse)
230
+ .with_comm_subplan_elim(cse)
228
231
  .with_streaming(allow_streaming)
232
+ ._with_eager(_eager)
229
233
  .with_projection_pushdown(projection_pushdown);
230
234
  ldf.into()
231
235
  }
@@ -318,20 +322,20 @@ impl RbLazyFrame {
318
322
  Ok(ldf.select(exprs).into())
319
323
  }
320
324
 
321
- pub fn groupby(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
325
+ pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
322
326
  let ldf = self.ldf.clone();
323
327
  let by = rb_exprs_to_exprs(by)?;
324
328
  let lazy_gb = if maintain_order {
325
- ldf.groupby_stable(by)
329
+ ldf.group_by_stable(by)
326
330
  } else {
327
- ldf.groupby(by)
331
+ ldf.group_by(by)
328
332
  };
329
333
  Ok(RbLazyGroupBy {
330
334
  lgb: RefCell::new(Some(lazy_gb)),
331
335
  })
332
336
  }
333
337
 
334
- pub fn groupby_rolling(
338
+ pub fn group_by_rolling(
335
339
  &self,
336
340
  index_column: &RbExpr,
337
341
  period: String,
@@ -343,7 +347,7 @@ impl RbLazyFrame {
343
347
  let closed_window = closed.0;
344
348
  let ldf = self.ldf.clone();
345
349
  let by = rb_exprs_to_exprs(by)?;
346
- let lazy_gb = ldf.groupby_rolling(
350
+ let lazy_gb = ldf.group_by_rolling(
347
351
  index_column.inner.clone(),
348
352
  by,
349
353
  RollingGroupOptions {
@@ -361,32 +365,34 @@ impl RbLazyFrame {
361
365
  }
362
366
 
363
367
  #[allow(clippy::too_many_arguments)]
364
- pub fn groupby_dynamic(
368
+ pub fn group_by_dynamic(
365
369
  &self,
366
370
  index_column: &RbExpr,
367
371
  every: String,
368
372
  period: String,
369
373
  offset: String,
370
- truncate: bool,
374
+ label: Wrap<Label>,
371
375
  include_boundaries: bool,
372
376
  closed: Wrap<ClosedWindow>,
373
377
  by: RArray,
374
378
  start_by: Wrap<StartBy>,
379
+ check_sorted: bool,
375
380
  ) -> RbResult<RbLazyGroupBy> {
376
381
  let closed_window = closed.0;
377
382
  let by = rb_exprs_to_exprs(by)?;
378
383
  let ldf = self.ldf.clone();
379
- let lazy_gb = ldf.groupby_dynamic(
384
+ let lazy_gb = ldf.group_by_dynamic(
380
385
  index_column.inner.clone(),
381
386
  by,
382
387
  DynamicGroupOptions {
383
388
  every: Duration::parse(&every),
384
389
  period: Duration::parse(&period),
385
390
  offset: Duration::parse(&offset),
386
- truncate,
391
+ label: label.0,
387
392
  include_boundaries,
388
393
  closed_window,
389
394
  start_by: start_by.0,
395
+ check_sorted,
390
396
  ..Default::default()
391
397
  },
392
398
  );
@@ -399,7 +405,7 @@ impl RbLazyFrame {
399
405
  pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
400
406
  let contexts = contexts
401
407
  .each()
402
- .map(|v| v.unwrap().try_convert())
408
+ .map(|v| TryConvert::try_convert(v.unwrap()))
403
409
  .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
404
410
  let contexts = contexts
405
411
  .into_iter()
@@ -490,14 +496,13 @@ impl RbLazyFrame {
490
496
  ldf.reverse().into()
491
497
  }
492
498
 
493
- pub fn shift(&self, periods: i64) -> Self {
494
- let ldf = self.ldf.clone();
495
- ldf.shift(periods).into()
496
- }
497
-
498
- pub fn shift_and_fill(&self, periods: i64, fill_value: &RbExpr) -> Self {
499
- let ldf = self.ldf.clone();
500
- ldf.shift_and_fill(periods, fill_value.inner.clone()).into()
499
+ pub fn shift(&self, n: &RbExpr, fill_value: Option<&RbExpr>) -> Self {
500
+ let lf = self.ldf.clone();
501
+ let out = match fill_value {
502
+ Some(v) => lf.shift_and_fill(n.inner.clone(), v.inner.clone()),
503
+ None => lf.shift(n.inner.clone()),
504
+ };
505
+ out.into()
501
506
  }
502
507
 
503
508
  pub fn fill_nan(&self, fill_value: &RbExpr) -> Self {