polars-df 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/Cargo.lock +360 -361
  4. data/ext/polars/Cargo.toml +10 -7
  5. data/ext/polars/src/batched_csv.rs +1 -1
  6. data/ext/polars/src/conversion/any_value.rs +261 -0
  7. data/ext/polars/src/conversion/chunked_array.rs +4 -4
  8. data/ext/polars/src/conversion/mod.rs +51 -10
  9. data/ext/polars/src/dataframe/construction.rs +6 -8
  10. data/ext/polars/src/dataframe/general.rs +19 -29
  11. data/ext/polars/src/dataframe/io.rs +43 -33
  12. data/ext/polars/src/error.rs +26 -4
  13. data/ext/polars/src/expr/categorical.rs +0 -10
  14. data/ext/polars/src/expr/datetime.rs +4 -12
  15. data/ext/polars/src/expr/general.rs +123 -110
  16. data/ext/polars/src/expr/mod.rs +2 -2
  17. data/ext/polars/src/expr/rolling.rs +17 -9
  18. data/ext/polars/src/expr/string.rs +2 -6
  19. data/ext/polars/src/functions/eager.rs +10 -10
  20. data/ext/polars/src/functions/lazy.rs +21 -21
  21. data/ext/polars/src/functions/range.rs +6 -12
  22. data/ext/polars/src/interop/numo/to_numo_series.rs +2 -1
  23. data/ext/polars/src/lazyframe/mod.rs +81 -98
  24. data/ext/polars/src/lib.rs +55 -45
  25. data/ext/polars/src/map/dataframe.rs +2 -2
  26. data/ext/polars/src/rb_modules.rs +25 -1
  27. data/ext/polars/src/series/aggregation.rs +4 -2
  28. data/ext/polars/src/series/arithmetic.rs +21 -11
  29. data/ext/polars/src/series/construction.rs +56 -38
  30. data/ext/polars/src/series/export.rs +1 -1
  31. data/ext/polars/src/series/mod.rs +31 -10
  32. data/ext/polars/src/sql.rs +3 -1
  33. data/lib/polars/array_expr.rb +4 -4
  34. data/lib/polars/batched_csv_reader.rb +2 -2
  35. data/lib/polars/cat_expr.rb +0 -36
  36. data/lib/polars/cat_name_space.rb +0 -37
  37. data/lib/polars/data_frame.rb +93 -101
  38. data/lib/polars/data_types.rb +1 -1
  39. data/lib/polars/date_time_expr.rb +525 -573
  40. data/lib/polars/date_time_name_space.rb +263 -464
  41. data/lib/polars/dynamic_group_by.rb +3 -3
  42. data/lib/polars/exceptions.rb +3 -0
  43. data/lib/polars/expr.rb +367 -330
  44. data/lib/polars/expr_dispatch.rb +1 -1
  45. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  46. data/lib/polars/functions/as_datatype.rb +63 -40
  47. data/lib/polars/functions/lazy.rb +63 -14
  48. data/lib/polars/functions/lit.rb +1 -1
  49. data/lib/polars/functions/range/date_range.rb +18 -77
  50. data/lib/polars/functions/range/datetime_range.rb +4 -4
  51. data/lib/polars/functions/range/int_range.rb +2 -2
  52. data/lib/polars/functions/range/time_range.rb +4 -4
  53. data/lib/polars/functions/repeat.rb +1 -1
  54. data/lib/polars/functions/whenthen.rb +1 -1
  55. data/lib/polars/io/csv.rb +8 -8
  56. data/lib/polars/io/ipc.rb +3 -3
  57. data/lib/polars/io/json.rb +13 -2
  58. data/lib/polars/io/ndjson.rb +15 -4
  59. data/lib/polars/io/parquet.rb +5 -4
  60. data/lib/polars/lazy_frame.rb +120 -106
  61. data/lib/polars/lazy_group_by.rb +1 -1
  62. data/lib/polars/list_expr.rb +11 -11
  63. data/lib/polars/list_name_space.rb +5 -1
  64. data/lib/polars/rolling_group_by.rb +5 -7
  65. data/lib/polars/series.rb +105 -189
  66. data/lib/polars/string_expr.rb +42 -67
  67. data/lib/polars/string_name_space.rb +5 -4
  68. data/lib/polars/testing.rb +2 -2
  69. data/lib/polars/utils/constants.rb +9 -0
  70. data/lib/polars/utils/convert.rb +97 -0
  71. data/lib/polars/utils/parse.rb +89 -0
  72. data/lib/polars/utils/various.rb +76 -0
  73. data/lib/polars/utils/wrap.rb +19 -0
  74. data/lib/polars/utils.rb +4 -330
  75. data/lib/polars/version.rb +1 -1
  76. data/lib/polars/whenthen.rb +6 -6
  77. data/lib/polars.rb +11 -0
  78. metadata +9 -4
  79. data/ext/polars/src/conversion/anyvalue.rs +0 -186
@@ -9,8 +9,8 @@ use crate::{RbDataFrame, RbResult, RbSeries};
9
9
  pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
10
10
  use polars_core::error::PolarsResult;
11
11
 
12
- let mut iter = seq.each();
13
- let first = iter.next().unwrap()?;
12
+ let mut iter = seq.into_iter();
13
+ let first = iter.next().unwrap();
14
14
 
15
15
  let first_rdf = get_df(first)?;
16
16
  let identity_df = first_rdf.slice(0, 0);
@@ -18,7 +18,7 @@ pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
18
18
  let mut rdfs: Vec<PolarsResult<DataFrame>> = vec![Ok(first_rdf)];
19
19
 
20
20
  for item in iter {
21
- let rdf = get_df(item?)?;
21
+ let rdf = get_df(item)?;
22
22
  rdfs.push(Ok(rdf));
23
23
  }
24
24
 
@@ -37,13 +37,13 @@ pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
37
37
  }
38
38
 
39
39
  pub fn concat_series(seq: RArray) -> RbResult<RbSeries> {
40
- let mut iter = seq.each();
41
- let first = iter.next().unwrap()?;
40
+ let mut iter = seq.into_iter();
41
+ let first = iter.next().unwrap();
42
42
 
43
43
  let mut s = get_series(first)?;
44
44
 
45
45
  for res in iter {
46
- let item = res?;
46
+ let item = res;
47
47
  let item = get_series(item)?;
48
48
  s.append(&item).map_err(RbPolarsErr::from)?;
49
49
  }
@@ -52,8 +52,8 @@ pub fn concat_series(seq: RArray) -> RbResult<RbSeries> {
52
52
 
53
53
  pub fn concat_df_diagonal(seq: RArray) -> RbResult<RbDataFrame> {
54
54
  let mut dfs = Vec::new();
55
- for item in seq.each() {
56
- dfs.push(get_df(item?)?);
55
+ for item in seq.into_iter() {
56
+ dfs.push(get_df(item)?);
57
57
  }
58
58
  let df = functions::concat_df_diagonal(&dfs).map_err(RbPolarsErr::from)?;
59
59
  Ok(df.into())
@@ -61,8 +61,8 @@ pub fn concat_df_diagonal(seq: RArray) -> RbResult<RbDataFrame> {
61
61
 
62
62
  pub fn concat_df_horizontal(seq: RArray) -> RbResult<RbDataFrame> {
63
63
  let mut dfs = Vec::new();
64
- for item in seq.each() {
65
- dfs.push(get_df(item?)?);
64
+ for item in seq.into_iter() {
65
+ dfs.push(get_df(item)?);
66
66
  }
67
67
  let df = functions::concat_df_horizontal(&dfs).map_err(RbPolarsErr::from)?;
68
68
  Ok(df.into())
@@ -58,7 +58,7 @@ pub fn rolling_cov(
58
58
  pub fn arg_sort_by(
59
59
  by: RArray,
60
60
  descending: Vec<bool>,
61
- nulls_last: bool,
61
+ nulls_last: Vec<bool>,
62
62
  multithreaded: bool,
63
63
  maintain_order: bool,
64
64
  ) -> RbResult<RbExpr> {
@@ -95,12 +95,12 @@ pub fn col(name: String) -> RbExpr {
95
95
 
96
96
  pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
97
97
  let lfs = lfs
98
- .each()
99
- .map(|v| <&RbLazyFrame>::try_convert(v?))
98
+ .into_iter()
99
+ .map(<&RbLazyFrame>::try_convert)
100
100
  .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
101
101
 
102
102
  Ok(RArray::from_iter(lfs.iter().map(|lf| {
103
- let df = lf.ldf.clone().collect().unwrap();
103
+ let df = lf.ldf.borrow().clone().collect().unwrap();
104
104
  RbDataFrame::new(df)
105
105
  })))
106
106
  }
@@ -118,8 +118,8 @@ pub fn concat_lf(
118
118
  let (seq, len) = get_rbseq(lfs)?;
119
119
  let mut lfs = Vec::with_capacity(len);
120
120
 
121
- for res in seq.each() {
122
- let item = res?;
121
+ for res in seq.into_iter() {
122
+ let item = res;
123
123
  let lf = get_lf(item)?;
124
124
  lfs.push(lf);
125
125
  }
@@ -184,14 +184,9 @@ pub fn concat_lf_diagonal(
184
184
  parallel: bool,
185
185
  to_supertypes: bool,
186
186
  ) -> RbResult<RbLazyFrame> {
187
- let iter = lfs.each();
187
+ let iter = lfs.into_iter();
188
188
 
189
- let lfs = iter
190
- .map(|item| {
191
- let item = item?;
192
- get_lf(item)
193
- })
194
- .collect::<RbResult<Vec<_>>>()?;
189
+ let lfs = iter.map(get_lf).collect::<RbResult<Vec<_>>>()?;
195
190
 
196
191
  let lf = dsl::functions::concat_lf_diagonal(
197
192
  lfs,
@@ -206,17 +201,22 @@ pub fn concat_lf_diagonal(
206
201
  Ok(lf.into())
207
202
  }
208
203
 
209
- pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
210
- dsl::dtype_cols(dtypes).into()
211
- }
212
-
213
- pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
204
+ pub fn dtype_cols(dtypes: RArray) -> RbResult<RbExpr> {
214
205
  let dtypes = dtypes
215
- .each()
216
- .map(|v| Wrap::<DataType>::try_convert(v?))
206
+ .into_iter()
207
+ .map(Wrap::<DataType>::try_convert)
217
208
  .collect::<RbResult<Vec<Wrap<DataType>>>>()?;
218
209
  let dtypes = vec_extract_wrapped(dtypes);
219
- Ok(crate::functions::lazy::dtype_cols(dtypes))
210
+ Ok(dsl::dtype_cols(dtypes).into())
211
+ }
212
+
213
+ pub fn index_cols(indices: Vec<i64>) -> RbExpr {
214
+ if indices.len() == 1 {
215
+ dsl::nth(indices[0])
216
+ } else {
217
+ dsl::index_cols(indices)
218
+ }
219
+ .into()
220
220
  }
221
221
 
222
222
  #[allow(clippy::too_many_arguments)]
@@ -27,33 +27,27 @@ pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: &RbExpr, dtype: Wrap<DataT
27
27
  pub fn date_range(
28
28
  start: &RbExpr,
29
29
  end: &RbExpr,
30
- every: String,
30
+ interval: String,
31
31
  closed: Wrap<ClosedWindow>,
32
- time_unit: Option<Wrap<TimeUnit>>,
33
- time_zone: Option<TimeZone>,
34
32
  ) -> RbExpr {
35
33
  let start = start.inner.clone();
36
34
  let end = end.inner.clone();
37
- let every = Duration::parse(&every);
35
+ let interval = Duration::parse(&interval);
38
36
  let closed = closed.0;
39
- let time_unit = time_unit.map(|x| x.0);
40
- dsl::date_range(start, end, every, closed, time_unit, time_zone).into()
37
+ dsl::date_range(start, end, interval, closed).into()
41
38
  }
42
39
 
43
40
  pub fn date_ranges(
44
41
  start: &RbExpr,
45
42
  end: &RbExpr,
46
- every: String,
43
+ interval: String,
47
44
  closed: Wrap<ClosedWindow>,
48
- time_unit: Option<Wrap<TimeUnit>>,
49
- time_zone: Option<TimeZone>,
50
45
  ) -> RbExpr {
51
46
  let start = start.inner.clone();
52
47
  let end = end.inner.clone();
53
- let every = Duration::parse(&every);
48
+ let interval = Duration::parse(&interval);
54
49
  let closed = closed.0;
55
- let time_unit = time_unit.map(|x| x.0);
56
- dsl::date_ranges(start, end, every, closed, time_unit, time_zone).into()
50
+ dsl::date_ranges(start, end, interval, closed).into()
57
51
  }
58
52
 
59
53
  pub fn datetime_range(
@@ -1,4 +1,5 @@
1
1
  use magnus::{class, prelude::*, Module, RArray, RClass, RModule, Value};
2
+ use polars::series::BitRepr;
2
3
  use polars_core::prelude::*;
3
4
 
4
5
  use crate::error::RbPolarsErr;
@@ -23,7 +24,7 @@ impl RbSeries {
23
24
  .funcall("cast", (np_arr,))
24
25
  }
25
26
  dt if dt.is_numeric() => {
26
- if s.bit_repr_is_large() {
27
+ if let Some(BitRepr::Large(_)) = s.bit_repr() {
27
28
  let s = s.cast(&DataType::Float64).unwrap();
28
29
  let ca = s.f64().unwrap();
29
30
  // TODO make more efficient