polars-df 0.11.0 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/Cargo.lock +360 -361
  4. data/ext/polars/Cargo.toml +10 -7
  5. data/ext/polars/src/batched_csv.rs +1 -1
  6. data/ext/polars/src/conversion/any_value.rs +261 -0
  7. data/ext/polars/src/conversion/chunked_array.rs +4 -4
  8. data/ext/polars/src/conversion/mod.rs +51 -10
  9. data/ext/polars/src/dataframe/construction.rs +6 -8
  10. data/ext/polars/src/dataframe/general.rs +19 -29
  11. data/ext/polars/src/dataframe/io.rs +43 -33
  12. data/ext/polars/src/error.rs +26 -4
  13. data/ext/polars/src/expr/categorical.rs +0 -10
  14. data/ext/polars/src/expr/datetime.rs +4 -12
  15. data/ext/polars/src/expr/general.rs +123 -110
  16. data/ext/polars/src/expr/mod.rs +2 -2
  17. data/ext/polars/src/expr/rolling.rs +17 -9
  18. data/ext/polars/src/expr/string.rs +2 -6
  19. data/ext/polars/src/functions/eager.rs +10 -10
  20. data/ext/polars/src/functions/lazy.rs +21 -21
  21. data/ext/polars/src/functions/range.rs +6 -12
  22. data/ext/polars/src/interop/numo/to_numo_series.rs +2 -1
  23. data/ext/polars/src/lazyframe/mod.rs +81 -98
  24. data/ext/polars/src/lib.rs +55 -45
  25. data/ext/polars/src/map/dataframe.rs +2 -2
  26. data/ext/polars/src/rb_modules.rs +25 -1
  27. data/ext/polars/src/series/aggregation.rs +4 -2
  28. data/ext/polars/src/series/arithmetic.rs +21 -11
  29. data/ext/polars/src/series/construction.rs +56 -38
  30. data/ext/polars/src/series/export.rs +1 -1
  31. data/ext/polars/src/series/mod.rs +31 -10
  32. data/ext/polars/src/sql.rs +3 -1
  33. data/lib/polars/array_expr.rb +4 -4
  34. data/lib/polars/batched_csv_reader.rb +2 -2
  35. data/lib/polars/cat_expr.rb +0 -36
  36. data/lib/polars/cat_name_space.rb +0 -37
  37. data/lib/polars/data_frame.rb +93 -101
  38. data/lib/polars/data_types.rb +1 -1
  39. data/lib/polars/date_time_expr.rb +525 -573
  40. data/lib/polars/date_time_name_space.rb +263 -464
  41. data/lib/polars/dynamic_group_by.rb +3 -3
  42. data/lib/polars/exceptions.rb +3 -0
  43. data/lib/polars/expr.rb +367 -330
  44. data/lib/polars/expr_dispatch.rb +1 -1
  45. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  46. data/lib/polars/functions/as_datatype.rb +63 -40
  47. data/lib/polars/functions/lazy.rb +63 -14
  48. data/lib/polars/functions/lit.rb +1 -1
  49. data/lib/polars/functions/range/date_range.rb +18 -77
  50. data/lib/polars/functions/range/datetime_range.rb +4 -4
  51. data/lib/polars/functions/range/int_range.rb +2 -2
  52. data/lib/polars/functions/range/time_range.rb +4 -4
  53. data/lib/polars/functions/repeat.rb +1 -1
  54. data/lib/polars/functions/whenthen.rb +1 -1
  55. data/lib/polars/io/csv.rb +8 -8
  56. data/lib/polars/io/ipc.rb +3 -3
  57. data/lib/polars/io/json.rb +13 -2
  58. data/lib/polars/io/ndjson.rb +15 -4
  59. data/lib/polars/io/parquet.rb +5 -4
  60. data/lib/polars/lazy_frame.rb +120 -106
  61. data/lib/polars/lazy_group_by.rb +1 -1
  62. data/lib/polars/list_expr.rb +11 -11
  63. data/lib/polars/list_name_space.rb +5 -1
  64. data/lib/polars/rolling_group_by.rb +5 -7
  65. data/lib/polars/series.rb +105 -189
  66. data/lib/polars/string_expr.rb +42 -67
  67. data/lib/polars/string_name_space.rb +5 -4
  68. data/lib/polars/testing.rb +2 -2
  69. data/lib/polars/utils/constants.rb +9 -0
  70. data/lib/polars/utils/convert.rb +97 -0
  71. data/lib/polars/utils/parse.rb +89 -0
  72. data/lib/polars/utils/various.rb +76 -0
  73. data/lib/polars/utils/wrap.rb +19 -0
  74. data/lib/polars/utils.rb +4 -330
  75. data/lib/polars/version.rb +1 -1
  76. data/lib/polars/whenthen.rb +6 -6
  77. data/lib/polars.rb +11 -0
  78. metadata +9 -4
  79. data/ext/polars/src/conversion/anyvalue.rs +0 -186
@@ -9,8 +9,8 @@ use crate::{RbDataFrame, RbResult, RbSeries};
9
9
  pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
10
10
  use polars_core::error::PolarsResult;
11
11
 
12
- let mut iter = seq.each();
13
- let first = iter.next().unwrap()?;
12
+ let mut iter = seq.into_iter();
13
+ let first = iter.next().unwrap();
14
14
 
15
15
  let first_rdf = get_df(first)?;
16
16
  let identity_df = first_rdf.slice(0, 0);
@@ -18,7 +18,7 @@ pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
18
18
  let mut rdfs: Vec<PolarsResult<DataFrame>> = vec![Ok(first_rdf)];
19
19
 
20
20
  for item in iter {
21
- let rdf = get_df(item?)?;
21
+ let rdf = get_df(item)?;
22
22
  rdfs.push(Ok(rdf));
23
23
  }
24
24
 
@@ -37,13 +37,13 @@ pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
37
37
  }
38
38
 
39
39
  pub fn concat_series(seq: RArray) -> RbResult<RbSeries> {
40
- let mut iter = seq.each();
41
- let first = iter.next().unwrap()?;
40
+ let mut iter = seq.into_iter();
41
+ let first = iter.next().unwrap();
42
42
 
43
43
  let mut s = get_series(first)?;
44
44
 
45
45
  for res in iter {
46
- let item = res?;
46
+ let item = res;
47
47
  let item = get_series(item)?;
48
48
  s.append(&item).map_err(RbPolarsErr::from)?;
49
49
  }
@@ -52,8 +52,8 @@ pub fn concat_series(seq: RArray) -> RbResult<RbSeries> {
52
52
 
53
53
  pub fn concat_df_diagonal(seq: RArray) -> RbResult<RbDataFrame> {
54
54
  let mut dfs = Vec::new();
55
- for item in seq.each() {
56
- dfs.push(get_df(item?)?);
55
+ for item in seq.into_iter() {
56
+ dfs.push(get_df(item)?);
57
57
  }
58
58
  let df = functions::concat_df_diagonal(&dfs).map_err(RbPolarsErr::from)?;
59
59
  Ok(df.into())
@@ -61,8 +61,8 @@ pub fn concat_df_diagonal(seq: RArray) -> RbResult<RbDataFrame> {
61
61
 
62
62
  pub fn concat_df_horizontal(seq: RArray) -> RbResult<RbDataFrame> {
63
63
  let mut dfs = Vec::new();
64
- for item in seq.each() {
65
- dfs.push(get_df(item?)?);
64
+ for item in seq.into_iter() {
65
+ dfs.push(get_df(item)?);
66
66
  }
67
67
  let df = functions::concat_df_horizontal(&dfs).map_err(RbPolarsErr::from)?;
68
68
  Ok(df.into())
@@ -58,7 +58,7 @@ pub fn rolling_cov(
58
58
  pub fn arg_sort_by(
59
59
  by: RArray,
60
60
  descending: Vec<bool>,
61
- nulls_last: bool,
61
+ nulls_last: Vec<bool>,
62
62
  multithreaded: bool,
63
63
  maintain_order: bool,
64
64
  ) -> RbResult<RbExpr> {
@@ -95,12 +95,12 @@ pub fn col(name: String) -> RbExpr {
95
95
 
96
96
  pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
97
97
  let lfs = lfs
98
- .each()
99
- .map(|v| <&RbLazyFrame>::try_convert(v?))
98
+ .into_iter()
99
+ .map(<&RbLazyFrame>::try_convert)
100
100
  .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
101
101
 
102
102
  Ok(RArray::from_iter(lfs.iter().map(|lf| {
103
- let df = lf.ldf.clone().collect().unwrap();
103
+ let df = lf.ldf.borrow().clone().collect().unwrap();
104
104
  RbDataFrame::new(df)
105
105
  })))
106
106
  }
@@ -118,8 +118,8 @@ pub fn concat_lf(
118
118
  let (seq, len) = get_rbseq(lfs)?;
119
119
  let mut lfs = Vec::with_capacity(len);
120
120
 
121
- for res in seq.each() {
122
- let item = res?;
121
+ for res in seq.into_iter() {
122
+ let item = res;
123
123
  let lf = get_lf(item)?;
124
124
  lfs.push(lf);
125
125
  }
@@ -184,14 +184,9 @@ pub fn concat_lf_diagonal(
184
184
  parallel: bool,
185
185
  to_supertypes: bool,
186
186
  ) -> RbResult<RbLazyFrame> {
187
- let iter = lfs.each();
187
+ let iter = lfs.into_iter();
188
188
 
189
- let lfs = iter
190
- .map(|item| {
191
- let item = item?;
192
- get_lf(item)
193
- })
194
- .collect::<RbResult<Vec<_>>>()?;
189
+ let lfs = iter.map(get_lf).collect::<RbResult<Vec<_>>>()?;
195
190
 
196
191
  let lf = dsl::functions::concat_lf_diagonal(
197
192
  lfs,
@@ -206,17 +201,22 @@ pub fn concat_lf_diagonal(
206
201
  Ok(lf.into())
207
202
  }
208
203
 
209
- pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
210
- dsl::dtype_cols(dtypes).into()
211
- }
212
-
213
- pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
204
+ pub fn dtype_cols(dtypes: RArray) -> RbResult<RbExpr> {
214
205
  let dtypes = dtypes
215
- .each()
216
- .map(|v| Wrap::<DataType>::try_convert(v?))
206
+ .into_iter()
207
+ .map(Wrap::<DataType>::try_convert)
217
208
  .collect::<RbResult<Vec<Wrap<DataType>>>>()?;
218
209
  let dtypes = vec_extract_wrapped(dtypes);
219
- Ok(crate::functions::lazy::dtype_cols(dtypes))
210
+ Ok(dsl::dtype_cols(dtypes).into())
211
+ }
212
+
213
+ pub fn index_cols(indices: Vec<i64>) -> RbExpr {
214
+ if indices.len() == 1 {
215
+ dsl::nth(indices[0])
216
+ } else {
217
+ dsl::index_cols(indices)
218
+ }
219
+ .into()
220
220
  }
221
221
 
222
222
  #[allow(clippy::too_many_arguments)]
@@ -27,33 +27,27 @@ pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: &RbExpr, dtype: Wrap<DataT
27
27
  pub fn date_range(
28
28
  start: &RbExpr,
29
29
  end: &RbExpr,
30
- every: String,
30
+ interval: String,
31
31
  closed: Wrap<ClosedWindow>,
32
- time_unit: Option<Wrap<TimeUnit>>,
33
- time_zone: Option<TimeZone>,
34
32
  ) -> RbExpr {
35
33
  let start = start.inner.clone();
36
34
  let end = end.inner.clone();
37
- let every = Duration::parse(&every);
35
+ let interval = Duration::parse(&interval);
38
36
  let closed = closed.0;
39
- let time_unit = time_unit.map(|x| x.0);
40
- dsl::date_range(start, end, every, closed, time_unit, time_zone).into()
37
+ dsl::date_range(start, end, interval, closed).into()
41
38
  }
42
39
 
43
40
  pub fn date_ranges(
44
41
  start: &RbExpr,
45
42
  end: &RbExpr,
46
- every: String,
43
+ interval: String,
47
44
  closed: Wrap<ClosedWindow>,
48
- time_unit: Option<Wrap<TimeUnit>>,
49
- time_zone: Option<TimeZone>,
50
45
  ) -> RbExpr {
51
46
  let start = start.inner.clone();
52
47
  let end = end.inner.clone();
53
- let every = Duration::parse(&every);
48
+ let interval = Duration::parse(&interval);
54
49
  let closed = closed.0;
55
- let time_unit = time_unit.map(|x| x.0);
56
- dsl::date_ranges(start, end, every, closed, time_unit, time_zone).into()
50
+ dsl::date_ranges(start, end, interval, closed).into()
57
51
  }
58
52
 
59
53
  pub fn datetime_range(
@@ -1,4 +1,5 @@
1
1
  use magnus::{class, prelude::*, Module, RArray, RClass, RModule, Value};
2
+ use polars::series::BitRepr;
2
3
  use polars_core::prelude::*;
3
4
 
4
5
  use crate::error::RbPolarsErr;
@@ -23,7 +24,7 @@ impl RbSeries {
23
24
  .funcall("cast", (np_arr,))
24
25
  }
25
26
  dt if dt.is_numeric() => {
26
- if s.bit_repr_is_large() {
27
+ if let Some(BitRepr::Large(_)) = s.bit_repr() {
27
28
  let s = s.cast(&DataType::Float64).unwrap();
28
29
  let ca = s.f64().unwrap();
29
30
  // TODO make more efficient