polars-df 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/Cargo.lock +468 -538
  4. data/Cargo.toml +1 -0
  5. data/README.md +8 -7
  6. data/ext/polars/Cargo.toml +17 -10
  7. data/ext/polars/src/batched_csv.rs +26 -26
  8. data/ext/polars/src/conversion.rs +121 -93
  9. data/ext/polars/src/dataframe.rs +116 -71
  10. data/ext/polars/src/error.rs +0 -5
  11. data/ext/polars/src/expr/binary.rs +18 -6
  12. data/ext/polars/src/expr/datetime.rs +10 -12
  13. data/ext/polars/src/expr/general.rs +68 -284
  14. data/ext/polars/src/expr/list.rs +17 -9
  15. data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
  16. data/ext/polars/src/expr/name.rs +44 -0
  17. data/ext/polars/src/expr/rolling.rs +196 -0
  18. data/ext/polars/src/expr/string.rs +85 -58
  19. data/ext/polars/src/file.rs +3 -3
  20. data/ext/polars/src/functions/aggregation.rs +35 -0
  21. data/ext/polars/src/functions/eager.rs +7 -31
  22. data/ext/polars/src/functions/io.rs +10 -10
  23. data/ext/polars/src/functions/lazy.rs +66 -41
  24. data/ext/polars/src/functions/meta.rs +30 -0
  25. data/ext/polars/src/functions/misc.rs +8 -0
  26. data/ext/polars/src/functions/mod.rs +5 -0
  27. data/ext/polars/src/functions/random.rs +6 -0
  28. data/ext/polars/src/functions/range.rs +46 -0
  29. data/ext/polars/src/functions/string_cache.rs +11 -0
  30. data/ext/polars/src/functions/whenthen.rs +7 -7
  31. data/ext/polars/src/lazyframe.rs +47 -42
  32. data/ext/polars/src/lib.rs +156 -72
  33. data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
  34. data/ext/polars/src/{apply → map}/mod.rs +3 -3
  35. data/ext/polars/src/{apply → map}/series.rs +12 -16
  36. data/ext/polars/src/object.rs +1 -1
  37. data/ext/polars/src/rb_modules.rs +22 -7
  38. data/ext/polars/src/series/construction.rs +4 -4
  39. data/ext/polars/src/series/export.rs +2 -2
  40. data/ext/polars/src/series/set_at_idx.rs +33 -17
  41. data/ext/polars/src/series.rs +7 -27
  42. data/ext/polars/src/sql.rs +46 -0
  43. data/lib/polars/config.rb +530 -0
  44. data/lib/polars/data_frame.rb +115 -82
  45. data/lib/polars/date_time_expr.rb +13 -18
  46. data/lib/polars/date_time_name_space.rb +5 -25
  47. data/lib/polars/dynamic_group_by.rb +2 -2
  48. data/lib/polars/expr.rb +177 -94
  49. data/lib/polars/functions.rb +29 -37
  50. data/lib/polars/group_by.rb +38 -55
  51. data/lib/polars/io.rb +37 -2
  52. data/lib/polars/lazy_frame.rb +93 -66
  53. data/lib/polars/lazy_functions.rb +36 -48
  54. data/lib/polars/lazy_group_by.rb +7 -8
  55. data/lib/polars/list_expr.rb +12 -8
  56. data/lib/polars/list_name_space.rb +2 -2
  57. data/lib/polars/name_expr.rb +198 -0
  58. data/lib/polars/rolling_group_by.rb +2 -2
  59. data/lib/polars/series.rb +26 -13
  60. data/lib/polars/sql_context.rb +194 -0
  61. data/lib/polars/string_expr.rb +114 -60
  62. data/lib/polars/string_name_space.rb +19 -4
  63. data/lib/polars/utils.rb +12 -0
  64. data/lib/polars/version.rb +1 -1
  65. data/lib/polars.rb +3 -0
  66. metadata +18 -7
  67. /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -1,4 +1,4 @@
1
- use magnus::{class, IntoValue, RHash, TryConvert, Value};
1
+ use magnus::{class, prelude::*, typed_data::Obj, IntoValue, RHash, TryConvert, Value};
2
2
  use polars::prelude::*;
3
3
 
4
4
  use super::*;
@@ -14,12 +14,12 @@ fn infer_and_finish<'a, A: ApplyLambda<'a>>(
14
14
  null_count: usize,
15
15
  ) -> RbResult<RbSeries> {
16
16
  if out.is_kind_of(class::true_class()) || out.is_kind_of(class::false_class()) {
17
- let first_value = out.try_convert::<bool>().unwrap();
17
+ let first_value = bool::try_convert(out).unwrap();
18
18
  applyer
19
19
  .apply_lambda_with_bool_out_type(lambda, null_count, Some(first_value))
20
20
  .map(|ca| ca.into_series().into())
21
21
  } else if out.is_kind_of(class::float()) {
22
- let first_value = out.try_convert::<f64>().unwrap();
22
+ let first_value = f64::try_convert(out).unwrap();
23
23
  applyer
24
24
  .apply_lambda_with_primitive_out_type::<Float64Type>(
25
25
  lambda,
@@ -28,7 +28,7 @@ fn infer_and_finish<'a, A: ApplyLambda<'a>>(
28
28
  )
29
29
  .map(|ca| ca.into_series().into())
30
30
  } else if out.is_kind_of(class::string()) {
31
- let first_value = out.try_convert::<String>().unwrap();
31
+ let first_value = String::try_convert(out).unwrap();
32
32
  applyer
33
33
  .apply_lambda_with_utf8_out_type(lambda, null_count, Some(first_value.as_str()))
34
34
  .map(|ca| ca.into_series().into())
@@ -37,13 +37,13 @@ fn infer_and_finish<'a, A: ApplyLambda<'a>>(
37
37
  } else if out.is_kind_of(class::array()) {
38
38
  todo!()
39
39
  } else if out.is_kind_of(class::hash()) {
40
- let first = out.try_convert::<Wrap<AnyValue<'_>>>()?;
40
+ let first = Wrap::<AnyValue<'_>>::try_convert(out)?;
41
41
  applyer.apply_to_struct(lambda, null_count, first.0)
42
42
  }
43
43
  // this succeeds for numpy ints as well, where checking if it is pyint fails
44
44
  // we do this later in the chain so that we don't extract integers from string chars.
45
- else if out.try_convert::<i64>().is_ok() {
46
- let first_value = out.try_convert::<i64>().unwrap();
45
+ else if i64::try_convert(out).is_ok() {
46
+ let first_value = i64::try_convert(out).unwrap();
47
47
  applyer
48
48
  .apply_lambda_with_primitive_out_type::<Int64Type>(
49
49
  lambda,
@@ -51,7 +51,7 @@ fn infer_and_finish<'a, A: ApplyLambda<'a>>(
51
51
  Some(first_value),
52
52
  )
53
53
  .map(|ca| ca.into_series().into())
54
- } else if let Ok(av) = out.try_convert::<Wrap<AnyValue>>() {
54
+ } else if let Ok(av) = Wrap::<AnyValue>::try_convert(out) {
55
55
  applyer
56
56
  .apply_extract_any_values(lambda, null_count, av.0)
57
57
  .map(|s| s.into())
@@ -141,7 +141,7 @@ where
141
141
  S: TryConvert,
142
142
  {
143
143
  match call_lambda(lambda, in_val) {
144
- Ok(out) => out.try_convert::<S>(),
144
+ Ok(out) => S::try_convert(out),
145
145
  Err(e) => panic!("ruby function failed {}", e),
146
146
  }
147
147
  }
@@ -151,13 +151,9 @@ where
151
151
  T: IntoValue,
152
152
  {
153
153
  let out: Value = lambda.funcall("call", (in_val,))?;
154
- let py_series: Value = out.funcall("_s", ())?;
155
- Ok(py_series
156
- .try_convert::<&RbSeries>()
157
- .unwrap()
158
- .series
159
- .borrow()
160
- .clone())
154
+ let py_series: Obj<RbSeries> = out.funcall("_s", ())?;
155
+ let tmp = py_series.series.borrow();
156
+ Ok(tmp.clone())
161
157
  }
162
158
 
163
159
  impl<'a> ApplyLambda<'a> for BooleanChunked {
@@ -21,7 +21,7 @@ pub(crate) fn register_object_builder() {
21
21
 
22
22
  let object_converter = Arc::new(|av: AnyValue| {
23
23
  let object = ObjectValue {
24
- inner: Wrap(av).into_value(),
24
+ inner: Wrap(av).into_value().into(),
25
25
  };
26
26
  Box::new(object) as Box<dyn Any>
27
27
  });
@@ -1,25 +1,40 @@
1
- use magnus::{class, memoize, Module, RClass, RModule};
1
+ use magnus::{value::Lazy, Module, RClass, RModule, Ruby};
2
+
3
+ static POLARS: Lazy<RModule> = Lazy::new(|ruby| ruby.class_object().const_get("Polars").unwrap());
2
4
 
3
5
  pub(crate) fn polars() -> RModule {
4
- *memoize!(RModule: class::object().const_get("Polars").unwrap())
6
+ Ruby::get().unwrap().get_inner(&POLARS)
5
7
  }
6
8
 
9
+ static SERIES: Lazy<RClass> =
10
+ Lazy::new(|ruby| ruby.get_inner(&POLARS).const_get("Series").unwrap());
11
+
7
12
  pub(crate) fn series() -> RClass {
8
- *memoize!(RClass: polars().const_get("Series").unwrap())
13
+ Ruby::get().unwrap().get_inner(&SERIES)
9
14
  }
10
15
 
16
+ static UTILS: Lazy<RModule> = Lazy::new(|ruby| ruby.get_inner(&POLARS).const_get("Utils").unwrap());
17
+
11
18
  pub(crate) fn utils() -> RModule {
12
- *memoize!(RModule: polars().const_get("Utils").unwrap())
19
+ Ruby::get().unwrap().get_inner(&UTILS)
13
20
  }
14
21
 
22
+ static BIGDECIMAL: Lazy<RClass> =
23
+ Lazy::new(|ruby| ruby.class_object().const_get("BigDecimal").unwrap());
24
+
15
25
  pub(crate) fn bigdecimal() -> RClass {
16
- *memoize!(RClass: class::object().const_get("BigDecimal").unwrap())
26
+ Ruby::get().unwrap().get_inner(&BIGDECIMAL)
17
27
  }
18
28
 
29
+ static DATE: Lazy<RClass> = Lazy::new(|ruby| ruby.class_object().const_get("Date").unwrap());
30
+
19
31
  pub(crate) fn date() -> RClass {
20
- *memoize!(RClass: class::object().const_get("Date").unwrap())
32
+ Ruby::get().unwrap().get_inner(&DATE)
21
33
  }
22
34
 
35
+ static DATETIME: Lazy<RClass> =
36
+ Lazy::new(|ruby| ruby.class_object().const_get("DateTime").unwrap());
37
+
23
38
  pub(crate) fn datetime() -> RClass {
24
- *memoize!(RClass: class::object().const_get("DateTime").unwrap())
39
+ Ruby::get().unwrap().get_inner(&DATETIME)
25
40
  }
@@ -1,4 +1,4 @@
1
- use magnus::RArray;
1
+ use magnus::{prelude::*, RArray};
2
2
  use polars_core::prelude::*;
3
3
 
4
4
  use crate::conversion::{slice_extract_wrapped, vec_extract_wrapped, Wrap};
@@ -16,7 +16,7 @@ impl RbSeries {
16
16
  if item.is_nil() {
17
17
  builder.append_null()
18
18
  } else {
19
- match item.try_convert::<bool>() {
19
+ match bool::try_convert(*item) {
20
20
  Ok(val) => builder.append_value(val),
21
21
  Err(e) => {
22
22
  if strict {
@@ -49,7 +49,7 @@ where
49
49
  if item.is_nil() {
50
50
  builder.append_null()
51
51
  } else {
52
- match item.try_convert::<T::Native>() {
52
+ match T::Native::try_convert(*item) {
53
53
  Ok(val) => builder.append_value(val),
54
54
  Err(e) => {
55
55
  if strict {
@@ -92,7 +92,7 @@ init_method_opt!(new_opt_f64, Float64Type, f64);
92
92
  fn vec_wrap_any_value<'s>(arr: RArray) -> RbResult<Vec<Wrap<AnyValue<'s>>>> {
93
93
  let mut val = Vec::with_capacity(arr.len());
94
94
  for v in arr.each() {
95
- val.push(v?.try_convert()?);
95
+ val.push(Wrap::<AnyValue<'s>>::try_convert(v?)?);
96
96
  }
97
97
  Ok(val)
98
98
  }
@@ -1,4 +1,4 @@
1
- use magnus::{class, Module, RArray, RClass, RModule, Value};
1
+ use magnus::{class, prelude::*, Module, RArray, RClass, RModule, Value};
2
2
  use polars_core::prelude::*;
3
3
 
4
4
  use crate::{raise_err, RbPolarsErr, RbResult, RbSeries};
@@ -13,7 +13,7 @@ impl RbSeries {
13
13
  let ca = s.utf8().unwrap();
14
14
 
15
15
  // TODO make more efficient
16
- let np_arr = RArray::from_iter(ca.into_iter());
16
+ let np_arr = RArray::from_iter(ca);
17
17
  class::object()
18
18
  .const_get::<_, RModule>("Numo")?
19
19
  .const_get::<_, RClass>("RObject")?
@@ -1,18 +1,34 @@
1
- // use polars::export::arrow2::array::Array;
1
+ use polars::export::arrow::array::Array;
2
2
  use polars::prelude::*;
3
3
 
4
- pub fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
4
+ use crate::error::RbPolarsErr;
5
+ use crate::{RbResult, RbSeries};
6
+
7
+ impl RbSeries {
8
+ pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
9
+ let mut s = self.series.borrow_mut();
10
+ match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
11
+ Ok(out) => {
12
+ *s = out;
13
+ Ok(())
14
+ }
15
+ Err(e) => Err(RbPolarsErr::from(e)),
16
+ }
17
+ }
18
+ }
19
+
20
+ fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
5
21
  let logical_dtype = s.dtype().clone();
6
22
  let idx = idx.cast(&IDX_DTYPE)?;
7
23
  let idx = idx.rechunk();
8
24
  let idx = idx.idx().unwrap();
9
25
  let idx = idx.downcast_iter().next().unwrap();
10
26
 
11
- // if idx.null_count() > 0 {
12
- // return Err(PolarsError::ComputeError(
13
- // "index values should not be null".into(),
14
- // ));
15
- // }
27
+ if idx.null_count() > 0 {
28
+ return Err(PolarsError::ComputeError(
29
+ "index values should not be null".into(),
30
+ ));
31
+ }
16
32
 
17
33
  let idx = idx.values().as_slice();
18
34
 
@@ -27,52 +43,52 @@ pub fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<
27
43
  DataType::Int8 => {
28
44
  let ca: &mut ChunkedArray<Int8Type> = mutable_s.as_mut();
29
45
  let values = values.i8()?;
30
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
46
+ std::mem::take(ca).set_at_idx2(idx, values)
31
47
  }
32
48
  DataType::Int16 => {
33
49
  let ca: &mut ChunkedArray<Int16Type> = mutable_s.as_mut();
34
50
  let values = values.i16()?;
35
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
51
+ std::mem::take(ca).set_at_idx2(idx, values)
36
52
  }
37
53
  DataType::Int32 => {
38
54
  let ca: &mut ChunkedArray<Int32Type> = mutable_s.as_mut();
39
55
  let values = values.i32()?;
40
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
56
+ std::mem::take(ca).set_at_idx2(idx, values)
41
57
  }
42
58
  DataType::Int64 => {
43
59
  let ca: &mut ChunkedArray<Int64Type> = mutable_s.as_mut();
44
60
  let values = values.i64()?;
45
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
61
+ std::mem::take(ca).set_at_idx2(idx, values)
46
62
  }
47
63
  DataType::UInt8 => {
48
64
  let ca: &mut ChunkedArray<UInt8Type> = mutable_s.as_mut();
49
65
  let values = values.u8()?;
50
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
66
+ std::mem::take(ca).set_at_idx2(idx, values)
51
67
  }
52
68
  DataType::UInt16 => {
53
69
  let ca: &mut ChunkedArray<UInt16Type> = mutable_s.as_mut();
54
70
  let values = values.u16()?;
55
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
71
+ std::mem::take(ca).set_at_idx2(idx, values)
56
72
  }
57
73
  DataType::UInt32 => {
58
74
  let ca: &mut ChunkedArray<UInt32Type> = mutable_s.as_mut();
59
75
  let values = values.u32()?;
60
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
76
+ std::mem::take(ca).set_at_idx2(idx, values)
61
77
  }
62
78
  DataType::UInt64 => {
63
79
  let ca: &mut ChunkedArray<UInt64Type> = mutable_s.as_mut();
64
80
  let values = values.u64()?;
65
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
81
+ std::mem::take(ca).set_at_idx2(idx, values)
66
82
  }
67
83
  DataType::Float32 => {
68
84
  let ca: &mut ChunkedArray<Float32Type> = mutable_s.as_mut();
69
85
  let values = values.f32()?;
70
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
86
+ std::mem::take(ca).set_at_idx2(idx, values)
71
87
  }
72
88
  DataType::Float64 => {
73
89
  let ca: &mut ChunkedArray<Float64Type> = mutable_s.as_mut();
74
90
  let values = values.f64()?;
75
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
91
+ std::mem::take(ca).set_at_idx2(idx, values)
76
92
  }
77
93
  DataType::Boolean => {
78
94
  let ca = s.bool()?;
@@ -5,15 +5,14 @@ mod construction;
5
5
  mod export;
6
6
  mod set_at_idx;
7
7
 
8
- use magnus::{exception, Error, IntoValue, RArray, Value, QNIL};
8
+ use magnus::{exception, prelude::*, value::qnil, Error, IntoValue, RArray, Value};
9
9
  use polars::prelude::*;
10
10
  use polars::series::IsSorted;
11
11
  use std::cell::RefCell;
12
12
 
13
- use crate::apply::series::{call_lambda_and_extract, ApplyLambda};
14
13
  use crate::apply_method_all_arrow_series2;
15
14
  use crate::conversion::*;
16
- use crate::series::set_at_idx::set_at_idx;
15
+ use crate::map::series::{call_lambda_and_extract, ApplyLambda};
17
16
  use crate::{RbDataFrame, RbPolarsErr, RbResult};
18
17
 
19
18
  #[magnus::wrap(class = "Polars::RbSeries")]
@@ -38,7 +37,7 @@ impl RbSeries {
38
37
  pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
39
38
  let mut series = Vec::new();
40
39
  for item in rs.each() {
41
- series.push(item?.try_convert::<&RbSeries>()?.series.borrow().clone());
40
+ series.push(<&RbSeries>::try_convert(item?)?.series.borrow().clone());
42
41
  }
43
42
  Ok(series)
44
43
  }
@@ -325,7 +324,7 @@ impl RbSeries {
325
324
  let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
326
325
  match obj {
327
326
  Some(val) => v.push(val.to_object()).unwrap(),
328
- None => v.push(QNIL).unwrap(),
327
+ None => v.push(qnil()).unwrap(),
329
328
  };
330
329
  }
331
330
  v.into_value()
@@ -333,10 +332,10 @@ impl RbSeries {
333
332
  DataType::List(_) => {
334
333
  let v = RArray::new();
335
334
  let ca = series.list().unwrap();
336
- for opt_s in ca.amortized_iter() {
335
+ for opt_s in unsafe { ca.amortized_iter() } {
337
336
  match opt_s {
338
337
  None => {
339
- v.push(QNIL).unwrap();
338
+ v.push(qnil()).unwrap();
340
339
  }
341
340
  Some(s) => {
342
341
  let rblst = to_a_recursive(s.as_ref());
@@ -352,7 +351,7 @@ impl RbSeries {
352
351
  for opt_s in ca.amortized_iter() {
353
352
  match opt_s {
354
353
  None => {
355
- v.push(QNIL).unwrap();
354
+ v.push(qnil()).unwrap();
356
355
  }
357
356
  Some(s) => {
358
357
  let rblst = to_a_recursive(s.as_ref());
@@ -643,14 +642,6 @@ impl RbSeries {
643
642
  Ok(df.into())
644
643
  }
645
644
 
646
- pub fn peak_max(&self) -> Self {
647
- self.series.borrow().peak_max().into_series().into()
648
- }
649
-
650
- pub fn peak_min(&self) -> Self {
651
- self.series.borrow().peak_min().into_series().into()
652
- }
653
-
654
645
  pub fn n_unique(&self) -> RbResult<usize> {
655
646
  let n = self.series.borrow().n_unique().map_err(RbPolarsErr::from)?;
656
647
  Ok(n)
@@ -708,17 +699,6 @@ impl RbSeries {
708
699
  None
709
700
  }
710
701
  }
711
-
712
- pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
713
- let mut s = self.series.borrow_mut();
714
- match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
715
- Ok(out) => {
716
- *s = out;
717
- Ok(())
718
- }
719
- Err(e) => Err(RbPolarsErr::from(e)),
720
- }
721
- }
722
702
  }
723
703
 
724
704
  macro_rules! impl_set_with_mask {
@@ -0,0 +1,46 @@
1
+ use polars::sql::SQLContext;
2
+ use std::cell::RefCell;
3
+
4
+ use crate::{RbLazyFrame, RbPolarsErr, RbResult};
5
+
6
+ #[magnus::wrap(class = "Polars::RbSQLContext")]
7
+ #[repr(transparent)]
8
+ #[derive(Clone)]
9
+ pub struct RbSQLContext {
10
+ pub context: RefCell<SQLContext>,
11
+ }
12
+
13
+ #[allow(
14
+ clippy::wrong_self_convention,
15
+ clippy::should_implement_trait,
16
+ clippy::len_without_is_empty
17
+ )]
18
+ impl RbSQLContext {
19
+ #[allow(clippy::new_without_default)]
20
+ pub fn new() -> RbSQLContext {
21
+ RbSQLContext {
22
+ context: SQLContext::new().into(),
23
+ }
24
+ }
25
+
26
+ pub fn execute(&self, query: String) -> RbResult<RbLazyFrame> {
27
+ Ok(self
28
+ .context
29
+ .borrow_mut()
30
+ .execute(&query)
31
+ .map_err(RbPolarsErr::from)?
32
+ .into())
33
+ }
34
+
35
+ pub fn get_tables(&self) -> RbResult<Vec<String>> {
36
+ Ok(self.context.borrow().get_tables())
37
+ }
38
+
39
+ pub fn register(&self, name: String, lf: &RbLazyFrame) {
40
+ self.context.borrow_mut().register(&name, lf.ldf.clone())
41
+ }
42
+
43
+ pub fn unregister(&self, name: String) {
44
+ self.context.borrow_mut().unregister(&name)
45
+ }
46
+ }