polars-df 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/Cargo.lock +468 -538
  4. data/Cargo.toml +1 -0
  5. data/README.md +8 -7
  6. data/ext/polars/Cargo.toml +17 -10
  7. data/ext/polars/src/batched_csv.rs +26 -26
  8. data/ext/polars/src/conversion.rs +121 -93
  9. data/ext/polars/src/dataframe.rs +116 -71
  10. data/ext/polars/src/error.rs +0 -5
  11. data/ext/polars/src/expr/binary.rs +18 -6
  12. data/ext/polars/src/expr/datetime.rs +10 -12
  13. data/ext/polars/src/expr/general.rs +68 -284
  14. data/ext/polars/src/expr/list.rs +17 -9
  15. data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
  16. data/ext/polars/src/expr/name.rs +44 -0
  17. data/ext/polars/src/expr/rolling.rs +196 -0
  18. data/ext/polars/src/expr/string.rs +85 -58
  19. data/ext/polars/src/file.rs +3 -3
  20. data/ext/polars/src/functions/aggregation.rs +35 -0
  21. data/ext/polars/src/functions/eager.rs +7 -31
  22. data/ext/polars/src/functions/io.rs +10 -10
  23. data/ext/polars/src/functions/lazy.rs +66 -41
  24. data/ext/polars/src/functions/meta.rs +30 -0
  25. data/ext/polars/src/functions/misc.rs +8 -0
  26. data/ext/polars/src/functions/mod.rs +5 -0
  27. data/ext/polars/src/functions/random.rs +6 -0
  28. data/ext/polars/src/functions/range.rs +46 -0
  29. data/ext/polars/src/functions/string_cache.rs +11 -0
  30. data/ext/polars/src/functions/whenthen.rs +7 -7
  31. data/ext/polars/src/lazyframe.rs +47 -42
  32. data/ext/polars/src/lib.rs +156 -72
  33. data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
  34. data/ext/polars/src/{apply → map}/mod.rs +3 -3
  35. data/ext/polars/src/{apply → map}/series.rs +12 -16
  36. data/ext/polars/src/object.rs +1 -1
  37. data/ext/polars/src/rb_modules.rs +22 -7
  38. data/ext/polars/src/series/construction.rs +4 -4
  39. data/ext/polars/src/series/export.rs +2 -2
  40. data/ext/polars/src/series/set_at_idx.rs +33 -17
  41. data/ext/polars/src/series.rs +7 -27
  42. data/ext/polars/src/sql.rs +46 -0
  43. data/lib/polars/config.rb +530 -0
  44. data/lib/polars/data_frame.rb +115 -82
  45. data/lib/polars/date_time_expr.rb +13 -18
  46. data/lib/polars/date_time_name_space.rb +5 -25
  47. data/lib/polars/dynamic_group_by.rb +2 -2
  48. data/lib/polars/expr.rb +177 -94
  49. data/lib/polars/functions.rb +29 -37
  50. data/lib/polars/group_by.rb +38 -55
  51. data/lib/polars/io.rb +37 -2
  52. data/lib/polars/lazy_frame.rb +93 -66
  53. data/lib/polars/lazy_functions.rb +36 -48
  54. data/lib/polars/lazy_group_by.rb +7 -8
  55. data/lib/polars/list_expr.rb +12 -8
  56. data/lib/polars/list_name_space.rb +2 -2
  57. data/lib/polars/name_expr.rb +198 -0
  58. data/lib/polars/rolling_group_by.rb +2 -2
  59. data/lib/polars/series.rb +26 -13
  60. data/lib/polars/sql_context.rb +194 -0
  61. data/lib/polars/string_expr.rb +114 -60
  62. data/lib/polars/string_name_space.rb +19 -4
  63. data/lib/polars/utils.rb +12 -0
  64. data/lib/polars/version.rb +1 -1
  65. data/lib/polars.rb +3 -0
  66. metadata +18 -7
  67. /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -1,4 +1,4 @@
1
- use magnus::{class, IntoValue, RHash, TryConvert, Value};
1
+ use magnus::{class, prelude::*, typed_data::Obj, IntoValue, RHash, TryConvert, Value};
2
2
  use polars::prelude::*;
3
3
 
4
4
  use super::*;
@@ -14,12 +14,12 @@ fn infer_and_finish<'a, A: ApplyLambda<'a>>(
14
14
  null_count: usize,
15
15
  ) -> RbResult<RbSeries> {
16
16
  if out.is_kind_of(class::true_class()) || out.is_kind_of(class::false_class()) {
17
- let first_value = out.try_convert::<bool>().unwrap();
17
+ let first_value = bool::try_convert(out).unwrap();
18
18
  applyer
19
19
  .apply_lambda_with_bool_out_type(lambda, null_count, Some(first_value))
20
20
  .map(|ca| ca.into_series().into())
21
21
  } else if out.is_kind_of(class::float()) {
22
- let first_value = out.try_convert::<f64>().unwrap();
22
+ let first_value = f64::try_convert(out).unwrap();
23
23
  applyer
24
24
  .apply_lambda_with_primitive_out_type::<Float64Type>(
25
25
  lambda,
@@ -28,7 +28,7 @@ fn infer_and_finish<'a, A: ApplyLambda<'a>>(
28
28
  )
29
29
  .map(|ca| ca.into_series().into())
30
30
  } else if out.is_kind_of(class::string()) {
31
- let first_value = out.try_convert::<String>().unwrap();
31
+ let first_value = String::try_convert(out).unwrap();
32
32
  applyer
33
33
  .apply_lambda_with_utf8_out_type(lambda, null_count, Some(first_value.as_str()))
34
34
  .map(|ca| ca.into_series().into())
@@ -37,13 +37,13 @@ fn infer_and_finish<'a, A: ApplyLambda<'a>>(
37
37
  } else if out.is_kind_of(class::array()) {
38
38
  todo!()
39
39
  } else if out.is_kind_of(class::hash()) {
40
- let first = out.try_convert::<Wrap<AnyValue<'_>>>()?;
40
+ let first = Wrap::<AnyValue<'_>>::try_convert(out)?;
41
41
  applyer.apply_to_struct(lambda, null_count, first.0)
42
42
  }
43
43
  // this succeeds for numpy ints as well, where checking if it is pyint fails
44
44
  // we do this later in the chain so that we don't extract integers from string chars.
45
- else if out.try_convert::<i64>().is_ok() {
46
- let first_value = out.try_convert::<i64>().unwrap();
45
+ else if i64::try_convert(out).is_ok() {
46
+ let first_value = i64::try_convert(out).unwrap();
47
47
  applyer
48
48
  .apply_lambda_with_primitive_out_type::<Int64Type>(
49
49
  lambda,
@@ -51,7 +51,7 @@ fn infer_and_finish<'a, A: ApplyLambda<'a>>(
51
51
  Some(first_value),
52
52
  )
53
53
  .map(|ca| ca.into_series().into())
54
- } else if let Ok(av) = out.try_convert::<Wrap<AnyValue>>() {
54
+ } else if let Ok(av) = Wrap::<AnyValue>::try_convert(out) {
55
55
  applyer
56
56
  .apply_extract_any_values(lambda, null_count, av.0)
57
57
  .map(|s| s.into())
@@ -141,7 +141,7 @@ where
141
141
  S: TryConvert,
142
142
  {
143
143
  match call_lambda(lambda, in_val) {
144
- Ok(out) => out.try_convert::<S>(),
144
+ Ok(out) => S::try_convert(out),
145
145
  Err(e) => panic!("ruby function failed {}", e),
146
146
  }
147
147
  }
@@ -151,13 +151,9 @@ where
151
151
  T: IntoValue,
152
152
  {
153
153
  let out: Value = lambda.funcall("call", (in_val,))?;
154
- let py_series: Value = out.funcall("_s", ())?;
155
- Ok(py_series
156
- .try_convert::<&RbSeries>()
157
- .unwrap()
158
- .series
159
- .borrow()
160
- .clone())
154
+ let py_series: Obj<RbSeries> = out.funcall("_s", ())?;
155
+ let tmp = py_series.series.borrow();
156
+ Ok(tmp.clone())
161
157
  }
162
158
 
163
159
  impl<'a> ApplyLambda<'a> for BooleanChunked {
@@ -21,7 +21,7 @@ pub(crate) fn register_object_builder() {
21
21
 
22
22
  let object_converter = Arc::new(|av: AnyValue| {
23
23
  let object = ObjectValue {
24
- inner: Wrap(av).into_value(),
24
+ inner: Wrap(av).into_value().into(),
25
25
  };
26
26
  Box::new(object) as Box<dyn Any>
27
27
  });
@@ -1,25 +1,40 @@
1
- use magnus::{class, memoize, Module, RClass, RModule};
1
+ use magnus::{value::Lazy, Module, RClass, RModule, Ruby};
2
+
3
+ static POLARS: Lazy<RModule> = Lazy::new(|ruby| ruby.class_object().const_get("Polars").unwrap());
2
4
 
3
5
  pub(crate) fn polars() -> RModule {
4
- *memoize!(RModule: class::object().const_get("Polars").unwrap())
6
+ Ruby::get().unwrap().get_inner(&POLARS)
5
7
  }
6
8
 
9
+ static SERIES: Lazy<RClass> =
10
+ Lazy::new(|ruby| ruby.get_inner(&POLARS).const_get("Series").unwrap());
11
+
7
12
  pub(crate) fn series() -> RClass {
8
- *memoize!(RClass: polars().const_get("Series").unwrap())
13
+ Ruby::get().unwrap().get_inner(&SERIES)
9
14
  }
10
15
 
16
+ static UTILS: Lazy<RModule> = Lazy::new(|ruby| ruby.get_inner(&POLARS).const_get("Utils").unwrap());
17
+
11
18
  pub(crate) fn utils() -> RModule {
12
- *memoize!(RModule: polars().const_get("Utils").unwrap())
19
+ Ruby::get().unwrap().get_inner(&UTILS)
13
20
  }
14
21
 
22
+ static BIGDECIMAL: Lazy<RClass> =
23
+ Lazy::new(|ruby| ruby.class_object().const_get("BigDecimal").unwrap());
24
+
15
25
  pub(crate) fn bigdecimal() -> RClass {
16
- *memoize!(RClass: class::object().const_get("BigDecimal").unwrap())
26
+ Ruby::get().unwrap().get_inner(&BIGDECIMAL)
17
27
  }
18
28
 
29
+ static DATE: Lazy<RClass> = Lazy::new(|ruby| ruby.class_object().const_get("Date").unwrap());
30
+
19
31
  pub(crate) fn date() -> RClass {
20
- *memoize!(RClass: class::object().const_get("Date").unwrap())
32
+ Ruby::get().unwrap().get_inner(&DATE)
21
33
  }
22
34
 
35
+ static DATETIME: Lazy<RClass> =
36
+ Lazy::new(|ruby| ruby.class_object().const_get("DateTime").unwrap());
37
+
23
38
  pub(crate) fn datetime() -> RClass {
24
- *memoize!(RClass: class::object().const_get("DateTime").unwrap())
39
+ Ruby::get().unwrap().get_inner(&DATETIME)
25
40
  }
@@ -1,4 +1,4 @@
1
- use magnus::RArray;
1
+ use magnus::{prelude::*, RArray};
2
2
  use polars_core::prelude::*;
3
3
 
4
4
  use crate::conversion::{slice_extract_wrapped, vec_extract_wrapped, Wrap};
@@ -16,7 +16,7 @@ impl RbSeries {
16
16
  if item.is_nil() {
17
17
  builder.append_null()
18
18
  } else {
19
- match item.try_convert::<bool>() {
19
+ match bool::try_convert(*item) {
20
20
  Ok(val) => builder.append_value(val),
21
21
  Err(e) => {
22
22
  if strict {
@@ -49,7 +49,7 @@ where
49
49
  if item.is_nil() {
50
50
  builder.append_null()
51
51
  } else {
52
- match item.try_convert::<T::Native>() {
52
+ match T::Native::try_convert(*item) {
53
53
  Ok(val) => builder.append_value(val),
54
54
  Err(e) => {
55
55
  if strict {
@@ -92,7 +92,7 @@ init_method_opt!(new_opt_f64, Float64Type, f64);
92
92
  fn vec_wrap_any_value<'s>(arr: RArray) -> RbResult<Vec<Wrap<AnyValue<'s>>>> {
93
93
  let mut val = Vec::with_capacity(arr.len());
94
94
  for v in arr.each() {
95
- val.push(v?.try_convert()?);
95
+ val.push(Wrap::<AnyValue<'s>>::try_convert(v?)?);
96
96
  }
97
97
  Ok(val)
98
98
  }
@@ -1,4 +1,4 @@
1
- use magnus::{class, Module, RArray, RClass, RModule, Value};
1
+ use magnus::{class, prelude::*, Module, RArray, RClass, RModule, Value};
2
2
  use polars_core::prelude::*;
3
3
 
4
4
  use crate::{raise_err, RbPolarsErr, RbResult, RbSeries};
@@ -13,7 +13,7 @@ impl RbSeries {
13
13
  let ca = s.utf8().unwrap();
14
14
 
15
15
  // TODO make more efficient
16
- let np_arr = RArray::from_iter(ca.into_iter());
16
+ let np_arr = RArray::from_iter(ca);
17
17
  class::object()
18
18
  .const_get::<_, RModule>("Numo")?
19
19
  .const_get::<_, RClass>("RObject")?
@@ -1,18 +1,34 @@
1
- // use polars::export::arrow2::array::Array;
1
+ use polars::export::arrow::array::Array;
2
2
  use polars::prelude::*;
3
3
 
4
- pub fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
4
+ use crate::error::RbPolarsErr;
5
+ use crate::{RbResult, RbSeries};
6
+
7
+ impl RbSeries {
8
+ pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
9
+ let mut s = self.series.borrow_mut();
10
+ match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
11
+ Ok(out) => {
12
+ *s = out;
13
+ Ok(())
14
+ }
15
+ Err(e) => Err(RbPolarsErr::from(e)),
16
+ }
17
+ }
18
+ }
19
+
20
+ fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
5
21
  let logical_dtype = s.dtype().clone();
6
22
  let idx = idx.cast(&IDX_DTYPE)?;
7
23
  let idx = idx.rechunk();
8
24
  let idx = idx.idx().unwrap();
9
25
  let idx = idx.downcast_iter().next().unwrap();
10
26
 
11
- // if idx.null_count() > 0 {
12
- // return Err(PolarsError::ComputeError(
13
- // "index values should not be null".into(),
14
- // ));
15
- // }
27
+ if idx.null_count() > 0 {
28
+ return Err(PolarsError::ComputeError(
29
+ "index values should not be null".into(),
30
+ ));
31
+ }
16
32
 
17
33
  let idx = idx.values().as_slice();
18
34
 
@@ -27,52 +43,52 @@ pub fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<
27
43
  DataType::Int8 => {
28
44
  let ca: &mut ChunkedArray<Int8Type> = mutable_s.as_mut();
29
45
  let values = values.i8()?;
30
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
46
+ std::mem::take(ca).set_at_idx2(idx, values)
31
47
  }
32
48
  DataType::Int16 => {
33
49
  let ca: &mut ChunkedArray<Int16Type> = mutable_s.as_mut();
34
50
  let values = values.i16()?;
35
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
51
+ std::mem::take(ca).set_at_idx2(idx, values)
36
52
  }
37
53
  DataType::Int32 => {
38
54
  let ca: &mut ChunkedArray<Int32Type> = mutable_s.as_mut();
39
55
  let values = values.i32()?;
40
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
56
+ std::mem::take(ca).set_at_idx2(idx, values)
41
57
  }
42
58
  DataType::Int64 => {
43
59
  let ca: &mut ChunkedArray<Int64Type> = mutable_s.as_mut();
44
60
  let values = values.i64()?;
45
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
61
+ std::mem::take(ca).set_at_idx2(idx, values)
46
62
  }
47
63
  DataType::UInt8 => {
48
64
  let ca: &mut ChunkedArray<UInt8Type> = mutable_s.as_mut();
49
65
  let values = values.u8()?;
50
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
66
+ std::mem::take(ca).set_at_idx2(idx, values)
51
67
  }
52
68
  DataType::UInt16 => {
53
69
  let ca: &mut ChunkedArray<UInt16Type> = mutable_s.as_mut();
54
70
  let values = values.u16()?;
55
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
71
+ std::mem::take(ca).set_at_idx2(idx, values)
56
72
  }
57
73
  DataType::UInt32 => {
58
74
  let ca: &mut ChunkedArray<UInt32Type> = mutable_s.as_mut();
59
75
  let values = values.u32()?;
60
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
76
+ std::mem::take(ca).set_at_idx2(idx, values)
61
77
  }
62
78
  DataType::UInt64 => {
63
79
  let ca: &mut ChunkedArray<UInt64Type> = mutable_s.as_mut();
64
80
  let values = values.u64()?;
65
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
81
+ std::mem::take(ca).set_at_idx2(idx, values)
66
82
  }
67
83
  DataType::Float32 => {
68
84
  let ca: &mut ChunkedArray<Float32Type> = mutable_s.as_mut();
69
85
  let values = values.f32()?;
70
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
86
+ std::mem::take(ca).set_at_idx2(idx, values)
71
87
  }
72
88
  DataType::Float64 => {
73
89
  let ca: &mut ChunkedArray<Float64Type> = mutable_s.as_mut();
74
90
  let values = values.f64()?;
75
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
91
+ std::mem::take(ca).set_at_idx2(idx, values)
76
92
  }
77
93
  DataType::Boolean => {
78
94
  let ca = s.bool()?;
@@ -5,15 +5,14 @@ mod construction;
5
5
  mod export;
6
6
  mod set_at_idx;
7
7
 
8
- use magnus::{exception, Error, IntoValue, RArray, Value, QNIL};
8
+ use magnus::{exception, prelude::*, value::qnil, Error, IntoValue, RArray, Value};
9
9
  use polars::prelude::*;
10
10
  use polars::series::IsSorted;
11
11
  use std::cell::RefCell;
12
12
 
13
- use crate::apply::series::{call_lambda_and_extract, ApplyLambda};
14
13
  use crate::apply_method_all_arrow_series2;
15
14
  use crate::conversion::*;
16
- use crate::series::set_at_idx::set_at_idx;
15
+ use crate::map::series::{call_lambda_and_extract, ApplyLambda};
17
16
  use crate::{RbDataFrame, RbPolarsErr, RbResult};
18
17
 
19
18
  #[magnus::wrap(class = "Polars::RbSeries")]
@@ -38,7 +37,7 @@ impl RbSeries {
38
37
  pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
39
38
  let mut series = Vec::new();
40
39
  for item in rs.each() {
41
- series.push(item?.try_convert::<&RbSeries>()?.series.borrow().clone());
40
+ series.push(<&RbSeries>::try_convert(item?)?.series.borrow().clone());
42
41
  }
43
42
  Ok(series)
44
43
  }
@@ -325,7 +324,7 @@ impl RbSeries {
325
324
  let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
326
325
  match obj {
327
326
  Some(val) => v.push(val.to_object()).unwrap(),
328
- None => v.push(QNIL).unwrap(),
327
+ None => v.push(qnil()).unwrap(),
329
328
  };
330
329
  }
331
330
  v.into_value()
@@ -333,10 +332,10 @@ impl RbSeries {
333
332
  DataType::List(_) => {
334
333
  let v = RArray::new();
335
334
  let ca = series.list().unwrap();
336
- for opt_s in ca.amortized_iter() {
335
+ for opt_s in unsafe { ca.amortized_iter() } {
337
336
  match opt_s {
338
337
  None => {
339
- v.push(QNIL).unwrap();
338
+ v.push(qnil()).unwrap();
340
339
  }
341
340
  Some(s) => {
342
341
  let rblst = to_a_recursive(s.as_ref());
@@ -352,7 +351,7 @@ impl RbSeries {
352
351
  for opt_s in ca.amortized_iter() {
353
352
  match opt_s {
354
353
  None => {
355
- v.push(QNIL).unwrap();
354
+ v.push(qnil()).unwrap();
356
355
  }
357
356
  Some(s) => {
358
357
  let rblst = to_a_recursive(s.as_ref());
@@ -643,14 +642,6 @@ impl RbSeries {
643
642
  Ok(df.into())
644
643
  }
645
644
 
646
- pub fn peak_max(&self) -> Self {
647
- self.series.borrow().peak_max().into_series().into()
648
- }
649
-
650
- pub fn peak_min(&self) -> Self {
651
- self.series.borrow().peak_min().into_series().into()
652
- }
653
-
654
645
  pub fn n_unique(&self) -> RbResult<usize> {
655
646
  let n = self.series.borrow().n_unique().map_err(RbPolarsErr::from)?;
656
647
  Ok(n)
@@ -708,17 +699,6 @@ impl RbSeries {
708
699
  None
709
700
  }
710
701
  }
711
-
712
- pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
713
- let mut s = self.series.borrow_mut();
714
- match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
715
- Ok(out) => {
716
- *s = out;
717
- Ok(())
718
- }
719
- Err(e) => Err(RbPolarsErr::from(e)),
720
- }
721
- }
722
702
  }
723
703
 
724
704
  macro_rules! impl_set_with_mask {
@@ -0,0 +1,46 @@
1
+ use polars::sql::SQLContext;
2
+ use std::cell::RefCell;
3
+
4
+ use crate::{RbLazyFrame, RbPolarsErr, RbResult};
5
+
6
+ #[magnus::wrap(class = "Polars::RbSQLContext")]
7
+ #[repr(transparent)]
8
+ #[derive(Clone)]
9
+ pub struct RbSQLContext {
10
+ pub context: RefCell<SQLContext>,
11
+ }
12
+
13
+ #[allow(
14
+ clippy::wrong_self_convention,
15
+ clippy::should_implement_trait,
16
+ clippy::len_without_is_empty
17
+ )]
18
+ impl RbSQLContext {
19
+ #[allow(clippy::new_without_default)]
20
+ pub fn new() -> RbSQLContext {
21
+ RbSQLContext {
22
+ context: SQLContext::new().into(),
23
+ }
24
+ }
25
+
26
+ pub fn execute(&self, query: String) -> RbResult<RbLazyFrame> {
27
+ Ok(self
28
+ .context
29
+ .borrow_mut()
30
+ .execute(&query)
31
+ .map_err(RbPolarsErr::from)?
32
+ .into())
33
+ }
34
+
35
+ pub fn get_tables(&self) -> RbResult<Vec<String>> {
36
+ Ok(self.context.borrow().get_tables())
37
+ }
38
+
39
+ pub fn register(&self, name: String, lf: &RbLazyFrame) {
40
+ self.context.borrow_mut().register(&name, lf.ldf.clone())
41
+ }
42
+
43
+ pub fn unregister(&self, name: String) {
44
+ self.context.borrow_mut().unregister(&name)
45
+ }
46
+ }