polars-df 0.6.0 → 0.8.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/Cargo.lock +597 -599
  4. data/Cargo.toml +1 -0
  5. data/README.md +8 -7
  6. data/ext/polars/Cargo.toml +20 -10
  7. data/ext/polars/src/batched_csv.rs +27 -28
  8. data/ext/polars/src/conversion.rs +135 -106
  9. data/ext/polars/src/dataframe.rs +140 -131
  10. data/ext/polars/src/error.rs +0 -5
  11. data/ext/polars/src/expr/binary.rs +18 -6
  12. data/ext/polars/src/expr/categorical.rs +8 -1
  13. data/ext/polars/src/expr/datetime.rs +10 -12
  14. data/ext/polars/src/expr/general.rs +129 -286
  15. data/ext/polars/src/expr/list.rs +17 -9
  16. data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
  17. data/ext/polars/src/expr/name.rs +44 -0
  18. data/ext/polars/src/expr/rolling.rs +201 -0
  19. data/ext/polars/src/expr/string.rs +94 -67
  20. data/ext/polars/src/file.rs +3 -3
  21. data/ext/polars/src/functions/aggregation.rs +35 -0
  22. data/ext/polars/src/functions/eager.rs +7 -31
  23. data/ext/polars/src/functions/io.rs +10 -10
  24. data/ext/polars/src/functions/lazy.rs +66 -41
  25. data/ext/polars/src/functions/meta.rs +30 -0
  26. data/ext/polars/src/functions/misc.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/random.rs +6 -0
  29. data/ext/polars/src/functions/range.rs +41 -0
  30. data/ext/polars/src/functions/string_cache.rs +11 -0
  31. data/ext/polars/src/functions/whenthen.rs +7 -7
  32. data/ext/polars/src/lazyframe.rs +74 -60
  33. data/ext/polars/src/lib.rs +175 -91
  34. data/ext/polars/src/{apply → map}/dataframe.rs +29 -34
  35. data/ext/polars/src/{apply → map}/mod.rs +5 -5
  36. data/ext/polars/src/{apply → map}/series.rs +18 -22
  37. data/ext/polars/src/object.rs +0 -30
  38. data/ext/polars/src/on_startup.rs +32 -0
  39. data/ext/polars/src/rb_modules.rs +22 -7
  40. data/ext/polars/src/series/aggregation.rs +3 -0
  41. data/ext/polars/src/series/construction.rs +5 -5
  42. data/ext/polars/src/series/export.rs +4 -4
  43. data/ext/polars/src/{series.rs → series/mod.rs} +28 -45
  44. data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +38 -22
  45. data/ext/polars/src/sql.rs +46 -0
  46. data/ext/polars/src/utils.rs +1 -1
  47. data/lib/polars/config.rb +530 -0
  48. data/lib/polars/data_frame.rb +182 -145
  49. data/lib/polars/data_types.rb +4 -1
  50. data/lib/polars/date_time_expr.rb +23 -28
  51. data/lib/polars/date_time_name_space.rb +17 -37
  52. data/lib/polars/dynamic_group_by.rb +2 -2
  53. data/lib/polars/expr.rb +398 -110
  54. data/lib/polars/functions.rb +29 -37
  55. data/lib/polars/group_by.rb +38 -55
  56. data/lib/polars/io.rb +40 -5
  57. data/lib/polars/lazy_frame.rb +116 -89
  58. data/lib/polars/lazy_functions.rb +40 -68
  59. data/lib/polars/lazy_group_by.rb +7 -8
  60. data/lib/polars/list_expr.rb +12 -8
  61. data/lib/polars/list_name_space.rb +2 -2
  62. data/lib/polars/name_expr.rb +198 -0
  63. data/lib/polars/rolling_group_by.rb +2 -2
  64. data/lib/polars/series.rb +315 -43
  65. data/lib/polars/sql_context.rb +194 -0
  66. data/lib/polars/string_expr.rb +114 -60
  67. data/lib/polars/string_name_space.rb +19 -4
  68. data/lib/polars/struct_expr.rb +1 -1
  69. data/lib/polars/struct_name_space.rb +1 -1
  70. data/lib/polars/utils.rb +25 -13
  71. data/lib/polars/version.rb +1 -1
  72. data/lib/polars.rb +3 -0
  73. metadata +23 -11
  74. data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -3,17 +3,16 @@ mod arithmetic;
3
3
  mod comparison;
4
4
  mod construction;
5
5
  mod export;
6
- mod set_at_idx;
6
+ mod scatter;
7
7
 
8
- use magnus::{exception, Error, IntoValue, RArray, Value, QNIL};
8
+ use magnus::{exception, prelude::*, value::qnil, Error, IntoValue, RArray, Value};
9
9
  use polars::prelude::*;
10
10
  use polars::series::IsSorted;
11
11
  use std::cell::RefCell;
12
12
 
13
- use crate::apply::series::{call_lambda_and_extract, ApplyLambda};
14
13
  use crate::apply_method_all_arrow_series2;
15
14
  use crate::conversion::*;
16
- use crate::series::set_at_idx::set_at_idx;
15
+ use crate::map::series::{call_lambda_and_extract, ApplyLambda};
17
16
  use crate::{RbDataFrame, RbPolarsErr, RbResult};
18
17
 
19
18
  #[magnus::wrap(class = "Polars::RbSeries")]
@@ -38,7 +37,7 @@ impl RbSeries {
38
37
  pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
39
38
  let mut series = Vec::new();
40
39
  for item in rs.each() {
41
- series.push(item?.try_convert::<&RbSeries>()?.series.borrow().clone());
40
+ series.push(<&RbSeries>::try_convert(item?)?.series.borrow().clone());
42
41
  }
43
42
  Ok(series)
44
43
  }
@@ -81,7 +80,7 @@ impl RbSeries {
81
80
 
82
81
  pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
83
82
  let val = format!("{}", self.series.borrow().get(index).unwrap());
84
- if let DataType::Utf8 | DataType::Categorical(_) = self.series.borrow().dtype() {
83
+ if let DataType::String | DataType::Categorical(_, _) = self.series.borrow().dtype() {
85
84
  let v_trunc = &val[..val
86
85
  .char_indices()
87
86
  .take(str_lengths)
@@ -91,7 +90,7 @@ impl RbSeries {
91
90
  if val == v_trunc {
92
91
  val
93
92
  } else {
94
- format!("{}...", v_trunc)
93
+ format!("{}", v_trunc)
95
94
  }
96
95
  } else {
97
96
  val
@@ -274,15 +273,13 @@ impl RbSeries {
274
273
  Ok(s.into())
275
274
  }
276
275
 
277
- pub fn series_equal(&self, other: &RbSeries, null_equal: bool, strict: bool) -> bool {
276
+ pub fn equals(&self, other: &RbSeries, null_equal: bool, strict: bool) -> bool {
278
277
  if strict {
279
278
  self.series.borrow().eq(&other.series.borrow())
280
279
  } else if null_equal {
281
- self.series
282
- .borrow()
283
- .series_equal_missing(&other.series.borrow())
280
+ self.series.borrow().equals_missing(&other.series.borrow())
284
281
  } else {
285
- self.series.borrow().series_equal(&other.series.borrow())
282
+ self.series.borrow().equals(&other.series.borrow())
286
283
  }
287
284
  }
288
285
 
@@ -316,16 +313,16 @@ impl RbSeries {
316
313
  DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
317
314
  DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
318
315
  DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
319
- DataType::Categorical(_) => {
316
+ DataType::Categorical(_, _) => {
320
317
  RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
321
318
  }
322
- DataType::Object(_) => {
319
+ DataType::Object(_, _) => {
323
320
  let v = RArray::with_capacity(series.len());
324
321
  for i in 0..series.len() {
325
322
  let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
326
323
  match obj {
327
324
  Some(val) => v.push(val.to_object()).unwrap(),
328
- None => v.push(QNIL).unwrap(),
325
+ None => v.push(qnil()).unwrap(),
329
326
  };
330
327
  }
331
328
  v.into_value()
@@ -333,10 +330,10 @@ impl RbSeries {
333
330
  DataType::List(_) => {
334
331
  let v = RArray::new();
335
332
  let ca = series.list().unwrap();
336
- for opt_s in ca.amortized_iter() {
333
+ for opt_s in unsafe { ca.amortized_iter() } {
337
334
  match opt_s {
338
335
  None => {
339
- v.push(QNIL).unwrap();
336
+ v.push(qnil()).unwrap();
340
337
  }
341
338
  Some(s) => {
342
339
  let rblst = to_a_recursive(s.as_ref());
@@ -352,7 +349,7 @@ impl RbSeries {
352
349
  for opt_s in ca.amortized_iter() {
353
350
  match opt_s {
354
351
  None => {
355
- v.push(QNIL).unwrap();
352
+ v.push(qnil()).unwrap();
356
353
  }
357
354
  Some(s) => {
358
355
  let rblst = to_a_recursive(s.as_ref());
@@ -378,8 +375,8 @@ impl RbSeries {
378
375
  let ca = series.decimal().unwrap();
379
376
  return Wrap(ca).into_value();
380
377
  }
381
- DataType::Utf8 => {
382
- let ca = series.utf8().unwrap();
378
+ DataType::String => {
379
+ let ca = series.str().unwrap();
383
380
  return Wrap(ca).into_value();
384
381
  }
385
382
  DataType::Struct(_) => {
@@ -443,7 +440,7 @@ impl RbSeries {
443
440
 
444
441
  macro_rules! dispatch_apply {
445
442
  ($self:expr, $method:ident, $($args:expr),*) => {
446
- if matches!($self.dtype(), DataType::Object(_)) {
443
+ if matches!($self.dtype(), DataType::Object(_, _)) {
447
444
  // let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
448
445
  // ca.$method($($args),*)
449
446
  todo!()
@@ -464,7 +461,7 @@ impl RbSeries {
464
461
  DataType::Datetime(_, _)
465
462
  | DataType::Date
466
463
  | DataType::Duration(_)
467
- | DataType::Categorical(_)
464
+ | DataType::Categorical(_, _)
468
465
  | DataType::Time
469
466
  ) || !skip_nulls
470
467
  {
@@ -605,12 +602,12 @@ impl RbSeries {
605
602
  )?;
606
603
  ca.into_datetime(tu, tz).into_series()
607
604
  }
608
- Some(DataType::Utf8) => {
605
+ Some(DataType::String) => {
609
606
  let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
610
607
 
611
608
  ca.into_series()
612
609
  }
613
- Some(DataType::Object(_)) => {
610
+ Some(DataType::Object(_, _)) => {
614
611
  let ca =
615
612
  dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
616
613
  ca.into_series()
@@ -643,14 +640,6 @@ impl RbSeries {
643
640
  Ok(df.into())
644
641
  }
645
642
 
646
- pub fn peak_max(&self) -> Self {
647
- self.series.borrow().peak_max().into_series().into()
648
- }
649
-
650
- pub fn peak_min(&self) -> Self {
651
- self.series.borrow().peak_min().into_series().into()
652
- }
653
-
654
643
  pub fn n_unique(&self) -> RbResult<usize> {
655
644
  let n = self.series.borrow().n_unique().map_err(RbPolarsErr::from)?;
656
645
  Ok(n)
@@ -665,8 +654,13 @@ impl RbSeries {
665
654
  self.series.borrow_mut().shrink_to_fit();
666
655
  }
667
656
 
668
- pub fn dot(&self, other: &RbSeries) -> Option<f64> {
669
- self.series.borrow().dot(&other.series.borrow())
657
+ pub fn dot(&self, other: &RbSeries) -> RbResult<f64> {
658
+ let out = self
659
+ .series
660
+ .borrow()
661
+ .dot(&other.series.borrow())
662
+ .map_err(RbPolarsErr::from)?;
663
+ Ok(out)
670
664
  }
671
665
 
672
666
  pub fn skew(&self, bias: bool) -> RbResult<Option<f64>> {
@@ -708,17 +702,6 @@ impl RbSeries {
708
702
  None
709
703
  }
710
704
  }
711
-
712
- pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
713
- let mut s = self.series.borrow_mut();
714
- match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
715
- Ok(out) => {
716
- *s = out;
717
- Ok(())
718
- }
719
- Err(e) => Err(RbPolarsErr::from(e)),
720
- }
721
- }
722
705
  }
723
706
 
724
707
  macro_rules! impl_set_with_mask {
@@ -1,18 +1,34 @@
1
- // use polars::export::arrow2::array::Array;
1
+ use polars::export::arrow::array::Array;
2
2
  use polars::prelude::*;
3
3
 
4
- pub fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
4
+ use crate::error::RbPolarsErr;
5
+ use crate::{RbResult, RbSeries};
6
+
7
+ impl RbSeries {
8
+ pub fn scatter(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
9
+ let mut s = self.series.borrow_mut();
10
+ match scatter(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
11
+ Ok(out) => {
12
+ *s = out;
13
+ Ok(())
14
+ }
15
+ Err(e) => Err(RbPolarsErr::from(e)),
16
+ }
17
+ }
18
+ }
19
+
20
+ fn scatter(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
5
21
  let logical_dtype = s.dtype().clone();
6
22
  let idx = idx.cast(&IDX_DTYPE)?;
7
23
  let idx = idx.rechunk();
8
24
  let idx = idx.idx().unwrap();
9
25
  let idx = idx.downcast_iter().next().unwrap();
10
26
 
11
- // if idx.null_count() > 0 {
12
- // return Err(PolarsError::ComputeError(
13
- // "index values should not be null".into(),
14
- // ));
15
- // }
27
+ if idx.null_count() > 0 {
28
+ return Err(PolarsError::ComputeError(
29
+ "index values should not be null".into(),
30
+ ));
31
+ }
16
32
 
17
33
  let idx = idx.values().as_slice();
18
34
 
@@ -27,62 +43,62 @@ pub fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<
27
43
  DataType::Int8 => {
28
44
  let ca: &mut ChunkedArray<Int8Type> = mutable_s.as_mut();
29
45
  let values = values.i8()?;
30
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
46
+ std::mem::take(ca).scatter(idx, values)
31
47
  }
32
48
  DataType::Int16 => {
33
49
  let ca: &mut ChunkedArray<Int16Type> = mutable_s.as_mut();
34
50
  let values = values.i16()?;
35
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
51
+ std::mem::take(ca).scatter(idx, values)
36
52
  }
37
53
  DataType::Int32 => {
38
54
  let ca: &mut ChunkedArray<Int32Type> = mutable_s.as_mut();
39
55
  let values = values.i32()?;
40
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
56
+ std::mem::take(ca).scatter(idx, values)
41
57
  }
42
58
  DataType::Int64 => {
43
59
  let ca: &mut ChunkedArray<Int64Type> = mutable_s.as_mut();
44
60
  let values = values.i64()?;
45
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
61
+ std::mem::take(ca).scatter(idx, values)
46
62
  }
47
63
  DataType::UInt8 => {
48
64
  let ca: &mut ChunkedArray<UInt8Type> = mutable_s.as_mut();
49
65
  let values = values.u8()?;
50
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
66
+ std::mem::take(ca).scatter(idx, values)
51
67
  }
52
68
  DataType::UInt16 => {
53
69
  let ca: &mut ChunkedArray<UInt16Type> = mutable_s.as_mut();
54
70
  let values = values.u16()?;
55
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
71
+ std::mem::take(ca).scatter(idx, values)
56
72
  }
57
73
  DataType::UInt32 => {
58
74
  let ca: &mut ChunkedArray<UInt32Type> = mutable_s.as_mut();
59
75
  let values = values.u32()?;
60
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
76
+ std::mem::take(ca).scatter(idx, values)
61
77
  }
62
78
  DataType::UInt64 => {
63
79
  let ca: &mut ChunkedArray<UInt64Type> = mutable_s.as_mut();
64
80
  let values = values.u64()?;
65
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
81
+ std::mem::take(ca).scatter(idx, values)
66
82
  }
67
83
  DataType::Float32 => {
68
84
  let ca: &mut ChunkedArray<Float32Type> = mutable_s.as_mut();
69
85
  let values = values.f32()?;
70
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
86
+ std::mem::take(ca).scatter(idx, values)
71
87
  }
72
88
  DataType::Float64 => {
73
89
  let ca: &mut ChunkedArray<Float64Type> = mutable_s.as_mut();
74
90
  let values = values.f64()?;
75
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
91
+ std::mem::take(ca).scatter(idx, values)
76
92
  }
77
93
  DataType::Boolean => {
78
94
  let ca = s.bool()?;
79
95
  let values = values.bool()?;
80
- ca.set_at_idx2(idx, values)
96
+ ca.scatter(idx, values)
81
97
  }
82
- DataType::Utf8 => {
83
- let ca = s.utf8()?;
84
- let values = values.utf8()?;
85
- ca.set_at_idx2(idx, values)
98
+ DataType::String => {
99
+ let ca = s.str()?;
100
+ let values = values.str()?;
101
+ ca.scatter(idx, values)
86
102
  }
87
103
  _ => panic!("not yet implemented for dtype: {}", logical_dtype),
88
104
  };
@@ -0,0 +1,46 @@
1
+ use polars::sql::SQLContext;
2
+ use std::cell::RefCell;
3
+
4
+ use crate::{RbLazyFrame, RbPolarsErr, RbResult};
5
+
6
+ #[magnus::wrap(class = "Polars::RbSQLContext")]
7
+ #[repr(transparent)]
8
+ #[derive(Clone)]
9
+ pub struct RbSQLContext {
10
+ pub context: RefCell<SQLContext>,
11
+ }
12
+
13
+ #[allow(
14
+ clippy::wrong_self_convention,
15
+ clippy::should_implement_trait,
16
+ clippy::len_without_is_empty
17
+ )]
18
+ impl RbSQLContext {
19
+ #[allow(clippy::new_without_default)]
20
+ pub fn new() -> RbSQLContext {
21
+ RbSQLContext {
22
+ context: SQLContext::new().into(),
23
+ }
24
+ }
25
+
26
+ pub fn execute(&self, query: String) -> RbResult<RbLazyFrame> {
27
+ Ok(self
28
+ .context
29
+ .borrow_mut()
30
+ .execute(&query)
31
+ .map_err(RbPolarsErr::from)?
32
+ .into())
33
+ }
34
+
35
+ pub fn get_tables(&self) -> RbResult<Vec<String>> {
36
+ Ok(self.context.borrow().get_tables())
37
+ }
38
+
39
+ pub fn register(&self, name: String, lf: &RbLazyFrame) {
40
+ self.context.borrow_mut().register(&name, lf.ldf.clone())
41
+ }
42
+
43
+ pub fn unregister(&self, name: String) {
44
+ self.context.borrow_mut().unregister(&name)
45
+ }
46
+ }
@@ -23,7 +23,7 @@ macro_rules! apply_method_all_arrow_series2 {
23
23
  ($self:expr, $method:ident, $($args:expr),*) => {
24
24
  match $self.dtype() {
25
25
  DataType::Boolean => $self.bool().unwrap().$method($($args),*),
26
- DataType::Utf8 => $self.utf8().unwrap().$method($($args),*),
26
+ DataType::String => $self.str().unwrap().$method($($args),*),
27
27
  DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
28
28
  DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
29
29
  DataType::UInt32 => $self.u32().unwrap().$method($($args),*),