polars-df 0.6.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/Cargo.lock +597 -599
  4. data/Cargo.toml +1 -0
  5. data/README.md +8 -7
  6. data/ext/polars/Cargo.toml +20 -10
  7. data/ext/polars/src/batched_csv.rs +27 -28
  8. data/ext/polars/src/conversion.rs +135 -106
  9. data/ext/polars/src/dataframe.rs +140 -131
  10. data/ext/polars/src/error.rs +0 -5
  11. data/ext/polars/src/expr/binary.rs +18 -6
  12. data/ext/polars/src/expr/categorical.rs +8 -1
  13. data/ext/polars/src/expr/datetime.rs +10 -12
  14. data/ext/polars/src/expr/general.rs +129 -286
  15. data/ext/polars/src/expr/list.rs +17 -9
  16. data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
  17. data/ext/polars/src/expr/name.rs +44 -0
  18. data/ext/polars/src/expr/rolling.rs +201 -0
  19. data/ext/polars/src/expr/string.rs +94 -67
  20. data/ext/polars/src/file.rs +3 -3
  21. data/ext/polars/src/functions/aggregation.rs +35 -0
  22. data/ext/polars/src/functions/eager.rs +7 -31
  23. data/ext/polars/src/functions/io.rs +10 -10
  24. data/ext/polars/src/functions/lazy.rs +66 -41
  25. data/ext/polars/src/functions/meta.rs +30 -0
  26. data/ext/polars/src/functions/misc.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/random.rs +6 -0
  29. data/ext/polars/src/functions/range.rs +41 -0
  30. data/ext/polars/src/functions/string_cache.rs +11 -0
  31. data/ext/polars/src/functions/whenthen.rs +7 -7
  32. data/ext/polars/src/lazyframe.rs +74 -60
  33. data/ext/polars/src/lib.rs +175 -91
  34. data/ext/polars/src/{apply → map}/dataframe.rs +29 -34
  35. data/ext/polars/src/{apply → map}/mod.rs +5 -5
  36. data/ext/polars/src/{apply → map}/series.rs +18 -22
  37. data/ext/polars/src/object.rs +0 -30
  38. data/ext/polars/src/on_startup.rs +32 -0
  39. data/ext/polars/src/rb_modules.rs +22 -7
  40. data/ext/polars/src/series/aggregation.rs +3 -0
  41. data/ext/polars/src/series/construction.rs +5 -5
  42. data/ext/polars/src/series/export.rs +4 -4
  43. data/ext/polars/src/{series.rs → series/mod.rs} +28 -45
  44. data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +38 -22
  45. data/ext/polars/src/sql.rs +46 -0
  46. data/ext/polars/src/utils.rs +1 -1
  47. data/lib/polars/config.rb +530 -0
  48. data/lib/polars/data_frame.rb +182 -145
  49. data/lib/polars/data_types.rb +4 -1
  50. data/lib/polars/date_time_expr.rb +23 -28
  51. data/lib/polars/date_time_name_space.rb +17 -37
  52. data/lib/polars/dynamic_group_by.rb +2 -2
  53. data/lib/polars/expr.rb +398 -110
  54. data/lib/polars/functions.rb +29 -37
  55. data/lib/polars/group_by.rb +38 -55
  56. data/lib/polars/io.rb +40 -5
  57. data/lib/polars/lazy_frame.rb +116 -89
  58. data/lib/polars/lazy_functions.rb +40 -68
  59. data/lib/polars/lazy_group_by.rb +7 -8
  60. data/lib/polars/list_expr.rb +12 -8
  61. data/lib/polars/list_name_space.rb +2 -2
  62. data/lib/polars/name_expr.rb +198 -0
  63. data/lib/polars/rolling_group_by.rb +2 -2
  64. data/lib/polars/series.rb +315 -43
  65. data/lib/polars/sql_context.rb +194 -0
  66. data/lib/polars/string_expr.rb +114 -60
  67. data/lib/polars/string_name_space.rb +19 -4
  68. data/lib/polars/struct_expr.rb +1 -1
  69. data/lib/polars/struct_name_space.rb +1 -1
  70. data/lib/polars/utils.rb +25 -13
  71. data/lib/polars/version.rb +1 -1
  72. data/lib/polars.rb +3 -0
  73. metadata +23 -11
  74. /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -3,17 +3,16 @@ mod arithmetic;
3
3
  mod comparison;
4
4
  mod construction;
5
5
  mod export;
6
- mod set_at_idx;
6
+ mod scatter;
7
7
 
8
- use magnus::{exception, Error, IntoValue, RArray, Value, QNIL};
8
+ use magnus::{exception, prelude::*, value::qnil, Error, IntoValue, RArray, Value};
9
9
  use polars::prelude::*;
10
10
  use polars::series::IsSorted;
11
11
  use std::cell::RefCell;
12
12
 
13
- use crate::apply::series::{call_lambda_and_extract, ApplyLambda};
14
13
  use crate::apply_method_all_arrow_series2;
15
14
  use crate::conversion::*;
16
- use crate::series::set_at_idx::set_at_idx;
15
+ use crate::map::series::{call_lambda_and_extract, ApplyLambda};
17
16
  use crate::{RbDataFrame, RbPolarsErr, RbResult};
18
17
 
19
18
  #[magnus::wrap(class = "Polars::RbSeries")]
@@ -38,7 +37,7 @@ impl RbSeries {
38
37
  pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
39
38
  let mut series = Vec::new();
40
39
  for item in rs.each() {
41
- series.push(item?.try_convert::<&RbSeries>()?.series.borrow().clone());
40
+ series.push(<&RbSeries>::try_convert(item?)?.series.borrow().clone());
42
41
  }
43
42
  Ok(series)
44
43
  }
@@ -81,7 +80,7 @@ impl RbSeries {
81
80
 
82
81
  pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
83
82
  let val = format!("{}", self.series.borrow().get(index).unwrap());
84
- if let DataType::Utf8 | DataType::Categorical(_) = self.series.borrow().dtype() {
83
+ if let DataType::String | DataType::Categorical(_, _) = self.series.borrow().dtype() {
85
84
  let v_trunc = &val[..val
86
85
  .char_indices()
87
86
  .take(str_lengths)
@@ -91,7 +90,7 @@ impl RbSeries {
91
90
  if val == v_trunc {
92
91
  val
93
92
  } else {
94
- format!("{}...", v_trunc)
93
+ format!("{}…", v_trunc)
95
94
  }
96
95
  } else {
97
96
  val
@@ -274,15 +273,13 @@ impl RbSeries {
274
273
  Ok(s.into())
275
274
  }
276
275
 
277
- pub fn series_equal(&self, other: &RbSeries, null_equal: bool, strict: bool) -> bool {
276
+ pub fn equals(&self, other: &RbSeries, null_equal: bool, strict: bool) -> bool {
278
277
  if strict {
279
278
  self.series.borrow().eq(&other.series.borrow())
280
279
  } else if null_equal {
281
- self.series
282
- .borrow()
283
- .series_equal_missing(&other.series.borrow())
280
+ self.series.borrow().equals_missing(&other.series.borrow())
284
281
  } else {
285
- self.series.borrow().series_equal(&other.series.borrow())
282
+ self.series.borrow().equals(&other.series.borrow())
286
283
  }
287
284
  }
288
285
 
@@ -316,16 +313,16 @@ impl RbSeries {
316
313
  DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
317
314
  DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
318
315
  DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
319
- DataType::Categorical(_) => {
316
+ DataType::Categorical(_, _) => {
320
317
  RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
321
318
  }
322
- DataType::Object(_) => {
319
+ DataType::Object(_, _) => {
323
320
  let v = RArray::with_capacity(series.len());
324
321
  for i in 0..series.len() {
325
322
  let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
326
323
  match obj {
327
324
  Some(val) => v.push(val.to_object()).unwrap(),
328
- None => v.push(QNIL).unwrap(),
325
+ None => v.push(qnil()).unwrap(),
329
326
  };
330
327
  }
331
328
  v.into_value()
@@ -333,10 +330,10 @@ impl RbSeries {
333
330
  DataType::List(_) => {
334
331
  let v = RArray::new();
335
332
  let ca = series.list().unwrap();
336
- for opt_s in ca.amortized_iter() {
333
+ for opt_s in unsafe { ca.amortized_iter() } {
337
334
  match opt_s {
338
335
  None => {
339
- v.push(QNIL).unwrap();
336
+ v.push(qnil()).unwrap();
340
337
  }
341
338
  Some(s) => {
342
339
  let rblst = to_a_recursive(s.as_ref());
@@ -352,7 +349,7 @@ impl RbSeries {
352
349
  for opt_s in ca.amortized_iter() {
353
350
  match opt_s {
354
351
  None => {
355
- v.push(QNIL).unwrap();
352
+ v.push(qnil()).unwrap();
356
353
  }
357
354
  Some(s) => {
358
355
  let rblst = to_a_recursive(s.as_ref());
@@ -378,8 +375,8 @@ impl RbSeries {
378
375
  let ca = series.decimal().unwrap();
379
376
  return Wrap(ca).into_value();
380
377
  }
381
- DataType::Utf8 => {
382
- let ca = series.utf8().unwrap();
378
+ DataType::String => {
379
+ let ca = series.str().unwrap();
383
380
  return Wrap(ca).into_value();
384
381
  }
385
382
  DataType::Struct(_) => {
@@ -443,7 +440,7 @@ impl RbSeries {
443
440
 
444
441
  macro_rules! dispatch_apply {
445
442
  ($self:expr, $method:ident, $($args:expr),*) => {
446
- if matches!($self.dtype(), DataType::Object(_)) {
443
+ if matches!($self.dtype(), DataType::Object(_, _)) {
447
444
  // let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
448
445
  // ca.$method($($args),*)
449
446
  todo!()
@@ -464,7 +461,7 @@ impl RbSeries {
464
461
  DataType::Datetime(_, _)
465
462
  | DataType::Date
466
463
  | DataType::Duration(_)
467
- | DataType::Categorical(_)
464
+ | DataType::Categorical(_, _)
468
465
  | DataType::Time
469
466
  ) || !skip_nulls
470
467
  {
@@ -605,12 +602,12 @@ impl RbSeries {
605
602
  )?;
606
603
  ca.into_datetime(tu, tz).into_series()
607
604
  }
608
- Some(DataType::Utf8) => {
605
+ Some(DataType::String) => {
609
606
  let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
610
607
 
611
608
  ca.into_series()
612
609
  }
613
- Some(DataType::Object(_)) => {
610
+ Some(DataType::Object(_, _)) => {
614
611
  let ca =
615
612
  dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
616
613
  ca.into_series()
@@ -643,14 +640,6 @@ impl RbSeries {
643
640
  Ok(df.into())
644
641
  }
645
642
 
646
- pub fn peak_max(&self) -> Self {
647
- self.series.borrow().peak_max().into_series().into()
648
- }
649
-
650
- pub fn peak_min(&self) -> Self {
651
- self.series.borrow().peak_min().into_series().into()
652
- }
653
-
654
643
  pub fn n_unique(&self) -> RbResult<usize> {
655
644
  let n = self.series.borrow().n_unique().map_err(RbPolarsErr::from)?;
656
645
  Ok(n)
@@ -665,8 +654,13 @@ impl RbSeries {
665
654
  self.series.borrow_mut().shrink_to_fit();
666
655
  }
667
656
 
668
- pub fn dot(&self, other: &RbSeries) -> Option<f64> {
669
- self.series.borrow().dot(&other.series.borrow())
657
+ pub fn dot(&self, other: &RbSeries) -> RbResult<f64> {
658
+ let out = self
659
+ .series
660
+ .borrow()
661
+ .dot(&other.series.borrow())
662
+ .map_err(RbPolarsErr::from)?;
663
+ Ok(out)
670
664
  }
671
665
 
672
666
  pub fn skew(&self, bias: bool) -> RbResult<Option<f64>> {
@@ -708,17 +702,6 @@ impl RbSeries {
708
702
  None
709
703
  }
710
704
  }
711
-
712
- pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
713
- let mut s = self.series.borrow_mut();
714
- match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
715
- Ok(out) => {
716
- *s = out;
717
- Ok(())
718
- }
719
- Err(e) => Err(RbPolarsErr::from(e)),
720
- }
721
- }
722
705
  }
723
706
 
724
707
  macro_rules! impl_set_with_mask {
@@ -1,18 +1,34 @@
1
- // use polars::export::arrow2::array::Array;
1
+ use polars::export::arrow::array::Array;
2
2
  use polars::prelude::*;
3
3
 
4
- pub fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
4
+ use crate::error::RbPolarsErr;
5
+ use crate::{RbResult, RbSeries};
6
+
7
+ impl RbSeries {
8
+ pub fn scatter(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
9
+ let mut s = self.series.borrow_mut();
10
+ match scatter(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
11
+ Ok(out) => {
12
+ *s = out;
13
+ Ok(())
14
+ }
15
+ Err(e) => Err(RbPolarsErr::from(e)),
16
+ }
17
+ }
18
+ }
19
+
20
+ fn scatter(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
5
21
  let logical_dtype = s.dtype().clone();
6
22
  let idx = idx.cast(&IDX_DTYPE)?;
7
23
  let idx = idx.rechunk();
8
24
  let idx = idx.idx().unwrap();
9
25
  let idx = idx.downcast_iter().next().unwrap();
10
26
 
11
- // if idx.null_count() > 0 {
12
- // return Err(PolarsError::ComputeError(
13
- // "index values should not be null".into(),
14
- // ));
15
- // }
27
+ if idx.null_count() > 0 {
28
+ return Err(PolarsError::ComputeError(
29
+ "index values should not be null".into(),
30
+ ));
31
+ }
16
32
 
17
33
  let idx = idx.values().as_slice();
18
34
 
@@ -27,62 +43,62 @@ pub fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<
27
43
  DataType::Int8 => {
28
44
  let ca: &mut ChunkedArray<Int8Type> = mutable_s.as_mut();
29
45
  let values = values.i8()?;
30
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
46
+ std::mem::take(ca).scatter(idx, values)
31
47
  }
32
48
  DataType::Int16 => {
33
49
  let ca: &mut ChunkedArray<Int16Type> = mutable_s.as_mut();
34
50
  let values = values.i16()?;
35
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
51
+ std::mem::take(ca).scatter(idx, values)
36
52
  }
37
53
  DataType::Int32 => {
38
54
  let ca: &mut ChunkedArray<Int32Type> = mutable_s.as_mut();
39
55
  let values = values.i32()?;
40
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
56
+ std::mem::take(ca).scatter(idx, values)
41
57
  }
42
58
  DataType::Int64 => {
43
59
  let ca: &mut ChunkedArray<Int64Type> = mutable_s.as_mut();
44
60
  let values = values.i64()?;
45
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
61
+ std::mem::take(ca).scatter(idx, values)
46
62
  }
47
63
  DataType::UInt8 => {
48
64
  let ca: &mut ChunkedArray<UInt8Type> = mutable_s.as_mut();
49
65
  let values = values.u8()?;
50
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
66
+ std::mem::take(ca).scatter(idx, values)
51
67
  }
52
68
  DataType::UInt16 => {
53
69
  let ca: &mut ChunkedArray<UInt16Type> = mutable_s.as_mut();
54
70
  let values = values.u16()?;
55
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
71
+ std::mem::take(ca).scatter(idx, values)
56
72
  }
57
73
  DataType::UInt32 => {
58
74
  let ca: &mut ChunkedArray<UInt32Type> = mutable_s.as_mut();
59
75
  let values = values.u32()?;
60
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
76
+ std::mem::take(ca).scatter(idx, values)
61
77
  }
62
78
  DataType::UInt64 => {
63
79
  let ca: &mut ChunkedArray<UInt64Type> = mutable_s.as_mut();
64
80
  let values = values.u64()?;
65
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
81
+ std::mem::take(ca).scatter(idx, values)
66
82
  }
67
83
  DataType::Float32 => {
68
84
  let ca: &mut ChunkedArray<Float32Type> = mutable_s.as_mut();
69
85
  let values = values.f32()?;
70
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
86
+ std::mem::take(ca).scatter(idx, values)
71
87
  }
72
88
  DataType::Float64 => {
73
89
  let ca: &mut ChunkedArray<Float64Type> = mutable_s.as_mut();
74
90
  let values = values.f64()?;
75
- std::mem::take(ca).set_at_idx2(idx, values.into_iter())
91
+ std::mem::take(ca).scatter(idx, values)
76
92
  }
77
93
  DataType::Boolean => {
78
94
  let ca = s.bool()?;
79
95
  let values = values.bool()?;
80
- ca.set_at_idx2(idx, values)
96
+ ca.scatter(idx, values)
81
97
  }
82
- DataType::Utf8 => {
83
- let ca = s.utf8()?;
84
- let values = values.utf8()?;
85
- ca.set_at_idx2(idx, values)
98
+ DataType::String => {
99
+ let ca = s.str()?;
100
+ let values = values.str()?;
101
+ ca.scatter(idx, values)
86
102
  }
87
103
  _ => panic!("not yet implemented for dtype: {}", logical_dtype),
88
104
  };
@@ -0,0 +1,46 @@
1
+ use polars::sql::SQLContext;
2
+ use std::cell::RefCell;
3
+
4
+ use crate::{RbLazyFrame, RbPolarsErr, RbResult};
5
+
6
+ #[magnus::wrap(class = "Polars::RbSQLContext")]
7
+ #[repr(transparent)]
8
+ #[derive(Clone)]
9
+ pub struct RbSQLContext {
10
+ pub context: RefCell<SQLContext>,
11
+ }
12
+
13
+ #[allow(
14
+ clippy::wrong_self_convention,
15
+ clippy::should_implement_trait,
16
+ clippy::len_without_is_empty
17
+ )]
18
+ impl RbSQLContext {
19
+ #[allow(clippy::new_without_default)]
20
+ pub fn new() -> RbSQLContext {
21
+ RbSQLContext {
22
+ context: SQLContext::new().into(),
23
+ }
24
+ }
25
+
26
+ pub fn execute(&self, query: String) -> RbResult<RbLazyFrame> {
27
+ Ok(self
28
+ .context
29
+ .borrow_mut()
30
+ .execute(&query)
31
+ .map_err(RbPolarsErr::from)?
32
+ .into())
33
+ }
34
+
35
+ pub fn get_tables(&self) -> RbResult<Vec<String>> {
36
+ Ok(self.context.borrow().get_tables())
37
+ }
38
+
39
+ pub fn register(&self, name: String, lf: &RbLazyFrame) {
40
+ self.context.borrow_mut().register(&name, lf.ldf.clone())
41
+ }
42
+
43
+ pub fn unregister(&self, name: String) {
44
+ self.context.borrow_mut().unregister(&name)
45
+ }
46
+ }
@@ -23,7 +23,7 @@ macro_rules! apply_method_all_arrow_series2 {
23
23
  ($self:expr, $method:ident, $($args:expr),*) => {
24
24
  match $self.dtype() {
25
25
  DataType::Boolean => $self.bool().unwrap().$method($($args),*),
26
- DataType::Utf8 => $self.utf8().unwrap().$method($($args),*),
26
+ DataType::String => $self.str().unwrap().$method($($args),*),
27
27
  DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
28
28
  DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
29
29
  DataType::UInt32 => $self.u32().unwrap().$method($($args),*),