polars-df 0.23.0 → 0.24.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
Files changed (146)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +127 -1
  3. data/Cargo.lock +72 -58
  4. data/README.md +31 -27
  5. data/ext/polars/Cargo.toml +15 -6
  6. data/ext/polars/src/batched_csv.rs +35 -39
  7. data/ext/polars/src/c_api/allocator.rs +7 -0
  8. data/ext/polars/src/c_api/mod.rs +1 -0
  9. data/ext/polars/src/catalog/unity.rs +123 -101
  10. data/ext/polars/src/conversion/any_value.rs +13 -17
  11. data/ext/polars/src/conversion/chunked_array.rs +5 -5
  12. data/ext/polars/src/conversion/datetime.rs +3 -2
  13. data/ext/polars/src/conversion/mod.rs +50 -45
  14. data/ext/polars/src/dataframe/export.rs +13 -13
  15. data/ext/polars/src/dataframe/general.rs +223 -223
  16. data/ext/polars/src/dataframe/io.rs +27 -141
  17. data/ext/polars/src/dataframe/mod.rs +13 -5
  18. data/ext/polars/src/dataframe/serde.rs +1 -1
  19. data/ext/polars/src/error.rs +44 -7
  20. data/ext/polars/src/exceptions.rs +45 -12
  21. data/ext/polars/src/expr/array.rs +12 -0
  22. data/ext/polars/src/expr/datatype.rs +2 -2
  23. data/ext/polars/src/expr/datetime.rs +4 -5
  24. data/ext/polars/src/expr/general.rs +49 -13
  25. data/ext/polars/src/expr/list.rs +4 -0
  26. data/ext/polars/src/expr/meta.rs +8 -3
  27. data/ext/polars/src/expr/mod.rs +22 -6
  28. data/ext/polars/src/expr/name.rs +19 -8
  29. data/ext/polars/src/expr/rolling.rs +50 -1
  30. data/ext/polars/src/expr/string.rs +0 -1
  31. data/ext/polars/src/expr/struct.rs +7 -2
  32. data/ext/polars/src/file.rs +136 -103
  33. data/ext/polars/src/functions/aggregation.rs +9 -8
  34. data/ext/polars/src/functions/io.rs +81 -10
  35. data/ext/polars/src/functions/lazy.rs +95 -21
  36. data/ext/polars/src/functions/mod.rs +2 -0
  37. data/ext/polars/src/functions/range.rs +19 -3
  38. data/ext/polars/src/functions/strings.rs +6 -0
  39. data/ext/polars/src/functions/utils.rs +6 -0
  40. data/ext/polars/src/interop/arrow/mod.rs +50 -1
  41. data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
  42. data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
  43. data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
  44. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  45. data/ext/polars/src/lazyframe/exitable.rs +39 -0
  46. data/ext/polars/src/lazyframe/general.rs +340 -236
  47. data/ext/polars/src/lazyframe/mod.rs +46 -10
  48. data/ext/polars/src/lazyframe/optflags.rs +5 -4
  49. data/ext/polars/src/lazyframe/serde.rs +11 -3
  50. data/ext/polars/src/lazyframe/sink.rs +10 -5
  51. data/ext/polars/src/lazygroupby.rs +6 -7
  52. data/ext/polars/src/lib.rs +141 -76
  53. data/ext/polars/src/map/dataframe.rs +12 -12
  54. data/ext/polars/src/map/lazy.rs +7 -5
  55. data/ext/polars/src/map/mod.rs +15 -8
  56. data/ext/polars/src/map/series.rs +3 -3
  57. data/ext/polars/src/on_startup.rs +16 -8
  58. data/ext/polars/src/prelude.rs +1 -0
  59. data/ext/polars/src/rb_modules.rs +19 -49
  60. data/ext/polars/src/series/aggregation.rs +79 -140
  61. data/ext/polars/src/series/arithmetic.rs +16 -22
  62. data/ext/polars/src/series/comparison.rs +101 -222
  63. data/ext/polars/src/series/construction.rs +17 -18
  64. data/ext/polars/src/series/export.rs +1 -1
  65. data/ext/polars/src/series/general.rs +254 -289
  66. data/ext/polars/src/series/import.rs +17 -0
  67. data/ext/polars/src/series/map.rs +178 -160
  68. data/ext/polars/src/series/mod.rs +28 -12
  69. data/ext/polars/src/series/scatter.rs +12 -9
  70. data/ext/polars/src/sql.rs +16 -9
  71. data/ext/polars/src/testing/frame.rs +31 -0
  72. data/ext/polars/src/testing/mod.rs +5 -0
  73. data/ext/polars/src/testing/series.rs +31 -0
  74. data/ext/polars/src/timeout.rs +105 -0
  75. data/ext/polars/src/utils.rs +159 -1
  76. data/lib/polars/array_expr.rb +81 -12
  77. data/lib/polars/array_name_space.rb +74 -7
  78. data/lib/polars/batched_csv_reader.rb +21 -21
  79. data/lib/polars/binary_name_space.rb +1 -1
  80. data/lib/polars/cat_expr.rb +7 -7
  81. data/lib/polars/config.rb +1 -1
  82. data/lib/polars/convert.rb +189 -34
  83. data/lib/polars/data_frame.rb +1066 -831
  84. data/lib/polars/data_frame_plot.rb +173 -0
  85. data/lib/polars/data_type_group.rb +1 -0
  86. data/lib/polars/data_types.rb +31 -12
  87. data/lib/polars/date_time_expr.rb +51 -69
  88. data/lib/polars/date_time_name_space.rb +80 -112
  89. data/lib/polars/dynamic_group_by.rb +7 -7
  90. data/lib/polars/exceptions.rb +50 -10
  91. data/lib/polars/expr.rb +470 -517
  92. data/lib/polars/functions/aggregation/horizontal.rb +0 -1
  93. data/lib/polars/functions/aggregation/vertical.rb +2 -3
  94. data/lib/polars/functions/as_datatype.rb +290 -8
  95. data/lib/polars/functions/eager.rb +204 -10
  96. data/lib/polars/functions/escape_regex.rb +21 -0
  97. data/lib/polars/functions/lazy.rb +409 -169
  98. data/lib/polars/functions/lit.rb +17 -1
  99. data/lib/polars/functions/range/int_range.rb +74 -2
  100. data/lib/polars/functions/range/linear_space.rb +77 -0
  101. data/lib/polars/functions/range/time_range.rb +1 -1
  102. data/lib/polars/functions/repeat.rb +3 -12
  103. data/lib/polars/functions/whenthen.rb +2 -2
  104. data/lib/polars/group_by.rb +72 -20
  105. data/lib/polars/iceberg_dataset.rb +1 -6
  106. data/lib/polars/in_process_query.rb +37 -0
  107. data/lib/polars/io/cloud.rb +18 -0
  108. data/lib/polars/io/csv.rb +265 -126
  109. data/lib/polars/io/database.rb +0 -1
  110. data/lib/polars/io/delta.rb +15 -7
  111. data/lib/polars/io/ipc.rb +24 -17
  112. data/lib/polars/io/ndjson.rb +161 -24
  113. data/lib/polars/io/parquet.rb +101 -38
  114. data/lib/polars/lazy_frame.rb +849 -558
  115. data/lib/polars/lazy_group_by.rb +327 -2
  116. data/lib/polars/list_expr.rb +94 -16
  117. data/lib/polars/list_name_space.rb +88 -24
  118. data/lib/polars/meta_expr.rb +42 -1
  119. data/lib/polars/name_expr.rb +41 -4
  120. data/lib/polars/query_opt_flags.rb +198 -2
  121. data/lib/polars/rolling_group_by.rb +3 -3
  122. data/lib/polars/schema.rb +21 -3
  123. data/lib/polars/selector.rb +37 -2
  124. data/lib/polars/selectors.rb +45 -9
  125. data/lib/polars/series.rb +1156 -728
  126. data/lib/polars/series_plot.rb +72 -0
  127. data/lib/polars/slice.rb +1 -1
  128. data/lib/polars/sql_context.rb +11 -4
  129. data/lib/polars/string_expr.rb +59 -68
  130. data/lib/polars/string_name_space.rb +51 -87
  131. data/lib/polars/struct_expr.rb +36 -18
  132. data/lib/polars/testing.rb +24 -273
  133. data/lib/polars/utils/constants.rb +2 -0
  134. data/lib/polars/utils/construction/data_frame.rb +410 -0
  135. data/lib/polars/utils/construction/series.rb +364 -0
  136. data/lib/polars/utils/construction/utils.rb +9 -0
  137. data/lib/polars/utils/deprecation.rb +11 -0
  138. data/lib/polars/utils/serde.rb +8 -3
  139. data/lib/polars/utils/unstable.rb +19 -0
  140. data/lib/polars/utils/various.rb +59 -0
  141. data/lib/polars/utils.rb +46 -47
  142. data/lib/polars/version.rb +1 -1
  143. data/lib/polars.rb +47 -1
  144. metadata +25 -6
  145. data/ext/polars/src/allocator.rs +0 -13
  146. data/lib/polars/plot.rb +0 -109
@@ -1,28 +1,55 @@
1
+ mod exitable;
1
2
  mod general;
2
3
  mod optflags;
3
4
  mod serde;
4
5
  mod sink;
5
6
 
6
- use polars::prelude::{LazyFrame, OptFlags};
7
+ pub use exitable::RbInProcessQuery;
8
+ use magnus::{TryConvert, Value};
9
+ use parking_lot::RwLock;
10
+ use polars::prelude::{Engine, LazyFrame, OptFlags};
7
11
  pub use sink::SinkTarget;
8
- use std::cell::RefCell;
12
+
13
+ use crate::prelude::Wrap;
14
+ use crate::{RbResult, RbValueError};
9
15
 
10
16
  #[magnus::wrap(class = "Polars::RbLazyFrame")]
11
- #[derive(Clone)]
17
+ #[repr(transparent)]
12
18
  pub struct RbLazyFrame {
13
- pub ldf: RefCell<LazyFrame>,
19
+ pub ldf: RwLock<LazyFrame>,
14
20
  }
15
21
 
16
- #[magnus::wrap(class = "Polars::RbOptFlags")]
17
- #[derive(Clone)]
18
- pub struct RbOptFlags {
19
- pub inner: RefCell<OptFlags>,
22
+ impl Clone for RbLazyFrame {
23
+ fn clone(&self) -> Self {
24
+ Self {
25
+ ldf: RwLock::new(self.ldf.read().clone()),
26
+ }
27
+ }
20
28
  }
21
29
 
22
30
  impl From<LazyFrame> for RbLazyFrame {
23
31
  fn from(ldf: LazyFrame) -> Self {
24
32
  RbLazyFrame {
25
- ldf: RefCell::new(ldf),
33
+ ldf: RwLock::new(ldf),
34
+ }
35
+ }
36
+ }
37
+
38
+ impl From<RbLazyFrame> for LazyFrame {
39
+ fn from(pldf: RbLazyFrame) -> Self {
40
+ pldf.ldf.into_inner()
41
+ }
42
+ }
43
+
44
+ #[magnus::wrap(class = "Polars::RbOptFlags")]
45
+ pub struct RbOptFlags {
46
+ pub inner: RwLock<OptFlags>,
47
+ }
48
+
49
+ impl Clone for RbOptFlags {
50
+ fn clone(&self) -> Self {
51
+ Self {
52
+ inner: RwLock::new(*self.inner.read()),
26
53
  }
27
54
  }
28
55
  }
@@ -30,7 +57,16 @@ impl From<LazyFrame> for RbLazyFrame {
30
57
  impl From<OptFlags> for RbOptFlags {
31
58
  fn from(inner: OptFlags) -> Self {
32
59
  RbOptFlags {
33
- inner: RefCell::new(inner),
60
+ inner: RwLock::new(inner),
34
61
  }
35
62
  }
36
63
  }
64
+
65
+ impl TryConvert for Wrap<Engine> {
66
+ fn try_convert(ob: Value) -> RbResult<Self> {
67
+ let parsed = String::try_convert(ob)?
68
+ .parse()
69
+ .map_err(RbValueError::new_err)?;
70
+ Ok(Wrap(parsed))
71
+ }
72
+ }
@@ -1,3 +1,4 @@
1
+ use parking_lot::RwLock;
1
2
  use polars::prelude::OptFlags;
2
3
 
3
4
  use super::RbOptFlags;
@@ -18,20 +19,20 @@ macro_rules! flag_getter_setters {
18
19
 
19
20
  pub fn no_optimizations(&self) {
20
21
  $(if $clear {
21
- self.inner.borrow_mut().remove(OptFlags::$flag);
22
+ self.inner.write().remove(OptFlags::$flag);
22
23
  })+
23
24
  }
24
25
 
25
26
  pub fn copy(&self) -> Self {
26
- Self { inner: self.inner.clone() }
27
+ Self { inner: RwLock::new(self.inner.read().clone()) }
27
28
  }
28
29
 
29
30
  $(
30
31
  pub fn $getter(&self) -> bool {
31
- self.inner.borrow().contains(OptFlags::$flag)
32
+ self.inner.read().contains(OptFlags::$flag)
32
33
  }
33
34
  pub fn $setter(&self, value: bool) {
34
- self.inner.borrow_mut().set(OptFlags::$flag, value)
35
+ self.inner.write().set(OptFlags::$flag, value)
35
36
  }
36
37
  )+
37
38
  }
@@ -1,11 +1,12 @@
1
- use std::io::Read;
2
1
  #[cfg(feature = "serialize_binary")]
3
- use std::io::{BufReader, BufWriter};
2
+ use std::io::BufReader;
3
+ use std::io::{BufWriter, Read};
4
4
 
5
5
  use magnus::Value;
6
6
  use polars::lazy::frame::LazyFrame;
7
7
  use polars::prelude::*;
8
8
 
9
+ use crate::exceptions::ComputeError;
9
10
  use crate::file::get_file_like;
10
11
  #[cfg(feature = "serialize_binary")]
11
12
  use crate::utils::to_rb_err;
@@ -17,12 +18,19 @@ impl RbLazyFrame {
17
18
  let file = get_file_like(rb_f, true)?;
18
19
  let writer = BufWriter::new(file);
19
20
  self.ldf
20
- .borrow()
21
+ .read()
21
22
  .logical_plan
22
23
  .serialize_versioned(writer, Default::default())
23
24
  .map_err(to_rb_err)
24
25
  }
25
26
 
27
+ pub fn serialize_json(&self, rb_f: Value) -> RbResult<()> {
28
+ let file = get_file_like(rb_f, true)?;
29
+ let writer = BufWriter::new(file);
30
+ serde_json::to_writer(writer, &self.ldf.read().logical_plan)
31
+ .map_err(|err| ComputeError::new_err(err.to_string()))
32
+ }
33
+
26
34
  #[cfg(feature = "serialize_binary")]
27
35
  pub fn deserialize_binary(rb_f: Value) -> RbResult<Self> {
28
36
  let file = get_file_like(rb_f, false)?;
@@ -1,11 +1,12 @@
1
1
  use std::sync::{Arc, Mutex};
2
2
 
3
- use magnus::{RHash, TryConvert, Value};
3
+ use magnus::{RHash, Ruby, TryConvert, Value};
4
4
  use polars::prelude::sync_on_close::SyncOnCloseType;
5
5
  use polars::prelude::{PlPath, SinkOptions, SpecialEq};
6
6
  use polars_utils::plpath::PlPathRef;
7
7
 
8
8
  use crate::prelude::Wrap;
9
+ use crate::utils::RubyAttach;
9
10
  use crate::{RbResult, RbValueError};
10
11
 
11
12
  #[derive(Clone)]
@@ -18,10 +19,14 @@ impl TryConvert for Wrap<polars_plan::dsl::SinkTarget> {
18
19
  if let Ok(v) = String::try_convert(ob) {
19
20
  Ok(Wrap(polars::prelude::SinkTarget::Path(PlPath::new(&v))))
20
21
  } else {
21
- let writer = {
22
+ let writer = Ruby::attach(|rb| {
22
23
  let rb_f = ob;
23
- RbResult::Ok(crate::file::try_get_rbfile(rb_f, true)?.0.into_writeable())
24
- }?;
24
+ RbResult::Ok(
25
+ crate::file::try_get_rbfile(rb, rb_f, true)?
26
+ .0
27
+ .into_writeable(),
28
+ )
29
+ })?;
25
30
 
26
31
  Ok(Wrap(polars_plan::prelude::SinkTarget::Dyn(SpecialEq::new(
27
32
  Arc::new(Mutex::new(Some(writer))),
@@ -71,7 +76,7 @@ impl TryConvert for Wrap<SinkOptions> {
71
76
 
72
77
  if parsed.len() != 3 {
73
78
  return Err(RbValueError::new_err(
74
- "`sink_options` must be a dictionary with the exactly 3 field.",
79
+ "`sink_options` must be a hash with the exactly 3 field.",
75
80
  ));
76
81
  }
77
82
 
@@ -1,29 +1,28 @@
1
1
  use magnus::RArray;
2
2
  use polars::lazy::frame::LazyGroupBy;
3
- use std::cell::RefCell;
4
3
 
5
- use crate::expr::rb_exprs_to_exprs;
4
+ use crate::expr::ToExprs;
6
5
  use crate::{RbLazyFrame, RbResult};
7
6
 
8
7
  #[magnus::wrap(class = "Polars::RbLazyGroupBy")]
9
8
  pub struct RbLazyGroupBy {
10
- pub lgb: RefCell<Option<LazyGroupBy>>,
9
+ pub lgb: Option<LazyGroupBy>,
11
10
  }
12
11
 
13
12
  impl RbLazyGroupBy {
14
13
  pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
15
- let lgb = self.lgb.borrow_mut().take().unwrap();
16
- let aggs = rb_exprs_to_exprs(aggs)?;
14
+ let lgb = self.lgb.clone().unwrap();
15
+ let aggs = aggs.to_exprs()?;
17
16
  Ok(lgb.agg(aggs).into())
18
17
  }
19
18
 
20
19
  pub fn head(&self, n: usize) -> RbLazyFrame {
21
- let lgb = self.lgb.take().unwrap();
20
+ let lgb = self.lgb.clone().unwrap();
22
21
  lgb.head(Some(n)).into()
23
22
  }
24
23
 
25
24
  pub fn tail(&self, n: usize) -> RbLazyFrame {
26
- let lgb = self.lgb.take().unwrap();
25
+ let lgb = self.lgb.clone().unwrap();
27
26
  lgb.tail(Some(n)).into()
28
27
  }
29
28
  }