polars-df 0.17.0 → 0.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: de79849ac873f2360b7768ae422cea5f16b0103b910e4e80b88310365cf4a060
4
- data.tar.gz: a479ccd22196787a6e19d9ab715a2ce7471dcba6c3fe9292c44c33958b2cc1fd
3
+ metadata.gz: b2df0748eeb77638cc93667d89231cfd03510a856a84a9e01a9cdc97b0b6ae57
4
+ data.tar.gz: fabc2dedf44a22a4e657be770a1cc34ce2f8001e9f4cafbbd4346eabc4d1a4ec
5
5
  SHA512:
6
- metadata.gz: d19d168b4a3fe616c4076e49e60d8cc6da05fa3df2042c9d6c8fc475daec3335488fa623908229509271430957ee414a852f571b9e750962319c3623493b66ca
7
- data.tar.gz: 32f97daa9f7743364e3ae0bc8087e60f4f64979a0163218a912b5c5d6a5917ad3a9e7ab98c0b510cca46d8582e8b2bd16ee5692eba4f6982116f9aebde40fa4f
6
+ metadata.gz: df7d294861508ccbc11a0d5ebb90880e897a6bc2f91396b1e08839c1fe019386edf0f0c80a9c6db256ca1d917dd8253bee78d353bf692a28e76b20bd048cea2b
7
+ data.tar.gz: be4eb782c03b0b41f42b0ce3aad8a64f388a4c79444be282a5802499e88f23e8e8efeea85f6ff6f684f9c004502b9be44aba33688e54b70ca4dd0a8596c0960a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
1
+ ## 0.17.1 (2025-04-12)
2
+
3
+ - Added support for horizontal concatenation of `LazyFrame`s
4
+ - Added `diagonal_relaxed` and `align` strategies to `concat` method
5
+ - Added `interpolate_by` method to `Series` and `Expr`
6
+ - Added `iter_columns` and `iter_slices` methods to `DataFrame`
7
+ - Added `maintain_order` option to `join` method
8
+ - Added `collect_schema` method to `DataFrame`
9
+ - Added `write_database` method to `DataFrame` (experimental)
10
+ - Fixed error with `to_numo` method for `Boolean` series with null values
11
+ - Fixed error with `slice` method and negative offset
12
+
1
13
  ## 0.17.0 (2025-01-28)
2
14
 
3
15
  - Updated Polars to 0.46.0
data/Cargo.lock CHANGED
@@ -1645,7 +1645,7 @@ dependencies = [
1645
1645
 
1646
1646
  [[package]]
1647
1647
  name = "polars"
1648
- version = "0.17.0"
1648
+ version = "0.17.1"
1649
1649
  dependencies = [
1650
1650
  "ahash",
1651
1651
  "bytes",
@@ -1654,6 +1654,7 @@ dependencies = [
1654
1654
  "jemallocator",
1655
1655
  "magnus",
1656
1656
  "mimalloc",
1657
+ "num-traits",
1657
1658
  "polars 0.46.0",
1658
1659
  "polars-arrow",
1659
1660
  "polars-core",
@@ -2545,15 +2546,14 @@ dependencies = [
2545
2546
 
2546
2547
  [[package]]
2547
2548
  name = "ring"
2548
- version = "0.17.8"
2549
+ version = "0.17.13"
2549
2550
  source = "registry+https://github.com/rust-lang/crates.io-index"
2550
- checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d"
2551
+ checksum = "70ac5d832aa16abd7d1def883a8545280c20a60f523a370aa3a9617c2b8550ee"
2551
2552
  dependencies = [
2552
2553
  "cc",
2553
2554
  "cfg-if",
2554
2555
  "getrandom",
2555
2556
  "libc",
2556
- "spin",
2557
2557
  "untrusted",
2558
2558
  "windows-sys 0.52.0",
2559
2559
  ]
@@ -2862,12 +2862,6 @@ dependencies = [
2862
2862
  "windows-sys 0.52.0",
2863
2863
  ]
2864
2864
 
2865
- [[package]]
2866
- name = "spin"
2867
- version = "0.9.8"
2868
- source = "registry+https://github.com/rust-lang/crates.io-index"
2869
- checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
2870
-
2871
2865
  [[package]]
2872
2866
  name = "sqlparser"
2873
2867
  version = "0.53.0"
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Ruby Polars
2
2
 
3
- :fire: Blazingly fast DataFrames for Ruby, powered by [Polars](https://github.com/pola-rs/polars)
3
+ 🔥 Blazingly fast DataFrames for Ruby, powered by [Polars](https://github.com/pola-rs/polars)
4
4
 
5
5
  [![Build Status](https://github.com/ankane/ruby-polars/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/ruby-polars/actions)
6
6
 
@@ -448,7 +448,7 @@ df.group_by("c").plot("a", "b", stacked: true)
448
448
 
449
449
  ## History
450
450
 
451
- View the [changelog](CHANGELOG.md)
451
+ View the [changelog](https://github.com/ankane/ruby-polars/blob/master/CHANGELOG.md)
452
452
 
453
453
  ## Contributing
454
454
 
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.17.0"
3
+ version = "0.17.1"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -17,6 +17,7 @@ bytes = "1"
17
17
  chrono = "0.4"
18
18
  either = "1.8"
19
19
  magnus = "0.7"
20
+ num-traits = "0.2"
20
21
  polars-core = "=0.46.0"
21
22
  polars-plan = "=0.46.0"
22
23
  polars-parquet = "=0.46.0"
@@ -63,6 +64,7 @@ features = [
63
64
  "gcp",
64
65
  "http",
65
66
  "interpolate",
67
+ "interpolate_by",
66
68
  "ipc",
67
69
  "ipc_streaming",
68
70
  "is_between",
@@ -17,7 +17,7 @@ impl IntoValue for Wrap<AnyValue<'_>> {
17
17
  }
18
18
  }
19
19
 
20
- impl<'s> TryConvert for Wrap<AnyValue<'s>> {
20
+ impl TryConvert for Wrap<AnyValue<'_>> {
21
21
  fn try_convert(ob: Value) -> RbResult<Self> {
22
22
  rb_object_to_any_value(ob, true).map(Wrap)
23
23
  }
@@ -1065,6 +1065,24 @@ impl TryConvert for Wrap<JoinValidation> {
1065
1065
  }
1066
1066
  }
1067
1067
 
1068
+ impl TryConvert for Wrap<MaintainOrderJoin> {
1069
+ fn try_convert(ob: Value) -> RbResult<Self> {
1070
+ let parsed = match String::try_convert(ob)?.as_str() {
1071
+ "none" => MaintainOrderJoin::None,
1072
+ "left" => MaintainOrderJoin::Left,
1073
+ "right" => MaintainOrderJoin::Right,
1074
+ "left_right" => MaintainOrderJoin::LeftRight,
1075
+ "right_left" => MaintainOrderJoin::RightLeft,
1076
+ v => {
1077
+ return Err(RbValueError::new_err(format!(
1078
+ "`maintain_order` must be one of {{'none', 'left', 'right', 'left_right', 'right_left'}}, got {v}",
1079
+ )))
1080
+ },
1081
+ };
1082
+ Ok(Wrap(parsed))
1083
+ }
1084
+ }
1085
+
1068
1086
  impl TryConvert for Wrap<QuoteStyle> {
1069
1087
  fn try_convert(ob: Value) -> RbResult<Self> {
1070
1088
  let parsed = match String::try_convert(ob)?.as_str() {
@@ -295,11 +295,11 @@ impl RbDataFrame {
295
295
  Ok(())
296
296
  }
297
297
 
298
- pub fn slice(&self, offset: usize, length: Option<usize>) -> Self {
299
- let df = self.df.borrow().slice(
300
- offset as i64,
301
- length.unwrap_or_else(|| self.df.borrow().height()),
302
- );
298
+ pub fn slice(&self, offset: i64, length: Option<usize>) -> Self {
299
+ let df = self
300
+ .df
301
+ .borrow()
302
+ .slice(offset, length.unwrap_or_else(|| self.df.borrow().height()));
303
303
  df.into()
304
304
  }
305
305
 
@@ -671,6 +671,10 @@ impl RbExpr {
671
671
  self.inner.clone().interpolate(method.0).into()
672
672
  }
673
673
 
674
+ pub fn interpolate_by(&self, by: &Self) -> Self {
675
+ self.inner.clone().interpolate_by(by.inner.clone()).into()
676
+ }
677
+
674
678
  pub fn lower_bound(&self) -> Self {
675
679
  self.inner.clone().lower_bound().into()
676
680
  }
@@ -205,6 +205,21 @@ pub fn concat_lf_diagonal(
205
205
  Ok(lf.into())
206
206
  }
207
207
 
208
+ pub fn concat_lf_horizontal(lfs: RArray, parallel: bool) -> RbResult<RbLazyFrame> {
209
+ let iter = lfs.into_iter();
210
+
211
+ let lfs = iter.map(get_lf).collect::<RbResult<Vec<_>>>()?;
212
+
213
+ let args = UnionArgs {
214
+ rechunk: false, // No need to rechunk with horizontal concatenation
215
+ parallel,
216
+ to_supertypes: false,
217
+ ..Default::default()
218
+ };
219
+ let lf = dsl::functions::concat_lf_horizontal(lfs, args).map_err(RbPolarsErr::from)?;
220
+ Ok(lf.into())
221
+ }
222
+
208
223
  pub fn dtype_cols(dtypes: RArray) -> RbResult<RbExpr> {
209
224
  let dtypes = dtypes
210
225
  .into_iter()
@@ -1,2 +1,3 @@
1
+ pub mod numo_rs;
1
2
  pub mod to_numo_df;
2
3
  pub mod to_numo_series;
@@ -0,0 +1,52 @@
1
+ use magnus::{class, prelude::*, IntoValue, Module, RArray, RClass, RModule, Value};
2
+
3
+ use crate::RbResult;
4
+
5
+ pub trait Element: IntoValue {
6
+ fn class_name() -> &'static str;
7
+ }
8
+
9
+ macro_rules! create_element {
10
+ ($type:ty, $name:expr) => {
11
+ impl Element for $type {
12
+ fn class_name() -> &'static str {
13
+ $name
14
+ }
15
+ }
16
+ };
17
+ }
18
+
19
+ create_element!(i8, "Int8");
20
+ create_element!(i16, "Int16");
21
+ create_element!(i32, "Int32");
22
+ create_element!(i64, "Int64");
23
+ create_element!(u8, "UInt8");
24
+ create_element!(u16, "UInt16");
25
+ create_element!(u32, "UInt32");
26
+ create_element!(u64, "UInt64");
27
+ create_element!(f32, "SFloat");
28
+ create_element!(f64, "DFloat");
29
+ create_element!(bool, "Bit");
30
+
31
+ impl<T> Element for Option<T>
32
+ where
33
+ Option<T>: IntoValue,
34
+ {
35
+ fn class_name() -> &'static str {
36
+ "RObject"
37
+ }
38
+ }
39
+
40
+ pub struct RbArray1<T>(T);
41
+
42
+ impl<T: Element> RbArray1<T> {
43
+ pub fn from_iter<I>(values: I) -> RbResult<Value>
44
+ where
45
+ I: IntoIterator<Item = T>,
46
+ {
47
+ class::object()
48
+ .const_get::<_, RModule>("Numo")?
49
+ .const_get::<_, RClass>(T::class_name())?
50
+ .funcall("cast", (RArray::from_iter(values),))
51
+ }
52
+ }
@@ -1,61 +1,82 @@
1
- use magnus::{class, prelude::*, Module, RArray, RClass, RModule, Value};
2
- use polars::series::BitRepr;
1
+ use magnus::Value;
2
+ use num_traits::{Float, NumCast};
3
3
  use polars_core::prelude::*;
4
4
 
5
+ use super::numo_rs::{Element, RbArray1};
5
6
  use crate::error::RbPolarsErr;
6
7
  use crate::raise_err;
7
8
  use crate::series::RbSeries;
8
9
  use crate::RbResult;
9
10
 
10
11
  impl RbSeries {
11
- /// For numeric types, this should only be called for Series with null types.
12
- /// This will cast to floats so that `nil = NAN`
12
+ /// Convert this Series to a Numo array.
13
13
  pub fn to_numo(&self) -> RbResult<Value> {
14
- let s = &self.series.borrow();
15
- match s.dtype() {
16
- DataType::String => {
17
- let ca = s.str().unwrap();
14
+ series_to_numo(&self.series.borrow())
15
+ }
16
+ }
17
+
18
+ /// Convert a Series to a Numo array.
19
+ fn series_to_numo(s: &Series) -> RbResult<Value> {
20
+ series_to_numo_with_copy(s)
21
+ }
18
22
 
19
- // TODO make more efficient
20
- let np_arr = RArray::from_iter(ca);
21
- class::object()
22
- .const_get::<_, RModule>("Numo")?
23
- .const_get::<_, RClass>("RObject")?
24
- .funcall("cast", (np_arr,))
25
- }
26
- dt if dt.is_primitive_numeric() => {
27
- if let Some(BitRepr::Large(_)) = s.bit_repr() {
28
- let s = s.cast(&DataType::Float64).unwrap();
29
- let ca = s.f64().unwrap();
30
- // TODO make more efficient
31
- let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
32
- Some(v) => v,
33
- None => f64::NAN,
34
- }));
35
- class::object()
36
- .const_get::<_, RModule>("Numo")?
37
- .const_get::<_, RClass>("DFloat")?
38
- .funcall("cast", (np_arr,))
39
- } else {
40
- let s = s.cast(&DataType::Float32).unwrap();
41
- let ca = s.f32().unwrap();
42
- // TODO make more efficient
43
- let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
44
- Some(v) => v,
45
- None => f32::NAN,
46
- }));
47
- class::object()
48
- .const_get::<_, RModule>("Numo")?
49
- .const_get::<_, RClass>("SFloat")?
50
- .funcall("cast", (np_arr,))
51
- }
52
- }
53
- dt => {
54
- raise_err!(
55
- format!("'to_numo' not supported for dtype: {dt:?}"),
56
- ComputeError
57
- );
58
- }
23
+ /// Convert a Series to a Numo array, copying data in the process.
24
+ fn series_to_numo_with_copy(s: &Series) -> RbResult<Value> {
25
+ use DataType::*;
26
+ match s.dtype() {
27
+ Int8 => numeric_series_to_numpy::<Int8Type, f32>(s),
28
+ Int16 => numeric_series_to_numpy::<Int16Type, f32>(s),
29
+ Int32 => numeric_series_to_numpy::<Int32Type, f64>(s),
30
+ Int64 => numeric_series_to_numpy::<Int64Type, f64>(s),
31
+ UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(s),
32
+ UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(s),
33
+ UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(s),
34
+ UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(s),
35
+ Float32 => numeric_series_to_numpy::<Float32Type, f32>(s),
36
+ Float64 => numeric_series_to_numpy::<Float64Type, f64>(s),
37
+ Boolean => boolean_series_to_numo(s),
38
+ String => {
39
+ let ca = s.str().unwrap();
40
+ RbArray1::from_iter(ca)
41
+ }
42
+ dt => {
43
+ raise_err!(
44
+ format!("'to_numo' not supported for dtype: {dt:?}"),
45
+ ComputeError
46
+ );
59
47
  }
60
48
  }
61
49
  }
50
+
51
+ /// Convert numeric types to f32 or f64 with NaN representing a null value.
52
+ fn numeric_series_to_numpy<T, U>(s: &Series) -> RbResult<Value>
53
+ where
54
+ T: PolarsNumericType,
55
+ T::Native: Element,
56
+ U: Float + Element,
57
+ {
58
+ let ca: &ChunkedArray<T> = s.as_ref().as_ref();
59
+ if s.null_count() == 0 {
60
+ let values = ca.into_no_null_iter();
61
+ RbArray1::<T::Native>::from_iter(values)
62
+ } else {
63
+ let mapper = |opt_v: Option<T::Native>| match opt_v {
64
+ Some(v) => NumCast::from(v).unwrap(),
65
+ None => U::nan(),
66
+ };
67
+ let values = ca.iter().map(mapper);
68
+ RbArray1::from_iter(values)
69
+ }
70
+ }
71
+
72
+ /// Convert booleans to bit if no nulls are present, otherwise convert to objects.
73
+ fn boolean_series_to_numo(s: &Series) -> RbResult<Value> {
74
+ let ca = s.bool().unwrap();
75
+ if s.null_count() == 0 {
76
+ let values = ca.into_no_null_iter();
77
+ RbArray1::<bool>::from_iter(values)
78
+ } else {
79
+ let values = ca.iter();
80
+ RbArray1::from_iter(values)
81
+ }
82
+ }
@@ -431,27 +431,24 @@ impl RbLazyFrame {
431
431
  Ok(())
432
432
  }
433
433
 
434
- #[allow(clippy::too_many_arguments)]
435
- pub fn sink_csv(
436
- &self,
437
- path: PathBuf,
438
- include_bom: bool,
439
- include_header: bool,
440
- separator: u8,
441
- line_terminator: String,
442
- quote_char: u8,
443
- batch_size: Wrap<NonZeroUsize>,
444
- datetime_format: Option<String>,
445
- date_format: Option<String>,
446
- time_format: Option<String>,
447
- float_scientific: Option<bool>,
448
- float_precision: Option<usize>,
449
- null_value: Option<String>,
450
- quote_style: Option<Wrap<QuoteStyle>>,
451
- maintain_order: bool,
452
- ) -> RbResult<()> {
453
- // TODO
454
- let cloud_options = None;
434
+ pub fn sink_csv(&self, arguments: &[Value]) -> RbResult<()> {
435
+ let path = PathBuf::try_convert(arguments[0])?;
436
+ let include_bom = bool::try_convert(arguments[1])?;
437
+ let include_header = bool::try_convert(arguments[2])?;
438
+ let separator = u8::try_convert(arguments[3])?;
439
+ let line_terminator = String::try_convert(arguments[4])?;
440
+ let quote_char = u8::try_convert(arguments[5])?;
441
+ let batch_size = Wrap::<NonZeroUsize>::try_convert(arguments[6])?;
442
+ let datetime_format = Option::<String>::try_convert(arguments[7])?;
443
+ let date_format = Option::<String>::try_convert(arguments[8])?;
444
+ let time_format = Option::<String>::try_convert(arguments[9])?;
445
+ let float_scientific = Option::<bool>::try_convert(arguments[10])?;
446
+ let float_precision = Option::<usize>::try_convert(arguments[11])?;
447
+ let null_value = Option::<String>::try_convert(arguments[12])?;
448
+ let quote_style = Option::<Wrap<QuoteStyle>>::try_convert(arguments[13])?;
449
+ let maintain_order = bool::try_convert(arguments[14])?;
450
+ let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[15])?;
451
+ let retries = usize::try_convert(arguments[16])?;
455
452
 
456
453
  let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
457
454
  let null_value = null_value.unwrap_or(SerializeOptions::default().null);
@@ -480,7 +477,7 @@ impl RbLazyFrame {
480
477
  let cloud_options = {
481
478
  let cloud_options =
482
479
  parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
483
- Some(cloud_options)
480
+ Some(cloud_options.with_max_retries(retries))
484
481
  };
485
482
 
486
483
  let ldf = self.ldf.borrow().clone();
@@ -679,6 +676,7 @@ impl RbLazyFrame {
679
676
  how: Wrap<JoinType>,
680
677
  suffix: String,
681
678
  validate: Wrap<JoinValidation>,
679
+ maintain_order: Wrap<MaintainOrderJoin>,
682
680
  coalesce: Option<bool>,
683
681
  ) -> RbResult<Self> {
684
682
  let coalesce = match coalesce {
@@ -702,6 +700,7 @@ impl RbLazyFrame {
702
700
  .how(how.0)
703
701
  .validate(validate.0)
704
702
  .coalesce(coalesce)
703
+ .maintain_order(maintain_order.0)
705
704
  .suffix(suffix)
706
705
  .finish()
707
706
  .into())
@@ -430,6 +430,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
430
430
  class.define_method("mode", method!(RbExpr::mode, 0))?;
431
431
  class.define_method("exclude", method!(RbExpr::exclude, 1))?;
432
432
  class.define_method("interpolate", method!(RbExpr::interpolate, 1))?;
433
+ class.define_method("interpolate_by", method!(RbExpr::interpolate_by, 1))?;
433
434
  class.define_method("rolling_sum", method!(RbExpr::rolling_sum, 4))?;
434
435
  class.define_method("rolling_sum_by", method!(RbExpr::rolling_sum_by, 4))?;
435
436
  class.define_method("rolling_min", method!(RbExpr::rolling_min, 4))?;
@@ -614,6 +615,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
614
615
  "concat_lf_diagonal",
615
616
  function!(functions::lazy::concat_lf_diagonal, 4),
616
617
  )?;
618
+ class.define_singleton_method(
619
+ "concat_lf_horizontal",
620
+ function!(functions::lazy::concat_lf_horizontal, 2),
621
+ )?;
617
622
  class.define_singleton_method("concat_df", function!(functions::eager::concat_df, 1))?;
618
623
  class.define_singleton_method("concat_lf", function!(functions::lazy::concat_lf, 4))?;
619
624
  class.define_singleton_method(
@@ -745,7 +750,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
745
750
  class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
746
751
  class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 9))?;
747
752
  class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc, 5))?;
748
- class.define_method("sink_csv", method!(RbLazyFrame::sink_csv, 15))?;
753
+ class.define_method("sink_csv", method!(RbLazyFrame::sink_csv, -1))?;
749
754
  class.define_method("sink_json", method!(RbLazyFrame::sink_json, 4))?;
750
755
  class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
751
756
  class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
@@ -759,7 +764,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
759
764
  )?;
760
765
  class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
761
766
  class.define_method("join_asof", method!(RbLazyFrame::join_asof, 14))?;
762
- class.define_method("join", method!(RbLazyFrame::join, 10))?;
767
+ class.define_method("join", method!(RbLazyFrame::join, 11))?;
763
768
  class.define_method("with_column", method!(RbLazyFrame::with_column, 1))?;
764
769
  class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
765
770
  class.define_method(