polars-df 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -1
- data/Cargo.lock +72 -58
- data/README.md +31 -27
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +35 -39
- data/ext/polars/src/c_api/allocator.rs +7 -0
- data/ext/polars/src/c_api/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +123 -101
- data/ext/polars/src/conversion/any_value.rs +13 -17
- data/ext/polars/src/conversion/chunked_array.rs +5 -5
- data/ext/polars/src/conversion/datetime.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +50 -45
- data/ext/polars/src/dataframe/export.rs +13 -13
- data/ext/polars/src/dataframe/general.rs +223 -223
- data/ext/polars/src/dataframe/io.rs +27 -141
- data/ext/polars/src/dataframe/mod.rs +13 -5
- data/ext/polars/src/dataframe/serde.rs +1 -1
- data/ext/polars/src/error.rs +44 -7
- data/ext/polars/src/exceptions.rs +45 -12
- data/ext/polars/src/expr/array.rs +12 -0
- data/ext/polars/src/expr/datatype.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +4 -5
- data/ext/polars/src/expr/general.rs +49 -13
- data/ext/polars/src/expr/list.rs +4 -0
- data/ext/polars/src/expr/meta.rs +8 -3
- data/ext/polars/src/expr/mod.rs +22 -6
- data/ext/polars/src/expr/name.rs +19 -8
- data/ext/polars/src/expr/rolling.rs +50 -1
- data/ext/polars/src/expr/string.rs +0 -1
- data/ext/polars/src/expr/struct.rs +7 -2
- data/ext/polars/src/file.rs +136 -103
- data/ext/polars/src/functions/aggregation.rs +9 -8
- data/ext/polars/src/functions/io.rs +81 -10
- data/ext/polars/src/functions/lazy.rs +95 -21
- data/ext/polars/src/functions/mod.rs +2 -0
- data/ext/polars/src/functions/range.rs +19 -3
- data/ext/polars/src/functions/strings.rs +6 -0
- data/ext/polars/src/functions/utils.rs +6 -0
- data/ext/polars/src/interop/arrow/mod.rs +50 -1
- data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
- data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/exitable.rs +39 -0
- data/ext/polars/src/lazyframe/general.rs +340 -236
- data/ext/polars/src/lazyframe/mod.rs +46 -10
- data/ext/polars/src/lazyframe/optflags.rs +5 -4
- data/ext/polars/src/lazyframe/serde.rs +11 -3
- data/ext/polars/src/lazyframe/sink.rs +10 -5
- data/ext/polars/src/lazygroupby.rs +6 -7
- data/ext/polars/src/lib.rs +141 -76
- data/ext/polars/src/map/dataframe.rs +12 -12
- data/ext/polars/src/map/lazy.rs +7 -5
- data/ext/polars/src/map/mod.rs +15 -8
- data/ext/polars/src/map/series.rs +3 -3
- data/ext/polars/src/on_startup.rs +16 -8
- data/ext/polars/src/prelude.rs +1 -0
- data/ext/polars/src/rb_modules.rs +19 -49
- data/ext/polars/src/series/aggregation.rs +79 -140
- data/ext/polars/src/series/arithmetic.rs +16 -22
- data/ext/polars/src/series/comparison.rs +101 -222
- data/ext/polars/src/series/construction.rs +17 -18
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +254 -289
- data/ext/polars/src/series/import.rs +17 -0
- data/ext/polars/src/series/map.rs +178 -160
- data/ext/polars/src/series/mod.rs +28 -12
- data/ext/polars/src/series/scatter.rs +12 -9
- data/ext/polars/src/sql.rs +16 -9
- data/ext/polars/src/testing/frame.rs +31 -0
- data/ext/polars/src/testing/mod.rs +5 -0
- data/ext/polars/src/testing/series.rs +31 -0
- data/ext/polars/src/timeout.rs +105 -0
- data/ext/polars/src/utils.rs +159 -1
- data/lib/polars/array_expr.rb +81 -12
- data/lib/polars/array_name_space.rb +74 -7
- data/lib/polars/batched_csv_reader.rb +21 -21
- data/lib/polars/binary_name_space.rb +1 -1
- data/lib/polars/cat_expr.rb +7 -7
- data/lib/polars/config.rb +1 -1
- data/lib/polars/convert.rb +189 -34
- data/lib/polars/data_frame.rb +1066 -831
- data/lib/polars/data_frame_plot.rb +173 -0
- data/lib/polars/data_type_group.rb +1 -0
- data/lib/polars/data_types.rb +31 -12
- data/lib/polars/date_time_expr.rb +51 -69
- data/lib/polars/date_time_name_space.rb +80 -112
- data/lib/polars/dynamic_group_by.rb +7 -7
- data/lib/polars/exceptions.rb +50 -10
- data/lib/polars/expr.rb +470 -517
- data/lib/polars/functions/aggregation/horizontal.rb +0 -1
- data/lib/polars/functions/aggregation/vertical.rb +2 -3
- data/lib/polars/functions/as_datatype.rb +290 -8
- data/lib/polars/functions/eager.rb +204 -10
- data/lib/polars/functions/escape_regex.rb +21 -0
- data/lib/polars/functions/lazy.rb +409 -169
- data/lib/polars/functions/lit.rb +17 -1
- data/lib/polars/functions/range/int_range.rb +74 -2
- data/lib/polars/functions/range/linear_space.rb +77 -0
- data/lib/polars/functions/range/time_range.rb +1 -1
- data/lib/polars/functions/repeat.rb +3 -12
- data/lib/polars/functions/whenthen.rb +2 -2
- data/lib/polars/group_by.rb +72 -20
- data/lib/polars/iceberg_dataset.rb +1 -6
- data/lib/polars/in_process_query.rb +37 -0
- data/lib/polars/io/cloud.rb +18 -0
- data/lib/polars/io/csv.rb +265 -126
- data/lib/polars/io/database.rb +0 -1
- data/lib/polars/io/delta.rb +15 -7
- data/lib/polars/io/ipc.rb +24 -17
- data/lib/polars/io/ndjson.rb +161 -24
- data/lib/polars/io/parquet.rb +101 -38
- data/lib/polars/lazy_frame.rb +849 -558
- data/lib/polars/lazy_group_by.rb +327 -2
- data/lib/polars/list_expr.rb +94 -16
- data/lib/polars/list_name_space.rb +88 -24
- data/lib/polars/meta_expr.rb +42 -1
- data/lib/polars/name_expr.rb +41 -4
- data/lib/polars/query_opt_flags.rb +198 -2
- data/lib/polars/rolling_group_by.rb +3 -3
- data/lib/polars/schema.rb +21 -3
- data/lib/polars/selector.rb +37 -2
- data/lib/polars/selectors.rb +45 -9
- data/lib/polars/series.rb +1156 -728
- data/lib/polars/series_plot.rb +72 -0
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/sql_context.rb +11 -4
- data/lib/polars/string_expr.rb +59 -68
- data/lib/polars/string_name_space.rb +51 -87
- data/lib/polars/struct_expr.rb +36 -18
- data/lib/polars/testing.rb +24 -273
- data/lib/polars/utils/constants.rb +2 -0
- data/lib/polars/utils/construction/data_frame.rb +410 -0
- data/lib/polars/utils/construction/series.rb +364 -0
- data/lib/polars/utils/construction/utils.rb +9 -0
- data/lib/polars/utils/deprecation.rb +11 -0
- data/lib/polars/utils/serde.rb +8 -3
- data/lib/polars/utils/unstable.rb +19 -0
- data/lib/polars/utils/various.rb +59 -0
- data/lib/polars/utils.rb +46 -47
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +47 -1
- metadata +25 -6
- data/ext/polars/src/allocator.rs +0 -13
- data/lib/polars/plot.rb +0 -109
|
@@ -1,28 +1,55 @@
|
|
|
1
|
+
mod exitable;
|
|
1
2
|
mod general;
|
|
2
3
|
mod optflags;
|
|
3
4
|
mod serde;
|
|
4
5
|
mod sink;
|
|
5
6
|
|
|
6
|
-
use
|
|
7
|
+
pub use exitable::RbInProcessQuery;
|
|
8
|
+
use magnus::{TryConvert, Value};
|
|
9
|
+
use parking_lot::RwLock;
|
|
10
|
+
use polars::prelude::{Engine, LazyFrame, OptFlags};
|
|
7
11
|
pub use sink::SinkTarget;
|
|
8
|
-
|
|
12
|
+
|
|
13
|
+
use crate::prelude::Wrap;
|
|
14
|
+
use crate::{RbResult, RbValueError};
|
|
9
15
|
|
|
10
16
|
#[magnus::wrap(class = "Polars::RbLazyFrame")]
|
|
11
|
-
#[
|
|
17
|
+
#[repr(transparent)]
|
|
12
18
|
pub struct RbLazyFrame {
|
|
13
|
-
pub ldf:
|
|
19
|
+
pub ldf: RwLock<LazyFrame>,
|
|
14
20
|
}
|
|
15
21
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
22
|
+
impl Clone for RbLazyFrame {
|
|
23
|
+
fn clone(&self) -> Self {
|
|
24
|
+
Self {
|
|
25
|
+
ldf: RwLock::new(self.ldf.read().clone()),
|
|
26
|
+
}
|
|
27
|
+
}
|
|
20
28
|
}
|
|
21
29
|
|
|
22
30
|
impl From<LazyFrame> for RbLazyFrame {
|
|
23
31
|
fn from(ldf: LazyFrame) -> Self {
|
|
24
32
|
RbLazyFrame {
|
|
25
|
-
ldf:
|
|
33
|
+
ldf: RwLock::new(ldf),
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
impl From<RbLazyFrame> for LazyFrame {
|
|
39
|
+
fn from(pldf: RbLazyFrame) -> Self {
|
|
40
|
+
pldf.ldf.into_inner()
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
#[magnus::wrap(class = "Polars::RbOptFlags")]
|
|
45
|
+
pub struct RbOptFlags {
|
|
46
|
+
pub inner: RwLock<OptFlags>,
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
impl Clone for RbOptFlags {
|
|
50
|
+
fn clone(&self) -> Self {
|
|
51
|
+
Self {
|
|
52
|
+
inner: RwLock::new(*self.inner.read()),
|
|
26
53
|
}
|
|
27
54
|
}
|
|
28
55
|
}
|
|
@@ -30,7 +57,16 @@ impl From<LazyFrame> for RbLazyFrame {
|
|
|
30
57
|
impl From<OptFlags> for RbOptFlags {
|
|
31
58
|
fn from(inner: OptFlags) -> Self {
|
|
32
59
|
RbOptFlags {
|
|
33
|
-
inner:
|
|
60
|
+
inner: RwLock::new(inner),
|
|
34
61
|
}
|
|
35
62
|
}
|
|
36
63
|
}
|
|
64
|
+
|
|
65
|
+
impl TryConvert for Wrap<Engine> {
|
|
66
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
|
67
|
+
let parsed = String::try_convert(ob)?
|
|
68
|
+
.parse()
|
|
69
|
+
.map_err(RbValueError::new_err)?;
|
|
70
|
+
Ok(Wrap(parsed))
|
|
71
|
+
}
|
|
72
|
+
}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
use parking_lot::RwLock;
|
|
1
2
|
use polars::prelude::OptFlags;
|
|
2
3
|
|
|
3
4
|
use super::RbOptFlags;
|
|
@@ -18,20 +19,20 @@ macro_rules! flag_getter_setters {
|
|
|
18
19
|
|
|
19
20
|
pub fn no_optimizations(&self) {
|
|
20
21
|
$(if $clear {
|
|
21
|
-
self.inner.
|
|
22
|
+
self.inner.write().remove(OptFlags::$flag);
|
|
22
23
|
})+
|
|
23
24
|
}
|
|
24
25
|
|
|
25
26
|
pub fn copy(&self) -> Self {
|
|
26
|
-
Self { inner: self.inner.clone() }
|
|
27
|
+
Self { inner: RwLock::new(self.inner.read().clone()) }
|
|
27
28
|
}
|
|
28
29
|
|
|
29
30
|
$(
|
|
30
31
|
pub fn $getter(&self) -> bool {
|
|
31
|
-
self.inner.
|
|
32
|
+
self.inner.read().contains(OptFlags::$flag)
|
|
32
33
|
}
|
|
33
34
|
pub fn $setter(&self, value: bool) {
|
|
34
|
-
self.inner.
|
|
35
|
+
self.inner.write().set(OptFlags::$flag, value)
|
|
35
36
|
}
|
|
36
37
|
)+
|
|
37
38
|
}
|
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
use std::io::Read;
|
|
2
1
|
#[cfg(feature = "serialize_binary")]
|
|
3
|
-
use std::io::
|
|
2
|
+
use std::io::BufReader;
|
|
3
|
+
use std::io::{BufWriter, Read};
|
|
4
4
|
|
|
5
5
|
use magnus::Value;
|
|
6
6
|
use polars::lazy::frame::LazyFrame;
|
|
7
7
|
use polars::prelude::*;
|
|
8
8
|
|
|
9
|
+
use crate::exceptions::ComputeError;
|
|
9
10
|
use crate::file::get_file_like;
|
|
10
11
|
#[cfg(feature = "serialize_binary")]
|
|
11
12
|
use crate::utils::to_rb_err;
|
|
@@ -17,12 +18,19 @@ impl RbLazyFrame {
|
|
|
17
18
|
let file = get_file_like(rb_f, true)?;
|
|
18
19
|
let writer = BufWriter::new(file);
|
|
19
20
|
self.ldf
|
|
20
|
-
.
|
|
21
|
+
.read()
|
|
21
22
|
.logical_plan
|
|
22
23
|
.serialize_versioned(writer, Default::default())
|
|
23
24
|
.map_err(to_rb_err)
|
|
24
25
|
}
|
|
25
26
|
|
|
27
|
+
pub fn serialize_json(&self, rb_f: Value) -> RbResult<()> {
|
|
28
|
+
let file = get_file_like(rb_f, true)?;
|
|
29
|
+
let writer = BufWriter::new(file);
|
|
30
|
+
serde_json::to_writer(writer, &self.ldf.read().logical_plan)
|
|
31
|
+
.map_err(|err| ComputeError::new_err(err.to_string()))
|
|
32
|
+
}
|
|
33
|
+
|
|
26
34
|
#[cfg(feature = "serialize_binary")]
|
|
27
35
|
pub fn deserialize_binary(rb_f: Value) -> RbResult<Self> {
|
|
28
36
|
let file = get_file_like(rb_f, false)?;
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
use std::sync::{Arc, Mutex};
|
|
2
2
|
|
|
3
|
-
use magnus::{RHash, TryConvert, Value};
|
|
3
|
+
use magnus::{RHash, Ruby, TryConvert, Value};
|
|
4
4
|
use polars::prelude::sync_on_close::SyncOnCloseType;
|
|
5
5
|
use polars::prelude::{PlPath, SinkOptions, SpecialEq};
|
|
6
6
|
use polars_utils::plpath::PlPathRef;
|
|
7
7
|
|
|
8
8
|
use crate::prelude::Wrap;
|
|
9
|
+
use crate::utils::RubyAttach;
|
|
9
10
|
use crate::{RbResult, RbValueError};
|
|
10
11
|
|
|
11
12
|
#[derive(Clone)]
|
|
@@ -18,10 +19,14 @@ impl TryConvert for Wrap<polars_plan::dsl::SinkTarget> {
|
|
|
18
19
|
if let Ok(v) = String::try_convert(ob) {
|
|
19
20
|
Ok(Wrap(polars::prelude::SinkTarget::Path(PlPath::new(&v))))
|
|
20
21
|
} else {
|
|
21
|
-
let writer = {
|
|
22
|
+
let writer = Ruby::attach(|rb| {
|
|
22
23
|
let rb_f = ob;
|
|
23
|
-
RbResult::Ok(
|
|
24
|
-
|
|
24
|
+
RbResult::Ok(
|
|
25
|
+
crate::file::try_get_rbfile(rb, rb_f, true)?
|
|
26
|
+
.0
|
|
27
|
+
.into_writeable(),
|
|
28
|
+
)
|
|
29
|
+
})?;
|
|
25
30
|
|
|
26
31
|
Ok(Wrap(polars_plan::prelude::SinkTarget::Dyn(SpecialEq::new(
|
|
27
32
|
Arc::new(Mutex::new(Some(writer))),
|
|
@@ -71,7 +76,7 @@ impl TryConvert for Wrap<SinkOptions> {
|
|
|
71
76
|
|
|
72
77
|
if parsed.len() != 3 {
|
|
73
78
|
return Err(RbValueError::new_err(
|
|
74
|
-
"`sink_options` must be a
|
|
79
|
+
"`sink_options` must be a hash with the exactly 3 field.",
|
|
75
80
|
));
|
|
76
81
|
}
|
|
77
82
|
|
|
@@ -1,29 +1,28 @@
|
|
|
1
1
|
use magnus::RArray;
|
|
2
2
|
use polars::lazy::frame::LazyGroupBy;
|
|
3
|
-
use std::cell::RefCell;
|
|
4
3
|
|
|
5
|
-
use crate::expr::
|
|
4
|
+
use crate::expr::ToExprs;
|
|
6
5
|
use crate::{RbLazyFrame, RbResult};
|
|
7
6
|
|
|
8
7
|
#[magnus::wrap(class = "Polars::RbLazyGroupBy")]
|
|
9
8
|
pub struct RbLazyGroupBy {
|
|
10
|
-
pub lgb:
|
|
9
|
+
pub lgb: Option<LazyGroupBy>,
|
|
11
10
|
}
|
|
12
11
|
|
|
13
12
|
impl RbLazyGroupBy {
|
|
14
13
|
pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
|
|
15
|
-
let lgb = self.lgb.
|
|
16
|
-
let aggs =
|
|
14
|
+
let lgb = self.lgb.clone().unwrap();
|
|
15
|
+
let aggs = aggs.to_exprs()?;
|
|
17
16
|
Ok(lgb.agg(aggs).into())
|
|
18
17
|
}
|
|
19
18
|
|
|
20
19
|
pub fn head(&self, n: usize) -> RbLazyFrame {
|
|
21
|
-
let lgb = self.lgb.
|
|
20
|
+
let lgb = self.lgb.clone().unwrap();
|
|
22
21
|
lgb.head(Some(n)).into()
|
|
23
22
|
}
|
|
24
23
|
|
|
25
24
|
pub fn tail(&self, n: usize) -> RbLazyFrame {
|
|
26
|
-
let lgb = self.lgb.
|
|
25
|
+
let lgb = self.lgb.clone().unwrap();
|
|
27
26
|
lgb.tail(Some(n)).into()
|
|
28
27
|
}
|
|
29
28
|
}
|