polars-df 0.16.0 → 0.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/Cargo.lock +225 -238
- data/LICENSE.txt +1 -1
- data/README.md +4 -4
- data/ext/polars/Cargo.toml +10 -7
- data/ext/polars/src/conversion/any_value.rs +2 -1
- data/ext/polars/src/conversion/mod.rs +22 -0
- data/ext/polars/src/dataframe/general.rs +5 -5
- data/ext/polars/src/dataframe/io.rs +8 -14
- data/ext/polars/src/expr/general.rs +4 -0
- data/ext/polars/src/functions/io.rs +2 -2
- data/ext/polars/src/functions/lazy.rs +15 -0
- data/ext/polars/src/interop/numo/mod.rs +1 -0
- data/ext/polars/src/interop/numo/numo_rs.rs +52 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +69 -48
- data/ext/polars/src/lazyframe/general.rs +25 -22
- data/ext/polars/src/lib.rs +9 -4
- data/ext/polars/src/map/mod.rs +1 -1
- data/ext/polars/src/series/export.rs +1 -0
- data/ext/polars/src/series/import.rs +2 -2
- data/ext/polars/src/series/scatter.rs +1 -1
- data/lib/polars/data_frame.rb +321 -23
- data/lib/polars/data_types.rb +4 -0
- data/lib/polars/expr.rb +31 -0
- data/lib/polars/functions/eager.rb +145 -16
- data/lib/polars/io/database.rb +17 -0
- data/lib/polars/lazy_frame.rb +74 -9
- data/lib/polars/schema.rb +29 -0
- data/lib/polars/series.rb +31 -24
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +1 -0
- metadata +4 -2
data/LICENSE.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
Copyright (c) 2020 Ritchie Vink
|
2
|
-
Copyright (c) 2022-2024 Andrew Kane
|
2
|
+
Copyright (c) 2022-2025 Andrew Kane
|
3
3
|
Some portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Ruby Polars
|
2
2
|
|
3
|
-
|
3
|
+
🔥 Blazingly fast DataFrames for Ruby, powered by [Polars](https://github.com/pola-rs/polars)
|
4
4
|
|
5
5
|
[![Build Status](https://github.com/ankane/ruby-polars/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/ruby-polars/actions)
|
6
6
|
|
@@ -88,7 +88,7 @@ From Avro
|
|
88
88
|
Polars.read_avro("file.avro")
|
89
89
|
```
|
90
90
|
|
91
|
-
From Delta Lake (requires [deltalake-rb](https://github.com/ankane/delta-ruby)) [experimental
|
91
|
+
From Delta Lake (requires [deltalake-rb](https://github.com/ankane/delta-ruby)) [experimental]
|
92
92
|
|
93
93
|
```ruby
|
94
94
|
Polars.read_delta("./table")
|
@@ -365,7 +365,7 @@ Avro
|
|
365
365
|
df.write_avro("file.avro")
|
366
366
|
```
|
367
367
|
|
368
|
-
Delta Lake [experimental
|
368
|
+
Delta Lake [experimental]
|
369
369
|
|
370
370
|
```ruby
|
371
371
|
df.write_delta("./table")
|
@@ -448,7 +448,7 @@ df.group_by("c").plot("a", "b", stacked: true)
|
|
448
448
|
|
449
449
|
## History
|
450
450
|
|
451
|
-
View the [changelog](CHANGELOG.md)
|
451
|
+
View the [changelog](https://github.com/ankane/ruby-polars/blob/master/CHANGELOG.md)
|
452
452
|
|
453
453
|
## Contributing
|
454
454
|
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.16.0"
|
3
|
+
version = "0.17.1"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -12,20 +12,22 @@ crate-type = ["cdylib"]
|
|
12
12
|
|
13
13
|
[dependencies]
|
14
14
|
ahash = "0.8"
|
15
|
-
arrow = { package = "polars-arrow", version = "=0.
|
15
|
+
arrow = { package = "polars-arrow", version = "=0.46.0" }
|
16
16
|
bytes = "1"
|
17
17
|
chrono = "0.4"
|
18
18
|
either = "1.8"
|
19
19
|
magnus = "0.7"
|
20
|
-
|
21
|
-
polars-
|
22
|
-
polars-
|
23
|
-
polars-
|
20
|
+
num-traits = "0.2"
|
21
|
+
polars-core = "=0.46.0"
|
22
|
+
polars-plan = "=0.46.0"
|
23
|
+
polars-parquet = "=0.46.0"
|
24
|
+
polars-utils = "=0.46.0"
|
25
|
+
rayon = "1.9"
|
24
26
|
regex = "1"
|
25
27
|
serde_json = "1"
|
26
28
|
|
27
29
|
[dependencies.polars]
|
28
|
-
version = "=0.
|
30
|
+
version = "=0.46.0"
|
29
31
|
features = [
|
30
32
|
"abs",
|
31
33
|
"approx_unique",
|
@@ -62,6 +64,7 @@ features = [
|
|
62
64
|
"gcp",
|
63
65
|
"http",
|
64
66
|
"interpolate",
|
67
|
+
"interpolate_by",
|
65
68
|
"ipc",
|
66
69
|
"ipc_streaming",
|
67
70
|
"is_between",
|
@@ -17,7 +17,7 @@ impl IntoValue for Wrap<AnyValue<'_>> {
|
|
17
17
|
}
|
18
18
|
}
|
19
19
|
|
20
|
-
impl
|
20
|
+
impl TryConvert for Wrap<AnyValue<'_>> {
|
21
21
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
22
22
|
rb_object_to_any_value(ob, true).map(Wrap)
|
23
23
|
}
|
@@ -33,6 +33,7 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
|
|
33
33
|
AnyValue::Int16(v) => ruby.into_value(v),
|
34
34
|
AnyValue::Int32(v) => ruby.into_value(v),
|
35
35
|
AnyValue::Int64(v) => ruby.into_value(v),
|
36
|
+
AnyValue::Int128(_v) => todo!(),
|
36
37
|
AnyValue::Float32(v) => ruby.into_value(v),
|
37
38
|
AnyValue::Float64(v) => ruby.into_value(v),
|
38
39
|
AnyValue::Null => ruby.qnil().as_value(),
|
@@ -146,6 +146,10 @@ impl IntoValue for Wrap<DataType> {
|
|
146
146
|
let class = pl.const_get::<_, Value>("Int64").unwrap();
|
147
147
|
class.funcall("new", ()).unwrap()
|
148
148
|
}
|
149
|
+
DataType::Int128 => {
|
150
|
+
let class = pl.const_get::<_, Value>("Int128").unwrap();
|
151
|
+
class.funcall("new", ()).unwrap()
|
152
|
+
}
|
149
153
|
DataType::UInt8 => {
|
150
154
|
let class = pl.const_get::<_, Value>("UInt8").unwrap();
|
151
155
|
class.funcall("new", ()).unwrap()
|
@@ -1061,6 +1065,24 @@ impl TryConvert for Wrap<JoinValidation> {
|
|
1061
1065
|
}
|
1062
1066
|
}
|
1063
1067
|
|
1068
|
+
impl TryConvert for Wrap<MaintainOrderJoin> {
|
1069
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1070
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
1071
|
+
"none" => MaintainOrderJoin::None,
|
1072
|
+
"left" => MaintainOrderJoin::Left,
|
1073
|
+
"right" => MaintainOrderJoin::Right,
|
1074
|
+
"left_right" => MaintainOrderJoin::LeftRight,
|
1075
|
+
"right_left" => MaintainOrderJoin::RightLeft,
|
1076
|
+
v => {
|
1077
|
+
return Err(RbValueError::new_err(format!(
|
1078
|
+
"`maintain_order` must be one of {{'none', 'left', 'right', 'left_right', 'right_left'}}, got {v}",
|
1079
|
+
)))
|
1080
|
+
},
|
1081
|
+
};
|
1082
|
+
Ok(Wrap(parsed))
|
1083
|
+
}
|
1084
|
+
}
|
1085
|
+
|
1064
1086
|
impl TryConvert for Wrap<QuoteStyle> {
|
1065
1087
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
1066
1088
|
let parsed = match String::try_convert(ob)?.as_str() {
|
@@ -295,11 +295,11 @@ impl RbDataFrame {
|
|
295
295
|
Ok(())
|
296
296
|
}
|
297
297
|
|
298
|
-
pub fn slice(&self, offset:
|
299
|
-
let df = self
|
300
|
-
|
301
|
-
|
302
|
-
|
298
|
+
pub fn slice(&self, offset: i64, length: Option<usize>) -> Self {
|
299
|
+
let df = self
|
300
|
+
.df
|
301
|
+
.borrow()
|
302
|
+
.slice(offset, length.unwrap_or_else(|| self.df.borrow().height()));
|
303
303
|
df.into()
|
304
304
|
}
|
305
305
|
|
@@ -296,30 +296,24 @@ impl RbDataFrame {
|
|
296
296
|
Ok(())
|
297
297
|
}
|
298
298
|
|
299
|
-
pub fn write_json(&self, rb_f: Value
|
299
|
+
pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
|
300
300
|
let file = BufWriter::new(get_file_like(rb_f, true)?);
|
301
301
|
|
302
|
-
|
303
|
-
(
|
304
|
-
|
305
|
-
|
306
|
-
(true, _) => serde_json::to_writer_pretty(file, &*self.df.borrow())
|
307
|
-
.map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
|
308
|
-
(false, _) => serde_json::to_writer(file, &*self.df.borrow())
|
309
|
-
.map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
|
310
|
-
};
|
311
|
-
r.map_err(|e| RbPolarsErr::Other(format!("{:?}", e)))?;
|
302
|
+
JsonWriter::new(file)
|
303
|
+
.with_json_format(JsonFormat::Json)
|
304
|
+
.finish(&mut self.df.borrow_mut())
|
305
|
+
.map_err(RbPolarsErr::from)?;
|
312
306
|
Ok(())
|
313
307
|
}
|
314
308
|
|
315
309
|
pub fn write_ndjson(&self, rb_f: Value) -> RbResult<()> {
|
316
310
|
let file = BufWriter::new(get_file_like(rb_f, true)?);
|
317
311
|
|
318
|
-
|
312
|
+
JsonWriter::new(file)
|
319
313
|
.with_json_format(JsonFormat::JsonLines)
|
320
|
-
.finish(&mut self.df.borrow_mut())
|
314
|
+
.finish(&mut self.df.borrow_mut())
|
315
|
+
.map_err(RbPolarsErr::from)?;
|
321
316
|
|
322
|
-
r.map_err(|e| RbPolarsErr::Other(format!("{:?}", e)))?;
|
323
317
|
Ok(())
|
324
318
|
}
|
325
319
|
|
@@ -671,6 +671,10 @@ impl RbExpr {
|
|
671
671
|
self.inner.clone().interpolate(method.0).into()
|
672
672
|
}
|
673
673
|
|
674
|
+
pub fn interpolate_by(&self, by: &Self) -> Self {
|
675
|
+
self.inner.clone().interpolate_by(by.inner.clone()).into()
|
676
|
+
}
|
677
|
+
|
674
678
|
pub fn lower_bound(&self) -> Self {
|
675
679
|
self.inner.clone().lower_bound().into()
|
676
680
|
}
|
@@ -1,9 +1,9 @@
|
|
1
1
|
use std::io::BufReader;
|
2
2
|
|
3
|
+
use arrow::array::Utf8ViewArray;
|
3
4
|
use magnus::{RHash, Value};
|
4
5
|
use polars::prelude::ArrowSchema;
|
5
6
|
use polars_core::datatypes::create_enum_dtype;
|
6
|
-
use polars_core::export::arrow::array::Utf8ViewArray;
|
7
7
|
|
8
8
|
use crate::conversion::Wrap;
|
9
9
|
use crate::file::{get_either_file, EitherRustRubyFile};
|
@@ -11,7 +11,7 @@ use crate::prelude::ArrowDataType;
|
|
11
11
|
use crate::{RbPolarsErr, RbResult};
|
12
12
|
|
13
13
|
pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
14
|
-
use
|
14
|
+
use arrow::io::ipc::read::read_file_metadata;
|
15
15
|
let metadata = match get_either_file(rb_f, false)? {
|
16
16
|
EitherRustRubyFile::Rust(r) => {
|
17
17
|
read_file_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
|
@@ -205,6 +205,21 @@ pub fn concat_lf_diagonal(
|
|
205
205
|
Ok(lf.into())
|
206
206
|
}
|
207
207
|
|
208
|
+
pub fn concat_lf_horizontal(lfs: RArray, parallel: bool) -> RbResult<RbLazyFrame> {
|
209
|
+
let iter = lfs.into_iter();
|
210
|
+
|
211
|
+
let lfs = iter.map(get_lf).collect::<RbResult<Vec<_>>>()?;
|
212
|
+
|
213
|
+
let args = UnionArgs {
|
214
|
+
rechunk: false, // No need to rechunk with horizontal concatenation
|
215
|
+
parallel,
|
216
|
+
to_supertypes: false,
|
217
|
+
..Default::default()
|
218
|
+
};
|
219
|
+
let lf = dsl::functions::concat_lf_horizontal(lfs, args).map_err(RbPolarsErr::from)?;
|
220
|
+
Ok(lf.into())
|
221
|
+
}
|
222
|
+
|
208
223
|
pub fn dtype_cols(dtypes: RArray) -> RbResult<RbExpr> {
|
209
224
|
let dtypes = dtypes
|
210
225
|
.into_iter()
|
@@ -0,0 +1,52 @@
|
|
1
|
+
use magnus::{class, prelude::*, IntoValue, Module, RArray, RClass, RModule, Value};
|
2
|
+
|
3
|
+
use crate::RbResult;
|
4
|
+
|
5
|
+
pub trait Element: IntoValue {
|
6
|
+
fn class_name() -> &'static str;
|
7
|
+
}
|
8
|
+
|
9
|
+
macro_rules! create_element {
|
10
|
+
($type:ty, $name:expr) => {
|
11
|
+
impl Element for $type {
|
12
|
+
fn class_name() -> &'static str {
|
13
|
+
$name
|
14
|
+
}
|
15
|
+
}
|
16
|
+
};
|
17
|
+
}
|
18
|
+
|
19
|
+
create_element!(i8, "Int8");
|
20
|
+
create_element!(i16, "Int16");
|
21
|
+
create_element!(i32, "Int32");
|
22
|
+
create_element!(i64, "Int64");
|
23
|
+
create_element!(u8, "UInt8");
|
24
|
+
create_element!(u16, "UInt16");
|
25
|
+
create_element!(u32, "UInt32");
|
26
|
+
create_element!(u64, "UInt64");
|
27
|
+
create_element!(f32, "SFloat");
|
28
|
+
create_element!(f64, "DFloat");
|
29
|
+
create_element!(bool, "Bit");
|
30
|
+
|
31
|
+
impl<T> Element for Option<T>
|
32
|
+
where
|
33
|
+
Option<T>: IntoValue,
|
34
|
+
{
|
35
|
+
fn class_name() -> &'static str {
|
36
|
+
"RObject"
|
37
|
+
}
|
38
|
+
}
|
39
|
+
|
40
|
+
pub struct RbArray1<T>(T);
|
41
|
+
|
42
|
+
impl<T: Element> RbArray1<T> {
|
43
|
+
pub fn from_iter<I>(values: I) -> RbResult<Value>
|
44
|
+
where
|
45
|
+
I: IntoIterator<Item = T>,
|
46
|
+
{
|
47
|
+
class::object()
|
48
|
+
.const_get::<_, RModule>("Numo")?
|
49
|
+
.const_get::<_, RClass>(T::class_name())?
|
50
|
+
.funcall("cast", (RArray::from_iter(values),))
|
51
|
+
}
|
52
|
+
}
|
@@ -1,61 +1,82 @@
|
|
1
|
-
use magnus::
|
2
|
-
use
|
1
|
+
use magnus::Value;
|
2
|
+
use num_traits::{Float, NumCast};
|
3
3
|
use polars_core::prelude::*;
|
4
4
|
|
5
|
+
use super::numo_rs::{Element, RbArray1};
|
5
6
|
use crate::error::RbPolarsErr;
|
6
7
|
use crate::raise_err;
|
7
8
|
use crate::series::RbSeries;
|
8
9
|
use crate::RbResult;
|
9
10
|
|
10
11
|
impl RbSeries {
|
11
|
-
///
|
12
|
-
/// This will cast to floats so that `nil = NAN`
|
12
|
+
/// Convert this Series to a Numo array.
|
13
13
|
pub fn to_numo(&self) -> RbResult<Value> {
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
14
|
+
series_to_numo(&self.series.borrow())
|
15
|
+
}
|
16
|
+
}
|
17
|
+
|
18
|
+
/// Convert a Series to a Numo array.
|
19
|
+
fn series_to_numo(s: &Series) -> RbResult<Value> {
|
20
|
+
series_to_numo_with_copy(s)
|
21
|
+
}
|
18
22
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
|
44
|
-
Some(v) => v,
|
45
|
-
None => f32::NAN,
|
46
|
-
}));
|
47
|
-
class::object()
|
48
|
-
.const_get::<_, RModule>("Numo")?
|
49
|
-
.const_get::<_, RClass>("SFloat")?
|
50
|
-
.funcall("cast", (np_arr,))
|
51
|
-
}
|
52
|
-
}
|
53
|
-
dt => {
|
54
|
-
raise_err!(
|
55
|
-
format!("'to_numo' not supported for dtype: {dt:?}"),
|
56
|
-
ComputeError
|
57
|
-
);
|
58
|
-
}
|
23
|
+
/// Convert a Series to a Numo array, copying data in the process.
|
24
|
+
fn series_to_numo_with_copy(s: &Series) -> RbResult<Value> {
|
25
|
+
use DataType::*;
|
26
|
+
match s.dtype() {
|
27
|
+
Int8 => numeric_series_to_numpy::<Int8Type, f32>(s),
|
28
|
+
Int16 => numeric_series_to_numpy::<Int16Type, f32>(s),
|
29
|
+
Int32 => numeric_series_to_numpy::<Int32Type, f64>(s),
|
30
|
+
Int64 => numeric_series_to_numpy::<Int64Type, f64>(s),
|
31
|
+
UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(s),
|
32
|
+
UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(s),
|
33
|
+
UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(s),
|
34
|
+
UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(s),
|
35
|
+
Float32 => numeric_series_to_numpy::<Float32Type, f32>(s),
|
36
|
+
Float64 => numeric_series_to_numpy::<Float64Type, f64>(s),
|
37
|
+
Boolean => boolean_series_to_numo(s),
|
38
|
+
String => {
|
39
|
+
let ca = s.str().unwrap();
|
40
|
+
RbArray1::from_iter(ca)
|
41
|
+
}
|
42
|
+
dt => {
|
43
|
+
raise_err!(
|
44
|
+
format!("'to_numo' not supported for dtype: {dt:?}"),
|
45
|
+
ComputeError
|
46
|
+
);
|
59
47
|
}
|
60
48
|
}
|
61
49
|
}
|
50
|
+
|
51
|
+
/// Convert numeric types to f32 or f64 with NaN representing a null value.
|
52
|
+
fn numeric_series_to_numpy<T, U>(s: &Series) -> RbResult<Value>
|
53
|
+
where
|
54
|
+
T: PolarsNumericType,
|
55
|
+
T::Native: Element,
|
56
|
+
U: Float + Element,
|
57
|
+
{
|
58
|
+
let ca: &ChunkedArray<T> = s.as_ref().as_ref();
|
59
|
+
if s.null_count() == 0 {
|
60
|
+
let values = ca.into_no_null_iter();
|
61
|
+
RbArray1::<T::Native>::from_iter(values)
|
62
|
+
} else {
|
63
|
+
let mapper = |opt_v: Option<T::Native>| match opt_v {
|
64
|
+
Some(v) => NumCast::from(v).unwrap(),
|
65
|
+
None => U::nan(),
|
66
|
+
};
|
67
|
+
let values = ca.iter().map(mapper);
|
68
|
+
RbArray1::from_iter(values)
|
69
|
+
}
|
70
|
+
}
|
71
|
+
|
72
|
+
/// Convert booleans to bit if no nulls are present, otherwise convert to objects.
|
73
|
+
fn boolean_series_to_numo(s: &Series) -> RbResult<Value> {
|
74
|
+
let ca = s.bool().unwrap();
|
75
|
+
if s.null_count() == 0 {
|
76
|
+
let values = ca.into_no_null_iter();
|
77
|
+
RbArray1::<bool>::from_iter(values)
|
78
|
+
} else {
|
79
|
+
let values = ca.iter();
|
80
|
+
RbArray1::from_iter(values)
|
81
|
+
}
|
82
|
+
}
|
@@ -431,27 +431,24 @@ impl RbLazyFrame {
|
|
431
431
|
Ok(())
|
432
432
|
}
|
433
433
|
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
) -> RbResult<()> {
|
453
|
-
// TODO
|
454
|
-
let cloud_options = None;
|
434
|
+
pub fn sink_csv(&self, arguments: &[Value]) -> RbResult<()> {
|
435
|
+
let path = PathBuf::try_convert(arguments[0])?;
|
436
|
+
let include_bom = bool::try_convert(arguments[1])?;
|
437
|
+
let include_header = bool::try_convert(arguments[2])?;
|
438
|
+
let separator = u8::try_convert(arguments[3])?;
|
439
|
+
let line_terminator = String::try_convert(arguments[4])?;
|
440
|
+
let quote_char = u8::try_convert(arguments[5])?;
|
441
|
+
let batch_size = Wrap::<NonZeroUsize>::try_convert(arguments[6])?;
|
442
|
+
let datetime_format = Option::<String>::try_convert(arguments[7])?;
|
443
|
+
let date_format = Option::<String>::try_convert(arguments[8])?;
|
444
|
+
let time_format = Option::<String>::try_convert(arguments[9])?;
|
445
|
+
let float_scientific = Option::<bool>::try_convert(arguments[10])?;
|
446
|
+
let float_precision = Option::<usize>::try_convert(arguments[11])?;
|
447
|
+
let null_value = Option::<String>::try_convert(arguments[12])?;
|
448
|
+
let quote_style = Option::<Wrap<QuoteStyle>>::try_convert(arguments[13])?;
|
449
|
+
let maintain_order = bool::try_convert(arguments[14])?;
|
450
|
+
let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[15])?;
|
451
|
+
let retries = usize::try_convert(arguments[16])?;
|
455
452
|
|
456
453
|
let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
|
457
454
|
let null_value = null_value.unwrap_or(SerializeOptions::default().null);
|
@@ -480,7 +477,7 @@ impl RbLazyFrame {
|
|
480
477
|
let cloud_options = {
|
481
478
|
let cloud_options =
|
482
479
|
parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
|
483
|
-
Some(cloud_options)
|
480
|
+
Some(cloud_options.with_max_retries(retries))
|
484
481
|
};
|
485
482
|
|
486
483
|
let ldf = self.ldf.borrow().clone();
|
@@ -633,6 +630,8 @@ impl RbLazyFrame {
|
|
633
630
|
tolerance: Option<Wrap<AnyValue<'_>>>,
|
634
631
|
tolerance_str: Option<String>,
|
635
632
|
coalesce: bool,
|
633
|
+
allow_eq: bool,
|
634
|
+
check_sortedness: bool,
|
636
635
|
) -> RbResult<Self> {
|
637
636
|
let coalesce = if coalesce {
|
638
637
|
JoinCoalesce::CoalesceColumns
|
@@ -657,6 +656,8 @@ impl RbLazyFrame {
|
|
657
656
|
right_by: right_by.map(strings_to_pl_smallstr),
|
658
657
|
tolerance: tolerance.map(|t| t.0.into_static()),
|
659
658
|
tolerance_str: tolerance_str.map(|s| s.into()),
|
659
|
+
allow_eq,
|
660
|
+
check_sortedness,
|
660
661
|
}))
|
661
662
|
.suffix(suffix)
|
662
663
|
.finish()
|
@@ -675,6 +676,7 @@ impl RbLazyFrame {
|
|
675
676
|
how: Wrap<JoinType>,
|
676
677
|
suffix: String,
|
677
678
|
validate: Wrap<JoinValidation>,
|
679
|
+
maintain_order: Wrap<MaintainOrderJoin>,
|
678
680
|
coalesce: Option<bool>,
|
679
681
|
) -> RbResult<Self> {
|
680
682
|
let coalesce = match coalesce {
|
@@ -698,6 +700,7 @@ impl RbLazyFrame {
|
|
698
700
|
.how(how.0)
|
699
701
|
.validate(validate.0)
|
700
702
|
.coalesce(coalesce)
|
703
|
+
.maintain_order(maintain_order.0)
|
701
704
|
.suffix(suffix)
|
702
705
|
.finish()
|
703
706
|
.into())
|
data/ext/polars/src/lib.rs
CHANGED
@@ -69,7 +69,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
69
69
|
class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
|
70
70
|
class.define_method("dtype_strings", method!(RbDataFrame::dtype_strings, 0))?;
|
71
71
|
class.define_method("write_avro", method!(RbDataFrame::write_avro, 3))?;
|
72
|
-
class.define_method("write_json", method!(RbDataFrame::write_json,
|
72
|
+
class.define_method("write_json", method!(RbDataFrame::write_json, 1))?;
|
73
73
|
class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
|
74
74
|
class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
|
75
75
|
class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 5))?;
|
@@ -430,6 +430,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
430
430
|
class.define_method("mode", method!(RbExpr::mode, 0))?;
|
431
431
|
class.define_method("exclude", method!(RbExpr::exclude, 1))?;
|
432
432
|
class.define_method("interpolate", method!(RbExpr::interpolate, 1))?;
|
433
|
+
class.define_method("interpolate_by", method!(RbExpr::interpolate_by, 1))?;
|
433
434
|
class.define_method("rolling_sum", method!(RbExpr::rolling_sum, 4))?;
|
434
435
|
class.define_method("rolling_sum_by", method!(RbExpr::rolling_sum_by, 4))?;
|
435
436
|
class.define_method("rolling_min", method!(RbExpr::rolling_min, 4))?;
|
@@ -614,6 +615,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
614
615
|
"concat_lf_diagonal",
|
615
616
|
function!(functions::lazy::concat_lf_diagonal, 4),
|
616
617
|
)?;
|
618
|
+
class.define_singleton_method(
|
619
|
+
"concat_lf_horizontal",
|
620
|
+
function!(functions::lazy::concat_lf_horizontal, 2),
|
621
|
+
)?;
|
617
622
|
class.define_singleton_method("concat_df", function!(functions::eager::concat_df, 1))?;
|
618
623
|
class.define_singleton_method("concat_lf", function!(functions::lazy::concat_lf, 4))?;
|
619
624
|
class.define_singleton_method(
|
@@ -745,7 +750,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
745
750
|
class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
|
746
751
|
class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 9))?;
|
747
752
|
class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc, 5))?;
|
748
|
-
class.define_method("sink_csv", method!(RbLazyFrame::sink_csv,
|
753
|
+
class.define_method("sink_csv", method!(RbLazyFrame::sink_csv, -1))?;
|
749
754
|
class.define_method("sink_json", method!(RbLazyFrame::sink_json, 4))?;
|
750
755
|
class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
|
751
756
|
class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
|
@@ -758,8 +763,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
758
763
|
method!(RbLazyFrame::group_by_dynamic, 9),
|
759
764
|
)?;
|
760
765
|
class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
|
761
|
-
class.define_method("join_asof", method!(RbLazyFrame::join_asof, 12))?;
|
762
|
-
class.define_method("join", method!(RbLazyFrame::join, 10))?;
|
766
|
+
class.define_method("join_asof", method!(RbLazyFrame::join_asof, 14))?;
|
767
|
+
class.define_method("join", method!(RbLazyFrame::join, 11))?;
|
763
768
|
class.define_method("with_column", method!(RbLazyFrame::with_column, 1))?;
|
764
769
|
class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
|
765
770
|
class.define_method(
|
data/ext/polars/src/map/mod.rs
CHANGED
@@ -5,9 +5,9 @@ pub mod series;
|
|
5
5
|
use magnus::{prelude::*, RHash, Value};
|
6
6
|
use polars::chunked_array::builder::get_list_builder;
|
7
7
|
use polars::prelude::*;
|
8
|
-
use polars_core::export::rayon::prelude::*;
|
9
8
|
use polars_core::utils::CustomIterTools;
|
10
9
|
use polars_core::POOL;
|
10
|
+
use rayon::prelude::*;
|
11
11
|
|
12
12
|
use crate::{ObjectValue, RbPolarsErr, RbResult, RbSeries, Wrap};
|
13
13
|
|
@@ -21,6 +21,7 @@ impl RbSeries {
|
|
21
21
|
DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
|
22
22
|
DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
|
23
23
|
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
24
|
+
DataType::Int128 => todo!(),
|
24
25
|
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
25
26
|
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
26
27
|
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
@@ -1,7 +1,7 @@
|
|
1
|
+
use arrow::array::Array;
|
2
|
+
use arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
|
1
3
|
use magnus::prelude::*;
|
2
4
|
use magnus::Value;
|
3
|
-
use polars::export::arrow::array::Array;
|
4
|
-
use polars::export::arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
|
5
5
|
use polars::prelude::*;
|
6
6
|
|
7
7
|
use super::RbSeries;
|