polars-df 0.17.1 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +667 -370
- data/ext/polars/Cargo.toml +8 -8
- data/ext/polars/src/conversion/mod.rs +20 -5
- data/ext/polars/src/dataframe/export.rs +2 -2
- data/ext/polars/src/dataframe/general.rs +4 -1
- data/ext/polars/src/expr/general.rs +12 -14
- data/ext/polars/src/expr/rolling.rs +17 -2
- data/ext/polars/src/file.rs +56 -14
- data/ext/polars/src/functions/lazy.rs +15 -2
- data/ext/polars/src/lazyframe/general.rs +85 -48
- data/ext/polars/src/lazyframe/mod.rs +2 -0
- data/ext/polars/src/lazyframe/sink.rs +99 -0
- data/ext/polars/src/lib.rs +4 -6
- data/ext/polars/src/map/mod.rs +1 -1
- data/ext/polars/src/map/series.rs +4 -4
- data/ext/polars/src/on_startup.rs +15 -3
- data/ext/polars/src/series/export.rs +4 -4
- data/ext/polars/src/series/general.rs +2 -2
- data/lib/polars/expr.rb +27 -19
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/lazy_frame.rb +78 -14
- data/lib/polars/list_expr.rb +4 -7
- data/lib/polars/series.rb +11 -9
- data/lib/polars/version.rb +1 -1
- metadata +4 -3
data/ext/polars/Cargo.toml
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.17.1"
|
3
|
+
version = "0.18.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
7
|
-
rust-version = "1.
|
7
|
+
rust-version = "1.85.0"
|
8
8
|
publish = false
|
9
9
|
|
10
10
|
[lib]
|
@@ -12,22 +12,22 @@ crate-type = ["cdylib"]
|
|
12
12
|
|
13
13
|
[dependencies]
|
14
14
|
ahash = "0.8"
|
15
|
-
arrow = { package = "polars-arrow", version = "=0.
|
15
|
+
arrow = { package = "polars-arrow", version = "=0.47.1" }
|
16
16
|
bytes = "1"
|
17
17
|
chrono = "0.4"
|
18
18
|
either = "1.8"
|
19
19
|
magnus = "0.7"
|
20
20
|
num-traits = "0.2"
|
21
|
-
polars-core = "=0.
|
22
|
-
polars-plan = "=0.
|
23
|
-
polars-parquet = "=0.
|
24
|
-
polars-utils = "=0.
|
21
|
+
polars-core = "=0.47.1"
|
22
|
+
polars-plan = "=0.47.1"
|
23
|
+
polars-parquet = "=0.47.1"
|
24
|
+
polars-utils = "=0.47.1"
|
25
25
|
rayon = "1.9"
|
26
26
|
regex = "1"
|
27
27
|
serde_json = "1"
|
28
28
|
|
29
29
|
[dependencies.polars]
|
30
|
-
version = "=0.
|
30
|
+
version = "=0.47.1"
|
31
31
|
features = [
|
32
32
|
"abs",
|
33
33
|
"approx_unique",
|
@@ -21,7 +21,7 @@ use polars::prelude::*;
|
|
21
21
|
use polars::series::ops::NullBehavior;
|
22
22
|
use polars_core::utils::arrow::array::Array;
|
23
23
|
use polars_core::utils::materialize_dyn_int;
|
24
|
-
use polars_plan::
|
24
|
+
use polars_plan::dsl::ScanSources;
|
25
25
|
use polars_utils::mmap::MemSlice;
|
26
26
|
use polars_utils::total_ord::{TotalEq, TotalHash};
|
27
27
|
|
@@ -219,7 +219,7 @@ impl IntoValue for Wrap<DataType> {
|
|
219
219
|
.funcall::<_, _, Value>("new", (tu.to_ascii(),))
|
220
220
|
.unwrap()
|
221
221
|
}
|
222
|
-
DataType::Object(_
|
222
|
+
DataType::Object(_) => {
|
223
223
|
let class = pl.const_get::<_, Value>("Object").unwrap();
|
224
224
|
class.funcall("new", ()).unwrap()
|
225
225
|
}
|
@@ -332,7 +332,7 @@ impl TryConvert for Wrap<DataType> {
|
|
332
332
|
"Polars::Array" => DataType::Array(Box::new(DataType::Null), 0),
|
333
333
|
"Polars::Struct" => DataType::Struct(vec![]),
|
334
334
|
"Polars::Null" => DataType::Null,
|
335
|
-
"Polars::Object" => DataType::Object(OBJECT_NAME
|
335
|
+
"Polars::Object" => DataType::Object(OBJECT_NAME),
|
336
336
|
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
337
337
|
dt => {
|
338
338
|
return Err(RbValueError::new_err(format!(
|
@@ -408,7 +408,7 @@ impl TryConvert for Wrap<DataType> {
|
|
408
408
|
DataType::Struct(fields)
|
409
409
|
}
|
410
410
|
"Polars::Null" => DataType::Null,
|
411
|
-
"Object" => DataType::Object(OBJECT_NAME
|
411
|
+
"Object" => DataType::Object(OBJECT_NAME),
|
412
412
|
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
413
413
|
dt => {
|
414
414
|
return Err(RbTypeError::new_err(format!(
|
@@ -437,7 +437,7 @@ impl TryConvert for Wrap<DataType> {
|
|
437
437
|
"time" => DataType::Time,
|
438
438
|
"dur" => DataType::Duration(TimeUnit::Microseconds),
|
439
439
|
"f64" => DataType::Float64,
|
440
|
-
"obj" => DataType::Object(OBJECT_NAME
|
440
|
+
"obj" => DataType::Object(OBJECT_NAME),
|
441
441
|
"list" => DataType::List(Box::new(DataType::Boolean)),
|
442
442
|
"null" => DataType::Null,
|
443
443
|
"unk" => DataType::Unknown(Default::default()),
|
@@ -761,6 +761,21 @@ impl TryConvert for Wrap<ClosedWindow> {
|
|
761
761
|
}
|
762
762
|
}
|
763
763
|
|
764
|
+
impl TryConvert for Wrap<RoundMode> {
|
765
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
766
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
767
|
+
"half_to_even" => RoundMode::HalfToEven,
|
768
|
+
"half_away_from_zero" => RoundMode::HalfAwayFromZero,
|
769
|
+
v => {
|
770
|
+
return Err(RbValueError::new_err(format!(
|
771
|
+
"`mode` must be one of {{'half_to_even', 'half_away_from_zero'}}, got {v}",
|
772
|
+
)));
|
773
|
+
}
|
774
|
+
};
|
775
|
+
Ok(Wrap(parsed))
|
776
|
+
}
|
777
|
+
}
|
778
|
+
|
764
779
|
impl TryConvert for Wrap<CsvEncoding> {
|
765
780
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
766
781
|
let parsed = match String::try_convert(ob)?.as_str() {
|
@@ -18,7 +18,7 @@ impl RbDataFrame {
|
|
18
18
|
.get_columns()
|
19
19
|
.iter()
|
20
20
|
.map(|s| match s.dtype() {
|
21
|
-
DataType::Object(_
|
21
|
+
DataType::Object(_) => {
|
22
22
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
23
23
|
obj.unwrap().to_value()
|
24
24
|
}
|
@@ -37,7 +37,7 @@ impl RbDataFrame {
|
|
37
37
|
.get_columns()
|
38
38
|
.iter()
|
39
39
|
.map(|s| match s.dtype() {
|
40
|
-
DataType::Object(_
|
40
|
+
DataType::Object(_) => {
|
41
41
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
42
42
|
obj.unwrap().to_value()
|
43
43
|
}
|
@@ -1,3 +1,5 @@
|
|
1
|
+
use std::hash::BuildHasher;
|
2
|
+
|
1
3
|
use either::Either;
|
2
4
|
use magnus::{prelude::*, typed_data::Obj, IntoValue, RArray, Value};
|
3
5
|
use polars::prelude::pivot::{pivot, pivot_stable};
|
@@ -494,7 +496,8 @@ impl RbDataFrame {
|
|
494
496
|
}
|
495
497
|
|
496
498
|
pub fn hash_rows(&self, k0: u64, k1: u64, k2: u64, k3: u64) -> RbResult<RbSeries> {
|
497
|
-
let
|
499
|
+
let seed = PlFixedStateQuality::default().hash_one((k0, k1, k2, k3));
|
500
|
+
let hb = PlSeedableRandomStateQuality::seed_from_u64(seed);
|
498
501
|
let hash = self
|
499
502
|
.df
|
500
503
|
.borrow_mut()
|
@@ -371,14 +371,6 @@ impl RbExpr {
|
|
371
371
|
.into())
|
372
372
|
}
|
373
373
|
|
374
|
-
pub fn backward_fill(&self, limit: FillNullLimit) -> Self {
|
375
|
-
self.inner.clone().backward_fill(limit).into()
|
376
|
-
}
|
377
|
-
|
378
|
-
pub fn forward_fill(&self, limit: FillNullLimit) -> Self {
|
379
|
-
self.inner.clone().forward_fill(limit).into()
|
380
|
-
}
|
381
|
-
|
382
374
|
pub fn shift(&self, n: &Self, fill_value: Option<&Self>) -> Self {
|
383
375
|
let expr = self.inner.clone();
|
384
376
|
let out = match fill_value {
|
@@ -497,8 +489,8 @@ impl RbExpr {
|
|
497
489
|
.into()
|
498
490
|
}
|
499
491
|
|
500
|
-
pub fn round(&self, decimals: u32) -> Self {
|
501
|
-
self.inner.clone().round(decimals).into()
|
492
|
+
pub fn round(&self, decimals: u32, mode: Wrap<RoundMode>) -> Self {
|
493
|
+
self.inner.clone().round(decimals, mode.0).into()
|
502
494
|
}
|
503
495
|
|
504
496
|
pub fn floor(&self) -> Self {
|
@@ -597,8 +589,11 @@ impl RbExpr {
|
|
597
589
|
self.inner.clone().or(expr.inner.clone()).into()
|
598
590
|
}
|
599
591
|
|
600
|
-
pub fn is_in(&self, expr: &Self) -> Self {
|
601
|
-
self.inner
|
592
|
+
pub fn is_in(&self, expr: &Self, nulls_equal: bool) -> Self {
|
593
|
+
self.inner
|
594
|
+
.clone()
|
595
|
+
.is_in(expr.inner.clone(), nulls_equal)
|
596
|
+
.into()
|
602
597
|
}
|
603
598
|
|
604
599
|
pub fn repeat_by(&self, by: &Self) -> Self {
|
@@ -698,8 +693,11 @@ impl RbExpr {
|
|
698
693
|
self.inner.clone().rank(options, seed).into()
|
699
694
|
}
|
700
695
|
|
701
|
-
pub fn diff(&self, n:
|
702
|
-
self.inner
|
696
|
+
pub fn diff(&self, n: &Self, null_behavior: Wrap<NullBehavior>) -> Self {
|
697
|
+
self.inner
|
698
|
+
.clone()
|
699
|
+
.diff(n.inner.clone(), null_behavior.0)
|
700
|
+
.into()
|
703
701
|
}
|
704
702
|
|
705
703
|
pub fn pct_change(&self, n: &Self) -> Self {
|
@@ -319,7 +319,22 @@ impl RbExpr {
|
|
319
319
|
.into()
|
320
320
|
}
|
321
321
|
|
322
|
-
pub fn rolling_skew(
|
323
|
-
self
|
322
|
+
pub fn rolling_skew(
|
323
|
+
&self,
|
324
|
+
window_size: usize,
|
325
|
+
bias: bool,
|
326
|
+
min_periods: Option<usize>,
|
327
|
+
center: bool,
|
328
|
+
) -> Self {
|
329
|
+
let min_periods = min_periods.unwrap_or(window_size);
|
330
|
+
let options = RollingOptionsFixedWindow {
|
331
|
+
window_size,
|
332
|
+
weights: None,
|
333
|
+
min_periods,
|
334
|
+
center,
|
335
|
+
fn_params: Some(RollingFnParams::Skew { bias }),
|
336
|
+
};
|
337
|
+
|
338
|
+
self.inner.clone().rolling_skew(options).into()
|
324
339
|
}
|
325
340
|
}
|
data/ext/polars/src/file.rs
CHANGED
@@ -3,9 +3,12 @@ use std::io;
|
|
3
3
|
use std::io::{Cursor, Read, Seek, SeekFrom, Write};
|
4
4
|
use std::path::PathBuf;
|
5
5
|
|
6
|
-
use magnus::{exception, prelude::*, Error, RString, Value};
|
6
|
+
use magnus::{exception, prelude::*, value::Opaque, Error, RString, Ruby, Value};
|
7
7
|
use polars::io::cloud::CloudOptions;
|
8
8
|
use polars::io::mmap::MmapBytesReader;
|
9
|
+
use polars::prelude::file::DynWriteable;
|
10
|
+
use polars::prelude::sync_on_close::SyncOnCloseType;
|
11
|
+
use polars_utils::file::ClosableFile;
|
9
12
|
use polars_utils::mmap::MemSlice;
|
10
13
|
|
11
14
|
use crate::error::RbPolarsErr;
|
@@ -14,7 +17,22 @@ use crate::RbResult;
|
|
14
17
|
|
15
18
|
#[derive(Clone)]
|
16
19
|
pub struct RbFileLikeObject {
|
17
|
-
inner: Value
|
20
|
+
inner: Opaque<Value>,
|
21
|
+
}
|
22
|
+
|
23
|
+
impl DynWriteable for RbFileLikeObject {
|
24
|
+
fn as_dyn_write(&self) -> &(dyn io::Write + Send + 'static) {
|
25
|
+
self as _
|
26
|
+
}
|
27
|
+
fn as_mut_dyn_write(&mut self) -> &mut (dyn io::Write + Send + 'static) {
|
28
|
+
self as _
|
29
|
+
}
|
30
|
+
fn close(self: Box<Self>) -> io::Result<()> {
|
31
|
+
Ok(())
|
32
|
+
}
|
33
|
+
fn sync_on_close(&mut self, _sync_on_close: SyncOnCloseType) -> io::Result<()> {
|
34
|
+
Ok(())
|
35
|
+
}
|
18
36
|
}
|
19
37
|
|
20
38
|
/// Wraps a `Value`, and implements read, seek, and write for it.
|
@@ -23,7 +41,9 @@ impl RbFileLikeObject {
|
|
23
41
|
/// To assert the object has the required methods methods,
|
24
42
|
/// instantiate it with `RbFileLikeObject::require`
|
25
43
|
pub fn new(object: Value) -> Self {
|
26
|
-
RbFileLikeObject {
|
44
|
+
RbFileLikeObject {
|
45
|
+
inner: object.into(),
|
46
|
+
}
|
27
47
|
}
|
28
48
|
|
29
49
|
pub fn as_bytes(&self) -> bytes::Bytes {
|
@@ -31,8 +51,9 @@ impl RbFileLikeObject {
|
|
31
51
|
}
|
32
52
|
|
33
53
|
pub fn as_file_buffer(&self) -> Cursor<Vec<u8>> {
|
34
|
-
let bytes =
|
35
|
-
.
|
54
|
+
let bytes = Ruby::get()
|
55
|
+
.unwrap()
|
56
|
+
.get_inner(self.inner)
|
36
57
|
.funcall::<_, _, RString>("read", ())
|
37
58
|
.expect("no read method found");
|
38
59
|
|
@@ -77,8 +98,9 @@ fn rberr_to_io_err(e: Error) -> io::Error {
|
|
77
98
|
|
78
99
|
impl Read for RbFileLikeObject {
|
79
100
|
fn read(&mut self, mut buf: &mut [u8]) -> Result<usize, io::Error> {
|
80
|
-
let bytes =
|
81
|
-
.
|
101
|
+
let bytes = Ruby::get()
|
102
|
+
.unwrap()
|
103
|
+
.get_inner(self.inner)
|
82
104
|
.funcall::<_, _, RString>("read", (buf.len(),))
|
83
105
|
.map_err(rberr_to_io_err)?;
|
84
106
|
|
@@ -92,8 +114,9 @@ impl Write for RbFileLikeObject {
|
|
92
114
|
fn write(&mut self, buf: &[u8]) -> Result<usize, io::Error> {
|
93
115
|
let rbbytes = RString::from_slice(buf);
|
94
116
|
|
95
|
-
let number_bytes_written =
|
96
|
-
.
|
117
|
+
let number_bytes_written = Ruby::get()
|
118
|
+
.unwrap()
|
119
|
+
.get_inner(self.inner)
|
97
120
|
.funcall::<_, _, usize>("write", (rbbytes,))
|
98
121
|
.map_err(rberr_to_io_err)?;
|
99
122
|
|
@@ -101,7 +124,9 @@ impl Write for RbFileLikeObject {
|
|
101
124
|
}
|
102
125
|
|
103
126
|
fn flush(&mut self) -> Result<(), io::Error> {
|
104
|
-
|
127
|
+
Ruby::get()
|
128
|
+
.unwrap()
|
129
|
+
.get_inner(self.inner)
|
105
130
|
.funcall::<_, _, Value>("flush", ())
|
106
131
|
.map_err(rberr_to_io_err)?;
|
107
132
|
|
@@ -117,8 +142,9 @@ impl Seek for RbFileLikeObject {
|
|
117
142
|
SeekFrom::End(i) => (2, i),
|
118
143
|
};
|
119
144
|
|
120
|
-
let new_position =
|
121
|
-
.
|
145
|
+
let new_position = Ruby::get()
|
146
|
+
.unwrap()
|
147
|
+
.get_inner(self.inner)
|
122
148
|
.funcall("seek", (offset, whence))
|
123
149
|
.map_err(rberr_to_io_err)?;
|
124
150
|
|
@@ -129,11 +155,12 @@ impl Seek for RbFileLikeObject {
|
|
129
155
|
pub trait FileLike: Read + Write + Seek {}
|
130
156
|
|
131
157
|
impl FileLike for File {}
|
158
|
+
impl FileLike for ClosableFile {}
|
132
159
|
impl FileLike for RbFileLikeObject {}
|
133
160
|
|
134
161
|
pub enum EitherRustRubyFile {
|
135
162
|
Rb(RbFileLikeObject),
|
136
|
-
Rust(
|
163
|
+
Rust(ClosableFile),
|
137
164
|
}
|
138
165
|
|
139
166
|
impl EitherRustRubyFile {
|
@@ -144,6 +171,13 @@ impl EitherRustRubyFile {
|
|
144
171
|
}
|
145
172
|
}
|
146
173
|
|
174
|
+
pub(crate) fn into_writeable(self) -> Box<dyn DynWriteable> {
|
175
|
+
match self {
|
176
|
+
Self::Rb(f) => Box::new(f),
|
177
|
+
Self::Rust(f) => Box::new(f),
|
178
|
+
}
|
179
|
+
}
|
180
|
+
|
147
181
|
pub fn into_dyn_writeable(self) -> Box<dyn Write> {
|
148
182
|
match self {
|
149
183
|
EitherRustRubyFile::Rb(f) => Box::new(f),
|
@@ -159,6 +193,14 @@ pub enum RubyScanSourceInput {
|
|
159
193
|
File(File),
|
160
194
|
}
|
161
195
|
|
196
|
+
pub(crate) fn try_get_rbfile(
|
197
|
+
rb_f: Value,
|
198
|
+
write: bool,
|
199
|
+
) -> RbResult<(EitherRustRubyFile, Option<PathBuf>)> {
|
200
|
+
let f = RbFileLikeObject::with_requirements(rb_f, !write, write, !write)?;
|
201
|
+
Ok((EitherRustRubyFile::Rb(f), None))
|
202
|
+
}
|
203
|
+
|
162
204
|
pub fn get_ruby_scan_source_input(rb_f: Value, write: bool) -> RbResult<RubyScanSourceInput> {
|
163
205
|
if let Ok(file_path) = PathBuf::try_convert(rb_f) {
|
164
206
|
// TODO resolve_homedir
|
@@ -184,7 +226,7 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
|
|
184
226
|
} else {
|
185
227
|
polars_utils::open_file(&file_path).map_err(RbPolarsErr::from)?
|
186
228
|
};
|
187
|
-
Ok(EitherRustRubyFile::Rust(f))
|
229
|
+
Ok(EitherRustRubyFile::Rust(f.into()))
|
188
230
|
} else {
|
189
231
|
let f = RbFileLikeObject::with_requirements(rb_f, !truncate, truncate, !truncate)?;
|
190
232
|
Ok(EitherRustRubyFile::Rb(f))
|
@@ -278,7 +278,13 @@ pub fn first() -> RbExpr {
|
|
278
278
|
dsl::first().into()
|
279
279
|
}
|
280
280
|
|
281
|
-
pub fn fold(
|
281
|
+
pub fn fold(
|
282
|
+
acc: &RbExpr,
|
283
|
+
lambda: Value,
|
284
|
+
exprs: RArray,
|
285
|
+
returns_scalar: bool,
|
286
|
+
return_dtype: Option<Wrap<DataType>>,
|
287
|
+
) -> RbResult<RbExpr> {
|
282
288
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
283
289
|
let lambda = Opaque::from(lambda);
|
284
290
|
|
@@ -290,7 +296,14 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
|
290
296
|
)
|
291
297
|
.map(|v| v.map(Column::from))
|
292
298
|
};
|
293
|
-
Ok(dsl::fold_exprs(
|
299
|
+
Ok(dsl::fold_exprs(
|
300
|
+
acc.inner.clone(),
|
301
|
+
func,
|
302
|
+
exprs,
|
303
|
+
returns_scalar,
|
304
|
+
return_dtype.map(|w| w.0),
|
305
|
+
)
|
306
|
+
.into())
|
294
307
|
}
|
295
308
|
|
296
309
|
pub fn last() -> RbExpr {
|
@@ -2,12 +2,13 @@ use magnus::{r_hash::ForEach, typed_data::Obj, IntoValue, RArray, RHash, TryConv
|
|
2
2
|
use polars::io::{HiveOptions, RowIndex};
|
3
3
|
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
5
|
-
use polars_plan::
|
5
|
+
use polars_plan::dsl::ScanSources;
|
6
6
|
use std::cell::RefCell;
|
7
7
|
use std::io::BufWriter;
|
8
8
|
use std::num::NonZeroUsize;
|
9
9
|
use std::path::PathBuf;
|
10
10
|
|
11
|
+
use super::SinkTarget;
|
11
12
|
use crate::conversion::*;
|
12
13
|
use crate::expr::rb_exprs_to_exprs;
|
13
14
|
use crate::file::get_file_like;
|
@@ -374,16 +375,16 @@ impl RbLazyFrame {
|
|
374
375
|
#[allow(clippy::too_many_arguments)]
|
375
376
|
pub fn sink_parquet(
|
376
377
|
&self,
|
377
|
-
|
378
|
+
target: SinkTarget,
|
378
379
|
compression: String,
|
379
380
|
compression_level: Option<i32>,
|
380
381
|
statistics: Wrap<StatisticsOptions>,
|
381
382
|
row_group_size: Option<usize>,
|
382
383
|
data_page_size: Option<usize>,
|
383
|
-
maintain_order: bool,
|
384
384
|
cloud_options: Option<Vec<(String, String)>>,
|
385
385
|
retries: usize,
|
386
|
-
|
386
|
+
sink_options: Wrap<SinkOptions>,
|
387
|
+
) -> RbResult<RbLazyFrame> {
|
387
388
|
let compression = parse_parquet_compression(&compression, compression_level)?;
|
388
389
|
|
389
390
|
let options = ParquetWriteOptions {
|
@@ -391,48 +392,67 @@ impl RbLazyFrame {
|
|
391
392
|
statistics: statistics.0,
|
392
393
|
row_group_size,
|
393
394
|
data_page_size,
|
394
|
-
maintain_order,
|
395
395
|
};
|
396
396
|
|
397
|
-
let cloud_options = {
|
398
|
-
|
399
|
-
|
400
|
-
|
397
|
+
let cloud_options = match target.base_path() {
|
398
|
+
None => None,
|
399
|
+
Some(base_path) => {
|
400
|
+
let cloud_options = parse_cloud_options(
|
401
|
+
base_path.to_str().unwrap(),
|
402
|
+
cloud_options.unwrap_or_default(),
|
403
|
+
)?;
|
404
|
+
Some(cloud_options.with_max_retries(retries))
|
405
|
+
}
|
401
406
|
};
|
402
407
|
|
403
408
|
let ldf = self.ldf.borrow().clone();
|
404
|
-
|
405
|
-
|
406
|
-
|
409
|
+
match target {
|
410
|
+
SinkTarget::File(target) => {
|
411
|
+
ldf.sink_parquet(target, options, cloud_options, sink_options.0)
|
412
|
+
}
|
413
|
+
}
|
414
|
+
.map_err(RbPolarsErr::from)
|
415
|
+
.map(Into::into)
|
416
|
+
.map_err(Into::into)
|
407
417
|
}
|
408
418
|
|
409
419
|
pub fn sink_ipc(
|
410
420
|
&self,
|
411
|
-
|
421
|
+
target: SinkTarget,
|
412
422
|
compression: Option<Wrap<IpcCompression>>,
|
413
|
-
maintain_order: bool,
|
414
423
|
cloud_options: Option<Vec<(String, String)>>,
|
415
424
|
retries: usize,
|
416
|
-
|
425
|
+
sink_options: Wrap<SinkOptions>,
|
426
|
+
) -> RbResult<RbLazyFrame> {
|
417
427
|
let options = IpcWriterOptions {
|
418
428
|
compression: compression.map(|c| c.0),
|
419
|
-
|
429
|
+
..Default::default()
|
420
430
|
};
|
421
431
|
|
422
|
-
let cloud_options = {
|
423
|
-
|
424
|
-
|
425
|
-
|
432
|
+
let cloud_options = match target.base_path() {
|
433
|
+
None => None,
|
434
|
+
Some(base_path) => {
|
435
|
+
let cloud_options = parse_cloud_options(
|
436
|
+
base_path.to_str().unwrap(),
|
437
|
+
cloud_options.unwrap_or_default(),
|
438
|
+
)?;
|
439
|
+
Some(cloud_options.with_max_retries(retries))
|
440
|
+
}
|
426
441
|
};
|
427
442
|
|
428
443
|
let ldf = self.ldf.borrow().clone();
|
429
|
-
|
430
|
-
|
431
|
-
|
444
|
+
match target {
|
445
|
+
SinkTarget::File(target) => {
|
446
|
+
ldf.sink_ipc(target, options, cloud_options, sink_options.0)
|
447
|
+
}
|
448
|
+
}
|
449
|
+
.map_err(RbPolarsErr::from)
|
450
|
+
.map(Into::into)
|
451
|
+
.map_err(Into::into)
|
432
452
|
}
|
433
453
|
|
434
|
-
pub fn sink_csv(&self, arguments: &[Value]) -> RbResult<
|
435
|
-
let
|
454
|
+
pub fn sink_csv(&self, arguments: &[Value]) -> RbResult<RbLazyFrame> {
|
455
|
+
let target = SinkTarget::try_convert(arguments[0])?;
|
436
456
|
let include_bom = bool::try_convert(arguments[1])?;
|
437
457
|
let include_header = bool::try_convert(arguments[2])?;
|
438
458
|
let separator = u8::try_convert(arguments[3])?;
|
@@ -446,9 +466,9 @@ impl RbLazyFrame {
|
|
446
466
|
let float_precision = Option::<usize>::try_convert(arguments[11])?;
|
447
467
|
let null_value = Option::<String>::try_convert(arguments[12])?;
|
448
468
|
let quote_style = Option::<Wrap<QuoteStyle>>::try_convert(arguments[13])?;
|
449
|
-
let
|
450
|
-
let
|
451
|
-
let
|
469
|
+
let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[14])?;
|
470
|
+
let retries = usize::try_convert(arguments[15])?;
|
471
|
+
let sink_options = Wrap::<SinkOptions>::try_convert(arguments[16])?;
|
452
472
|
|
453
473
|
let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
|
454
474
|
let null_value = null_value.unwrap_or(SerializeOptions::default().null);
|
@@ -469,42 +489,59 @@ impl RbLazyFrame {
|
|
469
489
|
let options = CsvWriterOptions {
|
470
490
|
include_bom,
|
471
491
|
include_header,
|
472
|
-
maintain_order,
|
473
492
|
batch_size: batch_size.0,
|
474
493
|
serialize_options,
|
475
494
|
};
|
476
495
|
|
477
|
-
let cloud_options = {
|
478
|
-
|
479
|
-
|
480
|
-
|
496
|
+
let cloud_options = match target.base_path() {
|
497
|
+
None => None,
|
498
|
+
Some(base_path) => {
|
499
|
+
let cloud_options = parse_cloud_options(
|
500
|
+
base_path.to_str().unwrap(),
|
501
|
+
cloud_options.unwrap_or_default(),
|
502
|
+
)?;
|
503
|
+
Some(cloud_options.with_max_retries(retries))
|
504
|
+
}
|
481
505
|
};
|
482
506
|
|
483
507
|
let ldf = self.ldf.borrow().clone();
|
484
|
-
|
485
|
-
|
486
|
-
|
508
|
+
match target {
|
509
|
+
SinkTarget::File(target) => {
|
510
|
+
ldf.sink_csv(target, options, cloud_options, sink_options.0)
|
511
|
+
}
|
512
|
+
}
|
513
|
+
.map_err(RbPolarsErr::from)
|
514
|
+
.map(Into::into)
|
515
|
+
.map_err(Into::into)
|
487
516
|
}
|
488
517
|
|
489
518
|
pub fn sink_json(
|
490
519
|
&self,
|
491
|
-
|
492
|
-
maintain_order: bool,
|
520
|
+
target: SinkTarget,
|
493
521
|
cloud_options: Option<Vec<(String, String)>>,
|
494
522
|
retries: usize,
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
Some(
|
523
|
+
sink_options: Wrap<SinkOptions>,
|
524
|
+
) -> RbResult<RbLazyFrame> {
|
525
|
+
let options = JsonWriterOptions {};
|
526
|
+
|
527
|
+
let cloud_options = match target.base_path() {
|
528
|
+
None => None,
|
529
|
+
Some(base_path) => {
|
530
|
+
let cloud_options = parse_cloud_options(
|
531
|
+
base_path.to_str().unwrap(),
|
532
|
+
cloud_options.unwrap_or_default(),
|
533
|
+
)?;
|
534
|
+
Some(cloud_options.with_max_retries(retries))
|
535
|
+
}
|
502
536
|
};
|
503
537
|
|
504
538
|
let ldf = self.ldf.borrow().clone();
|
505
|
-
|
506
|
-
.
|
507
|
-
|
539
|
+
match target {
|
540
|
+
SinkTarget::File(path) => ldf.sink_json(path, options, cloud_options, sink_options.0),
|
541
|
+
}
|
542
|
+
.map_err(RbPolarsErr::from)
|
543
|
+
.map(Into::into)
|
544
|
+
.map_err(Into::into)
|
508
545
|
}
|
509
546
|
|
510
547
|
pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
|