polars-df 0.14.0 → 0.16.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/Cargo.lock +1523 -378
- data/LICENSE.txt +1 -0
- data/README.md +38 -4
- data/ext/polars/Cargo.toml +15 -5
- data/ext/polars/src/batched_csv.rs +7 -10
- data/ext/polars/src/conversion/any_value.rs +31 -21
- data/ext/polars/src/conversion/mod.rs +155 -48
- data/ext/polars/src/dataframe/construction.rs +0 -3
- data/ext/polars/src/dataframe/export.rs +9 -2
- data/ext/polars/src/dataframe/general.rs +15 -57
- data/ext/polars/src/dataframe/io.rs +77 -169
- data/ext/polars/src/dataframe/mod.rs +1 -0
- data/ext/polars/src/dataframe/serde.rs +15 -0
- data/ext/polars/src/error.rs +31 -48
- data/ext/polars/src/exceptions.rs +24 -0
- data/ext/polars/src/expr/binary.rs +4 -42
- data/ext/polars/src/expr/datetime.rs +5 -4
- data/ext/polars/src/expr/general.rs +16 -22
- data/ext/polars/src/expr/list.rs +18 -11
- data/ext/polars/src/expr/meta.rs +6 -2
- data/ext/polars/src/expr/rolling.rs +6 -7
- data/ext/polars/src/expr/string.rs +9 -36
- data/ext/polars/src/file.rs +78 -23
- data/ext/polars/src/functions/aggregation.rs +4 -4
- data/ext/polars/src/functions/business.rs +15 -0
- data/ext/polars/src/functions/io.rs +34 -13
- data/ext/polars/src/functions/lazy.rs +22 -12
- data/ext/polars/src/functions/meta.rs +1 -1
- data/ext/polars/src/functions/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/lazyframe/general.rs +920 -0
- data/ext/polars/src/lazyframe/mod.rs +3 -827
- data/ext/polars/src/lazyframe/serde.rs +31 -0
- data/ext/polars/src/lib.rs +54 -27
- data/ext/polars/src/map/dataframe.rs +10 -6
- data/ext/polars/src/map/lazy.rs +65 -4
- data/ext/polars/src/map/mod.rs +9 -8
- data/ext/polars/src/on_startup.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +1 -5
- data/ext/polars/src/series/arithmetic.rs +10 -10
- data/ext/polars/src/series/construction.rs +2 -2
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +631 -0
- data/ext/polars/src/series/import.rs +55 -0
- data/ext/polars/src/series/mod.rs +11 -638
- data/ext/polars/src/series/scatter.rs +2 -2
- data/ext/polars/src/utils.rs +0 -20
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +452 -101
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +3 -1
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +95 -13
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +684 -20
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1226 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +17 -1
- metadata +16 -9
- data/lib/polars/functions.rb +0 -57
@@ -9,7 +9,6 @@ use polars_core::series::IsSorted;
|
|
9
9
|
use crate::conversion::{parse_fill_null_strategy, Wrap};
|
10
10
|
use crate::map::lazy::map_single;
|
11
11
|
use crate::rb_exprs_to_exprs;
|
12
|
-
use crate::utils::reinterpret;
|
13
12
|
use crate::{RbExpr, RbResult};
|
14
13
|
|
15
14
|
impl RbExpr {
|
@@ -165,7 +164,7 @@ impl RbExpr {
|
|
165
164
|
self.inner.clone().implode().into()
|
166
165
|
}
|
167
166
|
|
168
|
-
pub fn quantile(&self, quantile: &Self, interpolation: Wrap<
|
167
|
+
pub fn quantile(&self, quantile: &Self, interpolation: Wrap<QuantileMethod>) -> Self {
|
169
168
|
self.inner
|
170
169
|
.clone()
|
171
170
|
.quantile(quantile.inner.clone(), interpolation.0)
|
@@ -272,6 +271,7 @@ impl RbExpr {
|
|
272
271
|
nulls_last,
|
273
272
|
multithreaded: true,
|
274
273
|
maintain_order: false,
|
274
|
+
limit: None,
|
275
275
|
})
|
276
276
|
.into()
|
277
277
|
}
|
@@ -284,6 +284,7 @@ impl RbExpr {
|
|
284
284
|
nulls_last,
|
285
285
|
multithreaded: true,
|
286
286
|
maintain_order: false,
|
287
|
+
limit: None,
|
287
288
|
})
|
288
289
|
.into()
|
289
290
|
}
|
@@ -364,6 +365,7 @@ impl RbExpr {
|
|
364
365
|
nulls_last,
|
365
366
|
multithreaded,
|
366
367
|
maintain_order,
|
368
|
+
limit: None,
|
367
369
|
},
|
368
370
|
)
|
369
371
|
.into())
|
@@ -463,14 +465,7 @@ impl RbExpr {
|
|
463
465
|
}
|
464
466
|
|
465
467
|
pub fn gather_every(&self, n: usize, offset: usize) -> Self {
|
466
|
-
self.clone()
|
467
|
-
.inner
|
468
|
-
.map(
|
469
|
-
move |s: Series| Ok(Some(s.gather_every(n, offset))),
|
470
|
-
GetOutput::same_type(),
|
471
|
-
)
|
472
|
-
.with_fmt("gather_every")
|
473
|
-
.into()
|
468
|
+
self.inner.clone().gather_every(n, offset).into()
|
474
469
|
}
|
475
470
|
|
476
471
|
pub fn tail(&self, n: Option<usize>) -> Self {
|
@@ -644,8 +639,16 @@ impl RbExpr {
|
|
644
639
|
output_type: Option<Wrap<DataType>>,
|
645
640
|
agg_list: bool,
|
646
641
|
is_elementwise: bool,
|
642
|
+
returns_scalar: bool,
|
647
643
|
) -> Self {
|
648
|
-
map_single(
|
644
|
+
map_single(
|
645
|
+
self,
|
646
|
+
lambda,
|
647
|
+
output_type,
|
648
|
+
agg_list,
|
649
|
+
is_elementwise,
|
650
|
+
returns_scalar,
|
651
|
+
)
|
649
652
|
}
|
650
653
|
|
651
654
|
pub fn dot(&self, other: &Self) -> Self {
|
@@ -653,16 +656,7 @@ impl RbExpr {
|
|
653
656
|
}
|
654
657
|
|
655
658
|
pub fn reinterpret(&self, signed: bool) -> Self {
|
656
|
-
|
657
|
-
let dt = if signed {
|
658
|
-
DataType::Int64
|
659
|
-
} else {
|
660
|
-
DataType::UInt64
|
661
|
-
};
|
662
|
-
self.clone()
|
663
|
-
.inner
|
664
|
-
.map(function, GetOutput::from_type(dt))
|
665
|
-
.into()
|
659
|
+
self.inner.clone().reinterpret(signed).into()
|
666
660
|
}
|
667
661
|
|
668
662
|
pub fn mode(&self) -> Self {
|
@@ -717,7 +711,7 @@ impl RbExpr {
|
|
717
711
|
}
|
718
712
|
|
719
713
|
pub fn reshape(&self, dims: Vec<i64>) -> Self {
|
720
|
-
self.inner.clone().reshape(&dims
|
714
|
+
self.inner.clone().reshape(&dims).into()
|
721
715
|
}
|
722
716
|
|
723
717
|
pub fn cum_count(&self, reverse: bool) -> Self {
|
data/ext/polars/src/expr/list.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::Value;
|
1
|
+
use magnus::{prelude::*, value::Opaque, Ruby, Value};
|
2
2
|
use polars::lazy::dsl::lit;
|
3
3
|
use polars::prelude::*;
|
4
4
|
use polars::series::ops::NullBehavior;
|
@@ -179,23 +179,30 @@ impl RbExpr {
|
|
179
179
|
pub fn list_to_struct(
|
180
180
|
&self,
|
181
181
|
width_strat: Wrap<ListToStructWidthStrategy>,
|
182
|
-
|
182
|
+
name_gen: Option<Value>,
|
183
183
|
upper_bound: usize,
|
184
184
|
) -> RbResult<Self> {
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
185
|
+
let name_gen = name_gen.map(|lambda| {
|
186
|
+
let lambda = Opaque::from(lambda);
|
187
|
+
Arc::new(move |idx: usize| {
|
188
|
+
let lambda = Ruby::get().unwrap().get_inner(lambda);
|
189
|
+
let out: String = lambda.funcall("call", (idx,)).unwrap();
|
190
|
+
PlSmallStr::from_string(out)
|
191
|
+
});
|
192
|
+
|
193
|
+
// non-Ruby thread
|
194
|
+
todo!();
|
195
|
+
});
|
193
196
|
|
194
197
|
Ok(self
|
195
198
|
.inner
|
196
199
|
.clone()
|
197
200
|
.list()
|
198
|
-
.to_struct(
|
201
|
+
.to_struct(ListToStructArgs::InferWidth {
|
202
|
+
infer_field_strategy: width_strat.0,
|
203
|
+
get_index_name: name_gen,
|
204
|
+
max_fields: upper_bound,
|
205
|
+
})
|
199
206
|
.into())
|
200
207
|
}
|
201
208
|
|
data/ext/polars/src/expr/meta.rs
CHANGED
@@ -84,13 +84,17 @@ impl RbExpr {
|
|
84
84
|
self.inner.clone().meta()._into_selector().into()
|
85
85
|
}
|
86
86
|
|
87
|
-
|
87
|
+
fn compute_tree_format(&self, display_as_dot: bool) -> RbResult<String> {
|
88
88
|
let e = self
|
89
89
|
.inner
|
90
90
|
.clone()
|
91
91
|
.meta()
|
92
|
-
.into_tree_formatter()
|
92
|
+
.into_tree_formatter(display_as_dot)
|
93
93
|
.map_err(RbPolarsErr::from)?;
|
94
94
|
Ok(format!("{e}"))
|
95
95
|
}
|
96
|
+
|
97
|
+
pub fn meta_tree_format(&self) -> RbResult<String> {
|
98
|
+
self.compute_tree_format(false)
|
99
|
+
}
|
96
100
|
}
|
@@ -1,5 +1,4 @@
|
|
1
1
|
use polars::prelude::*;
|
2
|
-
use std::any::Any;
|
3
2
|
|
4
3
|
use crate::conversion::Wrap;
|
5
4
|
use crate::RbExpr;
|
@@ -169,7 +168,7 @@ impl RbExpr {
|
|
169
168
|
weights,
|
170
169
|
min_periods,
|
171
170
|
center,
|
172
|
-
fn_params: Some(
|
171
|
+
fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
|
173
172
|
};
|
174
173
|
|
175
174
|
self.inner.clone().rolling_std(options).into()
|
@@ -187,7 +186,7 @@ impl RbExpr {
|
|
187
186
|
window_size: Duration::parse(&window_size),
|
188
187
|
min_periods,
|
189
188
|
closed_window: closed.0,
|
190
|
-
fn_params: Some(
|
189
|
+
fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
|
191
190
|
};
|
192
191
|
|
193
192
|
self.inner
|
@@ -210,7 +209,7 @@ impl RbExpr {
|
|
210
209
|
weights,
|
211
210
|
min_periods,
|
212
211
|
center,
|
213
|
-
fn_params: Some(
|
212
|
+
fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
|
214
213
|
};
|
215
214
|
|
216
215
|
self.inner.clone().rolling_var(options).into()
|
@@ -228,7 +227,7 @@ impl RbExpr {
|
|
228
227
|
window_size: Duration::parse(&window_size),
|
229
228
|
min_periods,
|
230
229
|
closed_window: closed.0,
|
231
|
-
fn_params: Some(
|
230
|
+
fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
|
232
231
|
};
|
233
232
|
|
234
233
|
self.inner
|
@@ -277,7 +276,7 @@ impl RbExpr {
|
|
277
276
|
pub fn rolling_quantile(
|
278
277
|
&self,
|
279
278
|
quantile: f64,
|
280
|
-
interpolation: Wrap<
|
279
|
+
interpolation: Wrap<QuantileMethod>,
|
281
280
|
window_size: usize,
|
282
281
|
weights: Option<Vec<f64>>,
|
283
282
|
min_periods: Option<usize>,
|
@@ -302,7 +301,7 @@ impl RbExpr {
|
|
302
301
|
&self,
|
303
302
|
by: &RbExpr,
|
304
303
|
quantile: f64,
|
305
|
-
interpolation: Wrap<
|
304
|
+
interpolation: Wrap<QuantileMethod>,
|
306
305
|
window_size: String,
|
307
306
|
min_periods: usize,
|
308
307
|
closed: Wrap<ClosedWindow>,
|
@@ -130,6 +130,11 @@ impl RbExpr {
|
|
130
130
|
self.inner.clone().str().to_lowercase().into()
|
131
131
|
}
|
132
132
|
|
133
|
+
// requires nightly
|
134
|
+
// pub fn str_to_titlecase(&self) -> Self {
|
135
|
+
// self.inner.clone().str().to_titlecase().into()
|
136
|
+
// }
|
137
|
+
|
133
138
|
pub fn str_len_bytes(&self) -> Self {
|
134
139
|
self.inner.clone().str().len_bytes().into()
|
135
140
|
}
|
@@ -200,51 +205,19 @@ impl RbExpr {
|
|
200
205
|
}
|
201
206
|
|
202
207
|
pub fn str_hex_encode(&self) -> Self {
|
203
|
-
self.clone()
|
204
|
-
.inner
|
205
|
-
.map(
|
206
|
-
move |s| s.str().map(|s| Some(s.hex_encode().into_series())),
|
207
|
-
GetOutput::same_type(),
|
208
|
-
)
|
209
|
-
.with_fmt("str.hex_encode")
|
210
|
-
.into()
|
208
|
+
self.inner.clone().str().hex_encode().into()
|
211
209
|
}
|
212
210
|
|
213
211
|
pub fn str_hex_decode(&self, strict: bool) -> Self {
|
214
|
-
self.clone()
|
215
|
-
.inner
|
216
|
-
.map(
|
217
|
-
move |s| s.str()?.hex_decode(strict).map(|s| Some(s.into_series())),
|
218
|
-
GetOutput::same_type(),
|
219
|
-
)
|
220
|
-
.with_fmt("str.hex_decode")
|
221
|
-
.into()
|
212
|
+
self.inner.clone().str().hex_decode(strict).into()
|
222
213
|
}
|
223
214
|
|
224
215
|
pub fn str_base64_encode(&self) -> Self {
|
225
|
-
self.clone()
|
226
|
-
.inner
|
227
|
-
.map(
|
228
|
-
move |s| s.str().map(|s| Some(s.base64_encode().into_series())),
|
229
|
-
GetOutput::same_type(),
|
230
|
-
)
|
231
|
-
.with_fmt("str.base64_encode")
|
232
|
-
.into()
|
216
|
+
self.inner.clone().str().base64_encode().into()
|
233
217
|
}
|
234
218
|
|
235
219
|
pub fn str_base64_decode(&self, strict: bool) -> Self {
|
236
|
-
self.clone()
|
237
|
-
.inner
|
238
|
-
.map(
|
239
|
-
move |s| {
|
240
|
-
s.str()?
|
241
|
-
.base64_decode(strict)
|
242
|
-
.map(|s| Some(s.into_series()))
|
243
|
-
},
|
244
|
-
GetOutput::same_type(),
|
245
|
-
)
|
246
|
-
.with_fmt("str.base64_decode")
|
247
|
-
.into()
|
220
|
+
self.inner.clone().str().base64_decode(strict).into()
|
248
221
|
}
|
249
222
|
|
250
223
|
pub fn str_to_integer(&self, base: &Self, strict: bool) -> Self {
|
data/ext/polars/src/file.rs
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
use std::fs::File;
|
2
2
|
use std::io;
|
3
|
-
use std::io::{
|
3
|
+
use std::io::{Cursor, Read, Seek, SeekFrom, Write};
|
4
4
|
use std::path::PathBuf;
|
5
5
|
|
6
6
|
use magnus::{exception, prelude::*, Error, RString, Value};
|
7
|
+
use polars::io::cloud::CloudOptions;
|
7
8
|
use polars::io::mmap::MmapBytesReader;
|
9
|
+
use polars_utils::mmap::MemSlice;
|
8
10
|
|
9
11
|
use crate::error::RbPolarsErr;
|
10
12
|
use crate::prelude::resolve_homedir;
|
@@ -24,9 +26,8 @@ impl RbFileLikeObject {
|
|
24
26
|
RbFileLikeObject { inner: object }
|
25
27
|
}
|
26
28
|
|
27
|
-
pub fn
|
28
|
-
|
29
|
-
std::io::Cursor::new(data)
|
29
|
+
pub fn as_bytes(&self) -> bytes::Bytes {
|
30
|
+
self.as_file_buffer().into_inner().into()
|
30
31
|
}
|
31
32
|
|
32
33
|
pub fn as_file_buffer(&self) -> Cursor<Vec<u8>> {
|
@@ -132,7 +133,42 @@ impl FileLike for RbFileLikeObject {}
|
|
132
133
|
|
133
134
|
pub enum EitherRustRubyFile {
|
134
135
|
Rb(RbFileLikeObject),
|
135
|
-
Rust(
|
136
|
+
Rust(File),
|
137
|
+
}
|
138
|
+
|
139
|
+
impl EitherRustRubyFile {
|
140
|
+
pub fn into_dyn(self) -> Box<dyn FileLike> {
|
141
|
+
match self {
|
142
|
+
EitherRustRubyFile::Rb(f) => Box::new(f),
|
143
|
+
EitherRustRubyFile::Rust(f) => Box::new(f),
|
144
|
+
}
|
145
|
+
}
|
146
|
+
|
147
|
+
pub fn into_dyn_writeable(self) -> Box<dyn Write> {
|
148
|
+
match self {
|
149
|
+
EitherRustRubyFile::Rb(f) => Box::new(f),
|
150
|
+
EitherRustRubyFile::Rust(f) => Box::new(f),
|
151
|
+
}
|
152
|
+
}
|
153
|
+
}
|
154
|
+
|
155
|
+
pub enum RubyScanSourceInput {
|
156
|
+
Buffer(MemSlice),
|
157
|
+
Path(PathBuf),
|
158
|
+
#[allow(dead_code)]
|
159
|
+
File(File),
|
160
|
+
}
|
161
|
+
|
162
|
+
pub fn get_ruby_scan_source_input(rb_f: Value, write: bool) -> RbResult<RubyScanSourceInput> {
|
163
|
+
if let Ok(file_path) = PathBuf::try_convert(rb_f) {
|
164
|
+
// TODO resolve_homedir
|
165
|
+
Ok(RubyScanSourceInput::Path(file_path))
|
166
|
+
} else {
|
167
|
+
let f = RbFileLikeObject::with_requirements(rb_f, !write, write, !write)?;
|
168
|
+
Ok(RubyScanSourceInput::Buffer(MemSlice::from_bytes(
|
169
|
+
f.as_bytes(),
|
170
|
+
)))
|
171
|
+
}
|
136
172
|
}
|
137
173
|
|
138
174
|
///
|
@@ -142,14 +178,13 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
|
|
142
178
|
if let Ok(rstring) = RString::try_convert(rb_f) {
|
143
179
|
let s = unsafe { rstring.as_str() }?;
|
144
180
|
let file_path = std::path::Path::new(&s);
|
145
|
-
let file_path = resolve_homedir(file_path);
|
181
|
+
let file_path = resolve_homedir(&file_path);
|
146
182
|
let f = if truncate {
|
147
|
-
File::create(file_path).map_err(RbPolarsErr::
|
183
|
+
File::create(file_path).map_err(RbPolarsErr::from)?
|
148
184
|
} else {
|
149
185
|
polars_utils::open_file(&file_path).map_err(RbPolarsErr::from)?
|
150
186
|
};
|
151
|
-
|
152
|
-
Ok(EitherRustRubyFile::Rust(reader))
|
187
|
+
Ok(EitherRustRubyFile::Rust(f))
|
153
188
|
} else {
|
154
189
|
let f = RbFileLikeObject::with_requirements(rb_f, !truncate, truncate, !truncate)?;
|
155
190
|
Ok(EitherRustRubyFile::Rb(f))
|
@@ -157,21 +192,41 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
|
|
157
192
|
}
|
158
193
|
|
159
194
|
pub fn get_file_like(f: Value, truncate: bool) -> RbResult<Box<dyn FileLike>> {
|
160
|
-
|
161
|
-
match get_either_file(f, truncate)? {
|
162
|
-
Rb(f) => Ok(Box::new(f)),
|
163
|
-
Rust(f) => Ok(Box::new(f.into_inner())),
|
164
|
-
}
|
195
|
+
Ok(get_either_file(f, truncate)?.into_dyn())
|
165
196
|
}
|
166
197
|
|
167
|
-
pub
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
198
|
+
pub enum RbReadBytes {
|
199
|
+
Bytes(RString),
|
200
|
+
Other(Value),
|
201
|
+
}
|
202
|
+
|
203
|
+
pub fn read_if_bytesio(rb_f: Value) -> RbReadBytes {
|
204
|
+
rb_f.funcall("read", ())
|
205
|
+
.map(RbReadBytes::Bytes)
|
206
|
+
.unwrap_or(RbReadBytes::Other(rb_f))
|
207
|
+
}
|
208
|
+
|
209
|
+
pub fn get_mmap_bytes_reader<'a>(rb_f: &'a RbReadBytes) -> RbResult<Box<dyn MmapBytesReader + 'a>> {
|
210
|
+
get_mmap_bytes_reader_and_path(rb_f).map(|t| t.0)
|
211
|
+
}
|
212
|
+
|
213
|
+
pub fn get_mmap_bytes_reader_and_path<'a>(
|
214
|
+
rb_f: &'a RbReadBytes,
|
215
|
+
) -> RbResult<(Box<dyn MmapBytesReader + 'a>, Option<PathBuf>)> {
|
216
|
+
match rb_f {
|
217
|
+
RbReadBytes::Bytes(v) => Ok((Box::new(Cursor::new(unsafe { v.as_slice() })), None)),
|
218
|
+
RbReadBytes::Other(v) => {
|
219
|
+
let path = PathBuf::try_convert(*v)?;
|
220
|
+
let f = File::open(&path)
|
221
|
+
.map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
|
222
|
+
Ok((Box::new(f), Some(path)))
|
223
|
+
}
|
176
224
|
}
|
177
225
|
}
|
226
|
+
|
227
|
+
pub fn try_get_writeable(
|
228
|
+
rb_f: Value,
|
229
|
+
_cloud_options: Option<&CloudOptions>,
|
230
|
+
) -> RbResult<Box<dyn Write>> {
|
231
|
+
Ok(get_either_file(rb_f, true)?.into_dyn_writeable())
|
232
|
+
}
|
@@ -28,14 +28,14 @@ pub fn min_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
|
28
28
|
Ok(e.into())
|
29
29
|
}
|
30
30
|
|
31
|
-
pub fn sum_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
31
|
+
pub fn sum_horizontal(exprs: RArray, ignore_nulls: bool) -> RbResult<RbExpr> {
|
32
32
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
33
|
-
let e = dsl::sum_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
33
|
+
let e = dsl::sum_horizontal(exprs, ignore_nulls).map_err(RbPolarsErr::from)?;
|
34
34
|
Ok(e.into())
|
35
35
|
}
|
36
36
|
|
37
|
-
pub fn mean_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
37
|
+
pub fn mean_horizontal(exprs: RArray, ignore_nulls: bool) -> RbResult<RbExpr> {
|
38
38
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
39
|
-
let e = dsl::mean_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
39
|
+
let e = dsl::mean_horizontal(exprs, ignore_nulls).map_err(RbPolarsErr::from)?;
|
40
40
|
Ok(e.into())
|
41
41
|
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
use polars::lazy::dsl;
|
2
|
+
|
3
|
+
use crate::RbExpr;
|
4
|
+
|
5
|
+
// TODO add to Ruby
|
6
|
+
pub fn business_day_count(
|
7
|
+
start: &RbExpr,
|
8
|
+
end: &RbExpr,
|
9
|
+
week_mask: [bool; 7],
|
10
|
+
holidays: Vec<i32>,
|
11
|
+
) -> RbExpr {
|
12
|
+
let start = start.inner.clone();
|
13
|
+
let end = end.inner.clone();
|
14
|
+
dsl::business_day_count(start, end, week_mask, holidays).into()
|
15
|
+
}
|
@@ -1,34 +1,55 @@
|
|
1
|
+
use std::io::BufReader;
|
2
|
+
|
1
3
|
use magnus::{RHash, Value};
|
4
|
+
use polars::prelude::ArrowSchema;
|
5
|
+
use polars_core::datatypes::create_enum_dtype;
|
6
|
+
use polars_core::export::arrow::array::Utf8ViewArray;
|
2
7
|
|
3
8
|
use crate::conversion::Wrap;
|
4
|
-
use crate::file::
|
5
|
-
use crate::prelude::
|
9
|
+
use crate::file::{get_either_file, EitherRustRubyFile};
|
10
|
+
use crate::prelude::ArrowDataType;
|
6
11
|
use crate::{RbPolarsErr, RbResult};
|
7
12
|
|
8
13
|
pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
9
14
|
use polars_core::export::arrow::io::ipc::read::read_file_metadata;
|
10
|
-
let
|
11
|
-
|
15
|
+
let metadata = match get_either_file(rb_f, false)? {
|
16
|
+
EitherRustRubyFile::Rust(r) => {
|
17
|
+
read_file_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
|
18
|
+
}
|
19
|
+
EitherRustRubyFile::Rb(mut r) => read_file_metadata(&mut r).map_err(RbPolarsErr::from)?,
|
20
|
+
};
|
12
21
|
|
13
22
|
let dict = RHash::new();
|
14
|
-
|
15
|
-
let dt: Wrap<DataType> = Wrap((&field.dtype).into());
|
16
|
-
dict.aset(field.name.as_str(), dt)?;
|
17
|
-
}
|
23
|
+
fields_to_rbdict(&metadata.schema, &dict)?;
|
18
24
|
Ok(dict)
|
19
25
|
}
|
20
26
|
|
21
27
|
pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
|
22
28
|
use polars_parquet::read::{infer_schema, read_metadata};
|
23
29
|
|
24
|
-
let
|
25
|
-
|
30
|
+
let metadata = match get_either_file(rb_f, false)? {
|
31
|
+
EitherRustRubyFile::Rust(r) => {
|
32
|
+
read_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
|
33
|
+
}
|
34
|
+
EitherRustRubyFile::Rb(mut r) => read_metadata(&mut r).map_err(RbPolarsErr::from)?,
|
35
|
+
};
|
26
36
|
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;
|
27
37
|
|
28
38
|
let dict = RHash::new();
|
29
|
-
|
30
|
-
|
39
|
+
fields_to_rbdict(&arrow_schema, &dict)?;
|
40
|
+
Ok(dict)
|
41
|
+
}
|
42
|
+
|
43
|
+
fn fields_to_rbdict(schema: &ArrowSchema, dict: &RHash) -> RbResult<()> {
|
44
|
+
for field in schema.iter_values() {
|
45
|
+
let dt = if field.is_enum() {
|
46
|
+
Wrap(create_enum_dtype(Utf8ViewArray::new_empty(
|
47
|
+
ArrowDataType::Utf8View,
|
48
|
+
)))
|
49
|
+
} else {
|
50
|
+
Wrap(polars::prelude::DataType::from_arrow_field(field))
|
51
|
+
};
|
31
52
|
dict.aset(field.name.as_str(), dt)?;
|
32
53
|
}
|
33
|
-
Ok(
|
54
|
+
Ok(())
|
34
55
|
}
|
@@ -70,6 +70,7 @@ pub fn arg_sort_by(
|
|
70
70
|
nulls_last,
|
71
71
|
multithreaded,
|
72
72
|
maintain_order,
|
73
|
+
limit: None,
|
73
74
|
},
|
74
75
|
)
|
75
76
|
.into())
|
@@ -94,10 +95,7 @@ pub fn col(name: String) -> RbExpr {
|
|
94
95
|
}
|
95
96
|
|
96
97
|
pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
97
|
-
let lfs = lfs
|
98
|
-
.into_iter()
|
99
|
-
.map(<&RbLazyFrame>::try_convert)
|
100
|
-
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
98
|
+
let lfs = lfs.typecheck::<Obj<RbLazyFrame>>()?;
|
101
99
|
|
102
100
|
Ok(RArray::from_iter(lfs.iter().map(|lf| {
|
103
101
|
let df = lf.ldf.borrow().clone().collect().unwrap();
|
@@ -173,8 +171,14 @@ pub fn cum_fold(
|
|
173
171
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
174
172
|
let lambda = Opaque::from(lambda);
|
175
173
|
|
176
|
-
let func =
|
177
|
-
|
174
|
+
let func = move |a: Column, b: Column| {
|
175
|
+
binary_lambda(
|
176
|
+
Ruby::get().unwrap().get_inner(lambda),
|
177
|
+
a.take_materialized_series(),
|
178
|
+
b.take_materialized_series(),
|
179
|
+
)
|
180
|
+
.map(|v| v.map(Column::from))
|
181
|
+
};
|
178
182
|
Ok(dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
179
183
|
}
|
180
184
|
|
@@ -263,8 +267,14 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
|
263
267
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
264
268
|
let lambda = Opaque::from(lambda);
|
265
269
|
|
266
|
-
let func =
|
267
|
-
|
270
|
+
let func = move |a: Column, b: Column| {
|
271
|
+
binary_lambda(
|
272
|
+
Ruby::get().unwrap().get_inner(lambda),
|
273
|
+
a.take_materialized_series(),
|
274
|
+
b.take_materialized_series(),
|
275
|
+
)
|
276
|
+
.map(|v| v.map(Column::from))
|
277
|
+
};
|
268
278
|
Ok(dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
269
279
|
}
|
270
280
|
|
@@ -311,8 +321,8 @@ pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
|
|
311
321
|
}
|
312
322
|
}
|
313
323
|
|
314
|
-
pub fn pearson_corr(a: &RbExpr, b: &RbExpr
|
315
|
-
dsl::pearson_corr(a.inner.clone(), b.inner.clone()
|
324
|
+
pub fn pearson_corr(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
325
|
+
dsl::pearson_corr(a.inner.clone(), b.inner.clone()).into()
|
316
326
|
}
|
317
327
|
|
318
328
|
pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbResult<RbExpr> {
|
@@ -336,8 +346,8 @@ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbRe
|
|
336
346
|
Ok(dsl::repeat(value, n).into())
|
337
347
|
}
|
338
348
|
|
339
|
-
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr,
|
340
|
-
dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(),
|
349
|
+
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, propagate_nans: bool) -> RbExpr {
|
350
|
+
dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), propagate_nans).into()
|
341
351
|
}
|
342
352
|
|
343
353
|
pub fn sql_expr(sql: String) -> RbResult<RbExpr> {
|
@@ -0,0 +1 @@
|
|
1
|
+
pub mod to_ruby;
|