polars-df 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/Cargo.lock +1523 -378
- data/LICENSE.txt +1 -0
- data/README.md +38 -4
- data/ext/polars/Cargo.toml +15 -5
- data/ext/polars/src/batched_csv.rs +7 -10
- data/ext/polars/src/conversion/any_value.rs +31 -21
- data/ext/polars/src/conversion/mod.rs +155 -48
- data/ext/polars/src/dataframe/construction.rs +0 -3
- data/ext/polars/src/dataframe/export.rs +9 -2
- data/ext/polars/src/dataframe/general.rs +15 -57
- data/ext/polars/src/dataframe/io.rs +77 -169
- data/ext/polars/src/dataframe/mod.rs +1 -0
- data/ext/polars/src/dataframe/serde.rs +15 -0
- data/ext/polars/src/error.rs +31 -48
- data/ext/polars/src/exceptions.rs +24 -0
- data/ext/polars/src/expr/binary.rs +4 -42
- data/ext/polars/src/expr/datetime.rs +5 -4
- data/ext/polars/src/expr/general.rs +16 -22
- data/ext/polars/src/expr/list.rs +18 -11
- data/ext/polars/src/expr/meta.rs +6 -2
- data/ext/polars/src/expr/rolling.rs +6 -7
- data/ext/polars/src/expr/string.rs +9 -36
- data/ext/polars/src/file.rs +78 -23
- data/ext/polars/src/functions/aggregation.rs +4 -4
- data/ext/polars/src/functions/business.rs +15 -0
- data/ext/polars/src/functions/io.rs +34 -13
- data/ext/polars/src/functions/lazy.rs +22 -12
- data/ext/polars/src/functions/meta.rs +1 -1
- data/ext/polars/src/functions/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/lazyframe/general.rs +920 -0
- data/ext/polars/src/lazyframe/mod.rs +3 -827
- data/ext/polars/src/lazyframe/serde.rs +31 -0
- data/ext/polars/src/lib.rs +54 -27
- data/ext/polars/src/map/dataframe.rs +10 -6
- data/ext/polars/src/map/lazy.rs +65 -4
- data/ext/polars/src/map/mod.rs +9 -8
- data/ext/polars/src/on_startup.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +1 -5
- data/ext/polars/src/series/arithmetic.rs +10 -10
- data/ext/polars/src/series/construction.rs +2 -2
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +631 -0
- data/ext/polars/src/series/import.rs +55 -0
- data/ext/polars/src/series/mod.rs +11 -638
- data/ext/polars/src/series/scatter.rs +2 -2
- data/ext/polars/src/utils.rs +0 -20
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +452 -101
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +3 -1
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +95 -13
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +684 -20
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1226 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +17 -1
- metadata +16 -9
- data/lib/polars/functions.rb +0 -57
@@ -9,7 +9,6 @@ use polars_core::series::IsSorted;
|
|
9
9
|
use crate::conversion::{parse_fill_null_strategy, Wrap};
|
10
10
|
use crate::map::lazy::map_single;
|
11
11
|
use crate::rb_exprs_to_exprs;
|
12
|
-
use crate::utils::reinterpret;
|
13
12
|
use crate::{RbExpr, RbResult};
|
14
13
|
|
15
14
|
impl RbExpr {
|
@@ -165,7 +164,7 @@ impl RbExpr {
|
|
165
164
|
self.inner.clone().implode().into()
|
166
165
|
}
|
167
166
|
|
168
|
-
pub fn quantile(&self, quantile: &Self, interpolation: Wrap<
|
167
|
+
pub fn quantile(&self, quantile: &Self, interpolation: Wrap<QuantileMethod>) -> Self {
|
169
168
|
self.inner
|
170
169
|
.clone()
|
171
170
|
.quantile(quantile.inner.clone(), interpolation.0)
|
@@ -272,6 +271,7 @@ impl RbExpr {
|
|
272
271
|
nulls_last,
|
273
272
|
multithreaded: true,
|
274
273
|
maintain_order: false,
|
274
|
+
limit: None,
|
275
275
|
})
|
276
276
|
.into()
|
277
277
|
}
|
@@ -284,6 +284,7 @@ impl RbExpr {
|
|
284
284
|
nulls_last,
|
285
285
|
multithreaded: true,
|
286
286
|
maintain_order: false,
|
287
|
+
limit: None,
|
287
288
|
})
|
288
289
|
.into()
|
289
290
|
}
|
@@ -364,6 +365,7 @@ impl RbExpr {
|
|
364
365
|
nulls_last,
|
365
366
|
multithreaded,
|
366
367
|
maintain_order,
|
368
|
+
limit: None,
|
367
369
|
},
|
368
370
|
)
|
369
371
|
.into())
|
@@ -463,14 +465,7 @@ impl RbExpr {
|
|
463
465
|
}
|
464
466
|
|
465
467
|
pub fn gather_every(&self, n: usize, offset: usize) -> Self {
|
466
|
-
self.clone()
|
467
|
-
.inner
|
468
|
-
.map(
|
469
|
-
move |s: Series| Ok(Some(s.gather_every(n, offset))),
|
470
|
-
GetOutput::same_type(),
|
471
|
-
)
|
472
|
-
.with_fmt("gather_every")
|
473
|
-
.into()
|
468
|
+
self.inner.clone().gather_every(n, offset).into()
|
474
469
|
}
|
475
470
|
|
476
471
|
pub fn tail(&self, n: Option<usize>) -> Self {
|
@@ -644,8 +639,16 @@ impl RbExpr {
|
|
644
639
|
output_type: Option<Wrap<DataType>>,
|
645
640
|
agg_list: bool,
|
646
641
|
is_elementwise: bool,
|
642
|
+
returns_scalar: bool,
|
647
643
|
) -> Self {
|
648
|
-
map_single(
|
644
|
+
map_single(
|
645
|
+
self,
|
646
|
+
lambda,
|
647
|
+
output_type,
|
648
|
+
agg_list,
|
649
|
+
is_elementwise,
|
650
|
+
returns_scalar,
|
651
|
+
)
|
649
652
|
}
|
650
653
|
|
651
654
|
pub fn dot(&self, other: &Self) -> Self {
|
@@ -653,16 +656,7 @@ impl RbExpr {
|
|
653
656
|
}
|
654
657
|
|
655
658
|
pub fn reinterpret(&self, signed: bool) -> Self {
|
656
|
-
|
657
|
-
let dt = if signed {
|
658
|
-
DataType::Int64
|
659
|
-
} else {
|
660
|
-
DataType::UInt64
|
661
|
-
};
|
662
|
-
self.clone()
|
663
|
-
.inner
|
664
|
-
.map(function, GetOutput::from_type(dt))
|
665
|
-
.into()
|
659
|
+
self.inner.clone().reinterpret(signed).into()
|
666
660
|
}
|
667
661
|
|
668
662
|
pub fn mode(&self) -> Self {
|
@@ -717,7 +711,7 @@ impl RbExpr {
|
|
717
711
|
}
|
718
712
|
|
719
713
|
pub fn reshape(&self, dims: Vec<i64>) -> Self {
|
720
|
-
self.inner.clone().reshape(&dims
|
714
|
+
self.inner.clone().reshape(&dims).into()
|
721
715
|
}
|
722
716
|
|
723
717
|
pub fn cum_count(&self, reverse: bool) -> Self {
|
data/ext/polars/src/expr/list.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::Value;
|
1
|
+
use magnus::{prelude::*, value::Opaque, Ruby, Value};
|
2
2
|
use polars::lazy::dsl::lit;
|
3
3
|
use polars::prelude::*;
|
4
4
|
use polars::series::ops::NullBehavior;
|
@@ -179,23 +179,30 @@ impl RbExpr {
|
|
179
179
|
pub fn list_to_struct(
|
180
180
|
&self,
|
181
181
|
width_strat: Wrap<ListToStructWidthStrategy>,
|
182
|
-
|
182
|
+
name_gen: Option<Value>,
|
183
183
|
upper_bound: usize,
|
184
184
|
) -> RbResult<Self> {
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
185
|
+
let name_gen = name_gen.map(|lambda| {
|
186
|
+
let lambda = Opaque::from(lambda);
|
187
|
+
Arc::new(move |idx: usize| {
|
188
|
+
let lambda = Ruby::get().unwrap().get_inner(lambda);
|
189
|
+
let out: String = lambda.funcall("call", (idx,)).unwrap();
|
190
|
+
PlSmallStr::from_string(out)
|
191
|
+
});
|
192
|
+
|
193
|
+
// non-Ruby thread
|
194
|
+
todo!();
|
195
|
+
});
|
193
196
|
|
194
197
|
Ok(self
|
195
198
|
.inner
|
196
199
|
.clone()
|
197
200
|
.list()
|
198
|
-
.to_struct(
|
201
|
+
.to_struct(ListToStructArgs::InferWidth {
|
202
|
+
infer_field_strategy: width_strat.0,
|
203
|
+
get_index_name: name_gen,
|
204
|
+
max_fields: upper_bound,
|
205
|
+
})
|
199
206
|
.into())
|
200
207
|
}
|
201
208
|
|
data/ext/polars/src/expr/meta.rs
CHANGED
@@ -84,13 +84,17 @@ impl RbExpr {
|
|
84
84
|
self.inner.clone().meta()._into_selector().into()
|
85
85
|
}
|
86
86
|
|
87
|
-
|
87
|
+
fn compute_tree_format(&self, display_as_dot: bool) -> RbResult<String> {
|
88
88
|
let e = self
|
89
89
|
.inner
|
90
90
|
.clone()
|
91
91
|
.meta()
|
92
|
-
.into_tree_formatter()
|
92
|
+
.into_tree_formatter(display_as_dot)
|
93
93
|
.map_err(RbPolarsErr::from)?;
|
94
94
|
Ok(format!("{e}"))
|
95
95
|
}
|
96
|
+
|
97
|
+
pub fn meta_tree_format(&self) -> RbResult<String> {
|
98
|
+
self.compute_tree_format(false)
|
99
|
+
}
|
96
100
|
}
|
@@ -1,5 +1,4 @@
|
|
1
1
|
use polars::prelude::*;
|
2
|
-
use std::any::Any;
|
3
2
|
|
4
3
|
use crate::conversion::Wrap;
|
5
4
|
use crate::RbExpr;
|
@@ -169,7 +168,7 @@ impl RbExpr {
|
|
169
168
|
weights,
|
170
169
|
min_periods,
|
171
170
|
center,
|
172
|
-
fn_params: Some(
|
171
|
+
fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
|
173
172
|
};
|
174
173
|
|
175
174
|
self.inner.clone().rolling_std(options).into()
|
@@ -187,7 +186,7 @@ impl RbExpr {
|
|
187
186
|
window_size: Duration::parse(&window_size),
|
188
187
|
min_periods,
|
189
188
|
closed_window: closed.0,
|
190
|
-
fn_params: Some(
|
189
|
+
fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
|
191
190
|
};
|
192
191
|
|
193
192
|
self.inner
|
@@ -210,7 +209,7 @@ impl RbExpr {
|
|
210
209
|
weights,
|
211
210
|
min_periods,
|
212
211
|
center,
|
213
|
-
fn_params: Some(
|
212
|
+
fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
|
214
213
|
};
|
215
214
|
|
216
215
|
self.inner.clone().rolling_var(options).into()
|
@@ -228,7 +227,7 @@ impl RbExpr {
|
|
228
227
|
window_size: Duration::parse(&window_size),
|
229
228
|
min_periods,
|
230
229
|
closed_window: closed.0,
|
231
|
-
fn_params: Some(
|
230
|
+
fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
|
232
231
|
};
|
233
232
|
|
234
233
|
self.inner
|
@@ -277,7 +276,7 @@ impl RbExpr {
|
|
277
276
|
pub fn rolling_quantile(
|
278
277
|
&self,
|
279
278
|
quantile: f64,
|
280
|
-
interpolation: Wrap<
|
279
|
+
interpolation: Wrap<QuantileMethod>,
|
281
280
|
window_size: usize,
|
282
281
|
weights: Option<Vec<f64>>,
|
283
282
|
min_periods: Option<usize>,
|
@@ -302,7 +301,7 @@ impl RbExpr {
|
|
302
301
|
&self,
|
303
302
|
by: &RbExpr,
|
304
303
|
quantile: f64,
|
305
|
-
interpolation: Wrap<
|
304
|
+
interpolation: Wrap<QuantileMethod>,
|
306
305
|
window_size: String,
|
307
306
|
min_periods: usize,
|
308
307
|
closed: Wrap<ClosedWindow>,
|
@@ -130,6 +130,11 @@ impl RbExpr {
|
|
130
130
|
self.inner.clone().str().to_lowercase().into()
|
131
131
|
}
|
132
132
|
|
133
|
+
// requires nightly
|
134
|
+
// pub fn str_to_titlecase(&self) -> Self {
|
135
|
+
// self.inner.clone().str().to_titlecase().into()
|
136
|
+
// }
|
137
|
+
|
133
138
|
pub fn str_len_bytes(&self) -> Self {
|
134
139
|
self.inner.clone().str().len_bytes().into()
|
135
140
|
}
|
@@ -200,51 +205,19 @@ impl RbExpr {
|
|
200
205
|
}
|
201
206
|
|
202
207
|
pub fn str_hex_encode(&self) -> Self {
|
203
|
-
self.clone()
|
204
|
-
.inner
|
205
|
-
.map(
|
206
|
-
move |s| s.str().map(|s| Some(s.hex_encode().into_series())),
|
207
|
-
GetOutput::same_type(),
|
208
|
-
)
|
209
|
-
.with_fmt("str.hex_encode")
|
210
|
-
.into()
|
208
|
+
self.inner.clone().str().hex_encode().into()
|
211
209
|
}
|
212
210
|
|
213
211
|
pub fn str_hex_decode(&self, strict: bool) -> Self {
|
214
|
-
self.clone()
|
215
|
-
.inner
|
216
|
-
.map(
|
217
|
-
move |s| s.str()?.hex_decode(strict).map(|s| Some(s.into_series())),
|
218
|
-
GetOutput::same_type(),
|
219
|
-
)
|
220
|
-
.with_fmt("str.hex_decode")
|
221
|
-
.into()
|
212
|
+
self.inner.clone().str().hex_decode(strict).into()
|
222
213
|
}
|
223
214
|
|
224
215
|
pub fn str_base64_encode(&self) -> Self {
|
225
|
-
self.clone()
|
226
|
-
.inner
|
227
|
-
.map(
|
228
|
-
move |s| s.str().map(|s| Some(s.base64_encode().into_series())),
|
229
|
-
GetOutput::same_type(),
|
230
|
-
)
|
231
|
-
.with_fmt("str.base64_encode")
|
232
|
-
.into()
|
216
|
+
self.inner.clone().str().base64_encode().into()
|
233
217
|
}
|
234
218
|
|
235
219
|
pub fn str_base64_decode(&self, strict: bool) -> Self {
|
236
|
-
self.clone()
|
237
|
-
.inner
|
238
|
-
.map(
|
239
|
-
move |s| {
|
240
|
-
s.str()?
|
241
|
-
.base64_decode(strict)
|
242
|
-
.map(|s| Some(s.into_series()))
|
243
|
-
},
|
244
|
-
GetOutput::same_type(),
|
245
|
-
)
|
246
|
-
.with_fmt("str.base64_decode")
|
247
|
-
.into()
|
220
|
+
self.inner.clone().str().base64_decode(strict).into()
|
248
221
|
}
|
249
222
|
|
250
223
|
pub fn str_to_integer(&self, base: &Self, strict: bool) -> Self {
|
data/ext/polars/src/file.rs
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
use std::fs::File;
|
2
2
|
use std::io;
|
3
|
-
use std::io::{
|
3
|
+
use std::io::{Cursor, Read, Seek, SeekFrom, Write};
|
4
4
|
use std::path::PathBuf;
|
5
5
|
|
6
6
|
use magnus::{exception, prelude::*, Error, RString, Value};
|
7
|
+
use polars::io::cloud::CloudOptions;
|
7
8
|
use polars::io::mmap::MmapBytesReader;
|
9
|
+
use polars_utils::mmap::MemSlice;
|
8
10
|
|
9
11
|
use crate::error::RbPolarsErr;
|
10
12
|
use crate::prelude::resolve_homedir;
|
@@ -24,9 +26,8 @@ impl RbFileLikeObject {
|
|
24
26
|
RbFileLikeObject { inner: object }
|
25
27
|
}
|
26
28
|
|
27
|
-
pub fn
|
28
|
-
|
29
|
-
std::io::Cursor::new(data)
|
29
|
+
pub fn as_bytes(&self) -> bytes::Bytes {
|
30
|
+
self.as_file_buffer().into_inner().into()
|
30
31
|
}
|
31
32
|
|
32
33
|
pub fn as_file_buffer(&self) -> Cursor<Vec<u8>> {
|
@@ -132,7 +133,42 @@ impl FileLike for RbFileLikeObject {}
|
|
132
133
|
|
133
134
|
pub enum EitherRustRubyFile {
|
134
135
|
Rb(RbFileLikeObject),
|
135
|
-
Rust(
|
136
|
+
Rust(File),
|
137
|
+
}
|
138
|
+
|
139
|
+
impl EitherRustRubyFile {
|
140
|
+
pub fn into_dyn(self) -> Box<dyn FileLike> {
|
141
|
+
match self {
|
142
|
+
EitherRustRubyFile::Rb(f) => Box::new(f),
|
143
|
+
EitherRustRubyFile::Rust(f) => Box::new(f),
|
144
|
+
}
|
145
|
+
}
|
146
|
+
|
147
|
+
pub fn into_dyn_writeable(self) -> Box<dyn Write> {
|
148
|
+
match self {
|
149
|
+
EitherRustRubyFile::Rb(f) => Box::new(f),
|
150
|
+
EitherRustRubyFile::Rust(f) => Box::new(f),
|
151
|
+
}
|
152
|
+
}
|
153
|
+
}
|
154
|
+
|
155
|
+
pub enum RubyScanSourceInput {
|
156
|
+
Buffer(MemSlice),
|
157
|
+
Path(PathBuf),
|
158
|
+
#[allow(dead_code)]
|
159
|
+
File(File),
|
160
|
+
}
|
161
|
+
|
162
|
+
pub fn get_ruby_scan_source_input(rb_f: Value, write: bool) -> RbResult<RubyScanSourceInput> {
|
163
|
+
if let Ok(file_path) = PathBuf::try_convert(rb_f) {
|
164
|
+
// TODO resolve_homedir
|
165
|
+
Ok(RubyScanSourceInput::Path(file_path))
|
166
|
+
} else {
|
167
|
+
let f = RbFileLikeObject::with_requirements(rb_f, !write, write, !write)?;
|
168
|
+
Ok(RubyScanSourceInput::Buffer(MemSlice::from_bytes(
|
169
|
+
f.as_bytes(),
|
170
|
+
)))
|
171
|
+
}
|
136
172
|
}
|
137
173
|
|
138
174
|
///
|
@@ -142,14 +178,13 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
|
|
142
178
|
if let Ok(rstring) = RString::try_convert(rb_f) {
|
143
179
|
let s = unsafe { rstring.as_str() }?;
|
144
180
|
let file_path = std::path::Path::new(&s);
|
145
|
-
let file_path = resolve_homedir(file_path);
|
181
|
+
let file_path = resolve_homedir(&file_path);
|
146
182
|
let f = if truncate {
|
147
|
-
File::create(file_path).map_err(RbPolarsErr::
|
183
|
+
File::create(file_path).map_err(RbPolarsErr::from)?
|
148
184
|
} else {
|
149
185
|
polars_utils::open_file(&file_path).map_err(RbPolarsErr::from)?
|
150
186
|
};
|
151
|
-
|
152
|
-
Ok(EitherRustRubyFile::Rust(reader))
|
187
|
+
Ok(EitherRustRubyFile::Rust(f))
|
153
188
|
} else {
|
154
189
|
let f = RbFileLikeObject::with_requirements(rb_f, !truncate, truncate, !truncate)?;
|
155
190
|
Ok(EitherRustRubyFile::Rb(f))
|
@@ -157,21 +192,41 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
|
|
157
192
|
}
|
158
193
|
|
159
194
|
pub fn get_file_like(f: Value, truncate: bool) -> RbResult<Box<dyn FileLike>> {
|
160
|
-
|
161
|
-
match get_either_file(f, truncate)? {
|
162
|
-
Rb(f) => Ok(Box::new(f)),
|
163
|
-
Rust(f) => Ok(Box::new(f.into_inner())),
|
164
|
-
}
|
195
|
+
Ok(get_either_file(f, truncate)?.into_dyn())
|
165
196
|
}
|
166
197
|
|
167
|
-
pub
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
198
|
+
pub enum RbReadBytes {
|
199
|
+
Bytes(RString),
|
200
|
+
Other(Value),
|
201
|
+
}
|
202
|
+
|
203
|
+
pub fn read_if_bytesio(rb_f: Value) -> RbReadBytes {
|
204
|
+
rb_f.funcall("read", ())
|
205
|
+
.map(RbReadBytes::Bytes)
|
206
|
+
.unwrap_or(RbReadBytes::Other(rb_f))
|
207
|
+
}
|
208
|
+
|
209
|
+
pub fn get_mmap_bytes_reader<'a>(rb_f: &'a RbReadBytes) -> RbResult<Box<dyn MmapBytesReader + 'a>> {
|
210
|
+
get_mmap_bytes_reader_and_path(rb_f).map(|t| t.0)
|
211
|
+
}
|
212
|
+
|
213
|
+
pub fn get_mmap_bytes_reader_and_path<'a>(
|
214
|
+
rb_f: &'a RbReadBytes,
|
215
|
+
) -> RbResult<(Box<dyn MmapBytesReader + 'a>, Option<PathBuf>)> {
|
216
|
+
match rb_f {
|
217
|
+
RbReadBytes::Bytes(v) => Ok((Box::new(Cursor::new(unsafe { v.as_slice() })), None)),
|
218
|
+
RbReadBytes::Other(v) => {
|
219
|
+
let path = PathBuf::try_convert(*v)?;
|
220
|
+
let f = File::open(&path)
|
221
|
+
.map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
|
222
|
+
Ok((Box::new(f), Some(path)))
|
223
|
+
}
|
176
224
|
}
|
177
225
|
}
|
226
|
+
|
227
|
+
pub fn try_get_writeable(
|
228
|
+
rb_f: Value,
|
229
|
+
_cloud_options: Option<&CloudOptions>,
|
230
|
+
) -> RbResult<Box<dyn Write>> {
|
231
|
+
Ok(get_either_file(rb_f, true)?.into_dyn_writeable())
|
232
|
+
}
|
@@ -28,14 +28,14 @@ pub fn min_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
|
28
28
|
Ok(e.into())
|
29
29
|
}
|
30
30
|
|
31
|
-
pub fn sum_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
31
|
+
pub fn sum_horizontal(exprs: RArray, ignore_nulls: bool) -> RbResult<RbExpr> {
|
32
32
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
33
|
-
let e = dsl::sum_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
33
|
+
let e = dsl::sum_horizontal(exprs, ignore_nulls).map_err(RbPolarsErr::from)?;
|
34
34
|
Ok(e.into())
|
35
35
|
}
|
36
36
|
|
37
|
-
pub fn mean_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
37
|
+
pub fn mean_horizontal(exprs: RArray, ignore_nulls: bool) -> RbResult<RbExpr> {
|
38
38
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
39
|
-
let e = dsl::mean_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
39
|
+
let e = dsl::mean_horizontal(exprs, ignore_nulls).map_err(RbPolarsErr::from)?;
|
40
40
|
Ok(e.into())
|
41
41
|
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
use polars::lazy::dsl;
|
2
|
+
|
3
|
+
use crate::RbExpr;
|
4
|
+
|
5
|
+
// TODO add to Ruby
|
6
|
+
pub fn business_day_count(
|
7
|
+
start: &RbExpr,
|
8
|
+
end: &RbExpr,
|
9
|
+
week_mask: [bool; 7],
|
10
|
+
holidays: Vec<i32>,
|
11
|
+
) -> RbExpr {
|
12
|
+
let start = start.inner.clone();
|
13
|
+
let end = end.inner.clone();
|
14
|
+
dsl::business_day_count(start, end, week_mask, holidays).into()
|
15
|
+
}
|
@@ -1,34 +1,55 @@
|
|
1
|
+
use std::io::BufReader;
|
2
|
+
|
1
3
|
use magnus::{RHash, Value};
|
4
|
+
use polars::prelude::ArrowSchema;
|
5
|
+
use polars_core::datatypes::create_enum_dtype;
|
6
|
+
use polars_core::export::arrow::array::Utf8ViewArray;
|
2
7
|
|
3
8
|
use crate::conversion::Wrap;
|
4
|
-
use crate::file::
|
5
|
-
use crate::prelude::
|
9
|
+
use crate::file::{get_either_file, EitherRustRubyFile};
|
10
|
+
use crate::prelude::ArrowDataType;
|
6
11
|
use crate::{RbPolarsErr, RbResult};
|
7
12
|
|
8
13
|
pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
9
14
|
use polars_core::export::arrow::io::ipc::read::read_file_metadata;
|
10
|
-
let
|
11
|
-
|
15
|
+
let metadata = match get_either_file(rb_f, false)? {
|
16
|
+
EitherRustRubyFile::Rust(r) => {
|
17
|
+
read_file_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
|
18
|
+
}
|
19
|
+
EitherRustRubyFile::Rb(mut r) => read_file_metadata(&mut r).map_err(RbPolarsErr::from)?,
|
20
|
+
};
|
12
21
|
|
13
22
|
let dict = RHash::new();
|
14
|
-
|
15
|
-
let dt: Wrap<DataType> = Wrap((&field.dtype).into());
|
16
|
-
dict.aset(field.name.as_str(), dt)?;
|
17
|
-
}
|
23
|
+
fields_to_rbdict(&metadata.schema, &dict)?;
|
18
24
|
Ok(dict)
|
19
25
|
}
|
20
26
|
|
21
27
|
pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
|
22
28
|
use polars_parquet::read::{infer_schema, read_metadata};
|
23
29
|
|
24
|
-
let
|
25
|
-
|
30
|
+
let metadata = match get_either_file(rb_f, false)? {
|
31
|
+
EitherRustRubyFile::Rust(r) => {
|
32
|
+
read_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
|
33
|
+
}
|
34
|
+
EitherRustRubyFile::Rb(mut r) => read_metadata(&mut r).map_err(RbPolarsErr::from)?,
|
35
|
+
};
|
26
36
|
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;
|
27
37
|
|
28
38
|
let dict = RHash::new();
|
29
|
-
|
30
|
-
|
39
|
+
fields_to_rbdict(&arrow_schema, &dict)?;
|
40
|
+
Ok(dict)
|
41
|
+
}
|
42
|
+
|
43
|
+
fn fields_to_rbdict(schema: &ArrowSchema, dict: &RHash) -> RbResult<()> {
|
44
|
+
for field in schema.iter_values() {
|
45
|
+
let dt = if field.is_enum() {
|
46
|
+
Wrap(create_enum_dtype(Utf8ViewArray::new_empty(
|
47
|
+
ArrowDataType::Utf8View,
|
48
|
+
)))
|
49
|
+
} else {
|
50
|
+
Wrap(polars::prelude::DataType::from_arrow_field(field))
|
51
|
+
};
|
31
52
|
dict.aset(field.name.as_str(), dt)?;
|
32
53
|
}
|
33
|
-
Ok(
|
54
|
+
Ok(())
|
34
55
|
}
|
@@ -70,6 +70,7 @@ pub fn arg_sort_by(
|
|
70
70
|
nulls_last,
|
71
71
|
multithreaded,
|
72
72
|
maintain_order,
|
73
|
+
limit: None,
|
73
74
|
},
|
74
75
|
)
|
75
76
|
.into())
|
@@ -94,10 +95,7 @@ pub fn col(name: String) -> RbExpr {
|
|
94
95
|
}
|
95
96
|
|
96
97
|
pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
97
|
-
let lfs = lfs
|
98
|
-
.into_iter()
|
99
|
-
.map(<&RbLazyFrame>::try_convert)
|
100
|
-
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
98
|
+
let lfs = lfs.typecheck::<Obj<RbLazyFrame>>()?;
|
101
99
|
|
102
100
|
Ok(RArray::from_iter(lfs.iter().map(|lf| {
|
103
101
|
let df = lf.ldf.borrow().clone().collect().unwrap();
|
@@ -173,8 +171,14 @@ pub fn cum_fold(
|
|
173
171
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
174
172
|
let lambda = Opaque::from(lambda);
|
175
173
|
|
176
|
-
let func =
|
177
|
-
|
174
|
+
let func = move |a: Column, b: Column| {
|
175
|
+
binary_lambda(
|
176
|
+
Ruby::get().unwrap().get_inner(lambda),
|
177
|
+
a.take_materialized_series(),
|
178
|
+
b.take_materialized_series(),
|
179
|
+
)
|
180
|
+
.map(|v| v.map(Column::from))
|
181
|
+
};
|
178
182
|
Ok(dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
179
183
|
}
|
180
184
|
|
@@ -263,8 +267,14 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
|
263
267
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
264
268
|
let lambda = Opaque::from(lambda);
|
265
269
|
|
266
|
-
let func =
|
267
|
-
|
270
|
+
let func = move |a: Column, b: Column| {
|
271
|
+
binary_lambda(
|
272
|
+
Ruby::get().unwrap().get_inner(lambda),
|
273
|
+
a.take_materialized_series(),
|
274
|
+
b.take_materialized_series(),
|
275
|
+
)
|
276
|
+
.map(|v| v.map(Column::from))
|
277
|
+
};
|
268
278
|
Ok(dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
269
279
|
}
|
270
280
|
|
@@ -311,8 +321,8 @@ pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
|
|
311
321
|
}
|
312
322
|
}
|
313
323
|
|
314
|
-
pub fn pearson_corr(a: &RbExpr, b: &RbExpr
|
315
|
-
dsl::pearson_corr(a.inner.clone(), b.inner.clone()
|
324
|
+
pub fn pearson_corr(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
325
|
+
dsl::pearson_corr(a.inner.clone(), b.inner.clone()).into()
|
316
326
|
}
|
317
327
|
|
318
328
|
pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbResult<RbExpr> {
|
@@ -336,8 +346,8 @@ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbRe
|
|
336
346
|
Ok(dsl::repeat(value, n).into())
|
337
347
|
}
|
338
348
|
|
339
|
-
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr,
|
340
|
-
dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(),
|
349
|
+
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, propagate_nans: bool) -> RbExpr {
|
350
|
+
dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), propagate_nans).into()
|
341
351
|
}
|
342
352
|
|
343
353
|
pub fn sql_expr(sql: String) -> RbResult<RbExpr> {
|
@@ -0,0 +1 @@
|
|
1
|
+
pub mod to_ruby;
|