polars-df 0.13.0 → 0.15.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -0
- data/Cargo.lock +1368 -319
- data/LICENSE.txt +1 -0
- data/README.md +1 -2
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +10 -13
- data/ext/polars/src/conversion/any_value.rs +37 -21
- data/ext/polars/src/conversion/chunked_array.rs +3 -3
- data/ext/polars/src/conversion/mod.rs +159 -46
- data/ext/polars/src/dataframe/construction.rs +4 -7
- data/ext/polars/src/dataframe/export.rs +9 -2
- data/ext/polars/src/dataframe/general.rs +22 -16
- data/ext/polars/src/dataframe/io.rs +78 -174
- data/ext/polars/src/dataframe/mod.rs +1 -0
- data/ext/polars/src/dataframe/serde.rs +15 -0
- data/ext/polars/src/error.rs +31 -48
- data/ext/polars/src/exceptions.rs +24 -0
- data/ext/polars/src/expr/binary.rs +4 -42
- data/ext/polars/src/expr/datetime.rs +16 -7
- data/ext/polars/src/expr/general.rs +14 -23
- data/ext/polars/src/expr/list.rs +18 -11
- data/ext/polars/src/expr/name.rs +3 -2
- data/ext/polars/src/expr/rolling.rs +6 -7
- data/ext/polars/src/expr/string.rs +17 -37
- data/ext/polars/src/file.rs +59 -22
- data/ext/polars/src/functions/business.rs +15 -0
- data/ext/polars/src/functions/io.rs +6 -6
- data/ext/polars/src/functions/lazy.rs +17 -8
- data/ext/polars/src/functions/mod.rs +1 -0
- data/ext/polars/src/functions/range.rs +4 -2
- data/ext/polars/src/interop/arrow/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/lazyframe/general.rs +877 -0
- data/ext/polars/src/lazyframe/mod.rs +3 -825
- data/ext/polars/src/lazyframe/serde.rs +31 -0
- data/ext/polars/src/lib.rs +44 -13
- data/ext/polars/src/map/dataframe.rs +46 -14
- data/ext/polars/src/map/lazy.rs +65 -4
- data/ext/polars/src/map/mod.rs +17 -16
- data/ext/polars/src/map/series.rs +106 -64
- data/ext/polars/src/on_startup.rs +2 -2
- data/ext/polars/src/series/aggregation.rs +1 -5
- data/ext/polars/src/series/arithmetic.rs +10 -10
- data/ext/polars/src/series/construction.rs +52 -25
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +643 -0
- data/ext/polars/src/series/import.rs +55 -0
- data/ext/polars/src/series/mod.rs +11 -638
- data/ext/polars/src/series/scatter.rs +2 -2
- data/ext/polars/src/utils.rs +0 -20
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +285 -62
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +2 -0
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +109 -8
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +88 -10
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/ipc.rb +14 -12
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +649 -15
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1144 -0
- data/lib/polars/series.rb +470 -40
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +14 -4
- data/lib/polars/functions.rb +0 -57
@@ -9,7 +9,6 @@ use polars_core::series::IsSorted;
|
|
9
9
|
use crate::conversion::{parse_fill_null_strategy, Wrap};
|
10
10
|
use crate::map::lazy::map_single;
|
11
11
|
use crate::rb_exprs_to_exprs;
|
12
|
-
use crate::utils::reinterpret;
|
13
12
|
use crate::{RbExpr, RbResult};
|
14
13
|
|
15
14
|
impl RbExpr {
|
@@ -165,7 +164,7 @@ impl RbExpr {
|
|
165
164
|
self.inner.clone().implode().into()
|
166
165
|
}
|
167
166
|
|
168
|
-
pub fn quantile(&self, quantile: &Self, interpolation: Wrap<QuantileInterpolOptions>) -> Self {
|
167
|
+
pub fn quantile(&self, quantile: &Self, interpolation: Wrap<QuantileMethod>) -> Self {
|
169
168
|
self.inner
|
170
169
|
.clone()
|
171
170
|
.quantile(quantile.inner.clone(), interpolation.0)
|
@@ -242,7 +241,7 @@ impl RbExpr {
|
|
242
241
|
pub fn value_counts(&self, sort: bool, parallel: bool, name: String, normalize: bool) -> Self {
|
243
242
|
self.inner
|
244
243
|
.clone()
|
245
|
-
.value_counts(sort, parallel, name, normalize)
|
244
|
+
.value_counts(sort, parallel, name.as_str(), normalize)
|
246
245
|
.into()
|
247
246
|
}
|
248
247
|
|
@@ -463,14 +462,7 @@ impl RbExpr {
|
|
463
462
|
}
|
464
463
|
|
465
464
|
pub fn gather_every(&self, n: usize, offset: usize) -> Self {
|
466
|
-
self.clone()
|
467
|
-
.inner
|
468
|
-
.map(
|
469
|
-
move |s: Series| Ok(Some(s.gather_every(n, offset))),
|
470
|
-
GetOutput::same_type(),
|
471
|
-
)
|
472
|
-
.with_fmt("gather_every")
|
473
|
-
.into()
|
465
|
+
self.inner.clone().gather_every(n, offset).into()
|
474
466
|
}
|
475
467
|
|
476
468
|
pub fn tail(&self, n: Option<usize>) -> Self {
|
@@ -644,8 +636,16 @@ impl RbExpr {
|
|
644
636
|
output_type: Option<Wrap<DataType>>,
|
645
637
|
agg_list: bool,
|
646
638
|
is_elementwise: bool,
|
639
|
+
returns_scalar: bool,
|
647
640
|
) -> Self {
|
648
|
-
map_single(
|
641
|
+
map_single(
|
642
|
+
self,
|
643
|
+
lambda,
|
644
|
+
output_type,
|
645
|
+
agg_list,
|
646
|
+
is_elementwise,
|
647
|
+
returns_scalar,
|
648
|
+
)
|
649
649
|
}
|
650
650
|
|
651
651
|
pub fn dot(&self, other: &Self) -> Self {
|
@@ -653,16 +653,7 @@ impl RbExpr {
|
|
653
653
|
}
|
654
654
|
|
655
655
|
pub fn reinterpret(&self, signed: bool) -> Self {
|
656
|
-
|
657
|
-
let dt = if signed {
|
658
|
-
DataType::Int64
|
659
|
-
} else {
|
660
|
-
DataType::UInt64
|
661
|
-
};
|
662
|
-
self.clone()
|
663
|
-
.inner
|
664
|
-
.map(function, GetOutput::from_type(dt))
|
665
|
-
.into()
|
656
|
+
self.inner.clone().reinterpret(signed).into()
|
666
657
|
}
|
667
658
|
|
668
659
|
pub fn mode(&self) -> Self {
|
@@ -717,7 +708,7 @@ impl RbExpr {
|
|
717
708
|
}
|
718
709
|
|
719
710
|
pub fn reshape(&self, dims: Vec<i64>) -> Self {
|
720
|
-
self.inner.clone().reshape(&dims
|
711
|
+
self.inner.clone().reshape(&dims).into()
|
721
712
|
}
|
722
713
|
|
723
714
|
pub fn cum_count(&self, reverse: bool) -> Self {
|
data/ext/polars/src/expr/list.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::Value;
|
1
|
+
use magnus::{prelude::*, value::Opaque, Ruby, Value};
|
2
2
|
use polars::lazy::dsl::lit;
|
3
3
|
use polars::prelude::*;
|
4
4
|
use polars::series::ops::NullBehavior;
|
@@ -179,23 +179,30 @@ impl RbExpr {
|
|
179
179
|
pub fn list_to_struct(
|
180
180
|
&self,
|
181
181
|
width_strat: Wrap<ListToStructWidthStrategy>,
|
182
|
-
|
182
|
+
name_gen: Option<Value>,
|
183
183
|
upper_bound: usize,
|
184
184
|
) -> RbResult<Self> {
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
185
|
+
let name_gen = name_gen.map(|lambda| {
|
186
|
+
let lambda = Opaque::from(lambda);
|
187
|
+
Arc::new(move |idx: usize| {
|
188
|
+
let lambda = Ruby::get().unwrap().get_inner(lambda);
|
189
|
+
let out: String = lambda.funcall("call", (idx,)).unwrap();
|
190
|
+
PlSmallStr::from_string(out)
|
191
|
+
});
|
192
|
+
|
193
|
+
// non-Ruby thread
|
194
|
+
todo!();
|
195
|
+
});
|
193
196
|
|
194
197
|
Ok(self
|
195
198
|
.inner
|
196
199
|
.clone()
|
197
200
|
.list()
|
198
|
-
.to_struct(
|
201
|
+
.to_struct(ListToStructArgs::InferWidth {
|
202
|
+
infer_field_strategy: width_strat.0,
|
203
|
+
get_index_name: name_gen,
|
204
|
+
max_fields: upper_bound,
|
205
|
+
})
|
199
206
|
.into())
|
200
207
|
}
|
201
208
|
|
data/ext/polars/src/expr/name.rs
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
use magnus::{block::Proc, value::Opaque, Ruby};
|
2
2
|
use polars::prelude::*;
|
3
|
+
use polars_utils::format_pl_smallstr;
|
3
4
|
|
4
5
|
use crate::RbExpr;
|
5
6
|
|
@@ -15,9 +16,9 @@ impl RbExpr {
|
|
15
16
|
.name()
|
16
17
|
.map(move |name| {
|
17
18
|
let lambda = Ruby::get().unwrap().get_inner(lambda);
|
18
|
-
let out = lambda.call::<_, String>((name,));
|
19
|
+
let out = lambda.call::<_, String>((name.as_str(),));
|
19
20
|
match out {
|
20
|
-
Ok(out) => Ok(out),
|
21
|
+
Ok(out) => Ok(format_pl_smallstr!("{}", out)),
|
21
22
|
Err(e) => Err(PolarsError::ComputeError(
|
22
23
|
format!("Ruby function in 'name.map' produced an error: {}.", e).into(),
|
23
24
|
)),
|
@@ -1,5 +1,4 @@
|
|
1
1
|
use polars::prelude::*;
|
2
|
-
use std::any::Any;
|
3
2
|
|
4
3
|
use crate::conversion::Wrap;
|
5
4
|
use crate::RbExpr;
|
@@ -169,7 +168,7 @@ impl RbExpr {
|
|
169
168
|
weights,
|
170
169
|
min_periods,
|
171
170
|
center,
|
172
|
-
fn_params: Some(
|
171
|
+
fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
|
173
172
|
};
|
174
173
|
|
175
174
|
self.inner.clone().rolling_std(options).into()
|
@@ -187,7 +186,7 @@ impl RbExpr {
|
|
187
186
|
window_size: Duration::parse(&window_size),
|
188
187
|
min_periods,
|
189
188
|
closed_window: closed.0,
|
190
|
-
fn_params: Some(
|
189
|
+
fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
|
191
190
|
};
|
192
191
|
|
193
192
|
self.inner
|
@@ -210,7 +209,7 @@ impl RbExpr {
|
|
210
209
|
weights,
|
211
210
|
min_periods,
|
212
211
|
center,
|
213
|
-
fn_params: Some(
|
212
|
+
fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
|
214
213
|
};
|
215
214
|
|
216
215
|
self.inner.clone().rolling_var(options).into()
|
@@ -228,7 +227,7 @@ impl RbExpr {
|
|
228
227
|
window_size: Duration::parse(&window_size),
|
229
228
|
min_periods,
|
230
229
|
closed_window: closed.0,
|
231
|
-
fn_params: Some(
|
230
|
+
fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
|
232
231
|
};
|
233
232
|
|
234
233
|
self.inner
|
@@ -277,7 +276,7 @@ impl RbExpr {
|
|
277
276
|
pub fn rolling_quantile(
|
278
277
|
&self,
|
279
278
|
quantile: f64,
|
280
|
-
interpolation: Wrap<QuantileInterpolOptions>,
|
279
|
+
interpolation: Wrap<QuantileMethod>,
|
281
280
|
window_size: usize,
|
282
281
|
weights: Option<Vec<f64>>,
|
283
282
|
min_periods: Option<usize>,
|
@@ -302,7 +301,7 @@ impl RbExpr {
|
|
302
301
|
&self,
|
303
302
|
by: &RbExpr,
|
304
303
|
quantile: f64,
|
305
|
-
interpolation: Wrap<QuantileInterpolOptions>,
|
304
|
+
interpolation: Wrap<QuantileMethod>,
|
306
305
|
window_size: String,
|
307
306
|
min_periods: usize,
|
308
307
|
closed: Wrap<ClosedWindow>,
|
@@ -19,6 +19,8 @@ impl RbExpr {
|
|
19
19
|
exact: bool,
|
20
20
|
cache: bool,
|
21
21
|
) -> Self {
|
22
|
+
let format = format.map(|x| x.into());
|
23
|
+
|
22
24
|
let options = StrptimeOptions {
|
23
25
|
format,
|
24
26
|
strict,
|
@@ -33,12 +35,15 @@ impl RbExpr {
|
|
33
35
|
&self,
|
34
36
|
format: Option<String>,
|
35
37
|
time_unit: Option<Wrap<TimeUnit>>,
|
36
|
-
time_zone: Option<TimeZone>,
|
38
|
+
time_zone: Option<Wrap<TimeZone>>,
|
37
39
|
strict: bool,
|
38
40
|
exact: bool,
|
39
41
|
cache: bool,
|
40
42
|
ambiguous: &Self,
|
41
43
|
) -> Self {
|
44
|
+
let format = format.map(|x| x.into());
|
45
|
+
let time_zone = time_zone.map(|x| x.0);
|
46
|
+
|
42
47
|
let options = StrptimeOptions {
|
43
48
|
format,
|
44
49
|
strict,
|
@@ -58,6 +63,8 @@ impl RbExpr {
|
|
58
63
|
}
|
59
64
|
|
60
65
|
pub fn str_to_time(&self, format: Option<String>, strict: bool, cache: bool) -> Self {
|
66
|
+
let format = format.map(|x| x.into());
|
67
|
+
|
61
68
|
let options = StrptimeOptions {
|
62
69
|
format,
|
63
70
|
strict,
|
@@ -123,6 +130,11 @@ impl RbExpr {
|
|
123
130
|
self.inner.clone().str().to_lowercase().into()
|
124
131
|
}
|
125
132
|
|
133
|
+
// requires nightly
|
134
|
+
// pub fn str_to_titlecase(&self) -> Self {
|
135
|
+
// self.inner.clone().str().to_titlecase().into()
|
136
|
+
// }
|
137
|
+
|
126
138
|
pub fn str_len_bytes(&self) -> Self {
|
127
139
|
self.inner.clone().str().len_bytes().into()
|
128
140
|
}
|
@@ -193,51 +205,19 @@ impl RbExpr {
|
|
193
205
|
}
|
194
206
|
|
195
207
|
pub fn str_hex_encode(&self) -> Self {
|
196
|
-
self.clone()
|
197
|
-
.inner
|
198
|
-
.map(
|
199
|
-
move |s| s.str().map(|s| Some(s.hex_encode().into_series())),
|
200
|
-
GetOutput::same_type(),
|
201
|
-
)
|
202
|
-
.with_fmt("str.hex_encode")
|
203
|
-
.into()
|
208
|
+
self.inner.clone().str().hex_encode().into()
|
204
209
|
}
|
205
210
|
|
206
211
|
pub fn str_hex_decode(&self, strict: bool) -> Self {
|
207
|
-
self.clone()
|
208
|
-
.inner
|
209
|
-
.map(
|
210
|
-
move |s| s.str()?.hex_decode(strict).map(|s| Some(s.into_series())),
|
211
|
-
GetOutput::same_type(),
|
212
|
-
)
|
213
|
-
.with_fmt("str.hex_decode")
|
214
|
-
.into()
|
212
|
+
self.inner.clone().str().hex_decode(strict).into()
|
215
213
|
}
|
216
214
|
|
217
215
|
pub fn str_base64_encode(&self) -> Self {
|
218
|
-
self.clone()
|
219
|
-
.inner
|
220
|
-
.map(
|
221
|
-
move |s| s.str().map(|s| Some(s.base64_encode().into_series())),
|
222
|
-
GetOutput::same_type(),
|
223
|
-
)
|
224
|
-
.with_fmt("str.base64_encode")
|
225
|
-
.into()
|
216
|
+
self.inner.clone().str().base64_encode().into()
|
226
217
|
}
|
227
218
|
|
228
219
|
pub fn str_base64_decode(&self, strict: bool) -> Self {
|
229
|
-
self.clone()
|
230
|
-
.inner
|
231
|
-
.map(
|
232
|
-
move |s| {
|
233
|
-
s.str()?
|
234
|
-
.base64_decode(strict)
|
235
|
-
.map(|s| Some(s.into_series()))
|
236
|
-
},
|
237
|
-
GetOutput::same_type(),
|
238
|
-
)
|
239
|
-
.with_fmt("str.base64_decode")
|
240
|
-
.into()
|
220
|
+
self.inner.clone().str().base64_decode(strict).into()
|
241
221
|
}
|
242
222
|
|
243
223
|
pub fn str_to_integer(&self, base: &Self, strict: bool) -> Self {
|
data/ext/polars/src/file.rs
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
use std::fs::File;
|
2
2
|
use std::io;
|
3
|
-
use std::io::{
|
3
|
+
use std::io::{Cursor, Read, Seek, SeekFrom, Write};
|
4
4
|
use std::path::PathBuf;
|
5
5
|
|
6
6
|
use magnus::{exception, prelude::*, Error, RString, Value};
|
@@ -24,9 +24,8 @@ impl RbFileLikeObject {
|
|
24
24
|
RbFileLikeObject { inner: object }
|
25
25
|
}
|
26
26
|
|
27
|
-
pub fn
|
28
|
-
|
29
|
-
std::io::Cursor::new(data)
|
27
|
+
pub fn as_bytes(&self) -> bytes::Bytes {
|
28
|
+
self.as_file_buffer().into_inner().into()
|
30
29
|
}
|
31
30
|
|
32
31
|
pub fn as_file_buffer(&self) -> Cursor<Vec<u8>> {
|
@@ -132,7 +131,33 @@ impl FileLike for RbFileLikeObject {}
|
|
132
131
|
|
133
132
|
pub enum EitherRustRubyFile {
|
134
133
|
Rb(RbFileLikeObject),
|
135
|
-
Rust(
|
134
|
+
Rust(File),
|
135
|
+
}
|
136
|
+
|
137
|
+
impl EitherRustRubyFile {
|
138
|
+
pub fn into_dyn(self) -> Box<dyn FileLike> {
|
139
|
+
match self {
|
140
|
+
EitherRustRubyFile::Rb(f) => Box::new(f),
|
141
|
+
EitherRustRubyFile::Rust(f) => Box::new(f),
|
142
|
+
}
|
143
|
+
}
|
144
|
+
}
|
145
|
+
|
146
|
+
pub enum RubyScanSourceInput {
|
147
|
+
Buffer(bytes::Bytes),
|
148
|
+
Path(PathBuf),
|
149
|
+
#[allow(dead_code)]
|
150
|
+
File(File),
|
151
|
+
}
|
152
|
+
|
153
|
+
pub fn get_ruby_scan_source_input(rb_f: Value, write: bool) -> RbResult<RubyScanSourceInput> {
|
154
|
+
if let Ok(file_path) = PathBuf::try_convert(rb_f) {
|
155
|
+
// TODO resolve_homedir
|
156
|
+
Ok(RubyScanSourceInput::Path(file_path))
|
157
|
+
} else {
|
158
|
+
let f = RbFileLikeObject::with_requirements(rb_f, !write, write, !write)?;
|
159
|
+
Ok(RubyScanSourceInput::Buffer(f.as_bytes()))
|
160
|
+
}
|
136
161
|
}
|
137
162
|
|
138
163
|
///
|
@@ -144,12 +169,11 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
|
|
144
169
|
let file_path = std::path::Path::new(&s);
|
145
170
|
let file_path = resolve_homedir(file_path);
|
146
171
|
let f = if truncate {
|
147
|
-
File::create(file_path).map_err(RbPolarsErr::
|
172
|
+
File::create(file_path).map_err(RbPolarsErr::from)?
|
148
173
|
} else {
|
149
174
|
polars_utils::open_file(&file_path).map_err(RbPolarsErr::from)?
|
150
175
|
};
|
151
|
-
|
152
|
-
Ok(EitherRustRubyFile::Rust(reader))
|
176
|
+
Ok(EitherRustRubyFile::Rust(f))
|
153
177
|
} else {
|
154
178
|
let f = RbFileLikeObject::with_requirements(rb_f, !truncate, truncate, !truncate)?;
|
155
179
|
Ok(EitherRustRubyFile::Rb(f))
|
@@ -157,21 +181,34 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
|
|
157
181
|
}
|
158
182
|
|
159
183
|
pub fn get_file_like(f: Value, truncate: bool) -> RbResult<Box<dyn FileLike>> {
|
160
|
-
|
161
|
-
match get_either_file(f, truncate)? {
|
162
|
-
Rb(f) => Ok(Box::new(f)),
|
163
|
-
Rust(f) => Ok(Box::new(f.into_inner())),
|
164
|
-
}
|
184
|
+
Ok(get_either_file(f, truncate)?.into_dyn())
|
165
185
|
}
|
166
186
|
|
167
|
-
pub
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
187
|
+
pub enum RbReadBytes {
|
188
|
+
Bytes(RString),
|
189
|
+
Other(Value),
|
190
|
+
}
|
191
|
+
|
192
|
+
pub fn read_if_bytesio(rb_f: Value) -> RbReadBytes {
|
193
|
+
rb_f.funcall("read", ())
|
194
|
+
.map(RbReadBytes::Bytes)
|
195
|
+
.unwrap_or(RbReadBytes::Other(rb_f))
|
196
|
+
}
|
197
|
+
|
198
|
+
pub fn get_mmap_bytes_reader<'a>(rb_f: &'a RbReadBytes) -> RbResult<Box<dyn MmapBytesReader + 'a>> {
|
199
|
+
get_mmap_bytes_reader_and_path(rb_f).map(|t| t.0)
|
200
|
+
}
|
201
|
+
|
202
|
+
pub fn get_mmap_bytes_reader_and_path<'a>(
|
203
|
+
rb_f: &'a RbReadBytes,
|
204
|
+
) -> RbResult<(Box<dyn MmapBytesReader + 'a>, Option<PathBuf>)> {
|
205
|
+
match rb_f {
|
206
|
+
RbReadBytes::Bytes(v) => Ok((Box::new(Cursor::new(unsafe { v.as_slice() })), None)),
|
207
|
+
RbReadBytes::Other(v) => {
|
208
|
+
let path = PathBuf::try_convert(*v)?;
|
209
|
+
let f = File::open(&path)
|
210
|
+
.map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
|
211
|
+
Ok((Box::new(f), Some(path)))
|
212
|
+
}
|
176
213
|
}
|
177
214
|
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
use polars::lazy::dsl;
|
2
|
+
|
3
|
+
use crate::RbExpr;
|
4
|
+
|
5
|
+
// TODO add to Ruby
|
6
|
+
pub fn business_day_count(
|
7
|
+
start: &RbExpr,
|
8
|
+
end: &RbExpr,
|
9
|
+
week_mask: [bool; 7],
|
10
|
+
holidays: Vec<i32>,
|
11
|
+
) -> RbExpr {
|
12
|
+
let start = start.inner.clone();
|
13
|
+
let end = end.inner.clone();
|
14
|
+
dsl::business_day_count(start, end, week_mask, holidays).into()
|
15
|
+
}
|
@@ -11,9 +11,9 @@ pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
|
11
11
|
let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::from)?;
|
12
12
|
|
13
13
|
let dict = RHash::new();
|
14
|
-
for field in
|
15
|
-
let dt: Wrap<DataType> = Wrap((&field.
|
16
|
-
dict.aset(field.name.
|
14
|
+
for field in metadata.schema.iter_values() {
|
15
|
+
let dt: Wrap<DataType> = Wrap((&field.dtype).into());
|
16
|
+
dict.aset(field.name.as_str(), dt)?;
|
17
17
|
}
|
18
18
|
Ok(dict)
|
19
19
|
}
|
@@ -26,9 +26,9 @@ pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
|
|
26
26
|
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;
|
27
27
|
|
28
28
|
let dict = RHash::new();
|
29
|
-
for field in arrow_schema.
|
30
|
-
let dt: Wrap<DataType> = Wrap((&field.
|
31
|
-
dict.aset(field.name, dt)?;
|
29
|
+
for field in arrow_schema.iter_values() {
|
30
|
+
let dt: Wrap<DataType> = Wrap((&field.dtype).into());
|
31
|
+
dict.aset(field.name.as_str(), dt)?;
|
32
32
|
}
|
33
33
|
Ok(dict)
|
34
34
|
}
|
@@ -94,10 +94,7 @@ pub fn col(name: String) -> RbExpr {
|
|
94
94
|
}
|
95
95
|
|
96
96
|
pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
97
|
-
let lfs = lfs
|
98
|
-
.into_iter()
|
99
|
-
.map(<&RbLazyFrame>::try_convert)
|
100
|
-
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
97
|
+
let lfs = lfs.typecheck::<Obj<RbLazyFrame>>()?;
|
101
98
|
|
102
99
|
Ok(RArray::from_iter(lfs.iter().map(|lf| {
|
103
100
|
let df = lf.ldf.borrow().clone().collect().unwrap();
|
@@ -173,8 +170,14 @@ pub fn cum_fold(
|
|
173
170
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
174
171
|
let lambda = Opaque::from(lambda);
|
175
172
|
|
176
|
-
let func =
|
177
|
-
|
173
|
+
let func = move |a: Column, b: Column| {
|
174
|
+
binary_lambda(
|
175
|
+
Ruby::get().unwrap().get_inner(lambda),
|
176
|
+
a.take_materialized_series(),
|
177
|
+
b.take_materialized_series(),
|
178
|
+
)
|
179
|
+
.map(|v| v.map(Column::from))
|
180
|
+
};
|
178
181
|
Ok(dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
179
182
|
}
|
180
183
|
|
@@ -263,8 +266,14 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
|
263
266
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
264
267
|
let lambda = Opaque::from(lambda);
|
265
268
|
|
266
|
-
let func =
|
267
|
-
|
269
|
+
let func = move |a: Column, b: Column| {
|
270
|
+
binary_lambda(
|
271
|
+
Ruby::get().unwrap().get_inner(lambda),
|
272
|
+
a.take_materialized_series(),
|
273
|
+
b.take_materialized_series(),
|
274
|
+
)
|
275
|
+
.map(|v| v.map(Column::from))
|
276
|
+
};
|
268
277
|
Ok(dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
269
278
|
}
|
270
279
|
|
@@ -56,13 +56,14 @@ pub fn datetime_range(
|
|
56
56
|
every: String,
|
57
57
|
closed: Wrap<ClosedWindow>,
|
58
58
|
time_unit: Option<Wrap<TimeUnit>>,
|
59
|
-
time_zone: Option<TimeZone>,
|
59
|
+
time_zone: Option<Wrap<TimeZone>>,
|
60
60
|
) -> RbExpr {
|
61
61
|
let start = start.inner.clone();
|
62
62
|
let end = end.inner.clone();
|
63
63
|
let every = Duration::parse(&every);
|
64
64
|
let closed = closed.0;
|
65
65
|
let time_unit = time_unit.map(|x| x.0);
|
66
|
+
let time_zone = time_zone.map(|x| x.0);
|
66
67
|
dsl::datetime_range(start, end, every, closed, time_unit, time_zone).into()
|
67
68
|
}
|
68
69
|
|
@@ -72,13 +73,14 @@ pub fn datetime_ranges(
|
|
72
73
|
every: String,
|
73
74
|
closed: Wrap<ClosedWindow>,
|
74
75
|
time_unit: Option<Wrap<TimeUnit>>,
|
75
|
-
time_zone: Option<TimeZone>,
|
76
|
+
time_zone: Option<Wrap<TimeZone>>,
|
76
77
|
) -> RbExpr {
|
77
78
|
let start = start.inner.clone();
|
78
79
|
let end = end.inner.clone();
|
79
80
|
let every = Duration::parse(&every);
|
80
81
|
let closed = closed.0;
|
81
82
|
let time_unit = time_unit.map(|x| x.0);
|
83
|
+
let time_zone = time_zone.map(|x| x.0);
|
82
84
|
dsl::datetime_ranges(start, end, every, closed, time_unit, time_zone).into()
|
83
85
|
}
|
84
86
|
|
@@ -0,0 +1 @@
|
|
1
|
+
pub mod to_ruby;
|
@@ -0,0 +1,83 @@
|
|
1
|
+
use arrow::datatypes::ArrowDataType;
|
2
|
+
use arrow::ffi;
|
3
|
+
use magnus::{IntoValue, Value};
|
4
|
+
use polars::datatypes::CompatLevel;
|
5
|
+
use polars::frame::DataFrame;
|
6
|
+
use polars::prelude::{ArrayRef, ArrowField, PlSmallStr, PolarsResult, SchemaExt};
|
7
|
+
use polars::series::Series;
|
8
|
+
use polars_core::utils::arrow;
|
9
|
+
|
10
|
+
use crate::RbResult;
|
11
|
+
|
12
|
+
#[magnus::wrap(class = "Polars::RbArrowArrayStream")]
|
13
|
+
pub struct RbArrowArrayStream {
|
14
|
+
stream: ffi::ArrowArrayStream,
|
15
|
+
}
|
16
|
+
|
17
|
+
impl RbArrowArrayStream {
|
18
|
+
pub fn to_i(&self) -> usize {
|
19
|
+
(&self.stream as *const _) as usize
|
20
|
+
}
|
21
|
+
}
|
22
|
+
|
23
|
+
pub(crate) fn dataframe_to_stream(df: &DataFrame) -> RbResult<Value> {
|
24
|
+
let iter = Box::new(DataFrameStreamIterator::new(df));
|
25
|
+
let field = iter.field();
|
26
|
+
let stream = ffi::export_iterator(iter, field);
|
27
|
+
Ok(RbArrowArrayStream { stream }.into_value())
|
28
|
+
}
|
29
|
+
|
30
|
+
pub struct DataFrameStreamIterator {
|
31
|
+
columns: Vec<Series>,
|
32
|
+
dtype: ArrowDataType,
|
33
|
+
idx: usize,
|
34
|
+
n_chunks: usize,
|
35
|
+
}
|
36
|
+
|
37
|
+
impl DataFrameStreamIterator {
|
38
|
+
fn new(df: &DataFrame) -> Self {
|
39
|
+
let schema = df.schema().to_arrow(CompatLevel::newest());
|
40
|
+
let dtype = ArrowDataType::Struct(schema.into_iter_values().collect());
|
41
|
+
|
42
|
+
Self {
|
43
|
+
columns: df
|
44
|
+
.get_columns()
|
45
|
+
.iter()
|
46
|
+
.map(|v| v.as_materialized_series().clone())
|
47
|
+
.collect(),
|
48
|
+
dtype,
|
49
|
+
idx: 0,
|
50
|
+
n_chunks: df.n_chunks(),
|
51
|
+
}
|
52
|
+
}
|
53
|
+
|
54
|
+
fn field(&self) -> ArrowField {
|
55
|
+
ArrowField::new(PlSmallStr::EMPTY, self.dtype.clone(), false)
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
impl Iterator for DataFrameStreamIterator {
|
60
|
+
type Item = PolarsResult<ArrayRef>;
|
61
|
+
|
62
|
+
fn next(&mut self) -> Option<Self::Item> {
|
63
|
+
if self.idx >= self.n_chunks {
|
64
|
+
None
|
65
|
+
} else {
|
66
|
+
// create a batch of the columns with the same chunk no.
|
67
|
+
let batch_cols = self
|
68
|
+
.columns
|
69
|
+
.iter()
|
70
|
+
.map(|s| s.to_arrow(self.idx, CompatLevel::newest()))
|
71
|
+
.collect::<Vec<_>>();
|
72
|
+
self.idx += 1;
|
73
|
+
|
74
|
+
let array = arrow::array::StructArray::new(
|
75
|
+
self.dtype.clone(),
|
76
|
+
batch_cols[0].len(),
|
77
|
+
batch_cols,
|
78
|
+
None,
|
79
|
+
);
|
80
|
+
Some(Ok(Box::new(array)))
|
81
|
+
}
|
82
|
+
}
|
83
|
+
}
|