polars-df 0.14.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/Cargo.lock +1296 -283
- data/LICENSE.txt +1 -0
- data/README.md +1 -2
- data/ext/polars/Cargo.toml +15 -5
- data/ext/polars/src/batched_csv.rs +7 -10
- data/ext/polars/src/conversion/any_value.rs +31 -21
- data/ext/polars/src/conversion/mod.rs +125 -28
- data/ext/polars/src/dataframe/construction.rs +0 -3
- data/ext/polars/src/dataframe/export.rs +9 -2
- data/ext/polars/src/dataframe/general.rs +16 -11
- data/ext/polars/src/dataframe/io.rs +73 -169
- data/ext/polars/src/dataframe/mod.rs +1 -0
- data/ext/polars/src/dataframe/serde.rs +15 -0
- data/ext/polars/src/error.rs +31 -48
- data/ext/polars/src/exceptions.rs +24 -0
- data/ext/polars/src/expr/binary.rs +4 -42
- data/ext/polars/src/expr/datetime.rs +5 -4
- data/ext/polars/src/expr/general.rs +13 -22
- data/ext/polars/src/expr/list.rs +18 -11
- data/ext/polars/src/expr/rolling.rs +6 -7
- data/ext/polars/src/expr/string.rs +9 -36
- data/ext/polars/src/file.rs +59 -22
- data/ext/polars/src/functions/business.rs +15 -0
- data/ext/polars/src/functions/lazy.rs +17 -8
- data/ext/polars/src/functions/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/lazyframe/general.rs +877 -0
- data/ext/polars/src/lazyframe/mod.rs +3 -827
- data/ext/polars/src/lazyframe/serde.rs +31 -0
- data/ext/polars/src/lib.rs +45 -14
- data/ext/polars/src/map/dataframe.rs +10 -6
- data/ext/polars/src/map/lazy.rs +65 -4
- data/ext/polars/src/map/mod.rs +9 -8
- data/ext/polars/src/on_startup.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +1 -5
- data/ext/polars/src/series/arithmetic.rs +10 -10
- data/ext/polars/src/series/construction.rs +2 -2
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +643 -0
- data/ext/polars/src/series/import.rs +55 -0
- data/ext/polars/src/series/mod.rs +11 -638
- data/ext/polars/src/series/scatter.rs +2 -2
- data/ext/polars/src/utils.rs +0 -20
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +275 -52
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +2 -0
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +88 -10
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +649 -15
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1144 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +14 -4
- data/lib/polars/functions.rb +0 -57
@@ -9,7 +9,6 @@ use polars_core::series::IsSorted;
|
|
9
9
|
use crate::conversion::{parse_fill_null_strategy, Wrap};
|
10
10
|
use crate::map::lazy::map_single;
|
11
11
|
use crate::rb_exprs_to_exprs;
|
12
|
-
use crate::utils::reinterpret;
|
13
12
|
use crate::{RbExpr, RbResult};
|
14
13
|
|
15
14
|
impl RbExpr {
|
@@ -165,7 +164,7 @@ impl RbExpr {
|
|
165
164
|
self.inner.clone().implode().into()
|
166
165
|
}
|
167
166
|
|
168
|
-
pub fn quantile(&self, quantile: &Self, interpolation: Wrap<
|
167
|
+
pub fn quantile(&self, quantile: &Self, interpolation: Wrap<QuantileMethod>) -> Self {
|
169
168
|
self.inner
|
170
169
|
.clone()
|
171
170
|
.quantile(quantile.inner.clone(), interpolation.0)
|
@@ -463,14 +462,7 @@ impl RbExpr {
|
|
463
462
|
}
|
464
463
|
|
465
464
|
pub fn gather_every(&self, n: usize, offset: usize) -> Self {
|
466
|
-
self.clone()
|
467
|
-
.inner
|
468
|
-
.map(
|
469
|
-
move |s: Series| Ok(Some(s.gather_every(n, offset))),
|
470
|
-
GetOutput::same_type(),
|
471
|
-
)
|
472
|
-
.with_fmt("gather_every")
|
473
|
-
.into()
|
465
|
+
self.inner.clone().gather_every(n, offset).into()
|
474
466
|
}
|
475
467
|
|
476
468
|
pub fn tail(&self, n: Option<usize>) -> Self {
|
@@ -644,8 +636,16 @@ impl RbExpr {
|
|
644
636
|
output_type: Option<Wrap<DataType>>,
|
645
637
|
agg_list: bool,
|
646
638
|
is_elementwise: bool,
|
639
|
+
returns_scalar: bool,
|
647
640
|
) -> Self {
|
648
|
-
map_single(
|
641
|
+
map_single(
|
642
|
+
self,
|
643
|
+
lambda,
|
644
|
+
output_type,
|
645
|
+
agg_list,
|
646
|
+
is_elementwise,
|
647
|
+
returns_scalar,
|
648
|
+
)
|
649
649
|
}
|
650
650
|
|
651
651
|
pub fn dot(&self, other: &Self) -> Self {
|
@@ -653,16 +653,7 @@ impl RbExpr {
|
|
653
653
|
}
|
654
654
|
|
655
655
|
pub fn reinterpret(&self, signed: bool) -> Self {
|
656
|
-
|
657
|
-
let dt = if signed {
|
658
|
-
DataType::Int64
|
659
|
-
} else {
|
660
|
-
DataType::UInt64
|
661
|
-
};
|
662
|
-
self.clone()
|
663
|
-
.inner
|
664
|
-
.map(function, GetOutput::from_type(dt))
|
665
|
-
.into()
|
656
|
+
self.inner.clone().reinterpret(signed).into()
|
666
657
|
}
|
667
658
|
|
668
659
|
pub fn mode(&self) -> Self {
|
@@ -717,7 +708,7 @@ impl RbExpr {
|
|
717
708
|
}
|
718
709
|
|
719
710
|
pub fn reshape(&self, dims: Vec<i64>) -> Self {
|
720
|
-
self.inner.clone().reshape(&dims
|
711
|
+
self.inner.clone().reshape(&dims).into()
|
721
712
|
}
|
722
713
|
|
723
714
|
pub fn cum_count(&self, reverse: bool) -> Self {
|
data/ext/polars/src/expr/list.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::Value;
|
1
|
+
use magnus::{prelude::*, value::Opaque, Ruby, Value};
|
2
2
|
use polars::lazy::dsl::lit;
|
3
3
|
use polars::prelude::*;
|
4
4
|
use polars::series::ops::NullBehavior;
|
@@ -179,23 +179,30 @@ impl RbExpr {
|
|
179
179
|
pub fn list_to_struct(
|
180
180
|
&self,
|
181
181
|
width_strat: Wrap<ListToStructWidthStrategy>,
|
182
|
-
|
182
|
+
name_gen: Option<Value>,
|
183
183
|
upper_bound: usize,
|
184
184
|
) -> RbResult<Self> {
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
185
|
+
let name_gen = name_gen.map(|lambda| {
|
186
|
+
let lambda = Opaque::from(lambda);
|
187
|
+
Arc::new(move |idx: usize| {
|
188
|
+
let lambda = Ruby::get().unwrap().get_inner(lambda);
|
189
|
+
let out: String = lambda.funcall("call", (idx,)).unwrap();
|
190
|
+
PlSmallStr::from_string(out)
|
191
|
+
});
|
192
|
+
|
193
|
+
// non-Ruby thread
|
194
|
+
todo!();
|
195
|
+
});
|
193
196
|
|
194
197
|
Ok(self
|
195
198
|
.inner
|
196
199
|
.clone()
|
197
200
|
.list()
|
198
|
-
.to_struct(
|
201
|
+
.to_struct(ListToStructArgs::InferWidth {
|
202
|
+
infer_field_strategy: width_strat.0,
|
203
|
+
get_index_name: name_gen,
|
204
|
+
max_fields: upper_bound,
|
205
|
+
})
|
199
206
|
.into())
|
200
207
|
}
|
201
208
|
|
@@ -1,5 +1,4 @@
|
|
1
1
|
use polars::prelude::*;
|
2
|
-
use std::any::Any;
|
3
2
|
|
4
3
|
use crate::conversion::Wrap;
|
5
4
|
use crate::RbExpr;
|
@@ -169,7 +168,7 @@ impl RbExpr {
|
|
169
168
|
weights,
|
170
169
|
min_periods,
|
171
170
|
center,
|
172
|
-
fn_params: Some(
|
171
|
+
fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
|
173
172
|
};
|
174
173
|
|
175
174
|
self.inner.clone().rolling_std(options).into()
|
@@ -187,7 +186,7 @@ impl RbExpr {
|
|
187
186
|
window_size: Duration::parse(&window_size),
|
188
187
|
min_periods,
|
189
188
|
closed_window: closed.0,
|
190
|
-
fn_params: Some(
|
189
|
+
fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
|
191
190
|
};
|
192
191
|
|
193
192
|
self.inner
|
@@ -210,7 +209,7 @@ impl RbExpr {
|
|
210
209
|
weights,
|
211
210
|
min_periods,
|
212
211
|
center,
|
213
|
-
fn_params: Some(
|
212
|
+
fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
|
214
213
|
};
|
215
214
|
|
216
215
|
self.inner.clone().rolling_var(options).into()
|
@@ -228,7 +227,7 @@ impl RbExpr {
|
|
228
227
|
window_size: Duration::parse(&window_size),
|
229
228
|
min_periods,
|
230
229
|
closed_window: closed.0,
|
231
|
-
fn_params: Some(
|
230
|
+
fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })),
|
232
231
|
};
|
233
232
|
|
234
233
|
self.inner
|
@@ -277,7 +276,7 @@ impl RbExpr {
|
|
277
276
|
pub fn rolling_quantile(
|
278
277
|
&self,
|
279
278
|
quantile: f64,
|
280
|
-
interpolation: Wrap<
|
279
|
+
interpolation: Wrap<QuantileMethod>,
|
281
280
|
window_size: usize,
|
282
281
|
weights: Option<Vec<f64>>,
|
283
282
|
min_periods: Option<usize>,
|
@@ -302,7 +301,7 @@ impl RbExpr {
|
|
302
301
|
&self,
|
303
302
|
by: &RbExpr,
|
304
303
|
quantile: f64,
|
305
|
-
interpolation: Wrap<
|
304
|
+
interpolation: Wrap<QuantileMethod>,
|
306
305
|
window_size: String,
|
307
306
|
min_periods: usize,
|
308
307
|
closed: Wrap<ClosedWindow>,
|
@@ -130,6 +130,11 @@ impl RbExpr {
|
|
130
130
|
self.inner.clone().str().to_lowercase().into()
|
131
131
|
}
|
132
132
|
|
133
|
+
// requires nightly
|
134
|
+
// pub fn str_to_titlecase(&self) -> Self {
|
135
|
+
// self.inner.clone().str().to_titlecase().into()
|
136
|
+
// }
|
137
|
+
|
133
138
|
pub fn str_len_bytes(&self) -> Self {
|
134
139
|
self.inner.clone().str().len_bytes().into()
|
135
140
|
}
|
@@ -200,51 +205,19 @@ impl RbExpr {
|
|
200
205
|
}
|
201
206
|
|
202
207
|
pub fn str_hex_encode(&self) -> Self {
|
203
|
-
self.clone()
|
204
|
-
.inner
|
205
|
-
.map(
|
206
|
-
move |s| s.str().map(|s| Some(s.hex_encode().into_series())),
|
207
|
-
GetOutput::same_type(),
|
208
|
-
)
|
209
|
-
.with_fmt("str.hex_encode")
|
210
|
-
.into()
|
208
|
+
self.inner.clone().str().hex_encode().into()
|
211
209
|
}
|
212
210
|
|
213
211
|
pub fn str_hex_decode(&self, strict: bool) -> Self {
|
214
|
-
self.clone()
|
215
|
-
.inner
|
216
|
-
.map(
|
217
|
-
move |s| s.str()?.hex_decode(strict).map(|s| Some(s.into_series())),
|
218
|
-
GetOutput::same_type(),
|
219
|
-
)
|
220
|
-
.with_fmt("str.hex_decode")
|
221
|
-
.into()
|
212
|
+
self.inner.clone().str().hex_decode(strict).into()
|
222
213
|
}
|
223
214
|
|
224
215
|
pub fn str_base64_encode(&self) -> Self {
|
225
|
-
self.clone()
|
226
|
-
.inner
|
227
|
-
.map(
|
228
|
-
move |s| s.str().map(|s| Some(s.base64_encode().into_series())),
|
229
|
-
GetOutput::same_type(),
|
230
|
-
)
|
231
|
-
.with_fmt("str.base64_encode")
|
232
|
-
.into()
|
216
|
+
self.inner.clone().str().base64_encode().into()
|
233
217
|
}
|
234
218
|
|
235
219
|
pub fn str_base64_decode(&self, strict: bool) -> Self {
|
236
|
-
self.clone()
|
237
|
-
.inner
|
238
|
-
.map(
|
239
|
-
move |s| {
|
240
|
-
s.str()?
|
241
|
-
.base64_decode(strict)
|
242
|
-
.map(|s| Some(s.into_series()))
|
243
|
-
},
|
244
|
-
GetOutput::same_type(),
|
245
|
-
)
|
246
|
-
.with_fmt("str.base64_decode")
|
247
|
-
.into()
|
220
|
+
self.inner.clone().str().base64_decode(strict).into()
|
248
221
|
}
|
249
222
|
|
250
223
|
pub fn str_to_integer(&self, base: &Self, strict: bool) -> Self {
|
data/ext/polars/src/file.rs
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
use std::fs::File;
|
2
2
|
use std::io;
|
3
|
-
use std::io::{
|
3
|
+
use std::io::{Cursor, Read, Seek, SeekFrom, Write};
|
4
4
|
use std::path::PathBuf;
|
5
5
|
|
6
6
|
use magnus::{exception, prelude::*, Error, RString, Value};
|
@@ -24,9 +24,8 @@ impl RbFileLikeObject {
|
|
24
24
|
RbFileLikeObject { inner: object }
|
25
25
|
}
|
26
26
|
|
27
|
-
pub fn
|
28
|
-
|
29
|
-
std::io::Cursor::new(data)
|
27
|
+
pub fn as_bytes(&self) -> bytes::Bytes {
|
28
|
+
self.as_file_buffer().into_inner().into()
|
30
29
|
}
|
31
30
|
|
32
31
|
pub fn as_file_buffer(&self) -> Cursor<Vec<u8>> {
|
@@ -132,7 +131,33 @@ impl FileLike for RbFileLikeObject {}
|
|
132
131
|
|
133
132
|
pub enum EitherRustRubyFile {
|
134
133
|
Rb(RbFileLikeObject),
|
135
|
-
Rust(
|
134
|
+
Rust(File),
|
135
|
+
}
|
136
|
+
|
137
|
+
impl EitherRustRubyFile {
|
138
|
+
pub fn into_dyn(self) -> Box<dyn FileLike> {
|
139
|
+
match self {
|
140
|
+
EitherRustRubyFile::Rb(f) => Box::new(f),
|
141
|
+
EitherRustRubyFile::Rust(f) => Box::new(f),
|
142
|
+
}
|
143
|
+
}
|
144
|
+
}
|
145
|
+
|
146
|
+
pub enum RubyScanSourceInput {
|
147
|
+
Buffer(bytes::Bytes),
|
148
|
+
Path(PathBuf),
|
149
|
+
#[allow(dead_code)]
|
150
|
+
File(File),
|
151
|
+
}
|
152
|
+
|
153
|
+
pub fn get_ruby_scan_source_input(rb_f: Value, write: bool) -> RbResult<RubyScanSourceInput> {
|
154
|
+
if let Ok(file_path) = PathBuf::try_convert(rb_f) {
|
155
|
+
// TODO resolve_homedir
|
156
|
+
Ok(RubyScanSourceInput::Path(file_path))
|
157
|
+
} else {
|
158
|
+
let f = RbFileLikeObject::with_requirements(rb_f, !write, write, !write)?;
|
159
|
+
Ok(RubyScanSourceInput::Buffer(f.as_bytes()))
|
160
|
+
}
|
136
161
|
}
|
137
162
|
|
138
163
|
///
|
@@ -144,12 +169,11 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
|
|
144
169
|
let file_path = std::path::Path::new(&s);
|
145
170
|
let file_path = resolve_homedir(file_path);
|
146
171
|
let f = if truncate {
|
147
|
-
File::create(file_path).map_err(RbPolarsErr::
|
172
|
+
File::create(file_path).map_err(RbPolarsErr::from)?
|
148
173
|
} else {
|
149
174
|
polars_utils::open_file(&file_path).map_err(RbPolarsErr::from)?
|
150
175
|
};
|
151
|
-
|
152
|
-
Ok(EitherRustRubyFile::Rust(reader))
|
176
|
+
Ok(EitherRustRubyFile::Rust(f))
|
153
177
|
} else {
|
154
178
|
let f = RbFileLikeObject::with_requirements(rb_f, !truncate, truncate, !truncate)?;
|
155
179
|
Ok(EitherRustRubyFile::Rb(f))
|
@@ -157,21 +181,34 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
|
|
157
181
|
}
|
158
182
|
|
159
183
|
pub fn get_file_like(f: Value, truncate: bool) -> RbResult<Box<dyn FileLike>> {
|
160
|
-
|
161
|
-
match get_either_file(f, truncate)? {
|
162
|
-
Rb(f) => Ok(Box::new(f)),
|
163
|
-
Rust(f) => Ok(Box::new(f.into_inner())),
|
164
|
-
}
|
184
|
+
Ok(get_either_file(f, truncate)?.into_dyn())
|
165
185
|
}
|
166
186
|
|
167
|
-
pub
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
187
|
+
pub enum RbReadBytes {
|
188
|
+
Bytes(RString),
|
189
|
+
Other(Value),
|
190
|
+
}
|
191
|
+
|
192
|
+
pub fn read_if_bytesio(rb_f: Value) -> RbReadBytes {
|
193
|
+
rb_f.funcall("read", ())
|
194
|
+
.map(RbReadBytes::Bytes)
|
195
|
+
.unwrap_or(RbReadBytes::Other(rb_f))
|
196
|
+
}
|
197
|
+
|
198
|
+
pub fn get_mmap_bytes_reader<'a>(rb_f: &'a RbReadBytes) -> RbResult<Box<dyn MmapBytesReader + 'a>> {
|
199
|
+
get_mmap_bytes_reader_and_path(rb_f).map(|t| t.0)
|
200
|
+
}
|
201
|
+
|
202
|
+
pub fn get_mmap_bytes_reader_and_path<'a>(
|
203
|
+
rb_f: &'a RbReadBytes,
|
204
|
+
) -> RbResult<(Box<dyn MmapBytesReader + 'a>, Option<PathBuf>)> {
|
205
|
+
match rb_f {
|
206
|
+
RbReadBytes::Bytes(v) => Ok((Box::new(Cursor::new(unsafe { v.as_slice() })), None)),
|
207
|
+
RbReadBytes::Other(v) => {
|
208
|
+
let path = PathBuf::try_convert(*v)?;
|
209
|
+
let f = File::open(&path)
|
210
|
+
.map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
|
211
|
+
Ok((Box::new(f), Some(path)))
|
212
|
+
}
|
176
213
|
}
|
177
214
|
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
use polars::lazy::dsl;
|
2
|
+
|
3
|
+
use crate::RbExpr;
|
4
|
+
|
5
|
+
// TODO add to Ruby
|
6
|
+
pub fn business_day_count(
|
7
|
+
start: &RbExpr,
|
8
|
+
end: &RbExpr,
|
9
|
+
week_mask: [bool; 7],
|
10
|
+
holidays: Vec<i32>,
|
11
|
+
) -> RbExpr {
|
12
|
+
let start = start.inner.clone();
|
13
|
+
let end = end.inner.clone();
|
14
|
+
dsl::business_day_count(start, end, week_mask, holidays).into()
|
15
|
+
}
|
@@ -94,10 +94,7 @@ pub fn col(name: String) -> RbExpr {
|
|
94
94
|
}
|
95
95
|
|
96
96
|
pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
97
|
-
let lfs = lfs
|
98
|
-
.into_iter()
|
99
|
-
.map(<&RbLazyFrame>::try_convert)
|
100
|
-
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
97
|
+
let lfs = lfs.typecheck::<Obj<RbLazyFrame>>()?;
|
101
98
|
|
102
99
|
Ok(RArray::from_iter(lfs.iter().map(|lf| {
|
103
100
|
let df = lf.ldf.borrow().clone().collect().unwrap();
|
@@ -173,8 +170,14 @@ pub fn cum_fold(
|
|
173
170
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
174
171
|
let lambda = Opaque::from(lambda);
|
175
172
|
|
176
|
-
let func =
|
177
|
-
|
173
|
+
let func = move |a: Column, b: Column| {
|
174
|
+
binary_lambda(
|
175
|
+
Ruby::get().unwrap().get_inner(lambda),
|
176
|
+
a.take_materialized_series(),
|
177
|
+
b.take_materialized_series(),
|
178
|
+
)
|
179
|
+
.map(|v| v.map(Column::from))
|
180
|
+
};
|
178
181
|
Ok(dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
179
182
|
}
|
180
183
|
|
@@ -263,8 +266,14 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
|
263
266
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
264
267
|
let lambda = Opaque::from(lambda);
|
265
268
|
|
266
|
-
let func =
|
267
|
-
|
269
|
+
let func = move |a: Column, b: Column| {
|
270
|
+
binary_lambda(
|
271
|
+
Ruby::get().unwrap().get_inner(lambda),
|
272
|
+
a.take_materialized_series(),
|
273
|
+
b.take_materialized_series(),
|
274
|
+
)
|
275
|
+
.map(|v| v.map(Column::from))
|
276
|
+
};
|
268
277
|
Ok(dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
269
278
|
}
|
270
279
|
|
@@ -0,0 +1 @@
|
|
1
|
+
pub mod to_ruby;
|
@@ -0,0 +1,83 @@
|
|
1
|
+
use arrow::datatypes::ArrowDataType;
|
2
|
+
use arrow::ffi;
|
3
|
+
use magnus::{IntoValue, Value};
|
4
|
+
use polars::datatypes::CompatLevel;
|
5
|
+
use polars::frame::DataFrame;
|
6
|
+
use polars::prelude::{ArrayRef, ArrowField, PlSmallStr, PolarsResult, SchemaExt};
|
7
|
+
use polars::series::Series;
|
8
|
+
use polars_core::utils::arrow;
|
9
|
+
|
10
|
+
use crate::RbResult;
|
11
|
+
|
12
|
+
#[magnus::wrap(class = "Polars::RbArrowArrayStream")]
|
13
|
+
pub struct RbArrowArrayStream {
|
14
|
+
stream: ffi::ArrowArrayStream,
|
15
|
+
}
|
16
|
+
|
17
|
+
impl RbArrowArrayStream {
|
18
|
+
pub fn to_i(&self) -> usize {
|
19
|
+
(&self.stream as *const _) as usize
|
20
|
+
}
|
21
|
+
}
|
22
|
+
|
23
|
+
pub(crate) fn dataframe_to_stream(df: &DataFrame) -> RbResult<Value> {
|
24
|
+
let iter = Box::new(DataFrameStreamIterator::new(df));
|
25
|
+
let field = iter.field();
|
26
|
+
let stream = ffi::export_iterator(iter, field);
|
27
|
+
Ok(RbArrowArrayStream { stream }.into_value())
|
28
|
+
}
|
29
|
+
|
30
|
+
pub struct DataFrameStreamIterator {
|
31
|
+
columns: Vec<Series>,
|
32
|
+
dtype: ArrowDataType,
|
33
|
+
idx: usize,
|
34
|
+
n_chunks: usize,
|
35
|
+
}
|
36
|
+
|
37
|
+
impl DataFrameStreamIterator {
|
38
|
+
fn new(df: &DataFrame) -> Self {
|
39
|
+
let schema = df.schema().to_arrow(CompatLevel::newest());
|
40
|
+
let dtype = ArrowDataType::Struct(schema.into_iter_values().collect());
|
41
|
+
|
42
|
+
Self {
|
43
|
+
columns: df
|
44
|
+
.get_columns()
|
45
|
+
.iter()
|
46
|
+
.map(|v| v.as_materialized_series().clone())
|
47
|
+
.collect(),
|
48
|
+
dtype,
|
49
|
+
idx: 0,
|
50
|
+
n_chunks: df.n_chunks(),
|
51
|
+
}
|
52
|
+
}
|
53
|
+
|
54
|
+
fn field(&self) -> ArrowField {
|
55
|
+
ArrowField::new(PlSmallStr::EMPTY, self.dtype.clone(), false)
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
impl Iterator for DataFrameStreamIterator {
|
60
|
+
type Item = PolarsResult<ArrayRef>;
|
61
|
+
|
62
|
+
fn next(&mut self) -> Option<Self::Item> {
|
63
|
+
if self.idx >= self.n_chunks {
|
64
|
+
None
|
65
|
+
} else {
|
66
|
+
// create a batch of the columns with the same chunk no.
|
67
|
+
let batch_cols = self
|
68
|
+
.columns
|
69
|
+
.iter()
|
70
|
+
.map(|s| s.to_arrow(self.idx, CompatLevel::newest()))
|
71
|
+
.collect::<Vec<_>>();
|
72
|
+
self.idx += 1;
|
73
|
+
|
74
|
+
let array = arrow::array::StructArray::new(
|
75
|
+
self.dtype.clone(),
|
76
|
+
batch_cols[0].len(),
|
77
|
+
batch_cols,
|
78
|
+
None,
|
79
|
+
);
|
80
|
+
Some(Ok(Box::new(array)))
|
81
|
+
}
|
82
|
+
}
|
83
|
+
}
|