polars-df 0.1.4 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/Cargo.lock +430 -217
- data/Cargo.toml +2 -0
- data/LICENSE.txt +1 -1
- data/README.md +0 -2
- data/ext/polars/Cargo.toml +9 -3
- data/ext/polars/src/apply/dataframe.rs +303 -0
- data/ext/polars/src/apply/mod.rs +253 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +254 -35
- data/ext/polars/src/dataframe.rs +151 -6
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +80 -3
- data/ext/polars/src/lazy/dsl.rs +84 -10
- data/ext/polars/src/lib.rs +180 -8
- data/ext/polars/src/series.rs +328 -10
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1480 -77
- data/lib/polars/data_types.rb +122 -0
- data/lib/polars/date_time_expr.rb +10 -10
- data/lib/polars/date_time_name_space.rb +8 -8
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/expr.rb +262 -12
- data/lib/polars/functions.rb +194 -5
- data/lib/polars/group_by.rb +76 -36
- data/lib/polars/io.rb +19 -3
- data/lib/polars/lazy_frame.rb +798 -25
- data/lib/polars/lazy_functions.rb +569 -30
- data/lib/polars/list_expr.rb +1 -1
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +192 -27
- data/lib/polars/string_expr.rb +6 -5
- data/lib/polars/string_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +38 -29
- metadata +11 -4
@@ -1,35 +1,64 @@
|
|
1
|
-
use magnus::{class, RArray, Symbol, TryConvert, Value, QNIL};
|
1
|
+
use magnus::{class, r_hash::ForEach, Module, RArray, RHash, Symbol, TryConvert, Value, QNIL};
|
2
2
|
use polars::chunked_array::object::PolarsObjectSafe;
|
3
3
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
4
4
|
use polars::datatypes::AnyValue;
|
5
|
-
use polars::frame::
|
5
|
+
use polars::frame::row::Row;
|
6
|
+
use polars::frame::NullStrategy;
|
7
|
+
use polars::io::avro::AvroCompression;
|
6
8
|
use polars::prelude::*;
|
7
9
|
use polars::series::ops::NullBehavior;
|
8
10
|
use std::fmt::{Display, Formatter};
|
9
11
|
use std::hash::{Hash, Hasher};
|
10
12
|
|
11
|
-
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
13
|
+
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
12
14
|
|
15
|
+
/// Reinterprets a slice of `T` as a slice of [`Wrap<T>`] without copying.
pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
    // SAFETY: `Wrap<T>` is `#[repr(transparent)]` over `T`, so both element types have the
    // same size and alignment. Pointer and length come from a valid slice and the returned
    // reference inherits the lifetime of `slice`.
    unsafe { std::slice::from_raw_parts(slice.as_ptr().cast::<Wrap<T>>(), slice.len()) }
}

/// Strips the [`Wrap`] layer from every element of `buf`, reusing the allocation.
pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
    // The field layout of `Vec` itself is unspecified, so the vector is rebuilt from its
    // raw parts instead of being transmuted as a whole.
    let mut buf = std::mem::ManuallyDrop::new(buf);
    let (ptr, len, cap) = (buf.as_mut_ptr(), buf.len(), buf.capacity());
    // SAFETY: `Wrap<T>` is `#[repr(transparent)]` over `T`, so the element size and
    // alignment are unchanged. `ptr`, `len` and `cap` describe the allocation of `buf`,
    // which is never dropped because it is held in `ManuallyDrop`.
    unsafe { Vec::from_raw_parts(ptr.cast::<T>(), len, cap) }
}

/// Transparent newtype around `T`, used to attach conversion impls to the wrapped type.
///
/// `#[repr(transparent)]` is what the two helper functions above rely on.
#[derive(Clone)]
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T> From<T> for Wrap<T> {
    /// Wraps `t` unchanged.
    fn from(t: T) -> Self {
        Wrap(t)
    }
}
|
20
44
|
|
21
|
-
pub fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
|
45
|
+
/// Converts `obj` into an `RArray` and returns it together with its length.
///
/// Fails with the conversion error when `obj` cannot be converted to an `RArray`.
pub(crate) fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
    let array = obj.try_convert::<RArray>()?;
    let count = array.len();
    Ok((array, count))
}
|
26
50
|
|
27
|
-
pub fn get_df(obj: Value) -> RbResult<DataFrame> {
|
51
|
+
pub(crate) fn get_df(obj: Value) -> RbResult<DataFrame> {
|
28
52
|
let rbdf = obj.funcall::<_, _, &RbDataFrame>("_df", ())?;
|
29
53
|
Ok(rbdf.df.borrow().clone())
|
30
54
|
}
|
31
55
|
|
32
|
-
pub fn
|
56
|
+
pub(crate) fn get_lf(obj: Value) -> RbResult<LazyFrame> {
|
57
|
+
let rbdf = obj.funcall::<_, _, &RbLazyFrame>("_ldf", ())?;
|
58
|
+
Ok(rbdf.ldf.clone())
|
59
|
+
}
|
60
|
+
|
61
|
+
pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
|
33
62
|
let rbs = obj.funcall::<_, _, &RbSeries>("_s", ())?;
|
34
63
|
Ok(rbs.series.borrow().clone())
|
35
64
|
}
|
@@ -115,40 +144,127 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
115
144
|
|
116
145
|
impl From<Wrap<DataType>> for Value {
|
117
146
|
fn from(w: Wrap<DataType>) -> Self {
|
118
|
-
|
147
|
+
let pl = crate::module();
|
148
|
+
|
149
|
+
match &w.0 {
|
150
|
+
DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
|
151
|
+
DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
|
152
|
+
DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
|
153
|
+
DataType::Int64 => pl.const_get::<_, Value>("Int64").unwrap(),
|
154
|
+
DataType::UInt8 => pl.const_get::<_, Value>("UInt8").unwrap(),
|
155
|
+
DataType::UInt16 => pl.const_get::<_, Value>("UInt16").unwrap(),
|
156
|
+
DataType::UInt32 => pl.const_get::<_, Value>("UInt32").unwrap(),
|
157
|
+
DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
|
158
|
+
DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
|
159
|
+
DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
|
160
|
+
DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
|
161
|
+
DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
|
162
|
+
DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
|
163
|
+
DataType::List(inner) => {
|
164
|
+
let inner = Wrap(*inner.clone());
|
165
|
+
let list_class = pl.const_get::<_, Value>("List").unwrap();
|
166
|
+
list_class.funcall::<_, _, Value>("new", (inner,)).unwrap()
|
167
|
+
}
|
168
|
+
DataType::Date => pl.const_get::<_, Value>("Date").unwrap(),
|
169
|
+
DataType::Datetime(tu, tz) => {
|
170
|
+
let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
|
171
|
+
datetime_class
|
172
|
+
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz.clone()))
|
173
|
+
.unwrap()
|
174
|
+
}
|
175
|
+
DataType::Duration(tu) => {
|
176
|
+
let duration_class = pl.const_get::<_, Value>("Duration").unwrap();
|
177
|
+
duration_class
|
178
|
+
.funcall::<_, _, Value>("new", (tu.to_ascii(),))
|
179
|
+
.unwrap()
|
180
|
+
}
|
181
|
+
DataType::Object(_) => pl.const_get::<_, Value>("Object").unwrap(),
|
182
|
+
DataType::Categorical(_) => pl.const_get::<_, Value>("Categorical").unwrap(),
|
183
|
+
DataType::Time => pl.const_get::<_, Value>("Time").unwrap(),
|
184
|
+
DataType::Struct(fields) => {
|
185
|
+
let field_class = pl.const_get::<_, Value>("Field").unwrap();
|
186
|
+
let iter = fields.iter().map(|fld| {
|
187
|
+
let name = fld.name().clone();
|
188
|
+
let dtype = Wrap(fld.data_type().clone());
|
189
|
+
field_class
|
190
|
+
.funcall::<_, _, Value>("new", (name, dtype))
|
191
|
+
.unwrap()
|
192
|
+
});
|
193
|
+
let fields = RArray::from_iter(iter);
|
194
|
+
let struct_class = pl.const_get::<_, Value>("Struct").unwrap();
|
195
|
+
struct_class
|
196
|
+
.funcall::<_, _, Value>("new", (fields,))
|
197
|
+
.unwrap()
|
198
|
+
}
|
199
|
+
DataType::Null => pl.const_get::<_, Value>("Null").unwrap(),
|
200
|
+
DataType::Unknown => pl.const_get::<_, Value>("Unknown").unwrap(),
|
201
|
+
}
|
119
202
|
}
|
120
203
|
}
|
121
204
|
|
122
205
|
impl TryConvert for Wrap<DataType> {
|
123
206
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
124
|
-
let dtype =
|
125
|
-
"
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
207
|
+
let dtype = if ob.is_kind_of(class::class()) {
|
208
|
+
let name = ob.funcall::<_, _, String>("name", ())?;
|
209
|
+
match name.as_str() {
|
210
|
+
"Polars::UInt8" => DataType::UInt8,
|
211
|
+
"Polars::UInt16" => DataType::UInt16,
|
212
|
+
"Polars::UInt32" => DataType::UInt32,
|
213
|
+
"Polars::UInt64" => DataType::UInt64,
|
214
|
+
"Polars::Int8" => DataType::Int8,
|
215
|
+
"Polars::Int16" => DataType::Int16,
|
216
|
+
"Polars::Int32" => DataType::Int32,
|
217
|
+
"Polars::Int64" => DataType::Int64,
|
218
|
+
"Polars::Utf8" => DataType::Utf8,
|
219
|
+
"Polars::Binary" => DataType::Binary,
|
220
|
+
"Polars::Boolean" => DataType::Boolean,
|
221
|
+
"Polars::Categorical" => DataType::Categorical(None),
|
222
|
+
"Polars::Date" => DataType::Date,
|
223
|
+
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
224
|
+
"Polars::Time" => DataType::Time,
|
225
|
+
"Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
|
226
|
+
"Polars::Float32" => DataType::Float32,
|
227
|
+
"Polars::Float64" => DataType::Float64,
|
228
|
+
// "Polars::Object" => DataType::Object(OBJECT_NAME),
|
229
|
+
"Polars::List" => DataType::List(Box::new(DataType::Boolean)),
|
230
|
+
"Polars::Null" => DataType::Null,
|
231
|
+
"Polars::Unknown" => DataType::Unknown,
|
232
|
+
dt => {
|
233
|
+
return Err(RbValueError::new_err(format!(
|
234
|
+
"{dt} is not a correct polars DataType.",
|
235
|
+
)))
|
236
|
+
}
|
237
|
+
}
|
238
|
+
} else {
|
239
|
+
match ob.try_convert::<String>()?.as_str() {
|
240
|
+
"u8" => DataType::UInt8,
|
241
|
+
"u16" => DataType::UInt16,
|
242
|
+
"u32" => DataType::UInt32,
|
243
|
+
"u64" => DataType::UInt64,
|
244
|
+
"i8" => DataType::Int8,
|
245
|
+
"i16" => DataType::Int16,
|
246
|
+
"i32" => DataType::Int32,
|
247
|
+
"i64" => DataType::Int64,
|
248
|
+
"str" => DataType::Utf8,
|
249
|
+
"bin" => DataType::Binary,
|
250
|
+
"bool" => DataType::Boolean,
|
251
|
+
"cat" => DataType::Categorical(None),
|
252
|
+
"date" => DataType::Date,
|
253
|
+
"datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
254
|
+
"f32" => DataType::Float32,
|
255
|
+
"time" => DataType::Time,
|
256
|
+
"dur" => DataType::Duration(TimeUnit::Microseconds),
|
257
|
+
"f64" => DataType::Float64,
|
258
|
+
// "obj" => DataType::Object(OBJECT_NAME),
|
259
|
+
"list" => DataType::List(Box::new(DataType::Boolean)),
|
260
|
+
"null" => DataType::Null,
|
261
|
+
"unk" => DataType::Unknown,
|
262
|
+
_ => {
|
263
|
+
return Err(RbValueError::new_err(format!(
|
264
|
+
"{} is not a supported DataType.",
|
265
|
+
ob
|
266
|
+
)))
|
267
|
+
}
|
152
268
|
}
|
153
269
|
};
|
154
270
|
Ok(Wrap(dtype))
|
@@ -171,6 +287,54 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
171
287
|
}
|
172
288
|
}
|
173
289
|
|
290
|
+
impl TryConvert for Wrap<AsofStrategy> {
|
291
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
292
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
293
|
+
"backward" => AsofStrategy::Backward,
|
294
|
+
"forward" => AsofStrategy::Forward,
|
295
|
+
v => {
|
296
|
+
return Err(RbValueError::new_err(format!(
|
297
|
+
"strategy must be one of {{'backward', 'forward'}}, got {}",
|
298
|
+
v
|
299
|
+
)))
|
300
|
+
}
|
301
|
+
};
|
302
|
+
Ok(Wrap(parsed))
|
303
|
+
}
|
304
|
+
}
|
305
|
+
|
306
|
+
impl TryConvert for Wrap<InterpolationMethod> {
|
307
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
308
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
309
|
+
"linear" => InterpolationMethod::Linear,
|
310
|
+
"nearest" => InterpolationMethod::Nearest,
|
311
|
+
v => {
|
312
|
+
return Err(RbValueError::new_err(format!(
|
313
|
+
"method must be one of {{'linear', 'nearest'}}, got {v}",
|
314
|
+
)))
|
315
|
+
}
|
316
|
+
};
|
317
|
+
Ok(Wrap(parsed))
|
318
|
+
}
|
319
|
+
}
|
320
|
+
|
321
|
+
impl TryConvert for Wrap<Option<AvroCompression>> {
|
322
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
323
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
324
|
+
"uncompressed" => None,
|
325
|
+
"snappy" => Some(AvroCompression::Snappy),
|
326
|
+
"deflate" => Some(AvroCompression::Deflate),
|
327
|
+
v => {
|
328
|
+
return Err(RbValueError::new_err(format!(
|
329
|
+
"compression must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {}",
|
330
|
+
v
|
331
|
+
)))
|
332
|
+
}
|
333
|
+
};
|
334
|
+
Ok(Wrap(parsed))
|
335
|
+
}
|
336
|
+
}
|
337
|
+
|
174
338
|
impl TryConvert for Wrap<CategoricalOrdering> {
|
175
339
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
176
340
|
let parsed = match ob.try_convert::<String>()?.as_str() {
|
@@ -187,6 +351,22 @@ impl TryConvert for Wrap<CategoricalOrdering> {
|
|
187
351
|
}
|
188
352
|
}
|
189
353
|
|
354
|
+
impl TryConvert for Wrap<StartBy> {
|
355
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
356
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
357
|
+
"window" => StartBy::WindowBound,
|
358
|
+
"datapoint" => StartBy::DataPoint,
|
359
|
+
"monday" => StartBy::Monday,
|
360
|
+
v => {
|
361
|
+
return Err(RbValueError::new_err(format!(
|
362
|
+
"closed must be one of {{'window', 'datapoint', 'monday'}}, got {v}",
|
363
|
+
)))
|
364
|
+
}
|
365
|
+
};
|
366
|
+
Ok(Wrap(parsed))
|
367
|
+
}
|
368
|
+
}
|
369
|
+
|
190
370
|
impl TryConvert for Wrap<ClosedWindow> {
|
191
371
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
192
372
|
let parsed = match ob.try_convert::<String>()?.as_str() {
|
@@ -462,6 +642,32 @@ pub fn parse_parquet_compression(
|
|
462
642
|
Ok(parsed)
|
463
643
|
}
|
464
644
|
|
645
|
+
impl<'s> TryConvert for Wrap<Row<'s>> {
|
646
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
647
|
+
let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
|
648
|
+
for item in ob.try_convert::<RArray>()?.each() {
|
649
|
+
vals.push(item?.try_convert::<Wrap<AnyValue<'s>>>()?);
|
650
|
+
}
|
651
|
+
let vals: Vec<AnyValue> = unsafe { std::mem::transmute(vals) };
|
652
|
+
Ok(Wrap(Row(vals)))
|
653
|
+
}
|
654
|
+
}
|
655
|
+
|
656
|
+
impl TryConvert for Wrap<Schema> {
|
657
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
658
|
+
let dict = ob.try_convert::<RHash>()?;
|
659
|
+
|
660
|
+
let mut schema = Vec::new();
|
661
|
+
dict.foreach(|key: String, val: Wrap<DataType>| {
|
662
|
+
schema.push(Field::new(&key, val.0));
|
663
|
+
Ok(ForEach::Continue)
|
664
|
+
})
|
665
|
+
.unwrap();
|
666
|
+
|
667
|
+
Ok(Wrap(schema.into_iter().into()))
|
668
|
+
}
|
669
|
+
}
|
670
|
+
|
465
671
|
#[derive(Clone, Debug)]
|
466
672
|
pub struct ObjectValue {
|
467
673
|
pub inner: Value,
|
@@ -503,18 +709,31 @@ impl From<Value> for ObjectValue {
|
|
503
709
|
}
|
504
710
|
}
|
505
711
|
|
712
|
+
impl TryConvert for ObjectValue {
|
713
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
714
|
+
Ok(ObjectValue { inner: ob })
|
715
|
+
}
|
716
|
+
}
|
717
|
+
|
506
718
|
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
    /// Reinterprets the trait object's data pointer as a pointer to `ObjectValue`.
    fn from(val: &dyn PolarsObjectSafe) -> Self {
        let ptr = val as *const dyn PolarsObjectSafe as *const ObjectValue;
        // NOTE(review): this is only sound if the concrete type behind `val` is
        // `ObjectValue`; nothing in this impl checks that. Confirm against callers.
        unsafe { &*ptr }
    }
}
|
511
723
|
|
724
|
+
// TODO remove
|
512
725
|
impl ObjectValue {
|
513
726
|
pub fn to_object(&self) -> Value {
|
514
727
|
self.inner
|
515
728
|
}
|
516
729
|
}
|
517
730
|
|
731
|
+
impl From<ObjectValue> for Value {
|
732
|
+
fn from(val: ObjectValue) -> Self {
|
733
|
+
val.inner
|
734
|
+
}
|
735
|
+
}
|
736
|
+
|
518
737
|
impl Default for ObjectValue {
|
519
738
|
fn default() -> Self {
|
520
739
|
ObjectValue { inner: *QNIL }
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,15 +1,22 @@
|
|
1
1
|
use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
|
2
|
+
use polars::frame::NullStrategy;
|
3
|
+
use polars::io::avro::AvroCompression;
|
2
4
|
use polars::io::mmap::ReaderBytes;
|
3
5
|
use polars::io::RowCount;
|
6
|
+
use polars::prelude::pivot::{pivot, pivot_stable};
|
4
7
|
use polars::prelude::*;
|
5
8
|
use std::cell::RefCell;
|
6
9
|
use std::io::{BufWriter, Cursor};
|
7
10
|
use std::ops::Deref;
|
8
11
|
|
12
|
+
use crate::apply::dataframe::{
|
13
|
+
apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
|
14
|
+
apply_lambda_with_utf8_out_type,
|
15
|
+
};
|
9
16
|
use crate::conversion::*;
|
10
17
|
use crate::file::{get_file_like, get_mmap_bytes_reader};
|
11
18
|
use crate::series::{to_rbseries_collection, to_series_collection};
|
12
|
-
use crate::{series, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
19
|
+
use crate::{series, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
13
20
|
|
14
21
|
#[magnus::wrap(class = "Polars::RbDataFrame")]
|
15
22
|
pub struct RbDataFrame {
|
@@ -179,6 +186,48 @@ impl RbDataFrame {
|
|
179
186
|
Ok(RbDataFrame::new(df))
|
180
187
|
}
|
181
188
|
|
189
|
+
pub fn read_avro(
|
190
|
+
rb_f: Value,
|
191
|
+
columns: Option<Vec<String>>,
|
192
|
+
projection: Option<Vec<usize>>,
|
193
|
+
n_rows: Option<usize>,
|
194
|
+
) -> RbResult<Self> {
|
195
|
+
use polars::io::avro::AvroReader;
|
196
|
+
|
197
|
+
let file = get_file_like(rb_f, false)?;
|
198
|
+
let df = AvroReader::new(file)
|
199
|
+
.with_projection(projection)
|
200
|
+
.with_columns(columns)
|
201
|
+
.with_n_rows(n_rows)
|
202
|
+
.finish()
|
203
|
+
.map_err(RbPolarsErr::from)?;
|
204
|
+
Ok(RbDataFrame::new(df))
|
205
|
+
}
|
206
|
+
|
207
|
+
pub fn write_avro(
|
208
|
+
&self,
|
209
|
+
rb_f: Value,
|
210
|
+
compression: Wrap<Option<AvroCompression>>,
|
211
|
+
) -> RbResult<()> {
|
212
|
+
use polars::io::avro::AvroWriter;
|
213
|
+
|
214
|
+
if let Ok(s) = rb_f.try_convert::<String>() {
|
215
|
+
let f = std::fs::File::create(&s).unwrap();
|
216
|
+
AvroWriter::new(f)
|
217
|
+
.with_compression(compression.0)
|
218
|
+
.finish(&mut self.df.borrow_mut())
|
219
|
+
.map_err(RbPolarsErr::from)?;
|
220
|
+
} else {
|
221
|
+
let mut buf = get_file_like(rb_f, true)?;
|
222
|
+
AvroWriter::new(&mut buf)
|
223
|
+
.with_compression(compression.0)
|
224
|
+
.finish(&mut self.df.borrow_mut())
|
225
|
+
.map_err(RbPolarsErr::from)?;
|
226
|
+
}
|
227
|
+
|
228
|
+
Ok(())
|
229
|
+
}
|
230
|
+
|
182
231
|
pub fn read_json(rb_f: Value) -> RbResult<Self> {
|
183
232
|
// memmap the file first
|
184
233
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
@@ -238,6 +287,14 @@ impl RbDataFrame {
|
|
238
287
|
Ok(())
|
239
288
|
}
|
240
289
|
|
290
|
+
pub fn read_hashes(
|
291
|
+
_dicts: Value,
|
292
|
+
_infer_schema_length: Option<usize>,
|
293
|
+
_schema_overwrite: Option<Wrap<Schema>>,
|
294
|
+
) -> RbResult<Self> {
|
295
|
+
Err(RbPolarsErr::todo())
|
296
|
+
}
|
297
|
+
|
241
298
|
pub fn read_hash(data: RHash) -> RbResult<Self> {
|
242
299
|
let mut cols: Vec<Series> = Vec::new();
|
243
300
|
data.foreach(|name: String, values: Value| {
|
@@ -341,7 +398,7 @@ impl RbDataFrame {
|
|
341
398
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
342
399
|
obj.unwrap().to_object()
|
343
400
|
}
|
344
|
-
_ => Wrap(s.get(idx)).into(),
|
401
|
+
_ => Wrap(s.get(idx).unwrap()).into(),
|
345
402
|
})
|
346
403
|
.collect(),
|
347
404
|
)
|
@@ -364,7 +421,7 @@ impl RbDataFrame {
|
|
364
421
|
s.get_object(idx).map(|any| any.into());
|
365
422
|
obj.unwrap().to_object()
|
366
423
|
}
|
367
|
-
_ => Wrap(s.get(idx)).into(),
|
424
|
+
_ => Wrap(s.get(idx).unwrap()).into(),
|
368
425
|
})
|
369
426
|
.collect(),
|
370
427
|
)
|
@@ -517,9 +574,8 @@ impl RbDataFrame {
|
|
517
574
|
.collect()
|
518
575
|
}
|
519
576
|
|
520
|
-
pub fn n_chunks(&self) ->
|
521
|
-
|
522
|
-
Ok(n)
|
577
|
+
pub fn n_chunks(&self) -> usize {
|
578
|
+
self.df.borrow().n_chunks()
|
523
579
|
}
|
524
580
|
|
525
581
|
pub fn shape(&self) -> (usize, usize) {
|
@@ -751,6 +807,31 @@ impl RbDataFrame {
|
|
751
807
|
Ok(RbDataFrame::new(df))
|
752
808
|
}
|
753
809
|
|
810
|
+
pub fn pivot_expr(
|
811
|
+
&self,
|
812
|
+
values: Vec<String>,
|
813
|
+
index: Vec<String>,
|
814
|
+
columns: Vec<String>,
|
815
|
+
aggregate_expr: &RbExpr,
|
816
|
+
maintain_order: bool,
|
817
|
+
sort_columns: bool,
|
818
|
+
) -> RbResult<Self> {
|
819
|
+
let fun = match maintain_order {
|
820
|
+
true => pivot_stable,
|
821
|
+
false => pivot,
|
822
|
+
};
|
823
|
+
let df = fun(
|
824
|
+
&self.df.borrow(),
|
825
|
+
values,
|
826
|
+
index,
|
827
|
+
columns,
|
828
|
+
aggregate_expr.inner.clone(),
|
829
|
+
sort_columns,
|
830
|
+
)
|
831
|
+
.map_err(RbPolarsErr::from)?;
|
832
|
+
Ok(RbDataFrame::new(df))
|
833
|
+
}
|
834
|
+
|
754
835
|
pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<Vec<Self>> {
|
755
836
|
let out = if stable {
|
756
837
|
self.df.borrow().partition_by_stable(groups)
|
@@ -870,10 +951,74 @@ impl RbDataFrame {
|
|
870
951
|
df.into()
|
871
952
|
}
|
872
953
|
|
954
|
+
pub fn apply(
|
955
|
+
&self,
|
956
|
+
lambda: Value,
|
957
|
+
output_type: Option<Wrap<DataType>>,
|
958
|
+
inference_size: usize,
|
959
|
+
) -> RbResult<(Value, bool)> {
|
960
|
+
let df = &self.df.borrow();
|
961
|
+
|
962
|
+
let output_type = output_type.map(|dt| dt.0);
|
963
|
+
let out = match output_type {
|
964
|
+
Some(DataType::Int32) => {
|
965
|
+
apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None).into_series()
|
966
|
+
}
|
967
|
+
Some(DataType::Int64) => {
|
968
|
+
apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None).into_series()
|
969
|
+
}
|
970
|
+
Some(DataType::UInt32) => {
|
971
|
+
apply_lambda_with_primitive_out_type::<UInt32Type>(df, lambda, 0, None)
|
972
|
+
.into_series()
|
973
|
+
}
|
974
|
+
Some(DataType::UInt64) => {
|
975
|
+
apply_lambda_with_primitive_out_type::<UInt64Type>(df, lambda, 0, None)
|
976
|
+
.into_series()
|
977
|
+
}
|
978
|
+
Some(DataType::Float32) => {
|
979
|
+
apply_lambda_with_primitive_out_type::<Float32Type>(df, lambda, 0, None)
|
980
|
+
.into_series()
|
981
|
+
}
|
982
|
+
Some(DataType::Float64) => {
|
983
|
+
apply_lambda_with_primitive_out_type::<Float64Type>(df, lambda, 0, None)
|
984
|
+
.into_series()
|
985
|
+
}
|
986
|
+
Some(DataType::Boolean) => {
|
987
|
+
apply_lambda_with_bool_out_type(df, lambda, 0, None).into_series()
|
988
|
+
}
|
989
|
+
Some(DataType::Date) => {
|
990
|
+
apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None)
|
991
|
+
.into_date()
|
992
|
+
.into_series()
|
993
|
+
}
|
994
|
+
Some(DataType::Datetime(tu, tz)) => {
|
995
|
+
apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None)
|
996
|
+
.into_datetime(tu, tz)
|
997
|
+
.into_series()
|
998
|
+
}
|
999
|
+
Some(DataType::Utf8) => {
|
1000
|
+
apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
|
1001
|
+
}
|
1002
|
+
_ => return apply_lambda_unknown(df, lambda, inference_size),
|
1003
|
+
};
|
1004
|
+
|
1005
|
+
Ok((RbSeries::from(out).into(), false))
|
1006
|
+
}
|
1007
|
+
|
873
1008
|
pub fn shrink_to_fit(&self) {
|
874
1009
|
self.df.borrow_mut().shrink_to_fit();
|
875
1010
|
}
|
876
1011
|
|
1012
|
+
pub fn hash_rows(&self, k0: u64, k1: u64, k2: u64, k3: u64) -> RbResult<RbSeries> {
|
1013
|
+
let hb = ahash::RandomState::with_seeds(k0, k1, k2, k3);
|
1014
|
+
let hash = self
|
1015
|
+
.df
|
1016
|
+
.borrow_mut()
|
1017
|
+
.hash_rows(Some(hb))
|
1018
|
+
.map_err(RbPolarsErr::from)?;
|
1019
|
+
Ok(hash.into_series().into())
|
1020
|
+
}
|
1021
|
+
|
877
1022
|
pub fn transpose(&self, include_header: bool, names: String) -> RbResult<Self> {
|
878
1023
|
let mut df = self.df.borrow().transpose().map_err(RbPolarsErr::from)?;
|
879
1024
|
if include_header {
|
data/ext/polars/src/error.rs
CHANGED
@@ -1,7 +1,39 @@
|
|
1
1
|
use magnus::Value;
|
2
|
-
use polars::
|
3
|
-
|
2
|
+
use polars::prelude::*;
|
3
|
+
|
4
|
+
use crate::lazy::dsl::RbExpr;
|
5
|
+
use crate::Wrap;
|
4
6
|
|
5
7
|
/// Unimplemented placeholder.
///
/// # Panics
///
/// Always panics via `todo!()`; none of the arguments are used.
pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Series> {
    todo!()
}
|
10
|
+
|
11
|
+
pub fn map_single(
|
12
|
+
rbexpr: &RbExpr,
|
13
|
+
_lambda: Value,
|
14
|
+
output_type: Option<Wrap<DataType>>,
|
15
|
+
agg_list: bool,
|
16
|
+
) -> RbExpr {
|
17
|
+
let output_type = output_type.map(|wrap| wrap.0);
|
18
|
+
|
19
|
+
let output_type2 = output_type.clone();
|
20
|
+
let function = move |_s: Series| {
|
21
|
+
let _output_type = output_type2.clone().unwrap_or(DataType::Unknown);
|
22
|
+
|
23
|
+
todo!();
|
24
|
+
};
|
25
|
+
|
26
|
+
let output_map = GetOutput::map_field(move |fld| match output_type {
|
27
|
+
Some(ref dt) => Field::new(fld.name(), dt.clone()),
|
28
|
+
None => {
|
29
|
+
let mut fld = fld.clone();
|
30
|
+
fld.coerce(DataType::Unknown);
|
31
|
+
fld
|
32
|
+
}
|
33
|
+
});
|
34
|
+
if agg_list {
|
35
|
+
rbexpr.clone().inner.map_list(function, output_map).into()
|
36
|
+
} else {
|
37
|
+
rbexpr.clone().inner.map(function, output_map).into()
|
38
|
+
}
|
39
|
+
}
|