polars-df 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/Cargo.lock +430 -217
- data/Cargo.toml +2 -0
- data/LICENSE.txt +1 -1
- data/README.md +0 -2
- data/ext/polars/Cargo.toml +9 -3
- data/ext/polars/src/apply/dataframe.rs +303 -0
- data/ext/polars/src/apply/mod.rs +253 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +254 -35
- data/ext/polars/src/dataframe.rs +151 -6
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +80 -3
- data/ext/polars/src/lazy/dsl.rs +84 -10
- data/ext/polars/src/lib.rs +180 -8
- data/ext/polars/src/series.rs +328 -10
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1480 -77
- data/lib/polars/data_types.rb +122 -0
- data/lib/polars/date_time_expr.rb +10 -10
- data/lib/polars/date_time_name_space.rb +8 -8
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/expr.rb +262 -12
- data/lib/polars/functions.rb +194 -5
- data/lib/polars/group_by.rb +76 -36
- data/lib/polars/io.rb +19 -3
- data/lib/polars/lazy_frame.rb +798 -25
- data/lib/polars/lazy_functions.rb +569 -30
- data/lib/polars/list_expr.rb +1 -1
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +192 -27
- data/lib/polars/string_expr.rb +6 -5
- data/lib/polars/string_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +38 -29
- metadata +11 -4
@@ -1,35 +1,64 @@
|
|
1
|
-
use magnus::{class, RArray, Symbol, TryConvert, Value, QNIL};
|
1
|
+
use magnus::{class, r_hash::ForEach, Module, RArray, RHash, Symbol, TryConvert, Value, QNIL};
|
2
2
|
use polars::chunked_array::object::PolarsObjectSafe;
|
3
3
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
4
4
|
use polars::datatypes::AnyValue;
|
5
|
-
use polars::frame::
|
5
|
+
use polars::frame::row::Row;
|
6
|
+
use polars::frame::NullStrategy;
|
7
|
+
use polars::io::avro::AvroCompression;
|
6
8
|
use polars::prelude::*;
|
7
9
|
use polars::series::ops::NullBehavior;
|
8
10
|
use std::fmt::{Display, Formatter};
|
9
11
|
use std::hash::{Hash, Hasher};
|
10
12
|
|
11
|
-
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
13
|
+
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
12
14
|
|
15
|
+
/// Reinterprets a slice of `T` as a slice of [`Wrap<T>`] without copying.
pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
    let data = slice.as_ptr().cast::<Wrap<T>>();
    // SAFETY: `Wrap<T>` is `#[repr(transparent)]` over `T`, so both types have the same
    // size, alignment and validity. Pointer and length come from a live slice, and the
    // signature ties the returned borrow to the input lifetime.
    unsafe { std::slice::from_raw_parts(data, slice.len()) }
}

/// Strips the [`Wrap`] layer from every element of `buf`, reusing its allocation.
pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
    // Prevent the source vector from freeing the buffer that is handed over below.
    let mut buf = std::mem::ManuallyDrop::new(buf);
    let (ptr, len, cap) = (buf.as_mut_ptr(), buf.len(), buf.capacity());
    // SAFETY: `Wrap<T>` is `#[repr(transparent)]` over `T`, so the element layout is
    // identical. `ptr`, `len` and `cap` come from a `Vec` that is never dropped, which
    // makes the rebuilt vector the sole owner of the allocation. Going through
    // `from_raw_parts` avoids depending on the unspecified field layout of `Vec`.
    unsafe { Vec::from_raw_parts(ptr.cast::<T>(), len, cap) }
}

/// Transparent newtype used to implement conversion traits for foreign types.
///
/// The `#[repr(transparent)]` attribute is load-bearing: the pointer casts in
/// `slice_to_wrapped` and `vec_extract_wrapped` rely on it.
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T> Clone for Wrap<T>
where
    T: Clone,
{
    fn clone(&self) -> Self {
        Wrap(self.0.clone())
    }
}

impl<T> From<T> for Wrap<T> {
    fn from(t: T) -> Self {
        Wrap(t)
    }
}
|
20
44
|
|
21
|
-
pub fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
|
45
|
+
/// Converts `obj` to a Ruby array and returns it together with its length.
///
/// Fails with the conversion error when `obj` cannot be converted to an `RArray`.
pub(crate) fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
    obj.try_convert::<RArray>().map(|array| {
        let length = array.len();
        (array, length)
    })
}
|
26
50
|
|
27
|
-
pub fn get_df(obj: Value) -> RbResult<DataFrame> {
|
51
|
+
pub(crate) fn get_df(obj: Value) -> RbResult<DataFrame> {
|
28
52
|
let rbdf = obj.funcall::<_, _, &RbDataFrame>("_df", ())?;
|
29
53
|
Ok(rbdf.df.borrow().clone())
|
30
54
|
}
|
31
55
|
|
32
|
-
pub fn
|
56
|
+
pub(crate) fn get_lf(obj: Value) -> RbResult<LazyFrame> {
|
57
|
+
let rbdf = obj.funcall::<_, _, &RbLazyFrame>("_ldf", ())?;
|
58
|
+
Ok(rbdf.ldf.clone())
|
59
|
+
}
|
60
|
+
|
61
|
+
pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
|
33
62
|
let rbs = obj.funcall::<_, _, &RbSeries>("_s", ())?;
|
34
63
|
Ok(rbs.series.borrow().clone())
|
35
64
|
}
|
@@ -115,40 +144,127 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
115
144
|
|
116
145
|
impl From<Wrap<DataType>> for Value {
|
117
146
|
fn from(w: Wrap<DataType>) -> Self {
|
118
|
-
|
147
|
+
let pl = crate::module();
|
148
|
+
|
149
|
+
match &w.0 {
|
150
|
+
DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
|
151
|
+
DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
|
152
|
+
DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
|
153
|
+
DataType::Int64 => pl.const_get::<_, Value>("Int64").unwrap(),
|
154
|
+
DataType::UInt8 => pl.const_get::<_, Value>("UInt8").unwrap(),
|
155
|
+
DataType::UInt16 => pl.const_get::<_, Value>("UInt16").unwrap(),
|
156
|
+
DataType::UInt32 => pl.const_get::<_, Value>("UInt32").unwrap(),
|
157
|
+
DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
|
158
|
+
DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
|
159
|
+
DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
|
160
|
+
DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
|
161
|
+
DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
|
162
|
+
DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
|
163
|
+
DataType::List(inner) => {
|
164
|
+
let inner = Wrap(*inner.clone());
|
165
|
+
let list_class = pl.const_get::<_, Value>("List").unwrap();
|
166
|
+
list_class.funcall::<_, _, Value>("new", (inner,)).unwrap()
|
167
|
+
}
|
168
|
+
DataType::Date => pl.const_get::<_, Value>("Date").unwrap(),
|
169
|
+
DataType::Datetime(tu, tz) => {
|
170
|
+
let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
|
171
|
+
datetime_class
|
172
|
+
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz.clone()))
|
173
|
+
.unwrap()
|
174
|
+
}
|
175
|
+
DataType::Duration(tu) => {
|
176
|
+
let duration_class = pl.const_get::<_, Value>("Duration").unwrap();
|
177
|
+
duration_class
|
178
|
+
.funcall::<_, _, Value>("new", (tu.to_ascii(),))
|
179
|
+
.unwrap()
|
180
|
+
}
|
181
|
+
DataType::Object(_) => pl.const_get::<_, Value>("Object").unwrap(),
|
182
|
+
DataType::Categorical(_) => pl.const_get::<_, Value>("Categorical").unwrap(),
|
183
|
+
DataType::Time => pl.const_get::<_, Value>("Time").unwrap(),
|
184
|
+
DataType::Struct(fields) => {
|
185
|
+
let field_class = pl.const_get::<_, Value>("Field").unwrap();
|
186
|
+
let iter = fields.iter().map(|fld| {
|
187
|
+
let name = fld.name().clone();
|
188
|
+
let dtype = Wrap(fld.data_type().clone());
|
189
|
+
field_class
|
190
|
+
.funcall::<_, _, Value>("new", (name, dtype))
|
191
|
+
.unwrap()
|
192
|
+
});
|
193
|
+
let fields = RArray::from_iter(iter);
|
194
|
+
let struct_class = pl.const_get::<_, Value>("Struct").unwrap();
|
195
|
+
struct_class
|
196
|
+
.funcall::<_, _, Value>("new", (fields,))
|
197
|
+
.unwrap()
|
198
|
+
}
|
199
|
+
DataType::Null => pl.const_get::<_, Value>("Null").unwrap(),
|
200
|
+
DataType::Unknown => pl.const_get::<_, Value>("Unknown").unwrap(),
|
201
|
+
}
|
119
202
|
}
|
120
203
|
}
|
121
204
|
|
122
205
|
impl TryConvert for Wrap<DataType> {
|
123
206
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
124
|
-
let dtype =
|
125
|
-
"
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
207
|
+
let dtype = if ob.is_kind_of(class::class()) {
|
208
|
+
let name = ob.funcall::<_, _, String>("name", ())?;
|
209
|
+
match name.as_str() {
|
210
|
+
"Polars::UInt8" => DataType::UInt8,
|
211
|
+
"Polars::UInt16" => DataType::UInt16,
|
212
|
+
"Polars::UInt32" => DataType::UInt32,
|
213
|
+
"Polars::UInt64" => DataType::UInt64,
|
214
|
+
"Polars::Int8" => DataType::Int8,
|
215
|
+
"Polars::Int16" => DataType::Int16,
|
216
|
+
"Polars::Int32" => DataType::Int32,
|
217
|
+
"Polars::Int64" => DataType::Int64,
|
218
|
+
"Polars::Utf8" => DataType::Utf8,
|
219
|
+
"Polars::Binary" => DataType::Binary,
|
220
|
+
"Polars::Boolean" => DataType::Boolean,
|
221
|
+
"Polars::Categorical" => DataType::Categorical(None),
|
222
|
+
"Polars::Date" => DataType::Date,
|
223
|
+
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
224
|
+
"Polars::Time" => DataType::Time,
|
225
|
+
"Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
|
226
|
+
"Polars::Float32" => DataType::Float32,
|
227
|
+
"Polars::Float64" => DataType::Float64,
|
228
|
+
// "Polars::Object" => DataType::Object(OBJECT_NAME),
|
229
|
+
"Polars::List" => DataType::List(Box::new(DataType::Boolean)),
|
230
|
+
"Polars::Null" => DataType::Null,
|
231
|
+
"Polars::Unknown" => DataType::Unknown,
|
232
|
+
dt => {
|
233
|
+
return Err(RbValueError::new_err(format!(
|
234
|
+
"{dt} is not a correct polars DataType.",
|
235
|
+
)))
|
236
|
+
}
|
237
|
+
}
|
238
|
+
} else {
|
239
|
+
match ob.try_convert::<String>()?.as_str() {
|
240
|
+
"u8" => DataType::UInt8,
|
241
|
+
"u16" => DataType::UInt16,
|
242
|
+
"u32" => DataType::UInt32,
|
243
|
+
"u64" => DataType::UInt64,
|
244
|
+
"i8" => DataType::Int8,
|
245
|
+
"i16" => DataType::Int16,
|
246
|
+
"i32" => DataType::Int32,
|
247
|
+
"i64" => DataType::Int64,
|
248
|
+
"str" => DataType::Utf8,
|
249
|
+
"bin" => DataType::Binary,
|
250
|
+
"bool" => DataType::Boolean,
|
251
|
+
"cat" => DataType::Categorical(None),
|
252
|
+
"date" => DataType::Date,
|
253
|
+
"datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
254
|
+
"f32" => DataType::Float32,
|
255
|
+
"time" => DataType::Time,
|
256
|
+
"dur" => DataType::Duration(TimeUnit::Microseconds),
|
257
|
+
"f64" => DataType::Float64,
|
258
|
+
// "obj" => DataType::Object(OBJECT_NAME),
|
259
|
+
"list" => DataType::List(Box::new(DataType::Boolean)),
|
260
|
+
"null" => DataType::Null,
|
261
|
+
"unk" => DataType::Unknown,
|
262
|
+
_ => {
|
263
|
+
return Err(RbValueError::new_err(format!(
|
264
|
+
"{} is not a supported DataType.",
|
265
|
+
ob
|
266
|
+
)))
|
267
|
+
}
|
152
268
|
}
|
153
269
|
};
|
154
270
|
Ok(Wrap(dtype))
|
@@ -171,6 +287,54 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
171
287
|
}
|
172
288
|
}
|
173
289
|
|
290
|
+
impl TryConvert for Wrap<AsofStrategy> {
|
291
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
292
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
293
|
+
"backward" => AsofStrategy::Backward,
|
294
|
+
"forward" => AsofStrategy::Forward,
|
295
|
+
v => {
|
296
|
+
return Err(RbValueError::new_err(format!(
|
297
|
+
"strategy must be one of {{'backward', 'forward'}}, got {}",
|
298
|
+
v
|
299
|
+
)))
|
300
|
+
}
|
301
|
+
};
|
302
|
+
Ok(Wrap(parsed))
|
303
|
+
}
|
304
|
+
}
|
305
|
+
|
306
|
+
impl TryConvert for Wrap<InterpolationMethod> {
|
307
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
308
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
309
|
+
"linear" => InterpolationMethod::Linear,
|
310
|
+
"nearest" => InterpolationMethod::Nearest,
|
311
|
+
v => {
|
312
|
+
return Err(RbValueError::new_err(format!(
|
313
|
+
"method must be one of {{'linear', 'nearest'}}, got {v}",
|
314
|
+
)))
|
315
|
+
}
|
316
|
+
};
|
317
|
+
Ok(Wrap(parsed))
|
318
|
+
}
|
319
|
+
}
|
320
|
+
|
321
|
+
impl TryConvert for Wrap<Option<AvroCompression>> {
|
322
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
323
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
324
|
+
"uncompressed" => None,
|
325
|
+
"snappy" => Some(AvroCompression::Snappy),
|
326
|
+
"deflate" => Some(AvroCompression::Deflate),
|
327
|
+
v => {
|
328
|
+
return Err(RbValueError::new_err(format!(
|
329
|
+
"compression must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {}",
|
330
|
+
v
|
331
|
+
)))
|
332
|
+
}
|
333
|
+
};
|
334
|
+
Ok(Wrap(parsed))
|
335
|
+
}
|
336
|
+
}
|
337
|
+
|
174
338
|
impl TryConvert for Wrap<CategoricalOrdering> {
|
175
339
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
176
340
|
let parsed = match ob.try_convert::<String>()?.as_str() {
|
@@ -187,6 +351,22 @@ impl TryConvert for Wrap<CategoricalOrdering> {
|
|
187
351
|
}
|
188
352
|
}
|
189
353
|
|
354
|
+
impl TryConvert for Wrap<StartBy> {
|
355
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
356
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
357
|
+
"window" => StartBy::WindowBound,
|
358
|
+
"datapoint" => StartBy::DataPoint,
|
359
|
+
"monday" => StartBy::Monday,
|
360
|
+
v => {
|
361
|
+
return Err(RbValueError::new_err(format!(
|
362
|
+
"closed must be one of {{'window', 'datapoint', 'monday'}}, got {v}",
|
363
|
+
)))
|
364
|
+
}
|
365
|
+
};
|
366
|
+
Ok(Wrap(parsed))
|
367
|
+
}
|
368
|
+
}
|
369
|
+
|
190
370
|
impl TryConvert for Wrap<ClosedWindow> {
|
191
371
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
192
372
|
let parsed = match ob.try_convert::<String>()?.as_str() {
|
@@ -462,6 +642,32 @@ pub fn parse_parquet_compression(
|
|
462
642
|
Ok(parsed)
|
463
643
|
}
|
464
644
|
|
645
|
+
impl<'s> TryConvert for Wrap<Row<'s>> {
|
646
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
647
|
+
let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
|
648
|
+
for item in ob.try_convert::<RArray>()?.each() {
|
649
|
+
vals.push(item?.try_convert::<Wrap<AnyValue<'s>>>()?);
|
650
|
+
}
|
651
|
+
let vals: Vec<AnyValue> = unsafe { std::mem::transmute(vals) };
|
652
|
+
Ok(Wrap(Row(vals)))
|
653
|
+
}
|
654
|
+
}
|
655
|
+
|
656
|
+
impl TryConvert for Wrap<Schema> {
|
657
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
658
|
+
let dict = ob.try_convert::<RHash>()?;
|
659
|
+
|
660
|
+
let mut schema = Vec::new();
|
661
|
+
dict.foreach(|key: String, val: Wrap<DataType>| {
|
662
|
+
schema.push(Field::new(&key, val.0));
|
663
|
+
Ok(ForEach::Continue)
|
664
|
+
})
|
665
|
+
.unwrap();
|
666
|
+
|
667
|
+
Ok(Wrap(schema.into_iter().into()))
|
668
|
+
}
|
669
|
+
}
|
670
|
+
|
465
671
|
#[derive(Clone, Debug)]
|
466
672
|
pub struct ObjectValue {
|
467
673
|
pub inner: Value,
|
@@ -503,18 +709,31 @@ impl From<Value> for ObjectValue {
|
|
503
709
|
}
|
504
710
|
}
|
505
711
|
|
712
|
+
impl TryConvert for ObjectValue {
|
713
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
714
|
+
Ok(ObjectValue { inner: ob })
|
715
|
+
}
|
716
|
+
}
|
717
|
+
|
506
718
|
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
    /// Reinterprets a type-erased polars object reference as an `&ObjectValue`.
    fn from(val: &dyn PolarsObjectSafe) -> Self {
        let erased = val as *const dyn PolarsObjectSafe;
        let concrete = erased as *const ObjectValue;
        // NOTE(review): soundness depends on every `PolarsObjectSafe` reaching this
        // conversion actually being an `ObjectValue` — nothing here checks it; confirm
        // against the callers.
        unsafe { &*concrete }
    }
}
|
511
723
|
|
724
|
+
// TODO remove
|
512
725
|
impl ObjectValue {
|
513
726
|
pub fn to_object(&self) -> Value {
|
514
727
|
self.inner
|
515
728
|
}
|
516
729
|
}
|
517
730
|
|
731
|
+
impl From<ObjectValue> for Value {
|
732
|
+
fn from(val: ObjectValue) -> Self {
|
733
|
+
val.inner
|
734
|
+
}
|
735
|
+
}
|
736
|
+
|
518
737
|
impl Default for ObjectValue {
|
519
738
|
fn default() -> Self {
|
520
739
|
ObjectValue { inner: *QNIL }
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,15 +1,22 @@
|
|
1
1
|
use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
|
2
|
+
use polars::frame::NullStrategy;
|
3
|
+
use polars::io::avro::AvroCompression;
|
2
4
|
use polars::io::mmap::ReaderBytes;
|
3
5
|
use polars::io::RowCount;
|
6
|
+
use polars::prelude::pivot::{pivot, pivot_stable};
|
4
7
|
use polars::prelude::*;
|
5
8
|
use std::cell::RefCell;
|
6
9
|
use std::io::{BufWriter, Cursor};
|
7
10
|
use std::ops::Deref;
|
8
11
|
|
12
|
+
use crate::apply::dataframe::{
|
13
|
+
apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
|
14
|
+
apply_lambda_with_utf8_out_type,
|
15
|
+
};
|
9
16
|
use crate::conversion::*;
|
10
17
|
use crate::file::{get_file_like, get_mmap_bytes_reader};
|
11
18
|
use crate::series::{to_rbseries_collection, to_series_collection};
|
12
|
-
use crate::{series, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
19
|
+
use crate::{series, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
13
20
|
|
14
21
|
#[magnus::wrap(class = "Polars::RbDataFrame")]
|
15
22
|
pub struct RbDataFrame {
|
@@ -179,6 +186,48 @@ impl RbDataFrame {
|
|
179
186
|
Ok(RbDataFrame::new(df))
|
180
187
|
}
|
181
188
|
|
189
|
+
pub fn read_avro(
|
190
|
+
rb_f: Value,
|
191
|
+
columns: Option<Vec<String>>,
|
192
|
+
projection: Option<Vec<usize>>,
|
193
|
+
n_rows: Option<usize>,
|
194
|
+
) -> RbResult<Self> {
|
195
|
+
use polars::io::avro::AvroReader;
|
196
|
+
|
197
|
+
let file = get_file_like(rb_f, false)?;
|
198
|
+
let df = AvroReader::new(file)
|
199
|
+
.with_projection(projection)
|
200
|
+
.with_columns(columns)
|
201
|
+
.with_n_rows(n_rows)
|
202
|
+
.finish()
|
203
|
+
.map_err(RbPolarsErr::from)?;
|
204
|
+
Ok(RbDataFrame::new(df))
|
205
|
+
}
|
206
|
+
|
207
|
+
pub fn write_avro(
|
208
|
+
&self,
|
209
|
+
rb_f: Value,
|
210
|
+
compression: Wrap<Option<AvroCompression>>,
|
211
|
+
) -> RbResult<()> {
|
212
|
+
use polars::io::avro::AvroWriter;
|
213
|
+
|
214
|
+
if let Ok(s) = rb_f.try_convert::<String>() {
|
215
|
+
let f = std::fs::File::create(&s).unwrap();
|
216
|
+
AvroWriter::new(f)
|
217
|
+
.with_compression(compression.0)
|
218
|
+
.finish(&mut self.df.borrow_mut())
|
219
|
+
.map_err(RbPolarsErr::from)?;
|
220
|
+
} else {
|
221
|
+
let mut buf = get_file_like(rb_f, true)?;
|
222
|
+
AvroWriter::new(&mut buf)
|
223
|
+
.with_compression(compression.0)
|
224
|
+
.finish(&mut self.df.borrow_mut())
|
225
|
+
.map_err(RbPolarsErr::from)?;
|
226
|
+
}
|
227
|
+
|
228
|
+
Ok(())
|
229
|
+
}
|
230
|
+
|
182
231
|
pub fn read_json(rb_f: Value) -> RbResult<Self> {
|
183
232
|
// memmap the file first
|
184
233
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
@@ -238,6 +287,14 @@ impl RbDataFrame {
|
|
238
287
|
Ok(())
|
239
288
|
}
|
240
289
|
|
290
|
+
pub fn read_hashes(
|
291
|
+
_dicts: Value,
|
292
|
+
_infer_schema_length: Option<usize>,
|
293
|
+
_schema_overwrite: Option<Wrap<Schema>>,
|
294
|
+
) -> RbResult<Self> {
|
295
|
+
Err(RbPolarsErr::todo())
|
296
|
+
}
|
297
|
+
|
241
298
|
pub fn read_hash(data: RHash) -> RbResult<Self> {
|
242
299
|
let mut cols: Vec<Series> = Vec::new();
|
243
300
|
data.foreach(|name: String, values: Value| {
|
@@ -341,7 +398,7 @@ impl RbDataFrame {
|
|
341
398
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
342
399
|
obj.unwrap().to_object()
|
343
400
|
}
|
344
|
-
_ => Wrap(s.get(idx)).into(),
|
401
|
+
_ => Wrap(s.get(idx).unwrap()).into(),
|
345
402
|
})
|
346
403
|
.collect(),
|
347
404
|
)
|
@@ -364,7 +421,7 @@ impl RbDataFrame {
|
|
364
421
|
s.get_object(idx).map(|any| any.into());
|
365
422
|
obj.unwrap().to_object()
|
366
423
|
}
|
367
|
-
_ => Wrap(s.get(idx)).into(),
|
424
|
+
_ => Wrap(s.get(idx).unwrap()).into(),
|
368
425
|
})
|
369
426
|
.collect(),
|
370
427
|
)
|
@@ -517,9 +574,8 @@ impl RbDataFrame {
|
|
517
574
|
.collect()
|
518
575
|
}
|
519
576
|
|
520
|
-
pub fn n_chunks(&self) ->
|
521
|
-
|
522
|
-
Ok(n)
|
577
|
+
pub fn n_chunks(&self) -> usize {
|
578
|
+
self.df.borrow().n_chunks()
|
523
579
|
}
|
524
580
|
|
525
581
|
pub fn shape(&self) -> (usize, usize) {
|
@@ -751,6 +807,31 @@ impl RbDataFrame {
|
|
751
807
|
Ok(RbDataFrame::new(df))
|
752
808
|
}
|
753
809
|
|
810
|
+
pub fn pivot_expr(
|
811
|
+
&self,
|
812
|
+
values: Vec<String>,
|
813
|
+
index: Vec<String>,
|
814
|
+
columns: Vec<String>,
|
815
|
+
aggregate_expr: &RbExpr,
|
816
|
+
maintain_order: bool,
|
817
|
+
sort_columns: bool,
|
818
|
+
) -> RbResult<Self> {
|
819
|
+
let fun = match maintain_order {
|
820
|
+
true => pivot_stable,
|
821
|
+
false => pivot,
|
822
|
+
};
|
823
|
+
let df = fun(
|
824
|
+
&self.df.borrow(),
|
825
|
+
values,
|
826
|
+
index,
|
827
|
+
columns,
|
828
|
+
aggregate_expr.inner.clone(),
|
829
|
+
sort_columns,
|
830
|
+
)
|
831
|
+
.map_err(RbPolarsErr::from)?;
|
832
|
+
Ok(RbDataFrame::new(df))
|
833
|
+
}
|
834
|
+
|
754
835
|
pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<Vec<Self>> {
|
755
836
|
let out = if stable {
|
756
837
|
self.df.borrow().partition_by_stable(groups)
|
@@ -870,10 +951,74 @@ impl RbDataFrame {
|
|
870
951
|
df.into()
|
871
952
|
}
|
872
953
|
|
954
|
+
pub fn apply(
|
955
|
+
&self,
|
956
|
+
lambda: Value,
|
957
|
+
output_type: Option<Wrap<DataType>>,
|
958
|
+
inference_size: usize,
|
959
|
+
) -> RbResult<(Value, bool)> {
|
960
|
+
let df = &self.df.borrow();
|
961
|
+
|
962
|
+
let output_type = output_type.map(|dt| dt.0);
|
963
|
+
let out = match output_type {
|
964
|
+
Some(DataType::Int32) => {
|
965
|
+
apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None).into_series()
|
966
|
+
}
|
967
|
+
Some(DataType::Int64) => {
|
968
|
+
apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None).into_series()
|
969
|
+
}
|
970
|
+
Some(DataType::UInt32) => {
|
971
|
+
apply_lambda_with_primitive_out_type::<UInt32Type>(df, lambda, 0, None)
|
972
|
+
.into_series()
|
973
|
+
}
|
974
|
+
Some(DataType::UInt64) => {
|
975
|
+
apply_lambda_with_primitive_out_type::<UInt64Type>(df, lambda, 0, None)
|
976
|
+
.into_series()
|
977
|
+
}
|
978
|
+
Some(DataType::Float32) => {
|
979
|
+
apply_lambda_with_primitive_out_type::<Float32Type>(df, lambda, 0, None)
|
980
|
+
.into_series()
|
981
|
+
}
|
982
|
+
Some(DataType::Float64) => {
|
983
|
+
apply_lambda_with_primitive_out_type::<Float64Type>(df, lambda, 0, None)
|
984
|
+
.into_series()
|
985
|
+
}
|
986
|
+
Some(DataType::Boolean) => {
|
987
|
+
apply_lambda_with_bool_out_type(df, lambda, 0, None).into_series()
|
988
|
+
}
|
989
|
+
Some(DataType::Date) => {
|
990
|
+
apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None)
|
991
|
+
.into_date()
|
992
|
+
.into_series()
|
993
|
+
}
|
994
|
+
Some(DataType::Datetime(tu, tz)) => {
|
995
|
+
apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None)
|
996
|
+
.into_datetime(tu, tz)
|
997
|
+
.into_series()
|
998
|
+
}
|
999
|
+
Some(DataType::Utf8) => {
|
1000
|
+
apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
|
1001
|
+
}
|
1002
|
+
_ => return apply_lambda_unknown(df, lambda, inference_size),
|
1003
|
+
};
|
1004
|
+
|
1005
|
+
Ok((RbSeries::from(out).into(), false))
|
1006
|
+
}
|
1007
|
+
|
873
1008
|
pub fn shrink_to_fit(&self) {
|
874
1009
|
self.df.borrow_mut().shrink_to_fit();
|
875
1010
|
}
|
876
1011
|
|
1012
|
+
pub fn hash_rows(&self, k0: u64, k1: u64, k2: u64, k3: u64) -> RbResult<RbSeries> {
|
1013
|
+
let hb = ahash::RandomState::with_seeds(k0, k1, k2, k3);
|
1014
|
+
let hash = self
|
1015
|
+
.df
|
1016
|
+
.borrow_mut()
|
1017
|
+
.hash_rows(Some(hb))
|
1018
|
+
.map_err(RbPolarsErr::from)?;
|
1019
|
+
Ok(hash.into_series().into())
|
1020
|
+
}
|
1021
|
+
|
877
1022
|
pub fn transpose(&self, include_header: bool, names: String) -> RbResult<Self> {
|
878
1023
|
let mut df = self.df.borrow().transpose().map_err(RbPolarsErr::from)?;
|
879
1024
|
if include_header {
|
data/ext/polars/src/error.rs
CHANGED
@@ -1,7 +1,39 @@
|
|
1
1
|
use magnus::Value;
|
2
|
-
use polars::
|
3
|
-
|
2
|
+
use polars::prelude::*;
|
3
|
+
|
4
|
+
use crate::lazy::dsl::RbExpr;
|
5
|
+
use crate::Wrap;
|
4
6
|
|
5
7
|
/// Not implemented yet: ignores its arguments and always panics via `todo!()`.
pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Series> {
    todo!()
}
|
10
|
+
|
11
|
+
pub fn map_single(
|
12
|
+
rbexpr: &RbExpr,
|
13
|
+
_lambda: Value,
|
14
|
+
output_type: Option<Wrap<DataType>>,
|
15
|
+
agg_list: bool,
|
16
|
+
) -> RbExpr {
|
17
|
+
let output_type = output_type.map(|wrap| wrap.0);
|
18
|
+
|
19
|
+
let output_type2 = output_type.clone();
|
20
|
+
let function = move |_s: Series| {
|
21
|
+
let _output_type = output_type2.clone().unwrap_or(DataType::Unknown);
|
22
|
+
|
23
|
+
todo!();
|
24
|
+
};
|
25
|
+
|
26
|
+
let output_map = GetOutput::map_field(move |fld| match output_type {
|
27
|
+
Some(ref dt) => Field::new(fld.name(), dt.clone()),
|
28
|
+
None => {
|
29
|
+
let mut fld = fld.clone();
|
30
|
+
fld.coerce(DataType::Unknown);
|
31
|
+
fld
|
32
|
+
}
|
33
|
+
});
|
34
|
+
if agg_list {
|
35
|
+
rbexpr.clone().inner.map_list(function, output_map).into()
|
36
|
+
} else {
|
37
|
+
rbexpr.clone().inner.map(function, output_map).into()
|
38
|
+
}
|
39
|
+
}
|