polars-df 0.1.3 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Cargo.lock +142 -11
- data/Cargo.toml +5 -0
- data/ext/polars/Cargo.toml +17 -1
- data/ext/polars/src/apply/dataframe.rs +292 -0
- data/ext/polars/src/apply/mod.rs +254 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +180 -5
- data/ext/polars/src/dataframe.rs +146 -1
- data/ext/polars/src/error.rs +12 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +74 -3
- data/ext/polars/src/lazy/dsl.rs +136 -0
- data/ext/polars/src/lib.rs +199 -1
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +331 -0
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1558 -60
- data/lib/polars/date_time_expr.rb +2 -2
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/dynamic_group_by.rb +49 -0
- data/lib/polars/expr.rb +4072 -107
- data/lib/polars/expr_dispatch.rb +8 -0
- data/lib/polars/functions.rb +192 -3
- data/lib/polars/group_by.rb +44 -3
- data/lib/polars/io.rb +20 -4
- data/lib/polars/lazy_frame.rb +800 -26
- data/lib/polars/lazy_functions.rb +687 -43
- data/lib/polars/lazy_group_by.rb +1 -0
- data/lib/polars/list_expr.rb +502 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +934 -62
- data/lib/polars/string_expr.rb +189 -13
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +44 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +14 -1
- metadata +15 -3
@@ -1,33 +1,58 @@
|
|
1
|
-
use magnus::{class, RArray, Symbol, TryConvert, Value, QNIL};
|
1
|
+
use magnus::{class, r_hash::ForEach, RArray, RHash, Symbol, TryConvert, Value, QNIL};
|
2
2
|
use polars::chunked_array::object::PolarsObjectSafe;
|
3
3
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
4
4
|
use polars::datatypes::AnyValue;
|
5
|
+
use polars::frame::row::Row;
|
5
6
|
use polars::frame::DataFrame;
|
7
|
+
use polars::io::avro::AvroCompression;
|
6
8
|
use polars::prelude::*;
|
7
9
|
use polars::series::ops::NullBehavior;
|
10
|
+
use std::fmt::{Display, Formatter};
|
11
|
+
use std::hash::{Hash, Hasher};
|
8
12
|
|
9
|
-
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
13
|
+
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
10
14
|
|
15
|
+
pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
16
|
+
// Safety:
|
17
|
+
// Wrap is transparent.
|
18
|
+
unsafe { std::mem::transmute(slice) }
|
19
|
+
}
|
20
|
+
|
21
|
+
#[repr(transparent)]
|
11
22
|
pub struct Wrap<T>(pub T);
|
12
23
|
|
24
|
+
impl<T> Clone for Wrap<T>
|
25
|
+
where
|
26
|
+
T: Clone,
|
27
|
+
{
|
28
|
+
fn clone(&self) -> Self {
|
29
|
+
Wrap(self.0.clone())
|
30
|
+
}
|
31
|
+
}
|
32
|
+
|
13
33
|
impl<T> From<T> for Wrap<T> {
|
14
34
|
fn from(t: T) -> Self {
|
15
35
|
Wrap(t)
|
16
36
|
}
|
17
37
|
}
|
18
38
|
|
19
|
-
pub fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
|
39
|
+
pub(crate) fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
|
20
40
|
let seq: RArray = obj.try_convert()?;
|
21
41
|
let len = seq.len();
|
22
42
|
Ok((seq, len))
|
23
43
|
}
|
24
44
|
|
25
|
-
pub fn get_df(obj: Value) -> RbResult<DataFrame> {
|
45
|
+
pub(crate) fn get_df(obj: Value) -> RbResult<DataFrame> {
|
26
46
|
let rbdf = obj.funcall::<_, _, &RbDataFrame>("_df", ())?;
|
27
47
|
Ok(rbdf.df.borrow().clone())
|
28
48
|
}
|
29
49
|
|
30
|
-
pub fn get_series(obj: Value) -> RbResult<Series> {
|
50
|
+
pub(crate) fn get_lf(obj: Value) -> RbResult<LazyFrame> {
|
51
|
+
let rbdf = obj.funcall::<_, _, &RbLazyFrame>("_ldf", ())?;
|
52
|
+
Ok(rbdf.ldf.clone())
|
53
|
+
}
|
54
|
+
|
55
|
+
pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
|
31
56
|
let rbs = obj.funcall::<_, _, &RbSeries>("_s", ())?;
|
32
57
|
Ok(rbs.series.borrow().clone())
|
33
58
|
}
|
@@ -87,6 +112,25 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
87
112
|
.unwrap()
|
88
113
|
.funcall::<_, _, Value>("to_date", ())
|
89
114
|
.unwrap(),
|
115
|
+
AnyValue::Datetime(v, tu, tz) => {
|
116
|
+
let t = match tu {
|
117
|
+
TimeUnit::Nanoseconds => todo!(),
|
118
|
+
TimeUnit::Microseconds => {
|
119
|
+
let sec = v / 1000000;
|
120
|
+
let subsec = v % 1000000;
|
121
|
+
class::time()
|
122
|
+
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
|
123
|
+
.unwrap()
|
124
|
+
}
|
125
|
+
TimeUnit::Milliseconds => todo!(),
|
126
|
+
};
|
127
|
+
|
128
|
+
if tz.is_some() {
|
129
|
+
todo!();
|
130
|
+
} else {
|
131
|
+
t.funcall::<_, _, Value>("utc", ()).unwrap()
|
132
|
+
}
|
133
|
+
}
|
90
134
|
_ => todo!(),
|
91
135
|
}
|
92
136
|
}
|
@@ -150,6 +194,39 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
150
194
|
}
|
151
195
|
}
|
152
196
|
|
197
|
+
impl TryConvert for Wrap<AsofStrategy> {
|
198
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
199
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
200
|
+
"backward" => AsofStrategy::Backward,
|
201
|
+
"forward" => AsofStrategy::Forward,
|
202
|
+
v => {
|
203
|
+
return Err(RbValueError::new_err(format!(
|
204
|
+
"strategy must be one of {{'backward', 'forward'}}, got {}",
|
205
|
+
v
|
206
|
+
)))
|
207
|
+
}
|
208
|
+
};
|
209
|
+
Ok(Wrap(parsed))
|
210
|
+
}
|
211
|
+
}
|
212
|
+
|
213
|
+
impl TryConvert for Wrap<Option<AvroCompression>> {
|
214
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
215
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
216
|
+
"uncompressed" => None,
|
217
|
+
"snappy" => Some(AvroCompression::Snappy),
|
218
|
+
"deflate" => Some(AvroCompression::Deflate),
|
219
|
+
v => {
|
220
|
+
return Err(RbValueError::new_err(format!(
|
221
|
+
"compression must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {}",
|
222
|
+
v
|
223
|
+
)))
|
224
|
+
}
|
225
|
+
};
|
226
|
+
Ok(Wrap(parsed))
|
227
|
+
}
|
228
|
+
}
|
229
|
+
|
153
230
|
impl TryConvert for Wrap<CategoricalOrdering> {
|
154
231
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
155
232
|
let parsed = match ob.try_convert::<String>()?.as_str() {
|
@@ -238,6 +315,22 @@ impl TryConvert for Wrap<JoinType> {
|
|
238
315
|
}
|
239
316
|
}
|
240
317
|
|
318
|
+
impl TryConvert for Wrap<ListToStructWidthStrategy> {
|
319
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
320
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
321
|
+
"first_non_null" => ListToStructWidthStrategy::FirstNonNull,
|
322
|
+
"max_width" => ListToStructWidthStrategy::MaxWidth,
|
323
|
+
v => {
|
324
|
+
return Err(RbValueError::new_err(format!(
|
325
|
+
"n_field_strategy must be one of {{'first_non_null', 'max_width'}}, got {}",
|
326
|
+
v
|
327
|
+
)))
|
328
|
+
}
|
329
|
+
};
|
330
|
+
Ok(Wrap(parsed))
|
331
|
+
}
|
332
|
+
}
|
333
|
+
|
241
334
|
impl TryConvert for Wrap<NullBehavior> {
|
242
335
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
243
336
|
let parsed = match ob.try_convert::<String>()?.as_str() {
|
@@ -425,18 +518,100 @@ pub fn parse_parquet_compression(
|
|
425
518
|
Ok(parsed)
|
426
519
|
}
|
427
520
|
|
521
|
+
impl<'s> TryConvert for Wrap<Row<'s>> {
|
522
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
523
|
+
let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
|
524
|
+
for item in ob.try_convert::<RArray>()?.each() {
|
525
|
+
vals.push(item?.try_convert::<Wrap<AnyValue<'s>>>()?);
|
526
|
+
}
|
527
|
+
let vals: Vec<AnyValue> = unsafe { std::mem::transmute(vals) };
|
528
|
+
Ok(Wrap(Row(vals)))
|
529
|
+
}
|
530
|
+
}
|
531
|
+
|
532
|
+
impl TryConvert for Wrap<Schema> {
|
533
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
534
|
+
let dict = ob.try_convert::<RHash>()?;
|
535
|
+
|
536
|
+
let mut schema = Vec::new();
|
537
|
+
dict.foreach(|key: String, val: Wrap<DataType>| {
|
538
|
+
schema.push(Field::new(&key, val.0));
|
539
|
+
Ok(ForEach::Continue)
|
540
|
+
})
|
541
|
+
.unwrap();
|
542
|
+
|
543
|
+
Ok(Wrap(schema.into_iter().into()))
|
544
|
+
}
|
545
|
+
}
|
546
|
+
|
547
|
+
#[derive(Clone, Debug)]
|
428
548
|
pub struct ObjectValue {
|
429
549
|
pub inner: Value,
|
430
550
|
}
|
431
551
|
|
552
|
+
impl Hash for ObjectValue {
|
553
|
+
fn hash<H: Hasher>(&self, state: &mut H) {
|
554
|
+
let h = self
|
555
|
+
.inner
|
556
|
+
.funcall::<_, _, isize>("hash", ())
|
557
|
+
.expect("should be hashable");
|
558
|
+
state.write_isize(h)
|
559
|
+
}
|
560
|
+
}
|
561
|
+
|
562
|
+
impl Eq for ObjectValue {}
|
563
|
+
|
564
|
+
impl PartialEq for ObjectValue {
|
565
|
+
fn eq(&self, other: &Self) -> bool {
|
566
|
+
self.inner.eql(&other.inner).unwrap_or(false)
|
567
|
+
}
|
568
|
+
}
|
569
|
+
|
570
|
+
impl Display for ObjectValue {
|
571
|
+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
572
|
+
write!(f, "{}", self.inner)
|
573
|
+
}
|
574
|
+
}
|
575
|
+
|
576
|
+
impl PolarsObject for ObjectValue {
|
577
|
+
fn type_name() -> &'static str {
|
578
|
+
"object"
|
579
|
+
}
|
580
|
+
}
|
581
|
+
|
582
|
+
impl From<Value> for ObjectValue {
|
583
|
+
fn from(v: Value) -> Self {
|
584
|
+
Self { inner: v }
|
585
|
+
}
|
586
|
+
}
|
587
|
+
|
588
|
+
impl TryConvert for ObjectValue {
|
589
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
590
|
+
Ok(ObjectValue { inner: ob })
|
591
|
+
}
|
592
|
+
}
|
593
|
+
|
432
594
|
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
|
433
595
|
fn from(val: &dyn PolarsObjectSafe) -> Self {
|
434
596
|
unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
|
435
597
|
}
|
436
598
|
}
|
437
599
|
|
600
|
+
// TODO remove
|
438
601
|
impl ObjectValue {
|
439
602
|
pub fn to_object(&self) -> Value {
|
440
603
|
self.inner
|
441
604
|
}
|
442
605
|
}
|
606
|
+
|
607
|
+
impl From<ObjectValue> for Value {
|
608
|
+
fn from(val: ObjectValue) -> Self {
|
609
|
+
val.inner
|
610
|
+
}
|
611
|
+
}
|
612
|
+
|
613
|
+
impl Default for ObjectValue {
|
614
|
+
fn default() -> Self {
|
615
|
+
ObjectValue { inner: *QNIL }
|
616
|
+
}
|
617
|
+
}
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,15 +1,21 @@
|
|
1
1
|
use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
|
2
|
+
use polars::io::avro::AvroCompression;
|
2
3
|
use polars::io::mmap::ReaderBytes;
|
3
4
|
use polars::io::RowCount;
|
5
|
+
use polars::prelude::pivot::{pivot, pivot_stable};
|
4
6
|
use polars::prelude::*;
|
5
7
|
use std::cell::RefCell;
|
6
8
|
use std::io::{BufWriter, Cursor};
|
7
9
|
use std::ops::Deref;
|
8
10
|
|
11
|
+
use crate::apply::dataframe::{
|
12
|
+
apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
|
13
|
+
apply_lambda_with_utf8_out_type,
|
14
|
+
};
|
9
15
|
use crate::conversion::*;
|
10
16
|
use crate::file::{get_file_like, get_mmap_bytes_reader};
|
11
17
|
use crate::series::{to_rbseries_collection, to_series_collection};
|
12
|
-
use crate::{series, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
18
|
+
use crate::{series, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
13
19
|
|
14
20
|
#[magnus::wrap(class = "Polars::RbDataFrame")]
|
15
21
|
pub struct RbDataFrame {
|
@@ -179,6 +185,48 @@ impl RbDataFrame {
|
|
179
185
|
Ok(RbDataFrame::new(df))
|
180
186
|
}
|
181
187
|
|
188
|
+
pub fn read_avro(
|
189
|
+
rb_f: Value,
|
190
|
+
columns: Option<Vec<String>>,
|
191
|
+
projection: Option<Vec<usize>>,
|
192
|
+
n_rows: Option<usize>,
|
193
|
+
) -> RbResult<Self> {
|
194
|
+
use polars::io::avro::AvroReader;
|
195
|
+
|
196
|
+
let file = get_file_like(rb_f, false)?;
|
197
|
+
let df = AvroReader::new(file)
|
198
|
+
.with_projection(projection)
|
199
|
+
.with_columns(columns)
|
200
|
+
.with_n_rows(n_rows)
|
201
|
+
.finish()
|
202
|
+
.map_err(RbPolarsErr::from)?;
|
203
|
+
Ok(RbDataFrame::new(df))
|
204
|
+
}
|
205
|
+
|
206
|
+
pub fn write_avro(
|
207
|
+
&self,
|
208
|
+
rb_f: Value,
|
209
|
+
compression: Wrap<Option<AvroCompression>>,
|
210
|
+
) -> RbResult<()> {
|
211
|
+
use polars::io::avro::AvroWriter;
|
212
|
+
|
213
|
+
if let Ok(s) = rb_f.try_convert::<String>() {
|
214
|
+
let f = std::fs::File::create(&s).unwrap();
|
215
|
+
AvroWriter::new(f)
|
216
|
+
.with_compression(compression.0)
|
217
|
+
.finish(&mut self.df.borrow_mut())
|
218
|
+
.map_err(RbPolarsErr::from)?;
|
219
|
+
} else {
|
220
|
+
let mut buf = get_file_like(rb_f, true)?;
|
221
|
+
AvroWriter::new(&mut buf)
|
222
|
+
.with_compression(compression.0)
|
223
|
+
.finish(&mut self.df.borrow_mut())
|
224
|
+
.map_err(RbPolarsErr::from)?;
|
225
|
+
}
|
226
|
+
|
227
|
+
Ok(())
|
228
|
+
}
|
229
|
+
|
182
230
|
pub fn read_json(rb_f: Value) -> RbResult<Self> {
|
183
231
|
// memmap the file first
|
184
232
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
@@ -238,6 +286,14 @@ impl RbDataFrame {
|
|
238
286
|
Ok(())
|
239
287
|
}
|
240
288
|
|
289
|
+
pub fn read_hashes(
|
290
|
+
_dicts: Value,
|
291
|
+
_infer_schema_length: Option<usize>,
|
292
|
+
_schema_overwrite: Option<Wrap<Schema>>,
|
293
|
+
) -> RbResult<Self> {
|
294
|
+
Err(RbPolarsErr::todo())
|
295
|
+
}
|
296
|
+
|
241
297
|
pub fn read_hash(data: RHash) -> RbResult<Self> {
|
242
298
|
let mut cols: Vec<Series> = Vec::new();
|
243
299
|
data.foreach(|name: String, values: Value| {
|
@@ -751,6 +807,31 @@ impl RbDataFrame {
|
|
751
807
|
Ok(RbDataFrame::new(df))
|
752
808
|
}
|
753
809
|
|
810
|
+
pub fn pivot_expr(
|
811
|
+
&self,
|
812
|
+
values: Vec<String>,
|
813
|
+
index: Vec<String>,
|
814
|
+
columns: Vec<String>,
|
815
|
+
aggregate_expr: &RbExpr,
|
816
|
+
maintain_order: bool,
|
817
|
+
sort_columns: bool,
|
818
|
+
) -> RbResult<Self> {
|
819
|
+
let fun = match maintain_order {
|
820
|
+
true => pivot_stable,
|
821
|
+
false => pivot,
|
822
|
+
};
|
823
|
+
let df = fun(
|
824
|
+
&self.df.borrow(),
|
825
|
+
values,
|
826
|
+
index,
|
827
|
+
columns,
|
828
|
+
aggregate_expr.inner.clone(),
|
829
|
+
sort_columns,
|
830
|
+
)
|
831
|
+
.map_err(RbPolarsErr::from)?;
|
832
|
+
Ok(RbDataFrame::new(df))
|
833
|
+
}
|
834
|
+
|
754
835
|
pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<Vec<Self>> {
|
755
836
|
let out = if stable {
|
756
837
|
self.df.borrow().partition_by_stable(groups)
|
@@ -870,10 +951,74 @@ impl RbDataFrame {
|
|
870
951
|
df.into()
|
871
952
|
}
|
872
953
|
|
954
|
+
pub fn apply(
|
955
|
+
&self,
|
956
|
+
lambda: Value,
|
957
|
+
output_type: Option<Wrap<DataType>>,
|
958
|
+
inference_size: usize,
|
959
|
+
) -> RbResult<(Value, bool)> {
|
960
|
+
let df = &self.df.borrow();
|
961
|
+
|
962
|
+
let output_type = output_type.map(|dt| dt.0);
|
963
|
+
let out = match output_type {
|
964
|
+
Some(DataType::Int32) => {
|
965
|
+
apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None).into_series()
|
966
|
+
}
|
967
|
+
Some(DataType::Int64) => {
|
968
|
+
apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None).into_series()
|
969
|
+
}
|
970
|
+
Some(DataType::UInt32) => {
|
971
|
+
apply_lambda_with_primitive_out_type::<UInt32Type>(df, lambda, 0, None)
|
972
|
+
.into_series()
|
973
|
+
}
|
974
|
+
Some(DataType::UInt64) => {
|
975
|
+
apply_lambda_with_primitive_out_type::<UInt64Type>(df, lambda, 0, None)
|
976
|
+
.into_series()
|
977
|
+
}
|
978
|
+
Some(DataType::Float32) => {
|
979
|
+
apply_lambda_with_primitive_out_type::<Float32Type>(df, lambda, 0, None)
|
980
|
+
.into_series()
|
981
|
+
}
|
982
|
+
Some(DataType::Float64) => {
|
983
|
+
apply_lambda_with_primitive_out_type::<Float64Type>(df, lambda, 0, None)
|
984
|
+
.into_series()
|
985
|
+
}
|
986
|
+
Some(DataType::Boolean) => {
|
987
|
+
apply_lambda_with_bool_out_type(df, lambda, 0, None).into_series()
|
988
|
+
}
|
989
|
+
Some(DataType::Date) => {
|
990
|
+
apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None)
|
991
|
+
.into_date()
|
992
|
+
.into_series()
|
993
|
+
}
|
994
|
+
Some(DataType::Datetime(tu, tz)) => {
|
995
|
+
apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None)
|
996
|
+
.into_datetime(tu, tz)
|
997
|
+
.into_series()
|
998
|
+
}
|
999
|
+
Some(DataType::Utf8) => {
|
1000
|
+
apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
|
1001
|
+
}
|
1002
|
+
_ => return apply_lambda_unknown(df, lambda, inference_size),
|
1003
|
+
};
|
1004
|
+
|
1005
|
+
Ok((RbSeries::from(out).into(), false))
|
1006
|
+
}
|
1007
|
+
|
873
1008
|
pub fn shrink_to_fit(&self) {
|
874
1009
|
self.df.borrow_mut().shrink_to_fit();
|
875
1010
|
}
|
876
1011
|
|
1012
|
+
pub fn hash_rows(&self, k0: u64, k1: u64, k2: u64, k3: u64) -> RbResult<RbSeries> {
|
1013
|
+
let hb = ahash::RandomState::with_seeds(k0, k1, k2, k3);
|
1014
|
+
let hash = self
|
1015
|
+
.df
|
1016
|
+
.borrow_mut()
|
1017
|
+
.hash_rows(Some(hb))
|
1018
|
+
.map_err(RbPolarsErr::from)?;
|
1019
|
+
Ok(hash.into_series().into())
|
1020
|
+
}
|
1021
|
+
|
877
1022
|
pub fn transpose(&self, include_header: bool, names: String) -> RbResult<Self> {
|
878
1023
|
let mut df = self.df.borrow().transpose().map_err(RbPolarsErr::from)?;
|
879
1024
|
if include_header {
|
data/ext/polars/src/error.rs
CHANGED
@@ -22,6 +22,10 @@ impl RbPolarsErr {
|
|
22
22
|
pub fn other(message: String) -> Error {
|
23
23
|
Error::runtime_error(message)
|
24
24
|
}
|
25
|
+
|
26
|
+
pub fn todo() -> Error {
|
27
|
+
Error::runtime_error("not implemented yet")
|
28
|
+
}
|
25
29
|
}
|
26
30
|
|
27
31
|
pub struct RbValueError {}
|
@@ -31,3 +35,11 @@ impl RbValueError {
|
|
31
35
|
Error::new(arg_error(), message)
|
32
36
|
}
|
33
37
|
}
|
38
|
+
|
39
|
+
pub struct ComputeError {}
|
40
|
+
|
41
|
+
impl ComputeError {
|
42
|
+
pub fn new_err(message: String) -> Error {
|
43
|
+
Error::runtime_error(message)
|
44
|
+
}
|
45
|
+
}
|
@@ -1,7 +1,39 @@
|
|
1
1
|
use magnus::Value;
|
2
|
-
use polars::
|
3
|
-
|
2
|
+
use polars::prelude::*;
|
3
|
+
|
4
|
+
use crate::lazy::dsl::RbExpr;
|
5
|
+
use crate::Wrap;
|
4
6
|
|
5
7
|
pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Series> {
|
6
8
|
todo!();
|
7
9
|
}
|
10
|
+
|
11
|
+
pub fn map_single(
|
12
|
+
rbexpr: &RbExpr,
|
13
|
+
_lambda: Value,
|
14
|
+
output_type: Option<Wrap<DataType>>,
|
15
|
+
agg_list: bool,
|
16
|
+
) -> RbExpr {
|
17
|
+
let output_type = output_type.map(|wrap| wrap.0);
|
18
|
+
|
19
|
+
let output_type2 = output_type.clone();
|
20
|
+
let function = move |_s: Series| {
|
21
|
+
let _output_type = output_type2.clone().unwrap_or(DataType::Unknown);
|
22
|
+
|
23
|
+
todo!();
|
24
|
+
};
|
25
|
+
|
26
|
+
let output_map = GetOutput::map_field(move |fld| match output_type {
|
27
|
+
Some(ref dt) => Field::new(fld.name(), dt.clone()),
|
28
|
+
None => {
|
29
|
+
let mut fld = fld.clone();
|
30
|
+
fld.coerce(DataType::Unknown);
|
31
|
+
fld
|
32
|
+
}
|
33
|
+
});
|
34
|
+
if agg_list {
|
35
|
+
rbexpr.clone().inner.map_list(function, output_map).into()
|
36
|
+
} else {
|
37
|
+
rbexpr.clone().inner.map(function, output_map).into()
|
38
|
+
}
|
39
|
+
}
|
@@ -3,7 +3,7 @@ use polars::io::RowCount;
|
|
3
3
|
use polars::lazy::frame::{LazyFrame, LazyGroupBy};
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
6
|
-
use std::io::BufWriter;
|
6
|
+
use std::io::{BufWriter, Read};
|
7
7
|
|
8
8
|
use crate::conversion::*;
|
9
9
|
use crate::file::get_file_like;
|
@@ -53,6 +53,27 @@ impl From<LazyFrame> for RbLazyFrame {
|
|
53
53
|
}
|
54
54
|
|
55
55
|
impl RbLazyFrame {
|
56
|
+
pub fn read_json(rb_f: Value) -> RbResult<Self> {
|
57
|
+
// it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
|
58
|
+
// so don't bother with files.
|
59
|
+
let mut json = String::new();
|
60
|
+
let _ = get_file_like(rb_f, false)?
|
61
|
+
.read_to_string(&mut json)
|
62
|
+
.unwrap();
|
63
|
+
|
64
|
+
// Safety
|
65
|
+
// we skipped the serializing/deserializing of the static in lifetime in `DataType`
|
66
|
+
// so we actually don't have a lifetime at all when serializing.
|
67
|
+
|
68
|
+
// &str still has a lifetime. But it's ok, because we drop it immediately
|
69
|
+
// in this scope
|
70
|
+
let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
|
71
|
+
|
72
|
+
let lp = serde_json::from_str::<LogicalPlan>(json)
|
73
|
+
.map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
|
74
|
+
Ok(LazyFrame::from(lp).into())
|
75
|
+
}
|
76
|
+
|
56
77
|
pub fn new_from_ndjson(
|
57
78
|
path: String,
|
58
79
|
infer_schema_length: Option<usize>,
|
@@ -211,7 +232,7 @@ impl RbLazyFrame {
|
|
211
232
|
projection_pushdown: bool,
|
212
233
|
simplify_expr: bool,
|
213
234
|
slice_pushdown: bool,
|
214
|
-
|
235
|
+
cse: bool,
|
215
236
|
allow_streaming: bool,
|
216
237
|
) -> RbLazyFrame {
|
217
238
|
let ldf = self.ldf.clone();
|
@@ -220,7 +241,7 @@ impl RbLazyFrame {
|
|
220
241
|
.with_predicate_pushdown(predicate_pushdown)
|
221
242
|
.with_simplify_expr(simplify_expr)
|
222
243
|
.with_slice_pushdown(slice_pushdown)
|
223
|
-
|
244
|
+
.with_common_subplan_elimination(cse)
|
224
245
|
.with_streaming(allow_streaming)
|
225
246
|
.with_projection_pushdown(projection_pushdown);
|
226
247
|
ldf.into()
|
@@ -349,6 +370,56 @@ impl RbLazyFrame {
|
|
349
370
|
})
|
350
371
|
}
|
351
372
|
|
373
|
+
pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
|
374
|
+
let contexts = contexts
|
375
|
+
.each()
|
376
|
+
.map(|v| v.unwrap().try_convert())
|
377
|
+
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
378
|
+
let contexts = contexts
|
379
|
+
.into_iter()
|
380
|
+
.map(|ldf| ldf.ldf.clone())
|
381
|
+
.collect::<Vec<_>>();
|
382
|
+
Ok(self.ldf.clone().with_context(contexts).into())
|
383
|
+
}
|
384
|
+
|
385
|
+
#[allow(clippy::too_many_arguments)]
|
386
|
+
pub fn join_asof(
|
387
|
+
&self,
|
388
|
+
other: &RbLazyFrame,
|
389
|
+
left_on: &RbExpr,
|
390
|
+
right_on: &RbExpr,
|
391
|
+
left_by: Option<Vec<String>>,
|
392
|
+
right_by: Option<Vec<String>>,
|
393
|
+
allow_parallel: bool,
|
394
|
+
force_parallel: bool,
|
395
|
+
suffix: String,
|
396
|
+
strategy: Wrap<AsofStrategy>,
|
397
|
+
tolerance: Option<Wrap<AnyValue<'_>>>,
|
398
|
+
tolerance_str: Option<String>,
|
399
|
+
) -> RbResult<Self> {
|
400
|
+
let ldf = self.ldf.clone();
|
401
|
+
let other = other.ldf.clone();
|
402
|
+
let left_on = left_on.inner.clone();
|
403
|
+
let right_on = right_on.inner.clone();
|
404
|
+
Ok(ldf
|
405
|
+
.join_builder()
|
406
|
+
.with(other)
|
407
|
+
.left_on([left_on])
|
408
|
+
.right_on([right_on])
|
409
|
+
.allow_parallel(allow_parallel)
|
410
|
+
.force_parallel(force_parallel)
|
411
|
+
.how(JoinType::AsOf(AsOfOptions {
|
412
|
+
strategy: strategy.0,
|
413
|
+
left_by,
|
414
|
+
right_by,
|
415
|
+
tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
|
416
|
+
tolerance_str,
|
417
|
+
}))
|
418
|
+
.suffix(suffix)
|
419
|
+
.finish()
|
420
|
+
.into())
|
421
|
+
}
|
422
|
+
|
352
423
|
#[allow(clippy::too_many_arguments)]
|
353
424
|
pub fn join(
|
354
425
|
&self,
|