polars-df 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Cargo.lock +142 -11
- data/Cargo.toml +5 -0
- data/ext/polars/Cargo.toml +17 -1
- data/ext/polars/src/apply/dataframe.rs +292 -0
- data/ext/polars/src/apply/mod.rs +254 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +180 -5
- data/ext/polars/src/dataframe.rs +146 -1
- data/ext/polars/src/error.rs +12 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +74 -3
- data/ext/polars/src/lazy/dsl.rs +136 -0
- data/ext/polars/src/lib.rs +199 -1
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +331 -0
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1558 -60
- data/lib/polars/date_time_expr.rb +2 -2
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/dynamic_group_by.rb +49 -0
- data/lib/polars/expr.rb +4072 -107
- data/lib/polars/expr_dispatch.rb +8 -0
- data/lib/polars/functions.rb +192 -3
- data/lib/polars/group_by.rb +44 -3
- data/lib/polars/io.rb +20 -4
- data/lib/polars/lazy_frame.rb +800 -26
- data/lib/polars/lazy_functions.rb +687 -43
- data/lib/polars/lazy_group_by.rb +1 -0
- data/lib/polars/list_expr.rb +502 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +934 -62
- data/lib/polars/string_expr.rb +189 -13
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +44 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +14 -1
- metadata +15 -3
@@ -1,33 +1,58 @@
|
|
1
|
-
use magnus::{class, RArray, Symbol, TryConvert, Value, QNIL};
|
1
|
+
use magnus::{class, r_hash::ForEach, RArray, RHash, Symbol, TryConvert, Value, QNIL};
|
2
2
|
use polars::chunked_array::object::PolarsObjectSafe;
|
3
3
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
4
4
|
use polars::datatypes::AnyValue;
|
5
|
+
use polars::frame::row::Row;
|
5
6
|
use polars::frame::DataFrame;
|
7
|
+
use polars::io::avro::AvroCompression;
|
6
8
|
use polars::prelude::*;
|
7
9
|
use polars::series::ops::NullBehavior;
|
10
|
+
use std::fmt::{Display, Formatter};
|
11
|
+
use std::hash::{Hash, Hasher};
|
8
12
|
|
9
|
-
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
13
|
+
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
10
14
|
|
15
|
+
pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
16
|
+
// Safety:
|
17
|
+
// Wrap is transparent.
|
18
|
+
unsafe { std::mem::transmute(slice) }
|
19
|
+
}
|
20
|
+
|
21
|
+
#[repr(transparent)]
|
11
22
|
pub struct Wrap<T>(pub T);
|
12
23
|
|
24
|
+
impl<T> Clone for Wrap<T>
|
25
|
+
where
|
26
|
+
T: Clone,
|
27
|
+
{
|
28
|
+
fn clone(&self) -> Self {
|
29
|
+
Wrap(self.0.clone())
|
30
|
+
}
|
31
|
+
}
|
32
|
+
|
13
33
|
impl<T> From<T> for Wrap<T> {
|
14
34
|
fn from(t: T) -> Self {
|
15
35
|
Wrap(t)
|
16
36
|
}
|
17
37
|
}
|
18
38
|
|
19
|
-
pub fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
|
39
|
+
pub(crate) fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
|
20
40
|
let seq: RArray = obj.try_convert()?;
|
21
41
|
let len = seq.len();
|
22
42
|
Ok((seq, len))
|
23
43
|
}
|
24
44
|
|
25
|
-
pub fn get_df(obj: Value) -> RbResult<DataFrame> {
|
45
|
+
pub(crate) fn get_df(obj: Value) -> RbResult<DataFrame> {
|
26
46
|
let rbdf = obj.funcall::<_, _, &RbDataFrame>("_df", ())?;
|
27
47
|
Ok(rbdf.df.borrow().clone())
|
28
48
|
}
|
29
49
|
|
30
|
-
pub fn get_series(obj: Value) -> RbResult<Series> {
|
50
|
+
pub(crate) fn get_lf(obj: Value) -> RbResult<LazyFrame> {
|
51
|
+
let rbdf = obj.funcall::<_, _, &RbLazyFrame>("_ldf", ())?;
|
52
|
+
Ok(rbdf.ldf.clone())
|
53
|
+
}
|
54
|
+
|
55
|
+
pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
|
31
56
|
let rbs = obj.funcall::<_, _, &RbSeries>("_s", ())?;
|
32
57
|
Ok(rbs.series.borrow().clone())
|
33
58
|
}
|
@@ -87,6 +112,25 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
87
112
|
.unwrap()
|
88
113
|
.funcall::<_, _, Value>("to_date", ())
|
89
114
|
.unwrap(),
|
115
|
+
AnyValue::Datetime(v, tu, tz) => {
|
116
|
+
let t = match tu {
|
117
|
+
TimeUnit::Nanoseconds => todo!(),
|
118
|
+
TimeUnit::Microseconds => {
|
119
|
+
let sec = v / 1000000;
|
120
|
+
let subsec = v % 1000000;
|
121
|
+
class::time()
|
122
|
+
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
|
123
|
+
.unwrap()
|
124
|
+
}
|
125
|
+
TimeUnit::Milliseconds => todo!(),
|
126
|
+
};
|
127
|
+
|
128
|
+
if tz.is_some() {
|
129
|
+
todo!();
|
130
|
+
} else {
|
131
|
+
t.funcall::<_, _, Value>("utc", ()).unwrap()
|
132
|
+
}
|
133
|
+
}
|
90
134
|
_ => todo!(),
|
91
135
|
}
|
92
136
|
}
|
@@ -150,6 +194,39 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
150
194
|
}
|
151
195
|
}
|
152
196
|
|
197
|
+
impl TryConvert for Wrap<AsofStrategy> {
|
198
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
199
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
200
|
+
"backward" => AsofStrategy::Backward,
|
201
|
+
"forward" => AsofStrategy::Forward,
|
202
|
+
v => {
|
203
|
+
return Err(RbValueError::new_err(format!(
|
204
|
+
"strategy must be one of {{'backward', 'forward'}}, got {}",
|
205
|
+
v
|
206
|
+
)))
|
207
|
+
}
|
208
|
+
};
|
209
|
+
Ok(Wrap(parsed))
|
210
|
+
}
|
211
|
+
}
|
212
|
+
|
213
|
+
impl TryConvert for Wrap<Option<AvroCompression>> {
|
214
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
215
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
216
|
+
"uncompressed" => None,
|
217
|
+
"snappy" => Some(AvroCompression::Snappy),
|
218
|
+
"deflate" => Some(AvroCompression::Deflate),
|
219
|
+
v => {
|
220
|
+
return Err(RbValueError::new_err(format!(
|
221
|
+
"compression must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {}",
|
222
|
+
v
|
223
|
+
)))
|
224
|
+
}
|
225
|
+
};
|
226
|
+
Ok(Wrap(parsed))
|
227
|
+
}
|
228
|
+
}
|
229
|
+
|
153
230
|
impl TryConvert for Wrap<CategoricalOrdering> {
|
154
231
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
155
232
|
let parsed = match ob.try_convert::<String>()?.as_str() {
|
@@ -238,6 +315,22 @@ impl TryConvert for Wrap<JoinType> {
|
|
238
315
|
}
|
239
316
|
}
|
240
317
|
|
318
|
+
impl TryConvert for Wrap<ListToStructWidthStrategy> {
|
319
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
320
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
321
|
+
"first_non_null" => ListToStructWidthStrategy::FirstNonNull,
|
322
|
+
"max_width" => ListToStructWidthStrategy::MaxWidth,
|
323
|
+
v => {
|
324
|
+
return Err(RbValueError::new_err(format!(
|
325
|
+
"n_field_strategy must be one of {{'first_non_null', 'max_width'}}, got {}",
|
326
|
+
v
|
327
|
+
)))
|
328
|
+
}
|
329
|
+
};
|
330
|
+
Ok(Wrap(parsed))
|
331
|
+
}
|
332
|
+
}
|
333
|
+
|
241
334
|
impl TryConvert for Wrap<NullBehavior> {
|
242
335
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
243
336
|
let parsed = match ob.try_convert::<String>()?.as_str() {
|
@@ -425,18 +518,100 @@ pub fn parse_parquet_compression(
|
|
425
518
|
Ok(parsed)
|
426
519
|
}
|
427
520
|
|
521
|
+
impl<'s> TryConvert for Wrap<Row<'s>> {
|
522
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
523
|
+
let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
|
524
|
+
for item in ob.try_convert::<RArray>()?.each() {
|
525
|
+
vals.push(item?.try_convert::<Wrap<AnyValue<'s>>>()?);
|
526
|
+
}
|
527
|
+
let vals: Vec<AnyValue> = unsafe { std::mem::transmute(vals) };
|
528
|
+
Ok(Wrap(Row(vals)))
|
529
|
+
}
|
530
|
+
}
|
531
|
+
|
532
|
+
impl TryConvert for Wrap<Schema> {
|
533
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
534
|
+
let dict = ob.try_convert::<RHash>()?;
|
535
|
+
|
536
|
+
let mut schema = Vec::new();
|
537
|
+
dict.foreach(|key: String, val: Wrap<DataType>| {
|
538
|
+
schema.push(Field::new(&key, val.0));
|
539
|
+
Ok(ForEach::Continue)
|
540
|
+
})
|
541
|
+
.unwrap();
|
542
|
+
|
543
|
+
Ok(Wrap(schema.into_iter().into()))
|
544
|
+
}
|
545
|
+
}
|
546
|
+
|
547
|
+
#[derive(Clone, Debug)]
|
428
548
|
pub struct ObjectValue {
|
429
549
|
pub inner: Value,
|
430
550
|
}
|
431
551
|
|
552
|
+
impl Hash for ObjectValue {
|
553
|
+
fn hash<H: Hasher>(&self, state: &mut H) {
|
554
|
+
let h = self
|
555
|
+
.inner
|
556
|
+
.funcall::<_, _, isize>("hash", ())
|
557
|
+
.expect("should be hashable");
|
558
|
+
state.write_isize(h)
|
559
|
+
}
|
560
|
+
}
|
561
|
+
|
562
|
+
impl Eq for ObjectValue {}
|
563
|
+
|
564
|
+
impl PartialEq for ObjectValue {
|
565
|
+
fn eq(&self, other: &Self) -> bool {
|
566
|
+
self.inner.eql(&other.inner).unwrap_or(false)
|
567
|
+
}
|
568
|
+
}
|
569
|
+
|
570
|
+
impl Display for ObjectValue {
|
571
|
+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
572
|
+
write!(f, "{}", self.inner)
|
573
|
+
}
|
574
|
+
}
|
575
|
+
|
576
|
+
impl PolarsObject for ObjectValue {
|
577
|
+
fn type_name() -> &'static str {
|
578
|
+
"object"
|
579
|
+
}
|
580
|
+
}
|
581
|
+
|
582
|
+
impl From<Value> for ObjectValue {
|
583
|
+
fn from(v: Value) -> Self {
|
584
|
+
Self { inner: v }
|
585
|
+
}
|
586
|
+
}
|
587
|
+
|
588
|
+
impl TryConvert for ObjectValue {
|
589
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
590
|
+
Ok(ObjectValue { inner: ob })
|
591
|
+
}
|
592
|
+
}
|
593
|
+
|
432
594
|
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
|
433
595
|
fn from(val: &dyn PolarsObjectSafe) -> Self {
|
434
596
|
unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
|
435
597
|
}
|
436
598
|
}
|
437
599
|
|
600
|
+
// TODO remove
|
438
601
|
impl ObjectValue {
|
439
602
|
pub fn to_object(&self) -> Value {
|
440
603
|
self.inner
|
441
604
|
}
|
442
605
|
}
|
606
|
+
|
607
|
+
impl From<ObjectValue> for Value {
|
608
|
+
fn from(val: ObjectValue) -> Self {
|
609
|
+
val.inner
|
610
|
+
}
|
611
|
+
}
|
612
|
+
|
613
|
+
impl Default for ObjectValue {
|
614
|
+
fn default() -> Self {
|
615
|
+
ObjectValue { inner: *QNIL }
|
616
|
+
}
|
617
|
+
}
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,15 +1,21 @@
|
|
1
1
|
use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
|
2
|
+
use polars::io::avro::AvroCompression;
|
2
3
|
use polars::io::mmap::ReaderBytes;
|
3
4
|
use polars::io::RowCount;
|
5
|
+
use polars::prelude::pivot::{pivot, pivot_stable};
|
4
6
|
use polars::prelude::*;
|
5
7
|
use std::cell::RefCell;
|
6
8
|
use std::io::{BufWriter, Cursor};
|
7
9
|
use std::ops::Deref;
|
8
10
|
|
11
|
+
use crate::apply::dataframe::{
|
12
|
+
apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
|
13
|
+
apply_lambda_with_utf8_out_type,
|
14
|
+
};
|
9
15
|
use crate::conversion::*;
|
10
16
|
use crate::file::{get_file_like, get_mmap_bytes_reader};
|
11
17
|
use crate::series::{to_rbseries_collection, to_series_collection};
|
12
|
-
use crate::{series, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
18
|
+
use crate::{series, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
13
19
|
|
14
20
|
#[magnus::wrap(class = "Polars::RbDataFrame")]
|
15
21
|
pub struct RbDataFrame {
|
@@ -179,6 +185,48 @@ impl RbDataFrame {
|
|
179
185
|
Ok(RbDataFrame::new(df))
|
180
186
|
}
|
181
187
|
|
188
|
+
pub fn read_avro(
|
189
|
+
rb_f: Value,
|
190
|
+
columns: Option<Vec<String>>,
|
191
|
+
projection: Option<Vec<usize>>,
|
192
|
+
n_rows: Option<usize>,
|
193
|
+
) -> RbResult<Self> {
|
194
|
+
use polars::io::avro::AvroReader;
|
195
|
+
|
196
|
+
let file = get_file_like(rb_f, false)?;
|
197
|
+
let df = AvroReader::new(file)
|
198
|
+
.with_projection(projection)
|
199
|
+
.with_columns(columns)
|
200
|
+
.with_n_rows(n_rows)
|
201
|
+
.finish()
|
202
|
+
.map_err(RbPolarsErr::from)?;
|
203
|
+
Ok(RbDataFrame::new(df))
|
204
|
+
}
|
205
|
+
|
206
|
+
pub fn write_avro(
|
207
|
+
&self,
|
208
|
+
rb_f: Value,
|
209
|
+
compression: Wrap<Option<AvroCompression>>,
|
210
|
+
) -> RbResult<()> {
|
211
|
+
use polars::io::avro::AvroWriter;
|
212
|
+
|
213
|
+
if let Ok(s) = rb_f.try_convert::<String>() {
|
214
|
+
let f = std::fs::File::create(&s).unwrap();
|
215
|
+
AvroWriter::new(f)
|
216
|
+
.with_compression(compression.0)
|
217
|
+
.finish(&mut self.df.borrow_mut())
|
218
|
+
.map_err(RbPolarsErr::from)?;
|
219
|
+
} else {
|
220
|
+
let mut buf = get_file_like(rb_f, true)?;
|
221
|
+
AvroWriter::new(&mut buf)
|
222
|
+
.with_compression(compression.0)
|
223
|
+
.finish(&mut self.df.borrow_mut())
|
224
|
+
.map_err(RbPolarsErr::from)?;
|
225
|
+
}
|
226
|
+
|
227
|
+
Ok(())
|
228
|
+
}
|
229
|
+
|
182
230
|
pub fn read_json(rb_f: Value) -> RbResult<Self> {
|
183
231
|
// memmap the file first
|
184
232
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
@@ -238,6 +286,14 @@ impl RbDataFrame {
|
|
238
286
|
Ok(())
|
239
287
|
}
|
240
288
|
|
289
|
+
pub fn read_hashes(
|
290
|
+
_dicts: Value,
|
291
|
+
_infer_schema_length: Option<usize>,
|
292
|
+
_schema_overwrite: Option<Wrap<Schema>>,
|
293
|
+
) -> RbResult<Self> {
|
294
|
+
Err(RbPolarsErr::todo())
|
295
|
+
}
|
296
|
+
|
241
297
|
pub fn read_hash(data: RHash) -> RbResult<Self> {
|
242
298
|
let mut cols: Vec<Series> = Vec::new();
|
243
299
|
data.foreach(|name: String, values: Value| {
|
@@ -751,6 +807,31 @@ impl RbDataFrame {
|
|
751
807
|
Ok(RbDataFrame::new(df))
|
752
808
|
}
|
753
809
|
|
810
|
+
pub fn pivot_expr(
|
811
|
+
&self,
|
812
|
+
values: Vec<String>,
|
813
|
+
index: Vec<String>,
|
814
|
+
columns: Vec<String>,
|
815
|
+
aggregate_expr: &RbExpr,
|
816
|
+
maintain_order: bool,
|
817
|
+
sort_columns: bool,
|
818
|
+
) -> RbResult<Self> {
|
819
|
+
let fun = match maintain_order {
|
820
|
+
true => pivot_stable,
|
821
|
+
false => pivot,
|
822
|
+
};
|
823
|
+
let df = fun(
|
824
|
+
&self.df.borrow(),
|
825
|
+
values,
|
826
|
+
index,
|
827
|
+
columns,
|
828
|
+
aggregate_expr.inner.clone(),
|
829
|
+
sort_columns,
|
830
|
+
)
|
831
|
+
.map_err(RbPolarsErr::from)?;
|
832
|
+
Ok(RbDataFrame::new(df))
|
833
|
+
}
|
834
|
+
|
754
835
|
pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<Vec<Self>> {
|
755
836
|
let out = if stable {
|
756
837
|
self.df.borrow().partition_by_stable(groups)
|
@@ -870,10 +951,74 @@ impl RbDataFrame {
|
|
870
951
|
df.into()
|
871
952
|
}
|
872
953
|
|
954
|
+
pub fn apply(
|
955
|
+
&self,
|
956
|
+
lambda: Value,
|
957
|
+
output_type: Option<Wrap<DataType>>,
|
958
|
+
inference_size: usize,
|
959
|
+
) -> RbResult<(Value, bool)> {
|
960
|
+
let df = &self.df.borrow();
|
961
|
+
|
962
|
+
let output_type = output_type.map(|dt| dt.0);
|
963
|
+
let out = match output_type {
|
964
|
+
Some(DataType::Int32) => {
|
965
|
+
apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None).into_series()
|
966
|
+
}
|
967
|
+
Some(DataType::Int64) => {
|
968
|
+
apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None).into_series()
|
969
|
+
}
|
970
|
+
Some(DataType::UInt32) => {
|
971
|
+
apply_lambda_with_primitive_out_type::<UInt32Type>(df, lambda, 0, None)
|
972
|
+
.into_series()
|
973
|
+
}
|
974
|
+
Some(DataType::UInt64) => {
|
975
|
+
apply_lambda_with_primitive_out_type::<UInt64Type>(df, lambda, 0, None)
|
976
|
+
.into_series()
|
977
|
+
}
|
978
|
+
Some(DataType::Float32) => {
|
979
|
+
apply_lambda_with_primitive_out_type::<Float32Type>(df, lambda, 0, None)
|
980
|
+
.into_series()
|
981
|
+
}
|
982
|
+
Some(DataType::Float64) => {
|
983
|
+
apply_lambda_with_primitive_out_type::<Float64Type>(df, lambda, 0, None)
|
984
|
+
.into_series()
|
985
|
+
}
|
986
|
+
Some(DataType::Boolean) => {
|
987
|
+
apply_lambda_with_bool_out_type(df, lambda, 0, None).into_series()
|
988
|
+
}
|
989
|
+
Some(DataType::Date) => {
|
990
|
+
apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None)
|
991
|
+
.into_date()
|
992
|
+
.into_series()
|
993
|
+
}
|
994
|
+
Some(DataType::Datetime(tu, tz)) => {
|
995
|
+
apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None)
|
996
|
+
.into_datetime(tu, tz)
|
997
|
+
.into_series()
|
998
|
+
}
|
999
|
+
Some(DataType::Utf8) => {
|
1000
|
+
apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
|
1001
|
+
}
|
1002
|
+
_ => return apply_lambda_unknown(df, lambda, inference_size),
|
1003
|
+
};
|
1004
|
+
|
1005
|
+
Ok((RbSeries::from(out).into(), false))
|
1006
|
+
}
|
1007
|
+
|
873
1008
|
pub fn shrink_to_fit(&self) {
|
874
1009
|
self.df.borrow_mut().shrink_to_fit();
|
875
1010
|
}
|
876
1011
|
|
1012
|
+
pub fn hash_rows(&self, k0: u64, k1: u64, k2: u64, k3: u64) -> RbResult<RbSeries> {
|
1013
|
+
let hb = ahash::RandomState::with_seeds(k0, k1, k2, k3);
|
1014
|
+
let hash = self
|
1015
|
+
.df
|
1016
|
+
.borrow_mut()
|
1017
|
+
.hash_rows(Some(hb))
|
1018
|
+
.map_err(RbPolarsErr::from)?;
|
1019
|
+
Ok(hash.into_series().into())
|
1020
|
+
}
|
1021
|
+
|
877
1022
|
pub fn transpose(&self, include_header: bool, names: String) -> RbResult<Self> {
|
878
1023
|
let mut df = self.df.borrow().transpose().map_err(RbPolarsErr::from)?;
|
879
1024
|
if include_header {
|
data/ext/polars/src/error.rs
CHANGED
@@ -22,6 +22,10 @@ impl RbPolarsErr {
|
|
22
22
|
pub fn other(message: String) -> Error {
|
23
23
|
Error::runtime_error(message)
|
24
24
|
}
|
25
|
+
|
26
|
+
pub fn todo() -> Error {
|
27
|
+
Error::runtime_error("not implemented yet")
|
28
|
+
}
|
25
29
|
}
|
26
30
|
|
27
31
|
pub struct RbValueError {}
|
@@ -31,3 +35,11 @@ impl RbValueError {
|
|
31
35
|
Error::new(arg_error(), message)
|
32
36
|
}
|
33
37
|
}
|
38
|
+
|
39
|
+
pub struct ComputeError {}
|
40
|
+
|
41
|
+
impl ComputeError {
|
42
|
+
pub fn new_err(message: String) -> Error {
|
43
|
+
Error::runtime_error(message)
|
44
|
+
}
|
45
|
+
}
|
@@ -1,7 +1,39 @@
|
|
1
1
|
use magnus::Value;
|
2
|
-
use polars::
|
3
|
-
|
2
|
+
use polars::prelude::*;
|
3
|
+
|
4
|
+
use crate::lazy::dsl::RbExpr;
|
5
|
+
use crate::Wrap;
|
4
6
|
|
5
7
|
pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Series> {
|
6
8
|
todo!();
|
7
9
|
}
|
10
|
+
|
11
|
+
pub fn map_single(
|
12
|
+
rbexpr: &RbExpr,
|
13
|
+
_lambda: Value,
|
14
|
+
output_type: Option<Wrap<DataType>>,
|
15
|
+
agg_list: bool,
|
16
|
+
) -> RbExpr {
|
17
|
+
let output_type = output_type.map(|wrap| wrap.0);
|
18
|
+
|
19
|
+
let output_type2 = output_type.clone();
|
20
|
+
let function = move |_s: Series| {
|
21
|
+
let _output_type = output_type2.clone().unwrap_or(DataType::Unknown);
|
22
|
+
|
23
|
+
todo!();
|
24
|
+
};
|
25
|
+
|
26
|
+
let output_map = GetOutput::map_field(move |fld| match output_type {
|
27
|
+
Some(ref dt) => Field::new(fld.name(), dt.clone()),
|
28
|
+
None => {
|
29
|
+
let mut fld = fld.clone();
|
30
|
+
fld.coerce(DataType::Unknown);
|
31
|
+
fld
|
32
|
+
}
|
33
|
+
});
|
34
|
+
if agg_list {
|
35
|
+
rbexpr.clone().inner.map_list(function, output_map).into()
|
36
|
+
} else {
|
37
|
+
rbexpr.clone().inner.map(function, output_map).into()
|
38
|
+
}
|
39
|
+
}
|
@@ -3,7 +3,7 @@ use polars::io::RowCount;
|
|
3
3
|
use polars::lazy::frame::{LazyFrame, LazyGroupBy};
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
6
|
-
use std::io::BufWriter;
|
6
|
+
use std::io::{BufWriter, Read};
|
7
7
|
|
8
8
|
use crate::conversion::*;
|
9
9
|
use crate::file::get_file_like;
|
@@ -53,6 +53,27 @@ impl From<LazyFrame> for RbLazyFrame {
|
|
53
53
|
}
|
54
54
|
|
55
55
|
impl RbLazyFrame {
|
56
|
+
pub fn read_json(rb_f: Value) -> RbResult<Self> {
|
57
|
+
// it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
|
58
|
+
// so don't bother with files.
|
59
|
+
let mut json = String::new();
|
60
|
+
let _ = get_file_like(rb_f, false)?
|
61
|
+
.read_to_string(&mut json)
|
62
|
+
.unwrap();
|
63
|
+
|
64
|
+
// Safety
|
65
|
+
// we skipped the serializing/deserializing of the static in lifetime in `DataType`
|
66
|
+
// so we actually don't have a lifetime at all when serializing.
|
67
|
+
|
68
|
+
// &str still has a lifetime. Bit its ok, because we drop it immediately
|
69
|
+
// in this scope
|
70
|
+
let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
|
71
|
+
|
72
|
+
let lp = serde_json::from_str::<LogicalPlan>(json)
|
73
|
+
.map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
|
74
|
+
Ok(LazyFrame::from(lp).into())
|
75
|
+
}
|
76
|
+
|
56
77
|
pub fn new_from_ndjson(
|
57
78
|
path: String,
|
58
79
|
infer_schema_length: Option<usize>,
|
@@ -211,7 +232,7 @@ impl RbLazyFrame {
|
|
211
232
|
projection_pushdown: bool,
|
212
233
|
simplify_expr: bool,
|
213
234
|
slice_pushdown: bool,
|
214
|
-
|
235
|
+
cse: bool,
|
215
236
|
allow_streaming: bool,
|
216
237
|
) -> RbLazyFrame {
|
217
238
|
let ldf = self.ldf.clone();
|
@@ -220,7 +241,7 @@ impl RbLazyFrame {
|
|
220
241
|
.with_predicate_pushdown(predicate_pushdown)
|
221
242
|
.with_simplify_expr(simplify_expr)
|
222
243
|
.with_slice_pushdown(slice_pushdown)
|
223
|
-
|
244
|
+
.with_common_subplan_elimination(cse)
|
224
245
|
.with_streaming(allow_streaming)
|
225
246
|
.with_projection_pushdown(projection_pushdown);
|
226
247
|
ldf.into()
|
@@ -349,6 +370,56 @@ impl RbLazyFrame {
|
|
349
370
|
})
|
350
371
|
}
|
351
372
|
|
373
|
+
pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
|
374
|
+
let contexts = contexts
|
375
|
+
.each()
|
376
|
+
.map(|v| v.unwrap().try_convert())
|
377
|
+
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
378
|
+
let contexts = contexts
|
379
|
+
.into_iter()
|
380
|
+
.map(|ldf| ldf.ldf.clone())
|
381
|
+
.collect::<Vec<_>>();
|
382
|
+
Ok(self.ldf.clone().with_context(contexts).into())
|
383
|
+
}
|
384
|
+
|
385
|
+
#[allow(clippy::too_many_arguments)]
|
386
|
+
pub fn join_asof(
|
387
|
+
&self,
|
388
|
+
other: &RbLazyFrame,
|
389
|
+
left_on: &RbExpr,
|
390
|
+
right_on: &RbExpr,
|
391
|
+
left_by: Option<Vec<String>>,
|
392
|
+
right_by: Option<Vec<String>>,
|
393
|
+
allow_parallel: bool,
|
394
|
+
force_parallel: bool,
|
395
|
+
suffix: String,
|
396
|
+
strategy: Wrap<AsofStrategy>,
|
397
|
+
tolerance: Option<Wrap<AnyValue<'_>>>,
|
398
|
+
tolerance_str: Option<String>,
|
399
|
+
) -> RbResult<Self> {
|
400
|
+
let ldf = self.ldf.clone();
|
401
|
+
let other = other.ldf.clone();
|
402
|
+
let left_on = left_on.inner.clone();
|
403
|
+
let right_on = right_on.inner.clone();
|
404
|
+
Ok(ldf
|
405
|
+
.join_builder()
|
406
|
+
.with(other)
|
407
|
+
.left_on([left_on])
|
408
|
+
.right_on([right_on])
|
409
|
+
.allow_parallel(allow_parallel)
|
410
|
+
.force_parallel(force_parallel)
|
411
|
+
.how(JoinType::AsOf(AsOfOptions {
|
412
|
+
strategy: strategy.0,
|
413
|
+
left_by,
|
414
|
+
right_by,
|
415
|
+
tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
|
416
|
+
tolerance_str,
|
417
|
+
}))
|
418
|
+
.suffix(suffix)
|
419
|
+
.finish()
|
420
|
+
.into())
|
421
|
+
}
|
422
|
+
|
352
423
|
#[allow(clippy::too_many_arguments)]
|
353
424
|
pub fn join(
|
354
425
|
&self,
|