polars-df 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/Cargo.lock +142 -11
  4. data/Cargo.toml +5 -0
  5. data/ext/polars/Cargo.toml +17 -1
  6. data/ext/polars/src/apply/dataframe.rs +292 -0
  7. data/ext/polars/src/apply/mod.rs +254 -0
  8. data/ext/polars/src/apply/series.rs +1173 -0
  9. data/ext/polars/src/conversion.rs +180 -5
  10. data/ext/polars/src/dataframe.rs +146 -1
  11. data/ext/polars/src/error.rs +12 -0
  12. data/ext/polars/src/lazy/apply.rs +34 -2
  13. data/ext/polars/src/lazy/dataframe.rs +74 -3
  14. data/ext/polars/src/lazy/dsl.rs +136 -0
  15. data/ext/polars/src/lib.rs +199 -1
  16. data/ext/polars/src/list_construction.rs +100 -0
  17. data/ext/polars/src/series.rs +331 -0
  18. data/ext/polars/src/utils.rs +25 -0
  19. data/lib/polars/cat_name_space.rb +54 -0
  20. data/lib/polars/convert.rb +100 -0
  21. data/lib/polars/data_frame.rb +1558 -60
  22. data/lib/polars/date_time_expr.rb +2 -2
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/dynamic_group_by.rb +49 -0
  25. data/lib/polars/expr.rb +4072 -107
  26. data/lib/polars/expr_dispatch.rb +8 -0
  27. data/lib/polars/functions.rb +192 -3
  28. data/lib/polars/group_by.rb +44 -3
  29. data/lib/polars/io.rb +20 -4
  30. data/lib/polars/lazy_frame.rb +800 -26
  31. data/lib/polars/lazy_functions.rb +687 -43
  32. data/lib/polars/lazy_group_by.rb +1 -0
  33. data/lib/polars/list_expr.rb +502 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/rolling_group_by.rb +35 -0
  36. data/lib/polars/series.rb +934 -62
  37. data/lib/polars/string_expr.rb +189 -13
  38. data/lib/polars/string_name_space.rb +690 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +44 -0
  41. data/lib/polars/version.rb +1 -1
  42. data/lib/polars.rb +14 -1
  43. metadata +15 -3
@@ -1,33 +1,58 @@
1
- use magnus::{class, RArray, Symbol, TryConvert, Value, QNIL};
1
+ use magnus::{class, r_hash::ForEach, RArray, RHash, Symbol, TryConvert, Value, QNIL};
2
2
  use polars::chunked_array::object::PolarsObjectSafe;
3
3
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
4
4
  use polars::datatypes::AnyValue;
5
+ use polars::frame::row::Row;
5
6
  use polars::frame::DataFrame;
7
+ use polars::io::avro::AvroCompression;
6
8
  use polars::prelude::*;
7
9
  use polars::series::ops::NullBehavior;
10
+ use std::fmt::{Display, Formatter};
11
+ use std::hash::{Hash, Hasher};
8
12
 
9
- use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
13
+ use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
10
14
 
15
+ pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
16
+ // Safety:
17
+ // Wrap is transparent.
18
+ unsafe { std::mem::transmute(slice) }
19
+ }
20
+
21
+ #[repr(transparent)]
11
22
  pub struct Wrap<T>(pub T);
12
23
 
24
+ impl<T> Clone for Wrap<T>
25
+ where
26
+ T: Clone,
27
+ {
28
+ fn clone(&self) -> Self {
29
+ Wrap(self.0.clone())
30
+ }
31
+ }
32
+
13
33
  impl<T> From<T> for Wrap<T> {
14
34
  fn from(t: T) -> Self {
15
35
  Wrap(t)
16
36
  }
17
37
  }
18
38
 
19
- pub fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
39
+ pub(crate) fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
20
40
  let seq: RArray = obj.try_convert()?;
21
41
  let len = seq.len();
22
42
  Ok((seq, len))
23
43
  }
24
44
 
25
- pub fn get_df(obj: Value) -> RbResult<DataFrame> {
45
+ pub(crate) fn get_df(obj: Value) -> RbResult<DataFrame> {
26
46
  let rbdf = obj.funcall::<_, _, &RbDataFrame>("_df", ())?;
27
47
  Ok(rbdf.df.borrow().clone())
28
48
  }
29
49
 
30
- pub fn get_series(obj: Value) -> RbResult<Series> {
50
+ pub(crate) fn get_lf(obj: Value) -> RbResult<LazyFrame> {
51
+ let rbdf = obj.funcall::<_, _, &RbLazyFrame>("_ldf", ())?;
52
+ Ok(rbdf.ldf.clone())
53
+ }
54
+
55
+ pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
31
56
  let rbs = obj.funcall::<_, _, &RbSeries>("_s", ())?;
32
57
  Ok(rbs.series.borrow().clone())
33
58
  }
@@ -87,6 +112,25 @@ impl From<Wrap<AnyValue<'_>>> for Value {
87
112
  .unwrap()
88
113
  .funcall::<_, _, Value>("to_date", ())
89
114
  .unwrap(),
115
+ AnyValue::Datetime(v, tu, tz) => {
116
+ let t = match tu {
117
+ TimeUnit::Nanoseconds => todo!(),
118
+ TimeUnit::Microseconds => {
119
+ let sec = v / 1000000;
120
+ let subsec = v % 1000000;
121
+ class::time()
122
+ .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
123
+ .unwrap()
124
+ }
125
+ TimeUnit::Milliseconds => todo!(),
126
+ };
127
+
128
+ if tz.is_some() {
129
+ todo!();
130
+ } else {
131
+ t.funcall::<_, _, Value>("utc", ()).unwrap()
132
+ }
133
+ }
90
134
  _ => todo!(),
91
135
  }
92
136
  }
@@ -150,6 +194,39 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
150
194
  }
151
195
  }
152
196
 
197
+ impl TryConvert for Wrap<AsofStrategy> {
198
+ fn try_convert(ob: Value) -> RbResult<Self> {
199
+ let parsed = match ob.try_convert::<String>()?.as_str() {
200
+ "backward" => AsofStrategy::Backward,
201
+ "forward" => AsofStrategy::Forward,
202
+ v => {
203
+ return Err(RbValueError::new_err(format!(
204
+ "strategy must be one of {{'backward', 'forward'}}, got {}",
205
+ v
206
+ )))
207
+ }
208
+ };
209
+ Ok(Wrap(parsed))
210
+ }
211
+ }
212
+
213
+ impl TryConvert for Wrap<Option<AvroCompression>> {
214
+ fn try_convert(ob: Value) -> RbResult<Self> {
215
+ let parsed = match ob.try_convert::<String>()?.as_str() {
216
+ "uncompressed" => None,
217
+ "snappy" => Some(AvroCompression::Snappy),
218
+ "deflate" => Some(AvroCompression::Deflate),
219
+ v => {
220
+ return Err(RbValueError::new_err(format!(
221
+ "compression must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {}",
222
+ v
223
+ )))
224
+ }
225
+ };
226
+ Ok(Wrap(parsed))
227
+ }
228
+ }
229
+
153
230
  impl TryConvert for Wrap<CategoricalOrdering> {
154
231
  fn try_convert(ob: Value) -> RbResult<Self> {
155
232
  let parsed = match ob.try_convert::<String>()?.as_str() {
@@ -238,6 +315,22 @@ impl TryConvert for Wrap<JoinType> {
238
315
  }
239
316
  }
240
317
 
318
+ impl TryConvert for Wrap<ListToStructWidthStrategy> {
319
+ fn try_convert(ob: Value) -> RbResult<Self> {
320
+ let parsed = match ob.try_convert::<String>()?.as_str() {
321
+ "first_non_null" => ListToStructWidthStrategy::FirstNonNull,
322
+ "max_width" => ListToStructWidthStrategy::MaxWidth,
323
+ v => {
324
+ return Err(RbValueError::new_err(format!(
325
+ "n_field_strategy must be one of {{'first_non_null', 'max_width'}}, got {}",
326
+ v
327
+ )))
328
+ }
329
+ };
330
+ Ok(Wrap(parsed))
331
+ }
332
+ }
333
+
241
334
  impl TryConvert for Wrap<NullBehavior> {
242
335
  fn try_convert(ob: Value) -> RbResult<Self> {
243
336
  let parsed = match ob.try_convert::<String>()?.as_str() {
@@ -425,18 +518,100 @@ pub fn parse_parquet_compression(
425
518
  Ok(parsed)
426
519
  }
427
520
 
521
+ impl<'s> TryConvert for Wrap<Row<'s>> {
522
+ fn try_convert(ob: Value) -> RbResult<Self> {
523
+ let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
524
+ for item in ob.try_convert::<RArray>()?.each() {
525
+ vals.push(item?.try_convert::<Wrap<AnyValue<'s>>>()?);
526
+ }
527
+ let vals: Vec<AnyValue> = unsafe { std::mem::transmute(vals) };
528
+ Ok(Wrap(Row(vals)))
529
+ }
530
+ }
531
+
532
+ impl TryConvert for Wrap<Schema> {
533
+ fn try_convert(ob: Value) -> RbResult<Self> {
534
+ let dict = ob.try_convert::<RHash>()?;
535
+
536
+ let mut schema = Vec::new();
537
+ dict.foreach(|key: String, val: Wrap<DataType>| {
538
+ schema.push(Field::new(&key, val.0));
539
+ Ok(ForEach::Continue)
540
+ })
541
+ .unwrap();
542
+
543
+ Ok(Wrap(schema.into_iter().into()))
544
+ }
545
+ }
546
+
547
+ #[derive(Clone, Debug)]
428
548
  pub struct ObjectValue {
429
549
  pub inner: Value,
430
550
  }
431
551
 
552
+ impl Hash for ObjectValue {
553
+ fn hash<H: Hasher>(&self, state: &mut H) {
554
+ let h = self
555
+ .inner
556
+ .funcall::<_, _, isize>("hash", ())
557
+ .expect("should be hashable");
558
+ state.write_isize(h)
559
+ }
560
+ }
561
+
562
+ impl Eq for ObjectValue {}
563
+
564
+ impl PartialEq for ObjectValue {
565
+ fn eq(&self, other: &Self) -> bool {
566
+ self.inner.eql(&other.inner).unwrap_or(false)
567
+ }
568
+ }
569
+
570
+ impl Display for ObjectValue {
571
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
572
+ write!(f, "{}", self.inner)
573
+ }
574
+ }
575
+
576
+ impl PolarsObject for ObjectValue {
577
+ fn type_name() -> &'static str {
578
+ "object"
579
+ }
580
+ }
581
+
582
+ impl From<Value> for ObjectValue {
583
+ fn from(v: Value) -> Self {
584
+ Self { inner: v }
585
+ }
586
+ }
587
+
588
+ impl TryConvert for ObjectValue {
589
+ fn try_convert(ob: Value) -> RbResult<Self> {
590
+ Ok(ObjectValue { inner: ob })
591
+ }
592
+ }
593
+
432
594
  impl From<&dyn PolarsObjectSafe> for &ObjectValue {
433
595
  fn from(val: &dyn PolarsObjectSafe) -> Self {
434
596
  unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
435
597
  }
436
598
  }
437
599
 
600
+ // TODO remove
438
601
  impl ObjectValue {
439
602
  pub fn to_object(&self) -> Value {
440
603
  self.inner
441
604
  }
442
605
  }
606
+
607
+ impl From<ObjectValue> for Value {
608
+ fn from(val: ObjectValue) -> Self {
609
+ val.inner
610
+ }
611
+ }
612
+
613
+ impl Default for ObjectValue {
614
+ fn default() -> Self {
615
+ ObjectValue { inner: *QNIL }
616
+ }
617
+ }
@@ -1,15 +1,21 @@
1
1
  use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
2
+ use polars::io::avro::AvroCompression;
2
3
  use polars::io::mmap::ReaderBytes;
3
4
  use polars::io::RowCount;
5
+ use polars::prelude::pivot::{pivot, pivot_stable};
4
6
  use polars::prelude::*;
5
7
  use std::cell::RefCell;
6
8
  use std::io::{BufWriter, Cursor};
7
9
  use std::ops::Deref;
8
10
 
11
+ use crate::apply::dataframe::{
12
+ apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
13
+ apply_lambda_with_utf8_out_type,
14
+ };
9
15
  use crate::conversion::*;
10
16
  use crate::file::{get_file_like, get_mmap_bytes_reader};
11
17
  use crate::series::{to_rbseries_collection, to_series_collection};
12
- use crate::{series, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
18
+ use crate::{series, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
13
19
 
14
20
  #[magnus::wrap(class = "Polars::RbDataFrame")]
15
21
  pub struct RbDataFrame {
@@ -179,6 +185,48 @@ impl RbDataFrame {
179
185
  Ok(RbDataFrame::new(df))
180
186
  }
181
187
 
188
+ pub fn read_avro(
189
+ rb_f: Value,
190
+ columns: Option<Vec<String>>,
191
+ projection: Option<Vec<usize>>,
192
+ n_rows: Option<usize>,
193
+ ) -> RbResult<Self> {
194
+ use polars::io::avro::AvroReader;
195
+
196
+ let file = get_file_like(rb_f, false)?;
197
+ let df = AvroReader::new(file)
198
+ .with_projection(projection)
199
+ .with_columns(columns)
200
+ .with_n_rows(n_rows)
201
+ .finish()
202
+ .map_err(RbPolarsErr::from)?;
203
+ Ok(RbDataFrame::new(df))
204
+ }
205
+
206
+ pub fn write_avro(
207
+ &self,
208
+ rb_f: Value,
209
+ compression: Wrap<Option<AvroCompression>>,
210
+ ) -> RbResult<()> {
211
+ use polars::io::avro::AvroWriter;
212
+
213
+ if let Ok(s) = rb_f.try_convert::<String>() {
214
+ let f = std::fs::File::create(&s).unwrap();
215
+ AvroWriter::new(f)
216
+ .with_compression(compression.0)
217
+ .finish(&mut self.df.borrow_mut())
218
+ .map_err(RbPolarsErr::from)?;
219
+ } else {
220
+ let mut buf = get_file_like(rb_f, true)?;
221
+ AvroWriter::new(&mut buf)
222
+ .with_compression(compression.0)
223
+ .finish(&mut self.df.borrow_mut())
224
+ .map_err(RbPolarsErr::from)?;
225
+ }
226
+
227
+ Ok(())
228
+ }
229
+
182
230
  pub fn read_json(rb_f: Value) -> RbResult<Self> {
183
231
  // memmap the file first
184
232
  let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
@@ -238,6 +286,14 @@ impl RbDataFrame {
238
286
  Ok(())
239
287
  }
240
288
 
289
+ pub fn read_hashes(
290
+ _dicts: Value,
291
+ _infer_schema_length: Option<usize>,
292
+ _schema_overwrite: Option<Wrap<Schema>>,
293
+ ) -> RbResult<Self> {
294
+ Err(RbPolarsErr::todo())
295
+ }
296
+
241
297
  pub fn read_hash(data: RHash) -> RbResult<Self> {
242
298
  let mut cols: Vec<Series> = Vec::new();
243
299
  data.foreach(|name: String, values: Value| {
@@ -751,6 +807,31 @@ impl RbDataFrame {
751
807
  Ok(RbDataFrame::new(df))
752
808
  }
753
809
 
810
+ pub fn pivot_expr(
811
+ &self,
812
+ values: Vec<String>,
813
+ index: Vec<String>,
814
+ columns: Vec<String>,
815
+ aggregate_expr: &RbExpr,
816
+ maintain_order: bool,
817
+ sort_columns: bool,
818
+ ) -> RbResult<Self> {
819
+ let fun = match maintain_order {
820
+ true => pivot_stable,
821
+ false => pivot,
822
+ };
823
+ let df = fun(
824
+ &self.df.borrow(),
825
+ values,
826
+ index,
827
+ columns,
828
+ aggregate_expr.inner.clone(),
829
+ sort_columns,
830
+ )
831
+ .map_err(RbPolarsErr::from)?;
832
+ Ok(RbDataFrame::new(df))
833
+ }
834
+
754
835
  pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<Vec<Self>> {
755
836
  let out = if stable {
756
837
  self.df.borrow().partition_by_stable(groups)
@@ -870,10 +951,74 @@ impl RbDataFrame {
870
951
  df.into()
871
952
  }
872
953
 
954
+ pub fn apply(
955
+ &self,
956
+ lambda: Value,
957
+ output_type: Option<Wrap<DataType>>,
958
+ inference_size: usize,
959
+ ) -> RbResult<(Value, bool)> {
960
+ let df = &self.df.borrow();
961
+
962
+ let output_type = output_type.map(|dt| dt.0);
963
+ let out = match output_type {
964
+ Some(DataType::Int32) => {
965
+ apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None).into_series()
966
+ }
967
+ Some(DataType::Int64) => {
968
+ apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None).into_series()
969
+ }
970
+ Some(DataType::UInt32) => {
971
+ apply_lambda_with_primitive_out_type::<UInt32Type>(df, lambda, 0, None)
972
+ .into_series()
973
+ }
974
+ Some(DataType::UInt64) => {
975
+ apply_lambda_with_primitive_out_type::<UInt64Type>(df, lambda, 0, None)
976
+ .into_series()
977
+ }
978
+ Some(DataType::Float32) => {
979
+ apply_lambda_with_primitive_out_type::<Float32Type>(df, lambda, 0, None)
980
+ .into_series()
981
+ }
982
+ Some(DataType::Float64) => {
983
+ apply_lambda_with_primitive_out_type::<Float64Type>(df, lambda, 0, None)
984
+ .into_series()
985
+ }
986
+ Some(DataType::Boolean) => {
987
+ apply_lambda_with_bool_out_type(df, lambda, 0, None).into_series()
988
+ }
989
+ Some(DataType::Date) => {
990
+ apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None)
991
+ .into_date()
992
+ .into_series()
993
+ }
994
+ Some(DataType::Datetime(tu, tz)) => {
995
+ apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None)
996
+ .into_datetime(tu, tz)
997
+ .into_series()
998
+ }
999
+ Some(DataType::Utf8) => {
1000
+ apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
1001
+ }
1002
+ _ => return apply_lambda_unknown(df, lambda, inference_size),
1003
+ };
1004
+
1005
+ Ok((RbSeries::from(out).into(), false))
1006
+ }
1007
+
873
1008
  pub fn shrink_to_fit(&self) {
874
1009
  self.df.borrow_mut().shrink_to_fit();
875
1010
  }
876
1011
 
1012
+ pub fn hash_rows(&self, k0: u64, k1: u64, k2: u64, k3: u64) -> RbResult<RbSeries> {
1013
+ let hb = ahash::RandomState::with_seeds(k0, k1, k2, k3);
1014
+ let hash = self
1015
+ .df
1016
+ .borrow_mut()
1017
+ .hash_rows(Some(hb))
1018
+ .map_err(RbPolarsErr::from)?;
1019
+ Ok(hash.into_series().into())
1020
+ }
1021
+
877
1022
  pub fn transpose(&self, include_header: bool, names: String) -> RbResult<Self> {
878
1023
  let mut df = self.df.borrow().transpose().map_err(RbPolarsErr::from)?;
879
1024
  if include_header {
@@ -22,6 +22,10 @@ impl RbPolarsErr {
22
22
  pub fn other(message: String) -> Error {
23
23
  Error::runtime_error(message)
24
24
  }
25
+
26
+ pub fn todo() -> Error {
27
+ Error::runtime_error("not implemented yet")
28
+ }
25
29
  }
26
30
 
27
31
  pub struct RbValueError {}
@@ -31,3 +35,11 @@ impl RbValueError {
31
35
  Error::new(arg_error(), message)
32
36
  }
33
37
  }
38
+
39
+ pub struct ComputeError {}
40
+
41
+ impl ComputeError {
42
+ pub fn new_err(message: String) -> Error {
43
+ Error::runtime_error(message)
44
+ }
45
+ }
@@ -1,7 +1,39 @@
1
1
  use magnus::Value;
2
- use polars::error::PolarsResult;
3
- use polars::series::Series;
2
+ use polars::prelude::*;
3
+
4
+ use crate::lazy::dsl::RbExpr;
5
+ use crate::Wrap;
4
6
 
5
7
  pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Series> {
6
8
  todo!();
7
9
  }
10
+
11
+ pub fn map_single(
12
+ rbexpr: &RbExpr,
13
+ _lambda: Value,
14
+ output_type: Option<Wrap<DataType>>,
15
+ agg_list: bool,
16
+ ) -> RbExpr {
17
+ let output_type = output_type.map(|wrap| wrap.0);
18
+
19
+ let output_type2 = output_type.clone();
20
+ let function = move |_s: Series| {
21
+ let _output_type = output_type2.clone().unwrap_or(DataType::Unknown);
22
+
23
+ todo!();
24
+ };
25
+
26
+ let output_map = GetOutput::map_field(move |fld| match output_type {
27
+ Some(ref dt) => Field::new(fld.name(), dt.clone()),
28
+ None => {
29
+ let mut fld = fld.clone();
30
+ fld.coerce(DataType::Unknown);
31
+ fld
32
+ }
33
+ });
34
+ if agg_list {
35
+ rbexpr.clone().inner.map_list(function, output_map).into()
36
+ } else {
37
+ rbexpr.clone().inner.map(function, output_map).into()
38
+ }
39
+ }
@@ -3,7 +3,7 @@ use polars::io::RowCount;
3
3
  use polars::lazy::frame::{LazyFrame, LazyGroupBy};
4
4
  use polars::prelude::*;
5
5
  use std::cell::RefCell;
6
- use std::io::BufWriter;
6
+ use std::io::{BufWriter, Read};
7
7
 
8
8
  use crate::conversion::*;
9
9
  use crate::file::get_file_like;
@@ -53,6 +53,27 @@ impl From<LazyFrame> for RbLazyFrame {
53
53
  }
54
54
 
55
55
  impl RbLazyFrame {
56
+ pub fn read_json(rb_f: Value) -> RbResult<Self> {
57
+ // it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
58
+ // so don't bother with files.
59
+ let mut json = String::new();
60
+ let _ = get_file_like(rb_f, false)?
61
+ .read_to_string(&mut json)
62
+ .unwrap();
63
+
64
+ // Safety
65
+ // we skipped the serializing/deserializing of the static in lifetime in `DataType`
66
+ // so we actually don't have a lifetime at all when serializing.
67
+
68
+ // &str still has a lifetime. Bit its ok, because we drop it immediately
69
+ // in this scope
70
+ let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
71
+
72
+ let lp = serde_json::from_str::<LogicalPlan>(json)
73
+ .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
74
+ Ok(LazyFrame::from(lp).into())
75
+ }
76
+
56
77
  pub fn new_from_ndjson(
57
78
  path: String,
58
79
  infer_schema_length: Option<usize>,
@@ -211,7 +232,7 @@ impl RbLazyFrame {
211
232
  projection_pushdown: bool,
212
233
  simplify_expr: bool,
213
234
  slice_pushdown: bool,
214
- _cse: bool,
235
+ cse: bool,
215
236
  allow_streaming: bool,
216
237
  ) -> RbLazyFrame {
217
238
  let ldf = self.ldf.clone();
@@ -220,7 +241,7 @@ impl RbLazyFrame {
220
241
  .with_predicate_pushdown(predicate_pushdown)
221
242
  .with_simplify_expr(simplify_expr)
222
243
  .with_slice_pushdown(slice_pushdown)
223
- // .with_common_subplan_elimination(cse)
244
+ .with_common_subplan_elimination(cse)
224
245
  .with_streaming(allow_streaming)
225
246
  .with_projection_pushdown(projection_pushdown);
226
247
  ldf.into()
@@ -349,6 +370,56 @@ impl RbLazyFrame {
349
370
  })
350
371
  }
351
372
 
373
+ pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
374
+ let contexts = contexts
375
+ .each()
376
+ .map(|v| v.unwrap().try_convert())
377
+ .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
378
+ let contexts = contexts
379
+ .into_iter()
380
+ .map(|ldf| ldf.ldf.clone())
381
+ .collect::<Vec<_>>();
382
+ Ok(self.ldf.clone().with_context(contexts).into())
383
+ }
384
+
385
+ #[allow(clippy::too_many_arguments)]
386
+ pub fn join_asof(
387
+ &self,
388
+ other: &RbLazyFrame,
389
+ left_on: &RbExpr,
390
+ right_on: &RbExpr,
391
+ left_by: Option<Vec<String>>,
392
+ right_by: Option<Vec<String>>,
393
+ allow_parallel: bool,
394
+ force_parallel: bool,
395
+ suffix: String,
396
+ strategy: Wrap<AsofStrategy>,
397
+ tolerance: Option<Wrap<AnyValue<'_>>>,
398
+ tolerance_str: Option<String>,
399
+ ) -> RbResult<Self> {
400
+ let ldf = self.ldf.clone();
401
+ let other = other.ldf.clone();
402
+ let left_on = left_on.inner.clone();
403
+ let right_on = right_on.inner.clone();
404
+ Ok(ldf
405
+ .join_builder()
406
+ .with(other)
407
+ .left_on([left_on])
408
+ .right_on([right_on])
409
+ .allow_parallel(allow_parallel)
410
+ .force_parallel(force_parallel)
411
+ .how(JoinType::AsOf(AsOfOptions {
412
+ strategy: strategy.0,
413
+ left_by,
414
+ right_by,
415
+ tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
416
+ tolerance_str,
417
+ }))
418
+ .suffix(suffix)
419
+ .finish()
420
+ .into())
421
+ }
422
+
352
423
  #[allow(clippy::too_many_arguments)]
353
424
  pub fn join(
354
425
  &self,