polars-df 0.1.3 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/Cargo.lock +142 -11
  4. data/Cargo.toml +5 -0
  5. data/ext/polars/Cargo.toml +17 -1
  6. data/ext/polars/src/apply/dataframe.rs +292 -0
  7. data/ext/polars/src/apply/mod.rs +254 -0
  8. data/ext/polars/src/apply/series.rs +1173 -0
  9. data/ext/polars/src/conversion.rs +180 -5
  10. data/ext/polars/src/dataframe.rs +146 -1
  11. data/ext/polars/src/error.rs +12 -0
  12. data/ext/polars/src/lazy/apply.rs +34 -2
  13. data/ext/polars/src/lazy/dataframe.rs +74 -3
  14. data/ext/polars/src/lazy/dsl.rs +136 -0
  15. data/ext/polars/src/lib.rs +199 -1
  16. data/ext/polars/src/list_construction.rs +100 -0
  17. data/ext/polars/src/series.rs +331 -0
  18. data/ext/polars/src/utils.rs +25 -0
  19. data/lib/polars/cat_name_space.rb +54 -0
  20. data/lib/polars/convert.rb +100 -0
  21. data/lib/polars/data_frame.rb +1558 -60
  22. data/lib/polars/date_time_expr.rb +2 -2
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/dynamic_group_by.rb +49 -0
  25. data/lib/polars/expr.rb +4072 -107
  26. data/lib/polars/expr_dispatch.rb +8 -0
  27. data/lib/polars/functions.rb +192 -3
  28. data/lib/polars/group_by.rb +44 -3
  29. data/lib/polars/io.rb +20 -4
  30. data/lib/polars/lazy_frame.rb +800 -26
  31. data/lib/polars/lazy_functions.rb +687 -43
  32. data/lib/polars/lazy_group_by.rb +1 -0
  33. data/lib/polars/list_expr.rb +502 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/rolling_group_by.rb +35 -0
  36. data/lib/polars/series.rb +934 -62
  37. data/lib/polars/string_expr.rb +189 -13
  38. data/lib/polars/string_name_space.rb +690 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +44 -0
  41. data/lib/polars/version.rb +1 -1
  42. data/lib/polars.rb +14 -1
  43. metadata +15 -3
@@ -1,33 +1,58 @@
1
- use magnus::{class, RArray, Symbol, TryConvert, Value, QNIL};
1
+ use magnus::{class, r_hash::ForEach, RArray, RHash, Symbol, TryConvert, Value, QNIL};
2
2
  use polars::chunked_array::object::PolarsObjectSafe;
3
3
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
4
4
  use polars::datatypes::AnyValue;
5
+ use polars::frame::row::Row;
5
6
  use polars::frame::DataFrame;
7
+ use polars::io::avro::AvroCompression;
6
8
  use polars::prelude::*;
7
9
  use polars::series::ops::NullBehavior;
10
+ use std::fmt::{Display, Formatter};
11
+ use std::hash::{Hash, Hasher};
8
12
 
9
- use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
13
+ use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
10
14
 
15
+ pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
16
+ // Safety:
17
+ // `Wrap<T>` is `#[repr(transparent)]`, so `&[T]` and `&[Wrap<T>]` share the same layout.
18
+ unsafe { std::mem::transmute(slice) }
19
+ }
20
+
21
+ #[repr(transparent)]
11
22
  pub struct Wrap<T>(pub T);
12
23
 
24
+ impl<T> Clone for Wrap<T>
25
+ where
26
+ T: Clone,
27
+ {
28
+ fn clone(&self) -> Self {
29
+ Wrap(self.0.clone())
30
+ }
31
+ }
32
+
13
33
  impl<T> From<T> for Wrap<T> {
14
34
  fn from(t: T) -> Self {
15
35
  Wrap(t)
16
36
  }
17
37
  }
18
38
 
19
- pub fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
39
+ pub(crate) fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
20
40
  let seq: RArray = obj.try_convert()?;
21
41
  let len = seq.len();
22
42
  Ok((seq, len))
23
43
  }
24
44
 
25
- pub fn get_df(obj: Value) -> RbResult<DataFrame> {
45
+ pub(crate) fn get_df(obj: Value) -> RbResult<DataFrame> {
26
46
  let rbdf = obj.funcall::<_, _, &RbDataFrame>("_df", ())?;
27
47
  Ok(rbdf.df.borrow().clone())
28
48
  }
29
49
 
30
- pub fn get_series(obj: Value) -> RbResult<Series> {
50
+ pub(crate) fn get_lf(obj: Value) -> RbResult<LazyFrame> {
51
+ let rbdf = obj.funcall::<_, _, &RbLazyFrame>("_ldf", ())?;
52
+ Ok(rbdf.ldf.clone())
53
+ }
54
+
55
+ pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
31
56
  let rbs = obj.funcall::<_, _, &RbSeries>("_s", ())?;
32
57
  Ok(rbs.series.borrow().clone())
33
58
  }
@@ -87,6 +112,25 @@ impl From<Wrap<AnyValue<'_>>> for Value {
87
112
  .unwrap()
88
113
  .funcall::<_, _, Value>("to_date", ())
89
114
  .unwrap(),
115
+ AnyValue::Datetime(v, tu, tz) => {
116
+ let t = match tu {
117
+ TimeUnit::Nanoseconds => todo!(),
118
+ TimeUnit::Microseconds => {
119
+ let sec = v / 1000000;
120
+ let subsec = v % 1000000;
121
+ class::time()
122
+ .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
123
+ .unwrap()
124
+ }
125
+ TimeUnit::Milliseconds => todo!(),
126
+ };
127
+
128
+ if tz.is_some() {
129
+ todo!();
130
+ } else {
131
+ t.funcall::<_, _, Value>("utc", ()).unwrap()
132
+ }
133
+ }
90
134
  _ => todo!(),
91
135
  }
92
136
  }
@@ -150,6 +194,39 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
150
194
  }
151
195
  }
152
196
 
197
+ impl TryConvert for Wrap<AsofStrategy> {
198
+ fn try_convert(ob: Value) -> RbResult<Self> {
199
+ let parsed = match ob.try_convert::<String>()?.as_str() {
200
+ "backward" => AsofStrategy::Backward,
201
+ "forward" => AsofStrategy::Forward,
202
+ v => {
203
+ return Err(RbValueError::new_err(format!(
204
+ "strategy must be one of {{'backward', 'forward'}}, got {}",
205
+ v
206
+ )))
207
+ }
208
+ };
209
+ Ok(Wrap(parsed))
210
+ }
211
+ }
212
+
213
+ impl TryConvert for Wrap<Option<AvroCompression>> {
214
+ fn try_convert(ob: Value) -> RbResult<Self> {
215
+ let parsed = match ob.try_convert::<String>()?.as_str() {
216
+ "uncompressed" => None,
217
+ "snappy" => Some(AvroCompression::Snappy),
218
+ "deflate" => Some(AvroCompression::Deflate),
219
+ v => {
220
+ return Err(RbValueError::new_err(format!(
221
+ "compression must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {}",
222
+ v
223
+ )))
224
+ }
225
+ };
226
+ Ok(Wrap(parsed))
227
+ }
228
+ }
229
+
153
230
  impl TryConvert for Wrap<CategoricalOrdering> {
154
231
  fn try_convert(ob: Value) -> RbResult<Self> {
155
232
  let parsed = match ob.try_convert::<String>()?.as_str() {
@@ -238,6 +315,22 @@ impl TryConvert for Wrap<JoinType> {
238
315
  }
239
316
  }
240
317
 
318
+ impl TryConvert for Wrap<ListToStructWidthStrategy> {
319
+ fn try_convert(ob: Value) -> RbResult<Self> {
320
+ let parsed = match ob.try_convert::<String>()?.as_str() {
321
+ "first_non_null" => ListToStructWidthStrategy::FirstNonNull,
322
+ "max_width" => ListToStructWidthStrategy::MaxWidth,
323
+ v => {
324
+ return Err(RbValueError::new_err(format!(
325
+ "n_field_strategy must be one of {{'first_non_null', 'max_width'}}, got {}",
326
+ v
327
+ )))
328
+ }
329
+ };
330
+ Ok(Wrap(parsed))
331
+ }
332
+ }
333
+
241
334
  impl TryConvert for Wrap<NullBehavior> {
242
335
  fn try_convert(ob: Value) -> RbResult<Self> {
243
336
  let parsed = match ob.try_convert::<String>()?.as_str() {
@@ -425,18 +518,100 @@ pub fn parse_parquet_compression(
425
518
  Ok(parsed)
426
519
  }
427
520
 
521
+ impl<'s> TryConvert for Wrap<Row<'s>> {
522
+ fn try_convert(ob: Value) -> RbResult<Self> {
523
+ let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
524
+ for item in ob.try_convert::<RArray>()?.each() {
525
+ vals.push(item?.try_convert::<Wrap<AnyValue<'s>>>()?);
526
+ }
527
+ let vals: Vec<AnyValue> = unsafe { std::mem::transmute(vals) };
528
+ Ok(Wrap(Row(vals)))
529
+ }
530
+ }
531
+
532
+ impl TryConvert for Wrap<Schema> {
533
+ fn try_convert(ob: Value) -> RbResult<Self> {
534
+ let dict = ob.try_convert::<RHash>()?;
535
+
536
+ let mut schema = Vec::new();
537
+ dict.foreach(|key: String, val: Wrap<DataType>| {
538
+ schema.push(Field::new(&key, val.0));
539
+ Ok(ForEach::Continue)
540
+ })
541
+ .unwrap();
542
+
543
+ Ok(Wrap(schema.into_iter().into()))
544
+ }
545
+ }
546
+
547
+ #[derive(Clone, Debug)]
428
548
  pub struct ObjectValue {
429
549
  pub inner: Value,
430
550
  }
431
551
 
552
+ impl Hash for ObjectValue {
553
+ fn hash<H: Hasher>(&self, state: &mut H) {
554
+ let h = self
555
+ .inner
556
+ .funcall::<_, _, isize>("hash", ())
557
+ .expect("should be hashable");
558
+ state.write_isize(h)
559
+ }
560
+ }
561
+
562
+ impl Eq for ObjectValue {}
563
+
564
+ impl PartialEq for ObjectValue {
565
+ fn eq(&self, other: &Self) -> bool {
566
+ self.inner.eql(&other.inner).unwrap_or(false)
567
+ }
568
+ }
569
+
570
+ impl Display for ObjectValue {
571
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
572
+ write!(f, "{}", self.inner)
573
+ }
574
+ }
575
+
576
+ impl PolarsObject for ObjectValue {
577
+ fn type_name() -> &'static str {
578
+ "object"
579
+ }
580
+ }
581
+
582
+ impl From<Value> for ObjectValue {
583
+ fn from(v: Value) -> Self {
584
+ Self { inner: v }
585
+ }
586
+ }
587
+
588
+ impl TryConvert for ObjectValue {
589
+ fn try_convert(ob: Value) -> RbResult<Self> {
590
+ Ok(ObjectValue { inner: ob })
591
+ }
592
+ }
593
+
432
594
  impl From<&dyn PolarsObjectSafe> for &ObjectValue {
433
595
  fn from(val: &dyn PolarsObjectSafe) -> Self {
434
596
  unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
435
597
  }
436
598
  }
437
599
 
600
+ // TODO remove
438
601
  impl ObjectValue {
439
602
  pub fn to_object(&self) -> Value {
440
603
  self.inner
441
604
  }
442
605
  }
606
+
607
+ impl From<ObjectValue> for Value {
608
+ fn from(val: ObjectValue) -> Self {
609
+ val.inner
610
+ }
611
+ }
612
+
613
+ impl Default for ObjectValue {
614
+ fn default() -> Self {
615
+ ObjectValue { inner: *QNIL }
616
+ }
617
+ }
@@ -1,15 +1,21 @@
1
1
  use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
2
+ use polars::io::avro::AvroCompression;
2
3
  use polars::io::mmap::ReaderBytes;
3
4
  use polars::io::RowCount;
5
+ use polars::prelude::pivot::{pivot, pivot_stable};
4
6
  use polars::prelude::*;
5
7
  use std::cell::RefCell;
6
8
  use std::io::{BufWriter, Cursor};
7
9
  use std::ops::Deref;
8
10
 
11
+ use crate::apply::dataframe::{
12
+ apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
13
+ apply_lambda_with_utf8_out_type,
14
+ };
9
15
  use crate::conversion::*;
10
16
  use crate::file::{get_file_like, get_mmap_bytes_reader};
11
17
  use crate::series::{to_rbseries_collection, to_series_collection};
12
- use crate::{series, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
18
+ use crate::{series, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
13
19
 
14
20
  #[magnus::wrap(class = "Polars::RbDataFrame")]
15
21
  pub struct RbDataFrame {
@@ -179,6 +185,48 @@ impl RbDataFrame {
179
185
  Ok(RbDataFrame::new(df))
180
186
  }
181
187
 
188
+ pub fn read_avro(
189
+ rb_f: Value,
190
+ columns: Option<Vec<String>>,
191
+ projection: Option<Vec<usize>>,
192
+ n_rows: Option<usize>,
193
+ ) -> RbResult<Self> {
194
+ use polars::io::avro::AvroReader;
195
+
196
+ let file = get_file_like(rb_f, false)?;
197
+ let df = AvroReader::new(file)
198
+ .with_projection(projection)
199
+ .with_columns(columns)
200
+ .with_n_rows(n_rows)
201
+ .finish()
202
+ .map_err(RbPolarsErr::from)?;
203
+ Ok(RbDataFrame::new(df))
204
+ }
205
+
206
+ pub fn write_avro(
207
+ &self,
208
+ rb_f: Value,
209
+ compression: Wrap<Option<AvroCompression>>,
210
+ ) -> RbResult<()> {
211
+ use polars::io::avro::AvroWriter;
212
+
213
+ if let Ok(s) = rb_f.try_convert::<String>() {
214
+ let f = std::fs::File::create(&s).unwrap();
215
+ AvroWriter::new(f)
216
+ .with_compression(compression.0)
217
+ .finish(&mut self.df.borrow_mut())
218
+ .map_err(RbPolarsErr::from)?;
219
+ } else {
220
+ let mut buf = get_file_like(rb_f, true)?;
221
+ AvroWriter::new(&mut buf)
222
+ .with_compression(compression.0)
223
+ .finish(&mut self.df.borrow_mut())
224
+ .map_err(RbPolarsErr::from)?;
225
+ }
226
+
227
+ Ok(())
228
+ }
229
+
182
230
  pub fn read_json(rb_f: Value) -> RbResult<Self> {
183
231
  // memmap the file first
184
232
  let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
@@ -238,6 +286,14 @@ impl RbDataFrame {
238
286
  Ok(())
239
287
  }
240
288
 
289
+ pub fn read_hashes(
290
+ _dicts: Value,
291
+ _infer_schema_length: Option<usize>,
292
+ _schema_overwrite: Option<Wrap<Schema>>,
293
+ ) -> RbResult<Self> {
294
+ Err(RbPolarsErr::todo())
295
+ }
296
+
241
297
  pub fn read_hash(data: RHash) -> RbResult<Self> {
242
298
  let mut cols: Vec<Series> = Vec::new();
243
299
  data.foreach(|name: String, values: Value| {
@@ -751,6 +807,31 @@ impl RbDataFrame {
751
807
  Ok(RbDataFrame::new(df))
752
808
  }
753
809
 
810
+ pub fn pivot_expr(
811
+ &self,
812
+ values: Vec<String>,
813
+ index: Vec<String>,
814
+ columns: Vec<String>,
815
+ aggregate_expr: &RbExpr,
816
+ maintain_order: bool,
817
+ sort_columns: bool,
818
+ ) -> RbResult<Self> {
819
+ let fun = match maintain_order {
820
+ true => pivot_stable,
821
+ false => pivot,
822
+ };
823
+ let df = fun(
824
+ &self.df.borrow(),
825
+ values,
826
+ index,
827
+ columns,
828
+ aggregate_expr.inner.clone(),
829
+ sort_columns,
830
+ )
831
+ .map_err(RbPolarsErr::from)?;
832
+ Ok(RbDataFrame::new(df))
833
+ }
834
+
754
835
  pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<Vec<Self>> {
755
836
  let out = if stable {
756
837
  self.df.borrow().partition_by_stable(groups)
@@ -870,10 +951,74 @@ impl RbDataFrame {
870
951
  df.into()
871
952
  }
872
953
 
954
+ pub fn apply(
955
+ &self,
956
+ lambda: Value,
957
+ output_type: Option<Wrap<DataType>>,
958
+ inference_size: usize,
959
+ ) -> RbResult<(Value, bool)> {
960
+ let df = &self.df.borrow();
961
+
962
+ let output_type = output_type.map(|dt| dt.0);
963
+ let out = match output_type {
964
+ Some(DataType::Int32) => {
965
+ apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None).into_series()
966
+ }
967
+ Some(DataType::Int64) => {
968
+ apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None).into_series()
969
+ }
970
+ Some(DataType::UInt32) => {
971
+ apply_lambda_with_primitive_out_type::<UInt32Type>(df, lambda, 0, None)
972
+ .into_series()
973
+ }
974
+ Some(DataType::UInt64) => {
975
+ apply_lambda_with_primitive_out_type::<UInt64Type>(df, lambda, 0, None)
976
+ .into_series()
977
+ }
978
+ Some(DataType::Float32) => {
979
+ apply_lambda_with_primitive_out_type::<Float32Type>(df, lambda, 0, None)
980
+ .into_series()
981
+ }
982
+ Some(DataType::Float64) => {
983
+ apply_lambda_with_primitive_out_type::<Float64Type>(df, lambda, 0, None)
984
+ .into_series()
985
+ }
986
+ Some(DataType::Boolean) => {
987
+ apply_lambda_with_bool_out_type(df, lambda, 0, None).into_series()
988
+ }
989
+ Some(DataType::Date) => {
990
+ apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None)
991
+ .into_date()
992
+ .into_series()
993
+ }
994
+ Some(DataType::Datetime(tu, tz)) => {
995
+ apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None)
996
+ .into_datetime(tu, tz)
997
+ .into_series()
998
+ }
999
+ Some(DataType::Utf8) => {
1000
+ apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
1001
+ }
1002
+ _ => return apply_lambda_unknown(df, lambda, inference_size),
1003
+ };
1004
+
1005
+ Ok((RbSeries::from(out).into(), false))
1006
+ }
1007
+
873
1008
  pub fn shrink_to_fit(&self) {
874
1009
  self.df.borrow_mut().shrink_to_fit();
875
1010
  }
876
1011
 
1012
+ pub fn hash_rows(&self, k0: u64, k1: u64, k2: u64, k3: u64) -> RbResult<RbSeries> {
1013
+ let hb = ahash::RandomState::with_seeds(k0, k1, k2, k3);
1014
+ let hash = self
1015
+ .df
1016
+ .borrow_mut()
1017
+ .hash_rows(Some(hb))
1018
+ .map_err(RbPolarsErr::from)?;
1019
+ Ok(hash.into_series().into())
1020
+ }
1021
+
877
1022
  pub fn transpose(&self, include_header: bool, names: String) -> RbResult<Self> {
878
1023
  let mut df = self.df.borrow().transpose().map_err(RbPolarsErr::from)?;
879
1024
  if include_header {
@@ -22,6 +22,10 @@ impl RbPolarsErr {
22
22
  pub fn other(message: String) -> Error {
23
23
  Error::runtime_error(message)
24
24
  }
25
+
26
+ pub fn todo() -> Error {
27
+ Error::runtime_error("not implemented yet")
28
+ }
25
29
  }
26
30
 
27
31
  pub struct RbValueError {}
@@ -31,3 +35,11 @@ impl RbValueError {
31
35
  Error::new(arg_error(), message)
32
36
  }
33
37
  }
38
+
39
+ pub struct ComputeError {}
40
+
41
+ impl ComputeError {
42
+ pub fn new_err(message: String) -> Error {
43
+ Error::runtime_error(message)
44
+ }
45
+ }
@@ -1,7 +1,39 @@
1
1
  use magnus::Value;
2
- use polars::error::PolarsResult;
3
- use polars::series::Series;
2
+ use polars::prelude::*;
3
+
4
+ use crate::lazy::dsl::RbExpr;
5
+ use crate::Wrap;
4
6
 
5
7
  pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Series> {
6
8
  todo!();
7
9
  }
10
+
11
+ pub fn map_single(
12
+ rbexpr: &RbExpr,
13
+ _lambda: Value,
14
+ output_type: Option<Wrap<DataType>>,
15
+ agg_list: bool,
16
+ ) -> RbExpr {
17
+ let output_type = output_type.map(|wrap| wrap.0);
18
+
19
+ let output_type2 = output_type.clone();
20
+ let function = move |_s: Series| {
21
+ let _output_type = output_type2.clone().unwrap_or(DataType::Unknown);
22
+
23
+ todo!();
24
+ };
25
+
26
+ let output_map = GetOutput::map_field(move |fld| match output_type {
27
+ Some(ref dt) => Field::new(fld.name(), dt.clone()),
28
+ None => {
29
+ let mut fld = fld.clone();
30
+ fld.coerce(DataType::Unknown);
31
+ fld
32
+ }
33
+ });
34
+ if agg_list {
35
+ rbexpr.clone().inner.map_list(function, output_map).into()
36
+ } else {
37
+ rbexpr.clone().inner.map(function, output_map).into()
38
+ }
39
+ }
@@ -3,7 +3,7 @@ use polars::io::RowCount;
3
3
  use polars::lazy::frame::{LazyFrame, LazyGroupBy};
4
4
  use polars::prelude::*;
5
5
  use std::cell::RefCell;
6
- use std::io::BufWriter;
6
+ use std::io::{BufWriter, Read};
7
7
 
8
8
  use crate::conversion::*;
9
9
  use crate::file::get_file_like;
@@ -53,6 +53,27 @@ impl From<LazyFrame> for RbLazyFrame {
53
53
  }
54
54
 
55
55
  impl RbLazyFrame {
56
+ pub fn read_json(rb_f: Value) -> RbResult<Self> {
57
+ // it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
58
+ // so don't bother with files.
59
+ let mut json = String::new();
60
+ let _ = get_file_like(rb_f, false)?
61
+ .read_to_string(&mut json)
62
+ .unwrap();
63
+
64
+ // Safety
65
+ // we skipped serializing/deserializing the `'static` lifetime in `DataType`
66
+ // so we actually don't have a lifetime at all when serializing.
67
+
68
+ // &str still has a lifetime. But it's ok, because we drop it immediately
69
+ // in this scope
70
+ let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
71
+
72
+ let lp = serde_json::from_str::<LogicalPlan>(json)
73
+ .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
74
+ Ok(LazyFrame::from(lp).into())
75
+ }
76
+
56
77
  pub fn new_from_ndjson(
57
78
  path: String,
58
79
  infer_schema_length: Option<usize>,
@@ -211,7 +232,7 @@ impl RbLazyFrame {
211
232
  projection_pushdown: bool,
212
233
  simplify_expr: bool,
213
234
  slice_pushdown: bool,
214
- _cse: bool,
235
+ cse: bool,
215
236
  allow_streaming: bool,
216
237
  ) -> RbLazyFrame {
217
238
  let ldf = self.ldf.clone();
@@ -220,7 +241,7 @@ impl RbLazyFrame {
220
241
  .with_predicate_pushdown(predicate_pushdown)
221
242
  .with_simplify_expr(simplify_expr)
222
243
  .with_slice_pushdown(slice_pushdown)
223
- // .with_common_subplan_elimination(cse)
244
+ .with_common_subplan_elimination(cse)
224
245
  .with_streaming(allow_streaming)
225
246
  .with_projection_pushdown(projection_pushdown);
226
247
  ldf.into()
@@ -349,6 +370,56 @@ impl RbLazyFrame {
349
370
  })
350
371
  }
351
372
 
373
+ pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
374
+ let contexts = contexts
375
+ .each()
376
+ .map(|v| v.unwrap().try_convert())
377
+ .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
378
+ let contexts = contexts
379
+ .into_iter()
380
+ .map(|ldf| ldf.ldf.clone())
381
+ .collect::<Vec<_>>();
382
+ Ok(self.ldf.clone().with_context(contexts).into())
383
+ }
384
+
385
+ #[allow(clippy::too_many_arguments)]
386
+ pub fn join_asof(
387
+ &self,
388
+ other: &RbLazyFrame,
389
+ left_on: &RbExpr,
390
+ right_on: &RbExpr,
391
+ left_by: Option<Vec<String>>,
392
+ right_by: Option<Vec<String>>,
393
+ allow_parallel: bool,
394
+ force_parallel: bool,
395
+ suffix: String,
396
+ strategy: Wrap<AsofStrategy>,
397
+ tolerance: Option<Wrap<AnyValue<'_>>>,
398
+ tolerance_str: Option<String>,
399
+ ) -> RbResult<Self> {
400
+ let ldf = self.ldf.clone();
401
+ let other = other.ldf.clone();
402
+ let left_on = left_on.inner.clone();
403
+ let right_on = right_on.inner.clone();
404
+ Ok(ldf
405
+ .join_builder()
406
+ .with(other)
407
+ .left_on([left_on])
408
+ .right_on([right_on])
409
+ .allow_parallel(allow_parallel)
410
+ .force_parallel(force_parallel)
411
+ .how(JoinType::AsOf(AsOfOptions {
412
+ strategy: strategy.0,
413
+ left_by,
414
+ right_by,
415
+ tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
416
+ tolerance_str,
417
+ }))
418
+ .suffix(suffix)
419
+ .finish()
420
+ .into())
421
+ }
422
+
352
423
  #[allow(clippy::too_many_arguments)]
353
424
  pub fn join(
354
425
  &self,