RubyGems - polars-df - Versions diffs - 0.1.1 → 0.1.3 - Mend

polars-df 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

checksums.yaml +4 -4
data/.yardopts +3 -0
data/CHANGELOG.md +8 -0
data/Cargo.lock +2 -1
data/README.md +1 -1
data/ext/polars/Cargo.toml +7 -1
data/ext/polars/src/batched_csv.rs +120 -0
data/ext/polars/src/conversion.rs +139 -6
data/ext/polars/src/dataframe.rs +360 -15
data/ext/polars/src/error.rs +9 -0
data/ext/polars/src/file.rs +8 -7
data/ext/polars/src/lazy/apply.rs +7 -0
data/ext/polars/src/lazy/dataframe.rs +135 -3
data/ext/polars/src/lazy/dsl.rs +97 -2
data/ext/polars/src/lazy/meta.rs +1 -1
data/ext/polars/src/lazy/mod.rs +1 -0
data/ext/polars/src/lib.rs +227 -12
data/ext/polars/src/series.rs +190 -38
data/ext/polars/src/set.rs +91 -0
data/ext/polars/src/utils.rs +19 -0
data/lib/polars/batched_csv_reader.rb +96 -0
data/lib/polars/cat_expr.rb +39 -0
data/lib/polars/data_frame.rb +2813 -100
data/lib/polars/date_time_expr.rb +1282 -7
data/lib/polars/exceptions.rb +20 -0
data/lib/polars/expr.rb +631 -11
data/lib/polars/expr_dispatch.rb +14 -0
data/lib/polars/functions.rb +219 -0
data/lib/polars/group_by.rb +517 -0
data/lib/polars/io.rb +763 -4
data/lib/polars/lazy_frame.rb +1415 -67
data/lib/polars/lazy_functions.rb +430 -9
data/lib/polars/lazy_group_by.rb +79 -0
data/lib/polars/list_expr.rb +5 -0
data/lib/polars/meta_expr.rb +21 -0
data/lib/polars/series.rb +2244 -192
data/lib/polars/slice.rb +104 -0
data/lib/polars/string_expr.rb +663 -2
data/lib/polars/struct_expr.rb +73 -0
data/lib/polars/utils.rb +76 -3
data/lib/polars/version.rb +2 -1
data/lib/polars/when.rb +1 -0
data/lib/polars/when_then.rb +1 -0
data/lib/polars.rb +8 -2
metadata +12 -2

data/ext/polars/src/dataframe.rs CHANGED

@@ -1,15 +1,14 @@
-use magnus::{r_hash::ForEach, Error, RArray, RHash, RString, Value};
+use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
 use polars::io::mmap::ReaderBytes;
+use polars::io::RowCount;
 use polars::prelude::*;
 use std::cell::RefCell;
-use std::fs::File;
-use std::io::{BufReader, BufWriter, Cursor};
+use std::io::{BufWriter, Cursor};
 use std::ops::Deref;
-use std::path::PathBuf;
 use crate::conversion::*;
 use crate::file::{get_file_like, get_mmap_bytes_reader};
-use crate::series::to_rbseries_collection;
+use crate::series::{to_rbseries_collection, to_series_collection};
 use crate::{series, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
 #[magnus::wrap(class = "Polars::RbDataFrame")]
@@ -43,22 +42,141 @@ impl RbDataFrame {
         self.df.borrow().estimated_size()
     }
-    pub fn read_csv(rb_f: Value, has_header: bool) -> RbResult<Self> {
+    pub fn read_csv(arguments: &[Value]) -> RbResult<Self> {
+        // start arguments
+        // this pattern is needed for more than 16
+        let rb_f: Value = arguments[0].try_convert()?;
+        let infer_schema_length: Option<usize> = arguments[1].try_convert()?;
+        let chunk_size: usize = arguments[2].try_convert()?;
+        let has_header: bool = arguments[3].try_convert()?;
+        let ignore_errors: bool = arguments[4].try_convert()?;
+        let n_rows: Option<usize> = arguments[5].try_convert()?;
+        let skip_rows: usize = arguments[6].try_convert()?;
+        let projection: Option<Vec<usize>> = arguments[7].try_convert()?;
+        let sep: String = arguments[8].try_convert()?;
+        let rechunk: bool = arguments[9].try_convert()?;
+        let columns: Option<Vec<String>> = arguments[10].try_convert()?;
+        let encoding: Wrap<CsvEncoding> = arguments[11].try_convert()?;
+        let n_threads: Option<usize> = arguments[12].try_convert()?;
+        let path: Option<String> = arguments[13].try_convert()?;
+        let overwrite_dtype: Option<Vec<(String, Wrap<DataType>)>> = arguments[14].try_convert()?;
+        // TODO fix
+        let overwrite_dtype_slice: Option<Vec<Wrap<DataType>>> = None; // arguments[15].try_convert()?;
+        let low_memory: bool = arguments[16].try_convert()?;
+        let comment_char: Option<String> = arguments[17].try_convert()?;
+        let quote_char: Option<String> = arguments[18].try_convert()?;
+        let null_values: Option<Wrap<NullValues>> = arguments[19].try_convert()?;
+        let parse_dates: bool = arguments[20].try_convert()?;
+        let skip_rows_after_header: usize = arguments[21].try_convert()?;
+        let row_count: Option<(String, IdxSize)> = arguments[22].try_convert()?;
+        let sample_size: usize = arguments[23].try_convert()?;
+        let eol_char: String = arguments[24].try_convert()?;
+        // end arguments
+        let null_values = null_values.map(|w| w.0);
+        let comment_char = comment_char.map(|s| s.as_bytes()[0]);
+        let eol_char = eol_char.as_bytes()[0];
+        let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
+        let quote_char = if let Some(s) = quote_char {
+            if s.is_empty() {
+                None
+            } else {
+                Some(s.as_bytes()[0])
+            }
+        } else {
+            None
+        };
+        let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
+            let fields = overwrite_dtype.iter().map(|(name, dtype)| {
+                let dtype = dtype.0.clone();
+                Field::new(name, dtype)
+            });
+            Schema::from(fields)
+        });
+        let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
+            overwrite_dtype
+                .iter()
+                .map(|dt| dt.0.clone())
+                .collect::<Vec<_>>()
+        });
         let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
         let df = CsvReader::new(mmap_bytes_r)
+            .infer_schema(infer_schema_length)
             .has_header(has_header)
+            .with_n_rows(n_rows)
+            .with_delimiter(sep.as_bytes()[0])
+            .with_skip_rows(skip_rows)
+            .with_ignore_parser_errors(ignore_errors)
+            .with_projection(projection)
+            .with_rechunk(rechunk)
+            .with_chunk_size(chunk_size)
+            .with_encoding(encoding.0)
+            .with_columns(columns)
+            .with_n_threads(n_threads)
+            .with_path(path)
+            .with_dtypes(overwrite_dtype.as_ref())
+            .with_dtypes_slice(overwrite_dtype_slice.as_deref())
+            .low_memory(low_memory)
+            .with_comment_char(comment_char)
+            .with_null_values(null_values)
+            .with_parse_dates(parse_dates)
+            .with_quote_char(quote_char)
+            .with_end_of_line_char(eol_char)
+            .with_skip_rows_after_header(skip_rows_after_header)
+            .with_row_count(row_count)
+            .sample_size(sample_size)
             .finish()
             .map_err(RbPolarsErr::from)?;
         Ok(df.into())
     }
-    pub fn read_parquet(path: PathBuf) -> RbResult<Self> {
-        let f = File::open(&path).map_err(|e| Error::runtime_error(e.to_string()))?;
-        let reader = BufReader::new(f);
-        ParquetReader::new(reader)
+    pub fn read_parquet(
+        rb_f: Value,
+        columns: Option<Vec<String>>,
+        projection: Option<Vec<usize>>,
+        n_rows: Option<usize>,
+        parallel: Wrap<ParallelStrategy>,
+        row_count: Option<(String, IdxSize)>,
+        low_memory: bool,
+    ) -> RbResult<Self> {
+        let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
+        let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
+        let df = ParquetReader::new(mmap_bytes_r)
+            .with_projection(projection)
+            .with_columns(columns)
+            .read_parallel(parallel.0)
+            .with_n_rows(n_rows)
+            .with_row_count(row_count)
+            .set_low_memory(low_memory)
             .finish()
-            .map_err(RbPolarsErr::from)
-            .map(|v| v.into())
+            .map_err(RbPolarsErr::from)?;
+        Ok(RbDataFrame::new(df))
+    }
+    pub fn read_ipc(
+        rb_f: Value,
+        columns: Option<Vec<String>>,
+        projection: Option<Vec<usize>>,
+        n_rows: Option<usize>,
+        row_count: Option<(String, IdxSize)>,
+        memory_map: bool,
+    ) -> RbResult<Self> {
+        let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
+        let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
+        let df = IpcReader::new(mmap_bytes_r)
+            .with_projection(projection)
+            .with_columns(columns)
+            .with_n_rows(n_rows)
+            .with_row_count(row_count)
+            .memory_mapped(memory_map)
+            .finish()
+            .map_err(RbPolarsErr::from)?;
+        Ok(RbDataFrame::new(df))
     }
     pub fn read_json(rb_f: Value) -> RbResult<Self> {
@@ -185,6 +303,77 @@ impl RbDataFrame {
         Ok(())
     }
+    pub fn write_ipc(
+        &self,
+        rb_f: Value,
+        compression: Wrap<Option<IpcCompression>>,
+    ) -> RbResult<()> {
+        if let Ok(s) = rb_f.try_convert::<String>() {
+            let f = std::fs::File::create(&s).unwrap();
+            IpcWriter::new(f)
+                .with_compression(compression.0)
+                .finish(&mut self.df.borrow_mut())
+                .map_err(RbPolarsErr::from)?;
+        } else {
+            let mut buf = get_file_like(rb_f, true)?;
+            IpcWriter::new(&mut buf)
+                .with_compression(compression.0)
+                .finish(&mut self.df.borrow_mut())
+                .map_err(RbPolarsErr::from)?;
+        }
+        Ok(())
+    }
+    pub fn row_tuple(&self, idx: i64) -> Value {
+        let idx = if idx < 0 {
+            (self.df.borrow().height() as i64 + idx) as usize
+        } else {
+            idx as usize
+        };
+        RArray::from_vec(
+            self.df
+                .borrow()
+                .get_columns()
+                .iter()
+                .map(|s| match s.dtype() {
+                    DataType::Object(_) => {
+                        let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
+                        obj.unwrap().to_object()
+                    }
+                    _ => Wrap(s.get(idx)).into(),
+                })
+                .collect(),
+        )
+        .into()
+    }
+    pub fn row_tuples(&self) -> Value {
+        let df = &self.df;
+        RArray::from_vec(
+            (0..df.borrow().height())
+                .map(|idx| {
+                    RArray::from_vec(
+                        self.df
+                            .borrow()
+                            .get_columns()
+                            .iter()
+                            .map(|s| match s.dtype() {
+                                DataType::Object(_) => {
+                                    let obj: Option<&ObjectValue> =
+                                        s.get_object(idx).map(|any| any.into());
+                                    obj.unwrap().to_object()
+                                }
+                                _ => Wrap(s.get(idx)).into(),
+                            })
+                            .collect(),
+                    )
+                })
+                .collect(),
+        )
+        .into()
+    }
     pub fn write_parquet(
         &self,
         rb_f: Value,
@@ -210,6 +399,86 @@ impl RbDataFrame {
         Ok(())
     }
+    pub fn add(&self, s: &RbSeries) -> RbResult<Self> {
+        let df = (&*self.df.borrow() + &*s.series.borrow()).map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+    pub fn sub(&self, s: &RbSeries) -> RbResult<Self> {
+        let df = (&*self.df.borrow() - &*s.series.borrow()).map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+    pub fn div(&self, s: &RbSeries) -> RbResult<Self> {
+        let df = (&*self.df.borrow() / &*s.series.borrow()).map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+    pub fn mul(&self, s: &RbSeries) -> RbResult<Self> {
+        let df = (&*self.df.borrow() * &*s.series.borrow()).map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+    pub fn rem(&self, s: &RbSeries) -> RbResult<Self> {
+        let df = (&*self.df.borrow() % &*s.series.borrow()).map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+    pub fn add_df(&self, s: &Self) -> RbResult<Self> {
+        let df = (&*self.df.borrow() + &*s.df.borrow()).map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+    pub fn sub_df(&self, s: &Self) -> RbResult<Self> {
+        let df = (&*self.df.borrow() - &*s.df.borrow()).map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+    pub fn div_df(&self, s: &Self) -> RbResult<Self> {
+        let df = (&*self.df.borrow() / &*s.df.borrow()).map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+    pub fn mul_df(&self, s: &Self) -> RbResult<Self> {
+        let df = (&*self.df.borrow() * &*s.df.borrow()).map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+    pub fn rem_df(&self, s: &Self) -> RbResult<Self> {
+        let df = (&*self.df.borrow() % &*s.df.borrow()).map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+    pub fn sample_n(
+        &self,
+        n: usize,
+        with_replacement: bool,
+        shuffle: bool,
+        seed: Option<u64>,
+    ) -> RbResult<Self> {
+        let df = self
+            .df
+            .borrow()
+            .sample_n(n, with_replacement, shuffle, seed)
+            .map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+    pub fn sample_frac(
+        &self,
+        frac: f64,
+        with_replacement: bool,
+        shuffle: bool,
+        seed: Option<u64>,
+    ) -> RbResult<Self> {
+        let df = self
+            .df
+            .borrow()
+            .sample_frac(frac, with_replacement, shuffle, seed)
+            .map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
     pub fn rechunk(&self) -> Self {
         self.df.borrow().agg_chunks().into()
     }
@@ -240,11 +509,11 @@ impl RbDataFrame {
         Ok(())
     }
-    pub fn dtypes(&self) -> Vec<String> {
+    pub fn dtypes(&self) -> Vec<Value> {
         self.df
             .borrow()
             .iter()
-            .map(|s| s.dtype().to_string())
+            .map(|s| Wrap(s.dtype().clone()).into())
             .collect()
     }
@@ -265,6 +534,73 @@ impl RbDataFrame {
         self.df.borrow().width()
     }
+    pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> {
+        let columns = to_series_collection(columns)?;
+        self.df
+            .borrow_mut()
+            .hstack_mut(&columns)
+            .map_err(RbPolarsErr::from)?;
+        Ok(())
+    }
+    pub fn hstack(&self, columns: RArray) -> RbResult<Self> {
+        let columns = to_series_collection(columns)?;
+        let df = self
+            .df
+            .borrow()
+            .hstack(&columns)
+            .map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+    pub fn extend(&self, df: &RbDataFrame) -> RbResult<()> {
+        self.df
+            .borrow_mut()
+            .extend(&df.df.borrow())
+            .map_err(RbPolarsErr::from)?;
+        Ok(())
+    }
+    pub fn vstack_mut(&self, df: &RbDataFrame) -> RbResult<()> {
+        self.df
+            .borrow_mut()
+            .vstack_mut(&df.df.borrow())
+            .map_err(RbPolarsErr::from)?;
+        Ok(())
+    }
+    pub fn vstack(&self, df: &RbDataFrame) -> RbResult<Self> {
+        let df = self
+            .df
+            .borrow()
+            .vstack(&df.df.borrow())
+            .map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+    pub fn drop_in_place(&self, name: String) -> RbResult<RbSeries> {
+        let s = self
+            .df
+            .borrow_mut()
+            .drop_in_place(&name)
+            .map_err(RbPolarsErr::from)?;
+        Ok(RbSeries::new(s))
+    }
+    pub fn drop_nulls(&self, subset: Option<Vec<String>>) -> RbResult<Self> {
+        let df = self
+            .df
+            .borrow()
+            .drop_nulls(subset.as_ref().map(|s| s.as_ref()))
+            .map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+    pub fn drop(&self, name: String) -> RbResult<Self> {
+        let df = self.df.borrow().drop(&name).map_err(RbPolarsErr::from)?;
+        Ok(RbDataFrame::new(df))
+    }
     pub fn select_at_idx(&self, idx: usize) -> Option<RbSeries> {
         self.df
             .borrow()
@@ -272,6 +608,10 @@ impl RbDataFrame {
             .map(|s| RbSeries::new(s.clone()))
     }
+    pub fn find_idx_by_name(&self, name: String) -> Option<usize> {
+        self.df.borrow().find_idx_by_name(&name)
+    }
     // TODO remove clone
     pub fn column(&self, name: String) -> RbResult<RbSeries> {
         self.df
@@ -418,7 +758,7 @@ impl RbDataFrame {
             self.df.borrow().partition_by(groups)
         }
         .map_err(RbPolarsErr::from)?;
-        Ok(out.into_iter().map(|v| RbDataFrame::new(v)).collect())
+        Ok(out.into_iter().map(RbDataFrame::new).collect())
     }
     pub fn shift(&self, periods: i64) -> Self {
@@ -574,6 +914,11 @@ impl RbDataFrame {
         Ok(out.into())
     }
+    pub fn to_struct(&self, name: String) -> RbSeries {
+        let s = self.df.borrow().clone().into_struct(&name);
+        s.into_series().into()
+    }
     pub fn unnest(&self, names: Vec<String>) -> RbResult<Self> {
         let df = self.df.borrow().unnest(names).map_err(RbPolarsErr::from)?;
         Ok(df.into())

data/ext/polars/src/error.rs CHANGED

@@ -1,5 +1,6 @@
 use magnus::exception::arg_error;
 use magnus::Error;
+use polars::error::ArrowError;
 use polars::prelude::PolarsError;
 pub struct RbPolarsErr {}
@@ -10,6 +11,14 @@ impl RbPolarsErr {
         Error::runtime_error(e.to_string())
     }
+    pub fn arrow(e: ArrowError) -> Error {
+        Error::runtime_error(e.to_string())
+    }
+    pub fn io(e: std::io::Error) -> Error {
+        Error::runtime_error(e.to_string())
+    }
     pub fn other(message: String) -> Error {
         Error::runtime_error(message)
     }

data/ext/polars/src/file.rs CHANGED

@@ -1,18 +1,19 @@
 use magnus::{Error, RString, Value};
 use polars::io::mmap::MmapBytesReader;
-use std::fs::{File, OpenOptions};
+use std::fs::File;
 use std::io::Cursor;
 use std::path::PathBuf;
 use crate::RbResult;
 pub fn get_file_like(f: Value, truncate: bool) -> RbResult<File> {
-    OpenOptions::new()
-        .write(true)
-        .create(true)
-        .truncate(truncate)
-        .open(f.try_convert::<PathBuf>()?)
-        .map_err(|e| Error::runtime_error(e.to_string()))
+    let str_slice = f.try_convert::<PathBuf>()?;
+    let f = if truncate {
+        File::create(str_slice).map_err(|e| Error::runtime_error(e.to_string()))?
+    } else {
+        File::open(str_slice).map_err(|e| Error::runtime_error(e.to_string()))?
+    };
+    Ok(f)
 }
 pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>> {

data/ext/polars/src/lazy/apply.rs ADDED

@@ -0,0 +1,7 @@
+use magnus::Value;
+use polars::error::PolarsResult;
+use polars::series::Series;
+pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Series> {
+    todo!();
+}

data/ext/polars/src/lazy/dataframe.rs CHANGED

@@ -1,4 +1,5 @@
 use magnus::{RArray, RHash, Value};
+use polars::io::RowCount;
 use polars::lazy::frame::{LazyFrame, LazyGroupBy};
 use polars::prelude::*;
 use std::cell::RefCell;
@@ -52,6 +53,137 @@ impl From<LazyFrame> for RbLazyFrame {
 }
 impl RbLazyFrame {
+    pub fn new_from_ndjson(
+        path: String,
+        infer_schema_length: Option<usize>,
+        batch_size: Option<usize>,
+        n_rows: Option<usize>,
+        low_memory: bool,
+        rechunk: bool,
+        row_count: Option<(String, IdxSize)>,
+    ) -> RbResult<Self> {
+        let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
+        let lf = LazyJsonLineReader::new(path)
+            .with_infer_schema_length(infer_schema_length)
+            .with_batch_size(batch_size)
+            .with_n_rows(n_rows)
+            .low_memory(low_memory)
+            .with_rechunk(rechunk)
+            .with_row_count(row_count)
+            .finish()
+            .map_err(RbPolarsErr::from)?;
+        Ok(lf.into())
+    }
+    pub fn new_from_csv(arguments: &[Value]) -> RbResult<Self> {
+        // start arguments
+        // this pattern is needed for more than 16
+        let path: String = arguments[0].try_convert()?;
+        let sep: String = arguments[1].try_convert()?;
+        let has_header: bool = arguments[2].try_convert()?;
+        let ignore_errors: bool = arguments[3].try_convert()?;
+        let skip_rows: usize = arguments[4].try_convert()?;
+        let n_rows: Option<usize> = arguments[5].try_convert()?;
+        let cache: bool = arguments[6].try_convert()?;
+        let overwrite_dtype: Option<Vec<(String, Wrap<DataType>)>> = arguments[7].try_convert()?;
+        let low_memory: bool = arguments[8].try_convert()?;
+        let comment_char: Option<String> = arguments[9].try_convert()?;
+        let quote_char: Option<String> = arguments[10].try_convert()?;
+        let null_values: Option<Wrap<NullValues>> = arguments[11].try_convert()?;
+        let infer_schema_length: Option<usize> = arguments[12].try_convert()?;
+        let with_schema_modify: Option<Value> = arguments[13].try_convert()?;
+        let rechunk: bool = arguments[14].try_convert()?;
+        let skip_rows_after_header: usize = arguments[15].try_convert()?;
+        let encoding: Wrap<CsvEncoding> = arguments[16].try_convert()?;
+        let row_count: Option<(String, IdxSize)> = arguments[17].try_convert()?;
+        let parse_dates: bool = arguments[18].try_convert()?;
+        let eol_char: String = arguments[19].try_convert()?;
+        // end arguments
+        let null_values = null_values.map(|w| w.0);
+        let comment_char = comment_char.map(|s| s.as_bytes()[0]);
+        let quote_char = quote_char.map(|s| s.as_bytes()[0]);
+        let delimiter = sep.as_bytes()[0];
+        let eol_char = eol_char.as_bytes()[0];
+        let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
+        let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
+            let fields = overwrite_dtype
+                .into_iter()
+                .map(|(name, dtype)| Field::new(&name, dtype.0));
+            Schema::from(fields)
+        });
+        let r = LazyCsvReader::new(path)
+            .with_infer_schema_length(infer_schema_length)
+            .with_delimiter(delimiter)
+            .has_header(has_header)
+            .with_ignore_parser_errors(ignore_errors)
+            .with_skip_rows(skip_rows)
+            .with_n_rows(n_rows)
+            .with_cache(cache)
+            .with_dtype_overwrite(overwrite_dtype.as_ref())
+            .low_memory(low_memory)
+            .with_comment_char(comment_char)
+            .with_quote_char(quote_char)
+            .with_end_of_line_char(eol_char)
+            .with_rechunk(rechunk)
+            .with_skip_rows_after_header(skip_rows_after_header)
+            .with_encoding(encoding.0)
+            .with_row_count(row_count)
+            .with_parse_dates(parse_dates)
+            .with_null_values(null_values);
+        if let Some(_lambda) = with_schema_modify {
+            todo!();
+        }
+        Ok(r.finish().map_err(RbPolarsErr::from)?.into())
+    }
+    pub fn new_from_parquet(
+        path: String,
+        n_rows: Option<usize>,
+        cache: bool,
+        parallel: Wrap<ParallelStrategy>,
+        rechunk: bool,
+        row_count: Option<(String, IdxSize)>,
+        low_memory: bool,
+    ) -> RbResult<Self> {
+        let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
+        let args = ScanArgsParquet {
+            n_rows,
+            cache,
+            parallel: parallel.0,
+            rechunk,
+            row_count,
+            low_memory,
+        };
+        let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
+        Ok(lf.into())
+    }
+    pub fn new_from_ipc(
+        path: String,
+        n_rows: Option<usize>,
+        cache: bool,
+        rechunk: bool,
+        row_count: Option<(String, IdxSize)>,
+        memory_map: bool,
+    ) -> RbResult<Self> {
+        let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
+        let args = ScanArgsIpc {
+            n_rows,
+            cache,
+            rechunk,
+            row_count,
+            memmap: memory_map,
+        };
+        let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?;
+        Ok(lf.into())
+    }
     pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
         let file = BufWriter::new(get_file_like(rb_f, true)?);
         serde_json::to_writer(file, &self.ldf.logical_plan)
@@ -388,9 +520,9 @@ impl RbLazyFrame {
         Ok(self.get_schema()?.iter_names().cloned().collect())
     }
-    pub fn dtypes(&self) -> RbResult<Vec<String>> {
+    pub fn dtypes(&self) -> RbResult<Vec<Value>> {
         let schema = self.get_schema()?;
-        let iter = schema.iter_dtypes().map(|dt| dt.to_string());
+        let iter = schema.iter_dtypes().map(|dt| Wrap(dt.clone()).into());
         Ok(iter.collect())
     }
@@ -401,7 +533,7 @@ impl RbLazyFrame {
         schema.iter_fields().for_each(|fld| {
             // TODO remove unwrap
             schema_dict
-                .aset(fld.name().clone(), fld.data_type().to_string())
+                .aset::<String, Value>(fld.name().clone(), Wrap(fld.data_type().clone()).into())
                 .unwrap();
         });
         Ok(schema_dict)