polars-df 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +1 -1
- data/ext/polars/Cargo.toml +1 -1
- data/ext/polars/src/batched_csv.rs +120 -0
- data/ext/polars/src/conversion.rs +105 -5
- data/ext/polars/src/dataframe.rs +132 -4
- data/ext/polars/src/error.rs +9 -0
- data/ext/polars/src/file.rs +8 -7
- data/ext/polars/src/lazy/apply.rs +7 -0
- data/ext/polars/src/lazy/dataframe.rs +132 -0
- data/ext/polars/src/lazy/dsl.rs +38 -0
- data/ext/polars/src/lazy/meta.rs +1 -1
- data/ext/polars/src/lazy/mod.rs +1 -0
- data/ext/polars/src/lib.rs +77 -3
- data/ext/polars/src/series.rs +8 -9
- data/lib/polars/batched_csv_reader.rb +95 -0
- data/lib/polars/data_frame.rb +585 -19
- data/lib/polars/expr.rb +17 -2
- data/lib/polars/io.rb +342 -2
- data/lib/polars/lazy_frame.rb +156 -2
- data/lib/polars/lazy_functions.rb +154 -11
- data/lib/polars/series.rb +806 -18
- data/lib/polars/utils.rb +33 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -0
- metadata +5 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 510c7a761553fb3a49919add842d520da1518c5df6dd37f93af338e928c7a207
         | 
| 4 | 
            +
              data.tar.gz: 3e724d3c2553bb6b4a587f056a04977990b85103ea9ae231166746407e1c0b1e
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 59391ecd7a3d14372ad24693240ebeae4d83c59d55b37876fc6f216dd576897113607f7b5f6f6a3d7a5d75f96def72b1f2659f3de71e6a975a2753719879fa33
         | 
| 7 | 
            +
              data.tar.gz: 5a2e8cdf3aed01f16823be62067b0ec01e4e0dc3bfa4dc27929dfbe42d7c92e62f6e7a09bc78533d220a3bfc9890f9f0fb069482a338fbd0b55603aa42fddc57
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    
    
        data/Cargo.lock
    CHANGED
    
    
    
        data/ext/polars/Cargo.toml
    CHANGED
    
    
| @@ -0,0 +1,120 @@ | |
| 1 | 
            +
            use magnus::Value;
         | 
| 2 | 
            +
            use polars::io::mmap::MmapBytesReader;
         | 
| 3 | 
            +
            use polars::io::RowCount;
         | 
| 4 | 
            +
            use polars::prelude::read_impl::OwnedBatchedCsvReader;
         | 
| 5 | 
            +
            use polars::prelude::*;
         | 
| 6 | 
            +
            use std::cell::RefCell;
         | 
| 7 | 
            +
            use std::path::PathBuf;
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            use crate::conversion::*;
         | 
| 10 | 
            +
            use crate::{RbDataFrame, RbPolarsErr, RbResult};
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            #[magnus::wrap(class = "Polars::RbBatchedCsv")]
         | 
| 13 | 
            +
            pub struct RbBatchedCsv {
         | 
| 14 | 
            +
                pub reader: RefCell<OwnedBatchedCsvReader>,
         | 
| 15 | 
            +
            }
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            impl RbBatchedCsv {
         | 
| 18 | 
            +
                pub fn new(arguments: &[Value]) -> RbResult<Self> {
         | 
| 19 | 
            +
                    // start arguments
         | 
| 20 | 
            +
                    // this pattern is needed for more than 16
         | 
| 21 | 
            +
                    let infer_schema_length: Option<usize> = arguments[0].try_convert()?;
         | 
| 22 | 
            +
                    let chunk_size: usize = arguments[1].try_convert()?;
         | 
| 23 | 
            +
                    let has_header: bool = arguments[2].try_convert()?;
         | 
| 24 | 
            +
                    let ignore_errors: bool = arguments[3].try_convert()?;
         | 
| 25 | 
            +
                    let n_rows: Option<usize> = arguments[4].try_convert()?;
         | 
| 26 | 
            +
                    let skip_rows: usize = arguments[5].try_convert()?;
         | 
| 27 | 
            +
                    let projection: Option<Vec<usize>> = arguments[6].try_convert()?;
         | 
| 28 | 
            +
                    let sep: String = arguments[7].try_convert()?;
         | 
| 29 | 
            +
                    let rechunk: bool = arguments[8].try_convert()?;
         | 
| 30 | 
            +
                    let columns: Option<Vec<String>> = arguments[9].try_convert()?;
         | 
| 31 | 
            +
                    let encoding: Wrap<CsvEncoding> = arguments[10].try_convert()?;
         | 
| 32 | 
            +
                    let n_threads: Option<usize> = arguments[11].try_convert()?;
         | 
| 33 | 
            +
                    let path: PathBuf = arguments[12].try_convert()?;
         | 
| 34 | 
            +
                    let overwrite_dtype: Option<Vec<(String, Wrap<DataType>)>> = arguments[13].try_convert()?;
         | 
| 35 | 
            +
                    // TODO fix
         | 
| 36 | 
            +
                    let overwrite_dtype_slice: Option<Vec<Wrap<DataType>>> = None; // arguments[14].try_convert()?;
         | 
| 37 | 
            +
                    let low_memory: bool = arguments[15].try_convert()?;
         | 
| 38 | 
            +
                    let comment_char: Option<String> = arguments[16].try_convert()?;
         | 
| 39 | 
            +
                    let quote_char: Option<String> = arguments[17].try_convert()?;
         | 
| 40 | 
            +
                    let null_values: Option<Wrap<NullValues>> = arguments[18].try_convert()?;
         | 
| 41 | 
            +
                    let parse_dates: bool = arguments[19].try_convert()?;
         | 
| 42 | 
            +
                    let skip_rows_after_header: usize = arguments[20].try_convert()?;
         | 
| 43 | 
            +
                    let row_count: Option<(String, IdxSize)> = arguments[21].try_convert()?;
         | 
| 44 | 
            +
                    let sample_size: usize = arguments[22].try_convert()?;
         | 
| 45 | 
            +
                    let eol_char: String = arguments[23].try_convert()?;
         | 
| 46 | 
            +
                    // end arguments
         | 
| 47 | 
            +
             | 
| 48 | 
            +
                    let null_values = null_values.map(|w| w.0);
         | 
| 49 | 
            +
                    let comment_char = comment_char.map(|s| s.as_bytes()[0]);
         | 
| 50 | 
            +
                    let eol_char = eol_char.as_bytes()[0];
         | 
| 51 | 
            +
             | 
| 52 | 
            +
                    let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                    let quote_char = if let Some(s) = quote_char {
         | 
| 55 | 
            +
                        if s.is_empty() {
         | 
| 56 | 
            +
                            None
         | 
| 57 | 
            +
                        } else {
         | 
| 58 | 
            +
                            Some(s.as_bytes()[0])
         | 
| 59 | 
            +
                        }
         | 
| 60 | 
            +
                    } else {
         | 
| 61 | 
            +
                        None
         | 
| 62 | 
            +
                    };
         | 
| 63 | 
            +
             | 
| 64 | 
            +
                    let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
         | 
| 65 | 
            +
                        let fields = overwrite_dtype.iter().map(|(name, dtype)| {
         | 
| 66 | 
            +
                            let dtype = dtype.0.clone();
         | 
| 67 | 
            +
                            Field::new(name, dtype)
         | 
| 68 | 
            +
                        });
         | 
| 69 | 
            +
                        Schema::from(fields)
         | 
| 70 | 
            +
                    });
         | 
| 71 | 
            +
             | 
| 72 | 
            +
                    let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
         | 
| 73 | 
            +
                        overwrite_dtype
         | 
| 74 | 
            +
                            .iter()
         | 
| 75 | 
            +
                            .map(|dt| dt.0.clone())
         | 
| 76 | 
            +
                            .collect::<Vec<_>>()
         | 
| 77 | 
            +
                    });
         | 
| 78 | 
            +
             | 
| 79 | 
            +
                    let file = std::fs::File::open(path).map_err(RbPolarsErr::io)?;
         | 
| 80 | 
            +
                    let reader = Box::new(file) as Box<dyn MmapBytesReader>;
         | 
| 81 | 
            +
                    let reader = CsvReader::new(reader)
         | 
| 82 | 
            +
                        .infer_schema(infer_schema_length)
         | 
| 83 | 
            +
                        .has_header(has_header)
         | 
| 84 | 
            +
                        .with_n_rows(n_rows)
         | 
| 85 | 
            +
                        .with_delimiter(sep.as_bytes()[0])
         | 
| 86 | 
            +
                        .with_skip_rows(skip_rows)
         | 
| 87 | 
            +
                        .with_ignore_parser_errors(ignore_errors)
         | 
| 88 | 
            +
                        .with_projection(projection)
         | 
| 89 | 
            +
                        .with_rechunk(rechunk)
         | 
| 90 | 
            +
                        .with_chunk_size(chunk_size)
         | 
| 91 | 
            +
                        .with_encoding(encoding.0)
         | 
| 92 | 
            +
                        .with_columns(columns)
         | 
| 93 | 
            +
                        .with_n_threads(n_threads)
         | 
| 94 | 
            +
                        .with_dtypes_slice(overwrite_dtype_slice.as_deref())
         | 
| 95 | 
            +
                        .low_memory(low_memory)
         | 
| 96 | 
            +
                        .with_comment_char(comment_char)
         | 
| 97 | 
            +
                        .with_null_values(null_values)
         | 
| 98 | 
            +
                        .with_parse_dates(parse_dates)
         | 
| 99 | 
            +
                        .with_quote_char(quote_char)
         | 
| 100 | 
            +
                        .with_end_of_line_char(eol_char)
         | 
| 101 | 
            +
                        .with_skip_rows_after_header(skip_rows_after_header)
         | 
| 102 | 
            +
                        .with_row_count(row_count)
         | 
| 103 | 
            +
                        .sample_size(sample_size)
         | 
| 104 | 
            +
                        .batched(overwrite_dtype.map(Arc::new))
         | 
| 105 | 
            +
                        .map_err(RbPolarsErr::from)?;
         | 
| 106 | 
            +
             | 
| 107 | 
            +
                    Ok(RbBatchedCsv {
         | 
| 108 | 
            +
                        reader: RefCell::new(reader),
         | 
| 109 | 
            +
                    })
         | 
| 110 | 
            +
                }
         | 
| 111 | 
            +
             | 
| 112 | 
            +
                pub fn next_batches(&self, n: usize) -> RbResult<Option<Vec<RbDataFrame>>> {
         | 
| 113 | 
            +
                    let batches = self
         | 
| 114 | 
            +
                        .reader
         | 
| 115 | 
            +
                        .borrow_mut()
         | 
| 116 | 
            +
                        .next_batches(n)
         | 
| 117 | 
            +
                        .map_err(RbPolarsErr::from)?;
         | 
| 118 | 
            +
                    Ok(batches.map(|batches| batches.into_iter().map(|out| out.1.into()).collect()))
         | 
| 119 | 
            +
                }
         | 
| 120 | 
            +
            }
         | 
| @@ -1,11 +1,11 @@ | |
| 1 | 
            -
            use magnus::{TryConvert, Value, QNIL};
         | 
| 1 | 
            +
            use magnus::{RArray, Symbol, TryConvert, Value, QNIL};
         | 
| 2 2 | 
             
            use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
         | 
| 3 3 | 
             
            use polars::datatypes::AnyValue;
         | 
| 4 4 | 
             
            use polars::frame::DataFrame;
         | 
| 5 5 | 
             
            use polars::prelude::*;
         | 
| 6 6 | 
             
            use polars::series::ops::NullBehavior;
         | 
| 7 7 |  | 
| 8 | 
            -
            use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError};
         | 
| 8 | 
            +
            use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
         | 
| 9 9 |  | 
| 10 10 | 
             
            pub struct Wrap<T>(pub T);
         | 
| 11 11 |  | 
| @@ -15,14 +15,57 @@ impl<T> From<T> for Wrap<T> { | |
| 15 15 | 
             
                }
         | 
| 16 16 | 
             
            }
         | 
| 17 17 |  | 
| 18 | 
            +
            pub fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
         | 
| 19 | 
            +
                let seq: RArray = obj.try_convert()?;
         | 
| 20 | 
            +
                let len = seq.len();
         | 
| 21 | 
            +
                Ok((seq, len))
         | 
| 22 | 
            +
            }
         | 
| 23 | 
            +
             | 
| 18 24 | 
             
            pub fn get_df(obj: Value) -> RbResult<DataFrame> {
         | 
| 19 25 | 
             
                let rbdf = obj.funcall::<_, _, &RbDataFrame>("_df", ())?;
         | 
| 20 26 | 
             
                Ok(rbdf.df.borrow().clone())
         | 
| 21 27 | 
             
            }
         | 
| 22 28 |  | 
| 23 | 
            -
             | 
| 24 | 
            -
                 | 
| 25 | 
            -
             | 
| 29 | 
            +
            pub fn get_series(obj: Value) -> RbResult<Series> {
         | 
| 30 | 
            +
                let rbs = obj.funcall::<_, _, &RbSeries>("_s", ())?;
         | 
| 31 | 
            +
                Ok(rbs.series.borrow().clone())
         | 
| 32 | 
            +
            }
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            impl TryConvert for Wrap<Utf8Chunked> {
         | 
| 35 | 
            +
                fn try_convert(obj: Value) -> RbResult<Self> {
         | 
| 36 | 
            +
                    let (seq, len) = get_rbseq(obj)?;
         | 
| 37 | 
            +
                    let mut builder = Utf8ChunkedBuilder::new("", len, len * 25);
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                    for res in seq.each() {
         | 
| 40 | 
            +
                        let item = res?;
         | 
| 41 | 
            +
                        match item.try_convert::<String>() {
         | 
| 42 | 
            +
                            Ok(val) => builder.append_value(&val),
         | 
| 43 | 
            +
                            Err(_) => builder.append_null(),
         | 
| 44 | 
            +
                        }
         | 
| 45 | 
            +
                    }
         | 
| 46 | 
            +
                    Ok(Wrap(builder.finish()))
         | 
| 47 | 
            +
                }
         | 
| 48 | 
            +
            }
         | 
| 49 | 
            +
             | 
| 50 | 
            +
            impl TryConvert for Wrap<NullValues> {
         | 
| 51 | 
            +
                fn try_convert(ob: Value) -> RbResult<Self> {
         | 
| 52 | 
            +
                    if let Ok(s) = ob.try_convert::<String>() {
         | 
| 53 | 
            +
                        Ok(Wrap(NullValues::AllColumnsSingle(s)))
         | 
| 54 | 
            +
                    } else if let Ok(s) = ob.try_convert::<Vec<String>>() {
         | 
| 55 | 
            +
                        Ok(Wrap(NullValues::AllColumns(s)))
         | 
| 56 | 
            +
                    } else if let Ok(s) = ob.try_convert::<Vec<(String, String)>>() {
         | 
| 57 | 
            +
                        Ok(Wrap(NullValues::Named(s)))
         | 
| 58 | 
            +
                    } else {
         | 
| 59 | 
            +
                        Err(RbPolarsErr::other(
         | 
| 60 | 
            +
                            "could not extract value from null_values argument".into(),
         | 
| 61 | 
            +
                        ))
         | 
| 62 | 
            +
                    }
         | 
| 63 | 
            +
                }
         | 
| 64 | 
            +
            }
         | 
| 65 | 
            +
             | 
| 66 | 
            +
            impl From<Wrap<AnyValue<'_>>> for Value {
         | 
| 67 | 
            +
                fn from(w: Wrap<AnyValue<'_>>) -> Self {
         | 
| 68 | 
            +
                    match w.0 {
         | 
| 26 69 | 
             
                        AnyValue::UInt8(v) => Value::from(v),
         | 
| 27 70 | 
             
                        AnyValue::UInt16(v) => Value::from(v),
         | 
| 28 71 | 
             
                        AnyValue::UInt32(v) => Value::from(v),
         | 
| @@ -41,6 +84,12 @@ impl Into<Value> for Wrap<AnyValue<'_>> { | |
| 41 84 | 
             
                }
         | 
| 42 85 | 
             
            }
         | 
| 43 86 |  | 
| 87 | 
            +
            impl From<Wrap<DataType>> for Value {
         | 
| 88 | 
            +
                fn from(w: Wrap<DataType>) -> Self {
         | 
| 89 | 
            +
                    Symbol::from(w.0.to_string()).into()
         | 
| 90 | 
            +
                }
         | 
| 91 | 
            +
            }
         | 
| 92 | 
            +
             | 
| 44 93 | 
             
            impl TryConvert for Wrap<DataType> {
         | 
| 45 94 | 
             
                fn try_convert(ob: Value) -> RbResult<Self> {
         | 
| 46 95 | 
             
                    let dtype = match ob.try_convert::<String>()?.as_str() {
         | 
| @@ -118,6 +167,39 @@ impl TryConvert for Wrap<ClosedWindow> { | |
| 118 167 | 
             
                }
         | 
| 119 168 | 
             
            }
         | 
| 120 169 |  | 
| 170 | 
            +
            impl TryConvert for Wrap<CsvEncoding> {
         | 
| 171 | 
            +
                fn try_convert(ob: Value) -> RbResult<Self> {
         | 
| 172 | 
            +
                    let parsed = match ob.try_convert::<String>()?.as_str() {
         | 
| 173 | 
            +
                        "utf8" => CsvEncoding::Utf8,
         | 
| 174 | 
            +
                        "utf8-lossy" => CsvEncoding::LossyUtf8,
         | 
| 175 | 
            +
                        v => {
         | 
| 176 | 
            +
                            return Err(RbValueError::new_err(format!(
         | 
| 177 | 
            +
                                "encoding must be one of {{'utf8', 'utf8-lossy'}}, got {}",
         | 
| 178 | 
            +
                                v
         | 
| 179 | 
            +
                            )))
         | 
| 180 | 
            +
                        }
         | 
| 181 | 
            +
                    };
         | 
| 182 | 
            +
                    Ok(Wrap(parsed))
         | 
| 183 | 
            +
                }
         | 
| 184 | 
            +
            }
         | 
| 185 | 
            +
             | 
| 186 | 
            +
            impl TryConvert for Wrap<Option<IpcCompression>> {
         | 
| 187 | 
            +
                fn try_convert(ob: Value) -> RbResult<Self> {
         | 
| 188 | 
            +
                    let parsed = match ob.try_convert::<String>()?.as_str() {
         | 
| 189 | 
            +
                        "uncompressed" => None,
         | 
| 190 | 
            +
                        "lz4" => Some(IpcCompression::LZ4),
         | 
| 191 | 
            +
                        "zstd" => Some(IpcCompression::ZSTD),
         | 
| 192 | 
            +
                        v => {
         | 
| 193 | 
            +
                            return Err(RbValueError::new_err(format!(
         | 
| 194 | 
            +
                                "compression must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {}",
         | 
| 195 | 
            +
                                v
         | 
| 196 | 
            +
                            )))
         | 
| 197 | 
            +
                        }
         | 
| 198 | 
            +
                    };
         | 
| 199 | 
            +
                    Ok(Wrap(parsed))
         | 
| 200 | 
            +
                }
         | 
| 201 | 
            +
            }
         | 
| 202 | 
            +
             | 
| 121 203 | 
             
            impl TryConvert for Wrap<JoinType> {
         | 
| 122 204 | 
             
                fn try_convert(ob: Value) -> RbResult<Self> {
         | 
| 123 205 | 
             
                    let parsed = match ob.try_convert::<String>()?.as_str() {
         | 
| @@ -171,6 +253,24 @@ impl TryConvert for Wrap<NullStrategy> { | |
| 171 253 | 
             
                }
         | 
| 172 254 | 
             
            }
         | 
| 173 255 |  | 
| 256 | 
            +
            impl TryConvert for Wrap<ParallelStrategy> {
         | 
| 257 | 
            +
                fn try_convert(ob: Value) -> RbResult<Self> {
         | 
| 258 | 
            +
                    let parsed = match ob.try_convert::<String>()?.as_str() {
         | 
| 259 | 
            +
                        "auto" => ParallelStrategy::Auto,
         | 
| 260 | 
            +
                        "columns" => ParallelStrategy::Columns,
         | 
| 261 | 
            +
                        "row_groups" => ParallelStrategy::RowGroups,
         | 
| 262 | 
            +
                        "none" => ParallelStrategy::None,
         | 
| 263 | 
            +
                        v => {
         | 
| 264 | 
            +
                            return Err(RbValueError::new_err(format!(
         | 
| 265 | 
            +
                                "parallel must be one of {{'auto', 'columns', 'row_groups', 'none'}}, got {}",
         | 
| 266 | 
            +
                                v
         | 
| 267 | 
            +
                            )))
         | 
| 268 | 
            +
                        }
         | 
| 269 | 
            +
                    };
         | 
| 270 | 
            +
                    Ok(Wrap(parsed))
         | 
| 271 | 
            +
                }
         | 
| 272 | 
            +
            }
         | 
| 273 | 
            +
             | 
| 174 274 | 
             
            impl TryConvert for Wrap<QuantileInterpolOptions> {
         | 
| 175 275 | 
             
                fn try_convert(ob: Value) -> RbResult<Self> {
         | 
| 176 276 | 
             
                    let parsed = match ob.try_convert::<String>()?.as_str() {
         | 
    
        data/ext/polars/src/dataframe.rs
    CHANGED
    
    | @@ -1,5 +1,6 @@ | |
| 1 1 | 
             
            use magnus::{r_hash::ForEach, Error, RArray, RHash, RString, Value};
         | 
| 2 2 | 
             
            use polars::io::mmap::ReaderBytes;
         | 
| 3 | 
            +
            use polars::io::RowCount;
         | 
| 3 4 | 
             
            use polars::prelude::*;
         | 
| 4 5 | 
             
            use std::cell::RefCell;
         | 
| 5 6 | 
             
            use std::fs::File;
         | 
| @@ -43,10 +44,94 @@ impl RbDataFrame { | |
| 43 44 | 
             
                    self.df.borrow().estimated_size()
         | 
| 44 45 | 
             
                }
         | 
| 45 46 |  | 
| 46 | 
            -
                pub fn read_csv( | 
| 47 | 
            +
                pub fn read_csv(arguments: &[Value]) -> RbResult<Self> {
         | 
| 48 | 
            +
                    // start arguments
         | 
| 49 | 
            +
                    // this pattern is needed for more than 16
         | 
| 50 | 
            +
                    let rb_f: Value = arguments[0].try_convert()?;
         | 
| 51 | 
            +
                    let infer_schema_length: Option<usize> = arguments[1].try_convert()?;
         | 
| 52 | 
            +
                    let chunk_size: usize = arguments[2].try_convert()?;
         | 
| 53 | 
            +
                    let has_header: bool = arguments[3].try_convert()?;
         | 
| 54 | 
            +
                    let ignore_errors: bool = arguments[4].try_convert()?;
         | 
| 55 | 
            +
                    let n_rows: Option<usize> = arguments[5].try_convert()?;
         | 
| 56 | 
            +
                    let skip_rows: usize = arguments[6].try_convert()?;
         | 
| 57 | 
            +
                    let projection: Option<Vec<usize>> = arguments[7].try_convert()?;
         | 
| 58 | 
            +
                    let sep: String = arguments[8].try_convert()?;
         | 
| 59 | 
            +
                    let rechunk: bool = arguments[9].try_convert()?;
         | 
| 60 | 
            +
                    let columns: Option<Vec<String>> = arguments[10].try_convert()?;
         | 
| 61 | 
            +
                    let encoding: Wrap<CsvEncoding> = arguments[11].try_convert()?;
         | 
| 62 | 
            +
                    let n_threads: Option<usize> = arguments[12].try_convert()?;
         | 
| 63 | 
            +
                    let path: Option<String> = arguments[13].try_convert()?;
         | 
| 64 | 
            +
                    let overwrite_dtype: Option<Vec<(String, Wrap<DataType>)>> = arguments[14].try_convert()?;
         | 
| 65 | 
            +
                    // TODO fix
         | 
| 66 | 
            +
                    let overwrite_dtype_slice: Option<Vec<Wrap<DataType>>> = None; // arguments[15].try_convert()?;
         | 
| 67 | 
            +
                    let low_memory: bool = arguments[16].try_convert()?;
         | 
| 68 | 
            +
                    let comment_char: Option<String> = arguments[17].try_convert()?;
         | 
| 69 | 
            +
                    let quote_char: Option<String> = arguments[18].try_convert()?;
         | 
| 70 | 
            +
                    let null_values: Option<Wrap<NullValues>> = arguments[19].try_convert()?;
         | 
| 71 | 
            +
                    let parse_dates: bool = arguments[20].try_convert()?;
         | 
| 72 | 
            +
                    let skip_rows_after_header: usize = arguments[21].try_convert()?;
         | 
| 73 | 
            +
                    let row_count: Option<(String, IdxSize)> = arguments[22].try_convert()?;
         | 
| 74 | 
            +
                    let sample_size: usize = arguments[23].try_convert()?;
         | 
| 75 | 
            +
                    let eol_char: String = arguments[24].try_convert()?;
         | 
| 76 | 
            +
                    // end arguments
         | 
| 77 | 
            +
             | 
| 78 | 
            +
                    let null_values = null_values.map(|w| w.0);
         | 
| 79 | 
            +
                    let comment_char = comment_char.map(|s| s.as_bytes()[0]);
         | 
| 80 | 
            +
                    let eol_char = eol_char.as_bytes()[0];
         | 
| 81 | 
            +
             | 
| 82 | 
            +
                    let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
         | 
| 83 | 
            +
             | 
| 84 | 
            +
                    let quote_char = if let Some(s) = quote_char {
         | 
| 85 | 
            +
                        if s.is_empty() {
         | 
| 86 | 
            +
                            None
         | 
| 87 | 
            +
                        } else {
         | 
| 88 | 
            +
                            Some(s.as_bytes()[0])
         | 
| 89 | 
            +
                        }
         | 
| 90 | 
            +
                    } else {
         | 
| 91 | 
            +
                        None
         | 
| 92 | 
            +
                    };
         | 
| 93 | 
            +
             | 
| 94 | 
            +
                    let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
         | 
| 95 | 
            +
                        let fields = overwrite_dtype.iter().map(|(name, dtype)| {
         | 
| 96 | 
            +
                            let dtype = dtype.0.clone();
         | 
| 97 | 
            +
                            Field::new(name, dtype)
         | 
| 98 | 
            +
                        });
         | 
| 99 | 
            +
                        Schema::from(fields)
         | 
| 100 | 
            +
                    });
         | 
| 101 | 
            +
             | 
| 102 | 
            +
                    let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
         | 
| 103 | 
            +
                        overwrite_dtype
         | 
| 104 | 
            +
                            .iter()
         | 
| 105 | 
            +
                            .map(|dt| dt.0.clone())
         | 
| 106 | 
            +
                            .collect::<Vec<_>>()
         | 
| 107 | 
            +
                    });
         | 
| 108 | 
            +
             | 
| 47 109 | 
             
                    let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
         | 
| 48 110 | 
             
                    let df = CsvReader::new(mmap_bytes_r)
         | 
| 111 | 
            +
                        .infer_schema(infer_schema_length)
         | 
| 49 112 | 
             
                        .has_header(has_header)
         | 
| 113 | 
            +
                        .with_n_rows(n_rows)
         | 
| 114 | 
            +
                        .with_delimiter(sep.as_bytes()[0])
         | 
| 115 | 
            +
                        .with_skip_rows(skip_rows)
         | 
| 116 | 
            +
                        .with_ignore_parser_errors(ignore_errors)
         | 
| 117 | 
            +
                        .with_projection(projection)
         | 
| 118 | 
            +
                        .with_rechunk(rechunk)
         | 
| 119 | 
            +
                        .with_chunk_size(chunk_size)
         | 
| 120 | 
            +
                        .with_encoding(encoding.0)
         | 
| 121 | 
            +
                        .with_columns(columns)
         | 
| 122 | 
            +
                        .with_n_threads(n_threads)
         | 
| 123 | 
            +
                        .with_path(path)
         | 
| 124 | 
            +
                        .with_dtypes(overwrite_dtype.as_ref())
         | 
| 125 | 
            +
                        .with_dtypes_slice(overwrite_dtype_slice.as_deref())
         | 
| 126 | 
            +
                        .low_memory(low_memory)
         | 
| 127 | 
            +
                        .with_comment_char(comment_char)
         | 
| 128 | 
            +
                        .with_null_values(null_values)
         | 
| 129 | 
            +
                        .with_parse_dates(parse_dates)
         | 
| 130 | 
            +
                        .with_quote_char(quote_char)
         | 
| 131 | 
            +
                        .with_end_of_line_char(eol_char)
         | 
| 132 | 
            +
                        .with_skip_rows_after_header(skip_rows_after_header)
         | 
| 133 | 
            +
                        .with_row_count(row_count)
         | 
| 134 | 
            +
                        .sample_size(sample_size)
         | 
| 50 135 | 
             
                        .finish()
         | 
| 51 136 | 
             
                        .map_err(RbPolarsErr::from)?;
         | 
| 52 137 | 
             
                    Ok(df.into())
         | 
| @@ -61,6 +146,27 @@ impl RbDataFrame { | |
| 61 146 | 
             
                        .map(|v| v.into())
         | 
| 62 147 | 
             
                }
         | 
| 63 148 |  | 
| 149 | 
            +
                pub fn read_ipc(
         | 
| 150 | 
            +
                    rb_f: Value,
         | 
| 151 | 
            +
                    columns: Option<Vec<String>>,
         | 
| 152 | 
            +
                    projection: Option<Vec<usize>>,
         | 
| 153 | 
            +
                    n_rows: Option<usize>,
         | 
| 154 | 
            +
                    row_count: Option<(String, IdxSize)>,
         | 
| 155 | 
            +
                    memory_map: bool,
         | 
| 156 | 
            +
                ) -> RbResult<Self> {
         | 
| 157 | 
            +
                    let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
         | 
| 158 | 
            +
                    let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
         | 
| 159 | 
            +
                    let df = IpcReader::new(mmap_bytes_r)
         | 
| 160 | 
            +
                        .with_projection(projection)
         | 
| 161 | 
            +
                        .with_columns(columns)
         | 
| 162 | 
            +
                        .with_n_rows(n_rows)
         | 
| 163 | 
            +
                        .with_row_count(row_count)
         | 
| 164 | 
            +
                        .memory_mapped(memory_map)
         | 
| 165 | 
            +
                        .finish()
         | 
| 166 | 
            +
                        .map_err(RbPolarsErr::from)?;
         | 
| 167 | 
            +
                    Ok(RbDataFrame::new(df))
         | 
| 168 | 
            +
                }
         | 
| 169 | 
            +
             | 
| 64 170 | 
             
                pub fn read_json(rb_f: Value) -> RbResult<Self> {
         | 
| 65 171 | 
             
                    // memmap the file first
         | 
| 66 172 | 
             
                    let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
         | 
| @@ -185,6 +291,28 @@ impl RbDataFrame { | |
| 185 291 | 
             
                    Ok(())
         | 
| 186 292 | 
             
                }
         | 
| 187 293 |  | 
| 294 | 
            +
                pub fn write_ipc(
         | 
| 295 | 
            +
                    &self,
         | 
| 296 | 
            +
                    rb_f: Value,
         | 
| 297 | 
            +
                    compression: Wrap<Option<IpcCompression>>,
         | 
| 298 | 
            +
                ) -> RbResult<()> {
         | 
| 299 | 
            +
                    if let Ok(s) = rb_f.try_convert::<String>() {
         | 
| 300 | 
            +
                        let f = std::fs::File::create(&s).unwrap();
         | 
| 301 | 
            +
                        IpcWriter::new(f)
         | 
| 302 | 
            +
                            .with_compression(compression.0)
         | 
| 303 | 
            +
                            .finish(&mut self.df.borrow_mut())
         | 
| 304 | 
            +
                            .map_err(RbPolarsErr::from)?;
         | 
| 305 | 
            +
                    } else {
         | 
| 306 | 
            +
                        let mut buf = get_file_like(rb_f, true)?;
         | 
| 307 | 
            +
             | 
| 308 | 
            +
                        IpcWriter::new(&mut buf)
         | 
| 309 | 
            +
                            .with_compression(compression.0)
         | 
| 310 | 
            +
                            .finish(&mut self.df.borrow_mut())
         | 
| 311 | 
            +
                            .map_err(RbPolarsErr::from)?;
         | 
| 312 | 
            +
                    }
         | 
| 313 | 
            +
                    Ok(())
         | 
| 314 | 
            +
                }
         | 
| 315 | 
            +
             | 
| 188 316 | 
             
                pub fn write_parquet(
         | 
| 189 317 | 
             
                    &self,
         | 
| 190 318 | 
             
                    rb_f: Value,
         | 
| @@ -240,11 +368,11 @@ impl RbDataFrame { | |
| 240 368 | 
             
                    Ok(())
         | 
| 241 369 | 
             
                }
         | 
| 242 370 |  | 
| 243 | 
            -
                pub fn dtypes(&self) -> Vec< | 
| 371 | 
            +
                pub fn dtypes(&self) -> Vec<Value> {
         | 
| 244 372 | 
             
                    self.df
         | 
| 245 373 | 
             
                        .borrow()
         | 
| 246 374 | 
             
                        .iter()
         | 
| 247 | 
            -
                        .map(|s| s.dtype(). | 
| 375 | 
            +
                        .map(|s| Wrap(s.dtype().clone()).into())
         | 
| 248 376 | 
             
                        .collect()
         | 
| 249 377 | 
             
                }
         | 
| 250 378 |  | 
| @@ -418,7 +546,7 @@ impl RbDataFrame { | |
| 418 546 | 
             
                        self.df.borrow().partition_by(groups)
         | 
| 419 547 | 
             
                    }
         | 
| 420 548 | 
             
                    .map_err(RbPolarsErr::from)?;
         | 
| 421 | 
            -
                    Ok(out.into_iter().map( | 
| 549 | 
            +
                    Ok(out.into_iter().map(RbDataFrame::new).collect())
         | 
| 422 550 | 
             
                }
         | 
| 423 551 |  | 
| 424 552 | 
             
                pub fn shift(&self, periods: i64) -> Self {
         | 
    
        data/ext/polars/src/error.rs
    CHANGED
    
    | @@ -1,5 +1,6 @@ | |
| 1 1 | 
             
            use magnus::exception::arg_error;
         | 
| 2 2 | 
             
            use magnus::Error;
         | 
| 3 | 
            +
            use polars::error::ArrowError;
         | 
| 3 4 | 
             
            use polars::prelude::PolarsError;
         | 
| 4 5 |  | 
| 5 6 | 
             
            pub struct RbPolarsErr {}
         | 
| @@ -10,6 +11,14 @@ impl RbPolarsErr { | |
| 10 11 | 
             
                    Error::runtime_error(e.to_string())
         | 
| 11 12 | 
             
                }
         | 
| 12 13 |  | 
| 14 | 
            +
                pub fn arrow(e: ArrowError) -> Error {
         | 
| 15 | 
            +
                    Error::runtime_error(e.to_string())
         | 
| 16 | 
            +
                }
         | 
| 17 | 
            +
             | 
| 18 | 
            +
                pub fn io(e: std::io::Error) -> Error {
         | 
| 19 | 
            +
                    Error::runtime_error(e.to_string())
         | 
| 20 | 
            +
                }
         | 
| 21 | 
            +
             | 
| 13 22 | 
             
                pub fn other(message: String) -> Error {
         | 
| 14 23 | 
             
                    Error::runtime_error(message)
         | 
| 15 24 | 
             
                }
         | 
    
        data/ext/polars/src/file.rs
    CHANGED
    
    | @@ -1,18 +1,19 @@ | |
| 1 1 | 
             
            use magnus::{Error, RString, Value};
         | 
| 2 2 | 
             
            use polars::io::mmap::MmapBytesReader;
         | 
| 3 | 
            -
            use std::fs:: | 
| 3 | 
            +
            use std::fs::File;
         | 
| 4 4 | 
             
            use std::io::Cursor;
         | 
| 5 5 | 
             
            use std::path::PathBuf;
         | 
| 6 6 |  | 
| 7 7 | 
             
            use crate::RbResult;
         | 
| 8 8 |  | 
| 9 9 | 
             
            pub fn get_file_like(f: Value, truncate: bool) -> RbResult<File> {
         | 
| 10 | 
            -
                 | 
| 11 | 
            -
             | 
| 12 | 
            -
                     | 
| 13 | 
            -
             | 
| 14 | 
            -
                     | 
| 15 | 
            -
             | 
| 10 | 
            +
                let str_slice = f.try_convert::<PathBuf>()?;
         | 
| 11 | 
            +
                let f = if truncate {
         | 
| 12 | 
            +
                    File::create(str_slice).map_err(|e| Error::runtime_error(e.to_string()))?
         | 
| 13 | 
            +
                } else {
         | 
| 14 | 
            +
                    File::open(str_slice).map_err(|e| Error::runtime_error(e.to_string()))?
         | 
| 15 | 
            +
                };
         | 
| 16 | 
            +
                Ok(f)
         | 
| 16 17 | 
             
            }
         | 
| 17 18 |  | 
| 18 19 | 
             
            pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>> {
         |