polars-df 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/CHANGELOG.md +16 -0
 - data/Cargo.lock +360 -361
 - data/ext/polars/Cargo.toml +10 -7
 - data/ext/polars/src/batched_csv.rs +1 -1
 - data/ext/polars/src/conversion/any_value.rs +261 -0
 - data/ext/polars/src/conversion/chunked_array.rs +4 -4
 - data/ext/polars/src/conversion/mod.rs +51 -10
 - data/ext/polars/src/dataframe/construction.rs +6 -8
 - data/ext/polars/src/dataframe/general.rs +19 -29
 - data/ext/polars/src/dataframe/io.rs +43 -33
 - data/ext/polars/src/error.rs +26 -4
 - data/ext/polars/src/expr/categorical.rs +0 -10
 - data/ext/polars/src/expr/datetime.rs +4 -12
 - data/ext/polars/src/expr/general.rs +123 -110
 - data/ext/polars/src/expr/mod.rs +2 -2
 - data/ext/polars/src/expr/rolling.rs +17 -9
 - data/ext/polars/src/expr/string.rs +2 -6
 - data/ext/polars/src/functions/eager.rs +10 -10
 - data/ext/polars/src/functions/lazy.rs +21 -21
 - data/ext/polars/src/functions/range.rs +6 -12
 - data/ext/polars/src/interop/numo/to_numo_series.rs +2 -1
 - data/ext/polars/src/lazyframe/mod.rs +81 -98
 - data/ext/polars/src/lib.rs +55 -45
 - data/ext/polars/src/map/dataframe.rs +2 -2
 - data/ext/polars/src/rb_modules.rs +25 -1
 - data/ext/polars/src/series/aggregation.rs +4 -2
 - data/ext/polars/src/series/arithmetic.rs +21 -11
 - data/ext/polars/src/series/construction.rs +56 -38
 - data/ext/polars/src/series/export.rs +1 -1
 - data/ext/polars/src/series/mod.rs +31 -10
 - data/ext/polars/src/sql.rs +3 -1
 - data/lib/polars/array_expr.rb +4 -4
 - data/lib/polars/batched_csv_reader.rb +2 -2
 - data/lib/polars/cat_expr.rb +0 -36
 - data/lib/polars/cat_name_space.rb +0 -37
 - data/lib/polars/data_frame.rb +93 -101
 - data/lib/polars/data_types.rb +1 -1
 - data/lib/polars/date_time_expr.rb +525 -573
 - data/lib/polars/date_time_name_space.rb +263 -464
 - data/lib/polars/dynamic_group_by.rb +3 -3
 - data/lib/polars/exceptions.rb +3 -0
 - data/lib/polars/expr.rb +367 -330
 - data/lib/polars/expr_dispatch.rb +1 -1
 - data/lib/polars/functions/aggregation/horizontal.rb +8 -8
 - data/lib/polars/functions/as_datatype.rb +63 -40
 - data/lib/polars/functions/lazy.rb +63 -14
 - data/lib/polars/functions/lit.rb +1 -1
 - data/lib/polars/functions/range/date_range.rb +18 -77
 - data/lib/polars/functions/range/datetime_range.rb +4 -4
 - data/lib/polars/functions/range/int_range.rb +2 -2
 - data/lib/polars/functions/range/time_range.rb +4 -4
 - data/lib/polars/functions/repeat.rb +1 -1
 - data/lib/polars/functions/whenthen.rb +1 -1
 - data/lib/polars/io/csv.rb +8 -8
 - data/lib/polars/io/ipc.rb +3 -3
 - data/lib/polars/io/json.rb +13 -2
 - data/lib/polars/io/ndjson.rb +15 -4
 - data/lib/polars/io/parquet.rb +5 -4
 - data/lib/polars/lazy_frame.rb +120 -106
 - data/lib/polars/lazy_group_by.rb +1 -1
 - data/lib/polars/list_expr.rb +11 -11
 - data/lib/polars/list_name_space.rb +5 -1
 - data/lib/polars/rolling_group_by.rb +5 -7
 - data/lib/polars/series.rb +105 -189
 - data/lib/polars/string_expr.rb +42 -67
 - data/lib/polars/string_name_space.rb +5 -4
 - data/lib/polars/testing.rb +2 -2
 - data/lib/polars/utils/constants.rb +9 -0
 - data/lib/polars/utils/convert.rb +97 -0
 - data/lib/polars/utils/parse.rb +89 -0
 - data/lib/polars/utils/various.rb +76 -0
 - data/lib/polars/utils/wrap.rb +19 -0
 - data/lib/polars/utils.rb +4 -330
 - data/lib/polars/version.rb +1 -1
 - data/lib/polars/whenthen.rb +6 -6
 - data/lib/polars.rb +11 -0
 - metadata +9 -4
 - data/ext/polars/src/conversion/anyvalue.rs +0 -186
 
| 
         @@ -9,8 +9,8 @@ use crate::{RbDataFrame, RbResult, RbSeries}; 
     | 
|
| 
       9 
9 
     | 
    
         
             
            pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
         
     | 
| 
       10 
10 
     | 
    
         
             
                use polars_core::error::PolarsResult;
         
     | 
| 
       11 
11 
     | 
    
         | 
| 
       12 
     | 
    
         
            -
                let mut iter = seq. 
     | 
| 
       13 
     | 
    
         
            -
                let first = iter.next().unwrap() 
     | 
| 
      
 12 
     | 
    
         
            +
                let mut iter = seq.into_iter();
         
     | 
| 
      
 13 
     | 
    
         
            +
                let first = iter.next().unwrap();
         
     | 
| 
       14 
14 
     | 
    
         | 
| 
       15 
15 
     | 
    
         
             
                let first_rdf = get_df(first)?;
         
     | 
| 
       16 
16 
     | 
    
         
             
                let identity_df = first_rdf.slice(0, 0);
         
     | 
| 
         @@ -18,7 +18,7 @@ pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> { 
     | 
|
| 
       18 
18 
     | 
    
         
             
                let mut rdfs: Vec<PolarsResult<DataFrame>> = vec![Ok(first_rdf)];
         
     | 
| 
       19 
19 
     | 
    
         | 
| 
       20 
20 
     | 
    
         
             
                for item in iter {
         
     | 
| 
       21 
     | 
    
         
            -
                    let rdf = get_df(item 
     | 
| 
      
 21 
     | 
    
         
            +
                    let rdf = get_df(item)?;
         
     | 
| 
       22 
22 
     | 
    
         
             
                    rdfs.push(Ok(rdf));
         
     | 
| 
       23 
23 
     | 
    
         
             
                }
         
     | 
| 
       24 
24 
     | 
    
         | 
| 
         @@ -37,13 +37,13 @@ pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> { 
     | 
|
| 
       37 
37 
     | 
    
         
             
            }
         
     | 
| 
       38 
38 
     | 
    
         | 
| 
       39 
39 
     | 
    
         
             
            pub fn concat_series(seq: RArray) -> RbResult<RbSeries> {
         
     | 
| 
       40 
     | 
    
         
            -
                let mut iter = seq. 
     | 
| 
       41 
     | 
    
         
            -
                let first = iter.next().unwrap() 
     | 
| 
      
 40 
     | 
    
         
            +
                let mut iter = seq.into_iter();
         
     | 
| 
      
 41 
     | 
    
         
            +
                let first = iter.next().unwrap();
         
     | 
| 
       42 
42 
     | 
    
         | 
| 
       43 
43 
     | 
    
         
             
                let mut s = get_series(first)?;
         
     | 
| 
       44 
44 
     | 
    
         | 
| 
       45 
45 
     | 
    
         
             
                for res in iter {
         
     | 
| 
       46 
     | 
    
         
            -
                    let item = res 
     | 
| 
      
 46 
     | 
    
         
            +
                    let item = res;
         
     | 
| 
       47 
47 
     | 
    
         
             
                    let item = get_series(item)?;
         
     | 
| 
       48 
48 
     | 
    
         
             
                    s.append(&item).map_err(RbPolarsErr::from)?;
         
     | 
| 
       49 
49 
     | 
    
         
             
                }
         
     | 
| 
         @@ -52,8 +52,8 @@ pub fn concat_series(seq: RArray) -> RbResult<RbSeries> { 
     | 
|
| 
       52 
52 
     | 
    
         | 
| 
       53 
53 
     | 
    
         
             
            pub fn concat_df_diagonal(seq: RArray) -> RbResult<RbDataFrame> {
         
     | 
| 
       54 
54 
     | 
    
         
             
                let mut dfs = Vec::new();
         
     | 
| 
       55 
     | 
    
         
            -
                for item in seq. 
     | 
| 
       56 
     | 
    
         
            -
                    dfs.push(get_df(item 
     | 
| 
      
 55 
     | 
    
         
            +
                for item in seq.into_iter() {
         
     | 
| 
      
 56 
     | 
    
         
            +
                    dfs.push(get_df(item)?);
         
     | 
| 
       57 
57 
     | 
    
         
             
                }
         
     | 
| 
       58 
58 
     | 
    
         
             
                let df = functions::concat_df_diagonal(&dfs).map_err(RbPolarsErr::from)?;
         
     | 
| 
       59 
59 
     | 
    
         
             
                Ok(df.into())
         
     | 
| 
         @@ -61,8 +61,8 @@ pub fn concat_df_diagonal(seq: RArray) -> RbResult<RbDataFrame> { 
     | 
|
| 
       61 
61 
     | 
    
         | 
| 
       62 
62 
     | 
    
         
             
            pub fn concat_df_horizontal(seq: RArray) -> RbResult<RbDataFrame> {
         
     | 
| 
       63 
63 
     | 
    
         
             
                let mut dfs = Vec::new();
         
     | 
| 
       64 
     | 
    
         
            -
                for item in seq. 
     | 
| 
       65 
     | 
    
         
            -
                    dfs.push(get_df(item 
     | 
| 
      
 64 
     | 
    
         
            +
                for item in seq.into_iter() {
         
     | 
| 
      
 65 
     | 
    
         
            +
                    dfs.push(get_df(item)?);
         
     | 
| 
       66 
66 
     | 
    
         
             
                }
         
     | 
| 
       67 
67 
     | 
    
         
             
                let df = functions::concat_df_horizontal(&dfs).map_err(RbPolarsErr::from)?;
         
     | 
| 
       68 
68 
     | 
    
         
             
                Ok(df.into())
         
     | 
| 
         @@ -58,7 +58,7 @@ pub fn rolling_cov( 
     | 
|
| 
       58 
58 
     | 
    
         
             
            pub fn arg_sort_by(
         
     | 
| 
       59 
59 
     | 
    
         
             
                by: RArray,
         
     | 
| 
       60 
60 
     | 
    
         
             
                descending: Vec<bool>,
         
     | 
| 
       61 
     | 
    
         
            -
                nulls_last: bool 
     | 
| 
      
 61 
     | 
    
         
            +
                nulls_last: Vec<bool>,
         
     | 
| 
       62 
62 
     | 
    
         
             
                multithreaded: bool,
         
     | 
| 
       63 
63 
     | 
    
         
             
                maintain_order: bool,
         
     | 
| 
       64 
64 
     | 
    
         
             
            ) -> RbResult<RbExpr> {
         
     | 
| 
         @@ -95,12 +95,12 @@ pub fn col(name: String) -> RbExpr { 
     | 
|
| 
       95 
95 
     | 
    
         | 
| 
       96 
96 
     | 
    
         
             
            pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
         
     | 
| 
       97 
97 
     | 
    
         
             
                let lfs = lfs
         
     | 
| 
       98 
     | 
    
         
            -
                    . 
     | 
| 
       99 
     | 
    
         
            -
                    .map( 
     | 
| 
      
 98 
     | 
    
         
            +
                    .into_iter()
         
     | 
| 
      
 99 
     | 
    
         
            +
                    .map(<&RbLazyFrame>::try_convert)
         
     | 
| 
       100 
100 
     | 
    
         
             
                    .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
         
     | 
| 
       101 
101 
     | 
    
         | 
| 
       102 
102 
     | 
    
         
             
                Ok(RArray::from_iter(lfs.iter().map(|lf| {
         
     | 
| 
       103 
     | 
    
         
            -
                    let df = lf.ldf.clone().collect().unwrap();
         
     | 
| 
      
 103 
     | 
    
         
            +
                    let df = lf.ldf.borrow().clone().collect().unwrap();
         
     | 
| 
       104 
104 
     | 
    
         
             
                    RbDataFrame::new(df)
         
     | 
| 
       105 
105 
     | 
    
         
             
                })))
         
     | 
| 
       106 
106 
     | 
    
         
             
            }
         
     | 
| 
         @@ -118,8 +118,8 @@ pub fn concat_lf( 
     | 
|
| 
       118 
118 
     | 
    
         
             
                let (seq, len) = get_rbseq(lfs)?;
         
     | 
| 
       119 
119 
     | 
    
         
             
                let mut lfs = Vec::with_capacity(len);
         
     | 
| 
       120 
120 
     | 
    
         | 
| 
       121 
     | 
    
         
            -
                for res in seq. 
     | 
| 
       122 
     | 
    
         
            -
                    let item = res 
     | 
| 
      
 121 
     | 
    
         
            +
                for res in seq.into_iter() {
         
     | 
| 
      
 122 
     | 
    
         
            +
                    let item = res;
         
     | 
| 
       123 
123 
     | 
    
         
             
                    let lf = get_lf(item)?;
         
     | 
| 
       124 
124 
     | 
    
         
             
                    lfs.push(lf);
         
     | 
| 
       125 
125 
     | 
    
         
             
                }
         
     | 
| 
         @@ -184,14 +184,9 @@ pub fn concat_lf_diagonal( 
     | 
|
| 
       184 
184 
     | 
    
         
             
                parallel: bool,
         
     | 
| 
       185 
185 
     | 
    
         
             
                to_supertypes: bool,
         
     | 
| 
       186 
186 
     | 
    
         
             
            ) -> RbResult<RbLazyFrame> {
         
     | 
| 
       187 
     | 
    
         
            -
                let iter = lfs. 
     | 
| 
      
 187 
     | 
    
         
            +
                let iter = lfs.into_iter();
         
     | 
| 
       188 
188 
     | 
    
         | 
| 
       189 
     | 
    
         
            -
                let lfs = iter
         
     | 
| 
       190 
     | 
    
         
            -
                    .map(|item| {
         
     | 
| 
       191 
     | 
    
         
            -
                        let item = item?;
         
     | 
| 
       192 
     | 
    
         
            -
                        get_lf(item)
         
     | 
| 
       193 
     | 
    
         
            -
                    })
         
     | 
| 
       194 
     | 
    
         
            -
                    .collect::<RbResult<Vec<_>>>()?;
         
     | 
| 
      
 189 
     | 
    
         
            +
                let lfs = iter.map(get_lf).collect::<RbResult<Vec<_>>>()?;
         
     | 
| 
       195 
190 
     | 
    
         | 
| 
       196 
191 
     | 
    
         
             
                let lf = dsl::functions::concat_lf_diagonal(
         
     | 
| 
       197 
192 
     | 
    
         
             
                    lfs,
         
     | 
| 
         @@ -206,17 +201,22 @@ pub fn concat_lf_diagonal( 
     | 
|
| 
       206 
201 
     | 
    
         
             
                Ok(lf.into())
         
     | 
| 
       207 
202 
     | 
    
         
             
            }
         
     | 
| 
       208 
203 
     | 
    
         | 
| 
       209 
     | 
    
         
            -
            pub fn dtype_cols(dtypes:  
     | 
| 
       210 
     | 
    
         
            -
                dsl::dtype_cols(dtypes).into()
         
     | 
| 
       211 
     | 
    
         
            -
            }
         
     | 
| 
       212 
     | 
    
         
            -
             
     | 
| 
       213 
     | 
    
         
            -
            pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
         
     | 
| 
      
 204 
     | 
    
         
            +
            pub fn dtype_cols(dtypes: RArray) -> RbResult<RbExpr> {
         
     | 
| 
       214 
205 
     | 
    
         
             
                let dtypes = dtypes
         
     | 
| 
       215 
     | 
    
         
            -
                    . 
     | 
| 
       216 
     | 
    
         
            -
                    .map( 
     | 
| 
      
 206 
     | 
    
         
            +
                    .into_iter()
         
     | 
| 
      
 207 
     | 
    
         
            +
                    .map(Wrap::<DataType>::try_convert)
         
     | 
| 
       217 
208 
     | 
    
         
             
                    .collect::<RbResult<Vec<Wrap<DataType>>>>()?;
         
     | 
| 
       218 
209 
     | 
    
         
             
                let dtypes = vec_extract_wrapped(dtypes);
         
     | 
| 
       219 
     | 
    
         
            -
                Ok( 
     | 
| 
      
 210 
     | 
    
         
            +
                Ok(dsl::dtype_cols(dtypes).into())
         
     | 
| 
      
 211 
     | 
    
         
            +
            }
         
     | 
| 
      
 212 
     | 
    
         
            +
             
     | 
| 
      
 213 
     | 
    
         
            +
            pub fn index_cols(indices: Vec<i64>) -> RbExpr {
         
     | 
| 
      
 214 
     | 
    
         
            +
                if indices.len() == 1 {
         
     | 
| 
      
 215 
     | 
    
         
            +
                    dsl::nth(indices[0])
         
     | 
| 
      
 216 
     | 
    
         
            +
                } else {
         
     | 
| 
      
 217 
     | 
    
         
            +
                    dsl::index_cols(indices)
         
     | 
| 
      
 218 
     | 
    
         
            +
                }
         
     | 
| 
      
 219 
     | 
    
         
            +
                .into()
         
     | 
| 
       220 
220 
     | 
    
         
             
            }
         
     | 
| 
       221 
221 
     | 
    
         | 
| 
       222 
222 
     | 
    
         
             
            #[allow(clippy::too_many_arguments)]
         
     | 
| 
         @@ -27,33 +27,27 @@ pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: &RbExpr, dtype: Wrap<DataT 
     | 
|
| 
       27 
27 
     | 
    
         
             
            pub fn date_range(
         
     | 
| 
       28 
28 
     | 
    
         
             
                start: &RbExpr,
         
     | 
| 
       29 
29 
     | 
    
         
             
                end: &RbExpr,
         
     | 
| 
       30 
     | 
    
         
            -
                 
     | 
| 
      
 30 
     | 
    
         
            +
                interval: String,
         
     | 
| 
       31 
31 
     | 
    
         
             
                closed: Wrap<ClosedWindow>,
         
     | 
| 
       32 
     | 
    
         
            -
                time_unit: Option<Wrap<TimeUnit>>,
         
     | 
| 
       33 
     | 
    
         
            -
                time_zone: Option<TimeZone>,
         
     | 
| 
       34 
32 
     | 
    
         
             
            ) -> RbExpr {
         
     | 
| 
       35 
33 
     | 
    
         
             
                let start = start.inner.clone();
         
     | 
| 
       36 
34 
     | 
    
         
             
                let end = end.inner.clone();
         
     | 
| 
       37 
     | 
    
         
            -
                let  
     | 
| 
      
 35 
     | 
    
         
            +
                let interval = Duration::parse(&interval);
         
     | 
| 
       38 
36 
     | 
    
         
             
                let closed = closed.0;
         
     | 
| 
       39 
     | 
    
         
            -
                 
     | 
| 
       40 
     | 
    
         
            -
                dsl::date_range(start, end, every, closed, time_unit, time_zone).into()
         
     | 
| 
      
 37 
     | 
    
         
            +
                dsl::date_range(start, end, interval, closed).into()
         
     | 
| 
       41 
38 
     | 
    
         
             
            }
         
     | 
| 
       42 
39 
     | 
    
         | 
| 
       43 
40 
     | 
    
         
             
            pub fn date_ranges(
         
     | 
| 
       44 
41 
     | 
    
         
             
                start: &RbExpr,
         
     | 
| 
       45 
42 
     | 
    
         
             
                end: &RbExpr,
         
     | 
| 
       46 
     | 
    
         
            -
                 
     | 
| 
      
 43 
     | 
    
         
            +
                interval: String,
         
     | 
| 
       47 
44 
     | 
    
         
             
                closed: Wrap<ClosedWindow>,
         
     | 
| 
       48 
     | 
    
         
            -
                time_unit: Option<Wrap<TimeUnit>>,
         
     | 
| 
       49 
     | 
    
         
            -
                time_zone: Option<TimeZone>,
         
     | 
| 
       50 
45 
     | 
    
         
             
            ) -> RbExpr {
         
     | 
| 
       51 
46 
     | 
    
         
             
                let start = start.inner.clone();
         
     | 
| 
       52 
47 
     | 
    
         
             
                let end = end.inner.clone();
         
     | 
| 
       53 
     | 
    
         
            -
                let  
     | 
| 
      
 48 
     | 
    
         
            +
                let interval = Duration::parse(&interval);
         
     | 
| 
       54 
49 
     | 
    
         
             
                let closed = closed.0;
         
     | 
| 
       55 
     | 
    
         
            -
                 
     | 
| 
       56 
     | 
    
         
            -
                dsl::date_ranges(start, end, every, closed, time_unit, time_zone).into()
         
     | 
| 
      
 50 
     | 
    
         
            +
                dsl::date_ranges(start, end, interval, closed).into()
         
     | 
| 
       57 
51 
     | 
    
         
             
            }
         
     | 
| 
       58 
52 
     | 
    
         | 
| 
       59 
53 
     | 
    
         
             
            pub fn datetime_range(
         
     | 
| 
         @@ -1,4 +1,5 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            use magnus::{class, prelude::*, Module, RArray, RClass, RModule, Value};
         
     | 
| 
      
 2 
     | 
    
         
            +
            use polars::series::BitRepr;
         
     | 
| 
       2 
3 
     | 
    
         
             
            use polars_core::prelude::*;
         
     | 
| 
       3 
4 
     | 
    
         | 
| 
       4 
5 
     | 
    
         
             
            use crate::error::RbPolarsErr;
         
     | 
| 
         @@ -23,7 +24,7 @@ impl RbSeries { 
     | 
|
| 
       23 
24 
     | 
    
         
             
                                .funcall("cast", (np_arr,))
         
     | 
| 
       24 
25 
     | 
    
         
             
                        }
         
     | 
| 
       25 
26 
     | 
    
         
             
                        dt if dt.is_numeric() => {
         
     | 
| 
       26 
     | 
    
         
            -
                            if s. 
     | 
| 
      
 27 
     | 
    
         
            +
                            if let Some(BitRepr::Large(_)) = s.bit_repr() {
         
     | 
| 
       27 
28 
     | 
    
         
             
                                let s = s.cast(&DataType::Float64).unwrap();
         
     | 
| 
       28 
29 
     | 
    
         
             
                                let ca = s.f64().unwrap();
         
     | 
| 
       29 
30 
     | 
    
         
             
                                // TODO make more efficient
         
     |