RubyGems - polars-df - Versions diffs - 0.7.0 → 0.9.0 - Mend

polars-df 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (83)

checksums.yaml +4 -4
data/CHANGELOG.md +41 -0
data/Cargo.lock +353 -237
data/Cargo.toml +0 -3
data/LICENSE.txt +1 -1
data/README.md +2 -2
data/ext/polars/Cargo.toml +17 -6
data/ext/polars/src/batched_csv.rs +6 -7
data/ext/polars/src/conversion/anyvalue.rs +185 -0
data/ext/polars/src/conversion/chunked_array.rs +140 -0
data/ext/polars/src/{conversion.rs → conversion/mod.rs} +268 -347
data/ext/polars/src/dataframe.rs +96 -116
data/ext/polars/src/expr/array.rs +74 -0
data/ext/polars/src/expr/categorical.rs +8 -1
data/ext/polars/src/expr/datetime.rs +22 -56
data/ext/polars/src/expr/general.rs +124 -37
data/ext/polars/src/expr/list.rs +52 -4
data/ext/polars/src/expr/meta.rs +48 -0
data/ext/polars/src/expr/rolling.rs +16 -10
data/ext/polars/src/expr/string.rs +68 -17
data/ext/polars/src/expr/struct.rs +8 -4
data/ext/polars/src/functions/aggregation.rs +6 -0
data/ext/polars/src/functions/lazy.rs +103 -48
data/ext/polars/src/functions/meta.rs +45 -1
data/ext/polars/src/functions/range.rs +5 -10
data/ext/polars/src/functions/string_cache.rs +14 -0
data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +166 -41
data/ext/polars/src/lib.rs +245 -187
data/ext/polars/src/map/dataframe.rs +1 -1
data/ext/polars/src/map/mod.rs +2 -2
data/ext/polars/src/map/series.rs +6 -6
data/ext/polars/src/object.rs +0 -30
data/ext/polars/src/on_startup.rs +32 -0
data/ext/polars/src/series/aggregation.rs +23 -0
data/ext/polars/src/series/construction.rs +1 -1
data/ext/polars/src/series/export.rs +2 -2
data/ext/polars/src/{series.rs → series/mod.rs} +45 -21
data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +18 -18
data/ext/polars/src/utils.rs +1 -1
data/lib/polars/array_expr.rb +449 -0
data/lib/polars/array_name_space.rb +346 -0
data/lib/polars/cat_expr.rb +24 -0
data/lib/polars/cat_name_space.rb +75 -0
data/lib/polars/config.rb +2 -2
data/lib/polars/data_frame.rb +248 -108
data/lib/polars/data_types.rb +195 -29
data/lib/polars/date_time_expr.rb +41 -24
data/lib/polars/date_time_name_space.rb +12 -12
data/lib/polars/exceptions.rb +12 -1
data/lib/polars/expr.rb +1080 -195
data/lib/polars/functions/aggregation/horizontal.rb +246 -0
data/lib/polars/functions/aggregation/vertical.rb +282 -0
data/lib/polars/functions/as_datatype.rb +248 -0
data/lib/polars/functions/col.rb +47 -0
data/lib/polars/functions/eager.rb +182 -0
data/lib/polars/functions/lazy.rb +1280 -0
data/lib/polars/functions/len.rb +49 -0
data/lib/polars/functions/lit.rb +35 -0
data/lib/polars/functions/random.rb +16 -0
data/lib/polars/functions/range/date_range.rb +103 -0
data/lib/polars/functions/range/int_range.rb +51 -0
data/lib/polars/functions/repeat.rb +144 -0
data/lib/polars/functions/whenthen.rb +27 -0
data/lib/polars/functions.rb +29 -416
data/lib/polars/group_by.rb +3 -3
data/lib/polars/io.rb +21 -28
data/lib/polars/lazy_frame.rb +390 -76
data/lib/polars/list_expr.rb +152 -6
data/lib/polars/list_name_space.rb +102 -0
data/lib/polars/meta_expr.rb +175 -7
data/lib/polars/series.rb +557 -59
data/lib/polars/sql_context.rb +1 -1
data/lib/polars/string_cache.rb +75 -0
data/lib/polars/string_expr.rb +412 -96
data/lib/polars/string_name_space.rb +4 -4
data/lib/polars/struct_expr.rb +1 -1
data/lib/polars/struct_name_space.rb +1 -1
data/lib/polars/testing.rb +507 -0
data/lib/polars/utils.rb +64 -20
data/lib/polars/version.rb +1 -1
data/lib/polars.rb +15 -2
metadata +40 -9
data/lib/polars/lazy_functions.rb +0 -1197

data/ext/polars/src/dataframe.rs CHANGED Viewed

@@ -6,12 +6,13 @@ use polars::frame::row::{rows_to_schema_supertypes, Row};
 use polars::frame::NullStrategy;
 use polars::io::avro::AvroCompression;
 use polars::io::mmap::ReaderBytes;
-use polars::io::RowCount;
+use polars::io::RowIndex;
 use polars::prelude::pivot::{pivot, pivot_stable};
 use polars::prelude::*;
 use polars_core::utils::try_get_supertype;
 use std::cell::RefCell;
 use std::io::{BufWriter, Cursor};
+use std::num::NonZeroUsize;
 use std::ops::Deref;
 use crate::conversion::*;
@@ -45,44 +46,51 @@ impl RbDataFrame {
     fn finish_from_rows(
         rows: Vec<Row>,
         infer_schema_length: Option<usize>,
-        schema_overwrite: Option<Schema>,
+        schema: Option<Schema>,
+        schema_overrides_by_idx: Option<Vec<(usize, DataType)>>,
     ) -> RbResult<Self> {
-        // object builder must be registered.
-        crate::object::register_object_builder();
+        // Object builder must be registered
+        crate::on_startup::register_object_builder();
-        let schema =
+        let mut final_schema =
             rows_to_schema_supertypes(&rows, infer_schema_length.map(|n| std::cmp::max(1, n)))
                 .map_err(RbPolarsErr::from)?;
-        // replace inferred nulls with boolean
-        let fields = schema.iter_fields().map(|mut fld| match fld.data_type() {
-            DataType::Null => {
-                // fld.coerce(DataType::Boolean);
-                fld
-            }
-            DataType::Decimal(_, _) => {
-                fld.coerce(DataType::Decimal(None, None));
-                fld
+        // Erase scale from inferred decimals.
+        for dtype in final_schema.iter_dtypes_mut() {
+            if let DataType::Decimal(_, _) = dtype {
+                *dtype = DataType::Decimal(None, None)
             }
-            _ => fld,
-        });
-        let mut schema = Schema::from_iter(fields);
+        }
-        if let Some(schema_overwrite) = schema_overwrite {
-            for (i, (name, dtype)) in schema_overwrite.into_iter().enumerate() {
-                if let Some((name_, dtype_)) = schema.get_at_index_mut(i) {
+        // Integrate explicit/inferred schema.
+        if let Some(schema) = schema {
+            for (i, (name, dtype)) in schema.into_iter().enumerate() {
+                if let Some((name_, dtype_)) = final_schema.get_at_index_mut(i) {
                     *name_ = name;
-                    // if user sets dtype unknown, we use the inferred datatype
+                    // If schema dtype is Unknown, overwrite with inferred datatype.
                     if !matches!(dtype, DataType::Unknown) {
                         *dtype_ = dtype;
                     }
                 } else {
-                    schema.with_column(name, dtype);
+                    final_schema.with_column(name, dtype);
                 }
             }
         }
-        let df = DataFrame::from_rows_and_schema(&rows, &schema).map_err(RbPolarsErr::from)?;
+        // Optional per-field overrides; these supersede default/inferred dtypes.
+        if let Some(overrides) = schema_overrides_by_idx {
+            for (i, dtype) in overrides {
+                if let Some((_, dtype_)) = final_schema.get_at_index_mut(i) {
+                    if !matches!(dtype, DataType::Unknown) {
+                        *dtype_ = dtype;
+                    }
+                }
+            }
+        }
+        let df =
+            DataFrame::from_rows_and_schema(&rows, &final_schema).map_err(RbPolarsErr::from)?;
         Ok(df.into())
     }
@@ -120,21 +128,20 @@ impl RbDataFrame {
         // TODO fix
         let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
         let low_memory = bool::try_convert(arguments[16])?;
-        let comment_char = Option::<String>::try_convert(arguments[17])?;
+        let comment_prefix = Option::<String>::try_convert(arguments[17])?;
         let quote_char = Option::<String>::try_convert(arguments[18])?;
         let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[19])?;
         let try_parse_dates = bool::try_convert(arguments[20])?;
         let skip_rows_after_header = usize::try_convert(arguments[21])?;
-        let row_count = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
+        let row_index = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
         let sample_size = usize::try_convert(arguments[23])?;
         let eol_char = String::try_convert(arguments[24])?;
         // end arguments
         let null_values = null_values.map(|w| w.0);
-        let comment_char = comment_char.map(|s| s.as_bytes()[0]);
         let eol_char = eol_char.as_bytes()[0];
-        let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
+        let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
         let quote_char = if let Some(s) = quote_char {
             if s.is_empty() {
@@ -181,13 +188,13 @@ impl RbDataFrame {
             .with_dtypes(overwrite_dtype.map(Arc::new))
             .with_dtypes_slice(overwrite_dtype_slice.as_deref())
             .low_memory(low_memory)
-            .with_comment_char(comment_char)
+            .with_comment_prefix(comment_prefix.as_deref())
             .with_null_values(null_values)
             .with_try_parse_dates(try_parse_dates)
             .with_quote_char(quote_char)
             .with_end_of_line_char(eol_char)
             .with_skip_rows_after_header(skip_rows_after_header)
-            .with_row_count(row_count)
+            .with_row_index(row_index)
             .sample_size(sample_size)
             .finish()
             .map_err(RbPolarsErr::from)?;
@@ -201,19 +208,19 @@ impl RbDataFrame {
         projection: Option<Vec<usize>>,
         n_rows: Option<usize>,
         parallel: Wrap<ParallelStrategy>,
-        row_count: Option<(String, IdxSize)>,
+        row_index: Option<(String, IdxSize)>,
         low_memory: bool,
         use_statistics: bool,
         rechunk: bool,
     ) -> RbResult<Self> {
-        let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
+        let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
         let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
         let df = ParquetReader::new(mmap_bytes_r)
             .with_projection(projection)
             .with_columns(columns)
             .read_parallel(parallel.0)
             .with_n_rows(n_rows)
-            .with_row_count(row_count)
+            .with_row_index(row_index)
             .set_low_memory(low_memory)
             .use_statistics(use_statistics)
             .set_rechunk(rechunk)
@@ -227,16 +234,16 @@ impl RbDataFrame {
         columns: Option<Vec<String>>,
         projection: Option<Vec<usize>>,
         n_rows: Option<usize>,
-        row_count: Option<(String, IdxSize)>,
+        row_index: Option<(String, IdxSize)>,
         memory_map: bool,
     ) -> RbResult<Self> {
-        let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
+        let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
         let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
         let df = IpcReader::new(mmap_bytes_r)
             .with_projection(projection)
             .with_columns(columns)
             .with_n_rows(n_rows)
-            .with_row_count(row_count)
+            .with_row_index(row_index)
             .memory_mapped(memory_map)
             .finish()
             .map_err(RbPolarsErr::from)?;
@@ -297,12 +304,18 @@ impl RbDataFrame {
             Ok(df) => Ok(df.into()),
             // try arrow json reader instead
             // this is row oriented
-            Err(_) => {
-                let out = JsonReader::new(mmap_bytes_r)
-                    .with_json_format(JsonFormat::Json)
-                    .finish()
-                    .map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
-                Ok(out.into())
+            Err(e) => {
+                let msg = format!("{e}");
+                if msg.contains("successful parse invalid data") {
+                    let e = RbPolarsErr::from(PolarsError::ComputeError(msg.into()));
+                    Err(e)
+                } else {
+                    let out = JsonReader::new(mmap_bytes_r)
+                        .with_json_format(JsonFormat::Json)
+                        .finish()
+                        .map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
+                    Ok(out.into())
+                }
             }
         }
     }
@@ -347,7 +360,7 @@ impl RbDataFrame {
     pub fn read_rows(
         rb_rows: RArray,
         infer_schema_length: Option<usize>,
-        schema_overwrite: Option<Wrap<Schema>>,
+        schema: Option<Wrap<Schema>>,
     ) -> RbResult<Self> {
         let mut rows = Vec::with_capacity(rb_rows.len());
         for v in rb_rows.each() {
@@ -358,30 +371,34 @@ impl RbDataFrame {
             }
             rows.push(Row(row));
         }
-        Self::finish_from_rows(
-            rows,
-            infer_schema_length,
-            schema_overwrite.map(|wrap| wrap.0),
-        )
+        Self::finish_from_rows(rows, infer_schema_length, schema.map(|wrap| wrap.0), None)
     }
     pub fn read_hashes(
         dicts: Value,
         infer_schema_length: Option<usize>,
-        schema_overwrite: Option<Wrap<Schema>>,
+        schema: Option<Wrap<Schema>>,
+        schema_overrides: Option<Wrap<Schema>>,
     ) -> RbResult<Self> {
-        let (rows, mut names) = dicts_to_rows(&dicts, infer_schema_length.unwrap_or(50))?;
+        let mut schema_columns = PlIndexSet::new();
+        if let Some(s) = &schema {
+            schema_columns.extend(s.0.iter_names().map(|n| n.to_string()))
+        }
+        let (rows, names) = dicts_to_rows(&dicts, infer_schema_length, schema_columns)?;
-        // ensure the new names are used
-        if let Some(schema) = &schema_overwrite {
-            for (new_name, name) in schema.0.iter_names().zip(names.iter_mut()) {
-                *name = new_name.to_string();
+        let mut schema_overrides_by_idx: Vec<(usize, DataType)> = Vec::new();
+        if let Some(overrides) = schema_overrides {
+            for (idx, name) in names.iter().enumerate() {
+                if let Some(dtype) = overrides.0.get(name) {
+                    schema_overrides_by_idx.push((idx, dtype.clone()));
+                }
             }
         }
         let rbdf = Self::finish_from_rows(
             rows,
             infer_schema_length,
-            schema_overwrite.map(|wrap| wrap.0),
+            schema.map(|wrap| wrap.0),
+            Some(schema_overrides_by_idx),
         )?;
         unsafe {
@@ -422,13 +439,14 @@ impl RbDataFrame {
         include_header: bool,
         separator: u8,
         quote_char: u8,
-        batch_size: usize,
+        batch_size: Wrap<NonZeroUsize>,
         datetime_format: Option<String>,
         date_format: Option<String>,
         time_format: Option<String>,
         float_precision: Option<usize>,
         null_value: Option<String>,
     ) -> RbResult<()> {
+        let batch_size = batch_size.0;
         let null = null_value.unwrap_or_default();
         if let Ok(s) = String::try_convert(rb_f) {
@@ -504,7 +522,7 @@ impl RbDataFrame {
                 .get_columns()
                 .iter()
                 .map(|s| match s.dtype() {
-                    DataType::Object(_) => {
+                    DataType::Object(_, _) => {
                         let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
                         obj.unwrap().to_object()
                     }
@@ -523,7 +541,7 @@ impl RbDataFrame {
                     .get_columns()
                     .iter()
                     .map(|s| match s.dtype() {
-                        DataType::Object(_) => {
+                        DataType::Object(_, _) => {
                             let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
                             obj.unwrap().to_object()
                         }
@@ -785,16 +803,15 @@ impl RbDataFrame {
             .map(|s| RbSeries::new(s.clone()))
     }
-    pub fn find_idx_by_name(&self, name: String) -> Option<usize> {
-        self.df.borrow().find_idx_by_name(&name)
+    pub fn get_column_index(&self, name: String) -> Option<usize> {
+        self.df.borrow().get_column_index(&name)
     }
-    // TODO remove clone
-    pub fn column(&self, name: String) -> RbResult<RbSeries> {
+    pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
         self.df
             .borrow()
             .column(&name)
-            .map(|v| v.clone().into())
+            .map(|s| RbSeries::new(s.clone()))
             .map_err(RbPolarsErr::from)
     }
@@ -828,18 +845,18 @@ impl RbDataFrame {
         Ok(())
     }
-    pub fn replace_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
+    pub fn replace_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
         self.df
             .borrow_mut()
-            .replace_at_idx(index, new_col.series.borrow().clone())
+            .replace_column(index, new_col.series.borrow().clone())
             .map_err(RbPolarsErr::from)?;
         Ok(())
     }
-    pub fn insert_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
+    pub fn insert_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
         self.df
             .borrow_mut()
-            .insert_at_idx(index, new_col.series.borrow().clone())
+            .insert_column(index, new_col.series.borrow().clone())
             .map_err(RbPolarsErr::from)?;
         Ok(())
     }
@@ -874,19 +891,19 @@ impl RbDataFrame {
         Ok(mask.into_series().into())
     }
-    pub fn frame_equal(&self, other: &RbDataFrame, null_equal: bool) -> bool {
+    pub fn equals(&self, other: &RbDataFrame, null_equal: bool) -> bool {
         if null_equal {
-            self.df.borrow().frame_equal_missing(&other.df.borrow())
+            self.df.borrow().equals_missing(&other.df.borrow())
         } else {
-            self.df.borrow().frame_equal(&other.df.borrow())
+            self.df.borrow().equals(&other.df.borrow())
         }
     }
-    pub fn with_row_count(&self, name: String, offset: Option<IdxSize>) -> RbResult<Self> {
+    pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> RbResult<Self> {
         let df = self
             .df
             .borrow()
-            .with_row_count(&name, offset)
+            .with_row_index(&name, offset)
             .map_err(RbPolarsErr::from)?;
         Ok(df.into())
     }
@@ -917,9 +934,9 @@ impl RbDataFrame {
     #[allow(clippy::too_many_arguments)]
     pub fn pivot_expr(
         &self,
-        values: Vec<String>,
         index: Vec<String>,
         columns: Vec<String>,
+        values: Option<Vec<String>>,
         maintain_order: bool,
         sort_columns: bool,
         aggregate_expr: Option<&RbExpr>,
@@ -932,9 +949,9 @@ impl RbDataFrame {
         let agg_expr = aggregate_expr.map(|aggregate_expr| aggregate_expr.inner.clone());
         let df = fun(
             &self.df.borrow(),
-            values,
             index,
             columns,
+            values,
             sort_columns,
             agg_expr,
             separator.as_deref(),
@@ -966,34 +983,6 @@ impl RbDataFrame {
         self.df.borrow().clone().lazy().into()
     }
-    pub fn max(&self) -> Self {
-        self.df.borrow().max().into()
-    }
-    pub fn min(&self) -> Self {
-        self.df.borrow().min().into()
-    }
-    pub fn sum(&self) -> Self {
-        self.df.borrow().sum().into()
-    }
-    pub fn mean(&self) -> Self {
-        self.df.borrow().mean().into()
-    }
-    pub fn std(&self, ddof: u8) -> Self {
-        self.df.borrow().std(ddof).into()
-    }
-    pub fn var(&self, ddof: u8) -> Self {
-        self.df.borrow().var(ddof).into()
-    }
-    pub fn median(&self) -> Self {
-        self.df.borrow().median().into()
-    }
     pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
         let s = self
             .df
@@ -1040,19 +1029,6 @@ impl RbDataFrame {
         Ok(s.map(|s| s.into()))
     }
-    pub fn quantile(
-        &self,
-        quantile: f64,
-        interpolation: Wrap<QuantileInterpolOptions>,
-    ) -> RbResult<Self> {
-        let df = self
-            .df
-            .borrow()
-            .quantile(quantile, interpolation.0)
-            .map_err(RbPolarsErr::from)?;
-        Ok(df.into())
-    }
     pub fn to_dummies(
         &self,
         columns: Option<Vec<String>>,
@@ -1124,7 +1100,7 @@ impl RbDataFrame {
                     .into_datetime(tu, tz)
                     .into_series()
             }
-            Some(DataType::Utf8) => {
+            Some(DataType::String) => {
                 apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
             }
             _ => return apply_lambda_unknown(df, lambda, inference_size),
@@ -1157,7 +1133,7 @@ impl RbDataFrame {
         };
         Ok(self
             .df
-            .borrow()
+            .borrow_mut()
             .transpose(keep_names_as.as_deref(), new_col_names)
             .map_err(RbPolarsErr::from)?
             .into())
@@ -1199,4 +1175,8 @@ impl RbDataFrame {
         let df = self.df.borrow().unnest(names).map_err(RbPolarsErr::from)?;
         Ok(df.into())
     }
+    pub fn clear(&self) -> Self {
+        self.df.borrow().clear().into()
+    }
 }

data/ext/polars/src/expr/array.rs CHANGED Viewed

@@ -1,3 +1,5 @@
+use polars::prelude::*;
 use crate::RbExpr;
 impl RbExpr {
@@ -12,4 +14,76 @@ impl RbExpr {
     pub fn array_sum(&self) -> Self {
         self.inner.clone().arr().sum().into()
     }
+    pub fn arr_unique(&self, maintain_order: bool) -> Self {
+        if maintain_order {
+            self.inner.clone().arr().unique_stable().into()
+        } else {
+            self.inner.clone().arr().unique().into()
+        }
+    }
+    pub fn arr_to_list(&self) -> Self {
+        self.inner.clone().arr().to_list().into()
+    }
+    pub fn arr_all(&self) -> Self {
+        self.inner.clone().arr().all().into()
+    }
+    pub fn arr_any(&self) -> Self {
+        self.inner.clone().arr().any().into()
+    }
+    pub fn arr_sort(&self, descending: bool, nulls_last: bool) -> Self {
+        self.inner
+            .clone()
+            .arr()
+            .sort(SortOptions {
+                descending,
+                nulls_last,
+                ..Default::default()
+            })
+            .into()
+    }
+    pub fn arr_reverse(&self) -> Self {
+        self.inner.clone().arr().reverse().into()
+    }
+    pub fn arr_arg_min(&self) -> Self {
+        self.inner.clone().arr().arg_min().into()
+    }
+    pub fn arr_arg_max(&self) -> Self {
+        self.inner.clone().arr().arg_max().into()
+    }
+    pub fn arr_get(&self, index: &RbExpr) -> Self {
+        self.inner.clone().arr().get(index.inner.clone()).into()
+    }
+    pub fn arr_join(&self, separator: &RbExpr, ignore_nulls: bool) -> Self {
+        self.inner
+            .clone()
+            .arr()
+            .join(separator.inner.clone(), ignore_nulls)
+            .into()
+    }
+    pub fn arr_contains(&self, other: &RbExpr) -> Self {
+        self.inner
+            .clone()
+            .arr()
+            .contains(other.inner.clone())
+            .into()
+    }
+    pub fn arr_count_matches(&self, expr: &RbExpr) -> Self {
+        self.inner
+            .clone()
+            .arr()
+            .count_matches(expr.inner.clone())
+            .into()
+    }
 }

data/ext/polars/src/expr/categorical.rs CHANGED Viewed

@@ -5,6 +5,13 @@ use crate::RbExpr;
 impl RbExpr {
     pub fn cat_set_ordering(&self, ordering: Wrap<CategoricalOrdering>) -> Self {
-        self.inner.clone().cat().set_ordering(ordering.0).into()
+        self.inner
+            .clone()
+            .cast(DataType::Categorical(None, ordering.0))
+            .into()
+    }
+    pub fn cat_get_categories(&self) -> Self {
+        self.inner.clone().cat().get_categories().into()
     }
 }

data/ext/polars/src/expr/datetime.rs CHANGED Viewed

@@ -61,6 +61,14 @@ impl RbExpr {
         self.inner.clone().dt().month_end().into()
     }
+    pub fn dt_base_utc_offset(&self) -> Self {
+        self.inner.clone().dt().base_utc_offset().into()
+    }
+    pub fn dt_dst_offset(&self) -> Self {
+        self.inner.clone().dt().dst_offset().into()
+    }
     pub fn dt_round(&self, every: String, offset: String) -> Self {
         self.inner.clone().dt().round(&every, &offset).into()
     }
@@ -149,73 +157,31 @@ impl RbExpr {
         self.inner.clone().dt().timestamp(tu.0).into()
     }
-    pub fn duration_days(&self) -> Self {
-        self.inner
-            .clone()
-            .map(
-                |s| Ok(Some(s.duration()?.days().into_series())),
-                GetOutput::from_type(DataType::Int64),
-            )
-            .into()
+    pub fn dt_total_days(&self) -> Self {
+        self.inner.clone().dt().total_days().into()
     }
-    pub fn duration_hours(&self) -> Self {
-        self.inner
-            .clone()
-            .map(
-                |s| Ok(Some(s.duration()?.hours().into_series())),
-                GetOutput::from_type(DataType::Int64),
-            )
-            .into()
+    pub fn dt_total_hours(&self) -> Self {
+        self.inner.clone().dt().total_hours().into()
     }
-    pub fn duration_minutes(&self) -> Self {
-        self.inner
-            .clone()
-            .map(
-                |s| Ok(Some(s.duration()?.minutes().into_series())),
-                GetOutput::from_type(DataType::Int64),
-            )
-            .into()
+    pub fn dt_total_minutes(&self) -> Self {
+        self.inner.clone().dt().total_minutes().into()
     }
-    pub fn duration_seconds(&self) -> Self {
-        self.inner
-            .clone()
-            .map(
-                |s| Ok(Some(s.duration()?.seconds().into_series())),
-                GetOutput::from_type(DataType::Int64),
-            )
-            .into()
+    pub fn dt_total_seconds(&self) -> Self {
+        self.inner.clone().dt().total_seconds().into()
     }
-    pub fn duration_milliseconds(&self) -> Self {
-        self.inner
-            .clone()
-            .map(
-                |s| Ok(Some(s.duration()?.milliseconds().into_series())),
-                GetOutput::from_type(DataType::Int64),
-            )
-            .into()
+    pub fn dt_total_milliseconds(&self) -> Self {
+        self.inner.clone().dt().total_milliseconds().into()
     }
-    pub fn duration_microseconds(&self) -> Self {
-        self.inner
-            .clone()
-            .map(
-                |s| Ok(Some(s.duration()?.microseconds().into_series())),
-                GetOutput::from_type(DataType::Int64),
-            )
-            .into()
+    pub fn dt_total_microseconds(&self) -> Self {
+        self.inner.clone().dt().total_microseconds().into()
     }
-    pub fn duration_nanoseconds(&self) -> Self {
-        self.inner
-            .clone()
-            .map(
-                |s| Ok(Some(s.duration()?.nanoseconds().into_series())),
-                GetOutput::from_type(DataType::Int64),
-            )
-            .into()
+    pub fn dt_total_nanoseconds(&self) -> Self {
+        self.inner.clone().dt().total_nanoseconds().into()
     }
 }