RubyGems - polars-df - Versions diffs - 0.10.0 → 0.12.0 - Mend

polars-df 0.10.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (92)

checksums.yaml +4 -4
data/CHANGELOG.md +27 -0
data/Cargo.lock +392 -351
data/README.md +6 -6
data/ext/polars/Cargo.toml +12 -7
data/ext/polars/src/batched_csv.rs +53 -52
data/ext/polars/src/conversion/any_value.rs +261 -0
data/ext/polars/src/conversion/chunked_array.rs +4 -4
data/ext/polars/src/conversion/mod.rs +60 -66
data/ext/polars/src/dataframe/construction.rs +184 -0
data/ext/polars/src/dataframe/export.rs +48 -0
data/ext/polars/src/dataframe/general.rs +597 -0
data/ext/polars/src/dataframe/io.rs +473 -0
data/ext/polars/src/dataframe/mod.rs +26 -0
data/ext/polars/src/error.rs +26 -4
data/ext/polars/src/expr/categorical.rs +0 -10
data/ext/polars/src/expr/datetime.rs +4 -8
data/ext/polars/src/expr/general.rs +129 -94
data/ext/polars/src/expr/mod.rs +2 -2
data/ext/polars/src/expr/rolling.rs +201 -77
data/ext/polars/src/expr/string.rs +11 -36
data/ext/polars/src/functions/eager.rs +10 -10
data/ext/polars/src/functions/lazy.rs +23 -21
data/ext/polars/src/functions/range.rs +69 -1
data/ext/polars/src/interop/mod.rs +1 -0
data/ext/polars/src/interop/numo/mod.rs +2 -0
data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
data/ext/polars/src/lazyframe/mod.rs +135 -136
data/ext/polars/src/lib.rs +94 -59
data/ext/polars/src/map/dataframe.rs +2 -2
data/ext/polars/src/map/lazy.rs +5 -25
data/ext/polars/src/map/series.rs +7 -1
data/ext/polars/src/rb_modules.rs +25 -1
data/ext/polars/src/series/aggregation.rs +49 -30
data/ext/polars/src/series/arithmetic.rs +21 -11
data/ext/polars/src/series/construction.rs +56 -38
data/ext/polars/src/series/export.rs +131 -49
data/ext/polars/src/series/mod.rs +32 -141
data/ext/polars/src/sql.rs +3 -1
data/lib/polars/array_expr.rb +4 -4
data/lib/polars/batched_csv_reader.rb +11 -5
data/lib/polars/cat_expr.rb +0 -36
data/lib/polars/cat_name_space.rb +0 -37
data/lib/polars/convert.rb +6 -1
data/lib/polars/data_frame.rb +176 -403
data/lib/polars/data_types.rb +1 -1
data/lib/polars/date_time_expr.rb +525 -572
data/lib/polars/date_time_name_space.rb +263 -460
data/lib/polars/dynamic_group_by.rb +5 -5
data/lib/polars/exceptions.rb +7 -0
data/lib/polars/expr.rb +1394 -243
data/lib/polars/expr_dispatch.rb +1 -1
data/lib/polars/functions/aggregation/horizontal.rb +8 -8
data/lib/polars/functions/as_datatype.rb +63 -40
data/lib/polars/functions/lazy.rb +63 -14
data/lib/polars/functions/lit.rb +1 -1
data/lib/polars/functions/range/date_range.rb +90 -57
data/lib/polars/functions/range/datetime_range.rb +149 -0
data/lib/polars/functions/range/int_range.rb +2 -2
data/lib/polars/functions/range/time_range.rb +141 -0
data/lib/polars/functions/repeat.rb +1 -1
data/lib/polars/functions/whenthen.rb +1 -1
data/lib/polars/group_by.rb +88 -23
data/lib/polars/io/avro.rb +24 -0
data/lib/polars/{io.rb → io/csv.rb} +299 -493
data/lib/polars/io/database.rb +73 -0
data/lib/polars/io/ipc.rb +247 -0
data/lib/polars/io/json.rb +29 -0
data/lib/polars/io/ndjson.rb +80 -0
data/lib/polars/io/parquet.rb +227 -0
data/lib/polars/lazy_frame.rb +143 -272
data/lib/polars/lazy_group_by.rb +100 -3
data/lib/polars/list_expr.rb +11 -11
data/lib/polars/list_name_space.rb +5 -1
data/lib/polars/rolling_group_by.rb +7 -9
data/lib/polars/series.rb +103 -187
data/lib/polars/string_expr.rb +78 -102
data/lib/polars/string_name_space.rb +5 -4
data/lib/polars/testing.rb +2 -2
data/lib/polars/utils/constants.rb +9 -0
data/lib/polars/utils/convert.rb +97 -0
data/lib/polars/utils/parse.rb +89 -0
data/lib/polars/utils/various.rb +76 -0
data/lib/polars/utils/wrap.rb +19 -0
data/lib/polars/utils.rb +8 -300
data/lib/polars/version.rb +1 -1
data/lib/polars/whenthen.rb +6 -6
data/lib/polars.rb +20 -1
metadata +28 -7
data/ext/polars/src/conversion/anyvalue.rs +0 -186
data/ext/polars/src/dataframe.rs +0 -1208

data/lib/polars/lazy_frame.rb CHANGED

@@ -27,149 +27,6 @@ module Polars
       ldf
     end
-    # @private
-    def self._scan_csv(
-      file,
-      has_header: true,
-      sep: ",",
-      comment_char: nil,
-      quote_char: '"',
-      skip_rows: 0,
-      dtypes: nil,
-      null_values: nil,
-      ignore_errors: false,
-      cache: true,
-      with_column_names: nil,
-      infer_schema_length: 100,
-      n_rows: nil,
-      encoding: "utf8",
-      low_memory: false,
-      rechunk: true,
-      skip_rows_after_header: 0,
-      row_count_name: nil,
-      row_count_offset: 0,
-      parse_dates: false,
-      eol_char: "\n",
-      truncate_ragged_lines: true
-    )
-      dtype_list = nil
-      if !dtypes.nil?
-        dtype_list = []
-        dtypes.each do |k, v|
-          dtype_list << [k, Utils.rb_type_to_dtype(v)]
-        end
-      end
-      processed_null_values = Utils._process_null_values(null_values)
-      _from_rbldf(
-        RbLazyFrame.new_from_csv(
-          file,
-          sep,
-          has_header,
-          ignore_errors,
-          skip_rows,
-          n_rows,
-          cache,
-          dtype_list,
-          low_memory,
-          comment_char,
-          quote_char,
-          processed_null_values,
-          infer_schema_length,
-          with_column_names,
-          rechunk,
-          skip_rows_after_header,
-          encoding,
-          Utils._prepare_row_count_args(row_count_name, row_count_offset),
-          parse_dates,
-          eol_char,
-          truncate_ragged_lines
-        )
-      )
-    end
-    # @private
-    def self._scan_parquet(
-      file,
-      n_rows: nil,
-      cache: true,
-      parallel: "auto",
-      rechunk: true,
-      row_count_name: nil,
-      row_count_offset: 0,
-      storage_options: nil,
-      low_memory: false,
-      use_statistics: true,
-      hive_partitioning: true
-    )
-      _from_rbldf(
-        RbLazyFrame.new_from_parquet(
-          file,
-          [],
-          n_rows,
-          cache,
-          parallel,
-          rechunk,
-          Utils._prepare_row_count_args(row_count_name, row_count_offset),
-          low_memory,
-          use_statistics,
-          hive_partitioning,
-          nil
-        )
-      )
-    end
-    # @private
-    def self._scan_ipc(
-      file,
-      n_rows: nil,
-      cache: true,
-      rechunk: true,
-      row_count_name: nil,
-      row_count_offset: 0,
-      storage_options: nil,
-      memory_map: true
-    )
-      if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
-      end
-      _from_rbldf(
-        RbLazyFrame.new_from_ipc(
-          file,
-          n_rows,
-          cache,
-          rechunk,
-          Utils._prepare_row_count_args(row_count_name, row_count_offset),
-          memory_map
-        )
-      )
-    end
-    # @private
-    def self._scan_ndjson(
-      file,
-      infer_schema_length: nil,
-      batch_size: nil,
-      n_rows: nil,
-      low_memory: false,
-      rechunk: true,
-      row_count_name: nil,
-      row_count_offset: 0
-    )
-      _from_rbldf(
-        RbLazyFrame.new_from_ndjson(
-          file,
-          infer_schema_length,
-          batch_size,
-          n_rows,
-          low_memory,
-          rechunk,
-          Utils._prepare_row_count_args(row_count_name, row_count_offset)
-        )
-      )
-    end
     # def self.from_json
     # end
@@ -181,7 +38,7 @@ module Polars
     # @return [LazyFrame]
     def self.read_json(file)
       if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
+        file = Utils.normalize_filepath(file)
       end
       Utils.wrap_ldf(RbLazyFrame.read_json(file))
@@ -206,7 +63,7 @@ module Polars
     #   df.columns
     #   # => ["foo", "bar"]
     def columns
-      _ldf.columns
+      _ldf.collect_schema.keys
     end
     # Get dtypes of columns in LazyFrame.
@@ -224,7 +81,7 @@ module Polars
     #   lf.dtypes
     #   # => [Polars::Int64, Polars::Float64, Polars::String]
     def dtypes
-      _ldf.dtypes
+      _ldf.collect_schema.values
     end
     # Get the schema.
@@ -242,7 +99,7 @@ module Polars
     #   lf.schema
     #   # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String}
     def schema
-      _ldf.schema
+      _ldf.collect_schema
     end
     # Get the width of the LazyFrame.
@@ -254,7 +111,7 @@ module Polars
     #   lf.width
     #   # => 2
     def width
-      _ldf.width
+      _ldf.collect_schema.length
     end
     # Check if LazyFrame includes key.
@@ -288,7 +145,7 @@ module Polars
     # @return [nil]
     def write_json(file)
       if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
+        file = Utils.normalize_filepath(file)
       end
       _ldf.write_json(file)
       nil
@@ -404,16 +261,23 @@ module Polars
     #   # │ 2   ┆ 7.0 ┆ b   │
     #   # │ 1   ┆ 6.0 ┆ a   │
     #   # └─────┴─────┴─────┘
-    def sort(by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
-      if by.is_a?(::String)
-        return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order, multithreaded))
-      end
-      if Utils.bool?(reverse)
-        reverse = [reverse]
+    def sort(by, *more_by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
+      if by.is_a?(::String) && more_by.empty?
+        return _from_rbldf(
+          _ldf.sort(
+            by, reverse, nulls_last, maintain_order, multithreaded
+          )
+        )
       end
-      by = Utils.selection_to_rbexpr_list(by)
-      _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order, multithreaded))
+      by = Utils.parse_into_list_of_expressions(by, *more_by)
+      reverse = Utils.extend_bool(reverse, by.length, "reverse", "by")
+      nulls_last = Utils.extend_bool(nulls_last, by.length, "nulls_last", "by")
+      _from_rbldf(
+        _ldf.sort_by_exprs(
+          by, reverse, nulls_last, maintain_order, multithreaded
+        )
+      )
     end
     # def profile
@@ -558,7 +422,7 @@ module Polars
       path,
       compression: "zstd",
       compression_level: nil,
-      statistics: false,
+      statistics: true,
       row_group_size: nil,
       data_pagesize_limit: nil,
       maintain_order: true,
@@ -578,6 +442,24 @@ module Polars
         no_optimization: no_optimization
       )
+      if statistics == true
+        statistics = {
+          min: true,
+          max: true,
+          distinct_count: false,
+          null_count: true
+        }
+      elsif statistics == false
+        statistics = {}
+      elsif statistics == "full"
+        statistics = {
+          min: true,
+          max: true,
+          distinct_count: true,
+          null_count: true
+        }
+      end
       lf.sink_parquet(
         path,
         compression,
@@ -732,6 +614,7 @@ module Polars
       datetime_format: nil,
       date_format: nil,
       time_format: nil,
+      float_scientific: nil,
       float_precision: nil,
       null_value: nil,
       quote_style: nil,
@@ -766,6 +649,7 @@ module Polars
         datetime_format,
         date_format,
         time_format,
+        float_scientific,
         float_precision,
         null_value,
         quote_style,
@@ -1050,7 +934,7 @@ module Polars
     def filter(predicate)
       _from_rbldf(
         _ldf.filter(
-          Utils.expr_to_lit_or_expr(predicate, str_to_lit: false)._rbexpr
+          Utils.parse_into_expression(predicate, str_as_lit: false)
         )
       )
     end
@@ -1137,7 +1021,7 @@ module Polars
     #   # ┌─────────┐
     #   # │ literal │
     #   # │ ---     │
-    #   # │ i64     │
+    #   # │ i32     │
     #   # ╞═════════╡
     #   # │ 0       │
     #   # │ 0       │
@@ -1146,7 +1030,7 @@ module Polars
     def select(*exprs, **named_exprs)
       structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
-      rbexprs = Utils.parse_as_list_of_expressions(
+      rbexprs = Utils.parse_into_list_of_expressions(
         *exprs, **named_exprs, __structify: structify
       )
       _from_rbldf(_ldf.select(rbexprs))
@@ -1154,12 +1038,14 @@ module Polars
     # Start a group by operation.
     #
-    # @param by [Object]
+    # @param by [Array]
     #   Column(s) to group by.
     # @param maintain_order [Boolean]
     #   Make sure that the order of the groups remain consistent. This is more
     #   expensive than a default group by.
-    #
+    # @param named_by [Hash]
+    #   Additional columns to group by, specified as keyword arguments.
+    #   The columns will be renamed to the keyword used.
     # @return [LazyGroupBy]
     #
     # @example
@@ -1182,9 +1068,9 @@ module Polars
     #   # │ b   ┆ 11  │
     #   # │ c   ┆ 6   │
     #   # └─────┴─────┘
-    def group_by(by, maintain_order: false)
-      rbexprs_by = Utils.selection_to_rbexpr_list(by)
-      lgb = _ldf.group_by(rbexprs_by, maintain_order)
+    def group_by(*by, maintain_order: false, **named_by)
+      exprs = Utils.parse_into_list_of_expressions(*by, **named_by)
+      lgb = _ldf.group_by(exprs, maintain_order)
       LazyGroupBy.new(lgb)
     end
     alias_method :groupby, :group_by
@@ -1238,12 +1124,6 @@ module Polars
     #   Define whether the temporal window interval is closed or not.
     # @param by [Object]
     #   Also group by this column/these columns.
-    # @param check_sorted [Boolean]
-    #   When the `by` argument is given, polars can not check sortedness
-    #   by the metadata and has to do a full scan on the index column to
-    #   verify data is sorted. This is expensive. If you are sure the
-    #   data within the by groups is sorted, you can set this to `false`.
-    #   Doing so incorrectly will lead to incorrect output
     #
     # @return [LazyFrame]
     #
@@ -1285,21 +1165,20 @@ module Polars
       period:,
       offset: nil,
       closed: "right",
-      by: nil,
-      check_sorted: true
+      by: nil
     )
-      index_column = Utils.parse_as_expression(index_column)
+      index_column = Utils.parse_into_expression(index_column)
       if offset.nil?
-        offset = "-#{period}"
+        offset = Utils.negate_duration_string(Utils.parse_as_duration_string(period))
       end
-      rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
-      period = Utils._timedelta_to_pl_duration(period)
-      offset = Utils._timedelta_to_pl_duration(offset)
-      lgb = _ldf.rolling(
-        index_column, period, offset, closed, rbexprs_by, check_sorted
+      rbexprs_by = (
+        !by.nil? ? Utils.parse_into_list_of_expressions(by) : []
       )
+      period = Utils.parse_as_duration_string(period)
+      offset = Utils.parse_as_duration_string(offset)
+      lgb = _ldf.rolling(index_column, period, offset, closed, rbexprs_by)
       LazyGroupBy.new(lgb)
     end
     alias_method :group_by_rolling, :rolling
@@ -1367,22 +1246,18 @@ module Polars
     #   Define whether the temporal window interval is closed or not.
     # @param by [Object]
     #   Also group by this column/these columns
-    # @param check_sorted [Boolean]
-    #   When the `by` argument is given, polars can not check sortedness
-    #   by the metadata and has to do a full scan on the index column to
-    #   verify data is sorted. This is expensive. If you are sure the
-    #   data within the by groups is sorted, you can set this to `false`.
-    #   Doing so incorrectly will lead to incorrect output.
     #
     # @return [DataFrame]
     #
     # @example
     #   df = Polars::DataFrame.new(
     #     {
-    #       "time" => Polars.date_range(
+    #       "time" => Polars.datetime_range(
     #         DateTime.new(2021, 12, 16),
     #         DateTime.new(2021, 12, 16, 3),
-    #         "30m"
+    #         "30m",
+    #         time_unit: "us",
+    #         eager: true
     #       ),
     #       "n" => 0..6
     #     }
@@ -1449,16 +1324,16 @@ module Polars
     #   )
     #   # =>
     #   # shape: (4, 3)
-    #   # ┌─────────────────────┬────────────┬───────────────────────────────────┐
-    #   # │ time                ┆ time_count ┆ time_agg_list                     │
-    #   # │ ---                 ┆ ---        ┆ ---                               │
-    #   # │ datetime[μs]        ┆ u32        ┆ list[datetime[μs]]                │
-    #   # ╞═════════════════════╪════════════╪═══════════════════════════════════╡
-    #   # │ 2021-12-16 00:00:00 ┆ 2          ┆ [2021-12-16 00:00:00, 2021-12-16… │
-    #   # │ 2021-12-16 01:00:00 ┆ 2          ┆ [2021-12-16 01:00:00, 2021-12-16… │
-    #   # │ 2021-12-16 02:00:00 ┆ 2          ┆ [2021-12-16 02:00:00, 2021-12-16… │
-    #   # │ 2021-12-16 03:00:00 ┆ 1          ┆ [2021-12-16 03:00:00]             │
-    #   # └─────────────────────┴────────────┴───────────────────────────────────┘
+    #   # ┌─────────────────────┬────────────┬─────────────────────────────────┐
+    #   # │ time                ┆ time_count ┆ time_agg_list                   │
+    #   # │ ---                 ┆ ---        ┆ ---                             │
+    #   # │ datetime[μs]        ┆ u32        ┆ list[datetime[μs]]              │
+    #   # ╞═════════════════════╪════════════╪═════════════════════════════════╡
+    #   # │ 2021-12-16 00:00:00 ┆ 2          ┆ [2021-12-16 00:00:00, 2021-12-… │
+    #   # │ 2021-12-16 01:00:00 ┆ 2          ┆ [2021-12-16 01:00:00, 2021-12-… │
+    #   # │ 2021-12-16 02:00:00 ┆ 2          ┆ [2021-12-16 02:00:00, 2021-12-… │
+    #   # │ 2021-12-16 03:00:00 ┆ 1          ┆ [2021-12-16 03:00:00]           │
+    #   # └─────────────────────┴────────────┴─────────────────────────────────┘
     #
     # @example When closed="both" the time values at the window boundaries belong to 2 groups.
     #   df.group_by_dynamic("time", every: "1h", closed: "both").agg(
@@ -1481,10 +1356,12 @@ module Polars
     # @example Dynamic group bys can also be combined with grouping on normal keys.
     #   df = Polars::DataFrame.new(
     #     {
-    #       "time" => Polars.date_range(
+    #       "time" => Polars.datetime_range(
     #         DateTime.new(2021, 12, 16),
     #         DateTime.new(2021, 12, 16, 3),
-    #         "30m"
+    #         "30m",
+    #         time_unit: "us",
+    #         eager: true
     #       ),
     #       "groups" => ["a", "a", "a", "b", "b", "a", "a"]
     #     }
@@ -1548,14 +1425,13 @@ module Polars
       closed: "left",
       label: "left",
       by: nil,
-      start_by: "window",
-      check_sorted: true
+      start_by: "window"
     )
       if !truncate.nil?
         label = truncate ? "left" : "datapoint"
       end
-      index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
+      index_column = Utils.parse_into_expression(index_column, str_as_lit: false)
       if offset.nil?
         offset = period.nil? ? "-#{every}" : "0ns"
       end
@@ -1564,13 +1440,13 @@ module Polars
         period = every
       end
-      period = Utils._timedelta_to_pl_duration(period)
-      offset = Utils._timedelta_to_pl_duration(offset)
-      every = Utils._timedelta_to_pl_duration(every)
+      period = Utils.parse_as_duration_string(period)
+      offset = Utils.parse_as_duration_string(offset)
+      every = Utils.parse_as_duration_string(every)
-      rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
+      rbexprs_by = by.nil? ? [] : Utils.parse_into_list_of_expressions(by)
       lgb = _ldf.group_by_dynamic(
-        index_column._rbexpr,
+        index_column,
         every,
         period,
         offset,
@@ -1578,8 +1454,7 @@ module Polars
         include_boundaries,
         closed,
         rbexprs_by,
-        start_by,
-        check_sorted
+        start_by
       )
       LazyGroupBy.new(lgb)
     end
@@ -1730,7 +1605,7 @@ module Polars
     # @param on Object
     #   Join column of both DataFrames. If set, `left_on` and `right_on` should be
     #   None.
-    # @param how ["inner", "left", "outer", "semi", "anti", "cross"]
+    # @param how ["inner", "left", "full", "semi", "anti", "cross"]
     #   Join strategy.
     # @param suffix [String]
     #   Suffix to append to columns with a duplicate name.
@@ -1772,7 +1647,7 @@ module Polars
     #   # └─────┴─────┴─────┴───────┘
     #
     # @example
-    #   df.join(other_df, on: "ham", how: "outer").collect
+    #   df.join(other_df, on: "ham", how: "full").collect
     #   # =>
     #   # shape: (4, 5)
     #   # ┌──────┬──────┬──────┬───────┬───────────┐
@@ -1839,7 +1714,9 @@ module Polars
         raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
       end
-      if how == "cross"
+      if how == "outer"
+        how = "full"
+      elsif how == "cross"
         return _from_rbldf(
           _ldf.join(
             other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
@@ -1848,12 +1725,12 @@ module Polars
       end
       if !on.nil?
-        rbexprs = Utils.selection_to_rbexpr_list(on)
+        rbexprs = Utils.parse_into_list_of_expressions(on)
         rbexprs_left = rbexprs
         rbexprs_right = rbexprs
       elsif !left_on.nil? && !right_on.nil?
-        rbexprs_left = Utils.selection_to_rbexpr_list(left_on)
-        rbexprs_right = Utils.selection_to_rbexpr_list(right_on)
+        rbexprs_left = Utils.parse_into_list_of_expressions(left_on)
+        rbexprs_right = Utils.parse_into_list_of_expressions(right_on)
       else
         raise ArgumentError, "must specify `on` OR `left_on` and `right_on`"
       end
@@ -1908,7 +1785,8 @@ module Polars
     #   # └─────┴──────┴───────┴─────┴──────┴───────┘
     def with_columns(*exprs, **named_exprs)
       structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
-      rbexprs = Utils.parse_as_list_of_expressions(*exprs, **named_exprs, __structify: structify)
+      rbexprs = Utils.parse_into_list_of_expressions(*exprs, **named_exprs, __structify: structify)
       _from_rbldf(_ldf.with_columns(rbexprs))
     end
@@ -2069,9 +1947,9 @@ module Polars
     #   # └──────┴──────┘
     def shift(n, fill_value: nil)
       if !fill_value.nil?
-        fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
+        fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
       end
-      n = Utils.parse_as_expression(n)
+      n = Utils.parse_into_expression(n)
       _from_rbldf(_ldf.shift(n, fill_value))
     end
@@ -2236,16 +2114,16 @@ module Polars
     #   df.with_row_index.collect
     #   # =>
     #   # shape: (3, 3)
-    #   # ┌────────┬─────┬─────┐
-    #   # │ row_nr ┆ a   ┆ b   │
-    #   # │ ---    ┆ --- ┆ --- │
-    #   # │ u32    ┆ i64 ┆ i64 │
-    #   # ╞════════╪═════╪═════╡
-    #   # │ 0      ┆ 1   ┆ 2   │
-    #   # │ 1      ┆ 3   ┆ 4   │
-    #   # │ 2      ┆ 5   ┆ 6   │
-    #   # └────────┴─────┴─────┘
-    def with_row_index(name: "row_nr", offset: 0)
+    #   # ┌───────┬─────┬─────┐
+    #   # │ index ┆ a   ┆ b   │
+    #   # │ ---   ┆ --- ┆ --- │
+    #   # │ u32   ┆ i64 ┆ i64 │
+    #   # ╞═══════╪═════╪═════╡
+    #   # │ 0     ┆ 1   ┆ 2   │
+    #   # │ 1     ┆ 3   ┆ 4   │
+    #   # │ 2     ┆ 5   ┆ 6   │
+    #   # └───────┴─────┴─────┘
+    def with_row_index(name: "index", offset: 0)
       _from_rbldf(_ldf.with_row_index(name, offset))
     end
     alias_method :with_row_count, :with_row_index
@@ -2268,7 +2146,7 @@ module Polars
     #   # │ 3   ┆ 7   │
     #   # └─────┴─────┘
     def take_every(n)
-      select(Utils.col("*").take_every(n))
+      select(F.col("*").take_every(n))
     end
     # Fill null values using the specified value or strategy.
@@ -2311,7 +2189,7 @@ module Polars
     #   # └──────┴──────┘
     def fill_nan(fill_value)
       if !fill_value.is_a?(Expr)
-        fill_value = Utils.lit(fill_value)
+        fill_value = F.lit(fill_value)
       end
       _from_rbldf(_ldf.fill_nan(fill_value._rbexpr))
     end
@@ -2502,8 +2380,8 @@ module Polars
     #   # │ 3.0 ┆ 1.0 │
     #   # └─────┴─────┘
     def quantile(quantile, interpolation: "nearest")
-      quantile = Utils.expr_to_lit_or_expr(quantile, str_to_lit: false)
-      _from_rbldf(_ldf.quantile(quantile._rbexpr, interpolation))
+      quantile = Utils.parse_into_expression(quantile, str_as_lit: false)
+      _from_rbldf(_ldf.quantile(quantile, interpolation))
     end
     # Explode lists to long format.
@@ -2535,7 +2413,7 @@ module Polars
     #   # │ c       ┆ 8       │
     #   # └─────────┴─────────┘
     def explode(columns)
-      columns = Utils.selection_to_rbexpr_list(columns)
+      columns = Utils.parse_into_list_of_expressions(columns)
       _from_rbldf(_ldf.explode(columns))
     end
@@ -2598,35 +2476,35 @@ module Polars
     # Optionally leaves identifiers set.
     #
     # This function is useful to massage a DataFrame into a format where one or more
-    # columns are identifier variables (id_vars), while all other columns, considered
-    # measured variables (value_vars), are "unpivoted" to the row axis, leaving just
+    # columns are identifier variables (index) while all other columns, considered
+    # measured variables (on), are "unpivoted" to the row axis leaving just
     # two non-identifier columns, 'variable' and 'value'.
     #
-    # @param id_vars [Object]
-    #   Columns to use as identifier variables.
-    # @param value_vars [Object]
-    #   Values to use as identifier variables.
-    #   If `value_vars` is empty all columns that are not in `id_vars` will be used.
+    # @param on [Object]
+    #   Column(s) or selector(s) to use as values variables; if `on`
+    #   is empty all columns that are not in `index` will be used.
+    # @param index [Object]
+    #   Column(s) or selector(s) to use as identifier variables.
     # @param variable_name [String]
-    #   Name to give to the `value` column. Defaults to "variable"
+    #   Name to give to the `variable` column. Defaults to "variable"
     # @param value_name [String]
     #   Name to give to the `value` column. Defaults to "value"
     # @param streamable [Boolean]
     #   Allow this node to run in the streaming engine.
-    #   If this runs in streaming, the output of the melt operation
+    #   If this runs in streaming, the output of the unpivot operation
     #   will not have a stable ordering.
     #
     # @return [LazyFrame]
     #
     # @example
-    #   df = Polars::DataFrame.new(
+    #   lf = Polars::LazyFrame.new(
     #     {
     #       "a" => ["x", "y", "z"],
     #       "b" => [1, 3, 5],
     #       "c" => [2, 4, 6]
     #     }
-    #   ).lazy
-    #   df.melt(id_vars: "a", value_vars: ["b", "c"]).collect
+    #   )
+    #   lf.unpivot(Polars::Selectors.numeric, index: "a").collect
     #   # =>
     #   # shape: (6, 3)
     #   # ┌─────┬──────────┬───────┐
@@ -2641,23 +2519,21 @@ module Polars
     #   # │ y   ┆ c        ┆ 4     │
     #   # │ z   ┆ c        ┆ 6     │
     #   # └─────┴──────────┴───────┘
-    def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil, streamable: true)
-      if value_vars.is_a?(::String)
-        value_vars = [value_vars]
-      end
-      if id_vars.is_a?(::String)
-        id_vars = [id_vars]
-      end
-      if value_vars.nil?
-        value_vars = []
-      end
-      if id_vars.nil?
-        id_vars = []
-      end
+    def unpivot(
+      on,
+      index: nil,
+      variable_name: nil,
+      value_name: nil,
+      streamable: true
+    )
+      on = on.nil? ? [] : Utils._expand_selectors(self, on)
+      index = index.nil? ? [] : Utils._expand_selectors(self, index)
       _from_rbldf(
-        _ldf.melt(id_vars, value_vars, value_name, variable_name, streamable)
+        _ldf.unpivot(on, index, value_name, variable_name, streamable)
       )
     end
+    alias_method :melt, :unpivot
     # def map
     # end
@@ -2688,7 +2564,7 @@ module Polars
     #   # │ 10.0 ┆ null ┆ 9.0      │
     #   # └──────┴──────┴──────────┘
     def interpolate
-      select(Utils.col("*").interpolate)
+      select(F.col("*").interpolate)
     end
     # Decompose a struct into its fields.
@@ -2795,24 +2671,19 @@ module Polars
     #
     # @param column [Object]
     #   Columns that are sorted
-    # @param more_columns [Object]
-    #   Additional columns that are sorted, specified as positional arguments.
     # @param descending [Boolean]
     #   Whether the columns are sorted in descending order.
     #
     # @return [LazyFrame]
     def set_sorted(
       column,
-      *more_columns,
       descending: false
     )
-      columns = Utils.selection_to_rbexpr_list(column)
-      if more_columns.any?
-        columns.concat(Utils.selection_to_rbexpr_list(more_columns))
+      if !Utils.strlike?(column)
+        msg = "expected a 'str' for argument 'column' in 'set_sorted'"
+        raise TypeError, msg
       end
-      with_columns(
-        columns.map { |e| Utils.wrap_expr(e).set_sorted(descending: descending) }
-      )
+      with_columns(F.col(column).set_sorted(descending: descending))
     end
     # TODO