RubyGems - polars-df - Versions diffs - 0.10.0-x86_64-linux → 0.12.0-x86_64-linux - Mend

polars-df 0.10.0-x86_64-linux → 0.12.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58)

checksums.yaml +4 -4
data/CHANGELOG.md +27 -0
data/Cargo.lock +392 -351
data/LICENSE-THIRD-PARTY.txt +1125 -865
data/README.md +6 -6
data/lib/polars/3.1/polars.so +0 -0
data/lib/polars/3.2/polars.so +0 -0
data/lib/polars/3.3/polars.so +0 -0
data/lib/polars/array_expr.rb +4 -4
data/lib/polars/batched_csv_reader.rb +11 -5
data/lib/polars/cat_expr.rb +0 -36
data/lib/polars/cat_name_space.rb +0 -37
data/lib/polars/convert.rb +6 -1
data/lib/polars/data_frame.rb +176 -403
data/lib/polars/data_types.rb +1 -1
data/lib/polars/date_time_expr.rb +525 -572
data/lib/polars/date_time_name_space.rb +263 -460
data/lib/polars/dynamic_group_by.rb +5 -5
data/lib/polars/exceptions.rb +7 -0
data/lib/polars/expr.rb +1394 -243
data/lib/polars/expr_dispatch.rb +1 -1
data/lib/polars/functions/aggregation/horizontal.rb +8 -8
data/lib/polars/functions/as_datatype.rb +63 -40
data/lib/polars/functions/lazy.rb +63 -14
data/lib/polars/functions/lit.rb +1 -1
data/lib/polars/functions/range/date_range.rb +90 -57
data/lib/polars/functions/range/datetime_range.rb +149 -0
data/lib/polars/functions/range/int_range.rb +2 -2
data/lib/polars/functions/range/time_range.rb +141 -0
data/lib/polars/functions/repeat.rb +1 -1
data/lib/polars/functions/whenthen.rb +1 -1
data/lib/polars/group_by.rb +88 -23
data/lib/polars/io/avro.rb +24 -0
data/lib/polars/{io.rb → io/csv.rb} +299 -493
data/lib/polars/io/database.rb +73 -0
data/lib/polars/io/ipc.rb +247 -0
data/lib/polars/io/json.rb +29 -0
data/lib/polars/io/ndjson.rb +80 -0
data/lib/polars/io/parquet.rb +227 -0
data/lib/polars/lazy_frame.rb +143 -272
data/lib/polars/lazy_group_by.rb +100 -3
data/lib/polars/list_expr.rb +11 -11
data/lib/polars/list_name_space.rb +5 -1
data/lib/polars/rolling_group_by.rb +7 -9
data/lib/polars/series.rb +103 -187
data/lib/polars/string_expr.rb +78 -102
data/lib/polars/string_name_space.rb +5 -4
data/lib/polars/testing.rb +2 -2
data/lib/polars/utils/constants.rb +9 -0
data/lib/polars/utils/convert.rb +97 -0
data/lib/polars/utils/parse.rb +89 -0
data/lib/polars/utils/various.rb +76 -0
data/lib/polars/utils/wrap.rb +19 -0
data/lib/polars/utils.rb +8 -300
data/lib/polars/version.rb +1 -1
data/lib/polars/whenthen.rb +6 -6
data/lib/polars.rb +20 -1
metadata +17 -4

data/lib/polars/data_frame.rb CHANGED

@@ -46,271 +46,6 @@ module Polars
       df
     end
-    # @private
-    def self._from_hashes(data, infer_schema_length: 100, schema: nil, schema_overrides: nil)
-      rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
-      _from_rbdf(rbdf)
-    end
-    # @private
-    def self._from_hash(data, schema: nil, schema_overrides: nil)
-      _from_rbdf(hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides))
-    end
-    # def self._from_records
-    # end
-    # def self._from_numo
-    # end
-    # no self._from_arrow
-    # no self._from_pandas
-    # @private
-    def self._read_csv(
-      file,
-      has_header: true,
-      columns: nil,
-      sep: str = ",",
-      comment_char: nil,
-      quote_char: '"',
-      skip_rows: 0,
-      dtypes: nil,
-      null_values: nil,
-      ignore_errors: false,
-      parse_dates: false,
-      n_threads: nil,
-      infer_schema_length: 100,
-      batch_size: 8192,
-      n_rows: nil,
-      encoding: "utf8",
-      low_memory: false,
-      rechunk: true,
-      skip_rows_after_header: 0,
-      row_count_name: nil,
-      row_count_offset: 0,
-      sample_size: 1024,
-      eol_char: "\n",
-      truncate_ragged_lines: false
-    )
-      if Utils.pathlike?(file)
-        path = Utils.normalise_filepath(file)
-      else
-        path = nil
-        # if defined?(StringIO) && file.is_a?(StringIO)
-        #   file = file.string
-        # end
-      end
-      dtype_list = nil
-      dtype_slice = nil
-      if !dtypes.nil?
-        if dtypes.is_a?(Hash)
-          dtype_list = []
-          dtypes.each do|k, v|
-            dtype_list << [k, Utils.rb_type_to_dtype(v)]
-          end
-        elsif dtypes.is_a?(::Array)
-          dtype_slice = dtypes
-        else
-          raise ArgumentError, "dtype arg should be list or dict"
-        end
-      end
-      processed_null_values = Utils._process_null_values(null_values)
-      if columns.is_a?(::String)
-        columns = [columns]
-      end
-      if file.is_a?(::String) && file.include?("*")
-        dtypes_dict = nil
-        if !dtype_list.nil?
-          dtypes_dict = dtype_list.to_h
-        end
-        if !dtype_slice.nil?
-          raise ArgumentError, "cannot use glob patterns and unnamed dtypes as `dtypes` argument; Use dtypes: Mapping[str, Type[DataType]"
-        end
-        scan = Polars.scan_csv(
-          file,
-          has_header: has_header,
-          sep: sep,
-          comment_char: comment_char,
-          quote_char: quote_char,
-          skip_rows: skip_rows,
-          dtypes: dtypes_dict,
-          null_values: null_values,
-          ignore_errors: ignore_errors,
-          infer_schema_length: infer_schema_length,
-          n_rows: n_rows,
-          low_memory: low_memory,
-          rechunk: rechunk,
-          skip_rows_after_header: skip_rows_after_header,
-          row_count_name: row_count_name,
-          row_count_offset: row_count_offset,
-          eol_char: eol_char,
-          truncate_ragged_lines: truncate_ragged_lines
-        )
-        if columns.nil?
-          return _from_rbdf(scan.collect._df)
-        elsif is_str_sequence(columns, allow_str: false)
-          return _from_rbdf(scan.select(columns).collect._df)
-        else
-          raise ArgumentError, "cannot use glob patterns and integer based projection as `columns` argument; Use columns: List[str]"
-        end
-      end
-      projection, columns = Utils.handle_projection_columns(columns)
-      _from_rbdf(
-        RbDataFrame.read_csv(
-          file,
-          infer_schema_length,
-          batch_size,
-          has_header,
-          ignore_errors,
-          n_rows,
-          skip_rows,
-          projection,
-          sep,
-          rechunk,
-          columns,
-          encoding,
-          n_threads,
-          path,
-          dtype_list,
-          dtype_slice,
-          low_memory,
-          comment_char,
-          quote_char,
-          processed_null_values,
-          parse_dates,
-          skip_rows_after_header,
-          Utils._prepare_row_count_args(row_count_name, row_count_offset),
-          sample_size,
-          eol_char,
-          truncate_ragged_lines
-        )
-      )
-    end
-    # @private
-    def self._read_parquet(
-      source,
-      columns: nil,
-      n_rows: nil,
-      parallel: "auto",
-      row_count_name: nil,
-      row_count_offset: 0,
-      low_memory: false,
-      use_statistics: true,
-      rechunk: true
-    )
-      if Utils.pathlike?(source)
-        source = Utils.normalise_filepath(source)
-      end
-      if columns.is_a?(::String)
-        columns = [columns]
-      end
-      if source.is_a?(::String) && source.include?("*") && Utils.local_file?(source)
-        scan =
-          Polars.scan_parquet(
-            source,
-            n_rows: n_rows,
-            rechunk: true,
-            parallel: parallel,
-            row_count_name: row_count_name,
-            row_count_offset: row_count_offset,
-            low_memory: low_memory
-          )
-        if columns.nil?
-          return self._from_rbdf(scan.collect._df)
-        elsif Utils.is_str_sequence(columns, allow_str: false)
-          return self._from_rbdf(scan.select(columns).collect._df)
-        else
-          raise ArgumentError, "cannot use glob patterns and integer based projection as `columns` argument; Use columns: Array[String]"
-        end
-      end
-      projection, columns = Utils.handle_projection_columns(columns)
-      _from_rbdf(
-        RbDataFrame.read_parquet(
-          source,
-          columns,
-          projection,
-          n_rows,
-          parallel,
-          Utils._prepare_row_count_args(row_count_name, row_count_offset),
-          low_memory,
-          use_statistics,
-          rechunk
-        )
-      )
-    end
-    # @private
-    def self._read_avro(file, columns: nil, n_rows: nil)
-      if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
-      end
-      projection, columns = Utils.handle_projection_columns(columns)
-      _from_rbdf(RbDataFrame.read_avro(file, columns, projection, n_rows))
-    end
-    # @private
-    def self._read_ipc(
-      file,
-      columns: nil,
-      n_rows: nil,
-      row_count_name: nil,
-      row_count_offset: 0,
-      rechunk: true,
-      memory_map: true
-    )
-      if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
-      end
-      if columns.is_a?(::String)
-        columns = [columns]
-      end
-      if file.is_a?(::String) && file.include?("*")
-        raise Todo
-      end
-      projection, columns = Utils.handle_projection_columns(columns)
-      _from_rbdf(
-        RbDataFrame.read_ipc(
-          file,
-          columns,
-          projection,
-          n_rows,
-          Utils._prepare_row_count_args(row_count_name, row_count_offset),
-          memory_map
-        )
-      )
-    end
-    # @private
-    def self._read_json(file)
-      if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
-      end
-      _from_rbdf(RbDataFrame.read_json(file))
-    end
-    # @private
-    def self._read_ndjson(file)
-      if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
-      end
-      _from_rbdf(RbDataFrame.read_ndjson(file))
-    end
     # Get the shape of the DataFrame.
     #
     # @return [Array]
@@ -419,6 +154,13 @@ module Polars
       _df.dtypes
     end
+    # Get flags that are set on the columns of this DataFrame.
+    #
+    # @return [Hash]
+    def flags
+      columns.to_h { |name| [name, self[name].flags] }
+    end
     # Get the schema.
     #
     # @return [Hash]
@@ -845,7 +587,7 @@ module Polars
       row_oriented: false
     )
       if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
+        file = Utils.normalize_filepath(file)
       end
       to_string_io = !file.nil? && file.is_a?(StringIO)
       if file.nil? || to_string_io
@@ -880,11 +622,11 @@ module Polars
     #       "bar" => [6, 7, 8]
     #     }
     #   )
-    #   df.write_ndjson()
+    #   df.write_ndjson
     #   # => "{\"foo\":1,\"bar\":6}\n{\"foo\":2,\"bar\":7}\n{\"foo\":3,\"bar\":8}\n"
     def write_ndjson(file = nil)
       if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
+        file = Utils.normalize_filepath(file)
       end
       to_string_io = !file.nil? && file.is_a?(StringIO)
       if file.nil? || to_string_io
@@ -991,7 +733,7 @@ module Polars
       end
       if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
+        file = Utils.normalize_filepath(file)
       end
       _df.write_csv(
@@ -1029,7 +771,7 @@ module Polars
         compression = "uncompressed"
       end
       if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
+        file = Utils.normalize_filepath(file)
       end
       _df.write_avro(file, compression)
@@ -1050,7 +792,7 @@ module Polars
         file.set_encoding(Encoding::BINARY)
       end
       if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
+        file = Utils.normalize_filepath(file)
       end
       if compression.nil?
@@ -1061,6 +803,47 @@ module Polars
       return_bytes ? file.string : nil
     end
+    # Write to Arrow IPC record batch stream.
+    #
+    # See "Streaming format" in https://arrow.apache.org/docs/python/ipc.html.
+    #
+    # @param file [Object]
+    #   Path or writable file-like object to which the IPC record batch data will
+    #   be written. If set to `nil`, the output is returned as a binary string.
+    # @param compression ['uncompressed', 'lz4', 'zstd']
+    #   Compression method. Defaults to "uncompressed".
+    #
+    # @return [Object]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3, 4, 5],
+    #       "bar" => [6, 7, 8, 9, 10],
+    #       "ham" => ["a", "b", "c", "d", "e"]
+    #     }
+    #   )
+    #   df.write_ipc_stream("new_file.arrow")
+    def write_ipc_stream(
+      file,
+      compression: "uncompressed"
+    )
+      return_bytes = file.nil?
+      if return_bytes
+        file = StringIO.new
+        file.set_encoding(Encoding::BINARY)
+      elsif Utils.pathlike?(file)
+        file = Utils.normalize_filepath(file)
+      end
+      if compression.nil?
+        compression = "uncompressed"
+      end
+      _df.write_ipc_stream(file, compression)
+      return_bytes ? file.string : nil
+    end
     # Write to Apache Parquet file.
     #
     # @param file [String, Pathname, StringIO]
@@ -1097,7 +880,25 @@ module Polars
         compression = "uncompressed"
       end
       if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
+        file = Utils.normalize_filepath(file)
+      end
+      if statistics == true
+        statistics = {
+          min: true,
+          max: true,
+          distinct_count: false,
+          null_count: true
+        }
+      elsif statistics == false
+        statistics = {}
+      elsif statistics == "full"
+        statistics = {
+          min: true,
+          max: true,
+          distinct_count: true,
+          null_count: true
+        }
       end
       _df.write_parquet(
@@ -1773,10 +1574,7 @@ module Polars
     #   # │ 3   ┆ 8   ┆ c   │
     #   # └─────┴─────┴─────┘
     def drop_nulls(subset: nil)
-      if subset.is_a?(::String)
-        subset = [subset]
-      end
-      _from_rbdf(_df.drop_nulls(subset))
+      lazy.drop_nulls(subset: subset).collect(_eager: true)
     end
     # Offers a structured way to apply a sequence of user-defined functions (UDFs).
@@ -1838,16 +1636,16 @@ module Polars
     #   df.with_row_index
     #   # =>
     #   # shape: (3, 3)
-    #   # ┌────────┬─────┬─────┐
-    #   # │ row_nr ┆ a   ┆ b   │
-    #   # │ ---    ┆ --- ┆ --- │
-    #   # │ u32    ┆ i64 ┆ i64 │
-    #   # ╞════════╪═════╪═════╡
-    #   # │ 0      ┆ 1   ┆ 2   │
-    #   # │ 1      ┆ 3   ┆ 4   │
-    #   # │ 2      ┆ 5   ┆ 6   │
-    #   # └────────┴─────┴─────┘
-    def with_row_index(name: "row_nr", offset: 0)
+    #   # ┌───────┬─────┬─────┐
+    #   # │ index ┆ a   ┆ b   │
+    #   # │ ---   ┆ --- ┆ --- │
+    #   # │ u32   ┆ i64 ┆ i64 │
+    #   # ╞═══════╪═════╪═════╡
+    #   # │ 0     ┆ 1   ┆ 2   │
+    #   # │ 1     ┆ 3   ┆ 4   │
+    #   # │ 2     ┆ 5   ┆ 6   │
+    #   # └───────┴─────┴─────┘
+    def with_row_index(name: "index", offset: 0)
       _from_rbdf(_df.with_row_index(name, offset))
     end
     alias_method :with_row_count, :with_row_index
@@ -1944,12 +1742,6 @@ module Polars
     #   Define whether the temporal window interval is closed or not.
     # @param by [Object]
     #   Also group by this column/these columns.
-    # @param check_sorted [Boolean]
-    #   When the `by` argument is given, polars can not check sortedness
-    #   by the metadata and has to do a full scan on the index column to
-    #   verify data is sorted. This is expensive. If you are sure the
-    #   data within the by groups is sorted, you can set this to `false`.
-    #   Doing so incorrectly will lead to incorrect output
     #
     # @return [RollingGroupBy]
     #
@@ -1965,7 +1757,7 @@ module Polars
     #   df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
     #     Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
     #   )
-    #   df.group_by_rolling(index_column: "dt", period: "2d").agg(
+    #   df.rolling(index_column: "dt", period: "2d").agg(
     #     [
     #       Polars.sum("a").alias("sum_a"),
     #       Polars.min("a").alias("min_a"),
@@ -1986,17 +1778,17 @@ module Polars
     #   # │ 2020-01-03 19:45:32 ┆ 11    ┆ 2     ┆ 9     │
     #   # │ 2020-01-08 23:16:43 ┆ 1     ┆ 1     ┆ 1     │
     #   # └─────────────────────┴───────┴───────┴───────┘
-    def group_by_rolling(
+    def rolling(
       index_column:,
       period:,
       offset: nil,
       closed: "right",
-      by: nil,
-      check_sorted: true
+      by: nil
     )
-      RollingGroupBy.new(self, index_column, period, offset, closed, by, check_sorted)
+      RollingGroupBy.new(self, index_column, period, offset, closed, by)
     end
-    alias_method :groupby_rolling, :group_by_rolling
+    alias_method :groupby_rolling, :rolling
+    alias_method :group_by_rolling, :rolling
     # Group based on a time value (or index value of type `:i32`, `:i64`).
     #
@@ -2066,10 +1858,12 @@ module Polars
     # @example
     #   df = Polars::DataFrame.new(
     #     {
-    #       "time" => Polars.date_range(
+    #       "time" => Polars.datetime_range(
     #         DateTime.new(2021, 12, 16),
     #         DateTime.new(2021, 12, 16, 3),
-    #         "30m"
+    #         "30m",
+    #         time_unit: "us",
+    #         eager: true
     #       ),
     #       "n" => 0..6
     #     }
@@ -2136,16 +1930,16 @@ module Polars
     #   )
     #   # =>
     #   # shape: (4, 3)
-    #   # ┌─────────────────────┬────────────┬───────────────────────────────────┐
-    #   # │ time                ┆ time_count ┆ time_agg_list                     │
-    #   # │ ---                 ┆ ---        ┆ ---                               │
-    #   # │ datetime[μs]        ┆ u32        ┆ list[datetime[μs]]                │
-    #   # ╞═════════════════════╪════════════╪═══════════════════════════════════╡
-    #   # │ 2021-12-16 00:00:00 ┆ 2          ┆ [2021-12-16 00:00:00, 2021-12-16… │
-    #   # │ 2021-12-16 01:00:00 ┆ 2          ┆ [2021-12-16 01:00:00, 2021-12-16… │
-    #   # │ 2021-12-16 02:00:00 ┆ 2          ┆ [2021-12-16 02:00:00, 2021-12-16… │
-    #   # │ 2021-12-16 03:00:00 ┆ 1          ┆ [2021-12-16 03:00:00]             │
-    #   # └─────────────────────┴────────────┴───────────────────────────────────┘
+    #   # ┌─────────────────────┬────────────┬─────────────────────────────────┐
+    #   # │ time                ┆ time_count ┆ time_agg_list                   │
+    #   # │ ---                 ┆ ---        ┆ ---                             │
+    #   # │ datetime[μs]        ┆ u32        ┆ list[datetime[μs]]              │
+    #   # ╞═════════════════════╪════════════╪═════════════════════════════════╡
+    #   # │ 2021-12-16 00:00:00 ┆ 2          ┆ [2021-12-16 00:00:00, 2021-12-… │
+    #   # │ 2021-12-16 01:00:00 ┆ 2          ┆ [2021-12-16 01:00:00, 2021-12-… │
+    #   # │ 2021-12-16 02:00:00 ┆ 2          ┆ [2021-12-16 02:00:00, 2021-12-… │
+    #   # │ 2021-12-16 03:00:00 ┆ 1          ┆ [2021-12-16 03:00:00]           │
+    #   # └─────────────────────┴────────────┴─────────────────────────────────┘
     #
     # @example When closed="both" the time values at the window boundaries belong to 2 groups.
     #   df.group_by_dynamic("time", every: "1h", closed: "both").agg(
@@ -2168,10 +1962,12 @@ module Polars
     # @example Dynamic group bys can also be combined with grouping on normal keys.
     #   df = Polars::DataFrame.new(
     #     {
-    #       "time" => Polars.date_range(
+    #       "time" => Polars.datetime_range(
     #         DateTime.new(2021, 12, 16),
     #         DateTime.new(2021, 12, 16, 3),
-    #         "30m"
+    #         "30m",
+    #         time_unit: "us",
+    #         eager: true
     #       ),
     #       "groups" => ["a", "a", "a", "b", "b", "a", "a"]
     #     }
@@ -2258,8 +2054,6 @@ module Polars
     #   Note that this column has to be sorted for the output to make sense.
     # @param every [String]
     #   interval will start 'every' duration
-    # @param offset [String]
-    #   change the start of the date_range by this offset.
     # @param by [Object]
     #   First group by these columns and then upsample for every group
     # @param maintain_order [Boolean]
@@ -2319,7 +2113,6 @@ module Polars
     def upsample(
       time_column:,
       every:,
-      offset: nil,
       by: nil,
       maintain_order: false
     )
@@ -2329,15 +2122,11 @@ module Polars
       if by.is_a?(::String)
         by = [by]
       end
-      if offset.nil?
-        offset = "0ns"
-      end
-      every = Utils._timedelta_to_pl_duration(every)
-      offset = Utils._timedelta_to_pl_duration(offset)
+      every = Utils.parse_as_duration_string(every)
       _from_rbdf(
-        _df.upsample(by, time_column, every, offset, maintain_order)
+        _df.upsample(by, time_column, every, maintain_order)
       )
     end
@@ -2484,7 +2273,7 @@ module Polars
     #   Name(s) of the right join column(s).
     # @param on [Object]
     #   Name(s) of the join columns in both DataFrames.
-    # @param how ["inner", "left", "outer", "semi", "anti", "cross"]
+    # @param how ["inner", "left", "full", "semi", "anti", "cross"]
     #   Join strategy.
     # @param suffix [String]
     #   Suffix to append to columns with a duplicate name.
@@ -2520,7 +2309,7 @@ module Polars
     #   # └─────┴─────┴─────┴───────┘
     #
     # @example
-    #   df.join(other_df, on: "ham", how: "outer")
+    #   df.join(other_df, on: "ham", how: "full")
     #   # =>
     #   # shape: (4, 5)
     #   # ┌──────┬──────┬──────┬───────┬───────────┐
@@ -2620,7 +2409,7 @@ module Polars
     #   df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [-1, 5, 8]})
     #
     # @example Return a DataFrame by mapping each row to a tuple:
-    #   df.apply { |t| [t[0] * 2, t[1] * 3] }
+    #   df.map_rows { |t| [t[0] * 2, t[1] * 3] }
     #   # =>
     #   # shape: (3, 2)
     #   # ┌──────────┬──────────┐
@@ -2634,7 +2423,7 @@ module Polars
     #   # └──────────┴──────────┘
     #
     # @example Return a Series by mapping each row to a scalar:
-    #   df.apply { |t| t[0] * 2 + t[1] }
+    #   df.map_rows { |t| t[0] * 2 + t[1] }
     #   # =>
     #   # shape: (3, 1)
     #   # ┌───────┐
@@ -2646,14 +2435,15 @@ module Polars
     #   # │ 9     │
     #   # │ 14    │
     #   # └───────┘
-    def apply(return_dtype: nil, inference_size: 256, &f)
-      out, is_df = _df.apply(f, return_dtype, inference_size)
+    def map_rows(return_dtype: nil, inference_size: 256, &f)
+      out, is_df = _df.map_rows(f, return_dtype, inference_size)
       if is_df
         _from_rbdf(out)
       else
         _from_rbdf(Utils.wrap_s(out).to_frame._df)
       end
     end
+    alias_method :apply, :map_rows
     # Return a new DataFrame with the column added or replaced.
     #
@@ -3176,9 +2966,9 @@ module Polars
     #   arguments contains multiple columns as well
     # @param index [Object]
     #   One or multiple keys to group by
-    # @param columns [Object]
+    # @param on [Object]
     #   Columns whose values will be used as the header of the output DataFrame
-    # @param aggregate_fn ["first", "sum", "max", "min", "mean", "median", "last", "count"]
+    # @param aggregate_function ["first", "sum", "max", "min", "mean", "median", "last", "count"]
     #   A predefined aggregate function str or an expression.
     # @param maintain_order [Object]
     #   Sort the grouped keys so that the output order is predictable.
@@ -3190,66 +2980,62 @@ module Polars
     # @example
     #   df = Polars::DataFrame.new(
     #     {
-    #       "foo" => ["one", "one", "one", "two", "two", "two"],
-    #       "bar" => ["A", "B", "C", "A", "B", "C"],
+    #       "foo" => ["one", "one", "two", "two", "one", "two"],
+    #       "bar" => ["y", "y", "y", "x", "x", "x"],
     #       "baz" => [1, 2, 3, 4, 5, 6]
     #     }
     #   )
-    #   df.pivot(values: "baz", index: "foo", columns: "bar")
+    #   df.pivot("bar", index: "foo", values: "baz", aggregate_function: "sum")
     #   # =>
-    #   # shape: (2, 4)
-    #   # ┌─────┬─────┬─────┬─────┐
-    #   # │ foo ┆ A   ┆ B   ┆ C   │
-    #   # │ --- ┆ --- ┆ --- ┆ --- │
-    #   # │ str ┆ i64 ┆ i64 ┆ i64 │
-    #   # ╞═════╪═════╪═════╪═════╡
-    #   # │ one ┆ 1   ┆ 2   ┆ 3   │
-    #   # │ two ┆ 4   ┆ 5   ┆ 6   │
-    #   # └─────┴─────┴─────┴─────┘
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ y   ┆ x   │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ str ┆ i64 ┆ i64 │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ one ┆ 3   ┆ 5   │
+    #   # │ two ┆ 3   ┆ 10  │
+    #   # └─────┴─────┴─────┘
     def pivot(
-      values:,
-      index:,
-      columns:,
-      aggregate_fn: "first",
+      on,
+      index: nil,
+      values: nil,
+      aggregate_function: nil,
       maintain_order: true,
       sort_columns: false,
       separator: "_"
     )
-      if values.is_a?(::String)
-        values = [values]
-      end
-      if index.is_a?(::String)
-        index = [index]
-      end
-      if columns.is_a?(::String)
-        columns = [columns]
+      index = Utils._expand_selectors(self, index)
+      on = Utils._expand_selectors(self, on)
+      if !values.nil?
+        values = Utils._expand_selectors(self, values)
       end
-      if aggregate_fn.is_a?(::String)
-        case aggregate_fn
+      if aggregate_function.is_a?(::String)
+        case aggregate_function
         when "first"
-          aggregate_expr = Polars.element.first._rbexpr
+          aggregate_expr = F.element.first._rbexpr
         when "sum"
-          aggregate_expr = Polars.element.sum._rbexpr
+          aggregate_expr = F.element.sum._rbexpr
         when "max"
-          aggregate_expr = Polars.element.max._rbexpr
+          aggregate_expr = F.element.max._rbexpr
         when "min"
-          aggregate_expr = Polars.element.min._rbexpr
+          aggregate_expr = F.element.min._rbexpr
         when "mean"
-          aggregate_expr = Polars.element.mean._rbexpr
+          aggregate_expr = F.element.mean._rbexpr
         when "median"
-          aggregate_expr = Polars.element.median._rbexpr
+          aggregate_expr = F.element.median._rbexpr
         when "last"
-          aggregate_expr = Polars.element.last._rbexpr
+          aggregate_expr = F.element.last._rbexpr
         when "len"
-          aggregate_expr = Polars.len._rbexpr
+          aggregate_expr = F.len._rbexpr
         when "count"
           warn "`aggregate_function: \"count\"` input for `pivot` is deprecated. Use `aggregate_function: \"len\"` instead."
-          aggregate_expr = Polars.len._rbexpr
+          aggregate_expr = F.len._rbexpr
         else
           raise ArgumentError, "Argument aggregate fn: '#{aggregate_fn}' was not expected."
         end
-      elsif aggregate_fn.nil?
+      elsif aggregate_function.nil?
         aggregate_expr = nil
       else
         aggregate_expr = aggregate_function._rbexpr
@@ -3257,8 +3043,8 @@ module Polars
       _from_rbdf(
         _df.pivot_expr(
+          on,
           index,
-          columns,
           values,
           maintain_order,
           sort_columns,
@@ -3273,18 +3059,18 @@ module Polars
     # Optionally leaves identifiers set.
     #
     # This function is useful to massage a DataFrame into a format where one or more
-    # columns are identifier variables (id_vars), while all other columns, considered
-    # measured variables (value_vars), are "unpivoted" to the row axis, leaving just
+    # columns are identifier variables (index) while all other columns, considered
+    # measured variables (on), are "unpivoted" to the row axis leaving just
     # two non-identifier columns, 'variable' and 'value'.
     #
-    # @param id_vars [Object]
-    #   Columns to use as identifier variables.
-    # @param value_vars [Object]
-    #   Values to use as identifier variables.
-    #   If `value_vars` is empty all columns that are not in `id_vars` will be used.
-    # @param variable_name [String]
-    #   Name to give to the `value` column. Defaults to "variable"
-    # @param value_name [String]
+    # @param on [Object]
+    #   Column(s) or selector(s) to use as value variables; if `on`
+    #   is empty all columns that are not in `index` will be used.
+    # @param index [Object]
+    #   Column(s) or selector(s) to use as identifier variables.
+    # @param variable_name [Object]
+    #   Name to give to the `variable` column. Defaults to "variable"
+    # @param value_name [Object]
     #   Name to give to the `value` column. Defaults to "value"
     #
     # @return [DataFrame]
@@ -3297,7 +3083,7 @@ module Polars
     #       "c" => [2, 4, 6]
     #     }
     #   )
-    #   df.melt(id_vars: "a", value_vars: ["b", "c"])
+    #   df.unpivot(Polars::Selectors.numeric, index: "a")
     #   # =>
     #   # shape: (6, 3)
     #   # ┌─────┬──────────┬───────┐
@@ -3312,23 +3098,13 @@ module Polars
     #   # │ y   ┆ c        ┆ 4     │
     #   # │ z   ┆ c        ┆ 6     │
     #   # └─────┴──────────┴───────┘
-    def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil)
-      if value_vars.is_a?(::String)
-        value_vars = [value_vars]
-      end
-      if id_vars.is_a?(::String)
-        id_vars = [id_vars]
-      end
-      if value_vars.nil?
-        value_vars = []
-      end
-      if id_vars.nil?
-        id_vars = []
-      end
-      _from_rbdf(
-        _df.melt(id_vars, value_vars, value_name, variable_name)
-      )
+    def unpivot(on, index: nil, variable_name: nil, value_name: nil)
+      on = on.nil? ? [] : Utils._expand_selectors(self, on)
+      index = index.nil? ? [] : Utils._expand_selectors(self, index)
+      _from_rbdf(_df.unpivot(on, index, value_name, variable_name))
     end
+    alias_method :melt, :unpivot
     # Unstack a long table to a wide form without doing an aggregation.
     #
@@ -3774,7 +3550,7 @@ module Polars
     #   # ┌─────────┐
     #   # │ literal │
     #   # │ ---     │
-    #   # │ i64     │
+    #   # │ i32     │
     #   # ╞═════════╡
     #   # │ 0       │
     #   # │ 0       │
@@ -4362,7 +4138,7 @@ module Polars
       end
       if subset.is_a?(::Array) && subset.length == 1
-        expr = Utils.expr_to_lit_or_expr(subset[0], str_to_lit: false)
+        expr = Utils.wrap_expr(Utils.parse_into_expression(subset[0], str_as_lit: false))
       else
         struct_fields = subset.nil? ? Polars.all : subset
         expr = Polars.struct(struct_fields)
@@ -4780,7 +4556,7 @@ module Polars
     #   # │ 3   ┆ 7   │
     #   # └─────┴─────┘
     def gather_every(n, offset = 0)
-      select(Utils.col("*").gather_every(n, offset))
+      select(F.col("*").gather_every(n, offset))
     end
     alias_method :take_every, :gather_every
@@ -4850,7 +4626,7 @@ module Polars
     #   # │ 10.0 ┆ null ┆ 9.0      │
     #   # └──────┴──────┴──────────┘
     def interpolate
-      select(Utils.col("*").interpolate)
+      select(F.col("*").interpolate)
     end
     # Check if the dataframe is empty.
@@ -4986,19 +4762,16 @@ module Polars
     #
     # @param column [Object]
     #   Columns that are sorted
-    # @param more_columns [Object]
-    #   Additional columns that are sorted, specified as positional arguments.
     # @param descending [Boolean]
     #   Whether the columns are sorted in descending order.
     #
     # @return [DataFrame]
     def set_sorted(
       column,
-      *more_columns,
       descending: false
     )
       lazy
-        .set_sorted(column, *more_columns, descending: descending)
+        .set_sorted(column, descending: descending)
         .collect(no_optimization: true)
     end
@@ -5255,7 +5028,7 @@ module Polars
       elsif data[0].is_a?(Hash)
         column_names, dtypes = _unpack_schema(columns)
         schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
-        rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
+        rbdf = RbDataFrame.from_hashes(data, schema, schema_overrides, false, infer_schema_length)
         if column_names
           rbdf = _post_apply_columns(rbdf, column_names)
         end
@@ -5289,7 +5062,7 @@ module Polars
           if unpack_nested
             raise Todo
           else
-            rbdf = RbDataFrame.read_rows(
+            rbdf = RbDataFrame.from_rows(
               data,
               infer_schema_length,
               local_schema_override.any? ? local_schema_override : nil