polars-df 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +127 -1
  3. data/Cargo.lock +72 -58
  4. data/README.md +31 -27
  5. data/ext/polars/Cargo.toml +15 -6
  6. data/ext/polars/src/batched_csv.rs +35 -39
  7. data/ext/polars/src/c_api/allocator.rs +7 -0
  8. data/ext/polars/src/c_api/mod.rs +1 -0
  9. data/ext/polars/src/catalog/unity.rs +123 -101
  10. data/ext/polars/src/conversion/any_value.rs +13 -17
  11. data/ext/polars/src/conversion/chunked_array.rs +5 -5
  12. data/ext/polars/src/conversion/datetime.rs +3 -2
  13. data/ext/polars/src/conversion/mod.rs +50 -45
  14. data/ext/polars/src/dataframe/export.rs +13 -13
  15. data/ext/polars/src/dataframe/general.rs +223 -223
  16. data/ext/polars/src/dataframe/io.rs +27 -141
  17. data/ext/polars/src/dataframe/mod.rs +13 -5
  18. data/ext/polars/src/dataframe/serde.rs +1 -1
  19. data/ext/polars/src/error.rs +44 -7
  20. data/ext/polars/src/exceptions.rs +45 -12
  21. data/ext/polars/src/expr/array.rs +12 -0
  22. data/ext/polars/src/expr/datatype.rs +2 -2
  23. data/ext/polars/src/expr/datetime.rs +4 -5
  24. data/ext/polars/src/expr/general.rs +49 -13
  25. data/ext/polars/src/expr/list.rs +4 -0
  26. data/ext/polars/src/expr/meta.rs +8 -3
  27. data/ext/polars/src/expr/mod.rs +22 -6
  28. data/ext/polars/src/expr/name.rs +19 -8
  29. data/ext/polars/src/expr/rolling.rs +50 -1
  30. data/ext/polars/src/expr/string.rs +0 -1
  31. data/ext/polars/src/expr/struct.rs +7 -2
  32. data/ext/polars/src/file.rs +136 -103
  33. data/ext/polars/src/functions/aggregation.rs +9 -8
  34. data/ext/polars/src/functions/io.rs +81 -10
  35. data/ext/polars/src/functions/lazy.rs +95 -21
  36. data/ext/polars/src/functions/mod.rs +2 -0
  37. data/ext/polars/src/functions/range.rs +19 -3
  38. data/ext/polars/src/functions/strings.rs +6 -0
  39. data/ext/polars/src/functions/utils.rs +6 -0
  40. data/ext/polars/src/interop/arrow/mod.rs +50 -1
  41. data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
  42. data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
  43. data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
  44. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  45. data/ext/polars/src/lazyframe/exitable.rs +39 -0
  46. data/ext/polars/src/lazyframe/general.rs +340 -236
  47. data/ext/polars/src/lazyframe/mod.rs +46 -10
  48. data/ext/polars/src/lazyframe/optflags.rs +5 -4
  49. data/ext/polars/src/lazyframe/serde.rs +11 -3
  50. data/ext/polars/src/lazyframe/sink.rs +10 -5
  51. data/ext/polars/src/lazygroupby.rs +6 -7
  52. data/ext/polars/src/lib.rs +141 -76
  53. data/ext/polars/src/map/dataframe.rs +12 -12
  54. data/ext/polars/src/map/lazy.rs +7 -5
  55. data/ext/polars/src/map/mod.rs +15 -8
  56. data/ext/polars/src/map/series.rs +3 -3
  57. data/ext/polars/src/on_startup.rs +16 -8
  58. data/ext/polars/src/prelude.rs +1 -0
  59. data/ext/polars/src/rb_modules.rs +19 -49
  60. data/ext/polars/src/series/aggregation.rs +79 -140
  61. data/ext/polars/src/series/arithmetic.rs +16 -22
  62. data/ext/polars/src/series/comparison.rs +101 -222
  63. data/ext/polars/src/series/construction.rs +17 -18
  64. data/ext/polars/src/series/export.rs +1 -1
  65. data/ext/polars/src/series/general.rs +254 -289
  66. data/ext/polars/src/series/import.rs +17 -0
  67. data/ext/polars/src/series/map.rs +178 -160
  68. data/ext/polars/src/series/mod.rs +28 -12
  69. data/ext/polars/src/series/scatter.rs +12 -9
  70. data/ext/polars/src/sql.rs +16 -9
  71. data/ext/polars/src/testing/frame.rs +31 -0
  72. data/ext/polars/src/testing/mod.rs +5 -0
  73. data/ext/polars/src/testing/series.rs +31 -0
  74. data/ext/polars/src/timeout.rs +105 -0
  75. data/ext/polars/src/utils.rs +159 -1
  76. data/lib/polars/array_expr.rb +81 -12
  77. data/lib/polars/array_name_space.rb +74 -7
  78. data/lib/polars/batched_csv_reader.rb +21 -21
  79. data/lib/polars/binary_name_space.rb +1 -1
  80. data/lib/polars/cat_expr.rb +7 -7
  81. data/lib/polars/config.rb +1 -1
  82. data/lib/polars/convert.rb +189 -34
  83. data/lib/polars/data_frame.rb +1066 -831
  84. data/lib/polars/data_frame_plot.rb +173 -0
  85. data/lib/polars/data_type_group.rb +1 -0
  86. data/lib/polars/data_types.rb +31 -12
  87. data/lib/polars/date_time_expr.rb +51 -69
  88. data/lib/polars/date_time_name_space.rb +80 -112
  89. data/lib/polars/dynamic_group_by.rb +7 -7
  90. data/lib/polars/exceptions.rb +50 -10
  91. data/lib/polars/expr.rb +470 -517
  92. data/lib/polars/functions/aggregation/horizontal.rb +0 -1
  93. data/lib/polars/functions/aggregation/vertical.rb +2 -3
  94. data/lib/polars/functions/as_datatype.rb +290 -8
  95. data/lib/polars/functions/eager.rb +204 -10
  96. data/lib/polars/functions/escape_regex.rb +21 -0
  97. data/lib/polars/functions/lazy.rb +409 -169
  98. data/lib/polars/functions/lit.rb +17 -1
  99. data/lib/polars/functions/range/int_range.rb +74 -2
  100. data/lib/polars/functions/range/linear_space.rb +77 -0
  101. data/lib/polars/functions/range/time_range.rb +1 -1
  102. data/lib/polars/functions/repeat.rb +3 -12
  103. data/lib/polars/functions/whenthen.rb +2 -2
  104. data/lib/polars/group_by.rb +72 -20
  105. data/lib/polars/iceberg_dataset.rb +1 -6
  106. data/lib/polars/in_process_query.rb +37 -0
  107. data/lib/polars/io/cloud.rb +18 -0
  108. data/lib/polars/io/csv.rb +265 -126
  109. data/lib/polars/io/database.rb +0 -1
  110. data/lib/polars/io/delta.rb +15 -7
  111. data/lib/polars/io/ipc.rb +24 -17
  112. data/lib/polars/io/ndjson.rb +161 -24
  113. data/lib/polars/io/parquet.rb +101 -38
  114. data/lib/polars/lazy_frame.rb +849 -558
  115. data/lib/polars/lazy_group_by.rb +327 -2
  116. data/lib/polars/list_expr.rb +94 -16
  117. data/lib/polars/list_name_space.rb +88 -24
  118. data/lib/polars/meta_expr.rb +42 -1
  119. data/lib/polars/name_expr.rb +41 -4
  120. data/lib/polars/query_opt_flags.rb +198 -2
  121. data/lib/polars/rolling_group_by.rb +3 -3
  122. data/lib/polars/schema.rb +21 -3
  123. data/lib/polars/selector.rb +37 -2
  124. data/lib/polars/selectors.rb +45 -9
  125. data/lib/polars/series.rb +1156 -728
  126. data/lib/polars/series_plot.rb +72 -0
  127. data/lib/polars/slice.rb +1 -1
  128. data/lib/polars/sql_context.rb +11 -4
  129. data/lib/polars/string_expr.rb +59 -68
  130. data/lib/polars/string_name_space.rb +51 -87
  131. data/lib/polars/struct_expr.rb +36 -18
  132. data/lib/polars/testing.rb +24 -273
  133. data/lib/polars/utils/constants.rb +2 -0
  134. data/lib/polars/utils/construction/data_frame.rb +410 -0
  135. data/lib/polars/utils/construction/series.rb +364 -0
  136. data/lib/polars/utils/construction/utils.rb +9 -0
  137. data/lib/polars/utils/deprecation.rb +11 -0
  138. data/lib/polars/utils/serde.rb +8 -3
  139. data/lib/polars/utils/unstable.rb +19 -0
  140. data/lib/polars/utils/various.rb +59 -0
  141. data/lib/polars/utils.rb +46 -47
  142. data/lib/polars/version.rb +1 -1
  143. data/lib/polars.rb +47 -1
  144. metadata +25 -6
  145. data/ext/polars/src/allocator.rs +0 -13
  146. data/lib/polars/plot.rb +0 -109
@@ -1,8 +1,6 @@
1
1
  module Polars
2
2
  # Two-dimensional data structure representing data as a table with rows and columns.
3
3
  class DataFrame
4
- include Plot
5
-
6
4
  # @private
7
5
  attr_accessor :_df
8
6
 
@@ -43,24 +41,24 @@ module Polars
43
41
  # @param infer_schema_length [Integer]
44
42
  # The maximum number of rows to scan for schema inference. If set to `nil`, the
45
43
  # full data may be scanned *(this can be slow)*. This parameter only applies if
46
- # the input data is a sequence or generator of rows; other input is read as-is.
44
+ # the input data is an array or generator of rows; other input is read as-is.
47
45
  # @param nan_to_null [Boolean]
48
46
  # If the data comes from one or more Numo arrays, can optionally convert input
49
47
  # data NaN values to null instead. This is a no-op for all other input data.
50
- def initialize(data = nil, schema: nil, schema_overrides: nil, strict: true, orient: nil, infer_schema_length: 100, nan_to_null: false)
48
+ def initialize(data = nil, schema: nil, schema_overrides: nil, strict: true, orient: nil, infer_schema_length: N_INFER_DEFAULT, nan_to_null: false)
51
49
  if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
52
50
  raise ArgumentError, "Use read_database instead"
53
51
  end
54
52
 
55
53
  if data.nil?
56
- self._df = self.class.hash_to_rbdf({}, schema: schema, schema_overrides: schema_overrides)
54
+ self._df = Utils.hash_to_rbdf({}, schema: schema, schema_overrides: schema_overrides)
57
55
  elsif data.is_a?(Hash)
58
56
  data = data.transform_keys { |v| v.is_a?(Symbol) ? v.to_s : v }
59
- self._df = self.class.hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, strict: strict, nan_to_null: nan_to_null)
57
+ self._df = Utils.hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, strict: strict, nan_to_null: nan_to_null)
60
58
  elsif data.is_a?(::Array)
61
- self._df = self.class.sequence_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, strict: strict, orient: orient, infer_schema_length: infer_schema_length)
59
+ self._df = Utils.sequence_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, strict: strict, orient: orient, infer_schema_length: infer_schema_length)
62
60
  elsif data.is_a?(Series)
63
- self._df = self.class.series_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, strict: strict)
61
+ self._df = Utils.series_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, strict: strict)
64
62
  elsif data.respond_to?(:arrow_c_stream)
65
63
  # This uses the fact that RbSeries.from_arrow_c_stream will create a
66
64
  # struct-typed Series. Then we unpack that to a DataFrame.
@@ -116,6 +114,45 @@ module Polars
116
114
  df
117
115
  end
118
116
 
117
+ # Plot data.
118
+ #
119
+ # @return [Object]
120
+ def plot(x = nil, y = nil, type: nil, group: nil, stacked: nil)
121
+ plot = DataFramePlot.new(self)
122
+ return plot if x.nil? && y.nil?
123
+
124
+ raise ArgumentError, "Must specify columns" if x.nil? || y.nil?
125
+ type ||= begin
126
+ if self[x].dtype.numeric? && self[y].dtype.numeric?
127
+ "scatter"
128
+ elsif self[x].dtype == String && self[y].dtype.numeric?
129
+ "column"
130
+ elsif (self[x].dtype == Date || self[x].dtype == Datetime) && self[y].dtype.numeric?
131
+ "line"
132
+ else
133
+ raise "Cannot determine type. Use the type option."
134
+ end
135
+ end
136
+
137
+ case type
138
+ when "line"
139
+ plot.line(x, y, color: group)
140
+ when "area"
141
+ plot.area(x, y, color: group)
142
+ when "pie"
143
+ raise ArgumentError, "Cannot use group option with pie chart" unless group.nil?
144
+ plot.pie(x, y)
145
+ when "column"
146
+ plot.column(x, y, color: group, stacked: stacked)
147
+ when "bar"
148
+ plot.bar(x, y, color: group, stacked: stacked)
149
+ when "scatter"
150
+ plot.scatter(x, y, color: group)
151
+ else
152
+ raise ArgumentError, "Invalid type: #{type}"
153
+ end
154
+ end
155
+
119
156
  # Get the shape of the DataFrame.
120
157
  #
121
158
  # @return [Array]
@@ -244,9 +281,9 @@ module Polars
244
281
  # }
245
282
  # )
246
283
  # df.schema
247
- # # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String}
284
+ # # => Polars::Schema({"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String})
248
285
  def schema
249
- columns.zip(dtypes).to_h
286
+ Schema.new(columns.zip(dtypes).to_h)
250
287
  end
251
288
 
252
289
  # Equal.
@@ -383,142 +420,243 @@ module Polars
383
420
  # Returns subset of the DataFrame.
384
421
  #
385
422
  # @return [Object]
386
- def [](*args)
387
- if args.size == 2
388
- row_selection, col_selection = args
389
-
390
- # df[.., unknown]
391
- if row_selection.is_a?(Range)
392
-
393
- # multiple slices
394
- # df[.., ..]
395
- if col_selection.is_a?(Range)
396
- raise Todo
397
- end
398
- end
399
-
400
- # df[2, ..] (select row as df)
401
- if row_selection.is_a?(Integer)
402
- if col_selection.is_a?(::Array)
403
- df = self[0.., col_selection]
404
- return df.slice(row_selection, 1)
405
- end
406
- # df[2, "a"]
407
- if col_selection.is_a?(::String) || col_selection.is_a?(Symbol)
408
- return self[col_selection][row_selection]
409
- end
410
- end
411
-
412
- # column selection can be "a" and ["a", "b"]
413
- if col_selection.is_a?(::String) || col_selection.is_a?(Symbol)
414
- col_selection = [col_selection]
415
- end
416
-
417
- # df[.., 1]
418
- if col_selection.is_a?(Integer)
419
- series = to_series(col_selection)
420
- return series[row_selection]
421
- end
422
-
423
- if col_selection.is_a?(::Array)
424
- # df[.., [1, 2]]
425
- if Utils.is_int_sequence(col_selection)
426
- series_list = col_selection.map { |i| to_series(i) }
427
- df = self.class.new(series_list)
428
- return df[row_selection]
429
- end
430
- end
431
-
432
- df = self[col_selection]
433
- return df[row_selection]
434
- elsif args.size == 1
435
- item = args[0]
436
-
437
- # select single column
438
- # df["foo"]
439
- if item.is_a?(::String) || item.is_a?(Symbol)
440
- return Utils.wrap_s(_df.get_column(item.to_s))
441
- end
442
-
443
- # df[idx]
444
- if item.is_a?(Integer)
445
- return slice(_pos_idx(item, 0), 1)
446
- end
447
-
448
- # df[..]
449
- if item.is_a?(Range)
450
- return Slice.new(self).apply(item)
451
- end
452
-
453
- if item.is_a?(::Array) && item.all? { |v| Utils.strlike?(v) }
454
- # select multiple columns
455
- # df[["foo", "bar"]]
456
- return _from_rbdf(_df.select(item.map(&:to_s)))
457
- end
458
-
459
- if Utils.is_int_sequence(item)
460
- item = Series.new("", item)
461
- end
462
-
463
- if item.is_a?(Series)
464
- dtype = item.dtype
465
- if dtype == String
466
- return _from_rbdf(_df.select(item))
467
- elsif dtype == UInt32
468
- return _from_rbdf(_df.take_with_series(item._s))
469
- elsif [UInt8, UInt16, UInt64, Int8, Int16, Int32, Int64].include?(dtype)
470
- return _from_rbdf(
471
- _df.take_with_series(_pos_idxs(item, 0)._s)
472
- )
473
- end
474
- end
475
- end
476
-
477
- # Ruby-specific
478
- if item.is_a?(Expr) || item.is_a?(Series)
479
- return filter(item)
480
- end
481
-
482
- raise ArgumentError, "Cannot get item of type: #{item.class.name}"
423
+ #
424
+ # @example
425
+ # df = Polars::DataFrame.new(
426
+ # {"a" => [1, 2, 3], "d" => [4, 5, 6], "c" => [1, 3, 2], "b" => [7, 8, 9]}
427
+ # )
428
+ # df[0]
429
+ # # =>
430
+ # # shape: (1, 4)
431
+ # # ┌─────┬─────┬─────┬─────┐
432
+ # # │ a ┆ d ┆ c ┆ b │
433
+ # # │ --- ┆ --- ┆ --- ┆ --- │
434
+ # # │ i64 ┆ i64 ┆ i64 ┆ i64 │
435
+ # # ╞═════╪═════╪═════╪═════╡
436
+ # # │ 1 ┆ 4 ┆ 1 ┆ 7 │
437
+ # # └─────┴─────┴─────┴─────┘
438
+ #
439
+ # @example
440
+ # df[0, "a"]
441
+ # # => 1
442
+ #
443
+ # @example
444
+ # df["a"]
445
+ # # =>
446
+ # # shape: (3,)
447
+ # # Series: 'a' [i64]
448
+ # # [
449
+ # # 1
450
+ # # 2
451
+ # # 3
452
+ # # ]
453
+ #
454
+ # @example
455
+ # df[0..1]
456
+ # # =>
457
+ # # shape: (2, 4)
458
+ # # ┌─────┬─────┬─────┬─────┐
459
+ # # │ a ┆ d ┆ c ┆ b │
460
+ # # │ --- ┆ --- ┆ --- ┆ --- │
461
+ # # │ i64 ┆ i64 ┆ i64 ┆ i64 │
462
+ # # ╞═════╪═════╪═════╪═════╡
463
+ # # │ 1   ┆ 4   ┆ 1   ┆ 7   │
464
+ # # │ 2   ┆ 5   ┆ 3   ┆ 8   │
465
+ # # └─────┴─────┴─────┴─────┘
466
+ #
467
+ # @example
468
+ # df[0..1, "a"]
469
+ # # =>
470
+ # # shape: (2,)
471
+ # # Series: 'a' [i64]
472
+ # # [
473
+ # # 1
474
+ # # 2
475
+ # # ]
476
+ #
477
+ # @example
478
+ # df[0..1, 0]
479
+ # # =>
480
+ # # shape: (2,)
481
+ # # Series: 'a' [i64]
482
+ # # [
483
+ # # 1
484
+ # # 2
485
+ # # ]
486
+ #
487
+ # @example
488
+ # df[[0, 1], [0, 1, 2]]
489
+ # # =>
490
+ # # shape: (2, 3)
491
+ # # ┌─────┬─────┬─────┐
492
+ # # │ a   ┆ d   ┆ c   │
493
+ # # │ --- ┆ --- ┆ --- │
494
+ # # │ i64 ┆ i64 ┆ i64 │
495
+ # # ╞═════╪═════╪═════╡
496
+ # # │ 1 ┆ 4 ┆ 1 │
497
+ # # │ 2   ┆ 5   ┆ 3   │
498
+ # # └─────┴─────┴─────┘
499
+ #
500
+ # @example
501
+ # df[0..1, ["a", "c"]]
502
+ # # =>
503
+ # # shape: (2, 2)
504
+ # # ┌─────┬─────┐
505
+ # # │ a ┆ c │
506
+ # # │ --- ┆ --- │
507
+ # # │ i64 ┆ i64 │
508
+ # # ╞═════╪═════╡
509
+ # # │ 1 ┆ 1 │
510
+ # # │ 2 ┆ 3 │
511
+ # # └─────┴─────┘
512
+ #
513
+ # @example
514
+ # df[0.., 0..1]
515
+ # # =>
516
+ # # shape: (3, 2)
517
+ # # ┌─────┬─────┐
518
+ # # │ a ┆ d │
519
+ # # │ --- ┆ --- │
520
+ # # │ i64 ┆ i64 │
521
+ # # ╞═════╪═════╡
522
+ # # │ 1 ┆ 4 │
523
+ # # │ 2 ┆ 5 │
524
+ # # │ 3 ┆ 6 │
525
+ # # └─────┴─────┘
526
+ #
527
+ # @example
528
+ # df[0.., "a".."c"]
529
+ # # =>
530
+ # # shape: (3, 3)
531
+ # # ┌─────┬─────┬─────┐
532
+ # # │ a ┆ d ┆ c │
533
+ # # │ --- ┆ --- ┆ --- │
534
+ # # │ i64 ┆ i64 ┆ i64 │
535
+ # # ╞═════╪═════╪═════╡
536
+ # # │ 1 ┆ 4 ┆ 1 │
537
+ # # │ 2 ┆ 5 ┆ 3 │
538
+ # # │ 3 ┆ 6 ┆ 2 │
539
+ # # └─────┴─────┴─────┘
540
+ def [](*key)
541
+ get_df_item_by_key(self, key)
483
542
  end
484
543
 
485
544
  # Set item.
486
545
  #
487
546
  # @return [Object]
547
+ #
548
+ # @example `df[["a", "b"]] = value`:
549
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => [4, 5, 6]})
550
+ # df[["a", "b"]] = [[10, 40], [20, 50], [30, 60]]
551
+ # df
552
+ # # =>
553
+ # # shape: (3, 2)
554
+ # # ┌─────┬─────┐
555
+ # # │ a ┆ b │
556
+ # # │ --- ┆ --- │
557
+ # # │ i64 ┆ i64 │
558
+ # # ╞═════╪═════╡
559
+ # # │ 10 ┆ 40 │
560
+ # # │ 20 ┆ 50 │
561
+ # # │ 30 ┆ 60 │
562
+ # # └─────┴─────┘
563
+ #
564
+ # @example `df[row_idx, "a"] = value`:
565
+ # df[1, "a"] = 100
566
+ # df
567
+ # # =>
568
+ # # shape: (3, 2)
569
+ # # ┌─────┬─────┐
570
+ # # │ a ┆ b │
571
+ # # │ --- ┆ --- │
572
+ # # │ i64 ┆ i64 │
573
+ # # ╞═════╪═════╡
574
+ # # │ 10 ┆ 40 │
575
+ # # │ 100 ┆ 50 │
576
+ # # │ 30 ┆ 60 │
577
+ # # └─────┴─────┘
578
+ #
579
+ # @example `df[row_idx, col_idx] = value`:
580
+ # df[0, 1] = 30
581
+ # df
582
+ # # =>
583
+ # # shape: (3, 2)
584
+ # # ┌─────┬─────┐
585
+ # # │ a ┆ b │
586
+ # # │ --- ┆ --- │
587
+ # # │ i64 ┆ i64 │
588
+ # # ╞═════╪═════╡
589
+ # # │ 10 ┆ 30 │
590
+ # # │ 100 ┆ 50 │
591
+ # # │ 30 ┆ 60 │
592
+ # # └─────┴─────┘
488
593
  def []=(*key, value)
489
- if key.length == 1
490
- key = key.first
491
- elsif key.length != 2
594
+ if key.empty? || key.length > 2
492
595
  raise ArgumentError, "wrong number of arguments (given #{key.length + 1}, expected 2..3)"
493
596
  end
494
597
 
495
- if Utils.strlike?(key)
598
+ if key.length == 1 && Utils.strlike?(key[0])
599
+ key = key[0]
600
+
496
601
  if value.is_a?(::Array) || (defined?(Numo::NArray) && value.is_a?(Numo::NArray))
497
602
  value = Series.new(value)
498
603
  elsif !value.is_a?(Series)
499
604
  value = Polars.lit(value)
500
605
  end
501
- self._df = with_column(value.alias(key.to_s))._df
502
- elsif key.is_a?(::Array)
606
+ self._df = with_columns(value.alias(key.to_s))._df
607
+
608
+ # df[["C", "D"]]
609
+ elsif key.length == 1 && key[0].is_a?(::Array)
610
+ key = key[0]
611
+
612
+ if !value.is_a?(::Array) || !value.all? { |v| v.is_a?(::Array) }
613
+ msg = "can only set multiple columns with 2D matrix"
614
+ raise ArgumentError, msg
615
+ end
616
+ if value.any? { |v| v.size != key.length }
617
+ msg = "matrix columns should be equal to list used to determine column names"
618
+ raise ArgumentError, msg
619
+ end
620
+
621
+ columns = []
622
+ key.each_with_index do |name, i|
623
+ columns << Series.new(name, value.map { |v| v[i] })
624
+ end
625
+ self._df = with_columns(columns)._df
626
+
627
+ # df[a, b]
628
+ else
503
629
  row_selection, col_selection = key
504
630
 
631
+ if (row_selection.is_a?(Series) && row_selection.dtype == Boolean) || Utils.is_bool_sequence(row_selection)
632
+ msg = (
633
+ "not allowed to set DataFrame by boolean mask in the row position" +
634
+ "\n\nConsider using `DataFrame.with_columns`."
635
+ )
636
+ raise TypeError, msg
637
+ end
638
+
639
+ # get series column selection
505
640
  if Utils.strlike?(col_selection)
506
641
  s = self[col_selection]
507
642
  elsif col_selection.is_a?(Integer)
508
- raise Todo
643
+ s = self[0.., col_selection]
509
644
  else
510
- raise ArgumentError, "column selection not understood: #{col_selection}"
645
+ msg = "unexpected column selection #{col_selection.inspect}"
646
+ raise TypeError, msg
511
647
  end
512
648
 
649
+ # dispatch to []= of Series to do modification
513
650
  s[row_selection] = value
514
651
 
652
+ # now find the location to place series
653
+ # df[idx]
515
654
  if col_selection.is_a?(Integer)
516
655
  replace_column(col_selection, s)
656
+ # df["foo"]
517
657
  elsif Utils.strlike?(col_selection)
518
- replace(col_selection, s)
658
+ _replace(col_selection.to_s, s)
519
659
  end
520
- else
521
- raise Todo
522
660
  end
523
661
  end
524
662
 
@@ -566,22 +704,55 @@ module Polars
566
704
  Schema.new(columns.zip(dtypes), check_dtypes: false)
567
705
  end
568
706
 
569
- # Return the dataframe as a scalar.
707
+ # Return the DataFrame as a scalar, or return the element at the given row/column.
570
708
  #
571
- # Equivalent to `df[0,0]`, with a check that the shape is (1,1).
709
+ # @param row [Integer]
710
+ # Optional row index.
711
+ # @param column [Integer, String]
712
+ # Optional column index or name.
572
713
  #
573
714
  # @return [Object]
574
715
  #
716
+ # @note
717
+ # If row/col not provided, this is equivalent to `df[0,0]`, with a check that
718
+ # the shape is (1,1). With row/col, this is equivalent to `df[row,col]`.
719
+ #
575
720
  # @example
576
721
  # df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => [4, 5, 6]})
577
- # result = df.select((Polars.col("a") * Polars.col("b")).sum)
578
- # result.item
722
+ # df.select((Polars.col("a") * Polars.col("b")).sum).item
579
723
  # # => 32
580
- def item
581
- if shape != [1, 1]
582
- raise ArgumentError, "Can only call .item if the dataframe is of shape (1,1), dataframe is of shape #{shape}"
724
+ #
725
+ # @example
726
+ # df.item(1, 1)
727
+ # # => 5
728
+ #
729
+ # @example
730
+ # df.item(2, "b")
731
+ # # => 6
732
+ def item(row = nil, column = nil)
733
+ if row.nil? && column.nil?
734
+ if shape != [1, 1]
735
+ msg = (
736
+ "can only call `.item()` if the dataframe is of shape (1, 1)," +
737
+ " or if explicit row/col values are provided;" +
738
+ " frame has shape #{shape.inspect}"
739
+ )
740
+ raise ArgumentError, msg
741
+ end
742
+ return _df.to_series(0).get_index(0)
743
+
744
+ elsif row.nil? || column.nil?
745
+ msg = "cannot call `.item()` with only one of `row` or `column`"
746
+ raise ArgumentError, msg
583
747
  end
584
- self[0, 0]
748
+
749
+ s =
750
+ if column.is_a?(Integer)
751
+ _df.to_series(column)
752
+ else
753
+ _df.get_column(column)
754
+ end
755
+ s.get_index_signed(row)
585
756
  end
586
757
 
587
758
  # no to_arrow
@@ -661,7 +832,7 @@ module Polars
661
832
  if index < 0
662
833
  index = columns.length + index
663
834
  end
664
- Utils.wrap_s(_df.select_at_idx(index))
835
+ Utils.wrap_s(_df.to_series(index))
665
836
  end
666
837
 
667
838
  # Serialize this DataFrame to a file or string.
@@ -758,25 +929,26 @@ module Polars
758
929
  # df.write_ndjson
759
930
  # # => "{\"foo\":1,\"bar\":6}\n{\"foo\":2,\"bar\":7}\n{\"foo\":3,\"bar\":8}\n"
760
931
  def write_ndjson(file = nil)
761
- if Utils.pathlike?(file)
762
- file = Utils.normalize_filepath(file)
932
+ should_return_buffer = false
933
+ target = nil
934
+ if file.nil?
935
+ target = StringIO.new
936
+ target.set_encoding(Encoding::BINARY)
937
+ should_return_buffer = true
938
+ elsif Utils.pathlike?(file)
939
+ target = Utils.normalize_filepath(file)
940
+ else
941
+ target = file
763
942
  end
764
- to_string_io = !file.nil? && file.is_a?(StringIO)
765
- if file.nil? || to_string_io
766
- buf = StringIO.new
767
- buf.set_encoding(Encoding::BINARY)
768
- _df.write_ndjson(buf)
769
- json_bytes = buf.string
770
943
 
771
- json_str = json_bytes.force_encoding(Encoding::UTF_8)
772
- if to_string_io
773
- file.write(json_str)
774
- else
775
- return json_str
776
- end
777
- else
778
- _df.write_ndjson(file)
944
+ lazy.sink_ndjson(
945
+ target
946
+ )
947
+
948
+ if should_return_buffer
949
+ return target.string.force_encoding(Encoding::UTF_8)
779
950
  end
951
+
780
952
  nil
781
953
  end
782
954
 
@@ -787,9 +959,9 @@ module Polars
787
959
  # (default), the output is returned as a string instead.
788
960
  # @param include_header [Boolean]
789
961
  # Whether to include header in the CSV output.
790
- # @param sep [String]
962
+ # @param separator [String]
791
963
  # Separate CSV fields with this symbol.
792
- # @param quote [String]
964
+ # @param quote_char [String]
793
965
  # Byte to use as quoting character.
794
966
  # @param batch_size [Integer]
795
967
  # Number of rows that will be processed per thread.
@@ -808,8 +980,8 @@ module Polars
808
980
  # [chrono](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
809
981
  # Rust crate.
810
982
  # @param float_precision [Integer, nil]
811
- # Number of decimal places to write, applied to both `:f32` and
812
- # `:f64` datatypes.
983
+ # Number of decimal places to write, applied to both `Float32` and
984
+ # `Float64` datatypes.
813
985
  # @param null_value [String, nil]
814
986
  # A string representing null values (defaulting to the empty string).
815
987
  #
@@ -826,38 +998,52 @@ module Polars
826
998
  # df.write_csv("file.csv")
827
999
  def write_csv(
828
1000
  file = nil,
1001
+ include_bom: false,
829
1002
  include_header: true,
830
- sep: ",",
831
- quote: '"',
1003
+ separator: ",",
1004
+ line_terminator: "\n",
1005
+ quote_char: '"',
832
1006
  batch_size: 1024,
833
1007
  datetime_format: nil,
834
1008
  date_format: nil,
835
1009
  time_format: nil,
1010
+ float_scientific: nil,
836
1011
  float_precision: nil,
837
- null_value: nil
1012
+ decimal_comma: false,
1013
+ null_value: nil,
1014
+ quote_style: nil,
1015
+ storage_options: nil,
1016
+ credential_provider: "auto",
1017
+ retries: 2
838
1018
  )
839
- if sep.length > 1
840
- raise ArgumentError, "only single byte separator is allowed"
841
- elsif quote.length > 1
842
- raise ArgumentError, "only single byte quote char is allowed"
843
- elsif null_value == ""
1019
+ Utils._check_arg_is_1byte("separator", separator, false)
1020
+ Utils._check_arg_is_1byte("quote_char", quote_char, true)
1021
+ if null_value == ""
844
1022
  null_value = nil
845
1023
  end
846
1024
 
847
1025
  if file.nil?
848
1026
  buffer = StringIO.new
849
1027
  buffer.set_encoding(Encoding::BINARY)
850
- _df.write_csv(
1028
+ lazy.sink_csv(
851
1029
  buffer,
852
- include_header,
853
- sep.ord,
854
- quote.ord,
855
- batch_size,
856
- datetime_format,
857
- date_format,
858
- time_format,
859
- float_precision,
860
- null_value
1030
+ include_bom: include_bom,
1031
+ include_header: include_header,
1032
+ separator: separator,
1033
+ line_terminator: line_terminator,
1034
+ quote_char: quote_char,
1035
+ batch_size: batch_size,
1036
+ datetime_format: datetime_format,
1037
+ date_format: date_format,
1038
+ time_format: time_format,
1039
+ float_scientific: float_scientific,
1040
+ float_precision: float_precision,
1041
+ decimal_comma: decimal_comma,
1042
+ null_value: null_value,
1043
+ quote_style: quote_style,
1044
+ storage_options: storage_options,
1045
+ credential_provider: credential_provider,
1046
+ retries: retries
861
1047
  )
862
1048
  return buffer.string.force_encoding(Encoding::UTF_8)
863
1049
  end
@@ -866,17 +1052,25 @@ module Polars
866
1052
  file = Utils.normalize_filepath(file)
867
1053
  end
868
1054
 
869
- _df.write_csv(
1055
+ lazy.sink_csv(
870
1056
  file,
871
- include_header,
872
- sep.ord,
873
- quote.ord,
874
- batch_size,
875
- datetime_format,
876
- date_format,
877
- time_format,
878
- float_precision,
879
- null_value,
1057
+ include_bom: include_bom,
1058
+ include_header: include_header,
1059
+ separator: separator,
1060
+ line_terminator: line_terminator,
1061
+ quote_char: quote_char,
1062
+ batch_size: batch_size,
1063
+ datetime_format: datetime_format,
1064
+ date_format: date_format,
1065
+ time_format: time_format,
1066
+ float_scientific: float_scientific,
1067
+ float_precision: float_precision,
1068
+ decimal_comma: decimal_comma,
1069
+ null_value: null_value,
1070
+ quote_style: quote_style,
1071
+ storage_options: storage_options,
1072
+ credential_provider: credential_provider,
1073
+ retries: retries
880
1074
  )
881
1075
  nil
882
1076
  end
@@ -934,6 +1128,10 @@ module Polars
934
1128
  #
935
1129
  # If `storage_options` is not provided, Polars will try to infer the
936
1130
  # information from environment variables.
1131
+ # @param credential_provider [Object]
1132
+ # Provide a function that can be called to provide cloud storage
1133
+ # credentials. The function is expected to return a hash of
1134
+ # credential keys along with an optional credential expiry time.
937
1135
  # @param retries [Integer]
938
1136
  # Number of retries if accessing a cloud instance fails.
939
1137
  #
@@ -943,33 +1141,27 @@ module Polars
943
1141
  compression: "uncompressed",
944
1142
  compat_level: nil,
945
1143
  storage_options: nil,
1144
+ credential_provider: "auto",
946
1145
  retries: 2
947
1146
  )
948
1147
  return_bytes = file.nil?
949
- if return_bytes
950
- file = StringIO.new
951
- file.set_encoding(Encoding::BINARY)
952
- end
953
- if Utils.pathlike?(file)
954
- file = Utils.normalize_filepath(file)
955
- end
956
-
957
- if compat_level.nil?
958
- compat_level = true
959
- end
960
-
961
- if compression.nil?
962
- compression = "uncompressed"
963
- end
964
-
965
- if storage_options&.any?
966
- storage_options = storage_options.to_a
1148
+ target = nil
1149
+ if file.nil?
1150
+ target = StringIO.new
1151
+ target.set_encoding(Encoding::BINARY)
967
1152
  else
968
- storage_options = nil
1153
+ target = file
969
1154
  end
970
1155
 
971
- _df.write_ipc(file, compression, compat_level, storage_options, retries)
972
- return_bytes ? file.string : nil
1156
+ lazy.sink_ipc(
1157
+ target,
1158
+ compression: compression,
1159
+ compat_level: compat_level,
1160
+ storage_options: storage_options,
1161
+ credential_provider: credential_provider,
1162
+ retries: retries
1163
+ )
1164
+ return_bytes ? target.string : nil
973
1165
  end
974
1166
 
975
1167
  # Write to Arrow IPC record batch stream.
@@ -1049,9 +1241,16 @@ module Polars
1049
1241
  file,
1050
1242
  compression: "zstd",
1051
1243
  compression_level: nil,
1052
- statistics: false,
1244
+ statistics: true,
1053
1245
  row_group_size: nil,
1054
- data_page_size: nil
1246
+ data_page_size: nil,
1247
+ partition_by: nil,
1248
+ partition_chunk_size_bytes: 4_294_967_296,
1249
+ storage_options: nil,
1250
+ credential_provider: "auto",
1251
+ retries: 2,
1252
+ metadata: nil,
1253
+ mkdir: false
1055
1254
  )
1056
1255
  if compression.nil?
1057
1256
  compression = "uncompressed"
@@ -1060,26 +1259,23 @@ module Polars
1060
1259
  file = Utils.normalize_filepath(file)
1061
1260
  end
1062
1261
 
1063
- if statistics == true
1064
- statistics = {
1065
- min: true,
1066
- max: true,
1067
- distinct_count: false,
1068
- null_count: true
1069
- }
1070
- elsif statistics == false
1071
- statistics = {}
1072
- elsif statistics == "full"
1073
- statistics = {
1074
- min: true,
1075
- max: true,
1076
- distinct_count: true,
1077
- null_count: true
1078
- }
1262
+ target = file
1263
+ if !partition_by.nil?
1264
+ raise Todo
1079
1265
  end
1080
1266
 
1081
- _df.write_parquet(
1082
- file, compression, compression_level, statistics, row_group_size, data_page_size
1267
+ lazy.sink_parquet(
1268
+ target,
1269
+ compression: compression,
1270
+ compression_level: compression_level,
1271
+ statistics: statistics,
1272
+ row_group_size: row_group_size,
1273
+ data_page_size: data_page_size,
1274
+ storage_options: storage_options,
1275
+ credential_provider: credential_provider,
1276
+ retries: retries,
1277
+ metadata: metadata,
1278
+ mkdir: mkdir
1083
1279
  )
1084
1280
  end
1085
1281
 
@@ -1332,7 +1528,7 @@ module Polars
1332
1528
  # "y" => 1_000_000.times.map { |v| v / 1000.0 },
1333
1529
  # "z" => 1_000_000.times.map(&:to_s)
1334
1530
  # },
1335
- # schema: {"x" => :u32, "y" => :f64, "z" => :str}
1531
+ # schema: {"x" => Polars::UInt32, "y" => Polars::Float64, "z" => Polars::String}
1336
1532
  # )
1337
1533
  # df.estimated_size
1338
1534
  # # => 25888898
@@ -1464,14 +1660,14 @@ module Polars
1464
1660
  # # │ 3 ┆ 8 ┆ c │
1465
1661
  # # └───────┴─────┴─────┘
1466
1662
  def rename(mapping, strict: true)
1467
- lazy.rename(mapping, strict: strict).collect(no_optimization: true)
1663
+ lazy.rename(mapping, strict: strict).collect(optimizations: QueryOptFlags._eager)
1468
1664
  end
1469
1665
 
1470
1666
  # Insert a Series at a certain column index. This operation is in place.
1471
1667
  #
1472
1668
  # @param index [Integer]
1473
1669
  # Column to insert the new `Series` column.
1474
- # @param series [Series]
1670
+ # @param column [Series]
1475
1671
  # `Series` to insert.
1476
1672
  #
1477
1673
  # @return [DataFrame]
@@ -1514,19 +1710,22 @@ module Polars
1514
1710
  # # │ 3 ┆ 10.0 ┆ false ┆ 20.5 │
1515
1711
  # # │ 4 ┆ 13.0 ┆ true ┆ 0.0 │
1516
1712
  # # └─────┴──────┴───────┴──────┘
1517
- def insert_column(index, series)
1713
+ def insert_column(index, column)
1518
1714
  if index < 0
1519
- index = columns.length + index
1715
+ index = width + index
1520
1716
  end
1521
- _df.insert_column(index, series._s)
1717
+ _df.insert_column(index, column._s)
1522
1718
  self
1523
1719
  end
1524
- alias_method :insert_at_idx, :insert_column
1525
1720
 
1526
1721
  # Filter the rows in the DataFrame based on a predicate expression.
1527
1722
  #
1528
- # @param predicate [Expr]
1529
- # Expression that evaluates to a boolean Series.
1723
+ # @param predicates [Array]
1724
+ # Expression(s) that evaluate to a boolean Series.
1725
+ # @param constraints [Hash]
1726
+ # Column filters; use `name = value` to filter columns by the supplied value.
1727
+ # Each constraint will behave the same as `Polars.col(name).eq(value)`, and
1728
+ # be implicitly joined with the other filter conditions using `&`.
1530
1729
  #
1531
1730
  # @return [DataFrame]
1532
1731
  #
@@ -1561,15 +1760,15 @@ module Polars
1561
1760
  # # ╞═════╪═════╪═════╡
1562
1761
  # # │ 1 ┆ 6 ┆ a │
1563
1762
  # # └─────┴─────┴─────┘
1564
- def filter(predicate)
1565
- lazy.filter(predicate).collect
1763
+ def filter(*predicates, **constraints)
1764
+ lazy.filter(*predicates, **constraints).collect(optimizations: QueryOptFlags._eager)
1566
1765
  end
1567
1766
 
1568
1767
  # Remove rows, dropping those that match the given predicate expression(s).
1569
1768
  #
1570
1769
  # The original order of the remaining rows is preserved.
1571
1770
  #
1572
- # Rows where the filter predicate does not evaluate to True are retained
1771
+ # Rows where the filter predicate does not evaluate to true are retained
1573
1772
  # (this includes rows where the predicate evaluates as `null`).
1574
1773
  #
1575
1774
  # @param predicates [Array]
@@ -1682,77 +1881,178 @@ module Polars
1682
1881
  )
1683
1882
  lazy
1684
1883
  .remove(*predicates, **constraints)
1685
- .collect(_eager: true)
1884
+ .collect(optimizations: QueryOptFlags._eager)
1686
1885
  end
1687
1886
 
1688
- # Summary statistics for a DataFrame.
1887
+ # Return a dense preview of the DataFrame.
1689
1888
  #
1690
- # @return [DataFrame]
1889
+ # The formatting shows one line per column so that wide dataframes display
1890
+ # cleanly. Each line shows the column name, the data type, and the first
1891
+ # few values.
1691
1892
  #
1692
- # @example
1893
+ # @param max_items_per_column [Integer]
1894
+ # Maximum number of items to show per column.
1895
+ # @param max_colname_length [Integer]
1896
+ # Maximum length of the displayed column names; values that exceed
1897
+ # this value are truncated with a trailing ellipsis.
1898
+ # @param return_type [nil, 'self', 'frame', 'string']
1899
+ # Modify the return format:
1900
+ #
1901
+ # - `nil` (default): Print the glimpse output to stdout, returning `nil`.
1902
+ # - `"self"`: Print the glimpse output to stdout, returning the *original* frame.
1903
+ # - `"frame"`: Return the glimpse output as a new DataFrame.
1904
+ # - `"string"`: Return the glimpse output as a string.
1905
+ #
1906
+ # @return [Object]
1907
+ #
1908
+ # @example Return the glimpse output as a DataFrame:
1693
1909
  # df = Polars::DataFrame.new(
1694
1910
  # {
1695
1911
  # "a" => [1.0, 2.8, 3.0],
1696
1912
  # "b" => [4, 5, nil],
1697
1913
  # "c" => [true, false, true],
1698
1914
  # "d" => [nil, "b", "c"],
1699
- # "e" => ["usd", "eur", nil]
1915
+ # "e" => ["usd", "eur", nil],
1916
+ # "f" => [Date.new(2020, 1, 1), Date.new(2021, 1, 2), Date.new(2022, 1, 1)]
1700
1917
  # }
1701
1918
  # )
1702
- # df.describe
1919
+ # df.glimpse(return_type: "frame")
1703
1920
  # # =>
1704
- # # shape: (7, 6)
1705
- # # ┌────────────┬──────────┬──────────┬──────────┬──────┬──────┐
1706
- # # │ describe ┆ a b ┆ c d ┆ e
1707
- # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ ---
1708
- # # │ str f64 ┆ f64 ┆ f64 ┆ str ┆ str
1709
- # # ╞════════════╪══════════╪══════════╪══════════╪══════╪══════╡
1710
- # # │ count3.0 3.0 3.0 ┆ 3 ┆ 3
1711
- # # │ null_count ┆ 0.01.0 0.0 ┆ 1 ┆ 1
1712
- # # │ mean ┆ 2.266667 ┆ 4.50.666667 null null
1713
- # # │ std 1.101514 0.707107 ┆ 0.57735 ┆ null null
1714
- # # │ min ┆ 1.04.0 0.0 ┆ b ┆ eur
1715
- # # │ max ┆ 3.05.0 1.0 ┆ c ┆ usd
1716
- # # │ median ┆ 2.8 ┆ 4.5 ┆ 1.0 ┆ null ┆ null │
1717
- # # └────────────┴──────────┴──────────┴──────────┴──────┴──────┘
1718
- def describe
1719
- describe_cast = lambda do |stat|
1720
- columns = []
1721
- self.columns.each_with_index do |s, i|
1722
- if self[s].is_numeric || self[s].is_boolean
1723
- columns << stat[0.., i].cast(:f64)
1724
- else
1725
- # for dates, strings, etc, we cast to string so that all
1726
- # statistics can be shown
1727
- columns << stat[0.., i].cast(:str)
1728
- end
1921
+ # # shape: (6, 3)
1922
+ # # ┌────────┬───────┬─────────────────────────────────┐
1923
+ # # │ columndtypevalues
1924
+ # # │ --- ┆ --- ┆ ---
1925
+ # # │ str ┆ str list[str]
1926
+ # # ╞════════╪═══════╪═════════════════════════════════╡
1927
+ # # │ af64 ["1.0", "2.8", "3.0"]
1928
+ # # │ bi64 ["4", "5", null]
1929
+ # # │ cbool ["true", "false", "true"]
1930
+ # # │ d str [null, ""b"", ""c""]
1931
+ # # │ estr [""usd"", ""eur"", null]
1932
+ # # │ fdate ["2020-01-01", "2021-01-02", "…
1933
+ # # └────────┴───────┴─────────────────────────────────┘
1934
+ def glimpse(
1935
+ max_items_per_column: 10,
1936
+ max_colname_length: 50,
1937
+ return_type: nil
1938
+ )
1939
+ if return_type.nil?
1940
+ return_frame = false
1941
+ else
1942
+ return_frame = return_type == "frame"
1943
+ if !return_frame && !["self", "string"].include?(return_type)
1944
+ msg = "invalid `return_type`; found #{return_type.inspect}, expected one of 'string', 'frame', 'self', or nil"
1945
+ raise ArgumentError, msg
1729
1946
  end
1730
- self.class.new(columns)
1731
1947
  end
1732
1948
 
1733
- summary = _from_rbdf(
1734
- Polars.concat(
1735
- [
1736
- describe_cast.(
1737
- self.class.new(columns.to_h { |c| [c, [height]] })
1738
- ),
1739
- describe_cast.(null_count),
1740
- describe_cast.(mean),
1741
- describe_cast.(std),
1742
- describe_cast.(min),
1743
- describe_cast.(max),
1744
- describe_cast.(median)
1745
- ]
1746
- )._df
1747
- )
1748
- summary.insert_column(
1749
- 0,
1750
- Polars::Series.new(
1751
- "describe",
1752
- ["count", "null_count", "mean", "std", "min", "max", "median"],
1949
+ # always print at most this number of values (mainly ensures that
1950
+ # we do not cast long arrays to strings, which would be slow)
1951
+ max_n_values = [max_items_per_column, height].min
1952
+ schema = self.schema
1953
+
1954
+ _column_to_row_output = lambda do |col_name, dtype|
1955
+ fn = schema[col_name] == String ? :inspect : :to_s
1956
+ values = self[0...max_n_values, col_name].to_a
1957
+ if col_name.length > max_colname_length
1958
+ col_name = col_name[0...(max_colname_length - 1)] + "…"
1959
+ end
1960
+ dtype_str = Plr.dtype_str_repr(dtype)
1961
+ if !return_frame
1962
+ dtype_str = "<#{dtype_str}>"
1963
+ end
1964
+ [col_name, dtype_str, values.map { |v| !v.nil? ? v.send(fn) : nil }]
1965
+ end
1966
+
1967
+ data = self.schema.map { |s, dtype| _column_to_row_output.(s, dtype) }
1968
+
1969
+ # output one row per column
1970
+ if return_frame
1971
+ DataFrame.new(
1972
+ data,
1973
+ orient: "row",
1974
+ schema: {"column" => String, "dtype" => String, "values" => List.new(String)}
1753
1975
  )
1976
+ else
1977
+ raise Todo
1978
+ end
1979
+ end
1980
+
1981
+ # Summary statistics for a DataFrame.
1982
+ #
1983
+ # @param percentiles [Array]
1984
+ # One or more percentiles to include in the summary statistics.
1985
+ # All values must be in the range `[0, 1]`.
1986
+ # @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable']
1987
+ # Interpolation method used when calculating percentiles.
1988
+ #
1989
+ # @return [DataFrame]
1990
+ #
1991
+ # @example Show default frame statistics:
1992
+ # df = Polars::DataFrame.new(
1993
+ # {
1994
+ # "float" => [1.0, 2.8, 3.0],
1995
+ # "int" => [40, 50, nil],
1996
+ # "bool" => [true, false, true],
1997
+ # "str" => ["zz", "xx", "yy"],
1998
+ # "date" => [Date.new(2020, 1, 1), Date.new(2021, 7, 5), Date.new(2022, 12, 31)]
1999
+ # }
2000
+ # )
2001
+ # df.describe
2002
+ # # =>
2003
+ # # shape: (9, 6)
2004
+ # # ┌────────────┬──────────┬──────────┬──────────┬──────┬─────────────────────────┐
2005
+ # # │ statistic ┆ float ┆ int ┆ bool ┆ str ┆ date │
2006
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
2007
+ # # │ str ┆ f64 ┆ f64 ┆ f64 ┆ str ┆ str │
2008
+ # # ╞════════════╪══════════╪══════════╪══════════╪══════╪═════════════════════════╡
2009
+ # # │ count ┆ 3.0 ┆ 2.0 ┆ 3.0 ┆ 3 ┆ 3 │
2010
+ # # │ null_count ┆ 0.0 ┆ 1.0 ┆ 0.0 ┆ 0 ┆ 0 │
2011
+ # # │ mean ┆ 2.266667 ┆ 45.0 ┆ 0.666667 ┆ null ┆ 2021-07-02 16:00:00 UTC │
2012
+ # # │ std ┆ 1.101514 ┆ 7.071068 ┆ null ┆ null ┆ null │
2013
+ # # │ min ┆ 1.0 ┆ 40.0 ┆ 0.0 ┆ xx ┆ 2020-01-01 │
2014
+ # # │ 25% ┆ 2.8 ┆ 40.0 ┆ null ┆ null ┆ 2021-07-05 │
2015
+ # # │ 50% ┆ 2.8 ┆ 50.0 ┆ null ┆ null ┆ 2021-07-05 │
2016
+ # # │ 75% ┆ 3.0 ┆ 50.0 ┆ null ┆ null ┆ 2022-12-31 │
2017
+ # # │ max ┆ 3.0 ┆ 50.0 ┆ 1.0 ┆ zz ┆ 2022-12-31 │
2018
+ # # └────────────┴──────────┴──────────┴──────────┴──────┴─────────────────────────┘
2019
+ #
2020
+ # @example Customize which percentiles are displayed, applying linear interpolation:
2021
+ # df.describe(
2022
+ # percentiles: [0.1, 0.3, 0.5, 0.7, 0.9],
2023
+ # interpolation: "linear"
2024
+ # )
2025
+ # # =>
2026
+ # # shape: (11, 6)
2027
+ # # ┌────────────┬──────────┬──────────┬──────────┬──────┬─────────────────────────┐
2028
+ # # │ statistic ┆ float ┆ int ┆ bool ┆ str ┆ date │
2029
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
2030
+ # # │ str ┆ f64 ┆ f64 ┆ f64 ┆ str ┆ str │
2031
+ # # ╞════════════╪══════════╪══════════╪══════════╪══════╪═════════════════════════╡
2032
+ # # │ count ┆ 3.0 ┆ 2.0 ┆ 3.0 ┆ 3 ┆ 3 │
2033
+ # # │ null_count ┆ 0.0 ┆ 1.0 ┆ 0.0 ┆ 0 ┆ 0 │
2034
+ # # │ mean ┆ 2.266667 ┆ 45.0 ┆ 0.666667 ┆ null ┆ 2021-07-02 16:00:00 UTC │
2035
+ # # │ std ┆ 1.101514 ┆ 7.071068 ┆ null ┆ null ┆ null │
2036
+ # # │ min ┆ 1.0 ┆ 40.0 ┆ 0.0 ┆ xx ┆ 2020-01-01 │
2037
+ # # │ … ┆ … ┆ … ┆ … ┆ … ┆ … │
2038
+ # # │ 30% ┆ 2.08 ┆ 43.0 ┆ null ┆ null ┆ 2020-11-26 │
2039
+ # # │ 50% ┆ 2.8 ┆ 45.0 ┆ null ┆ null ┆ 2021-07-05 │
2040
+ # # │ 70% ┆ 2.88 ┆ 47.0 ┆ null ┆ null ┆ 2022-02-07 │
2041
+ # # │ 90% ┆ 2.96 ┆ 49.0 ┆ null ┆ null ┆ 2022-09-13 │
2042
+ # # │ max ┆ 3.0 ┆ 50.0 ┆ 1.0 ┆ zz ┆ 2022-12-31 │
2043
+ # # └────────────┴──────────┴──────────┴──────────┴──────┴─────────────────────────┘
2044
+ def describe(
2045
+ percentiles: [0.25, 0.5, 0.75],
2046
+ interpolation: "nearest"
2047
+ )
2048
+ if columns.empty?
2049
+ msg = "cannot describe a DataFrame that has no columns"
2050
+ raise TypeError, msg
2051
+ end
2052
+
2053
+ lazy.describe(
2054
+ percentiles: percentiles, interpolation: interpolation
1754
2055
  )
1755
- summary
1756
2056
  end
1757
2057
 
1758
2058
  # Find the index of a column by name.
@@ -1771,13 +2071,12 @@ module Polars
1771
2071
  def get_column_index(name)
1772
2072
  _df.get_column_index(name)
1773
2073
  end
1774
- alias_method :find_idx_by_name, :get_column_index
1775
2074
 
1776
2075
  # Replace a column at an index location.
1777
2076
  #
1778
2077
  # @param index [Integer]
1779
2078
  # Column index.
1780
- # @param series [Series]
2079
+ # @param column [Series]
1781
2080
  # Series that will replace the column.
1782
2081
  #
1783
2082
  # @return [DataFrame]
@@ -1803,23 +2102,31 @@ module Polars
1803
2102
  # # │ 20 ┆ 7 ┆ b │
1804
2103
  # # │ 30 ┆ 8 ┆ c │
1805
2104
  # # └───────┴─────┴─────┘
1806
- def replace_column(index, series)
2105
+ def replace_column(index, column)
1807
2106
  if index < 0
1808
- index = columns.length + index
2107
+ index = width + index
1809
2108
  end
1810
- _df.replace_column(index, series._s)
2109
+ _df.replace_column(index, column._s)
1811
2110
  self
1812
2111
  end
1813
- alias_method :replace_at_idx, :replace_column
1814
2112
 
1815
- # Sort the DataFrame by column.
2113
+ # Sort the dataframe by the given columns.
1816
2114
  #
1817
- # @param by [String]
1818
- # By which column to sort.
1819
- # @param reverse [Boolean]
1820
- # Reverse/descending sort.
2115
+ # @param by [Object]
2116
+ # Column(s) to sort by. Accepts expression input, including selectors. Strings
2117
+ # are parsed as column names.
2118
+ # @param more_by [Array]
2119
+ # Additional columns to sort by, specified as positional arguments.
2120
+ # @param descending [Boolean]
2121
+ # Sort in descending order. When sorting by multiple columns, can be specified
2122
+ # per column by passing an array of booleans.
1821
2123
  # @param nulls_last [Boolean]
1822
- # Place null values last. Can only be used if sorted by a single column.
2124
+ # Place null values last; can specify a single boolean applying to all columns
2125
+ # or an array of booleans for per-column control.
2126
+ # @param multithreaded [Boolean]
2127
+ # Sort using multiple threads.
2128
+ # @param maintain_order [Boolean]
2129
+ # Whether the order should be maintained if elements are equal.
1823
2130
  #
1824
2131
  # @return [DataFrame]
1825
2132
  #
@@ -1831,7 +2138,7 @@ module Polars
1831
2138
  # "ham" => ["a", "b", "c"]
1832
2139
  # }
1833
2140
  # )
1834
- # df.sort("foo", reverse: true)
2141
+ # df.sort("foo", descending: true)
1835
2142
  # # =>
1836
2143
  # # shape: (3, 3)
1837
2144
  # # ┌─────┬─────┬─────┐
@@ -1847,7 +2154,7 @@ module Polars
1847
2154
  # @example Sort by multiple columns.
1848
2155
  # df.sort(
1849
2156
  # [Polars.col("foo"), Polars.col("bar")**2],
1850
- # reverse: [true, false]
2157
+ # descending: [true, false]
1851
2158
  # )
1852
2159
  # # =>
1853
2160
  # # shape: (3, 3)
@@ -1860,24 +2167,38 @@ module Polars
1860
2167
  # # │ 2 ┆ 7.0 ┆ b │
1861
2168
  # # │ 1 ┆ 6.0 ┆ a │
1862
2169
  # # └─────┴─────┴─────┘
1863
- def sort(by, reverse: false, nulls_last: false)
2170
+ def sort(
2171
+ by,
2172
+ *more_by,
2173
+ descending: false,
2174
+ nulls_last: false,
2175
+ multithreaded: true,
2176
+ maintain_order: false
2177
+ )
1864
2178
  lazy
1865
- .sort(by, reverse: reverse, nulls_last: nulls_last)
1866
- .collect(no_optimization: true)
2179
+ .sort(
2180
+ by,
2181
+ *more_by,
2182
+ descending: descending,
2183
+ nulls_last: nulls_last,
2184
+ multithreaded: multithreaded,
2185
+ maintain_order: maintain_order
2186
+ )
2187
+ .collect(optimizations: QueryOptFlags._eager)
1867
2188
  end
1868
2189
 
1869
2190
  # Sort the DataFrame by column in-place.
1870
2191
  #
1871
2192
  # @param by [String]
1872
2193
  # By which column to sort.
1873
- # @param reverse [Boolean]
2194
+ # @param descending [Boolean]
1874
2195
  # Reverse/descending sort.
1875
2196
  # @param nulls_last [Boolean]
1876
2197
  # Place null values last. Can only be used if sorted by a single column.
1877
2198
  #
1878
2199
  # @return [DataFrame]
1879
- def sort!(by, reverse: false, nulls_last: false)
1880
- self._df = sort(by, reverse: reverse, nulls_last: nulls_last)._df
2200
+ def sort!(by, descending: false, nulls_last: false)
2201
+ self._df = sort(by, descending: descending, nulls_last: nulls_last)._df
1881
2202
  end
1882
2203
 
1883
2204
  # Execute a SQL query against the DataFrame.
@@ -1949,7 +2270,7 @@ module Polars
1949
2270
  # # │ 3 ┆ false ┆ xx:xx ┆ 2077 ┆ 0.0 │
1950
2271
  # # └─────┴───────────┴───────┴──────┴──────┘
1951
2272
  def sql(query, table_name: "self")
1952
- ctx = SQLContext.new(eager_execution: true)
2273
+ ctx = SQLContext.new(eager: true)
1953
2274
  name = table_name || "self"
1954
2275
  ctx.register(name, self)
1955
2276
  ctx.execute(query)
@@ -1969,7 +2290,7 @@ module Polars
1969
2290
  # Accepts expression input. Strings are parsed as column names.
1970
2291
  # @param reverse [Object]
1971
2292
  # Consider the `k` smallest elements of the `by` column(s) (instead of the `k`
1972
- # largest). This can be specified per column by passing a sequence of
2293
+ # largest). This can be specified per column by passing an array of
1973
2294
  # booleans.
1974
2295
  #
1975
2296
  # @return [DataFrame]
@@ -2017,12 +2338,12 @@ module Polars
2017
2338
  lazy
2018
2339
  .top_k(k, by: by, reverse: reverse)
2019
2340
  .collect(
2020
- # optimizations=QueryOptFlags(
2021
- # projection_pushdown=False,
2022
- # predicate_pushdown=False,
2023
- # comm_subplan_elim=False,
2024
- # slice_pushdown=True
2025
- # )
2341
+ optimizations: QueryOptFlags.new(
2342
+ projection_pushdown: false,
2343
+ predicate_pushdown: false,
2344
+ comm_subplan_elim: false,
2345
+ slice_pushdown: true
2346
+ )
2026
2347
  )
2027
2348
  end
2028
2349
 
@@ -2040,7 +2361,7 @@ module Polars
2040
2361
  # Accepts expression input. Strings are parsed as column names.
2041
2362
  # @param reverse [Object]
2042
2363
  # Consider the `k` largest elements of the `by` column(s) (instead of the `k`
2043
- # smallest). This can be specified per column by passing a sequence of
2364
+ # smallest). This can be specified per column by passing an array of
2044
2365
  # booleans.
2045
2366
  #
2046
2367
  # @return [DataFrame]
@@ -2088,12 +2409,12 @@ module Polars
2088
2409
  lazy
2089
2410
  .bottom_k(k, by: by, reverse: reverse)
2090
2411
  .collect(
2091
- # optimizations=QueryOptFlags(
2092
- # projection_pushdown=False,
2093
- # predicate_pushdown=False,
2094
- # comm_subplan_elim=False,
2095
- # slice_pushdown=True,
2096
- # )
2412
+ optimizations: QueryOptFlags.new(
2413
+ projection_pushdown: false,
2414
+ predicate_pushdown: false,
2415
+ comm_subplan_elim: false,
2416
+ slice_pushdown: true
2417
+ )
2097
2418
  )
2098
2419
  end
2099
2420
 
@@ -2128,36 +2449,6 @@ module Polars
2128
2449
  def equals(other, null_equal: true)
2129
2450
  _df.equals(other._df, null_equal)
2130
2451
  end
2131
- alias_method :frame_equal, :equals
2132
-
2133
- # Replace a column by a new Series.
2134
- #
2135
- # @param column [String]
2136
- # Column to replace.
2137
- # @param new_col [Series]
2138
- # New column to insert.
2139
- #
2140
- # @return [DataFrame]
2141
- #
2142
- # @example
2143
- # df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
2144
- # s = Polars::Series.new([10, 20, 30])
2145
- # df.replace("foo", s)
2146
- # # =>
2147
- # # shape: (3, 2)
2148
- # # ┌─────┬─────┐
2149
- # # │ foo ┆ bar │
2150
- # # │ --- ┆ --- │
2151
- # # │ i64 ┆ i64 │
2152
- # # ╞═════╪═════╡
2153
- # # │ 10 ┆ 4 │
2154
- # # │ 20 ┆ 5 │
2155
- # # │ 30 ┆ 6 │
2156
- # # └─────┴─────┘
2157
- def replace(column, new_col)
2158
- _df.replace(column.to_s, new_col._s)
2159
- self
2160
- end
2161
2452
 
2162
2453
  # Get a slice of this DataFrame.
2163
2454
  #
@@ -2330,7 +2621,7 @@ module Polars
2330
2621
  # # │ 80.0 ┆ 25.5 ┆ null │
2331
2622
  # # └──────┴───────┴──────┘
2332
2623
  def drop_nans(subset: nil)
2333
- lazy.drop_nans(subset: subset).collect(_eager: true)
2624
+ lazy.drop_nans(subset: subset).collect(optimizations: QueryOptFlags._eager)
2334
2625
  end
2335
2626
 
2336
2627
  # Drop all rows that contain one or more null values.
@@ -2375,12 +2666,12 @@ module Polars
2375
2666
  # # │ 3 ┆ 8 ┆ null │
2376
2667
  # # └─────┴─────┴──────┘
2377
2668
  def drop_nulls(subset: nil)
2378
- lazy.drop_nulls(subset: subset).collect(_eager: true)
2669
+ lazy.drop_nulls(subset: subset).collect(optimizations: QueryOptFlags._eager)
2379
2670
  end
2380
2671
 
2381
2672
  # Offers a structured way to apply a sequence of user-defined functions (UDFs).
2382
2673
  #
2383
- # @param func [Object]
2674
+ # @param function [Object]
2384
2675
  # Callable; will receive the frame as the first parameter,
2385
2676
  # followed by any given args/kwargs.
2386
2677
  # @param args [Object]
@@ -2397,7 +2688,7 @@ module Polars
2397
2688
  #
2398
2689
  # @example
2399
2690
  # cast_str_to_int = lambda do |data, col_name:|
2400
- # data.with_column(Polars.col(col_name).cast(:i64))
2691
+ # data.with_columns(Polars.col(col_name).cast(Polars::Int64))
2401
2692
  # end
2402
2693
  #
2403
2694
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => ["10", "20", "30", "40"]})
@@ -2414,8 +2705,8 @@ module Polars
2414
2705
  # # │ 3 ┆ 30 │
2415
2706
  # # │ 4 ┆ 40 │
2416
2707
  # # └─────┴─────┘
2417
- def pipe(func, *args, **kwargs, &block)
2418
- func.call(self, *args, **kwargs, &block)
2708
+ def pipe(function, *args, **kwargs, &block)
2709
+ function.(self, *args, **kwargs, &block)
2419
2710
  end
2420
2711
 
2421
2712
  # Add a column at index 0 that counts the rows.
@@ -2449,7 +2740,6 @@ module Polars
2449
2740
  def with_row_index(name: "index", offset: 0)
2450
2741
  _from_rbdf(_df.with_row_index(name, offset))
2451
2742
  end
2452
- alias_method :with_row_count, :with_row_index
2453
2743
 
2454
2744
  # Start a group by operation.
2455
2745
  #
@@ -2459,6 +2749,9 @@ module Polars
2459
2749
  # Make sure that the order of the groups remain consistent. This is more
2460
2750
  # expensive than a default group by. Note that this only works in expression
2461
2751
  # aggregations.
2752
+ # @param named_by [Hash]
2753
+ # Additional columns to group by, specified as keyword arguments.
2754
+ # The columns will be renamed to the keyword used.
2462
2755
  #
2463
2756
  # @return [GroupBy]
2464
2757
  #
@@ -2482,23 +2775,23 @@ module Polars
2482
2775
  # # │ b ┆ 11 │
2483
2776
  # # │ c ┆ 6 │
2484
2777
  # # └─────┴─────┘
2485
- def group_by(by, maintain_order: false)
2486
- if !Utils.bool?(maintain_order)
2487
- raise TypeError, "invalid input for group_by arg `maintain_order`: #{maintain_order}."
2778
+ def group_by(by, maintain_order: false, **named_by)
2779
+ named_by.each do |_, value|
2780
+ if !(value.is_a?(::String) || value.is_a?(Expr) || value.is_a?(Series))
2781
+ msg = "Expected Polars expression or object convertible to one, got #{value.class.name}."
2782
+ raise TypeError, msg
2783
+ end
2488
2784
  end
2489
2785
  GroupBy.new(
2490
2786
  self,
2491
2787
  by,
2788
+ **named_by,
2492
2789
  maintain_order: maintain_order
2493
2790
  )
2494
2791
  end
2495
- alias_method :groupby, :group_by
2496
- alias_method :group, :group_by
2497
2792
 
2498
2793
  # Create rolling groups based on a time column.
2499
2794
  #
2500
- # Also works for index values of type `:i32` or `:i64`.
2501
- #
2502
2795
  # Different from a `dynamic_group_by` the windows are now determined by the
2503
2796
  # individual values and are not of constant intervals. For constant intervals use
2504
2797
  # *group_by_dynamic*
@@ -2532,16 +2825,16 @@ module Polars
2532
2825
  # This column must be sorted in ascending order. If not the output will not
2533
2826
  # make sense.
2534
2827
  #
2535
- # In case of a rolling group by on indices, dtype needs to be one of
2536
- # `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
2537
- # performance matters use an `:i64` column.
2828
+ # In case of a rolling operation on indices, dtype needs to be one of
2829
+ # \\\\{UInt32, UInt64, Int32, Int64}. Note that the first three get temporarily
2830
+ # cast to Int64, so if performance matters use an Int64 column.
2538
2831
  # @param period [Object]
2539
2832
  # Length of the window.
2540
2833
  # @param offset [Object]
2541
2834
  # Offset of the window. Default is -period.
2542
2835
  # @param closed ["right", "left", "both", "none"]
2543
2836
  # Define whether the temporal window interval is closed or not.
2544
- # @param by [Object]
2837
+ # @param group_by [Object]
2545
2838
  # Also group by this column/these columns.
2546
2839
  #
2547
2840
  # @return [RollingGroupBy]
@@ -2555,7 +2848,7 @@ module Polars
2555
2848
  # "2020-01-03 19:45:32",
2556
2849
  # "2020-01-08 23:16:43"
2557
2850
  # ]
2558
- # df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
2851
+ # df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_columns(
2559
2852
  # Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
2560
2853
  # )
2561
2854
  # df.rolling(index_column: "dt", period: "2d").agg(
@@ -2584,14 +2877,12 @@ module Polars
2584
2877
  period:,
2585
2878
  offset: nil,
2586
2879
  closed: "right",
2587
- by: nil
2880
+ group_by: nil
2588
2881
  )
2589
- RollingGroupBy.new(self, index_column, period, offset, closed, by)
2882
+ RollingGroupBy.new(self, index_column, period, offset, closed, group_by)
2590
2883
  end
2591
- alias_method :groupby_rolling, :rolling
2592
- alias_method :group_by_rolling, :rolling
2593
2884
 
2594
- # Group based on a time value (or index value of type `:i32`, `:i64`).
2885
+ # Group based on a time value (or index value of type Int32, Int64).
2595
2886
  #
2596
2887
  # Time windows are calculated and rows are assigned to windows. Different from a
2597
2888
  # normal group by is that a row can be member of multiple groups. The time/index
@@ -2634,8 +2925,8 @@ module Polars
2634
2925
  # make sense.
2635
2926
  #
2636
2927
  # In case of a dynamic group by on indices, dtype needs to be one of
2637
- # `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
2638
- # performance matters use an `:i64` column.
2928
+ # \\\\{Int32, Int64}. Note that Int32 gets temporarily cast to Int64, so if
2929
+ # performance matters use an Int64 column.
2639
2930
  # @param every
2640
2931
  # Interval of the window.
2641
2932
  # @param period
@@ -2643,15 +2934,21 @@ module Polars
2643
2934
  # @param offset
2644
2935
  # Offset of the window if nil and period is nil it will be equal to negative
2645
2936
  # `every`.
2646
- # @param truncate
2647
- # Truncate the time value to the window lower bound.
2648
2937
  # @param include_boundaries
2649
2938
  # Add the lower and upper bound of the window to the "_lower_bound" and
2650
2939
  # "_upper_bound" columns. This will impact performance because it's harder to
2651
2940
  # parallelize
2652
2941
  # @param closed ["right", "left", "both", "none"]
2653
2942
  # Define whether the temporal window interval is closed or not.
2654
- # @param by
2943
+ # @param label ['left', 'right', 'datapoint']
2944
+ # Define which label to use for the window:
2945
+ #
2946
+ # - 'left': lower boundary of the window
2947
+ # - 'right': upper boundary of the window
2948
+ # - 'datapoint': the first value of the index column in the given window.
2949
+ # If you don't need the label to be at one of the boundaries, choose this
2950
+ # option for maximum performance
2951
+ # @param group_by
2655
2952
  # Also group by this column/these columns
2656
2953
  # @param start_by ['window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
2657
2954
  # The strategy to determine the start of the first window by.
@@ -2793,7 +3090,7 @@ module Polars
2793
3090
  # "time",
2794
3091
  # every: "1h",
2795
3092
  # closed: "both",
2796
- # by: "groups",
3093
+ # group_by: "groups",
2797
3094
  # include_boundaries: true
2798
3095
  # ).agg([Polars.col("time").count.alias("time_count")])
2799
3096
  # # =>
@@ -2843,10 +3140,10 @@ module Polars
2843
3140
  every:,
2844
3141
  period: nil,
2845
3142
  offset: nil,
2846
- truncate: true,
2847
3143
  include_boundaries: false,
2848
3144
  closed: "left",
2849
- by: nil,
3145
+ label: "left",
3146
+ group_by: nil,
2850
3147
  start_by: "window"
2851
3148
  )
2852
3149
  DynamicGroupBy.new(
@@ -2855,14 +3152,13 @@ module Polars
2855
3152
  every,
2856
3153
  period,
2857
3154
  offset,
2858
- truncate,
2859
3155
  include_boundaries,
2860
3156
  closed,
2861
- by,
3157
+ label,
3158
+ group_by,
2862
3159
  start_by
2863
3160
  )
2864
3161
  end
2865
- alias_method :groupby_dynamic, :group_by_dynamic
2866
3162
 
2867
3163
  # Upsample a DataFrame at a regular frequency.
2868
3164
  #
@@ -2871,7 +3167,7 @@ module Polars
2871
3167
  # Note that this column has to be sorted for the output to make sense.
2872
3168
  # @param every [String]
2873
3169
  # interval will start 'every' duration
2874
- # @param by [Object]
3170
+ # @param group_by [Object]
2875
3171
  # First group by these columns and then upsample for every group
2876
3172
  # @param maintain_order [Boolean]
2877
3173
  # Keep the ordering predictable. This is slower.
@@ -2910,7 +3206,7 @@ module Polars
2910
3206
  # }
2911
3207
  # ).set_sorted("time")
2912
3208
  # df.upsample(
2913
- # time_column: "time", every: "1mo", by: "groups", maintain_order: true
3209
+ # time_column: "time", every: "1mo", group_by: "groups", maintain_order: true
2914
3210
  # ).select(Polars.all.forward_fill)
2915
3211
  # # =>
2916
3212
  # # shape: (7, 3)
@@ -2930,20 +3226,20 @@ module Polars
2930
3226
  def upsample(
2931
3227
  time_column:,
2932
3228
  every:,
2933
- by: nil,
3229
+ group_by: nil,
2934
3230
  maintain_order: false
2935
3231
  )
2936
- if by.nil?
2937
- by = []
3232
+ if group_by.nil?
3233
+ group_by = []
2938
3234
  end
2939
- if by.is_a?(::String)
2940
- by = [by]
3235
+ if group_by.is_a?(::String)
3236
+ group_by = [group_by]
2941
3237
  end
2942
3238
 
2943
3239
  every = Utils.parse_as_duration_string(every)
2944
3240
 
2945
3241
  _from_rbdf(
2946
- _df.upsample(by, time_column, every, maintain_order)
3242
+ _df.upsample(group_by, time_column, every, maintain_order)
2947
3243
  )
2948
3244
  end
2949
3245
 
@@ -3096,7 +3392,7 @@ module Polars
3096
3392
  allow_exact_matches: allow_exact_matches,
3097
3393
  check_sortedness: check_sortedness
3098
3394
  )
3099
- .collect(no_optimization: true)
3395
+ .collect(optimizations: QueryOptFlags._eager)
3100
3396
  end
3101
3397
 
3102
3398
  # Join in SQL-like fashion.
@@ -3119,7 +3415,7 @@ module Polars
3119
3415
  # * *one_to_one* - “1:1”: check if join keys are unique in both left and right datasets
3120
3416
  # * *one_to_many* - “1:m”: check if join keys are unique in left dataset
3121
3417
  # * *many_to_one* - “m:1”: check if join keys are unique in right dataset
3122
- # @param join_nulls [Boolean]
3418
+ # @param nulls_equal [Boolean]
3123
3419
  # Join on null values. By default null values will never produce matches.
3124
3420
  # @param coalesce [Boolean]
3125
3421
  # Coalescing behavior (merging of join columns).
@@ -3235,7 +3531,7 @@ module Polars
3235
3531
  how: "inner",
3236
3532
  suffix: "_right",
3237
3533
  validate: "m:m",
3238
- join_nulls: false,
3534
+ nulls_equal: false,
3239
3535
  coalesce: nil,
3240
3536
  maintain_order: nil
3241
3537
  )
@@ -3248,11 +3544,11 @@ module Polars
3248
3544
  how: how,
3249
3545
  suffix: suffix,
3250
3546
  validate: validate,
3251
- join_nulls: join_nulls,
3547
+ nulls_equal: nulls_equal,
3252
3548
  coalesce: coalesce,
3253
3549
  maintain_order: maintain_order
3254
3550
  )
3255
- .collect(no_optimization: true)
3551
+ .collect(optimizations: QueryOptFlags._eager)
3256
3552
  end
3257
3553
 
3258
3554
  # Perform a join based on one or multiple (in)equality predicates.
@@ -3347,7 +3643,7 @@ module Polars
3347
3643
  *predicates,
3348
3644
  suffix: suffix
3349
3645
  )
3350
- .collect(_eager: true)
3646
+ .collect(optimizations: QueryOptFlags._eager)
3351
3647
  end
3352
3648
 
3353
3649
  # Apply a custom/user-defined function (UDF) over the rows of the DataFrame.
@@ -3410,61 +3706,14 @@ module Polars
3410
3706
  # # │ 9 │
3411
3707
  # # │ 14 │
3412
3708
  # # └─────┘
3413
- def map_rows(return_dtype: nil, inference_size: 256, &f)
3414
- out, is_df = _df.map_rows(f, return_dtype, inference_size)
3709
+ def map_rows(return_dtype: nil, inference_size: 256, &function)
3710
+ out, is_df = _df.map_rows(function, return_dtype, inference_size)
3415
3711
  if is_df
3416
3712
  _from_rbdf(out)
3417
3713
  else
3418
3714
  _from_rbdf(Utils.wrap_s(out).to_frame._df)
3419
3715
  end
3420
3716
  end
3421
- alias_method :apply, :map_rows
3422
-
3423
- # Return a new DataFrame with the column added or replaced.
3424
- #
3425
- # @param column [Object]
3426
- # Series, where the name of the Series refers to the column in the DataFrame.
3427
- #
3428
- # @return [DataFrame]
3429
- #
3430
- # @example Added
3431
- # df = Polars::DataFrame.new(
3432
- # {
3433
- # "a" => [1, 3, 5],
3434
- # "b" => [2, 4, 6]
3435
- # }
3436
- # )
3437
- # df.with_column((Polars.col("b") ** 2).alias("b_squared"))
3438
- # # =>
3439
- # # shape: (3, 3)
3440
- # # ┌─────┬─────┬───────────┐
3441
- # # │ a ┆ b ┆ b_squared │
3442
- # # │ --- ┆ --- ┆ --- │
3443
- # # │ i64 ┆ i64 ┆ i64 │
3444
- # # ╞═════╪═════╪═══════════╡
3445
- # # │ 1 ┆ 2 ┆ 4 │
3446
- # # │ 3 ┆ 4 ┆ 16 │
3447
- # # │ 5 ┆ 6 ┆ 36 │
3448
- # # └─────┴─────┴───────────┘
3449
- #
3450
- # @example Replaced
3451
- # df.with_column(Polars.col("a") ** 2)
3452
- # # =>
3453
- # # shape: (3, 2)
3454
- # # ┌─────┬─────┐
3455
- # # │ a ┆ b │
3456
- # # │ --- ┆ --- │
3457
- # # │ i64 ┆ i64 │
3458
- # # ╞═════╪═════╡
3459
- # # │ 1 ┆ 2 │
3460
- # # │ 9 ┆ 4 │
3461
- # # │ 25 ┆ 6 │
3462
- # # └─────┴─────┘
3463
- def with_column(column)
3464
- lazy
3465
- .with_column(column)
3466
- .collect(no_optimization: true, string_cache: false)
3467
- end
3468
3717
 
3469
3718
  # Return a new DataFrame grown horizontally by stacking multiple Series to it.
3470
3719
  #
@@ -3510,7 +3759,7 @@ module Polars
3510
3759
 
3511
3760
  # Grow this DataFrame vertically by stacking a DataFrame to it.
3512
3761
  #
3513
- # @param df [DataFrame]
3762
+ # @param other [DataFrame]
3514
3763
  # DataFrame to stack.
3515
3764
  # @param in_place [Boolean]
3516
3765
  # Modify in place
@@ -3545,12 +3794,12 @@ module Polars
3545
3794
  # # │ 3 ┆ 8 ┆ c │
3546
3795
  # # │ 4 ┆ 9 ┆ d │
3547
3796
  # # └─────┴─────┴─────┘
3548
- def vstack(df, in_place: false)
3797
+ def vstack(other, in_place: false)
3549
3798
  if in_place
3550
- _df.vstack_mut(df._df)
3799
+ _df.vstack_mut(other._df)
3551
3800
  self
3552
3801
  else
3553
- _from_rbdf(_df.vstack(df._df))
3802
+ _from_rbdf(_df.vstack(other._df))
3554
3803
  end
3555
3804
  end
3556
3805
 
@@ -3603,6 +3852,9 @@ module Polars
3603
3852
  #
3604
3853
  # @param columns [Object]
3605
3854
  # Column(s) to drop.
3855
+ # @param strict [Boolean]
3856
+ # Validate that all column names exist in the current schema,
3857
+ # and throw an exception if any do not.
3606
3858
  #
3607
3859
  # @return [DataFrame]
3608
3860
  #
@@ -3654,8 +3906,8 @@ module Polars
3654
3906
  # # │ 7.0 │
3655
3907
  # # │ 8.0 │
3656
3908
  # # └─────┘
3657
- def drop(*columns)
3658
- lazy.drop(*columns).collect(_eager: true)
3909
+ def drop(*columns, strict: true)
3910
+ lazy.drop(*columns, strict: strict).collect(optimizations: QueryOptFlags._eager)
3659
3911
  end
3660
3912
 
3661
3913
  # Drop in place.
@@ -3768,7 +4020,7 @@ module Polars
3768
4020
  # df.cast(Polars::String).to_h(as_series: false)
3769
4021
  # # => {"foo"=>["1", "2", "3"], "bar"=>["6.0", "7.0", "8.0"], "ham"=>["2020-01-02", "2021-03-04", "2022-05-06"]}
3770
4022
  def cast(dtypes, strict: true)
3771
- lazy.cast(dtypes, strict: strict).collect(_eager: true)
4023
+ lazy.cast(dtypes, strict: strict).collect(optimizations: QueryOptFlags._eager)
3772
4024
  end
3773
4025
 
3774
4026
  # Create an empty copy of the current DataFrame.
@@ -3818,7 +4070,6 @@ module Polars
3818
4070
  clone
3819
4071
  end
3820
4072
  end
3821
- alias_method :cleared, :clear
3822
4073
 
3823
4074
  # clone handled by initialize_copy
3824
4075
 
@@ -3880,10 +4131,13 @@ module Polars
3880
4131
  _df.get_columns.map { |s| Utils.wrap_s(s) }
3881
4132
  end
3882
4133
 
3883
- # Get a single column as Series by name.
4134
+ # Get a single column by name.
3884
4135
  #
3885
4136
  # @param name [String]
3886
4137
  # Name of the column to retrieve.
4138
+ # @param default [Object]
4139
+ # Value to return if the column does not exist; if not explicitly set and
4140
+ # the column is not present a `ColumnNotFoundError` exception is raised.
3887
4141
  #
3888
4142
  # @return [Series]
3889
4143
  #
@@ -3898,8 +4152,22 @@ module Polars
3898
4152
  # # 2
3899
4153
  # # 3
3900
4154
  # # ]
3901
- def get_column(name)
3902
- self[name]
4155
+ #
4156
+ # @example
4157
+ # df.get_column("baz", default: Polars::Series.new("baz", ["?", "?", "?"]))
4158
+ # # =>
4159
+ # # shape: (3,)
4160
+ # # Series: 'baz' [str]
4161
+ # # [
4162
+ # # "?"
4163
+ # # "?"
4164
+ # # "?"
4165
+ # # ]
4166
+ def get_column(name, default: NO_DEFAULT)
4167
+ Utils.wrap_s(_df.get_column(name.to_s))
4168
+ rescue ColumnNotFoundError
4169
+ raise if default.eql?(NO_DEFAULT)
4170
+ default
3903
4171
  end
3904
4172
 
3905
4173
  # Fill null values using the specified value or strategy.
@@ -3985,14 +4253,14 @@ module Polars
3985
4253
  _from_rbdf(
3986
4254
  lazy
3987
4255
  .fill_null(value, strategy: strategy, limit: limit, matches_supertype: matches_supertype)
3988
- .collect(no_optimization: true)
4256
+ .collect(optimizations: QueryOptFlags._eager)
3989
4257
  ._df
3990
4258
  )
3991
4259
  end
3992
4260
 
3993
4261
  # Fill floating point NaN values by an Expression evaluation.
3994
4262
  #
3995
- # @param fill_value [Object]
4263
+ # @param value [Object]
3996
4264
  # Value to fill NaN with.
3997
4265
  #
3998
4266
  # @return [DataFrame]
@@ -4021,14 +4289,16 @@ module Polars
4021
4289
  # # │ 99.0 ┆ 99.0 │
4022
4290
  # # │ 4.0 ┆ 13.0 │
4023
4291
  # # └──────┴──────┘
4024
- def fill_nan(fill_value)
4025
- lazy.fill_nan(fill_value).collect(no_optimization: true)
4292
+ def fill_nan(value)
4293
+ lazy.fill_nan(value).collect(optimizations: QueryOptFlags._eager)
4026
4294
  end
4027
4295
 
4028
4296
  # Explode `DataFrame` to long format by exploding a column with Lists.
4029
4297
  #
4030
4298
  # @param columns [Object]
4031
4299
  # Column of LargeList type.
4300
+ # @param more_columns [Array]
4301
+ # Additional names of columns to explode, specified as positional arguments.
4032
4302
  #
4033
4303
  # @return [DataFrame]
4034
4304
  #
@@ -4056,8 +4326,8 @@ module Polars
4056
4326
  # # │ c ┆ 7 │
4057
4327
  # # │ c ┆ 8 │
4058
4328
  # # └─────────┴─────────┘
4059
- def explode(columns)
4060
- lazy.explode(columns).collect(no_optimization: true)
4329
+ def explode(columns, *more_columns)
4330
+ lazy.explode(columns, *more_columns).collect(optimizations: QueryOptFlags._eager)
4061
4331
  end
4062
4332
 
4063
4333
  # Create a spreadsheet-style pivot table as a DataFrame.
@@ -4202,13 +4472,12 @@ module Polars
4202
4472
  # # │ y ┆ c ┆ 4 │
4203
4473
  # # │ z ┆ c ┆ 6 │
4204
4474
  # # └─────┴──────────┴───────┘
4205
- def unpivot(on, index: nil, variable_name: nil, value_name: nil)
4475
+ def unpivot(on = nil, index: nil, variable_name: nil, value_name: nil)
4206
4476
  on = on.nil? ? [] : Utils._expand_selectors(self, on)
4207
4477
  index = index.nil? ? [] : Utils._expand_selectors(self, index)
4208
4478
 
4209
4479
  _from_rbdf(_df.unpivot(on, index, value_name, variable_name))
4210
4480
  end
4211
- alias_method :melt, :unpivot
4212
4481
 
4213
4482
  # Unstack a long table to a wide form without doing an aggregation.
4214
4483
  #
@@ -4313,7 +4582,7 @@ module Polars
4313
4582
 
4314
4583
  if how == "horizontal"
4315
4584
  df = (
4316
- df.with_column(
4585
+ df.with_columns(
4317
4586
  (Polars.arange(0, n_cols * n_rows, eager: true) % n_cols).alias(
4318
4587
  "__sort_order"
4319
4588
  )
@@ -4336,8 +4605,10 @@ module Polars
4336
4605
 
4337
4606
  # Split into multiple DataFrames partitioned by groups.
4338
4607
  #
4339
- # @param groups [Object]
4608
+ # @param by [Object]
4340
4609
  # Groups to partition by.
4610
+ # @param more_by [Array]
4611
+ # Additional names of columns to group by, specified as positional arguments.
4341
4612
  # @param maintain_order [Boolean]
4342
4613
  # Keep predictable output order. This is slower as it requires an extra sort
4343
4614
  # operation.
@@ -4387,7 +4658,7 @@ module Polars
4387
4658
  # @example
4388
4659
  # df.partition_by("foo", maintain_order: true, as_dict: true)
4389
4660
  # # =>
4390
- # # {"A"=>shape: (2, 3)
4661
+ # # {["A"]=>shape: (2, 3)
4391
4662
  # # ┌─────┬─────┬─────┐
4392
4663
  # # │ foo ┆ N ┆ bar │
4393
4664
  # # │ --- ┆ --- ┆ --- │
@@ -4395,7 +4666,7 @@ module Polars
4395
4666
  # # ╞═════╪═════╪═════╡
4396
4667
  # # │ A ┆ 1 ┆ k │
4397
4668
  # # │ A ┆ 2 ┆ l │
4398
- # # └─────┴─────┴─────┘, "B"=>shape: (2, 3)
4669
+ # # └─────┴─────┴─────┘, ["B"]=>shape: (2, 3)
4399
4670
  # # ┌─────┬─────┬─────┐
4400
4671
  # # │ foo ┆ N ┆ bar │
4401
4672
  # # │ --- ┆ --- ┆ --- │
@@ -4403,7 +4674,7 @@ module Polars
4403
4674
  # # ╞═════╪═════╪═════╡
4404
4675
  # # │ B ┆ 2 ┆ m │
4405
4676
  # # │ B ┆ 4 ┆ m │
4406
- # # └─────┴─────┴─────┘, "C"=>shape: (1, 3)
4677
+ # # └─────┴─────┴─────┘, ["C"]=>shape: (1, 3)
4407
4678
  # # ┌─────┬─────┬─────┐
4408
4679
  # # │ foo ┆ N ┆ bar │
4409
4680
  # # │ --- ┆ --- ┆ --- │
@@ -4411,30 +4682,26 @@ module Polars
4411
4682
  # # ╞═════╪═════╪═════╡
4412
4683
  # # │ C ┆ 2 ┆ l │
4413
4684
  # # └─────┴─────┴─────┘}
4414
- def partition_by(groups, maintain_order: true, include_key: true, as_dict: false)
4415
- if groups.is_a?(::String)
4416
- groups = [groups]
4417
- elsif !groups.is_a?(::Array)
4418
- groups = Array(groups)
4419
- end
4685
+ def partition_by(by, *more_by, maintain_order: true, include_key: true, as_dict: false)
4686
+ by_parsed = Utils._expand_selectors(self, by, *more_by)
4687
+
4688
+ partitions = _df.partition_by(by_parsed, maintain_order, include_key).map { |df| _from_rbdf(df) }
4420
4689
 
4421
4690
  if as_dict
4422
- out = {}
4423
- if groups.length == 1
4424
- _df.partition_by(groups, maintain_order, include_key).each do |df|
4425
- df = _from_rbdf(df)
4426
- out[df[groups][0, 0]] = df
4427
- end
4691
+ if include_key
4692
+ names = partitions.map { |p| p.select(by_parsed).row(0) }
4428
4693
  else
4429
- _df.partition_by(groups, maintain_order, include_key).each do |df|
4430
- df = _from_rbdf(df)
4431
- out[df[groups].row(0)] = df
4694
+ if !maintain_order
4695
+ msg = "cannot use `partition_by` with `maintain_order: false, include_key: false, as_dict: true`"
4696
+ raise ArgumentError, msg
4432
4697
  end
4698
+ names = select(by_parsed).unique(maintain_order: true).rows
4433
4699
  end
4434
- out
4435
- else
4436
- _df.partition_by(groups, maintain_order, include_key).map { |df| _from_rbdf(df) }
4700
+
4701
+ return names.zip(partitions).to_h
4437
4702
  end
4703
+
4704
+ partitions
4438
4705
  end
4439
4706
 
4440
4707
  # Shift values by the given period.
@@ -4480,41 +4747,8 @@ module Polars
4480
4747
  # # │ 3 ┆ 8 ┆ c │
4481
4748
  # # │ null ┆ null ┆ null │
4482
4749
  # # └──────┴──────┴──────┘
4483
- def shift(n, fill_value: nil)
4484
- lazy.shift(n, fill_value: fill_value).collect(_eager: true)
4485
- end
4486
-
4487
- # Shift the values by a given period and fill the resulting null values.
4488
- #
4489
- # @param periods [Integer]
4490
- # Number of places to shift (may be negative).
4491
- # @param fill_value [Object]
4492
- # fill nil values with this value.
4493
- #
4494
- # @return [DataFrame]
4495
- #
4496
- # @example
4497
- # df = Polars::DataFrame.new(
4498
- # {
4499
- # "foo" => [1, 2, 3],
4500
- # "bar" => [6, 7, 8],
4501
- # "ham" => ["a", "b", "c"]
4502
- # }
4503
- # )
4504
- # df.shift_and_fill(1, 0)
4505
- # # =>
4506
- # # shape: (3, 3)
4507
- # # ┌─────┬─────┬─────┐
4508
- # # │ foo ┆ bar ┆ ham │
4509
- # # │ --- ┆ --- ┆ --- │
4510
- # # │ i64 ┆ i64 ┆ str │
4511
- # # ╞═════╪═════╪═════╡
4512
- # # │ 0 ┆ 0 ┆ 0 │
4513
- # # │ 1 ┆ 6 ┆ a │
4514
- # # │ 2 ┆ 7 ┆ b │
4515
- # # └─────┴─────┴─────┘
4516
- def shift_and_fill(periods, fill_value)
4517
- shift(periods, fill_value: fill_value)
4750
+ def shift(n = 1, fill_value: nil)
4751
+ lazy.shift(n, fill_value: fill_value).collect(optimizations: QueryOptFlags._eager)
4518
4752
  end
4519
4753
 
4520
4754
  # Get a mask of all duplicated rows in this DataFrame.
@@ -4570,6 +4804,16 @@ module Polars
4570
4804
  # Start a lazy query from this point.
4571
4805
  #
4572
4806
  # @return [LazyFrame]
4807
+ #
4808
+ # @example
4809
+ # df = Polars::DataFrame.new(
4810
+ # {
4811
+ # "a" => [nil, 2, 3, 4],
4812
+ # "b" => [0.5, nil, 2.5, 13],
4813
+ # "c" => [true, true, false, nil]
4814
+ # }
4815
+ # )
4816
+ # df.lazy
4573
4817
  def lazy
4574
4818
  wrap_ldf(_df.lazy)
4575
4819
  end
@@ -4663,7 +4907,7 @@ module Polars
4663
4907
  # # │ 10 │
4664
4908
  # # └─────────┘
4665
4909
  def select(*exprs, **named_exprs)
4666
- lazy.select(*exprs, **named_exprs).collect(_eager: true)
4910
+ lazy.select(*exprs, **named_exprs).collect(optimizations: QueryOptFlags._eager)
4667
4911
  end
4668
4912
 
4669
4913
  # Select columns from this DataFrame.
@@ -4683,7 +4927,7 @@ module Polars
4683
4927
  def select_seq(*exprs, **named_exprs)
4684
4928
  lazy
4685
4929
  .select_seq(*exprs, **named_exprs)
4686
- .collect(_eager: true)
4930
+ .collect(optimizations: QueryOptFlags._eager)
4687
4931
  end
4688
4932
 
4689
4933
  # Add columns to this DataFrame.
@@ -4795,7 +5039,7 @@ module Polars
4795
5039
  # # │ 4 ┆ 13.0 ┆ true ┆ 52.0 ┆ false │
4796
5040
  # # └─────┴──────┴───────┴──────┴───────┘
4797
5041
  def with_columns(*exprs, **named_exprs)
4798
- lazy.with_columns(*exprs, **named_exprs).collect(_eager: true)
5042
+ lazy.with_columns(*exprs, **named_exprs).collect(optimizations: QueryOptFlags._eager)
4799
5043
  end
4800
5044
 
4801
5045
  # Add columns to this DataFrame.
@@ -4820,7 +5064,7 @@ module Polars
4820
5064
  )
4821
5065
  lazy
4822
5066
  .with_columns_seq(*exprs, **named_exprs)
4823
- .collect(_eager: true)
5067
+ .collect(optimizations: QueryOptFlags._eager)
4824
5068
  end
4825
5069
 
4826
5070
  # Get number of chunks used by the ChunkedArrays of this DataFrame.
@@ -4876,7 +5120,7 @@ module Polars
4876
5120
  # # │ 3 ┆ 8 ┆ c │
4877
5121
  # # └─────┴─────┴─────┘
4878
5122
  def max
4879
- lazy.max.collect(_eager: true)
5123
+ lazy.max.collect(optimizations: QueryOptFlags._eager)
4880
5124
  end
4881
5125
 
4882
5126
  # Get the maximum value horizontally across columns.
@@ -4926,7 +5170,7 @@ module Polars
4926
5170
  # # │ 1 ┆ 6 ┆ a │
4927
5171
  # # └─────┴─────┴─────┘
4928
5172
  def min
4929
- lazy.min.collect(_eager: true)
5173
+ lazy.min.collect(optimizations: QueryOptFlags._eager)
4930
5174
  end
4931
5175
 
4932
5176
  # Get the minimum value horizontally across columns.
@@ -4976,7 +5220,7 @@ module Polars
4976
5220
  # # │ 6 ┆ 21 ┆ null │
4977
5221
  # # └─────┴─────┴──────┘
4978
5222
  def sum
4979
- lazy.sum.collect(_eager: true)
5223
+ lazy.sum.collect(optimizations: QueryOptFlags._eager)
4980
5224
  end
4981
5225
 
4982
5226
  # Sum all values horizontally across columns.
@@ -5032,7 +5276,7 @@ module Polars
5032
5276
  # # │ 2.0 ┆ 7.0 ┆ null │
5033
5277
  # # └─────┴─────┴──────┘
5034
5278
  def mean
5035
- lazy.mean.collect(_eager: true)
5279
+ lazy.mean.collect(optimizations: QueryOptFlags._eager)
5036
5280
  end
5037
5281
 
5038
5282
  # Take the mean of all values horizontally across columns.
@@ -5103,7 +5347,7 @@ module Polars
5103
5347
  # # │ 0.816497 ┆ 0.816497 ┆ null │
5104
5348
  # # └──────────┴──────────┴──────┘
5105
5349
  def std(ddof: 1)
5106
- lazy.std(ddof: ddof).collect(_eager: true)
5350
+ lazy.std(ddof: ddof).collect(optimizations: QueryOptFlags._eager)
5107
5351
  end
5108
5352
 
5109
5353
  # Aggregate the columns of this DataFrame to their variance value.
@@ -5144,7 +5388,7 @@ module Polars
5144
5388
  # # │ 0.666667 ┆ 0.666667 ┆ null │
5145
5389
  # # └──────────┴──────────┴──────┘
5146
5390
  def var(ddof: 1)
5147
- lazy.var(ddof: ddof).collect(_eager: true)
5391
+ lazy.var(ddof: ddof).collect(optimizations: QueryOptFlags._eager)
5148
5392
  end
5149
5393
 
5150
5394
  # Aggregate the columns of this DataFrame to their median value.
@@ -5170,7 +5414,7 @@ module Polars
5170
5414
  # # │ 2.0 ┆ 7.0 ┆ null │
5171
5415
  # # └─────┴─────┴──────┘
5172
5416
  def median
5173
- lazy.median.collect(_eager: true)
5417
+ lazy.median.collect(optimizations: QueryOptFlags._eager)
5174
5418
  end
5175
5419
 
5176
5420
  # Aggregate the columns of this DataFrame to their product values.
@@ -5227,7 +5471,7 @@ module Polars
5227
5471
  # # │ 2.0 ┆ 7.0 ┆ null │
5228
5472
  # # └─────┴─────┴──────┘
5229
5473
  def quantile(quantile, interpolation: "nearest")
5230
- lazy.quantile(quantile, interpolation: interpolation).collect(_eager: true)
5474
+ lazy.quantile(quantile, interpolation: interpolation).collect(optimizations: QueryOptFlags._eager)
5231
5475
  end
5232
5476
 
5233
5477
  # Get one hot encoded dummy variables.
@@ -5294,7 +5538,7 @@ module Polars
5294
5538
  # "c" => [true, true, true, false, true, true]
5295
5539
  # }
5296
5540
  # )
5297
- # df.unique
5541
+ # df.unique(maintain_order: true)
5298
5542
  # # =>
5299
5543
  # # shape: (5, 3)
5300
5544
  # # ┌─────┬─────┬───────┐
@@ -5308,11 +5552,11 @@ module Polars
5308
5552
  # # │ 4 ┆ 3.0 ┆ true │
5309
5553
  # # │ 5 ┆ 3.0 ┆ true │
5310
5554
  # # └─────┴─────┴───────┘
5311
- def unique(maintain_order: true, subset: nil, keep: "first")
5555
+ def unique(maintain_order: false, subset: nil, keep: "any")
5312
5556
  self._from_rbdf(
5313
5557
  lazy
5314
5558
  .unique(maintain_order: maintain_order, subset: subset, keep: keep)
5315
- .collect(no_optimization: true)
5559
+ .collect(optimizations: QueryOptFlags._eager)
5316
5560
  ._df
5317
5561
  )
5318
5562
  end
@@ -5405,9 +5649,9 @@ module Polars
5405
5649
  # Sample from this DataFrame.
5406
5650
  #
5407
5651
  # @param n [Integer]
5408
- # Number of items to return. Cannot be used with `frac`. Defaults to 1 if
5409
- # `frac` is nil.
5410
- # @param frac [Float]
5652
+ # Number of items to return. Cannot be used with `fraction`. Defaults to 1 if
5653
+ # `fraction` is nil.
5654
+ # @param fraction [Float]
5411
5655
  # Fraction of items to return. Cannot be used with `n`.
5412
5656
  # @param with_replacement [Boolean]
5413
5657
  # Allow values to be sampled more than once.
@@ -5440,20 +5684,20 @@ module Polars
5440
5684
  # # └─────┴─────┴─────┘
5441
5685
  def sample(
5442
5686
  n: nil,
5443
- frac: nil,
5687
+ fraction: nil,
5444
5688
  with_replacement: false,
5445
5689
  shuffle: false,
5446
5690
  seed: nil
5447
5691
  )
5448
- if !n.nil? && !frac.nil?
5449
- raise ArgumentError, "cannot specify both `n` and `frac`"
5692
+ if !n.nil? && !fraction.nil?
5693
+ raise ArgumentError, "cannot specify both `n` and `fraction`"
5450
5694
  end
5451
5695
 
5452
- if n.nil? && !frac.nil?
5453
- frac = Series.new("frac", [frac]) unless frac.is_a?(Series)
5696
+ if n.nil? && !fraction.nil?
5697
+ fraction = Series.new("fraction", [fraction]) unless fraction.is_a?(Series)
5454
5698
 
5455
5699
  return _from_rbdf(
5456
- _df.sample_frac(frac._s, with_replacement, shuffle, seed)
5700
+ _df.sample_frac(fraction._s, with_replacement, shuffle, seed)
5457
5701
  )
5458
5702
  end
5459
5703
 
@@ -5725,7 +5969,7 @@ module Polars
5725
5969
  if include_key
5726
5970
  values = self
5727
5971
  else
5728
- data_cols = schema.keys - key
5972
+ data_cols = schema.names - key
5729
5973
  values = select(data_cols)
5730
5974
  end
5731
5975
 
@@ -5768,7 +6012,7 @@ module Polars
5768
6012
  # @example
5769
6013
  # df.iter_rows(named: true).map { |row| row["b"] }
5770
6014
  # # => [2, 4, 6]
5771
- def iter_rows(named: false, buffer_size: 500, &block)
6015
+ def iter_rows(named: false, buffer_size: 512, &block)
5772
6016
  return to_enum(:iter_rows, named: named, buffer_size: buffer_size) unless block_given?
5773
6017
 
5774
6018
  # load into the local namespace for a modest performance boost in the hot loops
@@ -5939,11 +6183,10 @@ module Polars
5939
6183
  def gather_every(n, offset = 0)
5940
6184
  select(F.col("*").gather_every(n, offset))
5941
6185
  end
5942
- alias_method :take_every, :gather_every
5943
6186
 
5944
6187
  # Hash and combine the rows in this DataFrame.
5945
6188
  #
5946
- # The hash value is of type `:u64`.
6189
+ # The hash value is of type `UInt64`.
5947
6190
  #
5948
6191
  # @param seed [Integer]
5949
6192
  # Random seed parameter. Defaults to 0.
@@ -6050,7 +6293,7 @@ module Polars
6050
6293
  # # {4,"four"}
6051
6294
  # # {5,"five"}
6052
6295
  # # ]
6053
- def to_struct(name)
6296
+ def to_struct(name = "")
6054
6297
  Utils.wrap_s(_df.to_struct(name))
6055
6298
  end
6056
6299
 
@@ -6092,7 +6335,7 @@ module Polars
6092
6335
  # # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
6093
6336
  # # └────────┴─────┴─────┴──────┴───────────┴───────┘
6094
6337
  def unnest(columns, *more_columns, separator: nil)
6095
- lazy.unnest(columns, *more_columns, separator: separator).collect(_eager: true)
6338
+ lazy.unnest(columns, *more_columns, separator: separator).collect(optimizations: QueryOptFlags._eager)
6096
6339
  end
6097
6340
 
6098
6341
  # Requires NumPy
@@ -6138,7 +6381,7 @@ module Polars
6138
6381
  # # │ elise ┆ 44 │
6139
6382
  # # └────────┴─────┘
6140
6383
  def merge_sorted(other, key)
6141
- lazy.merge_sorted(other.lazy, key).collect(_eager: true)
6384
+ lazy.merge_sorted(other.lazy, key).collect(optimizations: QueryOptFlags._eager)
6142
6385
  end
6143
6386
 
6144
6387
  # Flag a column as sorted.
@@ -6160,7 +6403,7 @@ module Polars
6160
6403
  )
6161
6404
  lazy
6162
6405
  .set_sorted(column, descending: descending)
6163
- .collect(no_optimization: true)
6406
+ .collect(optimizations: QueryOptFlags._eager)
6164
6407
  end
6165
6408
 
6166
6409
  # Update the values in this `DataFrame` with the values in `other`.
@@ -6291,7 +6534,7 @@ module Polars
6291
6534
  include_nulls: include_nulls,
6292
6535
  maintain_order: maintain_order
6293
6536
  )
6294
- .collect(_eager: true)
6537
+ .collect(optimizations: QueryOptFlags._eager)
6295
6538
  end
6296
6539
 
6297
6540
  private
@@ -6357,282 +6600,6 @@ module Polars
6357
6600
  raise ArgumentError, "Unsupported idxs datatype."
6358
6601
  end
6359
6602
 
6360
- # @private
6361
- def self.expand_hash_scalars(data, schema_overrides: nil, strict: true, order: nil, nan_to_null: false)
6362
- updated_data = {}
6363
- unless data.empty?
6364
- dtypes = schema_overrides || {}
6365
- array_len = data.values.map { |val| Utils.arrlen(val) || 0 }.max
6366
- if array_len > 0
6367
- data.each do |name, val|
6368
- dtype = dtypes[name]
6369
- if val.is_a?(Hash) && dtype != Struct
6370
- updated_data[name] = DataFrame.new(val, strict: strict).to_struct(name)
6371
- elsif !Utils.arrlen(val).nil?
6372
- updated_data[name] = Series.new(::String.new(name), val, dtype: dtype, strict: strict)
6373
- elsif val.nil? || [Integer, Float, TrueClass, FalseClass, ::String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
6374
- dtype = Polars::Float64 if val.nil? && dtype.nil?
6375
- updated_data[name] = Series.new(::String.new(name), [val], dtype: dtype, strict: strict).extend_constant(val, array_len - 1)
6376
- else
6377
- raise Todo
6378
- end
6379
- end
6380
- elsif data.values.all? { |val| Utils.arrlen(val) == 0 }
6381
- data.each do |name, val|
6382
- updated_data[name] = Series.new(name, val, dtype: dtypes[name], strict: strict)
6383
- end
6384
- elsif data.values.all? { |val| Utils.arrlen(val).nil? }
6385
- data.each do |name, val|
6386
- updated_data[name] = Series.new(name, [val], dtype: dtypes[name], strict: strict)
6387
- end
6388
- end
6389
- end
6390
- updated_data
6391
- end
6392
-
6393
- # @private
6394
- def self.hash_to_rbdf(data, schema: nil, schema_overrides: nil, strict: true, nan_to_null: nil)
6395
- if schema.is_a?(Hash) && !data.empty?
6396
- if !data.all? { |col, _| schema[col] }
6397
- raise ArgumentError, "The given column-schema names do not match the data dictionary"
6398
- end
6399
-
6400
- data = schema.to_h { |col| [col, data[col]] }
6401
- end
6402
-
6403
- column_names, schema_overrides = _unpack_schema(
6404
- schema, lookup_names: data.keys, schema_overrides: schema_overrides
6405
- )
6406
- if column_names.empty?
6407
- column_names = data.keys
6408
- end
6409
-
6410
- if data.empty? && !schema_overrides.empty?
6411
- data_series = column_names.map { |name| Series.new(name, [], dtype: schema_overrides[name], strict: strict, nan_to_null: nan_to_null)._s }
6412
- else
6413
- data_series = expand_hash_scalars(data, schema_overrides: schema_overrides, strict: strict, nan_to_null: nan_to_null).values.map(&:_s)
6414
- end
6415
-
6416
- data_series = _handle_columns_arg(data_series, columns: column_names, from_hash: true)
6417
- RbDataFrame.new(data_series)
6418
- end
6419
-
6420
- # @private
6421
- def self.include_unknowns(schema, cols)
6422
- cols.to_h { |col| [col, schema.fetch(col, Unknown)] }
6423
- end
6424
-
6425
- # @private
6426
- def self._unpack_schema(schema, schema_overrides: nil, n_expected: nil, lookup_names: nil, include_overrides_in_columns: false)
6427
- if schema.is_a?(Hash)
6428
- schema = schema.to_a
6429
- end
6430
- column_names =
6431
- (schema || []).map.with_index do |col, i|
6432
- if col.is_a?(::String)
6433
- col || "column_#{i}"
6434
- else
6435
- col[0]
6436
- end
6437
- end
6438
- if column_names.empty? && n_expected
6439
- column_names = n_expected.times.map { |i| "column_#{i}" }
6440
- end
6441
- # TODO zip_longest
6442
- lookup = column_names.zip(lookup_names || []).to_h
6443
-
6444
- column_dtypes =
6445
- (schema || []).select { |col| !col.is_a?(::String) && col[1] }.to_h do |col|
6446
- [lookup[col[0]] || col[0], col[1]]
6447
- end
6448
-
6449
- if schema_overrides && schema_overrides.any?
6450
- column_dtypes.merge!(schema_overrides)
6451
- end
6452
-
6453
- column_dtypes.each do |col, dtype|
6454
- if !Utils.is_polars_dtype(dtype, include_unknown: true) && !dtype.nil?
6455
- column_dtypes[col] = Utils.rb_type_to_dtype(dtype)
6456
- end
6457
- end
6458
-
6459
- [column_names, column_dtypes]
6460
- end
6461
-
6462
- def self._handle_columns_arg(data, columns: nil, from_hash: false)
6463
- if columns.nil? || columns.empty?
6464
- data
6465
- else
6466
- if data.empty?
6467
- columns.map { |c| Series.new(c, nil)._s }
6468
- elsif data.length == columns.length
6469
- if from_hash
6470
- series_map = data.to_h { |s| [s.name, s] }
6471
- if columns.all? { |col| series_map.key?(col) }
6472
- return columns.map { |col| series_map[col] }
6473
- end
6474
- end
6475
-
6476
- columns.each_with_index do |c, i|
6477
- # not in-place?
6478
- data[i].rename(c)
6479
- end
6480
- data
6481
- else
6482
- raise ArgumentError, "Dimensions of columns arg must match data dimensions."
6483
- end
6484
- end
6485
- end
6486
-
6487
- def self._post_apply_columns(rbdf, columns, structs: nil, schema_overrides: nil, strict: true)
6488
- rbdf_columns = rbdf.columns
6489
- rbdf_dtypes = rbdf.dtypes
6490
- columns, dtypes = _unpack_schema(
6491
- (columns || rbdf_columns), schema_overrides: schema_overrides
6492
- )
6493
- column_subset = []
6494
- if columns != rbdf_columns
6495
- if columns.length < rbdf_columns.length && columns == rbdf_columns.first(columns.length)
6496
- column_subset = columns
6497
- else
6498
- rbdf.set_column_names(columns)
6499
- end
6500
- end
6501
-
6502
- column_casts = []
6503
- columns.each_with_index do |col, i|
6504
- if dtypes[col] == Categorical # != rbdf_dtypes[i]
6505
- column_casts << Polars.col(col).cast(Categorical, strict: strict)._rbexpr
6506
- elsif structs&.any? && structs.include?(col) && structs[col] != rbdf_dtypes[i]
6507
- column_casts << Polars.col(col).cast(structs[col], strict: strict)._rbexpr
6508
- elsif dtypes.include?(col) && dtypes[col] != rbdf_dtypes[i]
6509
- column_casts << Polars.col(col).cast(dtypes[col], strict: strict)._rbexpr
6510
- end
6511
- end
6512
-
6513
- if column_casts.any? || column_subset.any?
6514
- rbdf = rbdf.lazy
6515
- if column_casts.any?
6516
- rbdf = rbdf.with_columns(column_casts)
6517
- end
6518
- if column_subset.any?
6519
- rbdf = rbdf.select(column_subset.map { |col| Polars.col(col)._rbexpr })
6520
- end
6521
- rbdf = rbdf.collect
6522
- end
6523
-
6524
- rbdf
6525
- end
6526
-
6527
- # @private
6528
- def self.sequence_to_rbdf(data, schema: nil, schema_overrides: nil, strict: true, orient: nil, infer_schema_length: 50)
6529
- columns = schema
6530
-
6531
- if data.length == 0
6532
- return hash_to_rbdf({}, schema: schema, schema_overrides: schema_overrides, strict: strict)
6533
- end
6534
-
6535
- if data[0].is_a?(Series)
6536
- # series_names = data.map(&:name)
6537
- # columns, dtypes = _unpack_schema(columns || series_names, n_expected: data.length)
6538
- data_series = []
6539
- data.each do |s|
6540
- data_series << s._s
6541
- end
6542
- elsif data[0].is_a?(Hash)
6543
- column_names, dtypes = _unpack_schema(columns)
6544
- schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
6545
- rbdf = RbDataFrame.from_hashes(data, schema, schema_overrides, strict, infer_schema_length)
6546
- if column_names
6547
- rbdf = _post_apply_columns(rbdf, column_names)
6548
- end
6549
- return rbdf
6550
- elsif data[0].is_a?(::Array)
6551
- first_element = data[0]
6552
- if orient.nil? && !columns.nil?
6553
- row_types = first_element.filter_map { |value| value.class }.uniq
6554
- if row_types.include?(Integer) && row_types.include?(Float)
6555
- row_types.delete(Integer)
6556
- end
6557
- orient = row_types.length == 1 ? "col" : "row"
6558
- end
6559
-
6560
- if orient == "row"
6561
- column_names, schema_overrides = _unpack_schema(
6562
- schema, schema_overrides: schema_overrides, n_expected: first_element.length
6563
- )
6564
- local_schema_override = (
6565
- schema_overrides.any? ? _include_unknowns(schema_overrides, column_names) : {}
6566
- )
6567
- if column_names.any? && first_element.length > 0 && first_element.length != column_names.length
6568
- raise ArgumentError, "the row data does not match the number of columns"
6569
- end
6570
-
6571
- unpack_nested = false
6572
- local_schema_override.each do |col, tp|
6573
- if [Categorical, Enum].include?(tp)
6574
- local_schema_override[col] = String
6575
- elsif !unpack_nested && [Unknown, Struct].include?(tp.base_type)
6576
- raise Todo
6577
- end
6578
- end
6579
-
6580
- if unpack_nested
6581
- raise Todo
6582
- else
6583
- rbdf = RbDataFrame.from_rows(
6584
- data,
6585
- infer_schema_length,
6586
- local_schema_override.any? ? local_schema_override : nil
6587
- )
6588
- end
6589
- if column_names.any? || schema_overrides.any?
6590
- rbdf = _post_apply_columns(
6591
- rbdf, column_names, schema_overrides: schema_overrides, strict: strict
6592
- )
6593
- end
6594
- return rbdf
6595
- elsif orient == "col" || orient.nil?
6596
- column_names, schema_overrides = _unpack_schema(
6597
- schema, schema_overrides: schema_overrides, n_expected: data.length
6598
- )
6599
- data_series =
6600
- data.map.with_index do |element, i|
6601
- Series.new(column_names[i], element, dtype: schema_overrides[column_names[i]], strict: strict)._s
6602
- end
6603
- return RbDataFrame.new(data_series)
6604
- else
6605
- raise ArgumentError, "orient must be one of {{'col', 'row', nil}}, got #{orient} instead."
6606
- end
6607
- end
6608
-
6609
- data_series = _handle_columns_arg(data_series, columns: columns)
6610
- RbDataFrame.new(data_series)
6611
- end
6612
-
6613
- # @private
6614
- def self._include_unknowns(schema, cols)
6615
- cols.to_h { |col| [col, schema[col] || Unknown] }
6616
- end
6617
-
6618
- # @private
6619
- def self.series_to_rbdf(data, schema: nil, schema_overrides: nil, strict: true)
6620
- data_series = [data._s]
6621
- series_name = data_series.map(&:name)
6622
- column_names, schema_overrides = _unpack_schema(
6623
- schema || series_name, schema_overrides: schema_overrides, n_expected: 1
6624
- )
6625
- if schema_overrides.any?
6626
- new_dtype = schema_overrides.values[0]
6627
- if new_dtype != data.dtype
6628
- data_series[0] = data_series[0].cast(new_dtype, strict)
6629
- end
6630
- end
6631
-
6632
- data_series = _handle_columns_arg(data_series, columns: column_names)
6633
- RbDataFrame.new(data_series)
6634
- end
6635
-
6636
6603
  def wrap_ldf(ldf)
6637
6604
  LazyFrame._from_rbldf(ldf)
6638
6605
  end
@@ -6641,6 +6608,11 @@ module Polars
6641
6608
  self.class._from_rbdf(rb_df)
6642
6609
  end
6643
6610
 
6611
+ def _replace(column, new_column)
6612
+ self._df.replace(column, new_column._s)
6613
+ self
6614
+ end
6615
+
6644
6616
  def _comp(other, op)
6645
6617
  if other.is_a?(DataFrame)
6646
6618
  _compare_to_other_df(other, op)
@@ -6658,7 +6630,7 @@ module Polars
6658
6630
  end
6659
6631
 
6660
6632
  suffix = "__POLARS_CMP_OTHER"
6661
- other_renamed = other.select(Polars.all.suffix(suffix))
6633
+ other_renamed = other.select(Polars.all.name.suffix(suffix))
6662
6634
  combined = Polars.concat([self, other_renamed], how: "horizontal")
6663
6635
 
6664
6636
  expr = case op
@@ -6726,5 +6698,268 @@ module Polars
6726
6698
  yield
6727
6699
  end
6728
6700
  end
6701
+
6702
+ def get_series_item_by_key(s, key)
6703
+ if key.is_a?(Integer)
6704
+ return s._s.get_index_signed(key)
6705
+
6706
+ elsif key.is_a?(Range)
6707
+ return _select_elements_by_slice(s, key)
6708
+
6709
+ elsif key.is_a?(::Array)
6710
+ if key.empty?
6711
+ return s.clear
6712
+ end
6713
+
6714
+ first = key[0]
6715
+ if Utils.bool?(first)
6716
+ _raise_on_boolean_mask
6717
+ end
6718
+
6719
+ begin
6720
+ indices = Series.new("", key, dtype: Int64)
6721
+ rescue TypeError
6722
+ msg = "cannot select elements using Sequence with elements of type #{first.class.name.inspect}"
6723
+ raise TypeError, msg
6724
+ end
6725
+
6726
+ indices = _convert_series_to_indices(indices, s.len)
6727
+ return _select_elements_by_index(s, indices)
6728
+
6729
+ elsif key.is_a?(Series)
6730
+ indices = _convert_series_to_indices(key, s.len)
6731
+ return _select_elements_by_index(s, indices)
6732
+ end
6733
+
6734
+ msg = "cannot select elements using key of type #{key.class.name.inspect}: #{key.inspect}"
6735
+ raise TypeError, msg
6736
+ end
6737
+
6738
+ def _select_elements_by_slice(s, key)
6739
+ Slice.new(s).apply(key)
6740
+ end
6741
+
6742
+ def _select_elements_by_index(s, key)
6743
+ s.send(:_from_rbseries, s._s.gather_with_series(key._s))
6744
+ end
6745
+
6746
+ def get_df_item_by_key(df, key)
6747
+ if key.size == 2
6748
+ row_key, col_key = key
6749
+
6750
+ # Support df[True, False] and df["a", "b"] as these are not ambiguous
6751
+ if Utils.bool?(row_key) || Utils.strlike?(row_key)
6752
+ return _select_columns(df, key)
6753
+ end
6754
+
6755
+ selection = _select_columns(df, col_key)
6756
+
6757
+ if selection.is_empty
6758
+ return selection
6759
+ elsif selection.is_a?(Series)
6760
+ return get_series_item_by_key(selection, row_key)
6761
+ else
6762
+ return _select_rows(selection, row_key)
6763
+ end
6764
+ end
6765
+
6766
+ key = key[0] if key.size == 1
6767
+
6768
+ # Single string input, e.g. df["a"]
6769
+ if Utils.strlike?(key)
6770
+ # This case is required because empty strings are otherwise treated
6771
+ # as an empty Sequence in `_select_rows`
6772
+ return df.get_column(key)
6773
+ end
6774
+
6775
+ # Single input - df[1] - or multiple inputs - df["a", "b", "c"]
6776
+ begin
6777
+ _select_rows(df, key)
6778
+ rescue TypeError
6779
+ _select_columns(df, key)
6780
+ end
6781
+ end
6782
+
6783
+ def _select_columns(df, key)
6784
+ if key.is_a?(Integer)
6785
+ return df.to_series(key)
6786
+
6787
+ elsif Utils.strlike?(key)
6788
+ return df.get_column(key)
6789
+
6790
+ elsif key.is_a?(Range)
6791
+ start, stop = key.begin, key.end
6792
+ if start.is_a?(::String)
6793
+ start = df.get_column_index(start)
6794
+ stop = df.get_column_index(stop)
6795
+ rng = Range.new(start, stop, key.exclude_end?)
6796
+ return _select_columns_by_index(df, rng)
6797
+ else
6798
+ return _select_columns_by_index(df, key)
6799
+ end
6800
+
6801
+ elsif key.is_a?(::Array)
6802
+ if key.empty?
6803
+ return df.class.new
6804
+ end
6805
+ first = key[0]
6806
+ if Utils.bool?(first)
6807
+ return _select_columns_by_mask(df, key)
6808
+ elsif first.is_a?(Integer)
6809
+ return _select_columns_by_index(df, key)
6810
+ elsif Utils.strlike?(first)
6811
+ return _select_columns_by_name(df, key)
6812
+ else
6813
+ msg = "cannot select columns using Sequence with elements of type #{first.class.name.inspect}"
6814
+ raise TypeError, msg
6815
+ end
6816
+
6817
+ elsif key.is_a?(Series)
6818
+ if key.is_empty
6819
+ return df.class.new
6820
+ end
6821
+ dtype = key.dtype
6822
+ if dtype == String
6823
+ return _select_columns_by_name(df, key)
6824
+ elsif dtype.integer?
6825
+ return _select_columns_by_index(df, key)
6826
+ elsif dtype == Boolean
6827
+ return _select_columns_by_mask(df, key)
6828
+ else
6829
+ msg = "cannot select columns using Series of type #{dtype}"
6830
+ raise TypeError, msg
6831
+ end
6832
+ end
6833
+
6834
+ msg = (
6835
+ "cannot select columns using key of type #{key.class.name.inspect}: #{key.inspect}"
6836
+ )
6837
+ raise TypeError, msg
6838
+ end
6839
+
6840
+ def _select_columns_by_index(df, key)
6841
+ series = key.map { |i| df.to_series(i) }
6842
+ df.class.new(series)
6843
+ end
6844
+
6845
+ def _select_columns_by_name(df, key)
6846
+ df.send(:_from_rbdf, df._df.select(Array(key)))
6847
+ end
6848
+
6849
+ def _select_columns_by_mask(df, key)
6850
+ if key.length != df.width
6851
+ msg = "expected #{df.width} values when selecting columns by boolean mask, got #{key.length}"
6852
+ raise ArgumentError, msg
6853
+ end
6854
+
6855
+ indices = key.each_with_index.filter_map { |val, i| i if val }
6856
+ _select_columns_by_index(df, indices)
6857
+ end
6858
+
6859
+ def _select_rows(df, key)
6860
+ if key.is_a?(Integer)
6861
+ num_rows = df.height
6862
+ if key >= num_rows || key < -num_rows
6863
+ msg = "index #{key} is out of bounds for DataFrame of height #{num_rows}"
6864
+ raise IndexError, msg
6865
+ end
6866
+ return df.slice(key, 1)
6867
+ end
6868
+
6869
+ if key.is_a?(Range)
6870
+ return _select_rows_by_slice(df, key)
6871
+
6872
+ elsif key.is_a?(::Array)
6873
+ if key.empty?
6874
+ return df.clear
6875
+ end
6876
+ if Utils.bool?(key[0])
6877
+ _raise_on_boolean_mask
6878
+ end
6879
+ s = Series.new("", key, dtype: Int64)
6880
+ indices = _convert_series_to_indices(s, df.height)
6881
+ return _select_rows_by_index(df, indices)
6882
+
6883
+ elsif key.is_a?(Series)
6884
+ indices = _convert_series_to_indices(key, df.height)
6885
+ return _select_rows_by_index(df, indices)
6886
+
6887
+ else
6888
+ msg = "cannot select rows using key of type #{key.class.name.inspect}: #{key.inspect}"
6889
+ raise TypeError, msg
6890
+ end
6891
+ end
6892
+
6893
+ def _select_rows_by_slice(df, key)
6894
+ return Slice.new(df).apply(key)
6895
+ end
6896
+
6897
+ def _select_rows_by_index(df, key)
6898
+ df.send(:_from_rbdf, df._df.gather_with_series(key._s))
6899
+ end
6900
+
6901
+ def _convert_series_to_indices(s, size)
6902
+ idx_type = Plr.get_index_type
6903
+
6904
+ if s.dtype == idx_type
6905
+ return s
6906
+ end
6907
+
6908
+ if !s.dtype.integer?
6909
+ if s.dtype == Boolean
6910
+ _raise_on_boolean_mask
6911
+ else
6912
+ msg = "cannot treat Series of type #{s.dtype} as indices"
6913
+ raise TypeError, msg
6914
+ end
6915
+ end
6916
+
6917
+ if s.len == 0
6918
+ return Series.new(s.name, [], dtype: idx_type)
6919
+ end
6920
+
6921
+ if idx_type == UInt32
6922
+ if [Int64, UInt64].include?(s.dtype) && s.max >= Utils::U32_MAX
6923
+ msg = "index positions should be smaller than 2^32"
6924
+ raise ArgumentError, msg
6925
+ end
6926
+ if s.dtype == Int64 && s.min < -Utils::U32_MAX
6927
+ msg = "index positions should be greater than or equal to -2^32"
6928
+ raise ArgumentError, msg
6929
+ end
6930
+ end
6931
+
6932
+ if s.dtype.signed_integer?
6933
+ if s.min < 0
6934
+ if idx_type == UInt32
6935
+ idxs = [Int8, Int16].include?(s.dtype) ? s.cast(Int32) : s
6936
+ else
6937
+ idxs = [Int8, Int16, Int32].include?(s.dtype) ? s.cast(Int64) : s
6938
+ end
6939
+
6940
+ # Update negative indexes to absolute indexes.
6941
+ return (
6942
+ idxs.to_frame
6943
+ .select(
6944
+ F.when(F.col(idxs.name) < 0)
6945
+ .then(size + F.col(idxs.name))
6946
+ .otherwise(F.col(idxs.name))
6947
+ .cast(idx_type)
6948
+ )
6949
+ .to_series(0)
6950
+ )
6951
+ end
6952
+ end
6953
+
6954
+ s.cast(idx_type)
6955
+ end
6956
+
6957
+ def _raise_on_boolean_mask
6958
+ msg = (
6959
+ "selecting rows by passing a boolean mask to `[]` is not supported" +
6960
+ "\n\nHint: Use the `filter` method instead."
6961
+ )
6962
+ raise TypeError, msg
6963
+ end
6729
6964
  end
6730
6965
  end