RubyGems - polars-df - Versions diffs - 0.1.2 → 0.1.3 - Mend

polars-df 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)

checksums.yaml +4 -4
data/.yardopts +3 -0
data/CHANGELOG.md +4 -0
data/Cargo.lock +2 -1
data/README.md +1 -1
data/ext/polars/Cargo.toml +7 -1
data/ext/polars/src/conversion.rs +35 -2
data/ext/polars/src/dataframe.rs +228 -11
data/ext/polars/src/lazy/dataframe.rs +3 -3
data/ext/polars/src/lazy/dsl.rs +59 -2
data/ext/polars/src/lib.rs +151 -10
data/ext/polars/src/series.rs +182 -29
data/ext/polars/src/set.rs +91 -0
data/ext/polars/src/utils.rs +19 -0
data/lib/polars/batched_csv_reader.rb +1 -0
data/lib/polars/cat_expr.rb +39 -0
data/lib/polars/data_frame.rb +2284 -137
data/lib/polars/date_time_expr.rb +1282 -7
data/lib/polars/exceptions.rb +20 -0
data/lib/polars/expr.rb +612 -7
data/lib/polars/expr_dispatch.rb +14 -0
data/lib/polars/functions.rb +219 -0
data/lib/polars/group_by.rb +517 -0
data/lib/polars/io.rb +421 -2
data/lib/polars/lazy_frame.rb +1261 -67
data/lib/polars/lazy_functions.rb +288 -10
data/lib/polars/lazy_group_by.rb +79 -0
data/lib/polars/list_expr.rb +5 -0
data/lib/polars/meta_expr.rb +21 -0
data/lib/polars/series.rb +1476 -212
data/lib/polars/slice.rb +104 -0
data/lib/polars/string_expr.rb +663 -2
data/lib/polars/struct_expr.rb +73 -0
data/lib/polars/utils.rb +43 -3
data/lib/polars/version.rb +2 -1
data/lib/polars/when.rb +1 -0
data/lib/polars/when_then.rb +1 -0
data/lib/polars.rb +7 -10
metadata +9 -2

data/lib/polars/data_frame.rb CHANGED Viewed

@@ -155,12 +155,35 @@ module Polars
     end
     # @private
-    def self._read_parquet(file)
+    def self._read_parquet(
+      file,
+      columns: nil,
+      n_rows: nil,
+      parallel: "auto",
+      row_count_name: nil,
+      row_count_offset: 0,
+      low_memory: false
+    )
       if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
         file = Utils.format_path(file)
       end
-      _from_rbdf(RbDataFrame.read_parquet(file))
+      if file.is_a?(String) && file.include?("*")
+        raise Todo
+      end
+      projection, columns = Utils.handle_projection_columns(columns)
+      _from_rbdf(
+        RbDataFrame.read_parquet(
+          file,
+          columns,
+          projection,
+          n_rows,
+          parallel,
+          Utils._prepare_row_count_args(row_count_name, row_count_offset),
+          low_memory
+        )
+      )
     end
     # def self._read_avro
@@ -259,11 +282,13 @@ module Polars
     # @return [Array]
     #
     # @example
-    #   df = Polars::DataFrame.new({
-    #     "foo" => [1, 2, 3],
-    #     "bar" => [6, 7, 8],
-    #     "ham" => ["a", "b", "c"]
-    #   })
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
     #   df.columns
     #   # => ["foo", "bar", "ham"]
     def columns
@@ -279,11 +304,13 @@ module Polars
     # @return [Object]
     #
     # @example
-    #   df = Polars::DataFrame.new({
-    #     "foo" => [1, 2, 3],
-    #     "bar" => [6, 7, 8],
-    #     "ham" => ["a", "b", "c"]
-    #   })
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
     #   df.columns = ["apple", "banana", "orange"]
     #   df
     #   # =>
@@ -308,11 +335,13 @@ module Polars
     # @return [Array]
     #
     # @example
-    #   df = Polars::DataFrame.new({
-    #     "foo" => [1, 2, 3],
-    #     "bar" => [6.0, 7.0, 8.0],
-    #     "ham" => ["a", "b", "c"]
-    #   })
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
     #   df.dtypes
     #   # => [:i64, :f64, :str]
     def dtypes
@@ -324,56 +353,132 @@ module Polars
     # @return [Hash]
     #
     # @example
-    #   df = Polars::DataFrame.new({
-    #     "foo" => [1, 2, 3],
-    #     "bar" => [6.0, 7.0, 8.0],
-    #     "ham" => ["a", "b", "c"]
-    #   })
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
     #   df.schema
     #   # => {"foo"=>:i64, "bar"=>:f64, "ham"=>:str}
     def schema
       columns.zip(dtypes).to_h
     end
-    # def ==(other)
-    # end
+    # Equal.
+    #
+    # @return [DataFrame]
+    def ==(other)
+      _comp(other, "eq")
+    end
-    # def !=(other)
-    # end
+    # Not equal.
+    #
+    # @return [DataFrame]
+    def !=(other)
+      _comp(other, "neq")
+    end
-    # def >(other)
-    # end
+    # Greater than.
+    #
+    # @return [DataFrame]
+    def >(other)
+      _comp(other, "gt")
+    end
-    # def <(other)
-    # end
+    # Less than.
+    #
+    # @return [DataFrame]
+    def <(other)
+      _comp(other, "lt")
+    end
-    # def >=(other)
-    # end
+    # Greater than or equal.
+    #
+    # @return [DataFrame]
+    def >=(other)
+      _comp(other, "gt_eq")
+    end
-    # def <=(other)
-    # end
+    # Less than or equal.
+    #
+    # @return [DataFrame]
+    def <=(other)
+      _comp(other, "lt_eq")
+    end
-    # def *(other)
-    # end
+    # Performs multiplication.
+    #
+    # @return [DataFrame]
+    def *(other)
+      if other.is_a?(DataFrame)
+        return _from_rbdf(_df.mul_df(other._df))
+      end
-    # def /(other)
-    # end
+      other = _prepare_other_arg(other)
+      _from_rbdf(_df.mul(other._s))
+    end
-    # def +(other)
-    # end
+    # Performs division.
+    #
+    # @return [DataFrame]
+    def /(other)
+      if other.is_a?(DataFrame)
+        return _from_rbdf(_df.div_df(other._df))
+      end
-    # def -(other)
-    # end
+      other = _prepare_other_arg(other)
+      _from_rbdf(_df.div(other._s))
+    end
-    # def %(other)
-    # end
+    # Performs addition.
+    #
+    # @return [DataFrame]
+    def +(other)
+      if other.is_a?(DataFrame)
+        return _from_rbdf(_df.add_df(other._df))
+      end
+      other = _prepare_other_arg(other)
+      _from_rbdf(_df.add(other._s))
+    end
+    # Performs subtraction.
+    #
+    # @return [DataFrame]
+    def -(other)
+      if other.is_a?(DataFrame)
+        return _from_rbdf(_df.sub_df(other._df))
+      end
+      other = _prepare_other_arg(other)
+      _from_rbdf(_df.sub(other._s))
+    end
+    # Returns the modulo.
+    #
+    # @return [DataFrame]
+    def %(other)
+      if other.is_a?(DataFrame)
+        return _from_rbdf(_df.rem_df(other._df))
+      end
+      other = _prepare_other_arg(other)
+      _from_rbdf(_df.rem(other._s))
+    end
+    # Returns a string representing the DataFrame.
     #
+    # @return [String]
     def to_s
       _df.to_s
     end
     alias_method :inspect, :to_s
+    # Check if DataFrame includes column.
+    #
+    # @return [Boolean]
     def include?(name)
       columns.include?(name)
     end
@@ -387,9 +492,78 @@ module Polars
     # def _pos_idxs
     # end
+    # Returns subset of the DataFrame.
     #
-    def [](name)
-      Utils.wrap_s(_df.column(name))
+    # @return [Object]
+    def [](*args)
+      if args.size == 2
+        row_selection, col_selection = args
+        # df[.., unknown]
+        if row_selection.is_a?(Range)
+          # multiple slices
+          # df[.., ..]
+          if col_selection.is_a?(Range)
+            raise Todo
+          end
+        end
+        # df[2, ..] (select row as df)
+        if row_selection.is_a?(Integer)
+          if col_selection.is_a?(Array)
+            df = self[0.., col_selection]
+            return df.slice(row_selection, 1)
+          end
+          # df[2, "a"]
+          if col_selection.is_a?(String)
+            return self[col_selection][row_selection]
+          end
+        end
+        # column selection can be "a" and ["a", "b"]
+        if col_selection.is_a?(String)
+          col_selection = [col_selection]
+        end
+        # df[.., 1]
+        if col_selection.is_a?(Integer)
+          series = to_series(col_selection)
+          return series[row_selection]
+        end
+        if col_selection.is_a?(Array)
+          # df[.., [1, 2]]
+          if is_int_sequence(col_selection)
+            series_list = col_selection.map { |i| to_series(i) }
+            df = self.class.new(series_list)
+            return df[row_selection]
+          end
+        end
+        df = self[col_selection]
+        return df[row_selection]
+      elsif args.size == 1
+        item = args[0]
+        # select single column
+        # df["foo"]
+        if item.is_a?(String)
+          return Utils.wrap_s(_df.column(item))
+        end
+        # df[idx]
+        if item.is_a?(Integer)
+          return slice(_pos_idx(item, dim: 0), 1)
+        end
+        # df[..]
+        if item.is_a?(Range)
+          return Slice.new(self).apply(item)
+        end
+      end
+      raise ArgumentError, "Cannot get item of type: #{item.class.name}"
     end
     # def []=(key, value)
@@ -397,7 +571,9 @@ module Polars
     # no to_arrow
+    # Convert DataFrame to a hash mapping column name to values.
     #
+    # @return [Hash]
     def to_h(as_series: true)
       if as_series
         get_columns.to_h { |s| [s.name, s] }
@@ -422,11 +598,13 @@ module Polars
     # @return [Series]
     #
     # @example
-    #   df = Polars::DataFrame.new({
-    #     "foo" => [1, 2, 3],
-    #     "bar" => [6, 7, 8],
-    #     "ham" => ["a", "b", "c"]
-    #   })
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
     #   df.to_series(1)
     #   # =>
     #   # shape: (3,)
@@ -519,11 +697,13 @@ module Polars
     # @return [String, nil]
     #
     # @example
-    #   df = Polars::DataFrame.new({
-    #     "foo" => [1, 2, 3, 4, 5],
-    #     "bar" => [6, 7, 8, 9, 10],
-    #     "ham" => ["a", "b", "c", "d", "e"]
-    #   })
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3, 4, 5],
+    #       "bar" => [6, 7, 8, 9, 10],
+    #       "ham" => ["a", "b", "c", "d", "e"]
+    #     }
+    #   )
     #   df.write_csv("file.csv")
     def write_csv(
       file = nil,
@@ -694,10 +874,12 @@ module Polars
     # @return [DataFrame]
     #
     # @example
-    #   df = Polars::DataFrame.new({
-    #     "key" => ["a", "b", "c"],
-    #     "val" => [1, 2, 3]
-    #   })
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "key" => ["a", "b", "c"],
+    #       "val" => [1, 2, 3]
+    #     }
+    #   )
     #   df.reverse()
     #   # =>
     #   # shape: (3, 2)
@@ -724,11 +906,13 @@ module Polars
     # @return [DataFrame]
     #
     # @example
-    #   df = Polars::DataFrame.new({
-    #     "foo" => [1, 2, 3],
-    #     "bar" => [6, 7, 8],
-    #     "ham" => ["a", "b", "c"]
-    #   })
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
     #   df.rename({"foo" => "apple"})
     #   # =>
     #   # shape: (3, 3)
@@ -775,11 +959,13 @@ module Polars
     #   # └─────┴─────┴─────┘
     #
     # @example
-    #   df = Polars::DataFrame.new({
-    #     "a" => [1, 2, 3, 4],
-    #     "b" => [0.5, 4, 10, 13],
-    #     "c" => [true, true, false, true]
-    #   })
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3, 4],
+    #       "b" => [0.5, 4, 10, 13],
+    #       "c" => [true, true, false, true]
+    #     }
+    #   )
     #   s = Polars::Series.new("d", [-2.5, 15, 20.5, 0])
     #   df.insert_at_idx(3, s)
     #   # =>
@@ -805,63 +991,560 @@ module Polars
       self
     end
+    # Filter the rows in the DataFrame based on a predicate expression.
+    #
+    # @param predicate [Expr]
+    #   Expression that evaluates to a boolean Series.
+    #
+    # @return [DataFrame]
+    #
+    # @example Filter on one condition:
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.filter(Polars.col("foo") < 3)
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7   ┆ b   │
+    #   # └─────┴─────┴─────┘
+    #
+    # @example Filter on multiple conditions:
+    #   df.filter((Polars.col("foo") < 3) & (Polars.col("ham") == "a"))
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # └─────┴─────┴─────┘
     def filter(predicate)
       lazy.filter(predicate).collect
     end
-    # def describe
-    # end
+    # Summary statistics for a DataFrame.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1.0, 2.8, 3.0],
+    #       "b" => [4, 5, nil],
+    #       "c" => [true, false, true],
+    #       "d" => [nil, "b", "c"],
+    #       "e" => ["usd", "eur", nil]
+    #     }
+    #   )
+    #   df.describe
+    #   # =>
+    #   # shape: (7, 6)
+    #   # ┌────────────┬──────────┬──────────┬──────┬──────┬──────┐
+    #   # │ describe   ┆ a        ┆ b        ┆ c    ┆ d    ┆ e    │
+    #   # │ ---        ┆ ---      ┆ ---      ┆ ---  ┆ ---  ┆ ---  │
+    #   # │ str        ┆ f64      ┆ f64      ┆ f64  ┆ str  ┆ str  │
+    #   # ╞════════════╪══════════╪══════════╪══════╪══════╪══════╡
+    #   # │ count      ┆ 3.0      ┆ 3.0      ┆ 3.0  ┆ 3    ┆ 3    │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ null_count ┆ 0.0      ┆ 1.0      ┆ 0.0  ┆ 1    ┆ 1    │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ mean       ┆ 2.266667 ┆ 4.5      ┆ null ┆ null ┆ null │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ std        ┆ 1.101514 ┆ 0.707107 ┆ null ┆ null ┆ null │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ min        ┆ 1.0      ┆ 4.0      ┆ 0.0  ┆ b    ┆ eur  │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ max        ┆ 3.0      ┆ 5.0      ┆ 1.0  ┆ c    ┆ usd  │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ median     ┆ 2.8      ┆ 4.5      ┆ null ┆ null ┆ null │
+    #   # └────────────┴──────────┴──────────┴──────┴──────┴──────┘
+    def describe
+      describe_cast = lambda do |stat|
+        columns = []
+        self.columns.each_with_index do |s, i|
+          if self[s].is_numeric || self[s].is_boolean
+            columns << stat[0.., i].cast(:f64)
+          else
+            # for dates, strings, etc, we cast to string so that all
+            # statistics can be shown
+            columns << stat[0.., i].cast(:str)
+          end
+        end
+        self.class.new(columns)
+      end
-    # def find_idx_by_name
-    # end
+      summary = _from_rbdf(
+        Polars.concat(
+          [
+            describe_cast.(
+              self.class.new(columns.to_h { |c| [c, [height]] })
+            ),
+            describe_cast.(null_count),
+            describe_cast.(mean),
+            describe_cast.(std),
+            describe_cast.(min),
+            describe_cast.(max),
+            describe_cast.(median)
+          ]
+        )._df
+      )
+      summary.insert_at_idx(
+        0,
+        Polars::Series.new(
+          "describe",
+          ["count", "null_count", "mean", "std", "min", "max", "median"],
+        )
+      )
+      summary
+    end
-    # def replace_at_idx
-    # end
+    # Find the index of a column by name.
+    #
+    # @param name [String]
+    #   Name of the column to find.
+    #
+    # @return [Integer]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {"foo" => [1, 2, 3], "bar" => [6, 7, 8], "ham" => ["a", "b", "c"]}
+    #   )
+    #   df.find_idx_by_name("ham")
+    #   # => 2
+    def find_idx_by_name(name)
+      _df.find_idx_by_name(name)
+    end
+    # Replace a column at an index location.
+    #
+    # @param index [Integer]
+    #   Column index.
+    # @param series [Series]
+    #   Series that will replace the column.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   s = Polars::Series.new("apple", [10, 20, 30])
+    #   df.replace_at_idx(0, s)
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌───────┬─────┬─────┐
+    #   # │ apple ┆ bar ┆ ham │
+    #   # │ ---   ┆ --- ┆ --- │
+    #   # │ i64   ┆ i64 ┆ str │
+    #   # ╞═══════╪═════╪═════╡
+    #   # │ 10    ┆ 6   ┆ a   │
+    #   # ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 20    ┆ 7   ┆ b   │
+    #   # ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 30    ┆ 8   ┆ c   │
+    #   # └───────┴─────┴─────┘
+    def replace_at_idx(index, series)
+      if index < 0
+        index = columns.length + index
+      end
+      _df.replace_at_idx(index, series._s)
+      self
+    end
+    # Sort the DataFrame by column.
+    #
+    # @param by [String, Expr, Array]
+    #   By which column(s) to sort. An expression or an array of expressions is also accepted.
+    # @param reverse [Boolean]
+    #   Reverse/descending sort.
+    # @param nulls_last [Boolean]
+    #   Place null values last. Can only be used if sorted by a single column.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.sort("foo", reverse: true)
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ f64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 3   ┆ 8.0 ┆ c   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7.0 ┆ b   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 1   ┆ 6.0 ┆ a   │
+    #   # └─────┴─────┴─────┘
     #
+    # @example Sort by multiple columns.
+    #   df.sort(
+    #     [Polars.col("foo"), Polars.col("bar")**2],
+    #     reverse: [true, false]
+    #   )
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ f64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 3   ┆ 8.0 ┆ c   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7.0 ┆ b   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 1   ┆ 6.0 ┆ a   │
+    #   # └─────┴─────┴─────┘
     def sort(by, reverse: false, nulls_last: false)
-      _from_rbdf(_df.sort(by, reverse, nulls_last))
+      if by.is_a?(Array) || by.is_a?(Expr)
+        lazy
+          .sort(by, reverse: reverse, nulls_last: nulls_last)
+          .collect(no_optimization: true, string_cache: false)
+      else
+        _from_rbdf(_df.sort(by, reverse, nulls_last))
+      end
     end
+    # Check if DataFrame is equal to other.
+    #
+    # @param other [DataFrame]
+    #   DataFrame to compare with.
+    # @param null_equal [Boolean]
+    #   Consider null values as equal.
+    #
+    # @return [Boolean]
+    #
+    # @example
+    #   df1 = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df2 = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [3, 2, 1],
+    #       "bar" => [8.0, 7.0, 6.0],
+    #       "ham" => ["c", "b", "a"]
+    #     }
+    #   )
+    #   df1.frame_equal(df1)
+    #   # => true
+    #   df1.frame_equal(df2)
+    #   # => false
     def frame_equal(other, null_equal: true)
       _df.frame_equal(other._df, null_equal)
     end
-    # def replace
-    # end
+    # Replace a column by a new Series.
     #
-    def slice(offset, length = nil)
-      if !length.nil? && length < 0
-        length = height - offset + length
-      end
-      _from_rbdf(_df.slice(offset, length))
+    # @param column [String]
+    #   Column to replace.
+    # @param new_col [Series]
+    #   New column to insert.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
+    #   s = Polars::Series.new([10, 20, 30])
+    #   df.replace("foo", s)
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌─────┬─────┐
+    #   # │ foo ┆ bar │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 10  ┆ 4   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 20  ┆ 5   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 30  ┆ 6   │
+    #   # └─────┴─────┘
+    def replace(column, new_col)
+      _df.replace(column, new_col._s)
+      self
     end
+    # Get a slice of this DataFrame.
+    #
+    # @param offset [Integer]
+    #   Start index. Negative indexing is supported.
+    # @param length [Integer, nil]
+    #   Length of the slice. If set to `nil`, all rows starting at the offset
+    #   will be selected.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.slice(1, 2)
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ f64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 2   ┆ 7.0 ┆ b   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 8.0 ┆ c   │
+    #   # └─────┴─────┴─────┘
+    def slice(offset, length = nil)
+      if !length.nil? && length < 0
+        length = height - offset + length
+      end
+      _from_rbdf(_df.slice(offset, length))
+    end
+    # Get the first `n` rows.
+    #
+    # Alias for {#head}.
+    #
+    # @param n [Integer]
+    #   Number of rows to return.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {"foo" => [1, 2, 3, 4, 5, 6], "bar" => ["a", "b", "c", "d", "e", "f"]}
+    #   )
+    #   df.limit(4)
+    #   # =>
+    #   # shape: (4, 2)
+    #   # ┌─────┬─────┐
+    #   # │ foo ┆ bar │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ str │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ a   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ b   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ c   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 4   ┆ d   │
+    #   # └─────┴─────┘
     def limit(n = 5)
       head(n)
     end
+    # Get the first `n` rows.
+    #
+    # @param n [Integer]
+    #   Number of rows to return.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3, 4, 5],
+    #       "bar" => [6, 7, 8, 9, 10],
+    #       "ham" => ["a", "b", "c", "d", "e"]
+    #     }
+    #   )
+    #   df.head(3)
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7   ┆ b   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 8   ┆ c   │
+    #   # └─────┴─────┴─────┘
     def head(n = 5)
       _from_rbdf(_df.head(n))
     end
+    # Get the last `n` rows.
+    #
+    # @param n [Integer]
+    #   Number of rows to return.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3, 4, 5],
+    #       "bar" => [6, 7, 8, 9, 10],
+    #       "ham" => ["a", "b", "c", "d", "e"]
+    #     }
+    #   )
+    #   df.tail(3)
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 3   ┆ 8   ┆ c   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 4   ┆ 9   ┆ d   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 5   ┆ 10  ┆ e   │
+    #   # └─────┴─────┴─────┘
     def tail(n = 5)
       _from_rbdf(_df.tail(n))
     end
-    # def drop_nulls
-    # end
+    # Return a new DataFrame where the null values are dropped.
+    #
+    # @param subset [Object]
+    #   Subset of column(s) on which `drop_nulls` will be applied.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, nil, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.drop_nulls
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 8   ┆ c   │
+    #   # └─────┴─────┴─────┘
+    def drop_nulls(subset: nil)
+      if subset.is_a?(String)
+        subset = [subset]
+      end
+      _from_rbdf(_df.drop_nulls(subset))
+    end
     # def pipe
     # end
-    # def with_row_count
-    # end
+    # Add a column at index 0 that counts the rows.
+    #
+    # @param name [String]
+    #   Name of the column to add.
+    # @param offset [Integer]
+    #   Start the row count at this offset.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 3, 5],
+    #       "b" => [2, 4, 6]
+    #     }
+    #   )
+    #   df.with_row_count
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌────────┬─────┬─────┐
+    #   # │ row_nr ┆ a   ┆ b   │
+    #   # │ ---    ┆ --- ┆ --- │
+    #   # │ u32    ┆ i64 ┆ i64 │
+    #   # ╞════════╪═════╪═════╡
+    #   # │ 0      ┆ 1   ┆ 2   │
+    #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 1      ┆ 3   ┆ 4   │
+    #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2      ┆ 5   ┆ 6   │
+    #   # └────────┴─────┴─────┘
+    def with_row_count(name: "row_nr", offset: 0)
+      _from_rbdf(_df.with_row_count(name, offset))
+    end
+    # Start a groupby operation.
     #
+    # @param by [Object]
+    #   Column(s) to group by.
+    # @param maintain_order [Boolean]
+    #   Make sure that the order of the groups remains consistent. This is more
+    #   expensive than a default groupby. Note that this only works in expression
+    #   aggregations.
+    #
+    # @return [GroupBy]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => ["a", "b", "a", "b", "b", "c"],
+    #       "b" => [1, 2, 3, 4, 5, 6],
+    #       "c" => [6, 5, 4, 3, 2, 1]
+    #     }
+    #   )
+    #   df.groupby("a").agg(Polars.col("b").sum).sort("a")
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ str ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ a   ┆ 4   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ b   ┆ 11  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ c   ┆ 6   │
+    #   # └─────┴─────┘
     def groupby(by, maintain_order: false)
-      lazy.groupby(by, maintain_order: maintain_order)
+      if !Utils.bool?(maintain_order)
+        raise TypeError, "invalid input for groupby arg `maintain_order`: #{maintain_order}."
+      end
+      if by.is_a?(String)
+        by = [by]
+      end
+      GroupBy.new(
+        _df,
+        by,
+        self.class,
+        maintain_order: maintain_order
+      )
     end
     # def groupby_rolling
@@ -876,7 +1559,109 @@ module Polars
     # def join_asof
     # end
+    # Join in SQL-like fashion.
+    #
+    # @param other [DataFrame]
+    #   DataFrame to join with.
+    # @param left_on [Object]
+    #   Name(s) of the left join column(s).
+    # @param right_on [Object]
+    #   Name(s) of the right join column(s).
+    # @param on [Object]
+    #   Name(s) of the join columns in both DataFrames.
+    # @param how ["inner", "left", "outer", "semi", "anti", "cross"]
+    #   Join strategy.
+    # @param suffix [String]
+    #   Suffix to append to columns with a duplicate name.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   other_df = Polars::DataFrame.new(
+    #     {
+    #       "apple" => ["x", "y", "z"],
+    #       "ham" => ["a", "b", "d"]
+    #     }
+    #   )
+    #   df.join(other_df, on: "ham")
+    #   # =>
+    #   # shape: (2, 4)
+    #   # ┌─────┬─────┬─────┬───────┐
+    #   # │ foo ┆ bar ┆ ham ┆ apple │
+    #   # │ --- ┆ --- ┆ --- ┆ ---   │
+    #   # │ i64 ┆ f64 ┆ str ┆ str   │
+    #   # ╞═════╪═════╪═════╪═══════╡
+    #   # │ 1   ┆ 6.0 ┆ a   ┆ x     │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 2   ┆ 7.0 ┆ b   ┆ y     │
+    #   # └─────┴─────┴─────┴───────┘
+    #
+    # @example
+    #   df.join(other_df, on: "ham", how: "outer")
+    #   # =>
+    #   # shape: (4, 4)
+    #   # ┌──────┬──────┬─────┬───────┐
+    #   # │ foo  ┆ bar  ┆ ham ┆ apple │
+    #   # │ ---  ┆ ---  ┆ --- ┆ ---   │
+    #   # │ i64  ┆ f64  ┆ str ┆ str   │
+    #   # ╞══════╪══════╪═════╪═══════╡
+    #   # │ 1    ┆ 6.0  ┆ a   ┆ x     │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 2    ┆ 7.0  ┆ b   ┆ y     │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ null ┆ null ┆ d   ┆ z     │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 3    ┆ 8.0  ┆ c   ┆ null  │
+    #   # └──────┴──────┴─────┴───────┘
+    #
+    # @example
+    #   df.join(other_df, on: "ham", how: "left")
+    #   # =>
+    #   # shape: (3, 4)
+    #   # ┌─────┬─────┬─────┬───────┐
+    #   # │ foo ┆ bar ┆ ham ┆ apple │
+    #   # │ --- ┆ --- ┆ --- ┆ ---   │
+    #   # │ i64 ┆ f64 ┆ str ┆ str   │
+    #   # ╞═════╪═════╪═════╪═══════╡
+    #   # │ 1   ┆ 6.0 ┆ a   ┆ x     │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 2   ┆ 7.0 ┆ b   ┆ y     │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 3   ┆ 8.0 ┆ c   ┆ null  │
+    #   # └─────┴─────┴─────┴───────┘
     #
+    # @example
+    #   df.join(other_df, on: "ham", how: "semi")
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ f64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6.0 ┆ a   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7.0 ┆ b   │
+    #   # └─────┴─────┴─────┘
+    #
+    # @example
+    #   df.join(other_df, on: "ham", how: "anti")
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ f64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 3   ┆ 8.0 ┆ c   │
+    #   # └─────┴─────┴─────┘
     def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right")
       lazy
         .join(
@@ -893,41 +1678,322 @@ module Polars
     # def apply
     # end
+    # Return a new DataFrame with the column added or replaced.
+    #
+    # @param column [Object]
+    #   Series, where the name of the Series refers to the column in the DataFrame.
+    #
+    # @return [DataFrame]
     #
+    # @example Added
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 3, 5],
+    #       "b" => [2, 4, 6]
+    #     }
+    #   )
+    #   df.with_column((Polars.col("b") ** 2).alias("b_squared"))
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌─────┬─────┬───────────┐
+    #   # │ a   ┆ b   ┆ b_squared │
+    #   # │ --- ┆ --- ┆ ---       │
+    #   # │ i64 ┆ i64 ┆ f64       │
+    #   # ╞═════╪═════╪═══════════╡
+    #   # │ 1   ┆ 2   ┆ 4.0       │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ 3   ┆ 4   ┆ 16.0      │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ 5   ┆ 6   ┆ 36.0      │
+    #   # └─────┴─────┴───────────┘
+    #
+    # @example Replaced
+    #   df.with_column(Polars.col("a") ** 2)
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌──────┬─────┐
+    #   # │ a    ┆ b   │
+    #   # │ ---  ┆ --- │
+    #   # │ f64  ┆ i64 │
+    #   # ╞══════╪═════╡
+    #   # │ 1.0  ┆ 2   │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 9.0  ┆ 4   │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 25.0 ┆ 6   │
+    #   # └──────┴─────┘
     def with_column(column)
       # Express the column update as a lazy query, then collect it right away.
       query = lazy.with_column(column)
       query.collect(no_optimization: true, string_cache: false)
     end
-    # def hstack
-    # end
+    # Return a new DataFrame grown horizontally by stacking multiple Series to it.
+    #
+    # @param columns [Object]
+    #   Series to stack.
+    # @param in_place [Boolean]
+    #   Modify in place.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   x = Polars::Series.new("apple", [10, 20, 30])
+    #   df.hstack([x])
+    #   # =>
+    #   # shape: (3, 4)
+    #   # ┌─────┬─────┬─────┬───────┐
+    #   # │ foo ┆ bar ┆ ham ┆ apple │
+    #   # │ --- ┆ --- ┆ --- ┆ ---   │
+    #   # │ i64 ┆ i64 ┆ str ┆ i64   │
+    #   # ╞═════╪═════╪═════╪═══════╡
+    #   # │ 1   ┆ 6   ┆ a   ┆ 10    │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 2   ┆ 7   ┆ b   ┆ 20    │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 3   ┆ 8   ┆ c   ┆ 30    │
+    #   # └─────┴─────┴─────┴───────┘
+    def hstack(columns, in_place: false)
+      # Anything that is not an Array is asked for its columns via `get_columns`.
+      columns = columns.get_columns unless columns.is_a?(Array)
+      series = columns.map(&:_s)
+
+      # Default path: wrap the stacked result in a new DataFrame.
+      return _from_rbdf(_df.hstack(series)) unless in_place
+
+      _df.hstack_mut(series)
+      self
+    end
-    # def vstack
-    # end
+    # Grow this DataFrame vertically by stacking a DataFrame to it.
+    #
+    # @param df [DataFrame]
+    #   DataFrame to stack.
+    # @param in_place [Boolean]
+    #   Modify in place.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df1 = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2],
+    #       "bar" => [6, 7],
+    #       "ham" => ["a", "b"]
+    #     }
+    #   )
+    #   df2 = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [3, 4],
+    #       "bar" => [8, 9],
+    #       "ham" => ["c", "d"]
+    #     }
+    #   )
+    #   df1.vstack(df2)
+    #   # =>
+    #   # shape: (4, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7   ┆ b   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 8   ┆ c   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 4   ┆ 9   ┆ d   │
+    #   # └─────┴─────┴─────┘
+    def vstack(df, in_place: false)
+      # Default path: wrap the stacked result in a new DataFrame.
+      return _from_rbdf(_df.vstack(df._df)) unless in_place
+
+      # In-place path: stack onto the underlying frame and return the receiver.
+      _df.vstack_mut(df._df)
+      self
+    end
+    # Extend the memory backed by this `DataFrame` with the values from `other`.
+    #
+    # Different from `vstack`, which adds the chunks from `other` to the chunks of this
+    # `DataFrame`, `extend` appends the data from `other` to the underlying memory
+    # locations and thus may cause a reallocation.
+    #
+    # If this does not cause a reallocation, the resulting data structure will not
+    # have any extra chunks and thus will yield faster queries.
+    #
+    # Prefer `extend` over `vstack` when you want to do a query after a single append.
+    # For instance during online operations where you add `n` rows and rerun a query.
+    #
+    # Prefer `vstack` over `extend` when you want to append many times before doing a
+    # query. For instance when you read in multiple files and want to store them in a
+    # single `DataFrame`. In the latter case, finish the sequence of `vstack`
+    # operations with a `rechunk`.
+    #
+    # @param other [DataFrame]
+    #   DataFrame to vertically add.
+    #
+    # @return [DataFrame]
     #
+    # @example
+    #   df1 = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
+    #   df2 = Polars::DataFrame.new({"foo" => [10, 20, 30], "bar" => [40, 50, 60]})
+    #   df1.extend(df2)
+    #   # =>
+    #   # shape: (6, 2)
+    #   # ┌─────┬─────┐
+    #   # │ foo ┆ bar │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ 4   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 5   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 6   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 10  ┆ 40  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 20  ┆ 50  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 30  ┆ 60  │
+    #   # └─────┴─────┘
     def extend(other)
       # Extend the underlying frame with `other`'s, then hand back the receiver.
       source = other._df
       _df.extend(source)
       self
     end
-    # def drop
-    # end
+    # Return a new DataFrame with the given column(s) removed.
+    #
+    # @param columns [Object]
+    #   Column(s) to drop.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.drop("ham")
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌─────┬─────┐
+    #   # │ foo ┆ bar │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ f64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ 6.0 │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7.0 │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 8.0 │
+    #   # └─────┴─────┘
+    def drop(columns)
+      # A non-Array argument is dropped directly on the underlying frame.
+      return _from_rbdf(_df.drop(columns)) unless columns.is_a?(Array)
+
+      # For an Array, drop each name in place on a copy and return that copy.
+      copy = clone
+      columns.each { |name| copy._df.drop_in_place(name) }
+      copy
+    end
-    # def drop_in_place
-    # end
+    # Drop in place.
+    #
+    # @param name [Object]
+    #   Column to drop.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.drop_in_place("ham")
+    #   # =>
+    #   # shape: (3,)
+    #   # Series: 'ham' [str]
+    #   # [
+    #   #         "a"
+    #   #         "b"
+    #   #         "c"
+    #   # ]
+    def drop_in_place(name)
+      # Drop the column from the underlying frame and wrap what it returns.
+      dropped = _df.drop_in_place(name)
+      Utils.wrap_s(dropped)
+    end
-    # def cleared
-    # end
+    # Create an empty copy of the current DataFrame.
+    #
+    # Returns a DataFrame with identical schema but no data.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [nil, 2, 3, 4],
+    #       "b" => [0.5, nil, 2.5, 13],
+    #       "c" => [true, true, false, nil]
+    #     }
+    #   )
+    #   df.cleared
+    #   # =>
+    #   # shape: (0, 3)
+    #   # ┌─────┬─────┬──────┐
+    #   # │ a   ┆ b   ┆ c    │
+    #   # │ --- ┆ --- ┆ ---  │
+    #   # │ i64 ┆ f64 ┆ bool │
+    #   # ╞═════╪═════╪══════╡
+    #   # └─────┴─────┴──────┘
+    def cleared
+      # With no rows a plain copy suffices; otherwise take the first zero rows.
+      return clone unless height > 0
+
+      head(0)
+    end
     # clone handled by initialize_copy
+    # Get the DataFrame as an Array of Series.
     #
+    # @return [Array]
     def get_columns
       # Wrap every column of the underlying frame with `Utils.wrap_s`.
       _df.get_columns.map do |column|
         Utils.wrap_s(column)
       end
     end
+    # Get a single column as Series by name.
+    #
+    # @param name [String]
+    #   Name of the column to retrieve.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
+    #   df.get_column("foo")
+    #   # =>
+    #   # shape: (3,)
+    #   # Series: 'foo' [i64]
+    #   # [
+    #   #         1
+    #   #         2
+    #   #         3
+    #   # ]
     def get_column(name)
       # Delegates to this DataFrame's own `[]` lookup with the given name.
       self[name]
     end
@@ -935,13 +2001,85 @@ module Polars
     # def fill_null
     # end
+    # Fill floating point NaN values by an Expression evaluation.
+    #
+    # @param fill_value [Object]
+    #   Value to fill NaN with.
+    #
+    # @return [DataFrame]
+    #
+    # @note
+    #   Note that floating point NaNs (Not a Number) are not missing values!
+    #   To replace missing values, use `fill_null`.
     #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1.5, 2, Float::NAN, 4],
+    #       "b" => [0.5, 4, Float::NAN, 13]
+    #     }
+    #   )
+    #   df.fill_nan(99)
+    #   # =>
+    #   # shape: (4, 2)
+    #   # ┌──────┬──────┐
+    #   # │ a    ┆ b    │
+    #   # │ ---  ┆ ---  │
+    #   # │ f64  ┆ f64  │
+    #   # ╞══════╪══════╡
+    #   # │ 1.5  ┆ 0.5  │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ 2.0  ┆ 4.0  │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ 99.0 ┆ 99.0 │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ 4.0  ┆ 13.0 │
+    #   # └──────┴──────┘
     def fill_nan(fill_value)
       # Build the fill as a lazy query, then collect it without optimization.
       query = lazy.fill_nan(fill_value)
       query.collect(no_optimization: true)
     end
-    # def explode
-    # end
+    # Explode `DataFrame` to long format by exploding a column with Lists.
+    #
+    # @param columns [Object]
+    #   Column of LargeList type.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "letters" => ["a", "a", "b", "c"],
+    #       "numbers" => [[1], [2, 3], [4, 5], [6, 7, 8]]
+    #     }
+    #   )
+    #   df.explode("numbers")
+    #   # =>
+    #   # shape: (8, 2)
+    #   # ┌─────────┬─────────┐
+    #   # │ letters ┆ numbers │
+    #   # │ ---     ┆ ---     │
+    #   # │ str     ┆ i64     │
+    #   # ╞═════════╪═════════╡
+    #   # │ a       ┆ 1       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ a       ┆ 2       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ a       ┆ 3       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ b       ┆ 4       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ b       ┆ 5       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ c       ┆ 6       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ c       ┆ 7       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ c       ┆ 8       │
+    #   # └─────────┴─────────┘
+    def explode(columns)
+      # Build the explode as a lazy query, then collect it without optimization.
+      query = lazy.explode(columns)
+      query.collect(no_optimization: true)
+    end
     # def pivot
     # end
@@ -955,25 +2093,242 @@ module Polars
     # def partition_by
     # end
-    # def shift
-    # end
-    # def shift_and_fill
-    # end
+    # Shift values by the given period.
+    #
+    # @param periods [Integer]
+    #   Number of places to shift (may be negative).
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.shift(1)
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ foo  ┆ bar  ┆ ham  │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ i64  ┆ str  │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ null ┆ null ┆ null │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ 1    ┆ 6    ┆ a    │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ 2    ┆ 7    ┆ b    │
+    #   # └──────┴──────┴──────┘
+    #
+    # @example
+    #   df.shift(-1)
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ foo  ┆ bar  ┆ ham  │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ i64  ┆ str  │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ 2    ┆ 7    ┆ b    │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ 3    ┆ 8    ┆ c    │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ null ┆ null ┆ null │
+    #   # └──────┴──────┴──────┘
+    def shift(periods)
+      # Shift on the underlying frame and wrap the result in a new DataFrame.
+      shifted = _df.shift(periods)
+      _from_rbdf(shifted)
+    end
+    # Shift the values by a given period and fill the resulting null values.
+    #
+    # @param periods [Integer]
+    #   Number of places to shift (may be negative).
+    # @param fill_value [Object]
+    #   Fill nil values with this value.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.shift_and_fill(1, 0)
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 0   ┆ 0   ┆ 0   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7   ┆ b   │
+    #   # └─────┴─────┴─────┘
+    def shift_and_fill(periods, fill_value)
+      # Express the shift as a lazy query, then collect it right away.
+      query = lazy.shift_and_fill(periods, fill_value)
+      query.collect(no_optimization: true, string_cache: false)
+    end
+    # Get a mask of all duplicated rows in this DataFrame.
+    #
+    # @return [Series]
     #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3, 1],
+    #       "b" => ["x", "y", "z", "x"],
+    #     }
+    #   )
+    #   df.is_duplicated
+    #   # =>
+    #   # shape: (4,)
+    #   # Series: '' [bool]
+    #   # [
+    #   #         true
+    #   #         false
+    #   #         false
+    #   #         true
+    #   # ]
     def is_duplicated
       # Ask the underlying frame for the mask and wrap it with `Utils.wrap_s`.
       mask = _df.is_duplicated
       Utils.wrap_s(mask)
     end
+    # Get a mask of all unique rows in this DataFrame.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3, 1],
+    #       "b" => ["x", "y", "z", "x"]
+    #     }
+    #   )
+    #   df.is_unique
+    #   # =>
+    #   # shape: (4,)
+    #   # Series: '' [bool]
+    #   # [
+    #   #         false
+    #   #         true
+    #   #         true
+    #   #         false
+    #   # ]
     def is_unique
       # Ask the underlying frame for the mask and wrap it with `Utils.wrap_s`.
       mask = _df.is_unique
       Utils.wrap_s(mask)
     end
+    # Start a lazy query from this point.
+    #
+    # @return [LazyFrame]
     def lazy
       # Obtain the lazy form of the underlying frame and wrap it via `wrap_ldf`.
       ldf = _df.lazy
       wrap_ldf(ldf)
     end
+    # Select columns from this DataFrame.
+    #
+    # @param exprs [Object]
+    #   Column or columns to select.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.select("foo")
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌─────┐
+    #   # │ foo │
+    #   # │ --- │
+    #   # │ i64 │
+    #   # ╞═════╡
+    #   # │ 1   │
+    #   # ├╌╌╌╌╌┤
+    #   # │ 2   │
+    #   # ├╌╌╌╌╌┤
+    #   # │ 3   │
+    #   # └─────┘
+    #
+    # @example
+    #   df.select(["foo", "bar"])
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌─────┬─────┐
+    #   # │ foo ┆ bar │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ 6   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 8   │
+    #   # └─────┴─────┘
+    #
+    # @example
+    #   df.select(Polars.col("foo") + 1)
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌─────┐
+    #   # │ foo │
+    #   # │ --- │
+    #   # │ i64 │
+    #   # ╞═════╡
+    #   # │ 2   │
+    #   # ├╌╌╌╌╌┤
+    #   # │ 3   │
+    #   # ├╌╌╌╌╌┤
+    #   # │ 4   │
+    #   # └─────┘
+    #
+    # @example
+    #   df.select([Polars.col("foo") + 1, Polars.col("bar") + 1])
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌─────┬─────┐
+    #   # │ foo ┆ bar │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 2   ┆ 7   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 8   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 4   ┆ 9   │
+    #   # └─────┴─────┘
+    #
+    # @example
+    #   df.select(Polars.when(Polars.col("foo") > 2).then(10).otherwise(0))
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌─────────┐
+    #   # │ literal │
+    #   # │ ---     │
+    #   # │ i64     │
+    #   # ╞═════════╡
+    #   # │ 0       │
+    #   # ├╌╌╌╌╌╌╌╌╌┤
+    #   # │ 0       │
+    #   # ├╌╌╌╌╌╌╌╌╌┤
+    #   # │ 10      │
+    #   # └─────────┘
     def select(exprs)
       _from_rbdf(
         lazy
@@ -983,6 +2338,43 @@ module Polars
       )
     end
+    # Add or overwrite multiple columns in a DataFrame.
+    #
+    # @param exprs [Array]
+    #   Array of Expressions that evaluate to columns.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3, 4],
+    #       "b" => [0.5, 4, 10, 13],
+    #       "c" => [true, true, false, true]
+    #     }
+    #   )
+    #   df.with_columns(
+    #     [
+    #       (Polars.col("a") ** 2).alias("a^2"),
+    #       (Polars.col("b") / 2).alias("b/2"),
+    #       (Polars.col("c").is_not()).alias("not c")
+    #     ]
+    #   )
+    #   # =>
+    #   # shape: (4, 6)
+    #   # ┌─────┬──────┬───────┬──────┬──────┬───────┐
+    #   # │ a   ┆ b    ┆ c     ┆ a^2  ┆ b/2  ┆ not c │
+    #   # │ --- ┆ ---  ┆ ---   ┆ ---  ┆ ---  ┆ ---   │
+    #   # │ i64 ┆ f64  ┆ bool  ┆ f64  ┆ f64  ┆ bool  │
+    #   # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
+    #   # │ 1   ┆ 0.5  ┆ true  ┆ 1.0  ┆ 0.25 ┆ false │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 2   ┆ 4.0  ┆ true  ┆ 4.0  ┆ 2.0  ┆ false │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 3   ┆ 10.0 ┆ false ┆ 9.0  ┆ 5.0  ┆ true  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 4   ┆ 13.0 ┆ true  ┆ 16.0 ┆ 6.5  ┆ false │
+    #   # └─────┴──────┴───────┴──────┴──────┴───────┘
     def with_columns(exprs)
       if !exprs.nil? && !exprs.is_a?(Array)
         exprs = [exprs]
@@ -992,6 +2384,26 @@ module Polars
         .collect(no_optimization: true, string_cache: false)
     end
+    # Get number of chunks used by the ChunkedArrays of this DataFrame.
+    #
+    # @param strategy ["first", "all"]
+    #   Return the number of chunks of the 'first' column,
+    #   or 'all' columns in this DataFrame.
+    #
+    # @return [Object]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3, 4],
+    #       "b" => [0.5, 4, 10, 13],
+    #       "c" => [true, true, false, true]
+    #     }
+    #   )
+    #   df.n_chunks
+    #   # => 1
+    #   df.n_chunks(strategy: "all")
+    #   # => [1, 1, 1]
     def n_chunks(strategy: "first")
       if strategy == "first"
         _df.n_chunks
@@ -1002,6 +2414,28 @@ module Polars
       end
     end
+    # Aggregate the columns of this DataFrame to their maximum value.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.max
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 3   ┆ 8   ┆ c   │
+    #   # └─────┴─────┴─────┘
     def max(axis: 0)
       if axis == 0
         _from_rbdf(_df.max)
@@ -1012,6 +2446,28 @@ module Polars
       end
     end
+    # Aggregate the columns of this DataFrame to their minimum value.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.min
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # └─────┴─────┴─────┘
     def min(axis: 0)
       if axis == 0
         _from_rbdf(_df.min)
@@ -1022,6 +2478,44 @@ module Polars
       end
     end
+    # Aggregate the columns of this DataFrame to their sum value.
+    #
+    # @param axis [Integer]
+    #   Either 0 or 1.
+    # @param null_strategy ["ignore", "propagate"]
+    #   This argument is only used if axis == 1.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"],
+    #     }
+    #   )
+    #   df.sum
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬──────┐
+    #   # │ foo ┆ bar ┆ ham  │
+    #   # │ --- ┆ --- ┆ ---  │
+    #   # │ i64 ┆ i64 ┆ str  │
+    #   # ╞═════╪═════╪══════╡
+    #   # │ 6   ┆ 21  ┆ null │
+    #   # └─────┴─────┴──────┘
+    #
+    # @example
+    #   df.sum(axis: 1)
+    #   # =>
+    #   # shape: (3,)
+    #   # Series: 'foo' [str]
+    #   # [
+    #   #         "16a"
+    #   #         "27b"
+    #   #         "38c"
+    #   # ]
     def sum(axis: 0, null_strategy: "ignore")
       case axis
       when 0
@@ -1033,6 +2527,33 @@ module Polars
       end
     end
+    # Aggregate the columns of this DataFrame to their mean value.
+    #
+    # @param axis [Integer]
+    #   Either 0 or 1.
+    # @param null_strategy ["ignore", "propagate"]
+    #   This argument is only used if axis == 1.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.mean
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬──────┐
+    #   # │ foo ┆ bar ┆ ham  │
+    #   # │ --- ┆ --- ┆ ---  │
+    #   # │ f64 ┆ f64 ┆ str  │
+    #   # ╞═════╪═════╪══════╡
+    #   # │ 2.0 ┆ 7.0 ┆ null │
+    #   # └─────┴─────┴──────┘
     def mean(axis: 0, null_strategy: "ignore")
       case axis
       when 0
@@ -1044,77 +2565,633 @@ module Polars
       end
     end
+    # Aggregate the columns of this DataFrame to their standard deviation value.
+    #
+    # @param ddof [Integer]
+    #   Degrees of freedom
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.std
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬──────┐
+    #   # │ foo ┆ bar ┆ ham  │
+    #   # │ --- ┆ --- ┆ ---  │
+    #   # │ f64 ┆ f64 ┆ str  │
+    #   # ╞═════╪═════╪══════╡
+    #   # │ 1.0 ┆ 1.0 ┆ null │
+    #   # └─────┴─────┴──────┘
+    #
+    # @example
+    #   df.std(ddof: 0)
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌──────────┬──────────┬──────┐
+    #   # │ foo      ┆ bar      ┆ ham  │
+    #   # │ ---      ┆ ---      ┆ ---  │
+    #   # │ f64      ┆ f64      ┆ str  │
+    #   # ╞══════════╪══════════╪══════╡
+    #   # │ 0.816497 ┆ 0.816497 ┆ null │
+    #   # └──────────┴──────────┴──────┘
     def std(ddof: 1)
       # Compute on the underlying frame and wrap the result in a new DataFrame.
       result = _df.std(ddof)
       _from_rbdf(result)
     end
+    # Aggregate the columns of this DataFrame to their variance value.
+    #
+    # @param ddof [Integer]
+    #   Degrees of freedom
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.var
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬──────┐
+    #   # │ foo ┆ bar ┆ ham  │
+    #   # │ --- ┆ --- ┆ ---  │
+    #   # │ f64 ┆ f64 ┆ str  │
+    #   # ╞═════╪═════╪══════╡
+    #   # │ 1.0 ┆ 1.0 ┆ null │
+    #   # └─────┴─────┴──────┘
+    #
+    # @example
+    #   df.var(ddof: 0)
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌──────────┬──────────┬──────┐
+    #   # │ foo      ┆ bar      ┆ ham  │
+    #   # │ ---      ┆ ---      ┆ ---  │
+    #   # │ f64      ┆ f64      ┆ str  │
+    #   # ╞══════════╪══════════╪══════╡
+    #   # │ 0.666667 ┆ 0.666667 ┆ null │
+    #   # └──────────┴──────────┴──────┘
     def var(ddof: 1)
       # Compute on the underlying frame and wrap the result in a new DataFrame.
       result = _df.var(ddof)
       _from_rbdf(result)
     end
+    # Aggregate the columns of this DataFrame to their median value.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.median
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬──────┐
+    #   # │ foo ┆ bar ┆ ham  │
+    #   # │ --- ┆ --- ┆ ---  │
+    #   # │ f64 ┆ f64 ┆ str  │
+    #   # ╞═════╪═════╪══════╡
+    #   # │ 2.0 ┆ 7.0 ┆ null │
+    #   # └─────┴─────┴──────┘
     def median
       # Compute on the underlying frame and wrap the result in a new DataFrame.
       result = _df.median
       _from_rbdf(result)
     end
-    # def product
-    # end
+    # Aggregate the columns of this DataFrame to their product values.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3],
+    #       "b" => [0.5, 4, 10],
+    #       "c" => [true, true, false]
+    #     }
+    #   )
+    #   df.product
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬──────┬─────┐
+    #   # │ a   ┆ b    ┆ c   │
+    #   # │ --- ┆ ---  ┆ --- │
+    #   # │ i64 ┆ f64  ┆ i64 │
+    #   # ╞═════╪══════╪═════╡
+    #   # │ 6   ┆ 20.0 ┆ 0   │
+    #   # └─────┴──────┴─────┘
+    def product
+      # Select the product expression applied to `Polars.all`.
+      every_column = Polars.all
+      select(every_column.product)
+    end
+    # Aggregate the columns of this DataFrame to their quantile value.
+    #
+    # @param quantile [Float]
+    #   Quantile between 0.0 and 1.0.
+    # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
+    #   Interpolation method.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.quantile(0.5, interpolation: "nearest")
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬──────┐
+    #   # │ foo ┆ bar ┆ ham  │
+    #   # │ --- ┆ --- ┆ ---  │
+    #   # │ f64 ┆ f64 ┆ str  │
+    #   # ╞═════╪═════╪══════╡
+    #   # │ 2.0 ┆ 7.0 ┆ null │
+    #   # └─────┴─────┴──────┘
+    def quantile(quantile, interpolation: "nearest")
+      # Compute on the underlying frame and wrap the result in a new DataFrame.
+      result = _df.quantile(quantile, interpolation)
+      _from_rbdf(result)
+    end
+    # Get one hot encoded dummy variables.
+    #
+    # @param columns [Object]
+    #   A subset of columns to convert to dummy variables. `nil` means
+    #   "all columns".
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2],
+    #       "bar" => [3, 4],
+    #       "ham" => ["a", "b"]
+    #     }
+    #   )
+    #   df.to_dummies
+    #   # =>
+    #   # shape: (2, 6)
+    #   # ┌───────┬───────┬───────┬───────┬───────┬───────┐
+    #   # │ foo_1 ┆ foo_2 ┆ bar_3 ┆ bar_4 ┆ ham_a ┆ ham_b │
+    #   # │ ---   ┆ ---   ┆ ---   ┆ ---   ┆ ---   ┆ ---   │
+    #   # │ u8    ┆ u8    ┆ u8    ┆ u8    ┆ u8    ┆ u8    │
+    #   # ╞═══════╪═══════╪═══════╪═══════╪═══════╪═══════╡
+    #   # │ 1     ┆ 0     ┆ 1     ┆ 0     ┆ 1     ┆ 0     │
+    #   # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 0     ┆ 1     ┆ 0     ┆ 1     ┆ 0     ┆ 1     │
+    #   # └───────┴───────┴───────┴───────┴───────┴───────┘
+    def to_dummies(columns: nil)
+      # A single column name is wrapped in an Array before being passed down.
+      columns = [columns] if columns.is_a?(String)
+      _from_rbdf(_df.to_dummies(columns))
+    end
-    # def quantile(quantile, interpolation: "nearest")
-    # end
+    # Drop duplicate rows from this DataFrame.
+    #
+    # @param maintain_order [Boolean]
+    #   Keep the same order as the original DataFrame. This requires more work to
+    #   compute.
+    # @param subset [Object]
+    #   Subset to use to compare rows.
+    # @param keep ["first", "last"]
+    #   Which of the duplicate rows to keep (in conjunction with `subset`).
+    #
+    # @return [DataFrame]
+    #
+    # @note
+    #   Note that this fails if there is a column of type `List` in the DataFrame or
+    #   subset.
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 1, 2, 3, 4, 5],
+    #       "b" => [0.5, 0.5, 1.0, 2.0, 3.0, 3.0],
+    #       "c" => [true, true, true, false, true, true]
+    #     }
+    #   )
+    #   df.unique
+    #   # =>
+    #   # shape: (5, 3)
+    #   # ┌─────┬─────┬───────┐
+    #   # │ a   ┆ b   ┆ c     │
+    #   # │ --- ┆ --- ┆ ---   │
+    #   # │ i64 ┆ f64 ┆ bool  │
+    #   # ╞═════╪═════╪═══════╡
+    #   # │ 1   ┆ 0.5 ┆ true  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 2   ┆ 1.0 ┆ true  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 3   ┆ 2.0 ┆ false │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 4   ┆ 3.0 ┆ true  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 5   ┆ 3.0 ┆ true  │
+    #   # └─────┴─────┴───────┘
+    def unique(maintain_order: true, subset: nil, keep: "first")
+      # Normalize `subset`: nil and Arrays pass through untouched, a single
+      # String is wrapped in an Array, and anything else is converted with `to_a`.
+      subset =
+        case subset
+        when nil, Array then subset
+        when String then [subset]
+        else subset.to_a
+        end
-    # def to_dummies
-    # end
+      _from_rbdf(_df.unique(maintain_order, subset, keep))
+    end
-    # def unique
-    # end
+    # Return the number of unique rows, or the number of unique row-subsets.
+    #
+    # @param subset [Object]
+    #   One or more columns/expressions that define what to count;
+    #   omit to return the count of unique rows.
+    #
+    # @return [Integer]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 1, 2, 3, 4, 5],
+    #       "b" => [0.5, 0.5, 1.0, 2.0, 3.0, 3.0],
+    #       "c" => [true, true, true, false, true, true]
+    #     }
+    #   )
+    #   df.n_unique
+    #   # => 5
+    #
+    # @example Simple columns subset
+    #   df.n_unique(subset: ["b", "c"])
+    #   # => 4
+    #
+    # @example Expression subset
+    #   df.n_unique(
+    #     subset: [
+    #       (Polars.col("a").floordiv(2)),
+    #       (Polars.col("c") | (Polars.col("b") >= 2))
+    #     ]
+    #   )
+    #   # => 3
+    def n_unique(subset: nil)
+      # Normalize a lone column name or a lone expression into a one-element
+      # Array. The name check must be against String: the earlier `StringIO`
+      # check never matched a column name, so the name was not turned into a
+      # column expression.
+      if subset.is_a?(String)
+        subset = [Polars.col(subset)]
+      elsif subset.is_a?(Expr)
+        subset = [subset]
+      end
-    # def n_unique
-    # end
+      if subset.is_a?(Array) && subset.length == 1
+        # A single entry is used as the expression directly.
+        expr = Utils.expr_to_lit_or_expr(subset[0], str_to_lit: false)
+      else
+        # Several entries (or nil, which stands for `Polars.all`) are combined
+        # into one struct expression.
+        struct_fields = subset.nil? ? Polars.all : subset
+        expr = Polars.struct(struct_fields)
+      end
+      df = lazy.select(expr.n_unique).collect
+      # An empty result is reported as 0; otherwise return the first cell.
+      df.is_empty ? 0 : df.row(0)[0]
+    end
+    # Rechunk the data in this DataFrame to a contiguous allocation.
+    # This will make sure all subsequent operations have optimal and predictable
+    # performance.
     #
+    # @return [DataFrame]
     def rechunk
       # Rechunk the underlying frame and wrap the result in a new DataFrame.
       rechunked = _df.rechunk
       _from_rbdf(rechunked)
     end
+    # Create a new DataFrame that shows the null counts per column.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, nil, 3],
+    #       "bar" => [6, 7, nil],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.null_count
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ u32 ┆ u32 ┆ u32 │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 1   ┆ 0   │
+    #   # └─────┴─────┴─────┘
     def null_count
       counts = _df.null_count
       _from_rbdf(counts)
     end
-    # def sample
-    # end
+    # Sample from this DataFrame.
+    #
+    # @param n [Integer]
+    #   Number of items to return. Cannot be used with `frac`. Defaults to 1 if
+    #   `frac` is nil.
+    # @param frac [Float]
+    #   Fraction of items to return. Cannot be used with `n`.
+    # @param with_replacement [Boolean]
+    #   Allow values to be sampled more than once.
+    # @param shuffle [Boolean]
+    #   Shuffle the order of sampled data points.
+    # @param seed [Integer]
+    #   Seed for the random number generator. If set to nil (default), a random
+    #   seed is used.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.sample(n: 2, seed: 0)
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 3   ┆ 8   ┆ c   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7   ┆ b   │
+    #   # └─────┴─────┴─────┘
+    def sample(
+      n: nil,
+      frac: nil,
+      with_replacement: false,
+      shuffle: false,
+      seed: nil
+    )
+      if !n.nil? && !frac.nil?
+        raise ArgumentError, "cannot specify both `n` and `frac`"
+      end
+      if n.nil? && !frac.nil?
+        _from_rbdf(
+          _df.sample_frac(frac, with_replacement, shuffle, seed)
+        )
+      end
+      if n.nil?
+        n = 1
+      end
+      _from_rbdf(_df.sample_n(n, with_replacement, shuffle, seed))
+    end
     # def fold
     # end
-    # def row
-    # end
+    # Get a row as tuple, either by index or by predicate.
+    #
+    # @param index [Object]
+    #   Row index.
+    # @param by_predicate [Object]
+    #   Select the row according to a given expression/predicate.
+    #
+    # @return [Object]
+    #
+    # @note
+    #   The `index` and `by_predicate` params are mutually exclusive. Additionally,
+    #   to ensure clarity, the `by_predicate` parameter must be supplied by keyword.
+    #
+    #   When using `by_predicate` it is an error condition if anything other than
+    #   one row is returned; more than one row raises `TooManyRowsReturned`, and
+    #   zero rows will raise `NoRowsReturned` (both inherit from `RowsException`).
+    #
+    # @example Return the row at the given index
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.row(2)
+    #   # => [3, 8, "c"]
+    #
+    # @example Return the row that matches the given predicate
+    #   df.row(by_predicate: Polars.col("ham") == "b")
+    #   # => [2, 7, "b"]
+    def row(index = nil, by_predicate: nil)
+      if !index.nil? && !by_predicate.nil?
+        raise ArgumentError, "Cannot set both 'index' and 'by_predicate'; mutually exclusive"
+      elsif index.is_a?(Expr)
+        raise TypeError, "Expressions should be passed to the 'by_predicate' param"
+      elsif index.is_a?(Integer)
+        _df.row_tuple(index)
+      elsif by_predicate.is_a?(Expr)
+        rows = filter(by_predicate).rows
+        n_rows = rows.length
+        if n_rows > 1
+          raise TooManyRowsReturned, "Predicate #{by_predicate} returned #{n_rows} rows"
+        elsif n_rows == 0
+          raise NoRowsReturned, "Predicate <{by_predicate!s}> returned no rows"
+        end
+        rows[0]
+      else
+        raise ArgumentError, "One of 'index' or 'by_predicate' must be set"
+      end
+    end
-    # def rows
-    # end
+    # Convert columnar data to rows as Ruby arrays.
+    #
+    # @return [Array]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 3, 5],
+    #       "b" => [2, 4, 6]
+    #     }
+    #   )
+    #   df.rows
+    #   # => [[1, 2], [3, 4], [5, 6]]
+    def rows
+      _df.row_tuples
+    end
-    # def shrink_to_fit
-    # end
+    # Shrink DataFrame memory usage.
+    #
+    # Shrinks to fit the exact capacity needed to hold the data.
+    #
+    # @return [DataFrame]
+    def shrink_to_fit(in_place: false)
+      if in_place
+        _df.shrink_to_fit
+        self
+      else
+        df = clone
+        df._df.shrink_to_fit
+        df
+      end
+    end
-    # def take_every
-    # end
+    # Take every nth row in the DataFrame and return as a new DataFrame.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]})
+    #   s.take_every(2)
+    #   # =>
+    #   # shape: (2, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ 5   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 7   │
+    #   # └─────┴─────┘
+    def take_every(n)
+      select(Utils.col("*").take_every(n))
+    end
     # def hash_rows
     # end
-    # def interpolate
-    # end
+    # Interpolate intermediate values. The interpolation method is linear.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, nil, 9, 10],
+    #       "bar" => [6, 7, 9, nil],
+    #       "baz" => [1, nil, nil, 9]
+    #     }
+    #   )
+    #   df.interpolate
+    #   # =>
+    #   # shape: (4, 3)
+    #   # ┌─────┬──────┬─────┐
+    #   # │ foo ┆ bar  ┆ baz │
+    #   # │ --- ┆ ---  ┆ --- │
+    #   # │ i64 ┆ i64  ┆ i64 │
+    #   # ╞═════╪══════╪═════╡
+    #   # │ 1   ┆ 6    ┆ 1   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 5   ┆ 7    ┆ 3   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 9   ┆ 9    ┆ 6   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 10  ┆ null ┆ 9   │
+    #   # └─────┴──────┴─────┘
+    def interpolate
+      select(Utils.col("*").interpolate)
+    end
+    # Check if the dataframe is empty.
+    #
+    # @return [Boolean]
     #
+    # @example
+    #   df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
+    #   df.is_empty
+    #   # => false
+    #   df.filter(Polars.col("foo") > 99).is_empty
+    #   # => true
     def is_empty
       height.zero?
     end
     alias_method :empty?, :is_empty
-    # def to_struct(name)
-    # end
+    # Convert a `DataFrame` to a `Series` of type `Struct`.
+    #
+    # @param name [String]
+    #   Name for the struct Series
+    #
+    # @return [Series]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3, 4, 5],
+    #       "b" => ["one", "two", "three", "four", "five"]
+    #     }
+    #   )
+    #   df.to_struct("nums")
+    #   # =>
+    #   # shape: (5,)
+    #   # Series: 'nums' [struct[2]]
+    #   # [
+    #   #         {1,"one"}
+    #   #         {2,"two"}
+    #   #         {3,"three"}
+    #   #         {4,"four"}
+    #   #         {5,"five"}
+    #   # ]
+    def to_struct(name)
+      Utils.wrap_s(_df.to_struct(name))
+    end
-    # def unnest
-    # end
+    # Decompose a struct into its fields.
+    #
+    # The fields will be inserted into the `DataFrame` on the location of the
+    # `struct` type.
+    #
+    # @param names [Object]
+    #   Names of the struct columns that will be decomposed into their fields
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "before" => ["foo", "bar"],
+    #       "t_a" => [1, 2],
+    #       "t_b" => ["a", "b"],
+    #       "t_c" => [true, nil],
+    #       "t_d" => [[1, 2], [3]],
+    #       "after" => ["baz", "womp"]
+    #     }
+    #   ).select(["before", Polars.struct(Polars.col("^t_.$")).alias("t_struct"), "after"])
+    #   df.unnest("t_struct")
+    #   # =>
+    #   # shape: (2, 6)
+    #   # ┌────────┬─────┬─────┬──────┬───────────┬───────┐
+    #   # │ before ┆ t_a ┆ t_b ┆ t_c  ┆ t_d       ┆ after │
+    #   # │ ---    ┆ --- ┆ --- ┆ ---  ┆ ---       ┆ ---   │
+    #   # │ str    ┆ i64 ┆ str ┆ bool ┆ list[i64] ┆ str   │
+    #   # ╞════════╪═════╪═════╪══════╪═══════════╪═══════╡
+    #   # │ foo    ┆ 1   ┆ a   ┆ true ┆ [1, 2]    ┆ baz   │
+    #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ bar    ┆ 2   ┆ b   ┆ null ┆ [3]       ┆ womp  │
+    #   # └────────┴─────┴─────┴──────┴───────────┴───────┘
+    def unnest(names)
+      if names.is_a?(String)
+        names = [names]
+      end
+      _from_rbdf(_df.unnest(names))
+    end
     private
@@ -1127,7 +3204,7 @@ module Polars
       if !columns.nil?
         columns, dtypes = _unpack_columns(columns, lookup_names: data.keys)
-        if !data && dtypes
+        if data.empty? && dtypes
           data_series = columns.map { |name| Series.new(name, [], dtype: dtypes[name])._s }
         else
           data_series = data.map { |name, values| Series.new(name, values, dtype: dtypes[name])._s }
@@ -1147,7 +3224,7 @@ module Polars
       if columns.nil?
         data
       else
-        if !data
+        if data.empty?
           columns.map { |c| Series.new(c, nil)._s }
         elsif data.length == columns.length
           columns.each_with_index do |c, i|
@@ -1182,5 +3259,75 @@ module Polars
     def _from_rbdf(rb_df)
       klass = self.class
       klass._from_rbdf(rb_df)
     end
+    def _comp(other, op)
+      if other.is_a?(DataFrame)
+        _compare_to_other_df(other, op)
+      else
+        _compare_to_non_df(other, op)
+      end
+    end
+    def _compare_to_other_df(other, op)
+      if columns != other.columns
+        raise ArgmentError, "DataFrame columns do not match"
+      end
+      if shape != other.shape
+        raise ArgmentError, "DataFrame dimensions do not match"
+      end
+      suffix = "__POLARS_CMP_OTHER"
+      other_renamed = other.select(Polars.all.suffix(suffix))
+      combined = Polars.concat([self, other_renamed], how: "horizontal")
+      expr = case op
+      when "eq"
+        columns.map { |n| Polars.col(n) == Polars.col("#{n}#{suffix}") }
+      when "neq"
+        columns.map { |n| Polars.col(n) != Polars.col("#{n}#{suffix}") }
+      when "gt"
+        columns.map { |n| Polars.col(n) > Polars.col("#{n}#{suffix}") }
+      when "lt"
+        columns.map { |n| Polars.col(n) < Polars.col("#{n}#{suffix}") }
+      when "gt_eq"
+        columns.map { |n| Polars.col(n) >= Polars.col("#{n}#{suffix}") }
+      when "lt_eq"
+        columns.map { |n| Polars.col(n) <= Polars.col("#{n}#{suffix}") }
+      else
+        raise ArgumentError, "got unexpected comparison operator: #{op}"
+      end
+      combined.select(expr)
+    end
+    def _compare_to_non_df(other, op)
+      case op
+      when "eq"
+        select(Polars.all == other)
+      when "neq"
+        select(Polars.all != other)
+      when "gt"
+        select(Polars.all > other)
+      when "lt"
+        select(Polars.all < other)
+      when "gt_eq"
+        select(Polars.all >= other)
+      when "lt_eq"
+        select(Polars.all <= other)
+      else
+        raise ArgumentError, "got unexpected comparison operator: #{op}"
+      end
+    end
+    def _prepare_other_arg(other)
+      if !other.is_a?(Series)
+        if other.is_a?(Array)
+          raise ArgumentError, "Operation not supported."
+        end
+        other = Series.new("", [other])
+      end
+      other
+    end
   end
 end