RubyGems - polars-df - Versions diffs - 0.1.2 → 0.1.3 - Mend

polars-df 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

checksums.yaml +4 -4
data/.yardopts +3 -0
data/CHANGELOG.md +4 -0
data/Cargo.lock +2 -1
data/README.md +1 -1
data/ext/polars/Cargo.toml +7 -1
data/ext/polars/src/conversion.rs +35 -2
data/ext/polars/src/dataframe.rs +228 -11
data/ext/polars/src/lazy/dataframe.rs +3 -3
data/ext/polars/src/lazy/dsl.rs +59 -2
data/ext/polars/src/lib.rs +151 -10
data/ext/polars/src/series.rs +182 -29
data/ext/polars/src/set.rs +91 -0
data/ext/polars/src/utils.rs +19 -0
data/lib/polars/batched_csv_reader.rb +1 -0
data/lib/polars/cat_expr.rb +39 -0
data/lib/polars/data_frame.rb +2284 -137
data/lib/polars/date_time_expr.rb +1282 -7
data/lib/polars/exceptions.rb +20 -0
data/lib/polars/expr.rb +612 -7
data/lib/polars/expr_dispatch.rb +14 -0
data/lib/polars/functions.rb +219 -0
data/lib/polars/group_by.rb +517 -0
data/lib/polars/io.rb +421 -2
data/lib/polars/lazy_frame.rb +1261 -67
data/lib/polars/lazy_functions.rb +288 -10
data/lib/polars/lazy_group_by.rb +79 -0
data/lib/polars/list_expr.rb +5 -0
data/lib/polars/meta_expr.rb +21 -0
data/lib/polars/series.rb +1476 -212
data/lib/polars/slice.rb +104 -0
data/lib/polars/string_expr.rb +663 -2
data/lib/polars/struct_expr.rb +73 -0
data/lib/polars/utils.rb +43 -3
data/lib/polars/version.rb +2 -1
data/lib/polars/when.rb +1 -0
data/lib/polars/when_then.rb +1 -0
data/lib/polars.rb +7 -10
metadata +9 -2

data/lib/polars/lazy_frame.rb CHANGED Viewed

@@ -152,29 +152,98 @@ module Polars
     # def self.read_json
     # end
-    # def columns
-    # end
+    # Get or set column names.
+    #
+    # @return [Array]
+    #
+    # @example
+    #   df = (
+    #      Polars::DataFrame.new(
+    #        {
+    #          "foo" => [1, 2, 3],
+    #          "bar" => [6, 7, 8],
+    #          "ham" => ["a", "b", "c"]
+    #        }
+    #      )
+    #      .lazy
+    #      .select(["foo", "bar"])
+    #   )
+    #   df.columns
+    #   # => ["foo", "bar"]
+    def columns
+      # Delegate to the wrapped `_ldf` object, which supplies the column names.
+      _ldf.columns
+    end
-    # def dtypes
-    # end
+    # Get dtypes of columns in LazyFrame.
+    #
+    # @return [Array]
+    #
+    # @example
+    #   lf = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   ).lazy
+    #   lf.dtypes
+    #   # => [:i64, :f64, :str]
+    def dtypes
+      # Delegate to the wrapped `_ldf` object, which supplies the column dtypes.
+      _ldf.dtypes
+    end
-    # def schema
-    # end
+    # Get the schema.
+    #
+    # @return [Hash]
+    #
+    # @example
+    #   lf = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   ).lazy
+    #   lf.schema
+    #   # => {"foo"=>:i64, "bar"=>:f64, "ham"=>:str}
+    def schema
+      # Delegate to the wrapped `_ldf` object, which supplies the schema.
+      _ldf.schema
+    end
-    # def width
-    # end
+    # Get the width of the LazyFrame.
+    #
+    # @return [Integer]
+    #
+    # @example
+    #   lf = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]}).lazy
+    #   lf.width
+    #   # => 2
+    def width
+      # Delegate to the wrapped `_ldf` object, which supplies the column count.
+      _ldf.width
+    end
-    # def include?(key)
-    # end
+    # Check if LazyFrame includes key.
+    #
+    # @param key [Object]
+    #   Column name to look for; it is compared against the entries of {#columns}.
+    #
+    # @return [Boolean]
+    def include?(key)
+      columns.include?(key)
+    end
     # clone handled by initialize_copy
     # def [](item)
     # end
-    # def to_s
-    # end
-    # alias_method :inspect, :to_s
+    # Returns a string representing the LazyFrame.
+    #
+    # The text is a fixed header line followed by the output of
+    # {#describe_plan}, i.e. the unoptimized ("naive") plan.
+    #
+    # NOTE(review): the header points readers at
+    # `LazyFrame#describe_optimized_plan`, which still appears as a
+    # commented-out stub in this file - confirm the method exists before
+    # relying on that hint.
+    #
+    # @return [String]
+    def to_s
+      <<~EOS
+        naive plan: (run LazyFrame#describe_optimized_plan to see the optimized plan)
+        #{describe_plan}
+      EOS
+    end
     # def write_json
     # end
@@ -182,22 +251,125 @@ module Polars
     # def pipe
     # end
-    # def describe_plan
-    # end
+    # Create a string representation of the unoptimized query plan.
+    #
+    # The string comes straight from the wrapped `_ldf` object; {#to_s} embeds
+    # it in its own output.
+    #
+    # @return [String]
+    def describe_plan
+      _ldf.describe_plan
+    end
+    # Create a string representation of the optimized query plan.
+    #
+    # @return [String]
     # def describe_optimized_plan
     # end
     # def show_graph
     # end
-    # def sort
-    # end
+    # Sort the DataFrame.
+    #
+    # Sorting can be done by:
+    #
+    # - A single column name
+    # - An expression
+    # - Multiple expressions
+    #
+    # @param by [Object]
+    #   Column (expressions) to sort by.
+    # @param reverse [Boolean]
+    #   Sort in descending order.
+    # @param nulls_last [Boolean]
+    #   Place null values last. Can only be used if sorted by a single column.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   ).lazy
+    #   df.sort("foo", reverse: true).collect
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ f64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 3   ┆ 8.0 ┆ c   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7.0 ┆ b   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 1   ┆ 6.0 ┆ a   │
+    #   # └─────┴─────┴─────┘
+    def sort(by, reverse: false, nulls_last: false)
+      if by.is_a?(String)
+        # Single column name: sort directly on that column. The explicit
+        # `return` matters - without it this result was discarded and the
+        # method fell through to the expression-based sort below.
+        return _from_rbldf(_ldf.sort(by, reverse, nulls_last))
+      end
+      # A lone boolean is wrapped in an array before it is handed to
+      # `sort_by_exprs`.
+      reverse = [reverse] if Utils.bool?(reverse)
+      by = Utils.selection_to_rbexpr_list(by)
+      _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last))
+    end
     # def profile
     # end
+    # Collect into a DataFrame.
+    #
+    # Note: use {#fetch} if you want to run your query on the first `n` rows
+    # only. This can be a huge time saver in debugging queries.
+    #
+    # @param type_coercion [Boolean]
+    #   Do type coercion optimization.
+    # @param predicate_pushdown [Boolean]
+    #   Do predicate pushdown optimization.
+    # @param projection_pushdown [Boolean]
+    #   Do projection pushdown optimization.
+    # @param simplify_expression [Boolean]
+    #   Run simplify expressions optimization.
+    # @param string_cache [Boolean]
+    #   This argument is deprecated. Please set the string cache globally.
+    #   The argument will be ignored
+    # @param no_optimization [Boolean]
+    #   Turn off (certain) optimizations.
+    # @param slice_pushdown [Boolean]
+    #   Slice pushdown optimization.
+    # @param common_subplan_elimination [Boolean]
+    #   Will try to cache branching subplans that occur on self-joins or unions.
+    # @param allow_streaming [Boolean]
+    #   Run parts of the query in a streaming fashion (this is in an alpha state)
     #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => ["a", "b", "a", "b", "b", "c"],
+    #       "b" => [1, 2, 3, 4, 5, 6],
+    #       "c" => [6, 5, 4, 3, 2, 1]
+    #     }
+    #   ).lazy
+    #   df.groupby("a", maintain_order: true).agg(Polars.all.sum).collect
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ a   ┆ b   ┆ c   │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ str ┆ i64 ┆ i64 │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ a   ┆ 4   ┆ 10  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ b   ┆ 11  ┆ 10  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ c   ┆ 6   ┆ 1   │
+    #   # └─────┴─────┴─────┘
     def collect(
       type_coercion: true,
       predicate_pushdown: true,
@@ -232,21 +404,184 @@ module Polars
       Utils.wrap_df(ldf.collect)
     end
-    # def fetch
-    # end
+    # Collect a small number of rows for debugging purposes.
+    #
+    # Fetch is like a {#collect} operation, but it overwrites the number of rows
+    # read by every scan operation. This is a utility that helps debug a query on a
+    # smaller number of rows.
+    #
+    # Note that the fetch does not guarantee the final number of rows in the
+    # DataFrame. Filter, join operations and a lower number of rows available in the
+    # scanned file influence the final number of rows.
+    #
+    # @param n_rows [Integer]
+    #   Collect n_rows from the data sources.
+    # @param type_coercion [Boolean]
+    #   Run type coercion optimization.
+    # @param predicate_pushdown [Boolean]
+    #   Run predicate pushdown optimization.
+    # @param projection_pushdown [Boolean]
+    #   Run projection pushdown optimization.
+    # @param simplify_expression [Boolean]
+    #   Run simplify expressions optimization.
+    # @param string_cache [Boolean]
+    #   This argument is deprecated. Please set the string cache globally.
+    #   The argument will be ignored
+    # @param no_optimization [Boolean]
+    #   Turn off optimizations.
+    # @param slice_pushdown [Boolean]
+    #   Slice pushdown optimization
+    # @param common_subplan_elimination [Boolean]
+    #   Will try to cache branching subplans that occur on self-joins or unions.
+    # @param allow_streaming [Boolean]
+    #   Run parts of the query in a streaming fashion (this is in an alpha state)
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => ["a", "b", "a", "b", "b", "c"],
+    #       "b" => [1, 2, 3, 4, 5, 6],
+    #       "c" => [6, 5, 4, 3, 2, 1]
+    #     }
+    #   ).lazy
+    #   df.groupby("a", maintain_order: true).agg(Polars.all.sum).fetch(2)
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ a   ┆ b   ┆ c   │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ str ┆ i64 ┆ i64 │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ a   ┆ 1   ┆ 6   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ b   ┆ 2   ┆ 5   │
+    #   # └─────┴─────┴─────┘
+    def fetch(
+      n_rows = 500,
+      type_coercion: true,
+      predicate_pushdown: true,
+      projection_pushdown: true,
+      simplify_expression: true,
+      string_cache: false,
+      no_optimization: false,
+      slice_pushdown: true,
+      common_subplan_elimination: true,
+      allow_streaming: false
+    )
+      # `no_optimization` overrides the four switches below; `type_coercion`,
+      # `simplify_expression` and `allow_streaming` are passed through as given.
+      # `string_cache` is accepted but never read in this method.
+      if no_optimization
+        predicate_pushdown = projection_pushdown = false
+        slice_pushdown = common_subplan_elimination = false
+      end
+      toggled = _ldf.optimization_toggle(
+        type_coercion,
+        predicate_pushdown,
+        projection_pushdown,
+        simplify_expression,
+        slice_pushdown,
+        common_subplan_elimination,
+        allow_streaming
+      )
+      fetched = toggled.fetch(n_rows)
+      Utils.wrap_df(fetched)
+    end
+    # Return lazy representation, i.e. itself.
     #
+    # Useful for writing code that expects either a `DataFrame` or
+    # `LazyFrame`.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [nil, 2, 3, 4],
+    #       "b" => [0.5, nil, 2.5, 13],
+    #       "c" => [true, true, false, nil]
+    #     }
+    #   )
+    #   df.lazy
     def lazy
       # This object is already the lazy representation, so hand back the receiver.
       self
     end
-    # def cache
-    # end
+    # Cache the result once the execution of the physical plan hits this node.
+    #
+    # @return [LazyFrame]
+    def cache
+      # Wrap the result of the underlying `cache` call via `_from_rbldf`.
+      _from_rbldf(_ldf.cache)
+    end
-    # def cleared
-    # end
+    # Create an empty copy of the current LazyFrame.
+    #
+    # The copy has an identical schema but no data.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [nil, 2, 3, 4],
+    #       "b" => [0.5, nil, 2.5, 13],
+    #       "c" => [true, true, false, nil],
+    #     }
+    #   ).lazy
+    #   df.cleared.fetch
+    #   # =>
+    #   # shape: (0, 3)
+    #   # ┌─────┬─────┬──────┐
+    #   # │ a   ┆ b   ┆ c    │
+    #   # │ --- ┆ --- ┆ ---  │
+    #   # │ i64 ┆ f64 ┆ bool │
+    #   # ╞═════╪═════╪══════╡
+    #   # └─────┴─────┴──────┘
+    def cleared
+      # Build an eager DataFrame from the schema alone (passed as `columns:`),
+      # then convert it back to a lazy frame.
+      DataFrame.new(columns: schema).lazy
+    end
+    # Filter the rows in the DataFrame based on a predicate expression.
     #
+    # @param predicate [Object]
+    #   Expression that evaluates to a boolean Series.
+    #
+    # @return [LazyFrame]
+    #
+    # @example Filter on one condition:
+    #   lf = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   ).lazy
+    #   lf.filter(Polars.col("foo") < 3).collect()
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7   ┆ b   │
+    #   # └─────┴─────┴─────┘
+    #
+    # @example Filter on multiple conditions:
+    #   lf.filter((Polars.col("foo") < 3) & (Polars.col("ham") == "a")).collect
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # └─────┴─────┴─────┘
     def filter(predicate)
       _from_rbldf(
         _ldf.filter(
@@ -255,11 +590,136 @@ module Polars
       )
     end
+    # Select columns from this DataFrame.
+    #
+    # @param exprs [Object]
+    #   Column or columns to select.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"],
+    #     }
+    #   ).lazy
+    #   df.select("foo").collect
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌─────┐
+    #   # │ foo │
+    #   # │ --- │
+    #   # │ i64 │
+    #   # ╞═════╡
+    #   # │ 1   │
+    #   # ├╌╌╌╌╌┤
+    #   # │ 2   │
+    #   # ├╌╌╌╌╌┤
+    #   # │ 3   │
+    #   # └─────┘
+    #
+    # @example
+    #   df.select(["foo", "bar"]).collect
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌─────┬─────┐
+    #   # │ foo ┆ bar │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ 6   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 8   │
+    #   # └─────┴─────┘
+    #
+    # @example
+    #   df.select(Polars.col("foo") + 1).collect
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌─────┐
+    #   # │ foo │
+    #   # │ --- │
+    #   # │ i64 │
+    #   # ╞═════╡
+    #   # │ 2   │
+    #   # ├╌╌╌╌╌┤
+    #   # │ 3   │
+    #   # ├╌╌╌╌╌┤
+    #   # │ 4   │
+    #   # └─────┘
+    #
+    # @example
+    #   df.select([Polars.col("foo") + 1, Polars.col("bar") + 1]).collect
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌─────┬─────┐
+    #   # │ foo ┆ bar │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 2   ┆ 7   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 8   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 4   ┆ 9   │
+    #   # └─────┴─────┘
+    #
+    # @example
+    #   df.select(Polars.when(Polars.col("foo") > 2).then(10).otherwise(0)).collect
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌─────────┐
+    #   # │ literal │
+    #   # │ ---     │
+    #   # │ i64     │
+    #   # ╞═════════╡
+    #   # │ 0       │
+    #   # ├╌╌╌╌╌╌╌╌╌┤
+    #   # │ 0       │
+    #   # ├╌╌╌╌╌╌╌╌╌┤
+    #   # │ 10      │
+    #   # └─────────┘
     def select(exprs)
       # Normalise the selection into a list of native expressions first.
       rbexprs = Utils.selection_to_rbexpr_list(exprs)
       selected = _ldf.select(rbexprs)
       _from_rbldf(selected)
     end
+    # Start a groupby operation.
+    #
+    # @param by [Object]
+    #   Column(s) to group by.
+    # @param maintain_order [Boolean]
+    #   Make sure that the order of the groups remain consistent. This is more
+    #   expensive than a default groupby.
+    #
+    # @return [LazyGroupBy]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => ["a", "b", "a", "b", "b", "c"],
+    #       "b" => [1, 2, 3, 4, 5, 6],
+    #       "c" => [6, 5, 4, 3, 2, 1]
+    #     }
+    #   ).lazy
+    #   df.groupby("a", maintain_order: true).agg(Polars.col("b").sum).collect
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ str ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ a   ┆ 4   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ b   ┆ 11  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ c   ┆ 6   │
+    #   # └─────┴─────┘
     def groupby(by, maintain_order: false)
       rbexprs_by = Utils.selection_to_rbexpr_list(by)
       lgb = _ldf.groupby(rbexprs_by, maintain_order)
@@ -275,7 +735,116 @@ module Polars
     # def join_asof
     # end
+    # Add a join operation to the Logical Plan.
+    #
+    # @param other [LazyFrame]
+    #   Lazy DataFrame to join with.
+    # @param left_on [Object]
+    #   Join column of the left DataFrame.
+    # @param right_on [Object]
+    #   Join column of the right DataFrame.
+    # @param on [Object]
+    #   Join column of both DataFrames. If set, `left_on` and `right_on` should be
+    #   `nil`.
+    # @param how ["inner", "left", "outer", "semi", "anti", "cross"]
+    #   Join strategy.
+    # @param suffix [String]
+    #   Suffix to append to columns with a duplicate name.
+    # @param allow_parallel [Boolean]
+    #   Allow the physical plan to optionally evaluate the computation of both
+    #   DataFrames up to the join in parallel.
+    # @param force_parallel [Boolean]
+    #   Force the physical plan to evaluate the computation of both DataFrames up to
+    #   the join in parallel.
     #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   ).lazy
+    #   other_df = Polars::DataFrame.new(
+    #     {
+    #       "apple" => ["x", "y", "z"],
+    #       "ham" => ["a", "b", "d"]
+    #     }
+    #   ).lazy
+    #   df.join(other_df, on: "ham").collect
+    #   # =>
+    #   # shape: (2, 4)
+    #   # ┌─────┬─────┬─────┬───────┐
+    #   # │ foo ┆ bar ┆ ham ┆ apple │
+    #   # │ --- ┆ --- ┆ --- ┆ ---   │
+    #   # │ i64 ┆ f64 ┆ str ┆ str   │
+    #   # ╞═════╪═════╪═════╪═══════╡
+    #   # │ 1   ┆ 6.0 ┆ a   ┆ x     │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 2   ┆ 7.0 ┆ b   ┆ y     │
+    #   # └─────┴─────┴─────┴───────┘
+    #
+    # @example
+    #   df.join(other_df, on: "ham", how: "outer").collect
+    #   # =>
+    #   # shape: (4, 4)
+    #   # ┌──────┬──────┬─────┬───────┐
+    #   # │ foo  ┆ bar  ┆ ham ┆ apple │
+    #   # │ ---  ┆ ---  ┆ --- ┆ ---   │
+    #   # │ i64  ┆ f64  ┆ str ┆ str   │
+    #   # ╞══════╪══════╪═════╪═══════╡
+    #   # │ 1    ┆ 6.0  ┆ a   ┆ x     │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 2    ┆ 7.0  ┆ b   ┆ y     │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ null ┆ null ┆ d   ┆ z     │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 3    ┆ 8.0  ┆ c   ┆ null  │
+    #   # └──────┴──────┴─────┴───────┘
+    #
+    # @example
+    #   df.join(other_df, on: "ham", how: "left").collect
+    #   # =>
+    #   # shape: (3, 4)
+    #   # ┌─────┬─────┬─────┬───────┐
+    #   # │ foo ┆ bar ┆ ham ┆ apple │
+    #   # │ --- ┆ --- ┆ --- ┆ ---   │
+    #   # │ i64 ┆ f64 ┆ str ┆ str   │
+    #   # ╞═════╪═════╪═════╪═══════╡
+    #   # │ 1   ┆ 6.0 ┆ a   ┆ x     │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 2   ┆ 7.0 ┆ b   ┆ y     │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 3   ┆ 8.0 ┆ c   ┆ null  │
+    #   # └─────┴─────┴─────┴───────┘
+    #
+    # @example
+    #   df.join(other_df, on: "ham", how: "semi").collect
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ f64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6.0 ┆ a   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7.0 ┆ b   │
+    #   # └─────┴─────┴─────┘
+    #
+    # @example
+    #   df.join(other_df, on: "ham", how: "anti").collect
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ f64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 3   ┆ 8.0 ┆ c   │
+    #   # └─────┴─────┴─────┘
     def join(
       other,
       left_on: nil,
@@ -322,6 +891,43 @@ module Polars
       )
     end
+    # Add or overwrite multiple columns in a DataFrame.
+    #
+    # @param exprs [Object]
+    #   List of Expressions that evaluate to columns.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   ldf = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3, 4],
+    #       "b" => [0.5, 4, 10, 13],
+    #       "c" => [true, true, false, true]
+    #     }
+    #   ).lazy
+    #   ldf.with_columns(
+    #     [
+    #       (Polars.col("a") ** 2).alias("a^2"),
+    #       (Polars.col("b") / 2).alias("b/2"),
+    #       (Polars.col("c").is_not()).alias("not c")
+    #     ]
+    #   ).collect
+    #   # =>
+    #   # shape: (4, 6)
+    #   # ┌─────┬──────┬───────┬──────┬──────┬───────┐
+    #   # │ a   ┆ b    ┆ c     ┆ a^2  ┆ b/2  ┆ not c │
+    #   # │ --- ┆ ---  ┆ ---   ┆ ---  ┆ ---  ┆ ---   │
+    #   # │ i64 ┆ f64  ┆ bool  ┆ f64  ┆ f64  ┆ bool  │
+    #   # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
+    #   # │ 1   ┆ 0.5  ┆ true  ┆ 1.0  ┆ 0.25 ┆ false │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 2   ┆ 4.0  ┆ true  ┆ 4.0  ┆ 2.0  ┆ false │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 3   ┆ 10.0 ┆ false ┆ 9.0  ┆ 5.0  ┆ true  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 4   ┆ 13.0 ┆ true  ┆ 16.0 ┆ 6.5  ┆ false │
+    #   # └─────┴──────┴───────┴──────┴──────┴───────┘
     def with_columns(exprs)
       exprs =
         if exprs.nil?
@@ -350,58 +956,343 @@ module Polars
     # def with_context
     # end
+    # Add or overwrite column in a DataFrame.
+    #
+    # @param column [Object]
+    #   Expression that evaluates to column or a Series to use.
+    #
+    # @return [LazyFrame]
     #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 3, 5],
+    #       "b" => [2, 4, 6]
+    #     }
+    #   ).lazy
+    #   df.with_column((Polars.col("b") ** 2).alias("b_squared")).collect
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌─────┬─────┬───────────┐
+    #   # │ a   ┆ b   ┆ b_squared │
+    #   # │ --- ┆ --- ┆ ---       │
+    #   # │ i64 ┆ i64 ┆ f64       │
+    #   # ╞═════╪═════╪═══════════╡
+    #   # │ 1   ┆ 2   ┆ 4.0       │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ 3   ┆ 4   ┆ 16.0      │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ 5   ┆ 6   ┆ 36.0      │
+    #   # └─────┴─────┴───────────┘
+    #
+    # @example
+    #   df.with_column(Polars.col("a") ** 2).collect
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌──────┬─────┐
+    #   # │ a    ┆ b   │
+    #   # │ ---  ┆ --- │
+    #   # │ f64  ┆ i64 │
+    #   # ╞══════╪═════╡
+    #   # │ 1.0  ┆ 2   │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 9.0  ┆ 4   │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 25.0 ┆ 6   │
+    #   # └──────┴─────┘
     def with_column(column)
       # Single-column convenience: wrap the argument in an array and reuse
       # `with_columns`.
       with_columns([column])
     end
-    # def drop
-    # end
+    # Remove one or multiple columns from a DataFrame.
+    #
+    # @param columns [Object]
+    #   - Name of the column that should be removed.
+    #   - List of column names.
+    #
+    # @return [LazyFrame]
+    def drop(columns)
+      # A single name is wrapped in an array; anything else is passed as-is.
+      names = columns.is_a?(String) ? [columns] : columns
+      _from_rbldf(_ldf.drop_columns(names))
+    end
+    # Rename column names.
+    #
+    # @param mapping [Hash]
+    #   Key value pairs that map from old name to new name.
     #
+    # @return [LazyFrame]
     def rename(mapping)
       # Split the hash into parallel lists: old names and their replacements.
       old_names = mapping.keys
       new_names = mapping.values
       renamed = _ldf.rename(old_names, new_names)
       _from_rbldf(renamed)
     end
-    # def reverse
-    # end
+    # Reverse the DataFrame.
+    #
+    # The result of the underlying `reverse` call is wrapped via `_from_rbldf`.
+    #
+    # @return [LazyFrame]
+    def reverse
+      _from_rbldf(_ldf.reverse)
+    end
-    # def shift
-    # end
+    # Shift the values by a given period.
+    #
+    # @param periods [Integer]
+    #   Number of places to shift (may be negative).
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 3, 5],
+    #       "b" => [2, 4, 6]
+    #     }
+    #   ).lazy
+    #   df.shift(1).collect
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌──────┬──────┐
+    #   # │ a    ┆ b    │
+    #   # │ ---  ┆ ---  │
+    #   # │ i64  ┆ i64  │
+    #   # ╞══════╪══════╡
+    #   # │ null ┆ null │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ 1    ┆ 2    │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ 3    ┆ 4    │
+    #   # └──────┴──────┘
+    #
+    # @example
+    #   df.shift(-1).collect
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌──────┬──────┐
+    #   # │ a    ┆ b    │
+    #   # │ ---  ┆ ---  │
+    #   # │ i64  ┆ i64  │
+    #   # ╞══════╪══════╡
+    #   # │ 3    ┆ 4    │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ 5    ┆ 6    │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ null ┆ null │
+    #   # └──────┴──────┘
+    def shift(periods)
+      # `periods` is forwarded unchanged to the underlying `shift` call.
+      _from_rbldf(_ldf.shift(periods))
+    end
-    # def shift_and_fill
-    # end
+    # Shift the values by a given period and fill the resulting null values.
+    #
+    # @param periods [Integer]
+    #   Number of places to shift (may be negative).
+    # @param fill_value [Object]
+    #   Fill `nil` values with the result of this expression.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 3, 5],
+    #       "b" => [2, 4, 6]
+    #     }
+    #   ).lazy
+    #   df.shift_and_fill(1, 0).collect
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 0   ┆ 0   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 1   ┆ 2   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 4   │
+    #   # └─────┴─────┘
+    #
+    # @example
+    #   df.shift_and_fill(-1, 0).collect
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 3   ┆ 4   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 5   ┆ 6   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 0   ┆ 0   │
+    #   # └─────┴─────┘
+    def shift_and_fill(periods, fill_value)
+      # Plain values are turned into a literal expression; an Expr is used as-is.
+      fill_expr = fill_value.is_a?(Expr) ? fill_value : Polars.lit(fill_value)
+      _from_rbldf(_ldf.shift_and_fill(periods, fill_expr._rbexpr))
+    end
-    # def slice
-    # end
+    # Get a slice of this DataFrame.
+    #
+    # @param offset [Integer]
+    #   Start index. Negative indexing is supported.
+    # @param length [Integer]
+    #   Length of the slice. If set to `nil`, all rows starting at the offset
+    #   will be selected.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => ["x", "y", "z"],
+    #       "b" => [1, 3, 5],
+    #       "c" => [2, 4, 6]
+    #     }
+    #   ).lazy
+    #   df.slice(1, 2).collect
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ a   ┆ b   ┆ c   │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ str ┆ i64 ┆ i64 │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ y   ┆ 3   ┆ 4   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ z   ┆ 5   ┆ 6   │
+    #   # └─────┴─────┴─────┘
+    def slice(offset, length = nil)
+      # A missing (nil) length is allowed; only an explicit negative one is rejected.
+      negative_length = length && length < 0
+      raise ArgumentError, "Negative slice lengths (#{length}) are invalid for LazyFrame" if negative_length
+      _from_rbldf(_ldf.slice(offset, length))
+    end
-    # def limit
-    # end
+    # Get the first `n` rows.
+    #
+    # Alias for {#head}.
+    #
+    # @param n [Integer]
+    #   Number of rows to return. Defaults to 5.
+    #
+    # @return [LazyFrame]
+    #
+    # @note
+    #   Consider using the {#fetch} operation if you only want to test your
+    #   query. The {#fetch} operation will load the first `n` rows at the scan
+    #   level, whereas the {#head}/{#limit} are applied at the end.
+    def limit(n = 5)
+      # Forward the caller's `n`; the previous hard-coded `head(5)` silently
+      # ignored the argument.
+      head(n)
+    end
-    # def head
-    # end
+    # Get the first `n` rows.
+    #
+    # Implemented as `slice(0, n)`.
+    #
+    # @param n [Integer]
+    #   Number of rows to return. Defaults to 5.
+    #
+    # @return [LazyFrame]
+    #
+    # @note
+    #   Consider using the {#fetch} operation if you only want to test your
+    #   query. The {#fetch} operation will load the first `n` rows at the scan
+    #   level, whereas the {#head}/{#limit} are applied at the end.
+    def head(n = 5)
+      slice(0, n)
+    end
-    # def tail
-    # end
+    # Get the last `n` rows.
+    #
+    # @param n [Integer]
+    #   Number of rows. Defaults to 5.
+    #
+    # @return [LazyFrame]
+    def tail(n = 5)
+      _from_rbldf(_ldf.tail(n))
+    end
-    # def last
-    # end
+    # Get the last row of the DataFrame.
+    #
+    # Equivalent to `tail(1)`.
+    #
+    # @return [LazyFrame]
+    def last
+      tail(1)
+    end
-    # def first
-    # end
+    # Get the first row of the DataFrame.
+    #
+    # Equivalent to `slice(0, 1)`.
+    #
+    # @return [LazyFrame]
+    def first
+      slice(0, 1)
+    end
     # def with_row_count
     # end
-    # def take_every
-    # end
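+    # Take every nth row in the LazyFrame and return as a new LazyFrame.
+    #
+    # @param n [Integer]
+    #   Step between the rows that are kept (see the example below, where
+    #   `2` keeps the first and third rows).
+    #
+    # @return [LazyFrame]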
+    #
+    # @example
+    #   s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]}).lazy
+    #   s.take_every(2).collect
+    #   # =>
+    #   # shape: (2, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ 5   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 7   │
+    #   # └─────┴─────┘
+    def take_every(n)
+      # Apply `take_every(n)` to the wildcard column expression ("*") and
+      # select the result.
+      select(Utils.col("*").take_every(n))
+    end
     # def fill_null
     # end
+    # Fill floating point NaN values.
+    #
+    # @param fill_value [Object]
+    #   Value to fill the NaN values with.
+    #
+    # @return [LazyFrame]
+    #
+    # @note
+    #   Note that floating point NaN (Not a Number) are not missing values!
+    #   To replace missing values, use `fill_null` instead.
     #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1.5, 2, Float::NAN, 4],
+    #       "b" => [0.5, 4, Float::NAN, 13],
+    #     }
+    #   ).lazy
+    #   df.fill_nan(99).collect
+    #   # =>
+    #   # shape: (4, 2)
+    #   # ┌──────┬──────┐
+    #   # │ a    ┆ b    │
+    #   # │ ---  ┆ ---  │
+    #   # │ f64  ┆ f64  │
+    #   # ╞══════╪══════╡
+    #   # │ 1.5  ┆ 0.5  │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ 2.0  ┆ 4.0  │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ 99.0 ┆ 99.0 │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ 4.0  ┆ 13.0 │
+    #   # └──────┴──────┘
     def fill_nan(fill_value)
       if !fill_value.is_a?(Expr)
         fill_value = Utils.lit(fill_value)
@@ -409,38 +1300,255 @@ module Polars
       _from_rbldf(_ldf.fill_nan(fill_value._rbexpr))
     end
-    # def std
-    # end
+    # Aggregate the columns in the DataFrame to their standard deviation value.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
+    #   df.std.collect
+    #   # =>
+    #   # shape: (1, 2)
+    #   # ┌──────────┬─────┐
+    #   # │ a        ┆ b   │
+    #   # │ ---      ┆ --- │
+    #   # │ f64      ┆ f64 │
+    #   # ╞══════════╪═════╡
+    #   # │ 1.290994 ┆ 0.5 │
+    #   # └──────────┴─────┘
+    #
+    # @example
+    #   df.std(ddof: 0).collect
+    #   # =>
+    #   # shape: (1, 2)
+    #   # ┌──────────┬──────────┐
+    #   # │ a        ┆ b        │
+    #   # │ ---      ┆ ---      │
+    #   # │ f64      ┆ f64      │
+    #   # ╞══════════╪══════════╡
+    #   # │ 1.118034 ┆ 0.433013 │
+    #   # └──────────┴──────────┘
+    def std(ddof: 1)
+      # Delegate to the underlying lazy frame, then wrap the result again.
+      aggregated = _ldf.std(ddof)
+      _from_rbldf(aggregated)
+    end
-    # def var
-    # end
+    # Aggregate the columns in the DataFrame to their variance value.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
+    #   df.var.collect
+    #   # =>
+    #   # shape: (1, 2)
+    #   # ┌──────────┬──────┐
+    #   # │ a        ┆ b    │
+    #   # │ ---      ┆ ---  │
+    #   # │ f64      ┆ f64  │
+    #   # ╞══════════╪══════╡
+    #   # │ 1.666667 ┆ 0.25 │
+    #   # └──────────┴──────┘
+    #
+    # @example
+    #   df.var(ddof: 0).collect
+    #   # =>
+    #   # shape: (1, 2)
+    #   # ┌──────┬────────┐
+    #   # │ a    ┆ b      │
+    #   # │ ---  ┆ ---    │
+    #   # │ f64  ┆ f64    │
+    #   # ╞══════╪════════╡
+    #   # │ 1.25 ┆ 0.1875 │
+    #   # └──────┴────────┘
+    def var(ddof: 1)
+      # Delegate to the underlying lazy frame, then wrap the result again.
+      aggregated = _ldf.var(ddof)
+      _from_rbldf(aggregated)
+    end
-    # def max
-    # end
+    # Aggregate the columns in the DataFrame to their maximum value.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
+    #   df.max.collect
+    #   # =>
+    #   # shape: (1, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 4   ┆ 2   │
+    #   # └─────┴─────┘
+    def max
+      # Compute on the underlying lazy frame and wrap the result.
+      maxima = _ldf.max
+      _from_rbldf(maxima)
+    end
-    # def min
-    # end
+    # Aggregate the columns in the DataFrame to their minimum value.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
+    #   df.min.collect
+    #   # =>
+    #   # shape: (1, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ 1   │
+    #   # └─────┴─────┘
+    def min
+      # Compute on the underlying lazy frame and wrap the result.
+      minima = _ldf.min
+      _from_rbldf(minima)
+    end
-    # def sum
-    # end
+    # Aggregate the columns in the DataFrame to their sum value.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
+    #   df.sum.collect
+    #   # =>
+    #   # shape: (1, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 10  ┆ 5   │
+    #   # └─────┴─────┘
+    def sum
+      # Compute on the underlying lazy frame and wrap the result.
+      totals = _ldf.sum
+      _from_rbldf(totals)
+    end
-    # def mean
-    # end
+    # Aggregate the columns in the DataFrame to their mean value.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
+    #   df.mean.collect
+    #   # =>
+    #   # shape: (1, 2)
+    #   # ┌─────┬──────┐
+    #   # │ a   ┆ b    │
+    #   # │ --- ┆ ---  │
+    #   # │ f64 ┆ f64  │
+    #   # ╞═════╪══════╡
+    #   # │ 2.5 ┆ 1.25 │
+    #   # └─────┴──────┘
+    def mean
+      # Compute on the underlying lazy frame and wrap the result.
+      averages = _ldf.mean
+      _from_rbldf(averages)
+    end
-    # def median
-    # end
+    # Aggregate the columns in the DataFrame to their median value.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
+    #   df.median.collect
+    #   # =>
+    #   # shape: (1, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ f64 ┆ f64 │
+    #   # ╞═════╪═════╡
+    #   # │ 2.5 ┆ 1.0 │
+    #   # └─────┴─────┘
+    def median
+      # Compute on the underlying lazy frame and wrap the result.
+      medians = _ldf.median
+      _from_rbldf(medians)
+    end
-    # def quantile
-    # end
+    # Aggregate the columns in the DataFrame to their quantile value.
+    #
+    # @param quantile [Float]
+    #   Quantile between 0.0 and 1.0.
+    # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
+    #   Interpolation method.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
+    #   df.quantile(0.7).collect
+    #   # =>
+    #   # shape: (1, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ f64 ┆ f64 │
+    #   # ╞═════╪═════╡
+    #   # │ 3.0 ┆ 1.0 │
+    #   # └─────┴─────┘
+    def quantile(quantile, interpolation: "nearest")
+      # Both arguments are forwarded positionally to the underlying lazy frame.
+      aggregated = _ldf.quantile(quantile, interpolation)
+      _from_rbldf(aggregated)
+    end
+    # Explode lists to long format.
+    #
+    # @return [LazyFrame]
     #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "letters" => ["a", "a", "b", "c"],
+    #       "numbers" => [[1], [2, 3], [4, 5], [6, 7, 8]],
+    #     }
+    #   ).lazy
+    #   df.explode("numbers").collect
+    #   # =>
+    #   # shape: (8, 2)
+    #   # ┌─────────┬─────────┐
+    #   # │ letters ┆ numbers │
+    #   # │ ---     ┆ ---     │
+    #   # │ str     ┆ i64     │
+    #   # ╞═════════╪═════════╡
+    #   # │ a       ┆ 1       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ a       ┆ 2       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ a       ┆ 3       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ b       ┆ 4       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ b       ┆ 5       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ c       ┆ 6       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ c       ┆ 7       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ c       ┆ 8       │
+    #   # └─────────┴─────────┘
     def explode(columns)
       # Convert the selection with Utils.selection_to_rbexpr_list before
       # handing it to the underlying lazy frame.
       rbexprs = Utils.selection_to_rbexpr_list(columns)
       _from_rbldf(_ldf.explode(rbexprs))
     end
-    # def unique
-    # end
+    # Drop duplicate rows from this DataFrame.
+    #
+    # Note that this fails if there is a column of type `List` in the DataFrame or
+    # subset.
+    #
+    # @param maintain_order [Boolean]
+    #   Keep the same order as the original DataFrame. This requires more work to
+    #   compute.
+    # @param subset [Object]
+    #   Subset to use to compare rows.
+    # @param keep ["first", "last"]
+    #   Which of the duplicate rows to keep.
+    #
+    # @return [LazyFrame]
+    def unique(maintain_order: true, subset: nil, keep: "first")
+      # A lone (non-nil, non-Array) subset value is wrapped in an Array;
+      # nil and Arrays are passed through untouched.
+      subset = [subset] unless subset.nil? || subset.is_a?(Array)
+      deduplicated = _ldf.unique(maintain_order, subset, keep)
+      _from_rbldf(deduplicated)
+    end
     # def drop_nulls
     # end
@@ -451,11 +1559,97 @@ module Polars
     # def map
     # end
-    # def interpolate
-    # end
+    # Interpolate intermediate values. The interpolation method is linear.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, nil, 9, 10],
+    #       "bar" => [6, 7, 9, nil],
+    #       "baz" => [1, nil, nil, 9]
+    #     }
+    #   ).lazy
+    #   df.interpolate.collect
+    #   # =>
+    #   # shape: (4, 3)
+    #   # ┌─────┬──────┬─────┐
+    #   # │ foo ┆ bar  ┆ baz │
+    #   # │ --- ┆ ---  ┆ --- │
+    #   # │ i64 ┆ i64  ┆ i64 │
+    #   # ╞═════╪══════╪═════╡
+    #   # │ 1   ┆ 6    ┆ 1   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 5   ┆ 7    ┆ 3   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 9   ┆ 9    ┆ 6   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 10  ┆ null ┆ 9   │
+    #   # └─────┴──────┴─────┘
+    def interpolate
+      # Build the `interpolate` expression on the "*" column selector, then select it.
+      interpolated = Utils.col("*").interpolate
+      select(interpolated)
+    end
-    # def unnest
-    # end
+    # Decompose a struct into its fields.
+    #
+    # The fields will be inserted into the `DataFrame` on the location of the
+    # `struct` type.
+    #
+    # @param names [Object]
+    #   Names of the struct columns that will be decomposed into their fields.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   df = (
+    #     Polars::DataFrame.new(
+    #       {
+    #         "before" => ["foo", "bar"],
+    #         "t_a" => [1, 2],
+    #         "t_b" => ["a", "b"],
+    #         "t_c" => [true, nil],
+    #         "t_d" => [[1, 2], [3]],
+    #         "after" => ["baz", "womp"]
+    #       }
+    #     )
+    #     .lazy
+    #     .select(
+    #       ["before", Polars.struct(Polars.col("^t_.$")).alias("t_struct"), "after"]
+    #     )
+    #   )
+    #   df.fetch
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌────────┬─────────────────────┬───────┐
+    #   # │ before ┆ t_struct            ┆ after │
+    #   # │ ---    ┆ ---                 ┆ ---   │
+    #   # │ str    ┆ struct[4]           ┆ str   │
+    #   # ╞════════╪═════════════════════╪═══════╡
+    #   # │ foo    ┆ {1,"a",true,[1, 2]} ┆ baz   │
+    #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ bar    ┆ {2,"b",null,[3]}    ┆ womp  │
+    #   # └────────┴─────────────────────┴───────┘
+    #
+    # @example
+    #   df.unnest("t_struct").fetch
+    #   # =>
+    #   # shape: (2, 6)
+    #   # ┌────────┬─────┬─────┬──────┬───────────┬───────┐
+    #   # │ before ┆ t_a ┆ t_b ┆ t_c  ┆ t_d       ┆ after │
+    #   # │ ---    ┆ --- ┆ --- ┆ ---  ┆ ---       ┆ ---   │
+    #   # │ str    ┆ i64 ┆ str ┆ bool ┆ list[i64] ┆ str   │
+    #   # ╞════════╪═════╪═════╪══════╪═══════════╪═══════╡
+    #   # │ foo    ┆ 1   ┆ a   ┆ true ┆ [1, 2]    ┆ baz   │
+    #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ bar    ┆ 2   ┆ b   ┆ null ┆ [3]       ┆ womp  │
+    #   # └────────┴─────┴─────┴──────┴───────────┴───────┘
+    def unnest(names)
+      # A single String name is shorthand for a one-element list of names.
+      names = [names] if names.is_a?(String)
+      unnested = _ldf.unnest(names)
+      _from_rbldf(unnested)
+    end
     private