RubyGems - polars-df - Versions diffs - 0.20.0-x64-mingw-ucrt → 0.21.1-x64-mingw-ucrt - Mend

polars-df 0.20.0-x64-mingw-ucrt → 0.21.1-x64-mingw-ucrt

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +27 -0
data/Cargo.lock +192 -186
data/LICENSE-THIRD-PARTY.txt +2153 -2532
data/LICENSE.txt +1 -1
data/lib/polars/3.2/polars.so +0 -0
data/lib/polars/3.3/polars.so +0 -0
data/lib/polars/3.4/polars.so +0 -0
data/lib/polars/array_expr.rb +382 -3
data/lib/polars/array_name_space.rb +281 -0
data/lib/polars/binary_expr.rb +67 -0
data/lib/polars/binary_name_space.rb +43 -0
data/lib/polars/cat_expr.rb +224 -0
data/lib/polars/cat_name_space.rb +130 -32
data/lib/polars/catalog/unity/catalog_info.rb +20 -0
data/lib/polars/catalog/unity/column_info.rb +31 -0
data/lib/polars/catalog/unity/namespace_info.rb +21 -0
data/lib/polars/catalog/unity/table_info.rb +50 -0
data/lib/polars/catalog.rb +448 -0
data/lib/polars/config.rb +2 -2
data/lib/polars/convert.rb +12 -2
data/lib/polars/data_frame.rb +834 -48
data/lib/polars/data_type_expr.rb +52 -0
data/lib/polars/data_types.rb +61 -5
data/lib/polars/date_time_expr.rb +251 -0
data/lib/polars/date_time_name_space.rb +299 -0
data/lib/polars/exceptions.rb +7 -2
data/lib/polars/expr.rb +1247 -211
data/lib/polars/functions/col.rb +6 -5
data/lib/polars/functions/datatype.rb +21 -0
data/lib/polars/functions/lazy.rb +127 -15
data/lib/polars/functions/repeat.rb +4 -0
data/lib/polars/io/csv.rb +19 -1
data/lib/polars/io/json.rb +16 -0
data/lib/polars/io/ndjson.rb +13 -0
data/lib/polars/io/parquet.rb +70 -66
data/lib/polars/io/scan_options.rb +47 -0
data/lib/polars/lazy_frame.rb +1099 -95
data/lib/polars/list_expr.rb +400 -11
data/lib/polars/list_name_space.rb +321 -5
data/lib/polars/meta_expr.rb +71 -22
data/lib/polars/name_expr.rb +36 -0
data/lib/polars/scan_cast_options.rb +64 -0
data/lib/polars/schema.rb +84 -3
data/lib/polars/selector.rb +210 -0
data/lib/polars/selectors.rb +932 -203
data/lib/polars/series.rb +1083 -63
data/lib/polars/string_expr.rb +435 -9
data/lib/polars/string_name_space.rb +729 -45
data/lib/polars/struct_expr.rb +103 -0
data/lib/polars/struct_name_space.rb +19 -1
data/lib/polars/utils/parse.rb +40 -0
data/lib/polars/utils/various.rb +18 -1
data/lib/polars/utils.rb +9 -1
data/lib/polars/version.rb +1 -1
data/lib/polars.rb +10 -0
metadata +12 -2

data/lib/polars/data_frame.rb CHANGED Viewed

@@ -15,11 +15,11 @@ module Polars
     #   The schema of the resulting DataFrame. The schema may be declared in several
     #   ways:
     #
-    #   * As a hash of name:type pairs; if type is nil, it will be auto-inferred.
+    #   * As a hash of \\\\{name:type} pairs; if type is nil, it will be auto-inferred.
     #   * As an array of column names; in this case types are automatically inferred.
-    #   * As an array of (name,type) pairs; this is equivalent to the dictionary form.
+    #   * As an array of (name,type) pairs; this is equivalent to the hash form.
     #
-    #   If you supply a list of column names that does not match the names in the
+    #   If you supply an array of column names that does not match the names in the
     #   underlying data, the names given here will overwrite them. The number
     #   of names given in the schema should match the underlying data dimensions.
     #
@@ -47,12 +47,7 @@ module Polars
     # @param nan_to_null [Boolean]
     #   If the data comes from one or more Numo arrays, can optionally convert input
     #   data NaN values to null instead. This is a no-op for all other input data.
-    def initialize(data = nil, schema: nil, columns: nil, schema_overrides: nil, strict: true, orient: nil, infer_schema_length: 100, nan_to_null: false)
-      if schema && columns
-        warn "columns is ignored when schema is passed"
-      end
-      schema ||= columns
+    def initialize(data = nil, schema: nil, schema_overrides: nil, strict: true, orient: nil, infer_schema_length: 100, nan_to_null: false)
       if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
         raise ArgumentError, "Use read_database instead"
       end
@@ -565,7 +560,7 @@ module Polars
       end
     end
-    # Convert every row to a dictionary.
+    # Convert every row to a hash.
     #
     # Note that this is slow.
     #
@@ -722,7 +717,7 @@ module Polars
     # @param file [String, nil]
     #   File path to which the result should be written. If set to `nil`
     #   (default), the output is returned as a string instead.
-    # @param has_header [Boolean]
+    # @param include_header [Boolean]
     #   Whether to include header in the CSV output.
     # @param sep [String]
     #   Separate CSV fields with this symbol.
@@ -763,8 +758,7 @@ module Polars
     #   df.write_csv("file.csv")
     def write_csv(
       file = nil,
-      has_header: true,
-      include_header: nil,
+      include_header: true,
       sep: ",",
       quote: '"',
       batch_size: 1024,
@@ -774,8 +768,6 @@ module Polars
       float_precision: nil,
       null_value: nil
     )
-      include_header = has_header if include_header.nil?
       if sep.length > 1
         raise ArgumentError, "only single byte separator is allowed"
       elsif quote.length > 1
@@ -834,6 +826,8 @@ module Polars
     #   File path to which the file should be written.
     # @param compression ["uncompressed", "snappy", "deflate"]
     #   Compression method. Defaults to "uncompressed".
+    # @param name [String]
+    #   Schema name. Defaults to empty string.
     #
     # @return [nil]
     def write_avro(file, compression = "uncompressed", name: "")
@@ -856,6 +850,24 @@ module Polars
     #   File path to which the file should be written.
     # @param compression ["uncompressed", "lz4", "zstd"]
     #   Compression method. Defaults to "uncompressed".
+    # @param compat_level [Object]
+    #   Use a specific compatibility level
+    #   when exporting Polars' internal data structures.
+    # @param storage_options [Hash]
+    #   Options that indicate how to connect to a cloud provider.
+    #
+    #   The cloud providers currently supported are AWS, GCP, and Azure.
+    #   See supported keys here:
+    #
+    #   * [aws](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html)
+    #   * [gcp](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html)
+    #   * [azure](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html)
+    #   * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
+    #
+    #   If `storage_options` is not provided, Polars will try to infer the
+    #   information from environment variables.
+    # @param retries [Integer]
+    #   Number of retries if accessing a cloud instance fails.
     #
     # @return [nil]
     def write_ipc(
@@ -898,9 +910,12 @@ module Polars
     #
     # @param file [Object]
     #   Path or writable file-like object to which the IPC record batch data will
-    #   be written. If set to `None`, the output is returned as a BytesIO object.
+    #   be written. If set to `nil`, the output is returned as a BytesIO object.
     # @param compression ['uncompressed', 'lz4', 'zstd']
     #   Compression method. Defaults to "uncompressed".
+    # @param compat_level [Object]
+    #   Use a specific compatibility level
+    #   when exporting Polars' internal data structures.
     #
     # @return [Object]
     #
@@ -1215,7 +1230,7 @@ module Polars
     #       "y" => 1_000_000.times.map { |v| v / 1000.0 },
     #       "z" => 1_000_000.times.map(&:to_s)
     #     },
-    #     columns: {"x" => :u32, "y" => :f64, "z" => :str}
+    #     schema: {"x" => :u32, "y" => :f64, "z" => :str}
     #   )
     #   df.estimated_size
     #   # => 25888898
@@ -1448,6 +1463,126 @@ module Polars
       lazy.filter(predicate).collect
     end
+    # Remove rows, dropping those that match the given predicate expression(s).
+    #
+    # The original order of the remaining rows is preserved.
+    #
+    # Rows where the filter predicate does not evaluate to `true` are retained
+    # (this includes rows where the predicate evaluates as `null`).
+    #
+    # @param predicates [Array]
+    #   Expression that evaluates to a boolean Series.
+    # @param constraints [Hash]
+    #   Column filters; use `name: value` to filter columns using the supplied
+    #   value. Each constraint behaves the same as `Polars.col(name).eq(value)`,
+    #   and is implicitly joined with the other filter conditions using `&`.
+    #
+    # @return [DataFrame]
+    #
+    # @example Remove rows matching a condition:
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [2, 3, nil, 4, 0],
+    #       "bar" => [5, 6, nil, nil, 0],
+    #       "ham" => ["a", "b", nil, "c", "d"]
+    #     }
+    #   )
+    #   df.remove(Polars.col("bar") >= 5)
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ foo  ┆ bar  ┆ ham  │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ i64  ┆ str  │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ null ┆ null ┆ null │
+    #   # │ 4    ┆ null ┆ c    │
+    #   # │ 0    ┆ 0    ┆ d    │
+    #   # └──────┴──────┴──────┘
+    #
+    # @example Discard rows based on multiple conditions, combined with and/or operators:
+    #   df.remove(
+    #     (Polars.col("foo") >= 0) & (Polars.col("bar") >= 0),
+    #   )
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ foo  ┆ bar  ┆ ham  │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ i64  ┆ str  │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ null ┆ null ┆ null │
+    #   # │ 4    ┆ null ┆ c    │
+    #   # └──────┴──────┴──────┘
+    #
+    # @example
+    #   df.remove(
+    #     (Polars.col("foo") >= 0) | (Polars.col("bar") >= 0),
+    #   )
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ foo  ┆ bar  ┆ ham  │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ i64  ┆ str  │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ null ┆ null ┆ null │
+    #   # └──────┴──────┴──────┘
+    #
+    # @example Provide multiple constraints using `*args` syntax:
+    #   df.remove(
+    #     Polars.col("ham").is_not_null,
+    #     Polars.col("bar") >= 0
+    #   )
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ foo  ┆ bar  ┆ ham  │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ i64  ┆ str  │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ null ┆ null ┆ null │
+    #   # │ 4    ┆ null ┆ c    │
+    #   # └──────┴──────┴──────┘
+    #
+    # @example Provide constraint(s) using `**kwargs` syntax:
+    #   df.remove(foo: 0, bar: 0)
+    #   # =>
+    #   # shape: (4, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ foo  ┆ bar  ┆ ham  │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ i64  ┆ str  │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ 2    ┆ 5    ┆ a    │
+    #   # │ 3    ┆ 6    ┆ b    │
+    #   # │ null ┆ null ┆ null │
+    #   # │ 4    ┆ null ┆ c    │
+    #   # └──────┴──────┴──────┘
+    #
+    # @example Remove rows by comparing two columns against each other:
+    #   df.remove(
+    #     Polars.col("foo").ne_missing(Polars.col("bar"))
+    #   )
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ foo  ┆ bar  ┆ ham  │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ i64  ┆ str  │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ null ┆ null ┆ null │
+    #   # │ 0    ┆ 0    ┆ d    │
+    #   # └──────┴──────┴──────┘
+    def remove(
+      *predicates,
+      **constraints
+    )
+      lazy
+      .remove(*predicates, **constraints)
+      .collect(_eager: true)
+    end
     # Summary statistics for a DataFrame.
     #
     # @return [DataFrame]
@@ -1643,6 +1778,223 @@ module Polars
       self._df = sort(by, reverse: reverse, nulls_last: nulls_last)._df
     end
+    # Execute a SQL query against the DataFrame.
+    #
+    # @note
+    #   This functionality is considered **unstable**, although it is close to
+    #   being considered stable. It may be changed at any point without it being
+    #   considered a breaking change.
+    #
+    # @param query [String]
+    #   SQL query to execute.
+    # @param table_name [String]
+    #   Optionally provide an explicit name for the table that represents the
+    #   calling frame (defaults to "self").
+    #
+    # @return [DataFrame]
+    #
+    # @note
+    #   * The calling frame is automatically registered as a table in the SQL context
+    #     under the name "self". If you want access to the DataFrames and LazyFrames
+    #     found in the current globals, use the top-level :meth:`pl.sql <polars.sql>`.
+    #   * More control over registration and execution behaviour is available by
+    #     using the `SQLContext` object.
+    #   * The SQL query executes in lazy mode before being collected and returned
+    #     as a DataFrame.
+    #
+    # @example Query the DataFrame using SQL:
+    #   df1 = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3],
+    #       "b" => ["zz", "yy", "xx"],
+    #       "c" => [Date.new(1999, 12, 31), Date.new(2010, 10, 10), Date.new(2077, 8, 8)]
+    #     }
+    #   )
+    #   df1.sql("SELECT c, b FROM self WHERE a > 1")
+    #   # =>
+    #   # shape: (2, 2)
+    #   # ┌────────────┬─────┐
+    #   # │ c          ┆ b   │
+    #   # │ ---        ┆ --- │
+    #   # │ date       ┆ str │
+    #   # ╞════════════╪═════╡
+    #   # │ 2010-10-10 ┆ yy  │
+    #   # │ 2077-08-08 ┆ xx  │
+    #   # └────────────┴─────┘
+    #
+    # @example Apply transformations to a DataFrame using SQL, aliasing "self" to "frame".
+    #   df1.sql(
+    #     "
+    #       SELECT
+    #           a,
+    #           (a % 2 == 0) AS a_is_even,
+    #           CONCAT_WS(':', b, b) AS b_b,
+    #           EXTRACT(year FROM c) AS year,
+    #           0::float4 AS \"zero\",
+    #       FROM frame
+    #     ",
+    #     table_name: "frame"
+    #   )
+    #   # =>
+    #   # shape: (3, 5)
+    #   # ┌─────┬───────────┬───────┬──────┬──────┐
+    #   # │ a   ┆ a_is_even ┆ b_b   ┆ year ┆ zero │
+    #   # │ --- ┆ ---       ┆ ---   ┆ ---  ┆ ---  │
+    #   # │ i64 ┆ bool      ┆ str   ┆ i32  ┆ f32  │
+    #   # ╞═════╪═══════════╪═══════╪══════╪══════╡
+    #   # │ 1   ┆ false     ┆ zz:zz ┆ 1999 ┆ 0.0  │
+    #   # │ 2   ┆ true      ┆ yy:yy ┆ 2010 ┆ 0.0  │
+    #   # │ 3   ┆ false     ┆ xx:xx ┆ 2077 ┆ 0.0  │
+    #   # └─────┴───────────┴───────┴──────┴──────┘
+    def sql(query, table_name: "self")
+      ctx = SQLContext.new(eager_execution: true)
+      name = table_name || "self"
+      ctx.register(name, self)
+      ctx.execute(query)
+    end
+    # Return the `k` largest rows.
+    #
+    # Non-null elements are always preferred over null elements, regardless of
+    # the value of `reverse`. The output is not guaranteed to be in any
+    # particular order, call `sort` after this function if you wish the
+    # output to be sorted.
+    #
+    # @param k [Integer]
+    #   Number of rows to return.
+    # @param by [Object]
+    #   Column(s) used to determine the top rows.
+    #   Accepts expression input. Strings are parsed as column names.
+    # @param reverse [Object]
+    #   Consider the `k` smallest elements of the `by` column(s) (instead of the `k`
+    #   largest). This can be specified per column by passing a sequence of
+    #   booleans.
+    #
+    # @return [DataFrame]
+    #
+    # @example Get the rows which contain the 4 largest values in column b.
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => ["a", "b", "a", "b", "b", "c"],
+    #       "b" => [2, 1, 1, 3, 2, 1]
+    #     }
+    #   )
+    #   df.top_k(4, by: "b")
+    #   # =>
+    #   # shape: (4, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ str ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ b   ┆ 3   │
+    #   # │ a   ┆ 2   │
+    #   # │ b   ┆ 2   │
+    #   # │ b   ┆ 1   │
+    #   # └─────┴─────┘
+    #
+    # @example Get the rows which contain the 4 largest values when sorting on column b and a.
+    #   df.top_k(4, by: ["b", "a"])
+    #   # =>
+    #   # shape: (4, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ str ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ b   ┆ 3   │
+    #   # │ b   ┆ 2   │
+    #   # │ a   ┆ 2   │
+    #   # │ c   ┆ 1   │
+    #   # └─────┴─────┘
+    def top_k(
+      k,
+      by:,
+      reverse: false
+    )
+      lazy
+      .top_k(k, by: by, reverse: reverse)
+      .collect(
+        # optimizations=QueryOptFlags(
+        #   projection_pushdown=False,
+        #   predicate_pushdown=False,
+        #   comm_subplan_elim=False,
+        #   slice_pushdown=True
+        # )
+      )
+    end
+    # Return the `k` smallest rows.
+    #
+    # Non-null elements are always preferred over null elements, regardless of
+    # the value of `reverse`. The output is not guaranteed to be in any
+    # particular order, call `sort` after this function if you wish the
+    # output to be sorted.
+    #
+    # @param k [Integer]
+    #   Number of rows to return.
+    # @param by [Object]
+    #   Column(s) used to determine the bottom rows.
+    #   Accepts expression input. Strings are parsed as column names.
+    # @param reverse [Object]
+    #   Consider the `k` largest elements of the `by` column(s) (instead of the `k`
+    #   smallest). This can be specified per column by passing a sequence of
+    #   booleans.
+    #
+    # @return [DataFrame]
+    #
+    # @example Get the rows which contain the 4 smallest values in column b.
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => ["a", "b", "a", "b", "b", "c"],
+    #       "b" => [2, 1, 1, 3, 2, 1]
+    #     }
+    #   )
+    #   df.bottom_k(4, by: "b")
+    #   # =>
+    #   # shape: (4, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ str ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ b   ┆ 1   │
+    #   # │ a   ┆ 1   │
+    #   # │ c   ┆ 1   │
+    #   # │ a   ┆ 2   │
+    #   # └─────┴─────┘
+    #
+    # @example Get the rows which contain the 4 smallest values when sorting on column a and b.
+    #   df.bottom_k(4, by: ["a", "b"])
+    #   # =>
+    #   # shape: (4, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ str ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ a   ┆ 1   │
+    #   # │ a   ┆ 2   │
+    #   # │ b   ┆ 1   │
+    #   # │ b   ┆ 2   │
+    #   # └─────┴─────┘
+    def bottom_k(
+      k,
+      by:,
+      reverse: false
+    )
+      lazy
+      .bottom_k(k, by: by, reverse: reverse)
+      .collect(
+        # optimizations=QueryOptFlags(
+        #   projection_pushdown=False,
+        #   predicate_pushdown=False,
+        #   comm_subplan_elim=False,
+        #   slice_pushdown=True,
+        # )
+      )
+    end
     # Check if DataFrame is equal to other.
     #
     # @param other [DataFrame]
@@ -1833,10 +2185,59 @@ module Polars
       _from_rbdf(_df.tail(n))
     end
-    # Return a new DataFrame where the null values are dropped.
+    # Drop all rows that contain one or more NaN values.
+    #
+    # The original order of the remaining rows is preserved.
+    #
+    # @param subset [Object]
+    #   Column name(s) for which NaN values are considered; if set to `nil`
+    #   (default), use all columns (note that only floating-point columns
+    #   can contain NaNs).
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [-20.5, Float::NAN, 80.0],
+    #       "bar" => [Float::NAN, 110.0, 25.5],
+    #       "ham" => ["xxx", "yyy", nil]
+    #     }
+    #   )
+    #   df.drop_nans
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ foo  ┆ bar  ┆ ham  │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ f64  ┆ f64  ┆ str  │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ 80.0 ┆ 25.5 ┆ null │
+    #   # └──────┴──────┴──────┘
+    #
+    # @example
+    #   df.drop_nans(subset: ["bar"])
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌──────┬───────┬──────┐
+    #   # │ foo  ┆ bar   ┆ ham  │
+    #   # │ ---  ┆ ---   ┆ ---  │
+    #   # │ f64  ┆ f64   ┆ str  │
+    #   # ╞══════╪═══════╪══════╡
+    #   # │ NaN  ┆ 110.0 ┆ yyy  │
+    #   # │ 80.0 ┆ 25.5  ┆ null │
+    #   # └──────┴───────┴──────┘
+    def drop_nans(subset: nil)
+      lazy.drop_nans(subset: subset).collect(_eager: true)
+    end
+    # Drop all rows that contain one or more null values.
+    #
+    # The original order of the remaining rows is preserved.
     #
     # @param subset [Object]
-    #   Subset of column(s) on which `drop_nulls` will be applied.
+    #   Column name(s) for which null values are considered.
+    #   If set to `nil` (default), use all columns.
     #
     # @return [DataFrame]
     #
@@ -1845,20 +2246,32 @@ module Polars
     #     {
     #       "foo" => [1, 2, 3],
     #       "bar" => [6, nil, 8],
-    #       "ham" => ["a", "b", "c"]
+    #       "ham" => ["a", "b", nil]
     #     }
     #   )
     #   df.drop_nulls
     #   # =>
-    #   # shape: (2, 3)
+    #   # shape: (1, 3)
     #   # ┌─────┬─────┬─────┐
     #   # │ foo ┆ bar ┆ ham │
     #   # │ --- ┆ --- ┆ --- │
     #   # │ i64 ┆ i64 ┆ str │
     #   # ╞═════╪═════╪═════╡
     #   # │ 1   ┆ 6   ┆ a   │
-    #   # │ 3   ┆ 8   ┆ c   │
     #   # └─────┴─────┴─────┘
+    #
+    # @example
+    #   df.drop_nulls(subset: Polars.cs.integer)
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬──────┐
+    #   # │ foo ┆ bar ┆ ham  │
+    #   # │ --- ┆ --- ┆ ---  │
+    #   # │ i64 ┆ i64 ┆ str  │
+    #   # ╞═════╪═════╪══════╡
+    #   # │ 1   ┆ 6   ┆ a    │
+    #   # │ 3   ┆ 8   ┆ null │
+    #   # └─────┴─────┴──────┘
     def drop_nulls(subset: nil)
       lazy.drop_nulls(subset: subset).collect(_eager: true)
     end
@@ -2124,9 +2537,9 @@ module Polars
     # @param every
     #   Interval of the window.
     # @param period
-    #   Length of the window, if None it is equal to 'every'.
+    #   Length of the window, if nil it is equal to 'every'.
     # @param offset
-    #   Offset of the window if None and period is None it will be equal to negative
+    #   Offset of the window; if nil and period is nil, it will be equal to negative
     #   `every`.
     # @param truncate
     #   Truncate the time value to the window lower bound.
@@ -2138,6 +2551,22 @@ module Polars
     #   Define whether the temporal window interval is closed or not.
     # @param by
     #   Also group by this column/these columns
+    # @param start_by ['window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
+    #   The strategy to determine the start of the first window by.
+    #
+    #   * 'window': Start by taking the earliest timestamp, truncating it with
+    #     `every`, and then adding `offset`.
+    #     Note that weekly windows start on Monday.
+    #   * 'datapoint': Start from the first encountered data point.
+    #   * a day of the week (only takes effect if `every` contains `'w'`):
+    #
+    #     * 'monday': Start the window on the Monday before the first data point.
+    #     * 'tuesday': Start the window on the Tuesday before the first data point.
+    #     * ...
+    #     * 'sunday': Start the window on the Sunday before the first data point.
+    #
+    #     The resulting window is then shifted back until the earliest datapoint
+    #     is in or in front of it.
     #
     # @return [DataFrame]
     #
@@ -2438,13 +2867,13 @@ module Polars
     #   Join column of the right DataFrame.
     # @param on [String]
     #   Join column of both DataFrames. If set, `left_on` and `right_on` should be
-    #   None.
-    # @param by [Object]
-    #   join on these columns before doing asof join
+    #   nil.
     # @param by_left [Object]
     #   join on these columns before doing asof join
     # @param by_right [Object]
     #   join on these columns before doing asof join
+    # @param by [Object]
+    #   join on these columns before doing asof join
     # @param strategy ["backward", "forward"]
     #   Join strategy.
     # @param suffix [String]
@@ -2454,14 +2883,6 @@ module Polars
     #   keys are within this distance. If an asof join is done on columns of dtype
     #   "Date", "Datetime", "Duration" or "Time" you use the following string
     #   language:
-    # @param allow_exact_matches [Boolean]
-    #   Whether exact matches are valid join predicates.
-    #     - If true, allow matching with the same `on` value (i.e. less-than-or-equal-to / greater-than-or-equal-to).
-    #     - If false, don't match the same `on` value (i.e., strictly less-than / strictly greater-than).
-    # @param check_sortedness [Boolean]
-    #   Check the sortedness of the asof keys. If the keys are not sorted Polars
-    #   will error, or in case of 'by' argument raise a warning. This might become
-    #   a hard error in the future.
     #
     #    - 1ns   (1 nanosecond)
     #    - 1us   (1 microsecond)
@@ -2489,6 +2910,14 @@ module Polars
     #     - true: -> Always coalesce join columns.
     #     - false: -> Never coalesce join columns.
     #   Note that joining on any other expressions than `col` will turn off coalescing.
+    # @param allow_exact_matches [Boolean]
+    #   Whether exact matches are valid join predicates.
+    #     - If true, allow matching with the same `on` value (i.e. less-than-or-equal-to / greater-than-or-equal-to).
+    #     - If false, don't match the same `on` value (i.e., strictly less-than / strictly greater-than).
+    # @param check_sortedness [Boolean]
+    #   Check the sortedness of the asof keys. If the keys are not sorted Polars
+    #   will error, or in case of 'by' argument raise a warning. This might become
+    #   a hard error in the future.
     #
     # @return [DataFrame]
     #
@@ -2724,6 +3153,101 @@ module Polars
         .collect(no_optimization: true)
     end
+    # Perform a join based on one or multiple (in)equality predicates.
+    #
+    # This performs an inner join, so only rows where all predicates are true
+    # are included in the result, and a row from either DataFrame may be included
+    # multiple times in the result.
+    #
+    # @note
+    #   The row order of the input DataFrames is not preserved.
+    #
+    # @note
+    #   This functionality is experimental. It may be
+    #   changed at any point without it being considered a breaking change.
+    #
+    # @param other [DataFrame]
+    #   DataFrame to join with.
+    # @param predicates [Array]
+    #   (In)Equality condition to join the two tables on.
+    #   When a column name occurs in both tables, the proper suffix must
+    #   be applied in the predicate.
+    # @param suffix [String]
+    #   Suffix to append to columns with a duplicate name.
+    #
+    # @return [DataFrame]
+    #
+    # @example Join two dataframes together based on two predicates which get AND-ed together.
+    #   east = Polars::DataFrame.new(
+    #     {
+    #       "id": [100, 101, 102],
+    #       "dur": [120, 140, 160],
+    #       "rev": [12, 14, 16],
+    #       "cores": [2, 8, 4]
+    #     }
+    #   )
+    #   west = Polars::DataFrame.new(
+    #     {
+    #       "t_id": [404, 498, 676, 742],
+    #       "time": [90, 130, 150, 170],
+    #       "cost": [9, 13, 15, 16],
+    #       "cores": [4, 2, 1, 4]
+    #     }
+    #   )
+    #   east.join_where(
+    #     west,
+    #     Polars.col("dur") < Polars.col("time"),
+    #     Polars.col("rev") < Polars.col("cost")
+    #   )
+    #   # =>
+    #   # shape: (5, 8)
+    #   # ┌─────┬─────┬─────┬───────┬──────┬──────┬──────┬─────────────┐
+    #   # │ id  ┆ dur ┆ rev ┆ cores ┆ t_id ┆ time ┆ cost ┆ cores_right │
+    #   # │ --- ┆ --- ┆ --- ┆ ---   ┆ ---  ┆ ---  ┆ ---  ┆ ---         │
+    #   # │ i64 ┆ i64 ┆ i64 ┆ i64   ┆ i64  ┆ i64  ┆ i64  ┆ i64         │
+    #   # ╞═════╪═════╪═════╪═══════╪══════╪══════╪══════╪═════════════╡
+    #   # │ 100 ┆ 120 ┆ 12  ┆ 2     ┆ 498  ┆ 130  ┆ 13   ┆ 2           │
+    #   # │ 100 ┆ 120 ┆ 12  ┆ 2     ┆ 676  ┆ 150  ┆ 15   ┆ 1           │
+    #   # │ 100 ┆ 120 ┆ 12  ┆ 2     ┆ 742  ┆ 170  ┆ 16   ┆ 4           │
+    #   # │ 101 ┆ 140 ┆ 14  ┆ 8     ┆ 676  ┆ 150  ┆ 15   ┆ 1           │
+    #   # │ 101 ┆ 140 ┆ 14  ┆ 8     ┆ 742  ┆ 170  ┆ 16   ┆ 4           │
+    #   # └─────┴─────┴─────┴───────┴──────┴──────┴──────┴─────────────┘
+    #
+    # @example To OR them together, use a single expression and the `|` operator.
+    #   east.join_where(
+    #     west,
+    #     (Polars.col("dur") < Polars.col("time")) | (Polars.col("rev") < Polars.col("cost"))
+    #   )
+    #   # =>
+    #   # shape: (6, 8)
+    #   # ┌─────┬─────┬─────┬───────┬──────┬──────┬──────┬─────────────┐
+    #   # │ id  ┆ dur ┆ rev ┆ cores ┆ t_id ┆ time ┆ cost ┆ cores_right │
+    #   # │ --- ┆ --- ┆ --- ┆ ---   ┆ ---  ┆ ---  ┆ ---  ┆ ---         │
+    #   # │ i64 ┆ i64 ┆ i64 ┆ i64   ┆ i64  ┆ i64  ┆ i64  ┆ i64         │
+    #   # ╞═════╪═════╪═════╪═══════╪══════╪══════╪══════╪═════════════╡
+    #   # │ 100 ┆ 120 ┆ 12  ┆ 2     ┆ 498  ┆ 130  ┆ 13   ┆ 2           │
+    #   # │ 100 ┆ 120 ┆ 12  ┆ 2     ┆ 676  ┆ 150  ┆ 15   ┆ 1           │
+    #   # │ 100 ┆ 120 ┆ 12  ┆ 2     ┆ 742  ┆ 170  ┆ 16   ┆ 4           │
+    #   # │ 101 ┆ 140 ┆ 14  ┆ 8     ┆ 676  ┆ 150  ┆ 15   ┆ 1           │
+    #   # │ 101 ┆ 140 ┆ 14  ┆ 8     ┆ 742  ┆ 170  ┆ 16   ┆ 4           │
+    #   # │ 102 ┆ 160 ┆ 16  ┆ 4     ┆ 742  ┆ 170  ┆ 16   ┆ 4           │
+    #   # └─────┴─────┴─────┴───────┴──────┴──────┴──────┴─────────────┘
+    def join_where(
+      other,
+      *predicates,
+      suffix: "_right"
+    )
+      Utils.require_same_type(self, other)
+      lazy
+      .join_where(
+        other.lazy,
+        *predicates,
+        suffix: suffix
+      )
+      .collect(_eager: true)
+    end
     # Apply a custom/user-defined function (UDF) over the rows of the DataFrame.
     #
     # The UDF will receive each row as a tuple of values: `udf(row)`.
@@ -3436,19 +3960,22 @@ module Polars
     # Create a spreadsheet-style pivot table as a DataFrame.
     #
+    # @param on [Object]
+    #   Columns whose values will be used as the header of the output DataFrame
+    # @param index [Object]
+    #   One or multiple keys to group by
     # @param values [Object]
     #   Column values to aggregate. Can be multiple columns if the *columns*
     #   argument contains multiple columns as well
-    # @param index [Object]
-    #   One or multiple keys to group by
-    # @param on [Object]
-    #   Columns whose values will be used as the header of the output DataFrame
     # @param aggregate_function ["first", "sum", "max", "min", "mean", "median", "last", "count"]
     #   A predefined aggregate function str or an expression.
     # @param maintain_order [Object]
     #   Sort the grouped keys so that the output order is predictable.
     # @param sort_columns [Object]
     #   Sort the transposed columns by name. Default is by order of discovery.
+    # @param separator [String]
+    #   Used as separator/delimiter in generated column names in case of multiple
+    #   `values` columns.
     #
     # @return [DataFrame]
     #
@@ -3712,9 +4239,11 @@ module Polars
     # @param maintain_order [Boolean]
     #   Keep predictable output order. This is slower as it requires an extra sort
     #   operation.
+    # @param include_key [Boolean]
+    #   Include the columns used to partition the DataFrame in the output.
     # @param as_dict [Boolean]
-    #   If true, return the partitions in a dictionary keyed by the distinct group
-    #   values instead of a list.
+    #   If true, return the partitions in a hash keyed by the distinct group
+    #   values instead of an array.
     #
     # @return [Object]
     #
@@ -4035,6 +4564,26 @@ module Polars
       lazy.select(*exprs, **named_exprs).collect(_eager: true)
     end
+    # Select columns from this DataFrame.
+    #
+    # This will run all expressions sequentially instead of in parallel.
+    # Use this when the work per expression is cheap.
+    #
+    # @param exprs [Array]
+    #   Column(s) to select, specified as positional arguments.
+    #   Accepts expression input. Strings are parsed as column names,
+    #   other non-expression inputs are parsed as literals.
+    # @param named_exprs [Hash]
+    #   Additional columns to select, specified as keyword arguments.
+    #   The columns will be renamed to the keyword used.
+    #
+    # @return [DataFrame]
+    def select_seq(*exprs, **named_exprs)
+      lazy
+      .select_seq(*exprs, **named_exprs)
+      .collect(_eager: true)
+    end
     # Add columns to this DataFrame.
     #
     # Added columns will replace existing columns with the same name.
@@ -4147,6 +4696,31 @@ module Polars
       lazy.with_columns(*exprs, **named_exprs).collect(_eager: true)
     end
+    # Add columns to this DataFrame.
+    #
+    # Added columns will replace existing columns with the same name.
+    #
+    # This will run all expressions sequentially instead of in parallel.
+    # Use this when the work per expression is cheap.
+    #
+    # @param exprs [Array]
+    #   Column(s) to add, specified as positional arguments.
+    #   Accepts expression input. Strings are parsed as column names, other
+    #   non-expression inputs are parsed as literals.
+    # @param named_exprs [Hash]
+    #   Additional columns to add, specified as keyword arguments.
+    #   The columns will be renamed to the keyword used.
+    #
+    # @return [DataFrame]
+    def with_columns_seq(
+      *exprs,
+      **named_exprs
+    )
+      lazy
+      .with_columns_seq(*exprs, **named_exprs)
+      .collect(_eager: true)
+    end
     # Get number of chunks used by the ChunkedArrays of this DataFrame.
     #
     # @param strategy ["first", "all"]
@@ -4556,9 +5130,15 @@ module Polars
     # Get one hot encoded dummy variables.
     #
-    # @param columns
+    # @param columns [Array]
     #   A subset of columns to convert to dummy variables. `nil` means
     #   "all columns".
+    # @param separator [String]
+    #   Separator/delimiter used when generating column names.
+    # @param drop_first [Boolean]
+    #   Remove the first category from the variables being encoded.
+    # @param drop_nulls [Boolean]
+    #   If there are `nil` values in the series, a `null` column is not generated
     #
     # @return [DataFrame]
     #
@@ -4581,11 +5161,11 @@ module Polars
     #   # │ 1     ┆ 0     ┆ 1     ┆ 0     ┆ 1     ┆ 0     │
     #   # │ 0     ┆ 1     ┆ 0     ┆ 1     ┆ 0     ┆ 1     │
     #   # └───────┴───────┴───────┴───────┴───────┴───────┘
-    def to_dummies(columns: nil, separator: "_", drop_first: false)
+    def to_dummies(columns: nil, separator: "_", drop_first: false, drop_nulls: false)
       # A single column name given as a String is wrapped in a one-element array.
       if columns.is_a?(::String)
         columns = [columns]
       end
       # The options are forwarded positionally to `_df.to_dummies`, and its
       # result is passed through `_from_rbdf`.
-      _from_rbdf(_df.to_dummies(columns, separator, drop_first))
+      _from_rbdf(_df.to_dummies(columns, separator, drop_first, drop_nulls))
     end
     # Drop duplicate rows from this DataFrame.
@@ -4753,7 +5333,7 @@ module Polars
     #   # │ --- ┆ --- ┆ --- │
     #   # │ i64 ┆ i64 ┆ str │
     #   # ╞═════╪═════╪═════╡
-    #   # │ 3   ┆ 8   ┆ c   │
+    #   # │ 1   ┆ 6   ┆ a   │
     #   # │ 2   ┆ 7   ┆ b   │
     #   # └─────┴─────┴─────┘
     def sample(
@@ -4979,6 +5559,85 @@ module Polars
       end
     end
+    # Convert columnar data to rows as Ruby arrays in a hash keyed by some column.
+    #
+    # This method is like `rows`, but instead of returning rows in a flat array, rows
+    # are grouped by the values in the `key` column(s) and returned as a hash.
+    #
+    # Note that this method should not be used in place of native operations, due to
+    # the high cost of materializing all frame data out into a hash; it should
+    # be used only when you need to move the values out into a Ruby data structure
+    # or other object that cannot operate directly with Polars/Arrow.
+    #
+    # @param key [Object]
+    #   The column(s) to use as the key for the returned hash. If multiple
+    #   columns are specified, the key will be an array of those values, otherwise
+    #   it will be the value from the single key column.
+    # @param named [Boolean]
+    #   Return hashes instead of arrays. The hashes are a mapping of
+    #   column name to row value. This is more expensive than returning an
+    #   array, but allows for accessing values by column name.
+    # @param include_key [Boolean]
+    #   Include key values inline with the associated data (by default the key
+    #   values are omitted as a memory/performance optimisation, as they can be
+    #   reconstructed from the key).
+    # @param unique [Boolean]
+    #   Indicate that the key is unique; this will result in a 1:1 mapping from
+    #   key to a single associated row. Note that if the key is *not* actually
+    #   unique the last row with the given key will be returned.
+    #
+    # @return [Hash]
+    #
+    # @example Group rows by the given key column(s):
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "w" => ["a", "b", "b", "a"],
+    #       "x" => ["q", "q", "q", "k"],
+    #       "y" => [1.0, 2.5, 3.0, 4.5],
+    #       "z" => [9, 8, 7, 6]
+    #     }
+    #   )
+    #   df.rows_by_key(["w"])
+    #   # => {"a"=>[["q", 1.0, 9], ["k", 4.5, 6]], "b"=>[["q", 2.5, 8], ["q", 3.0, 7]]}
+    #
+    # @example Return the same row groupings as hashes:
+    #   df.rows_by_key(["w"], named: true)
+    #   # => {"a"=>[{"x"=>"q", "y"=>1.0, "z"=>9}, {"x"=>"k", "y"=>4.5, "z"=>6}], "b"=>[{"x"=>"q", "y"=>2.5, "z"=>8}, {"x"=>"q", "y"=>3.0, "z"=>7}]}
+    #
+    # @example Return row groupings, assuming keys are unique:
+    #   df.rows_by_key(["z"], unique: true)
+    #   # => {9=>["a", "q", 1.0], 8=>["b", "q", 2.5], 7=>["b", "q", 3.0], 6=>["a", "k", 4.5]}
+    #
+    # @example Return row groupings as hashes, assuming keys are unique:
+    #   df.rows_by_key(["z"], named: true, unique: true)
+    #   # => {9=>{"w"=>"a", "x"=>"q", "y"=>1.0}, 8=>{"w"=>"b", "x"=>"q", "y"=>2.5}, 7=>{"w"=>"b", "x"=>"q", "y"=>3.0}, 6=>{"w"=>"a", "x"=>"k", "y"=>4.5}}
+    #
+    # @example Return hash rows grouped by a compound key, including key values:
+    #   df.rows_by_key(["w", "x"], named: true, include_key: true)
+    #   # => {["a", "q"]=>[{"w"=>"a", "x"=>"q", "y"=>1.0, "z"=>9}], ["b", "q"]=>[{"w"=>"b", "x"=>"q", "y"=>2.5, "z"=>8}, {"w"=>"b", "x"=>"q", "y"=>3.0, "z"=>7}], ["a", "k"]=>[{"w"=>"a", "x"=>"k", "y"=>4.5, "z"=>6}]}
+    def rows_by_key(key, named: false, include_key: false, unique: false)
+      key = Utils._expand_selectors(self, key)
+      keys = key.size == 1 ? get_column(key[0]) : select(key).iter_rows
+      if include_key
+        values = self
+      else
+        data_cols = schema.keys - key
+        values = select(data_cols)
+      end
+      zipped = keys.each.zip(values.iter_rows(named: named))
+      # if unique, we expect to write just one entry per key; otherwise, we're
+      # returning a list of rows for each key, so append into a hash of arrays.
+      if unique
+        zipped.to_h
+      else
+        zipped.each_with_object({}) { |(key, data), h| (h[key] ||= []) << data }
+      end
+    end
     # Returns an iterator over the DataFrame of rows of Ruby-native values.
     #
     # @param named [Boolean]
@@ -5400,9 +6059,136 @@ module Polars
         .collect(no_optimization: true)
     end
-    # TODO
-    # def update
-    # end
+    # Update the values in this `DataFrame` with the values in `other`.
+    #
+    # @note
+    #   This functionality is considered **unstable**. It may be changed
+    #   at any point without it being considered a breaking change.
+    #
+    # @param other [DataFrame]
+    #   DataFrame that will be used to update the values
+    # @param on [Object]
+    #   Column names that will be joined on. If set to `nil` (default),
+    #   the implicit row index of each frame is used as a join key.
+    # @param how ['left', 'inner', 'full']
+    #   * 'left' will keep all rows from the left table; rows may be duplicated
+    #     if multiple rows in the right frame match the left row's key.
+    #   * 'inner' keeps only those rows where the key exists in both frames.
+    #   * 'full' will update existing rows where the key matches while also
+    #     adding any new rows contained in the given frame.
+    # @param left_on [Object]
+    #   Join column(s) of the left DataFrame.
+    # @param right_on [Object]
+    #   Join column(s) of the right DataFrame.
+    # @param include_nulls [Boolean]
+    #   Overwrite values in the left frame with null values from the right frame.
+    #   If set to `false` (default), null values in the right frame are ignored.
+    # @param maintain_order ['none', 'left', 'right', 'left_right', 'right_left']
+    #   Which order of rows from the inputs to preserve. See `DataFrame.join`
+    #   for details. Unlike `join` this function preserves the left order by
+    #   default.
+    #
+    # @return [DataFrame]
+    #
+    # @note
+    #   This is syntactic sugar for a left/inner join that preserves the order
+    #   of the left `DataFrame` by default, with an optional coalesce when
+    #   `include_nulls: false`.
+    #
+    # @example Update `df` values with the non-null values in `new_df`, by row index:
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "A" => [1, 2, 3, 4],
+    #       "B" => [400, 500, 600, 700]
+    #     }
+    #   )
+    #   new_df = Polars::DataFrame.new(
+    #     {
+    #       "B" => [-66, nil, -99],
+    #       "C" => [5, 3, 1]
+    #     }
+    #   )
+    #   df.update(new_df)
+    #   # =>
+    #   # shape: (4, 2)
+    #   # ┌─────┬─────┐
+    #   # │ A   ┆ B   │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ -66 │
+    #   # │ 2   ┆ 500 │
+    #   # │ 3   ┆ -99 │
+    #   # │ 4   ┆ 700 │
+    #   # └─────┴─────┘
+    #
+    # @example Update `df` values with the non-null values in `new_df`, by row index, but only keeping those rows that are common to both frames:
+    #   df.update(new_df, how: "inner")
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌─────┬─────┐
+    #   # │ A   ┆ B   │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ -66 │
+    #   # │ 2   ┆ 500 │
+    #   # │ 3   ┆ -99 │
+    #   # └─────┴─────┘
+    #
+    # @example Update `df` values with the non-null values in `new_df`, using a full outer join strategy that defines explicit join columns in each frame:
+    #   df.update(new_df, left_on: ["A"], right_on: ["C"], how: "full")
+    #   # =>
+    #   # shape: (5, 2)
+    #   # ┌─────┬─────┐
+    #   # │ A   ┆ B   │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ -99 │
+    #   # │ 2   ┆ 500 │
+    #   # │ 3   ┆ 600 │
+    #   # │ 4   ┆ 700 │
+    #   # │ 5   ┆ -66 │
+    #   # └─────┴─────┘
+    #
+    # @example Update `df` values including null values in `new_df`, using a full outer join strategy that defines explicit join columns in each frame:
+    #   df.update(new_df, left_on: "A", right_on: "C", how: "full", include_nulls: true)
+    #   # =>
+    #   # shape: (5, 2)
+    #   # ┌─────┬──────┐
+    #   # │ A   ┆ B    │
+    #   # │ --- ┆ ---  │
+    #   # │ i64 ┆ i64  │
+    #   # ╞═════╪══════╡
+    #   # │ 1   ┆ -99  │
+    #   # │ 2   ┆ 500  │
+    #   # │ 3   ┆ null │
+    #   # │ 4   ┆ 700  │
+    #   # │ 5   ┆ -66  │
+    #   # └─────┴──────┘
+    def update(
+      other,
+      on: nil,
+      how: "left",
+      left_on: nil,
+      right_on: nil,
+      include_nulls: false,
+      maintain_order: "left"
+    )
+      Utils.require_same_type(self, other)
+      lazy
+      .update(
+        other.lazy,
+        on: on,
+        how: how,
+        left_on: left_on,
+        right_on: right_on,
+        include_nulls: include_nulls,
+        maintain_order: maintain_order
+      )
+      .collect(_eager: true)
+    end
     private