polars-df 0.21.0-x86_64-linux-musl → 0.22.0-x86_64-linux-musl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +55 -48
  4. data/Cargo.toml +3 -0
  5. data/LICENSE-THIRD-PARTY.txt +23 -49
  6. data/README.md +12 -0
  7. data/lib/polars/3.2/polars.so +0 -0
  8. data/lib/polars/3.3/polars.so +0 -0
  9. data/lib/polars/3.4/polars.so +0 -0
  10. data/lib/polars/array_expr.rb +382 -3
  11. data/lib/polars/array_name_space.rb +281 -0
  12. data/lib/polars/binary_expr.rb +67 -0
  13. data/lib/polars/binary_name_space.rb +43 -0
  14. data/lib/polars/cat_expr.rb +224 -0
  15. data/lib/polars/cat_name_space.rb +138 -0
  16. data/lib/polars/config.rb +2 -2
  17. data/lib/polars/convert.rb +6 -6
  18. data/lib/polars/data_frame.rb +794 -27
  19. data/lib/polars/data_type_expr.rb +52 -0
  20. data/lib/polars/data_types.rb +26 -5
  21. data/lib/polars/date_time_expr.rb +252 -1
  22. data/lib/polars/date_time_name_space.rb +299 -0
  23. data/lib/polars/expr.rb +1248 -206
  24. data/lib/polars/functions/business.rb +95 -0
  25. data/lib/polars/functions/datatype.rb +21 -0
  26. data/lib/polars/functions/lazy.rb +14 -1
  27. data/lib/polars/io/csv.rb +1 -1
  28. data/lib/polars/io/iceberg.rb +27 -0
  29. data/lib/polars/io/json.rb +4 -4
  30. data/lib/polars/io/ndjson.rb +4 -4
  31. data/lib/polars/io/parquet.rb +32 -7
  32. data/lib/polars/io/scan_options.rb +4 -1
  33. data/lib/polars/lazy_frame.rb +1028 -28
  34. data/lib/polars/list_expr.rb +217 -17
  35. data/lib/polars/list_name_space.rb +231 -22
  36. data/lib/polars/meta_expr.rb +89 -0
  37. data/lib/polars/name_expr.rb +36 -0
  38. data/lib/polars/query_opt_flags.rb +50 -0
  39. data/lib/polars/scan_cast_options.rb +20 -1
  40. data/lib/polars/schema.rb +79 -3
  41. data/lib/polars/selector.rb +72 -0
  42. data/lib/polars/selectors.rb +3 -3
  43. data/lib/polars/series.rb +1053 -54
  44. data/lib/polars/string_expr.rb +436 -32
  45. data/lib/polars/string_name_space.rb +736 -50
  46. data/lib/polars/struct_expr.rb +103 -0
  47. data/lib/polars/struct_name_space.rb +19 -1
  48. data/lib/polars/utils/serde.rb +17 -0
  49. data/lib/polars/utils/various.rb +22 -1
  50. data/lib/polars/utils.rb +5 -1
  51. data/lib/polars/version.rb +1 -1
  52. data/lib/polars.rb +6 -0
  53. metadata +8 -2
@@ -15,11 +15,11 @@ module Polars
15
15
  # The schema of the resulting DataFrame. The schema may be declared in several
16
16
  # ways:
17
17
  #
18
- # * As a hash of name:type pairs; if type is nil, it will be auto-inferred.
18
+ # * As a hash of {name:type} pairs; if type is nil, it will be auto-inferred.
19
19
  # * As an array of column names; in this case types are automatically inferred.
20
- # * As an array of (name,type) pairs; this is equivalent to the dictionary form.
20
+ # * As an array of (name,type) pairs; this is equivalent to the hash form.
21
21
  #
22
- # If you supply a list of column names that does not match the names in the
22
+ # If you supply an array of column names that does not match the names in the
23
23
  # underlying data, the names given here will overwrite them. The number
24
24
  # of names given in the schema should match the underlying data dimensions.
25
25
  #
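To make the schema forms described in the hunk above concrete, a minimal sketch (assuming the gem is loaded with `require "polars-df"`; the column names and dtypes are arbitrary):

```ruby
require "polars-df"

data = {"a" => [1, 2, 3], "b" => [4.0, 5.0, 6.0]}

# Hash form: name => dtype; a nil dtype is auto-inferred from the data.
df1 = Polars::DataFrame.new(data, schema: {"a" => Polars::Int64, "b" => nil})

# Array of [name, dtype] pairs: equivalent to the hash form above.
df2 = Polars::DataFrame.new(data, schema: [["a", Polars::Int64], ["b", Polars::Float64]])

p df1.schema
p df2.schema
```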
@@ -72,6 +72,43 @@ module Polars
72
72
  end
73
73
  end
74
74
 
75
+ # Read a serialized DataFrame from a file.
76
+ #
77
+ # @param source [Object]
78
+ # Path to a file or a file-like object (by file-like object, we refer to
79
+ # objects that have a `read` method, such as a file handler or `StringIO`).
80
+ #
81
+ # @return [DataFrame]
82
+ #
83
+ # @note
84
+ # Serialization is not stable across Polars versions: a DataFrame serialized
85
+ # in one Polars version may not be deserializable in another Polars version.
86
+ #
87
+ # @example
88
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => [4.0, 5.0, 6.0]})
89
+ # bytes = df.serialize
90
+ # Polars::DataFrame.deserialize(StringIO.new(bytes))
91
+ # # =>
92
+ # # shape: (3, 2)
93
+ # # ┌─────┬─────┐
94
+ # # │ a ┆ b │
95
+ # # │ --- ┆ --- │
96
+ # # │ i64 ┆ f64 │
97
+ # # ╞═════╪═════╡
98
+ # # │ 1 ┆ 4.0 │
99
+ # # │ 2 ┆ 5.0 │
100
+ # # │ 3 ┆ 6.0 │
101
+ # # └─────┴─────┘
102
+ def self.deserialize(source)
103
+ if Utils.pathlike?(source)
104
+ source = Utils.normalize_filepath(source)
105
+ end
106
+
107
+ deserializer = RbDataFrame.method(:deserialize_binary)
108
+
109
+ _from_rbdf(deserializer.(source))
110
+ end
111
+
75
112
  # @private
76
113
  def self._from_rbdf(rb_df)
77
114
  df = DataFrame.allocate
@@ -560,9 +597,7 @@ module Polars
560
597
  end
561
598
  end
562
599
 
563
- # Convert every row to a dictionary.
564
- #
565
- # Note that this is slow.
600
+ # Convert every row to a hash.
566
601
  #
567
602
  # @return [Array]
568
603
  #
@@ -572,12 +607,7 @@ module Polars
572
607
  # # =>
573
608
  # # [{"foo"=>1, "bar"=>4}, {"foo"=>2, "bar"=>5}, {"foo"=>3, "bar"=>6}]
574
609
  def to_hashes
575
- rbdf = _df
576
- names = columns
577
-
578
- height.times.map do |i|
579
- names.zip(rbdf.row_tuple(i)).to_h
580
- end
610
+ rows(named: true)
581
611
  end
582
612
 
583
613
  # Convert DataFrame to a 2D Numo array.
@@ -634,6 +664,44 @@ module Polars
634
664
  Utils.wrap_s(_df.select_at_idx(index))
635
665
  end
636
666
 
667
+ # Serialize this DataFrame to a file or string.
668
+ #
669
+ # @param file [Object]
670
+ # File path or writable file-like object to which the result will be written.
671
+ # If set to `nil` (default), the output is returned as a string instead.
672
+ #
673
+ # @return [Object]
674
+ #
675
+ # @note
676
+ # Serialization is not stable across Polars versions: a DataFrame serialized
677
+ # in one Polars version may not be deserializable in another Polars version.
678
+ #
679
+ # @example
680
+ # df = Polars::DataFrame.new(
681
+ # {
682
+ # "foo" => [1, 2, 3],
683
+ # "bar" => [6, 7, 8]
684
+ # }
685
+ # )
686
+ # bytes = df.serialize
687
+ # Polars::DataFrame.deserialize(StringIO.new(bytes))
688
+ # # =>
689
+ # # shape: (3, 2)
690
+ # # ┌─────┬─────┐
691
+ # # │ foo ┆ bar │
692
+ # # │ --- ┆ --- │
693
+ # # │ i64 ┆ i64 │
694
+ # # ╞═════╪═════╡
695
+ # # │ 1 ┆ 6 │
696
+ # # │ 2 ┆ 7 │
697
+ # # │ 3 ┆ 8 │
698
+ # # └─────┴─────┘
699
+ def serialize(file = nil)
700
+ serializer = _df.method(:serialize_binary)
701
+
702
+ Utils.serialize_polars_object(serializer, file)
703
+ end
704
+
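Beyond the `StringIO` round trip shown in the example above, `serialize` and `deserialize` also accept a plain file path. A minimal sketch, with the filename `frame.bin` chosen arbitrarily; per the note above, the binary format is not stable across Polars versions:

```ruby
require "polars-df"

df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => [4.0, 5.0, 6.0]})

# Write the binary-serialized frame to disk...
df.serialize("frame.bin")

# ...and read it back with the same polars-df version.
restored = Polars::DataFrame.deserialize("frame.bin")
p restored.shape # => [3, 2]
```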
637
705
  # Serialize to JSON representation.
638
706
  #
639
707
  # @param file [String]
@@ -910,7 +978,7 @@ module Polars
910
978
  #
911
979
  # @param file [Object]
912
980
  # Path or writable file-like object to which the IPC record batch data will
913
- # be written. If set to `None`, the output is returned as a BytesIO object.
981
+ # be written. If set to `nil`, the output is returned in memory instead.
914
982
  # @param compression ['uncompressed', 'lz4', 'zstd']
915
983
  # Compression method. Defaults to "uncompressed".
916
984
  # @param compat_level [Object]
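For context on the `write_ipc` parameters touched in this hunk, a small sketch assuming a local path and `zstd` compression; `Polars.read_ipc` is used only to show the round trip:

```ruby
require "polars-df"

df = Polars::DataFrame.new({"id" => [1, 2, 3], "value" => [0.1, 0.2, 0.3]})

# Write Arrow IPC data to a path; per the docs above, passing nil instead
# of a path returns the encoded output in memory.
df.write_ipc("frame.arrow", compression: "zstd")

p Polars.read_ipc("frame.arrow")
```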
@@ -1148,6 +1216,40 @@ module Polars
1148
1216
  end
1149
1217
  end
1150
1218
 
1219
+ # Write DataFrame to an Iceberg table.
1220
+ #
1221
+ # @note
1222
+ # This functionality is currently considered **unstable**. It may be
1223
+ # changed at any point without it being considered a breaking change.
1224
+ #
1225
+ # @param target [Object]
1226
+ # Name of the table or the Table object representing an Iceberg table.
1227
+ # @param mode ['append', 'overwrite']
1228
+ # How to handle existing data.
1229
+ #
1230
+ # - If 'append', will add new data.
1231
+ # - If 'overwrite', will replace table with new data.
1232
+ #
1233
+ # @return [nil]
1234
+ def write_iceberg(target, mode:)
1235
+ require "iceberg"
1236
+
1237
+ table =
1238
+ if target.is_a?(Iceberg::Table)
1239
+ target
1240
+ else
1241
+ raise Todo
1242
+ end
1243
+
1244
+ data = self
1245
+
1246
+ if mode == "append"
1247
+ table.append(data)
1248
+ else
1249
+ raise Todo
1250
+ end
1251
+ end
1252
+
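A hedged sketch of the new `write_iceberg`. Only an existing `Iceberg::Table` with `mode: "append"` is handled by the body above; `load_events_table` is a hypothetical placeholder, since obtaining a table handle depends on the `iceberg` gem and your catalog:

```ruby
require "polars-df"
require "iceberg"

df = Polars::DataFrame.new({"id" => [1, 2, 3], "event" => ["a", "b", "c"]})

# Hypothetical helper: obtain an Iceberg::Table from your catalog however
# the iceberg gem supports it.
table = load_events_table("analytics.events")

# Only mode: "append" is implemented above; other targets/modes raise Todo.
df.write_iceberg(table, mode: "append")
```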
1151
1253
  # Write DataFrame as delta table.
1152
1254
  #
1153
1255
  # @param target [Object]
@@ -1463,6 +1565,126 @@ module Polars
1463
1565
  lazy.filter(predicate).collect
1464
1566
  end
1465
1567
 
1568
+ # Remove rows, dropping those that match the given predicate expression(s).
1569
+ #
1570
+ # The original order of the remaining rows is preserved.
1571
+ #
1572
+ # Rows where the filter predicate does not evaluate to `true` are retained
1573
+ # (this includes rows where the predicate evaluates as `null`).
1574
+ #
1575
+ # @param predicates [Array]
1576
+ # Expression(s) that evaluate to a boolean Series.
1577
+ # @param constraints [Hash]
1578
+ # Column filters; use `name: value` to filter columns using the supplied
1579
+ # value. Each constraint behaves the same as `Polars.col(name).eq(value)`,
1580
+ # and is implicitly joined with the other filter conditions using `&`.
1581
+ #
1582
+ # @return [DataFrame]
1583
+ #
1584
+ # @example Remove rows matching a condition:
1585
+ # df = Polars::DataFrame.new(
1586
+ # {
1587
+ # "foo" => [2, 3, nil, 4, 0],
1588
+ # "bar" => [5, 6, nil, nil, 0],
1589
+ # "ham" => ["a", "b", nil, "c", "d"]
1590
+ # }
1591
+ # )
1592
+ # df.remove(Polars.col("bar") >= 5)
1593
+ # # =>
1594
+ # # shape: (3, 3)
1595
+ # # ┌──────┬──────┬──────┐
1596
+ # # │ foo ┆ bar ┆ ham │
1597
+ # # │ --- ┆ --- ┆ --- │
1598
+ # # │ i64 ┆ i64 ┆ str │
1599
+ # # ╞══════╪══════╪══════╡
1600
+ # # │ null ┆ null ┆ null │
1601
+ # # │ 4 ┆ null ┆ c │
1602
+ # # │ 0 ┆ 0 ┆ d │
1603
+ # # └──────┴──────┴──────┘
1604
+ #
1605
+ # @example Discard rows based on multiple conditions, combined with and/or operators:
1606
+ # df.remove(
1607
+ # (Polars.col("foo") >= 0) & (Polars.col("bar") >= 0),
1608
+ # )
1609
+ # # =>
1610
+ # # shape: (2, 3)
1611
+ # # ┌──────┬──────┬──────┐
1612
+ # # │ foo ┆ bar ┆ ham │
1613
+ # # │ --- ┆ --- ┆ --- │
1614
+ # # │ i64 ┆ i64 ┆ str │
1615
+ # # ╞══════╪══════╪══════╡
1616
+ # # │ null ┆ null ┆ null │
1617
+ # # │ 4 ┆ null ┆ c │
1618
+ # # └──────┴──────┴──────┘
1619
+ #
1620
+ # @example
1621
+ # df.remove(
1622
+ # (Polars.col("foo") >= 0) | (Polars.col("bar") >= 0),
1623
+ # )
1624
+ # # =>
1625
+ # # shape: (1, 3)
1626
+ # # ┌──────┬──────┬──────┐
1627
+ # # │ foo ┆ bar ┆ ham │
1628
+ # # │ --- ┆ --- ┆ --- │
1629
+ # # │ i64 ┆ i64 ┆ str │
1630
+ # # ╞══════╪══════╪══════╡
1631
+ # # │ null ┆ null ┆ null │
1632
+ # # └──────┴──────┴──────┘
1633
+ #
1634
+ # @example Provide multiple predicates as positional arguments:
1635
+ # df.remove(
1636
+ # Polars.col("ham").is_not_null,
1637
+ # Polars.col("bar") >= 0
1638
+ # )
1639
+ # # =>
1640
+ # # shape: (2, 3)
1641
+ # # ┌──────┬──────┬──────┐
1642
+ # # │ foo ┆ bar ┆ ham │
1643
+ # # │ --- ┆ --- ┆ --- │
1644
+ # # │ i64 ┆ i64 ┆ str │
1645
+ # # ╞══════╪══════╪══════╡
1646
+ # # │ null ┆ null ┆ null │
1647
+ # # │ 4 ┆ null ┆ c │
1648
+ # # └──────┴──────┴──────┘
1649
+ #
1650
+ # @example Provide constraint(s) as keyword arguments:
1651
+ # df.remove(foo: 0, bar: 0)
1652
+ # # =>
1653
+ # # shape: (4, 3)
1654
+ # # ┌──────┬──────┬──────┐
1655
+ # # │ foo ┆ bar ┆ ham │
1656
+ # # │ --- ┆ --- ┆ --- │
1657
+ # # │ i64 ┆ i64 ┆ str │
1658
+ # # ╞══════╪══════╪══════╡
1659
+ # # │ 2 ┆ 5 ┆ a │
1660
+ # # │ 3 ┆ 6 ┆ b │
1661
+ # # │ null ┆ null ┆ null │
1662
+ # # │ 4 ┆ null ┆ c │
1663
+ # # └──────┴──────┴──────┘
1664
+ #
1665
+ # @example Remove rows by comparing two columns against each other:
1666
+ # df.remove(
1667
+ # Polars.col("foo").ne_missing(Polars.col("bar"))
1668
+ # )
1669
+ # # =>
1670
+ # # shape: (2, 3)
1671
+ # # ┌──────┬──────┬──────┐
1672
+ # # │ foo ┆ bar ┆ ham │
1673
+ # # │ --- ┆ --- ┆ --- │
1674
+ # # │ i64 ┆ i64 ┆ str │
1675
+ # # ╞══════╪══════╪══════╡
1676
+ # # │ null ┆ null ┆ null │
1677
+ # # │ 0 ┆ 0 ┆ d │
1678
+ # # └──────┴──────┴──────┘
1679
+ def remove(
1680
+ *predicates,
1681
+ **constraints
1682
+ )
1683
+ lazy
1684
+ .remove(*predicates, **constraints)
1685
+ .collect(_eager: true)
1686
+ end
1687
+
1466
1688
  # Summary statistics for a DataFrame.
1467
1689
  #
1468
1690
  # @return [DataFrame]
@@ -1658,6 +1880,223 @@ module Polars
1658
1880
  self._df = sort(by, reverse: reverse, nulls_last: nulls_last)._df
1659
1881
  end
1660
1882
 
1883
+ # Execute a SQL query against the DataFrame.
1884
+ #
1885
+ # @note
1886
+ # This functionality is considered **unstable**, although it is close to
1887
+ # being considered stable. It may be changed at any point without it being
1888
+ # considered a breaking change.
1889
+ #
1890
+ # @param query [String]
1891
+ # SQL query to execute.
1892
+ # @param table_name [String]
1893
+ # Optionally provide an explicit name for the table that represents the
1894
+ # calling frame (defaults to "self").
1895
+ #
1896
+ # @return [DataFrame]
1897
+ #
1898
+ # @note
1899
+ # * The calling frame is automatically registered as a table in the SQL context
1900
+ # under the name "self". If you want access to the DataFrames and LazyFrames
1901
+ # found in the current globals, use the top-level :meth:`pl.sql <polars.sql>`.
1902
+ # * More control over registration and execution behaviour is available by
1903
+ # using the :class:`SQLContext` object.
1904
+ # * The SQL query executes in lazy mode before being collected and returned
1905
+ # as a DataFrame.
1906
+ #
1907
+ # @example Query the DataFrame using SQL:
1908
+ # df1 = Polars::DataFrame.new(
1909
+ # {
1910
+ # "a" => [1, 2, 3],
1911
+ # "b" => ["zz", "yy", "xx"],
1912
+ # "c" => [Date.new(1999, 12, 31), Date.new(2010, 10, 10), Date.new(2077, 8, 8)]
1913
+ # }
1914
+ # )
1915
+ # df1.sql("SELECT c, b FROM self WHERE a > 1")
1916
+ # # =>
1917
+ # # shape: (2, 2)
1918
+ # # ┌────────────┬─────┐
1919
+ # # │ c ┆ b │
1920
+ # # │ --- ┆ --- │
1921
+ # # │ date ┆ str │
1922
+ # # ╞════════════╪═════╡
1923
+ # # │ 2010-10-10 ┆ yy │
1924
+ # # │ 2077-08-08 ┆ xx │
1925
+ # # └────────────┴─────┘
1926
+ #
1927
+ # @example Apply transformations to a DataFrame using SQL, aliasing "self" to "frame".
1928
+ # df1.sql(
1929
+ # "
1930
+ # SELECT
1931
+ # a,
1932
+ # (a % 2 == 0) AS a_is_even,
1933
+ # CONCAT_WS(':', b, b) AS b_b,
1934
+ # EXTRACT(year FROM c) AS year,
1935
+ # 0::float4 AS \"zero\",
1936
+ # FROM frame
1937
+ # ",
1938
+ # table_name: "frame"
1939
+ # )
1940
+ # # =>
1941
+ # # shape: (3, 5)
1942
+ # # ┌─────┬───────────┬───────┬──────┬──────┐
1943
+ # # │ a ┆ a_is_even ┆ b_b ┆ year ┆ zero │
1944
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
1945
+ # # │ i64 ┆ bool ┆ str ┆ i32 ┆ f32 │
1946
+ # # ╞═════╪═══════════╪═══════╪══════╪══════╡
1947
+ # # │ 1 ┆ false ┆ zz:zz ┆ 1999 ┆ 0.0 │
1948
+ # # │ 2 ┆ true ┆ yy:yy ┆ 2010 ┆ 0.0 │
1949
+ # # │ 3 ┆ false ┆ xx:xx ┆ 2077 ┆ 0.0 │
1950
+ # # └─────┴───────────┴───────┴──────┴──────┘
1951
+ def sql(query, table_name: "self")
1952
+ ctx = SQLContext.new(eager_execution: true)
1953
+ name = table_name || "self"
1954
+ ctx.register(name, self)
1955
+ ctx.execute(query)
1956
+ end
1957
+
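The note above points to `SQLContext` for finer control over table registration. A sketch built only from the calls visible in the `sql` implementation above (`SQLContext.new(eager_execution: true)`, `register`, `execute`); the table names and query are illustrative:

```ruby
require "polars-df"

users = Polars::DataFrame.new({"id" => [1, 2, 3], "name" => ["ann", "bob", "cat"]})
orders = Polars::DataFrame.new({"user_id" => [1, 1, 3], "total" => [9.5, 3.0, 12.0]})

# Register several frames under explicit names, then query across them.
ctx = Polars::SQLContext.new(eager_execution: true)
ctx.register("users", users)
ctx.register("orders", orders)

ctx.execute(
  "SELECT users.name, SUM(orders.total) AS total
   FROM users JOIN orders ON users.id = orders.user_id
   GROUP BY users.name"
)
```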
1958
+ # Return the `k` largest rows.
1959
+ #
1960
+ # Non-null elements are always preferred over null elements, regardless of
1961
+ # the value of `reverse`. The output is not guaranteed to be in any
1962
+ # particular order; call `sort` after this function if you wish the
1963
+ # output to be sorted.
1964
+ #
1965
+ # @param k [Integer]
1966
+ # Number of rows to return.
1967
+ # @param by [Object]
1968
+ # Column(s) used to determine the top rows.
1969
+ # Accepts expression input. Strings are parsed as column names.
1970
+ # @param reverse [Object]
1971
+ # Consider the `k` smallest elements of the `by` column(s) (instead of the `k`
1972
+ # largest). This can be specified per column by passing an array of
1973
+ # booleans.
1974
+ #
1975
+ # @return [DataFrame]
1976
+ #
1977
+ # @example Get the rows which contain the 4 largest values in column b.
1978
+ # df = Polars::DataFrame.new(
1979
+ # {
1980
+ # "a" => ["a", "b", "a", "b", "b", "c"],
1981
+ # "b" => [2, 1, 1, 3, 2, 1]
1982
+ # }
1983
+ # )
1984
+ # df.top_k(4, by: "b")
1985
+ # # =>
1986
+ # # shape: (4, 2)
1987
+ # # ┌─────┬─────┐
1988
+ # # │ a ┆ b │
1989
+ # # │ --- ┆ --- │
1990
+ # # │ str ┆ i64 │
1991
+ # # ╞═════╪═════╡
1992
+ # # │ b ┆ 3 │
1993
+ # # │ a ┆ 2 │
1994
+ # # │ b ┆ 2 │
1995
+ # # │ b ┆ 1 │
1996
+ # # └─────┴─────┘
1997
+ #
1998
+ # @example Get the rows which contain the 4 largest values when sorting on column b and a.
1999
+ # df.top_k(4, by: ["b", "a"])
2000
+ # # =>
2001
+ # # shape: (4, 2)
2002
+ # # ┌─────┬─────┐
2003
+ # # │ a ┆ b │
2004
+ # # │ --- ┆ --- │
2005
+ # # │ str ┆ i64 │
2006
+ # # ╞═════╪═════╡
2007
+ # # │ b ┆ 3 │
2008
+ # # │ b ┆ 2 │
2009
+ # # │ a ┆ 2 │
2010
+ # # │ c ┆ 1 │
2011
+ # # └─────┴─────┘
2012
+ def top_k(
2013
+ k,
2014
+ by:,
2015
+ reverse: false
2016
+ )
2017
+ lazy
2018
+ .top_k(k, by: by, reverse: reverse)
2019
+ .collect(
2020
+ # optimizations=QueryOptFlags(
2021
+ # projection_pushdown=False,
2022
+ # predicate_pushdown=False,
2023
+ # comm_subplan_elim=False,
2024
+ # slice_pushdown=True
2025
+ # )
2026
+ )
2027
+ end
2028
+
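Because `top_k` does not guarantee row order, the docs above suggest a follow-up `sort`; for example:

```ruby
require "polars-df"

df = Polars::DataFrame.new(
  {
    "a" => ["a", "b", "a", "b", "b", "c"],
    "b" => [2, 1, 1, 3, 2, 1]
  }
)

# Take the 4 rows with the largest "b", then impose a descending order.
df.top_k(4, by: "b").sort("b", reverse: true)
```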
2029
+ # Return the `k` smallest rows.
2030
+ #
2031
+ # Non-null elements are always preferred over null elements, regardless of
2032
+ # the value of `reverse`. The output is not guaranteed to be in any
2033
+ # particular order; call `sort` after this function if you wish the
2034
+ # output to be sorted.
2035
+ #
2036
+ # @param k [Integer]
2037
+ # Number of rows to return.
2038
+ # @param by [Object]
2039
+ # Column(s) used to determine the bottom rows.
2040
+ # Accepts expression input. Strings are parsed as column names.
2041
+ # @param reverse [Object]
2042
+ # Consider the `k` largest elements of the `by` column(s) (instead of the `k`
2043
+ # smallest). This can be specified per column by passing an array of
2044
+ # booleans.
2045
+ #
2046
+ # @return [DataFrame]
2047
+ #
2048
+ # @example Get the rows which contain the 4 smallest values in column b.
2049
+ # df = Polars::DataFrame.new(
2050
+ # {
2051
+ # "a" => ["a", "b", "a", "b", "b", "c"],
2052
+ # "b" => [2, 1, 1, 3, 2, 1]
2053
+ # }
2054
+ # )
2055
+ # df.bottom_k(4, by: "b")
2056
+ # # =>
2057
+ # # shape: (4, 2)
2058
+ # # ┌─────┬─────┐
2059
+ # # │ a ┆ b │
2060
+ # # │ --- ┆ --- │
2061
+ # # │ str ┆ i64 │
2062
+ # # ╞═════╪═════╡
2063
+ # # │ b ┆ 1 │
2064
+ # # │ a ┆ 1 │
2065
+ # # │ c ┆ 1 │
2066
+ # # │ a ┆ 2 │
2067
+ # # └─────┴─────┘
2068
+ #
2069
+ # @example Get the rows which contain the 4 smallest values when sorting on column a and b.
2070
+ # df.bottom_k(4, by: ["a", "b"])
2071
+ # # =>
2072
+ # # shape: (4, 2)
2073
+ # # ┌─────┬─────┐
2074
+ # # │ a ┆ b │
2075
+ # # │ --- ┆ --- │
2076
+ # # │ str ┆ i64 │
2077
+ # # ╞═════╪═════╡
2078
+ # # │ a ┆ 1 │
2079
+ # # │ a ┆ 2 │
2080
+ # # │ b ┆ 1 │
2081
+ # # │ b ┆ 2 │
2082
+ # # └─────┴─────┘
2083
+ def bottom_k(
2084
+ k,
2085
+ by:,
2086
+ reverse: false
2087
+ )
2088
+ lazy
2089
+ .bottom_k(k, by: by, reverse: reverse)
2090
+ .collect(
2091
+ # optimizations=QueryOptFlags(
2092
+ # projection_pushdown=False,
2093
+ # predicate_pushdown=False,
2094
+ # comm_subplan_elim=False,
2095
+ # slice_pushdown=True,
2096
+ # )
2097
+ )
2098
+ end
2099
+
1661
2100
  # Check if DataFrame is equal to other.
1662
2101
  #
1663
2102
  # @param other [DataFrame]
@@ -1848,10 +2287,59 @@ module Polars
1848
2287
  _from_rbdf(_df.tail(n))
1849
2288
  end
1850
2289
 
1851
- # Return a new DataFrame where the null values are dropped.
2290
+ # Drop all rows that contain one or more NaN values.
2291
+ #
2292
+ # The original order of the remaining rows is preserved.
2293
+ #
2294
+ # @param subset [Object]
2295
+ # Column name(s) for which NaN values are considered; if set to `nil`
2296
+ # (default), use all columns (note that only floating-point columns
2297
+ # can contain NaNs).
2298
+ #
2299
+ # @return [DataFrame]
2300
+ #
2301
+ # @example
2302
+ # df = Polars::DataFrame.new(
2303
+ # {
2304
+ # "foo" => [-20.5, Float::NAN, 80.0],
2305
+ # "bar" => [Float::NAN, 110.0, 25.5],
2306
+ # "ham" => ["xxx", "yyy", nil]
2307
+ # }
2308
+ # )
2309
+ # df.drop_nans
2310
+ # # =>
2311
+ # # shape: (1, 3)
2312
+ # # ┌──────┬──────┬──────┐
2313
+ # # │ foo ┆ bar ┆ ham │
2314
+ # # │ --- ┆ --- ┆ --- │
2315
+ # # │ f64 ┆ f64 ┆ str │
2316
+ # # ╞══════╪══════╪══════╡
2317
+ # # │ 80.0 ┆ 25.5 ┆ null │
2318
+ # # └──────┴──────┴──────┘
2319
+ #
2320
+ # @example
2321
+ # df.drop_nans(subset: ["bar"])
2322
+ # # =>
2323
+ # # shape: (2, 3)
2324
+ # # ┌──────┬───────┬──────┐
2325
+ # # │ foo ┆ bar ┆ ham │
2326
+ # # │ --- ┆ --- ┆ --- │
2327
+ # # │ f64 ┆ f64 ┆ str │
2328
+ # # ╞══════╪═══════╪══════╡
2329
+ # # │ NaN ┆ 110.0 ┆ yyy │
2330
+ # # │ 80.0 ┆ 25.5 ┆ null │
2331
+ # # └──────┴───────┴──────┘
2332
+ def drop_nans(subset: nil)
2333
+ lazy.drop_nans(subset: subset).collect(_eager: true)
2334
+ end
2335
+
2336
+ # Drop all rows that contain one or more null values.
2337
+ #
2338
+ # The original order of the remaining rows is preserved.
1852
2339
  #
1853
2340
  # @param subset [Object]
1854
- # Subset of column(s) on which `drop_nulls` will be applied.
2341
+ # Column name(s) for which null values are considered.
2342
+ # If set to `nil` (default), use all columns.
1855
2343
  #
1856
2344
  # @return [DataFrame]
1857
2345
  #
@@ -1860,20 +2348,32 @@ module Polars
1860
2348
  # {
1861
2349
  # "foo" => [1, 2, 3],
1862
2350
  # "bar" => [6, nil, 8],
1863
- # "ham" => ["a", "b", "c"]
2351
+ # "ham" => ["a", "b", nil]
1864
2352
  # }
1865
2353
  # )
1866
2354
  # df.drop_nulls
1867
2355
  # # =>
1868
- # # shape: (2, 3)
2356
+ # # shape: (1, 3)
1869
2357
  # # ┌─────┬─────┬─────┐
1870
2358
  # # │ foo ┆ bar ┆ ham │
1871
2359
  # # │ --- ┆ --- ┆ --- │
1872
2360
  # # │ i64 ┆ i64 ┆ str │
1873
2361
  # # ╞═════╪═════╪═════╡
1874
2362
  # # │ 1 ┆ 6 ┆ a │
1875
- # # │ 3 ┆ 8 ┆ c │
1876
2363
  # # └─────┴─────┴─────┘
2364
+ #
2365
+ # @example
2366
+ # df.drop_nulls(subset: Polars.cs.integer)
2367
+ # # =>
2368
+ # # shape: (2, 3)
2369
+ # # ┌─────┬─────┬──────┐
2370
+ # # │ foo ┆ bar ┆ ham │
2371
+ # # │ --- ┆ --- ┆ --- │
2372
+ # # │ i64 ┆ i64 ┆ str │
2373
+ # # ╞═════╪═════╪══════╡
2374
+ # # │ 1 ┆ 6 ┆ a │
2375
+ # # │ 3 ┆ 8 ┆ null │
2376
+ # # └─────┴─────┴──────┘
1877
2377
  def drop_nulls(subset: nil)
1878
2378
  lazy.drop_nulls(subset: subset).collect(_eager: true)
1879
2379
  end
@@ -2139,9 +2639,9 @@ module Polars
2139
2639
  # @param every
2140
2640
  # Interval of the window.
2141
2641
  # @param period
2142
- # Length of the window, if None it is equal to 'every'.
2642
+ # Length of the window, if nil it is equal to 'every'.
2143
2643
  # @param offset
2144
- # Offset of the window if None and period is None it will be equal to negative
2644
+ # Offset of the window if nil and period is nil it will be equal to negative
2145
2645
  # `every`.
2146
2646
  # @param truncate
2147
2647
  # Truncate the time value to the window lower bound.
@@ -2469,7 +2969,7 @@ module Polars
2469
2969
  # Join column of the right DataFrame.
2470
2970
  # @param on [String]
2471
2971
  # Join column of both DataFrames. If set, `left_on` and `right_on` should be
2472
- # None.
2972
+ # nil.
2473
2973
  # @param by_left [Object]
2474
2974
  # join on these columns before doing asof join
2475
2975
  # @param by_right [Object]
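This hunk only rewords the `join_asof` parameter docs (None → nil). For context, a minimal asof-join sketch, assuming the method also accepts the usual `on:` and `strategy:` keywords and that both frames are sorted by the asof key:

```ruby
require "polars-df"

quotes = Polars::DataFrame.new({"time" => [1, 5, 10], "price" => [100.0, 101.5, 102.0]})
trades = Polars::DataFrame.new({"time" => [2, 7, 12], "qty" => [10, 20, 30]})

# Match each trade with the most recent quote at or before its time.
trades.join_asof(quotes, on: "time", strategy: "backward")
```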
@@ -2755,6 +3255,101 @@ module Polars
2755
3255
  .collect(no_optimization: true)
2756
3256
  end
2757
3257
 
3258
+ # Perform a join based on one or multiple (in)equality predicates.
3259
+ #
3260
+ # This performs an inner join, so only rows where all predicates are true
3261
+ # are included in the result, and a row from either DataFrame may be included
3262
+ # multiple times in the result.
3263
+ #
3264
+ # @note
3265
+ # The row order of the input DataFrames is not preserved.
3266
+ #
3267
+ # @note
3268
+ # This functionality is experimental. It may be
3269
+ # changed at any point without it being considered a breaking change.
3270
+ #
3271
+ # @param other [DataFrame]
3272
+ # DataFrame to join with.
3273
+ # @param predicates [Array]
3274
+ # (In)Equality condition to join the two tables on.
3275
+ # When a column name occurs in both tables, the proper suffix must
3276
+ # be applied in the predicate.
3277
+ # @param suffix [String]
3278
+ # Suffix to append to columns with a duplicate name.
3279
+ #
3280
+ # @return [DataFrame]
3281
+ #
3282
+ # @example Join two dataframes together based on two predicates which get AND-ed together.
3283
+ # east = Polars::DataFrame.new(
3284
+ # {
3285
+ # "id": [100, 101, 102],
3286
+ # "dur": [120, 140, 160],
3287
+ # "rev": [12, 14, 16],
3288
+ # "cores": [2, 8, 4]
3289
+ # }
3290
+ # )
3291
+ # west = Polars::DataFrame.new(
3292
+ # {
3293
+ # "t_id": [404, 498, 676, 742],
3294
+ # "time": [90, 130, 150, 170],
3295
+ # "cost": [9, 13, 15, 16],
3296
+ # "cores": [4, 2, 1, 4]
3297
+ # }
3298
+ # )
3299
+ # east.join_where(
3300
+ # west,
3301
+ # Polars.col("dur") < Polars.col("time"),
3302
+ # Polars.col("rev") < Polars.col("cost")
3303
+ # )
3304
+ # # =>
3305
+ # # shape: (5, 8)
3306
+ # # ┌─────┬─────┬─────┬───────┬──────┬──────┬──────┬─────────────┐
3307
+ # # │ id ┆ dur ┆ rev ┆ cores ┆ t_id ┆ time ┆ cost ┆ cores_right │
3308
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
3309
+ # # │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
3310
+ # # ╞═════╪═════╪═════╪═══════╪══════╪══════╪══════╪═════════════╡
3311
+ # # │ 100 ┆ 120 ┆ 12 ┆ 2 ┆ 498 ┆ 130 ┆ 13 ┆ 2 │
3312
+ # # │ 100 ┆ 120 ┆ 12 ┆ 2 ┆ 676 ┆ 150 ┆ 15 ┆ 1 │
3313
+ # # │ 100 ┆ 120 ┆ 12 ┆ 2 ┆ 742 ┆ 170 ┆ 16 ┆ 4 │
3314
+ # # │ 101 ┆ 140 ┆ 14 ┆ 8 ┆ 676 ┆ 150 ┆ 15 ┆ 1 │
3315
+ # # │ 101 ┆ 140 ┆ 14 ┆ 8 ┆ 742 ┆ 170 ┆ 16 ┆ 4 │
3316
+ # # └─────┴─────┴─────┴───────┴──────┴──────┴──────┴─────────────┘
3317
+ #
3318
+ # @example To OR them together, use a single expression and the `|` operator.
3319
+ # east.join_where(
3320
+ # west,
3321
+ # (Polars.col("dur") < Polars.col("time")) | (Polars.col("rev") < Polars.col("cost"))
3322
+ # )
3323
+ # # =>
3324
+ # # shape: (6, 8)
3325
+ # # ┌─────┬─────┬─────┬───────┬──────┬──────┬──────┬─────────────┐
3326
+ # # │ id ┆ dur ┆ rev ┆ cores ┆ t_id ┆ time ┆ cost ┆ cores_right │
3327
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
3328
+ # # │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
3329
+ # # ╞═════╪═════╪═════╪═══════╪══════╪══════╪══════╪═════════════╡
3330
+ # # │ 100 ┆ 120 ┆ 12 ┆ 2 ┆ 498 ┆ 130 ┆ 13 ┆ 2 │
3331
+ # # │ 100 ┆ 120 ┆ 12 ┆ 2 ┆ 676 ┆ 150 ┆ 15 ┆ 1 │
3332
+ # # │ 100 ┆ 120 ┆ 12 ┆ 2 ┆ 742 ┆ 170 ┆ 16 ┆ 4 │
3333
+ # # │ 101 ┆ 140 ┆ 14 ┆ 8 ┆ 676 ┆ 150 ┆ 15 ┆ 1 │
3334
+ # # │ 101 ┆ 140 ┆ 14 ┆ 8 ┆ 742 ┆ 170 ┆ 16 ┆ 4 │
3335
+ # # │ 102 ┆ 160 ┆ 16 ┆ 4 ┆ 742 ┆ 170 ┆ 16 ┆ 4 │
3336
+ # # └─────┴─────┴─────┴───────┴──────┴──────┴──────┴─────────────┘
3337
+ def join_where(
3338
+ other,
3339
+ *predicates,
3340
+ suffix: "_right"
3341
+ )
3342
+ Utils.require_same_type(self, other)
3343
+
3344
+ lazy
3345
+ .join_where(
3346
+ other.lazy,
3347
+ *predicates,
3348
+ suffix: suffix
3349
+ )
3350
+ .collect(_eager: true)
3351
+ end
3352
+
2758
3353
  # Apply a custom/user-defined function (UDF) over the rows of the DataFrame.
2759
3354
  #
2760
3355
  # The UDF will receive each row as a tuple of values: `udf(row)`.
@@ -3749,8 +4344,8 @@ module Polars
3749
4344
  # @param include_key [Boolean]
3750
4345
  # Include the columns used to partition the DataFrame in the output.
3751
4346
  # @param as_dict [Boolean]
3752
- # If true, return the partitions in a dictionary keyed by the distinct group
3753
- # values instead of a list.
4347
+ # If true, return the partitions in a hash keyed by the distinct group
4348
+ # values instead of an array.
3754
4349
  #
3755
4350
  # @return [Object]
3756
4351
  #
@@ -4071,6 +4666,26 @@ module Polars
4071
4666
  lazy.select(*exprs, **named_exprs).collect(_eager: true)
4072
4667
  end
4073
4668
 
4669
+ # Select columns from this DataFrame.
4670
+ #
4671
+ # This will run all expressions sequentially instead of in parallel.
4672
+ # Use this when the work per expression is cheap.
4673
+ #
4674
+ # @param exprs [Array]
4675
+ # Column(s) to select, specified as positional arguments.
4676
+ # Accepts expression input. Strings are parsed as column names,
4677
+ # other non-expression inputs are parsed as literals.
4678
+ # @param named_exprs [Hash]
4679
+ # Additional columns to select, specified as keyword arguments.
4680
+ # The columns will be renamed to the keyword used.
4681
+ #
4682
+ # @return [DataFrame]
4683
+ def select_seq(*exprs, **named_exprs)
4684
+ lazy
4685
+ .select_seq(*exprs, **named_exprs)
4686
+ .collect(_eager: true)
4687
+ end
4688
+
4074
4689
  # Add columns to this DataFrame.
4075
4690
  #
4076
4691
  # Added columns will replace existing columns with the same name.
@@ -4183,6 +4798,31 @@ module Polars
4183
4798
  lazy.with_columns(*exprs, **named_exprs).collect(_eager: true)
4184
4799
  end
4185
4800
 
4801
+ # Add columns to this DataFrame.
4802
+ #
4803
+ # Added columns will replace existing columns with the same name.
4804
+ #
4805
+ # This will run all expressions sequentially instead of in parallel.
4806
+ # Use this when the work per expression is cheap.
4807
+ #
4808
+ # @param exprs [Array]
4809
+ # Column(s) to add, specified as positional arguments.
4810
+ # Accepts expression input. Strings are parsed as column names, other
4811
+ # non-expression inputs are parsed as literals.
4812
+ # @param named_exprs [Hash]
4813
+ # Additional columns to add, specified as keyword arguments.
4814
+ # The columns will be renamed to the keyword used.
4815
+ #
4816
+ # @return [DataFrame]
4817
+ def with_columns_seq(
4818
+ *exprs,
4819
+ **named_exprs
4820
+ )
4821
+ lazy
4822
+ .with_columns_seq(*exprs, **named_exprs)
4823
+ .collect(_eager: true)
4824
+ end
4825
+
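Likewise for `with_columns_seq`, a minimal sketch mirroring `with_columns`:

```ruby
require "polars-df"

df = Polars::DataFrame.new({"price" => [10.0, 12.5], "qty" => [3, 4]})

# Adds/replaces columns like `with_columns`, evaluating expressions
# sequentially instead of in parallel.
df.with_columns_seq(
  Polars.col("qty").cast(Polars::Float64),
  revenue: Polars.col("price") * Polars.col("qty")
)
```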
4186
4826
  # Get number of chunks used by the ChunkedArrays of this DataFrame.
4187
4827
  #
4188
4828
  # @param strategy ["first", "all"]
@@ -4600,7 +5240,7 @@ module Polars
4600
5240
  # @param drop_first [Boolean]
4601
5241
  # Remove the first category from the variables being encoded.
4602
5242
  # @param drop_nulls [Boolean]
4603
- # If there are `None` values in the series, a `null` column is not generated
5243
+ # If there are `nil` values in the series, a `null` column is not generated
4604
5244
  #
4605
5245
  # @return [DataFrame]
4606
5246
  #
@@ -5521,9 +6161,136 @@ module Polars
5521
6161
  .collect(no_optimization: true)
5522
6162
  end
5523
6163
 
5524
- # TODO
5525
- # def update
5526
- # end
6164
+ # Update the values in this `DataFrame` with the values in `other`.
6165
+ #
6166
+ # @note
6167
+ # This functionality is considered **unstable**. It may be changed
6168
+ # at any point without it being considered a breaking change.
6169
+ #
6170
+ # @param other [DataFrame]
6171
+ # DataFrame that will be used to update the values
6172
+ # @param on [Object]
6173
+ # Column names that will be joined on. If set to `nil` (default),
6174
+ # the implicit row index of each frame is used as a join key.
6175
+ # @param how ['left', 'inner', 'full']
6176
+ # * 'left' will keep all rows from the left table; rows may be duplicated
6177
+ # if multiple rows in the right frame match the left row's key.
6178
+ # * 'inner' keeps only those rows where the key exists in both frames.
6179
+ # * 'full' will update existing rows where the key matches while also
6180
+ # adding any new rows contained in the given frame.
6181
+ # @param left_on [Object]
6182
+ # Join column(s) of the left DataFrame.
6183
+ # @param right_on [Object]
6184
+ # Join column(s) of the right DataFrame.
6185
+ # @param include_nulls [Boolean]
6186
+ # Overwrite values in the left frame with null values from the right frame.
6187
+ # If set to `false` (default), null values in the right frame are ignored.
6188
+ # @param maintain_order ['none', 'left', 'right', 'left_right', 'right_left']
6189
+ # Which order of rows from the inputs to preserve. See `DataFrame.join`
6190
+ # for details. Unlike `join` this function preserves the left order by
6191
+ # default.
6192
+ #
6193
+ # @return [DataFrame]
6194
+ #
6195
+ # @note
6196
+ # This is syntactic sugar for a left/inner join that preserves the order
6197
+ # of the left `DataFrame` by default, with an optional coalesce when
6198
+ # `include_nulls: false`.
6199
+ #
6200
+ # @example Update `df` values with the non-null values in `new_df`, by row index:
6201
+ # df = Polars::DataFrame.new(
6202
+ # {
6203
+ # "A" => [1, 2, 3, 4],
6204
+ # "B" => [400, 500, 600, 700]
6205
+ # }
6206
+ # )
6207
+ # new_df = Polars::DataFrame.new(
6208
+ # {
6209
+ # "B" => [-66, nil, -99],
6210
+ # "C" => [5, 3, 1]
6211
+ # }
6212
+ # )
6213
+ # df.update(new_df)
6214
+ # # =>
6215
+ # # shape: (4, 2)
6216
+ # # ┌─────┬─────┐
6217
+ # # │ A ┆ B │
6218
+ # # │ --- ┆ --- │
6219
+ # # │ i64 ┆ i64 │
6220
+ # # ╞═════╪═════╡
6221
+ # # │ 1 ┆ -66 │
6222
+ # # │ 2 ┆ 500 │
6223
+ # # │ 3 ┆ -99 │
6224
+ # # │ 4 ┆ 700 │
6225
+ # # └─────┴─────┘
6226
+ #
6227
+ # @example Update `df` values with the non-null values in `new_df`, by row index, but only keeping those rows that are common to both frames:
6228
+ # df.update(new_df, how: "inner")
6229
+ # # =>
6230
+ # # shape: (3, 2)
6231
+ # # ┌─────┬─────┐
6232
+ # # │ A ┆ B │
6233
+ # # │ --- ┆ --- │
6234
+ # # │ i64 ┆ i64 │
6235
+ # # ╞═════╪═════╡
6236
+ # # │ 1 ┆ -66 │
6237
+ # # │ 2 ┆ 500 │
6238
+ # # │ 3 ┆ -99 │
6239
+ # # └─────┴─────┘
6240
+ #
6241
+ # @example Update `df` values with the non-null values in `new_df`, using a full outer join strategy that defines explicit join columns in each frame:
6242
+ # df.update(new_df, left_on: ["A"], right_on: ["C"], how: "full")
6243
+ # # =>
6244
+ # # shape: (5, 2)
6245
+ # # ┌─────┬─────┐
6246
+ # # │ A ┆ B │
6247
+ # # │ --- ┆ --- │
6248
+ # # │ i64 ┆ i64 │
6249
+ # # ╞═════╪═════╡
6250
+ # # │ 1 ┆ -99 │
6251
+ # # │ 2 ┆ 500 │
6252
+ # # │ 3 ┆ 600 │
6253
+ # # │ 4 ┆ 700 │
6254
+ # # │ 5 ┆ -66 │
6255
+ # # └─────┴─────┘
6256
+ #
6257
+ # @example Update `df` values including null values in `new_df`, using a full outer join strategy that defines explicit join columns in each frame:
6258
+ # df.update(new_df, left_on: "A", right_on: "C", how: "full", include_nulls: true)
6259
+ # # =>
6260
+ # # shape: (5, 2)
6261
+ # # ┌─────┬──────┐
6262
+ # # │ A ┆ B │
6263
+ # # │ --- ┆ --- │
6264
+ # # │ i64 ┆ i64 │
6265
+ # # ╞═════╪══════╡
6266
+ # # │ 1 ┆ -99 │
6267
+ # # │ 2 ┆ 500 │
6268
+ # # │ 3 ┆ null │
6269
+ # # │ 4 ┆ 700 │
6270
+ # # │ 5 ┆ -66 │
6271
+ # # └─────┴──────┘
6272
+ def update(
6273
+ other,
6274
+ on: nil,
6275
+ how: "left",
6276
+ left_on: nil,
6277
+ right_on: nil,
6278
+ include_nulls: false,
6279
+ maintain_order: "left"
6280
+ )
6281
+ Utils.require_same_type(self, other)
6282
+ lazy
6283
+ .update(
6284
+ other.lazy,
6285
+ on: on,
6286
+ how: how,
6287
+ left_on: left_on,
6288
+ right_on: right_on,
6289
+ include_nulls: include_nulls,
6290
+ maintain_order: maintain_order
6291
+ )
6292
+ .collect(_eager: true)
6293
+ end
5527
6294
 
5528
6295
  private
5529
6296