RubyGems - polars-df - Versions diffs - 0.20.0-x86_64-darwin → 0.21.1-x86_64-darwin - Mend

polars-df 0.20.0-x86_64-darwin → 0.21.1-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +27 -0
data/Cargo.lock +192 -186
data/LICENSE-THIRD-PARTY.txt +1431 -1810
data/LICENSE.txt +1 -1
data/lib/polars/3.2/polars.bundle +0 -0
data/lib/polars/3.3/polars.bundle +0 -0
data/lib/polars/3.4/polars.bundle +0 -0
data/lib/polars/array_expr.rb +382 -3
data/lib/polars/array_name_space.rb +281 -0
data/lib/polars/binary_expr.rb +67 -0
data/lib/polars/binary_name_space.rb +43 -0
data/lib/polars/cat_expr.rb +224 -0
data/lib/polars/cat_name_space.rb +130 -32
data/lib/polars/catalog/unity/catalog_info.rb +20 -0
data/lib/polars/catalog/unity/column_info.rb +31 -0
data/lib/polars/catalog/unity/namespace_info.rb +21 -0
data/lib/polars/catalog/unity/table_info.rb +50 -0
data/lib/polars/catalog.rb +448 -0
data/lib/polars/config.rb +2 -2
data/lib/polars/convert.rb +12 -2
data/lib/polars/data_frame.rb +834 -48
data/lib/polars/data_type_expr.rb +52 -0
data/lib/polars/data_types.rb +61 -5
data/lib/polars/date_time_expr.rb +251 -0
data/lib/polars/date_time_name_space.rb +299 -0
data/lib/polars/exceptions.rb +7 -2
data/lib/polars/expr.rb +1247 -211
data/lib/polars/functions/col.rb +6 -5
data/lib/polars/functions/datatype.rb +21 -0
data/lib/polars/functions/lazy.rb +127 -15
data/lib/polars/functions/repeat.rb +4 -0
data/lib/polars/io/csv.rb +19 -1
data/lib/polars/io/json.rb +16 -0
data/lib/polars/io/ndjson.rb +13 -0
data/lib/polars/io/parquet.rb +70 -66
data/lib/polars/io/scan_options.rb +47 -0
data/lib/polars/lazy_frame.rb +1099 -95
data/lib/polars/list_expr.rb +400 -11
data/lib/polars/list_name_space.rb +321 -5
data/lib/polars/meta_expr.rb +71 -22
data/lib/polars/name_expr.rb +36 -0
data/lib/polars/scan_cast_options.rb +64 -0
data/lib/polars/schema.rb +84 -3
data/lib/polars/selector.rb +210 -0
data/lib/polars/selectors.rb +932 -203
data/lib/polars/series.rb +1083 -63
data/lib/polars/string_expr.rb +435 -9
data/lib/polars/string_name_space.rb +729 -45
data/lib/polars/struct_expr.rb +103 -0
data/lib/polars/struct_name_space.rb +19 -1
data/lib/polars/utils/parse.rb +40 -0
data/lib/polars/utils/various.rb +18 -1
data/lib/polars/utils.rb +9 -1
data/lib/polars/version.rb +1 -1
data/lib/polars.rb +10 -0
metadata +12 -2

data/lib/polars/lazy_frame.rb CHANGED Viewed

@@ -234,10 +234,18 @@ module Polars
     #
     # @param by [Object]
     #   Column (expressions) to sort by.
+    # @param more_by [Array]
+    #   Additional columns to sort by, specified as positional arguments.
     # @param reverse [Boolean]
     #   Sort in descending order.
     # @param nulls_last [Boolean]
     #   Place null values last. Can only be used if sorted by a single column.
+    # @param maintain_order [Boolean]
+    #   Whether the order should be maintained if elements are equal.
+    #   Note that if `true` streaming is not possible and performance might be
+    #   worse since this requires a stable sort.
+    # @param multithreaded [Boolean]
+    #   Sort using multiple threads.
     #
     # @return [LazyFrame]
     #
@@ -280,6 +288,201 @@ module Polars
       )
     end
+    # Execute a SQL query against the LazyFrame.
+    #
+    # @note
+    #   This functionality is considered **unstable**, although it is close to
+    #   being considered stable. It may be changed at any point without it being
+    #   considered a breaking change.
+    #
+    # @param query [String]
+    #   SQL query to execute.
+    # @param table_name [String]
+    #   Optionally provide an explicit name for the table that represents the
+    #   calling frame (defaults to "self").
+    #
+    # @return [LazyFrame]
+    #
+    # @note
+    #   * The calling frame is automatically registered as a table in the SQL context
+    #     under the name "self". If you want access to the DataFrames and LazyFrames
+    #     found in the current globals, use the top-level `Polars.sql`.
+    #   * More control over registration and execution behaviour is available by
+    #     using the `SQLContext` object.
+    #
+    # @example Query the LazyFrame using SQL:
+    #   lf1 = Polars::LazyFrame.new({"a" => [1, 2, 3], "b" => [6, 7, 8], "c" => ["z", "y", "x"]})
+    #   lf2 = Polars::LazyFrame.new({"a" => [3, 2, 1], "d" => [125, -654, 888]})
+    #   lf1.sql("SELECT c, b FROM self WHERE a > 1").collect
+    #   # =>
+    #   # shape: (2, 2)
+    #   # ┌─────┬─────┐
+    #   # │ c   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ str ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ y   ┆ 7   │
+    #   # │ x   ┆ 8   │
+    #   # └─────┴─────┘
+    #
+    # @example Apply SQL transforms (aliasing "self" to "frame") then filter natively (you can freely mix SQL and native operations):
+    #   lf1.sql(
+    #     "
+    #       SELECT
+    #           a,
+    #           (a % 2 == 0) AS a_is_even,
+    #           (b::float4 / 2) AS \"b/2\",
+    #           CONCAT_WS(':', c, c, c) AS c_c_c
+    #       FROM frame
+    #       ORDER BY a
+    #     ",
+    #     table_name: "frame",
+    #   ).filter(~Polars.col("c_c_c").str.starts_with("x")).collect
+    #   # =>
+    #   # shape: (2, 4)
+    #   # ┌─────┬───────────┬─────┬───────┐
+    #   # │ a   ┆ a_is_even ┆ b/2 ┆ c_c_c │
+    #   # │ --- ┆ ---       ┆ --- ┆ ---   │
+    #   # │ i64 ┆ bool      ┆ f32 ┆ str   │
+    #   # ╞═════╪═══════════╪═════╪═══════╡
+    #   # │ 1   ┆ false     ┆ 3.0 ┆ z:z:z │
+    #   # │ 2   ┆ true      ┆ 3.5 ┆ y:y:y │
+    #   # └─────┴───────────┴─────┴───────┘
+    def sql(query, table_name: "self") # runs `query` in a SQL context where this frame is registered as `table_name`
+      ctx = Polars::SQLContext.new # a new context is created on every call
+      name = table_name || "self" # an explicit nil (or false) table_name falls back to "self"
+      ctx.register(name, self)
+      ctx.execute(query) # last expression: the result of `execute` is the method's return value
+    end
+    # Return the `k` largest rows.
+    #
+    # Non-null elements are always preferred over null elements, regardless of
+    # the value of `reverse`. The output is not guaranteed to be in any
+    # particular order, call `sort` after this function if you wish the
+    # output to be sorted.
+    #
+    # @param k [Integer]
+    #   Number of rows to return.
+    # @param by [Object]
+    #   Column(s) used to determine the top rows.
+    #   Accepts expression input. Strings are parsed as column names.
+    # @param reverse [Object]
+    #   Consider the `k` smallest elements of the `by` column(s) (instead of the `k`
+    #   largest). This can be specified per column by passing a sequence of
+    #   booleans.
+    #
+    # @return [LazyFrame]
+    #
+    # @example Get the rows which contain the 4 largest values in column b.
+    #   lf = Polars::LazyFrame.new(
+    #     {
+    #       "a" => ["a", "b", "a", "b", "b", "c"],
+    #       "b" => [2, 1, 1, 3, 2, 1]
+    #     }
+    #   )
+    #   lf.top_k(4, by: "b").collect
+    #   # =>
+    #   # shape: (4, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ str ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ b   ┆ 3   │
+    #   # │ a   ┆ 2   │
+    #   # │ b   ┆ 2   │
+    #   # │ b   ┆ 1   │
+    #   # └─────┴─────┘
+    #
+    # @example Get the rows which contain the 4 largest values when sorting on columns b and a.
+    #   lf.top_k(4, by: ["b", "a"]).collect
+    #   # =>
+    #   # shape: (4, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ str ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ b   ┆ 3   │
+    #   # │ b   ┆ 2   │
+    #   # │ a   ┆ 2   │
+    #   # │ c   ┆ 1   │
+    #   # └─────┴─────┘
+    def top_k( # returns a new lazy frame built from the native `top_k` call
+      k,
+      by:, # required keyword (no default)
+      reverse: false
+    )
+      by = Utils.parse_into_list_of_expressions(by) # rebinds `by` to the parsed expression list
+      reverse = Utils.extend_bool(reverse, by.length, "reverse", "by") # presumably expands a single flag to one per `by` entry — confirm in Utils.extend_bool
+      _from_rbldf(_ldf.top_k(k, by, reverse)) # presumably wraps the native result back into a LazyFrame — confirm in _from_rbldf
+    end
+    # Return the `k` smallest rows.
+    #
+    # Non-null elements are always preferred over null elements, regardless of
+    # the value of `reverse`. The output is not guaranteed to be in any
+    # particular order, call `sort` after this function if you wish the
+    # output to be sorted.
+    #
+    # @param k [Integer]
+    #   Number of rows to return.
+    # @param by [Object]
+    #   Column(s) used to determine the bottom rows.
+    #   Accepts expression input. Strings are parsed as column names.
+    # @param reverse [Object]
+    #   Consider the `k` largest elements of the `by` column(s) (instead of the `k`
+    #   smallest). This can be specified per column by passing a sequence of
+    #   booleans.
+    #
+    # @return [LazyFrame]
+    #
+    # @example Get the rows which contain the 4 smallest values in column b.
+    #   lf = Polars::LazyFrame.new(
+    #     {
+    #       "a" => ["a", "b", "a", "b", "b", "c"],
+    #       "b" => [2, 1, 1, 3, 2, 1]
+    #     }
+    #   )
+    #   lf.bottom_k(4, by: "b").collect
+    #   # =>
+    #   # shape: (4, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ str ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ b   ┆ 1   │
+    #   # │ a   ┆ 1   │
+    #   # │ c   ┆ 1   │
+    #   # │ a   ┆ 2   │
+    #   # └─────┴─────┘
+    #
+    # @example Get the rows which contain the 4 smallest values when sorting on columns a and b.
+    #   lf.bottom_k(4, by: ["a", "b"]).collect
+    #   # =>
+    #   # shape: (4, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ str ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ a   ┆ 1   │
+    #   # │ a   ┆ 2   │
+    #   # │ b   ┆ 1   │
+    #   # │ b   ┆ 2   │
+    #   # └─────┴─────┘
+    def bottom_k( # returns a new lazy frame built from the native `bottom_k` call
+      k,
+      by:, # required keyword (no default)
+      reverse: false
+    )
+      by = Utils.parse_into_list_of_expressions(by) # rebinds `by` to the parsed expression list
+      reverse = Utils.extend_bool(reverse, by.length, "reverse", "by") # presumably expands a single flag to one per `by` entry — confirm in Utils.extend_bool
+      _from_rbldf(_ldf.bottom_k(k, by, reverse)) # presumably wraps the native result back into a LazyFrame — confirm in _from_rbldf
+    end
     # def profile
     # end
@@ -305,6 +508,8 @@ module Polars
     #   Slice pushdown optimization.
     # @param common_subplan_elimination [Boolean]
     #   Will try to cache branching subplans that occur on self-joins or unions.
+    # @param comm_subexpr_elim [Boolean]
+    #   Common subexpressions will be cached and reused.
     # @param allow_streaming [Boolean]
     #   Run parts of the query in a streaming fashion (this is in an alpha state)
     #
@@ -369,6 +574,41 @@ module Polars
       Utils.wrap_df(ldf.collect)
     end
+    # Resolve the schema of this LazyFrame.
+    #
+    # @return [Schema]
+    #
+    # @example Determine the schema.
+    #   lf = Polars::LazyFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   lf.collect_schema
+    #   # => Polars::Schema({"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String})
+    #
+    # @example Access various properties of the schema.
+    #   schema = lf.collect_schema
+    #   schema["bar"]
+    #   # => Polars::Float64
+    #
+    # @example
+    #   schema.names
+    #   # => ["foo", "bar", "ham"]
+    #
+    # @example
+    #   schema.dtypes
+    #   # => [Polars::Int64, Polars::Float64, Polars::String]
+    #
+    # @example
+    #   schema.length
+    #   # => 3
+    def collect_schema # resolves the schema through the native frame and returns it as a Schema
+      Schema.new(_ldf.collect_schema, check_dtypes: false) # dtype checking is switched off for the natively produced schema
+    end
     # Persists a LazyFrame at the provided path.
     #
     # This allows streaming results that are larger than RAM to be written to disk.
@@ -412,6 +652,31 @@ module Polars
     #   Turn off (certain) optimizations.
     # @param slice_pushdown [Boolean]
     #   Slice pushdown optimization.
+    # @param storage_options [Hash]
+    #   Options that indicate how to connect to a cloud provider.
+    #
+    #   The cloud providers currently supported are AWS, GCP, and Azure.
+    #   See supported keys here:
+    #
+    #   * [aws](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html)
+    #   * [gcp](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html)
+    #   * [azure](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html)
+    #   * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
+    #
+    #   If `storage_options` is not provided, Polars will try to infer the
+    #   information from environment variables.
+    # @param retries [Integer]
+    #   Number of retries if accessing a cloud instance fails.
+    # @param sync_on_close ['data', 'all']
+    #   Sync to disk before closing a file.
+    #
+    #   * `nil` does not sync.
+    #   * `data` syncs the file contents.
+    #   * `all` syncs the file contents and metadata.
+    # @param mkdir [Boolean]
+    #   Recursively create all the directories in the path.
+    # @param lazy [Boolean]
+    #     Wait to start execution until `collect` is called.
     #
     # @return [DataFrame]
     #
@@ -521,6 +786,16 @@ module Polars
     #   Slice pushdown optimization.
     # @param no_optimization [Boolean]
     #   Turn off (certain) optimizations.
+    # @param sync_on_close ['data', 'all']
+    #     Sync to disk before closing a file.
+    #
+    #     * `nil` does not sync.
+    #     * `data` syncs the file contents.
+    #     * `all` syncs the file contents and metadata.
+    # @param mkdir [Boolean]
+    #     Recursively create all the directories in the path.
+    # @param lazy [Boolean]
+    #     Wait to start execution until `collect` is called.
     #
     # @return [DataFrame]
     #
@@ -614,9 +889,15 @@ module Polars
     #   A format string, with the specifiers defined by the
     #   `chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
     #   Rust crate.
+    # @param float_scientific [Boolean]
+    #   Whether to use scientific form always (true), never (false), or
+    #   automatically (nil) for `Float32` and `Float64` datatypes.
     # @param float_precision [Integer]
     #   Number of decimal places to write, applied to both `Float32` and
     #   `Float64` datatypes.
+    # @param decimal_comma [Boolean]
+    #   Use a comma as the decimal separator instead of a point. Floats will be
+    #   encapsulated in quotes if necessary; set the field separator to override.
     # @param null_value [String]
     #   A string representing null values (defaulting to the empty string).
     # @param quote_style ["necessary", "always", "non_numeric", "never"]
@@ -655,6 +936,16 @@ module Polars
     #   Options that indicate how to connect to a cloud provider.
     # @param retries [Integer]
     #   Number of retries if accessing a cloud instance fails.
+    # @param sync_on_close ['data', 'all']
+    #     Sync to disk before closing a file.
+    #
+    #     * `nil` does not sync.
+    #     * `data` syncs the file contents.
+    #     * `all` syncs the file contents and metadata.
+    # @param mkdir [Boolean]
+    #     Recursively create all the directories in the path.
+    # @param lazy [Boolean]
+    #     Wait to start execution until `collect` is called.
     #
     # @return [DataFrame]
     #
@@ -674,6 +965,7 @@ module Polars
       time_format: nil,
       float_scientific: nil,
       float_precision: nil,
+      decimal_comma: false,
       null_value: nil,
       quote_style: nil,
       maintain_order: true,
@@ -726,6 +1018,7 @@ module Polars
         time_format,
         float_scientific,
         float_precision,
+        decimal_comma,
         null_value,
         quote_style,
         storage_options,
@@ -762,6 +1055,31 @@ module Polars
     #   Slice pushdown optimization.
     # @param no_optimization [Boolean]
     #   Turn off (certain) optimizations.
+    # @param storage_options [Hash]
+    #   Options that indicate how to connect to a cloud provider.
+    #
+    #   The cloud providers currently supported are AWS, GCP, and Azure.
+    #   See supported keys here:
+    #
+    #   * [aws](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html)
+    #   * [gcp](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html)
+    #   * [azure](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html)
+    #   * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
+    #
+    #   If `storage_options` is not provided, Polars will try to infer the
+    #   information from environment variables.
+    # @param retries [Integer]
+    #   Number of retries if accessing a cloud instance fails.
+    # @param sync_on_close ['data', 'all']
+    #   Sync to disk before closing a file.
+    #
+    #   * `nil` does not sync.
+    #   * `data` syncs the file contents.
+    #   * `all` syncs the file contents and metadata.
+    # @param mkdir [Boolean]
+    #   Recursively create all the directories in the path.
+    # @param lazy [Boolean]
+    #     Wait to start execution until `collect` is called.
     #
     # @return [DataFrame]
     #
@@ -854,25 +1172,6 @@ module Polars
     #
     # @param n_rows [Integer]
     #   Collect n_rows from the data sources.
-    # @param type_coercion [Boolean]
-    #   Run type coercion optimization.
-    # @param predicate_pushdown [Boolean]
-    #   Run predicate pushdown optimization.
-    # @param projection_pushdown [Boolean]
-    #   Run projection pushdown optimization.
-    # @param simplify_expression [Boolean]
-    #   Run simplify expressions optimization.
-    # @param string_cache [Boolean]
-    #   This argument is deprecated. Please set the string cache globally.
-    #   The argument will be ignored
-    # @param no_optimization [Boolean]
-    #   Turn off optimizations.
-    # @param slice_pushdown [Boolean]
-    #   Slice pushdown optimization
-    # @param common_subplan_elimination [Boolean]
-    #   Will try to cache branching subplans that occur on self-joins or unions.
-    # @param allow_streaming [Boolean]
-    #   Run parts of the query in a streaming fashion (this is in an alpha state)
     #
     # @return [DataFrame]
     #
@@ -892,41 +1191,11 @@ module Polars
     #   # │ --- ┆ --- ┆ --- │
     #   # │ str ┆ i64 ┆ i64 │
     #   # ╞═════╪═════╪═════╡
-    #   # │ a   ┆ 1   ┆ 6   │
-    #   # │ b   ┆ 2   ┆ 5   │
+    #   # │ a   ┆ 4   ┆ 10  │
+    #   # │ b   ┆ 11  ┆ 10  │
     #   # └─────┴─────┴─────┘
-    def fetch(
-      n_rows = 500,
-      type_coercion: true,
-      predicate_pushdown: true,
-      projection_pushdown: true,
-      simplify_expression: true,
-      string_cache: false,
-      no_optimization: false,
-      slice_pushdown: true,
-      common_subplan_elimination: true,
-      comm_subexpr_elim: true,
-      allow_streaming: false
-    )
-      if no_optimization
-        predicate_pushdown = false
-        projection_pushdown = false
-        slice_pushdown = false
-        common_subplan_elimination = false
-      end
-      ldf = _ldf.optimization_toggle(
-        type_coercion,
-        predicate_pushdown,
-        projection_pushdown,
-        simplify_expression,
-        slice_pushdown,
-        common_subplan_elimination,
-        comm_subexpr_elim,
-        allow_streaming,
-        false
-      )
-      Utils.wrap_df(ldf.fetch(n_rows))
+    def fetch(n_rows = 500, **kwargs) # keyword arguments are passed through to `collect` unchanged
+      head(n_rows).collect(**kwargs) # now expressed as head + collect; the removed implementation called `_ldf.fetch` directly
     end
     # Return lazy representation, i.e. itself.
@@ -1058,7 +1327,7 @@ module Polars
     #   # │ null ┆ null ┆ null │
     #   # └──────┴──────┴──────┘
     def clear(n = 0) # returns a lazy frame derived from this frame's schema via DataFrame#clear(n)
-      DataFrame.new(columns: schema).clear(n).lazy
+      DataFrame.new(schema: schema).clear(n).lazy # schema is now passed with the `schema:` keyword instead of `columns:`
     end
     alias_method :cleared, :clear
@@ -1108,6 +1377,140 @@ module Polars
       )
     end
+    # Remove rows, dropping those that match the given predicate expression(s).
+    #
+    # The original order of the remaining rows is preserved.
+    #
+    # Rows where the filter predicate does not evaluate to true are retained
+    # (this includes rows where the predicate evaluates as `null`).
+    #
+    # @param predicates [Array]
+    #   Expression(s) that evaluate to a boolean Series.
+    # @param constraints [Hash]
+    #   Column filters; use `name: value` to filter columns using the supplied
+    #   value. Each constraint behaves the same as `Polars.col(name).eq(value)`,
+    #   and is implicitly joined with the other filter conditions using `&`.
+    #
+    # @return [LazyFrame]
+    #
+    # @example Remove rows matching a condition:
+    #   lf = Polars::LazyFrame.new(
+    #     {
+    #       "foo" => [2, 3, nil, 4, 0],
+    #       "bar" => [5, 6, nil, nil, 0],
+    #       "ham" => ["a", "b", nil, "c", "d"]
+    #     }
+    #   )
+    #   lf.remove(
+    #     Polars.col("bar") >= 5
+    #   ).collect
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ foo  ┆ bar  ┆ ham  │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ i64  ┆ str  │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ null ┆ null ┆ null │
+    #   # │ 4    ┆ null ┆ c    │
+    #   # │ 0    ┆ 0    ┆ d    │
+    #   # └──────┴──────┴──────┘
+    #
+    # @example Discard rows based on multiple conditions, combined with and/or operators:
+    #   lf.remove(
+    #     (Polars.col("foo") >= 0) & (Polars.col("bar") >= 0)
+    #   ).collect
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ foo  ┆ bar  ┆ ham  │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ i64  ┆ str  │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ null ┆ null ┆ null │
+    #   # │ 4    ┆ null ┆ c    │
+    #   # └──────┴──────┴──────┘
+    #
+    # @example
+    #   lf.remove(
+    #     (Polars.col("foo") >= 0) | (Polars.col("bar") >= 0)
+    #   ).collect
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ foo  ┆ bar  ┆ ham  │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ i64  ┆ str  │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ null ┆ null ┆ null │
+    #   # └──────┴──────┴──────┘
+    #
+    # @example Provide multiple constraints using `*args` syntax:
+    #   lf.remove(
+    #     Polars.col("ham").is_not_null,
+    #     Polars.col("bar") >= 0
+    #   ).collect
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ foo  ┆ bar  ┆ ham  │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ i64  ┆ str  │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ null ┆ null ┆ null │
+    #   # │ 4    ┆ null ┆ c    │
+    #   # └──────┴──────┴──────┘
+    #
+    # @example Provide constraint(s) using `**kwargs` syntax:
+    #   lf.remove(foo: 0, bar: 0).collect
+    #   # =>
+    #   # shape: (4, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ foo  ┆ bar  ┆ ham  │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ i64  ┆ str  │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ 2    ┆ 5    ┆ a    │
+    #   # │ 3    ┆ 6    ┆ b    │
+    #   # │ null ┆ null ┆ null │
+    #   # │ 4    ┆ null ┆ c    │
+    #   # └──────┴──────┴──────┘
+    #
+    # @example Remove rows by comparing two columns against each other; in this case, we remove rows where the two columns are not equal (using `ne_missing` to ensure that null values compare equal):
+    #   lf.remove(
+    #     Polars.col("foo").ne_missing(Polars.col("bar"))
+    #   ).collect
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ foo  ┆ bar  ┆ ham  │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ i64  ┆ str  │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ null ┆ null ┆ null │
+    #   # │ 0    ┆ 0    ┆ d    │
+    #   # └──────┴──────┴──────┘
+    def remove( # drops rows matching the predicates/constraints by calling `_filter` with `invert: true`
+      *predicates,
+      **constraints
+    )
+      if constraints.empty? # shortcuts below apply only when no keyword constraints were given
+        # early-exit conditions (exclude/include all rows)
+        if predicates.empty? || (predicates.length == 1 && predicates[0].is_a?(TrueClass)) # no predicates at all, or a single literal `true`
+          return clear
+        end
+        if predicates.length == 1 && predicates[0].is_a?(FalseClass) # a single literal `false` matches no rows
+          return dup # hands back a copy rather than `self`
+        end
+      end
+      _filter( # general case; presumably shares its implementation with `filter` — confirm in _filter
+        predicates: predicates,
+        constraints: constraints,
+        invert: true
+      )
+    end
     # Select columns from this DataFrame.
     #
     # @param exprs [Array]
@@ -1205,6 +1608,29 @@ module Polars
       _from_rbldf(_ldf.select(rbexprs))
     end
+    # Select columns from this LazyFrame.
+    #
+    # This will run all expressions sequentially instead of in parallel.
+    # Use this when the work per expression is cheap.
+    #
+    # @param exprs [Array]
+    #   Column(s) to select, specified as positional arguments.
+    #   Accepts expression input. Strings are parsed as column names,
+    #   other non-expression inputs are parsed as literals.
+    # @param named_exprs [Hash]
+    #   Additional columns to select, specified as keyword arguments.
+    #   The columns will be renamed to the keyword used.
+    #
+    # @return [LazyFrame]
+    def select_seq(*exprs, **named_exprs) # parses the inputs into expressions and passes them to the native `select_seq`
+      structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", 0).to_i != 0 # unset, "0" or a non-numeric value all give false, since to_i yields 0
+      rbexprs = Utils.parse_into_list_of_expressions(
+        *exprs, **named_exprs, __structify: structify
+      )
+      _from_rbldf(_ldf.select_seq(rbexprs)) # presumably wraps the native result back into a LazyFrame — confirm in _from_rbldf
+    end
     # Start a group by operation.
     #
     # @param by [Array]
@@ -1401,9 +1827,9 @@ module Polars
     # @param every [Object]
     #   Interval of the window.
     # @param period [Object]
-    #   Length of the window, if None it is equal to 'every'.
+    #   Length of the window, if nil it is equal to 'every'.
     # @param offset [Object]
+    #   Offset of the window. If nil and period is nil, it will be equal to negative
+    #   Offset of the window if nil and period is nil it will be equal to negative
     #   `every`.
     # @param truncate [Boolean]
     #   Truncate the time value to the window lower bound.
@@ -1413,8 +1839,32 @@ module Polars
     #   parallelize
     # @param closed ["right", "left", "both", "none"]
     #   Define whether the temporal window interval is closed or not.
+    # @param label ['left', 'right', 'datapoint']
+    #   Define which label to use for the window:
+    #
+    #   - 'left': lower boundary of the window
+    #   - 'right': upper boundary of the window
+    #   - 'datapoint': the first value of the index column in the given window.
+    #     If you don't need the label to be at one of the boundaries, choose this
+    #     option for maximum performance
     # @param by [Object]
     #   Also group by this column/these columns
+    # @param start_by ['window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
+    #   The strategy to determine the start of the first window by.
+    #
+    #   * 'window': Start by taking the earliest timestamp, truncating it with
+    #     `every`, and then adding `offset`.
+    #     Note that weekly windows start on Monday.
+    #   * 'datapoint': Start from the first encountered data point.
+    #   * a day of the week (only takes effect if `every` contains `'w'`):
+    #
+    #     * 'monday': Start the window on the Monday before the first data point.
+    #     * 'tuesday': Start the window on the Tuesday before the first data point.
+    #     * ...
+    #     * 'sunday': Start the window on the Sunday before the first data point.
+    #
+    #     The resulting window is then shifted back until the earliest datapoint
+    #     is in or in front of it.
     #
     # @return [DataFrame]
     #
@@ -1651,13 +2101,13 @@ module Polars
     #   Join column of the right DataFrame.
     # @param on [String]
     #   Join column of both DataFrames. If set, `left_on` and `right_on` should be
-    #   None.
-    # @param by [Object]
-    #   Join on these columns before doing asof join.
+    #   nil.
     # @param by_left [Object]
     #   Join on these columns before doing asof join.
     # @param by_right [Object]
     #   Join on these columns before doing asof join.
+    # @param by [Object]
+    #   Join on these columns before doing asof join.
     # @param strategy ["backward", "forward"]
     #   Join strategy.
     # @param suffix [String]
@@ -1873,7 +2323,7 @@ module Polars
     #   # └─────────────┴────────────┴────────────┘
     #
     # @example
-    #   pop2.join_asof(gdp2, by: "country", on: "date", strategy: "nearest").collect
+    #   pop2.join_asof(gdp2, by: "country", on: "date", strategy: "nearest", check_sortedness: false).collect
     #   # =>
     #   # shape: (6, 4)
     #   # ┌─────────────┬────────────┬────────────┬──────┐
@@ -1976,7 +2426,7 @@ module Polars
     #   Join column of the right DataFrame.
     # @param on [Object]
     #   Join column of both DataFrames. If set, `left_on` and `right_on` should be
-    #   None.
+    #   nil.
     # @param how ["inner", "left", "full", "semi", "anti", "cross"]
     #   Join strategy.
     # @param suffix [String]
@@ -2171,10 +2621,110 @@ module Polars
       )
     end
+    # Perform a join based on one or multiple (in)equality predicates.
+    #
+    # This performs an inner join, so only rows where all predicates are true
+    # are included in the result, and a row from either DataFrame may be included
+    # multiple times in the result.
+    #
+    # @note
+    #   The row order of the input DataFrames is not preserved.
+    #
+    # @note
+    #   This functionality is experimental. It may be
+    #   changed at any point without it being considered a breaking change.
+    #
+    # @param other [Object]
+    #   DataFrame to join with.
+    # @param predicates [Object]
+    #   (In)Equality condition to join the two tables on.
+    #   When a column name occurs in both tables, the proper suffix must
+    #   be applied in the predicate.
+    # @param suffix [String]
+    #   Suffix to append to columns with a duplicate name.
+    #
+    # @return [LazyFrame]
+    #
+    # @example Join two lazyframes together based on two predicates which get AND-ed together.
+    #   east = Polars::LazyFrame.new(
+    #     {
+    #       "id" => [100, 101, 102],
+    #       "dur" => [120, 140, 160],
+    #       "rev" => [12, 14, 16],
+    #       "cores" => [2, 8, 4]
+    #     }
+    #   )
+    #   west = Polars::LazyFrame.new(
+    #     {
+    #       "t_id" => [404, 498, 676, 742],
+    #       "time" => [90, 130, 150, 170],
+    #       "cost" => [9, 13, 15, 16],
+    #       "cores" => [4, 2, 1, 4]
+    #     }
+    #   )
+    #   east.join_where(
+    #     west,
+    #     Polars.col("dur") < Polars.col("time"),
+    #     Polars.col("rev") < Polars.col("cost")
+    #   ).collect
+    #   # =>
+    #   # shape: (5, 8)
+    #   # ┌─────┬─────┬─────┬───────┬──────┬──────┬──────┬─────────────┐
+    #   # │ id  ┆ dur ┆ rev ┆ cores ┆ t_id ┆ time ┆ cost ┆ cores_right │
+    #   # │ --- ┆ --- ┆ --- ┆ ---   ┆ ---  ┆ ---  ┆ ---  ┆ ---         │
+    #   # │ i64 ┆ i64 ┆ i64 ┆ i64   ┆ i64  ┆ i64  ┆ i64  ┆ i64         │
+    #   # ╞═════╪═════╪═════╪═══════╪══════╪══════╪══════╪═════════════╡
+    #   # │ 100 ┆ 120 ┆ 12  ┆ 2     ┆ 498  ┆ 130  ┆ 13   ┆ 2           │
+    #   # │ 100 ┆ 120 ┆ 12  ┆ 2     ┆ 676  ┆ 150  ┆ 15   ┆ 1           │
+    #   # │ 100 ┆ 120 ┆ 12  ┆ 2     ┆ 742  ┆ 170  ┆ 16   ┆ 4           │
+    #   # │ 101 ┆ 140 ┆ 14  ┆ 8     ┆ 676  ┆ 150  ┆ 15   ┆ 1           │
+    #   # │ 101 ┆ 140 ┆ 14  ┆ 8     ┆ 742  ┆ 170  ┆ 16   ┆ 4           │
+    #   # └─────┴─────┴─────┴───────┴──────┴──────┴──────┴─────────────┘
+    #
+    # @example To OR them together, use a single expression and the `|` operator.
+    #   east.join_where(
+    #     west,
+    #     (Polars.col("dur") < Polars.col("time")) | (Polars.col("rev") < Polars.col("cost"))
+    #   ).collect
+    #   # =>
+    #   # shape: (6, 8)
+    #   # ┌─────┬─────┬─────┬───────┬──────┬──────┬──────┬─────────────┐
+    #   # │ id  ┆ dur ┆ rev ┆ cores ┆ t_id ┆ time ┆ cost ┆ cores_right │
+    #   # │ --- ┆ --- ┆ --- ┆ ---   ┆ ---  ┆ ---  ┆ ---  ┆ ---         │
+    #   # │ i64 ┆ i64 ┆ i64 ┆ i64   ┆ i64  ┆ i64  ┆ i64  ┆ i64         │
+    #   # ╞═════╪═════╪═════╪═══════╪══════╪══════╪══════╪═════════════╡
+    #   # │ 100 ┆ 120 ┆ 12  ┆ 2     ┆ 498  ┆ 130  ┆ 13   ┆ 2           │
+    #   # │ 100 ┆ 120 ┆ 12  ┆ 2     ┆ 676  ┆ 150  ┆ 15   ┆ 1           │
+    #   # │ 100 ┆ 120 ┆ 12  ┆ 2     ┆ 742  ┆ 170  ┆ 16   ┆ 4           │
+    #   # │ 101 ┆ 140 ┆ 14  ┆ 8     ┆ 676  ┆ 150  ┆ 15   ┆ 1           │
+    #   # │ 101 ┆ 140 ┆ 14  ┆ 8     ┆ 742  ┆ 170  ┆ 16   ┆ 4           │
+    #   # │ 102 ┆ 160 ┆ 16  ┆ 4     ┆ 742  ┆ 170  ┆ 16   ┆ 4           │
+    #   # └─────┴─────┴─────┴───────┴──────┴──────┴──────┴─────────────┘
+    def join_where( # joins this frame with `other` on the given predicates via the native `join_where`
+      other,
+      *predicates,
+      suffix: "_right"
+    )
+      Utils.require_same_type(self, other) # presumably raises when `other` is not the same kind of frame — confirm in Utils.require_same_type
+      rbexprs = Utils.parse_into_list_of_expressions(*predicates)
+      _from_rbldf(
+        _ldf.join_where(
+          other._ldf, # the native frame behind `other`
+          rbexprs,
+          suffix
+        )
+      )
+    end
     # Add or overwrite multiple columns in a DataFrame.
     #
     # @param exprs [Object]
     #   List of Expressions that evaluate to columns.
+    # @param named_exprs [Hash]
+    #   Additional columns to add, specified as keyword arguments.
+    #   The columns will be renamed to the keyword used.
     #
     # @return [LazyFrame]
     #
@@ -2213,6 +2763,34 @@ module Polars
       _from_rbldf(_ldf.with_columns(rbexprs))
     end
+    # Add columns to this LazyFrame.
+    #
+    # Added columns will replace existing columns with the same name.
+    #
+    # This will run all expressions sequentially instead of in parallel.
+    # Use this when the work per expression is cheap.
+    #
+    # @param exprs [Array]
+    #   Column(s) to add, specified as positional arguments.
+    #   Accepts expression input. Strings are parsed as column names, other
+    #   non-expression inputs are parsed as literals.
+    # @param named_exprs [Hash]
+    #   Additional columns to add, specified as keyword arguments.
+    #   The columns will be renamed to the keyword used.
+    #
+    # @return [LazyFrame]
+    def with_columns_seq(
+      *exprs,
+      **named_exprs
+    )
+      structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", 0).to_i != 0
+      rbexprs = Utils.parse_into_list_of_expressions(
+        *exprs, **named_exprs, __structify: structify
+      )
+      _from_rbldf(_ldf.with_columns_seq(rbexprs))
+    end
     # Add an external context to the computation graph.
     #
     # This allows expressions to also access columns from DataFrames
@@ -2299,6 +2877,9 @@ module Polars
     # @param columns [Object]
     #   - Name of the column that should be removed.
     #   - List of column names.
+    # @param strict [Boolean]
+    #   Validate that all column names exist in the current schema,
+    #   and throw an exception if any do not.
     #
     # @return [LazyFrame]
     #
@@ -2350,9 +2931,18 @@ module Polars
     #   # │ 7.0 │
     #   # │ 8.0 │
     #   # └─────┘
-    def drop(*columns)
-      drop_cols = Utils._expand_selectors(self, *columns)
-      _from_rbldf(_ldf.drop(drop_cols))
+    def drop(*columns, strict: true)
+      # Flatten one level so `drop("a", "b")` and `drop(["a", "b"])` are
+      # equivalent. `to_a` lets any Enumerable argument (for example a Set) be
+      # spliced in; `Array#+` raises TypeError for Enumerables that are not
+      # implicitly convertible to Array.
+      selectors = columns.flat_map { |c| c.is_a?(Enumerable) ? c.to_a : [c] }
+      # `strict` is forwarded unchanged to the selector parser.
+      drop_cols = Utils.parse_list_into_selector(selectors, strict: strict)
+      _from_rbldf(_ldf.drop(drop_cols._rbselector))
     end
     # Rename column names.
@@ -2809,7 +3399,7 @@ module Polars
     #
     # @example
     #   s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]}).lazy
-    #   s.take_every(2).collect
+    #   s.gather_every(2).collect
     #   # =>
     #   # shape: (2, 2)
     #   # ┌─────┬─────┐
@@ -2820,9 +3410,10 @@ module Polars
     #   # │ 1   ┆ 5   │
     #   # │ 3   ┆ 7   │
     #   # └─────┴─────┘
-    def take_every(n)
-      select(F.col("*").take_every(n))
+    def gather_every(n)
+      # Build the expression on the "*" wildcard column, then select it.
+      every_nth = F.col("*").gather_every(n)
+      select(every_nth)
     end
+    alias_method :take_every, :gather_every
     # Fill null values using the specified value or strategy.
     #
@@ -3099,6 +3690,32 @@ module Polars
       _from_rbldf(_ldf.median)
     end
+    # Aggregate the columns in the LazyFrame as the sum of their null value count.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   lf = Polars::LazyFrame.new(
+    #     {
+    #       "foo" => [1, nil, 3],
+    #       "bar" => [6, 7, nil],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   lf.null_count.collect
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ u32 ┆ u32 ┆ u32 │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 1   ┆ 0   │
+    #   # └─────┴─────┴─────┘
+    def null_count
+      _from_rbldf(_ldf.null_count)
+    end
     # Aggregate the columns in the DataFrame to their quantile value.
     #
     # @param quantile [Float]
@@ -3153,9 +3770,11 @@ module Polars
     #   # │ c       ┆ 7       │
     #   # │ c       ┆ 8       │
     #   # └─────────┴─────────┘
-    def explode(columns)
-      columns = Utils.parse_into_list_of_expressions(columns)
-      _from_rbldf(_ldf.explode(columns))
+    def explode(columns, *more_columns)
+      # Parse the first argument and the remaining arguments separately, then
+      # combine the two selectors with `|`.
+      primary = Utils.parse_list_into_selector(columns)
+      extra = Utils.parse_list_into_selector(more_columns)
+      combined = primary | extra
+      _from_rbldf(_ldf.explode(combined._rbselector))
     end
     # Drop duplicate rows from this DataFrame.
@@ -3220,43 +3839,110 @@ module Polars
     #   # │ 1   ┆ a   ┆ b   │
     #   # └─────┴─────┴─────┘
     def unique(maintain_order: true, subset: nil, keep: "first")
-      if !subset.nil? && !subset.is_a?(::Array)
-        subset = [subset]
+      # A nil subset is forwarded as nil; any other value is parsed into a
+      # selector and its `_rbselector` is what gets passed down.
+      selector_subset = nil
+      if !subset.nil?
+        selector_subset = Utils.parse_list_into_selector(subset)._rbselector
       end
-      _from_rbldf(_ldf.unique(maintain_order, subset, keep))
+      _from_rbldf(_ldf.unique(maintain_order, selector_subset, keep))
     end
-    # Drop rows with null values from this LazyFrame.
+    # Drop all rows that contain one or more NaN values.
+    #
+    # The original order of the remaining rows is preserved.
     #
     # @param subset [Object]
-    #   Subset of column(s) on which `drop_nulls` will be applied.
+    #   Column name(s) for which NaN values are considered; if set to `nil`
+    #   (default), use all columns (note that only floating-point columns
+    #   can contain NaNs).
     #
     # @return [LazyFrame]
     #
     # @example
-    #   df = Polars::DataFrame.new(
+    #   lf = Polars::LazyFrame.new(
+    #     {
+    #       "foo" => [-20.5, Float::NAN, 80.0],
+    #       "bar" => [Float::NAN, 110.0, 25.5],
+    #       "ham" => ["xxx", "yyy", nil]
+    #     }
+    #   )
+    #   lf.drop_nans.collect
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ foo  ┆ bar  ┆ ham  │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ f64  ┆ f64  ┆ str  │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ 80.0 ┆ 25.5 ┆ null │
+    #   # └──────┴──────┴──────┘
+    #
+    # @example
+    #   lf.drop_nans(subset: ["bar"]).collect
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌──────┬───────┬──────┐
+    #   # │ foo  ┆ bar   ┆ ham  │
+    #   # │ ---  ┆ ---   ┆ ---  │
+    #   # │ f64  ┆ f64   ┆ str  │
+    #   # ╞══════╪═══════╪══════╡
+    #   # │ NaN  ┆ 110.0 ┆ yyy  │
+    #   # │ 80.0 ┆ 25.5  ┆ null │
+    #   # └──────┴───────┴──────┘
+    def drop_nans(subset: nil)
+      selector_subset = nil
+      if !subset.nil?
+        selector_subset = Utils.parse_list_into_selector(subset)._rbselector
+      end
+      _from_rbldf(_ldf.drop_nans(selector_subset))
+    end
+    # Drop all rows that contain one or more null values.
+    #
+    # The original order of the remaining rows is preserved.
+    #
+    # @param subset [Object]
+    #   Column name(s) for which null values are considered.
+    #   If set to `nil` (default), use all columns.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   lf = Polars::LazyFrame.new(
     #     {
     #       "foo" => [1, 2, 3],
     #       "bar" => [6, nil, 8],
-    #       "ham" => ["a", "b", "c"]
+    #       "ham" => ["a", "b", nil]
     #     }
     #   )
-    #   df.lazy.drop_nulls.collect
+    #   lf.drop_nulls.collect
     #   # =>
-    #   # shape: (2, 3)
+    #   # shape: (1, 3)
     #   # ┌─────┬─────┬─────┐
     #   # │ foo ┆ bar ┆ ham │
     #   # │ --- ┆ --- ┆ --- │
     #   # │ i64 ┆ i64 ┆ str │
     #   # ╞═════╪═════╪═════╡
     #   # │ 1   ┆ 6   ┆ a   │
-    #   # │ 3   ┆ 8   ┆ c   │
     #   # └─────┴─────┴─────┘
+    #
+    # @example
+    #   lf.drop_nulls(subset: Polars.cs.integer).collect
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬──────┐
+    #   # │ foo ┆ bar ┆ ham  │
+    #   # │ --- ┆ --- ┆ ---  │
+    #   # │ i64 ┆ i64 ┆ str  │
+    #   # ╞═════╪═════╪══════╡
+    #   # │ 1   ┆ 6   ┆ a    │
+    #   # │ 3   ┆ 8   ┆ null │
+    #   # └─────┴─────┴──────┘
     def drop_nulls(subset: nil)
-      if !subset.nil? && !subset.is_a?(::Array)
-        subset = [subset]
+      # A nil subset is forwarded as nil; any other value is parsed into a
+      # selector and its `_rbselector` is what gets passed down.
+      selector_subset = nil
+      if !subset.nil?
+        selector_subset = Utils.parse_list_into_selector(subset)._rbselector
       end
-      _from_rbldf(_ldf.drop_nulls(subset))
+      _from_rbldf(_ldf.drop_nulls(selector_subset))
     end
     # Unpivot a DataFrame from wide to long format.
@@ -3318,11 +4004,16 @@ module Polars
         warn "The `streamable` parameter for `LazyFrame.unpivot` is deprecated"
       end
-      on = on.nil? ? [] : Utils.parse_into_list_of_expressions(on)
-      index = index.nil? ? [] : Utils.parse_into_list_of_expressions(index)
+      selector_on = on.nil? ? Selectors.empty : Utils.parse_list_into_selector(on)
+      selector_index = index.nil? ? Selectors.empty : Utils.parse_list_into_selector(index)
       _from_rbldf(
-        _ldf.unpivot(on, index, value_name, variable_name)
+        _ldf.unpivot(
+          selector_on._rbselector,
+          selector_index._rbselector,
+          value_name,
+          variable_name
+        )
       )
     end
     alias_method :melt, :unpivot
@@ -3364,8 +4055,10 @@ module Polars
     # The fields will be inserted into the `DataFrame` on the location of the
     # `struct` type.
     #
-    # @param names [Object]
+    # @param columns [Object]
     #   Names of the struct columns that will be decomposed by its fields
+    # @param more_columns [Array]
+    #   Additional columns to unnest, specified as positional arguments.
     #
     # @return [LazyFrame]
     #
@@ -3410,11 +4103,11 @@ module Polars
     #   # │ foo    ┆ 1   ┆ a   ┆ true ┆ [1, 2]    ┆ baz   │
     #   # │ bar    ┆ 2   ┆ b   ┆ null ┆ [3]       ┆ womp  │
     #   # └────────┴─────┴─────┴──────┴───────────┴───────┘
-    def unnest(names)
-      if names.is_a?(::String)
-        names = [names]
-      end
-      _from_rbldf(_ldf.unnest(names))
+    def unnest(columns, *more_columns)
+      # Parse the first argument and the remaining arguments separately, then
+      # combine the two selectors with `|`.
+      primary = Utils.parse_list_into_selector(columns)
+      extra = Utils.parse_list_into_selector(more_columns)
+      combined = primary | extra
+      _from_rbldf(_ldf.unnest(combined._rbselector))
     end
     # Take two sorted DataFrames and merge them by the sorted key.
@@ -3483,9 +4176,261 @@ module Polars
       with_columns(F.col(column).set_sorted(descending: descending))
     end
-    # TODO
-    # def update
-    # end
+    # Update the values in this `LazyFrame` with the values in `other`.
+    #
+    # @note
+    #   This functionality is considered **unstable**. It may be changed
+    #   at any point without it being considered a breaking change.
+    #
+    # @param other [LazyFrame]
+    #   LazyFrame that will be used to update the values
+    # @param on [Object]
+    #   Column names that will be joined on. If set to `nil` (default),
+    #   the implicit row index of each frame is used as a join key.
+    # @param how ['left', 'inner', 'full']
+    #   * 'left' will keep all rows from the left table; rows may be duplicated
+    #     if multiple rows in the right frame match the left row's key.
+    #   * 'inner' keeps only those rows where the key exists in both frames.
+    #   * 'full' will update existing rows where the key matches while also
+    #     adding any new rows contained in the given frame.
+    # @param left_on [Object]
+    #  Join column(s) of the left DataFrame.
+    # @param right_on [Object]
+    #  Join column(s) of the right DataFrame.
+    # @param include_nulls [Boolean]
+    #   Overwrite values in the left frame with null values from the right frame.
+    #   If set to `false` (default), null values in the right frame are ignored.
+    # @param maintain_order ['none', 'left', 'right', 'left_right', 'right_left']
+    #   Which order of rows from the inputs to preserve. See `LazyFrame.join`
+    #   for details. Unlike `join` this function preserves the left order by
+    #   default.
+    #
+    # @return [LazyFrame]
+    #
+    # @note
+    #   This is syntactic sugar for a left/inner join that preserves the order
+    #   of the left `DataFrame` by default, with an optional coalesce when
+    #   `include_nulls: false`.
+    #
+    # @example Update `df` values with the non-null values in `new_df`, by row index:
+    #   lf = Polars::LazyFrame.new(
+    #     {
+    #       "A" => [1, 2, 3, 4],
+    #       "B" => [400, 500, 600, 700]
+    #     }
+    #   )
+    #   new_lf = Polars::LazyFrame.new(
+    #     {
+    #       "B" => [-66, nil, -99],
+    #       "C" => [5, 3, 1]
+    #     }
+    #   )
+    #   lf.update(new_lf).collect
+    #   # =>
+    #   # shape: (4, 2)
+    #   # ┌─────┬─────┐
+    #   # │ A   ┆ B   │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ -66 │
+    #   # │ 2   ┆ 500 │
+    #   # │ 3   ┆ -99 │
+    #   # │ 4   ┆ 700 │
+    #   # └─────┴─────┘
+    #
+    # @example Update `df` values with the non-null values in `new_df`, by row index, but only keeping those rows that are common to both frames:
+    #   lf.update(new_lf, how: "inner").collect
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌─────┬─────┐
+    #   # │ A   ┆ B   │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ -66 │
+    #   # │ 2   ┆ 500 │
+    #   # │ 3   ┆ -99 │
+    #   # └─────┴─────┘
+    #
+    # @example Update `df` values with the non-null values in `new_df`, using a full outer join strategy that defines explicit join columns in each frame:
+    #   lf.update(new_lf, left_on: ["A"], right_on: ["C"], how: "full").collect
+    #   # =>
+    #   # shape: (5, 2)
+    #   # ┌─────┬─────┐
+    #   # │ A   ┆ B   │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ -99 │
+    #   # │ 2   ┆ 500 │
+    #   # │ 3   ┆ 600 │
+    #   # │ 4   ┆ 700 │
+    #   # │ 5   ┆ -66 │
+    #   # └─────┴─────┘
+    #
+    # @example Update `df` values including null values in `new_df`, using a full outer join strategy that defines explicit join columns in each frame:
+    #   lf.update(
+    #     new_lf, left_on: "A", right_on: "C", how: "full", include_nulls: true
+    #   ).collect
+    #   # =>
+    #   # shape: (5, 2)
+    #   # ┌─────┬──────┐
+    #   # │ A   ┆ B    │
+    #   # │ --- ┆ ---  │
+    #   # │ i64 ┆ i64  │
+    #   # ╞═════╪══════╡
+    #   # │ 1   ┆ -99  │
+    #   # │ 2   ┆ 500  │
+    #   # │ 3   ┆ null │
+    #   # │ 4   ┆ 700  │
+    #   # │ 5   ┆ -66  │
+    #   # └─────┴──────┘
+    def update(
+      other,
+      on: nil,
+      how: "left",
+      left_on: nil,
+      right_on: nil,
+      include_nulls: false,
+      maintain_order: "left"
+    )
+      Utils.require_same_type(self, other)
+      # "outer" and "outer_coalesce" are accepted as aliases of "full"
+      if ["outer", "outer_coalesce"].include?(how)
+        how = "full"
+      end
+      if !["left", "inner", "full"].include?(how)
+        # single braces: `{{` is a Python f-string escape and would be printed
+        # literally by Ruby
+        msg = "`how` must be one of {'left', 'inner', 'full'}; found #{how.inspect}"
+        raise ArgumentError, msg
+      end
+      slf = self
+      row_index_used = false
+      if on.nil?
+        if left_on.nil? && right_on.nil?
+          # no keys provided--use row index
+          row_index_used = true
+          row_index_name = "__POLARS_ROW_INDEX"
+          slf = slf.with_row_index(name: row_index_name)
+          other = other.with_row_index(name: row_index_name)
+          left_on = right_on = [row_index_name]
+        else
+          # one of left or right is missing, raise error
+          if left_on.nil?
+            msg = "missing join columns for left frame"
+            raise ArgumentError, msg
+          end
+          if right_on.nil?
+            msg = "missing join columns for right frame"
+            raise ArgumentError, msg
+          end
+        end
+      else
+        # move on into left/right_on to simplify logic
+        left_on = right_on = on
+      end
+      # normalize a single column name to a one-element list
+      if left_on.is_a?(::String)
+        left_on = [left_on]
+      end
+      if right_on.is_a?(::String)
+        right_on = [right_on]
+      end
+      # every join key must exist in the schema of its own frame
+      left_schema = slf.collect_schema
+      left_on.each do |name|
+        if !left_schema.include?(name)
+          msg = "left join column #{name.inspect} not found"
+          raise ArgumentError, msg
+        end
+      end
+      right_schema = other.collect_schema
+      right_on.each do |name|
+        if !right_schema.include?(name)
+          msg = "right join column #{name.inspect} not found"
+          raise ArgumentError, msg
+        end
+      end
+      # no need to join if *only* join columns are in other (inner/left update only)
+      if how != "full" && right_schema.length == right_on.length
+        if row_index_used
+          return slf.drop(row_index_name)
+        end
+        return slf
+      end
+      # only use non-idx right columns present in left frame
+      right_other = Set.new(right_schema.to_h.keys).intersection(left_schema.to_h.keys) - Set.new(right_on)
+      # When include_nulls is True, we need to distinguish records after the join that
+      # were originally null in the right frame, as opposed to records that were null
+      # because the key was missing from the right frame.
+      # Add a validity column to track whether row was matched or not.
+      if include_nulls
+        validity = ["__POLARS_VALIDITY"]
+        other = other.with_columns(F.lit(true).alias(validity[0]))
+      else
+        validity = []
+      end
+      # right-hand columns that clash with left-hand names receive this suffix
+      # in the join and are dropped again once their values have been merged
+      tmp_name = "__POLARS_RIGHT"
+      drop_columns = right_other.map { |name| "#{name}#{tmp_name}" } + validity
+      result = (
+        slf.join(
+          other.select(*right_on, *right_other, *validity),
+          left_on: left_on,
+          right_on: right_on,
+          how: how,
+          suffix: tmp_name,
+          coalesce: true,
+          maintain_order: maintain_order
+        )
+        .with_columns(
+          right_other.map do |name|
+            (
+              if include_nulls
+                # use left value only when right value failed to join
+                F.when(F.col(validity).is_null)
+                .then(F.col(name))
+                .otherwise(F.col("#{name}#{tmp_name}"))
+              else
+                F.coalesce(["#{name}#{tmp_name}", F.col(name)])
+              end
+            ).alias(name)
+          end
+        )
+        .drop(drop_columns)
+      )
+      # the temporary row index was only a join key; remove it from the result
+      if row_index_used
+        result = result.drop(row_index_name)
+      end
+      _from_rbldf(result._ldf)
+    end
+    # Return the number of non-null elements for each column.
+    #
+    # @return [LazyFrame]
+    #
+    # @example
+    #   lf = Polars::LazyFrame.new(
+    #     {"a" => [1, 2, 3, 4], "b" => [1, 2, 1, nil], "c" => [nil, nil, nil, nil]}
+    #   )
+    #   lf.count.collect
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ a   ┆ b   ┆ c   │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ u32 ┆ u32 ┆ u32 │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 4   ┆ 3   ┆ 0   │
+    #   # └─────┴─────┴─────┘
+    def count
+      _from_rbldf(_ldf.count)
+    end
     private
@@ -3497,5 +4442,64 @@ module Polars
     def _from_rbldf(rb_ldf)
       # Instance-level shortcut for the class-level constructor of the same name.
       frame_class = self.class
       frame_class._from_rbldf(rb_ldf)
     end
+    def _filter(
+      predicates:,
+      constraints:,
+      invert: false
+    )
+      all_predicates = []
+      boolean_masks = []
+      predicates.each do |p|
+        # quick exit/skip conditions
+        if (p.is_a?(FalseClass) && invert) || (p.is_a?(TrueClass) && !invert)
+          next # ignore; doesn't filter/remove anything
+        end
+        if (p.is_a?(TrueClass) && invert) || (p.is_a?(FalseClass) && !invert)
+          return clear # discard all rows
+        end
+        # note: identify masks separately from predicates
+        if Utils.is_bool_sequence(p, include_series: true)
+          boolean_masks << Polars::Series.new(p, dtype: Boolean)
+        elsif (
+          (is_seq = Utils.is_sequence(p)) && p.any? { |x| !x.is_a?(Expr) }) ||
+          (!is_seq && !p.is_a?(Expr) && !(p.is_a?(::String) && collect_schema.include?(p))
+        )
+          err = p.is_a?(Series) ? "Series(…, dtype: #{p.dtype})" : p.inspect
+          msg = "invalid predicate for `filter`: #{err}"
+          raise TypeError, msg
+        else
+          all_predicates.concat(
+            Utils.parse_into_list_of_expressions(p).map { |x| Utils.wrap_expr(x) }
+          )
+        end
+      end
+      # unpack equality constraints from kwargs
+      all_predicates.concat(
+        constraints.map { |name, value| F.col(name).eq(value) }
+      )
+      if !(all_predicates.any? || boolean_masks.any?)
+        msg = "at least one predicate or constraint must be provided"
+        raise TypeError, msg
+      end
+      # if multiple predicates, combine as 'horizontal' expression
+      combined_predicate = all_predicates ? (all_predicates.length > 1 ? F.all_horizontal(*all_predicates) : all_predicates[0]) : nil
+      # apply reduced boolean mask first, if applicable, then predicates
+      if boolean_masks.any?
+        raise Todo
+      end
+      if combined_predicate.nil?
+        return _from_rbldf(_ldf)
+      end
+      filter_method = invert ? _ldf.method(:remove) : _ldf.method(:filter)
+      _from_rbldf(filter_method.(combined_predicate._rbexpr))
+    end
   end
 end