polars-df 0.21.1-arm64-darwin → 0.23.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,113 @@
+module Polars
+  # @private
+  class IcebergDataset
+    def initialize(
+      source,
+      snapshot_id:,
+      storage_options:
+    )
+      @source = source
+      @snapshot_id = snapshot_id
+      @storage_options = storage_options
+    end
+
+    def to_lazyframe
+      # for iceberg < 0.1.3
+      if !@source.respond_to?(:scan)
+        return @source.to_polars(snapshot_id: @snapshot_id, storage_options: @storage_options)
+      end
+
+      scan = @source.scan(snapshot_id: @snapshot_id)
+      files = scan.plan_files
+
+      table = scan.table
+      snapshot = scan.snapshot
+      schema = snapshot ? table.schema_by_id(snapshot[:schema_id]) : table.current_schema
+
+      if files.empty?
+        # TODO improve
+        schema =
+          schema.fields.to_h do |field|
+            dtype =
+              case field[:type]
+              when "int"
+                Polars::Int32
+              when "long"
+                Polars::Int64
+              when "double"
+                Polars::Float64
+              when "string"
+                Polars::String
+              when "timestamp"
+                Polars::Datetime
+              else
+                raise Todo
+              end
+
+            [field[:name], dtype]
+          end
+
+        LazyFrame.new(schema: schema)
+      else
+        sources = files.map { |v| v[:data_file_path] }
+
+        column_mapping = [
+          "iceberg-column-mapping",
+          arrow_schema(schema)
+        ]
+
+        deletion_files = [
+          "iceberg-position-delete",
+          files.map.with_index
+            .select { |v, i| v[:deletes].any? }
+            .to_h { |v, i| [i, v[:deletes].map { |d| d[:file_path] }] }
+        ]
+
+        scan_options = {
+          storage_options: @storage_options,
+          cast_options: Polars::ScanCastOptions._default_iceberg,
+          allow_missing_columns: true,
+          extra_columns: "ignore",
+          _column_mapping: column_mapping,
+          _deletion_files: deletion_files
+        }
+
+        Polars.scan_parquet(sources, **scan_options)
+      end
+    end
+
+    private
+
+    def arrow_schema(schema)
+      fields =
+        schema.fields.map do |field|
+          type =
+            case field[:type]
+            when "boolean"
+              "boolean"
+            when "int"
+              "int32"
+            when "long"
+              "int64"
+            when "float"
+              "float32"
+            when "double"
+              "float64"
+            else
+              raise Todo
+            end
+
+          {
+            name: field[:name],
+            type: type,
+            nullable: !field[:required],
+            metadata: {
+              "PARQUET:field_id" => field[:id].to_s
+            }
+          }
+        end
+
+      {fields: fields}
+    end
+  end
+end
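For reference, the column-mapping payload built by `arrow_schema` above has the following shape for a single Iceberg field (the field itself is illustrative):

    # given an Iceberg field {name: "id", type: "long", required: true, id: 1},
    # arrow_schema returns:
    {
      fields: [
        {
          name: "id",
          type: "int64",                          # "long" maps to int64
          nullable: false,                        # required fields are non-nullable
          metadata: {"PARQUET:field_id" => "1"}   # ties the column back to the Iceberg field id
        }
      ]
    }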
@@ -0,0 +1,34 @@
+module Polars
+  module IO
+    # Lazily read from an Apache Iceberg table.
+    #
+    # @param source [Object]
+    #   An Iceberg Ruby table, or a direct path to the metadata.
+    # @param snapshot_id [Integer]
+    #   The snapshot ID to scan from.
+    # @param storage_options [Hash]
+    #   Extra options for the storage backends.
+    #
+    # @return [LazyFrame]
+    def scan_iceberg(
+      source,
+      snapshot_id: nil,
+      storage_options: nil
+    )
+      require "iceberg"
+
+      unless source.is_a?(Iceberg::Table)
+        raise Todo
+      end
+
+      dataset =
+        IcebergDataset.new(
+          source,
+          snapshot_id:,
+          storage_options:
+        )
+
+      dataset.to_lazyframe
+    end
+  end
+end
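A minimal usage sketch for the new entry point; how the `Iceberg::Table` is obtained is outside this diff, so the `catalog` below is hypothetical:

    require "iceberg"

    table = catalog.load_table("db.events")  # hypothetical lookup yielding an Iceberg::Table
    lf = Polars.scan_iceberg(table, snapshot_id: nil, storage_options: nil)
    lf.head(5).collect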
data/lib/polars/io/ipc.rb CHANGED
@@ -187,8 +187,16 @@ module Polars
     #   DataFrame.
     # @param row_count_offset [Integer]
     #   Offset to start the row_count column (only use if the name is set).
+    # @param glob [Boolean]
+    #   Expand path given via globbing rules.
     # @param storage_options [Hash]
     #   Extra options that make sense for a particular storage connection.
+    # @param retries [Integer]
+    #   Number of retries if accessing a cloud instance fails.
+    # @param file_cache_ttl [Integer]
+    #   Amount of time to keep downloaded cloud files since their last access time,
+    #   in seconds. Uses the `POLARS_FILE_CACHE_TTL` environment variable
+    #   (which defaults to 1 hour) if not given.
     # @param hive_partitioning [Boolean]
     #   Infer statistics and schema from Hive partitioned URL and use them
     #   to prune reads. This is unset by default (i.e. `nil`), meaning it is
@@ -210,66 +218,37 @@ module Polars
       rechunk: true,
       row_count_name: nil,
       row_count_offset: 0,
+      glob: true,
       storage_options: nil,
+      retries: 2,
+      file_cache_ttl: nil,
       hive_partitioning: nil,
       hive_schema: nil,
       try_parse_hive_dates: true,
       include_file_paths: nil
     )
-      _scan_ipc_impl(
-        source,
-        n_rows: n_rows,
-        cache: cache,
-        rechunk: rechunk,
-        row_count_name: row_count_name,
-        row_count_offset: row_count_offset,
-        storage_options: storage_options,
-        hive_partitioning: hive_partitioning,
-        hive_schema: hive_schema,
-        try_parse_hive_dates: try_parse_hive_dates,
-        include_file_paths: include_file_paths
-      )
-    end
-
-    # @private
-    def _scan_ipc_impl(
-      source,
-      n_rows: nil,
-      cache: true,
-      rechunk: true,
-      row_count_name: nil,
-      row_count_offset: 0,
-      storage_options: nil,
-      hive_partitioning: nil,
-      hive_schema: nil,
-      try_parse_hive_dates: true,
-      include_file_paths: nil
-    )
-      sources = []
-      if Utils.pathlike?(source)
-        source = Utils.normalize_filepath(source)
-      elsif source.is_a?(::Array)
-        if Utils.is_path_or_str_sequence(source)
-          sources = source.map { |s| Utils.normalize_filepath(s) }
-        else
-          sources = source
-        end
+      row_index_name = row_count_name
+      row_index_offset = row_count_offset
 
-        source = nil
-      end
+      sources = get_sources(source)
 
       rblf =
         RbLazyFrame.new_from_ipc(
-          source,
           sources,
-          n_rows,
-          cache,
-          rechunk,
-          Utils.parse_row_index_args(row_count_name, row_count_offset),
-          hive_partitioning,
-          hive_schema,
-          try_parse_hive_dates,
-          include_file_paths
+          ScanOptions.new(
+            row_index: !row_index_name.nil? ? [row_index_name, row_index_offset] : nil,
+            pre_slice: !n_rows.nil? ? [0, n_rows] : nil,
+            include_file_paths: include_file_paths,
+            glob: glob,
+            hive_partitioning: hive_partitioning,
+            hive_schema: hive_schema,
+            try_parse_hive_dates: try_parse_hive_dates,
+            rechunk: rechunk,
+            cache: cache,
+            storage_options: !storage_options.nil? ? storage_options.to_a : nil,
+            retries: retries
+          ),
+          file_cache_ttl
        )
      Utils.wrap_ldf(rblf)
    end
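A sketch of `scan_ipc` with the newly exposed cloud options (the bucket path is illustrative):

    lf = Polars.scan_ipc(
      "s3://my-bucket/data/*.arrow",  # expanded because glob: true (the default)
      retries: 2,                     # retry failed cloud requests
      file_cache_ttl: 3600            # keep downloaded files for an hour after last access
    )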
@@ -117,14 +117,13 @@ module Polars
     # @param source [Object]
     #   Path to a file or a file-like object.
     #
-    # @return [Hash]
+    # @return [Schema]
     def read_parquet_schema(source)
       if Utils.pathlike?(source)
         source = Utils.normalize_filepath(source)
       end
 
-      # TODO return Schema
-      scan_parquet(source).collect_schema.to_h
+      scan_parquet(source).collect_schema
     end
 
     # Get file-level custom metadata of a Parquet file without reading data.
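At the call site, 0.23.0 now returns a `Polars::Schema` where 0.21.1 returned a plain hash (file name illustrative):

    schema = Polars.read_parquet_schema("data.parquet")
    schema.class  # => Polars::Schema (previously Hash)
    schema.to_h   # the old hash form remains one call away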
@@ -207,6 +206,9 @@ module Polars
     #   defined schema are encountered in the data:
     #   * `ignore`: Silently ignores.
     #   * `raise`: Raises an error.
+    # @param cast_options [Object]
+    #   Configuration for column type-casting during scans. Useful for datasets
+    #   containing files that have differing schemas.
     #
     # @return [LazyFrame]
     def scan_parquet(
@@ -230,6 +232,7 @@ module Polars
       include_file_paths: nil,
       allow_missing_columns: false,
       extra_columns: "raise",
+      cast_options: nil,
       _column_mapping: nil,
       _deletion_files: nil
     )
@@ -268,7 +271,7 @@ module Polars
          ScanOptions.new(
            row_index: !row_index_name.nil? ? [row_index_name, row_index_offset] : nil,
            pre_slice: !n_rows.nil? ? [0, n_rows] : nil,
-           # cast_options: cast_options,
+           cast_options: cast_options,
            extra_columns: extra_columns,
            missing_columns: missing_columns,
            include_file_paths: include_file_paths,
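The new `cast_options` keyword flows straight through to `ScanOptions`; the `IcebergDataset` added above already exercises it. A condensed sketch of that pattern (paths illustrative; note `ScanCastOptions._default_iceberg` is private API):

    Polars.scan_parquet(
      ["part-0.parquet", "part-1.parquet"],   # files whose schemas may differ
      cast_options: Polars::ScanCastOptions._default_iceberg,
      allow_missing_columns: true,
      extra_columns: "ignore"
    )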
@@ -2,8 +2,9 @@ module Polars
   module IO
     class ScanOptions
       attr_reader :row_index, :pre_slice, :cast_options, :extra_columns, :missing_columns,
-        :include_file_paths, :glob, :hive_partitioning, :hive_schema, :try_parse_hive_dates,
-        :rechunk, :cache, :storage_options, :credential_provider, :retries, :column_mapping, :deletion_files
+        :include_file_paths, :glob, :hidden_file_prefix, :hive_partitioning, :hive_schema, :try_parse_hive_dates,
+        :rechunk, :cache, :storage_options, :credential_provider, :retries, :column_mapping,
+        :default_values, :deletion_files, :table_statistics, :row_count
 
       def initialize(
         row_index: nil,
@@ -13,6 +14,7 @@ module Polars
         missing_columns: "raise",
         include_file_paths: nil,
         glob: true,
+        hidden_file_prefix: nil,
         hive_partitioning: nil,
         hive_schema: nil,
         try_parse_hive_dates: true,
@@ -22,7 +24,10 @@ module Polars
         credential_provider: nil,
         retries: 2,
         column_mapping: nil,
-        deletion_files: nil
+        default_values: nil,
+        deletion_files: nil,
+        table_statistics: nil,
+        row_count: nil
       )
         @row_index = row_index
         @pre_slice = pre_slice
@@ -31,6 +36,7 @@ module Polars
         @missing_columns = missing_columns
         @include_file_paths = include_file_paths
         @glob = glob
+        @hidden_file_prefix = hidden_file_prefix
         @hive_partitioning = hive_partitioning
         @hive_schema = hive_schema
         @try_parse_hive_dates = try_parse_hive_dates
@@ -40,7 +46,10 @@ module Polars
         @credential_provider = credential_provider
         @retries = retries
         @column_mapping = column_mapping
+        @default_values = default_values
         @deletion_files = deletion_files
+        @table_statistics = table_statistics
+        @row_count = row_count
       end
     end
   end
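`ScanOptions` stays a plain value object: each new keyword is stored verbatim and surfaced via `attr_reader`, so the native layer can probe it directly. A quick sketch:

    opts = Polars::IO::ScanOptions.new(hidden_file_prefix: nil, row_count: nil)
    opts.glob       # => true (the default)
    opts.row_count  # => nil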
@@ -0,0 +1,17 @@
+module Polars
+  module IO
+    private
+
+    def get_sources(source)
+      if Utils.pathlike?(source)
+        source = Utils.normalize_filepath(source, check_not_directory: false)
+      elsif Utils.is_path_or_str_sequence(source)
+        source = source.map { |s| Utils.normalize_filepath(s, check_not_directory: false) }
+      end
+      unless source.is_a?(::Array)
+        source = [source]
+      end
+      source
+    end
+  end
+end
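Behavior sketch for the helper (it is private to `Polars::IO`), per the branches above:

    get_sources("data.arrow")            # => ["data.arrow"]
    get_sources(["a.arrow", "b.arrow"])  # => both paths normalized
    get_sources(io)                      # => [io] (file-like objects are wrapped as-is)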
@@ -27,9 +27,6 @@ module Polars
       ldf
     end
 
-    # def self.from_json
-    # end
-
     # Read a logical plan from a JSON file to construct a LazyFrame.
     #
     # @param file [String]
@@ -41,7 +38,49 @@ module Polars
         file = Utils.normalize_filepath(file)
       end
 
-      Utils.wrap_ldf(RbLazyFrame.read_json(file))
+      Utils.wrap_ldf(RbLazyFrame.deserialize_json(file))
+    end
+
+    # Read a logical plan from a file to construct a LazyFrame.
+    #
+    # @param source [Object]
+    #   Path to a file or a file-like object (by file-like object, we refer to
+    #   objects that have a `read` method, such as a file handler or `StringIO`).
+    #
+    # @return [LazyFrame]
+    #
+    # @note
+    #   This function uses marshaling if the logical plan contains Ruby UDFs,
+    #   and as such inherits the security implications. Deserializing can execute
+    #   arbitrary code, so it should only be attempted on trusted data.
+    #
+    # @note
+    #   Serialization is not stable across Polars versions: a LazyFrame serialized
+    #   in one Polars version may not be deserializable in another Polars version.
+    #
+    # @example
+    #   lf = Polars::LazyFrame.new({"a" => [1, 2, 3]}).sum
+    #   bytes = lf.serialize
+    #   Polars::LazyFrame.deserialize(StringIO.new(bytes)).collect
+    #   # =>
+    #   # shape: (1, 1)
+    #   # ┌─────┐
+    #   # │ a   │
+    #   # │ --- │
+    #   # │ i64 │
+    #   # ╞═════╡
+    #   # │ 6   │
+    #   # └─────┘
+    def self.deserialize(source)
+      raise Todo unless RbLazyFrame.respond_to?(:deserialize_binary)
+
+      if Utils.pathlike?(source)
+        source = Utils.normalize_filepath(source)
+      end
+
+      deserializer = RbLazyFrame.method(:deserialize_binary)
+
+      _from_rbldf(deserializer.(source))
     end
 
     # Get or set column names.
@@ -151,6 +190,38 @@ module Polars
       nil
     end
 
+    # Serialize the logical plan of this LazyFrame to a file or string.
+    #
+    # @param file [Object]
+    #   File path to which the result should be written. If set to `nil`
+    #   (default), the output is returned as a string instead.
+    #
+    # @return [Object]
+    #
+    # @note
+    #   Serialization is not stable across Polars versions: a LazyFrame serialized
+    #   in one Polars version may not be deserializable in another Polars version.
+    #
+    # @example Serialize the logical plan into a binary representation.
+    #   lf = Polars::LazyFrame.new({"a" => [1, 2, 3]}).sum
+    #   bytes = lf.serialize
+    #   Polars::LazyFrame.deserialize(StringIO.new(bytes)).collect
+    #   # =>
+    #   # shape: (1, 1)
+    #   # ┌─────┐
+    #   # │ a   │
+    #   # │ --- │
+    #   # │ i64 │
+    #   # ╞═════╡
+    #   # │ 6   │
+    #   # └─────┘
+    def serialize(file = nil)
+      raise Todo unless _ldf.respond_to?(:serialize_binary)
+
+      serializer = _ldf.method(:serialize_binary)
+      Utils.serialize_polars_object(serializer, file)
+    end
+
     # Offers a structured way to apply a sequence of user-defined functions (UDFs).
     #
     # @param func [Object]
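A round-trip sketch combining the two new methods through a file path (path illustrative):

    lf = Polars::LazyFrame.new({"a" => [1, 2, 3]}).sum
    lf.serialize("plan.bin")                            # write the binary plan to disk
    Polars::LazyFrame.deserialize("plan.bin").collect   # rebuild and execute it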
@@ -774,6 +845,21 @@ module Polars
     # @param maintain_order [Boolean]
     #   Maintain the order in which data is processed.
     #   Setting this to `false` will be slightly faster.
+    # @param storage_options [Hash]
+    #   Options that indicate how to connect to a cloud provider.
+    #
+    #   The cloud providers currently supported are AWS, GCP, and Azure.
+    #   See supported keys here:
+    #
+    #   * [aws](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html)
+    #   * [gcp](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html)
+    #   * [azure](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html)
+    #   * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
+    #
+    #   If `storage_options` is not provided, Polars will try to infer the
+    #   information from environment variables.
+    # @param retries [Integer]
+    #   Number of retries if accessing a cloud instance fails.
     # @param type_coercion [Boolean]
     #   Do type coercion optimization.
     # @param predicate_pushdown [Boolean]
@@ -806,6 +892,8 @@ module Polars
       path,
       compression: "zstd",
       maintain_order: true,
+      storage_options: nil,
+      retries: 2,
       type_coercion: true,
       predicate_pushdown: true,
       projection_pushdown: true,
@@ -816,10 +904,6 @@ module Polars
       mkdir: false,
       lazy: false
     )
-      # TODO support storage options in Rust
-      storage_options = nil
-      retries = 2
-
      lf = _set_sink_optimizations(
        type_coercion: type_coercion,
        predicate_pushdown: predicate_pushdown,
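With the TODO removed, the sink now forwards cloud credentials itself instead of discarding them. A hedged sketch, assuming the surrounding method is `sink_parquet` (the hunk does not show its name) and using an illustrative bucket and keys:

    lf = Polars.scan_parquet("local/*.parquet")
    lf.sink_parquet(
      "s3://my-bucket/out.parquet",
      storage_options: {"aws_region" => "us-east-1"},  # object_store config keys
      retries: 2
    )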
@@ -4059,6 +4143,9 @@ module Polars
     #   Names of the struct columns that will be decomposed by its fields
     # @param more_columns [Array]
     #   Additional columns to unnest, specified as positional arguments.
+    # @param separator [String]
+    #   If set, output columns are renamed as the combination of the struct
+    #   column name, the separator, and the field name.
     #
     # @return [LazyFrame]
     #
@@ -4103,11 +4190,11 @@ module Polars
     #   # │ foo    ┆ 1   ┆ a   ┆ true ┆ [1, 2]    ┆ baz  │
     #   # │ bar    ┆ 2   ┆ b   ┆ null ┆ [3]       ┆ womp │
     #   # └────────┴─────┴─────┴──────┴───────────┴──────┘
-    def unnest(columns, *more_columns)
+    def unnest(columns, *more_columns, separator: nil)
       subset = Utils.parse_list_into_selector(columns) | Utils.parse_list_into_selector(
         more_columns
       )
-      _from_rbldf(_ldf.unnest(subset._rbselector))
+      _from_rbldf(_ldf.unnest(subset._rbselector, separator))
     end
 
     # Take two sorted DataFrames and merge them by the sorted key.
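A sketch of the new `separator` keyword; per the docs above, unnested columns keep the struct column name as a prefix (data illustrative, output names inferred from the documented semantics):

    lf = Polars::LazyFrame.new({"meta" => [{"id" => 1, "tag" => "a"}]})
    lf.unnest("meta", separator: "_").collect_schema.names
    # => expected ["meta_id", "meta_tag"]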
@@ -925,7 +925,7 @@ module Polars
     # Convert the series of type `List` to a series of type `Struct`.
     #
     # @param n_field_strategy ["first_non_null", "max_width"]
-    #   Strategy to determine the number of fields of the struct.
+    #   Deprecated and ignored.
     # @param fields [Array]
     #   If the name and number of the desired fields are known in advance,
     #   a list of field names can be given, which will be assigned by index.
@@ -945,20 +945,28 @@ module Polars
     # @return [Expr]
     #
     # @example
-    #   df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
-    #   df.select([Polars.col("a").list.to_struct])
+    #   df = Polars::DataFrame.new({"n" => [[0, 1], [0, 1, 2]]})
+    #   df.with_columns(struct: Polars.col("n").list.to_struct(upper_bound: 2))
     #   # =>
-    #   # shape: (2, 1)
-    #   # ┌────────────┐
-    #   # │ a          │
-    #   # │ ---        │
-    #   # │ struct[3]  │
-    #   # ╞════════════╡
-    #   # │ {1,2,3}    │
-    #   # │ {1,2,null} │
-    #   # └────────────┘
+    #   # shape: (2, 2)
+    #   # ┌───────────┬───────────┐
+    #   # │ n         ┆ struct    │
+    #   # │ ---       ┆ ---       │
+    #   # │ list[i64] ┆ struct[2] │
+    #   # ╞═══════════╪═══════════╡
+    #   # │ [0, 1]    ┆ {0,1}     │
+    #   # │ [0, 1, 2] ┆ {0,1}     │
+    #   # └───────────┴───────────┘
     def to_struct(n_field_strategy: "first_non_null", fields: nil, upper_bound: nil)
-      Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, fields, nil))
+      if !fields.is_a?(::Array)
+        if fields.nil?
+          fields = upper_bound.times.map { |i| "field_#{i}" }
+        else
+          fields = upper_bound.times.map { |i| fields.(i) }
+        end
+      end
+
+      Utils.wrap_expr(_rbexpr.list_to_struct(fields))
     end
 
     # Run any polars expression against the lists' elements.
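Per the rewrite, a callable `fields` is now expanded on the Ruby side, so `upper_bound` must be supplied whenever `fields` is not a literal array (the lambda below is illustrative):

    df = Polars::DataFrame.new({"n" => [[0, 1], [0, 1, 2]]})
    df.select(
      Polars.col("n").list.to_struct(
        fields: ->(i) { "n#{i}" },  # invoked as fields.(i) for each i < upper_bound
        upper_bound: 2
      )
    )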
@@ -755,27 +755,39 @@ module Polars
     #
     # @param n_field_strategy ["first_non_null", "max_width"]
     #   Strategy to determine the number of fields of the struct.
-    # @param name_generator [Object]
-    #   A custom function that can be used to generate the field names.
-    #   Default field names are `field_0, field_1 .. field_n`
-    #
-    # @return [Series]
-    #
-    # @example
-    #   df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
-    #   df.select([Polars.col("a").list.to_struct])
-    #   # =>
-    #   # shape: (2, 1)
-    #   # ┌────────────┐
-    #   # │ a          │
-    #   # │ ---        │
-    #   # │ struct[3]  │
-    #   # ╞════════════╡
-    #   # │ {1,2,3}    │
-    #   # │ {1,2,null} │
-    #   # └────────────┘
-    def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
-      super
+    # @param fields [Object]
+    #   If the name and number of the desired fields are known in advance,
+    #   a list of field names can be given, which will be assigned by index.
+    #   Otherwise, to dynamically assign field names, a custom function can be
+    #   used; if neither is set, fields will be `field_0, field_1 .. field_n`.
+    #
+    # @return [Series]
+    #
+    # @example Convert list to struct with field name assignment by index from a list of names:
+    #   s1 = Polars::Series.new("n", [[0, 1, 2], [0, 1]])
+    #   s1.list.to_struct(fields: ["one", "two", "three"]).struct.unnest
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬───────┐
+    #   # │ one ┆ two ┆ three │
+    #   # │ --- ┆ --- ┆ ---   │
+    #   # │ i64 ┆ i64 ┆ i64   │
+    #   # ╞═════╪═════╪═══════╡
+    #   # │ 0   ┆ 1   ┆ 2     │
+    #   # │ 0   ┆ 1   ┆ null  │
+    #   # └─────┴─────┴───────┘
+    def to_struct(n_field_strategy: "first_non_null", fields: nil)
+      if fields.is_a?(::Array)
+        s = Utils.wrap_s(_s)
+        return (
+          s.to_frame
+            .select_seq(F.col(s.name).list.to_struct(fields: fields))
+            .to_series
+        )
+      end
+
+      raise Todo
+      # Utils.wrap_s(_s.list_to_struct(n_field_strategy, fields))
     end
 
     # Run any polars expression against the lists' elements.
@@ -248,6 +248,31 @@ module Polars
       Selector._from_rbselector(_rbexpr.into_selector)
     end
 
+    # Serialize this expression to a file or string.
+    #
+    # @param file [Object]
+    #   File path to which the result should be written. If set to `nil`
+    #   (default), the output is returned as a string instead.
+    #
+    # @return [Object]
+    #
+    # @note
+    #   Serialization is not stable across Polars versions: an expression serialized
+    #   in one Polars version may not be deserializable in another Polars version.
+    #
+    # @example Serialize the expression into a binary representation.
+    #   expr = Polars.col("foo").sum.over("bar")
+    #   bytes = expr.meta.serialize
+    #   Polars::Expr.deserialize(StringIO.new(bytes))
+    #   # => col("foo").sum().over([col("bar")])
+    def serialize(file = nil)
+      raise Todo unless _rbexpr.respond_to?(:serialize_binary)
+
+      serializer = _rbexpr.method(:serialize_binary)
+
+      Utils.serialize_polars_object(serializer, file)
+    end
+
     # Format the expression as a tree.
     #
     # @param return_as_string [Boolean]