polars-df 0.21.1-arm64-darwin → 0.23.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,113 @@
+module Polars
+  # @private
+  class IcebergDataset
+    def initialize(
+      source,
+      snapshot_id:,
+      storage_options:
+    )
+      @source = source
+      @snapshot_id = snapshot_id
+      @storage_options = storage_options
+    end
+
+    def to_lazyframe
+      # for iceberg < 0.1.3
+      if !@source.respond_to?(:scan)
+        return @source.to_polars(snapshot_id: @snapshot_id, storage_options: @storage_options)
+      end
+
+      scan = @source.scan(snapshot_id: @snapshot_id)
+      files = scan.plan_files
+
+      table = scan.table
+      snapshot = scan.snapshot
+      schema = snapshot ? table.schema_by_id(snapshot[:schema_id]) : table.current_schema
+
+      if files.empty?
+        # TODO improve
+        schema =
+          schema.fields.to_h do |field|
+            dtype =
+              case field[:type]
+              when "int"
+                Polars::Int32
+              when "long"
+                Polars::Int64
+              when "double"
+                Polars::Float64
+              when "string"
+                Polars::String
+              when "timestamp"
+                Polars::Datetime
+              else
+                raise Todo
+              end
+
+            [field[:name], dtype]
+          end
+
+        LazyFrame.new(schema: schema)
+      else
+        sources = files.map { |v| v[:data_file_path] }
+
+        column_mapping = [
+          "iceberg-column-mapping",
+          arrow_schema(schema)
+        ]
+
+        deletion_files = [
+          "iceberg-position-delete",
+          files.map.with_index
+            .select { |v, i| v[:deletes].any? }
+            .to_h { |v, i| [i, v[:deletes].map { |d| d[:file_path] }] }
+        ]
+
+        scan_options = {
+          storage_options: @storage_options,
+          cast_options: Polars::ScanCastOptions._default_iceberg,
+          allow_missing_columns: true,
+          extra_columns: "ignore",
+          _column_mapping: column_mapping,
+          _deletion_files: deletion_files
+        }
+
+        Polars.scan_parquet(sources, **scan_options)
+      end
+    end
+
+    private
+
+    def arrow_schema(schema)
+      fields =
+        schema.fields.map do |field|
+          type =
+            case field[:type]
+            when "boolean"
+              "boolean"
+            when "int"
+              "int32"
+            when "long"
+              "int64"
+            when "float"
+              "float32"
+            when "double"
+              "float64"
+            else
+              raise Todo
+            end
+
+          {
+            name: field[:name],
+            type: type,
+            nullable: !field[:required],
+            metadata: {
+              "PARQUET:field_id" => field[:id].to_s
+            }
+          }
+        end
+
+      {fields: fields}
+    end
+  end
+end
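For reference, the column-mapping payload built by `arrow_schema` above has the following shape for a single Iceberg field (the field itself is illustrative):

    # given an Iceberg field {name: "id", type: "long", required: true, id: 1},
    # arrow_schema returns:
    {
      fields: [
        {
          name: "id",
          type: "int64",                          # "long" maps to int64
          nullable: false,                        # required fields are non-nullable
          metadata: {"PARQUET:field_id" => "1"}   # ties the column back to the Iceberg field id
        }
      ]
    }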
@@ -0,0 +1,34 @@
+module Polars
+  module IO
+    # Lazily read from an Apache Iceberg table.
+    #
+    # @param source [Object]
+    #   An Iceberg Ruby table, or a direct path to the metadata.
+    # @param snapshot_id [Integer]
+    #   The snapshot ID to scan from.
+    # @param storage_options [Hash]
+    #   Extra options for the storage backends.
+    #
+    # @return [LazyFrame]
+    def scan_iceberg(
+      source,
+      snapshot_id: nil,
+      storage_options: nil
+    )
+      require "iceberg"
+
+      unless source.is_a?(Iceberg::Table)
+        raise Todo
+      end
+
+      dataset =
+        IcebergDataset.new(
+          source,
+          snapshot_id:,
+          storage_options:
+        )
+
+      dataset.to_lazyframe
+    end
+  end
+end
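A minimal usage sketch for the new entry point; how the `Iceberg::Table` is obtained is outside this diff, so the `catalog` below is hypothetical:

    require "iceberg"

    table = catalog.load_table("db.events")  # hypothetical lookup yielding an Iceberg::Table
    lf = Polars.scan_iceberg(table, snapshot_id: nil, storage_options: nil)
    lf.head(5).collect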
data/lib/polars/io/ipc.rb CHANGED
@@ -187,8 +187,16 @@ module Polars
     #   DataFrame.
     # @param row_count_offset [Integer]
     #   Offset to start the row_count column (only use if the name is set).
+    # @param glob [Boolean]
+    #   Expand path given via globbing rules.
     # @param storage_options [Hash]
     #   Extra options that make sense for a particular storage connection.
+    # @param retries [Integer]
+    #   Number of retries if accessing a cloud instance fails.
+    # @param file_cache_ttl [Integer]
+    #   Amount of time to keep downloaded cloud files since their last access time,
+    #   in seconds. Uses the `POLARS_FILE_CACHE_TTL` environment variable
+    #   (which defaults to 1 hour) if not given.
     # @param hive_partitioning [Boolean]
     #   Infer statistics and schema from Hive partitioned URL and use them
     #   to prune reads. This is unset by default (i.e. `nil`), meaning it is
@@ -210,66 +218,37 @@ module Polars
       rechunk: true,
       row_count_name: nil,
       row_count_offset: 0,
+      glob: true,
       storage_options: nil,
+      retries: 2,
+      file_cache_ttl: nil,
       hive_partitioning: nil,
       hive_schema: nil,
       try_parse_hive_dates: true,
       include_file_paths: nil
     )
-      _scan_ipc_impl(
-        source,
-        n_rows: n_rows,
-        cache: cache,
-        rechunk: rechunk,
-        row_count_name: row_count_name,
-        row_count_offset: row_count_offset,
-        storage_options: storage_options,
-        hive_partitioning: hive_partitioning,
-        hive_schema: hive_schema,
-        try_parse_hive_dates: try_parse_hive_dates,
-        include_file_paths: include_file_paths
-      )
-    end
-
-    # @private
-    def _scan_ipc_impl(
-      source,
-      n_rows: nil,
-      cache: true,
-      rechunk: true,
-      row_count_name: nil,
-      row_count_offset: 0,
-      storage_options: nil,
-      hive_partitioning: nil,
-      hive_schema: nil,
-      try_parse_hive_dates: true,
-      include_file_paths: nil
-    )
-      sources = []
-      if Utils.pathlike?(source)
-        source = Utils.normalize_filepath(source)
-      elsif source.is_a?(::Array)
-        if Utils.is_path_or_str_sequence(source)
-          sources = source.map { |s| Utils.normalize_filepath(s) }
-        else
-          sources = source
-        end
+      row_index_name = row_count_name
+      row_index_offset = row_count_offset
 
-        source = nil
-      end
+      sources = get_sources(source)
 
       rblf =
         RbLazyFrame.new_from_ipc(
-          source,
           sources,
-          n_rows,
-          cache,
-          rechunk,
-          Utils.parse_row_index_args(row_count_name, row_count_offset),
-          hive_partitioning,
-          hive_schema,
-          try_parse_hive_dates,
-          include_file_paths
+          ScanOptions.new(
+            row_index: !row_index_name.nil? ? [row_index_name, row_index_offset] : nil,
+            pre_slice: !n_rows.nil? ? [0, n_rows] : nil,
+            include_file_paths: include_file_paths,
+            glob: glob,
+            hive_partitioning: hive_partitioning,
+            hive_schema: hive_schema,
+            try_parse_hive_dates: try_parse_hive_dates,
+            rechunk: rechunk,
+            cache: cache,
+            storage_options: !storage_options.nil? ? storage_options.to_a : nil,
+            retries: retries
+          ),
+          file_cache_ttl
        )
      Utils.wrap_ldf(rblf)
    end
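A sketch of `scan_ipc` with the newly exposed cloud options (the bucket path is illustrative):

    lf = Polars.scan_ipc(
      "s3://my-bucket/data/*.arrow",  # expanded because glob: true (the default)
      retries: 2,                     # retry failed cloud requests
      file_cache_ttl: 3600            # keep downloaded files for an hour after last access
    )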
@@ -117,14 +117,13 @@ module Polars
     # @param source [Object]
     #   Path to a file or a file-like object.
     #
-    # @return [Hash]
+    # @return [Schema]
     def read_parquet_schema(source)
       if Utils.pathlike?(source)
         source = Utils.normalize_filepath(source)
       end
 
-      # TODO return Schema
-      scan_parquet(source).collect_schema.to_h
+      scan_parquet(source).collect_schema
     end
 
     # Get file-level custom metadata of a Parquet file without reading data.
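At the call site, 0.23.0 now returns a `Polars::Schema` where 0.21.1 returned a plain hash (file name illustrative):

    schema = Polars.read_parquet_schema("data.parquet")
    schema.class  # => Polars::Schema (previously Hash)
    schema.to_h   # the old hash form remains one call away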
@@ -207,6 +206,9 @@ module Polars
     #   defined schema are encountered in the data:
     #   * `ignore`: Silently ignores.
     #   * `raise`: Raises an error.
+    # @param cast_options [Object]
+    #   Configuration for column type-casting during scans. Useful for datasets
+    #   containing files that have differing schemas.
     #
     # @return [LazyFrame]
     def scan_parquet(
@@ -230,6 +232,7 @@ module Polars
       include_file_paths: nil,
       allow_missing_columns: false,
       extra_columns: "raise",
+      cast_options: nil,
       _column_mapping: nil,
       _deletion_files: nil
     )
@@ -268,7 +271,7 @@ module Polars
          ScanOptions.new(
            row_index: !row_index_name.nil? ? [row_index_name, row_index_offset] : nil,
            pre_slice: !n_rows.nil? ? [0, n_rows] : nil,
-           # cast_options: cast_options,
+           cast_options: cast_options,
            extra_columns: extra_columns,
            missing_columns: missing_columns,
            include_file_paths: include_file_paths,
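The new `cast_options` keyword flows straight through to `ScanOptions`; the `IcebergDataset` added above already exercises it. A condensed sketch of that pattern (paths illustrative; note `ScanCastOptions._default_iceberg` is private API):

    Polars.scan_parquet(
      ["part-0.parquet", "part-1.parquet"],   # files whose schemas may differ
      cast_options: Polars::ScanCastOptions._default_iceberg,
      allow_missing_columns: true,
      extra_columns: "ignore"
    )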
@@ -2,8 +2,9 @@ module Polars
   module IO
     class ScanOptions
       attr_reader :row_index, :pre_slice, :cast_options, :extra_columns, :missing_columns,
-        :include_file_paths, :glob, :hive_partitioning, :hive_schema, :try_parse_hive_dates,
-        :rechunk, :cache, :storage_options, :credential_provider, :retries, :column_mapping, :deletion_files
+        :include_file_paths, :glob, :hidden_file_prefix, :hive_partitioning, :hive_schema, :try_parse_hive_dates,
+        :rechunk, :cache, :storage_options, :credential_provider, :retries, :column_mapping,
+        :default_values, :deletion_files, :table_statistics, :row_count
 
       def initialize(
         row_index: nil,
@@ -13,6 +14,7 @@ module Polars
         missing_columns: "raise",
         include_file_paths: nil,
         glob: true,
+        hidden_file_prefix: nil,
         hive_partitioning: nil,
         hive_schema: nil,
         try_parse_hive_dates: true,
@@ -22,7 +24,10 @@ module Polars
         credential_provider: nil,
         retries: 2,
         column_mapping: nil,
-        deletion_files: nil
+        default_values: nil,
+        deletion_files: nil,
+        table_statistics: nil,
+        row_count: nil
       )
         @row_index = row_index
         @pre_slice = pre_slice
@@ -31,6 +36,7 @@ module Polars
         @missing_columns = missing_columns
         @include_file_paths = include_file_paths
         @glob = glob
+        @hidden_file_prefix = hidden_file_prefix
         @hive_partitioning = hive_partitioning
         @hive_schema = hive_schema
         @try_parse_hive_dates = try_parse_hive_dates
@@ -40,7 +46,10 @@ module Polars
         @credential_provider = credential_provider
         @retries = retries
         @column_mapping = column_mapping
+        @default_values = default_values
         @deletion_files = deletion_files
+        @table_statistics = table_statistics
+        @row_count = row_count
       end
     end
   end
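`ScanOptions` stays a plain value object: each new keyword is stored verbatim and surfaced via `attr_reader`, so the native layer can probe it directly. A quick sketch:

    opts = Polars::IO::ScanOptions.new(hidden_file_prefix: nil, row_count: nil)
    opts.glob       # => true (the default)
    opts.row_count  # => nil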
@@ -0,0 +1,17 @@
+module Polars
+  module IO
+    private
+
+    def get_sources(source)
+      if Utils.pathlike?(source)
+        source = Utils.normalize_filepath(source, check_not_directory: false)
+      elsif Utils.is_path_or_str_sequence(source)
+        source = source.map { |s| Utils.normalize_filepath(s, check_not_directory: false) }
+      end
+      unless source.is_a?(::Array)
+        source = [source]
+      end
+      source
+    end
+  end
+end
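Behavior sketch for the helper (it is private to `Polars::IO`), per the branches above:

    get_sources("data.arrow")            # => ["data.arrow"]
    get_sources(["a.arrow", "b.arrow"])  # => both paths normalized
    get_sources(io)                      # => [io] (file-like objects are wrapped as-is)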
@@ -27,9 +27,6 @@ module Polars
       ldf
     end
 
-    # def self.from_json
-    # end
-
     # Read a logical plan from a JSON file to construct a LazyFrame.
     #
     # @param file [String]
@@ -41,7 +38,49 @@ module Polars
         file = Utils.normalize_filepath(file)
       end
 
-      Utils.wrap_ldf(RbLazyFrame.read_json(file))
+      Utils.wrap_ldf(RbLazyFrame.deserialize_json(file))
+    end
+
+    # Read a logical plan from a file to construct a LazyFrame.
+    #
+    # @param source [Object]
+    #   Path to a file or a file-like object (by file-like object, we refer to
+    #   objects that have a `read` method, such as a file handler or `StringIO`).
+    #
+    # @return [LazyFrame]
+    #
+    # @note
+    #   This function uses marshaling if the logical plan contains Ruby UDFs,
+    #   and as such inherits the security implications. Deserializing can execute
+    #   arbitrary code, so it should only be attempted on trusted data.
+    #
+    # @note
+    #   Serialization is not stable across Polars versions: a LazyFrame serialized
+    #   in one Polars version may not be deserializable in another Polars version.
+    #
+    # @example
+    #   lf = Polars::LazyFrame.new({"a" => [1, 2, 3]}).sum
+    #   bytes = lf.serialize
+    #   Polars::LazyFrame.deserialize(StringIO.new(bytes)).collect
+    #   # =>
+    #   # shape: (1, 1)
+    #   # ┌─────┐
+    #   # │ a   │
+    #   # │ --- │
+    #   # │ i64 │
+    #   # ╞═════╡
+    #   # │ 6   │
+    #   # └─────┘
+    def self.deserialize(source)
+      raise Todo unless RbLazyFrame.respond_to?(:deserialize_binary)
+
+      if Utils.pathlike?(source)
+        source = Utils.normalize_filepath(source)
+      end
+
+      deserializer = RbLazyFrame.method(:deserialize_binary)
+
+      _from_rbldf(deserializer.(source))
     end
 
     # Get or set column names.
@@ -151,6 +190,38 @@ module Polars
       nil
     end
 
+    # Serialize the logical plan of this LazyFrame to a file or string.
+    #
+    # @param file [Object]
+    #   File path to which the result should be written. If set to `nil`
+    #   (default), the output is returned as a string instead.
+    #
+    # @return [Object]
+    #
+    # @note
+    #   Serialization is not stable across Polars versions: a LazyFrame serialized
+    #   in one Polars version may not be deserializable in another Polars version.
+    #
+    # @example Serialize the logical plan into a binary representation.
+    #   lf = Polars::LazyFrame.new({"a" => [1, 2, 3]}).sum
+    #   bytes = lf.serialize
+    #   Polars::LazyFrame.deserialize(StringIO.new(bytes)).collect
+    #   # =>
+    #   # shape: (1, 1)
+    #   # ┌─────┐
+    #   # │ a   │
+    #   # │ --- │
+    #   # │ i64 │
+    #   # ╞═════╡
+    #   # │ 6   │
+    #   # └─────┘
+    def serialize(file = nil)
+      raise Todo unless _ldf.respond_to?(:serialize_binary)
+
+      serializer = _ldf.method(:serialize_binary)
+      Utils.serialize_polars_object(serializer, file)
+    end
+
     # Offers a structured way to apply a sequence of user-defined functions (UDFs).
     #
     # @param func [Object]
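A round-trip sketch combining the two new methods through a file path (path illustrative):

    lf = Polars::LazyFrame.new({"a" => [1, 2, 3]}).sum
    lf.serialize("plan.bin")                            # write the binary plan to disk
    Polars::LazyFrame.deserialize("plan.bin").collect   # rebuild and execute it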
@@ -774,6 +845,21 @@ module Polars
     # @param maintain_order [Boolean]
     #   Maintain the order in which data is processed.
     #   Setting this to `false` will be slightly faster.
+    # @param storage_options [Hash]
+    #   Options that indicate how to connect to a cloud provider.
+    #
+    #   The cloud providers currently supported are AWS, GCP, and Azure.
+    #   See supported keys here:
+    #
+    #   * [aws](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html)
+    #   * [gcp](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html)
+    #   * [azure](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html)
+    #   * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
+    #
+    #   If `storage_options` is not provided, Polars will try to infer the
+    #   information from environment variables.
+    # @param retries [Integer]
+    #   Number of retries if accessing a cloud instance fails.
     # @param type_coercion [Boolean]
     #   Do type coercion optimization.
     # @param predicate_pushdown [Boolean]
@@ -806,6 +892,8 @@ module Polars
       path,
       compression: "zstd",
       maintain_order: true,
+      storage_options: nil,
+      retries: 2,
       type_coercion: true,
       predicate_pushdown: true,
       projection_pushdown: true,
@@ -816,10 +904,6 @@ module Polars
       mkdir: false,
       lazy: false
     )
-      # TODO support storage options in Rust
-      storage_options = nil
-      retries = 2
-
      lf = _set_sink_optimizations(
        type_coercion: type_coercion,
        predicate_pushdown: predicate_pushdown,
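With the TODO removed, the sink now forwards cloud credentials itself instead of discarding them. A hedged sketch, assuming the surrounding method is `sink_parquet` (the hunk does not show its name) and using an illustrative bucket and keys:

    lf = Polars.scan_parquet("local/*.parquet")
    lf.sink_parquet(
      "s3://my-bucket/out.parquet",
      storage_options: {"aws_region" => "us-east-1"},  # object_store config keys
      retries: 2
    )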
@@ -4059,6 +4143,9 @@ module Polars
     #   Names of the struct columns that will be decomposed by its fields
     # @param more_columns [Array]
     #   Additional columns to unnest, specified as positional arguments.
+    # @param separator [String]
+    #   If set, output columns are renamed as the combination of the struct
+    #   column name, the separator, and the field name.
     #
     # @return [LazyFrame]
     #
@@ -4103,11 +4190,11 @@ module Polars
     #   # │ foo    ┆ 1   ┆ a   ┆ true ┆ [1, 2]    ┆ baz  │
     #   # │ bar    ┆ 2   ┆ b   ┆ null ┆ [3]       ┆ womp │
     #   # └────────┴─────┴─────┴──────┴───────────┴──────┘
-    def unnest(columns, *more_columns)
+    def unnest(columns, *more_columns, separator: nil)
       subset = Utils.parse_list_into_selector(columns) | Utils.parse_list_into_selector(
         more_columns
       )
-      _from_rbldf(_ldf.unnest(subset._rbselector))
+      _from_rbldf(_ldf.unnest(subset._rbselector, separator))
     end
 
     # Take two sorted DataFrames and merge them by the sorted key.
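A sketch of the new `separator` keyword; per the docs above, unnested columns keep the struct column name as a prefix (data illustrative, output names inferred from the documented semantics):

    lf = Polars::LazyFrame.new({"meta" => [{"id" => 1, "tag" => "a"}]})
    lf.unnest("meta", separator: "_").collect_schema.names
    # => expected ["meta_id", "meta_tag"]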
@@ -925,7 +925,7 @@ module Polars
     # Convert the series of type `List` to a series of type `Struct`.
     #
     # @param n_field_strategy ["first_non_null", "max_width"]
-    #   Strategy to determine the number of fields of the struct.
+    #   Deprecated and ignored.
     # @param fields [Array]
     #   If the name and number of the desired fields are known in advance,
     #   a list of field names can be given, which will be assigned by index.
@@ -945,20 +945,28 @@ module Polars
     # @return [Expr]
     #
     # @example
-    #   df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
-    #   df.select([Polars.col("a").list.to_struct])
+    #   df = Polars::DataFrame.new({"n" => [[0, 1], [0, 1, 2]]})
+    #   df.with_columns(struct: Polars.col("n").list.to_struct(upper_bound: 2))
     #   # =>
-    #   # shape: (2, 1)
-    #   # ┌────────────┐
-    #   # │ a          │
-    #   # │ ---        │
-    #   # │ struct[3]  │
-    #   # ╞════════════╡
-    #   # │ {1,2,3}    │
-    #   # │ {1,2,null} │
-    #   # └────────────┘
+    #   # shape: (2, 2)
+    #   # ┌───────────┬───────────┐
+    #   # │ n         ┆ struct    │
+    #   # │ ---       ┆ ---       │
+    #   # │ list[i64] ┆ struct[2] │
+    #   # ╞═══════════╪═══════════╡
+    #   # │ [0, 1]    ┆ {0,1}     │
+    #   # │ [0, 1, 2] ┆ {0,1}     │
+    #   # └───────────┴───────────┘
     def to_struct(n_field_strategy: "first_non_null", fields: nil, upper_bound: nil)
-      Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, fields, nil))
+      if !fields.is_a?(::Array)
+        if fields.nil?
+          fields = upper_bound.times.map { |i| "field_#{i}" }
+        else
+          fields = upper_bound.times.map { |i| fields.(i) }
+        end
+      end
+
+      Utils.wrap_expr(_rbexpr.list_to_struct(fields))
     end
 
     # Run any polars expression against the lists' elements.
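Per the rewrite, a callable `fields` is now expanded on the Ruby side, so `upper_bound` must be supplied whenever `fields` is not a literal array (the lambda below is illustrative):

    df = Polars::DataFrame.new({"n" => [[0, 1], [0, 1, 2]]})
    df.select(
      Polars.col("n").list.to_struct(
        fields: ->(i) { "n#{i}" },  # invoked as fields.(i) for each i < upper_bound
        upper_bound: 2
      )
    )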
@@ -755,27 +755,39 @@ module Polars
     #
     # @param n_field_strategy ["first_non_null", "max_width"]
     #   Strategy to determine the number of fields of the struct.
-    # @param name_generator [Object]
-    #   A custom function that can be used to generate the field names.
-    #   Default field names are `field_0, field_1 .. field_n`
-    #
-    # @return [Series]
-    #
-    # @example
-    #   df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
-    #   df.select([Polars.col("a").list.to_struct])
-    #   # =>
-    #   # shape: (2, 1)
-    #   # ┌────────────┐
-    #   # │ a          │
-    #   # │ ---        │
-    #   # │ struct[3]  │
-    #   # ╞════════════╡
-    #   # │ {1,2,3}    │
-    #   # │ {1,2,null} │
-    #   # └────────────┘
-    def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
-      super
+    # @param fields [Object]
+    #   If the name and number of the desired fields are known in advance,
+    #   a list of field names can be given, which will be assigned by index.
+    #   Otherwise, to dynamically assign field names, a custom function can be
+    #   used; if neither is set, fields will be `field_0, field_1 .. field_n`.
+    #
+    # @return [Series]
+    #
+    # @example Convert list to struct with field name assignment by index from a list of names:
+    #   s1 = Polars::Series.new("n", [[0, 1, 2], [0, 1]])
+    #   s1.list.to_struct(fields: ["one", "two", "three"]).struct.unnest
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬───────┐
+    #   # │ one ┆ two ┆ three │
+    #   # │ --- ┆ --- ┆ ---   │
+    #   # │ i64 ┆ i64 ┆ i64   │
+    #   # ╞═════╪═════╪═══════╡
+    #   # │ 0   ┆ 1   ┆ 2     │
+    #   # │ 0   ┆ 1   ┆ null  │
+    #   # └─────┴─────┴───────┘
+    def to_struct(n_field_strategy: "first_non_null", fields: nil)
+      if fields.is_a?(::Array)
+        s = Utils.wrap_s(_s)
+        return (
+          s.to_frame
+            .select_seq(F.col(s.name).list.to_struct(fields: fields))
+            .to_series
+        )
+      end
+
+      raise Todo
+      # Utils.wrap_s(_s.list_to_struct(n_field_strategy, fields))
     end
 
     # Run any polars expression against the lists' elements.
@@ -248,6 +248,31 @@ module Polars
       Selector._from_rbselector(_rbexpr.into_selector)
     end
 
+    # Serialize this expression to a file or string.
+    #
+    # @param file [Object]
+    #   File path to which the result should be written. If set to `nil`
+    #   (default), the output is returned as a string instead.
+    #
+    # @return [Object]
+    #
+    # @note
+    #   Serialization is not stable across Polars versions: an expression serialized
+    #   in one Polars version may not be deserializable in another Polars version.
+    #
+    # @example Serialize the expression into a binary representation.
+    #   expr = Polars.col("foo").sum.over("bar")
+    #   bytes = expr.meta.serialize
+    #   Polars::Expr.deserialize(StringIO.new(bytes))
+    #   # => col("foo").sum().over([col("bar")])
+    def serialize(file = nil)
+      raise Todo unless _rbexpr.respond_to?(:serialize_binary)
+
+      serializer = _rbexpr.method(:serialize_binary)
+
+      Utils.serialize_polars_object(serializer, file)
+    end
+
     # Format the expression as a tree.
     #
     # @param return_as_string [Boolean]