polars-df 0.21.1-x86_64-linux-musl → 0.22.0-x86_64-linux-musl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -72,6 +72,43 @@ module Polars
72
72
  end
73
73
  end
74
74
 
75
+ # Read a serialized DataFrame from a file.
76
+ #
77
+ # @param source [Object]
78
+ # Path to a file or a file-like object (by file-like object, we refer to
79
+ # objects that have a `read` method, such as a file handler or `StringIO`).
80
+ #
81
+ # @return [DataFrame]
82
+ #
83
+ # @note
84
+ # Serialization is not stable across Polars versions: a DataFrame serialized
85
+ # in one Polars version may not be deserializable in another Polars version.
86
+ #
87
+ # @example
88
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => [4.0, 5.0, 6.0]})
89
+ # bytes = df.serialize
90
+ # Polars::DataFrame.deserialize(StringIO.new(bytes))
91
+ # # =>
92
+ # # shape: (3, 2)
93
+ # # ┌─────┬─────┐
94
+ # # │ a ┆ b │
95
+ # # │ --- ┆ --- │
96
+ # # │ i64 ┆ f64 │
97
+ # # ╞═════╪═════╡
98
+ # # │ 1 ┆ 4.0 │
99
+ # # │ 2 ┆ 5.0 │
100
+ # # │ 3 ┆ 6.0 │
101
+ # # └─────┴─────┘
102
+ def self.deserialize(source)
103
+ if Utils.pathlike?(source)
104
+ source = Utils.normalize_filepath(source)
105
+ end
106
+
107
+ deserializer = RbDataFrame.method(:deserialize_binary)
108
+
109
+ _from_rbdf(deserializer.(source))
110
+ end
111
+
75
112
  # @private
76
113
  def self._from_rbdf(rb_df)
77
114
  df = DataFrame.allocate
@@ -562,8 +599,6 @@ module Polars
562
599
 
563
600
  # Convert every row to a hash.
564
601
  #
565
- # Note that this is slow.
566
- #
567
602
  # @return [Array]
568
603
  #
569
604
  # @example
@@ -572,12 +607,7 @@ module Polars
572
607
  # # =>
573
608
  # # [{"foo"=>1, "bar"=>4}, {"foo"=>2, "bar"=>5}, {"foo"=>3, "bar"=>6}]
574
609
  def to_hashes
575
- rbdf = _df
576
- names = columns
577
-
578
- height.times.map do |i|
579
- names.zip(rbdf.row_tuple(i)).to_h
580
- end
610
+ rows(named: true)
581
611
  end
582
612
 
583
613
  # Convert DataFrame to a 2D Numo array.
@@ -634,6 +664,44 @@ module Polars
634
664
  Utils.wrap_s(_df.select_at_idx(index))
635
665
  end
636
666
 
667
+ # Serialize this DataFrame to a file or string.
668
+ #
669
+ # @param file [Object]
670
+ # File path or writable file-like object to which the result will be written.
671
+ # If set to `nil` (default), the output is returned as a string instead.
672
+ #
673
+ # @return [Object]
674
+ #
675
+ # @note
676
+ # Serialization is not stable across Polars versions: a DataFrame serialized
677
+ # in one Polars version may not be deserializable in another Polars version.
678
+ #
679
+ # @example
680
+ # df = Polars::DataFrame.new(
681
+ # {
682
+ # "foo" => [1, 2, 3],
683
+ # "bar" => [6, 7, 8]
684
+ # }
685
+ # )
686
+ # bytes = df.serialize
687
+ # Polars::DataFrame.deserialize(StringIO.new(bytes))
688
+ # # =>
689
+ # # shape: (3, 2)
690
+ # # ┌─────┬─────┐
691
+ # # │ foo ┆ bar │
692
+ # # │ --- ┆ --- │
693
+ # # │ i64 ┆ i64 │
694
+ # # ╞═════╪═════╡
695
+ # # │ 1 ┆ 6 │
696
+ # # │ 2 ┆ 7 │
697
+ # # │ 3 ┆ 8 │
698
+ # # └─────┴─────┘
699
+ def serialize(file = nil)
700
+ serializer = _df.method(:serialize_binary)
701
+
702
+ Utils.serialize_polars_object(serializer, file)
703
+ end
704
+
637
705
  # Serialize to JSON representation.
638
706
  #
639
707
  # @param file [String]
@@ -1148,6 +1216,40 @@ module Polars
1148
1216
  end
1149
1217
  end
1150
1218
 
1219
+ # Write DataFrame to an Iceberg table.
1220
+ #
1221
+ # @note
1222
+ # This functionality is currently considered **unstable**. It may be
1223
+ # changed at any point without it being considered a breaking change.
1224
+ #
1225
+ # @param target [Object]
1226
+ # Name of the table or the Table object representing an Iceberg table.
1227
+ # @param mode ['append', 'overwrite']
1228
+ # How to handle existing data.
1229
+ #
1230
+ # - If 'append', will add new data.
1231
+ # - If 'overwrite', will replace table with new data.
1232
+ #
1233
+ # @return [nil]
1234
+ def write_iceberg(target, mode:)
1235
+ require "iceberg"
1236
+
1237
+ table =
1238
+ if target.is_a?(Iceberg::Table)
1239
+ target
1240
+ else
1241
+ raise Todo
1242
+ end
1243
+
1244
+ data = self
1245
+
1246
+ if mode == "append"
1247
+ table.append(data)
1248
+ else
1249
+ raise Todo
1250
+ end
1251
+ end
1252
+
1151
1253
  # Write DataFrame as delta table.
1152
1254
  #
1153
1255
  # @param target [Object]
@@ -110,12 +110,23 @@ module Polars
110
110
  DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.from_dtype(self))
111
111
  end
112
112
 
113
- [:numeric?, :decimal?, :integer?, :signed_integer?, :unsigned_integer?, :float?, :temporal?, :nested?, :to_dtype_expr].each do |v|
113
+ [:numeric?, :decimal?, :integer?, :signed_integer?, :unsigned_integer?, :float?, :temporal?, :nested?].each do |v|
114
114
  define_method(v) do
115
115
  self.class.public_send(v)
116
116
  end
117
117
  end
118
118
 
119
+ # Return a `DataTypeExpr` with a static `DataType`.
120
+ #
121
+ # @return [DataTypeExpr]
122
+ #
123
+ # @example
124
+ # Polars::Int16.new.to_dtype_expr.collect_dtype({})
125
+ # # => Polars::Int16
126
+ def to_dtype_expr
127
+ DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.from_dtype(self))
128
+ end
129
+
119
130
  # Returns a string representing the data type.
120
131
  #
121
132
  # @return [String]
@@ -317,11 +328,9 @@ module Polars
317
328
  class Categories
318
329
  attr_accessor :_categories
319
330
 
320
- def initialize
321
- # TODO fix
322
- name = nil
331
+ def initialize(name = nil)
323
332
  if name.nil? || name == ""
324
- @_categories = RbCategories.global_categories
333
+ self._categories = RbCategories.global_categories
325
334
  return
326
335
  end
327
336
 
@@ -1188,7 +1188,7 @@ module Polars
1188
1188
  if Utils::DTYPE_TEMPORAL_UNITS.include?(time_unit)
1189
1189
  timestamp(time_unit)
1190
1190
  elsif time_unit == "s"
1191
- Utils.wrap_expr(_rbexpr.dt_epoch_seconds)
1191
+ timestamp("ms").floordiv(F.lit(1000, dtype: Int64))
1192
1192
  elsif time_unit == "d"
1193
1193
  Utils.wrap_expr(_rbexpr).cast(:date).cast(:i32)
1194
1194
  else
data/lib/polars/expr.rb CHANGED
@@ -146,6 +146,40 @@ module Polars
146
146
  wrap_expr(_rbexpr.neg)
147
147
  end
148
148
 
149
+ # Read a serialized expression from a file.
150
+ #
151
+ # @param source [Object]
152
+ # Path to a file or a file-like object (by file-like object, we refer to
153
+ # objects that have a `read` method, such as a file handler or `StringIO`).
154
+ #
155
+ # @return [Expr]
156
+ #
157
+ # @note
158
+ # This function uses marshaling if the logical plan contains Ruby UDFs,
159
+ # and as such inherits the security implications. Deserializing can execute
160
+ # arbitrary code, so it should only be attempted on trusted data.
161
+ #
162
+ # @note
163
+ # Serialization is not stable across Polars versions: an expression serialized
164
+ # in one Polars version may not be deserializable in another Polars version.
165
+ #
166
+ # @example
167
+ # expr = Polars.col("foo").sum.over("bar")
168
+ # bytes = expr.meta.serialize
169
+ # Polars::Expr.deserialize(StringIO.new(bytes))
170
+ # # => col("foo").sum().over([col("bar")])
171
+ def self.deserialize(source)
172
+ raise Todo unless RbExpr.respond_to?(:deserialize_binary)
173
+
174
+ if Utils.pathlike?(source)
175
+ source = Utils.normalize_filepath(source)
176
+ end
177
+
178
+ deserializer = RbExpr.method(:deserialize_binary)
179
+
180
+ _from_rbexpr(deserializer.(source))
181
+ end
182
+
149
183
  # Cast to physical representation of the logical dtype.
150
184
  #
151
185
  # - `:date` -> `:i32`
@@ -377,8 +411,6 @@ module Polars
377
411
  wrap_expr(_rbexpr._alias(name))
378
412
  end
379
413
 
380
- # TODO support symbols for exclude
381
-
382
414
  # Exclude certain columns from a wildcard/regex selection.
383
415
  #
384
416
  # You may also use regexes in the exclude list. They must start with `^` and end
@@ -1787,7 +1819,7 @@ module Polars
1787
1819
  wrap_expr(_rbexpr.arg_min)
1788
1820
  end
1789
1821
 
1790
- # Get the index of the first occurrence of a value, or ``None`` if it's not found.
1822
+ # Get the index of the first occurrence of a value, or `nil` if it's not found.
1791
1823
  #
1792
1824
  # @param element [Object]
1793
1825
  # Value to find.
@@ -7571,7 +7603,8 @@ module Polars
7571
7603
  # # │ 1.584963 │
7572
7604
  # # └──────────┘
7573
7605
  def log(base = Math::E)
7574
- wrap_expr(_rbexpr.log(base))
7606
+ base_rbexpr = Utils.parse_into_expression(base)
7607
+ wrap_expr(_rbexpr.log(base_rbexpr))
7575
7608
  end
7576
7609
 
7577
7610
  # Compute the natural logarithm of each element plus one.
@@ -7743,33 +7776,9 @@ module Polars
7743
7776
  # This can be used to reduce memory pressure.
7744
7777
  #
7745
7778
  # @return [Expr]
7746
- #
7747
- # @example
7748
- # Polars::DataFrame.new(
7749
- # {
7750
- # "a" => [1, 2, 3],
7751
- # "b" => [1, 2, 2 << 32],
7752
- # "c" => [-1, 2, 1 << 30],
7753
- # "d" => [-112, 2, 112],
7754
- # "e" => [-112, 2, 129],
7755
- # "f" => ["a", "b", "c"],
7756
- # "g" => [0.1, 1.32, 0.12],
7757
- # "h" => [true, nil, false]
7758
- # }
7759
- # ).select(Polars.all.shrink_dtype)
7760
- # # =>
7761
- # # shape: (3, 8)
7762
- # # ┌─────┬────────────┬────────────┬──────┬──────┬─────┬──────┬───────┐
7763
- # # │ a ┆ b ┆ c ┆ d ┆ e ┆ f ┆ g ┆ h │
7764
- # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
7765
- # # │ i8 ┆ i64 ┆ i32 ┆ i8 ┆ i16 ┆ str ┆ f32 ┆ bool │
7766
- # # ╞═════╪════════════╪════════════╪══════╪══════╪═════╪══════╪═══════╡
7767
- # # │ 1 ┆ 1 ┆ -1 ┆ -112 ┆ -112 ┆ a ┆ 0.1 ┆ true │
7768
- # # │ 2 ┆ 2 ┆ 2 ┆ 2 ┆ 2 ┆ b ┆ 1.32 ┆ null │
7769
- # # │ 3 ┆ 8589934592 ┆ 1073741824 ┆ 112 ┆ 129 ┆ c ┆ 0.12 ┆ false │
7770
- # # └─────┴────────────┴────────────┴──────┴──────┴─────┴──────┴───────┘
7771
7779
  def shrink_dtype
7772
- wrap_expr(_rbexpr.shrink_dtype)
7780
+ warn "`Expr.shrink_dtype` is deprecated and is a no-op; use `Series.shrink_dtype` instead."
7781
+ self
7773
7782
  end
7774
7783
 
7775
7784
  # Bin values into buckets and count their occurrences.
@@ -0,0 +1,95 @@
1
+ module Polars
2
+ module Functions
3
+ # Count the number of business days between `start` and `stop` (not including `stop`).
4
+ #
5
+ # @note
6
+ # This functionality is considered **unstable**. It may be changed
7
+ # at any point without it being considered a breaking change.
8
+ #
9
+ # @param start [Object]
10
+ # Start dates.
11
+ # @param stop [Object]
12
+ # End dates.
13
+ # @param week_mask [Array]
14
+ # Which days of the week to count. The default is Monday to Friday.
15
+ # If you wanted to count only Monday to Thursday, you would pass
16
+ # `[true, true, true, true, false, false, false]`.
17
+ # @param holidays [Array]
18
+ # Holidays to exclude from the count.
19
+ #
20
+ # @return [Expr]
21
+ #
22
+ # @example
23
+ # df = Polars::DataFrame.new(
24
+ # {
25
+ # "start" => [Date.new(2020, 1, 1), Date.new(2020, 1, 2)],
26
+ # "end" => [Date.new(2020, 1, 2), Date.new(2020, 1, 10)]
27
+ # }
28
+ # )
29
+ # df.with_columns(
30
+ # business_day_count: Polars.business_day_count("start", "end")
31
+ # )
32
+ # # =>
33
+ # # shape: (2, 3)
34
+ # # ┌────────────┬────────────┬────────────────────┐
35
+ # # │ start ┆ end ┆ business_day_count │
36
+ # # │ --- ┆ --- ┆ --- │
37
+ # # │ date ┆ date ┆ i32 │
38
+ # # ╞════════════╪════════════╪════════════════════╡
39
+ # # │ 2020-01-01 ┆ 2020-01-02 ┆ 1 │
40
+ # # │ 2020-01-02 ┆ 2020-01-10 ┆ 6 │
41
+ # # └────────────┴────────────┴────────────────────┘
42
+ #
43
+ # @example You can pass a custom weekend - for example, if you only take Sunday off:
44
+ # week_mask = [true, true, true, true, true, true, false]
45
+ # df.with_columns(
46
+ # business_day_count: Polars.business_day_count(
47
+ # "start", "end", week_mask: week_mask
48
+ # )
49
+ # )
50
+ # # =>
51
+ # # shape: (2, 3)
52
+ # # ┌────────────┬────────────┬────────────────────┐
53
+ # # │ start ┆ end ┆ business_day_count │
54
+ # # │ --- ┆ --- ┆ --- │
55
+ # # │ date ┆ date ┆ i32 │
56
+ # # ╞════════════╪════════════╪════════════════════╡
57
+ # # │ 2020-01-01 ┆ 2020-01-02 ┆ 1 │
58
+ # # │ 2020-01-02 ┆ 2020-01-10 ┆ 7 │
59
+ # # └────────────┴────────────┴────────────────────┘
60
+ #
61
+ # @example You can also pass a list of holidays to exclude from the count:
62
+ # holidays = [Date.new(2020, 1, 1), Date.new(2020, 1, 2)]
63
+ # df.with_columns(
64
+ # business_day_count: Polars.business_day_count("start", "end", holidays: holidays)
65
+ # )
66
+ # # =>
67
+ # # shape: (2, 3)
68
+ # # ┌────────────┬────────────┬────────────────────┐
69
+ # # │ start ┆ end ┆ business_day_count │
70
+ # # │ --- ┆ --- ┆ --- │
71
+ # # │ date ┆ date ┆ i32 │
72
+ # # ╞════════════╪════════════╪════════════════════╡
73
+ # # │ 2020-01-01 ┆ 2020-01-02 ┆ 0 │
74
+ # # │ 2020-01-02 ┆ 2020-01-10 ┆ 5 │
75
+ # # └────────────┴────────────┴────────────────────┘
76
+ def business_day_count(
77
+ start,
78
+ stop,
79
+ week_mask: [true, true, true, true, true, false, false],
80
+ holidays: []
81
+ )
82
+ start_rbexpr = Utils.parse_into_expression(start)
83
+ end_rbexpr = Utils.parse_into_expression(stop)
84
+ unix_epoch = ::Date.new(1970, 1, 1)
85
+ Utils.wrap_expr(
86
+ Plr.business_day_count(
87
+ start_rbexpr,
88
+ end_rbexpr,
89
+ week_mask,
90
+ holidays.map { |holiday| holiday - unix_epoch }
91
+ )
92
+ )
93
+ end
94
+ end
95
+ end
@@ -823,7 +823,7 @@ module Polars
823
823
  # # ┌─────┐
824
824
  # # │ sum │
825
825
  # # │ --- │
826
- # # │ i64
826
+ # # │ i32
827
827
  # # ╞═════╡
828
828
  # # │ 10 │
829
829
  # # │ 13 │
@@ -0,0 +1,27 @@
1
+ module Polars
2
+ module IO
3
+ # Lazily read from an Apache Iceberg table.
4
+ #
5
+ # @param source [Object]
6
+ # An Iceberg Ruby table, or a direct path to the metadata.
7
+ # @param snapshot_id [Integer]
8
+ # The snapshot ID to scan from.
9
+ # @param storage_options [Hash]
10
+ # Extra options for the storage backends.
11
+ #
12
+ # @return [LazyFrame]
13
+ def scan_iceberg(
14
+ source,
15
+ snapshot_id: nil,
16
+ storage_options: nil
17
+ )
18
+ require "iceberg"
19
+
20
+ unless source.is_a?(Iceberg::Table)
21
+ raise Todo
22
+ end
23
+
24
+ source.to_polars(snapshot_id:, storage_options:)
25
+ end
26
+ end
27
+ end
@@ -117,14 +117,13 @@ module Polars
117
117
  # @param source [Object]
118
118
  # Path to a file or a file-like object.
119
119
  #
120
- # @return [Hash]
120
+ # @return [Schema]
121
121
  def read_parquet_schema(source)
122
122
  if Utils.pathlike?(source)
123
123
  source = Utils.normalize_filepath(source)
124
124
  end
125
125
 
126
- # TODO return Schema
127
- scan_parquet(source).collect_schema.to_h
126
+ scan_parquet(source).collect_schema
128
127
  end
129
128
 
130
129
  # Get file-level custom metadata of a Parquet file without reading data.
@@ -207,6 +206,9 @@ module Polars
207
206
  # defined schema are encountered in the data:
208
207
  # * `ignore`: Silently ignores.
209
208
  # * `raise`: Raises an error.
209
+ # @param cast_options [Object]
210
+ # Configuration for column type-casting during scans. Useful for datasets
211
+ # containing files that have differing schemas.
210
212
  #
211
213
  # @return [LazyFrame]
212
214
  def scan_parquet(
@@ -230,6 +232,7 @@ module Polars
230
232
  include_file_paths: nil,
231
233
  allow_missing_columns: false,
232
234
  extra_columns: "raise",
235
+ cast_options: nil,
233
236
  _column_mapping: nil,
234
237
  _deletion_files: nil
235
238
  )
@@ -268,7 +271,7 @@ module Polars
268
271
  ScanOptions.new(
269
272
  row_index: !row_index_name.nil? ? [row_index_name, row_index_offset] : nil,
270
273
  pre_slice: !n_rows.nil? ? [0, n_rows] : nil,
271
- # cast_options: cast_options,
274
+ cast_options: cast_options,
272
275
  extra_columns: extra_columns,
273
276
  missing_columns: missing_columns,
274
277
  include_file_paths: include_file_paths,
@@ -3,7 +3,8 @@ module Polars
3
3
  class ScanOptions
4
4
  attr_reader :row_index, :pre_slice, :cast_options, :extra_columns, :missing_columns,
5
5
  :include_file_paths, :glob, :hive_partitioning, :hive_schema, :try_parse_hive_dates,
6
- :rechunk, :cache, :storage_options, :credential_provider, :retries, :column_mapping, :deletion_files
6
+ :rechunk, :cache, :storage_options, :credential_provider, :retries, :column_mapping,
7
+ :default_values, :deletion_files
7
8
 
8
9
  def initialize(
9
10
  row_index: nil,
@@ -22,6 +23,7 @@ module Polars
22
23
  credential_provider: nil,
23
24
  retries: 2,
24
25
  column_mapping: nil,
26
+ default_values: nil,
25
27
  deletion_files: nil
26
28
  )
27
29
  @row_index = row_index
@@ -40,6 +42,7 @@ module Polars
40
42
  @credential_provider = credential_provider
41
43
  @retries = retries
42
44
  @column_mapping = column_mapping
45
+ @default_values = default_values
43
46
  @deletion_files = deletion_files
44
47
  end
45
48
  end
@@ -27,9 +27,6 @@ module Polars
27
27
  ldf
28
28
  end
29
29
 
30
- # def self.from_json
31
- # end
32
-
33
30
  # Read a logical plan from a JSON file to construct a LazyFrame.
34
31
  #
35
32
  # @param file [String]
@@ -41,7 +38,49 @@ module Polars
41
38
  file = Utils.normalize_filepath(file)
42
39
  end
43
40
 
44
- Utils.wrap_ldf(RbLazyFrame.read_json(file))
41
+ Utils.wrap_ldf(RbLazyFrame.deserialize_json(file))
42
+ end
43
+
44
+ # Read a logical plan from a file to construct a LazyFrame.
45
+ #
46
+ # @param source [Object]
47
+ # Path to a file or a file-like object (by file-like object, we refer to
48
+ # objects that have a `read` method, such as a file handler or `StringIO`).
49
+ #
50
+ # @return [LazyFrame]
51
+ #
52
+ # @note
53
+ # This function uses marshaling if the logical plan contains Ruby UDFs,
54
+ # and as such inherits the security implications. Deserializing can execute
55
+ # arbitrary code, so it should only be attempted on trusted data.
56
+ #
57
+ # @note
58
+ # Serialization is not stable across Polars versions: a LazyFrame serialized
59
+ # in one Polars version may not be deserializable in another Polars version.
60
+ #
61
+ # @example
62
+ # lf = Polars::LazyFrame.new({"a" => [1, 2, 3]}).sum
63
+ # bytes = lf.serialize
64
+ # Polars::LazyFrame.deserialize(StringIO.new(bytes)).collect
65
+ # # =>
66
+ # # shape: (1, 1)
67
+ # # ┌─────┐
68
+ # # │ a │
69
+ # # │ --- │
70
+ # # │ i64 │
71
+ # # ╞═════╡
72
+ # # │ 6 │
73
+ # # └─────┘
74
+ def self.deserialize(source)
75
+ raise Todo unless RbLazyFrame.respond_to?(:deserialize_binary)
76
+
77
+ if Utils.pathlike?(source)
78
+ source = Utils.normalize_filepath(source)
79
+ end
80
+
81
+ deserializer = RbLazyFrame.method(:deserialize_binary)
82
+
83
+ _from_rbldf(deserializer.(source))
45
84
  end
46
85
 
47
86
  # Get or set column names.
@@ -151,6 +190,38 @@ module Polars
151
190
  nil
152
191
  end
153
192
 
193
+ # Serialize the logical plan of this LazyFrame to a file or string.
194
+ #
195
+ # @param file [Object]
196
+ # File path to which the result should be written. If set to `nil`
197
+ # (default), the output is returned as a string instead.
198
+ #
199
+ # @return [Object]
200
+ #
201
+ # @note
202
+ # Serialization is not stable across Polars versions: a LazyFrame serialized
203
+ # in one Polars version may not be deserializable in another Polars version.
204
+ #
205
+ # @example Serialize the logical plan into a binary representation.
206
+ # lf = Polars::LazyFrame.new({"a" => [1, 2, 3]}).sum
207
+ # bytes = lf.serialize
208
+ # Polars::LazyFrame.deserialize(StringIO.new(bytes)).collect
209
+ # # =>
210
+ # # shape: (1, 1)
211
+ # # ┌─────┐
212
+ # # │ a │
213
+ # # │ --- │
214
+ # # │ i64 │
215
+ # # ╞═════╡
216
+ # # │ 6 │
217
+ # # └─────┘
218
+ def serialize(file = nil)
219
+ raise Todo unless _ldf.respond_to?(:serialize_binary)
220
+
221
+ serializer = _ldf.method(:serialize_binary)
222
+ Utils.serialize_polars_object(serializer, file)
223
+ end
224
+
154
225
  # Offers a structured way to apply a sequence of user-defined functions (UDFs).
155
226
  #
156
227
  # @param func [Object]
@@ -774,6 +845,21 @@ module Polars
774
845
  # @param maintain_order [Boolean]
775
846
  # Maintain the order in which data is processed.
776
847
  # Setting this to `false` will be slightly faster.
848
+ # @param storage_options [Hash]
849
+ # Options that indicate how to connect to a cloud provider.
850
+ #
851
+ # The cloud providers currently supported are AWS, GCP, and Azure.
852
+ # See supported keys here:
853
+ #
854
+ # * [aws](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html)
855
+ # * [gcp](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html)
856
+ # * [azure](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html)
857
+ # * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
858
+ #
859
+ # If `storage_options` is not provided, Polars will try to infer the
860
+ # information from environment variables.
861
+ # @param retries [Integer]
862
+ # Number of retries if accessing a cloud instance fails.
777
863
  # @param type_coercion [Boolean]
778
864
  # Do type coercion optimization.
779
865
  # @param predicate_pushdown [Boolean]
@@ -806,6 +892,8 @@ module Polars
806
892
  path,
807
893
  compression: "zstd",
808
894
  maintain_order: true,
895
+ storage_options: nil,
896
+ retries: 2,
809
897
  type_coercion: true,
810
898
  predicate_pushdown: true,
811
899
  projection_pushdown: true,
@@ -816,10 +904,6 @@ module Polars
816
904
  mkdir: false,
817
905
  lazy: false
818
906
  )
819
- # TODO support storage options in Rust
820
- storage_options = nil
821
- retries = 2
822
-
823
907
  lf = _set_sink_optimizations(
824
908
  type_coercion: type_coercion,
825
909
  predicate_pushdown: predicate_pushdown,