polars-df 0.21.1-arm64-darwin → 0.22.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -925,7 +925,7 @@ module Polars
925
925
  # Convert the series of type `List` to a series of type `Struct`.
926
926
  #
927
927
  # @param n_field_strategy ["first_non_null", "max_width"]
928
- # Strategy to determine the number of fields of the struct.
928
+ # Deprecated and ignored.
929
929
  # @param fields [Array]
930
930
  # If the names and number of the desired fields are known in advance
931
931
  # a list of field names can be given, which will be assigned by index.
@@ -945,20 +945,28 @@ module Polars
945
945
  # @return [Expr]
946
946
  #
947
947
  # @example
948
- # df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
949
- # df.select([Polars.col("a").list.to_struct])
948
+ # df = Polars::DataFrame.new({"n" => [[0, 1], [0, 1, 2]]})
949
+ # df.with_columns(struct: Polars.col("n").list.to_struct(upper_bound: 2))
950
950
  # # =>
951
- # # shape: (2, 1)
952
- # # ┌────────────┐
953
- # # │ a          │
954
- # # │ ---        │
955
- # # │ struct[3]  │
956
- # # ╞════════════╡
957
- # # │ {1,2,3}    │
958
- # # │ {1,2,null} │
959
- # # └────────────┘
951
+ # # shape: (2, 2)
952
+ # # ┌───────────┬───────────┐
953
+ # # │ n         ┆ struct    │
954
+ # # │ ---       ┆ ---       │
955
+ # # │ list[i64] ┆ struct[2] │
956
+ # # ╞═══════════╪═══════════╡
957
+ # # │ [0, 1]    ┆ {0,1}     │
958
+ # # │ [0, 1, 2] ┆ {0,1}     │
959
+ # # └───────────┴───────────┘
960
960
  def to_struct(n_field_strategy: "first_non_null", fields: nil, upper_bound: nil)
961
- Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, fields, nil))
961
+ if !fields.is_a?(::Array)
962
+ if fields.nil?
963
+ fields = upper_bound.times.map { |i| "field_#{i}" }
964
+ else
965
+ fields = upper_bound.times.map { |i| fields.(i) }
966
+ end
967
+ end
968
+
969
+ Utils.wrap_expr(_rbexpr.list_to_struct(fields))
962
970
  end
963
971
 
964
972
  # Run any polars expression against the lists' elements.
@@ -755,27 +755,39 @@ module Polars
755
755
  #
756
756
  # @param n_field_strategy ["first_non_null", "max_width"]
757
757
  # Strategy to determine the number of fields of the struct.
758
- # @param name_generator [Object]
759
- # A custom function that can be used to generate the field names.
760
- # Default field names are `field_0, field_1 .. field_n`
761
- #
762
- # @return [Series]
763
- #
764
- # @example
765
- # df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
766
- # df.select([Polars.col("a").list.to_struct])
767
- # # =>
768
- # # shape: (2, 1)
769
- # # ┌────────────┐
770
- # # │ a          │
771
- # # │ ---        │
772
- # # │ struct[3]  │
773
- # # ╞════════════╡
774
- # # │ {1,2,3}    │
775
- # # │ {1,2,null} │
776
- # # └────────────┘
777
- def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
778
- super
758
+ # @param fields [Object]
759
+ # If the names and number of the desired fields are known in advance
760
+ # a list of field names can be given, which will be assigned by index.
761
+ # Otherwise, to dynamically assign field names, a custom function can be
762
+ # used; if neither is set, fields will be `field_0, field_1 .. field_n`.
763
+ #
764
+ # @return [Series]
765
+ #
766
+ # @example Convert list to struct with field name assignment by index from a list of names:
767
+ # s1 = Polars::Series.new("n", [[0, 1, 2], [0, 1]])
768
+ # s1.list.to_struct(fields: ["one", "two", "three"]).struct.unnest
769
+ # # =>
770
+ # # shape: (2, 3)
771
+ # # ┌─────┬─────┬───────┐
772
+ # # │ one ┆ two ┆ three │
773
+ # # │ --- ┆ --- ┆ ---   │
774
+ # # │ i64 ┆ i64 ┆ i64   │
775
+ # # ╞═════╪═════╪═══════╡
776
+ # # │ 0   ┆ 1   ┆ 2     │
777
+ # # │ 0   ┆ 1   ┆ null  │
778
+ # # └─────┴─────┴───────┘
779
+ def to_struct(n_field_strategy: "first_non_null", fields: nil)
780
+ if fields.is_a?(::Array)
781
+ s = Utils.wrap_s(_s)
782
+ return (
783
+ s.to_frame
784
+ .select_seq(F.col(s.name).list.to_struct(fields: fields))
785
+ .to_series
786
+ )
787
+ end
788
+
789
+ raise Todo
790
+ # Utils.wrap_s(_s.list_to_struct(n_field_strategy, fields))
779
791
  end
780
792
 
781
793
  # Run any polars expression against the lists' elements.
@@ -248,6 +248,31 @@ module Polars
248
248
  Selector._from_rbselector(_rbexpr.into_selector)
249
249
  end
250
250
 
251
+ # Serialize this expression to a file or string.
252
+ #
253
+ # @param file [Object]
254
+ # File path to which the result should be written. If set to `nil`
255
+ # (default), the output is returned as a string instead.
256
+ #
257
+ # @return [Object]
258
+ #
259
+ # @note
260
+ # Serialization is not stable across Polars versions: an expression serialized
261
+ # in one Polars version may not be deserializable in another Polars version.
262
+ #
263
+ # @example Serialize the expression into a binary representation.
264
+ # expr = Polars.col("foo").sum.over("bar")
265
+ # bytes = expr.meta.serialize
266
+ # Polars::Expr.deserialize(StringIO.new(bytes))
267
+ # # => col("foo").sum().over([col("bar")])
268
+ def serialize(file = nil)
269
+ raise Todo unless _rbexpr.respond_to?(:serialize_binary)
270
+
271
+ serializer = _rbexpr.method(:serialize_binary)
272
+
273
+ Utils.serialize_polars_object(serializer, file)
274
+ end
275
+
251
276
  # Format the expression as a tree.
252
277
  #
253
278
  # @param return_as_string [Boolean]
@@ -0,0 +1,50 @@
1
+ module Polars
2
+ # The set of the optimizations considered during query optimization.
3
+ #
4
+ # @note
5
+ # This functionality is considered **unstable**. It may be changed
6
+ # at any point without it being considered a breaking change.
7
+ class QueryOptFlags
8
+ def initialize(
9
+ predicate_pushdown: nil,
10
+ projection_pushdown: nil,
11
+ simplify_expression: nil,
12
+ slice_pushdown: nil,
13
+ comm_subplan_elim: nil,
14
+ comm_subexpr_elim: nil,
15
+ cluster_with_columns: nil,
16
+ collapse_joins: nil,
17
+ check_order_observe: nil,
18
+ fast_projection: nil
19
+ )
20
+ @_rboptflags = RbOptFlags.default
21
+ update(
22
+ predicate_pushdown: predicate_pushdown,
23
+ projection_pushdown: projection_pushdown,
24
+ simplify_expression: simplify_expression,
25
+ slice_pushdown: slice_pushdown,
26
+ comm_subplan_elim: comm_subplan_elim,
27
+ comm_subexpr_elim: comm_subexpr_elim,
28
+ cluster_with_columns: cluster_with_columns,
29
+ collapse_joins: collapse_joins,
30
+ check_order_observe: check_order_observe,
31
+ fast_projection: fast_projection
32
+ )
33
+ end
34
+
35
+ def update(
36
+ predicate_pushdown: nil,
37
+ projection_pushdown: nil,
38
+ simplify_expression: nil,
39
+ slice_pushdown: nil,
40
+ comm_subplan_elim: nil,
41
+ comm_subexpr_elim: nil,
42
+ cluster_with_columns: nil,
43
+ collapse_joins: nil,
44
+ check_order_observe: nil,
45
+ fast_projection: nil
46
+ )
47
+ raise Todo
48
+ end
49
+ end
50
+ end
@@ -1,6 +1,8 @@
1
1
  module Polars
2
2
  # Options for scanning files.
3
3
  class ScanCastOptions
4
+ attr_reader :integer_cast, :float_cast, :datetime_cast, :missing_struct_fields, :extra_struct_fields
5
+
4
6
  # Common configuration for scanning files.
5
7
  #
6
8
  # @note
@@ -50,6 +52,10 @@ module Polars
50
52
  extra_struct_fields: "raise",
51
53
  _internal_call: false
52
54
  )
55
+ if !_internal_call
56
+ warn "ScanCastOptions is considered unstable."
57
+ end
58
+
53
59
  @integer_cast = integer_cast
54
60
  @float_cast = float_cast
55
61
  @datetime_cast = datetime_cast
@@ -57,8 +63,21 @@ module Polars
57
63
  @extra_struct_fields = extra_struct_fields
58
64
  end
59
65
 
60
- def self.default
66
+ def self._default
61
67
  new(_internal_call: true)
62
68
  end
69
+
70
+ def self._default_iceberg
71
+ @_default_cast_options_iceberg ||= begin
72
+ ScanCastOptions.new(
73
+ integer_cast: "upcast",
74
+ float_cast: ["upcast", "downcast"],
75
+ datetime_cast: ["nanosecond-downcast", "convert-timezone"],
76
+ missing_struct_fields: "insert",
77
+ extra_struct_fields: "ignore",
78
+ _internal_call: true
79
+ )
80
+ end
81
+ end
63
82
  end
64
83
  end
data/lib/polars/schema.rb CHANGED
@@ -30,7 +30,7 @@ module Polars
30
30
  #
31
31
  # @return [Object]
32
32
  def []=(name, dtype)
33
- # TODO check dtype
33
+ _check_dtype(dtype)
34
34
  @schema[name] = dtype
35
35
  end
36
36
 
data/lib/polars/series.rb CHANGED
@@ -5307,7 +5307,7 @@ module Polars
5307
5307
  # # 6
5308
5308
  # # ]
5309
5309
  def shrink_dtype
5310
- super
5310
+ Utils.wrap_s(_s.shrink_dtype)
5311
5311
  end
5312
5312
 
5313
5313
  # Get the chunks of this Series as a list of Series.
@@ -5851,6 +5851,7 @@ module Polars
5851
5851
  Int16 => RbSeries.method(:new_opt_i16),
5852
5852
  Int32 => RbSeries.method(:new_opt_i32),
5853
5853
  Int64 => RbSeries.method(:new_opt_i64),
5854
+ Int128 => RbSeries.method(:new_opt_i128),
5854
5855
  UInt8 => RbSeries.method(:new_opt_u8),
5855
5856
  UInt16 => RbSeries.method(:new_opt_u16),
5856
5857
  UInt32 => RbSeries.method(:new_opt_u32),
@@ -5876,6 +5877,7 @@ module Polars
5876
5877
  i16: RbSeries.method(:new_opt_i16),
5877
5878
  i32: RbSeries.method(:new_opt_i32),
5878
5879
  i64: RbSeries.method(:new_opt_i64),
5880
+ i128: RbSeries.method(:new_opt_i128),
5879
5881
  u8: RbSeries.method(:new_opt_u8),
5880
5882
  u16: RbSeries.method(:new_opt_u16),
5881
5883
  u32: RbSeries.method(:new_opt_u32),
@@ -222,10 +222,8 @@ module Polars
222
222
 
223
223
  # Convert a String column into a Decimal column.
224
224
  #
225
- # This method infers the needed parameters `precision` and `scale`.
226
- #
227
- # @param inference_length [Integer]
228
- # Number of elements to parse to determine the `precision` and `scale`.
225
+ # @param scale [Integer]
226
+ # Number of digits after the decimal point to use for the decimals.
229
227
  #
230
228
  # @return [Expr]
231
229
  #
@@ -243,7 +241,7 @@ module Polars
243
241
  # ]
244
242
  # }
245
243
  # )
246
- # df.with_columns(numbers_decimal: Polars.col("numbers").str.to_decimal)
244
+ # df.with_columns(numbers_decimal: Polars.col("numbers").str.to_decimal(scale: 2))
247
245
  # # =>
248
246
  # # shape: (7, 2)
249
247
  # # ┌───────────┬─────────────────┐
@@ -259,8 +257,8 @@ module Polars
259
257
  # # │ 143.09 ┆ 143.09 │
260
258
  # # │ 143.9 ┆ 143.90 │
261
259
  # # └───────────┴─────────────────┘
262
- def to_decimal(inference_length = 100)
263
- Utils.wrap_expr(_rbexpr.str_to_decimal(inference_length))
260
+ def to_decimal(scale:)
261
+ Utils.wrap_expr(_rbexpr.str_to_decimal(scale))
264
262
  end
265
263
 
266
264
  # Get length of the strings as `:u32` (as number of bytes).
@@ -774,7 +772,7 @@ module Polars
774
772
 
775
773
  # Return the bytes offset of the first substring matching a pattern.
776
774
  #
777
- # If the pattern is not found, returns None.
775
+ # If the pattern is not found, returns nil.
778
776
  #
779
777
  # @param pattern [String]
780
778
  # A valid regular expression pattern, compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
@@ -919,11 +917,9 @@ module Polars
919
917
  # Throws an error if invalid JSON strings are encountered.
920
918
  #
921
919
  # @param dtype [Object]
922
- # The dtype to cast the extracted value to. If nil, the dtype will be
923
- # inferred from the JSON value.
920
+ # The dtype to cast the extracted value to.
924
921
  # @param infer_schema_length [Integer]
925
- # The maximum number of rows to scan for schema inference.
926
- # If set to `nil`, the full data may be scanned *(this is slow)*.
922
+ # Deprecated and ignored.
927
923
  #
928
924
  # @return [Expr]
929
925
  #
@@ -932,23 +928,26 @@ module Polars
932
928
  # {"json" => ['{"a":1, "b": true}', nil, '{"a":2, "b": false}']}
933
929
  # )
934
930
  # dtype = Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)])
935
- # df.select(Polars.col("json").str.json_decode(dtype))
931
+ # df.with_columns(decoded: Polars.col("json").str.json_decode(dtype))
936
932
  # # =>
937
- # # shape: (3, 1)
938
- # # ┌───────────┐
939
- # # │ json      │
940
- # # │ ---       │
941
- # # │ struct[2] │
942
- # # ╞═══════════╡
943
- # # │ {1,true}  │
944
- # # │ null      │
945
- # # │ {2,false} │
946
- # # └───────────┘
947
- def json_decode(dtype = nil, infer_schema_length: 100)
948
- if !dtype.nil?
949
- dtype = Utils.rb_type_to_dtype(dtype)
933
+ # # shape: (3, 2)
934
+ # # ┌─────────────────────┬───────────┐
935
+ # # │ json                ┆ decoded   │
936
+ # # │ ---                 ┆ ---       │
937
+ # # │ str                 ┆ struct[2] │
938
+ # # ╞═════════════════════╪═══════════╡
939
+ # # │ {"a":1, "b": true}  ┆ {1,true}  │
940
+ # # │ null                ┆ null      │
941
+ # # │ {"a":2, "b": false} ┆ {2,false} │
942
+ # # └─────────────────────┴───────────┘
943
+ def json_decode(dtype, infer_schema_length: nil)
944
+ if dtype.nil?
945
+ msg = "`Expr.str.json_decode` needs an explicitly given `dtype` otherwise Polars is not able to determine the output type. If you want to eagerly infer datatype you can use `Series.str.json_decode`."
946
+ raise TypeError, msg
950
947
  end
951
- Utils.wrap_expr(_rbexpr.str_json_decode(dtype, infer_schema_length))
948
+
949
+ dtype_expr = Utils.parse_into_datatype_expr(dtype)._rbdatatype_expr
950
+ Utils.wrap_expr(_rbexpr.str_json_decode(dtype_expr))
952
951
  end
953
952
  alias_method :json_extract, :json_decode
954
953
 
@@ -210,8 +210,12 @@ module Polars
210
210
  # # 143.09
211
211
  # # 143.90
212
212
  # # ]
213
- def to_decimal(inference_length = 100)
214
- super
213
+ def to_decimal(inference_length = 100, scale: nil)
214
+ if !scale.nil?
215
+ raise Todo
216
+ end
217
+
218
+ Utils.wrap_s(_s.str_to_decimal_infer(inference_length))
215
219
  end
216
220
 
217
221
  # Return the length of each string as the number of bytes.
@@ -450,7 +454,7 @@ module Polars
450
454
  # Throws an error if invalid JSON strings are encountered.
451
455
  #
452
456
  # @param dtype [Object]
453
- # The dtype to cast the extracted value to. If None, the dtype will be
457
+ # The dtype to cast the extracted value to. If nil, the dtype will be
454
458
  # inferred from the JSON value.
455
459
  # @param infer_schema_length [Integer]
456
460
  # The maximum number of rows to scan for schema inference.
@@ -470,7 +474,16 @@ module Polars
470
474
  # # {2,false}
471
475
  # # ]
472
476
  def json_decode(dtype = nil, infer_schema_length: 100)
473
- super
477
+ if !dtype.nil?
478
+ s = Utils.wrap_s(_s)
479
+ return (
480
+ s.to_frame
481
+ .select_seq(F.col(s.name).str.json_decode(dtype))
482
+ .to_series
483
+ )
484
+ end
485
+
486
+ Utils.wrap_s(_s.str_json_decode(infer_schema_length))
474
487
  end
475
488
 
476
489
  # Extract the first match of json string with provided JSONPath expression.
@@ -0,0 +1,17 @@
1
+ module Polars
2
+ module Utils
3
+ def self.serialize_polars_object(serializer, file)
4
+ serialize_to_bytes = lambda do
5
+ buf = StringIO.new
6
+ serializer.(buf)
7
+ buf.string
8
+ end
9
+
10
+ if file.nil?
11
+ return serialize_to_bytes.call
12
+ end
13
+
14
+ raise Todo
15
+ end
16
+ end
17
+ end
@@ -75,6 +75,10 @@ module Polars
75
75
  end
76
76
  end
77
77
 
78
+ def self._polars_warn(msg)
79
+ warn msg
80
+ end
81
+
78
82
  def self.extend_bool(value, n_match, value_name, match_name)
79
83
  values = bool?(value) ? [value] * n_match : value
80
84
  if n_match != values.length
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.21.1"
3
+ VERSION = "0.22.0"
4
4
  end
data/lib/polars.rb CHANGED
@@ -37,6 +37,7 @@ require_relative "polars/dynamic_group_by"
37
37
  require_relative "polars/exceptions"
38
38
  require_relative "polars/expr"
39
39
  require_relative "polars/functions/as_datatype"
40
+ require_relative "polars/functions/business"
40
41
  require_relative "polars/functions/col"
41
42
  require_relative "polars/functions/datatype"
42
43
  require_relative "polars/functions/eager"
@@ -57,6 +58,7 @@ require_relative "polars/io/avro"
57
58
  require_relative "polars/io/csv"
58
59
  require_relative "polars/io/database"
59
60
  require_relative "polars/io/delta"
61
+ require_relative "polars/io/iceberg"
60
62
  require_relative "polars/io/ipc"
61
63
  require_relative "polars/io/json"
62
64
  require_relative "polars/io/ndjson"
@@ -68,6 +70,7 @@ require_relative "polars/list_expr"
68
70
  require_relative "polars/list_name_space"
69
71
  require_relative "polars/meta_expr"
70
72
  require_relative "polars/name_expr"
73
+ require_relative "polars/query_opt_flags"
71
74
  require_relative "polars/rolling_group_by"
72
75
  require_relative "polars/scan_cast_options"
73
76
  require_relative "polars/schema"
@@ -86,6 +89,7 @@ require_relative "polars/utils"
86
89
  require_relative "polars/utils/constants"
87
90
  require_relative "polars/utils/convert"
88
91
  require_relative "polars/utils/parse"
92
+ require_relative "polars/utils/serde"
89
93
  require_relative "polars/utils/various"
90
94
  require_relative "polars/utils/wrap"
91
95
  require_relative "polars/version"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.21.1
4
+ version: 0.22.0
5
5
  platform: arm64-darwin
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-08-19 00:00:00.000000000 Z
11
+ date: 2025-09-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal
@@ -69,6 +69,7 @@ files:
69
69
  - lib/polars/functions/aggregation/horizontal.rb
70
70
  - lib/polars/functions/aggregation/vertical.rb
71
71
  - lib/polars/functions/as_datatype.rb
72
+ - lib/polars/functions/business.rb
72
73
  - lib/polars/functions/col.rb
73
74
  - lib/polars/functions/datatype.rb
74
75
  - lib/polars/functions/eager.rb
@@ -87,6 +88,7 @@ files:
87
88
  - lib/polars/io/csv.rb
88
89
  - lib/polars/io/database.rb
89
90
  - lib/polars/io/delta.rb
91
+ - lib/polars/io/iceberg.rb
90
92
  - lib/polars/io/ipc.rb
91
93
  - lib/polars/io/json.rb
92
94
  - lib/polars/io/ndjson.rb
@@ -99,6 +101,7 @@ files:
99
101
  - lib/polars/meta_expr.rb
100
102
  - lib/polars/name_expr.rb
101
103
  - lib/polars/plot.rb
104
+ - lib/polars/query_opt_flags.rb
102
105
  - lib/polars/rolling_group_by.rb
103
106
  - lib/polars/scan_cast_options.rb
104
107
  - lib/polars/schema.rb
@@ -117,6 +120,7 @@ files:
117
120
  - lib/polars/utils/constants.rb
118
121
  - lib/polars/utils/convert.rb
119
122
  - lib/polars/utils/parse.rb
123
+ - lib/polars/utils/serde.rb
120
124
  - lib/polars/utils/various.rb
121
125
  - lib/polars/utils/wrap.rb
122
126
  - lib/polars/version.rb