polars-df 0.21.1-x86_64-linux-musl → 0.23.0-x86_64-linux-musl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,50 @@
1
+ module Polars
2
+ # The set of the optimizations considered during query optimization.
3
+ #
4
+ # @note
5
+ # This functionality is considered **unstable**. It may be changed
6
+ # at any point without it being considered a breaking change.
7
+ class QueryOptFlags
8
+ def initialize(
9
+ predicate_pushdown: nil,
10
+ projection_pushdown: nil,
11
+ simplify_expression: nil,
12
+ slice_pushdown: nil,
13
+ comm_subplan_elim: nil,
14
+ comm_subexpr_elim: nil,
15
+ cluster_with_columns: nil,
16
+ collapse_joins: nil,
17
+ check_order_observe: nil,
18
+ fast_projection: nil
19
+ )
20
+ @_rboptflags = RbOptFlags.default
21
+ update(
22
+ predicate_pushdown: predicate_pushdown,
23
+ projection_pushdown: projection_pushdown,
24
+ simplify_expression: simplify_expression,
25
+ slice_pushdown: slice_pushdown,
26
+ comm_subplan_elim: comm_subplan_elim,
27
+ comm_subexpr_elim: comm_subexpr_elim,
28
+ cluster_with_columns: cluster_with_columns,
29
+ collapse_joins: collapse_joins,
30
+ check_order_observe: check_order_observe,
31
+ fast_projection: fast_projection
32
+ )
33
+ end
34
+
35
+ def update(
36
+ predicate_pushdown: nil,
37
+ projection_pushdown: nil,
38
+ simplify_expression: nil,
39
+ slice_pushdown: nil,
40
+ comm_subplan_elim: nil,
41
+ comm_subexpr_elim: nil,
42
+ cluster_with_columns: nil,
43
+ collapse_joins: nil,
44
+ check_order_observe: nil,
45
+ fast_projection: nil
46
+ )
47
+ raise Todo
48
+ end
49
+ end
50
+ end
@@ -1,6 +1,8 @@
1
1
  module Polars
2
2
  # Options for scanning files.
3
3
  class ScanCastOptions
4
+ attr_reader :integer_cast, :float_cast, :datetime_cast, :missing_struct_fields, :extra_struct_fields, :categorical_to_string
5
+
4
6
  # Common configuration for scanning files.
5
7
  #
6
8
  # @note
@@ -48,17 +50,37 @@ module Polars
48
50
  datetime_cast: "forbid",
49
51
  missing_struct_fields: "raise",
50
52
  extra_struct_fields: "raise",
53
+ categorical_to_string: "forbid",
51
54
  _internal_call: false
52
55
  )
56
+ if !_internal_call
57
+ warn "ScanCastOptions is considered unstable."
58
+ end
59
+
53
60
  @integer_cast = integer_cast
54
61
  @float_cast = float_cast
55
62
  @datetime_cast = datetime_cast
56
63
  @missing_struct_fields = missing_struct_fields
57
64
  @extra_struct_fields = extra_struct_fields
65
+ @categorical_to_string = categorical_to_string
58
66
  end
59
67
 
60
- def self.default
68
+ def self._default
61
69
  new(_internal_call: true)
62
70
  end
71
+
72
+ def self._default_iceberg
73
+ @_default_cast_options_iceberg ||= begin
74
+ ScanCastOptions.new(
75
+ integer_cast: "upcast",
76
+ float_cast: ["upcast", "downcast"],
77
+ datetime_cast: ["nanosecond-downcast", "convert-timezone"],
78
+ missing_struct_fields: "insert",
79
+ extra_struct_fields: "ignore",
80
+ categorical_to_string: "allow",
81
+ _internal_call: true
82
+ )
83
+ end
84
+ end
63
85
  end
64
86
  end
data/lib/polars/schema.rb CHANGED
@@ -30,7 +30,7 @@ module Polars
30
30
  #
31
31
  # @return [Object]
32
32
  def []=(name, dtype)
33
- # TODO check dtype
33
+ _check_dtype(dtype)
34
34
  @schema[name] = dtype
35
35
  end
36
36
 
@@ -1063,14 +1063,14 @@ module Polars
1063
1063
  # df.select(Polars.cs.decimal)
1064
1064
  # # =>
1065
1065
  # # shape: (2, 2)
1066
- # # ┌──────────────┬───────────────┐
1067
- # # │ bar ┆ baz │
1068
- # # │ --- ┆ --- │
1069
- # # │ decimal[*,0] ┆ decimal[10,5] │
1070
- # # ╞══════════════╪═══════════════╡
1071
- # # │ 123 ┆ 2.00050 │
1072
- # # │ 456 ┆ -50.55550 │
1073
- # # └──────────────┴───────────────┘
1066
+ # # ┌───────────────┬───────────────┐
1067
+ # # │ bar ┆ baz │
1068
+ # # │ --- ┆ --- │
1069
+ # # │ decimal[38,0] ┆ decimal[10,5] │
1070
+ # # ╞═══════════════╪═══════════════╡
1071
+ # # │ 123 ┆ 2.00050 │
1072
+ # # │ 456 ┆ -50.55550 │
1073
+ # # └───────────────┴───────────────┘
1074
1074
  #
1075
1075
  # @example Select all columns *except* the decimal ones:
1076
1076
  #
data/lib/polars/series.rb CHANGED
@@ -5307,7 +5307,7 @@ module Polars
5307
5307
  # # 6
5308
5308
  # # ]
5309
5309
  def shrink_dtype
5310
- super
5310
+ Utils.wrap_s(_s.shrink_dtype)
5311
5311
  end
5312
5312
 
5313
5313
  # Get the chunks of this Series as a list of Series.
@@ -5747,11 +5747,31 @@ module Polars
5747
5747
  end
5748
5748
 
5749
5749
  base_type = dtype.is_a?(DataType) ? dtype.class : dtype
5750
- if [Date, Datetime, Duration, Time, Categorical, Boolean, Enum, Decimal].include?(base_type)
5750
+ if [Date, Datetime, Duration, Time, Categorical, Boolean, Enum].include?(base_type) || dtype.is_a?(Decimal)
5751
5751
  if rbseries.dtype != dtype
5752
5752
  rbseries = rbseries.cast(dtype, true)
5753
5753
  end
5754
5754
  end
5755
+
5756
+ # Uninstanced Decimal is a bit special and has various inference paths
5757
+ if dtype == Decimal
5758
+ if rbseries.dtype == String
5759
+ rbseries = rbseries.str_to_decimal_infer(0)
5760
+ elsif rbseries.dtype.float?
5761
+ # Go through string so we infer an appropriate scale.
5762
+ rbseries = rbseries.cast(
5763
+ String, strict: strict, wrap_numerical: false
5764
+ ).str_to_decimal_infer(0)
5765
+ elsif rbseries.dtype.integer? || rbseries.dtype == Null
5766
+ rbseries = rbseries.cast(
5767
+ Decimal.new(nil, 0), strict: strict, wrap_numerical: false
5768
+ )
5769
+ elsif !rbseries.dtype.is_a?(Decimal)
5770
+ msg = "can't convert #{rbseries.dtype} to Decimal"
5771
+ raise TypeError, msg
5772
+ end
5773
+ end
5774
+
5755
5775
  rbseries
5756
5776
  elsif dtype == Struct
5757
5777
  struct_schema = dtype.is_a?(Struct) ? dtype.to_schema : nil
@@ -5851,10 +5871,12 @@ module Polars
5851
5871
  Int16 => RbSeries.method(:new_opt_i16),
5852
5872
  Int32 => RbSeries.method(:new_opt_i32),
5853
5873
  Int64 => RbSeries.method(:new_opt_i64),
5874
+ Int128 => RbSeries.method(:new_opt_i128),
5854
5875
  UInt8 => RbSeries.method(:new_opt_u8),
5855
5876
  UInt16 => RbSeries.method(:new_opt_u16),
5856
5877
  UInt32 => RbSeries.method(:new_opt_u32),
5857
5878
  UInt64 => RbSeries.method(:new_opt_u64),
5879
+ UInt128 => RbSeries.method(:new_opt_u128),
5858
5880
  Decimal => RbSeries.method(:new_decimal),
5859
5881
  Date => RbSeries.method(:new_from_any_values),
5860
5882
  Datetime => RbSeries.method(:new_from_any_values),
@@ -5876,10 +5898,12 @@ module Polars
5876
5898
  i16: RbSeries.method(:new_opt_i16),
5877
5899
  i32: RbSeries.method(:new_opt_i32),
5878
5900
  i64: RbSeries.method(:new_opt_i64),
5901
+ i128: RbSeries.method(:new_opt_i128),
5879
5902
  u8: RbSeries.method(:new_opt_u8),
5880
5903
  u16: RbSeries.method(:new_opt_u16),
5881
5904
  u32: RbSeries.method(:new_opt_u32),
5882
5905
  u64: RbSeries.method(:new_opt_u64),
5906
+ u128: RbSeries.method(:new_opt_u128),
5883
5907
  bool: RbSeries.method(:new_opt_bool),
5884
5908
  str: RbSeries.method(:new_str)
5885
5909
  }
@@ -222,10 +222,8 @@ module Polars
222
222
 
223
223
  # Convert a String column into a Decimal column.
224
224
  #
225
- # This method infers the needed parameters `precision` and `scale`.
226
- #
227
- # @param inference_length [Integer]
228
- # Number of elements to parse to determine the `precision` and `scale`.
225
+ # @param scale [Integer]
226
+ # Number of digits after the comma to use for the decimals.
229
227
  #
230
228
  # @return [Expr]
231
229
  #
@@ -243,13 +241,13 @@ module Polars
243
241
  # ]
244
242
  # }
245
243
  # )
246
- # df.with_columns(numbers_decimal: Polars.col("numbers").str.to_decimal)
244
+ # df.with_columns(numbers_decimal: Polars.col("numbers").str.to_decimal(scale: 2))
247
245
  # # =>
248
246
  # # shape: (7, 2)
249
247
  # # ┌───────────┬─────────────────┐
250
248
  # # │ numbers ┆ numbers_decimal │
251
249
  # # │ --- ┆ --- │
252
- # # │ str ┆ decimal[*,2]
250
+ # # │ str ┆ decimal[38,2]
253
251
  # # ╞═══════════╪═════════════════╡
254
252
  # # │ 40.12 ┆ 40.12 │
255
253
  # # │ 3420.13 ┆ 3420.13 │
@@ -259,8 +257,8 @@ module Polars
259
257
  # # │ 143.09 ┆ 143.09 │
260
258
  # # │ 143.9 ┆ 143.90 │
261
259
  # # └───────────┴─────────────────┘
262
- def to_decimal(inference_length = 100)
263
- Utils.wrap_expr(_rbexpr.str_to_decimal(inference_length))
260
+ def to_decimal(scale:)
261
+ Utils.wrap_expr(_rbexpr.str_to_decimal(scale))
264
262
  end
265
263
 
266
264
  # Get length of the strings as `:u32` (as number of bytes).
@@ -774,7 +772,7 @@ module Polars
774
772
 
775
773
  # Return the bytes offset of the first substring matching a pattern.
776
774
  #
777
- # If the pattern is not found, returns None.
775
+ # If the pattern is not found, returns nil.
778
776
  #
779
777
  # @param pattern [String]
780
778
  # A valid regular expression pattern, compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
@@ -919,11 +917,9 @@ module Polars
919
917
  # Throw errors if encounter invalid JSON strings.
920
918
  #
921
919
  # @param dtype [Object]
922
- # The dtype to cast the extracted value to. If nil, the dtype will be
923
- # inferred from the JSON value.
920
+ # The dtype to cast the extracted value to.
924
921
  # @param infer_schema_length [Integer]
925
- # The maximum number of rows to scan for schema inference.
926
- # If set to `nil`, the full data may be scanned *(this is slow)*.
922
+ # Deprecated and ignored.
927
923
  #
928
924
  # @return [Expr]
929
925
  #
@@ -932,23 +928,26 @@ module Polars
932
928
  # {"json" => ['{"a":1, "b": true}', nil, '{"a":2, "b": false}']}
933
929
  # )
934
930
  # dtype = Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)])
935
- # df.select(Polars.col("json").str.json_decode(dtype))
931
+ # df.with_columns(decoded: Polars.col("json").str.json_decode(dtype))
936
932
  # # =>
937
- # # shape: (3, 1)
938
- # # ┌───────────┐
939
- # # │ json
940
- # # │ --- │
941
- # # │ struct[2] │
942
- # # ╞═══════════╡
943
- # # │ {1,true} │
944
- # # │ null │
945
- # # │ {2,false} │
946
- # # └───────────┘
947
- def json_decode(dtype = nil, infer_schema_length: 100)
948
- if !dtype.nil?
949
- dtype = Utils.rb_type_to_dtype(dtype)
933
+ # # shape: (3, 2)
934
+ # # ┌─────────────────────┬───────────┐
935
+ # # │ json ┆ decoded
936
+ # # │ --- ┆ ---
937
+ # # │ str ┆ struct[2] │
938
+ # # ╞═════════════════════╪═══════════╡
939
+ # # │ {"a":1, "b": true} ┆ {1,true}
940
+ # # │ null ┆ null
941
+ # # │ {"a":2, "b": false} ┆ {2,false}
942
+ # # └─────────────────────┴───────────┘
943
+ def json_decode(dtype, infer_schema_length: nil)
944
+ if dtype.nil?
945
+ msg = "`Expr.str.json_decode` needs an explicitly given `dtype` otherwise Polars is not able to determine the output type. If you want to eagerly infer datatype you can use `Series.str.json_decode`."
946
+ raise TypeError, msg
950
947
  end
951
- Utils.wrap_expr(_rbexpr.str_json_decode(dtype, infer_schema_length))
948
+
949
+ dtype_expr = Utils.parse_into_datatype_expr(dtype)._rbdatatype_expr
950
+ Utils.wrap_expr(_rbexpr.str_json_decode(dtype_expr))
952
951
  end
953
952
  alias_method :json_extract, :json_decode
954
953
 
@@ -200,7 +200,7 @@ module Polars
200
200
  # s.str.to_decimal
201
201
  # # =>
202
202
  # # shape: (7,)
203
- # # Series: '' [decimal[*,2]]
203
+ # # Series: '' [decimal[8,2]]
204
204
  # # [
205
205
  # # 40.12
206
206
  # # 3420.13
@@ -210,8 +210,12 @@ module Polars
210
210
  # # 143.09
211
211
  # # 143.90
212
212
  # # ]
213
- def to_decimal(inference_length = 100)
214
- super
213
+ def to_decimal(inference_length = 100, scale: nil)
214
+ if !scale.nil?
215
+ raise Todo
216
+ end
217
+
218
+ Utils.wrap_s(_s.str_to_decimal_infer(inference_length))
215
219
  end
216
220
 
217
221
  # Return the length of each string as the number of bytes.
@@ -450,7 +454,7 @@ module Polars
450
454
  # Throws an error if invalid JSON strings are encountered.
451
455
  #
452
456
  # @param dtype [Object]
453
- # The dtype to cast the extracted value to. If None, the dtype will be
457
+ # The dtype to cast the extracted value to. If nil, the dtype will be
454
458
  # inferred from the JSON value.
455
459
  # @param infer_schema_length [Integer]
456
460
  # The maximum number of rows to scan for schema inference.
@@ -470,7 +474,16 @@ module Polars
470
474
  # # {2,false}
471
475
  # # ]
472
476
  def json_decode(dtype = nil, infer_schema_length: 100)
473
- super
477
+ if !dtype.nil?
478
+ s = Utils.wrap_s(_s)
479
+ return (
480
+ s.to_frame
481
+ .select_seq(F.col(s.name).str.json_decode(dtype))
482
+ .to_series
483
+ )
484
+ end
485
+
486
+ Utils.wrap_s(_s.str_json_decode(infer_schema_length))
474
487
  end
475
488
 
476
489
  # Extract the first match of json string with provided JSONPath expression.
@@ -90,8 +90,8 @@ module Polars
90
90
  end
91
91
  end
92
92
 
93
- def self._to_ruby_decimal(digits, scale)
94
- BigDecimal("#{digits}e#{scale}")
93
+ def self._to_ruby_decimal(prec, value)
94
+ BigDecimal(value, prec)
95
95
  end
96
96
  end
97
97
  end
@@ -0,0 +1,17 @@
1
+ module Polars
2
+ module Utils
3
+ def self.serialize_polars_object(serializer, file)
4
+ serialize_to_bytes = lambda do
5
+ buf = StringIO.new
6
+ serializer.(buf)
7
+ buf.string
8
+ end
9
+
10
+ if file.nil?
11
+ return serialize_to_bytes.call
12
+ end
13
+
14
+ raise Todo
15
+ end
16
+ end
17
+ end
@@ -75,6 +75,10 @@ module Polars
75
75
  end
76
76
  end
77
77
 
78
+ def self._polars_warn(msg)
79
+ warn msg
80
+ end
81
+
78
82
  def self.extend_bool(value, n_match, value_name, match_name)
79
83
  values = bool?(value) ? [value] * n_match : value
80
84
  if n_match != values.length
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.21.1"
3
+ VERSION = "0.23.0"
4
4
  end
data/lib/polars.rb CHANGED
@@ -37,6 +37,7 @@ require_relative "polars/dynamic_group_by"
37
37
  require_relative "polars/exceptions"
38
38
  require_relative "polars/expr"
39
39
  require_relative "polars/functions/as_datatype"
40
+ require_relative "polars/functions/business"
40
41
  require_relative "polars/functions/col"
41
42
  require_relative "polars/functions/datatype"
42
43
  require_relative "polars/functions/eager"
@@ -53,21 +54,25 @@ require_relative "polars/functions/range/datetime_range"
53
54
  require_relative "polars/functions/range/int_range"
54
55
  require_relative "polars/functions/range/time_range"
55
56
  require_relative "polars/group_by"
57
+ require_relative "polars/iceberg_dataset"
56
58
  require_relative "polars/io/avro"
57
59
  require_relative "polars/io/csv"
58
60
  require_relative "polars/io/database"
59
61
  require_relative "polars/io/delta"
62
+ require_relative "polars/io/iceberg"
60
63
  require_relative "polars/io/ipc"
61
64
  require_relative "polars/io/json"
62
65
  require_relative "polars/io/ndjson"
63
66
  require_relative "polars/io/parquet"
64
67
  require_relative "polars/io/scan_options"
68
+ require_relative "polars/io/utils"
65
69
  require_relative "polars/lazy_frame"
66
70
  require_relative "polars/lazy_group_by"
67
71
  require_relative "polars/list_expr"
68
72
  require_relative "polars/list_name_space"
69
73
  require_relative "polars/meta_expr"
70
74
  require_relative "polars/name_expr"
75
+ require_relative "polars/query_opt_flags"
71
76
  require_relative "polars/rolling_group_by"
72
77
  require_relative "polars/scan_cast_options"
73
78
  require_relative "polars/schema"
@@ -86,6 +91,7 @@ require_relative "polars/utils"
86
91
  require_relative "polars/utils/constants"
87
92
  require_relative "polars/utils/convert"
88
93
  require_relative "polars/utils/parse"
94
+ require_relative "polars/utils/serde"
89
95
  require_relative "polars/utils/various"
90
96
  require_relative "polars/utils/wrap"
91
97
  require_relative "polars/version"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.21.1
4
+ version: 0.23.0
5
5
  platform: x86_64-linux-musl
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-08-19 00:00:00.000000000 Z
11
+ date: 2025-11-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal
@@ -69,6 +69,7 @@ files:
69
69
  - lib/polars/functions/aggregation/horizontal.rb
70
70
  - lib/polars/functions/aggregation/vertical.rb
71
71
  - lib/polars/functions/as_datatype.rb
72
+ - lib/polars/functions/business.rb
72
73
  - lib/polars/functions/col.rb
73
74
  - lib/polars/functions/datatype.rb
74
75
  - lib/polars/functions/eager.rb
@@ -83,15 +84,18 @@ files:
83
84
  - lib/polars/functions/repeat.rb
84
85
  - lib/polars/functions/whenthen.rb
85
86
  - lib/polars/group_by.rb
87
+ - lib/polars/iceberg_dataset.rb
86
88
  - lib/polars/io/avro.rb
87
89
  - lib/polars/io/csv.rb
88
90
  - lib/polars/io/database.rb
89
91
  - lib/polars/io/delta.rb
92
+ - lib/polars/io/iceberg.rb
90
93
  - lib/polars/io/ipc.rb
91
94
  - lib/polars/io/json.rb
92
95
  - lib/polars/io/ndjson.rb
93
96
  - lib/polars/io/parquet.rb
94
97
  - lib/polars/io/scan_options.rb
98
+ - lib/polars/io/utils.rb
95
99
  - lib/polars/lazy_frame.rb
96
100
  - lib/polars/lazy_group_by.rb
97
101
  - lib/polars/list_expr.rb
@@ -99,6 +103,7 @@ files:
99
103
  - lib/polars/meta_expr.rb
100
104
  - lib/polars/name_expr.rb
101
105
  - lib/polars/plot.rb
106
+ - lib/polars/query_opt_flags.rb
102
107
  - lib/polars/rolling_group_by.rb
103
108
  - lib/polars/scan_cast_options.rb
104
109
  - lib/polars/schema.rb
@@ -117,6 +122,7 @@ files:
117
122
  - lib/polars/utils/constants.rb
118
123
  - lib/polars/utils/convert.rb
119
124
  - lib/polars/utils/parse.rb
125
+ - lib/polars/utils/serde.rb
120
126
  - lib/polars/utils/various.rb
121
127
  - lib/polars/utils/wrap.rb
122
128
  - lib/polars/version.rb