polars-df 0.22.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/Cargo.lock +112 -89
  4. data/README.md +8 -7
  5. data/ext/polars/Cargo.toml +13 -12
  6. data/ext/polars/src/conversion/any_value.rs +14 -8
  7. data/ext/polars/src/conversion/chunked_array.rs +5 -2
  8. data/ext/polars/src/conversion/mod.rs +27 -19
  9. data/ext/polars/src/dataframe/construction.rs +1 -14
  10. data/ext/polars/src/dataframe/general.rs +0 -5
  11. data/ext/polars/src/expr/datetime.rs +22 -14
  12. data/ext/polars/src/file.rs +5 -5
  13. data/ext/polars/src/io/mod.rs +22 -8
  14. data/ext/polars/src/lazyframe/general.rs +27 -41
  15. data/ext/polars/src/lazyframe/optflags.rs +0 -1
  16. data/ext/polars/src/lib.rs +10 -18
  17. data/ext/polars/src/series/aggregation.rs +8 -1
  18. data/ext/polars/src/series/construction.rs +1 -0
  19. data/ext/polars/src/series/export.rs +1 -0
  20. data/ext/polars/src/series/general.rs +0 -1
  21. data/lib/polars/data_frame.rb +11 -9
  22. data/lib/polars/data_types.rb +9 -1
  23. data/lib/polars/date_time_expr.rb +35 -14
  24. data/lib/polars/expr.rb +2 -2
  25. data/lib/polars/iceberg_dataset.rb +113 -0
  26. data/lib/polars/io/iceberg.rb +8 -1
  27. data/lib/polars/io/ipc.rb +28 -49
  28. data/lib/polars/io/scan_options.rb +9 -3
  29. data/lib/polars/io/utils.rb +17 -0
  30. data/lib/polars/lazy_frame.rb +5 -2
  31. data/lib/polars/scan_cast_options.rb +4 -1
  32. data/lib/polars/selectors.rb +8 -8
  33. data/lib/polars/series.rb +23 -1
  34. data/lib/polars/string_expr.rb +1 -1
  35. data/lib/polars/string_name_space.rb +1 -1
  36. data/lib/polars/utils/convert.rb +2 -2
  37. data/lib/polars/version.rb +1 -1
  38. data/lib/polars.rb +2 -0
  39. metadata +3 -1
@@ -75,8 +75,8 @@ module Polars
75
75
  # Read a serialized DataFrame from a file.
76
76
  #
77
77
  # @param source [Object]
78
- # Path to a file or a file-like object (by file-like object, we refer to
79
- # objects that have a `read` method, such as a file handler or `StringIO`).
78
+ # Path to a file or a file-like object (by file-like object, we refer to
79
+ # objects that have a `read` method, such as a file handler or `StringIO`).
80
80
  #
81
81
  # @return [DataFrame]
82
82
  #
@@ -6059,8 +6059,13 @@ module Polars
6059
6059
  # The fields will be inserted into the `DataFrame` on the location of the
6060
6060
  # `struct` type.
6061
6061
  #
6062
- # @param names [Object]
6063
- # Names of the struct columns that will be decomposed by its fields
6062
+ # @param columns [Object]
6063
+ # Name of the struct column(s) that should be unnested.
6064
+ # @param more_columns [Array]
6065
+ # Additional columns to unnest, specified as positional arguments.
6066
+ # @param separator [String]
6067
+ # Rename output column names as a combination of the struct column name,
6068
+ # name separator and field name.
6064
6069
  #
6065
6070
  # @return [DataFrame]
6066
6071
  #
@@ -6086,11 +6091,8 @@ module Polars
6086
6091
  # # │ foo ┆ 1 ┆ a ┆ true ┆ [1, 2] ┆ baz │
6087
6092
  # # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
6088
6093
  # # └────────┴─────┴─────┴──────┴───────────┴───────┘
6089
- def unnest(names)
6090
- if names.is_a?(::String)
6091
- names = [names]
6092
- end
6093
- _from_rbdf(_df.unnest(names))
6094
+ def unnest(columns, *more_columns, separator: nil)
6095
+ lazy.unnest(columns, *more_columns, separator: separator).collect(_eager: true)
6094
6096
  end
6095
6097
 
6096
6098
  # Requires NumPy
@@ -209,6 +209,10 @@ module Polars
209
209
  class UInt64 < UnsignedIntegerType
210
210
  end
211
211
 
212
+ # 128-bit unsigned integer type.
213
+ class UInt128 < UnsignedIntegerType
214
+ end
215
+
212
216
  # 32-bit floating point type.
213
217
  class Float32 < FloatType
214
218
  end
@@ -223,7 +227,11 @@ module Polars
223
227
  class Decimal < NumericType
224
228
  attr_reader :precision, :scale
225
229
 
226
- def initialize(precision, scale)
230
+ def initialize(precision = nil, scale = 0)
231
+ if precision.nil?
232
+ precision = 38
233
+ end
234
+
227
235
  @precision = precision
228
236
  @scale = scale
229
237
  end
@@ -1435,6 +1435,9 @@ module Polars
1435
1435
 
1436
1436
  # Extract the days from a Duration type.
1437
1437
  #
1438
+ # @param fractional [Boolean]
1439
+ # Whether to include the fractional component of the day.
1440
+ #
1438
1441
  # @return [Expr]
1439
1442
  #
1440
1443
  # @example
@@ -1462,13 +1465,16 @@ module Polars
1462
1465
  # # │ 2020-04-01 00:00:00 ┆ 31 │
1463
1466
  # # │ 2020-05-01 00:00:00 ┆ 30 │
1464
1467
  # # └─────────────────────┴───────────┘
1465
- def total_days
1466
- Utils.wrap_expr(_rbexpr.dt_total_days)
1468
+ def total_days(fractional: false)
1469
+ Utils.wrap_expr(_rbexpr.dt_total_days(fractional))
1467
1470
  end
1468
1471
  alias_method :days, :total_days
1469
1472
 
1470
1473
  # Extract the hours from a Duration type.
1471
1474
  #
1475
+ # @param fractional [Boolean]
1476
+ # Whether to include the fractional component of the hour.
1477
+ #
1472
1478
  # @return [Expr]
1473
1479
  #
1474
1480
  # @example
@@ -1497,13 +1503,16 @@ module Polars
1497
1503
  # # │ 2020-01-03 00:00:00 ┆ 24 │
1498
1504
  # # │ 2020-01-04 00:00:00 ┆ 24 │
1499
1505
  # # └─────────────────────┴────────────┘
1500
- def total_hours
1501
- Utils.wrap_expr(_rbexpr.dt_total_hours)
1506
+ def total_hours(fractional: false)
1507
+ Utils.wrap_expr(_rbexpr.dt_total_hours(fractional))
1502
1508
  end
1503
1509
  alias_method :hours, :total_hours
1504
1510
 
1505
1511
  # Extract the minutes from a Duration type.
1506
1512
  #
1513
+ # @param fractional [Boolean]
1514
+ # Whether to include the fractional component of the minute.
1515
+ #
1507
1516
  # @return [Expr]
1508
1517
  #
1509
1518
  # @example
@@ -1532,13 +1541,16 @@ module Polars
1532
1541
  # # │ 2020-01-03 00:00:00 ┆ 1440 │
1533
1542
  # # │ 2020-01-04 00:00:00 ┆ 1440 │
1534
1543
  # # └─────────────────────┴──────────────┘
1535
- def total_minutes
1536
- Utils.wrap_expr(_rbexpr.dt_total_minutes)
1544
+ def total_minutes(fractional: false)
1545
+ Utils.wrap_expr(_rbexpr.dt_total_minutes(fractional))
1537
1546
  end
1538
1547
  alias_method :minutes, :total_minutes
1539
1548
 
1540
1549
  # Extract the seconds from a Duration type.
1541
1550
  #
1551
+ # @param fractional [Boolean]
1552
+ # Whether to include the fractional component of the second.
1553
+ #
1542
1554
  # @return [Expr]
1543
1555
  #
1544
1556
  # @example
@@ -1568,13 +1580,16 @@ module Polars
1568
1580
  # # │ 2020-01-01 00:03:00 ┆ 60 │
1569
1581
  # # │ 2020-01-01 00:04:00 ┆ 60 │
1570
1582
  # # └─────────────────────┴──────────────┘
1571
- def total_seconds
1572
- Utils.wrap_expr(_rbexpr.dt_total_seconds)
1583
+ def total_seconds(fractional: false)
1584
+ Utils.wrap_expr(_rbexpr.dt_total_seconds(fractional))
1573
1585
  end
1574
1586
  alias_method :seconds, :total_seconds
1575
1587
 
1576
1588
  # Extract the milliseconds from a Duration type.
1577
1589
  #
1590
+ # @param fractional [Boolean]
1591
+ # Whether to include the fractional component of the millisecond.
1592
+ #
1578
1593
  # @return [Expr]
1579
1594
  #
1580
1595
  # @example
@@ -1610,13 +1625,16 @@ module Polars
1610
1625
  # # │ 2020-01-01 00:00:00.999 ┆ 1 │
1611
1626
  # # │ 2020-01-01 00:00:01 ┆ 1 │
1612
1627
  # # └─────────────────────────┴───────────────────┘
1613
- def total_milliseconds
1614
- Utils.wrap_expr(_rbexpr.dt_total_milliseconds)
1628
+ def total_milliseconds(fractional: false)
1629
+ Utils.wrap_expr(_rbexpr.dt_total_milliseconds(fractional))
1615
1630
  end
1616
1631
  alias_method :milliseconds, :total_milliseconds
1617
1632
 
1618
1633
  # Extract the microseconds from a Duration type.
1619
1634
  #
1635
+ # @param fractional [Boolean]
1636
+ # Whether to include the fractional component of the microsecond.
1637
+ #
1620
1638
  # @return [Expr]
1621
1639
  #
1622
1640
  # @example
@@ -1652,13 +1670,16 @@ module Polars
1652
1670
  # # │ 2020-01-01 00:00:00.999 ┆ 1000 │
1653
1671
  # # │ 2020-01-01 00:00:01 ┆ 1000 │
1654
1672
  # # └─────────────────────────┴───────────────────┘
1655
- def total_microseconds
1656
- Utils.wrap_expr(_rbexpr.dt_total_microseconds)
1673
+ def total_microseconds(fractional: false)
1674
+ Utils.wrap_expr(_rbexpr.dt_total_microseconds(fractional))
1657
1675
  end
1658
1676
  alias_method :microseconds, :total_microseconds
1659
1677
 
1660
1678
  # Extract the nanoseconds from a Duration type.
1661
1679
  #
1680
+ # @param fractional [Boolean]
1681
+ # Whether to include the fractional component of the nanosecond.
1682
+ #
1662
1683
  # @return [Expr]
1663
1684
  #
1664
1685
  # @example
@@ -1694,8 +1715,8 @@ module Polars
1694
1715
  # # │ 2020-01-01 00:00:00.999 ┆ 1000000 │
1695
1716
  # # │ 2020-01-01 00:00:01 ┆ 1000000 │
1696
1717
  # # └─────────────────────────┴──────────────────┘
1697
- def total_nanoseconds
1698
- Utils.wrap_expr(_rbexpr.dt_total_nanoseconds)
1718
+ def total_nanoseconds(fractional: false)
1719
+ Utils.wrap_expr(_rbexpr.dt_total_nanoseconds(fractional))
1699
1720
  end
1700
1721
  alias_method :nanoseconds, :total_nanoseconds
1701
1722
 
data/lib/polars/expr.rb CHANGED
@@ -6612,8 +6612,8 @@ module Polars
6612
6612
  # # │ 10 ┆ null │
6613
6613
  # # │ 11 ┆ 0.1 │
6614
6614
  # # │ 12 ┆ 0.090909 │
6615
- # # │ null ┆ 0.0
6616
- # # │ 12 ┆ 0.0
6615
+ # # │ null ┆ null
6616
+ # # │ 12 ┆ null
6617
6617
  # # └──────┴────────────┘
6618
6618
  def pct_change(n: 1)
6619
6619
  n = Utils.parse_into_expression(n)
@@ -0,0 +1,113 @@
1
+ module Polars
2
+ # @private
3
+ class IcebergDataset
4
+ def initialize(
5
+ source,
6
+ snapshot_id:,
7
+ storage_options:
8
+ )
9
+ @source = source
10
+ @snapshot_id = snapshot_id
11
+ @storage_options = storage_options
12
+ end
13
+
14
+ def to_lazyframe
15
+ # for iceberg < 0.1.3
16
+ if !@source.respond_to?(:scan)
17
+ return @source.to_polars(snapshot_id: @snapshot_id, storage_options: @storage_options)
18
+ end
19
+
20
+ scan = @source.scan(snapshot_id: @snapshot_id)
21
+ files = scan.plan_files
22
+
23
+ table = scan.table
24
+ snapshot = scan.snapshot
25
+ schema = snapshot ? table.schema_by_id(snapshot[:schema_id]) : table.current_schema
26
+
27
+ if files.empty?
28
+ # TODO improve
29
+ schema =
30
+ schema.fields.to_h do |field|
31
+ dtype =
32
+ case field[:type]
33
+ when "int"
34
+ Polars::Int32
35
+ when "long"
36
+ Polars::Int64
37
+ when "double"
38
+ Polars::Float64
39
+ when "string"
40
+ Polars::String
41
+ when "timestamp"
42
+ Polars::Datetime
43
+ else
44
+ raise Todo
45
+ end
46
+
47
+ [field[:name], dtype]
48
+ end
49
+
50
+ LazyFrame.new(schema: schema)
51
+ else
52
+ sources = files.map { |v| v[:data_file_path] }
53
+
54
+ column_mapping = [
55
+ "iceberg-column-mapping",
56
+ arrow_schema(schema)
57
+ ]
58
+
59
+ deletion_files = [
60
+ "iceberg-position-delete",
61
+ files.map.with_index
62
+ .select { |v, i| v[:deletes].any? }
63
+ .to_h { |v, i| [i, v[:deletes].map { |d| d[:file_path] }] }
64
+ ]
65
+
66
+ scan_options = {
67
+ storage_options: @storage_options,
68
+ cast_options: Polars::ScanCastOptions._default_iceberg,
69
+ allow_missing_columns: true,
70
+ extra_columns: "ignore",
71
+ _column_mapping: column_mapping,
72
+ _deletion_files: deletion_files
73
+ }
74
+
75
+ Polars.scan_parquet(sources, **scan_options)
76
+ end
77
+ end
78
+
79
+ private
80
+
81
+ def arrow_schema(schema)
82
+ fields =
83
+ schema.fields.map do |field|
84
+ type =
85
+ case field[:type]
86
+ when "boolean"
87
+ "boolean"
88
+ when "int"
89
+ "int32"
90
+ when "long"
91
+ "int64"
92
+ when "float"
93
+ "float32"
94
+ when "double"
95
+ "float64"
96
+ else
97
+ raise Todo
98
+ end
99
+
100
+ {
101
+ name: field[:name],
102
+ type: type,
103
+ nullable: !field[:required],
104
+ metadata: {
105
+ "PARQUET:field_id" => field[:id].to_s
106
+ }
107
+ }
108
+ end
109
+
110
+ {fields: fields}
111
+ end
112
+ end
113
+ end
@@ -21,7 +21,14 @@ module Polars
21
21
  raise Todo
22
22
  end
23
23
 
24
- source.to_polars(snapshot_id:, storage_options:)
24
+ dataset =
25
+ IcebergDataset.new(
26
+ source,
27
+ snapshot_id:,
28
+ storage_options:
29
+ )
30
+
31
+ dataset.to_lazyframe
25
32
  end
26
33
  end
27
34
  end
data/lib/polars/io/ipc.rb CHANGED
@@ -187,8 +187,16 @@ module Polars
187
187
  # DataFrame.
188
188
  # @param row_count_offset [Integer]
189
189
  # Offset to start the row_count column (only use if the name is set).
190
+ # @param glob [Boolean]
191
+ # Expand path given via globbing rules.
190
192
  # @param storage_options [Hash]
191
193
  # Extra options that make sense for a particular storage connection.
194
+ # @param retries [Integer]
195
+ # Number of retries if accessing a cloud instance fails.
196
+ # @param file_cache_ttl [Integer]
197
+ # Amount of time to keep downloaded cloud files since their last access time,
198
+ # in seconds. Uses the `POLARS_FILE_CACHE_TTL` environment variable
199
+ # (which defaults to 1 hour) if not given.
192
200
  # @param hive_partitioning [Boolean]
193
201
  # Infer statistics and schema from Hive partitioned URL and use them
194
202
  # to prune reads. This is unset by default (i.e. `nil`), meaning it is
@@ -210,66 +218,37 @@ module Polars
210
218
  rechunk: true,
211
219
  row_count_name: nil,
212
220
  row_count_offset: 0,
221
+ glob: true,
213
222
  storage_options: nil,
223
+ retries: 2,
224
+ file_cache_ttl: nil,
214
225
  hive_partitioning: nil,
215
226
  hive_schema: nil,
216
227
  try_parse_hive_dates: true,
217
228
  include_file_paths: nil
218
229
  )
219
- _scan_ipc_impl(
220
- source,
221
- n_rows: n_rows,
222
- cache: cache,
223
- rechunk: rechunk,
224
- row_count_name: row_count_name,
225
- row_count_offset: row_count_offset,
226
- storage_options: storage_options,
227
- hive_partitioning: hive_partitioning,
228
- hive_schema: hive_schema,
229
- try_parse_hive_dates: try_parse_hive_dates,
230
- include_file_paths: include_file_paths
231
- )
232
- end
233
-
234
- # @private
235
- def _scan_ipc_impl(
236
- source,
237
- n_rows: nil,
238
- cache: true,
239
- rechunk: true,
240
- row_count_name: nil,
241
- row_count_offset: 0,
242
- storage_options: nil,
243
- hive_partitioning: nil,
244
- hive_schema: nil,
245
- try_parse_hive_dates: true,
246
- include_file_paths: nil
247
- )
248
- sources = []
249
- if Utils.pathlike?(source)
250
- source = Utils.normalize_filepath(source)
251
- elsif source.is_a?(::Array)
252
- if Utils.is_path_or_str_sequence(source)
253
- sources = source.map { |s| Utils.normalize_filepath(s) }
254
- else
255
- sources = source
256
- end
230
+ row_index_name = row_count_name
231
+ row_index_offset = row_count_offset
257
232
 
258
- source = nil
259
- end
233
+ sources = get_sources(source)
260
234
 
261
235
  rblf =
262
236
  RbLazyFrame.new_from_ipc(
263
- source,
264
237
  sources,
265
- n_rows,
266
- cache,
267
- rechunk,
268
- Utils.parse_row_index_args(row_count_name, row_count_offset),
269
- hive_partitioning,
270
- hive_schema,
271
- try_parse_hive_dates,
272
- include_file_paths
238
+ ScanOptions.new(
239
+ row_index: !row_index_name.nil? ? [row_index_name, row_index_offset] : nil,
240
+ pre_slice: !n_rows.nil? ? [0, n_rows] : nil,
241
+ include_file_paths: include_file_paths,
242
+ glob: glob,
243
+ hive_partitioning: hive_partitioning,
244
+ hive_schema: hive_schema,
245
+ try_parse_hive_dates: try_parse_hive_dates,
246
+ rechunk: rechunk,
247
+ cache: cache,
248
+ storage_options: !storage_options.nil? ? storage_options.to_a : nil,
249
+ retries: retries
250
+ ),
251
+ file_cache_ttl
273
252
  )
274
253
  Utils.wrap_ldf(rblf)
275
254
  end
@@ -2,9 +2,9 @@ module Polars
2
2
  module IO
3
3
  class ScanOptions
4
4
  attr_reader :row_index, :pre_slice, :cast_options, :extra_columns, :missing_columns,
5
- :include_file_paths, :glob, :hive_partitioning, :hive_schema, :try_parse_hive_dates,
5
+ :include_file_paths, :glob, :hidden_file_prefix, :hive_partitioning, :hive_schema, :try_parse_hive_dates,
6
6
  :rechunk, :cache, :storage_options, :credential_provider, :retries, :column_mapping,
7
- :default_values, :deletion_files
7
+ :default_values, :deletion_files, :table_statistics, :row_count
8
8
 
9
9
  def initialize(
10
10
  row_index: nil,
@@ -14,6 +14,7 @@ module Polars
14
14
  missing_columns: "raise",
15
15
  include_file_paths: nil,
16
16
  glob: true,
17
+ hidden_file_prefix: nil,
17
18
  hive_partitioning: nil,
18
19
  hive_schema: nil,
19
20
  try_parse_hive_dates: true,
@@ -24,7 +25,9 @@ module Polars
24
25
  retries: 2,
25
26
  column_mapping: nil,
26
27
  default_values: nil,
27
- deletion_files: nil
28
+ deletion_files: nil,
29
+ table_statistics: nil,
30
+ row_count: nil
28
31
  )
29
32
  @row_index = row_index
30
33
  @pre_slice = pre_slice
@@ -33,6 +36,7 @@ module Polars
33
36
  @missing_columns = missing_columns
34
37
  @include_file_paths = include_file_paths
35
38
  @glob = glob
39
+ @hidden_file_prefix = hidden_file_prefix
36
40
  @hive_partitioning = hive_partitioning
37
41
  @hive_schema = hive_schema
38
42
  @try_parse_hive_dates = try_parse_hive_dates
@@ -44,6 +48,8 @@ module Polars
44
48
  @column_mapping = column_mapping
45
49
  @default_values = default_values
46
50
  @deletion_files = deletion_files
51
+ @table_statistics = table_statistics
52
+ @row_count = row_count
47
53
  end
48
54
  end
49
55
  end
@@ -0,0 +1,17 @@
1
+ module Polars
2
+ module IO
3
+ private
4
+
5
+ def get_sources(source)
6
+ if Utils.pathlike?(source)
7
+ source = Utils.normalize_filepath(source, check_not_directory: false)
8
+ elsif Utils.is_path_or_str_sequence(source)
9
+ source = source.map { |s| Utils.normalize_filepath(s, check_not_directory: false) }
10
+ end
11
+ unless source.is_a?(::Array)
12
+ source = [source]
13
+ end
14
+ source
15
+ end
16
+ end
17
+ end
@@ -4143,6 +4143,9 @@ module Polars
4143
4143
  # Names of the struct columns that will be decomposed by its fields
4144
4144
  # @param more_columns [Array]
4145
4145
  # Additional columns to unnest, specified as positional arguments.
4146
+ # @param separator [String]
4147
+ # Rename output column names as a combination of the struct column name,
4148
+ # name separator and field name.
4146
4149
  #
4147
4150
  # @return [LazyFrame]
4148
4151
  #
@@ -4187,11 +4190,11 @@ module Polars
4187
4190
  # # │ foo ┆ 1 ┆ a ┆ true ┆ [1, 2] ┆ baz │
4188
4191
  # # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
4189
4192
  # # └────────┴─────┴─────┴──────┴───────────┴───────┘
4190
- def unnest(columns, *more_columns)
4193
+ def unnest(columns, *more_columns, separator: nil)
4191
4194
  subset = Utils.parse_list_into_selector(columns) | Utils.parse_list_into_selector(
4192
4195
  more_columns
4193
4196
  )
4194
- _from_rbldf(_ldf.unnest(subset._rbselector))
4197
+ _from_rbldf(_ldf.unnest(subset._rbselector, separator))
4195
4198
  end
4196
4199
 
4197
4200
  # Take two sorted DataFrames and merge them by the sorted key.
@@ -1,7 +1,7 @@
1
1
  module Polars
2
2
  # Options for scanning files.
3
3
  class ScanCastOptions
4
- attr_reader :integer_cast, :float_cast, :datetime_cast, :missing_struct_fields, :extra_struct_fields
4
+ attr_reader :integer_cast, :float_cast, :datetime_cast, :missing_struct_fields, :extra_struct_fields, :categorical_to_string
5
5
 
6
6
  # Common configuration for scanning files.
7
7
  #
@@ -50,6 +50,7 @@ module Polars
50
50
  datetime_cast: "forbid",
51
51
  missing_struct_fields: "raise",
52
52
  extra_struct_fields: "raise",
53
+ categorical_to_string: "forbid",
53
54
  _internal_call: false
54
55
  )
55
56
  if !_internal_call
@@ -61,6 +62,7 @@ module Polars
61
62
  @datetime_cast = datetime_cast
62
63
  @missing_struct_fields = missing_struct_fields
63
64
  @extra_struct_fields = extra_struct_fields
65
+ @categorical_to_string = categorical_to_string
64
66
  end
65
67
 
66
68
  def self._default
@@ -75,6 +77,7 @@ module Polars
75
77
  datetime_cast: ["nanosecond-downcast", "convert-timezone"],
76
78
  missing_struct_fields: "insert",
77
79
  extra_struct_fields: "ignore",
80
+ categorical_to_string: "allow",
78
81
  _internal_call: true
79
82
  )
80
83
  end
@@ -1063,14 +1063,14 @@ module Polars
1063
1063
  # df.select(Polars.cs.decimal)
1064
1064
  # # =>
1065
1065
  # # shape: (2, 2)
1066
- # # ┌──────────────┬───────────────┐
1067
- # # │ bar ┆ baz │
1068
- # # │ --- ┆ --- │
1069
- # # │ decimal[*,0] ┆ decimal[10,5] │
1070
- # # ╞══════════════╪═══════════════╡
1071
- # # │ 123 ┆ 2.00050 │
1072
- # # │ 456 ┆ -50.55550 │
1073
- # # └──────────────┴───────────────┘
1066
+ # # ┌───────────────┬───────────────┐
1067
+ # # │ bar ┆ baz │
1068
+ # # │ --- ┆ --- │
1069
+ # # │ decimal[38,0] ┆ decimal[10,5] │
1070
+ # # ╞═══════════════╪═══════════════╡
1071
+ # # │ 123 ┆ 2.00050 │
1072
+ # # │ 456 ┆ -50.55550 │
1073
+ # # └───────────────┴───────────────┘
1074
1074
  #
1075
1075
  # @example Select all columns *except* the decimal ones:
1076
1076
  #
data/lib/polars/series.rb CHANGED
@@ -5747,11 +5747,31 @@ module Polars
5747
5747
  end
5748
5748
 
5749
5749
  base_type = dtype.is_a?(DataType) ? dtype.class : dtype
5750
- if [Date, Datetime, Duration, Time, Categorical, Boolean, Enum, Decimal].include?(base_type)
5750
+ if [Date, Datetime, Duration, Time, Categorical, Boolean, Enum].include?(base_type) || dtype.is_a?(Decimal)
5751
5751
  if rbseries.dtype != dtype
5752
5752
  rbseries = rbseries.cast(dtype, true)
5753
5753
  end
5754
5754
  end
5755
+
5756
+ # Uninstanced Decimal is a bit special and has various inference paths
5757
+ if dtype == Decimal
5758
+ if rbseries.dtype == String
5759
+ rbseries = rbseries.str_to_decimal_infer(0)
5760
+ elsif rbseries.dtype.float?
5761
+ # Go through string so we infer an appropriate scale.
5762
+ rbseries = rbseries.cast(
5763
+ String, strict: strict, wrap_numerical: false
5764
+ ).str_to_decimal_infer(0)
5765
+ elsif rbseries.dtype.integer? || rbseries.dtype == Null
5766
+ rbseries = rbseries.cast(
5767
+ Decimal.new(nil, 0), strict: strict, wrap_numerical: false
5768
+ )
5769
+ elsif !rbseries.dtype.is_a?(Decimal)
5770
+ msg = "can't convert #{rbseries.dtype} to Decimal"
5771
+ raise TypeError, msg
5772
+ end
5773
+ end
5774
+
5755
5775
  rbseries
5756
5776
  elsif dtype == Struct
5757
5777
  struct_schema = dtype.is_a?(Struct) ? dtype.to_schema : nil
@@ -5856,6 +5876,7 @@ module Polars
5856
5876
  UInt16 => RbSeries.method(:new_opt_u16),
5857
5877
  UInt32 => RbSeries.method(:new_opt_u32),
5858
5878
  UInt64 => RbSeries.method(:new_opt_u64),
5879
+ UInt128 => RbSeries.method(:new_opt_u128),
5859
5880
  Decimal => RbSeries.method(:new_decimal),
5860
5881
  Date => RbSeries.method(:new_from_any_values),
5861
5882
  Datetime => RbSeries.method(:new_from_any_values),
@@ -5882,6 +5903,7 @@ module Polars
5882
5903
  u16: RbSeries.method(:new_opt_u16),
5883
5904
  u32: RbSeries.method(:new_opt_u32),
5884
5905
  u64: RbSeries.method(:new_opt_u64),
5906
+ u128: RbSeries.method(:new_opt_u128),
5885
5907
  bool: RbSeries.method(:new_opt_bool),
5886
5908
  str: RbSeries.method(:new_str)
5887
5909
  }
@@ -247,7 +247,7 @@ module Polars
247
247
  # # ┌───────────┬─────────────────┐
248
248
  # # │ numbers ┆ numbers_decimal │
249
249
  # # │ --- ┆ --- │
250
- # # │ str ┆ decimal[*,2]
250
+ # # │ str ┆ decimal[38,2]
251
251
  # # ╞═══════════╪═════════════════╡
252
252
  # # │ 40.12 ┆ 40.12 │
253
253
  # # │ 3420.13 ┆ 3420.13 │
@@ -200,7 +200,7 @@ module Polars
200
200
  # s.str.to_decimal
201
201
  # # =>
202
202
  # # shape: (7,)
203
- # # Series: '' [decimal[*,2]]
203
+ # # Series: '' [decimal[8,2]]
204
204
  # # [
205
205
  # # 40.12
206
206
  # # 3420.13
@@ -90,8 +90,8 @@ module Polars
90
90
  end
91
91
  end
92
92
 
93
- def self._to_ruby_decimal(digits, scale)
94
- BigDecimal("#{digits}e#{scale}")
93
+ def self._to_ruby_decimal(prec, value)
94
+ BigDecimal(value, prec)
95
95
  end
96
96
  end
97
97
  end
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.22.0"
3
+ VERSION = "0.23.0"
4
4
  end