polars-df 0.19.0-x86_64-darwin → 0.21.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -0
  3. data/Cargo.lock +211 -320
  4. data/LICENSE-THIRD-PARTY.txt +1256 -2131
  5. data/LICENSE.txt +1 -1
  6. data/lib/polars/3.2/polars.bundle +0 -0
  7. data/lib/polars/3.3/polars.bundle +0 -0
  8. data/lib/polars/3.4/polars.bundle +0 -0
  9. data/lib/polars/cat_name_space.rb +3 -43
  10. data/lib/polars/catalog/unity/catalog_info.rb +20 -0
  11. data/lib/polars/catalog/unity/column_info.rb +31 -0
  12. data/lib/polars/catalog/unity/namespace_info.rb +21 -0
  13. data/lib/polars/catalog/unity/table_info.rb +50 -0
  14. data/lib/polars/catalog.rb +448 -0
  15. data/lib/polars/convert.rb +10 -0
  16. data/lib/polars/data_frame.rb +151 -30
  17. data/lib/polars/data_types.rb +47 -3
  18. data/lib/polars/exceptions.rb +7 -2
  19. data/lib/polars/expr.rb +48 -39
  20. data/lib/polars/functions/col.rb +6 -5
  21. data/lib/polars/functions/eager.rb +1 -1
  22. data/lib/polars/functions/lazy.rb +114 -15
  23. data/lib/polars/functions/repeat.rb +4 -0
  24. data/lib/polars/io/csv.rb +18 -0
  25. data/lib/polars/io/json.rb +16 -0
  26. data/lib/polars/io/ndjson.rb +13 -0
  27. data/lib/polars/io/parquet.rb +45 -63
  28. data/lib/polars/io/scan_options.rb +47 -0
  29. data/lib/polars/lazy_frame.rb +163 -75
  30. data/lib/polars/list_expr.rb +213 -17
  31. data/lib/polars/list_name_space.rb +121 -8
  32. data/lib/polars/meta_expr.rb +14 -29
  33. data/lib/polars/scan_cast_options.rb +64 -0
  34. data/lib/polars/schema.rb +6 -1
  35. data/lib/polars/selector.rb +138 -0
  36. data/lib/polars/selectors.rb +931 -202
  37. data/lib/polars/series.rb +46 -19
  38. data/lib/polars/string_expr.rb +24 -3
  39. data/lib/polars/string_name_space.rb +12 -1
  40. data/lib/polars/utils/parse.rb +40 -0
  41. data/lib/polars/utils.rb +5 -1
  42. data/lib/polars/version.rb +1 -1
  43. data/lib/polars.rb +8 -0
  44. metadata +10 -2
data/lib/polars/series.rb CHANGED
@@ -563,7 +563,7 @@ module Polars
563
563
  # # => false
564
564
  def any?(ignore_nulls: true, &block)
565
565
  if block_given?
566
- apply(skip_nulls: ignore_nulls, &block).any?
566
+ apply(return_dtype: Boolean, skip_nulls: ignore_nulls, &block).any?
567
567
  else
568
568
  _s.any(ignore_nulls)
569
569
  end
@@ -587,7 +587,7 @@ module Polars
587
587
  # # => true
588
588
  def all?(ignore_nulls: true, &block)
589
589
  if block_given?
590
- apply(skip_nulls: ignore_nulls, &block).all?
590
+ apply(return_dtype: Boolean, skip_nulls: ignore_nulls, &block).all?
591
591
  else
592
592
  _s.all(ignore_nulls)
593
593
  end
@@ -611,7 +611,7 @@ module Polars
611
611
  # # => true
612
612
  def none?(&block)
613
613
  if block_given?
614
- apply(&block).none?
614
+ apply(return_dtype: Boolean, &block).none?
615
615
  else
616
616
  to_frame.select(Polars.col(name).is_not.all).to_series[0]
617
617
  end
@@ -1004,6 +1004,13 @@ module Polars
1004
1004
 
1005
1005
  # Get dummy variables.
1006
1006
  #
1007
+ # @param separator [String]
1008
+ # Separator/delimiter used when generating column names.
1009
+ # @param drop_first [Boolean]
1010
+ # Remove the first category from the variable being encoded.
1011
+ # @param drop_nulls [Boolean]
1012
+ # If there are `nil` values in the series, a `null` column is not generated.
1013
+ #
1007
1014
  # @return [DataFrame]
1008
1015
  #
1009
1016
  # @example
@@ -1020,8 +1027,8 @@ module Polars
1020
1027
  # # │ 0 ┆ 1 ┆ 0 │
1021
1028
  # # │ 0 ┆ 0 ┆ 1 │
1022
1029
  # # └─────┴─────┴─────┘
1023
- def to_dummies(separator: "_", drop_first: false)
1024
- Utils.wrap_df(_s.to_dummies(separator, drop_first))
1030
+ def to_dummies(separator: "_", drop_first: false, drop_nulls: false)
1031
+ Utils.wrap_df(_s.to_dummies(separator, drop_first, drop_nulls))
1025
1032
  end
1026
1033
 
1027
1034
  # Bin continuous values into discrete categories.
@@ -1234,6 +1241,14 @@ module Polars
1234
1241
  #
1235
1242
  # @param sort [Boolean]
1236
1243
  # Ensure the output is sorted from most values to least.
1244
+ # @param parallel [Boolean]
1245
+ # Execute the computation in parallel.
1246
+ # @param name [String]
1247
+ # Give the resulting count column a specific name; if `normalize` is
1248
+ # true this defaults to "proportion", otherwise defaults to "count".
1249
+ # @param normalize [Boolean]
1250
+ # If true, the count is returned as the relative frequency of unique
1251
+ # values normalized to 1.0.
1237
1252
  #
1238
1253
  # @return [DataFrame]
1239
1254
  #
@@ -1321,9 +1336,6 @@ module Polars
1321
1336
  # @param min_periods [Integer]
1322
1337
  # Number of valid values there should be in the window before the expression
1323
1338
  # is evaluated. valid values = `length - null_count`
1324
- # @param parallel [Boolean]
1325
- # Run in parallel. Don't do this in a group by or another operation that
1326
- # already has much parallelization.
1327
1339
  #
1328
1340
  # @return [Series]
1329
1341
  #
@@ -1348,7 +1360,7 @@ module Polars
1348
1360
  # # -15
1349
1361
  # # -24
1350
1362
  # # ]
1351
- def cumulative_eval(expr, min_periods: 1, parallel: false)
1363
+ def cumulative_eval(expr, min_periods: 1)
1352
1364
  super
1353
1365
  end
1354
1366
 
@@ -1725,6 +1737,10 @@ module Polars
1725
1737
  #
1726
1738
  # @param reverse [Boolean]
1727
1739
  # Reverse sort.
1740
+ # @param nulls_last [Boolean]
1741
+ # Place null values last instead of first.
1742
+ # @param multithreaded [Boolean]
1743
+ # Sort using multiple threads.
1728
1744
  # @param in_place [Boolean]
1729
1745
  # Sort in place.
1730
1746
  #
@@ -1879,6 +1895,13 @@ module Polars
1879
1895
  #
1880
1896
  # @param element [Object]
1881
1897
  # Expression or scalar value.
1898
+ # @param side ['any', 'left', 'right']
1899
+ # If 'any', the index of the first suitable location found is given.
1900
+ # If 'left', the index of the leftmost suitable location found is given.
1901
+ # If 'right', the index of the rightmost suitable location found is given.
1902
+ # @param descending [Boolean]
1903
+ # Boolean indicating whether the values are descending or not (they
1904
+ # are required to be sorted either way).
1882
1905
  #
1883
1906
  # @return [Integer]
1884
1907
  #
@@ -1927,12 +1950,12 @@ module Polars
1927
1950
  # # 5
1928
1951
  # # 6
1929
1952
  # # ]
1930
- def search_sorted(element, side: "any")
1953
+ def search_sorted(element, side: "any", descending: false)
1931
1954
  if element.is_a?(Integer) || element.is_a?(Float)
1932
- return Polars.select(Polars.lit(self).search_sorted(element, side: side)).item
1955
+ return Polars.select(Polars.lit(self).search_sorted(element, side: side, descending: descending)).item
1933
1956
  end
1934
1957
  element = Series.new(element)
1935
- Polars.select(Polars.lit(self).search_sorted(element, side: side)).to_series
1958
+ Polars.select(Polars.lit(self).search_sorted(element, side: side, descending: descending)).to_series
1936
1959
  end
1937
1960
 
1938
1961
  # Get unique elements in series.
@@ -2150,7 +2173,7 @@ module Polars
2150
2173
  # @return [Series]
2151
2174
  #
2152
2175
  # @example
2153
- # s = Polars::Series.new("a", [1, 2, 3])
2176
+ # s = Polars::Series.new("a", [[1, 2, 3]])
2154
2177
  # s2 = Polars::Series.new("b", [2, 4, nil])
2155
2178
  # s2.is_in(s)
2156
2179
  # # =>
@@ -3128,7 +3151,7 @@ module Polars
3128
3151
  #
3129
3152
  # @example
3130
3153
  # s = Polars::Series.new("a", [1, 2, 3])
3131
- # s.map_elements { |x| x + 10 }
3154
+ # s.map_elements(return_dtype: Polars::Int64) { |x| x + 10 }
3132
3155
  # # =>
3133
3156
  # # shape: (3,)
3134
3157
  # # Series: 'a' [i64]
@@ -3143,7 +3166,7 @@ module Polars
3143
3166
  else
3144
3167
  pl_return_dtype = Utils.rb_type_to_dtype(return_dtype)
3145
3168
  end
3146
- Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls))
3169
+ Utils.wrap_s(_s.map_elements(func, pl_return_dtype, skip_nulls))
3147
3170
  end
3148
3171
  alias_method :map, :map_elements
3149
3172
  alias_method :apply, :map_elements
@@ -3417,6 +3440,8 @@ module Polars
3417
3440
  # a result. If None, it will be set equal to window size.
3418
3441
  # @param center [Boolean]
3419
3442
  # Set the labels at the center of the window
3443
+ # @param ddof [Integer]
3444
+ # "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
3420
3445
  #
3421
3446
  # @return [Series]
3422
3447
  #
@@ -3460,6 +3485,8 @@ module Polars
3460
3485
  # a result. If None, it will be set equal to window size.
3461
3486
  # @param center [Boolean]
3462
3487
  # Set the labels at the center of the window
3488
+ # @param ddof [Integer]
3489
+ # "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
3463
3490
  #
3464
3491
  # @return [Series]
3465
3492
  #
@@ -3556,10 +3583,10 @@ module Polars
3556
3583
  # # [
3557
3584
  # # null
3558
3585
  # # null
3559
- # # 1.0
3560
3586
  # # 2.0
3561
3587
  # # 3.0
3562
3588
  # # 4.0
3589
+ # # 6.0
3563
3590
  # # ]
3564
3591
  #
3565
3592
  # @example
@@ -3640,7 +3667,7 @@ module Polars
3640
3667
  # # Series: 'a' [i64]
3641
3668
  # # [
3642
3669
  # # 5
3643
- # # 3
3670
+ # # 2
3644
3671
  # # ]
3645
3672
  def sample(
3646
3673
  n: nil,
@@ -4252,8 +4279,8 @@ module Polars
4252
4279
  # # Series: 'a' [i64]
4253
4280
  # # [
4254
4281
  # # 2
4255
- # # 1
4256
4282
  # # 3
4283
+ # # 1
4257
4284
  # # ]
4258
4285
  def shuffle(seed: nil)
4259
4286
  super
@@ -4366,7 +4393,7 @@ module Polars
4366
4393
  # # 99
4367
4394
  # # ]
4368
4395
  def extend_constant(value, n)
4369
- Utils.wrap_s(_s.extend_constant(value, n))
4396
+ super
4370
4397
  end
4371
4398
 
4372
4399
  # Flags the Series as sorted.
@@ -63,6 +63,13 @@ module Polars
63
63
  # in the target string.
64
64
  # @param cache [Boolean]
65
65
  # Use a cache of unique, converted datetimes to apply the conversion.
66
+ # @param ambiguous ['raise', 'earliest', 'latest', 'null']
67
+ # Determine how to deal with ambiguous datetimes:
68
+ #
69
+ # - `'raise'` (default): raise
70
+ # - `'earliest'`: use the earliest datetime
71
+ # - `'latest'`: use the latest datetime
72
+ # - `'null'`: set to null
66
73
  #
67
74
  # @return [Expr]
68
75
  #
@@ -145,6 +152,8 @@ module Polars
145
152
  # @param exact [Boolean]
146
153
  # - If true, require an exact format match.
147
154
  # - If false, allow the format to match anywhere in the target string.
155
+ # @param cache [Boolean]
156
+ # Use a cache of unique, converted dates to apply the datetime conversion.
148
157
  # @param utc [Boolean]
149
158
  # Parse timezone aware datetimes as UTC. This may be useful if you have data
150
159
  # with mixed offsets.
@@ -590,6 +599,7 @@ module Polars
590
599
  # # │ null ┆ null │
591
600
  # # └──────────────┴──────────────┘
592
601
  def pad_start(length, fill_char = " ")
602
+ length = Utils.parse_into_expression(length)
593
603
  Utils.wrap_expr(_rbexpr.str_pad_start(length, fill_char))
594
604
  end
595
605
  alias_method :rjust, :pad_start
@@ -620,6 +630,7 @@ module Polars
620
630
  # # │ null ┆ null │
621
631
  # # └──────────────┴──────────────┘
622
632
  def pad_end(length, fill_char = " ")
633
+ length = Utils.parse_into_expression(length)
623
634
  Utils.wrap_expr(_rbexpr.str_pad_end(length, fill_char))
624
635
  end
625
636
  alias_method :ljust, :pad_end
@@ -664,6 +675,9 @@ module Polars
664
675
  # A valid regex pattern.
665
676
  # @param literal [Boolean]
666
677
  # Treat pattern as a literal string.
678
+ # @param strict [Boolean]
679
+ # Raise an error if the underlying pattern is not a valid regex,
680
+ # otherwise mask out with a null value.
667
681
  #
668
682
  # @return [Expr]
669
683
  #
@@ -780,6 +794,9 @@ module Polars
780
794
  # @param dtype [Object]
781
795
  # The dtype to cast the extracted value to. If nil, the dtype will be
782
796
  # inferred from the JSON value.
797
+ # @param infer_schema_length [Integer]
798
+ # The maximum number of rows to scan for schema inference.
799
+ # If set to `nil`, the full data may be scanned *(this is slow)*.
783
800
  #
784
801
  # @return [Expr]
785
802
  #
@@ -1036,6 +1053,8 @@ module Polars
1036
1053
  #
1037
1054
  # @param pattern [String]
1038
1055
  # A valid regex pattern
1056
+ # @param literal [Boolean]
1057
+ # Treat `pattern` as a literal string, not as a regular expression.
1039
1058
  #
1040
1059
  # @return [Expr]
1041
1060
  #
@@ -1177,6 +1196,8 @@ module Polars
1177
1196
  # Replacement string.
1178
1197
  # @param literal [Boolean]
1179
1198
  # Treat pattern as a literal string.
1199
+ # @param n [Integer]
1200
+ # Number of matches to replace.
1180
1201
  #
1181
1202
  # @return [Expr]
1182
1203
  #
@@ -1328,9 +1349,9 @@ module Polars
1328
1349
  # # │ cafe ┆ 51966 │
1329
1350
  # # │ null ┆ null │
1330
1351
  # # └──────┴────────┘
1331
- def to_integer(base: 10, strict: true)
1352
+ def to_integer(base: 10, dtype: Int64, strict: true)
1332
1353
  base = Utils.parse_into_expression(base, str_as_lit: false)
1333
- Utils.wrap_expr(_rbexpr.str_to_integer(base, strict))
1354
+ Utils.wrap_expr(_rbexpr.str_to_integer(base, dtype, strict))
1334
1355
  end
1335
1356
 
1336
1357
  # Parse integers with base radix from strings.
@@ -1437,7 +1458,7 @@ module Polars
1437
1458
  # Polars.col("lyrics")
1438
1459
  # .str.replace_many(
1439
1460
  # ["me", "you", "they"],
1440
- # ""
1461
+ # [""]
1441
1462
  # )
1442
1463
  # .alias("removes_pronouns")
1443
1464
  # )
@@ -63,6 +63,13 @@ module Polars
63
63
  # in the target string.
64
64
  # @param cache [Boolean]
65
65
  # Use a cache of unique, converted datetimes to apply the conversion.
66
+ # @param ambiguous ['raise', 'earliest', 'latest', 'null']
67
+ # Determine how to deal with ambiguous datetimes:
68
+ #
69
+ # - `'raise'` (default): raise
70
+ # - `'earliest'`: use the earliest datetime
71
+ # - `'latest'`: use the latest datetime
72
+ # - `'null'`: set to null
66
73
  #
67
74
  # @return [Series]
68
75
  #
@@ -120,7 +127,7 @@ module Polars
120
127
  # Parse a Series of dtype Utf8 to a Date/Datetime Series.
121
128
  #
122
129
  # @param datatype [Symbol]
123
- # `:date`, `:dateime`, or `:time`.
130
+ # `:date`, `:datetime`, or `:time`.
124
131
  # @param fmt [String]
125
132
  # Format to use, refer to the
126
133
  # [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
@@ -229,6 +236,10 @@ module Polars
229
236
  #
230
237
  # @param delimiter [String]
231
238
  # The delimiter to insert between consecutive string values.
239
+ # @param ignore_nulls [Boolean]
240
+ # Ignore null values (default).
241
+ # If set to `false`, null values will be propagated. This means that
242
+ # if the column contains any null values, the output is null.
232
243
  #
233
244
  # @return [Series]
234
245
  #
@@ -33,6 +33,46 @@ module Polars
33
33
  exprs
34
34
  end
35
35
 
36
+ def self.parse_into_selector(i, strict: true)
37
+ if i.is_a?(::String)
38
+ cs = Selectors
39
+
40
+ cs.by_name([i], require_all: strict)
41
+ elsif i.is_a?(Selector)
42
+ i
43
+ elsif i.is_a?(Expr)
44
+ i.meta.as_selector
45
+ else
46
+ msg = "cannot turn #{i.inspect} into selector"
47
+ raise TypeError, msg
48
+ end
49
+ end
50
+
51
+ def self.parse_list_into_selector(inputs, strict: true)
52
+ if inputs.is_a?(::Array)
53
+ cs = Selectors
54
+
55
+ columns = inputs.select { |i| i.is_a?(::String) }
56
+ selector = cs.by_name(columns, require_all: strict)
57
+
58
+ if columns.length == inputs.length
59
+ return selector
60
+ end
61
+
62
+ # A bit cleaner
63
+ if columns.length == 0
64
+ selector = cs.empty
65
+ end
66
+
67
+ inputs.each do |i|
68
+ selector |= parse_into_selector(i, strict: strict)
69
+ end
70
+ selector
71
+ else
72
+ parse_into_selector(inputs, strict: strict)
73
+ end
74
+ end
75
+
36
76
  def self._parse_positional_inputs(inputs, structify: false)
37
77
  inputs_iter = _parse_inputs_as_iterable(inputs)
38
78
  inputs_iter.map { |e| parse_into_expression(e, structify: structify) }
data/lib/polars/utils.rb CHANGED
@@ -107,7 +107,7 @@ module Polars
107
107
  end
108
108
 
109
109
  def self.is_selector(obj)
110
- obj.is_a?(Selectors::SelectorProxy)
110
+ obj.is_a?(Selector)
111
111
  end
112
112
 
113
113
  def self.expand_selector(target, selector, strict: true)
@@ -161,5 +161,9 @@ module Polars
161
161
  # escapes _only_ those metachars with meaning to the rust regex crate
162
162
  Plr.re_escape(s)
163
163
  end
164
+
165
+ def self.parse_into_datatype_expr(input)
166
+ raise Todo
167
+ end
164
168
  end
165
169
  end
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.19.0"
3
+ VERSION = "0.21.0"
4
4
  end
data/lib/polars.rb CHANGED
@@ -19,6 +19,11 @@ require_relative "polars/binary_expr"
19
19
  require_relative "polars/binary_name_space"
20
20
  require_relative "polars/cat_expr"
21
21
  require_relative "polars/cat_name_space"
22
+ require_relative "polars/catalog"
23
+ require_relative "polars/catalog/unity/catalog_info"
24
+ require_relative "polars/catalog/unity/column_info"
25
+ require_relative "polars/catalog/unity/namespace_info"
26
+ require_relative "polars/catalog/unity/table_info"
22
27
  require_relative "polars/config"
23
28
  require_relative "polars/convert"
24
29
  require_relative "polars/plot"
@@ -54,6 +59,7 @@ require_relative "polars/io/ipc"
54
59
  require_relative "polars/io/json"
55
60
  require_relative "polars/io/ndjson"
56
61
  require_relative "polars/io/parquet"
62
+ require_relative "polars/io/scan_options"
57
63
  require_relative "polars/lazy_frame"
58
64
  require_relative "polars/lazy_group_by"
59
65
  require_relative "polars/list_expr"
@@ -61,7 +67,9 @@ require_relative "polars/list_name_space"
61
67
  require_relative "polars/meta_expr"
62
68
  require_relative "polars/name_expr"
63
69
  require_relative "polars/rolling_group_by"
70
+ require_relative "polars/scan_cast_options"
64
71
  require_relative "polars/schema"
72
+ require_relative "polars/selector"
65
73
  require_relative "polars/selectors"
66
74
  require_relative "polars/series"
67
75
  require_relative "polars/slice"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.19.0
4
+ version: 0.21.0
5
5
  platform: x86_64-darwin
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-05-20 00:00:00.000000000 Z
11
+ date: 2025-08-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal
@@ -49,6 +49,11 @@ files:
49
49
  - lib/polars/binary_name_space.rb
50
50
  - lib/polars/cat_expr.rb
51
51
  - lib/polars/cat_name_space.rb
52
+ - lib/polars/catalog.rb
53
+ - lib/polars/catalog/unity/catalog_info.rb
54
+ - lib/polars/catalog/unity/column_info.rb
55
+ - lib/polars/catalog/unity/namespace_info.rb
56
+ - lib/polars/catalog/unity/table_info.rb
52
57
  - lib/polars/config.rb
53
58
  - lib/polars/convert.rb
54
59
  - lib/polars/data_frame.rb
@@ -84,6 +89,7 @@ files:
84
89
  - lib/polars/io/json.rb
85
90
  - lib/polars/io/ndjson.rb
86
91
  - lib/polars/io/parquet.rb
92
+ - lib/polars/io/scan_options.rb
87
93
  - lib/polars/lazy_frame.rb
88
94
  - lib/polars/lazy_group_by.rb
89
95
  - lib/polars/list_expr.rb
@@ -92,7 +98,9 @@ files:
92
98
  - lib/polars/name_expr.rb
93
99
  - lib/polars/plot.rb
94
100
  - lib/polars/rolling_group_by.rb
101
+ - lib/polars/scan_cast_options.rb
95
102
  - lib/polars/schema.rb
103
+ - lib/polars/selector.rb
96
104
  - lib/polars/selectors.rb
97
105
  - lib/polars/series.rb
98
106
  - lib/polars/slice.rb