polars-df 0.20.0-x86_64-darwin → 0.21.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +11 -0
  3. data/Cargo.lock +192 -186
  4. data/LICENSE-THIRD-PARTY.txt +1431 -1810
  5. data/LICENSE.txt +1 -1
  6. data/lib/polars/3.2/polars.bundle +0 -0
  7. data/lib/polars/3.3/polars.bundle +0 -0
  8. data/lib/polars/3.4/polars.bundle +0 -0
  9. data/lib/polars/cat_name_space.rb +3 -43
  10. data/lib/polars/catalog/unity/catalog_info.rb +20 -0
  11. data/lib/polars/catalog/unity/column_info.rb +31 -0
  12. data/lib/polars/catalog/unity/namespace_info.rb +21 -0
  13. data/lib/polars/catalog/unity/table_info.rb +50 -0
  14. data/lib/polars/catalog.rb +448 -0
  15. data/lib/polars/convert.rb +10 -0
  16. data/lib/polars/data_frame.rb +151 -30
  17. data/lib/polars/data_types.rb +47 -3
  18. data/lib/polars/exceptions.rb +7 -2
  19. data/lib/polars/expr.rb +34 -31
  20. data/lib/polars/functions/col.rb +6 -5
  21. data/lib/polars/functions/lazy.rb +114 -15
  22. data/lib/polars/functions/repeat.rb +4 -0
  23. data/lib/polars/io/csv.rb +18 -0
  24. data/lib/polars/io/json.rb +16 -0
  25. data/lib/polars/io/ndjson.rb +13 -0
  26. data/lib/polars/io/parquet.rb +45 -63
  27. data/lib/polars/io/scan_options.rb +47 -0
  28. data/lib/polars/lazy_frame.rb +163 -75
  29. data/lib/polars/list_expr.rb +204 -7
  30. data/lib/polars/list_name_space.rb +120 -1
  31. data/lib/polars/meta_expr.rb +7 -22
  32. data/lib/polars/scan_cast_options.rb +64 -0
  33. data/lib/polars/schema.rb +6 -1
  34. data/lib/polars/selector.rb +138 -0
  35. data/lib/polars/selectors.rb +931 -202
  36. data/lib/polars/series.rb +34 -11
  37. data/lib/polars/string_expr.rb +24 -3
  38. data/lib/polars/string_name_space.rb +11 -0
  39. data/lib/polars/utils/parse.rb +40 -0
  40. data/lib/polars/utils.rb +5 -1
  41. data/lib/polars/version.rb +1 -1
  42. data/lib/polars.rb +8 -0
  43. metadata +10 -2
data/lib/polars/series.rb CHANGED
@@ -563,7 +563,7 @@ module Polars
563
563
  # # => false
564
564
  def any?(ignore_nulls: true, &block)
565
565
  if block_given?
566
- apply(skip_nulls: ignore_nulls, &block).any?
566
+ apply(return_dtype: Boolean, skip_nulls: ignore_nulls, &block).any?
567
567
  else
568
568
  _s.any(ignore_nulls)
569
569
  end
@@ -587,7 +587,7 @@ module Polars
587
587
  # # => true
588
588
  def all?(ignore_nulls: true, &block)
589
589
  if block_given?
590
- apply(skip_nulls: ignore_nulls, &block).all?
590
+ apply(return_dtype: Boolean, skip_nulls: ignore_nulls, &block).all?
591
591
  else
592
592
  _s.all(ignore_nulls)
593
593
  end
@@ -611,7 +611,7 @@ module Polars
611
611
  # # => true
612
612
  def none?(&block)
613
613
  if block_given?
614
- apply(&block).none?
614
+ apply(return_dtype: Boolean, &block).none?
615
615
  else
616
616
  to_frame.select(Polars.col(name).is_not.all).to_series[0]
617
617
  end
@@ -1004,6 +1004,13 @@ module Polars
1004
1004
 
1005
1005
  # Get dummy variables.
1006
1006
  #
1007
+ # @param separator [String]
1008
+ # Separator/delimiter used when generating column names.
1009
+ # @param drop_first [Boolean]
1010
+ # Remove the first category from the variable being encoded.
1011
+ # @param drop_nulls [Boolean]
1012
+ # If there are `nil` values in the series, a `null` column is not generated.
1013
+ #
1007
1014
  # @return [DataFrame]
1008
1015
  #
1009
1016
  # @example
@@ -1020,8 +1027,8 @@ module Polars
1020
1027
  # # │ 0 ┆ 1 ┆ 0 │
1021
1028
  # # │ 0 ┆ 0 ┆ 1 │
1022
1029
  # # └─────┴─────┴─────┘
1023
- def to_dummies(separator: "_", drop_first: false)
1024
- Utils.wrap_df(_s.to_dummies(separator, drop_first))
1030
+ def to_dummies(separator: "_", drop_first: false, drop_nulls: false)
1031
+ Utils.wrap_df(_s.to_dummies(separator, drop_first, drop_nulls))
1025
1032
  end
1026
1033
 
1027
1034
  # Bin continuous values into discrete categories.
@@ -1234,6 +1241,14 @@ module Polars
1234
1241
  #
1235
1242
  # @param sort [Boolean]
1236
1243
  # Ensure the output is sorted from most values to least.
1244
+ # @param parallel [Boolean]
1245
+ # Execute the computation in parallel.
1246
+ # @param name [String]
1247
+ # Give the resulting count column a specific name; if `normalize` is
1248
+ # true this defaults to "proportion", otherwise defaults to "count".
1249
+ # @param normalize [Boolean]
1250
+ # If true, the count is returned as the relative frequency of unique
1251
+ # values normalized to 1.0.
1237
1252
  #
1238
1253
  # @return [DataFrame]
1239
1254
  #
@@ -1722,6 +1737,10 @@ module Polars
1722
1737
  #
1723
1738
  # @param reverse [Boolean]
1724
1739
  # Reverse sort.
1740
+ # @param nulls_last [Boolean]
1741
+ # Place null values last instead of first.
1742
+ # @param multithreaded [Boolean]
1743
+ # Sort using multiple threads.
1725
1744
  # @param in_place [Boolean]
1726
1745
  # Sort in place.
1727
1746
  #
@@ -2154,7 +2173,7 @@ module Polars
2154
2173
  # @return [Series]
2155
2174
  #
2156
2175
  # @example
2157
- # s = Polars::Series.new("a", [1, 2, 3])
2176
+ # s = Polars::Series.new("a", [[1, 2, 3]])
2158
2177
  # s2 = Polars::Series.new("b", [2, 4, nil])
2159
2178
  # s2.is_in(s)
2160
2179
  # # =>
@@ -3132,7 +3151,7 @@ module Polars
3132
3151
  #
3133
3152
  # @example
3134
3153
  # s = Polars::Series.new("a", [1, 2, 3])
3135
- # s.map_elements { |x| x + 10 }
3154
+ # s.map_elements(return_dtype: Polars::Int64) { |x| x + 10 }
3136
3155
  # # =>
3137
3156
  # # shape: (3,)
3138
3157
  # # Series: 'a' [i64]
@@ -3147,7 +3166,7 @@ module Polars
3147
3166
  else
3148
3167
  pl_return_dtype = Utils.rb_type_to_dtype(return_dtype)
3149
3168
  end
3150
- Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls))
3169
+ Utils.wrap_s(_s.map_elements(func, pl_return_dtype, skip_nulls))
3151
3170
  end
3152
3171
  alias_method :map, :map_elements
3153
3172
  alias_method :apply, :map_elements
@@ -3421,6 +3440,8 @@ module Polars
3421
3440
  # a result. If None, it will be set equal to window size.
3422
3441
  # @param center [Boolean]
3423
3442
  # Set the labels at the center of the window
3443
+ # @param ddof [Integer]
3444
+ # "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
3424
3445
  #
3425
3446
  # @return [Series]
3426
3447
  #
@@ -3464,6 +3485,8 @@ module Polars
3464
3485
  # a result. If None, it will be set equal to window size.
3465
3486
  # @param center [Boolean]
3466
3487
  # Set the labels at the center of the window
3488
+ # @param ddof [Integer]
3489
+ # "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
3467
3490
  #
3468
3491
  # @return [Series]
3469
3492
  #
@@ -3560,10 +3583,10 @@ module Polars
3560
3583
  # # [
3561
3584
  # # null
3562
3585
  # # null
3563
- # # 1.0
3564
3586
  # # 2.0
3565
3587
  # # 3.0
3566
3588
  # # 4.0
3589
+ # # 6.0
3567
3590
  # # ]
3568
3591
  #
3569
3592
  # @example
@@ -3644,7 +3667,7 @@ module Polars
3644
3667
  # # Series: 'a' [i64]
3645
3668
  # # [
3646
3669
  # # 5
3647
- # # 3
3670
+ # # 2
3648
3671
  # # ]
3649
3672
  def sample(
3650
3673
  n: nil,
@@ -4256,8 +4279,8 @@ module Polars
4256
4279
  # # Series: 'a' [i64]
4257
4280
  # # [
4258
4281
  # # 2
4259
- # # 1
4260
4282
  # # 3
4283
+ # # 1
4261
4284
  # # ]
4262
4285
  def shuffle(seed: nil)
4263
4286
  super
data/lib/polars/string_expr.rb CHANGED
@@ -63,6 +63,13 @@ module Polars
63
63
  # in the target string.
64
64
  # @param cache [Boolean]
65
65
  # Use a cache of unique, converted datetimes to apply the conversion.
66
+ # @param ambiguous ['raise', 'earliest', 'latest', 'null']
67
+ # Determine how to deal with ambiguous datetimes:
68
+ #
69
+ # - `'raise'` (default): raise
70
+ # - `'earliest'`: use the earliest datetime
71
+ # - `'latest'`: use the latest datetime
72
+ # - `'null'`: set to null
66
73
  #
67
74
  # @return [Expr]
68
75
  #
@@ -145,6 +152,8 @@ module Polars
145
152
  # @param exact [Boolean]
146
153
  # - If true, require an exact format match.
147
154
  # - If false, allow the format to match anywhere in the target string.
155
+ # @param cache [Boolean]
156
+ # Use a cache of unique, converted dates to apply the datetime conversion.
148
157
  # @param utc [Boolean]
149
158
  # Parse timezone aware datetimes as UTC. This may be useful if you have data
150
159
  # with mixed offsets.
@@ -590,6 +599,7 @@ module Polars
590
599
  # # │ null ┆ null │
591
600
  # # └──────────────┴──────────────┘
592
601
  def pad_start(length, fill_char = " ")
602
+ length = Utils.parse_into_expression(length)
593
603
  Utils.wrap_expr(_rbexpr.str_pad_start(length, fill_char))
594
604
  end
595
605
  alias_method :rjust, :pad_start
@@ -620,6 +630,7 @@ module Polars
620
630
  # # │ null ┆ null │
621
631
  # # └──────────────┴──────────────┘
622
632
  def pad_end(length, fill_char = " ")
633
+ length = Utils.parse_into_expression(length)
623
634
  Utils.wrap_expr(_rbexpr.str_pad_end(length, fill_char))
624
635
  end
625
636
  alias_method :ljust, :pad_end
@@ -664,6 +675,9 @@ module Polars
664
675
  # A valid regex pattern.
665
676
  # @param literal [Boolean]
666
677
  # Treat pattern as a literal string.
678
+ # @param strict [Boolean]
679
+ # Raise an error if the underlying pattern is not a valid regex,
680
+ # otherwise mask out with a null value.
667
681
  #
668
682
  # @return [Expr]
669
683
  #
@@ -780,6 +794,9 @@ module Polars
780
794
  # @param dtype [Object]
781
795
  # The dtype to cast the extracted value to. If nil, the dtype will be
782
796
  # inferred from the JSON value.
797
+ # @param infer_schema_length [Integer]
798
+ # The maximum number of rows to scan for schema inference.
799
+ # If set to `nil`, the full data may be scanned *(this is slow)*.
783
800
  #
784
801
  # @return [Expr]
785
802
  #
@@ -1036,6 +1053,8 @@ module Polars
1036
1053
  #
1037
1054
  # @param pattern [String]
1038
1055
  # A valid regex pattern
1056
+ # @param literal [Boolean]
1057
+ # Treat `pattern` as a literal string, not as a regular expression.
1039
1058
  #
1040
1059
  # @return [Expr]
1041
1060
  #
@@ -1177,6 +1196,8 @@ module Polars
1177
1196
  # Replacement string.
1178
1197
  # @param literal [Boolean]
1179
1198
  # Treat pattern as a literal string.
1199
+ # @param n [Integer]
1200
+ # Number of matches to replace.
1180
1201
  #
1181
1202
  # @return [Expr]
1182
1203
  #
@@ -1328,9 +1349,9 @@ module Polars
1328
1349
  # # │ cafe ┆ 51966 │
1329
1350
  # # │ null ┆ null │
1330
1351
  # # └──────┴────────┘
1331
- def to_integer(base: 10, strict: true)
1352
+ def to_integer(base: 10, dtype: Int64, strict: true)
1332
1353
  base = Utils.parse_into_expression(base, str_as_lit: false)
1333
- Utils.wrap_expr(_rbexpr.str_to_integer(base, strict))
1354
+ Utils.wrap_expr(_rbexpr.str_to_integer(base, dtype, strict))
1334
1355
  end
1335
1356
 
1336
1357
  # Parse integers with base radix from strings.
@@ -1437,7 +1458,7 @@ module Polars
1437
1458
  # Polars.col("lyrics")
1438
1459
  # .str.replace_many(
1439
1460
  # ["me", "you", "they"],
1440
- # ""
1461
+ # [""]
1441
1462
  # )
1442
1463
  # .alias("removes_pronouns")
1443
1464
  # )
data/lib/polars/string_name_space.rb CHANGED
@@ -63,6 +63,13 @@ module Polars
63
63
  # in the target string.
64
64
  # @param cache [Boolean]
65
65
  # Use a cache of unique, converted datetimes to apply the conversion.
66
+ # @param ambiguous ['raise', 'earliest', 'latest', 'null']
67
+ # Determine how to deal with ambiguous datetimes:
68
+ #
69
+ # - `'raise'` (default): raise
70
+ # - `'earliest'`: use the earliest datetime
71
+ # - `'latest'`: use the latest datetime
72
+ # - `'null'`: set to null
66
73
  #
67
74
  # @return [Series]
68
75
  #
@@ -229,6 +236,10 @@ module Polars
229
236
  #
230
237
  # @param delimiter [String]
231
238
  # The delimiter to insert between consecutive string values.
239
+ # @param ignore_nulls [Boolean]
240
+ # Ignore null values (default).
241
+ # If set to `False`, null values will be propagated. This means that
242
+ # if the column contains any null values, the output is null.
232
243
  #
233
244
  # @return [Series]
234
245
  #
data/lib/polars/utils/parse.rb CHANGED
@@ -33,6 +33,46 @@ module Polars
33
33
  exprs
34
34
  end
35
35
 
36
+ def self.parse_into_selector(i, strict: true)
37
+ if i.is_a?(::String)
38
+ cs = Selectors
39
+
40
+ cs.by_name([i], require_all: strict)
41
+ elsif i.is_a?(Selector)
42
+ i
43
+ elsif i.is_a?(Expr)
44
+ i.meta.as_selector
45
+ else
46
+ msg = "cannot turn #{i.inspect} into selector"
47
+ raise TypeError, msg
48
+ end
49
+ end
50
+
51
+ def self.parse_list_into_selector(inputs, strict: true)
52
+ if inputs.is_a?(::Array)
53
+ cs = Selectors
54
+
55
+ columns = inputs.select { |i| i.is_a?(::String) }
56
+ selector = cs.by_name(columns, require_all: strict)
57
+
58
+ if columns.length == inputs.length
59
+ return selector
60
+ end
61
+
62
+ # A bit cleaner
63
+ if columns.length == 0
64
+ selector = cs.empty
65
+ end
66
+
67
+ inputs.each do |i|
68
+ selector |= parse_into_selector(i, strict: strict)
69
+ end
70
+ selector
71
+ else
72
+ parse_into_selector(inputs, strict: strict)
73
+ end
74
+ end
75
+
36
76
  def self._parse_positional_inputs(inputs, structify: false)
37
77
  inputs_iter = _parse_inputs_as_iterable(inputs)
38
78
  inputs_iter.map { |e| parse_into_expression(e, structify: structify) }
data/lib/polars/utils.rb CHANGED
@@ -107,7 +107,7 @@ module Polars
107
107
  end
108
108
 
109
109
  def self.is_selector(obj)
110
- obj.is_a?(Selectors::SelectorProxy)
110
+ obj.is_a?(Selector)
111
111
  end
112
112
 
113
113
  def self.expand_selector(target, selector, strict: true)
@@ -161,5 +161,9 @@ module Polars
161
161
  # escapes _only_ those metachars with meaning to the rust regex crate
162
162
  Plr.re_escape(s)
163
163
  end
164
+
165
+ def self.parse_into_datatype_expr(input)
166
+ raise Todo
167
+ end
164
168
  end
165
169
  end
data/lib/polars/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.20.0"
3
+ VERSION = "0.21.0"
4
4
  end
data/lib/polars.rb CHANGED
@@ -19,6 +19,11 @@ require_relative "polars/binary_expr"
19
19
  require_relative "polars/binary_name_space"
20
20
  require_relative "polars/cat_expr"
21
21
  require_relative "polars/cat_name_space"
22
+ require_relative "polars/catalog"
23
+ require_relative "polars/catalog/unity/catalog_info"
24
+ require_relative "polars/catalog/unity/column_info"
25
+ require_relative "polars/catalog/unity/namespace_info"
26
+ require_relative "polars/catalog/unity/table_info"
22
27
  require_relative "polars/config"
23
28
  require_relative "polars/convert"
24
29
  require_relative "polars/plot"
@@ -54,6 +59,7 @@ require_relative "polars/io/ipc"
54
59
  require_relative "polars/io/json"
55
60
  require_relative "polars/io/ndjson"
56
61
  require_relative "polars/io/parquet"
62
+ require_relative "polars/io/scan_options"
57
63
  require_relative "polars/lazy_frame"
58
64
  require_relative "polars/lazy_group_by"
59
65
  require_relative "polars/list_expr"
@@ -61,7 +67,9 @@ require_relative "polars/list_name_space"
61
67
  require_relative "polars/meta_expr"
62
68
  require_relative "polars/name_expr"
63
69
  require_relative "polars/rolling_group_by"
70
+ require_relative "polars/scan_cast_options"
64
71
  require_relative "polars/schema"
72
+ require_relative "polars/selector"
65
73
  require_relative "polars/selectors"
66
74
  require_relative "polars/series"
67
75
  require_relative "polars/slice"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.20.0
4
+ version: 0.21.0
5
5
  platform: x86_64-darwin
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-06-24 00:00:00.000000000 Z
11
+ date: 2025-08-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal
@@ -49,6 +49,11 @@ files:
49
49
  - lib/polars/binary_name_space.rb
50
50
  - lib/polars/cat_expr.rb
51
51
  - lib/polars/cat_name_space.rb
52
+ - lib/polars/catalog.rb
53
+ - lib/polars/catalog/unity/catalog_info.rb
54
+ - lib/polars/catalog/unity/column_info.rb
55
+ - lib/polars/catalog/unity/namespace_info.rb
56
+ - lib/polars/catalog/unity/table_info.rb
52
57
  - lib/polars/config.rb
53
58
  - lib/polars/convert.rb
54
59
  - lib/polars/data_frame.rb
@@ -84,6 +89,7 @@ files:
84
89
  - lib/polars/io/json.rb
85
90
  - lib/polars/io/ndjson.rb
86
91
  - lib/polars/io/parquet.rb
92
+ - lib/polars/io/scan_options.rb
87
93
  - lib/polars/lazy_frame.rb
88
94
  - lib/polars/lazy_group_by.rb
89
95
  - lib/polars/list_expr.rb
@@ -92,7 +98,9 @@ files:
92
98
  - lib/polars/name_expr.rb
93
99
  - lib/polars/plot.rb
94
100
  - lib/polars/rolling_group_by.rb
101
+ - lib/polars/scan_cast_options.rb
95
102
  - lib/polars/schema.rb
103
+ - lib/polars/selector.rb
96
104
  - lib/polars/selectors.rb
97
105
  - lib/polars/series.rb
98
106
  - lib/polars/slice.rb