polars-df 0.20.0-x86_64-darwin → 0.21.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +192 -186
- data/LICENSE-THIRD-PARTY.txt +1431 -1810
- data/LICENSE.txt +1 -1
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/3.4/polars.bundle +0 -0
- data/lib/polars/cat_name_space.rb +3 -43
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/convert.rb +10 -0
- data/lib/polars/data_frame.rb +151 -30
- data/lib/polars/data_types.rb +47 -3
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +34 -31
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/lazy.rb +114 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +18 -0
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +45 -63
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +163 -75
- data/lib/polars/list_expr.rb +204 -7
- data/lib/polars/list_name_space.rb +120 -1
- data/lib/polars/meta_expr.rb +7 -22
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +6 -1
- data/lib/polars/selector.rb +138 -0
- data/lib/polars/selectors.rb +931 -202
- data/lib/polars/series.rb +34 -11
- data/lib/polars/string_expr.rb +24 -3
- data/lib/polars/string_name_space.rb +11 -0
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +8 -0
- metadata +10 -2
data/lib/polars/series.rb
CHANGED
@@ -563,7 +563,7 @@ module Polars
|
|
563
563
|
# # => false
|
564
564
|
def any?(ignore_nulls: true, &block)
|
565
565
|
if block_given?
|
566
|
-
apply(skip_nulls: ignore_nulls, &block).any?
|
566
|
+
apply(return_dtype: Boolean, skip_nulls: ignore_nulls, &block).any?
|
567
567
|
else
|
568
568
|
_s.any(ignore_nulls)
|
569
569
|
end
|
@@ -587,7 +587,7 @@ module Polars
|
|
587
587
|
# # => true
|
588
588
|
def all?(ignore_nulls: true, &block)
|
589
589
|
if block_given?
|
590
|
-
apply(skip_nulls: ignore_nulls, &block).all?
|
590
|
+
apply(return_dtype: Boolean, skip_nulls: ignore_nulls, &block).all?
|
591
591
|
else
|
592
592
|
_s.all(ignore_nulls)
|
593
593
|
end
|
@@ -611,7 +611,7 @@ module Polars
|
|
611
611
|
# # => true
|
612
612
|
def none?(&block)
|
613
613
|
if block_given?
|
614
|
-
apply(&block).none?
|
614
|
+
apply(return_dtype: Boolean, &block).none?
|
615
615
|
else
|
616
616
|
to_frame.select(Polars.col(name).is_not.all).to_series[0]
|
617
617
|
end
|
@@ -1004,6 +1004,13 @@ module Polars
|
|
1004
1004
|
|
1005
1005
|
# Get dummy variables.
|
1006
1006
|
#
|
1007
|
+
# @param separator [String]
|
1008
|
+
# Separator/delimiter used when generating column names.
|
1009
|
+
# @param drop_first [Boolean]
|
1010
|
+
# Remove the first category from the variable being encoded.
|
1011
|
+
# @param drop_nulls [Boolean]
|
1012
|
+
# If there are `nil` values in the series, a `null` column is not generated.
|
1013
|
+
#
|
1007
1014
|
# @return [DataFrame]
|
1008
1015
|
#
|
1009
1016
|
# @example
|
@@ -1020,8 +1027,8 @@ module Polars
|
|
1020
1027
|
# # │ 0 ┆ 1 ┆ 0 │
|
1021
1028
|
# # │ 0 ┆ 0 ┆ 1 │
|
1022
1029
|
# # └─────┴─────┴─────┘
|
1023
|
-
def to_dummies(separator: "_", drop_first: false)
|
1024
|
-
Utils.wrap_df(_s.to_dummies(separator, drop_first))
|
1030
|
+
def to_dummies(separator: "_", drop_first: false, drop_nulls: false)
|
1031
|
+
Utils.wrap_df(_s.to_dummies(separator, drop_first, drop_nulls))
|
1025
1032
|
end
|
1026
1033
|
|
1027
1034
|
# Bin continuous values into discrete categories.
|
@@ -1234,6 +1241,14 @@ module Polars
|
|
1234
1241
|
#
|
1235
1242
|
# @param sort [Boolean]
|
1236
1243
|
# Ensure the output is sorted from most values to least.
|
1244
|
+
# @param parallel [Boolean]
|
1245
|
+
# Execute the computation in parallel.
|
1246
|
+
# @param name [String]
|
1247
|
+
# Give the resulting count column a specific name; if `normalize` is
|
1248
|
+
# true this defaults to "proportion", otherwise defaults to "count".
|
1249
|
+
# @param normalize [Boolean]
|
1250
|
+
# If true, the count is returned as the relative frequency of unique
|
1251
|
+
# values normalized to 1.0.
|
1237
1252
|
#
|
1238
1253
|
# @return [DataFrame]
|
1239
1254
|
#
|
@@ -1722,6 +1737,10 @@ module Polars
|
|
1722
1737
|
#
|
1723
1738
|
# @param reverse [Boolean]
|
1724
1739
|
# Reverse sort.
|
1740
|
+
# @param nulls_last [Boolean]
|
1741
|
+
# Place null values last instead of first.
|
1742
|
+
# @param multithreaded [Boolean]
|
1743
|
+
# Sort using multiple threads.
|
1725
1744
|
# @param in_place [Boolean]
|
1726
1745
|
# Sort in place.
|
1727
1746
|
#
|
@@ -2154,7 +2173,7 @@ module Polars
|
|
2154
2173
|
# @return [Series]
|
2155
2174
|
#
|
2156
2175
|
# @example
|
2157
|
-
# s = Polars::Series.new("a", [1, 2, 3])
|
2176
|
+
# s = Polars::Series.new("a", [[1, 2, 3]])
|
2158
2177
|
# s2 = Polars::Series.new("b", [2, 4, nil])
|
2159
2178
|
# s2.is_in(s)
|
2160
2179
|
# # =>
|
@@ -3132,7 +3151,7 @@ module Polars
|
|
3132
3151
|
#
|
3133
3152
|
# @example
|
3134
3153
|
# s = Polars::Series.new("a", [1, 2, 3])
|
3135
|
-
# s.map_elements { |x| x + 10 }
|
3154
|
+
# s.map_elements(return_dtype: Polars::Int64) { |x| x + 10 }
|
3136
3155
|
# # =>
|
3137
3156
|
# # shape: (3,)
|
3138
3157
|
# # Series: 'a' [i64]
|
@@ -3147,7 +3166,7 @@ module Polars
|
|
3147
3166
|
else
|
3148
3167
|
pl_return_dtype = Utils.rb_type_to_dtype(return_dtype)
|
3149
3168
|
end
|
3150
|
-
Utils.wrap_s(_s.
|
3169
|
+
Utils.wrap_s(_s.map_elements(func, pl_return_dtype, skip_nulls))
|
3151
3170
|
end
|
3152
3171
|
alias_method :map, :map_elements
|
3153
3172
|
alias_method :apply, :map_elements
|
@@ -3421,6 +3440,8 @@ module Polars
|
|
3421
3440
|
# a result. If None, it will be set equal to window size.
|
3422
3441
|
# @param center [Boolean]
|
3423
3442
|
# Set the labels at the center of the window
|
3443
|
+
# @param ddof [Integer]
|
3444
|
+
# "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
|
3424
3445
|
#
|
3425
3446
|
# @return [Series]
|
3426
3447
|
#
|
@@ -3464,6 +3485,8 @@ module Polars
|
|
3464
3485
|
# a result. If None, it will be set equal to window size.
|
3465
3486
|
# @param center [Boolean]
|
3466
3487
|
# Set the labels at the center of the window
|
3488
|
+
# @param ddof [Integer]
|
3489
|
+
# "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
|
3467
3490
|
#
|
3468
3491
|
# @return [Series]
|
3469
3492
|
#
|
@@ -3560,10 +3583,10 @@ module Polars
|
|
3560
3583
|
# # [
|
3561
3584
|
# # null
|
3562
3585
|
# # null
|
3563
|
-
# # 1.0
|
3564
3586
|
# # 2.0
|
3565
3587
|
# # 3.0
|
3566
3588
|
# # 4.0
|
3589
|
+
# # 6.0
|
3567
3590
|
# # ]
|
3568
3591
|
#
|
3569
3592
|
# @example
|
@@ -3644,7 +3667,7 @@ module Polars
|
|
3644
3667
|
# # Series: 'a' [i64]
|
3645
3668
|
# # [
|
3646
3669
|
# # 5
|
3647
|
-
# #
|
3670
|
+
# # 2
|
3648
3671
|
# # ]
|
3649
3672
|
def sample(
|
3650
3673
|
n: nil,
|
@@ -4256,8 +4279,8 @@ module Polars
|
|
4256
4279
|
# # Series: 'a' [i64]
|
4257
4280
|
# # [
|
4258
4281
|
# # 2
|
4259
|
-
# # 1
|
4260
4282
|
# # 3
|
4283
|
+
# # 1
|
4261
4284
|
# # ]
|
4262
4285
|
def shuffle(seed: nil)
|
4263
4286
|
super
|
data/lib/polars/string_expr.rb
CHANGED
@@ -63,6 +63,13 @@ module Polars
|
|
63
63
|
# in the target string.
|
64
64
|
# @param cache [Boolean]
|
65
65
|
# Use a cache of unique, converted datetimes to apply the conversion.
|
66
|
+
# @param ambiguous ['raise', 'earliest', 'latest', 'null']
|
67
|
+
# Determine how to deal with ambiguous datetimes:
|
68
|
+
#
|
69
|
+
# - `'raise'` (default): raise
|
70
|
+
# - `'earliest'`: use the earliest datetime
|
71
|
+
# - `'latest'`: use the latest datetime
|
72
|
+
# - `'null'`: set to null
|
66
73
|
#
|
67
74
|
# @return [Expr]
|
68
75
|
#
|
@@ -145,6 +152,8 @@ module Polars
|
|
145
152
|
# @param exact [Boolean]
|
146
153
|
# - If true, require an exact format match.
|
147
154
|
# - If false, allow the format to match anywhere in the target string.
|
155
|
+
# @param cache [Boolean]
|
156
|
+
# Use a cache of unique, converted dates to apply the datetime conversion.
|
148
157
|
# @param utc [Boolean]
|
149
158
|
# Parse timezone aware datetimes as UTC. This may be useful if you have data
|
150
159
|
# with mixed offsets.
|
@@ -590,6 +599,7 @@ module Polars
|
|
590
599
|
# # │ null ┆ null │
|
591
600
|
# # └──────────────┴──────────────┘
|
592
601
|
def pad_start(length, fill_char = " ")
|
602
|
+
length = Utils.parse_into_expression(length)
|
593
603
|
Utils.wrap_expr(_rbexpr.str_pad_start(length, fill_char))
|
594
604
|
end
|
595
605
|
alias_method :rjust, :pad_start
|
@@ -620,6 +630,7 @@ module Polars
|
|
620
630
|
# # │ null ┆ null │
|
621
631
|
# # └──────────────┴──────────────┘
|
622
632
|
def pad_end(length, fill_char = " ")
|
633
|
+
length = Utils.parse_into_expression(length)
|
623
634
|
Utils.wrap_expr(_rbexpr.str_pad_end(length, fill_char))
|
624
635
|
end
|
625
636
|
alias_method :ljust, :pad_end
|
@@ -664,6 +675,9 @@ module Polars
|
|
664
675
|
# A valid regex pattern.
|
665
676
|
# @param literal [Boolean]
|
666
677
|
# Treat pattern as a literal string.
|
678
|
+
# @param strict [Boolean]
|
679
|
+
# Raise an error if the underlying pattern is not a valid regex,
|
680
|
+
# otherwise mask out with a null value.
|
667
681
|
#
|
668
682
|
# @return [Expr]
|
669
683
|
#
|
@@ -780,6 +794,9 @@ module Polars
|
|
780
794
|
# @param dtype [Object]
|
781
795
|
# The dtype to cast the extracted value to. If nil, the dtype will be
|
782
796
|
# inferred from the JSON value.
|
797
|
+
# @param infer_schema_length [Integer]
|
798
|
+
# The maximum number of rows to scan for schema inference.
|
799
|
+
# If set to `nil`, the full data may be scanned *(this is slow)*.
|
783
800
|
#
|
784
801
|
# @return [Expr]
|
785
802
|
#
|
@@ -1036,6 +1053,8 @@ module Polars
|
|
1036
1053
|
#
|
1037
1054
|
# @param pattern [String]
|
1038
1055
|
# A valid regex pattern
|
1056
|
+
# @param literal [Boolean]
|
1057
|
+
# Treat `pattern` as a literal string, not as a regular expression.
|
1039
1058
|
#
|
1040
1059
|
# @return [Expr]
|
1041
1060
|
#
|
@@ -1177,6 +1196,8 @@ module Polars
|
|
1177
1196
|
# Replacement string.
|
1178
1197
|
# @param literal [Boolean]
|
1179
1198
|
# Treat pattern as a literal string.
|
1199
|
+
# @param n [Integer]
|
1200
|
+
# Number of matches to replace.
|
1180
1201
|
#
|
1181
1202
|
# @return [Expr]
|
1182
1203
|
#
|
@@ -1328,9 +1349,9 @@ module Polars
|
|
1328
1349
|
# # │ cafe ┆ 51966 │
|
1329
1350
|
# # │ null ┆ null │
|
1330
1351
|
# # └──────┴────────┘
|
1331
|
-
def to_integer(base: 10, strict: true)
|
1352
|
+
def to_integer(base: 10, dtype: Int64, strict: true)
|
1332
1353
|
base = Utils.parse_into_expression(base, str_as_lit: false)
|
1333
|
-
Utils.wrap_expr(_rbexpr.str_to_integer(base, strict))
|
1354
|
+
Utils.wrap_expr(_rbexpr.str_to_integer(base, dtype, strict))
|
1334
1355
|
end
|
1335
1356
|
|
1336
1357
|
# Parse integers with base radix from strings.
|
@@ -1437,7 +1458,7 @@ module Polars
|
|
1437
1458
|
# Polars.col("lyrics")
|
1438
1459
|
# .str.replace_many(
|
1439
1460
|
# ["me", "you", "they"],
|
1440
|
-
# ""
|
1461
|
+
# [""]
|
1441
1462
|
# )
|
1442
1463
|
# .alias("removes_pronouns")
|
1443
1464
|
# )
|
data/lib/polars/string_name_space.rb
CHANGED
@@ -63,6 +63,13 @@ module Polars
|
|
63
63
|
# in the target string.
|
64
64
|
# @param cache [Boolean]
|
65
65
|
# Use a cache of unique, converted datetimes to apply the conversion.
|
66
|
+
# @param ambiguous ['raise', 'earliest', 'latest', 'null']
|
67
|
+
# Determine how to deal with ambiguous datetimes:
|
68
|
+
#
|
69
|
+
# - `'raise'` (default): raise
|
70
|
+
# - `'earliest'`: use the earliest datetime
|
71
|
+
# - `'latest'`: use the latest datetime
|
72
|
+
# - `'null'`: set to null
|
66
73
|
#
|
67
74
|
# @return [Series]
|
68
75
|
#
|
@@ -229,6 +236,10 @@ module Polars
|
|
229
236
|
#
|
230
237
|
# @param delimiter [String]
|
231
238
|
# The delimiter to insert between consecutive string values.
|
239
|
+
# @param ignore_nulls [Boolean]
|
240
|
+
# Ignore null values (default).
|
241
|
+
# If set to `False`, null values will be propagated. This means that
|
242
|
+
# if the column contains any null values, the output is null.
|
232
243
|
#
|
233
244
|
# @return [Series]
|
234
245
|
#
|
data/lib/polars/utils/parse.rb
CHANGED
@@ -33,6 +33,46 @@ module Polars
|
|
33
33
|
exprs
|
34
34
|
end
|
35
35
|
|
36
|
+
def self.parse_into_selector(i, strict: true)
|
37
|
+
if i.is_a?(::String)
|
38
|
+
cs = Selectors
|
39
|
+
|
40
|
+
cs.by_name([i], require_all: strict)
|
41
|
+
elsif i.is_a?(Selector)
|
42
|
+
i
|
43
|
+
elsif i.is_a?(Expr)
|
44
|
+
i.meta.as_selector
|
45
|
+
else
|
46
|
+
msg = "cannot turn #{i.inspect} into selector"
|
47
|
+
raise TypeError, msg
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def self.parse_list_into_selector(inputs, strict: true)
|
52
|
+
if inputs.is_a?(::Array)
|
53
|
+
cs = Selectors
|
54
|
+
|
55
|
+
columns = inputs.select { |i| i.is_a?(::String) }
|
56
|
+
selector = cs.by_name(columns, require_all: strict)
|
57
|
+
|
58
|
+
if columns.length == inputs.length
|
59
|
+
return selector
|
60
|
+
end
|
61
|
+
|
62
|
+
# A bit cleaner
|
63
|
+
if columns.length == 0
|
64
|
+
selector = cs.empty
|
65
|
+
end
|
66
|
+
|
67
|
+
inputs.each do |i|
|
68
|
+
selector |= parse_into_selector(i, strict: strict)
|
69
|
+
end
|
70
|
+
selector
|
71
|
+
else
|
72
|
+
parse_into_selector(inputs, strict: strict)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
36
76
|
def self._parse_positional_inputs(inputs, structify: false)
|
37
77
|
inputs_iter = _parse_inputs_as_iterable(inputs)
|
38
78
|
inputs_iter.map { |e| parse_into_expression(e, structify: structify) }
|
data/lib/polars/utils.rb
CHANGED
@@ -107,7 +107,7 @@ module Polars
|
|
107
107
|
end
|
108
108
|
|
109
109
|
def self.is_selector(obj)
|
110
|
-
obj.is_a?(
|
110
|
+
obj.is_a?(Selector)
|
111
111
|
end
|
112
112
|
|
113
113
|
def self.expand_selector(target, selector, strict: true)
|
@@ -161,5 +161,9 @@ module Polars
|
|
161
161
|
# escapes _only_ those metachars with meaning to the rust regex crate
|
162
162
|
Plr.re_escape(s)
|
163
163
|
end
|
164
|
+
|
165
|
+
def self.parse_into_datatype_expr(input)
|
166
|
+
raise Todo
|
167
|
+
end
|
164
168
|
end
|
165
169
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -19,6 +19,11 @@ require_relative "polars/binary_expr"
|
|
19
19
|
require_relative "polars/binary_name_space"
|
20
20
|
require_relative "polars/cat_expr"
|
21
21
|
require_relative "polars/cat_name_space"
|
22
|
+
require_relative "polars/catalog"
|
23
|
+
require_relative "polars/catalog/unity/catalog_info"
|
24
|
+
require_relative "polars/catalog/unity/column_info"
|
25
|
+
require_relative "polars/catalog/unity/namespace_info"
|
26
|
+
require_relative "polars/catalog/unity/table_info"
|
22
27
|
require_relative "polars/config"
|
23
28
|
require_relative "polars/convert"
|
24
29
|
require_relative "polars/plot"
|
@@ -54,6 +59,7 @@ require_relative "polars/io/ipc"
|
|
54
59
|
require_relative "polars/io/json"
|
55
60
|
require_relative "polars/io/ndjson"
|
56
61
|
require_relative "polars/io/parquet"
|
62
|
+
require_relative "polars/io/scan_options"
|
57
63
|
require_relative "polars/lazy_frame"
|
58
64
|
require_relative "polars/lazy_group_by"
|
59
65
|
require_relative "polars/list_expr"
|
@@ -61,7 +67,9 @@ require_relative "polars/list_name_space"
|
|
61
67
|
require_relative "polars/meta_expr"
|
62
68
|
require_relative "polars/name_expr"
|
63
69
|
require_relative "polars/rolling_group_by"
|
70
|
+
require_relative "polars/scan_cast_options"
|
64
71
|
require_relative "polars/schema"
|
72
|
+
require_relative "polars/selector"
|
65
73
|
require_relative "polars/selectors"
|
66
74
|
require_relative "polars/series"
|
67
75
|
require_relative "polars/slice"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.20.0
|
4
|
+
version: 0.21.0
|
5
5
|
platform: x86_64-darwin
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-08-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bigdecimal
|
@@ -49,6 +49,11 @@ files:
|
|
49
49
|
- lib/polars/binary_name_space.rb
|
50
50
|
- lib/polars/cat_expr.rb
|
51
51
|
- lib/polars/cat_name_space.rb
|
52
|
+
- lib/polars/catalog.rb
|
53
|
+
- lib/polars/catalog/unity/catalog_info.rb
|
54
|
+
- lib/polars/catalog/unity/column_info.rb
|
55
|
+
- lib/polars/catalog/unity/namespace_info.rb
|
56
|
+
- lib/polars/catalog/unity/table_info.rb
|
52
57
|
- lib/polars/config.rb
|
53
58
|
- lib/polars/convert.rb
|
54
59
|
- lib/polars/data_frame.rb
|
@@ -84,6 +89,7 @@ files:
|
|
84
89
|
- lib/polars/io/json.rb
|
85
90
|
- lib/polars/io/ndjson.rb
|
86
91
|
- lib/polars/io/parquet.rb
|
92
|
+
- lib/polars/io/scan_options.rb
|
87
93
|
- lib/polars/lazy_frame.rb
|
88
94
|
- lib/polars/lazy_group_by.rb
|
89
95
|
- lib/polars/list_expr.rb
|
@@ -92,7 +98,9 @@ files:
|
|
92
98
|
- lib/polars/name_expr.rb
|
93
99
|
- lib/polars/plot.rb
|
94
100
|
- lib/polars/rolling_group_by.rb
|
101
|
+
- lib/polars/scan_cast_options.rb
|
95
102
|
- lib/polars/schema.rb
|
103
|
+
- lib/polars/selector.rb
|
96
104
|
- lib/polars/selectors.rb
|
97
105
|
- lib/polars/series.rb
|
98
106
|
- lib/polars/slice.rb
|