polars-df 0.9.0-arm64-darwin → 0.11.0-arm64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +23 -0
- data/Cargo.lock +144 -57
- data/LICENSE-THIRD-PARTY.txt +629 -29
- data/README.md +7 -6
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +6 -2
- data/lib/polars/batched_csv_reader.rb +11 -3
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +225 -370
- data/lib/polars/date_time_expr.rb +11 -4
- data/lib/polars/date_time_name_space.rb +14 -4
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/exceptions.rb +4 -0
- data/lib/polars/expr.rb +1171 -54
- data/lib/polars/functions/lazy.rb +3 -3
- data/lib/polars/functions/range/date_range.rb +92 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/whenthen.rb +74 -5
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +307 -489
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +18 -0
- data/lib/polars/io/ndjson.rb +69 -0
- data/lib/polars/io/parquet.rb +226 -0
- data/lib/polars/lazy_frame.rb +55 -195
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +6 -2
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +14 -12
- data/lib/polars/string_expr.rb +38 -36
- data/lib/polars/utils.rb +89 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +10 -3
- metadata +13 -6
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
data/lib/polars/lazy_frame.rb
CHANGED
@@ -27,145 +27,6 @@ module Polars
|
|
27
27
|
ldf
|
28
28
|
end
|
29
29
|
|
30
|
-
# @private
|
31
|
-
def self._scan_csv(
|
32
|
-
file,
|
33
|
-
has_header: true,
|
34
|
-
sep: ",",
|
35
|
-
comment_char: nil,
|
36
|
-
quote_char: '"',
|
37
|
-
skip_rows: 0,
|
38
|
-
dtypes: nil,
|
39
|
-
null_values: nil,
|
40
|
-
ignore_errors: false,
|
41
|
-
cache: true,
|
42
|
-
with_column_names: nil,
|
43
|
-
infer_schema_length: 100,
|
44
|
-
n_rows: nil,
|
45
|
-
encoding: "utf8",
|
46
|
-
low_memory: false,
|
47
|
-
rechunk: true,
|
48
|
-
skip_rows_after_header: 0,
|
49
|
-
row_count_name: nil,
|
50
|
-
row_count_offset: 0,
|
51
|
-
parse_dates: false,
|
52
|
-
eol_char: "\n"
|
53
|
-
)
|
54
|
-
dtype_list = nil
|
55
|
-
if !dtypes.nil?
|
56
|
-
dtype_list = []
|
57
|
-
dtypes.each do |k, v|
|
58
|
-
dtype_list << [k, Utils.rb_type_to_dtype(v)]
|
59
|
-
end
|
60
|
-
end
|
61
|
-
processed_null_values = Utils._process_null_values(null_values)
|
62
|
-
|
63
|
-
_from_rbldf(
|
64
|
-
RbLazyFrame.new_from_csv(
|
65
|
-
file,
|
66
|
-
sep,
|
67
|
-
has_header,
|
68
|
-
ignore_errors,
|
69
|
-
skip_rows,
|
70
|
-
n_rows,
|
71
|
-
cache,
|
72
|
-
dtype_list,
|
73
|
-
low_memory,
|
74
|
-
comment_char,
|
75
|
-
quote_char,
|
76
|
-
processed_null_values,
|
77
|
-
infer_schema_length,
|
78
|
-
with_column_names,
|
79
|
-
rechunk,
|
80
|
-
skip_rows_after_header,
|
81
|
-
encoding,
|
82
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
83
|
-
parse_dates,
|
84
|
-
eol_char
|
85
|
-
)
|
86
|
-
)
|
87
|
-
end
|
88
|
-
|
89
|
-
# @private
|
90
|
-
def self._scan_parquet(
|
91
|
-
file,
|
92
|
-
n_rows: nil,
|
93
|
-
cache: true,
|
94
|
-
parallel: "auto",
|
95
|
-
rechunk: true,
|
96
|
-
row_count_name: nil,
|
97
|
-
row_count_offset: 0,
|
98
|
-
storage_options: nil,
|
99
|
-
low_memory: false,
|
100
|
-
use_statistics: true,
|
101
|
-
hive_partitioning: true
|
102
|
-
)
|
103
|
-
_from_rbldf(
|
104
|
-
RbLazyFrame.new_from_parquet(
|
105
|
-
file,
|
106
|
-
n_rows,
|
107
|
-
cache,
|
108
|
-
parallel,
|
109
|
-
rechunk,
|
110
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
111
|
-
low_memory,
|
112
|
-
use_statistics,
|
113
|
-
hive_partitioning
|
114
|
-
)
|
115
|
-
)
|
116
|
-
end
|
117
|
-
|
118
|
-
# @private
|
119
|
-
def self._scan_ipc(
|
120
|
-
file,
|
121
|
-
n_rows: nil,
|
122
|
-
cache: true,
|
123
|
-
rechunk: true,
|
124
|
-
row_count_name: nil,
|
125
|
-
row_count_offset: 0,
|
126
|
-
storage_options: nil,
|
127
|
-
memory_map: true
|
128
|
-
)
|
129
|
-
if Utils.pathlike?(file)
|
130
|
-
file = Utils.normalise_filepath(file)
|
131
|
-
end
|
132
|
-
|
133
|
-
_from_rbldf(
|
134
|
-
RbLazyFrame.new_from_ipc(
|
135
|
-
file,
|
136
|
-
n_rows,
|
137
|
-
cache,
|
138
|
-
rechunk,
|
139
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
140
|
-
memory_map
|
141
|
-
)
|
142
|
-
)
|
143
|
-
end
|
144
|
-
|
145
|
-
# @private
|
146
|
-
def self._scan_ndjson(
|
147
|
-
file,
|
148
|
-
infer_schema_length: nil,
|
149
|
-
batch_size: nil,
|
150
|
-
n_rows: nil,
|
151
|
-
low_memory: false,
|
152
|
-
rechunk: true,
|
153
|
-
row_count_name: nil,
|
154
|
-
row_count_offset: 0
|
155
|
-
)
|
156
|
-
_from_rbldf(
|
157
|
-
RbLazyFrame.new_from_ndjson(
|
158
|
-
file,
|
159
|
-
infer_schema_length,
|
160
|
-
batch_size,
|
161
|
-
n_rows,
|
162
|
-
low_memory,
|
163
|
-
rechunk,
|
164
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset)
|
165
|
-
)
|
166
|
-
)
|
167
|
-
end
|
168
|
-
|
169
30
|
# def self.from_json
|
170
31
|
# end
|
171
32
|
|
@@ -177,7 +38,7 @@ module Polars
|
|
177
38
|
# @return [LazyFrame]
|
178
39
|
def self.read_json(file)
|
179
40
|
if Utils.pathlike?(file)
|
180
|
-
file = Utils.normalise_filepath(file)
|
41
|
+
file = Utils.normalize_filepath(file)
|
181
42
|
end
|
182
43
|
|
183
44
|
Utils.wrap_ldf(RbLazyFrame.read_json(file))
|
@@ -284,7 +145,7 @@ module Polars
|
|
284
145
|
# @return [nil]
|
285
146
|
def write_json(file)
|
286
147
|
if Utils.pathlike?(file)
|
287
|
-
file = Utils.normalise_filepath(file)
|
148
|
+
file = Utils.normalize_filepath(file)
|
288
149
|
end
|
289
150
|
_ldf.write_json(file)
|
290
151
|
nil
|
@@ -400,16 +261,16 @@ module Polars
|
|
400
261
|
# # │ 2 ┆ 7.0 ┆ b │
|
401
262
|
# # │ 1 ┆ 6.0 ┆ a │
|
402
263
|
# # └─────┴─────┴─────┘
|
403
|
-
def sort(by, reverse: false, nulls_last: false, maintain_order: false)
|
264
|
+
def sort(by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
|
404
265
|
if by.is_a?(::String)
|
405
|
-
return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order))
|
266
|
+
return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order, multithreaded))
|
406
267
|
end
|
407
268
|
if Utils.bool?(reverse)
|
408
269
|
reverse = [reverse]
|
409
270
|
end
|
410
271
|
|
411
272
|
by = Utils.selection_to_rbexpr_list(by)
|
412
|
-
_from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order))
|
273
|
+
_from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order, multithreaded))
|
413
274
|
end
|
414
275
|
|
415
276
|
# def profile
|
@@ -1133,7 +994,7 @@ module Polars
|
|
1133
994
|
# # ┌─────────┐
|
1134
995
|
# # │ literal │
|
1135
996
|
# # │ --- │
|
1136
|
-
# # │
|
997
|
+
# # │ i32 │
|
1137
998
|
# # ╞═════════╡
|
1138
999
|
# # │ 0 │
|
1139
1000
|
# # │ 0 │
|
@@ -1445,16 +1306,16 @@ module Polars
|
|
1445
1306
|
# )
|
1446
1307
|
# # =>
|
1447
1308
|
# # shape: (4, 3)
|
1448
|
-
# #
|
1449
|
-
# # │ time ┆ time_count ┆ time_agg_list
|
1450
|
-
# # │ --- ┆ --- ┆ ---
|
1451
|
-
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]]
|
1452
|
-
# #
|
1453
|
-
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12
|
1454
|
-
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12
|
1455
|
-
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12
|
1456
|
-
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00]
|
1457
|
-
# #
|
1309
|
+
# # ┌─────────────────────┬────────────┬─────────────────────────────────┐
|
1310
|
+
# # │ time ┆ time_count ┆ time_agg_list │
|
1311
|
+
# # │ --- ┆ --- ┆ --- │
|
1312
|
+
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]] │
|
1313
|
+
# # ╞═════════════════════╪════════════╪═════════════════════════════════╡
|
1314
|
+
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-… │
|
1315
|
+
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-… │
|
1316
|
+
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-… │
|
1317
|
+
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00] │
|
1318
|
+
# # └─────────────────────┴────────────┴─────────────────────────────────┘
|
1458
1319
|
#
|
1459
1320
|
# @example When closed="both" the time values at the window boundaries belong to 2 groups.
|
1460
1321
|
# df.group_by_dynamic("time", every: "1h", closed: "both").agg(
|
@@ -1523,12 +1384,13 @@ module Polars
|
|
1523
1384
|
# closed: "right"
|
1524
1385
|
# ).agg(Polars.col("A").alias("A_agg_list"))
|
1525
1386
|
# # =>
|
1526
|
-
# # shape: (3, 4)
|
1387
|
+
# # shape: (4, 4)
|
1527
1388
|
# # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
|
1528
1389
|
# # │ _lower_boundary ┆ _upper_boundary ┆ idx ┆ A_agg_list │
|
1529
1390
|
# # │ --- ┆ --- ┆ --- ┆ --- │
|
1530
1391
|
# # │ i64 ┆ i64 ┆ i64 ┆ list[str] │
|
1531
1392
|
# # ╞═════════════════╪═════════════════╪═════╪═════════════════╡
|
1393
|
+
# # │ -2 ┆ 1 ┆ -2 ┆ ["A", "A"] │
|
1532
1394
|
# # │ 0 ┆ 3 ┆ 0 ┆ ["A", "B", "B"] │
|
1533
1395
|
# # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
|
1534
1396
|
# # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
|
@@ -1837,7 +1699,7 @@ module Polars
|
|
1837
1699
|
if how == "cross"
|
1838
1700
|
return _from_rbldf(
|
1839
1701
|
_ldf.join(
|
1840
|
-
other._ldf, [], [], allow_parallel, force_parallel, how, suffix
|
1702
|
+
other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
|
1841
1703
|
)
|
1842
1704
|
)
|
1843
1705
|
end
|
@@ -1891,16 +1753,16 @@ module Polars
|
|
1891
1753
|
# ).collect
|
1892
1754
|
# # =>
|
1893
1755
|
# # shape: (4, 6)
|
1894
|
-
# #
|
1895
|
-
# # │ a ┆ b ┆ c ┆ a^2
|
1896
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
1897
|
-
# # │ i64 ┆ f64 ┆ bool ┆
|
1898
|
-
# #
|
1899
|
-
# # │ 1 ┆ 0.5 ┆ true ┆ 1
|
1900
|
-
# # │ 2 ┆ 4.0 ┆ true ┆ 4
|
1901
|
-
# # │ 3 ┆ 10.0 ┆ false ┆ 9
|
1902
|
-
# # │ 4 ┆ 13.0 ┆ true ┆ 16
|
1903
|
-
# #
|
1756
|
+
# # ┌─────┬──────┬───────┬─────┬──────┬───────┐
|
1757
|
+
# # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
|
1758
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
1759
|
+
# # │ i64 ┆ f64 ┆ bool ┆ i64 ┆ f64 ┆ bool │
|
1760
|
+
# # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡
|
1761
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 1 ┆ 0.25 ┆ false │
|
1762
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 4 ┆ 2.0 ┆ false │
|
1763
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 9 ┆ 5.0 ┆ true │
|
1764
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │
|
1765
|
+
# # └─────┴──────┴───────┴─────┴──────┴───────┘
|
1904
1766
|
def with_columns(*exprs, **named_exprs)
|
1905
1767
|
structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
|
1906
1768
|
rbexprs = Utils.parse_as_list_of_expressions(*exprs, **named_exprs, __structify: structify)
|
@@ -1965,26 +1827,26 @@ module Polars
|
|
1965
1827
|
# # ┌─────┬─────┬───────────┐
|
1966
1828
|
# # │ a ┆ b ┆ b_squared │
|
1967
1829
|
# # │ --- ┆ --- ┆ --- │
|
1968
|
-
# # │ i64 ┆ i64 ┆
|
1830
|
+
# # │ i64 ┆ i64 ┆ i64 │
|
1969
1831
|
# # ╞═════╪═════╪═══════════╡
|
1970
|
-
# # │ 1 ┆ 2 ┆ 4
|
1971
|
-
# # │ 3 ┆ 4 ┆ 16
|
1972
|
-
# # │ 5 ┆ 6 ┆ 36
|
1832
|
+
# # │ 1 ┆ 2 ┆ 4 │
|
1833
|
+
# # │ 3 ┆ 4 ┆ 16 │
|
1834
|
+
# # │ 5 ┆ 6 ┆ 36 │
|
1973
1835
|
# # └─────┴─────┴───────────┘
|
1974
1836
|
#
|
1975
1837
|
# @example
|
1976
1838
|
# df.with_column(Polars.col("a") ** 2).collect
|
1977
1839
|
# # =>
|
1978
1840
|
# # shape: (3, 2)
|
1979
|
-
# #
|
1980
|
-
# # │ a
|
1981
|
-
# # │ ---
|
1982
|
-
# # │
|
1983
|
-
# #
|
1984
|
-
# # │ 1
|
1985
|
-
# # │ 9
|
1986
|
-
# # │ 25
|
1987
|
-
# #
|
1841
|
+
# # ┌─────┬─────┐
|
1842
|
+
# # │ a ┆ b │
|
1843
|
+
# # │ --- ┆ --- │
|
1844
|
+
# # │ i64 ┆ i64 │
|
1845
|
+
# # ╞═════╪═════╡
|
1846
|
+
# # │ 1 ┆ 2 │
|
1847
|
+
# # │ 9 ┆ 4 │
|
1848
|
+
# # │ 25 ┆ 6 │
|
1849
|
+
# # └─────┴─────┘
|
1988
1850
|
def with_column(column)
|
1989
1851
|
with_columns([column])
|
1990
1852
|
end
|
@@ -1996,11 +1858,9 @@ module Polars
|
|
1996
1858
|
# - List of column names.
|
1997
1859
|
#
|
1998
1860
|
# @return [LazyFrame]
|
1999
|
-
def drop(columns)
|
2000
|
-
|
2001
|
-
|
2002
|
-
end
|
2003
|
-
_from_rbldf(_ldf.drop(columns))
|
1861
|
+
def drop(*columns)
|
1862
|
+
drop_cols = Utils._expand_selectors(self, *columns)
|
1863
|
+
_from_rbldf(_ldf.drop(drop_cols))
|
2004
1864
|
end
|
2005
1865
|
|
2006
1866
|
# Rename column names.
|
@@ -2233,16 +2093,16 @@ module Polars
|
|
2233
2093
|
# df.with_row_index.collect
|
2234
2094
|
# # =>
|
2235
2095
|
# # shape: (3, 3)
|
2236
|
-
# #
|
2237
|
-
# # │
|
2238
|
-
# # │ ---
|
2239
|
-
# # │ u32
|
2240
|
-
# #
|
2241
|
-
# # │ 0
|
2242
|
-
# # │ 1
|
2243
|
-
# # │ 2
|
2244
|
-
# #
|
2245
|
-
def with_row_index(name: "
|
2096
|
+
# # ┌───────┬─────┬─────┐
|
2097
|
+
# # │ index ┆ a ┆ b │
|
2098
|
+
# # │ --- ┆ --- ┆ --- │
|
2099
|
+
# # │ u32 ┆ i64 ┆ i64 │
|
2100
|
+
# # ╞═══════╪═════╪═════╡
|
2101
|
+
# # │ 0 ┆ 1 ┆ 2 │
|
2102
|
+
# # │ 1 ┆ 3 ┆ 4 │
|
2103
|
+
# # │ 2 ┆ 5 ┆ 6 │
|
2104
|
+
# # └───────┴─────┴─────┘
|
2105
|
+
def with_row_index(name: "index", offset: 0)
|
2246
2106
|
_from_rbldf(_ldf.with_row_index(name, offset))
|
2247
2107
|
end
|
2248
2108
|
alias_method :with_row_count, :with_row_index
|
data/lib/polars/lazy_group_by.rb
CHANGED
@@ -6,11 +6,108 @@ module Polars
|
|
6
6
|
@lgb = lgb
|
7
7
|
end
|
8
8
|
|
9
|
-
#
|
9
|
+
# Compute aggregations for each group of a group by operation.
|
10
|
+
#
|
11
|
+
# @param aggs [Array]
|
12
|
+
# Aggregations to compute for each group of the group by operation,
|
13
|
+
# specified as positional arguments.
|
14
|
+
# Accepts expression input. Strings are parsed as column names.
|
15
|
+
# @param named_aggs [Hash]
|
16
|
+
# Additional aggregations, specified as keyword arguments.
|
17
|
+
# The resulting columns will be renamed to the keyword used.
|
10
18
|
#
|
11
19
|
# @return [LazyFrame]
|
12
|
-
|
13
|
-
|
20
|
+
#
|
21
|
+
# @example Compute the aggregation of the columns for each group.
|
22
|
+
# ldf = Polars::DataFrame.new(
|
23
|
+
# {
|
24
|
+
# "a" => ["a", "b", "a", "b", "c"],
|
25
|
+
# "b" => [1, 2, 1, 3, 3],
|
26
|
+
# "c" => [5, 4, 3, 2, 1]
|
27
|
+
# }
|
28
|
+
# ).lazy
|
29
|
+
# ldf.group_by("a").agg(
|
30
|
+
# [Polars.col("b"), Polars.col("c")]
|
31
|
+
# ).collect
|
32
|
+
# # =>
|
33
|
+
# # shape: (3, 3)
|
34
|
+
# # ┌─────┬───────────┬───────────┐
|
35
|
+
# # │ a ┆ b ┆ c │
|
36
|
+
# # │ --- ┆ --- ┆ --- │
|
37
|
+
# # │ str ┆ list[i64] ┆ list[i64] │
|
38
|
+
# # ╞═════╪═══════════╪═══════════╡
|
39
|
+
# # │ a ┆ [1, 1] ┆ [5, 3] │
|
40
|
+
# # │ b ┆ [2, 3] ┆ [4, 2] │
|
41
|
+
# # │ c ┆ [3] ┆ [1] │
|
42
|
+
# # └─────┴───────────┴───────────┘
|
43
|
+
#
|
44
|
+
# @example Compute the sum of a column for each group.
|
45
|
+
# ldf.group_by("a").agg(
|
46
|
+
# Polars.col("b").sum
|
47
|
+
# ).collect
|
48
|
+
# # =>
|
49
|
+
# # shape: (3, 2)
|
50
|
+
# # ┌─────┬─────┐
|
51
|
+
# # │ a ┆ b │
|
52
|
+
# # │ --- ┆ --- │
|
53
|
+
# # │ str ┆ i64 │
|
54
|
+
# # ╞═════╪═════╡
|
55
|
+
# # │ a ┆ 2 │
|
56
|
+
# # │ b ┆ 5 │
|
57
|
+
# # │ c ┆ 3 │
|
58
|
+
# # └─────┴─────┘
|
59
|
+
#
|
60
|
+
# @example Compute multiple aggregates at once by passing a list of expressions.
|
61
|
+
# ldf.group_by("a").agg(
|
62
|
+
# [Polars.sum("b"), Polars.mean("c")]
|
63
|
+
# ).collect
|
64
|
+
# # =>
|
65
|
+
# # shape: (3, 3)
|
66
|
+
# # ┌─────┬─────┬─────┐
|
67
|
+
# # │ a ┆ b ┆ c │
|
68
|
+
# # │ --- ┆ --- ┆ --- │
|
69
|
+
# # │ str ┆ i64 ┆ f64 │
|
70
|
+
# # ╞═════╪═════╪═════╡
|
71
|
+
# # │ c ┆ 3 ┆ 1.0 │
|
72
|
+
# # │ a ┆ 2 ┆ 4.0 │
|
73
|
+
# # │ b ┆ 5 ┆ 3.0 │
|
74
|
+
# # └─────┴─────┴─────┘
|
75
|
+
#
|
76
|
+
# @example Or use positional arguments to compute multiple aggregations in the same way.
|
77
|
+
# ldf.group_by("a").agg(
|
78
|
+
# Polars.sum("b").name.suffix("_sum"),
|
79
|
+
# (Polars.col("c") ** 2).mean.name.suffix("_mean_squared")
|
80
|
+
# ).collect
|
81
|
+
# # =>
|
82
|
+
# # shape: (3, 3)
|
83
|
+
# # ┌─────┬───────┬────────────────┐
|
84
|
+
# # │ a ┆ b_sum ┆ c_mean_squared │
|
85
|
+
# # │ --- ┆ --- ┆ --- │
|
86
|
+
# # │ str ┆ i64 ┆ f64 │
|
87
|
+
# # ╞═════╪═══════╪════════════════╡
|
88
|
+
# # │ a ┆ 2 ┆ 17.0 │
|
89
|
+
# # │ c ┆ 3 ┆ 1.0 │
|
90
|
+
# # │ b ┆ 5 ┆ 10.0 │
|
91
|
+
# # └─────┴───────┴────────────────┘
|
92
|
+
#
|
93
|
+
# @example Use keyword arguments to easily name your expression inputs.
|
94
|
+
# ldf.group_by("a").agg(
|
95
|
+
# b_sum: Polars.sum("b"),
|
96
|
+
# c_mean_squared: (Polars.col("c") ** 2).mean
|
97
|
+
# ).collect
|
98
|
+
# # =>
|
99
|
+
# # shape: (3, 3)
|
100
|
+
# # ┌─────┬───────┬────────────────┐
|
101
|
+
# # │ a ┆ b_sum ┆ c_mean_squared │
|
102
|
+
# # │ --- ┆ --- ┆ --- │
|
103
|
+
# # │ str ┆ i64 ┆ f64 │
|
104
|
+
# # ╞═════╪═══════╪════════════════╡
|
105
|
+
# # │ a ┆ 2 ┆ 17.0 │
|
106
|
+
# # │ c ┆ 3 ┆ 1.0 │
|
107
|
+
# # │ b ┆ 5 ┆ 10.0 │
|
108
|
+
# # └─────┴───────┴────────────────┘
|
109
|
+
def agg(*aggs, **named_aggs)
|
110
|
+
rbexprs = Utils.parse_as_list_of_expressions(*aggs, **named_aggs)
|
14
111
|
Utils.wrap_ldf(@lgb.agg(rbexprs))
|
15
112
|
end
|
16
113
|
|
data/lib/polars/list_expr.rb
CHANGED
@@ -365,6 +365,10 @@ module Polars
|
|
365
365
|
#
|
366
366
|
# @param index [Integer]
|
367
367
|
# Index to return per sublist
|
368
|
+
# @param null_on_oob [Boolean]
|
369
|
+
# Behavior if an index is out of bounds:
|
370
|
+
# true -> set as null
|
371
|
+
# false -> raise an error
|
368
372
|
#
|
369
373
|
# @return [Expr]
|
370
374
|
#
|
@@ -382,9 +386,9 @@ module Polars
|
|
382
386
|
# # │ null │
|
383
387
|
# # │ 1 │
|
384
388
|
# # └──────┘
|
385
|
-
def get(index)
|
389
|
+
def get(index, null_on_oob: true)
|
386
390
|
index = Utils.parse_as_expression(index)
|
387
|
-
Utils.wrap_expr(_rbexpr.list_get(index))
|
391
|
+
Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
|
388
392
|
end
|
389
393
|
|
390
394
|
# Get the value by index in the sublists.
|
data/lib/polars/rolling_group_by.rb
CHANGED
@@ -25,12 +25,12 @@ module Polars
|
|
25
25
|
@check_sorted = check_sorted
|
26
26
|
end
|
27
27
|
|
28
|
-
def agg(aggs)
|
28
|
+
def agg(*aggs, **named_aggs)
|
29
29
|
@df.lazy
|
30
30
|
.group_by_rolling(
|
31
31
|
index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by, check_sorted: @check_sorted
|
32
32
|
)
|
33
|
-
.agg(aggs)
|
33
|
+
.agg(*aggs, **named_aggs)
|
34
34
|
.collect(no_optimization: true, string_cache: false)
|
35
35
|
end
|
36
36
|
end
|
data/lib/polars/series.rb
CHANGED
@@ -1155,13 +1155,13 @@ module Polars
|
|
1155
1155
|
# s.cumulative_eval(Polars.element.first - Polars.element.last ** 2)
|
1156
1156
|
# # =>
|
1157
1157
|
# # shape: (5,)
|
1158
|
-
# # Series: 'values' [
|
1158
|
+
# # Series: 'values' [i64]
|
1159
1159
|
# # [
|
1160
|
-
# # 0
|
1161
|
-
# # -3
|
1162
|
-
# # -8
|
1163
|
-
# # -15
|
1164
|
-
# # -24
|
1160
|
+
# # 0
|
1161
|
+
# # -3
|
1162
|
+
# # -8
|
1163
|
+
# # -15
|
1164
|
+
# # -24
|
1165
1165
|
# # ]
|
1166
1166
|
def cumulative_eval(expr, min_periods: 1, parallel: false)
|
1167
1167
|
super
|
@@ -1567,12 +1567,12 @@ module Polars
|
|
1567
1567
|
# # 2
|
1568
1568
|
# # 1
|
1569
1569
|
# # ]
|
1570
|
-
def sort(reverse: false, nulls_last: false, in_place: false)
|
1570
|
+
def sort(reverse: false, nulls_last: false, multithreaded: true, in_place: false)
|
1571
1571
|
if in_place
|
1572
|
-
self._s = _s.sort(reverse, nulls_last)
|
1572
|
+
self._s = _s.sort(reverse, nulls_last, multithreaded)
|
1573
1573
|
self
|
1574
1574
|
else
|
1575
|
-
Utils.wrap_s(_s.sort(reverse, nulls_last))
|
1575
|
+
Utils.wrap_s(_s.sort(reverse, nulls_last, multithreaded))
|
1576
1576
|
end
|
1577
1577
|
end
|
1578
1578
|
|
@@ -1594,7 +1594,7 @@ module Polars
|
|
1594
1594
|
# # 4
|
1595
1595
|
# # 3
|
1596
1596
|
# # ]
|
1597
|
-
def top_k(k: 5)
|
1597
|
+
def top_k(k: 5, nulls_last: false, multithreaded: true)
|
1598
1598
|
super
|
1599
1599
|
end
|
1600
1600
|
|
@@ -1616,7 +1616,7 @@ module Polars
|
|
1616
1616
|
# # 2
|
1617
1617
|
# # 3
|
1618
1618
|
# # ]
|
1619
|
-
def bottom_k(k: 5)
|
1619
|
+
def bottom_k(k: 5, nulls_last: false, multithreaded: true)
|
1620
1620
|
super
|
1621
1621
|
end
|
1622
1622
|
|
@@ -3646,6 +3646,8 @@ module Polars
|
|
3646
3646
|
# on the order that the values occur in the Series.
|
3647
3647
|
# @param reverse [Boolean]
|
3648
3648
|
# Reverse the operation.
|
3649
|
+
# @param seed [Integer]
|
3650
|
+
# If `method: "random"`, use this as seed.
|
3649
3651
|
#
|
3650
3652
|
# @return [Series]
|
3651
3653
|
#
|
@@ -3676,7 +3678,7 @@ module Polars
|
|
3676
3678
|
# # 2
|
3677
3679
|
# # 5
|
3678
3680
|
# # ]
|
3679
|
-
def rank(method: "average", reverse: false)
|
3681
|
+
def rank(method: "average", reverse: false, seed: nil)
|
3680
3682
|
super
|
3681
3683
|
end
|
3682
3684
|
|