polars-df 0.9.0-arm64-darwin → 0.11.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +23 -0
- data/Cargo.lock +144 -57
- data/LICENSE-THIRD-PARTY.txt +629 -29
- data/README.md +7 -6
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +6 -2
- data/lib/polars/batched_csv_reader.rb +11 -3
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +225 -370
- data/lib/polars/date_time_expr.rb +11 -4
- data/lib/polars/date_time_name_space.rb +14 -4
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/exceptions.rb +4 -0
- data/lib/polars/expr.rb +1171 -54
- data/lib/polars/functions/lazy.rb +3 -3
- data/lib/polars/functions/range/date_range.rb +92 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/whenthen.rb +74 -5
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +307 -489
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +18 -0
- data/lib/polars/io/ndjson.rb +69 -0
- data/lib/polars/io/parquet.rb +226 -0
- data/lib/polars/lazy_frame.rb +55 -195
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +6 -2
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +14 -12
- data/lib/polars/string_expr.rb +38 -36
- data/lib/polars/utils.rb +89 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +10 -3
- metadata +13 -6
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
data/lib/polars/lazy_frame.rb
CHANGED
@@ -27,145 +27,6 @@ module Polars
|
|
27
27
|
ldf
|
28
28
|
end
|
29
29
|
|
30
|
-
# @private
|
31
|
-
def self._scan_csv(
|
32
|
-
file,
|
33
|
-
has_header: true,
|
34
|
-
sep: ",",
|
35
|
-
comment_char: nil,
|
36
|
-
quote_char: '"',
|
37
|
-
skip_rows: 0,
|
38
|
-
dtypes: nil,
|
39
|
-
null_values: nil,
|
40
|
-
ignore_errors: false,
|
41
|
-
cache: true,
|
42
|
-
with_column_names: nil,
|
43
|
-
infer_schema_length: 100,
|
44
|
-
n_rows: nil,
|
45
|
-
encoding: "utf8",
|
46
|
-
low_memory: false,
|
47
|
-
rechunk: true,
|
48
|
-
skip_rows_after_header: 0,
|
49
|
-
row_count_name: nil,
|
50
|
-
row_count_offset: 0,
|
51
|
-
parse_dates: false,
|
52
|
-
eol_char: "\n"
|
53
|
-
)
|
54
|
-
dtype_list = nil
|
55
|
-
if !dtypes.nil?
|
56
|
-
dtype_list = []
|
57
|
-
dtypes.each do |k, v|
|
58
|
-
dtype_list << [k, Utils.rb_type_to_dtype(v)]
|
59
|
-
end
|
60
|
-
end
|
61
|
-
processed_null_values = Utils._process_null_values(null_values)
|
62
|
-
|
63
|
-
_from_rbldf(
|
64
|
-
RbLazyFrame.new_from_csv(
|
65
|
-
file,
|
66
|
-
sep,
|
67
|
-
has_header,
|
68
|
-
ignore_errors,
|
69
|
-
skip_rows,
|
70
|
-
n_rows,
|
71
|
-
cache,
|
72
|
-
dtype_list,
|
73
|
-
low_memory,
|
74
|
-
comment_char,
|
75
|
-
quote_char,
|
76
|
-
processed_null_values,
|
77
|
-
infer_schema_length,
|
78
|
-
with_column_names,
|
79
|
-
rechunk,
|
80
|
-
skip_rows_after_header,
|
81
|
-
encoding,
|
82
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
83
|
-
parse_dates,
|
84
|
-
eol_char
|
85
|
-
)
|
86
|
-
)
|
87
|
-
end
|
88
|
-
|
89
|
-
# @private
|
90
|
-
def self._scan_parquet(
|
91
|
-
file,
|
92
|
-
n_rows: nil,
|
93
|
-
cache: true,
|
94
|
-
parallel: "auto",
|
95
|
-
rechunk: true,
|
96
|
-
row_count_name: nil,
|
97
|
-
row_count_offset: 0,
|
98
|
-
storage_options: nil,
|
99
|
-
low_memory: false,
|
100
|
-
use_statistics: true,
|
101
|
-
hive_partitioning: true
|
102
|
-
)
|
103
|
-
_from_rbldf(
|
104
|
-
RbLazyFrame.new_from_parquet(
|
105
|
-
file,
|
106
|
-
n_rows,
|
107
|
-
cache,
|
108
|
-
parallel,
|
109
|
-
rechunk,
|
110
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
111
|
-
low_memory,
|
112
|
-
use_statistics,
|
113
|
-
hive_partitioning
|
114
|
-
)
|
115
|
-
)
|
116
|
-
end
|
117
|
-
|
118
|
-
# @private
|
119
|
-
def self._scan_ipc(
|
120
|
-
file,
|
121
|
-
n_rows: nil,
|
122
|
-
cache: true,
|
123
|
-
rechunk: true,
|
124
|
-
row_count_name: nil,
|
125
|
-
row_count_offset: 0,
|
126
|
-
storage_options: nil,
|
127
|
-
memory_map: true
|
128
|
-
)
|
129
|
-
if Utils.pathlike?(file)
|
130
|
-
file = Utils.normalise_filepath(file)
|
131
|
-
end
|
132
|
-
|
133
|
-
_from_rbldf(
|
134
|
-
RbLazyFrame.new_from_ipc(
|
135
|
-
file,
|
136
|
-
n_rows,
|
137
|
-
cache,
|
138
|
-
rechunk,
|
139
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
140
|
-
memory_map
|
141
|
-
)
|
142
|
-
)
|
143
|
-
end
|
144
|
-
|
145
|
-
# @private
|
146
|
-
def self._scan_ndjson(
|
147
|
-
file,
|
148
|
-
infer_schema_length: nil,
|
149
|
-
batch_size: nil,
|
150
|
-
n_rows: nil,
|
151
|
-
low_memory: false,
|
152
|
-
rechunk: true,
|
153
|
-
row_count_name: nil,
|
154
|
-
row_count_offset: 0
|
155
|
-
)
|
156
|
-
_from_rbldf(
|
157
|
-
RbLazyFrame.new_from_ndjson(
|
158
|
-
file,
|
159
|
-
infer_schema_length,
|
160
|
-
batch_size,
|
161
|
-
n_rows,
|
162
|
-
low_memory,
|
163
|
-
rechunk,
|
164
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset)
|
165
|
-
)
|
166
|
-
)
|
167
|
-
end
|
168
|
-
|
169
30
|
# def self.from_json
|
170
31
|
# end
|
171
32
|
|
@@ -177,7 +38,7 @@ module Polars
|
|
177
38
|
# @return [LazyFrame]
|
178
39
|
def self.read_json(file)
|
179
40
|
if Utils.pathlike?(file)
|
180
|
-
file = Utils.normalise_filepath(file)
|
41
|
+
file = Utils.normalize_filepath(file)
|
181
42
|
end
|
182
43
|
|
183
44
|
Utils.wrap_ldf(RbLazyFrame.read_json(file))
|
@@ -284,7 +145,7 @@ module Polars
|
|
284
145
|
# @return [nil]
|
285
146
|
def write_json(file)
|
286
147
|
if Utils.pathlike?(file)
|
287
|
-
file = Utils.normalise_filepath(file)
|
148
|
+
file = Utils.normalize_filepath(file)
|
288
149
|
end
|
289
150
|
_ldf.write_json(file)
|
290
151
|
nil
|
@@ -400,16 +261,16 @@ module Polars
|
|
400
261
|
# # │ 2 ┆ 7.0 ┆ b │
|
401
262
|
# # │ 1 ┆ 6.0 ┆ a │
|
402
263
|
# # └─────┴─────┴─────┘
|
403
|
-
def sort(by, reverse: false, nulls_last: false, maintain_order: false)
|
264
|
+
def sort(by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
|
404
265
|
if by.is_a?(::String)
|
405
|
-
return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order))
|
266
|
+
return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order, multithreaded))
|
406
267
|
end
|
407
268
|
if Utils.bool?(reverse)
|
408
269
|
reverse = [reverse]
|
409
270
|
end
|
410
271
|
|
411
272
|
by = Utils.selection_to_rbexpr_list(by)
|
412
|
-
_from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order))
|
273
|
+
_from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order, multithreaded))
|
413
274
|
end
|
414
275
|
|
415
276
|
# def profile
|
@@ -1133,7 +994,7 @@ module Polars
|
|
1133
994
|
# # ┌─────────┐
|
1134
995
|
# # │ literal │
|
1135
996
|
# # │ --- │
|
1136
|
-
# # │
|
997
|
+
# # │ i32 │
|
1137
998
|
# # ╞═════════╡
|
1138
999
|
# # │ 0 │
|
1139
1000
|
# # │ 0 │
|
@@ -1445,16 +1306,16 @@ module Polars
|
|
1445
1306
|
# )
|
1446
1307
|
# # =>
|
1447
1308
|
# # shape: (4, 3)
|
1448
|
-
# #
|
1449
|
-
# # │ time ┆ time_count ┆ time_agg_list
|
1450
|
-
# # │ --- ┆ --- ┆ ---
|
1451
|
-
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]]
|
1452
|
-
# #
|
1453
|
-
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12
|
1454
|
-
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12
|
1455
|
-
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12
|
1456
|
-
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00]
|
1457
|
-
# #
|
1309
|
+
# # ┌─────────────────────┬────────────┬─────────────────────────────────┐
|
1310
|
+
# # │ time ┆ time_count ┆ time_agg_list │
|
1311
|
+
# # │ --- ┆ --- ┆ --- │
|
1312
|
+
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]] │
|
1313
|
+
# # ╞═════════════════════╪════════════╪═════════════════════════════════╡
|
1314
|
+
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-… │
|
1315
|
+
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-… │
|
1316
|
+
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-… │
|
1317
|
+
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00] │
|
1318
|
+
# # └─────────────────────┴────────────┴─────────────────────────────────┘
|
1458
1319
|
#
|
1459
1320
|
# @example When closed="both" the time values at the window boundaries belong to 2 groups.
|
1460
1321
|
# df.group_by_dynamic("time", every: "1h", closed: "both").agg(
|
@@ -1523,12 +1384,13 @@ module Polars
|
|
1523
1384
|
# closed: "right"
|
1524
1385
|
# ).agg(Polars.col("A").alias("A_agg_list"))
|
1525
1386
|
# # =>
|
1526
|
-
# # shape: (3, 4)
|
1387
|
+
# # shape: (4, 4)
|
1527
1388
|
# # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
|
1528
1389
|
# # │ _lower_boundary ┆ _upper_boundary ┆ idx ┆ A_agg_list │
|
1529
1390
|
# # │ --- ┆ --- ┆ --- ┆ --- │
|
1530
1391
|
# # │ i64 ┆ i64 ┆ i64 ┆ list[str] │
|
1531
1392
|
# # ╞═════════════════╪═════════════════╪═════╪═════════════════╡
|
1393
|
+
# # │ -2 ┆ 1 ┆ -2 ┆ ["A", "A"] │
|
1532
1394
|
# # │ 0 ┆ 3 ┆ 0 ┆ ["A", "B", "B"] │
|
1533
1395
|
# # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
|
1534
1396
|
# # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
|
@@ -1837,7 +1699,7 @@ module Polars
|
|
1837
1699
|
if how == "cross"
|
1838
1700
|
return _from_rbldf(
|
1839
1701
|
_ldf.join(
|
1840
|
-
other._ldf, [], [], allow_parallel, force_parallel, how, suffix
|
1702
|
+
other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
|
1841
1703
|
)
|
1842
1704
|
)
|
1843
1705
|
end
|
@@ -1891,16 +1753,16 @@ module Polars
|
|
1891
1753
|
# ).collect
|
1892
1754
|
# # =>
|
1893
1755
|
# # shape: (4, 6)
|
1894
|
-
# #
|
1895
|
-
# # │ a ┆ b ┆ c ┆ a^2
|
1896
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
1897
|
-
# # │ i64 ┆ f64 ┆ bool ┆
|
1898
|
-
# #
|
1899
|
-
# # │ 1 ┆ 0.5 ┆ true ┆ 1
|
1900
|
-
# # │ 2 ┆ 4.0 ┆ true ┆ 4
|
1901
|
-
# # │ 3 ┆ 10.0 ┆ false ┆ 9
|
1902
|
-
# # │ 4 ┆ 13.0 ┆ true ┆ 16
|
1903
|
-
# #
|
1756
|
+
# # ┌─────┬──────┬───────┬─────┬──────┬───────┐
|
1757
|
+
# # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
|
1758
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
1759
|
+
# # │ i64 ┆ f64 ┆ bool ┆ i64 ┆ f64 ┆ bool │
|
1760
|
+
# # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡
|
1761
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 1 ┆ 0.25 ┆ false │
|
1762
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 4 ┆ 2.0 ┆ false │
|
1763
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 9 ┆ 5.0 ┆ true │
|
1764
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │
|
1765
|
+
# # └─────┴──────┴───────┴─────┴──────┴───────┘
|
1904
1766
|
def with_columns(*exprs, **named_exprs)
|
1905
1767
|
structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
|
1906
1768
|
rbexprs = Utils.parse_as_list_of_expressions(*exprs, **named_exprs, __structify: structify)
|
@@ -1965,26 +1827,26 @@ module Polars
|
|
1965
1827
|
# # ┌─────┬─────┬───────────┐
|
1966
1828
|
# # │ a ┆ b ┆ b_squared │
|
1967
1829
|
# # │ --- ┆ --- ┆ --- │
|
1968
|
-
# # │ i64 ┆ i64 ┆
|
1830
|
+
# # │ i64 ┆ i64 ┆ i64 │
|
1969
1831
|
# # ╞═════╪═════╪═══════════╡
|
1970
|
-
# # │ 1 ┆ 2 ┆ 4
|
1971
|
-
# # │ 3 ┆ 4 ┆ 16
|
1972
|
-
# # │ 5 ┆ 6 ┆ 36
|
1832
|
+
# # │ 1 ┆ 2 ┆ 4 │
|
1833
|
+
# # │ 3 ┆ 4 ┆ 16 │
|
1834
|
+
# # │ 5 ┆ 6 ┆ 36 │
|
1973
1835
|
# # └─────┴─────┴───────────┘
|
1974
1836
|
#
|
1975
1837
|
# @example
|
1976
1838
|
# df.with_column(Polars.col("a") ** 2).collect
|
1977
1839
|
# # =>
|
1978
1840
|
# # shape: (3, 2)
|
1979
|
-
# #
|
1980
|
-
# # │ a
|
1981
|
-
# # │ ---
|
1982
|
-
# # │
|
1983
|
-
# #
|
1984
|
-
# # │ 1
|
1985
|
-
# # │ 9
|
1986
|
-
# # │ 25
|
1987
|
-
# #
|
1841
|
+
# # ┌─────┬─────┐
|
1842
|
+
# # │ a ┆ b │
|
1843
|
+
# # │ --- ┆ --- │
|
1844
|
+
# # │ i64 ┆ i64 │
|
1845
|
+
# # ╞═════╪═════╡
|
1846
|
+
# # │ 1 ┆ 2 │
|
1847
|
+
# # │ 9 ┆ 4 │
|
1848
|
+
# # │ 25 ┆ 6 │
|
1849
|
+
# # └─────┴─────┘
|
1988
1850
|
def with_column(column)
|
1989
1851
|
with_columns([column])
|
1990
1852
|
end
|
@@ -1996,11 +1858,9 @@ module Polars
|
|
1996
1858
|
# - List of column names.
|
1997
1859
|
#
|
1998
1860
|
# @return [LazyFrame]
|
1999
|
-
def drop(columns)
|
2000
|
-
|
2001
|
-
|
2002
|
-
end
|
2003
|
-
_from_rbldf(_ldf.drop(columns))
|
1861
|
+
def drop(*columns)
|
1862
|
+
drop_cols = Utils._expand_selectors(self, *columns)
|
1863
|
+
_from_rbldf(_ldf.drop(drop_cols))
|
2004
1864
|
end
|
2005
1865
|
|
2006
1866
|
# Rename column names.
|
@@ -2233,16 +2093,16 @@ module Polars
|
|
2233
2093
|
# df.with_row_index.collect
|
2234
2094
|
# # =>
|
2235
2095
|
# # shape: (3, 3)
|
2236
|
-
# #
|
2237
|
-
# # │
|
2238
|
-
# # │ ---
|
2239
|
-
# # │ u32
|
2240
|
-
# #
|
2241
|
-
# # │ 0
|
2242
|
-
# # │ 1
|
2243
|
-
# # │ 2
|
2244
|
-
# #
|
2245
|
-
def with_row_index(name: "
|
2096
|
+
# # ┌───────┬─────┬─────┐
|
2097
|
+
# # │ index ┆ a ┆ b │
|
2098
|
+
# # │ --- ┆ --- ┆ --- │
|
2099
|
+
# # │ u32 ┆ i64 ┆ i64 │
|
2100
|
+
# # ╞═══════╪═════╪═════╡
|
2101
|
+
# # │ 0 ┆ 1 ┆ 2 │
|
2102
|
+
# # │ 1 ┆ 3 ┆ 4 │
|
2103
|
+
# # │ 2 ┆ 5 ┆ 6 │
|
2104
|
+
# # └───────┴─────┴─────┘
|
2105
|
+
def with_row_index(name: "index", offset: 0)
|
2246
2106
|
_from_rbldf(_ldf.with_row_index(name, offset))
|
2247
2107
|
end
|
2248
2108
|
alias_method :with_row_count, :with_row_index
|
data/lib/polars/lazy_group_by.rb
CHANGED
@@ -6,11 +6,108 @@ module Polars
|
|
6
6
|
@lgb = lgb
|
7
7
|
end
|
8
8
|
|
9
|
-
#
|
9
|
+
# Compute aggregations for each group of a group by operation.
|
10
|
+
#
|
11
|
+
# @param aggs [Array]
|
12
|
+
# Aggregations to compute for each group of the group by operation,
|
13
|
+
# specified as positional arguments.
|
14
|
+
# Accepts expression input. Strings are parsed as column names.
|
15
|
+
# @param named_aggs [Hash]
|
16
|
+
# Additional aggregations, specified as keyword arguments.
|
17
|
+
# The resulting columns will be renamed to the keyword used.
|
10
18
|
#
|
11
19
|
# @return [LazyFrame]
|
12
|
-
|
13
|
-
|
20
|
+
#
|
21
|
+
# @example Compute the aggregation of the columns for each group.
|
22
|
+
# ldf = Polars::DataFrame.new(
|
23
|
+
# {
|
24
|
+
# "a" => ["a", "b", "a", "b", "c"],
|
25
|
+
# "b" => [1, 2, 1, 3, 3],
|
26
|
+
# "c" => [5, 4, 3, 2, 1]
|
27
|
+
# }
|
28
|
+
# ).lazy
|
29
|
+
# ldf.group_by("a").agg(
|
30
|
+
# [Polars.col("b"), Polars.col("c")]
|
31
|
+
# ).collect
|
32
|
+
# # =>
|
33
|
+
# # shape: (3, 3)
|
34
|
+
# # ┌─────┬───────────┬───────────┐
|
35
|
+
# # │ a ┆ b ┆ c │
|
36
|
+
# # │ --- ┆ --- ┆ --- │
|
37
|
+
# # │ str ┆ list[i64] ┆ list[i64] │
|
38
|
+
# # ╞═════╪═══════════╪═══════════╡
|
39
|
+
# # │ a ┆ [1, 1] ┆ [5, 3] │
|
40
|
+
# # │ b ┆ [2, 3] ┆ [4, 2] │
|
41
|
+
# # │ c ┆ [3] ┆ [1] │
|
42
|
+
# # └─────┴───────────┴───────────┘
|
43
|
+
#
|
44
|
+
# @example Compute the sum of a column for each group.
|
45
|
+
# ldf.group_by("a").agg(
|
46
|
+
# Polars.col("b").sum
|
47
|
+
# ).collect
|
48
|
+
# # =>
|
49
|
+
# # shape: (3, 2)
|
50
|
+
# # ┌─────┬─────┐
|
51
|
+
# # │ a ┆ b │
|
52
|
+
# # │ --- ┆ --- │
|
53
|
+
# # │ str ┆ i64 │
|
54
|
+
# # ╞═════╪═════╡
|
55
|
+
# # │ a ┆ 2 │
|
56
|
+
# # │ b ┆ 5 │
|
57
|
+
# # │ c ┆ 3 │
|
58
|
+
# # └─────┴─────┘
|
59
|
+
#
|
60
|
+
# @example Compute multiple aggregates at once by passing a list of expressions.
|
61
|
+
# ldf.group_by("a").agg(
|
62
|
+
# [Polars.sum("b"), Polars.mean("c")]
|
63
|
+
# ).collect
|
64
|
+
# # =>
|
65
|
+
# # shape: (3, 3)
|
66
|
+
# # ┌─────┬─────┬─────┐
|
67
|
+
# # │ a ┆ b ┆ c │
|
68
|
+
# # │ --- ┆ --- ┆ --- │
|
69
|
+
# # │ str ┆ i64 ┆ f64 │
|
70
|
+
# # ╞═════╪═════╪═════╡
|
71
|
+
# # │ c ┆ 3 ┆ 1.0 │
|
72
|
+
# # │ a ┆ 2 ┆ 4.0 │
|
73
|
+
# # │ b ┆ 5 ┆ 3.0 │
|
74
|
+
# # └─────┴─────┴─────┘
|
75
|
+
#
|
76
|
+
# @example Or use positional arguments to compute multiple aggregations in the same way.
|
77
|
+
# ldf.group_by("a").agg(
|
78
|
+
# Polars.sum("b").name.suffix("_sum"),
|
79
|
+
# (Polars.col("c") ** 2).mean.name.suffix("_mean_squared")
|
80
|
+
# ).collect
|
81
|
+
# # =>
|
82
|
+
# # shape: (3, 3)
|
83
|
+
# # ┌─────┬───────┬────────────────┐
|
84
|
+
# # │ a ┆ b_sum ┆ c_mean_squared │
|
85
|
+
# # │ --- ┆ --- ┆ --- │
|
86
|
+
# # │ str ┆ i64 ┆ f64 │
|
87
|
+
# # ╞═════╪═══════╪════════════════╡
|
88
|
+
# # │ a ┆ 2 ┆ 17.0 │
|
89
|
+
# # │ c ┆ 3 ┆ 1.0 │
|
90
|
+
# # │ b ┆ 5 ┆ 10.0 │
|
91
|
+
# # └─────┴───────┴────────────────┘
|
92
|
+
#
|
93
|
+
# @example Use keyword arguments to easily name your expression inputs.
|
94
|
+
# ldf.group_by("a").agg(
|
95
|
+
# b_sum: Polars.sum("b"),
|
96
|
+
# c_mean_squared: (Polars.col("c") ** 2).mean
|
97
|
+
# ).collect
|
98
|
+
# # =>
|
99
|
+
# # shape: (3, 3)
|
100
|
+
# # ┌─────┬───────┬────────────────┐
|
101
|
+
# # │ a ┆ b_sum ┆ c_mean_squared │
|
102
|
+
# # │ --- ┆ --- ┆ --- │
|
103
|
+
# # │ str ┆ i64 ┆ f64 │
|
104
|
+
# # ╞═════╪═══════╪════════════════╡
|
105
|
+
# # │ a ┆ 2 ┆ 17.0 │
|
106
|
+
# # │ c ┆ 3 ┆ 1.0 │
|
107
|
+
# # │ b ┆ 5 ┆ 10.0 │
|
108
|
+
# # └─────┴───────┴────────────────┘
|
109
|
+
def agg(*aggs, **named_aggs)
|
110
|
+
rbexprs = Utils.parse_as_list_of_expressions(*aggs, **named_aggs)
|
14
111
|
Utils.wrap_ldf(@lgb.agg(rbexprs))
|
15
112
|
end
|
16
113
|
|
data/lib/polars/list_expr.rb
CHANGED
@@ -365,6 +365,10 @@ module Polars
|
|
365
365
|
#
|
366
366
|
# @param index [Integer]
|
367
367
|
# Index to return per sublist
|
368
|
+
# @param null_on_oob [Boolean]
|
369
|
+
# Behavior if an index is out of bounds:
|
370
|
+
# true -> set as null
|
371
|
+
# false -> raise an error
|
368
372
|
#
|
369
373
|
# @return [Expr]
|
370
374
|
#
|
@@ -382,9 +386,9 @@ module Polars
|
|
382
386
|
# # │ null │
|
383
387
|
# # │ 1 │
|
384
388
|
# # └──────┘
|
385
|
-
def get(index)
|
389
|
+
def get(index, null_on_oob: true)
|
386
390
|
index = Utils.parse_as_expression(index)
|
387
|
-
Utils.wrap_expr(_rbexpr.list_get(index))
|
391
|
+
Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
|
388
392
|
end
|
389
393
|
|
390
394
|
# Get the value by index in the sublists.
|
data/lib/polars/rolling_group_by.rb
CHANGED
@@ -25,12 +25,12 @@ module Polars
|
|
25
25
|
@check_sorted = check_sorted
|
26
26
|
end
|
27
27
|
|
28
|
-
def agg(aggs)
|
28
|
+
def agg(*aggs, **named_aggs)
|
29
29
|
@df.lazy
|
30
30
|
.group_by_rolling(
|
31
31
|
index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by, check_sorted: @check_sorted
|
32
32
|
)
|
33
|
-
.agg(aggs)
|
33
|
+
.agg(*aggs, **named_aggs)
|
34
34
|
.collect(no_optimization: true, string_cache: false)
|
35
35
|
end
|
36
36
|
end
|
data/lib/polars/series.rb
CHANGED
@@ -1155,13 +1155,13 @@ module Polars
|
|
1155
1155
|
# s.cumulative_eval(Polars.element.first - Polars.element.last ** 2)
|
1156
1156
|
# # =>
|
1157
1157
|
# # shape: (5,)
|
1158
|
-
# # Series: 'values' [
|
1158
|
+
# # Series: 'values' [i64]
|
1159
1159
|
# # [
|
1160
|
-
# # 0
|
1161
|
-
# # -3
|
1162
|
-
# # -8
|
1163
|
-
# # -15
|
1164
|
-
# # -24
|
1160
|
+
# # 0
|
1161
|
+
# # -3
|
1162
|
+
# # -8
|
1163
|
+
# # -15
|
1164
|
+
# # -24
|
1165
1165
|
# # ]
|
1166
1166
|
def cumulative_eval(expr, min_periods: 1, parallel: false)
|
1167
1167
|
super
|
@@ -1567,12 +1567,12 @@ module Polars
|
|
1567
1567
|
# # 2
|
1568
1568
|
# # 1
|
1569
1569
|
# # ]
|
1570
|
-
def sort(reverse: false, nulls_last: false, in_place: false)
|
1570
|
+
def sort(reverse: false, nulls_last: false, multithreaded: true, in_place: false)
|
1571
1571
|
if in_place
|
1572
|
-
self._s = _s.sort(reverse, nulls_last)
|
1572
|
+
self._s = _s.sort(reverse, nulls_last, multithreaded)
|
1573
1573
|
self
|
1574
1574
|
else
|
1575
|
-
Utils.wrap_s(_s.sort(reverse, nulls_last))
|
1575
|
+
Utils.wrap_s(_s.sort(reverse, nulls_last, multithreaded))
|
1576
1576
|
end
|
1577
1577
|
end
|
1578
1578
|
|
@@ -1594,7 +1594,7 @@ module Polars
|
|
1594
1594
|
# # 4
|
1595
1595
|
# # 3
|
1596
1596
|
# # ]
|
1597
|
-
def top_k(k: 5)
|
1597
|
+
def top_k(k: 5, nulls_last: false, multithreaded: true)
|
1598
1598
|
super
|
1599
1599
|
end
|
1600
1600
|
|
@@ -1616,7 +1616,7 @@ module Polars
|
|
1616
1616
|
# # 2
|
1617
1617
|
# # 3
|
1618
1618
|
# # ]
|
1619
|
-
def bottom_k(k: 5)
|
1619
|
+
def bottom_k(k: 5, nulls_last: false, multithreaded: true)
|
1620
1620
|
super
|
1621
1621
|
end
|
1622
1622
|
|
@@ -3646,6 +3646,8 @@ module Polars
|
|
3646
3646
|
# on the order that the values occur in the Series.
|
3647
3647
|
# @param reverse [Boolean]
|
3648
3648
|
# Reverse the operation.
|
3649
|
+
# @param seed [Integer]
|
3650
|
+
# If `method: "random"`, use this as seed.
|
3649
3651
|
#
|
3650
3652
|
# @return [Series]
|
3651
3653
|
#
|
@@ -3676,7 +3678,7 @@ module Polars
|
|
3676
3678
|
# # 2
|
3677
3679
|
# # 5
|
3678
3680
|
# # ]
|
3679
|
-
def rank(method: "average", reverse: false)
|
3681
|
+
def rank(method: "average", reverse: false, seed: nil)
|
3680
3682
|
super
|
3681
3683
|
end
|
3682
3684
|
|