polars-df 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -1
- data/Cargo.lock +72 -58
- data/README.md +31 -27
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +35 -39
- data/ext/polars/src/c_api/allocator.rs +7 -0
- data/ext/polars/src/c_api/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +123 -101
- data/ext/polars/src/conversion/any_value.rs +13 -17
- data/ext/polars/src/conversion/chunked_array.rs +5 -5
- data/ext/polars/src/conversion/datetime.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +50 -45
- data/ext/polars/src/dataframe/export.rs +13 -13
- data/ext/polars/src/dataframe/general.rs +223 -223
- data/ext/polars/src/dataframe/io.rs +27 -141
- data/ext/polars/src/dataframe/mod.rs +13 -5
- data/ext/polars/src/dataframe/serde.rs +1 -1
- data/ext/polars/src/error.rs +44 -7
- data/ext/polars/src/exceptions.rs +45 -12
- data/ext/polars/src/expr/array.rs +12 -0
- data/ext/polars/src/expr/datatype.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +4 -5
- data/ext/polars/src/expr/general.rs +49 -13
- data/ext/polars/src/expr/list.rs +4 -0
- data/ext/polars/src/expr/meta.rs +8 -3
- data/ext/polars/src/expr/mod.rs +22 -6
- data/ext/polars/src/expr/name.rs +19 -8
- data/ext/polars/src/expr/rolling.rs +50 -1
- data/ext/polars/src/expr/string.rs +0 -1
- data/ext/polars/src/expr/struct.rs +7 -2
- data/ext/polars/src/file.rs +136 -103
- data/ext/polars/src/functions/aggregation.rs +9 -8
- data/ext/polars/src/functions/io.rs +81 -10
- data/ext/polars/src/functions/lazy.rs +95 -21
- data/ext/polars/src/functions/mod.rs +2 -0
- data/ext/polars/src/functions/range.rs +19 -3
- data/ext/polars/src/functions/strings.rs +6 -0
- data/ext/polars/src/functions/utils.rs +6 -0
- data/ext/polars/src/interop/arrow/mod.rs +50 -1
- data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
- data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/exitable.rs +39 -0
- data/ext/polars/src/lazyframe/general.rs +340 -236
- data/ext/polars/src/lazyframe/mod.rs +46 -10
- data/ext/polars/src/lazyframe/optflags.rs +5 -4
- data/ext/polars/src/lazyframe/serde.rs +11 -3
- data/ext/polars/src/lazyframe/sink.rs +10 -5
- data/ext/polars/src/lazygroupby.rs +6 -7
- data/ext/polars/src/lib.rs +141 -76
- data/ext/polars/src/map/dataframe.rs +12 -12
- data/ext/polars/src/map/lazy.rs +7 -5
- data/ext/polars/src/map/mod.rs +15 -8
- data/ext/polars/src/map/series.rs +3 -3
- data/ext/polars/src/on_startup.rs +16 -8
- data/ext/polars/src/prelude.rs +1 -0
- data/ext/polars/src/rb_modules.rs +19 -49
- data/ext/polars/src/series/aggregation.rs +79 -140
- data/ext/polars/src/series/arithmetic.rs +16 -22
- data/ext/polars/src/series/comparison.rs +101 -222
- data/ext/polars/src/series/construction.rs +17 -18
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +254 -289
- data/ext/polars/src/series/import.rs +17 -0
- data/ext/polars/src/series/map.rs +178 -160
- data/ext/polars/src/series/mod.rs +28 -12
- data/ext/polars/src/series/scatter.rs +12 -9
- data/ext/polars/src/sql.rs +16 -9
- data/ext/polars/src/testing/frame.rs +31 -0
- data/ext/polars/src/testing/mod.rs +5 -0
- data/ext/polars/src/testing/series.rs +31 -0
- data/ext/polars/src/timeout.rs +105 -0
- data/ext/polars/src/utils.rs +159 -1
- data/lib/polars/array_expr.rb +81 -12
- data/lib/polars/array_name_space.rb +74 -7
- data/lib/polars/batched_csv_reader.rb +21 -21
- data/lib/polars/binary_name_space.rb +1 -1
- data/lib/polars/cat_expr.rb +7 -7
- data/lib/polars/config.rb +1 -1
- data/lib/polars/convert.rb +189 -34
- data/lib/polars/data_frame.rb +1066 -831
- data/lib/polars/data_frame_plot.rb +173 -0
- data/lib/polars/data_type_group.rb +1 -0
- data/lib/polars/data_types.rb +31 -12
- data/lib/polars/date_time_expr.rb +51 -69
- data/lib/polars/date_time_name_space.rb +80 -112
- data/lib/polars/dynamic_group_by.rb +7 -7
- data/lib/polars/exceptions.rb +50 -10
- data/lib/polars/expr.rb +470 -517
- data/lib/polars/functions/aggregation/horizontal.rb +0 -1
- data/lib/polars/functions/aggregation/vertical.rb +2 -3
- data/lib/polars/functions/as_datatype.rb +290 -8
- data/lib/polars/functions/eager.rb +204 -10
- data/lib/polars/functions/escape_regex.rb +21 -0
- data/lib/polars/functions/lazy.rb +409 -169
- data/lib/polars/functions/lit.rb +17 -1
- data/lib/polars/functions/range/int_range.rb +74 -2
- data/lib/polars/functions/range/linear_space.rb +77 -0
- data/lib/polars/functions/range/time_range.rb +1 -1
- data/lib/polars/functions/repeat.rb +3 -12
- data/lib/polars/functions/whenthen.rb +2 -2
- data/lib/polars/group_by.rb +72 -20
- data/lib/polars/iceberg_dataset.rb +1 -6
- data/lib/polars/in_process_query.rb +37 -0
- data/lib/polars/io/cloud.rb +18 -0
- data/lib/polars/io/csv.rb +265 -126
- data/lib/polars/io/database.rb +0 -1
- data/lib/polars/io/delta.rb +15 -7
- data/lib/polars/io/ipc.rb +24 -17
- data/lib/polars/io/ndjson.rb +161 -24
- data/lib/polars/io/parquet.rb +101 -38
- data/lib/polars/lazy_frame.rb +849 -558
- data/lib/polars/lazy_group_by.rb +327 -2
- data/lib/polars/list_expr.rb +94 -16
- data/lib/polars/list_name_space.rb +88 -24
- data/lib/polars/meta_expr.rb +42 -1
- data/lib/polars/name_expr.rb +41 -4
- data/lib/polars/query_opt_flags.rb +198 -2
- data/lib/polars/rolling_group_by.rb +3 -3
- data/lib/polars/schema.rb +21 -3
- data/lib/polars/selector.rb +37 -2
- data/lib/polars/selectors.rb +45 -9
- data/lib/polars/series.rb +1156 -728
- data/lib/polars/series_plot.rb +72 -0
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/sql_context.rb +11 -4
- data/lib/polars/string_expr.rb +59 -68
- data/lib/polars/string_name_space.rb +51 -87
- data/lib/polars/struct_expr.rb +36 -18
- data/lib/polars/testing.rb +24 -273
- data/lib/polars/utils/constants.rb +2 -0
- data/lib/polars/utils/construction/data_frame.rb +410 -0
- data/lib/polars/utils/construction/series.rb +364 -0
- data/lib/polars/utils/construction/utils.rb +9 -0
- data/lib/polars/utils/deprecation.rb +11 -0
- data/lib/polars/utils/serde.rb +8 -3
- data/lib/polars/utils/unstable.rb +19 -0
- data/lib/polars/utils/various.rb +59 -0
- data/lib/polars/utils.rb +46 -47
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +47 -1
- metadata +25 -6
- data/ext/polars/src/allocator.rs +0 -13
- data/lib/polars/plot.rb +0 -109
data/lib/polars/lazy_group_by.rb
CHANGED
|
@@ -175,7 +175,332 @@ module Polars
|
|
|
175
175
|
Utils.wrap_ldf(@lgb.tail(n))
|
|
176
176
|
end
|
|
177
177
|
|
|
178
|
-
#
|
|
179
|
-
#
|
|
178
|
+
# Aggregate the groups into Series.
|
|
179
|
+
#
|
|
180
|
+
# @return [LazyFrame]
|
|
181
|
+
#
|
|
182
|
+
# @example
|
|
183
|
+
# ldf = Polars::DataFrame.new(
|
|
184
|
+
# {
|
|
185
|
+
# "a" => ["one", "two", "one", "two"],
|
|
186
|
+
# "b" => [1, 2, 3, 4]
|
|
187
|
+
# }
|
|
188
|
+
# ).lazy
|
|
189
|
+
# ldf.group_by("a", maintain_order: true).all.collect
|
|
190
|
+
# # =>
|
|
191
|
+
# # shape: (2, 2)
|
|
192
|
+
# # ┌─────┬───────────┐
|
|
193
|
+
# # │ a ┆ b │
|
|
194
|
+
# # │ --- ┆ --- │
|
|
195
|
+
# # │ str ┆ list[i64] │
|
|
196
|
+
# # ╞═════╪═══════════╡
|
|
197
|
+
# # │ one ┆ [1, 3] │
|
|
198
|
+
# # │ two ┆ [2, 4] │
|
|
199
|
+
# # └─────┴───────────┘
|
|
200
|
+
def all
|
|
201
|
+
agg(F.all)
|
|
202
|
+
end
|
|
203
|
+
|
|
204
|
+
# Return the number of rows in each group.
|
|
205
|
+
#
|
|
206
|
+
# @param name [String]
|
|
207
|
+
# Assign a name to the resulting column; if unset, defaults to "len".
|
|
208
|
+
#
|
|
209
|
+
# @return [LazyFrame]
|
|
210
|
+
#
|
|
211
|
+
# @example
|
|
212
|
+
# lf = Polars::LazyFrame.new({"a" => ["Apple", "Apple", "Orange"], "b" => [1, nil, 2]})
|
|
213
|
+
# lf.group_by("a").len.collect
|
|
214
|
+
# # =>
|
|
215
|
+
# # shape: (2, 2)
|
|
216
|
+
# # ┌────────┬─────┐
|
|
217
|
+
# # │ a ┆ len │
|
|
218
|
+
# # │ --- ┆ --- │
|
|
219
|
+
# # │ str ┆ u32 │
|
|
220
|
+
# # ╞════════╪═════╡
|
|
221
|
+
# # │ Apple ┆ 2 │
|
|
222
|
+
# # │ Orange ┆ 1 │
|
|
223
|
+
# # └────────┴─────┘
|
|
224
|
+
#
|
|
225
|
+
# @example
|
|
226
|
+
# lf.group_by("a").len(name: "n").collect
|
|
227
|
+
# # =>
|
|
228
|
+
# # shape: (2, 2)
|
|
229
|
+
# # ┌────────┬─────┐
|
|
230
|
+
# # │ a ┆ n │
|
|
231
|
+
# # │ --- ┆ --- │
|
|
232
|
+
# # │ str ┆ u32 │
|
|
233
|
+
# # ╞════════╪═════╡
|
|
234
|
+
# # │ Apple ┆ 2 │
|
|
235
|
+
# # │ Orange ┆ 1 │
|
|
236
|
+
# # └────────┴─────┘
|
|
237
|
+
def len(name: nil)
|
|
238
|
+
len_expr = F.len
|
|
239
|
+
if !name.nil?
|
|
240
|
+
len_expr = len_expr.alias(name)
|
|
241
|
+
end
|
|
242
|
+
agg(len_expr)
|
|
243
|
+
end
|
|
244
|
+
|
|
245
|
+
# Aggregate the first values in the group.
|
|
246
|
+
#
|
|
247
|
+
# @return [LazyFrame]
|
|
248
|
+
#
|
|
249
|
+
# @example
|
|
250
|
+
# ldf = Polars::DataFrame.new(
|
|
251
|
+
# {
|
|
252
|
+
# "a" => [1, 2, 2, 3, 4, 5],
|
|
253
|
+
# "b" => [0.5, 0.5, 4, 10, 13, 14],
|
|
254
|
+
# "c" => [true, true, true, false, false, true],
|
|
255
|
+
# "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
|
|
256
|
+
# }
|
|
257
|
+
# ).lazy
|
|
258
|
+
# ldf.group_by("d", maintain_order: true).first.collect
|
|
259
|
+
# # =>
|
|
260
|
+
# # shape: (3, 4)
|
|
261
|
+
# # ┌────────┬─────┬──────┬───────┐
|
|
262
|
+
# # │ d ┆ a ┆ b ┆ c │
|
|
263
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
|
264
|
+
# # │ str ┆ i64 ┆ f64 ┆ bool │
|
|
265
|
+
# # ╞════════╪═════╪══════╪═══════╡
|
|
266
|
+
# # │ Apple ┆ 1 ┆ 0.5 ┆ true │
|
|
267
|
+
# # │ Orange ┆ 2 ┆ 0.5 ┆ true │
|
|
268
|
+
# # │ Banana ┆ 4 ┆ 13.0 ┆ false │
|
|
269
|
+
# # └────────┴─────┴──────┴───────┘
|
|
270
|
+
def first
|
|
271
|
+
agg(F.all.first)
|
|
272
|
+
end
|
|
273
|
+
|
|
274
|
+
# Aggregate the last values in the group.
|
|
275
|
+
#
|
|
276
|
+
# @return [LazyFrame]
|
|
277
|
+
#
|
|
278
|
+
# @example
|
|
279
|
+
# ldf = Polars::DataFrame.new(
|
|
280
|
+
# {
|
|
281
|
+
# "a" => [1, 2, 2, 3, 4, 5],
|
|
282
|
+
# "b" => [0.5, 0.5, 4, 10, 14, 13],
|
|
283
|
+
# "c" => [true, true, true, false, false, true],
|
|
284
|
+
# "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
|
|
285
|
+
# }
|
|
286
|
+
# ).lazy
|
|
287
|
+
# ldf.group_by("d", maintain_order: true).last.collect
|
|
288
|
+
# # =>
|
|
289
|
+
# # shape: (3, 4)
|
|
290
|
+
# # ┌────────┬─────┬──────┬───────┐
|
|
291
|
+
# # │ d ┆ a ┆ b ┆ c │
|
|
292
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
|
293
|
+
# # │ str ┆ i64 ┆ f64 ┆ bool │
|
|
294
|
+
# # ╞════════╪═════╪══════╪═══════╡
|
|
295
|
+
# # │ Apple ┆ 3 ┆ 10.0 ┆ false │
|
|
296
|
+
# # │ Orange ┆ 2 ┆ 0.5 ┆ true │
|
|
297
|
+
# # │ Banana ┆ 5 ┆ 13.0 ┆ true │
|
|
298
|
+
# # └────────┴─────┴──────┴───────┘
|
|
299
|
+
def last
|
|
300
|
+
agg(F.all.last)
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
# Reduce the groups to the maximal value.
|
|
304
|
+
#
|
|
305
|
+
# @return [LazyFrame]
|
|
306
|
+
#
|
|
307
|
+
# @example
|
|
308
|
+
# ldf = Polars::DataFrame.new(
|
|
309
|
+
# {
|
|
310
|
+
# "a" => [1, 2, 2, 3, 4, 5],
|
|
311
|
+
# "b" => [0.5, 0.5, 4, 10, 13, 14],
|
|
312
|
+
# "c" => [true, true, true, false, false, true],
|
|
313
|
+
# "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
|
|
314
|
+
# }
|
|
315
|
+
# ).lazy
|
|
316
|
+
# ldf.group_by("d", maintain_order: true).max.collect
|
|
317
|
+
# # =>
|
|
318
|
+
# # shape: (3, 4)
|
|
319
|
+
# # ┌────────┬─────┬──────┬──────┐
|
|
320
|
+
# # │ d ┆ a ┆ b ┆ c │
|
|
321
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
|
322
|
+
# # │ str ┆ i64 ┆ f64 ┆ bool │
|
|
323
|
+
# # ╞════════╪═════╪══════╪══════╡
|
|
324
|
+
# # │ Apple ┆ 3 ┆ 10.0 ┆ true │
|
|
325
|
+
# # │ Orange ┆ 2 ┆ 0.5 ┆ true │
|
|
326
|
+
# # │ Banana ┆ 5 ┆ 14.0 ┆ true │
|
|
327
|
+
# # └────────┴─────┴──────┴──────┘
|
|
328
|
+
def max
|
|
329
|
+
agg(F.all.max)
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
# Reduce the groups to the mean values.
|
|
333
|
+
#
|
|
334
|
+
# @return [LazyFrame]
|
|
335
|
+
#
|
|
336
|
+
# @example
|
|
337
|
+
# ldf = Polars::DataFrame.new(
|
|
338
|
+
# {
|
|
339
|
+
# "a" => [1, 2, 2, 3, 4, 5],
|
|
340
|
+
# "b" => [0.5, 0.5, 4, 10, 13, 14],
|
|
341
|
+
# "c" => [true, true, true, false, false, true],
|
|
342
|
+
# "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
|
|
343
|
+
# }
|
|
344
|
+
# ).lazy
|
|
345
|
+
# ldf.group_by("d", maintain_order: true).mean.collect
|
|
346
|
+
# # =>
|
|
347
|
+
# # shape: (3, 4)
|
|
348
|
+
# # ┌────────┬─────┬──────────┬──────────┐
|
|
349
|
+
# # │ d ┆ a ┆ b ┆ c │
|
|
350
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
|
351
|
+
# # │ str ┆ f64 ┆ f64 ┆ f64 │
|
|
352
|
+
# # ╞════════╪═════╪══════════╪══════════╡
|
|
353
|
+
# # │ Apple ┆ 2.0 ┆ 4.833333 ┆ 0.666667 │
|
|
354
|
+
# # │ Orange ┆ 2.0 ┆ 0.5 ┆ 1.0 │
|
|
355
|
+
# # │ Banana ┆ 4.5 ┆ 13.5 ┆ 0.5 │
|
|
356
|
+
# # └────────┴─────┴──────────┴──────────┘
|
|
357
|
+
def mean
|
|
358
|
+
agg(F.all.mean)
|
|
359
|
+
end
|
|
360
|
+
|
|
361
|
+
# Return the median per group.
|
|
362
|
+
#
|
|
363
|
+
# @return [LazyFrame]
|
|
364
|
+
#
|
|
365
|
+
# @example
|
|
366
|
+
# ldf = Polars::DataFrame.new(
|
|
367
|
+
# {
|
|
368
|
+
# "a" => [1, 2, 2, 3, 4, 5],
|
|
369
|
+
# "b" => [0.5, 0.5, 4, 10, 13, 14],
|
|
370
|
+
# "d" => ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"]
|
|
371
|
+
# }
|
|
372
|
+
# ).lazy
|
|
373
|
+
# ldf.group_by("d", maintain_order: true).median.collect
|
|
374
|
+
# # =>
|
|
375
|
+
# # shape: (2, 3)
|
|
376
|
+
# # ┌────────┬─────┬──────┐
|
|
377
|
+
# # │ d ┆ a ┆ b │
|
|
378
|
+
# # │ --- ┆ --- ┆ --- │
|
|
379
|
+
# # │ str ┆ f64 ┆ f64 │
|
|
380
|
+
# # ╞════════╪═════╪══════╡
|
|
381
|
+
# # │ Apple ┆ 2.0 ┆ 4.0 │
|
|
382
|
+
# # │ Banana ┆ 4.0 ┆ 13.0 │
|
|
383
|
+
# # └────────┴─────┴──────┘
|
|
384
|
+
def median
|
|
385
|
+
agg(F.all.median)
|
|
386
|
+
end
|
|
387
|
+
|
|
388
|
+
# Reduce the groups to the minimal value.
|
|
389
|
+
#
|
|
390
|
+
# @return [LazyFrame]
|
|
391
|
+
#
|
|
392
|
+
# @example
|
|
393
|
+
# ldf = Polars::DataFrame.new(
|
|
394
|
+
# {
|
|
395
|
+
# "a" => [1, 2, 2, 3, 4, 5],
|
|
396
|
+
# "b" => [0.5, 0.5, 4, 10, 13, 14],
|
|
397
|
+
# "c" => [true, true, true, false, false, true],
|
|
398
|
+
# "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
|
|
399
|
+
# }
|
|
400
|
+
# ).lazy
|
|
401
|
+
# ldf.group_by("d", maintain_order: true).min.collect
|
|
402
|
+
# # =>
|
|
403
|
+
# # shape: (3, 4)
|
|
404
|
+
# # ┌────────┬─────┬──────┬───────┐
|
|
405
|
+
# # │ d ┆ a ┆ b ┆ c │
|
|
406
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
|
407
|
+
# # │ str ┆ i64 ┆ f64 ┆ bool │
|
|
408
|
+
# # ╞════════╪═════╪══════╪═══════╡
|
|
409
|
+
# # │ Apple ┆ 1 ┆ 0.5 ┆ false │
|
|
410
|
+
# # │ Orange ┆ 2 ┆ 0.5 ┆ true │
|
|
411
|
+
# # │ Banana ┆ 4 ┆ 13.0 ┆ false │
|
|
412
|
+
# # └────────┴─────┴──────┴───────┘
|
|
413
|
+
def min
|
|
414
|
+
agg(F.all.min)
|
|
415
|
+
end
|
|
416
|
+
|
|
417
|
+
# Count the unique values per group.
|
|
418
|
+
#
|
|
419
|
+
# @return [LazyFrame]
|
|
420
|
+
#
|
|
421
|
+
# @example
|
|
422
|
+
# ldf = Polars::DataFrame.new(
|
|
423
|
+
# {
|
|
424
|
+
# "a" => [1, 2, 1, 3, 4, 5],
|
|
425
|
+
# "b" => [0.5, 0.5, 0.5, 10, 13, 14],
|
|
426
|
+
# "d" => ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"]
|
|
427
|
+
# }
|
|
428
|
+
# ).lazy
|
|
429
|
+
# ldf.group_by("d", maintain_order: true).n_unique.collect
|
|
430
|
+
# # =>
|
|
431
|
+
# # shape: (2, 3)
|
|
432
|
+
# # ┌────────┬─────┬─────┐
|
|
433
|
+
# # │ d ┆ a ┆ b │
|
|
434
|
+
# # │ --- ┆ --- ┆ --- │
|
|
435
|
+
# # │ str ┆ u32 ┆ u32 │
|
|
436
|
+
# # ╞════════╪═════╪═════╡
|
|
437
|
+
# # │ Apple ┆ 2 ┆ 2 │
|
|
438
|
+
# # │ Banana ┆ 3 ┆ 3 │
|
|
439
|
+
# # └────────┴─────┴─────┘
|
|
440
|
+
def n_unique
|
|
441
|
+
agg(F.all.n_unique)
|
|
442
|
+
end
|
|
443
|
+
|
|
444
|
+
# Compute the quantile per group.
|
|
445
|
+
#
|
|
446
|
+
# @param quantile [Float]
|
|
447
|
+
# Quantile between 0.0 and 1.0.
|
|
448
|
+
# @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable']
|
|
449
|
+
# Interpolation method.
|
|
450
|
+
#
|
|
451
|
+
# @return [LazyFrame]
|
|
452
|
+
#
|
|
453
|
+
# @example
|
|
454
|
+
# ldf = Polars::DataFrame.new(
|
|
455
|
+
# {
|
|
456
|
+
# "a" => [1, 2, 2, 3, 4, 5],
|
|
457
|
+
# "b" => [0.5, 0.5, 4, 10, 13, 14],
|
|
458
|
+
# "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
|
|
459
|
+
# }
|
|
460
|
+
# ).lazy
|
|
461
|
+
# ldf.group_by("d", maintain_order: true).quantile(1).collect
|
|
462
|
+
# # =>
|
|
463
|
+
# # shape: (3, 3)
|
|
464
|
+
# # ┌────────┬─────┬──────┐
|
|
465
|
+
# # │ d ┆ a ┆ b │
|
|
466
|
+
# # │ --- ┆ --- ┆ --- │
|
|
467
|
+
# # │ str ┆ f64 ┆ f64 │
|
|
468
|
+
# # ╞════════╪═════╪══════╡
|
|
469
|
+
# # │ Apple ┆ 3.0 ┆ 10.0 │
|
|
470
|
+
# # │ Orange ┆ 2.0 ┆ 0.5 │
|
|
471
|
+
# # │ Banana ┆ 5.0 ┆ 14.0 │
|
|
472
|
+
# # └────────┴─────┴──────┘
|
|
473
|
+
def quantile(quantile, interpolation: "nearest")
|
|
474
|
+
agg(F.all.quantile(quantile, interpolation: interpolation))
|
|
475
|
+
end
|
|
476
|
+
|
|
477
|
+
# Reduce the groups to the sum.
|
|
478
|
+
#
|
|
479
|
+
# @return [LazyFrame]
|
|
480
|
+
#
|
|
481
|
+
# @example
|
|
482
|
+
# ldf = Polars::DataFrame.new(
|
|
483
|
+
# {
|
|
484
|
+
# "a" => [1, 2, 2, 3, 4, 5],
|
|
485
|
+
# "b" => [0.5, 0.5, 4, 10, 13, 14],
|
|
486
|
+
# "c" => [true, true, true, false, false, true],
|
|
487
|
+
# "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
|
|
488
|
+
# }
|
|
489
|
+
# ).lazy
|
|
490
|
+
# ldf.group_by("d", maintain_order: true).sum.collect
|
|
491
|
+
# # =>
|
|
492
|
+
# # shape: (3, 4)
|
|
493
|
+
# # ┌────────┬─────┬──────┬─────┐
|
|
494
|
+
# # │ d ┆ a ┆ b ┆ c │
|
|
495
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
|
496
|
+
# # │ str ┆ i64 ┆ f64 ┆ u32 │
|
|
497
|
+
# # ╞════════╪═════╪══════╪═════╡
|
|
498
|
+
# # │ Apple ┆ 6 ┆ 14.5 ┆ 2 │
|
|
499
|
+
# # │ Orange ┆ 2 ┆ 0.5 ┆ 1 │
|
|
500
|
+
# # │ Banana ┆ 9 ┆ 27.0 ┆ 1 │
|
|
501
|
+
# # └────────┴─────┴──────┴─────┘
|
|
502
|
+
def sum
|
|
503
|
+
agg(F.all.sum)
|
|
504
|
+
end
|
|
180
505
|
end
|
|
181
506
|
end
|
data/lib/polars/list_expr.rb
CHANGED
|
@@ -69,7 +69,7 @@ module Polars
|
|
|
69
69
|
#
|
|
70
70
|
# @example
|
|
71
71
|
# df = Polars::DataFrame.new({"foo" => [1, 2], "bar" => [["a", "b"], ["c"]]})
|
|
72
|
-
# df.select(Polars.col("bar").list.
|
|
72
|
+
# df.select(Polars.col("bar").list.len)
|
|
73
73
|
# # =>
|
|
74
74
|
# # shape: (2, 1)
|
|
75
75
|
# # ┌─────┐
|
|
@@ -83,7 +83,6 @@ module Polars
|
|
|
83
83
|
def len
|
|
84
84
|
Utils.wrap_expr(_rbexpr.list_len)
|
|
85
85
|
end
|
|
86
|
-
alias_method :lengths, :len
|
|
87
86
|
|
|
88
87
|
# Drop all null values in the list.
|
|
89
88
|
#
|
|
@@ -318,7 +317,7 @@ module Polars
|
|
|
318
317
|
|
|
319
318
|
# Sort the arrays in the list.
|
|
320
319
|
#
|
|
321
|
-
# @param
|
|
320
|
+
# @param descending [Boolean]
|
|
322
321
|
# Sort in descending order.
|
|
323
322
|
# @param nulls_last [Boolean]
|
|
324
323
|
# Place null values last.
|
|
@@ -342,8 +341,8 @@ module Polars
|
|
|
342
341
|
# # │ [1, 2, 3] │
|
|
343
342
|
# # │ [1, 2, 9] │
|
|
344
343
|
# # └───────────┘
|
|
345
|
-
def sort(
|
|
346
|
-
Utils.wrap_expr(_rbexpr.list_sort(
|
|
344
|
+
def sort(descending: false, nulls_last: false)
|
|
345
|
+
Utils.wrap_expr(_rbexpr.list_sort(descending, nulls_last))
|
|
347
346
|
end
|
|
348
347
|
|
|
349
348
|
# Reverse the arrays in the list.
|
|
@@ -477,7 +476,7 @@ module Polars
|
|
|
477
476
|
#
|
|
478
477
|
# @example
|
|
479
478
|
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
|
480
|
-
# df.select(Polars.col("foo").list.get(0))
|
|
479
|
+
# df.select(Polars.col("foo").list.get(0, null_on_oob: true))
|
|
481
480
|
# # =>
|
|
482
481
|
# # shape: (3, 1)
|
|
483
482
|
# # ┌──────┐
|
|
@@ -489,7 +488,7 @@ module Polars
|
|
|
489
488
|
# # │ null │
|
|
490
489
|
# # │ 1 │
|
|
491
490
|
# # └──────┘
|
|
492
|
-
def get(index, null_on_oob:
|
|
491
|
+
def get(index, null_on_oob: false)
|
|
493
492
|
index = Utils.parse_into_expression(index)
|
|
494
493
|
Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
|
|
495
494
|
end
|
|
@@ -510,8 +509,8 @@ module Polars
|
|
|
510
509
|
# Indices to return per sublist
|
|
511
510
|
# @param null_on_oob [Boolean]
|
|
512
511
|
# Behavior if an index is out of bounds:
|
|
513
|
-
#
|
|
514
|
-
#
|
|
512
|
+
# true -> set as null
|
|
513
|
+
# false -> raise an error
|
|
515
514
|
# Note that defaulting to raising an error is much cheaper
|
|
516
515
|
#
|
|
517
516
|
# @return [Expr]
|
|
@@ -534,7 +533,6 @@ module Polars
|
|
|
534
533
|
indices = Utils.parse_into_expression(indices)
|
|
535
534
|
Utils.wrap_expr(_rbexpr.list_gather(indices, null_on_oob))
|
|
536
535
|
end
|
|
537
|
-
alias_method :take, :gather
|
|
538
536
|
|
|
539
537
|
# Take every n-th value start from offset in sublists.
|
|
540
538
|
#
|
|
@@ -597,7 +595,7 @@ module Polars
|
|
|
597
595
|
# # │ 1 │
|
|
598
596
|
# # └──────┘
|
|
599
597
|
def first
|
|
600
|
-
get(0)
|
|
598
|
+
get(0, null_on_oob: true)
|
|
601
599
|
end
|
|
602
600
|
|
|
603
601
|
# Get the last value of the sublists.
|
|
@@ -619,7 +617,49 @@ module Polars
|
|
|
619
617
|
# # │ 2 │
|
|
620
618
|
# # └──────┘
|
|
621
619
|
def last
|
|
622
|
-
get(-1)
|
|
620
|
+
get(-1, null_on_oob: true)
|
|
621
|
+
end
|
|
622
|
+
|
|
623
|
+
# Get the single value of the sublists.
|
|
624
|
+
#
|
|
625
|
+
# This errors if the sublist length is not exactly one.
|
|
626
|
+
#
|
|
627
|
+
# @param allow_empty [Boolean]
|
|
628
|
+
# Allow having no values to return `null`.
|
|
629
|
+
#
|
|
630
|
+
# @return [Expr]
|
|
631
|
+
#
|
|
632
|
+
# @example
|
|
633
|
+
# df = Polars::DataFrame.new({"a" => [[3], [1], [2]]})
|
|
634
|
+
# df.with_columns(item: Polars.col("a").list.item)
|
|
635
|
+
# # =>
|
|
636
|
+
# # shape: (3, 2)
|
|
637
|
+
# # ┌───────────┬──────┐
|
|
638
|
+
# # │ a ┆ item │
|
|
639
|
+
# # │ --- ┆ --- │
|
|
640
|
+
# # │ list[i64] ┆ i64 │
|
|
641
|
+
# # ╞═══════════╪══════╡
|
|
642
|
+
# # │ [3] ┆ 3 │
|
|
643
|
+
# # │ [1] ┆ 1 │
|
|
644
|
+
# # │ [2] ┆ 2 │
|
|
645
|
+
# # └───────────┴──────┘
|
|
646
|
+
#
|
|
647
|
+
# @example
|
|
648
|
+
# df = Polars::DataFrame.new({"a" => [[], [1], [2]]})
|
|
649
|
+
# df.select(Polars.col("a").list.item(allow_empty: true))
|
|
650
|
+
# # =>
|
|
651
|
+
# # shape: (3, 1)
|
|
652
|
+
# # ┌──────┐
|
|
653
|
+
# # │ a │
|
|
654
|
+
# # │ --- │
|
|
655
|
+
# # │ i64 │
|
|
656
|
+
# # ╞══════╡
|
|
657
|
+
# # │ null │
|
|
658
|
+
# # │ 1 │
|
|
659
|
+
# # │ 2 │
|
|
660
|
+
# # └──────┘
|
|
661
|
+
def item(allow_empty: false)
|
|
662
|
+
agg(F.element.item(allow_empty: allow_empty))
|
|
623
663
|
end
|
|
624
664
|
|
|
625
665
|
# Check if sublists contain the given item.
|
|
@@ -876,7 +916,7 @@ module Polars
|
|
|
876
916
|
#
|
|
877
917
|
# @example
|
|
878
918
|
# df = Polars::DataFrame.new({"listcol" => [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]})
|
|
879
|
-
# df.select(Polars.col("listcol").list.
|
|
919
|
+
# df.select(Polars.col("listcol").list.count_matches(2).alias("number_of_twos"))
|
|
880
920
|
# # =>
|
|
881
921
|
# # shape: (5, 1)
|
|
882
922
|
# # ┌────────────────┐
|
|
@@ -893,7 +933,6 @@ module Polars
|
|
|
893
933
|
def count_matches(element)
|
|
894
934
|
Utils.wrap_expr(_rbexpr.list_count_matches(Utils.parse_into_expression(element)))
|
|
895
935
|
end
|
|
896
|
-
alias_method :count_match, :count_matches
|
|
897
936
|
|
|
898
937
|
# Convert a List column into an Array column with the same inner data type.
|
|
899
938
|
#
|
|
@@ -957,7 +996,7 @@ module Polars
|
|
|
957
996
|
# # │ [0, 1] ┆ {0,1} │
|
|
958
997
|
# # │ [0, 1, 2] ┆ {0,1} │
|
|
959
998
|
# # └───────────┴───────────┘
|
|
960
|
-
def to_struct(n_field_strategy:
|
|
999
|
+
def to_struct(n_field_strategy: nil, fields: nil, upper_bound: nil)
|
|
961
1000
|
if !fields.is_a?(::Array)
|
|
962
1001
|
if fields.nil?
|
|
963
1002
|
fields = upper_bound.times.map { |i| "field_#{i}" }
|
|
@@ -979,7 +1018,7 @@ module Polars
|
|
|
979
1018
|
#
|
|
980
1019
|
# @example
|
|
981
1020
|
# df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
|
|
982
|
-
# df.
|
|
1021
|
+
# df.with_columns(
|
|
983
1022
|
# Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
|
|
984
1023
|
# )
|
|
985
1024
|
# # =>
|
|
@@ -997,6 +1036,45 @@ module Polars
|
|
|
997
1036
|
Utils.wrap_expr(_rbexpr.list_eval(expr._rbexpr))
|
|
998
1037
|
end
|
|
999
1038
|
|
|
1039
|
+
# Run any polars aggregation expression against the lists' elements.
|
|
1040
|
+
#
|
|
1041
|
+
# @param expr [Expr]
|
|
1042
|
+
# Expression to run. Note that you can select an element with `Polars.element`.
|
|
1043
|
+
#
|
|
1044
|
+
# @return [Expr]
|
|
1045
|
+
#
|
|
1046
|
+
# @example
|
|
1047
|
+
# df = Polars::DataFrame.new({"a" => [[1, nil], [42, 13], [nil, nil]]})
|
|
1048
|
+
# df.with_columns(null_count: Polars.col("a").list.agg(Polars.element.null_count))
|
|
1049
|
+
# # =>
|
|
1050
|
+
# # shape: (3, 2)
|
|
1051
|
+
# # ┌──────────────┬────────────┐
|
|
1052
|
+
# # │ a ┆ null_count │
|
|
1053
|
+
# # │ --- ┆ --- │
|
|
1054
|
+
# # │ list[i64] ┆ u32 │
|
|
1055
|
+
# # ╞══════════════╪════════════╡
|
|
1056
|
+
# # │ [1, null] ┆ 1 │
|
|
1057
|
+
# # │ [42, 13] ┆ 0 │
|
|
1058
|
+
# # │ [null, null] ┆ 2 │
|
|
1059
|
+
# # └──────────────┴────────────┘
|
|
1060
|
+
#
|
|
1061
|
+
# @example
|
|
1062
|
+
# df.with_columns(no_nulls: Polars.col("a").list.agg(Polars.element.drop_nulls))
|
|
1063
|
+
# # =>
|
|
1064
|
+
# # shape: (3, 2)
|
|
1065
|
+
# # ┌──────────────┬───────────┐
|
|
1066
|
+
# # │ a ┆ no_nulls │
|
|
1067
|
+
# # │ --- ┆ --- │
|
|
1068
|
+
# # │ list[i64] ┆ list[i64] │
|
|
1069
|
+
# # ╞══════════════╪═══════════╡
|
|
1070
|
+
# # │ [1, null] ┆ [1] │
|
|
1071
|
+
# # │ [42, 13] ┆ [42, 13] │
|
|
1072
|
+
# # │ [null, null] ┆ [] │
|
|
1073
|
+
# # └──────────────┴───────────┘
|
|
1074
|
+
def agg(expr)
|
|
1075
|
+
Utils.wrap_expr(_rbexpr.list_agg(expr._rbexpr))
|
|
1076
|
+
end
|
|
1077
|
+
|
|
1000
1078
|
# Filter elements in each list by a boolean expression.
|
|
1001
1079
|
#
|
|
1002
1080
|
# @param predicate [Object]
|