polars-df 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +2 -1
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +7 -1
- data/ext/polars/src/conversion.rs +35 -2
- data/ext/polars/src/dataframe.rs +228 -11
- data/ext/polars/src/lazy/dataframe.rs +3 -3
- data/ext/polars/src/lazy/dsl.rs +59 -2
- data/ext/polars/src/lib.rs +151 -10
- data/ext/polars/src/series.rs +182 -29
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +1 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/data_frame.rb +2284 -137
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +612 -7
- data/lib/polars/expr_dispatch.rb +14 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +517 -0
- data/lib/polars/io.rb +421 -2
- data/lib/polars/lazy_frame.rb +1261 -67
- data/lib/polars/lazy_functions.rb +288 -10
- data/lib/polars/lazy_group_by.rb +79 -0
- data/lib/polars/list_expr.rb +5 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +1476 -212
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +663 -2
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/utils.rb +43 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +7 -10
- metadata +9 -2
data/lib/polars/lazy_frame.rb
CHANGED
@@ -152,29 +152,98 @@ module Polars
|
|
152
152
|
# def self.read_json
|
153
153
|
# end
|
154
154
|
|
155
|
-
#
|
156
|
-
#
|
155
|
+
# Get column names.
|
156
|
+
#
|
157
|
+
# @return [Array]
|
158
|
+
#
|
159
|
+
# @example
|
160
|
+
# df = (
|
161
|
+
# Polars::DataFrame.new(
|
162
|
+
# {
|
163
|
+
# "foo" => [1, 2, 3],
|
164
|
+
# "bar" => [6, 7, 8],
|
165
|
+
# "ham" => ["a", "b", "c"]
|
166
|
+
# }
|
167
|
+
# )
|
168
|
+
# .lazy
|
169
|
+
# .select(["foo", "bar"])
|
170
|
+
# )
|
171
|
+
# df.columns
|
172
|
+
# # => ["foo", "bar"]
|
173
|
+
def columns
  # Delegates straight to the wrapped `_ldf` handle.
  names = _ldf.columns
  names
end
|
157
176
|
|
158
|
-
#
|
159
|
-
#
|
177
|
+
# Get dtypes of columns in LazyFrame.
|
178
|
+
#
|
179
|
+
# @return [Array]
|
180
|
+
#
|
181
|
+
# @example
|
182
|
+
# lf = Polars::DataFrame.new(
|
183
|
+
# {
|
184
|
+
# "foo" => [1, 2, 3],
|
185
|
+
# "bar" => [6.0, 7.0, 8.0],
|
186
|
+
# "ham" => ["a", "b", "c"]
|
187
|
+
# }
|
188
|
+
# ).lazy
|
189
|
+
# lf.dtypes
|
190
|
+
# # => [:i64, :f64, :str]
|
191
|
+
def dtypes
  # Delegates straight to the wrapped `_ldf` handle.
  column_types = _ldf.dtypes
  column_types
end
|
160
194
|
|
161
|
-
#
|
162
|
-
#
|
195
|
+
# Get the schema.
|
196
|
+
#
|
197
|
+
# @return [Hash]
|
198
|
+
#
|
199
|
+
# @example
|
200
|
+
# lf = Polars::DataFrame.new(
|
201
|
+
# {
|
202
|
+
# "foo" => [1, 2, 3],
|
203
|
+
# "bar" => [6.0, 7.0, 8.0],
|
204
|
+
# "ham" => ["a", "b", "c"]
|
205
|
+
# }
|
206
|
+
# ).lazy
|
207
|
+
# lf.schema
|
208
|
+
# # => {"foo"=>:i64, "bar"=>:f64, "ham"=>:str}
|
209
|
+
def schema
  # Delegates straight to the wrapped `_ldf` handle.
  name_to_dtype = _ldf.schema
  name_to_dtype
end
|
163
212
|
|
164
|
-
#
|
165
|
-
#
|
213
|
+
# Get the width of the LazyFrame.
|
214
|
+
#
|
215
|
+
# @return [Integer]
|
216
|
+
#
|
217
|
+
# @example
|
218
|
+
# lf = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]}).lazy
|
219
|
+
# lf.width
|
220
|
+
# # => 2
|
221
|
+
def width
  # Delegates straight to the wrapped `_ldf` handle.
  column_count = _ldf.width
  column_count
end
|
166
224
|
|
167
|
-
#
|
168
|
-
#
|
225
|
+
# Check if LazyFrame includes key.
|
226
|
+
#
|
227
|
+
# @return [Boolean]
|
228
|
+
def include?(key)
  # Membership test against the list returned by #columns.
  known_columns = columns
  known_columns.include?(key)
end
|
169
231
|
|
170
232
|
# clone handled by initialize_copy
|
171
233
|
|
172
234
|
# def [](item)
|
173
235
|
# end
|
174
236
|
|
175
|
-
#
|
176
|
-
#
|
177
|
-
#
|
237
|
+
# Returns a string representing the LazyFrame.
|
238
|
+
#
|
239
|
+
# @return [String]
|
240
|
+
def to_s
  # Render the unoptimized plan once, then embed it under a fixed header.
  plan_text = describe_plan
  <<~EOS
    naive plan: (run LazyFrame#describe_optimized_plan to see the optimized plan)

    #{plan_text}
  EOS
end
|
178
247
|
|
179
248
|
# def write_json
|
180
249
|
# end
|
@@ -182,22 +251,125 @@ module Polars
|
|
182
251
|
# def pipe
|
183
252
|
# end
|
184
253
|
|
185
|
-
#
|
186
|
-
#
|
254
|
+
# Create a string representation of the unoptimized query plan.
|
255
|
+
#
|
256
|
+
# @return [String]
|
257
|
+
def describe_plan
  # Delegates straight to the wrapped `_ldf` handle.
  plan = _ldf.describe_plan
  plan
end
|
187
260
|
|
261
|
+
# Create a string representation of the optimized query plan.
|
262
|
+
#
|
263
|
+
# @return [String]
|
188
264
|
# def describe_optimized_plan
|
189
265
|
# end
|
190
266
|
|
191
267
|
# def show_graph
|
192
268
|
# end
|
193
269
|
|
194
|
-
#
|
195
|
-
#
|
270
|
+
# Sort the DataFrame.
|
271
|
+
#
|
272
|
+
# Sorting can be done by:
|
273
|
+
#
|
274
|
+
# - A single column name
|
275
|
+
# - An expression
|
276
|
+
# - Multiple expressions
|
277
|
+
#
|
278
|
+
# @param by [Object]
|
279
|
+
# Column (expressions) to sort by.
|
280
|
+
# @param reverse [Boolean]
|
281
|
+
# Sort in descending order.
|
282
|
+
# @param nulls_last [Boolean]
|
283
|
+
# Place null values last. Can only be used if sorted by a single column.
|
284
|
+
#
|
285
|
+
# @return [LazyFrame]
|
286
|
+
#
|
287
|
+
# @example
|
288
|
+
# df = Polars::DataFrame.new(
|
289
|
+
# {
|
290
|
+
# "foo" => [1, 2, 3],
|
291
|
+
# "bar" => [6.0, 7.0, 8.0],
|
292
|
+
# "ham" => ["a", "b", "c"]
|
293
|
+
# }
|
294
|
+
# ).lazy
|
295
|
+
# df.sort("foo", reverse: true).collect
|
296
|
+
# # =>
|
297
|
+
# # shape: (3, 3)
|
298
|
+
# # ┌─────┬─────┬─────┐
|
299
|
+
# # │ foo ┆ bar ┆ ham │
|
300
|
+
# # │ --- ┆ --- ┆ --- │
|
301
|
+
# # │ i64 ┆ f64 ┆ str │
|
302
|
+
# # ╞═════╪═════╪═════╡
|
303
|
+
# # │ 3 ┆ 8.0 ┆ c │
|
304
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
305
|
+
# # │ 2 ┆ 7.0 ┆ b │
|
306
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
307
|
+
# # │ 1 ┆ 6.0 ┆ a │
|
308
|
+
# # └─────┴─────┴─────┘
|
309
|
+
def sort(by, reverse: false, nulls_last: false)
  # Single column name: use the dedicated sort on the underlying frame and
  # return right away. Without the explicit `return`, this result was
  # discarded and execution fell through to the expression path below.
  if by.is_a?(String)
    return _from_rbldf(_ldf.sort(by, reverse, nulls_last))
  end

  # The expression-based sort is handed `reverse` as a list, so wrap a lone
  # boolean in an array.
  reverse = [reverse] if Utils.bool?(reverse)

  by = Utils.selection_to_rbexpr_list(by)
  _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last))
end
|
196
320
|
|
197
321
|
# def profile
|
198
322
|
# end
|
199
323
|
|
324
|
+
# Collect into a DataFrame.
|
325
|
+
#
|
326
|
+
# Note: use {#fetch} if you want to run your query on the first `n` rows
|
327
|
+
# only. This can be a huge time saver in debugging queries.
|
328
|
+
#
|
329
|
+
# @param type_coercion [Boolean]
|
330
|
+
# Do type coercion optimization.
|
331
|
+
# @param predicate_pushdown [Boolean]
|
332
|
+
# Do predicate pushdown optimization.
|
333
|
+
# @param projection_pushdown [Boolean]
|
334
|
+
# Do projection pushdown optimization.
|
335
|
+
# @param simplify_expression [Boolean]
|
336
|
+
# Run simplify expressions optimization.
|
337
|
+
# @param string_cache [Boolean]
|
338
|
+
# This argument is deprecated. Please set the string cache globally.
|
339
|
+
# The argument will be ignored
|
340
|
+
# @param no_optimization [Boolean]
|
341
|
+
# Turn off (certain) optimizations.
|
342
|
+
# @param slice_pushdown [Boolean]
|
343
|
+
# Slice pushdown optimization.
|
344
|
+
# @param common_subplan_elimination [Boolean]
|
345
|
+
# Will try to cache branching subplans that occur on self-joins or unions.
|
346
|
+
# @param allow_streaming [Boolean]
|
347
|
+
# Run parts of the query in a streaming fashion (this is in an alpha state)
|
200
348
|
#
|
349
|
+
# @return [DataFrame]
|
350
|
+
#
|
351
|
+
# @example
|
352
|
+
# df = Polars::DataFrame.new(
|
353
|
+
# {
|
354
|
+
# "a" => ["a", "b", "a", "b", "b", "c"],
|
355
|
+
# "b" => [1, 2, 3, 4, 5, 6],
|
356
|
+
# "c" => [6, 5, 4, 3, 2, 1]
|
357
|
+
# }
|
358
|
+
# ).lazy
|
359
|
+
# df.groupby("a", maintain_order: true).agg(Polars.all.sum).collect
|
360
|
+
# # =>
|
361
|
+
# # shape: (3, 3)
|
362
|
+
# # ┌─────┬─────┬─────┐
|
363
|
+
# # │ a ┆ b ┆ c │
|
364
|
+
# # │ --- ┆ --- ┆ --- │
|
365
|
+
# # │ str ┆ i64 ┆ i64 │
|
366
|
+
# # ╞═════╪═════╪═════╡
|
367
|
+
# # │ a ┆ 4 ┆ 10 │
|
368
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
369
|
+
# # │ b ┆ 11 ┆ 10 │
|
370
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
371
|
+
# # │ c ┆ 6 ┆ 1 │
|
372
|
+
# # └─────┴─────┴─────┘
|
201
373
|
def collect(
|
202
374
|
type_coercion: true,
|
203
375
|
predicate_pushdown: true,
|
@@ -232,21 +404,184 @@ module Polars
|
|
232
404
|
Utils.wrap_df(ldf.collect)
|
233
405
|
end
|
234
406
|
|
235
|
-
#
|
236
|
-
#
|
407
|
+
# Collect a small number of rows for debugging purposes.
|
408
|
+
#
|
409
|
+
# Fetch is like a {#collect} operation, but it overwrites the number of rows
|
410
|
+
# read by every scan operation. This is a utility that helps debug a query on a
|
411
|
+
# smaller number of rows.
|
412
|
+
#
|
413
|
+
# Note that the fetch does not guarantee the final number of rows in the
|
414
|
+
# DataFrame. Filter, join operations and a lower number of rows available in the
|
415
|
+
# scanned file influence the final number of rows.
|
416
|
+
#
|
417
|
+
# @param n_rows [Integer]
|
418
|
+
# Collect n_rows from the data sources.
|
419
|
+
# @param type_coercion [Boolean]
|
420
|
+
# Run type coercion optimization.
|
421
|
+
# @param predicate_pushdown [Boolean]
|
422
|
+
# Run predicate pushdown optimization.
|
423
|
+
# @param projection_pushdown [Boolean]
|
424
|
+
# Run projection pushdown optimization.
|
425
|
+
# @param simplify_expression [Boolean]
|
426
|
+
# Run simplify expressions optimization.
|
427
|
+
# @param string_cache [Boolean]
|
428
|
+
# This argument is deprecated. Please set the string cache globally.
|
429
|
+
# The argument will be ignored
|
430
|
+
# @param no_optimization [Boolean]
|
431
|
+
# Turn off optimizations.
|
432
|
+
# @param slice_pushdown [Boolean]
|
433
|
+
# Slice pushdown optimization
|
434
|
+
# @param common_subplan_elimination [Boolean]
|
435
|
+
# Will try to cache branching subplans that occur on self-joins or unions.
|
436
|
+
# @param allow_streaming [Boolean]
|
437
|
+
# Run parts of the query in a streaming fashion (this is in an alpha state)
|
438
|
+
#
|
439
|
+
# @return [DataFrame]
|
440
|
+
#
|
441
|
+
# @example
|
442
|
+
# df = Polars::DataFrame.new(
|
443
|
+
# {
|
444
|
+
# "a" => ["a", "b", "a", "b", "b", "c"],
|
445
|
+
# "b" => [1, 2, 3, 4, 5, 6],
|
446
|
+
# "c" => [6, 5, 4, 3, 2, 1]
|
447
|
+
# }
|
448
|
+
# ).lazy
|
449
|
+
# df.groupby("a", maintain_order: true).agg(Polars.all.sum).fetch(2)
|
450
|
+
# # =>
|
451
|
+
# # shape: (2, 3)
|
452
|
+
# # ┌─────┬─────┬─────┐
|
453
|
+
# # │ a ┆ b ┆ c │
|
454
|
+
# # │ --- ┆ --- ┆ --- │
|
455
|
+
# # │ str ┆ i64 ┆ i64 │
|
456
|
+
# # ╞═════╪═════╪═════╡
|
457
|
+
# # │ a ┆ 1 ┆ 6 │
|
458
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
459
|
+
# # │ b ┆ 2 ┆ 5 │
|
460
|
+
# # └─────┴─────┴─────┘
|
461
|
+
def fetch(
  n_rows = 500,
  type_coercion: true,
  predicate_pushdown: true,
  projection_pushdown: true,
  simplify_expression: true,
  string_cache: false,
  no_optimization: false,
  slice_pushdown: true,
  common_subplan_elimination: true,
  allow_streaming: false
)
  # `no_optimization` switches off the pushdown passes and common-subplan
  # elimination; type coercion and expression simplification keep whatever
  # the caller passed. `string_cache` is accepted but not read here.
  if no_optimization
    predicate_pushdown = projection_pushdown = false
    slice_pushdown = common_subplan_elimination = false
  end

  toggles = [
    type_coercion,
    predicate_pushdown,
    projection_pushdown,
    simplify_expression,
    slice_pushdown,
    common_subplan_elimination,
    allow_streaming
  ]
  configured = _ldf.optimization_toggle(*toggles)
  Utils.wrap_df(configured.fetch(n_rows))
end
|
237
491
|
|
492
|
+
# Return lazy representation, i.e. itself.
|
238
493
|
#
|
494
|
+
# Useful for writing code that expects either a `DataFrame` or
|
495
|
+
# `LazyFrame`.
|
496
|
+
#
|
497
|
+
# @return [LazyFrame]
|
498
|
+
#
|
499
|
+
# @example
|
500
|
+
# df = Polars::DataFrame.new(
|
501
|
+
# {
|
502
|
+
# "a" => [nil, 2, 3, 4],
|
503
|
+
# "b" => [0.5, nil, 2.5, 13],
|
504
|
+
# "c" => [true, true, false, nil]
|
505
|
+
# }
|
506
|
+
# )
|
507
|
+
# df.lazy
|
239
508
|
def lazy
  # Already lazy: hand back the receiver unchanged.
  itself
end
|
242
511
|
|
243
|
-
#
|
244
|
-
#
|
512
|
+
# Cache the result once the execution of the physical plan hits this node.
|
513
|
+
#
|
514
|
+
# @return [LazyFrame]
|
515
|
+
def cache
  # Wrap the handle returned by the underlying `cache` call in a new frame.
  cached = _ldf.cache
  _from_rbldf(cached)
end
|
245
518
|
|
246
|
-
#
|
247
|
-
#
|
519
|
+
# Create an empty copy of the current LazyFrame.
|
520
|
+
#
|
521
|
+
# The copy has an identical schema but no data.
|
522
|
+
#
|
523
|
+
# @return [LazyFrame]
|
524
|
+
#
|
525
|
+
# @example
|
526
|
+
# df = Polars::DataFrame.new(
|
527
|
+
# {
|
528
|
+
# "a" => [nil, 2, 3, 4],
|
529
|
+
# "b" => [0.5, nil, 2.5, 13],
|
530
|
+
# "c" => [true, true, false, nil],
|
531
|
+
# }
|
532
|
+
# ).lazy
|
533
|
+
# df.cleared.fetch
|
534
|
+
# # =>
|
535
|
+
# # shape: (0, 3)
|
536
|
+
# # ┌─────┬─────┬──────┐
|
537
|
+
# # │ a ┆ b ┆ c │
|
538
|
+
# # │ --- ┆ --- ┆ --- │
|
539
|
+
# # │ i64 ┆ f64 ┆ bool │
|
540
|
+
# # ╞═════╪═════╪══════╡
|
541
|
+
# # └─────┴─────┴──────┘
|
542
|
+
def cleared
  # Build a DataFrame from the schema alone, then convert it back to lazy.
  schema_only = DataFrame.new(columns: schema)
  schema_only.lazy
end
|
248
545
|
|
546
|
+
# Filter the rows in the DataFrame based on a predicate expression.
|
249
547
|
#
|
548
|
+
# @param predicate [Object]
|
549
|
+
# Expression that evaluates to a boolean Series.
|
550
|
+
#
|
551
|
+
# @return [LazyFrame]
|
552
|
+
#
|
553
|
+
# @example Filter on one condition:
|
554
|
+
# lf = Polars::DataFrame.new(
|
555
|
+
# {
|
556
|
+
# "foo" => [1, 2, 3],
|
557
|
+
# "bar" => [6, 7, 8],
|
558
|
+
# "ham" => ["a", "b", "c"]
|
559
|
+
# }
|
560
|
+
# ).lazy
|
561
|
+
#   lf.filter(Polars.col("foo") < 3).collect
|
562
|
+
# # =>
|
563
|
+
# # shape: (2, 3)
|
564
|
+
# # ┌─────┬─────┬─────┐
|
565
|
+
# # │ foo ┆ bar ┆ ham │
|
566
|
+
# # │ --- ┆ --- ┆ --- │
|
567
|
+
# # │ i64 ┆ i64 ┆ str │
|
568
|
+
# # ╞═════╪═════╪═════╡
|
569
|
+
# # │ 1 ┆ 6 ┆ a │
|
570
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
571
|
+
# # │ 2 ┆ 7 ┆ b │
|
572
|
+
# # └─────┴─────┴─────┘
|
573
|
+
#
|
574
|
+
# @example Filter on multiple conditions:
|
575
|
+
# lf.filter((Polars.col("foo") < 3) & (Polars.col("ham") == "a")).collect
|
576
|
+
# # =>
|
577
|
+
# # shape: (1, 3)
|
578
|
+
# # ┌─────┬─────┬─────┐
|
579
|
+
# # │ foo ┆ bar ┆ ham │
|
580
|
+
# # │ --- ┆ --- ┆ --- │
|
581
|
+
# # │ i64 ┆ i64 ┆ str │
|
582
|
+
# # ╞═════╪═════╪═════╡
|
583
|
+
# # │ 1 ┆ 6 ┆ a │
|
584
|
+
# # └─────┴─────┴─────┘
|
250
585
|
def filter(predicate)
|
251
586
|
_from_rbldf(
|
252
587
|
_ldf.filter(
|
@@ -255,11 +590,136 @@ module Polars
|
|
255
590
|
)
|
256
591
|
end
|
257
592
|
|
593
|
+
# Select columns from this DataFrame.
|
594
|
+
#
|
595
|
+
# @param exprs [Object]
|
596
|
+
# Column or columns to select.
|
597
|
+
#
|
598
|
+
# @return [LazyFrame]
|
599
|
+
#
|
600
|
+
# @example
|
601
|
+
# df = Polars::DataFrame.new(
|
602
|
+
# {
|
603
|
+
# "foo" => [1, 2, 3],
|
604
|
+
# "bar" => [6, 7, 8],
|
605
|
+
# "ham" => ["a", "b", "c"],
|
606
|
+
# }
|
607
|
+
# ).lazy
|
608
|
+
# df.select("foo").collect
|
609
|
+
# # =>
|
610
|
+
# # shape: (3, 1)
|
611
|
+
# # ┌─────┐
|
612
|
+
# # │ foo │
|
613
|
+
# # │ --- │
|
614
|
+
# # │ i64 │
|
615
|
+
# # ╞═════╡
|
616
|
+
# # │ 1 │
|
617
|
+
# # ├╌╌╌╌╌┤
|
618
|
+
# # │ 2 │
|
619
|
+
# # ├╌╌╌╌╌┤
|
620
|
+
# # │ 3 │
|
621
|
+
# # └─────┘
|
622
|
+
#
|
623
|
+
# @example
|
624
|
+
# df.select(["foo", "bar"]).collect
|
625
|
+
# # =>
|
626
|
+
# # shape: (3, 2)
|
627
|
+
# # ┌─────┬─────┐
|
628
|
+
# # │ foo ┆ bar │
|
629
|
+
# # │ --- ┆ --- │
|
630
|
+
# # │ i64 ┆ i64 │
|
631
|
+
# # ╞═════╪═════╡
|
632
|
+
# # │ 1 ┆ 6 │
|
633
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
634
|
+
# # │ 2 ┆ 7 │
|
635
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
636
|
+
# # │ 3 ┆ 8 │
|
637
|
+
# # └─────┴─────┘
|
638
|
+
#
|
639
|
+
# @example
|
640
|
+
# df.select(Polars.col("foo") + 1).collect
|
641
|
+
# # =>
|
642
|
+
# # shape: (3, 1)
|
643
|
+
# # ┌─────┐
|
644
|
+
# # │ foo │
|
645
|
+
# # │ --- │
|
646
|
+
# # │ i64 │
|
647
|
+
# # ╞═════╡
|
648
|
+
# # │ 2 │
|
649
|
+
# # ├╌╌╌╌╌┤
|
650
|
+
# # │ 3 │
|
651
|
+
# # ├╌╌╌╌╌┤
|
652
|
+
# # │ 4 │
|
653
|
+
# # └─────┘
|
654
|
+
#
|
655
|
+
# @example
|
656
|
+
# df.select([Polars.col("foo") + 1, Polars.col("bar") + 1]).collect
|
657
|
+
# # =>
|
658
|
+
# # shape: (3, 2)
|
659
|
+
# # ┌─────┬─────┐
|
660
|
+
# # │ foo ┆ bar │
|
661
|
+
# # │ --- ┆ --- │
|
662
|
+
# # │ i64 ┆ i64 │
|
663
|
+
# # ╞═════╪═════╡
|
664
|
+
# # │ 2 ┆ 7 │
|
665
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
666
|
+
# # │ 3 ┆ 8 │
|
667
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
668
|
+
# # │ 4 ┆ 9 │
|
669
|
+
# # └─────┴─────┘
|
670
|
+
#
|
671
|
+
# @example
|
672
|
+
# df.select(Polars.when(Polars.col("foo") > 2).then(10).otherwise(0)).collect
|
673
|
+
# # =>
|
674
|
+
# # shape: (3, 1)
|
675
|
+
# # ┌─────────┐
|
676
|
+
# # │ literal │
|
677
|
+
# # │ --- │
|
678
|
+
# # │ i64 │
|
679
|
+
# # ╞═════════╡
|
680
|
+
# # │ 0 │
|
681
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
682
|
+
# # │ 0 │
|
683
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
684
|
+
# # │ 10 │
|
685
|
+
# # └─────────┘
|
258
686
|
def select(exprs)
  # Normalize the selection via Utils before handing it to the underlying frame.
  rbexprs = Utils.selection_to_rbexpr_list(exprs)
  selected = _ldf.select(rbexprs)
  _from_rbldf(selected)
end
|
262
690
|
|
691
|
+
# Start a groupby operation.
|
692
|
+
#
|
693
|
+
# @param by [Object]
|
694
|
+
# Column(s) to group by.
|
695
|
+
# @param maintain_order [Boolean]
|
696
|
+
# Make sure that the order of the groups remain consistent. This is more
|
697
|
+
# expensive than a default groupby.
|
698
|
+
#
|
699
|
+
# @return [LazyGroupBy]
|
700
|
+
#
|
701
|
+
# @example
|
702
|
+
# df = Polars::DataFrame.new(
|
703
|
+
# {
|
704
|
+
# "a" => ["a", "b", "a", "b", "b", "c"],
|
705
|
+
# "b" => [1, 2, 3, 4, 5, 6],
|
706
|
+
# "c" => [6, 5, 4, 3, 2, 1]
|
707
|
+
# }
|
708
|
+
# ).lazy
|
709
|
+
# df.groupby("a", maintain_order: true).agg(Polars.col("b").sum).collect
|
710
|
+
# # =>
|
711
|
+
# # shape: (3, 2)
|
712
|
+
# # ┌─────┬─────┐
|
713
|
+
# # │ a ┆ b │
|
714
|
+
# # │ --- ┆ --- │
|
715
|
+
# # │ str ┆ i64 │
|
716
|
+
# # ╞═════╪═════╡
|
717
|
+
# # │ a ┆ 4 │
|
718
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
719
|
+
# # │ b ┆ 11 │
|
720
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
721
|
+
# # │ c ┆ 6 │
|
722
|
+
# # └─────┴─────┘
|
263
723
|
def groupby(by, maintain_order: false)
|
264
724
|
rbexprs_by = Utils.selection_to_rbexpr_list(by)
|
265
725
|
lgb = _ldf.groupby(rbexprs_by, maintain_order)
|
@@ -275,7 +735,116 @@ module Polars
|
|
275
735
|
# def join_asof
|
276
736
|
# end
|
277
737
|
|
738
|
+
# Add a join operation to the Logical Plan.
|
739
|
+
#
|
740
|
+
# @param other [LazyFrame]
|
741
|
+
# Lazy DataFrame to join with.
|
742
|
+
# @param left_on [Object]
|
743
|
+
# Join column of the left DataFrame.
|
744
|
+
# @param right_on [Object]
|
745
|
+
# Join column of the right DataFrame.
|
746
|
+
# @param on [Object]
|
747
|
+
# Join column of both DataFrames. If set, `left_on` and `right_on` should be
|
748
|
+
#   nil.
|
749
|
+
# @param how ["inner", "left", "outer", "semi", "anti", "cross"]
|
750
|
+
# Join strategy.
|
751
|
+
# @param suffix [String]
|
752
|
+
# Suffix to append to columns with a duplicate name.
|
753
|
+
# @param allow_parallel [Boolean]
|
754
|
+
# Allow the physical plan to optionally evaluate the computation of both
|
755
|
+
# DataFrames up to the join in parallel.
|
756
|
+
# @param force_parallel [Boolean]
|
757
|
+
# Force the physical plan to evaluate the computation of both DataFrames up to
|
758
|
+
# the join in parallel.
|
278
759
|
#
|
760
|
+
# @return [LazyFrame]
|
761
|
+
#
|
762
|
+
# @example
|
763
|
+
# df = Polars::DataFrame.new(
|
764
|
+
# {
|
765
|
+
# "foo" => [1, 2, 3],
|
766
|
+
# "bar" => [6.0, 7.0, 8.0],
|
767
|
+
# "ham" => ["a", "b", "c"]
|
768
|
+
# }
|
769
|
+
# ).lazy
|
770
|
+
# other_df = Polars::DataFrame.new(
|
771
|
+
# {
|
772
|
+
# "apple" => ["x", "y", "z"],
|
773
|
+
# "ham" => ["a", "b", "d"]
|
774
|
+
# }
|
775
|
+
# ).lazy
|
776
|
+
# df.join(other_df, on: "ham").collect
|
777
|
+
# # =>
|
778
|
+
# # shape: (2, 4)
|
779
|
+
# # ┌─────┬─────┬─────┬───────┐
|
780
|
+
# # │ foo ┆ bar ┆ ham ┆ apple │
|
781
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
782
|
+
# # │ i64 ┆ f64 ┆ str ┆ str │
|
783
|
+
# # ╞═════╪═════╪═════╪═══════╡
|
784
|
+
# # │ 1 ┆ 6.0 ┆ a ┆ x │
|
785
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
786
|
+
# # │ 2 ┆ 7.0 ┆ b ┆ y │
|
787
|
+
# # └─────┴─────┴─────┴───────┘
|
788
|
+
#
|
789
|
+
# @example
|
790
|
+
# df.join(other_df, on: "ham", how: "outer").collect
|
791
|
+
# # =>
|
792
|
+
# # shape: (4, 4)
|
793
|
+
# # ┌──────┬──────┬─────┬───────┐
|
794
|
+
# # │ foo ┆ bar ┆ ham ┆ apple │
|
795
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
796
|
+
# # │ i64 ┆ f64 ┆ str ┆ str │
|
797
|
+
# # ╞══════╪══════╪═════╪═══════╡
|
798
|
+
# # │ 1 ┆ 6.0 ┆ a ┆ x │
|
799
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
800
|
+
# # │ 2 ┆ 7.0 ┆ b ┆ y │
|
801
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
802
|
+
# # │ null ┆ null ┆ d ┆ z │
|
803
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
804
|
+
# # │ 3 ┆ 8.0 ┆ c ┆ null │
|
805
|
+
# # └──────┴──────┴─────┴───────┘
|
806
|
+
#
|
807
|
+
# @example
|
808
|
+
# df.join(other_df, on: "ham", how: "left").collect
|
809
|
+
# # =>
|
810
|
+
# # shape: (3, 4)
|
811
|
+
# # ┌─────┬─────┬─────┬───────┐
|
812
|
+
# # │ foo ┆ bar ┆ ham ┆ apple │
|
813
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
814
|
+
# # │ i64 ┆ f64 ┆ str ┆ str │
|
815
|
+
# # ╞═════╪═════╪═════╪═══════╡
|
816
|
+
# # │ 1 ┆ 6.0 ┆ a ┆ x │
|
817
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
818
|
+
# # │ 2 ┆ 7.0 ┆ b ┆ y │
|
819
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
820
|
+
# # │ 3 ┆ 8.0 ┆ c ┆ null │
|
821
|
+
# # └─────┴─────┴─────┴───────┘
|
822
|
+
#
|
823
|
+
# @example
|
824
|
+
# df.join(other_df, on: "ham", how: "semi").collect
|
825
|
+
# # =>
|
826
|
+
# # shape: (2, 3)
|
827
|
+
# # ┌─────┬─────┬─────┐
|
828
|
+
# # │ foo ┆ bar ┆ ham │
|
829
|
+
# # │ --- ┆ --- ┆ --- │
|
830
|
+
# # │ i64 ┆ f64 ┆ str │
|
831
|
+
# # ╞═════╪═════╪═════╡
|
832
|
+
# # │ 1 ┆ 6.0 ┆ a │
|
833
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
834
|
+
# # │ 2 ┆ 7.0 ┆ b │
|
835
|
+
# # └─────┴─────┴─────┘
|
836
|
+
#
|
837
|
+
# @example
|
838
|
+
# df.join(other_df, on: "ham", how: "anti").collect
|
839
|
+
# # =>
|
840
|
+
# # shape: (1, 3)
|
841
|
+
# # ┌─────┬─────┬─────┐
|
842
|
+
# # │ foo ┆ bar ┆ ham │
|
843
|
+
# # │ --- ┆ --- ┆ --- │
|
844
|
+
# # │ i64 ┆ f64 ┆ str │
|
845
|
+
# # ╞═════╪═════╪═════╡
|
846
|
+
# # │ 3 ┆ 8.0 ┆ c │
|
847
|
+
# # └─────┴─────┴─────┘
|
279
848
|
def join(
|
280
849
|
other,
|
281
850
|
left_on: nil,
|
@@ -322,6 +891,43 @@ module Polars
|
|
322
891
|
)
|
323
892
|
end
|
324
893
|
|
894
|
+
# Add or overwrite multiple columns in a DataFrame.
|
895
|
+
#
|
896
|
+
# @param exprs [Object]
|
897
|
+
# List of Expressions that evaluate to columns.
|
898
|
+
#
|
899
|
+
# @return [LazyFrame]
|
900
|
+
#
|
901
|
+
# @example
|
902
|
+
# ldf = Polars::DataFrame.new(
|
903
|
+
# {
|
904
|
+
# "a" => [1, 2, 3, 4],
|
905
|
+
# "b" => [0.5, 4, 10, 13],
|
906
|
+
# "c" => [true, true, false, true]
|
907
|
+
# }
|
908
|
+
# ).lazy
|
909
|
+
# ldf.with_columns(
|
910
|
+
# [
|
911
|
+
# (Polars.col("a") ** 2).alias("a^2"),
|
912
|
+
# (Polars.col("b") / 2).alias("b/2"),
|
913
|
+
# (Polars.col("c").is_not()).alias("not c")
|
914
|
+
# ]
|
915
|
+
# ).collect
|
916
|
+
# # =>
|
917
|
+
# # shape: (4, 6)
|
918
|
+
# # ┌─────┬──────┬───────┬──────┬──────┬───────┐
|
919
|
+
# # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
|
920
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
921
|
+
# # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │
|
922
|
+
# # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
|
923
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │
|
924
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
925
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │
|
926
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
927
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
|
928
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
929
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
|
930
|
+
# # └─────┴──────┴───────┴──────┴──────┴───────┘
|
325
931
|
def with_columns(exprs)
|
326
932
|
exprs =
|
327
933
|
if exprs.nil?
|
@@ -350,58 +956,343 @@ module Polars
|
|
350
956
|
# def with_context
|
351
957
|
# end
|
352
958
|
|
959
|
+
# Add or overwrite column in a DataFrame.
|
960
|
+
#
|
961
|
+
# @param column [Object]
|
962
|
+
# Expression that evaluates to column or a Series to use.
|
963
|
+
#
|
964
|
+
# @return [LazyFrame]
|
353
965
|
#
|
966
|
+
# @example
|
967
|
+
# df = Polars::DataFrame.new(
|
968
|
+
# {
|
969
|
+
# "a" => [1, 3, 5],
|
970
|
+
# "b" => [2, 4, 6]
|
971
|
+
# }
|
972
|
+
# ).lazy
|
973
|
+
# df.with_column((Polars.col("b") ** 2).alias("b_squared")).collect
|
974
|
+
# # =>
|
975
|
+
# # shape: (3, 3)
|
976
|
+
# # ┌─────┬─────┬───────────┐
|
977
|
+
# # │ a ┆ b ┆ b_squared │
|
978
|
+
# # │ --- ┆ --- ┆ --- │
|
979
|
+
# # │ i64 ┆ i64 ┆ f64 │
|
980
|
+
# # ╞═════╪═════╪═══════════╡
|
981
|
+
# # │ 1 ┆ 2 ┆ 4.0 │
|
982
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
983
|
+
# # │ 3 ┆ 4 ┆ 16.0 │
|
984
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
985
|
+
# # │ 5 ┆ 6 ┆ 36.0 │
|
986
|
+
# # └─────┴─────┴───────────┘
|
987
|
+
#
|
988
|
+
# @example
|
989
|
+
# df.with_column(Polars.col("a") ** 2).collect
|
990
|
+
# # =>
|
991
|
+
# # shape: (3, 2)
|
992
|
+
# # ┌──────┬─────┐
|
993
|
+
# # │ a ┆ b │
|
994
|
+
# # │ --- ┆ --- │
|
995
|
+
# # │ f64 ┆ i64 │
|
996
|
+
# # ╞══════╪═════╡
|
997
|
+
# # │ 1.0 ┆ 2 │
|
998
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
|
999
|
+
# # │ 9.0 ┆ 4 │
|
1000
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
|
1001
|
+
# # │ 25.0 ┆ 6 │
|
1002
|
+
# # └──────┴─────┘
|
354
1003
|
def with_column(column)
  # Single-column convenience: wrap the argument in a one-element list.
  single = [column]
  with_columns(single)
end
|
357
1006
|
|
358
|
-
#
|
359
|
-
#
|
1007
|
+
# Remove one or multiple columns from a DataFrame.
|
1008
|
+
#
|
1009
|
+
# @param columns [Object]
|
1010
|
+
# - Name of the column that should be removed.
|
1011
|
+
# - List of column names.
|
1012
|
+
#
|
1013
|
+
# @return [LazyFrame]
|
1014
|
+
def drop(columns)
  # A lone column name is wrapped so the underlying call always gets a list.
  columns = [columns] if columns.is_a?(String)
  _from_rbldf(_ldf.drop_columns(columns))
end
|
360
1020
|
|
1021
|
+
# Rename column names.
|
1022
|
+
#
|
1023
|
+
# @param mapping [Hash]
|
1024
|
+
# Key value pairs that map from old name to new name.
|
361
1025
|
#
|
1026
|
+
# @return [LazyFrame]
|
362
1027
|
def rename(mapping)
  # Split the hash into parallel lists: old names (keys) and new names (values).
  old_names = mapping.keys
  new_names = mapping.values
  renamed = _ldf.rename(old_names, new_names)
  _from_rbldf(renamed)
end
|
367
1032
|
|
368
|
-
#
|
369
|
-
#
|
1033
|
+
# Reverse the DataFrame.
|
1034
|
+
#
|
1035
|
+
# @return [LazyFrame]
|
1036
|
+
def reverse
  # Wrap the handle returned by the underlying `reverse` call in a new frame.
  reversed = _ldf.reverse
  _from_rbldf(reversed)
end
|
370
1039
|
|
371
|
-
#
|
372
|
-
#
|
1040
|
+
# Shift the values by a given period.
|
1041
|
+
#
|
1042
|
+
# @param periods [Integer]
|
1043
|
+
# Number of places to shift (may be negative).
|
1044
|
+
#
|
1045
|
+
# @return [LazyFrame]
|
1046
|
+
#
|
1047
|
+
# @example
|
1048
|
+
# df = Polars::DataFrame.new(
|
1049
|
+
# {
|
1050
|
+
# "a" => [1, 3, 5],
|
1051
|
+
# "b" => [2, 4, 6]
|
1052
|
+
# }
|
1053
|
+
# ).lazy
|
1054
|
+
# df.shift(1).collect
|
1055
|
+
# # =>
|
1056
|
+
# # shape: (3, 2)
|
1057
|
+
# # ┌──────┬──────┐
|
1058
|
+
# # │ a ┆ b │
|
1059
|
+
# # │ --- ┆ --- │
|
1060
|
+
# # │ i64 ┆ i64 │
|
1061
|
+
# # ╞══════╪══════╡
|
1062
|
+
# # │ null ┆ null │
|
1063
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1064
|
+
# # │ 1 ┆ 2 │
|
1065
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1066
|
+
# # │ 3 ┆ 4 │
|
1067
|
+
# # └──────┴──────┘
|
1068
|
+
#
|
1069
|
+
# @example
|
1070
|
+
# df.shift(-1).collect
|
1071
|
+
# # =>
|
1072
|
+
# # shape: (3, 2)
|
1073
|
+
# # ┌──────┬──────┐
|
1074
|
+
# # │ a ┆ b │
|
1075
|
+
# # │ --- ┆ --- │
|
1076
|
+
# # │ i64 ┆ i64 │
|
1077
|
+
# # ╞══════╪══════╡
|
1078
|
+
# # │ 3 ┆ 4 │
|
1079
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1080
|
+
# # │ 5 ┆ 6 │
|
1081
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1082
|
+
# # │ null ┆ null │
|
1083
|
+
# # └──────┴──────┘
|
1084
|
+
def shift(periods)
  # Wrap the handle returned by the underlying `shift` call in a new frame.
  shifted = _ldf.shift(periods)
  _from_rbldf(shifted)
end
|
373
1087
|
|
374
|
-
#
|
375
|
-
#
|
1088
|
+
# Shift the values by a given period and fill the resulting null values.
|
1089
|
+
#
|
1090
|
+
# @param periods [Integer]
|
1091
|
+
# Number of places to shift (may be negative).
|
1092
|
+
# @param fill_value [Object]
|
1093
|
+
# Fill `nil` values with the result of this expression.
|
1094
|
+
#
|
1095
|
+
# @return [LazyFrame]
|
1096
|
+
#
|
1097
|
+
# @example
|
1098
|
+
# df = Polars::DataFrame.new(
|
1099
|
+
# {
|
1100
|
+
# "a" => [1, 3, 5],
|
1101
|
+
# "b" => [2, 4, 6]
|
1102
|
+
# }
|
1103
|
+
# ).lazy
|
1104
|
+
# df.shift_and_fill(1, 0).collect
|
1105
|
+
# # =>
|
1106
|
+
# # shape: (3, 2)
|
1107
|
+
# # ┌─────┬─────┐
|
1108
|
+
# # │ a ┆ b │
|
1109
|
+
# # │ --- ┆ --- │
|
1110
|
+
# # │ i64 ┆ i64 │
|
1111
|
+
# # ╞═════╪═════╡
|
1112
|
+
# # │ 0 ┆ 0 │
|
1113
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1114
|
+
# # │ 1 ┆ 2 │
|
1115
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1116
|
+
# # │ 3 ┆ 4 │
|
1117
|
+
# # └─────┴─────┘
|
1118
|
+
#
|
1119
|
+
# @example
|
1120
|
+
# df.shift_and_fill(-1, 0).collect
|
1121
|
+
# # =>
|
1122
|
+
# # shape: (3, 2)
|
1123
|
+
# # ┌─────┬─────┐
|
1124
|
+
# # │ a ┆ b │
|
1125
|
+
# # │ --- ┆ --- │
|
1126
|
+
# # │ i64 ┆ i64 │
|
1127
|
+
# # ╞═════╪═════╡
|
1128
|
+
# # │ 3 ┆ 4 │
|
1129
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1130
|
+
# # │ 5 ┆ 6 │
|
1131
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1132
|
+
# # │ 0 ┆ 0 │
|
1133
|
+
# # └─────┴─────┘
|
1134
|
+
def shift_and_fill(periods, fill_value)
  # Anything that is not already an Expr is turned into a literal expression.
  fill_value = Polars.lit(fill_value) unless fill_value.is_a?(Expr)
  shifted = _ldf.shift_and_fill(periods, fill_value._rbexpr)
  _from_rbldf(shifted)
end
|
376
1140
|
|
377
|
-
#
|
378
|
-
#
|
1141
|
+
# Get a slice of this DataFrame.
|
1142
|
+
#
|
1143
|
+
# @param offset [Integer]
|
1144
|
+
# Start index. Negative indexing is supported.
|
1145
|
+
# @param length [Integer]
|
1146
|
+
# Length of the slice. If set to `nil`, all rows starting at the offset
|
1147
|
+
# will be selected.
|
1148
|
+
#
|
1149
|
+
# @return [LazyFrame]
|
1150
|
+
#
|
1151
|
+
# @example
|
1152
|
+
# df = Polars::DataFrame.new(
|
1153
|
+
# {
|
1154
|
+
# "a" => ["x", "y", "z"],
|
1155
|
+
# "b" => [1, 3, 5],
|
1156
|
+
# "c" => [2, 4, 6]
|
1157
|
+
# }
|
1158
|
+
# ).lazy
|
1159
|
+
# df.slice(1, 2).collect
|
1160
|
+
# # =>
|
1161
|
+
# # shape: (2, 3)
|
1162
|
+
# # ┌─────┬─────┬─────┐
|
1163
|
+
# # │ a ┆ b ┆ c │
|
1164
|
+
# # │ --- ┆ --- ┆ --- │
|
1165
|
+
# # │ str ┆ i64 ┆ i64 │
|
1166
|
+
# # ╞═════╪═════╪═════╡
|
1167
|
+
# # │ y ┆ 3 ┆ 4 │
|
1168
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1169
|
+
# # │ z ┆ 5 ┆ 6 │
|
1170
|
+
# # └─────┴─────┴─────┘
|
1171
|
+
def slice(offset, length = nil)
  # A nil length is passed through untouched; only an explicit negative
  # length is rejected.
  negative_length = length && length < 0
  raise ArgumentError, "Negative slice lengths (#{length}) are invalid for LazyFrame" if negative_length

  _from_rbldf(_ldf.slice(offset, length))
end
|
379
1177
|
|
380
|
-
#
|
381
|
-
#
|
1178
|
+
# Get the first `n` rows.
|
1179
|
+
#
|
1180
|
+
# Alias for {#head}.
|
1181
|
+
#
|
1182
|
+
# @param n [Integer]
|
1183
|
+
# Number of rows to return.
|
1184
|
+
#
|
1185
|
+
# @return [LazyFrame]
|
1186
|
+
#
|
1187
|
+
# @note
|
1188
|
+
# Consider using the {#fetch} operation if you only want to test your
|
1189
|
+
# query. The {#fetch} operation will load the first `n` rows at the scan
|
1190
|
+
# level, whereas the {#head}/{#limit} are applied at the end.
|
1191
|
+
def limit(n = 5)
  # Documented as an alias for #head, so the requested row count must be
  # passed through rather than a fixed 5.
  head(n)
end
|
382
1194
|
|
383
|
-
#
|
384
|
-
#
|
1195
|
+
# Get the first `n` rows.
|
1196
|
+
#
|
1197
|
+
# @param n [Integer]
|
1198
|
+
# Number of rows to return.
|
1199
|
+
#
|
1200
|
+
# @return [LazyFrame]
|
1201
|
+
#
|
1202
|
+
# @note
|
1203
|
+
# Consider using the {#fetch} operation if you only want to test your
|
1204
|
+
# query. The {#fetch} operation will load the first `n` rows at the scan
|
1205
|
+
# level, whereas the {#head}/{#limit} are applied at the end.
|
1206
|
+
def head(n = 5)
  # The first `n` rows are the slice starting at offset 0 with length `n`.
  start_offset = 0
  slice(start_offset, n)
end
|
385
1209
|
|
386
|
-
#
|
387
|
-
#
|
1210
|
+
# Get the last `n` rows.
|
1211
|
+
#
|
1212
|
+
# @param n [Integer]
|
1213
|
+
# Number of rows.
|
1214
|
+
#
|
1215
|
+
# @return [LazyFrame]
|
1216
|
+
def tail(n = 5)
  # Ask the underlying `_ldf` for its tail and wrap the result via `_from_rbldf`.
  trailing = _ldf.tail(n)
  _from_rbldf(trailing)
end
|
388
1219
|
|
389
|
-
#
|
390
|
-
#
|
1220
|
+
# Get the last row of the LazyFrame.
|
1221
|
+
#
|
1222
|
+
# @return [LazyFrame]
|
1223
|
+
def last
  # The last row is simply a tail of length one.
  row_count = 1
  tail(row_count)
end
|
391
1226
|
|
392
|
-
#
|
393
|
-
#
|
1227
|
+
# Get the first row of the LazyFrame.
|
1228
|
+
#
|
1229
|
+
# @return [LazyFrame]
|
1230
|
+
def first
  # The first row is the slice at offset 0 with length 1.
  start_offset = 0
  row_count = 1
  slice(start_offset, row_count)
end
|
394
1233
|
|
395
1234
|
# def with_row_count
|
396
1235
|
# end
|
397
1236
|
|
398
|
-
#
|
399
|
-
#
|
1237
|
+
# Take every nth row in the LazyFrame and return as a new LazyFrame.
|
1238
|
+
#
|
1239
|
+
# @return [LazyFrame]
|
1240
|
+
#
|
1241
|
+
# @example
|
1242
|
+
# s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]}).lazy
|
1243
|
+
# s.take_every(2).collect
|
1244
|
+
# # =>
|
1245
|
+
# # shape: (2, 2)
|
1246
|
+
# # ┌─────┬─────┐
|
1247
|
+
# # │ a ┆ b │
|
1248
|
+
# # │ --- ┆ --- │
|
1249
|
+
# # │ i64 ┆ i64 │
|
1250
|
+
# # ╞═════╪═════╡
|
1251
|
+
# # │ 1 ┆ 5 │
|
1252
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1253
|
+
# # │ 3 ┆ 7 │
|
1254
|
+
# # └─────┴─────┘
|
1255
|
+
def take_every(n)
  # Build the expression over the "*" column selector, then select with it.
  every_nth = Utils.col("*").take_every(n)
  select(every_nth)
end
|
400
1258
|
|
401
1259
|
# def fill_null
|
402
1260
|
# end
|
403
1261
|
|
1262
|
+
# Fill floating point NaN values.
|
1263
|
+
#
|
1264
|
+
# @param fill_value [Object]
|
1265
|
+
# Value to fill the NaN values with.
|
1266
|
+
#
|
1267
|
+
# @return [LazyFrame]
|
1268
|
+
#
|
1269
|
+
# @note
|
1270
|
+
# Note that floating point NaN (Not a Number) are not missing values!
|
1271
|
+
# To replace missing values, use `fill_null` instead.
|
404
1272
|
#
|
1273
|
+
# @example
|
1274
|
+
# df = Polars::DataFrame.new(
|
1275
|
+
# {
|
1276
|
+
# "a" => [1.5, 2, Float::NAN, 4],
|
1277
|
+
# "b" => [0.5, 4, Float::NAN, 13],
|
1278
|
+
# }
|
1279
|
+
# ).lazy
|
1280
|
+
# df.fill_nan(99).collect
|
1281
|
+
# # =>
|
1282
|
+
# # shape: (4, 2)
|
1283
|
+
# # ┌──────┬──────┐
|
1284
|
+
# # │ a ┆ b │
|
1285
|
+
# # │ --- ┆ --- │
|
1286
|
+
# # │ f64 ┆ f64 │
|
1287
|
+
# # ╞══════╪══════╡
|
1288
|
+
# # │ 1.5 ┆ 0.5 │
|
1289
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1290
|
+
# # │ 2.0 ┆ 4.0 │
|
1291
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1292
|
+
# # │ 99.0 ┆ 99.0 │
|
1293
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1294
|
+
# # │ 4.0 ┆ 13.0 │
|
1295
|
+
# # └──────┴──────┘
|
405
1296
|
def fill_nan(fill_value)
|
406
1297
|
if !fill_value.is_a?(Expr)
|
407
1298
|
fill_value = Utils.lit(fill_value)
|
@@ -409,38 +1300,255 @@ module Polars
|
|
409
1300
|
_from_rbldf(_ldf.fill_nan(fill_value._rbexpr))
|
410
1301
|
end
|
411
1302
|
|
412
|
-
#
|
413
|
-
#
|
1303
|
+
# Aggregate the columns in the LazyFrame to their standard deviation value.
|
1304
|
+
#
|
1305
|
+
# @return [LazyFrame]
|
1306
|
+
#
|
1307
|
+
# @example
|
1308
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
|
1309
|
+
# df.std.collect
|
1310
|
+
# # =>
|
1311
|
+
# # shape: (1, 2)
|
1312
|
+
# # ┌──────────┬─────┐
|
1313
|
+
# # │ a ┆ b │
|
1314
|
+
# # │ --- ┆ --- │
|
1315
|
+
# # │ f64 ┆ f64 │
|
1316
|
+
# # ╞══════════╪═════╡
|
1317
|
+
# # │ 1.290994 ┆ 0.5 │
|
1318
|
+
# # └──────────┴─────┘
|
1319
|
+
#
|
1320
|
+
# @example
|
1321
|
+
# df.std(ddof: 0).collect
|
1322
|
+
# # =>
|
1323
|
+
# # shape: (1, 2)
|
1324
|
+
# # ┌──────────┬──────────┐
|
1325
|
+
# # │ a ┆ b │
|
1326
|
+
# # │ --- ┆ --- │
|
1327
|
+
# # │ f64 ┆ f64 │
|
1328
|
+
# # ╞══════════╪══════════╡
|
1329
|
+
# # │ 1.118034 ┆ 0.433013 │
|
1330
|
+
# # └──────────┴──────────┘
|
1331
|
+
def std(ddof: 1)
  # `ddof` is forwarded positionally to the underlying `_ldf`.
  aggregated = _ldf.std(ddof)
  _from_rbldf(aggregated)
end
|
414
1334
|
|
415
|
-
#
|
416
|
-
#
|
1335
|
+
# Aggregate the columns in the LazyFrame to their variance value.
|
1336
|
+
#
|
1337
|
+
# @return [LazyFrame]
|
1338
|
+
#
|
1339
|
+
# @example
|
1340
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
|
1341
|
+
# df.var.collect
|
1342
|
+
# # =>
|
1343
|
+
# # shape: (1, 2)
|
1344
|
+
# # ┌──────────┬──────┐
|
1345
|
+
# # │ a ┆ b │
|
1346
|
+
# # │ --- ┆ --- │
|
1347
|
+
# # │ f64 ┆ f64 │
|
1348
|
+
# # ╞══════════╪══════╡
|
1349
|
+
# # │ 1.666667 ┆ 0.25 │
|
1350
|
+
# # └──────────┴──────┘
|
1351
|
+
#
|
1352
|
+
# @example
|
1353
|
+
# df.var(ddof: 0).collect
|
1354
|
+
# # =>
|
1355
|
+
# # shape: (1, 2)
|
1356
|
+
# # ┌──────┬────────┐
|
1357
|
+
# # │ a ┆ b │
|
1358
|
+
# # │ --- ┆ --- │
|
1359
|
+
# # │ f64 ┆ f64 │
|
1360
|
+
# # ╞══════╪════════╡
|
1361
|
+
# # │ 1.25 ┆ 0.1875 │
|
1362
|
+
# # └──────┴────────┘
|
1363
|
+
def var(ddof: 1)
  # `ddof` is forwarded positionally to the underlying `_ldf`.
  aggregated = _ldf.var(ddof)
  _from_rbldf(aggregated)
end
|
417
1366
|
|
418
|
-
#
|
419
|
-
#
|
1367
|
+
# Aggregate the columns in the LazyFrame to their maximum value.
|
1368
|
+
#
|
1369
|
+
# @return [LazyFrame]
|
1370
|
+
#
|
1371
|
+
# @example
|
1372
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
|
1373
|
+
# df.max.collect
|
1374
|
+
# # =>
|
1375
|
+
# # shape: (1, 2)
|
1376
|
+
# # ┌─────┬─────┐
|
1377
|
+
# # │ a ┆ b │
|
1378
|
+
# # │ --- ┆ --- │
|
1379
|
+
# # │ i64 ┆ i64 │
|
1380
|
+
# # ╞═════╪═════╡
|
1381
|
+
# # │ 4 ┆ 2 │
|
1382
|
+
# # └─────┴─────┘
|
1383
|
+
def max
  # Run the aggregation on the underlying `_ldf` and wrap the result.
  aggregated = _ldf.max
  _from_rbldf(aggregated)
end
|
420
1386
|
|
421
|
-
#
|
422
|
-
#
|
1387
|
+
# Aggregate the columns in the LazyFrame to their minimum value.
|
1388
|
+
#
|
1389
|
+
# @return [LazyFrame]
|
1390
|
+
#
|
1391
|
+
# @example
|
1392
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
|
1393
|
+
# df.min.collect
|
1394
|
+
# # =>
|
1395
|
+
# # shape: (1, 2)
|
1396
|
+
# # ┌─────┬─────┐
|
1397
|
+
# # │ a ┆ b │
|
1398
|
+
# # │ --- ┆ --- │
|
1399
|
+
# # │ i64 ┆ i64 │
|
1400
|
+
# # ╞═════╪═════╡
|
1401
|
+
# # │ 1 ┆ 1 │
|
1402
|
+
# # └─────┴─────┘
|
1403
|
+
def min
  # Run the aggregation on the underlying `_ldf` and wrap the result.
  aggregated = _ldf.min
  _from_rbldf(aggregated)
end
|
423
1406
|
|
424
|
-
#
|
425
|
-
#
|
1407
|
+
# Aggregate the columns in the LazyFrame to their sum value.
|
1408
|
+
#
|
1409
|
+
# @return [LazyFrame]
|
1410
|
+
#
|
1411
|
+
# @example
|
1412
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
|
1413
|
+
# df.sum.collect
|
1414
|
+
# # =>
|
1415
|
+
# # shape: (1, 2)
|
1416
|
+
# # ┌─────┬─────┐
|
1417
|
+
# # │ a ┆ b │
|
1418
|
+
# # │ --- ┆ --- │
|
1419
|
+
# # │ i64 ┆ i64 │
|
1420
|
+
# # ╞═════╪═════╡
|
1421
|
+
# # │ 10 ┆ 5 │
|
1422
|
+
# # └─────┴─────┘
|
1423
|
+
def sum
  # Run the aggregation on the underlying `_ldf` and wrap the result.
  aggregated = _ldf.sum
  _from_rbldf(aggregated)
end
|
426
1426
|
|
427
|
-
#
|
428
|
-
#
|
1427
|
+
# Aggregate the columns in the LazyFrame to their mean value.
|
1428
|
+
#
|
1429
|
+
# @return [LazyFrame]
|
1430
|
+
#
|
1431
|
+
# @example
|
1432
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
|
1433
|
+
# df.mean.collect
|
1434
|
+
# # =>
|
1435
|
+
# # shape: (1, 2)
|
1436
|
+
# # ┌─────┬──────┐
|
1437
|
+
# # │ a ┆ b │
|
1438
|
+
# # │ --- ┆ --- │
|
1439
|
+
# # │ f64 ┆ f64 │
|
1440
|
+
# # ╞═════╪══════╡
|
1441
|
+
# # │ 2.5 ┆ 1.25 │
|
1442
|
+
# # └─────┴──────┘
|
1443
|
+
def mean
  # Run the aggregation on the underlying `_ldf` and wrap the result.
  aggregated = _ldf.mean
  _from_rbldf(aggregated)
end
|
429
1446
|
|
430
|
-
#
|
431
|
-
#
|
1447
|
+
# Aggregate the columns in the LazyFrame to their median value.
|
1448
|
+
#
|
1449
|
+
# @return [LazyFrame]
|
1450
|
+
#
|
1451
|
+
# @example
|
1452
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
|
1453
|
+
# df.median.collect
|
1454
|
+
# # =>
|
1455
|
+
# # shape: (1, 2)
|
1456
|
+
# # ┌─────┬─────┐
|
1457
|
+
# # │ a ┆ b │
|
1458
|
+
# # │ --- ┆ --- │
|
1459
|
+
# # │ f64 ┆ f64 │
|
1460
|
+
# # ╞═════╪═════╡
|
1461
|
+
# # │ 2.5 ┆ 1.0 │
|
1462
|
+
# # └─────┴─────┘
|
1463
|
+
def median
  # Run the aggregation on the underlying `_ldf` and wrap the result.
  aggregated = _ldf.median
  _from_rbldf(aggregated)
end
|
432
1466
|
|
433
|
-
#
|
434
|
-
#
|
1467
|
+
# Aggregate the columns in the LazyFrame to their quantile value.
|
1468
|
+
#
|
1469
|
+
# @param quantile [Float]
|
1470
|
+
# Quantile between 0.0 and 1.0.
|
1471
|
+
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
|
1472
|
+
# Interpolation method.
|
1473
|
+
#
|
1474
|
+
# @return [LazyFrame]
|
1475
|
+
#
|
1476
|
+
# @example
|
1477
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
|
1478
|
+
# df.quantile(0.7).collect
|
1479
|
+
# # =>
|
1480
|
+
# # shape: (1, 2)
|
1481
|
+
# # ┌─────┬─────┐
|
1482
|
+
# # │ a ┆ b │
|
1483
|
+
# # │ --- ┆ --- │
|
1484
|
+
# # │ f64 ┆ f64 │
|
1485
|
+
# # ╞═════╪═════╡
|
1486
|
+
# # │ 3.0 ┆ 1.0 │
|
1487
|
+
# # └─────┴─────┘
|
1488
|
+
def quantile(quantile, interpolation: "nearest")
  # Both the quantile and the interpolation name are forwarded positionally.
  aggregated = _ldf.quantile(quantile, interpolation)
  _from_rbldf(aggregated)
end
|
435
1491
|
|
1492
|
+
# Explode lists to long format.
|
1493
|
+
#
|
1494
|
+
# @return [LazyFrame]
|
436
1495
|
#
|
1496
|
+
# @example
|
1497
|
+
# df = Polars::DataFrame.new(
|
1498
|
+
# {
|
1499
|
+
# "letters" => ["a", "a", "b", "c"],
|
1500
|
+
# "numbers" => [[1], [2, 3], [4, 5], [6, 7, 8]],
|
1501
|
+
# }
|
1502
|
+
# ).lazy
|
1503
|
+
# df.explode("numbers").collect
|
1504
|
+
# # =>
|
1505
|
+
# # shape: (8, 2)
|
1506
|
+
# # ┌─────────┬─────────┐
|
1507
|
+
# # │ letters ┆ numbers │
|
1508
|
+
# # │ --- ┆ --- │
|
1509
|
+
# # │ str ┆ i64 │
|
1510
|
+
# # ╞═════════╪═════════╡
|
1511
|
+
# # │ a ┆ 1 │
|
1512
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
1513
|
+
# # │ a ┆ 2 │
|
1514
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
1515
|
+
# # │ a ┆ 3 │
|
1516
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
1517
|
+
# # │ b ┆ 4 │
|
1518
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
1519
|
+
# # │ b ┆ 5 │
|
1520
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
1521
|
+
# # │ c ┆ 6 │
|
1522
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
1523
|
+
# # │ c ┆ 7 │
|
1524
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
1525
|
+
# # │ c ┆ 8 │
|
1526
|
+
# # └─────────┴─────────┘
|
437
1527
|
def explode(columns)
  # Convert the selection with Utils.selection_to_rbexpr_list before
  # passing it to the underlying `_ldf`.
  rbexprs = Utils.selection_to_rbexpr_list(columns)
  exploded = _ldf.explode(rbexprs)
  _from_rbldf(exploded)
end
|
441
1531
|
|
442
|
-
#
|
443
|
-
#
|
1532
|
+
# Drop duplicate rows from this LazyFrame.
|
1533
|
+
#
|
1534
|
+
# Note that this fails if there is a column of type `List` in the DataFrame or
|
1535
|
+
# subset.
|
1536
|
+
#
|
1537
|
+
# @param maintain_order [Boolean]
|
1538
|
+
# Keep the same order as the original DataFrame. This requires more work to
|
1539
|
+
# compute.
|
1540
|
+
# @param subset [Object]
|
1541
|
+
# Subset to use to compare rows.
|
1542
|
+
# @param keep ["first", "last"]
|
1543
|
+
# Which of the duplicate rows to keep.
|
1544
|
+
#
|
1545
|
+
# @return [LazyFrame]
|
1546
|
+
def unique(maintain_order: true, subset: nil, keep: "first")
  # A single non-nil, non-Array subset is wrapped in an Array; nil and
  # Arrays are forwarded unchanged.
  subset = [subset] unless subset.nil? || subset.is_a?(Array)
  deduplicated = _ldf.unique(maintain_order, subset, keep)
  _from_rbldf(deduplicated)
end
|
444
1552
|
|
445
1553
|
# def drop_nulls
|
446
1554
|
# end
|
@@ -451,11 +1559,97 @@ module Polars
|
|
451
1559
|
# def map
|
452
1560
|
# end
|
453
1561
|
|
454
|
-
#
|
455
|
-
#
|
1562
|
+
# Interpolate intermediate values. The interpolation method is linear.
|
1563
|
+
#
|
1564
|
+
# @return [LazyFrame]
|
1565
|
+
#
|
1566
|
+
# @example
|
1567
|
+
# df = Polars::DataFrame.new(
|
1568
|
+
# {
|
1569
|
+
# "foo" => [1, nil, 9, 10],
|
1570
|
+
# "bar" => [6, 7, 9, nil],
|
1571
|
+
# "baz" => [1, nil, nil, 9]
|
1572
|
+
# }
|
1573
|
+
# ).lazy
|
1574
|
+
# df.interpolate.collect
|
1575
|
+
# # =>
|
1576
|
+
# # shape: (4, 3)
|
1577
|
+
# # ┌─────┬──────┬─────┐
|
1578
|
+
# # │ foo ┆ bar ┆ baz │
|
1579
|
+
# # │ --- ┆ --- ┆ --- │
|
1580
|
+
# # │ i64 ┆ i64 ┆ i64 │
|
1581
|
+
# # ╞═════╪══════╪═════╡
|
1582
|
+
# # │ 1 ┆ 6 ┆ 1 │
|
1583
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
1584
|
+
# # │ 5 ┆ 7 ┆ 3 │
|
1585
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
1586
|
+
# # │ 9 ┆ 9 ┆ 6 │
|
1587
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
1588
|
+
# # │ 10 ┆ null ┆ 9 │
|
1589
|
+
# # └─────┴──────┴─────┘
|
1590
|
+
def interpolate
  # Build the expression over the "*" column selector, then select with it.
  interpolated = Utils.col("*").interpolate
  select(interpolated)
end
|
456
1593
|
|
457
|
-
#
|
458
|
-
#
|
1594
|
+
# Decompose a struct into its fields.
|
1595
|
+
#
|
1596
|
+
# The fields will be inserted into the `DataFrame` on the location of the
|
1597
|
+
# `struct` type.
|
1598
|
+
#
|
1599
|
+
# @param names [Object]
|
1600
|
+
# Names of the struct columns that will be decomposed by its fields
|
1601
|
+
#
|
1602
|
+
# @return [LazyFrame]
|
1603
|
+
#
|
1604
|
+
# @example
|
1605
|
+
# df = (
|
1606
|
+
# Polars::DataFrame.new(
|
1607
|
+
# {
|
1608
|
+
# "before" => ["foo", "bar"],
|
1609
|
+
# "t_a" => [1, 2],
|
1610
|
+
# "t_b" => ["a", "b"],
|
1611
|
+
# "t_c" => [true, nil],
|
1612
|
+
# "t_d" => [[1, 2], [3]],
|
1613
|
+
# "after" => ["baz", "womp"]
|
1614
|
+
# }
|
1615
|
+
# )
|
1616
|
+
# .lazy
|
1617
|
+
# .select(
|
1618
|
+
# ["before", Polars.struct(Polars.col("^t_.$")).alias("t_struct"), "after"]
|
1619
|
+
# )
|
1620
|
+
# )
|
1621
|
+
# df.fetch
|
1622
|
+
# # =>
|
1623
|
+
# # shape: (2, 3)
|
1624
|
+
# # ┌────────┬─────────────────────┬───────┐
|
1625
|
+
# # │ before ┆ t_struct ┆ after │
|
1626
|
+
# # │ --- ┆ --- ┆ --- │
|
1627
|
+
# # │ str ┆ struct[4] ┆ str │
|
1628
|
+
# # ╞════════╪═════════════════════╪═══════╡
|
1629
|
+
# # │ foo ┆ {1,"a",true,[1, 2]} ┆ baz │
|
1630
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
1631
|
+
# # │ bar ┆ {2,"b",null,[3]} ┆ womp │
|
1632
|
+
# # └────────┴─────────────────────┴───────┘
|
1633
|
+
#
|
1634
|
+
# @example
|
1635
|
+
# df.unnest("t_struct").fetch
|
1636
|
+
# # =>
|
1637
|
+
# # shape: (2, 6)
|
1638
|
+
# # ┌────────┬─────┬─────┬──────┬───────────┬───────┐
|
1639
|
+
# # │ before ┆ t_a ┆ t_b ┆ t_c ┆ t_d ┆ after │
|
1640
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
1641
|
+
# # │ str ┆ i64 ┆ str ┆ bool ┆ list[i64] ┆ str │
|
1642
|
+
# # ╞════════╪═════╪═════╪══════╪═══════════╪═══════╡
|
1643
|
+
# # │ foo ┆ 1 ┆ a ┆ true ┆ [1, 2] ┆ baz │
|
1644
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
1645
|
+
# # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
|
1646
|
+
# # └────────┴─────┴─────┴──────┴───────────┴───────┘
|
1647
|
+
def unnest(names)
  # A single String name is wrapped in an Array; anything else is forwarded as given.
  names = [names] if names.is_a?(String)
  unnested = _ldf.unnest(names)
  _from_rbldf(unnested)
end
|
459
1653
|
|
460
1654
|
private
|
461
1655
|
|