polars-df 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +9 -0
- data/Cargo.lock +74 -3
- data/Cargo.toml +3 -0
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +18 -1
- data/ext/polars/src/conversion.rs +115 -2
- data/ext/polars/src/dataframe.rs +228 -11
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +5 -5
- data/ext/polars/src/lazy/dsl.rs +157 -2
- data/ext/polars/src/lib.rs +185 -10
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +217 -29
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +1 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +2384 -140
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +4374 -53
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +518 -0
- data/lib/polars/io.rb +421 -2
- data/lib/polars/lazy_frame.rb +1267 -69
- data/lib/polars/lazy_functions.rb +412 -24
- data/lib/polars/lazy_group_by.rb +80 -0
- data/lib/polars/list_expr.rb +507 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +2256 -242
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +847 -10
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +71 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +12 -10
- metadata +15 -2
data/lib/polars/lazy_frame.rb
CHANGED
@@ -152,29 +152,98 @@ module Polars
|
|
152
152
|
# def self.read_json
|
153
153
|
# end
|
154
154
|
|
155
|
-
#
|
156
|
-
#
|
155
|
+
# Get or set column names.
|
156
|
+
#
|
157
|
+
# @return [Array]
|
158
|
+
#
|
159
|
+
# @example
|
160
|
+
# df = (
|
161
|
+
# Polars::DataFrame.new(
|
162
|
+
# {
|
163
|
+
# "foo" => [1, 2, 3],
|
164
|
+
# "bar" => [6, 7, 8],
|
165
|
+
# "ham" => ["a", "b", "c"]
|
166
|
+
# }
|
167
|
+
# )
|
168
|
+
# .lazy
|
169
|
+
# .select(["foo", "bar"])
|
170
|
+
# )
|
171
|
+
# df.columns
|
172
|
+
# # => ["foo", "bar"]
|
173
|
+
def columns
  # Ask the wrapped lazy frame handle for its column names.
  names = _ldf.columns
  names
end
|
157
176
|
|
158
|
-
#
|
159
|
-
#
|
177
|
+
# Get dtypes of columns in LazyFrame.
|
178
|
+
#
|
179
|
+
# @return [Array]
|
180
|
+
#
|
181
|
+
# @example
|
182
|
+
# lf = Polars::DataFrame.new(
|
183
|
+
# {
|
184
|
+
# "foo" => [1, 2, 3],
|
185
|
+
# "bar" => [6.0, 7.0, 8.0],
|
186
|
+
# "ham" => ["a", "b", "c"]
|
187
|
+
# }
|
188
|
+
# ).lazy
|
189
|
+
# lf.dtypes
|
190
|
+
# # => [:i64, :f64, :str]
|
191
|
+
def dtypes
  # Ask the wrapped lazy frame handle for its column dtypes.
  column_types = _ldf.dtypes
  column_types
end
|
160
194
|
|
161
|
-
#
|
162
|
-
#
|
195
|
+
# Get the schema.
|
196
|
+
#
|
197
|
+
# @return [Hash]
|
198
|
+
#
|
199
|
+
# @example
|
200
|
+
# lf = Polars::DataFrame.new(
|
201
|
+
# {
|
202
|
+
# "foo" => [1, 2, 3],
|
203
|
+
# "bar" => [6.0, 7.0, 8.0],
|
204
|
+
# "ham" => ["a", "b", "c"]
|
205
|
+
# }
|
206
|
+
# ).lazy
|
207
|
+
# lf.schema
|
208
|
+
# # => {"foo"=>:i64, "bar"=>:f64, "ham"=>:str}
|
209
|
+
def schema
  # Ask the wrapped lazy frame handle for its schema.
  frame_schema = _ldf.schema
  frame_schema
end
|
163
212
|
|
164
|
-
#
|
165
|
-
#
|
213
|
+
# Get the width of the LazyFrame.
|
214
|
+
#
|
215
|
+
# @return [Integer]
|
216
|
+
#
|
217
|
+
# @example
|
218
|
+
# lf = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]}).lazy
|
219
|
+
# lf.width
|
220
|
+
# # => 2
|
221
|
+
def width
  # Ask the wrapped lazy frame handle for its width.
  column_count = _ldf.width
  column_count
end
|
166
224
|
|
167
|
-
#
|
168
|
-
#
|
225
|
+
# Check if LazyFrame includes key.
|
226
|
+
#
|
227
|
+
# @return [Boolean]
|
228
|
+
def include?(key)
  # Membership is checked against the result of #columns.
  known_columns = columns
  known_columns.include?(key)
end
|
169
231
|
|
170
232
|
# clone handled by initialize_copy
|
171
233
|
|
172
234
|
# def [](item)
|
173
235
|
# end
|
174
236
|
|
175
|
-
#
|
176
|
-
#
|
177
|
-
#
|
237
|
+
# Returns a string representing the LazyFrame.
|
238
|
+
#
|
239
|
+
# @return [String]
|
240
|
+
def to_s
  # Header line, one blank line, then the unoptimized plan, newline-terminated.
  header = "naive plan: (run LazyFrame#describe_optimized_plan to see the optimized plan)"
  "#{header}\n\n#{describe_plan}\n"
end
|
178
247
|
|
179
248
|
# def write_json
|
180
249
|
# end
|
@@ -182,22 +251,125 @@ module Polars
|
|
182
251
|
# def pipe
|
183
252
|
# end
|
184
253
|
|
185
|
-
#
|
186
|
-
#
|
254
|
+
# Create a string representation of the unoptimized query plan.
|
255
|
+
#
|
256
|
+
# @return [String]
|
257
|
+
def describe_plan
  # Ask the wrapped lazy frame handle for its plan description.
  plan = _ldf.describe_plan
  plan
end
|
187
260
|
|
261
|
+
# Create a string representation of the optimized query plan.
|
262
|
+
#
|
263
|
+
# @return [String]
|
188
264
|
# def describe_optimized_plan
|
189
265
|
# end
|
190
266
|
|
191
267
|
# def show_graph
|
192
268
|
# end
|
193
269
|
|
194
|
-
#
|
195
|
-
#
|
270
|
+
# Sort the DataFrame.
|
271
|
+
#
|
272
|
+
# Sorting can be done by:
|
273
|
+
#
|
274
|
+
# - A single column name
|
275
|
+
# - An expression
|
276
|
+
# - Multiple expressions
|
277
|
+
#
|
278
|
+
# @param by [Object]
|
279
|
+
# Column (expressions) to sort by.
|
280
|
+
# @param reverse [Boolean]
|
281
|
+
# Sort in descending order.
|
282
|
+
# @param nulls_last [Boolean]
|
283
|
+
# Place null values last. Can only be used if sorted by a single column.
|
284
|
+
#
|
285
|
+
# @return [LazyFrame]
|
286
|
+
#
|
287
|
+
# @example
|
288
|
+
# df = Polars::DataFrame.new(
|
289
|
+
# {
|
290
|
+
# "foo" => [1, 2, 3],
|
291
|
+
# "bar" => [6.0, 7.0, 8.0],
|
292
|
+
# "ham" => ["a", "b", "c"]
|
293
|
+
# }
|
294
|
+
# ).lazy
|
295
|
+
# df.sort("foo", reverse: true).collect
|
296
|
+
# # =>
|
297
|
+
# # shape: (3, 3)
|
298
|
+
# # ┌─────┬─────┬─────┐
|
299
|
+
# # │ foo ┆ bar ┆ ham │
|
300
|
+
# # │ --- ┆ --- ┆ --- │
|
301
|
+
# # │ i64 ┆ f64 ┆ str │
|
302
|
+
# # ╞═════╪═════╪═════╡
|
303
|
+
# # │ 3 ┆ 8.0 ┆ c │
|
304
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
305
|
+
# # │ 2 ┆ 7.0 ┆ b │
|
306
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
307
|
+
# # │ 1 ┆ 6.0 ┆ a │
|
308
|
+
# # └─────┴─────┴─────┘
|
309
|
+
def sort(by, reverse: false, nulls_last: false)
  # Single column name: use the direct sort and return right away. Without the
  # explicit `return`, this result was discarded and execution fell through to
  # the expression-based path below.
  return _from_rbldf(_ldf.sort(by, reverse, nulls_last)) if by.is_a?(String)

  # `sort_by_exprs` is handed `reverse` as an array, so wrap a lone boolean.
  reverse = [reverse] if Utils.bool?(reverse)

  by = Utils.selection_to_rbexpr_list(by)
  _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last))
end
|
196
320
|
|
197
321
|
# def profile
|
198
322
|
# end
|
199
323
|
|
324
|
+
# Collect into a DataFrame.
|
325
|
+
#
|
326
|
+
# Note: use {#fetch} if you want to run your query on the first `n` rows
|
327
|
+
# only. This can be a huge time saver in debugging queries.
|
328
|
+
#
|
329
|
+
# @param type_coercion [Boolean]
|
330
|
+
# Do type coercion optimization.
|
331
|
+
# @param predicate_pushdown [Boolean]
|
332
|
+
# Do predicate pushdown optimization.
|
333
|
+
# @param projection_pushdown [Boolean]
|
334
|
+
# Do projection pushdown optimization.
|
335
|
+
# @param simplify_expression [Boolean]
|
336
|
+
# Run simplify expressions optimization.
|
337
|
+
# @param string_cache [Boolean]
|
338
|
+
# This argument is deprecated. Please set the string cache globally.
|
339
|
+
# The argument will be ignored
|
340
|
+
# @param no_optimization [Boolean]
|
341
|
+
# Turn off (certain) optimizations.
|
342
|
+
# @param slice_pushdown [Boolean]
|
343
|
+
# Slice pushdown optimization.
|
344
|
+
# @param common_subplan_elimination [Boolean]
|
345
|
+
# Will try to cache branching subplans that occur on self-joins or unions.
|
346
|
+
# @param allow_streaming [Boolean]
|
347
|
+
# Run parts of the query in a streaming fashion (this is in an alpha state)
|
200
348
|
#
|
349
|
+
# @return [DataFrame]
|
350
|
+
#
|
351
|
+
# @example
|
352
|
+
# df = Polars::DataFrame.new(
|
353
|
+
# {
|
354
|
+
# "a" => ["a", "b", "a", "b", "b", "c"],
|
355
|
+
# "b" => [1, 2, 3, 4, 5, 6],
|
356
|
+
# "c" => [6, 5, 4, 3, 2, 1]
|
357
|
+
# }
|
358
|
+
# ).lazy
|
359
|
+
# df.groupby("a", maintain_order: true).agg(Polars.all.sum).collect
|
360
|
+
# # =>
|
361
|
+
# # shape: (3, 3)
|
362
|
+
# # ┌─────┬─────┬─────┐
|
363
|
+
# # │ a ┆ b ┆ c │
|
364
|
+
# # │ --- ┆ --- ┆ --- │
|
365
|
+
# # │ str ┆ i64 ┆ i64 │
|
366
|
+
# # ╞═════╪═════╪═════╡
|
367
|
+
# # │ a ┆ 4 ┆ 10 │
|
368
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
369
|
+
# # │ b ┆ 11 ┆ 10 │
|
370
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
371
|
+
# # │ c ┆ 6 ┆ 1 │
|
372
|
+
# # └─────┴─────┴─────┘
|
201
373
|
def collect(
|
202
374
|
type_coercion: true,
|
203
375
|
predicate_pushdown: true,
|
@@ -232,21 +404,184 @@ module Polars
|
|
232
404
|
Utils.wrap_df(ldf.collect)
|
233
405
|
end
|
234
406
|
|
235
|
-
#
|
236
|
-
#
|
407
|
+
# Collect a small number of rows for debugging purposes.
|
408
|
+
#
|
409
|
+
# Fetch is like a {#collect} operation, but it overwrites the number of rows
|
410
|
+
# read by every scan operation. This is a utility that helps debug a query on a
|
411
|
+
# smaller number of rows.
|
412
|
+
#
|
413
|
+
# Note that the fetch does not guarantee the final number of rows in the
|
414
|
+
# DataFrame. Filter, join operations and a lower number of rows available in the
|
415
|
+
# scanned file influence the final number of rows.
|
416
|
+
#
|
417
|
+
# @param n_rows [Integer]
|
418
|
+
# Collect n_rows from the data sources.
|
419
|
+
# @param type_coercion [Boolean]
|
420
|
+
# Run type coercion optimization.
|
421
|
+
# @param predicate_pushdown [Boolean]
|
422
|
+
# Run predicate pushdown optimization.
|
423
|
+
# @param projection_pushdown [Boolean]
|
424
|
+
# Run projection pushdown optimization.
|
425
|
+
# @param simplify_expression [Boolean]
|
426
|
+
# Run simplify expressions optimization.
|
427
|
+
# @param string_cache [Boolean]
|
428
|
+
# This argument is deprecated. Please set the string cache globally.
|
429
|
+
# The argument will be ignored
|
430
|
+
# @param no_optimization [Boolean]
|
431
|
+
# Turn off optimizations.
|
432
|
+
# @param slice_pushdown [Boolean]
|
433
|
+
# Slice pushdown optimization
|
434
|
+
# @param common_subplan_elimination [Boolean]
|
435
|
+
# Will try to cache branching subplans that occur on self-joins or unions.
|
436
|
+
# @param allow_streaming [Boolean]
|
437
|
+
# Run parts of the query in a streaming fashion (this is in an alpha state)
|
438
|
+
#
|
439
|
+
# @return [DataFrame]
|
440
|
+
#
|
441
|
+
# @example
|
442
|
+
# df = Polars::DataFrame.new(
|
443
|
+
# {
|
444
|
+
# "a" => ["a", "b", "a", "b", "b", "c"],
|
445
|
+
# "b" => [1, 2, 3, 4, 5, 6],
|
446
|
+
# "c" => [6, 5, 4, 3, 2, 1]
|
447
|
+
# }
|
448
|
+
# ).lazy
|
449
|
+
# df.groupby("a", maintain_order: true).agg(Polars.all.sum).fetch(2)
|
450
|
+
# # =>
|
451
|
+
# # shape: (2, 3)
|
452
|
+
# # ┌─────┬─────┬─────┐
|
453
|
+
# # │ a ┆ b ┆ c │
|
454
|
+
# # │ --- ┆ --- ┆ --- │
|
455
|
+
# # │ str ┆ i64 ┆ i64 │
|
456
|
+
# # ╞═════╪═════╪═════╡
|
457
|
+
# # │ a ┆ 1 ┆ 6 │
|
458
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
459
|
+
# # │ b ┆ 2 ┆ 5 │
|
460
|
+
# # └─────┴─────┴─────┘
|
461
|
+
def fetch(
  n_rows = 500,
  type_coercion: true,
  predicate_pushdown: true,
  projection_pushdown: true,
  simplify_expression: true,
  string_cache: false,
  no_optimization: false,
  slice_pushdown: true,
  common_subplan_elimination: true,
  allow_streaming: false
)
  # `no_optimization` switches these four toggles off; the remaining toggles
  # are passed through as given. `string_cache` is accepted but never read.
  if no_optimization
    predicate_pushdown = projection_pushdown = slice_pushdown = common_subplan_elimination = false
  end

  toggles = [
    type_coercion,
    predicate_pushdown,
    projection_pushdown,
    simplify_expression,
    slice_pushdown,
    common_subplan_elimination,
    allow_streaming
  ]
  toggled = _ldf.optimization_toggle(*toggles)
  Utils.wrap_df(toggled.fetch(n_rows))
end
|
237
491
|
|
492
|
+
# Return lazy representation, i.e. itself.
|
493
|
+
#
|
494
|
+
# Useful for writing code that expects either a `DataFrame` or
|
495
|
+
# `LazyFrame`.
|
238
496
|
#
|
497
|
+
# @return [LazyFrame]
|
498
|
+
#
|
499
|
+
# @example
|
500
|
+
# df = Polars::DataFrame.new(
|
501
|
+
# {
|
502
|
+
# "a" => [nil, 2, 3, 4],
|
503
|
+
# "b" => [0.5, nil, 2.5, 13],
|
504
|
+
# "c" => [true, true, false, nil]
|
505
|
+
# }
|
506
|
+
# )
|
507
|
+
# df.lazy
|
239
508
|
def lazy
  # Already lazy: hand back the receiver unchanged.
  itself
end
|
242
511
|
|
243
|
-
#
|
244
|
-
#
|
512
|
+
# Cache the result once the execution of the physical plan hits this node.
|
513
|
+
#
|
514
|
+
# @return [LazyFrame]
|
515
|
+
def cache
  # Wrap the handle returned by the underlying `cache` call.
  cached = _ldf.cache
  _from_rbldf(cached)
end
|
245
518
|
|
246
|
-
#
|
247
|
-
#
|
519
|
+
# Create an empty copy of the current LazyFrame.
|
520
|
+
#
|
521
|
+
# The copy has an identical schema but no data.
|
522
|
+
#
|
523
|
+
# @return [LazyFrame]
|
524
|
+
#
|
525
|
+
# @example
|
526
|
+
# df = Polars::DataFrame.new(
|
527
|
+
# {
|
528
|
+
# "a" => [nil, 2, 3, 4],
|
529
|
+
# "b" => [0.5, nil, 2.5, 13],
|
530
|
+
# "c" => [true, true, false, nil],
|
531
|
+
# }
|
532
|
+
# ).lazy
|
533
|
+
# df.cleared.fetch
|
534
|
+
# # =>
|
535
|
+
# # shape: (0, 3)
|
536
|
+
# # ┌─────┬─────┬──────┐
|
537
|
+
# # │ a ┆ b ┆ c │
|
538
|
+
# # │ --- ┆ --- ┆ --- │
|
539
|
+
# # │ i64 ┆ f64 ┆ bool │
|
540
|
+
# # ╞═════╪═════╪══════╡
|
541
|
+
# # └─────┴─────┴──────┘
|
542
|
+
def cleared
  # Build a DataFrame from this frame's schema only, then make it lazy.
  empty_frame = DataFrame.new(columns: schema)
  empty_frame.lazy
end
|
248
545
|
|
546
|
+
# Filter the rows in the DataFrame based on a predicate expression.
|
547
|
+
#
|
548
|
+
# @param predicate [Object]
|
549
|
+
# Expression that evaluates to a boolean Series.
|
249
550
|
#
|
551
|
+
# @return [LazyFrame]
|
552
|
+
#
|
553
|
+
# @example Filter on one condition:
|
554
|
+
# lf = Polars::DataFrame.new(
|
555
|
+
# {
|
556
|
+
# "foo" => [1, 2, 3],
|
557
|
+
# "bar" => [6, 7, 8],
|
558
|
+
# "ham" => ["a", "b", "c"]
|
559
|
+
# }
|
560
|
+
# ).lazy
|
561
|
+
# lf.filter(Polars.col("foo") < 3).collect
|
562
|
+
# # =>
|
563
|
+
# # shape: (2, 3)
|
564
|
+
# # ┌─────┬─────┬─────┐
|
565
|
+
# # │ foo ┆ bar ┆ ham │
|
566
|
+
# # │ --- ┆ --- ┆ --- │
|
567
|
+
# # │ i64 ┆ i64 ┆ str │
|
568
|
+
# # ╞═════╪═════╪═════╡
|
569
|
+
# # │ 1 ┆ 6 ┆ a │
|
570
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
571
|
+
# # │ 2 ┆ 7 ┆ b │
|
572
|
+
# # └─────┴─────┴─────┘
|
573
|
+
#
|
574
|
+
# @example Filter on multiple conditions:
|
575
|
+
# lf.filter((Polars.col("foo") < 3) & (Polars.col("ham") == "a")).collect
|
576
|
+
# # =>
|
577
|
+
# # shape: (1, 3)
|
578
|
+
# # ┌─────┬─────┬─────┐
|
579
|
+
# # │ foo ┆ bar ┆ ham │
|
580
|
+
# # │ --- ┆ --- ┆ --- │
|
581
|
+
# # │ i64 ┆ i64 ┆ str │
|
582
|
+
# # ╞═════╪═════╪═════╡
|
583
|
+
# # │ 1 ┆ 6 ┆ a │
|
584
|
+
# # └─────┴─────┴─────┘
|
250
585
|
def filter(predicate)
|
251
586
|
_from_rbldf(
|
252
587
|
_ldf.filter(
|
@@ -255,11 +590,136 @@ module Polars
|
|
255
590
|
)
|
256
591
|
end
|
257
592
|
|
593
|
+
# Select columns from this DataFrame.
|
594
|
+
#
|
595
|
+
# @param exprs [Object]
|
596
|
+
# Column or columns to select.
|
597
|
+
#
|
598
|
+
# @return [LazyFrame]
|
599
|
+
#
|
600
|
+
# @example
|
601
|
+
# df = Polars::DataFrame.new(
|
602
|
+
# {
|
603
|
+
# "foo" => [1, 2, 3],
|
604
|
+
# "bar" => [6, 7, 8],
|
605
|
+
# "ham" => ["a", "b", "c"],
|
606
|
+
# }
|
607
|
+
# ).lazy
|
608
|
+
# df.select("foo").collect
|
609
|
+
# # =>
|
610
|
+
# # shape: (3, 1)
|
611
|
+
# # ┌─────┐
|
612
|
+
# # │ foo │
|
613
|
+
# # │ --- │
|
614
|
+
# # │ i64 │
|
615
|
+
# # ╞═════╡
|
616
|
+
# # │ 1 │
|
617
|
+
# # ├╌╌╌╌╌┤
|
618
|
+
# # │ 2 │
|
619
|
+
# # ├╌╌╌╌╌┤
|
620
|
+
# # │ 3 │
|
621
|
+
# # └─────┘
|
622
|
+
#
|
623
|
+
# @example
|
624
|
+
# df.select(["foo", "bar"]).collect
|
625
|
+
# # =>
|
626
|
+
# # shape: (3, 2)
|
627
|
+
# # ┌─────┬─────┐
|
628
|
+
# # │ foo ┆ bar │
|
629
|
+
# # │ --- ┆ --- │
|
630
|
+
# # │ i64 ┆ i64 │
|
631
|
+
# # ╞═════╪═════╡
|
632
|
+
# # │ 1 ┆ 6 │
|
633
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
634
|
+
# # │ 2 ┆ 7 │
|
635
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
636
|
+
# # │ 3 ┆ 8 │
|
637
|
+
# # └─────┴─────┘
|
638
|
+
#
|
639
|
+
# @example
|
640
|
+
# df.select(Polars.col("foo") + 1).collect
|
641
|
+
# # =>
|
642
|
+
# # shape: (3, 1)
|
643
|
+
# # ┌─────┐
|
644
|
+
# # │ foo │
|
645
|
+
# # │ --- │
|
646
|
+
# # │ i64 │
|
647
|
+
# # ╞═════╡
|
648
|
+
# # │ 2 │
|
649
|
+
# # ├╌╌╌╌╌┤
|
650
|
+
# # │ 3 │
|
651
|
+
# # ├╌╌╌╌╌┤
|
652
|
+
# # │ 4 │
|
653
|
+
# # └─────┘
|
654
|
+
#
|
655
|
+
# @example
|
656
|
+
# df.select([Polars.col("foo") + 1, Polars.col("bar") + 1]).collect
|
657
|
+
# # =>
|
658
|
+
# # shape: (3, 2)
|
659
|
+
# # ┌─────┬─────┐
|
660
|
+
# # │ foo ┆ bar │
|
661
|
+
# # │ --- ┆ --- │
|
662
|
+
# # │ i64 ┆ i64 │
|
663
|
+
# # ╞═════╪═════╡
|
664
|
+
# # │ 2 ┆ 7 │
|
665
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
666
|
+
# # │ 3 ┆ 8 │
|
667
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
668
|
+
# # │ 4 ┆ 9 │
|
669
|
+
# # └─────┴─────┘
|
670
|
+
#
|
671
|
+
# @example
|
672
|
+
# df.select(Polars.when(Polars.col("foo") > 2).then(10).otherwise(0)).collect
|
673
|
+
# # =>
|
674
|
+
# # shape: (3, 1)
|
675
|
+
# # ┌─────────┐
|
676
|
+
# # │ literal │
|
677
|
+
# # │ --- │
|
678
|
+
# # │ i64 │
|
679
|
+
# # ╞═════════╡
|
680
|
+
# # │ 0 │
|
681
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
682
|
+
# # │ 0 │
|
683
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
684
|
+
# # │ 10 │
|
685
|
+
# # └─────────┘
|
258
686
|
def select(exprs)
  # Normalize the selection via Utils before handing it to the underlying handle.
  rbexprs = Utils.selection_to_rbexpr_list(exprs)
  selected = _ldf.select(rbexprs)
  _from_rbldf(selected)
end
|
262
690
|
|
691
|
+
# Start a groupby operation.
|
692
|
+
#
|
693
|
+
# @param by [Object]
|
694
|
+
# Column(s) to group by.
|
695
|
+
# @param maintain_order [Boolean]
|
696
|
+
# Make sure that the order of the groups remain consistent. This is more
|
697
|
+
# expensive than a default groupby.
|
698
|
+
#
|
699
|
+
# @return [LazyGroupBy]
|
700
|
+
#
|
701
|
+
# @example
|
702
|
+
# df = Polars::DataFrame.new(
|
703
|
+
# {
|
704
|
+
# "a" => ["a", "b", "a", "b", "b", "c"],
|
705
|
+
# "b" => [1, 2, 3, 4, 5, 6],
|
706
|
+
# "c" => [6, 5, 4, 3, 2, 1]
|
707
|
+
# }
|
708
|
+
# ).lazy
|
709
|
+
# df.groupby("a", maintain_order: true).agg(Polars.col("b").sum).collect
|
710
|
+
# # =>
|
711
|
+
# # shape: (3, 2)
|
712
|
+
# # ┌─────┬─────┐
|
713
|
+
# # │ a ┆ b │
|
714
|
+
# # │ --- ┆ --- │
|
715
|
+
# # │ str ┆ i64 │
|
716
|
+
# # ╞═════╪═════╡
|
717
|
+
# # │ a ┆ 4 │
|
718
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
719
|
+
# # │ b ┆ 11 │
|
720
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
721
|
+
# # │ c ┆ 6 │
|
722
|
+
# # └─────┴─────┘
|
263
723
|
def groupby(by, maintain_order: false)
|
264
724
|
rbexprs_by = Utils.selection_to_rbexpr_list(by)
|
265
725
|
lgb = _ldf.groupby(rbexprs_by, maintain_order)
|
@@ -275,7 +735,116 @@ module Polars
|
|
275
735
|
# def join_asof
|
276
736
|
# end
|
277
737
|
|
738
|
+
# Add a join operation to the Logical Plan.
|
739
|
+
#
|
740
|
+
# @param other [LazyFrame]
|
741
|
+
# Lazy DataFrame to join with.
|
742
|
+
# @param left_on [Object]
|
743
|
+
# Join column of the left DataFrame.
|
744
|
+
# @param right_on [Object]
|
745
|
+
# Join column of the right DataFrame.
|
746
|
+
# @param on [Object]
|
747
|
+
# Join column of both DataFrames. If set, `left_on` and `right_on` should be
|
748
|
+
# `nil`.
|
749
|
+
# @param how ["inner", "left", "outer", "semi", "anti", "cross"]
|
750
|
+
# Join strategy.
|
751
|
+
# @param suffix [String]
|
752
|
+
# Suffix to append to columns with a duplicate name.
|
753
|
+
# @param allow_parallel [Boolean]
|
754
|
+
# Allow the physical plan to optionally evaluate the computation of both
|
755
|
+
# DataFrames up to the join in parallel.
|
756
|
+
# @param force_parallel [Boolean]
|
757
|
+
# Force the physical plan to evaluate the computation of both DataFrames up to
|
758
|
+
# the join in parallel.
|
759
|
+
#
|
760
|
+
# @return [LazyFrame]
|
278
761
|
#
|
762
|
+
# @example
|
763
|
+
# df = Polars::DataFrame.new(
|
764
|
+
# {
|
765
|
+
# "foo" => [1, 2, 3],
|
766
|
+
# "bar" => [6.0, 7.0, 8.0],
|
767
|
+
# "ham" => ["a", "b", "c"]
|
768
|
+
# }
|
769
|
+
# ).lazy
|
770
|
+
# other_df = Polars::DataFrame.new(
|
771
|
+
# {
|
772
|
+
# "apple" => ["x", "y", "z"],
|
773
|
+
# "ham" => ["a", "b", "d"]
|
774
|
+
# }
|
775
|
+
# ).lazy
|
776
|
+
# df.join(other_df, on: "ham").collect
|
777
|
+
# # =>
|
778
|
+
# # shape: (2, 4)
|
779
|
+
# # ┌─────┬─────┬─────┬───────┐
|
780
|
+
# # │ foo ┆ bar ┆ ham ┆ apple │
|
781
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
782
|
+
# # │ i64 ┆ f64 ┆ str ┆ str │
|
783
|
+
# # ╞═════╪═════╪═════╪═══════╡
|
784
|
+
# # │ 1 ┆ 6.0 ┆ a ┆ x │
|
785
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
786
|
+
# # │ 2 ┆ 7.0 ┆ b ┆ y │
|
787
|
+
# # └─────┴─────┴─────┴───────┘
|
788
|
+
#
|
789
|
+
# @example
|
790
|
+
# df.join(other_df, on: "ham", how: "outer").collect
|
791
|
+
# # =>
|
792
|
+
# # shape: (4, 4)
|
793
|
+
# # ┌──────┬──────┬─────┬───────┐
|
794
|
+
# # │ foo ┆ bar ┆ ham ┆ apple │
|
795
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
796
|
+
# # │ i64 ┆ f64 ┆ str ┆ str │
|
797
|
+
# # ╞══════╪══════╪═════╪═══════╡
|
798
|
+
# # │ 1 ┆ 6.0 ┆ a ┆ x │
|
799
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
800
|
+
# # │ 2 ┆ 7.0 ┆ b ┆ y │
|
801
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
802
|
+
# # │ null ┆ null ┆ d ┆ z │
|
803
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
804
|
+
# # │ 3 ┆ 8.0 ┆ c ┆ null │
|
805
|
+
# # └──────┴──────┴─────┴───────┘
|
806
|
+
#
|
807
|
+
# @example
|
808
|
+
# df.join(other_df, on: "ham", how: "left").collect
|
809
|
+
# # =>
|
810
|
+
# # shape: (3, 4)
|
811
|
+
# # ┌─────┬─────┬─────┬───────┐
|
812
|
+
# # │ foo ┆ bar ┆ ham ┆ apple │
|
813
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
814
|
+
# # │ i64 ┆ f64 ┆ str ┆ str │
|
815
|
+
# # ╞═════╪═════╪═════╪═══════╡
|
816
|
+
# # │ 1 ┆ 6.0 ┆ a ┆ x │
|
817
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
818
|
+
# # │ 2 ┆ 7.0 ┆ b ┆ y │
|
819
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
820
|
+
# # │ 3 ┆ 8.0 ┆ c ┆ null │
|
821
|
+
# # └─────┴─────┴─────┴───────┘
|
822
|
+
#
|
823
|
+
# @example
|
824
|
+
# df.join(other_df, on: "ham", how: "semi").collect
|
825
|
+
# # =>
|
826
|
+
# # shape: (2, 3)
|
827
|
+
# # ┌─────┬─────┬─────┐
|
828
|
+
# # │ foo ┆ bar ┆ ham │
|
829
|
+
# # │ --- ┆ --- ┆ --- │
|
830
|
+
# # │ i64 ┆ f64 ┆ str │
|
831
|
+
# # ╞═════╪═════╪═════╡
|
832
|
+
# # │ 1 ┆ 6.0 ┆ a │
|
833
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
834
|
+
# # │ 2 ┆ 7.0 ┆ b │
|
835
|
+
# # └─────┴─────┴─────┘
|
836
|
+
#
|
837
|
+
# @example
|
838
|
+
# df.join(other_df, on: "ham", how: "anti").collect
|
839
|
+
# # =>
|
840
|
+
# # shape: (1, 3)
|
841
|
+
# # ┌─────┬─────┬─────┐
|
842
|
+
# # │ foo ┆ bar ┆ ham │
|
843
|
+
# # │ --- ┆ --- ┆ --- │
|
844
|
+
# # │ i64 ┆ f64 ┆ str │
|
845
|
+
# # ╞═════╪═════╪═════╡
|
846
|
+
# # │ 3 ┆ 8.0 ┆ c │
|
847
|
+
# # └─────┴─────┴─────┘
|
279
848
|
def join(
|
280
849
|
other,
|
281
850
|
left_on: nil,
|
@@ -322,6 +891,43 @@ module Polars
|
|
322
891
|
)
|
323
892
|
end
|
324
893
|
|
894
|
+
# Add or overwrite multiple columns in a DataFrame.
|
895
|
+
#
|
896
|
+
# @param exprs [Object]
|
897
|
+
# List of Expressions that evaluate to columns.
|
898
|
+
#
|
899
|
+
# @return [LazyFrame]
|
900
|
+
#
|
901
|
+
# @example
|
902
|
+
# ldf = Polars::DataFrame.new(
|
903
|
+
# {
|
904
|
+
# "a" => [1, 2, 3, 4],
|
905
|
+
# "b" => [0.5, 4, 10, 13],
|
906
|
+
# "c" => [true, true, false, true]
|
907
|
+
# }
|
908
|
+
# ).lazy
|
909
|
+
# ldf.with_columns(
|
910
|
+
# [
|
911
|
+
# (Polars.col("a") ** 2).alias("a^2"),
|
912
|
+
# (Polars.col("b") / 2).alias("b/2"),
|
913
|
+
# (Polars.col("c").is_not).alias("not c")
|
914
|
+
# ]
|
915
|
+
# ).collect
|
916
|
+
# # =>
|
917
|
+
# # shape: (4, 6)
|
918
|
+
# # ┌─────┬──────┬───────┬──────┬──────┬───────┐
|
919
|
+
# # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
|
920
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
921
|
+
# # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │
|
922
|
+
# # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
|
923
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │
|
924
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
925
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │
|
926
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
927
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
|
928
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
929
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
|
930
|
+
# # └─────┴──────┴───────┴──────┴──────┴───────┘
|
325
931
|
def with_columns(exprs)
|
326
932
|
exprs =
|
327
933
|
if exprs.nil?
|
@@ -350,58 +956,347 @@ module Polars
|
|
350
956
|
# def with_context
|
351
957
|
# end
|
352
958
|
|
959
|
+
# Add or overwrite column in a DataFrame.
|
353
960
|
#
|
961
|
+
# @param column [Object]
|
962
|
+
# Expression that evaluates to column or a Series to use.
|
963
|
+
#
|
964
|
+
# @return [LazyFrame]
|
965
|
+
#
|
966
|
+
# @example
|
967
|
+
# df = Polars::DataFrame.new(
|
968
|
+
# {
|
969
|
+
# "a" => [1, 3, 5],
|
970
|
+
# "b" => [2, 4, 6]
|
971
|
+
# }
|
972
|
+
# ).lazy
|
973
|
+
# df.with_column((Polars.col("b") ** 2).alias("b_squared")).collect
|
974
|
+
# # =>
|
975
|
+
# # shape: (3, 3)
|
976
|
+
# # ┌─────┬─────┬───────────┐
|
977
|
+
# # │ a ┆ b ┆ b_squared │
|
978
|
+
# # │ --- ┆ --- ┆ --- │
|
979
|
+
# # │ i64 ┆ i64 ┆ f64 │
|
980
|
+
# # ╞═════╪═════╪═══════════╡
|
981
|
+
# # │ 1 ┆ 2 ┆ 4.0 │
|
982
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
983
|
+
# # │ 3 ┆ 4 ┆ 16.0 │
|
984
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
985
|
+
# # │ 5 ┆ 6 ┆ 36.0 │
|
986
|
+
# # └─────┴─────┴───────────┘
|
987
|
+
#
|
988
|
+
# @example
|
989
|
+
# df.with_column(Polars.col("a") ** 2).collect
|
990
|
+
# # =>
|
991
|
+
# # shape: (3, 2)
|
992
|
+
# # ┌──────┬─────┐
|
993
|
+
# # │ a ┆ b │
|
994
|
+
# # │ --- ┆ --- │
|
995
|
+
# # │ f64 ┆ i64 │
|
996
|
+
# # ╞══════╪═════╡
|
997
|
+
# # │ 1.0 ┆ 2 │
|
998
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
|
999
|
+
# # │ 9.0 ┆ 4 │
|
1000
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
|
1001
|
+
# # │ 25.0 ┆ 6 │
|
1002
|
+
# # └──────┴─────┘
|
354
1003
|
def with_column(column)
  # Single-column form: delegate to #with_columns with a one-element array.
  single = [column]
  with_columns(single)
end
|
357
1006
|
|
358
|
-
#
|
359
|
-
#
|
1007
|
+
# Remove one or multiple columns from a DataFrame.
|
1008
|
+
#
|
1009
|
+
# @param columns [Object]
|
1010
|
+
# - Name of the column that should be removed.
|
1011
|
+
# - List of column names.
|
1012
|
+
#
|
1013
|
+
# @return [LazyFrame]
|
1014
|
+
def drop(columns)
  # A lone String is wrapped in an array; anything else is passed as given.
  names = columns.is_a?(String) ? [columns] : columns
  _from_rbldf(_ldf.drop_columns(names))
end
|
360
1020
|
|
1021
|
+
# Rename column names.
|
361
1022
|
#
|
1023
|
+
# @param mapping [Hash]
|
1024
|
+
# Key value pairs that map from old name to new name.
|
1025
|
+
#
|
1026
|
+
# @return [LazyFrame]
|
362
1027
|
def rename(mapping)
  # Keys are the existing names, values the replacements, in matching order.
  renamed = _ldf.rename(mapping.keys, mapping.values)
  _from_rbldf(renamed)
end
|
367
1032
|
|
368
|
-
#
|
369
|
-
#
|
1033
|
+
# Reverse the DataFrame.
|
1034
|
+
#
|
1035
|
+
# @return [LazyFrame]
|
1036
|
+
def reverse
  # Wrap the handle returned by the underlying `reverse` call.
  reversed = _ldf.reverse
  _from_rbldf(reversed)
end
|
370
1039
|
|
371
|
-
#
|
372
|
-
#
|
1040
|
+
# Shift the values by a given period.
|
1041
|
+
#
|
1042
|
+
# @param periods [Integer]
|
1043
|
+
# Number of places to shift (may be negative).
|
1044
|
+
#
|
1045
|
+
# @return [LazyFrame]
|
1046
|
+
#
|
1047
|
+
# @example
|
1048
|
+
# df = Polars::DataFrame.new(
|
1049
|
+
# {
|
1050
|
+
# "a" => [1, 3, 5],
|
1051
|
+
# "b" => [2, 4, 6]
|
1052
|
+
# }
|
1053
|
+
# ).lazy
|
1054
|
+
# df.shift(1).collect
|
1055
|
+
# # =>
|
1056
|
+
# # shape: (3, 2)
|
1057
|
+
# # ┌──────┬──────┐
|
1058
|
+
# # │ a ┆ b │
|
1059
|
+
# # │ --- ┆ --- │
|
1060
|
+
# # │ i64 ┆ i64 │
|
1061
|
+
# # ╞══════╪══════╡
|
1062
|
+
# # │ null ┆ null │
|
1063
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1064
|
+
# # │ 1 ┆ 2 │
|
1065
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1066
|
+
# # │ 3 ┆ 4 │
|
1067
|
+
# # └──────┴──────┘
|
1068
|
+
#
|
1069
|
+
# @example
|
1070
|
+
# df.shift(-1).collect
|
1071
|
+
# # =>
|
1072
|
+
# # shape: (3, 2)
|
1073
|
+
# # ┌──────┬──────┐
|
1074
|
+
# # │ a ┆ b │
|
1075
|
+
# # │ --- ┆ --- │
|
1076
|
+
# # │ i64 ┆ i64 │
|
1077
|
+
# # ╞══════╪══════╡
|
1078
|
+
# # │ 3 ┆ 4 │
|
1079
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1080
|
+
# # │ 5 ┆ 6 │
|
1081
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1082
|
+
# # │ null ┆ null │
|
1083
|
+
# # └──────┴──────┘
|
1084
|
+
def shift(periods)
  # Wrap the handle returned by the underlying `shift` call.
  shifted = _ldf.shift(periods)
  _from_rbldf(shifted)
end
|
373
1087
|
|
374
|
-
#
|
375
|
-
#
|
1088
|
+
# Shift the values by a given period and fill the resulting null values.
|
1089
|
+
#
|
1090
|
+
# @param periods [Integer]
|
1091
|
+
# Number of places to shift (may be negative).
|
1092
|
+
# @param fill_value [Object]
|
1093
|
+
# Fill `nil` values with the result of this expression.
|
1094
|
+
#
|
1095
|
+
# @return [LazyFrame]
|
1096
|
+
#
|
1097
|
+
# @example
|
1098
|
+
# df = Polars::DataFrame.new(
|
1099
|
+
# {
|
1100
|
+
# "a" => [1, 3, 5],
|
1101
|
+
# "b" => [2, 4, 6]
|
1102
|
+
# }
|
1103
|
+
# ).lazy
|
1104
|
+
# df.shift_and_fill(1, 0).collect
|
1105
|
+
# # =>
|
1106
|
+
# # shape: (3, 2)
|
1107
|
+
# # ┌─────┬─────┐
|
1108
|
+
# # │ a ┆ b │
|
1109
|
+
# # │ --- ┆ --- │
|
1110
|
+
# # │ i64 ┆ i64 │
|
1111
|
+
# # ╞═════╪═════╡
|
1112
|
+
# # │ 0 ┆ 0 │
|
1113
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1114
|
+
# # │ 1 ┆ 2 │
|
1115
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1116
|
+
# # │ 3 ┆ 4 │
|
1117
|
+
# # └─────┴─────┘
|
1118
|
+
#
|
1119
|
+
# @example
|
1120
|
+
# df.shift_and_fill(-1, 0).collect
|
1121
|
+
# # =>
|
1122
|
+
# # shape: (3, 2)
|
1123
|
+
# # ┌─────┬─────┐
|
1124
|
+
# # │ a ┆ b │
|
1125
|
+
# # │ --- ┆ --- │
|
1126
|
+
# # │ i64 ┆ i64 │
|
1127
|
+
# # ╞═════╪═════╡
|
1128
|
+
# # │ 3 ┆ 4 │
|
1129
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1130
|
+
# # │ 5 ┆ 6 │
|
1131
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1132
|
+
# # │ 0 ┆ 0 │
|
1133
|
+
# # └─────┴─────┘
|
1134
|
+
def shift_and_fill(periods, fill_value)
  # Anything that is not already an Expr is turned into a literal expression.
  fill_expr = fill_value.is_a?(Expr) ? fill_value : Polars.lit(fill_value)
  shifted = _ldf.shift_and_fill(periods, fill_expr._rbexpr)
  _from_rbldf(shifted)
end
|
376
1140
|
|
377
|
-
#
|
378
|
-
#
|
1141
|
+
# Get a slice of this DataFrame.
|
1142
|
+
#
|
1143
|
+
# @param offset [Integer]
|
1144
|
+
# Start index. Negative indexing is supported.
|
1145
|
+
# @param length [Integer]
|
1146
|
+
# Length of the slice. If set to `nil`, all rows starting at the offset
|
1147
|
+
# will be selected.
|
1148
|
+
#
|
1149
|
+
# @return [LazyFrame]
|
1150
|
+
#
|
1151
|
+
# @example
|
1152
|
+
# df = Polars::DataFrame.new(
|
1153
|
+
# {
|
1154
|
+
# "a" => ["x", "y", "z"],
|
1155
|
+
# "b" => [1, 3, 5],
|
1156
|
+
# "c" => [2, 4, 6]
|
1157
|
+
# }
|
1158
|
+
# ).lazy
|
1159
|
+
# df.slice(1, 2).collect
|
1160
|
+
# # =>
|
1161
|
+
# # shape: (2, 3)
|
1162
|
+
# # ┌─────┬─────┬─────┐
|
1163
|
+
# # │ a ┆ b ┆ c │
|
1164
|
+
# # │ --- ┆ --- ┆ --- │
|
1165
|
+
# # │ str ┆ i64 ┆ i64 │
|
1166
|
+
# # ╞═════╪═════╪═════╡
|
1167
|
+
# # │ y ┆ 3 ┆ 4 │
|
1168
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1169
|
+
# # │ z ┆ 5 ┆ 6 │
|
1170
|
+
# # └─────┴─────┴─────┘
|
1171
|
+
def slice(offset, length = nil)
  # Guard clause: a nil length is allowed, a negative one is rejected.
  raise ArgumentError, "Negative slice lengths (#{length}) are invalid for LazyFrame" if length && length < 0

  sliced = _ldf.slice(offset, length)
  _from_rbldf(sliced)
end
|
379
1177
|
|
380
|
-
#
|
381
|
-
#
|
1178
|
+
# Get the first `n` rows.
|
1179
|
+
#
|
1180
|
+
# Alias for {#head}.
|
1181
|
+
#
|
1182
|
+
# @param n [Integer]
|
1183
|
+
# Number of rows to return.
|
1184
|
+
#
|
1185
|
+
# @return [LazyFrame]
|
1186
|
+
#
|
1187
|
+
# @note
|
1188
|
+
# Consider using the {#fetch} operation if you only want to test your
|
1189
|
+
# query. The {#fetch} operation will load the first `n` rows at the scan
|
1190
|
+
# level, whereas the {#head}/{#limit} are applied at the end.
|
1191
|
+
def limit(n = 5)
  # Forward the caller's row count. The previous body called head(5),
  # which silently ignored any `n` other than the default.
  head(n)
end
|
382
1194
|
|
383
|
-
#
|
384
|
-
#
|
1195
|
+
# Get the first `n` rows.
|
1196
|
+
#
|
1197
|
+
# @param n [Integer]
|
1198
|
+
# Number of rows to return.
|
1199
|
+
#
|
1200
|
+
# @return [LazyFrame]
|
1201
|
+
#
|
1202
|
+
# @note
|
1203
|
+
# Consider using the {#fetch} operation if you only want to test your
|
1204
|
+
# query. The {#fetch} operation will load the first `n` rows at the scan
|
1205
|
+
# level, whereas the {#head}/{#limit} are applied at the end.
|
1206
|
+
def head(n = 5)
  # The first `n` rows are a slice anchored at row zero.
  start = 0
  slice(start, n)
end
|
385
1209
|
|
386
|
-
#
|
387
|
-
#
|
1210
|
+
# Get the last `n` rows.
|
1211
|
+
#
|
1212
|
+
# @param n [Integer]
|
1213
|
+
# Number of rows.
|
1214
|
+
#
|
1215
|
+
# @return [LazyFrame]
|
1216
|
+
def tail(n = 5)
  # Delegate to the underlying `_ldf`, then re-wrap the result.
  trailing = _ldf.tail(n)
  _from_rbldf(trailing)
end
|
388
1219
|
|
389
|
-
#
|
390
|
-
#
|
1220
|
+
# Get the last row of the LazyFrame.
|
1221
|
+
#
|
1222
|
+
# @return [LazyFrame]
|
1223
|
+
def last
  # The last row is a one-row tail.
  row_count = 1
  tail(row_count)
end
|
391
1226
|
|
392
|
-
#
|
393
|
-
#
|
1227
|
+
# Get the first row of the LazyFrame.
|
1228
|
+
#
|
1229
|
+
# @return [LazyFrame]
|
1230
|
+
def first
  # The first row is a one-row slice starting at offset zero.
  offset = 0
  row_count = 1
  slice(offset, row_count)
end
|
394
1233
|
|
395
1234
|
# def with_row_count
|
396
1235
|
# end
|
397
1236
|
|
398
|
-
#
|
399
|
-
#
|
1237
|
+
# Take every nth row in the LazyFrame and return as a new LazyFrame.
|
1238
|
+
#
|
1239
|
+
# @return [LazyFrame]
|
1240
|
+
#
|
1241
|
+
# @example
|
1242
|
+
# s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]}).lazy
|
1243
|
+
# s.take_every(2).collect
|
1244
|
+
# # =>
|
1245
|
+
# # shape: (2, 2)
|
1246
|
+
# # ┌─────┬─────┐
|
1247
|
+
# # │ a ┆ b │
|
1248
|
+
# # │ --- ┆ --- │
|
1249
|
+
# # │ i64 ┆ i64 │
|
1250
|
+
# # ╞═════╪═════╡
|
1251
|
+
# # │ 1 ┆ 5 │
|
1252
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1253
|
+
# # │ 3 ┆ 7 │
|
1254
|
+
# # └─────┴─────┘
|
1255
|
+
def take_every(n)
  # Build the take_every expression over the "*" selection first,
  # then select it.
  every_nth = Utils.col("*").take_every(n)
  select(every_nth)
end
|
400
1258
|
|
401
|
-
#
|
402
|
-
#
|
1259
|
+
# Fill null values using the specified value or strategy.
|
1260
|
+
#
|
1261
|
+
# @return [LazyFrame]
|
1262
|
+
def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: nil)
  # NOTE(review): `matches_supertype` is accepted but not referenced
  # anywhere in this body — confirm whether that is intentional.
  filler = Polars.all.fill_null(value, strategy: strategy, limit: limit)
  select(filler)
end
|
403
1265
|
|
1266
|
+
# Fill floating point NaN values.
|
1267
|
+
#
|
1268
|
+
# @param fill_value [Object]
|
1269
|
+
# Value to fill the NaN values with.
|
1270
|
+
#
|
1271
|
+
# @return [LazyFrame]
|
404
1272
|
#
|
1273
|
+
# @note
|
1274
|
+
# Note that floating point NaN (Not a Number) are not missing values!
|
1275
|
+
# To replace missing values, use `fill_null` instead.
|
1276
|
+
#
|
1277
|
+
# @example
|
1278
|
+
# df = Polars::DataFrame.new(
|
1279
|
+
# {
|
1280
|
+
# "a" => [1.5, 2, Float::NAN, 4],
|
1281
|
+
# "b" => [0.5, 4, Float::NAN, 13],
|
1282
|
+
# }
|
1283
|
+
# ).lazy
|
1284
|
+
# df.fill_nan(99).collect
|
1285
|
+
# # =>
|
1286
|
+
# # shape: (4, 2)
|
1287
|
+
# # ┌──────┬──────┐
|
1288
|
+
# # │ a ┆ b │
|
1289
|
+
# # │ --- ┆ --- │
|
1290
|
+
# # │ f64 ┆ f64 │
|
1291
|
+
# # ╞══════╪══════╡
|
1292
|
+
# # │ 1.5 ┆ 0.5 │
|
1293
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1294
|
+
# # │ 2.0 ┆ 4.0 │
|
1295
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1296
|
+
# # │ 99.0 ┆ 99.0 │
|
1297
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1298
|
+
# # │ 4.0 ┆ 13.0 │
|
1299
|
+
# # └──────┴──────┘
|
405
1300
|
def fill_nan(fill_value)
|
406
1301
|
if !fill_value.is_a?(Expr)
|
407
1302
|
fill_value = Utils.lit(fill_value)
|
@@ -409,38 +1304,255 @@ module Polars
|
|
409
1304
|
_from_rbldf(_ldf.fill_nan(fill_value._rbexpr))
|
410
1305
|
end
|
411
1306
|
|
412
|
-
#
|
413
|
-
#
|
1307
|
+
# Aggregate the columns in the LazyFrame to their standard deviation value.
|
1308
|
+
#
|
1309
|
+
# @return [LazyFrame]
|
1310
|
+
#
|
1311
|
+
# @example
|
1312
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
|
1313
|
+
# df.std.collect
|
1314
|
+
# # =>
|
1315
|
+
# # shape: (1, 2)
|
1316
|
+
# # ┌──────────┬─────┐
|
1317
|
+
# # │ a ┆ b │
|
1318
|
+
# # │ --- ┆ --- │
|
1319
|
+
# # │ f64 ┆ f64 │
|
1320
|
+
# # ╞══════════╪═════╡
|
1321
|
+
# # │ 1.290994 ┆ 0.5 │
|
1322
|
+
# # └──────────┴─────┘
|
1323
|
+
#
|
1324
|
+
# @example
|
1325
|
+
# df.std(ddof: 0).collect
|
1326
|
+
# # =>
|
1327
|
+
# # shape: (1, 2)
|
1328
|
+
# # ┌──────────┬──────────┐
|
1329
|
+
# # │ a ┆ b │
|
1330
|
+
# # │ --- ┆ --- │
|
1331
|
+
# # │ f64 ┆ f64 │
|
1332
|
+
# # ╞══════════╪══════════╡
|
1333
|
+
# # │ 1.118034 ┆ 0.433013 │
|
1334
|
+
# # └──────────┴──────────┘
|
1335
|
+
def std(ddof: 1)
  # `ddof` arrives as a keyword but is handed on positionally.
  deviation = _ldf.std(ddof)
  _from_rbldf(deviation)
end
|
414
1338
|
|
415
|
-
#
|
416
|
-
#
|
1339
|
+
# Aggregate the columns in the LazyFrame to their variance value.
|
1340
|
+
#
|
1341
|
+
# @return [LazyFrame]
|
1342
|
+
#
|
1343
|
+
# @example
|
1344
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
|
1345
|
+
# df.var.collect
|
1346
|
+
# # =>
|
1347
|
+
# # shape: (1, 2)
|
1348
|
+
# # ┌──────────┬──────┐
|
1349
|
+
# # │ a ┆ b │
|
1350
|
+
# # │ --- ┆ --- │
|
1351
|
+
# # │ f64 ┆ f64 │
|
1352
|
+
# # ╞══════════╪══════╡
|
1353
|
+
# # │ 1.666667 ┆ 0.25 │
|
1354
|
+
# # └──────────┴──────┘
|
1355
|
+
#
|
1356
|
+
# @example
|
1357
|
+
# df.var(ddof: 0).collect
|
1358
|
+
# # =>
|
1359
|
+
# # shape: (1, 2)
|
1360
|
+
# # ┌──────┬────────┐
|
1361
|
+
# # │ a ┆ b │
|
1362
|
+
# # │ --- ┆ --- │
|
1363
|
+
# # │ f64 ┆ f64 │
|
1364
|
+
# # ╞══════╪════════╡
|
1365
|
+
# # │ 1.25 ┆ 0.1875 │
|
1366
|
+
# # └──────┴────────┘
|
1367
|
+
def var(ddof: 1)
  # `ddof` arrives as a keyword but is handed on positionally.
  variance = _ldf.var(ddof)
  _from_rbldf(variance)
end
|
417
1370
|
|
418
|
-
#
|
419
|
-
#
|
1371
|
+
# Aggregate the columns in the LazyFrame to their maximum value.
|
1372
|
+
#
|
1373
|
+
# @return [LazyFrame]
|
1374
|
+
#
|
1375
|
+
# @example
|
1376
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
|
1377
|
+
# df.max.collect
|
1378
|
+
# # =>
|
1379
|
+
# # shape: (1, 2)
|
1380
|
+
# # ┌─────┬─────┐
|
1381
|
+
# # │ a ┆ b │
|
1382
|
+
# # │ --- ┆ --- │
|
1383
|
+
# # │ i64 ┆ i64 │
|
1384
|
+
# # ╞═════╪═════╡
|
1385
|
+
# # │ 4 ┆ 2 │
|
1386
|
+
# # └─────┴─────┘
|
1387
|
+
def max
  # Delegate to the underlying `_ldf`, then re-wrap the result.
  maxima = _ldf.max
  _from_rbldf(maxima)
end
|
420
1390
|
|
421
|
-
#
|
422
|
-
#
|
1391
|
+
# Aggregate the columns in the LazyFrame to their minimum value.
|
1392
|
+
#
|
1393
|
+
# @return [LazyFrame]
|
1394
|
+
#
|
1395
|
+
# @example
|
1396
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
|
1397
|
+
# df.min.collect
|
1398
|
+
# # =>
|
1399
|
+
# # shape: (1, 2)
|
1400
|
+
# # ┌─────┬─────┐
|
1401
|
+
# # │ a ┆ b │
|
1402
|
+
# # │ --- ┆ --- │
|
1403
|
+
# # │ i64 ┆ i64 │
|
1404
|
+
# # ╞═════╪═════╡
|
1405
|
+
# # │ 1 ┆ 1 │
|
1406
|
+
# # └─────┴─────┘
|
1407
|
+
def min
  # Delegate to the underlying `_ldf`, then re-wrap the result.
  minima = _ldf.min
  _from_rbldf(minima)
end
|
423
1410
|
|
424
|
-
#
|
425
|
-
#
|
1411
|
+
# Aggregate the columns in the LazyFrame to their sum value.
|
1412
|
+
#
|
1413
|
+
# @return [LazyFrame]
|
1414
|
+
#
|
1415
|
+
# @example
|
1416
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
|
1417
|
+
# df.sum.collect
|
1418
|
+
# # =>
|
1419
|
+
# # shape: (1, 2)
|
1420
|
+
# # ┌─────┬─────┐
|
1421
|
+
# # │ a ┆ b │
|
1422
|
+
# # │ --- ┆ --- │
|
1423
|
+
# # │ i64 ┆ i64 │
|
1424
|
+
# # ╞═════╪═════╡
|
1425
|
+
# # │ 10 ┆ 5 │
|
1426
|
+
# # └─────┴─────┘
|
1427
|
+
def sum
  # Delegate to the underlying `_ldf`, then re-wrap the result.
  totals = _ldf.sum
  _from_rbldf(totals)
end
|
426
1430
|
|
427
|
-
#
|
428
|
-
#
|
1431
|
+
# Aggregate the columns in the LazyFrame to their mean value.
|
1432
|
+
#
|
1433
|
+
# @return [LazyFrame]
|
1434
|
+
#
|
1435
|
+
# @example
|
1436
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
|
1437
|
+
# df.mean.collect
|
1438
|
+
# # =>
|
1439
|
+
# # shape: (1, 2)
|
1440
|
+
# # ┌─────┬──────┐
|
1441
|
+
# # │ a ┆ b │
|
1442
|
+
# # │ --- ┆ --- │
|
1443
|
+
# # │ f64 ┆ f64 │
|
1444
|
+
# # ╞═════╪══════╡
|
1445
|
+
# # │ 2.5 ┆ 1.25 │
|
1446
|
+
# # └─────┴──────┘
|
1447
|
+
def mean
  # Delegate to the underlying `_ldf`, then re-wrap the result.
  averages = _ldf.mean
  _from_rbldf(averages)
end
|
429
1450
|
|
430
|
-
#
|
431
|
-
#
|
1451
|
+
# Aggregate the columns in the LazyFrame to their median value.
|
1452
|
+
#
|
1453
|
+
# @return [LazyFrame]
|
1454
|
+
#
|
1455
|
+
# @example
|
1456
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
|
1457
|
+
# df.median.collect
|
1458
|
+
# # =>
|
1459
|
+
# # shape: (1, 2)
|
1460
|
+
# # ┌─────┬─────┐
|
1461
|
+
# # │ a ┆ b │
|
1462
|
+
# # │ --- ┆ --- │
|
1463
|
+
# # │ f64 ┆ f64 │
|
1464
|
+
# # ╞═════╪═════╡
|
1465
|
+
# # │ 2.5 ┆ 1.0 │
|
1466
|
+
# # └─────┴─────┘
|
1467
|
+
def median
  # Delegate to the underlying `_ldf`, then re-wrap the result.
  medians = _ldf.median
  _from_rbldf(medians)
end
|
432
1470
|
|
433
|
-
#
|
434
|
-
#
|
1471
|
+
# Aggregate the columns in the LazyFrame to their quantile value.
|
1472
|
+
#
|
1473
|
+
# @param quantile [Float]
|
1474
|
+
# Quantile between 0.0 and 1.0.
|
1475
|
+
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
|
1476
|
+
# Interpolation method.
|
1477
|
+
#
|
1478
|
+
# @return [LazyFrame]
|
1479
|
+
#
|
1480
|
+
# @example
|
1481
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 2, 1, 1]}).lazy
|
1482
|
+
# df.quantile(0.7).collect
|
1483
|
+
# # =>
|
1484
|
+
# # shape: (1, 2)
|
1485
|
+
# # ┌─────┬─────┐
|
1486
|
+
# # │ a ┆ b │
|
1487
|
+
# # │ --- ┆ --- │
|
1488
|
+
# # │ f64 ┆ f64 │
|
1489
|
+
# # ╞═════╪═════╡
|
1490
|
+
# # │ 3.0 ┆ 1.0 │
|
1491
|
+
# # └─────┴─────┘
|
1492
|
+
def quantile(quantile, interpolation: "nearest")
  # Both arguments are handed on positionally, in this order.
  result = _ldf.quantile(quantile, interpolation)
  _from_rbldf(result)
end
|
435
1495
|
|
1496
|
+
# Explode lists to long format.
|
1497
|
+
#
|
1498
|
+
# @return [LazyFrame]
|
436
1499
|
#
|
1500
|
+
# @example
|
1501
|
+
# df = Polars::DataFrame.new(
|
1502
|
+
# {
|
1503
|
+
# "letters" => ["a", "a", "b", "c"],
|
1504
|
+
# "numbers" => [[1], [2, 3], [4, 5], [6, 7, 8]],
|
1505
|
+
# }
|
1506
|
+
# ).lazy
|
1507
|
+
# df.explode("numbers").collect
|
1508
|
+
# # =>
|
1509
|
+
# # shape: (8, 2)
|
1510
|
+
# # ┌─────────┬─────────┐
|
1511
|
+
# # │ letters ┆ numbers │
|
1512
|
+
# # │ --- ┆ --- │
|
1513
|
+
# # │ str ┆ i64 │
|
1514
|
+
# # ╞═════════╪═════════╡
|
1515
|
+
# # │ a ┆ 1 │
|
1516
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
1517
|
+
# # │ a ┆ 2 │
|
1518
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
1519
|
+
# # │ a ┆ 3 │
|
1520
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
1521
|
+
# # │ b ┆ 4 │
|
1522
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
1523
|
+
# # │ b ┆ 5 │
|
1524
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
1525
|
+
# # │ c ┆ 6 │
|
1526
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
1527
|
+
# # │ c ┆ 7 │
|
1528
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
1529
|
+
# # │ c ┆ 8 │
|
1530
|
+
# # └─────────┴─────────┘
|
437
1531
|
def explode(columns)
  # Normalise the caller's selection through the Utils helper before
  # handing it on.
  exprs = Utils.selection_to_rbexpr_list(columns)
  _from_rbldf(_ldf.explode(exprs))
end
|
441
1535
|
|
442
|
-
#
|
443
|
-
#
|
1536
|
+
# Drop duplicate rows from this LazyFrame.
|
1537
|
+
#
|
1538
|
+
# Note that this fails if there is a column of type `List` in the DataFrame or
|
1539
|
+
# subset.
|
1540
|
+
#
|
1541
|
+
# @param maintain_order [Boolean]
|
1542
|
+
# Keep the same order as the original DataFrame. This requires more work to
|
1543
|
+
# compute.
|
1544
|
+
# @param subset [Object]
|
1545
|
+
# Subset to use to compare rows.
|
1546
|
+
# @param keep ["first", "last"]
|
1547
|
+
# Which of the duplicate rows to keep.
|
1548
|
+
#
|
1549
|
+
# @return [LazyFrame]
|
1550
|
+
def unique(maintain_order: true, subset: nil, keep: "first")
  # A lone subset value (neither nil nor an Array) is wrapped in an
  # Array; nil and Arrays are forwarded unchanged.
  subset = [subset] unless subset.nil? || subset.is_a?(Array)
  deduplicated = _ldf.unique(maintain_order, subset, keep)
  _from_rbldf(deduplicated)
end
|
444
1556
|
|
445
1557
|
# def drop_nulls
|
446
1558
|
# end
|
@@ -451,11 +1563,97 @@ module Polars
|
|
451
1563
|
# def map
|
452
1564
|
# end
|
453
1565
|
|
454
|
-
#
|
455
|
-
#
|
1566
|
+
# Interpolate intermediate values. The interpolation method is linear.
|
1567
|
+
#
|
1568
|
+
# @return [LazyFrame]
|
1569
|
+
#
|
1570
|
+
# @example
|
1571
|
+
# df = Polars::DataFrame.new(
|
1572
|
+
# {
|
1573
|
+
# "foo" => [1, nil, 9, 10],
|
1574
|
+
# "bar" => [6, 7, 9, nil],
|
1575
|
+
# "baz" => [1, nil, nil, 9]
|
1576
|
+
# }
|
1577
|
+
# ).lazy
|
1578
|
+
# df.interpolate.collect
|
1579
|
+
# # =>
|
1580
|
+
# # shape: (4, 3)
|
1581
|
+
# # ┌─────┬──────┬─────┐
|
1582
|
+
# # │ foo ┆ bar ┆ baz │
|
1583
|
+
# # │ --- ┆ --- ┆ --- │
|
1584
|
+
# # │ i64 ┆ i64 ┆ i64 │
|
1585
|
+
# # ╞═════╪══════╪═════╡
|
1586
|
+
# # │ 1 ┆ 6 ┆ 1 │
|
1587
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
1588
|
+
# # │ 5 ┆ 7 ┆ 3 │
|
1589
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
1590
|
+
# # │ 9 ┆ 9 ┆ 6 │
|
1591
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
1592
|
+
# # │ 10 ┆ null ┆ 9 │
|
1593
|
+
# # └─────┴──────┴─────┘
|
1594
|
+
def interpolate
  # Build the interpolate expression over the "*" selection first,
  # then select it.
  interpolated = Utils.col("*").interpolate
  select(interpolated)
end
|
456
1597
|
|
457
|
-
#
|
458
|
-
#
|
1598
|
+
# Decompose a struct into its fields.
|
1599
|
+
#
|
1600
|
+
# The fields will be inserted into the `DataFrame` on the location of the
|
1601
|
+
# `struct` type.
|
1602
|
+
#
|
1603
|
+
# @param names [Object]
|
1604
|
+
# Names of the struct columns that will be decomposed into their fields.
|
1605
|
+
#
|
1606
|
+
# @return [LazyFrame]
|
1607
|
+
#
|
1608
|
+
# @example
|
1609
|
+
# df = (
|
1610
|
+
# Polars::DataFrame.new(
|
1611
|
+
# {
|
1612
|
+
# "before" => ["foo", "bar"],
|
1613
|
+
# "t_a" => [1, 2],
|
1614
|
+
# "t_b" => ["a", "b"],
|
1615
|
+
# "t_c" => [true, nil],
|
1616
|
+
# "t_d" => [[1, 2], [3]],
|
1617
|
+
# "after" => ["baz", "womp"]
|
1618
|
+
# }
|
1619
|
+
# )
|
1620
|
+
# .lazy
|
1621
|
+
# .select(
|
1622
|
+
# ["before", Polars.struct(Polars.col("^t_.$")).alias("t_struct"), "after"]
|
1623
|
+
# )
|
1624
|
+
# )
|
1625
|
+
# df.fetch
|
1626
|
+
# # =>
|
1627
|
+
# # shape: (2, 3)
|
1628
|
+
# # ┌────────┬─────────────────────┬───────┐
|
1629
|
+
# # │ before ┆ t_struct ┆ after │
|
1630
|
+
# # │ --- ┆ --- ┆ --- │
|
1631
|
+
# # │ str ┆ struct[4] ┆ str │
|
1632
|
+
# # ╞════════╪═════════════════════╪═══════╡
|
1633
|
+
# # │ foo ┆ {1,"a",true,[1, 2]} ┆ baz │
|
1634
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
1635
|
+
# # │ bar ┆ {2,"b",null,[3]} ┆ womp │
|
1636
|
+
# # └────────┴─────────────────────┴───────┘
|
1637
|
+
#
|
1638
|
+
# @example
|
1639
|
+
# df.unnest("t_struct").fetch
|
1640
|
+
# # =>
|
1641
|
+
# # shape: (2, 6)
|
1642
|
+
# # ┌────────┬─────┬─────┬──────┬───────────┬───────┐
|
1643
|
+
# # │ before ┆ t_a ┆ t_b ┆ t_c ┆ t_d ┆ after │
|
1644
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
1645
|
+
# # │ str ┆ i64 ┆ str ┆ bool ┆ list[i64] ┆ str │
|
1646
|
+
# # ╞════════╪═════╪═════╪══════╪═══════════╪═══════╡
|
1647
|
+
# # │ foo ┆ 1 ┆ a ┆ true ┆ [1, 2] ┆ baz │
|
1648
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
1649
|
+
# # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
|
1650
|
+
# # └────────┴─────┴─────┴──────┴───────────┴───────┘
|
1651
|
+
def unnest(names)
  # A single String name is treated as a one-element list; any other
  # value is forwarded unchanged.
  names = [names] if names.is_a?(String)
  unnested = _ldf.unnest(names)
  _from_rbldf(unnested)
end
|
459
1657
|
|
460
1658
|
private
|
461
1659
|
|