polars-df 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +9 -0
- data/Cargo.lock +74 -3
- data/Cargo.toml +3 -0
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +18 -1
- data/ext/polars/src/conversion.rs +115 -2
- data/ext/polars/src/dataframe.rs +228 -11
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +5 -5
- data/ext/polars/src/lazy/dsl.rs +157 -2
- data/ext/polars/src/lib.rs +185 -10
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +217 -29
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +1 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +2384 -140
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +4374 -53
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +518 -0
- data/lib/polars/io.rb +421 -2
- data/lib/polars/lazy_frame.rb +1267 -69
- data/lib/polars/lazy_functions.rb +412 -24
- data/lib/polars/lazy_group_by.rb +80 -0
- data/lib/polars/list_expr.rb +507 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +2256 -242
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +847 -10
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +71 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +12 -10
- metadata +15 -2
data/lib/polars/data_frame.rb
CHANGED
@@ -155,12 +155,35 @@ module Polars
|
|
155
155
|
end
|
156
156
|
|
157
157
|
# @private
|
158
|
-
def self._read_parquet(
|
158
|
+
def self._read_parquet(
|
159
|
+
file,
|
160
|
+
columns: nil,
|
161
|
+
n_rows: nil,
|
162
|
+
parallel: "auto",
|
163
|
+
row_count_name: nil,
|
164
|
+
row_count_offset: 0,
|
165
|
+
low_memory: false
|
166
|
+
)
|
159
167
|
if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
|
160
168
|
file = Utils.format_path(file)
|
161
169
|
end
|
162
170
|
|
163
|
-
|
171
|
+
if file.is_a?(String) && file.include?("*")
|
172
|
+
raise Todo
|
173
|
+
end
|
174
|
+
|
175
|
+
projection, columns = Utils.handle_projection_columns(columns)
|
176
|
+
_from_rbdf(
|
177
|
+
RbDataFrame.read_parquet(
|
178
|
+
file,
|
179
|
+
columns,
|
180
|
+
projection,
|
181
|
+
n_rows,
|
182
|
+
parallel,
|
183
|
+
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
184
|
+
low_memory
|
185
|
+
)
|
186
|
+
)
|
164
187
|
end
|
165
188
|
|
166
189
|
# def self._read_avro
|
@@ -259,11 +282,13 @@ module Polars
|
|
259
282
|
# @return [Array]
|
260
283
|
#
|
261
284
|
# @example
|
262
|
-
# df = Polars::DataFrame.new(
|
263
|
-
#
|
264
|
-
#
|
265
|
-
#
|
266
|
-
#
|
285
|
+
# df = Polars::DataFrame.new(
|
286
|
+
# {
|
287
|
+
# "foo" => [1, 2, 3],
|
288
|
+
# "bar" => [6, 7, 8],
|
289
|
+
# "ham" => ["a", "b", "c"]
|
290
|
+
# }
|
291
|
+
# )
|
267
292
|
# df.columns
|
268
293
|
# # => ["foo", "bar", "ham"]
|
269
294
|
def columns
|
@@ -279,11 +304,13 @@ module Polars
|
|
279
304
|
# @return [Object]
|
280
305
|
#
|
281
306
|
# @example
|
282
|
-
# df = Polars::DataFrame.new(
|
283
|
-
#
|
284
|
-
#
|
285
|
-
#
|
286
|
-
#
|
307
|
+
# df = Polars::DataFrame.new(
|
308
|
+
# {
|
309
|
+
# "foo" => [1, 2, 3],
|
310
|
+
# "bar" => [6, 7, 8],
|
311
|
+
# "ham" => ["a", "b", "c"]
|
312
|
+
# }
|
313
|
+
# )
|
287
314
|
# df.columns = ["apple", "banana", "orange"]
|
288
315
|
# df
|
289
316
|
# # =>
|
@@ -308,11 +335,13 @@ module Polars
|
|
308
335
|
# @return [Array]
|
309
336
|
#
|
310
337
|
# @example
|
311
|
-
# df = Polars::DataFrame.new(
|
312
|
-
#
|
313
|
-
#
|
314
|
-
#
|
315
|
-
#
|
338
|
+
# df = Polars::DataFrame.new(
|
339
|
+
# {
|
340
|
+
# "foo" => [1, 2, 3],
|
341
|
+
# "bar" => [6.0, 7.0, 8.0],
|
342
|
+
# "ham" => ["a", "b", "c"]
|
343
|
+
# }
|
344
|
+
# )
|
316
345
|
# df.dtypes
|
317
346
|
# # => [:i64, :f64, :str]
|
318
347
|
def dtypes
|
@@ -324,56 +353,132 @@ module Polars
|
|
324
353
|
# @return [Hash]
|
325
354
|
#
|
326
355
|
# @example
|
327
|
-
# df = Polars::DataFrame.new(
|
328
|
-
#
|
329
|
-
#
|
330
|
-
#
|
331
|
-
#
|
356
|
+
# df = Polars::DataFrame.new(
|
357
|
+
# {
|
358
|
+
# "foo" => [1, 2, 3],
|
359
|
+
# "bar" => [6.0, 7.0, 8.0],
|
360
|
+
# "ham" => ["a", "b", "c"]
|
361
|
+
# }
|
362
|
+
# )
|
332
363
|
# df.schema
|
333
364
|
# # => {"foo"=>:i64, "bar"=>:f64, "ham"=>:str}
|
334
365
|
def schema
|
335
366
|
columns.zip(dtypes).to_h
|
336
367
|
end
|
337
368
|
|
338
|
-
#
|
339
|
-
#
|
369
|
+
# Equal.
|
370
|
+
#
|
371
|
+
# @return [DataFrame]
|
372
|
+
def ==(other)
|
373
|
+
_comp(other, "eq")
|
374
|
+
end
|
340
375
|
|
341
|
-
#
|
342
|
-
#
|
376
|
+
# Not equal.
|
377
|
+
#
|
378
|
+
# @return [DataFrame]
|
379
|
+
def !=(other)
|
380
|
+
_comp(other, "neq")
|
381
|
+
end
|
343
382
|
|
344
|
-
#
|
345
|
-
#
|
383
|
+
# Greater than.
|
384
|
+
#
|
385
|
+
# @return [DataFrame]
|
386
|
+
def >(other)
|
387
|
+
_comp(other, "gt")
|
388
|
+
end
|
346
389
|
|
347
|
-
#
|
348
|
-
#
|
390
|
+
# Less than.
|
391
|
+
#
|
392
|
+
# @return [DataFrame]
|
393
|
+
def <(other)
|
394
|
+
_comp(other, "lt")
|
395
|
+
end
|
349
396
|
|
350
|
-
#
|
351
|
-
#
|
397
|
+
# Greater than or equal.
|
398
|
+
#
|
399
|
+
# @return [DataFrame]
|
400
|
+
def >=(other)
|
401
|
+
_comp(other, "gt_eq")
|
402
|
+
end
|
352
403
|
|
353
|
-
#
|
354
|
-
#
|
404
|
+
# Less than or equal.
|
405
|
+
#
|
406
|
+
# @return [DataFrame]
|
407
|
+
def <=(other)
|
408
|
+
_comp(other, "lt_eq")
|
409
|
+
end
|
355
410
|
|
356
|
-
#
|
357
|
-
#
|
411
|
+
# Performs multiplication.
|
412
|
+
#
|
413
|
+
# @return [DataFrame]
|
414
|
+
def *(other)
|
415
|
+
if other.is_a?(DataFrame)
|
416
|
+
return _from_rbdf(_df.mul_df(other._df))
|
417
|
+
end
|
358
418
|
|
359
|
-
|
360
|
-
|
419
|
+
other = _prepare_other_arg(other)
|
420
|
+
_from_rbdf(_df.mul(other._s))
|
421
|
+
end
|
361
422
|
|
362
|
-
#
|
363
|
-
#
|
423
|
+
# Performs division.
|
424
|
+
#
|
425
|
+
# @return [DataFrame]
|
426
|
+
def /(other)
|
427
|
+
if other.is_a?(DataFrame)
|
428
|
+
return _from_rbdf(_df.div_df(other._df))
|
429
|
+
end
|
364
430
|
|
365
|
-
|
366
|
-
|
431
|
+
other = _prepare_other_arg(other)
|
432
|
+
_from_rbdf(_df.div(other._s))
|
433
|
+
end
|
367
434
|
|
368
|
-
#
|
369
|
-
#
|
435
|
+
# Performs addition.
|
436
|
+
#
|
437
|
+
# @return [DataFrame]
|
438
|
+
def +(other)
|
439
|
+
if other.is_a?(DataFrame)
|
440
|
+
return _from_rbdf(_df.add_df(other._df))
|
441
|
+
end
|
442
|
+
|
443
|
+
other = _prepare_other_arg(other)
|
444
|
+
_from_rbdf(_df.add(other._s))
|
445
|
+
end
|
446
|
+
|
447
|
+
# Performs subtraction.
|
448
|
+
#
|
449
|
+
# @return [DataFrame]
|
450
|
+
def -(other)
|
451
|
+
if other.is_a?(DataFrame)
|
452
|
+
return _from_rbdf(_df.sub_df(other._df))
|
453
|
+
end
|
454
|
+
|
455
|
+
other = _prepare_other_arg(other)
|
456
|
+
_from_rbdf(_df.sub(other._s))
|
457
|
+
end
|
458
|
+
|
459
|
+
# Returns the modulo.
|
460
|
+
#
|
461
|
+
# @return [DataFrame]
|
462
|
+
def %(other)
|
463
|
+
if other.is_a?(DataFrame)
|
464
|
+
return _from_rbdf(_df.rem_df(other._df))
|
465
|
+
end
|
466
|
+
|
467
|
+
other = _prepare_other_arg(other)
|
468
|
+
_from_rbdf(_df.rem(other._s))
|
469
|
+
end
|
370
470
|
|
471
|
+
# Returns a string representing the DataFrame.
|
371
472
|
#
|
473
|
+
# @return [String]
|
372
474
|
def to_s
|
373
475
|
_df.to_s
|
374
476
|
end
|
375
477
|
alias_method :inspect, :to_s
|
376
478
|
|
479
|
+
# Check if DataFrame includes column.
|
480
|
+
#
|
481
|
+
# @return [Boolean]
|
377
482
|
def include?(name)
|
378
483
|
columns.include?(name)
|
379
484
|
end
|
@@ -387,9 +492,78 @@ module Polars
|
|
387
492
|
# def _pos_idxs
|
388
493
|
# end
|
389
494
|
|
495
|
+
# Returns subset of the DataFrame.
|
390
496
|
#
|
391
|
-
|
392
|
-
|
497
|
+
# @return [Object]
|
498
|
+
def [](*args)
|
499
|
+
if args.size == 2
|
500
|
+
row_selection, col_selection = args
|
501
|
+
|
502
|
+
# df[.., unknown]
|
503
|
+
if row_selection.is_a?(Range)
|
504
|
+
|
505
|
+
# multiple slices
|
506
|
+
# df[.., ..]
|
507
|
+
if col_selection.is_a?(Range)
|
508
|
+
raise Todo
|
509
|
+
end
|
510
|
+
end
|
511
|
+
|
512
|
+
# df[2, ..] (select row as df)
|
513
|
+
if row_selection.is_a?(Integer)
|
514
|
+
if col_selection.is_a?(Array)
|
515
|
+
df = self[0.., col_selection]
|
516
|
+
return df.slice(row_selection, 1)
|
517
|
+
end
|
518
|
+
# df[2, "a"]
|
519
|
+
if col_selection.is_a?(String)
|
520
|
+
return self[col_selection][row_selection]
|
521
|
+
end
|
522
|
+
end
|
523
|
+
|
524
|
+
# column selection can be "a" and ["a", "b"]
|
525
|
+
if col_selection.is_a?(String)
|
526
|
+
col_selection = [col_selection]
|
527
|
+
end
|
528
|
+
|
529
|
+
# df[.., 1]
|
530
|
+
if col_selection.is_a?(Integer)
|
531
|
+
series = to_series(col_selection)
|
532
|
+
return series[row_selection]
|
533
|
+
end
|
534
|
+
|
535
|
+
if col_selection.is_a?(Array)
|
536
|
+
# df[.., [1, 2]]
|
537
|
+
if is_int_sequence(col_selection)
|
538
|
+
series_list = col_selection.map { |i| to_series(i) }
|
539
|
+
df = self.class.new(series_list)
|
540
|
+
return df[row_selection]
|
541
|
+
end
|
542
|
+
end
|
543
|
+
|
544
|
+
df = self[col_selection]
|
545
|
+
return df[row_selection]
|
546
|
+
elsif args.size == 1
|
547
|
+
item = args[0]
|
548
|
+
|
549
|
+
# select single column
|
550
|
+
# df["foo"]
|
551
|
+
if item.is_a?(String)
|
552
|
+
return Utils.wrap_s(_df.column(item))
|
553
|
+
end
|
554
|
+
|
555
|
+
# df[idx]
|
556
|
+
if item.is_a?(Integer)
|
557
|
+
return slice(_pos_idx(item, dim: 0), 1)
|
558
|
+
end
|
559
|
+
|
560
|
+
# df[..]
|
561
|
+
if item.is_a?(Range)
|
562
|
+
return Slice.new(self).apply(item)
|
563
|
+
end
|
564
|
+
end
|
565
|
+
|
566
|
+
raise ArgumentError, "Cannot get item of type: #{item.class.name}"
|
393
567
|
end
|
394
568
|
|
395
569
|
# def []=(key, value)
|
@@ -397,7 +571,9 @@ module Polars
|
|
397
571
|
|
398
572
|
# no to_arrow
|
399
573
|
|
574
|
+
# Convert DataFrame to a hash mapping column name to values.
|
400
575
|
#
|
576
|
+
# @return [Hash]
|
401
577
|
def to_h(as_series: true)
|
402
578
|
if as_series
|
403
579
|
get_columns.to_h { |s| [s.name, s] }
|
@@ -422,11 +598,13 @@ module Polars
|
|
422
598
|
# @return [Series]
|
423
599
|
#
|
424
600
|
# @example
|
425
|
-
# df = Polars::DataFrame.new(
|
426
|
-
#
|
427
|
-
#
|
428
|
-
#
|
429
|
-
#
|
601
|
+
# df = Polars::DataFrame.new(
|
602
|
+
# {
|
603
|
+
# "foo" => [1, 2, 3],
|
604
|
+
# "bar" => [6, 7, 8],
|
605
|
+
# "ham" => ["a", "b", "c"]
|
606
|
+
# }
|
607
|
+
# )
|
430
608
|
# df.to_series(1)
|
431
609
|
# # =>
|
432
610
|
# # shape: (3,)
|
@@ -519,11 +697,13 @@ module Polars
|
|
519
697
|
# @return [String, nil]
|
520
698
|
#
|
521
699
|
# @example
|
522
|
-
# df = Polars::DataFrame.new(
|
523
|
-
#
|
524
|
-
#
|
525
|
-
#
|
526
|
-
#
|
700
|
+
# df = Polars::DataFrame.new(
|
701
|
+
# {
|
702
|
+
# "foo" => [1, 2, 3, 4, 5],
|
703
|
+
# "bar" => [6, 7, 8, 9, 10],
|
704
|
+
# "ham" => ["a", "b", "c", "d", "e"]
|
705
|
+
# }
|
706
|
+
# )
|
527
707
|
# df.write_csv("file.csv")
|
528
708
|
def write_csv(
|
529
709
|
file = nil,
|
@@ -694,11 +874,13 @@ module Polars
|
|
694
874
|
# @return [DataFrame]
|
695
875
|
#
|
696
876
|
# @example
|
697
|
-
# df = Polars::DataFrame.new(
|
698
|
-
#
|
699
|
-
#
|
700
|
-
#
|
701
|
-
#
|
877
|
+
# df = Polars::DataFrame.new(
|
878
|
+
# {
|
879
|
+
# "key" => ["a", "b", "c"],
|
880
|
+
# "val" => [1, 2, 3]
|
881
|
+
# }
|
882
|
+
# )
|
883
|
+
# df.reverse
|
702
884
|
# # =>
|
703
885
|
# # shape: (3, 2)
|
704
886
|
# # ┌─────┬─────┐
|
@@ -724,11 +906,13 @@ module Polars
|
|
724
906
|
# @return [DataFrame]
|
725
907
|
#
|
726
908
|
# @example
|
727
|
-
# df = Polars::DataFrame.new(
|
728
|
-
#
|
729
|
-
#
|
730
|
-
#
|
731
|
-
#
|
909
|
+
# df = Polars::DataFrame.new(
|
910
|
+
# {
|
911
|
+
# "foo" => [1, 2, 3],
|
912
|
+
# "bar" => [6, 7, 8],
|
913
|
+
# "ham" => ["a", "b", "c"]
|
914
|
+
# }
|
915
|
+
# )
|
732
916
|
# df.rename({"foo" => "apple"})
|
733
917
|
# # =>
|
734
918
|
# # shape: (3, 3)
|
@@ -775,11 +959,13 @@ module Polars
|
|
775
959
|
# # └─────┴─────┴─────┘
|
776
960
|
#
|
777
961
|
# @example
|
778
|
-
# df = Polars::DataFrame.new(
|
779
|
-
#
|
780
|
-
#
|
781
|
-
#
|
782
|
-
#
|
962
|
+
# df = Polars::DataFrame.new(
|
963
|
+
# {
|
964
|
+
# "a" => [1, 2, 3, 4],
|
965
|
+
# "b" => [0.5, 4, 10, 13],
|
966
|
+
# "c" => [true, true, false, true]
|
967
|
+
# }
|
968
|
+
# )
|
783
969
|
# s = Polars::Series.new("d", [-2.5, 15, 20.5, 0])
|
784
970
|
# df.insert_at_idx(3, s)
|
785
971
|
# # =>
|
@@ -805,63 +991,560 @@ module Polars
|
|
805
991
|
self
|
806
992
|
end
|
807
993
|
|
994
|
+
# Filter the rows in the DataFrame based on a predicate expression.
|
995
|
+
#
|
996
|
+
# @param predicate [Expr]
|
997
|
+
# Expression that evaluates to a boolean Series.
|
998
|
+
#
|
999
|
+
# @return [DataFrame]
|
1000
|
+
#
|
1001
|
+
# @example Filter on one condition:
|
1002
|
+
# df = Polars::DataFrame.new(
|
1003
|
+
# {
|
1004
|
+
# "foo" => [1, 2, 3],
|
1005
|
+
# "bar" => [6, 7, 8],
|
1006
|
+
# "ham" => ["a", "b", "c"]
|
1007
|
+
# }
|
1008
|
+
# )
|
1009
|
+
# df.filter(Polars.col("foo") < 3)
|
1010
|
+
# # =>
|
1011
|
+
# # shape: (2, 3)
|
1012
|
+
# # ┌─────┬─────┬─────┐
|
1013
|
+
# # │ foo ┆ bar ┆ ham │
|
1014
|
+
# # │ --- ┆ --- ┆ --- │
|
1015
|
+
# # │ i64 ┆ i64 ┆ str │
|
1016
|
+
# # ╞═════╪═════╪═════╡
|
1017
|
+
# # │ 1 ┆ 6 ┆ a │
|
1018
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1019
|
+
# # │ 2 ┆ 7 ┆ b │
|
1020
|
+
# # └─────┴─────┴─────┘
|
1021
|
+
#
|
1022
|
+
# @example Filter on multiple conditions:
|
1023
|
+
# df.filter((Polars.col("foo") < 3) & (Polars.col("ham") == "a"))
|
1024
|
+
# # =>
|
1025
|
+
# # shape: (1, 3)
|
1026
|
+
# # ┌─────┬─────┬─────┐
|
1027
|
+
# # │ foo ┆ bar ┆ ham │
|
1028
|
+
# # │ --- ┆ --- ┆ --- │
|
1029
|
+
# # │ i64 ┆ i64 ┆ str │
|
1030
|
+
# # ╞═════╪═════╪═════╡
|
1031
|
+
# # │ 1 ┆ 6 ┆ a │
|
1032
|
+
# # └─────┴─────┴─────┘
|
808
1033
|
def filter(predicate)
|
809
1034
|
lazy.filter(predicate).collect
|
810
1035
|
end
|
811
1036
|
|
812
|
-
#
|
813
|
-
#
|
1037
|
+
# Summary statistics for a DataFrame.
|
1038
|
+
#
|
1039
|
+
# @return [DataFrame]
|
1040
|
+
#
|
1041
|
+
# @example
|
1042
|
+
# df = Polars::DataFrame.new(
|
1043
|
+
# {
|
1044
|
+
# "a" => [1.0, 2.8, 3.0],
|
1045
|
+
# "b" => [4, 5, nil],
|
1046
|
+
# "c" => [true, false, true],
|
1047
|
+
# "d" => [nil, "b", "c"],
|
1048
|
+
# "e" => ["usd", "eur", nil]
|
1049
|
+
# }
|
1050
|
+
# )
|
1051
|
+
# df.describe
|
1052
|
+
# # =>
|
1053
|
+
# # shape: (7, 6)
|
1054
|
+
# # ┌────────────┬──────────┬──────────┬──────┬──────┬──────┐
|
1055
|
+
# # │ describe ┆ a ┆ b ┆ c ┆ d ┆ e │
|
1056
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
1057
|
+
# # │ str ┆ f64 ┆ f64 ┆ f64 ┆ str ┆ str │
|
1058
|
+
# # ╞════════════╪══════════╪══════════╪══════╪══════╪══════╡
|
1059
|
+
# # │ count ┆ 3.0 ┆ 3.0 ┆ 3.0 ┆ 3 ┆ 3 │
|
1060
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1061
|
+
# # │ null_count ┆ 0.0 ┆ 1.0 ┆ 0.0 ┆ 1 ┆ 1 │
|
1062
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1063
|
+
# # │ mean ┆ 2.266667 ┆ 4.5 ┆ null ┆ null ┆ null │
|
1064
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1065
|
+
# # │ std ┆ 1.101514 ┆ 0.707107 ┆ null ┆ null ┆ null │
|
1066
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1067
|
+
# # │ min ┆ 1.0 ┆ 4.0 ┆ 0.0 ┆ b ┆ eur │
|
1068
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1069
|
+
# # │ max ┆ 3.0 ┆ 5.0 ┆ 1.0 ┆ c ┆ usd │
|
1070
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1071
|
+
# # │ median ┆ 2.8 ┆ 4.5 ┆ null ┆ null ┆ null │
|
1072
|
+
# # └────────────┴──────────┴──────────┴──────┴──────┴──────┘
|
1073
|
+
def describe
|
1074
|
+
describe_cast = lambda do |stat|
|
1075
|
+
columns = []
|
1076
|
+
self.columns.each_with_index do |s, i|
|
1077
|
+
if self[s].is_numeric || self[s].is_boolean
|
1078
|
+
columns << stat[0.., i].cast(:f64)
|
1079
|
+
else
|
1080
|
+
# for dates, strings, etc, we cast to string so that all
|
1081
|
+
# statistics can be shown
|
1082
|
+
columns << stat[0.., i].cast(:str)
|
1083
|
+
end
|
1084
|
+
end
|
1085
|
+
self.class.new(columns)
|
1086
|
+
end
|
814
1087
|
|
815
|
-
|
816
|
-
|
1088
|
+
summary = _from_rbdf(
|
1089
|
+
Polars.concat(
|
1090
|
+
[
|
1091
|
+
describe_cast.(
|
1092
|
+
self.class.new(columns.to_h { |c| [c, [height]] })
|
1093
|
+
),
|
1094
|
+
describe_cast.(null_count),
|
1095
|
+
describe_cast.(mean),
|
1096
|
+
describe_cast.(std),
|
1097
|
+
describe_cast.(min),
|
1098
|
+
describe_cast.(max),
|
1099
|
+
describe_cast.(median)
|
1100
|
+
]
|
1101
|
+
)._df
|
1102
|
+
)
|
1103
|
+
summary.insert_at_idx(
|
1104
|
+
0,
|
1105
|
+
Polars::Series.new(
|
1106
|
+
"describe",
|
1107
|
+
["count", "null_count", "mean", "std", "min", "max", "median"],
|
1108
|
+
)
|
1109
|
+
)
|
1110
|
+
summary
|
1111
|
+
end
|
817
1112
|
|
818
|
-
#
|
819
|
-
#
|
1113
|
+
# Find the index of a column by name.
|
1114
|
+
#
|
1115
|
+
# @param name [String]
|
1116
|
+
# Name of the column to find.
|
1117
|
+
#
|
1118
|
+
# @return [Integer]
|
1119
|
+
#
|
1120
|
+
# @example
|
1121
|
+
# df = Polars::DataFrame.new(
|
1122
|
+
# {"foo" => [1, 2, 3], "bar" => [6, 7, 8], "ham" => ["a", "b", "c"]}
|
1123
|
+
# )
|
1124
|
+
# df.find_idx_by_name("ham")
|
1125
|
+
# # => 2
|
1126
|
+
def find_idx_by_name(name)
|
1127
|
+
_df.find_idx_by_name(name)
|
1128
|
+
end
|
1129
|
+
|
1130
|
+
# Replace a column at an index location.
|
1131
|
+
#
|
1132
|
+
# @param index [Integer]
|
1133
|
+
# Column index.
|
1134
|
+
# @param series [Series]
|
1135
|
+
# Series that will replace the column.
|
1136
|
+
#
|
1137
|
+
# @return [DataFrame]
|
1138
|
+
#
|
1139
|
+
# @example
|
1140
|
+
# df = Polars::DataFrame.new(
|
1141
|
+
# {
|
1142
|
+
# "foo" => [1, 2, 3],
|
1143
|
+
# "bar" => [6, 7, 8],
|
1144
|
+
# "ham" => ["a", "b", "c"]
|
1145
|
+
# }
|
1146
|
+
# )
|
1147
|
+
# s = Polars::Series.new("apple", [10, 20, 30])
|
1148
|
+
# df.replace_at_idx(0, s)
|
1149
|
+
# # =>
|
1150
|
+
# # shape: (3, 3)
|
1151
|
+
# # ┌───────┬─────┬─────┐
|
1152
|
+
# # │ apple ┆ bar ┆ ham │
|
1153
|
+
# # │ --- ┆ --- ┆ --- │
|
1154
|
+
# # │ i64 ┆ i64 ┆ str │
|
1155
|
+
# # ╞═══════╪═════╪═════╡
|
1156
|
+
# # │ 10 ┆ 6 ┆ a │
|
1157
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1158
|
+
# # │ 20 ┆ 7 ┆ b │
|
1159
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1160
|
+
# # │ 30 ┆ 8 ┆ c │
|
1161
|
+
# # └───────┴─────┴─────┘
|
1162
|
+
def replace_at_idx(index, series)
|
1163
|
+
if index < 0
|
1164
|
+
index = columns.length + index
|
1165
|
+
end
|
1166
|
+
_df.replace_at_idx(index, series._s)
|
1167
|
+
self
|
1168
|
+
end
|
820
1169
|
|
1170
|
+
# Sort the DataFrame by column.
|
1171
|
+
#
|
1172
|
+
# @param by [String, Expr, Array]
|
1173
|
+
# By which column to sort.
|
1174
|
+
# @param reverse [Boolean]
|
1175
|
+
# Reverse/descending sort.
|
1176
|
+
# @param nulls_last [Boolean]
|
1177
|
+
# Place null values last. Can only be used if sorted by a single column.
|
1178
|
+
#
|
1179
|
+
# @return [DataFrame]
|
1180
|
+
#
|
1181
|
+
# @example
|
1182
|
+
# df = Polars::DataFrame.new(
|
1183
|
+
# {
|
1184
|
+
# "foo" => [1, 2, 3],
|
1185
|
+
# "bar" => [6.0, 7.0, 8.0],
|
1186
|
+
# "ham" => ["a", "b", "c"]
|
1187
|
+
# }
|
1188
|
+
# )
|
1189
|
+
# df.sort("foo", reverse: true)
|
1190
|
+
# # =>
|
1191
|
+
# # shape: (3, 3)
|
1192
|
+
# # ┌─────┬─────┬─────┐
|
1193
|
+
# # │ foo ┆ bar ┆ ham │
|
1194
|
+
# # │ --- ┆ --- ┆ --- │
|
1195
|
+
# # │ i64 ┆ f64 ┆ str │
|
1196
|
+
# # ╞═════╪═════╪═════╡
|
1197
|
+
# # │ 3 ┆ 8.0 ┆ c │
|
1198
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1199
|
+
# # │ 2 ┆ 7.0 ┆ b │
|
1200
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1201
|
+
# # │ 1 ┆ 6.0 ┆ a │
|
1202
|
+
# # └─────┴─────┴─────┘
|
821
1203
|
#
|
1204
|
+
# @example Sort by multiple columns.
|
1205
|
+
# df.sort(
|
1206
|
+
# [Polars.col("foo"), Polars.col("bar")**2],
|
1207
|
+
# reverse: [true, false]
|
1208
|
+
# )
|
1209
|
+
# # =>
|
1210
|
+
# # shape: (3, 3)
|
1211
|
+
# # ┌─────┬─────┬─────┐
|
1212
|
+
# # │ foo ┆ bar ┆ ham │
|
1213
|
+
# # │ --- ┆ --- ┆ --- │
|
1214
|
+
# # │ i64 ┆ f64 ┆ str │
|
1215
|
+
# # ╞═════╪═════╪═════╡
|
1216
|
+
# # │ 3 ┆ 8.0 ┆ c │
|
1217
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1218
|
+
# # │ 2 ┆ 7.0 ┆ b │
|
1219
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1220
|
+
# # │ 1 ┆ 6.0 ┆ a │
|
1221
|
+
# # └─────┴─────┴─────┘
|
822
1222
|
def sort(by, reverse: false, nulls_last: false)
|
823
|
-
|
1223
|
+
if by.is_a?(Array) || by.is_a?(Expr)
|
1224
|
+
lazy
|
1225
|
+
.sort(by, reverse: reverse, nulls_last: nulls_last)
|
1226
|
+
.collect(no_optimization: true, string_cache: false)
|
1227
|
+
else
|
1228
|
+
_from_rbdf(_df.sort(by, reverse, nulls_last))
|
1229
|
+
end
|
824
1230
|
end
|
825
1231
|
|
1232
|
+
# Check if DataFrame is equal to other.
|
1233
|
+
#
|
1234
|
+
# @param other [DataFrame]
|
1235
|
+
# DataFrame to compare with.
|
1236
|
+
# @param null_equal [Boolean]
|
1237
|
+
# Consider null values as equal.
|
1238
|
+
#
|
1239
|
+
# @return [Boolean]
|
1240
|
+
#
|
1241
|
+
# @example
|
1242
|
+
# df1 = Polars::DataFrame.new(
|
1243
|
+
# {
|
1244
|
+
# "foo" => [1, 2, 3],
|
1245
|
+
# "bar" => [6.0, 7.0, 8.0],
|
1246
|
+
# "ham" => ["a", "b", "c"]
|
1247
|
+
# }
|
1248
|
+
# )
|
1249
|
+
# df2 = Polars::DataFrame.new(
|
1250
|
+
# {
|
1251
|
+
# "foo" => [3, 2, 1],
|
1252
|
+
# "bar" => [8.0, 7.0, 6.0],
|
1253
|
+
# "ham" => ["c", "b", "a"]
|
1254
|
+
# }
|
1255
|
+
# )
|
1256
|
+
# df1.frame_equal(df1)
|
1257
|
+
# # => true
|
1258
|
+
# df1.frame_equal(df2)
|
1259
|
+
# # => false
|
826
1260
|
def frame_equal(other, null_equal: true)
|
827
1261
|
_df.frame_equal(other._df, null_equal)
|
828
1262
|
end
|
829
1263
|
|
830
|
-
#
|
831
|
-
# end
|
832
|
-
|
1264
|
+
# Replace a column by a new Series.
|
833
1265
|
#
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
1266
|
+
# @param column [String]
|
1267
|
+
# Column to replace.
|
1268
|
+
# @param new_col [Series]
|
1269
|
+
# New column to insert.
|
1270
|
+
#
|
1271
|
+
# @return [DataFrame]
|
1272
|
+
#
|
1273
|
+
# @example
|
1274
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
|
1275
|
+
# s = Polars::Series.new([10, 20, 30])
|
1276
|
+
# df.replace("foo", s)
|
1277
|
+
# # =>
|
1278
|
+
# # shape: (3, 2)
|
1279
|
+
# # ┌─────┬─────┐
|
1280
|
+
# # │ foo ┆ bar │
|
1281
|
+
# # │ --- ┆ --- │
|
1282
|
+
# # │ i64 ┆ i64 │
|
1283
|
+
# # ╞═════╪═════╡
|
1284
|
+
# # │ 10 ┆ 4 │
|
1285
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1286
|
+
# # │ 20 ┆ 5 │
|
1287
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1288
|
+
# # │ 30 ┆ 6 │
|
1289
|
+
# # └─────┴─────┘
|
1290
|
+
def replace(column, new_col)
|
1291
|
+
_df.replace(column, new_col._s)
|
1292
|
+
self
|
839
1293
|
end
|
840
1294
|
|
1295
|
+
# Get a slice of this DataFrame.
|
1296
|
+
#
|
1297
|
+
# @param offset [Integer]
|
1298
|
+
# Start index. Negative indexing is supported.
|
1299
|
+
# @param length [Integer, nil]
|
1300
|
+
# Length of the slice. If set to `nil`, all rows starting at the offset
|
1301
|
+
# will be selected.
|
1302
|
+
#
|
1303
|
+
# @return [DataFrame]
|
1304
|
+
#
|
1305
|
+
# @example
|
1306
|
+
# df = Polars::DataFrame.new(
|
1307
|
+
# {
|
1308
|
+
# "foo" => [1, 2, 3],
|
1309
|
+
# "bar" => [6.0, 7.0, 8.0],
|
1310
|
+
# "ham" => ["a", "b", "c"]
|
1311
|
+
# }
|
1312
|
+
# )
|
1313
|
+
# df.slice(1, 2)
|
1314
|
+
# # =>
|
1315
|
+
# # shape: (2, 3)
|
1316
|
+
# # ┌─────┬─────┬─────┐
|
1317
|
+
# # │ foo ┆ bar ┆ ham │
|
1318
|
+
# # │ --- ┆ --- ┆ --- │
|
1319
|
+
# # │ i64 ┆ f64 ┆ str │
|
1320
|
+
# # ╞═════╪═════╪═════╡
|
1321
|
+
# # │ 2 ┆ 7.0 ┆ b │
|
1322
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1323
|
+
# # │ 3 ┆ 8.0 ┆ c │
|
1324
|
+
# # └─────┴─────┴─────┘
|
1325
|
+
def slice(offset, length = nil)
|
1326
|
+
if !length.nil? && length < 0
|
1327
|
+
length = height - offset + length
|
1328
|
+
end
|
1329
|
+
_from_rbdf(_df.slice(offset, length))
|
1330
|
+
end
|
1331
|
+
|
1332
|
+
# Get the first `n` rows.
|
1333
|
+
#
|
1334
|
+
# Alias for {#head}.
|
1335
|
+
#
|
1336
|
+
# @param n [Integer]
|
1337
|
+
# Number of rows to return.
|
1338
|
+
#
|
1339
|
+
# @return [DataFrame]
|
1340
|
+
#
|
1341
|
+
# @example
|
1342
|
+
# df = Polars::DataFrame.new(
|
1343
|
+
# {"foo" => [1, 2, 3, 4, 5, 6], "bar" => ["a", "b", "c", "d", "e", "f"]}
|
1344
|
+
# )
|
1345
|
+
# df.limit(4)
|
1346
|
+
# # =>
|
1347
|
+
# # shape: (4, 2)
|
1348
|
+
# # ┌─────┬─────┐
|
1349
|
+
# # │ foo ┆ bar │
|
1350
|
+
# # │ --- ┆ --- │
|
1351
|
+
# # │ i64 ┆ str │
|
1352
|
+
# # ╞═════╪═════╡
|
1353
|
+
# # │ 1 ┆ a │
|
1354
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1355
|
+
# # │ 2 ┆ b │
|
1356
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1357
|
+
# # │ 3 ┆ c │
|
1358
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1359
|
+
# # │ 4 ┆ d │
|
1360
|
+
# # └─────┴─────┘
|
841
1361
|
def limit(n = 5)
|
842
1362
|
head(n)
|
843
1363
|
end
|
844
1364
|
|
1365
|
+
# Get the first `n` rows.
|
1366
|
+
#
|
1367
|
+
# @param n [Integer]
|
1368
|
+
# Number of rows to return.
|
1369
|
+
#
|
1370
|
+
# @return [DataFrame]
|
1371
|
+
#
|
1372
|
+
# @example
|
1373
|
+
# df = Polars::DataFrame.new(
|
1374
|
+
# {
|
1375
|
+
# "foo" => [1, 2, 3, 4, 5],
|
1376
|
+
# "bar" => [6, 7, 8, 9, 10],
|
1377
|
+
# "ham" => ["a", "b", "c", "d", "e"]
|
1378
|
+
# }
|
1379
|
+
# )
|
1380
|
+
# df.head(3)
|
1381
|
+
# # =>
|
1382
|
+
# # shape: (3, 3)
|
1383
|
+
# # ┌─────┬─────┬─────┐
|
1384
|
+
# # │ foo ┆ bar ┆ ham │
|
1385
|
+
# # │ --- ┆ --- ┆ --- │
|
1386
|
+
# # │ i64 ┆ i64 ┆ str │
|
1387
|
+
# # ╞═════╪═════╪═════╡
|
1388
|
+
# # │ 1 ┆ 6 ┆ a │
|
1389
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1390
|
+
# # │ 2 ┆ 7 ┆ b │
|
1391
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1392
|
+
# # │ 3 ┆ 8 ┆ c │
|
1393
|
+
# # └─────┴─────┴─────┘
|
845
1394
|
def head(n = 5)
|
846
1395
|
_from_rbdf(_df.head(n))
|
847
1396
|
end
|
848
1397
|
|
1398
|
+
# Get the last `n` rows.
|
1399
|
+
#
|
1400
|
+
# @param n [Integer]
|
1401
|
+
# Number of rows to return.
|
1402
|
+
#
|
1403
|
+
# @return [DataFrame]
|
1404
|
+
#
|
1405
|
+
# @example
|
1406
|
+
# df = Polars::DataFrame.new(
|
1407
|
+
# {
|
1408
|
+
# "foo" => [1, 2, 3, 4, 5],
|
1409
|
+
# "bar" => [6, 7, 8, 9, 10],
|
1410
|
+
# "ham" => ["a", "b", "c", "d", "e"]
|
1411
|
+
# }
|
1412
|
+
# )
|
1413
|
+
# df.tail(3)
|
1414
|
+
# # =>
|
1415
|
+
# # shape: (3, 3)
|
1416
|
+
# # ┌─────┬─────┬─────┐
|
1417
|
+
# # │ foo ┆ bar ┆ ham │
|
1418
|
+
# # │ --- ┆ --- ┆ --- │
|
1419
|
+
# # │ i64 ┆ i64 ┆ str │
|
1420
|
+
# # ╞═════╪═════╪═════╡
|
1421
|
+
# # │ 3 ┆ 8 ┆ c │
|
1422
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1423
|
+
# # │ 4 ┆ 9 ┆ d │
|
1424
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1425
|
+
# # │ 5 ┆ 10 ┆ e │
|
1426
|
+
# # └─────┴─────┴─────┘
|
849
1427
|
def tail(n = 5)
|
850
1428
|
_from_rbdf(_df.tail(n))
|
851
1429
|
end
|
852
1430
|
|
853
|
-
#
|
854
|
-
#
|
1431
|
+
# Return a new DataFrame where the null values are dropped.
|
1432
|
+
#
|
1433
|
+
# @param subset [Object]
|
1434
|
+
# Subset of column(s) on which `drop_nulls` will be applied.
|
1435
|
+
#
|
1436
|
+
# @return [DataFrame]
|
1437
|
+
#
|
1438
|
+
# @example
|
1439
|
+
# df = Polars::DataFrame.new(
|
1440
|
+
# {
|
1441
|
+
# "foo" => [1, 2, 3],
|
1442
|
+
# "bar" => [6, nil, 8],
|
1443
|
+
# "ham" => ["a", "b", "c"]
|
1444
|
+
# }
|
1445
|
+
# )
|
1446
|
+
# df.drop_nulls
|
1447
|
+
# # =>
|
1448
|
+
# # shape: (2, 3)
|
1449
|
+
# # ┌─────┬─────┬─────┐
|
1450
|
+
# # │ foo ┆ bar ┆ ham │
|
1451
|
+
# # │ --- ┆ --- ┆ --- │
|
1452
|
+
# # │ i64 ┆ i64 ┆ str │
|
1453
|
+
# # ╞═════╪═════╪═════╡
|
1454
|
+
# # │ 1 ┆ 6 ┆ a │
|
1455
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1456
|
+
# # │ 3 ┆ 8 ┆ c │
|
1457
|
+
# # └─────┴─────┴─────┘
|
1458
|
+
def drop_nulls(subset: nil)
|
1459
|
+
if subset.is_a?(String)
|
1460
|
+
subset = [subset]
|
1461
|
+
end
|
1462
|
+
_from_rbdf(_df.drop_nulls(subset))
|
1463
|
+
end
|
855
1464
|
|
856
1465
|
# def pipe
|
857
1466
|
# end
|
858
1467
|
|
859
|
-
#
|
860
|
-
#
|
861
|
-
|
1468
|
+
# Add a column at index 0 that counts the rows.
|
1469
|
+
#
|
1470
|
+
# @param name [String]
|
1471
|
+
# Name of the column to add.
|
1472
|
+
# @param offset [Integer]
|
1473
|
+
# Start the row count at this offset.
|
1474
|
+
#
|
1475
|
+
# @return [DataFrame]
|
1476
|
+
#
|
1477
|
+
# @example
|
1478
|
+
# df = Polars::DataFrame.new(
|
1479
|
+
# {
|
1480
|
+
# "a" => [1, 3, 5],
|
1481
|
+
# "b" => [2, 4, 6]
|
1482
|
+
# }
|
1483
|
+
# )
|
1484
|
+
# df.with_row_count
|
1485
|
+
# # =>
|
1486
|
+
# # shape: (3, 3)
|
1487
|
+
# # ┌────────┬─────┬─────┐
|
1488
|
+
# # │ row_nr ┆ a ┆ b │
|
1489
|
+
# # │ --- ┆ --- ┆ --- │
|
1490
|
+
# # │ u32 ┆ i64 ┆ i64 │
|
1491
|
+
# # ╞════════╪═════╪═════╡
|
1492
|
+
# # │ 0 ┆ 1 ┆ 2 │
|
1493
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1494
|
+
# # │ 1 ┆ 3 ┆ 4 │
|
1495
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1496
|
+
# # │ 2 ┆ 5 ┆ 6 │
|
1497
|
+
# # └────────┴─────┴─────┘
|
1498
|
+
def with_row_count(name: "row_nr", offset: 0)
|
1499
|
+
_from_rbdf(_df.with_row_count(name, offset))
|
1500
|
+
end
|
1501
|
+
|
1502
|
+
# Start a groupby operation.
|
1503
|
+
#
|
1504
|
+
# @param by [Object]
|
1505
|
+
# Column(s) to group by.
|
1506
|
+
# @param maintain_order [Boolean]
|
1507
|
+
# Make sure that the order of the groups remains consistent. This is more
|
1508
|
+
# expensive than a default groupby. Note that this only works in expression
|
1509
|
+
# aggregations.
|
862
1510
|
#
|
1511
|
+
# @return [GroupBy]
|
1512
|
+
#
|
1513
|
+
# @example
|
1514
|
+
# df = Polars::DataFrame.new(
|
1515
|
+
# {
|
1516
|
+
# "a" => ["a", "b", "a", "b", "b", "c"],
|
1517
|
+
# "b" => [1, 2, 3, 4, 5, 6],
|
1518
|
+
# "c" => [6, 5, 4, 3, 2, 1]
|
1519
|
+
# }
|
1520
|
+
# )
|
1521
|
+
# df.groupby("a").agg(Polars.col("b").sum).sort("a")
|
1522
|
+
# # =>
|
1523
|
+
# # shape: (3, 2)
|
1524
|
+
# # ┌─────┬─────┐
|
1525
|
+
# # │ a ┆ b │
|
1526
|
+
# # │ --- ┆ --- │
|
1527
|
+
# # │ str ┆ i64 │
|
1528
|
+
# # ╞═════╪═════╡
|
1529
|
+
# # │ a ┆ 4 │
|
1530
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1531
|
+
# # │ b ┆ 11 │
|
1532
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1533
|
+
# # │ c ┆ 6 │
|
1534
|
+
# # └─────┴─────┘
|
863
1535
|
def groupby(by, maintain_order: false)
|
864
|
-
|
1536
|
+
if !Utils.bool?(maintain_order)
|
1537
|
+
raise TypeError, "invalid input for groupby arg `maintain_order`: #{maintain_order}."
|
1538
|
+
end
|
1539
|
+
if by.is_a?(String)
|
1540
|
+
by = [by]
|
1541
|
+
end
|
1542
|
+
GroupBy.new(
|
1543
|
+
_df,
|
1544
|
+
by,
|
1545
|
+
self.class,
|
1546
|
+
maintain_order: maintain_order
|
1547
|
+
)
|
865
1548
|
end
|
866
1549
|
|
867
1550
|
# def groupby_rolling
|
@@ -876,7 +1559,109 @@ module Polars
|
|
876
1559
|
# def join_asof
|
877
1560
|
# end
|
878
1561
|
|
1562
|
+
# Join in SQL-like fashion.
|
1563
|
+
#
|
1564
|
+
# @param other [DataFrame]
|
1565
|
+
# DataFrame to join with.
|
1566
|
+
# @param left_on [Object]
|
1567
|
+
# Name(s) of the left join column(s).
|
1568
|
+
# @param right_on [Object]
|
1569
|
+
# Name(s) of the right join column(s).
|
1570
|
+
# @param on [Object]
|
1571
|
+
# Name(s) of the join columns in both DataFrames.
|
1572
|
+
# @param how ["inner", "left", "outer", "semi", "anti", "cross"]
|
1573
|
+
# Join strategy.
|
1574
|
+
# @param suffix [String]
|
1575
|
+
# Suffix to append to columns with a duplicate name.
|
1576
|
+
#
|
1577
|
+
# @return [DataFrame]
|
1578
|
+
#
|
1579
|
+
# @example
|
1580
|
+
# df = Polars::DataFrame.new(
|
1581
|
+
# {
|
1582
|
+
# "foo" => [1, 2, 3],
|
1583
|
+
# "bar" => [6.0, 7.0, 8.0],
|
1584
|
+
# "ham" => ["a", "b", "c"]
|
1585
|
+
# }
|
1586
|
+
# )
|
1587
|
+
# other_df = Polars::DataFrame.new(
|
1588
|
+
# {
|
1589
|
+
# "apple" => ["x", "y", "z"],
|
1590
|
+
# "ham" => ["a", "b", "d"]
|
1591
|
+
# }
|
1592
|
+
# )
|
1593
|
+
# df.join(other_df, on: "ham")
|
1594
|
+
# # =>
|
1595
|
+
# # shape: (2, 4)
|
1596
|
+
# # ┌─────┬─────┬─────┬───────┐
|
1597
|
+
# # │ foo ┆ bar ┆ ham ┆ apple │
|
1598
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
1599
|
+
# # │ i64 ┆ f64 ┆ str ┆ str │
|
1600
|
+
# # ╞═════╪═════╪═════╪═══════╡
|
1601
|
+
# # │ 1 ┆ 6.0 ┆ a ┆ x │
|
1602
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
1603
|
+
# # │ 2 ┆ 7.0 ┆ b ┆ y │
|
1604
|
+
# # └─────┴─────┴─────┴───────┘
|
1605
|
+
#
|
1606
|
+
# @example
|
1607
|
+
# df.join(other_df, on: "ham", how: "outer")
|
1608
|
+
# # =>
|
1609
|
+
# # shape: (4, 4)
|
1610
|
+
# # ┌──────┬──────┬─────┬───────┐
|
1611
|
+
# # │ foo ┆ bar ┆ ham ┆ apple │
|
1612
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
1613
|
+
# # │ i64 ┆ f64 ┆ str ┆ str │
|
1614
|
+
# # ╞══════╪══════╪═════╪═══════╡
|
1615
|
+
# # │ 1 ┆ 6.0 ┆ a ┆ x │
|
1616
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
1617
|
+
# # │ 2 ┆ 7.0 ┆ b ┆ y │
|
1618
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
1619
|
+
# # │ null ┆ null ┆ d ┆ z │
|
1620
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
1621
|
+
# # │ 3 ┆ 8.0 ┆ c ┆ null │
|
1622
|
+
# # └──────┴──────┴─────┴───────┘
|
1623
|
+
#
|
1624
|
+
# @example
|
1625
|
+
# df.join(other_df, on: "ham", how: "left")
|
1626
|
+
# # =>
|
1627
|
+
# # shape: (3, 4)
|
1628
|
+
# # ┌─────┬─────┬─────┬───────┐
|
1629
|
+
# # │ foo ┆ bar ┆ ham ┆ apple │
|
1630
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
1631
|
+
# # │ i64 ┆ f64 ┆ str ┆ str │
|
1632
|
+
# # ╞═════╪═════╪═════╪═══════╡
|
1633
|
+
# # │ 1 ┆ 6.0 ┆ a ┆ x │
|
1634
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
1635
|
+
# # │ 2 ┆ 7.0 ┆ b ┆ y │
|
1636
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
1637
|
+
# # │ 3 ┆ 8.0 ┆ c ┆ null │
|
1638
|
+
# # └─────┴─────┴─────┴───────┘
|
1639
|
+
#
|
1640
|
+
# @example
|
1641
|
+
# df.join(other_df, on: "ham", how: "semi")
|
1642
|
+
# # =>
|
1643
|
+
# # shape: (2, 3)
|
1644
|
+
# # ┌─────┬─────┬─────┐
|
1645
|
+
# # │ foo ┆ bar ┆ ham │
|
1646
|
+
# # │ --- ┆ --- ┆ --- │
|
1647
|
+
# # │ i64 ┆ f64 ┆ str │
|
1648
|
+
# # ╞═════╪═════╪═════╡
|
1649
|
+
# # │ 1 ┆ 6.0 ┆ a │
|
1650
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1651
|
+
# # │ 2 ┆ 7.0 ┆ b │
|
1652
|
+
# # └─────┴─────┴─────┘
|
879
1653
|
#
|
1654
|
+
# @example
|
1655
|
+
# df.join(other_df, on: "ham", how: "anti")
|
1656
|
+
# # =>
|
1657
|
+
# # shape: (1, 3)
|
1658
|
+
# # ┌─────┬─────┬─────┐
|
1659
|
+
# # │ foo ┆ bar ┆ ham │
|
1660
|
+
# # │ --- ┆ --- ┆ --- │
|
1661
|
+
# # │ i64 ┆ f64 ┆ str │
|
1662
|
+
# # ╞═════╪═════╪═════╡
|
1663
|
+
# # │ 3 ┆ 8.0 ┆ c │
|
1664
|
+
# # └─────┴─────┴─────┘
|
880
1665
|
def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right")
|
881
1666
|
lazy
|
882
1667
|
.join(
|
@@ -893,55 +1678,505 @@ module Polars
|
|
893
1678
|
# def apply
|
894
1679
|
# end
|
895
1680
|
|
1681
|
+
# Return a new DataFrame with the column added or replaced.
|
1682
|
+
#
|
1683
|
+
# @param column [Object]
|
1684
|
+
# Series, where the name of the Series refers to the column in the DataFrame.
|
1685
|
+
#
|
1686
|
+
# @return [DataFrame]
|
1687
|
+
#
|
1688
|
+
# @example Added
|
1689
|
+
# df = Polars::DataFrame.new(
|
1690
|
+
# {
|
1691
|
+
# "a" => [1, 3, 5],
|
1692
|
+
# "b" => [2, 4, 6]
|
1693
|
+
# }
|
1694
|
+
# )
|
1695
|
+
# df.with_column((Polars.col("b") ** 2).alias("b_squared"))
|
1696
|
+
# # =>
|
1697
|
+
# # shape: (3, 3)
|
1698
|
+
# # ┌─────┬─────┬───────────┐
|
1699
|
+
# # │ a ┆ b ┆ b_squared │
|
1700
|
+
# # │ --- ┆ --- ┆ --- │
|
1701
|
+
# # │ i64 ┆ i64 ┆ f64 │
|
1702
|
+
# # ╞═════╪═════╪═══════════╡
|
1703
|
+
# # │ 1 ┆ 2 ┆ 4.0 │
|
1704
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1705
|
+
# # │ 3 ┆ 4 ┆ 16.0 │
|
1706
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
1707
|
+
# # │ 5 ┆ 6 ┆ 36.0 │
|
1708
|
+
# # └─────┴─────┴───────────┘
|
896
1709
|
#
|
1710
|
+
# @example Replaced
|
1711
|
+
# df.with_column(Polars.col("a") ** 2)
|
1712
|
+
# # =>
|
1713
|
+
# # shape: (3, 2)
|
1714
|
+
# # ┌──────┬─────┐
|
1715
|
+
# # │ a ┆ b │
|
1716
|
+
# # │ --- ┆ --- │
|
1717
|
+
# # │ f64 ┆ i64 │
|
1718
|
+
# # ╞══════╪═════╡
|
1719
|
+
# # │ 1.0 ┆ 2 │
|
1720
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
|
1721
|
+
# # │ 9.0 ┆ 4 │
|
1722
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
|
1723
|
+
# # │ 25.0 ┆ 6 │
|
1724
|
+
# # └──────┴─────┘
|
897
1725
|
def with_column(column)
|
898
1726
|
lazy
|
899
1727
|
.with_column(column)
|
900
1728
|
.collect(no_optimization: true, string_cache: false)
|
901
1729
|
end
|
902
1730
|
|
903
|
-
#
|
904
|
-
#
|
1731
|
+
# Return a new DataFrame grown horizontally by stacking multiple Series to it.
|
1732
|
+
#
|
1733
|
+
# @param columns [Object]
|
1734
|
+
# Series to stack.
|
1735
|
+
# @param in_place [Boolean]
|
1736
|
+
# Modify in place.
|
1737
|
+
#
|
1738
|
+
# @return [DataFrame]
|
1739
|
+
#
|
1740
|
+
# @example
|
1741
|
+
# df = Polars::DataFrame.new(
|
1742
|
+
# {
|
1743
|
+
# "foo" => [1, 2, 3],
|
1744
|
+
# "bar" => [6, 7, 8],
|
1745
|
+
# "ham" => ["a", "b", "c"]
|
1746
|
+
# }
|
1747
|
+
# )
|
1748
|
+
# x = Polars::Series.new("apple", [10, 20, 30])
|
1749
|
+
# df.hstack([x])
|
1750
|
+
# # =>
|
1751
|
+
# # shape: (3, 4)
|
1752
|
+
# # ┌─────┬─────┬─────┬───────┐
|
1753
|
+
# # │ foo ┆ bar ┆ ham ┆ apple │
|
1754
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
1755
|
+
# # │ i64 ┆ i64 ┆ str ┆ i64 │
|
1756
|
+
# # ╞═════╪═════╪═════╪═══════╡
|
1757
|
+
# # │ 1 ┆ 6 ┆ a ┆ 10 │
|
1758
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
1759
|
+
# # │ 2 ┆ 7 ┆ b ┆ 20 │
|
1760
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
1761
|
+
# # │ 3 ┆ 8 ┆ c ┆ 30 │
|
1762
|
+
# # └─────┴─────┴─────┴───────┘
|
1763
|
+
def hstack(columns, in_place: false)
|
1764
|
+
if !columns.is_a?(Array)
|
1765
|
+
columns = columns.get_columns
|
1766
|
+
end
|
1767
|
+
if in_place
|
1768
|
+
_df.hstack_mut(columns.map(&:_s))
|
1769
|
+
self
|
1770
|
+
else
|
1771
|
+
_from_rbdf(_df.hstack(columns.map(&:_s)))
|
1772
|
+
end
|
1773
|
+
end
|
905
1774
|
|
906
|
-
#
|
907
|
-
#
|
1775
|
+
# Grow this DataFrame vertically by stacking a DataFrame to it.
|
1776
|
+
#
|
1777
|
+
# @param df [DataFrame]
|
1778
|
+
# DataFrame to stack.
|
1779
|
+
# @param in_place [Boolean]
|
1780
|
+
# Modify in place.
|
1781
|
+
#
|
1782
|
+
# @return [DataFrame]
|
1783
|
+
#
|
1784
|
+
# @example
|
1785
|
+
# df1 = Polars::DataFrame.new(
|
1786
|
+
# {
|
1787
|
+
# "foo" => [1, 2],
|
1788
|
+
# "bar" => [6, 7],
|
1789
|
+
# "ham" => ["a", "b"]
|
1790
|
+
# }
|
1791
|
+
# )
|
1792
|
+
# df2 = Polars::DataFrame.new(
|
1793
|
+
# {
|
1794
|
+
# "foo" => [3, 4],
|
1795
|
+
# "bar" => [8, 9],
|
1796
|
+
# "ham" => ["c", "d"]
|
1797
|
+
# }
|
1798
|
+
# )
|
1799
|
+
# df1.vstack(df2)
|
1800
|
+
# # =>
|
1801
|
+
# # shape: (4, 3)
|
1802
|
+
# # ┌─────┬─────┬─────┐
|
1803
|
+
# # │ foo ┆ bar ┆ ham │
|
1804
|
+
# # │ --- ┆ --- ┆ --- │
|
1805
|
+
# # │ i64 ┆ i64 ┆ str │
|
1806
|
+
# # ╞═════╪═════╪═════╡
|
1807
|
+
# # │ 1 ┆ 6 ┆ a │
|
1808
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1809
|
+
# # │ 2 ┆ 7 ┆ b │
|
1810
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1811
|
+
# # │ 3 ┆ 8 ┆ c │
|
1812
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
1813
|
+
# # │ 4 ┆ 9 ┆ d │
|
1814
|
+
# # └─────┴─────┴─────┘
|
1815
|
+
def vstack(df, in_place: false)
|
1816
|
+
if in_place
|
1817
|
+
_df.vstack_mut(df._df)
|
1818
|
+
self
|
1819
|
+
else
|
1820
|
+
_from_rbdf(_df.vstack(df._df))
|
1821
|
+
end
|
1822
|
+
end
|
908
1823
|
|
1824
|
+
# Extend the memory backed by this `DataFrame` with the values from `other`.
|
1825
|
+
#
|
1826
|
+
# Different from `vstack` which adds the chunks from `other` to the chunks of this
|
1827
|
+
# `DataFrame`, `extend` appends the data from `other` to the underlying memory
|
1828
|
+
# locations and thus may cause a reallocation.
|
1829
|
+
#
|
1830
|
+
# If this does not cause a reallocation, the resulting data structure will not
|
1831
|
+
# have any extra chunks and thus will yield faster queries.
|
1832
|
+
#
|
1833
|
+
# Prefer `extend` over `vstack` when you want to do a query after a single append.
|
1834
|
+
# For instance during online operations where you add `n` rows and rerun a query.
|
909
1835
|
#
|
1836
|
+
# Prefer `vstack` over `extend` when you want to append many times before doing a
|
1837
|
+
# query. For instance when you read in multiple files and want to store them in a
|
1838
|
+
# single `DataFrame`. In the latter case, finish the sequence of `vstack`
|
1839
|
+
# operations with a `rechunk`.
|
1840
|
+
#
|
1841
|
+
# @param other [DataFrame]
|
1842
|
+
# DataFrame to vertically add.
|
1843
|
+
#
|
1844
|
+
# @return [DataFrame]
|
1845
|
+
#
|
1846
|
+
# @example
|
1847
|
+
# df1 = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
|
1848
|
+
# df2 = Polars::DataFrame.new({"foo" => [10, 20, 30], "bar" => [40, 50, 60]})
|
1849
|
+
# df1.extend(df2)
|
1850
|
+
# # =>
|
1851
|
+
# # shape: (6, 2)
|
1852
|
+
# # ┌─────┬─────┐
|
1853
|
+
# # │ foo ┆ bar │
|
1854
|
+
# # │ --- ┆ --- │
|
1855
|
+
# # │ i64 ┆ i64 │
|
1856
|
+
# # ╞═════╪═════╡
|
1857
|
+
# # │ 1 ┆ 4 │
|
1858
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1859
|
+
# # │ 2 ┆ 5 │
|
1860
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1861
|
+
# # │ 3 ┆ 6 │
|
1862
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1863
|
+
# # │ 10 ┆ 40 │
|
1864
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1865
|
+
# # │ 20 ┆ 50 │
|
1866
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1867
|
+
# # │ 30 ┆ 60 │
|
1868
|
+
# # └─────┴─────┘
|
910
1869
|
def extend(other)
|
911
1870
|
_df.extend(other._df)
|
912
1871
|
self
|
913
1872
|
end
|
914
1873
|
|
915
|
-
#
|
916
|
-
#
|
1874
|
+
# Remove column(s) from the DataFrame and return the result as a new DataFrame.
|
1875
|
+
#
|
1876
|
+
# @param columns [Object]
|
1877
|
+
# Column(s) to drop.
|
1878
|
+
#
|
1879
|
+
# @return [DataFrame]
|
1880
|
+
#
|
1881
|
+
# @example
|
1882
|
+
# df = Polars::DataFrame.new(
|
1883
|
+
# {
|
1884
|
+
# "foo" => [1, 2, 3],
|
1885
|
+
# "bar" => [6.0, 7.0, 8.0],
|
1886
|
+
# "ham" => ["a", "b", "c"]
|
1887
|
+
# }
|
1888
|
+
# )
|
1889
|
+
# df.drop("ham")
|
1890
|
+
# # =>
|
1891
|
+
# # shape: (3, 2)
|
1892
|
+
# # ┌─────┬─────┐
|
1893
|
+
# # │ foo ┆ bar │
|
1894
|
+
# # │ --- ┆ --- │
|
1895
|
+
# # │ i64 ┆ f64 │
|
1896
|
+
# # ╞═════╪═════╡
|
1897
|
+
# # │ 1 ┆ 6.0 │
|
1898
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1899
|
+
# # │ 2 ┆ 7.0 │
|
1900
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
1901
|
+
# # │ 3 ┆ 8.0 │
|
1902
|
+
# # └─────┴─────┘
|
1903
|
+
def drop(columns)
|
1904
|
+
if columns.is_a?(Array)
|
1905
|
+
df = clone
|
1906
|
+
columns.each do |n|
|
1907
|
+
df._df.drop_in_place(n)
|
1908
|
+
end
|
1909
|
+
df
|
1910
|
+
else
|
1911
|
+
_from_rbdf(_df.drop(columns))
|
1912
|
+
end
|
1913
|
+
end
|
917
1914
|
|
918
|
-
#
|
919
|
-
#
|
1915
|
+
# Drop in place.
|
1916
|
+
#
|
1917
|
+
# @param name [Object]
|
1918
|
+
# Column to drop.
|
1919
|
+
#
|
1920
|
+
# @return [Series]
|
1921
|
+
#
|
1922
|
+
# @example
|
1923
|
+
# df = Polars::DataFrame.new(
|
1924
|
+
# {
|
1925
|
+
# "foo" => [1, 2, 3],
|
1926
|
+
# "bar" => [6, 7, 8],
|
1927
|
+
# "ham" => ["a", "b", "c"]
|
1928
|
+
# }
|
1929
|
+
# )
|
1930
|
+
# df.drop_in_place("ham")
|
1931
|
+
# # =>
|
1932
|
+
# # shape: (3,)
|
1933
|
+
# # Series: 'ham' [str]
|
1934
|
+
# # [
|
1935
|
+
# # "a"
|
1936
|
+
# # "b"
|
1937
|
+
# # "c"
|
1938
|
+
# # ]
|
1939
|
+
def drop_in_place(name)
|
1940
|
+
Utils.wrap_s(_df.drop_in_place(name))
|
1941
|
+
end
|
920
1942
|
|
921
|
-
#
|
922
|
-
#
|
1943
|
+
# Create an empty copy of the current DataFrame.
|
1944
|
+
#
|
1945
|
+
# Returns a DataFrame with identical schema but no data.
|
1946
|
+
#
|
1947
|
+
# @return [DataFrame]
|
1948
|
+
#
|
1949
|
+
# @example
|
1950
|
+
# df = Polars::DataFrame.new(
|
1951
|
+
# {
|
1952
|
+
# "a" => [nil, 2, 3, 4],
|
1953
|
+
# "b" => [0.5, nil, 2.5, 13],
|
1954
|
+
# "c" => [true, true, false, nil]
|
1955
|
+
# }
|
1956
|
+
# )
|
1957
|
+
# df.cleared
|
1958
|
+
# # =>
|
1959
|
+
# # shape: (0, 3)
|
1960
|
+
# # ┌─────┬─────┬──────┐
|
1961
|
+
# # │ a ┆ b ┆ c │
|
1962
|
+
# # │ --- ┆ --- ┆ --- │
|
1963
|
+
# # │ i64 ┆ f64 ┆ bool │
|
1964
|
+
# # ╞═════╪═════╪══════╡
|
1965
|
+
# # └─────┴─────┴──────┘
|
1966
|
+
def cleared
|
1967
|
+
height > 0 ? head(0) : clone
|
1968
|
+
end
|
923
1969
|
|
924
1970
|
# clone handled by initialize_copy
|
925
1971
|
|
1972
|
+
# Get the DataFrame as an Array of Series.
|
926
1973
|
#
|
1974
|
+
# @return [Array]
|
927
1975
|
def get_columns
|
928
1976
|
_df.get_columns.map { |s| Utils.wrap_s(s) }
|
929
1977
|
end
|
930
1978
|
|
1979
|
+
# Get a single column as Series by name.
|
1980
|
+
#
|
1981
|
+
# @param name [String]
|
1982
|
+
# Name of the column to retrieve.
|
1983
|
+
#
|
1984
|
+
# @return [Series]
|
1985
|
+
#
|
1986
|
+
# @example
|
1987
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
|
1988
|
+
# df.get_column("foo")
|
1989
|
+
# # =>
|
1990
|
+
# # shape: (3,)
|
1991
|
+
# # Series: 'foo' [i64]
|
1992
|
+
# # [
|
1993
|
+
# # 1
|
1994
|
+
# # 2
|
1995
|
+
# # 3
|
1996
|
+
# # ]
|
931
1997
|
def get_column(name)
|
932
1998
|
self[name]
|
933
1999
|
end
|
934
2000
|
|
935
|
-
#
|
936
|
-
#
|
2001
|
+
# Fill null values using the specified value or strategy.
|
2002
|
+
#
|
2003
|
+
# @param value [Numeric]
|
2004
|
+
# Value used to fill null values.
|
2005
|
+
# @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
|
2006
|
+
# Strategy used to fill null values.
|
2007
|
+
# @param limit [Integer]
|
2008
|
+
# Number of consecutive null values to fill when using the 'forward' or
|
2009
|
+
# 'backward' strategy.
|
2010
|
+
# @param matches_supertype [Boolean]
|
2011
|
+
# Fill all matching supertypes of the fill `value`.
|
2012
|
+
#
|
2013
|
+
# @return [DataFrame]
|
2014
|
+
#
|
2015
|
+
# @example
|
2016
|
+
# df = Polars::DataFrame.new(
|
2017
|
+
# {
|
2018
|
+
# "a" => [1, 2, nil, 4],
|
2019
|
+
# "b" => [0.5, 4, nil, 13]
|
2020
|
+
# }
|
2021
|
+
# )
|
2022
|
+
# df.fill_null(99)
|
2023
|
+
# # =>
|
2024
|
+
# # shape: (4, 2)
|
2025
|
+
# # ┌─────┬──────┐
|
2026
|
+
# # │ a ┆ b │
|
2027
|
+
# # │ --- ┆ --- │
|
2028
|
+
# # │ i64 ┆ f64 │
|
2029
|
+
# # ╞═════╪══════╡
|
2030
|
+
# # │ 1 ┆ 0.5 │
|
2031
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2032
|
+
# # │ 2 ┆ 4.0 │
|
2033
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2034
|
+
# # │ 99 ┆ 99.0 │
|
2035
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2036
|
+
# # │ 4 ┆ 13.0 │
|
2037
|
+
# # └─────┴──────┘
|
2038
|
+
#
|
2039
|
+
# @example
|
2040
|
+
# df.fill_null(strategy: "forward")
|
2041
|
+
# # =>
|
2042
|
+
# # shape: (4, 2)
|
2043
|
+
# # ┌─────┬──────┐
|
2044
|
+
# # │ a ┆ b │
|
2045
|
+
# # │ --- ┆ --- │
|
2046
|
+
# # │ i64 ┆ f64 │
|
2047
|
+
# # ╞═════╪══════╡
|
2048
|
+
# # │ 1 ┆ 0.5 │
|
2049
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2050
|
+
# # │ 2 ┆ 4.0 │
|
2051
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2052
|
+
# # │ 2 ┆ 4.0 │
|
2053
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2054
|
+
# # │ 4 ┆ 13.0 │
|
2055
|
+
# # └─────┴──────┘
|
2056
|
+
#
|
2057
|
+
# @example
|
2058
|
+
# df.fill_null(strategy: "max")
|
2059
|
+
# # =>
|
2060
|
+
# # shape: (4, 2)
|
2061
|
+
# # ┌─────┬──────┐
|
2062
|
+
# # │ a ┆ b │
|
2063
|
+
# # │ --- ┆ --- │
|
2064
|
+
# # │ i64 ┆ f64 │
|
2065
|
+
# # ╞═════╪══════╡
|
2066
|
+
# # │ 1 ┆ 0.5 │
|
2067
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2068
|
+
# # │ 2 ┆ 4.0 │
|
2069
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2070
|
+
# # │ 4 ┆ 13.0 │
|
2071
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2072
|
+
# # │ 4 ┆ 13.0 │
|
2073
|
+
# # └─────┴──────┘
|
2074
|
+
#
|
2075
|
+
# @example
|
2076
|
+
# df.fill_null(strategy: "zero")
|
2077
|
+
# # =>
|
2078
|
+
# # shape: (4, 2)
|
2079
|
+
# # ┌─────┬──────┐
|
2080
|
+
# # │ a ┆ b │
|
2081
|
+
# # │ --- ┆ --- │
|
2082
|
+
# # │ i64 ┆ f64 │
|
2083
|
+
# # ╞═════╪══════╡
|
2084
|
+
# # │ 1 ┆ 0.5 │
|
2085
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2086
|
+
# # │ 2 ┆ 4.0 │
|
2087
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2088
|
+
# # │ 0 ┆ 0.0 │
|
2089
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2090
|
+
# # │ 4 ┆ 13.0 │
|
2091
|
+
# # └─────┴──────┘
|
2092
|
+
def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: true)
|
2093
|
+
_from_rbdf(
|
2094
|
+
lazy
|
2095
|
+
.fill_null(value, strategy: strategy, limit: limit, matches_supertype: matches_supertype)
|
2096
|
+
.collect(no_optimization: true)
|
2097
|
+
._df
|
2098
|
+
)
|
2099
|
+
end
|
937
2100
|
|
2101
|
+
# Fill floating point NaN values by an Expression evaluation.
|
2102
|
+
#
|
2103
|
+
# @param fill_value [Object]
|
2104
|
+
# Value to fill NaN with.
|
2105
|
+
#
|
2106
|
+
# @return [DataFrame]
|
2107
|
+
#
|
2108
|
+
# @note
|
2109
|
+
# Note that floating point NaNs (Not a Number) are not missing values!
|
2110
|
+
# To replace missing values, use `fill_null`.
|
938
2111
|
#
|
2112
|
+
# @example
|
2113
|
+
# df = Polars::DataFrame.new(
|
2114
|
+
# {
|
2115
|
+
# "a" => [1.5, 2, Float::NAN, 4],
|
2116
|
+
# "b" => [0.5, 4, Float::NAN, 13]
|
2117
|
+
# }
|
2118
|
+
# )
|
2119
|
+
# df.fill_nan(99)
|
2120
|
+
# # =>
|
2121
|
+
# # shape: (4, 2)
|
2122
|
+
# # ┌──────┬──────┐
|
2123
|
+
# # │ a ┆ b │
|
2124
|
+
# # │ --- ┆ --- │
|
2125
|
+
# # │ f64 ┆ f64 │
|
2126
|
+
# # ╞══════╪══════╡
|
2127
|
+
# # │ 1.5 ┆ 0.5 │
|
2128
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2129
|
+
# # │ 2.0 ┆ 4.0 │
|
2130
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2131
|
+
# # │ 99.0 ┆ 99.0 │
|
2132
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2133
|
+
# # │ 4.0 ┆ 13.0 │
|
2134
|
+
# # └──────┴──────┘
|
939
2135
|
def fill_nan(fill_value)
|
940
2136
|
lazy.fill_nan(fill_value).collect(no_optimization: true)
|
941
2137
|
end
|
942
2138
|
|
943
|
-
#
|
944
|
-
#
|
2139
|
+
# Explode `DataFrame` to long format by exploding a column with Lists.
|
2140
|
+
#
|
2141
|
+
# @param columns [Object]
|
2142
|
+
# Column of LargeList type.
|
2143
|
+
#
|
2144
|
+
# @return [DataFrame]
|
2145
|
+
#
|
2146
|
+
# @example
|
2147
|
+
# df = Polars::DataFrame.new(
|
2148
|
+
# {
|
2149
|
+
# "letters" => ["a", "a", "b", "c"],
|
2150
|
+
# "numbers" => [[1], [2, 3], [4, 5], [6, 7, 8]]
|
2151
|
+
# }
|
2152
|
+
# )
|
2153
|
+
# df.explode("numbers")
|
2154
|
+
# # =>
|
2155
|
+
# # shape: (8, 2)
|
2156
|
+
# # ┌─────────┬─────────┐
|
2157
|
+
# # │ letters ┆ numbers │
|
2158
|
+
# # │ --- ┆ --- │
|
2159
|
+
# # │ str ┆ i64 │
|
2160
|
+
# # ╞═════════╪═════════╡
|
2161
|
+
# # │ a ┆ 1 │
|
2162
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
2163
|
+
# # │ a ┆ 2 │
|
2164
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
2165
|
+
# # │ a ┆ 3 │
|
2166
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
2167
|
+
# # │ b ┆ 4 │
|
2168
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
2169
|
+
# # │ b ┆ 5 │
|
2170
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
2171
|
+
# # │ c ┆ 6 │
|
2172
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
2173
|
+
# # │ c ┆ 7 │
|
2174
|
+
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
2175
|
+
# # │ c ┆ 8 │
|
2176
|
+
# # └─────────┴─────────┘
|
2177
|
+
def explode(columns)
|
2178
|
+
lazy.explode(columns).collect(no_optimization: true)
|
2179
|
+
end
|
945
2180
|
|
946
2181
|
# def pivot
|
947
2182
|
# end
|
@@ -955,25 +2190,242 @@ module Polars
|
|
955
2190
|
# def partition_by
|
956
2191
|
# end
|
957
2192
|
|
958
|
-
#
|
959
|
-
#
|
960
|
-
|
961
|
-
#
|
962
|
-
#
|
2193
|
+
# Shift values by the given period.
|
2194
|
+
#
|
2195
|
+
# @param periods [Integer]
|
2196
|
+
# Number of places to shift (may be negative).
|
2197
|
+
#
|
2198
|
+
# @return [DataFrame]
|
2199
|
+
#
|
2200
|
+
# @example
|
2201
|
+
# df = Polars::DataFrame.new(
|
2202
|
+
# {
|
2203
|
+
# "foo" => [1, 2, 3],
|
2204
|
+
# "bar" => [6, 7, 8],
|
2205
|
+
# "ham" => ["a", "b", "c"]
|
2206
|
+
# }
|
2207
|
+
# )
|
2208
|
+
# df.shift(1)
|
2209
|
+
# # =>
|
2210
|
+
# # shape: (3, 3)
|
2211
|
+
# # ┌──────┬──────┬──────┐
|
2212
|
+
# # │ foo ┆ bar ┆ ham │
|
2213
|
+
# # │ --- ┆ --- ┆ --- │
|
2214
|
+
# # │ i64 ┆ i64 ┆ str │
|
2215
|
+
# # ╞══════╪══════╪══════╡
|
2216
|
+
# # │ null ┆ null ┆ null │
|
2217
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2218
|
+
# # │ 1 ┆ 6 ┆ a │
|
2219
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2220
|
+
# # │ 2 ┆ 7 ┆ b │
|
2221
|
+
# # └──────┴──────┴──────┘
|
2222
|
+
#
|
2223
|
+
# @example
|
2224
|
+
# df.shift(-1)
|
2225
|
+
# # =>
|
2226
|
+
# # shape: (3, 3)
|
2227
|
+
# # ┌──────┬──────┬──────┐
|
2228
|
+
# # │ foo ┆ bar ┆ ham │
|
2229
|
+
# # │ --- ┆ --- ┆ --- │
|
2230
|
+
# # │ i64 ┆ i64 ┆ str │
|
2231
|
+
# # ╞══════╪══════╪══════╡
|
2232
|
+
# # │ 2 ┆ 7 ┆ b │
|
2233
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2234
|
+
# # │ 3 ┆ 8 ┆ c │
|
2235
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2236
|
+
# # │ null ┆ null ┆ null │
|
2237
|
+
# # └──────┴──────┴──────┘
|
2238
|
+
def shift(periods)
|
2239
|
+
_from_rbdf(_df.shift(periods))
|
2240
|
+
end
|
2241
|
+
|
2242
|
+
# Shift the values by a given period and fill the resulting null values.
|
2243
|
+
#
|
2244
|
+
# @param periods [Integer]
|
2245
|
+
# Number of places to shift (may be negative).
|
2246
|
+
# @param fill_value [Object]
|
2247
|
+
# Fill the resulting null values with this value.
|
2248
|
+
#
|
2249
|
+
# @return [DataFrame]
|
2250
|
+
#
|
2251
|
+
# @example
|
2252
|
+
# df = Polars::DataFrame.new(
|
2253
|
+
# {
|
2254
|
+
# "foo" => [1, 2, 3],
|
2255
|
+
# "bar" => [6, 7, 8],
|
2256
|
+
# "ham" => ["a", "b", "c"]
|
2257
|
+
# }
|
2258
|
+
# )
|
2259
|
+
# df.shift_and_fill(1, 0)
|
2260
|
+
# # =>
|
2261
|
+
# # shape: (3, 3)
|
2262
|
+
# # ┌─────┬─────┬─────┐
|
2263
|
+
# # │ foo ┆ bar ┆ ham │
|
2264
|
+
# # │ --- ┆ --- ┆ --- │
|
2265
|
+
# # │ i64 ┆ i64 ┆ str │
|
2266
|
+
# # ╞═════╪═════╪═════╡
|
2267
|
+
# # │ 0 ┆ 0 ┆ 0 │
|
2268
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
2269
|
+
# # │ 1 ┆ 6 ┆ a │
|
2270
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
2271
|
+
# # │ 2 ┆ 7 ┆ b │
|
2272
|
+
# # └─────┴─────┴─────┘
|
2273
|
+
def shift_and_fill(periods, fill_value)
|
2274
|
+
lazy
|
2275
|
+
.shift_and_fill(periods, fill_value)
|
2276
|
+
.collect(no_optimization: true, string_cache: false)
|
2277
|
+
end
|
963
2278
|
|
2279
|
+
# Get a mask of all duplicated rows in this DataFrame.
|
2280
|
+
#
|
2281
|
+
# @return [Series]
|
964
2282
|
#
|
2283
|
+
# @example
|
2284
|
+
# df = Polars::DataFrame.new(
|
2285
|
+
# {
|
2286
|
+
# "a" => [1, 2, 3, 1],
|
2287
|
+
# "b" => ["x", "y", "z", "x"],
|
2288
|
+
# }
|
2289
|
+
# )
|
2290
|
+
# df.is_duplicated
|
2291
|
+
# # =>
|
2292
|
+
# # shape: (4,)
|
2293
|
+
# # Series: '' [bool]
|
2294
|
+
# # [
|
2295
|
+
# # true
|
2296
|
+
# # false
|
2297
|
+
# # false
|
2298
|
+
# # true
|
2299
|
+
# # ]
|
965
2300
|
def is_duplicated
|
966
2301
|
Utils.wrap_s(_df.is_duplicated)
|
967
2302
|
end
|
968
2303
|
|
2304
|
+
# Get a mask of all unique rows in this DataFrame.
|
2305
|
+
#
|
2306
|
+
# @return [Series]
|
2307
|
+
#
|
2308
|
+
# @example
|
2309
|
+
# df = Polars::DataFrame.new(
|
2310
|
+
# {
|
2311
|
+
# "a" => [1, 2, 3, 1],
|
2312
|
+
# "b" => ["x", "y", "z", "x"]
|
2313
|
+
# }
|
2314
|
+
# )
|
2315
|
+
# df.is_unique
|
2316
|
+
# # =>
|
2317
|
+
# # shape: (4,)
|
2318
|
+
# # Series: '' [bool]
|
2319
|
+
# # [
|
2320
|
+
# # false
|
2321
|
+
# # true
|
2322
|
+
# # true
|
2323
|
+
# # false
|
2324
|
+
# # ]
|
969
2325
|
def is_unique
|
970
2326
|
Utils.wrap_s(_df.is_unique)
|
971
2327
|
end
|
972
2328
|
|
2329
|
+
# Start a lazy query from this point.
|
2330
|
+
#
|
2331
|
+
# @return [LazyFrame]
|
973
2332
|
def lazy
|
974
2333
|
wrap_ldf(_df.lazy)
|
975
2334
|
end
|
976
2335
|
|
2336
|
+
# Select columns from this DataFrame.
|
2337
|
+
#
|
2338
|
+
# @param exprs [Object]
|
2339
|
+
# Column or columns to select.
|
2340
|
+
#
|
2341
|
+
# @return [DataFrame]
|
2342
|
+
#
|
2343
|
+
# @example
|
2344
|
+
# df = Polars::DataFrame.new(
|
2345
|
+
# {
|
2346
|
+
# "foo" => [1, 2, 3],
|
2347
|
+
# "bar" => [6, 7, 8],
|
2348
|
+
# "ham" => ["a", "b", "c"]
|
2349
|
+
# }
|
2350
|
+
# )
|
2351
|
+
# df.select("foo")
|
2352
|
+
# # =>
|
2353
|
+
# # shape: (3, 1)
|
2354
|
+
# # ┌─────┐
|
2355
|
+
# # │ foo │
|
2356
|
+
# # │ --- │
|
2357
|
+
# # │ i64 │
|
2358
|
+
# # ╞═════╡
|
2359
|
+
# # │ 1 │
|
2360
|
+
# # ├╌╌╌╌╌┤
|
2361
|
+
# # │ 2 │
|
2362
|
+
# # ├╌╌╌╌╌┤
|
2363
|
+
# # │ 3 │
|
2364
|
+
# # └─────┘
|
2365
|
+
#
|
2366
|
+
# @example
|
2367
|
+
# df.select(["foo", "bar"])
|
2368
|
+
# # =>
|
2369
|
+
# # shape: (3, 2)
|
2370
|
+
# # ┌─────┬─────┐
|
2371
|
+
# # │ foo ┆ bar │
|
2372
|
+
# # │ --- ┆ --- │
|
2373
|
+
# # │ i64 ┆ i64 │
|
2374
|
+
# # ╞═════╪═════╡
|
2375
|
+
# # │ 1 ┆ 6 │
|
2376
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
2377
|
+
# # │ 2 ┆ 7 │
|
2378
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
2379
|
+
# # │ 3 ┆ 8 │
|
2380
|
+
# # └─────┴─────┘
|
2381
|
+
#
|
2382
|
+
# @example
|
2383
|
+
# df.select(Polars.col("foo") + 1)
|
2384
|
+
# # =>
|
2385
|
+
# # shape: (3, 1)
|
2386
|
+
# # ┌─────┐
|
2387
|
+
# # │ foo │
|
2388
|
+
# # │ --- │
|
2389
|
+
# # │ i64 │
|
2390
|
+
# # ╞═════╡
|
2391
|
+
# # │ 2 │
|
2392
|
+
# # ├╌╌╌╌╌┤
|
2393
|
+
# # │ 3 │
|
2394
|
+
# # ├╌╌╌╌╌┤
|
2395
|
+
# # │ 4 │
|
2396
|
+
# # └─────┘
|
2397
|
+
#
|
2398
|
+
# @example
|
2399
|
+
# df.select([Polars.col("foo") + 1, Polars.col("bar") + 1])
|
2400
|
+
# # =>
|
2401
|
+
# # shape: (3, 2)
|
2402
|
+
# # ┌─────┬─────┐
|
2403
|
+
# # │ foo ┆ bar │
|
2404
|
+
# # │ --- ┆ --- │
|
2405
|
+
# # │ i64 ┆ i64 │
|
2406
|
+
# # ╞═════╪═════╡
|
2407
|
+
# # │ 2 ┆ 7 │
|
2408
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
2409
|
+
# # │ 3 ┆ 8 │
|
2410
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
2411
|
+
# # │ 4 ┆ 9 │
|
2412
|
+
# # └─────┴─────┘
|
2413
|
+
#
|
2414
|
+
# @example
|
2415
|
+
# df.select(Polars.when(Polars.col("foo") > 2).then(10).otherwise(0))
|
2416
|
+
# # =>
|
2417
|
+
# # shape: (3, 1)
|
2418
|
+
# # ┌─────────┐
|
2419
|
+
# # │ literal │
|
2420
|
+
# # │ --- │
|
2421
|
+
# # │ i64 │
|
2422
|
+
# # ╞═════════╡
|
2423
|
+
# # │ 0 │
|
2424
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
2425
|
+
# # │ 0 │
|
2426
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
2427
|
+
# # │ 10 │
|
2428
|
+
# # └─────────┘
|
977
2429
|
def select(exprs)
|
978
2430
|
_from_rbdf(
|
979
2431
|
lazy
|
@@ -983,6 +2435,43 @@ module Polars
|
|
983
2435
|
)
|
984
2436
|
end
|
985
2437
|
|
2438
|
+
# Add or overwrite multiple columns in a DataFrame.
|
2439
|
+
#
|
2440
|
+
# @param exprs [Array]
|
2441
|
+
# Array of Expressions that evaluate to columns.
|
2442
|
+
#
|
2443
|
+
# @return [DataFrame]
|
2444
|
+
#
|
2445
|
+
# @example
|
2446
|
+
# df = Polars::DataFrame.new(
|
2447
|
+
# {
|
2448
|
+
# "a" => [1, 2, 3, 4],
|
2449
|
+
# "b" => [0.5, 4, 10, 13],
|
2450
|
+
# "c" => [true, true, false, true]
|
2451
|
+
# }
|
2452
|
+
# )
|
2453
|
+
# df.with_columns(
|
2454
|
+
# [
|
2455
|
+
# (Polars.col("a") ** 2).alias("a^2"),
|
2456
|
+
# (Polars.col("b") / 2).alias("b/2"),
|
2457
|
+
# (Polars.col("c").is_not).alias("not c")
|
2458
|
+
# ]
|
2459
|
+
# )
|
2460
|
+
# # =>
|
2461
|
+
# # shape: (4, 6)
|
2462
|
+
# # ┌─────┬──────┬───────┬──────┬──────┬───────┐
|
2463
|
+
# # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
|
2464
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
2465
|
+
# # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │
|
2466
|
+
# # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
|
2467
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │
|
2468
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
2469
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │
|
2470
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
2471
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
|
2472
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
2473
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
|
2474
|
+
# # └─────┴──────┴───────┴──────┴──────┴───────┘
|
986
2475
|
def with_columns(exprs)
|
987
2476
|
if !exprs.nil? && !exprs.is_a?(Array)
|
988
2477
|
exprs = [exprs]
|
@@ -992,6 +2481,26 @@ module Polars
|
|
992
2481
|
.collect(no_optimization: true, string_cache: false)
|
993
2482
|
end
|
994
2483
|
|
2484
|
+
# Get number of chunks used by the ChunkedArrays of this DataFrame.
|
2485
|
+
#
|
2486
|
+
# @param strategy ["first", "all"]
|
2487
|
+
# Return the number of chunks of the 'first' column,
|
2488
|
+
# or 'all' columns in this DataFrame.
|
2489
|
+
#
|
2490
|
+
# @return [Object]
|
2491
|
+
#
|
2492
|
+
# @example
|
2493
|
+
# df = Polars::DataFrame.new(
|
2494
|
+
# {
|
2495
|
+
# "a" => [1, 2, 3, 4],
|
2496
|
+
# "b" => [0.5, 4, 10, 13],
|
2497
|
+
# "c" => [true, true, false, true]
|
2498
|
+
# }
|
2499
|
+
# )
|
2500
|
+
# df.n_chunks
|
2501
|
+
# # => 1
|
2502
|
+
# df.n_chunks(strategy: "all")
|
2503
|
+
# # => [1, 1, 1]
|
995
2504
|
def n_chunks(strategy: "first")
|
996
2505
|
if strategy == "first"
|
997
2506
|
_df.n_chunks
|
@@ -1002,6 +2511,28 @@ module Polars
|
|
1002
2511
|
end
|
1003
2512
|
end
|
1004
2513
|
|
2514
|
+
# Aggregate the columns of this DataFrame to their maximum value.
|
2515
|
+
#
|
2516
|
+
# @return [DataFrame]
|
2517
|
+
#
|
2518
|
+
# @example
|
2519
|
+
# df = Polars::DataFrame.new(
|
2520
|
+
# {
|
2521
|
+
# "foo" => [1, 2, 3],
|
2522
|
+
# "bar" => [6, 7, 8],
|
2523
|
+
# "ham" => ["a", "b", "c"]
|
2524
|
+
# }
|
2525
|
+
# )
|
2526
|
+
# df.max
|
2527
|
+
# # =>
|
2528
|
+
# # shape: (1, 3)
|
2529
|
+
# # ┌─────┬─────┬─────┐
|
2530
|
+
# # │ foo ┆ bar ┆ ham │
|
2531
|
+
# # │ --- ┆ --- ┆ --- │
|
2532
|
+
# # │ i64 ┆ i64 ┆ str │
|
2533
|
+
# # ╞═════╪═════╪═════╡
|
2534
|
+
# # │ 3 ┆ 8 ┆ c │
|
2535
|
+
# # └─────┴─────┴─────┘
|
1005
2536
|
def max(axis: 0)
|
1006
2537
|
if axis == 0
|
1007
2538
|
_from_rbdf(_df.max)
|
@@ -1012,6 +2543,28 @@ module Polars
|
|
1012
2543
|
end
|
1013
2544
|
end
|
1014
2545
|
|
2546
|
+
# Aggregate the columns of this DataFrame to their minimum value.
|
2547
|
+
#
|
2548
|
+
# @return [DataFrame]
|
2549
|
+
#
|
2550
|
+
# @example
|
2551
|
+
# df = Polars::DataFrame.new(
|
2552
|
+
# {
|
2553
|
+
# "foo" => [1, 2, 3],
|
2554
|
+
# "bar" => [6, 7, 8],
|
2555
|
+
# "ham" => ["a", "b", "c"]
|
2556
|
+
# }
|
2557
|
+
# )
|
2558
|
+
# df.min
|
2559
|
+
# # =>
|
2560
|
+
# # shape: (1, 3)
|
2561
|
+
# # ┌─────┬─────┬─────┐
|
2562
|
+
# # │ foo ┆ bar ┆ ham │
|
2563
|
+
# # │ --- ┆ --- ┆ --- │
|
2564
|
+
# # │ i64 ┆ i64 ┆ str │
|
2565
|
+
# # ╞═════╪═════╪═════╡
|
2566
|
+
# # │ 1 ┆ 6 ┆ a │
|
2567
|
+
# # └─────┴─────┴─────┘
|
1015
2568
|
def min(axis: 0)
|
1016
2569
|
if axis == 0
|
1017
2570
|
_from_rbdf(_df.min)
|
@@ -1022,6 +2575,44 @@ module Polars
|
|
1022
2575
|
end
|
1023
2576
|
end
|
1024
2577
|
|
2578
|
+
# Aggregate the columns of this DataFrame to their sum value.
|
2579
|
+
#
|
2580
|
+
# @param axis [Integer]
|
2581
|
+
# Either 0 or 1.
|
2582
|
+
# @param null_strategy ["ignore", "propagate"]
|
2583
|
+
# This argument is only used if axis == 1.
|
2584
|
+
#
|
2585
|
+
# @return [DataFrame]
|
2586
|
+
#
|
2587
|
+
# @example
|
2588
|
+
# df = Polars::DataFrame.new(
|
2589
|
+
# {
|
2590
|
+
# "foo" => [1, 2, 3],
|
2591
|
+
# "bar" => [6, 7, 8],
|
2592
|
+
# "ham" => ["a", "b", "c"],
|
2593
|
+
# }
|
2594
|
+
# )
|
2595
|
+
# df.sum
|
2596
|
+
# # =>
|
2597
|
+
# # shape: (1, 3)
|
2598
|
+
# # ┌─────┬─────┬──────┐
|
2599
|
+
# # │ foo ┆ bar ┆ ham │
|
2600
|
+
# # │ --- ┆ --- ┆ --- │
|
2601
|
+
# # │ i64 ┆ i64 ┆ str │
|
2602
|
+
# # ╞═════╪═════╪══════╡
|
2603
|
+
# # │ 6 ┆ 21 ┆ null │
|
2604
|
+
# # └─────┴─────┴──────┘
|
2605
|
+
#
|
2606
|
+
# @example
|
2607
|
+
# df.sum(axis: 1)
|
2608
|
+
# # =>
|
2609
|
+
# # shape: (3,)
|
2610
|
+
# # Series: 'foo' [str]
|
2611
|
+
# # [
|
2612
|
+
# # "16a"
|
2613
|
+
# # "27b"
|
2614
|
+
# # "38c"
|
2615
|
+
# # ]
|
1025
2616
|
def sum(axis: 0, null_strategy: "ignore")
|
1026
2617
|
case axis
|
1027
2618
|
when 0
|
@@ -1033,6 +2624,33 @@ module Polars
|
|
1033
2624
|
end
|
1034
2625
|
end
|
1035
2626
|
|
2627
|
+
# Aggregate the columns of this DataFrame to their mean value.
|
2628
|
+
#
|
2629
|
+
# @param axis [Integer]
|
2630
|
+
# Either 0 or 1.
|
2631
|
+
# @param null_strategy ["ignore", "propagate"]
|
2632
|
+
# This argument is only used if axis == 1.
|
2633
|
+
#
|
2634
|
+
# @return [DataFrame]
|
2635
|
+
#
|
2636
|
+
# @example
|
2637
|
+
# df = Polars::DataFrame.new(
|
2638
|
+
# {
|
2639
|
+
# "foo" => [1, 2, 3],
|
2640
|
+
# "bar" => [6, 7, 8],
|
2641
|
+
# "ham" => ["a", "b", "c"]
|
2642
|
+
# }
|
2643
|
+
# )
|
2644
|
+
# df.mean
|
2645
|
+
# # =>
|
2646
|
+
# # shape: (1, 3)
|
2647
|
+
# # ┌─────┬─────┬──────┐
|
2648
|
+
# # │ foo ┆ bar ┆ ham │
|
2649
|
+
# # │ --- ┆ --- ┆ --- │
|
2650
|
+
# # │ f64 ┆ f64 ┆ str │
|
2651
|
+
# # ╞═════╪═════╪══════╡
|
2652
|
+
# # │ 2.0 ┆ 7.0 ┆ null │
|
2653
|
+
# # └─────┴─────┴──────┘
|
1036
2654
|
def mean(axis: 0, null_strategy: "ignore")
|
1037
2655
|
case axis
|
1038
2656
|
when 0
|
@@ -1044,77 +2662,633 @@ module Polars
|
|
1044
2662
|
end
|
1045
2663
|
end
|
1046
2664
|
|
2665
|
+
# Aggregate the columns of this DataFrame to their standard deviation value.
|
2666
|
+
#
|
2667
|
+
# @param ddof [Integer]
|
2668
|
+
# Degrees of freedom
|
2669
|
+
#
|
2670
|
+
# @return [DataFrame]
|
2671
|
+
#
|
2672
|
+
# @example
|
2673
|
+
# df = Polars::DataFrame.new(
|
2674
|
+
# {
|
2675
|
+
# "foo" => [1, 2, 3],
|
2676
|
+
# "bar" => [6, 7, 8],
|
2677
|
+
# "ham" => ["a", "b", "c"]
|
2678
|
+
# }
|
2679
|
+
# )
|
2680
|
+
# df.std
|
2681
|
+
# # =>
|
2682
|
+
# # shape: (1, 3)
|
2683
|
+
# # ┌─────┬─────┬──────┐
|
2684
|
+
# # │ foo ┆ bar ┆ ham │
|
2685
|
+
# # │ --- ┆ --- ┆ --- │
|
2686
|
+
# # │ f64 ┆ f64 ┆ str │
|
2687
|
+
# # ╞═════╪═════╪══════╡
|
2688
|
+
# # │ 1.0 ┆ 1.0 ┆ null │
|
2689
|
+
# # └─────┴─────┴──────┘
|
2690
|
+
#
|
2691
|
+
# @example
|
2692
|
+
# df.std(ddof: 0)
|
2693
|
+
# # =>
|
2694
|
+
# # shape: (1, 3)
|
2695
|
+
# # ┌──────────┬──────────┬──────┐
|
2696
|
+
# # │ foo ┆ bar ┆ ham │
|
2697
|
+
# # │ --- ┆ --- ┆ --- │
|
2698
|
+
# # │ f64 ┆ f64 ┆ str │
|
2699
|
+
# # ╞══════════╪══════════╪══════╡
|
2700
|
+
# # │ 0.816497 ┆ 0.816497 ┆ null │
|
2701
|
+
# # └──────────┴──────────┴──────┘
|
1047
2702
|
def std(ddof: 1)
  # Delegate to the underlying `_df` object, then wrap what it returns.
  rb_result = _df.std(ddof)
  _from_rbdf(rb_result)
end
|
1050
2705
|
|
2706
|
+
# Aggregate the columns of this DataFrame to their variance value.
|
2707
|
+
#
|
2708
|
+
# @param ddof [Integer]
|
2709
|
+
# Degrees of freedom
|
2710
|
+
#
|
2711
|
+
# @return [DataFrame]
|
2712
|
+
#
|
2713
|
+
# @example
|
2714
|
+
# df = Polars::DataFrame.new(
|
2715
|
+
# {
|
2716
|
+
# "foo" => [1, 2, 3],
|
2717
|
+
# "bar" => [6, 7, 8],
|
2718
|
+
# "ham" => ["a", "b", "c"]
|
2719
|
+
# }
|
2720
|
+
# )
|
2721
|
+
# df.var
|
2722
|
+
# # =>
|
2723
|
+
# # shape: (1, 3)
|
2724
|
+
# # ┌─────┬─────┬──────┐
|
2725
|
+
# # │ foo ┆ bar ┆ ham │
|
2726
|
+
# # │ --- ┆ --- ┆ --- │
|
2727
|
+
# # │ f64 ┆ f64 ┆ str │
|
2728
|
+
# # ╞═════╪═════╪══════╡
|
2729
|
+
# # │ 1.0 ┆ 1.0 ┆ null │
|
2730
|
+
# # └─────┴─────┴──────┘
|
2731
|
+
#
|
2732
|
+
# @example
|
2733
|
+
# df.var(ddof: 0)
|
2734
|
+
# # =>
|
2735
|
+
# # shape: (1, 3)
|
2736
|
+
# # ┌──────────┬──────────┬──────┐
|
2737
|
+
# # │ foo ┆ bar ┆ ham │
|
2738
|
+
# # │ --- ┆ --- ┆ --- │
|
2739
|
+
# # │ f64 ┆ f64 ┆ str │
|
2740
|
+
# # ╞══════════╪══════════╪══════╡
|
2741
|
+
# # │ 0.666667 ┆ 0.666667 ┆ null │
|
2742
|
+
# # └──────────┴──────────┴──────┘
|
1051
2743
|
def var(ddof: 1)
  # Delegate to the underlying `_df` object, then wrap what it returns.
  rb_result = _df.var(ddof)
  _from_rbdf(rb_result)
end
|
1054
2746
|
|
2747
|
+
# Aggregate the columns of this DataFrame to their median value.
|
2748
|
+
#
|
2749
|
+
# @return [DataFrame]
|
2750
|
+
#
|
2751
|
+
# @example
|
2752
|
+
# df = Polars::DataFrame.new(
|
2753
|
+
# {
|
2754
|
+
# "foo" => [1, 2, 3],
|
2755
|
+
# "bar" => [6, 7, 8],
|
2756
|
+
# "ham" => ["a", "b", "c"]
|
2757
|
+
# }
|
2758
|
+
# )
|
2759
|
+
# df.median
|
2760
|
+
# # =>
|
2761
|
+
# # shape: (1, 3)
|
2762
|
+
# # ┌─────┬─────┬──────┐
|
2763
|
+
# # │ foo ┆ bar ┆ ham │
|
2764
|
+
# # │ --- ┆ --- ┆ --- │
|
2765
|
+
# # │ f64 ┆ f64 ┆ str │
|
2766
|
+
# # ╞═════╪═════╪══════╡
|
2767
|
+
# # │ 2.0 ┆ 7.0 ┆ null │
|
2768
|
+
# # └─────┴─────┴──────┘
|
1055
2769
|
def median
  # Delegate to the underlying `_df` object, then wrap what it returns.
  rb_result = _df.median
  _from_rbdf(rb_result)
end
|
1058
2772
|
|
1059
|
-
#
|
1060
|
-
#
|
2773
|
+
# Aggregate the columns of this DataFrame to their product values.
|
2774
|
+
#
|
2775
|
+
# @return [DataFrame]
|
2776
|
+
#
|
2777
|
+
# @example
|
2778
|
+
# df = Polars::DataFrame.new(
|
2779
|
+
# {
|
2780
|
+
# "a" => [1, 2, 3],
|
2781
|
+
# "b" => [0.5, 4, 10],
|
2782
|
+
# "c" => [true, true, false]
|
2783
|
+
# }
|
2784
|
+
# )
|
2785
|
+
# df.product
|
2786
|
+
# # =>
|
2787
|
+
# # shape: (1, 3)
|
2788
|
+
# # ┌─────┬──────┬─────┐
|
2789
|
+
# # │ a ┆ b ┆ c │
|
2790
|
+
# # │ --- ┆ --- ┆ --- │
|
2791
|
+
# # │ i64 ┆ f64 ┆ i64 │
|
2792
|
+
# # ╞═════╪══════╪═════╡
|
2793
|
+
# # │ 6 ┆ 20.0 ┆ 0 │
|
2794
|
+
# # └─────┴──────┴─────┘
|
2795
|
+
def product
  # Expressed as a selection: apply `product` to the all-columns expression.
  all_columns_product = Polars.all.product
  select(all_columns_product)
end
|
2798
|
+
|
2799
|
+
# Aggregate the columns of this DataFrame to their quantile value.
|
2800
|
+
#
|
2801
|
+
# @param quantile [Float]
|
2802
|
+
# Quantile between 0.0 and 1.0.
|
2803
|
+
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
|
2804
|
+
# Interpolation method.
|
2805
|
+
#
|
2806
|
+
# @return [DataFrame]
|
2807
|
+
#
|
2808
|
+
# @example
|
2809
|
+
# df = Polars::DataFrame.new(
|
2810
|
+
# {
|
2811
|
+
# "foo" => [1, 2, 3],
|
2812
|
+
# "bar" => [6, 7, 8],
|
2813
|
+
# "ham" => ["a", "b", "c"]
|
2814
|
+
# }
|
2815
|
+
# )
|
2816
|
+
# df.quantile(0.5, interpolation: "nearest")
|
2817
|
+
# # =>
|
2818
|
+
# # shape: (1, 3)
|
2819
|
+
# # ┌─────┬─────┬──────┐
|
2820
|
+
# # │ foo ┆ bar ┆ ham │
|
2821
|
+
# # │ --- ┆ --- ┆ --- │
|
2822
|
+
# # │ f64 ┆ f64 ┆ str │
|
2823
|
+
# # ╞═════╪═════╪══════╡
|
2824
|
+
# # │ 2.0 ┆ 7.0 ┆ null │
|
2825
|
+
# # └─────┴─────┴──────┘
|
2826
|
+
def quantile(quantile, interpolation: "nearest")
  # Both arguments are forwarded positionally to the underlying `_df` object.
  rb_result = _df.quantile(quantile, interpolation)
  _from_rbdf(rb_result)
end
|
2829
|
+
|
2830
|
+
# Get one hot encoded dummy variables.
|
2831
|
+
#
|
2832
|
+
# @param columns
|
2833
|
+
# A subset of columns to convert to dummy variables. `nil` means
|
2834
|
+
# "all columns".
|
2835
|
+
#
|
2836
|
+
# @return [DataFrame]
|
2837
|
+
#
|
2838
|
+
# @example
|
2839
|
+
# df = Polars::DataFrame.new(
|
2840
|
+
# {
|
2841
|
+
# "foo" => [1, 2],
|
2842
|
+
# "bar" => [3, 4],
|
2843
|
+
# "ham" => ["a", "b"]
|
2844
|
+
# }
|
2845
|
+
# )
|
2846
|
+
# df.to_dummies
|
2847
|
+
# # =>
|
2848
|
+
# # shape: (2, 6)
|
2849
|
+
# # ┌───────┬───────┬───────┬───────┬───────┬───────┐
|
2850
|
+
# # │ foo_1 ┆ foo_2 ┆ bar_3 ┆ bar_4 ┆ ham_a ┆ ham_b │
|
2851
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
2852
|
+
# # │ u8 ┆ u8 ┆ u8 ┆ u8 ┆ u8 ┆ u8 │
|
2853
|
+
# # ╞═══════╪═══════╪═══════╪═══════╪═══════╪═══════╡
|
2854
|
+
# # │ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 │
|
2855
|
+
# # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
2856
|
+
# # │ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 │
|
2857
|
+
# # └───────┴───────┴───────┴───────┴───────┴───────┘
|
2858
|
+
def to_dummies(columns: nil)
  # A single String is wrapped in a one-element Array before delegating;
  # any other value (including nil) is forwarded untouched.
  columns = [columns] if columns.is_a?(String)
  _from_rbdf(_df.to_dummies(columns))
end
|
1061
2864
|
|
1062
|
-
#
|
1063
|
-
#
|
2865
|
+
# Drop duplicate rows from this DataFrame.
|
2866
|
+
#
|
2867
|
+
# @param maintain_order [Boolean]
|
2868
|
+
# Keep the same order as the original DataFrame. This requires more work to
|
2869
|
+
# compute.
|
2870
|
+
# @param subset [Object]
|
2871
|
+
# Subset to use to compare rows.
|
2872
|
+
# @param keep ["first", "last"]
|
2873
|
+
# Which of the duplicate rows to keep (in conjunction with `subset`).
|
2874
|
+
#
|
2875
|
+
# @return [DataFrame]
|
2876
|
+
#
|
2877
|
+
# @note
|
2878
|
+
# Note that this fails if there is a column of type `List` in the DataFrame or
|
2879
|
+
# subset.
|
2880
|
+
#
|
2881
|
+
# @example
|
2882
|
+
# df = Polars::DataFrame.new(
|
2883
|
+
# {
|
2884
|
+
# "a" => [1, 1, 2, 3, 4, 5],
|
2885
|
+
# "b" => [0.5, 0.5, 1.0, 2.0, 3.0, 3.0],
|
2886
|
+
# "c" => [true, true, true, false, true, true]
|
2887
|
+
# }
|
2888
|
+
# )
|
2889
|
+
# df.unique
|
2890
|
+
# # =>
|
2891
|
+
# # shape: (5, 3)
|
2892
|
+
# # ┌─────┬─────┬───────┐
|
2893
|
+
# # │ a ┆ b ┆ c │
|
2894
|
+
# # │ --- ┆ --- ┆ --- │
|
2895
|
+
# # │ i64 ┆ f64 ┆ bool │
|
2896
|
+
# # ╞═════╪═════╪═══════╡
|
2897
|
+
# # │ 1 ┆ 0.5 ┆ true │
|
2898
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
2899
|
+
# # │ 2 ┆ 1.0 ┆ true │
|
2900
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
2901
|
+
# # │ 3 ┆ 2.0 ┆ false │
|
2902
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
2903
|
+
# # │ 4 ┆ 3.0 ┆ true │
|
2904
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
2905
|
+
# # │ 5 ┆ 3.0 ┆ true │
|
2906
|
+
# # └─────┴─────┴───────┘
|
2907
|
+
def unique(maintain_order: true, subset: nil, keep: "first")
  # Normalize `subset`: nil and Arrays pass through, a String becomes a
  # one-element Array, and anything else is converted with `to_a`.
  subset =
    case subset
    when nil, Array then subset
    when String then [subset]
    else subset.to_a
    end

  _from_rbdf(_df.unique(maintain_order, subset, keep))
end
|
1067
2918
|
|
1068
|
-
#
|
1069
|
-
#
|
2919
|
+
# Return the number of unique rows, or the number of unique row-subsets.
|
2920
|
+
#
|
2921
|
+
# @param subset [Object]
|
2922
|
+
# One or more columns/expressions that define what to count;
|
2923
|
+
# omit to return the count of unique rows.
|
2924
|
+
#
|
2925
|
+
# @return [DataFrame]
|
2926
|
+
#
|
2927
|
+
# @example
|
2928
|
+
# df = Polars::DataFrame.new(
|
2929
|
+
# {
|
2930
|
+
# "a" => [1, 1, 2, 3, 4, 5],
|
2931
|
+
# "b" => [0.5, 0.5, 1.0, 2.0, 3.0, 3.0],
|
2932
|
+
# "c" => [true, true, true, false, true, true]
|
2933
|
+
# }
|
2934
|
+
# )
|
2935
|
+
# df.n_unique
|
2936
|
+
# # => 5
|
2937
|
+
#
|
2938
|
+
# @example Simple columns subset
|
2939
|
+
# df.n_unique(subset: ["b", "c"])
|
2940
|
+
# # => 4
|
2941
|
+
#
|
2942
|
+
# @example Expression subset
|
2943
|
+
# df.n_unique(
|
2944
|
+
# subset: [
|
2945
|
+
# (Polars.col("a").floordiv(2)),
|
2946
|
+
# (Polars.col("c") | (Polars.col("b") >= 2))
|
2947
|
+
# ]
|
2948
|
+
# )
|
2949
|
+
# # => 3
|
2950
|
+
def n_unique(subset: nil)
  # A bare column name or a single expression is treated as a one-element
  # subset. The name check must be against String (not StringIO) so that
  # `n_unique(subset: "a")` is turned into a column expression.
  if subset.is_a?(String)
    subset = [Polars.col(subset)]
  elsif subset.is_a?(Expr)
    subset = [subset]
  end

  expr =
    if subset.is_a?(Array) && subset.length == 1
      Utils.expr_to_lit_or_expr(subset[0], str_to_lit: false)
    else
      # Several entries (or all columns when no subset is given) are packed
      # into a single struct expression before counting.
      Polars.struct(subset.nil? ? Polars.all : subset)
    end

  df = lazy.select(expr.n_unique).collect
  df.is_empty ? 0 : df.row(0)[0]
end
|
2967
|
+
|
2968
|
+
# Rechunk the data in this DataFrame to a contiguous allocation.
|
1073
2969
|
|
2970
|
+
# This will make sure all subsequent operations have optimal and predictable
|
2971
|
+
# performance.
|
1074
2972
|
#
|
2973
|
+
# @return [DataFrame]
|
1075
2974
|
def rechunk
  # Delegate to the underlying `_df` object, then wrap what it returns.
  rb_result = _df.rechunk
  _from_rbdf(rb_result)
end
|
1078
2977
|
|
2978
|
+
# Create a new DataFrame that shows the null counts per column.
|
2979
|
+
#
|
2980
|
+
# @return [DataFrame]
|
2981
|
+
#
|
2982
|
+
# @example
|
2983
|
+
# df = Polars::DataFrame.new(
|
2984
|
+
# {
|
2985
|
+
# "foo" => [1, nil, 3],
|
2986
|
+
# "bar" => [6, 7, nil],
|
2987
|
+
# "ham" => ["a", "b", "c"]
|
2988
|
+
# }
|
2989
|
+
# )
|
2990
|
+
# df.null_count
|
2991
|
+
# # =>
|
2992
|
+
# # shape: (1, 3)
|
2993
|
+
# # ┌─────┬─────┬─────┐
|
2994
|
+
# # │ foo ┆ bar ┆ ham │
|
2995
|
+
# # │ --- ┆ --- ┆ --- │
|
2996
|
+
# # │ u32 ┆ u32 ┆ u32 │
|
2997
|
+
# # ╞═════╪═════╪═════╡
|
2998
|
+
# # │ 1 ┆ 1 ┆ 0 │
|
2999
|
+
# # └─────┴─────┴─────┘
|
1079
3000
|
def null_count
  # Delegate to the underlying `_df` object, then wrap what it returns.
  rb_result = _df.null_count
  _from_rbdf(rb_result)
end
|
1082
3003
|
|
1083
|
-
#
|
1084
|
-
#
|
3004
|
+
# Sample from this DataFrame.
|
3005
|
+
#
|
3006
|
+
# @param n [Integer]
|
3007
|
+
# Number of items to return. Cannot be used with `frac`. Defaults to 1 if
|
3008
|
+
# `frac` is nil.
|
3009
|
+
# @param frac [Float]
|
3010
|
+
# Fraction of items to return. Cannot be used with `n`.
|
3011
|
+
# @param with_replacement [Boolean]
|
3012
|
+
# Allow values to be sampled more than once.
|
3013
|
+
# @param shuffle [Boolean]
|
3014
|
+
# Shuffle the order of sampled data points.
|
3015
|
+
# @param seed [Integer]
|
3016
|
+
# Seed for the random number generator. If set to nil (default), a random
|
3017
|
+
# seed is used.
|
3018
|
+
#
|
3019
|
+
# @return [DataFrame]
|
3020
|
+
#
|
3021
|
+
# @example
|
3022
|
+
# df = Polars::DataFrame.new(
|
3023
|
+
# {
|
3024
|
+
# "foo" => [1, 2, 3],
|
3025
|
+
# "bar" => [6, 7, 8],
|
3026
|
+
# "ham" => ["a", "b", "c"]
|
3027
|
+
# }
|
3028
|
+
# )
|
3029
|
+
# df.sample(n: 2, seed: 0)
|
3030
|
+
# # =>
|
3031
|
+
# # shape: (2, 3)
|
3032
|
+
# # ┌─────┬─────┬─────┐
|
3033
|
+
# # │ foo ┆ bar ┆ ham │
|
3034
|
+
# # │ --- ┆ --- ┆ --- │
|
3035
|
+
# # │ i64 ┆ i64 ┆ str │
|
3036
|
+
# # ╞═════╪═════╪═════╡
|
3037
|
+
# # │ 3 ┆ 8 ┆ c │
|
3038
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
3039
|
+
# # │ 2 ┆ 7 ┆ b │
|
3040
|
+
# # └─────┴─────┴─────┘
|
3041
|
+
def sample(
  n: nil,
  frac: nil,
  with_replacement: false,
  shuffle: false,
  seed: nil
)
  if !n.nil? && !frac.nil?
    raise ArgumentError, "cannot specify both `n` and `frac`"
  end

  # Fraction-based sampling has to return here. Without the explicit return
  # the result is discarded and execution falls through to the count-based
  # path below, which would sample a single row and ignore `frac`.
  unless frac.nil?
    return _from_rbdf(_df.sample_frac(frac, with_replacement, shuffle, seed))
  end

  # Neither `n` nor `frac` given: draw one row.
  n = 1 if n.nil?
  _from_rbdf(_df.sample_n(n, with_replacement, shuffle, seed))
end
|
1085
3063
|
|
1086
3064
|
# def fold
|
1087
3065
|
# end
|
1088
3066
|
|
1089
|
-
#
|
1090
|
-
#
|
3067
|
+
# Get a row as tuple, either by index or by predicate.
|
3068
|
+
#
|
3069
|
+
# @param index [Object]
|
3070
|
+
# Row index.
|
3071
|
+
# @param by_predicate [Object]
|
3072
|
+
# Select the row according to a given expression/predicate.
|
3073
|
+
#
|
3074
|
+
# @return [Object]
|
3075
|
+
#
|
3076
|
+
# @note
|
3077
|
+
# The `index` and `by_predicate` params are mutually exclusive. Additionally,
|
3078
|
+
# to ensure clarity, the `by_predicate` parameter must be supplied by keyword.
|
3079
|
+
#
|
3080
|
+
# When using `by_predicate` it is an error condition if anything other than
|
3081
|
+
# one row is returned; more than one row raises `TooManyRowsReturned`, and
|
3082
|
+
# zero rows will raise `NoRowsReturned` (both inherit from `RowsException`).
|
3083
|
+
#
|
3084
|
+
# @example Return the row at the given index
|
3085
|
+
# df = Polars::DataFrame.new(
|
3086
|
+
# {
|
3087
|
+
# "foo" => [1, 2, 3],
|
3088
|
+
# "bar" => [6, 7, 8],
|
3089
|
+
# "ham" => ["a", "b", "c"]
|
3090
|
+
# }
|
3091
|
+
# )
|
3092
|
+
# df.row(2)
|
3093
|
+
# # => [3, 8, "c"]
|
3094
|
+
#
|
3095
|
+
# @example Return the row that matches the given predicate
|
3096
|
+
# df.row(by_predicate: Polars.col("ham") == "b")
|
3097
|
+
# # => [2, 7, "b"]
|
3098
|
+
def row(index = nil, by_predicate: nil)
  if !index.nil? && !by_predicate.nil?
    raise ArgumentError, "Cannot set both 'index' and 'by_predicate'; mutually exclusive"
  elsif index.is_a?(Expr)
    raise TypeError, "Expressions should be passed to the 'by_predicate' param"
  elsif index.is_a?(Integer)
    _df.row_tuple(index)
  elsif by_predicate.is_a?(Expr)
    # The predicate must select exactly one row; anything else is an error.
    rows = filter(by_predicate).rows
    n_rows = rows.length
    if n_rows > 1
      raise TooManyRowsReturned, "Predicate #{by_predicate} returned #{n_rows} rows"
    elsif n_rows == 0
      # Ruby interpolation (`#{...}`) so the predicate actually appears in
      # the message instead of a literal `{by_predicate!s}` placeholder.
      raise NoRowsReturned, "Predicate <#{by_predicate}> returned no rows"
    end
    rows[0]
  else
    raise ArgumentError, "One of 'index' or 'by_predicate' must be set"
  end
end
|
1091
3118
|
|
1092
|
-
#
|
1093
|
-
#
|
3119
|
+
# Convert columnar data to rows as Ruby arrays.
|
3120
|
+
#
|
3121
|
+
# @return [Array]
|
3122
|
+
#
|
3123
|
+
# @example
|
3124
|
+
# df = Polars::DataFrame.new(
|
3125
|
+
# {
|
3126
|
+
# "a" => [1, 3, 5],
|
3127
|
+
# "b" => [2, 4, 6]
|
3128
|
+
# }
|
3129
|
+
# )
|
3130
|
+
# df.rows
|
3131
|
+
# # => [[1, 2], [3, 4], [5, 6]]
|
3132
|
+
def rows
  # The row values come straight from the underlying `_df` object.
  rb_frame = _df
  rb_frame.row_tuples
end
|
1094
3135
|
|
1095
|
-
#
|
1096
|
-
#
|
3136
|
+
# Shrink DataFrame memory usage.
|
3137
|
+
#
|
3138
|
+
# Shrinks to fit the exact capacity needed to hold the data.
|
3139
|
+
#
|
3140
|
+
# @return [DataFrame]
|
3141
|
+
def shrink_to_fit(in_place: false)
  # Operate on this frame when `in_place` is set, otherwise on a `clone`.
  target = in_place ? self : clone
  target._df.shrink_to_fit
  target
end
|
1097
3151
|
|
1098
|
-
#
|
1099
|
-
#
|
3152
|
+
# Take every nth row in the DataFrame and return as a new DataFrame.
|
3153
|
+
#
|
3154
|
+
# @return [DataFrame]
|
3155
|
+
#
|
3156
|
+
# @example
|
3157
|
+
# s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]})
|
3158
|
+
# s.take_every(2)
|
3159
|
+
# # =>
|
3160
|
+
# # shape: (2, 2)
|
3161
|
+
# # ┌─────┬─────┐
|
3162
|
+
# # │ a ┆ b │
|
3163
|
+
# # │ --- ┆ --- │
|
3164
|
+
# # │ i64 ┆ i64 │
|
3165
|
+
# # ╞═════╪═════╡
|
3166
|
+
# # │ 1 ┆ 5 │
|
3167
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
3168
|
+
# # │ 3 ┆ 7 │
|
3169
|
+
# # └─────┴─────┘
|
3170
|
+
def take_every(n)
  # Expressed as a selection over the "*" column expression.
  every_nth = Utils.col("*").take_every(n)
  select(every_nth)
end
|
1100
3173
|
|
1101
3174
|
# def hash_rows
|
1102
3175
|
# end
|
1103
3176
|
|
1104
|
-
#
|
1105
|
-
#
|
1106
|
-
|
3177
|
+
# Interpolate intermediate values. The interpolation method is linear.
|
3178
|
+
#
|
3179
|
+
# @return [DataFrame]
|
3180
|
+
#
|
3181
|
+
# @example
|
3182
|
+
# df = Polars::DataFrame.new(
|
3183
|
+
# {
|
3184
|
+
# "foo" => [1, nil, 9, 10],
|
3185
|
+
# "bar" => [6, 7, 9, nil],
|
3186
|
+
# "baz" => [1, nil, nil, 9]
|
3187
|
+
# }
|
3188
|
+
# )
|
3189
|
+
# df.interpolate
|
3190
|
+
# # =>
|
3191
|
+
# # shape: (4, 3)
|
3192
|
+
# # ┌─────┬──────┬─────┐
|
3193
|
+
# # │ foo ┆ bar ┆ baz │
|
3194
|
+
# # │ --- ┆ --- ┆ --- │
|
3195
|
+
# # │ i64 ┆ i64 ┆ i64 │
|
3196
|
+
# # ╞═════╪══════╪═════╡
|
3197
|
+
# # │ 1 ┆ 6 ┆ 1 │
|
3198
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
3199
|
+
# # │ 5 ┆ 7 ┆ 3 │
|
3200
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
3201
|
+
# # │ 9 ┆ 9 ┆ 6 │
|
3202
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
|
3203
|
+
# # │ 10 ┆ null ┆ 9 │
|
3204
|
+
# # └─────┴──────┴─────┘
|
3205
|
+
def interpolate
  # Expressed as a selection over the "*" column expression.
  interpolated = Utils.col("*").interpolate
  select(interpolated)
end
|
3208
|
+
|
3209
|
+
# Check if the dataframe is empty.
|
3210
|
+
#
|
3211
|
+
# @return [Boolean]
|
1107
3212
|
#
|
3213
|
+
# @example
|
3214
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
|
3215
|
+
# df.is_empty
|
3216
|
+
# # => false
|
3217
|
+
# df.filter(Polars.col("foo") > 99).is_empty
|
3218
|
+
# # => true
|
1108
3219
|
def is_empty
  # Empty means the frame's `height` is zero.
  height.zero?
end
|
1111
3222
|
alias_method :empty?, :is_empty
|
1112
3223
|
|
1113
|
-
#
|
1114
|
-
#
|
3224
|
+
# Convert a `DataFrame` to a `Series` of type `Struct`.
|
3225
|
+
#
|
3226
|
+
# @param name [String]
|
3227
|
+
# Name for the struct Series
|
3228
|
+
#
|
3229
|
+
# @return [Series]
|
3230
|
+
#
|
3231
|
+
# @example
|
3232
|
+
# df = Polars::DataFrame.new(
|
3233
|
+
# {
|
3234
|
+
# "a" => [1, 2, 3, 4, 5],
|
3235
|
+
# "b" => ["one", "two", "three", "four", "five"]
|
3236
|
+
# }
|
3237
|
+
# )
|
3238
|
+
# df.to_struct("nums")
|
3239
|
+
# # =>
|
3240
|
+
# # shape: (5,)
|
3241
|
+
# # Series: 'nums' [struct[2]]
|
3242
|
+
# # [
|
3243
|
+
# # {1,"one"}
|
3244
|
+
# # {2,"two"}
|
3245
|
+
# # {3,"three"}
|
3246
|
+
# # {4,"four"}
|
3247
|
+
# # {5,"five"}
|
3248
|
+
# # ]
|
3249
|
+
def to_struct(name)
  # `_df.to_struct` produces the raw value; `Utils.wrap_s` wraps it for callers.
  struct_series = _df.to_struct(name)
  Utils.wrap_s(struct_series)
end
|
1115
3252
|
|
1116
|
-
#
|
1117
|
-
#
|
3253
|
+
# Decompose a struct into its fields.
|
3254
|
+
#
|
3255
|
+
# The fields will be inserted into the `DataFrame` on the location of the
|
3256
|
+
# `struct` type.
|
3257
|
+
#
|
3258
|
+
# @param names [Object]
|
3259
|
+
# Names of the struct columns that will be decomposed by its fields
|
3260
|
+
#
|
3261
|
+
# @return [DataFrame]
|
3262
|
+
#
|
3263
|
+
# @example
|
3264
|
+
# df = Polars::DataFrame.new(
|
3265
|
+
# {
|
3266
|
+
# "before" => ["foo", "bar"],
|
3267
|
+
# "t_a" => [1, 2],
|
3268
|
+
# "t_b" => ["a", "b"],
|
3269
|
+
# "t_c" => [true, nil],
|
3270
|
+
# "t_d" => [[1, 2], [3]],
|
3271
|
+
# "after" => ["baz", "womp"]
|
3272
|
+
# }
|
3273
|
+
# ).select(["before", Polars.struct(Polars.col("^t_.$")).alias("t_struct"), "after"])
|
3274
|
+
# df.unnest("t_struct")
|
3275
|
+
# # =>
|
3276
|
+
# # shape: (2, 6)
|
3277
|
+
# # ┌────────┬─────┬─────┬──────┬───────────┬───────┐
|
3278
|
+
# # │ before ┆ t_a ┆ t_b ┆ t_c ┆ t_d ┆ after │
|
3279
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
3280
|
+
# # │ str ┆ i64 ┆ str ┆ bool ┆ list[i64] ┆ str │
|
3281
|
+
# # ╞════════╪═════╪═════╪══════╪═══════════╪═══════╡
|
3282
|
+
# # │ foo ┆ 1 ┆ a ┆ true ┆ [1, 2] ┆ baz │
|
3283
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
3284
|
+
# # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
|
3285
|
+
# # └────────┴─────┴─────┴──────┴───────────┴───────┘
|
3286
|
+
def unnest(names)
  # A single String is wrapped in a one-element Array before delegating.
  names = [names] if names.is_a?(String)
  _from_rbdf(_df.unnest(names))
end
|
1118
3292
|
|
1119
3293
|
private
|
1120
3294
|
|
@@ -1127,7 +3301,7 @@ module Polars
|
|
1127
3301
|
if !columns.nil?
|
1128
3302
|
columns, dtypes = _unpack_columns(columns, lookup_names: data.keys)
|
1129
3303
|
|
1130
|
-
if
|
3304
|
+
if data.empty? && dtypes
|
1131
3305
|
data_series = columns.map { |name| Series.new(name, [], dtype: dtypes[name])._s }
|
1132
3306
|
else
|
1133
3307
|
data_series = data.map { |name, values| Series.new(name, values, dtype: dtypes[name])._s }
|
@@ -1147,7 +3321,7 @@ module Polars
|
|
1147
3321
|
if columns.nil?
|
1148
3322
|
data
|
1149
3323
|
else
|
1150
|
-
if
|
3324
|
+
if data.empty?
|
1151
3325
|
columns.map { |c| Series.new(c, nil)._s }
|
1152
3326
|
elsif data.length == columns.length
|
1153
3327
|
columns.each_with_index do |c, i|
|
@@ -1182,5 +3356,75 @@ module Polars
|
|
1182
3356
|
def _from_rbdf(rb_df)
  # Instance-level shortcut for the class-level method of the same name.
  frame_class = self.class
  frame_class._from_rbdf(rb_df)
end
|
3359
|
+
|
3360
|
+
def _comp(other, op)
  # Frame-vs-frame comparisons take a dedicated path; everything else is
  # handled by the non-frame comparison helper.
  return _compare_to_other_df(other, op) if other.is_a?(DataFrame)

  _compare_to_non_df(other, op)
end
|
3367
|
+
|
3368
|
+
def _compare_to_other_df(other, op)
  # Both frames must have identical column names and identical shape.
  if columns != other.columns
    raise ArgumentError, "DataFrame columns do not match"
  end
  if shape != other.shape
    raise ArgumentError, "DataFrame dimensions do not match"
  end

  # Place the other frame's columns next to ours under suffixed names so each
  # pair can be compared with a column expression.
  suffix = "__POLARS_CMP_OTHER"
  other_renamed = other.select(Polars.all.suffix(suffix))
  combined = Polars.concat([self, other_renamed], how: "horizontal")

  # Map the operator name onto the Ruby operator method invoked on the
  # left-hand column expression.
  operator = {
    "eq" => :==,
    "neq" => :!=,
    "gt" => :>,
    "lt" => :<,
    "gt_eq" => :>=,
    "lt_eq" => :<=
  }[op]
  if operator.nil?
    raise ArgumentError, "got unexpected comparison operator: #{op}"
  end

  expr = columns.map do |n|
    Polars.col(n).public_send(operator, Polars.col("#{n}#{suffix}"))
  end

  combined.select(expr)
end
|
3399
|
+
|
3400
|
+
def _compare_to_non_df(other, op)
  # Map the operator name onto the Ruby operator method invoked on the
  # all-columns expression.
  operator = {
    "eq" => :==,
    "neq" => :!=,
    "gt" => :>,
    "lt" => :<,
    "gt_eq" => :>=,
    "lt_eq" => :<=
  }[op]
  if operator.nil?
    raise ArgumentError, "got unexpected comparison operator: #{op}"
  end

  select(Polars.all.public_send(operator, other))
end
|
3418
|
+
|
3419
|
+
def _prepare_other_arg(other)
  # A Series is returned as-is, an Array is rejected, and any other value is
  # wrapped in a one-element Series with an empty name.
  return other if other.is_a?(Series)
  raise ArgumentError, "Operation not supported." if other.is_a?(Array)

  Series.new("", [other])
end
|
1185
3429
|
end
|
1186
3430
|
end
|