polars-df 0.1.2 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +9 -0
  4. data/Cargo.lock +74 -3
  5. data/Cargo.toml +3 -0
  6. data/README.md +1 -1
  7. data/ext/polars/Cargo.toml +18 -1
  8. data/ext/polars/src/conversion.rs +115 -2
  9. data/ext/polars/src/dataframe.rs +228 -11
  10. data/ext/polars/src/error.rs +4 -0
  11. data/ext/polars/src/lazy/dataframe.rs +5 -5
  12. data/ext/polars/src/lazy/dsl.rs +157 -2
  13. data/ext/polars/src/lib.rs +185 -10
  14. data/ext/polars/src/list_construction.rs +100 -0
  15. data/ext/polars/src/series.rs +217 -29
  16. data/ext/polars/src/set.rs +91 -0
  17. data/ext/polars/src/utils.rs +19 -0
  18. data/lib/polars/batched_csv_reader.rb +1 -0
  19. data/lib/polars/cat_expr.rb +39 -0
  20. data/lib/polars/cat_name_space.rb +54 -0
  21. data/lib/polars/data_frame.rb +2384 -140
  22. data/lib/polars/date_time_expr.rb +1282 -7
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/exceptions.rb +20 -0
  25. data/lib/polars/expr.rb +4374 -53
  26. data/lib/polars/expr_dispatch.rb +22 -0
  27. data/lib/polars/functions.rb +219 -0
  28. data/lib/polars/group_by.rb +518 -0
  29. data/lib/polars/io.rb +421 -2
  30. data/lib/polars/lazy_frame.rb +1267 -69
  31. data/lib/polars/lazy_functions.rb +412 -24
  32. data/lib/polars/lazy_group_by.rb +80 -0
  33. data/lib/polars/list_expr.rb +507 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/meta_expr.rb +21 -0
  36. data/lib/polars/series.rb +2256 -242
  37. data/lib/polars/slice.rb +104 -0
  38. data/lib/polars/string_expr.rb +847 -10
  39. data/lib/polars/string_name_space.rb +690 -0
  40. data/lib/polars/struct_expr.rb +73 -0
  41. data/lib/polars/struct_name_space.rb +64 -0
  42. data/lib/polars/utils.rb +71 -3
  43. data/lib/polars/version.rb +2 -1
  44. data/lib/polars/when.rb +1 -0
  45. data/lib/polars/when_then.rb +1 -0
  46. data/lib/polars.rb +12 -10
  47. metadata +15 -2
@@ -155,12 +155,35 @@ module Polars
155
155
  end
156
156
 
157
157
  # @private
158
- def self._read_parquet(file)
158
+ def self._read_parquet(
159
+ file,
160
+ columns: nil,
161
+ n_rows: nil,
162
+ parallel: "auto",
163
+ row_count_name: nil,
164
+ row_count_offset: 0,
165
+ low_memory: false
166
+ )
159
167
  if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
160
168
  file = Utils.format_path(file)
161
169
  end
162
170
 
163
- _from_rbdf(RbDataFrame.read_parquet(file))
171
+ if file.is_a?(String) && file.include?("*")
172
+ raise Todo
173
+ end
174
+
175
+ projection, columns = Utils.handle_projection_columns(columns)
176
+ _from_rbdf(
177
+ RbDataFrame.read_parquet(
178
+ file,
179
+ columns,
180
+ projection,
181
+ n_rows,
182
+ parallel,
183
+ Utils._prepare_row_count_args(row_count_name, row_count_offset),
184
+ low_memory
185
+ )
186
+ )
164
187
  end
165
188
 
166
189
  # def self._read_avro
@@ -259,11 +282,13 @@ module Polars
259
282
  # @return [Array]
260
283
  #
261
284
  # @example
262
- # df = Polars::DataFrame.new({
263
- # "foo" => [1, 2, 3],
264
- # "bar" => [6, 7, 8],
265
- # "ham" => ["a", "b", "c"]
266
- # })
285
+ # df = Polars::DataFrame.new(
286
+ # {
287
+ # "foo" => [1, 2, 3],
288
+ # "bar" => [6, 7, 8],
289
+ # "ham" => ["a", "b", "c"]
290
+ # }
291
+ # )
267
292
  # df.columns
268
293
  # # => ["foo", "bar", "ham"]
269
294
  def columns
@@ -279,11 +304,13 @@ module Polars
279
304
  # @return [Object]
280
305
  #
281
306
  # @example
282
- # df = Polars::DataFrame.new({
283
- # "foo" => [1, 2, 3],
284
- # "bar" => [6, 7, 8],
285
- # "ham" => ["a", "b", "c"]
286
- # })
307
+ # df = Polars::DataFrame.new(
308
+ # {
309
+ # "foo" => [1, 2, 3],
310
+ # "bar" => [6, 7, 8],
311
+ # "ham" => ["a", "b", "c"]
312
+ # }
313
+ # )
287
314
  # df.columns = ["apple", "banana", "orange"]
288
315
  # df
289
316
  # # =>
@@ -308,11 +335,13 @@ module Polars
308
335
  # @return [Array]
309
336
  #
310
337
  # @example
311
- # df = Polars::DataFrame.new({
312
- # "foo" => [1, 2, 3],
313
- # "bar" => [6.0, 7.0, 8.0],
314
- # "ham" => ["a", "b", "c"]
315
- # })
338
+ # df = Polars::DataFrame.new(
339
+ # {
340
+ # "foo" => [1, 2, 3],
341
+ # "bar" => [6.0, 7.0, 8.0],
342
+ # "ham" => ["a", "b", "c"]
343
+ # }
344
+ # )
316
345
  # df.dtypes
317
346
  # # => [:i64, :f64, :str]
318
347
  def dtypes
@@ -324,56 +353,132 @@ module Polars
324
353
  # @return [Hash]
325
354
  #
326
355
  # @example
327
- # df = Polars::DataFrame.new({
328
- # "foo" => [1, 2, 3],
329
- # "bar" => [6.0, 7.0, 8.0],
330
- # "ham" => ["a", "b", "c"]
331
- # })
356
+ # df = Polars::DataFrame.new(
357
+ # {
358
+ # "foo" => [1, 2, 3],
359
+ # "bar" => [6.0, 7.0, 8.0],
360
+ # "ham" => ["a", "b", "c"]
361
+ # }
362
+ # )
332
363
  # df.schema
333
364
  # # => {"foo"=>:i64, "bar"=>:f64, "ham"=>:str}
334
365
  def schema
335
366
  columns.zip(dtypes).to_h
336
367
  end
337
368
 
338
- # def ==(other)
339
- # end
369
+ # Equal.
370
+ #
371
+ # @return [DataFrame]
372
+ def ==(other)
373
+ _comp(other, "eq")
374
+ end
340
375
 
341
- # def !=(other)
342
- # end
376
+ # Not equal.
377
+ #
378
+ # @return [DataFrame]
379
+ def !=(other)
380
+ _comp(other, "neq")
381
+ end
343
382
 
344
- # def >(other)
345
- # end
383
+ # Greater than.
384
+ #
385
+ # @return [DataFrame]
386
+ def >(other)
387
+ _comp(other, "gt")
388
+ end
346
389
 
347
- # def <(other)
348
- # end
390
+ # Less than.
391
+ #
392
+ # @return [DataFrame]
393
+ def <(other)
394
+ _comp(other, "lt")
395
+ end
349
396
 
350
- # def >=(other)
351
- # end
397
+ # Greater than or equal.
398
+ #
399
+ # @return [DataFrame]
400
+ def >=(other)
401
+ _comp(other, "gt_eq")
402
+ end
352
403
 
353
- # def <=(other)
354
- # end
404
+ # Less than or equal.
405
+ #
406
+ # @return [DataFrame]
407
+ def <=(other)
408
+ _comp(other, "lt_eq")
409
+ end
355
410
 
356
- # def *(other)
357
- # end
411
+ # Performs multiplication.
412
+ #
413
+ # @return [DataFrame]
414
+ def *(other)
415
+ if other.is_a?(DataFrame)
416
+ return _from_rbdf(_df.mul_df(other._df))
417
+ end
358
418
 
359
- # def /(other)
360
- # end
419
+ other = _prepare_other_arg(other)
420
+ _from_rbdf(_df.mul(other._s))
421
+ end
361
422
 
362
- # def +(other)
363
- # end
423
+ # Performs division.
424
+ #
425
+ # @return [DataFrame]
426
+ def /(other)
427
+ if other.is_a?(DataFrame)
428
+ return _from_rbdf(_df.div_df(other._df))
429
+ end
364
430
 
365
- # def -(other)
366
- # end
431
+ other = _prepare_other_arg(other)
432
+ _from_rbdf(_df.div(other._s))
433
+ end
367
434
 
368
- # def %(other)
369
- # end
435
+ # Performs addition.
436
+ #
437
+ # @return [DataFrame]
438
+ def +(other)
439
+ if other.is_a?(DataFrame)
440
+ return _from_rbdf(_df.add_df(other._df))
441
+ end
442
+
443
+ other = _prepare_other_arg(other)
444
+ _from_rbdf(_df.add(other._s))
445
+ end
446
+
447
+ # Performs subtraction.
448
+ #
449
+ # @return [DataFrame]
450
+ def -(other)
451
+ if other.is_a?(DataFrame)
452
+ return _from_rbdf(_df.sub_df(other._df))
453
+ end
454
+
455
+ other = _prepare_other_arg(other)
456
+ _from_rbdf(_df.sub(other._s))
457
+ end
458
+
459
+ # Returns the modulo.
460
+ #
461
+ # @return [DataFrame]
462
+ def %(other)
463
+ if other.is_a?(DataFrame)
464
+ return _from_rbdf(_df.rem_df(other._df))
465
+ end
466
+
467
+ other = _prepare_other_arg(other)
468
+ _from_rbdf(_df.rem(other._s))
469
+ end
370
470
 
471
+ # Returns a string representing the DataFrame.
371
472
  #
473
+ # @return [String]
372
474
  def to_s
373
475
  _df.to_s
374
476
  end
375
477
  alias_method :inspect, :to_s
376
478
 
479
+ # Check if DataFrame includes column.
480
+ #
481
+ # @return [Boolean]
377
482
  def include?(name)
378
483
  columns.include?(name)
379
484
  end
@@ -387,9 +492,78 @@ module Polars
387
492
  # def _pos_idxs
388
493
  # end
389
494
 
495
+ # Returns a subset of the DataFrame.
390
496
  #
391
- def [](name)
392
- Utils.wrap_s(_df.column(name))
497
+ # @return [Object]
498
+ def [](*args)
499
+ if args.size == 2
500
+ row_selection, col_selection = args
501
+
502
+ # df[.., unknown]
503
+ if row_selection.is_a?(Range)
504
+
505
+ # multiple slices
506
+ # df[.., ..]
507
+ if col_selection.is_a?(Range)
508
+ raise Todo
509
+ end
510
+ end
511
+
512
+ # df[2, ..] (select row as df)
513
+ if row_selection.is_a?(Integer)
514
+ if col_selection.is_a?(Array)
515
+ df = self[0.., col_selection]
516
+ return df.slice(row_selection, 1)
517
+ end
518
+ # df[2, "a"]
519
+ if col_selection.is_a?(String)
520
+ return self[col_selection][row_selection]
521
+ end
522
+ end
523
+
524
+ # column selection can be "a" and ["a", "b"]
525
+ if col_selection.is_a?(String)
526
+ col_selection = [col_selection]
527
+ end
528
+
529
+ # df[.., 1]
530
+ if col_selection.is_a?(Integer)
531
+ series = to_series(col_selection)
532
+ return series[row_selection]
533
+ end
534
+
535
+ if col_selection.is_a?(Array)
536
+ # df[.., [1, 2]]
537
+ if is_int_sequence(col_selection)
538
+ series_list = col_selection.map { |i| to_series(i) }
539
+ df = self.class.new(series_list)
540
+ return df[row_selection]
541
+ end
542
+ end
543
+
544
+ df = self[col_selection]
545
+ return df[row_selection]
546
+ elsif args.size == 1
547
+ item = args[0]
548
+
549
+ # select single column
550
+ # df["foo"]
551
+ if item.is_a?(String)
552
+ return Utils.wrap_s(_df.column(item))
553
+ end
554
+
555
+ # df[idx]
556
+ if item.is_a?(Integer)
557
+ return slice(_pos_idx(item, dim: 0), 1)
558
+ end
559
+
560
+ # df[..]
561
+ if item.is_a?(Range)
562
+ return Slice.new(self).apply(item)
563
+ end
564
+ end
565
+
566
+ raise ArgumentError, "Cannot get item of type: #{item.class.name}"
393
567
  end
394
568
 
395
569
  # def []=(key, value)
@@ -397,7 +571,9 @@ module Polars
397
571
 
398
572
  # no to_arrow
399
573
 
574
+ # Convert DataFrame to a hash mapping column name to values.
400
575
  #
576
+ # @return [Hash]
401
577
  def to_h(as_series: true)
402
578
  if as_series
403
579
  get_columns.to_h { |s| [s.name, s] }
@@ -422,11 +598,13 @@ module Polars
422
598
  # @return [Series]
423
599
  #
424
600
  # @example
425
- # df = Polars::DataFrame.new({
426
- # "foo" => [1, 2, 3],
427
- # "bar" => [6, 7, 8],
428
- # "ham" => ["a", "b", "c"]
429
- # })
601
+ # df = Polars::DataFrame.new(
602
+ # {
603
+ # "foo" => [1, 2, 3],
604
+ # "bar" => [6, 7, 8],
605
+ # "ham" => ["a", "b", "c"]
606
+ # }
607
+ # )
430
608
  # df.to_series(1)
431
609
  # # =>
432
610
  # # shape: (3,)
@@ -519,11 +697,13 @@ module Polars
519
697
  # @return [String, nil]
520
698
  #
521
699
  # @example
522
- # df = Polars::DataFrame.new({
523
- # "foo" => [1, 2, 3, 4, 5],
524
- # "bar" => [6, 7, 8, 9, 10],
525
- # "ham" => ["a", "b", "c", "d", "e"]
526
- # })
700
+ # df = Polars::DataFrame.new(
701
+ # {
702
+ # "foo" => [1, 2, 3, 4, 5],
703
+ # "bar" => [6, 7, 8, 9, 10],
704
+ # "ham" => ["a", "b", "c", "d", "e"]
705
+ # }
706
+ # )
527
707
  # df.write_csv("file.csv")
528
708
  def write_csv(
529
709
  file = nil,
@@ -694,11 +874,13 @@ module Polars
694
874
  # @return [DataFrame]
695
875
  #
696
876
  # @example
697
- # df = Polars::DataFrame.new({
698
- # "key" => ["a", "b", "c"],
699
- # "val" => [1, 2, 3]
700
- # })
701
- # df.reverse()
877
+ # df = Polars::DataFrame.new(
878
+ # {
879
+ # "key" => ["a", "b", "c"],
880
+ # "val" => [1, 2, 3]
881
+ # }
882
+ # )
883
+ # df.reverse
702
884
  # # =>
703
885
  # # shape: (3, 2)
704
886
  # # ┌─────┬─────┐
@@ -724,11 +906,13 @@ module Polars
724
906
  # @return [DataFrame]
725
907
  #
726
908
  # @example
727
- # df = Polars::DataFrame.new({
728
- # "foo" => [1, 2, 3],
729
- # "bar" => [6, 7, 8],
730
- # "ham" => ["a", "b", "c"]
731
- # })
909
+ # df = Polars::DataFrame.new(
910
+ # {
911
+ # "foo" => [1, 2, 3],
912
+ # "bar" => [6, 7, 8],
913
+ # "ham" => ["a", "b", "c"]
914
+ # }
915
+ # )
732
916
  # df.rename({"foo" => "apple"})
733
917
  # # =>
734
918
  # # shape: (3, 3)
@@ -775,11 +959,13 @@ module Polars
775
959
  # # └─────┴─────┴─────┘
776
960
  #
777
961
  # @example
778
- # df = Polars::DataFrame.new({
779
- # "a" => [1, 2, 3, 4],
780
- # "b" => [0.5, 4, 10, 13],
781
- # "c" => [true, true, false, true]
782
- # })
962
+ # df = Polars::DataFrame.new(
963
+ # {
964
+ # "a" => [1, 2, 3, 4],
965
+ # "b" => [0.5, 4, 10, 13],
966
+ # "c" => [true, true, false, true]
967
+ # }
968
+ # )
783
969
  # s = Polars::Series.new("d", [-2.5, 15, 20.5, 0])
784
970
  # df.insert_at_idx(3, s)
785
971
  # # =>
@@ -805,63 +991,560 @@ module Polars
805
991
  self
806
992
  end
807
993
 
994
+ # Filter the rows in the DataFrame based on a predicate expression.
995
+ #
996
+ # @param predicate [Expr]
997
+ # Expression that evaluates to a boolean Series.
998
+ #
999
+ # @return [DataFrame]
1000
+ #
1001
+ # @example Filter on one condition:
1002
+ # df = Polars::DataFrame.new(
1003
+ # {
1004
+ # "foo" => [1, 2, 3],
1005
+ # "bar" => [6, 7, 8],
1006
+ # "ham" => ["a", "b", "c"]
1007
+ # }
1008
+ # )
1009
+ # df.filter(Polars.col("foo") < 3)
1010
+ # # =>
1011
+ # # shape: (2, 3)
1012
+ # # ┌─────┬─────┬─────┐
1013
+ # # │ foo ┆ bar ┆ ham │
1014
+ # # │ --- ┆ --- ┆ --- │
1015
+ # # │ i64 ┆ i64 ┆ str │
1016
+ # # ╞═════╪═════╪═════╡
1017
+ # # │ 1 ┆ 6 ┆ a │
1018
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1019
+ # # │ 2 ┆ 7 ┆ b │
1020
+ # # └─────┴─────┴─────┘
1021
+ #
1022
+ # @example Filter on multiple conditions:
1023
+ # df.filter((Polars.col("foo") < 3) & (Polars.col("ham") == "a"))
1024
+ # # =>
1025
+ # # shape: (1, 3)
1026
+ # # ┌─────┬─────┬─────┐
1027
+ # # │ foo ┆ bar ┆ ham │
1028
+ # # │ --- ┆ --- ┆ --- │
1029
+ # # │ i64 ┆ i64 ┆ str │
1030
+ # # ╞═════╪═════╪═════╡
1031
+ # # │ 1 ┆ 6 ┆ a │
1032
+ # # └─────┴─────┴─────┘
808
1033
  def filter(predicate)
809
1034
  lazy.filter(predicate).collect
810
1035
  end
811
1036
 
812
- # def describe
813
- # end
1037
+ # Summary statistics for a DataFrame.
1038
+ #
1039
+ # @return [DataFrame]
1040
+ #
1041
+ # @example
1042
+ # df = Polars::DataFrame.new(
1043
+ # {
1044
+ # "a" => [1.0, 2.8, 3.0],
1045
+ # "b" => [4, 5, nil],
1046
+ # "c" => [true, false, true],
1047
+ # "d" => [nil, "b", "c"],
1048
+ # "e" => ["usd", "eur", nil]
1049
+ # }
1050
+ # )
1051
+ # df.describe
1052
+ # # =>
1053
+ # # shape: (7, 6)
1054
+ # # ┌────────────┬──────────┬──────────┬──────┬──────┬──────┐
1055
+ # # │ describe ┆ a ┆ b ┆ c ┆ d ┆ e │
1056
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
1057
+ # # │ str ┆ f64 ┆ f64 ┆ f64 ┆ str ┆ str │
1058
+ # # ╞════════════╪══════════╪══════════╪══════╪══════╪══════╡
1059
+ # # │ count ┆ 3.0 ┆ 3.0 ┆ 3.0 ┆ 3 ┆ 3 │
1060
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1061
+ # # │ null_count ┆ 0.0 ┆ 1.0 ┆ 0.0 ┆ 1 ┆ 1 │
1062
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1063
+ # # │ mean ┆ 2.266667 ┆ 4.5 ┆ null ┆ null ┆ null │
1064
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1065
+ # # │ std ┆ 1.101514 ┆ 0.707107 ┆ null ┆ null ┆ null │
1066
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1067
+ # # │ min ┆ 1.0 ┆ 4.0 ┆ 0.0 ┆ b ┆ eur │
1068
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1069
+ # # │ max ┆ 3.0 ┆ 5.0 ┆ 1.0 ┆ c ┆ usd │
1070
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1071
+ # # │ median ┆ 2.8 ┆ 4.5 ┆ null ┆ null ┆ null │
1072
+ # # └────────────┴──────────┴──────────┴──────┴──────┴──────┘
1073
+ def describe
1074
+ describe_cast = lambda do |stat|
1075
+ columns = []
1076
+ self.columns.each_with_index do |s, i|
1077
+ if self[s].is_numeric || self[s].is_boolean
1078
+ columns << stat[0.., i].cast(:f64)
1079
+ else
1080
+ # for dates, strings, etc, we cast to string so that all
1081
+ # statistics can be shown
1082
+ columns << stat[0.., i].cast(:str)
1083
+ end
1084
+ end
1085
+ self.class.new(columns)
1086
+ end
814
1087
 
815
- # def find_idx_by_name
816
- # end
1088
+ summary = _from_rbdf(
1089
+ Polars.concat(
1090
+ [
1091
+ describe_cast.(
1092
+ self.class.new(columns.to_h { |c| [c, [height]] })
1093
+ ),
1094
+ describe_cast.(null_count),
1095
+ describe_cast.(mean),
1096
+ describe_cast.(std),
1097
+ describe_cast.(min),
1098
+ describe_cast.(max),
1099
+ describe_cast.(median)
1100
+ ]
1101
+ )._df
1102
+ )
1103
+ summary.insert_at_idx(
1104
+ 0,
1105
+ Polars::Series.new(
1106
+ "describe",
1107
+ ["count", "null_count", "mean", "std", "min", "max", "median"],
1108
+ )
1109
+ )
1110
+ summary
1111
+ end
817
1112
 
818
- # def replace_at_idx
819
- # end
1113
+ # Find the index of a column by name.
1114
+ #
1115
+ # @param name [String]
1116
+ # Name of the column to find.
1117
+ #
1118
+ # @return [Integer]
1119
+ #
1120
+ # @example
1121
+ # df = Polars::DataFrame.new(
1122
+ # {"foo" => [1, 2, 3], "bar" => [6, 7, 8], "ham" => ["a", "b", "c"]}
1123
+ # )
1124
+ # df.find_idx_by_name("ham")
1125
+ # # => 2
1126
+ def find_idx_by_name(name)
1127
+ _df.find_idx_by_name(name)
1128
+ end
1129
+
1130
+ # Replace a column at an index location.
1131
+ #
1132
+ # @param index [Integer]
1133
+ # Column index.
1134
+ # @param series [Series]
1135
+ # Series that will replace the column.
1136
+ #
1137
+ # @return [DataFrame]
1138
+ #
1139
+ # @example
1140
+ # df = Polars::DataFrame.new(
1141
+ # {
1142
+ # "foo" => [1, 2, 3],
1143
+ # "bar" => [6, 7, 8],
1144
+ # "ham" => ["a", "b", "c"]
1145
+ # }
1146
+ # )
1147
+ # s = Polars::Series.new("apple", [10, 20, 30])
1148
+ # df.replace_at_idx(0, s)
1149
+ # # =>
1150
+ # # shape: (3, 3)
1151
+ # # ┌───────┬─────┬─────┐
1152
+ # # │ apple ┆ bar ┆ ham │
1153
+ # # │ --- ┆ --- ┆ --- │
1154
+ # # │ i64 ┆ i64 ┆ str │
1155
+ # # ╞═══════╪═════╪═════╡
1156
+ # # │ 10 ┆ 6 ┆ a │
1157
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1158
+ # # │ 20 ┆ 7 ┆ b │
1159
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1160
+ # # │ 30 ┆ 8 ┆ c │
1161
+ # # └───────┴─────┴─────┘
1162
+ def replace_at_idx(index, series)
1163
+ if index < 0
1164
+ index = columns.length + index
1165
+ end
1166
+ _df.replace_at_idx(index, series._s)
1167
+ self
1168
+ end
820
1169
 
1170
+ # Sort the DataFrame by column.
1171
+ #
1172
+ # @param by [String]
1173
+ # By which column to sort.
1174
+ # @param reverse [Boolean]
1175
+ # Reverse/descending sort.
1176
+ # @param nulls_last [Boolean]
1177
+ # Place null values last. Can only be used if sorted by a single column.
1178
+ #
1179
+ # @return [DataFrame]
1180
+ #
1181
+ # @example
1182
+ # df = Polars::DataFrame.new(
1183
+ # {
1184
+ # "foo" => [1, 2, 3],
1185
+ # "bar" => [6.0, 7.0, 8.0],
1186
+ # "ham" => ["a", "b", "c"]
1187
+ # }
1188
+ # )
1189
+ # df.sort("foo", reverse: true)
1190
+ # # =>
1191
+ # # shape: (3, 3)
1192
+ # # ┌─────┬─────┬─────┐
1193
+ # # │ foo ┆ bar ┆ ham │
1194
+ # # │ --- ┆ --- ┆ --- │
1195
+ # # │ i64 ┆ f64 ┆ str │
1196
+ # # ╞═════╪═════╪═════╡
1197
+ # # │ 3 ┆ 8.0 ┆ c │
1198
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1199
+ # # │ 2 ┆ 7.0 ┆ b │
1200
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1201
+ # # │ 1 ┆ 6.0 ┆ a │
1202
+ # # └─────┴─────┴─────┘
821
1203
  #
1204
+ # @example Sort by multiple columns.
1205
+ # df.sort(
1206
+ # [Polars.col("foo"), Polars.col("bar")**2],
1207
+ # reverse: [true, false]
1208
+ # )
1209
+ # # =>
1210
+ # # shape: (3, 3)
1211
+ # # ┌─────┬─────┬─────┐
1212
+ # # │ foo ┆ bar ┆ ham │
1213
+ # # │ --- ┆ --- ┆ --- │
1214
+ # # │ i64 ┆ f64 ┆ str │
1215
+ # # ╞═════╪═════╪═════╡
1216
+ # # │ 3 ┆ 8.0 ┆ c │
1217
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1218
+ # # │ 2 ┆ 7.0 ┆ b │
1219
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1220
+ # # │ 1 ┆ 6.0 ┆ a │
1221
+ # # └─────┴─────┴─────┘
822
1222
  def sort(by, reverse: false, nulls_last: false)
823
- _from_rbdf(_df.sort(by, reverse, nulls_last))
1223
+ if by.is_a?(Array) || by.is_a?(Expr)
1224
+ lazy
1225
+ .sort(by, reverse: reverse, nulls_last: nulls_last)
1226
+ .collect(no_optimization: true, string_cache: false)
1227
+ else
1228
+ _from_rbdf(_df.sort(by, reverse, nulls_last))
1229
+ end
824
1230
  end
825
1231
 
1232
+ # Check if DataFrame is equal to other.
1233
+ #
1234
+ # @param other [DataFrame]
1235
+ # DataFrame to compare with.
1236
+ # @param null_equal [Boolean]
1237
+ # Consider null values as equal.
1238
+ #
1239
+ # @return [Boolean]
1240
+ #
1241
+ # @example
1242
+ # df1 = Polars::DataFrame.new(
1243
+ # {
1244
+ # "foo" => [1, 2, 3],
1245
+ # "bar" => [6.0, 7.0, 8.0],
1246
+ # "ham" => ["a", "b", "c"]
1247
+ # }
1248
+ # )
1249
+ # df2 = Polars::DataFrame.new(
1250
+ # {
1251
+ # "foo" => [3, 2, 1],
1252
+ # "bar" => [8.0, 7.0, 6.0],
1253
+ # "ham" => ["c", "b", "a"]
1254
+ # }
1255
+ # )
1256
+ # df1.frame_equal(df1)
1257
+ # # => true
1258
+ # df1.frame_equal(df2)
1259
+ # # => false
826
1260
  def frame_equal(other, null_equal: true)
827
1261
  _df.frame_equal(other._df, null_equal)
828
1262
  end
829
1263
 
830
- # def replace
831
- # end
832
-
1264
+ # Replace a column by a new Series.
833
1265
  #
834
- def slice(offset, length = nil)
835
- if !length.nil? && length < 0
836
- length = height - offset + length
837
- end
838
- _from_rbdf(_df.slice(offset, length))
1266
+ # @param column [String]
1267
+ # Column to replace.
1268
+ # @param new_col [Series]
1269
+ # New column to insert.
1270
+ #
1271
+ # @return [DataFrame]
1272
+ #
1273
+ # @example
1274
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
1275
+ # s = Polars::Series.new([10, 20, 30])
1276
+ # df.replace("foo", s)
1277
+ # # =>
1278
+ # # shape: (3, 2)
1279
+ # # ┌─────┬─────┐
1280
+ # # │ foo ┆ bar │
1281
+ # # │ --- ┆ --- │
1282
+ # # │ i64 ┆ i64 │
1283
+ # # ╞═════╪═════╡
1284
+ # # │ 10 ┆ 4 │
1285
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1286
+ # # │ 20 ┆ 5 │
1287
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1288
+ # # │ 30 ┆ 6 │
1289
+ # # └─────┴─────┘
1290
+ def replace(column, new_col)
1291
+ _df.replace(column, new_col._s)
1292
+ self
839
1293
  end
840
1294
 
1295
+ # Get a slice of this DataFrame.
1296
+ #
1297
+ # @param offset [Integer]
1298
+ # Start index. Negative indexing is supported.
1299
+ # @param length [Integer, nil]
1300
+ # Length of the slice. If set to `nil`, all rows starting at the offset
1301
+ # will be selected.
1302
+ #
1303
+ # @return [DataFrame]
1304
+ #
1305
+ # @example
1306
+ # df = Polars::DataFrame.new(
1307
+ # {
1308
+ # "foo" => [1, 2, 3],
1309
+ # "bar" => [6.0, 7.0, 8.0],
1310
+ # "ham" => ["a", "b", "c"]
1311
+ # }
1312
+ # )
1313
+ # df.slice(1, 2)
1314
+ # # =>
1315
+ # # shape: (2, 3)
1316
+ # # ┌─────┬─────┬─────┐
1317
+ # # │ foo ┆ bar ┆ ham │
1318
+ # # │ --- ┆ --- ┆ --- │
1319
+ # # │ i64 ┆ f64 ┆ str │
1320
+ # # ╞═════╪═════╪═════╡
1321
+ # # │ 2 ┆ 7.0 ┆ b │
1322
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1323
+ # # │ 3 ┆ 8.0 ┆ c │
1324
+ # # └─────┴─────┴─────┘
1325
+ def slice(offset, length = nil)
1326
+ if !length.nil? && length < 0
1327
+ length = height - offset + length
1328
+ end
1329
+ _from_rbdf(_df.slice(offset, length))
1330
+ end
1331
+
1332
+ # Get the first `n` rows.
1333
+ #
1334
+ # Alias for {#head}.
1335
+ #
1336
+ # @param n [Integer]
1337
+ # Number of rows to return.
1338
+ #
1339
+ # @return [DataFrame]
1340
+ #
1341
+ # @example
1342
+ # df = Polars::DataFrame.new(
1343
+ # {"foo" => [1, 2, 3, 4, 5, 6], "bar" => ["a", "b", "c", "d", "e", "f"]}
1344
+ # )
1345
+ # df.limit(4)
1346
+ # # =>
1347
+ # # shape: (4, 2)
1348
+ # # ┌─────┬─────┐
1349
+ # # │ foo ┆ bar │
1350
+ # # │ --- ┆ --- │
1351
+ # # │ i64 ┆ str │
1352
+ # # ╞═════╪═════╡
1353
+ # # │ 1 ┆ a │
1354
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1355
+ # # │ 2 ┆ b │
1356
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1357
+ # # │ 3 ┆ c │
1358
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1359
+ # # │ 4 ┆ d │
1360
+ # # └─────┴─────┘
841
1361
  def limit(n = 5)
842
1362
  head(n)
843
1363
  end
844
1364
 
1365
+ # Get the first `n` rows.
1366
+ #
1367
+ # @param n [Integer]
1368
+ # Number of rows to return.
1369
+ #
1370
+ # @return [DataFrame]
1371
+ #
1372
+ # @example
1373
+ # df = Polars::DataFrame.new(
1374
+ # {
1375
+ # "foo" => [1, 2, 3, 4, 5],
1376
+ # "bar" => [6, 7, 8, 9, 10],
1377
+ # "ham" => ["a", "b", "c", "d", "e"]
1378
+ # }
1379
+ # )
1380
+ # df.head(3)
1381
+ # # =>
1382
+ # # shape: (3, 3)
1383
+ # # ┌─────┬─────┬─────┐
1384
+ # # │ foo ┆ bar ┆ ham │
1385
+ # # │ --- ┆ --- ┆ --- │
1386
+ # # │ i64 ┆ i64 ┆ str │
1387
+ # # ╞═════╪═════╪═════╡
1388
+ # # │ 1 ┆ 6 ┆ a │
1389
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1390
+ # # │ 2 ┆ 7 ┆ b │
1391
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1392
+ # # │ 3 ┆ 8 ┆ c │
1393
+ # # └─────┴─────┴─────┘
845
1394
  def head(n = 5)
846
1395
  _from_rbdf(_df.head(n))
847
1396
  end
848
1397
 
1398
+ # Get the last `n` rows.
1399
+ #
1400
+ # @param n [Integer]
1401
+ # Number of rows to return.
1402
+ #
1403
+ # @return [DataFrame]
1404
+ #
1405
+ # @example
1406
+ # df = Polars::DataFrame.new(
1407
+ # {
1408
+ # "foo" => [1, 2, 3, 4, 5],
1409
+ # "bar" => [6, 7, 8, 9, 10],
1410
+ # "ham" => ["a", "b", "c", "d", "e"]
1411
+ # }
1412
+ # )
1413
+ # df.tail(3)
1414
+ # # =>
1415
+ # # shape: (3, 3)
1416
+ # # ┌─────┬─────┬─────┐
1417
+ # # │ foo ┆ bar ┆ ham │
1418
+ # # │ --- ┆ --- ┆ --- │
1419
+ # # │ i64 ┆ i64 ┆ str │
1420
+ # # ╞═════╪═════╪═════╡
1421
+ # # │ 3 ┆ 8 ┆ c │
1422
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1423
+ # # │ 4 ┆ 9 ┆ d │
1424
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1425
+ # # │ 5 ┆ 10 ┆ e │
1426
+ # # └─────┴─────┴─────┘
849
1427
  def tail(n = 5)
850
1428
  _from_rbdf(_df.tail(n))
851
1429
  end
852
1430
 
853
- # def drop_nulls
854
- # end
1431
+ # Return a new DataFrame where the null values are dropped.
1432
+ #
1433
+ # @param subset [Object]
1434
+ # Subset of column(s) on which `drop_nulls` will be applied.
1435
+ #
1436
+ # @return [DataFrame]
1437
+ #
1438
+ # @example
1439
+ # df = Polars::DataFrame.new(
1440
+ # {
1441
+ # "foo" => [1, 2, 3],
1442
+ # "bar" => [6, nil, 8],
1443
+ # "ham" => ["a", "b", "c"]
1444
+ # }
1445
+ # )
1446
+ # df.drop_nulls
1447
+ # # =>
1448
+ # # shape: (2, 3)
1449
+ # # ┌─────┬─────┬─────┐
1450
+ # # │ foo ┆ bar ┆ ham │
1451
+ # # │ --- ┆ --- ┆ --- │
1452
+ # # │ i64 ┆ i64 ┆ str │
1453
+ # # ╞═════╪═════╪═════╡
1454
+ # # │ 1 ┆ 6 ┆ a │
1455
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1456
+ # # │ 3 ┆ 8 ┆ c │
1457
+ # # └─────┴─────┴─────┘
1458
+ def drop_nulls(subset: nil)
1459
+ if subset.is_a?(String)
1460
+ subset = [subset]
1461
+ end
1462
+ _from_rbdf(_df.drop_nulls(subset))
1463
+ end
855
1464
 
856
1465
  # def pipe
857
1466
  # end
858
1467
 
859
- # def with_row_count
860
- # end
861
-
1468
+ # Add a column at index 0 that counts the rows.
1469
+ #
1470
+ # @param name [String]
1471
+ # Name of the column to add.
1472
+ # @param offset [Integer]
1473
+ # Start the row count at this offset.
1474
+ #
1475
+ # @return [DataFrame]
1476
+ #
1477
+ # @example
1478
+ # df = Polars::DataFrame.new(
1479
+ # {
1480
+ # "a" => [1, 3, 5],
1481
+ # "b" => [2, 4, 6]
1482
+ # }
1483
+ # )
1484
+ # df.with_row_count
1485
+ # # =>
1486
+ # # shape: (3, 3)
1487
+ # # ┌────────┬─────┬─────┐
1488
+ # # │ row_nr ┆ a ┆ b │
1489
+ # # │ --- ┆ --- ┆ --- │
1490
+ # # │ u32 ┆ i64 ┆ i64 │
1491
+ # # ╞════════╪═════╪═════╡
1492
+ # # │ 0 ┆ 1 ┆ 2 │
1493
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1494
+ # # │ 1 ┆ 3 ┆ 4 │
1495
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1496
+ # # │ 2 ┆ 5 ┆ 6 │
1497
+ # # └────────┴─────┴─────┘
1498
+ def with_row_count(name: "row_nr", offset: 0)
1499
+ _from_rbdf(_df.with_row_count(name, offset))
1500
+ end
1501
+
1502
+ # Start a groupby operation.
1503
+ #
1504
+ # @param by [Object]
1505
+ # Column(s) to group by.
1506
+ # @param maintain_order [Boolean]
1507
+ # Make sure that the order of the groups remains consistent. This is more
1508
+ # expensive than a default groupby. Note that this only works in expression
1509
+ # aggregations.
862
1510
  #
1511
+ # @return [GroupBy]
1512
+ #
1513
+ # @example
1514
+ # df = Polars::DataFrame.new(
1515
+ # {
1516
+ # "a" => ["a", "b", "a", "b", "b", "c"],
1517
+ # "b" => [1, 2, 3, 4, 5, 6],
1518
+ # "c" => [6, 5, 4, 3, 2, 1]
1519
+ # }
1520
+ # )
1521
+ # df.groupby("a").agg(Polars.col("b").sum).sort("a")
1522
+ # # =>
1523
+ # # shape: (3, 2)
1524
+ # # ┌─────┬─────┐
1525
+ # # │ a ┆ b │
1526
+ # # │ --- ┆ --- │
1527
+ # # │ str ┆ i64 │
1528
+ # # ╞═════╪═════╡
1529
+ # # │ a ┆ 4 │
1530
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1531
+ # # │ b ┆ 11 │
1532
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1533
+ # # │ c ┆ 6 │
1534
+ # # └─────┴─────┘
863
1535
  def groupby(by, maintain_order: false)
864
- lazy.groupby(by, maintain_order: maintain_order)
1536
+ if !Utils.bool?(maintain_order)
1537
+ raise TypeError, "invalid input for groupby arg `maintain_order`: #{maintain_order}."
1538
+ end
1539
+ if by.is_a?(String)
1540
+ by = [by]
1541
+ end
1542
+ GroupBy.new(
1543
+ _df,
1544
+ by,
1545
+ self.class,
1546
+ maintain_order: maintain_order
1547
+ )
865
1548
  end
866
1549
 
867
1550
  # def groupby_rolling
@@ -876,7 +1559,109 @@ module Polars
876
1559
  # def join_asof
877
1560
  # end
878
1561
 
1562
+ # Join in SQL-like fashion.
1563
+ #
1564
+ # @param other [DataFrame]
1565
+ # DataFrame to join with.
1566
+ # @param left_on [Object]
1567
+ # Name(s) of the left join column(s).
1568
+ # @param right_on [Object]
1569
+ # Name(s) of the right join column(s).
1570
+ # @param on [Object]
1571
+ # Name(s) of the join columns in both DataFrames.
1572
+ # @param how ["inner", "left", "outer", "semi", "anti", "cross"]
1573
+ # Join strategy.
1574
+ # @param suffix [String]
1575
+ # Suffix to append to columns with a duplicate name.
1576
+ #
1577
+ # @return [DataFrame]
1578
+ #
1579
+ # @example
1580
+ # df = Polars::DataFrame.new(
1581
+ # {
1582
+ # "foo" => [1, 2, 3],
1583
+ # "bar" => [6.0, 7.0, 8.0],
1584
+ # "ham" => ["a", "b", "c"]
1585
+ # }
1586
+ # )
1587
+ # other_df = Polars::DataFrame.new(
1588
+ # {
1589
+ # "apple" => ["x", "y", "z"],
1590
+ # "ham" => ["a", "b", "d"]
1591
+ # }
1592
+ # )
1593
+ # df.join(other_df, on: "ham")
1594
+ # # =>
1595
+ # # shape: (2, 4)
1596
+ # # ┌─────┬─────┬─────┬───────┐
1597
+ # # │ foo ┆ bar ┆ ham ┆ apple │
1598
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1599
+ # # │ i64 ┆ f64 ┆ str ┆ str │
1600
+ # # ╞═════╪═════╪═════╪═══════╡
1601
+ # # │ 1 ┆ 6.0 ┆ a ┆ x │
1602
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1603
+ # # │ 2 ┆ 7.0 ┆ b ┆ y │
1604
+ # # └─────┴─────┴─────┴───────┘
1605
+ #
1606
+ # @example
1607
+ # df.join(other_df, on: "ham", how: "outer")
1608
+ # # =>
1609
+ # # shape: (4, 4)
1610
+ # # ┌──────┬──────┬─────┬───────┐
1611
+ # # │ foo ┆ bar ┆ ham ┆ apple │
1612
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1613
+ # # │ i64 ┆ f64 ┆ str ┆ str │
1614
+ # # ╞══════╪══════╪═════╪═══════╡
1615
+ # # │ 1 ┆ 6.0 ┆ a ┆ x │
1616
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1617
+ # # │ 2 ┆ 7.0 ┆ b ┆ y │
1618
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1619
+ # # │ null ┆ null ┆ d ┆ z │
1620
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1621
+ # # │ 3 ┆ 8.0 ┆ c ┆ null │
1622
+ # # └──────┴──────┴─────┴───────┘
1623
+ #
1624
+ # @example
1625
+ # df.join(other_df, on: "ham", how: "left")
1626
+ # # =>
1627
+ # # shape: (3, 4)
1628
+ # # ┌─────┬─────┬─────┬───────┐
1629
+ # # │ foo ┆ bar ┆ ham ┆ apple │
1630
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1631
+ # # │ i64 ┆ f64 ┆ str ┆ str │
1632
+ # # ╞═════╪═════╪═════╪═══════╡
1633
+ # # │ 1 ┆ 6.0 ┆ a ┆ x │
1634
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1635
+ # # │ 2 ┆ 7.0 ┆ b ┆ y │
1636
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1637
+ # # │ 3 ┆ 8.0 ┆ c ┆ null │
1638
+ # # └─────┴─────┴─────┴───────┘
1639
+ #
1640
+ # @example
1641
+ # df.join(other_df, on: "ham", how: "semi")
1642
+ # # =>
1643
+ # # shape: (2, 3)
1644
+ # # ┌─────┬─────┬─────┐
1645
+ # # │ foo ┆ bar ┆ ham │
1646
+ # # │ --- ┆ --- ┆ --- │
1647
+ # # │ i64 ┆ f64 ┆ str │
1648
+ # # ╞═════╪═════╪═════╡
1649
+ # # │ 1 ┆ 6.0 ┆ a │
1650
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1651
+ # # │ 2 ┆ 7.0 ┆ b │
1652
+ # # └─────┴─────┴─────┘
879
1653
  #
1654
+ # @example
1655
+ # df.join(other_df, on: "ham", how: "anti")
1656
+ # # =>
1657
+ # # shape: (1, 3)
1658
+ # # ┌─────┬─────┬─────┐
1659
+ # # │ foo ┆ bar ┆ ham │
1660
+ # # │ --- ┆ --- ┆ --- │
1661
+ # # │ i64 ┆ f64 ┆ str │
1662
+ # # ╞═════╪═════╪═════╡
1663
+ # # │ 3 ┆ 8.0 ┆ c │
1664
+ # # └─────┴─────┴─────┘
880
1665
  def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right")
881
1666
  lazy
882
1667
  .join(
@@ -893,55 +1678,505 @@ module Polars
893
1678
  # def apply
894
1679
  # end
895
1680
 
1681
+ # Return a new DataFrame with the column added or replaced.
1682
+ #
1683
+ # @param column [Object]
1684
+ # Series, where the name of the Series refers to the column in the DataFrame.
1685
+ #
1686
+ # @return [DataFrame]
1687
+ #
1688
+ # @example Added
1689
+ # df = Polars::DataFrame.new(
1690
+ # {
1691
+ # "a" => [1, 3, 5],
1692
+ # "b" => [2, 4, 6]
1693
+ # }
1694
+ # )
1695
+ # df.with_column((Polars.col("b") ** 2).alias("b_squared"))
1696
+ # # =>
1697
+ # # shape: (3, 3)
1698
+ # # ┌─────┬─────┬───────────┐
1699
+ # # │ a ┆ b ┆ b_squared │
1700
+ # # │ --- ┆ --- ┆ --- │
1701
+ # # │ i64 ┆ i64 ┆ f64 │
1702
+ # # ╞═════╪═════╪═══════════╡
1703
+ # # │ 1 ┆ 2 ┆ 4.0 │
1704
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1705
+ # # │ 3 ┆ 4 ┆ 16.0 │
1706
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1707
+ # # │ 5 ┆ 6 ┆ 36.0 │
1708
+ # # └─────┴─────┴───────────┘
896
1709
  #
1710
+ # @example Replaced
1711
+ # df.with_column(Polars.col("a") ** 2)
1712
+ # # =>
1713
+ # # shape: (3, 2)
1714
+ # # ┌──────┬─────┐
1715
+ # # │ a ┆ b │
1716
+ # # │ --- ┆ --- │
1717
+ # # │ f64 ┆ i64 │
1718
+ # # ╞══════╪═════╡
1719
+ # # │ 1.0 ┆ 2 │
1720
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
1721
+ # # │ 9.0 ┆ 4 │
1722
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
1723
+ # # │ 25.0 ┆ 6 │
1724
+ # # └──────┴─────┘
897
1725
  def with_column(column)
898
1726
  lazy
899
1727
  .with_column(column)
900
1728
  .collect(no_optimization: true, string_cache: false)
901
1729
  end
902
1730
 
903
- # def hstack
904
- # end
1731
+ # Return a new DataFrame grown horizontally by stacking multiple Series to it.
1732
+ #
1733
+ # @param columns [Object]
1734
+ # Series to stack.
1735
+ # @param in_place [Boolean]
1736
+ # Modify in place.
1737
+ #
1738
+ # @return [DataFrame]
1739
+ #
1740
+ # @example
1741
+ # df = Polars::DataFrame.new(
1742
+ # {
1743
+ # "foo" => [1, 2, 3],
1744
+ # "bar" => [6, 7, 8],
1745
+ # "ham" => ["a", "b", "c"]
1746
+ # }
1747
+ # )
1748
+ # x = Polars::Series.new("apple", [10, 20, 30])
1749
+ # df.hstack([x])
1750
+ # # =>
1751
+ # # shape: (3, 4)
1752
+ # # ┌─────┬─────┬─────┬───────┐
1753
+ # # │ foo ┆ bar ┆ ham ┆ apple │
1754
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1755
+ # # │ i64 ┆ i64 ┆ str ┆ i64 │
1756
+ # # ╞═════╪═════╪═════╪═══════╡
1757
+ # # │ 1 ┆ 6 ┆ a ┆ 10 │
1758
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1759
+ # # │ 2 ┆ 7 ┆ b ┆ 20 │
1760
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1761
+ # # │ 3 ┆ 8 ┆ c ┆ 30 │
1762
+ # # └─────┴─────┴─────┴───────┘
1763
+ def hstack(columns, in_place: false)
1764
+ if !columns.is_a?(Array)
1765
+ columns = columns.get_columns
1766
+ end
1767
+ if in_place
1768
+ _df.hstack_mut(columns.map(&:_s))
1769
+ self
1770
+ else
1771
+ _from_rbdf(_df.hstack(columns.map(&:_s)))
1772
+ end
1773
+ end
905
1774
 
906
- # def vstack
907
- # end
1775
+ # Grow this DataFrame vertically by stacking a DataFrame to it.
1776
+ #
1777
+ # @param df [DataFrame]
1778
+ # DataFrame to stack.
1779
+ # @param in_place [Boolean]
1780
+ # Modify in place.
1781
+ #
1782
+ # @return [DataFrame]
1783
+ #
1784
+ # @example
1785
+ # df1 = Polars::DataFrame.new(
1786
+ # {
1787
+ # "foo" => [1, 2],
1788
+ # "bar" => [6, 7],
1789
+ # "ham" => ["a", "b"]
1790
+ # }
1791
+ # )
1792
+ # df2 = Polars::DataFrame.new(
1793
+ # {
1794
+ # "foo" => [3, 4],
1795
+ # "bar" => [8, 9],
1796
+ # "ham" => ["c", "d"]
1797
+ # }
1798
+ # )
1799
+ # df1.vstack(df2)
1800
+ # # =>
1801
+ # # shape: (4, 3)
1802
+ # # ┌─────┬─────┬─────┐
1803
+ # # │ foo ┆ bar ┆ ham │
1804
+ # # │ --- ┆ --- ┆ --- │
1805
+ # # │ i64 ┆ i64 ┆ str │
1806
+ # # ╞═════╪═════╪═════╡
1807
+ # # │ 1 ┆ 6 ┆ a │
1808
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1809
+ # # │ 2 ┆ 7 ┆ b │
1810
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1811
+ # # │ 3 ┆ 8 ┆ c │
1812
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1813
+ # # │ 4 ┆ 9 ┆ d │
1814
+ # # └─────┴─────┴─────┘
1815
+ def vstack(df, in_place: false)
1816
+ if in_place
1817
+ _df.vstack_mut(df._df)
1818
+ self
1819
+ else
1820
+ _from_rbdf(_df.vstack(df._df))
1821
+ end
1822
+ end
908
1823
 
1824
+ # Extend the memory backed by this `DataFrame` with the values from `other`.
1825
+ #
1826
+ # Different from `vstack`, which adds the chunks from `other` to the chunks of this
1827
+ # `DataFrame`, `extend` appends the data from `other` to the underlying memory
1828
+ # locations and thus may cause a reallocation.
1829
+ #
1830
+ # If this does not cause a reallocation, the resulting data structure will not
1831
+ # have any extra chunks and thus will yield faster queries.
1832
+ #
1833
+ # Prefer `extend` over `vstack` when you want to do a query after a single append.
1834
+ # For instance during online operations where you add `n` rows and rerun a query.
909
1835
  #
1836
+ # Prefer `vstack` over `extend` when you want to append many times before doing a
1837
+ # query. For instance when you read in multiple files and want to store them in a
1838
+ # single `DataFrame`. In the latter case, finish the sequence of `vstack`
1839
+ # operations with a `rechunk`.
1840
+ #
1841
+ # @param other [DataFrame]
1842
+ # DataFrame to vertically add.
1843
+ #
1844
+ # @return [DataFrame]
1845
+ #
1846
+ # @example
1847
+ # df1 = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
1848
+ # df2 = Polars::DataFrame.new({"foo" => [10, 20, 30], "bar" => [40, 50, 60]})
1849
+ # df1.extend(df2)
1850
+ # # =>
1851
+ # # shape: (6, 2)
1852
+ # # ┌─────┬─────┐
1853
+ # # │ foo ┆ bar │
1854
+ # # │ --- ┆ --- │
1855
+ # # │ i64 ┆ i64 │
1856
+ # # ╞═════╪═════╡
1857
+ # # │ 1 ┆ 4 │
1858
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1859
+ # # │ 2 ┆ 5 │
1860
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1861
+ # # │ 3 ┆ 6 │
1862
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1863
+ # # │ 10 ┆ 40 │
1864
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1865
+ # # │ 20 ┆ 50 │
1866
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1867
+ # # │ 30 ┆ 60 │
1868
+ # # └─────┴─────┘
910
1869
  def extend(other)
911
1870
  _df.extend(other._df)
912
1871
  self
913
1872
  end
914
1873
 
915
- # def drop
916
- # end
1874
+ # Remove column(s) from the DataFrame and return the result as a new DataFrame.
1875
+ #
1876
+ # @param columns [Object]
1877
+ # Column(s) to drop.
1878
+ #
1879
+ # @return [DataFrame]
1880
+ #
1881
+ # @example
1882
+ # df = Polars::DataFrame.new(
1883
+ # {
1884
+ # "foo" => [1, 2, 3],
1885
+ # "bar" => [6.0, 7.0, 8.0],
1886
+ # "ham" => ["a", "b", "c"]
1887
+ # }
1888
+ # )
1889
+ # df.drop("ham")
1890
+ # # =>
1891
+ # # shape: (3, 2)
1892
+ # # ┌─────┬─────┐
1893
+ # # │ foo ┆ bar │
1894
+ # # │ --- ┆ --- │
1895
+ # # │ i64 ┆ f64 │
1896
+ # # ╞═════╪═════╡
1897
+ # # │ 1 ┆ 6.0 │
1898
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1899
+ # # │ 2 ┆ 7.0 │
1900
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1901
+ # # │ 3 ┆ 8.0 │
1902
+ # # └─────┴─────┘
1903
+ def drop(columns)
1904
+ if columns.is_a?(Array)
1905
+ df = clone
1906
+ columns.each do |n|
1907
+ df._df.drop_in_place(n)
1908
+ end
1909
+ df
1910
+ else
1911
+ _from_rbdf(_df.drop(columns))
1912
+ end
1913
+ end
917
1914
 
918
- # def drop_in_place
919
- # end
1915
+ # Drop in place.
1916
+ #
1917
+ # @param name [Object]
1918
+ # Column to drop.
1919
+ #
1920
+ # @return [Series]
1921
+ #
1922
+ # @example
1923
+ # df = Polars::DataFrame.new(
1924
+ # {
1925
+ # "foo" => [1, 2, 3],
1926
+ # "bar" => [6, 7, 8],
1927
+ # "ham" => ["a", "b", "c"]
1928
+ # }
1929
+ # )
1930
+ # df.drop_in_place("ham")
1931
+ # # =>
1932
+ # # shape: (3,)
1933
+ # # Series: 'ham' [str]
1934
+ # # [
1935
+ # # "a"
1936
+ # # "b"
1937
+ # # "c"
1938
+ # # ]
1939
+ def drop_in_place(name)
1940
+ Utils.wrap_s(_df.drop_in_place(name))
1941
+ end
920
1942
 
921
- # def cleared
922
- # end
1943
+ # Create an empty copy of the current DataFrame.
1944
+ #
1945
+ # Returns a DataFrame with identical schema but no data.
1946
+ #
1947
+ # @return [DataFrame]
1948
+ #
1949
+ # @example
1950
+ # df = Polars::DataFrame.new(
1951
+ # {
1952
+ # "a" => [nil, 2, 3, 4],
1953
+ # "b" => [0.5, nil, 2.5, 13],
1954
+ # "c" => [true, true, false, nil]
1955
+ # }
1956
+ # )
1957
+ # df.cleared
1958
+ # # =>
1959
+ # # shape: (0, 3)
1960
+ # # ┌─────┬─────┬──────┐
1961
+ # # │ a ┆ b ┆ c │
1962
+ # # │ --- ┆ --- ┆ --- │
1963
+ # # │ i64 ┆ f64 ┆ bool │
1964
+ # # ╞═════╪═════╪══════╡
1965
+ # # └─────┴─────┴──────┘
1966
+ def cleared
1967
+ height > 0 ? head(0) : clone
1968
+ end
923
1969
 
924
1970
  # clone handled by initialize_copy
925
1971
 
1972
+ # Get the DataFrame as an Array of Series.
926
1973
  #
1974
+ # @return [Array]
927
1975
  def get_columns
928
1976
  _df.get_columns.map { |s| Utils.wrap_s(s) }
929
1977
  end
930
1978
 
1979
+ # Get a single column as Series by name.
1980
+ #
1981
+ # @param name [String]
1982
+ # Name of the column to retrieve.
1983
+ #
1984
+ # @return [Series]
1985
+ #
1986
+ # @example
1987
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
1988
+ # df.get_column("foo")
1989
+ # # =>
1990
+ # # shape: (3,)
1991
+ # # Series: 'foo' [i64]
1992
+ # # [
1993
+ # # 1
1994
+ # # 2
1995
+ # # 3
1996
+ # # ]
931
1997
  def get_column(name)
932
1998
  self[name]
933
1999
  end
934
2000
 
935
- # def fill_null
936
- # end
2001
+ # Fill null values using the specified value or strategy.
2002
+ #
2003
+ # @param value [Numeric]
2004
+ # Value used to fill null values.
2005
+ # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
2006
+ # Strategy used to fill null values.
2007
+ # @param limit [Integer]
2008
+ # Number of consecutive null values to fill when using the 'forward' or
2009
+ # 'backward' strategy.
2010
+ # @param matches_supertype [Boolean]
2011
+ # Fill all matching supertype of the fill `value`.
2012
+ #
2013
+ # @return [DataFrame]
2014
+ #
2015
+ # @example
2016
+ # df = Polars::DataFrame.new(
2017
+ # {
2018
+ # "a" => [1, 2, nil, 4],
2019
+ # "b" => [0.5, 4, nil, 13]
2020
+ # }
2021
+ # )
2022
+ # df.fill_null(99)
2023
+ # # =>
2024
+ # # shape: (4, 2)
2025
+ # # ┌─────┬──────┐
2026
+ # # │ a ┆ b │
2027
+ # # │ --- ┆ --- │
2028
+ # # │ i64 ┆ f64 │
2029
+ # # ╞═════╪══════╡
2030
+ # # │ 1 ┆ 0.5 │
2031
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2032
+ # # │ 2 ┆ 4.0 │
2033
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2034
+ # # │ 99 ┆ 99.0 │
2035
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2036
+ # # │ 4 ┆ 13.0 │
2037
+ # # └─────┴──────┘
2038
+ #
2039
+ # @example
2040
+ # df.fill_null(strategy: "forward")
2041
+ # # =>
2042
+ # # shape: (4, 2)
2043
+ # # ┌─────┬──────┐
2044
+ # # │ a ┆ b │
2045
+ # # │ --- ┆ --- │
2046
+ # # │ i64 ┆ f64 │
2047
+ # # ╞═════╪══════╡
2048
+ # # │ 1 ┆ 0.5 │
2049
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2050
+ # # │ 2 ┆ 4.0 │
2051
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2052
+ # # │ 2 ┆ 4.0 │
2053
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2054
+ # # │ 4 ┆ 13.0 │
2055
+ # # └─────┴──────┘
2056
+ #
2057
+ # @example
2058
+ # df.fill_null(strategy: "max")
2059
+ # # =>
2060
+ # # shape: (4, 2)
2061
+ # # ┌─────┬──────┐
2062
+ # # │ a ┆ b │
2063
+ # # │ --- ┆ --- │
2064
+ # # │ i64 ┆ f64 │
2065
+ # # ╞═════╪══════╡
2066
+ # # │ 1 ┆ 0.5 │
2067
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2068
+ # # │ 2 ┆ 4.0 │
2069
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2070
+ # # │ 4 ┆ 13.0 │
2071
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2072
+ # # │ 4 ┆ 13.0 │
2073
+ # # └─────┴──────┘
2074
+ #
2075
+ # @example
2076
+ # df.fill_null(strategy: "zero")
2077
+ # # =>
2078
+ # # shape: (4, 2)
2079
+ # # ┌─────┬──────┐
2080
+ # # │ a ┆ b │
2081
+ # # │ --- ┆ --- │
2082
+ # # │ i64 ┆ f64 │
2083
+ # # ╞═════╪══════╡
2084
+ # # │ 1 ┆ 0.5 │
2085
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2086
+ # # │ 2 ┆ 4.0 │
2087
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2088
+ # # │ 0 ┆ 0.0 │
2089
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2090
+ # # │ 4 ┆ 13.0 │
2091
+ # # └─────┴──────┘
2092
+ def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: true)
2093
+ _from_rbdf(
2094
+ lazy
2095
+ .fill_null(value, strategy: strategy, limit: limit, matches_supertype: matches_supertype)
2096
+ .collect(no_optimization: true)
2097
+ ._df
2098
+ )
2099
+ end
937
2100
 
2101
+ # Fill floating point NaN values by an Expression evaluation.
2102
+ #
2103
+ # @param fill_value [Object]
2104
+ # Value to fill NaN with.
2105
+ #
2106
+ # @return [DataFrame]
2107
+ #
2108
+ # @note
2109
+ # Note that floating point NaNs (Not a Number) are not missing values!
2110
+ # To replace missing values, use `fill_null`.
938
2111
  #
2112
+ # @example
2113
+ # df = Polars::DataFrame.new(
2114
+ # {
2115
+ # "a" => [1.5, 2, Float::NAN, 4],
2116
+ # "b" => [0.5, 4, Float::NAN, 13]
2117
+ # }
2118
+ # )
2119
+ # df.fill_nan(99)
2120
+ # # =>
2121
+ # # shape: (4, 2)
2122
+ # # ┌──────┬──────┐
2123
+ # # │ a ┆ b │
2124
+ # # │ --- ┆ --- │
2125
+ # # │ f64 ┆ f64 │
2126
+ # # ╞══════╪══════╡
2127
+ # # │ 1.5 ┆ 0.5 │
2128
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2129
+ # # │ 2.0 ┆ 4.0 │
2130
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2131
+ # # │ 99.0 ┆ 99.0 │
2132
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2133
+ # # │ 4.0 ┆ 13.0 │
2134
+ # # └──────┴──────┘
939
2135
  def fill_nan(fill_value)
940
2136
  lazy.fill_nan(fill_value).collect(no_optimization: true)
941
2137
  end
942
2138
 
943
- # def explode
944
- # end
2139
+ # Explode `DataFrame` to long format by exploding a column with Lists.
2140
+ #
2141
+ # @param columns [Object]
2142
+ # Column of LargeList type.
2143
+ #
2144
+ # @return [DataFrame]
2145
+ #
2146
+ # @example
2147
+ # df = Polars::DataFrame.new(
2148
+ # {
2149
+ # "letters" => ["a", "a", "b", "c"],
2150
+ # "numbers" => [[1], [2, 3], [4, 5], [6, 7, 8]]
2151
+ # }
2152
+ # )
2153
+ # df.explode("numbers")
2154
+ # # =>
2155
+ # # shape: (8, 2)
2156
+ # # ┌─────────┬─────────┐
2157
+ # # │ letters ┆ numbers │
2158
+ # # │ --- ┆ --- │
2159
+ # # │ str ┆ i64 │
2160
+ # # ╞═════════╪═════════╡
2161
+ # # │ a ┆ 1 │
2162
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
2163
+ # # │ a ┆ 2 │
2164
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
2165
+ # # │ a ┆ 3 │
2166
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
2167
+ # # │ b ┆ 4 │
2168
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
2169
+ # # │ b ┆ 5 │
2170
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
2171
+ # # │ c ┆ 6 │
2172
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
2173
+ # # │ c ┆ 7 │
2174
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
2175
+ # # │ c ┆ 8 │
2176
+ # # └─────────┴─────────┘
2177
+ def explode(columns)
2178
+ lazy.explode(columns).collect(no_optimization: true)
2179
+ end
945
2180
 
946
2181
  # def pivot
947
2182
  # end
@@ -955,25 +2190,242 @@ module Polars
955
2190
  # def partition_by
956
2191
  # end
957
2192
 
958
- # def shift
959
- # end
960
-
961
- # def shift_and_fill
962
- # end
2193
+ # Shift values by the given period.
2194
+ #
2195
+ # @param periods [Integer]
2196
+ # Number of places to shift (may be negative).
2197
+ #
2198
+ # @return [DataFrame]
2199
+ #
2200
+ # @example
2201
+ # df = Polars::DataFrame.new(
2202
+ # {
2203
+ # "foo" => [1, 2, 3],
2204
+ # "bar" => [6, 7, 8],
2205
+ # "ham" => ["a", "b", "c"]
2206
+ # }
2207
+ # )
2208
+ # df.shift(1)
2209
+ # # =>
2210
+ # # shape: (3, 3)
2211
+ # # ┌──────┬──────┬──────┐
2212
+ # # │ foo ┆ bar ┆ ham │
2213
+ # # │ --- ┆ --- ┆ --- │
2214
+ # # │ i64 ┆ i64 ┆ str │
2215
+ # # ╞══════╪══════╪══════╡
2216
+ # # │ null ┆ null ┆ null │
2217
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2218
+ # # │ 1 ┆ 6 ┆ a │
2219
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2220
+ # # │ 2 ┆ 7 ┆ b │
2221
+ # # └──────┴──────┴──────┘
2222
+ #
2223
+ # @example
2224
+ # df.shift(-1)
2225
+ # # =>
2226
+ # # shape: (3, 3)
2227
+ # # ┌──────┬──────┬──────┐
2228
+ # # │ foo ┆ bar ┆ ham │
2229
+ # # │ --- ┆ --- ┆ --- │
2230
+ # # │ i64 ┆ i64 ┆ str │
2231
+ # # ╞══════╪══════╪══════╡
2232
+ # # │ 2 ┆ 7 ┆ b │
2233
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2234
+ # # │ 3 ┆ 8 ┆ c │
2235
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2236
+ # # │ null ┆ null ┆ null │
2237
+ # # └──────┴──────┴──────┘
2238
+ def shift(periods)
2239
+ _from_rbdf(_df.shift(periods))
2240
+ end
2241
+
2242
+ # Shift the values by a given period and fill the resulting null values.
2243
+ #
2244
+ # @param periods [Integer]
2245
+ # Number of places to shift (may be negative).
2246
+ # @param fill_value [Object]
2247
+ # Fill the resulting null values with this value.
2248
+ #
2249
+ # @return [DataFrame]
2250
+ #
2251
+ # @example
2252
+ # df = Polars::DataFrame.new(
2253
+ # {
2254
+ # "foo" => [1, 2, 3],
2255
+ # "bar" => [6, 7, 8],
2256
+ # "ham" => ["a", "b", "c"]
2257
+ # }
2258
+ # )
2259
+ # df.shift_and_fill(1, 0)
2260
+ # # =>
2261
+ # # shape: (3, 3)
2262
+ # # ┌─────┬─────┬─────┐
2263
+ # # │ foo ┆ bar ┆ ham │
2264
+ # # │ --- ┆ --- ┆ --- │
2265
+ # # │ i64 ┆ i64 ┆ str │
2266
+ # # ╞═════╪═════╪═════╡
2267
+ # # │ 0 ┆ 0 ┆ 0 │
2268
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
2269
+ # # │ 1 ┆ 6 ┆ a │
2270
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
2271
+ # # │ 2 ┆ 7 ┆ b │
2272
+ # # └─────┴─────┴─────┘
2273
+ def shift_and_fill(periods, fill_value)
2274
+ lazy
2275
+ .shift_and_fill(periods, fill_value)
2276
+ .collect(no_optimization: true, string_cache: false)
2277
+ end
963
2278
 
2279
+ # Get a mask of all duplicated rows in this DataFrame.
2280
+ #
2281
+ # @return [Series]
964
2282
  #
2283
+ # @example
2284
+ # df = Polars::DataFrame.new(
2285
+ # {
2286
+ # "a" => [1, 2, 3, 1],
2287
+ # "b" => ["x", "y", "z", "x"],
2288
+ # }
2289
+ # )
2290
+ # df.is_duplicated
2291
+ # # =>
2292
+ # # shape: (4,)
2293
+ # # Series: '' [bool]
2294
+ # # [
2295
+ # # true
2296
+ # # false
2297
+ # # false
2298
+ # # true
2299
+ # # ]
965
2300
  def is_duplicated
966
2301
  Utils.wrap_s(_df.is_duplicated)
967
2302
  end
968
2303
 
2304
+ # Get a mask of all unique rows in this DataFrame.
2305
+ #
2306
+ # @return [Series]
2307
+ #
2308
+ # @example
2309
+ # df = Polars::DataFrame.new(
2310
+ # {
2311
+ # "a" => [1, 2, 3, 1],
2312
+ # "b" => ["x", "y", "z", "x"]
2313
+ # }
2314
+ # )
2315
+ # df.is_unique
2316
+ # # =>
2317
+ # # shape: (4,)
2318
+ # # Series: '' [bool]
2319
+ # # [
2320
+ # # false
2321
+ # # true
2322
+ # # true
2323
+ # # false
2324
+ # # ]
969
2325
  def is_unique
970
2326
  Utils.wrap_s(_df.is_unique)
971
2327
  end
972
2328
 
2329
+ # Start a lazy query from this point.
2330
+ #
2331
+ # @return [LazyFrame]
973
2332
  def lazy
974
2333
  wrap_ldf(_df.lazy)
975
2334
  end
976
2335
 
2336
+ # Select columns from this DataFrame.
2337
+ #
2338
+ # @param exprs [Object]
2339
+ # Column or columns to select.
2340
+ #
2341
+ # @return [DataFrame]
2342
+ #
2343
+ # @example
2344
+ # df = Polars::DataFrame.new(
2345
+ # {
2346
+ # "foo" => [1, 2, 3],
2347
+ # "bar" => [6, 7, 8],
2348
+ # "ham" => ["a", "b", "c"]
2349
+ # }
2350
+ # )
2351
+ # df.select("foo")
2352
+ # # =>
2353
+ # # shape: (3, 1)
2354
+ # # ┌─────┐
2355
+ # # │ foo │
2356
+ # # │ --- │
2357
+ # # │ i64 │
2358
+ # # ╞═════╡
2359
+ # # │ 1 │
2360
+ # # ├╌╌╌╌╌┤
2361
+ # # │ 2 │
2362
+ # # ├╌╌╌╌╌┤
2363
+ # # │ 3 │
2364
+ # # └─────┘
2365
+ #
2366
+ # @example
2367
+ # df.select(["foo", "bar"])
2368
+ # # =>
2369
+ # # shape: (3, 2)
2370
+ # # ┌─────┬─────┐
2371
+ # # │ foo ┆ bar │
2372
+ # # │ --- ┆ --- │
2373
+ # # │ i64 ┆ i64 │
2374
+ # # ╞═════╪═════╡
2375
+ # # │ 1 ┆ 6 │
2376
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2377
+ # # │ 2 ┆ 7 │
2378
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2379
+ # # │ 3 ┆ 8 │
2380
+ # # └─────┴─────┘
2381
+ #
2382
+ # @example
2383
+ # df.select(Polars.col("foo") + 1)
2384
+ # # =>
2385
+ # # shape: (3, 1)
2386
+ # # ┌─────┐
2387
+ # # │ foo │
2388
+ # # │ --- │
2389
+ # # │ i64 │
2390
+ # # ╞═════╡
2391
+ # # │ 2 │
2392
+ # # ├╌╌╌╌╌┤
2393
+ # # │ 3 │
2394
+ # # ├╌╌╌╌╌┤
2395
+ # # │ 4 │
2396
+ # # └─────┘
2397
+ #
2398
+ # @example
2399
+ # df.select([Polars.col("foo") + 1, Polars.col("bar") + 1])
2400
+ # # =>
2401
+ # # shape: (3, 2)
2402
+ # # ┌─────┬─────┐
2403
+ # # │ foo ┆ bar │
2404
+ # # │ --- ┆ --- │
2405
+ # # │ i64 ┆ i64 │
2406
+ # # ╞═════╪═════╡
2407
+ # # │ 2 ┆ 7 │
2408
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2409
+ # # │ 3 ┆ 8 │
2410
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2411
+ # # │ 4 ┆ 9 │
2412
+ # # └─────┴─────┘
2413
+ #
2414
+ # @example
2415
+ # df.select(Polars.when(Polars.col("foo") > 2).then(10).otherwise(0))
2416
+ # # =>
2417
+ # # shape: (3, 1)
2418
+ # # ┌─────────┐
2419
+ # # │ literal │
2420
+ # # │ --- │
2421
+ # # │ i64 │
2422
+ # # ╞═════════╡
2423
+ # # │ 0 │
2424
+ # # ├╌╌╌╌╌╌╌╌╌┤
2425
+ # # │ 0 │
2426
+ # # ├╌╌╌╌╌╌╌╌╌┤
2427
+ # # │ 10 │
2428
+ # # └─────────┘
977
2429
  def select(exprs)
978
2430
  _from_rbdf(
979
2431
  lazy
@@ -983,6 +2435,43 @@ module Polars
983
2435
  )
984
2436
  end
985
2437
 
2438
+ # Add or overwrite multiple columns in a DataFrame.
2439
+ #
2440
+ # @param exprs [Array]
2441
+ # Array of Expressions that evaluate to columns.
2442
+ #
2443
+ # @return [DataFrame]
2444
+ #
2445
+ # @example
2446
+ # df = Polars::DataFrame.new(
2447
+ # {
2448
+ # "a" => [1, 2, 3, 4],
2449
+ # "b" => [0.5, 4, 10, 13],
2450
+ # "c" => [true, true, false, true]
2451
+ # }
2452
+ # )
2453
+ # df.with_columns(
2454
+ # [
2455
+ # (Polars.col("a") ** 2).alias("a^2"),
2456
+ # (Polars.col("b") / 2).alias("b/2"),
2457
+ # (Polars.col("c").is_not).alias("not c")
2458
+ # ]
2459
+ # )
2460
+ # # =>
2461
+ # # shape: (4, 6)
2462
+ # # ┌─────┬──────┬───────┬──────┬──────┬───────┐
2463
+ # # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
2464
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
2465
+ # # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │
2466
+ # # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
2467
+ # # │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │
2468
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2469
+ # # │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │
2470
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2471
+ # # │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
2472
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2473
+ # # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
2474
+ # # └─────┴──────┴───────┴──────┴──────┴───────┘
986
2475
  def with_columns(exprs)
987
2476
  if !exprs.nil? && !exprs.is_a?(Array)
988
2477
  exprs = [exprs]
@@ -992,6 +2481,26 @@ module Polars
992
2481
  .collect(no_optimization: true, string_cache: false)
993
2482
  end
994
2483
 
2484
+ # Get number of chunks used by the ChunkedArrays of this DataFrame.
2485
+ #
2486
+ # @param strategy ["first", "all"]
2487
+ # Return the number of chunks of the 'first' column,
2488
+ # or 'all' columns in this DataFrame.
2489
+ #
2490
+ # @return [Object]
2491
+ #
2492
+ # @example
2493
+ # df = Polars::DataFrame.new(
2494
+ # {
2495
+ # "a" => [1, 2, 3, 4],
2496
+ # "b" => [0.5, 4, 10, 13],
2497
+ # "c" => [true, true, false, true]
2498
+ # }
2499
+ # )
2500
+ # df.n_chunks
2501
+ # # => 1
2502
+ # df.n_chunks(strategy: "all")
2503
+ # # => [1, 1, 1]
995
2504
  def n_chunks(strategy: "first")
996
2505
  if strategy == "first"
997
2506
  _df.n_chunks
@@ -1002,6 +2511,28 @@ module Polars
1002
2511
  end
1003
2512
  end
1004
2513
 
2514
+ # Aggregate the columns of this DataFrame to their maximum value.
2515
+ #
2516
+ # @return [DataFrame]
2517
+ #
2518
+ # @example
2519
+ # df = Polars::DataFrame.new(
2520
+ # {
2521
+ # "foo" => [1, 2, 3],
2522
+ # "bar" => [6, 7, 8],
2523
+ # "ham" => ["a", "b", "c"]
2524
+ # }
2525
+ # )
2526
+ # df.max
2527
+ # # =>
2528
+ # # shape: (1, 3)
2529
+ # # ┌─────┬─────┬─────┐
2530
+ # # │ foo ┆ bar ┆ ham │
2531
+ # # │ --- ┆ --- ┆ --- │
2532
+ # # │ i64 ┆ i64 ┆ str │
2533
+ # # ╞═════╪═════╪═════╡
2534
+ # # │ 3 ┆ 8 ┆ c │
2535
+ # # └─────┴─────┴─────┘
1005
2536
  def max(axis: 0)
1006
2537
  if axis == 0
1007
2538
  _from_rbdf(_df.max)
@@ -1012,6 +2543,28 @@ module Polars
1012
2543
  end
1013
2544
  end
1014
2545
 
2546
+ # Aggregate the columns of this DataFrame to their minimum value.
2547
+ #
2548
+ # @return [DataFrame]
2549
+ #
2550
+ # @example
2551
+ # df = Polars::DataFrame.new(
2552
+ # {
2553
+ # "foo" => [1, 2, 3],
2554
+ # "bar" => [6, 7, 8],
2555
+ # "ham" => ["a", "b", "c"]
2556
+ # }
2557
+ # )
2558
+ # df.min
2559
+ # # =>
2560
+ # # shape: (1, 3)
2561
+ # # ┌─────┬─────┬─────┐
2562
+ # # │ foo ┆ bar ┆ ham │
2563
+ # # │ --- ┆ --- ┆ --- │
2564
+ # # │ i64 ┆ i64 ┆ str │
2565
+ # # ╞═════╪═════╪═════╡
2566
+ # # │ 1 ┆ 6 ┆ a │
2567
+ # # └─────┴─────┴─────┘
1015
2568
  def min(axis: 0)
1016
2569
  if axis == 0
1017
2570
  _from_rbdf(_df.min)
@@ -1022,6 +2575,44 @@ module Polars
1022
2575
  end
1023
2576
  end
1024
2577
 
2578
+ # Aggregate the columns of this DataFrame to their sum value.
2579
+ #
2580
+ # @param axis [Integer]
2581
+ # Either 0 or 1.
2582
+ # @param null_strategy ["ignore", "propagate"]
2583
+ # This argument is only used if axis == 1.
2584
+ #
2585
+ # @return [DataFrame]
2586
+ #
2587
+ # @example
2588
+ # df = Polars::DataFrame.new(
2589
+ # {
2590
+ # "foo" => [1, 2, 3],
2591
+ # "bar" => [6, 7, 8],
2592
+ # "ham" => ["a", "b", "c"],
2593
+ # }
2594
+ # )
2595
+ # df.sum
2596
+ # # =>
2597
+ # # shape: (1, 3)
2598
+ # # ┌─────┬─────┬──────┐
2599
+ # # │ foo ┆ bar ┆ ham │
2600
+ # # │ --- ┆ --- ┆ --- │
2601
+ # # │ i64 ┆ i64 ┆ str │
2602
+ # # ╞═════╪═════╪══════╡
2603
+ # # │ 6 ┆ 21 ┆ null │
2604
+ # # └─────┴─────┴──────┘
2605
+ #
2606
+ # @example
2607
+ # df.sum(axis: 1)
2608
+ # # =>
2609
+ # # shape: (3,)
2610
+ # # Series: 'foo' [str]
2611
+ # # [
2612
+ # # "16a"
2613
+ # # "27b"
2614
+ # # "38c"
2615
+ # # ]
1025
2616
  def sum(axis: 0, null_strategy: "ignore")
1026
2617
  case axis
1027
2618
  when 0
@@ -1033,6 +2624,33 @@ module Polars
1033
2624
  end
1034
2625
  end
1035
2626
 
2627
+ # Aggregate the columns of this DataFrame to their mean value.
2628
+ #
2629
+ # @param axis [Integer]
2630
+ # Either 0 or 1.
2631
+ # @param null_strategy ["ignore", "propagate"]
2632
+ # This argument is only used if axis == 1.
2633
+ #
2634
+ # @return [DataFrame]
2635
+ #
2636
+ # @example
2637
+ # df = Polars::DataFrame.new(
2638
+ # {
2639
+ # "foo" => [1, 2, 3],
2640
+ # "bar" => [6, 7, 8],
2641
+ # "ham" => ["a", "b", "c"]
2642
+ # }
2643
+ # )
2644
+ # df.mean
2645
+ # # =>
2646
+ # # shape: (1, 3)
2647
+ # # ┌─────┬─────┬──────┐
2648
+ # # │ foo ┆ bar ┆ ham │
2649
+ # # │ --- ┆ --- ┆ --- │
2650
+ # # │ f64 ┆ f64 ┆ str │
2651
+ # # ╞═════╪═════╪══════╡
2652
+ # # │ 2.0 ┆ 7.0 ┆ null │
2653
+ # # └─────┴─────┴──────┘
1036
2654
  def mean(axis: 0, null_strategy: "ignore")
1037
2655
  case axis
1038
2656
  when 0
@@ -1044,77 +2662,633 @@ module Polars
1044
2662
  end
1045
2663
  end
1046
2664
 
2665
+ # Aggregate the columns of this DataFrame to their standard deviation value.
2666
+ #
2667
+ # @param ddof [Integer]
2668
+ # Degrees of freedom
2669
+ #
2670
+ # @return [DataFrame]
2671
+ #
2672
+ # @example
2673
+ # df = Polars::DataFrame.new(
2674
+ # {
2675
+ # "foo" => [1, 2, 3],
2676
+ # "bar" => [6, 7, 8],
2677
+ # "ham" => ["a", "b", "c"]
2678
+ # }
2679
+ # )
2680
+ # df.std
2681
+ # # =>
2682
+ # # shape: (1, 3)
2683
+ # # ┌─────┬─────┬──────┐
2684
+ # # │ foo ┆ bar ┆ ham │
2685
+ # # │ --- ┆ --- ┆ --- │
2686
+ # # │ f64 ┆ f64 ┆ str │
2687
+ # # ╞═════╪═════╪══════╡
2688
+ # # │ 1.0 ┆ 1.0 ┆ null │
2689
+ # # └─────┴─────┴──────┘
2690
+ #
2691
+ # @example
2692
+ # df.std(ddof: 0)
2693
+ # # =>
2694
+ # # shape: (1, 3)
2695
+ # # ┌──────────┬──────────┬──────┐
2696
+ # # │ foo ┆ bar ┆ ham │
2697
+ # # │ --- ┆ --- ┆ --- │
2698
+ # # │ f64 ┆ f64 ┆ str │
2699
+ # # ╞══════════╪══════════╪══════╡
2700
+ # # │ 0.816497 ┆ 0.816497 ┆ null │
2701
+ # # └──────────┴──────────┴──────┘
1047
2702
  def std(ddof: 1)
1048
2703
  _from_rbdf(_df.std(ddof))
1049
2704
  end
1050
2705
 
2706
+ # Aggregate the columns of this DataFrame to their variance value.
2707
+ #
2708
+ # @param ddof [Integer]
2709
+ # Degrees of freedom
2710
+ #
2711
+ # @return [DataFrame]
2712
+ #
2713
+ # @example
2714
+ # df = Polars::DataFrame.new(
2715
+ # {
2716
+ # "foo" => [1, 2, 3],
2717
+ # "bar" => [6, 7, 8],
2718
+ # "ham" => ["a", "b", "c"]
2719
+ # }
2720
+ # )
2721
+ # df.var
2722
+ # # =>
2723
+ # # shape: (1, 3)
2724
+ # # ┌─────┬─────┬──────┐
2725
+ # # │ foo ┆ bar ┆ ham │
2726
+ # # │ --- ┆ --- ┆ --- │
2727
+ # # │ f64 ┆ f64 ┆ str │
2728
+ # # ╞═════╪═════╪══════╡
2729
+ # # │ 1.0 ┆ 1.0 ┆ null │
2730
+ # # └─────┴─────┴──────┘
2731
+ #
2732
+ # @example
2733
+ # df.var(ddof: 0)
2734
+ # # =>
2735
+ # # shape: (1, 3)
2736
+ # # ┌──────────┬──────────┬──────┐
2737
+ # # │ foo ┆ bar ┆ ham │
2738
+ # # │ --- ┆ --- ┆ --- │
2739
+ # # │ f64 ┆ f64 ┆ str │
2740
+ # # ╞══════════╪══════════╪══════╡
2741
+ # # │ 0.666667 ┆ 0.666667 ┆ null │
2742
+ # # └──────────┴──────────┴──────┘
1051
2743
  def var(ddof: 1)
1052
2744
  _from_rbdf(_df.var(ddof))
1053
2745
  end
1054
2746
 
2747
+ # Aggregate the columns of this DataFrame to their median value.
2748
+ #
2749
+ # @return [DataFrame]
2750
+ #
2751
+ # @example
2752
+ # df = Polars::DataFrame.new(
2753
+ # {
2754
+ # "foo" => [1, 2, 3],
2755
+ # "bar" => [6, 7, 8],
2756
+ # "ham" => ["a", "b", "c"]
2757
+ # }
2758
+ # )
2759
+ # df.median
2760
+ # # =>
2761
+ # # shape: (1, 3)
2762
+ # # ┌─────┬─────┬──────┐
2763
+ # # │ foo ┆ bar ┆ ham │
2764
+ # # │ --- ┆ --- ┆ --- │
2765
+ # # │ f64 ┆ f64 ┆ str │
2766
+ # # ╞═════╪═════╪══════╡
2767
+ # # │ 2.0 ┆ 7.0 ┆ null │
2768
+ # # └─────┴─────┴──────┘
1055
2769
  def median
1056
2770
  _from_rbdf(_df.median)
1057
2771
  end
1058
2772
 
1059
- # def product
1060
- # end
2773
+ # Aggregate the columns of this DataFrame to their product values.
2774
+ #
2775
+ # @return [DataFrame]
2776
+ #
2777
+ # @example
2778
+ # df = Polars::DataFrame.new(
2779
+ # {
2780
+ # "a" => [1, 2, 3],
2781
+ # "b" => [0.5, 4, 10],
2782
+ # "c" => [true, true, false]
2783
+ # }
2784
+ # )
2785
+ # df.product
2786
+ # # =>
2787
+ # # shape: (1, 3)
2788
+ # # ┌─────┬──────┬─────┐
2789
+ # # │ a ┆ b ┆ c │
2790
+ # # │ --- ┆ --- ┆ --- │
2791
+ # # │ i64 ┆ f64 ┆ i64 │
2792
+ # # ╞═════╪══════╪═════╡
2793
+ # # │ 6 ┆ 20.0 ┆ 0 │
2794
+ # # └─────┴──────┴─────┘
2795
def product
  # Expressed through the expression API: product of every column.
  every_column = Polars.all
  select(every_column.product)
end
2798
+
2799
+ # Aggregate the columns of this DataFrame to their quantile value.
2800
+ #
2801
+ # @param quantile [Float]
2802
+ # Quantile between 0.0 and 1.0.
2803
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
2804
+ # Interpolation method.
2805
+ #
2806
+ # @return [DataFrame]
2807
+ #
2808
+ # @example
2809
+ # df = Polars::DataFrame.new(
2810
+ # {
2811
+ # "foo" => [1, 2, 3],
2812
+ # "bar" => [6, 7, 8],
2813
+ # "ham" => ["a", "b", "c"]
2814
+ # }
2815
+ # )
2816
+ # df.quantile(0.5, interpolation: "nearest")
2817
+ # # =>
2818
+ # # shape: (1, 3)
2819
+ # # ┌─────┬─────┬──────┐
2820
+ # # │ foo ┆ bar ┆ ham │
2821
+ # # │ --- ┆ --- ┆ --- │
2822
+ # # │ f64 ┆ f64 ┆ str │
2823
+ # # ╞═════╪═════╪══════╡
2824
+ # # │ 2.0 ┆ 7.0 ┆ null │
2825
+ # # └─────┴─────┴──────┘
2826
def quantile(quantile, interpolation: "nearest")
  # Both arguments are forwarded positionally to the native frame.
  native = _df.quantile(quantile, interpolation)
  _from_rbdf(native)
end
2829
+
2830
+ # Get one hot encoded dummy variables.
2831
+ #
2832
+ # @param columns
2833
+ # A subset of columns to convert to dummy variables. `nil` means
2834
+ # "all columns".
2835
+ #
2836
+ # @return [DataFrame]
2837
+ #
2838
+ # @example
2839
+ # df = Polars::DataFrame.new(
2840
+ # {
2841
+ # "foo" => [1, 2],
2842
+ # "bar" => [3, 4],
2843
+ # "ham" => ["a", "b"]
2844
+ # }
2845
+ # )
2846
+ # df.to_dummies
2847
+ # # =>
2848
+ # # shape: (2, 6)
2849
+ # # ┌───────┬───────┬───────┬───────┬───────┬───────┐
2850
+ # # │ foo_1 ┆ foo_2 ┆ bar_3 ┆ bar_4 ┆ ham_a ┆ ham_b │
2851
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
2852
+ # # │ u8 ┆ u8 ┆ u8 ┆ u8 ┆ u8 ┆ u8 │
2853
+ # # ╞═══════╪═══════╪═══════╪═══════╪═══════╪═══════╡
2854
+ # # │ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 │
2855
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2856
+ # # │ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 │
2857
+ # # └───────┴───────┴───────┴───────┴───────┴───────┘
2858
def to_dummies(columns: nil)
  # A single column name is accepted and wrapped into a one-element array.
  columns = [columns] if columns.is_a?(String)
  native = _df.to_dummies(columns)
  _from_rbdf(native)
end
1061
2864
 
1062
- # def quantile(quantile, interpolation: "nearest")
1063
- # end
2865
+ # Drop duplicate rows from this DataFrame.
2866
+ #
2867
+ # @param maintain_order [Boolean]
2868
+ # Keep the same order as the original DataFrame. This requires more work to
2869
+ # compute.
2870
+ # @param subset [Object]
2871
+ # Subset to use to compare rows.
2872
+ # @param keep ["first", "last"]
2873
+ # Which of the duplicate rows to keep (in conjunction with `subset`).
2874
+ #
2875
+ # @return [DataFrame]
2876
+ #
2877
+ # @note
2878
+ # Note that this fails if there is a column of type `List` in the DataFrame or
2879
+ # subset.
2880
+ #
2881
+ # @example
2882
+ # df = Polars::DataFrame.new(
2883
+ # {
2884
+ # "a" => [1, 1, 2, 3, 4, 5],
2885
+ # "b" => [0.5, 0.5, 1.0, 2.0, 3.0, 3.0],
2886
+ # "c" => [true, true, true, false, true, true]
2887
+ # }
2888
+ # )
2889
+ # df.unique
2890
+ # # =>
2891
+ # # shape: (5, 3)
2892
+ # # ┌─────┬─────┬───────┐
2893
+ # # │ a ┆ b ┆ c │
2894
+ # # │ --- ┆ --- ┆ --- │
2895
+ # # │ i64 ┆ f64 ┆ bool │
2896
+ # # ╞═════╪═════╪═══════╡
2897
+ # # │ 1 ┆ 0.5 ┆ true │
2898
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2899
+ # # │ 2 ┆ 1.0 ┆ true │
2900
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2901
+ # # │ 3 ┆ 2.0 ┆ false │
2902
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2903
+ # # │ 4 ┆ 3.0 ┆ true │
2904
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2905
+ # # │ 5 ┆ 3.0 ┆ true │
2906
+ # # └─────┴─────┴───────┘
2907
def unique(maintain_order: true, subset: nil, keep: "first")
  # Normalize `subset`: nil and arrays pass through, a single name is
  # wrapped, anything else is converted with #to_a.
  subset =
    case subset
    when nil, Array then subset
    when String then [subset]
    else subset.to_a
    end

  native = _df.unique(maintain_order, subset, keep)
  _from_rbdf(native)
end
1067
2918
 
1068
- # def unique
1069
- # end
2919
+ # Return the number of unique rows, or the number of unique row-subsets.
2920
+ #
2921
+ # @param subset [Object]
2922
+ # One or more columns/expressions that define what to count;
2923
+ # omit to return the count of unique rows.
2924
+ #
2925
+ # @return [Integer]
2926
+ #
2927
+ # @example
2928
+ # df = Polars::DataFrame.new(
2929
+ # {
2930
+ # "a" => [1, 1, 2, 3, 4, 5],
2931
+ # "b" => [0.5, 0.5, 1.0, 2.0, 3.0, 3.0],
2932
+ # "c" => [true, true, true, false, true, true]
2933
+ # }
2934
+ # )
2935
+ # df.n_unique
2936
+ # # => 5
2937
+ #
2938
+ # @example Simple columns subset
2939
+ # df.n_unique(subset: ["b", "c"])
2940
+ # # => 4
2941
+ #
2942
+ # @example Expression subset
2943
+ # df.n_unique(
2944
+ # subset: [
2945
+ # (Polars.col("a").floordiv(2)),
2946
+ # (Polars.col("c") | (Polars.col("b") >= 2))
2947
+ # ]
2948
+ # )
2949
+ # # => 3
2950
def n_unique(subset: nil)
  # A single column name or a single expression is normalized to a
  # one-element array. (The check must be against String; the previous
  # StringIO check never matched a column name.)
  if subset.is_a?(String)
    subset = [Polars.col(subset)]
  elsif subset.is_a?(Expr)
    subset = [subset]
  end

  if subset.is_a?(Array) && subset.length == 1
    # One column/expression: count its distinct values directly.
    expr = Utils.expr_to_lit_or_expr(subset[0], str_to_lit: false)
  else
    # Several columns (or all of them when subset is nil): pack them into
    # a struct so that distinct rows are counted.
    struct_fields = subset.nil? ? Polars.all : subset
    expr = Polars.struct(struct_fields)
  end

  df = lazy.select(expr.n_unique).collect
  df.is_empty ? 0 : df.row(0)[0]
end
2967
+
2968
+ # Rechunk the data in this DataFrame to a contiguous allocation.
1073
2969
  #
2970
+ # This will make sure all subsequent operations have optimal and predictable
2971
+ # performance.
1074
2972
  #
2973
+ # @return [DataFrame]
1075
2974
  def rechunk
1076
2975
  _from_rbdf(_df.rechunk)
1077
2976
  end
1078
2977
 
2978
+ # Create a new DataFrame that shows the null counts per column.
2979
+ #
2980
+ # @return [DataFrame]
2981
+ #
2982
+ # @example
2983
+ # df = Polars::DataFrame.new(
2984
+ # {
2985
+ # "foo" => [1, nil, 3],
2986
+ # "bar" => [6, 7, nil],
2987
+ # "ham" => ["a", "b", "c"]
2988
+ # }
2989
+ # )
2990
+ # df.null_count
2991
+ # # =>
2992
+ # # shape: (1, 3)
2993
+ # # ┌─────┬─────┬─────┐
2994
+ # # │ foo ┆ bar ┆ ham │
2995
+ # # │ --- ┆ --- ┆ --- │
2996
+ # # │ u32 ┆ u32 ┆ u32 │
2997
+ # # ╞═════╪═════╪═════╡
2998
+ # # │ 1 ┆ 1 ┆ 0 │
2999
+ # # └─────┴─────┴─────┘
1079
3000
  def null_count
1080
3001
  _from_rbdf(_df.null_count)
1081
3002
  end
1082
3003
 
1083
- # def sample
1084
- # end
3004
+ # Sample from this DataFrame.
3005
+ #
3006
+ # @param n [Integer]
3007
+ # Number of items to return. Cannot be used with `frac`. Defaults to 1 if
3008
+ # `frac` is nil.
3009
+ # @param frac [Float]
3010
+ # Fraction of items to return. Cannot be used with `n`.
3011
+ # @param with_replacement [Boolean]
3012
+ # Allow values to be sampled more than once.
3013
+ # @param shuffle [Boolean]
3014
+ # Shuffle the order of sampled data points.
3015
+ # @param seed [Integer]
3016
+ # Seed for the random number generator. If set to nil (default), a random
3017
+ # seed is used.
3018
+ #
3019
+ # @return [DataFrame]
3020
+ #
3021
+ # @example
3022
+ # df = Polars::DataFrame.new(
3023
+ # {
3024
+ # "foo" => [1, 2, 3],
3025
+ # "bar" => [6, 7, 8],
3026
+ # "ham" => ["a", "b", "c"]
3027
+ # }
3028
+ # )
3029
+ # df.sample(n: 2, seed: 0)
3030
+ # # =>
3031
+ # # shape: (2, 3)
3032
+ # # ┌─────┬─────┬─────┐
3033
+ # # │ foo ┆ bar ┆ ham │
3034
+ # # │ --- ┆ --- ┆ --- │
3035
+ # # │ i64 ┆ i64 ┆ str │
3036
+ # # ╞═════╪═════╪═════╡
3037
+ # # │ 3 ┆ 8 ┆ c │
3038
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
3039
+ # # │ 2 ┆ 7 ┆ b │
3040
+ # # └─────┴─────┴─────┘
3041
def sample(
  n: nil,
  frac: nil,
  with_replacement: false,
  shuffle: false,
  seed: nil
)
  if !n.nil? && !frac.nil?
    raise ArgumentError, "cannot specify both `n` and `frac`"
  end

  # Fractional sampling must return here. Without the explicit return the
  # result was discarded and the method fell through to sample_n with n = 1.
  unless frac.nil?
    return _from_rbdf(
      _df.sample_frac(frac, with_replacement, shuffle, seed)
    )
  end

  # Neither n nor frac given: sample a single row.
  n = 1 if n.nil?
  _from_rbdf(_df.sample_n(n, with_replacement, shuffle, seed))
end
1085
3063
 
1086
3064
  # def fold
1087
3065
  # end
1088
3066
 
1089
- # def row
1090
- # end
3067
+ # Get a row as tuple, either by index or by predicate.
3068
+ #
3069
+ # @param index [Object]
3070
+ # Row index.
3071
+ # @param by_predicate [Object]
3072
+ # Select the row according to a given expression/predicate.
3073
+ #
3074
+ # @return [Object]
3075
+ #
3076
+ # @note
3077
+ # The `index` and `by_predicate` params are mutually exclusive. Additionally,
3078
+ # to ensure clarity, the `by_predicate` parameter must be supplied by keyword.
3079
+ #
3080
+ # When using `by_predicate` it is an error condition if anything other than
3081
+ # one row is returned; more than one row raises `TooManyRowsReturned`, and
3082
+ # zero rows will raise `NoRowsReturned` (both inherit from `RowsException`).
3083
+ #
3084
+ # @example Return the row at the given index
3085
+ # df = Polars::DataFrame.new(
3086
+ # {
3087
+ # "foo" => [1, 2, 3],
3088
+ # "bar" => [6, 7, 8],
3089
+ # "ham" => ["a", "b", "c"]
3090
+ # }
3091
+ # )
3092
+ # df.row(2)
3093
+ # # => [3, 8, "c"]
3094
+ #
3095
+ # @example Return the row that matches the given predicate
3096
+ # df.row(by_predicate: Polars.col("ham") == "b")
3097
+ # # => [2, 7, "b"]
3098
def row(index = nil, by_predicate: nil)
  if !index.nil? && !by_predicate.nil?
    raise ArgumentError, "Cannot set both 'index' and 'by_predicate'; mutually exclusive"
  elsif index.is_a?(Expr)
    raise TypeError, "Expressions should be passed to the 'by_predicate' param"
  elsif index.is_a?(Integer)
    _df.row_tuple(index)
  elsif by_predicate.is_a?(Expr)
    # The predicate must select exactly one row.
    rows = filter(by_predicate).rows
    n_rows = rows.length
    if n_rows > 1
      raise TooManyRowsReturned, "Predicate #{by_predicate} returned #{n_rows} rows"
    elsif n_rows == 0
      # Interpolate the predicate; the old message printed the literal
      # placeholder text "{by_predicate!s}".
      raise NoRowsReturned, "Predicate <#{by_predicate}> returned no rows"
    end
    rows[0]
  else
    raise ArgumentError, "One of 'index' or 'by_predicate' must be set"
  end
end
1091
3118
 
1092
- # def rows
1093
- # end
3119
+ # Convert columnar data to rows as Ruby arrays.
3120
+ #
3121
+ # @return [Array]
3122
+ #
3123
+ # @example
3124
+ # df = Polars::DataFrame.new(
3125
+ # {
3126
+ # "a" => [1, 3, 5],
3127
+ # "b" => [2, 4, 6]
3128
+ # }
3129
+ # )
3130
+ # df.rows
3131
+ # # => [[1, 2], [3, 4], [5, 6]]
3132
def rows
  # Row materialization is done by the native frame.
  native = _df
  native.row_tuples
end
1094
3135
 
1095
- # def shrink_to_fit
1096
- # end
3136
+ # Shrink DataFrame memory usage.
3137
+ #
3138
+ # Shrinks to fit the exact capacity needed to hold the data.
3139
+ #
3140
+ # @return [DataFrame]
3141
def shrink_to_fit(in_place: false)
  # Operate on this frame when in_place, otherwise on a clone, and
  # return whichever frame was shrunk.
  target = in_place ? self : clone
  target._df.shrink_to_fit
  target
end
1097
3151
 
1098
- # def take_every
1099
- # end
3152
+ # Take every nth row in the DataFrame and return as a new DataFrame.
3153
+ #
3154
+ # @return [DataFrame]
3155
+ #
3156
+ # @example
3157
+ # s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]})
3158
+ # s.take_every(2)
3159
+ # # =>
3160
+ # # shape: (2, 2)
3161
+ # # ┌─────┬─────┐
3162
+ # # │ a ┆ b │
3163
+ # # │ --- ┆ --- │
3164
+ # # │ i64 ┆ i64 │
3165
+ # # ╞═════╪═════╡
3166
+ # # │ 1 ┆ 5 │
3167
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
3168
+ # # │ 3 ┆ 7 │
3169
+ # # └─────┴─────┘
3170
def take_every(n)
  # Apply take_every(n) to the "*" column expression.
  every_column = Utils.col("*")
  select(every_column.take_every(n))
end
1100
3173
 
1101
3174
  # def hash_rows
1102
3175
  # end
1103
3176
 
1104
- # def interpolate
1105
- # end
1106
-
3177
+ # Interpolate intermediate values. The interpolation method is linear.
3178
+ #
3179
+ # @return [DataFrame]
3180
+ #
3181
+ # @example
3182
+ # df = Polars::DataFrame.new(
3183
+ # {
3184
+ # "foo" => [1, nil, 9, 10],
3185
+ # "bar" => [6, 7, 9, nil],
3186
+ # "baz" => [1, nil, nil, 9]
3187
+ # }
3188
+ # )
3189
+ # df.interpolate
3190
+ # # =>
3191
+ # # shape: (4, 3)
3192
+ # # ┌─────┬──────┬─────┐
3193
+ # # │ foo ┆ bar ┆ baz │
3194
+ # # │ --- ┆ --- ┆ --- │
3195
+ # # │ i64 ┆ i64 ┆ i64 │
3196
+ # # ╞═════╪══════╪═════╡
3197
+ # # │ 1 ┆ 6 ┆ 1 │
3198
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
3199
+ # # │ 5 ┆ 7 ┆ 3 │
3200
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
3201
+ # # │ 9 ┆ 9 ┆ 6 │
3202
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
3203
+ # # │ 10 ┆ null ┆ 9 │
3204
+ # # └─────┴──────┴─────┘
3205
def interpolate
  # Apply interpolate to the "*" column expression.
  every_column = Utils.col("*")
  select(every_column.interpolate)
end
3208
+
3209
+ # Check if the dataframe is empty.
3210
+ #
3211
+ # @return [Boolean]
1107
3212
  #
3213
+ # @example
3214
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
3215
+ # df.is_empty
3216
+ # # => false
3217
+ # df.filter(Polars.col("foo") > 99).is_empty
3218
+ # # => true
1108
3219
  def is_empty
1109
3220
  height == 0
1110
3221
  end
1111
3222
  alias_method :empty?, :is_empty
1112
3223
 
1113
- # def to_struct(name)
1114
- # end
3224
+ # Convert a `DataFrame` to a `Series` of type `Struct`.
3225
+ #
3226
+ # @param name [String]
3227
+ # Name for the struct Series
3228
+ #
3229
+ # @return [Series]
3230
+ #
3231
+ # @example
3232
+ # df = Polars::DataFrame.new(
3233
+ # {
3234
+ # "a" => [1, 2, 3, 4, 5],
3235
+ # "b" => ["one", "two", "three", "four", "five"]
3236
+ # }
3237
+ # )
3238
+ # df.to_struct("nums")
3239
+ # # =>
3240
+ # # shape: (5,)
3241
+ # # Series: 'nums' [struct[2]]
3242
+ # # [
3243
+ # # {1,"one"}
3244
+ # # {2,"two"}
3245
+ # # {3,"three"}
3246
+ # # {4,"four"}
3247
+ # # {5,"five"}
3248
+ # # ]
3249
def to_struct(name)
  # Build the native struct series, then wrap it via Utils.wrap_s.
  native_series = _df.to_struct(name)
  Utils.wrap_s(native_series)
end
1115
3252
 
1116
- # def unnest
1117
- # end
3253
+ # Decompose a struct into its fields.
3254
+ #
3255
+ # The fields will be inserted into the `DataFrame` on the location of the
3256
+ # `struct` type.
3257
+ #
3258
+ # @param names [Object]
3259
+ # Names of the struct columns that will be decomposed by its fields
3260
+ #
3261
+ # @return [DataFrame]
3262
+ #
3263
+ # @example
3264
+ # df = Polars::DataFrame.new(
3265
+ # {
3266
+ # "before" => ["foo", "bar"],
3267
+ # "t_a" => [1, 2],
3268
+ # "t_b" => ["a", "b"],
3269
+ # "t_c" => [true, nil],
3270
+ # "t_d" => [[1, 2], [3]],
3271
+ # "after" => ["baz", "womp"]
3272
+ # }
3273
+ # ).select(["before", Polars.struct(Polars.col("^t_.$")).alias("t_struct"), "after"])
3274
+ # df.unnest("t_struct")
3275
+ # # =>
3276
+ # # shape: (2, 6)
3277
+ # # ┌────────┬─────┬─────┬──────┬───────────┬───────┐
3278
+ # # │ before ┆ t_a ┆ t_b ┆ t_c ┆ t_d ┆ after │
3279
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
3280
+ # # │ str ┆ i64 ┆ str ┆ bool ┆ list[i64] ┆ str │
3281
+ # # ╞════════╪═════╪═════╪══════╪═══════════╪═══════╡
3282
+ # # │ foo ┆ 1 ┆ a ┆ true ┆ [1, 2] ┆ baz │
3283
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
3284
+ # # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
3285
+ # # └────────┴─────┴─────┴──────┴───────────┴───────┘
3286
def unnest(names)
  # A single column name is accepted and wrapped into a one-element array.
  names = [names] if names.is_a?(String)
  native = _df.unnest(names)
  _from_rbdf(native)
end
1118
3292
 
1119
3293
  private
1120
3294
 
@@ -1127,7 +3301,7 @@ module Polars
1127
3301
  if !columns.nil?
1128
3302
  columns, dtypes = _unpack_columns(columns, lookup_names: data.keys)
1129
3303
 
1130
- if !data && dtypes
3304
+ if data.empty? && dtypes
1131
3305
  data_series = columns.map { |name| Series.new(name, [], dtype: dtypes[name])._s }
1132
3306
  else
1133
3307
  data_series = data.map { |name, values| Series.new(name, values, dtype: dtypes[name])._s }
@@ -1147,7 +3321,7 @@ module Polars
1147
3321
  if columns.nil?
1148
3322
  data
1149
3323
  else
1150
- if !data
3324
+ if data.empty?
1151
3325
  columns.map { |c| Series.new(c, nil)._s }
1152
3326
  elsif data.length == columns.length
1153
3327
  columns.each_with_index do |c, i|
@@ -1182,5 +3356,75 @@ module Polars
1182
3356
  def _from_rbdf(rb_df)
1183
3357
  self.class._from_rbdf(rb_df)
1184
3358
  end
3359
+
3360
def _comp(other, op)
  # Frame-vs-frame comparisons take a dedicated path; everything else is
  # handled by the non-DataFrame comparison.
  return _compare_to_other_df(other, op) if other.is_a?(DataFrame)

  _compare_to_non_df(other, op)
end
3367
+
3368
def _compare_to_other_df(other, op)
  # Both frames must have the same column names and the same shape.
  # (Raises ArgumentError; the previous misspelled constant raised
  # NameError instead.)
  if columns != other.columns
    raise ArgumentError, "DataFrame columns do not match"
  end
  if shape != other.shape
    raise ArgumentError, "DataFrame dimensions do not match"
  end

  # Suffix the other frame's columns and place both frames side by side,
  # so each column can be compared with its suffixed counterpart.
  suffix = "__POLARS_CMP_OTHER"
  other_renamed = other.select(Polars.all.suffix(suffix))
  combined = Polars.concat([self, other_renamed], how: "horizontal")

  operator =
    case op
    when "eq" then :==
    when "neq" then :!=
    when "gt" then :>
    when "lt" then :<
    when "gt_eq" then :>=
    when "lt_eq" then :<=
    else
      raise ArgumentError, "got unexpected comparison operator: #{op}"
    end

  expr = columns.map do |n|
    Polars.col(n).public_send(operator, Polars.col("#{n}#{suffix}"))
  end

  combined.select(expr)
end
3399
+
3400
def _compare_to_non_df(other, op)
  # Build the comparison of every column against `other`, then select it.
  comparison =
    case op
    when "eq" then Polars.all == other
    when "neq" then Polars.all != other
    when "gt" then Polars.all > other
    when "lt" then Polars.all < other
    when "gt_eq" then Polars.all >= other
    when "lt_eq" then Polars.all <= other
    else
      raise ArgumentError, "got unexpected comparison operator: #{op}"
    end

  select(comparison)
end
3418
+
3419
def _prepare_other_arg(other)
  # A Series passes through untouched; arrays are rejected; any other
  # value is wrapped in a one-element, unnamed Series.
  return other if other.is_a?(Series)
  raise ArgumentError, "Operation not supported." if other.is_a?(Array)

  Series.new("", [other])
end
1185
3429
  end
1186
3430
  end