polars-df 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +9 -0
  4. data/Cargo.lock +74 -3
  5. data/Cargo.toml +3 -0
  6. data/README.md +1 -1
  7. data/ext/polars/Cargo.toml +18 -1
  8. data/ext/polars/src/conversion.rs +115 -2
  9. data/ext/polars/src/dataframe.rs +228 -11
  10. data/ext/polars/src/error.rs +4 -0
  11. data/ext/polars/src/lazy/dataframe.rs +5 -5
  12. data/ext/polars/src/lazy/dsl.rs +157 -2
  13. data/ext/polars/src/lib.rs +185 -10
  14. data/ext/polars/src/list_construction.rs +100 -0
  15. data/ext/polars/src/series.rs +217 -29
  16. data/ext/polars/src/set.rs +91 -0
  17. data/ext/polars/src/utils.rs +19 -0
  18. data/lib/polars/batched_csv_reader.rb +1 -0
  19. data/lib/polars/cat_expr.rb +39 -0
  20. data/lib/polars/cat_name_space.rb +54 -0
  21. data/lib/polars/data_frame.rb +2384 -140
  22. data/lib/polars/date_time_expr.rb +1282 -7
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/exceptions.rb +20 -0
  25. data/lib/polars/expr.rb +4374 -53
  26. data/lib/polars/expr_dispatch.rb +22 -0
  27. data/lib/polars/functions.rb +219 -0
  28. data/lib/polars/group_by.rb +518 -0
  29. data/lib/polars/io.rb +421 -2
  30. data/lib/polars/lazy_frame.rb +1267 -69
  31. data/lib/polars/lazy_functions.rb +412 -24
  32. data/lib/polars/lazy_group_by.rb +80 -0
  33. data/lib/polars/list_expr.rb +507 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/meta_expr.rb +21 -0
  36. data/lib/polars/series.rb +2256 -242
  37. data/lib/polars/slice.rb +104 -0
  38. data/lib/polars/string_expr.rb +847 -10
  39. data/lib/polars/string_name_space.rb +690 -0
  40. data/lib/polars/struct_expr.rb +73 -0
  41. data/lib/polars/struct_name_space.rb +64 -0
  42. data/lib/polars/utils.rb +71 -3
  43. data/lib/polars/version.rb +2 -1
  44. data/lib/polars/when.rb +1 -0
  45. data/lib/polars/when_then.rb +1 -0
  46. data/lib/polars.rb +12 -10
  47. metadata +15 -2
@@ -155,12 +155,35 @@ module Polars
155
155
  end
156
156
 
157
157
  # @private
158
- def self._read_parquet(file)
158
+ def self._read_parquet(
159
+ file,
160
+ columns: nil,
161
+ n_rows: nil,
162
+ parallel: "auto",
163
+ row_count_name: nil,
164
+ row_count_offset: 0,
165
+ low_memory: false
166
+ )
159
167
  if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
160
168
  file = Utils.format_path(file)
161
169
  end
162
170
 
163
- _from_rbdf(RbDataFrame.read_parquet(file))
171
+ if file.is_a?(String) && file.include?("*")
172
+ raise Todo
173
+ end
174
+
175
+ projection, columns = Utils.handle_projection_columns(columns)
176
+ _from_rbdf(
177
+ RbDataFrame.read_parquet(
178
+ file,
179
+ columns,
180
+ projection,
181
+ n_rows,
182
+ parallel,
183
+ Utils._prepare_row_count_args(row_count_name, row_count_offset),
184
+ low_memory
185
+ )
186
+ )
164
187
  end
165
188
 
166
189
  # def self._read_avro
@@ -259,11 +282,13 @@ module Polars
259
282
  # @return [Array]
260
283
  #
261
284
  # @example
262
- # df = Polars::DataFrame.new({
263
- # "foo" => [1, 2, 3],
264
- # "bar" => [6, 7, 8],
265
- # "ham" => ["a", "b", "c"]
266
- # })
285
+ # df = Polars::DataFrame.new(
286
+ # {
287
+ # "foo" => [1, 2, 3],
288
+ # "bar" => [6, 7, 8],
289
+ # "ham" => ["a", "b", "c"]
290
+ # }
291
+ # )
267
292
  # df.columns
268
293
  # # => ["foo", "bar", "ham"]
269
294
  def columns
@@ -279,11 +304,13 @@ module Polars
279
304
  # @return [Object]
280
305
  #
281
306
  # @example
282
- # df = Polars::DataFrame.new({
283
- # "foo" => [1, 2, 3],
284
- # "bar" => [6, 7, 8],
285
- # "ham" => ["a", "b", "c"]
286
- # })
307
+ # df = Polars::DataFrame.new(
308
+ # {
309
+ # "foo" => [1, 2, 3],
310
+ # "bar" => [6, 7, 8],
311
+ # "ham" => ["a", "b", "c"]
312
+ # }
313
+ # )
287
314
  # df.columns = ["apple", "banana", "orange"]
288
315
  # df
289
316
  # # =>
@@ -308,11 +335,13 @@ module Polars
308
335
  # @return [Array]
309
336
  #
310
337
  # @example
311
- # df = Polars::DataFrame.new({
312
- # "foo" => [1, 2, 3],
313
- # "bar" => [6.0, 7.0, 8.0],
314
- # "ham" => ["a", "b", "c"]
315
- # })
338
+ # df = Polars::DataFrame.new(
339
+ # {
340
+ # "foo" => [1, 2, 3],
341
+ # "bar" => [6.0, 7.0, 8.0],
342
+ # "ham" => ["a", "b", "c"]
343
+ # }
344
+ # )
316
345
  # df.dtypes
317
346
  # # => [:i64, :f64, :str]
318
347
  def dtypes
@@ -324,56 +353,132 @@ module Polars
324
353
  # @return [Hash]
325
354
  #
326
355
  # @example
327
- # df = Polars::DataFrame.new({
328
- # "foo" => [1, 2, 3],
329
- # "bar" => [6.0, 7.0, 8.0],
330
- # "ham" => ["a", "b", "c"]
331
- # })
356
+ # df = Polars::DataFrame.new(
357
+ # {
358
+ # "foo" => [1, 2, 3],
359
+ # "bar" => [6.0, 7.0, 8.0],
360
+ # "ham" => ["a", "b", "c"]
361
+ # }
362
+ # )
332
363
  # df.schema
333
364
  # # => {"foo"=>:i64, "bar"=>:f64, "ham"=>:str}
334
365
  def schema
335
366
  columns.zip(dtypes).to_h
336
367
  end
337
368
 
338
- # def ==(other)
339
- # end
369
+ # Equal.
370
+ #
371
+ # @return [DataFrame]
372
+ def ==(other)
373
+ _comp(other, "eq")
374
+ end
340
375
 
341
- # def !=(other)
342
- # end
376
+ # Not equal.
377
+ #
378
+ # @return [DataFrame]
379
+ def !=(other)
380
+ _comp(other, "neq")
381
+ end
343
382
 
344
- # def >(other)
345
- # end
383
+ # Greater than.
384
+ #
385
+ # @return [DataFrame]
386
+ def >(other)
387
+ _comp(other, "gt")
388
+ end
346
389
 
347
- # def <(other)
348
- # end
390
+ # Less than.
391
+ #
392
+ # @return [DataFrame]
393
+ def <(other)
394
+ _comp(other, "lt")
395
+ end
349
396
 
350
- # def >=(other)
351
- # end
397
+ # Greater than or equal.
398
+ #
399
+ # @return [DataFrame]
400
+ def >=(other)
401
+ _comp(other, "gt_eq")
402
+ end
352
403
 
353
- # def <=(other)
354
- # end
404
+ # Less than or equal.
405
+ #
406
+ # @return [DataFrame]
407
+ def <=(other)
408
+ _comp(other, "lt_eq")
409
+ end
355
410
 
356
- # def *(other)
357
- # end
411
+ # Performs multiplication.
412
+ #
413
+ # @return [DataFrame]
414
+ def *(other)
415
+ if other.is_a?(DataFrame)
416
+ return _from_rbdf(_df.mul_df(other._df))
417
+ end
358
418
 
359
- # def /(other)
360
- # end
419
+ other = _prepare_other_arg(other)
420
+ _from_rbdf(_df.mul(other._s))
421
+ end
361
422
 
362
- # def +(other)
363
- # end
423
+ # Performs division.
424
+ #
425
+ # @return [DataFrame]
426
+ def /(other)
427
+ if other.is_a?(DataFrame)
428
+ return _from_rbdf(_df.div_df(other._df))
429
+ end
364
430
 
365
- # def -(other)
366
- # end
431
+ other = _prepare_other_arg(other)
432
+ _from_rbdf(_df.div(other._s))
433
+ end
367
434
 
368
- # def %(other)
369
- # end
435
+ # Performs addition.
436
+ #
437
+ # @return [DataFrame]
438
+ def +(other)
439
+ if other.is_a?(DataFrame)
440
+ return _from_rbdf(_df.add_df(other._df))
441
+ end
442
+
443
+ other = _prepare_other_arg(other)
444
+ _from_rbdf(_df.add(other._s))
445
+ end
446
+
447
+ # Performs subtraction.
448
+ #
449
+ # @return [DataFrame]
450
+ def -(other)
451
+ if other.is_a?(DataFrame)
452
+ return _from_rbdf(_df.sub_df(other._df))
453
+ end
454
+
455
+ other = _prepare_other_arg(other)
456
+ _from_rbdf(_df.sub(other._s))
457
+ end
458
+
459
+ # Returns the modulo.
460
+ #
461
+ # @return [DataFrame]
462
+ def %(other)
463
+ if other.is_a?(DataFrame)
464
+ return _from_rbdf(_df.rem_df(other._df))
465
+ end
466
+
467
+ other = _prepare_other_arg(other)
468
+ _from_rbdf(_df.rem(other._s))
469
+ end
370
470
 
471
+ # Returns a string representing the DataFrame.
371
472
  #
473
+ # @return [String]
372
474
  def to_s
373
475
  _df.to_s
374
476
  end
375
477
  alias_method :inspect, :to_s
376
478
 
479
+ # Check if DataFrame includes column.
480
+ #
481
+ # @return [Boolean]
377
482
  def include?(name)
378
483
  columns.include?(name)
379
484
  end
@@ -387,9 +492,78 @@ module Polars
387
492
  # def _pos_idxs
388
493
  # end
389
494
 
495
+ # Returns subset of the DataFrame.
390
496
  #
391
- def [](name)
392
- Utils.wrap_s(_df.column(name))
497
+ # @return [Object]
498
+ def [](*args)
499
+ if args.size == 2
500
+ row_selection, col_selection = args
501
+
502
+ # df[.., unknown]
503
+ if row_selection.is_a?(Range)
504
+
505
+ # multiple slices
506
+ # df[.., ..]
507
+ if col_selection.is_a?(Range)
508
+ raise Todo
509
+ end
510
+ end
511
+
512
+ # df[2, ..] (select row as df)
513
+ if row_selection.is_a?(Integer)
514
+ if col_selection.is_a?(Array)
515
+ df = self[0.., col_selection]
516
+ return df.slice(row_selection, 1)
517
+ end
518
+ # df[2, "a"]
519
+ if col_selection.is_a?(String)
520
+ return self[col_selection][row_selection]
521
+ end
522
+ end
523
+
524
+ # column selection can be "a" and ["a", "b"]
525
+ if col_selection.is_a?(String)
526
+ col_selection = [col_selection]
527
+ end
528
+
529
+ # df[.., 1]
530
+ if col_selection.is_a?(Integer)
531
+ series = to_series(col_selection)
532
+ return series[row_selection]
533
+ end
534
+
535
+ if col_selection.is_a?(Array)
536
+ # df[.., [1, 2]]
537
+ if is_int_sequence(col_selection)
538
+ series_list = col_selection.map { |i| to_series(i) }
539
+ df = self.class.new(series_list)
540
+ return df[row_selection]
541
+ end
542
+ end
543
+
544
+ df = self[col_selection]
545
+ return df[row_selection]
546
+ elsif args.size == 1
547
+ item = args[0]
548
+
549
+ # select single column
550
+ # df["foo"]
551
+ if item.is_a?(String)
552
+ return Utils.wrap_s(_df.column(item))
553
+ end
554
+
555
+ # df[idx]
556
+ if item.is_a?(Integer)
557
+ return slice(_pos_idx(item, dim: 0), 1)
558
+ end
559
+
560
+ # df[..]
561
+ if item.is_a?(Range)
562
+ return Slice.new(self).apply(item)
563
+ end
564
+ end
565
+
566
+ raise ArgumentError, "Cannot get item of type: #{item.class.name}"
393
567
  end
394
568
 
395
569
  # def []=(key, value)
@@ -397,7 +571,9 @@ module Polars
397
571
 
398
572
  # no to_arrow
399
573
 
574
+ # Convert DataFrame to a hash mapping column name to values.
400
575
  #
576
+ # @return [Hash]
401
577
  def to_h(as_series: true)
402
578
  if as_series
403
579
  get_columns.to_h { |s| [s.name, s] }
@@ -422,11 +598,13 @@ module Polars
422
598
  # @return [Series]
423
599
  #
424
600
  # @example
425
- # df = Polars::DataFrame.new({
426
- # "foo" => [1, 2, 3],
427
- # "bar" => [6, 7, 8],
428
- # "ham" => ["a", "b", "c"]
429
- # })
601
+ # df = Polars::DataFrame.new(
602
+ # {
603
+ # "foo" => [1, 2, 3],
604
+ # "bar" => [6, 7, 8],
605
+ # "ham" => ["a", "b", "c"]
606
+ # }
607
+ # )
430
608
  # df.to_series(1)
431
609
  # # =>
432
610
  # # shape: (3,)
@@ -519,11 +697,13 @@ module Polars
519
697
  # @return [String, nil]
520
698
  #
521
699
  # @example
522
- # df = Polars::DataFrame.new({
523
- # "foo" => [1, 2, 3, 4, 5],
524
- # "bar" => [6, 7, 8, 9, 10],
525
- # "ham" => ["a", "b", "c", "d", "e"]
526
- # })
700
+ # df = Polars::DataFrame.new(
701
+ # {
702
+ # "foo" => [1, 2, 3, 4, 5],
703
+ # "bar" => [6, 7, 8, 9, 10],
704
+ # "ham" => ["a", "b", "c", "d", "e"]
705
+ # }
706
+ # )
527
707
  # df.write_csv("file.csv")
528
708
  def write_csv(
529
709
  file = nil,
@@ -694,11 +874,13 @@ module Polars
694
874
  # @return [DataFrame]
695
875
  #
696
876
  # @example
697
- # df = Polars::DataFrame.new({
698
- # "key" => ["a", "b", "c"],
699
- # "val" => [1, 2, 3]
700
- # })
701
- # df.reverse()
877
+ # df = Polars::DataFrame.new(
878
+ # {
879
+ # "key" => ["a", "b", "c"],
880
+ # "val" => [1, 2, 3]
881
+ # }
882
+ # )
883
+ # df.reverse
702
884
  # # =>
703
885
  # # shape: (3, 2)
704
886
  # # ┌─────┬─────┐
@@ -724,11 +906,13 @@ module Polars
724
906
  # @return [DataFrame]
725
907
  #
726
908
  # @example
727
- # df = Polars::DataFrame.new({
728
- # "foo" => [1, 2, 3],
729
- # "bar" => [6, 7, 8],
730
- # "ham" => ["a", "b", "c"]
731
- # })
909
+ # df = Polars::DataFrame.new(
910
+ # {
911
+ # "foo" => [1, 2, 3],
912
+ # "bar" => [6, 7, 8],
913
+ # "ham" => ["a", "b", "c"]
914
+ # }
915
+ # )
732
916
  # df.rename({"foo" => "apple"})
733
917
  # # =>
734
918
  # # shape: (3, 3)
@@ -775,11 +959,13 @@ module Polars
775
959
  # # └─────┴─────┴─────┘
776
960
  #
777
961
  # @example
778
- # df = Polars::DataFrame.new({
779
- # "a" => [1, 2, 3, 4],
780
- # "b" => [0.5, 4, 10, 13],
781
- # "c" => [true, true, false, true]
782
- # })
962
+ # df = Polars::DataFrame.new(
963
+ # {
964
+ # "a" => [1, 2, 3, 4],
965
+ # "b" => [0.5, 4, 10, 13],
966
+ # "c" => [true, true, false, true]
967
+ # }
968
+ # )
783
969
  # s = Polars::Series.new("d", [-2.5, 15, 20.5, 0])
784
970
  # df.insert_at_idx(3, s)
785
971
  # # =>
@@ -805,63 +991,560 @@ module Polars
805
991
  self
806
992
  end
807
993
 
994
+ # Filter the rows in the DataFrame based on a predicate expression.
995
+ #
996
+ # @param predicate [Expr]
997
+ # Expression that evaluates to a boolean Series.
998
+ #
999
+ # @return [DataFrame]
1000
+ #
1001
+ # @example Filter on one condition:
1002
+ # df = Polars::DataFrame.new(
1003
+ # {
1004
+ # "foo" => [1, 2, 3],
1005
+ # "bar" => [6, 7, 8],
1006
+ # "ham" => ["a", "b", "c"]
1007
+ # }
1008
+ # )
1009
+ # df.filter(Polars.col("foo") < 3)
1010
+ # # =>
1011
+ # # shape: (2, 3)
1012
+ # # ┌─────┬─────┬─────┐
1013
+ # # │ foo ┆ bar ┆ ham │
1014
+ # # │ --- ┆ --- ┆ --- │
1015
+ # # │ i64 ┆ i64 ┆ str │
1016
+ # # ╞═════╪═════╪═════╡
1017
+ # # │ 1 ┆ 6 ┆ a │
1018
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1019
+ # # │ 2 ┆ 7 ┆ b │
1020
+ # # └─────┴─────┴─────┘
1021
+ #
1022
+ # @example Filter on multiple conditions:
1023
+ # df.filter((Polars.col("foo") < 3) & (Polars.col("ham") == "a"))
1024
+ # # =>
1025
+ # # shape: (1, 3)
1026
+ # # ┌─────┬─────┬─────┐
1027
+ # # │ foo ┆ bar ┆ ham │
1028
+ # # │ --- ┆ --- ┆ --- │
1029
+ # # │ i64 ┆ i64 ┆ str │
1030
+ # # ╞═════╪═════╪═════╡
1031
+ # # │ 1 ┆ 6 ┆ a │
1032
+ # # └─────┴─────┴─────┘
808
1033
  def filter(predicate)
809
1034
  lazy.filter(predicate).collect
810
1035
  end
811
1036
 
812
- # def describe
813
- # end
1037
+ # Summary statistics for a DataFrame.
1038
+ #
1039
+ # @return [DataFrame]
1040
+ #
1041
+ # @example
1042
+ # df = Polars::DataFrame.new(
1043
+ # {
1044
+ # "a" => [1.0, 2.8, 3.0],
1045
+ # "b" => [4, 5, nil],
1046
+ # "c" => [true, false, true],
1047
+ # "d" => [nil, "b", "c"],
1048
+ # "e" => ["usd", "eur", nil]
1049
+ # }
1050
+ # )
1051
+ # df.describe
1052
+ # # =>
1053
+ # # shape: (7, 6)
1054
+ # # ┌────────────┬──────────┬──────────┬──────┬──────┬──────┐
1055
+ # # │ describe ┆ a ┆ b ┆ c ┆ d ┆ e │
1056
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
1057
+ # # │ str ┆ f64 ┆ f64 ┆ f64 ┆ str ┆ str │
1058
+ # # ╞════════════╪══════════╪══════════╪══════╪══════╪══════╡
1059
+ # # │ count ┆ 3.0 ┆ 3.0 ┆ 3.0 ┆ 3 ┆ 3 │
1060
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1061
+ # # │ null_count ┆ 0.0 ┆ 1.0 ┆ 0.0 ┆ 1 ┆ 1 │
1062
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1063
+ # # │ mean ┆ 2.266667 ┆ 4.5 ┆ null ┆ null ┆ null │
1064
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1065
+ # # │ std ┆ 1.101514 ┆ 0.707107 ┆ null ┆ null ┆ null │
1066
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1067
+ # # │ min ┆ 1.0 ┆ 4.0 ┆ 0.0 ┆ b ┆ eur │
1068
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1069
+ # # │ max ┆ 3.0 ┆ 5.0 ┆ 1.0 ┆ c ┆ usd │
1070
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1071
+ # # │ median ┆ 2.8 ┆ 4.5 ┆ null ┆ null ┆ null │
1072
+ # # └────────────┴──────────┴──────────┴──────┴──────┴──────┘
1073
+ def describe
1074
+ describe_cast = lambda do |stat|
1075
+ columns = []
1076
+ self.columns.each_with_index do |s, i|
1077
+ if self[s].is_numeric || self[s].is_boolean
1078
+ columns << stat[0.., i].cast(:f64)
1079
+ else
1080
+ # for dates, strings, etc, we cast to string so that all
1081
+ # statistics can be shown
1082
+ columns << stat[0.., i].cast(:str)
1083
+ end
1084
+ end
1085
+ self.class.new(columns)
1086
+ end
814
1087
 
815
- # def find_idx_by_name
816
- # end
1088
+ summary = _from_rbdf(
1089
+ Polars.concat(
1090
+ [
1091
+ describe_cast.(
1092
+ self.class.new(columns.to_h { |c| [c, [height]] })
1093
+ ),
1094
+ describe_cast.(null_count),
1095
+ describe_cast.(mean),
1096
+ describe_cast.(std),
1097
+ describe_cast.(min),
1098
+ describe_cast.(max),
1099
+ describe_cast.(median)
1100
+ ]
1101
+ )._df
1102
+ )
1103
+ summary.insert_at_idx(
1104
+ 0,
1105
+ Polars::Series.new(
1106
+ "describe",
1107
+ ["count", "null_count", "mean", "std", "min", "max", "median"],
1108
+ )
1109
+ )
1110
+ summary
1111
+ end
817
1112
 
818
- # def replace_at_idx
819
- # end
1113
+ # Find the index of a column by name.
1114
+ #
1115
+ # @param name [String]
1116
+ # Name of the column to find.
1117
+ #
1118
+ # @return [Integer]
1119
+ #
1120
+ # @example
1121
+ # df = Polars::DataFrame.new(
1122
+ # {"foo" => [1, 2, 3], "bar" => [6, 7, 8], "ham" => ["a", "b", "c"]}
1123
+ # )
1124
+ # df.find_idx_by_name("ham")
1125
+ # # => 2
1126
+ def find_idx_by_name(name)
1127
+ _df.find_idx_by_name(name)
1128
+ end
1129
+
1130
+ # Replace a column at an index location.
1131
+ #
1132
+ # @param index [Integer]
1133
+ # Column index.
1134
+ # @param series [Series]
1135
+ # Series that will replace the column.
1136
+ #
1137
+ # @return [DataFrame]
1138
+ #
1139
+ # @example
1140
+ # df = Polars::DataFrame.new(
1141
+ # {
1142
+ # "foo" => [1, 2, 3],
1143
+ # "bar" => [6, 7, 8],
1144
+ # "ham" => ["a", "b", "c"]
1145
+ # }
1146
+ # )
1147
+ # s = Polars::Series.new("apple", [10, 20, 30])
1148
+ # df.replace_at_idx(0, s)
1149
+ # # =>
1150
+ # # shape: (3, 3)
1151
+ # # ┌───────┬─────┬─────┐
1152
+ # # │ apple ┆ bar ┆ ham │
1153
+ # # │ --- ┆ --- ┆ --- │
1154
+ # # │ i64 ┆ i64 ┆ str │
1155
+ # # ╞═══════╪═════╪═════╡
1156
+ # # │ 10 ┆ 6 ┆ a │
1157
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1158
+ # # │ 20 ┆ 7 ┆ b │
1159
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1160
+ # # │ 30 ┆ 8 ┆ c │
1161
+ # # └───────┴─────┴─────┘
1162
+ def replace_at_idx(index, series)
1163
+ if index < 0
1164
+ index = columns.length + index
1165
+ end
1166
+ _df.replace_at_idx(index, series._s)
1167
+ self
1168
+ end
820
1169
 
1170
+ # Sort the DataFrame by column.
1171
+ #
1172
+ # @param by [String, Expr, Array]
1173
+ # By which column to sort.
1174
+ # @param reverse [Boolean]
1175
+ # Reverse/descending sort.
1176
+ # @param nulls_last [Boolean]
1177
+ # Place null values last. Can only be used if sorted by a single column.
1178
+ #
1179
+ # @return [DataFrame]
1180
+ #
1181
+ # @example
1182
+ # df = Polars::DataFrame.new(
1183
+ # {
1184
+ # "foo" => [1, 2, 3],
1185
+ # "bar" => [6.0, 7.0, 8.0],
1186
+ # "ham" => ["a", "b", "c"]
1187
+ # }
1188
+ # )
1189
+ # df.sort("foo", reverse: true)
1190
+ # # =>
1191
+ # # shape: (3, 3)
1192
+ # # ┌─────┬─────┬─────┐
1193
+ # # │ foo ┆ bar ┆ ham │
1194
+ # # │ --- ┆ --- ┆ --- │
1195
+ # # │ i64 ┆ f64 ┆ str │
1196
+ # # ╞═════╪═════╪═════╡
1197
+ # # │ 3 ┆ 8.0 ┆ c │
1198
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1199
+ # # │ 2 ┆ 7.0 ┆ b │
1200
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1201
+ # # │ 1 ┆ 6.0 ┆ a │
1202
+ # # └─────┴─────┴─────┘
821
1203
  #
1204
+ # @example Sort by multiple columns.
1205
+ # df.sort(
1206
+ # [Polars.col("foo"), Polars.col("bar")**2],
1207
+ # reverse: [true, false]
1208
+ # )
1209
+ # # =>
1210
+ # # shape: (3, 3)
1211
+ # # ┌─────┬─────┬─────┐
1212
+ # # │ foo ┆ bar ┆ ham │
1213
+ # # │ --- ┆ --- ┆ --- │
1214
+ # # │ i64 ┆ f64 ┆ str │
1215
+ # # ╞═════╪═════╪═════╡
1216
+ # # │ 3 ┆ 8.0 ┆ c │
1217
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1218
+ # # │ 2 ┆ 7.0 ┆ b │
1219
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1220
+ # # │ 1 ┆ 6.0 ┆ a │
1221
+ # # └─────┴─────┴─────┘
822
1222
  def sort(by, reverse: false, nulls_last: false)
823
- _from_rbdf(_df.sort(by, reverse, nulls_last))
1223
+ if by.is_a?(Array) || by.is_a?(Expr)
1224
+ lazy
1225
+ .sort(by, reverse: reverse, nulls_last: nulls_last)
1226
+ .collect(no_optimization: true, string_cache: false)
1227
+ else
1228
+ _from_rbdf(_df.sort(by, reverse, nulls_last))
1229
+ end
824
1230
  end
825
1231
 
1232
+ # Check if DataFrame is equal to other.
1233
+ #
1234
+ # @param other [DataFrame]
1235
+ # DataFrame to compare with.
1236
+ # @param null_equal [Boolean]
1237
+ # Consider null values as equal.
1238
+ #
1239
+ # @return [Boolean]
1240
+ #
1241
+ # @example
1242
+ # df1 = Polars::DataFrame.new(
1243
+ # {
1244
+ # "foo" => [1, 2, 3],
1245
+ # "bar" => [6.0, 7.0, 8.0],
1246
+ # "ham" => ["a", "b", "c"]
1247
+ # }
1248
+ # )
1249
+ # df2 = Polars::DataFrame.new(
1250
+ # {
1251
+ # "foo" => [3, 2, 1],
1252
+ # "bar" => [8.0, 7.0, 6.0],
1253
+ # "ham" => ["c", "b", "a"]
1254
+ # }
1255
+ # )
1256
+ # df1.frame_equal(df1)
1257
+ # # => true
1258
+ # df1.frame_equal(df2)
1259
+ # # => false
826
1260
  def frame_equal(other, null_equal: true)
827
1261
  _df.frame_equal(other._df, null_equal)
828
1262
  end
829
1263
 
830
- # def replace
831
- # end
832
-
1264
+ # Replace a column by a new Series.
833
1265
  #
834
- def slice(offset, length = nil)
835
- if !length.nil? && length < 0
836
- length = height - offset + length
837
- end
838
- _from_rbdf(_df.slice(offset, length))
1266
+ # @param column [String]
1267
+ # Column to replace.
1268
+ # @param new_col [Series]
1269
+ # New column to insert.
1270
+ #
1271
+ # @return [DataFrame]
1272
+ #
1273
+ # @example
1274
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
1275
+ # s = Polars::Series.new([10, 20, 30])
1276
+ # df.replace("foo", s)
1277
+ # # =>
1278
+ # # shape: (3, 2)
1279
+ # # ┌─────┬─────┐
1280
+ # # │ foo ┆ bar │
1281
+ # # │ --- ┆ --- │
1282
+ # # │ i64 ┆ i64 │
1283
+ # # ╞═════╪═════╡
1284
+ # # │ 10 ┆ 4 │
1285
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1286
+ # # │ 20 ┆ 5 │
1287
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1288
+ # # │ 30 ┆ 6 │
1289
+ # # └─────┴─────┘
1290
+ def replace(column, new_col)
1291
+ _df.replace(column, new_col._s)
1292
+ self
839
1293
  end
840
1294
 
1295
+ # Get a slice of this DataFrame.
1296
+ #
1297
+ # @param offset [Integer]
1298
+ # Start index. Negative indexing is supported.
1299
+ # @param length [Integer, nil]
1300
+ # Length of the slice. If set to `nil`, all rows starting at the offset
1301
+ # will be selected.
1302
+ #
1303
+ # @return [DataFrame]
1304
+ #
1305
+ # @example
1306
+ # df = Polars::DataFrame.new(
1307
+ # {
1308
+ # "foo" => [1, 2, 3],
1309
+ # "bar" => [6.0, 7.0, 8.0],
1310
+ # "ham" => ["a", "b", "c"]
1311
+ # }
1312
+ # )
1313
+ # df.slice(1, 2)
1314
+ # # =>
1315
+ # # shape: (2, 3)
1316
+ # # ┌─────┬─────┬─────┐
1317
+ # # │ foo ┆ bar ┆ ham │
1318
+ # # │ --- ┆ --- ┆ --- │
1319
+ # # │ i64 ┆ f64 ┆ str │
1320
+ # # ╞═════╪═════╪═════╡
1321
+ # # │ 2 ┆ 7.0 ┆ b │
1322
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1323
+ # # │ 3 ┆ 8.0 ┆ c │
1324
+ # # └─────┴─────┴─────┘
1325
+ def slice(offset, length = nil)
1326
+ if !length.nil? && length < 0
1327
+ length = height - offset + length
1328
+ end
1329
+ _from_rbdf(_df.slice(offset, length))
1330
+ end
1331
+
1332
+ # Get the first `n` rows.
1333
+ #
1334
+ # Alias for {#head}.
1335
+ #
1336
+ # @param n [Integer]
1337
+ # Number of rows to return.
1338
+ #
1339
+ # @return [DataFrame]
1340
+ #
1341
+ # @example
1342
+ # df = Polars::DataFrame.new(
1343
+ # {"foo" => [1, 2, 3, 4, 5, 6], "bar" => ["a", "b", "c", "d", "e", "f"]}
1344
+ # )
1345
+ # df.limit(4)
1346
+ # # =>
1347
+ # # shape: (4, 2)
1348
+ # # ┌─────┬─────┐
1349
+ # # │ foo ┆ bar │
1350
+ # # │ --- ┆ --- │
1351
+ # # │ i64 ┆ str │
1352
+ # # ╞═════╪═════╡
1353
+ # # │ 1 ┆ a │
1354
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1355
+ # # │ 2 ┆ b │
1356
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1357
+ # # │ 3 ┆ c │
1358
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1359
+ # # │ 4 ┆ d │
1360
+ # # └─────┴─────┘
841
1361
  def limit(n = 5)
842
1362
  head(n)
843
1363
  end
844
1364
 
1365
+ # Get the first `n` rows.
1366
+ #
1367
+ # @param n [Integer]
1368
+ # Number of rows to return.
1369
+ #
1370
+ # @return [DataFrame]
1371
+ #
1372
+ # @example
1373
+ # df = Polars::DataFrame.new(
1374
+ # {
1375
+ # "foo" => [1, 2, 3, 4, 5],
1376
+ # "bar" => [6, 7, 8, 9, 10],
1377
+ # "ham" => ["a", "b", "c", "d", "e"]
1378
+ # }
1379
+ # )
1380
+ # df.head(3)
1381
+ # # =>
1382
+ # # shape: (3, 3)
1383
+ # # ┌─────┬─────┬─────┐
1384
+ # # │ foo ┆ bar ┆ ham │
1385
+ # # │ --- ┆ --- ┆ --- │
1386
+ # # │ i64 ┆ i64 ┆ str │
1387
+ # # ╞═════╪═════╪═════╡
1388
+ # # │ 1 ┆ 6 ┆ a │
1389
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1390
+ # # │ 2 ┆ 7 ┆ b │
1391
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1392
+ # # │ 3 ┆ 8 ┆ c │
1393
+ # # └─────┴─────┴─────┘
845
1394
  def head(n = 5)
846
1395
  _from_rbdf(_df.head(n))
847
1396
  end
848
1397
 
1398
+ # Get the last `n` rows.
1399
+ #
1400
+ # @param n [Integer]
1401
+ # Number of rows to return.
1402
+ #
1403
+ # @return [DataFrame]
1404
+ #
1405
+ # @example
1406
+ # df = Polars::DataFrame.new(
1407
+ # {
1408
+ # "foo" => [1, 2, 3, 4, 5],
1409
+ # "bar" => [6, 7, 8, 9, 10],
1410
+ # "ham" => ["a", "b", "c", "d", "e"]
1411
+ # }
1412
+ # )
1413
+ # df.tail(3)
1414
+ # # =>
1415
+ # # shape: (3, 3)
1416
+ # # ┌─────┬─────┬─────┐
1417
+ # # │ foo ┆ bar ┆ ham │
1418
+ # # │ --- ┆ --- ┆ --- │
1419
+ # # │ i64 ┆ i64 ┆ str │
1420
+ # # ╞═════╪═════╪═════╡
1421
+ # # │ 3 ┆ 8 ┆ c │
1422
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1423
+ # # │ 4 ┆ 9 ┆ d │
1424
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1425
+ # # │ 5 ┆ 10 ┆ e │
1426
+ # # └─────┴─────┴─────┘
849
1427
  def tail(n = 5)
850
1428
  _from_rbdf(_df.tail(n))
851
1429
  end
852
1430
 
853
- # def drop_nulls
854
- # end
1431
+ # Return a new DataFrame where the null values are dropped.
1432
+ #
1433
+ # @param subset [Object]
1434
+ # Subset of column(s) on which `drop_nulls` will be applied.
1435
+ #
1436
+ # @return [DataFrame]
1437
+ #
1438
+ # @example
1439
+ # df = Polars::DataFrame.new(
1440
+ # {
1441
+ # "foo" => [1, 2, 3],
1442
+ # "bar" => [6, nil, 8],
1443
+ # "ham" => ["a", "b", "c"]
1444
+ # }
1445
+ # )
1446
+ # df.drop_nulls
1447
+ # # =>
1448
+ # # shape: (2, 3)
1449
+ # # ┌─────┬─────┬─────┐
1450
+ # # │ foo ┆ bar ┆ ham │
1451
+ # # │ --- ┆ --- ┆ --- │
1452
+ # # │ i64 ┆ i64 ┆ str │
1453
+ # # ╞═════╪═════╪═════╡
1454
+ # # │ 1 ┆ 6 ┆ a │
1455
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1456
+ # # │ 3 ┆ 8 ┆ c │
1457
+ # # └─────┴─────┴─────┘
1458
+ def drop_nulls(subset: nil)
1459
+ if subset.is_a?(String)
1460
+ subset = [subset]
1461
+ end
1462
+ _from_rbdf(_df.drop_nulls(subset))
1463
+ end
855
1464
 
856
1465
  # def pipe
857
1466
  # end
858
1467
 
859
- # def with_row_count
860
- # end
861
-
1468
+ # Add a column at index 0 that counts the rows.
1469
+ #
1470
+ # @param name [String]
1471
+ # Name of the column to add.
1472
+ # @param offset [Integer]
1473
+ # Start the row count at this offset.
1474
+ #
1475
+ # @return [DataFrame]
1476
+ #
1477
+ # @example
1478
+ # df = Polars::DataFrame.new(
1479
+ # {
1480
+ # "a" => [1, 3, 5],
1481
+ # "b" => [2, 4, 6]
1482
+ # }
1483
+ # )
1484
+ # df.with_row_count
1485
+ # # =>
1486
+ # # shape: (3, 3)
1487
+ # # ┌────────┬─────┬─────┐
1488
+ # # │ row_nr ┆ a ┆ b │
1489
+ # # │ --- ┆ --- ┆ --- │
1490
+ # # │ u32 ┆ i64 ┆ i64 │
1491
+ # # ╞════════╪═════╪═════╡
1492
+ # # │ 0 ┆ 1 ┆ 2 │
1493
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1494
+ # # │ 1 ┆ 3 ┆ 4 │
1495
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1496
+ # # │ 2 ┆ 5 ┆ 6 │
1497
+ # # └────────┴─────┴─────┘
1498
+ def with_row_count(name: "row_nr", offset: 0)
1499
+ _from_rbdf(_df.with_row_count(name, offset))
1500
+ end
1501
+
1502
+ # Start a groupby operation.
1503
+ #
1504
+ # @param by [Object]
1505
+ # Column(s) to group by.
1506
+ # @param maintain_order [Boolean]
1507
+ # Make sure that the order of the groups remains consistent. This is more
1508
+ # expensive than a default groupby. Note that this only works in expression
1509
+ # aggregations.
862
1510
  #
1511
+ # @return [GroupBy]
1512
+ #
1513
+ # @example
1514
+ # df = Polars::DataFrame.new(
1515
+ # {
1516
+ # "a" => ["a", "b", "a", "b", "b", "c"],
1517
+ # "b" => [1, 2, 3, 4, 5, 6],
1518
+ # "c" => [6, 5, 4, 3, 2, 1]
1519
+ # }
1520
+ # )
1521
+ # df.groupby("a").agg(Polars.col("b").sum).sort("a")
1522
+ # # =>
1523
+ # # shape: (3, 2)
1524
+ # # ┌─────┬─────┐
1525
+ # # │ a ┆ b │
1526
+ # # │ --- ┆ --- │
1527
+ # # │ str ┆ i64 │
1528
+ # # ╞═════╪═════╡
1529
+ # # │ a ┆ 4 │
1530
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1531
+ # # │ b ┆ 11 │
1532
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1533
+ # # │ c ┆ 6 │
1534
+ # # └─────┴─────┘
863
1535
  def groupby(by, maintain_order: false)
864
- lazy.groupby(by, maintain_order: maintain_order)
1536
+ if !Utils.bool?(maintain_order)
1537
+ raise TypeError, "invalid input for groupby arg `maintain_order`: #{maintain_order}."
1538
+ end
1539
+ if by.is_a?(String)
1540
+ by = [by]
1541
+ end
1542
+ GroupBy.new(
1543
+ _df,
1544
+ by,
1545
+ self.class,
1546
+ maintain_order: maintain_order
1547
+ )
865
1548
  end
866
1549
 
867
1550
  # def groupby_rolling
@@ -876,7 +1559,109 @@ module Polars
876
1559
  # def join_asof
877
1560
  # end
878
1561
 
1562
+ # Join in SQL-like fashion.
1563
+ #
1564
+ # @param other [DataFrame]
1565
+ # DataFrame to join with.
1566
+ # @param left_on [Object]
1567
+ # Name(s) of the left join column(s).
1568
+ # @param right_on [Object]
1569
+ # Name(s) of the right join column(s).
1570
+ # @param on [Object]
1571
+ # Name(s) of the join columns in both DataFrames.
1572
+ # @param how ["inner", "left", "outer", "semi", "anti", "cross"]
1573
+ # Join strategy.
1574
+ # @param suffix [String]
1575
+ # Suffix to append to columns with a duplicate name.
1576
+ #
1577
+ # @return [DataFrame]
1578
+ #
1579
+ # @example
1580
+ # df = Polars::DataFrame.new(
1581
+ # {
1582
+ # "foo" => [1, 2, 3],
1583
+ # "bar" => [6.0, 7.0, 8.0],
1584
+ # "ham" => ["a", "b", "c"]
1585
+ # }
1586
+ # )
1587
+ # other_df = Polars::DataFrame.new(
1588
+ # {
1589
+ # "apple" => ["x", "y", "z"],
1590
+ # "ham" => ["a", "b", "d"]
1591
+ # }
1592
+ # )
1593
+ # df.join(other_df, on: "ham")
1594
+ # # =>
1595
+ # # shape: (2, 4)
1596
+ # # ┌─────┬─────┬─────┬───────┐
1597
+ # # │ foo ┆ bar ┆ ham ┆ apple │
1598
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1599
+ # # │ i64 ┆ f64 ┆ str ┆ str │
1600
+ # # ╞═════╪═════╪═════╪═══════╡
1601
+ # # │ 1 ┆ 6.0 ┆ a ┆ x │
1602
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1603
+ # # │ 2 ┆ 7.0 ┆ b ┆ y │
1604
+ # # └─────┴─────┴─────┴───────┘
1605
+ #
1606
+ # @example
1607
+ # df.join(other_df, on: "ham", how: "outer")
1608
+ # # =>
1609
+ # # shape: (4, 4)
1610
+ # # ┌──────┬──────┬─────┬───────┐
1611
+ # # │ foo ┆ bar ┆ ham ┆ apple │
1612
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1613
+ # # │ i64 ┆ f64 ┆ str ┆ str │
1614
+ # # ╞══════╪══════╪═════╪═══════╡
1615
+ # # │ 1 ┆ 6.0 ┆ a ┆ x │
1616
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1617
+ # # │ 2 ┆ 7.0 ┆ b ┆ y │
1618
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1619
+ # # │ null ┆ null ┆ d ┆ z │
1620
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1621
+ # # │ 3 ┆ 8.0 ┆ c ┆ null │
1622
+ # # └──────┴──────┴─────┴───────┘
1623
+ #
1624
+ # @example
1625
+ # df.join(other_df, on: "ham", how: "left")
1626
+ # # =>
1627
+ # # shape: (3, 4)
1628
+ # # ┌─────┬─────┬─────┬───────┐
1629
+ # # │ foo ┆ bar ┆ ham ┆ apple │
1630
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1631
+ # # │ i64 ┆ f64 ┆ str ┆ str │
1632
+ # # ╞═════╪═════╪═════╪═══════╡
1633
+ # # │ 1 ┆ 6.0 ┆ a ┆ x │
1634
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1635
+ # # │ 2 ┆ 7.0 ┆ b ┆ y │
1636
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1637
+ # # │ 3 ┆ 8.0 ┆ c ┆ null │
1638
+ # # └─────┴─────┴─────┴───────┘
1639
+ #
1640
+ # @example
1641
+ # df.join(other_df, on: "ham", how: "semi")
1642
+ # # =>
1643
+ # # shape: (2, 3)
1644
+ # # ┌─────┬─────┬─────┐
1645
+ # # │ foo ┆ bar ┆ ham │
1646
+ # # │ --- ┆ --- ┆ --- │
1647
+ # # │ i64 ┆ f64 ┆ str │
1648
+ # # ╞═════╪═════╪═════╡
1649
+ # # │ 1 ┆ 6.0 ┆ a │
1650
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1651
+ # # │ 2 ┆ 7.0 ┆ b │
1652
+ # # └─────┴─────┴─────┘
879
1653
  #
1654
+ # @example
1655
+ # df.join(other_df, on: "ham", how: "anti")
1656
+ # # =>
1657
+ # # shape: (1, 3)
1658
+ # # ┌─────┬─────┬─────┐
1659
+ # # │ foo ┆ bar ┆ ham │
1660
+ # # │ --- ┆ --- ┆ --- │
1661
+ # # │ i64 ┆ f64 ┆ str │
1662
+ # # ╞═════╪═════╪═════╡
1663
+ # # │ 3 ┆ 8.0 ┆ c │
1664
+ # # └─────┴─────┴─────┘
880
1665
  def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right")
881
1666
  lazy
882
1667
  .join(
@@ -893,55 +1678,505 @@ module Polars
893
1678
  # def apply
894
1679
  # end
895
1680
 
1681
+ # Return a new DataFrame with the column added or replaced.
1682
+ #
1683
+ # @param column [Object]
1684
+ # Series, where the name of the Series refers to the column in the DataFrame.
1685
+ #
1686
+ # @return [DataFrame]
1687
+ #
1688
+ # @example Added
1689
+ # df = Polars::DataFrame.new(
1690
+ # {
1691
+ # "a" => [1, 3, 5],
1692
+ # "b" => [2, 4, 6]
1693
+ # }
1694
+ # )
1695
+ # df.with_column((Polars.col("b") ** 2).alias("b_squared"))
1696
+ # # =>
1697
+ # # shape: (3, 3)
1698
+ # # ┌─────┬─────┬───────────┐
1699
+ # # │ a ┆ b ┆ b_squared │
1700
+ # # │ --- ┆ --- ┆ --- │
1701
+ # # │ i64 ┆ i64 ┆ f64 │
1702
+ # # ╞═════╪═════╪═══════════╡
1703
+ # # │ 1 ┆ 2 ┆ 4.0 │
1704
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1705
+ # # │ 3 ┆ 4 ┆ 16.0 │
1706
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
1707
+ # # │ 5 ┆ 6 ┆ 36.0 │
1708
+ # # └─────┴─────┴───────────┘
896
1709
  #
1710
+ # @example Replaced
1711
+ # df.with_column(Polars.col("a") ** 2)
1712
+ # # =>
1713
+ # # shape: (3, 2)
1714
+ # # ┌──────┬─────┐
1715
+ # # │ a ┆ b │
1716
+ # # │ --- ┆ --- │
1717
+ # # │ f64 ┆ i64 │
1718
+ # # ╞══════╪═════╡
1719
+ # # │ 1.0 ┆ 2 │
1720
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
1721
+ # # │ 9.0 ┆ 4 │
1722
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
1723
+ # # │ 25.0 ┆ 6 │
1724
+ # # └──────┴─────┘
897
1725
  def with_column(column)
898
1726
  lazy
899
1727
  .with_column(column)
900
1728
  .collect(no_optimization: true, string_cache: false)
901
1729
  end
902
1730
 
903
- # def hstack
904
- # end
1731
+ # Return a new DataFrame grown horizontally by stacking multiple Series to it.
1732
+ #
1733
+ # @param columns [Object]
1734
+ # Series to stack.
1735
+ # @param in_place [Boolean]
1736
+ # Modify in place.
1737
+ #
1738
+ # @return [DataFrame]
1739
+ #
1740
+ # @example
1741
+ # df = Polars::DataFrame.new(
1742
+ # {
1743
+ # "foo" => [1, 2, 3],
1744
+ # "bar" => [6, 7, 8],
1745
+ # "ham" => ["a", "b", "c"]
1746
+ # }
1747
+ # )
1748
+ # x = Polars::Series.new("apple", [10, 20, 30])
1749
+ # df.hstack([x])
1750
+ # # =>
1751
+ # # shape: (3, 4)
1752
+ # # ┌─────┬─────┬─────┬───────┐
1753
+ # # │ foo ┆ bar ┆ ham ┆ apple │
1754
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1755
+ # # │ i64 ┆ i64 ┆ str ┆ i64 │
1756
+ # # ╞═════╪═════╪═════╪═══════╡
1757
+ # # │ 1 ┆ 6 ┆ a ┆ 10 │
1758
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1759
+ # # │ 2 ┆ 7 ┆ b ┆ 20 │
1760
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1761
+ # # │ 3 ┆ 8 ┆ c ┆ 30 │
1762
+ # # └─────┴─────┴─────┴───────┘
1763
+ def hstack(columns, in_place: false)
1764
+ if !columns.is_a?(Array)
1765
+ columns = columns.get_columns
1766
+ end
1767
+ if in_place
1768
+ _df.hstack_mut(columns.map(&:_s))
1769
+ self
1770
+ else
1771
+ _from_rbdf(_df.hstack(columns.map(&:_s)))
1772
+ end
1773
+ end
905
1774
 
906
- # def vstack
907
- # end
1775
+ # Grow this DataFrame vertically by stacking a DataFrame to it.
1776
+ #
1777
+ # @param df [DataFrame]
1778
+ # DataFrame to stack.
1779
+ # @param in_place [Boolean]
1780
+ # Modify in place.
1781
+ #
1782
+ # @return [DataFrame]
1783
+ #
1784
+ # @example
1785
+ # df1 = Polars::DataFrame.new(
1786
+ # {
1787
+ # "foo" => [1, 2],
1788
+ # "bar" => [6, 7],
1789
+ # "ham" => ["a", "b"]
1790
+ # }
1791
+ # )
1792
+ # df2 = Polars::DataFrame.new(
1793
+ # {
1794
+ # "foo" => [3, 4],
1795
+ # "bar" => [8, 9],
1796
+ # "ham" => ["c", "d"]
1797
+ # }
1798
+ # )
1799
+ # df1.vstack(df2)
1800
+ # # =>
1801
+ # # shape: (4, 3)
1802
+ # # ┌─────┬─────┬─────┐
1803
+ # # │ foo ┆ bar ┆ ham │
1804
+ # # │ --- ┆ --- ┆ --- │
1805
+ # # │ i64 ┆ i64 ┆ str │
1806
+ # # ╞═════╪═════╪═════╡
1807
+ # # │ 1 ┆ 6 ┆ a │
1808
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1809
+ # # │ 2 ┆ 7 ┆ b │
1810
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1811
+ # # │ 3 ┆ 8 ┆ c │
1812
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1813
+ # # │ 4 ┆ 9 ┆ d │
1814
+ # # └─────┴─────┴─────┘
1815
+ def vstack(df, in_place: false)
1816
+ if in_place
1817
+ _df.vstack_mut(df._df)
1818
+ self
1819
+ else
1820
+ _from_rbdf(_df.vstack(df._df))
1821
+ end
1822
+ end
908
1823
 
1824
+ # Extend the memory backed by this `DataFrame` with the values from `other`.
1825
+ #
1826
+ # Unlike `vstack`, which adds the chunks from `other` to the chunks of this
1826
+ # `DataFrame`, `extend` appends the data from `other` to the underlying memory
1828
+ # locations and thus may cause a reallocation.
1829
+ #
1830
+ # If this does not cause a reallocation, the resulting data structure will not
1831
+ # have any extra chunks and thus will yield faster queries.
1832
+ #
1833
+ # Prefer `extend` over `vstack` when you want to do a query after a single append.
1834
+ # For instance during online operations where you add `n` rows and rerun a query.
909
1835
  #
1836
+ # Prefer `vstack` over `extend` when you want to append many times before doing a
1837
+ # query. For instance when you read in multiple files and want to store them in a
1838
+ # single `DataFrame`. In the latter case, finish the sequence of `vstack`
1839
+ # operations with a `rechunk`.
1840
+ #
1841
+ # @param other [DataFrame]
1842
+ # DataFrame to vertically add.
1843
+ #
1844
+ # @return [DataFrame]
1845
+ #
1846
+ # @example
1847
+ # df1 = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
1848
+ # df2 = Polars::DataFrame.new({"foo" => [10, 20, 30], "bar" => [40, 50, 60]})
1849
+ # df1.extend(df2)
1850
+ # # =>
1851
+ # # shape: (6, 2)
1852
+ # # ┌─────┬─────┐
1853
+ # # │ foo ┆ bar │
1854
+ # # │ --- ┆ --- │
1855
+ # # │ i64 ┆ i64 │
1856
+ # # ╞═════╪═════╡
1857
+ # # │ 1 ┆ 4 │
1858
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1859
+ # # │ 2 ┆ 5 │
1860
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1861
+ # # │ 3 ┆ 6 │
1862
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1863
+ # # │ 10 ┆ 40 │
1864
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1865
+ # # │ 20 ┆ 50 │
1866
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1867
+ # # │ 30 ┆ 60 │
1868
+ # # └─────┴─────┘
910
1869
  def extend(other)
911
1870
  _df.extend(other._df)
912
1871
  self
913
1872
  end
914
1873
 
915
- # def drop
916
- # end
1874
+ # Remove one or more columns from the DataFrame and return the result as a new DataFrame.
1875
+ #
1876
+ # @param columns [Object]
1877
+ # Column(s) to drop.
1878
+ #
1879
+ # @return [DataFrame]
1880
+ #
1881
+ # @example
1882
+ # df = Polars::DataFrame.new(
1883
+ # {
1884
+ # "foo" => [1, 2, 3],
1885
+ # "bar" => [6.0, 7.0, 8.0],
1886
+ # "ham" => ["a", "b", "c"]
1887
+ # }
1888
+ # )
1889
+ # df.drop("ham")
1890
+ # # =>
1891
+ # # shape: (3, 2)
1892
+ # # ┌─────┬─────┐
1893
+ # # │ foo ┆ bar │
1894
+ # # │ --- ┆ --- │
1895
+ # # │ i64 ┆ f64 │
1896
+ # # ╞═════╪═════╡
1897
+ # # │ 1 ┆ 6.0 │
1898
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1899
+ # # │ 2 ┆ 7.0 │
1900
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1901
+ # # │ 3 ┆ 8.0 │
1902
+ # # └─────┴─────┘
1903
+ def drop(columns)
1904
+ if columns.is_a?(Array)
1905
+ df = clone
1906
+ columns.each do |n|
1907
+ df._df.drop_in_place(n)
1908
+ end
1909
+ df
1910
+ else
1911
+ _from_rbdf(_df.drop(columns))
1912
+ end
1913
+ end
917
1914
 
918
- # def drop_in_place
919
- # end
1915
+ # Drop a single column in place and return the dropped column.
1916
+ #
1917
+ # @param name [Object]
1918
+ # Column to drop.
1919
+ #
1920
+ # @return [Series]
1921
+ #
1922
+ # @example
1923
+ # df = Polars::DataFrame.new(
1924
+ # {
1925
+ # "foo" => [1, 2, 3],
1926
+ # "bar" => [6, 7, 8],
1927
+ # "ham" => ["a", "b", "c"]
1928
+ # }
1929
+ # )
1930
+ # df.drop_in_place("ham")
1931
+ # # =>
1932
+ # # shape: (3,)
1933
+ # # Series: 'ham' [str]
1934
+ # # [
1935
+ # # "a"
1936
+ # # "b"
1937
+ # # "c"
1938
+ # # ]
1939
+ def drop_in_place(name)
1940
+ Utils.wrap_s(_df.drop_in_place(name))
1941
+ end
920
1942
 
921
- # def cleared
922
- # end
1943
+ # Create an empty copy of the current DataFrame.
1944
+ #
1945
+ # Returns a DataFrame with identical schema but no data.
1946
+ #
1947
+ # @return [DataFrame]
1948
+ #
1949
+ # @example
1950
+ # df = Polars::DataFrame.new(
1951
+ # {
1952
+ # "a" => [nil, 2, 3, 4],
1953
+ # "b" => [0.5, nil, 2.5, 13],
1954
+ # "c" => [true, true, false, nil]
1955
+ # }
1956
+ # )
1957
+ # df.cleared
1958
+ # # =>
1959
+ # # shape: (0, 3)
1960
+ # # ┌─────┬─────┬──────┐
1961
+ # # │ a ┆ b ┆ c │
1962
+ # # │ --- ┆ --- ┆ --- │
1963
+ # # │ i64 ┆ f64 ┆ bool │
1964
+ # # ╞═════╪═════╪══════╡
1965
+ # # └─────┴─────┴──────┘
1966
+ def cleared
1967
+ height > 0 ? head(0) : clone
1968
+ end
923
1969
 
924
1970
  # clone handled by initialize_copy
925
1971
 
1972
+ # Get the DataFrame as an Array of Series.
926
1973
  #
1974
+ # @return [Array]
927
1975
  def get_columns
928
1976
  _df.get_columns.map { |s| Utils.wrap_s(s) }
929
1977
  end
930
1978
 
1979
+ # Get a single column as Series by name.
1980
+ #
1981
+ # @param name [String]
1982
+ # Name of the column to retrieve.
1983
+ #
1984
+ # @return [Series]
1985
+ #
1986
+ # @example
1987
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
1988
+ # df.get_column("foo")
1989
+ # # =>
1990
+ # # shape: (3,)
1991
+ # # Series: 'foo' [i64]
1992
+ # # [
1993
+ # # 1
1994
+ # # 2
1995
+ # # 3
1996
+ # # ]
931
1997
  def get_column(name)
932
1998
  self[name]
933
1999
  end
934
2000
 
935
- # def fill_null
936
- # end
2001
+ # Fill null values using the specified value or strategy.
2002
+ #
2003
+ # @param value [Numeric]
2004
+ # Value used to fill null values.
2005
+ # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
2006
+ # Strategy used to fill null values.
2007
+ # @param limit [Integer]
2008
+ # Number of consecutive null values to fill when using the 'forward' or
2009
+ # 'backward' strategy.
2010
+ # @param matches_supertype [Boolean]
2011
+ # Fill all matching supertypes of the fill `value`.
2012
+ #
2013
+ # @return [DataFrame]
2014
+ #
2015
+ # @example
2016
+ # df = Polars::DataFrame.new(
2017
+ # {
2018
+ # "a" => [1, 2, nil, 4],
2019
+ # "b" => [0.5, 4, nil, 13]
2020
+ # }
2021
+ # )
2022
+ # df.fill_null(99)
2023
+ # # =>
2024
+ # # shape: (4, 2)
2025
+ # # ┌─────┬──────┐
2026
+ # # │ a ┆ b │
2027
+ # # │ --- ┆ --- │
2028
+ # # │ i64 ┆ f64 │
2029
+ # # ╞═════╪══════╡
2030
+ # # │ 1 ┆ 0.5 │
2031
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2032
+ # # │ 2 ┆ 4.0 │
2033
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2034
+ # # │ 99 ┆ 99.0 │
2035
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2036
+ # # │ 4 ┆ 13.0 │
2037
+ # # └─────┴──────┘
2038
+ #
2039
+ # @example
2040
+ # df.fill_null(strategy: "forward")
2041
+ # # =>
2042
+ # # shape: (4, 2)
2043
+ # # ┌─────┬──────┐
2044
+ # # │ a ┆ b │
2045
+ # # │ --- ┆ --- │
2046
+ # # │ i64 ┆ f64 │
2047
+ # # ╞═════╪══════╡
2048
+ # # │ 1 ┆ 0.5 │
2049
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2050
+ # # │ 2 ┆ 4.0 │
2051
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2052
+ # # │ 2 ┆ 4.0 │
2053
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2054
+ # # │ 4 ┆ 13.0 │
2055
+ # # └─────┴──────┘
2056
+ #
2057
+ # @example
2058
+ # df.fill_null(strategy: "max")
2059
+ # # =>
2060
+ # # shape: (4, 2)
2061
+ # # ┌─────┬──────┐
2062
+ # # │ a ┆ b │
2063
+ # # │ --- ┆ --- │
2064
+ # # │ i64 ┆ f64 │
2065
+ # # ╞═════╪══════╡
2066
+ # # │ 1 ┆ 0.5 │
2067
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2068
+ # # │ 2 ┆ 4.0 │
2069
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2070
+ # # │ 4 ┆ 13.0 │
2071
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2072
+ # # │ 4 ┆ 13.0 │
2073
+ # # └─────┴──────┘
2074
+ #
2075
+ # @example
2076
+ # df.fill_null(strategy: "zero")
2077
+ # # =>
2078
+ # # shape: (4, 2)
2079
+ # # ┌─────┬──────┐
2080
+ # # │ a ┆ b │
2081
+ # # │ --- ┆ --- │
2082
+ # # │ i64 ┆ f64 │
2083
+ # # ╞═════╪══════╡
2084
+ # # │ 1 ┆ 0.5 │
2085
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2086
+ # # │ 2 ┆ 4.0 │
2087
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2088
+ # # │ 0 ┆ 0.0 │
2089
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2090
+ # # │ 4 ┆ 13.0 │
2091
+ # # └─────┴──────┘
2092
+ def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: true)
2093
+ _from_rbdf(
2094
+ lazy
2095
+ .fill_null(value, strategy: strategy, limit: limit, matches_supertype: matches_supertype)
2096
+ .collect(no_optimization: true)
2097
+ ._df
2098
+ )
2099
+ end
937
2100
 
2101
+ # Fill floating point NaN values by an Expression evaluation.
2102
+ #
2103
+ # @param fill_value [Object]
2104
+ # Value to fill NaN with.
2105
+ #
2106
+ # @return [DataFrame]
2107
+ #
2108
+ # @note
2109
+ # Note that floating point NaNs (Not a Number) are not missing values!
2110
+ # To replace missing values, use `fill_null`.
938
2111
  #
2112
+ # @example
2113
+ # df = Polars::DataFrame.new(
2114
+ # {
2115
+ # "a" => [1.5, 2, Float::NAN, 4],
2116
+ # "b" => [0.5, 4, Float::NAN, 13]
2117
+ # }
2118
+ # )
2119
+ # df.fill_nan(99)
2120
+ # # =>
2121
+ # # shape: (4, 2)
2122
+ # # ┌──────┬──────┐
2123
+ # # │ a ┆ b │
2124
+ # # │ --- ┆ --- │
2125
+ # # │ f64 ┆ f64 │
2126
+ # # ╞══════╪══════╡
2127
+ # # │ 1.5 ┆ 0.5 │
2128
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2129
+ # # │ 2.0 ┆ 4.0 │
2130
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2131
+ # # │ 99.0 ┆ 99.0 │
2132
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2133
+ # # │ 4.0 ┆ 13.0 │
2134
+ # # └──────┴──────┘
939
2135
  def fill_nan(fill_value)
940
2136
  lazy.fill_nan(fill_value).collect(no_optimization: true)
941
2137
  end
942
2138
 
943
- # def explode
944
- # end
2139
+ # Explode `DataFrame` to long format by exploding a column with Lists.
2140
+ #
2141
+ # @param columns [Object]
2142
+ # Column of LargeList type.
2143
+ #
2144
+ # @return [DataFrame]
2145
+ #
2146
+ # @example
2147
+ # df = Polars::DataFrame.new(
2148
+ # {
2149
+ # "letters" => ["a", "a", "b", "c"],
2150
+ # "numbers" => [[1], [2, 3], [4, 5], [6, 7, 8]]
2151
+ # }
2152
+ # )
2153
+ # df.explode("numbers")
2154
+ # # =>
2155
+ # # shape: (8, 2)
2156
+ # # ┌─────────┬─────────┐
2157
+ # # │ letters ┆ numbers │
2158
+ # # │ --- ┆ --- │
2159
+ # # │ str ┆ i64 │
2160
+ # # ╞═════════╪═════════╡
2161
+ # # │ a ┆ 1 │
2162
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
2163
+ # # │ a ┆ 2 │
2164
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
2165
+ # # │ a ┆ 3 │
2166
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
2167
+ # # │ b ┆ 4 │
2168
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
2169
+ # # │ b ┆ 5 │
2170
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
2171
+ # # │ c ┆ 6 │
2172
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
2173
+ # # │ c ┆ 7 │
2174
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
2175
+ # # │ c ┆ 8 │
2176
+ # # └─────────┴─────────┘
2177
+ def explode(columns)
2178
+ lazy.explode(columns).collect(no_optimization: true)
2179
+ end
945
2180
 
946
2181
  # def pivot
947
2182
  # end
@@ -955,25 +2190,242 @@ module Polars
955
2190
  # def partition_by
956
2191
  # end
957
2192
 
958
- # def shift
959
- # end
960
-
961
- # def shift_and_fill
962
- # end
2193
+ # Shift values by the given period.
2194
+ #
2195
+ # @param periods [Integer]
2196
+ # Number of places to shift (may be negative).
2197
+ #
2198
+ # @return [DataFrame]
2199
+ #
2200
+ # @example
2201
+ # df = Polars::DataFrame.new(
2202
+ # {
2203
+ # "foo" => [1, 2, 3],
2204
+ # "bar" => [6, 7, 8],
2205
+ # "ham" => ["a", "b", "c"]
2206
+ # }
2207
+ # )
2208
+ # df.shift(1)
2209
+ # # =>
2210
+ # # shape: (3, 3)
2211
+ # # ┌──────┬──────┬──────┐
2212
+ # # │ foo ┆ bar ┆ ham │
2213
+ # # │ --- ┆ --- ┆ --- │
2214
+ # # │ i64 ┆ i64 ┆ str │
2215
+ # # ╞══════╪══════╪══════╡
2216
+ # # │ null ┆ null ┆ null │
2217
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2218
+ # # │ 1 ┆ 6 ┆ a │
2219
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2220
+ # # │ 2 ┆ 7 ┆ b │
2221
+ # # └──────┴──────┴──────┘
2222
+ #
2223
+ # @example
2224
+ # df.shift(-1)
2225
+ # # =>
2226
+ # # shape: (3, 3)
2227
+ # # ┌──────┬──────┬──────┐
2228
+ # # │ foo ┆ bar ┆ ham │
2229
+ # # │ --- ┆ --- ┆ --- │
2230
+ # # │ i64 ┆ i64 ┆ str │
2231
+ # # ╞══════╪══════╪══════╡
2232
+ # # │ 2 ┆ 7 ┆ b │
2233
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2234
+ # # │ 3 ┆ 8 ┆ c │
2235
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2236
+ # # │ null ┆ null ┆ null │
2237
+ # # └──────┴──────┴──────┘
2238
+ def shift(periods)
2239
+ _from_rbdf(_df.shift(periods))
2240
+ end
2241
+
2242
+ # Shift the values by a given period and fill the resulting null values.
2243
+ #
2244
+ # @param periods [Integer]
2245
+ # Number of places to shift (may be negative).
2246
+ # @param fill_value [Object]
2247
+ # Fill nil values with this value.
2248
+ #
2249
+ # @return [DataFrame]
2250
+ #
2251
+ # @example
2252
+ # df = Polars::DataFrame.new(
2253
+ # {
2254
+ # "foo" => [1, 2, 3],
2255
+ # "bar" => [6, 7, 8],
2256
+ # "ham" => ["a", "b", "c"]
2257
+ # }
2258
+ # )
2259
+ # df.shift_and_fill(1, 0)
2260
+ # # =>
2261
+ # # shape: (3, 3)
2262
+ # # ┌─────┬─────┬─────┐
2263
+ # # │ foo ┆ bar ┆ ham │
2264
+ # # │ --- ┆ --- ┆ --- │
2265
+ # # │ i64 ┆ i64 ┆ str │
2266
+ # # ╞═════╪═════╪═════╡
2267
+ # # │ 0 ┆ 0 ┆ 0 │
2268
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
2269
+ # # │ 1 ┆ 6 ┆ a │
2270
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
2271
+ # # │ 2 ┆ 7 ┆ b │
2272
+ # # └─────┴─────┴─────┘
2273
+ def shift_and_fill(periods, fill_value)
2274
+ lazy
2275
+ .shift_and_fill(periods, fill_value)
2276
+ .collect(no_optimization: true, string_cache: false)
2277
+ end
963
2278
 
2279
+ # Get a mask of all duplicated rows in this DataFrame.
2280
+ #
2281
+ # @return [Series]
964
2282
  #
2283
+ # @example
2284
+ # df = Polars::DataFrame.new(
2285
+ # {
2286
+ # "a" => [1, 2, 3, 1],
2287
+ # "b" => ["x", "y", "z", "x"],
2288
+ # }
2289
+ # )
2290
+ # df.is_duplicated
2291
+ # # =>
2292
+ # # shape: (4,)
2293
+ # # Series: '' [bool]
2294
+ # # [
2295
+ # # true
2296
+ # # false
2297
+ # # false
2298
+ # # true
2299
+ # # ]
965
2300
  def is_duplicated
966
2301
  Utils.wrap_s(_df.is_duplicated)
967
2302
  end
968
2303
 
2304
+ # Get a mask of all unique rows in this DataFrame.
2305
+ #
2306
+ # @return [Series]
2307
+ #
2308
+ # @example
2309
+ # df = Polars::DataFrame.new(
2310
+ # {
2311
+ # "a" => [1, 2, 3, 1],
2312
+ # "b" => ["x", "y", "z", "x"]
2313
+ # }
2314
+ # )
2315
+ # df.is_unique
2316
+ # # =>
2317
+ # # shape: (4,)
2318
+ # # Series: '' [bool]
2319
+ # # [
2320
+ # # false
2321
+ # # true
2322
+ # # true
2323
+ # # false
2324
+ # # ]
969
2325
  def is_unique
970
2326
  Utils.wrap_s(_df.is_unique)
971
2327
  end
972
2328
 
2329
+ # Start a lazy query from this point.
2330
+ #
2331
+ # @return [LazyFrame]
973
2332
  def lazy
974
2333
  wrap_ldf(_df.lazy)
975
2334
  end
976
2335
 
2336
+ # Select columns from this DataFrame.
2337
+ #
2338
+ # @param exprs [Object]
2339
+ # Column or columns to select.
2340
+ #
2341
+ # @return [DataFrame]
2342
+ #
2343
+ # @example
2344
+ # df = Polars::DataFrame.new(
2345
+ # {
2346
+ # "foo" => [1, 2, 3],
2347
+ # "bar" => [6, 7, 8],
2348
+ # "ham" => ["a", "b", "c"]
2349
+ # }
2350
+ # )
2351
+ # df.select("foo")
2352
+ # # =>
2353
+ # # shape: (3, 1)
2354
+ # # ┌─────┐
2355
+ # # │ foo │
2356
+ # # │ --- │
2357
+ # # │ i64 │
2358
+ # # ╞═════╡
2359
+ # # │ 1 │
2360
+ # # ├╌╌╌╌╌┤
2361
+ # # │ 2 │
2362
+ # # ├╌╌╌╌╌┤
2363
+ # # │ 3 │
2364
+ # # └─────┘
2365
+ #
2366
+ # @example
2367
+ # df.select(["foo", "bar"])
2368
+ # # =>
2369
+ # # shape: (3, 2)
2370
+ # # ┌─────┬─────┐
2371
+ # # │ foo ┆ bar │
2372
+ # # │ --- ┆ --- │
2373
+ # # │ i64 ┆ i64 │
2374
+ # # ╞═════╪═════╡
2375
+ # # │ 1 ┆ 6 │
2376
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2377
+ # # │ 2 ┆ 7 │
2378
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2379
+ # # │ 3 ┆ 8 │
2380
+ # # └─────┴─────┘
2381
+ #
2382
+ # @example
2383
+ # df.select(Polars.col("foo") + 1)
2384
+ # # =>
2385
+ # # shape: (3, 1)
2386
+ # # ┌─────┐
2387
+ # # │ foo │
2388
+ # # │ --- │
2389
+ # # │ i64 │
2390
+ # # ╞═════╡
2391
+ # # │ 2 │
2392
+ # # ├╌╌╌╌╌┤
2393
+ # # │ 3 │
2394
+ # # ├╌╌╌╌╌┤
2395
+ # # │ 4 │
2396
+ # # └─────┘
2397
+ #
2398
+ # @example
2399
+ # df.select([Polars.col("foo") + 1, Polars.col("bar") + 1])
2400
+ # # =>
2401
+ # # shape: (3, 2)
2402
+ # # ┌─────┬─────┐
2403
+ # # │ foo ┆ bar │
2404
+ # # │ --- ┆ --- │
2405
+ # # │ i64 ┆ i64 │
2406
+ # # ╞═════╪═════╡
2407
+ # # │ 2 ┆ 7 │
2408
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2409
+ # # │ 3 ┆ 8 │
2410
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2411
+ # # │ 4 ┆ 9 │
2412
+ # # └─────┴─────┘
2413
+ #
2414
+ # @example
2415
+ # df.select(Polars.when(Polars.col("foo") > 2).then(10).otherwise(0))
2416
+ # # =>
2417
+ # # shape: (3, 1)
2418
+ # # ┌─────────┐
2419
+ # # │ literal │
2420
+ # # │ --- │
2421
+ # # │ i64 │
2422
+ # # ╞═════════╡
2423
+ # # │ 0 │
2424
+ # # ├╌╌╌╌╌╌╌╌╌┤
2425
+ # # │ 0 │
2426
+ # # ├╌╌╌╌╌╌╌╌╌┤
2427
+ # # │ 10 │
2428
+ # # └─────────┘
977
2429
  def select(exprs)
978
2430
  _from_rbdf(
979
2431
  lazy
@@ -983,6 +2435,43 @@ module Polars
983
2435
  )
984
2436
  end
985
2437
 
2438
+ # Add or overwrite multiple columns in a DataFrame.
2439
+ #
2440
+ # @param exprs [Array]
2441
+ # Array of Expressions that evaluate to columns.
2442
+ #
2443
+ # @return [DataFrame]
2444
+ #
2445
+ # @example
2446
+ # df = Polars::DataFrame.new(
2447
+ # {
2448
+ # "a" => [1, 2, 3, 4],
2449
+ # "b" => [0.5, 4, 10, 13],
2450
+ # "c" => [true, true, false, true]
2451
+ # }
2452
+ # )
2453
+ # df.with_columns(
2454
+ # [
2455
+ # (Polars.col("a") ** 2).alias("a^2"),
2456
+ # (Polars.col("b") / 2).alias("b/2"),
2457
+ # (Polars.col("c").is_not).alias("not c")
2458
+ # ]
2459
+ # )
2460
+ # # =>
2461
+ # # shape: (4, 6)
2462
+ # # ┌─────┬──────┬───────┬──────┬──────┬───────┐
2463
+ # # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
2464
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
2465
+ # # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │
2466
+ # # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
2467
+ # # │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │
2468
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2469
+ # # │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │
2470
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2471
+ # # │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
2472
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2473
+ # # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
2474
+ # # └─────┴──────┴───────┴──────┴──────┴───────┘
986
2475
  def with_columns(exprs)
987
2476
  if !exprs.nil? && !exprs.is_a?(Array)
988
2477
  exprs = [exprs]
@@ -992,6 +2481,26 @@ module Polars
992
2481
  .collect(no_optimization: true, string_cache: false)
993
2482
  end
994
2483
 
2484
+ # Get number of chunks used by the ChunkedArrays of this DataFrame.
2485
+ #
2486
+ # @param strategy ["first", "all"]
2487
+ # Return the number of chunks of the 'first' column,
2488
+ # or 'all' columns in this DataFrame.
2489
+ #
2490
+ # @return [Object]
2491
+ #
2492
+ # @example
2493
+ # df = Polars::DataFrame.new(
2494
+ # {
2495
+ # "a" => [1, 2, 3, 4],
2496
+ # "b" => [0.5, 4, 10, 13],
2497
+ # "c" => [true, true, false, true]
2498
+ # }
2499
+ # )
2500
+ # df.n_chunks
2501
+ # # => 1
2502
+ # df.n_chunks(strategy: "all")
2503
+ # # => [1, 1, 1]
995
2504
  def n_chunks(strategy: "first")
996
2505
  if strategy == "first"
997
2506
  _df.n_chunks
@@ -1002,6 +2511,28 @@ module Polars
1002
2511
  end
1003
2512
  end
1004
2513
 
2514
+ # Aggregate the columns of this DataFrame to their maximum value.
2515
+ #
2516
+ # @return [DataFrame]
2517
+ #
2518
+ # @example
2519
+ # df = Polars::DataFrame.new(
2520
+ # {
2521
+ # "foo" => [1, 2, 3],
2522
+ # "bar" => [6, 7, 8],
2523
+ # "ham" => ["a", "b", "c"]
2524
+ # }
2525
+ # )
2526
+ # df.max
2527
+ # # =>
2528
+ # # shape: (1, 3)
2529
+ # # ┌─────┬─────┬─────┐
2530
+ # # │ foo ┆ bar ┆ ham │
2531
+ # # │ --- ┆ --- ┆ --- │
2532
+ # # │ i64 ┆ i64 ┆ str │
2533
+ # # ╞═════╪═════╪═════╡
2534
+ # # │ 3 ┆ 8 ┆ c │
2535
+ # # └─────┴─────┴─────┘
1005
2536
  def max(axis: 0)
1006
2537
  if axis == 0
1007
2538
  _from_rbdf(_df.max)
@@ -1012,6 +2543,28 @@ module Polars
1012
2543
  end
1013
2544
  end
1014
2545
 
2546
+ # Aggregate the columns of this DataFrame to their minimum value.
2547
+ #
2548
+ # @return [DataFrame]
2549
+ #
2550
+ # @example
2551
+ # df = Polars::DataFrame.new(
2552
+ # {
2553
+ # "foo" => [1, 2, 3],
2554
+ # "bar" => [6, 7, 8],
2555
+ # "ham" => ["a", "b", "c"]
2556
+ # }
2557
+ # )
2558
+ # df.min
2559
+ # # =>
2560
+ # # shape: (1, 3)
2561
+ # # ┌─────┬─────┬─────┐
2562
+ # # │ foo ┆ bar ┆ ham │
2563
+ # # │ --- ┆ --- ┆ --- │
2564
+ # # │ i64 ┆ i64 ┆ str │
2565
+ # # ╞═════╪═════╪═════╡
2566
+ # # │ 1 ┆ 6 ┆ a │
2567
+ # # └─────┴─────┴─────┘
1015
2568
  def min(axis: 0)
1016
2569
  if axis == 0
1017
2570
  _from_rbdf(_df.min)
@@ -1022,6 +2575,44 @@ module Polars
1022
2575
  end
1023
2576
  end
1024
2577
 
2578
+ # Aggregate the columns of this DataFrame to their sum value.
2579
+ #
2580
+ # @param axis [Integer]
2581
+ # Either 0 or 1.
2582
+ # @param null_strategy ["ignore", "propagate"]
2583
+ # This argument is only used if axis == 1.
2584
+ #
2585
+ # @return [DataFrame]
2586
+ #
2587
+ # @example
2588
+ # df = Polars::DataFrame.new(
2589
+ # {
2590
+ # "foo" => [1, 2, 3],
2591
+ # "bar" => [6, 7, 8],
2592
+ # "ham" => ["a", "b", "c"],
2593
+ # }
2594
+ # )
2595
+ # df.sum
2596
+ # # =>
2597
+ # # shape: (1, 3)
2598
+ # # ┌─────┬─────┬──────┐
2599
+ # # │ foo ┆ bar ┆ ham │
2600
+ # # │ --- ┆ --- ┆ --- │
2601
+ # # │ i64 ┆ i64 ┆ str │
2602
+ # # ╞═════╪═════╪══════╡
2603
+ # # │ 6 ┆ 21 ┆ null │
2604
+ # # └─────┴─────┴──────┘
2605
+ #
2606
+ # @example
2607
+ # df.sum(axis: 1)
2608
+ # # =>
2609
+ # # shape: (3,)
2610
+ # # Series: 'foo' [str]
2611
+ # # [
2612
+ # # "16a"
2613
+ # # "27b"
2614
+ # # "38c"
2615
+ # # ]
1025
2616
  def sum(axis: 0, null_strategy: "ignore")
1026
2617
  case axis
1027
2618
  when 0
@@ -1033,6 +2624,33 @@ module Polars
1033
2624
  end
1034
2625
  end
1035
2626
 
2627
+ # Aggregate the columns of this DataFrame to their mean value.
2628
+ #
2629
+ # @param axis [Integer]
2630
+ # Either 0 or 1.
2631
+ # @param null_strategy ["ignore", "propagate"]
2632
+ # This argument is only used if axis == 1.
2633
+ #
2634
+ # @return [DataFrame]
2635
+ #
2636
+ # @example
2637
+ # df = Polars::DataFrame.new(
2638
+ # {
2639
+ # "foo" => [1, 2, 3],
2640
+ # "bar" => [6, 7, 8],
2641
+ # "ham" => ["a", "b", "c"]
2642
+ # }
2643
+ # )
2644
+ # df.mean
2645
+ # # =>
2646
+ # # shape: (1, 3)
2647
+ # # ┌─────┬─────┬──────┐
2648
+ # # │ foo ┆ bar ┆ ham │
2649
+ # # │ --- ┆ --- ┆ --- │
2650
+ # # │ f64 ┆ f64 ┆ str │
2651
+ # # ╞═════╪═════╪══════╡
2652
+ # # │ 2.0 ┆ 7.0 ┆ null │
2653
+ # # └─────┴─────┴──────┘
1036
2654
  def mean(axis: 0, null_strategy: "ignore")
1037
2655
  case axis
1038
2656
  when 0
@@ -1044,77 +2662,633 @@ module Polars
1044
2662
  end
1045
2663
  end
1046
2664
 
2665
+ # Aggregate the columns of this DataFrame to their standard deviation value.
2666
+ #
2667
+ # @param ddof [Integer]
2668
+ # Degrees of freedom
2669
+ #
2670
+ # @return [DataFrame]
2671
+ #
2672
+ # @example
2673
+ # df = Polars::DataFrame.new(
2674
+ # {
2675
+ # "foo" => [1, 2, 3],
2676
+ # "bar" => [6, 7, 8],
2677
+ # "ham" => ["a", "b", "c"]
2678
+ # }
2679
+ # )
2680
+ # df.std
2681
+ # # =>
2682
+ # # shape: (1, 3)
2683
+ # # ┌─────┬─────┬──────┐
2684
+ # # │ foo ┆ bar ┆ ham │
2685
+ # # │ --- ┆ --- ┆ --- │
2686
+ # # │ f64 ┆ f64 ┆ str │
2687
+ # # ╞═════╪═════╪══════╡
2688
+ # # │ 1.0 ┆ 1.0 ┆ null │
2689
+ # # └─────┴─────┴──────┘
2690
+ #
2691
+ # @example
2692
+ # df.std(ddof: 0)
2693
+ # # =>
2694
+ # # shape: (1, 3)
2695
+ # # ┌──────────┬──────────┬──────┐
2696
+ # # │ foo ┆ bar ┆ ham │
2697
+ # # │ --- ┆ --- ┆ --- │
2698
+ # # │ f64 ┆ f64 ┆ str │
2699
+ # # ╞══════════╪══════════╪══════╡
2700
+ # # │ 0.816497 ┆ 0.816497 ┆ null │
2701
+ # # └──────────┴──────────┴──────┘
1047
2702
  def std(ddof: 1)
1048
2703
  _from_rbdf(_df.std(ddof))
1049
2704
  end
1050
2705
 
2706
+ # Aggregate the columns of this DataFrame to their variance value.
2707
+ #
2708
+ # @param ddof [Integer]
2709
+ # Degrees of freedom
2710
+ #
2711
+ # @return [DataFrame]
2712
+ #
2713
+ # @example
2714
+ # df = Polars::DataFrame.new(
2715
+ # {
2716
+ # "foo" => [1, 2, 3],
2717
+ # "bar" => [6, 7, 8],
2718
+ # "ham" => ["a", "b", "c"]
2719
+ # }
2720
+ # )
2721
+ # df.var
2722
+ # # =>
2723
+ # # shape: (1, 3)
2724
+ # # ┌─────┬─────┬──────┐
2725
+ # # │ foo ┆ bar ┆ ham │
2726
+ # # │ --- ┆ --- ┆ --- │
2727
+ # # │ f64 ┆ f64 ┆ str │
2728
+ # # ╞═════╪═════╪══════╡
2729
+ # # │ 1.0 ┆ 1.0 ┆ null │
2730
+ # # └─────┴─────┴──────┘
2731
+ #
2732
+ # @example
2733
+ # df.var(ddof: 0)
2734
+ # # =>
2735
+ # # shape: (1, 3)
2736
+ # # ┌──────────┬──────────┬──────┐
2737
+ # # │ foo ┆ bar ┆ ham │
2738
+ # # │ --- ┆ --- ┆ --- │
2739
+ # # │ f64 ┆ f64 ┆ str │
2740
+ # # ╞══════════╪══════════╪══════╡
2741
+ # # │ 0.666667 ┆ 0.666667 ┆ null │
2742
+ # # └──────────┴──────────┴──────┘
1051
2743
  def var(ddof: 1)
1052
2744
  _from_rbdf(_df.var(ddof))
1053
2745
  end
1054
2746
 
2747
+ # Aggregate the columns of this DataFrame to their median value.
2748
+ #
2749
+ # @return [DataFrame]
2750
+ #
2751
+ # @example
2752
+ # df = Polars::DataFrame.new(
2753
+ # {
2754
+ # "foo" => [1, 2, 3],
2755
+ # "bar" => [6, 7, 8],
2756
+ # "ham" => ["a", "b", "c"]
2757
+ # }
2758
+ # )
2759
+ # df.median
2760
+ # # =>
2761
+ # # shape: (1, 3)
2762
+ # # ┌─────┬─────┬──────┐
2763
+ # # │ foo ┆ bar ┆ ham │
2764
+ # # │ --- ┆ --- ┆ --- │
2765
+ # # │ f64 ┆ f64 ┆ str │
2766
+ # # ╞═════╪═════╪══════╡
2767
+ # # │ 2.0 ┆ 7.0 ┆ null │
2768
+ # # └─────┴─────┴──────┘
1055
2769
  def median
1056
2770
  _from_rbdf(_df.median)
1057
2771
  end
1058
2772
 
1059
- # def product
1060
- # end
2773
+ # Aggregate the columns of this DataFrame to their product values.
2774
+ #
2775
+ # @return [DataFrame]
2776
+ #
2777
+ # @example
2778
+ # df = Polars::DataFrame.new(
2779
+ # {
2780
+ # "a" => [1, 2, 3],
2781
+ # "b" => [0.5, 4, 10],
2782
+ # "c" => [true, true, false]
2783
+ # }
2784
+ # )
2785
+ # df.product
2786
+ # # =>
2787
+ # # shape: (1, 3)
2788
+ # # ┌─────┬──────┬─────┐
2789
+ # # │ a ┆ b ┆ c │
2790
+ # # │ --- ┆ --- ┆ --- │
2791
+ # # │ i64 ┆ f64 ┆ i64 │
2792
+ # # ╞═════╪══════╪═════╡
2793
+ # # │ 6 ┆ 20.0 ┆ 0 │
2794
+ # # └─────┴──────┴─────┘
2795
def product
  # Expressed as a selection over every column rather than a call on `_df`.
  product_expr = Polars.all.product
  select(product_expr)
end
2798
+
2799
+ # Aggregate the columns of this DataFrame to their quantile value.
2800
+ #
2801
+ # @param quantile [Float]
2802
+ # Quantile between 0.0 and 1.0.
2803
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
2804
+ # Interpolation method.
2805
+ #
2806
+ # @return [DataFrame]
2807
+ #
2808
+ # @example
2809
+ # df = Polars::DataFrame.new(
2810
+ # {
2811
+ # "foo" => [1, 2, 3],
2812
+ # "bar" => [6, 7, 8],
2813
+ # "ham" => ["a", "b", "c"]
2814
+ # }
2815
+ # )
2816
+ # df.quantile(0.5, interpolation: "nearest")
2817
+ # # =>
2818
+ # # shape: (1, 3)
2819
+ # # ┌─────┬─────┬──────┐
2820
+ # # │ foo ┆ bar ┆ ham │
2821
+ # # │ --- ┆ --- ┆ --- │
2822
+ # # │ f64 ┆ f64 ┆ str │
2823
+ # # ╞═════╪═════╪══════╡
2824
+ # # │ 2.0 ┆ 7.0 ┆ null │
2825
+ # # └─────┴─────┴──────┘
2826
def quantile(quantile, interpolation: "nearest")
  # Both arguments are forwarded positionally to the underlying `_df`.
  rb_result = _df.quantile(quantile, interpolation)
  _from_rbdf(rb_result)
end
2829
+
2830
+ # Get one hot encoded dummy variables.
2831
+ #
2832
+ # @param columns
2833
+ # A subset of columns to convert to dummy variables. `nil` means
2834
+ # "all columns".
2835
+ #
2836
+ # @return [DataFrame]
2837
+ #
2838
+ # @example
2839
+ # df = Polars::DataFrame.new(
2840
+ # {
2841
+ # "foo" => [1, 2],
2842
+ # "bar" => [3, 4],
2843
+ # "ham" => ["a", "b"]
2844
+ # }
2845
+ # )
2846
+ # df.to_dummies
2847
+ # # =>
2848
+ # # shape: (2, 6)
2849
+ # # ┌───────┬───────┬───────┬───────┬───────┬───────┐
2850
+ # # │ foo_1 ┆ foo_2 ┆ bar_3 ┆ bar_4 ┆ ham_a ┆ ham_b │
2851
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
2852
+ # # │ u8 ┆ u8 ┆ u8 ┆ u8 ┆ u8 ┆ u8 │
2853
+ # # ╞═══════╪═══════╪═══════╪═══════╪═══════╪═══════╡
2854
+ # # │ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 │
2855
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2856
+ # # │ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 │
2857
+ # # └───────┴───────┴───────┴───────┴───────┴───────┘
2858
def to_dummies(columns: nil)
  # A single column name is treated as a one-element list; `nil` is passed
  # through unchanged.
  columns = [columns] if columns.is_a?(String)

  encoded = _df.to_dummies(columns)
  _from_rbdf(encoded)
end
1061
2864
 
1062
- # def quantile(quantile, interpolation: "nearest")
1063
- # end
2865
+ # Drop duplicate rows from this DataFrame.
2866
+ #
2867
+ # @param maintain_order [Boolean]
2868
+ # Keep the same order as the original DataFrame. This requires more work to
2869
+ # compute.
2870
+ # @param subset [Object]
2871
+ # Subset to use to compare rows.
2872
+ # @param keep ["first", "last"]
2873
+ # Which of the duplicate rows to keep (in conjunction with `subset`).
2874
+ #
2875
+ # @return [DataFrame]
2876
+ #
2877
+ # @note
2878
+ # Note that this fails if there is a column of type `List` in the DataFrame or
2879
+ # subset.
2880
+ #
2881
+ # @example
2882
+ # df = Polars::DataFrame.new(
2883
+ # {
2884
+ # "a" => [1, 1, 2, 3, 4, 5],
2885
+ # "b" => [0.5, 0.5, 1.0, 2.0, 3.0, 3.0],
2886
+ # "c" => [true, true, true, false, true, true]
2887
+ # }
2888
+ # )
2889
+ # df.unique
2890
+ # # =>
2891
+ # # shape: (5, 3)
2892
+ # # ┌─────┬─────┬───────┐
2893
+ # # │ a ┆ b ┆ c │
2894
+ # # │ --- ┆ --- ┆ --- │
2895
+ # # │ i64 ┆ f64 ┆ bool │
2896
+ # # ╞═════╪═════╪═══════╡
2897
+ # # │ 1 ┆ 0.5 ┆ true │
2898
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2899
+ # # │ 2 ┆ 1.0 ┆ true │
2900
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2901
+ # # │ 3 ┆ 2.0 ┆ false │
2902
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2903
+ # # │ 4 ┆ 3.0 ┆ true │
2904
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2905
+ # # │ 5 ┆ 3.0 ┆ true │
2906
+ # # └─────┴─────┴───────┘
2907
def unique(maintain_order: true, subset: nil, keep: "first")
  # Coerce a provided subset to an Array: a lone column name is wrapped, and
  # anything else that is not already an Array is converted with `to_a`.
  unless subset.nil?
    subset =
      if subset.is_a?(String)
        [subset]
      elsif subset.is_a?(Array)
        subset
      else
        subset.to_a
      end
  end

  deduplicated = _df.unique(maintain_order, subset, keep)
  _from_rbdf(deduplicated)
end
1067
2918
 
1068
- # def unique
1069
- # end
2919
+ # Return the number of unique rows, or the number of unique row-subsets.
2920
+ #
2921
+ # @param subset [Object]
2922
+ # One or more columns/expressions that define what to count;
2923
+ # omit to return the count of unique rows.
2924
+ #
2925
+ # @return [Integer]
2926
+ #
2927
+ # @example
2928
+ # df = Polars::DataFrame.new(
2929
+ # {
2930
+ # "a" => [1, 1, 2, 3, 4, 5],
2931
+ # "b" => [0.5, 0.5, 1.0, 2.0, 3.0, 3.0],
2932
+ # "c" => [true, true, true, false, true, true]
2933
+ # }
2934
+ # )
2935
+ # df.n_unique
2936
+ # # => 5
2937
+ #
2938
+ # @example Simple columns subset
2939
+ # df.n_unique(subset: ["b", "c"])
2940
+ # # => 4
2941
+ #
2942
+ # @example Expression subset
2943
+ # df.n_unique(
2944
+ # subset: [
2945
+ # (Polars.col("a").floordiv(2)),
2946
+ # (Polars.col("c") | (Polars.col("b") >= 2))
2947
+ # ]
2948
+ # )
2949
+ # # => 3
2950
def n_unique(subset: nil)
  # Normalize a single column name or a single expression to a one-element
  # array so it takes the single-expression path below. The column-name
  # check must test for String (a StringIO is never a column name).
  if subset.is_a?(String)
    subset = [Polars.col(subset)]
  elsif subset.is_a?(Expr)
    subset = [subset]
  end

  expr =
    if subset.is_a?(Array) && subset.length == 1
      Utils.expr_to_lit_or_expr(subset[0], str_to_lit: false)
    else
      # No subset means all columns; multiple entries are combined into one
      # struct expression before counting.
      Polars.struct(subset.nil? ? Polars.all : subset)
    end

  df = lazy.select(expr.n_unique).collect
  # An empty result frame is reported as zero; otherwise the count is the
  # first value of the first row.
  df.is_empty ? 0 : df.row(0)[0]
end
2967
+
2968
+ # Rechunk the data in this DataFrame to a contiguous allocation.
1073
2969
 
2970
+ # This will make sure all subsequent operations have optimal and predictable
2971
+ # performance.
1074
2972
  #
2973
+ # @return [DataFrame]
1075
2974
  def rechunk
1076
2975
  _from_rbdf(_df.rechunk)
1077
2976
  end
1078
2977
 
2978
+ # Create a new DataFrame that shows the null counts per column.
2979
+ #
2980
+ # @return [DataFrame]
2981
+ #
2982
+ # @example
2983
+ # df = Polars::DataFrame.new(
2984
+ # {
2985
+ # "foo" => [1, nil, 3],
2986
+ # "bar" => [6, 7, nil],
2987
+ # "ham" => ["a", "b", "c"]
2988
+ # }
2989
+ # )
2990
+ # df.null_count
2991
+ # # =>
2992
+ # # shape: (1, 3)
2993
+ # # ┌─────┬─────┬─────┐
2994
+ # # │ foo ┆ bar ┆ ham │
2995
+ # # │ --- ┆ --- ┆ --- │
2996
+ # # │ u32 ┆ u32 ┆ u32 │
2997
+ # # ╞═════╪═════╪═════╡
2998
+ # # │ 1 ┆ 1 ┆ 0 │
2999
+ # # └─────┴─────┴─────┘
1079
3000
  def null_count
1080
3001
  _from_rbdf(_df.null_count)
1081
3002
  end
1082
3003
 
1083
- # def sample
1084
- # end
3004
+ # Sample from this DataFrame.
3005
+ #
3006
+ # @param n [Integer]
3007
+ # Number of items to return. Cannot be used with `frac`. Defaults to 1 if
3008
+ # `frac` is nil.
3009
+ # @param frac [Float]
3010
+ # Fraction of items to return. Cannot be used with `n`.
3011
+ # @param with_replacement [Boolean]
3012
+ # Allow values to be sampled more than once.
3013
+ # @param shuffle [Boolean]
3014
+ # Shuffle the order of sampled data points.
3015
+ # @param seed [Integer]
3016
+ # Seed for the random number generator. If set to nil (default), a random
3017
+ # seed is used.
3018
+ #
3019
+ # @return [DataFrame]
3020
+ #
3021
+ # @example
3022
+ # df = Polars::DataFrame.new(
3023
+ # {
3024
+ # "foo" => [1, 2, 3],
3025
+ # "bar" => [6, 7, 8],
3026
+ # "ham" => ["a", "b", "c"]
3027
+ # }
3028
+ # )
3029
+ # df.sample(n: 2, seed: 0)
3030
+ # # =>
3031
+ # # shape: (2, 3)
3032
+ # # ┌─────┬─────┬─────┐
3033
+ # # │ foo ┆ bar ┆ ham │
3034
+ # # │ --- ┆ --- ┆ --- │
3035
+ # # │ i64 ┆ i64 ┆ str │
3036
+ # # ╞═════╪═════╪═════╡
3037
+ # # │ 3 ┆ 8 ┆ c │
3038
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
3039
+ # # │ 2 ┆ 7 ┆ b │
3040
+ # # └─────┴─────┴─────┘
3041
def sample(
  n: nil,
  frac: nil,
  with_replacement: false,
  shuffle: false,
  seed: nil
)
  if !n.nil? && !frac.nil?
    raise ArgumentError, "cannot specify both `n` and `frac`"
  end

  # After the check above, a non-nil `frac` implies `n` is nil. Return the
  # fractional sample here; without the explicit return the result would be
  # discarded and the method would fall through to `sample_n` with n = 1.
  unless frac.nil?
    return _from_rbdf(_df.sample_frac(frac, with_replacement, shuffle, seed))
  end

  # Neither `n` nor `frac` was given: default to a single row.
  n = 1 if n.nil?
  _from_rbdf(_df.sample_n(n, with_replacement, shuffle, seed))
end
1085
3063
 
1086
3064
  # def fold
1087
3065
  # end
1088
3066
 
1089
- # def row
1090
- # end
3067
+ # Get a row as tuple, either by index or by predicate.
3068
+ #
3069
+ # @param index [Object]
3070
+ # Row index.
3071
+ # @param by_predicate [Object]
3072
+ # Select the row according to a given expression/predicate.
3073
+ #
3074
+ # @return [Object]
3075
+ #
3076
+ # @note
3077
+ # The `index` and `by_predicate` params are mutually exclusive. Additionally,
3078
+ # to ensure clarity, the `by_predicate` parameter must be supplied by keyword.
3079
+ #
3080
+ # When using `by_predicate` it is an error condition if anything other than
3081
+ # one row is returned; more than one row raises `TooManyRowsReturned`, and
3082
+ # zero rows will raise `NoRowsReturned` (both inherit from `RowsException`).
3083
+ #
3084
+ # @example Return the row at the given index
3085
+ # df = Polars::DataFrame.new(
3086
+ # {
3087
+ # "foo" => [1, 2, 3],
3088
+ # "bar" => [6, 7, 8],
3089
+ # "ham" => ["a", "b", "c"]
3090
+ # }
3091
+ # )
3092
+ # df.row(2)
3093
+ # # => [3, 8, "c"]
3094
+ #
3095
+ # @example Return the row that matches the given predicate
3096
+ # df.row(by_predicate: Polars.col("ham") == "b")
3097
+ # # => [2, 7, "b"]
3098
def row(index = nil, by_predicate: nil)
  if !index.nil? && !by_predicate.nil?
    raise ArgumentError, "Cannot set both 'index' and 'by_predicate'; mutually exclusive"
  elsif index.is_a?(Expr)
    # Guard against passing a predicate positionally by mistake.
    raise TypeError, "Expressions should be passed to the 'by_predicate' param"
  elsif index.is_a?(Integer)
    _df.row_tuple(index)
  elsif by_predicate.is_a?(Expr)
    matches = filter(by_predicate).rows
    n_rows = matches.length
    # Exactly one matching row is required.
    if n_rows > 1
      raise TooManyRowsReturned, "Predicate #{by_predicate} returned #{n_rows} rows"
    elsif n_rows == 0
      # Interpolate the predicate with Ruby syntax; the previous text held a
      # literal Python-style `{by_predicate!s}` placeholder.
      raise NoRowsReturned, "Predicate <#{by_predicate}> returned no rows"
    end
    matches[0]
  else
    raise ArgumentError, "One of 'index' or 'by_predicate' must be set"
  end
end
1091
3118
 
1092
- # def rows
1093
- # end
3119
+ # Convert columnar data to rows as Ruby arrays.
3120
+ #
3121
+ # @return [Array]
3122
+ #
3123
+ # @example
3124
+ # df = Polars::DataFrame.new(
3125
+ # {
3126
+ # "a" => [1, 3, 5],
3127
+ # "b" => [2, 4, 6]
3128
+ # }
3129
+ # )
3130
+ # df.rows
3131
+ # # => [[1, 2], [3, 4], [5, 6]]
3132
def rows
  # Returns whatever `row_tuples` on the underlying `_df` produces.
  rb_frame = _df
  rb_frame.row_tuples
end
1094
3135
 
1095
- # def shrink_to_fit
1096
- # end
3136
+ # Shrink DataFrame memory usage.
3137
+ #
3138
+ # Shrinks to fit the exact capacity needed to hold the data.
3139
+ #
3140
+ # @return [DataFrame]
3141
def shrink_to_fit(in_place: false)
  # Work on this frame directly when `in_place` is truthy, otherwise on a
  # clone; the frame that was shrunk is the one returned.
  target = in_place ? self : clone
  target._df.shrink_to_fit
  target
end
1097
3151
 
1098
- # def take_every
1099
- # end
3152
+ # Take every nth row in the DataFrame and return as a new DataFrame.
3153
+ #
3154
+ # @return [DataFrame]
3155
+ #
3156
+ # @example
3157
+ # s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]})
3158
+ # s.take_every(2)
3159
+ # # =>
3160
+ # # shape: (2, 2)
3161
+ # # ┌─────┬─────┐
3162
+ # # │ a ┆ b │
3163
+ # # │ --- ┆ --- │
3164
+ # # │ i64 ┆ i64 │
3165
+ # # ╞═════╪═════╡
3166
+ # # │ 1 ┆ 5 │
3167
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
3168
+ # # │ 3 ┆ 7 │
3169
+ # # └─────┴─────┘
3170
def take_every(n)
  # Apply `take_every(n)` to the wildcard column expression and select it.
  every_nth = Utils.col("*").take_every(n)
  select(every_nth)
end
1100
3173
 
1101
3174
  # def hash_rows
1102
3175
  # end
1103
3176
 
1104
- # def interpolate
1105
- # end
1106
-
3177
+ # Interpolate intermediate values. The interpolation method is linear.
3178
+ #
3179
+ # @return [DataFrame]
3180
+ #
3181
+ # @example
3182
+ # df = Polars::DataFrame.new(
3183
+ # {
3184
+ # "foo" => [1, nil, 9, 10],
3185
+ # "bar" => [6, 7, 9, nil],
3186
+ # "baz" => [1, nil, nil, 9]
3187
+ # }
3188
+ # )
3189
+ # df.interpolate
3190
+ # # =>
3191
+ # # shape: (4, 3)
3192
+ # # ┌─────┬──────┬─────┐
3193
+ # # │ foo ┆ bar ┆ baz │
3194
+ # # │ --- ┆ --- ┆ --- │
3195
+ # # │ i64 ┆ i64 ┆ i64 │
3196
+ # # ╞═════╪══════╪═════╡
3197
+ # # │ 1 ┆ 6 ┆ 1 │
3198
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
3199
+ # # │ 5 ┆ 7 ┆ 3 │
3200
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
3201
+ # # │ 9 ┆ 9 ┆ 6 │
3202
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
3203
+ # # │ 10 ┆ null ┆ 9 │
3204
+ # # └─────┴──────┴─────┘
3205
def interpolate
  # Apply `interpolate` to the wildcard column expression and select it.
  interpolated = Utils.col("*").interpolate
  select(interpolated)
end
3208
+
3209
+ # Check if the dataframe is empty.
3210
+ #
3211
+ # @return [Boolean]
1107
3212
  #
3213
+ # @example
3214
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
3215
+ # df.is_empty
3216
+ # # => false
3217
+ # df.filter(Polars.col("foo") > 99).is_empty
3218
+ # # => true
1108
3219
  def is_empty
1109
3220
  height == 0
1110
3221
  end
1111
3222
  alias_method :empty?, :is_empty
1112
3223
 
1113
- # def to_struct(name)
1114
- # end
3224
+ # Convert a `DataFrame` to a `Series` of type `Struct`.
3225
+ #
3226
+ # @param name [String]
3227
+ # Name for the struct Series
3228
+ #
3229
+ # @return [Series]
3230
+ #
3231
+ # @example
3232
+ # df = Polars::DataFrame.new(
3233
+ # {
3234
+ # "a" => [1, 2, 3, 4, 5],
3235
+ # "b" => ["one", "two", "three", "four", "five"]
3236
+ # }
3237
+ # )
3238
+ # df.to_struct("nums")
3239
+ # # =>
3240
+ # # shape: (5,)
3241
+ # # Series: 'nums' [struct[2]]
3242
+ # # [
3243
+ # # {1,"one"}
3244
+ # # {2,"two"}
3245
+ # # {3,"three"}
3246
+ # # {4,"four"}
3247
+ # # {5,"five"}
3248
+ # # ]
3249
def to_struct(name)
  # Build the struct on the underlying `_df`, then pass it to `Utils.wrap_s`.
  rb_series = _df.to_struct(name)
  Utils.wrap_s(rb_series)
end
1115
3252
 
1116
- # def unnest
1117
- # end
3253
+ # Decompose a struct into its fields.
3254
+ #
3255
+ # The fields will be inserted into the `DataFrame` on the location of the
3256
+ # `struct` type.
3257
+ #
3258
+ # @param names [Object]
3259
+ # Names of the struct columns that will be decomposed by its fields
3260
+ #
3261
+ # @return [DataFrame]
3262
+ #
3263
+ # @example
3264
+ # df = Polars::DataFrame.new(
3265
+ # {
3266
+ # "before" => ["foo", "bar"],
3267
+ # "t_a" => [1, 2],
3268
+ # "t_b" => ["a", "b"],
3269
+ # "t_c" => [true, nil],
3270
+ # "t_d" => [[1, 2], [3]],
3271
+ # "after" => ["baz", "womp"]
3272
+ # }
3273
+ # ).select(["before", Polars.struct(Polars.col("^t_.$")).alias("t_struct"), "after"])
3274
+ # df.unnest("t_struct")
3275
+ # # =>
3276
+ # # shape: (2, 6)
3277
+ # # ┌────────┬─────┬─────┬──────┬───────────┬───────┐
3278
+ # # │ before ┆ t_a ┆ t_b ┆ t_c ┆ t_d ┆ after │
3279
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
3280
+ # # │ str ┆ i64 ┆ str ┆ bool ┆ list[i64] ┆ str │
3281
+ # # ╞════════╪═════╪═════╪══════╪═══════════╪═══════╡
3282
+ # # │ foo ┆ 1 ┆ a ┆ true ┆ [1, 2] ┆ baz │
3283
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
3284
+ # # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
3285
+ # # └────────┴─────┴─────┴──────┴───────────┴───────┘
3286
def unnest(names)
  # A single column name is treated as a one-element list.
  names = [names] if names.is_a?(String)

  unnested = _df.unnest(names)
  _from_rbdf(unnested)
end
1118
3292
 
1119
3293
  private
1120
3294
 
@@ -1127,7 +3301,7 @@ module Polars
1127
3301
  if !columns.nil?
1128
3302
  columns, dtypes = _unpack_columns(columns, lookup_names: data.keys)
1129
3303
 
1130
- if !data && dtypes
3304
+ if data.empty? && dtypes
1131
3305
  data_series = columns.map { |name| Series.new(name, [], dtype: dtypes[name])._s }
1132
3306
  else
1133
3307
  data_series = data.map { |name, values| Series.new(name, values, dtype: dtypes[name])._s }
@@ -1147,7 +3321,7 @@ module Polars
1147
3321
  if columns.nil?
1148
3322
  data
1149
3323
  else
1150
- if !data
3324
+ if data.empty?
1151
3325
  columns.map { |c| Series.new(c, nil)._s }
1152
3326
  elsif data.length == columns.length
1153
3327
  columns.each_with_index do |c, i|
@@ -1182,5 +3356,75 @@ module Polars
1182
3356
  def _from_rbdf(rb_df)
1183
3357
  self.class._from_rbdf(rb_df)
1184
3358
  end
3359
+
3360
def _comp(other, op)
  # Frame-vs-frame comparisons go to one helper; every other operand goes
  # to the other.
  return _compare_to_other_df(other, op) if other.is_a?(DataFrame)

  _compare_to_non_df(other, op)
end
3367
+
3368
def _compare_to_other_df(other, op)
  # Both frames must have the same column names and the same shape.
  # (The exception class was previously misspelled, which raised NameError
  # instead of ArgumentError.)
  if columns != other.columns
    raise ArgumentError, "DataFrame columns do not match"
  end
  if shape != other.shape
    raise ArgumentError, "DataFrame dimensions do not match"
  end

  # Give the other frame's columns a suffix so both frames can be placed
  # side by side and each column compared with its suffixed counterpart.
  suffix = "__POLARS_CMP_OTHER"
  other_renamed = other.select(Polars.all.suffix(suffix))
  combined = Polars.concat([self, other_renamed], how: "horizontal")

  expr = case op
  when "eq"
    columns.map { |n| Polars.col(n) == Polars.col("#{n}#{suffix}") }
  when "neq"
    columns.map { |n| Polars.col(n) != Polars.col("#{n}#{suffix}") }
  when "gt"
    columns.map { |n| Polars.col(n) > Polars.col("#{n}#{suffix}") }
  when "lt"
    columns.map { |n| Polars.col(n) < Polars.col("#{n}#{suffix}") }
  when "gt_eq"
    columns.map { |n| Polars.col(n) >= Polars.col("#{n}#{suffix}") }
  when "lt_eq"
    columns.map { |n| Polars.col(n) <= Polars.col("#{n}#{suffix}") }
  else
    raise ArgumentError, "got unexpected comparison operator: #{op}"
  end

  combined.select(expr)
end
3399
+
3400
def _compare_to_non_df(other, op)
  # Build one comparison expression over all columns, then select it.
  # An unknown operator name raises before any selection happens.
  comparison =
    case op
    when "eq" then Polars.all == other
    when "neq" then Polars.all != other
    when "gt" then Polars.all > other
    when "lt" then Polars.all < other
    when "gt_eq" then Polars.all >= other
    when "lt_eq" then Polars.all <= other
    else
      raise ArgumentError, "got unexpected comparison operator: #{op}"
    end

  select(comparison)
end
3418
+
3419
def _prepare_other_arg(other)
  # A Series is returned untouched; arrays are rejected; any other value is
  # wrapped in an unnamed single-element Series.
  return other if other.is_a?(Series)

  raise ArgumentError, "Operation not supported." if other.is_a?(Array)

  Series.new("", [other])
end
1185
3429
  end
1186
3430
  end