polars-df 0.8.0-arm64-darwin → 0.9.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -1
  3. data/Cargo.lock +107 -59
  4. data/Cargo.toml +0 -3
  5. data/LICENSE-THIRD-PARTY.txt +1726 -754
  6. data/LICENSE.txt +1 -1
  7. data/README.md +2 -2
  8. data/lib/polars/3.1/polars.bundle +0 -0
  9. data/lib/polars/3.2/polars.bundle +0 -0
  10. data/lib/polars/3.3/polars.bundle +0 -0
  11. data/lib/polars/array_expr.rb +449 -0
  12. data/lib/polars/array_name_space.rb +346 -0
  13. data/lib/polars/cat_expr.rb +24 -0
  14. data/lib/polars/cat_name_space.rb +75 -0
  15. data/lib/polars/config.rb +2 -2
  16. data/lib/polars/data_frame.rb +179 -43
  17. data/lib/polars/data_types.rb +191 -28
  18. data/lib/polars/date_time_expr.rb +31 -14
  19. data/lib/polars/exceptions.rb +12 -1
  20. data/lib/polars/expr.rb +866 -186
  21. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  22. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  23. data/lib/polars/functions/as_datatype.rb +248 -0
  24. data/lib/polars/functions/col.rb +47 -0
  25. data/lib/polars/functions/eager.rb +182 -0
  26. data/lib/polars/functions/lazy.rb +1280 -0
  27. data/lib/polars/functions/len.rb +49 -0
  28. data/lib/polars/functions/lit.rb +35 -0
  29. data/lib/polars/functions/random.rb +16 -0
  30. data/lib/polars/functions/range/date_range.rb +103 -0
  31. data/lib/polars/functions/range/int_range.rb +51 -0
  32. data/lib/polars/functions/repeat.rb +144 -0
  33. data/lib/polars/functions/whenthen.rb +27 -0
  34. data/lib/polars/functions.rb +29 -416
  35. data/lib/polars/group_by.rb +2 -2
  36. data/lib/polars/io.rb +18 -25
  37. data/lib/polars/lazy_frame.rb +367 -53
  38. data/lib/polars/list_expr.rb +152 -6
  39. data/lib/polars/list_name_space.rb +102 -0
  40. data/lib/polars/meta_expr.rb +175 -7
  41. data/lib/polars/series.rb +273 -34
  42. data/lib/polars/string_cache.rb +75 -0
  43. data/lib/polars/string_expr.rb +412 -96
  44. data/lib/polars/string_name_space.rb +4 -4
  45. data/lib/polars/testing.rb +507 -0
  46. data/lib/polars/utils.rb +52 -8
  47. data/lib/polars/version.rb +1 -1
  48. data/lib/polars.rb +15 -2
  49. metadata +33 -4
  50. data/lib/polars/lazy_functions.rb +0 -1181
@@ -308,7 +308,7 @@ module Polars
308
308
  # end
309
309
  #
310
310
  # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => ["10", "20", "30", "40"]}).lazy
311
- # df.pipe(cast_str_to_int, col_name: "b").collect()
311
+ # df.pipe(cast_str_to_int, col_name: "b").collect
312
312
  # # =>
313
313
  # # shape: (4, 2)
314
314
  # # ┌─────┬─────┐
@@ -342,6 +342,7 @@ module Polars
342
342
  simplify_expression: true,
343
343
  slice_pushdown: true,
344
344
  common_subplan_elimination: true,
345
+ comm_subexpr_elim: true,
345
346
  allow_streaming: false
346
347
  )
347
348
  ldf = _ldf.optimization_toggle(
@@ -351,6 +352,7 @@ module Polars
351
352
  simplify_expression,
352
353
  slice_pushdown,
353
354
  common_subplan_elimination,
355
+ comm_subexpr_elim,
354
356
  allow_streaming,
355
357
  false
356
358
  )
@@ -469,6 +471,7 @@ module Polars
469
471
  no_optimization: false,
470
472
  slice_pushdown: true,
471
473
  common_subplan_elimination: true,
474
+ comm_subexpr_elim: true,
472
475
  allow_streaming: false,
473
476
  _eager: false
474
477
  )
@@ -477,6 +480,7 @@ module Polars
477
480
  projection_pushdown = false
478
481
  slice_pushdown = false
479
482
  common_subplan_elimination = false
483
+ comm_subexpr_elim = false
480
484
  end
481
485
 
482
486
  if allow_streaming
@@ -490,6 +494,7 @@ module Polars
490
494
  simplify_expression,
491
495
  slice_pushdown,
492
496
  common_subplan_elimination,
497
+ comm_subexpr_elim,
493
498
  allow_streaming,
494
499
  _eager
495
500
  )
@@ -559,6 +564,268 @@ module Polars
559
564
  simplify_expression: true,
560
565
  no_optimization: false,
561
566
  slice_pushdown: true
567
+ )
568
+ lf = _set_sink_optimizations(
569
+ type_coercion: type_coercion,
570
+ predicate_pushdown: predicate_pushdown,
571
+ projection_pushdown: projection_pushdown,
572
+ simplify_expression: simplify_expression,
573
+ slice_pushdown: slice_pushdown,
574
+ no_optimization: no_optimization
575
+ )
576
+
577
+ lf.sink_parquet(
578
+ path,
579
+ compression,
580
+ compression_level,
581
+ statistics,
582
+ row_group_size,
583
+ data_pagesize_limit,
584
+ maintain_order
585
+ )
586
+ end
587
+
588
+ # Evaluate the query in streaming mode and write to an IPC file.
589
+ #
590
+ # This allows streaming results that are larger than RAM to be written to disk.
591
+ #
592
+ # @param path [String]
593
+ # File path to which the file should be written.
594
+ # @param compression ["lz4", "zstd"]
595
+ # Choose "zstd" for good compression performance.
596
+ # Choose "lz4" for fast compression/decompression.
597
+ # @param maintain_order [Boolean]
598
+ # Maintain the order in which data is processed.
599
+ # Setting this to `false` will be slightly faster.
600
+ # @param type_coercion [Boolean]
601
+ # Do type coercion optimization.
602
+ # @param predicate_pushdown [Boolean]
603
+ # Do predicate pushdown optimization.
604
+ # @param projection_pushdown [Boolean]
605
+ # Do projection pushdown optimization.
606
+ # @param simplify_expression [Boolean]
607
+ # Run simplify expressions optimization.
608
+ # @param slice_pushdown [Boolean]
609
+ # Slice pushdown optimization.
610
+ # @param no_optimization [Boolean]
611
+ # Turn off (certain) optimizations.
612
+ #
613
+ # @return [DataFrame]
614
+ #
615
+ # @example
616
+ # lf = Polars.scan_csv("/path/to/my_larger_than_ram_file.csv")
617
+ # lf.sink_ipc("out.arrow")
618
+ def sink_ipc(
619
+ path,
620
+ compression: "zstd",
621
+ maintain_order: true,
622
+ type_coercion: true,
623
+ predicate_pushdown: true,
624
+ projection_pushdown: true,
625
+ simplify_expression: true,
626
+ slice_pushdown: true,
627
+ no_optimization: false
628
+ )
629
+ lf = _set_sink_optimizations(
630
+ type_coercion: type_coercion,
631
+ predicate_pushdown: predicate_pushdown,
632
+ projection_pushdown: projection_pushdown,
633
+ simplify_expression: simplify_expression,
634
+ slice_pushdown: slice_pushdown,
635
+ no_optimization: no_optimization
636
+ )
637
+
638
+ lf.sink_ipc(
639
+ path,
640
+ compression,
641
+ maintain_order
642
+ )
643
+ end
644
+
645
+ # Evaluate the query in streaming mode and write to a CSV file.
646
+ #
647
+ # This allows streaming results that are larger than RAM to be written to disk.
648
+ #
649
+ # @param path [String]
650
+ # File path to which the file should be written.
651
+ # @param include_bom [Boolean]
652
+ # Whether to include UTF-8 BOM in the CSV output.
653
+ # @param include_header [Boolean]
654
+ # Whether to include header in the CSV output.
655
+ # @param separator [String]
656
+ # Separate CSV fields with this symbol.
657
+ # @param line_terminator [String]
658
+ # String used to end each row.
659
+ # @param quote_char [String]
660
+ # Byte to use as quoting character.
661
+ # @param batch_size [Integer]
662
+ # Number of rows that will be processed per thread.
663
+ # @param datetime_format [String]
664
+ # A format string, with the specifiers defined by the
665
+ # `chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
666
+ # Rust crate. If no format specified, the default fractional-second
667
+ # precision is inferred from the maximum timeunit found in the frame's
668
+ # Datetime cols (if any).
669
+ # @param date_format [String]
670
+ # A format string, with the specifiers defined by the
671
+ # `chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
672
+ # Rust crate.
673
+ # @param time_format [String]
674
+ # A format string, with the specifiers defined by the
675
+ # `chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
676
+ # Rust crate.
677
+ # @param float_precision [Integer]
678
+ # Number of decimal places to write, applied to both `Float32` and
679
+ # `Float64` datatypes.
680
+ # @param null_value [String]
681
+ # A string representing null values (defaulting to the empty string).
682
+ # @param quote_style ["necessary", "always", "non_numeric", "never"]
683
+ # Determines the quoting strategy used.
684
+ #
685
+ # - necessary (default): This puts quotes around fields only when necessary.
686
+ # They are necessary when fields contain a quote,
687
+ # delimiter or record terminator.
688
+ # Quotes are also necessary when writing an empty record
689
+ # (which is indistinguishable from a record with one empty field).
690
+ # This is the default.
691
+ # - always: This puts quotes around every field. Always.
692
+ # - never: This never puts quotes around fields, even if that results in
693
+ # invalid CSV data (e.g.: by not quoting strings containing the
694
+ # separator).
695
+ # - non_numeric: This puts quotes around all fields that are non-numeric.
696
+ # Namely, when writing a field that does not parse as a valid float
697
+ # or integer, then quotes will be used even if they aren't strictly
698
+ # necessary.
699
+ # @param maintain_order [Boolean]
700
+ # Maintain the order in which data is processed.
701
+ # Setting this to `false` will be slightly faster.
702
+ # @param type_coercion [Boolean]
703
+ # Do type coercion optimization.
704
+ # @param predicate_pushdown [Boolean]
705
+ # Do predicate pushdown optimization.
706
+ # @param projection_pushdown [Boolean]
707
+ # Do projection pushdown optimization.
708
+ # @param simplify_expression [Boolean]
709
+ # Run simplify expressions optimization.
710
+ # @param slice_pushdown [Boolean]
711
+ # Slice pushdown optimization.
712
+ # @param no_optimization [Boolean]
713
+ # Turn off (certain) optimizations.
714
+ #
715
+ # @return [DataFrame]
716
+ #
717
+ # @example
718
+ # lf = Polars.scan_csv("/path/to/my_larger_than_ram_file.csv")
719
+ # lf.sink_csv("out.csv")
720
+ def sink_csv(
721
+ path,
722
+ include_bom: false,
723
+ include_header: true,
724
+ separator: ",",
725
+ line_terminator: "\n",
726
+ quote_char: '"',
727
+ batch_size: 1024,
728
+ datetime_format: nil,
729
+ date_format: nil,
730
+ time_format: nil,
731
+ float_precision: nil,
732
+ null_value: nil,
733
+ quote_style: nil,
734
+ maintain_order: true,
735
+ type_coercion: true,
736
+ predicate_pushdown: true,
737
+ projection_pushdown: true,
738
+ simplify_expression: true,
739
+ slice_pushdown: true,
740
+ no_optimization: false
741
+ )
742
+ Utils._check_arg_is_1byte("separator", separator, false)
743
+ Utils._check_arg_is_1byte("quote_char", quote_char, false)
744
+
745
+ lf = _set_sink_optimizations(
746
+ type_coercion: type_coercion,
747
+ predicate_pushdown: predicate_pushdown,
748
+ projection_pushdown: projection_pushdown,
749
+ simplify_expression: simplify_expression,
750
+ slice_pushdown: slice_pushdown,
751
+ no_optimization: no_optimization
752
+ )
753
+
754
+ lf.sink_csv(
755
+ path,
756
+ include_bom,
757
+ include_header,
758
+ separator.ord,
759
+ line_terminator,
760
+ quote_char.ord,
761
+ batch_size,
762
+ datetime_format,
763
+ date_format,
764
+ time_format,
765
+ float_precision,
766
+ null_value,
767
+ quote_style,
768
+ maintain_order
769
+ )
770
+ end
771
+
772
+ # Evaluate the query in streaming mode and write to an NDJSON file.
773
+ #
774
+ # This allows streaming results that are larger than RAM to be written to disk.
775
+ #
776
+ # @param path [String]
777
+ # File path to which the file should be written.
778
+ # @param maintain_order [Boolean]
779
+ # Maintain the order in which data is processed.
780
+ # Setting this to `false` will be slightly faster.
781
+ # @param type_coercion [Boolean]
782
+ # Do type coercion optimization.
783
+ # @param predicate_pushdown [Boolean]
784
+ # Do predicate pushdown optimization.
785
+ # @param projection_pushdown [Boolean]
786
+ # Do projection pushdown optimization.
787
+ # @param simplify_expression [Boolean]
788
+ # Run simplify expressions optimization.
789
+ # @param slice_pushdown [Boolean]
790
+ # Slice pushdown optimization.
791
+ # @param no_optimization [Boolean]
792
+ # Turn off (certain) optimizations.
793
+ #
794
+ # @return [DataFrame]
795
+ #
796
+ # @example
797
+ # lf = Polars.scan_csv("/path/to/my_larger_than_ram_file.csv")
798
+ # lf.sink_ndjson("out.ndjson")
799
+ def sink_ndjson(
800
+ path,
801
+ maintain_order: true,
802
+ type_coercion: true,
803
+ predicate_pushdown: true,
804
+ projection_pushdown: true,
805
+ simplify_expression: true,
806
+ slice_pushdown: true,
807
+ no_optimization: false
808
+ )
809
+ lf = _set_sink_optimizations(
810
+ type_coercion: type_coercion,
811
+ predicate_pushdown: predicate_pushdown,
812
+ projection_pushdown: projection_pushdown,
813
+ simplify_expression: simplify_expression,
814
+ slice_pushdown: slice_pushdown,
815
+ no_optimization: no_optimization
816
+ )
817
+
818
+ lf.sink_json(path, maintain_order)
819
+ end
820
+
821
+ # @private
822
+ def _set_sink_optimizations(
823
+ type_coercion: true,
824
+ predicate_pushdown: true,
825
+ projection_pushdown: true,
826
+ simplify_expression: true,
827
+ slice_pushdown: true,
828
+ no_optimization: false
562
829
  )
563
830
  if no_optimization
564
831
  predicate_pushdown = false
@@ -566,25 +833,17 @@ module Polars
566
833
  slice_pushdown = false
567
834
  end
568
835
 
569
- lf = _ldf.optimization_toggle(
836
+ _ldf.optimization_toggle(
570
837
  type_coercion,
571
838
  predicate_pushdown,
572
839
  projection_pushdown,
573
840
  simplify_expression,
574
841
  slice_pushdown,
575
842
  false,
843
+ false,
576
844
  true,
577
845
  false
578
846
  )
579
- lf.sink_parquet(
580
- path,
581
- compression,
582
- compression_level,
583
- statistics,
584
- row_group_size,
585
- data_pagesize_limit,
586
- maintain_order
587
- )
588
847
  end
589
848
 
590
849
  # Collect a small number of rows for debugging purposes.
@@ -650,6 +909,7 @@ module Polars
650
909
  no_optimization: false,
651
910
  slice_pushdown: true,
652
911
  common_subplan_elimination: true,
912
+ comm_subexpr_elim: true,
653
913
  allow_streaming: false
654
914
  )
655
915
  if no_optimization
@@ -666,6 +926,7 @@ module Polars
666
926
  simplify_expression,
667
927
  slice_pushdown,
668
928
  common_subplan_elimination,
929
+ comm_subexpr_elim,
669
930
  allow_streaming,
670
931
  false
671
932
  )
@@ -699,6 +960,10 @@ module Polars
699
960
  _from_rbldf(_ldf.cache)
700
961
  end
701
962
 
963
+ # TODO
964
+ # def cast
965
+ # end
966
+
702
967
  # Create an empty copy of the current LazyFrame.
703
968
  #
704
969
  # The copy has an identical schema but no data.
@@ -706,14 +971,14 @@ module Polars
706
971
  # @return [LazyFrame]
707
972
  #
708
973
  # @example
709
- # df = Polars::DataFrame.new(
974
+ # lf = Polars::LazyFrame.new(
710
975
  # {
711
976
  # "a" => [nil, 2, 3, 4],
712
977
  # "b" => [0.5, nil, 2.5, 13],
713
978
  # "c" => [true, true, false, nil],
714
979
  # }
715
980
  # ).lazy
716
- # df.cleared.fetch
981
+ # lf.clear.fetch
717
982
  # # =>
718
983
  # # shape: (0, 3)
719
984
  # # ┌─────┬─────┬──────┐
@@ -722,9 +987,23 @@ module Polars
722
987
  # # │ i64 ┆ f64 ┆ bool │
723
988
  # # ╞═════╪═════╪══════╡
724
989
  # # └─────┴─────┴──────┘
725
- def cleared
726
- DataFrame.new(columns: schema).lazy
727
- end
990
+ #
991
+ # @example
992
+ # lf.clear(2).fetch
993
+ # # =>
994
+ # # shape: (2, 3)
995
+ # # ┌──────┬──────┬──────┐
996
+ # # │ a ┆ b ┆ c │
997
+ # # │ --- ┆ --- ┆ --- │
998
+ # # │ i64 ┆ f64 ┆ bool │
999
+ # # ╞══════╪══════╪══════╡
1000
+ # # │ null ┆ null ┆ null │
1001
+ # # │ null ┆ null ┆ null │
1002
+ # # └──────┴──────┴──────┘
1003
+ def clear(n = 0)
1004
+ DataFrame.new(columns: schema).clear(n).lazy
1005
+ end
1006
+ alias_method :cleared, :clear
728
1007
 
729
1008
  # Filter the rows in the DataFrame based on a predicate expression.
730
1009
  #
@@ -774,8 +1053,13 @@ module Polars
774
1053
 
775
1054
  # Select columns from this DataFrame.
776
1055
  #
777
- # @param exprs [Object]
778
- # Column or columns to select.
1056
+ # @param exprs [Array]
1057
+ # Column(s) to select, specified as positional arguments.
1058
+ # Accepts expression input. Strings are parsed as column names,
1059
+ # other non-expression inputs are parsed as literals.
1060
+ # @param named_exprs [Hash]
1061
+ # Additional columns to select, specified as keyword arguments.
1062
+ # The columns will be renamed to the keyword used.
779
1063
  #
780
1064
  # @return [LazyFrame]
781
1065
  #
@@ -855,9 +1139,13 @@ module Polars
855
1139
  # # │ 0 │
856
1140
  # # │ 10 │
857
1141
  # # └─────────┘
858
- def select(exprs)
859
- exprs = Utils.selection_to_rbexpr_list(exprs)
860
- _from_rbldf(_ldf.select(exprs))
1142
+ def select(*exprs, **named_exprs)
1143
+ structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
1144
+
1145
+ rbexprs = Utils.parse_as_list_of_expressions(
1146
+ *exprs, **named_exprs, __structify: structify
1147
+ )
1148
+ _from_rbldf(_ldf.select(rbexprs))
861
1149
  end
862
1150
 
863
1151
  # Start a group by operation.
@@ -967,7 +1255,7 @@ module Polars
967
1255
  # df = Polars::LazyFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
968
1256
  # Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
969
1257
  # )
970
- # df.group_by_rolling(index_column: "dt", period: "2d").agg(
1258
+ # df.rolling(index_column: "dt", period: "2d").agg(
971
1259
  # [
972
1260
  # Polars.sum("a").alias("sum_a"),
973
1261
  # Polars.min("a").alias("min_a"),
@@ -988,7 +1276,7 @@ module Polars
988
1276
  # # │ 2020-01-03 19:45:32 ┆ 11 ┆ 2 ┆ 9 │
989
1277
  # # │ 2020-01-08 23:16:43 ┆ 1 ┆ 1 ┆ 1 │
990
1278
  # # └─────────────────────┴───────┴───────┴───────┘
991
- def group_by_rolling(
1279
+ def rolling(
992
1280
  index_column:,
993
1281
  period:,
994
1282
  offset: nil,
@@ -1005,12 +1293,13 @@ module Polars
1005
1293
  period = Utils._timedelta_to_pl_duration(period)
1006
1294
  offset = Utils._timedelta_to_pl_duration(offset)
1007
1295
 
1008
- lgb = _ldf.group_by_rolling(
1296
+ lgb = _ldf.rolling(
1009
1297
  index_column, period, offset, closed, rbexprs_by, check_sorted
1010
1298
  )
1011
1299
  LazyGroupBy.new(lgb)
1012
1300
  end
1013
- alias_method :groupby_rolling, :group_by_rolling
1301
+ alias_method :group_by_rolling, :rolling
1302
+ alias_method :groupby_rolling, :rolling
1014
1303
 
1015
1304
  # Group based on a time value (or index value of type `:i32`, `:i64`).
1016
1305
  #
@@ -1440,6 +1729,8 @@ module Polars
1440
1729
  # Join strategy.
1441
1730
  # @param suffix [String]
1442
1731
  # Suffix to append to columns with a duplicate name.
1732
+ # @param join_nulls [Boolean]
1733
+ # Join on null values. By default null values will never produce matches.
1443
1734
  # @param allow_parallel [Boolean]
1444
1735
  # Allow the physical plan to optionally evaluate the computation of both
1445
1736
  # DataFrames up to the join in parallel.
@@ -1535,6 +1826,7 @@ module Polars
1535
1826
  on: nil,
1536
1827
  how: "inner",
1537
1828
  suffix: "_right",
1829
+ join_nulls: false,
1538
1830
  allow_parallel: true,
1539
1831
  force_parallel: false
1540
1832
  )
@@ -1568,6 +1860,7 @@ module Polars
1568
1860
  rbexprs_right,
1569
1861
  allow_parallel,
1570
1862
  force_parallel,
1863
+ join_nulls,
1571
1864
  how,
1572
1865
  suffix,
1573
1866
  )
@@ -1608,27 +1901,9 @@ module Polars
1608
1901
  # # │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
1609
1902
  # # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
1610
1903
  # # └─────┴──────┴───────┴──────┴──────┴───────┘
1611
- def with_columns(exprs)
1612
- exprs =
1613
- if exprs.nil?
1614
- []
1615
- elsif exprs.is_a?(Expr)
1616
- [exprs]
1617
- else
1618
- exprs.to_a
1619
- end
1620
-
1621
- rbexprs = []
1622
- exprs.each do |e|
1623
- case e
1624
- when Expr
1625
- rbexprs << e._rbexpr
1626
- when Series
1627
- rbexprs << Utils.lit(e)._rbexpr
1628
- else
1629
- raise ArgumentError, "Expected an expression, got #{e}"
1630
- end
1631
- end
1904
+ def with_columns(*exprs, **named_exprs)
1905
+ structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
1906
+ rbexprs = Utils.parse_as_list_of_expressions(*exprs, **named_exprs, __structify: structify)
1632
1907
 
1633
1908
  _from_rbldf(_ldf.with_columns(rbexprs))
1634
1909
  end
@@ -1725,7 +2000,7 @@ module Polars
1725
2000
  if columns.is_a?(::String)
1726
2001
  columns = [columns]
1727
2002
  end
1728
- _from_rbldf(_ldf.drop_columns(columns))
2003
+ _from_rbldf(_ldf.drop(columns))
1729
2004
  end
1730
2005
 
1731
2006
  # Rename column names.
@@ -1955,7 +2230,7 @@ module Polars
1955
2230
  # "b" => [2, 4, 6]
1956
2231
  # }
1957
2232
  # ).lazy
1958
- # df.with_row_count.collect
2233
+ # df.with_row_index.collect
1959
2234
  # # =>
1960
2235
  # # shape: (3, 3)
1961
2236
  # # ┌────────┬─────┬─────┐
@@ -1967,9 +2242,10 @@ module Polars
1967
2242
  # # │ 1 ┆ 3 ┆ 4 │
1968
2243
  # # │ 2 ┆ 5 ┆ 6 │
1969
2244
  # # └────────┴─────┴─────┘
1970
- def with_row_count(name: "row_nr", offset: 0)
1971
- _from_rbldf(_ldf.with_row_count(name, offset))
2245
+ def with_row_index(name: "row_nr", offset: 0)
2246
+ _from_rbldf(_ldf.with_row_index(name, offset))
1972
2247
  end
2248
+ alias_method :with_row_count, :with_row_index
1973
2249
 
1974
2250
  # Take every nth row in the LazyFrame and return as a new LazyFrame.
1975
2251
  #
@@ -2470,9 +2746,47 @@ module Polars
2470
2746
  _from_rbldf(_ldf.unnest(names))
2471
2747
  end
2472
2748
 
2473
- # TODO
2474
- # def merge_sorted
2475
- # end
2749
+ # Take two sorted DataFrames and merge them by the sorted key.
2750
+ #
2751
+ # The output of this operation will also be sorted.
2752
+ # It is the callers responsibility that the frames are sorted
2753
+ # by that key otherwise the output will not make sense.
2754
+ #
2755
+ # The schemas of both LazyFrames must be equal.
2756
+ #
2757
+ # @param other [DataFrame]
2758
+ # Other DataFrame that must be merged
2759
+ # @param key [String]
2760
+ # Key that is sorted.
2761
+ #
2762
+ # @return [LazyFrame]
2763
+ #
2764
+ # @example
2765
+ # df0 = Polars::LazyFrame.new(
2766
+ # {"name" => ["steve", "elise", "bob"], "age" => [42, 44, 18]}
2767
+ # ).sort("age")
2768
+ # df1 = Polars::LazyFrame.new(
2769
+ # {"name" => ["anna", "megan", "steve", "thomas"], "age" => [21, 33, 42, 20]}
2770
+ # ).sort("age")
2771
+ # df0.merge_sorted(df1, "age").collect
2772
+ # # =>
2773
+ # # shape: (7, 2)
2774
+ # # ┌────────┬─────┐
2775
+ # # │ name ┆ age │
2776
+ # # │ --- ┆ --- │
2777
+ # # │ str ┆ i64 │
2778
+ # # ╞════════╪═════╡
2779
+ # # │ bob ┆ 18 │
2780
+ # # │ thomas ┆ 20 │
2781
+ # # │ anna ┆ 21 │
2782
+ # # │ megan ┆ 33 │
2783
+ # # │ steve ┆ 42 │
2784
+ # # │ steve ┆ 42 │
2785
+ # # │ elise ┆ 44 │
2786
+ # # └────────┴─────┘
2787
+ def merge_sorted(other, key)
2788
+ _from_rbldf(_ldf.merge_sorted(other._ldf, key))
2789
+ end
2476
2790
 
2477
2791
  # Indicate that one or multiple columns are sorted.
2478
2792
  #