polars-df 0.14.0-x86_64-linux-musl → 0.16.0-x86_64-linux-musl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE-THIRD-PARTY.txt +23495 -12923
  5. data/LICENSE.txt +1 -0
  6. data/README.md +38 -4
  7. data/lib/polars/3.2/polars.so +0 -0
  8. data/lib/polars/3.3/polars.so +0 -0
  9. data/lib/polars/{3.1 → 3.4}/polars.so +0 -0
  10. data/lib/polars/batched_csv_reader.rb +0 -2
  11. data/lib/polars/binary_expr.rb +133 -9
  12. data/lib/polars/binary_name_space.rb +101 -6
  13. data/lib/polars/config.rb +4 -0
  14. data/lib/polars/data_frame.rb +452 -101
  15. data/lib/polars/data_type_group.rb +28 -0
  16. data/lib/polars/data_types.rb +3 -1
  17. data/lib/polars/date_time_expr.rb +244 -0
  18. data/lib/polars/date_time_name_space.rb +87 -0
  19. data/lib/polars/expr.rb +103 -2
  20. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  21. data/lib/polars/functions/as_datatype.rb +51 -2
  22. data/lib/polars/functions/col.rb +1 -1
  23. data/lib/polars/functions/eager.rb +1 -3
  24. data/lib/polars/functions/lazy.rb +95 -13
  25. data/lib/polars/functions/range/time_range.rb +21 -21
  26. data/lib/polars/io/csv.rb +14 -16
  27. data/lib/polars/io/database.rb +2 -2
  28. data/lib/polars/io/delta.rb +126 -0
  29. data/lib/polars/io/ipc.rb +14 -4
  30. data/lib/polars/io/ndjson.rb +10 -0
  31. data/lib/polars/io/parquet.rb +168 -111
  32. data/lib/polars/lazy_frame.rb +684 -20
  33. data/lib/polars/list_name_space.rb +169 -0
  34. data/lib/polars/selectors.rb +1226 -0
  35. data/lib/polars/series.rb +465 -35
  36. data/lib/polars/string_cache.rb +27 -1
  37. data/lib/polars/string_expr.rb +0 -1
  38. data/lib/polars/string_name_space.rb +73 -3
  39. data/lib/polars/struct_name_space.rb +31 -7
  40. data/lib/polars/utils/various.rb +5 -1
  41. data/lib/polars/utils.rb +45 -10
  42. data/lib/polars/version.rb +1 -1
  43. data/lib/polars.rb +17 -1
  44. metadata +10 -9
  45. data/lib/polars/functions.rb +0 -57
@@ -431,7 +431,9 @@ module Polars
431
431
  projection_pushdown: true,
432
432
  simplify_expression: true,
433
433
  no_optimization: false,
434
- slice_pushdown: true
434
+ slice_pushdown: true,
435
+ storage_options: nil,
436
+ retries: 2
435
437
  )
436
438
  lf = _set_sink_optimizations(
437
439
  type_coercion: type_coercion,
@@ -460,6 +462,12 @@ module Polars
460
462
  }
461
463
  end
462
464
 
465
+ if storage_options&.any?
466
+ storage_options = storage_options.to_a
467
+ else
468
+ storage_options = nil
469
+ end
470
+
463
471
  lf.sink_parquet(
464
472
  path,
465
473
  compression,
@@ -467,7 +475,9 @@ module Polars
467
475
  statistics,
468
476
  row_group_size,
469
477
  data_pagesize_limit,
470
- maintain_order
478
+ maintain_order,
479
+ storage_options,
480
+ retries
471
481
  )
472
482
  end
473
483
 
@@ -512,6 +522,10 @@ module Polars
512
522
  slice_pushdown: true,
513
523
  no_optimization: false
514
524
  )
525
+ # TODO support storage options in Rust
526
+ storage_options = nil
527
+ retries = 2
528
+
515
529
  lf = _set_sink_optimizations(
516
530
  type_coercion: type_coercion,
517
531
  predicate_pushdown: predicate_pushdown,
@@ -521,10 +535,18 @@ module Polars
521
535
  no_optimization: no_optimization
522
536
  )
523
537
 
538
+ if storage_options&.any?
539
+ storage_options = storage_options.to_a
540
+ else
541
+ storage_options = nil
542
+ end
543
+
524
544
  lf.sink_ipc(
525
545
  path,
526
546
  compression,
527
- maintain_order
547
+ maintain_order,
548
+ storage_options,
549
+ retries
528
550
  )
529
551
  end
530
552
 
@@ -692,7 +714,9 @@ module Polars
692
714
  projection_pushdown: true,
693
715
  simplify_expression: true,
694
716
  slice_pushdown: true,
695
- no_optimization: false
717
+ no_optimization: false,
718
+ storage_options: nil,
719
+ retries: 2
696
720
  )
697
721
  lf = _set_sink_optimizations(
698
722
  type_coercion: type_coercion,
@@ -703,7 +727,13 @@ module Polars
703
727
  no_optimization: no_optimization
704
728
  )
705
729
 
706
- lf.sink_json(path, maintain_order)
730
+ if storage_options&.any?
731
+ storage_options = storage_options.to_a
732
+ else
733
+ storage_options = nil
734
+ end
735
+
736
+ lf.sink_json(path, maintain_order, storage_options, retries)
707
737
  end
708
738
 
709
739
  # @private
@@ -848,9 +878,70 @@ module Polars
848
878
  _from_rbldf(_ldf.cache)
849
879
  end
850
880
 
851
- # TODO
852
- # def cast
853
- # end
881
+ # Cast LazyFrame column(s) to the specified dtype(s).
882
+ #
883
+ # @param dtypes [Hash]
884
+ # Mapping of column names (or selector) to dtypes, or a single dtype
885
+ # to which all columns will be cast.
886
+ # @param strict [Boolean]
887
+ # Throw an error if a cast could not be done (for instance, due to an
888
+ # overflow).
889
+ #
890
+ # @return [LazyFrame]
891
+ #
892
+ # @example Cast specific frame columns to the specified dtypes:
893
+ # lf = Polars::LazyFrame.new(
894
+ # {
895
+ # "foo" => [1, 2, 3],
896
+ # "bar" => [6.0, 7.0, 8.0],
897
+ # "ham" => [Date.new(2020, 1, 2), Date.new(2021, 3, 4), Date.new(2022, 5, 6)]
898
+ # }
899
+ # )
900
+ # lf.cast({"foo" => Polars::Float32, "bar" => Polars::UInt8}).collect
901
+ # # =>
902
+ # # shape: (3, 3)
903
+ # # ┌─────┬─────┬────────────┐
904
+ # # │ foo ┆ bar ┆ ham │
905
+ # # │ --- ┆ --- ┆ --- │
906
+ # # │ f32 ┆ u8 ┆ date │
907
+ # # ╞═════╪═════╪════════════╡
908
+ # # │ 1.0 ┆ 6 ┆ 2020-01-02 │
909
+ # # │ 2.0 ┆ 7 ┆ 2021-03-04 │
910
+ # # │ 3.0 ┆ 8 ┆ 2022-05-06 │
911
+ # # └─────┴─────┴────────────┘
912
+ #
913
+ # @example Cast all frame columns matching one dtype (or dtype group) to another dtype:
914
+ # lf.cast({Polars::Date => Polars::Datetime}).collect
915
+ # # =>
916
+ # # shape: (3, 3)
917
+ # # ┌─────┬─────┬─────────────────────┐
918
+ # # │ foo ┆ bar ┆ ham │
919
+ # # │ --- ┆ --- ┆ --- │
920
+ # # │ i64 ┆ f64 ┆ datetime[μs] │
921
+ # # ╞═════╪═════╪═════════════════════╡
922
+ # # │ 1 ┆ 6.0 ┆ 2020-01-02 00:00:00 │
923
+ # # │ 2 ┆ 7.0 ┆ 2021-03-04 00:00:00 │
924
+ # # │ 3 ┆ 8.0 ┆ 2022-05-06 00:00:00 │
925
+ # # └─────┴─────┴─────────────────────┘
926
+ #
927
+ # @example Cast all frame columns to the specified dtype:
928
+ # lf.cast(Polars::String).collect.to_h(as_series: false)
929
+ # # => {"foo"=>["1", "2", "3"], "bar"=>["6.0", "7.0", "8.0"], "ham"=>["2020-01-02", "2021-03-04", "2022-05-06"]}
930
+ def cast(dtypes, strict: true)
931
+ if !dtypes.is_a?(Hash)
932
+ return _from_rbldf(_ldf.cast_all(dtypes, strict))
933
+ end
934
+
935
+ cast_map = {}
936
+ dtypes.each do |c, dtype|
937
+ dtype = Utils.parse_into_dtype(dtype)
938
+ cast_map.merge!(
939
+ c.is_a?(::String) ? {c => dtype} : Utils.expand_selector(self, c).to_h { |x| [x, dtype] }
940
+ )
941
+ end
942
+
943
+ _from_rbldf(_ldf.cast(cast_map, strict))
944
+ end
854
945
 
855
946
  # Create an empty copy of the current LazyFrame.
856
947
  #
@@ -1520,8 +1611,197 @@ module Polars
1520
1611
  # @param force_parallel [Boolean]
1521
1612
  # Force the physical plan to evaluate the computation of both DataFrames up to
1522
1613
  # the join in parallel.
1614
+ # @param coalesce [Boolean]
1615
+ # Coalescing behavior (merging of join columns).
1616
+ # - true: -> Always coalesce join columns.
1617
+ # - false: -> Never coalesce join columns.
1618
+ # Note that joining on any other expressions than `col` will turn off coalescing.
1523
1619
  #
1524
1620
  # @return [LazyFrame]
1621
+ #
1622
+ # @example
1623
+ # gdp = Polars::LazyFrame.new(
1624
+ # {
1625
+ # "date" => Polars.date_range(
1626
+ # Date.new(2016, 1, 1),
1627
+ # Date.new(2020, 1, 1),
1628
+ # "1y",
1629
+ # eager: true
1630
+ # ),
1631
+ # "gdp" => [4164, 4411, 4566, 4696, 4827]
1632
+ # }
1633
+ # )
1634
+ # gdp.collect
1635
+ # # =>
1636
+ # # shape: (5, 2)
1637
+ # # ┌────────────┬──────┐
1638
+ # # │ date ┆ gdp │
1639
+ # # │ --- ┆ --- │
1640
+ # # │ date ┆ i64 │
1641
+ # # ╞════════════╪══════╡
1642
+ # # │ 2016-01-01 ┆ 4164 │
1643
+ # # │ 2017-01-01 ┆ 4411 │
1644
+ # # │ 2018-01-01 ┆ 4566 │
1645
+ # # │ 2019-01-01 ┆ 4696 │
1646
+ # # │ 2020-01-01 ┆ 4827 │
1647
+ # # └────────────┴──────┘
1648
+ #
1649
+ # @example
1650
+ # population = Polars::LazyFrame.new(
1651
+ # {
1652
+ # "date" => [Date.new(2016, 3, 1), Date.new(2018, 8, 1), Date.new(2019, 1, 1)],
1653
+ # "population" => [82.19, 82.66, 83.12]
1654
+ # }
1655
+ # ).sort("date")
1656
+ # population.collect
1657
+ # # =>
1658
+ # # shape: (3, 2)
1659
+ # # ┌────────────┬────────────┐
1660
+ # # │ date ┆ population │
1661
+ # # │ --- ┆ --- │
1662
+ # # │ date ┆ f64 │
1663
+ # # ╞════════════╪════════════╡
1664
+ # # │ 2016-03-01 ┆ 82.19 │
1665
+ # # │ 2018-08-01 ┆ 82.66 │
1666
+ # # │ 2019-01-01 ┆ 83.12 │
1667
+ # # └────────────┴────────────┘
1668
+ #
1669
+ # @example Note how the dates don't quite match. If we join them using `join_asof` and `strategy: "backward"`, then each date from `population` which doesn't have an exact match is matched with the closest earlier date from `gdp`:
1670
+ # population.join_asof(gdp, on: "date", strategy: "backward").collect
1671
+ # # =>
1672
+ # # shape: (3, 3)
1673
+ # # ┌────────────┬────────────┬──────┐
1674
+ # # │ date ┆ population ┆ gdp │
1675
+ # # │ --- ┆ --- ┆ --- │
1676
+ # # │ date ┆ f64 ┆ i64 │
1677
+ # # ╞════════════╪════════════╪══════╡
1678
+ # # │ 2016-03-01 ┆ 82.19 ┆ 4164 │
1679
+ # # │ 2018-08-01 ┆ 82.66 ┆ 4566 │
1680
+ # # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
1681
+ # # └────────────┴────────────┴──────┘
1682
+ #
1683
+ # @example
1684
+ # population.join_asof(
1685
+ # gdp, on: "date", strategy: "backward", coalesce: false
1686
+ # ).collect
1687
+ # # =>
1688
+ # # shape: (3, 4)
1689
+ # # ┌────────────┬────────────┬────────────┬──────┐
1690
+ # # │ date ┆ population ┆ date_right ┆ gdp │
1691
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1692
+ # # │ date ┆ f64 ┆ date ┆ i64 │
1693
+ # # ╞════════════╪════════════╪════════════╪══════╡
1694
+ # # │ 2016-03-01 ┆ 82.19 ┆ 2016-01-01 ┆ 4164 │
1695
+ # # │ 2018-08-01 ┆ 82.66 ┆ 2018-01-01 ┆ 4566 │
1696
+ # # │ 2019-01-01 ┆ 83.12 ┆ 2019-01-01 ┆ 4696 │
1697
+ # # └────────────┴────────────┴────────────┴──────┘
1698
+ #
1699
+ # @example If we instead use `strategy: "forward"`, then each date from `population` which doesn't have an exact match is matched with the closest later date from `gdp`:
1700
+ # population.join_asof(gdp, on: "date", strategy: "forward").collect
1701
+ # # =>
1702
+ # # shape: (3, 3)
1703
+ # # ┌────────────┬────────────┬──────┐
1704
+ # # │ date ┆ population ┆ gdp │
1705
+ # # │ --- ┆ --- ┆ --- │
1706
+ # # │ date ┆ f64 ┆ i64 │
1707
+ # # ╞════════════╪════════════╪══════╡
1708
+ # # │ 2016-03-01 ┆ 82.19 ┆ 4411 │
1709
+ # # │ 2018-08-01 ┆ 82.66 ┆ 4696 │
1710
+ # # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
1711
+ # # └────────────┴────────────┴──────┘
1712
+ #
1713
+ # @example
1714
+ # population.join_asof(gdp, on: "date", strategy: "nearest").collect
1715
+ # # =>
1716
+ # # shape: (3, 3)
1717
+ # # ┌────────────┬────────────┬──────┐
1718
+ # # │ date ┆ population ┆ gdp │
1719
+ # # │ --- ┆ --- ┆ --- │
1720
+ # # │ date ┆ f64 ┆ i64 │
1721
+ # # ╞════════════╪════════════╪══════╡
1722
+ # # │ 2016-03-01 ┆ 82.19 ┆ 4164 │
1723
+ # # │ 2018-08-01 ┆ 82.66 ┆ 4696 │
1724
+ # # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
1725
+ # # └────────────┴────────────┴──────┘
1726
+ #
1727
+ # @example
1728
+ # gdp_dates = Polars.date_range(
1729
+ # Date.new(2016, 1, 1), Date.new(2020, 1, 1), "1y", eager: true
1730
+ # )
1731
+ # gdp2 = Polars::LazyFrame.new(
1732
+ # {
1733
+ # "country" => ["Germany"] * 5 + ["Netherlands"] * 5,
1734
+ # "date" => Polars.concat([gdp_dates, gdp_dates]),
1735
+ # "gdp" => [4164, 4411, 4566, 4696, 4827, 784, 833, 914, 910, 909]
1736
+ # }
1737
+ # ).sort("country", "date")
1738
+ # gdp2.collect
1739
+ # # =>
1740
+ # # shape: (10, 3)
1741
+ # # ┌─────────────┬────────────┬──────┐
1742
+ # # │ country ┆ date ┆ gdp │
1743
+ # # │ --- ┆ --- ┆ --- │
1744
+ # # │ str ┆ date ┆ i64 │
1745
+ # # ╞═════════════╪════════════╪══════╡
1746
+ # # │ Germany ┆ 2016-01-01 ┆ 4164 │
1747
+ # # │ Germany ┆ 2017-01-01 ┆ 4411 │
1748
+ # # │ Germany ┆ 2018-01-01 ┆ 4566 │
1749
+ # # │ Germany ┆ 2019-01-01 ┆ 4696 │
1750
+ # # │ Germany ┆ 2020-01-01 ┆ 4827 │
1751
+ # # │ Netherlands ┆ 2016-01-01 ┆ 784 │
1752
+ # # │ Netherlands ┆ 2017-01-01 ┆ 833 │
1753
+ # # │ Netherlands ┆ 2018-01-01 ┆ 914 │
1754
+ # # │ Netherlands ┆ 2019-01-01 ┆ 910 │
1755
+ # # │ Netherlands ┆ 2020-01-01 ┆ 909 │
1756
+ # # └─────────────┴────────────┴──────┘
1757
+ #
1758
+ # @example
1759
+ # pop2 = Polars::LazyFrame.new(
1760
+ # {
1761
+ # "country" => ["Germany"] * 3 + ["Netherlands"] * 3,
1762
+ # "date" => [
1763
+ # Date.new(2016, 3, 1),
1764
+ # Date.new(2018, 8, 1),
1765
+ # Date.new(2019, 1, 1),
1766
+ # Date.new(2016, 3, 1),
1767
+ # Date.new(2018, 8, 1),
1768
+ # Date.new(2019, 1, 1)
1769
+ # ],
1770
+ # "population" => [82.19, 82.66, 83.12, 17.11, 17.32, 17.40]
1771
+ # }
1772
+ # ).sort("country", "date")
1773
+ # pop2.collect
1774
+ # # =>
1775
+ # # shape: (6, 3)
1776
+ # # ┌─────────────┬────────────┬────────────┐
1777
+ # # │ country ┆ date ┆ population │
1778
+ # # │ --- ┆ --- ┆ --- │
1779
+ # # │ str ┆ date ┆ f64 │
1780
+ # # ╞═════════════╪════════════╪════════════╡
1781
+ # # │ Germany ┆ 2016-03-01 ┆ 82.19 │
1782
+ # # │ Germany ┆ 2018-08-01 ┆ 82.66 │
1783
+ # # │ Germany ┆ 2019-01-01 ┆ 83.12 │
1784
+ # # │ Netherlands ┆ 2016-03-01 ┆ 17.11 │
1785
+ # # │ Netherlands ┆ 2018-08-01 ┆ 17.32 │
1786
+ # # │ Netherlands ┆ 2019-01-01 ┆ 17.4 │
1787
+ # # └─────────────┴────────────┴────────────┘
1788
+ #
1789
+ # @example
1790
+ # pop2.join_asof(gdp2, by: "country", on: "date", strategy: "nearest").collect
1791
+ # # =>
1792
+ # # shape: (6, 4)
1793
+ # # ┌─────────────┬────────────┬────────────┬──────┐
1794
+ # # │ country ┆ date ┆ population ┆ gdp │
1795
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1796
+ # # │ str ┆ date ┆ f64 ┆ i64 │
1797
+ # # ╞═════════════╪════════════╪════════════╪══════╡
1798
+ # # │ Germany ┆ 2016-03-01 ┆ 82.19 ┆ 4164 │
1799
+ # # │ Germany ┆ 2018-08-01 ┆ 82.66 ┆ 4696 │
1800
+ # # │ Germany ┆ 2019-01-01 ┆ 83.12 ┆ 4696 │
1801
+ # # │ Netherlands ┆ 2016-03-01 ┆ 17.11 ┆ 784 │
1802
+ # # │ Netherlands ┆ 2018-08-01 ┆ 17.32 ┆ 910 │
1803
+ # # │ Netherlands ┆ 2019-01-01 ┆ 17.4 ┆ 910 │
1804
+ # # └─────────────┴────────────┴────────────┴──────┘
1525
1805
  def join_asof(
1526
1806
  other,
1527
1807
  left_on: nil,
@@ -1534,7 +1814,8 @@ module Polars
1534
1814
  suffix: "_right",
1535
1815
  tolerance: nil,
1536
1816
  allow_parallel: true,
1537
- force_parallel: false
1817
+ force_parallel: false,
1818
+ coalesce: true
1538
1819
  )
1539
1820
  if !other.is_a?(LazyFrame)
1540
1821
  raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
@@ -1589,7 +1870,8 @@ module Polars
1589
1870
  suffix,
1590
1871
  strategy,
1591
1872
  tolerance_num,
1592
- tolerance_str
1873
+ tolerance_str,
1874
+ coalesce
1593
1875
  )
1594
1876
  )
1595
1877
  end
@@ -1609,6 +1891,12 @@ module Polars
1609
1891
  # Join strategy.
1610
1892
  # @param suffix [String]
1611
1893
  # Suffix to append to columns with a duplicate name.
1894
+ # @param validate ['m:m', 'm:1', '1:m', '1:1']
1895
+ # Checks if join is of specified type.
1896
+ # * *many_to_many* - “m:m”: default, does not result in checks
1897
+ # * *one_to_one* - “1:1”: check if join keys are unique in both left and right datasets
1898
+ # * *one_to_many* - “1:m”: check if join keys are unique in left dataset
1899
+ # * *many_to_one* - “m:1”: check if join keys are unique in right dataset
1612
1900
  # @param join_nulls [Boolean]
1613
1901
  # Join on null values. By default null values will never produce matches.
1614
1902
  # @param allow_parallel [Boolean]
@@ -1617,6 +1905,12 @@ module Polars
1617
1905
  # @param force_parallel [Boolean]
1618
1906
  # Force the physical plan to evaluate the computation of both DataFrames up to
1619
1907
  # the join in parallel.
1908
+ # @param coalesce [Boolean]
1909
+ # Coalescing behavior (merging of join columns).
1910
+ # - nil: -> join specific.
1911
+ # - true: -> Always coalesce join columns.
1912
+ # - false: -> Never coalesce join columns.
1913
+ # Note that joining on any other expressions than `col` will turn off coalescing.
1620
1914
  #
1621
1915
  # @return [LazyFrame]
1622
1916
  #
@@ -1706,9 +2000,11 @@ module Polars
1706
2000
  on: nil,
1707
2001
  how: "inner",
1708
2002
  suffix: "_right",
2003
+ validate: "m:m",
1709
2004
  join_nulls: false,
1710
2005
  allow_parallel: true,
1711
- force_parallel: false
2006
+ force_parallel: false,
2007
+ coalesce: nil
1712
2008
  )
1713
2009
  if !other.is_a?(LazyFrame)
1714
2010
  raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
@@ -1719,7 +2015,7 @@ module Polars
1719
2015
  elsif how == "cross"
1720
2016
  return _from_rbldf(
1721
2017
  _ldf.join(
1722
- other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
2018
+ other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix, validate, coalesce
1723
2019
  )
1724
2020
  )
1725
2021
  end
@@ -1745,6 +2041,8 @@ module Polars
1745
2041
  join_nulls,
1746
2042
  how,
1747
2043
  suffix,
2044
+ validate,
2045
+ coalesce
1748
2046
  )
1749
2047
  )
1750
2048
  end
@@ -1879,6 +2177,55 @@ module Polars
1879
2177
  # - List of column names.
1880
2178
  #
1881
2179
  # @return [LazyFrame]
2180
+ #
2181
+ # @example Drop a single column by passing the name of that column.
2182
+ # lf = Polars::LazyFrame.new(
2183
+ # {
2184
+ # "foo" => [1, 2, 3],
2185
+ # "bar" => [6.0, 7.0, 8.0],
2186
+ # "ham" => ["a", "b", "c"]
2187
+ # }
2188
+ # )
2189
+ # lf.drop("ham").collect
2190
+ # # =>
2191
+ # # shape: (3, 2)
2192
+ # # ┌─────┬─────┐
2193
+ # # │ foo ┆ bar │
2194
+ # # │ --- ┆ --- │
2195
+ # # │ i64 ┆ f64 │
2196
+ # # ╞═════╪═════╡
2197
+ # # │ 1 ┆ 6.0 │
2198
+ # # │ 2 ┆ 7.0 │
2199
+ # # │ 3 ┆ 8.0 │
2200
+ # # └─────┴─────┘
2201
+ #
2202
+ # @example Drop multiple columns by passing a selector.
2203
+ # lf.drop(Polars.cs.numeric).collect
2204
+ # # =>
2205
+ # # shape: (3, 1)
2206
+ # # ┌─────┐
2207
+ # # │ ham │
2208
+ # # │ --- │
2209
+ # # │ str │
2210
+ # # ╞═════╡
2211
+ # # │ a │
2212
+ # # │ b │
2213
+ # # │ c │
2214
+ # # └─────┘
2215
+ #
2216
+ # @example Use positional arguments to drop multiple columns.
2217
+ # lf.drop("foo", "ham").collect
2218
+ # # =>
2219
+ # # shape: (3, 1)
2220
+ # # ┌─────┐
2221
+ # # │ bar │
2222
+ # # │ --- │
2223
+ # # │ f64 │
2224
+ # # ╞═════╡
2225
+ # # │ 6.0 │
2226
+ # # │ 7.0 │
2227
+ # # │ 8.0 │
2228
+ # # └─────┘
1882
2229
  def drop(*columns)
1883
2230
  drop_cols = Utils._expand_selectors(self, *columns)
1884
2231
  _from_rbldf(_ldf.drop(drop_cols))
@@ -1888,17 +2235,80 @@ module Polars
1888
2235
  #
1889
2236
  # @param mapping [Hash]
1890
2237
  # Key value pairs that map from old name to new name.
2238
+ # @param strict [Boolean]
2239
+ # Validate that all column names exist in the current schema,
2240
+ # and throw an exception if any do not. (Note that this parameter
2241
+ # is a no-op when passing a function to `mapping`).
1891
2242
  #
1892
2243
  # @return [LazyFrame]
1893
- def rename(mapping)
1894
- existing = mapping.keys
1895
- _new = mapping.values
1896
- _from_rbldf(_ldf.rename(existing, _new))
2244
+ #
2245
+ # @example
2246
+ # lf = Polars::LazyFrame.new(
2247
+ # {
2248
+ # "foo" => [1, 2, 3],
2249
+ # "bar" => [6, 7, 8],
2250
+ # "ham" => ["a", "b", "c"]
2251
+ # }
2252
+ # )
2253
+ # lf.rename({"foo" => "apple"}).collect
2254
+ # # =>
2255
+ # # shape: (3, 3)
2256
+ # # ┌───────┬─────┬─────┐
2257
+ # # │ apple ┆ bar ┆ ham │
2258
+ # # │ --- ┆ --- ┆ --- │
2259
+ # # │ i64 ┆ i64 ┆ str │
2260
+ # # ╞═══════╪═════╪═════╡
2261
+ # # │ 1 ┆ 6 ┆ a │
2262
+ # # │ 2 ┆ 7 ┆ b │
2263
+ # # │ 3 ┆ 8 ┆ c │
2264
+ # # └───────┴─────┴─────┘
2265
+ #
2266
+ # @example
2267
+ # lf.rename(->(column_name) { "c" + column_name[1..] }).collect
2268
+ # # =>
2269
+ # # shape: (3, 3)
2270
+ # # ┌─────┬─────┬─────┐
2271
+ # # │ coo ┆ car ┆ cam │
2272
+ # # │ --- ┆ --- ┆ --- │
2273
+ # # │ i64 ┆ i64 ┆ str │
2274
+ # # ╞═════╪═════╪═════╡
2275
+ # # │ 1 ┆ 6 ┆ a │
2276
+ # # │ 2 ┆ 7 ┆ b │
2277
+ # # │ 3 ┆ 8 ┆ c │
2278
+ # # └─────┴─────┴─────┘
2279
+ def rename(mapping, strict: true)
2280
+ if mapping.respond_to?(:call)
2281
+ select(F.all.name.map(&mapping))
2282
+ else
2283
+ existing = mapping.keys
2284
+ _new = mapping.values
2285
+ _from_rbldf(_ldf.rename(existing, _new, strict))
2286
+ end
1897
2287
  end
1898
2288
 
1899
2289
  # Reverse the DataFrame.
1900
2290
  #
1901
2291
  # @return [LazyFrame]
2292
+ #
2293
+ # @example
2294
+ # lf = Polars::LazyFrame.new(
2295
+ # {
2296
+ # "key" => ["a", "b", "c"],
2297
+ # "val" => [1, 2, 3]
2298
+ # }
2299
+ # )
2300
+ # lf.reverse.collect
2301
+ # # =>
2302
+ # # shape: (3, 2)
2303
+ # # ┌─────┬─────┐
2304
+ # # │ key ┆ val │
2305
+ # # │ --- ┆ --- │
2306
+ # # │ str ┆ i64 │
2307
+ # # ╞═════╪═════╡
2308
+ # # │ c ┆ 3 │
2309
+ # # │ b ┆ 2 │
2310
+ # # │ a ┆ 1 │
2311
+ # # └─────┴─────┘
1902
2312
  def reverse
1903
2313
  _from_rbldf(_ldf.reverse)
1904
2314
  end
@@ -2048,8 +2458,43 @@ module Polars
2048
2458
  # Consider using the {#fetch} operation if you only want to test your
2049
2459
  # query. The {#fetch} operation will load the first `n` rows at the scan
2050
2460
  # level, whereas the {#head}/{#limit} are applied at the end.
2461
+ #
2462
+ # @example
2463
+ # lf = Polars::LazyFrame.new(
2464
+ # {
2465
+ # "a" => [1, 2, 3, 4, 5, 6],
2466
+ # "b" => [7, 8, 9, 10, 11, 12]
2467
+ # }
2468
+ # )
2469
+ # lf.limit.collect
2470
+ # # =>
2471
+ # # shape: (5, 2)
2472
+ # # ┌─────┬─────┐
2473
+ # # │ a ┆ b │
2474
+ # # │ --- ┆ --- │
2475
+ # # │ i64 ┆ i64 │
2476
+ # # ╞═════╪═════╡
2477
+ # # │ 1 ┆ 7 │
2478
+ # # │ 2 ┆ 8 │
2479
+ # # │ 3 ┆ 9 │
2480
+ # # │ 4 ┆ 10 │
2481
+ # # │ 5 ┆ 11 │
2482
+ # # └─────┴─────┘
2483
+ #
2484
+ # @example
2485
+ # lf.limit(2).collect
2486
+ # # =>
2487
+ # # shape: (2, 2)
2488
+ # # ┌─────┬─────┐
2489
+ # # │ a ┆ b │
2490
+ # # │ --- ┆ --- │
2491
+ # # │ i64 ┆ i64 │
2492
+ # # ╞═════╪═════╡
2493
+ # # │ 1 ┆ 7 │
2494
+ # # │ 2 ┆ 8 │
2495
+ # # └─────┴─────┘
2051
2496
  def limit(n = 5)
2052
- head(5)
2497
+ head(n)
2053
2498
  end
2054
2499
 
2055
2500
  # Get the first `n` rows.
@@ -2063,6 +2508,41 @@ module Polars
2063
2508
  # Consider using the {#fetch} operation if you only want to test your
2064
2509
  # query. The {#fetch} operation will load the first `n` rows at the scan
2065
2510
  # level, whereas the {#head}/{#limit} are applied at the end.
2511
+ #
2512
+ # @example
2513
+ # lf = Polars::LazyFrame.new(
2514
+ # {
2515
+ # "a" => [1, 2, 3, 4, 5, 6],
2516
+ # "b" => [7, 8, 9, 10, 11, 12]
2517
+ # }
2518
+ # )
2519
+ # lf.head.collect
2520
+ # # =>
2521
+ # # shape: (5, 2)
2522
+ # # ┌─────┬─────┐
2523
+ # # │ a ┆ b │
2524
+ # # │ --- ┆ --- │
2525
+ # # │ i64 ┆ i64 │
2526
+ # # ╞═════╪═════╡
2527
+ # # │ 1 ┆ 7 │
2528
+ # # │ 2 ┆ 8 │
2529
+ # # │ 3 ┆ 9 │
2530
+ # # │ 4 ┆ 10 │
2531
+ # # │ 5 ┆ 11 │
2532
+ # # └─────┴─────┘
2533
+ #
2534
+ # @example
2535
+ # lf.head(2).collect
2536
+ # # =>
2537
+ # # shape: (2, 2)
2538
+ # # ┌─────┬─────┐
2539
+ # # │ a ┆ b │
2540
+ # # │ --- ┆ --- │
2541
+ # # │ i64 ┆ i64 │
2542
+ # # ╞═════╪═════╡
2543
+ # # │ 1 ┆ 7 │
2544
+ # # │ 2 ┆ 8 │
2545
+ # # └─────┴─────┘
2066
2546
  def head(n = 5)
2067
2547
  slice(0, n)
2068
2548
  end
@@ -2073,6 +2553,41 @@ module Polars
2073
2553
  # Number of rows.
2074
2554
  #
2075
2555
  # @return [LazyFrame]
2556
+ #
2557
+ # @example
2558
+ # lf = Polars::LazyFrame.new(
2559
+ # {
2560
+ # "a" => [1, 2, 3, 4, 5, 6],
2561
+ # "b" => [7, 8, 9, 10, 11, 12]
2562
+ # }
2563
+ # )
2564
+ # lf.tail.collect
2565
+ # # =>
2566
+ # # shape: (5, 2)
2567
+ # # ┌─────┬─────┐
2568
+ # # │ a ┆ b │
2569
+ # # │ --- ┆ --- │
2570
+ # # │ i64 ┆ i64 │
2571
+ # # ╞═════╪═════╡
2572
+ # # │ 2 ┆ 8 │
2573
+ # # │ 3 ┆ 9 │
2574
+ # # │ 4 ┆ 10 │
2575
+ # # │ 5 ┆ 11 │
2576
+ # # │ 6 ┆ 12 │
2577
+ # # └─────┴─────┘
2578
+ #
2579
+ # @example
2580
+ # lf.tail(2).collect
2581
+ # # =>
2582
+ # # shape: (2, 2)
2583
+ # # ┌─────┬─────┐
2584
+ # # │ a ┆ b │
2585
+ # # │ --- ┆ --- │
2586
+ # # │ i64 ┆ i64 │
2587
+ # # ╞═════╪═════╡
2588
+ # # │ 5 ┆ 11 │
2589
+ # # │ 6 ┆ 12 │
2590
+ # # └─────┴─────┘
2076
2591
  def tail(n = 5)
2077
2592
  _from_rbldf(_ldf.tail(n))
2078
2593
  end
@@ -2080,6 +2595,24 @@ module Polars
2080
2595
  # Get the last row of the DataFrame.
2081
2596
  #
2082
2597
  # @return [LazyFrame]
2598
+ #
2599
+ # @example
2600
+ # lf = Polars::LazyFrame.new(
2601
+ # {
2602
+ # "a" => [1, 5, 3],
2603
+ # "b" => [2, 4, 6]
2604
+ # }
2605
+ # )
2606
+ # lf.last.collect
2607
+ # # =>
2608
+ # # shape: (1, 2)
2609
+ # # ┌─────┬─────┐
2610
+ # # │ a ┆ b │
2611
+ # # │ --- ┆ --- │
2612
+ # # │ i64 ┆ i64 │
2613
+ # # ╞═════╪═════╡
2614
+ # # │ 3 ┆ 6 │
2615
+ # # └─────┴─────┘
2083
2616
  def last
2084
2617
  tail(1)
2085
2618
  end
@@ -2087,6 +2620,24 @@ module Polars
2087
2620
  # Get the first row of the DataFrame.
2088
2621
  #
2089
2622
  # @return [LazyFrame]
2623
+ #
2624
+ # @example
2625
+ # lf = Polars::LazyFrame.new(
2626
+ # {
2627
+ # "a" => [1, 5, 3],
2628
+ # "b" => [2, 4, 6]
2629
+ # }
2630
+ # )
2631
+ # lf.first.collect
2632
+ # # =>
2633
+ # # shape: (1, 2)
2634
+ # # ┌─────┬─────┐
2635
+ # # │ a ┆ b │
2636
+ # # │ --- ┆ --- │
2637
+ # # │ i64 ┆ i64 │
2638
+ # # ╞═════╪═════╡
2639
+ # # │ 1 ┆ 2 │
2640
+ # # └─────┴─────┘
2090
2641
  def first
2091
2642
  slice(0, 1)
2092
2643
  end
@@ -2152,6 +2703,72 @@ module Polars
2152
2703
  # Fill null values using the specified value or strategy.
2153
2704
  #
2154
2705
  # @return [LazyFrame]
2706
+ #
2707
+ # @example
2708
+ # lf = Polars::LazyFrame.new(
2709
+ # {
2710
+ # "a" => [1, 2, nil, 4],
2711
+ # "b" => [0.5, 4, nil, 13]
2712
+ # }
2713
+ # )
2714
+ # lf.fill_null(99).collect
2715
+ # # =>
2716
+ # # shape: (4, 2)
2717
+ # # ┌─────┬──────┐
2718
+ # # │ a ┆ b │
2719
+ # # │ --- ┆ --- │
2720
+ # # │ i64 ┆ f64 │
2721
+ # # ╞═════╪══════╡
2722
+ # # │ 1 ┆ 0.5 │
2723
+ # # │ 2 ┆ 4.0 │
2724
+ # # │ 99 ┆ 99.0 │
2725
+ # # │ 4 ┆ 13.0 │
2726
+ # # └─────┴──────┘
2727
+ #
2728
+ # @example
2729
+ # lf.fill_null(strategy: "forward").collect
2730
+ # # =>
2731
+ # # shape: (4, 2)
2732
+ # # ┌─────┬──────┐
2733
+ # # │ a ┆ b │
2734
+ # # │ --- ┆ --- │
2735
+ # # │ i64 ┆ f64 │
2736
+ # # ╞═════╪══════╡
2737
+ # # │ 1 ┆ 0.5 │
2738
+ # # │ 2 ┆ 4.0 │
2739
+ # # │ 2 ┆ 4.0 │
2740
+ # # │ 4 ┆ 13.0 │
2741
+ # # └─────┴──────┘
2742
+ #
2743
+ # @example
2744
+ # lf.fill_null(strategy: "max").collect
2745
+ # # =>
2746
+ # # shape: (4, 2)
2747
+ # # ┌─────┬──────┐
2748
+ # # │ a ┆ b │
2749
+ # # │ --- ┆ --- │
2750
+ # # │ i64 ┆ f64 │
2751
+ # # ╞═════╪══════╡
2752
+ # # │ 1 ┆ 0.5 │
2753
+ # # │ 2 ┆ 4.0 │
2754
+ # # │ 4 ┆ 13.0 │
2755
+ # # │ 4 ┆ 13.0 │
2756
+ # # └─────┴──────┘
2757
+ #
2758
+ # @example
2759
+ # lf.fill_null(strategy: "zero").collect
2760
+ # # =>
2761
+ # # shape: (4, 2)
2762
+ # # ┌─────┬──────┐
2763
+ # # │ a ┆ b │
2764
+ # # │ --- ┆ --- │
2765
+ # # │ i64 ┆ f64 │
2766
+ # # ╞═════╪══════╡
2767
+ # # │ 1 ┆ 0.5 │
2768
+ # # │ 2 ┆ 4.0 │
2769
+ # # │ 0 ┆ 0.0 │
2770
+ # # │ 4 ┆ 13.0 │
2771
+ # # └─────┴──────┘
2155
2772
  def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: nil)
2156
2773
  select(Polars.all.fill_null(value, strategy: strategy, limit: limit))
2157
2774
  end
@@ -2431,6 +3048,53 @@ module Polars
2431
3048
  # Which of the duplicate rows to keep.
2432
3049
  #
2433
3050
  # @return [LazyFrame]
3051
+ #
3052
+ # @example
3053
+ # lf = Polars::LazyFrame.new(
3054
+ # {
3055
+ # "foo" => [1, 2, 3, 1],
3056
+ # "bar" => ["a", "a", "a", "a"],
3057
+ # "ham" => ["b", "b", "b", "b"]
3058
+ # }
3059
+ # )
3060
+ # lf.unique(maintain_order: true).collect
3061
+ # # =>
3062
+ # # shape: (3, 3)
3063
+ # # ┌─────┬─────┬─────┐
3064
+ # # │ foo ┆ bar ┆ ham │
3065
+ # # │ --- ┆ --- ┆ --- │
3066
+ # # │ i64 ┆ str ┆ str │
3067
+ # # ╞═════╪═════╪═════╡
3068
+ # # │ 1 ┆ a ┆ b │
3069
+ # # │ 2 ┆ a ┆ b │
3070
+ # # │ 3 ┆ a ┆ b │
3071
+ # # └─────┴─────┴─────┘
3072
+ #
3073
+ # @example
3074
+ # lf.unique(subset: ["bar", "ham"], maintain_order: true).collect
3075
+ # # =>
3076
+ # # shape: (1, 3)
3077
+ # # ┌─────┬─────┬─────┐
3078
+ # # │ foo ┆ bar ┆ ham │
3079
+ # # │ --- ┆ --- ┆ --- │
3080
+ # # │ i64 ┆ str ┆ str │
3081
+ # # ╞═════╪═════╪═════╡
3082
+ # # │ 1 ┆ a ┆ b │
3083
+ # # └─────┴─────┴─────┘
3084
+ #
3085
+ # @example
3086
+ # lf.unique(keep: "last", maintain_order: true).collect
3087
+ # # =>
3088
+ # # shape: (3, 3)
3089
+ # # ┌─────┬─────┬─────┐
3090
+ # # │ foo ┆ bar ┆ ham │
3091
+ # # │ --- ┆ --- ┆ --- │
3092
+ # # │ i64 ┆ str ┆ str │
3093
+ # # ╞═════╪═════╪═════╡
3094
+ # # │ 2 ┆ a ┆ b │
3095
+ # # │ 3 ┆ a ┆ b │
3096
+ # # │ 1 ┆ a ┆ b │
3097
+ # # └─────┴─────┴─────┘
2434
3098
  def unique(maintain_order: true, subset: nil, keep: "first")
2435
3099
  if !subset.nil? && !subset.is_a?(::Array)
2436
3100
  subset = [subset]
@@ -2504,7 +3168,7 @@ module Polars
2504
3168
  # "c" => [2, 4, 6]
2505
3169
  # }
2506
3170
  # )
2507
- # lf.unpivot(Polars::Selectors.numeric, index: "a").collect
3171
+ # lf.unpivot(Polars.cs.numeric, index: "a").collect
2508
3172
  # # =>
2509
3173
  # # shape: (6, 3)
2510
3174
  # # ┌─────┬──────────┬───────┐
@@ -2530,8 +3194,8 @@ module Polars
2530
3194
  warn "The `streamable` parameter for `LazyFrame.unpivot` is deprecated"
2531
3195
  end
2532
3196
 
2533
- on = on.nil? ? [] : Utils._expand_selectors(self, on)
2534
- index = index.nil? ? [] : Utils._expand_selectors(self, index)
3197
+ on = on.nil? ? [] : Utils.parse_into_list_of_expressions(on)
3198
+ index = index.nil? ? [] : Utils.parse_into_list_of_expressions(index)
2535
3199
 
2536
3200
  _from_rbldf(
2537
3201
  _ldf.unpivot(on, index, value_name, variable_name)