polars-df 0.13.0-arm64-darwin → 0.15.0-arm64-darwin

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -0
  3. data/Cargo.lock +1368 -319
  4. data/LICENSE-THIRD-PARTY.txt +24439 -12853
  5. data/LICENSE.txt +1 -0
  6. data/README.md +1 -2
  7. data/lib/polars/3.1/polars.bundle +0 -0
  8. data/lib/polars/3.2/polars.bundle +0 -0
  9. data/lib/polars/3.3/polars.bundle +0 -0
  10. data/lib/polars/batched_csv_reader.rb +0 -2
  11. data/lib/polars/binary_expr.rb +133 -9
  12. data/lib/polars/binary_name_space.rb +101 -6
  13. data/lib/polars/config.rb +4 -0
  14. data/lib/polars/data_frame.rb +285 -62
  15. data/lib/polars/data_type_group.rb +28 -0
  16. data/lib/polars/data_types.rb +2 -0
  17. data/lib/polars/date_time_expr.rb +244 -0
  18. data/lib/polars/date_time_name_space.rb +87 -0
  19. data/lib/polars/expr.rb +109 -8
  20. data/lib/polars/functions/as_datatype.rb +51 -2
  21. data/lib/polars/functions/col.rb +1 -1
  22. data/lib/polars/functions/eager.rb +1 -3
  23. data/lib/polars/functions/lazy.rb +88 -10
  24. data/lib/polars/functions/range/time_range.rb +21 -21
  25. data/lib/polars/io/csv.rb +14 -16
  26. data/lib/polars/io/database.rb +2 -2
  27. data/lib/polars/io/ipc.rb +14 -12
  28. data/lib/polars/io/ndjson.rb +10 -0
  29. data/lib/polars/io/parquet.rb +168 -111
  30. data/lib/polars/lazy_frame.rb +649 -15
  31. data/lib/polars/list_name_space.rb +169 -0
  32. data/lib/polars/selectors.rb +1144 -0
  33. data/lib/polars/series.rb +470 -40
  34. data/lib/polars/string_cache.rb +27 -1
  35. data/lib/polars/string_expr.rb +0 -1
  36. data/lib/polars/string_name_space.rb +73 -3
  37. data/lib/polars/struct_name_space.rb +31 -7
  38. data/lib/polars/utils/various.rb +5 -1
  39. data/lib/polars/utils.rb +45 -10
  40. data/lib/polars/version.rb +1 -1
  41. data/lib/polars.rb +2 -1
  42. metadata +4 -3
  43. data/lib/polars/functions.rb +0 -57
@@ -848,9 +848,70 @@ module Polars
848
848
  _from_rbldf(_ldf.cache)
849
849
  end
850
850
 
851
- # TODO
852
- # def cast
853
- # end
851
+ # Cast LazyFrame column(s) to the specified dtype(s).
852
+ #
853
+ # @param dtypes [Hash]
854
+ # Mapping of column names (or selector) to dtypes, or a single dtype
855
+ # to which all columns will be cast.
856
+ # @param strict [Boolean]
857
+ # Throw an error if a cast could not be done (for instance, due to an
858
+ # overflow).
859
+ #
860
+ # @return [LazyFrame]
861
+ #
862
+ # @example Cast specific frame columns to the specified dtypes:
863
+ # lf = Polars::LazyFrame.new(
864
+ # {
865
+ # "foo" => [1, 2, 3],
866
+ # "bar" => [6.0, 7.0, 8.0],
867
+ # "ham" => [Date.new(2020, 1, 2), Date.new(2021, 3, 4), Date.new(2022, 5, 6)]
868
+ # }
869
+ # )
870
+ # lf.cast({"foo" => Polars::Float32, "bar" => Polars::UInt8}).collect
871
+ # # =>
872
+ # # shape: (3, 3)
873
+ # # ┌─────┬─────┬────────────┐
874
+ # # │ foo ┆ bar ┆ ham │
875
+ # # │ --- ┆ --- ┆ --- │
876
+ # # │ f32 ┆ u8 ┆ date │
877
+ # # ╞═════╪═════╪════════════╡
878
+ # # │ 1.0 ┆ 6 ┆ 2020-01-02 │
879
+ # # │ 2.0 ┆ 7 ┆ 2021-03-04 │
880
+ # # │ 3.0 ┆ 8 ┆ 2022-05-06 │
881
+ # # └─────┴─────┴────────────┘
882
+ #
883
+ # @example Cast all frame columns matching one dtype (or dtype group) to another dtype:
884
+ # lf.cast({Polars::Date => Polars::Datetime}).collect
885
+ # # =>
886
+ # # shape: (3, 3)
887
+ # # ┌─────┬─────┬─────────────────────┐
888
+ # # │ foo ┆ bar ┆ ham │
889
+ # # │ --- ┆ --- ┆ --- │
890
+ # # │ i64 ┆ f64 ┆ datetime[μs] │
891
+ # # ╞═════╪═════╪═════════════════════╡
892
+ # # │ 1 ┆ 6.0 ┆ 2020-01-02 00:00:00 │
893
+ # # │ 2 ┆ 7.0 ┆ 2021-03-04 00:00:00 │
894
+ # # │ 3 ┆ 8.0 ┆ 2022-05-06 00:00:00 │
895
+ # # └─────┴─────┴─────────────────────┘
896
+ #
897
+ # @example Cast all frame columns to the specified dtype:
898
+ # lf.cast(Polars::String).collect.to_h(as_series: false)
899
+ # # => {"foo"=>["1", "2", "3"], "bar"=>["6.0", "7.0", "8.0"], "ham"=>["2020-01-02", "2021-03-04", "2022-05-06"]}
900
+ def cast(dtypes, strict: true)
901
+ if !dtypes.is_a?(Hash)
902
+ return _from_rbldf(_ldf.cast_all(dtypes, strict))
903
+ end
904
+
905
+ cast_map = {}
906
+ dtypes.each do |c, dtype|
907
+ dtype = Utils.parse_into_dtype(dtype)
908
+ cast_map.merge!(
909
+ c.is_a?(::String) ? {c => dtype} : Utils.expand_selector(self, c).to_h { |x| [x, dtype] }
910
+ )
911
+ end
912
+
913
+ _from_rbldf(_ldf.cast(cast_map, strict))
914
+ end
854
915
 
855
916
  # Create an empty copy of the current LazyFrame.
856
917
  #
@@ -1520,8 +1581,197 @@ module Polars
1520
1581
  # @param force_parallel [Boolean]
1521
1582
  # Force the physical plan to evaluate the computation of both DataFrames up to
1522
1583
  # the join in parallel.
1584
+ # @param coalesce [Boolean]
1585
+ # Coalescing behavior (merging of join columns).
1586
+ # - true: -> Always coalesce join columns.
1587
+ # - false: -> Never coalesce join columns.
1588
+ # Note that joining on any other expressions than `col` will turn off coalescing.
1523
1589
  #
1524
1590
  # @return [LazyFrame]
1591
+ #
1592
+ # @example
1593
+ # gdp = Polars::LazyFrame.new(
1594
+ # {
1595
+ # "date" => Polars.date_range(
1596
+ # Date.new(2016, 1, 1),
1597
+ # Date.new(2020, 1, 1),
1598
+ # "1y",
1599
+ # eager: true
1600
+ # ),
1601
+ # "gdp" => [4164, 4411, 4566, 4696, 4827]
1602
+ # }
1603
+ # )
1604
+ # gdp.collect
1605
+ # # =>
1606
+ # # shape: (5, 2)
1607
+ # # ┌────────────┬──────┐
1608
+ # # │ date ┆ gdp │
1609
+ # # │ --- ┆ --- │
1610
+ # # │ date ┆ i64 │
1611
+ # # ╞════════════╪══════╡
1612
+ # # │ 2016-01-01 ┆ 4164 │
1613
+ # # │ 2017-01-01 ┆ 4411 │
1614
+ # # │ 2018-01-01 ┆ 4566 │
1615
+ # # │ 2019-01-01 ┆ 4696 │
1616
+ # # │ 2020-01-01 ┆ 4827 │
1617
+ # # └────────────┴──────┘
1618
+ #
1619
+ # @example
1620
+ # population = Polars::LazyFrame.new(
1621
+ # {
1622
+ # "date" => [Date.new(2016, 3, 1), Date.new(2018, 8, 1), Date.new(2019, 1, 1)],
1623
+ # "population" => [82.19, 82.66, 83.12]
1624
+ # }
1625
+ # ).sort("date")
1626
+ # population.collect
1627
+ # # =>
1628
+ # # shape: (3, 2)
1629
+ # # ┌────────────┬────────────┐
1630
+ # # │ date ┆ population │
1631
+ # # │ --- ┆ --- │
1632
+ # # │ date ┆ f64 │
1633
+ # # ╞════════════╪════════════╡
1634
+ # # │ 2016-03-01 ┆ 82.19 │
1635
+ # # │ 2018-08-01 ┆ 82.66 │
1636
+ # # │ 2019-01-01 ┆ 83.12 │
1637
+ # # └────────────┴────────────┘
1638
+ #
1639
+ # @example Note how the dates don't quite match. If we join them using `join_asof` and `strategy: "backward"`, then each date from `population` which doesn't have an exact match is matched with the closest earlier date from `gdp`:
1640
+ # population.join_asof(gdp, on: "date", strategy: "backward").collect
1641
+ # # =>
1642
+ # # shape: (3, 3)
1643
+ # # ┌────────────┬────────────┬──────┐
1644
+ # # │ date ┆ population ┆ gdp │
1645
+ # # │ --- ┆ --- ┆ --- │
1646
+ # # │ date ┆ f64 ┆ i64 │
1647
+ # # ╞════════════╪════════════╪══════╡
1648
+ # # │ 2016-03-01 ┆ 82.19 ┆ 4164 │
1649
+ # # │ 2018-08-01 ┆ 82.66 ┆ 4566 │
1650
+ # # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
1651
+ # # └────────────┴────────────┴──────┘
1652
+ #
1653
+ # @example
1654
+ # population.join_asof(
1655
+ # gdp, on: "date", strategy: "backward", coalesce: false
1656
+ # ).collect
1657
+ # # =>
1658
+ # # shape: (3, 4)
1659
+ # # ┌────────────┬────────────┬────────────┬──────┐
1660
+ # # │ date ┆ population ┆ date_right ┆ gdp │
1661
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1662
+ # # │ date ┆ f64 ┆ date ┆ i64 │
1663
+ # # ╞════════════╪════════════╪════════════╪══════╡
1664
+ # # │ 2016-03-01 ┆ 82.19 ┆ 2016-01-01 ┆ 4164 │
1665
+ # # │ 2018-08-01 ┆ 82.66 ┆ 2018-01-01 ┆ 4566 │
1666
+ # # │ 2019-01-01 ┆ 83.12 ┆ 2019-01-01 ┆ 4696 │
1667
+ # # └────────────┴────────────┴────────────┴──────┘
1668
+ #
1669
+ # @example If we instead use `strategy: "forward"`, then each date from `population` which doesn't have an exact match is matched with the closest later date from `gdp`:
1670
+ # population.join_asof(gdp, on: "date", strategy: "forward").collect
1671
+ # # =>
1672
+ # # shape: (3, 3)
1673
+ # # ┌────────────┬────────────┬──────┐
1674
+ # # │ date ┆ population ┆ gdp │
1675
+ # # │ --- ┆ --- ┆ --- │
1676
+ # # │ date ┆ f64 ┆ i64 │
1677
+ # # ╞════════════╪════════════╪══════╡
1678
+ # # │ 2016-03-01 ┆ 82.19 ┆ 4411 │
1679
+ # # │ 2018-08-01 ┆ 82.66 ┆ 4696 │
1680
+ # # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
1681
+ # # └────────────┴────────────┴──────┘
1682
+ #
1683
+ # @example
1684
+ # population.join_asof(gdp, on: "date", strategy: "nearest").collect
1685
+ # # =>
1686
+ # # shape: (3, 3)
1687
+ # # ┌────────────┬────────────┬──────┐
1688
+ # # │ date ┆ population ┆ gdp │
1689
+ # # │ --- ┆ --- ┆ --- │
1690
+ # # │ date ┆ f64 ┆ i64 │
1691
+ # # ╞════════════╪════════════╪══════╡
1692
+ # # │ 2016-03-01 ┆ 82.19 ┆ 4164 │
1693
+ # # │ 2018-08-01 ┆ 82.66 ┆ 4696 │
1694
+ # # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
1695
+ # # └────────────┴────────────┴──────┘
1696
+ #
1697
+ # @example
1698
+ # gdp_dates = Polars.date_range(
1699
+ # Date.new(2016, 1, 1), Date.new(2020, 1, 1), "1y", eager: true
1700
+ # )
1701
+ # gdp2 = Polars::LazyFrame.new(
1702
+ # {
1703
+ # "country" => ["Germany"] * 5 + ["Netherlands"] * 5,
1704
+ # "date" => Polars.concat([gdp_dates, gdp_dates]),
1705
+ # "gdp" => [4164, 4411, 4566, 4696, 4827, 784, 833, 914, 910, 909]
1706
+ # }
1707
+ # ).sort("country", "date")
1708
+ # gdp2.collect
1709
+ # # =>
1710
+ # # shape: (10, 3)
1711
+ # # ┌─────────────┬────────────┬──────┐
1712
+ # # │ country ┆ date ┆ gdp │
1713
+ # # │ --- ┆ --- ┆ --- │
1714
+ # # │ str ┆ date ┆ i64 │
1715
+ # # ╞═════════════╪════════════╪══════╡
1716
+ # # │ Germany ┆ 2016-01-01 ┆ 4164 │
1717
+ # # │ Germany ┆ 2017-01-01 ┆ 4411 │
1718
+ # # │ Germany ┆ 2018-01-01 ┆ 4566 │
1719
+ # # │ Germany ┆ 2019-01-01 ┆ 4696 │
1720
+ # # │ Germany ┆ 2020-01-01 ┆ 4827 │
1721
+ # # │ Netherlands ┆ 2016-01-01 ┆ 784 │
1722
+ # # │ Netherlands ┆ 2017-01-01 ┆ 833 │
1723
+ # # │ Netherlands ┆ 2018-01-01 ┆ 914 │
1724
+ # # │ Netherlands ┆ 2019-01-01 ┆ 910 │
1725
+ # # │ Netherlands ┆ 2020-01-01 ┆ 909 │
1726
+ # # └─────────────┴────────────┴──────┘
1727
+ #
1728
+ # @example
1729
+ # pop2 = Polars::LazyFrame.new(
1730
+ # {
1731
+ # "country" => ["Germany"] * 3 + ["Netherlands"] * 3,
1732
+ # "date" => [
1733
+ # Date.new(2016, 3, 1),
1734
+ # Date.new(2018, 8, 1),
1735
+ # Date.new(2019, 1, 1),
1736
+ # Date.new(2016, 3, 1),
1737
+ # Date.new(2018, 8, 1),
1738
+ # Date.new(2019, 1, 1)
1739
+ # ],
1740
+ # "population" => [82.19, 82.66, 83.12, 17.11, 17.32, 17.40]
1741
+ # }
1742
+ # ).sort("country", "date")
1743
+ # pop2.collect
1744
+ # # =>
1745
+ # # shape: (6, 3)
1746
+ # # ┌─────────────┬────────────┬────────────┐
1747
+ # # │ country ┆ date ┆ population │
1748
+ # # │ --- ┆ --- ┆ --- │
1749
+ # # │ str ┆ date ┆ f64 │
1750
+ # # ╞═════════════╪════════════╪════════════╡
1751
+ # # │ Germany ┆ 2016-03-01 ┆ 82.19 │
1752
+ # # │ Germany ┆ 2018-08-01 ┆ 82.66 │
1753
+ # # │ Germany ┆ 2019-01-01 ┆ 83.12 │
1754
+ # # │ Netherlands ┆ 2016-03-01 ┆ 17.11 │
1755
+ # # │ Netherlands ┆ 2018-08-01 ┆ 17.32 │
1756
+ # # │ Netherlands ┆ 2019-01-01 ┆ 17.4 │
1757
+ # # └─────────────┴────────────┴────────────┘
1758
+ #
1759
+ # @example
1760
+ # pop2.join_asof(gdp2, by: "country", on: "date", strategy: "nearest").collect
1761
+ # # =>
1762
+ # # shape: (6, 4)
1763
+ # # ┌─────────────┬────────────┬────────────┬──────┐
1764
+ # # │ country ┆ date ┆ population ┆ gdp │
1765
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1766
+ # # │ str ┆ date ┆ f64 ┆ i64 │
1767
+ # # ╞═════════════╪════════════╪════════════╪══════╡
1768
+ # # │ Germany ┆ 2016-03-01 ┆ 82.19 ┆ 4164 │
1769
+ # # │ Germany ┆ 2018-08-01 ┆ 82.66 ┆ 4696 │
1770
+ # # │ Germany ┆ 2019-01-01 ┆ 83.12 ┆ 4696 │
1771
+ # # │ Netherlands ┆ 2016-03-01 ┆ 17.11 ┆ 784 │
1772
+ # # │ Netherlands ┆ 2018-08-01 ┆ 17.32 ┆ 910 │
1773
+ # # │ Netherlands ┆ 2019-01-01 ┆ 17.4 ┆ 910 │
1774
+ # # └─────────────┴────────────┴────────────┴──────┘
1525
1775
  def join_asof(
1526
1776
  other,
1527
1777
  left_on: nil,
@@ -1534,7 +1784,8 @@ module Polars
1534
1784
  suffix: "_right",
1535
1785
  tolerance: nil,
1536
1786
  allow_parallel: true,
1537
- force_parallel: false
1787
+ force_parallel: false,
1788
+ coalesce: true
1538
1789
  )
1539
1790
  if !other.is_a?(LazyFrame)
1540
1791
  raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
@@ -1589,7 +1840,8 @@ module Polars
1589
1840
  suffix,
1590
1841
  strategy,
1591
1842
  tolerance_num,
1592
- tolerance_str
1843
+ tolerance_str,
1844
+ coalesce
1593
1845
  )
1594
1846
  )
1595
1847
  end
@@ -1609,6 +1861,12 @@ module Polars
1609
1861
  # Join strategy.
1610
1862
  # @param suffix [String]
1611
1863
  # Suffix to append to columns with a duplicate name.
1864
+ # @param validate ['m:m', 'm:1', '1:m', '1:1']
1865
+ # Checks if join is of specified type.
1866
+ # * *many_to_many* - “m:m”: default, does not result in checks
1867
+ # * *one_to_one* - “1:1”: check if join keys are unique in both left and right datasets
1868
+ # * *one_to_many* - “1:m”: check if join keys are unique in left dataset
1869
+ # * *many_to_one* - “m:1”: check if join keys are unique in right dataset
1612
1870
  # @param join_nulls [Boolean]
1613
1871
  # Join on null values. By default null values will never produce matches.
1614
1872
  # @param allow_parallel [Boolean]
@@ -1617,6 +1875,12 @@ module Polars
1617
1875
  # @param force_parallel [Boolean]
1618
1876
  # Force the physical plan to evaluate the computation of both DataFrames up to
1619
1877
  # the join in parallel.
1878
+ # @param coalesce [Boolean]
1879
+ # Coalescing behavior (merging of join columns).
1880
+ # - nil: -> join specific.
1881
+ # - true: -> Always coalesce join columns.
1882
+ # - false: -> Never coalesce join columns.
1883
+ # Note that joining on any other expressions than `col` will turn off coalescing.
1620
1884
  #
1621
1885
  # @return [LazyFrame]
1622
1886
  #
@@ -1706,9 +1970,11 @@ module Polars
1706
1970
  on: nil,
1707
1971
  how: "inner",
1708
1972
  suffix: "_right",
1973
+ validate: "m:m",
1709
1974
  join_nulls: false,
1710
1975
  allow_parallel: true,
1711
- force_parallel: false
1976
+ force_parallel: false,
1977
+ coalesce: nil
1712
1978
  )
1713
1979
  if !other.is_a?(LazyFrame)
1714
1980
  raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
@@ -1719,7 +1985,7 @@ module Polars
1719
1985
  elsif how == "cross"
1720
1986
  return _from_rbldf(
1721
1987
  _ldf.join(
1722
- other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
1988
+ other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix, validate, coalesce
1723
1989
  )
1724
1990
  )
1725
1991
  end
@@ -1745,6 +2011,8 @@ module Polars
1745
2011
  join_nulls,
1746
2012
  how,
1747
2013
  suffix,
2014
+ validate,
2015
+ coalesce
1748
2016
  )
1749
2017
  )
1750
2018
  end
@@ -1879,6 +2147,55 @@ module Polars
1879
2147
  # - List of column names.
1880
2148
  #
1881
2149
  # @return [LazyFrame]
2150
+ #
2151
+ # @example Drop a single column by passing the name of that column.
2152
+ # lf = Polars::LazyFrame.new(
2153
+ # {
2154
+ # "foo" => [1, 2, 3],
2155
+ # "bar" => [6.0, 7.0, 8.0],
2156
+ # "ham" => ["a", "b", "c"]
2157
+ # }
2158
+ # )
2159
+ # lf.drop("ham").collect
2160
+ # # =>
2161
+ # # shape: (3, 2)
2162
+ # # ┌─────┬─────┐
2163
+ # # │ foo ┆ bar │
2164
+ # # │ --- ┆ --- │
2165
+ # # │ i64 ┆ f64 │
2166
+ # # ╞═════╪═════╡
2167
+ # # │ 1 ┆ 6.0 │
2168
+ # # │ 2 ┆ 7.0 │
2169
+ # # │ 3 ┆ 8.0 │
2170
+ # # └─────┴─────┘
2171
+ #
2172
+ # @example Drop multiple columns by passing a selector.
2173
+ # lf.drop(Polars.cs.numeric).collect
2174
+ # # =>
2175
+ # # shape: (3, 1)
2176
+ # # ┌─────┐
2177
+ # # │ ham │
2178
+ # # │ --- │
2179
+ # # │ str │
2180
+ # # ╞═════╡
2181
+ # # │ a │
2182
+ # # │ b │
2183
+ # # │ c │
2184
+ # # └─────┘
2185
+ #
2186
+ # @example Use positional arguments to drop multiple columns.
2187
+ # lf.drop("foo", "ham").collect
2188
+ # # =>
2189
+ # # shape: (3, 1)
2190
+ # # ┌─────┐
2191
+ # # │ bar │
2192
+ # # │ --- │
2193
+ # # │ f64 │
2194
+ # # ╞═════╡
2195
+ # # │ 6.0 │
2196
+ # # │ 7.0 │
2197
+ # # │ 8.0 │
2198
+ # # └─────┘
1882
2199
  def drop(*columns)
1883
2200
  drop_cols = Utils._expand_selectors(self, *columns)
1884
2201
  _from_rbldf(_ldf.drop(drop_cols))
@@ -1888,17 +2205,80 @@ module Polars
1888
2205
  #
1889
2206
  # @param mapping [Hash]
1890
2207
  # Key value pairs that map from old name to new name.
2208
+ # @param strict [Boolean]
2209
+ # Validate that all column names exist in the current schema,
2210
+ # and throw an exception if any do not. (Note that this parameter
2211
+ # is a no-op when passing a function to `mapping`).
1891
2212
  #
1892
2213
  # @return [LazyFrame]
1893
- def rename(mapping)
1894
- existing = mapping.keys
1895
- _new = mapping.values
1896
- _from_rbldf(_ldf.rename(existing, _new))
2214
+ #
2215
+ # @example
2216
+ # lf = Polars::LazyFrame.new(
2217
+ # {
2218
+ # "foo" => [1, 2, 3],
2219
+ # "bar" => [6, 7, 8],
2220
+ # "ham" => ["a", "b", "c"]
2221
+ # }
2222
+ # )
2223
+ # lf.rename({"foo" => "apple"}).collect
2224
+ # # =>
2225
+ # # shape: (3, 3)
2226
+ # # ┌───────┬─────┬─────┐
2227
+ # # │ apple ┆ bar ┆ ham │
2228
+ # # │ --- ┆ --- ┆ --- │
2229
+ # # │ i64 ┆ i64 ┆ str │
2230
+ # # ╞═══════╪═════╪═════╡
2231
+ # # │ 1 ┆ 6 ┆ a │
2232
+ # # │ 2 ┆ 7 ┆ b │
2233
+ # # │ 3 ┆ 8 ┆ c │
2234
+ # # └───────┴─────┴─────┘
2235
+ #
2236
+ # @example
2237
+ # lf.rename(->(column_name) { "c" + column_name[1..] }).collect
2238
+ # # =>
2239
+ # # shape: (3, 3)
2240
+ # # ┌─────┬─────┬─────┐
2241
+ # # │ coo ┆ car ┆ cam │
2242
+ # # │ --- ┆ --- ┆ --- │
2243
+ # # │ i64 ┆ i64 ┆ str │
2244
+ # # ╞═════╪═════╪═════╡
2245
+ # # │ 1 ┆ 6 ┆ a │
2246
+ # # │ 2 ┆ 7 ┆ b │
2247
+ # # │ 3 ┆ 8 ┆ c │
2248
+ # # └─────┴─────┴─────┘
2249
+ def rename(mapping, strict: true)
2250
+ if mapping.respond_to?(:call)
2251
+ select(F.all.name.map(&mapping))
2252
+ else
2253
+ existing = mapping.keys
2254
+ _new = mapping.values
2255
+ _from_rbldf(_ldf.rename(existing, _new, strict))
2256
+ end
1897
2257
  end
1898
2258
 
1899
2259
  # Reverse the DataFrame.
1900
2260
  #
1901
2261
  # @return [LazyFrame]
2262
+ #
2263
+ # @example
2264
+ # lf = Polars::LazyFrame.new(
2265
+ # {
2266
+ # "key" => ["a", "b", "c"],
2267
+ # "val" => [1, 2, 3]
2268
+ # }
2269
+ # )
2270
+ # lf.reverse.collect
2271
+ # # =>
2272
+ # # shape: (3, 2)
2273
+ # # ┌─────┬─────┐
2274
+ # # │ key ┆ val │
2275
+ # # │ --- ┆ --- │
2276
+ # # │ str ┆ i64 │
2277
+ # # ╞═════╪═════╡
2278
+ # # │ c ┆ 3 │
2279
+ # # │ b ┆ 2 │
2280
+ # # │ a ┆ 1 │
2281
+ # # └─────┴─────┘
1902
2282
  def reverse
1903
2283
  _from_rbldf(_ldf.reverse)
1904
2284
  end
@@ -2048,8 +2428,43 @@ module Polars
2048
2428
  # Consider using the {#fetch} operation if you only want to test your
2049
2429
  # query. The {#fetch} operation will load the first `n` rows at the scan
2050
2430
  # level, whereas the {#head}/{#limit} are applied at the end.
2431
+ #
2432
+ # @example
2433
+ # lf = Polars::LazyFrame.new(
2434
+ # {
2435
+ # "a" => [1, 2, 3, 4, 5, 6],
2436
+ # "b" => [7, 8, 9, 10, 11, 12]
2437
+ # }
2438
+ # )
2439
+ # lf.limit.collect
2440
+ # # =>
2441
+ # # shape: (5, 2)
2442
+ # # ┌─────┬─────┐
2443
+ # # │ a ┆ b │
2444
+ # # │ --- ┆ --- │
2445
+ # # │ i64 ┆ i64 │
2446
+ # # ╞═════╪═════╡
2447
+ # # │ 1 ┆ 7 │
2448
+ # # │ 2 ┆ 8 │
2449
+ # # │ 3 ┆ 9 │
2450
+ # # │ 4 ┆ 10 │
2451
+ # # │ 5 ┆ 11 │
2452
+ # # └─────┴─────┘
2453
+ #
2454
+ # @example
2455
+ # lf.limit(2).collect
2456
+ # # =>
2457
+ # # shape: (2, 2)
2458
+ # # ┌─────┬─────┐
2459
+ # # │ a ┆ b │
2460
+ # # │ --- ┆ --- │
2461
+ # # │ i64 ┆ i64 │
2462
+ # # ╞═════╪═════╡
2463
+ # # │ 1 ┆ 7 │
2464
+ # # │ 2 ┆ 8 │
2465
+ # # └─────┴─────┘
2051
2466
  def limit(n = 5)
2052
- head(5)
2467
+ head(n)
2053
2468
  end
2054
2469
 
2055
2470
  # Get the first `n` rows.
@@ -2063,6 +2478,41 @@ module Polars
2063
2478
  # Consider using the {#fetch} operation if you only want to test your
2064
2479
  # query. The {#fetch} operation will load the first `n` rows at the scan
2065
2480
  # level, whereas the {#head}/{#limit} are applied at the end.
2481
+ #
2482
+ # @example
2483
+ # lf = Polars::LazyFrame.new(
2484
+ # {
2485
+ # "a" => [1, 2, 3, 4, 5, 6],
2486
+ # "b" => [7, 8, 9, 10, 11, 12]
2487
+ # }
2488
+ # )
2489
+ # lf.head.collect
2490
+ # # =>
2491
+ # # shape: (5, 2)
2492
+ # # ┌─────┬─────┐
2493
+ # # │ a ┆ b │
2494
+ # # │ --- ┆ --- │
2495
+ # # │ i64 ┆ i64 │
2496
+ # # ╞═════╪═════╡
2497
+ # # │ 1 ┆ 7 │
2498
+ # # │ 2 ┆ 8 │
2499
+ # # │ 3 ┆ 9 │
2500
+ # # │ 4 ┆ 10 │
2501
+ # # │ 5 ┆ 11 │
2502
+ # # └─────┴─────┘
2503
+ #
2504
+ # @example
2505
+ # lf.head(2).collect
2506
+ # # =>
2507
+ # # shape: (2, 2)
2508
+ # # ┌─────┬─────┐
2509
+ # # │ a ┆ b │
2510
+ # # │ --- ┆ --- │
2511
+ # # │ i64 ┆ i64 │
2512
+ # # ╞═════╪═════╡
2513
+ # # │ 1 ┆ 7 │
2514
+ # # │ 2 ┆ 8 │
2515
+ # # └─────┴─────┘
2066
2516
  def head(n = 5)
2067
2517
  slice(0, n)
2068
2518
  end
@@ -2073,6 +2523,41 @@ module Polars
2073
2523
  # Number of rows.
2074
2524
  #
2075
2525
  # @return [LazyFrame]
2526
+ #
2527
+ # @example
2528
+ # lf = Polars::LazyFrame.new(
2529
+ # {
2530
+ # "a" => [1, 2, 3, 4, 5, 6],
2531
+ # "b" => [7, 8, 9, 10, 11, 12]
2532
+ # }
2533
+ # )
2534
+ # lf.tail.collect
2535
+ # # =>
2536
+ # # shape: (5, 2)
2537
+ # # ┌─────┬─────┐
2538
+ # # │ a ┆ b │
2539
+ # # │ --- ┆ --- │
2540
+ # # │ i64 ┆ i64 │
2541
+ # # ╞═════╪═════╡
2542
+ # # │ 2 ┆ 8 │
2543
+ # # │ 3 ┆ 9 │
2544
+ # # │ 4 ┆ 10 │
2545
+ # # │ 5 ┆ 11 │
2546
+ # # │ 6 ┆ 12 │
2547
+ # # └─────┴─────┘
2548
+ #
2549
+ # @example
2550
+ # lf.tail(2).collect
2551
+ # # =>
2552
+ # # shape: (2, 2)
2553
+ # # ┌─────┬─────┐
2554
+ # # │ a ┆ b │
2555
+ # # │ --- ┆ --- │
2556
+ # # │ i64 ┆ i64 │
2557
+ # # ╞═════╪═════╡
2558
+ # # │ 5 ┆ 11 │
2559
+ # # │ 6 ┆ 12 │
2560
+ # # └─────┴─────┘
2076
2561
  def tail(n = 5)
2077
2562
  _from_rbldf(_ldf.tail(n))
2078
2563
  end
@@ -2080,6 +2565,24 @@ module Polars
2080
2565
  # Get the last row of the DataFrame.
2081
2566
  #
2082
2567
  # @return [LazyFrame]
2568
+ #
2569
+ # @example
2570
+ # lf = Polars::LazyFrame.new(
2571
+ # {
2572
+ # "a" => [1, 5, 3],
2573
+ # "b" => [2, 4, 6]
2574
+ # }
2575
+ # )
2576
+ # lf.last.collect
2577
+ # # =>
2578
+ # # shape: (1, 2)
2579
+ # # ┌─────┬─────┐
2580
+ # # │ a ┆ b │
2581
+ # # │ --- ┆ --- │
2582
+ # # │ i64 ┆ i64 │
2583
+ # # ╞═════╪═════╡
2584
+ # # │ 3 ┆ 6 │
2585
+ # # └─────┴─────┘
2083
2586
  def last
2084
2587
  tail(1)
2085
2588
  end
@@ -2087,6 +2590,24 @@ module Polars
2087
2590
  # Get the first row of the DataFrame.
2088
2591
  #
2089
2592
  # @return [LazyFrame]
2593
+ #
2594
+ # @example
2595
+ # lf = Polars::LazyFrame.new(
2596
+ # {
2597
+ # "a" => [1, 5, 3],
2598
+ # "b" => [2, 4, 6]
2599
+ # }
2600
+ # )
2601
+ # lf.first.collect
2602
+ # # =>
2603
+ # # shape: (1, 2)
2604
+ # # ┌─────┬─────┐
2605
+ # # │ a ┆ b │
2606
+ # # │ --- ┆ --- │
2607
+ # # │ i64 ┆ i64 │
2608
+ # # ╞═════╪═════╡
2609
+ # # │ 1 ┆ 2 │
2610
+ # # └─────┴─────┘
2090
2611
  def first
2091
2612
  slice(0, 1)
2092
2613
  end
@@ -2152,6 +2673,72 @@ module Polars
2152
2673
  # Fill null values using the specified value or strategy.
2153
2674
  #
2154
2675
  # @return [LazyFrame]
2676
+ #
2677
+ # @example
2678
+ # lf = Polars::LazyFrame.new(
2679
+ # {
2680
+ # "a" => [1, 2, nil, 4],
2681
+ # "b" => [0.5, 4, nil, 13]
2682
+ # }
2683
+ # )
2684
+ # lf.fill_null(99).collect
2685
+ # # =>
2686
+ # # shape: (4, 2)
2687
+ # # ┌─────┬──────┐
2688
+ # # │ a ┆ b │
2689
+ # # │ --- ┆ --- │
2690
+ # # │ i64 ┆ f64 │
2691
+ # # ╞═════╪══════╡
2692
+ # # │ 1 ┆ 0.5 │
2693
+ # # │ 2 ┆ 4.0 │
2694
+ # # │ 99 ┆ 99.0 │
2695
+ # # │ 4 ┆ 13.0 │
2696
+ # # └─────┴──────┘
2697
+ #
2698
+ # @example
2699
+ # lf.fill_null(strategy: "forward").collect
2700
+ # # =>
2701
+ # # shape: (4, 2)
2702
+ # # ┌─────┬──────┐
2703
+ # # │ a ┆ b │
2704
+ # # │ --- ┆ --- │
2705
+ # # │ i64 ┆ f64 │
2706
+ # # ╞═════╪══════╡
2707
+ # # │ 1 ┆ 0.5 │
2708
+ # # │ 2 ┆ 4.0 │
2709
+ # # │ 2 ┆ 4.0 │
2710
+ # # │ 4 ┆ 13.0 │
2711
+ # # └─────┴──────┘
2712
+ #
2713
+ # @example
2714
+ # lf.fill_null(strategy: "max").collect
2715
+ # # =>
2716
+ # # shape: (4, 2)
2717
+ # # ┌─────┬──────┐
2718
+ # # │ a ┆ b │
2719
+ # # │ --- ┆ --- │
2720
+ # # │ i64 ┆ f64 │
2721
+ # # ╞═════╪══════╡
2722
+ # # │ 1 ┆ 0.5 │
2723
+ # # │ 2 ┆ 4.0 │
2724
+ # # │ 4 ┆ 13.0 │
2725
+ # # │ 4 ┆ 13.0 │
2726
+ # # └─────┴──────┘
2727
+ #
2728
+ # @example
2729
+ # lf.fill_null(strategy: "zero").collect
2730
+ # # =>
2731
+ # # shape: (4, 2)
2732
+ # # ┌─────┬──────┐
2733
+ # # │ a ┆ b │
2734
+ # # │ --- ┆ --- │
2735
+ # # │ i64 ┆ f64 │
2736
+ # # ╞═════╪══════╡
2737
+ # # │ 1 ┆ 0.5 │
2738
+ # # │ 2 ┆ 4.0 │
2739
+ # # │ 0 ┆ 0.0 │
2740
+ # # │ 4 ┆ 13.0 │
2741
+ # # └─────┴──────┘
2155
2742
  def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: nil)
2156
2743
  select(Polars.all.fill_null(value, strategy: strategy, limit: limit))
2157
2744
  end
@@ -2431,6 +3018,53 @@ module Polars
2431
3018
  # Which of the duplicate rows to keep.
2432
3019
  #
2433
3020
  # @return [LazyFrame]
3021
+ #
3022
+ # @example
3023
+ # lf = Polars::LazyFrame.new(
3024
+ # {
3025
+ # "foo" => [1, 2, 3, 1],
3026
+ # "bar" => ["a", "a", "a", "a"],
3027
+ # "ham" => ["b", "b", "b", "b"]
3028
+ # }
3029
+ # )
3030
+ # lf.unique(maintain_order: true).collect
3031
+ # # =>
3032
+ # # shape: (3, 3)
3033
+ # # ┌─────┬─────┬─────┐
3034
+ # # │ foo ┆ bar ┆ ham │
3035
+ # # │ --- ┆ --- ┆ --- │
3036
+ # # │ i64 ┆ str ┆ str │
3037
+ # # ╞═════╪═════╪═════╡
3038
+ # # │ 1 ┆ a ┆ b │
3039
+ # # │ 2 ┆ a ┆ b │
3040
+ # # │ 3 ┆ a ┆ b │
3041
+ # # └─────┴─────┴─────┘
3042
+ #
3043
+ # @example
3044
+ # lf.unique(subset: ["bar", "ham"], maintain_order: true).collect
3045
+ # # =>
3046
+ # # shape: (1, 3)
3047
+ # # ┌─────┬─────┬─────┐
3048
+ # # │ foo ┆ bar ┆ ham │
3049
+ # # │ --- ┆ --- ┆ --- │
3050
+ # # │ i64 ┆ str ┆ str │
3051
+ # # ╞═════╪═════╪═════╡
3052
+ # # │ 1 ┆ a ┆ b │
3053
+ # # └─────┴─────┴─────┘
3054
+ #
3055
+ # @example
3056
+ # lf.unique(keep: "last", maintain_order: true).collect
3057
+ # # =>
3058
+ # # shape: (3, 3)
3059
+ # # ┌─────┬─────┬─────┐
3060
+ # # │ foo ┆ bar ┆ ham │
3061
+ # # │ --- ┆ --- ┆ --- │
3062
+ # # │ i64 ┆ str ┆ str │
3063
+ # # ╞═════╪═════╪═════╡
3064
+ # # │ 2 ┆ a ┆ b │
3065
+ # # │ 3 ┆ a ┆ b │
3066
+ # # │ 1 ┆ a ┆ b │
3067
+ # # └─────┴─────┴─────┘
2434
3068
  def unique(maintain_order: true, subset: nil, keep: "first")
2435
3069
  if !subset.nil? && !subset.is_a?(::Array)
2436
3070
  subset = [subset]
@@ -2504,7 +3138,7 @@ module Polars
2504
3138
  # "c" => [2, 4, 6]
2505
3139
  # }
2506
3140
  # )
2507
- # lf.unpivot(Polars::Selectors.numeric, index: "a").collect
3141
+ # lf.unpivot(Polars.cs.numeric, index: "a").collect
2508
3142
  # # =>
2509
3143
  # # shape: (6, 3)
2510
3144
  # # ┌─────┬──────────┬───────┐
@@ -2530,8 +3164,8 @@ module Polars
2530
3164
  warn "The `streamable` parameter for `LazyFrame.unpivot` is deprecated"
2531
3165
  end
2532
3166
 
2533
- on = on.nil? ? [] : Utils._expand_selectors(self, on)
2534
- index = index.nil? ? [] : Utils._expand_selectors(self, index)
3167
+ on = on.nil? ? [] : Utils.parse_into_list_of_expressions(on)
3168
+ index = index.nil? ? [] : Utils.parse_into_list_of_expressions(index)
2535
3169
 
2536
3170
  _from_rbldf(
2537
3171
  _ldf.unpivot(on, index, value_name, variable_name)