polars-df 0.13.0-aarch64-linux-musl → 0.15.0-aarch64-linux-musl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -0
  3. data/Cargo.lock +1368 -319
  4. data/LICENSE-THIRD-PARTY.txt +24801 -13447
  5. data/LICENSE.txt +1 -0
  6. data/README.md +1 -2
  7. data/lib/polars/3.1/polars.so +0 -0
  8. data/lib/polars/3.2/polars.so +0 -0
  9. data/lib/polars/3.3/polars.so +0 -0
  10. data/lib/polars/batched_csv_reader.rb +0 -2
  11. data/lib/polars/binary_expr.rb +133 -9
  12. data/lib/polars/binary_name_space.rb +101 -6
  13. data/lib/polars/config.rb +4 -0
  14. data/lib/polars/data_frame.rb +285 -62
  15. data/lib/polars/data_type_group.rb +28 -0
  16. data/lib/polars/data_types.rb +2 -0
  17. data/lib/polars/date_time_expr.rb +244 -0
  18. data/lib/polars/date_time_name_space.rb +87 -0
  19. data/lib/polars/expr.rb +109 -8
  20. data/lib/polars/functions/as_datatype.rb +51 -2
  21. data/lib/polars/functions/col.rb +1 -1
  22. data/lib/polars/functions/eager.rb +1 -3
  23. data/lib/polars/functions/lazy.rb +88 -10
  24. data/lib/polars/functions/range/time_range.rb +21 -21
  25. data/lib/polars/io/csv.rb +14 -16
  26. data/lib/polars/io/database.rb +2 -2
  27. data/lib/polars/io/ipc.rb +14 -12
  28. data/lib/polars/io/ndjson.rb +10 -0
  29. data/lib/polars/io/parquet.rb +168 -111
  30. data/lib/polars/lazy_frame.rb +649 -15
  31. data/lib/polars/list_name_space.rb +169 -0
  32. data/lib/polars/selectors.rb +1144 -0
  33. data/lib/polars/series.rb +470 -40
  34. data/lib/polars/string_cache.rb +27 -1
  35. data/lib/polars/string_expr.rb +0 -1
  36. data/lib/polars/string_name_space.rb +73 -3
  37. data/lib/polars/struct_name_space.rb +31 -7
  38. data/lib/polars/utils/various.rb +5 -1
  39. data/lib/polars/utils.rb +45 -10
  40. data/lib/polars/version.rb +1 -1
  41. data/lib/polars.rb +2 -1
  42. metadata +4 -3
  43. data/lib/polars/functions.rb +0 -57
@@ -848,9 +848,70 @@ module Polars
848
848
  _from_rbldf(_ldf.cache)
849
849
  end
850
850
 
851
- # TODO
852
- # def cast
853
- # end
851
+ # Cast LazyFrame column(s) to the specified dtype(s).
852
+ #
853
+ # @param dtypes [Hash]
854
+ # Mapping of column names (or selector) to dtypes, or a single dtype
855
+ # to which all columns will be cast.
856
+ # @param strict [Boolean]
857
+ # Throw an error if a cast could not be done (for instance, due to an
858
+ # overflow).
859
+ #
860
+ # @return [LazyFrame]
861
+ #
862
+ # @example Cast specific frame columns to the specified dtypes:
863
+ # lf = Polars::LazyFrame.new(
864
+ # {
865
+ # "foo" => [1, 2, 3],
866
+ # "bar" => [6.0, 7.0, 8.0],
867
+ # "ham" => [Date.new(2020, 1, 2), Date.new(2021, 3, 4), Date.new(2022, 5, 6)]
868
+ # }
869
+ # )
870
+ # lf.cast({"foo" => Polars::Float32, "bar" => Polars::UInt8}).collect
871
+ # # =>
872
+ # # shape: (3, 3)
873
+ # # ┌─────┬─────┬────────────┐
874
+ # # │ foo ┆ bar ┆ ham │
875
+ # # │ --- ┆ --- ┆ --- │
876
+ # # │ f32 ┆ u8 ┆ date │
877
+ # # ╞═════╪═════╪════════════╡
878
+ # # │ 1.0 ┆ 6 ┆ 2020-01-02 │
879
+ # # │ 2.0 ┆ 7 ┆ 2021-03-04 │
880
+ # # │ 3.0 ┆ 8 ┆ 2022-05-06 │
881
+ # # └─────┴─────┴────────────┘
882
+ #
883
+ # @example Cast all frame columns matching one dtype (or dtype group) to another dtype:
884
+ # lf.cast({Polars::Date => Polars::Datetime}).collect
885
+ # # =>
886
+ # # shape: (3, 3)
887
+ # # ┌─────┬─────┬─────────────────────┐
888
+ # # │ foo ┆ bar ┆ ham │
889
+ # # │ --- ┆ --- ┆ --- │
890
+ # # │ i64 ┆ f64 ┆ datetime[μs] │
891
+ # # ╞═════╪═════╪═════════════════════╡
892
+ # # │ 1 ┆ 6.0 ┆ 2020-01-02 00:00:00 │
893
+ # # │ 2 ┆ 7.0 ┆ 2021-03-04 00:00:00 │
894
+ # # │ 3 ┆ 8.0 ┆ 2022-05-06 00:00:00 │
895
+ # # └─────┴─────┴─────────────────────┘
896
+ #
897
+ # @example Cast all frame columns to the specified dtype:
898
+ # lf.cast(Polars::String).collect.to_h(as_series: false)
899
+ # # => {"foo"=>["1", "2", "3"], "bar"=>["6.0", "7.0", "8.0"], "ham"=>["2020-01-02", "2021-03-04", "2022-05-06"]}
900
+ def cast(dtypes, strict: true)
901
+ if !dtypes.is_a?(Hash)
902
+ return _from_rbldf(_ldf.cast_all(dtypes, strict))
903
+ end
904
+
905
+ cast_map = {}
906
+ dtypes.each do |c, dtype|
907
+ dtype = Utils.parse_into_dtype(dtype)
908
+ cast_map.merge!(
909
+ c.is_a?(::String) ? {c => dtype} : Utils.expand_selector(self, c).to_h { |x| [x, dtype] }
910
+ )
911
+ end
912
+
913
+ _from_rbldf(_ldf.cast(cast_map, strict))
914
+ end
854
915
 
855
916
  # Create an empty copy of the current LazyFrame.
856
917
  #
@@ -1520,8 +1581,197 @@ module Polars
1520
1581
  # @param force_parallel [Boolean]
1521
1582
  # Force the physical plan to evaluate the computation of both DataFrames up to
1522
1583
  # the join in parallel.
1584
+ # @param coalesce [Boolean]
1585
+ # Coalescing behavior (merging of join columns).
1586
+ # - true: -> Always coalesce join columns.
1587
+ # - false: -> Never coalesce join columns.
1588
+ # Note that joining on any other expressions than `col` will turn off coalescing.
1523
1589
  #
1524
1590
  # @return [LazyFrame]
1591
+ #
1592
+ # @example
1593
+ # gdp = Polars::LazyFrame.new(
1594
+ # {
1595
+ # "date" => Polars.date_range(
1596
+ # Date.new(2016, 1, 1),
1597
+ # Date.new(2020, 1, 1),
1598
+ # "1y",
1599
+ # eager: true
1600
+ # ),
1601
+ # "gdp" => [4164, 4411, 4566, 4696, 4827]
1602
+ # }
1603
+ # )
1604
+ # gdp.collect
1605
+ # # =>
1606
+ # # shape: (5, 2)
1607
+ # # ┌────────────┬──────┐
1608
+ # # │ date ┆ gdp │
1609
+ # # │ --- ┆ --- │
1610
+ # # │ date ┆ i64 │
1611
+ # # ╞════════════╪══════╡
1612
+ # # │ 2016-01-01 ┆ 4164 │
1613
+ # # │ 2017-01-01 ┆ 4411 │
1614
+ # # │ 2018-01-01 ┆ 4566 │
1615
+ # # │ 2019-01-01 ┆ 4696 │
1616
+ # # │ 2020-01-01 ┆ 4827 │
1617
+ # # └────────────┴──────┘
1618
+ #
1619
+ # @example
1620
+ # population = Polars::LazyFrame.new(
1621
+ # {
1622
+ # "date" => [Date.new(2016, 3, 1), Date.new(2018, 8, 1), Date.new(2019, 1, 1)],
1623
+ # "population" => [82.19, 82.66, 83.12]
1624
+ # }
1625
+ # ).sort("date")
1626
+ # population.collect
1627
+ # # =>
1628
+ # # shape: (3, 2)
1629
+ # # ┌────────────┬────────────┐
1630
+ # # │ date ┆ population │
1631
+ # # │ --- ┆ --- │
1632
+ # # │ date ┆ f64 │
1633
+ # # ╞════════════╪════════════╡
1634
+ # # │ 2016-03-01 ┆ 82.19 │
1635
+ # # │ 2018-08-01 ┆ 82.66 │
1636
+ # # │ 2019-01-01 ┆ 83.12 │
1637
+ # # └────────────┴────────────┘
1638
+ #
1639
+ # @example Note how the dates don't quite match. If we join them using `join_asof` and `strategy: "backward"`, then each date from `population` which doesn't have an exact match is matched with the closest earlier date from `gdp`:
1640
+ # population.join_asof(gdp, on: "date", strategy: "backward").collect
1641
+ # # =>
1642
+ # # shape: (3, 3)
1643
+ # # ┌────────────┬────────────┬──────┐
1644
+ # # │ date ┆ population ┆ gdp │
1645
+ # # │ --- ┆ --- ┆ --- │
1646
+ # # │ date ┆ f64 ┆ i64 │
1647
+ # # ╞════════════╪════════════╪══════╡
1648
+ # # │ 2016-03-01 ┆ 82.19 ┆ 4164 │
1649
+ # # │ 2018-08-01 ┆ 82.66 ┆ 4566 │
1650
+ # # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
1651
+ # # └────────────┴────────────┴──────┘
1652
+ #
1653
+ # @example
1654
+ # population.join_asof(
1655
+ # gdp, on: "date", strategy: "backward", coalesce: false
1656
+ # ).collect
1657
+ # # =>
1658
+ # # shape: (3, 4)
1659
+ # # ┌────────────┬────────────┬────────────┬──────┐
1660
+ # # │ date ┆ population ┆ date_right ┆ gdp │
1661
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1662
+ # # │ date ┆ f64 ┆ date ┆ i64 │
1663
+ # # ╞════════════╪════════════╪════════════╪══════╡
1664
+ # # │ 2016-03-01 ┆ 82.19 ┆ 2016-01-01 ┆ 4164 │
1665
+ # # │ 2018-08-01 ┆ 82.66 ┆ 2018-01-01 ┆ 4566 │
1666
+ # # │ 2019-01-01 ┆ 83.12 ┆ 2019-01-01 ┆ 4696 │
1667
+ # # └────────────┴────────────┴────────────┴──────┘
1668
+ #
1669
+ # @example If we instead use `strategy: "forward"`, then each date from `population` which doesn't have an exact match is matched with the closest later date from `gdp`:
1670
+ # population.join_asof(gdp, on: "date", strategy: "forward").collect
1671
+ # # =>
1672
+ # # shape: (3, 3)
1673
+ # # ┌────────────┬────────────┬──────┐
1674
+ # # │ date ┆ population ┆ gdp │
1675
+ # # │ --- ┆ --- ┆ --- │
1676
+ # # │ date ┆ f64 ┆ i64 │
1677
+ # # ╞════════════╪════════════╪══════╡
1678
+ # # │ 2016-03-01 ┆ 82.19 ┆ 4411 │
1679
+ # # │ 2018-08-01 ┆ 82.66 ┆ 4696 │
1680
+ # # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
1681
+ # # └────────────┴────────────┴──────┘
1682
+ #
1683
+ # @example
1684
+ # population.join_asof(gdp, on: "date", strategy: "nearest").collect
1685
+ # # =>
1686
+ # # shape: (3, 3)
1687
+ # # ┌────────────┬────────────┬──────┐
1688
+ # # │ date ┆ population ┆ gdp │
1689
+ # # │ --- ┆ --- ┆ --- │
1690
+ # # │ date ┆ f64 ┆ i64 │
1691
+ # # ╞════════════╪════════════╪══════╡
1692
+ # # │ 2016-03-01 ┆ 82.19 ┆ 4164 │
1693
+ # # │ 2018-08-01 ┆ 82.66 ┆ 4696 │
1694
+ # # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
1695
+ # # └────────────┴────────────┴──────┘
1696
+ #
1697
+ # @example
1698
+ # gdp_dates = Polars.date_range(
1699
+ # Date.new(2016, 1, 1), Date.new(2020, 1, 1), "1y", eager: true
1700
+ # )
1701
+ # gdp2 = Polars::LazyFrame.new(
1702
+ # {
1703
+ # "country" => ["Germany"] * 5 + ["Netherlands"] * 5,
1704
+ # "date" => Polars.concat([gdp_dates, gdp_dates]),
1705
+ # "gdp" => [4164, 4411, 4566, 4696, 4827, 784, 833, 914, 910, 909]
1706
+ # }
1707
+ # ).sort("country", "date")
1708
+ # gdp2.collect
1709
+ # # =>
1710
+ # # shape: (10, 3)
1711
+ # # ┌─────────────┬────────────┬──────┐
1712
+ # # │ country ┆ date ┆ gdp │
1713
+ # # │ --- ┆ --- ┆ --- │
1714
+ # # │ str ┆ date ┆ i64 │
1715
+ # # ╞═════════════╪════════════╪══════╡
1716
+ # # │ Germany ┆ 2016-01-01 ┆ 4164 │
1717
+ # # │ Germany ┆ 2017-01-01 ┆ 4411 │
1718
+ # # │ Germany ┆ 2018-01-01 ┆ 4566 │
1719
+ # # │ Germany ┆ 2019-01-01 ┆ 4696 │
1720
+ # # │ Germany ┆ 2020-01-01 ┆ 4827 │
1721
+ # # │ Netherlands ┆ 2016-01-01 ┆ 784 │
1722
+ # # │ Netherlands ┆ 2017-01-01 ┆ 833 │
1723
+ # # │ Netherlands ┆ 2018-01-01 ┆ 914 │
1724
+ # # │ Netherlands ┆ 2019-01-01 ┆ 910 │
1725
+ # # │ Netherlands ┆ 2020-01-01 ┆ 909 │
1726
+ # # └─────────────┴────────────┴──────┘
1727
+ #
1728
+ # @example
1729
+ # pop2 = Polars::LazyFrame.new(
1730
+ # {
1731
+ # "country" => ["Germany"] * 3 + ["Netherlands"] * 3,
1732
+ # "date" => [
1733
+ # Date.new(2016, 3, 1),
1734
+ # Date.new(2018, 8, 1),
1735
+ # Date.new(2019, 1, 1),
1736
+ # Date.new(2016, 3, 1),
1737
+ # Date.new(2018, 8, 1),
1738
+ # Date.new(2019, 1, 1)
1739
+ # ],
1740
+ # "population" => [82.19, 82.66, 83.12, 17.11, 17.32, 17.40]
1741
+ # }
1742
+ # ).sort("country", "date")
1743
+ # pop2.collect
1744
+ # # =>
1745
+ # # shape: (6, 3)
1746
+ # # ┌─────────────┬────────────┬────────────┐
1747
+ # # │ country ┆ date ┆ population │
1748
+ # # │ --- ┆ --- ┆ --- │
1749
+ # # │ str ┆ date ┆ f64 │
1750
+ # # ╞═════════════╪════════════╪════════════╡
1751
+ # # │ Germany ┆ 2016-03-01 ┆ 82.19 │
1752
+ # # │ Germany ┆ 2018-08-01 ┆ 82.66 │
1753
+ # # │ Germany ┆ 2019-01-01 ┆ 83.12 │
1754
+ # # │ Netherlands ┆ 2016-03-01 ┆ 17.11 │
1755
+ # # │ Netherlands ┆ 2018-08-01 ┆ 17.32 │
1756
+ # # │ Netherlands ┆ 2019-01-01 ┆ 17.4 │
1757
+ # # └─────────────┴────────────┴────────────┘
1758
+ #
1759
+ # @example
1760
+ # pop2.join_asof(gdp2, by: "country", on: "date", strategy: "nearest").collect
1761
+ # # =>
1762
+ # # shape: (6, 4)
1763
+ # # ┌─────────────┬────────────┬────────────┬──────┐
1764
+ # # │ country ┆ date ┆ population ┆ gdp │
1765
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1766
+ # # │ str ┆ date ┆ f64 ┆ i64 │
1767
+ # # ╞═════════════╪════════════╪════════════╪══════╡
1768
+ # # │ Germany ┆ 2016-03-01 ┆ 82.19 ┆ 4164 │
1769
+ # # │ Germany ┆ 2018-08-01 ┆ 82.66 ┆ 4696 │
1770
+ # # │ Germany ┆ 2019-01-01 ┆ 83.12 ┆ 4696 │
1771
+ # # │ Netherlands ┆ 2016-03-01 ┆ 17.11 ┆ 784 │
1772
+ # # │ Netherlands ┆ 2018-08-01 ┆ 17.32 ┆ 910 │
1773
+ # # │ Netherlands ┆ 2019-01-01 ┆ 17.4 ┆ 910 │
1774
+ # # └─────────────┴────────────┴────────────┴──────┘
1525
1775
  def join_asof(
1526
1776
  other,
1527
1777
  left_on: nil,
@@ -1534,7 +1784,8 @@ module Polars
1534
1784
  suffix: "_right",
1535
1785
  tolerance: nil,
1536
1786
  allow_parallel: true,
1537
- force_parallel: false
1787
+ force_parallel: false,
1788
+ coalesce: true
1538
1789
  )
1539
1790
  if !other.is_a?(LazyFrame)
1540
1791
  raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
@@ -1589,7 +1840,8 @@ module Polars
1589
1840
  suffix,
1590
1841
  strategy,
1591
1842
  tolerance_num,
1592
- tolerance_str
1843
+ tolerance_str,
1844
+ coalesce
1593
1845
  )
1594
1846
  )
1595
1847
  end
@@ -1609,6 +1861,12 @@ module Polars
1609
1861
  # Join strategy.
1610
1862
  # @param suffix [String]
1611
1863
  # Suffix to append to columns with a duplicate name.
1864
+ # @param validate ['m:m', 'm:1', '1:m', '1:1']
1865
+ # Checks if join is of specified type.
1866
+ # * *many_to_many* - “m:m”: default, does not result in checks
1867
+ # * *one_to_one* - “1:1”: check if join keys are unique in both left and right datasets
1868
+ # * *one_to_many* - “1:m”: check if join keys are unique in left dataset
1869
+ # * *many_to_one* - “m:1”: check if join keys are unique in right dataset
1612
1870
  # @param join_nulls [Boolean]
1613
1871
  # Join on null values. By default null values will never produce matches.
1614
1872
  # @param allow_parallel [Boolean]
@@ -1617,6 +1875,12 @@ module Polars
1617
1875
  # @param force_parallel [Boolean]
1618
1876
  # Force the physical plan to evaluate the computation of both DataFrames up to
1619
1877
  # the join in parallel.
1878
+ # @param coalesce [Boolean]
1879
+ # Coalescing behavior (merging of join columns).
1880
+ # - nil: -> join specific.
1881
+ # - true: -> Always coalesce join columns.
1882
+ # - false: -> Never coalesce join columns.
1883
+ # Note that joining on any other expressions than `col` will turn off coalescing.
1620
1884
  #
1621
1885
  # @return [LazyFrame]
1622
1886
  #
@@ -1706,9 +1970,11 @@ module Polars
1706
1970
  on: nil,
1707
1971
  how: "inner",
1708
1972
  suffix: "_right",
1973
+ validate: "m:m",
1709
1974
  join_nulls: false,
1710
1975
  allow_parallel: true,
1711
- force_parallel: false
1976
+ force_parallel: false,
1977
+ coalesce: nil
1712
1978
  )
1713
1979
  if !other.is_a?(LazyFrame)
1714
1980
  raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
@@ -1719,7 +1985,7 @@ module Polars
1719
1985
  elsif how == "cross"
1720
1986
  return _from_rbldf(
1721
1987
  _ldf.join(
1722
- other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
1988
+ other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix, validate, coalesce
1723
1989
  )
1724
1990
  )
1725
1991
  end
@@ -1745,6 +2011,8 @@ module Polars
1745
2011
  join_nulls,
1746
2012
  how,
1747
2013
  suffix,
2014
+ validate,
2015
+ coalesce
1748
2016
  )
1749
2017
  )
1750
2018
  end
@@ -1879,6 +2147,55 @@ module Polars
1879
2147
  # - List of column names.
1880
2148
  #
1881
2149
  # @return [LazyFrame]
2150
+ #
2151
+ # @example Drop a single column by passing the name of that column.
2152
+ # lf = Polars::LazyFrame.new(
2153
+ # {
2154
+ # "foo" => [1, 2, 3],
2155
+ # "bar" => [6.0, 7.0, 8.0],
2156
+ # "ham" => ["a", "b", "c"]
2157
+ # }
2158
+ # )
2159
+ # lf.drop("ham").collect
2160
+ # # =>
2161
+ # # shape: (3, 2)
2162
+ # # ┌─────┬─────┐
2163
+ # # │ foo ┆ bar │
2164
+ # # │ --- ┆ --- │
2165
+ # # │ i64 ┆ f64 │
2166
+ # # ╞═════╪═════╡
2167
+ # # │ 1 ┆ 6.0 │
2168
+ # # │ 2 ┆ 7.0 │
2169
+ # # │ 3 ┆ 8.0 │
2170
+ # # └─────┴─────┘
2171
+ #
2172
+ # @example Drop multiple columns by passing a selector.
2173
+ # lf.drop(Polars.cs.numeric).collect
2174
+ # # =>
2175
+ # # shape: (3, 1)
2176
+ # # ┌─────┐
2177
+ # # │ ham │
2178
+ # # │ --- │
2179
+ # # │ str │
2180
+ # # ╞═════╡
2181
+ # # │ a │
2182
+ # # │ b │
2183
+ # # │ c │
2184
+ # # └─────┘
2185
+ #
2186
+ # @example Use positional arguments to drop multiple columns.
2187
+ # lf.drop("foo", "ham").collect
2188
+ # # =>
2189
+ # # shape: (3, 1)
2190
+ # # ┌─────┐
2191
+ # # │ bar │
2192
+ # # │ --- │
2193
+ # # │ f64 │
2194
+ # # ╞═════╡
2195
+ # # │ 6.0 │
2196
+ # # │ 7.0 │
2197
+ # # │ 8.0 │
2198
+ # # └─────┘
1882
2199
  def drop(*columns)
1883
2200
  drop_cols = Utils._expand_selectors(self, *columns)
1884
2201
  _from_rbldf(_ldf.drop(drop_cols))
@@ -1888,17 +2205,80 @@ module Polars
1888
2205
  #
1889
2206
  # @param mapping [Hash]
1890
2207
  # Key value pairs that map from old name to new name.
2208
+ # @param strict [Boolean]
2209
+ # Validate that all column names exist in the current schema,
2210
+ # and throw an exception if any do not. (Note that this parameter
2211
+ # is a no-op when passing a function to `mapping`).
1891
2212
  #
1892
2213
  # @return [LazyFrame]
1893
- def rename(mapping)
1894
- existing = mapping.keys
1895
- _new = mapping.values
1896
- _from_rbldf(_ldf.rename(existing, _new))
2214
+ #
2215
+ # @example
2216
+ # lf = Polars::LazyFrame.new(
2217
+ # {
2218
+ # "foo" => [1, 2, 3],
2219
+ # "bar" => [6, 7, 8],
2220
+ # "ham" => ["a", "b", "c"]
2221
+ # }
2222
+ # )
2223
+ # lf.rename({"foo" => "apple"}).collect
2224
+ # # =>
2225
+ # # shape: (3, 3)
2226
+ # # ┌───────┬─────┬─────┐
2227
+ # # │ apple ┆ bar ┆ ham │
2228
+ # # │ --- ┆ --- ┆ --- │
2229
+ # # │ i64 ┆ i64 ┆ str │
2230
+ # # ╞═══════╪═════╪═════╡
2231
+ # # │ 1 ┆ 6 ┆ a │
2232
+ # # │ 2 ┆ 7 ┆ b │
2233
+ # # │ 3 ┆ 8 ┆ c │
2234
+ # # └───────┴─────┴─────┘
2235
+ #
2236
+ # @example
2237
+ # lf.rename(->(column_name) { "c" + column_name[1..] }).collect
2238
+ # # =>
2239
+ # # shape: (3, 3)
2240
+ # # ┌─────┬─────┬─────┐
2241
+ # # │ coo ┆ car ┆ cam │
2242
+ # # │ --- ┆ --- ┆ --- │
2243
+ # # │ i64 ┆ i64 ┆ str │
2244
+ # # ╞═════╪═════╪═════╡
2245
+ # # │ 1 ┆ 6 ┆ a │
2246
+ # # │ 2 ┆ 7 ┆ b │
2247
+ # # │ 3 ┆ 8 ┆ c │
2248
+ # # └─────┴─────┴─────┘
2249
+ def rename(mapping, strict: true)
2250
+ if mapping.respond_to?(:call)
2251
+ select(F.all.name.map(&mapping))
2252
+ else
2253
+ existing = mapping.keys
2254
+ _new = mapping.values
2255
+ _from_rbldf(_ldf.rename(existing, _new, strict))
2256
+ end
1897
2257
  end
1898
2258
 
1899
2259
  # Reverse the DataFrame.
1900
2260
  #
1901
2261
  # @return [LazyFrame]
2262
+ #
2263
+ # @example
2264
+ # lf = Polars::LazyFrame.new(
2265
+ # {
2266
+ # "key" => ["a", "b", "c"],
2267
+ # "val" => [1, 2, 3]
2268
+ # }
2269
+ # )
2270
+ # lf.reverse.collect
2271
+ # # =>
2272
+ # # shape: (3, 2)
2273
+ # # ┌─────┬─────┐
2274
+ # # │ key ┆ val │
2275
+ # # │ --- ┆ --- │
2276
+ # # │ str ┆ i64 │
2277
+ # # ╞═════╪═════╡
2278
+ # # │ c ┆ 3 │
2279
+ # # │ b ┆ 2 │
2280
+ # # │ a ┆ 1 │
2281
+ # # └─────┴─────┘
1902
2282
  def reverse
1903
2283
  _from_rbldf(_ldf.reverse)
1904
2284
  end
@@ -2048,8 +2428,43 @@ module Polars
2048
2428
  # Consider using the {#fetch} operation if you only want to test your
2049
2429
  # query. The {#fetch} operation will load the first `n` rows at the scan
2050
2430
  # level, whereas the {#head}/{#limit} are applied at the end.
2431
+ #
2432
+ # @example
2433
+ # lf = Polars::LazyFrame.new(
2434
+ # {
2435
+ # "a" => [1, 2, 3, 4, 5, 6],
2436
+ # "b" => [7, 8, 9, 10, 11, 12]
2437
+ # }
2438
+ # )
2439
+ # lf.limit.collect
2440
+ # # =>
2441
+ # # shape: (5, 2)
2442
+ # # ┌─────┬─────┐
2443
+ # # │ a ┆ b │
2444
+ # # │ --- ┆ --- │
2445
+ # # │ i64 ┆ i64 │
2446
+ # # ╞═════╪═════╡
2447
+ # # │ 1 ┆ 7 │
2448
+ # # │ 2 ┆ 8 │
2449
+ # # │ 3 ┆ 9 │
2450
+ # # │ 4 ┆ 10 │
2451
+ # # │ 5 ┆ 11 │
2452
+ # # └─────┴─────┘
2453
+ #
2454
+ # @example
2455
+ # lf.limit(2).collect
2456
+ # # =>
2457
+ # # shape: (2, 2)
2458
+ # # ┌─────┬─────┐
2459
+ # # │ a ┆ b │
2460
+ # # │ --- ┆ --- │
2461
+ # # │ i64 ┆ i64 │
2462
+ # # ╞═════╪═════╡
2463
+ # # │ 1 ┆ 7 │
2464
+ # # │ 2 ┆ 8 │
2465
+ # # └─────┴─────┘
2051
2466
  def limit(n = 5)
2052
- head(5)
2467
+ head(n)
2053
2468
  end
2054
2469
 
2055
2470
  # Get the first `n` rows.
@@ -2063,6 +2478,41 @@ module Polars
2063
2478
  # Consider using the {#fetch} operation if you only want to test your
2064
2479
  # query. The {#fetch} operation will load the first `n` rows at the scan
2065
2480
  # level, whereas the {#head}/{#limit} are applied at the end.
2481
+ #
2482
+ # @example
2483
+ # lf = Polars::LazyFrame.new(
2484
+ # {
2485
+ # "a" => [1, 2, 3, 4, 5, 6],
2486
+ # "b" => [7, 8, 9, 10, 11, 12]
2487
+ # }
2488
+ # )
2489
+ # lf.head.collect
2490
+ # # =>
2491
+ # # shape: (5, 2)
2492
+ # # ┌─────┬─────┐
2493
+ # # │ a ┆ b │
2494
+ # # │ --- ┆ --- │
2495
+ # # │ i64 ┆ i64 │
2496
+ # # ╞═════╪═════╡
2497
+ # # │ 1 ┆ 7 │
2498
+ # # │ 2 ┆ 8 │
2499
+ # # │ 3 ┆ 9 │
2500
+ # # │ 4 ┆ 10 │
2501
+ # # │ 5 ┆ 11 │
2502
+ # # └─────┴─────┘
2503
+ #
2504
+ # @example
2505
+ # lf.head(2).collect
2506
+ # # =>
2507
+ # # shape: (2, 2)
2508
+ # # ┌─────┬─────┐
2509
+ # # │ a ┆ b │
2510
+ # # │ --- ┆ --- │
2511
+ # # │ i64 ┆ i64 │
2512
+ # # ╞═════╪═════╡
2513
+ # # │ 1 ┆ 7 │
2514
+ # # │ 2 ┆ 8 │
2515
+ # # └─────┴─────┘
2066
2516
  def head(n = 5)
2067
2517
  slice(0, n)
2068
2518
  end
@@ -2073,6 +2523,41 @@ module Polars
2073
2523
  # Number of rows.
2074
2524
  #
2075
2525
  # @return [LazyFrame]
2526
+ #
2527
+ # @example
2528
+ # lf = Polars::LazyFrame.new(
2529
+ # {
2530
+ # "a" => [1, 2, 3, 4, 5, 6],
2531
+ # "b" => [7, 8, 9, 10, 11, 12]
2532
+ # }
2533
+ # )
2534
+ # lf.tail.collect
2535
+ # # =>
2536
+ # # shape: (5, 2)
2537
+ # # ┌─────┬─────┐
2538
+ # # │ a ┆ b │
2539
+ # # │ --- ┆ --- │
2540
+ # # │ i64 ┆ i64 │
2541
+ # # ╞═════╪═════╡
2542
+ # # │ 2 ┆ 8 │
2543
+ # # │ 3 ┆ 9 │
2544
+ # # │ 4 ┆ 10 │
2545
+ # # │ 5 ┆ 11 │
2546
+ # # │ 6 ┆ 12 │
2547
+ # # └─────┴─────┘
2548
+ #
2549
+ # @example
2550
+ # lf.tail(2).collect
2551
+ # # =>
2552
+ # # shape: (2, 2)
2553
+ # # ┌─────┬─────┐
2554
+ # # │ a ┆ b │
2555
+ # # │ --- ┆ --- │
2556
+ # # │ i64 ┆ i64 │
2557
+ # # ╞═════╪═════╡
2558
+ # # │ 5 ┆ 11 │
2559
+ # # │ 6 ┆ 12 │
2560
+ # # └─────┴─────┘
2076
2561
  def tail(n = 5)
2077
2562
  _from_rbldf(_ldf.tail(n))
2078
2563
  end
@@ -2080,6 +2565,24 @@ module Polars
2080
2565
  # Get the last row of the DataFrame.
2081
2566
  #
2082
2567
  # @return [LazyFrame]
2568
+ #
2569
+ # @example
2570
+ # lf = Polars::LazyFrame.new(
2571
+ # {
2572
+ # "a" => [1, 5, 3],
2573
+ # "b" => [2, 4, 6]
2574
+ # }
2575
+ # )
2576
+ # lf.last.collect
2577
+ # # =>
2578
+ # # shape: (1, 2)
2579
+ # # ┌─────┬─────┐
2580
+ # # │ a ┆ b │
2581
+ # # │ --- ┆ --- │
2582
+ # # │ i64 ┆ i64 │
2583
+ # # ╞═════╪═════╡
2584
+ # # │ 3 ┆ 6 │
2585
+ # # └─────┴─────┘
2083
2586
  def last
2084
2587
  tail(1)
2085
2588
  end
@@ -2087,6 +2590,24 @@ module Polars
2087
2590
  # Get the first row of the DataFrame.
2088
2591
  #
2089
2592
  # @return [LazyFrame]
2593
+ #
2594
+ # @example
2595
+ # lf = Polars::LazyFrame.new(
2596
+ # {
2597
+ # "a" => [1, 5, 3],
2598
+ # "b" => [2, 4, 6]
2599
+ # }
2600
+ # )
2601
+ # lf.first.collect
2602
+ # # =>
2603
+ # # shape: (1, 2)
2604
+ # # ┌─────┬─────┐
2605
+ # # │ a ┆ b │
2606
+ # # │ --- ┆ --- │
2607
+ # # │ i64 ┆ i64 │
2608
+ # # ╞═════╪═════╡
2609
+ # # │ 1 ┆ 2 │
2610
+ # # └─────┴─────┘
2090
2611
  def first
2091
2612
  slice(0, 1)
2092
2613
  end
@@ -2152,6 +2673,72 @@ module Polars
2152
2673
  # Fill null values using the specified value or strategy.
2153
2674
  #
2154
2675
  # @return [LazyFrame]
2676
+ #
2677
+ # @example
2678
+ # lf = Polars::LazyFrame.new(
2679
+ # {
2680
+ # "a" => [1, 2, nil, 4],
2681
+ # "b" => [0.5, 4, nil, 13]
2682
+ # }
2683
+ # )
2684
+ # lf.fill_null(99).collect
2685
+ # # =>
2686
+ # # shape: (4, 2)
2687
+ # # ┌─────┬──────┐
2688
+ # # │ a ┆ b │
2689
+ # # │ --- ┆ --- │
2690
+ # # │ i64 ┆ f64 │
2691
+ # # ╞═════╪══════╡
2692
+ # # │ 1 ┆ 0.5 │
2693
+ # # │ 2 ┆ 4.0 │
2694
+ # # │ 99 ┆ 99.0 │
2695
+ # # │ 4 ┆ 13.0 │
2696
+ # # └─────┴──────┘
2697
+ #
2698
+ # @example
2699
+ # lf.fill_null(strategy: "forward").collect
2700
+ # # =>
2701
+ # # shape: (4, 2)
2702
+ # # ┌─────┬──────┐
2703
+ # # │ a ┆ b │
2704
+ # # │ --- ┆ --- │
2705
+ # # │ i64 ┆ f64 │
2706
+ # # ╞═════╪══════╡
2707
+ # # │ 1 ┆ 0.5 │
2708
+ # # │ 2 ┆ 4.0 │
2709
+ # # │ 2 ┆ 4.0 │
2710
+ # # │ 4 ┆ 13.0 │
2711
+ # # └─────┴──────┘
2712
+ #
2713
+ # @example
2714
+ # lf.fill_null(strategy: "max").collect
2715
+ # # =>
2716
+ # # shape: (4, 2)
2717
+ # # ┌─────┬──────┐
2718
+ # # │ a ┆ b │
2719
+ # # │ --- ┆ --- │
2720
+ # # │ i64 ┆ f64 │
2721
+ # # ╞═════╪══════╡
2722
+ # # │ 1 ┆ 0.5 │
2723
+ # # │ 2 ┆ 4.0 │
2724
+ # # │ 4 ┆ 13.0 │
2725
+ # # │ 4 ┆ 13.0 │
2726
+ # # └─────┴──────┘
2727
+ #
2728
+ # @example
2729
+ # lf.fill_null(strategy: "zero").collect
2730
+ # # =>
2731
+ # # shape: (4, 2)
2732
+ # # ┌─────┬──────┐
2733
+ # # │ a ┆ b │
2734
+ # # │ --- ┆ --- │
2735
+ # # │ i64 ┆ f64 │
2736
+ # # ╞═════╪══════╡
2737
+ # # │ 1 ┆ 0.5 │
2738
+ # # │ 2 ┆ 4.0 │
2739
+ # # │ 0 ┆ 0.0 │
2740
+ # # │ 4 ┆ 13.0 │
2741
+ # # └─────┴──────┘
2155
2742
  def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: nil)
2156
2743
  select(Polars.all.fill_null(value, strategy: strategy, limit: limit))
2157
2744
  end
@@ -2431,6 +3018,53 @@ module Polars
2431
3018
  # Which of the duplicate rows to keep.
2432
3019
  #
2433
3020
  # @return [LazyFrame]
3021
+ #
3022
+ # @example
3023
+ # lf = Polars::LazyFrame.new(
3024
+ # {
3025
+ # "foo" => [1, 2, 3, 1],
3026
+ # "bar" => ["a", "a", "a", "a"],
3027
+ # "ham" => ["b", "b", "b", "b"]
3028
+ # }
3029
+ # )
3030
+ # lf.unique(maintain_order: true).collect
3031
+ # # =>
3032
+ # # shape: (3, 3)
3033
+ # # ┌─────┬─────┬─────┐
3034
+ # # │ foo ┆ bar ┆ ham │
3035
+ # # │ --- ┆ --- ┆ --- │
3036
+ # # │ i64 ┆ str ┆ str │
3037
+ # # ╞═════╪═════╪═════╡
3038
+ # # │ 1 ┆ a ┆ b │
3039
+ # # │ 2 ┆ a ┆ b │
3040
+ # # │ 3 ┆ a ┆ b │
3041
+ # # └─────┴─────┴─────┘
3042
+ #
3043
+ # @example
3044
+ # lf.unique(subset: ["bar", "ham"], maintain_order: true).collect
3045
+ # # =>
3046
+ # # shape: (1, 3)
3047
+ # # ┌─────┬─────┬─────┐
3048
+ # # │ foo ┆ bar ┆ ham │
3049
+ # # │ --- ┆ --- ┆ --- │
3050
+ # # │ i64 ┆ str ┆ str │
3051
+ # # ╞═════╪═════╪═════╡
3052
+ # # │ 1 ┆ a ┆ b │
3053
+ # # └─────┴─────┴─────┘
3054
+ #
3055
+ # @example
3056
+ # lf.unique(keep: "last", maintain_order: true).collect
3057
+ # # =>
3058
+ # # shape: (3, 3)
3059
+ # # ┌─────┬─────┬─────┐
3060
+ # # │ foo ┆ bar ┆ ham │
3061
+ # # │ --- ┆ --- ┆ --- │
3062
+ # # │ i64 ┆ str ┆ str │
3063
+ # # ╞═════╪═════╪═════╡
3064
+ # # │ 2 ┆ a ┆ b │
3065
+ # # │ 3 ┆ a ┆ b │
3066
+ # # │ 1 ┆ a ┆ b │
3067
+ # # └─────┴─────┴─────┘
2434
3068
  def unique(maintain_order: true, subset: nil, keep: "first")
2435
3069
  if !subset.nil? && !subset.is_a?(::Array)
2436
3070
  subset = [subset]
@@ -2504,7 +3138,7 @@ module Polars
2504
3138
  # "c" => [2, 4, 6]
2505
3139
  # }
2506
3140
  # )
2507
- # lf.unpivot(Polars::Selectors.numeric, index: "a").collect
3141
+ # lf.unpivot(Polars.cs.numeric, index: "a").collect
2508
3142
  # # =>
2509
3143
  # # shape: (6, 3)
2510
3144
  # # ┌─────┬──────────┬───────┐
@@ -2530,8 +3164,8 @@ module Polars
2530
3164
  warn "The `streamable` parameter for `LazyFrame.unpivot` is deprecated"
2531
3165
  end
2532
3166
 
2533
- on = on.nil? ? [] : Utils._expand_selectors(self, on)
2534
- index = index.nil? ? [] : Utils._expand_selectors(self, index)
3167
+ on = on.nil? ? [] : Utils.parse_into_list_of_expressions(on)
3168
+ index = index.nil? ? [] : Utils.parse_into_list_of_expressions(index)
2535
3169
 
2536
3170
  _from_rbldf(
2537
3171
  _ldf.unpivot(on, index, value_name, variable_name)