polars-df 0.13.0-x64-mingw-ucrt → 0.15.0-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -0
- data/Cargo.lock +1368 -319
- data/LICENSE-THIRD-PARTY.txt +24818 -14217
- data/LICENSE.txt +1 -0
- data/README.md +1 -2
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +285 -62
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +2 -0
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +109 -8
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +88 -10
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/ipc.rb +14 -12
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +649 -15
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1144 -0
- data/lib/polars/series.rb +470 -40
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +4 -3
- data/lib/polars/functions.rb +0 -57
data/lib/polars/lazy_frame.rb
CHANGED
@@ -848,9 +848,70 @@ module Polars
|
|
848
848
|
_from_rbldf(_ldf.cache)
|
849
849
|
end
|
850
850
|
|
851
|
-
#
|
852
|
-
#
|
853
|
-
#
|
851
|
+
# Cast LazyFrame column(s) to the specified dtype(s).
|
852
|
+
#
|
853
|
+
# @param dtypes [Hash]
|
854
|
+
# Mapping of column names (or selector) to dtypes, or a single dtype
|
855
|
+
# to which all columns will be cast.
|
856
|
+
# @param strict [Boolean]
|
857
|
+
# Throw an error if a cast could not be done (for instance, due to an
|
858
|
+
# overflow).
|
859
|
+
#
|
860
|
+
# @return [LazyFrame]
|
861
|
+
#
|
862
|
+
# @example Cast specific frame columns to the specified dtypes:
|
863
|
+
# lf = Polars::LazyFrame.new(
|
864
|
+
# {
|
865
|
+
# "foo" => [1, 2, 3],
|
866
|
+
# "bar" => [6.0, 7.0, 8.0],
|
867
|
+
# "ham" => [Date.new(2020, 1, 2), Date.new(2021, 3, 4), Date.new(2022, 5, 6)]
|
868
|
+
# }
|
869
|
+
# )
|
870
|
+
# lf.cast({"foo" => Polars::Float32, "bar" => Polars::UInt8}).collect
|
871
|
+
# # =>
|
872
|
+
# # shape: (3, 3)
|
873
|
+
# # ┌─────┬─────┬────────────┐
|
874
|
+
# # │ foo ┆ bar ┆ ham │
|
875
|
+
# # │ --- ┆ --- ┆ --- │
|
876
|
+
# # │ f32 ┆ u8 ┆ date │
|
877
|
+
# # ╞═════╪═════╪════════════╡
|
878
|
+
# # │ 1.0 ┆ 6 ┆ 2020-01-02 │
|
879
|
+
# # │ 2.0 ┆ 7 ┆ 2021-03-04 │
|
880
|
+
# # │ 3.0 ┆ 8 ┆ 2022-05-06 │
|
881
|
+
# # └─────┴─────┴────────────┘
|
882
|
+
#
|
883
|
+
# @example Cast all frame columns matching one dtype (or dtype group) to another dtype:
|
884
|
+
# lf.cast({Polars::Date => Polars::Datetime}).collect
|
885
|
+
# # =>
|
886
|
+
# # shape: (3, 3)
|
887
|
+
# # ┌─────┬─────┬─────────────────────┐
|
888
|
+
# # │ foo ┆ bar ┆ ham │
|
889
|
+
# # │ --- ┆ --- ┆ --- │
|
890
|
+
# # │ i64 ┆ f64 ┆ datetime[μs] │
|
891
|
+
# # ╞═════╪═════╪═════════════════════╡
|
892
|
+
# # │ 1 ┆ 6.0 ┆ 2020-01-02 00:00:00 │
|
893
|
+
# # │ 2 ┆ 7.0 ┆ 2021-03-04 00:00:00 │
|
894
|
+
# # │ 3 ┆ 8.0 ┆ 2022-05-06 00:00:00 │
|
895
|
+
# # └─────┴─────┴─────────────────────┘
|
896
|
+
#
|
897
|
+
# @example Cast all frame columns to the specified dtype:
|
898
|
+
# lf.cast(Polars::String).collect.to_h(as_series: false)
|
899
|
+
# # => {"foo"=>["1", "2", "3"], "bar"=>["6.0", "7.0", "8.0"], "ham"=>["2020-01-02", "2021-03-04", "2022-05-06"]}
|
900
|
+
def cast(dtypes, strict: true)
|
901
|
+
if !dtypes.is_a?(Hash)
|
902
|
+
return _from_rbldf(_ldf.cast_all(dtypes, strict))
|
903
|
+
end
|
904
|
+
|
905
|
+
cast_map = {}
|
906
|
+
dtypes.each do |c, dtype|
|
907
|
+
dtype = Utils.parse_into_dtype(dtype)
|
908
|
+
cast_map.merge!(
|
909
|
+
c.is_a?(::String) ? {c => dtype} : Utils.expand_selector(self, c).to_h { |x| [x, dtype] }
|
910
|
+
)
|
911
|
+
end
|
912
|
+
|
913
|
+
_from_rbldf(_ldf.cast(cast_map, strict))
|
914
|
+
end
|
854
915
|
|
855
916
|
# Create an empty copy of the current LazyFrame.
|
856
917
|
#
|
@@ -1520,8 +1581,197 @@ module Polars
|
|
1520
1581
|
# @param force_parallel [Boolean]
|
1521
1582
|
# Force the physical plan to evaluate the computation of both DataFrames up to
|
1522
1583
|
# the join in parallel.
|
1584
|
+
# @param coalesce [Boolean]
|
1585
|
+
# Coalescing behavior (merging of join columns).
|
1586
|
+
# - true: -> Always coalesce join columns.
|
1587
|
+
# - false: -> Never coalesce join columns.
|
1588
|
+
# Note that joining on any expression other than `col` will turn off coalescing.
|
1523
1589
|
#
|
1524
1590
|
# @return [LazyFrame]
|
1591
|
+
#
|
1592
|
+
# @example
|
1593
|
+
# gdp = Polars::LazyFrame.new(
|
1594
|
+
# {
|
1595
|
+
# "date" => Polars.date_range(
|
1596
|
+
# Date.new(2016, 1, 1),
|
1597
|
+
# Date.new(2020, 1, 1),
|
1598
|
+
# "1y",
|
1599
|
+
# eager: true
|
1600
|
+
# ),
|
1601
|
+
# "gdp" => [4164, 4411, 4566, 4696, 4827]
|
1602
|
+
# }
|
1603
|
+
# )
|
1604
|
+
# gdp.collect
|
1605
|
+
# # =>
|
1606
|
+
# # shape: (5, 2)
|
1607
|
+
# # ┌────────────┬──────┐
|
1608
|
+
# # │ date ┆ gdp │
|
1609
|
+
# # │ --- ┆ --- │
|
1610
|
+
# # │ date ┆ i64 │
|
1611
|
+
# # ╞════════════╪══════╡
|
1612
|
+
# # │ 2016-01-01 ┆ 4164 │
|
1613
|
+
# # │ 2017-01-01 ┆ 4411 │
|
1614
|
+
# # │ 2018-01-01 ┆ 4566 │
|
1615
|
+
# # │ 2019-01-01 ┆ 4696 │
|
1616
|
+
# # │ 2020-01-01 ┆ 4827 │
|
1617
|
+
# # └────────────┴──────┘
|
1618
|
+
#
|
1619
|
+
# @example
|
1620
|
+
# population = Polars::LazyFrame.new(
|
1621
|
+
# {
|
1622
|
+
# "date" => [Date.new(2016, 3, 1), Date.new(2018, 8, 1), Date.new(2019, 1, 1)],
|
1623
|
+
# "population" => [82.19, 82.66, 83.12]
|
1624
|
+
# }
|
1625
|
+
# ).sort("date")
|
1626
|
+
# population.collect
|
1627
|
+
# # =>
|
1628
|
+
# # shape: (3, 2)
|
1629
|
+
# # ┌────────────┬────────────┐
|
1630
|
+
# # │ date ┆ population │
|
1631
|
+
# # │ --- ┆ --- │
|
1632
|
+
# # │ date ┆ f64 │
|
1633
|
+
# # ╞════════════╪════════════╡
|
1634
|
+
# # │ 2016-03-01 ┆ 82.19 │
|
1635
|
+
# # │ 2018-08-01 ┆ 82.66 │
|
1636
|
+
# # │ 2019-01-01 ┆ 83.12 │
|
1637
|
+
# # └────────────┴────────────┘
|
1638
|
+
#
|
1639
|
+
# @example Note how the dates don't quite match. If we join them using `join_asof` and `strategy: "backward"`, then each date from `population` which doesn't have an exact match is matched with the closest earlier date from `gdp`:
|
1640
|
+
# population.join_asof(gdp, on: "date", strategy: "backward").collect
|
1641
|
+
# # =>
|
1642
|
+
# # shape: (3, 3)
|
1643
|
+
# # ┌────────────┬────────────┬──────┐
|
1644
|
+
# # │ date ┆ population ┆ gdp │
|
1645
|
+
# # │ --- ┆ --- ┆ --- │
|
1646
|
+
# # │ date ┆ f64 ┆ i64 │
|
1647
|
+
# # ╞════════════╪════════════╪══════╡
|
1648
|
+
# # │ 2016-03-01 ┆ 82.19 ┆ 4164 │
|
1649
|
+
# # │ 2018-08-01 ┆ 82.66 ┆ 4566 │
|
1650
|
+
# # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
|
1651
|
+
# # └────────────┴────────────┴──────┘
|
1652
|
+
#
|
1653
|
+
# @example
|
1654
|
+
# population.join_asof(
|
1655
|
+
# gdp, on: "date", strategy: "backward", coalesce: false
|
1656
|
+
# ).collect
|
1657
|
+
# # =>
|
1658
|
+
# # shape: (3, 4)
|
1659
|
+
# # ┌────────────┬────────────┬────────────┬──────┐
|
1660
|
+
# # │ date ┆ population ┆ date_right ┆ gdp │
|
1661
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
1662
|
+
# # │ date ┆ f64 ┆ date ┆ i64 │
|
1663
|
+
# # ╞════════════╪════════════╪════════════╪══════╡
|
1664
|
+
# # │ 2016-03-01 ┆ 82.19 ┆ 2016-01-01 ┆ 4164 │
|
1665
|
+
# # │ 2018-08-01 ┆ 82.66 ┆ 2018-01-01 ┆ 4566 │
|
1666
|
+
# # │ 2019-01-01 ┆ 83.12 ┆ 2019-01-01 ┆ 4696 │
|
1667
|
+
# # └────────────┴────────────┴────────────┴──────┘
|
1668
|
+
#
|
1669
|
+
# @example If we instead use `strategy: "forward"`, then each date from `population` which doesn't have an exact match is matched with the closest later date from `gdp`:
|
1670
|
+
# population.join_asof(gdp, on: "date", strategy: "forward").collect
|
1671
|
+
# # =>
|
1672
|
+
# # shape: (3, 3)
|
1673
|
+
# # ┌────────────┬────────────┬──────┐
|
1674
|
+
# # │ date ┆ population ┆ gdp │
|
1675
|
+
# # │ --- ┆ --- ┆ --- │
|
1676
|
+
# # │ date ┆ f64 ┆ i64 │
|
1677
|
+
# # ╞════════════╪════════════╪══════╡
|
1678
|
+
# # │ 2016-03-01 ┆ 82.19 ┆ 4411 │
|
1679
|
+
# # │ 2018-08-01 ┆ 82.66 ┆ 4696 │
|
1680
|
+
# # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
|
1681
|
+
# # └────────────┴────────────┴──────┘
|
1682
|
+
#
|
1683
|
+
# @example
|
1684
|
+
# population.join_asof(gdp, on: "date", strategy: "nearest").collect
|
1685
|
+
# # =>
|
1686
|
+
# # shape: (3, 3)
|
1687
|
+
# # ┌────────────┬────────────┬──────┐
|
1688
|
+
# # │ date ┆ population ┆ gdp │
|
1689
|
+
# # │ --- ┆ --- ┆ --- │
|
1690
|
+
# # │ date ┆ f64 ┆ i64 │
|
1691
|
+
# # ╞════════════╪════════════╪══════╡
|
1692
|
+
# # │ 2016-03-01 ┆ 82.19 ┆ 4164 │
|
1693
|
+
# # │ 2018-08-01 ┆ 82.66 ┆ 4696 │
|
1694
|
+
# # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
|
1695
|
+
# # └────────────┴────────────┴──────┘
|
1696
|
+
#
|
1697
|
+
# @example
|
1698
|
+
# gdp_dates = Polars.date_range(
|
1699
|
+
# Date.new(2016, 1, 1), Date.new(2020, 1, 1), "1y", eager: true
|
1700
|
+
# )
|
1701
|
+
# gdp2 = Polars::LazyFrame.new(
|
1702
|
+
# {
|
1703
|
+
# "country" => ["Germany"] * 5 + ["Netherlands"] * 5,
|
1704
|
+
# "date" => Polars.concat([gdp_dates, gdp_dates]),
|
1705
|
+
# "gdp" => [4164, 4411, 4566, 4696, 4827, 784, 833, 914, 910, 909]
|
1706
|
+
# }
|
1707
|
+
# ).sort("country", "date")
|
1708
|
+
# gdp2.collect
|
1709
|
+
# # =>
|
1710
|
+
# # shape: (10, 3)
|
1711
|
+
# # ┌─────────────┬────────────┬──────┐
|
1712
|
+
# # │ country ┆ date ┆ gdp │
|
1713
|
+
# # │ --- ┆ --- ┆ --- │
|
1714
|
+
# # │ str ┆ date ┆ i64 │
|
1715
|
+
# # ╞═════════════╪════════════╪══════╡
|
1716
|
+
# # │ Germany ┆ 2016-01-01 ┆ 4164 │
|
1717
|
+
# # │ Germany ┆ 2017-01-01 ┆ 4411 │
|
1718
|
+
# # │ Germany ┆ 2018-01-01 ┆ 4566 │
|
1719
|
+
# # │ Germany ┆ 2019-01-01 ┆ 4696 │
|
1720
|
+
# # │ Germany ┆ 2020-01-01 ┆ 4827 │
|
1721
|
+
# # │ Netherlands ┆ 2016-01-01 ┆ 784 │
|
1722
|
+
# # │ Netherlands ┆ 2017-01-01 ┆ 833 │
|
1723
|
+
# # │ Netherlands ┆ 2018-01-01 ┆ 914 │
|
1724
|
+
# # │ Netherlands ┆ 2019-01-01 ┆ 910 │
|
1725
|
+
# # │ Netherlands ┆ 2020-01-01 ┆ 909 │
|
1726
|
+
# # └─────────────┴────────────┴──────┘
|
1727
|
+
#
|
1728
|
+
# @example
|
1729
|
+
# pop2 = Polars::LazyFrame.new(
|
1730
|
+
# {
|
1731
|
+
# "country" => ["Germany"] * 3 + ["Netherlands"] * 3,
|
1732
|
+
# "date" => [
|
1733
|
+
# Date.new(2016, 3, 1),
|
1734
|
+
# Date.new(2018, 8, 1),
|
1735
|
+
# Date.new(2019, 1, 1),
|
1736
|
+
# Date.new(2016, 3, 1),
|
1737
|
+
# Date.new(2018, 8, 1),
|
1738
|
+
# Date.new(2019, 1, 1)
|
1739
|
+
# ],
|
1740
|
+
# "population" => [82.19, 82.66, 83.12, 17.11, 17.32, 17.40]
|
1741
|
+
# }
|
1742
|
+
# ).sort("country", "date")
|
1743
|
+
# pop2.collect
|
1744
|
+
# # =>
|
1745
|
+
# # shape: (6, 3)
|
1746
|
+
# # ┌─────────────┬────────────┬────────────┐
|
1747
|
+
# # │ country ┆ date ┆ population │
|
1748
|
+
# # │ --- ┆ --- ┆ --- │
|
1749
|
+
# # │ str ┆ date ┆ f64 │
|
1750
|
+
# # ╞═════════════╪════════════╪════════════╡
|
1751
|
+
# # │ Germany ┆ 2016-03-01 ┆ 82.19 │
|
1752
|
+
# # │ Germany ┆ 2018-08-01 ┆ 82.66 │
|
1753
|
+
# # │ Germany ┆ 2019-01-01 ┆ 83.12 │
|
1754
|
+
# # │ Netherlands ┆ 2016-03-01 ┆ 17.11 │
|
1755
|
+
# # │ Netherlands ┆ 2018-08-01 ┆ 17.32 │
|
1756
|
+
# # │ Netherlands ┆ 2019-01-01 ┆ 17.4 │
|
1757
|
+
# # └─────────────┴────────────┴────────────┘
|
1758
|
+
#
|
1759
|
+
# @example
|
1760
|
+
# pop2.join_asof(gdp2, by: "country", on: "date", strategy: "nearest").collect
|
1761
|
+
# # =>
|
1762
|
+
# # shape: (6, 4)
|
1763
|
+
# # ┌─────────────┬────────────┬────────────┬──────┐
|
1764
|
+
# # │ country ┆ date ┆ population ┆ gdp │
|
1765
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
1766
|
+
# # │ str ┆ date ┆ f64 ┆ i64 │
|
1767
|
+
# # ╞═════════════╪════════════╪════════════╪══════╡
|
1768
|
+
# # │ Germany ┆ 2016-03-01 ┆ 82.19 ┆ 4164 │
|
1769
|
+
# # │ Germany ┆ 2018-08-01 ┆ 82.66 ┆ 4696 │
|
1770
|
+
# # │ Germany ┆ 2019-01-01 ┆ 83.12 ┆ 4696 │
|
1771
|
+
# # │ Netherlands ┆ 2016-03-01 ┆ 17.11 ┆ 784 │
|
1772
|
+
# # │ Netherlands ┆ 2018-08-01 ┆ 17.32 ┆ 910 │
|
1773
|
+
# # │ Netherlands ┆ 2019-01-01 ┆ 17.4 ┆ 910 │
|
1774
|
+
# # └─────────────┴────────────┴────────────┴──────┘
|
1525
1775
|
def join_asof(
|
1526
1776
|
other,
|
1527
1777
|
left_on: nil,
|
@@ -1534,7 +1784,8 @@ module Polars
|
|
1534
1784
|
suffix: "_right",
|
1535
1785
|
tolerance: nil,
|
1536
1786
|
allow_parallel: true,
|
1537
|
-
force_parallel: false
|
1787
|
+
force_parallel: false,
|
1788
|
+
coalesce: true
|
1538
1789
|
)
|
1539
1790
|
if !other.is_a?(LazyFrame)
|
1540
1791
|
raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
|
@@ -1589,7 +1840,8 @@ module Polars
|
|
1589
1840
|
suffix,
|
1590
1841
|
strategy,
|
1591
1842
|
tolerance_num,
|
1592
|
-
tolerance_str
|
1843
|
+
tolerance_str,
|
1844
|
+
coalesce
|
1593
1845
|
)
|
1594
1846
|
)
|
1595
1847
|
end
|
@@ -1609,6 +1861,12 @@ module Polars
|
|
1609
1861
|
# Join strategy.
|
1610
1862
|
# @param suffix [String]
|
1611
1863
|
# Suffix to append to columns with a duplicate name.
|
1864
|
+
# @param validate ['m:m', 'm:1', '1:m', '1:1']
|
1865
|
+
# Checks if join is of specified type.
|
1866
|
+
# * *many_to_many* - “m:m”: default, does not result in checks
|
1867
|
+
# * *one_to_one* - “1:1”: check if join keys are unique in both left and right datasets
|
1868
|
+
# * *one_to_many* - “1:m”: check if join keys are unique in left dataset
|
1869
|
+
# * *many_to_one* - “m:1”: check if join keys are unique in right dataset
|
1612
1870
|
# @param join_nulls [Boolean]
|
1613
1871
|
# Join on null values. By default null values will never produce matches.
|
1614
1872
|
# @param allow_parallel [Boolean]
|
@@ -1617,6 +1875,12 @@ module Polars
|
|
1617
1875
|
# @param force_parallel [Boolean]
|
1618
1876
|
# Force the physical plan to evaluate the computation of both DataFrames up to
|
1619
1877
|
# the join in parallel.
|
1878
|
+
# @param coalesce [Boolean]
|
1879
|
+
# Coalescing behavior (merging of join columns).
|
1880
|
+
# - nil: -> join specific.
|
1881
|
+
# - true: -> Always coalesce join columns.
|
1882
|
+
# - false: -> Never coalesce join columns.
|
1883
|
+
# Note that joining on any expression other than `col` will turn off coalescing.
|
1620
1884
|
#
|
1621
1885
|
# @return [LazyFrame]
|
1622
1886
|
#
|
@@ -1706,9 +1970,11 @@ module Polars
|
|
1706
1970
|
on: nil,
|
1707
1971
|
how: "inner",
|
1708
1972
|
suffix: "_right",
|
1973
|
+
validate: "m:m",
|
1709
1974
|
join_nulls: false,
|
1710
1975
|
allow_parallel: true,
|
1711
|
-
force_parallel: false
|
1976
|
+
force_parallel: false,
|
1977
|
+
coalesce: nil
|
1712
1978
|
)
|
1713
1979
|
if !other.is_a?(LazyFrame)
|
1714
1980
|
raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
|
@@ -1719,7 +1985,7 @@ module Polars
|
|
1719
1985
|
elsif how == "cross"
|
1720
1986
|
return _from_rbldf(
|
1721
1987
|
_ldf.join(
|
1722
|
-
other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
|
1988
|
+
other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix, validate, coalesce
|
1723
1989
|
)
|
1724
1990
|
)
|
1725
1991
|
end
|
@@ -1745,6 +2011,8 @@ module Polars
|
|
1745
2011
|
join_nulls,
|
1746
2012
|
how,
|
1747
2013
|
suffix,
|
2014
|
+
validate,
|
2015
|
+
coalesce
|
1748
2016
|
)
|
1749
2017
|
)
|
1750
2018
|
end
|
@@ -1879,6 +2147,55 @@ module Polars
|
|
1879
2147
|
# - List of column names.
|
1880
2148
|
#
|
1881
2149
|
# @return [LazyFrame]
|
2150
|
+
#
|
2151
|
+
# @example Drop a single column by passing the name of that column.
|
2152
|
+
# lf = Polars::LazyFrame.new(
|
2153
|
+
# {
|
2154
|
+
# "foo" => [1, 2, 3],
|
2155
|
+
# "bar" => [6.0, 7.0, 8.0],
|
2156
|
+
# "ham" => ["a", "b", "c"]
|
2157
|
+
# }
|
2158
|
+
# )
|
2159
|
+
# lf.drop("ham").collect
|
2160
|
+
# # =>
|
2161
|
+
# # shape: (3, 2)
|
2162
|
+
# # ┌─────┬─────┐
|
2163
|
+
# # │ foo ┆ bar │
|
2164
|
+
# # │ --- ┆ --- │
|
2165
|
+
# # │ i64 ┆ f64 │
|
2166
|
+
# # ╞═════╪═════╡
|
2167
|
+
# # │ 1 ┆ 6.0 │
|
2168
|
+
# # │ 2 ┆ 7.0 │
|
2169
|
+
# # │ 3 ┆ 8.0 │
|
2170
|
+
# # └─────┴─────┘
|
2171
|
+
#
|
2172
|
+
# @example Drop multiple columns by passing a selector.
|
2173
|
+
# lf.drop(Polars.cs.numeric).collect
|
2174
|
+
# # =>
|
2175
|
+
# # shape: (3, 1)
|
2176
|
+
# # ┌─────┐
|
2177
|
+
# # │ ham │
|
2178
|
+
# # │ --- │
|
2179
|
+
# # │ str │
|
2180
|
+
# # ╞═════╡
|
2181
|
+
# # │ a │
|
2182
|
+
# # │ b │
|
2183
|
+
# # │ c │
|
2184
|
+
# # └─────┘
|
2185
|
+
#
|
2186
|
+
# @example Use positional arguments to drop multiple columns.
|
2187
|
+
# lf.drop("foo", "ham").collect
|
2188
|
+
# # =>
|
2189
|
+
# # shape: (3, 1)
|
2190
|
+
# # ┌─────┐
|
2191
|
+
# # │ bar │
|
2192
|
+
# # │ --- │
|
2193
|
+
# # │ f64 │
|
2194
|
+
# # ╞═════╡
|
2195
|
+
# # │ 6.0 │
|
2196
|
+
# # │ 7.0 │
|
2197
|
+
# # │ 8.0 │
|
2198
|
+
# # └─────┘
|
1882
2199
|
def drop(*columns)
|
1883
2200
|
drop_cols = Utils._expand_selectors(self, *columns)
|
1884
2201
|
_from_rbldf(_ldf.drop(drop_cols))
|
@@ -1888,17 +2205,80 @@ module Polars
|
|
1888
2205
|
#
|
1889
2206
|
# @param mapping [Hash]
|
1890
2207
|
# Key value pairs that map from old name to new name.
|
2208
|
+
# @param strict [Boolean]
|
2209
|
+
# Validate that all column names exist in the current schema,
|
2210
|
+
# and throw an exception if any do not. (Note that this parameter
|
2211
|
+
# is a no-op when passing a function to `mapping`).
|
1891
2212
|
#
|
1892
2213
|
# @return [LazyFrame]
|
1893
|
-
|
1894
|
-
|
1895
|
-
|
1896
|
-
|
2214
|
+
#
|
2215
|
+
# @example
|
2216
|
+
# lf = Polars::LazyFrame.new(
|
2217
|
+
# {
|
2218
|
+
# "foo" => [1, 2, 3],
|
2219
|
+
# "bar" => [6, 7, 8],
|
2220
|
+
# "ham" => ["a", "b", "c"]
|
2221
|
+
# }
|
2222
|
+
# )
|
2223
|
+
# lf.rename({"foo" => "apple"}).collect
|
2224
|
+
# # =>
|
2225
|
+
# # shape: (3, 3)
|
2226
|
+
# # ┌───────┬─────┬─────┐
|
2227
|
+
# # │ apple ┆ bar ┆ ham │
|
2228
|
+
# # │ --- ┆ --- ┆ --- │
|
2229
|
+
# # │ i64 ┆ i64 ┆ str │
|
2230
|
+
# # ╞═══════╪═════╪═════╡
|
2231
|
+
# # │ 1 ┆ 6 ┆ a │
|
2232
|
+
# # │ 2 ┆ 7 ┆ b │
|
2233
|
+
# # │ 3 ┆ 8 ┆ c │
|
2234
|
+
# # └───────┴─────┴─────┘
|
2235
|
+
#
|
2236
|
+
# @example
|
2237
|
+
# lf.rename(->(column_name) { "c" + column_name[1..] }).collect
|
2238
|
+
# # =>
|
2239
|
+
# # shape: (3, 3)
|
2240
|
+
# # ┌─────┬─────┬─────┐
|
2241
|
+
# # │ coo ┆ car ┆ cam │
|
2242
|
+
# # │ --- ┆ --- ┆ --- │
|
2243
|
+
# # │ i64 ┆ i64 ┆ str │
|
2244
|
+
# # ╞═════╪═════╪═════╡
|
2245
|
+
# # │ 1 ┆ 6 ┆ a │
|
2246
|
+
# # │ 2 ┆ 7 ┆ b │
|
2247
|
+
# # │ 3 ┆ 8 ┆ c │
|
2248
|
+
# # └─────┴─────┴─────┘
|
2249
|
+
def rename(mapping, strict: true)
|
2250
|
+
if mapping.respond_to?(:call)
|
2251
|
+
select(F.all.name.map(&mapping))
|
2252
|
+
else
|
2253
|
+
existing = mapping.keys
|
2254
|
+
_new = mapping.values
|
2255
|
+
_from_rbldf(_ldf.rename(existing, _new, strict))
|
2256
|
+
end
|
1897
2257
|
end
|
1898
2258
|
|
1899
2259
|
# Reverse the DataFrame.
|
1900
2260
|
#
|
1901
2261
|
# @return [LazyFrame]
|
2262
|
+
#
|
2263
|
+
# @example
|
2264
|
+
# lf = Polars::LazyFrame.new(
|
2265
|
+
# {
|
2266
|
+
# "key" => ["a", "b", "c"],
|
2267
|
+
# "val" => [1, 2, 3]
|
2268
|
+
# }
|
2269
|
+
# )
|
2270
|
+
# lf.reverse.collect
|
2271
|
+
# # =>
|
2272
|
+
# # shape: (3, 2)
|
2273
|
+
# # ┌─────┬─────┐
|
2274
|
+
# # │ key ┆ val │
|
2275
|
+
# # │ --- ┆ --- │
|
2276
|
+
# # │ str ┆ i64 │
|
2277
|
+
# # ╞═════╪═════╡
|
2278
|
+
# # │ c ┆ 3 │
|
2279
|
+
# # │ b ┆ 2 │
|
2280
|
+
# # │ a ┆ 1 │
|
2281
|
+
# # └─────┴─────┘
|
1902
2282
|
def reverse
|
1903
2283
|
_from_rbldf(_ldf.reverse)
|
1904
2284
|
end
|
@@ -2048,8 +2428,43 @@ module Polars
|
|
2048
2428
|
# Consider using the {#fetch} operation if you only want to test your
|
2049
2429
|
# query. The {#fetch} operation will load the first `n` rows at the scan
|
2050
2430
|
# level, whereas the {#head}/{#limit} are applied at the end.
|
2431
|
+
#
|
2432
|
+
# @example
|
2433
|
+
# lf = Polars::LazyFrame.new(
|
2434
|
+
# {
|
2435
|
+
# "a" => [1, 2, 3, 4, 5, 6],
|
2436
|
+
# "b" => [7, 8, 9, 10, 11, 12]
|
2437
|
+
# }
|
2438
|
+
# )
|
2439
|
+
# lf.limit.collect
|
2440
|
+
# # =>
|
2441
|
+
# # shape: (5, 2)
|
2442
|
+
# # ┌─────┬─────┐
|
2443
|
+
# # │ a ┆ b │
|
2444
|
+
# # │ --- ┆ --- │
|
2445
|
+
# # │ i64 ┆ i64 │
|
2446
|
+
# # ╞═════╪═════╡
|
2447
|
+
# # │ 1 ┆ 7 │
|
2448
|
+
# # │ 2 ┆ 8 │
|
2449
|
+
# # │ 3 ┆ 9 │
|
2450
|
+
# # │ 4 ┆ 10 │
|
2451
|
+
# # │ 5 ┆ 11 │
|
2452
|
+
# # └─────┴─────┘
|
2453
|
+
#
|
2454
|
+
# @example
|
2455
|
+
# lf.limit(2).collect
|
2456
|
+
# # =>
|
2457
|
+
# # shape: (2, 2)
|
2458
|
+
# # ┌─────┬─────┐
|
2459
|
+
# # │ a ┆ b │
|
2460
|
+
# # │ --- ┆ --- │
|
2461
|
+
# # │ i64 ┆ i64 │
|
2462
|
+
# # ╞═════╪═════╡
|
2463
|
+
# # │ 1 ┆ 7 │
|
2464
|
+
# # │ 2 ┆ 8 │
|
2465
|
+
# # └─────┴─────┘
|
2051
2466
|
def limit(n = 5)
|
2052
|
-
head(
|
2467
|
+
head(n)
|
2053
2468
|
end
|
2054
2469
|
|
2055
2470
|
# Get the first `n` rows.
|
@@ -2063,6 +2478,41 @@ module Polars
|
|
2063
2478
|
# Consider using the {#fetch} operation if you only want to test your
|
2064
2479
|
# query. The {#fetch} operation will load the first `n` rows at the scan
|
2065
2480
|
# level, whereas the {#head}/{#limit} are applied at the end.
|
2481
|
+
#
|
2482
|
+
# @example
|
2483
|
+
# lf = Polars::LazyFrame.new(
|
2484
|
+
# {
|
2485
|
+
# "a" => [1, 2, 3, 4, 5, 6],
|
2486
|
+
# "b" => [7, 8, 9, 10, 11, 12]
|
2487
|
+
# }
|
2488
|
+
# )
|
2489
|
+
# lf.head.collect
|
2490
|
+
# # =>
|
2491
|
+
# # shape: (5, 2)
|
2492
|
+
# # ┌─────┬─────┐
|
2493
|
+
# # │ a ┆ b │
|
2494
|
+
# # │ --- ┆ --- │
|
2495
|
+
# # │ i64 ┆ i64 │
|
2496
|
+
# # ╞═════╪═════╡
|
2497
|
+
# # │ 1 ┆ 7 │
|
2498
|
+
# # │ 2 ┆ 8 │
|
2499
|
+
# # │ 3 ┆ 9 │
|
2500
|
+
# # │ 4 ┆ 10 │
|
2501
|
+
# # │ 5 ┆ 11 │
|
2502
|
+
# # └─────┴─────┘
|
2503
|
+
#
|
2504
|
+
# @example
|
2505
|
+
# lf.head(2).collect
|
2506
|
+
# # =>
|
2507
|
+
# # shape: (2, 2)
|
2508
|
+
# # ┌─────┬─────┐
|
2509
|
+
# # │ a ┆ b │
|
2510
|
+
# # │ --- ┆ --- │
|
2511
|
+
# # │ i64 ┆ i64 │
|
2512
|
+
# # ╞═════╪═════╡
|
2513
|
+
# # │ 1 ┆ 7 │
|
2514
|
+
# # │ 2 ┆ 8 │
|
2515
|
+
# # └─────┴─────┘
|
2066
2516
|
def head(n = 5)
|
2067
2517
|
slice(0, n)
|
2068
2518
|
end
|
@@ -2073,6 +2523,41 @@ module Polars
|
|
2073
2523
|
# Number of rows.
|
2074
2524
|
#
|
2075
2525
|
# @return [LazyFrame]
|
2526
|
+
#
|
2527
|
+
# @example
|
2528
|
+
# lf = Polars::LazyFrame.new(
|
2529
|
+
# {
|
2530
|
+
# "a" => [1, 2, 3, 4, 5, 6],
|
2531
|
+
# "b" => [7, 8, 9, 10, 11, 12]
|
2532
|
+
# }
|
2533
|
+
# )
|
2534
|
+
# lf.tail.collect
|
2535
|
+
# # =>
|
2536
|
+
# # shape: (5, 2)
|
2537
|
+
# # ┌─────┬─────┐
|
2538
|
+
# # │ a ┆ b │
|
2539
|
+
# # │ --- ┆ --- │
|
2540
|
+
# # │ i64 ┆ i64 │
|
2541
|
+
# # ╞═════╪═════╡
|
2542
|
+
# # │ 2 ┆ 8 │
|
2543
|
+
# # │ 3 ┆ 9 │
|
2544
|
+
# # │ 4 ┆ 10 │
|
2545
|
+
# # │ 5 ┆ 11 │
|
2546
|
+
# # │ 6 ┆ 12 │
|
2547
|
+
# # └─────┴─────┘
|
2548
|
+
#
|
2549
|
+
# @example
|
2550
|
+
# lf.tail(2).collect
|
2551
|
+
# # =>
|
2552
|
+
# # shape: (2, 2)
|
2553
|
+
# # ┌─────┬─────┐
|
2554
|
+
# # │ a ┆ b │
|
2555
|
+
# # │ --- ┆ --- │
|
2556
|
+
# # │ i64 ┆ i64 │
|
2557
|
+
# # ╞═════╪═════╡
|
2558
|
+
# # │ 5 ┆ 11 │
|
2559
|
+
# # │ 6 ┆ 12 │
|
2560
|
+
# # └─────┴─────┘
|
2076
2561
|
def tail(n = 5)
|
2077
2562
|
_from_rbldf(_ldf.tail(n))
|
2078
2563
|
end
|
@@ -2080,6 +2565,24 @@ module Polars
|
|
2080
2565
|
# Get the last row of the DataFrame.
|
2081
2566
|
#
|
2082
2567
|
# @return [LazyFrame]
|
2568
|
+
#
|
2569
|
+
# @example
|
2570
|
+
# lf = Polars::LazyFrame.new(
|
2571
|
+
# {
|
2572
|
+
# "a" => [1, 5, 3],
|
2573
|
+
# "b" => [2, 4, 6]
|
2574
|
+
# }
|
2575
|
+
# )
|
2576
|
+
# lf.last.collect
|
2577
|
+
# # =>
|
2578
|
+
# # shape: (1, 2)
|
2579
|
+
# # ┌─────┬─────┐
|
2580
|
+
# # │ a ┆ b │
|
2581
|
+
# # │ --- ┆ --- │
|
2582
|
+
# # │ i64 ┆ i64 │
|
2583
|
+
# # ╞═════╪═════╡
|
2584
|
+
# # │ 3 ┆ 6 │
|
2585
|
+
# # └─────┴─────┘
|
2083
2586
|
def last
|
2084
2587
|
tail(1)
|
2085
2588
|
end
|
@@ -2087,6 +2590,24 @@ module Polars
|
|
2087
2590
|
# Get the first row of the DataFrame.
|
2088
2591
|
#
|
2089
2592
|
# @return [LazyFrame]
|
2593
|
+
#
|
2594
|
+
# @example
|
2595
|
+
# lf = Polars::LazyFrame.new(
|
2596
|
+
# {
|
2597
|
+
# "a" => [1, 5, 3],
|
2598
|
+
# "b" => [2, 4, 6]
|
2599
|
+
# }
|
2600
|
+
# )
|
2601
|
+
# lf.first.collect
|
2602
|
+
# # =>
|
2603
|
+
# # shape: (1, 2)
|
2604
|
+
# # ┌─────┬─────┐
|
2605
|
+
# # │ a ┆ b │
|
2606
|
+
# # │ --- ┆ --- │
|
2607
|
+
# # │ i64 ┆ i64 │
|
2608
|
+
# # ╞═════╪═════╡
|
2609
|
+
# # │ 1 ┆ 2 │
|
2610
|
+
# # └─────┴─────┘
|
2090
2611
|
def first
|
2091
2612
|
slice(0, 1)
|
2092
2613
|
end
|
@@ -2152,6 +2673,72 @@ module Polars
|
|
2152
2673
|
# Fill null values using the specified value or strategy.
|
2153
2674
|
#
|
2154
2675
|
# @return [LazyFrame]
|
2676
|
+
#
|
2677
|
+
# @example
|
2678
|
+
# lf = Polars::LazyFrame.new(
|
2679
|
+
# {
|
2680
|
+
# "a" => [1, 2, nil, 4],
|
2681
|
+
# "b" => [0.5, 4, nil, 13]
|
2682
|
+
# }
|
2683
|
+
# )
|
2684
|
+
# lf.fill_null(99).collect
|
2685
|
+
# # =>
|
2686
|
+
# # shape: (4, 2)
|
2687
|
+
# # ┌─────┬──────┐
|
2688
|
+
# # │ a ┆ b │
|
2689
|
+
# # │ --- ┆ --- │
|
2690
|
+
# # │ i64 ┆ f64 │
|
2691
|
+
# # ╞═════╪══════╡
|
2692
|
+
# # │ 1 ┆ 0.5 │
|
2693
|
+
# # │ 2 ┆ 4.0 │
|
2694
|
+
# # │ 99 ┆ 99.0 │
|
2695
|
+
# # │ 4 ┆ 13.0 │
|
2696
|
+
# # └─────┴──────┘
|
2697
|
+
#
|
2698
|
+
# @example
|
2699
|
+
# lf.fill_null(strategy: "forward").collect
|
2700
|
+
# # =>
|
2701
|
+
# # shape: (4, 2)
|
2702
|
+
# # ┌─────┬──────┐
|
2703
|
+
# # │ a ┆ b │
|
2704
|
+
# # │ --- ┆ --- │
|
2705
|
+
# # │ i64 ┆ f64 │
|
2706
|
+
# # ╞═════╪══════╡
|
2707
|
+
# # │ 1 ┆ 0.5 │
|
2708
|
+
# # │ 2 ┆ 4.0 │
|
2709
|
+
# # │ 2 ┆ 4.0 │
|
2710
|
+
# # │ 4 ┆ 13.0 │
|
2711
|
+
# # └─────┴──────┘
|
2712
|
+
#
|
2713
|
+
# @example
|
2714
|
+
# lf.fill_null(strategy: "max").collect
|
2715
|
+
# # =>
|
2716
|
+
# # shape: (4, 2)
|
2717
|
+
# # ┌─────┬──────┐
|
2718
|
+
# # │ a ┆ b │
|
2719
|
+
# # │ --- ┆ --- │
|
2720
|
+
# # │ i64 ┆ f64 │
|
2721
|
+
# # ╞═════╪══════╡
|
2722
|
+
# # │ 1 ┆ 0.5 │
|
2723
|
+
# # │ 2 ┆ 4.0 │
|
2724
|
+
# # │ 4 ┆ 13.0 │
|
2725
|
+
# # │ 4 ┆ 13.0 │
|
2726
|
+
# # └─────┴──────┘
|
2727
|
+
#
|
2728
|
+
# @example
|
2729
|
+
# lf.fill_null(strategy: "zero").collect
|
2730
|
+
# # =>
|
2731
|
+
# # shape: (4, 2)
|
2732
|
+
# # ┌─────┬──────┐
|
2733
|
+
# # │ a ┆ b │
|
2734
|
+
# # │ --- ┆ --- │
|
2735
|
+
# # │ i64 ┆ f64 │
|
2736
|
+
# # ╞═════╪══════╡
|
2737
|
+
# # │ 1 ┆ 0.5 │
|
2738
|
+
# # │ 2 ┆ 4.0 │
|
2739
|
+
# # │ 0 ┆ 0.0 │
|
2740
|
+
# # │ 4 ┆ 13.0 │
|
2741
|
+
# # └─────┴──────┘
|
2155
2742
|
def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: nil)
|
2156
2743
|
select(Polars.all.fill_null(value, strategy: strategy, limit: limit))
|
2157
2744
|
end
|
@@ -2431,6 +3018,53 @@ module Polars
|
|
2431
3018
|
# Which of the duplicate rows to keep.
|
2432
3019
|
#
|
2433
3020
|
# @return [LazyFrame]
|
3021
|
+
#
|
3022
|
+
# @example
|
3023
|
+
# lf = Polars::LazyFrame.new(
|
3024
|
+
# {
|
3025
|
+
# "foo" => [1, 2, 3, 1],
|
3026
|
+
# "bar" => ["a", "a", "a", "a"],
|
3027
|
+
# "ham" => ["b", "b", "b", "b"]
|
3028
|
+
# }
|
3029
|
+
# )
|
3030
|
+
# lf.unique(maintain_order: true).collect
|
3031
|
+
# # =>
|
3032
|
+
# # shape: (3, 3)
|
3033
|
+
# # ┌─────┬─────┬─────┐
|
3034
|
+
# # │ foo ┆ bar ┆ ham │
|
3035
|
+
# # │ --- ┆ --- ┆ --- │
|
3036
|
+
# # │ i64 ┆ str ┆ str │
|
3037
|
+
# # ╞═════╪═════╪═════╡
|
3038
|
+
# # │ 1 ┆ a ┆ b │
|
3039
|
+
# # │ 2 ┆ a ┆ b │
|
3040
|
+
# # │ 3 ┆ a ┆ b │
|
3041
|
+
# # └─────┴─────┴─────┘
|
3042
|
+
#
|
3043
|
+
# @example
|
3044
|
+
# lf.unique(subset: ["bar", "ham"], maintain_order: true).collect
|
3045
|
+
# # =>
|
3046
|
+
# # shape: (1, 3)
|
3047
|
+
# # ┌─────┬─────┬─────┐
|
3048
|
+
# # │ foo ┆ bar ┆ ham │
|
3049
|
+
# # │ --- ┆ --- ┆ --- │
|
3050
|
+
# # │ i64 ┆ str ┆ str │
|
3051
|
+
# # ╞═════╪═════╪═════╡
|
3052
|
+
# # │ 1 ┆ a ┆ b │
|
3053
|
+
# # └─────┴─────┴─────┘
|
3054
|
+
#
|
3055
|
+
# @example
|
3056
|
+
# lf.unique(keep: "last", maintain_order: true).collect
|
3057
|
+
# # =>
|
3058
|
+
# # shape: (3, 3)
|
3059
|
+
# # ┌─────┬─────┬─────┐
|
3060
|
+
# # │ foo ┆ bar ┆ ham │
|
3061
|
+
# # │ --- ┆ --- ┆ --- │
|
3062
|
+
# # │ i64 ┆ str ┆ str │
|
3063
|
+
# # ╞═════╪═════╪═════╡
|
3064
|
+
# # │ 2 ┆ a ┆ b │
|
3065
|
+
# # │ 3 ┆ a ┆ b │
|
3066
|
+
# # │ 1 ┆ a ┆ b │
|
3067
|
+
# # └─────┴─────┴─────┘
|
2434
3068
|
def unique(maintain_order: true, subset: nil, keep: "first")
|
2435
3069
|
if !subset.nil? && !subset.is_a?(::Array)
|
2436
3070
|
subset = [subset]
|
@@ -2504,7 +3138,7 @@ module Polars
|
|
2504
3138
|
# "c" => [2, 4, 6]
|
2505
3139
|
# }
|
2506
3140
|
# )
|
2507
|
-
# lf.unpivot(Polars
|
3141
|
+
# lf.unpivot(Polars.cs.numeric, index: "a").collect
|
2508
3142
|
# # =>
|
2509
3143
|
# # shape: (6, 3)
|
2510
3144
|
# # ┌─────┬──────────┬───────┐
|
@@ -2530,8 +3164,8 @@ module Polars
|
|
2530
3164
|
warn "The `streamable` parameter for `LazyFrame.unpivot` is deprecated"
|
2531
3165
|
end
|
2532
3166
|
|
2533
|
-
on = on.nil? ? [] : Utils.
|
2534
|
-
index = index.nil? ? [] : Utils.
|
3167
|
+
on = on.nil? ? [] : Utils.parse_into_list_of_expressions(on)
|
3168
|
+
index = index.nil? ? [] : Utils.parse_into_list_of_expressions(index)
|
2535
3169
|
|
2536
3170
|
_from_rbldf(
|
2537
3171
|
_ldf.unpivot(on, index, value_name, variable_name)
|