polars-df 0.13.0-x86_64-linux → 0.15.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -0
- data/Cargo.lock +1368 -319
- data/LICENSE-THIRD-PARTY.txt +24801 -13447
- data/LICENSE.txt +1 -0
- data/README.md +1 -2
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +285 -62
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +2 -0
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +109 -8
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +88 -10
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/ipc.rb +14 -12
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +649 -15
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1144 -0
- data/lib/polars/series.rb +470 -40
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +4 -3
- data/lib/polars/functions.rb +0 -57
data/lib/polars/lazy_frame.rb
CHANGED
@@ -848,9 +848,70 @@ module Polars
|
|
848
848
|
_from_rbldf(_ldf.cache)
|
849
849
|
end
|
850
850
|
|
851
|
-
#
|
852
|
-
#
|
853
|
-
#
|
851
|
+
# Cast LazyFrame column(s) to the specified dtype(s).
|
852
|
+
#
|
853
|
+
# @param dtypes [Hash, Object]
|
854
|
+
# Mapping of column names (or selector) to dtypes, or a single dtype
|
855
|
+
# to which all columns will be cast.
|
856
|
+
# @param strict [Boolean]
|
857
|
+
# Throw an error if a cast could not be done (for instance, due to an
|
858
|
+
# overflow).
|
859
|
+
#
|
860
|
+
# @return [LazyFrame]
|
861
|
+
#
|
862
|
+
# @example Cast specific frame columns to the specified dtypes:
|
863
|
+
# lf = Polars::LazyFrame.new(
|
864
|
+
# {
|
865
|
+
# "foo" => [1, 2, 3],
|
866
|
+
# "bar" => [6.0, 7.0, 8.0],
|
867
|
+
# "ham" => [Date.new(2020, 1, 2), Date.new(2021, 3, 4), Date.new(2022, 5, 6)]
|
868
|
+
# }
|
869
|
+
# )
|
870
|
+
# lf.cast({"foo" => Polars::Float32, "bar" => Polars::UInt8}).collect
|
871
|
+
# # =>
|
872
|
+
# # shape: (3, 3)
|
873
|
+
# # ┌─────┬─────┬────────────┐
|
874
|
+
# # │ foo ┆ bar ┆ ham │
|
875
|
+
# # │ --- ┆ --- ┆ --- │
|
876
|
+
# # │ f32 ┆ u8 ┆ date │
|
877
|
+
# # ╞═════╪═════╪════════════╡
|
878
|
+
# # │ 1.0 ┆ 6 ┆ 2020-01-02 │
|
879
|
+
# # │ 2.0 ┆ 7 ┆ 2021-03-04 │
|
880
|
+
# # │ 3.0 ┆ 8 ┆ 2022-05-06 │
|
881
|
+
# # └─────┴─────┴────────────┘
|
882
|
+
#
|
883
|
+
# @example Cast all frame columns matching one dtype (or dtype group) to another dtype:
|
884
|
+
# lf.cast({Polars::Date => Polars::Datetime}).collect
|
885
|
+
# # =>
|
886
|
+
# # shape: (3, 3)
|
887
|
+
# # ┌─────┬─────┬─────────────────────┐
|
888
|
+
# # │ foo ┆ bar ┆ ham │
|
889
|
+
# # │ --- ┆ --- ┆ --- │
|
890
|
+
# # │ i64 ┆ f64 ┆ datetime[μs] │
|
891
|
+
# # ╞═════╪═════╪═════════════════════╡
|
892
|
+
# # │ 1 ┆ 6.0 ┆ 2020-01-02 00:00:00 │
|
893
|
+
# # │ 2 ┆ 7.0 ┆ 2021-03-04 00:00:00 │
|
894
|
+
# # │ 3 ┆ 8.0 ┆ 2022-05-06 00:00:00 │
|
895
|
+
# # └─────┴─────┴─────────────────────┘
|
896
|
+
#
|
897
|
+
# @example Cast all frame columns to the specified dtype:
|
898
|
+
# lf.cast(Polars::String).collect.to_h(as_series: false)
|
899
|
+
# # => {"foo"=>["1", "2", "3"], "bar"=>["6.0", "7.0", "8.0"], "ham"=>["2020-01-02", "2021-03-04", "2022-05-06"]}
|
900
|
+
def cast(dtypes, strict: true)
|
901
|
+
if !dtypes.is_a?(Hash)
|
902
|
+
return _from_rbldf(_ldf.cast_all(dtypes, strict))
|
903
|
+
end
|
904
|
+
|
905
|
+
cast_map = {}
|
906
|
+
dtypes.each do |c, dtype|
|
907
|
+
dtype = Utils.parse_into_dtype(dtype)
|
908
|
+
cast_map.merge!(
|
909
|
+
c.is_a?(::String) ? {c => dtype} : Utils.expand_selector(self, c).to_h { |x| [x, dtype] }
|
910
|
+
)
|
911
|
+
end
|
912
|
+
|
913
|
+
_from_rbldf(_ldf.cast(cast_map, strict))
|
914
|
+
end
|
854
915
|
|
855
916
|
# Create an empty copy of the current LazyFrame.
|
856
917
|
#
|
@@ -1520,8 +1581,197 @@ module Polars
|
|
1520
1581
|
# @param force_parallel [Boolean]
|
1521
1582
|
# Force the physical plan to evaluate the computation of both DataFrames up to
|
1522
1583
|
# the join in parallel.
|
1584
|
+
# @param coalesce [Boolean]
|
1585
|
+
# Coalescing behavior (merging of join columns).
|
1586
|
+
# - true: -> Always coalesce join columns.
|
1587
|
+
# - false: -> Never coalesce join columns.
|
1588
|
+
# Note that joining on any expression other than `col` will turn off coalescing.
|
1523
1589
|
#
|
1524
1590
|
# @return [LazyFrame]
|
1591
|
+
#
|
1592
|
+
# @example
|
1593
|
+
# gdp = Polars::LazyFrame.new(
|
1594
|
+
# {
|
1595
|
+
# "date" => Polars.date_range(
|
1596
|
+
# Date.new(2016, 1, 1),
|
1597
|
+
# Date.new(2020, 1, 1),
|
1598
|
+
# "1y",
|
1599
|
+
# eager: true
|
1600
|
+
# ),
|
1601
|
+
# "gdp" => [4164, 4411, 4566, 4696, 4827]
|
1602
|
+
# }
|
1603
|
+
# )
|
1604
|
+
# gdp.collect
|
1605
|
+
# # =>
|
1606
|
+
# # shape: (5, 2)
|
1607
|
+
# # ┌────────────┬──────┐
|
1608
|
+
# # │ date ┆ gdp │
|
1609
|
+
# # │ --- ┆ --- │
|
1610
|
+
# # │ date ┆ i64 │
|
1611
|
+
# # ╞════════════╪══════╡
|
1612
|
+
# # │ 2016-01-01 ┆ 4164 │
|
1613
|
+
# # │ 2017-01-01 ┆ 4411 │
|
1614
|
+
# # │ 2018-01-01 ┆ 4566 │
|
1615
|
+
# # │ 2019-01-01 ┆ 4696 │
|
1616
|
+
# # │ 2020-01-01 ┆ 4827 │
|
1617
|
+
# # └────────────┴──────┘
|
1618
|
+
#
|
1619
|
+
# @example
|
1620
|
+
# population = Polars::LazyFrame.new(
|
1621
|
+
# {
|
1622
|
+
# "date" => [Date.new(2016, 3, 1), Date.new(2018, 8, 1), Date.new(2019, 1, 1)],
|
1623
|
+
# "population" => [82.19, 82.66, 83.12]
|
1624
|
+
# }
|
1625
|
+
# ).sort("date")
|
1626
|
+
# population.collect
|
1627
|
+
# # =>
|
1628
|
+
# # shape: (3, 2)
|
1629
|
+
# # ┌────────────┬────────────┐
|
1630
|
+
# # │ date ┆ population │
|
1631
|
+
# # │ --- ┆ --- │
|
1632
|
+
# # │ date ┆ f64 │
|
1633
|
+
# # ╞════════════╪════════════╡
|
1634
|
+
# # │ 2016-03-01 ┆ 82.19 │
|
1635
|
+
# # │ 2018-08-01 ┆ 82.66 │
|
1636
|
+
# # │ 2019-01-01 ┆ 83.12 │
|
1637
|
+
# # └────────────┴────────────┘
|
1638
|
+
#
|
1639
|
+
# @example Note how the dates don't quite match. If we join them using `join_asof` and `strategy: "backward"`, then each date from `population` which doesn't have an exact match is matched with the closest earlier date from `gdp`:
|
1640
|
+
# population.join_asof(gdp, on: "date", strategy: "backward").collect
|
1641
|
+
# # =>
|
1642
|
+
# # shape: (3, 3)
|
1643
|
+
# # ┌────────────┬────────────┬──────┐
|
1644
|
+
# # │ date ┆ population ┆ gdp │
|
1645
|
+
# # │ --- ┆ --- ┆ --- │
|
1646
|
+
# # │ date ┆ f64 ┆ i64 │
|
1647
|
+
# # ╞════════════╪════════════╪══════╡
|
1648
|
+
# # │ 2016-03-01 ┆ 82.19 ┆ 4164 │
|
1649
|
+
# # │ 2018-08-01 ┆ 82.66 ┆ 4566 │
|
1650
|
+
# # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
|
1651
|
+
# # └────────────┴────────────┴──────┘
|
1652
|
+
#
|
1653
|
+
# @example
|
1654
|
+
# population.join_asof(
|
1655
|
+
# gdp, on: "date", strategy: "backward", coalesce: false
|
1656
|
+
# ).collect
|
1657
|
+
# # =>
|
1658
|
+
# # shape: (3, 4)
|
1659
|
+
# # ┌────────────┬────────────┬────────────┬──────┐
|
1660
|
+
# # │ date ┆ population ┆ date_right ┆ gdp │
|
1661
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
1662
|
+
# # │ date ┆ f64 ┆ date ┆ i64 │
|
1663
|
+
# # ╞════════════╪════════════╪════════════╪══════╡
|
1664
|
+
# # │ 2016-03-01 ┆ 82.19 ┆ 2016-01-01 ┆ 4164 │
|
1665
|
+
# # │ 2018-08-01 ┆ 82.66 ┆ 2018-01-01 ┆ 4566 │
|
1666
|
+
# # │ 2019-01-01 ┆ 83.12 ┆ 2019-01-01 ┆ 4696 │
|
1667
|
+
# # └────────────┴────────────┴────────────┴──────┘
|
1668
|
+
#
|
1669
|
+
# @example If we instead use `strategy: "forward"`, then each date from `population` which doesn't have an exact match is matched with the closest later date from `gdp`:
|
1670
|
+
# population.join_asof(gdp, on: "date", strategy: "forward").collect
|
1671
|
+
# # =>
|
1672
|
+
# # shape: (3, 3)
|
1673
|
+
# # ┌────────────┬────────────┬──────┐
|
1674
|
+
# # │ date ┆ population ┆ gdp │
|
1675
|
+
# # │ --- ┆ --- ┆ --- │
|
1676
|
+
# # │ date ┆ f64 ┆ i64 │
|
1677
|
+
# # ╞════════════╪════════════╪══════╡
|
1678
|
+
# # │ 2016-03-01 ┆ 82.19 ┆ 4411 │
|
1679
|
+
# # │ 2018-08-01 ┆ 82.66 ┆ 4696 │
|
1680
|
+
# # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
|
1681
|
+
# # └────────────┴────────────┴──────┘
|
1682
|
+
#
|
1683
|
+
# @example
|
1684
|
+
# population.join_asof(gdp, on: "date", strategy: "nearest").collect
|
1685
|
+
# # =>
|
1686
|
+
# # shape: (3, 3)
|
1687
|
+
# # ┌────────────┬────────────┬──────┐
|
1688
|
+
# # │ date ┆ population ┆ gdp │
|
1689
|
+
# # │ --- ┆ --- ┆ --- │
|
1690
|
+
# # │ date ┆ f64 ┆ i64 │
|
1691
|
+
# # ╞════════════╪════════════╪══════╡
|
1692
|
+
# # │ 2016-03-01 ┆ 82.19 ┆ 4164 │
|
1693
|
+
# # │ 2018-08-01 ┆ 82.66 ┆ 4696 │
|
1694
|
+
# # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
|
1695
|
+
# # └────────────┴────────────┴──────┘
|
1696
|
+
#
|
1697
|
+
# @example
|
1698
|
+
# gdp_dates = Polars.date_range(
|
1699
|
+
# Date.new(2016, 1, 1), Date.new(2020, 1, 1), "1y", eager: true
|
1700
|
+
# )
|
1701
|
+
# gdp2 = Polars::LazyFrame.new(
|
1702
|
+
# {
|
1703
|
+
# "country" => ["Germany"] * 5 + ["Netherlands"] * 5,
|
1704
|
+
# "date" => Polars.concat([gdp_dates, gdp_dates]),
|
1705
|
+
# "gdp" => [4164, 4411, 4566, 4696, 4827, 784, 833, 914, 910, 909]
|
1706
|
+
# }
|
1707
|
+
# ).sort("country", "date")
|
1708
|
+
# gdp2.collect
|
1709
|
+
# # =>
|
1710
|
+
# # shape: (10, 3)
|
1711
|
+
# # ┌─────────────┬────────────┬──────┐
|
1712
|
+
# # │ country ┆ date ┆ gdp │
|
1713
|
+
# # │ --- ┆ --- ┆ --- │
|
1714
|
+
# # │ str ┆ date ┆ i64 │
|
1715
|
+
# # ╞═════════════╪════════════╪══════╡
|
1716
|
+
# # │ Germany ┆ 2016-01-01 ┆ 4164 │
|
1717
|
+
# # │ Germany ┆ 2017-01-01 ┆ 4411 │
|
1718
|
+
# # │ Germany ┆ 2018-01-01 ┆ 4566 │
|
1719
|
+
# # │ Germany ┆ 2019-01-01 ┆ 4696 │
|
1720
|
+
# # │ Germany ┆ 2020-01-01 ┆ 4827 │
|
1721
|
+
# # │ Netherlands ┆ 2016-01-01 ┆ 784 │
|
1722
|
+
# # │ Netherlands ┆ 2017-01-01 ┆ 833 │
|
1723
|
+
# # │ Netherlands ┆ 2018-01-01 ┆ 914 │
|
1724
|
+
# # │ Netherlands ┆ 2019-01-01 ┆ 910 │
|
1725
|
+
# # │ Netherlands ┆ 2020-01-01 ┆ 909 │
|
1726
|
+
# # └─────────────┴────────────┴──────┘
|
1727
|
+
#
|
1728
|
+
# @example
|
1729
|
+
# pop2 = Polars::LazyFrame.new(
|
1730
|
+
# {
|
1731
|
+
# "country" => ["Germany"] * 3 + ["Netherlands"] * 3,
|
1732
|
+
# "date" => [
|
1733
|
+
# Date.new(2016, 3, 1),
|
1734
|
+
# Date.new(2018, 8, 1),
|
1735
|
+
# Date.new(2019, 1, 1),
|
1736
|
+
# Date.new(2016, 3, 1),
|
1737
|
+
# Date.new(2018, 8, 1),
|
1738
|
+
# Date.new(2019, 1, 1)
|
1739
|
+
# ],
|
1740
|
+
# "population" => [82.19, 82.66, 83.12, 17.11, 17.32, 17.40]
|
1741
|
+
# }
|
1742
|
+
# ).sort("country", "date")
|
1743
|
+
# pop2.collect
|
1744
|
+
# # =>
|
1745
|
+
# # shape: (6, 3)
|
1746
|
+
# # ┌─────────────┬────────────┬────────────┐
|
1747
|
+
# # │ country ┆ date ┆ population │
|
1748
|
+
# # │ --- ┆ --- ┆ --- │
|
1749
|
+
# # │ str ┆ date ┆ f64 │
|
1750
|
+
# # ╞═════════════╪════════════╪════════════╡
|
1751
|
+
# # │ Germany ┆ 2016-03-01 ┆ 82.19 │
|
1752
|
+
# # │ Germany ┆ 2018-08-01 ┆ 82.66 │
|
1753
|
+
# # │ Germany ┆ 2019-01-01 ┆ 83.12 │
|
1754
|
+
# # │ Netherlands ┆ 2016-03-01 ┆ 17.11 │
|
1755
|
+
# # │ Netherlands ┆ 2018-08-01 ┆ 17.32 │
|
1756
|
+
# # │ Netherlands ┆ 2019-01-01 ┆ 17.4 │
|
1757
|
+
# # └─────────────┴────────────┴────────────┘
|
1758
|
+
#
|
1759
|
+
# @example
|
1760
|
+
# pop2.join_asof(gdp2, by: "country", on: "date", strategy: "nearest").collect
|
1761
|
+
# # =>
|
1762
|
+
# # shape: (6, 4)
|
1763
|
+
# # ┌─────────────┬────────────┬────────────┬──────┐
|
1764
|
+
# # │ country ┆ date ┆ population ┆ gdp │
|
1765
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
1766
|
+
# # │ str ┆ date ┆ f64 ┆ i64 │
|
1767
|
+
# # ╞═════════════╪════════════╪════════════╪══════╡
|
1768
|
+
# # │ Germany ┆ 2016-03-01 ┆ 82.19 ┆ 4164 │
|
1769
|
+
# # │ Germany ┆ 2018-08-01 ┆ 82.66 ┆ 4696 │
|
1770
|
+
# # │ Germany ┆ 2019-01-01 ┆ 83.12 ┆ 4696 │
|
1771
|
+
# # │ Netherlands ┆ 2016-03-01 ┆ 17.11 ┆ 784 │
|
1772
|
+
# # │ Netherlands ┆ 2018-08-01 ┆ 17.32 ┆ 910 │
|
1773
|
+
# # │ Netherlands ┆ 2019-01-01 ┆ 17.4 ┆ 910 │
|
1774
|
+
# # └─────────────┴────────────┴────────────┴──────┘
|
1525
1775
|
def join_asof(
|
1526
1776
|
other,
|
1527
1777
|
left_on: nil,
|
@@ -1534,7 +1784,8 @@ module Polars
|
|
1534
1784
|
suffix: "_right",
|
1535
1785
|
tolerance: nil,
|
1536
1786
|
allow_parallel: true,
|
1537
|
-
force_parallel: false
|
1787
|
+
force_parallel: false,
|
1788
|
+
coalesce: true
|
1538
1789
|
)
|
1539
1790
|
if !other.is_a?(LazyFrame)
|
1540
1791
|
raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
|
@@ -1589,7 +1840,8 @@ module Polars
|
|
1589
1840
|
suffix,
|
1590
1841
|
strategy,
|
1591
1842
|
tolerance_num,
|
1592
|
-
tolerance_str
|
1843
|
+
tolerance_str,
|
1844
|
+
coalesce
|
1593
1845
|
)
|
1594
1846
|
)
|
1595
1847
|
end
|
@@ -1609,6 +1861,12 @@ module Polars
|
|
1609
1861
|
# Join strategy.
|
1610
1862
|
# @param suffix [String]
|
1611
1863
|
# Suffix to append to columns with a duplicate name.
|
1864
|
+
# @param validate ['m:m', 'm:1', '1:m', '1:1']
|
1865
|
+
# Checks if join is of specified type.
|
1866
|
+
# * *many_to_many* - "m:m": default, does not result in checks
|
1867
|
+
# * *one_to_one* - "1:1": check if join keys are unique in both left and right datasets
|
1868
|
+
# * *one_to_many* - "1:m": check if join keys are unique in left dataset
|
1869
|
+
# * *many_to_one* - "m:1": check if join keys are unique in right dataset
|
1612
1870
|
# @param join_nulls [Boolean]
|
1613
1871
|
# Join on null values. By default null values will never produce matches.
|
1614
1872
|
# @param allow_parallel [Boolean]
|
@@ -1617,6 +1875,12 @@ module Polars
|
|
1617
1875
|
# @param force_parallel [Boolean]
|
1618
1876
|
# Force the physical plan to evaluate the computation of both DataFrames up to
|
1619
1877
|
# the join in parallel.
|
1878
|
+
# @param coalesce [Boolean]
|
1879
|
+
# Coalescing behavior (merging of join columns).
|
1880
|
+
# - nil: -> join specific.
|
1881
|
+
# - true: -> Always coalesce join columns.
|
1882
|
+
# - false: -> Never coalesce join columns.
|
1883
|
+
# Note that joining on any expression other than `col` will turn off coalescing.
|
1620
1884
|
#
|
1621
1885
|
# @return [LazyFrame]
|
1622
1886
|
#
|
@@ -1706,9 +1970,11 @@ module Polars
|
|
1706
1970
|
on: nil,
|
1707
1971
|
how: "inner",
|
1708
1972
|
suffix: "_right",
|
1973
|
+
validate: "m:m",
|
1709
1974
|
join_nulls: false,
|
1710
1975
|
allow_parallel: true,
|
1711
|
-
force_parallel: false
|
1976
|
+
force_parallel: false,
|
1977
|
+
coalesce: nil
|
1712
1978
|
)
|
1713
1979
|
if !other.is_a?(LazyFrame)
|
1714
1980
|
raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
|
@@ -1719,7 +1985,7 @@ module Polars
|
|
1719
1985
|
elsif how == "cross"
|
1720
1986
|
return _from_rbldf(
|
1721
1987
|
_ldf.join(
|
1722
|
-
other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
|
1988
|
+
other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix, validate, coalesce
|
1723
1989
|
)
|
1724
1990
|
)
|
1725
1991
|
end
|
@@ -1745,6 +2011,8 @@ module Polars
|
|
1745
2011
|
join_nulls,
|
1746
2012
|
how,
|
1747
2013
|
suffix,
|
2014
|
+
validate,
|
2015
|
+
coalesce
|
1748
2016
|
)
|
1749
2017
|
)
|
1750
2018
|
end
|
@@ -1879,6 +2147,55 @@ module Polars
|
|
1879
2147
|
# - List of column names.
|
1880
2148
|
#
|
1881
2149
|
# @return [LazyFrame]
|
2150
|
+
#
|
2151
|
+
# @example Drop a single column by passing the name of that column.
|
2152
|
+
# lf = Polars::LazyFrame.new(
|
2153
|
+
# {
|
2154
|
+
# "foo" => [1, 2, 3],
|
2155
|
+
# "bar" => [6.0, 7.0, 8.0],
|
2156
|
+
# "ham" => ["a", "b", "c"]
|
2157
|
+
# }
|
2158
|
+
# )
|
2159
|
+
# lf.drop("ham").collect
|
2160
|
+
# # =>
|
2161
|
+
# # shape: (3, 2)
|
2162
|
+
# # ┌─────┬─────┐
|
2163
|
+
# # │ foo ┆ bar │
|
2164
|
+
# # │ --- ┆ --- │
|
2165
|
+
# # │ i64 ┆ f64 │
|
2166
|
+
# # ╞═════╪═════╡
|
2167
|
+
# # │ 1 ┆ 6.0 │
|
2168
|
+
# # │ 2 ┆ 7.0 │
|
2169
|
+
# # │ 3 ┆ 8.0 │
|
2170
|
+
# # └─────┴─────┘
|
2171
|
+
#
|
2172
|
+
# @example Drop multiple columns by passing a selector.
|
2173
|
+
# lf.drop(Polars.cs.numeric).collect
|
2174
|
+
# # =>
|
2175
|
+
# # shape: (3, 1)
|
2176
|
+
# # ┌─────┐
|
2177
|
+
# # │ ham │
|
2178
|
+
# # │ --- │
|
2179
|
+
# # │ str │
|
2180
|
+
# # ╞═════╡
|
2181
|
+
# # │ a │
|
2182
|
+
# # │ b │
|
2183
|
+
# # │ c │
|
2184
|
+
# # └─────┘
|
2185
|
+
#
|
2186
|
+
# @example Use positional arguments to drop multiple columns.
|
2187
|
+
# lf.drop("foo", "ham").collect
|
2188
|
+
# # =>
|
2189
|
+
# # shape: (3, 1)
|
2190
|
+
# # ┌─────┐
|
2191
|
+
# # │ bar │
|
2192
|
+
# # │ --- │
|
2193
|
+
# # │ f64 │
|
2194
|
+
# # ╞═════╡
|
2195
|
+
# # │ 6.0 │
|
2196
|
+
# # │ 7.0 │
|
2197
|
+
# # │ 8.0 │
|
2198
|
+
# # └─────┘
|
1882
2199
|
def drop(*columns)
|
1883
2200
|
drop_cols = Utils._expand_selectors(self, *columns)
|
1884
2201
|
_from_rbldf(_ldf.drop(drop_cols))
|
@@ -1888,17 +2205,80 @@ module Polars
|
|
1888
2205
|
#
|
1889
2206
|
# @param mapping [Hash, #call]
|
1890
2207
|
# Key value pairs that map from old name to new name, or a callable that takes the old name and returns the new name.
|
2208
|
+
# @param strict [Boolean]
|
2209
|
+
# Validate that all column names exist in the current schema,
|
2210
|
+
# and throw an exception if any do not. (Note that this parameter
|
2211
|
+
# is a no-op when passing a function to `mapping`).
|
1891
2212
|
#
|
1892
2213
|
# @return [LazyFrame]
|
1893
|
-
|
1894
|
-
|
1895
|
-
|
1896
|
-
|
2214
|
+
#
|
2215
|
+
# @example
|
2216
|
+
# lf = Polars::LazyFrame.new(
|
2217
|
+
# {
|
2218
|
+
# "foo" => [1, 2, 3],
|
2219
|
+
# "bar" => [6, 7, 8],
|
2220
|
+
# "ham" => ["a", "b", "c"]
|
2221
|
+
# }
|
2222
|
+
# )
|
2223
|
+
# lf.rename({"foo" => "apple"}).collect
|
2224
|
+
# # =>
|
2225
|
+
# # shape: (3, 3)
|
2226
|
+
# # ┌───────┬─────┬─────┐
|
2227
|
+
# # │ apple ┆ bar ┆ ham │
|
2228
|
+
# # │ --- ┆ --- ┆ --- │
|
2229
|
+
# # │ i64 ┆ i64 ┆ str │
|
2230
|
+
# # ╞═══════╪═════╪═════╡
|
2231
|
+
# # │ 1 ┆ 6 ┆ a │
|
2232
|
+
# # │ 2 ┆ 7 ┆ b │
|
2233
|
+
# # │ 3 ┆ 8 ┆ c │
|
2234
|
+
# # └───────┴─────┴─────┘
|
2235
|
+
#
|
2236
|
+
# @example
|
2237
|
+
# lf.rename(->(column_name) { "c" + column_name[1..] }).collect
|
2238
|
+
# # =>
|
2239
|
+
# # shape: (3, 3)
|
2240
|
+
# # ┌─────┬─────┬─────┐
|
2241
|
+
# # │ coo ┆ car ┆ cam │
|
2242
|
+
# # │ --- ┆ --- ┆ --- │
|
2243
|
+
# # │ i64 ┆ i64 ┆ str │
|
2244
|
+
# # ╞═════╪═════╪═════╡
|
2245
|
+
# # │ 1 ┆ 6 ┆ a │
|
2246
|
+
# # │ 2 ┆ 7 ┆ b │
|
2247
|
+
# # │ 3 ┆ 8 ┆ c │
|
2248
|
+
# # └─────┴─────┴─────┘
|
2249
|
+
def rename(mapping, strict: true)
|
2250
|
+
if mapping.respond_to?(:call)
|
2251
|
+
select(F.all.name.map(&mapping))
|
2252
|
+
else
|
2253
|
+
existing = mapping.keys
|
2254
|
+
_new = mapping.values
|
2255
|
+
_from_rbldf(_ldf.rename(existing, _new, strict))
|
2256
|
+
end
|
1897
2257
|
end
|
1898
2258
|
|
1899
2259
|
# Reverse the LazyFrame.
|
1900
2260
|
#
|
1901
2261
|
# @return [LazyFrame]
|
2262
|
+
#
|
2263
|
+
# @example
|
2264
|
+
# lf = Polars::LazyFrame.new(
|
2265
|
+
# {
|
2266
|
+
# "key" => ["a", "b", "c"],
|
2267
|
+
# "val" => [1, 2, 3]
|
2268
|
+
# }
|
2269
|
+
# )
|
2270
|
+
# lf.reverse.collect
|
2271
|
+
# # =>
|
2272
|
+
# # shape: (3, 2)
|
2273
|
+
# # ┌─────┬─────┐
|
2274
|
+
# # │ key ┆ val │
|
2275
|
+
# # │ --- ┆ --- │
|
2276
|
+
# # │ str ┆ i64 │
|
2277
|
+
# # ╞═════╪═════╡
|
2278
|
+
# # │ c ┆ 3 │
|
2279
|
+
# # │ b ┆ 2 │
|
2280
|
+
# # │ a ┆ 1 │
|
2281
|
+
# # └─────┴─────┘
|
1902
2282
|
def reverse
|
1903
2283
|
_from_rbldf(_ldf.reverse)
|
1904
2284
|
end
|
@@ -2048,8 +2428,43 @@ module Polars
|
|
2048
2428
|
# Consider using the {#fetch} operation if you only want to test your
|
2049
2429
|
# query. The {#fetch} operation will load the first `n` rows at the scan
|
2050
2430
|
# level, whereas the {#head}/{#limit} are applied at the end.
|
2431
|
+
#
|
2432
|
+
# @example
|
2433
|
+
# lf = Polars::LazyFrame.new(
|
2434
|
+
# {
|
2435
|
+
# "a" => [1, 2, 3, 4, 5, 6],
|
2436
|
+
# "b" => [7, 8, 9, 10, 11, 12]
|
2437
|
+
# }
|
2438
|
+
# )
|
2439
|
+
# lf.limit.collect
|
2440
|
+
# # =>
|
2441
|
+
# # shape: (5, 2)
|
2442
|
+
# # ┌─────┬─────┐
|
2443
|
+
# # │ a ┆ b │
|
2444
|
+
# # │ --- ┆ --- │
|
2445
|
+
# # │ i64 ┆ i64 │
|
2446
|
+
# # ╞═════╪═════╡
|
2447
|
+
# # │ 1 ┆ 7 │
|
2448
|
+
# # │ 2 ┆ 8 │
|
2449
|
+
# # │ 3 ┆ 9 │
|
2450
|
+
# # │ 4 ┆ 10 │
|
2451
|
+
# # │ 5 ┆ 11 │
|
2452
|
+
# # └─────┴─────┘
|
2453
|
+
#
|
2454
|
+
# @example
|
2455
|
+
# lf.limit(2).collect
|
2456
|
+
# # =>
|
2457
|
+
# # shape: (2, 2)
|
2458
|
+
# # ┌─────┬─────┐
|
2459
|
+
# # │ a ┆ b │
|
2460
|
+
# # │ --- ┆ --- │
|
2461
|
+
# # │ i64 ┆ i64 │
|
2462
|
+
# # ╞═════╪═════╡
|
2463
|
+
# # │ 1 ┆ 7 │
|
2464
|
+
# # │ 2 ┆ 8 │
|
2465
|
+
# # └─────┴─────┘
|
2051
2466
|
def limit(n = 5)
|
2052
|
-
head(
|
2467
|
+
head(n)
|
2053
2468
|
end
|
2054
2469
|
|
2055
2470
|
# Get the first `n` rows.
|
@@ -2063,6 +2478,41 @@ module Polars
|
|
2063
2478
|
# Consider using the {#fetch} operation if you only want to test your
|
2064
2479
|
# query. The {#fetch} operation will load the first `n` rows at the scan
|
2065
2480
|
# level, whereas the {#head}/{#limit} are applied at the end.
|
2481
|
+
#
|
2482
|
+
# @example
|
2483
|
+
# lf = Polars::LazyFrame.new(
|
2484
|
+
# {
|
2485
|
+
# "a" => [1, 2, 3, 4, 5, 6],
|
2486
|
+
# "b" => [7, 8, 9, 10, 11, 12]
|
2487
|
+
# }
|
2488
|
+
# )
|
2489
|
+
# lf.head.collect
|
2490
|
+
# # =>
|
2491
|
+
# # shape: (5, 2)
|
2492
|
+
# # ┌─────┬─────┐
|
2493
|
+
# # │ a ┆ b │
|
2494
|
+
# # │ --- ┆ --- │
|
2495
|
+
# # │ i64 ┆ i64 │
|
2496
|
+
# # ╞═════╪═════╡
|
2497
|
+
# # │ 1 ┆ 7 │
|
2498
|
+
# # │ 2 ┆ 8 │
|
2499
|
+
# # │ 3 ┆ 9 │
|
2500
|
+
# # │ 4 ┆ 10 │
|
2501
|
+
# # │ 5 ┆ 11 │
|
2502
|
+
# # └─────┴─────┘
|
2503
|
+
#
|
2504
|
+
# @example
|
2505
|
+
# lf.head(2).collect
|
2506
|
+
# # =>
|
2507
|
+
# # shape: (2, 2)
|
2508
|
+
# # ┌─────┬─────┐
|
2509
|
+
# # │ a ┆ b │
|
2510
|
+
# # │ --- ┆ --- │
|
2511
|
+
# # │ i64 ┆ i64 │
|
2512
|
+
# # ╞═════╪═════╡
|
2513
|
+
# # │ 1 ┆ 7 │
|
2514
|
+
# # │ 2 ┆ 8 │
|
2515
|
+
# # └─────┴─────┘
|
2066
2516
|
def head(n = 5)
|
2067
2517
|
slice(0, n)
|
2068
2518
|
end
|
@@ -2073,6 +2523,41 @@ module Polars
|
|
2073
2523
|
# Number of rows.
|
2074
2524
|
#
|
2075
2525
|
# @return [LazyFrame]
|
2526
|
+
#
|
2527
|
+
# @example
|
2528
|
+
# lf = Polars::LazyFrame.new(
|
2529
|
+
# {
|
2530
|
+
# "a" => [1, 2, 3, 4, 5, 6],
|
2531
|
+
# "b" => [7, 8, 9, 10, 11, 12]
|
2532
|
+
# }
|
2533
|
+
# )
|
2534
|
+
# lf.tail.collect
|
2535
|
+
# # =>
|
2536
|
+
# # shape: (5, 2)
|
2537
|
+
# # ┌─────┬─────┐
|
2538
|
+
# # │ a ┆ b │
|
2539
|
+
# # │ --- ┆ --- │
|
2540
|
+
# # │ i64 ┆ i64 │
|
2541
|
+
# # ╞═════╪═════╡
|
2542
|
+
# # │ 2 ┆ 8 │
|
2543
|
+
# # │ 3 ┆ 9 │
|
2544
|
+
# # │ 4 ┆ 10 │
|
2545
|
+
# # │ 5 ┆ 11 │
|
2546
|
+
# # │ 6 ┆ 12 │
|
2547
|
+
# # └─────┴─────┘
|
2548
|
+
#
|
2549
|
+
# @example
|
2550
|
+
# lf.tail(2).collect
|
2551
|
+
# # =>
|
2552
|
+
# # shape: (2, 2)
|
2553
|
+
# # ┌─────┬─────┐
|
2554
|
+
# # │ a ┆ b │
|
2555
|
+
# # │ --- ┆ --- │
|
2556
|
+
# # │ i64 ┆ i64 │
|
2557
|
+
# # ╞═════╪═════╡
|
2558
|
+
# # │ 5 ┆ 11 │
|
2559
|
+
# # │ 6 ┆ 12 │
|
2560
|
+
# # └─────┴─────┘
|
2076
2561
|
def tail(n = 5)
|
2077
2562
|
_from_rbldf(_ldf.tail(n))
|
2078
2563
|
end
|
@@ -2080,6 +2565,24 @@ module Polars
|
|
2080
2565
|
# Get the last row of the LazyFrame.
|
2081
2566
|
#
|
2082
2567
|
# @return [LazyFrame]
|
2568
|
+
#
|
2569
|
+
# @example
|
2570
|
+
# lf = Polars::LazyFrame.new(
|
2571
|
+
# {
|
2572
|
+
# "a" => [1, 5, 3],
|
2573
|
+
# "b" => [2, 4, 6]
|
2574
|
+
# }
|
2575
|
+
# )
|
2576
|
+
# lf.last.collect
|
2577
|
+
# # =>
|
2578
|
+
# # shape: (1, 2)
|
2579
|
+
# # ┌─────┬─────┐
|
2580
|
+
# # │ a ┆ b │
|
2581
|
+
# # │ --- ┆ --- │
|
2582
|
+
# # │ i64 ┆ i64 │
|
2583
|
+
# # ╞═════╪═════╡
|
2584
|
+
# # │ 3 ┆ 6 │
|
2585
|
+
# # └─────┴─────┘
|
2083
2586
|
def last
|
2084
2587
|
tail(1)
|
2085
2588
|
end
|
@@ -2087,6 +2590,24 @@ module Polars
|
|
2087
2590
|
# Get the first row of the LazyFrame.
|
2088
2591
|
#
|
2089
2592
|
# @return [LazyFrame]
|
2593
|
+
#
|
2594
|
+
# @example
|
2595
|
+
# lf = Polars::LazyFrame.new(
|
2596
|
+
# {
|
2597
|
+
# "a" => [1, 5, 3],
|
2598
|
+
# "b" => [2, 4, 6]
|
2599
|
+
# }
|
2600
|
+
# )
|
2601
|
+
# lf.first.collect
|
2602
|
+
# # =>
|
2603
|
+
# # shape: (1, 2)
|
2604
|
+
# # ┌─────┬─────┐
|
2605
|
+
# # │ a ┆ b │
|
2606
|
+
# # │ --- ┆ --- │
|
2607
|
+
# # │ i64 ┆ i64 │
|
2608
|
+
# # ╞═════╪═════╡
|
2609
|
+
# # │ 1 ┆ 2 │
|
2610
|
+
# # └─────┴─────┘
|
2090
2611
|
def first
|
2091
2612
|
slice(0, 1)
|
2092
2613
|
end
|
@@ -2152,6 +2673,72 @@ module Polars
|
|
2152
2673
|
# Fill null values using the specified value or strategy.
|
2153
2674
|
#
|
2154
2675
|
# @return [LazyFrame]
|
2676
|
+
#
|
2677
|
+
# @example
|
2678
|
+
# lf = Polars::LazyFrame.new(
|
2679
|
+
# {
|
2680
|
+
# "a" => [1, 2, nil, 4],
|
2681
|
+
# "b" => [0.5, 4, nil, 13]
|
2682
|
+
# }
|
2683
|
+
# )
|
2684
|
+
# lf.fill_null(99).collect
|
2685
|
+
# # =>
|
2686
|
+
# # shape: (4, 2)
|
2687
|
+
# # ┌─────┬──────┐
|
2688
|
+
# # │ a ┆ b │
|
2689
|
+
# # │ --- ┆ --- │
|
2690
|
+
# # │ i64 ┆ f64 │
|
2691
|
+
# # ╞═════╪══════╡
|
2692
|
+
# # │ 1 ┆ 0.5 │
|
2693
|
+
# # │ 2 ┆ 4.0 │
|
2694
|
+
# # │ 99 ┆ 99.0 │
|
2695
|
+
# # │ 4 ┆ 13.0 │
|
2696
|
+
# # └─────┴──────┘
|
2697
|
+
#
|
2698
|
+
# @example
|
2699
|
+
# lf.fill_null(strategy: "forward").collect
|
2700
|
+
# # =>
|
2701
|
+
# # shape: (4, 2)
|
2702
|
+
# # ┌─────┬──────┐
|
2703
|
+
# # │ a ┆ b │
|
2704
|
+
# # │ --- ┆ --- │
|
2705
|
+
# # │ i64 ┆ f64 │
|
2706
|
+
# # ╞═════╪══════╡
|
2707
|
+
# # │ 1 ┆ 0.5 │
|
2708
|
+
# # │ 2 ┆ 4.0 │
|
2709
|
+
# # │ 2 ┆ 4.0 │
|
2710
|
+
# # │ 4 ┆ 13.0 │
|
2711
|
+
# # └─────┴──────┘
|
2712
|
+
#
|
2713
|
+
# @example
|
2714
|
+
# lf.fill_null(strategy: "max").collect
|
2715
|
+
# # =>
|
2716
|
+
# # shape: (4, 2)
|
2717
|
+
# # ┌─────┬──────┐
|
2718
|
+
# # │ a ┆ b │
|
2719
|
+
# # │ --- ┆ --- │
|
2720
|
+
# # │ i64 ┆ f64 │
|
2721
|
+
# # ╞═════╪══════╡
|
2722
|
+
# # │ 1 ┆ 0.5 │
|
2723
|
+
# # │ 2 ┆ 4.0 │
|
2724
|
+
# # │ 4 ┆ 13.0 │
|
2725
|
+
# # │ 4 ┆ 13.0 │
|
2726
|
+
# # └─────┴──────┘
|
2727
|
+
#
|
2728
|
+
# @example
|
2729
|
+
# lf.fill_null(strategy: "zero").collect
|
2730
|
+
# # =>
|
2731
|
+
# # shape: (4, 2)
|
2732
|
+
# # ┌─────┬──────┐
|
2733
|
+
# # │ a ┆ b │
|
2734
|
+
# # │ --- ┆ --- │
|
2735
|
+
# # │ i64 ┆ f64 │
|
2736
|
+
# # ╞═════╪══════╡
|
2737
|
+
# # │ 1 ┆ 0.5 │
|
2738
|
+
# # │ 2 ┆ 4.0 │
|
2739
|
+
# # │ 0 ┆ 0.0 │
|
2740
|
+
# # │ 4 ┆ 13.0 │
|
2741
|
+
# # └─────┴──────┘
|
2155
2742
|
def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: nil)
|
2156
2743
|
select(Polars.all.fill_null(value, strategy: strategy, limit: limit))
|
2157
2744
|
end
|
@@ -2431,6 +3018,53 @@ module Polars
|
|
2431
3018
|
# Which of the duplicate rows to keep.
|
2432
3019
|
#
|
2433
3020
|
# @return [LazyFrame]
|
3021
|
+
#
|
3022
|
+
# @example
|
3023
|
+
# lf = Polars::LazyFrame.new(
|
3024
|
+
# {
|
3025
|
+
# "foo" => [1, 2, 3, 1],
|
3026
|
+
# "bar" => ["a", "a", "a", "a"],
|
3027
|
+
# "ham" => ["b", "b", "b", "b"]
|
3028
|
+
# }
|
3029
|
+
# )
|
3030
|
+
# lf.unique(maintain_order: true).collect
|
3031
|
+
# # =>
|
3032
|
+
# # shape: (3, 3)
|
3033
|
+
# # ┌─────┬─────┬─────┐
|
3034
|
+
# # │ foo ┆ bar ┆ ham │
|
3035
|
+
# # │ --- ┆ --- ┆ --- │
|
3036
|
+
# # │ i64 ┆ str ┆ str │
|
3037
|
+
# # ╞═════╪═════╪═════╡
|
3038
|
+
# # │ 1 ┆ a ┆ b │
|
3039
|
+
# # │ 2 ┆ a ┆ b │
|
3040
|
+
# # │ 3 ┆ a ┆ b │
|
3041
|
+
# # └─────┴─────┴─────┘
|
3042
|
+
#
|
3043
|
+
# @example
|
3044
|
+
# lf.unique(subset: ["bar", "ham"], maintain_order: true).collect
|
3045
|
+
# # =>
|
3046
|
+
# # shape: (1, 3)
|
3047
|
+
# # ┌─────┬─────┬─────┐
|
3048
|
+
# # │ foo ┆ bar ┆ ham │
|
3049
|
+
# # │ --- ┆ --- ┆ --- │
|
3050
|
+
# # │ i64 ┆ str ┆ str │
|
3051
|
+
# # ╞═════╪═════╪═════╡
|
3052
|
+
# # │ 1 ┆ a ┆ b │
|
3053
|
+
# # └─────┴─────┴─────┘
|
3054
|
+
#
|
3055
|
+
# @example
|
3056
|
+
# lf.unique(keep: "last", maintain_order: true).collect
|
3057
|
+
# # =>
|
3058
|
+
# # shape: (3, 3)
|
3059
|
+
# # ┌─────┬─────┬─────┐
|
3060
|
+
# # │ foo ┆ bar ┆ ham │
|
3061
|
+
# # │ --- ┆ --- ┆ --- │
|
3062
|
+
# # │ i64 ┆ str ┆ str │
|
3063
|
+
# # ╞═════╪═════╪═════╡
|
3064
|
+
# # │ 2 ┆ a ┆ b │
|
3065
|
+
# # │ 3 ┆ a ┆ b │
|
3066
|
+
# # │ 1 ┆ a ┆ b │
|
3067
|
+
# # └─────┴─────┴─────┘
|
2434
3068
|
def unique(maintain_order: true, subset: nil, keep: "first")
|
2435
3069
|
if !subset.nil? && !subset.is_a?(::Array)
|
2436
3070
|
subset = [subset]
|
@@ -2504,7 +3138,7 @@ module Polars
|
|
2504
3138
|
# "c" => [2, 4, 6]
|
2505
3139
|
# }
|
2506
3140
|
# )
|
2507
|
-
# lf.unpivot(Polars
|
3141
|
+
# lf.unpivot(Polars.cs.numeric, index: "a").collect
|
2508
3142
|
# # =>
|
2509
3143
|
# # shape: (6, 3)
|
2510
3144
|
# # ┌─────┬──────────┬───────┐
|
@@ -2530,8 +3164,8 @@ module Polars
|
|
2530
3164
|
warn "The `streamable` parameter for `LazyFrame.unpivot` is deprecated"
|
2531
3165
|
end
|
2532
3166
|
|
2533
|
-
on = on.nil? ? [] : Utils.
|
2534
|
-
index = index.nil? ? [] : Utils.
|
3167
|
+
on = on.nil? ? [] : Utils.parse_into_list_of_expressions(on)
|
3168
|
+
index = index.nil? ? [] : Utils.parse_into_list_of_expressions(index)
|
2535
3169
|
|
2536
3170
|
_from_rbldf(
|
2537
3171
|
_ldf.unpivot(on, index, value_name, variable_name)
|