polars-df 0.14.0-x86_64-linux-musl → 0.16.0-x86_64-linux-musl
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/Cargo.lock +1523 -378
- data/LICENSE-THIRD-PARTY.txt +23495 -12923
- data/LICENSE.txt +1 -0
- data/README.md +38 -4
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/{3.1 → 3.4}/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +452 -101
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +3 -1
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +95 -13
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +684 -20
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1226 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +17 -1
- metadata +10 -9
- data/lib/polars/functions.rb +0 -57
data/lib/polars/lazy_frame.rb
CHANGED
@@ -431,7 +431,9 @@ module Polars
|
|
431
431
|
projection_pushdown: true,
|
432
432
|
simplify_expression: true,
|
433
433
|
no_optimization: false,
|
434
|
-
slice_pushdown: true
|
434
|
+
slice_pushdown: true,
|
435
|
+
storage_options: nil,
|
436
|
+
retries: 2
|
435
437
|
)
|
436
438
|
lf = _set_sink_optimizations(
|
437
439
|
type_coercion: type_coercion,
|
@@ -460,6 +462,12 @@ module Polars
|
|
460
462
|
}
|
461
463
|
end
|
462
464
|
|
465
|
+
if storage_options&.any?
|
466
|
+
storage_options = storage_options.to_a
|
467
|
+
else
|
468
|
+
storage_options = nil
|
469
|
+
end
|
470
|
+
|
463
471
|
lf.sink_parquet(
|
464
472
|
path,
|
465
473
|
compression,
|
@@ -467,7 +475,9 @@ module Polars
|
|
467
475
|
statistics,
|
468
476
|
row_group_size,
|
469
477
|
data_pagesize_limit,
|
470
|
-
maintain_order
|
478
|
+
maintain_order,
|
479
|
+
storage_options,
|
480
|
+
retries
|
471
481
|
)
|
472
482
|
end
|
473
483
|
|
@@ -512,6 +522,10 @@ module Polars
|
|
512
522
|
slice_pushdown: true,
|
513
523
|
no_optimization: false
|
514
524
|
)
|
525
|
+
# TODO support storage options in Rust
|
526
|
+
storage_options = nil
|
527
|
+
retries = 2
|
528
|
+
|
515
529
|
lf = _set_sink_optimizations(
|
516
530
|
type_coercion: type_coercion,
|
517
531
|
predicate_pushdown: predicate_pushdown,
|
@@ -521,10 +535,18 @@ module Polars
|
|
521
535
|
no_optimization: no_optimization
|
522
536
|
)
|
523
537
|
|
538
|
+
if storage_options&.any?
|
539
|
+
storage_options = storage_options.to_a
|
540
|
+
else
|
541
|
+
storage_options = nil
|
542
|
+
end
|
543
|
+
|
524
544
|
lf.sink_ipc(
|
525
545
|
path,
|
526
546
|
compression,
|
527
|
-
maintain_order
|
547
|
+
maintain_order,
|
548
|
+
storage_options,
|
549
|
+
retries
|
528
550
|
)
|
529
551
|
end
|
530
552
|
|
@@ -692,7 +714,9 @@ module Polars
|
|
692
714
|
projection_pushdown: true,
|
693
715
|
simplify_expression: true,
|
694
716
|
slice_pushdown: true,
|
695
|
-
no_optimization: false
|
717
|
+
no_optimization: false,
|
718
|
+
storage_options: nil,
|
719
|
+
retries: 2
|
696
720
|
)
|
697
721
|
lf = _set_sink_optimizations(
|
698
722
|
type_coercion: type_coercion,
|
@@ -703,7 +727,13 @@ module Polars
|
|
703
727
|
no_optimization: no_optimization
|
704
728
|
)
|
705
729
|
|
706
|
-
|
730
|
+
if storage_options&.any?
|
731
|
+
storage_options = storage_options.to_a
|
732
|
+
else
|
733
|
+
storage_options = nil
|
734
|
+
end
|
735
|
+
|
736
|
+
lf.sink_json(path, maintain_order, storage_options, retries)
|
707
737
|
end
|
708
738
|
|
709
739
|
# @private
|
@@ -848,9 +878,70 @@ module Polars
|
|
848
878
|
_from_rbldf(_ldf.cache)
|
849
879
|
end
|
850
880
|
|
851
|
-
#
|
852
|
-
#
|
853
|
-
#
|
881
|
+
# Cast LazyFrame column(s) to the specified dtype(s).
|
882
|
+
#
|
883
|
+
# @param dtypes [Hash]
|
884
|
+
# Mapping of column names (or selector) to dtypes, or a single dtype
|
885
|
+
# to which all columns will be cast.
|
886
|
+
# @param strict [Boolean]
|
887
|
+
# Throw an error if a cast could not be done (for instance, due to an
|
888
|
+
# overflow).
|
889
|
+
#
|
890
|
+
# @return [LazyFrame]
|
891
|
+
#
|
892
|
+
# @example Cast specific frame columns to the specified dtypes:
|
893
|
+
# lf = Polars::LazyFrame.new(
|
894
|
+
# {
|
895
|
+
# "foo" => [1, 2, 3],
|
896
|
+
# "bar" => [6.0, 7.0, 8.0],
|
897
|
+
# "ham" => [Date.new(2020, 1, 2), Date.new(2021, 3, 4), Date.new(2022, 5, 6)]
|
898
|
+
# }
|
899
|
+
# )
|
900
|
+
# lf.cast({"foo" => Polars::Float32, "bar" => Polars::UInt8}).collect
|
901
|
+
# # =>
|
902
|
+
# # shape: (3, 3)
|
903
|
+
# # ┌─────┬─────┬────────────┐
|
904
|
+
# # │ foo ┆ bar ┆ ham │
|
905
|
+
# # │ --- ┆ --- ┆ --- │
|
906
|
+
# # │ f32 ┆ u8 ┆ date │
|
907
|
+
# # ╞═════╪═════╪════════════╡
|
908
|
+
# # │ 1.0 ┆ 6 ┆ 2020-01-02 │
|
909
|
+
# # │ 2.0 ┆ 7 ┆ 2021-03-04 │
|
910
|
+
# # │ 3.0 ┆ 8 ┆ 2022-05-06 │
|
911
|
+
# # └─────┴─────┴────────────┘
|
912
|
+
#
|
913
|
+
# @example Cast all frame columns matching one dtype (or dtype group) to another dtype:
|
914
|
+
# lf.cast({Polars::Date => Polars::Datetime}).collect
|
915
|
+
# # =>
|
916
|
+
# # shape: (3, 3)
|
917
|
+
# # ┌─────┬─────┬─────────────────────┐
|
918
|
+
# # │ foo ┆ bar ┆ ham │
|
919
|
+
# # │ --- ┆ --- ┆ --- │
|
920
|
+
# # │ i64 ┆ f64 ┆ datetime[μs] │
|
921
|
+
# # ╞═════╪═════╪═════════════════════╡
|
922
|
+
# # │ 1 ┆ 6.0 ┆ 2020-01-02 00:00:00 │
|
923
|
+
# # │ 2 ┆ 7.0 ┆ 2021-03-04 00:00:00 │
|
924
|
+
# # │ 3 ┆ 8.0 ┆ 2022-05-06 00:00:00 │
|
925
|
+
# # └─────┴─────┴─────────────────────┘
|
926
|
+
#
|
927
|
+
# @example Cast all frame columns to the specified dtype:
|
928
|
+
# lf.cast(Polars::String).collect.to_h(as_series: false)
|
929
|
+
# # => {"foo"=>["1", "2", "3"], "bar"=>["6.0", "7.0", "8.0"], "ham"=>["2020-01-02", "2021-03-04", "2022-05-06"]}
|
930
|
+
def cast(dtypes, strict: true)
|
931
|
+
if !dtypes.is_a?(Hash)
|
932
|
+
return _from_rbldf(_ldf.cast_all(dtypes, strict))
|
933
|
+
end
|
934
|
+
|
935
|
+
cast_map = {}
|
936
|
+
dtypes.each do |c, dtype|
|
937
|
+
dtype = Utils.parse_into_dtype(dtype)
|
938
|
+
cast_map.merge!(
|
939
|
+
c.is_a?(::String) ? {c => dtype} : Utils.expand_selector(self, c).to_h { |x| [x, dtype] }
|
940
|
+
)
|
941
|
+
end
|
942
|
+
|
943
|
+
_from_rbldf(_ldf.cast(cast_map, strict))
|
944
|
+
end
|
854
945
|
|
855
946
|
# Create an empty copy of the current LazyFrame.
|
856
947
|
#
|
@@ -1520,8 +1611,197 @@ module Polars
|
|
1520
1611
|
# @param force_parallel [Boolean]
|
1521
1612
|
# Force the physical plan to evaluate the computation of both DataFrames up to
|
1522
1613
|
# the join in parallel.
|
1614
|
+
# @param coalesce [Boolean]
|
1615
|
+
# Coalescing behavior (merging of join columns).
|
1616
|
+
# - true: -> Always coalesce join columns.
|
1617
|
+
# - false: -> Never coalesce join columns.
|
1618
|
+
# Note that joining on any other expressions than `col` will turn off coalescing.
|
1523
1619
|
#
|
1524
1620
|
# @return [LazyFrame]
|
1621
|
+
#
|
1622
|
+
# @example
|
1623
|
+
# gdp = Polars::LazyFrame.new(
|
1624
|
+
# {
|
1625
|
+
# "date" => Polars.date_range(
|
1626
|
+
# Date.new(2016, 1, 1),
|
1627
|
+
# Date.new(2020, 1, 1),
|
1628
|
+
# "1y",
|
1629
|
+
# eager: true
|
1630
|
+
# ),
|
1631
|
+
# "gdp" => [4164, 4411, 4566, 4696, 4827]
|
1632
|
+
# }
|
1633
|
+
# )
|
1634
|
+
# gdp.collect
|
1635
|
+
# # =>
|
1636
|
+
# # shape: (5, 2)
|
1637
|
+
# # ┌────────────┬──────┐
|
1638
|
+
# # │ date ┆ gdp │
|
1639
|
+
# # │ --- ┆ --- │
|
1640
|
+
# # │ date ┆ i64 │
|
1641
|
+
# # ╞════════════╪══════╡
|
1642
|
+
# # │ 2016-01-01 ┆ 4164 │
|
1643
|
+
# # │ 2017-01-01 ┆ 4411 │
|
1644
|
+
# # │ 2018-01-01 ┆ 4566 │
|
1645
|
+
# # │ 2019-01-01 ┆ 4696 │
|
1646
|
+
# # │ 2020-01-01 ┆ 4827 │
|
1647
|
+
# # └────────────┴──────┘
|
1648
|
+
#
|
1649
|
+
# @example
|
1650
|
+
# population = Polars::LazyFrame.new(
|
1651
|
+
# {
|
1652
|
+
# "date" => [Date.new(2016, 3, 1), Date.new(2018, 8, 1), Date.new(2019, 1, 1)],
|
1653
|
+
# "population" => [82.19, 82.66, 83.12]
|
1654
|
+
# }
|
1655
|
+
# ).sort("date")
|
1656
|
+
# population.collect
|
1657
|
+
# # =>
|
1658
|
+
# # shape: (3, 2)
|
1659
|
+
# # ┌────────────┬────────────┐
|
1660
|
+
# # │ date ┆ population │
|
1661
|
+
# # │ --- ┆ --- │
|
1662
|
+
# # │ date ┆ f64 │
|
1663
|
+
# # ╞════════════╪════════════╡
|
1664
|
+
# # │ 2016-03-01 ┆ 82.19 │
|
1665
|
+
# # │ 2018-08-01 ┆ 82.66 │
|
1666
|
+
# # │ 2019-01-01 ┆ 83.12 │
|
1667
|
+
# # └────────────┴────────────┘
|
1668
|
+
#
|
1669
|
+
# @example Note how the dates don't quite match. If we join them using `join_asof` and `strategy: "backward"`, then each date from `population` which doesn't have an exact match is matched with the closest earlier date from `gdp`:
|
1670
|
+
# population.join_asof(gdp, on: "date", strategy: "backward").collect
|
1671
|
+
# # =>
|
1672
|
+
# # shape: (3, 3)
|
1673
|
+
# # ┌────────────┬────────────┬──────┐
|
1674
|
+
# # │ date ┆ population ┆ gdp │
|
1675
|
+
# # │ --- ┆ --- ┆ --- │
|
1676
|
+
# # │ date ┆ f64 ┆ i64 │
|
1677
|
+
# # ╞════════════╪════════════╪══════╡
|
1678
|
+
# # │ 2016-03-01 ┆ 82.19 ┆ 4164 │
|
1679
|
+
# # │ 2018-08-01 ┆ 82.66 ┆ 4566 │
|
1680
|
+
# # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
|
1681
|
+
# # └────────────┴────────────┴──────┘
|
1682
|
+
#
|
1683
|
+
# @example
|
1684
|
+
# population.join_asof(
|
1685
|
+
# gdp, on: "date", strategy: "backward", coalesce: false
|
1686
|
+
# ).collect
|
1687
|
+
# # =>
|
1688
|
+
# # shape: (3, 4)
|
1689
|
+
# # ┌────────────┬────────────┬────────────┬──────┐
|
1690
|
+
# # │ date ┆ population ┆ date_right ┆ gdp │
|
1691
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
1692
|
+
# # │ date ┆ f64 ┆ date ┆ i64 │
|
1693
|
+
# # ╞════════════╪════════════╪════════════╪══════╡
|
1694
|
+
# # │ 2016-03-01 ┆ 82.19 ┆ 2016-01-01 ┆ 4164 │
|
1695
|
+
# # │ 2018-08-01 ┆ 82.66 ┆ 2018-01-01 ┆ 4566 │
|
1696
|
+
# # │ 2019-01-01 ┆ 83.12 ┆ 2019-01-01 ┆ 4696 │
|
1697
|
+
# # └────────────┴────────────┴────────────┴──────┘
|
1698
|
+
#
|
1699
|
+
# @example If we instead use `strategy: "forward"`, then each date from `population` which doesn't have an exact match is matched with the closest later date from `gdp`:
|
1700
|
+
# population.join_asof(gdp, on: "date", strategy: "forward").collect
|
1701
|
+
# # =>
|
1702
|
+
# # shape: (3, 3)
|
1703
|
+
# # ┌────────────┬────────────┬──────┐
|
1704
|
+
# # │ date ┆ population ┆ gdp │
|
1705
|
+
# # │ --- ┆ --- ┆ --- │
|
1706
|
+
# # │ date ┆ f64 ┆ i64 │
|
1707
|
+
# # ╞════════════╪════════════╪══════╡
|
1708
|
+
# # │ 2016-03-01 ┆ 82.19 ┆ 4411 │
|
1709
|
+
# # │ 2018-08-01 ┆ 82.66 ┆ 4696 │
|
1710
|
+
# # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
|
1711
|
+
# # └────────────┴────────────┴──────┘
|
1712
|
+
#
|
1713
|
+
# @example
|
1714
|
+
# population.join_asof(gdp, on: "date", strategy: "nearest").collect
|
1715
|
+
# # =>
|
1716
|
+
# # shape: (3, 3)
|
1717
|
+
# # ┌────────────┬────────────┬──────┐
|
1718
|
+
# # │ date ┆ population ┆ gdp │
|
1719
|
+
# # │ --- ┆ --- ┆ --- │
|
1720
|
+
# # │ date ┆ f64 ┆ i64 │
|
1721
|
+
# # ╞════════════╪════════════╪══════╡
|
1722
|
+
# # │ 2016-03-01 ┆ 82.19 ┆ 4164 │
|
1723
|
+
# # │ 2018-08-01 ┆ 82.66 ┆ 4696 │
|
1724
|
+
# # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
|
1725
|
+
# # └────────────┴────────────┴──────┘
|
1726
|
+
#
|
1727
|
+
# @example
|
1728
|
+
# gdp_dates = Polars.date_range(
|
1729
|
+
# Date.new(2016, 1, 1), Date.new(2020, 1, 1), "1y", eager: true
|
1730
|
+
# )
|
1731
|
+
# gdp2 = Polars::LazyFrame.new(
|
1732
|
+
# {
|
1733
|
+
# "country" => ["Germany"] * 5 + ["Netherlands"] * 5,
|
1734
|
+
# "date" => Polars.concat([gdp_dates, gdp_dates]),
|
1735
|
+
# "gdp" => [4164, 4411, 4566, 4696, 4827, 784, 833, 914, 910, 909]
|
1736
|
+
# }
|
1737
|
+
# ).sort("country", "date")
|
1738
|
+
# gdp2.collect
|
1739
|
+
# # =>
|
1740
|
+
# # shape: (10, 3)
|
1741
|
+
# # ┌─────────────┬────────────┬──────┐
|
1742
|
+
# # │ country ┆ date ┆ gdp │
|
1743
|
+
# # │ --- ┆ --- ┆ --- │
|
1744
|
+
# # │ str ┆ date ┆ i64 │
|
1745
|
+
# # ╞═════════════╪════════════╪══════╡
|
1746
|
+
# # │ Germany ┆ 2016-01-01 ┆ 4164 │
|
1747
|
+
# # │ Germany ┆ 2017-01-01 ┆ 4411 │
|
1748
|
+
# # │ Germany ┆ 2018-01-01 ┆ 4566 │
|
1749
|
+
# # │ Germany ┆ 2019-01-01 ┆ 4696 │
|
1750
|
+
# # │ Germany ┆ 2020-01-01 ┆ 4827 │
|
1751
|
+
# # │ Netherlands ┆ 2016-01-01 ┆ 784 │
|
1752
|
+
# # │ Netherlands ┆ 2017-01-01 ┆ 833 │
|
1753
|
+
# # │ Netherlands ┆ 2018-01-01 ┆ 914 │
|
1754
|
+
# # │ Netherlands ┆ 2019-01-01 ┆ 910 │
|
1755
|
+
# # │ Netherlands ┆ 2020-01-01 ┆ 909 │
|
1756
|
+
# # └─────────────┴────────────┴──────┘
|
1757
|
+
#
|
1758
|
+
# @example
|
1759
|
+
# pop2 = Polars::LazyFrame.new(
|
1760
|
+
# {
|
1761
|
+
# "country" => ["Germany"] * 3 + ["Netherlands"] * 3,
|
1762
|
+
# "date" => [
|
1763
|
+
# Date.new(2016, 3, 1),
|
1764
|
+
# Date.new(2018, 8, 1),
|
1765
|
+
# Date.new(2019, 1, 1),
|
1766
|
+
# Date.new(2016, 3, 1),
|
1767
|
+
# Date.new(2018, 8, 1),
|
1768
|
+
# Date.new(2019, 1, 1)
|
1769
|
+
# ],
|
1770
|
+
# "population" => [82.19, 82.66, 83.12, 17.11, 17.32, 17.40]
|
1771
|
+
# }
|
1772
|
+
# ).sort("country", "date")
|
1773
|
+
# pop2.collect
|
1774
|
+
# # =>
|
1775
|
+
# # shape: (6, 3)
|
1776
|
+
# # ┌─────────────┬────────────┬────────────┐
|
1777
|
+
# # │ country ┆ date ┆ population │
|
1778
|
+
# # │ --- ┆ --- ┆ --- │
|
1779
|
+
# # │ str ┆ date ┆ f64 │
|
1780
|
+
# # ╞═════════════╪════════════╪════════════╡
|
1781
|
+
# # │ Germany ┆ 2016-03-01 ┆ 82.19 │
|
1782
|
+
# # │ Germany ┆ 2018-08-01 ┆ 82.66 │
|
1783
|
+
# # │ Germany ┆ 2019-01-01 ┆ 83.12 │
|
1784
|
+
# # │ Netherlands ┆ 2016-03-01 ┆ 17.11 │
|
1785
|
+
# # │ Netherlands ┆ 2018-08-01 ┆ 17.32 │
|
1786
|
+
# # │ Netherlands ┆ 2019-01-01 ┆ 17.4 │
|
1787
|
+
# # └─────────────┴────────────┴────────────┘
|
1788
|
+
#
|
1789
|
+
# @example
|
1790
|
+
# pop2.join_asof(gdp2, by: "country", on: "date", strategy: "nearest").collect
|
1791
|
+
# # =>
|
1792
|
+
# # shape: (6, 4)
|
1793
|
+
# # ┌─────────────┬────────────┬────────────┬──────┐
|
1794
|
+
# # │ country ┆ date ┆ population ┆ gdp │
|
1795
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
1796
|
+
# # │ str ┆ date ┆ f64 ┆ i64 │
|
1797
|
+
# # ╞═════════════╪════════════╪════════════╪══════╡
|
1798
|
+
# # │ Germany ┆ 2016-03-01 ┆ 82.19 ┆ 4164 │
|
1799
|
+
# # │ Germany ┆ 2018-08-01 ┆ 82.66 ┆ 4696 │
|
1800
|
+
# # │ Germany ┆ 2019-01-01 ┆ 83.12 ┆ 4696 │
|
1801
|
+
# # │ Netherlands ┆ 2016-03-01 ┆ 17.11 ┆ 784 │
|
1802
|
+
# # │ Netherlands ┆ 2018-08-01 ┆ 17.32 ┆ 910 │
|
1803
|
+
# # │ Netherlands ┆ 2019-01-01 ┆ 17.4 ┆ 910 │
|
1804
|
+
# # └─────────────┴────────────┴────────────┴──────┘
|
1525
1805
|
def join_asof(
|
1526
1806
|
other,
|
1527
1807
|
left_on: nil,
|
@@ -1534,7 +1814,8 @@ module Polars
|
|
1534
1814
|
suffix: "_right",
|
1535
1815
|
tolerance: nil,
|
1536
1816
|
allow_parallel: true,
|
1537
|
-
force_parallel: false
|
1817
|
+
force_parallel: false,
|
1818
|
+
coalesce: true
|
1538
1819
|
)
|
1539
1820
|
if !other.is_a?(LazyFrame)
|
1540
1821
|
raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
|
@@ -1589,7 +1870,8 @@ module Polars
|
|
1589
1870
|
suffix,
|
1590
1871
|
strategy,
|
1591
1872
|
tolerance_num,
|
1592
|
-
tolerance_str
|
1873
|
+
tolerance_str,
|
1874
|
+
coalesce
|
1593
1875
|
)
|
1594
1876
|
)
|
1595
1877
|
end
|
@@ -1609,6 +1891,12 @@ module Polars
|
|
1609
1891
|
# Join strategy.
|
1610
1892
|
# @param suffix [String]
|
1611
1893
|
# Suffix to append to columns with a duplicate name.
|
1894
|
+
# @param validate ['m:m', 'm:1', '1:m', '1:1']
|
1895
|
+
# Checks if join is of specified type.
|
1896
|
+
# * *many_to_many* - “m:m”: default, does not result in checks
|
1897
|
+
# * *one_to_one* - “1:1”: check if join keys are unique in both left and right datasets
|
1898
|
+
# * *one_to_many* - “1:m”: check if join keys are unique in left dataset
|
1899
|
+
# * *many_to_one* - “m:1”: check if join keys are unique in right dataset
|
1612
1900
|
# @param join_nulls [Boolean]
|
1613
1901
|
# Join on null values. By default null values will never produce matches.
|
1614
1902
|
# @param allow_parallel [Boolean]
|
@@ -1617,6 +1905,12 @@ module Polars
|
|
1617
1905
|
# @param force_parallel [Boolean]
|
1618
1906
|
# Force the physical plan to evaluate the computation of both DataFrames up to
|
1619
1907
|
# the join in parallel.
|
1908
|
+
# @param coalesce [Boolean]
|
1909
|
+
# Coalescing behavior (merging of join columns).
|
1910
|
+
# - nil: -> join specific.
|
1911
|
+
# - true: -> Always coalesce join columns.
|
1912
|
+
# - false: -> Never coalesce join columns.
|
1913
|
+
# Note that joining on any other expressions than `col` will turn off coalescing.
|
1620
1914
|
#
|
1621
1915
|
# @return [LazyFrame]
|
1622
1916
|
#
|
@@ -1706,9 +2000,11 @@ module Polars
|
|
1706
2000
|
on: nil,
|
1707
2001
|
how: "inner",
|
1708
2002
|
suffix: "_right",
|
2003
|
+
validate: "m:m",
|
1709
2004
|
join_nulls: false,
|
1710
2005
|
allow_parallel: true,
|
1711
|
-
force_parallel: false
|
2006
|
+
force_parallel: false,
|
2007
|
+
coalesce: nil
|
1712
2008
|
)
|
1713
2009
|
if !other.is_a?(LazyFrame)
|
1714
2010
|
raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
|
@@ -1719,7 +2015,7 @@ module Polars
|
|
1719
2015
|
elsif how == "cross"
|
1720
2016
|
return _from_rbldf(
|
1721
2017
|
_ldf.join(
|
1722
|
-
other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
|
2018
|
+
other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix, validate, coalesce
|
1723
2019
|
)
|
1724
2020
|
)
|
1725
2021
|
end
|
@@ -1745,6 +2041,8 @@ module Polars
|
|
1745
2041
|
join_nulls,
|
1746
2042
|
how,
|
1747
2043
|
suffix,
|
2044
|
+
validate,
|
2045
|
+
coalesce
|
1748
2046
|
)
|
1749
2047
|
)
|
1750
2048
|
end
|
@@ -1879,6 +2177,55 @@ module Polars
|
|
1879
2177
|
# - List of column names.
|
1880
2178
|
#
|
1881
2179
|
# @return [LazyFrame]
|
2180
|
+
#
|
2181
|
+
# @example Drop a single column by passing the name of that column.
|
2182
|
+
# lf = Polars::LazyFrame.new(
|
2183
|
+
# {
|
2184
|
+
# "foo" => [1, 2, 3],
|
2185
|
+
# "bar" => [6.0, 7.0, 8.0],
|
2186
|
+
# "ham" => ["a", "b", "c"]
|
2187
|
+
# }
|
2188
|
+
# )
|
2189
|
+
# lf.drop("ham").collect
|
2190
|
+
# # =>
|
2191
|
+
# # shape: (3, 2)
|
2192
|
+
# # ┌─────┬─────┐
|
2193
|
+
# # │ foo ┆ bar │
|
2194
|
+
# # │ --- ┆ --- │
|
2195
|
+
# # │ i64 ┆ f64 │
|
2196
|
+
# # ╞═════╪═════╡
|
2197
|
+
# # │ 1 ┆ 6.0 │
|
2198
|
+
# # │ 2 ┆ 7.0 │
|
2199
|
+
# # │ 3 ┆ 8.0 │
|
2200
|
+
# # └─────┴─────┘
|
2201
|
+
#
|
2202
|
+
# @example Drop multiple columns by passing a selector.
|
2203
|
+
# lf.drop(Polars.cs.numeric).collect
|
2204
|
+
# # =>
|
2205
|
+
# # shape: (3, 1)
|
2206
|
+
# # ┌─────┐
|
2207
|
+
# # │ ham │
|
2208
|
+
# # │ --- │
|
2209
|
+
# # │ str │
|
2210
|
+
# # ╞═════╡
|
2211
|
+
# # │ a │
|
2212
|
+
# # │ b │
|
2213
|
+
# # │ c │
|
2214
|
+
# # └─────┘
|
2215
|
+
#
|
2216
|
+
# @example Use positional arguments to drop multiple columns.
|
2217
|
+
# lf.drop("foo", "ham").collect
|
2218
|
+
# # =>
|
2219
|
+
# # shape: (3, 1)
|
2220
|
+
# # ┌─────┐
|
2221
|
+
# # │ bar │
|
2222
|
+
# # │ --- │
|
2223
|
+
# # │ f64 │
|
2224
|
+
# # ╞═════╡
|
2225
|
+
# # │ 6.0 │
|
2226
|
+
# # │ 7.0 │
|
2227
|
+
# # │ 8.0 │
|
2228
|
+
# # └─────┘
|
1882
2229
|
def drop(*columns)
|
1883
2230
|
drop_cols = Utils._expand_selectors(self, *columns)
|
1884
2231
|
_from_rbldf(_ldf.drop(drop_cols))
|
@@ -1888,17 +2235,80 @@ module Polars
|
|
1888
2235
|
#
|
1889
2236
|
# @param mapping [Hash]
|
1890
2237
|
# Key value pairs that map from old name to new name.
|
2238
|
+
# @param strict [Boolean]
|
2239
|
+
# Validate that all column names exist in the current schema,
|
2240
|
+
# and throw an exception if any do not. (Note that this parameter
|
2241
|
+
# is a no-op when passing a function to `mapping`).
|
1891
2242
|
#
|
1892
2243
|
# @return [LazyFrame]
|
1893
|
-
|
1894
|
-
|
1895
|
-
|
1896
|
-
|
2244
|
+
#
|
2245
|
+
# @example
|
2246
|
+
# lf = Polars::LazyFrame.new(
|
2247
|
+
# {
|
2248
|
+
# "foo" => [1, 2, 3],
|
2249
|
+
# "bar" => [6, 7, 8],
|
2250
|
+
# "ham" => ["a", "b", "c"]
|
2251
|
+
# }
|
2252
|
+
# )
|
2253
|
+
# lf.rename({"foo" => "apple"}).collect
|
2254
|
+
# # =>
|
2255
|
+
# # shape: (3, 3)
|
2256
|
+
# # ┌───────┬─────┬─────┐
|
2257
|
+
# # │ apple ┆ bar ┆ ham │
|
2258
|
+
# # │ --- ┆ --- ┆ --- │
|
2259
|
+
# # │ i64 ┆ i64 ┆ str │
|
2260
|
+
# # ╞═══════╪═════╪═════╡
|
2261
|
+
# # │ 1 ┆ 6 ┆ a │
|
2262
|
+
# # │ 2 ┆ 7 ┆ b │
|
2263
|
+
# # │ 3 ┆ 8 ┆ c │
|
2264
|
+
# # └───────┴─────┴─────┘
|
2265
|
+
#
|
2266
|
+
# @example
|
2267
|
+
# lf.rename(->(column_name) { "c" + column_name[1..] }).collect
|
2268
|
+
# # =>
|
2269
|
+
# # shape: (3, 3)
|
2270
|
+
# # ┌─────┬─────┬─────┐
|
2271
|
+
# # │ coo ┆ car ┆ cam │
|
2272
|
+
# # │ --- ┆ --- ┆ --- │
|
2273
|
+
# # │ i64 ┆ i64 ┆ str │
|
2274
|
+
# # ╞═════╪═════╪═════╡
|
2275
|
+
# # │ 1 ┆ 6 ┆ a │
|
2276
|
+
# # │ 2 ┆ 7 ┆ b │
|
2277
|
+
# # │ 3 ┆ 8 ┆ c │
|
2278
|
+
# # └─────┴─────┴─────┘
|
2279
|
+
def rename(mapping, strict: true)
|
2280
|
+
if mapping.respond_to?(:call)
|
2281
|
+
select(F.all.name.map(&mapping))
|
2282
|
+
else
|
2283
|
+
existing = mapping.keys
|
2284
|
+
_new = mapping.values
|
2285
|
+
_from_rbldf(_ldf.rename(existing, _new, strict))
|
2286
|
+
end
|
1897
2287
|
end
|
1898
2288
|
|
1899
2289
|
# Reverse the DataFrame.
|
1900
2290
|
#
|
1901
2291
|
# @return [LazyFrame]
|
2292
|
+
#
|
2293
|
+
# @example
|
2294
|
+
# lf = Polars::LazyFrame.new(
|
2295
|
+
# {
|
2296
|
+
# "key" => ["a", "b", "c"],
|
2297
|
+
# "val" => [1, 2, 3]
|
2298
|
+
# }
|
2299
|
+
# )
|
2300
|
+
# lf.reverse.collect
|
2301
|
+
# # =>
|
2302
|
+
# # shape: (3, 2)
|
2303
|
+
# # ┌─────┬─────┐
|
2304
|
+
# # │ key ┆ val │
|
2305
|
+
# # │ --- ┆ --- │
|
2306
|
+
# # │ str ┆ i64 │
|
2307
|
+
# # ╞═════╪═════╡
|
2308
|
+
# # │ c ┆ 3 │
|
2309
|
+
# # │ b ┆ 2 │
|
2310
|
+
# # │ a ┆ 1 │
|
2311
|
+
# # └─────┴─────┘
|
1902
2312
|
def reverse
|
1903
2313
|
_from_rbldf(_ldf.reverse)
|
1904
2314
|
end
|
@@ -2048,8 +2458,43 @@ module Polars
|
|
2048
2458
|
# Consider using the {#fetch} operation if you only want to test your
|
2049
2459
|
# query. The {#fetch} operation will load the first `n` rows at the scan
|
2050
2460
|
# level, whereas the {#head}/{#limit} are applied at the end.
|
2461
|
+
#
|
2462
|
+
# @example
|
2463
|
+
# lf = Polars::LazyFrame.new(
|
2464
|
+
# {
|
2465
|
+
# "a" => [1, 2, 3, 4, 5, 6],
|
2466
|
+
# "b" => [7, 8, 9, 10, 11, 12]
|
2467
|
+
# }
|
2468
|
+
# )
|
2469
|
+
# lf.limit.collect
|
2470
|
+
# # =>
|
2471
|
+
# # shape: (5, 2)
|
2472
|
+
# # ┌─────┬─────┐
|
2473
|
+
# # │ a ┆ b │
|
2474
|
+
# # │ --- ┆ --- │
|
2475
|
+
# # │ i64 ┆ i64 │
|
2476
|
+
# # ╞═════╪═════╡
|
2477
|
+
# # │ 1 ┆ 7 │
|
2478
|
+
# # │ 2 ┆ 8 │
|
2479
|
+
# # │ 3 ┆ 9 │
|
2480
|
+
# # │ 4 ┆ 10 │
|
2481
|
+
# # │ 5 ┆ 11 │
|
2482
|
+
# # └─────┴─────┘
|
2483
|
+
#
|
2484
|
+
# @example
|
2485
|
+
# lf.limit(2).collect
|
2486
|
+
# # =>
|
2487
|
+
# # shape: (2, 2)
|
2488
|
+
# # ┌─────┬─────┐
|
2489
|
+
# # │ a ┆ b │
|
2490
|
+
# # │ --- ┆ --- │
|
2491
|
+
# # │ i64 ┆ i64 │
|
2492
|
+
# # ╞═════╪═════╡
|
2493
|
+
# # │ 1 ┆ 7 │
|
2494
|
+
# # │ 2 ┆ 8 │
|
2495
|
+
# # └─────┴─────┘
|
2051
2496
|
def limit(n = 5)
|
2052
|
-
head(
|
2497
|
+
head(n)
|
2053
2498
|
end
|
2054
2499
|
|
2055
2500
|
# Get the first `n` rows.
|
@@ -2063,6 +2508,41 @@ module Polars
|
|
2063
2508
|
# Consider using the {#fetch} operation if you only want to test your
|
2064
2509
|
# query. The {#fetch} operation will load the first `n` rows at the scan
|
2065
2510
|
# level, whereas the {#head}/{#limit} are applied at the end.
|
2511
|
+
#
|
2512
|
+
# @example
|
2513
|
+
# lf = Polars::LazyFrame.new(
|
2514
|
+
# {
|
2515
|
+
# "a" => [1, 2, 3, 4, 5, 6],
|
2516
|
+
# "b" => [7, 8, 9, 10, 11, 12]
|
2517
|
+
# }
|
2518
|
+
# )
|
2519
|
+
# lf.head.collect
|
2520
|
+
# # =>
|
2521
|
+
# # shape: (5, 2)
|
2522
|
+
# # ┌─────┬─────┐
|
2523
|
+
# # │ a ┆ b │
|
2524
|
+
# # │ --- ┆ --- │
|
2525
|
+
# # │ i64 ┆ i64 │
|
2526
|
+
# # ╞═════╪═════╡
|
2527
|
+
# # │ 1 ┆ 7 │
|
2528
|
+
# # │ 2 ┆ 8 │
|
2529
|
+
# # │ 3 ┆ 9 │
|
2530
|
+
# # │ 4 ┆ 10 │
|
2531
|
+
# # │ 5 ┆ 11 │
|
2532
|
+
# # └─────┴─────┘
|
2533
|
+
#
|
2534
|
+
# @example
|
2535
|
+
# lf.head(2).collect
|
2536
|
+
# # =>
|
2537
|
+
# # shape: (2, 2)
|
2538
|
+
# # ┌─────┬─────┐
|
2539
|
+
# # │ a ┆ b │
|
2540
|
+
# # │ --- ┆ --- │
|
2541
|
+
# # │ i64 ┆ i64 │
|
2542
|
+
# # ╞═════╪═════╡
|
2543
|
+
# # │ 1 ┆ 7 │
|
2544
|
+
# # │ 2 ┆ 8 │
|
2545
|
+
# # └─────┴─────┘
|
2066
2546
|
def head(n = 5)
|
2067
2547
|
slice(0, n)
|
2068
2548
|
end
|
@@ -2073,6 +2553,41 @@ module Polars
|
|
2073
2553
|
# Number of rows.
|
2074
2554
|
#
|
2075
2555
|
# @return [LazyFrame]
|
2556
|
+
#
|
2557
|
+
# @example
|
2558
|
+
# lf = Polars::LazyFrame.new(
|
2559
|
+
# {
|
2560
|
+
# "a" => [1, 2, 3, 4, 5, 6],
|
2561
|
+
# "b" => [7, 8, 9, 10, 11, 12]
|
2562
|
+
# }
|
2563
|
+
# )
|
2564
|
+
# lf.tail.collect
|
2565
|
+
# # =>
|
2566
|
+
# # shape: (5, 2)
|
2567
|
+
# # ┌─────┬─────┐
|
2568
|
+
# # │ a ┆ b │
|
2569
|
+
# # │ --- ┆ --- │
|
2570
|
+
# # │ i64 ┆ i64 │
|
2571
|
+
# # ╞═════╪═════╡
|
2572
|
+
# # │ 2 ┆ 8 │
|
2573
|
+
# # │ 3 ┆ 9 │
|
2574
|
+
# # │ 4 ┆ 10 │
|
2575
|
+
# # │ 5 ┆ 11 │
|
2576
|
+
# # │ 6 ┆ 12 │
|
2577
|
+
# # └─────┴─────┘
|
2578
|
+
#
|
2579
|
+
# @example
|
2580
|
+
# lf.tail(2).collect
|
2581
|
+
# # =>
|
2582
|
+
# # shape: (2, 2)
|
2583
|
+
# # ┌─────┬─────┐
|
2584
|
+
# # │ a ┆ b │
|
2585
|
+
# # │ --- ┆ --- │
|
2586
|
+
# # │ i64 ┆ i64 │
|
2587
|
+
# # ╞═════╪═════╡
|
2588
|
+
# # │ 5 ┆ 11 │
|
2589
|
+
# # │ 6 ┆ 12 │
|
2590
|
+
# # └─────┴─────┘
|
2076
2591
|
def tail(n = 5)
|
2077
2592
|
_from_rbldf(_ldf.tail(n))
|
2078
2593
|
end
|
@@ -2080,6 +2595,24 @@ module Polars
|
|
2080
2595
|
# Get the last row of the DataFrame.
|
2081
2596
|
#
|
2082
2597
|
# @return [LazyFrame]
|
2598
|
+
#
|
2599
|
+
# @example
|
2600
|
+
# lf = Polars::LazyFrame.new(
|
2601
|
+
# {
|
2602
|
+
# "a" => [1, 5, 3],
|
2603
|
+
# "b" => [2, 4, 6]
|
2604
|
+
# }
|
2605
|
+
# )
|
2606
|
+
# lf.last.collect
|
2607
|
+
# # =>
|
2608
|
+
# # shape: (1, 2)
|
2609
|
+
# # ┌─────┬─────┐
|
2610
|
+
# # │ a ┆ b │
|
2611
|
+
# # │ --- ┆ --- │
|
2612
|
+
# # │ i64 ┆ i64 │
|
2613
|
+
# # ╞═════╪═════╡
|
2614
|
+
# # │ 3 ┆ 6 │
|
2615
|
+
# # └─────┴─────┘
|
2083
2616
|
def last
|
2084
2617
|
tail(1)
|
2085
2618
|
end
|
@@ -2087,6 +2620,24 @@ module Polars
|
|
2087
2620
|
# Get the first row of the DataFrame.
|
2088
2621
|
#
|
2089
2622
|
# @return [LazyFrame]
|
2623
|
+
#
|
2624
|
+
# @example
|
2625
|
+
# lf = Polars::LazyFrame.new(
|
2626
|
+
# {
|
2627
|
+
# "a" => [1, 5, 3],
|
2628
|
+
# "b" => [2, 4, 6]
|
2629
|
+
# }
|
2630
|
+
# )
|
2631
|
+
# lf.first.collect
|
2632
|
+
# # =>
|
2633
|
+
# # shape: (1, 2)
|
2634
|
+
# # ┌─────┬─────┐
|
2635
|
+
# # │ a ┆ b │
|
2636
|
+
# # │ --- ┆ --- │
|
2637
|
+
# # │ i64 ┆ i64 │
|
2638
|
+
# # ╞═════╪═════╡
|
2639
|
+
# # │ 1 ┆ 2 │
|
2640
|
+
# # └─────┴─────┘
|
2090
2641
|
def first
|
2091
2642
|
slice(0, 1)
|
2092
2643
|
end
|
@@ -2152,6 +2703,72 @@ module Polars
|
|
2152
2703
|
# Fill null values using the specified value or strategy.
|
2153
2704
|
#
|
2154
2705
|
# @return [LazyFrame]
|
2706
|
+
#
|
2707
|
+
# @example
|
2708
|
+
# lf = Polars::LazyFrame.new(
|
2709
|
+
# {
|
2710
|
+
# "a" => [1, 2, nil, 4],
|
2711
|
+
# "b" => [0.5, 4, nil, 13]
|
2712
|
+
# }
|
2713
|
+
# )
|
2714
|
+
# lf.fill_null(99).collect
|
2715
|
+
# # =>
|
2716
|
+
# # shape: (4, 2)
|
2717
|
+
# # ┌─────┬──────┐
|
2718
|
+
# # │ a ┆ b │
|
2719
|
+
# # │ --- ┆ --- │
|
2720
|
+
# # │ i64 ┆ f64 │
|
2721
|
+
# # ╞═════╪══════╡
|
2722
|
+
# # │ 1 ┆ 0.5 │
|
2723
|
+
# # │ 2 ┆ 4.0 │
|
2724
|
+
# # │ 99 ┆ 99.0 │
|
2725
|
+
# # │ 4 ┆ 13.0 │
|
2726
|
+
# # └─────┴──────┘
|
2727
|
+
#
|
2728
|
+
# @example
|
2729
|
+
# lf.fill_null(strategy: "forward").collect
|
2730
|
+
# # =>
|
2731
|
+
# # shape: (4, 2)
|
2732
|
+
# # ┌─────┬──────┐
|
2733
|
+
# # │ a ┆ b │
|
2734
|
+
# # │ --- ┆ --- │
|
2735
|
+
# # │ i64 ┆ f64 │
|
2736
|
+
# # ╞═════╪══════╡
|
2737
|
+
# # │ 1 ┆ 0.5 │
|
2738
|
+
# # │ 2 ┆ 4.0 │
|
2739
|
+
# # │ 2 ┆ 4.0 │
|
2740
|
+
# # │ 4 ┆ 13.0 │
|
2741
|
+
# # └─────┴──────┘
|
2742
|
+
#
|
2743
|
+
# @example
|
2744
|
+
# lf.fill_null(strategy: "max").collect
|
2745
|
+
# # =>
|
2746
|
+
# # shape: (4, 2)
|
2747
|
+
# # ┌─────┬──────┐
|
2748
|
+
# # │ a ┆ b │
|
2749
|
+
# # │ --- ┆ --- │
|
2750
|
+
# # │ i64 ┆ f64 │
|
2751
|
+
# # ╞═════╪══════╡
|
2752
|
+
# # │ 1 ┆ 0.5 │
|
2753
|
+
# # │ 2 ┆ 4.0 │
|
2754
|
+
# # │ 4 ┆ 13.0 │
|
2755
|
+
# # │ 4 ┆ 13.0 │
|
2756
|
+
# # └─────┴──────┘
|
2757
|
+
#
|
2758
|
+
# @example
|
2759
|
+
# lf.fill_null(strategy: "zero").collect
|
2760
|
+
# # =>
|
2761
|
+
# # shape: (4, 2)
|
2762
|
+
# # ┌─────┬──────┐
|
2763
|
+
# # │ a ┆ b │
|
2764
|
+
# # │ --- ┆ --- │
|
2765
|
+
# # │ i64 ┆ f64 │
|
2766
|
+
# # ╞═════╪══════╡
|
2767
|
+
# # │ 1 ┆ 0.5 │
|
2768
|
+
# # │ 2 ┆ 4.0 │
|
2769
|
+
# # │ 0 ┆ 0.0 │
|
2770
|
+
# # │ 4 ┆ 13.0 │
|
2771
|
+
# # └─────┴──────┘
|
2155
2772
|
def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: nil)
|
2156
2773
|
select(Polars.all.fill_null(value, strategy: strategy, limit: limit))
|
2157
2774
|
end
|
@@ -2431,6 +3048,53 @@ module Polars
|
|
2431
3048
|
# Which of the duplicate rows to keep.
|
2432
3049
|
#
|
2433
3050
|
# @return [LazyFrame]
|
3051
|
+
#
|
3052
|
+
# @example
|
3053
|
+
# lf = Polars::LazyFrame.new(
|
3054
|
+
# {
|
3055
|
+
# "foo" => [1, 2, 3, 1],
|
3056
|
+
# "bar" => ["a", "a", "a", "a"],
|
3057
|
+
# "ham" => ["b", "b", "b", "b"]
|
3058
|
+
# }
|
3059
|
+
# )
|
3060
|
+
# lf.unique(maintain_order: true).collect
|
3061
|
+
# # =>
|
3062
|
+
# # shape: (3, 3)
|
3063
|
+
# # ┌─────┬─────┬─────┐
|
3064
|
+
# # │ foo ┆ bar ┆ ham │
|
3065
|
+
# # │ --- ┆ --- ┆ --- │
|
3066
|
+
# # │ i64 ┆ str ┆ str │
|
3067
|
+
# # ╞═════╪═════╪═════╡
|
3068
|
+
# # │ 1 ┆ a ┆ b │
|
3069
|
+
# # │ 2 ┆ a ┆ b │
|
3070
|
+
# # │ 3 ┆ a ┆ b │
|
3071
|
+
# # └─────┴─────┴─────┘
|
3072
|
+
#
|
3073
|
+
# @example
|
3074
|
+
# lf.unique(subset: ["bar", "ham"], maintain_order: true).collect
|
3075
|
+
# # =>
|
3076
|
+
# # shape: (1, 3)
|
3077
|
+
# # ┌─────┬─────┬─────┐
|
3078
|
+
# # │ foo ┆ bar ┆ ham │
|
3079
|
+
# # │ --- ┆ --- ┆ --- │
|
3080
|
+
# # │ i64 ┆ str ┆ str │
|
3081
|
+
# # ╞═════╪═════╪═════╡
|
3082
|
+
# # │ 1 ┆ a ┆ b │
|
3083
|
+
# # └─────┴─────┴─────┘
|
3084
|
+
#
|
3085
|
+
# @example
|
3086
|
+
# lf.unique(keep: "last", maintain_order: true).collect
|
3087
|
+
# # =>
|
3088
|
+
# # shape: (3, 3)
|
3089
|
+
# # ┌─────┬─────┬─────┐
|
3090
|
+
# # │ foo ┆ bar ┆ ham │
|
3091
|
+
# # │ --- ┆ --- ┆ --- │
|
3092
|
+
# # │ i64 ┆ str ┆ str │
|
3093
|
+
# # ╞═════╪═════╪═════╡
|
3094
|
+
# # │ 2 ┆ a ┆ b │
|
3095
|
+
# # │ 3 ┆ a ┆ b │
|
3096
|
+
# # │ 1 ┆ a ┆ b │
|
3097
|
+
# # └─────┴─────┴─────┘
|
2434
3098
|
def unique(maintain_order: true, subset: nil, keep: "first")
|
2435
3099
|
if !subset.nil? && !subset.is_a?(::Array)
|
2436
3100
|
subset = [subset]
|
@@ -2504,7 +3168,7 @@ module Polars
|
|
2504
3168
|
# "c" => [2, 4, 6]
|
2505
3169
|
# }
|
2506
3170
|
# )
|
2507
|
-
# lf.unpivot(Polars
|
3171
|
+
# lf.unpivot(Polars.cs.numeric, index: "a").collect
|
2508
3172
|
# # =>
|
2509
3173
|
# # shape: (6, 3)
|
2510
3174
|
# # ┌─────┬──────────┬───────┐
|
@@ -2530,8 +3194,8 @@ module Polars
|
|
2530
3194
|
warn "The `streamable` parameter for `LazyFrame.unpivot` is deprecated"
|
2531
3195
|
end
|
2532
3196
|
|
2533
|
-
on = on.nil? ? [] : Utils.
|
2534
|
-
index = index.nil? ? [] : Utils.
|
3197
|
+
on = on.nil? ? [] : Utils.parse_into_list_of_expressions(on)
|
3198
|
+
index = index.nil? ? [] : Utils.parse_into_list_of_expressions(index)
|
2535
3199
|
|
2536
3200
|
_from_rbldf(
|
2537
3201
|
_ldf.unpivot(on, index, value_name, variable_name)
|