polars-df 0.14.0-aarch64-linux → 0.16.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/Cargo.lock +1523 -378
- data/LICENSE-THIRD-PARTY.txt +23495 -12923
- data/LICENSE.txt +1 -0
- data/README.md +38 -4
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/{3.1 → 3.4}/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +452 -101
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +3 -1
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +95 -13
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +684 -20
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1226 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +17 -1
- metadata +9 -8
- data/lib/polars/functions.rb +0 -57
data/lib/polars/lazy_frame.rb
CHANGED
@@ -431,7 +431,9 @@ module Polars
|
|
431
431
|
projection_pushdown: true,
|
432
432
|
simplify_expression: true,
|
433
433
|
no_optimization: false,
|
434
|
-
slice_pushdown: true
|
434
|
+
slice_pushdown: true,
|
435
|
+
storage_options: nil,
|
436
|
+
retries: 2
|
435
437
|
)
|
436
438
|
lf = _set_sink_optimizations(
|
437
439
|
type_coercion: type_coercion,
|
@@ -460,6 +462,12 @@ module Polars
|
|
460
462
|
}
|
461
463
|
end
|
462
464
|
|
465
|
+
if storage_options&.any?
|
466
|
+
storage_options = storage_options.to_a
|
467
|
+
else
|
468
|
+
storage_options = nil
|
469
|
+
end
|
470
|
+
|
463
471
|
lf.sink_parquet(
|
464
472
|
path,
|
465
473
|
compression,
|
@@ -467,7 +475,9 @@ module Polars
|
|
467
475
|
statistics,
|
468
476
|
row_group_size,
|
469
477
|
data_pagesize_limit,
|
470
|
-
maintain_order
|
478
|
+
maintain_order,
|
479
|
+
storage_options,
|
480
|
+
retries
|
471
481
|
)
|
472
482
|
end
|
473
483
|
|
@@ -512,6 +522,10 @@ module Polars
|
|
512
522
|
slice_pushdown: true,
|
513
523
|
no_optimization: false
|
514
524
|
)
|
525
|
+
# TODO support storage options in Rust
|
526
|
+
storage_options = nil
|
527
|
+
retries = 2
|
528
|
+
|
515
529
|
lf = _set_sink_optimizations(
|
516
530
|
type_coercion: type_coercion,
|
517
531
|
predicate_pushdown: predicate_pushdown,
|
@@ -521,10 +535,18 @@ module Polars
|
|
521
535
|
no_optimization: no_optimization
|
522
536
|
)
|
523
537
|
|
538
|
+
if storage_options&.any?
|
539
|
+
storage_options = storage_options.to_a
|
540
|
+
else
|
541
|
+
storage_options = nil
|
542
|
+
end
|
543
|
+
|
524
544
|
lf.sink_ipc(
|
525
545
|
path,
|
526
546
|
compression,
|
527
|
-
maintain_order
|
547
|
+
maintain_order,
|
548
|
+
storage_options,
|
549
|
+
retries
|
528
550
|
)
|
529
551
|
end
|
530
552
|
|
@@ -692,7 +714,9 @@ module Polars
|
|
692
714
|
projection_pushdown: true,
|
693
715
|
simplify_expression: true,
|
694
716
|
slice_pushdown: true,
|
695
|
-
no_optimization: false
|
717
|
+
no_optimization: false,
|
718
|
+
storage_options: nil,
|
719
|
+
retries: 2
|
696
720
|
)
|
697
721
|
lf = _set_sink_optimizations(
|
698
722
|
type_coercion: type_coercion,
|
@@ -703,7 +727,13 @@ module Polars
|
|
703
727
|
no_optimization: no_optimization
|
704
728
|
)
|
705
729
|
|
706
|
-
|
730
|
+
if storage_options&.any?
|
731
|
+
storage_options = storage_options.to_a
|
732
|
+
else
|
733
|
+
storage_options = nil
|
734
|
+
end
|
735
|
+
|
736
|
+
lf.sink_json(path, maintain_order, storage_options, retries)
|
707
737
|
end
|
708
738
|
|
709
739
|
# @private
|
@@ -848,9 +878,70 @@ module Polars
|
|
848
878
|
_from_rbldf(_ldf.cache)
|
849
879
|
end
|
850
880
|
|
851
|
-
#
|
852
|
-
#
|
853
|
-
#
|
881
|
+
# Cast LazyFrame column(s) to the specified dtype(s).
|
882
|
+
#
|
883
|
+
# @param dtypes [Hash, Object]
|
884
|
+
# Mapping of column names (or selector) to dtypes, or a single dtype
|
885
|
+
# to which all columns will be cast.
|
886
|
+
# @param strict [Boolean]
|
887
|
+
# Throw an error if a cast could not be done (for instance, due to an
|
888
|
+
# overflow).
|
889
|
+
#
|
890
|
+
# @return [LazyFrame]
|
891
|
+
#
|
892
|
+
# @example Cast specific frame columns to the specified dtypes:
|
893
|
+
# lf = Polars::LazyFrame.new(
|
894
|
+
# {
|
895
|
+
# "foo" => [1, 2, 3],
|
896
|
+
# "bar" => [6.0, 7.0, 8.0],
|
897
|
+
# "ham" => [Date.new(2020, 1, 2), Date.new(2021, 3, 4), Date.new(2022, 5, 6)]
|
898
|
+
# }
|
899
|
+
# )
|
900
|
+
# lf.cast({"foo" => Polars::Float32, "bar" => Polars::UInt8}).collect
|
901
|
+
# # =>
|
902
|
+
# # shape: (3, 3)
|
903
|
+
# # ┌─────┬─────┬────────────┐
|
904
|
+
# # │ foo ┆ bar ┆ ham │
|
905
|
+
# # │ --- ┆ --- ┆ --- │
|
906
|
+
# # │ f32 ┆ u8 ┆ date │
|
907
|
+
# # ╞═════╪═════╪════════════╡
|
908
|
+
# # │ 1.0 ┆ 6 ┆ 2020-01-02 │
|
909
|
+
# # │ 2.0 ┆ 7 ┆ 2021-03-04 │
|
910
|
+
# # │ 3.0 ┆ 8 ┆ 2022-05-06 │
|
911
|
+
# # └─────┴─────┴────────────┘
|
912
|
+
#
|
913
|
+
# @example Cast all frame columns matching one dtype (or dtype group) to another dtype:
|
914
|
+
# lf.cast({Polars::Date => Polars::Datetime}).collect
|
915
|
+
# # =>
|
916
|
+
# # shape: (3, 3)
|
917
|
+
# # ┌─────┬─────┬─────────────────────┐
|
918
|
+
# # │ foo ┆ bar ┆ ham │
|
919
|
+
# # │ --- ┆ --- ┆ --- │
|
920
|
+
# # │ i64 ┆ f64 ┆ datetime[μs] │
|
921
|
+
# # ╞═════╪═════╪═════════════════════╡
|
922
|
+
# # │ 1 ┆ 6.0 ┆ 2020-01-02 00:00:00 │
|
923
|
+
# # │ 2 ┆ 7.0 ┆ 2021-03-04 00:00:00 │
|
924
|
+
# # │ 3 ┆ 8.0 ┆ 2022-05-06 00:00:00 │
|
925
|
+
# # └─────┴─────┴─────────────────────┘
|
926
|
+
#
|
927
|
+
# @example Cast all frame columns to the specified dtype:
|
928
|
+
# lf.cast(Polars::String).collect.to_h(as_series: false)
|
929
|
+
# # => {"foo"=>["1", "2", "3"], "bar"=>["6.0", "7.0", "8.0"], "ham"=>["2020-01-02", "2021-03-04", "2022-05-06"]}
|
930
|
+
def cast(dtypes, strict: true)
|
931
|
+
if !dtypes.is_a?(Hash)
|
932
|
+
return _from_rbldf(_ldf.cast_all(dtypes, strict))
|
933
|
+
end
|
934
|
+
|
935
|
+
cast_map = {}
|
936
|
+
dtypes.each do |c, dtype|
|
937
|
+
dtype = Utils.parse_into_dtype(dtype)
|
938
|
+
cast_map.merge!(
|
939
|
+
c.is_a?(::String) ? {c => dtype} : Utils.expand_selector(self, c).to_h { |x| [x, dtype] }
|
940
|
+
)
|
941
|
+
end
|
942
|
+
|
943
|
+
_from_rbldf(_ldf.cast(cast_map, strict))
|
944
|
+
end
|
854
945
|
|
855
946
|
# Create an empty copy of the current LazyFrame.
|
856
947
|
#
|
@@ -1520,8 +1611,197 @@ module Polars
|
|
1520
1611
|
# @param force_parallel [Boolean]
|
1521
1612
|
# Force the physical plan to evaluate the computation of both DataFrames up to
|
1522
1613
|
# the join in parallel.
|
1614
|
+
# @param coalesce [Boolean]
|
1615
|
+
# Coalescing behavior (merging of join columns).
|
1616
|
+
# - true: -> Always coalesce join columns.
|
1617
|
+
# - false: -> Never coalesce join columns.
|
1618
|
+
# Note that joining on any expression other than `col` will turn off coalescing.
|
1523
1619
|
#
|
1524
1620
|
# @return [LazyFrame]
|
1621
|
+
#
|
1622
|
+
# @example
|
1623
|
+
# gdp = Polars::LazyFrame.new(
|
1624
|
+
# {
|
1625
|
+
# "date" => Polars.date_range(
|
1626
|
+
# Date.new(2016, 1, 1),
|
1627
|
+
# Date.new(2020, 1, 1),
|
1628
|
+
# "1y",
|
1629
|
+
# eager: true
|
1630
|
+
# ),
|
1631
|
+
# "gdp" => [4164, 4411, 4566, 4696, 4827]
|
1632
|
+
# }
|
1633
|
+
# )
|
1634
|
+
# gdp.collect
|
1635
|
+
# # =>
|
1636
|
+
# # shape: (5, 2)
|
1637
|
+
# # ┌────────────┬──────┐
|
1638
|
+
# # │ date ┆ gdp │
|
1639
|
+
# # │ --- ┆ --- │
|
1640
|
+
# # │ date ┆ i64 │
|
1641
|
+
# # ╞════════════╪══════╡
|
1642
|
+
# # │ 2016-01-01 ┆ 4164 │
|
1643
|
+
# # │ 2017-01-01 ┆ 4411 │
|
1644
|
+
# # │ 2018-01-01 ┆ 4566 │
|
1645
|
+
# # │ 2019-01-01 ┆ 4696 │
|
1646
|
+
# # │ 2020-01-01 ┆ 4827 │
|
1647
|
+
# # └────────────┴──────┘
|
1648
|
+
#
|
1649
|
+
# @example
|
1650
|
+
# population = Polars::LazyFrame.new(
|
1651
|
+
# {
|
1652
|
+
# "date" => [Date.new(2016, 3, 1), Date.new(2018, 8, 1), Date.new(2019, 1, 1)],
|
1653
|
+
# "population" => [82.19, 82.66, 83.12]
|
1654
|
+
# }
|
1655
|
+
# ).sort("date")
|
1656
|
+
# population.collect
|
1657
|
+
# # =>
|
1658
|
+
# # shape: (3, 2)
|
1659
|
+
# # ┌────────────┬────────────┐
|
1660
|
+
# # │ date ┆ population │
|
1661
|
+
# # │ --- ┆ --- │
|
1662
|
+
# # │ date ┆ f64 │
|
1663
|
+
# # ╞════════════╪════════════╡
|
1664
|
+
# # │ 2016-03-01 ┆ 82.19 │
|
1665
|
+
# # │ 2018-08-01 ┆ 82.66 │
|
1666
|
+
# # │ 2019-01-01 ┆ 83.12 │
|
1667
|
+
# # └────────────┴────────────┘
|
1668
|
+
#
|
1669
|
+
# @example Note how the dates don't quite match. If we join them using `join_asof` and `strategy: "backward"`, then each date from `population` which doesn't have an exact match is matched with the closest earlier date from `gdp`:
|
1670
|
+
# population.join_asof(gdp, on: "date", strategy: "backward").collect
|
1671
|
+
# # =>
|
1672
|
+
# # shape: (3, 3)
|
1673
|
+
# # ┌────────────┬────────────┬──────┐
|
1674
|
+
# # │ date ┆ population ┆ gdp │
|
1675
|
+
# # │ --- ┆ --- ┆ --- │
|
1676
|
+
# # │ date ┆ f64 ┆ i64 │
|
1677
|
+
# # ╞════════════╪════════════╪══════╡
|
1678
|
+
# # │ 2016-03-01 ┆ 82.19 ┆ 4164 │
|
1679
|
+
# # │ 2018-08-01 ┆ 82.66 ┆ 4566 │
|
1680
|
+
# # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
|
1681
|
+
# # └────────────┴────────────┴──────┘
|
1682
|
+
#
|
1683
|
+
# @example
|
1684
|
+
# population.join_asof(
|
1685
|
+
# gdp, on: "date", strategy: "backward", coalesce: false
|
1686
|
+
# ).collect
|
1687
|
+
# # =>
|
1688
|
+
# # shape: (3, 4)
|
1689
|
+
# # ┌────────────┬────────────┬────────────┬──────┐
|
1690
|
+
# # │ date ┆ population ┆ date_right ┆ gdp │
|
1691
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
1692
|
+
# # │ date ┆ f64 ┆ date ┆ i64 │
|
1693
|
+
# # ╞════════════╪════════════╪════════════╪══════╡
|
1694
|
+
# # │ 2016-03-01 ┆ 82.19 ┆ 2016-01-01 ┆ 4164 │
|
1695
|
+
# # │ 2018-08-01 ┆ 82.66 ┆ 2018-01-01 ┆ 4566 │
|
1696
|
+
# # │ 2019-01-01 ┆ 83.12 ┆ 2019-01-01 ┆ 4696 │
|
1697
|
+
# # └────────────┴────────────┴────────────┴──────┘
|
1698
|
+
#
|
1699
|
+
# @example If we instead use `strategy: "forward"`, then each date from `population` which doesn't have an exact match is matched with the closest later date from `gdp`:
|
1700
|
+
# population.join_asof(gdp, on: "date", strategy: "forward").collect
|
1701
|
+
# # =>
|
1702
|
+
# # shape: (3, 3)
|
1703
|
+
# # ┌────────────┬────────────┬──────┐
|
1704
|
+
# # │ date ┆ population ┆ gdp │
|
1705
|
+
# # │ --- ┆ --- ┆ --- │
|
1706
|
+
# # │ date ┆ f64 ┆ i64 │
|
1707
|
+
# # ╞════════════╪════════════╪══════╡
|
1708
|
+
# # │ 2016-03-01 ┆ 82.19 ┆ 4411 │
|
1709
|
+
# # │ 2018-08-01 ┆ 82.66 ┆ 4696 │
|
1710
|
+
# # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
|
1711
|
+
# # └────────────┴────────────┴──────┘
|
1712
|
+
#
|
1713
|
+
# @example
|
1714
|
+
# population.join_asof(gdp, on: "date", strategy: "nearest").collect
|
1715
|
+
# # =>
|
1716
|
+
# # shape: (3, 3)
|
1717
|
+
# # ┌────────────┬────────────┬──────┐
|
1718
|
+
# # │ date ┆ population ┆ gdp │
|
1719
|
+
# # │ --- ┆ --- ┆ --- │
|
1720
|
+
# # │ date ┆ f64 ┆ i64 │
|
1721
|
+
# # ╞════════════╪════════════╪══════╡
|
1722
|
+
# # │ 2016-03-01 ┆ 82.19 ┆ 4164 │
|
1723
|
+
# # │ 2018-08-01 ┆ 82.66 ┆ 4696 │
|
1724
|
+
# # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
|
1725
|
+
# # └────────────┴────────────┴──────┘
|
1726
|
+
#
|
1727
|
+
# @example
|
1728
|
+
# gdp_dates = Polars.date_range(
|
1729
|
+
# Date.new(2016, 1, 1), Date.new(2020, 1, 1), "1y", eager: true
|
1730
|
+
# )
|
1731
|
+
# gdp2 = Polars::LazyFrame.new(
|
1732
|
+
# {
|
1733
|
+
# "country" => ["Germany"] * 5 + ["Netherlands"] * 5,
|
1734
|
+
# "date" => Polars.concat([gdp_dates, gdp_dates]),
|
1735
|
+
# "gdp" => [4164, 4411, 4566, 4696, 4827, 784, 833, 914, 910, 909]
|
1736
|
+
# }
|
1737
|
+
# ).sort("country", "date")
|
1738
|
+
# gdp2.collect
|
1739
|
+
# # =>
|
1740
|
+
# # shape: (10, 3)
|
1741
|
+
# # ┌─────────────┬────────────┬──────┐
|
1742
|
+
# # │ country ┆ date ┆ gdp │
|
1743
|
+
# # │ --- ┆ --- ┆ --- │
|
1744
|
+
# # │ str ┆ date ┆ i64 │
|
1745
|
+
# # ╞═════════════╪════════════╪══════╡
|
1746
|
+
# # │ Germany ┆ 2016-01-01 ┆ 4164 │
|
1747
|
+
# # │ Germany ┆ 2017-01-01 ┆ 4411 │
|
1748
|
+
# # │ Germany ┆ 2018-01-01 ┆ 4566 │
|
1749
|
+
# # │ Germany ┆ 2019-01-01 ┆ 4696 │
|
1750
|
+
# # │ Germany ┆ 2020-01-01 ┆ 4827 │
|
1751
|
+
# # │ Netherlands ┆ 2016-01-01 ┆ 784 │
|
1752
|
+
# # │ Netherlands ┆ 2017-01-01 ┆ 833 │
|
1753
|
+
# # │ Netherlands ┆ 2018-01-01 ┆ 914 │
|
1754
|
+
# # │ Netherlands ┆ 2019-01-01 ┆ 910 │
|
1755
|
+
# # │ Netherlands ┆ 2020-01-01 ┆ 909 │
|
1756
|
+
# # └─────────────┴────────────┴──────┘
|
1757
|
+
#
|
1758
|
+
# @example
|
1759
|
+
# pop2 = Polars::LazyFrame.new(
|
1760
|
+
# {
|
1761
|
+
# "country" => ["Germany"] * 3 + ["Netherlands"] * 3,
|
1762
|
+
# "date" => [
|
1763
|
+
# Date.new(2016, 3, 1),
|
1764
|
+
# Date.new(2018, 8, 1),
|
1765
|
+
# Date.new(2019, 1, 1),
|
1766
|
+
# Date.new(2016, 3, 1),
|
1767
|
+
# Date.new(2018, 8, 1),
|
1768
|
+
# Date.new(2019, 1, 1)
|
1769
|
+
# ],
|
1770
|
+
# "population" => [82.19, 82.66, 83.12, 17.11, 17.32, 17.40]
|
1771
|
+
# }
|
1772
|
+
# ).sort("country", "date")
|
1773
|
+
# pop2.collect
|
1774
|
+
# # =>
|
1775
|
+
# # shape: (6, 3)
|
1776
|
+
# # ┌─────────────┬────────────┬────────────┐
|
1777
|
+
# # │ country ┆ date ┆ population │
|
1778
|
+
# # │ --- ┆ --- ┆ --- │
|
1779
|
+
# # │ str ┆ date ┆ f64 │
|
1780
|
+
# # ╞═════════════╪════════════╪════════════╡
|
1781
|
+
# # │ Germany ┆ 2016-03-01 ┆ 82.19 │
|
1782
|
+
# # │ Germany ┆ 2018-08-01 ┆ 82.66 │
|
1783
|
+
# # │ Germany ┆ 2019-01-01 ┆ 83.12 │
|
1784
|
+
# # │ Netherlands ┆ 2016-03-01 ┆ 17.11 │
|
1785
|
+
# # │ Netherlands ┆ 2018-08-01 ┆ 17.32 │
|
1786
|
+
# # │ Netherlands ┆ 2019-01-01 ┆ 17.4 │
|
1787
|
+
# # └─────────────┴────────────┴────────────┘
|
1788
|
+
#
|
1789
|
+
# @example
|
1790
|
+
# pop2.join_asof(gdp2, by: "country", on: "date", strategy: "nearest").collect
|
1791
|
+
# # =>
|
1792
|
+
# # shape: (6, 4)
|
1793
|
+
# # ┌─────────────┬────────────┬────────────┬──────┐
|
1794
|
+
# # │ country ┆ date ┆ population ┆ gdp │
|
1795
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
1796
|
+
# # │ str ┆ date ┆ f64 ┆ i64 │
|
1797
|
+
# # ╞═════════════╪════════════╪════════════╪══════╡
|
1798
|
+
# # │ Germany ┆ 2016-03-01 ┆ 82.19 ┆ 4164 │
|
1799
|
+
# # │ Germany ┆ 2018-08-01 ┆ 82.66 ┆ 4696 │
|
1800
|
+
# # │ Germany ┆ 2019-01-01 ┆ 83.12 ┆ 4696 │
|
1801
|
+
# # │ Netherlands ┆ 2016-03-01 ┆ 17.11 ┆ 784 │
|
1802
|
+
# # │ Netherlands ┆ 2018-08-01 ┆ 17.32 ┆ 910 │
|
1803
|
+
# # │ Netherlands ┆ 2019-01-01 ┆ 17.4 ┆ 910 │
|
1804
|
+
# # └─────────────┴────────────┴────────────┴──────┘
|
1525
1805
|
def join_asof(
|
1526
1806
|
other,
|
1527
1807
|
left_on: nil,
|
@@ -1534,7 +1814,8 @@ module Polars
|
|
1534
1814
|
suffix: "_right",
|
1535
1815
|
tolerance: nil,
|
1536
1816
|
allow_parallel: true,
|
1537
|
-
force_parallel: false
|
1817
|
+
force_parallel: false,
|
1818
|
+
coalesce: true
|
1538
1819
|
)
|
1539
1820
|
if !other.is_a?(LazyFrame)
|
1540
1821
|
raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
|
@@ -1589,7 +1870,8 @@ module Polars
|
|
1589
1870
|
suffix,
|
1590
1871
|
strategy,
|
1591
1872
|
tolerance_num,
|
1592
|
-
tolerance_str
|
1873
|
+
tolerance_str,
|
1874
|
+
coalesce
|
1593
1875
|
)
|
1594
1876
|
)
|
1595
1877
|
end
|
@@ -1609,6 +1891,12 @@ module Polars
|
|
1609
1891
|
# Join strategy.
|
1610
1892
|
# @param suffix [String]
|
1611
1893
|
# Suffix to append to columns with a duplicate name.
|
1894
|
+
# @param validate ['m:m', 'm:1', '1:m', '1:1']
|
1895
|
+
# Checks if join is of specified type.
|
1896
|
+
# * *many_to_many* - “m:m”: default, does not result in checks
|
1897
|
+
# * *one_to_one* - “1:1”: check if join keys are unique in both left and right datasets
|
1898
|
+
# * *one_to_many* - “1:m”: check if join keys are unique in left dataset
|
1899
|
+
# * *many_to_one* - “m:1”: check if join keys are unique in right dataset
|
1612
1900
|
# @param join_nulls [Boolean]
|
1613
1901
|
# Join on null values. By default null values will never produce matches.
|
1614
1902
|
# @param allow_parallel [Boolean]
|
@@ -1617,6 +1905,12 @@ module Polars
|
|
1617
1905
|
# @param force_parallel [Boolean]
|
1618
1906
|
# Force the physical plan to evaluate the computation of both DataFrames up to
|
1619
1907
|
# the join in parallel.
|
1908
|
+
# @param coalesce [Boolean]
|
1909
|
+
# Coalescing behavior (merging of join columns).
|
1910
|
+
# - nil: -> join specific.
|
1911
|
+
# - true: -> Always coalesce join columns.
|
1912
|
+
# - false: -> Never coalesce join columns.
|
1913
|
+
# Note that joining on any expression other than `col` will turn off coalescing.
|
1620
1914
|
#
|
1621
1915
|
# @return [LazyFrame]
|
1622
1916
|
#
|
@@ -1706,9 +2000,11 @@ module Polars
|
|
1706
2000
|
on: nil,
|
1707
2001
|
how: "inner",
|
1708
2002
|
suffix: "_right",
|
2003
|
+
validate: "m:m",
|
1709
2004
|
join_nulls: false,
|
1710
2005
|
allow_parallel: true,
|
1711
|
-
force_parallel: false
|
2006
|
+
force_parallel: false,
|
2007
|
+
coalesce: nil
|
1712
2008
|
)
|
1713
2009
|
if !other.is_a?(LazyFrame)
|
1714
2010
|
raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
|
@@ -1719,7 +2015,7 @@ module Polars
|
|
1719
2015
|
elsif how == "cross"
|
1720
2016
|
return _from_rbldf(
|
1721
2017
|
_ldf.join(
|
1722
|
-
other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
|
2018
|
+
other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix, validate, coalesce
|
1723
2019
|
)
|
1724
2020
|
)
|
1725
2021
|
end
|
@@ -1745,6 +2041,8 @@ module Polars
|
|
1745
2041
|
join_nulls,
|
1746
2042
|
how,
|
1747
2043
|
suffix,
|
2044
|
+
validate,
|
2045
|
+
coalesce
|
1748
2046
|
)
|
1749
2047
|
)
|
1750
2048
|
end
|
@@ -1879,6 +2177,55 @@ module Polars
|
|
1879
2177
|
# - List of column names.
|
1880
2178
|
#
|
1881
2179
|
# @return [LazyFrame]
|
2180
|
+
#
|
2181
|
+
# @example Drop a single column by passing the name of that column.
|
2182
|
+
# lf = Polars::LazyFrame.new(
|
2183
|
+
# {
|
2184
|
+
# "foo" => [1, 2, 3],
|
2185
|
+
# "bar" => [6.0, 7.0, 8.0],
|
2186
|
+
# "ham" => ["a", "b", "c"]
|
2187
|
+
# }
|
2188
|
+
# )
|
2189
|
+
# lf.drop("ham").collect
|
2190
|
+
# # =>
|
2191
|
+
# # shape: (3, 2)
|
2192
|
+
# # ┌─────┬─────┐
|
2193
|
+
# # │ foo ┆ bar │
|
2194
|
+
# # │ --- ┆ --- │
|
2195
|
+
# # │ i64 ┆ f64 │
|
2196
|
+
# # ╞═════╪═════╡
|
2197
|
+
# # │ 1 ┆ 6.0 │
|
2198
|
+
# # │ 2 ┆ 7.0 │
|
2199
|
+
# # │ 3 ┆ 8.0 │
|
2200
|
+
# # └─────┴─────┘
|
2201
|
+
#
|
2202
|
+
# @example Drop multiple columns by passing a selector.
|
2203
|
+
# lf.drop(Polars.cs.numeric).collect
|
2204
|
+
# # =>
|
2205
|
+
# # shape: (3, 1)
|
2206
|
+
# # ┌─────┐
|
2207
|
+
# # │ ham │
|
2208
|
+
# # │ --- │
|
2209
|
+
# # │ str │
|
2210
|
+
# # ╞═════╡
|
2211
|
+
# # │ a │
|
2212
|
+
# # │ b │
|
2213
|
+
# # │ c │
|
2214
|
+
# # └─────┘
|
2215
|
+
#
|
2216
|
+
# @example Use positional arguments to drop multiple columns.
|
2217
|
+
# lf.drop("foo", "ham").collect
|
2218
|
+
# # =>
|
2219
|
+
# # shape: (3, 1)
|
2220
|
+
# # ┌─────┐
|
2221
|
+
# # │ bar │
|
2222
|
+
# # │ --- │
|
2223
|
+
# # │ f64 │
|
2224
|
+
# # ╞═════╡
|
2225
|
+
# # │ 6.0 │
|
2226
|
+
# # │ 7.0 │
|
2227
|
+
# # │ 8.0 │
|
2228
|
+
# # └─────┘
|
1882
2229
|
def drop(*columns)
|
1883
2230
|
drop_cols = Utils._expand_selectors(self, *columns)
|
1884
2231
|
_from_rbldf(_ldf.drop(drop_cols))
|
@@ -1888,17 +2235,80 @@ module Polars
|
|
1888
2235
|
#
|
1889
2236
|
# @param mapping [Hash, #call]
|
1890
2237
|
# Key value pairs that map from old name to new name, or a callable that takes the old name and returns the new name.
|
2238
|
+
# @param strict [Boolean]
|
2239
|
+
# Validate that all column names exist in the current schema,
|
2240
|
+
# and throw an exception if any do not. (Note that this parameter
|
2241
|
+
# is a no-op when passing a function to `mapping`).
|
1891
2242
|
#
|
1892
2243
|
# @return [LazyFrame]
|
1893
|
-
|
1894
|
-
|
1895
|
-
|
1896
|
-
|
2244
|
+
#
|
2245
|
+
# @example
|
2246
|
+
# lf = Polars::LazyFrame.new(
|
2247
|
+
# {
|
2248
|
+
# "foo" => [1, 2, 3],
|
2249
|
+
# "bar" => [6, 7, 8],
|
2250
|
+
# "ham" => ["a", "b", "c"]
|
2251
|
+
# }
|
2252
|
+
# )
|
2253
|
+
# lf.rename({"foo" => "apple"}).collect
|
2254
|
+
# # =>
|
2255
|
+
# # shape: (3, 3)
|
2256
|
+
# # ┌───────┬─────┬─────┐
|
2257
|
+
# # │ apple ┆ bar ┆ ham │
|
2258
|
+
# # │ --- ┆ --- ┆ --- │
|
2259
|
+
# # │ i64 ┆ i64 ┆ str │
|
2260
|
+
# # ╞═══════╪═════╪═════╡
|
2261
|
+
# # │ 1 ┆ 6 ┆ a │
|
2262
|
+
# # │ 2 ┆ 7 ┆ b │
|
2263
|
+
# # │ 3 ┆ 8 ┆ c │
|
2264
|
+
# # └───────┴─────┴─────┘
|
2265
|
+
#
|
2266
|
+
# @example
|
2267
|
+
# lf.rename(->(column_name) { "c" + column_name[1..] }).collect
|
2268
|
+
# # =>
|
2269
|
+
# # shape: (3, 3)
|
2270
|
+
# # ┌─────┬─────┬─────┐
|
2271
|
+
# # │ coo ┆ car ┆ cam │
|
2272
|
+
# # │ --- ┆ --- ┆ --- │
|
2273
|
+
# # │ i64 ┆ i64 ┆ str │
|
2274
|
+
# # ╞═════╪═════╪═════╡
|
2275
|
+
# # │ 1 ┆ 6 ┆ a │
|
2276
|
+
# # │ 2 ┆ 7 ┆ b │
|
2277
|
+
# # │ 3 ┆ 8 ┆ c │
|
2278
|
+
# # └─────┴─────┴─────┘
|
2279
|
+
def rename(mapping, strict: true)
|
2280
|
+
if mapping.respond_to?(:call)
|
2281
|
+
select(F.all.name.map(&mapping))
|
2282
|
+
else
|
2283
|
+
existing = mapping.keys
|
2284
|
+
_new = mapping.values
|
2285
|
+
_from_rbldf(_ldf.rename(existing, _new, strict))
|
2286
|
+
end
|
1897
2287
|
end
|
1898
2288
|
|
1899
2289
|
# Reverse the DataFrame.
|
1900
2290
|
#
|
1901
2291
|
# @return [LazyFrame]
|
2292
|
+
#
|
2293
|
+
# @example
|
2294
|
+
# lf = Polars::LazyFrame.new(
|
2295
|
+
# {
|
2296
|
+
# "key" => ["a", "b", "c"],
|
2297
|
+
# "val" => [1, 2, 3]
|
2298
|
+
# }
|
2299
|
+
# )
|
2300
|
+
# lf.reverse.collect
|
2301
|
+
# # =>
|
2302
|
+
# # shape: (3, 2)
|
2303
|
+
# # ┌─────┬─────┐
|
2304
|
+
# # │ key ┆ val │
|
2305
|
+
# # │ --- ┆ --- │
|
2306
|
+
# # │ str ┆ i64 │
|
2307
|
+
# # ╞═════╪═════╡
|
2308
|
+
# # │ c ┆ 3 │
|
2309
|
+
# # │ b ┆ 2 │
|
2310
|
+
# # │ a ┆ 1 │
|
2311
|
+
# # └─────┴─────┘
|
1902
2312
|
def reverse
|
1903
2313
|
_from_rbldf(_ldf.reverse)
|
1904
2314
|
end
|
@@ -2048,8 +2458,43 @@ module Polars
|
|
2048
2458
|
# Consider using the {#fetch} operation if you only want to test your
|
2049
2459
|
# query. The {#fetch} operation will load the first `n` rows at the scan
|
2050
2460
|
# level, whereas the {#head}/{#limit} are applied at the end.
|
2461
|
+
#
|
2462
|
+
# @example
|
2463
|
+
# lf = Polars::LazyFrame.new(
|
2464
|
+
# {
|
2465
|
+
# "a" => [1, 2, 3, 4, 5, 6],
|
2466
|
+
# "b" => [7, 8, 9, 10, 11, 12]
|
2467
|
+
# }
|
2468
|
+
# )
|
2469
|
+
# lf.limit.collect
|
2470
|
+
# # =>
|
2471
|
+
# # shape: (5, 2)
|
2472
|
+
# # ┌─────┬─────┐
|
2473
|
+
# # │ a ┆ b │
|
2474
|
+
# # │ --- ┆ --- │
|
2475
|
+
# # │ i64 ┆ i64 │
|
2476
|
+
# # ╞═════╪═════╡
|
2477
|
+
# # │ 1 ┆ 7 │
|
2478
|
+
# # │ 2 ┆ 8 │
|
2479
|
+
# # │ 3 ┆ 9 │
|
2480
|
+
# # │ 4 ┆ 10 │
|
2481
|
+
# # │ 5 ┆ 11 │
|
2482
|
+
# # └─────┴─────┘
|
2483
|
+
#
|
2484
|
+
# @example
|
2485
|
+
# lf.limit(2).collect
|
2486
|
+
# # =>
|
2487
|
+
# # shape: (2, 2)
|
2488
|
+
# # ┌─────┬─────┐
|
2489
|
+
# # │ a ┆ b │
|
2490
|
+
# # │ --- ┆ --- │
|
2491
|
+
# # │ i64 ┆ i64 │
|
2492
|
+
# # ╞═════╪═════╡
|
2493
|
+
# # │ 1 ┆ 7 │
|
2494
|
+
# # │ 2 ┆ 8 │
|
2495
|
+
# # └─────┴─────┘
|
2051
2496
|
def limit(n = 5)
|
2052
|
-
head(
|
2497
|
+
head(n)
|
2053
2498
|
end
|
2054
2499
|
|
2055
2500
|
# Get the first `n` rows.
|
@@ -2063,6 +2508,41 @@ module Polars
|
|
2063
2508
|
# Consider using the {#fetch} operation if you only want to test your
|
2064
2509
|
# query. The {#fetch} operation will load the first `n` rows at the scan
|
2065
2510
|
# level, whereas the {#head}/{#limit} are applied at the end.
|
2511
|
+
#
|
2512
|
+
# @example
|
2513
|
+
# lf = Polars::LazyFrame.new(
|
2514
|
+
# {
|
2515
|
+
# "a" => [1, 2, 3, 4, 5, 6],
|
2516
|
+
# "b" => [7, 8, 9, 10, 11, 12]
|
2517
|
+
# }
|
2518
|
+
# )
|
2519
|
+
# lf.head.collect
|
2520
|
+
# # =>
|
2521
|
+
# # shape: (5, 2)
|
2522
|
+
# # ┌─────┬─────┐
|
2523
|
+
# # │ a ┆ b │
|
2524
|
+
# # │ --- ┆ --- │
|
2525
|
+
# # │ i64 ┆ i64 │
|
2526
|
+
# # ╞═════╪═════╡
|
2527
|
+
# # │ 1 ┆ 7 │
|
2528
|
+
# # │ 2 ┆ 8 │
|
2529
|
+
# # │ 3 ┆ 9 │
|
2530
|
+
# # │ 4 ┆ 10 │
|
2531
|
+
# # │ 5 ┆ 11 │
|
2532
|
+
# # └─────┴─────┘
|
2533
|
+
#
|
2534
|
+
# @example
|
2535
|
+
# lf.head(2).collect
|
2536
|
+
# # =>
|
2537
|
+
# # shape: (2, 2)
|
2538
|
+
# # ┌─────┬─────┐
|
2539
|
+
# # │ a ┆ b │
|
2540
|
+
# # │ --- ┆ --- │
|
2541
|
+
# # │ i64 ┆ i64 │
|
2542
|
+
# # ╞═════╪═════╡
|
2543
|
+
# # │ 1 ┆ 7 │
|
2544
|
+
# # │ 2 ┆ 8 │
|
2545
|
+
# # └─────┴─────┘
|
2066
2546
|
def head(n = 5)
|
2067
2547
|
slice(0, n)
|
2068
2548
|
end
|
@@ -2073,6 +2553,41 @@ module Polars
|
|
2073
2553
|
# Number of rows.
|
2074
2554
|
#
|
2075
2555
|
# @return [LazyFrame]
|
2556
|
+
#
|
2557
|
+
# @example
|
2558
|
+
# lf = Polars::LazyFrame.new(
|
2559
|
+
# {
|
2560
|
+
# "a" => [1, 2, 3, 4, 5, 6],
|
2561
|
+
# "b" => [7, 8, 9, 10, 11, 12]
|
2562
|
+
# }
|
2563
|
+
# )
|
2564
|
+
# lf.tail.collect
|
2565
|
+
# # =>
|
2566
|
+
# # shape: (5, 2)
|
2567
|
+
# # ┌─────┬─────┐
|
2568
|
+
# # │ a ┆ b │
|
2569
|
+
# # │ --- ┆ --- │
|
2570
|
+
# # │ i64 ┆ i64 │
|
2571
|
+
# # ╞═════╪═════╡
|
2572
|
+
# # │ 2 ┆ 8 │
|
2573
|
+
# # │ 3 ┆ 9 │
|
2574
|
+
# # │ 4 ┆ 10 │
|
2575
|
+
# # │ 5 ┆ 11 │
|
2576
|
+
# # │ 6 ┆ 12 │
|
2577
|
+
# # └─────┴─────┘
|
2578
|
+
#
|
2579
|
+
# @example
|
2580
|
+
# lf.tail(2).collect
|
2581
|
+
# # =>
|
2582
|
+
# # shape: (2, 2)
|
2583
|
+
# # ┌─────┬─────┐
|
2584
|
+
# # │ a ┆ b │
|
2585
|
+
# # │ --- ┆ --- │
|
2586
|
+
# # │ i64 ┆ i64 │
|
2587
|
+
# # ╞═════╪═════╡
|
2588
|
+
# # │ 5 ┆ 11 │
|
2589
|
+
# # │ 6 ┆ 12 │
|
2590
|
+
# # └─────┴─────┘
|
2076
2591
|
def tail(n = 5)
|
2077
2592
|
_from_rbldf(_ldf.tail(n))
|
2078
2593
|
end
|
@@ -2080,6 +2595,24 @@ module Polars
|
|
2080
2595
|
# Get the last row of the DataFrame.
|
2081
2596
|
#
|
2082
2597
|
# @return [LazyFrame]
|
2598
|
+
#
|
2599
|
+
# @example
|
2600
|
+
# lf = Polars::LazyFrame.new(
|
2601
|
+
# {
|
2602
|
+
# "a" => [1, 5, 3],
|
2603
|
+
# "b" => [2, 4, 6]
|
2604
|
+
# }
|
2605
|
+
# )
|
2606
|
+
# lf.last.collect
|
2607
|
+
# # =>
|
2608
|
+
# # shape: (1, 2)
|
2609
|
+
# # ┌─────┬─────┐
|
2610
|
+
# # │ a ┆ b │
|
2611
|
+
# # │ --- ┆ --- │
|
2612
|
+
# # │ i64 ┆ i64 │
|
2613
|
+
# # ╞═════╪═════╡
|
2614
|
+
# # │ 3 ┆ 6 │
|
2615
|
+
# # └─────┴─────┘
|
2083
2616
|
def last
|
2084
2617
|
tail(1)
|
2085
2618
|
end
|
@@ -2087,6 +2620,24 @@ module Polars
|
|
2087
2620
|
# Get the first row of the DataFrame.
|
2088
2621
|
#
|
2089
2622
|
# @return [LazyFrame]
|
2623
|
+
#
|
2624
|
+
# @example
|
2625
|
+
# lf = Polars::LazyFrame.new(
|
2626
|
+
# {
|
2627
|
+
# "a" => [1, 5, 3],
|
2628
|
+
# "b" => [2, 4, 6]
|
2629
|
+
# }
|
2630
|
+
# )
|
2631
|
+
# lf.first.collect
|
2632
|
+
# # =>
|
2633
|
+
# # shape: (1, 2)
|
2634
|
+
# # ┌─────┬─────┐
|
2635
|
+
# # │ a ┆ b │
|
2636
|
+
# # │ --- ┆ --- │
|
2637
|
+
# # │ i64 ┆ i64 │
|
2638
|
+
# # ╞═════╪═════╡
|
2639
|
+
# # │ 1 ┆ 2 │
|
2640
|
+
# # └─────┴─────┘
|
2090
2641
|
def first
|
2091
2642
|
slice(0, 1)
|
2092
2643
|
end
|
@@ -2152,6 +2703,72 @@ module Polars
|
|
2152
2703
|
# Fill null values using the specified value or strategy.
|
2153
2704
|
#
|
2154
2705
|
# @return [LazyFrame]
|
2706
|
+
#
|
2707
|
+
# @example
|
2708
|
+
# lf = Polars::LazyFrame.new(
|
2709
|
+
# {
|
2710
|
+
# "a" => [1, 2, nil, 4],
|
2711
|
+
# "b" => [0.5, 4, nil, 13]
|
2712
|
+
# }
|
2713
|
+
# )
|
2714
|
+
# lf.fill_null(99).collect
|
2715
|
+
# # =>
|
2716
|
+
# # shape: (4, 2)
|
2717
|
+
# # ┌─────┬──────┐
|
2718
|
+
# # │ a ┆ b │
|
2719
|
+
# # │ --- ┆ --- │
|
2720
|
+
# # │ i64 ┆ f64 │
|
2721
|
+
# # ╞═════╪══════╡
|
2722
|
+
# # │ 1 ┆ 0.5 │
|
2723
|
+
# # │ 2 ┆ 4.0 │
|
2724
|
+
# # │ 99 ┆ 99.0 │
|
2725
|
+
# # │ 4 ┆ 13.0 │
|
2726
|
+
# # └─────┴──────┘
|
2727
|
+
#
|
2728
|
+
# @example
|
2729
|
+
# lf.fill_null(strategy: "forward").collect
|
2730
|
+
# # =>
|
2731
|
+
# # shape: (4, 2)
|
2732
|
+
# # ┌─────┬──────┐
|
2733
|
+
# # │ a ┆ b │
|
2734
|
+
# # │ --- ┆ --- │
|
2735
|
+
# # │ i64 ┆ f64 │
|
2736
|
+
# # ╞═════╪══════╡
|
2737
|
+
# # │ 1 ┆ 0.5 │
|
2738
|
+
# # │ 2 ┆ 4.0 │
|
2739
|
+
# # │ 2 ┆ 4.0 │
|
2740
|
+
# # │ 4 ┆ 13.0 │
|
2741
|
+
# # └─────┴──────┘
|
2742
|
+
#
|
2743
|
+
# @example
|
2744
|
+
# lf.fill_null(strategy: "max").collect
|
2745
|
+
# # =>
|
2746
|
+
# # shape: (4, 2)
|
2747
|
+
# # ┌─────┬──────┐
|
2748
|
+
# # │ a ┆ b │
|
2749
|
+
# # │ --- ┆ --- │
|
2750
|
+
# # │ i64 ┆ f64 │
|
2751
|
+
# # ╞═════╪══════╡
|
2752
|
+
# # │ 1 ┆ 0.5 │
|
2753
|
+
# # │ 2 ┆ 4.0 │
|
2754
|
+
# # │ 4 ┆ 13.0 │
|
2755
|
+
# # │ 4 ┆ 13.0 │
|
2756
|
+
# # └─────┴──────┘
|
2757
|
+
#
|
2758
|
+
# @example
|
2759
|
+
# lf.fill_null(strategy: "zero").collect
|
2760
|
+
# # =>
|
2761
|
+
# # shape: (4, 2)
|
2762
|
+
# # ┌─────┬──────┐
|
2763
|
+
# # │ a ┆ b │
|
2764
|
+
# # │ --- ┆ --- │
|
2765
|
+
# # │ i64 ┆ f64 │
|
2766
|
+
# # ╞═════╪══════╡
|
2767
|
+
# # │ 1 ┆ 0.5 │
|
2768
|
+
# # │ 2 ┆ 4.0 │
|
2769
|
+
# # │ 0 ┆ 0.0 │
|
2770
|
+
# # │ 4 ┆ 13.0 │
|
2771
|
+
# # └─────┴──────┘
|
2155
2772
|
def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: nil)
|
2156
2773
|
select(Polars.all.fill_null(value, strategy: strategy, limit: limit))
|
2157
2774
|
end
|
@@ -2431,6 +3048,53 @@ module Polars
|
|
2431
3048
|
# Which of the duplicate rows to keep.
|
2432
3049
|
#
|
2433
3050
|
# @return [LazyFrame]
|
3051
|
+
#
|
3052
|
+
# @example
|
3053
|
+
# lf = Polars::LazyFrame.new(
|
3054
|
+
# {
|
3055
|
+
# "foo" => [1, 2, 3, 1],
|
3056
|
+
# "bar" => ["a", "a", "a", "a"],
|
3057
|
+
# "ham" => ["b", "b", "b", "b"]
|
3058
|
+
# }
|
3059
|
+
# )
|
3060
|
+
# lf.unique(maintain_order: true).collect
|
3061
|
+
# # =>
|
3062
|
+
# # shape: (3, 3)
|
3063
|
+
# # ┌─────┬─────┬─────┐
|
3064
|
+
# # │ foo ┆ bar ┆ ham │
|
3065
|
+
# # │ --- ┆ --- ┆ --- │
|
3066
|
+
# # │ i64 ┆ str ┆ str │
|
3067
|
+
# # ╞═════╪═════╪═════╡
|
3068
|
+
# # │ 1 ┆ a ┆ b │
|
3069
|
+
# # │ 2 ┆ a ┆ b │
|
3070
|
+
# # │ 3 ┆ a ┆ b │
|
3071
|
+
# # └─────┴─────┴─────┘
|
3072
|
+
#
|
3073
|
+
# @example
|
3074
|
+
# lf.unique(subset: ["bar", "ham"], maintain_order: true).collect
|
3075
|
+
# # =>
|
3076
|
+
# # shape: (1, 3)
|
3077
|
+
# # ┌─────┬─────┬─────┐
|
3078
|
+
# # │ foo ┆ bar ┆ ham │
|
3079
|
+
# # │ --- ┆ --- ┆ --- │
|
3080
|
+
# # │ i64 ┆ str ┆ str │
|
3081
|
+
# # ╞═════╪═════╪═════╡
|
3082
|
+
# # │ 1 ┆ a ┆ b │
|
3083
|
+
# # └─────┴─────┴─────┘
|
3084
|
+
#
|
3085
|
+
# @example
|
3086
|
+
# lf.unique(keep: "last", maintain_order: true).collect
|
3087
|
+
# # =>
|
3088
|
+
# # shape: (3, 3)
|
3089
|
+
# # ┌─────┬─────┬─────┐
|
3090
|
+
# # │ foo ┆ bar ┆ ham │
|
3091
|
+
# # │ --- ┆ --- ┆ --- │
|
3092
|
+
# # │ i64 ┆ str ┆ str │
|
3093
|
+
# # ╞═════╪═════╪═════╡
|
3094
|
+
# # │ 2 ┆ a ┆ b │
|
3095
|
+
# # │ 3 ┆ a ┆ b │
|
3096
|
+
# # │ 1 ┆ a ┆ b │
|
3097
|
+
# # └─────┴─────┴─────┘
|
2434
3098
|
def unique(maintain_order: true, subset: nil, keep: "first")
|
2435
3099
|
if !subset.nil? && !subset.is_a?(::Array)
|
2436
3100
|
subset = [subset]
|
@@ -2504,7 +3168,7 @@ module Polars
|
|
2504
3168
|
# "c" => [2, 4, 6]
|
2505
3169
|
# }
|
2506
3170
|
# )
|
2507
|
-
# lf.unpivot(Polars
|
3171
|
+
# lf.unpivot(Polars.cs.numeric, index: "a").collect
|
2508
3172
|
# # =>
|
2509
3173
|
# # shape: (6, 3)
|
2510
3174
|
# # ┌─────┬──────────┬───────┐
|
@@ -2530,8 +3194,8 @@ module Polars
|
|
2530
3194
|
warn "The `streamable` parameter for `LazyFrame.unpivot` is deprecated"
|
2531
3195
|
end
|
2532
3196
|
|
2533
|
-
on = on.nil? ? [] : Utils.
|
2534
|
-
index = index.nil? ? [] : Utils.
|
3197
|
+
on = on.nil? ? [] : Utils.parse_into_list_of_expressions(on)
|
3198
|
+
index = index.nil? ? [] : Utils.parse_into_list_of_expressions(index)
|
2535
3199
|
|
2536
3200
|
_from_rbldf(
|
2537
3201
|
_ldf.unpivot(on, index, value_name, variable_name)
|