polars-df 0.14.0-x64-mingw-ucrt → 0.16.0-x64-mingw-ucrt

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE-THIRD-PARTY.txt +24369 -14580
  5. data/LICENSE.txt +1 -0
  6. data/README.md +38 -4
  7. data/lib/polars/3.2/polars.so +0 -0
  8. data/lib/polars/3.3/polars.so +0 -0
  9. data/lib/polars/{3.1 → 3.4}/polars.so +0 -0
  10. data/lib/polars/batched_csv_reader.rb +0 -2
  11. data/lib/polars/binary_expr.rb +133 -9
  12. data/lib/polars/binary_name_space.rb +101 -6
  13. data/lib/polars/config.rb +4 -0
  14. data/lib/polars/data_frame.rb +452 -101
  15. data/lib/polars/data_type_group.rb +28 -0
  16. data/lib/polars/data_types.rb +3 -1
  17. data/lib/polars/date_time_expr.rb +244 -0
  18. data/lib/polars/date_time_name_space.rb +87 -0
  19. data/lib/polars/expr.rb +103 -2
  20. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  21. data/lib/polars/functions/as_datatype.rb +51 -2
  22. data/lib/polars/functions/col.rb +1 -1
  23. data/lib/polars/functions/eager.rb +1 -3
  24. data/lib/polars/functions/lazy.rb +95 -13
  25. data/lib/polars/functions/range/time_range.rb +21 -21
  26. data/lib/polars/io/csv.rb +14 -16
  27. data/lib/polars/io/database.rb +2 -2
  28. data/lib/polars/io/delta.rb +126 -0
  29. data/lib/polars/io/ipc.rb +14 -4
  30. data/lib/polars/io/ndjson.rb +10 -0
  31. data/lib/polars/io/parquet.rb +168 -111
  32. data/lib/polars/lazy_frame.rb +684 -20
  33. data/lib/polars/list_name_space.rb +169 -0
  34. data/lib/polars/selectors.rb +1226 -0
  35. data/lib/polars/series.rb +465 -35
  36. data/lib/polars/string_cache.rb +27 -1
  37. data/lib/polars/string_expr.rb +0 -1
  38. data/lib/polars/string_name_space.rb +73 -3
  39. data/lib/polars/struct_name_space.rb +31 -7
  40. data/lib/polars/utils/various.rb +5 -1
  41. data/lib/polars/utils.rb +45 -10
  42. data/lib/polars/version.rb +1 -1
  43. data/lib/polars.rb +17 -1
  44. metadata +9 -8
  45. data/lib/polars/functions.rb +0 -57
@@ -431,7 +431,9 @@ module Polars
431
431
  projection_pushdown: true,
432
432
  simplify_expression: true,
433
433
  no_optimization: false,
434
- slice_pushdown: true
434
+ slice_pushdown: true,
435
+ storage_options: nil,
436
+ retries: 2
435
437
  )
436
438
  lf = _set_sink_optimizations(
437
439
  type_coercion: type_coercion,
@@ -460,6 +462,12 @@ module Polars
460
462
  }
461
463
  end
462
464
 
465
+ if storage_options&.any?
466
+ storage_options = storage_options.to_a
467
+ else
468
+ storage_options = nil
469
+ end
470
+
463
471
  lf.sink_parquet(
464
472
  path,
465
473
  compression,
@@ -467,7 +475,9 @@ module Polars
467
475
  statistics,
468
476
  row_group_size,
469
477
  data_pagesize_limit,
470
- maintain_order
478
+ maintain_order,
479
+ storage_options,
480
+ retries
471
481
  )
472
482
  end
473
483
 
@@ -512,6 +522,10 @@ module Polars
512
522
  slice_pushdown: true,
513
523
  no_optimization: false
514
524
  )
525
+ # TODO support storage options in Rust
526
+ storage_options = nil
527
+ retries = 2
528
+
515
529
  lf = _set_sink_optimizations(
516
530
  type_coercion: type_coercion,
517
531
  predicate_pushdown: predicate_pushdown,
@@ -521,10 +535,18 @@ module Polars
521
535
  no_optimization: no_optimization
522
536
  )
523
537
 
538
+ if storage_options&.any?
539
+ storage_options = storage_options.to_a
540
+ else
541
+ storage_options = nil
542
+ end
543
+
524
544
  lf.sink_ipc(
525
545
  path,
526
546
  compression,
527
- maintain_order
547
+ maintain_order,
548
+ storage_options,
549
+ retries
528
550
  )
529
551
  end
530
552
 
@@ -692,7 +714,9 @@ module Polars
692
714
  projection_pushdown: true,
693
715
  simplify_expression: true,
694
716
  slice_pushdown: true,
695
- no_optimization: false
717
+ no_optimization: false,
718
+ storage_options: nil,
719
+ retries: 2
696
720
  )
697
721
  lf = _set_sink_optimizations(
698
722
  type_coercion: type_coercion,
@@ -703,7 +727,13 @@ module Polars
703
727
  no_optimization: no_optimization
704
728
  )
705
729
 
706
- lf.sink_json(path, maintain_order)
730
+ if storage_options&.any?
731
+ storage_options = storage_options.to_a
732
+ else
733
+ storage_options = nil
734
+ end
735
+
736
+ lf.sink_json(path, maintain_order, storage_options, retries)
707
737
  end
708
738
 
709
739
  # @private
@@ -848,9 +878,70 @@ module Polars
848
878
  _from_rbldf(_ldf.cache)
849
879
  end
850
880
 
851
- # TODO
852
- # def cast
853
- # end
881
+ # Cast LazyFrame column(s) to the specified dtype(s).
882
+ #
883
+ # @param dtypes [Hash]
884
+ # Mapping of column names (or selector) to dtypes, or a single dtype
885
+ # to which all columns will be cast.
886
+ # @param strict [Boolean]
887
+ # Throw an error if a cast could not be done (for instance, due to an
888
+ # overflow).
889
+ #
890
+ # @return [LazyFrame]
891
+ #
892
+ # @example Cast specific frame columns to the specified dtypes:
893
+ # lf = Polars::LazyFrame.new(
894
+ # {
895
+ # "foo" => [1, 2, 3],
896
+ # "bar" => [6.0, 7.0, 8.0],
897
+ # "ham" => [Date.new(2020, 1, 2), Date.new(2021, 3, 4), Date.new(2022, 5, 6)]
898
+ # }
899
+ # )
900
+ # lf.cast({"foo" => Polars::Float32, "bar" => Polars::UInt8}).collect
901
+ # # =>
902
+ # # shape: (3, 3)
903
+ # # ┌─────┬─────┬────────────┐
904
+ # # │ foo ┆ bar ┆ ham │
905
+ # # │ --- ┆ --- ┆ --- │
906
+ # # │ f32 ┆ u8 ┆ date │
907
+ # # ╞═════╪═════╪════════════╡
908
+ # # │ 1.0 ┆ 6 ┆ 2020-01-02 │
909
+ # # │ 2.0 ┆ 7 ┆ 2021-03-04 │
910
+ # # │ 3.0 ┆ 8 ┆ 2022-05-06 │
911
+ # # └─────┴─────┴────────────┘
912
+ #
913
+ # @example Cast all frame columns matching one dtype (or dtype group) to another dtype:
914
+ # lf.cast({Polars::Date => Polars::Datetime}).collect
915
+ # # =>
916
+ # # shape: (3, 3)
917
+ # # ┌─────┬─────┬─────────────────────┐
918
+ # # │ foo ┆ bar ┆ ham │
919
+ # # │ --- ┆ --- ┆ --- │
920
+ # # │ i64 ┆ f64 ┆ datetime[μs] │
921
+ # # ╞═════╪═════╪═════════════════════╡
922
+ # # │ 1 ┆ 6.0 ┆ 2020-01-02 00:00:00 │
923
+ # # │ 2 ┆ 7.0 ┆ 2021-03-04 00:00:00 │
924
+ # # │ 3 ┆ 8.0 ┆ 2022-05-06 00:00:00 │
925
+ # # └─────┴─────┴─────────────────────┘
926
+ #
927
+ # @example Cast all frame columns to the specified dtype:
928
+ # lf.cast(Polars::String).collect.to_h(as_series: false)
929
+ # # => {"foo"=>["1", "2", "3"], "bar"=>["6.0", "7.0", "8.0"], "ham"=>["2020-01-02", "2021-03-04", "2022-05-06"]}
930
+ def cast(dtypes, strict: true)
931
+ if !dtypes.is_a?(Hash)
932
+ return _from_rbldf(_ldf.cast_all(dtypes, strict))
933
+ end
934
+
935
+ cast_map = {}
936
+ dtypes.each do |c, dtype|
937
+ dtype = Utils.parse_into_dtype(dtype)
938
+ cast_map.merge!(
939
+ c.is_a?(::String) ? {c => dtype} : Utils.expand_selector(self, c).to_h { |x| [x, dtype] }
940
+ )
941
+ end
942
+
943
+ _from_rbldf(_ldf.cast(cast_map, strict))
944
+ end
854
945
 
855
946
  # Create an empty copy of the current LazyFrame.
856
947
  #
@@ -1520,8 +1611,197 @@ module Polars
1520
1611
  # @param force_parallel [Boolean]
1521
1612
  # Force the physical plan to evaluate the computation of both DataFrames up to
1522
1613
  # the join in parallel.
1614
+ # @param coalesce [Boolean]
1615
+ # Coalescing behavior (merging of join columns).
1616
+ # - true: -> Always coalesce join columns.
1617
+ # - false: -> Never coalesce join columns.
1618
+ # Note that joining on any expression other than `col` will turn off coalescing.
1523
1619
  #
1524
1620
  # @return [LazyFrame]
1621
+ #
1622
+ # @example
1623
+ # gdp = Polars::LazyFrame.new(
1624
+ # {
1625
+ # "date" => Polars.date_range(
1626
+ # Date.new(2016, 1, 1),
1627
+ # Date.new(2020, 1, 1),
1628
+ # "1y",
1629
+ # eager: true
1630
+ # ),
1631
+ # "gdp" => [4164, 4411, 4566, 4696, 4827]
1632
+ # }
1633
+ # )
1634
+ # gdp.collect
1635
+ # # =>
1636
+ # # shape: (5, 2)
1637
+ # # ┌────────────┬──────┐
1638
+ # # │ date ┆ gdp │
1639
+ # # │ --- ┆ --- │
1640
+ # # │ date ┆ i64 │
1641
+ # # ╞════════════╪══════╡
1642
+ # # │ 2016-01-01 ┆ 4164 │
1643
+ # # │ 2017-01-01 ┆ 4411 │
1644
+ # # │ 2018-01-01 ┆ 4566 │
1645
+ # # │ 2019-01-01 ┆ 4696 │
1646
+ # # │ 2020-01-01 ┆ 4827 │
1647
+ # # └────────────┴──────┘
1648
+ #
1649
+ # @example
1650
+ # population = Polars::LazyFrame.new(
1651
+ # {
1652
+ # "date" => [Date.new(2016, 3, 1), Date.new(2018, 8, 1), Date.new(2019, 1, 1)],
1653
+ # "population" => [82.19, 82.66, 83.12]
1654
+ # }
1655
+ # ).sort("date")
1656
+ # population.collect
1657
+ # # =>
1658
+ # # shape: (3, 2)
1659
+ # # ┌────────────┬────────────┐
1660
+ # # │ date ┆ population │
1661
+ # # │ --- ┆ --- │
1662
+ # # │ date ┆ f64 │
1663
+ # # ╞════════════╪════════════╡
1664
+ # # │ 2016-03-01 ┆ 82.19 │
1665
+ # # │ 2018-08-01 ┆ 82.66 │
1666
+ # # │ 2019-01-01 ┆ 83.12 │
1667
+ # # └────────────┴────────────┘
1668
+ #
1669
+ # @example Note how the dates don't quite match. If we join them using `join_asof` and `strategy: "backward"`, then each date from `population` which doesn't have an exact match is matched with the closest earlier date from `gdp`:
1670
+ # population.join_asof(gdp, on: "date", strategy: "backward").collect
1671
+ # # =>
1672
+ # # shape: (3, 3)
1673
+ # # ┌────────────┬────────────┬──────┐
1674
+ # # │ date ┆ population ┆ gdp │
1675
+ # # │ --- ┆ --- ┆ --- │
1676
+ # # │ date ┆ f64 ┆ i64 │
1677
+ # # ╞════════════╪════════════╪══════╡
1678
+ # # │ 2016-03-01 ┆ 82.19 ┆ 4164 │
1679
+ # # │ 2018-08-01 ┆ 82.66 ┆ 4566 │
1680
+ # # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
1681
+ # # └────────────┴────────────┴──────┘
1682
+ #
1683
+ # @example
1684
+ # population.join_asof(
1685
+ # gdp, on: "date", strategy: "backward", coalesce: false
1686
+ # ).collect
1687
+ # # =>
1688
+ # # shape: (3, 4)
1689
+ # # ┌────────────┬────────────┬────────────┬──────┐
1690
+ # # │ date ┆ population ┆ date_right ┆ gdp │
1691
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1692
+ # # │ date ┆ f64 ┆ date ┆ i64 │
1693
+ # # ╞════════════╪════════════╪════════════╪══════╡
1694
+ # # │ 2016-03-01 ┆ 82.19 ┆ 2016-01-01 ┆ 4164 │
1695
+ # # │ 2018-08-01 ┆ 82.66 ┆ 2018-01-01 ┆ 4566 │
1696
+ # # │ 2019-01-01 ┆ 83.12 ┆ 2019-01-01 ┆ 4696 │
1697
+ # # └────────────┴────────────┴────────────┴──────┘
1698
+ #
1699
+ # @example If we instead use `strategy: "forward"`, then each date from `population` which doesn't have an exact match is matched with the closest later date from `gdp`:
1700
+ # population.join_asof(gdp, on: "date", strategy: "forward").collect
1701
+ # # =>
1702
+ # # shape: (3, 3)
1703
+ # # ┌────────────┬────────────┬──────┐
1704
+ # # │ date ┆ population ┆ gdp │
1705
+ # # │ --- ┆ --- ┆ --- │
1706
+ # # │ date ┆ f64 ┆ i64 │
1707
+ # # ╞════════════╪════════════╪══════╡
1708
+ # # │ 2016-03-01 ┆ 82.19 ┆ 4411 │
1709
+ # # │ 2018-08-01 ┆ 82.66 ┆ 4696 │
1710
+ # # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
1711
+ # # └────────────┴────────────┴──────┘
1712
+ #
1713
+ # @example
1714
+ # population.join_asof(gdp, on: "date", strategy: "nearest").collect
1715
+ # # =>
1716
+ # # shape: (3, 3)
1717
+ # # ┌────────────┬────────────┬──────┐
1718
+ # # │ date ┆ population ┆ gdp │
1719
+ # # │ --- ┆ --- ┆ --- │
1720
+ # # │ date ┆ f64 ┆ i64 │
1721
+ # # ╞════════════╪════════════╪══════╡
1722
+ # # │ 2016-03-01 ┆ 82.19 ┆ 4164 │
1723
+ # # │ 2018-08-01 ┆ 82.66 ┆ 4696 │
1724
+ # # │ 2019-01-01 ┆ 83.12 ┆ 4696 │
1725
+ # # └────────────┴────────────┴──────┘
1726
+ #
1727
+ # @example
1728
+ # gdp_dates = Polars.date_range(
1729
+ # Date.new(2016, 1, 1), Date.new(2020, 1, 1), "1y", eager: true
1730
+ # )
1731
+ # gdp2 = Polars::LazyFrame.new(
1732
+ # {
1733
+ # "country" => ["Germany"] * 5 + ["Netherlands"] * 5,
1734
+ # "date" => Polars.concat([gdp_dates, gdp_dates]),
1735
+ # "gdp" => [4164, 4411, 4566, 4696, 4827, 784, 833, 914, 910, 909]
1736
+ # }
1737
+ # ).sort("country", "date")
1738
+ # gdp2.collect
1739
+ # # =>
1740
+ # # shape: (10, 3)
1741
+ # # ┌─────────────┬────────────┬──────┐
1742
+ # # │ country ┆ date ┆ gdp │
1743
+ # # │ --- ┆ --- ┆ --- │
1744
+ # # │ str ┆ date ┆ i64 │
1745
+ # # ╞═════════════╪════════════╪══════╡
1746
+ # # │ Germany ┆ 2016-01-01 ┆ 4164 │
1747
+ # # │ Germany ┆ 2017-01-01 ┆ 4411 │
1748
+ # # │ Germany ┆ 2018-01-01 ┆ 4566 │
1749
+ # # │ Germany ┆ 2019-01-01 ┆ 4696 │
1750
+ # # │ Germany ┆ 2020-01-01 ┆ 4827 │
1751
+ # # │ Netherlands ┆ 2016-01-01 ┆ 784 │
1752
+ # # │ Netherlands ┆ 2017-01-01 ┆ 833 │
1753
+ # # │ Netherlands ┆ 2018-01-01 ┆ 914 │
1754
+ # # │ Netherlands ┆ 2019-01-01 ┆ 910 │
1755
+ # # │ Netherlands ┆ 2020-01-01 ┆ 909 │
1756
+ # # └─────────────┴────────────┴──────┘
1757
+ #
1758
+ # @example
1759
+ # pop2 = Polars::LazyFrame.new(
1760
+ # {
1761
+ # "country" => ["Germany"] * 3 + ["Netherlands"] * 3,
1762
+ # "date" => [
1763
+ # Date.new(2016, 3, 1),
1764
+ # Date.new(2018, 8, 1),
1765
+ # Date.new(2019, 1, 1),
1766
+ # Date.new(2016, 3, 1),
1767
+ # Date.new(2018, 8, 1),
1768
+ # Date.new(2019, 1, 1)
1769
+ # ],
1770
+ # "population" => [82.19, 82.66, 83.12, 17.11, 17.32, 17.40]
1771
+ # }
1772
+ # ).sort("country", "date")
1773
+ # pop2.collect
1774
+ # # =>
1775
+ # # shape: (6, 3)
1776
+ # # ┌─────────────┬────────────┬────────────┐
1777
+ # # │ country ┆ date ┆ population │
1778
+ # # │ --- ┆ --- ┆ --- │
1779
+ # # │ str ┆ date ┆ f64 │
1780
+ # # ╞═════════════╪════════════╪════════════╡
1781
+ # # │ Germany ┆ 2016-03-01 ┆ 82.19 │
1782
+ # # │ Germany ┆ 2018-08-01 ┆ 82.66 │
1783
+ # # │ Germany ┆ 2019-01-01 ┆ 83.12 │
1784
+ # # │ Netherlands ┆ 2016-03-01 ┆ 17.11 │
1785
+ # # │ Netherlands ┆ 2018-08-01 ┆ 17.32 │
1786
+ # # │ Netherlands ┆ 2019-01-01 ┆ 17.4 │
1787
+ # # └─────────────┴────────────┴────────────┘
1788
+ #
1789
+ # @example
1790
+ # pop2.join_asof(gdp2, by: "country", on: "date", strategy: "nearest").collect
1791
+ # # =>
1792
+ # # shape: (6, 4)
1793
+ # # ┌─────────────┬────────────┬────────────┬──────┐
1794
+ # # │ country ┆ date ┆ population ┆ gdp │
1795
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1796
+ # # │ str ┆ date ┆ f64 ┆ i64 │
1797
+ # # ╞═════════════╪════════════╪════════════╪══════╡
1798
+ # # │ Germany ┆ 2016-03-01 ┆ 82.19 ┆ 4164 │
1799
+ # # │ Germany ┆ 2018-08-01 ┆ 82.66 ┆ 4696 │
1800
+ # # │ Germany ┆ 2019-01-01 ┆ 83.12 ┆ 4696 │
1801
+ # # │ Netherlands ┆ 2016-03-01 ┆ 17.11 ┆ 784 │
1802
+ # # │ Netherlands ┆ 2018-08-01 ┆ 17.32 ┆ 910 │
1803
+ # # │ Netherlands ┆ 2019-01-01 ┆ 17.4 ┆ 910 │
1804
+ # # └─────────────┴────────────┴────────────┴──────┘
1525
1805
  def join_asof(
1526
1806
  other,
1527
1807
  left_on: nil,
@@ -1534,7 +1814,8 @@ module Polars
1534
1814
  suffix: "_right",
1535
1815
  tolerance: nil,
1536
1816
  allow_parallel: true,
1537
- force_parallel: false
1817
+ force_parallel: false,
1818
+ coalesce: true
1538
1819
  )
1539
1820
  if !other.is_a?(LazyFrame)
1540
1821
  raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
@@ -1589,7 +1870,8 @@ module Polars
1589
1870
  suffix,
1590
1871
  strategy,
1591
1872
  tolerance_num,
1592
- tolerance_str
1873
+ tolerance_str,
1874
+ coalesce
1593
1875
  )
1594
1876
  )
1595
1877
  end
@@ -1609,6 +1891,12 @@ module Polars
1609
1891
  # Join strategy.
1610
1892
  # @param suffix [String]
1611
1893
  # Suffix to append to columns with a duplicate name.
1894
+ # @param validate ['m:m', 'm:1', '1:m', '1:1']
1895
+ # Checks if join is of specified type.
1896
+ # * *many_to_many* - "m:m": default, does not result in checks
1897
+ # * *one_to_one* - "1:1": check if join keys are unique in both left and right datasets
1898
+ # * *one_to_many* - "1:m": check if join keys are unique in left dataset
1899
+ # * *many_to_one* - "m:1": check if join keys are unique in right dataset
1612
1900
  # @param join_nulls [Boolean]
1613
1901
  # Join on null values. By default null values will never produce matches.
1614
1902
  # @param allow_parallel [Boolean]
@@ -1617,6 +1905,12 @@ module Polars
1617
1905
  # @param force_parallel [Boolean]
1618
1906
  # Force the physical plan to evaluate the computation of both DataFrames up to
1619
1907
  # the join in parallel.
1908
+ # @param coalesce [Boolean]
1909
+ # Coalescing behavior (merging of join columns).
1910
+ # - nil: -> join specific.
1911
+ # - true: -> Always coalesce join columns.
1912
+ # - false: -> Never coalesce join columns.
1913
+ # Note that joining on any expression other than `col` will turn off coalescing.
1620
1914
  #
1621
1915
  # @return [LazyFrame]
1622
1916
  #
@@ -1706,9 +2000,11 @@ module Polars
1706
2000
  on: nil,
1707
2001
  how: "inner",
1708
2002
  suffix: "_right",
2003
+ validate: "m:m",
1709
2004
  join_nulls: false,
1710
2005
  allow_parallel: true,
1711
- force_parallel: false
2006
+ force_parallel: false,
2007
+ coalesce: nil
1712
2008
  )
1713
2009
  if !other.is_a?(LazyFrame)
1714
2010
  raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
@@ -1719,7 +2015,7 @@ module Polars
1719
2015
  elsif how == "cross"
1720
2016
  return _from_rbldf(
1721
2017
  _ldf.join(
1722
- other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
2018
+ other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix, validate, coalesce
1723
2019
  )
1724
2020
  )
1725
2021
  end
@@ -1745,6 +2041,8 @@ module Polars
1745
2041
  join_nulls,
1746
2042
  how,
1747
2043
  suffix,
2044
+ validate,
2045
+ coalesce
1748
2046
  )
1749
2047
  )
1750
2048
  end
@@ -1879,6 +2177,55 @@ module Polars
1879
2177
  # - List of column names.
1880
2178
  #
1881
2179
  # @return [LazyFrame]
2180
+ #
2181
+ # @example Drop a single column by passing the name of that column.
2182
+ # lf = Polars::LazyFrame.new(
2183
+ # {
2184
+ # "foo" => [1, 2, 3],
2185
+ # "bar" => [6.0, 7.0, 8.0],
2186
+ # "ham" => ["a", "b", "c"]
2187
+ # }
2188
+ # )
2189
+ # lf.drop("ham").collect
2190
+ # # =>
2191
+ # # shape: (3, 2)
2192
+ # # ┌─────┬─────┐
2193
+ # # │ foo ┆ bar │
2194
+ # # │ --- ┆ --- │
2195
+ # # │ i64 ┆ f64 │
2196
+ # # ╞═════╪═════╡
2197
+ # # │ 1 ┆ 6.0 │
2198
+ # # │ 2 ┆ 7.0 │
2199
+ # # │ 3 ┆ 8.0 │
2200
+ # # └─────┴─────┘
2201
+ #
2202
+ # @example Drop multiple columns by passing a selector.
2203
+ # lf.drop(Polars.cs.numeric).collect
2204
+ # # =>
2205
+ # # shape: (3, 1)
2206
+ # # ┌─────┐
2207
+ # # │ ham │
2208
+ # # │ --- │
2209
+ # # │ str │
2210
+ # # ╞═════╡
2211
+ # # │ a │
2212
+ # # │ b │
2213
+ # # │ c │
2214
+ # # └─────┘
2215
+ #
2216
+ # @example Use positional arguments to drop multiple columns.
2217
+ # lf.drop("foo", "ham").collect
2218
+ # # =>
2219
+ # # shape: (3, 1)
2220
+ # # ┌─────┐
2221
+ # # │ bar │
2222
+ # # │ --- │
2223
+ # # │ f64 │
2224
+ # # ╞═════╡
2225
+ # # │ 6.0 │
2226
+ # # │ 7.0 │
2227
+ # # │ 8.0 │
2228
+ # # └─────┘
1882
2229
  def drop(*columns)
1883
2230
  drop_cols = Utils._expand_selectors(self, *columns)
1884
2231
  _from_rbldf(_ldf.drop(drop_cols))
@@ -1888,17 +2235,80 @@ module Polars
1888
2235
  #
1889
2236
  # @param mapping [Hash]
1890
2237
  # Key value pairs that map from old name to new name.
2238
+ # @param strict [Boolean]
2239
+ # Validate that all column names exist in the current schema,
2240
+ # and throw an exception if any do not. (Note that this parameter
2241
+ # is a no-op when passing a callable (such as a lambda) to `mapping`).
1891
2242
  #
1892
2243
  # @return [LazyFrame]
1893
- def rename(mapping)
1894
- existing = mapping.keys
1895
- _new = mapping.values
1896
- _from_rbldf(_ldf.rename(existing, _new))
2244
+ #
2245
+ # @example
2246
+ # lf = Polars::LazyFrame.new(
2247
+ # {
2248
+ # "foo" => [1, 2, 3],
2249
+ # "bar" => [6, 7, 8],
2250
+ # "ham" => ["a", "b", "c"]
2251
+ # }
2252
+ # )
2253
+ # lf.rename({"foo" => "apple"}).collect
2254
+ # # =>
2255
+ # # shape: (3, 3)
2256
+ # # ┌───────┬─────┬─────┐
2257
+ # # │ apple ┆ bar ┆ ham │
2258
+ # # │ --- ┆ --- ┆ --- │
2259
+ # # │ i64 ┆ i64 ┆ str │
2260
+ # # ╞═══════╪═════╪═════╡
2261
+ # # │ 1 ┆ 6 ┆ a │
2262
+ # # │ 2 ┆ 7 ┆ b │
2263
+ # # │ 3 ┆ 8 ┆ c │
2264
+ # # └───────┴─────┴─────┘
2265
+ #
2266
+ # @example
2267
+ # lf.rename(->(column_name) { "c" + column_name[1..] }).collect
2268
+ # # =>
2269
+ # # shape: (3, 3)
2270
+ # # ┌─────┬─────┬─────┐
2271
+ # # │ coo ┆ car ┆ cam │
2272
+ # # │ --- ┆ --- ┆ --- │
2273
+ # # │ i64 ┆ i64 ┆ str │
2274
+ # # ╞═════╪═════╪═════╡
2275
+ # # │ 1 ┆ 6 ┆ a │
2276
+ # # │ 2 ┆ 7 ┆ b │
2277
+ # # │ 3 ┆ 8 ┆ c │
2278
+ # # └─────┴─────┴─────┘
2279
+ def rename(mapping, strict: true)
2280
+ if mapping.respond_to?(:call)
2281
+ select(F.all.name.map(&mapping))
2282
+ else
2283
+ existing = mapping.keys
2284
+ _new = mapping.values
2285
+ _from_rbldf(_ldf.rename(existing, _new, strict))
2286
+ end
1897
2287
  end
1898
2288
 
1899
2289
  # Reverse the DataFrame.
1900
2290
  #
1901
2291
  # @return [LazyFrame]
2292
+ #
2293
+ # @example
2294
+ # lf = Polars::LazyFrame.new(
2295
+ # {
2296
+ # "key" => ["a", "b", "c"],
2297
+ # "val" => [1, 2, 3]
2298
+ # }
2299
+ # )
2300
+ # lf.reverse.collect
2301
+ # # =>
2302
+ # # shape: (3, 2)
2303
+ # # ┌─────┬─────┐
2304
+ # # │ key ┆ val │
2305
+ # # │ --- ┆ --- │
2306
+ # # │ str ┆ i64 │
2307
+ # # ╞═════╪═════╡
2308
+ # # │ c ┆ 3 │
2309
+ # # │ b ┆ 2 │
2310
+ # # │ a ┆ 1 │
2311
+ # # └─────┴─────┘
1902
2312
  def reverse
1903
2313
  _from_rbldf(_ldf.reverse)
1904
2314
  end
@@ -2048,8 +2458,43 @@ module Polars
2048
2458
  # Consider using the {#fetch} operation if you only want to test your
2049
2459
  # query. The {#fetch} operation will load the first `n` rows at the scan
2050
2460
  # level, whereas the {#head}/{#limit} are applied at the end.
2461
+ #
2462
+ # @example
2463
+ # lf = Polars::LazyFrame.new(
2464
+ # {
2465
+ # "a" => [1, 2, 3, 4, 5, 6],
2466
+ # "b" => [7, 8, 9, 10, 11, 12]
2467
+ # }
2468
+ # )
2469
+ # lf.limit.collect
2470
+ # # =>
2471
+ # # shape: (5, 2)
2472
+ # # ┌─────┬─────┐
2473
+ # # │ a ┆ b │
2474
+ # # │ --- ┆ --- │
2475
+ # # │ i64 ┆ i64 │
2476
+ # # ╞═════╪═════╡
2477
+ # # │ 1 ┆ 7 │
2478
+ # # │ 2 ┆ 8 │
2479
+ # # │ 3 ┆ 9 │
2480
+ # # │ 4 ┆ 10 │
2481
+ # # │ 5 ┆ 11 │
2482
+ # # └─────┴─────┘
2483
+ #
2484
+ # @example
2485
+ # lf.limit(2).collect
2486
+ # # =>
2487
+ # # shape: (2, 2)
2488
+ # # ┌─────┬─────┐
2489
+ # # │ a ┆ b │
2490
+ # # │ --- ┆ --- │
2491
+ # # │ i64 ┆ i64 │
2492
+ # # ╞═════╪═════╡
2493
+ # # │ 1 ┆ 7 │
2494
+ # # │ 2 ┆ 8 │
2495
+ # # └─────┴─────┘
2051
2496
  def limit(n = 5)
2052
- head(5)
2497
+ head(n)
2053
2498
  end
2054
2499
 
2055
2500
  # Get the first `n` rows.
@@ -2063,6 +2508,41 @@ module Polars
2063
2508
  # Consider using the {#fetch} operation if you only want to test your
2064
2509
  # query. The {#fetch} operation will load the first `n` rows at the scan
2065
2510
  # level, whereas the {#head}/{#limit} are applied at the end.
2511
+ #
2512
+ # @example
2513
+ # lf = Polars::LazyFrame.new(
2514
+ # {
2515
+ # "a" => [1, 2, 3, 4, 5, 6],
2516
+ # "b" => [7, 8, 9, 10, 11, 12]
2517
+ # }
2518
+ # )
2519
+ # lf.head.collect
2520
+ # # =>
2521
+ # # shape: (5, 2)
2522
+ # # ┌─────┬─────┐
2523
+ # # │ a ┆ b │
2524
+ # # │ --- ┆ --- │
2525
+ # # │ i64 ┆ i64 │
2526
+ # # ╞═════╪═════╡
2527
+ # # │ 1 ┆ 7 │
2528
+ # # │ 2 ┆ 8 │
2529
+ # # │ 3 ┆ 9 │
2530
+ # # │ 4 ┆ 10 │
2531
+ # # │ 5 ┆ 11 │
2532
+ # # └─────┴─────┘
2533
+ #
2534
+ # @example
2535
+ # lf.head(2).collect
2536
+ # # =>
2537
+ # # shape: (2, 2)
2538
+ # # ┌─────┬─────┐
2539
+ # # │ a ┆ b │
2540
+ # # │ --- ┆ --- │
2541
+ # # │ i64 ┆ i64 │
2542
+ # # ╞═════╪═════╡
2543
+ # # │ 1 ┆ 7 │
2544
+ # # │ 2 ┆ 8 │
2545
+ # # └─────┴─────┘
2066
2546
  def head(n = 5)
2067
2547
  slice(0, n)
2068
2548
  end
@@ -2073,6 +2553,41 @@ module Polars
2073
2553
  # Number of rows.
2074
2554
  #
2075
2555
  # @return [LazyFrame]
2556
+ #
2557
+ # @example
2558
+ # lf = Polars::LazyFrame.new(
2559
+ # {
2560
+ # "a" => [1, 2, 3, 4, 5, 6],
2561
+ # "b" => [7, 8, 9, 10, 11, 12]
2562
+ # }
2563
+ # )
2564
+ # lf.tail.collect
2565
+ # # =>
2566
+ # # shape: (5, 2)
2567
+ # # ┌─────┬─────┐
2568
+ # # │ a ┆ b │
2569
+ # # │ --- ┆ --- │
2570
+ # # │ i64 ┆ i64 │
2571
+ # # ╞═════╪═════╡
2572
+ # # │ 2 ┆ 8 │
2573
+ # # │ 3 ┆ 9 │
2574
+ # # │ 4 ┆ 10 │
2575
+ # # │ 5 ┆ 11 │
2576
+ # # │ 6 ┆ 12 │
2577
+ # # └─────┴─────┘
2578
+ #
2579
+ # @example
2580
+ # lf.tail(2).collect
2581
+ # # =>
2582
+ # # shape: (2, 2)
2583
+ # # ┌─────┬─────┐
2584
+ # # │ a ┆ b │
2585
+ # # │ --- ┆ --- │
2586
+ # # │ i64 ┆ i64 │
2587
+ # # ╞═════╪═════╡
2588
+ # # │ 5 ┆ 11 │
2589
+ # # │ 6 ┆ 12 │
2590
+ # # └─────┴─────┘
2076
2591
  def tail(n = 5)
2077
2592
  _from_rbldf(_ldf.tail(n))
2078
2593
  end
@@ -2080,6 +2595,24 @@ module Polars
2080
2595
  # Get the last row of the DataFrame.
2081
2596
  #
2082
2597
  # @return [LazyFrame]
2598
+ #
2599
+ # @example
2600
+ # lf = Polars::LazyFrame.new(
2601
+ # {
2602
+ # "a" => [1, 5, 3],
2603
+ # "b" => [2, 4, 6]
2604
+ # }
2605
+ # )
2606
+ # lf.last.collect
2607
+ # # =>
2608
+ # # shape: (1, 2)
2609
+ # # ┌─────┬─────┐
2610
+ # # │ a ┆ b │
2611
+ # # │ --- ┆ --- │
2612
+ # # │ i64 ┆ i64 │
2613
+ # # ╞═════╪═════╡
2614
+ # # │ 3 ┆ 6 │
2615
+ # # └─────┴─────┘
2083
2616
  def last
2084
2617
  tail(1)
2085
2618
  end
@@ -2087,6 +2620,24 @@ module Polars
2087
2620
  # Get the first row of the DataFrame.
2088
2621
  #
2089
2622
  # @return [LazyFrame]
2623
+ #
2624
+ # @example
2625
+ # lf = Polars::LazyFrame.new(
2626
+ # {
2627
+ # "a" => [1, 5, 3],
2628
+ # "b" => [2, 4, 6]
2629
+ # }
2630
+ # )
2631
+ # lf.first.collect
2632
+ # # =>
2633
+ # # shape: (1, 2)
2634
+ # # ┌─────┬─────┐
2635
+ # # │ a ┆ b │
2636
+ # # │ --- ┆ --- │
2637
+ # # │ i64 ┆ i64 │
2638
+ # # ╞═════╪═════╡
2639
+ # # │ 1 ┆ 2 │
2640
+ # # └─────┴─────┘
2090
2641
  def first
2091
2642
  slice(0, 1)
2092
2643
  end
@@ -2152,6 +2703,72 @@ module Polars
2152
2703
  # Fill null values using the specified value or strategy.
2153
2704
  #
2154
2705
  # @return [LazyFrame]
2706
+ #
2707
+ # @example
2708
+ # lf = Polars::LazyFrame.new(
2709
+ # {
2710
+ # "a" => [1, 2, nil, 4],
2711
+ # "b" => [0.5, 4, nil, 13]
2712
+ # }
2713
+ # )
2714
+ # lf.fill_null(99).collect
2715
+ # # =>
2716
+ # # shape: (4, 2)
2717
+ # # ┌─────┬──────┐
2718
+ # # │ a ┆ b │
2719
+ # # │ --- ┆ --- │
2720
+ # # │ i64 ┆ f64 │
2721
+ # # ╞═════╪══════╡
2722
+ # # │ 1 ┆ 0.5 │
2723
+ # # │ 2 ┆ 4.0 │
2724
+ # # │ 99 ┆ 99.0 │
2725
+ # # │ 4 ┆ 13.0 │
2726
+ # # └─────┴──────┘
2727
+ #
2728
+ # @example
2729
+ # lf.fill_null(strategy: "forward").collect
2730
+ # # =>
2731
+ # # shape: (4, 2)
2732
+ # # ┌─────┬──────┐
2733
+ # # │ a ┆ b │
2734
+ # # │ --- ┆ --- │
2735
+ # # │ i64 ┆ f64 │
2736
+ # # ╞═════╪══════╡
2737
+ # # │ 1 ┆ 0.5 │
2738
+ # # │ 2 ┆ 4.0 │
2739
+ # # │ 2 ┆ 4.0 │
2740
+ # # │ 4 ┆ 13.0 │
2741
+ # # └─────┴──────┘
2742
+ #
2743
+ # @example
2744
+ # lf.fill_null(strategy: "max").collect
2745
+ # # =>
2746
+ # # shape: (4, 2)
2747
+ # # ┌─────┬──────┐
2748
+ # # │ a ┆ b │
2749
+ # # │ --- ┆ --- │
2750
+ # # │ i64 ┆ f64 │
2751
+ # # ╞═════╪══════╡
2752
+ # # │ 1 ┆ 0.5 │
2753
+ # # │ 2 ┆ 4.0 │
2754
+ # # │ 4 ┆ 13.0 │
2755
+ # # │ 4 ┆ 13.0 │
2756
+ # # └─────┴──────┘
2757
+ #
2758
+ # @example
2759
+ # lf.fill_null(strategy: "zero").collect
2760
+ # # =>
2761
+ # # shape: (4, 2)
2762
+ # # ┌─────┬──────┐
2763
+ # # │ a ┆ b │
2764
+ # # │ --- ┆ --- │
2765
+ # # │ i64 ┆ f64 │
2766
+ # # ╞═════╪══════╡
2767
+ # # │ 1 ┆ 0.5 │
2768
+ # # │ 2 ┆ 4.0 │
2769
+ # # │ 0 ┆ 0.0 │
2770
+ # # │ 4 ┆ 13.0 │
2771
+ # # └─────┴──────┘
2155
2772
  def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: nil)
2156
2773
  select(Polars.all.fill_null(value, strategy: strategy, limit: limit))
2157
2774
  end
@@ -2431,6 +3048,53 @@ module Polars
2431
3048
  # Which of the duplicate rows to keep.
2432
3049
  #
2433
3050
  # @return [LazyFrame]
3051
+ #
3052
+ # @example
3053
+ # lf = Polars::LazyFrame.new(
3054
+ # {
3055
+ # "foo" => [1, 2, 3, 1],
3056
+ # "bar" => ["a", "a", "a", "a"],
3057
+ # "ham" => ["b", "b", "b", "b"]
3058
+ # }
3059
+ # )
3060
+ # lf.unique(maintain_order: true).collect
3061
+ # # =>
3062
+ # # shape: (3, 3)
3063
+ # # ┌─────┬─────┬─────┐
3064
+ # # │ foo ┆ bar ┆ ham │
3065
+ # # │ --- ┆ --- ┆ --- │
3066
+ # # │ i64 ┆ str ┆ str │
3067
+ # # ╞═════╪═════╪═════╡
3068
+ # # │ 1 ┆ a ┆ b │
3069
+ # # │ 2 ┆ a ┆ b │
3070
+ # # │ 3 ┆ a ┆ b │
3071
+ # # └─────┴─────┴─────┘
3072
+ #
3073
+ # @example
3074
+ # lf.unique(subset: ["bar", "ham"], maintain_order: true).collect
3075
+ # # =>
3076
+ # # shape: (1, 3)
3077
+ # # ┌─────┬─────┬─────┐
3078
+ # # │ foo ┆ bar ┆ ham │
3079
+ # # │ --- ┆ --- ┆ --- │
3080
+ # # │ i64 ┆ str ┆ str │
3081
+ # # ╞═════╪═════╪═════╡
3082
+ # # │ 1 ┆ a ┆ b │
3083
+ # # └─────┴─────┴─────┘
3084
+ #
3085
+ # @example
3086
+ # lf.unique(keep: "last", maintain_order: true).collect
3087
+ # # =>
3088
+ # # shape: (3, 3)
3089
+ # # ┌─────┬─────┬─────┐
3090
+ # # │ foo ┆ bar ┆ ham │
3091
+ # # │ --- ┆ --- ┆ --- │
3092
+ # # │ i64 ┆ str ┆ str │
3093
+ # # ╞═════╪═════╪═════╡
3094
+ # # │ 2 ┆ a ┆ b │
3095
+ # # │ 3 ┆ a ┆ b │
3096
+ # # │ 1 ┆ a ┆ b │
3097
+ # # └─────┴─────┴─────┘
2434
3098
  def unique(maintain_order: true, subset: nil, keep: "first")
2435
3099
  if !subset.nil? && !subset.is_a?(::Array)
2436
3100
  subset = [subset]
@@ -2504,7 +3168,7 @@ module Polars
2504
3168
  # "c" => [2, 4, 6]
2505
3169
  # }
2506
3170
  # )
2507
- # lf.unpivot(Polars::Selectors.numeric, index: "a").collect
3171
+ # lf.unpivot(Polars.cs.numeric, index: "a").collect
2508
3172
  # # =>
2509
3173
  # # shape: (6, 3)
2510
3174
  # # ┌─────┬──────────┬───────┐
@@ -2530,8 +3194,8 @@ module Polars
2530
3194
  warn "The `streamable` parameter for `LazyFrame.unpivot` is deprecated"
2531
3195
  end
2532
3196
 
2533
- on = on.nil? ? [] : Utils._expand_selectors(self, on)
2534
- index = index.nil? ? [] : Utils._expand_selectors(self, index)
3197
+ on = on.nil? ? [] : Utils.parse_into_list_of_expressions(on)
3198
+ index = index.nil? ? [] : Utils.parse_into_list_of_expressions(index)
2535
3199
 
2536
3200
  _from_rbldf(
2537
3201
  _ldf.unpivot(on, index, value_name, variable_name)