polars-df 0.5.0-aarch64-linux → 0.7.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +595 -709
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +3854 -4496
- data/README.md +11 -9
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +206 -131
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +22 -28
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +241 -151
- data/lib/polars/functions.rb +29 -38
- data/lib/polars/group_by.rb +38 -76
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +174 -95
- data/lib/polars/lazy_functions.rb +87 -63
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +40 -36
- data/lib/polars/list_name_space.rb +15 -15
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +6 -4
- data/lib/polars/series.rb +95 -28
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +249 -69
- data/lib/polars/string_name_space.rb +155 -25
- data/lib/polars/utils.rb +119 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +7 -2
    
        data/lib/polars/lazy_frame.rb
    CHANGED
    
    | @@ -4,6 +4,22 @@ module Polars | |
| 4 4 | 
             
                # @private
         | 
| 5 5 | 
             
                attr_accessor :_ldf
         | 
| 6 6 |  | 
| 7 | 
            +
                # Create a new LazyFrame.
         | 
| 8 | 
            +
                def initialize(data = nil, schema: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
         | 
| 9 | 
            +
                  self._ldf = (
         | 
| 10 | 
            +
                    DataFrame.new(
         | 
| 11 | 
            +
                      data,
         | 
| 12 | 
            +
                      schema: schema,
         | 
| 13 | 
            +
                      schema_overrides: schema_overrides,
         | 
| 14 | 
            +
                      orient: orient,
         | 
| 15 | 
            +
                      infer_schema_length: infer_schema_length,
         | 
| 16 | 
            +
                      nan_to_null: nan_to_null
         | 
| 17 | 
            +
                    )
         | 
| 18 | 
            +
                    .lazy
         | 
| 19 | 
            +
                    ._ldf
         | 
| 20 | 
            +
                  )
         | 
| 21 | 
            +
                end
         | 
| 22 | 
            +
             | 
| 7 23 | 
             
                # @private
         | 
| 8 24 | 
             
                def self._from_rbldf(rb_ldf)
         | 
| 9 25 | 
             
                  ldf = LazyFrame.allocate
         | 
| @@ -81,7 +97,8 @@ module Polars | |
| 81 97 | 
             
                  row_count_offset: 0,
         | 
| 82 98 | 
             
                  storage_options: nil,
         | 
| 83 99 | 
             
                  low_memory: false,
         | 
| 84 | 
            -
                  use_statistics: true
         | 
| 100 | 
            +
                  use_statistics: true,
         | 
| 101 | 
            +
                  hive_partitioning: true
         | 
| 85 102 | 
             
                )
         | 
| 86 103 | 
             
                  _from_rbldf(
         | 
| 87 104 | 
             
                    RbLazyFrame.new_from_parquet(
         | 
| @@ -92,7 +109,8 @@ module Polars | |
| 92 109 | 
             
                      rechunk,
         | 
| 93 110 | 
             
                      Utils._prepare_row_count_args(row_count_name, row_count_offset),
         | 
| 94 111 | 
             
                      low_memory,
         | 
| 95 | 
            -
                      use_statistics
         | 
| 112 | 
            +
                      use_statistics,
         | 
| 113 | 
            +
                      hive_partitioning
         | 
| 96 114 | 
             
                    )
         | 
| 97 115 | 
             
                  )
         | 
| 98 116 | 
             
                end
         | 
| @@ -334,6 +352,7 @@ module Polars | |
| 334 352 | 
             
                    slice_pushdown,
         | 
| 335 353 | 
             
                    common_subplan_elimination,
         | 
| 336 354 | 
             
                    allow_streaming,
         | 
| 355 | 
            +
                    false
         | 
| 337 356 | 
             
                  )
         | 
| 338 357 |  | 
| 339 358 | 
             
                  ldf.describe_optimized_plan
         | 
| @@ -379,16 +398,16 @@ module Polars | |
| 379 398 | 
             
                #   # │ 2   ┆ 7.0 ┆ b   │
         | 
| 380 399 | 
             
                #   # │ 1   ┆ 6.0 ┆ a   │
         | 
| 381 400 | 
             
                #   # └─────┴─────┴─────┘
         | 
| 382 | 
            -
                def sort(by, reverse: false, nulls_last: false)
         | 
| 401 | 
            +
                def sort(by, reverse: false, nulls_last: false, maintain_order: false)
         | 
| 383 402 | 
             
                  if by.is_a?(String)
         | 
| 384 | 
            -
                    _from_rbldf(_ldf.sort(by, reverse, nulls_last))
         | 
| 403 | 
            +
                    return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order))
         | 
| 385 404 | 
             
                  end
         | 
| 386 405 | 
             
                  if Utils.bool?(reverse)
         | 
| 387 406 | 
             
                    reverse = [reverse]
         | 
| 388 407 | 
             
                  end
         | 
| 389 408 |  | 
| 390 409 | 
             
                  by = Utils.selection_to_rbexpr_list(by)
         | 
| 391 | 
            -
                  _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last))
         | 
| 410 | 
            +
                  _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order))
         | 
| 392 411 | 
             
                end
         | 
| 393 412 |  | 
| 394 413 | 
             
                # def profile
         | 
| @@ -429,7 +448,7 @@ module Polars | |
| 429 448 | 
             
                #       "c" => [6, 5, 4, 3, 2, 1]
         | 
| 430 449 | 
             
                #     }
         | 
| 431 450 | 
             
                #   ).lazy
         | 
| 432 | 
            -
                #   df. | 
| 451 | 
            +
                #   df.group_by("a", maintain_order: true).agg(Polars.all.sum).collect
         | 
| 433 452 | 
             
                #   # =>
         | 
| 434 453 | 
             
                #   # shape: (3, 3)
         | 
| 435 454 | 
             
                #   # ┌─────┬─────┬─────┐
         | 
| @@ -450,7 +469,8 @@ module Polars | |
| 450 469 | 
             
                  no_optimization: false,
         | 
| 451 470 | 
             
                  slice_pushdown: true,
         | 
| 452 471 | 
             
                  common_subplan_elimination: true,
         | 
| 453 | 
            -
                  allow_streaming: false
         | 
| 472 | 
            +
                  allow_streaming: false,
         | 
| 473 | 
            +
                  _eager: false
         | 
| 454 474 | 
             
                )
         | 
| 455 475 | 
             
                  if no_optimization
         | 
| 456 476 | 
             
                    predicate_pushdown = false
         | 
| @@ -470,7 +490,8 @@ module Polars | |
| 470 490 | 
             
                    simplify_expression,
         | 
| 471 491 | 
             
                    slice_pushdown,
         | 
| 472 492 | 
             
                    common_subplan_elimination,
         | 
| 473 | 
            -
                    allow_streaming
         | 
| 493 | 
            +
                    allow_streaming,
         | 
| 494 | 
            +
                    _eager
         | 
| 474 495 | 
             
                  )
         | 
| 475 496 | 
             
                  Utils.wrap_df(ldf.collect)
         | 
| 476 497 | 
             
                end
         | 
| @@ -552,7 +573,8 @@ module Polars | |
| 552 573 | 
             
                    simplify_expression,
         | 
| 553 574 | 
             
                    slice_pushdown,
         | 
| 554 575 | 
             
                    false,
         | 
| 555 | 
            -
                    true
         | 
| 576 | 
            +
                    true,
         | 
| 577 | 
            +
                    false
         | 
| 556 578 | 
             
                  )
         | 
| 557 579 | 
             
                  lf.sink_parquet(
         | 
| 558 580 | 
             
                    path,
         | 
| @@ -607,7 +629,7 @@ module Polars | |
| 607 629 | 
             
                #       "c" => [6, 5, 4, 3, 2, 1]
         | 
| 608 630 | 
             
                #     }
         | 
| 609 631 | 
             
                #   ).lazy
         | 
| 610 | 
            -
                #   df. | 
| 632 | 
            +
                #   df.group_by("a", maintain_order: true).agg(Polars.all.sum).fetch(2)
         | 
| 611 633 | 
             
                #   # =>
         | 
| 612 634 | 
             
                #   # shape: (2, 3)
         | 
| 613 635 | 
             
                #   # ┌─────┬─────┬─────┐
         | 
| @@ -644,7 +666,8 @@ module Polars | |
| 644 666 | 
             
                    simplify_expression,
         | 
| 645 667 | 
             
                    slice_pushdown,
         | 
| 646 668 | 
             
                    common_subplan_elimination,
         | 
| 647 | 
            -
                    allow_streaming
         | 
| 669 | 
            +
                    allow_streaming,
         | 
| 670 | 
            +
                    false
         | 
| 648 671 | 
             
                  )
         | 
| 649 672 | 
             
                  Utils.wrap_df(ldf.fetch(n_rows))
         | 
| 650 673 | 
             
                end
         | 
| @@ -837,13 +860,13 @@ module Polars | |
| 837 860 | 
             
                  _from_rbldf(_ldf.select(exprs))
         | 
| 838 861 | 
             
                end
         | 
| 839 862 |  | 
| 840 | 
            -
                # Start a  | 
| 863 | 
            +
                # Start a group by operation.
         | 
| 841 864 | 
             
                #
         | 
| 842 865 | 
             
                # @param by [Object]
         | 
| 843 866 | 
             
                #   Column(s) to group by.
         | 
| 844 867 | 
             
                # @param maintain_order [Boolean]
         | 
| 845 868 | 
             
                #   Make sure that the order of the groups remains consistent. This is more
         | 
| 846 | 
            -
                #   expensive than a default  | 
| 869 | 
            +
                #   expensive than a default group by.
         | 
| 847 870 | 
             
                #
         | 
| 848 871 | 
             
                # @return [LazyGroupBy]
         | 
| 849 872 | 
             
                #
         | 
| @@ -855,7 +878,7 @@ module Polars | |
| 855 878 | 
             
                #       "c" => [6, 5, 4, 3, 2, 1]
         | 
| 856 879 | 
             
                #     }
         | 
| 857 880 | 
             
                #   ).lazy
         | 
| 858 | 
            -
                #   df. | 
| 881 | 
            +
                #   df.group_by("a", maintain_order: true).agg(Polars.col("b").sum).collect
         | 
| 859 882 | 
             
                #   # =>
         | 
| 860 883 | 
             
                #   # shape: (3, 2)
         | 
| 861 884 | 
             
                #   # ┌─────┬─────┐
         | 
| @@ -867,19 +890,21 @@ module Polars | |
| 867 890 | 
             
                #   # │ b   ┆ 11  │
         | 
| 868 891 | 
             
                #   # │ c   ┆ 6   │
         | 
| 869 892 | 
             
                #   # └─────┴─────┘
         | 
| 870 | 
            -
                def  | 
| 893 | 
            +
                def group_by(by, maintain_order: false)
         | 
| 871 894 | 
             
                  rbexprs_by = Utils.selection_to_rbexpr_list(by)
         | 
| 872 | 
            -
                  lgb = _ldf. | 
| 873 | 
            -
                  LazyGroupBy.new(lgb | 
| 895 | 
            +
                  lgb = _ldf.group_by(rbexprs_by, maintain_order)
         | 
| 896 | 
            +
                  LazyGroupBy.new(lgb)
         | 
| 874 897 | 
             
                end
         | 
| 898 | 
            +
                alias_method :groupby, :group_by
         | 
| 899 | 
            +
                alias_method :group, :group_by
         | 
| 875 900 |  | 
| 876 901 | 
             
                # Create rolling groups based on a time column.
         | 
| 877 902 | 
             
                #
         | 
| 878 903 | 
             
                # Also works for index values of type `:i32` or `:i64`.
         | 
| 879 904 | 
             
                #
         | 
| 880 | 
            -
                # Different from a ` | 
| 905 | 
            +
                # Different from a `group_by_dynamic`, the windows are now determined by the
         | 
| 881 906 | 
             
                # individual values and are not of constant intervals. For constant intervals
         | 
| 882 | 
            -
                # use * | 
| 907 | 
            +
                # use *group_by_dynamic*.
         | 
| 883 908 | 
             
                #
         | 
| 884 909 | 
             
                # The `period` and `offset` arguments are created either from a timedelta, or
         | 
| 885 910 | 
             
                # by using the following string language:
         | 
| @@ -899,7 +924,7 @@ module Polars | |
| 899 924 | 
             
                # Or combine them:
         | 
| 900 925 | 
             
                # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
         | 
| 901 926 | 
             
                #
         | 
| 902 | 
            -
                # In case of a  | 
| 927 | 
            +
                # In case of a group_by_rolling on an integer column, the windows are defined by:
         | 
| 903 928 | 
             
                #
         | 
| 904 929 | 
             
                # - "1i"      # length 1
         | 
| 905 930 | 
             
                # - "10i"     # length 10
         | 
| @@ -910,7 +935,7 @@ module Polars | |
| 910 935 | 
             
                #   This column must be sorted in ascending order. If not the output will not
         | 
| 911 936 | 
             
                #   make sense.
         | 
| 912 937 | 
             
                #
         | 
| 913 | 
            -
                #   In case of a rolling  | 
| 938 | 
            +
                #   In case of a rolling group by on indices, dtype needs to be one of
         | 
| 914 939 | 
             
                #   `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
         | 
| 915 940 | 
             
                #   performance matters use an `:i64` column.
         | 
| 916 941 | 
             
                # @param period [Object]
         | 
| @@ -921,6 +946,12 @@ module Polars | |
| 921 946 | 
             
                #   Define whether the temporal window interval is closed or not.
         | 
| 922 947 | 
             
                # @param by [Object]
         | 
| 923 948 | 
             
                #   Also group by this column/these columns.
         | 
| 949 | 
            +
                # @param check_sorted [Boolean]
         | 
| 950 | 
            +
                #   When the `by` argument is given, polars can not check sortedness
         | 
| 951 | 
            +
                #   by the metadata and has to do a full scan on the index column to
         | 
| 952 | 
            +
                #   verify data is sorted. This is expensive. If you are sure the
         | 
| 953 | 
            +
                #   data within the by groups is sorted, you can set this to `false`.
         | 
| 954 | 
            +
                #   Doing so incorrectly will lead to incorrect output.
         | 
| 924 955 | 
             
                #
         | 
| 925 956 | 
             
                # @return [LazyFrame]
         | 
| 926 957 | 
             
                #
         | 
| @@ -933,16 +964,16 @@ module Polars | |
| 933 964 | 
             
                #     "2020-01-03 19:45:32",
         | 
| 934 965 | 
             
                #     "2020-01-08 23:16:43"
         | 
| 935 966 | 
             
                #   ]
         | 
| 936 | 
            -
                #   df = Polars:: | 
| 937 | 
            -
                #     Polars.col("dt").str.strptime(Polars::Datetime)
         | 
| 967 | 
            +
                #   df = Polars::LazyFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
         | 
| 968 | 
            +
                #     Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
         | 
| 938 969 | 
             
                #   )
         | 
| 939 | 
            -
                #   df. | 
| 970 | 
            +
                #   df.group_by_rolling(index_column: "dt", period: "2d").agg(
         | 
| 940 971 | 
             
                #     [
         | 
| 941 972 | 
             
                #       Polars.sum("a").alias("sum_a"),
         | 
| 942 973 | 
             
                #       Polars.min("a").alias("min_a"),
         | 
| 943 974 | 
             
                #       Polars.max("a").alias("max_a")
         | 
| 944 975 | 
             
                #     ]
         | 
| 945 | 
            -
                #   )
         | 
| 976 | 
            +
                #   ).collect
         | 
| 946 977 | 
             
                #   # =>
         | 
| 947 978 | 
             
                #   # shape: (6, 4)
         | 
| 948 979 | 
             
                #   # ┌─────────────────────┬───────┬───────┬───────┐
         | 
| @@ -957,14 +988,15 @@ module Polars | |
| 957 988 | 
             
                #   # │ 2020-01-03 19:45:32 ┆ 11    ┆ 2     ┆ 9     │
         | 
| 958 989 | 
             
                #   # │ 2020-01-08 23:16:43 ┆ 1     ┆ 1     ┆ 1     │
         | 
| 959 990 | 
             
                #   # └─────────────────────┴───────┴───────┴───────┘
         | 
| 960 | 
            -
                def  | 
| 991 | 
            +
                def group_by_rolling(
         | 
| 961 992 | 
             
                  index_column:,
         | 
| 962 993 | 
             
                  period:,
         | 
| 963 994 | 
             
                  offset: nil,
         | 
| 964 995 | 
             
                  closed: "right",
         | 
| 965 | 
            -
                  by: nil
         | 
| 996 | 
            +
                  by: nil,
         | 
| 997 | 
            +
                  check_sorted: true
         | 
| 966 998 | 
             
                )
         | 
| 967 | 
            -
                  index_column = Utils. | 
| 999 | 
            +
                  index_column = Utils.parse_as_expression(index_column)
         | 
| 968 1000 | 
             
                  if offset.nil?
         | 
| 969 1001 | 
             
                    offset = "-#{period}"
         | 
| 970 1002 | 
             
                  end
         | 
| @@ -973,16 +1005,17 @@ module Polars | |
| 973 1005 | 
             
                  period = Utils._timedelta_to_pl_duration(period)
         | 
| 974 1006 | 
             
                  offset = Utils._timedelta_to_pl_duration(offset)
         | 
| 975 1007 |  | 
| 976 | 
            -
                  lgb = _ldf. | 
| 977 | 
            -
                    index_column | 
| 1008 | 
            +
                  lgb = _ldf.group_by_rolling(
         | 
| 1009 | 
            +
                    index_column, period, offset, closed, rbexprs_by, check_sorted
         | 
| 978 1010 | 
             
                  )
         | 
| 979 | 
            -
                  LazyGroupBy.new(lgb | 
| 1011 | 
            +
                  LazyGroupBy.new(lgb)
         | 
| 980 1012 | 
             
                end
         | 
| 1013 | 
            +
                alias_method :groupby_rolling, :group_by_rolling
         | 
| 981 1014 |  | 
| 982 1015 | 
             
                # Group based on a time value (or index value of type `:i32`, `:i64`).
         | 
| 983 1016 | 
             
                #
         | 
| 984 1017 | 
             
                # Time windows are calculated and rows are assigned to windows. Different from a
         | 
| 985 | 
            -
                # normal  | 
| 1018 | 
            +
                # normal group by is that a row can be a member of multiple groups. The time/index
         | 
| 986 1019 | 
             
                # window could be seen as a rolling window, with a window size determined by
         | 
| 987 1020 | 
             
                # dates/times/values instead of slots in the DataFrame.
         | 
| 988 1021 | 
             
                #
         | 
| @@ -1010,37 +1043,43 @@ module Polars | |
| 1010 1043 | 
             
                # Or combine them:
         | 
| 1011 1044 | 
             
                # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
         | 
| 1012 1045 | 
             
                #
         | 
| 1013 | 
            -
                # In case of a  | 
| 1046 | 
            +
                # In case of a group_by_dynamic on an integer column, the windows are defined by:
         | 
| 1014 1047 | 
             
                #
         | 
| 1015 1048 | 
             
                # - "1i"      # length 1
         | 
| 1016 1049 | 
             
                # - "10i"     # length 10
         | 
| 1017 1050 | 
             
                #
         | 
| 1018 | 
            -
                # @param index_column
         | 
| 1051 | 
            +
                # @param index_column [Object]
         | 
| 1019 1052 | 
             
                #   Column used to group based on the time window.
         | 
| 1020 1053 | 
             
                #   Often of type Date/Datetime.
         | 
| 1021 1054 | 
             
                #   This column must be sorted in ascending order. If not the output will not
         | 
| 1022 1055 | 
             
                #   make sense.
         | 
| 1023 1056 | 
             
                #
         | 
| 1024 | 
            -
                #   In case of a dynamic  | 
| 1057 | 
            +
                #   In case of a dynamic group by on indices, dtype needs to be one of
         | 
| 1025 1058 | 
             
                #   `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
         | 
| 1026 1059 | 
             
                #   performance matters use an `:i64` column.
         | 
| 1027 | 
            -
                # @param every
         | 
| 1060 | 
            +
                # @param every [Object]
         | 
| 1028 1061 | 
             
                #   Interval of the window.
         | 
| 1029 | 
            -
                # @param period
         | 
| 1062 | 
            +
                # @param period [Object]
         | 
| 1030 1063 | 
             
                #   Length of the window, if None it is equal to 'every'.
         | 
| 1031 | 
            -
                # @param offset
         | 
| 1064 | 
            +
                # @param offset [Object]
         | 
| 1032 1065 | 
             
                #   Offset of the window; if None and period is None, it will be equal to negative
         | 
| 1033 1066 | 
             
                #   `every`.
         | 
| 1034 | 
            -
                # @param truncate
         | 
| 1067 | 
            +
                # @param truncate [Boolean]
         | 
| 1035 1068 | 
             
                #   Truncate the time value to the window lower bound.
         | 
| 1036 | 
            -
                # @param include_boundaries
         | 
| 1069 | 
            +
                # @param include_boundaries [Boolean]
         | 
| 1037 1070 | 
             
                #   Add the lower and upper bound of the window to the "_lower_bound" and
         | 
| 1038 1071 | 
             
                #   "_upper_bound" columns. This will impact performance because it's harder to
         | 
| 1039 1072 | 
             
                #   parallelize
         | 
| 1040 1073 | 
             
                # @param closed ["right", "left", "both", "none"]
         | 
| 1041 1074 | 
             
                #   Define whether the temporal window interval is closed or not.
         | 
| 1042 | 
            -
                # @param by
         | 
| 1075 | 
            +
                # @param by [Object]
         | 
| 1043 1076 | 
             
                #   Also group by this column/these columns
         | 
| 1077 | 
            +
                # @param check_sorted [Boolean]
         | 
| 1078 | 
            +
                #   When the `by` argument is given, polars can not check sortedness
         | 
| 1079 | 
            +
                #   by the metadata and has to do a full scan on the index column to
         | 
| 1080 | 
            +
                #   verify data is sorted. This is expensive. If you are sure the
         | 
| 1081 | 
            +
                #   data within the by groups is sorted, you can set this to `false`.
         | 
| 1082 | 
            +
                #   Doing so incorrectly will lead to incorrect output.
         | 
| 1044 1083 | 
             
                #
         | 
| 1045 1084 | 
             
                # @return [DataFrame]
         | 
| 1046 1085 | 
             
                #
         | 
| @@ -1072,7 +1111,7 @@ module Polars | |
| 1072 1111 | 
             
                #   # └─────────────────────┴─────┘
         | 
| 1073 1112 | 
             
                #
         | 
| 1074 1113 | 
             
                # @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
         | 
| 1075 | 
            -
                #   df. | 
| 1114 | 
            +
                #   df.group_by_dynamic("time", every: "1h", closed: "right").agg(
         | 
| 1076 1115 | 
             
                #     [
         | 
| 1077 1116 | 
             
                #       Polars.col("time").min.alias("time_min"),
         | 
| 1078 1117 | 
             
                #       Polars.col("time").max.alias("time_max")
         | 
| @@ -1092,7 +1131,7 @@ module Polars | |
| 1092 1131 | 
             
                #   # └─────────────────────┴─────────────────────┴─────────────────────┘
         | 
| 1093 1132 | 
             
                #
         | 
| 1094 1133 | 
             
                # @example The window boundaries can also be added to the aggregation result.
         | 
| 1095 | 
            -
                #   df. | 
| 1134 | 
            +
                #   df.group_by_dynamic(
         | 
| 1096 1135 | 
             
                #     "time", every: "1h", include_boundaries: true, closed: "right"
         | 
| 1097 1136 | 
             
                #   ).agg([Polars.col("time").count.alias("time_count")])
         | 
| 1098 1137 | 
             
                #   # =>
         | 
| @@ -1109,27 +1148,27 @@ module Polars | |
| 1109 1148 | 
             
                #   # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
         | 
| 1110 1149 | 
             
                #
         | 
| 1111 1150 | 
             
                # @example When closed="left", should not include right end of interval.
         | 
| 1112 | 
            -
                #   df. | 
| 1151 | 
            +
                #   df.group_by_dynamic("time", every: "1h", closed: "left").agg(
         | 
| 1113 1152 | 
             
                #     [
         | 
| 1114 1153 | 
             
                #       Polars.col("time").count.alias("time_count"),
         | 
| 1115 | 
            -
                #       Polars.col("time"). | 
| 1154 | 
            +
                #       Polars.col("time").alias("time_agg_list")
         | 
| 1116 1155 | 
             
                #     ]
         | 
| 1117 1156 | 
             
                #   )
         | 
| 1118 1157 | 
             
                #   # =>
         | 
| 1119 1158 | 
             
                #   # shape: (4, 3)
         | 
| 1120 | 
            -
                #   #  | 
| 1121 | 
            -
                #   # │ time                ┆ time_count ┆ time_agg_list | 
| 1122 | 
            -
                #   # │ ---                 ┆ ---        ┆ --- | 
| 1123 | 
            -
                #   # │ datetime[μs]        ┆ u32        ┆ list[datetime[μs]] | 
| 1124 | 
            -
                #   #  | 
| 1125 | 
            -
                #   # │ 2021-12-16 00:00:00 ┆ 2          ┆ [2021-12-16 00:00:00, 2021-12-16 | 
| 1126 | 
            -
                #   # │ 2021-12-16 01:00:00 ┆ 2          ┆ [2021-12-16 01:00:00, 2021-12-16 | 
| 1127 | 
            -
                #   # │ 2021-12-16 02:00:00 ┆ 2          ┆ [2021-12-16 02:00:00, 2021-12-16 | 
| 1128 | 
            -
                #   # │ 2021-12-16 03:00:00 ┆ 1          ┆ [2021-12-16 03:00:00] | 
| 1129 | 
            -
                #   #  | 
| 1159 | 
            +
                #   # ┌─────────────────────┬────────────┬───────────────────────────────────┐
         | 
| 1160 | 
            +
                #   # │ time                ┆ time_count ┆ time_agg_list                     │
         | 
| 1161 | 
            +
                #   # │ ---                 ┆ ---        ┆ ---                               │
         | 
| 1162 | 
            +
                #   # │ datetime[μs]        ┆ u32        ┆ list[datetime[μs]]                │
         | 
| 1163 | 
            +
                #   # ╞═════════════════════╪════════════╪═══════════════════════════════════╡
         | 
| 1164 | 
            +
                #   # │ 2021-12-16 00:00:00 ┆ 2          ┆ [2021-12-16 00:00:00, 2021-12-16… │
         | 
| 1165 | 
            +
                #   # │ 2021-12-16 01:00:00 ┆ 2          ┆ [2021-12-16 01:00:00, 2021-12-16… │
         | 
| 1166 | 
            +
                #   # │ 2021-12-16 02:00:00 ┆ 2          ┆ [2021-12-16 02:00:00, 2021-12-16… │
         | 
| 1167 | 
            +
                #   # │ 2021-12-16 03:00:00 ┆ 1          ┆ [2021-12-16 03:00:00]             │
         | 
| 1168 | 
            +
                #   # └─────────────────────┴────────────┴───────────────────────────────────┘
         | 
| 1130 1169 | 
             
                #
         | 
| 1131 1170 | 
             
                # @example When closed="both" the time values at the window boundaries belong to 2 groups.
         | 
| 1132 | 
            -
                #   df.groupby_dynamic("time", every: "1h", closed: "both").agg(
         | 
| 1171 | 
            +
                #   df.group_by_dynamic("time", every: "1h", closed: "both").agg(
         | 
| 1133 1172 | 
             
                #     [Polars.col("time").count.alias("time_count")]
         | 
| 1134 1173 | 
             
                #   )
         | 
| 1135 1174 | 
             
                #   # =>
         | 
| @@ -1146,7 +1185,7 @@ module Polars | |
| 1146 1185 | 
             
                #   # │ 2021-12-16 03:00:00 ┆ 1          │
         | 
| 1147 1186 | 
             
                #   # └─────────────────────┴────────────┘
         | 
| 1148 1187 | 
             
                #
         | 
| 1149 | 
            -
                # @example Dynamic groupbys can also be combined with grouping on normal keys.
         | 
| 1188 | 
            +
                # @example Dynamic group bys can also be combined with grouping on normal keys.
         | 
| 1150 1189 | 
             
                #   df = Polars::DataFrame.new(
         | 
| 1151 1190 | 
             
                #     {
         | 
| 1152 1191 | 
             
                #       "time" => Polars.date_range(
         | 
| @@ -1157,7 +1196,7 @@ module Polars | |
| 1157 1196 | 
             
                #       "groups" => ["a", "a", "a", "b", "b", "a", "a"]
         | 
| 1158 1197 | 
             
                #     }
         | 
| 1159 1198 | 
             
                #   )
         | 
| 1160 | 
            -
                #   df.groupby_dynamic(
         | 
| 1199 | 
            +
                #   df.group_by_dynamic(
         | 
| 1161 1200 | 
             
                #     "time",
         | 
| 1162 1201 | 
             
                #     every: "1h",
         | 
| 1163 1202 | 
             
                #     closed: "both",
         | 
| @@ -1180,20 +1219,20 @@ module Polars | |
| 1180 1219 | 
             
                #   # │ b      ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1          │
         | 
| 1181 1220 | 
             
                #   # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
         | 
| 1182 1221 | 
             
                #
         | 
| 1183 | 
            -
                # @example Dynamic groupby on an index column.
         | 
| 1222 | 
            +
                # @example Dynamic group by on an index column.
         | 
| 1184 1223 | 
             
                #   df = Polars::DataFrame.new(
         | 
| 1185 1224 | 
             
                #     {
         | 
| 1186 1225 | 
             
                #       "idx" => Polars.arange(0, 6, eager: true),
         | 
| 1187 1226 | 
             
                #       "A" => ["A", "A", "B", "B", "B", "C"]
         | 
| 1188 1227 | 
             
                #     }
         | 
| 1189 1228 | 
             
                #   )
         | 
| 1190 | 
            -
                #   df.groupby_dynamic(
         | 
| 1229 | 
            +
                #   df.group_by_dynamic(
         | 
| 1191 1230 | 
             
                #     "idx",
         | 
| 1192 1231 | 
             
                #     every: "2i",
         | 
| 1193 1232 | 
             
                #     period: "3i",
         | 
| 1194 1233 | 
             
                #     include_boundaries: true,
         | 
| 1195 1234 | 
             
                #     closed: "right"
         | 
| 1196 | 
            -
                #   ).agg(Polars.col("A"). | 
| 1235 | 
            +
                #   ).agg(Polars.col("A").alias("A_agg_list"))
         | 
| 1197 1236 | 
             
                #   # =>
         | 
| 1198 1237 | 
             
                #   # shape: (3, 4)
         | 
| 1199 1238 | 
             
                #   # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
         | 
| @@ -1205,23 +1244,26 @@ module Polars | |
| 1205 1244 | 
             
                #   # │ 2               ┆ 5               ┆ 2   ┆ ["B", "B", "C"] │
         | 
| 1206 1245 | 
             
                #   # │ 4               ┆ 7               ┆ 4   ┆ ["C"]           │
         | 
| 1207 1246 | 
             
                #   # └─────────────────┴─────────────────┴─────┴─────────────────┘
         | 
| 1208 | 
            -
                def groupby_dynamic(
         | 
| 1247 | 
            +
                def group_by_dynamic(
         | 
| 1209 1248 | 
             
                  index_column,
         | 
| 1210 1249 | 
             
                  every:,
         | 
| 1211 1250 | 
             
                  period: nil,
         | 
| 1212 1251 | 
             
                  offset: nil,
         | 
| 1213 | 
            -
                  truncate: true,
         | 
| 1252 | 
            +
                  truncate: nil,
         | 
| 1214 1253 | 
             
                  include_boundaries: false,
         | 
| 1215 1254 | 
             
                  closed: "left",
         | 
| 1255 | 
            +
                  label: "left",
         | 
| 1216 1256 | 
             
                  by: nil,
         | 
| 1217 | 
            -
                  start_by: "window"
         | 
| 1257 | 
            +
                  start_by: "window",
         | 
| 1258 | 
            +
                  check_sorted: true
         | 
| 1218 1259 | 
             
                )
         | 
| 1260 | 
            +
                  if !truncate.nil?
         | 
| 1261 | 
            +
                    label = truncate ? "left" : "datapoint"
         | 
| 1262 | 
            +
                  end
         | 
| 1263 | 
            +
             | 
| 1264 | 
            +
                  index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
         | 
| 1219 1265 | 
             
                  if offset.nil?
         | 
| 1220 | 
            -
                    if period.nil?
         | 
| 1221 | 
            -
                      offset = "-#{every}"
         | 
| 1222 | 
            -
                    else
         | 
| 1223 | 
            -
                      offset = "0ns"
         | 
| 1224 | 
            -
                    end
         | 
| 1266 | 
            +
                    offset = period.nil? ? "-#{every}" : "0ns"
         | 
| 1225 1267 | 
             
                  end
         | 
| 1226 1268 |  | 
| 1227 1269 | 
             
                  if period.nil?
         | 
| @@ -1233,19 +1275,21 @@ module Polars | |
| 1233 1275 | 
             
                  every = Utils._timedelta_to_pl_duration(every)
         | 
| 1234 1276 |  | 
| 1235 1277 | 
             
                  rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
         | 
| 1236 | 
            -
                  lgb = _ldf.groupby_dynamic(
         | 
| 1237 | 
            -
                    index_column,
         | 
| 1278 | 
            +
                  lgb = _ldf.group_by_dynamic(
         | 
| 1279 | 
            +
                    index_column._rbexpr,
         | 
| 1238 1280 | 
             
                    every,
         | 
| 1239 1281 | 
             
                    period,
         | 
| 1240 1282 | 
             
                    offset,
         | 
| 1241 | 
            -
                    truncate,
         | 
| 1283 | 
            +
                    label,
         | 
| 1242 1284 | 
             
                    include_boundaries,
         | 
| 1243 1285 | 
             
                    closed,
         | 
| 1244 1286 | 
             
                    rbexprs_by,
         | 
| 1245 | 
            -
                    start_by
         | 
| 1287 | 
            +
                    start_by,
         | 
| 1288 | 
            +
                    check_sorted
         | 
| 1246 1289 | 
             
                  )
         | 
| 1247 | 
            -
                  LazyGroupBy.new(lgb, self.class)
         | 
| 1290 | 
            +
                  LazyGroupBy.new(lgb)
         | 
| 1248 1291 | 
             
                end
         | 
| 1292 | 
            +
                alias_method :groupby_dynamic, :group_by_dynamic
         | 
| 1249 1293 |  | 
| 1250 1294 | 
             
                # Perform an asof join.
         | 
| 1251 1295 | 
             
                #
         | 
| @@ -1351,7 +1395,7 @@ module Polars | |
| 1351 1395 | 
             
                  if by.is_a?(String)
         | 
| 1352 1396 | 
             
                    by_left_ = [by]
         | 
| 1353 1397 | 
             
                    by_right_ = [by]
         | 
| 1354 | 
            -
                  elsif by.is_a?(Array)
         | 
| 1398 | 
            +
                  elsif by.is_a?(::Array)
         | 
| 1355 1399 | 
             
                    by_left_ = by
         | 
| 1356 1400 | 
             
                    by_right_ = by
         | 
| 1357 1401 | 
             
                  end
         | 
| @@ -1619,7 +1663,7 @@ module Polars | |
| 1619 1663 | 
             
                #   # │ null │
         | 
| 1620 1664 | 
             
                #   # └──────┘
         | 
| 1621 1665 | 
             
                def with_context(other)
         | 
| 1622 | 
            -
                  if !other.is_a?(Array)
         | 
| 1666 | 
            +
                  if !other.is_a?(::Array)
         | 
| 1623 1667 | 
             
                    other = [other]
         | 
| 1624 1668 | 
             
                  end
         | 
| 1625 1669 |  | 
| @@ -1705,8 +1749,10 @@ module Polars | |
| 1705 1749 |  | 
| 1706 1750 | 
             
                # Shift the values by a given period.
         | 
| 1707 1751 | 
             
                #
         | 
| 1708 | 
            -
                # @param periods [Integer]
         | 
| 1752 | 
            +
                # @param n [Integer]
         | 
| 1709 1753 | 
             
                #   Number of places to shift (may be negative).
         | 
| 1754 | 
            +
                # @param fill_value [Object]
         | 
| 1755 | 
            +
                #   Fill the resulting null values with this value.
         | 
| 1710 1756 | 
             
                #
         | 
| 1711 1757 | 
             
                # @return [LazyFrame]
         | 
| 1712 1758 | 
             
                #
         | 
| @@ -1743,8 +1789,12 @@ module Polars | |
| 1743 1789 | 
             
                #   # │ 5    ┆ 6    │
         | 
| 1744 1790 | 
             
                #   # │ null ┆ null │
         | 
| 1745 1791 | 
             
                #   # └──────┴──────┘
         | 
| 1746 | 
            -
                def shift(periods)
         | 
| 1747 | 
            -
                  _from_rbldf(_ldf.shift(periods))
         | 
| 1792 | 
            +
                def shift(n, fill_value: nil)
         | 
| 1793 | 
            +
                  if !fill_value.nil?
         | 
| 1794 | 
            +
                    fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
         | 
| 1795 | 
            +
                  end
         | 
| 1796 | 
            +
                  n = Utils.parse_as_expression(n)
         | 
| 1797 | 
            +
                  _from_rbldf(_ldf.shift(n, fill_value))
         | 
| 1748 1798 | 
             
                end
         | 
| 1749 1799 |  | 
| 1750 1800 | 
             
                # Shift the values by a given period and fill the resulting null values.
         | 
| @@ -1790,10 +1840,7 @@ module Polars | |
| 1790 1840 | 
             
                #   # │ 0   ┆ 0   │
         | 
| 1791 1841 | 
             
                #   # └─────┴─────┘
         | 
| 1792 1842 | 
             
                def shift_and_fill(periods, fill_value)
         | 
| 1793 | 
            -
                  if !fill_value.is_a?(Expr)
         | 
| 1794 | 
            -
                    fill_value = Polars.lit(fill_value)
         | 
| 1795 | 
            -
                  end
         | 
| 1796 | 
            -
                  _from_rbldf(_ldf.shift_and_fill(periods, fill_value._rbexpr))
         | 
| 1843 | 
            +
                  shift(periods, fill_value: fill_value)
         | 
| 1797 1844 | 
             
                end
         | 
| 1798 1845 |  | 
| 1799 1846 | 
             
                # Get a slice of this DataFrame.
         | 
| @@ -2228,7 +2275,7 @@ module Polars | |
| 2228 2275 | 
             
                #
         | 
| 2229 2276 | 
             
                # @return [LazyFrame]
         | 
| 2230 2277 | 
             
                def unique(maintain_order: true, subset: nil, keep: "first")
         | 
| 2231 | 
            -
                  if !subset.nil? && !subset.is_a?(Array)
         | 
| 2278 | 
            +
                  if !subset.nil? && !subset.is_a?(::Array)
         | 
| 2232 2279 | 
             
                    subset = [subset]
         | 
| 2233 2280 | 
             
                  end
         | 
| 2234 2281 | 
             
                  _from_rbldf(_ldf.unique(maintain_order, subset, keep))
         | 
| @@ -2261,7 +2308,7 @@ module Polars | |
| 2261 2308 | 
             
                #   # │ 3   ┆ 8   ┆ c   │
         | 
| 2262 2309 | 
             
                #   # └─────┴─────┴─────┘
         | 
| 2263 2310 | 
             
                def drop_nulls(subset: nil)
         | 
| 2264 | 
            -
                  if !subset.nil? && !subset.is_a?(Array)
         | 
| 2311 | 
            +
                  if !subset.nil? && !subset.is_a?(::Array)
         | 
| 2265 2312 | 
             
                    subset = [subset]
         | 
| 2266 2313 | 
             
                  end
         | 
| 2267 2314 | 
             
                  _from_rbldf(_ldf.drop_nulls(subset))
         | 
| @@ -2351,16 +2398,16 @@ module Polars | |
| 2351 2398 | 
             
                #   df.interpolate.collect
         | 
| 2352 2399 | 
             
                #   # =>
         | 
| 2353 2400 | 
             
                #   # shape: (4, 3)
         | 
| 2354 | 
            -
                #   #  | 
| 2355 | 
            -
                #   # │ foo | 
| 2356 | 
            -
                #   # │ --- | 
| 2357 | 
            -
                #   # │  | 
| 2358 | 
            -
                #   #  | 
| 2359 | 
            -
                #   # │ 1 | 
| 2360 | 
            -
                #   # │ 5 | 
| 2361 | 
            -
                #   # │ 9 | 
| 2362 | 
            -
                #   # │ 10 | 
| 2363 | 
            -
                #   #  | 
| 2401 | 
            +
                #   # ┌──────┬──────┬──────────┐
         | 
| 2402 | 
            +
                #   # │ foo  ┆ bar  ┆ baz      │
         | 
| 2403 | 
            +
                #   # │ ---  ┆ ---  ┆ ---      │
         | 
| 2404 | 
            +
                #   # │ f64  ┆ f64  ┆ f64      │
         | 
| 2405 | 
            +
                #   # ╞══════╪══════╪══════════╡
         | 
| 2406 | 
            +
                #   # │ 1.0  ┆ 6.0  ┆ 1.0      │
         | 
| 2407 | 
            +
                #   # │ 5.0  ┆ 7.0  ┆ 3.666667 │
         | 
| 2408 | 
            +
                #   # │ 9.0  ┆ 9.0  ┆ 6.333333 │
         | 
| 2409 | 
            +
                #   # │ 10.0 ┆ null ┆ 9.0      │
         | 
| 2410 | 
            +
                #   # └──────┴──────┴──────────┘
         | 
| 2364 2411 | 
             
                def interpolate
         | 
| 2365 2412 | 
             
                  select(Utils.col("*").interpolate)
         | 
| 2366 2413 | 
             
                end
         | 
| @@ -2423,6 +2470,38 @@ module Polars | |
| 2423 2470 | 
             
                  _from_rbldf(_ldf.unnest(names))
         | 
| 2424 2471 | 
             
                end
         | 
| 2425 2472 |  | 
| 2473 | 
            +
                # TODO
         | 
| 2474 | 
            +
                # def merge_sorted
         | 
| 2475 | 
            +
                # end
         | 
| 2476 | 
            +
             | 
| 2477 | 
            +
                # Indicate that one or multiple columns are sorted.
         | 
| 2478 | 
            +
                #
         | 
| 2479 | 
            +
                # @param column [Object]
         | 
| 2480 | 
            +
                #   Columns that are sorted
         | 
| 2481 | 
            +
                # @param more_columns [Object]
         | 
| 2482 | 
            +
                #   Additional columns that are sorted, specified as positional arguments.
         | 
| 2483 | 
            +
                # @param descending [Boolean]
         | 
| 2484 | 
            +
                #   Whether the columns are sorted in descending order.
         | 
| 2485 | 
            +
                #
         | 
| 2486 | 
            +
                # @return [LazyFrame]
         | 
| 2487 | 
            +
                def set_sorted(
         | 
| 2488 | 
            +
                  column,
         | 
| 2489 | 
            +
                  *more_columns,
         | 
| 2490 | 
            +
                  descending: false
         | 
| 2491 | 
            +
                )
         | 
| 2492 | 
            +
                  columns = Utils.selection_to_rbexpr_list(column)
         | 
| 2493 | 
            +
                  if more_columns.any?
         | 
| 2494 | 
            +
                    columns.concat(Utils.selection_to_rbexpr_list(more_columns))
         | 
| 2495 | 
            +
                  end
         | 
| 2496 | 
            +
                  with_columns(
         | 
| 2497 | 
            +
                    columns.map { |e| Utils.wrap_expr(e).set_sorted(descending: descending) }
         | 
| 2498 | 
            +
                  )
         | 
| 2499 | 
            +
                end
         | 
| 2500 | 
            +
             | 
| 2501 | 
            +
                # TODO
         | 
| 2502 | 
            +
                # def update
         | 
| 2503 | 
            +
                # end
         | 
| 2504 | 
            +
             | 
| 2426 2505 | 
             
                private
         | 
| 2427 2506 |  | 
| 2428 2507 | 
             
                def initialize_copy(other)
         |