polars-df 0.5.0-x86_64-linux → 0.6.0-x86_64-linux
This diff shows the differences between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/Cargo.lock +337 -381
- data/LICENSE-THIRD-PARTY.txt +1161 -832
- data/README.md +4 -3
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/data_frame.rb +91 -49
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +76 -69
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +82 -30
- data/lib/polars/lazy_functions.rb +67 -31
- data/lib/polars/list_expr.rb +28 -28
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +70 -16
- data/lib/polars/string_expr.rb +137 -11
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/utils.rb +107 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +4 -2
    
        data/README.md
    CHANGED
    
    | @@ -25,7 +25,7 @@ Polars.read_csv("iris.csv") | |
| 25 25 | 
             
              .collect
         | 
| 26 26 | 
             
            ```
         | 
| 27 27 |  | 
| 28 | 
            -
            You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/ | 
| 28 | 
            +
            You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
         | 
| 29 29 |  | 
| 30 30 | 
             
            ## Reference
         | 
| 31 31 |  | 
| @@ -348,7 +348,7 @@ df.to_numo | |
| 348 348 | 
             
            You can specify column types when creating a data frame
         | 
| 349 349 |  | 
| 350 350 | 
             
            ```ruby
         | 
| 351 | 
            -
            Polars::DataFrame.new(data,  | 
| 351 | 
            +
            Polars::DataFrame.new(data, schema: {"a" => Polars::Int32, "b" => Polars::Float32})
         | 
| 352 352 | 
             
            ```
         | 
| 353 353 |  | 
| 354 354 | 
             
            Supported types are:
         | 
| @@ -357,8 +357,9 @@ Supported types are: | |
| 357 357 | 
             
            - float - `Float64`, `Float32`
         | 
| 358 358 | 
             
            - integer - `Int64`, `Int32`, `Int16`, `Int8`
         | 
| 359 359 | 
             
            - unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
         | 
| 360 | 
            -
            - string - `Utf8`, `Categorical`
         | 
| 360 | 
            +
            - string - `Utf8`, `Binary`, `Categorical`
         | 
| 361 361 | 
             
            - temporal - `Date`, `Datetime`, `Time`, `Duration`
         | 
| 362 | 
            +
            - other - `Object`, `List`, `Struct`, `Array` [unreleased]
         | 
| 362 363 |  | 
| 363 364 | 
             
            Get column types
         | 
| 364 365 |  | 
    
        data/lib/polars/3.0/polars.so
    CHANGED
    
    | Binary file | 
    
        data/lib/polars/3.1/polars.so
    CHANGED
    
    | Binary file | 
    
        data/lib/polars/3.2/polars.so
    CHANGED
    
    | Binary file | 
| @@ -0,0 +1,84 @@ | |
| 1 | 
            +
            module Polars
         | 
| 2 | 
            +
              # Namespace for array related expressions.
         | 
| 3 | 
            +
              class ArrayExpr
         | 
| 4 | 
            +
                # @private
         | 
| 5 | 
            +
                attr_accessor :_rbexpr
         | 
| 6 | 
            +
             | 
| 7 | 
            +
                # @private
         | 
| 8 | 
            +
                def initialize(expr)
         | 
| 9 | 
            +
                  self._rbexpr = expr._rbexpr
         | 
| 10 | 
            +
                end
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                # Compute the min values of the sub-arrays.
         | 
| 13 | 
            +
                #
         | 
| 14 | 
            +
                # @return [Expr]
         | 
| 15 | 
            +
                #
         | 
| 16 | 
            +
                # @example
         | 
| 17 | 
            +
                #   df = Polars::DataFrame.new(
         | 
| 18 | 
            +
                #     {"a" => [[1, 2], [4, 3]]},
         | 
| 19 | 
            +
                #     schema: {"a" => Polars::Array.new(2, Polars::Int64)}
         | 
| 20 | 
            +
                #   )
         | 
| 21 | 
            +
                #   df.select(Polars.col("a").arr.min)
         | 
| 22 | 
            +
                #   # =>
         | 
| 23 | 
            +
                #   # shape: (2, 1)
         | 
| 24 | 
            +
                #   # ┌─────┐
         | 
| 25 | 
            +
                #   # │ a   │
         | 
| 26 | 
            +
                #   # │ --- │
         | 
| 27 | 
            +
                #   # │ i64 │
         | 
| 28 | 
            +
                #   # ╞═════╡
         | 
| 29 | 
            +
                #   # │ 1   │
         | 
| 30 | 
            +
                #   # │ 3   │
         | 
| 31 | 
            +
                #   # └─────┘
         | 
| 32 | 
            +
                def min
         | 
| 33 | 
            +
                  Utils.wrap_expr(_rbexpr.array_min)
         | 
| 34 | 
            +
                end
         | 
| 35 | 
            +
             | 
| 36 | 
            +
                # Compute the max values of the sub-arrays.
         | 
| 37 | 
            +
                #
         | 
| 38 | 
            +
                # @return [Expr]
         | 
| 39 | 
            +
                #
         | 
| 40 | 
            +
                # @example
         | 
| 41 | 
            +
                #   df = Polars::DataFrame.new(
         | 
| 42 | 
            +
                #     {"a" => [[1, 2], [4, 3]]},
         | 
| 43 | 
            +
                #     schema: {"a" => Polars::Array.new(2, Polars::Int64)}
         | 
| 44 | 
            +
                #   )
         | 
| 45 | 
            +
                #   df.select(Polars.col("a").arr.max)
         | 
| 46 | 
            +
                #   # =>
         | 
| 47 | 
            +
                #   # shape: (2, 1)
         | 
| 48 | 
            +
                #   # ┌─────┐
         | 
| 49 | 
            +
                #   # │ a   │
         | 
| 50 | 
            +
                #   # │ --- │
         | 
| 51 | 
            +
                #   # │ i64 │
         | 
| 52 | 
            +
                #   # ╞═════╡
         | 
| 53 | 
            +
                #   # │ 2   │
         | 
| 54 | 
            +
                #   # │ 4   │
         | 
| 55 | 
            +
                #   # └─────┘
         | 
| 56 | 
            +
                def max
         | 
| 57 | 
            +
                  Utils.wrap_expr(_rbexpr.array_max)
         | 
| 58 | 
            +
                end
         | 
| 59 | 
            +
             | 
| 60 | 
            +
                # Compute the sum values of the sub-arrays.
         | 
| 61 | 
            +
                #
         | 
| 62 | 
            +
                # @return [Expr]
         | 
| 63 | 
            +
                #
         | 
| 64 | 
            +
                # @example
         | 
| 65 | 
            +
                #   df = Polars::DataFrame.new(
         | 
| 66 | 
            +
                #     {"a" => [[1, 2], [4, 3]]},
         | 
| 67 | 
            +
                #     schema: {"a" => Polars::Array.new(2, Polars::Int64)}
         | 
| 68 | 
            +
                #   )
         | 
| 69 | 
            +
                #   df.select(Polars.col("a").arr.sum)
         | 
| 70 | 
            +
                #   # =>
         | 
| 71 | 
            +
                #   # shape: (2, 1)
         | 
| 72 | 
            +
                #   # ┌─────┐
         | 
| 73 | 
            +
                #   # │ a   │
         | 
| 74 | 
            +
                #   # │ --- │
         | 
| 75 | 
            +
                #   # │ i64 │
         | 
| 76 | 
            +
                #   # ╞═════╡
         | 
| 77 | 
            +
                #   # │ 3   │
         | 
| 78 | 
            +
                #   # │ 7   │
         | 
| 79 | 
            +
                #   # └─────┘
         | 
| 80 | 
            +
                def sum
         | 
| 81 | 
            +
                  Utils.wrap_expr(_rbexpr.array_sum)
         | 
| 82 | 
            +
                end
         | 
| 83 | 
            +
              end
         | 
| 84 | 
            +
            end
         | 
| @@ -0,0 +1,77 @@ | |
| 1 | 
            +
            module Polars
         | 
| 2 | 
            +
              # Series.arr namespace.
         | 
| 3 | 
            +
              class ArrayNameSpace
         | 
| 4 | 
            +
                include ExprDispatch
         | 
| 5 | 
            +
             | 
| 6 | 
            +
                self._accessor = "arr"
         | 
| 7 | 
            +
             | 
| 8 | 
            +
                # @private
         | 
| 9 | 
            +
                def initialize(series)
         | 
| 10 | 
            +
                  self._s = series._s
         | 
| 11 | 
            +
                end
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                # Compute the min values of the sub-arrays.
         | 
| 14 | 
            +
                #
         | 
| 15 | 
            +
                # @return [Series]
         | 
| 16 | 
            +
                #
         | 
| 17 | 
            +
                # @example
         | 
| 18 | 
            +
                #   s = Polars::Series.new(
         | 
| 19 | 
            +
                #     "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(2, Polars::Int64)
         | 
| 20 | 
            +
                #   )
         | 
| 21 | 
            +
                #   s.arr.min
         | 
| 22 | 
            +
                #   # =>
         | 
| 23 | 
            +
                #   # shape: (2,)
         | 
| 24 | 
            +
                #   # Series: 'a' [i64]
         | 
| 25 | 
            +
                #   # [
         | 
| 26 | 
            +
                #   #         1
         | 
| 27 | 
            +
                #   #         3
         | 
| 28 | 
            +
                #   # ]
         | 
| 29 | 
            +
                def min
         | 
| 30 | 
            +
                  super
         | 
| 31 | 
            +
                end
         | 
| 32 | 
            +
             | 
| 33 | 
            +
                # Compute the max values of the sub-arrays.
         | 
| 34 | 
            +
                #
         | 
| 35 | 
            +
                # @return [Series]
         | 
| 36 | 
            +
                #
         | 
| 37 | 
            +
                # @example
         | 
| 38 | 
            +
                #   s = Polars::Series.new(
         | 
| 39 | 
            +
                #     "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(2, Polars::Int64)
         | 
| 40 | 
            +
                #   )
         | 
| 41 | 
            +
                #   s.arr.max
         | 
| 42 | 
            +
                #   # =>
         | 
| 43 | 
            +
                #   # shape: (2,)
         | 
| 44 | 
            +
                #   # Series: 'a' [i64]
         | 
| 45 | 
            +
                #   # [
         | 
| 46 | 
            +
                #   #         2
         | 
| 47 | 
            +
                #   #         4
         | 
| 48 | 
            +
                #   # ]
         | 
| 49 | 
            +
                def max
         | 
| 50 | 
            +
                  super
         | 
| 51 | 
            +
                end
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                # Compute the sum values of the sub-arrays.
         | 
| 54 | 
            +
                #
         | 
| 55 | 
            +
                # @return [Series]
         | 
| 56 | 
            +
                #
         | 
| 57 | 
            +
                # @example
         | 
| 58 | 
            +
                #   df = Polars::DataFrame.new(
         | 
| 59 | 
            +
                #     {"a" => [[1, 2], [4, 3]]},
         | 
| 60 | 
            +
                #     schema: {"a" => Polars::Array.new(2, Polars::Int64)}
         | 
| 61 | 
            +
                #   )
         | 
| 62 | 
            +
                #   df.select(Polars.col("a").arr.sum)
         | 
| 63 | 
            +
                #   # =>
         | 
| 64 | 
            +
                #   # shape: (2, 1)
         | 
| 65 | 
            +
                #   # ┌─────┐
         | 
| 66 | 
            +
                #   # │ a   │
         | 
| 67 | 
            +
                #   # │ --- │
         | 
| 68 | 
            +
                #   # │ i64 │
         | 
| 69 | 
            +
                #   # ╞═════╡
         | 
| 70 | 
            +
                #   # │ 3   │
         | 
| 71 | 
            +
                #   # │ 7   │
         | 
| 72 | 
            +
                #   # └─────┘
         | 
| 73 | 
            +
                def sum
         | 
| 74 | 
            +
                  super
         | 
| 75 | 
            +
                end
         | 
| 76 | 
            +
              end
         | 
| 77 | 
            +
            end
         | 
    
        data/lib/polars/data_frame.rb
    CHANGED
    
    | @@ -36,7 +36,7 @@ module Polars | |
| 36 36 | 
             
                  elsif data.is_a?(Hash)
         | 
| 37 37 | 
             
                    data = data.transform_keys { |v| v.is_a?(Symbol) ? v.to_s : v }
         | 
| 38 38 | 
             
                    self._df = self.class.hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, nan_to_null: nan_to_null)
         | 
| 39 | 
            -
                  elsif data.is_a?(Array)
         | 
| 39 | 
            +
                  elsif data.is_a?(::Array)
         | 
| 40 40 | 
             
                    self._df = self.class.sequence_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, orient: orient, infer_schema_length: infer_schema_length)
         | 
| 41 41 | 
             
                  elsif data.is_a?(Series)
         | 
| 42 42 | 
             
                    self._df = self.class.series_to_rbdf(data, schema: schema, schema_overrides: schema_overrides)
         | 
| @@ -116,7 +116,7 @@ module Polars | |
| 116 116 | 
             
                      dtypes.each do|k, v|
         | 
| 117 117 | 
             
                        dtype_list << [k, Utils.rb_type_to_dtype(v)]
         | 
| 118 118 | 
             
                      end
         | 
| 119 | 
            -
                    elsif dtypes.is_a?(Array)
         | 
| 119 | 
            +
                    elsif dtypes.is_a?(::Array)
         | 
| 120 120 | 
             
                      dtype_slice = dtypes
         | 
| 121 121 | 
             
                    else
         | 
| 122 122 | 
             
                      raise ArgumentError, "dtype arg should be list or dict"
         | 
| @@ -590,7 +590,7 @@ module Polars | |
| 590 590 |  | 
| 591 591 | 
             
                    # df[2, ..] (select row as df)
         | 
| 592 592 | 
             
                    if row_selection.is_a?(Integer)
         | 
| 593 | 
            -
                      if col_selection.is_a?(Array)
         | 
| 593 | 
            +
                      if col_selection.is_a?(::Array)
         | 
| 594 594 | 
             
                        df = self[0.., col_selection]
         | 
| 595 595 | 
             
                        return df.slice(row_selection, 1)
         | 
| 596 596 | 
             
                      end
         | 
| @@ -611,7 +611,7 @@ module Polars | |
| 611 611 | 
             
                      return series[row_selection]
         | 
| 612 612 | 
             
                    end
         | 
| 613 613 |  | 
| 614 | 
            -
                    if col_selection.is_a?(Array)
         | 
| 614 | 
            +
                    if col_selection.is_a?(::Array)
         | 
| 615 615 | 
             
                      # df[.., [1, 2]]
         | 
| 616 616 | 
             
                      if Utils.is_int_sequence(col_selection)
         | 
| 617 617 | 
             
                        series_list = col_selection.map { |i| to_series(i) }
         | 
| @@ -641,7 +641,7 @@ module Polars | |
| 641 641 | 
             
                      return Slice.new(self).apply(item)
         | 
| 642 642 | 
             
                    end
         | 
| 643 643 |  | 
| 644 | 
            -
                    if item.is_a?(Array) && item.all? { |v| Utils.strlike?(v) }
         | 
| 644 | 
            +
                    if item.is_a?(::Array) && item.all? { |v| Utils.strlike?(v) }
         | 
| 645 645 | 
             
                      # select multiple columns
         | 
| 646 646 | 
             
                      # df[["foo", "bar"]]
         | 
| 647 647 | 
             
                      return _from_rbdf(_df.select(item.map(&:to_s)))
         | 
| @@ -684,13 +684,13 @@ module Polars | |
| 684 684 | 
             
                  end
         | 
| 685 685 |  | 
| 686 686 | 
             
                  if Utils.strlike?(key)
         | 
| 687 | 
            -
                    if value.is_a?(Array) || (defined?(Numo::NArray) && value.is_a?(Numo::NArray))
         | 
| 687 | 
            +
                    if value.is_a?(::Array) || (defined?(Numo::NArray) && value.is_a?(Numo::NArray))
         | 
| 688 688 | 
             
                      value = Series.new(value)
         | 
| 689 689 | 
             
                    elsif !value.is_a?(Series)
         | 
| 690 690 | 
             
                      value = Polars.lit(value)
         | 
| 691 691 | 
             
                    end
         | 
| 692 692 | 
             
                    self._df = with_column(value.alias(key.to_s))._df
         | 
| 693 | 
            -
                  elsif key.is_a?(Array)
         | 
| 693 | 
            +
                  elsif key.is_a?(::Array)
         | 
| 694 694 | 
             
                    row_selection, col_selection = key
         | 
| 695 695 |  | 
| 696 696 | 
             
                    if Utils.strlike?(col_selection)
         | 
| @@ -994,14 +994,21 @@ module Polars | |
| 994 994 | 
             
                #
         | 
| 995 995 | 
             
                # @return [nil]
         | 
| 996 996 | 
             
                def write_ipc(file, compression: "uncompressed")
         | 
| 997 | 
            -
                   | 
| 998 | 
            -
             | 
| 997 | 
            +
                  return_bytes = file.nil?
         | 
| 998 | 
            +
                  if return_bytes
         | 
| 999 | 
            +
                    file = StringIO.new
         | 
| 1000 | 
            +
                    file.set_encoding(Encoding::BINARY)
         | 
| 999 1001 | 
             
                  end
         | 
| 1000 1002 | 
             
                  if Utils.pathlike?(file)
         | 
| 1001 1003 | 
             
                    file = Utils.normalise_filepath(file)
         | 
| 1002 1004 | 
             
                  end
         | 
| 1003 1005 |  | 
| 1006 | 
            +
                  if compression.nil?
         | 
| 1007 | 
            +
                    compression = "uncompressed"
         | 
| 1008 | 
            +
                  end
         | 
| 1009 | 
            +
             | 
| 1004 1010 | 
             
                  _df.write_ipc(file, compression)
         | 
| 1011 | 
            +
                  return_bytes ? file.string : nil
         | 
| 1005 1012 | 
             
                end
         | 
| 1006 1013 |  | 
| 1007 1014 | 
             
                # Write to Apache Parquet file.
         | 
| @@ -1491,13 +1498,9 @@ module Polars | |
| 1491 1498 | 
             
                #   # │ 1   ┆ 6.0 ┆ a   │
         | 
| 1492 1499 | 
             
                #   # └─────┴─────┴─────┘
         | 
| 1493 1500 | 
             
                def sort(by, reverse: false, nulls_last: false)
         | 
| 1494 | 
            -
                   | 
| 1495 | 
            -
                     | 
| 1496 | 
            -
             | 
| 1497 | 
            -
                      .collect(no_optimization: true, string_cache: false)
         | 
| 1498 | 
            -
                  else
         | 
| 1499 | 
            -
                    _from_rbdf(_df.sort(by, reverse, nulls_last))
         | 
| 1500 | 
            -
                  end
         | 
| 1501 | 
            +
                  lazy
         | 
| 1502 | 
            +
                    .sort(by, reverse: reverse, nulls_last: nulls_last)
         | 
| 1503 | 
            +
                    .collect(no_optimization: true)
         | 
| 1501 1504 | 
             
                end
         | 
| 1502 1505 |  | 
| 1503 1506 | 
             
                # Sort the DataFrame by column in-place.
         | 
| @@ -1899,6 +1902,12 @@ module Polars | |
| 1899 1902 | 
             
                #   Define whether the temporal window interval is closed or not.
         | 
| 1900 1903 | 
             
                # @param by [Object]
         | 
| 1901 1904 | 
             
                #   Also group by this column/these columns.
         | 
| 1905 | 
            +
                # @param check_sorted [Boolean]
         | 
| 1906 | 
            +
                #   When the `by` argument is given, polars can not check sortedness
         | 
| 1907 | 
            +
                #   by the metadata and has to do a full scan on the index column to
         | 
| 1908 | 
            +
                #   verify data is sorted. This is expensive. If you are sure the
         | 
| 1909 | 
            +
                #   data within the by groups is sorted, you can set this to `false`.
         | 
| 1910 | 
            +
                #   Doing so incorrectly will lead to incorrect output.
         | 
| 1902 1911 | 
             
                #
         | 
| 1903 1912 | 
             
                # @return [RollingGroupBy]
         | 
| 1904 1913 | 
             
                #
         | 
| @@ -1912,7 +1921,7 @@ module Polars | |
| 1912 1921 | 
             
                #     "2020-01-08 23:16:43"
         | 
| 1913 1922 | 
             
                #   ]
         | 
| 1914 1923 | 
             
                #   df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
         | 
| 1915 | 
            -
                #     Polars.col("dt").str.strptime(Polars::Datetime)
         | 
| 1924 | 
            +
                #     Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
         | 
| 1916 1925 | 
             
                #   )
         | 
| 1917 1926 | 
             
                #   df.groupby_rolling(index_column: "dt", period: "2d").agg(
         | 
| 1918 1927 | 
             
                #     [
         | 
| @@ -1940,9 +1949,10 @@ module Polars | |
| 1940 1949 | 
             
                  period:,
         | 
| 1941 1950 | 
             
                  offset: nil,
         | 
| 1942 1951 | 
             
                  closed: "right",
         | 
| 1943 | 
            -
                  by: nil
         | 
| 1952 | 
            +
                  by: nil,
         | 
| 1953 | 
            +
                  check_sorted: true
         | 
| 1944 1954 | 
             
                )
         | 
| 1945 | 
            -
                  RollingGroupBy.new(self, index_column, period, offset, closed, by)
         | 
| 1955 | 
            +
                  RollingGroupBy.new(self, index_column, period, offset, closed, by, check_sorted)
         | 
| 1946 1956 | 
             
                end
         | 
| 1947 1957 |  | 
| 1948 1958 | 
             
                # Group based on a time value (or index value of type `:i32`, `:i64`).
         | 
| @@ -2078,21 +2088,21 @@ module Polars | |
| 2078 2088 | 
             
                #   df.groupby_dynamic("time", every: "1h", closed: "left").agg(
         | 
| 2079 2089 | 
             
                #     [
         | 
| 2080 2090 | 
             
                #       Polars.col("time").count.alias("time_count"),
         | 
| 2081 | 
            -
                #       Polars.col("time"). | 
| 2091 | 
            +
                #       Polars.col("time").alias("time_agg_list")
         | 
| 2082 2092 | 
             
                #     ]
         | 
| 2083 2093 | 
             
                #   )
         | 
| 2084 2094 | 
             
                #   # =>
         | 
| 2085 2095 | 
             
                #   # shape: (4, 3)
         | 
| 2086 | 
            -
                #   #  | 
| 2087 | 
            -
                #   # │ time                ┆ time_count ┆ time_agg_list | 
| 2088 | 
            -
                #   # │ ---                 ┆ ---        ┆ --- | 
| 2089 | 
            -
                #   # │ datetime[μs]        ┆ u32        ┆ list[datetime[μs]] | 
| 2090 | 
            -
                #   #  | 
| 2091 | 
            -
                #   # │ 2021-12-16 00:00:00 ┆ 2          ┆ [2021-12-16 00:00:00, 2021-12-16 | 
| 2092 | 
            -
                #   # │ 2021-12-16 01:00:00 ┆ 2          ┆ [2021-12-16 01:00:00, 2021-12-16 | 
| 2093 | 
            -
                #   # │ 2021-12-16 02:00:00 ┆ 2          ┆ [2021-12-16 02:00:00, 2021-12-16 | 
| 2094 | 
            -
                #   # │ 2021-12-16 03:00:00 ┆ 1          ┆ [2021-12-16 03:00:00] | 
| 2095 | 
            -
                #   #  | 
| 2096 | 
            +
                #   # ┌─────────────────────┬────────────┬───────────────────────────────────┐
         | 
| 2097 | 
            +
                #   # │ time                ┆ time_count ┆ time_agg_list                     │
         | 
| 2098 | 
            +
                #   # │ ---                 ┆ ---        ┆ ---                               │
         | 
| 2099 | 
            +
                #   # │ datetime[μs]        ┆ u32        ┆ list[datetime[μs]]                │
         | 
| 2100 | 
            +
                #   # ╞═════════════════════╪════════════╪═══════════════════════════════════╡
         | 
| 2101 | 
            +
                #   # │ 2021-12-16 00:00:00 ┆ 2          ┆ [2021-12-16 00:00:00, 2021-12-16… │
         | 
| 2102 | 
            +
                #   # │ 2021-12-16 01:00:00 ┆ 2          ┆ [2021-12-16 01:00:00, 2021-12-16… │
         | 
| 2103 | 
            +
                #   # │ 2021-12-16 02:00:00 ┆ 2          ┆ [2021-12-16 02:00:00, 2021-12-16… │
         | 
| 2104 | 
            +
                #   # │ 2021-12-16 03:00:00 ┆ 1          ┆ [2021-12-16 03:00:00]             │
         | 
| 2105 | 
            +
                #   # └─────────────────────┴────────────┴───────────────────────────────────┘
         | 
| 2096 2106 | 
             
                #
         | 
| 2097 2107 | 
             
                # @example When closed="both" the time values at the window boundaries belong to 2 groups.
         | 
| 2098 2108 | 
             
                #   df.groupby_dynamic("time", every: "1h", closed: "both").agg(
         | 
| @@ -2159,7 +2169,7 @@ module Polars | |
| 2159 2169 | 
             
                #     period: "3i",
         | 
| 2160 2170 | 
             
                #     include_boundaries: true,
         | 
| 2161 2171 | 
             
                #     closed: "right"
         | 
| 2162 | 
            -
                #   ).agg(Polars.col("A"). | 
| 2172 | 
            +
                #   ).agg(Polars.col("A").alias("A_agg_list"))
         | 
| 2163 2173 | 
             
                #   # =>
         | 
| 2164 2174 | 
             
                #   # shape: (3, 4)
         | 
| 2165 2175 | 
             
                #   # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
         | 
| @@ -2242,7 +2252,7 @@ module Polars | |
| 2242 2252 | 
             
                #       "groups" => ["A", "B", "A", "B"],
         | 
| 2243 2253 | 
             
                #       "values" => [0, 1, 2, 3]
         | 
| 2244 2254 | 
             
                #     }
         | 
| 2245 | 
            -
                #   )
         | 
| 2255 | 
            +
                #   ).set_sorted("time")
         | 
| 2246 2256 | 
             
                #   df.upsample(
         | 
| 2247 2257 | 
             
                #     time_column: "time", every: "1mo", by: "groups", maintain_order: true
         | 
| 2248 2258 | 
             
                #   ).select(Polars.all.forward_fill)
         | 
| @@ -2360,7 +2370,7 @@ module Polars | |
| 2360 2370 | 
             
                #       ],  # note record date: Jan 1st (sorted!)
         | 
| 2361 2371 | 
             
                #       "gdp" => [4164, 4411, 4566, 4696]
         | 
| 2362 2372 | 
             
                #     }
         | 
| 2363 | 
            -
                #   )
         | 
| 2373 | 
            +
                #   ).set_sorted("date")
         | 
| 2364 2374 | 
             
                #   population = Polars::DataFrame.new(
         | 
| 2365 2375 | 
             
                #     {
         | 
| 2366 2376 | 
             
                #       "date" => [
         | 
| @@ -2371,7 +2381,7 @@ module Polars | |
| 2371 2381 | 
             
                #       ],  # note record date: May 12th (sorted!)
         | 
| 2372 2382 | 
             
                #       "population" => [82.19, 82.66, 83.12, 83.52]
         | 
| 2373 2383 | 
             
                #     }
         | 
| 2374 | 
            -
                #   )
         | 
| 2384 | 
            +
                #   ).set_sorted("date")
         | 
| 2375 2385 | 
             
                #   population.join_asof(
         | 
| 2376 2386 | 
             
                #     gdp, left_on: "date", right_on: "date", strategy: "backward"
         | 
| 2377 2387 | 
             
                #   )
         | 
| @@ -2674,7 +2684,7 @@ module Polars | |
| 2674 2684 | 
             
                #   # │ 3   ┆ 8   ┆ c   ┆ 30    │
         | 
| 2675 2685 | 
             
                #   # └─────┴─────┴─────┴───────┘
         | 
| 2676 2686 | 
             
                def hstack(columns, in_place: false)
         | 
| 2677 | 
            -
                  if !columns.is_a?(Array)
         | 
| 2687 | 
            +
                  if !columns.is_a?(::Array)
         | 
| 2678 2688 | 
             
                    columns = columns.get_columns
         | 
| 2679 2689 | 
             
                  end
         | 
| 2680 2690 | 
             
                  if in_place
         | 
| @@ -2804,7 +2814,7 @@ module Polars | |
| 2804 2814 | 
             
                #   # │ 3   ┆ 8.0 │
         | 
| 2805 2815 | 
             
                #   # └─────┴─────┘
         | 
| 2806 2816 | 
             
                def drop(columns)
         | 
| 2807 | 
            -
                  if columns.is_a?(Array)
         | 
| 2817 | 
            +
                  if columns.is_a?(::Array)
         | 
| 2808 2818 | 
             
                    df = clone
         | 
| 2809 2819 | 
             
                    columns.each do |n|
         | 
| 2810 2820 | 
             
                      df._df.drop_in_place(n)
         | 
| @@ -3317,7 +3327,7 @@ module Polars | |
| 3317 3327 | 
             
                  n_fill = n_cols * n_rows - height
         | 
| 3318 3328 |  | 
| 3319 3329 | 
             
                  if n_fill > 0
         | 
| 3320 | 
            -
                    if !fill_values.is_a?(Array)
         | 
| 3330 | 
            +
                    if !fill_values.is_a?(::Array)
         | 
| 3321 3331 | 
             
                      fill_values = [fill_values] * df.width
         | 
| 3322 3332 | 
             
                    end
         | 
| 3323 3333 |  | 
| @@ -3426,29 +3436,29 @@ module Polars | |
| 3426 3436 | 
             
                #   # ╞═════╪═════╪═════╡
         | 
| 3427 3437 | 
             
                #   # │ C   ┆ 2   ┆ l   │
         | 
| 3428 3438 | 
             
                #   # └─────┴─────┴─────┘}
         | 
| 3429 | 
            -
                def partition_by(groups, maintain_order: true, as_dict: false)
         | 
| 3439 | 
            +
                def partition_by(groups, maintain_order: true, include_key: true, as_dict: false)
         | 
| 3430 3440 | 
             
                  if groups.is_a?(String)
         | 
| 3431 3441 | 
             
                    groups = [groups]
         | 
| 3432 | 
            -
                  elsif !groups.is_a?(Array)
         | 
| 3442 | 
            +
                  elsif !groups.is_a?(::Array)
         | 
| 3433 3443 | 
             
                    groups = Array(groups)
         | 
| 3434 3444 | 
             
                  end
         | 
| 3435 3445 |  | 
| 3436 3446 | 
             
                  if as_dict
         | 
| 3437 3447 | 
             
                    out = {}
         | 
| 3438 3448 | 
             
                    if groups.length == 1
         | 
| 3439 | 
            -
                      _df.partition_by(groups, maintain_order).each do |df|
         | 
| 3449 | 
            +
                      _df.partition_by(groups, maintain_order, include_key).each do |df|
         | 
| 3440 3450 | 
             
                        df = _from_rbdf(df)
         | 
| 3441 3451 | 
             
                        out[df[groups][0, 0]] = df
         | 
| 3442 3452 | 
             
                      end
         | 
| 3443 3453 | 
             
                    else
         | 
| 3444 | 
            -
                      _df.partition_by(groups, maintain_order).each do |df|
         | 
| 3454 | 
            +
                      _df.partition_by(groups, maintain_order, include_key).each do |df|
         | 
| 3445 3455 | 
             
                        df = _from_rbdf(df)
         | 
| 3446 3456 | 
             
                        out[df[groups].row(0)] = df
         | 
| 3447 3457 | 
             
                      end
         | 
| 3448 3458 | 
             
                    end
         | 
| 3449 3459 | 
             
                    out
         | 
| 3450 3460 | 
             
                  else
         | 
| 3451 | 
            -
                    _df.partition_by(groups, maintain_order).map { |df| _from_rbdf(df) }
         | 
| 3461 | 
            +
                    _df.partition_by(groups, maintain_order, include_key).map { |df| _from_rbdf(df) }
         | 
| 3452 3462 | 
             
                  end
         | 
| 3453 3463 | 
             
                end
         | 
| 3454 3464 |  | 
| @@ -3716,7 +3726,7 @@ module Polars | |
| 3716 3726 | 
             
                #   # │ 4   ┆ 13.0 ┆ true  ┆ 16.0 ┆ 6.5  ┆ false │
         | 
| 3717 3727 | 
             
                #   # └─────┴──────┴───────┴──────┴──────┴───────┘
         | 
| 3718 3728 | 
             
                def with_columns(exprs)
         | 
| 3719 | 
            -
                  if !exprs.nil? && !exprs.is_a?(Array)
         | 
| 3729 | 
            +
                  if !exprs.nil? && !exprs.is_a?(::Array)
         | 
| 3720 3730 | 
             
                    exprs = [exprs]
         | 
| 3721 3731 | 
             
                  end
         | 
| 3722 3732 | 
             
                  lazy
         | 
| @@ -4097,11 +4107,11 @@ module Polars | |
| 4097 4107 | 
             
                #   # │ 1     ┆ 0     ┆ 1     ┆ 0     ┆ 1     ┆ 0     │
         | 
| 4098 4108 | 
             
                #   # │ 0     ┆ 1     ┆ 0     ┆ 1     ┆ 0     ┆ 1     │
         | 
| 4099 4109 | 
             
                #   # └───────┴───────┴───────┴───────┴───────┴───────┘
         | 
| 4100 | 
            -
                def to_dummies(columns: nil, separator: "_")
         | 
| 4110 | 
            +
                def to_dummies(columns: nil, separator: "_", drop_first: false)
         | 
| 4101 4111 | 
             
                  if columns.is_a?(String)
         | 
| 4102 4112 | 
             
                    columns = [columns]
         | 
| 4103 4113 | 
             
                  end
         | 
| 4104 | 
            -
                  _from_rbdf(_df.to_dummies(columns, separator))
         | 
| 4114 | 
            +
                  _from_rbdf(_df.to_dummies(columns, separator, drop_first))
         | 
| 4105 4115 | 
             
                end
         | 
| 4106 4116 |  | 
| 4107 4117 | 
             
                # Drop duplicate rows from this DataFrame.
         | 
| @@ -4189,7 +4199,7 @@ module Polars | |
| 4189 4199 | 
             
                    subset = [subset]
         | 
| 4190 4200 | 
             
                  end
         | 
| 4191 4201 |  | 
| 4192 | 
            -
                  if subset.is_a?(Array) && subset.length == 1
         | 
| 4202 | 
            +
                  if subset.is_a?(::Array) && subset.length == 1
         | 
| 4193 4203 | 
             
                    expr = Utils.expr_to_lit_or_expr(subset[0], str_to_lit: false)
         | 
| 4194 4204 | 
             
                  else
         | 
| 4195 4205 | 
             
                    struct_fields = subset.nil? ? Polars.all : subset
         | 
| @@ -4758,6 +4768,38 @@ module Polars | |
| 4758 4768 | 
             
                  _from_rbdf(_df.unnest(names))
         | 
| 4759 4769 | 
             
                end
         | 
| 4760 4770 |  | 
| 4771 | 
            +
                # TODO
         | 
| 4772 | 
            +
                # def corr
         | 
| 4773 | 
            +
                # end
         | 
| 4774 | 
            +
             | 
| 4775 | 
            +
                # TODO
         | 
| 4776 | 
            +
                # def merge_sorted
         | 
| 4777 | 
            +
                # end
         | 
| 4778 | 
            +
             | 
| 4779 | 
            +
                # Indicate that one or multiple columns are sorted.
         | 
| 4780 | 
            +
                #
         | 
| 4781 | 
            +
                # @param column [Object]
         | 
| 4782 | 
            +
                #   Columns that are sorted
         | 
| 4783 | 
            +
                # @param more_columns [Object]
         | 
| 4784 | 
            +
                #   Additional columns that are sorted, specified as positional arguments.
         | 
| 4785 | 
            +
                # @param descending [Boolean]
         | 
| 4786 | 
            +
                #   Whether the columns are sorted in descending order.
         | 
| 4787 | 
            +
                #
         | 
| 4788 | 
            +
                # @return [DataFrame]
         | 
| 4789 | 
            +
                def set_sorted(
         | 
| 4790 | 
            +
                  column,
         | 
| 4791 | 
            +
                  *more_columns,
         | 
| 4792 | 
            +
                  descending: false
         | 
| 4793 | 
            +
                )
         | 
| 4794 | 
            +
                  lazy
         | 
| 4795 | 
            +
                    .set_sorted(column, *more_columns, descending: descending)
         | 
| 4796 | 
            +
                    .collect(no_optimization: true)
         | 
| 4797 | 
            +
                end
         | 
| 4798 | 
            +
             | 
| 4799 | 
            +
                # TODO
         | 
| 4800 | 
            +
                # def update
         | 
| 4801 | 
            +
                # end
         | 
| 4802 | 
            +
             | 
| 4761 4803 | 
             
                private
         | 
| 4762 4804 |  | 
| 4763 4805 | 
             
                def initialize_copy(other)
         | 
| @@ -4967,7 +5009,7 @@ module Polars | |
| 4967 5009 | 
             
                  columns.each do |col, i|
         | 
| 4968 5010 | 
             
                    if dtypes[col] == Categorical # != rbdf_dtypes[i]
         | 
| 4969 5011 | 
             
                      column_casts << Polars.col(col).cast(Categorical)._rbexpr
         | 
| 4970 | 
            -
                    elsif structs | 
| 5012 | 
            +
                    elsif structs&.any? && structs.include?(col) && structs[col] != rbdf_dtypes[i]
         | 
| 4971 5013 | 
             
                      column_casts << Polars.col(col).cast(structs[col])._rbexpr
         | 
| 4972 5014 | 
             
                    elsif dtypes.include?(col) && dtypes[col] != rbdf_dtypes[i]
         | 
| 4973 5015 | 
             
                      column_casts << Polars.col(col).cast(dtypes[col])._rbexpr
         | 
| @@ -5012,7 +5054,7 @@ module Polars | |
| 5012 5054 | 
             
                      rbdf = _post_apply_columns(rbdf, column_names)
         | 
| 5013 5055 | 
             
                    end
         | 
| 5014 5056 | 
             
                    return rbdf
         | 
| 5015 | 
            -
                  elsif data[0].is_a?(Array)
         | 
| 5057 | 
            +
                  elsif data[0].is_a?(::Array)
         | 
| 5016 5058 | 
             
                    if orient.nil? && !columns.nil?
         | 
| 5017 5059 | 
             
                      orient = columns.length == data.length ? "col" : "row"
         | 
| 5018 5060 | 
             
                    end
         | 
| @@ -5117,7 +5159,7 @@ module Polars | |
| 5117 5159 |  | 
| 5118 5160 | 
             
                def _prepare_other_arg(other)
         | 
| 5119 5161 | 
             
                  if !other.is_a?(Series)
         | 
| 5120 | 
            -
                    if other.is_a?(Array)
         | 
| 5162 | 
            +
                    if other.is_a?(::Array)
         | 
| 5121 5163 | 
             
                      raise ArgumentError, "Operation not supported."
         | 
| 5122 5164 | 
             
                    end
         | 
| 5123 5165 |  |