polars-df 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/CHANGELOG.md +42 -1
 - data/Cargo.lock +159 -66
 - data/Cargo.toml +0 -3
 - data/LICENSE.txt +1 -1
 - data/README.md +3 -2
 - data/ext/polars/Cargo.toml +18 -8
 - data/ext/polars/src/batched_csv.rs +7 -5
 - data/ext/polars/src/conversion/anyvalue.rs +186 -0
 - data/ext/polars/src/conversion/chunked_array.rs +140 -0
 - data/ext/polars/src/{conversion.rs → conversion/mod.rs} +273 -342
 - data/ext/polars/src/dataframe.rs +108 -66
 - data/ext/polars/src/expr/array.rs +78 -0
 - data/ext/polars/src/expr/datetime.rs +29 -58
 - data/ext/polars/src/expr/general.rs +83 -36
 - data/ext/polars/src/expr/list.rs +58 -6
 - data/ext/polars/src/expr/meta.rs +48 -0
 - data/ext/polars/src/expr/rolling.rs +1 -0
 - data/ext/polars/src/expr/string.rs +62 -11
 - data/ext/polars/src/expr/struct.rs +8 -4
 - data/ext/polars/src/file.rs +158 -11
 - data/ext/polars/src/functions/aggregation.rs +6 -0
 - data/ext/polars/src/functions/lazy.rs +120 -50
 - data/ext/polars/src/functions/meta.rs +45 -1
 - data/ext/polars/src/functions/string_cache.rs +14 -0
 - data/ext/polars/src/functions/whenthen.rs +47 -17
 - data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +195 -40
 - data/ext/polars/src/lib.rs +246 -179
 - data/ext/polars/src/map/dataframe.rs +17 -9
 - data/ext/polars/src/series/aggregation.rs +20 -0
 - data/ext/polars/src/series/mod.rs +35 -4
 - data/lib/polars/array_expr.rb +453 -0
 - data/lib/polars/array_name_space.rb +346 -0
 - data/lib/polars/batched_csv_reader.rb +4 -2
 - data/lib/polars/cat_expr.rb +24 -0
 - data/lib/polars/cat_name_space.rb +75 -0
 - data/lib/polars/config.rb +2 -2
 - data/lib/polars/data_frame.rb +306 -96
 - data/lib/polars/data_types.rb +191 -28
 - data/lib/polars/date_time_expr.rb +41 -18
 - data/lib/polars/date_time_name_space.rb +9 -3
 - data/lib/polars/exceptions.rb +12 -1
 - data/lib/polars/expr.rb +898 -215
 - data/lib/polars/functions/aggregation/horizontal.rb +246 -0
 - data/lib/polars/functions/aggregation/vertical.rb +282 -0
 - data/lib/polars/functions/as_datatype.rb +248 -0
 - data/lib/polars/functions/col.rb +47 -0
 - data/lib/polars/functions/eager.rb +182 -0
 - data/lib/polars/functions/lazy.rb +1280 -0
 - data/lib/polars/functions/len.rb +49 -0
 - data/lib/polars/functions/lit.rb +35 -0
 - data/lib/polars/functions/random.rb +16 -0
 - data/lib/polars/functions/range/date_range.rb +103 -0
 - data/lib/polars/functions/range/int_range.rb +51 -0
 - data/lib/polars/functions/repeat.rb +144 -0
 - data/lib/polars/functions/whenthen.rb +96 -0
 - data/lib/polars/functions.rb +29 -416
 - data/lib/polars/group_by.rb +2 -2
 - data/lib/polars/io.rb +36 -31
 - data/lib/polars/lazy_frame.rb +405 -88
 - data/lib/polars/list_expr.rb +158 -8
 - data/lib/polars/list_name_space.rb +102 -0
 - data/lib/polars/meta_expr.rb +175 -7
 - data/lib/polars/series.rb +282 -41
 - data/lib/polars/string_cache.rb +75 -0
 - data/lib/polars/string_expr.rb +413 -96
 - data/lib/polars/string_name_space.rb +4 -4
 - data/lib/polars/testing.rb +507 -0
 - data/lib/polars/utils.rb +106 -8
 - data/lib/polars/version.rb +1 -1
 - data/lib/polars/whenthen.rb +83 -0
 - data/lib/polars.rb +16 -4
 - metadata +37 -8
 - data/lib/polars/lazy_functions.rb +0 -1181
 - data/lib/polars/when.rb +0 -16
 - data/lib/polars/when_then.rb +0 -19
 
    
        data/lib/polars/data_frame.rb
    CHANGED
    
    | 
         @@ -47,8 +47,8 @@ module Polars 
     | 
|
| 
       47 
47 
     | 
    
         
             
                end
         
     | 
| 
       48 
48 
     | 
    
         | 
| 
       49 
49 
     | 
    
         
             
                # @private
         
     | 
| 
       50 
     | 
    
         
            -
                def self._from_hashes(data, infer_schema_length: 100, schema: nil)
         
     | 
| 
       51 
     | 
    
         
            -
                  rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema)
         
     | 
| 
      
 50 
     | 
    
         
            +
                def self._from_hashes(data, infer_schema_length: 100, schema: nil, schema_overrides: nil)
         
     | 
| 
      
 51 
     | 
    
         
            +
                  rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
         
     | 
| 
       52 
52 
     | 
    
         
             
                  _from_rbdf(rbdf)
         
     | 
| 
       53 
53 
     | 
    
         
             
                end
         
     | 
| 
       54 
54 
     | 
    
         | 
| 
         @@ -91,7 +91,8 @@ module Polars 
     | 
|
| 
       91 
91 
     | 
    
         
             
                  row_count_name: nil,
         
     | 
| 
       92 
92 
     | 
    
         
             
                  row_count_offset: 0,
         
     | 
| 
       93 
93 
     | 
    
         
             
                  sample_size: 1024,
         
     | 
| 
       94 
     | 
    
         
            -
                  eol_char: "\n"
         
     | 
| 
      
 94 
     | 
    
         
            +
                  eol_char: "\n",
         
     | 
| 
      
 95 
     | 
    
         
            +
                  truncate_ragged_lines: false
         
     | 
| 
       95 
96 
     | 
    
         
             
                )
         
     | 
| 
       96 
97 
     | 
    
         
             
                  if Utils.pathlike?(file)
         
     | 
| 
       97 
98 
     | 
    
         
             
                    path = Utils.normalise_filepath(file)
         
     | 
| 
         @@ -147,7 +148,8 @@ module Polars 
     | 
|
| 
       147 
148 
     | 
    
         
             
                      skip_rows_after_header: skip_rows_after_header,
         
     | 
| 
       148 
149 
     | 
    
         
             
                      row_count_name: row_count_name,
         
     | 
| 
       149 
150 
     | 
    
         
             
                      row_count_offset: row_count_offset,
         
     | 
| 
       150 
     | 
    
         
            -
                      eol_char: eol_char
         
     | 
| 
      
 151 
     | 
    
         
            +
                      eol_char: eol_char,
         
     | 
| 
      
 152 
     | 
    
         
            +
                      truncate_ragged_lines: truncate_ragged_lines
         
     | 
| 
       151 
153 
     | 
    
         
             
                    )
         
     | 
| 
       152 
154 
     | 
    
         
             
                    if columns.nil?
         
     | 
| 
       153 
155 
     | 
    
         
             
                      return _from_rbdf(scan.collect._df)
         
     | 
| 
         @@ -186,7 +188,8 @@ module Polars 
     | 
|
| 
       186 
188 
     | 
    
         
             
                      skip_rows_after_header,
         
     | 
| 
       187 
189 
     | 
    
         
             
                      Utils._prepare_row_count_args(row_count_name, row_count_offset),
         
     | 
| 
       188 
190 
     | 
    
         
             
                      sample_size,
         
     | 
| 
       189 
     | 
    
         
            -
                      eol_char
         
     | 
| 
      
 191 
     | 
    
         
            +
                      eol_char,
         
     | 
| 
      
 192 
     | 
    
         
            +
                      truncate_ragged_lines
         
     | 
| 
       190 
193 
     | 
    
         
             
                    )
         
     | 
| 
       191 
194 
     | 
    
         
             
                  )
         
     | 
| 
       192 
195 
     | 
    
         
             
                end
         
     | 
| 
         @@ -622,7 +625,7 @@ module Polars 
     | 
|
| 
       622 
625 
     | 
    
         
             
                    # select single column
         
     | 
| 
       623 
626 
     | 
    
         
             
                    # df["foo"]
         
     | 
| 
       624 
627 
     | 
    
         
             
                    if item.is_a?(::String) || item.is_a?(Symbol)
         
     | 
| 
       625 
     | 
    
         
            -
                      return Utils.wrap_s(_df. 
     | 
| 
      
 628 
     | 
    
         
            +
                      return Utils.wrap_s(_df.get_column(item.to_s))
         
     | 
| 
       626 
629 
     | 
    
         
             
                    end
         
     | 
| 
       627 
630 
     | 
    
         | 
| 
       628 
631 
     | 
    
         
             
                    # df[idx]
         
     | 
| 
         @@ -814,8 +817,6 @@ module Polars 
     | 
|
| 
       814 
817 
     | 
    
         | 
| 
       815 
818 
     | 
    
         
             
                # Serialize to JSON representation.
         
     | 
| 
       816 
819 
     | 
    
         
             
                #
         
     | 
| 
       817 
     | 
    
         
            -
                # @return [nil]
         
     | 
| 
       818 
     | 
    
         
            -
                #
         
     | 
| 
       819 
820 
     | 
    
         
             
                # @param file [String, StringIO, nil]
         
     | 
| 
       820 
821 
     | 
    
         
             
                #   File path to which the result should be written.
         
     | 
| 
       821 
822 
     | 
    
         
             
                # @param pretty [Boolean]
         
     | 
| 
         @@ -823,17 +824,45 @@ module Polars 
     | 
|
| 
       823 
824 
     | 
    
         
             
                # @param row_oriented [Boolean]
         
     | 
| 
       824 
825 
     | 
    
         
             
                #   Write to row oriented json. This is slower, but more common.
         
     | 
| 
       825 
826 
     | 
    
         
             
                #
         
     | 
| 
       826 
     | 
    
         
            -
                # @ 
     | 
| 
      
 827 
     | 
    
         
            +
                # @return [String, nil]
         
     | 
| 
      
 828 
     | 
    
         
            +
                #
         
     | 
| 
      
 829 
     | 
    
         
            +
                # @example
         
     | 
| 
      
 830 
     | 
    
         
            +
                #   df = Polars::DataFrame.new(
         
     | 
| 
      
 831 
     | 
    
         
            +
                #     {
         
     | 
| 
      
 832 
     | 
    
         
            +
                #       "foo" => [1, 2, 3],
         
     | 
| 
      
 833 
     | 
    
         
            +
                #       "bar" => [6, 7, 8]
         
     | 
| 
      
 834 
     | 
    
         
            +
                #     }
         
     | 
| 
      
 835 
     | 
    
         
            +
                #   )
         
     | 
| 
      
 836 
     | 
    
         
            +
                #   df.write_json
         
     | 
| 
      
 837 
     | 
    
         
            +
                #   # => "{\"columns\":[{\"name\":\"foo\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[1,2,3]},{\"name\":\"bar\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[6,7,8]}]}"
         
     | 
| 
      
 838 
     | 
    
         
            +
                #
         
     | 
| 
      
 839 
     | 
    
         
            +
                # @example
         
     | 
| 
      
 840 
     | 
    
         
            +
                #   df.write_json(row_oriented: true)
         
     | 
| 
      
 841 
     | 
    
         
            +
                #   # => "[{\"foo\":1,\"bar\":6},{\"foo\":2,\"bar\":7},{\"foo\":3,\"bar\":8}]"
         
     | 
| 
       827 
842 
     | 
    
         
             
                def write_json(
         
     | 
| 
       828 
     | 
    
         
            -
                  file,
         
     | 
| 
      
 843 
     | 
    
         
            +
                  file = nil,
         
     | 
| 
       829 
844 
     | 
    
         
             
                  pretty: false,
         
     | 
| 
       830 
845 
     | 
    
         
             
                  row_oriented: false
         
     | 
| 
       831 
846 
     | 
    
         
             
                )
         
     | 
| 
       832 
847 
     | 
    
         
             
                  if Utils.pathlike?(file)
         
     | 
| 
       833 
848 
     | 
    
         
             
                    file = Utils.normalise_filepath(file)
         
     | 
| 
       834 
849 
     | 
    
         
             
                  end
         
     | 
| 
       835 
     | 
    
         
            -
             
     | 
| 
       836 
     | 
    
         
            -
                   
     | 
| 
      
 850 
     | 
    
         
            +
                  to_string_io = !file.nil? && file.is_a?(StringIO)
         
     | 
| 
      
 851 
     | 
    
         
            +
                  if file.nil? || to_string_io
         
     | 
| 
      
 852 
     | 
    
         
            +
                    buf = StringIO.new
         
     | 
| 
      
 853 
     | 
    
         
            +
                    buf.set_encoding(Encoding::BINARY)
         
     | 
| 
      
 854 
     | 
    
         
            +
                    _df.write_json(buf, pretty, row_oriented)
         
     | 
| 
      
 855 
     | 
    
         
            +
                    json_bytes = buf.string
         
     | 
| 
      
 856 
     | 
    
         
            +
             
     | 
| 
      
 857 
     | 
    
         
            +
                    json_str = json_bytes.force_encoding(Encoding::UTF_8)
         
     | 
| 
      
 858 
     | 
    
         
            +
                    if to_string_io
         
     | 
| 
      
 859 
     | 
    
         
            +
                      file.write(json_str)
         
     | 
| 
      
 860 
     | 
    
         
            +
                    else
         
     | 
| 
      
 861 
     | 
    
         
            +
                      return json_str
         
     | 
| 
      
 862 
     | 
    
         
            +
                    end
         
     | 
| 
      
 863 
     | 
    
         
            +
                  else
         
     | 
| 
      
 864 
     | 
    
         
            +
                    _df.write_json(file, pretty, row_oriented)
         
     | 
| 
      
 865 
     | 
    
         
            +
                  end
         
     | 
| 
       837 
866 
     | 
    
         
             
                  nil
         
     | 
| 
       838 
867 
     | 
    
         
             
                end
         
     | 
| 
       839 
868 
     | 
    
         | 
| 
         @@ -843,12 +872,36 @@ module Polars 
     | 
|
| 
       843 
872 
     | 
    
         
             
                #   File path to which the result should be written.
         
     | 
| 
       844 
873 
     | 
    
         
             
                #
         
     | 
| 
       845 
874 
     | 
    
         
             
                # @return [String, nil]
         
     | 
| 
       846 
     | 
    
         
            -
                 
     | 
| 
      
 875 
     | 
    
         
            +
                #
         
     | 
| 
      
 876 
     | 
    
         
            +
                # @example
         
     | 
| 
      
 877 
     | 
    
         
            +
                #   df = Polars::DataFrame.new(
         
     | 
| 
      
 878 
     | 
    
         
            +
                #     {
         
     | 
| 
      
 879 
     | 
    
         
            +
                #       "foo" => [1, 2, 3],
         
     | 
| 
      
 880 
     | 
    
         
            +
                #       "bar" => [6, 7, 8]
         
     | 
| 
      
 881 
     | 
    
         
            +
                #     }
         
     | 
| 
      
 882 
     | 
    
         
            +
                #   )
         
     | 
| 
      
 883 
     | 
    
         
            +
                #   df.write_ndjson()
         
     | 
| 
      
 884 
     | 
    
         
            +
                #   # => "{\"foo\":1,\"bar\":6}\n{\"foo\":2,\"bar\":7}\n{\"foo\":3,\"bar\":8}\n"
         
     | 
| 
      
 885 
     | 
    
         
            +
                def write_ndjson(file = nil)
         
     | 
| 
       847 
886 
     | 
    
         
             
                  if Utils.pathlike?(file)
         
     | 
| 
       848 
887 
     | 
    
         
             
                    file = Utils.normalise_filepath(file)
         
     | 
| 
       849 
888 
     | 
    
         
             
                  end
         
     | 
| 
       850 
     | 
    
         
            -
             
     | 
| 
       851 
     | 
    
         
            -
                   
     | 
| 
      
 889 
     | 
    
         
            +
                  to_string_io = !file.nil? && file.is_a?(StringIO)
         
     | 
| 
      
 890 
     | 
    
         
            +
                  if file.nil? || to_string_io
         
     | 
| 
      
 891 
     | 
    
         
            +
                    buf = StringIO.new
         
     | 
| 
      
 892 
     | 
    
         
            +
                    buf.set_encoding(Encoding::BINARY)
         
     | 
| 
      
 893 
     | 
    
         
            +
                    _df.write_ndjson(buf)
         
     | 
| 
      
 894 
     | 
    
         
            +
                    json_bytes = buf.string
         
     | 
| 
      
 895 
     | 
    
         
            +
             
     | 
| 
      
 896 
     | 
    
         
            +
                    json_str = json_bytes.force_encoding(Encoding::UTF_8)
         
     | 
| 
      
 897 
     | 
    
         
            +
                    if to_string_io
         
     | 
| 
      
 898 
     | 
    
         
            +
                      file.write(json_str)
         
     | 
| 
      
 899 
     | 
    
         
            +
                    else
         
     | 
| 
      
 900 
     | 
    
         
            +
                      return json_str
         
     | 
| 
      
 901 
     | 
    
         
            +
                    end
         
     | 
| 
      
 902 
     | 
    
         
            +
                  else
         
     | 
| 
      
 903 
     | 
    
         
            +
                    _df.write_ndjson(file)
         
     | 
| 
      
 904 
     | 
    
         
            +
                  end
         
     | 
| 
       852 
905 
     | 
    
         
             
                  nil
         
     | 
| 
       853 
906 
     | 
    
         
             
                end
         
     | 
| 
       854 
907 
     | 
    
         | 
| 
         @@ -1010,7 +1063,7 @@ module Polars 
     | 
|
| 
       1010 
1063 
     | 
    
         | 
| 
       1011 
1064 
     | 
    
         
             
                # Write to Apache Parquet file.
         
     | 
| 
       1012 
1065 
     | 
    
         
             
                #
         
     | 
| 
       1013 
     | 
    
         
            -
                # @param file [String]
         
     | 
| 
      
 1066 
     | 
    
         
            +
                # @param file [String, Pathname, StringIO]
         
     | 
| 
       1014 
1067 
     | 
    
         
             
                #   File path to which the file should be written.
         
     | 
| 
       1015 
1068 
     | 
    
         
             
                # @param compression ["lz4", "uncompressed", "snappy", "gzip", "lzo", "brotli", "zstd"]
         
     | 
| 
       1016 
1069 
     | 
    
         
             
                #   Choose "zstd" for good compression performance.
         
     | 
| 
         @@ -1027,10 +1080,9 @@ module Polars 
     | 
|
| 
       1027 
1080 
     | 
    
         
             
                # @param statistics [Boolean]
         
     | 
| 
       1028 
1081 
     | 
    
         
             
                #   Write statistics to the parquet headers. This requires extra compute.
         
     | 
| 
       1029 
1082 
     | 
    
         
             
                # @param row_group_size [Integer, nil]
         
     | 
| 
       1030 
     | 
    
         
            -
                #   Size of the row groups in number of rows.
         
     | 
| 
       1031 
     | 
    
         
            -
                # 
     | 
| 
       1032 
     | 
    
         
            -
                #    
     | 
| 
       1033 
     | 
    
         
            -
                #   writing speeds.
         
     | 
| 
      
 1083 
     | 
    
         
            +
                #   Size of the row groups in number of rows. Defaults to 512^2 rows.
         
     | 
| 
      
 1084 
     | 
    
         
            +
                # @param data_page_size [Integer, nil]
         
     | 
| 
      
 1085 
     | 
    
         
            +
                #   Size of the data page in bytes. Defaults to 1024^2 bytes.
         
     | 
| 
       1034 
1086 
     | 
    
         
             
                #
         
     | 
| 
       1035 
1087 
     | 
    
         
             
                # @return [nil]
         
     | 
| 
       1036 
1088 
     | 
    
         
             
                def write_parquet(
         
     | 
| 
         @@ -1038,7 +1090,8 @@ module Polars 
     | 
|
| 
       1038 
1090 
     | 
    
         
             
                  compression: "zstd",
         
     | 
| 
       1039 
1091 
     | 
    
         
             
                  compression_level: nil,
         
     | 
| 
       1040 
1092 
     | 
    
         
             
                  statistics: false,
         
     | 
| 
       1041 
     | 
    
         
            -
                  row_group_size: nil
         
     | 
| 
      
 1093 
     | 
    
         
            +
                  row_group_size: nil,
         
     | 
| 
      
 1094 
     | 
    
         
            +
                  data_page_size: nil
         
     | 
| 
       1042 
1095 
     | 
    
         
             
                )
         
     | 
| 
       1043 
1096 
     | 
    
         
             
                  if compression.nil?
         
     | 
| 
       1044 
1097 
     | 
    
         
             
                    compression = "uncompressed"
         
     | 
| 
         @@ -1048,7 +1101,7 @@ module Polars 
     | 
|
| 
       1048 
1101 
     | 
    
         
             
                  end
         
     | 
| 
       1049 
1102 
     | 
    
         | 
| 
       1050 
1103 
     | 
    
         
             
                  _df.write_parquet(
         
     | 
| 
       1051 
     | 
    
         
            -
                    file, compression, compression_level, statistics, row_group_size
         
     | 
| 
      
 1104 
     | 
    
         
            +
                    file, compression, compression_level, statistics, row_group_size, data_page_size
         
     | 
| 
       1052 
1105 
     | 
    
         
             
                  )
         
     | 
| 
       1053 
1106 
     | 
    
         
             
                end
         
     | 
| 
       1054 
1107 
     | 
    
         | 
| 
         @@ -1084,7 +1137,7 @@ module Polars 
     | 
|
| 
       1084 
1137 
     | 
    
         
             
                #   df.estimated_size
         
     | 
| 
       1085 
1138 
     | 
    
         
             
                #   # => 25888898
         
     | 
| 
       1086 
1139 
     | 
    
         
             
                #   df.estimated_size("mb")
         
     | 
| 
       1087 
     | 
    
         
            -
                #   # =>  
     | 
| 
      
 1140 
     | 
    
         
            +
                #   # => 17.0601749420166
         
     | 
| 
       1088 
1141 
     | 
    
         
             
                def estimated_size(unit = "b")
         
     | 
| 
       1089 
1142 
     | 
    
         
             
                  sz = _df.estimated_size
         
     | 
| 
       1090 
1143 
     | 
    
         
             
                  Utils.scale_bytes(sz, to: unit)
         
     | 
| 
         @@ -1782,7 +1835,7 @@ module Polars 
     | 
|
| 
       1782 
1835 
     | 
    
         
             
                #       "b" => [2, 4, 6]
         
     | 
| 
       1783 
1836 
     | 
    
         
             
                #     }
         
     | 
| 
       1784 
1837 
     | 
    
         
             
                #   )
         
     | 
| 
       1785 
     | 
    
         
            -
                #   df. 
     | 
| 
      
 1838 
     | 
    
         
            +
                #   df.with_row_index
         
     | 
| 
       1786 
1839 
     | 
    
         
             
                #   # =>
         
     | 
| 
       1787 
1840 
     | 
    
         
             
                #   # shape: (3, 3)
         
     | 
| 
       1788 
1841 
     | 
    
         
             
                #   # ┌────────┬─────┬─────┐
         
     | 
| 
         @@ -1794,9 +1847,10 @@ module Polars 
     | 
|
| 
       1794 
1847 
     | 
    
         
             
                #   # │ 1      ┆ 3   ┆ 4   │
         
     | 
| 
       1795 
1848 
     | 
    
         
             
                #   # │ 2      ┆ 5   ┆ 6   │
         
     | 
| 
       1796 
1849 
     | 
    
         
             
                #   # └────────┴─────┴─────┘
         
     | 
| 
       1797 
     | 
    
         
            -
                def  
     | 
| 
       1798 
     | 
    
         
            -
                  _from_rbdf(_df. 
     | 
| 
      
 1850 
     | 
    
         
            +
                def with_row_index(name: "row_nr", offset: 0)
         
     | 
| 
      
 1851 
     | 
    
         
            +
                  _from_rbdf(_df.with_row_index(name, offset))
         
     | 
| 
       1799 
1852 
     | 
    
         
             
                end
         
     | 
| 
      
 1853 
     | 
    
         
            +
                alias_method :with_row_count, :with_row_index
         
     | 
| 
       1800 
1854 
     | 
    
         | 
| 
       1801 
1855 
     | 
    
         
             
                # Start a group by operation.
         
     | 
| 
       1802 
1856 
     | 
    
         
             
                #
         
     | 
| 
         @@ -2160,12 +2214,13 @@ module Polars 
     | 
|
| 
       2160 
2214 
     | 
    
         
             
                #     closed: "right"
         
     | 
| 
       2161 
2215 
     | 
    
         
             
                #   ).agg(Polars.col("A").alias("A_agg_list"))
         
     | 
| 
       2162 
2216 
     | 
    
         
             
                #   # =>
         
     | 
| 
       2163 
     | 
    
         
            -
                #   # shape: (3, 4)
     | 
| 
      
 2217 
     | 
    
         
            +
                #   # shape: (4, 4)
         
     | 
| 
       2164 
2218 
     | 
    
         
             
                #   # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
         
     | 
| 
       2165 
2219 
     | 
    
         
             
                #   # │ _lower_boundary ┆ _upper_boundary ┆ idx ┆ A_agg_list      │
         
     | 
| 
       2166 
2220 
     | 
    
         
             
                #   # │ ---             ┆ ---             ┆ --- ┆ ---             │
         
     | 
| 
       2167 
2221 
     | 
    
         
             
                #   # │ i64             ┆ i64             ┆ i64 ┆ list[str]       │
         
     | 
| 
       2168 
2222 
     | 
    
         
             
                #   # ╞═════════════════╪═════════════════╪═════╪═════════════════╡
         
     | 
| 
      
 2223 
     | 
    
         
            +
                #   # │ -2              ┆ 1               ┆ -2  ┆ ["A", "A"]      │
         
     | 
| 
       2169 
2224 
     | 
    
         
             
                #   # │ 0               ┆ 3               ┆ 0   ┆ ["A", "B", "B"] │
         
     | 
| 
       2170 
2225 
     | 
    
         
             
                #   # │ 2               ┆ 5               ┆ 2   ┆ ["B", "B", "C"] │
         
     | 
| 
       2171 
2226 
     | 
    
         
             
                #   # │ 4               ┆ 7               ┆ 4   ┆ ["C"]           │
         
     | 
| 
         @@ -2433,6 +2488,8 @@ module Polars 
     | 
|
| 
       2433 
2488 
     | 
    
         
             
                #   Join strategy.
         
     | 
| 
       2434 
2489 
     | 
    
         
             
                # @param suffix [String]
         
     | 
| 
       2435 
2490 
     | 
    
         
             
                #   Suffix to append to columns with a duplicate name.
         
     | 
| 
      
 2491 
     | 
    
         
            +
                # @param join_nulls [Boolean]
         
     | 
| 
      
 2492 
     | 
    
         
            +
                #   Join on null values. By default null values will never produce matches.
         
     | 
| 
       2436 
2493 
     | 
    
         
             
                #
         
     | 
| 
       2437 
2494 
     | 
    
         
             
                # @return [DataFrame]
         
     | 
| 
       2438 
2495 
     | 
    
         
             
                #
         
     | 
| 
         @@ -2515,7 +2572,7 @@ module Polars 
     | 
|
| 
       2515 
2572 
     | 
    
         
             
                #   # ╞═════╪═════╪═════╡
         
     | 
| 
       2516 
2573 
     | 
    
         
             
                #   # │ 3   ┆ 8.0 ┆ c   │
         
     | 
| 
       2517 
2574 
     | 
    
         
             
                #   # └─────┴─────┴─────┘
         
     | 
| 
       2518 
     | 
    
         
            -
                def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right")
         
     | 
| 
      
 2575 
     | 
    
         
            +
                def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right", join_nulls: false)
         
     | 
| 
       2519 
2576 
     | 
    
         
             
                  lazy
         
     | 
| 
       2520 
2577 
     | 
    
         
             
                    .join(
         
     | 
| 
       2521 
2578 
     | 
    
         
             
                      other.lazy,
         
     | 
| 
         @@ -2524,6 +2581,7 @@ module Polars 
     | 
|
| 
       2524 
2581 
     | 
    
         
             
                      on: on,
         
     | 
| 
       2525 
2582 
     | 
    
         
             
                      how: how,
         
     | 
| 
       2526 
2583 
     | 
    
         
             
                      suffix: suffix,
         
     | 
| 
      
 2584 
     | 
    
         
            +
                      join_nulls: join_nulls
         
     | 
| 
       2527 
2585 
     | 
    
         
             
                    )
         
     | 
| 
       2528 
2586 
     | 
    
         
             
                    .collect(no_optimization: true)
         
     | 
| 
       2529 
2587 
     | 
    
         
             
                end
         
     | 
| 
         @@ -2617,26 +2675,26 @@ module Polars 
     | 
|
| 
       2617 
2675 
     | 
    
         
             
                #   # ┌─────┬─────┬───────────┐
         
     | 
| 
       2618 
2676 
     | 
    
         
             
                #   # │ a   ┆ b   ┆ b_squared │
         
     | 
| 
       2619 
2677 
     | 
    
         
             
                #   # │ --- ┆ --- ┆ ---       │
         
     | 
| 
       2620 
     | 
    
         
            -
                #   # │ i64 ┆ i64 ┆  
     | 
| 
      
 2678 
     | 
    
         
            +
                #   # │ i64 ┆ i64 ┆ i64       │
         
     | 
| 
       2621 
2679 
     | 
    
         
             
                #   # ╞═════╪═════╪═══════════╡
         
     | 
| 
       2622 
     | 
    
         
            -
                #   # │ 1   ┆ 2   ┆ 4 
     | 
| 
       2623 
     | 
    
         
            -
                #   # │ 3   ┆ 4   ┆ 16 
     | 
| 
       2624 
     | 
    
         
            -
                #   # │ 5   ┆ 6   ┆ 36 
     | 
| 
      
 2680 
     | 
    
         
            +
                #   # │ 1   ┆ 2   ┆ 4         │
         
     | 
| 
      
 2681 
     | 
    
         
            +
                #   # │ 3   ┆ 4   ┆ 16        │
         
     | 
| 
      
 2682 
     | 
    
         
            +
                #   # │ 5   ┆ 6   ┆ 36        │
         
     | 
| 
       2625 
2683 
     | 
    
         
             
                #   # └─────┴─────┴───────────┘
         
     | 
| 
       2626 
2684 
     | 
    
         
             
                #
         
     | 
| 
       2627 
2685 
     | 
    
         
             
                # @example Replaced
         
     | 
| 
       2628 
2686 
     | 
    
         
             
                #   df.with_column(Polars.col("a") ** 2)
         
     | 
| 
       2629 
2687 
     | 
    
         
             
                #   # =>
         
     | 
| 
       2630 
2688 
     | 
    
         
             
                #   # shape: (3, 2)
         
     | 
| 
       2631 
     | 
    
         
            -
                #   #  
     | 
| 
       2632 
     | 
    
         
            -
                #   # │ a 
     | 
| 
       2633 
     | 
    
         
            -
                #   # │ --- 
     | 
| 
       2634 
     | 
    
         
            -
                #   # │  
     | 
| 
       2635 
     | 
    
         
            -
                #   #  
     | 
| 
       2636 
     | 
    
         
            -
                #   # │ 1 
     | 
| 
       2637 
     | 
    
         
            -
                #   # │ 9 
     | 
| 
       2638 
     | 
    
         
            -
                #   # │ 25 
     | 
| 
       2639 
     | 
    
         
            -
                #   #  
     | 
| 
      
 2689 
     | 
    
         
            +
                #   # ┌─────┬─────┐
         
     | 
| 
      
 2690 
     | 
    
         
            +
                #   # │ a   ┆ b   │
         
     | 
| 
      
 2691 
     | 
    
         
            +
                #   # │ --- ┆ --- │
         
     | 
| 
      
 2692 
     | 
    
         
            +
                #   # │ i64 ┆ i64 │
         
     | 
| 
      
 2693 
     | 
    
         
            +
                #   # ╞═════╪═════╡
         
     | 
| 
      
 2694 
     | 
    
         
            +
                #   # │ 1   ┆ 2   │
         
     | 
| 
      
 2695 
     | 
    
         
            +
                #   # │ 9   ┆ 4   │
         
     | 
| 
      
 2696 
     | 
    
         
            +
                #   # │ 25  ┆ 6   │
         
     | 
| 
      
 2697 
     | 
    
         
            +
                #   # └─────┴─────┘
         
     | 
| 
       2640 
2698 
     | 
    
         
             
                def with_column(column)
         
     | 
| 
       2641 
2699 
     | 
    
         
             
                  lazy
         
     | 
| 
       2642 
2700 
     | 
    
         
             
                    .with_column(column)
         
     | 
| 
         @@ -2803,16 +2861,36 @@ module Polars 
     | 
|
| 
       2803 
2861 
     | 
    
         
             
                #   # │ 2   ┆ 7.0 │
         
     | 
| 
       2804 
2862 
     | 
    
         
             
                #   # │ 3   ┆ 8.0 │
         
     | 
| 
       2805 
2863 
     | 
    
         
             
                #   # └─────┴─────┘
         
     | 
| 
       2806 
     | 
    
         
            -
                 
     | 
| 
       2807 
     | 
    
         
            -
             
     | 
| 
       2808 
     | 
    
         
            -
             
     | 
| 
       2809 
     | 
    
         
            -
             
     | 
| 
       2810 
     | 
    
         
            -
             
     | 
| 
       2811 
     | 
    
         
            -
             
     | 
| 
       2812 
     | 
    
         
            -
             
     | 
| 
       2813 
     | 
    
         
            -
             
     | 
| 
       2814 
     | 
    
         
            -
             
     | 
| 
       2815 
     | 
    
         
            -
             
     | 
| 
      
 2864 
     | 
    
         
            +
                #
         
     | 
| 
      
 2865 
     | 
    
         
            +
                # @example Drop multiple columns by passing a list of column names.
         
     | 
| 
      
 2866 
     | 
    
         
            +
                #   df.drop(["bar", "ham"])
         
     | 
| 
      
 2867 
     | 
    
         
            +
                #   # =>
         
     | 
| 
      
 2868 
     | 
    
         
            +
                #   # shape: (3, 1)
         
     | 
| 
      
 2869 
     | 
    
         
            +
                #   # ┌─────┐
         
     | 
| 
      
 2870 
     | 
    
         
            +
                #   # │ foo │
         
     | 
| 
      
 2871 
     | 
    
         
            +
                #   # │ --- │
         
     | 
| 
      
 2872 
     | 
    
         
            +
                #   # │ i64 │
         
     | 
| 
      
 2873 
     | 
    
         
            +
                #   # ╞═════╡
         
     | 
| 
      
 2874 
     | 
    
         
            +
                #   # │ 1   │
         
     | 
| 
      
 2875 
     | 
    
         
            +
                #   # │ 2   │
         
     | 
| 
      
 2876 
     | 
    
         
            +
                #   # │ 3   │
         
     | 
| 
      
 2877 
     | 
    
         
            +
                #   # └─────┘
         
     | 
| 
      
 2878 
     | 
    
         
            +
                #
         
     | 
| 
      
 2879 
     | 
    
         
            +
                # @example Use positional arguments to drop multiple columns.
         
     | 
| 
      
 2880 
     | 
    
         
            +
                #   df.drop("foo", "ham")
         
     | 
| 
      
 2881 
     | 
    
         
            +
                #   # =>
         
     | 
| 
      
 2882 
     | 
    
         
            +
                #   # shape: (3, 1)
         
     | 
| 
      
 2883 
     | 
    
         
            +
                #   # ┌─────┐
         
     | 
| 
      
 2884 
     | 
    
         
            +
                #   # │ bar │
         
     | 
| 
      
 2885 
     | 
    
         
            +
                #   # │ --- │
         
     | 
| 
      
 2886 
     | 
    
         
            +
                #   # │ f64 │
         
     | 
| 
      
 2887 
     | 
    
         
            +
                #   # ╞═════╡
         
     | 
| 
      
 2888 
     | 
    
         
            +
                #   # │ 6.0 │
         
     | 
| 
      
 2889 
     | 
    
         
            +
                #   # │ 7.0 │
         
     | 
| 
      
 2890 
     | 
    
         
            +
                #   # │ 8.0 │
         
     | 
| 
      
 2891 
     | 
    
         
            +
                #   # └─────┘
         
     | 
| 
      
 2892 
     | 
    
         
            +
                def drop(*columns)
         
     | 
| 
      
 2893 
     | 
    
         
            +
                  lazy.drop(*columns).collect(_eager: true)
         
     | 
| 
       2816 
2894 
     | 
    
         
             
                end
         
     | 
| 
       2817 
2895 
     | 
    
         | 
| 
       2818 
2896 
     | 
    
         
             
                # Drop in place.
         
     | 
| 
         @@ -2867,7 +2945,7 @@ module Polars 
     | 
|
| 
       2867 
2945 
     | 
    
         
             
                #       "c" => [true, true, false, nil]
         
     | 
| 
       2868 
2946 
     | 
    
         
             
                #     }
         
     | 
| 
       2869 
2947 
     | 
    
         
             
                #   )
         
     | 
| 
       2870 
     | 
    
         
            -
                #   df. 
     | 
| 
      
 2948 
     | 
    
         
            +
                #   df.clear
         
     | 
| 
       2871 
2949 
     | 
    
         
             
                #   # =>
         
     | 
| 
       2872 
2950 
     | 
    
         
             
                #   # shape: (0, 3)
         
     | 
| 
       2873 
2951 
     | 
    
         
             
                #   # ┌─────┬─────┬──────┐
         
     | 
| 
         @@ -2876,9 +2954,31 @@ module Polars 
     | 
|
| 
       2876 
2954 
     | 
    
         
             
                #   # │ i64 ┆ f64 ┆ bool │
         
     | 
| 
       2877 
2955 
     | 
    
         
             
                #   # ╞═════╪═════╪══════╡
         
     | 
| 
       2878 
2956 
     | 
    
         
             
                #   # └─────┴─────┴──────┘
         
     | 
| 
       2879 
     | 
    
         
            -
                 
     | 
| 
       2880 
     | 
    
         
            -
             
     | 
| 
      
 2957 
     | 
    
         
            +
                #
         
     | 
| 
      
 2958 
     | 
    
         
            +
                # @example
         
     | 
| 
      
 2959 
     | 
    
         
            +
                #   df.clear(2)
         
     | 
| 
      
 2960 
     | 
    
         
            +
                #   # =>
         
     | 
| 
      
 2961 
     | 
    
         
            +
                #   # shape: (2, 3)
         
     | 
| 
      
 2962 
     | 
    
         
            +
                #   # ┌──────┬──────┬──────┐
         
     | 
| 
      
 2963 
     | 
    
         
            +
                #   # │ a    ┆ b    ┆ c    │
         
     | 
| 
      
 2964 
     | 
    
         
            +
                #   # │ ---  ┆ ---  ┆ ---  │
         
     | 
| 
      
 2965 
     | 
    
         
            +
                #   # │ i64  ┆ f64  ┆ bool │
         
     | 
| 
      
 2966 
     | 
    
         
            +
                #   # ╞══════╪══════╪══════╡
         
     | 
| 
      
 2967 
     | 
    
         
            +
                #   # │ null ┆ null ┆ null │
         
     | 
| 
      
 2968 
     | 
    
         
            +
                #   # │ null ┆ null ┆ null │
         
     | 
| 
      
 2969 
     | 
    
         
            +
                #   # └──────┴──────┴──────┘
         
     | 
| 
      
 2970 
     | 
    
         
            +
                def clear(n = 0)
         
     | 
| 
      
 2971 
     | 
    
         
            +
                  if n == 0
         
     | 
| 
      
 2972 
     | 
    
         
            +
                    _from_rbdf(_df.clear)
         
     | 
| 
      
 2973 
     | 
    
         
            +
                  elsif n > 0 || len > 0
         
     | 
| 
      
 2974 
     | 
    
         
            +
                    self.class.new(
         
     | 
| 
      
 2975 
     | 
    
         
            +
                      schema.to_h { |nm, tp| [nm, Series.new(nm, [], dtype: tp).extend_constant(nil, n)] }
         
     | 
| 
      
 2976 
     | 
    
         
            +
                    )
         
     | 
| 
      
 2977 
     | 
    
         
            +
                  else
         
     | 
| 
      
 2978 
     | 
    
         
            +
                    clone
         
     | 
| 
      
 2979 
     | 
    
         
            +
                  end
         
     | 
| 
       2881 
2980 
     | 
    
         
             
                end
         
     | 
| 
      
 2981 
     | 
    
         
            +
                alias_method :cleared, :clear
         
     | 
| 
       2882 
2982 
     | 
    
         | 
| 
       2883 
2983 
     | 
    
         
             
                # clone handled by initialize_copy
         
     | 
| 
       2884 
2984 
     | 
    
         | 
| 
         @@ -3141,8 +3241,11 @@ module Polars 
     | 
|
| 
       3141 
3241 
     | 
    
         
             
                      aggregate_expr = Polars.element.median._rbexpr
         
     | 
| 
       3142 
3242 
     | 
    
         
             
                    when "last"
         
     | 
| 
       3143 
3243 
     | 
    
         
             
                      aggregate_expr = Polars.element.last._rbexpr
         
     | 
| 
      
 3244 
     | 
    
         
            +
                    when "len"
         
     | 
| 
      
 3245 
     | 
    
         
            +
                      aggregate_expr = Polars.len._rbexpr
         
     | 
| 
       3144 
3246 
     | 
    
         
             
                    when "count"
         
     | 
| 
       3145 
     | 
    
         
            -
                       
     | 
| 
      
 3247 
     | 
    
         
            +
                      warn "`aggregate_function: \"count\"` input for `pivot` is deprecated. Use `aggregate_function: \"len\"` instead."
         
     | 
| 
      
 3248 
     | 
    
         
            +
                      aggregate_expr = Polars.len._rbexpr
         
     | 
| 
       3146 
3249 
     | 
    
         
             
                    else
         
     | 
| 
       3147 
3250 
     | 
    
         
             
                      raise ArgumentError, "Argument aggregate fn: '#{aggregate_fn}' was not expected."
         
     | 
| 
       3148 
3251 
     | 
    
         
             
                    end
         
     | 
| 
         @@ -3154,9 +3257,9 @@ module Polars 
     | 
|
| 
       3154 
3257 
     | 
    
         | 
| 
       3155 
3258 
     | 
    
         
             
                  _from_rbdf(
         
     | 
| 
       3156 
3259 
     | 
    
         
             
                    _df.pivot_expr(
         
     | 
| 
       3157 
     | 
    
         
            -
                      values,
         
     | 
| 
       3158 
3260 
     | 
    
         
             
                      index,
         
     | 
| 
       3159 
3261 
     | 
    
         
             
                      columns,
         
     | 
| 
      
 3262 
     | 
    
         
            +
                      values,
         
     | 
| 
       3160 
3263 
     | 
    
         
             
                      maintain_order,
         
     | 
| 
       3161 
3264 
     | 
    
         
             
                      sort_columns,
         
     | 
| 
       3162 
3265 
     | 
    
         
             
                      aggregate_expr,
         
     | 
| 
         @@ -3591,8 +3694,13 @@ module Polars 
     | 
|
| 
       3591 
3694 
     | 
    
         | 
| 
       3592 
3695 
     | 
    
         
             
                # Select columns from this DataFrame.
         
     | 
| 
       3593 
3696 
     | 
    
         
             
                #
         
     | 
| 
       3594 
     | 
    
         
            -
                # @param exprs [ 
     | 
| 
       3595 
     | 
    
         
            -
                #   Column  
     | 
| 
      
 3697 
     | 
    
         
            +
                # @param exprs [Array]
         
     | 
| 
      
 3698 
     | 
    
         
            +
                #   Column(s) to select, specified as positional arguments.
         
     | 
| 
      
 3699 
     | 
    
         
            +
                #   Accepts expression input. Strings are parsed as column names,
         
     | 
| 
      
 3700 
     | 
    
         
            +
                #   other non-expression inputs are parsed as literals.
         
     | 
| 
      
 3701 
     | 
    
         
            +
                # @param named_exprs [Hash]
         
     | 
| 
      
 3702 
     | 
    
         
            +
                #   Additional columns to select, specified as keyword arguments.
         
     | 
| 
      
 3703 
     | 
    
         
            +
                #   The columns will be renamed to the keyword used.
         
     | 
| 
       3596 
3704 
     | 
    
         
             
                #
         
     | 
| 
       3597 
3705 
     | 
    
         
             
                # @return [DataFrame]
         
     | 
| 
       3598 
3706 
     | 
    
         
             
                #
         
     | 
| 
         @@ -3672,23 +3780,25 @@ module Polars 
     | 
|
| 
       3672 
3780 
     | 
    
         
             
                #   # │ 0       │
         
     | 
| 
       3673 
3781 
     | 
    
         
             
                #   # │ 10      │
         
     | 
| 
       3674 
3782 
     | 
    
         
             
                #   # └─────────┘
         
     | 
| 
       3675 
     | 
    
         
            -
                def select(exprs)
         
     | 
| 
       3676 
     | 
    
         
            -
                   
     | 
| 
       3677 
     | 
    
         
            -
                    lazy
         
     | 
| 
       3678 
     | 
    
         
            -
                      .select(exprs)
         
     | 
| 
       3679 
     | 
    
         
            -
                      .collect(no_optimization: true, string_cache: false)
         
     | 
| 
       3680 
     | 
    
         
            -
                      ._df
         
     | 
| 
       3681 
     | 
    
         
            -
                  )
         
     | 
| 
      
 3783 
     | 
    
         
            +
                def select(*exprs, **named_exprs)
         
     | 
| 
      
 3784 
     | 
    
         
            +
                  lazy.select(*exprs, **named_exprs).collect(_eager: true)
         
     | 
| 
       3682 
3785 
     | 
    
         
             
                end
         
     | 
| 
       3683 
3786 
     | 
    
         | 
| 
       3684 
     | 
    
         
            -
                # Add  
     | 
| 
      
 3787 
     | 
    
         
            +
                # Add columns to this DataFrame.
         
     | 
| 
      
 3788 
     | 
    
         
            +
                #
         
     | 
| 
      
 3789 
     | 
    
         
            +
                # Added columns will replace existing columns with the same name.
         
     | 
| 
       3685 
3790 
     | 
    
         
             
                #
         
     | 
| 
       3686 
3791 
     | 
    
         
             
                # @param exprs [Array]
         
     | 
| 
       3687 
     | 
    
         
            -
                #    
     | 
| 
      
 3792 
     | 
    
         
            +
                #   Column(s) to add, specified as positional arguments.
         
     | 
| 
      
 3793 
     | 
    
         
            +
                #   Accepts expression input. Strings are parsed as column names, other
         
     | 
| 
      
 3794 
     | 
    
         
            +
                #   non-expression inputs are parsed as literals.
         
     | 
| 
      
 3795 
     | 
    
         
            +
                # @param named_exprs [Hash]
         
     | 
| 
      
 3796 
     | 
    
         
            +
                #   Additional columns to add, specified as keyword arguments.
         
     | 
| 
      
 3797 
     | 
    
         
            +
                #   The columns will be renamed to the keyword used.
         
     | 
| 
       3688 
3798 
     | 
    
         
             
                #
         
     | 
| 
       3689 
3799 
     | 
    
         
             
                # @return [DataFrame]
         
     | 
| 
       3690 
3800 
     | 
    
         
             
                #
         
     | 
| 
       3691 
     | 
    
         
            -
                # @example
         
     | 
| 
      
 3801 
     | 
    
         
            +
                # @example Pass an expression to add it as a new column.
         
     | 
| 
       3692 
3802 
     | 
    
         
             
                #   df = Polars::DataFrame.new(
         
     | 
| 
       3693 
3803 
     | 
    
         
             
                #     {
         
     | 
| 
       3694 
3804 
     | 
    
         
             
                #       "a" => [1, 2, 3, 4],
         
     | 
| 
         @@ -3696,32 +3806,94 @@ module Polars 
     | 
|
| 
       3696 
3806 
     | 
    
         
             
                #       "c" => [true, true, false, true]
         
     | 
| 
       3697 
3807 
     | 
    
         
             
                #     }
         
     | 
| 
       3698 
3808 
     | 
    
         
             
                #   )
         
     | 
| 
      
 3809 
     | 
    
         
            +
                #   df.with_columns((Polars.col("a") ** 2).alias("a^2"))
         
     | 
| 
      
 3810 
     | 
    
         
            +
                #   # =>
         
     | 
| 
      
 3811 
     | 
    
         
            +
                #   # shape: (4, 4)
         
     | 
| 
      
 3812 
     | 
    
         
            +
                #   # ┌─────┬──────┬───────┬─────┐
         
     | 
| 
      
 3813 
     | 
    
         
            +
                #   # │ a   ┆ b    ┆ c     ┆ a^2 │
         
     | 
| 
      
 3814 
     | 
    
         
            +
                #   # │ --- ┆ ---  ┆ ---   ┆ --- │
         
     | 
| 
      
 3815 
     | 
    
         
            +
                #   # │ i64 ┆ f64  ┆ bool  ┆ i64 │
         
     | 
| 
      
 3816 
     | 
    
         
            +
                #   # ╞═════╪══════╪═══════╪═════╡
         
     | 
| 
      
 3817 
     | 
    
         
            +
                #   # │ 1   ┆ 0.5  ┆ true  ┆ 1   │
         
     | 
| 
      
 3818 
     | 
    
         
            +
                #   # │ 2   ┆ 4.0  ┆ true  ┆ 4   │
         
     | 
| 
      
 3819 
     | 
    
         
            +
                #   # │ 3   ┆ 10.0 ┆ false ┆ 9   │
         
     | 
| 
      
 3820 
     | 
    
         
            +
                #   # │ 4   ┆ 13.0 ┆ true  ┆ 16  │
         
     | 
| 
      
 3821 
     | 
    
         
            +
                #   # └─────┴──────┴───────┴─────┘
         
     | 
| 
      
 3822 
     | 
    
         
            +
                #
         
     | 
| 
      
 3823 
     | 
    
         
            +
                # @example Added columns will replace existing columns with the same name.
         
     | 
| 
      
 3824 
     | 
    
         
            +
                #   df.with_columns(Polars.col("a").cast(Polars::Float64))
         
     | 
| 
      
 3825 
     | 
    
         
            +
                #   # =>
         
     | 
| 
      
 3826 
     | 
    
         
            +
                #   # shape: (4, 3)
         
     | 
| 
      
 3827 
     | 
    
         
            +
                #   # ┌─────┬──────┬───────┐
         
     | 
| 
      
 3828 
     | 
    
         
            +
                #   # │ a   ┆ b    ┆ c     │
         
     | 
| 
      
 3829 
     | 
    
         
            +
                #   # │ --- ┆ ---  ┆ ---   │
         
     | 
| 
      
 3830 
     | 
    
         
            +
                #   # │ f64 ┆ f64  ┆ bool  │
         
     | 
| 
      
 3831 
     | 
    
         
            +
                #   # ╞═════╪══════╪═══════╡
         
     | 
| 
      
 3832 
     | 
    
         
            +
                #   # │ 1.0 ┆ 0.5  ┆ true  │
         
     | 
| 
      
 3833 
     | 
    
         
            +
                #   # │ 2.0 ┆ 4.0  ┆ true  │
         
     | 
| 
      
 3834 
     | 
    
         
            +
                #   # │ 3.0 ┆ 10.0 ┆ false │
         
     | 
| 
      
 3835 
     | 
    
         
            +
                #   # │ 4.0 ┆ 13.0 ┆ true  │
         
     | 
| 
      
 3836 
     | 
    
         
            +
                #   # └─────┴──────┴───────┘
         
     | 
| 
      
 3837 
     | 
    
         
            +
                #
         
     | 
| 
      
 3838 
     | 
    
         
            +
                # @example Multiple columns can be added by passing a list of expressions.
         
     | 
| 
       3699 
3839 
     | 
    
         
             
                #   df.with_columns(
         
     | 
| 
       3700 
3840 
     | 
    
         
             
                #     [
         
     | 
| 
       3701 
3841 
     | 
    
         
             
                #       (Polars.col("a") ** 2).alias("a^2"),
         
     | 
| 
       3702 
3842 
     | 
    
         
             
                #       (Polars.col("b") / 2).alias("b/2"),
         
     | 
| 
       3703 
     | 
    
         
            -
                #       (Polars.col("c"). 
     | 
| 
      
 3843 
     | 
    
         
            +
                #       (Polars.col("c").not_).alias("not c"),
         
     | 
| 
       3704 
3844 
     | 
    
         
             
                #     ]
         
     | 
| 
       3705 
3845 
     | 
    
         
             
                #   )
         
     | 
| 
       3706 
3846 
     | 
    
         
             
                #   # =>
         
     | 
| 
       3707 
3847 
     | 
    
         
             
                #   # shape: (4, 6)
         
     | 
| 
       3708 
     | 
    
         
            -
                #   #  
     | 
| 
       3709 
     | 
    
         
            -
                #   # │ a   ┆ b    ┆ c     ┆ a^2 
     | 
| 
       3710 
     | 
    
         
            -
                #   # │ --- ┆ ---  ┆ ---   ┆ --- 
     | 
| 
       3711 
     | 
    
         
            -
                #   # │ i64 ┆ f64  ┆ bool  ┆  
     | 
| 
       3712 
     | 
    
         
            -
                #   #  
     | 
| 
       3713 
     | 
    
         
            -
                #   # │ 1   ┆ 0.5  ┆ true  ┆ 1 
     | 
| 
       3714 
     | 
    
         
            -
                #   # │ 2   ┆ 4.0  ┆ true  ┆ 4 
     | 
| 
       3715 
     | 
    
         
            -
                #   # │ 3   ┆ 10.0 ┆ false ┆ 9 
     | 
| 
       3716 
     | 
    
         
            -
                #   # │ 4   ┆ 13.0 ┆ true  ┆ 16 
     | 
| 
       3717 
     | 
    
         
            -
                #   #  
     | 
| 
       3718 
     | 
    
         
            -
                 
     | 
| 
       3719 
     | 
    
         
            -
             
     | 
| 
       3720 
     | 
    
         
            -
             
     | 
| 
       3721 
     | 
    
         
            -
             
     | 
| 
       3722 
     | 
    
         
            -
             
     | 
| 
       3723 
     | 
    
         
            -
             
     | 
| 
       3724 
     | 
    
         
            -
             
     | 
| 
      
 3848 
     | 
    
         
            +
                #   # ┌─────┬──────┬───────┬─────┬──────┬───────┐
         
     | 
| 
      
 3849 
     | 
    
         
            +
                #   # │ a   ┆ b    ┆ c     ┆ a^2 ┆ b/2  ┆ not c │
         
     | 
| 
      
 3850 
     | 
    
         
            +
                #   # │ --- ┆ ---  ┆ ---   ┆ --- ┆ ---  ┆ ---   │
         
     | 
| 
      
 3851 
     | 
    
         
            +
                #   # │ i64 ┆ f64  ┆ bool  ┆ i64 ┆ f64  ┆ bool  │
         
     | 
| 
      
 3852 
     | 
    
         
            +
                #   # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡
         
     | 
| 
      
 3853 
     | 
    
         
            +
                #   # │ 1   ┆ 0.5  ┆ true  ┆ 1   ┆ 0.25 ┆ false │
         
     | 
| 
      
 3854 
     | 
    
         
            +
                #   # │ 2   ┆ 4.0  ┆ true  ┆ 4   ┆ 2.0  ┆ false │
         
     | 
| 
      
 3855 
     | 
    
         
            +
                #   # │ 3   ┆ 10.0 ┆ false ┆ 9   ┆ 5.0  ┆ true  │
         
     | 
| 
      
 3856 
     | 
    
         
            +
                #   # │ 4   ┆ 13.0 ┆ true  ┆ 16  ┆ 6.5  ┆ false │
         
     | 
| 
      
 3857 
     | 
    
         
            +
                #   # └─────┴──────┴───────┴─────┴──────┴───────┘
         
     | 
| 
      
 3858 
     | 
    
         
            +
                #
         
     | 
| 
      
 3859 
     | 
    
         
            +
                # @example Multiple columns also can be added using positional arguments instead of a list.
         
     | 
| 
      
 3860 
     | 
    
         
            +
                #   df.with_columns(
         
     | 
| 
      
 3861 
     | 
    
         
            +
                #     (Polars.col("a") ** 2).alias("a^2"),
         
     | 
| 
      
 3862 
     | 
    
         
            +
                #     (Polars.col("b") / 2).alias("b/2"),
         
     | 
| 
      
 3863 
     | 
    
         
            +
                #     (Polars.col("c").not_).alias("not c"),
         
     | 
| 
      
 3864 
     | 
    
         
            +
                #   )
         
     | 
| 
      
 3865 
     | 
    
         
            +
                #   # =>
         
     | 
| 
      
 3866 
     | 
    
         
            +
                #   # shape: (4, 6)
         
     | 
| 
      
 3867 
     | 
    
         
            +
                #   # ┌─────┬──────┬───────┬─────┬──────┬───────┐
         
     | 
| 
      
 3868 
     | 
    
         
            +
                #   # │ a   ┆ b    ┆ c     ┆ a^2 ┆ b/2  ┆ not c │
         
     | 
| 
      
 3869 
     | 
    
         
            +
                #   # │ --- ┆ ---  ┆ ---   ┆ --- ┆ ---  ┆ ---   │
         
     | 
| 
      
 3870 
     | 
    
         
            +
                #   # │ i64 ┆ f64  ┆ bool  ┆ i64 ┆ f64  ┆ bool  │
         
     | 
| 
      
 3871 
     | 
    
         
            +
                #   # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡
         
     | 
| 
      
 3872 
     | 
    
         
            +
                #   # │ 1   ┆ 0.5  ┆ true  ┆ 1   ┆ 0.25 ┆ false │
         
     | 
| 
      
 3873 
     | 
    
         
            +
                #   # │ 2   ┆ 4.0  ┆ true  ┆ 4   ┆ 2.0  ┆ false │
         
     | 
| 
      
 3874 
     | 
    
         
            +
                #   # │ 3   ┆ 10.0 ┆ false ┆ 9   ┆ 5.0  ┆ true  │
         
     | 
| 
      
 3875 
     | 
    
         
            +
                #   # │ 4   ┆ 13.0 ┆ true  ┆ 16  ┆ 6.5  ┆ false │
         
     | 
| 
      
 3876 
     | 
    
         
            +
                #   # └─────┴──────┴───────┴─────┴──────┴───────┘
         
     | 
| 
      
 3877 
     | 
    
         
            +
                #
         
     | 
| 
      
 3878 
     | 
    
         
            +
                # @example Use keyword arguments to easily name your expression inputs.
         
     | 
| 
      
 3879 
     | 
    
         
            +
                #   df.with_columns(
         
     | 
| 
      
 3880 
     | 
    
         
            +
                #     ab: Polars.col("a") * Polars.col("b"),
         
     | 
| 
      
 3881 
     | 
    
         
            +
                #     not_c: Polars.col("c").not_
         
     | 
| 
      
 3882 
     | 
    
         
            +
                #   )
         
     | 
| 
      
 3883 
     | 
    
         
            +
                #   # =>
         
     | 
| 
      
 3884 
     | 
    
         
            +
                #   # shape: (4, 5)
         
     | 
| 
      
 3885 
     | 
    
         
            +
                #   # ┌─────┬──────┬───────┬──────┬───────┐
         
     | 
| 
      
 3886 
     | 
    
         
            +
                #   # │ a   ┆ b    ┆ c     ┆ ab   ┆ not_c │
         
     | 
| 
      
 3887 
     | 
    
         
            +
                #   # │ --- ┆ ---  ┆ ---   ┆ ---  ┆ ---   │
         
     | 
| 
      
 3888 
     | 
    
         
            +
                #   # │ i64 ┆ f64  ┆ bool  ┆ f64  ┆ bool  │
         
     | 
| 
      
 3889 
     | 
    
         
            +
                #   # ╞═════╪══════╪═══════╪══════╪═══════╡
         
     | 
| 
      
 3890 
     | 
    
         
            +
                #   # │ 1   ┆ 0.5  ┆ true  ┆ 0.5  ┆ false │
         
     | 
| 
      
 3891 
     | 
    
         
            +
                #   # │ 2   ┆ 4.0  ┆ true  ┆ 8.0  ┆ false │
         
     | 
| 
      
 3892 
     | 
    
         
            +
                #   # │ 3   ┆ 10.0 ┆ false ┆ 30.0 ┆ true  │
         
     | 
| 
      
 3893 
     | 
    
         
            +
                #   # │ 4   ┆ 13.0 ┆ true  ┆ 52.0 ┆ false │
         
     | 
| 
      
 3894 
     | 
    
         
            +
                #   # └─────┴──────┴───────┴──────┴───────┘
         
     | 
| 
      
 3895 
     | 
    
         
            +
                def with_columns(*exprs, **named_exprs)
         
     | 
| 
      
 3896 
     | 
    
         
            +
                  lazy.with_columns(*exprs, **named_exprs).collect(_eager: true)
         
     | 
| 
       3725 
3897 
     | 
    
         
             
                end
         
     | 
| 
       3726 
3898 
     | 
    
         | 
| 
       3727 
3899 
     | 
    
         
             
                # Get number of chunks used by the ChunkedArrays of this DataFrame.
         
     | 
| 
         @@ -4363,7 +4535,7 @@ module Polars 
     | 
|
| 
       4363 
4535 
     | 
    
         
             
                #   #         null
         
     | 
| 
       4364 
4536 
     | 
    
         
             
                #   # ]
         
     | 
| 
       4365 
4537 
     | 
    
         
             
                #
         
     | 
| 
       4366 
     | 
    
         
            -
                # @example A horizontal boolean or, similar to a row-wise .any 
     | 
| 
      
 4538 
     | 
    
         
            +
                # @example A horizontal boolean or, similar to a row-wise .any:
         
     | 
| 
       4367 
4539 
     | 
    
         
             
                #   df = Polars::DataFrame.new(
         
     | 
| 
       4368 
4540 
     | 
    
         
             
                #     {
         
     | 
| 
       4369 
4541 
     | 
    
         
             
                #       "a" => [false, false, true],
         
     | 
| 
         @@ -4486,7 +4658,7 @@ module Polars 
     | 
|
| 
       4486 
4658 
     | 
    
         
             
                #   # => [{"a"=>1, "b"=>2}, {"a"=>3, "b"=>4}, {"a"=>5, "b"=>6}]
         
     | 
| 
       4487 
4659 
     | 
    
         
             
                def rows(named: false)
         
     | 
| 
       4488 
4660 
     | 
    
         
             
                  if named
         
     | 
| 
       4489 
     | 
    
         
            -
                    columns = columns 
     | 
| 
      
 4661 
     | 
    
         
            +
                    columns = self.columns
         
     | 
| 
       4490 
4662 
     | 
    
         
             
                    _df.row_tuples.map do |v|
         
     | 
| 
       4491 
4663 
     | 
    
         
             
                      columns.zip(v).to_h
         
     | 
| 
       4492 
4664 
     | 
    
         
             
                    end
         
     | 
| 
         @@ -4527,7 +4699,7 @@ module Polars 
     | 
|
| 
       4527 
4699 
     | 
    
         
             
                  return to_enum(:iter_rows, named: named, buffer_size: buffer_size) unless block_given?
         
     | 
| 
       4528 
4700 
     | 
    
         | 
| 
       4529 
4701 
     | 
    
         
             
                  # load into the local namespace for a modest performance boost in the hot loops
         
     | 
| 
       4530 
     | 
    
         
            -
                  columns = columns 
     | 
| 
      
 4702 
     | 
    
         
            +
                  columns = self.columns
         
     | 
| 
       4531 
4703 
     | 
    
         | 
| 
       4532 
4704 
     | 
    
         
             
                  # note: buffering rows results in a 2-4x speedup over individual calls
         
     | 
| 
       4533 
4705 
     | 
    
         
             
                  # to ".row(i)", so it should only be disabled in extremely specific cases.
         
     | 
| 
         @@ -4764,13 +4936,51 @@ module Polars 
     | 
|
| 
       4764 
4936 
     | 
    
         
             
                  _from_rbdf(_df.unnest(names))
         
     | 
| 
       4765 
4937 
     | 
    
         
             
                end
         
     | 
| 
       4766 
4938 
     | 
    
         | 
| 
       4767 
     | 
    
         
            -
                #  
     | 
| 
      
 4939 
     | 
    
         
            +
                # Requires NumPy
         
     | 
| 
       4768 
4940 
     | 
    
         
             
                # def corr
         
     | 
| 
       4769 
4941 
     | 
    
         
             
                # end
         
     | 
| 
       4770 
4942 
     | 
    
         | 
| 
       4771 
     | 
    
         
            -
                #  
     | 
| 
       4772 
     | 
    
         
            -
                # 
     | 
| 
       4773 
     | 
    
         
            -
                #  
     | 
| 
      
 4943 
     | 
    
         
            +
                # Take two sorted DataFrames and merge them by the sorted key.
         
     | 
| 
      
 4944 
     | 
    
         
            +
                #
         
     | 
| 
      
 4945 
     | 
    
         
            +
                # The output of this operation will also be sorted.
         
     | 
| 
      
 4946 
     | 
    
         
            +
                # It is the caller's responsibility that the frames are sorted
         
     | 
| 
      
 4947 
     | 
    
         
            +
                # by that key; otherwise the output will not make sense.
         
     | 
| 
      
 4948 
     | 
    
         
            +
                #
         
     | 
| 
      
 4949 
     | 
    
         
            +
                # The schemas of both DataFrames must be equal.
         
     | 
| 
      
 4950 
     | 
    
         
            +
                #
         
     | 
| 
      
 4951 
     | 
    
         
            +
                # @param other [DataFrame]
         
     | 
| 
      
 4952 
     | 
    
         
            +
                #   Other DataFrame that must be merged
         
     | 
| 
      
 4953 
     | 
    
         
            +
                # @param key [String]
         
     | 
| 
      
 4954 
     | 
    
         
            +
                #   Key that is sorted.
         
     | 
| 
      
 4955 
     | 
    
         
            +
                #
         
     | 
| 
      
 4956 
     | 
    
         
            +
                # @return [DataFrame]
         
     | 
| 
      
 4957 
     | 
    
         
            +
                #
         
     | 
| 
      
 4958 
     | 
    
         
            +
                # @example
         
     | 
| 
      
 4959 
     | 
    
         
            +
                #   df0 = Polars::DataFrame.new(
         
     | 
| 
      
 4960 
     | 
    
         
            +
                #     {"name" => ["steve", "elise", "bob"], "age" => [42, 44, 18]}
         
     | 
| 
      
 4961 
     | 
    
         
            +
                #   ).sort("age")
         
     | 
| 
      
 4962 
     | 
    
         
            +
                #   df1 = Polars::DataFrame.new(
         
     | 
| 
      
 4963 
     | 
    
         
            +
                #     {"name" => ["anna", "megan", "steve", "thomas"], "age" => [21, 33, 42, 20]}
         
     | 
| 
      
 4964 
     | 
    
         
            +
                #   ).sort("age")
         
     | 
| 
      
 4965 
     | 
    
         
            +
                #   df0.merge_sorted(df1, "age")
         
     | 
| 
      
 4966 
     | 
    
         
            +
                #   # =>
         
     | 
| 
      
 4967 
     | 
    
         
            +
                #   # shape: (7, 2)
         
     | 
| 
      
 4968 
     | 
    
         
            +
                #   # ┌────────┬─────┐
         
     | 
| 
      
 4969 
     | 
    
         
            +
                #   # │ name   ┆ age │
         
     | 
| 
      
 4970 
     | 
    
         
            +
                #   # │ ---    ┆ --- │
         
     | 
| 
      
 4971 
     | 
    
         
            +
                #   # │ str    ┆ i64 │
         
     | 
| 
      
 4972 
     | 
    
         
            +
                #   # ╞════════╪═════╡
         
     | 
| 
      
 4973 
     | 
    
         
            +
                #   # │ bob    ┆ 18  │
         
     | 
| 
      
 4974 
     | 
    
         
            +
                #   # │ thomas ┆ 20  │
         
     | 
| 
      
 4975 
     | 
    
         
            +
                #   # │ anna   ┆ 21  │
         
     | 
| 
      
 4976 
     | 
    
         
            +
                #   # │ megan  ┆ 33  │
         
     | 
| 
      
 4977 
     | 
    
         
            +
                #   # │ steve  ┆ 42  │
         
     | 
| 
      
 4978 
     | 
    
         
            +
                #   # │ steve  ┆ 42  │
         
     | 
| 
      
 4979 
     | 
    
         
            +
                #   # │ elise  ┆ 44  │
         
     | 
| 
      
 4980 
     | 
    
         
            +
                #   # └────────┴─────┘
         
     | 
| 
      
 4981 
     | 
    
         
            +
                def merge_sorted(other, key)
         
     | 
| 
      
 4982 
     | 
    
         
            +
                  lazy.merge_sorted(other.lazy, key).collect(_eager: true)
         
     | 
| 
      
 4983 
     | 
    
         
            +
                end
         
     | 
| 
       4774 
4984 
     | 
    
         | 
| 
       4775 
4985 
     | 
    
         
             
                # Indicate that one or multiple columns are sorted.
         
     | 
| 
       4776 
4986 
     | 
    
         
             
                #
         
     | 
| 
         @@ -4812,7 +5022,7 @@ module Polars 
     | 
|
| 
       4812 
5022 
     | 
    
         
             
                end
         
     | 
| 
       4813 
5023 
     | 
    
         | 
| 
       4814 
5024 
     | 
    
         
             
                def _pos_idxs(idxs, dim)
         
     | 
| 
       4815 
     | 
    
         
            -
                  idx_type =  
     | 
| 
      
 5025 
     | 
    
         
            +
                  idx_type = Plr.get_index_type
         
     | 
| 
       4816 
5026 
     | 
    
         | 
| 
       4817 
5027 
     | 
    
         
             
                  if idxs.is_a?(Series)
         
     | 
| 
       4818 
5028 
     | 
    
         
             
                    if idxs.dtype == idx_type
         
     | 
| 
         @@ -5045,14 +5255,14 @@ module Polars 
     | 
|
| 
       5045 
5255 
     | 
    
         
             
                  elsif data[0].is_a?(Hash)
         
     | 
| 
       5046 
5256 
     | 
    
         
             
                    column_names, dtypes = _unpack_schema(columns)
         
     | 
| 
       5047 
5257 
     | 
    
         
             
                    schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
         
     | 
| 
       5048 
     | 
    
         
            -
                    rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
         
     | 
| 
      
 5258 
     | 
    
         
            +
                    rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
         
     | 
| 
       5049 
5259 
     | 
    
         
             
                    if column_names
         
     | 
| 
       5050 
5260 
     | 
    
         
             
                      rbdf = _post_apply_columns(rbdf, column_names)
         
     | 
| 
       5051 
5261 
     | 
    
         
             
                    end
         
     | 
| 
       5052 
5262 
     | 
    
         
             
                    return rbdf
         
     | 
| 
       5053 
5263 
     | 
    
         
             
                  elsif data[0].is_a?(::Array)
         
     | 
| 
      
 5264 
     | 
    
         
            +
                    first_element = data[0]
         
     | 
| 
       5054 
5265 
     | 
    
         
             
                    if orient.nil? && !columns.nil?
         
     | 
| 
       5055 
     | 
    
         
            -
                      first_element = data[0]
         
     | 
| 
       5056 
5266 
     | 
    
         
             
                      row_types = first_element.filter_map { |value| value.class }.uniq
         
     | 
| 
       5057 
5267 
     | 
    
         
             
                      if row_types.include?(Integer) && row_types.include?(Float)
         
     | 
| 
       5058 
5268 
     | 
    
         
             
                        row_types.delete(Integer)
         
     |