RubyGems - polars-df - Versions diffs - 0.8.0-x86_64-linux → 0.10.0-x86_64-linux - Mend

polars-df 0.8.0-x86_64-linux → 0.10.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

checksums.yaml +4 -4
data/CHANGELOG.md +42 -1
data/Cargo.lock +159 -66
data/Cargo.toml +0 -3
data/LICENSE-THIRD-PARTY.txt +3112 -1613
data/LICENSE.txt +1 -1
data/README.md +3 -2
data/lib/polars/3.1/polars.so +0 -0
data/lib/polars/3.2/polars.so +0 -0
data/lib/polars/3.3/polars.so +0 -0
data/lib/polars/array_expr.rb +453 -0
data/lib/polars/array_name_space.rb +346 -0
data/lib/polars/batched_csv_reader.rb +4 -2
data/lib/polars/cat_expr.rb +24 -0
data/lib/polars/cat_name_space.rb +75 -0
data/lib/polars/config.rb +2 -2
data/lib/polars/data_frame.rb +306 -96
data/lib/polars/data_types.rb +191 -28
data/lib/polars/date_time_expr.rb +41 -18
data/lib/polars/date_time_name_space.rb +9 -3
data/lib/polars/exceptions.rb +12 -1
data/lib/polars/expr.rb +898 -215
data/lib/polars/functions/aggregation/horizontal.rb +246 -0
data/lib/polars/functions/aggregation/vertical.rb +282 -0
data/lib/polars/functions/as_datatype.rb +248 -0
data/lib/polars/functions/col.rb +47 -0
data/lib/polars/functions/eager.rb +182 -0
data/lib/polars/functions/lazy.rb +1280 -0
data/lib/polars/functions/len.rb +49 -0
data/lib/polars/functions/lit.rb +35 -0
data/lib/polars/functions/random.rb +16 -0
data/lib/polars/functions/range/date_range.rb +103 -0
data/lib/polars/functions/range/int_range.rb +51 -0
data/lib/polars/functions/repeat.rb +144 -0
data/lib/polars/functions/whenthen.rb +96 -0
data/lib/polars/functions.rb +29 -416
data/lib/polars/group_by.rb +2 -2
data/lib/polars/io.rb +36 -31
data/lib/polars/lazy_frame.rb +405 -88
data/lib/polars/list_expr.rb +158 -8
data/lib/polars/list_name_space.rb +102 -0
data/lib/polars/meta_expr.rb +175 -7
data/lib/polars/series.rb +282 -41
data/lib/polars/string_cache.rb +75 -0
data/lib/polars/string_expr.rb +413 -96
data/lib/polars/string_name_space.rb +4 -4
data/lib/polars/testing.rb +507 -0
data/lib/polars/utils.rb +106 -8
data/lib/polars/version.rb +1 -1
data/lib/polars/whenthen.rb +83 -0
data/lib/polars.rb +16 -4
metadata +34 -6
data/lib/polars/lazy_functions.rb +0 -1181
data/lib/polars/when.rb +0 -16
data/lib/polars/when_then.rb +0 -19

data/lib/polars/data_frame.rb CHANGED

@@ -47,8 +47,8 @@ module Polars
     end
     # @private
-    def self._from_hashes(data, infer_schema_length: 100, schema: nil)
-      rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema)
+    def self._from_hashes(data, infer_schema_length: 100, schema: nil, schema_overrides: nil)
+      rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
       _from_rbdf(rbdf)
     end
@@ -91,7 +91,8 @@ module Polars
       row_count_name: nil,
       row_count_offset: 0,
       sample_size: 1024,
-      eol_char: "\n"
+      eol_char: "\n",
+      truncate_ragged_lines: false
     )
       if Utils.pathlike?(file)
         path = Utils.normalise_filepath(file)
@@ -147,7 +148,8 @@ module Polars
           skip_rows_after_header: skip_rows_after_header,
           row_count_name: row_count_name,
           row_count_offset: row_count_offset,
-          eol_char: eol_char
+          eol_char: eol_char,
+          truncate_ragged_lines: truncate_ragged_lines
         )
         if columns.nil?
           return _from_rbdf(scan.collect._df)
@@ -186,7 +188,8 @@ module Polars
           skip_rows_after_header,
           Utils._prepare_row_count_args(row_count_name, row_count_offset),
           sample_size,
-          eol_char
+          eol_char,
+          truncate_ragged_lines
         )
       )
     end
@@ -622,7 +625,7 @@ module Polars
         # select single column
         # df["foo"]
         if item.is_a?(::String) || item.is_a?(Symbol)
-          return Utils.wrap_s(_df.column(item.to_s))
+          return Utils.wrap_s(_df.get_column(item.to_s))
         end
         # df[idx]
@@ -814,8 +817,6 @@ module Polars
     # Serialize to JSON representation.
     #
-    # @return [nil]
-    #
     # @param file [String]
     #   File path to which the result should be written.
     # @param pretty [Boolean]
@@ -823,17 +824,45 @@ module Polars
     # @param row_oriented [Boolean]
     #   Write to row oriented json. This is slower, but more common.
     #
-    # @see #write_ndjson
+    # @return [nil]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8]
+    #     }
+    #   )
+    #   df.write_json
+    #   # => "{\"columns\":[{\"name\":\"foo\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[1,2,3]},{\"name\":\"bar\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[6,7,8]}]}"
+    #
+    # @example
+    #   df.write_json(row_oriented: true)
+    #   # => "[{\"foo\":1,\"bar\":6},{\"foo\":2,\"bar\":7},{\"foo\":3,\"bar\":8}]"
     def write_json(
-      file,
+      file = nil,
       pretty: false,
       row_oriented: false
     )
       if Utils.pathlike?(file)
         file = Utils.normalise_filepath(file)
       end
-      _df.write_json(file, pretty, row_oriented)
+      to_string_io = !file.nil? && file.is_a?(StringIO)
+      if file.nil? || to_string_io
+        buf = StringIO.new
+        buf.set_encoding(Encoding::BINARY)
+        _df.write_json(buf, pretty, row_oriented)
+        json_bytes = buf.string
+        json_str = json_bytes.force_encoding(Encoding::UTF_8)
+        if to_string_io
+          file.write(json_str)
+        else
+          return json_str
+        end
+      else
+        _df.write_json(file, pretty, row_oriented)
+      end
       nil
     end
@@ -843,12 +872,36 @@ module Polars
     #   File path to which the result should be written.
     #
     # @return [nil]
-    def write_ndjson(file)
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8]
+    #     }
+    #   )
+    #   df.write_ndjson()
+    #   # => "{\"foo\":1,\"bar\":6}\n{\"foo\":2,\"bar\":7}\n{\"foo\":3,\"bar\":8}\n"
+    def write_ndjson(file = nil)
       if Utils.pathlike?(file)
         file = Utils.normalise_filepath(file)
       end
-      _df.write_ndjson(file)
+      to_string_io = !file.nil? && file.is_a?(StringIO)
+      if file.nil? || to_string_io
+        buf = StringIO.new
+        buf.set_encoding(Encoding::BINARY)
+        _df.write_ndjson(buf)
+        json_bytes = buf.string
+        json_str = json_bytes.force_encoding(Encoding::UTF_8)
+        if to_string_io
+          file.write(json_str)
+        else
+          return json_str
+        end
+      else
+        _df.write_ndjson(file)
+      end
       nil
     end
@@ -1010,7 +1063,7 @@ module Polars
     # Write to Apache Parquet file.
     #
-    # @param file [String]
+    # @param file [String, Pathname, StringIO]
     #   File path to which the file should be written.
     # @param compression ["lz4", "uncompressed", "snappy", "gzip", "lzo", "brotli", "zstd"]
     #   Choose "zstd" for good compression performance.
@@ -1027,10 +1080,9 @@ module Polars
     # @param statistics [Boolean]
     #   Write statistics to the parquet headers. This requires extra compute.
     # @param row_group_size [Integer, nil]
-    #   Size of the row groups in number of rows.
-    #   If `nil` (default), the chunks of the DataFrame are
-    #   used. Writing in smaller chunks may reduce memory pressure and improve
-    #   writing speeds.
+    #   Size of the row groups in number of rows. Defaults to 512^2 rows.
+    # @param data_page_size [Integer, nil]
+    #   Size of the data page in bytes. Defaults to 1024^2 bytes.
     #
     # @return [nil]
     def write_parquet(
@@ -1038,7 +1090,8 @@ module Polars
       compression: "zstd",
       compression_level: nil,
       statistics: false,
-      row_group_size: nil
+      row_group_size: nil,
+      data_page_size: nil
     )
       if compression.nil?
         compression = "uncompressed"
@@ -1048,7 +1101,7 @@ module Polars
       end
       _df.write_parquet(
-        file, compression, compression_level, statistics, row_group_size
+        file, compression, compression_level, statistics, row_group_size, data_page_size
       )
     end
@@ -1084,7 +1137,7 @@ module Polars
     #   df.estimated_size
     #   # => 25888898
     #   df.estimated_size("mb")
-    #   # => 24.689577102661133
+    #   # => 17.0601749420166
     def estimated_size(unit = "b")
       sz = _df.estimated_size
       Utils.scale_bytes(sz, to: unit)
@@ -1782,7 +1835,7 @@ module Polars
     #       "b" => [2, 4, 6]
     #     }
     #   )
-    #   df.with_row_count
+    #   df.with_row_index
     #   # =>
     #   # shape: (3, 3)
     #   # ┌────────┬─────┬─────┐
@@ -1794,9 +1847,10 @@ module Polars
     #   # │ 1      ┆ 3   ┆ 4   │
     #   # │ 2      ┆ 5   ┆ 6   │
     #   # └────────┴─────┴─────┘
-    def with_row_count(name: "row_nr", offset: 0)
-      _from_rbdf(_df.with_row_count(name, offset))
+    def with_row_index(name: "row_nr", offset: 0)
+      _from_rbdf(_df.with_row_index(name, offset))
     end
+    alias_method :with_row_count, :with_row_index
     # Start a group by operation.
     #
@@ -2160,12 +2214,13 @@ module Polars
     #     closed: "right"
     #   ).agg(Polars.col("A").alias("A_agg_list"))
     #   # =>
-    #   # shape: (3, 4)
+    #   # shape: (4, 4)
     #   # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
     #   # │ _lower_boundary ┆ _upper_boundary ┆ idx ┆ A_agg_list      │
     #   # │ ---             ┆ ---             ┆ --- ┆ ---             │
     #   # │ i64             ┆ i64             ┆ i64 ┆ list[str]       │
     #   # ╞═════════════════╪═════════════════╪═════╪═════════════════╡
+    #   # │ -2              ┆ 1               ┆ -2  ┆ ["A", "A"]      │
     #   # │ 0               ┆ 3               ┆ 0   ┆ ["A", "B", "B"] │
     #   # │ 2               ┆ 5               ┆ 2   ┆ ["B", "B", "C"] │
     #   # │ 4               ┆ 7               ┆ 4   ┆ ["C"]           │
@@ -2433,6 +2488,8 @@ module Polars
     #   Join strategy.
     # @param suffix [String]
     #   Suffix to append to columns with a duplicate name.
+    # @param join_nulls [Boolean]
+    #   Join on null values. By default null values will never produce matches.
     #
     # @return [DataFrame]
     #
@@ -2515,7 +2572,7 @@ module Polars
     #   # ╞═════╪═════╪═════╡
     #   # │ 3   ┆ 8.0 ┆ c   │
     #   # └─────┴─────┴─────┘
-    def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right")
+    def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right", join_nulls: false)
       lazy
         .join(
           other.lazy,
@@ -2524,6 +2581,7 @@ module Polars
           on: on,
           how: how,
           suffix: suffix,
+          join_nulls: join_nulls
         )
         .collect(no_optimization: true)
     end
@@ -2617,26 +2675,26 @@ module Polars
     #   # ┌─────┬─────┬───────────┐
     #   # │ a   ┆ b   ┆ b_squared │
     #   # │ --- ┆ --- ┆ ---       │
-    #   # │ i64 ┆ i64 ┆ f64       │
+    #   # │ i64 ┆ i64 ┆ i64       │
     #   # ╞═════╪═════╪═══════════╡
-    #   # │ 1   ┆ 2   ┆ 4.0       │
-    #   # │ 3   ┆ 4   ┆ 16.0      │
-    #   # │ 5   ┆ 6   ┆ 36.0      │
+    #   # │ 1   ┆ 2   ┆ 4         │
+    #   # │ 3   ┆ 4   ┆ 16        │
+    #   # │ 5   ┆ 6   ┆ 36        │
     #   # └─────┴─────┴───────────┘
     #
     # @example Replaced
     #   df.with_column(Polars.col("a") ** 2)
     #   # =>
     #   # shape: (3, 2)
-    #   # ┌──────┬─────┐
-    #   # │ a    ┆ b   │
-    #   # │ ---  ┆ --- │
-    #   # │ f64  ┆ i64 │
-    #   # ╞══════╪═════╡
-    #   # │ 1.0  ┆ 2   │
-    #   # │ 9.0  ┆ 4   │
-    #   # │ 25.0 ┆ 6   │
-    #   # └──────┴─────┘
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ 2   │
+    #   # │ 9   ┆ 4   │
+    #   # │ 25  ┆ 6   │
+    #   # └─────┴─────┘
     def with_column(column)
       lazy
         .with_column(column)
@@ -2803,16 +2861,36 @@ module Polars
     #   # │ 2   ┆ 7.0 │
     #   # │ 3   ┆ 8.0 │
     #   # └─────┴─────┘
-    def drop(columns)
-      if columns.is_a?(::Array)
-        df = clone
-        columns.each do |n|
-          df._df.drop_in_place(n)
-        end
-        df
-      else
-        _from_rbdf(_df.drop(columns))
-      end
+    #
+    # @example Drop multiple columns by passing a list of column names.
+    #   df.drop(["bar", "ham"])
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌─────┐
+    #   # │ foo │
+    #   # │ --- │
+    #   # │ i64 │
+    #   # ╞═════╡
+    #   # │ 1   │
+    #   # │ 2   │
+    #   # │ 3   │
+    #   # └─────┘
+    #
+    # @example Use positional arguments to drop multiple columns.
+    #   df.drop("foo", "ham")
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌─────┐
+    #   # │ bar │
+    #   # │ --- │
+    #   # │ f64 │
+    #   # ╞═════╡
+    #   # │ 6.0 │
+    #   # │ 7.0 │
+    #   # │ 8.0 │
+    #   # └─────┘
+    def drop(*columns)
+      lazy.drop(*columns).collect(_eager: true)
     end
     # Drop in place.
@@ -2867,7 +2945,7 @@ module Polars
     #       "c" => [true, true, false, nil]
     #     }
     #   )
-    #   df.cleared
+    #   df.clear
     #   # =>
     #   # shape: (0, 3)
     #   # ┌─────┬─────┬──────┐
@@ -2876,9 +2954,31 @@ module Polars
     #   # │ i64 ┆ f64 ┆ bool │
     #   # ╞═════╪═════╪══════╡
     #   # └─────┴─────┴──────┘
-    def cleared
-      height > 0 ? head(0) : clone
+    #
+    # @example
+    #   df.clear(2)
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ a    ┆ b    ┆ c    │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ f64  ┆ bool │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ null ┆ null ┆ null │
+    #   # │ null ┆ null ┆ null │
+    #   # └──────┴──────┴──────┘
+    def clear(n = 0)
+      if n == 0
+        _from_rbdf(_df.clear)
+      elsif n > 0 || len > 0
+        self.class.new(
+          schema.to_h { |nm, tp| [nm, Series.new(nm, [], dtype: tp).extend_constant(nil, n)] }
+        )
+      else
+        clone
+      end
     end
+    alias_method :cleared, :clear
     # clone handled by initialize_copy
@@ -3141,8 +3241,11 @@ module Polars
           aggregate_expr = Polars.element.median._rbexpr
         when "last"
           aggregate_expr = Polars.element.last._rbexpr
+        when "len"
+          aggregate_expr = Polars.len._rbexpr
         when "count"
-          aggregate_expr = Polars.count._rbexpr
+          warn "`aggregate_function: \"count\"` input for `pivot` is deprecated. Use `aggregate_function: \"len\"` instead."
+          aggregate_expr = Polars.len._rbexpr
         else
           raise ArgumentError, "Argument aggregate fn: '#{aggregate_fn}' was not expected."
         end
@@ -3154,9 +3257,9 @@ module Polars
       _from_rbdf(
         _df.pivot_expr(
-          values,
           index,
           columns,
+          values,
           maintain_order,
           sort_columns,
           aggregate_expr,
@@ -3591,8 +3694,13 @@ module Polars
     # Select columns from this DataFrame.
     #
-    # @param exprs [Object]
-    #   Column or columns to select.
+    # @param exprs [Array]
+    #   Column(s) to select, specified as positional arguments.
+    #   Accepts expression input. Strings are parsed as column names,
+    #   other non-expression inputs are parsed as literals.
+    # @param named_exprs [Hash]
+    #   Additional columns to select, specified as keyword arguments.
+    #   The columns will be renamed to the keyword used.
     #
     # @return [DataFrame]
     #
@@ -3672,23 +3780,25 @@ module Polars
     #   # │ 0       │
     #   # │ 10      │
     #   # └─────────┘
-    def select(exprs)
-      _from_rbdf(
-        lazy
-          .select(exprs)
-          .collect(no_optimization: true, string_cache: false)
-          ._df
-      )
+    def select(*exprs, **named_exprs)
+      lazy.select(*exprs, **named_exprs).collect(_eager: true)
     end
-    # Add or overwrite multiple columns in a DataFrame.
+    # Add columns to this DataFrame.
+    #
+    # Added columns will replace existing columns with the same name.
     #
     # @param exprs [Array]
-    #   Array of Expressions that evaluate to columns.
+    #   Column(s) to add, specified as positional arguments.
+    #   Accepts expression input. Strings are parsed as column names, other
+    #   non-expression inputs are parsed as literals.
+    # @param named_exprs [Hash]
+    #   Additional columns to add, specified as keyword arguments.
+    #   The columns will be renamed to the keyword used.
     #
     # @return [DataFrame]
     #
-    # @example
+    # @example Pass an expression to add it as a new column.
     #   df = Polars::DataFrame.new(
     #     {
     #       "a" => [1, 2, 3, 4],
@@ -3696,32 +3806,94 @@ module Polars
     #       "c" => [true, true, false, true]
     #     }
     #   )
+    #   df.with_columns((Polars.col("a") ** 2).alias("a^2"))
+    #   # =>
+    #   # shape: (4, 4)
+    #   # ┌─────┬──────┬───────┬─────┐
+    #   # │ a   ┆ b    ┆ c     ┆ a^2 │
+    #   # │ --- ┆ ---  ┆ ---   ┆ --- │
+    #   # │ i64 ┆ f64  ┆ bool  ┆ i64 │
+    #   # ╞═════╪══════╪═══════╪═════╡
+    #   # │ 1   ┆ 0.5  ┆ true  ┆ 1   │
+    #   # │ 2   ┆ 4.0  ┆ true  ┆ 4   │
+    #   # │ 3   ┆ 10.0 ┆ false ┆ 9   │
+    #   # │ 4   ┆ 13.0 ┆ true  ┆ 16  │
+    #   # └─────┴──────┴───────┴─────┘
+    #
+    # @example Added columns will replace existing columns with the same name.
+    #   df.with_columns(Polars.col("a").cast(Polars::Float64))
+    #   # =>
+    #   # shape: (4, 3)
+    #   # ┌─────┬──────┬───────┐
+    #   # │ a   ┆ b    ┆ c     │
+    #   # │ --- ┆ ---  ┆ ---   │
+    #   # │ f64 ┆ f64  ┆ bool  │
+    #   # ╞═════╪══════╪═══════╡
+    #   # │ 1.0 ┆ 0.5  ┆ true  │
+    #   # │ 2.0 ┆ 4.0  ┆ true  │
+    #   # │ 3.0 ┆ 10.0 ┆ false │
+    #   # │ 4.0 ┆ 13.0 ┆ true  │
+    #   # └─────┴──────┴───────┘
+    #
+    # @example Multiple columns can be added by passing a list of expressions.
     #   df.with_columns(
     #     [
     #       (Polars.col("a") ** 2).alias("a^2"),
     #       (Polars.col("b") / 2).alias("b/2"),
-    #       (Polars.col("c").is_not).alias("not c")
+    #       (Polars.col("c").not_).alias("not c"),
     #     ]
     #   )
     #   # =>
     #   # shape: (4, 6)
-    #   # ┌─────┬──────┬───────┬──────┬──────┬───────┐
-    #   # │ a   ┆ b    ┆ c     ┆ a^2  ┆ b/2  ┆ not c │
-    #   # │ --- ┆ ---  ┆ ---   ┆ ---  ┆ ---  ┆ ---   │
-    #   # │ i64 ┆ f64  ┆ bool  ┆ f64  ┆ f64  ┆ bool  │
-    #   # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
-    #   # │ 1   ┆ 0.5  ┆ true  ┆ 1.0  ┆ 0.25 ┆ false │
-    #   # │ 2   ┆ 4.0  ┆ true  ┆ 4.0  ┆ 2.0  ┆ false │
-    #   # │ 3   ┆ 10.0 ┆ false ┆ 9.0  ┆ 5.0  ┆ true  │
-    #   # │ 4   ┆ 13.0 ┆ true  ┆ 16.0 ┆ 6.5  ┆ false │
-    #   # └─────┴──────┴───────┴──────┴──────┴───────┘
-    def with_columns(exprs)
-      if !exprs.nil? && !exprs.is_a?(::Array)
-        exprs = [exprs]
-      end
-      lazy
-        .with_columns(exprs)
-        .collect(no_optimization: true, string_cache: false)
+    #   # ┌─────┬──────┬───────┬─────┬──────┬───────┐
+    #   # │ a   ┆ b    ┆ c     ┆ a^2 ┆ b/2  ┆ not c │
+    #   # │ --- ┆ ---  ┆ ---   ┆ --- ┆ ---  ┆ ---   │
+    #   # │ i64 ┆ f64  ┆ bool  ┆ i64 ┆ f64  ┆ bool  │
+    #   # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡
+    #   # │ 1   ┆ 0.5  ┆ true  ┆ 1   ┆ 0.25 ┆ false │
+    #   # │ 2   ┆ 4.0  ┆ true  ┆ 4   ┆ 2.0  ┆ false │
+    #   # │ 3   ┆ 10.0 ┆ false ┆ 9   ┆ 5.0  ┆ true  │
+    #   # │ 4   ┆ 13.0 ┆ true  ┆ 16  ┆ 6.5  ┆ false │
+    #   # └─────┴──────┴───────┴─────┴──────┴───────┘
+    #
+    # @example Multiple columns also can be added using positional arguments instead of a list.
+    #   df.with_columns(
+    #     (Polars.col("a") ** 2).alias("a^2"),
+    #     (Polars.col("b") / 2).alias("b/2"),
+    #     (Polars.col("c").not_).alias("not c"),
+    #   )
+    #   # =>
+    #   # shape: (4, 6)
+    #   # ┌─────┬──────┬───────┬─────┬──────┬───────┐
+    #   # │ a   ┆ b    ┆ c     ┆ a^2 ┆ b/2  ┆ not c │
+    #   # │ --- ┆ ---  ┆ ---   ┆ --- ┆ ---  ┆ ---   │
+    #   # │ i64 ┆ f64  ┆ bool  ┆ i64 ┆ f64  ┆ bool  │
+    #   # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡
+    #   # │ 1   ┆ 0.5  ┆ true  ┆ 1   ┆ 0.25 ┆ false │
+    #   # │ 2   ┆ 4.0  ┆ true  ┆ 4   ┆ 2.0  ┆ false │
+    #   # │ 3   ┆ 10.0 ┆ false ┆ 9   ┆ 5.0  ┆ true  │
+    #   # │ 4   ┆ 13.0 ┆ true  ┆ 16  ┆ 6.5  ┆ false │
+    #   # └─────┴──────┴───────┴─────┴──────┴───────┘
+    #
+    # @example Use keyword arguments to easily name your expression inputs.
+    #   df.with_columns(
+    #     ab: Polars.col("a") * Polars.col("b"),
+    #     not_c: Polars.col("c").not_
+    #   )
+    #   # =>
+    #   # shape: (4, 5)
+    #   # ┌─────┬──────┬───────┬──────┬───────┐
+    #   # │ a   ┆ b    ┆ c     ┆ ab   ┆ not_c │
+    #   # │ --- ┆ ---  ┆ ---   ┆ ---  ┆ ---   │
+    #   # │ i64 ┆ f64  ┆ bool  ┆ f64  ┆ bool  │
+    #   # ╞═════╪══════╪═══════╪══════╪═══════╡
+    #   # │ 1   ┆ 0.5  ┆ true  ┆ 0.5  ┆ false │
+    #   # │ 2   ┆ 4.0  ┆ true  ┆ 8.0  ┆ false │
+    #   # │ 3   ┆ 10.0 ┆ false ┆ 30.0 ┆ true  │
+    #   # │ 4   ┆ 13.0 ┆ true  ┆ 52.0 ┆ false │
+    #   # └─────┴──────┴───────┴──────┴───────┘
+    def with_columns(*exprs, **named_exprs)
+      lazy.with_columns(*exprs, **named_exprs).collect(_eager: true)
     end
     # Get number of chunks used by the ChunkedArrays of this DataFrame.
@@ -4363,7 +4535,7 @@ module Polars
     #   #         null
     #   # ]
     #
-    # @example A horizontal boolean or, similar to a row-wise .any():
+    # @example A horizontal boolean or, similar to a row-wise .any:
     #   df = Polars::DataFrame.new(
     #     {
     #       "a" => [false, false, true],
@@ -4486,7 +4658,7 @@ module Polars
     #   # => [{"a"=>1, "b"=>2}, {"a"=>3, "b"=>4}, {"a"=>5, "b"=>6}]
     def rows(named: false)
       if named
-        columns = columns()
+        columns = self.columns
         _df.row_tuples.map do |v|
           columns.zip(v).to_h
         end
@@ -4527,7 +4699,7 @@ module Polars
       return to_enum(:iter_rows, named: named, buffer_size: buffer_size) unless block_given?
       # load into the local namespace for a modest performance boost in the hot loops
-      columns = columns()
+      columns = self.columns
       # note: buffering rows results in a 2-4x speedup over individual calls
       # to ".row(i)", so it should only be disabled in extremely specific cases.
@@ -4764,13 +4936,51 @@ module Polars
       _from_rbdf(_df.unnest(names))
     end
-    # TODO
+    # Requires NumPy
     # def corr
     # end
-    # TODO
-    # def merge_sorted
-    # end
+    # Take two sorted DataFrames and merge them by the sorted key.
+    #
+    # The output of this operation will also be sorted.
+    # It is the callers responsibility that the frames are sorted
+    # by that key otherwise the output will not make sense.
+    #
+    # The schemas of both DataFrames must be equal.
+    #
+    # @param other [DataFrame]
+    #   Other DataFrame that must be merged
+    # @param key [String]
+    #   Key that is sorted.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df0 = Polars::DataFrame.new(
+    #     {"name" => ["steve", "elise", "bob"], "age" => [42, 44, 18]}
+    #   ).sort("age")
+    #   df1 = Polars::DataFrame.new(
+    #     {"name" => ["anna", "megan", "steve", "thomas"], "age" => [21, 33, 42, 20]}
+    #   ).sort("age")
+    #   df0.merge_sorted(df1, "age")
+    #   # =>
+    #   # shape: (7, 2)
+    #   # ┌────────┬─────┐
+    #   # │ name   ┆ age │
+    #   # │ ---    ┆ --- │
+    #   # │ str    ┆ i64 │
+    #   # ╞════════╪═════╡
+    #   # │ bob    ┆ 18  │
+    #   # │ thomas ┆ 20  │
+    #   # │ anna   ┆ 21  │
+    #   # │ megan  ┆ 33  │
+    #   # │ steve  ┆ 42  │
+    #   # │ steve  ┆ 42  │
+    #   # │ elise  ┆ 44  │
+    #   # └────────┴─────┘
+    def merge_sorted(other, key)
+      lazy.merge_sorted(other.lazy, key).collect(_eager: true)
+    end
     # Indicate that one or multiple columns are sorted.
     #
@@ -4812,7 +5022,7 @@ module Polars
     end
     def _pos_idxs(idxs, dim)
-      idx_type = Polars._get_idx_type
+      idx_type = Plr.get_index_type
       if idxs.is_a?(Series)
         if idxs.dtype == idx_type
@@ -5045,14 +5255,14 @@ module Polars
       elsif data[0].is_a?(Hash)
         column_names, dtypes = _unpack_schema(columns)
         schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
-        rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
+        rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
         if column_names
           rbdf = _post_apply_columns(rbdf, column_names)
         end
         return rbdf
       elsif data[0].is_a?(::Array)
+        first_element = data[0]
         if orient.nil? && !columns.nil?
-          first_element = data[0]
           row_types = first_element.filter_map { |value| value.class }.uniq
           if row_types.include?(Integer) && row_types.include?(Float)
             row_types.delete(Integer)