RubyGems - polars-df - Versions diffs - 0.9.0-arm64-darwin → 0.11.0-arm64-darwin - Mend

polars-df 0.9.0-arm64-darwin → 0.11.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +23 -0
data/Cargo.lock +144 -57
data/LICENSE-THIRD-PARTY.txt +629 -29
data/README.md +7 -6
data/lib/polars/3.1/polars.bundle +0 -0
data/lib/polars/3.2/polars.bundle +0 -0
data/lib/polars/3.3/polars.bundle +0 -0
data/lib/polars/array_expr.rb +6 -2
data/lib/polars/batched_csv_reader.rb +11 -3
data/lib/polars/convert.rb +6 -1
data/lib/polars/data_frame.rb +225 -370
data/lib/polars/date_time_expr.rb +11 -4
data/lib/polars/date_time_name_space.rb +14 -4
data/lib/polars/dynamic_group_by.rb +2 -2
data/lib/polars/exceptions.rb +4 -0
data/lib/polars/expr.rb +1171 -54
data/lib/polars/functions/lazy.rb +3 -3
data/lib/polars/functions/range/date_range.rb +92 -0
data/lib/polars/functions/range/datetime_range.rb +149 -0
data/lib/polars/functions/range/time_range.rb +141 -0
data/lib/polars/functions/whenthen.rb +74 -5
data/lib/polars/group_by.rb +88 -23
data/lib/polars/io/avro.rb +24 -0
data/lib/polars/{io.rb → io/csv.rb} +307 -489
data/lib/polars/io/database.rb +73 -0
data/lib/polars/io/ipc.rb +247 -0
data/lib/polars/io/json.rb +18 -0
data/lib/polars/io/ndjson.rb +69 -0
data/lib/polars/io/parquet.rb +226 -0
data/lib/polars/lazy_frame.rb +55 -195
data/lib/polars/lazy_group_by.rb +100 -3
data/lib/polars/list_expr.rb +6 -2
data/lib/polars/rolling_group_by.rb +2 -2
data/lib/polars/series.rb +14 -12
data/lib/polars/string_expr.rb +38 -36
data/lib/polars/utils.rb +89 -1
data/lib/polars/version.rb +1 -1
data/lib/polars/whenthen.rb +83 -0
data/lib/polars.rb +10 -3
metadata +13 -6
data/lib/polars/when.rb +0 -16
data/lib/polars/when_then.rb +0 -19

data/lib/polars/data_frame.rb CHANGED Viewed

@@ -46,268 +46,6 @@ module Polars
       df
     end
-    # @private
-    def self._from_hashes(data, infer_schema_length: 100, schema: nil, schema_overrides: nil)
-      rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
-      _from_rbdf(rbdf)
-    end
-    # @private
-    def self._from_hash(data, schema: nil, schema_overrides: nil)
-      _from_rbdf(hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides))
-    end
-    # def self._from_records
-    # end
-    # def self._from_numo
-    # end
-    # no self._from_arrow
-    # no self._from_pandas
-    # @private
-    def self._read_csv(
-      file,
-      has_header: true,
-      columns: nil,
-      sep: str = ",",
-      comment_char: nil,
-      quote_char: '"',
-      skip_rows: 0,
-      dtypes: nil,
-      null_values: nil,
-      ignore_errors: false,
-      parse_dates: false,
-      n_threads: nil,
-      infer_schema_length: 100,
-      batch_size: 8192,
-      n_rows: nil,
-      encoding: "utf8",
-      low_memory: false,
-      rechunk: true,
-      skip_rows_after_header: 0,
-      row_count_name: nil,
-      row_count_offset: 0,
-      sample_size: 1024,
-      eol_char: "\n"
-    )
-      if Utils.pathlike?(file)
-        path = Utils.normalise_filepath(file)
-      else
-        path = nil
-        # if defined?(StringIO) && file.is_a?(StringIO)
-        #   file = file.string
-        # end
-      end
-      dtype_list = nil
-      dtype_slice = nil
-      if !dtypes.nil?
-        if dtypes.is_a?(Hash)
-          dtype_list = []
-          dtypes.each do|k, v|
-            dtype_list << [k, Utils.rb_type_to_dtype(v)]
-          end
-        elsif dtypes.is_a?(::Array)
-          dtype_slice = dtypes
-        else
-          raise ArgumentError, "dtype arg should be list or dict"
-        end
-      end
-      processed_null_values = Utils._process_null_values(null_values)
-      if columns.is_a?(::String)
-        columns = [columns]
-      end
-      if file.is_a?(::String) && file.include?("*")
-        dtypes_dict = nil
-        if !dtype_list.nil?
-          dtypes_dict = dtype_list.to_h
-        end
-        if !dtype_slice.nil?
-          raise ArgumentError, "cannot use glob patterns and unnamed dtypes as `dtypes` argument; Use dtypes: Mapping[str, Type[DataType]"
-        end
-        scan = Polars.scan_csv(
-          file,
-          has_header: has_header,
-          sep: sep,
-          comment_char: comment_char,
-          quote_char: quote_char,
-          skip_rows: skip_rows,
-          dtypes: dtypes_dict,
-          null_values: null_values,
-          ignore_errors: ignore_errors,
-          infer_schema_length: infer_schema_length,
-          n_rows: n_rows,
-          low_memory: low_memory,
-          rechunk: rechunk,
-          skip_rows_after_header: skip_rows_after_header,
-          row_count_name: row_count_name,
-          row_count_offset: row_count_offset,
-          eol_char: eol_char
-        )
-        if columns.nil?
-          return _from_rbdf(scan.collect._df)
-        elsif is_str_sequence(columns, allow_str: false)
-          return _from_rbdf(scan.select(columns).collect._df)
-        else
-          raise ArgumentError, "cannot use glob patterns and integer based projection as `columns` argument; Use columns: List[str]"
-        end
-      end
-      projection, columns = Utils.handle_projection_columns(columns)
-      _from_rbdf(
-        RbDataFrame.read_csv(
-          file,
-          infer_schema_length,
-          batch_size,
-          has_header,
-          ignore_errors,
-          n_rows,
-          skip_rows,
-          projection,
-          sep,
-          rechunk,
-          columns,
-          encoding,
-          n_threads,
-          path,
-          dtype_list,
-          dtype_slice,
-          low_memory,
-          comment_char,
-          quote_char,
-          processed_null_values,
-          parse_dates,
-          skip_rows_after_header,
-          Utils._prepare_row_count_args(row_count_name, row_count_offset),
-          sample_size,
-          eol_char
-        )
-      )
-    end
-    # @private
-    def self._read_parquet(
-      source,
-      columns: nil,
-      n_rows: nil,
-      parallel: "auto",
-      row_count_name: nil,
-      row_count_offset: 0,
-      low_memory: false,
-      use_statistics: true,
-      rechunk: true
-    )
-      if Utils.pathlike?(source)
-        source = Utils.normalise_filepath(source)
-      end
-      if columns.is_a?(::String)
-        columns = [columns]
-      end
-      if source.is_a?(::String) && source.include?("*") && Utils.local_file?(source)
-        scan =
-          Polars.scan_parquet(
-            source,
-            n_rows: n_rows,
-            rechunk: true,
-            parallel: parallel,
-            row_count_name: row_count_name,
-            row_count_offset: row_count_offset,
-            low_memory: low_memory
-          )
-        if columns.nil?
-          return self._from_rbdf(scan.collect._df)
-        elsif Utils.is_str_sequence(columns, allow_str: false)
-          return self._from_rbdf(scan.select(columns).collect._df)
-        else
-          raise ArgumentError, "cannot use glob patterns and integer based projection as `columns` argument; Use columns: Array[String]"
-        end
-      end
-      projection, columns = Utils.handle_projection_columns(columns)
-      _from_rbdf(
-        RbDataFrame.read_parquet(
-          source,
-          columns,
-          projection,
-          n_rows,
-          parallel,
-          Utils._prepare_row_count_args(row_count_name, row_count_offset),
-          low_memory,
-          use_statistics,
-          rechunk
-        )
-      )
-    end
-    # @private
-    def self._read_avro(file, columns: nil, n_rows: nil)
-      if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
-      end
-      projection, columns = Utils.handle_projection_columns(columns)
-      _from_rbdf(RbDataFrame.read_avro(file, columns, projection, n_rows))
-    end
-    # @private
-    def self._read_ipc(
-      file,
-      columns: nil,
-      n_rows: nil,
-      row_count_name: nil,
-      row_count_offset: 0,
-      rechunk: true,
-      memory_map: true
-    )
-      if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
-      end
-      if columns.is_a?(::String)
-        columns = [columns]
-      end
-      if file.is_a?(::String) && file.include?("*")
-        raise Todo
-      end
-      projection, columns = Utils.handle_projection_columns(columns)
-      _from_rbdf(
-        RbDataFrame.read_ipc(
-          file,
-          columns,
-          projection,
-          n_rows,
-          Utils._prepare_row_count_args(row_count_name, row_count_offset),
-          memory_map
-        )
-      )
-    end
-    # @private
-    def self._read_json(file)
-      if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
-      end
-      _from_rbdf(RbDataFrame.read_json(file))
-    end
-    # @private
-    def self._read_ndjson(file)
-      if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
-      end
-      _from_rbdf(RbDataFrame.read_ndjson(file))
-    end
     # Get the shape of the DataFrame.
     #
     # @return [Array]
@@ -416,6 +154,13 @@ module Polars
       _df.dtypes
     end
+    # Get flags that are set on the columns of this DataFrame.
+    #
+    # @return [Hash] mapping each column name to the flags of that column
+    def flags
+      columns.to_h { |name| [name, self[name].flags] }
+    end
     # Get the schema.
     #
     # @return [Hash]
@@ -814,8 +559,6 @@ module Polars
     # Serialize to JSON representation.
     #
-    # @return [nil]
-    #
     # @param file [String]
     #   File path to which the result should be written.
     # @param pretty [Boolean]
@@ -823,17 +566,45 @@ module Polars
     # @param row_oriented [Boolean]
     #   Write to row oriented json. This is slower, but more common.
     #
-    # @see #write_ndjson
+    # @return [String, nil] the JSON string when `file` is nil, otherwise nil
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8]
+    #     }
+    #   )
+    #   df.write_json
+    #   # => "{\"columns\":[{\"name\":\"foo\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[1,2,3]},{\"name\":\"bar\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[6,7,8]}]}"
+    #
+    # @example
+    #   df.write_json(row_oriented: true)
+    #   # => "[{\"foo\":1,\"bar\":6},{\"foo\":2,\"bar\":7},{\"foo\":3,\"bar\":8}]"
     def write_json(
-      file,
+      file = nil,
       pretty: false,
       row_oriented: false
     )
       if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
+        file = Utils.normalize_filepath(file)
+      end
+      to_string_io = !file.nil? && file.is_a?(StringIO)
+      if file.nil? || to_string_io
+        buf = StringIO.new
+        buf.set_encoding(Encoding::BINARY)
+        _df.write_json(buf, pretty, row_oriented)
+        json_bytes = buf.string
+        json_str = json_bytes.force_encoding(Encoding::UTF_8)
+        if to_string_io
+          file.write(json_str)
+        else
+          return json_str
+        end
+      else
+        _df.write_json(file, pretty, row_oriented)
       end
-      _df.write_json(file, pretty, row_oriented)
       nil
     end
@@ -843,12 +614,36 @@ module Polars
     #   File path to which the result should be written.
     #
     # @return [nil]
-    def write_ndjson(file)
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8]
+    #     }
+    #   )
+    #   df.write_ndjson()
+    #   # => "{\"foo\":1,\"bar\":6}\n{\"foo\":2,\"bar\":7}\n{\"foo\":3,\"bar\":8}\n"
+    def write_ndjson(file = nil)
       if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
+        file = Utils.normalize_filepath(file)
+      end
+      to_string_io = !file.nil? && file.is_a?(StringIO)
+      if file.nil? || to_string_io
+        buf = StringIO.new
+        buf.set_encoding(Encoding::BINARY)
+        _df.write_ndjson(buf)
+        json_bytes = buf.string
+        json_str = json_bytes.force_encoding(Encoding::UTF_8)
+        if to_string_io
+          file.write(json_str)
+        else
+          return json_str
+        end
+      else
+        _df.write_ndjson(file)
       end
-      _df.write_ndjson(file)
       nil
     end
@@ -938,7 +733,7 @@ module Polars
       end
       if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
+        file = Utils.normalize_filepath(file)
       end
       _df.write_csv(
@@ -976,7 +771,7 @@ module Polars
         compression = "uncompressed"
       end
       if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
+        file = Utils.normalize_filepath(file)
       end
       _df.write_avro(file, compression)
@@ -997,7 +792,7 @@ module Polars
         file.set_encoding(Encoding::BINARY)
       end
       if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
+        file = Utils.normalize_filepath(file)
       end
       if compression.nil?
@@ -1008,9 +803,50 @@ module Polars
       return_bytes ? file.string : nil
     end
+    # Write to Arrow IPC record batch stream.
+    #
+    # See "Streaming format" in https://arrow.apache.org/docs/python/ipc.html.
+    #
+    # @param file [Object]
+    #   Path or writable file-like object to which the IPC record batch data will
+    #   be written. If `nil`, the output is returned as a binary-encoded String.
+    # @param compression ['uncompressed', 'lz4', 'zstd']
+    #   Compression method. Defaults to "uncompressed"; `nil` is treated the same.
+    #
+    # @return [String, nil] the written bytes when `file` is `nil`, otherwise `nil`
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3, 4, 5],
+    #       "bar" => [6, 7, 8, 9, 10],
+    #       "ham" => ["a", "b", "c", "d", "e"]
+    #     }
+    #   )
+    #   df.write_ipc_stream("new_file.arrow")
+    def write_ipc_stream(
+      file,
+      compression: "uncompressed"
+    )
+      return_bytes = file.nil?
+      if return_bytes
+        file = StringIO.new
+        file.set_encoding(Encoding::BINARY)
+      elsif Utils.pathlike?(file)
+        file = Utils.normalize_filepath(file)
+      end
+      if compression.nil?
+        compression = "uncompressed"
+      end
+      _df.write_ipc_stream(file, compression)
+      return_bytes ? file.string : nil
+    end
     # Write to Apache Parquet file.
     #
-    # @param file [String]
+    # @param file [String, Pathname, StringIO]
     #   File path to which the file should be written.
     # @param compression ["lz4", "uncompressed", "snappy", "gzip", "lzo", "brotli", "zstd"]
     #   Choose "zstd" for good compression performance.
@@ -1027,10 +863,9 @@ module Polars
     # @param statistics [Boolean]
     #   Write statistics to the parquet headers. This requires extra compute.
     # @param row_group_size [Integer, nil]
-    #   Size of the row groups in number of rows.
-    #   If `nil` (default), the chunks of the DataFrame are
-    #   used. Writing in smaller chunks may reduce memory pressure and improve
-    #   writing speeds.
+    #   Size of the row groups in number of rows. Defaults to 512^2 rows.
+    # @param data_page_size [Integer, nil]
+    #   Size of the data page in bytes. Defaults to 1024^2 bytes.
     #
     # @return [nil]
     def write_parquet(
@@ -1038,17 +873,18 @@ module Polars
       compression: "zstd",
       compression_level: nil,
       statistics: false,
-      row_group_size: nil
+      row_group_size: nil,
+      data_page_size: nil
     )
       if compression.nil?
         compression = "uncompressed"
       end
       if Utils.pathlike?(file)
-        file = Utils.normalise_filepath(file)
+        file = Utils.normalize_filepath(file)
       end
       _df.write_parquet(
-        file, compression, compression_level, statistics, row_group_size
+        file, compression, compression_level, statistics, row_group_size, data_page_size
       )
     end
@@ -1084,7 +920,7 @@ module Polars
     #   df.estimated_size
     #   # => 25888898
     #   df.estimated_size("mb")
-    #   # => 26.702880859375
+    #   # => 17.0601749420166
     def estimated_size(unit = "b")
       sz = _df.estimated_size
       Utils.scale_bytes(sz, to: unit)
@@ -1720,10 +1556,7 @@ module Polars
     #   # │ 3   ┆ 8   ┆ c   │
     #   # └─────┴─────┴─────┘
     def drop_nulls(subset: nil)
-      if subset.is_a?(::String)
-        subset = [subset]
-      end
-      _from_rbdf(_df.drop_nulls(subset))
+      lazy.drop_nulls(subset: subset).collect(_eager: true)
     end
     # Offers a structured way to apply a sequence of user-defined functions (UDFs).
@@ -1785,16 +1618,16 @@ module Polars
     #   df.with_row_index
     #   # =>
     #   # shape: (3, 3)
-    #   # ┌────────┬─────┬─────┐
-    #   # │ row_nr ┆ a   ┆ b   │
-    #   # │ ---    ┆ --- ┆ --- │
-    #   # │ u32    ┆ i64 ┆ i64 │
-    #   # ╞════════╪═════╪═════╡
-    #   # │ 0      ┆ 1   ┆ 2   │
-    #   # │ 1      ┆ 3   ┆ 4   │
-    #   # │ 2      ┆ 5   ┆ 6   │
-    #   # └────────┴─────┴─────┘
-    def with_row_index(name: "row_nr", offset: 0)
+    #   # ┌───────┬─────┬─────┐
+    #   # │ index ┆ a   ┆ b   │
+    #   # │ ---   ┆ --- ┆ --- │
+    #   # │ u32   ┆ i64 ┆ i64 │
+    #   # ╞═══════╪═════╪═════╡
+    #   # │ 0     ┆ 1   ┆ 2   │
+    #   # │ 1     ┆ 3   ┆ 4   │
+    #   # │ 2     ┆ 5   ┆ 6   │
+    #   # └───────┴─────┴─────┘
+    def with_row_index(name: "index", offset: 0)
       _from_rbdf(_df.with_row_index(name, offset))
     end
     alias_method :with_row_count, :with_row_index
@@ -2083,16 +1916,16 @@ module Polars
     #   )
     #   # =>
     #   # shape: (4, 3)
-    #   # ┌─────────────────────┬────────────┬───────────────────────────────────┐
-    #   # │ time                ┆ time_count ┆ time_agg_list                     │
-    #   # │ ---                 ┆ ---        ┆ ---                               │
-    #   # │ datetime[μs]        ┆ u32        ┆ list[datetime[μs]]                │
-    #   # ╞═════════════════════╪════════════╪═══════════════════════════════════╡
-    #   # │ 2021-12-16 00:00:00 ┆ 2          ┆ [2021-12-16 00:00:00, 2021-12-16… │
-    #   # │ 2021-12-16 01:00:00 ┆ 2          ┆ [2021-12-16 01:00:00, 2021-12-16… │
-    #   # │ 2021-12-16 02:00:00 ┆ 2          ┆ [2021-12-16 02:00:00, 2021-12-16… │
-    #   # │ 2021-12-16 03:00:00 ┆ 1          ┆ [2021-12-16 03:00:00]             │
-    #   # └─────────────────────┴────────────┴───────────────────────────────────┘
+    #   # ┌─────────────────────┬────────────┬─────────────────────────────────┐
+    #   # │ time                ┆ time_count ┆ time_agg_list                   │
+    #   # │ ---                 ┆ ---        ┆ ---                             │
+    #   # │ datetime[μs]        ┆ u32        ┆ list[datetime[μs]]              │
+    #   # ╞═════════════════════╪════════════╪═════════════════════════════════╡
+    #   # │ 2021-12-16 00:00:00 ┆ 2          ┆ [2021-12-16 00:00:00, 2021-12-… │
+    #   # │ 2021-12-16 01:00:00 ┆ 2          ┆ [2021-12-16 01:00:00, 2021-12-… │
+    #   # │ 2021-12-16 02:00:00 ┆ 2          ┆ [2021-12-16 02:00:00, 2021-12-… │
+    #   # │ 2021-12-16 03:00:00 ┆ 1          ┆ [2021-12-16 03:00:00]           │
+    #   # └─────────────────────┴────────────┴─────────────────────────────────┘
     #
     # @example When closed="both" the time values at the window boundaries belong to 2 groups.
     #   df.group_by_dynamic("time", every: "1h", closed: "both").agg(
@@ -2161,12 +1994,13 @@ module Polars
     #     closed: "right"
     #   ).agg(Polars.col("A").alias("A_agg_list"))
     #   # =>
-    #   # shape: (3, 4)
+    #   # shape: (4, 4)
     #   # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
     #   # │ _lower_boundary ┆ _upper_boundary ┆ idx ┆ A_agg_list      │
     #   # │ ---             ┆ ---             ┆ --- ┆ ---             │
     #   # │ i64             ┆ i64             ┆ i64 ┆ list[str]       │
     #   # ╞═════════════════╪═════════════════╪═════╪═════════════════╡
+    #   # │ -2              ┆ 1               ┆ -2  ┆ ["A", "A"]      │
     #   # │ 0               ┆ 3               ┆ 0   ┆ ["A", "B", "B"] │
     #   # │ 2               ┆ 5               ┆ 2   ┆ ["B", "B", "C"] │
     #   # │ 4               ┆ 7               ┆ 4   ┆ ["C"]           │
@@ -2566,7 +2400,7 @@ module Polars
     #   df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [-1, 5, 8]})
     #
     # @example Return a DataFrame by mapping each row to a tuple:
-    #   df.apply { |t| [t[0] * 2, t[1] * 3] }
+    #   df.map_rows { |t| [t[0] * 2, t[1] * 3] }
     #   # =>
     #   # shape: (3, 2)
     #   # ┌──────────┬──────────┐
@@ -2580,7 +2414,7 @@ module Polars
     #   # └──────────┴──────────┘
     #
     # @example Return a Series by mapping each row to a scalar:
-    #   df.apply { |t| t[0] * 2 + t[1] }
+    #   df.map_rows { |t| t[0] * 2 + t[1] }
     #   # =>
     #   # shape: (3, 1)
     #   # ┌───────┐
@@ -2592,14 +2426,15 @@ module Polars
     #   # │ 9     │
     #   # │ 14    │
     #   # └───────┘
-    def apply(return_dtype: nil, inference_size: 256, &f)
-      out, is_df = _df.apply(f, return_dtype, inference_size)
+    def map_rows(return_dtype: nil, inference_size: 256, &f)
+      out, is_df = _df.map_rows(f, return_dtype, inference_size)
       if is_df
         _from_rbdf(out)
       else
         _from_rbdf(Utils.wrap_s(out).to_frame._df)
       end
     end
+    alias_method :apply, :map_rows
     # Return a new DataFrame with the column added or replaced.
     #
@@ -2621,26 +2456,26 @@ module Polars
     #   # ┌─────┬─────┬───────────┐
     #   # │ a   ┆ b   ┆ b_squared │
     #   # │ --- ┆ --- ┆ ---       │
-    #   # │ i64 ┆ i64 ┆ f64       │
+    #   # │ i64 ┆ i64 ┆ i64       │
     #   # ╞═════╪═════╪═══════════╡
-    #   # │ 1   ┆ 2   ┆ 4.0       │
-    #   # │ 3   ┆ 4   ┆ 16.0      │
-    #   # │ 5   ┆ 6   ┆ 36.0      │
+    #   # │ 1   ┆ 2   ┆ 4         │
+    #   # │ 3   ┆ 4   ┆ 16        │
+    #   # │ 5   ┆ 6   ┆ 36        │
     #   # └─────┴─────┴───────────┘
     #
     # @example Replaced
     #   df.with_column(Polars.col("a") ** 2)
     #   # =>
     #   # shape: (3, 2)
-    #   # ┌──────┬─────┐
-    #   # │ a    ┆ b   │
-    #   # │ ---  ┆ --- │
-    #   # │ f64  ┆ i64 │
-    #   # ╞══════╪═════╡
-    #   # │ 1.0  ┆ 2   │
-    #   # │ 9.0  ┆ 4   │
-    #   # │ 25.0 ┆ 6   │
-    #   # └──────┴─────┘
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ 2   │
+    #   # │ 9   ┆ 4   │
+    #   # │ 25  ┆ 6   │
+    #   # └─────┴─────┘
     def with_column(column)
       lazy
         .with_column(column)
@@ -2807,16 +2642,36 @@ module Polars
     #   # │ 2   ┆ 7.0 │
     #   # │ 3   ┆ 8.0 │
     #   # └─────┴─────┘
-    def drop(columns)
-      if columns.is_a?(::Array)
-        df = clone
-        columns.each do |n|
-          df._df.drop_in_place(n)
-        end
-        df
-      else
-        _from_rbdf(_df.drop(columns))
-      end
+    #
+    # @example Drop multiple columns by passing a list of column names.
+    #   df.drop(["bar", "ham"])
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌─────┐
+    #   # │ foo │
+    #   # │ --- │
+    #   # │ i64 │
+    #   # ╞═════╡
+    #   # │ 1   │
+    #   # │ 2   │
+    #   # │ 3   │
+    #   # └─────┘
+    #
+    # @example Use positional arguments to drop multiple columns.
+    #   df.drop("foo", "ham")
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌─────┐
+    #   # │ bar │
+    #   # │ --- │
+    #   # │ f64 │
+    #   # ╞═════╡
+    #   # │ 6.0 │
+    #   # │ 7.0 │
+    #   # │ 8.0 │
+    #   # └─────┘
+    def drop(*columns)
+      lazy.drop(*columns).collect(_eager: true)
     end
     # Drop in place.
@@ -3700,7 +3555,7 @@ module Polars
     #   # ┌─────────┐
     #   # │ literal │
     #   # │ ---     │
-    #   # │ i64     │
+    #   # │ i32     │
     #   # ╞═════════╡
     #   # │ 0       │
     #   # │ 0       │
@@ -3735,16 +3590,16 @@ module Polars
     #   df.with_columns((Polars.col("a") ** 2).alias("a^2"))
     #   # =>
     #   # shape: (4, 4)
-    #   # ┌─────┬──────┬───────┬──────┐
-    #   # │ a   ┆ b    ┆ c     ┆ a^2  │
-    #   # │ --- ┆ ---  ┆ ---   ┆ ---  │
-    #   # │ i64 ┆ f64  ┆ bool  ┆ f64  │
-    #   # ╞═════╪══════╪═══════╪══════╡
-    #   # │ 1   ┆ 0.5  ┆ true  ┆ 1.0  │
-    #   # │ 2   ┆ 4.0  ┆ true  ┆ 4.0  │
-    #   # │ 3   ┆ 10.0 ┆ false ┆ 9.0  │
-    #   # │ 4   ┆ 13.0 ┆ true  ┆ 16.0 │
-    #   # └─────┴──────┴───────┴──────┘
+    #   # ┌─────┬──────┬───────┬─────┐
+    #   # │ a   ┆ b    ┆ c     ┆ a^2 │
+    #   # │ --- ┆ ---  ┆ ---   ┆ --- │
+    #   # │ i64 ┆ f64  ┆ bool  ┆ i64 │
+    #   # ╞═════╪══════╪═══════╪═════╡
+    #   # │ 1   ┆ 0.5  ┆ true  ┆ 1   │
+    #   # │ 2   ┆ 4.0  ┆ true  ┆ 4   │
+    #   # │ 3   ┆ 10.0 ┆ false ┆ 9   │
+    #   # │ 4   ┆ 13.0 ┆ true  ┆ 16  │
+    #   # └─────┴──────┴───────┴─────┘
     #
     # @example Added columns will replace existing columns with the same name.
     #   df.with_columns(Polars.col("a").cast(Polars::Float64))
@@ -3771,16 +3626,16 @@ module Polars
     #   )
     #   # =>
     #   # shape: (4, 6)
-    #   # ┌─────┬──────┬───────┬──────┬──────┬───────┐
-    #   # │ a   ┆ b    ┆ c     ┆ a^2  ┆ b/2  ┆ not c │
-    #   # │ --- ┆ ---  ┆ ---   ┆ ---  ┆ ---  ┆ ---   │
-    #   # │ i64 ┆ f64  ┆ bool  ┆ f64  ┆ f64  ┆ bool  │
-    #   # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
-    #   # │ 1   ┆ 0.5  ┆ true  ┆ 1.0  ┆ 0.25 ┆ false │
-    #   # │ 2   ┆ 4.0  ┆ true  ┆ 4.0  ┆ 2.0  ┆ false │
-    #   # │ 3   ┆ 10.0 ┆ false ┆ 9.0  ┆ 5.0  ┆ true  │
-    #   # │ 4   ┆ 13.0 ┆ true  ┆ 16.0 ┆ 6.5  ┆ false │
-    #   # └─────┴──────┴───────┴──────┴──────┴───────┘
+    #   # ┌─────┬──────┬───────┬─────┬──────┬───────┐
+    #   # │ a   ┆ b    ┆ c     ┆ a^2 ┆ b/2  ┆ not c │
+    #   # │ --- ┆ ---  ┆ ---   ┆ --- ┆ ---  ┆ ---   │
+    #   # │ i64 ┆ f64  ┆ bool  ┆ i64 ┆ f64  ┆ bool  │
+    #   # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡
+    #   # │ 1   ┆ 0.5  ┆ true  ┆ 1   ┆ 0.25 ┆ false │
+    #   # │ 2   ┆ 4.0  ┆ true  ┆ 4   ┆ 2.0  ┆ false │
+    #   # │ 3   ┆ 10.0 ┆ false ┆ 9   ┆ 5.0  ┆ true  │
+    #   # │ 4   ┆ 13.0 ┆ true  ┆ 16  ┆ 6.5  ┆ false │
+    #   # └─────┴──────┴───────┴─────┴──────┴───────┘
     #
     # @example Multiple columns also can be added using positional arguments instead of a list.
     #   df.with_columns(
@@ -3790,16 +3645,16 @@ module Polars
     #   )
     #   # =>
     #   # shape: (4, 6)
-    #   # ┌─────┬──────┬───────┬──────┬──────┬───────┐
-    #   # │ a   ┆ b    ┆ c     ┆ a^2  ┆ b/2  ┆ not c │
-    #   # │ --- ┆ ---  ┆ ---   ┆ ---  ┆ ---  ┆ ---   │
-    #   # │ i64 ┆ f64  ┆ bool  ┆ f64  ┆ f64  ┆ bool  │
-    #   # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
-    #   # │ 1   ┆ 0.5  ┆ true  ┆ 1.0  ┆ 0.25 ┆ false │
-    #   # │ 2   ┆ 4.0  ┆ true  ┆ 4.0  ┆ 2.0  ┆ false │
-    #   # │ 3   ┆ 10.0 ┆ false ┆ 9.0  ┆ 5.0  ┆ true  │
-    #   # │ 4   ┆ 13.0 ┆ true  ┆ 16.0 ┆ 6.5  ┆ false │
-    #   # └─────┴──────┴───────┴──────┴──────┴───────┘
+    #   # ┌─────┬──────┬───────┬─────┬──────┬───────┐
+    #   # │ a   ┆ b    ┆ c     ┆ a^2 ┆ b/2  ┆ not c │
+    #   # │ --- ┆ ---  ┆ ---   ┆ --- ┆ ---  ┆ ---   │
+    #   # │ i64 ┆ f64  ┆ bool  ┆ i64 ┆ f64  ┆ bool  │
+    #   # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡
+    #   # │ 1   ┆ 0.5  ┆ true  ┆ 1   ┆ 0.25 ┆ false │
+    #   # │ 2   ┆ 4.0  ┆ true  ┆ 4   ┆ 2.0  ┆ false │
+    #   # │ 3   ┆ 10.0 ┆ false ┆ 9   ┆ 5.0  ┆ true  │
+    #   # │ 4   ┆ 13.0 ┆ true  ┆ 16  ┆ 6.5  ┆ false │
+    #   # └─────┴──────┴───────┴─────┴──────┴───────┘
     #
     # @example Use keyword arguments to easily name your expression inputs.
     #   df.with_columns(
@@ -5181,7 +5036,7 @@ module Polars
       elsif data[0].is_a?(Hash)
         column_names, dtypes = _unpack_schema(columns)
         schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
-        rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
+        rbdf = RbDataFrame.from_hashes(data, schema, schema_overrides, false, infer_schema_length)
         if column_names
           rbdf = _post_apply_columns(rbdf, column_names)
         end
@@ -5215,7 +5070,7 @@ module Polars
           if unpack_nested
             raise Todo
           else
-            rbdf = RbDataFrame.read_rows(
+            rbdf = RbDataFrame.from_rows(
               data,
               infer_schema_length,
               local_schema_override.any? ? local_schema_override : nil