polars-df 0.6.0 → 0.8.0

Files changed (74)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/Cargo.lock +597 -599
  4. data/Cargo.toml +1 -0
  5. data/README.md +8 -7
  6. data/ext/polars/Cargo.toml +20 -10
  7. data/ext/polars/src/batched_csv.rs +27 -28
  8. data/ext/polars/src/conversion.rs +135 -106
  9. data/ext/polars/src/dataframe.rs +140 -131
  10. data/ext/polars/src/error.rs +0 -5
  11. data/ext/polars/src/expr/binary.rs +18 -6
  12. data/ext/polars/src/expr/categorical.rs +8 -1
  13. data/ext/polars/src/expr/datetime.rs +10 -12
  14. data/ext/polars/src/expr/general.rs +129 -286
  15. data/ext/polars/src/expr/list.rs +17 -9
  16. data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
  17. data/ext/polars/src/expr/name.rs +44 -0
  18. data/ext/polars/src/expr/rolling.rs +201 -0
  19. data/ext/polars/src/expr/string.rs +94 -67
  20. data/ext/polars/src/file.rs +3 -3
  21. data/ext/polars/src/functions/aggregation.rs +35 -0
  22. data/ext/polars/src/functions/eager.rs +7 -31
  23. data/ext/polars/src/functions/io.rs +10 -10
  24. data/ext/polars/src/functions/lazy.rs +66 -41
  25. data/ext/polars/src/functions/meta.rs +30 -0
  26. data/ext/polars/src/functions/misc.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/random.rs +6 -0
  29. data/ext/polars/src/functions/range.rs +41 -0
  30. data/ext/polars/src/functions/string_cache.rs +11 -0
  31. data/ext/polars/src/functions/whenthen.rs +7 -7
  32. data/ext/polars/src/lazyframe.rs +74 -60
  33. data/ext/polars/src/lib.rs +175 -91
  34. data/ext/polars/src/{apply → map}/dataframe.rs +29 -34
  35. data/ext/polars/src/{apply → map}/mod.rs +5 -5
  36. data/ext/polars/src/{apply → map}/series.rs +18 -22
  37. data/ext/polars/src/object.rs +0 -30
  38. data/ext/polars/src/on_startup.rs +32 -0
  39. data/ext/polars/src/rb_modules.rs +22 -7
  40. data/ext/polars/src/series/aggregation.rs +3 -0
  41. data/ext/polars/src/series/construction.rs +5 -5
  42. data/ext/polars/src/series/export.rs +4 -4
  43. data/ext/polars/src/{series.rs → series/mod.rs} +28 -45
  44. data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +38 -22
  45. data/ext/polars/src/sql.rs +46 -0
  46. data/ext/polars/src/utils.rs +1 -1
  47. data/lib/polars/config.rb +530 -0
  48. data/lib/polars/data_frame.rb +182 -145
  49. data/lib/polars/data_types.rb +4 -1
  50. data/lib/polars/date_time_expr.rb +23 -28
  51. data/lib/polars/date_time_name_space.rb +17 -37
  52. data/lib/polars/dynamic_group_by.rb +2 -2
  53. data/lib/polars/expr.rb +398 -110
  54. data/lib/polars/functions.rb +29 -37
  55. data/lib/polars/group_by.rb +38 -55
  56. data/lib/polars/io.rb +40 -5
  57. data/lib/polars/lazy_frame.rb +116 -89
  58. data/lib/polars/lazy_functions.rb +40 -68
  59. data/lib/polars/lazy_group_by.rb +7 -8
  60. data/lib/polars/list_expr.rb +12 -8
  61. data/lib/polars/list_name_space.rb +2 -2
  62. data/lib/polars/name_expr.rb +198 -0
  63. data/lib/polars/rolling_group_by.rb +2 -2
  64. data/lib/polars/series.rb +315 -43
  65. data/lib/polars/sql_context.rb +194 -0
  66. data/lib/polars/string_expr.rb +114 -60
  67. data/lib/polars/string_name_space.rb +19 -4
  68. data/lib/polars/struct_expr.rb +1 -1
  69. data/lib/polars/struct_name_space.rb +1 -1
  70. data/lib/polars/utils.rb +25 -13
  71. data/lib/polars/version.rb +1 -1
  72. data/lib/polars.rb +3 -0
  73. metadata +23 -11
  74. /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -20,15 +20,9 @@ module Polars
  # this does not yield conclusive results, column orientation is used.
  def initialize(data = nil, schema: nil, columns: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
  schema ||= columns
- raise Todo if schema_overrides

- # TODO deprecate in favor of read_sql
  if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
- result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
- data = {}
- result.columns.each_with_index do |k, i|
- data[k] = result.rows.map { |r| r[i] }
- end
+ raise ArgumentError, "Use read_database instead"
  end

  if data.nil?
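Passing an ActiveRecord relation or result to DataFrame.new now raises. A minimal sketch of the replacement path; Polars.read_database is the method named by the new error message, and the exact argument forms shown here are assumptions:

  # hypothetical usage: load query results through the dedicated reader
  df = Polars.read_database(User.all)                 # ActiveRecord relation
  df = Polars.read_database("SELECT * FROM users")    # raw SQL string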
@@ -125,10 +119,10 @@ module Polars

  processed_null_values = Utils._process_null_values(null_values)

- if columns.is_a?(String)
+ if columns.is_a?(::String)
  columns = [columns]
  end
- if file.is_a?(String) && file.include?("*")
+ if file.is_a?(::String) && file.include?("*")
  dtypes_dict = nil
  if !dtype_list.nil?
  dtypes_dict = dtype_list.to_h
@@ -212,11 +206,11 @@ module Polars
  if Utils.pathlike?(source)
  source = Utils.normalise_filepath(source)
  end
- if columns.is_a?(String)
+ if columns.is_a?(::String)
  columns = [columns]
  end

- if source.is_a?(String) && source.include?("*") && Utils.local_file?(source)
+ if source.is_a?(::String) && source.include?("*") && Utils.local_file?(source)
  scan =
  Polars.scan_parquet(
  source,
@@ -275,11 +269,11 @@ module Polars
  if Utils.pathlike?(file)
  file = Utils.normalise_filepath(file)
  end
- if columns.is_a?(String)
+ if columns.is_a?(::String)
  columns = [columns]
  end

- if file.is_a?(String) && file.include?("*")
+ if file.is_a?(::String) && file.include?("*")
  raise Todo
  end

@@ -417,7 +411,7 @@ module Polars
  # }
  # )
  # df.dtypes
- # # => [Polars::Int64, Polars::Float64, Polars::Utf8]
+ # # => [Polars::Int64, Polars::Float64, Polars::String]
  def dtypes
  _df.dtypes
  end
@@ -435,7 +429,7 @@ module Polars
  # }
  # )
  # df.schema
- # # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::Utf8}
+ # # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String}
  def schema
  columns.zip(dtypes).to_h
  end
@@ -595,13 +589,13 @@ module Polars
  return df.slice(row_selection, 1)
  end
  # df[2, "a"]
- if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
+ if col_selection.is_a?(::String) || col_selection.is_a?(Symbol)
  return self[col_selection][row_selection]
  end
  end

  # column selection can be "a" and ["a", "b"]
- if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
+ if col_selection.is_a?(::String) || col_selection.is_a?(Symbol)
  col_selection = [col_selection]
  end

@@ -627,7 +621,7 @@ module Polars

  # select single column
  # df["foo"]
- if item.is_a?(String) || item.is_a?(Symbol)
+ if item.is_a?(::String) || item.is_a?(Symbol)
  return Utils.wrap_s(_df.column(item.to_s))
  end

@@ -653,7 +647,7 @@ module Polars

  if item.is_a?(Series)
  dtype = item.dtype
- if dtype == Utf8
+ if dtype == String
  return _from_rbdf(_df.select(item))
  elsif dtype == UInt32
  return _from_rbdf(_df.take_with_series(item._s))
@@ -704,7 +698,7 @@ module Polars
  s[row_selection] = value

  if col_selection.is_a?(Integer)
- replace_at_idx(col_selection, s)
+ replace_column(col_selection, s)
  elsif Utils.strlike?(col_selection)
  replace(col_selection, s)
  end
@@ -905,6 +899,7 @@ module Polars
  def write_csv(
  file = nil,
  has_header: true,
+ include_header: nil,
  sep: ",",
  quote: '"',
  batch_size: 1024,
@@ -914,6 +909,8 @@ module Polars
  float_precision: nil,
  null_value: nil
  )
+ include_header = has_header if include_header.nil?
+
  if sep.length > 1
  raise ArgumentError, "only single byte separator is allowed"
  elsif quote.length > 1
@@ -927,7 +924,7 @@ module Polars
  buffer.set_encoding(Encoding::BINARY)
  _df.write_csv(
  buffer,
- has_header,
+ include_header,
  sep.ord,
  quote.ord,
  batch_size,
@@ -946,7 +943,7 @@ module Polars

  _df.write_csv(
  file,
- has_header,
+ include_header,
  sep.ord,
  quote.ord,
  batch_size,
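A quick usage sketch of the new write_csv keyword (illustrative, not part of the diff); has_header still works, but include_header takes precedence when given:

  df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4]})
  df.write_csv("out.csv", include_header: true)   # same effect as the old has_header: true
  df.write_csv("out.csv", include_header: false)  # data rows only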
@@ -1151,22 +1148,8 @@ module Polars
  # # │ b ┆ 1 ┆ 2 ┆ 3 │
  # # └─────┴─────┴─────┴─────┘
  def transpose(include_header: false, header_name: "column", column_names: nil)
- df = _from_rbdf(_df.transpose(include_header, header_name))
- if !column_names.nil?
- names = []
- n = df.width
- if include_header
- names << header_name
- n -= 1
- end
-
- column_names = column_names.each
- n.times do
- names << column_names.next
- end
- df.columns = names
- end
- df
+ keep_names_as = include_header ? header_name : nil
+ _from_rbdf(_df.transpose(keep_names_as, column_names))
  end

  # Reverse the DataFrame.
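The Ruby-side renaming loop is gone; column_names is now handed straight to the native transpose. A brief sketch of the keyword arguments shown above (values are illustrative, and the intended public behavior appears unchanged):

  df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => [4, 5, 6]})
  df.transpose(include_header: true, header_name: "field", column_names: ["x", "y", "z"])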
@@ -1239,7 +1222,7 @@ module Polars
  # @example
  # df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
  # s = Polars::Series.new("baz", [97, 98, 99])
- # df.insert_at_idx(1, s)
+ # df.insert_column(1, s)
  # # =>
  # # shape: (3, 3)
  # # ┌─────┬─────┬─────┐
@@ -1261,7 +1244,7 @@ module Polars
  # }
  # )
  # s = Polars::Series.new("d", [-2.5, 15, 20.5, 0])
- # df.insert_at_idx(3, s)
+ # df.insert_column(3, s)
  # # =>
  # # shape: (4, 4)
  # # ┌─────┬──────┬───────┬──────┐
@@ -1274,13 +1257,14 @@ module Polars
  # # │ 3 ┆ 10.0 ┆ false ┆ 20.5 │
  # # │ 4 ┆ 13.0 ┆ true ┆ 0.0 │
  # # └─────┴──────┴───────┴──────┘
- def insert_at_idx(index, series)
+ def insert_column(index, series)
  if index < 0
  index = columns.length + index
  end
- _df.insert_at_idx(index, series._s)
+ _df.insert_column(index, series._s)
  self
  end
+ alias_method :insert_at_idx, :insert_column

  # Filter the rows in the DataFrame based on a predicate expression.
  #
@@ -1384,7 +1368,7 @@ module Polars
  ]
  )._df
  )
- summary.insert_at_idx(
+ summary.insert_column(
  0,
  Polars::Series.new(
  "describe",
@@ -1405,11 +1389,12 @@ module Polars
  # df = Polars::DataFrame.new(
  # {"foo" => [1, 2, 3], "bar" => [6, 7, 8], "ham" => ["a", "b", "c"]}
  # )
- # df.find_idx_by_name("ham")
+ # df.get_column_index("ham")
  # # => 2
- def find_idx_by_name(name)
- _df.find_idx_by_name(name)
+ def get_column_index(name)
+ _df.get_column_index(name)
  end
+ alias_method :find_idx_by_name, :get_column_index

  # Replace a column at an index location.
  #
@@ -1429,7 +1414,7 @@ module Polars
  # }
  # )
  # s = Polars::Series.new("apple", [10, 20, 30])
- # df.replace_at_idx(0, s)
+ # df.replace_column(0, s)
  # # =>
  # # shape: (3, 3)
  # # ┌───────┬─────┬─────┐
@@ -1441,13 +1426,14 @@ module Polars
  # # │ 20 ┆ 7 ┆ b │
  # # │ 30 ┆ 8 ┆ c │
  # # └───────┴─────┴─────┘
- def replace_at_idx(index, series)
+ def replace_column(index, series)
  if index < 0
  index = columns.length + index
  end
- _df.replace_at_idx(index, series._s)
+ _df.replace_column(index, series._s)
  self
  end
+ alias_method :replace_at_idx, :replace_column

  # Sort the DataFrame by column.
  #
@@ -1541,13 +1527,14 @@ module Polars
  # "ham" => ["c", "b", "a"]
  # }
  # )
- # df1.frame_equal(df1)
+ # df1.equals(df1)
  # # => true
- # df1.frame_equal(df2)
+ # df1.equals(df2)
  # # => false
- def frame_equal(other, null_equal: true)
- _df.frame_equal(other._df, null_equal)
+ def equals(other, null_equal: true)
+ _df.equals(other._df, null_equal)
  end
+ alias_method :frame_equal, :equals

  # Replace a column by a new Series.
  #
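frame_equal is kept as an alias of the new equals, so both spellings behave the same; a short sketch using df1 and df2 from the doc example above:

  df1.equals(df1)                       # => true
  df1.frame_equal(df2)                  # alias, => false
  df1.equals(df2, null_equal: false)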
@@ -1733,7 +1720,7 @@ module Polars
  # # │ 3 ┆ 8 ┆ c │
  # # └─────┴─────┴─────┘
  def drop_nulls(subset: nil)
- if subset.is_a?(String)
+ if subset.is_a?(::String)
  subset = [subset]
  end
  _from_rbdf(_df.drop_nulls(subset))
@@ -1811,13 +1798,13 @@ module Polars
  _from_rbdf(_df.with_row_count(name, offset))
  end

- # Start a groupby operation.
+ # Start a group by operation.
  #
  # @param by [Object]
  # Column(s) to group by.
  # @param maintain_order [Boolean]
  # Make sure that the order of the groups remain consistent. This is more
- # expensive than a default groupby. Note that this only works in expression
+ # expensive than a default group by. Note that this only works in expression
  # aggregations.
  #
  # @return [GroupBy]
@@ -1830,7 +1817,7 @@ module Polars
  # "c" => [6, 5, 4, 3, 2, 1]
  # }
  # )
- # df.groupby("a").agg(Polars.col("b").sum).sort("a")
+ # df.group_by("a").agg(Polars.col("b").sum).sort("a")
  # # =>
  # # shape: (3, 2)
  # # ┌─────┬─────┐
@@ -1842,25 +1829,26 @@ module Polars
  # # │ b ┆ 11 │
  # # │ c ┆ 6 │
  # # └─────┴─────┘
- def groupby(by, maintain_order: false)
+ def group_by(by, maintain_order: false)
  if !Utils.bool?(maintain_order)
- raise TypeError, "invalid input for groupby arg `maintain_order`: #{maintain_order}."
+ raise TypeError, "invalid input for group_by arg `maintain_order`: #{maintain_order}."
  end
  GroupBy.new(
- _df,
+ self,
  by,
- self.class,
  maintain_order: maintain_order
  )
  end
+ alias_method :groupby, :group_by
+ alias_method :group, :group_by

  # Create rolling groups based on a time column.
  #
  # Also works for index values of type `:i32` or `:i64`.
  #
- # Different from a `dynamic_groupby` the windows are now determined by the
+ # Different from a `dynamic_group_by` the windows are now determined by the
  # individual values and are not of constant intervals. For constant intervals use
- # *groupby_dynamic*
+ # *group_by_dynamic*
  #
  # The `period` and `offset` arguments are created either from a timedelta, or
  # by using the following string language:
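Both the old and the new spellings resolve to the same method after this change; a short usage sketch based on the frame from the doc example above:

  df.group_by("a", maintain_order: true).agg(Polars.col("b").sum)
  df.groupby("a").agg(Polars.col("b").sum)   # alias kept for compatibility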
@@ -1880,7 +1868,7 @@ module Polars
  # Or combine them:
  # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
  #
- # In case of a groupby_rolling on an integer column, the windows are defined by:
+ # In case of a group_by_rolling on an integer column, the windows are defined by:
  #
  # - **"1i" # length 1**
  # - **"10i" # length 10**
@@ -1891,7 +1879,7 @@ module Polars
  # This column must be sorted in ascending order. If not the output will not
  # make sense.
  #
- # In case of a rolling groupby on indices, dtype needs to be one of
+ # In case of a rolling group by on indices, dtype needs to be one of
  # `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
  # performance matters use an `:i64` column.
  # @param period [Object]
@@ -1923,7 +1911,7 @@ module Polars
  # df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
  # Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
  # )
- # df.groupby_rolling(index_column: "dt", period: "2d").agg(
+ # df.group_by_rolling(index_column: "dt", period: "2d").agg(
  # [
  # Polars.sum("a").alias("sum_a"),
  # Polars.min("a").alias("min_a"),
@@ -1944,7 +1932,7 @@ module Polars
  # # │ 2020-01-03 19:45:32 ┆ 11 ┆ 2 ┆ 9 │
  # # │ 2020-01-08 23:16:43 ┆ 1 ┆ 1 ┆ 1 │
  # # └─────────────────────┴───────┴───────┴───────┘
- def groupby_rolling(
+ def group_by_rolling(
  index_column:,
  period:,
  offset: nil,
@@ -1954,11 +1942,12 @@ module Polars
  )
  RollingGroupBy.new(self, index_column, period, offset, closed, by, check_sorted)
  end
+ alias_method :groupby_rolling, :group_by_rolling

  # Group based on a time value (or index value of type `:i32`, `:i64`).
  #
  # Time windows are calculated and rows are assigned to windows. Different from a
- # normal groupby is that a row can be member of multiple groups. The time/index
+ # normal group by is that a row can be member of multiple groups. The time/index
  # window could be seen as a rolling window, with a window size determined by
  # dates/times/values instead of slots in the DataFrame.
  #
@@ -1986,7 +1975,7 @@ module Polars
  # Or combine them:
  # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
  #
- # In case of a groupby_dynamic on an integer column, the windows are defined by:
+ # In case of a group_by_dynamic on an integer column, the windows are defined by:
  #
  # - "1i" # length 1
  # - "10i" # length 10
@@ -1997,7 +1986,7 @@ module Polars
  # This column must be sorted in ascending order. If not the output will not
  # make sense.
  #
- # In case of a dynamic groupby on indices, dtype needs to be one of
+ # In case of a dynamic group by on indices, dtype needs to be one of
  # `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
  # performance matters use an `:i64` column.
  # @param every
@@ -2048,7 +2037,7 @@ module Polars
  # # └─────────────────────┴─────┘
  #
  # @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
- # df.groupby_dynamic("time", every: "1h", closed: "right").agg(
+ # df.group_by_dynamic("time", every: "1h", closed: "right").agg(
  # [
  # Polars.col("time").min.alias("time_min"),
  # Polars.col("time").max.alias("time_max")
@@ -2068,7 +2057,7 @@ module Polars
  # # └─────────────────────┴─────────────────────┴─────────────────────┘
  #
  # @example The window boundaries can also be added to the aggregation result.
- # df.groupby_dynamic(
+ # df.group_by_dynamic(
  # "time", every: "1h", include_boundaries: true, closed: "right"
  # ).agg([Polars.col("time").count.alias("time_count")])
  # # =>
@@ -2085,7 +2074,7 @@ module Polars
  # # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
  #
  # @example When closed="left", should not include right end of interval.
- # df.groupby_dynamic("time", every: "1h", closed: "left").agg(
+ # df.group_by_dynamic("time", every: "1h", closed: "left").agg(
  # [
  # Polars.col("time").count.alias("time_count"),
  # Polars.col("time").alias("time_agg_list")
@@ -2105,7 +2094,7 @@ module Polars
  # # └─────────────────────┴────────────┴───────────────────────────────────┘
  #
  # @example When closed="both" the time values at the window boundaries belong to 2 groups.
- # df.groupby_dynamic("time", every: "1h", closed: "both").agg(
+ # df.group_by_dynamic("time", every: "1h", closed: "both").agg(
  # [Polars.col("time").count.alias("time_count")]
  # )
  # # =>
@@ -2122,7 +2111,7 @@ module Polars
  # # │ 2021-12-16 03:00:00 ┆ 1 │
  # # └─────────────────────┴────────────┘
  #
- # @example Dynamic groupbys can also be combined with grouping on normal keys.
+ # @example Dynamic group bys can also be combined with grouping on normal keys.
  # df = Polars::DataFrame.new(
  # {
  # "time" => Polars.date_range(
@@ -2133,7 +2122,7 @@ module Polars
  # "groups" => ["a", "a", "a", "b", "b", "a", "a"]
  # }
  # )
- # df.groupby_dynamic(
+ # df.group_by_dynamic(
  # "time",
  # every: "1h",
  # closed: "both",
@@ -2156,14 +2145,14 @@ module Polars
  # # │ b ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1 │
  # # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
  #
- # @example Dynamic groupby on an index column.
+ # @example Dynamic group by on an index column.
  # df = Polars::DataFrame.new(
  # {
  # "idx" => Polars.arange(0, 6, eager: true),
  # "A" => ["A", "A", "B", "B", "B", "C"]
  # }
  # )
- # df.groupby_dynamic(
+ # df.group_by_dynamic(
  # "idx",
  # every: "2i",
  # period: "3i",
@@ -2181,7 +2170,7 @@ module Polars
  # # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
  # # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
  # # └─────────────────┴─────────────────┴─────┴─────────────────┘
- def groupby_dynamic(
+ def group_by_dynamic(
  index_column,
  every:,
  period: nil,
@@ -2205,6 +2194,7 @@ module Polars
  start_by
  )
  end
+ alias_method :groupby_dynamic, :group_by_dynamic

  # Upsample a DataFrame at a regular frequency.
  #
@@ -2281,7 +2271,7 @@ module Polars
  if by.nil?
  by = []
  end
- if by.is_a?(String)
+ if by.is_a?(::String)
  by = [by]
  end
  if offset.nil?
@@ -2475,17 +2465,17 @@ module Polars
  # @example
  # df.join(other_df, on: "ham", how: "outer")
  # # =>
- # # shape: (4, 4)
- # # ┌──────┬──────┬─────┬───────┐
- # # │ foo ┆ bar ┆ ham ┆ apple │
- # # │ --- ┆ --- ┆ --- ┆ --- │
- # # │ i64 ┆ f64 ┆ str ┆ str │
- # # ╞══════╪══════╪═════╪═══════╡
- # # │ 1 ┆ 6.0 ┆ a ┆ x │
- # # │ 2 ┆ 7.0 ┆ b ┆ y │
- # # │ null ┆ null ┆ d ┆ z │
- # # │ 3 ┆ 8.0 ┆ c ┆ null │
- # # └──────┴──────┴─────┴───────┘
+ # # shape: (4, 5)
+ # # ┌──────┬──────┬──────┬───────┬───────────┐
+ # # │ foo ┆ bar ┆ ham ┆ apple ┆ ham_right │
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+ # # │ i64 ┆ f64 ┆ str ┆ str ┆ str │
+ # # ╞══════╪══════╪══════╪═══════╪═══════════╡
+ # # │ 1 ┆ 6.0 ┆ a ┆ x ┆ a │
+ # # │ 2 ┆ 7.0 ┆ b ┆ y ┆ b │
+ # # │ null ┆ null ┆ null ┆ z ┆ d │
+ # # │ 3 ┆ 8.0 ┆ c ┆ null ┆ null │
+ # # └──────┴──────┴──────┴───────┴───────────┘
  #
  # @example
  # df.join(other_df, on: "ham", how: "left")
@@ -3125,17 +3115,17 @@ module Polars
  sort_columns: false,
  separator: "_"
  )
- if values.is_a?(String)
+ if values.is_a?(::String)
  values = [values]
  end
- if index.is_a?(String)
+ if index.is_a?(::String)
  index = [index]
  end
- if columns.is_a?(String)
+ if columns.is_a?(::String)
  columns = [columns]
  end

- if aggregate_fn.is_a?(String)
+ if aggregate_fn.is_a?(::String)
  case aggregate_fn
  when "first"
  aggregate_expr = Polars.element.first._rbexpr
@@ -3220,10 +3210,10 @@ module Polars
  # # │ z ┆ c ┆ 6 │
  # # └─────┴──────────┴───────┘
  def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil)
- if value_vars.is_a?(String)
+ if value_vars.is_a?(::String)
  value_vars = [value_vars]
  end
- if id_vars.is_a?(String)
+ if id_vars.is_a?(::String)
  id_vars = [id_vars]
  end
  if value_vars.nil?
@@ -3437,7 +3427,7 @@ module Polars
  # # │ C ┆ 2 ┆ l │
  # # └─────┴─────┴─────┘}
  def partition_by(groups, maintain_order: true, include_key: true, as_dict: false)
- if groups.is_a?(String)
+ if groups.is_a?(::String)
  groups = [groups]
  elsif !groups.is_a?(::Array)
  groups = Array(groups)
@@ -3464,8 +3454,10 @@ module Polars

  # Shift values by the given period.
  #
- # @param periods [Integer]
+ # @param n [Integer]
  # Number of places to shift (may be negative).
+ # @param fill_value [Object]
+ # Fill the resulting null values with this value.
  #
  # @return [DataFrame]
  #
@@ -3503,8 +3495,8 @@ module Polars
  # # │ 3 ┆ 8 ┆ c │
  # # │ null ┆ null ┆ null │
  # # └──────┴──────┴──────┘
- def shift(periods)
- _from_rbdf(_df.shift(periods))
+ def shift(n, fill_value: nil)
+ lazy.shift(n, fill_value: fill_value).collect(_eager: true)
  end

  # Shift the values by a given period and fill the resulting null values.
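shift now goes through the lazy engine and accepts fill_value directly, which is also what shift_and_fill delegates to below. A minimal sketch:

  df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [6, 7, 8]})
  df.shift(1)                  # nulls pushed in at the top
  df.shift(-1, fill_value: 0)  # shift up and fill the gap with 0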
@@ -3537,9 +3529,7 @@ module Polars
  # # │ 2 ┆ 7 ┆ b │
  # # └─────┴─────┴─────┘
  def shift_and_fill(periods, fill_value)
- lazy
- .shift_and_fill(periods, fill_value)
- .collect(no_optimization: true, string_cache: false)
+ shift(periods, fill_value: fill_value)
  end

  # Get a mask of all duplicated rows in this DataFrame.
@@ -3788,9 +3778,9 @@ module Polars
  # # └─────┴─────┴─────┘
  def max(axis: 0)
  if axis == 0
- _from_rbdf(_df.max)
+ lazy.max.collect(_eager: true)
  elsif axis == 1
- Utils.wrap_s(_df.hmax)
+ Utils.wrap_s(_df.max_horizontal)
  else
  raise ArgumentError, "Axis should be 0 or 1."
  end
@@ -3820,9 +3810,9 @@ module Polars
  # # └─────┴─────┴─────┘
  def min(axis: 0)
  if axis == 0
- _from_rbdf(_df.min)
+ lazy.min.collect(_eager: true)
  elsif axis == 1
- Utils.wrap_s(_df.hmin)
+ Utils.wrap_s(_df.min_horizontal)
  else
  raise ArgumentError, "Axis should be 0 or 1."
  end
@@ -3869,9 +3859,9 @@ module Polars
  def sum(axis: 0, null_strategy: "ignore")
  case axis
  when 0
- _from_rbdf(_df.sum)
+ lazy.sum.collect(_eager: true)
  when 1
- Utils.wrap_s(_df.hsum(null_strategy))
+ Utils.wrap_s(_df.sum_horizontal(null_strategy))
  else
  raise ArgumentError, "Axis should be 0 or 1."
  end
@@ -3907,9 +3897,9 @@ module Polars
  def mean(axis: 0, null_strategy: "ignore")
  case axis
  when 0
- _from_rbdf(_df.mean)
+ lazy.mean.collect(_eager: true)
  when 1
- Utils.wrap_s(_df.hmean(null_strategy))
+ Utils.wrap_s(_df.mean_horizontal(null_strategy))
  else
  raise ArgumentError, "Axis should be 0 or 1."
  end
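With axis: 0 these aggregations are now routed through the lazy engine, while axis: 1 maps to the new *_horizontal bindings; the public calls are unchanged. A brief sketch:

  df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4]})
  df.sum                                     # column-wise totals (one-row DataFrame)
  df.sum(axis: 1)                            # row-wise totals (Series)
  df.mean(axis: 1, null_strategy: "ignore")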
@@ -3953,7 +3943,7 @@ module Polars
  # # │ 0.816497 ┆ 0.816497 ┆ null │
  # # └──────────┴──────────┴──────┘
  def std(ddof: 1)
- _from_rbdf(_df.std(ddof))
+ lazy.std(ddof: ddof).collect(_eager: true)
  end

  # Aggregate the columns of this DataFrame to their variance value.
@@ -3994,7 +3984,7 @@ module Polars
  # # │ 0.666667 ┆ 0.666667 ┆ null │
  # # └──────────┴──────────┴──────┘
  def var(ddof: 1)
- _from_rbdf(_df.var(ddof))
+ lazy.var(ddof: ddof).collect(_eager: true)
  end

  # Aggregate the columns of this DataFrame to their median value.
@@ -4020,7 +4010,7 @@ module Polars
  # # │ 2.0 ┆ 7.0 ┆ null │
  # # └─────┴─────┴──────┘
  def median
- _from_rbdf(_df.median)
+ lazy.median.collect(_eager: true)
  end

  # Aggregate the columns of this DataFrame to their product values.
@@ -4077,7 +4067,7 @@ module Polars
  # # │ 2.0 ┆ 7.0 ┆ null │
  # # └─────┴─────┴──────┘
  def quantile(quantile, interpolation: "nearest")
- _from_rbdf(_df.quantile(quantile, interpolation))
+ lazy.quantile(quantile, interpolation: interpolation).collect(_eager: true)
  end

  # Get one hot encoded dummy variables.
@@ -4108,7 +4098,7 @@ module Polars
  # # │ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 │
  # # └───────┴───────┴───────┴───────┴───────┴───────┘
  def to_dummies(columns: nil, separator: "_", drop_first: false)
- if columns.is_a?(String)
+ if columns.is_a?(::String)
  columns = [columns]
  end
  _from_rbdf(_df.to_dummies(columns, separator, drop_first))
@@ -4294,15 +4284,20 @@ module Polars
  end

  if n.nil? && !frac.nil?
+ frac = Series.new("frac", [frac]) unless frac.is_a?(Series)
+
  _from_rbdf(
- _df.sample_frac(frac, with_replacement, shuffle, seed)
+ _df.sample_frac(frac._s, with_replacement, shuffle, seed)
  )
  end

  if n.nil?
  n = 1
  end
- _from_rbdf(_df.sample_n(n, with_replacement, shuffle, seed))
+
+ n = Series.new("", [n]) unless n.is_a?(Series)
+
+ _from_rbdf(_df.sample_n(n._s, with_replacement, shuffle, seed))
  end

  # Apply a horizontal reduction on a DataFrame.
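The native sample_n/sample_frac bindings now expect a Series, so scalar n/frac values are wrapped before the call. The public call is unchanged; a sketch where the keyword names n:, frac: and seed: are taken from the surrounding method rather than this hunk:

  df.sample(n: 2, seed: 0)
  df.sample(frac: 0.5, with_replacement: true, seed: 0)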
@@ -4601,7 +4596,7 @@ module Polars
  #
  # @example
  # s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]})
- # s.take_every(2)
+ # s.gather_every(2)
  # # =>
  # # shape: (2, 2)
  # # ┌─────┬─────┐
@@ -4612,9 +4607,10 @@ module Polars
  # # │ 1 ┆ 5 │
  # # │ 3 ┆ 7 │
  # # └─────┴─────┘
- def take_every(n)
- select(Utils.col("*").take_every(n))
+ def gather_every(n, offset = 0)
+ select(Utils.col("*").gather_every(n, offset))
  end
+ alias_method :take_every, :gather_every

  # Hash and combine the rows in this DataFrame.
  #
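gather_every also gains an optional offset, and take_every remains as an alias; a short sketch using the frame from the doc example above:

  s.gather_every(2)      # rows 0 and 2
  s.gather_every(2, 1)   # start at row 1: rows 1 and 3
  s.take_every(2)        # alias kept for compatibility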
@@ -4671,16 +4667,16 @@ module Polars
  # df.interpolate
  # # =>
  # # shape: (4, 3)
- # # ┌─────┬──────┬─────┐
- # # │ foo ┆ bar ┆ baz │
- # # │ --- ┆ --- ┆ --- │
- # # │ i64 ┆ i64 ┆ i64 │
- # # ╞═════╪══════╪═════╡
- # # │ 1 ┆ 6 ┆ 1 │
- # # │ 5 ┆ 7 ┆ 3 │
- # # │ 9 ┆ 9 ┆ 6 │
- # # │ 10 ┆ null ┆ 9 │
- # # └─────┴──────┴─────┘
+ # # ┌──────┬──────┬──────────┐
+ # # │ foo ┆ bar ┆ baz │
+ # # │ --- ┆ --- ┆ --- │
+ # # │ f64 ┆ f64 ┆ f64 │
+ # # ╞══════╪══════╪══════════╡
+ # # │ 1.0 ┆ 6.0 ┆ 1.0 │
+ # # │ 5.0 ┆ 7.0 ┆ 3.666667 │
+ # # │ 9.0 ┆ 9.0 ┆ 6.333333 │
+ # # │ 10.0 ┆ null ┆ 9.0 │
+ # # └──────┴──────┴──────────┘
  def interpolate
  select(Utils.col("*").interpolate)
  end
@@ -4762,7 +4758,7 @@ module Polars
  # # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
  # # └────────┴─────┴─────┴──────┴───────────┴───────┘
  def unnest(names)
- if names.is_a?(String)
+ if names.is_a?(::String)
  names = [names]
  end
  _from_rbdf(_df.unnest(names))
@@ -4875,10 +4871,10 @@ module Polars
  if val.is_a?(Hash) && dtype != Struct
  updated_data[name] = DataFrame.new(val).to_struct(name)
  elsif !Utils.arrlen(val).nil?
- updated_data[name] = Series.new(String.new(name), val, dtype: dtype)
- elsif val.nil? || [Integer, Float, TrueClass, FalseClass, String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
+ updated_data[name] = Series.new(::String.new(name), val, dtype: dtype)
+ elsif val.nil? || [Integer, Float, TrueClass, FalseClass, ::String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
  dtype = Polars::Float64 if val.nil? && dtype.nil?
- updated_data[name] = Series.new(String.new(name), [val], dtype: dtype).extend_constant(val, array_len - 1)
+ updated_data[name] = Series.new(::String.new(name), [val], dtype: dtype).extend_constant(val, array_len - 1)
  else
  raise Todo
  end
@@ -4935,7 +4931,7 @@ module Polars
  end
  column_names =
  (schema || []).map.with_index do |col, i|
- if col.is_a?(String)
+ if col.is_a?(::String)
  col || "column_#{i}"
  else
  col[0]
@@ -4948,12 +4944,12 @@ module Polars
  lookup = column_names.zip(lookup_names || []).to_h

  column_dtypes =
- (schema || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
+ (schema || []).select { |col| !col.is_a?(::String) && col[1] }.to_h do |col|
  [lookup[col[0]] || col[0], col[1]]
  end

- if schema_overrides
- raise Todo
+ if schema_overrides && schema_overrides.any?
+ column_dtypes.merge!(schema_overrides)
  end

  column_dtypes.each do |col, dtype|
@@ -5056,13 +5052,54 @@ module Polars
  return rbdf
  elsif data[0].is_a?(::Array)
  if orient.nil? && !columns.nil?
- orient = columns.length == data.length ? "col" : "row"
+ first_element = data[0]
+ row_types = first_element.filter_map { |value| value.class }.uniq
+ if row_types.include?(Integer) && row_types.include?(Float)
+ row_types.delete(Integer)
+ end
+ orient = row_types.length == 1 ? "col" : "row"
  end

  if orient == "row"
- raise Todo
+ column_names, schema_overrides = _unpack_schema(
+ schema, schema_overrides: schema_overrides, n_expected: first_element.length
+ )
+ local_schema_override = (
+ schema_overrides.any? ? (raise Todo) : {}
+ )
+ if column_names.any? && first_element.length > 0 && first_element.length != column_names.length
+ raise ArgumentError, "the row data does not match the number of columns"
+ end
+
+ unpack_nested = false
+ local_schema_override.each do |col, tp|
+ raise Todo
+ end
+
+ if unpack_nested
+ raise Todo
+ else
+ rbdf = RbDataFrame.read_rows(
+ data,
+ infer_schema_length,
+ local_schema_override.any? ? local_schema_override : nil
+ )
+ end
+ if column_names.any? || schema_overrides.any?
+ rbdf = _post_apply_columns(
+ rbdf, column_names, schema_overrides: schema_overrides
+ )
+ end
+ return rbdf
  elsif orient == "col" || orient.nil?
- raise Todo
+ column_names, schema_overrides = _unpack_schema(
+ schema, schema_overrides: schema_overrides, n_expected: data.length
+ )
+ data_series =
+ data.map.with_index do |element, i|
+ Series.new(column_names[i], element, dtype: schema_overrides[column_names[i]])._s
+ end
+ return RbDataFrame.new(data_series)
  else
  raise ArgumentError, "orient must be one of {{'col', 'row', nil}}, got #{orient} instead."
  end
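Row- and column-oriented array input is now handled instead of raising Todo, and when orient is not given it is inferred from the value types of the first row. A short sketch, assuming schema accepts a plain array of column names here; the literal values are illustrative:

  # one inner array per row
  Polars::DataFrame.new([[1, "a"], [2, "b"]], schema: ["x", "y"], orient: "row")
  # one inner array per column
  Polars::DataFrame.new([[1, 2], ["a", "b"]], schema: ["x", "y"], orient: "col")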
@@ -5108,10 +5145,10 @@ module Polars

  def _compare_to_other_df(other, op)
  if columns != other.columns
- raise ArgmentError, "DataFrame columns do not match"
+ raise ArgumentError, "DataFrame columns do not match"
  end
  if shape != other.shape
- raise ArgmentError, "DataFrame dimensions do not match"
+ raise ArgumentError, "DataFrame dimensions do not match"
  end

  suffix = "__POLARS_CMP_OTHER"