RubyGems - polars-df - Versions diffs - 0.5.0 → 0.7.0 - Mend

polars-df 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +26 -0
data/Cargo.lock +595 -709
data/Cargo.toml +1 -0
data/README.md +11 -9
data/ext/polars/Cargo.toml +18 -10
data/ext/polars/src/batched_csv.rs +26 -26
data/ext/polars/src/conversion.rs +272 -136
data/ext/polars/src/dataframe.rs +135 -94
data/ext/polars/src/error.rs +8 -5
data/ext/polars/src/expr/array.rs +15 -0
data/ext/polars/src/expr/binary.rs +18 -6
data/ext/polars/src/expr/datetime.rs +10 -12
data/ext/polars/src/expr/general.rs +78 -264
data/ext/polars/src/expr/list.rs +41 -28
data/ext/polars/src/{expr.rs → expr/mod.rs} +5 -2
data/ext/polars/src/expr/name.rs +44 -0
data/ext/polars/src/expr/rolling.rs +196 -0
data/ext/polars/src/expr/string.rs +94 -66
data/ext/polars/src/file.rs +3 -3
data/ext/polars/src/functions/aggregation.rs +35 -0
data/ext/polars/src/functions/eager.rs +7 -31
data/ext/polars/src/functions/io.rs +10 -10
data/ext/polars/src/functions/lazy.rs +119 -54
data/ext/polars/src/functions/meta.rs +30 -0
data/ext/polars/src/functions/misc.rs +8 -0
data/ext/polars/src/functions/mod.rs +5 -0
data/ext/polars/src/functions/random.rs +6 -0
data/ext/polars/src/functions/range.rs +46 -0
data/ext/polars/src/functions/string_cache.rs +11 -0
data/ext/polars/src/functions/whenthen.rs +7 -7
data/ext/polars/src/lazyframe.rs +61 -44
data/ext/polars/src/lib.rs +173 -84
data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
data/ext/polars/src/{apply → map}/mod.rs +10 -6
data/ext/polars/src/{apply → map}/series.rs +12 -16
data/ext/polars/src/object.rs +2 -2
data/ext/polars/src/rb_modules.rs +25 -6
data/ext/polars/src/series/construction.rs +32 -6
data/ext/polars/src/series/export.rs +2 -2
data/ext/polars/src/series/set_at_idx.rs +33 -17
data/ext/polars/src/series.rs +62 -42
data/ext/polars/src/sql.rs +46 -0
data/lib/polars/array_expr.rb +84 -0
data/lib/polars/array_name_space.rb +77 -0
data/lib/polars/batched_csv_reader.rb +1 -1
data/lib/polars/config.rb +530 -0
data/lib/polars/data_frame.rb +206 -131
data/lib/polars/data_types.rb +163 -29
data/lib/polars/date_time_expr.rb +13 -18
data/lib/polars/date_time_name_space.rb +22 -28
data/lib/polars/dynamic_group_by.rb +2 -2
data/lib/polars/expr.rb +241 -151
data/lib/polars/functions.rb +29 -38
data/lib/polars/group_by.rb +38 -76
data/lib/polars/io.rb +37 -2
data/lib/polars/lazy_frame.rb +174 -95
data/lib/polars/lazy_functions.rb +87 -63
data/lib/polars/lazy_group_by.rb +7 -8
data/lib/polars/list_expr.rb +40 -36
data/lib/polars/list_name_space.rb +15 -15
data/lib/polars/name_expr.rb +198 -0
data/lib/polars/rolling_group_by.rb +6 -4
data/lib/polars/series.rb +95 -28
data/lib/polars/sql_context.rb +194 -0
data/lib/polars/string_expr.rb +249 -69
data/lib/polars/string_name_space.rb +155 -25
data/lib/polars/utils.rb +119 -57
data/lib/polars/version.rb +1 -1
data/lib/polars.rb +6 -0
metadata +21 -7
/data/ext/polars/src/{apply → map}/lazy.rs +0 -0

data/lib/polars/lazy_frame.rb CHANGED Viewed

@@ -4,6 +4,22 @@ module Polars
     # @private
     attr_accessor :_ldf
+    # Create a new LazyFrame.
+    def initialize(data = nil, schema: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
+      self._ldf = (
+        DataFrame.new(
+          data,
+          schema: schema,
+          schema_overrides: schema_overrides,
+          orient: orient,
+          infer_schema_length: infer_schema_length,
+          nan_to_null: nan_to_null
+        )
+        .lazy
+        ._ldf
+      )
+    end
     # @private
     def self._from_rbldf(rb_ldf)
       ldf = LazyFrame.allocate
@@ -81,7 +97,8 @@ module Polars
       row_count_offset: 0,
       storage_options: nil,
       low_memory: false,
-      use_statistics: true
+      use_statistics: true,
+      hive_partitioning: true
     )
       _from_rbldf(
         RbLazyFrame.new_from_parquet(
@@ -92,7 +109,8 @@ module Polars
           rechunk,
           Utils._prepare_row_count_args(row_count_name, row_count_offset),
           low_memory,
-          use_statistics
+          use_statistics,
+          hive_partitioning
         )
       )
     end
@@ -334,6 +352,7 @@ module Polars
         slice_pushdown,
         common_subplan_elimination,
         allow_streaming,
+        false
       )
       ldf.describe_optimized_plan
@@ -379,16 +398,16 @@ module Polars
     #   # │ 2   ┆ 7.0 ┆ b   │
     #   # │ 1   ┆ 6.0 ┆ a   │
     #   # └─────┴─────┴─────┘
-    def sort(by, reverse: false, nulls_last: false)
+    def sort(by, reverse: false, nulls_last: false, maintain_order: false)
       if by.is_a?(String)
-        _from_rbldf(_ldf.sort(by, reverse, nulls_last))
+        return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order))
       end
       if Utils.bool?(reverse)
         reverse = [reverse]
       end
       by = Utils.selection_to_rbexpr_list(by)
-      _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last))
+      _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order))
     end
     # def profile
@@ -429,7 +448,7 @@ module Polars
     #       "c" => [6, 5, 4, 3, 2, 1]
     #     }
     #   ).lazy
-    #   df.groupby("a", maintain_order: true).agg(Polars.all.sum).collect
+    #   df.group_by("a", maintain_order: true).agg(Polars.all.sum).collect
     #   # =>
     #   # shape: (3, 3)
     #   # ┌─────┬─────┬─────┐
@@ -450,7 +469,8 @@ module Polars
       no_optimization: false,
       slice_pushdown: true,
       common_subplan_elimination: true,
-      allow_streaming: false
+      allow_streaming: false,
+      _eager: false
     )
       if no_optimization
         predicate_pushdown = false
@@ -470,7 +490,8 @@ module Polars
         simplify_expression,
         slice_pushdown,
         common_subplan_elimination,
-        allow_streaming
+        allow_streaming,
+        _eager
       )
       Utils.wrap_df(ldf.collect)
     end
@@ -552,7 +573,8 @@ module Polars
         simplify_expression,
         slice_pushdown,
         false,
-        true
+        true,
+        false
       )
       lf.sink_parquet(
         path,
@@ -607,7 +629,7 @@ module Polars
     #       "c" => [6, 5, 4, 3, 2, 1]
     #     }
     #   ).lazy
-    #   df.groupby("a", maintain_order: true).agg(Polars.all.sum).fetch(2)
+    #   df.group_by("a", maintain_order: true).agg(Polars.all.sum).fetch(2)
     #   # =>
     #   # shape: (2, 3)
     #   # ┌─────┬─────┬─────┐
@@ -644,7 +666,8 @@ module Polars
         simplify_expression,
         slice_pushdown,
         common_subplan_elimination,
-        allow_streaming
+        allow_streaming,
+        false
       )
       Utils.wrap_df(ldf.fetch(n_rows))
     end
@@ -837,13 +860,13 @@ module Polars
       _from_rbldf(_ldf.select(exprs))
     end
-    # Start a groupby operation.
+    # Start a group by operation.
     #
     # @param by [Object]
     #   Column(s) to group by.
     # @param maintain_order [Boolean]
     #   Make sure that the order of the groups remains consistent. This is more
-    #   expensive than a default groupby.
+    #   expensive than a default group by.
     #
     # @return [LazyGroupBy]
     #
@@ -855,7 +878,7 @@ module Polars
     #       "c" => [6, 5, 4, 3, 2, 1]
     #     }
     #   ).lazy
-    #   df.groupby("a", maintain_order: true).agg(Polars.col("b").sum).collect
+    #   df.group_by("a", maintain_order: true).agg(Polars.col("b").sum).collect
     #   # =>
     #   # shape: (3, 2)
     #   # ┌─────┬─────┐
@@ -867,19 +890,21 @@ module Polars
     #   # │ b   ┆ 11  │
     #   # │ c   ┆ 6   │
     #   # └─────┴─────┘
-    def groupby(by, maintain_order: false)
+    def group_by(by, maintain_order: false)
       rbexprs_by = Utils.selection_to_rbexpr_list(by)
-      lgb = _ldf.groupby(rbexprs_by, maintain_order)
-      LazyGroupBy.new(lgb, self.class)
+      lgb = _ldf.group_by(rbexprs_by, maintain_order)
+      LazyGroupBy.new(lgb)
     end
+    alias_method :groupby, :group_by
+    alias_method :group, :group_by
     # Create rolling groups based on a time column.
     #
     # Also works for index values of type `:i32` or `:i64`.
     #
-    # Different from a `dynamic_groupby` the windows are now determined by the
+    # Different from a `group_by_dynamic` the windows are now determined by the
     # individual values and are not of constant intervals. For constant intervals
-    # use *groupby_dynamic*.
+    # use *group_by_dynamic*.
     #
     # The `period` and `offset` arguments are created either from a timedelta, or
     # by using the following string language:
@@ -899,7 +924,7 @@ module Polars
     # Or combine them:
     # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
     #
-    # In case of a groupby_rolling on an integer column, the windows are defined by:
+    # In case of a group_by_rolling on an integer column, the windows are defined by:
     #
     # - "1i"      # length 1
     # - "10i"     # length 10
@@ -910,7 +935,7 @@ module Polars
     #   This column must be sorted in ascending order. If not the output will not
     #   make sense.
     #
-    #   In case of a rolling groupby on indices, dtype needs to be one of
+    #   In case of a rolling group by on indices, dtype needs to be one of
     #   `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
     #   performance matters use an `:i64` column.
     # @param period [Object]
@@ -921,6 +946,12 @@ module Polars
     #   Define whether the temporal window interval is closed or not.
     # @param by [Object]
     #   Also group by this column/these columns.
+    # @param check_sorted [Boolean]
+    #   When the `by` argument is given, polars can not check sortedness
+    #   by the metadata and has to do a full scan on the index column to
+    #   verify data is sorted. This is expensive. If you are sure the
+    #   data within the by groups is sorted, you can set this to `false`.
+    #   Doing so incorrectly will lead to incorrect output.
     #
     # @return [LazyGroupBy]
     #
@@ -933,16 +964,16 @@ module Polars
     #     "2020-01-03 19:45:32",
     #     "2020-01-08 23:16:43"
     #   ]
-    #   df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
-    #     Polars.col("dt").str.strptime(Polars::Datetime)
+    #   df = Polars::LazyFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
+    #     Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
     #   )
-    #   df.groupby_rolling(index_column: "dt", period: "2d").agg(
+    #   df.group_by_rolling(index_column: "dt", period: "2d").agg(
     #     [
     #       Polars.sum("a").alias("sum_a"),
     #       Polars.min("a").alias("min_a"),
     #       Polars.max("a").alias("max_a")
     #     ]
-    #   )
+    #   ).collect
     #   # =>
     #   # shape: (6, 4)
     #   # ┌─────────────────────┬───────┬───────┬───────┐
@@ -957,14 +988,15 @@ module Polars
     #   # │ 2020-01-03 19:45:32 ┆ 11    ┆ 2     ┆ 9     │
     #   # │ 2020-01-08 23:16:43 ┆ 1     ┆ 1     ┆ 1     │
     #   # └─────────────────────┴───────┴───────┴───────┘
-    def groupby_rolling(
+    def group_by_rolling(
       index_column:,
       period:,
       offset: nil,
       closed: "right",
-      by: nil
+      by: nil,
+      check_sorted: true
     )
-      index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
+      index_column = Utils.parse_as_expression(index_column)
       if offset.nil?
         offset = "-#{period}"
       end
@@ -973,16 +1005,17 @@ module Polars
       period = Utils._timedelta_to_pl_duration(period)
       offset = Utils._timedelta_to_pl_duration(offset)
-      lgb = _ldf.groupby_rolling(
-        index_column._rbexpr, period, offset, closed, rbexprs_by
+      lgb = _ldf.group_by_rolling(
+        index_column, period, offset, closed, rbexprs_by, check_sorted
       )
-      LazyGroupBy.new(lgb, self.class)
+      LazyGroupBy.new(lgb)
     end
+    alias_method :groupby_rolling, :group_by_rolling
     # Group based on a time value (or index value of type `:i32`, `:i64`).
     #
     # Time windows are calculated and rows are assigned to windows. Different from a
-    # normal groupby is that a row can be member of multiple groups. The time/index
+    # normal group by is that a row can be member of multiple groups. The time/index
     # window could be seen as a rolling window, with a window size determined by
     # dates/times/values instead of slots in the DataFrame.
     #
@@ -1010,37 +1043,43 @@ module Polars
     # Or combine them:
     # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
     #
-    # In case of a groupby_dynamic on an integer column, the windows are defined by:
+    # In case of a group_by_dynamic on an integer column, the windows are defined by:
     #
     # - "1i"      # length 1
     # - "10i"     # length 10
     #
-    # @param index_column
+    # @param index_column [Object]
     #   Column used to group based on the time window.
     #   Often of type Date/Datetime.
     #   This column must be sorted in ascending order. If not the output will not
     #   make sense.
     #
-    #   In case of a dynamic groupby on indices, dtype needs to be one of
+    #   In case of a dynamic group by on indices, dtype needs to be one of
     #   `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
     #   performance matters use an `:i64` column.
-    # @param every
+    # @param every [Object]
     #   Interval of the window.
-    # @param period
+    # @param period [Object]
     #   Length of the window; if nil, it is equal to `every`.
-    # @param offset
+    # @param offset [Object]
     #   Offset of the window. If nil and `period` is nil, it will be equal to negative
     #   `every`.
-    # @param truncate
+    # @param truncate [Boolean]
     #   Truncate the time value to the window lower bound.
-    # @param include_boundaries
+    # @param include_boundaries [Boolean]
     #   Add the lower and upper bound of the window to the "_lower_bound" and
     #   "_upper_bound" columns. This will impact performance because it's harder to
     #   parallelize
     # @param closed ["right", "left", "both", "none"]
     #   Define whether the temporal window interval is closed or not.
-    # @param by
+    # @param by [Object]
     #   Also group by this column/these columns
+    # @param check_sorted [Boolean]
+    #   When the `by` argument is given, polars can not check sortedness
+    #   by the metadata and has to do a full scan on the index column to
+    #   verify data is sorted. This is expensive. If you are sure the
+    #   data within the by groups is sorted, you can set this to `false`.
+    #   Doing so incorrectly will lead to incorrect output.
     #
     # @return [LazyGroupBy]
     #
@@ -1072,7 +1111,7 @@ module Polars
     #   # └─────────────────────┴─────┘
     #
     # @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
-    #   df.groupby_dynamic("time", every: "1h", closed: "right").agg(
+    #   df.group_by_dynamic("time", every: "1h", closed: "right").agg(
     #     [
     #       Polars.col("time").min.alias("time_min"),
     #       Polars.col("time").max.alias("time_max")
@@ -1092,7 +1131,7 @@ module Polars
     #   # └─────────────────────┴─────────────────────┴─────────────────────┘
     #
     # @example The window boundaries can also be added to the aggregation result.
-    #   df.groupby_dynamic(
+    #   df.group_by_dynamic(
     #     "time", every: "1h", include_boundaries: true, closed: "right"
     #   ).agg([Polars.col("time").count.alias("time_count")])
     #   # =>
@@ -1109,27 +1148,27 @@ module Polars
     #   # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
     #
     # @example When closed="left", should not include right end of interval.
-    #   df.groupby_dynamic("time", every: "1h", closed: "left").agg(
+    #   df.group_by_dynamic("time", every: "1h", closed: "left").agg(
     #     [
     #       Polars.col("time").count.alias("time_count"),
-    #       Polars.col("time").list.alias("time_agg_list")
+    #       Polars.col("time").alias("time_agg_list")
     #     ]
     #   )
     #   # =>
     #   # shape: (4, 3)
-    #   # ┌─────────────────────┬────────────┬─────────────────────────────────────┐
-    #   # │ time                ┆ time_count ┆ time_agg_list                       │
-    #   # │ ---                 ┆ ---        ┆ ---                                 │
-    #   # │ datetime[μs]        ┆ u32        ┆ list[datetime[μs]]                  │
-    #   # ╞═════════════════════╪════════════╪═════════════════════════════════════╡
-    #   # │ 2021-12-16 00:00:00 ┆ 2          ┆ [2021-12-16 00:00:00, 2021-12-16... │
-    #   # │ 2021-12-16 01:00:00 ┆ 2          ┆ [2021-12-16 01:00:00, 2021-12-16... │
-    #   # │ 2021-12-16 02:00:00 ┆ 2          ┆ [2021-12-16 02:00:00, 2021-12-16... │
-    #   # │ 2021-12-16 03:00:00 ┆ 1          ┆ [2021-12-16 03:00:00]               │
-    #   # └─────────────────────┴────────────┴─────────────────────────────────────┘
+    #   # ┌─────────────────────┬────────────┬───────────────────────────────────┐
+    #   # │ time                ┆ time_count ┆ time_agg_list                     │
+    #   # │ ---                 ┆ ---        ┆ ---                               │
+    #   # │ datetime[μs]        ┆ u32        ┆ list[datetime[μs]]                │
+    #   # ╞═════════════════════╪════════════╪═══════════════════════════════════╡
+    #   # │ 2021-12-16 00:00:00 ┆ 2          ┆ [2021-12-16 00:00:00, 2021-12-16… │
+    #   # │ 2021-12-16 01:00:00 ┆ 2          ┆ [2021-12-16 01:00:00, 2021-12-16… │
+    #   # │ 2021-12-16 02:00:00 ┆ 2          ┆ [2021-12-16 02:00:00, 2021-12-16… │
+    #   # │ 2021-12-16 03:00:00 ┆ 1          ┆ [2021-12-16 03:00:00]             │
+    #   # └─────────────────────┴────────────┴───────────────────────────────────┘
     #
     # @example When closed="both" the time values at the window boundaries belong to 2 groups.
-    #   df.groupby_dynamic("time", every: "1h", closed: "both").agg(
+    #   df.group_by_dynamic("time", every: "1h", closed: "both").agg(
     #     [Polars.col("time").count.alias("time_count")]
     #   )
     #   # =>
@@ -1146,7 +1185,7 @@ module Polars
     #   # │ 2021-12-16 03:00:00 ┆ 1          │
     #   # └─────────────────────┴────────────┘
     #
-    # @example Dynamic groupbys can also be combined with grouping on normal keys.
+    # @example Dynamic group bys can also be combined with grouping on normal keys.
     #   df = Polars::DataFrame.new(
     #     {
     #       "time" => Polars.date_range(
@@ -1157,7 +1196,7 @@ module Polars
     #       "groups" => ["a", "a", "a", "b", "b", "a", "a"]
     #     }
     #   )
-    #   df.groupby_dynamic(
+    #   df.group_by_dynamic(
     #     "time",
     #     every: "1h",
     #     closed: "both",
@@ -1180,20 +1219,20 @@ module Polars
     #   # │ b      ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1          │
     #   # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
     #
-    # @example Dynamic groupby on an index column.
+    # @example Dynamic group by on an index column.
     #   df = Polars::DataFrame.new(
     #     {
     #       "idx" => Polars.arange(0, 6, eager: true),
     #       "A" => ["A", "A", "B", "B", "B", "C"]
     #     }
     #   )
-    #   df.groupby_dynamic(
+    #   df.group_by_dynamic(
     #     "idx",
     #     every: "2i",
     #     period: "3i",
     #     include_boundaries: true,
     #     closed: "right"
-    #   ).agg(Polars.col("A").list.alias("A_agg_list"))
+    #   ).agg(Polars.col("A").alias("A_agg_list"))
     #   # =>
     #   # shape: (3, 4)
     #   # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
@@ -1205,23 +1244,26 @@ module Polars
     #   # │ 2               ┆ 5               ┆ 2   ┆ ["B", "B", "C"] │
     #   # │ 4               ┆ 7               ┆ 4   ┆ ["C"]           │
     #   # └─────────────────┴─────────────────┴─────┴─────────────────┘
-    def groupby_dynamic(
+    def group_by_dynamic(
       index_column,
       every:,
       period: nil,
       offset: nil,
-      truncate: true,
+      truncate: nil,
       include_boundaries: false,
       closed: "left",
+      label: "left",
       by: nil,
-      start_by: "window"
+      start_by: "window",
+      check_sorted: true
     )
+      if !truncate.nil?
+        label = truncate ? "left" : "datapoint"
+      end
+      index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
       if offset.nil?
-        if period.nil?
-          offset = "-#{every}"
-        else
-          offset = "0ns"
-        end
+        offset = period.nil? ? "-#{every}" : "0ns"
       end
       if period.nil?
@@ -1233,19 +1275,21 @@ module Polars
       every = Utils._timedelta_to_pl_duration(every)
       rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
-      lgb = _ldf.groupby_dynamic(
-        index_column,
+      lgb = _ldf.group_by_dynamic(
+        index_column._rbexpr,
         every,
         period,
         offset,
-        truncate,
+        label,
         include_boundaries,
         closed,
         rbexprs_by,
-        start_by
+        start_by,
+        check_sorted
       )
-      LazyGroupBy.new(lgb, self.class)
+      LazyGroupBy.new(lgb)
     end
+    alias_method :groupby_dynamic, :group_by_dynamic
     # Perform an asof join.
     #
@@ -1351,7 +1395,7 @@ module Polars
       if by.is_a?(String)
         by_left_ = [by]
         by_right_ = [by]
-      elsif by.is_a?(Array)
+      elsif by.is_a?(::Array)
         by_left_ = by
         by_right_ = by
       end
@@ -1619,7 +1663,7 @@ module Polars
     #   # │ null │
     #   # └──────┘
     def with_context(other)
-      if !other.is_a?(Array)
+      if !other.is_a?(::Array)
         other = [other]
       end
@@ -1705,8 +1749,10 @@ module Polars
     # Shift the values by a given period.
     #
-    # @param periods [Integer]
+    # @param n [Integer]
     #   Number of places to shift (may be negative).
+    # @param fill_value [Object]
+    #   Fill the resulting null values with this value.
     #
     # @return [LazyFrame]
     #
@@ -1743,8 +1789,12 @@ module Polars
     #   # │ 5    ┆ 6    │
     #   # │ null ┆ null │
     #   # └──────┴──────┘
-    def shift(periods)
-      _from_rbldf(_ldf.shift(periods))
+    def shift(n, fill_value: nil)
+      if !fill_value.nil?
+        fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
+      end
+      n = Utils.parse_as_expression(n)
+      _from_rbldf(_ldf.shift(n, fill_value))
     end
     # Shift the values by a given period and fill the resulting null values.
@@ -1790,10 +1840,7 @@ module Polars
     #   # │ 0   ┆ 0   │
     #   # └─────┴─────┘
     def shift_and_fill(periods, fill_value)
-      if !fill_value.is_a?(Expr)
-        fill_value = Polars.lit(fill_value)
-      end
-      _from_rbldf(_ldf.shift_and_fill(periods, fill_value._rbexpr))
+      shift(periods, fill_value: fill_value)
     end
     # Get a slice of this DataFrame.
@@ -2228,7 +2275,7 @@ module Polars
     #
     # @return [LazyFrame]
     def unique(maintain_order: true, subset: nil, keep: "first")
-      if !subset.nil? && !subset.is_a?(Array)
+      if !subset.nil? && !subset.is_a?(::Array)
         subset = [subset]
       end
       _from_rbldf(_ldf.unique(maintain_order, subset, keep))
@@ -2261,7 +2308,7 @@ module Polars
     #   # │ 3   ┆ 8   ┆ c   │
     #   # └─────┴─────┴─────┘
     def drop_nulls(subset: nil)
-      if !subset.nil? && !subset.is_a?(Array)
+      if !subset.nil? && !subset.is_a?(::Array)
         subset = [subset]
       end
       _from_rbldf(_ldf.drop_nulls(subset))
@@ -2351,16 +2398,16 @@ module Polars
     #   df.interpolate.collect
     #   # =>
     #   # shape: (4, 3)
-    #   # ┌─────┬──────┬─────┐
-    #   # │ foo ┆ bar  ┆ baz │
-    #   # │ --- ┆ ---  ┆ --- │
-    #   # │ i64 ┆ i64  ┆ i64 │
-    #   # ╞═════╪══════╪═════╡
-    #   # │ 1   ┆ 6    ┆ 1   │
-    #   # │ 5   ┆ 7    ┆ 3   │
-    #   # │ 9   ┆ 9    ┆ 6   │
-    #   # │ 10  ┆ null ┆ 9   │
-    #   # └─────┴──────┴─────┘
+    #   # ┌──────┬──────┬──────────┐
+    #   # │ foo  ┆ bar  ┆ baz      │
+    #   # │ ---  ┆ ---  ┆ ---      │
+    #   # │ f64  ┆ f64  ┆ f64      │
+    #   # ╞══════╪══════╪══════════╡
+    #   # │ 1.0  ┆ 6.0  ┆ 1.0      │
+    #   # │ 5.0  ┆ 7.0  ┆ 3.666667 │
+    #   # │ 9.0  ┆ 9.0  ┆ 6.333333 │
+    #   # │ 10.0 ┆ null ┆ 9.0      │
+    #   # └──────┴──────┴──────────┘
     def interpolate
       select(Utils.col("*").interpolate)
     end
@@ -2423,6 +2470,38 @@ module Polars
       _from_rbldf(_ldf.unnest(names))
     end
+    # TODO
+    # def merge_sorted
+    # end
+    # Indicate that one or multiple columns are sorted.
+    #
+    # @param column [Object]
+    #   Columns that are sorted
+    # @param more_columns [Object]
+    #   Additional columns that are sorted, specified as positional arguments.
+    # @param descending [Boolean]
+    #   Whether the columns are sorted in descending order.
+    #
+    # @return [LazyFrame]
+    def set_sorted(
+      column,
+      *more_columns,
+      descending: false
+    )
+      columns = Utils.selection_to_rbexpr_list(column)
+      if more_columns.any?
+        columns.concat(Utils.selection_to_rbexpr_list(more_columns))
+      end
+      with_columns(
+        columns.map { |e| Utils.wrap_expr(e).set_sorted(descending: descending) }
+      )
+    end
+    # TODO
+    # def update
+    # end
     private
     def initialize_copy(other)