RubyGems - polars-df - Versions diffs - 0.6.0-x86_64-linux → 0.7.0-x86_64-linux - Mend

polars-df 0.6.0-x86_64-linux → 0.7.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

checksums.yaml +4 -4
data/CHANGELOG.md +12 -0
data/Cargo.lock +468 -538
data/Cargo.toml +1 -0
data/LICENSE-THIRD-PARTY.txt +3223 -4194
data/README.md +8 -7
data/lib/polars/3.0/polars.so +0 -0
data/lib/polars/3.1/polars.so +0 -0
data/lib/polars/3.2/polars.so +0 -0
data/lib/polars/config.rb +530 -0
data/lib/polars/data_frame.rb +115 -82
data/lib/polars/date_time_expr.rb +13 -18
data/lib/polars/date_time_name_space.rb +5 -25
data/lib/polars/dynamic_group_by.rb +2 -2
data/lib/polars/expr.rb +177 -94
data/lib/polars/functions.rb +29 -37
data/lib/polars/group_by.rb +38 -55
data/lib/polars/io.rb +37 -2
data/lib/polars/lazy_frame.rb +93 -66
data/lib/polars/lazy_functions.rb +36 -48
data/lib/polars/lazy_group_by.rb +7 -8
data/lib/polars/list_expr.rb +12 -8
data/lib/polars/list_name_space.rb +2 -2
data/lib/polars/name_expr.rb +198 -0
data/lib/polars/rolling_group_by.rb +2 -2
data/lib/polars/series.rb +26 -13
data/lib/polars/sql_context.rb +194 -0
data/lib/polars/string_expr.rb +114 -60
data/lib/polars/string_name_space.rb +19 -4
data/lib/polars/utils.rb +12 -0
data/lib/polars/version.rb +1 -1
data/lib/polars.rb +3 -0
metadata +5 -2

data/lib/polars/lazy_frame.rb CHANGED Viewed

@@ -97,7 +97,8 @@ module Polars
       row_count_offset: 0,
       storage_options: nil,
       low_memory: false,
-      use_statistics: true
+      use_statistics: true,
+      hive_partitioning: true
     )
       _from_rbldf(
         RbLazyFrame.new_from_parquet(
@@ -108,7 +109,8 @@ module Polars
           rechunk,
           Utils._prepare_row_count_args(row_count_name, row_count_offset),
           low_memory,
-          use_statistics
+          use_statistics,
+          hive_partitioning
         )
       )
     end
@@ -350,6 +352,7 @@ module Polars
         slice_pushdown,
         common_subplan_elimination,
         allow_streaming,
+        false
       )
       ldf.describe_optimized_plan
@@ -445,7 +448,7 @@ module Polars
     #       "c" => [6, 5, 4, 3, 2, 1]
     #     }
     #   ).lazy
-    #   df.groupby("a", maintain_order: true).agg(Polars.all.sum).collect
+    #   df.group_by("a", maintain_order: true).agg(Polars.all.sum).collect
     #   # =>
     #   # shape: (3, 3)
     #   # ┌─────┬─────┬─────┐
@@ -466,7 +469,8 @@ module Polars
       no_optimization: false,
       slice_pushdown: true,
       common_subplan_elimination: true,
-      allow_streaming: false
+      allow_streaming: false,
+      _eager: false
     )
       if no_optimization
         predicate_pushdown = false
@@ -486,7 +490,8 @@ module Polars
         simplify_expression,
         slice_pushdown,
         common_subplan_elimination,
-        allow_streaming
+        allow_streaming,
+        _eager
       )
       Utils.wrap_df(ldf.collect)
     end
@@ -568,7 +573,8 @@ module Polars
         simplify_expression,
         slice_pushdown,
         false,
-        true
+        true,
+        false
       )
       lf.sink_parquet(
         path,
@@ -623,7 +629,7 @@ module Polars
     #       "c" => [6, 5, 4, 3, 2, 1]
     #     }
     #   ).lazy
-    #   df.groupby("a", maintain_order: true).agg(Polars.all.sum).fetch(2)
+    #   df.group_by("a", maintain_order: true).agg(Polars.all.sum).fetch(2)
     #   # =>
     #   # shape: (2, 3)
     #   # ┌─────┬─────┬─────┐
@@ -660,7 +666,8 @@ module Polars
         simplify_expression,
         slice_pushdown,
         common_subplan_elimination,
-        allow_streaming
+        allow_streaming,
+        false
       )
       Utils.wrap_df(ldf.fetch(n_rows))
     end
@@ -853,13 +860,13 @@ module Polars
       _from_rbldf(_ldf.select(exprs))
     end
-    # Start a groupby operation.
+    # Start a group by operation.
     #
     # @param by [Object]
     #   Column(s) to group by.
     # @param maintain_order [Boolean]
     #   Make sure that the order of the groups remain consistent. This is more
-    #   expensive than a default groupby.
+    #   expensive than a default group by.
     #
     # @return [LazyGroupBy]
     #
@@ -871,7 +878,7 @@ module Polars
     #       "c" => [6, 5, 4, 3, 2, 1]
     #     }
     #   ).lazy
-    #   df.groupby("a", maintain_order: true).agg(Polars.col("b").sum).collect
+    #   df.group_by("a", maintain_order: true).agg(Polars.col("b").sum).collect
     #   # =>
     #   # shape: (3, 2)
     #   # ┌─────┬─────┐
@@ -883,19 +890,21 @@ module Polars
     #   # │ b   ┆ 11  │
     #   # │ c   ┆ 6   │
     #   # └─────┴─────┘
-    def groupby(by, maintain_order: false)
+    def group_by(by, maintain_order: false)
       rbexprs_by = Utils.selection_to_rbexpr_list(by)
-      lgb = _ldf.groupby(rbexprs_by, maintain_order)
-      LazyGroupBy.new(lgb, self.class)
+      lgb = _ldf.group_by(rbexprs_by, maintain_order)
+      LazyGroupBy.new(lgb)
     end
+    alias_method :groupby, :group_by
+    alias_method :group, :group_by
     # Create rolling groups based on a time column.
     #
     # Also works for index values of type `:i32` or `:i64`.
     #
-    # Different from a `dynamic_groupby` the windows are now determined by the
+    # Different from a `dynamic_group_by` the windows are now determined by the
     # individual values and are not of constant intervals. For constant intervals
-    # use *groupby_dynamic*.
+    # use *group_by_dynamic*.
     #
     # The `period` and `offset` arguments are created either from a timedelta, or
     # by using the following string language:
@@ -915,7 +924,7 @@ module Polars
     # Or combine them:
     # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
     #
-    # In case of a groupby_rolling on an integer column, the windows are defined by:
+    # In case of a group_by_rolling on an integer column, the windows are defined by:
     #
     # - "1i"      # length 1
     # - "10i"     # length 10
@@ -926,7 +935,7 @@ module Polars
     #   This column must be sorted in ascending order. If not the output will not
     #   make sense.
     #
-    #   In case of a rolling groupby on indices, dtype needs to be one of
+    #   In case of a rolling group by on indices, dtype needs to be one of
     #   `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
     #   performance matters use an `:i64` column.
     # @param period [Object]
@@ -958,7 +967,7 @@ module Polars
     #   df = Polars::LazyFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
     #     Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
     #   )
-    #   df.groupby_rolling(index_column: "dt", period: "2d").agg(
+    #   df.group_by_rolling(index_column: "dt", period: "2d").agg(
     #     [
     #       Polars.sum("a").alias("sum_a"),
     #       Polars.min("a").alias("min_a"),
@@ -979,7 +988,7 @@ module Polars
     #   # │ 2020-01-03 19:45:32 ┆ 11    ┆ 2     ┆ 9     │
     #   # │ 2020-01-08 23:16:43 ┆ 1     ┆ 1     ┆ 1     │
     #   # └─────────────────────┴───────┴───────┴───────┘
-    def groupby_rolling(
+    def group_by_rolling(
       index_column:,
       period:,
       offset: nil,
@@ -987,7 +996,7 @@ module Polars
       by: nil,
       check_sorted: true
     )
-      index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
+      index_column = Utils.parse_as_expression(index_column)
       if offset.nil?
         offset = "-#{period}"
       end
@@ -996,16 +1005,17 @@ module Polars
       period = Utils._timedelta_to_pl_duration(period)
       offset = Utils._timedelta_to_pl_duration(offset)
-      lgb = _ldf.groupby_rolling(
-        index_column._rbexpr, period, offset, closed, rbexprs_by, check_sorted
+      lgb = _ldf.group_by_rolling(
+        index_column, period, offset, closed, rbexprs_by, check_sorted
       )
-      LazyGroupBy.new(lgb, self.class)
+      LazyGroupBy.new(lgb)
     end
+    alias_method :groupby_rolling, :group_by_rolling
     # Group based on a time value (or index value of type `:i32`, `:i64`).
     #
     # Time windows are calculated and rows are assigned to windows. Different from a
-    # normal groupby is that a row can be member of multiple groups. The time/index
+    # normal group by is that a row can be member of multiple groups. The time/index
     # window could be seen as a rolling window, with a window size determined by
     # dates/times/values instead of slots in the DataFrame.
     #
@@ -1033,37 +1043,43 @@ module Polars
     # Or combine them:
     # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
     #
-    # In case of a groupby_dynamic on an integer column, the windows are defined by:
+    # In case of a group_by_dynamic on an integer column, the windows are defined by:
     #
     # - "1i"      # length 1
     # - "10i"     # length 10
     #
-    # @param index_column
+    # @param index_column [Object]
     #   Column used to group based on the time window.
     #   Often to type Date/Datetime
     #   This column must be sorted in ascending order. If not the output will not
     #   make sense.
     #
-    #   In case of a dynamic groupby on indices, dtype needs to be one of
+    #   In case of a dynamic group by on indices, dtype needs to be one of
     #   `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
     #   performance matters use an `:i64` column.
-    # @param every
+    # @param every [Object]
     #   Interval of the window.
-    # @param period
+    # @param period [Object]
     #   Length of the window, if None it is equal to 'every'.
-    # @param offset
+    # @param offset [Object]
     #   Offset of the window if None and period is None it will be equal to negative
     #   `every`.
-    # @param truncate
+    # @param truncate [Boolean]
     #   Truncate the time value to the window lower bound.
-    # @param include_boundaries
+    # @param include_boundaries [Boolean]
     #   Add the lower and upper bound of the window to the "_lower_bound" and
     #   "_upper_bound" columns. This will impact performance because it's harder to
     #   parallelize
     # @param closed ["right", "left", "both", "none"]
     #   Define whether the temporal window interval is closed or not.
-    # @param by
+    # @param by [Object]
     #   Also group by this column/these columns
+    # @param check_sorted [Boolean]
+    #   When the `by` argument is given, polars can not check sortedness
+    #   by the metadata and has to do a full scan on the index column to
+    #   verify data is sorted. This is expensive. If you are sure the
+    #   data within the by groups is sorted, you can set this to `false`.
+    #   Doing so incorrectly will lead to incorrect output.
     #
     # @return [DataFrame]
     #
@@ -1095,7 +1111,7 @@ module Polars
     #   # └─────────────────────┴─────┘
     #
     # @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
-    #   df.groupby_dynamic("time", every: "1h", closed: "right").agg(
+    #   df.group_by_dynamic("time", every: "1h", closed: "right").agg(
     #     [
     #       Polars.col("time").min.alias("time_min"),
     #       Polars.col("time").max.alias("time_max")
@@ -1115,7 +1131,7 @@ module Polars
     #   # └─────────────────────┴─────────────────────┴─────────────────────┘
     #
     # @example The window boundaries can also be added to the aggregation result.
-    #   df.groupby_dynamic(
+    #   df.group_by_dynamic(
     #     "time", every: "1h", include_boundaries: true, closed: "right"
     #   ).agg([Polars.col("time").count.alias("time_count")])
     #   # =>
@@ -1132,7 +1148,7 @@ module Polars
     #   # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
     #
     # @example When closed="left", should not include right end of interval.
-    #   df.groupby_dynamic("time", every: "1h", closed: "left").agg(
+    #   df.group_by_dynamic("time", every: "1h", closed: "left").agg(
     #     [
     #       Polars.col("time").count.alias("time_count"),
     #       Polars.col("time").alias("time_agg_list")
@@ -1152,7 +1168,7 @@ module Polars
     #   # └─────────────────────┴────────────┴───────────────────────────────────┘
     #
     # @example When closed="both" the time values at the window boundaries belong to 2 groups.
-    #   df.groupby_dynamic("time", every: "1h", closed: "both").agg(
+    #   df.group_by_dynamic("time", every: "1h", closed: "both").agg(
     #     [Polars.col("time").count.alias("time_count")]
     #   )
     #   # =>
@@ -1169,7 +1185,7 @@ module Polars
     #   # │ 2021-12-16 03:00:00 ┆ 1          │
     #   # └─────────────────────┴────────────┘
     #
-    # @example Dynamic groupbys can also be combined with grouping on normal keys.
+    # @example Dynamic group bys can also be combined with grouping on normal keys.
     #   df = Polars::DataFrame.new(
     #     {
     #       "time" => Polars.date_range(
@@ -1180,7 +1196,7 @@ module Polars
     #       "groups" => ["a", "a", "a", "b", "b", "a", "a"]
     #     }
     #   )
-    #   df.groupby_dynamic(
+    #   df.group_by_dynamic(
     #     "time",
     #     every: "1h",
     #     closed: "both",
@@ -1203,14 +1219,14 @@ module Polars
     #   # │ b      ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1          │
     #   # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
     #
-    # @example Dynamic groupby on an index column.
+    # @example Dynamic group by on an index column.
     #   df = Polars::DataFrame.new(
     #     {
     #       "idx" => Polars.arange(0, 6, eager: true),
     #       "A" => ["A", "A", "B", "B", "B", "C"]
     #     }
     #   )
-    #   df.groupby_dynamic(
+    #   df.group_by_dynamic(
     #     "idx",
     #     every: "2i",
     #     period: "3i",
@@ -1228,17 +1244,23 @@ module Polars
     #   # │ 2               ┆ 5               ┆ 2   ┆ ["B", "B", "C"] │
     #   # │ 4               ┆ 7               ┆ 4   ┆ ["C"]           │
     #   # └─────────────────┴─────────────────┴─────┴─────────────────┘
-    def groupby_dynamic(
+    def group_by_dynamic(
       index_column,
       every:,
       period: nil,
       offset: nil,
-      truncate: true,
+      truncate: nil,
       include_boundaries: false,
       closed: "left",
+      label: "left",
       by: nil,
-      start_by: "window"
+      start_by: "window",
+      check_sorted: true
     )
+      if !truncate.nil?
+        label = truncate ? "left" : "datapoint"
+      end
       index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
       if offset.nil?
         offset = period.nil? ? "-#{every}" : "0ns"
@@ -1253,19 +1275,21 @@ module Polars
       every = Utils._timedelta_to_pl_duration(every)
       rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
-      lgb = _ldf.groupby_dynamic(
+      lgb = _ldf.group_by_dynamic(
         index_column._rbexpr,
         every,
         period,
         offset,
-        truncate,
+        label,
         include_boundaries,
         closed,
         rbexprs_by,
-        start_by
+        start_by,
+        check_sorted
       )
-      LazyGroupBy.new(lgb, self.class)
+      LazyGroupBy.new(lgb)
     end
+    alias_method :groupby_dynamic, :group_by_dynamic
     # Perform an asof join.
     #
@@ -1725,8 +1749,10 @@ module Polars
     # Shift the values by a given period.
     #
-    # @param periods [Integer]
+    # @param n [Integer]
     #   Number of places to shift (may be negative).
+    # @param fill_value [Object]
+    #   Fill the resulting null values with this value.
     #
     # @return [LazyFrame]
     #
@@ -1763,8 +1789,12 @@ module Polars
     #   # │ 5    ┆ 6    │
     #   # │ null ┆ null │
     #   # └──────┴──────┘
-    def shift(periods)
-      _from_rbldf(_ldf.shift(periods))
+    def shift(n, fill_value: nil)
+      if !fill_value.nil?
+        fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
+      end
+      n = Utils.parse_as_expression(n)
+      _from_rbldf(_ldf.shift(n, fill_value))
     end
     # Shift the values by a given period and fill the resulting null values.
@@ -1810,10 +1840,7 @@ module Polars
     #   # │ 0   ┆ 0   │
     #   # └─────┴─────┘
     def shift_and_fill(periods, fill_value)
-      if !fill_value.is_a?(Expr)
-        fill_value = Polars.lit(fill_value)
-      end
-      _from_rbldf(_ldf.shift_and_fill(periods, fill_value._rbexpr))
+      shift(periods, fill_value: fill_value)
     end
     # Get a slice of this DataFrame.
@@ -2371,16 +2398,16 @@ module Polars
     #   df.interpolate.collect
     #   # =>
     #   # shape: (4, 3)
-    #   # ┌─────┬──────┬─────┐
-    #   # │ foo ┆ bar  ┆ baz │
-    #   # │ --- ┆ ---  ┆ --- │
-    #   # │ i64 ┆ i64  ┆ i64 │
-    #   # ╞═════╪══════╪═════╡
-    #   # │ 1   ┆ 6    ┆ 1   │
-    #   # │ 5   ┆ 7    ┆ 3   │
-    #   # │ 9   ┆ 9    ┆ 6   │
-    #   # │ 10  ┆ null ┆ 9   │
-    #   # └─────┴──────┴─────┘
+    #   # ┌──────┬──────┬──────────┐
+    #   # │ foo  ┆ bar  ┆ baz      │
+    #   # │ ---  ┆ ---  ┆ ---      │
+    #   # │ f64  ┆ f64  ┆ f64      │
+    #   # ╞══════╪══════╪══════════╡
+    #   # │ 1.0  ┆ 6.0  ┆ 1.0      │
+    #   # │ 5.0  ┆ 7.0  ┆ 3.666667 │
+    #   # │ 9.0  ┆ 9.0  ┆ 6.333333 │
+    #   # │ 10.0 ┆ null ┆ 9.0      │
+    #   # └──────┴──────┴──────────┘
     def interpolate
       select(Utils.col("*").interpolate)
     end

data/lib/polars/lazy_functions.rb CHANGED Viewed

@@ -43,7 +43,7 @@ module Polars
     #   # ┌─────┬─────┬────────────┐
     #   # │ a   ┆ b   ┆ rank       │
     #   # │ --- ┆ --- ┆ ---        │
-    #   # │ i64 ┆ i64 ┆ list[f32]  │
+    #   # │ i64 ┆ i64 ┆ list[f64]  │
     #   # ╞═════╪═════╪════════════╡
     #   # │ 1   ┆ 4   ┆ [1.0, 2.0] │
     #   # │ 8   ┆ 5   ┆ [2.0, 1.0] │
@@ -158,7 +158,7 @@ module Polars
         col(column.to_s).sum
       elsif column.is_a?(::Array)
         exprs = Utils.selection_to_rbexpr_list(column)
-        Utils.wrap_expr(_sum_exprs(exprs))
+        Utils.wrap_expr(_sum_horizontal(exprs))
       else
         fold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("sum")
       end
@@ -625,16 +625,16 @@ module Polars
     # This can be used in a `select`, `with_column`, etc. Be sure that the resulting
     # range size is equal to the length of the DataFrame you are collecting.
     #
-    # @param low [Integer, Expr, Series]
+    # @param start [Integer, Expr, Series]
     #   Lower bound of range.
-    # @param high [Integer, Expr, Series]
+    # @param stop [Integer, Expr, Series]
     #   Upper bound of range.
     # @param step [Integer]
     #   Step size of the range.
     # @param eager [Boolean]
     #   If eager evaluation is `True`, a Series is returned instead of an Expr.
     # @param dtype [Symbol]
-    #   Apply an explicit integer dtype to the resulting expression (default is `:i64`).
+    #   Apply an explicit integer dtype to the resulting expression (default is `Int64`).
     #
     # @return [Expr, Series]
     #
@@ -648,35 +648,20 @@ module Polars
     #   #         1
     #   #         2
     #   # ]
-    #
-    # @example
-    #   df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4]})
-    #   df.select(Polars.arange(Polars.col("a"), Polars.col("b")))
-    #   # =>
-    #   # shape: (2, 1)
-    #   # ┌───────────┐
-    #   # │ arange    │
-    #   # │ ---       │
-    #   # │ list[i64] │
-    #   # ╞═══════════╡
-    #   # │ [1, 2]    │
-    #   # │ [2, 3]    │
-    #   # └───────────┘
-    def arange(low, high, step: 1, eager: false, dtype: nil)
-      low = Utils.expr_to_lit_or_expr(low, str_to_lit: false)
-      high = Utils.expr_to_lit_or_expr(high, str_to_lit: false)
-      range_expr = Utils.wrap_expr(RbExpr.arange(low._rbexpr, high._rbexpr, step))
-      if !dtype.nil? && !["i64", Int64].include?(dtype)
-        range_expr = range_expr.cast(dtype)
-      end
+    def int_range(start, stop, step: 1, eager: false, dtype: nil)
+      start = Utils.parse_as_expression(start)
+      stop = Utils.parse_as_expression(stop)
+      dtype ||= Int64
+      dtype = dtype.to_s if dtype.is_a?(Symbol)
+      result = Utils.wrap_expr(RbExpr.int_range(start, stop, step, dtype)).alias("arange")
-      if !eager
-        range_expr
-      else
-        DataFrame.new.select(range_expr.alias("arange")).to_series
+      if eager
+        return select(result).to_series
       end
+      result
     end
+    alias_method :arange, :int_range
     # Find the indexes that would sort the columns.
     #
@@ -735,15 +720,22 @@ module Polars
     #   # │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
     #   # └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘
     def duration(
+      weeks: nil,
       days: nil,
+      hours: nil,
+      minutes: nil,
       seconds: nil,
-      nanoseconds: nil,
-      microseconds: nil,
       milliseconds: nil,
-      minutes: nil,
-      hours: nil,
-      weeks: nil
+      microseconds: nil,
+      nanoseconds: nil,
+      time_unit: "us"
     )
+      if !weeks.nil?
+        weeks = Utils.expr_to_lit_or_expr(weeks, str_to_lit: false)._rbexpr
+      end
+      if !days.nil?
+        days = Utils.expr_to_lit_or_expr(days, str_to_lit: false)._rbexpr
+      end
       if !hours.nil?
         hours = Utils.expr_to_lit_or_expr(hours, str_to_lit: false)._rbexpr
       end
@@ -762,23 +754,18 @@ module Polars
       if !nanoseconds.nil?
         nanoseconds = Utils.expr_to_lit_or_expr(nanoseconds, str_to_lit: false)._rbexpr
       end
-      if !days.nil?
-        days = Utils.expr_to_lit_or_expr(days, str_to_lit: false)._rbexpr
-      end
-      if !weeks.nil?
-        weeks = Utils.expr_to_lit_or_expr(weeks, str_to_lit: false)._rbexpr
-      end
       Utils.wrap_expr(
         _rb_duration(
+          weeks,
           days,
+          hours,
+          minutes,
           seconds,
-          nanoseconds,
-          microseconds,
           milliseconds,
-          minutes,
-          hours,
-          weeks
+          microseconds,
+          nanoseconds,
+          time_unit
         )
       )
     end
@@ -944,7 +931,8 @@ module Polars
           simplify_expression,
           slice_pushdown,
           common_subplan_elimination,
-          allow_streaming
+          allow_streaming,
+          false
         )
         prepared << ldf
       end

data/lib/polars/lazy_group_by.rb CHANGED Viewed

@@ -1,10 +1,9 @@
 module Polars
-  # Created by `df.lazy.groupby("foo")`.
+  # Created by `df.lazy.group_by("foo")`.
   class LazyGroupBy
     # @private
-    def initialize(lgb, lazyframe_class)
+    def initialize(lgb)
       @lgb = lgb
-      @lazyframe_class = lazyframe_class
     end
     # Describe the aggregation that need to be done on a group.
@@ -12,7 +11,7 @@ module Polars
     # @return [LazyFrame]
     def agg(aggs)
       rbexprs = Utils.selection_to_rbexpr_list(aggs)
-      @lazyframe_class._from_rbldf(@lgb.agg(rbexprs))
+      Utils.wrap_ldf(@lgb.agg(rbexprs))
     end
     # Get the first `n` rows of each group.
@@ -29,7 +28,7 @@ module Polars
     #       "nrs" => [1, 2, 3, 4, 5, 6]
     #     }
     #   )
-    #   df.groupby("letters").head(2).sort("letters")
+    #   df.group_by("letters").head(2).sort("letters")
     #   # =>
     #   # shape: (5, 2)
     #   # ┌─────────┬─────┐
@@ -44,7 +43,7 @@ module Polars
     #   # │ c       ┆ 2   │
     #   # └─────────┴─────┘
     def head(n = 5)
-      @lazyframe_class._from_rbldf(@lgb.head(n))
+      Utils.wrap_ldf(@lgb.head(n))
     end
     # Get the last `n` rows of each group.
@@ -61,7 +60,7 @@ module Polars
     #       "nrs" => [1, 2, 3, 4, 5, 6]
     #     }
     #   )
-    #   df.groupby("letters").tail(2).sort("letters")
+    #   df.group_by("letters").tail(2).sort("letters")
     #   # =>
     #   # shape: (5, 2)
     #   # ┌─────────┬─────┐
@@ -76,7 +75,7 @@ module Polars
     #   # │ c       ┆ 4   │
     #   # └─────────┴─────┘
     def tail(n = 5)
-      @lazyframe_class._from_rbldf(@lgb.tail(n))
+      Utils.wrap_ldf(@lgb.tail(n))
     end
     # def apply