RubyGems - polars-df - Versions diffs - 0.6.0-arm64-darwin → 0.8.0-arm64-darwin - Mend

polars-df 0.6.0-arm64-darwin → 0.8.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

checksums.yaml +4 -4
data/CHANGELOG.md +24 -0
data/Cargo.lock +597 -599
data/Cargo.toml +1 -0
data/LICENSE-THIRD-PARTY.txt +5523 -6947
data/README.md +8 -7
data/lib/polars/3.1/polars.bundle +0 -0
data/lib/polars/3.2/polars.bundle +0 -0
data/lib/polars/{3.0 → 3.3}/polars.bundle +0 -0
data/lib/polars/config.rb +530 -0
data/lib/polars/data_frame.rb +182 -145
data/lib/polars/data_types.rb +4 -1
data/lib/polars/date_time_expr.rb +23 -28
data/lib/polars/date_time_name_space.rb +17 -37
data/lib/polars/dynamic_group_by.rb +2 -2
data/lib/polars/expr.rb +398 -110
data/lib/polars/functions.rb +29 -37
data/lib/polars/group_by.rb +38 -55
data/lib/polars/io.rb +40 -5
data/lib/polars/lazy_frame.rb +116 -89
data/lib/polars/lazy_functions.rb +40 -68
data/lib/polars/lazy_group_by.rb +7 -8
data/lib/polars/list_expr.rb +12 -8
data/lib/polars/list_name_space.rb +2 -2
data/lib/polars/name_expr.rb +198 -0
data/lib/polars/rolling_group_by.rb +2 -2
data/lib/polars/series.rb +315 -43
data/lib/polars/sql_context.rb +194 -0
data/lib/polars/string_expr.rb +114 -60
data/lib/polars/string_name_space.rb +19 -4
data/lib/polars/struct_expr.rb +1 -1
data/lib/polars/struct_name_space.rb +1 -1
data/lib/polars/utils.rb +25 -13
data/lib/polars/version.rb +1 -1
data/lib/polars.rb +3 -0
metadata +8 -5

data/lib/polars/lazy_functions.rb CHANGED

@@ -43,7 +43,7 @@ module Polars
     #   # ┌─────┬─────┬────────────┐
     #   # │ a   ┆ b   ┆ rank       │
     #   # │ --- ┆ --- ┆ ---        │
-    #   # │ i64 ┆ i64 ┆ list[f32]  │
+    #   # │ i64 ┆ i64 ┆ list[f64]  │
     #   # ╞═════╪═════╪════════════╡
     #   # │ 1   ┆ 4   ┆ [1.0, 2.0] │
     #   # │ 8   ┆ 5   ┆ [2.0, 1.0] │
@@ -107,44 +107,28 @@ module Polars
     # Get the maximum value.
     #
     # @param column [Object]
-    #   Column(s) to be used in aggregation. Will lead to different behavior based on
-    #   the input:
-    #
-    #   - [String, Series] -> aggregate the maximum value of that column.
-    #   - [Array<Expr>] -> aggregate the maximum value horizontally.
+    #   Column(s) to be used in aggregation.
     #
     # @return [Expr, Object]
     def max(column)
       if column.is_a?(Series)
         column.max
-      elsif Utils.strlike?(column)
-        col(column).max
       else
-        exprs = Utils.selection_to_rbexpr_list(column)
-        # TODO
-        Utils.wrap_expr(_max_exprs(exprs))
+        col(column).max
       end
     end
     # Get the minimum value.
     #
     # @param column [Object]
-    #   Column(s) to be used in aggregation. Will lead to different behavior based on
-    #   the input:
-    #
-    #   - [String, Series] -> aggregate the minimum value of that column.
-    #   - [Array<Expr>] -> aggregate the minimum value horizontally.
+    #   Column(s) to be used in aggregation.
     #
     # @return [Expr, Object]
     def min(column)
       if column.is_a?(Series)
         column.min
-      elsif Utils.strlike?(column)
-        col(column).min
       else
-        exprs = Utils.selection_to_rbexpr_list(column)
-        # TODO
-        Utils.wrap_expr(_min_exprs(exprs))
+        col(column).min
       end
     end
@@ -158,7 +142,7 @@ module Polars
         col(column.to_s).sum
       elsif column.is_a?(::Array)
         exprs = Utils.selection_to_rbexpr_list(column)
-        Utils.wrap_expr(_sum_exprs(exprs))
+        Utils.wrap_expr(_sum_horizontal(exprs))
       else
         fold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("sum")
       end
@@ -625,16 +609,16 @@ module Polars
     # This can be used in a `select`, `with_column`, etc. Be sure that the resulting
     # range size is equal to the length of the DataFrame you are collecting.
     #
-    # @param low [Integer, Expr, Series]
+    # @param start [Integer, Expr, Series]
     #   Lower bound of range.
-    # @param high [Integer, Expr, Series]
+    # @param stop [Integer, Expr, Series]
     #   Upper bound of range.
     # @param step [Integer]
     #   Step size of the range.
     # @param eager [Boolean]
     #   If eager evaluation is `True`, a Series is returned instead of an Expr.
     # @param dtype [Symbol]
-    #   Apply an explicit integer dtype to the resulting expression (default is `:i64`).
+    #   Apply an explicit integer dtype to the resulting expression (default is `Int64`).
     #
     # @return [Expr, Series]
     #
@@ -648,35 +632,20 @@ module Polars
     #   #         1
     #   #         2
     #   # ]
-    #
-    # @example
-    #   df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4]})
-    #   df.select(Polars.arange(Polars.col("a"), Polars.col("b")))
-    #   # =>
-    #   # shape: (2, 1)
-    #   # ┌───────────┐
-    #   # │ arange    │
-    #   # │ ---       │
-    #   # │ list[i64] │
-    #   # ╞═══════════╡
-    #   # │ [1, 2]    │
-    #   # │ [2, 3]    │
-    #   # └───────────┘
-    def arange(low, high, step: 1, eager: false, dtype: nil)
-      low = Utils.expr_to_lit_or_expr(low, str_to_lit: false)
-      high = Utils.expr_to_lit_or_expr(high, str_to_lit: false)
-      range_expr = Utils.wrap_expr(RbExpr.arange(low._rbexpr, high._rbexpr, step))
-      if !dtype.nil? && !["i64", Int64].include?(dtype)
-        range_expr = range_expr.cast(dtype)
-      end
+    def int_range(start, stop, step: 1, eager: false, dtype: nil)
+      start = Utils.parse_as_expression(start)
+      stop = Utils.parse_as_expression(stop)
+      dtype ||= Int64
+      dtype = dtype.to_s if dtype.is_a?(Symbol)
+      result = Utils.wrap_expr(RbExpr.int_range(start, stop, step, dtype)).alias("arange")
-      if !eager
-        range_expr
-      else
-        DataFrame.new.select(range_expr.alias("arange")).to_series
+      if eager
+        return select(result).to_series
       end
+      result
     end
+    alias_method :arange, :int_range
     # Find the indexes that would sort the columns.
     #
@@ -735,15 +704,22 @@ module Polars
     #   # │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
     #   # └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘
     def duration(
+      weeks: nil,
       days: nil,
+      hours: nil,
+      minutes: nil,
       seconds: nil,
-      nanoseconds: nil,
-      microseconds: nil,
       milliseconds: nil,
-      minutes: nil,
-      hours: nil,
-      weeks: nil
+      microseconds: nil,
+      nanoseconds: nil,
+      time_unit: "us"
     )
+      if !weeks.nil?
+        weeks = Utils.expr_to_lit_or_expr(weeks, str_to_lit: false)._rbexpr
+      end
+      if !days.nil?
+        days = Utils.expr_to_lit_or_expr(days, str_to_lit: false)._rbexpr
+      end
       if !hours.nil?
         hours = Utils.expr_to_lit_or_expr(hours, str_to_lit: false)._rbexpr
       end
@@ -762,23 +738,18 @@ module Polars
       if !nanoseconds.nil?
         nanoseconds = Utils.expr_to_lit_or_expr(nanoseconds, str_to_lit: false)._rbexpr
       end
-      if !days.nil?
-        days = Utils.expr_to_lit_or_expr(days, str_to_lit: false)._rbexpr
-      end
-      if !weeks.nil?
-        weeks = Utils.expr_to_lit_or_expr(weeks, str_to_lit: false)._rbexpr
-      end
       Utils.wrap_expr(
         _rb_duration(
+          weeks,
           days,
+          hours,
+          minutes,
           seconds,
-          nanoseconds,
-          microseconds,
           milliseconds,
-          minutes,
-          hours,
-          weeks
+          microseconds,
+          nanoseconds,
+          time_unit
         )
       )
     end
@@ -944,7 +915,8 @@ module Polars
           simplify_expression,
           slice_pushdown,
           common_subplan_elimination,
-          allow_streaming
+          allow_streaming,
+          false
         )
         prepared << ldf
       end

data/lib/polars/lazy_group_by.rb CHANGED

@@ -1,10 +1,9 @@
 module Polars
-  # Created by `df.lazy.groupby("foo")`.
+  # Created by `df.lazy.group_by("foo")`.
   class LazyGroupBy
     # @private
-    def initialize(lgb, lazyframe_class)
+    def initialize(lgb)
       @lgb = lgb
-      @lazyframe_class = lazyframe_class
     end
     # Describe the aggregation that need to be done on a group.
@@ -12,7 +11,7 @@ module Polars
     # @return [LazyFrame]
     def agg(aggs)
       rbexprs = Utils.selection_to_rbexpr_list(aggs)
-      @lazyframe_class._from_rbldf(@lgb.agg(rbexprs))
+      Utils.wrap_ldf(@lgb.agg(rbexprs))
     end
     # Get the first `n` rows of each group.
@@ -29,7 +28,7 @@ module Polars
     #       "nrs" => [1, 2, 3, 4, 5, 6]
     #     }
     #   )
-    #   df.groupby("letters").head(2).sort("letters")
+    #   df.group_by("letters").head(2).sort("letters")
     #   # =>
     #   # shape: (5, 2)
     #   # ┌─────────┬─────┐
@@ -44,7 +43,7 @@ module Polars
     #   # │ c       ┆ 2   │
     #   # └─────────┴─────┘
     def head(n = 5)
-      @lazyframe_class._from_rbldf(@lgb.head(n))
+      Utils.wrap_ldf(@lgb.head(n))
     end
     # Get the last `n` rows of each group.
@@ -61,7 +60,7 @@ module Polars
     #       "nrs" => [1, 2, 3, 4, 5, 6]
     #     }
     #   )
-    #   df.groupby("letters").tail(2).sort("letters")
+    #   df.group_by("letters").tail(2).sort("letters")
     #   # =>
     #   # shape: (5, 2)
     #   # ┌─────────┬─────┐
@@ -76,7 +75,7 @@ module Polars
     #   # │ c       ┆ 4   │
     #   # └─────────┴─────┘
     def tail(n = 5)
-      @lazyframe_class._from_rbldf(@lgb.tail(n))
+      Utils.wrap_ldf(@lgb.tail(n))
     end
     # def apply

data/lib/polars/list_expr.rb CHANGED

@@ -27,8 +27,9 @@ module Polars
     #   # │ 1   │
     #   # └─────┘
     def lengths
-      Utils.wrap_expr(_rbexpr.list_lengths)
+      Utils.wrap_expr(_rbexpr.list_len)
     end
+    alias_method :len, :lengths
     # Sum all the lists in the array.
     #
@@ -379,6 +380,7 @@ module Polars
     #   # │ x y   │
     #   # └───────┘
     def join(separator)
+      separator = Utils.parse_as_expression(separator, str_as_lit: true)
       Utils.wrap_expr(_rbexpr.list_join(separator))
     end
@@ -457,7 +459,7 @@ module Polars
     # Shift values by the given period.
     #
-    # @param periods [Integer]
+    # @param n [Integer]
     #   Number of places to shift (may be negative).
     #
     # @return [Expr]
@@ -472,8 +474,9 @@ module Polars
     #   #         [null, 1, … 3]
     #   #         [null, 10, 2]
     #   # ]
-    def shift(periods = 1)
-      Utils.wrap_expr(_rbexpr.list_shift(periods))
+    def shift(n = 1)
+      n = Utils.parse_as_expression(n)
+      Utils.wrap_expr(_rbexpr.list_shift(n))
     end
     # Slice every sublist.
@@ -568,9 +571,10 @@ module Polars
     #   # │ 1              │
     #   # │ 0              │
     #   # └────────────────┘
-    def count_match(element)
-      Utils.wrap_expr(_rbexpr.list_count_match(Utils.expr_to_lit_or_expr(element)._rbexpr))
+    def count_matches(element)
+      Utils.wrap_expr(_rbexpr.list_count_matches(Utils.expr_to_lit_or_expr(element)._rbexpr))
     end
+    alias_method :count_match, :count_matches
     # Convert the series of type `List` to a series of type `Struct`.
     #
@@ -609,7 +613,7 @@ module Polars
     #   Run all expression parallel. Don't activate this blindly.
     #   Parallelism is worth it if there is enough work to do per thread.
     #
-    #   This likely should not be use in the groupby context, because we already
+    #   This likely should not be use in the group by context, because we already
     #   parallel execution per group
     #
     # @return [Expr]
@@ -624,7 +628,7 @@ module Polars
     #   # ┌─────┬─────┬────────────┐
     #   # │ a   ┆ b   ┆ rank       │
     #   # │ --- ┆ --- ┆ ---        │
-    #   # │ i64 ┆ i64 ┆ list[f32]  │
+    #   # │ i64 ┆ i64 ┆ list[f64]  │
     #   # ╞═════╪═════╪════════════╡
     #   # │ 1   ┆ 4   ┆ [1.0, 2.0] │
     #   # │ 8   ┆ 5   ┆ [2.0, 1.0] │

data/lib/polars/list_name_space.rb CHANGED

@@ -315,7 +315,7 @@ module Polars
     #   Run all expression parallel. Don't activate this blindly.
     #   Parallelism is worth it if there is enough work to do per thread.
     #
-    #   This likely should not be use in the groupby context, because we already
+    #   This likely should not be use in the group by context, because we already
     #   parallel execution per group
     #
     # @return [Series]
@@ -330,7 +330,7 @@ module Polars
     #   # ┌─────┬─────┬────────────┐
     #   # │ a   ┆ b   ┆ rank       │
     #   # │ --- ┆ --- ┆ ---        │
-    #   # │ i64 ┆ i64 ┆ list[f32]  │
+    #   # │ i64 ┆ i64 ┆ list[f64]  │
     #   # ╞═════╪═════╪════════════╡
     #   # │ 1   ┆ 4   ┆ [1.0, 2.0] │
     #   # │ 8   ┆ 5   ┆ [2.0, 1.0] │

data/lib/polars/name_expr.rb ADDED

@@ -0,0 +1,198 @@
+module Polars
+  # Namespace for expressions that operate on expression names.
+  class NameExpr
+    # @private
+    attr_accessor :_rbexpr
+    # @private
+    def initialize(expr)
+      self._rbexpr = expr._rbexpr
+    end
+    # Keep the original root name of the expression.
+    #
+    # @note
+    #   Due to implementation constraints, this method can only be called as the last
+    #   expression in a chain.
+    #
+    # @return [Expr]
+    #
+    # @example Prevent errors due to potential duplicate column names.
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2],
+    #       "b" => [3, 4]
+    #     }
+    #   )
+    #   df.select((Polars.lit(10) / Polars.all).name.keep)
+    #   # =>
+    #   # shape: (2, 2)
+    #   # ┌──────┬──────────┐
+    #   # │ a    ┆ b        │
+    #   # │ ---  ┆ ---      │
+    #   # │ f64  ┆ f64      │
+    #   # ╞══════╪══════════╡
+    #   # │ 10.0 ┆ 3.333333 │
+    #   # │ 5.0  ┆ 2.5      │
+    #   # └──────┴──────────┘
+    #
+    # @example Undo an alias operation.
+    #   df.with_columns((Polars.col("a") * 9).alias("c").name.keep)
+    #   # =>
+    #   # shape: (2, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 9   ┆ 3   │
+    #   # │ 18  ┆ 4   │
+    #   # └─────┴─────┘
+    def keep
+      Utils.wrap_expr(_rbexpr.name_keep)
+    end
+    # Rename the output of an expression by mapping a function over the root name.
+    #
+    # @return [Expr]
+    #
+    # @example Remove a common suffix and convert to lower case.
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "A_reverse" => [3, 2, 1],
+    #       "B_reverse" => ["z", "y", "x"]
+    #     }
+    #   )
+    #   df.with_columns(
+    #     Polars.all.reverse.name.map { |c| c.delete_suffix("_reverse").downcase }
+    #   )
+    #   # =>
+    #   # shape: (3, 4)
+    #   # ┌───────────┬───────────┬─────┬─────┐
+    #   # │ A_reverse ┆ B_reverse ┆ a   ┆ b   │
+    #   # │ ---       ┆ ---       ┆ --- ┆ --- │
+    #   # │ i64       ┆ str       ┆ i64 ┆ str │
+    #   # ╞═══════════╪═══════════╪═════╪═════╡
+    #   # │ 3         ┆ z         ┆ 1   ┆ x   │
+    #   # │ 2         ┆ y         ┆ 2   ┆ y   │
+    #   # │ 1         ┆ x         ┆ 3   ┆ z   │
+    #   # └───────────┴───────────┴─────┴─────┘
+    def map(&f)
+      Utils.wrap_expr(_rbexpr.name_map(f))
+    end
+    # Add a prefix to the root column name of the expression.
+    #
+    # @param prefix [Object]
+    #   Prefix to add to the root column name.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3],
+    #       "b" => ["x", "y", "z"]
+    #     }
+    #   )
+    #   df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
+    #   # =>
+    #   # shape: (3, 4)
+    #   # ┌─────┬─────┬───────────┬───────────┐
+    #   # │ a   ┆ b   ┆ reverse_a ┆ reverse_b │
+    #   # │ --- ┆ --- ┆ ---       ┆ ---       │
+    #   # │ i64 ┆ str ┆ i64       ┆ str       │
+    #   # ╞═════╪═════╪═══════════╪═══════════╡
+    #   # │ 1   ┆ x   ┆ 3         ┆ z         │
+    #   # │ 2   ┆ y   ┆ 2         ┆ y         │
+    #   # │ 3   ┆ z   ┆ 1         ┆ x         │
+    #   # └─────┴─────┴───────────┴───────────┘
+    def prefix(prefix)
+      Utils.wrap_expr(_rbexpr.name_prefix(prefix))
+    end
+    # Add a suffix to the root column name of the expression.
+    #
+    # @param suffix [Object]
+    #   Suffix to add to the root column name.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3],
+    #       "b" => ["x", "y", "z"]
+    #     }
+    #   )
+    #   df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
+    #   # =>
+    #   # shape: (3, 4)
+    #   # ┌─────┬─────┬───────────┬───────────┐
+    #   # │ a   ┆ b   ┆ a_reverse ┆ b_reverse │
+    #   # │ --- ┆ --- ┆ ---       ┆ ---       │
+    #   # │ i64 ┆ str ┆ i64       ┆ str       │
+    #   # ╞═════╪═════╪═══════════╪═══════════╡
+    #   # │ 1   ┆ x   ┆ 3         ┆ z         │
+    #   # │ 2   ┆ y   ┆ 2         ┆ y         │
+    #   # │ 3   ┆ z   ┆ 1         ┆ x         │
+    #   # └─────┴─────┴───────────┴───────────┘
+    def suffix(suffix)
+      Utils.wrap_expr(_rbexpr.name_suffix(suffix))
+    end
+    # Make the root column name lowercase.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "ColX" => [1, 2, 3],
+    #       "ColY" => ["x", "y", "z"],
+    #     }
+    #   )
+    #   df.with_columns(Polars.all.name.to_lowercase)
+    #   # =>
+    #   # shape: (3, 4)
+    #   # ┌──────┬──────┬──────┬──────┐
+    #   # │ ColX ┆ ColY ┆ colx ┆ coly │
+    #   # │ ---  ┆ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ str  ┆ i64  ┆ str  │
+    #   # ╞══════╪══════╪══════╪══════╡
+    #   # │ 1    ┆ x    ┆ 1    ┆ x    │
+    #   # │ 2    ┆ y    ┆ 2    ┆ y    │
+    #   # │ 3    ┆ z    ┆ 3    ┆ z    │
+    #   # └──────┴──────┴──────┴──────┘
+    def to_lowercase
+      Utils.wrap_expr(_rbexpr.name_to_lowercase)
+    end
+    # Make the root column name uppercase.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "ColX" => [1, 2, 3],
+    #       "ColY" => ["x", "y", "z"]
+    #     }
+    #   )
+    #   df.with_columns(Polars.all.name.to_uppercase)
+    #   # =>
+    #   # shape: (3, 4)
+    #   # ┌──────┬──────┬──────┬──────┐
+    #   # │ ColX ┆ ColY ┆ COLX ┆ COLY │
+    #   # │ ---  ┆ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ str  ┆ i64  ┆ str  │
+    #   # ╞══════╪══════╪══════╪══════╡
+    #   # │ 1    ┆ x    ┆ 1    ┆ x    │
+    #   # │ 2    ┆ y    ┆ 2    ┆ y    │
+    #   # │ 3    ┆ z    ┆ 3    ┆ z    │
+    #   # └──────┴──────┴──────┴──────┘
+    def to_uppercase
+      Utils.wrap_expr(_rbexpr.name_to_uppercase)
+    end
+  end
+end

data/lib/polars/rolling_group_by.rb CHANGED

@@ -2,7 +2,7 @@ module Polars
   # A rolling grouper.
   #
   # This has an `.agg` method which will allow you to run all polars expressions in a
-  # groupby context.
+  # group by context.
   class RollingGroupBy
     def initialize(
       df,
@@ -27,7 +27,7 @@ module Polars
     def agg(aggs)
       @df.lazy
-        .groupby_rolling(
+        .group_by_rolling(
           index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by, check_sorted: @check_sorted
         )
         .agg(aggs)