polars-df 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,49 @@
+ module Polars
+ # A dynamic grouper.
+ #
+ # This has an `.agg` method which allows you to run all polars expressions in a
+ # groupby context.
+ class DynamicGroupBy
+ def initialize(
+ df,
+ index_column,
+ every,
+ period,
+ offset,
+ truncate,
+ include_boundaries,
+ closed,
+ by
+ )
+ period = Utils._timedelta_to_pl_duration(period)
+ offset = Utils._timedelta_to_pl_duration(offset)
+ every = Utils._timedelta_to_pl_duration(every)
+
+ @df = df
+ @time_column = index_column
+ @every = every
+ @period = period
+ @offset = offset
+ @truncate = truncate
+ @include_boundaries = include_boundaries
+ @closed = closed
+ @by = by
+ end
+
+ def agg(aggs)
+ @df.lazy
+ .groupby_dynamic(
+ @time_column,
+ every: @every,
+ period: @period,
+ offset: @offset,
+ truncate: @truncate,
+ include_boundaries: @include_boundaries,
+ closed: @closed,
+ by: @by
+ )
+ .agg(aggs)
+ .collect(no_optimization: true, string_cache: false)
+ end
+ end
+ end
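This internal grouper is not normally constructed directly. A minimal usage sketch, assuming it is reached through a `DataFrame#groupby_dynamic` entry point that forwards these arguments (the frame `df` and its "time"/"value" columns are hypothetical):

    # assumes df has a sorted datetime column "time" and a numeric column "value";
    # the grouper buckets rows into 1-hour windows, then .agg runs the expressions and collects eagerly
    df.groupby_dynamic("time", every: "1h").agg(Polars.col("value").sum.alias("value_sum"))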
data/lib/polars/expr.rb CHANGED
@@ -432,8 +432,34 @@ module Polars
  wrap_expr(_rbexpr.suffix(suffix))
  end

- # def map_alias
- # end
+ # Rename the output of an expression by mapping a function over the root name.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "A" => [1, 2],
+ # "B" => [3, 4]
+ # }
+ # )
+ # df.select(
+ # Polars.all.reverse.map_alias { |colName| colName + "_reverse" }
+ # )
+ # # =>
+ # # shape: (2, 2)
+ # # ┌───────────┬───────────┐
+ # # │ A_reverse ┆ B_reverse │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ i64 │
+ # # ╞═══════════╪═══════════╡
+ # # │ 2 ┆ 4 │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1 ┆ 3 │
+ # # └───────────┴───────────┘
+ def map_alias(&f)
+ Utils.wrap_expr(_rbexpr.map_alias(f))
+ end

  # Negate a boolean expression.
  #
@@ -2575,14 +2601,98 @@ module Polars
  # # ╞══════╪════════╡
  # # │ 1 ┆ 0 │
  # # └──────┴────────┘
- # def map(return_dtype: nil, agg_list: false, &block)
+ # def map(return_dtype: nil, agg_list: false, &f)
  # if !return_dtype.nil?
  # return_dtype = Utils.rb_type_to_dtype(return_dtype)
  # end
- # wrap_expr(_rbexpr.map(return_dtype, agg_list, &block))
+ # wrap_expr(_rbexpr.map(f, return_dtype, agg_list))
  # end

- # def apply
+ # Apply a custom/user-defined function (UDF) in a GroupBy or Projection context.
+ #
+ # Depending on the context it has the following behavior:
+ #
+ # * Selection
+ # Expects `f` to be of type Callable[[Any], Any].
+ # Applies a Ruby function over each individual value in the column.
+ # * GroupBy
+ # Expects `f` to be of type Callable[[Series], Series].
+ # Applies a Ruby function over each group.
+ #
+ # Implementing logic using a Ruby function is almost always _significantly_
+ # slower and more memory intensive than implementing the same logic using
+ # the native expression API because:
+ #
+ # - The native expression engine runs in Rust; UDFs run in Ruby.
+ # - Use of Ruby UDFs forces the DataFrame to be materialized in memory.
+ # - Polars-native expressions can be parallelised (UDFs cannot).
+ # - Polars-native expressions can be logically optimised (UDFs cannot).
+ #
+ # Wherever possible you should strongly prefer the native expression API
+ # to achieve the best performance.
+ #
+ # @param return_dtype [Symbol]
+ # Dtype of the output Series.
+ # If not set, polars will assume that
+ # the dtype remains unchanged.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [1, 2, 3, 1],
+ # "b" => ["a", "b", "c", "c"]
+ # }
+ # )
+ #
+ # @example In a selection context, the function is applied by row.
+ # df.with_column(
+ # Polars.col("a").apply { |x| x * 2 }.alias("a_times_2")
+ # )
+ # # =>
+ # # shape: (4, 3)
+ # # ┌─────┬─────┬───────────┐
+ # # │ a ┆ b ┆ a_times_2 │
+ # # │ --- ┆ --- ┆ --- │
+ # # │ i64 ┆ str ┆ i64 │
+ # # ╞═════╪═════╪═══════════╡
+ # # │ 1 ┆ a ┆ 2 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 2 ┆ b ┆ 4 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 3 ┆ c ┆ 6 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1 ┆ c ┆ 2 │
+ # # └─────┴─────┴───────────┘
+ #
+ # @example In a GroupBy context the function is applied by group:
+ # df.lazy
+ # .groupby("b", maintain_order: true)
+ # .agg(
+ # [
+ # Polars.col("a").apply { |x| x.sum }
+ # ]
+ # )
+ # .collect
+ # # =>
+ # # shape: (3, 2)
+ # # ┌─────┬─────┐
+ # # │ b ┆ a │
+ # # │ --- ┆ --- │
+ # # │ str ┆ i64 │
+ # # ╞═════╪═════╡
+ # # │ a ┆ 1 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ b ┆ 2 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ c ┆ 4 │
+ # # └─────┴─────┘
+ # def apply(return_dtype: nil, &f)
+ # wrap_f = lambda do |x|
+ # x.apply(return_dtype: return_dtype, &f)
+ # end
+ # map(agg_list: true, return_dtype: return_dtype, &wrap_f)
  # end

  # Explode a list or utf8 Series. This means that every item is expanded to a new
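The note above recommends the native expression API over Ruby UDFs wherever possible. For reference, a sketch of the native equivalent of the `a_times_2` example from the doc comment (same hypothetical `df`), which keeps the work inside the Rust expression engine:

    # native expression: no Ruby block, so it can be optimised and parallelised
    df.with_column((Polars.col("a") * 2).alias("a_times_2"))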
@@ -2898,8 +3008,49 @@ module Polars
  end
  end

- # def _hash
- # end
+ # Hash the elements in the selection.
+ #
+ # The hash value is of type `:u64`.
+ #
+ # @param seed [Integer]
+ # Random seed parameter. Defaults to 0.
+ # @param seed_1 [Integer]
+ # Random seed parameter. Defaults to `seed` if not set.
+ # @param seed_2 [Integer]
+ # Random seed parameter. Defaults to `seed` if not set.
+ # @param seed_3 [Integer]
+ # Random seed parameter. Defaults to `seed` if not set.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [1, 2, nil],
+ # "b" => ["x", nil, "z"]
+ # }
+ # )
+ # df.with_column(Polars.all._hash(10, 20, 30, 40))
+ # # =>
+ # # shape: (3, 2)
+ # # ┌──────────────────────┬──────────────────────┐
+ # # │ a ┆ b │
+ # # │ --- ┆ --- │
+ # # │ u64 ┆ u64 │
+ # # ╞══════════════════════╪══════════════════════╡
+ # # │ 4629889412789719550 ┆ 6959506404929392568 │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 16386608652769605760 ┆ 11638928888656214026 │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 11638928888656214026 ┆ 11040941213715918520 │
+ # # └──────────────────────┴──────────────────────┘
+ def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
+ k0 = seed
+ k1 = seed_1.nil? ? seed : seed_1
+ k2 = seed_2.nil? ? seed : seed_2
+ k3 = seed_3.nil? ? seed : seed_3
+ wrap_expr(_rbexpr._hash(k0, k1, k2, k3))
+ end

  # Reinterpret the underlying bits as a signed/unsigned integer.
  #
@@ -2937,7 +3088,40 @@ module Polars
  wrap_expr(_rbexpr.reinterpret(signed))
  end

- # def _inspect
+ # Print the value that this expression evaluates to and pass on the value.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => [1, 1, 2]})
+ # df.select(Polars.col("foo").cumsum._inspect("value is: %s").alias("bar"))
+ # # =>
+ # # value is: shape: (3,)
+ # # Series: 'foo' [i64]
+ # # [
+ # # 1
+ # # 2
+ # # 4
+ # # ]
+ # # shape: (3, 1)
+ # # ┌─────┐
+ # # │ bar │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞═════╡
+ # # │ 1 │
+ # # ├╌╌╌╌╌┤
+ # # │ 2 │
+ # # ├╌╌╌╌╌┤
+ # # │ 4 │
+ # # └─────┘
+ # def _inspect(fmt = "%s")
+ # inspect = lambda do |s|
+ # puts(fmt % [s])
+ # s
+ # end
+
+ # map(return_dtype: nil, agg_list: true, &inspect)
  # end

  # Fill nulls with linear interpolation over missing values.
@@ -3721,7 +3905,72 @@ module Polars
  )
  end

- # def rolling_apply
+ # Apply a custom rolling window function.
+ #
+ # Prefer the specific rolling window functions over this one, as they are faster.
+ #
+ # Prefer:
+ # * rolling_min
+ # * rolling_max
+ # * rolling_mean
+ # * rolling_sum
+ #
+ # @param window_size [Integer]
+ # The length of the window.
+ # @param weights [Object]
+ # An optional slice with the same length as the window that will be multiplied
+ # elementwise with the values in the window.
+ # @param min_periods [Integer]
+ # The number of values in the window that should be non-null before computing
+ # a result. If nil, it will be set equal to window size.
+ # @param center [Boolean]
+ # Set the labels at the center of the window
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "A" => [1.0, 2.0, 9.0, 2.0, 13.0]
+ # }
+ # )
+ # df.select(
+ # [
+ # Polars.col("A").rolling_apply(window_size: 3) { |s| s.std }
+ # ]
+ # )
+ # # =>
+ # # shape: (5, 1)
+ # # ┌──────────┐
+ # # │ A │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════════╡
+ # # │ null │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ null │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 4.358899 │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 4.041452 │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 5.567764 │
+ # # └──────────┘
+ # def rolling_apply(
+ # window_size:,
+ # weights: nil,
+ # min_periods: nil,
+ # center: false,
+ # &function
+ # )
+ # if min_periods.nil?
+ # min_periods = window_size
+ # end
+ # wrap_expr(
+ # _rbexpr.rolling_apply(
+ # function, window_size, weights, min_periods, center
+ # )
+ # )
  # end

  # Compute a rolling skew.
@@ -199,12 +199,201 @@ module Polars
  dt_range
  end

- # def cut
- # end
+ # Bin values into discrete values.
+ #
+ # @param s [Series]
+ # Series to bin.
+ # @param bins [Array]
+ # Bins to create.
+ # @param labels [Array]
+ # Labels to assign to the bins. If given the length of labels must be
+ # len(bins) + 1.
+ # @param break_point_label [String]
+ # Name given to the breakpoint column.
+ # @param category_label [String]
+ # Name given to the category column.
+ #
+ # @return [DataFrame]
+ #
+ # @note
+ # This functionality is experimental and may change without it being considered a
+ # breaking change.
+ #
+ # @example
+ # a = Polars::Series.new("a", 13.times.map { |i| (-30 + i * 5) / 10.0 })
+ # Polars.cut(a, [-1, 1])
+ # # =>
+ # # shape: (12, 3)
+ # # ┌──────┬─────────────┬──────────────┐
+ # # │ a ┆ break_point ┆ category │
+ # # │ --- ┆ --- ┆ --- │
+ # # │ f64 ┆ f64 ┆ cat │
+ # # ╞══════╪═════════════╪══════════════╡
+ # # │ -3.0 ┆ -1.0 ┆ (-inf, -1.0] │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ -2.5 ┆ -1.0 ┆ (-inf, -1.0] │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ -2.0 ┆ -1.0 ┆ (-inf, -1.0] │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ -1.5 ┆ -1.0 ┆ (-inf, -1.0] │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ ... ┆ ... ┆ ... │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1.0 ┆ 1.0 ┆ (-1.0, 1.0] │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1.5 ┆ inf ┆ (1.0, inf] │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 2.0 ┆ inf ┆ (1.0, inf] │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 2.5 ┆ inf ┆ (1.0, inf] │
+ # # └──────┴─────────────┴──────────────┘
+ # def cut(
+ # s,
+ # bins,
+ # labels: nil,
+ # break_point_label: "break_point",
+ # category_label: "category"
+ # )
+ # var_nm = s.name

- # def align_frames
+ # cuts_df = DataFrame.new(
+ # [
+ # Series.new(
+ # break_point_label, bins, dtype: :f64
+ # ).extend_constant(Float::INFINITY, 1)
+ # ]
+ # )
+
+ # if labels
+ # if labels.length != bins.length + 1
+ # raise ArgumentError, "expected more labels"
+ # end
+ # cuts_df = cuts_df.with_column(Series.new(category_label, labels))
+ # else
+ # cuts_df = cuts_df.with_column(
+ # Polars.format(
+ # "({}, {}]",
+ # Polars.col(break_point_label).shift_and_fill(1, -Float::INFINITY),
+ # Polars.col(break_point_label)
+ # ).alias(category_label)
+ # )
+ # end
+
+ # cuts_df = cuts_df.with_column(Polars.col(category_label).cast(:cat))
+
+ # s.cast(:f64)
+ # .sort
+ # .to_frame
+ # .join_asof(
+ # cuts_df,
+ # left_on: var_nm,
+ # right_on: break_point_label,
+ # strategy: "forward"
+ # )
  # end

+ # Align a sequence of frames using the unique values from one or more columns as a key.
+ #
+ # Frames that do not contain the given key values have rows injected (with nulls
+ # filling the non-key columns), and each resulting frame is sorted by the key.
+ #
+ # The original column order of input frames is not changed unless ``select`` is
+ # specified (in which case the final column order is determined from that).
+ #
+ # Note that this does not result in a joined frame - you receive the same number
+ # of frames back that you passed in, but each is now aligned by key and has
+ # the same number of rows.
+ #
+ # @param frames [Array]
+ # Sequence of DataFrames or LazyFrames.
+ # @param on [Object]
+ # One or more columns whose unique values will be used to align the frames.
+ # @param select [Object]
+ # Optional post-alignment column select to constrain and/or order
+ # the columns returned from the newly aligned frames.
+ # @param reverse [Object]
+ # Sort the alignment column values in descending order; can be a single
+ # boolean or a list of booleans associated with each column in `on`.
+ #
+ # @return [Object]
+ #
+ # @example
+ # df1 = Polars::DataFrame.new(
+ # {
+ # "dt" => [Date.new(2022, 9, 1), Date.new(2022, 9, 2), Date.new(2022, 9, 3)],
+ # "x" => [3.5, 4.0, 1.0],
+ # "y" => [10.0, 2.5, 1.5]
+ # }
+ # )
+ # df2 = Polars::DataFrame.new(
+ # {
+ # "dt" => [Date.new(2022, 9, 2), Date.new(2022, 9, 3), Date.new(2022, 9, 1)],
+ # "x" => [8.0, 1.0, 3.5],
+ # "y" => [1.5, 12.0, 5.0]
+ # }
+ # )
+ # df3 = Polars::DataFrame.new(
+ # {
+ # "dt" => [Date.new(2022, 9, 3), Date.new(2022, 9, 2)],
+ # "x" => [2.0, 5.0],
+ # "y" => [2.5, 2.0]
+ # }
+ # )
+ # af1, af2, af3 = Polars.align_frames(
+ # df1, df2, df3, on: "dt", select: ["x", "y"]
+ # )
+ # (af1 * af2 * af3).fill_null(0).select(Polars.sum(Polars.col("*")).alias("dot"))
+ # # =>
+ # # shape: (3, 1)
+ # # ┌───────┐
+ # # │ dot │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═══════╡
+ # # │ 0.0 │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ 167.5 │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ 47.0 │
+ # # └───────┘
+ def align_frames(
+ *frames,
+ on:,
+ select: nil,
+ reverse: false
+ )
+ if frames.empty?
+ return []
+ elsif frames.map(&:class).uniq.length != 1
+ raise TypeError, "Input frames must be of a consistent type (all LazyFrame or all DataFrame)"
+ end
+
+ # establish the superset of all "on" column values, sort, and cache
+ eager = frames[0].is_a?(DataFrame)
+ alignment_frame = (
+ concat(frames.map { |df| df.lazy.select(on) })
+ .unique(maintain_order: false)
+ .sort(on, reverse: reverse)
+ )
+ alignment_frame = (
+ eager ? alignment_frame.collect.lazy : alignment_frame.cache
+ )
+ # finally, align all frames
+ aligned_frames =
+ frames.map do |df|
+ alignment_frame.join(
+ df.lazy,
+ on: alignment_frame.columns,
+ how: "left"
+ ).select(df.columns)
+ end
+ if !select.nil?
+ aligned_frames = aligned_frames.map { |df| df.select(select) }
+ end
+
+ eager ? aligned_frames.map(&:collect) : aligned_frames
+ end
+
  # Return a new Series of given length and type, filled with ones.
  #
  # @param n [Integer]
@@ -12,7 +12,48 @@ module Polars
  self.maintain_order = maintain_order
  end

- # def apply
+ # Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame.
+ #
+ # Implementing logic using a Ruby function is almost always _significantly_
+ # slower and more memory intensive than implementing the same logic using
+ # the native expression API because:
+
+ # - The native expression engine runs in Rust; UDFs run in Ruby.
+ # - Use of Ruby UDFs forces the DataFrame to be materialized in memory.
+ # - Polars-native expressions can be parallelised (UDFs cannot).
+ # - Polars-native expressions can be logically optimised (UDFs cannot).
+ #
+ # Wherever possible you should strongly prefer the native expression API
+ # to achieve the best performance.
+ #
+ # @return [DataFrame]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "id" => [0, 1, 2, 3, 4],
+ # "color" => ["red", "green", "green", "red", "red"],
+ # "shape" => ["square", "triangle", "square", "triangle", "square"]
+ # }
+ # )
+ # df.groupby("color").apply { |group_df| group_df.sample(2) }
+ # # =>
+ # # shape: (4, 3)
+ # # ┌─────┬───────┬──────────┐
+ # # │ id ┆ color ┆ shape │
+ # # │ --- ┆ --- ┆ --- │
+ # # │ i64 ┆ str ┆ str │
+ # # ╞═════╪═══════╪══════════╡
+ # # │ 1 ┆ green ┆ triangle │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 2 ┆ green ┆ square │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 4 ┆ red ┆ square │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 3 ┆ red ┆ triangle │
+ # # └─────┴───────┴──────────┘
+ # def apply(&f)
+ # _dataframe_class._from_rbdf(_df.groupby_apply(by, f))
  # end

  # Use multiple aggregations on columns.
@@ -182,8 +223,7 @@ module Polars
  _dataframe_class._from_rbdf(df._df)
  end

- # def pivot
- # end
+ # pivot is deprecated

  # Aggregate the first values in the group.
  #
data/lib/polars/io.rb CHANGED
@@ -59,7 +59,7 @@ module Polars
  # Lossy means that invalid utf8 values are replaced with `�`
  # characters. When using other encodings than `utf8` or
  # `utf8-lossy`, the input is first decoded im memory with
- # python.
+ # Ruby.
  # @param low_memory [Boolean]
  # Reduce memory usage at expense of performance.
  # @param rechunk [Boolean]
@@ -451,8 +451,24 @@ module Polars
  )
  end

- # def read_avro
- # end
+ # Read into a DataFrame from Apache Avro format.
+ #
+ # @param file [Object]
+ # Path to a file or a file-like object.
+ # @param columns [Object]
+ # Columns to select. Accepts a list of column indices (starting at zero) or a list
+ # of column names.
+ # @param n_rows [Integer]
+ # Stop reading from Apache Avro file after reading ``n_rows``.
+ #
+ # @return [DataFrame]
+ def read_avro(file, columns: nil, n_rows: nil)
+ if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
+ file = Utils.format_path(file)
+ end
+
+ DataFrame._read_avro(file, n_rows: n_rows, columns: columns)
+ end

  # Read into a DataFrame from Arrow IPC (Feather v2) file.
  #
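A brief usage sketch for the new Avro reader, assuming it is exposed like the other readers as `Polars.read_avro` (the file path and column names below are hypothetical):

    # read an entire Avro file, then a constrained read of selected columns and rows
    df = Polars.read_avro("data.avro")
    df = Polars.read_avro("data.avro", columns: ["a", "b"], n_rows: 100)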