polars-df 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +2 -1
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +7 -1
- data/ext/polars/src/conversion.rs +35 -2
- data/ext/polars/src/dataframe.rs +228 -11
- data/ext/polars/src/lazy/dataframe.rs +3 -3
- data/ext/polars/src/lazy/dsl.rs +59 -2
- data/ext/polars/src/lib.rs +151 -10
- data/ext/polars/src/series.rs +182 -29
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +1 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/data_frame.rb +2284 -137
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +612 -7
- data/lib/polars/expr_dispatch.rb +14 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +517 -0
- data/lib/polars/io.rb +421 -2
- data/lib/polars/lazy_frame.rb +1261 -67
- data/lib/polars/lazy_functions.rb +288 -10
- data/lib/polars/lazy_group_by.rb +79 -0
- data/lib/polars/list_expr.rb +5 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +1476 -212
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +663 -2
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/utils.rb +43 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +7 -10
- metadata +9 -2
| @@ -1,5 +1,8 @@ | |
| 1 1 | 
             
            module Polars
         | 
| 2 2 | 
             
              module LazyFunctions
         | 
| 3 | 
            +
                # Return an expression representing a column in a DataFrame.
         | 
| 4 | 
            +
                #
         | 
| 5 | 
            +
                # @return [Expr]
         | 
| 3 6 | 
             
                def col(name)
         | 
| 4 7 | 
             
                  if name.is_a?(Series)
         | 
| 5 8 | 
             
                    name = name.to_a
         | 
| @@ -21,10 +24,42 @@ module Polars | |
| 21 24 | 
             
                  end
         | 
| 22 25 | 
             
                end
         | 
| 23 26 |  | 
| 27 | 
            +
                # Alias for an element being evaluated in an `eval` expression.
         | 
| 28 | 
            +
                #
         | 
| 29 | 
            +
                # @return [Expr]
         | 
| 30 | 
            +
                #
         | 
| 31 | 
            +
                # @example A horizontal rank computation by taking the elements of a list
         | 
| 32 | 
            +
                #   df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
         | 
| 33 | 
            +
                #   df.with_column(
         | 
| 34 | 
            +
                #     Polars.concat_list(["a", "b"]).arr.eval(Polars.element.rank).alias("rank")
         | 
| 35 | 
            +
                #   )
         | 
| 36 | 
            +
                #   # =>
         | 
| 37 | 
            +
                #   # shape: (3, 3)
         | 
| 38 | 
            +
                #   # ┌─────┬─────┬────────────┐
         | 
| 39 | 
            +
                #   # │ a   ┆ b   ┆ rank       │
         | 
| 40 | 
            +
                #   # │ --- ┆ --- ┆ ---        │
         | 
| 41 | 
            +
                #   # │ i64 ┆ i64 ┆ list[f32]  │
         | 
| 42 | 
            +
                #   # ╞═════╪═════╪════════════╡
         | 
| 43 | 
            +
                #   # │ 1   ┆ 4   ┆ [1.0, 2.0] │
         | 
| 44 | 
            +
                #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
         | 
| 45 | 
            +
                #   # │ 8   ┆ 5   ┆ [2.0, 1.0] │
         | 
| 46 | 
            +
                #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
         | 
| 47 | 
            +
                #   # │ 3   ┆ 2   ┆ [2.0, 1.0] │
         | 
| 48 | 
            +
                #   # └─────┴─────┴────────────┘
         | 
| 24 49 | 
             
                def element
         | 
| 25 50 | 
             
                  col("")
         | 
| 26 51 | 
             
                end
         | 
| 27 52 |  | 
| 53 | 
            +
                # Count the number of values in this column/context.
         | 
| 54 | 
            +
                #
         | 
| 55 | 
            +
                # @param column [String, Series, nil]
         | 
| 56 | 
            +
                #     If dtype is:
         | 
| 57 | 
            +
                #
         | 
| 58 | 
            +
                #     * `Series` : count the values in the series.
         | 
| 59 | 
            +
                #     * `String` : count the values in this column.
         | 
| 60 | 
            +
                #     * `nil` : count the number of values in this context.
         | 
| 61 | 
            +
                #
         | 
| 62 | 
            +
                # @return [Expr, Integer]
         | 
| 28 63 | 
             
                def count(column = nil)
         | 
| 29 64 | 
             
                  if column.nil?
         | 
| 30 65 | 
             
                    return Utils.wrap_expr(RbExpr.count)
         | 
| @@ -37,9 +72,16 @@ module Polars | |
| 37 72 | 
             
                  end
         | 
| 38 73 | 
             
                end
         | 
| 39 74 |  | 
| 40 | 
            -
                #  | 
| 41 | 
            -
                # | 
| 75 | 
            +
                # Aggregate to list.
         | 
| 76 | 
            +
                #
         | 
| 77 | 
            +
                # @return [Expr]
         | 
| 78 | 
            +
                def to_list(name)
         | 
| 79 | 
            +
                  col(name).list
         | 
| 80 | 
            +
                end
         | 
| 42 81 |  | 
| 82 | 
            +
                # Get the standard deviation.
         | 
| 83 | 
            +
                #
         | 
| 84 | 
            +
                # @return [Object]
         | 
| 43 85 | 
             
                def std(column, ddof: 1)
         | 
| 44 86 | 
             
                  if column.is_a?(Series)
         | 
| 45 87 | 
             
                    column.std(ddof: ddof)
         | 
| @@ -48,6 +90,9 @@ module Polars | |
| 48 90 | 
             
                  end
         | 
| 49 91 | 
             
                end
         | 
| 50 92 |  | 
| 93 | 
            +
                # Get the variance.
         | 
| 94 | 
            +
                #
         | 
| 95 | 
            +
                # @return [Object]
         | 
| 51 96 | 
             
                def var(column, ddof: 1)
         | 
| 52 97 | 
             
                  if column.is_a?(Series)
         | 
| 53 98 | 
             
                    column.var(ddof: ddof)
         | 
| @@ -56,6 +101,16 @@ module Polars | |
| 56 101 | 
             
                  end
         | 
| 57 102 | 
             
                end
         | 
| 58 103 |  | 
| 104 | 
            +
                # Get the maximum value.
         | 
| 105 | 
            +
                #
         | 
| 106 | 
            +
                # @param column [Object]
         | 
| 107 | 
            +
                #   Column(s) to be used in aggregation. Will lead to different behavior based on
         | 
| 108 | 
            +
                #   the input:
         | 
| 109 | 
            +
                #
         | 
| 110 | 
            +
                #   - [String, Series] -> aggregate the maximum value of that column.
         | 
| 111 | 
            +
                #   - [Array<Expr>] -> aggregate the maximum value horizontally.
         | 
| 112 | 
            +
                #
         | 
| 113 | 
            +
                # @return [Expr, Object]
         | 
| 59 114 | 
             
                def max(column)
         | 
| 60 115 | 
             
                  if column.is_a?(Series)
         | 
| 61 116 | 
             
                    column.max
         | 
| @@ -68,6 +123,16 @@ module Polars | |
| 68 123 | 
             
                  end
         | 
| 69 124 | 
             
                end
         | 
| 70 125 |  | 
| 126 | 
            +
                # Get the minimum value.
         | 
| 127 | 
            +
                #
         | 
| 128 | 
            +
                # @param column [Object]
         | 
| 129 | 
            +
                #   Column(s) to be used in aggregation. Will lead to different behavior based on
         | 
| 130 | 
            +
                #   the input:
         | 
| 131 | 
            +
                #
         | 
| 132 | 
            +
                #   - [String, Series] -> aggregate the minimum value of that column.
         | 
| 133 | 
            +
                #   - [Array<Expr>] -> aggregate the minimum value horizontally.
         | 
| 134 | 
            +
                #
         | 
| 135 | 
            +
                # @return [Expr, Object]
         | 
| 71 136 | 
             
                def min(column)
         | 
| 72 137 | 
             
                  if column.is_a?(Series)
         | 
| 73 138 | 
             
                    column.min
         | 
| @@ -80,6 +145,9 @@ module Polars | |
| 80 145 | 
             
                  end
         | 
| 81 146 | 
             
                end
         | 
| 82 147 |  | 
| 148 | 
            +
                # Sum values in a column/Series, or horizontally across list of columns/expressions.
         | 
| 149 | 
            +
                #
         | 
| 150 | 
            +
                # @return [Object]
         | 
| 83 151 | 
             
                def sum(column)
         | 
| 84 152 | 
             
                  if column.is_a?(Series)
         | 
| 85 153 | 
             
                    column.sum
         | 
| @@ -94,6 +162,9 @@ module Polars | |
| 94 162 | 
             
                  end
         | 
| 95 163 | 
             
                end
         | 
| 96 164 |  | 
| 165 | 
            +
                # Get the mean value.
         | 
| 166 | 
            +
                #
         | 
| 167 | 
            +
                # @return [Expr, Float]
         | 
| 97 168 | 
             
                def mean(column)
         | 
| 98 169 | 
             
                  if column.is_a?(Series)
         | 
| 99 170 | 
             
                    column.mean
         | 
| @@ -102,10 +173,16 @@ module Polars | |
| 102 173 | 
             
                  end
         | 
| 103 174 | 
             
                end
         | 
| 104 175 |  | 
| 176 | 
            +
                # Get the mean value.
         | 
| 177 | 
            +
                #
         | 
| 178 | 
            +
                # @return [Expr, Float]
         | 
| 105 179 | 
             
                def avg(column)
         | 
| 106 180 | 
             
                  mean(column)
         | 
| 107 181 | 
             
                end
         | 
| 108 182 |  | 
| 183 | 
            +
                # Get the median value.
         | 
| 184 | 
            +
                #
         | 
| 185 | 
            +
                # @return [Object]
         | 
| 109 186 | 
             
                def median(column)
         | 
| 110 187 | 
             
                  if column.is_a?(Series)
         | 
| 111 188 | 
             
                    column.median
         | 
| @@ -117,6 +194,9 @@ module Polars | |
| 117 194 | 
             
                # def n_unique
         | 
| 118 195 | 
             
                # end
         | 
| 119 196 |  | 
| 197 | 
            +
                # Get the first value.
         | 
| 198 | 
            +
                #
         | 
| 199 | 
            +
                # @return [Object]
         | 
| 120 200 | 
             
                def first(column = nil)
         | 
| 121 201 | 
             
                  if column.nil?
         | 
| 122 202 | 
             
                    return Utils.wrap_expr(RbExpr.first)
         | 
| @@ -142,7 +222,20 @@ module Polars | |
| 142 222 | 
             
                # def tail
         | 
| 143 223 | 
             
                # end
         | 
| 144 224 |  | 
| 225 | 
            +
                # Return an expression representing a literal value.
         | 
| 226 | 
            +
                #
         | 
| 227 | 
            +
                # @return [Expr]
         | 
| 145 228 | 
             
                def lit(value)
                  # Anything that is not a Series is wrapped directly as a literal expression.
                  unless value.is_a?(Polars::Series)
                    return Utils.wrap_expr(RbExpr.lit(value))
                  end

                  # A Series literal is built from its underlying `_s` handle; the Series
                  # name is re-applied as an alias unless it is the empty string.
                  series_name = value.name
                  literal = Utils.wrap_expr(RbExpr.lit(value._s))
                  series_name == "" ? literal : literal.alias(series_name)
                end
         | 
| 148 241 |  | 
| @@ -164,6 +257,9 @@ module Polars | |
| 164 257 | 
             
                # def apply
         | 
| 165 258 | 
             
                # end
         | 
| 166 259 |  | 
| 260 | 
            +
                # Accumulate over multiple columns horizontally (row-wise) with a left fold.
         | 
| 261 | 
            +
                #
         | 
| 262 | 
            +
                # @return [Expr]
         | 
| 167 263 | 
             
                def fold(acc, f, exprs)
         | 
| 168 264 | 
             
                  acc = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)
         | 
| 169 265 | 
             
                  if exprs.is_a?(Expr)
         | 
| @@ -189,6 +285,30 @@ module Polars | |
| 189 285 | 
             
                # def exclude
         | 
| 190 286 | 
             
                # end
         | 
| 191 287 |  | 
| 288 | 
            +
                # Do one of two things.
         | 
| 289 | 
            +
                #
         | 
| 290 | 
            +
                # * function can do a columnwise or elementwise AND operation
         | 
| 291 | 
            +
                # * a wildcard column selection
         | 
| 292 | 
            +
                #
         | 
| 293 | 
            +
                # @param name [Object]
         | 
| 294 | 
            +
                #   If given this function will apply a bitwise & on the columns.
         | 
| 295 | 
            +
                #
         | 
| 296 | 
            +
                # @return [Expr]
         | 
| 297 | 
            +
                #
         | 
| 298 | 
            +
                # @example Sum all columns
         | 
| 299 | 
            +
                #   df = Polars::DataFrame.new(
         | 
| 300 | 
            +
                #     {"a" => [1, 2, 3], "b" => ["hello", "foo", "bar"], "c" => [1, 1, 1]}
         | 
| 301 | 
            +
                #   )
         | 
| 302 | 
            +
                #   df.select(Polars.all.sum)
         | 
| 303 | 
            +
                #   # =>
         | 
| 304 | 
            +
                #   # shape: (1, 3)
         | 
| 305 | 
            +
                #   # ┌─────┬──────┬─────┐
         | 
| 306 | 
            +
                #   # │ a   ┆ b    ┆ c   │
         | 
| 307 | 
            +
                #   # │ --- ┆ ---  ┆ --- │
         | 
| 308 | 
            +
                #   # │ i64 ┆ str  ┆ i64 │
         | 
| 309 | 
            +
                #   # ╞═════╪══════╪═════╡
         | 
| 310 | 
            +
                #   # │ 6   ┆ null ┆ 3   │
         | 
| 311 | 
            +
                #   # └─────┴──────┴─────┘
         | 
| 192 312 | 
             
                def all(name = nil)
         | 
| 193 313 | 
             
                  if name.nil?
         | 
| 194 314 | 
             
                    col("*")
         | 
| @@ -205,6 +325,26 @@ module Polars | |
| 205 325 | 
             
                # def quantile
         | 
| 206 326 | 
             
                # end
         | 
| 207 327 |  | 
| 328 | 
            +
                # Create a range expression (or Series).
         | 
| 329 | 
            +
                #
         | 
| 330 | 
            +
                # This can be used in a `select`, `with_column`, etc. Be sure that the resulting
         | 
| 331 | 
            +
                # range size is equal to the length of the DataFrame you are collecting.
         | 
| 332 | 
            +
                #
         | 
| 333 | 
            +
                # @param low [Integer, Expr, Series]
         | 
| 334 | 
            +
                #   Lower bound of range.
         | 
| 335 | 
            +
                # @param high [Integer, Expr, Series]
         | 
| 336 | 
            +
                #   Upper bound of range.
         | 
| 337 | 
            +
                # @param step [Integer]
         | 
| 338 | 
            +
                #   Step size of the range.
         | 
| 339 | 
            +
                # @param eager [Boolean]
         | 
| 340 | 
            +
                #   If eager evaluation is `true`, a Series is returned instead of an Expr.
         | 
| 341 | 
            +
                # @param dtype [Symbol]
         | 
| 342 | 
            +
                #   Apply an explicit integer dtype to the resulting expression (default is Int64).
         | 
| 343 | 
            +
                #
         | 
| 344 | 
            +
                # @return [Expr, Series]
         | 
| 345 | 
            +
                #
         | 
| 346 | 
            +
                # @example
         | 
| 347 | 
            +
                #   df.lazy.filter(Polars.col("foo") < Polars.arange(0, 100)).collect
         | 
| 208 348 | 
             
                def arange(low, high, step: 1, eager: false, dtype: nil)
         | 
| 209 349 | 
             
                  low = Utils.expr_to_lit_or_expr(low, str_to_lit: false)
         | 
| 210 350 | 
             
                  high = Utils.expr_to_lit_or_expr(high, str_to_lit: false)
         | 
| @@ -233,6 +373,9 @@ module Polars | |
| 233 373 | 
             
                # def format
         | 
| 234 374 | 
             
                # end
         | 
| 235 375 |  | 
| 376 | 
            +
                # Concat the arrays in a Series dtype List in linear time.
         | 
| 377 | 
            +
                #
         | 
| 378 | 
            +
                # @return [Expr]
         | 
| 236 379 | 
             
                def concat_list(exprs)
         | 
| 237 380 | 
             
                  exprs = Utils.selection_to_rbexpr_list(exprs)
         | 
| 238 381 | 
             
                  Utils.wrap_expr(RbExpr.concat_lst(exprs))
         | 
| @@ -241,17 +384,132 @@ module Polars | |
| 241 384 | 
             
                # def collect_all
         | 
| 242 385 | 
             
                # end
         | 
| 243 386 |  | 
| 244 | 
            -
                #  | 
| 245 | 
            -
                # | 
| 387 | 
            +
                # Run polars expressions without a context.
         | 
| 388 | 
            +
                #
         | 
| 389 | 
            +
                # @return [DataFrame]
         | 
| 390 | 
            +
                def select(exprs)
         | 
| 391 | 
            +
                  DataFrame.new([]).select(exprs)
         | 
| 392 | 
            +
                end
         | 
| 246 393 |  | 
| 247 | 
            -
                #  | 
| 248 | 
            -
                # | 
| 394 | 
            +
                # Collect several columns into a Series of dtype Struct.
         | 
| 395 | 
            +
                #
         | 
| 396 | 
            +
                # @param exprs [Object]
         | 
| 397 | 
            +
                #   Columns/Expressions to collect into a Struct
         | 
| 398 | 
            +
                # @param eager [Boolean]
         | 
| 399 | 
            +
                #   Evaluate immediately
         | 
| 400 | 
            +
                #
         | 
| 401 | 
            +
                # @return [Object]
         | 
| 402 | 
            +
                #
         | 
| 403 | 
            +
                # @example
         | 
| 404 | 
            +
                #   Polars::DataFrame.new(
         | 
| 405 | 
            +
                #     {
         | 
| 406 | 
            +
                #       "int" => [1, 2],
         | 
| 407 | 
            +
                #       "str" => ["a", "b"],
         | 
| 408 | 
            +
                #       "bool" => [true, nil],
         | 
| 409 | 
            +
                #       "list" => [[1, 2], [3]],
         | 
| 410 | 
            +
                #     }
         | 
| 411 | 
            +
                #   ).select([Polars.struct(Polars.all()).alias("my_struct")])
         | 
| 412 | 
            +
                #   # =>
         | 
| 413 | 
            +
                #   # shape: (2, 1)
         | 
| 414 | 
            +
                #   # ┌─────────────────────┐
         | 
| 415 | 
            +
                #   # │ my_struct           │
         | 
| 416 | 
            +
                #   # │ ---                 │
         | 
| 417 | 
            +
                #   # │ struct[4]           │
         | 
| 418 | 
            +
                #   # ╞═════════════════════╡
         | 
| 419 | 
            +
                #   # │ {1,"a",true,[1, 2]} │
         | 
| 420 | 
            +
                #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
         | 
| 421 | 
            +
                #   # │ {2,"b",null,[3]}    │
         | 
| 422 | 
            +
                #   # └─────────────────────┘
         | 
| 423 | 
            +
                #
         | 
| 424 | 
            +
                # @example Only collect specific columns as a struct:
         | 
| 425 | 
            +
                #   df = Polars::DataFrame.new(
         | 
| 426 | 
            +
                #     {"a" => [1, 2, 3, 4], "b" => ["one", "two", "three", "four"], "c" => [9, 8, 7, 6]}
         | 
| 427 | 
            +
                #   )
         | 
| 428 | 
            +
                #   df.with_column(Polars.struct(Polars.col(["a", "b"])).alias("a_and_b"))
         | 
| 429 | 
            +
                #   # =>
         | 
| 430 | 
            +
                #   # shape: (4, 4)
         | 
| 431 | 
            +
                #   # ┌─────┬───────┬─────┬─────────────┐
         | 
| 432 | 
            +
                #   # │ a   ┆ b     ┆ c   ┆ a_and_b     │
         | 
| 433 | 
            +
                #   # │ --- ┆ ---   ┆ --- ┆ ---         │
         | 
| 434 | 
            +
                #   # │ i64 ┆ str   ┆ i64 ┆ struct[2]   │
         | 
| 435 | 
            +
                #   # ╞═════╪═══════╪═════╪═════════════╡
         | 
| 436 | 
            +
                #   # │ 1   ┆ one   ┆ 9   ┆ {1,"one"}   │
         | 
| 437 | 
            +
                #   # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
         | 
| 438 | 
            +
                #   # │ 2   ┆ two   ┆ 8   ┆ {2,"two"}   │
         | 
| 439 | 
            +
                #   # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
         | 
| 440 | 
            +
                #   # │ 3   ┆ three ┆ 7   ┆ {3,"three"} │
         | 
| 441 | 
            +
                #   # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
         | 
| 442 | 
            +
                #   # │ 4   ┆ four  ┆ 6   ┆ {4,"four"}  │
         | 
| 443 | 
            +
                #   # └─────┴───────┴─────┴─────────────┘
         | 
| 444 | 
            +
                def struct(exprs, eager: false)
                  # Eager mode: build the lazy struct expression, evaluate it through
                  # `Polars.select`, and hand back the resulting Series. The explicit
                  # `return` is required: without it the Series was computed and then
                  # discarded, and the method fell through to return an Expr.
                  if eager
                    return Polars.select(struct(exprs, eager: false)).to_series
                  end

                  # Lazy mode: normalise the selection and wrap the struct expression.
                  exprs = Utils.selection_to_rbexpr_list(exprs)
                  Utils.wrap_expr(_as_struct(exprs))
                end
         | 
| 249 451 |  | 
| 250 | 
            -
                #  | 
| 251 | 
            -
                # | 
| 452 | 
            +
                # Repeat a single value n times.
         | 
| 453 | 
            +
                #
         | 
| 454 | 
            +
                # @param value [Object]
         | 
| 455 | 
            +
                #   Value to repeat.
         | 
| 456 | 
            +
                # @param n [Integer]
         | 
| 457 | 
            +
                #   Repeat `n` times.
         | 
| 458 | 
            +
                # @param eager [Boolean]
         | 
| 459 | 
            +
                #   Run eagerly and collect into a `Series`.
         | 
| 460 | 
            +
                # @param name [String]
         | 
| 461 | 
            +
                #   Only used in `eager` mode. As expression, use `alias`.
         | 
| 462 | 
            +
                #
         | 
| 463 | 
            +
                # @return [Expr, Series]
         | 
| 464 | 
            +
                def repeat(value, n, eager: false, name: nil)
                  # An Integer count is promoted to a literal expression; any other `n`
                  # must already respond to `_rbexpr`.
                  n = lit(n) if n.is_a?(Integer)
                  expr = Utils.wrap_expr(RbExpr.repeat(value, n._rbexpr))
                  return expr unless eager

                  # Eager mode previously called `py_type_to_dtype(type(value))`, which is
                  # untranslated Python and not defined in the visible Ruby source. Instead,
                  # evaluate the lazy expression via `Polars.select` (the same eager pattern
                  # `struct` uses) and name the result; `name` defaults to "".
                  # NOTE(review): confirm `repeat` evaluates in an empty-frame `select`
                  # on the bundled polars version.
                  Polars.select(expr.alias(name || "")).to_series
                end
         | 
| 252 478 |  | 
| 253 | 
            -
                #  | 
| 254 | 
            -
                # | 
| 479 | 
            +
                # Return indices where `condition` evaluates `true`.
         | 
| 480 | 
            +
                #
         | 
| 481 | 
            +
                # @param condition [Expr]
         | 
| 482 | 
            +
                #   Boolean expression to evaluate
         | 
| 483 | 
            +
                # @param eager [Boolean]
         | 
| 484 | 
            +
                #   Whether to apply this function eagerly (as opposed to lazily).
         | 
| 485 | 
            +
                #
         | 
| 486 | 
            +
                # @return [Expr, Series]
         | 
| 487 | 
            +
                #
         | 
| 488 | 
            +
                # @example
         | 
| 489 | 
            +
                #   df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
         | 
| 490 | 
            +
                #   df.select(
         | 
| 491 | 
            +
                #     [
         | 
| 492 | 
            +
                #       Polars.arg_where(Polars.col("a") % 2 == 0)
         | 
| 493 | 
            +
                #     ]
         | 
| 494 | 
            +
                #   ).to_series
         | 
| 495 | 
            +
                #   # =>
         | 
| 496 | 
            +
                #   # shape: (2,)
         | 
| 497 | 
            +
                #   # Series: 'a' [u32]
         | 
| 498 | 
            +
                #   # [
         | 
| 499 | 
            +
                #   #         1
         | 
| 500 | 
            +
                #   #         3
         | 
| 501 | 
            +
                #   # ]
         | 
| 502 | 
            +
                def arg_where(condition, eager: false)
                  unless eager
                    # Lazy path: coerce the condition to an expression and wrap the call.
                    predicate = Utils.expr_to_lit_or_expr(condition, str_to_lit: true)
                    return Utils.wrap_expr(_arg_where(predicate._rbexpr))
                  end

                  # Eager path only accepts a Series.
                  unless condition.is_a?(Series)
                    raise ArgumentError, "expected 'Series' in 'arg_where' if 'eager=True', got #{condition.class.name}"
                  end

                  # Put the Series in a frame and select the lazy `arg_where` on its column.
                  frame = condition.to_frame
                  frame.select(arg_where(Polars.col(condition.name))).to_series
                end
         | 
| 255 513 |  | 
| 256 514 | 
             
                # def coalesce
         | 
| 257 515 | 
             
                # end
         | 
| @@ -259,6 +517,26 @@ module Polars | |
| 259 517 | 
             
                # def from_epoch
         | 
| 260 518 | 
             
                # end
         | 
| 261 519 |  | 
| 520 | 
            +
                # Start a "when, then, otherwise" expression.
         | 
| 521 | 
            +
                #
         | 
| 522 | 
            +
                # @return [When]
         | 
| 523 | 
            +
                #
         | 
| 524 | 
            +
                # @example
         | 
| 525 | 
            +
                #   df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
         | 
| 526 | 
            +
                #   df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
         | 
| 527 | 
            +
                #   # =>
         | 
| 528 | 
            +
                #   # shape: (3, 3)
         | 
| 529 | 
            +
                #   # ┌─────┬─────┬─────────┐
         | 
| 530 | 
            +
                #   # │ foo ┆ bar ┆ literal │
         | 
| 531 | 
            +
                #   # │ --- ┆ --- ┆ ---     │
         | 
| 532 | 
            +
                #   # │ i64 ┆ i64 ┆ i32     │
         | 
| 533 | 
            +
                #   # ╞═════╪═════╪═════════╡
         | 
| 534 | 
            +
                #   # │ 1   ┆ 3   ┆ -1      │
         | 
| 535 | 
            +
                #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
         | 
| 536 | 
            +
                #   # │ 3   ┆ 4   ┆ 1       │
         | 
| 537 | 
            +
                #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
         | 
| 538 | 
            +
                #   # │ 4   ┆ 0   ┆ 1       │
         | 
| 539 | 
            +
                #   # └─────┴─────┴─────────┘
         | 
| 262 540 | 
             
                def when(expr)
         | 
| 263 541 | 
             
                  expr = Utils.expr_to_lit_or_expr(expr)
         | 
| 264 542 | 
             
                  pw = RbExpr.when(expr._rbexpr)
         | 
    
        data/lib/polars/lazy_group_by.rb
    CHANGED
    
    | @@ -1,13 +1,92 @@ | |
| 1 1 | 
             
            module Polars
         | 
| 2 2 | 
             
              class LazyGroupBy
         | 
| 3 | 
            +
                # @private
         | 
| 3 4 | 
             
                def initialize(lgb, lazyframe_class)
         | 
| 4 5 | 
             
                  @lgb = lgb
         | 
| 5 6 | 
             
                  @lazyframe_class = lazyframe_class
         | 
| 6 7 | 
             
                end
         | 
| 7 8 |  | 
| 9 | 
            +
                # Describe the aggregations that need to be done on a group.
         | 
| 10 | 
            +
                #
         | 
| 11 | 
            +
                # @return [LazyFrame]
         | 
| 8 12 | 
             
                def agg(aggs)
         | 
| 9 13 | 
             
                  rbexprs = Utils.selection_to_rbexpr_list(aggs)
         | 
| 10 14 | 
             
                  @lazyframe_class._from_rbldf(@lgb.agg(rbexprs))
         | 
| 11 15 | 
             
                end
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                # Get the first `n` rows of each group.
         | 
| 18 | 
            +
                #
         | 
| 19 | 
            +
                # @param n [Integer]
         | 
| 20 | 
            +
                #   Number of rows to return.
         | 
| 21 | 
            +
                #
         | 
| 22 | 
            +
                # @return [LazyFrame]
         | 
| 23 | 
            +
                #
         | 
| 24 | 
            +
                # @example
         | 
| 25 | 
            +
                #   df = Polars::DataFrame.new(
         | 
| 26 | 
            +
                #     {
         | 
| 27 | 
            +
                #       "letters" => ["c", "c", "a", "c", "a", "b"],
         | 
| 28 | 
            +
                #       "nrs" => [1, 2, 3, 4, 5, 6]
         | 
| 29 | 
            +
                #     }
         | 
| 30 | 
            +
                #   )
         | 
| 31 | 
            +
                #   df.groupby("letters").head(2).sort("letters")
         | 
| 32 | 
            +
                #   # =>
         | 
| 33 | 
            +
                #   # shape: (5, 2)
         | 
| 34 | 
            +
                #   # ┌─────────┬─────┐
         | 
| 35 | 
            +
                #   # │ letters ┆ nrs │
         | 
| 36 | 
            +
                #   # │ ---     ┆ --- │
         | 
| 37 | 
            +
                #   # │ str     ┆ i64 │
         | 
| 38 | 
            +
                #   # ╞═════════╪═════╡
         | 
| 39 | 
            +
                #   # │ a       ┆ 3   │
         | 
| 40 | 
            +
                #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
         | 
| 41 | 
            +
                #   # │ a       ┆ 5   │
         | 
| 42 | 
            +
                #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
         | 
| 43 | 
            +
                #   # │ b       ┆ 6   │
         | 
| 44 | 
            +
                #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
         | 
| 45 | 
            +
                #   # │ c       ┆ 1   │
         | 
| 46 | 
            +
                #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
         | 
| 47 | 
            +
                #   # │ c       ┆ 2   │
         | 
| 48 | 
            +
                #   # └─────────┴─────┘
         | 
| 49 | 
            +
                def head(n = 5)
         | 
| 50 | 
            +
                  @lazyframe_class._from_rbldf(@lgb.head(n))
         | 
| 51 | 
            +
                end
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                # Get the last `n` rows of each group.
         | 
| 54 | 
            +
                #
         | 
| 55 | 
            +
                # @param n [Integer]
         | 
| 56 | 
            +
                #   Number of rows to return.
         | 
| 57 | 
            +
                #
         | 
| 58 | 
            +
                # @return [LazyFrame]
         | 
| 59 | 
            +
                #
         | 
| 60 | 
            +
                # @example
         | 
| 61 | 
            +
                #   df = Polars::DataFrame.new(
         | 
| 62 | 
            +
                #     {
         | 
| 63 | 
            +
                #       "letters" => ["c", "c", "a", "c", "a", "b"],
         | 
| 64 | 
            +
                #       "nrs" => [1, 2, 3, 4, 5, 6]
         | 
| 65 | 
            +
                #     }
         | 
| 66 | 
            +
                #   )
         | 
| 67 | 
            +
                #   df.groupby("letters").tail(2).sort("letters")
         | 
| 68 | 
            +
                #   # =>
         | 
| 69 | 
            +
                #   # shape: (5, 2)
         | 
| 70 | 
            +
                #   # ┌─────────┬─────┐
         | 
| 71 | 
            +
                #   # │ letters ┆ nrs │
         | 
| 72 | 
            +
                #   # │ ---     ┆ --- │
         | 
| 73 | 
            +
                #   # │ str     ┆ i64 │
         | 
| 74 | 
            +
                #   # ╞═════════╪═════╡
         | 
| 75 | 
            +
                #   # │ a       ┆ 3   │
         | 
| 76 | 
            +
                #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
         | 
| 77 | 
            +
                #   # │ a       ┆ 5   │
         | 
| 78 | 
            +
                #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
         | 
| 79 | 
            +
                #   # │ b       ┆ 6   │
         | 
| 80 | 
            +
                #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
         | 
| 81 | 
            +
                #   # │ c       ┆ 2   │
         | 
| 82 | 
            +
                #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
         | 
| 83 | 
            +
                #   # │ c       ┆ 4   │
         | 
| 84 | 
            +
                #   # └─────────┴─────┘
         | 
| 85 | 
            +
                def tail(n = 5)
         | 
| 86 | 
            +
                  @lazyframe_class._from_rbldf(@lgb.tail(n))
         | 
| 87 | 
            +
                end
         | 
| 88 | 
            +
             | 
| 89 | 
            +
                # def apply
         | 
| 90 | 
            +
                # end
         | 
| 12 91 | 
             
              end
         | 
| 13 92 | 
             
            end
         | 
    
        data/lib/polars/list_expr.rb
    CHANGED
    
    | @@ -1,7 +1,10 @@ | |
| 1 1 | 
             
            module Polars
         | 
| 2 | 
            +
              # Namespace for list related expressions.
         | 
| 2 3 | 
             
              class ListExpr
         | 
| 4 | 
            +
                # @private
         | 
| 3 5 | 
             
                attr_accessor :_rbexpr
         | 
| 4 6 |  | 
| 7 | 
            +
                # @private
         | 
| 5 8 | 
             
                def initialize(expr)
         | 
| 6 9 | 
             
                  self._rbexpr = expr._rbexpr
         | 
| 7 10 | 
             
                end
         | 
| @@ -41,6 +44,7 @@ module Polars | |
| 41 44 | 
             
                # def concat
         | 
| 42 45 | 
             
                # end
         | 
| 43 46 |  | 
| 47 | 
            +
                #
         | 
| 44 48 | 
             
                def get(index)
         | 
| 45 49 | 
             
                  index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
         | 
| 46 50 | 
             
                  Utils.wrap_expr(_rbexpr.lst_get(index))
         | 
| @@ -101,6 +105,7 @@ module Polars | |
| 101 105 | 
             
                #   Utils.wrap_expr(_rbexpr.lst_to_struct(n_field_strategy, name_generator))
         | 
| 102 106 | 
             
                # end
         | 
| 103 107 |  | 
| 108 | 
            +
                #
         | 
| 104 109 | 
             
                def eval(expr, parallel: false)
         | 
| 105 110 | 
             
                   Utils.wrap_expr(_rbexpr.lst_eval(expr._rbexpr, parallel))
         | 
| 106 111 | 
             
                end
         | 
    
        data/lib/polars/meta_expr.rb
    CHANGED
    
    | @@ -1,31 +1,52 @@ | |
| 1 1 | 
             
            module Polars
         | 
| 2 | 
            +
              # Namespace for expressions on a meta level.
         | 
| 2 3 | 
             
              class MetaExpr
         | 
| 4 | 
            +
                # @private
         | 
| 3 5 | 
             
                attr_accessor :_rbexpr
         | 
| 4 6 |  | 
| 7 | 
            +
                # @private
         | 
| 5 8 | 
             
                def initialize(expr)
         | 
| 6 9 | 
             
                  self._rbexpr = expr._rbexpr
         | 
| 7 10 | 
             
                end
         | 
| 8 11 |  | 
| 12 | 
            +
                # Equal.
         | 
| 13 | 
            +
                #
         | 
| 14 | 
            +
                # @return [Boolean]
         | 
| 9 15 | 
             
                def ==(other)
         | 
| 10 16 | 
             
                  _rbexpr.meta_eq(other._rbexpr)
         | 
| 11 17 | 
             
                end
         | 
| 12 18 |  | 
| 19 | 
            +
                # Not equal.
         | 
| 20 | 
            +
                #
         | 
| 21 | 
            +
                # @return [Boolean]
         | 
| 13 22 | 
             
                def !=(other)
         | 
| 14 23 | 
             
                  !(self == other)
         | 
| 15 24 | 
             
                end
         | 
| 16 25 |  | 
| 26 | 
            +
                # Pop the latest expression and return the input(s) of the popped expression.
         | 
| 27 | 
            +
                #
         | 
| 28 | 
            +
                # @return [Array]
         | 
| 17 29 | 
             
                def pop
         | 
| 18 30 | 
             
                  _rbexpr.meta_pop.map { |e| Utils.wrap_expr(e) }
         | 
| 19 31 | 
             
                end
         | 
| 20 32 |  | 
| 33 | 
            +
                # Get a list with the root column name.
         | 
| 34 | 
            +
                #
         | 
| 35 | 
            +
                # @return [Array]
         | 
| 21 36 | 
             
                def root_names
         | 
| 22 37 | 
             
                  _rbexpr.meta_roots
         | 
| 23 38 | 
             
                end
         | 
| 24 39 |  | 
| 40 | 
            +
                # Get the column name that this expression would produce.
         | 
| 41 | 
            +
                #
         | 
| 42 | 
            +
                # @return [String]
         | 
| 25 43 | 
             
                def output_name
         | 
| 26 44 | 
             
                  _rbexpr.meta_output_name
         | 
| 27 45 | 
             
                end
         | 
| 28 46 |  | 
| 47 | 
            +
                # Undo any renaming operation like `alias` or `keep_name`.
         | 
| 48 | 
            +
                #
         | 
| 49 | 
            +
                # @return [Expr]
         | 
| 29 50 | 
             
                def undo_aliases
         | 
| 30 51 | 
             
                  Utils.wrap_expr(_rbexpr.meta_undo_aliases)
         | 
| 31 52 | 
             
                end
         |