polars-df 0.21.1-x86_64-linux → 0.22.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +55 -48
- data/Cargo.toml +3 -0
- data/LICENSE-THIRD-PARTY.txt +23 -49
- data/README.md +12 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/3.4/polars.so +0 -0
- data/lib/polars/array_expr.rb +1 -1
- data/lib/polars/data_frame.rb +110 -8
- data/lib/polars/data_types.rb +14 -5
- data/lib/polars/date_time_expr.rb +1 -1
- data/lib/polars/expr.rb +39 -30
- data/lib/polars/functions/business.rb +95 -0
- data/lib/polars/functions/lazy.rb +1 -1
- data/lib/polars/io/iceberg.rb +27 -0
- data/lib/polars/io/parquet.rb +7 -4
- data/lib/polars/io/scan_options.rb +4 -1
- data/lib/polars/lazy_frame.rb +92 -8
- data/lib/polars/list_expr.rb +21 -13
- data/lib/polars/list_name_space.rb +33 -21
- data/lib/polars/meta_expr.rb +25 -0
- data/lib/polars/query_opt_flags.rb +50 -0
- data/lib/polars/scan_cast_options.rb +20 -1
- data/lib/polars/schema.rb +1 -1
- data/lib/polars/series.rb +3 -1
- data/lib/polars/string_expr.rb +26 -27
- data/lib/polars/string_name_space.rb +17 -4
- data/lib/polars/utils/serde.rb +17 -0
- data/lib/polars/utils/various.rb +4 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +4 -0
- metadata +6 -2
    
        data/lib/polars/data_frame.rb
    CHANGED
    
    | @@ -72,6 +72,43 @@ module Polars | |
| 72 72 | 
             
                  end
         | 
| 73 73 | 
             
                end
         | 
| 74 74 |  | 
| 75 | 
            +
                # Read a serialized DataFrame from a file.
         | 
| 76 | 
            +
                #
         | 
| 77 | 
            +
                # @param source [Object]
         | 
| 78 | 
            +
                #     Path to a file or a file-like object (by file-like object, we refer to
         | 
| 79 | 
            +
                #     objects that have a `read` method, such as a file handler or `StringIO`).
         | 
| 80 | 
            +
                #
         | 
| 81 | 
            +
                # @return [DataFrame]
         | 
| 82 | 
            +
                #
         | 
| 83 | 
            +
                # @note
         | 
| 84 | 
            +
                #   Serialization is not stable across Polars versions: a DataFrame serialized
         | 
| 85 | 
            +
                #   in one Polars version may not be deserializable in another Polars version.
         | 
| 86 | 
            +
                #
         | 
| 87 | 
            +
                # @example
         | 
| 88 | 
            +
                #   df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => [4.0, 5.0, 6.0]})
         | 
| 89 | 
            +
                #   bytes = df.serialize
         | 
| 90 | 
            +
                #   Polars::DataFrame.deserialize(StringIO.new(bytes))
         | 
| 91 | 
            +
                #   # =>
         | 
| 92 | 
            +
                #   # shape: (3, 2)
         | 
| 93 | 
            +
                #   # ┌─────┬─────┐
         | 
| 94 | 
            +
                #   # │ a   ┆ b   │
         | 
| 95 | 
            +
                #   # │ --- ┆ --- │
         | 
| 96 | 
            +
                #   # │ i64 ┆ f64 │
         | 
| 97 | 
            +
                #   # ╞═════╪═════╡
         | 
| 98 | 
            +
                #   # │ 1   ┆ 4.0 │
         | 
| 99 | 
            +
                #   # │ 2   ┆ 5.0 │
         | 
| 100 | 
            +
                #   # │ 3   ┆ 6.0 │
         | 
| 101 | 
            +
                #   # └─────┴─────┘
         | 
| 102 | 
            +
                def self.deserialize(source)
         | 
| 103 | 
            +
                  if Utils.pathlike?(source)
         | 
| 104 | 
            +
                    source = Utils.normalize_filepath(source)
         | 
| 105 | 
            +
                  end
         | 
| 106 | 
            +
             | 
| 107 | 
            +
                  deserializer = RbDataFrame.method(:deserialize_binary)
         | 
| 108 | 
            +
             | 
| 109 | 
            +
                  _from_rbdf(deserializer.(source))
         | 
| 110 | 
            +
                end
         | 
| 111 | 
            +
             | 
| 75 112 | 
             
                # @private
         | 
| 76 113 | 
             
                def self._from_rbdf(rb_df)
         | 
| 77 114 | 
             
                  df = DataFrame.allocate
         | 
| @@ -562,8 +599,6 @@ module Polars | |
| 562 599 |  | 
| 563 600 | 
             
                # Convert every row to a hash.
         | 
| 564 601 | 
             
                #
         | 
| 565 | 
            -
                # Note that this is slow.
         | 
| 566 | 
            -
                #
         | 
| 567 602 | 
             
                # @return [Array]
         | 
| 568 603 | 
             
                #
         | 
| 569 604 | 
             
                # @example
         | 
| @@ -572,12 +607,7 @@ module Polars | |
| 572 607 | 
             
                #   # =>
         | 
| 573 608 | 
             
                #   # [{"foo"=>1, "bar"=>4}, {"foo"=>2, "bar"=>5}, {"foo"=>3, "bar"=>6}]
         | 
| 574 609 | 
             
                def to_hashes
         | 
| 575 | 
            -
                   | 
| 576 | 
            -
                  names = columns
         | 
| 577 | 
            -
             | 
| 578 | 
            -
                  height.times.map do |i|
         | 
| 579 | 
            -
                    names.zip(rbdf.row_tuple(i)).to_h
         | 
| 580 | 
            -
                  end
         | 
| 610 | 
            +
                  rows(named: true)
         | 
| 581 611 | 
             
                end
         | 
| 582 612 |  | 
| 583 613 | 
             
                # Convert DataFrame to a 2D Numo array.
         | 
| @@ -634,6 +664,44 @@ module Polars | |
| 634 664 | 
             
                  Utils.wrap_s(_df.select_at_idx(index))
         | 
| 635 665 | 
             
                end
         | 
| 636 666 |  | 
| 667 | 
            +
                # Serialize this DataFrame to a file or string.
         | 
| 668 | 
            +
                #
         | 
| 669 | 
            +
                # @param file [Object]
         | 
| 670 | 
            +
                #   File path or writable file-like object to which the result will be written.
         | 
| 671 | 
            +
                #   If set to `nil` (default), the output is returned as a string instead.
         | 
| 672 | 
            +
                #
         | 
| 673 | 
            +
                # @return [Object]
         | 
| 674 | 
            +
                #
         | 
| 675 | 
            +
                # @note
         | 
| 676 | 
            +
                #   Serialization is not stable across Polars versions: a DataFrame serialized
         | 
| 677 | 
            +
                #   in one Polars version may not be deserializable in another Polars version.
         | 
| 678 | 
            +
                #
         | 
| 679 | 
            +
                # @example
         | 
| 680 | 
            +
                #   df = Polars::DataFrame.new(
         | 
| 681 | 
            +
                #     {
         | 
| 682 | 
            +
                #       "foo" => [1, 2, 3],
         | 
| 683 | 
            +
                #       "bar" => [6, 7, 8]
         | 
| 684 | 
            +
                #     }
         | 
| 685 | 
            +
                #   )
         | 
| 686 | 
            +
                #   bytes = df.serialize
         | 
| 687 | 
            +
                #   Polars::DataFrame.deserialize(StringIO.new(bytes))
         | 
| 688 | 
            +
                #   # =>
         | 
| 689 | 
            +
                #   # shape: (3, 2)
         | 
| 690 | 
            +
                #   # ┌─────┬─────┐
         | 
| 691 | 
            +
                #   # │ foo ┆ bar │
         | 
| 692 | 
            +
                #   # │ --- ┆ --- │
         | 
| 693 | 
            +
                #   # │ i64 ┆ i64 │
         | 
| 694 | 
            +
                #   # ╞═════╪═════╡
         | 
| 695 | 
            +
                #   # │ 1   ┆ 6   │
         | 
| 696 | 
            +
                #   # │ 2   ┆ 7   │
         | 
| 697 | 
            +
                #   # │ 3   ┆ 8   │
         | 
| 698 | 
            +
                #   # └─────┴─────┘
         | 
| 699 | 
            +
                def serialize(file = nil)
         | 
| 700 | 
            +
                  serializer = _df.method(:serialize_binary)
         | 
| 701 | 
            +
             | 
| 702 | 
            +
                  Utils.serialize_polars_object(serializer, file)
         | 
| 703 | 
            +
                end
         | 
| 704 | 
            +
             | 
| 637 705 | 
             
                # Serialize to JSON representation.
         | 
| 638 706 | 
             
                #
         | 
| 639 707 | 
             
                # @param file [String]
         | 
| @@ -1148,6 +1216,40 @@ module Polars | |
| 1148 1216 | 
             
                  end
         | 
| 1149 1217 | 
             
                end
         | 
| 1150 1218 |  | 
| 1219 | 
            +
                # Write DataFrame to an Iceberg table.
         | 
| 1220 | 
            +
                #
         | 
| 1221 | 
            +
                # @note
         | 
| 1222 | 
            +
                #   This functionality is currently considered **unstable**. It may be
         | 
| 1223 | 
            +
                #   changed at any point without it being considered a breaking change.
         | 
| 1224 | 
            +
                #
         | 
| 1225 | 
            +
                # @param target [Object]
         | 
| 1226 | 
            +
                #   Name of the table or the Table object representing an Iceberg table.
         | 
| 1227 | 
            +
                # @param mode ['append', 'overwrite']
         | 
| 1228 | 
            +
                #   How to handle existing data.
         | 
| 1229 | 
            +
                #
         | 
| 1230 | 
            +
                #   - If 'append', will add new data.
         | 
| 1231 | 
            +
                #   - If 'overwrite', will replace table with new data.
         | 
| 1232 | 
            +
                #
         | 
| 1233 | 
            +
                # @return [nil]
         | 
| 1234 | 
            +
                def write_iceberg(target, mode:)
         | 
| 1235 | 
            +
                  require "iceberg"
         | 
| 1236 | 
            +
             | 
| 1237 | 
            +
                  table =
         | 
| 1238 | 
            +
                    if target.is_a?(Iceberg::Table)
         | 
| 1239 | 
            +
                      target
         | 
| 1240 | 
            +
                    else
         | 
| 1241 | 
            +
                      raise Todo
         | 
| 1242 | 
            +
                    end
         | 
| 1243 | 
            +
             | 
| 1244 | 
            +
                  data = self
         | 
| 1245 | 
            +
             | 
| 1246 | 
            +
                  if mode == "append"
         | 
| 1247 | 
            +
                    table.append(data)
         | 
| 1248 | 
            +
                  else
         | 
| 1249 | 
            +
                    raise Todo
         | 
| 1250 | 
            +
                  end
         | 
| 1251 | 
            +
                end
         | 
| 1252 | 
            +
             | 
| 1151 1253 | 
             
                # Write DataFrame as delta table.
         | 
| 1152 1254 | 
             
                #
         | 
| 1153 1255 | 
             
                # @param target [Object]
         | 
    
        data/lib/polars/data_types.rb
    CHANGED
    
    | @@ -110,12 +110,23 @@ module Polars | |
| 110 110 | 
             
                  DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.from_dtype(self))
         | 
| 111 111 | 
             
                end
         | 
| 112 112 |  | 
| 113 | 
            -
                [:numeric?, :decimal?, :integer?, :signed_integer?, :unsigned_integer?, :float?, :temporal?, :nested | 
| 113 | 
            +
                [:numeric?, :decimal?, :integer?, :signed_integer?, :unsigned_integer?, :float?, :temporal?, :nested?].each do |v|
         | 
| 114 114 | 
             
                  define_method(v) do
         | 
| 115 115 | 
             
                    self.class.public_send(v)
         | 
| 116 116 | 
             
                  end
         | 
| 117 117 | 
             
                end
         | 
| 118 118 |  | 
| 119 | 
            +
                # Return a `DataTypeExpr` with a static `DataType`.
         | 
| 120 | 
            +
                #
         | 
| 121 | 
            +
                # @return [DataTypeExpr]
         | 
| 122 | 
            +
                #
         | 
| 123 | 
            +
                # @example
         | 
| 124 | 
            +
                #   Polars::Int16.new.to_dtype_expr.collect_dtype({})
         | 
| 125 | 
            +
                #   # => Polars::Int16
         | 
| 126 | 
            +
                def to_dtype_expr
         | 
| 127 | 
            +
                  DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.from_dtype(self))
         | 
| 128 | 
            +
                end
         | 
| 129 | 
            +
             | 
| 119 130 | 
             
                # Returns a string representing the data type.
         | 
| 120 131 | 
             
                #
         | 
| 121 132 | 
             
                # @return [String]
         | 
| @@ -317,11 +328,9 @@ module Polars | |
| 317 328 | 
             
              class Categories
         | 
| 318 329 | 
             
                attr_accessor :_categories
         | 
| 319 330 |  | 
| 320 | 
            -
                def initialize
         | 
| 321 | 
            -
                  # TODO fix
         | 
| 322 | 
            -
                  name = nil
         | 
| 331 | 
            +
                def initialize(name = nil)
         | 
| 323 332 | 
             
                  if name.nil? || name == ""
         | 
| 324 | 
            -
                     | 
| 333 | 
            +
                    self._categories = RbCategories.global_categories
         | 
| 325 334 | 
             
                    return
         | 
| 326 335 | 
             
                  end
         | 
| 327 336 |  | 
| @@ -1188,7 +1188,7 @@ module Polars | |
| 1188 1188 | 
             
                  if Utils::DTYPE_TEMPORAL_UNITS.include?(time_unit)
         | 
| 1189 1189 | 
             
                    timestamp(time_unit)
         | 
| 1190 1190 | 
             
                  elsif time_unit == "s"
         | 
| 1191 | 
            -
                     | 
| 1191 | 
            +
                    timestamp("ms").floordiv(F.lit(1000, dtype: Int64))
         | 
| 1192 1192 | 
             
                  elsif time_unit == "d"
         | 
| 1193 1193 | 
             
                    Utils.wrap_expr(_rbexpr).cast(:date).cast(:i32)
         | 
| 1194 1194 | 
             
                  else
         | 
    
        data/lib/polars/expr.rb
    CHANGED
    
    | @@ -146,6 +146,40 @@ module Polars | |
| 146 146 | 
             
                  wrap_expr(_rbexpr.neg)
         | 
| 147 147 | 
             
                end
         | 
| 148 148 |  | 
| 149 | 
            +
                # Read a serialized expression from a file.
         | 
| 150 | 
            +
                #
         | 
| 151 | 
            +
                # @param source [Object]
         | 
| 152 | 
            +
                #   Path to a file or a file-like object (by file-like object, we refer to
         | 
| 153 | 
            +
                #   objects that have a `read` method, such as a file handler or `StringIO`).
         | 
| 154 | 
            +
                #
         | 
| 155 | 
            +
                # @return [Expr]
         | 
| 156 | 
            +
                #
         | 
| 157 | 
            +
                # @note
         | 
| 158 | 
            +
                #   This function uses marshaling if the logical plan contains Ruby UDFs,
         | 
| 159 | 
            +
                #   and as such inherits the security implications. Deserializing can execute
         | 
| 160 | 
            +
                #   arbitrary code, so it should only be attempted on trusted data.
         | 
| 161 | 
            +
                #
         | 
| 162 | 
            +
                # @note
         | 
| 163 | 
            +
                #   Serialization is not stable across Polars versions: an expression serialized
         | 
| 164 | 
            +
                #   in one Polars version may not be deserializable in another Polars version.
         | 
| 165 | 
            +
                #
         | 
| 166 | 
            +
                # @example
         | 
| 167 | 
            +
                #   expr = Polars.col("foo").sum.over("bar")
         | 
| 168 | 
            +
                #   bytes = expr.meta.serialize
         | 
| 169 | 
            +
                #   Polars::Expr.deserialize(StringIO.new(bytes))
         | 
| 170 | 
            +
                #   # => col("foo").sum().over([col("bar")])
         | 
| 171 | 
            +
                def self.deserialize(source)
         | 
| 172 | 
            +
                  raise Todo unless RbExpr.respond_to?(:deserialize_binary)
         | 
| 173 | 
            +
             | 
| 174 | 
            +
                  if Utils.pathlike?(source)
         | 
| 175 | 
            +
                    source = Utils.normalize_filepath(source)
         | 
| 176 | 
            +
                  end
         | 
| 177 | 
            +
             | 
| 178 | 
            +
                  deserializer = RbExpr.method(:deserialize_binary)
         | 
| 179 | 
            +
             | 
| 180 | 
            +
                  _from_rbexpr(deserializer.(source))
         | 
| 181 | 
            +
                end
         | 
| 182 | 
            +
             | 
| 149 183 | 
             
                # Cast to physical representation of the logical dtype.
         | 
| 150 184 | 
             
                #
         | 
| 151 185 | 
             
                # - `:date` -> `:i32`
         | 
| @@ -377,8 +411,6 @@ module Polars | |
| 377 411 | 
             
                  wrap_expr(_rbexpr._alias(name))
         | 
| 378 412 | 
             
                end
         | 
| 379 413 |  | 
| 380 | 
            -
                # TODO support symbols for exclude
         | 
| 381 | 
            -
             | 
| 382 414 | 
             
                # Exclude certain columns from a wildcard/regex selection.
         | 
| 383 415 | 
             
                #
         | 
| 384 416 | 
             
                # You may also use regexes in the exclude list. They must start with `^` and end
         | 
| @@ -1787,7 +1819,7 @@ module Polars | |
| 1787 1819 | 
             
                  wrap_expr(_rbexpr.arg_min)
         | 
| 1788 1820 | 
             
                end
         | 
| 1789 1821 |  | 
| 1790 | 
            -
                # Get the index of the first occurrence of a value, or  | 
| 1822 | 
            +
                # Get the index of the first occurrence of a value, or `nil` if it's not found.
         | 
| 1791 1823 | 
             
                #
         | 
| 1792 1824 | 
             
                # @param element [Object]
         | 
| 1793 1825 | 
             
                #   Value to find.
         | 
| @@ -7571,7 +7603,8 @@ module Polars | |
| 7571 7603 | 
             
                #   # │ 1.584963 │
         | 
| 7572 7604 | 
             
                #   # └──────────┘
         | 
| 7573 7605 | 
             
                def log(base = Math::E)
         | 
| 7574 | 
            -
                   | 
| 7606 | 
            +
                  base_rbexpr = Utils.parse_into_expression(base)
         | 
| 7607 | 
            +
                  wrap_expr(_rbexpr.log(base_rbexpr))
         | 
| 7575 7608 | 
             
                end
         | 
| 7576 7609 |  | 
| 7577 7610 | 
             
                # Compute the natural logarithm of each element plus one.
         | 
| @@ -7743,33 +7776,9 @@ module Polars | |
| 7743 7776 | 
             
                # This can be used to reduce memory pressure.
         | 
| 7744 7777 | 
             
                #
         | 
| 7745 7778 | 
             
                # @return [Expr]
         | 
| 7746 | 
            -
                #
         | 
| 7747 | 
            -
                # @example
         | 
| 7748 | 
            -
                #   Polars::DataFrame.new(
         | 
| 7749 | 
            -
                #     {
         | 
| 7750 | 
            -
                #       "a" => [1, 2, 3],
         | 
| 7751 | 
            -
                #       "b" => [1, 2, 2 << 32],
         | 
| 7752 | 
            -
                #       "c" => [-1, 2, 1 << 30],
         | 
| 7753 | 
            -
                #       "d" => [-112, 2, 112],
         | 
| 7754 | 
            -
                #       "e" => [-112, 2, 129],
         | 
| 7755 | 
            -
                #       "f" => ["a", "b", "c"],
         | 
| 7756 | 
            -
                #       "g" => [0.1, 1.32, 0.12],
         | 
| 7757 | 
            -
                #       "h" => [true, nil, false]
         | 
| 7758 | 
            -
                #     }
         | 
| 7759 | 
            -
                #   ).select(Polars.all.shrink_dtype)
         | 
| 7760 | 
            -
                #   # =>
         | 
| 7761 | 
            -
                #   # shape: (3, 8)
         | 
| 7762 | 
            -
                #   # ┌─────┬────────────┬────────────┬──────┬──────┬─────┬──────┬───────┐
         | 
| 7763 | 
            -
                #   # │ a   ┆ b          ┆ c          ┆ d    ┆ e    ┆ f   ┆ g    ┆ h     │
         | 
| 7764 | 
            -
                #   # │ --- ┆ ---        ┆ ---        ┆ ---  ┆ ---  ┆ --- ┆ ---  ┆ ---   │
         | 
| 7765 | 
            -
                #   # │ i8  ┆ i64        ┆ i32        ┆ i8   ┆ i16  ┆ str ┆ f32  ┆ bool  │
         | 
| 7766 | 
            -
                #   # ╞═════╪════════════╪════════════╪══════╪══════╪═════╪══════╪═══════╡
         | 
| 7767 | 
            -
                #   # │ 1   ┆ 1          ┆ -1         ┆ -112 ┆ -112 ┆ a   ┆ 0.1  ┆ true  │
         | 
| 7768 | 
            -
                #   # │ 2   ┆ 2          ┆ 2          ┆ 2    ┆ 2    ┆ b   ┆ 1.32 ┆ null  │
         | 
| 7769 | 
            -
                #   # │ 3   ┆ 8589934592 ┆ 1073741824 ┆ 112  ┆ 129  ┆ c   ┆ 0.12 ┆ false │
         | 
| 7770 | 
            -
                #   # └─────┴────────────┴────────────┴──────┴──────┴─────┴──────┴───────┘
         | 
| 7771 7779 | 
             
                def shrink_dtype
         | 
| 7772 | 
            -
                   | 
| 7780 | 
            +
                  warn "`Expr.shrink_dtype` is deprecated and is a no-op; use `Series.shrink_dtype` instead."
         | 
| 7781 | 
            +
                  self
         | 
| 7773 7782 | 
             
                end
         | 
| 7774 7783 |  | 
| 7775 7784 | 
             
                # Bin values into buckets and count their occurrences.
         | 
| @@ -0,0 +1,95 @@ | |
| 1 | 
            +
            module Polars
         | 
| 2 | 
            +
              module Functions
         | 
| 3 | 
            +
                # Count the number of business days between `start` and `stop` (not including `stop`).
         | 
| 4 | 
            +
                #
         | 
| 5 | 
            +
                # @note
         | 
| 6 | 
            +
                #   This functionality is considered **unstable**. It may be changed
         | 
| 7 | 
            +
                #   at any point without it being considered a breaking change.
         | 
| 8 | 
            +
                #
         | 
| 9 | 
            +
                # @param start [Object]
         | 
| 10 | 
            +
                #   Start dates.
         | 
| 11 | 
            +
                # @param stop [Object]
         | 
| 12 | 
            +
                #   End dates.
         | 
| 13 | 
            +
                # @param week_mask [Array]
         | 
| 14 | 
            +
                #   Which days of the week to count. The default is Monday to Friday.
         | 
| 15 | 
            +
                #   If you wanted to count only Monday to Thursday, you would pass
         | 
| 16 | 
            +
                #   `[true, true, true, true, false, false, false]`.
         | 
| 17 | 
            +
                # @param holidays [Array]
         | 
| 18 | 
            +
                #   Holidays to exclude from the count.
         | 
| 19 | 
            +
                #
         | 
| 20 | 
            +
                # @return [Expr]
         | 
| 21 | 
            +
                #
         | 
| 22 | 
            +
                # @example
         | 
| 23 | 
            +
                #   df = Polars::DataFrame.new(
         | 
| 24 | 
            +
                #     {
         | 
| 25 | 
            +
                #       "start" => [Date.new(2020, 1, 1), Date.new(2020, 1, 2)],
         | 
| 26 | 
            +
                #       "end" => [Date.new(2020, 1, 2), Date.new(2020, 1, 10)]
         | 
| 27 | 
            +
                #     }
         | 
| 28 | 
            +
                #   )
         | 
| 29 | 
            +
                #   df.with_columns(
         | 
| 30 | 
            +
                #     business_day_count: Polars.business_day_count("start", "end")
         | 
| 31 | 
            +
                #   )
         | 
| 32 | 
            +
                #   # =>
         | 
| 33 | 
            +
                #   # shape: (2, 3)
         | 
| 34 | 
            +
                #   # ┌────────────┬────────────┬────────────────────┐
         | 
| 35 | 
            +
                #   # │ start      ┆ end        ┆ business_day_count │
         | 
| 36 | 
            +
                #   # │ ---        ┆ ---        ┆ ---                │
         | 
| 37 | 
            +
                #   # │ date       ┆ date       ┆ i32                │
         | 
| 38 | 
            +
                #   # ╞════════════╪════════════╪════════════════════╡
         | 
| 39 | 
            +
                #   # │ 2020-01-01 ┆ 2020-01-02 ┆ 1                  │
         | 
| 40 | 
            +
                #   # │ 2020-01-02 ┆ 2020-01-10 ┆ 6                  │
         | 
| 41 | 
            +
                #   # └────────────┴────────────┴────────────────────┘
         | 
| 42 | 
            +
                #
         | 
| 43 | 
            +
                # @example You can pass a custom weekend - for example, if you only take Sunday off:
         | 
| 44 | 
            +
                #   week_mask = [true, true, true, true, true, true, false]
         | 
| 45 | 
            +
                #   df.with_columns(
         | 
| 46 | 
            +
                #     business_day_count: Polars.business_day_count(
         | 
| 47 | 
            +
                #       "start", "end", week_mask: week_mask
         | 
| 48 | 
            +
                #     )
         | 
| 49 | 
            +
                #   )
         | 
| 50 | 
            +
                #   # =>
         | 
| 51 | 
            +
                #   # shape: (2, 3)
         | 
| 52 | 
            +
                #   # ┌────────────┬────────────┬────────────────────┐
         | 
| 53 | 
            +
                #   # │ start      ┆ end        ┆ business_day_count │
         | 
| 54 | 
            +
                #   # │ ---        ┆ ---        ┆ ---                │
         | 
| 55 | 
            +
                #   # │ date       ┆ date       ┆ i32                │
         | 
| 56 | 
            +
                #   # ╞════════════╪════════════╪════════════════════╡
         | 
| 57 | 
            +
                #   # │ 2020-01-01 ┆ 2020-01-02 ┆ 1                  │
         | 
| 58 | 
            +
                #   # │ 2020-01-02 ┆ 2020-01-10 ┆ 7                  │
         | 
| 59 | 
            +
                #   # └────────────┴────────────┴────────────────────┘
         | 
| 60 | 
            +
                #
         | 
| 61 | 
            +
                # @example You can also pass a list of holidays to exclude from the count:
         | 
| 62 | 
            +
                #   holidays = [Date.new(2020, 1, 1), Date.new(2020, 1, 2)]
         | 
| 63 | 
            +
                #   df.with_columns(
         | 
| 64 | 
            +
                #     business_day_count: Polars.business_day_count("start", "end", holidays: holidays)
         | 
| 65 | 
            +
                #   )
         | 
| 66 | 
            +
                #   # =>
         | 
| 67 | 
            +
                #   # shape: (2, 3)
         | 
| 68 | 
            +
                #   # ┌────────────┬────────────┬────────────────────┐
         | 
| 69 | 
            +
                #   # │ start      ┆ end        ┆ business_day_count │
         | 
| 70 | 
            +
                #   # │ ---        ┆ ---        ┆ ---                │
         | 
| 71 | 
            +
                #   # │ date       ┆ date       ┆ i32                │
         | 
| 72 | 
            +
                #   # ╞════════════╪════════════╪════════════════════╡
         | 
| 73 | 
            +
                #   # │ 2020-01-01 ┆ 2020-01-02 ┆ 0                  │
         | 
| 74 | 
            +
                #   # │ 2020-01-02 ┆ 2020-01-10 ┆ 5                  │
         | 
| 75 | 
            +
                #   # └────────────┴────────────┴────────────────────┘
         | 
| 76 | 
            +
                def business_day_count(
         | 
| 77 | 
            +
                  start,
         | 
| 78 | 
            +
                  stop,
         | 
| 79 | 
            +
                  week_mask: [true, true, true, true, true, false, false],
         | 
| 80 | 
            +
                  holidays: []
         | 
| 81 | 
            +
                )
         | 
| 82 | 
            +
                  start_rbexpr = Utils.parse_into_expression(start)
         | 
| 83 | 
            +
                  end_rbexpr = Utils.parse_into_expression(stop)
         | 
| 84 | 
            +
                  unix_epoch = ::Date.new(1970, 1, 1)
         | 
| 85 | 
            +
                  Utils.wrap_expr(
         | 
| 86 | 
            +
                    Plr.business_day_count(
         | 
| 87 | 
            +
                      start_rbexpr,
         | 
| 88 | 
            +
                      end_rbexpr,
         | 
| 89 | 
            +
                      week_mask,
         | 
| 90 | 
            +
                      holidays.map { |holiday| holiday - unix_epoch }
         | 
| 91 | 
            +
                    )
         | 
| 92 | 
            +
                  )
         | 
| 93 | 
            +
                end
         | 
| 94 | 
            +
              end
         | 
| 95 | 
            +
            end
         | 
| @@ -0,0 +1,27 @@ | |
| 1 | 
            +
            module Polars
         | 
| 2 | 
            +
              module IO
         | 
| 3 | 
            +
                # Lazily read from an Apache Iceberg table.
         | 
| 4 | 
            +
                #
         | 
| 5 | 
            +
                # @param source [Object]
         | 
| 6 | 
            +
                #   An Iceberg Ruby table, or a direct path to the metadata.
         | 
| 7 | 
            +
                # @param snapshot_id [Integer]
         | 
| 8 | 
            +
                #   The snapshot ID to scan from.
         | 
| 9 | 
            +
                # @param storage_options [Hash]
         | 
| 10 | 
            +
                #   Extra options for the storage backends.
         | 
| 11 | 
            +
                #
         | 
| 12 | 
            +
                # @return [LazyFrame]
         | 
| 13 | 
            +
                def scan_iceberg(
         | 
| 14 | 
            +
                  source,
         | 
| 15 | 
            +
                  snapshot_id: nil,
         | 
| 16 | 
            +
                  storage_options: nil
         | 
| 17 | 
            +
                )
         | 
| 18 | 
            +
                  require "iceberg"
         | 
| 19 | 
            +
             | 
| 20 | 
            +
                  unless source.is_a?(Iceberg::Table)
         | 
| 21 | 
            +
                    raise Todo
         | 
| 22 | 
            +
                  end
         | 
| 23 | 
            +
             | 
| 24 | 
            +
                  source.to_polars(snapshot_id:, storage_options:)
         | 
| 25 | 
            +
                end
         | 
| 26 | 
            +
              end
         | 
| 27 | 
            +
            end
         | 
    
        data/lib/polars/io/parquet.rb
    CHANGED
    
    | @@ -117,14 +117,13 @@ module Polars | |
| 117 117 | 
             
                # @param source [Object]
         | 
| 118 118 | 
             
                #   Path to a file or a file-like object.
         | 
| 119 119 | 
             
                #
         | 
| 120 | 
            -
                # @return [ | 
| 120 | 
            +
                # @return [Schema]
         | 
| 121 121 | 
             
                def read_parquet_schema(source)
         | 
| 122 122 | 
             
                  if Utils.pathlike?(source)
         | 
| 123 123 | 
             
                    source = Utils.normalize_filepath(source)
         | 
| 124 124 | 
             
                  end
         | 
| 125 125 |  | 
| 126 | 
            -
                   | 
| 127 | 
            -
                  scan_parquet(source).collect_schema.to_h
         | 
| 126 | 
            +
                  scan_parquet(source).collect_schema
         | 
| 128 127 | 
             
                end
         | 
| 129 128 |  | 
| 130 129 | 
             
                # Get file-level custom metadata of a Parquet file without reading data.
         | 
| @@ -207,6 +206,9 @@ module Polars | |
| 207 206 | 
             
                #   defined schema are encountered in the data:
         | 
| 208 207 | 
             
                #     * `ignore`: Silently ignores.
         | 
| 209 208 | 
             
                #     * `raise`: Raises an error.
         | 
| 209 | 
            +
                # @param cast_options [Object]
         | 
| 210 | 
            +
                #   Configuration for column type-casting during scans. Useful for datasets
         | 
| 211 | 
            +
                #   containing files that have differing schemas.
         | 
| 210 212 | 
             
                #
         | 
| 211 213 | 
             
                # @return [LazyFrame]
         | 
| 212 214 | 
             
                def scan_parquet(
         | 
| @@ -230,6 +232,7 @@ module Polars | |
| 230 232 | 
             
                  include_file_paths: nil,
         | 
| 231 233 | 
             
                  allow_missing_columns: false,
         | 
| 232 234 | 
             
                  extra_columns: "raise",
         | 
| 235 | 
            +
                  cast_options: nil,
         | 
| 233 236 | 
             
                  _column_mapping: nil,
         | 
| 234 237 | 
             
                  _deletion_files: nil
         | 
| 235 238 | 
             
                )
         | 
| @@ -268,7 +271,7 @@ module Polars | |
| 268 271 | 
             
                      ScanOptions.new(
         | 
| 269 272 | 
             
                        row_index: !row_index_name.nil? ? [row_index_name, row_index_offset] : nil,
         | 
| 270 273 | 
             
                        pre_slice: !n_rows.nil? ? [0, n_rows] : nil,
         | 
| 271 | 
            -
                         | 
| 274 | 
            +
                        cast_options: cast_options,
         | 
| 272 275 | 
             
                        extra_columns: extra_columns,
         | 
| 273 276 | 
             
                        missing_columns: missing_columns,
         | 
| 274 277 | 
             
                        include_file_paths: include_file_paths,
         | 
| @@ -3,7 +3,8 @@ module Polars | |
| 3 3 | 
             
                class ScanOptions
         | 
| 4 4 | 
             
                  attr_reader :row_index, :pre_slice, :cast_options, :extra_columns, :missing_columns,
         | 
| 5 5 | 
             
                    :include_file_paths, :glob, :hive_partitioning, :hive_schema, :try_parse_hive_dates,
         | 
| 6 | 
            -
                    :rechunk, :cache, :storage_options, :credential_provider, :retries, :column_mapping, | 
| 6 | 
            +
                    :rechunk, :cache, :storage_options, :credential_provider, :retries, :column_mapping,
         | 
| 7 | 
            +
                    :default_values, :deletion_files
         | 
| 7 8 |  | 
| 8 9 | 
             
                  def initialize(
         | 
| 9 10 | 
             
                    row_index: nil,
         | 
| @@ -22,6 +23,7 @@ module Polars | |
| 22 23 | 
             
                    credential_provider: nil,
         | 
| 23 24 | 
             
                    retries: 2,
         | 
| 24 25 | 
             
                    column_mapping: nil,
         | 
| 26 | 
            +
                    default_values: nil,
         | 
| 25 27 | 
             
                    deletion_files: nil
         | 
| 26 28 | 
             
                  )
         | 
| 27 29 | 
             
                    @row_index = row_index
         | 
| @@ -40,6 +42,7 @@ module Polars | |
| 40 42 | 
             
                    @credential_provider = credential_provider
         | 
| 41 43 | 
             
                    @retries = retries
         | 
| 42 44 | 
             
                    @column_mapping = column_mapping
         | 
| 45 | 
            +
                    @default_values = default_values
         | 
| 43 46 | 
             
                    @deletion_files = deletion_files
         | 
| 44 47 | 
             
                  end
         | 
| 45 48 | 
             
                end
         | 
    
        data/lib/polars/lazy_frame.rb
    CHANGED
    
    | @@ -27,9 +27,6 @@ module Polars | |
| 27 27 | 
             
                  ldf
         | 
| 28 28 | 
             
                end
         | 
| 29 29 |  | 
| 30 | 
            -
                # def self.from_json
         | 
| 31 | 
            -
                # end
         | 
| 32 | 
            -
             | 
| 33 30 | 
             
                # Read a logical plan from a JSON file to construct a LazyFrame.
         | 
| 34 31 | 
             
                #
         | 
| 35 32 | 
             
                # @param file [String]
         | 
| @@ -41,7 +38,49 @@ module Polars | |
| 41 38 | 
             
                    file = Utils.normalize_filepath(file)
         | 
| 42 39 | 
             
                  end
         | 
| 43 40 |  | 
| 44 | 
            -
                  Utils.wrap_ldf(RbLazyFrame. | 
| 41 | 
            +
                  Utils.wrap_ldf(RbLazyFrame.deserialize_json(file))
         | 
| 42 | 
            +
                end
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                # Read a logical plan from a file to construct a LazyFrame.
         | 
| 45 | 
            +
                #
         | 
| 46 | 
            +
                # @param source [Object]
         | 
| 47 | 
            +
                #   Path to a file or a file-like object (by file-like object, we refer to
         | 
| 48 | 
            +
                #   objects that have a `read` method, such as a file handler or `StringIO`).
         | 
| 49 | 
            +
                #
         | 
| 50 | 
            +
                # @return [LazyFrame]
         | 
| 51 | 
            +
                #
         | 
| 52 | 
            +
                # @note
         | 
| 53 | 
            +
                #   This function uses marshaling if the logical plan contains Ruby UDFs,
         | 
| 54 | 
            +
                #   and as such inherits the security implications. Deserializing can execute
         | 
| 55 | 
            +
                #   arbitrary code, so it should only be attempted on trusted data.
         | 
| 56 | 
            +
                #
         | 
| 57 | 
            +
                # @note
         | 
| 58 | 
            +
                #   Serialization is not stable across Polars versions: a LazyFrame serialized
         | 
| 59 | 
            +
                #   in one Polars version may not be deserializable in another Polars version.
         | 
| 60 | 
            +
                #
         | 
| 61 | 
            +
                # @example
         | 
| 62 | 
            +
                #   lf = Polars::LazyFrame.new({"a" => [1, 2, 3]}).sum
         | 
| 63 | 
            +
                #   bytes = lf.serialize
         | 
| 64 | 
            +
                #   Polars::LazyFrame.deserialize(StringIO.new(bytes)).collect
         | 
| 65 | 
            +
                #   # =>
         | 
| 66 | 
            +
                #   # shape: (1, 1)
         | 
| 67 | 
            +
                #   # ┌─────┐
         | 
| 68 | 
            +
                #   # │ a   │
         | 
| 69 | 
            +
                #   # │ --- │
         | 
| 70 | 
            +
                #   # │ i64 │
         | 
| 71 | 
            +
                #   # ╞═════╡
         | 
| 72 | 
            +
                #   # │ 6   │
         | 
| 73 | 
            +
                #   # └─────┘
         | 
| 74 | 
            +
                def self.deserialize(source)
         | 
| 75 | 
            +
                  raise Todo unless RbLazyFrame.respond_to?(:deserialize_binary)
         | 
| 76 | 
            +
             | 
| 77 | 
            +
                  if Utils.pathlike?(source)
         | 
| 78 | 
            +
                    source = Utils.normalize_filepath(source)
         | 
| 79 | 
            +
                  end
         | 
| 80 | 
            +
             | 
| 81 | 
            +
                  deserializer = RbLazyFrame.method(:deserialize_binary)
         | 
| 82 | 
            +
             | 
| 83 | 
            +
                  _from_rbldf(deserializer.(source))
         | 
| 45 84 | 
             
                end
         | 
| 46 85 |  | 
| 47 86 | 
             
                # Get or set column names.
         | 
| @@ -151,6 +190,38 @@ module Polars | |
| 151 190 | 
             
                  nil
         | 
| 152 191 | 
             
                end
         | 
| 153 192 |  | 
| 193 | 
            +
                # Serialize the logical plan of this LazyFrame to a file or string.
         | 
| 194 | 
            +
                #
         | 
| 195 | 
            +
                # @param file [Object]
         | 
| 196 | 
            +
                #   File path to which the result should be written. If set to `nil`
         | 
| 197 | 
            +
                #   (default), the output is returned as a string instead.
         | 
| 198 | 
            +
                #
         | 
| 199 | 
            +
                # @return [Object]
         | 
| 200 | 
            +
                #
         | 
| 201 | 
            +
                # @note
         | 
| 202 | 
            +
                #   Serialization is not stable across Polars versions: a LazyFrame serialized
         | 
| 203 | 
            +
                #   in one Polars version may not be deserializable in another Polars version.
         | 
| 204 | 
            +
                #
         | 
| 205 | 
            +
                # @example Serialize the logical plan into a binary representation.
         | 
| 206 | 
            +
                #   lf = Polars::LazyFrame.new({"a" => [1, 2, 3]}).sum
         | 
| 207 | 
            +
                #   bytes = lf.serialize
         | 
| 208 | 
            +
                #   Polars::LazyFrame.deserialize(StringIO.new(bytes)).collect
         | 
| 209 | 
            +
                #   # =>
         | 
| 210 | 
            +
                #   # shape: (1, 1)
         | 
| 211 | 
            +
                #   # ┌─────┐
         | 
| 212 | 
            +
                #   # │ a   │
         | 
| 213 | 
            +
                #   # │ --- │
         | 
| 214 | 
            +
                #   # │ i64 │
         | 
| 215 | 
            +
                #   # ╞═════╡
         | 
| 216 | 
            +
                #   # │ 6   │
         | 
| 217 | 
            +
                #   # └─────┘
         | 
| 218 | 
            +
                def serialize(file = nil)
         | 
| 219 | 
            +
                  raise Todo unless _ldf.respond_to?(:serialize_binary)
         | 
| 220 | 
            +
             | 
| 221 | 
            +
                  serializer = _ldf.method(:serialize_binary)
         | 
| 222 | 
            +
                  Utils.serialize_polars_object(serializer, file)
         | 
| 223 | 
            +
                end
         | 
| 224 | 
            +
             | 
| 154 225 | 
             
                # Offers a structured way to apply a sequence of user-defined functions (UDFs).
         | 
| 155 226 | 
             
                #
         | 
| 156 227 | 
             
                # @param func [Object]
         | 
| @@ -774,6 +845,21 @@ module Polars | |
| 774 845 | 
             
                # @param maintain_order [Boolean]
         | 
| 775 846 | 
             
                #   Maintain the order in which data is processed.
         | 
| 776 847 | 
             
                #   Setting this to `false` will be slightly faster.
         | 
| 848 | 
            +
                # @param storage_options [Hash]
         | 
| 849 | 
            +
                #   Options that indicate how to connect to a cloud provider.
         | 
| 850 | 
            +
                #
         | 
| 851 | 
            +
                #   The cloud providers currently supported are AWS, GCP, and Azure.
         | 
| 852 | 
            +
                #   See supported keys here:
         | 
| 853 | 
            +
                #
         | 
| 854 | 
            +
                #   * [aws](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html)
         | 
| 855 | 
            +
                #   * [gcp](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html)
         | 
| 856 | 
            +
                #   * [azure](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html)
         | 
| 857 | 
            +
                #   * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: `{"token" => "..."}`, or by setting the `HF_TOKEN` environment variable.
         | 
| 858 | 
            +
                #
         | 
| 859 | 
            +
                #   If `storage_options` is not provided, Polars will try to infer the
         | 
| 860 | 
            +
                #   information from environment variables.
         | 
| 861 | 
            +
                # @param retries [Integer]
         | 
| 862 | 
            +
                #   Number of retries if accessing a cloud instance fails.
         | 
| 777 863 | 
             
                # @param type_coercion [Boolean]
         | 
| 778 864 | 
             
                #   Do type coercion optimization.
         | 
| 779 865 | 
             
                # @param predicate_pushdown [Boolean]
         | 
| @@ -806,6 +892,8 @@ module Polars | |
| 806 892 | 
             
                  path,
         | 
| 807 893 | 
             
                  compression: "zstd",
         | 
| 808 894 | 
             
                  maintain_order: true,
         | 
| 895 | 
            +
                  storage_options: nil,
         | 
| 896 | 
            +
                  retries: 2,
         | 
| 809 897 | 
             
                  type_coercion: true,
         | 
| 810 898 | 
             
                  predicate_pushdown: true,
         | 
| 811 899 | 
             
                  projection_pushdown: true,
         | 
| @@ -816,10 +904,6 @@ module Polars | |
| 816 904 | 
             
                  mkdir: false,
         | 
| 817 905 | 
             
                  lazy: false
         | 
| 818 906 | 
             
                )
         | 
| 819 | 
            -
                  # TODO support storage options in Rust
         | 
| 820 | 
            -
                  storage_options = nil
         | 
| 821 | 
            -
                  retries = 2
         | 
| 822 | 
            -
             | 
| 823 907 | 
             
                  lf = _set_sink_optimizations(
         | 
| 824 908 | 
             
                    type_coercion: type_coercion,
         | 
| 825 909 | 
             
                    predicate_pushdown: predicate_pushdown,
         |