RubyGems - polars-df - Versions diffs - 0.20.0-x64-mingw-ucrt → 0.21.1-x64-mingw-ucrt - Mend

polars-df 0.20.0-x64-mingw-ucrt → 0.21.1-x64-mingw-ucrt

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +27 -0
data/Cargo.lock +192 -186
data/LICENSE-THIRD-PARTY.txt +2153 -2532
data/LICENSE.txt +1 -1
data/lib/polars/3.2/polars.so +0 -0
data/lib/polars/3.3/polars.so +0 -0
data/lib/polars/3.4/polars.so +0 -0
data/lib/polars/array_expr.rb +382 -3
data/lib/polars/array_name_space.rb +281 -0
data/lib/polars/binary_expr.rb +67 -0
data/lib/polars/binary_name_space.rb +43 -0
data/lib/polars/cat_expr.rb +224 -0
data/lib/polars/cat_name_space.rb +130 -32
data/lib/polars/catalog/unity/catalog_info.rb +20 -0
data/lib/polars/catalog/unity/column_info.rb +31 -0
data/lib/polars/catalog/unity/namespace_info.rb +21 -0
data/lib/polars/catalog/unity/table_info.rb +50 -0
data/lib/polars/catalog.rb +448 -0
data/lib/polars/config.rb +2 -2
data/lib/polars/convert.rb +12 -2
data/lib/polars/data_frame.rb +834 -48
data/lib/polars/data_type_expr.rb +52 -0
data/lib/polars/data_types.rb +61 -5
data/lib/polars/date_time_expr.rb +251 -0
data/lib/polars/date_time_name_space.rb +299 -0
data/lib/polars/exceptions.rb +7 -2
data/lib/polars/expr.rb +1247 -211
data/lib/polars/functions/col.rb +6 -5
data/lib/polars/functions/datatype.rb +21 -0
data/lib/polars/functions/lazy.rb +127 -15
data/lib/polars/functions/repeat.rb +4 -0
data/lib/polars/io/csv.rb +19 -1
data/lib/polars/io/json.rb +16 -0
data/lib/polars/io/ndjson.rb +13 -0
data/lib/polars/io/parquet.rb +70 -66
data/lib/polars/io/scan_options.rb +47 -0
data/lib/polars/lazy_frame.rb +1099 -95
data/lib/polars/list_expr.rb +400 -11
data/lib/polars/list_name_space.rb +321 -5
data/lib/polars/meta_expr.rb +71 -22
data/lib/polars/name_expr.rb +36 -0
data/lib/polars/scan_cast_options.rb +64 -0
data/lib/polars/schema.rb +84 -3
data/lib/polars/selector.rb +210 -0
data/lib/polars/selectors.rb +932 -203
data/lib/polars/series.rb +1083 -63
data/lib/polars/string_expr.rb +435 -9
data/lib/polars/string_name_space.rb +729 -45
data/lib/polars/struct_expr.rb +103 -0
data/lib/polars/struct_name_space.rb +19 -1
data/lib/polars/utils/parse.rb +40 -0
data/lib/polars/utils/various.rb +18 -1
data/lib/polars/utils.rb +9 -1
data/lib/polars/version.rb +1 -1
data/lib/polars.rb +10 -0
metadata +12 -2

data/lib/polars/list_name_space.rb CHANGED Viewed

@@ -66,7 +66,7 @@ module Polars
     #
     # @example
     #   s = Polars::Series.new([[1, 2, 3], [5]])
-    #   s.list.lengths
+    #   s.list.len
     #   # =>
     #   # shape: (2,)
     #   # Series: '' [u32]
@@ -74,9 +74,10 @@ module Polars
     #   #         3
     #   #         1
     #   # ]
-    def lengths
+    def len
       super
     end
+    alias_method :lengths, :len
     # Drop all null values in the list.
     #
@@ -123,7 +124,7 @@ module Polars
     #   # shape: (2,)
     #   # Series: 'values' [list[i64]]
     #   # [
-    #   #         [2, 1]
+    #   #         [2, 3]
     #   #         [5]
     #   # ]
     def sample(n: nil, fraction: nil, with_replacement: false, shuffle: false, seed: nil)
@@ -202,6 +203,60 @@ module Polars
       super
     end
+    # Compute the median value of the arrays in the list.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   s = Polars::Series.new("values", [[-1, 0, 1], [1, 10]])
+    #   s.list.median
+    #   # =>
+    #   # shape: (2,)
+    #   # Series: 'values' [f64]
+    #   # [
+    #   #         0.0
+    #   #         5.5
+    #   # ]
+    def median
+      super
+    end
+    # Compute the standard deviation of the arrays in the list.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   s = Polars::Series.new("values", [[-1, 0, 1], [1, 10]])
+    #   s.list.std
+    #   # =>
+    #   # shape: (2,)
+    #   # Series: 'values' [f64]
+    #   # [
+    #   #         1.0
+    #   #         6.363961
+    #   # ]
+    def std(ddof: 1)
+      super
+    end
+    # Compute the variance of the arrays in the list.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   s = Polars::Series.new("values", [[-1, 0, 1], [1, 10]])
+    #   s.list.var
+    #   # =>
+    #   # shape: (2,)
+    #   # Series: 'values' [f64]
+    #   # [
+    #   #         1.0
+    #   #         40.5
+    #   # ]
+    def var(ddof: 1)
+      super
+    end
     # Sort the arrays in the list.
     #
     # @return [Series]
@@ -254,7 +309,7 @@ module Polars
     #
     # @example
     #   s = Polars::Series.new("a", [[1, 1, 2], [2, 3, 3]])
-    #   s.list.unique()
+    #   s.list.unique
     #   # =>
     #   # shape: (2,)
     #   # Series: 'a' [list[i64]]
@@ -266,6 +321,24 @@ module Polars
       super
     end
+    # Count the number of unique values in every sublist.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   s = Polars::Series.new("a", [[1, 1, 2], [2, 3, 4]])
+    #   s.list.n_unique
+    #   # =>
+    #   # shape: (2,)
+    #   # Series: 'a' [u32]
+    #   # [
+    #   #         2
+    #   #         3
+    #   # ]
+    def n_unique
+      super
+    end
     # Concat the arrays in a Series dtype List in linear time.
     #
     # @param other [Object]
@@ -292,7 +365,7 @@ module Polars
     #
     # So index `0` would return the first item of every sublist
     # and index `-1` would return the last item of every sublist
-    # if an index is out of bounds, it will return a `None`.
+    # if an index is out of bounds, it will return `nil`.
     #
     # @param index [Integer]
     #   Index to return per sublist
@@ -318,6 +391,63 @@ module Polars
       super
     end
+    # Take sublists by multiple indices.
+    #
+    # The indices may be defined in a single column, or by sublists in another
+    # column of dtype `List`.
+    #
+    # @param indices [Object]
+    #   Indices to return per sublist
+    # @param null_on_oob [Boolean]
+    #   Behavior if an index is out of bounds:
+    #   `true` -> set as null
+    #   `false` -> raise an error
+    #   Note that defaulting to raising an error is much cheaper
+    #
+    # @return [Series]
+    #
+    # @example
+    #   s = Polars::Series.new("a", [[3, 2, 1], [], [1, 2]])
+    #   s.list.gather([0, 2], null_on_oob: true)
+    #   # =>
+    #   # shape: (3,)
+    #   # Series: 'a' [list[i64]]
+    #   # [
+    #   #         [3, 1]
+    #   #         [null, null]
+    #   #         [1, null]
+    #   # ]
+    def gather(
+      indices,
+      null_on_oob: false
+    )
+      super
+    end
+    # Take every n-th value, starting from `offset`, in each sublist.
+    #
+    # @param n [Integer]
+    #   Gather every n-th element.
+    # @param offset [Integer]
+    #   Starting index.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   s = Polars::Series.new("a", [[1, 2, 3], [], [6, 7, 8, 9]])
+    #   s.list.gather_every(2, 1)
+    #   # =>
+    #   # shape: (3,)
+    #   # Series: 'a' [list[i64]]
+    #   # [
+    #   #         [2]
+    #   #         []
+    #   #         [7, 9]
+    #   # ]
+    def gather_every(n, offset = 0)
+      super
+    end
     # Get the value by index in the sublists.
     #
     # @return [Series]
@@ -554,6 +684,73 @@ module Polars
       super
     end
+    # Returns a column with a separate row for every list element.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   s = Polars::Series.new("a", [[1, 2, 3], [4, 5, 6]])
+    #   s.list.explode
+    #   # =>
+    #   # shape: (6,)
+    #   # Series: 'a' [i64]
+    #   # [
+    #   #         1
+    #   #         2
+    #   #         3
+    #   #         4
+    #   #         5
+    #   #         6
+    #   # ]
+    def explode
+      super
+    end
+    # Count how often the value produced by `element` occurs.
+    #
+    # @param element [Object]
+    #   An expression that produces a single value
+    #
+    # @return [Series]
+    #
+    # @example
+    #   s = Polars::Series.new("a", [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]])
+    #   s.list.count_matches(1)
+    #   # =>
+    #   # shape: (5,)
+    #   # Series: 'a' [u32]
+    #   # [
+    #   #         0
+    #   #         1
+    #   #         1
+    #   #         2
+    #   #         0
+    #   # ]
+    def count_matches(element)
+      super
+    end
+    # Convert a List column into an Array column with the same inner data type.
+    #
+    # @param width [Integer]
+    #   Width of the resulting Array column.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   s = Polars::Series.new([[1, 2], [3, 4]], dtype: Polars::List.new(Polars::Int8))
+    #   s.list.to_array(2)
+    #   # =>
+    #   # shape: (2,)
+    #   # Series: '' [array[i8, 2]]
+    #   # [
+    #   #         [1, 2]
+    #   #         [3, 4]
+    #   # ]
+    def to_array(width)
+      super
+    end
     # Convert the series of type `List` to a series of type `Struct`.
     #
     # @param n_field_strategy ["first_non_null", "max_width"]
@@ -608,5 +805,124 @@ module Polars
     def eval(expr)
       super
     end
+    # Filter elements in each list by a boolean expression, returning a new Series of lists.
+    #
+    # @param predicate [Object]
+    #   A boolean expression evaluated on each list element.
+    #   Use `Polars.element` to refer to the current element.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   s = Polars::Series.new("a", [[1, 4], [8, 5], [3, 2]])
+    #   s.list.filter(Polars.element % 2 == 0)
+    #   # =>
+    #   # shape: (3,)
+    #   # Series: 'a' [list[i64]]
+    #   # [
+    #   #         [4]
+    #   #         [8]
+    #   #         [2]
+    #   # ]
+    def filter(predicate)
+      super
+    end
+    # Compute the SET UNION between the elements in this list and the elements of `other`.
+    #
+    # @param other [Object]
+    #   Right hand side of the set operation.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
+    #   b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
+    #   a.list.set_union(b)
+    #   # =>
+    #   # shape: (4,)
+    #   # Series: '' [list[i64]]
+    #   # [
+    #   #         [1, 2, … 4]
+    #   #         [3]
+    #   #         [null, 3, 4]
+    #   #         [5, 6, … 8]
+    #   # ]
+    def set_union(other)
+      super
+    end
+    # Compute the SET DIFFERENCE between the elements in this list and the elements of `other`.
+    #
+    # @param other [Object]
+    #   Right hand side of the set operation.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
+    #   b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
+    #   a.list.set_difference(b)
+    #   # =>
+    #   # shape: (4,)
+    #   # Series: '' [list[i64]]
+    #   # [
+    #   #         [1]
+    #   #         []
+    #   #         []
+    #   #         [5, 7]
+    #   # ]
+    def set_difference(other)
+      super
+    end
+    # Compute the SET INTERSECTION between the elements in this list and the elements of `other`.
+    #
+    # @param other [Object]
+    #   Right hand side of the set operation.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
+    #   b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
+    #   a.list.set_intersection(b)
+    #   # =>
+    #   # shape: (4,)
+    #   # Series: '' [list[i64]]
+    #   # [
+    #   #         [2, 3]
+    #   #         []
+    #   #         [null, 3]
+    #   #         [6]
+    #   # ]
+    def set_intersection(other)
+      super
+    end
+    # Compute the SET SYMMETRIC DIFFERENCE between the elements in this list and the elements of `other`.
+    #
+    # @param other [Object]
+    #   Right hand side of the set operation.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
+    #   b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
+    #   a.list.set_symmetric_difference(b)
+    #   # =>
+    #   # shape: (4,)
+    #   # Series: '' [list[i64]]
+    #   # [
+    #   #         [1, 4]
+    #   #         [3]
+    #   #         [4]
+    #   #         [5, 7, 8]
+    #   # ]
+    def set_symmetric_difference(other)
+      super
+    end
   end
 end

data/lib/polars/meta_expr.rb CHANGED Viewed

@@ -97,6 +97,70 @@ module Polars
       _rbexpr.meta_is_regex_projection
     end
+    # Indicate if this expression only selects columns (optionally with aliasing).
+    #
+    # This can include bare columns, columns matched by regex or dtype, selectors
+    # and exclude ops, and (optionally) column/expression aliasing.
+    #
+    # @param allow_aliasing [Boolean]
+    #   If false (default), any aliasing is not considered to be column selection.
+    #   Set true to allow for column selection that also includes aliasing.
+    #
+    # @return [Boolean]
+    #
+    # @example
+    #   e = Polars.col("foo")
+    #   e.meta.is_column_selection
+    #   # => true
+    #
+    # @example
+    #   e = Polars.col("foo").alias("bar")
+    #   e.meta.is_column_selection
+    #   # => false
+    #
+    # @example
+    #   e.meta.is_column_selection(allow_aliasing: true)
+    #   # => true
+    #
+    # @example
+    #   e = Polars.col("foo") * Polars.col("bar")
+    #   e.meta.is_column_selection
+    #   # => false
+    #
+    # @example
+    #   e = Polars.cs.starts_with("foo")
+    #   e.meta.is_column_selection
+    #   # => true
+    #
+    # @example
+    #   e = Polars.cs.starts_with("foo").exclude("foo!")
+    #   e.meta.is_column_selection
+    #   # => true
+    def is_column_selection(allow_aliasing: false)
+      _rbexpr.meta_is_column_selection(allow_aliasing)
+    end
+    # Indicate if this expression is a literal value (optionally aliased).
+    #
+    # @param allow_aliasing [Boolean]
+    #   If false (default), only a bare literal will match.
+    #   Set true to also allow for aliased literals.
+    #
+    # @return [Boolean]
+    #
+    # @example
+    #   e = Polars.lit(123)
+    #   e.meta.is_literal
+    #   # => true
+    #
+    # @example
+    #   e = Polars.lit(987.654321).alias("foo")
+    #   e.meta.is_literal
+    #   # => false
+    def is_literal(allow_aliasing: false)
+      _rbexpr.meta_is_literal(allow_aliasing)
+    end
     # Get the column name that this expression would produce.
     #
     # @return [String]
@@ -171,32 +235,17 @@ module Polars
       Utils.wrap_expr(_rbexpr.meta_undo_aliases)
     end
-    # Turn this expression in a selector.
-    #
-    # @return [Expr]
-    def _as_selector
-      Utils.wrap_expr(_rbexpr._meta_as_selector)
-    end
-    # Add selectors.
+    # Try to turn this expression into a selector.
     #
-    # @return [Expr]
-    def _selector_add(other)
-      Utils.wrap_expr(_rbexpr._meta_selector_add(other._rbexpr))
-    end
-    # Subtract selectors.
+    # Raises if the underlying expression is not a column or selector.
     #
     # @return [Expr]
-    def _selector_sub(other)
-      Utils.wrap_expr(_rbexpr._meta_selector_sub(other._rbexpr))
-    end
-    # & selectors.
     #
-    # @return [Expr]
-    def _selector_and(other)
-      Utils.wrap_expr(_rbexpr._meta_selector_and(other._rbexpr))
+    # @note
+    #   This functionality is considered **unstable**. It may be changed
+    #   at any point without it being considered a breaking change.
+    def as_selector
+      Selector._from_rbselector(_rbexpr.into_selector)
     end
     # Format the expression as a tree.

data/lib/polars/name_expr.rb CHANGED Viewed

@@ -194,5 +194,41 @@ module Polars
     def to_uppercase
       Utils.wrap_expr(_rbexpr.name_to_uppercase)
     end
+    # Add a prefix to all field names of a struct.
+    #
+    # @note
+    #   This only takes effect for struct columns.
+    #
+    # @param prefix [String]
+    #   Prefix to add to the field name.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"x" => {"a" => 1, "b" => 2}})
+    #   df.select(Polars.col("x").name.prefix_fields("prefix_")).schema
+    #   # => {"x"=>Polars::Struct({"prefix_a"=>Polars::Int64, "prefix_b"=>Polars::Int64})}
+    def prefix_fields(prefix)
+      Utils.wrap_expr(_rbexpr.name_prefix_fields(prefix))
+    end
+    # Add a suffix to all field names of a struct.
+    #
+    # @note
+    #   This only takes effect for struct columns.
+    #
+    # @param suffix [String]
+    #   Suffix to add to the field name.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"x" => {"a" => 1, "b" => 2}})
+    #   df.select(Polars.col("x").name.suffix_fields("_suffix")).schema
+    #   # => {"x"=>Polars::Struct({"a_suffix"=>Polars::Int64, "b_suffix"=>Polars::Int64})}
+    def suffix_fields(suffix)
+      Utils.wrap_expr(_rbexpr.name_suffix_fields(suffix))
+    end
   end
 end

data/lib/polars/scan_cast_options.rb ADDED Viewed

@@ -0,0 +1,64 @@
+module Polars
+  # Options controlling dtype casting and struct-field handling when scanning files.
+  class ScanCastOptions
+    # Common configuration for scanning files.
+    #
+    # @note
+    #   This functionality is considered **unstable**. It may be changed
+    #   at any point without it being considered a breaking change.
+    #
+    # @param integer_cast ['upcast', 'forbid']
+    #   Configuration for casting from integer types:
+    #
+    #   * `upcast`: Allow lossless casting to wider integer types.
+    #   * `forbid`: Raises an error if dtypes do not match.
+    #
+    # @param float_cast ['upcast', 'downcast', 'forbid']
+    #   Configuration for casting from float types:
+    #
+    #   * `upcast`: Allow casting to higher precision float types.
+    #   * `downcast`: Allow casting to lower precision float types.
+    #   * `forbid`: Raises an error if dtypes do not match.
+    #
+    # @param datetime_cast ['nanosecond-downcast', 'convert-timezone', 'forbid']
+    #   Configuration for casting from datetime types:
+    #
+    #   * `nanosecond-downcast`: Allow nanosecond precision datetime to be
+    #     downcast to any lower precision. This has a similar effect to
+    #     PyArrow's `coerce_int96_timestamp_unit`.
+    #   * `convert-timezone`: Allow casting to a different timezone.
+    #   * `forbid`: Raises an error if dtypes do not match.
+    #
+    # @param missing_struct_fields ['insert', 'raise']
+    #   Configuration for behavior when struct fields defined in the schema
+    #   are missing from the data:
+    #
+    #   * `insert`: Inserts the missing fields.
+    #   * `raise`: Raises an error.
+    #
+    # @param extra_struct_fields ['ignore', 'raise']
+    #   Configuration for behavior when extra struct fields outside of the
+    #   defined schema are encountered in the data:
+    #
+    #   * `ignore`: Silently ignores the extra fields.
+    #   * `raise`: Raises an error.
+    def initialize(
+      integer_cast: "forbid",
+      float_cast: "forbid",
+      datetime_cast: "forbid",
+      missing_struct_fields: "raise",
+      extra_struct_fields: "raise",
+      _internal_call: false
+    )
+      @integer_cast = integer_cast
+      @float_cast = float_cast
+      @datetime_cast = datetime_cast
+      @missing_struct_fields = missing_struct_fields
+      @extra_struct_fields = extra_struct_fields
+    end
+    def self.default
+      new(_internal_call: true)
+    end
+  end
+end