RubyGems - polars-df - Versions diffs - 0.20.0-x86_64-darwin → 0.21.0-x86_64-darwin - Mend

polars-df 0.20.0-x86_64-darwin → 0.21.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +11 -0
data/Cargo.lock +192 -186
data/LICENSE-THIRD-PARTY.txt +1431 -1810
data/LICENSE.txt +1 -1
data/lib/polars/3.2/polars.bundle +0 -0
data/lib/polars/3.3/polars.bundle +0 -0
data/lib/polars/3.4/polars.bundle +0 -0
data/lib/polars/cat_name_space.rb +3 -43
data/lib/polars/catalog/unity/catalog_info.rb +20 -0
data/lib/polars/catalog/unity/column_info.rb +31 -0
data/lib/polars/catalog/unity/namespace_info.rb +21 -0
data/lib/polars/catalog/unity/table_info.rb +50 -0
data/lib/polars/catalog.rb +448 -0
data/lib/polars/convert.rb +10 -0
data/lib/polars/data_frame.rb +151 -30
data/lib/polars/data_types.rb +47 -3
data/lib/polars/exceptions.rb +7 -2
data/lib/polars/expr.rb +34 -31
data/lib/polars/functions/col.rb +6 -5
data/lib/polars/functions/lazy.rb +114 -15
data/lib/polars/functions/repeat.rb +4 -0
data/lib/polars/io/csv.rb +18 -0
data/lib/polars/io/json.rb +16 -0
data/lib/polars/io/ndjson.rb +13 -0
data/lib/polars/io/parquet.rb +45 -63
data/lib/polars/io/scan_options.rb +47 -0
data/lib/polars/lazy_frame.rb +163 -75
data/lib/polars/list_expr.rb +204 -7
data/lib/polars/list_name_space.rb +120 -1
data/lib/polars/meta_expr.rb +7 -22
data/lib/polars/scan_cast_options.rb +64 -0
data/lib/polars/schema.rb +6 -1
data/lib/polars/selector.rb +138 -0
data/lib/polars/selectors.rb +931 -202
data/lib/polars/series.rb +34 -11
data/lib/polars/string_expr.rb +24 -3
data/lib/polars/string_name_space.rb +11 -0
data/lib/polars/utils/parse.rb +40 -0
data/lib/polars/utils.rb +5 -1
data/lib/polars/version.rb +1 -1
data/lib/polars.rb +8 -0
metadata +10 -2

data/lib/polars/data_frame.rb CHANGED Viewed

@@ -47,12 +47,7 @@ module Polars
     # @param nan_to_null [Boolean]
     #   If the data comes from one or more Numo arrays, can optionally convert input
     #   data NaN values to null instead. This is a no-op for all other input data.
-    def initialize(data = nil, schema: nil, columns: nil, schema_overrides: nil, strict: true, orient: nil, infer_schema_length: 100, nan_to_null: false)
-      if schema && columns
-        warn "columns is ignored when schema is passed"
-      end
-      schema ||= columns
+    def initialize(data = nil, schema: nil, schema_overrides: nil, strict: true, orient: nil, infer_schema_length: 100, nan_to_null: false)
       if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
         raise ArgumentError, "Use read_database instead"
       end
@@ -722,7 +717,7 @@ module Polars
     # @param file [String, nil]
     #   File path to which the result should be written. If set to `nil`
     #   (default), the output is returned as a string instead.
-    # @param has_header [Boolean]
+    # @param include_header [Boolean]
     #   Whether to include header in the CSV output.
     # @param sep [String]
     #   Separate CSV fields with this symbol.
@@ -763,8 +758,7 @@ module Polars
     #   df.write_csv("file.csv")
     def write_csv(
       file = nil,
-      has_header: true,
-      include_header: nil,
+      include_header: true,
       sep: ",",
       quote: '"',
       batch_size: 1024,
@@ -774,8 +768,6 @@ module Polars
       float_precision: nil,
       null_value: nil
     )
-      include_header = has_header if include_header.nil?
       if sep.length > 1
         raise ArgumentError, "only single byte separator is allowed"
       elsif quote.length > 1
@@ -834,6 +826,8 @@ module Polars
     #   File path to which the file should be written.
     # @param compression ["uncompressed", "snappy", "deflate"]
     #   Compression method. Defaults to "uncompressed".
+    # @param name [String]
+    #   Schema name. Defaults to empty string.
     #
     # @return [nil]
     def write_avro(file, compression = "uncompressed", name: "")
@@ -856,6 +850,24 @@ module Polars
     #   File path to which the file should be written.
     # @param compression ["uncompressed", "lz4", "zstd"]
     #   Compression method. Defaults to "uncompressed".
+    # @param compat_level [Object]
+    #   Use a specific compatibility level
+    #   when exporting Polars' internal data structures.
+    # @param storage_options [Hash]
+    #   Options that indicate how to connect to a cloud provider.
+    #
+    #   The cloud providers currently supported are AWS, GCP, and Azure.
+    #   See supported keys here:
+    #
+    #   * [aws](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html)
+    #   * [gcp](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html)
+    #   * [azure](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html)
+    #   * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
+    #
+    #   If `storage_options` is not provided, Polars will try to infer the
+    #   information from environment variables.
+    # @param retries [Integer]
+    #   Number of retries if accessing a cloud instance fails.
     #
     # @return [nil]
     def write_ipc(
@@ -901,6 +913,9 @@ module Polars
     #   be written. If set to `None`, the output is returned as a BytesIO object.
     # @param compression ['uncompressed', 'lz4', 'zstd']
     #   Compression method. Defaults to "uncompressed".
+    # @param compat_level [Object]
+    #   Use a specific compatibility level
+    #   when exporting Polars' internal data structures.
     #
     # @return [Object]
     #
@@ -1215,7 +1230,7 @@ module Polars
     #       "y" => 1_000_000.times.map { |v| v / 1000.0 },
     #       "z" => 1_000_000.times.map(&:to_s)
     #     },
-    #     columns: {"x" => :u32, "y" => :f64, "z" => :str}
+    #     schema: {"x" => :u32, "y" => :f64, "z" => :str}
     #   )
     #   df.estimated_size
     #   # => 25888898
@@ -2138,6 +2153,22 @@ module Polars
     #   Define whether the temporal window interval is closed or not.
     # @param by
     #   Also group by this column/these columns
+    # @param start_by ['window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
+    #   The strategy to determine the start of the first window by.
+    #
+    #   * 'window': Start by taking the earliest timestamp, truncating it with
+    #     `every`, and then adding `offset`.
+    #     Note that weekly windows start on Monday.
+    #   * 'datapoint': Start from the first encountered data point.
+    #   * a day of the week (only takes effect if `every` contains `'w'`):
+    #
+    #     * 'monday': Start the window on the Monday before the first data point.
+    #     * 'tuesday': Start the window on the Tuesday before the first data point.
+    #     * ...
+    #     * 'sunday': Start the window on the Sunday before the first data point.
+    #
+    #     The resulting window is then shifted back until the earliest datapoint
+    #     is in or in front of it.
     #
     # @return [DataFrame]
     #
@@ -2439,12 +2470,12 @@ module Polars
     # @param on [String]
     #   Join column of both DataFrames. If set, `left_on` and `right_on` should be
     #   None.
-    # @param by [Object]
-    #   join on these columns before doing asof join
     # @param by_left [Object]
     #   join on these columns before doing asof join
     # @param by_right [Object]
     #   join on these columns before doing asof join
+    # @param by [Object]
+    #   join on these columns before doing asof join
     # @param strategy ["backward", "forward"]
     #   Join strategy.
     # @param suffix [String]
@@ -2454,14 +2485,6 @@ module Polars
     #   keys are within this distance. If an asof join is done on columns of dtype
     #   "Date", "Datetime", "Duration" or "Time" you use the following string
     #   language:
-    # @param allow_exact_matches [Boolean]
-    #   Whether exact matches are valid join predicates.
-    #     - If true, allow matching with the same `on` value (i.e. less-than-or-equal-to / greater-than-or-equal-to).
-    #     - If false, don't match the same `on` value (i.e., strictly less-than / strictly greater-than).
-    # @param check_sortedness [Boolean]
-    #   Check the sortedness of the asof keys. If the keys are not sorted Polars
-    #   will error, or in case of 'by' argument raise a warning. This might become
-    #   a hard error in the future.
     #
     #    - 1ns   (1 nanosecond)
     #    - 1us   (1 microsecond)
@@ -2489,6 +2512,14 @@ module Polars
     #     - true: -> Always coalesce join columns.
     #     - false: -> Never coalesce join columns.
     #   Note that joining on any other expressions than `col` will turn off coalescing.
+    # @param allow_exact_matches [Boolean]
+    #   Whether exact matches are valid join predicates.
+    #     - If true, allow matching with the same `on` value (i.e. less-than-or-equal-to / greater-than-or-equal-to).
+    #     - If false, don't match the same `on` value (i.e., strictly less-than / strictly greater-than).
+    # @param check_sortedness [Boolean]
+    #   Check the sortedness of the asof keys. If the keys are not sorted Polars
+    #   will error, or in case of 'by' argument raise a warning. This might become
+    #   a hard error in the future.
     #
     # @return [DataFrame]
     #
@@ -3436,19 +3467,22 @@ module Polars
     # Create a spreadsheet-style pivot table as a DataFrame.
     #
+    # @param on [Object]
+    #   Columns whose values will be used as the header of the output DataFrame
+    # @param index [Object]
+    #   One or multiple keys to group by
     # @param values [Object]
     #   Column values to aggregate. Can be multiple columns if the `on`
     #   argument contains multiple columns as well.
-    # @param index [Object]
-    #   One or multiple keys to group by
-    # @param on [Object]
-    #   Columns whose values will be used as the header of the output DataFrame
     # @param aggregate_function ["first", "sum", "max", "min", "mean", "median", "last", "count"]
     #   A predefined aggregate function str or an expression.
     # @param maintain_order [Object]
     #   Sort the grouped keys so that the output order is predictable.
     # @param sort_columns [Object]
     #   Sort the transposed columns by name. Default is by order of discovery.
+    # @param separator [String]
+    #   Used as separator/delimiter in generated column names in case of multiple
+    #   `values` columns.
     #
     # @return [DataFrame]
     #
@@ -3712,6 +3746,8 @@ module Polars
     # @param maintain_order [Boolean]
     #   Keep predictable output order. This is slower as it requires an extra sort
     #   operation.
+    # @param include_key [Boolean]
+    #   Include the columns used to partition the DataFrame in the output.
     # @param as_dict [Boolean]
     #   If true, return the partitions in a dictionary keyed by the distinct group
     #   values instead of a list.
@@ -4556,9 +4592,15 @@ module Polars
     # Get one hot encoded dummy variables.
     #
-    # @param columns
+    # @param columns [Array]
     #   A subset of columns to convert to dummy variables. `nil` means
     #   "all columns".
+    # @param separator [String]
+    #   Separator/delimiter used when generating column names.
+    # @param drop_first [Boolean]
+    #   Remove the first category from the variables being encoded.
+    # @param drop_nulls [Boolean]
+    #   If there are null values in the series, a `null` column is not generated.
     #
     # @return [DataFrame]
     #
@@ -4581,11 +4623,11 @@ module Polars
     #   # │ 1     ┆ 0     ┆ 1     ┆ 0     ┆ 1     ┆ 0     │
     #   # │ 0     ┆ 1     ┆ 0     ┆ 1     ┆ 0     ┆ 1     │
     #   # └───────┴───────┴───────┴───────┴───────┴───────┘
-    def to_dummies(columns: nil, separator: "_", drop_first: false)
+    def to_dummies(columns: nil, separator: "_", drop_first: false, drop_nulls: false)
       if columns.is_a?(::String)
         columns = [columns]
       end
-      _from_rbdf(_df.to_dummies(columns, separator, drop_first))
+      _from_rbdf(_df.to_dummies(columns, separator, drop_first, drop_nulls))
     end
     # Drop duplicate rows from this DataFrame.
@@ -4753,7 +4795,7 @@ module Polars
     #   # │ --- ┆ --- ┆ --- │
     #   # │ i64 ┆ i64 ┆ str │
     #   # ╞═════╪═════╪═════╡
-    #   # │ 3   ┆ 8   ┆ c   │
+    #   # │ 1   ┆ 6   ┆ a   │
     #   # │ 2   ┆ 7   ┆ b   │
     #   # └─────┴─────┴─────┘
     def sample(
@@ -4979,6 +5021,85 @@ module Polars
       end
     end
+    # Convert columnar data to rows as Ruby arrays in a hash keyed by some column.
+    #
+    # This method is like `rows`, but instead of returning rows in a flat list, rows
+    # are grouped by the values in the `key` column(s) and returned as a hash.
+    #
+    # Note that this method should not be used in place of native operations, due to
+    # the high cost of materializing all frame data out into a hash; it should
+    # be used only when you need to move the values out into a Ruby data structure
+    # or other object that cannot operate directly with Polars/Arrow.
+    #
+    # @param key [Object]
+    #   The column(s) to use as the key for the returned hash. If multiple
+    #   columns are specified, the key will be an array of those values, otherwise
+    #   it will be the value from the single key column.
+    # @param named [Boolean]
+    #   Return hashes instead of arrays. The hashes are a mapping of
+    #   column name to row value. This is more expensive than returning an
+    #   array, but allows for accessing values by column name.
+    # @param include_key [Boolean]
+    #   Include key values inline with the associated data (by default the key
+    #   values are omitted as a memory/performance optimisation, as they can be
+    #   reconstructed from the key).
+    # @param unique [Boolean]
+    #   Indicate that the key is unique; this will result in a 1:1 mapping from
+    #   key to a single associated row. Note that if the key is *not* actually
+    #   unique the last row with the given key will be returned.
+    #
+    # @return [Hash]
+    #
+    # @example Group rows by the given key column(s):
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "w" => ["a", "b", "b", "a"],
+    #       "x" => ["q", "q", "q", "k"],
+    #       "y" => [1.0, 2.5, 3.0, 4.5],
+    #       "z" => [9, 8, 7, 6]
+    #     }
+    #   )
+    #   df.rows_by_key(["w"])
+    #   # => {"a"=>[["q", 1.0, 9], ["k", 4.5, 6]], "b"=>[["q", 2.5, 8], ["q", 3.0, 7]]}
+    #
+    # @example Return the same row groupings as hashes:
+    #   df.rows_by_key(["w"], named: true)
+    #   # => {"a"=>[{"x"=>"q", "y"=>1.0, "z"=>9}, {"x"=>"k", "y"=>4.5, "z"=>6}], "b"=>[{"x"=>"q", "y"=>2.5, "z"=>8}, {"x"=>"q", "y"=>3.0, "z"=>7}]}
+    #
+    # @example Return row groupings, assuming keys are unique:
+    #   df.rows_by_key(["z"], unique: true)
+    #   # => {9=>["a", "q", 1.0], 8=>["b", "q", 2.5], 7=>["b", "q", 3.0], 6=>["a", "k", 4.5]}
+    #
+    # @example Return row groupings as hashes, assuming keys are unique:
+    #   df.rows_by_key(["z"], named: true, unique: true)
+    #   # => {9=>{"w"=>"a", "x"=>"q", "y"=>1.0}, 8=>{"w"=>"b", "x"=>"q", "y"=>2.5}, 7=>{"w"=>"b", "x"=>"q", "y"=>3.0}, 6=>{"w"=>"a", "x"=>"k", "y"=>4.5}}
+    #
+    # @example Return hash rows grouped by a compound key, including key values:
+    #   df.rows_by_key(["w", "x"], named: true, include_key: true)
+    #   # => {["a", "q"]=>[{"w"=>"a", "x"=>"q", "y"=>1.0, "z"=>9}], ["b", "q"]=>[{"w"=>"b", "x"=>"q", "y"=>2.5, "z"=>8}, {"w"=>"b", "x"=>"q", "y"=>3.0, "z"=>7}], ["a", "k"]=>[{"w"=>"a", "x"=>"k", "y"=>4.5, "z"=>6}]}
+    def rows_by_key(key, named: false, include_key: false, unique: false)
+      key = Utils._expand_selectors(self, key)
+      keys = key.size == 1 ? get_column(key[0]) : select(key).iter_rows
+      if include_key
+        values = self
+      else
+        data_cols = schema.keys - key
+        values = select(data_cols)
+      end
+      zipped = keys.each.zip(values.iter_rows(named: named))
+      # if unique, we expect to write just one entry per key; otherwise, we're
+      # returning a list of rows for each key, so append into a hash of arrays.
+      if unique
+        zipped.to_h
+      else
+        zipped.each_with_object({}) { |(key, data), h| (h[key] ||= []) << data }
+      end
+    end
     # Returns an iterator over the DataFrame of rows of Ruby-native values.
     #
     # @param named [Boolean]

data/lib/polars/data_types.rb CHANGED Viewed

@@ -294,12 +294,56 @@ module Polars
     end
   end
+  # A named collection of categories for `Categorical`.
+  #
+  # Two categories are considered equal (and will use the same physical mapping of
+  # categories to strings) if they have the same name, namespace and physical backing
+  # type, even if they are created in separate calls to `Categories`.
+  #
+  # @note
+  #   This functionality is currently considered **unstable**. It may be
+  #   changed at any point without it being considered a breaking change.
+  class Categories
+    attr_accessor :_categories
+    def initialize
+      # TODO fix
+      name = nil
+      if name.nil? || name == ""
+        @_categories = RbCategories.global_categories
+        return
+      end
+      raise Todo
+    end
+    def self._from_rb_categories(rb_categories)
+      slf = new
+      slf._categories = rb_categories
+      slf
+    end
+  end
   # A categorical encoding of a set of strings.
   class Categorical < DataType
-    attr_reader :ordering
+    attr_reader :ordering, :categories
-    def initialize(ordering = "physical")
-      @ordering = ordering
+    def initialize(ordering = "physical", **kwargs)
+      if ordering.is_a?(Categories)
+        @ordering = "lexical"
+        @categories = ordering
+        # assert kwargs.length == 0
+        return
+      end
+      @ordering = "lexical"
+      if kwargs[:categories]
+        # assert kwargs.length == 1
+        @categories = kwargs[:categories]
+      else
+        # assert kwargs.length == 0
+        @categories = Categories.new
+      end
     end
   end

data/lib/polars/exceptions.rb CHANGED Viewed

@@ -24,10 +24,15 @@ module Polars
   class TooManyRowsReturned < RowsException; end
   # @private
-  class AssertionError < Error; end
+  # Exception raised when Polars could not perform an underlying computation.
+  class ComputeError < Error; end
   # @private
-  class ComputeError < Error; end
+  # Exception raised when a column name is duplicated.
+  class DuplicateError < Error; end
+  # @private
+  class AssertionError < Error; end
   # @private
   class Todo < Error

data/lib/polars/expr.rb CHANGED Viewed

@@ -333,12 +333,11 @@ module Polars
     # with `$`.
     #
     # @param columns [Object]
-    #   Column(s) to exclude from selection.
-    #   This can be:
-    #
-    #   - a column name, or multiple column names
-    #   - a regular expression starting with `^` and ending with `$`
-    #   - a dtype or multiple dtypes
+    #   The name or datatype of the column(s) to exclude. Accepts regular expression
+    #   input. Regular expressions should start with `^` and end with `$`.
+    # @param more_columns [Array]
+    #   Additional names or datatypes of columns to exclude, specified as positional
+    #   arguments.
     #
     # @return [Expr]
     #
@@ -362,24 +361,8 @@ module Polars
     #   # │ 2   ┆ 2.5  │
     #   # │ 3   ┆ 1.5  │
     #   # └─────┴──────┘
-    def exclude(columns)
-      if columns.is_a?(::String)
-        columns = [columns]
-        return _from_rbexpr(_rbexpr.exclude(columns))
-      elsif !columns.is_a?(::Array)
-        columns = [columns]
-        return _from_rbexpr(_rbexpr.exclude_dtype(columns))
-      end
-      if !columns.all? { |a| a.is_a?(::String) } || !columns.all? { |a| Utils.is_polars_dtype(a) }
-        raise ArgumentError, "input should be all string or all DataType"
-      end
-      if columns[0].is_a?(::String)
-        _from_rbexpr(_rbexpr.exclude(columns))
-      else
-        _from_rbexpr(_rbexpr.exclude_dtype(columns))
-      end
+    def exclude(columns, *more_columns)
+      meta.as_selector.exclude(columns, *more_columns).as_expr
     end
     # Keep the original root name of the expression.
@@ -1158,6 +1141,13 @@ module Polars
     #
     # @param decimals [Integer]
     #   Number of decimals to round by.
+    # @param mode ['half_to_even', 'half_away_from_zero']
+    #   RoundMode.
+    #
+    #   * *half_to_even*
+    #     round to the nearest even number
+    #   * *half_away_from_zero*
+    #     round to the nearest number away from zero
     #
     # @return [Expr]
     #
@@ -1556,9 +1546,18 @@ module Polars
     #
     # @param by [Object]
     #   The column(s) used for sorting.
+    # @param more_by [Array]
+    #   Additional columns to sort by, specified as positional arguments.
     # @param reverse [Boolean]
     #   false -> order from small to large.
     #   true -> order from large to small.
+    # @param nulls_last [Boolean]
+    #   Place null values last; can specify a single boolean applying to all columns
+    #   or a sequence of booleans for per-column control.
+    # @param multithreaded [Boolean]
+    #   Sort using multiple threads.
+    # @param maintain_order [Boolean]
+    #   Whether the order should be maintained if elements are equal.
     #
     # @return [Expr]
     #
@@ -4908,10 +4907,6 @@ module Polars
     #
     # @param by [String]
     #   This column must be of dtype Datetime or Date.
-    # @param quantile [Float]
-    #   Quantile between 0.0 and 1.0.
-    # @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear']
-    #   Interpolation method.
     # @param window_size  [String]
     #   The length of the window. Can be a dynamic
     #   temporal size indicated by a timedelta or the following string language:
@@ -4932,6 +4927,10 @@ module Polars
     #   (which may not be 24 hours, due to daylight savings). Similarly for
     #   "calendar week", "calendar month", "calendar quarter", and
     #   "calendar year".
+    # @param quantile [Float]
+    #   Quantile between 0.0 and 1.0.
+    # @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear']
+    #   Interpolation method.
     # @param min_periods [Integer]
     #   The number of values in the window that should be non-null before computing
     #   a result.
@@ -5366,6 +5365,8 @@ module Polars
     #   a result. If None, it will be set equal to window size.
     # @param center [Boolean]
     #   Set the labels at the center of the window
+    # @param ddof [Integer]
+    #   "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
     #
     # @note
     #   This functionality is experimental and may change without it being considered a
@@ -5445,6 +5446,8 @@ module Polars
     #   a result. If None, it will be set equal to window size.
     # @param center [Boolean]
     #   Set the labels at the center of the window
+    # @param ddof [Integer]
+    #   "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
     #
     # @note
     #   This functionality is experimental and may change without it being considered a
@@ -5626,10 +5629,10 @@ module Polars
     #   # ╞══════╡
     #   # │ null │
     #   # │ null │
-    #   # │ 1.0  │
     #   # │ 2.0  │
     #   # │ 3.0  │
     #   # │ 4.0  │
+    #   # │ 6.0  │
     #   # └──────┘
     def rolling_quantile(
       quantile,
@@ -6471,8 +6474,8 @@ module Polars
     #   # │ i64 │
     #   # ╞═════╡
     #   # │ 2   │
-    #   # │ 1   │
     #   # │ 3   │
+    #   # │ 1   │
     #   # └─────┘
     def shuffle(seed: nil)
       if seed.nil?
@@ -6508,7 +6511,7 @@ module Polars
     #   # │ i64 │
     #   # ╞═════╡
     #   # │ 3   │
-    #   # │ 1   │
+    #   # │ 3   │
     #   # │ 1   │
     #   # └─────┘
     def sample(

data/lib/polars/functions/col.rb CHANGED Viewed

@@ -8,11 +8,11 @@ module Polars
         if Utils.strlike?(name)
           names_str = [name]
           names_str.concat(more_names)
-          return Utils.wrap_expr(Plr.cols(names_str.map(&:to_s)))
+          return Selector._by_name(names_str.map(&:to_s), strict: true).as_expr
         elsif Utils.is_polars_dtype(name)
           dtypes = [name]
           dtypes.concat(more_names)
-          return Utils.wrap_expr(Plr.dtype_cols(dtypes))
+          return Selector._by_type(dtypes).as_expr
         else
           msg = "invalid input for `col`\n\nExpected `str` or `DataType`, got #{name.class.name}."
           raise TypeError, msg
@@ -22,7 +22,8 @@ module Polars
       if Utils.strlike?(name)
         Utils.wrap_expr(Plr.col(name.to_s))
       elsif Utils.is_polars_dtype(name)
-        Utils.wrap_expr(Plr.dtype_cols([name]))
+        dtypes = [name]
+        Selector._by_dtype(dtypes).as_expr
       elsif name.is_a?(::Array) || name.is_a?(::Set)
         names = Array(name)
         if names.empty?
@@ -31,9 +32,9 @@ module Polars
         item = names[0]
         if Utils.strlike?(item)
-          Utils.wrap_expr(Plr.cols(names.map(&:to_s)))
+          Selector._by_name(names.map(&:to_s), strict: true).as_expr
         elsif Utils.is_polars_dtype(item)
-          Utils.wrap_expr(Plr.dtype_cols(names))
+          Selector._by_dtype(names).as_expr
         else
           msg = "invalid input for `col`\n\nExpected iterable of type `str` or `DataType`, got iterable of type #{item.class.name}."
           raise TypeError, msg