polars-df 0.19.0-x86_64-darwin → 0.21.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -0
  3. data/Cargo.lock +211 -320
  4. data/LICENSE-THIRD-PARTY.txt +1256 -2131
  5. data/LICENSE.txt +1 -1
  6. data/lib/polars/3.2/polars.bundle +0 -0
  7. data/lib/polars/3.3/polars.bundle +0 -0
  8. data/lib/polars/3.4/polars.bundle +0 -0
  9. data/lib/polars/cat_name_space.rb +3 -43
  10. data/lib/polars/catalog/unity/catalog_info.rb +20 -0
  11. data/lib/polars/catalog/unity/column_info.rb +31 -0
  12. data/lib/polars/catalog/unity/namespace_info.rb +21 -0
  13. data/lib/polars/catalog/unity/table_info.rb +50 -0
  14. data/lib/polars/catalog.rb +448 -0
  15. data/lib/polars/convert.rb +10 -0
  16. data/lib/polars/data_frame.rb +151 -30
  17. data/lib/polars/data_types.rb +47 -3
  18. data/lib/polars/exceptions.rb +7 -2
  19. data/lib/polars/expr.rb +48 -39
  20. data/lib/polars/functions/col.rb +6 -5
  21. data/lib/polars/functions/eager.rb +1 -1
  22. data/lib/polars/functions/lazy.rb +114 -15
  23. data/lib/polars/functions/repeat.rb +4 -0
  24. data/lib/polars/io/csv.rb +18 -0
  25. data/lib/polars/io/json.rb +16 -0
  26. data/lib/polars/io/ndjson.rb +13 -0
  27. data/lib/polars/io/parquet.rb +45 -63
  28. data/lib/polars/io/scan_options.rb +47 -0
  29. data/lib/polars/lazy_frame.rb +163 -75
  30. data/lib/polars/list_expr.rb +213 -17
  31. data/lib/polars/list_name_space.rb +121 -8
  32. data/lib/polars/meta_expr.rb +14 -29
  33. data/lib/polars/scan_cast_options.rb +64 -0
  34. data/lib/polars/schema.rb +6 -1
  35. data/lib/polars/selector.rb +138 -0
  36. data/lib/polars/selectors.rb +931 -202
  37. data/lib/polars/series.rb +46 -19
  38. data/lib/polars/string_expr.rb +24 -3
  39. data/lib/polars/string_name_space.rb +12 -1
  40. data/lib/polars/utils/parse.rb +40 -0
  41. data/lib/polars/utils.rb +5 -1
  42. data/lib/polars/version.rb +1 -1
  43. data/lib/polars.rb +8 -0
  44. metadata +10 -2
@@ -47,12 +47,7 @@ module Polars
47
47
  # @param nan_to_null [Boolean]
48
48
  # If the data comes from one or more Numo arrays, can optionally convert input
49
49
  # data NaN values to null instead. This is a no-op for all other input data.
50
- def initialize(data = nil, schema: nil, columns: nil, schema_overrides: nil, strict: true, orient: nil, infer_schema_length: 100, nan_to_null: false)
51
- if schema && columns
52
- warn "columns is ignored when schema is passed"
53
- end
54
- schema ||= columns
55
-
50
+ def initialize(data = nil, schema: nil, schema_overrides: nil, strict: true, orient: nil, infer_schema_length: 100, nan_to_null: false)
56
51
  if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
57
52
  raise ArgumentError, "Use read_database instead"
58
53
  end
@@ -722,7 +717,7 @@ module Polars
722
717
  # @param file [String, nil]
723
718
  # File path to which the result should be written. If set to `nil`
724
719
  # (default), the output is returned as a string instead.
725
- # @param has_header [Boolean]
720
+ # @param include_header [Boolean]
726
721
  # Whether to include header in the CSV output.
727
722
  # @param sep [String]
728
723
  # Separate CSV fields with this symbol.
@@ -763,8 +758,7 @@ module Polars
763
758
  # df.write_csv("file.csv")
764
759
  def write_csv(
765
760
  file = nil,
766
- has_header: true,
767
- include_header: nil,
761
+ include_header: true,
768
762
  sep: ",",
769
763
  quote: '"',
770
764
  batch_size: 1024,
@@ -774,8 +768,6 @@ module Polars
774
768
  float_precision: nil,
775
769
  null_value: nil
776
770
  )
777
- include_header = has_header if include_header.nil?
778
-
779
771
  if sep.length > 1
780
772
  raise ArgumentError, "only single byte separator is allowed"
781
773
  elsif quote.length > 1
@@ -834,6 +826,8 @@ module Polars
834
826
  # File path to which the file should be written.
835
827
  # @param compression ["uncompressed", "snappy", "deflate"]
836
828
  # Compression method. Defaults to "uncompressed".
829
+ # @param name [String]
830
+ # Schema name. Defaults to empty string.
837
831
  #
838
832
  # @return [nil]
839
833
  def write_avro(file, compression = "uncompressed", name: "")
@@ -856,6 +850,24 @@ module Polars
856
850
  # File path to which the file should be written.
857
851
  # @param compression ["uncompressed", "lz4", "zstd"]
858
852
  # Compression method. Defaults to "uncompressed".
853
+ # @param compat_level [Object]
854
+ # Use a specific compatibility level
855
+ # when exporting Polars' internal data structures.
856
+ # @param storage_options [Hash]
857
+ # Options that indicate how to connect to a cloud provider.
858
+ #
859
+ # The cloud providers currently supported are AWS, GCP, and Azure.
860
+ # See supported keys here:
861
+ #
862
+ # * [aws](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html)
863
+ # * [gcp](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html)
864
+ # * [azure](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html)
865
+ # * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
866
+ #
867
+ # If `storage_options` is not provided, Polars will try to infer the
868
+ # information from environment variables.
869
+ # @param retries [Integer]
870
+ # Number of retries if accessing a cloud instance fails.
859
871
  #
860
872
  # @return [nil]
861
873
  def write_ipc(
@@ -901,6 +913,9 @@ module Polars
901
913
  # be written. If set to `None`, the output is returned as a BytesIO object.
902
914
  # @param compression ['uncompressed', 'lz4', 'zstd']
903
915
  # Compression method. Defaults to "uncompressed".
916
+ # @param compat_level [Object]
917
+ # Use a specific compatibility level
918
+ # when exporting Polars' internal data structures.
904
919
  #
905
920
  # @return [Object]
906
921
  #
@@ -1215,7 +1230,7 @@ module Polars
1215
1230
  # "y" => 1_000_000.times.map { |v| v / 1000.0 },
1216
1231
  # "z" => 1_000_000.times.map(&:to_s)
1217
1232
  # },
1218
- # columns: {"x" => :u32, "y" => :f64, "z" => :str}
1233
+ # schema: {"x" => :u32, "y" => :f64, "z" => :str}
1219
1234
  # )
1220
1235
  # df.estimated_size
1221
1236
  # # => 25888898
@@ -2138,6 +2153,22 @@ module Polars
2138
2153
  # Define whether the temporal window interval is closed or not.
2139
2154
  # @param by
2140
2155
  # Also group by this column/these columns
2156
+ # @param start_by ['window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
2157
+ # The strategy to determine the start of the first window by.
2158
+ #
2159
+ # * 'window': Start by taking the earliest timestamp, truncating it with
2160
+ # `every`, and then adding `offset`.
2161
+ # Note that weekly windows start on Monday.
2162
+ # * 'datapoint': Start from the first encountered data point.
2163
+ # * a day of the week (only takes effect if `every` contains `'w'`):
2164
+ #
2165
+ # * 'monday': Start the window on the Monday before the first data point.
2166
+ # * 'tuesday': Start the window on the Tuesday before the first data point.
2167
+ # * ...
2168
+ # * 'sunday': Start the window on the Sunday before the first data point.
2169
+ #
2170
+ # The resulting window is then shifted back until the earliest datapoint
2171
+ # is in or in front of it.
2141
2172
  #
2142
2173
  # @return [DataFrame]
2143
2174
  #
@@ -2439,12 +2470,12 @@ module Polars
2439
2470
  # @param on [String]
2440
2471
  # Join column of both DataFrames. If set, `left_on` and `right_on` should be
2441
2472
  # None.
2442
- # @param by [Object]
2443
- # join on these columns before doing asof join
2444
2473
  # @param by_left [Object]
2445
2474
  # join on these columns before doing asof join
2446
2475
  # @param by_right [Object]
2447
2476
  # join on these columns before doing asof join
2477
+ # @param by [Object]
2478
+ # join on these columns before doing asof join
2448
2479
  # @param strategy ["backward", "forward"]
2449
2480
  # Join strategy.
2450
2481
  # @param suffix [String]
@@ -2454,14 +2485,6 @@ module Polars
2454
2485
  # keys are within this distance. If an asof join is done on columns of dtype
2455
2486
  # "Date", "Datetime", "Duration" or "Time" you use the following string
2456
2487
  # language:
2457
- # @param allow_exact_matches [Boolean]
2458
- # Whether exact matches are valid join predicates.
2459
- # - If true, allow matching with the same `on` value (i.e. less-than-or-equal-to / greater-than-or-equal-to).
2460
- # - If false, don't match the same `on` value (i.e., strictly less-than / strictly greater-than).
2461
- # @param check_sortedness [Boolean]
2462
- # Check the sortedness of the asof keys. If the keys are not sorted Polars
2463
- # will error, or in case of 'by' argument raise a warning. This might become
2464
- # a hard error in the future.
2465
2488
  #
2466
2489
  # - 1ns (1 nanosecond)
2467
2490
  # - 1us (1 microsecond)
@@ -2489,6 +2512,14 @@ module Polars
2489
2512
  # - true: -> Always coalesce join columns.
2490
2513
  # - false: -> Never coalesce join columns.
2491
2514
  # Note that joining on any other expressions than `col` will turn off coalescing.
2515
+ # @param allow_exact_matches [Boolean]
2516
+ # Whether exact matches are valid join predicates.
2517
+ # - If true, allow matching with the same `on` value (i.e. less-than-or-equal-to / greater-than-or-equal-to).
2518
+ # - If false, don't match the same `on` value (i.e., strictly less-than / strictly greater-than).
2519
+ # @param check_sortedness [Boolean]
2520
+ # Check the sortedness of the asof keys. If the keys are not sorted Polars
2521
+ # will error, or in case of 'by' argument raise a warning. This might become
2522
+ # a hard error in the future.
2492
2523
  #
2493
2524
  # @return [DataFrame]
2494
2525
  #
@@ -3436,19 +3467,22 @@ module Polars
3436
3467
 
3437
3468
  # Create a spreadsheet-style pivot table as a DataFrame.
3438
3469
  #
3470
+ # @param on [Object]
3471
+ # Columns whose values will be used as the header of the output DataFrame
3472
+ # @param index [Object]
3473
+ # One or multiple keys to group by
3439
3474
  # @param values [Object]
3440
3475
  # Column values to aggregate. Can be multiple columns if the *columns*
3441
3476
  # arguments contains multiple columns as well
3442
- # @param index [Object]
3443
- # One or multiple keys to group by
3444
- # @param on [Object]
3445
- # Columns whose values will be used as the header of the output DataFrame
3446
3477
  # @param aggregate_function ["first", "sum", "max", "min", "mean", "median", "last", "count"]
3447
3478
  # A predefined aggregate function str or an expression.
3448
3479
  # @param maintain_order [Object]
3449
3480
  # Sort the grouped keys so that the output order is predictable.
3450
3481
  # @param sort_columns [Object]
3451
3482
  # Sort the transposed columns by name. Default is by order of discovery.
3483
+ # @param separator [String]
3484
+ # Used as separator/delimiter in generated column names in case of multiple
3485
+ # `values` columns.
3452
3486
  #
3453
3487
  # @return [DataFrame]
3454
3488
  #
@@ -3712,6 +3746,8 @@ module Polars
3712
3746
  # @param maintain_order [Boolean]
3713
3747
  # Keep predictable output order. This is slower as it requires an extra sort
3714
3748
  # operation.
3749
+ # @param include_key [Boolean]
3750
+ # Include the columns used to partition the DataFrame in the output.
3715
3751
  # @param as_dict [Boolean]
3716
3752
  # If true, return the partitions in a dictionary keyed by the distinct group
3717
3753
  # values instead of a list.
@@ -4556,9 +4592,15 @@ module Polars
4556
4592
 
4557
4593
  # Get one hot encoded dummy variables.
4558
4594
  #
4559
- # @param columns
4595
+ # @param columns [Array]
4560
4596
  # A subset of columns to convert to dummy variables. `nil` means
4561
4597
  # "all columns".
4598
+ # @param separator [String]
4599
+ # Separator/delimiter used when generating column names.
4600
+ # @param drop_first [Boolean]
4601
+ # Remove the first category from the variables being encoded.
4602
+ # @param drop_nulls [Boolean]
4603
+ # If there are `None` values in the series, a `null` column is not generated
4562
4604
  #
4563
4605
  # @return [DataFrame]
4564
4606
  #
@@ -4581,11 +4623,11 @@ module Polars
4581
4623
  # # │ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 │
4582
4624
  # # │ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 │
4583
4625
  # # └───────┴───────┴───────┴───────┴───────┴───────┘
4584
- def to_dummies(columns: nil, separator: "_", drop_first: false)
4626
+ def to_dummies(columns: nil, separator: "_", drop_first: false, drop_nulls: false)
4585
4627
  if columns.is_a?(::String)
4586
4628
  columns = [columns]
4587
4629
  end
4588
- _from_rbdf(_df.to_dummies(columns, separator, drop_first))
4630
+ _from_rbdf(_df.to_dummies(columns, separator, drop_first, drop_nulls))
4589
4631
  end
4590
4632
 
4591
4633
  # Drop duplicate rows from this DataFrame.
@@ -4753,7 +4795,7 @@ module Polars
4753
4795
  # # │ --- ┆ --- ┆ --- │
4754
4796
  # # │ i64 ┆ i64 ┆ str │
4755
4797
  # # ╞═════╪═════╪═════╡
4756
- # # │ 38c
4798
+ # # │ 16a
4757
4799
  # # │ 2 ┆ 7 ┆ b │
4758
4800
  # # └─────┴─────┴─────┘
4759
4801
  def sample(
@@ -4979,6 +5021,85 @@ module Polars
4979
5021
  end
4980
5022
  end
4981
5023
 
5024
+ # Convert columnar data to rows as Ruby arrays in a hash keyed by some column.
5025
+ #
5026
+ # This method is like `rows`, but instead of returning rows in a flat list, rows
5027
+ # are grouped by the values in the `key` column(s) and returned as a hash.
5028
+ #
5029
+ # Note that this method should not be used in place of native operations, due to
5030
+ # the high cost of materializing all frame data out into a hash; it should
5031
+ # be used only when you need to move the values out into a Ruby data structure
5032
+ # or other object that cannot operate directly with Polars/Arrow.
5033
+ #
5034
+ # @param key [Object]
5035
+ # The column(s) to use as the key for the returned hash. If multiple
5036
+ # columns are specified, the key will be a tuple of those values, otherwise
5037
+ # it will be a string.
5038
+ # @param named [Boolean]
5039
+ # Return hashes instead of arrays. The hashes are a mapping of
5040
+ # column name to row value. This is more expensive than returning an
5041
+ # array, but allows for accessing values by column name.
5042
+ # @param include_key [Boolean]
5043
+ # Include key values inline with the associated data (by default the key
5044
+ # values are omitted as a memory/performance optimisation, as they can be
5045
+ # reconstructed from the key).
5046
+ # @param unique [Boolean]
5047
+ # Indicate that the key is unique; this will result in a 1:1 mapping from
5048
+ # key to a single associated row. Note that if the key is *not* actually
5049
+ # unique the last row with the given key will be returned.
5050
+ #
5051
+ # @return [Hash]
5052
+ #
5053
+ # @example Group rows by the given key column(s):
5054
+ # df = Polars::DataFrame.new(
5055
+ # {
5056
+ # "w" => ["a", "b", "b", "a"],
5057
+ # "x" => ["q", "q", "q", "k"],
5058
+ # "y" => [1.0, 2.5, 3.0, 4.5],
5059
+ # "z" => [9, 8, 7, 6]
5060
+ # }
5061
+ # )
5062
+ # df.rows_by_key(["w"])
5063
+ # # => {"a"=>[["q", 1.0, 9], ["k", 4.5, 6]], "b"=>[["q", 2.5, 8], ["q", 3.0, 7]]}
5064
+ #
5065
+ # @example Return the same row groupings as hashes:
5066
+ # df.rows_by_key(["w"], named: true)
5067
+ # # => {"a"=>[{"x"=>"q", "y"=>1.0, "z"=>9}, {"x"=>"k", "y"=>4.5, "z"=>6}], "b"=>[{"x"=>"q", "y"=>2.5, "z"=>8}, {"x"=>"q", "y"=>3.0, "z"=>7}]}
5068
+ #
5069
+ # @example Return row groupings, assuming keys are unique:
5070
+ # df.rows_by_key(["z"], unique: true)
5071
+ # # => {9=>["a", "q", 1.0], 8=>["b", "q", 2.5], 7=>["b", "q", 3.0], 6=>["a", "k", 4.5]}
5072
+ #
5073
+ # @example Return row groupings as hashes, assuming keys are unique:
5074
+ # df.rows_by_key(["z"], named: true, unique: true)
5075
+ # # => {9=>{"w"=>"a", "x"=>"q", "y"=>1.0}, 8=>{"w"=>"b", "x"=>"q", "y"=>2.5}, 7=>{"w"=>"b", "x"=>"q", "y"=>3.0}, 6=>{"w"=>"a", "x"=>"k", "y"=>4.5}}
5076
+ #
5077
+ # @example Return hash rows grouped by a compound key, including key values:
5078
+ # df.rows_by_key(["w", "x"], named: true, include_key: true)
5079
+ # # => {["a", "q"]=>[{"w"=>"a", "x"=>"q", "y"=>1.0, "z"=>9}], ["b", "q"]=>[{"w"=>"b", "x"=>"q", "y"=>2.5, "z"=>8}, {"w"=>"b", "x"=>"q", "y"=>3.0, "z"=>7}], ["a", "k"]=>[{"w"=>"a", "x"=>"k", "y"=>4.5, "z"=>6}]}
5080
+ def rows_by_key(key, named: false, include_key: false, unique: false)
5081
+ key = Utils._expand_selectors(self, key)
5082
+
5083
+ keys = key.size == 1 ? get_column(key[0]) : select(key).iter_rows
5084
+
5085
+ if include_key
5086
+ values = self
5087
+ else
5088
+ data_cols = schema.keys - key
5089
+ values = select(data_cols)
5090
+ end
5091
+
5092
+ zipped = keys.each.zip(values.iter_rows(named: named))
5093
+
5094
+ # if unique, we expect to write just one entry per key; otherwise, we're
5095
+ # returning a list of rows for each key, so append into a hash of arrays.
5096
+ if unique
5097
+ zipped.to_h
5098
+ else
5099
+ zipped.each_with_object({}) { |(key, data), h| (h[key] ||= []) << data }
5100
+ end
5101
+ end
5102
+
4982
5103
  # Returns an iterator over the DataFrame of rows of Ruby-native values.
4983
5104
  #
4984
5105
  # @param named [Boolean]
@@ -294,12 +294,56 @@ module Polars
294
294
  end
295
295
  end
296
296
 
297
+ # A named collection of categories for `Categorical`.
298
+ #
299
+ # Two categories are considered equal (and will use the same physical mapping of
300
+ # categories to strings) if they have the same name, namespace and physical backing
301
+ # type, even if they are created in separate calls to `Categories`.
302
+ #
303
+ # @note
304
+ # This functionality is currently considered **unstable**. It may be
305
+ # changed at any point without it being considered a breaking change.
306
+ class Categories
307
+ attr_accessor :_categories
308
+
309
+ def initialize
310
+ # TODO fix
311
+ name = nil
312
+ if name.nil? || name == ""
313
+ @_categories = RbCategories.global_categories
314
+ return
315
+ end
316
+
317
+ raise Todo
318
+ end
319
+
320
+ def self._from_rb_categories(rb_categories)
321
+ slf = new
322
+ slf._categories = rb_categories
323
+ slf
324
+ end
325
+ end
326
+
297
327
  # A categorical encoding of a set of strings.
298
328
  class Categorical < DataType
299
- attr_reader :ordering
329
+ attr_reader :ordering, :categories
300
330
 
301
- def initialize(ordering = "physical")
302
- @ordering = ordering
331
+ def initialize(ordering = "physical", **kwargs)
332
+ if ordering.is_a?(Categories)
333
+ @ordering = "lexical"
334
+ @categories = ordering
335
+ # assert kwargs.length == 0
336
+ return
337
+ end
338
+
339
+ @ordering = "lexical"
340
+ if kwargs[:categories]
341
+ # assert kwargs.length == 1
342
+ @categories = kwargs[:categories]
343
+ else
344
+ # assert kwargs.length == 0
345
+ @categories = Categories.new
346
+ end
303
347
  end
304
348
  end
305
349
 
@@ -24,10 +24,15 @@ module Polars
24
24
  class TooManyRowsReturned < RowsException; end
25
25
 
26
26
  # @private
27
- class AssertionError < Error; end
27
+ # Exception raised when Polars could not perform an underlying computation.
28
+ class ComputeError < Error; end
28
29
 
29
30
  # @private
30
- class ComputeError < Error; end
31
+ # Exception raised when a column name is duplicated.
32
+ class DuplicateError < Error; end
33
+
34
+ # @private
35
+ class AssertionError < Error; end
31
36
 
32
37
  # @private
33
38
  class Todo < Error
data/lib/polars/expr.rb CHANGED
@@ -333,12 +333,11 @@ module Polars
333
333
  # with `$`.
334
334
  #
335
335
  # @param columns [Object]
336
- # Column(s) to exclude from selection.
337
- # This can be:
338
- #
339
- # - a column name, or multiple column names
340
- # - a regular expression starting with `^` and ending with `$`
341
- # - a dtype or multiple dtypes
336
+ # The name or datatype of the column(s) to exclude. Accepts regular expression
337
+ # input. Regular expressions should start with `^` and end with `$`.
338
+ # @param more_columns [Array]
339
+ # Additional names or datatypes of columns to exclude, specified as positional
340
+ # arguments.
342
341
  #
343
342
  # @return [Expr]
344
343
  #
@@ -362,24 +361,8 @@ module Polars
362
361
  # # │ 2 ┆ 2.5 │
363
362
  # # │ 3 ┆ 1.5 │
364
363
  # # └─────┴──────┘
365
- def exclude(columns)
366
- if columns.is_a?(::String)
367
- columns = [columns]
368
- return _from_rbexpr(_rbexpr.exclude(columns))
369
- elsif !columns.is_a?(::Array)
370
- columns = [columns]
371
- return _from_rbexpr(_rbexpr.exclude_dtype(columns))
372
- end
373
-
374
- if !columns.all? { |a| a.is_a?(::String) } || !columns.all? { |a| Utils.is_polars_dtype(a) }
375
- raise ArgumentError, "input should be all string or all DataType"
376
- end
377
-
378
- if columns[0].is_a?(::String)
379
- _from_rbexpr(_rbexpr.exclude(columns))
380
- else
381
- _from_rbexpr(_rbexpr.exclude_dtype(columns))
382
- end
364
+ def exclude(columns, *more_columns)
365
+ meta.as_selector.exclude(columns, *more_columns).as_expr
383
366
  end
384
367
 
385
368
  # Keep the original root name of the expression.
@@ -1158,6 +1141,13 @@ module Polars
1158
1141
  #
1159
1142
  # @param decimals [Integer]
1160
1143
  # Number of decimals to round by.
1144
+ # @param mode ['half_to_even', 'half_away_from_zero']
1145
+ # RoundMode.
1146
+ #
1147
+ # * *half_to_even*
1148
+ # round to the nearest even number
1149
+ # * *half_away_from_zero*
1150
+ # round to the nearest number away from zero
1161
1151
  #
1162
1152
  # @return [Expr]
1163
1153
  #
@@ -1512,6 +1502,13 @@ module Polars
1512
1502
  #
1513
1503
  # @param element [Object]
1514
1504
  # Expression or scalar value.
1505
+ # @param side ['any', 'left', 'right']
1506
+ # If 'any', the index of the first suitable location found is given.
1507
+ # If 'left', the index of the leftmost suitable location found is given.
1508
+ # If 'right', the index of the rightmost suitable location found is given.
1509
+ # @param descending [Boolean]
1510
+ # Boolean indicating whether the values are descending or not (they
1511
+ # are required to be sorted either way).
1515
1512
  #
1516
1513
  # @return [Expr]
1517
1514
  #
@@ -1537,9 +1534,9 @@ module Polars
1537
1534
  # # ╞══════╪═══════╪═════╡
1538
1535
  # # │ 0 ┆ 2 ┆ 4 │
1539
1536
  # # └──────┴───────┴─────┘
1540
- def search_sorted(element, side: "any")
1537
+ def search_sorted(element, side: "any", descending: false)
1541
1538
  element = Utils.parse_into_expression(element, str_as_lit: false)
1542
- _from_rbexpr(_rbexpr.search_sorted(element, side))
1539
+ _from_rbexpr(_rbexpr.search_sorted(element, side, descending))
1543
1540
  end
1544
1541
 
1545
1542
  # Sort this column by the ordering of another column, or multiple other columns.
@@ -1549,9 +1546,18 @@ module Polars
1549
1546
  #
1550
1547
  # @param by [Object]
1551
1548
  # The column(s) used for sorting.
1549
+ # @param more_by [Array]
1550
+ # Additional columns to sort by, specified as positional arguments.
1552
1551
  # @param reverse [Boolean]
1553
1552
  # false -> order from small to large.
1554
1553
  # true -> order from large to small.
1554
+ # @param nulls_last [Boolean]
1555
+ # Place null values last; can specify a single boolean applying to all columns
1556
+ # or a sequence of booleans for per-column control.
1557
+ # @param multithreaded [Boolean]
1558
+ # Sort using multiple threads.
1559
+ # @param maintain_order [Boolean]
1560
+ # Whether the order should be maintained if elements are equal.
1555
1561
  #
1556
1562
  # @return [Expr]
1557
1563
  #
@@ -4901,10 +4907,6 @@ module Polars
4901
4907
  #
4902
4908
  # @param by [String]
4903
4909
  # This column must be of dtype Datetime or Date.
4904
- # @param quantile [Float]
4905
- # Quantile between 0.0 and 1.0.
4906
- # @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear']
4907
- # Interpolation method.
4908
4910
  # @param window_size [String]
4909
4911
  # The length of the window. Can be a dynamic
4910
4912
  # temporal size indicated by a timedelta or the following string language:
@@ -4925,6 +4927,10 @@ module Polars
4925
4927
  # (which may not be 24 hours, due to daylight savings). Similarly for
4926
4928
  # "calendar week", "calendar month", "calendar quarter", and
4927
4929
  # "calendar year".
4930
+ # @param quantile [Float]
4931
+ # Quantile between 0.0 and 1.0.
4932
+ # @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear']
4933
+ # Interpolation method.
4928
4934
  # @param min_periods [Integer]
4929
4935
  # The number of values in the window that should be non-null before computing
4930
4936
  # a result.
@@ -5359,6 +5365,8 @@ module Polars
5359
5365
  # a result. If None, it will be set equal to window size.
5360
5366
  # @param center [Boolean]
5361
5367
  # Set the labels at the center of the window
5368
+ # @param ddof [Integer]
5369
+ # "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
5362
5370
  #
5363
5371
  # @note
5364
5372
  # This functionality is experimental and may change without it being considered a
@@ -5438,6 +5446,8 @@ module Polars
5438
5446
  # a result. If None, it will be set equal to window size.
5439
5447
  # @param center [Boolean]
5440
5448
  # Set the labels at the center of the window
5449
+ # @param ddof [Integer]
5450
+ # "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
5441
5451
  #
5442
5452
  # @note
5443
5453
  # This functionality is experimental and may change without it being considered a
@@ -5619,10 +5629,10 @@ module Polars
5619
5629
  # # ╞══════╡
5620
5630
  # # │ null │
5621
5631
  # # │ null │
5622
- # # │ 1.0 │
5623
5632
  # # │ 2.0 │
5624
5633
  # # │ 3.0 │
5625
5634
  # # │ 4.0 │
5635
+ # # │ 6.0 │
5626
5636
  # # └──────┘
5627
5637
  def rolling_quantile(
5628
5638
  quantile,
@@ -6464,8 +6474,8 @@ module Polars
6464
6474
  # # │ i64 │
6465
6475
  # # ╞═════╡
6466
6476
  # # │ 2 │
6467
- # # │ 1 │
6468
6477
  # # │ 3 │
6478
+ # # │ 1 │
6469
6479
  # # └─────┘
6470
6480
  def shuffle(seed: nil)
6471
6481
  if seed.nil?
@@ -6501,7 +6511,7 @@ module Polars
6501
6511
  # # │ i64 │
6502
6512
  # # ╞═════╡
6503
6513
  # # │ 3 │
6504
- # # │ 1
6514
+ # # │ 3
6505
6515
  # # │ 1 │
6506
6516
  # # └─────┘
6507
6517
  def sample(
@@ -6651,6 +6661,8 @@ module Polars
6651
6661
  # # │ 99 │
6652
6662
  # # └────────┘
6653
6663
  def extend_constant(value, n)
6664
+ value = Utils.parse_into_expression(value, str_as_lit: true)
6665
+ n = Utils.parse_into_expression(n)
6654
6666
  _from_rbexpr(_rbexpr.extend_constant(value, n))
6655
6667
  end
6656
6668
 
@@ -6814,9 +6826,6 @@ module Polars
6814
6826
  # @param min_periods [Integer]
6815
6827
  # Number of valid values there should be in the window before the expression
6816
6828
  # is evaluated. valid values = `length - null_count`
6817
- # @param parallel [Boolean]
6818
- # Run in parallel. Don't do this in a group by or another operation that
6819
- # already has much parallelization.
6820
6829
  #
6821
6830
  # @return [Expr]
6822
6831
  #
@@ -6850,9 +6859,9 @@ module Polars
6850
6859
  # # │ -15 │
6851
6860
  # # │ -24 │
6852
6861
  # # └────────┘
6853
- def cumulative_eval(expr, min_periods: 1, parallel: false)
6862
+ def cumulative_eval(expr, min_periods: 1)
6854
6863
  _from_rbexpr(
6855
- _rbexpr.cumulative_eval(expr._rbexpr, min_periods, parallel)
6864
+ _rbexpr.cumulative_eval(expr._rbexpr, min_periods)
6856
6865
  )
6857
6866
  end
6858
6867
 
@@ -7117,7 +7126,7 @@ module Polars
7117
7126
  # Accepts expression input. Sequences are parsed as Series,
7118
7127
  # other non-expression inputs are parsed as literals.
7119
7128
  # Also accepts a mapping of values to their replacement as syntactic sugar for
7120
- # `replace_all(old: Series.new(mapping.keys), new: Serie.new(mapping.values))`.
7129
+ # `replace_all(old: Series.new(mapping.keys), new: Series.new(mapping.values))`.
7121
7130
  # @param new [Object]
7122
7131
  # Value or sequence of values to replace by.
7123
7132
  # Accepts expression input. Sequences are parsed as Series,
@@ -8,11 +8,11 @@ module Polars
8
8
  if Utils.strlike?(name)
9
9
  names_str = [name]
10
10
  names_str.concat(more_names)
11
- return Utils.wrap_expr(Plr.cols(names_str.map(&:to_s)))
11
+ return Selector._by_name(names_str.map(&:to_s), strict: true).as_expr
12
12
  elsif Utils.is_polars_dtype(name)
13
13
  dtypes = [name]
14
14
  dtypes.concat(more_names)
15
- return Utils.wrap_expr(Plr.dtype_cols(dtypes))
15
+ return Selector._by_type(dtypes).as_expr
16
16
  else
17
17
  msg = "invalid input for `col`\n\nExpected `str` or `DataType`, got #{name.class.name}."
18
18
  raise TypeError, msg
@@ -22,7 +22,8 @@ module Polars
22
22
  if Utils.strlike?(name)
23
23
  Utils.wrap_expr(Plr.col(name.to_s))
24
24
  elsif Utils.is_polars_dtype(name)
25
- Utils.wrap_expr(Plr.dtype_cols([name]))
25
+ dtypes = [name]
26
+ Selector._by_dtype(dtypes).as_expr
26
27
  elsif name.is_a?(::Array) || name.is_a?(::Set)
27
28
  names = Array(name)
28
29
  if names.empty?
@@ -31,9 +32,9 @@ module Polars
31
32
 
32
33
  item = names[0]
33
34
  if Utils.strlike?(item)
34
- Utils.wrap_expr(Plr.cols(names.map(&:to_s)))
35
+ Selector._by_name(names.map(&:to_s), strict: true).as_expr
35
36
  elsif Utils.is_polars_dtype(item)
36
- Utils.wrap_expr(Plr.dtype_cols(names))
37
+ Selector._by_dtype(names).as_expr
37
38
  else
38
39
  msg = "invalid input for `col`\n\nExpected iterable of type `str` or `DataType`, got iterable of type #{item.class.name}."
39
40
  raise TypeError, msg
@@ -206,7 +206,7 @@ module Polars
206
206
  end
207
207
  end
208
208
 
209
- # Align a sequence of frames using the uique values from one or more columns as a key.
209
+ # Align a sequence of frames using the unique values from one or more columns as a key.
210
210
  #
211
211
  # Frames that do not contain the given key values have rows injected (with nulls
212
212
  # filling the non-key columns), and each resulting frame is sorted by the key.