polars-df 0.20.0-x86_64-darwin → 0.21.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +192 -186
- data/LICENSE-THIRD-PARTY.txt +1431 -1810
- data/LICENSE.txt +1 -1
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/3.4/polars.bundle +0 -0
- data/lib/polars/cat_name_space.rb +3 -43
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/convert.rb +10 -0
- data/lib/polars/data_frame.rb +151 -30
- data/lib/polars/data_types.rb +47 -3
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +34 -31
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/lazy.rb +114 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +18 -0
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +45 -63
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +163 -75
- data/lib/polars/list_expr.rb +204 -7
- data/lib/polars/list_name_space.rb +120 -1
- data/lib/polars/meta_expr.rb +7 -22
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +6 -1
- data/lib/polars/selector.rb +138 -0
- data/lib/polars/selectors.rb +931 -202
- data/lib/polars/series.rb +34 -11
- data/lib/polars/string_expr.rb +24 -3
- data/lib/polars/string_name_space.rb +11 -0
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +8 -0
- metadata +10 -2
data/lib/polars/data_frame.rb
CHANGED
@@ -47,12 +47,7 @@ module Polars
|
|
47
47
|
# @param nan_to_null [Boolean]
|
48
48
|
# If the data comes from one or more Numo arrays, can optionally convert input
|
49
49
|
# data NaN values to null instead. This is a no-op for all other input data.
|
50
|
-
def initialize(data = nil, schema: nil,
|
51
|
-
if schema && columns
|
52
|
-
warn "columns is ignored when schema is passed"
|
53
|
-
end
|
54
|
-
schema ||= columns
|
55
|
-
|
50
|
+
def initialize(data = nil, schema: nil, schema_overrides: nil, strict: true, orient: nil, infer_schema_length: 100, nan_to_null: false)
|
56
51
|
if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
|
57
52
|
raise ArgumentError, "Use read_database instead"
|
58
53
|
end
|
@@ -722,7 +717,7 @@ module Polars
|
|
722
717
|
# @param file [String, nil]
|
723
718
|
# File path to which the result should be written. If set to `nil`
|
724
719
|
# (default), the output is returned as a string instead.
|
725
|
-
# @param
|
720
|
+
# @param include_header [Boolean]
|
726
721
|
# Whether to include header in the CSV output.
|
727
722
|
# @param sep [String]
|
728
723
|
# Separate CSV fields with this symbol.
|
@@ -763,8 +758,7 @@ module Polars
|
|
763
758
|
# df.write_csv("file.csv")
|
764
759
|
def write_csv(
|
765
760
|
file = nil,
|
766
|
-
|
767
|
-
include_header: nil,
|
761
|
+
include_header: true,
|
768
762
|
sep: ",",
|
769
763
|
quote: '"',
|
770
764
|
batch_size: 1024,
|
@@ -774,8 +768,6 @@ module Polars
|
|
774
768
|
float_precision: nil,
|
775
769
|
null_value: nil
|
776
770
|
)
|
777
|
-
include_header = has_header if include_header.nil?
|
778
|
-
|
779
771
|
if sep.length > 1
|
780
772
|
raise ArgumentError, "only single byte separator is allowed"
|
781
773
|
elsif quote.length > 1
|
@@ -834,6 +826,8 @@ module Polars
|
|
834
826
|
# File path to which the file should be written.
|
835
827
|
# @param compression ["uncompressed", "snappy", "deflate"]
|
836
828
|
# Compression method. Defaults to "uncompressed".
|
829
|
+
# @param name [String]
|
830
|
+
# Schema name. Defaults to empty string.
|
837
831
|
#
|
838
832
|
# @return [nil]
|
839
833
|
def write_avro(file, compression = "uncompressed", name: "")
|
@@ -856,6 +850,24 @@ module Polars
|
|
856
850
|
# File path to which the file should be written.
|
857
851
|
# @param compression ["uncompressed", "lz4", "zstd"]
|
858
852
|
# Compression method. Defaults to "uncompressed".
|
853
|
+
# @param compat_level [Object]
|
854
|
+
# Use a specific compatibility level
|
855
|
+
# when exporting Polars' internal data structures.
|
856
|
+
# @param storage_options [Hash]
|
857
|
+
# Options that indicate how to connect to a cloud provider.
|
858
|
+
#
|
859
|
+
# The cloud providers currently supported are AWS, GCP, and Azure.
|
860
|
+
# See supported keys here:
|
861
|
+
#
|
862
|
+
# * [aws](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html)
|
863
|
+
# * [gcp](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html)
|
864
|
+
# * [azure](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html)
|
865
|
+
# * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
|
866
|
+
#
|
867
|
+
# If `storage_options` is not provided, Polars will try to infer the
|
868
|
+
# information from environment variables.
|
869
|
+
# @param retries [Integer]
|
870
|
+
# Number of retries if accessing a cloud instance fails.
|
859
871
|
#
|
860
872
|
# @return [nil]
|
861
873
|
def write_ipc(
|
@@ -901,6 +913,9 @@ module Polars
|
|
901
913
|
# be written. If set to `None`, the output is returned as a BytesIO object.
|
902
914
|
# @param compression ['uncompressed', 'lz4', 'zstd']
|
903
915
|
# Compression method. Defaults to "uncompressed".
|
916
|
+
# @param compat_level [Object]
|
917
|
+
# Use a specific compatibility level
|
918
|
+
# when exporting Polars' internal data structures.
|
904
919
|
#
|
905
920
|
# @return [Object]
|
906
921
|
#
|
@@ -1215,7 +1230,7 @@ module Polars
|
|
1215
1230
|
# "y" => 1_000_000.times.map { |v| v / 1000.0 },
|
1216
1231
|
# "z" => 1_000_000.times.map(&:to_s)
|
1217
1232
|
# },
|
1218
|
-
#
|
1233
|
+
# schema: {"x" => :u32, "y" => :f64, "z" => :str}
|
1219
1234
|
# )
|
1220
1235
|
# df.estimated_size
|
1221
1236
|
# # => 25888898
|
@@ -2138,6 +2153,22 @@ module Polars
|
|
2138
2153
|
# Define whether the temporal window interval is closed or not.
|
2139
2154
|
# @param by
|
2140
2155
|
# Also group by this column/these columns
|
2156
|
+
# @param start_by ['window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
|
2157
|
+
# The strategy to determine the start of the first window by.
|
2158
|
+
#
|
2159
|
+
# * 'window': Start by taking the earliest timestamp, truncating it with
|
2160
|
+
# `every`, and then adding `offset`.
|
2161
|
+
# Note that weekly windows start on Monday.
|
2162
|
+
# * 'datapoint': Start from the first encountered data point.
|
2163
|
+
# * a day of the week (only takes effect if `every` contains `'w'`):
|
2164
|
+
#
|
2165
|
+
# * 'monday': Start the window on the Monday before the first data point.
|
2166
|
+
# * 'tuesday': Start the window on the Tuesday before the first data point.
|
2167
|
+
# * ...
|
2168
|
+
# * 'sunday': Start the window on the Sunday before the first data point.
|
2169
|
+
#
|
2170
|
+
# The resulting window is then shifted back until the earliest datapoint
|
2171
|
+
# is in or in front of it.
|
2141
2172
|
#
|
2142
2173
|
# @return [DataFrame]
|
2143
2174
|
#
|
@@ -2439,12 +2470,12 @@ module Polars
|
|
2439
2470
|
# @param on [String]
|
2440
2471
|
# Join column of both DataFrames. If set, `left_on` and `right_on` should be
|
2441
2472
|
# None.
|
2442
|
-
# @param by [Object]
|
2443
|
-
# join on these columns before doing asof join
|
2444
2473
|
# @param by_left [Object]
|
2445
2474
|
# join on these columns before doing asof join
|
2446
2475
|
# @param by_right [Object]
|
2447
2476
|
# join on these columns before doing asof join
|
2477
|
+
# @param by [Object]
|
2478
|
+
# join on these columns before doing asof join
|
2448
2479
|
# @param strategy ["backward", "forward"]
|
2449
2480
|
# Join strategy.
|
2450
2481
|
# @param suffix [String]
|
@@ -2454,14 +2485,6 @@ module Polars
|
|
2454
2485
|
# keys are within this distance. If an asof join is done on columns of dtype
|
2455
2486
|
# "Date", "Datetime", "Duration" or "Time" you use the following string
|
2456
2487
|
# language:
|
2457
|
-
# @param allow_exact_matches [Boolean]
|
2458
|
-
# Whether exact matches are valid join predicates.
|
2459
|
-
# - If true, allow matching with the same `on` value (i.e. less-than-or-equal-to / greater-than-or-equal-to).
|
2460
|
-
# - If false, don't match the same `on` value (i.e., strictly less-than / strictly greater-than).
|
2461
|
-
# @param check_sortedness [Boolean]
|
2462
|
-
# Check the sortedness of the asof keys. If the keys are not sorted Polars
|
2463
|
-
# will error, or in case of 'by' argument raise a warning. This might become
|
2464
|
-
# a hard error in the future.
|
2465
2488
|
#
|
2466
2489
|
# - 1ns (1 nanosecond)
|
2467
2490
|
# - 1us (1 microsecond)
|
@@ -2489,6 +2512,14 @@ module Polars
|
|
2489
2512
|
# - true: -> Always coalesce join columns.
|
2490
2513
|
# - false: -> Never coalesce join columns.
|
2491
2514
|
# Note that joining on any other expressions than `col` will turn off coalescing.
|
2515
|
+
# @param allow_exact_matches [Boolean]
|
2516
|
+
# Whether exact matches are valid join predicates.
|
2517
|
+
# - If true, allow matching with the same `on` value (i.e. less-than-or-equal-to / greater-than-or-equal-to).
|
2518
|
+
# - If false, don't match the same `on` value (i.e., strictly less-than / strictly greater-than).
|
2519
|
+
# @param check_sortedness [Boolean]
|
2520
|
+
# Check the sortedness of the asof keys. If the keys are not sorted Polars
|
2521
|
+
# will error, or in case of 'by' argument raise a warning. This might become
|
2522
|
+
# a hard error in the future.
|
2492
2523
|
#
|
2493
2524
|
# @return [DataFrame]
|
2494
2525
|
#
|
@@ -3436,19 +3467,22 @@ module Polars
|
|
3436
3467
|
|
3437
3468
|
# Create a spreadsheet-style pivot table as a DataFrame.
|
3438
3469
|
#
|
3470
|
+
# @param on [Object]
|
3471
|
+
# Columns whose values will be used as the header of the output DataFrame
|
3472
|
+
# @param index [Object]
|
3473
|
+
# One or multiple keys to group by
|
3439
3474
|
# @param values [Object]
|
3440
3475
|
# Column values to aggregate. Can be multiple columns if the *columns*
|
3441
3476
|
# arguments contains multiple columns as well
|
3442
|
-
# @param index [Object]
|
3443
|
-
# One or multiple keys to group by
|
3444
|
-
# @param on [Object]
|
3445
|
-
# Columns whose values will be used as the header of the output DataFrame
|
3446
3477
|
# @param aggregate_function ["first", "sum", "max", "min", "mean", "median", "last", "count"]
|
3447
3478
|
# A predefined aggregate function str or an expression.
|
3448
3479
|
# @param maintain_order [Object]
|
3449
3480
|
# Sort the grouped keys so that the output order is predictable.
|
3450
3481
|
# @param sort_columns [Object]
|
3451
3482
|
# Sort the transposed columns by name. Default is by order of discovery.
|
3483
|
+
# @param separator [String]
|
3484
|
+
# Used as separator/delimiter in generated column names in case of multiple
|
3485
|
+
# `values` columns.
|
3452
3486
|
#
|
3453
3487
|
# @return [DataFrame]
|
3454
3488
|
#
|
@@ -3712,6 +3746,8 @@ module Polars
|
|
3712
3746
|
# @param maintain_order [Boolean]
|
3713
3747
|
# Keep predictable output order. This is slower as it requires an extra sort
|
3714
3748
|
# operation.
|
3749
|
+
# @param include_key [Boolean]
|
3750
|
+
# Include the columns used to partition the DataFrame in the output.
|
3715
3751
|
# @param as_dict [Boolean]
|
3716
3752
|
# If true, return the partitions in a dictionary keyed by the distinct group
|
3717
3753
|
# values instead of a list.
|
@@ -4556,9 +4592,15 @@ module Polars
|
|
4556
4592
|
|
4557
4593
|
# Get one hot encoded dummy variables.
|
4558
4594
|
#
|
4559
|
-
# @param columns
|
4595
|
+
# @param columns [Array]
|
4560
4596
|
# A subset of columns to convert to dummy variables. `nil` means
|
4561
4597
|
# "all columns".
|
4598
|
+
# @param separator [String]
|
4599
|
+
# Separator/delimiter used when generating column names.
|
4600
|
+
# @param drop_first [Boolean]
|
4601
|
+
# Remove the first category from the variables being encoded.
|
4602
|
+
# @param drop_nulls [Boolean]
|
4603
|
+
# If there are `None` values in the series, a `null` column is not generated
|
4562
4604
|
#
|
4563
4605
|
# @return [DataFrame]
|
4564
4606
|
#
|
@@ -4581,11 +4623,11 @@ module Polars
|
|
4581
4623
|
# # │ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 │
|
4582
4624
|
# # │ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 │
|
4583
4625
|
# # └───────┴───────┴───────┴───────┴───────┴───────┘
|
4584
|
-
def to_dummies(columns: nil, separator: "_", drop_first: false)
|
4626
|
+
def to_dummies(columns: nil, separator: "_", drop_first: false, drop_nulls: false)
|
4585
4627
|
if columns.is_a?(::String)
|
4586
4628
|
columns = [columns]
|
4587
4629
|
end
|
4588
|
-
_from_rbdf(_df.to_dummies(columns, separator, drop_first))
|
4630
|
+
_from_rbdf(_df.to_dummies(columns, separator, drop_first, drop_nulls))
|
4589
4631
|
end
|
4590
4632
|
|
4591
4633
|
# Drop duplicate rows from this DataFrame.
|
@@ -4753,7 +4795,7 @@ module Polars
|
|
4753
4795
|
# # │ --- ┆ --- ┆ --- │
|
4754
4796
|
# # │ i64 ┆ i64 ┆ str │
|
4755
4797
|
# # ╞═════╪═════╪═════╡
|
4756
|
-
# # │
|
4798
|
+
# # │ 1 ┆ 6 ┆ a │
|
4757
4799
|
# # │ 2 ┆ 7 ┆ b │
|
4758
4800
|
# # └─────┴─────┴─────┘
|
4759
4801
|
def sample(
|
@@ -4979,6 +5021,85 @@ module Polars
|
|
4979
5021
|
end
|
4980
5022
|
end
|
4981
5023
|
|
5024
|
+
# Convert columnar data to rows as Ruby arrays in a hash keyed by some column.
|
5025
|
+
#
|
5026
|
+
# This method is like `rows`, but instead of returning rows in a flat list, rows
|
5027
|
+
# are grouped by the values in the `key` column(s) and returned as a hash.
|
5028
|
+
#
|
5029
|
+
# Note that this method should not be used in place of native operations, due to
|
5030
|
+
# the high cost of materializing all frame data out into a hash; it should
|
5031
|
+
# be used only when you need to move the values out into a Ruby data structure
|
5032
|
+
# or other object that cannot operate directly with Polars/Arrow.
|
5033
|
+
#
|
5034
|
+
# @param key [Object]
|
5035
|
+
# The column(s) to use as the key for the returned hash. If multiple
|
5036
|
+
# columns are specified, the key will be a tuple of those values, otherwise
|
5037
|
+
# it will be a string.
|
5038
|
+
# @param named [Boolean]
|
5039
|
+
# Return hashes instead of arrays. The hashes are a mapping of
|
5040
|
+
# column name to row value. This is more expensive than returning an
|
5041
|
+
# array, but allows for accessing values by column name.
|
5042
|
+
# @param include_key [Boolean]
|
5043
|
+
# Include key values inline with the associated data (by default the key
|
5044
|
+
# values are omitted as a memory/performance optimisation, as they can be
|
5045
|
+
# reconstructed from the key).
|
5046
|
+
# @param unique [Boolean]
|
5047
|
+
# Indicate that the key is unique; this will result in a 1:1 mapping from
|
5048
|
+
# key to a single associated row. Note that if the key is *not* actually
|
5049
|
+
# unique the last row with the given key will be returned.
|
5050
|
+
#
|
5051
|
+
# @return [Hash]
|
5052
|
+
#
|
5053
|
+
# @example Group rows by the given key column(s):
|
5054
|
+
# df = Polars::DataFrame.new(
|
5055
|
+
# {
|
5056
|
+
# "w" => ["a", "b", "b", "a"],
|
5057
|
+
# "x" => ["q", "q", "q", "k"],
|
5058
|
+
# "y" => [1.0, 2.5, 3.0, 4.5],
|
5059
|
+
# "z" => [9, 8, 7, 6]
|
5060
|
+
# }
|
5061
|
+
# )
|
5062
|
+
# df.rows_by_key(["w"])
|
5063
|
+
# # => {"a"=>[["q", 1.0, 9], ["k", 4.5, 6]], "b"=>[["q", 2.5, 8], ["q", 3.0, 7]]}
|
5064
|
+
#
|
5065
|
+
# @example Return the same row groupings as hashes:
|
5066
|
+
# df.rows_by_key(["w"], named: true)
|
5067
|
+
# # => {"a"=>[{"x"=>"q", "y"=>1.0, "z"=>9}, {"x"=>"k", "y"=>4.5, "z"=>6}], "b"=>[{"x"=>"q", "y"=>2.5, "z"=>8}, {"x"=>"q", "y"=>3.0, "z"=>7}]}
|
5068
|
+
#
|
5069
|
+
# @example Return row groupings, assuming keys are unique:
|
5070
|
+
# df.rows_by_key(["z"], unique: true)
|
5071
|
+
# # => {9=>["a", "q", 1.0], 8=>["b", "q", 2.5], 7=>["b", "q", 3.0], 6=>["a", "k", 4.5]}
|
5072
|
+
#
|
5073
|
+
# @example Return row groupings as hashes, assuming keys are unique:
|
5074
|
+
# df.rows_by_key(["z"], named: true, unique: true)
|
5075
|
+
# # => {9=>{"w"=>"a", "x"=>"q", "y"=>1.0}, 8=>{"w"=>"b", "x"=>"q", "y"=>2.5}, 7=>{"w"=>"b", "x"=>"q", "y"=>3.0}, 6=>{"w"=>"a", "x"=>"k", "y"=>4.5}}
|
5076
|
+
#
|
5077
|
+
# @example Return hash rows grouped by a compound key, including key values:
|
5078
|
+
# df.rows_by_key(["w", "x"], named: true, include_key: true)
|
5079
|
+
# # => {["a", "q"]=>[{"w"=>"a", "x"=>"q", "y"=>1.0, "z"=>9}], ["b", "q"]=>[{"w"=>"b", "x"=>"q", "y"=>2.5, "z"=>8}, {"w"=>"b", "x"=>"q", "y"=>3.0, "z"=>7}], ["a", "k"]=>[{"w"=>"a", "x"=>"k", "y"=>4.5, "z"=>6}]}
|
5080
|
+
def rows_by_key(key, named: false, include_key: false, unique: false)
|
5081
|
+
key = Utils._expand_selectors(self, key)
|
5082
|
+
|
5083
|
+
keys = key.size == 1 ? get_column(key[0]) : select(key).iter_rows
|
5084
|
+
|
5085
|
+
if include_key
|
5086
|
+
values = self
|
5087
|
+
else
|
5088
|
+
data_cols = schema.keys - key
|
5089
|
+
values = select(data_cols)
|
5090
|
+
end
|
5091
|
+
|
5092
|
+
zipped = keys.each.zip(values.iter_rows(named: named))
|
5093
|
+
|
5094
|
+
# if unique, we expect to write just one entry per key; otherwise, we're
|
5095
|
+
# returning a list of rows for each key, so append into a hash of arrays.
|
5096
|
+
if unique
|
5097
|
+
zipped.to_h
|
5098
|
+
else
|
5099
|
+
zipped.each_with_object({}) { |(key, data), h| (h[key] ||= []) << data }
|
5100
|
+
end
|
5101
|
+
end
|
5102
|
+
|
4982
5103
|
# Returns an iterator over the DataFrame of rows of Ruby-native values.
|
4983
5104
|
#
|
4984
5105
|
# @param named [Boolean]
|
data/lib/polars/data_types.rb
CHANGED
@@ -294,12 +294,56 @@ module Polars
|
|
294
294
|
end
|
295
295
|
end
|
296
296
|
|
297
|
+
# A named collection of categories for `Categorical`.
|
298
|
+
#
|
299
|
+
# Two categories are considered equal (and will use the same physical mapping of
|
300
|
+
# categories to strings) if they have the same name, namespace and physical backing
|
301
|
+
# type, even if they are created in separate calls to `Categories`.
|
302
|
+
#
|
303
|
+
# @note
|
304
|
+
# This functionality is currently considered **unstable**. It may be
|
305
|
+
# changed at any point without it being considered a breaking change.
|
306
|
+
class Categories
|
307
|
+
attr_accessor :_categories
|
308
|
+
|
309
|
+
def initialize
|
310
|
+
# TODO fix
|
311
|
+
name = nil
|
312
|
+
if name.nil? || name == ""
|
313
|
+
@_categories = RbCategories.global_categories
|
314
|
+
return
|
315
|
+
end
|
316
|
+
|
317
|
+
raise Todo
|
318
|
+
end
|
319
|
+
|
320
|
+
def self._from_rb_categories(rb_categories)
|
321
|
+
slf = new
|
322
|
+
slf._categories = rb_categories
|
323
|
+
slf
|
324
|
+
end
|
325
|
+
end
|
326
|
+
|
297
327
|
# A categorical encoding of a set of strings.
|
298
328
|
class Categorical < DataType
|
299
|
-
attr_reader :ordering
|
329
|
+
attr_reader :ordering, :categories
|
300
330
|
|
301
|
-
def initialize(ordering = "physical")
|
302
|
-
|
331
|
+
def initialize(ordering = "physical", **kwargs)
|
332
|
+
if ordering.is_a?(Categories)
|
333
|
+
@ordering = "lexical"
|
334
|
+
@categories = ordering
|
335
|
+
# assert kwargs.length == 0
|
336
|
+
return
|
337
|
+
end
|
338
|
+
|
339
|
+
@ordering = "lexical"
|
340
|
+
if kwargs[:categories]
|
341
|
+
# assert kwargs.length == 1
|
342
|
+
@categories = kwargs[:categories]
|
343
|
+
else
|
344
|
+
# assert kwargs.length == 0
|
345
|
+
@categories = Categories.new
|
346
|
+
end
|
303
347
|
end
|
304
348
|
end
|
305
349
|
|
data/lib/polars/exceptions.rb
CHANGED
@@ -24,10 +24,15 @@ module Polars
|
|
24
24
|
class TooManyRowsReturned < RowsException; end
|
25
25
|
|
26
26
|
# @private
|
27
|
-
|
27
|
+
# Exception raised when Polars could not perform an underlying computation.
|
28
|
+
class ComputeError < Error; end
|
28
29
|
|
29
30
|
# @private
|
30
|
-
|
31
|
+
# Exception raised when a column name is duplicated.
|
32
|
+
class DuplicateError < Error; end
|
33
|
+
|
34
|
+
# @private
|
35
|
+
class AssertionError < Error; end
|
31
36
|
|
32
37
|
# @private
|
33
38
|
class Todo < Error
|
data/lib/polars/expr.rb
CHANGED
@@ -333,12 +333,11 @@ module Polars
|
|
333
333
|
# with `$`.
|
334
334
|
#
|
335
335
|
# @param columns [Object]
|
336
|
-
#
|
337
|
-
#
|
338
|
-
#
|
339
|
-
#
|
340
|
-
#
|
341
|
-
# - a dtype or multiple dtypes
|
336
|
+
# The name or datatype of the column(s) to exclude. Accepts regular expression
|
337
|
+
# input. Regular expressions should start with `^` and end with `$`.
|
338
|
+
# @param more_columns [Array]
|
339
|
+
# Additional names or datatypes of columns to exclude, specified as positional
|
340
|
+
# arguments.
|
342
341
|
#
|
343
342
|
# @return [Expr]
|
344
343
|
#
|
@@ -362,24 +361,8 @@ module Polars
|
|
362
361
|
# # │ 2 ┆ 2.5 │
|
363
362
|
# # │ 3 ┆ 1.5 │
|
364
363
|
# # └─────┴──────┘
|
365
|
-
def exclude(columns)
|
366
|
-
|
367
|
-
columns = [columns]
|
368
|
-
return _from_rbexpr(_rbexpr.exclude(columns))
|
369
|
-
elsif !columns.is_a?(::Array)
|
370
|
-
columns = [columns]
|
371
|
-
return _from_rbexpr(_rbexpr.exclude_dtype(columns))
|
372
|
-
end
|
373
|
-
|
374
|
-
if !columns.all? { |a| a.is_a?(::String) } || !columns.all? { |a| Utils.is_polars_dtype(a) }
|
375
|
-
raise ArgumentError, "input should be all string or all DataType"
|
376
|
-
end
|
377
|
-
|
378
|
-
if columns[0].is_a?(::String)
|
379
|
-
_from_rbexpr(_rbexpr.exclude(columns))
|
380
|
-
else
|
381
|
-
_from_rbexpr(_rbexpr.exclude_dtype(columns))
|
382
|
-
end
|
364
|
+
def exclude(columns, *more_columns)
|
365
|
+
meta.as_selector.exclude(columns, *more_columns).as_expr
|
383
366
|
end
|
384
367
|
|
385
368
|
# Keep the original root name of the expression.
|
@@ -1158,6 +1141,13 @@ module Polars
|
|
1158
1141
|
#
|
1159
1142
|
# @param decimals [Integer]
|
1160
1143
|
# Number of decimals to round by.
|
1144
|
+
# @param mode ['half_to_even', 'half_away_from_zero']
|
1145
|
+
# RoundMode.
|
1146
|
+
#
|
1147
|
+
# * *half_to_even*
|
1148
|
+
# round to the nearest even number
|
1149
|
+
# * *half_away_from_zero*
|
1150
|
+
# round to the nearest number away from zero
|
1161
1151
|
#
|
1162
1152
|
# @return [Expr]
|
1163
1153
|
#
|
@@ -1556,9 +1546,18 @@ module Polars
|
|
1556
1546
|
#
|
1557
1547
|
# @param by [Object]
|
1558
1548
|
# The column(s) used for sorting.
|
1549
|
+
# @param more_by [Array]
|
1550
|
+
# Additional columns to sort by, specified as positional arguments.
|
1559
1551
|
# @param reverse [Boolean]
|
1560
1552
|
# false -> order from small to large.
|
1561
1553
|
# true -> order from large to small.
|
1554
|
+
# @param nulls_last [Boolean]
|
1555
|
+
# Place null values last; can specify a single boolean applying to all columns
|
1556
|
+
# or a sequence of booleans for per-column control.
|
1557
|
+
# @param multithreaded [Boolean]
|
1558
|
+
# Sort using multiple threads.
|
1559
|
+
# @param maintain_order [Boolean]
|
1560
|
+
# Whether the order should be maintained if elements are equal.
|
1562
1561
|
#
|
1563
1562
|
# @return [Expr]
|
1564
1563
|
#
|
@@ -4908,10 +4907,6 @@ module Polars
|
|
4908
4907
|
#
|
4909
4908
|
# @param by [String]
|
4910
4909
|
# This column must be of dtype Datetime or Date.
|
4911
|
-
# @param quantile [Float]
|
4912
|
-
# Quantile between 0.0 and 1.0.
|
4913
|
-
# @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear']
|
4914
|
-
# Interpolation method.
|
4915
4910
|
# @param window_size [String]
|
4916
4911
|
# The length of the window. Can be a dynamic
|
4917
4912
|
# temporal size indicated by a timedelta or the following string language:
|
@@ -4932,6 +4927,10 @@ module Polars
|
|
4932
4927
|
# (which may not be 24 hours, due to daylight savings). Similarly for
|
4933
4928
|
# "calendar week", "calendar month", "calendar quarter", and
|
4934
4929
|
# "calendar year".
|
4930
|
+
# @param quantile [Float]
|
4931
|
+
# Quantile between 0.0 and 1.0.
|
4932
|
+
# @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear']
|
4933
|
+
# Interpolation method.
|
4935
4934
|
# @param min_periods [Integer]
|
4936
4935
|
# The number of values in the window that should be non-null before computing
|
4937
4936
|
# a result.
|
@@ -5366,6 +5365,8 @@ module Polars
|
|
5366
5365
|
# a result. If None, it will be set equal to window size.
|
5367
5366
|
# @param center [Boolean]
|
5368
5367
|
# Set the labels at the center of the window
|
5368
|
+
# @param ddof [Integer]
|
5369
|
+
# "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
|
5369
5370
|
#
|
5370
5371
|
# @note
|
5371
5372
|
# This functionality is experimental and may change without it being considered a
|
@@ -5445,6 +5446,8 @@ module Polars
|
|
5445
5446
|
# a result. If None, it will be set equal to window size.
|
5446
5447
|
# @param center [Boolean]
|
5447
5448
|
# Set the labels at the center of the window
|
5449
|
+
# @param ddof [Integer]
|
5450
|
+
# "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
|
5448
5451
|
#
|
5449
5452
|
# @note
|
5450
5453
|
# This functionality is experimental and may change without it being considered a
|
@@ -5626,10 +5629,10 @@ module Polars
|
|
5626
5629
|
# # ╞══════╡
|
5627
5630
|
# # │ null │
|
5628
5631
|
# # │ null │
|
5629
|
-
# # │ 1.0 │
|
5630
5632
|
# # │ 2.0 │
|
5631
5633
|
# # │ 3.0 │
|
5632
5634
|
# # │ 4.0 │
|
5635
|
+
# # │ 6.0 │
|
5633
5636
|
# # └──────┘
|
5634
5637
|
def rolling_quantile(
|
5635
5638
|
quantile,
|
@@ -6471,8 +6474,8 @@ module Polars
|
|
6471
6474
|
# # │ i64 │
|
6472
6475
|
# # ╞═════╡
|
6473
6476
|
# # │ 2 │
|
6474
|
-
# # │ 1 │
|
6475
6477
|
# # │ 3 │
|
6478
|
+
# # │ 1 │
|
6476
6479
|
# # └─────┘
|
6477
6480
|
def shuffle(seed: nil)
|
6478
6481
|
if seed.nil?
|
@@ -6508,7 +6511,7 @@ module Polars
|
|
6508
6511
|
# # │ i64 │
|
6509
6512
|
# # ╞═════╡
|
6510
6513
|
# # │ 3 │
|
6511
|
-
# # │
|
6514
|
+
# # │ 3 │
|
6512
6515
|
# # │ 1 │
|
6513
6516
|
# # └─────┘
|
6514
6517
|
def sample(
|
data/lib/polars/functions/col.rb
CHANGED
@@ -8,11 +8,11 @@ module Polars
|
|
8
8
|
if Utils.strlike?(name)
|
9
9
|
names_str = [name]
|
10
10
|
names_str.concat(more_names)
|
11
|
-
return
|
11
|
+
return Selector._by_name(names_str.map(&:to_s), strict: true).as_expr
|
12
12
|
elsif Utils.is_polars_dtype(name)
|
13
13
|
dtypes = [name]
|
14
14
|
dtypes.concat(more_names)
|
15
|
-
return
|
15
|
+
return Selector._by_type(dtypes).as_expr
|
16
16
|
else
|
17
17
|
msg = "invalid input for `col`\n\nExpected `str` or `DataType`, got #{name.class.name}."
|
18
18
|
raise TypeError, msg
|
@@ -22,7 +22,8 @@ module Polars
|
|
22
22
|
if Utils.strlike?(name)
|
23
23
|
Utils.wrap_expr(Plr.col(name.to_s))
|
24
24
|
elsif Utils.is_polars_dtype(name)
|
25
|
-
|
25
|
+
dtypes = [name]
|
26
|
+
Selector._by_dtype(dtypes).as_expr
|
26
27
|
elsif name.is_a?(::Array) || name.is_a?(::Set)
|
27
28
|
names = Array(name)
|
28
29
|
if names.empty?
|
@@ -31,9 +32,9 @@ module Polars
|
|
31
32
|
|
32
33
|
item = names[0]
|
33
34
|
if Utils.strlike?(item)
|
34
|
-
|
35
|
+
Selector._by_name(names.map(&:to_s), strict: true).as_expr
|
35
36
|
elsif Utils.is_polars_dtype(item)
|
36
|
-
|
37
|
+
Selector._by_dtype(names).as_expr
|
37
38
|
else
|
38
39
|
msg = "invalid input for `col`\n\nExpected iterable of type `str` or `DataType`, got iterable of type #{item.class.name}."
|
39
40
|
raise TypeError, msg
|