polars-df 0.19.0-x64-mingw-ucrt → 0.21.0-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/Cargo.lock +211 -320
- data/LICENSE-THIRD-PARTY.txt +1376 -2634
- data/LICENSE.txt +1 -1
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/3.4/polars.so +0 -0
- data/lib/polars/cat_name_space.rb +3 -43
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/convert.rb +10 -0
- data/lib/polars/data_frame.rb +151 -30
- data/lib/polars/data_types.rb +47 -3
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +48 -39
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/eager.rb +1 -1
- data/lib/polars/functions/lazy.rb +114 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +18 -0
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +45 -63
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +163 -75
- data/lib/polars/list_expr.rb +213 -17
- data/lib/polars/list_name_space.rb +121 -8
- data/lib/polars/meta_expr.rb +14 -29
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +6 -1
- data/lib/polars/selector.rb +138 -0
- data/lib/polars/selectors.rb +931 -202
- data/lib/polars/series.rb +46 -19
- data/lib/polars/string_expr.rb +24 -3
- data/lib/polars/string_name_space.rb +12 -1
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +8 -0
- metadata +10 -2
data/lib/polars/lazy_frame.rb
CHANGED
@@ -234,10 +234,18 @@ module Polars
|
|
234
234
|
#
|
235
235
|
# @param by [Object]
|
236
236
|
# Column (expressions) to sort by.
|
237
|
+
# @param more_by [Array]
|
238
|
+
# Additional columns to sort by, specified as positional arguments.
|
237
239
|
# @param reverse [Boolean]
|
238
240
|
# Sort in descending order.
|
239
241
|
# @param nulls_last [Boolean]
|
240
242
|
# Place null values last. Can only be used if sorted by a single column.
|
243
|
+
# @param maintain_order [Boolean]
|
244
|
+
# Whether the order should be maintained if elements are equal.
|
245
|
+
# Note that if `true` streaming is not possible and performance might be
|
246
|
+
# worse since this requires a stable sort.
|
247
|
+
# @param multithreaded [Boolean]
|
248
|
+
# Sort using multiple threads.
|
241
249
|
#
|
242
250
|
# @return [LazyFrame]
|
243
251
|
#
|
@@ -305,6 +313,8 @@ module Polars
|
|
305
313
|
# Slice pushdown optimization.
|
306
314
|
# @param common_subplan_elimination [Boolean]
|
307
315
|
# Will try to cache branching subplans that occur on self-joins or unions.
|
316
|
+
# @param comm_subexpr_elim [Boolean]
|
317
|
+
# Common subexpressions will be cached and reused.
|
308
318
|
# @param allow_streaming [Boolean]
|
309
319
|
# Run parts of the query in a streaming fashion (this is in an alpha state)
|
310
320
|
#
|
@@ -412,6 +422,31 @@ module Polars
|
|
412
422
|
# Turn off (certain) optimizations.
|
413
423
|
# @param slice_pushdown [Boolean]
|
414
424
|
# Slice pushdown optimization.
|
425
|
+
# @param storage_options [Hash]
|
426
|
+
# Options that indicate how to connect to a cloud provider.
|
427
|
+
#
|
428
|
+
# The cloud providers currently supported are AWS, GCP, and Azure.
|
429
|
+
# See supported keys here:
|
430
|
+
#
|
431
|
+
# * [aws](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html)
|
432
|
+
# * [gcp](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html)
|
433
|
+
# * [azure](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html)
|
434
|
+
# * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
|
435
|
+
#
|
436
|
+
# If `storage_options` is not provided, Polars will try to infer the
|
437
|
+
# information from environment variables.
|
438
|
+
# @param retries [Integer]
|
439
|
+
# Number of retries if accessing a cloud instance fails.
|
440
|
+
# @param sync_on_close ['data', 'all']
|
441
|
+
# Sync to disk before closing a file.
|
442
|
+
#
|
443
|
+
# * `nil` does not sync.
|
444
|
+
# * `data` syncs the file contents.
|
445
|
+
# * `all` syncs the file contents and metadata.
|
446
|
+
# @param mkdir [Boolean]
|
447
|
+
# Recursively create all the directories in the path.
|
448
|
+
# @param lazy [Boolean]
|
449
|
+
# Wait to start execution until `collect` is called.
|
415
450
|
#
|
416
451
|
# @return [DataFrame]
|
417
452
|
#
|
@@ -521,6 +556,16 @@ module Polars
|
|
521
556
|
# Slice pushdown optimization.
|
522
557
|
# @param no_optimization [Boolean]
|
523
558
|
# Turn off (certain) optimizations.
|
559
|
+
# @param sync_on_close ['data', 'all']
|
560
|
+
# Sync to disk before closing a file.
|
561
|
+
#
|
562
|
+
# * `nil` does not sync.
|
563
|
+
# * `data` syncs the file contents.
|
564
|
+
# * `all` syncs the file contents and metadata.
|
565
|
+
# @param mkdir [Boolean]
|
566
|
+
# Recursively create all the directories in the path.
|
567
|
+
# @param lazy [Boolean]
|
568
|
+
# Wait to start execution until `collect` is called.
|
524
569
|
#
|
525
570
|
# @return [DataFrame]
|
526
571
|
#
|
@@ -614,9 +659,15 @@ module Polars
|
|
614
659
|
# A format string, with the specifiers defined by the
|
615
660
|
# `chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
|
616
661
|
# Rust crate.
|
662
|
+
# @param float_scientific [Boolean]
|
663
|
+
# Whether to use scientific form always (true), never (false), or
|
664
|
+
# automatically (nil) for `Float32` and `Float64` datatypes.
|
617
665
|
# @param float_precision [Integer]
|
618
666
|
# Number of decimal places to write, applied to both `Float32` and
|
619
667
|
# `Float64` datatypes.
|
668
|
+
# @param decimal_comma [Boolean]
|
669
|
+
# Use a comma as the decimal separator instead of a point. Floats will be
|
670
|
+
# encapsulated in quotes if necessary; set the field separator to override.
|
620
671
|
# @param null_value [String]
|
621
672
|
# A string representing null values (defaulting to the empty string).
|
622
673
|
# @param quote_style ["necessary", "always", "non_numeric", "never"]
|
@@ -655,6 +706,16 @@ module Polars
|
|
655
706
|
# Options that indicate how to connect to a cloud provider.
|
656
707
|
# @param retries [Integer]
|
657
708
|
# Number of retries if accessing a cloud instance fails.
|
709
|
+
# @param sync_on_close ['data', 'all']
|
710
|
+
# Sync to disk before closing a file.
|
711
|
+
#
|
712
|
+
# * `nil` does not sync.
|
713
|
+
# * `data` syncs the file contents.
|
714
|
+
# * `all` syncs the file contents and metadata.
|
715
|
+
# @param mkdir [Boolean]
|
716
|
+
# Recursively create all the directories in the path.
|
717
|
+
# @param lazy [Boolean]
|
718
|
+
# Wait to start execution until `collect` is called.
|
658
719
|
#
|
659
720
|
# @return [DataFrame]
|
660
721
|
#
|
@@ -674,6 +735,7 @@ module Polars
|
|
674
735
|
time_format: nil,
|
675
736
|
float_scientific: nil,
|
676
737
|
float_precision: nil,
|
738
|
+
decimal_comma: false,
|
677
739
|
null_value: nil,
|
678
740
|
quote_style: nil,
|
679
741
|
maintain_order: true,
|
@@ -726,6 +788,7 @@ module Polars
|
|
726
788
|
time_format,
|
727
789
|
float_scientific,
|
728
790
|
float_precision,
|
791
|
+
decimal_comma,
|
729
792
|
null_value,
|
730
793
|
quote_style,
|
731
794
|
storage_options,
|
@@ -762,6 +825,31 @@ module Polars
|
|
762
825
|
# Slice pushdown optimization.
|
763
826
|
# @param no_optimization [Boolean]
|
764
827
|
# Turn off (certain) optimizations.
|
828
|
+
# @param storage_options [Hash]
|
829
|
+
# Options that indicate how to connect to a cloud provider.
|
830
|
+
#
|
831
|
+
# The cloud providers currently supported are AWS, GCP, and Azure.
|
832
|
+
# See supported keys here:
|
833
|
+
#
|
834
|
+
# * [aws](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html)
|
835
|
+
# * [gcp](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html)
|
836
|
+
# * [azure](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html)
|
837
|
+
# * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
|
838
|
+
#
|
839
|
+
# If `storage_options` is not provided, Polars will try to infer the
|
840
|
+
# information from environment variables.
|
841
|
+
# @param retries [Integer]
|
842
|
+
# Number of retries if accessing a cloud instance fails.
|
843
|
+
# @param sync_on_close ['data', 'all']
|
844
|
+
# Sync to disk before closing a file.
|
845
|
+
#
|
846
|
+
# * `nil` does not sync.
|
847
|
+
# * `data` syncs the file contents.
|
848
|
+
# * `all` syncs the file contents and metadata.
|
849
|
+
# @param mkdir [Boolean]
|
850
|
+
# Recursively create all the directories in the path.
|
851
|
+
# @param lazy [Boolean]
|
852
|
+
# Wait to start execution until `collect` is called.
|
765
853
|
#
|
766
854
|
# @return [DataFrame]
|
767
855
|
#
|
@@ -854,25 +942,6 @@ module Polars
|
|
854
942
|
#
|
855
943
|
# @param n_rows [Integer]
|
856
944
|
# Collect n_rows from the data sources.
|
857
|
-
# @param type_coercion [Boolean]
|
858
|
-
# Run type coercion optimization.
|
859
|
-
# @param predicate_pushdown [Boolean]
|
860
|
-
# Run predicate pushdown optimization.
|
861
|
-
# @param projection_pushdown [Boolean]
|
862
|
-
# Run projection pushdown optimization.
|
863
|
-
# @param simplify_expression [Boolean]
|
864
|
-
# Run simplify expressions optimization.
|
865
|
-
# @param string_cache [Boolean]
|
866
|
-
# This argument is deprecated. Please set the string cache globally.
|
867
|
-
# The argument will be ignored
|
868
|
-
# @param no_optimization [Boolean]
|
869
|
-
# Turn off optimizations.
|
870
|
-
# @param slice_pushdown [Boolean]
|
871
|
-
# Slice pushdown optimization
|
872
|
-
# @param common_subplan_elimination [Boolean]
|
873
|
-
# Will try to cache branching subplans that occur on self-joins or unions.
|
874
|
-
# @param allow_streaming [Boolean]
|
875
|
-
# Run parts of the query in a streaming fashion (this is in an alpha state)
|
876
945
|
#
|
877
946
|
# @return [DataFrame]
|
878
947
|
#
|
@@ -892,41 +961,11 @@ module Polars
|
|
892
961
|
# # │ --- ┆ --- ┆ --- │
|
893
962
|
# # │ str ┆ i64 ┆ i64 │
|
894
963
|
# # ╞═════╪═════╪═════╡
|
895
|
-
# # │ a ┆
|
896
|
-
# # │ b ┆
|
964
|
+
# # │ a ┆ 4 ┆ 10 │
|
965
|
+
# # │ b ┆ 11 ┆ 10 │
|
897
966
|
# # └─────┴─────┴─────┘
|
898
|
-
def fetch(
|
899
|
-
n_rows
|
900
|
-
type_coercion: true,
|
901
|
-
predicate_pushdown: true,
|
902
|
-
projection_pushdown: true,
|
903
|
-
simplify_expression: true,
|
904
|
-
string_cache: false,
|
905
|
-
no_optimization: false,
|
906
|
-
slice_pushdown: true,
|
907
|
-
common_subplan_elimination: true,
|
908
|
-
comm_subexpr_elim: true,
|
909
|
-
allow_streaming: false
|
910
|
-
)
|
911
|
-
if no_optimization
|
912
|
-
predicate_pushdown = false
|
913
|
-
projection_pushdown = false
|
914
|
-
slice_pushdown = false
|
915
|
-
common_subplan_elimination = false
|
916
|
-
end
|
917
|
-
|
918
|
-
ldf = _ldf.optimization_toggle(
|
919
|
-
type_coercion,
|
920
|
-
predicate_pushdown,
|
921
|
-
projection_pushdown,
|
922
|
-
simplify_expression,
|
923
|
-
slice_pushdown,
|
924
|
-
common_subplan_elimination,
|
925
|
-
comm_subexpr_elim,
|
926
|
-
allow_streaming,
|
927
|
-
false
|
928
|
-
)
|
929
|
-
Utils.wrap_df(ldf.fetch(n_rows))
|
967
|
+
def fetch(n_rows = 500, **kwargs)
|
968
|
+
head(n_rows).collect(**kwargs)
|
930
969
|
end
|
931
970
|
|
932
971
|
# Return lazy representation, i.e. itself.
|
@@ -1058,7 +1097,7 @@ module Polars
|
|
1058
1097
|
# # │ null ┆ null ┆ null │
|
1059
1098
|
# # └──────┴──────┴──────┘
|
1060
1099
|
def clear(n = 0)
|
1061
|
-
DataFrame.new(
|
1100
|
+
DataFrame.new(schema: schema).clear(n).lazy
|
1062
1101
|
end
|
1063
1102
|
alias_method :cleared, :clear
|
1064
1103
|
|
@@ -1413,8 +1452,32 @@ module Polars
|
|
1413
1452
|
# parallelize
|
1414
1453
|
# @param closed ["right", "left", "both", "none"]
|
1415
1454
|
# Define whether the temporal window interval is closed or not.
|
1455
|
+
# @param label ['left', 'right', 'datapoint']
|
1456
|
+
# Define which label to use for the window:
|
1457
|
+
#
|
1458
|
+
# - 'left': lower boundary of the window
|
1459
|
+
# - 'right': upper boundary of the window
|
1460
|
+
# - 'datapoint': the first value of the index column in the given window.
|
1461
|
+
# If you don't need the label to be at one of the boundaries, choose this
|
1462
|
+
# option for maximum performance
|
1416
1463
|
# @param by [Object]
|
1417
1464
|
# Also group by this column/these columns
|
1465
|
+
# @param start_by ['window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
|
1466
|
+
# The strategy to determine the start of the first window by.
|
1467
|
+
#
|
1468
|
+
# * 'window': Start by taking the earliest timestamp, truncating it with
|
1469
|
+
# `every`, and then adding `offset`.
|
1470
|
+
# Note that weekly windows start on Monday.
|
1471
|
+
# * 'datapoint': Start from the first encountered data point.
|
1472
|
+
# * a day of the week (only takes effect if `every` contains `'w'`):
|
1473
|
+
#
|
1474
|
+
# * 'monday': Start the window on the Monday before the first data point.
|
1475
|
+
# * 'tuesday': Start the window on the Tuesday before the first data point.
|
1476
|
+
# * ...
|
1477
|
+
# * 'sunday': Start the window on the Sunday before the first data point.
|
1478
|
+
#
|
1479
|
+
# The resulting window is then shifted back until the earliest datapoint
|
1480
|
+
# is in or in front of it.
|
1418
1481
|
#
|
1419
1482
|
# @return [DataFrame]
|
1420
1483
|
#
|
@@ -1652,12 +1715,12 @@ module Polars
|
|
1652
1715
|
# @param on [String]
|
1653
1716
|
# Join column of both DataFrames. If set, `left_on` and `right_on` should be
|
1654
1717
|
# `nil`.
|
1655
|
-
# @param by [Object]
|
1656
|
-
# Join on these columns before doing asof join.
|
1657
1718
|
# @param by_left [Object]
|
1658
1719
|
# Join on these columns before doing asof join.
|
1659
1720
|
# @param by_right [Object]
|
1660
1721
|
# Join on these columns before doing asof join.
|
1722
|
+
# @param by [Object]
|
1723
|
+
# Join on these columns before doing asof join.
|
1661
1724
|
# @param strategy ["backward", "forward"]
|
1662
1725
|
# Join strategy.
|
1663
1726
|
# @param suffix [String]
|
@@ -1873,7 +1936,7 @@ module Polars
|
|
1873
1936
|
# # └─────────────┴────────────┴────────────┘
|
1874
1937
|
#
|
1875
1938
|
# @example
|
1876
|
-
# pop2.join_asof(gdp2, by: "country", on: "date", strategy: "nearest").collect
|
1939
|
+
# pop2.join_asof(gdp2, by: "country", on: "date", strategy: "nearest", check_sortedness: false).collect
|
1877
1940
|
# # =>
|
1878
1941
|
# # shape: (6, 4)
|
1879
1942
|
# # ┌─────────────┬────────────┬────────────┬──────┐
|
@@ -2175,6 +2238,9 @@ module Polars
|
|
2175
2238
|
#
|
2176
2239
|
# @param exprs [Object]
|
2177
2240
|
# List of Expressions that evaluate to columns.
|
2241
|
+
# @param named_exprs [Hash]
|
2242
|
+
# Additional columns to add, specified as keyword arguments.
|
2243
|
+
# The columns will be renamed to the keyword used.
|
2178
2244
|
#
|
2179
2245
|
# @return [LazyFrame]
|
2180
2246
|
#
|
@@ -2299,6 +2365,9 @@ module Polars
|
|
2299
2365
|
# @param columns [Object]
|
2300
2366
|
# - Name of the column that should be removed.
|
2301
2367
|
# - List of column names.
|
2368
|
+
# @param strict [Boolean]
|
2369
|
+
# Validate that all column names exist in the current schema,
|
2370
|
+
# and throw an exception if any do not.
|
2302
2371
|
#
|
2303
2372
|
# @return [LazyFrame]
|
2304
2373
|
#
|
@@ -2350,9 +2419,18 @@ module Polars
|
|
2350
2419
|
# # │ 7.0 │
|
2351
2420
|
# # │ 8.0 │
|
2352
2421
|
# # └─────┘
|
2353
|
-
def drop(*columns)
|
2354
|
-
|
2355
|
-
|
2422
|
+
def drop(*columns, strict: true)
|
2423
|
+
selectors = []
|
2424
|
+
columns.each do |c|
|
2425
|
+
if c.is_a?(Enumerable)
|
2426
|
+
selectors += c
|
2427
|
+
else
|
2428
|
+
selectors += [c]
|
2429
|
+
end
|
2430
|
+
end
|
2431
|
+
|
2432
|
+
drop_cols = Utils.parse_list_into_selector(selectors, strict: strict)
|
2433
|
+
_from_rbldf(_ldf.drop(drop_cols._rbselector))
|
2356
2434
|
end
|
2357
2435
|
|
2358
2436
|
# Rename column names.
|
@@ -3153,9 +3231,11 @@ module Polars
|
|
3153
3231
|
# # │ c ┆ 7 │
|
3154
3232
|
# # │ c ┆ 8 │
|
3155
3233
|
# # └─────────┴─────────┘
|
3156
|
-
def explode(columns)
|
3157
|
-
|
3158
|
-
|
3234
|
+
def explode(columns, *more_columns)
|
3235
|
+
subset = Utils.parse_list_into_selector(columns) | Utils.parse_list_into_selector(
|
3236
|
+
more_columns
|
3237
|
+
)
|
3238
|
+
_from_rbldf(_ldf.explode(subset._rbselector))
|
3159
3239
|
end
|
3160
3240
|
|
3161
3241
|
# Drop duplicate rows from this DataFrame.
|
@@ -3220,10 +3300,11 @@ module Polars
|
|
3220
3300
|
# # │ 1 ┆ a ┆ b │
|
3221
3301
|
# # └─────┴─────┴─────┘
|
3222
3302
|
def unique(maintain_order: true, subset: nil, keep: "first")
|
3223
|
-
|
3224
|
-
|
3303
|
+
selector_subset = nil
|
3304
|
+
if !subset.nil?
|
3305
|
+
selector_subset = Utils.parse_list_into_selector(subset)._rbselector
|
3225
3306
|
end
|
3226
|
-
_from_rbldf(_ldf.unique(maintain_order,
|
3307
|
+
_from_rbldf(_ldf.unique(maintain_order, selector_subset, keep))
|
3227
3308
|
end
|
3228
3309
|
|
3229
3310
|
# Drop rows with null values from this LazyFrame.
|
@@ -3318,11 +3399,16 @@ module Polars
|
|
3318
3399
|
warn "The `streamable` parameter for `LazyFrame.unpivot` is deprecated"
|
3319
3400
|
end
|
3320
3401
|
|
3321
|
-
|
3322
|
-
|
3402
|
+
selector_on = on.nil? ? Selectors.empty : Utils.parse_list_into_selector(on)
|
3403
|
+
selector_index = index.nil? ? Selectors.empty : Utils.parse_list_into_selector(index)
|
3323
3404
|
|
3324
3405
|
_from_rbldf(
|
3325
|
-
_ldf.unpivot(
|
3406
|
+
_ldf.unpivot(
|
3407
|
+
selector_on._rbselector,
|
3408
|
+
selector_index._rbselector,
|
3409
|
+
value_name,
|
3410
|
+
variable_name
|
3411
|
+
)
|
3326
3412
|
)
|
3327
3413
|
end
|
3328
3414
|
alias_method :melt, :unpivot
|
@@ -3364,8 +3450,10 @@ module Polars
|
|
3364
3450
|
# The fields will be inserted into the `DataFrame` on the location of the
|
3365
3451
|
# `struct` type.
|
3366
3452
|
#
|
3367
|
-
# @param
|
3453
|
+
# @param columns [Object]
|
3368
3454
|
# Names of the struct columns that will be decomposed by their fields
|
3455
|
+
# @param more_columns [Array]
|
3456
|
+
# Additional columns to unnest, specified as positional arguments.
|
3369
3457
|
#
|
3370
3458
|
# @return [LazyFrame]
|
3371
3459
|
#
|
@@ -3410,11 +3498,11 @@ module Polars
|
|
3410
3498
|
# # │ foo ┆ 1 ┆ a ┆ true ┆ [1, 2] ┆ baz │
|
3411
3499
|
# # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
|
3412
3500
|
# # └────────┴─────┴─────┴──────┴───────────┴───────┘
|
3413
|
-
def unnest(
|
3414
|
-
|
3415
|
-
|
3416
|
-
|
3417
|
-
_from_rbldf(_ldf.unnest(
|
3501
|
+
def unnest(columns, *more_columns)
|
3502
|
+
subset = Utils.parse_list_into_selector(columns) | Utils.parse_list_into_selector(
|
3503
|
+
more_columns
|
3504
|
+
)
|
3505
|
+
_from_rbldf(_ldf.unnest(subset._rbselector))
|
3418
3506
|
end
|
3419
3507
|
|
3420
3508
|
# Take two sorted DataFrames and merge them by the sorted key.
|
data/lib/polars/list_expr.rb
CHANGED
@@ -136,7 +136,7 @@ module Polars
|
|
136
136
|
# # │ --- ┆ --- ┆ --- │
|
137
137
|
# # │ list[i64] ┆ i64 ┆ list[i64] │
|
138
138
|
# # ╞═══════════╪═════╪═══════════╡
|
139
|
-
# # │ [1, 2, 3] ┆ 2 ┆ [2,
|
139
|
+
# # │ [1, 2, 3] ┆ 2 ┆ [2, 3] │
|
140
140
|
# # │ [4, 5] ┆ 1 ┆ [5] │
|
141
141
|
# # └───────────┴─────┴───────────┘
|
142
142
|
def sample(n: nil, fraction: nil, with_replacement: false, shuffle: false, seed: nil)
|
@@ -245,6 +245,11 @@ module Polars
|
|
245
245
|
|
246
246
|
# Sort the arrays in the list.
|
247
247
|
#
|
248
|
+
# @param reverse [Boolean]
|
249
|
+
# Sort in descending order.
|
250
|
+
# @param nulls_last [Boolean]
|
251
|
+
# Place null values last.
|
252
|
+
#
|
248
253
|
# @return [Expr]
|
249
254
|
#
|
250
255
|
# @example
|
@@ -264,8 +269,8 @@ module Polars
|
|
264
269
|
# # │ [1, 2, 3] │
|
265
270
|
# # │ [1, 2, 9] │
|
266
271
|
# # └───────────┘
|
267
|
-
def sort(reverse: false)
|
268
|
-
Utils.wrap_expr(_rbexpr.list_sort(reverse))
|
272
|
+
def sort(reverse: false, nulls_last: false)
|
273
|
+
Utils.wrap_expr(_rbexpr.list_sort(reverse, nulls_last))
|
269
274
|
end
|
270
275
|
|
271
276
|
# Reverse the arrays in the list.
|
@@ -729,9 +734,21 @@ module Polars
|
|
729
734
|
#
|
730
735
|
# @param n_field_strategy ["first_non_null", "max_width"]
|
731
736
|
# Strategy to determine the number of fields of the struct.
|
732
|
-
# @param
|
733
|
-
#
|
734
|
-
#
|
737
|
+
# @param fields [Array]
|
738
|
+
# If the name and number of the desired fields is known in advance
|
739
|
+
# a list of field names can be given, which will be assigned by index.
|
740
|
+
# Otherwise, to dynamically assign field names, a custom function can be
|
741
|
+
# used; if neither are set, fields will be `field_0, field_1 .. field_n`.
|
742
|
+
# @param upper_bound [Object]
|
743
|
+
# A polars `LazyFrame` needs to know the schema at all times, so the
|
744
|
+
# caller must provide an upper bound of the number of struct fields that
|
745
|
+
# will be created; if set incorrectly, subsequent operations may fail.
|
746
|
+
# (For example, an `all.sum` expression will look in the current
|
747
|
+
# schema to determine which columns to select).
|
748
|
+
#
|
749
|
+
# When operating on a `DataFrame`, the schema does not need to be
|
750
|
+
# tracked or pre-determined, as the result will be eagerly evaluated,
|
751
|
+
# so you can leave this parameter unset.
|
735
752
|
#
|
736
753
|
# @return [Expr]
|
737
754
|
#
|
@@ -748,9 +765,8 @@ module Polars
|
|
748
765
|
# # │ {1,2,3} │
|
749
766
|
# # │ {1,2,null} │
|
750
767
|
# # └────────────┘
|
751
|
-
def to_struct(n_field_strategy: "first_non_null",
|
752
|
-
|
753
|
-
Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, name_generator, nil))
|
768
|
+
def to_struct(n_field_strategy: "first_non_null", fields: nil, upper_bound: nil)
|
769
|
+
Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, fields, nil))
|
754
770
|
end
|
755
771
|
|
756
772
|
# Run any polars expression against the lists' elements.
|
@@ -758,12 +774,6 @@ module Polars
|
|
758
774
|
# @param expr [Expr]
|
759
775
|
# Expression to run. Note that you can select an element with `Polars.first`, or
|
760
776
|
# `Polars.col`
|
761
|
-
# @param parallel [Boolean]
|
762
|
-
# Run all expression parallel. Don't activate this blindly.
|
763
|
-
# Parallelism is worth it if there is enough work to do per thread.
|
764
|
-
#
|
765
|
-
# This likely should not be use in the group by context, because we already
|
766
|
-
# parallel execution per group
|
767
777
|
#
|
768
778
|
# @return [Expr]
|
769
779
|
#
|
@@ -783,8 +793,194 @@ module Polars
|
|
783
793
|
# # │ 8 ┆ 5 ┆ [2.0, 1.0] │
|
784
794
|
# # │ 3 ┆ 2 ┆ [2.0, 1.0] │
|
785
795
|
# # └─────┴─────┴────────────┘
|
786
|
-
def eval(expr
|
787
|
-
Utils.wrap_expr(_rbexpr.list_eval(expr._rbexpr
|
796
|
+
def eval(expr)
|
797
|
+
Utils.wrap_expr(_rbexpr.list_eval(expr._rbexpr))
|
798
|
+
end
|
799
|
+
|
800
|
+
# Filter elements in each list by a boolean expression.
|
801
|
+
#
|
802
|
+
# @param predicate [Object]
|
803
|
+
# A boolean expression that is evaluated per list element.
|
804
|
+
# You can refer to the current element with `Polars.element`.
|
805
|
+
#
|
806
|
+
# @return [Expr]
|
807
|
+
#
|
808
|
+
# @example
|
809
|
+
# df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
|
810
|
+
# df.with_columns(
|
811
|
+
# evens: Polars.concat_list("a", "b").list.filter(Polars.element % 2 == 0)
|
812
|
+
# )
|
813
|
+
# # =>
|
814
|
+
# # shape: (3, 3)
|
815
|
+
# # ┌─────┬─────┬───────────┐
|
816
|
+
# # │ a ┆ b ┆ evens │
|
817
|
+
# # │ --- ┆ --- ┆ --- │
|
818
|
+
# # │ i64 ┆ i64 ┆ list[i64] │
|
819
|
+
# # ╞═════╪═════╪═══════════╡
|
820
|
+
# # │ 1 ┆ 4 ┆ [4] │
|
821
|
+
# # │ 8 ┆ 5 ┆ [8] │
|
822
|
+
# # │ 3 ┆ 2 ┆ [2] │
|
823
|
+
# # └─────┴─────┴───────────┘
|
824
|
+
def filter(predicate)
|
825
|
+
Utils.wrap_expr(_rbexpr.list_filter(predicate._rbexpr))
|
826
|
+
end
|
827
|
+
|
828
|
+
# Compute the SET UNION between the elements in this list and the elements of `other`.
|
829
|
+
#
|
830
|
+
# @param other [Object]
|
831
|
+
# Right hand side of the set operation.
|
832
|
+
#
|
833
|
+
# @return [Expr]
|
834
|
+
#
|
835
|
+
# @example
|
836
|
+
# df = Polars::DataFrame.new(
|
837
|
+
# {
|
838
|
+
# "a" => [[1, 2, 3], [], [nil, 3], [5, 6, 7]],
|
839
|
+
# "b" => [[2, 3, 4], [3], [3, 4, nil], [6, 8]]
|
840
|
+
# }
|
841
|
+
# )
|
842
|
+
# df.with_columns(
|
843
|
+
# union: Polars.col("a").list.set_union("b")
|
844
|
+
# )
|
845
|
+
# # =>
|
846
|
+
# # shape: (4, 3)
|
847
|
+
# # ┌───────────┬──────────────┬──────────────┐
|
848
|
+
# # │ a ┆ b ┆ union │
|
849
|
+
# # │ --- ┆ --- ┆ --- │
|
850
|
+
# # │ list[i64] ┆ list[i64] ┆ list[i64] │
|
851
|
+
# # ╞═══════════╪══════════════╪══════════════╡
|
852
|
+
# # │ [1, 2, 3] ┆ [2, 3, 4] ┆ [1, 2, … 4] │
|
853
|
+
# # │ [] ┆ [3] ┆ [3] │
|
854
|
+
# # │ [null, 3] ┆ [3, 4, null] ┆ [null, 3, 4] │
|
855
|
+
# # │ [5, 6, 7] ┆ [6, 8] ┆ [5, 6, … 8] │
|
856
|
+
# # └───────────┴──────────────┴──────────────┘
|
857
|
+
def set_union(other)
|
858
|
+
if other.respond_to?(:each)
|
859
|
+
if !other.is_a?(::Array) && !other.is_a?(Series) && !other.is_a?(DataFrame)
|
860
|
+
other = other.to_a
|
861
|
+
end
|
862
|
+
other = F.lit(other)._rbexpr
|
863
|
+
else
|
864
|
+
other = Utils.parse_into_expression(other)
|
865
|
+
end
|
866
|
+
Utils.wrap_expr(_rbexpr.list_set_operation(other, "union"))
|
867
|
+
end
|
868
|
+
|
869
|
+
# Compute the SET DIFFERENCE between the elements in this list and the elements of `other`.
|
870
|
+
#
|
871
|
+
# @param other [Object]
|
872
|
+
# Right hand side of the set operation.
|
873
|
+
#
|
874
|
+
# @return [Expr]
|
875
|
+
#
|
876
|
+
# @example
|
877
|
+
# df = Polars::DataFrame.new(
|
878
|
+
# {
|
879
|
+
# "a" => [[1, 2, 3], [], [nil, 3], [5, 6, 7]],
|
880
|
+
# "b" => [[2, 3, 4], [3], [3, 4, nil], [6, 8]]
|
881
|
+
# }
|
882
|
+
# )
|
883
|
+
# df.with_columns(difference: Polars.col("a").list.set_difference("b"))
|
884
|
+
# # =>
|
885
|
+
# # shape: (4, 3)
|
886
|
+
# # ┌───────────┬──────────────┬────────────┐
|
887
|
+
# # │ a ┆ b ┆ difference │
|
888
|
+
# # │ --- ┆ --- ┆ --- │
|
889
|
+
# # │ list[i64] ┆ list[i64] ┆ list[i64] │
|
890
|
+
# # ╞═══════════╪══════════════╪════════════╡
|
891
|
+
# # │ [1, 2, 3] ┆ [2, 3, 4] ┆ [1] │
|
892
|
+
# # │ [] ┆ [3] ┆ [] │
|
893
|
+
# # │ [null, 3] ┆ [3, 4, null] ┆ [] │
|
894
|
+
# # │ [5, 6, 7] ┆ [6, 8] ┆ [5, 7] │
|
895
|
+
# # └───────────┴──────────────┴────────────┘
|
896
|
+
def set_difference(other)
|
897
|
+
if other.respond_to?(:each)
|
898
|
+
if !other.is_a?(::Array) && !other.is_a?(Series) && !other.is_a?(DataFrame)
|
899
|
+
other = other.to_a
|
900
|
+
end
|
901
|
+
other = F.lit(other)._rbexpr
|
902
|
+
else
|
903
|
+
other = Utils.parse_into_expression(other)
|
904
|
+
end
|
905
|
+
Utils.wrap_expr(_rbexpr.list_set_operation(other, "difference"))
|
906
|
+
end
|
907
|
+
|
908
|
+
# Compute the SET INTERSECTION between the elements in this list and the elements of `other`.
|
909
|
+
#
|
910
|
+
# @param other [Object]
|
911
|
+
# Right hand side of the set operation.
|
912
|
+
#
|
913
|
+
# @return [Expr]
|
914
|
+
#
|
915
|
+
# @example
|
916
|
+
# df = Polars::DataFrame.new(
|
917
|
+
# {
|
918
|
+
# "a" => [[1, 2, 3], [], [nil, 3], [5, 6, 7]],
|
919
|
+
# "b" => [[2, 3, 4], [3], [3, 4, nil], [6, 8]]
|
920
|
+
# }
|
921
|
+
# )
|
922
|
+
# df.with_columns(intersection: Polars.col("a").list.set_intersection("b"))
|
923
|
+
# # =>
|
924
|
+
# # shape: (4, 3)
|
925
|
+
# # ┌───────────┬──────────────┬──────────────┐
|
926
|
+
# # │ a ┆ b ┆ intersection │
|
927
|
+
# # │ --- ┆ --- ┆ --- │
|
928
|
+
# # │ list[i64] ┆ list[i64] ┆ list[i64] │
|
929
|
+
# # ╞═══════════╪══════════════╪══════════════╡
|
930
|
+
# # │ [1, 2, 3] ┆ [2, 3, 4] ┆ [2, 3] │
|
931
|
+
# # │ [] ┆ [3] ┆ [] │
|
932
|
+
# # │ [null, 3] ┆ [3, 4, null] ┆ [null, 3] │
|
933
|
+
# # │ [5, 6, 7] ┆ [6, 8] ┆ [6] │
|
934
|
+
# # └───────────┴──────────────┴──────────────┘
|
935
|
+
def set_intersection(other)
|
936
|
+
if other.respond_to?(:each)
|
937
|
+
if !other.is_a?(::Array) && !other.is_a?(Series) && !other.is_a?(DataFrame)
|
938
|
+
other = other.to_a
|
939
|
+
end
|
940
|
+
other = F.lit(other)._rbexpr
|
941
|
+
else
|
942
|
+
other = Utils.parse_into_expression(other)
|
943
|
+
end
|
944
|
+
Utils.wrap_expr(_rbexpr.list_set_operation(other, "intersection"))
|
945
|
+
end
|
946
|
+
|
947
|
+
# Compute the SET SYMMETRIC DIFFERENCE between the elements in this list and the elements of `other`.
|
948
|
+
#
|
949
|
+
# @param other [Object]
|
950
|
+
# Right hand side of the set operation.
|
951
|
+
#
|
952
|
+
# @return [Expr]
|
953
|
+
#
|
954
|
+
# @example
|
955
|
+
# df = Polars::DataFrame.new(
|
956
|
+
# {
|
957
|
+
# "a" => [[1, 2, 3], [], [nil, 3], [5, 6, 7]],
|
958
|
+
# "b" => [[2, 3, 4], [3], [3, 4, nil], [6, 8]]
|
959
|
+
# }
|
960
|
+
# )
|
961
|
+
# df.with_columns(sdiff: Polars.col("b").list.set_symmetric_difference("a"))
|
962
|
+
# # =>
|
963
|
+
# # shape: (4, 3)
|
964
|
+
# # ┌───────────┬──────────────┬───────────┐
|
965
|
+
# # │ a ┆ b ┆ sdiff │
|
966
|
+
# # │ --- ┆ --- ┆ --- │
|
967
|
+
# # │ list[i64] ┆ list[i64] ┆ list[i64] │
|
968
|
+
# # ╞═══════════╪══════════════╪═══════════╡
|
969
|
+
# # │ [1, 2, 3] ┆ [2, 3, 4] ┆ [4, 1] │
|
970
|
+
# # │ [] ┆ [3] ┆ [3] │
|
971
|
+
# # │ [null, 3] ┆ [3, 4, null] ┆ [4] │
|
972
|
+
# # │ [5, 6, 7] ┆ [6, 8] ┆ [8, 5, 7] │
|
973
|
+
# # └───────────┴──────────────┴───────────┘
|
974
|
+
def set_symmetric_difference(other)
|
975
|
+
if other.respond_to?(:each)
|
976
|
+
if !other.is_a?(::Array) && !other.is_a?(Series) && !other.is_a?(DataFrame)
|
977
|
+
other = other.to_a
|
978
|
+
end
|
979
|
+
other = F.lit(other)._rbexpr
|
980
|
+
else
|
981
|
+
other = Utils.parse_into_expression(other)
|
982
|
+
end
|
983
|
+
Utils.wrap_expr(_rbexpr.list_set_operation(other, "symmetric_difference"))
|
788
984
|
end
|
789
985
|
end
|
790
986
|
end
|