polars-df 0.20.0-x86_64-darwin → 0.21.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +11 -0
  3. data/Cargo.lock +192 -186
  4. data/LICENSE-THIRD-PARTY.txt +1431 -1810
  5. data/LICENSE.txt +1 -1
  6. data/lib/polars/3.2/polars.bundle +0 -0
  7. data/lib/polars/3.3/polars.bundle +0 -0
  8. data/lib/polars/3.4/polars.bundle +0 -0
  9. data/lib/polars/cat_name_space.rb +3 -43
  10. data/lib/polars/catalog/unity/catalog_info.rb +20 -0
  11. data/lib/polars/catalog/unity/column_info.rb +31 -0
  12. data/lib/polars/catalog/unity/namespace_info.rb +21 -0
  13. data/lib/polars/catalog/unity/table_info.rb +50 -0
  14. data/lib/polars/catalog.rb +448 -0
  15. data/lib/polars/convert.rb +10 -0
  16. data/lib/polars/data_frame.rb +151 -30
  17. data/lib/polars/data_types.rb +47 -3
  18. data/lib/polars/exceptions.rb +7 -2
  19. data/lib/polars/expr.rb +34 -31
  20. data/lib/polars/functions/col.rb +6 -5
  21. data/lib/polars/functions/lazy.rb +114 -15
  22. data/lib/polars/functions/repeat.rb +4 -0
  23. data/lib/polars/io/csv.rb +18 -0
  24. data/lib/polars/io/json.rb +16 -0
  25. data/lib/polars/io/ndjson.rb +13 -0
  26. data/lib/polars/io/parquet.rb +45 -63
  27. data/lib/polars/io/scan_options.rb +47 -0
  28. data/lib/polars/lazy_frame.rb +163 -75
  29. data/lib/polars/list_expr.rb +204 -7
  30. data/lib/polars/list_name_space.rb +120 -1
  31. data/lib/polars/meta_expr.rb +7 -22
  32. data/lib/polars/scan_cast_options.rb +64 -0
  33. data/lib/polars/schema.rb +6 -1
  34. data/lib/polars/selector.rb +138 -0
  35. data/lib/polars/selectors.rb +931 -202
  36. data/lib/polars/series.rb +34 -11
  37. data/lib/polars/string_expr.rb +24 -3
  38. data/lib/polars/string_name_space.rb +11 -0
  39. data/lib/polars/utils/parse.rb +40 -0
  40. data/lib/polars/utils.rb +5 -1
  41. data/lib/polars/version.rb +1 -1
  42. data/lib/polars.rb +8 -0
  43. metadata +10 -2
@@ -234,10 +234,18 @@ module Polars
234
234
  #
235
235
  # @param by [Object]
236
236
  # Column (expressions) to sort by.
237
+ # @param more_by [Array]
238
+ # Additional columns to sort by, specified as positional arguments.
237
239
  # @param reverse [Boolean]
238
240
  # Sort in descending order.
239
241
  # @param nulls_last [Boolean]
240
242
  # Place null values last. Can only be used if sorted by a single column.
243
+ # @param maintain_order [Boolean]
244
+ # Whether the order should be maintained if elements are equal.
245
+ # Note that if `true` streaming is not possible and performance might be
246
+ # worse since this requires a stable sort.
247
+ # @param multithreaded [Boolean]
248
+ # Sort using multiple threads.
241
249
  #
242
250
  # @return [LazyFrame]
243
251
  #
@@ -305,6 +313,8 @@ module Polars
305
313
  # Slice pushdown optimization.
306
314
  # @param common_subplan_elimination [Boolean]
307
315
  # Will try to cache branching subplans that occur on self-joins or unions.
316
+ # @param comm_subexpr_elim [Boolean]
317
+ # Common subexpressions will be cached and reused.
308
318
  # @param allow_streaming [Boolean]
309
319
  # Run parts of the query in a streaming fashion (this is in an alpha state)
310
320
  #
@@ -412,6 +422,31 @@ module Polars
412
422
  # Turn off (certain) optimizations.
413
423
  # @param slice_pushdown [Boolean]
414
424
  # Slice pushdown optimization.
425
+ # @param storage_options [Hash]
426
+ # Options that indicate how to connect to a cloud provider.
427
+ #
428
+ # The cloud providers currently supported are AWS, GCP, and Azure.
429
+ # See supported keys here:
430
+ #
431
+ # * [aws](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html)
432
+ # * [gcp](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html)
433
+ # * [azure](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html)
434
+ # * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
435
+ #
436
+ # If `storage_options` is not provided, Polars will try to infer the
437
+ # information from environment variables.
438
+ # @param retries [Integer]
439
+ # Number of retries if accessing a cloud instance fails.
440
+ # @param sync_on_close ['data', 'all']
441
+ # Sync to disk before closing a file.
442
+ #
443
+ # * `nil` does not sync.
444
+ # * `data` syncs the file contents.
445
+ # * `all` syncs the file contents and metadata.
446
+ # @param mkdir [Boolean]
447
+ # Recursively create all the directories in the path.
448
+ # @param lazy [Boolean]
449
+ # Wait to start execution until `collect` is called.
415
450
  #
416
451
  # @return [DataFrame]
417
452
  #
@@ -521,6 +556,16 @@ module Polars
521
556
  # Slice pushdown optimization.
522
557
  # @param no_optimization [Boolean]
523
558
  # Turn off (certain) optimizations.
559
+ # @param sync_on_close ['data', 'all']
560
+ # Sync to disk before closing a file.
561
+ #
562
+ # * `nil` does not sync.
563
+ # * `data` syncs the file contents.
564
+ # * `all` syncs the file contents and metadata.
565
+ # @param mkdir [Boolean]
566
+ # Recursively create all the directories in the path.
567
+ # @param lazy [Boolean]
568
+ # Wait to start execution until `collect` is called.
524
569
  #
525
570
  # @return [DataFrame]
526
571
  #
@@ -614,9 +659,15 @@ module Polars
614
659
  # A format string, with the specifiers defined by the
615
660
  # [chrono](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
616
661
  # Rust crate.
662
+ # @param float_scientific [Boolean]
663
+ # Whether to use scientific form always (true), never (false), or
664
+ # automatically (nil) for `Float32` and `Float64` datatypes.
617
665
  # @param float_precision [Integer]
618
666
  # Number of decimal places to write, applied to both `Float32` and
619
667
  # `Float64` datatypes.
668
+ # @param decimal_comma [Boolean]
669
+ # Use a comma as the decimal separator instead of a point. Floats will be
670
+ # encapsulated in quotes if necessary; set the field separator to override.
620
671
  # @param null_value [String]
621
672
  # A string representing null values (defaulting to the empty string).
622
673
  # @param quote_style ["necessary", "always", "non_numeric", "never"]
@@ -655,6 +706,16 @@ module Polars
655
706
  # Options that indicate how to connect to a cloud provider.
656
707
  # @param retries [Integer]
657
708
  # Number of retries if accessing a cloud instance fails.
709
+ # @param sync_on_close ['data', 'all']
710
+ # Sync to disk before closing a file.
711
+ #
712
+ # * `nil` does not sync.
713
+ # * `data` syncs the file contents.
714
+ # * `all` syncs the file contents and metadata.
715
+ # @param mkdir [Boolean]
716
+ # Recursively create all the directories in the path.
717
+ # @param lazy [Boolean]
718
+ # Wait to start execution until `collect` is called.
658
719
  #
659
720
  # @return [DataFrame]
660
721
  #
@@ -674,6 +735,7 @@ module Polars
674
735
  time_format: nil,
675
736
  float_scientific: nil,
676
737
  float_precision: nil,
738
+ decimal_comma: false,
677
739
  null_value: nil,
678
740
  quote_style: nil,
679
741
  maintain_order: true,
@@ -726,6 +788,7 @@ module Polars
726
788
  time_format,
727
789
  float_scientific,
728
790
  float_precision,
791
+ decimal_comma,
729
792
  null_value,
730
793
  quote_style,
731
794
  storage_options,
@@ -762,6 +825,31 @@ module Polars
762
825
  # Slice pushdown optimization.
763
826
  # @param no_optimization [Boolean]
764
827
  # Turn off (certain) optimizations.
828
+ # @param storage_options [Hash]
829
+ # Options that indicate how to connect to a cloud provider.
830
+ #
831
+ # The cloud providers currently supported are AWS, GCP, and Azure.
832
+ # See supported keys here:
833
+ #
834
+ # * [aws](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html)
835
+ # * [gcp](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html)
836
+ # * [azure](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html)
837
+ # * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
838
+ #
839
+ # If `storage_options` is not provided, Polars will try to infer the
840
+ # information from environment variables.
841
+ # @param retries [Integer]
842
+ # Number of retries if accessing a cloud instance fails.
843
+ # @param sync_on_close ['data', 'all']
844
+ # Sync to disk before closing a file.
845
+ #
846
+ # * `nil` does not sync.
847
+ # * `data` syncs the file contents.
848
+ # * `all` syncs the file contents and metadata.
849
+ # @param mkdir [Boolean]
850
+ # Recursively create all the directories in the path.
851
+ # @param lazy [Boolean]
852
+ # Wait to start execution until `collect` is called.
765
853
  #
766
854
  # @return [DataFrame]
767
855
  #
@@ -854,25 +942,6 @@ module Polars
854
942
  #
855
943
  # @param n_rows [Integer]
856
944
  # Collect n_rows from the data sources.
857
- # @param type_coercion [Boolean]
858
- # Run type coercion optimization.
859
- # @param predicate_pushdown [Boolean]
860
- # Run predicate pushdown optimization.
861
- # @param projection_pushdown [Boolean]
862
- # Run projection pushdown optimization.
863
- # @param simplify_expression [Boolean]
864
- # Run simplify expressions optimization.
865
- # @param string_cache [Boolean]
866
- # This argument is deprecated. Please set the string cache globally.
867
- # The argument will be ignored
868
- # @param no_optimization [Boolean]
869
- # Turn off optimizations.
870
- # @param slice_pushdown [Boolean]
871
- # Slice pushdown optimization
872
- # @param common_subplan_elimination [Boolean]
873
- # Will try to cache branching subplans that occur on self-joins or unions.
874
- # @param allow_streaming [Boolean]
875
- # Run parts of the query in a streaming fashion (this is in an alpha state)
876
945
  #
877
946
  # @return [DataFrame]
878
947
  #
@@ -892,41 +961,11 @@ module Polars
892
961
  # # │ --- ┆ --- ┆ --- │
893
962
  # # │ str ┆ i64 ┆ i64 │
894
963
  # # ╞═════╪═════╪═════╡
895
- # # │ a ┆ 16
896
- # # │ b ┆ 2 5
964
+ # # │ a ┆ 410
965
+ # # │ b ┆ 11 10
897
966
  # # └─────┴─────┴─────┘
898
- def fetch(
899
- n_rows = 500,
900
- type_coercion: true,
901
- predicate_pushdown: true,
902
- projection_pushdown: true,
903
- simplify_expression: true,
904
- string_cache: false,
905
- no_optimization: false,
906
- slice_pushdown: true,
907
- common_subplan_elimination: true,
908
- comm_subexpr_elim: true,
909
- allow_streaming: false
910
- )
911
- if no_optimization
912
- predicate_pushdown = false
913
- projection_pushdown = false
914
- slice_pushdown = false
915
- common_subplan_elimination = false
916
- end
917
-
918
- ldf = _ldf.optimization_toggle(
919
- type_coercion,
920
- predicate_pushdown,
921
- projection_pushdown,
922
- simplify_expression,
923
- slice_pushdown,
924
- common_subplan_elimination,
925
- comm_subexpr_elim,
926
- allow_streaming,
927
- false
928
- )
929
- Utils.wrap_df(ldf.fetch(n_rows))
967
+ def fetch(n_rows = 500, **kwargs)
968
+ head(n_rows).collect(**kwargs)
930
969
  end
931
970
 
932
971
  # Return lazy representation, i.e. itself.
@@ -1058,7 +1097,7 @@ module Polars
1058
1097
  # # │ null ┆ null ┆ null │
1059
1098
  # # └──────┴──────┴──────┘
1060
1099
  def clear(n = 0)
1061
- DataFrame.new(columns: schema).clear(n).lazy
1100
+ DataFrame.new(schema: schema).clear(n).lazy
1062
1101
  end
1063
1102
  alias_method :cleared, :clear
1064
1103
 
@@ -1413,8 +1452,32 @@ module Polars
1413
1452
  # parallelize
1414
1453
  # @param closed ["right", "left", "both", "none"]
1415
1454
  # Define whether the temporal window interval is closed or not.
1455
+ # @param label ['left', 'right', 'datapoint']
1456
+ # Define which label to use for the window:
1457
+ #
1458
+ # - 'left': lower boundary of the window
1459
+ # - 'right': upper boundary of the window
1460
+ # - 'datapoint': the first value of the index column in the given window.
1461
+ # If you don't need the label to be at one of the boundaries, choose this
1462
+ # option for maximum performance
1416
1463
  # @param by [Object]
1417
1464
  # Also group by this column/these columns
1465
+ # @param start_by ['window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
1466
+ # The strategy to determine the start of the first window by.
1467
+ #
1468
+ # * 'window': Start by taking the earliest timestamp, truncating it with
1469
+ # `every`, and then adding `offset`.
1470
+ # Note that weekly windows start on Monday.
1471
+ # * 'datapoint': Start from the first encountered data point.
1472
+ # * a day of the week (only takes effect if `every` contains `'w'`):
1473
+ #
1474
+ # * 'monday': Start the window on the Monday before the first data point.
1475
+ # * 'tuesday': Start the window on the Tuesday before the first data point.
1476
+ # * ...
1477
+ # * 'sunday': Start the window on the Sunday before the first data point.
1478
+ #
1479
+ # The resulting window is then shifted back until the earliest datapoint
1480
+ # is in or in front of it.
1418
1481
  #
1419
1482
  # @return [DataFrame]
1420
1483
  #
@@ -1652,12 +1715,12 @@ module Polars
1652
1715
  # @param on [String]
1653
1716
  # Join column of both DataFrames. If set, `left_on` and `right_on` should be
1654
1717
  # None.
1655
- # @param by [Object]
1656
- # Join on these columns before doing asof join.
1657
1718
  # @param by_left [Object]
1658
1719
  # Join on these columns before doing asof join.
1659
1720
  # @param by_right [Object]
1660
1721
  # Join on these columns before doing asof join.
1722
+ # @param by [Object]
1723
+ # Join on these columns before doing asof join.
1661
1724
  # @param strategy ["backward", "forward"]
1662
1725
  # Join strategy.
1663
1726
  # @param suffix [String]
@@ -1873,7 +1936,7 @@ module Polars
1873
1936
  # # └─────────────┴────────────┴────────────┘
1874
1937
  #
1875
1938
  # @example
1876
- # pop2.join_asof(gdp2, by: "country", on: "date", strategy: "nearest").collect
1939
+ # pop2.join_asof(gdp2, by: "country", on: "date", strategy: "nearest", check_sortedness: false).collect
1877
1940
  # # =>
1878
1941
  # # shape: (6, 4)
1879
1942
  # # ┌─────────────┬────────────┬────────────┬──────┐
@@ -2175,6 +2238,9 @@ module Polars
2175
2238
  #
2176
2239
  # @param exprs [Object]
2177
2240
  # List of Expressions that evaluate to columns.
2241
+ # @param named_exprs [Hash]
2242
+ # Additional columns to add, specified as keyword arguments.
2243
+ # The columns will be renamed to the keyword used.
2178
2244
  #
2179
2245
  # @return [LazyFrame]
2180
2246
  #
@@ -2299,6 +2365,9 @@ module Polars
2299
2365
  # @param columns [Object]
2300
2366
  # - Name of the column that should be removed.
2301
2367
  # - List of column names.
2368
+ # @param strict [Boolean]
2369
+ # Validate that all column names exist in the current schema,
2370
+ # and throw an exception if any do not.
2302
2371
  #
2303
2372
  # @return [LazyFrame]
2304
2373
  #
@@ -2350,9 +2419,18 @@ module Polars
2350
2419
  # # │ 7.0 │
2351
2420
  # # │ 8.0 │
2352
2421
  # # └─────┘
2353
- def drop(*columns)
2354
- drop_cols = Utils._expand_selectors(self, *columns)
2355
- _from_rbldf(_ldf.drop(drop_cols))
2422
+ def drop(*columns, strict: true)
2423
+ selectors = []
2424
+ columns.each do |c|
2425
+ if c.is_a?(Enumerable)
2426
+ selectors += c
2427
+ else
2428
+ selectors += [c]
2429
+ end
2430
+ end
2431
+
2432
+ drop_cols = Utils.parse_list_into_selector(selectors, strict: strict)
2433
+ _from_rbldf(_ldf.drop(drop_cols._rbselector))
2356
2434
  end
2357
2435
 
2358
2436
  # Rename column names.
@@ -3153,9 +3231,11 @@ module Polars
3153
3231
  # # │ c ┆ 7 │
3154
3232
  # # │ c ┆ 8 │
3155
3233
  # # └─────────┴─────────┘
3156
- def explode(columns)
3157
- columns = Utils.parse_into_list_of_expressions(columns)
3158
- _from_rbldf(_ldf.explode(columns))
3234
+ def explode(columns, *more_columns)
3235
+ subset = Utils.parse_list_into_selector(columns) | Utils.parse_list_into_selector(
3236
+ more_columns
3237
+ )
3238
+ _from_rbldf(_ldf.explode(subset._rbselector))
3159
3239
  end
3160
3240
 
3161
3241
  # Drop duplicate rows from this DataFrame.
@@ -3220,10 +3300,11 @@ module Polars
3220
3300
  # # │ 1 ┆ a ┆ b │
3221
3301
  # # └─────┴─────┴─────┘
3222
3302
  def unique(maintain_order: true, subset: nil, keep: "first")
3223
- if !subset.nil? && !subset.is_a?(::Array)
3224
- subset = [subset]
3303
+ selector_subset = nil
3304
+ if !subset.nil?
3305
+ selector_subset = Utils.parse_list_into_selector(subset)._rbselector
3225
3306
  end
3226
- _from_rbldf(_ldf.unique(maintain_order, subset, keep))
3307
+ _from_rbldf(_ldf.unique(maintain_order, selector_subset, keep))
3227
3308
  end
3228
3309
 
3229
3310
  # Drop rows with null values from this LazyFrame.
@@ -3318,11 +3399,16 @@ module Polars
3318
3399
  warn "The `streamable` parameter for `LazyFrame.unpivot` is deprecated"
3319
3400
  end
3320
3401
 
3321
- on = on.nil? ? [] : Utils.parse_into_list_of_expressions(on)
3322
- index = index.nil? ? [] : Utils.parse_into_list_of_expressions(index)
3402
+ selector_on = on.nil? ? Selectors.empty : Utils.parse_list_into_selector(on)
3403
+ selector_index = index.nil? ? Selectors.empty : Utils.parse_list_into_selector(index)
3323
3404
 
3324
3405
  _from_rbldf(
3325
- _ldf.unpivot(on, index, value_name, variable_name)
3406
+ _ldf.unpivot(
3407
+ selector_on._rbselector,
3408
+ selector_index._rbselector,
3409
+ value_name,
3410
+ variable_name
3411
+ )
3326
3412
  )
3327
3413
  end
3328
3414
  alias_method :melt, :unpivot
@@ -3364,8 +3450,10 @@ module Polars
3364
3450
  # The fields will be inserted into the `DataFrame` on the location of the
3365
3451
  # `struct` type.
3366
3452
  #
3367
- # @param names [Object]
3453
+ # @param columns [Object]
3368
3454
  # Names of the struct columns that will be decomposed by its fields
3455
+ # @param more_columns [Array]
3456
+ # Additional columns to unnest, specified as positional arguments.
3369
3457
  #
3370
3458
  # @return [LazyFrame]
3371
3459
  #
@@ -3410,11 +3498,11 @@ module Polars
3410
3498
  # # │ foo ┆ 1 ┆ a ┆ true ┆ [1, 2] ┆ baz │
3411
3499
  # # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
3412
3500
  # # └────────┴─────┴─────┴──────┴───────────┴───────┘
3413
- def unnest(names)
3414
- if names.is_a?(::String)
3415
- names = [names]
3416
- end
3417
- _from_rbldf(_ldf.unnest(names))
3501
+ def unnest(columns, *more_columns)
3502
+ subset = Utils.parse_list_into_selector(columns) | Utils.parse_list_into_selector(
3503
+ more_columns
3504
+ )
3505
+ _from_rbldf(_ldf.unnest(subset._rbselector))
3418
3506
  end
3419
3507
 
3420
3508
  # Take two sorted DataFrames and merge them by the sorted key.
@@ -136,7 +136,7 @@ module Polars
136
136
  # # │ --- ┆ --- ┆ --- │
137
137
  # # │ list[i64] ┆ i64 ┆ list[i64] │
138
138
  # # ╞═══════════╪═════╪═══════════╡
139
- # # │ [1, 2, 3] ┆ 2 ┆ [2, 1] │
139
+ # # │ [1, 2, 3] ┆ 2 ┆ [2, 3] │
140
140
  # # │ [4, 5] ┆ 1 ┆ [5] │
141
141
  # # └───────────┴─────┴───────────┘
142
142
  def sample(n: nil, fraction: nil, with_replacement: false, shuffle: false, seed: nil)
@@ -734,9 +734,21 @@ module Polars
734
734
  #
735
735
  # @param n_field_strategy ["first_non_null", "max_width"]
736
736
  # Strategy to determine the number of fields of the struct.
737
- # @param name_generator [Object]
738
- # A custom function that can be used to generate the field names.
739
- # Default field names are `field_0, field_1 .. field_n`
737
+ # @param fields [Array]
738
+ # If the name and number of the desired fields is known in advance
739
+ # a list of field names can be given, which will be assigned by index.
740
+ # Otherwise, to dynamically assign field names, a custom function can be
741
+ # used; if neither are set, fields will be `field_0, field_1 .. field_n`.
742
+ # @param upper_bound [Object]
743
+ # A polars `LazyFrame` needs to know the schema at all times, so the
744
+ # caller must provide an upper bound of the number of struct fields that
745
+ # will be created; if set incorrectly, subsequent operations may fail.
746
+ # (For example, an `all.sum` expression will look in the current
747
+ # schema to determine which columns to select).
748
+ #
749
+ # When operating on a `DataFrame`, the schema does not need to be
750
+ # tracked or pre-determined, as the result will be eagerly evaluated,
751
+ # so you can leave this parameter unset.
740
752
  #
741
753
  # @return [Expr]
742
754
  #
@@ -753,9 +765,8 @@ module Polars
753
765
  # # │ {1,2,3} │
754
766
  # # │ {1,2,null} │
755
767
  # # └────────────┘
756
- def to_struct(n_field_strategy: "first_non_null", name_generator: nil, upper_bound: nil)
757
- raise Todo if name_generator
758
- Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, name_generator, nil))
768
+ def to_struct(n_field_strategy: "first_non_null", fields: nil, upper_bound: nil)
769
+ Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, fields, nil))
759
770
  end
760
771
 
761
772
  # Run any polars expression against the lists' elements.
@@ -785,5 +796,191 @@ module Polars
785
796
  def eval(expr)
786
797
  Utils.wrap_expr(_rbexpr.list_eval(expr._rbexpr))
787
798
  end
799
+
800
+ # Filter elements in each list by a boolean expression.
801
+ #
802
+ # @param predicate [Object]
803
+ # A boolean expression that is evaluated per list element.
804
+ # You can refer to the current element with `Polars.element`.
805
+ #
806
+ # @return [Expr]
807
+ #
808
+ # @example
809
+ # df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
810
+ # df.with_columns(
811
+ # evens: Polars.concat_list("a", "b").list.filter(Polars.element % 2 == 0)
812
+ # )
813
+ # # =>
814
+ # # shape: (3, 3)
815
+ # # ┌─────┬─────┬───────────┐
816
+ # # │ a ┆ b ┆ evens │
817
+ # # │ --- ┆ --- ┆ --- │
818
+ # # │ i64 ┆ i64 ┆ list[i64] │
819
+ # # ╞═════╪═════╪═══════════╡
820
+ # # │ 1 ┆ 4 ┆ [4] │
821
+ # # │ 8 ┆ 5 ┆ [8] │
822
+ # # │ 3 ┆ 2 ┆ [2] │
823
+ # # └─────┴─────┴───────────┘
824
+ def filter(predicate)
825
+ Utils.wrap_expr(_rbexpr.list_filter(predicate._rbexpr))
826
+ end
827
+
828
+ # Compute the SET UNION between the elements in this list and the elements of `other`.
829
+ #
830
+ # @param other [Object]
831
+ # Right hand side of the set operation.
832
+ #
833
+ # @return [Expr]
834
+ #
835
+ # @example
836
+ # df = Polars::DataFrame.new(
837
+ # {
838
+ # "a" => [[1, 2, 3], [], [nil, 3], [5, 6, 7]],
839
+ # "b" => [[2, 3, 4], [3], [3, 4, nil], [6, 8]]
840
+ # }
841
+ # )
842
+ # df.with_columns(
843
+ # union: Polars.col("a").list.set_union("b")
844
+ # )
845
+ # # =>
846
+ # # shape: (4, 3)
847
+ # # ┌───────────┬──────────────┬──────────────┐
848
+ # # │ a ┆ b ┆ union │
849
+ # # │ --- ┆ --- ┆ --- │
850
+ # # │ list[i64] ┆ list[i64] ┆ list[i64] │
851
+ # # ╞═══════════╪══════════════╪══════════════╡
852
+ # # │ [1, 2, 3] ┆ [2, 3, 4] ┆ [1, 2, … 4] │
853
+ # # │ [] ┆ [3] ┆ [3] │
854
+ # # │ [null, 3] ┆ [3, 4, null] ┆ [null, 3, 4] │
855
+ # # │ [5, 6, 7] ┆ [6, 8] ┆ [5, 6, … 8] │
856
+ # # └───────────┴──────────────┴──────────────┘
857
+ def set_union(other)
858
+ if other.respond_to?(:each)
859
+ if !other.is_a?(::Array) && !other.is_a?(Series) && !other.is_a?(DataFrame)
860
+ other = other.to_a
861
+ end
862
+ other = F.lit(other)._rbexpr
863
+ else
864
+ other = Utils.parse_into_expression(other)
865
+ end
866
+ Utils.wrap_expr(_rbexpr.list_set_operation(other, "union"))
867
+ end
868
+
869
+ # Compute the SET DIFFERENCE between the elements in this list and the elements of `other`.
870
+ #
871
+ # @param other [Object]
872
+ # Right hand side of the set operation.
873
+ #
874
+ # @return [Expr]
875
+ #
876
+ # @example
877
+ # df = Polars::DataFrame.new(
878
+ # {
879
+ # "a" => [[1, 2, 3], [], [nil, 3], [5, 6, 7]],
880
+ # "b" => [[2, 3, 4], [3], [3, 4, nil], [6, 8]]
881
+ # }
882
+ # )
883
+ # df.with_columns(difference: Polars.col("a").list.set_difference("b"))
884
+ # # =>
885
+ # # shape: (4, 3)
886
+ # # ┌───────────┬──────────────┬────────────┐
887
+ # # │ a ┆ b ┆ difference │
888
+ # # │ --- ┆ --- ┆ --- │
889
+ # # │ list[i64] ┆ list[i64] ┆ list[i64] │
890
+ # # ╞═══════════╪══════════════╪════════════╡
891
+ # # │ [1, 2, 3] ┆ [2, 3, 4] ┆ [1] │
892
+ # # │ [] ┆ [3] ┆ [] │
893
+ # # │ [null, 3] ┆ [3, 4, null] ┆ [] │
894
+ # # │ [5, 6, 7] ┆ [6, 8] ┆ [5, 7] │
895
+ # # └───────────┴──────────────┴────────────┘
896
+ def set_difference(other)
897
+ if other.respond_to?(:each)
898
+ if !other.is_a?(::Array) && !other.is_a?(Series) && !other.is_a?(DataFrame)
899
+ other = other.to_a
900
+ end
901
+ other = F.lit(other)._rbexpr
902
+ else
903
+ other = Utils.parse_into_expression(other)
904
+ end
905
+ Utils.wrap_expr(_rbexpr.list_set_operation(other, "difference"))
906
+ end
907
+
908
+ # Compute the SET INTERSECTION between the elements in this list and the elements of `other`.
909
+ #
910
+ # @param other [Object]
911
+ # Right hand side of the set operation.
912
+ #
913
+ # @return [Expr]
914
+ #
915
+ # @example
916
+ # df = Polars::DataFrame.new(
917
+ # {
918
+ # "a" => [[1, 2, 3], [], [nil, 3], [5, 6, 7]],
919
+ # "b" => [[2, 3, 4], [3], [3, 4, nil], [6, 8]]
920
+ # }
921
+ # )
922
+ # df.with_columns(intersection: Polars.col("a").list.set_intersection("b"))
923
+ # # =>
924
+ # # shape: (4, 3)
925
+ # # ┌───────────┬──────────────┬──────────────┐
926
+ # # │ a ┆ b ┆ intersection │
927
+ # # │ --- ┆ --- ┆ --- │
928
+ # # │ list[i64] ┆ list[i64] ┆ list[i64] │
929
+ # # ╞═══════════╪══════════════╪══════════════╡
930
+ # # │ [1, 2, 3] ┆ [2, 3, 4] ┆ [2, 3] │
931
+ # # │ [] ┆ [3] ┆ [] │
932
+ # # │ [null, 3] ┆ [3, 4, null] ┆ [null, 3] │
933
+ # # │ [5, 6, 7] ┆ [6, 8] ┆ [6] │
934
+ # # └───────────┴──────────────┴──────────────┘
935
+ def set_intersection(other)
936
+ if other.respond_to?(:each)
937
+ if !other.is_a?(::Array) && !other.is_a?(Series) && !other.is_a?(DataFrame)
938
+ other = other.to_a
939
+ end
940
+ other = F.lit(other)._rbexpr
941
+ else
942
+ other = Utils.parse_into_expression(other)
943
+ end
944
+ Utils.wrap_expr(_rbexpr.list_set_operation(other, "intersection"))
945
+ end
946
+
947
+ # Compute the SET SYMMETRIC DIFFERENCE between the elements in this list and the elements of `other`.
948
+ #
949
+ # @param other [Object]
950
+ # Right hand side of the set operation.
951
+ #
952
+ # @return [Expr]
953
+ #
954
+ # @example
955
+ # df = Polars::DataFrame.new(
956
+ # {
957
+ # "a" => [[1, 2, 3], [], [nil, 3], [5, 6, 7]],
958
+ # "b" => [[2, 3, 4], [3], [3, 4, nil], [6, 8]]
959
+ # }
960
+ # )
961
+ # df.with_columns(sdiff: Polars.col("b").list.set_symmetric_difference("a"))
962
+ # # =>
963
+ # # shape: (4, 3)
964
+ # # ┌───────────┬──────────────┬───────────┐
965
+ # # │ a ┆ b ┆ sdiff │
966
+ # # │ --- ┆ --- ┆ --- │
967
+ # # │ list[i64] ┆ list[i64] ┆ list[i64] │
968
+ # # ╞═══════════╪══════════════╪═══════════╡
969
+ # # │ [1, 2, 3] ┆ [2, 3, 4] ┆ [4, 1] │
970
+ # # │ [] ┆ [3] ┆ [3] │
971
+ # # │ [null, 3] ┆ [3, 4, null] ┆ [4] │
972
+ # # │ [5, 6, 7] ┆ [6, 8] ┆ [8, 5, 7] │
973
+ # # └───────────┴──────────────┴───────────┘
974
+ def set_symmetric_difference(other)
975
+ if other.respond_to?(:each)
976
+ if !other.is_a?(::Array) && !other.is_a?(Series) && !other.is_a?(DataFrame)
977
+ other = other.to_a
978
+ end
979
+ other = F.lit(other)._rbexpr
980
+ else
981
+ other = Utils.parse_into_expression(other)
982
+ end
983
+ Utils.wrap_expr(_rbexpr.list_set_operation(other, "symmetric_difference"))
984
+ end
788
985
  end
789
986
  end