polars-df 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/Cargo.lock +142 -11
  4. data/Cargo.toml +5 -0
  5. data/ext/polars/Cargo.toml +17 -1
  6. data/ext/polars/src/apply/dataframe.rs +292 -0
  7. data/ext/polars/src/apply/mod.rs +254 -0
  8. data/ext/polars/src/apply/series.rs +1173 -0
  9. data/ext/polars/src/conversion.rs +180 -5
  10. data/ext/polars/src/dataframe.rs +146 -1
  11. data/ext/polars/src/error.rs +12 -0
  12. data/ext/polars/src/lazy/apply.rs +34 -2
  13. data/ext/polars/src/lazy/dataframe.rs +74 -3
  14. data/ext/polars/src/lazy/dsl.rs +136 -0
  15. data/ext/polars/src/lib.rs +199 -1
  16. data/ext/polars/src/list_construction.rs +100 -0
  17. data/ext/polars/src/series.rs +331 -0
  18. data/ext/polars/src/utils.rs +25 -0
  19. data/lib/polars/cat_name_space.rb +54 -0
  20. data/lib/polars/convert.rb +100 -0
  21. data/lib/polars/data_frame.rb +1558 -60
  22. data/lib/polars/date_time_expr.rb +2 -2
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/dynamic_group_by.rb +49 -0
  25. data/lib/polars/expr.rb +4072 -107
  26. data/lib/polars/expr_dispatch.rb +8 -0
  27. data/lib/polars/functions.rb +192 -3
  28. data/lib/polars/group_by.rb +44 -3
  29. data/lib/polars/io.rb +20 -4
  30. data/lib/polars/lazy_frame.rb +800 -26
  31. data/lib/polars/lazy_functions.rb +687 -43
  32. data/lib/polars/lazy_group_by.rb +1 -0
  33. data/lib/polars/list_expr.rb +502 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/rolling_group_by.rb +35 -0
  36. data/lib/polars/series.rb +934 -62
  37. data/lib/polars/string_expr.rb +189 -13
  38. data/lib/polars/string_name_space.rb +690 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +44 -0
  41. data/lib/polars/version.rb +1 -1
  42. data/lib/polars.rb +14 -1
  43. metadata +15 -3
@@ -149,8 +149,19 @@ module Polars
149
149
  # def self.from_json
150
150
  # end
151
151
 
152
- # def self.read_json
153
- # end
152
+ # Read a logical plan from a JSON file to construct a LazyFrame.
153
+ #
154
+ # @param file [String]
155
+ # Path to a file or a file-like object.
156
+ #
157
+ # @return [LazyFrame]
158
+ def self.read_json(file)
159
+ if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
160
+ file = Utils.format_path(file)
161
+ end
162
+
163
+ Utils.wrap_ldf(RbLazyFrame.read_json(file))
164
+ end
154
165
 
155
166
  # Get or set column names.
156
167
  #
@@ -245,11 +256,57 @@ module Polars
245
256
  EOS
246
257
  end
247
258
 
248
- # def write_json
249
- # end
259
+ # Write the logical plan of this LazyFrame to a file or string in JSON format.
260
+ #
261
+ # @param file [String]
262
+ # File path to which the result should be written.
263
+ #
264
+ # @return [nil]
265
+ def write_json(file)
266
+ if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
267
+ file = Utils.format_path(file)
268
+ end
269
+ _ldf.write_json(file)
270
+ nil
271
+ end
250
272
 
251
- # def pipe
252
- # end
273
+ # Offers a structured way to apply a sequence of user-defined functions (UDFs).
274
+ #
275
+ # @param func [Object]
276
+ # Callable; will receive the frame as the first parameter,
277
+ # followed by any given args/kwargs.
278
+ # @param args [Object]
279
+ # Arguments to pass to the UDF.
280
+ # @param kwargs [Object]
281
+ # Keyword arguments to pass to the UDF.
282
+ #
283
+ # @return [LazyFrame]
284
+ #
285
+ # @example
286
+ # cast_str_to_int = lambda do |data, col_name:|
287
+ # data.with_column(Polars.col(col_name).cast(:i64))
288
+ # end
289
+ #
290
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => ["10", "20", "30", "40"]}).lazy
291
+ # df.pipe(cast_str_to_int, col_name: "b").collect()
292
+ # # =>
293
+ # # shape: (4, 2)
294
+ # # ┌─────┬─────┐
295
+ # # │ a ┆ b │
296
+ # # │ --- ┆ --- │
297
+ # # │ i64 ┆ i64 │
298
+ # # ╞═════╪═════╡
299
+ # # │ 1 ┆ 10 │
300
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
301
+ # # │ 2 ┆ 20 │
302
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
303
+ # # │ 3 ┆ 30 │
304
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
305
+ # # │ 4 ┆ 40 │
306
+ # # └─────┴─────┘
307
+ def pipe(func, *args, **kwargs, &block)
308
+ func.call(self, *args, **kwargs, &block)
309
+ end
253
310
 
254
311
  # Create a string representation of the unoptimized query plan.
255
312
  #
@@ -261,8 +318,27 @@ module Polars
261
318
  # Create a string representation of the optimized query plan.
262
319
  #
263
320
  # @return [String]
264
- # def describe_optimized_plan
265
- # end
321
+ def describe_optimized_plan(
322
+ type_coercion: true,
323
+ predicate_pushdown: true,
324
+ projection_pushdown: true,
325
+ simplify_expression: true,
326
+ slice_pushdown: true,
327
+ common_subplan_elimination: true,
328
+ allow_streaming: false
329
+ )
330
+ ldf = _ldf.optimization_toggle(
331
+ type_coercion,
332
+ predicate_pushdown,
333
+ projection_pushdown,
334
+ simplify_expression,
335
+ slice_pushdown,
336
+ common_subplan_elimination,
337
+ allow_streaming,
338
+ )
339
+
340
+ ldf.describe_optimized_plan
341
+ end
266
342
 
267
343
  # def show_graph
268
344
  # end
@@ -558,7 +634,7 @@ module Polars
558
634
  # "ham" => ["a", "b", "c"]
559
635
  # }
560
636
  # ).lazy
561
- # lf.filter(Polars.col("foo") < 3).collect()
637
+ # lf.filter(Polars.col("foo") < 3).collect
562
638
  # # =>
563
639
  # # shape: (2, 3)
564
640
  # # ┌─────┬─────┬─────┐
@@ -726,14 +802,542 @@ module Polars
726
802
  LazyGroupBy.new(lgb, self.class)
727
803
  end
728
804
 
729
- # def groupby_rolling
730
- # end
805
+ # Create rolling groups based on a time column.
806
+ #
807
+ # Also works for index values of type `:i32` or `:i64`.
808
+ #
809
+ # Different from a `dynamic_groupby`, the windows are now determined by the
810
+ # individual values and are not of constant intervals. For constant intervals
811
+ # use *groupby_dynamic*.
812
+ #
813
+ # The `period` and `offset` arguments are created either from a timedelta, or
814
+ # by using the following string language:
815
+ #
816
+ # - 1ns (1 nanosecond)
817
+ # - 1us (1 microsecond)
818
+ # - 1ms (1 millisecond)
819
+ # - 1s (1 second)
820
+ # - 1m (1 minute)
821
+ # - 1h (1 hour)
822
+ # - 1d (1 day)
823
+ # - 1w (1 week)
824
+ # - 1mo (1 calendar month)
825
+ # - 1y (1 calendar year)
826
+ # - 1i (1 index count)
827
+ #
828
+ # Or combine them:
829
+ # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
830
+ #
831
+ # In case of a groupby_rolling on an integer column, the windows are defined by:
832
+ #
833
+ # - "1i" # length 1
834
+ # - "10i" # length 10
835
+ #
836
+ # @param index_column [Object]
837
+ # Column used to group based on the time window.
838
+ # Often of type Date/Datetime
839
+ # This column must be sorted in ascending order. If not the output will not
840
+ # make sense.
841
+ #
842
+ # In case of a rolling groupby on indices, dtype needs to be one of
843
+ # `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
844
+ # performance matters use an `:i64` column.
845
+ # @param period [Object]
846
+ # Length of the window.
847
+ # @param offset [Object]
848
+ # Offset of the window. Default is -period.
849
+ # @param closed ["right", "left", "both", "none"]
850
+ # Define whether the temporal window interval is closed or not.
851
+ # @param by [Object]
852
+ # Also group by this column/these columns.
853
+ #
854
+ # @return [LazyFrame]
855
+ #
856
+ # @example
857
+ # dates = [
858
+ # "2020-01-01 13:45:48",
859
+ # "2020-01-01 16:42:13",
860
+ # "2020-01-01 16:45:09",
861
+ # "2020-01-02 18:12:48",
862
+ # "2020-01-03 19:45:32",
863
+ # "2020-01-08 23:16:43"
864
+ # ]
865
+ # df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
866
+ # Polars.col("dt").str.strptime(:datetime)
867
+ # )
868
+ # df.groupby_rolling(index_column: "dt", period: "2d").agg(
869
+ # [
870
+ # Polars.sum("a").alias("sum_a"),
871
+ # Polars.min("a").alias("min_a"),
872
+ # Polars.max("a").alias("max_a")
873
+ # ]
874
+ # )
875
+ # # =>
876
+ # # shape: (6, 4)
877
+ # # ┌─────────────────────┬───────┬───────┬───────┐
878
+ # # │ dt ┆ sum_a ┆ min_a ┆ max_a │
879
+ # # │ --- ┆ --- ┆ --- ┆ --- │
880
+ # # │ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │
881
+ # # ╞═════════════════════╪═══════╪═══════╪═══════╡
882
+ # # │ 2020-01-01 13:45:48 ┆ 3 ┆ 3 ┆ 3 │
883
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
884
+ # # │ 2020-01-01 16:42:13 ┆ 10 ┆ 3 ┆ 7 │
885
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
886
+ # # │ 2020-01-01 16:45:09 ┆ 15 ┆ 3 ┆ 7 │
887
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
888
+ # # │ 2020-01-02 18:12:48 ┆ 24 ┆ 3 ┆ 9 │
889
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
890
+ # # │ 2020-01-03 19:45:32 ┆ 11 ┆ 2 ┆ 9 │
891
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
892
+ # # │ 2020-01-08 23:16:43 ┆ 1 ┆ 1 ┆ 1 │
893
+ # # └─────────────────────┴───────┴───────┴───────┘
894
+ def groupby_rolling(
895
+ index_column:,
896
+ period:,
897
+ offset: nil,
898
+ closed: "right",
899
+ by: nil
900
+ )
901
+ if offset.nil?
902
+ offset = "-#{period}"
903
+ end
731
904
 
732
- # def groupby_dynamic
733
- # end
905
+ rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
906
+ period = Utils._timedelta_to_pl_duration(period)
907
+ offset = Utils._timedelta_to_pl_duration(offset)
734
908
 
735
- # def join_asof
736
- # end
909
+ lgb = _ldf.groupby_rolling(
910
+ index_column, period, offset, closed, rbexprs_by
911
+ )
912
+ LazyGroupBy.new(lgb, self.class)
913
+ end
914
+
915
+ # Group based on a time value (or index value of type `:i32`, `:i64`).
916
+ #
917
+ # Time windows are calculated and rows are assigned to windows. Different from a
918
+ # normal groupby is that a row can be member of multiple groups. The time/index
919
+ # window could be seen as a rolling window, with a window size determined by
920
+ # dates/times/values instead of slots in the DataFrame.
921
+ #
922
+ # A window is defined by:
923
+ #
924
+ # - every: interval of the window
925
+ # - period: length of the window
926
+ # - offset: offset of the window
927
+ #
928
+ # The `every`, `period` and `offset` arguments are created with
929
+ # the following string language:
930
+ #
931
+ # - 1ns (1 nanosecond)
932
+ # - 1us (1 microsecond)
933
+ # - 1ms (1 millisecond)
934
+ # - 1s (1 second)
935
+ # - 1m (1 minute)
936
+ # - 1h (1 hour)
937
+ # - 1d (1 day)
938
+ # - 1w (1 week)
939
+ # - 1mo (1 calendar month)
940
+ # - 1y (1 calendar year)
941
+ # - 1i (1 index count)
942
+ #
943
+ # Or combine them:
944
+ # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
945
+ #
946
+ # In case of a groupby_dynamic on an integer column, the windows are defined by:
947
+ #
948
+ # - "1i" # length 1
949
+ # - "10i" # length 10
950
+ #
951
+ # @param index_column
952
+ # Column used to group based on the time window.
953
+ # Often of type Date/Datetime
954
+ # This column must be sorted in ascending order. If not the output will not
955
+ # make sense.
956
+ #
957
+ # In case of a dynamic groupby on indices, dtype needs to be one of
958
+ # `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
959
+ # performance matters use an `:i64` column.
960
+ # @param every
961
+ # Interval of the window.
962
+ # @param period
963
+ # Length of the window; if nil, it is equal to 'every'.
964
+ # @param offset
965
+ # Offset of the window; if nil and period is nil, it will be equal to negative
966
+ # `every`.
967
+ # @param truncate
968
+ # Truncate the time value to the window lower bound.
969
+ # @param include_boundaries
970
+ # Add the lower and upper bound of the window to the "_lower_bound" and
971
+ # "_upper_bound" columns. This will impact performance because it's harder to
972
+ # parallelize
973
+ # @param closed ["right", "left", "both", "none"]
974
+ # Define whether the temporal window interval is closed or not.
975
+ # @param by
976
+ # Also group by this column/these columns
977
+ #
978
+ # @return [DataFrame]
979
+ #
980
+ # @example
981
+ # df = Polars::DataFrame.new(
982
+ # {
983
+ # "time" => Polars.date_range(
984
+ # DateTime.new(2021, 12, 16),
985
+ # DateTime.new(2021, 12, 16, 3),
986
+ # "30m"
987
+ # ),
988
+ # "n" => 0..6
989
+ # }
990
+ # )
991
+ # # =>
992
+ # # shape: (7, 2)
993
+ # # ┌─────────────────────┬─────┐
994
+ # # │ time ┆ n │
995
+ # # │ --- ┆ --- │
996
+ # # │ datetime[μs] ┆ i64 │
997
+ # # ╞═════════════════════╪═════╡
998
+ # # │ 2021-12-16 00:00:00 ┆ 0 │
999
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
1000
+ # # │ 2021-12-16 00:30:00 ┆ 1 │
1001
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
1002
+ # # │ 2021-12-16 01:00:00 ┆ 2 │
1003
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
1004
+ # # │ 2021-12-16 01:30:00 ┆ 3 │
1005
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
1006
+ # # │ 2021-12-16 02:00:00 ┆ 4 │
1007
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
1008
+ # # │ 2021-12-16 02:30:00 ┆ 5 │
1009
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
1010
+ # # │ 2021-12-16 03:00:00 ┆ 6 │
1011
+ # # └─────────────────────┴─────┘
1012
+ #
1013
+ # @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
1014
+ # df.groupby_dynamic("time", every: "1h", closed: "right").agg(
1015
+ # [
1016
+ # Polars.col("time").min.alias("time_min"),
1017
+ # Polars.col("time").max.alias("time_max")
1018
+ # ]
1019
+ # )
1020
+ # # =>
1021
+ # # shape: (4, 3)
1022
+ # # ┌─────────────────────┬─────────────────────┬─────────────────────┐
1023
+ # # │ time ┆ time_min ┆ time_max │
1024
+ # # │ --- ┆ --- ┆ --- │
1025
+ # # │ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] │
1026
+ # # ╞═════════════════════╪═════════════════════╪═════════════════════╡
1027
+ # # │ 2021-12-15 23:00:00 ┆ 2021-12-16 00:00:00 ┆ 2021-12-16 00:00:00 │
1028
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
1029
+ # # │ 2021-12-16 00:00:00 ┆ 2021-12-16 00:30:00 ┆ 2021-12-16 01:00:00 │
1030
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
1031
+ # # │ 2021-12-16 01:00:00 ┆ 2021-12-16 01:30:00 ┆ 2021-12-16 02:00:00 │
1032
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
1033
+ # # │ 2021-12-16 02:00:00 ┆ 2021-12-16 02:30:00 ┆ 2021-12-16 03:00:00 │
1034
+ # # └─────────────────────┴─────────────────────┴─────────────────────┘
1035
+ #
1036
+ # @example The window boundaries can also be added to the aggregation result.
1037
+ # df.groupby_dynamic(
1038
+ # "time", every: "1h", include_boundaries: true, closed: "right"
1039
+ # ).agg([Polars.col("time").count.alias("time_count")])
1040
+ # # =>
1041
+ # # shape: (4, 4)
1042
+ # # ┌─────────────────────┬─────────────────────┬─────────────────────┬────────────┐
1043
+ # # │ _lower_boundary ┆ _upper_boundary ┆ time ┆ time_count │
1044
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1045
+ # # │ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ u32 │
1046
+ # # ╞═════════════════════╪═════════════════════╪═════════════════════╪════════════╡
1047
+ # # │ 2021-12-15 23:00:00 ┆ 2021-12-16 00:00:00 ┆ 2021-12-15 23:00:00 ┆ 1 │
1048
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1049
+ # # │ 2021-12-16 00:00:00 ┆ 2021-12-16 01:00:00 ┆ 2021-12-16 00:00:00 ┆ 2 │
1050
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1051
+ # # │ 2021-12-16 01:00:00 ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 01:00:00 ┆ 2 │
1052
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1053
+ # # │ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 2 │
1054
+ # # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
1055
+ #
1056
+ # @example When closed="left", should not include right end of interval.
1057
+ # df.groupby_dynamic("time", every: "1h", closed: "left").agg(
1058
+ # [
1059
+ # Polars.col("time").count.alias("time_count"),
1060
+ # Polars.col("time").list.alias("time_agg_list")
1061
+ # ]
1062
+ # )
1063
+ # # =>
1064
+ # # shape: (4, 3)
1065
+ # # ┌─────────────────────┬────────────┬─────────────────────────────────────┐
1066
+ # # │ time ┆ time_count ┆ time_agg_list │
1067
+ # # │ --- ┆ --- ┆ --- │
1068
+ # # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]] │
1069
+ # # ╞═════════════════════╪════════════╪═════════════════════════════════════╡
1070
+ # # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16... │
1071
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
1072
+ # # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16... │
1073
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
1074
+ # # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16... │
1075
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
1076
+ # # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00] │
1077
+ # # └─────────────────────┴────────────┴─────────────────────────────────────┘
1078
+ #
1079
+ # @example When closed="both" the time values at the window boundaries belong to 2 groups.
1080
+ # df.groupby_dynamic("time", every: "1h", closed: "both").agg(
1081
+ # [Polars.col("time").count.alias("time_count")]
1082
+ # )
1083
+ # # =>
1084
+ # # shape: (5, 2)
1085
+ # # ┌─────────────────────┬────────────┐
1086
+ # # │ time ┆ time_count │
1087
+ # # │ --- ┆ --- │
1088
+ # # │ datetime[μs] ┆ u32 │
1089
+ # # ╞═════════════════════╪════════════╡
1090
+ # # │ 2021-12-15 23:00:00 ┆ 1 │
1091
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1092
+ # # │ 2021-12-16 00:00:00 ┆ 3 │
1093
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1094
+ # # │ 2021-12-16 01:00:00 ┆ 3 │
1095
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1096
+ # # │ 2021-12-16 02:00:00 ┆ 3 │
1097
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1098
+ # # │ 2021-12-16 03:00:00 ┆ 1 │
1099
+ # # └─────────────────────┴────────────┘
1100
+ #
1101
+ # @example Dynamic groupbys can also be combined with grouping on normal keys.
1102
+ # df = Polars::DataFrame.new(
1103
+ # {
1104
+ # "time" => Polars.date_range(
1105
+ # DateTime.new(2021, 12, 16),
1106
+ # DateTime.new(2021, 12, 16, 3),
1107
+ # "30m"
1108
+ # ),
1109
+ # "groups" => ["a", "a", "a", "b", "b", "a", "a"]
1110
+ # }
1111
+ # )
1112
+ # df.groupby_dynamic(
1113
+ # "time",
1114
+ # every: "1h",
1115
+ # closed: "both",
1116
+ # by: "groups",
1117
+ # include_boundaries: true
1118
+ # ).agg([Polars.col("time").count.alias("time_count")])
1119
+ # # =>
1120
+ # # shape: (7, 5)
1121
+ # # ┌────────┬─────────────────────┬─────────────────────┬─────────────────────┬────────────┐
1122
+ # # │ groups ┆ _lower_boundary ┆ _upper_boundary ┆ time ┆ time_count │
1123
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
1124
+ # # │ str ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ u32 │
1125
+ # # ╞════════╪═════════════════════╪═════════════════════╪═════════════════════╪════════════╡
1126
+ # # │ a ┆ 2021-12-15 23:00:00 ┆ 2021-12-16 00:00:00 ┆ 2021-12-15 23:00:00 ┆ 1 │
1127
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1128
+ # # │ a ┆ 2021-12-16 00:00:00 ┆ 2021-12-16 01:00:00 ┆ 2021-12-16 00:00:00 ┆ 3 │
1129
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1130
+ # # │ a ┆ 2021-12-16 01:00:00 ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 01:00:00 ┆ 1 │
1131
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1132
+ # # │ a ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 2 │
1133
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1134
+ # # │ a ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 04:00:00 ┆ 2021-12-16 03:00:00 ┆ 1 │
1135
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1136
+ # # │ b ┆ 2021-12-16 01:00:00 ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 01:00:00 ┆ 2 │
1137
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1138
+ # # │ b ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1 │
1139
+ # # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
1140
+ #
1141
+ # @example Dynamic groupby on an index column.
1142
+ # df = Polars::DataFrame.new(
1143
+ # {
1144
+ # "idx" => Polars.arange(0, 6, eager: true),
1145
+ # "A" => ["A", "A", "B", "B", "B", "C"]
1146
+ # }
1147
+ # )
1148
+ # df.groupby_dynamic(
1149
+ # "idx",
1150
+ # every: "2i",
1151
+ # period: "3i",
1152
+ # include_boundaries: true,
1153
+ # closed: "right"
1154
+ # ).agg(Polars.col("A").list.alias("A_agg_list"))
1155
+ # # =>
1156
+ # # shape: (3, 4)
1157
+ # # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
1158
+ # # │ _lower_boundary ┆ _upper_boundary ┆ idx ┆ A_agg_list │
1159
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1160
+ # # │ i64 ┆ i64 ┆ i64 ┆ list[str] │
1161
+ # # ╞═════════════════╪═════════════════╪═════╪═════════════════╡
1162
+ # # │ 0 ┆ 3 ┆ 0 ┆ ["A", "B", "B"] │
1163
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
1164
+ # # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
1165
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
1166
+ # # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
1167
+ # # └─────────────────┴─────────────────┴─────┴─────────────────┘
1168
+ def groupby_dynamic(
1169
+ index_column,
1170
+ every:,
1171
+ period: nil,
1172
+ offset: nil,
1173
+ truncate: true,
1174
+ include_boundaries: false,
1175
+ closed: "left",
1176
+ by: nil
1177
+ )
1178
+ if offset.nil?
1179
+ if period.nil?
1180
+ offset = "-#{every}"
1181
+ else
1182
+ offset = "0ns"
1183
+ end
1184
+ end
1185
+
1186
+ if period.nil?
1187
+ period = every
1188
+ end
1189
+
1190
+ period = Utils._timedelta_to_pl_duration(period)
1191
+ offset = Utils._timedelta_to_pl_duration(offset)
1192
+ every = Utils._timedelta_to_pl_duration(every)
1193
+
1194
+ rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
1195
+ lgb = _ldf.groupby_dynamic(
1196
+ index_column,
1197
+ every,
1198
+ period,
1199
+ offset,
1200
+ truncate,
1201
+ include_boundaries,
1202
+ closed,
1203
+ rbexprs_by
1204
+ )
1205
+ LazyGroupBy.new(lgb, self.class)
1206
+ end
1207
+
1208
+ # Perform an asof join.
1209
+ #
1210
+ # This is similar to a left-join except that we match on nearest key rather than
1211
+ # equal keys.
1212
+ #
1213
+ # Both DataFrames must be sorted by the join_asof key.
1214
+ #
1215
+ # For each row in the left DataFrame:
1216
+ #
1217
+ # - A "backward" search selects the last row in the right DataFrame whose 'on' key is less than or equal to the left's key.
1218
+ # - A "forward" search selects the first row in the right DataFrame whose 'on' key is greater than or equal to the left's key.
1219
+ #
1220
+ # The default is "backward".
1221
+ #
1222
+ # @param other [LazyFrame]
1223
+ # Lazy DataFrame to join with.
1224
+ # @param left_on [String]
1225
+ # Join column of the left DataFrame.
1226
+ # @param right_on [String]
1227
+ # Join column of the right DataFrame.
1228
+ # @param on [String]
1229
+ # Join column of both DataFrames. If set, `left_on` and `right_on` should be
1230
+ # nil.
1231
+ # @param by [Object]
1232
+ # Join on these columns before doing asof join.
1233
+ # @param by_left [Object]
1234
+ # Join on these columns before doing asof join.
1235
+ # @param by_right [Object]
1236
+ # Join on these columns before doing asof join.
1237
+ # @param strategy ["backward", "forward"]
1238
+ # Join strategy.
1239
+ # @param suffix [String]
1240
+ # Suffix to append to columns with a duplicate name.
1241
+ # @param tolerance [Object]
1242
+ # Numeric tolerance. By setting this the join will only be done if the near
1243
+ # keys are within this distance. If an asof join is done on columns of dtype
1244
+ # "Date", "Datetime", "Duration" or "Time" you use the following string
1245
+ # language:
1246
+ #
1247
+ # - 1ns (1 nanosecond)
1248
+ # - 1us (1 microsecond)
1249
+ # - 1ms (1 millisecond)
1250
+ # - 1s (1 second)
1251
+ # - 1m (1 minute)
1252
+ # - 1h (1 hour)
1253
+ # - 1d (1 day)
1254
+ # - 1w (1 week)
1255
+ # - 1mo (1 calendar month)
1256
+ # - 1y (1 calendar year)
1257
+ # - 1i (1 index count)
1258
+ #
1259
+ # Or combine them:
1260
+ # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
1261
+ #
1262
+ # @param allow_parallel [Boolean]
1263
+ # Allow the physical plan to optionally evaluate the computation of both
1264
+ # DataFrames up to the join in parallel.
1265
+ # @param force_parallel [Boolean]
1266
+ # Force the physical plan to evaluate the computation of both DataFrames up to
1267
+ # the join in parallel.
1268
+ #
1269
+ # @return [LazyFrame]
1270
+ def join_asof(
1271
+ other,
1272
+ left_on: nil,
1273
+ right_on: nil,
1274
+ on: nil,
1275
+ by_left: nil,
1276
+ by_right: nil,
1277
+ by: nil,
1278
+ strategy: "backward",
1279
+ suffix: "_right",
1280
+ tolerance: nil,
1281
+ allow_parallel: true,
1282
+ force_parallel: false
1283
+ )
1284
+ if !other.is_a?(LazyFrame)
1285
+ raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
1286
+ end
1287
+
1288
+ if on.is_a?(String)
1289
+ left_on = on
1290
+ right_on = on
1291
+ end
1292
+
1293
+ if left_on.nil? || right_on.nil?
1294
+ raise ArgumentError, "You should pass the column to join on as an argument."
1295
+ end
1296
+
1297
+ if by_left.is_a?(String) || by_left.is_a?(Expr)
1298
+ by_left_ = [by_left]
1299
+ else
1300
+ by_left_ = by_left
1301
+ end
1302
+
1303
+ if by_right.is_a?(String) || by_right.is_a?(Expr)
1304
+ by_right_ = [by_right]
1305
+ else
1306
+ by_right_ = by_right
1307
+ end
1308
+
1309
+ if by.is_a?(String)
1310
+ by_left_ = [by]
1311
+ by_right_ = [by]
1312
+ elsif by.is_a?(Array)
1313
+ by_left_ = by
1314
+ by_right_ = by
1315
+ end
1316
+
1317
+ tolerance_str = nil
1318
+ tolerance_num = nil
1319
+ if tolerance.is_a?(String)
1320
+ tolerance_str = tolerance
1321
+ else
1322
+ tolerance_num = tolerance
1323
+ end
1324
+
1325
+ _from_rbldf(
1326
+ _ldf.join_asof(
1327
+ other._ldf,
1328
+ Polars.col(left_on)._rbexpr,
1329
+ Polars.col(right_on)._rbexpr,
1330
+ by_left_,
1331
+ by_right_,
1332
+ allow_parallel,
1333
+ force_parallel,
1334
+ suffix,
1335
+ strategy,
1336
+ tolerance_num,
1337
+ tolerance_str
1338
+ )
1339
+ )
1340
+ end
737
1341
 
738
1342
  # Add a join operation to the Logical Plan.
739
1343
  #
@@ -910,7 +1514,7 @@ module Polars
910
1514
  # [
911
1515
  # (Polars.col("a") ** 2).alias("a^2"),
912
1516
  # (Polars.col("b") / 2).alias("b/2"),
913
- # (Polars.col("c").is_not()).alias("not c")
1517
+ # (Polars.col("c").is_not).alias("not c")
914
1518
  # ]
915
1519
  # ).collect
916
1520
  # # =>
@@ -953,8 +1557,44 @@ module Polars
953
1557
  _from_rbldf(_ldf.with_columns(rbexprs))
954
1558
  end
955
1559
 
956
- # def with_context
957
- # end
1560
+ # Add an external context to the computation graph.
1561
+ #
1562
+ # This allows expressions to also access columns from DataFrames
1563
+ # that are not part of this one.
1564
+ #
1565
+ # @param other [Object]
1566
+ # Lazy DataFrame(s) to add as external context.
1567
+ #
1568
+ # @return [LazyFrame]
1569
+ #
1570
+ # @example
1571
+ # df_a = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["a", "c", nil]}).lazy
1572
+ # df_other = Polars::DataFrame.new({"c" => ["foo", "ham"]})
1573
+ # (
1574
+ # df_a.with_context(df_other.lazy).select(
1575
+ # [Polars.col("b") + Polars.col("c").first]
1576
+ # )
1577
+ # ).collect
1578
+ # # =>
1579
+ # # shape: (3, 1)
1580
+ # # ┌──────┐
1581
+ # # │ b │
1582
+ # # │ --- │
1583
+ # # │ str │
1584
+ # # ╞══════╡
1585
+ # # │ afoo │
1586
+ # # ├╌╌╌╌╌╌┤
1587
+ # # │ cfoo │
1588
+ # # ├╌╌╌╌╌╌┤
1589
+ # # │ null │
1590
+ # # └──────┘
1591
+ def with_context(other)
1592
+ if !other.is_a?(Array)
1593
+ other = [other]
1594
+ end
1595
+
1596
+ _from_rbldf(_ldf.with_context(other.map(&:_ldf)))
1597
+ end
958
1598
 
959
1599
  # Add or overwrite column in a DataFrame.
960
1600
  #
@@ -1231,8 +1871,43 @@ module Polars
1231
1871
  slice(0, 1)
1232
1872
  end
1233
1873
 
1234
- # def with_row_count
1235
- # end
1874
+ # Add a column at index 0 that counts the rows.
1875
+ #
1876
+ # @param name [String]
1877
+ # Name of the column to add.
1878
+ # @param offset [Integer]
1879
+ # Start the row count at this offset.
1880
+ #
1881
+ # @return [LazyFrame]
1882
+ #
1883
+ # @note
1884
+ # This can have a negative effect on query performance.
1885
+ # This may, for instance, block predicate pushdown optimization.
1886
+ #
1887
+ # @example
1888
+ # df = Polars::DataFrame.new(
1889
+ # {
1890
+ # "a" => [1, 3, 5],
1891
+ # "b" => [2, 4, 6]
1892
+ # }
1893
+ # ).lazy
1894
+ # df.with_row_count.collect
1895
+ # # =>
1896
+ # # shape: (3, 3)
1897
+ # # ┌────────┬─────┬─────┐
1898
+ # # │ row_nr ┆ a ┆ b │
1899
+ # # │ --- ┆ --- ┆ --- │
1900
+ # # │ u32 ┆ i64 ┆ i64 │
1901
+ # # ╞════════╪═════╪═════╡
1902
+ # # │ 0 ┆ 1 ┆ 2 │
1903
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1904
+ # # │ 1 ┆ 3 ┆ 4 │
1905
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1906
+ # # │ 2 ┆ 5 ┆ 6 │
1907
+ # # └────────┴─────┴─────┘
1908
+ def with_row_count(name: "row_nr", offset: 0)
1909
+ _from_rbldf(_ldf.with_row_count(name, offset))
1910
+ end
1236
1911
 
1237
1912
  # Take every nth row in the LazyFrame and return as a new LazyFrame.
1238
1913
  #
@@ -1256,8 +1931,12 @@ module Polars
1256
1931
  select(Utils.col("*").take_every(n))
1257
1932
  end
1258
1933
 
1259
- # def fill_null
1260
- # end
1934
+ # Fill null values using the specified value or strategy.
1935
+ #
1936
+ # @return [LazyFrame]
1937
+ def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: nil)
1938
+ select(Polars.all.fill_null(value, strategy: strategy, limit: limit))
1939
+ end
1261
1940
 
1262
1941
  # Fill floating point NaN values.
1263
1942
  #
@@ -1550,11 +2229,106 @@ module Polars
1550
2229
  _from_rbldf(_ldf.unique(maintain_order, subset, keep))
1551
2230
  end
1552
2231
 
1553
- # def drop_nulls
1554
- # end
2232
+ # Drop rows with null values from this LazyFrame.
2233
+ #
2234
+ # @param subset [Object]
2235
+ # Subset of column(s) on which `drop_nulls` will be applied.
2236
+ #
2237
+ # @return [LazyFrame]
2238
+ #
2239
+ # @example
2240
+ # df = Polars::DataFrame.new(
2241
+ # {
2242
+ # "foo" => [1, 2, 3],
2243
+ # "bar" => [6, nil, 8],
2244
+ # "ham" => ["a", "b", "c"]
2245
+ # }
2246
+ # )
2247
+ # df.lazy.drop_nulls.collect
2248
+ # # =>
2249
+ # # shape: (2, 3)
2250
+ # # ┌─────┬─────┬─────┐
2251
+ # # │ foo ┆ bar ┆ ham │
2252
+ # # │ --- ┆ --- ┆ --- │
2253
+ # # │ i64 ┆ i64 ┆ str │
2254
+ # # ╞═════╪═════╪═════╡
2255
+ # # │ 1 ┆ 6 ┆ a │
2256
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
2257
+ # # │ 3 ┆ 8 ┆ c │
2258
+ # # └─────┴─────┴─────┘
2259
+ def drop_nulls(subset: nil)
2260
+ if !subset.nil? && !subset.is_a?(Array)
2261
+ subset = [subset]
2262
+ end
2263
+ _from_rbldf(_ldf.drop_nulls(subset))
2264
+ end
1555
2265
 
1556
- # def melt
1557
- # end
2266
+ # Unpivot a DataFrame from wide to long format.
2267
+ #
2268
+ # Optionally leaves identifiers set.
2269
+ #
2270
+ # This function is useful to massage a DataFrame into a format where one or more
2271
+ # columns are identifier variables (id_vars), while all other columns, considered
2272
+ # measured variables (value_vars), are "unpivoted" to the row axis, leaving just
2273
+ # two non-identifier columns, 'variable' and 'value'.
2274
+ #
2275
+ # @param id_vars [Object]
2276
+ # Columns to use as identifier variables.
2277
+ # @param value_vars [Object]
2278
+ # Values to use as value variables.
2279
+ # If `value_vars` is empty all columns that are not in `id_vars` will be used.
2280
+ # @param variable_name [String]
2281
+ # Name to give to the `variable` column. Defaults to "variable"
2282
+ # @param value_name [String]
2283
+ # Name to give to the `value` column. Defaults to "value"
2284
+ #
2285
+ # @return [LazyFrame]
2286
+ #
2287
+ # @example
2288
+ # df = Polars::DataFrame.new(
2289
+ # {
2290
+ # "a" => ["x", "y", "z"],
2291
+ # "b" => [1, 3, 5],
2292
+ # "c" => [2, 4, 6]
2293
+ # }
2294
+ # ).lazy
2295
+ # df.melt(id_vars: "a", value_vars: ["b", "c"]).collect
2296
+ # # =>
2297
+ # # shape: (6, 3)
2298
+ # # ┌─────┬──────────┬───────┐
2299
+ # # │ a ┆ variable ┆ value │
2300
+ # # │ --- ┆ --- ┆ --- │
2301
+ # # │ str ┆ str ┆ i64 │
2302
+ # # ╞═════╪══════════╪═══════╡
2303
+ # # │ x ┆ b ┆ 1 │
2304
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2305
+ # # │ y ┆ b ┆ 3 │
2306
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2307
+ # # │ z ┆ b ┆ 5 │
2308
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2309
+ # # │ x ┆ c ┆ 2 │
2310
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2311
+ # # │ y ┆ c ┆ 4 │
2312
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2313
+ # # │ z ┆ c ┆ 6 │
2314
+ # # └─────┴──────────┴───────┘
2315
+ def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil)
2316
+ if value_vars.is_a?(String)
2317
+ value_vars = [value_vars]
2318
+ end
2319
+ if id_vars.is_a?(String)
2320
+ id_vars = [id_vars]
2321
+ end
2322
+ if value_vars.nil?
2323
+ value_vars = []
2324
+ end
2325
+ if id_vars.nil?
2326
+ id_vars = []
2327
+ end
2328
+ _from_rbldf(
2329
+ _ldf.melt(id_vars, value_vars, value_name, variable_name)
2330
+ )
2331
+ end
1558
2332
 
1559
2333
  # def map
1560
2334
  # end