polars-df 0.5.0-x86_64-darwin → 0.7.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,22 @@ module Polars
4
4
  # @private
5
5
  attr_accessor :_ldf
6
6
 
7
+ # Create a new LazyFrame.
8
+ def initialize(data = nil, schema: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
9
+ self._ldf = (
10
+ DataFrame.new(
11
+ data,
12
+ schema: schema,
13
+ schema_overrides: schema_overrides,
14
+ orient: orient,
15
+ infer_schema_length: infer_schema_length,
16
+ nan_to_null: nan_to_null
17
+ )
18
+ .lazy
19
+ ._ldf
20
+ )
21
+ end
22
+
7
23
  # @private
8
24
  def self._from_rbldf(rb_ldf)
9
25
  ldf = LazyFrame.allocate
@@ -81,7 +97,8 @@ module Polars
81
97
  row_count_offset: 0,
82
98
  storage_options: nil,
83
99
  low_memory: false,
84
- use_statistics: true
100
+ use_statistics: true,
101
+ hive_partitioning: true
85
102
  )
86
103
  _from_rbldf(
87
104
  RbLazyFrame.new_from_parquet(
@@ -92,7 +109,8 @@ module Polars
92
109
  rechunk,
93
110
  Utils._prepare_row_count_args(row_count_name, row_count_offset),
94
111
  low_memory,
95
- use_statistics
112
+ use_statistics,
113
+ hive_partitioning
96
114
  )
97
115
  )
98
116
  end
@@ -334,6 +352,7 @@ module Polars
334
352
  slice_pushdown,
335
353
  common_subplan_elimination,
336
354
  allow_streaming,
355
+ false
337
356
  )
338
357
 
339
358
  ldf.describe_optimized_plan
@@ -379,16 +398,16 @@ module Polars
379
398
  # # │ 2 ┆ 7.0 ┆ b │
380
399
  # # │ 1 ┆ 6.0 ┆ a │
381
400
  # # └─────┴─────┴─────┘
382
- def sort(by, reverse: false, nulls_last: false)
401
+ def sort(by, reverse: false, nulls_last: false, maintain_order: false)
383
402
  if by.is_a?(String)
384
- _from_rbldf(_ldf.sort(by, reverse, nulls_last))
403
+ return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order))
385
404
  end
386
405
  if Utils.bool?(reverse)
387
406
  reverse = [reverse]
388
407
  end
389
408
 
390
409
  by = Utils.selection_to_rbexpr_list(by)
391
- _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last))
410
+ _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order))
392
411
  end
393
412
 
394
413
  # def profile
@@ -429,7 +448,7 @@ module Polars
429
448
  # "c" => [6, 5, 4, 3, 2, 1]
430
449
  # }
431
450
  # ).lazy
432
- # df.groupby("a", maintain_order: true).agg(Polars.all.sum).collect
451
+ # df.group_by("a", maintain_order: true).agg(Polars.all.sum).collect
433
452
  # # =>
434
453
  # # shape: (3, 3)
435
454
  # # ┌─────┬─────┬─────┐
@@ -450,7 +469,8 @@ module Polars
450
469
  no_optimization: false,
451
470
  slice_pushdown: true,
452
471
  common_subplan_elimination: true,
453
- allow_streaming: false
472
+ allow_streaming: false,
473
+ _eager: false
454
474
  )
455
475
  if no_optimization
456
476
  predicate_pushdown = false
@@ -470,7 +490,8 @@ module Polars
470
490
  simplify_expression,
471
491
  slice_pushdown,
472
492
  common_subplan_elimination,
473
- allow_streaming
493
+ allow_streaming,
494
+ _eager
474
495
  )
475
496
  Utils.wrap_df(ldf.collect)
476
497
  end
@@ -552,7 +573,8 @@ module Polars
552
573
  simplify_expression,
553
574
  slice_pushdown,
554
575
  false,
555
- true
576
+ true,
577
+ false
556
578
  )
557
579
  lf.sink_parquet(
558
580
  path,
@@ -607,7 +629,7 @@ module Polars
607
629
  # "c" => [6, 5, 4, 3, 2, 1]
608
630
  # }
609
631
  # ).lazy
610
- # df.groupby("a", maintain_order: true).agg(Polars.all.sum).fetch(2)
632
+ # df.group_by("a", maintain_order: true).agg(Polars.all.sum).fetch(2)
611
633
  # # =>
612
634
  # # shape: (2, 3)
613
635
  # # ┌─────┬─────┬─────┐
@@ -644,7 +666,8 @@ module Polars
644
666
  simplify_expression,
645
667
  slice_pushdown,
646
668
  common_subplan_elimination,
647
- allow_streaming
669
+ allow_streaming,
670
+ false
648
671
  )
649
672
  Utils.wrap_df(ldf.fetch(n_rows))
650
673
  end
@@ -837,13 +860,13 @@ module Polars
837
860
  _from_rbldf(_ldf.select(exprs))
838
861
  end
839
862
 
840
- # Start a groupby operation.
863
+ # Start a group by operation.
841
864
  #
842
865
  # @param by [Object]
843
866
  # Column(s) to group by.
844
867
  # @param maintain_order [Boolean]
845
868
  # Make sure that the order of the groups remain consistent. This is more
846
- # expensive than a default groupby.
869
+ # expensive than a default group by.
847
870
  #
848
871
  # @return [LazyGroupBy]
849
872
  #
@@ -855,7 +878,7 @@ module Polars
855
878
  # "c" => [6, 5, 4, 3, 2, 1]
856
879
  # }
857
880
  # ).lazy
858
- # df.groupby("a", maintain_order: true).agg(Polars.col("b").sum).collect
881
+ # df.group_by("a", maintain_order: true).agg(Polars.col("b").sum).collect
859
882
  # # =>
860
883
  # # shape: (3, 2)
861
884
  # # ┌─────┬─────┐
@@ -867,19 +890,21 @@ module Polars
867
890
  # # │ b ┆ 11 │
868
891
  # # │ c ┆ 6 │
869
892
  # # └─────┴─────┘
870
- def groupby(by, maintain_order: false)
893
+ def group_by(by, maintain_order: false)
871
894
  rbexprs_by = Utils.selection_to_rbexpr_list(by)
872
- lgb = _ldf.groupby(rbexprs_by, maintain_order)
873
- LazyGroupBy.new(lgb, self.class)
895
+ lgb = _ldf.group_by(rbexprs_by, maintain_order)
896
+ LazyGroupBy.new(lgb)
874
897
  end
898
+ alias_method :groupby, :group_by
899
+ alias_method :group, :group_by
875
900
 
876
901
  # Create rolling groups based on a time column.
877
902
  #
878
903
  # Also works for index values of type `:i32` or `:i64`.
879
904
  #
880
- # Different from a `dynamic_groupby` the windows are now determined by the
905
+ # Different from a `dynamic_group_by` the windows are now determined by the
881
906
  # individual values and are not of constant intervals. For constant intervals
882
- # use *groupby_dynamic*.
907
+ # use *group_by_dynamic*.
883
908
  #
884
909
  # The `period` and `offset` arguments are created either from a timedelta, or
885
910
  # by using the following string language:
@@ -899,7 +924,7 @@ module Polars
899
924
  # Or combine them:
900
925
  # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
901
926
  #
902
- # In case of a groupby_rolling on an integer column, the windows are defined by:
927
+ # In case of a group_by_rolling on an integer column, the windows are defined by:
903
928
  #
904
929
  # - "1i" # length 1
905
930
  # - "10i" # length 10
@@ -910,7 +935,7 @@ module Polars
910
935
  # This column must be sorted in ascending order. If not the output will not
911
936
  # make sense.
912
937
  #
913
- # In case of a rolling groupby on indices, dtype needs to be one of
938
+ # In case of a rolling group by on indices, dtype needs to be one of
914
939
  # `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
915
940
  # performance matters use an `:i64` column.
916
941
  # @param period [Object]
@@ -921,6 +946,12 @@ module Polars
921
946
  # Define whether the temporal window interval is closed or not.
922
947
  # @param by [Object]
923
948
  # Also group by this column/these columns.
949
+ # @param check_sorted [Boolean]
950
+ # When the `by` argument is given, polars can not check sortedness
951
+ # by the metadata and has to do a full scan on the index column to
952
+ # verify data is sorted. This is expensive. If you are sure the
953
+ # data within the by groups is sorted, you can set this to `false`.
954
+ # Doing so incorrectly will lead to incorrect output.
924
955
  #
925
956
  # @return [LazyGroupBy]
926
957
  #
@@ -933,16 +964,16 @@ module Polars
933
964
  # "2020-01-03 19:45:32",
934
965
  # "2020-01-08 23:16:43"
935
966
  # ]
936
- # df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
937
- # Polars.col("dt").str.strptime(Polars::Datetime)
967
+ # df = Polars::LazyFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
968
+ # Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
938
969
  # )
939
- # df.groupby_rolling(index_column: "dt", period: "2d").agg(
970
+ # df.group_by_rolling(index_column: "dt", period: "2d").agg(
940
971
  # [
941
972
  # Polars.sum("a").alias("sum_a"),
942
973
  # Polars.min("a").alias("min_a"),
943
974
  # Polars.max("a").alias("max_a")
944
975
  # ]
945
- # )
976
+ # ).collect
946
977
  # # =>
947
978
  # # shape: (6, 4)
948
979
  # # ┌─────────────────────┬───────┬───────┬───────┐
@@ -957,14 +988,15 @@ module Polars
957
988
  # # │ 2020-01-03 19:45:32 ┆ 11 ┆ 2 ┆ 9 │
958
989
  # # │ 2020-01-08 23:16:43 ┆ 1 ┆ 1 ┆ 1 │
959
990
  # # └─────────────────────┴───────┴───────┴───────┘
960
- def groupby_rolling(
991
+ def group_by_rolling(
961
992
  index_column:,
962
993
  period:,
963
994
  offset: nil,
964
995
  closed: "right",
965
- by: nil
996
+ by: nil,
997
+ check_sorted: true
966
998
  )
967
- index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
999
+ index_column = Utils.parse_as_expression(index_column)
968
1000
  if offset.nil?
969
1001
  offset = "-#{period}"
970
1002
  end
@@ -973,16 +1005,17 @@ module Polars
973
1005
  period = Utils._timedelta_to_pl_duration(period)
974
1006
  offset = Utils._timedelta_to_pl_duration(offset)
975
1007
 
976
- lgb = _ldf.groupby_rolling(
977
- index_column._rbexpr, period, offset, closed, rbexprs_by
1008
+ lgb = _ldf.group_by_rolling(
1009
+ index_column, period, offset, closed, rbexprs_by, check_sorted
978
1010
  )
979
- LazyGroupBy.new(lgb, self.class)
1011
+ LazyGroupBy.new(lgb)
980
1012
  end
1013
+ alias_method :groupby_rolling, :group_by_rolling
981
1014
 
982
1015
  # Group based on a time value (or index value of type `:i32`, `:i64`).
983
1016
  #
984
1017
  # Time windows are calculated and rows are assigned to windows. Different from a
985
- # normal groupby is that a row can be member of multiple groups. The time/index
1018
+ # normal group by is that a row can be member of multiple groups. The time/index
986
1019
  # window could be seen as a rolling window, with a window size determined by
987
1020
  # dates/times/values instead of slots in the DataFrame.
988
1021
  #
@@ -1010,37 +1043,43 @@ module Polars
1010
1043
  # Or combine them:
1011
1044
  # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
1012
1045
  #
1013
- # In case of a groupby_dynamic on an integer column, the windows are defined by:
1046
+ # In case of a group_by_dynamic on an integer column, the windows are defined by:
1014
1047
  #
1015
1048
  # - "1i" # length 1
1016
1049
  # - "10i" # length 10
1017
1050
  #
1018
- # @param index_column
1051
+ # @param index_column [Object]
1019
1052
  # Column used to group based on the time window.
1020
1053
  # Often to type Date/Datetime
1021
1054
  # This column must be sorted in ascending order. If not the output will not
1022
1055
  # make sense.
1023
1056
  #
1024
- # In case of a dynamic groupby on indices, dtype needs to be one of
1057
+ # In case of a dynamic group by on indices, dtype needs to be one of
1025
1058
  # `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
1026
1059
  # performance matters use an `:i64` column.
1027
- # @param every
1060
+ # @param every [Object]
1028
1061
  # Interval of the window.
1029
- # @param period
1062
+ # @param period [Object]
1030
1063
  # Length of the window, if None it is equal to 'every'.
1031
- # @param offset
1064
+ # @param offset [Object]
1032
1065
  # Offset of the window if None and period is None it will be equal to negative
1033
1066
  # `every`.
1034
- # @param truncate
1067
+ # @param truncate [Boolean]
1035
1068
  # Truncate the time value to the window lower bound.
1036
- # @param include_boundaries
1069
+ # @param include_boundaries [Boolean]
1037
1070
  # Add the lower and upper bound of the window to the "_lower_bound" and
1038
1071
  # "_upper_bound" columns. This will impact performance because it's harder to
1039
1072
  # parallelize
1040
1073
  # @param closed ["right", "left", "both", "none"]
1041
1074
  # Define whether the temporal window interval is closed or not.
1042
- # @param by
1075
+ # @param by [Object]
1043
1076
  # Also group by this column/these columns
1077
+ # @param check_sorted [Boolean]
1078
+ # When the `by` argument is given, polars can not check sortedness
1079
+ # by the metadata and has to do a full scan on the index column to
1080
+ # verify data is sorted. This is expensive. If you are sure the
1081
+ # data within the by groups is sorted, you can set this to `false`.
1082
+ # Doing so incorrectly will lead to incorrect output.
1044
1083
  #
1045
1084
  # @return [LazyGroupBy]
1046
1085
  #
@@ -1072,7 +1111,7 @@ module Polars
1072
1111
  # # └─────────────────────┴─────┘
1073
1112
  #
1074
1113
  # @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
1075
- # df.groupby_dynamic("time", every: "1h", closed: "right").agg(
1114
+ # df.group_by_dynamic("time", every: "1h", closed: "right").agg(
1076
1115
  # [
1077
1116
  # Polars.col("time").min.alias("time_min"),
1078
1117
  # Polars.col("time").max.alias("time_max")
@@ -1092,7 +1131,7 @@ module Polars
1092
1131
  # # └─────────────────────┴─────────────────────┴─────────────────────┘
1093
1132
  #
1094
1133
  # @example The window boundaries can also be added to the aggregation result.
1095
- # df.groupby_dynamic(
1134
+ # df.group_by_dynamic(
1096
1135
  # "time", every: "1h", include_boundaries: true, closed: "right"
1097
1136
  # ).agg([Polars.col("time").count.alias("time_count")])
1098
1137
  # # =>
@@ -1109,27 +1148,27 @@ module Polars
1109
1148
  # # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
1110
1149
  #
1111
1150
  # @example When closed="left", should not include right end of interval.
1112
- # df.groupby_dynamic("time", every: "1h", closed: "left").agg(
1151
+ # df.group_by_dynamic("time", every: "1h", closed: "left").agg(
1113
1152
  # [
1114
1153
  # Polars.col("time").count.alias("time_count"),
1115
- # Polars.col("time").list.alias("time_agg_list")
1154
+ # Polars.col("time").alias("time_agg_list")
1116
1155
  # ]
1117
1156
  # )
1118
1157
  # # =>
1119
1158
  # # shape: (4, 3)
1120
- # # ┌─────────────────────┬────────────┬─────────────────────────────────────┐
1121
- # # │ time ┆ time_count ┆ time_agg_list
1122
- # # │ --- ┆ --- ┆ ---
1123
- # # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]]
1124
- # # ╞═════════════════════╪════════════╪═════════════════════════════════════╡
1125
- # # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16...
1126
- # # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16...
1127
- # # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16...
1128
- # # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00]
1129
- # # └─────────────────────┴────────────┴─────────────────────────────────────┘
1159
+ # # ┌─────────────────────┬────────────┬───────────────────────────────────┐
1160
+ # # │ time ┆ time_count ┆ time_agg_list
1161
+ # # │ --- ┆ --- ┆ ---
1162
+ # # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]]
1163
+ # # ╞═════════════════════╪════════════╪═══════════════════════════════════╡
1164
+ # # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16
1165
+ # # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16
1166
+ # # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16
1167
+ # # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00]
1168
+ # # └─────────────────────┴────────────┴───────────────────────────────────┘
1130
1169
  #
1131
1170
  # @example When closed="both" the time values at the window boundaries belong to 2 groups.
1132
- # df.groupby_dynamic("time", every: "1h", closed: "both").agg(
1171
+ # df.group_by_dynamic("time", every: "1h", closed: "both").agg(
1133
1172
  # [Polars.col("time").count.alias("time_count")]
1134
1173
  # )
1135
1174
  # # =>
@@ -1146,7 +1185,7 @@ module Polars
1146
1185
  # # │ 2021-12-16 03:00:00 ┆ 1 │
1147
1186
  # # └─────────────────────┴────────────┘
1148
1187
  #
1149
- # @example Dynamic groupbys can also be combined with grouping on normal keys.
1188
+ # @example Dynamic group bys can also be combined with grouping on normal keys.
1150
1189
  # df = Polars::DataFrame.new(
1151
1190
  # {
1152
1191
  # "time" => Polars.date_range(
@@ -1157,7 +1196,7 @@ module Polars
1157
1196
  # "groups" => ["a", "a", "a", "b", "b", "a", "a"]
1158
1197
  # }
1159
1198
  # )
1160
- # df.groupby_dynamic(
1199
+ # df.group_by_dynamic(
1161
1200
  # "time",
1162
1201
  # every: "1h",
1163
1202
  # closed: "both",
@@ -1180,20 +1219,20 @@ module Polars
1180
1219
  # # │ b ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1 │
1181
1220
  # # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
1182
1221
  #
1183
- # @example Dynamic groupby on an index column.
1222
+ # @example Dynamic group by on an index column.
1184
1223
  # df = Polars::DataFrame.new(
1185
1224
  # {
1186
1225
  # "idx" => Polars.arange(0, 6, eager: true),
1187
1226
  # "A" => ["A", "A", "B", "B", "B", "C"]
1188
1227
  # }
1189
1228
  # )
1190
- # df.groupby_dynamic(
1229
+ # df.group_by_dynamic(
1191
1230
  # "idx",
1192
1231
  # every: "2i",
1193
1232
  # period: "3i",
1194
1233
  # include_boundaries: true,
1195
1234
  # closed: "right"
1196
- # ).agg(Polars.col("A").list.alias("A_agg_list"))
1235
+ # ).agg(Polars.col("A").alias("A_agg_list"))
1197
1236
  # # =>
1198
1237
  # # shape: (3, 4)
1199
1238
  # # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
@@ -1205,23 +1244,26 @@ module Polars
1205
1244
  # # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
1206
1245
  # # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
1207
1246
  # # └─────────────────┴─────────────────┴─────┴─────────────────┘
1208
- def groupby_dynamic(
1247
+ def group_by_dynamic(
1209
1248
  index_column,
1210
1249
  every:,
1211
1250
  period: nil,
1212
1251
  offset: nil,
1213
- truncate: true,
1252
+ truncate: nil,
1214
1253
  include_boundaries: false,
1215
1254
  closed: "left",
1255
+ label: "left",
1216
1256
  by: nil,
1217
- start_by: "window"
1257
+ start_by: "window",
1258
+ check_sorted: true
1218
1259
  )
1260
+ if !truncate.nil?
1261
+ label = truncate ? "left" : "datapoint"
1262
+ end
1263
+
1264
+ index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
1219
1265
  if offset.nil?
1220
- if period.nil?
1221
- offset = "-#{every}"
1222
- else
1223
- offset = "0ns"
1224
- end
1266
+ offset = period.nil? ? "-#{every}" : "0ns"
1225
1267
  end
1226
1268
 
1227
1269
  if period.nil?
@@ -1233,19 +1275,21 @@ module Polars
1233
1275
  every = Utils._timedelta_to_pl_duration(every)
1234
1276
 
1235
1277
  rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
1236
- lgb = _ldf.groupby_dynamic(
1237
- index_column,
1278
+ lgb = _ldf.group_by_dynamic(
1279
+ index_column._rbexpr,
1238
1280
  every,
1239
1281
  period,
1240
1282
  offset,
1241
- truncate,
1283
+ label,
1242
1284
  include_boundaries,
1243
1285
  closed,
1244
1286
  rbexprs_by,
1245
- start_by
1287
+ start_by,
1288
+ check_sorted
1246
1289
  )
1247
- LazyGroupBy.new(lgb, self.class)
1290
+ LazyGroupBy.new(lgb)
1248
1291
  end
1292
+ alias_method :groupby_dynamic, :group_by_dynamic
1249
1293
 
1250
1294
  # Perform an asof join.
1251
1295
  #
@@ -1351,7 +1395,7 @@ module Polars
1351
1395
  if by.is_a?(String)
1352
1396
  by_left_ = [by]
1353
1397
  by_right_ = [by]
1354
- elsif by.is_a?(Array)
1398
+ elsif by.is_a?(::Array)
1355
1399
  by_left_ = by
1356
1400
  by_right_ = by
1357
1401
  end
@@ -1619,7 +1663,7 @@ module Polars
1619
1663
  # # │ null │
1620
1664
  # # └──────┘
1621
1665
  def with_context(other)
1622
- if !other.is_a?(Array)
1666
+ if !other.is_a?(::Array)
1623
1667
  other = [other]
1624
1668
  end
1625
1669
 
@@ -1705,8 +1749,10 @@ module Polars
1705
1749
 
1706
1750
  # Shift the values by a given period.
1707
1751
  #
1708
- # @param periods [Integer]
1752
+ # @param n [Integer]
1709
1753
  # Number of places to shift (may be negative).
1754
+ # @param fill_value [Object]
1755
+ # Fill the resulting null values with this value.
1710
1756
  #
1711
1757
  # @return [LazyFrame]
1712
1758
  #
@@ -1743,8 +1789,12 @@ module Polars
1743
1789
  # # │ 5 ┆ 6 │
1744
1790
  # # │ null ┆ null │
1745
1791
  # # └──────┴──────┘
1746
- def shift(periods)
1747
- _from_rbldf(_ldf.shift(periods))
1792
+ def shift(n, fill_value: nil)
1793
+ if !fill_value.nil?
1794
+ fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
1795
+ end
1796
+ n = Utils.parse_as_expression(n)
1797
+ _from_rbldf(_ldf.shift(n, fill_value))
1748
1798
  end
1749
1799
 
1750
1800
  # Shift the values by a given period and fill the resulting null values.
@@ -1790,10 +1840,7 @@ module Polars
1790
1840
  # # │ 0 ┆ 0 │
1791
1841
  # # └─────┴─────┘
1792
1842
  def shift_and_fill(periods, fill_value)
1793
- if !fill_value.is_a?(Expr)
1794
- fill_value = Polars.lit(fill_value)
1795
- end
1796
- _from_rbldf(_ldf.shift_and_fill(periods, fill_value._rbexpr))
1843
+ shift(periods, fill_value: fill_value)
1797
1844
  end
1798
1845
 
1799
1846
  # Get a slice of this DataFrame.
@@ -2228,7 +2275,7 @@ module Polars
2228
2275
  #
2229
2276
  # @return [LazyFrame]
2230
2277
  def unique(maintain_order: true, subset: nil, keep: "first")
2231
- if !subset.nil? && !subset.is_a?(Array)
2278
+ if !subset.nil? && !subset.is_a?(::Array)
2232
2279
  subset = [subset]
2233
2280
  end
2234
2281
  _from_rbldf(_ldf.unique(maintain_order, subset, keep))
@@ -2261,7 +2308,7 @@ module Polars
2261
2308
  # # │ 3 ┆ 8 ┆ c │
2262
2309
  # # └─────┴─────┴─────┘
2263
2310
  def drop_nulls(subset: nil)
2264
- if !subset.nil? && !subset.is_a?(Array)
2311
+ if !subset.nil? && !subset.is_a?(::Array)
2265
2312
  subset = [subset]
2266
2313
  end
2267
2314
  _from_rbldf(_ldf.drop_nulls(subset))
@@ -2351,16 +2398,16 @@ module Polars
2351
2398
  # df.interpolate.collect
2352
2399
  # # =>
2353
2400
  # # shape: (4, 3)
2354
- # # ┌─────┬──────┬─────┐
2355
- # # │ foo ┆ bar ┆ baz
2356
- # # │ --- ┆ --- ┆ ---
2357
- # # │ i64 ┆ i64 ┆ i64
2358
- # # ╞═════╪══════╪═════╡
2359
- # # │ 1 ┆ 6 ┆ 1
2360
- # # │ 5 ┆ 7 ┆ 3
2361
- # # │ 9 ┆ 9 ┆ 6
2362
- # # │ 10 ┆ null ┆ 9
2363
- # # └─────┴──────┴─────┘
2401
+ # # ┌──────┬──────┬──────────┐
2402
+ # # │ foo ┆ bar ┆ baz
2403
+ # # │ --- ┆ --- ┆ ---
2404
+ # # │ f64 ┆ f64 ┆ f64
2405
+ # # ╞══════╪══════╪══════════╡
2406
+ # # │ 1.0 ┆ 6.0 ┆ 1.0
2407
+ # # │ 5.0 ┆ 7.0 ┆ 3.666667
2408
+ # # │ 9.0 ┆ 9.0 ┆ 6.333333
2409
+ # # │ 10.0 ┆ null ┆ 9.0
2410
+ # # └──────┴──────┴──────────┘
2364
2411
  def interpolate
2365
2412
  select(Utils.col("*").interpolate)
2366
2413
  end
@@ -2423,6 +2470,38 @@ module Polars
2423
2470
  _from_rbldf(_ldf.unnest(names))
2424
2471
  end
2425
2472
 
2473
+ # TODO
2474
+ # def merge_sorted
2475
+ # end
2476
+
2477
+ # Indicate that one or multiple columns are sorted.
2478
+ #
2479
+ # @param column [Object]
2480
+ # Columns that are sorted
2481
+ # @param more_columns [Object]
2482
+ # Additional columns that are sorted, specified as positional arguments.
2483
+ # @param descending [Boolean]
2484
+ # Whether the columns are sorted in descending order.
2485
+ #
2486
+ # @return [LazyFrame]
2487
+ def set_sorted(
2488
+ column,
2489
+ *more_columns,
2490
+ descending: false
2491
+ )
2492
+ columns = Utils.selection_to_rbexpr_list(column)
2493
+ if more_columns.any?
2494
+ columns.concat(Utils.selection_to_rbexpr_list(more_columns))
2495
+ end
2496
+ with_columns(
2497
+ columns.map { |e| Utils.wrap_expr(e).set_sorted(descending: descending) }
2498
+ )
2499
+ end
2500
+
2501
+ # TODO
2502
+ # def update
2503
+ # end
2504
+
2426
2505
  private
2427
2506
 
2428
2507
  def initialize_copy(other)