polars-df 0.6.0-x86_64-darwin → 0.7.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -97,7 +97,8 @@ module Polars
97
97
  row_count_offset: 0,
98
98
  storage_options: nil,
99
99
  low_memory: false,
100
- use_statistics: true
100
+ use_statistics: true,
101
+ hive_partitioning: true
101
102
  )
102
103
  _from_rbldf(
103
104
  RbLazyFrame.new_from_parquet(
@@ -108,7 +109,8 @@ module Polars
108
109
  rechunk,
109
110
  Utils._prepare_row_count_args(row_count_name, row_count_offset),
110
111
  low_memory,
111
- use_statistics
112
+ use_statistics,
113
+ hive_partitioning
112
114
  )
113
115
  )
114
116
  end
@@ -350,6 +352,7 @@ module Polars
350
352
  slice_pushdown,
351
353
  common_subplan_elimination,
352
354
  allow_streaming,
355
+ false
353
356
  )
354
357
 
355
358
  ldf.describe_optimized_plan
@@ -445,7 +448,7 @@ module Polars
445
448
  # "c" => [6, 5, 4, 3, 2, 1]
446
449
  # }
447
450
  # ).lazy
448
- # df.groupby("a", maintain_order: true).agg(Polars.all.sum).collect
451
+ # df.group_by("a", maintain_order: true).agg(Polars.all.sum).collect
449
452
  # # =>
450
453
  # # shape: (3, 3)
451
454
  # # ┌─────┬─────┬─────┐
@@ -466,7 +469,8 @@ module Polars
466
469
  no_optimization: false,
467
470
  slice_pushdown: true,
468
471
  common_subplan_elimination: true,
469
- allow_streaming: false
472
+ allow_streaming: false,
473
+ _eager: false
470
474
  )
471
475
  if no_optimization
472
476
  predicate_pushdown = false
@@ -486,7 +490,8 @@ module Polars
486
490
  simplify_expression,
487
491
  slice_pushdown,
488
492
  common_subplan_elimination,
489
- allow_streaming
493
+ allow_streaming,
494
+ _eager
490
495
  )
491
496
  Utils.wrap_df(ldf.collect)
492
497
  end
@@ -568,7 +573,8 @@ module Polars
568
573
  simplify_expression,
569
574
  slice_pushdown,
570
575
  false,
571
- true
576
+ true,
577
+ false
572
578
  )
573
579
  lf.sink_parquet(
574
580
  path,
@@ -623,7 +629,7 @@ module Polars
623
629
  # "c" => [6, 5, 4, 3, 2, 1]
624
630
  # }
625
631
  # ).lazy
626
- # df.groupby("a", maintain_order: true).agg(Polars.all.sum).fetch(2)
632
+ # df.group_by("a", maintain_order: true).agg(Polars.all.sum).fetch(2)
627
633
  # # =>
628
634
  # # shape: (2, 3)
629
635
  # # ┌─────┬─────┬─────┐
@@ -660,7 +666,8 @@ module Polars
660
666
  simplify_expression,
661
667
  slice_pushdown,
662
668
  common_subplan_elimination,
663
- allow_streaming
669
+ allow_streaming,
670
+ false
664
671
  )
665
672
  Utils.wrap_df(ldf.fetch(n_rows))
666
673
  end
@@ -853,13 +860,13 @@ module Polars
853
860
  _from_rbldf(_ldf.select(exprs))
854
861
  end
855
862
 
856
- # Start a groupby operation.
863
+ # Start a group by operation.
857
864
  #
858
865
  # @param by [Object]
859
866
  # Column(s) to group by.
860
867
  # @param maintain_order [Boolean]
861
868
  # Make sure that the order of the groups remain consistent. This is more
862
- # expensive than a default groupby.
869
+ # expensive than a default group by.
863
870
  #
864
871
  # @return [LazyGroupBy]
865
872
  #
@@ -871,7 +878,7 @@ module Polars
871
878
  # "c" => [6, 5, 4, 3, 2, 1]
872
879
  # }
873
880
  # ).lazy
874
- # df.groupby("a", maintain_order: true).agg(Polars.col("b").sum).collect
881
+ # df.group_by("a", maintain_order: true).agg(Polars.col("b").sum).collect
875
882
  # # =>
876
883
  # # shape: (3, 2)
877
884
  # # ┌─────┬─────┐
@@ -883,19 +890,21 @@ module Polars
883
890
  # # │ b ┆ 11 │
884
891
  # # │ c ┆ 6 │
885
892
  # # └─────┴─────┘
886
- def groupby(by, maintain_order: false)
893
+ def group_by(by, maintain_order: false)
887
894
  rbexprs_by = Utils.selection_to_rbexpr_list(by)
888
- lgb = _ldf.groupby(rbexprs_by, maintain_order)
889
- LazyGroupBy.new(lgb, self.class)
895
+ lgb = _ldf.group_by(rbexprs_by, maintain_order)
896
+ LazyGroupBy.new(lgb)
890
897
  end
898
+ alias_method :groupby, :group_by
899
+ alias_method :group, :group_by
891
900
 
892
901
  # Create rolling groups based on a time column.
893
902
  #
894
903
  # Also works for index values of type `:i32` or `:i64`.
895
904
  #
896
- # Different from a `dynamic_groupby` the windows are now determined by the
905
+ # Different from a `dynamic_group_by` the windows are now determined by the
897
906
  # individual values and are not of constant intervals. For constant intervals
898
- # use *groupby_dynamic*.
907
+ # use *group_by_dynamic*.
899
908
  #
900
909
  # The `period` and `offset` arguments are created either from a timedelta, or
901
910
  # by using the following string language:
@@ -915,7 +924,7 @@ module Polars
915
924
  # Or combine them:
916
925
  # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
917
926
  #
918
- # In case of a groupby_rolling on an integer column, the windows are defined by:
927
+ # In case of a group_by_rolling on an integer column, the windows are defined by:
919
928
  #
920
929
  # - "1i" # length 1
921
930
  # - "10i" # length 10
@@ -926,7 +935,7 @@ module Polars
926
935
  # This column must be sorted in ascending order. If not the output will not
927
936
  # make sense.
928
937
  #
929
- # In case of a rolling groupby on indices, dtype needs to be one of
938
+ # In case of a rolling group by on indices, dtype needs to be one of
930
939
  # `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
931
940
  # performance matters use an `:i64` column.
932
941
  # @param period [Object]
@@ -958,7 +967,7 @@ module Polars
958
967
  # df = Polars::LazyFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
959
968
  # Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
960
969
  # )
961
- # df.groupby_rolling(index_column: "dt", period: "2d").agg(
970
+ # df.group_by_rolling(index_column: "dt", period: "2d").agg(
962
971
  # [
963
972
  # Polars.sum("a").alias("sum_a"),
964
973
  # Polars.min("a").alias("min_a"),
@@ -979,7 +988,7 @@ module Polars
979
988
  # # │ 2020-01-03 19:45:32 ┆ 11 ┆ 2 ┆ 9 │
980
989
  # # │ 2020-01-08 23:16:43 ┆ 1 ┆ 1 ┆ 1 │
981
990
  # # └─────────────────────┴───────┴───────┴───────┘
982
- def groupby_rolling(
991
+ def group_by_rolling(
983
992
  index_column:,
984
993
  period:,
985
994
  offset: nil,
@@ -987,7 +996,7 @@ module Polars
987
996
  by: nil,
988
997
  check_sorted: true
989
998
  )
990
- index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
999
+ index_column = Utils.parse_as_expression(index_column)
991
1000
  if offset.nil?
992
1001
  offset = "-#{period}"
993
1002
  end
@@ -996,16 +1005,17 @@ module Polars
996
1005
  period = Utils._timedelta_to_pl_duration(period)
997
1006
  offset = Utils._timedelta_to_pl_duration(offset)
998
1007
 
999
- lgb = _ldf.groupby_rolling(
1000
- index_column._rbexpr, period, offset, closed, rbexprs_by, check_sorted
1008
+ lgb = _ldf.group_by_rolling(
1009
+ index_column, period, offset, closed, rbexprs_by, check_sorted
1001
1010
  )
1002
- LazyGroupBy.new(lgb, self.class)
1011
+ LazyGroupBy.new(lgb)
1003
1012
  end
1013
+ alias_method :groupby_rolling, :group_by_rolling
1004
1014
 
1005
1015
  # Group based on a time value (or index value of type `:i32`, `:i64`).
1006
1016
  #
1007
1017
  # Time windows are calculated and rows are assigned to windows. Different from a
1008
- # normal groupby is that a row can be member of multiple groups. The time/index
1018
+ # normal group by is that a row can be member of multiple groups. The time/index
1009
1019
  # window could be seen as a rolling window, with a window size determined by
1010
1020
  # dates/times/values instead of slots in the DataFrame.
1011
1021
  #
@@ -1033,37 +1043,43 @@ module Polars
1033
1043
  # Or combine them:
1034
1044
  # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
1035
1045
  #
1036
- # In case of a groupby_dynamic on an integer column, the windows are defined by:
1046
+ # In case of a group_by_dynamic on an integer column, the windows are defined by:
1037
1047
  #
1038
1048
  # - "1i" # length 1
1039
1049
  # - "10i" # length 10
1040
1050
  #
1041
- # @param index_column
1051
+ # @param index_column [Object]
1042
1052
  # Column used to group based on the time window.
1043
1053
  # Often to type Date/Datetime
1044
1054
  # This column must be sorted in ascending order. If not the output will not
1045
1055
  # make sense.
1046
1056
  #
1047
- # In case of a dynamic groupby on indices, dtype needs to be one of
1057
+ # In case of a dynamic group by on indices, dtype needs to be one of
1048
1058
  # `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
1049
1059
  # performance matters use an `:i64` column.
1050
- # @param every
1060
+ # @param every [Object]
1051
1061
  # Interval of the window.
1052
- # @param period
1062
+ # @param period [Object]
1053
1063
  # Length of the window, if None it is equal to 'every'.
1054
- # @param offset
1064
+ # @param offset [Object]
1055
1065
  # Offset of the window if None and period is None it will be equal to negative
1056
1066
  # `every`.
1057
- # @param truncate
1067
+ # @param truncate [Boolean]
1058
1068
  # Truncate the time value to the window lower bound.
1059
- # @param include_boundaries
1069
+ # @param include_boundaries [Boolean]
1060
1070
  # Add the lower and upper bound of the window to the "_lower_bound" and
1061
1071
  # "_upper_bound" columns. This will impact performance because it's harder to
1062
1072
  # parallelize
1063
1073
  # @param closed ["right", "left", "both", "none"]
1064
1074
  # Define whether the temporal window interval is closed or not.
1065
- # @param by
1075
+ # @param by [Object]
1066
1076
  # Also group by this column/these columns
1077
+ # @param check_sorted [Boolean]
1078
+ # When the `by` argument is given, polars can not check sortedness
1079
+ # by the metadata and has to do a full scan on the index column to
1080
+ # verify data is sorted. This is expensive. If you are sure the
1081
+ # data within the by groups is sorted, you can set this to `false`.
1082
+ # Doing so incorrectly will lead to incorrect output.
1067
1083
  #
1068
1084
  # @return [DataFrame]
1069
1085
  #
@@ -1095,7 +1111,7 @@ module Polars
1095
1111
  # # └─────────────────────┴─────┘
1096
1112
  #
1097
1113
  # @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
1098
- # df.groupby_dynamic("time", every: "1h", closed: "right").agg(
1114
+ # df.group_by_dynamic("time", every: "1h", closed: "right").agg(
1099
1115
  # [
1100
1116
  # Polars.col("time").min.alias("time_min"),
1101
1117
  # Polars.col("time").max.alias("time_max")
@@ -1115,7 +1131,7 @@ module Polars
1115
1131
  # # └─────────────────────┴─────────────────────┴─────────────────────┘
1116
1132
  #
1117
1133
  # @example The window boundaries can also be added to the aggregation result.
1118
- # df.groupby_dynamic(
1134
+ # df.group_by_dynamic(
1119
1135
  # "time", every: "1h", include_boundaries: true, closed: "right"
1120
1136
  # ).agg([Polars.col("time").count.alias("time_count")])
1121
1137
  # # =>
@@ -1132,7 +1148,7 @@ module Polars
1132
1148
  # # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
1133
1149
  #
1134
1150
  # @example When closed="left", should not include right end of interval.
1135
- # df.groupby_dynamic("time", every: "1h", closed: "left").agg(
1151
+ # df.group_by_dynamic("time", every: "1h", closed: "left").agg(
1136
1152
  # [
1137
1153
  # Polars.col("time").count.alias("time_count"),
1138
1154
  # Polars.col("time").alias("time_agg_list")
@@ -1152,7 +1168,7 @@ module Polars
1152
1168
  # # └─────────────────────┴────────────┴───────────────────────────────────┘
1153
1169
  #
1154
1170
  # @example When closed="both" the time values at the window boundaries belong to 2 groups.
1155
- # df.groupby_dynamic("time", every: "1h", closed: "both").agg(
1171
+ # df.group_by_dynamic("time", every: "1h", closed: "both").agg(
1156
1172
  # [Polars.col("time").count.alias("time_count")]
1157
1173
  # )
1158
1174
  # # =>
@@ -1169,7 +1185,7 @@ module Polars
1169
1185
  # # │ 2021-12-16 03:00:00 ┆ 1 │
1170
1186
  # # └─────────────────────┴────────────┘
1171
1187
  #
1172
- # @example Dynamic groupbys can also be combined with grouping on normal keys.
1188
+ # @example Dynamic group bys can also be combined with grouping on normal keys.
1173
1189
  # df = Polars::DataFrame.new(
1174
1190
  # {
1175
1191
  # "time" => Polars.date_range(
@@ -1180,7 +1196,7 @@ module Polars
1180
1196
  # "groups" => ["a", "a", "a", "b", "b", "a", "a"]
1181
1197
  # }
1182
1198
  # )
1183
- # df.groupby_dynamic(
1199
+ # df.group_by_dynamic(
1184
1200
  # "time",
1185
1201
  # every: "1h",
1186
1202
  # closed: "both",
@@ -1203,14 +1219,14 @@ module Polars
1203
1219
  # # │ b ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1 │
1204
1220
  # # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
1205
1221
  #
1206
- # @example Dynamic groupby on an index column.
1222
+ # @example Dynamic group by on an index column.
1207
1223
  # df = Polars::DataFrame.new(
1208
1224
  # {
1209
1225
  # "idx" => Polars.arange(0, 6, eager: true),
1210
1226
  # "A" => ["A", "A", "B", "B", "B", "C"]
1211
1227
  # }
1212
1228
  # )
1213
- # df.groupby_dynamic(
1229
+ # df.group_by_dynamic(
1214
1230
  # "idx",
1215
1231
  # every: "2i",
1216
1232
  # period: "3i",
@@ -1228,17 +1244,23 @@ module Polars
1228
1244
  # # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
1229
1245
  # # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
1230
1246
  # # └─────────────────┴─────────────────┴─────┴─────────────────┘
1231
- def groupby_dynamic(
1247
+ def group_by_dynamic(
1232
1248
  index_column,
1233
1249
  every:,
1234
1250
  period: nil,
1235
1251
  offset: nil,
1236
- truncate: true,
1252
+ truncate: nil,
1237
1253
  include_boundaries: false,
1238
1254
  closed: "left",
1255
+ label: "left",
1239
1256
  by: nil,
1240
- start_by: "window"
1257
+ start_by: "window",
1258
+ check_sorted: true
1241
1259
  )
1260
+ if !truncate.nil?
1261
+ label = truncate ? "left" : "datapoint"
1262
+ end
1263
+
1242
1264
  index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
1243
1265
  if offset.nil?
1244
1266
  offset = period.nil? ? "-#{every}" : "0ns"
@@ -1253,19 +1275,21 @@ module Polars
1253
1275
  every = Utils._timedelta_to_pl_duration(every)
1254
1276
 
1255
1277
  rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
1256
- lgb = _ldf.groupby_dynamic(
1278
+ lgb = _ldf.group_by_dynamic(
1257
1279
  index_column._rbexpr,
1258
1280
  every,
1259
1281
  period,
1260
1282
  offset,
1261
- truncate,
1283
+ label,
1262
1284
  include_boundaries,
1263
1285
  closed,
1264
1286
  rbexprs_by,
1265
- start_by
1287
+ start_by,
1288
+ check_sorted
1266
1289
  )
1267
- LazyGroupBy.new(lgb, self.class)
1290
+ LazyGroupBy.new(lgb)
1268
1291
  end
1292
+ alias_method :groupby_dynamic, :group_by_dynamic
1269
1293
 
1270
1294
  # Perform an asof join.
1271
1295
  #
@@ -1725,8 +1749,10 @@ module Polars
1725
1749
 
1726
1750
  # Shift the values by a given period.
1727
1751
  #
1728
- # @param periods [Integer]
1752
+ # @param n [Integer]
1729
1753
  # Number of places to shift (may be negative).
1754
+ # @param fill_value [Object]
1755
+ # Fill the resulting null values with this value.
1730
1756
  #
1731
1757
  # @return [LazyFrame]
1732
1758
  #
@@ -1763,8 +1789,12 @@ module Polars
1763
1789
  # # │ 5 ┆ 6 │
1764
1790
  # # │ null ┆ null │
1765
1791
  # # └──────┴──────┘
1766
- def shift(periods)
1767
- _from_rbldf(_ldf.shift(periods))
1792
+ def shift(n, fill_value: nil)
1793
+ if !fill_value.nil?
1794
+ fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
1795
+ end
1796
+ n = Utils.parse_as_expression(n)
1797
+ _from_rbldf(_ldf.shift(n, fill_value))
1768
1798
  end
1769
1799
 
1770
1800
  # Shift the values by a given period and fill the resulting null values.
@@ -1810,10 +1840,7 @@ module Polars
1810
1840
  # # │ 0 ┆ 0 │
1811
1841
  # # └─────┴─────┘
1812
1842
  def shift_and_fill(periods, fill_value)
1813
- if !fill_value.is_a?(Expr)
1814
- fill_value = Polars.lit(fill_value)
1815
- end
1816
- _from_rbldf(_ldf.shift_and_fill(periods, fill_value._rbexpr))
1843
+ shift(periods, fill_value: fill_value)
1817
1844
  end
1818
1845
 
1819
1846
  # Get a slice of this DataFrame.
@@ -2371,16 +2398,16 @@ module Polars
2371
2398
  # df.interpolate.collect
2372
2399
  # # =>
2373
2400
  # # shape: (4, 3)
2374
- # # ┌─────┬──────┬─────┐
2375
- # # │ foo ┆ bar ┆ baz │
2376
- # # │ --- ┆ --- ┆ --- │
2377
- # # │ i64 ┆ i64 ┆ i64 │
2378
- # # ╞═════╪══════╪═════╡
2379
- # # │ 1 ┆ 6 ┆ 1 │
2380
- # # │ 5 ┆ 7 ┆ 3 │
2381
- # # │ 9 ┆ 9 ┆ 6 │
2382
- # # │ 10 ┆ null ┆ 9 │
2383
- # # └─────┴──────┴─────┘
2401
+ # # ┌──────┬──────┬──────────┐
2402
+ # # │ foo ┆ bar ┆ baz │
2403
+ # # │ --- ┆ --- ┆ --- │
2404
+ # # │ f64 ┆ f64 ┆ f64 │
2405
+ # # ╞══════╪══════╪══════════╡
2406
+ # # │ 1.0 ┆ 6.0 ┆ 1.0 │
2407
+ # # │ 5.0 ┆ 7.0 ┆ 3.666667 │
2408
+ # # │ 9.0 ┆ 9.0 ┆ 6.333333 │
2409
+ # # │ 10.0 ┆ null ┆ 9.0 │
2410
+ # # └──────┴──────┴──────────┘
2384
2411
  def interpolate
2385
2412
  select(Utils.col("*").interpolate)
2386
2413
  end
@@ -43,7 +43,7 @@ module Polars
43
43
  # # ┌─────┬─────┬────────────┐
44
44
  # # │ a ┆ b ┆ rank │
45
45
  # # │ --- ┆ --- ┆ --- │
46
- # # │ i64 ┆ i64 ┆ list[f32] │
46
+ # # │ i64 ┆ i64 ┆ list[f64] │
47
47
  # # ╞═════╪═════╪════════════╡
48
48
  # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
49
49
  # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
@@ -158,7 +158,7 @@ module Polars
158
158
  col(column.to_s).sum
159
159
  elsif column.is_a?(::Array)
160
160
  exprs = Utils.selection_to_rbexpr_list(column)
161
- Utils.wrap_expr(_sum_exprs(exprs))
161
+ Utils.wrap_expr(_sum_horizontal(exprs))
162
162
  else
163
163
  fold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("sum")
164
164
  end
@@ -625,16 +625,16 @@ module Polars
625
625
  # This can be used in a `select`, `with_column`, etc. Be sure that the resulting
626
626
  # range size is equal to the length of the DataFrame you are collecting.
627
627
  #
628
- # @param low [Integer, Expr, Series]
628
+ # @param start [Integer, Expr, Series]
629
629
  # Lower bound of range.
630
- # @param high [Integer, Expr, Series]
630
+ # @param stop [Integer, Expr, Series]
631
631
  # Upper bound of range.
632
632
  # @param step [Integer]
633
633
  # Step size of the range.
634
634
  # @param eager [Boolean]
635
635
  # If eager evaluation is `True`, a Series is returned instead of an Expr.
636
636
  # @param dtype [Symbol]
637
- # Apply an explicit integer dtype to the resulting expression (default is `:i64`).
637
+ # Apply an explicit integer dtype to the resulting expression (default is `Int64`).
638
638
  #
639
639
  # @return [Expr, Series]
640
640
  #
@@ -648,35 +648,20 @@ module Polars
648
648
  # # 1
649
649
  # # 2
650
650
  # # ]
651
- #
652
- # @example
653
- # df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4]})
654
- # df.select(Polars.arange(Polars.col("a"), Polars.col("b")))
655
- # # =>
656
- # # shape: (2, 1)
657
- # # ┌───────────┐
658
- # # │ arange │
659
- # # │ --- │
660
- # # │ list[i64] │
661
- # # ╞═══════════╡
662
- # # │ [1, 2] │
663
- # # │ [2, 3] │
664
- # # └───────────┘
665
- def arange(low, high, step: 1, eager: false, dtype: nil)
666
- low = Utils.expr_to_lit_or_expr(low, str_to_lit: false)
667
- high = Utils.expr_to_lit_or_expr(high, str_to_lit: false)
668
- range_expr = Utils.wrap_expr(RbExpr.arange(low._rbexpr, high._rbexpr, step))
669
-
670
- if !dtype.nil? && !["i64", Int64].include?(dtype)
671
- range_expr = range_expr.cast(dtype)
672
- end
651
+ def int_range(start, stop, step: 1, eager: false, dtype: nil)
652
+ start = Utils.parse_as_expression(start)
653
+ stop = Utils.parse_as_expression(stop)
654
+ dtype ||= Int64
655
+ dtype = dtype.to_s if dtype.is_a?(Symbol)
656
+ result = Utils.wrap_expr(RbExpr.int_range(start, stop, step, dtype)).alias("arange")
673
657
 
674
- if !eager
675
- range_expr
676
- else
677
- DataFrame.new.select(range_expr.alias("arange")).to_series
658
+ if eager
659
+ return select(result).to_series
678
660
  end
661
+
662
+ result
679
663
  end
664
+ alias_method :arange, :int_range
680
665
 
681
666
  # Find the indexes that would sort the columns.
682
667
  #
@@ -735,15 +720,22 @@ module Polars
735
720
  # # │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
736
721
  # # └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘
737
722
  def duration(
723
+ weeks: nil,
738
724
  days: nil,
725
+ hours: nil,
726
+ minutes: nil,
739
727
  seconds: nil,
740
- nanoseconds: nil,
741
- microseconds: nil,
742
728
  milliseconds: nil,
743
- minutes: nil,
744
- hours: nil,
745
- weeks: nil
729
+ microseconds: nil,
730
+ nanoseconds: nil,
731
+ time_unit: "us"
746
732
  )
733
+ if !weeks.nil?
734
+ weeks = Utils.expr_to_lit_or_expr(weeks, str_to_lit: false)._rbexpr
735
+ end
736
+ if !days.nil?
737
+ days = Utils.expr_to_lit_or_expr(days, str_to_lit: false)._rbexpr
738
+ end
747
739
  if !hours.nil?
748
740
  hours = Utils.expr_to_lit_or_expr(hours, str_to_lit: false)._rbexpr
749
741
  end
@@ -762,23 +754,18 @@ module Polars
762
754
  if !nanoseconds.nil?
763
755
  nanoseconds = Utils.expr_to_lit_or_expr(nanoseconds, str_to_lit: false)._rbexpr
764
756
  end
765
- if !days.nil?
766
- days = Utils.expr_to_lit_or_expr(days, str_to_lit: false)._rbexpr
767
- end
768
- if !weeks.nil?
769
- weeks = Utils.expr_to_lit_or_expr(weeks, str_to_lit: false)._rbexpr
770
- end
771
757
 
772
758
  Utils.wrap_expr(
773
759
  _rb_duration(
760
+ weeks,
774
761
  days,
762
+ hours,
763
+ minutes,
775
764
  seconds,
776
- nanoseconds,
777
- microseconds,
778
765
  milliseconds,
779
- minutes,
780
- hours,
781
- weeks
766
+ microseconds,
767
+ nanoseconds,
768
+ time_unit
782
769
  )
783
770
  )
784
771
  end
@@ -944,7 +931,8 @@ module Polars
944
931
  simplify_expression,
945
932
  slice_pushdown,
946
933
  common_subplan_elimination,
947
- allow_streaming
934
+ allow_streaming,
935
+ false
948
936
  )
949
937
  prepared << ldf
950
938
  end
@@ -1,10 +1,9 @@
1
1
  module Polars
2
- # Created by `df.lazy.groupby("foo")`.
2
+ # Created by `df.lazy.group_by("foo")`.
3
3
  class LazyGroupBy
4
4
  # @private
5
- def initialize(lgb, lazyframe_class)
5
+ def initialize(lgb)
6
6
  @lgb = lgb
7
- @lazyframe_class = lazyframe_class
8
7
  end
9
8
 
10
9
  # Describe the aggregation that need to be done on a group.
@@ -12,7 +11,7 @@ module Polars
12
11
  # @return [LazyFrame]
13
12
  def agg(aggs)
14
13
  rbexprs = Utils.selection_to_rbexpr_list(aggs)
15
- @lazyframe_class._from_rbldf(@lgb.agg(rbexprs))
14
+ Utils.wrap_ldf(@lgb.agg(rbexprs))
16
15
  end
17
16
 
18
17
  # Get the first `n` rows of each group.
@@ -29,7 +28,7 @@ module Polars
29
28
  # "nrs" => [1, 2, 3, 4, 5, 6]
30
29
  # }
31
30
  # )
32
- # df.groupby("letters").head(2).sort("letters")
31
+ # df.group_by("letters").head(2).sort("letters")
33
32
  # # =>
34
33
  # # shape: (5, 2)
35
34
  # # ┌─────────┬─────┐
@@ -44,7 +43,7 @@ module Polars
44
43
  # # │ c ┆ 2 │
45
44
  # # └─────────┴─────┘
46
45
  def head(n = 5)
47
- @lazyframe_class._from_rbldf(@lgb.head(n))
46
+ Utils.wrap_ldf(@lgb.head(n))
48
47
  end
49
48
 
50
49
  # Get the last `n` rows of each group.
@@ -61,7 +60,7 @@ module Polars
61
60
  # "nrs" => [1, 2, 3, 4, 5, 6]
62
61
  # }
63
62
  # )
64
- # df.groupby("letters").tail(2).sort("letters")
63
+ # df.group_by("letters").tail(2).sort("letters")
65
64
  # # =>
66
65
  # # shape: (5, 2)
67
66
  # # ┌─────────┬─────┐
@@ -76,7 +75,7 @@ module Polars
76
75
  # # │ c ┆ 4 │
77
76
  # # └─────────┴─────┘
78
77
  def tail(n = 5)
79
- @lazyframe_class._from_rbldf(@lgb.tail(n))
78
+ Utils.wrap_ldf(@lgb.tail(n))
80
79
  end
81
80
 
82
81
  # def apply