polars-df 0.11.0 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/Cargo.lock +360 -361
  4. data/ext/polars/Cargo.toml +10 -7
  5. data/ext/polars/src/batched_csv.rs +1 -1
  6. data/ext/polars/src/conversion/any_value.rs +261 -0
  7. data/ext/polars/src/conversion/chunked_array.rs +4 -4
  8. data/ext/polars/src/conversion/mod.rs +51 -10
  9. data/ext/polars/src/dataframe/construction.rs +6 -8
  10. data/ext/polars/src/dataframe/general.rs +19 -29
  11. data/ext/polars/src/dataframe/io.rs +43 -33
  12. data/ext/polars/src/error.rs +26 -4
  13. data/ext/polars/src/expr/categorical.rs +0 -10
  14. data/ext/polars/src/expr/datetime.rs +4 -12
  15. data/ext/polars/src/expr/general.rs +123 -110
  16. data/ext/polars/src/expr/mod.rs +2 -2
  17. data/ext/polars/src/expr/rolling.rs +17 -9
  18. data/ext/polars/src/expr/string.rs +2 -6
  19. data/ext/polars/src/functions/eager.rs +10 -10
  20. data/ext/polars/src/functions/lazy.rs +21 -21
  21. data/ext/polars/src/functions/range.rs +6 -12
  22. data/ext/polars/src/interop/numo/to_numo_series.rs +2 -1
  23. data/ext/polars/src/lazyframe/mod.rs +81 -98
  24. data/ext/polars/src/lib.rs +55 -45
  25. data/ext/polars/src/map/dataframe.rs +2 -2
  26. data/ext/polars/src/rb_modules.rs +25 -1
  27. data/ext/polars/src/series/aggregation.rs +4 -2
  28. data/ext/polars/src/series/arithmetic.rs +21 -11
  29. data/ext/polars/src/series/construction.rs +56 -38
  30. data/ext/polars/src/series/export.rs +1 -1
  31. data/ext/polars/src/series/mod.rs +31 -10
  32. data/ext/polars/src/sql.rs +3 -1
  33. data/lib/polars/array_expr.rb +4 -4
  34. data/lib/polars/batched_csv_reader.rb +2 -2
  35. data/lib/polars/cat_expr.rb +0 -36
  36. data/lib/polars/cat_name_space.rb +0 -37
  37. data/lib/polars/data_frame.rb +93 -101
  38. data/lib/polars/data_types.rb +1 -1
  39. data/lib/polars/date_time_expr.rb +525 -573
  40. data/lib/polars/date_time_name_space.rb +263 -464
  41. data/lib/polars/dynamic_group_by.rb +3 -3
  42. data/lib/polars/exceptions.rb +3 -0
  43. data/lib/polars/expr.rb +367 -330
  44. data/lib/polars/expr_dispatch.rb +1 -1
  45. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  46. data/lib/polars/functions/as_datatype.rb +63 -40
  47. data/lib/polars/functions/lazy.rb +63 -14
  48. data/lib/polars/functions/lit.rb +1 -1
  49. data/lib/polars/functions/range/date_range.rb +18 -77
  50. data/lib/polars/functions/range/datetime_range.rb +4 -4
  51. data/lib/polars/functions/range/int_range.rb +2 -2
  52. data/lib/polars/functions/range/time_range.rb +4 -4
  53. data/lib/polars/functions/repeat.rb +1 -1
  54. data/lib/polars/functions/whenthen.rb +1 -1
  55. data/lib/polars/io/csv.rb +8 -8
  56. data/lib/polars/io/ipc.rb +3 -3
  57. data/lib/polars/io/json.rb +13 -2
  58. data/lib/polars/io/ndjson.rb +15 -4
  59. data/lib/polars/io/parquet.rb +5 -4
  60. data/lib/polars/lazy_frame.rb +120 -106
  61. data/lib/polars/lazy_group_by.rb +1 -1
  62. data/lib/polars/list_expr.rb +11 -11
  63. data/lib/polars/list_name_space.rb +5 -1
  64. data/lib/polars/rolling_group_by.rb +5 -7
  65. data/lib/polars/series.rb +105 -189
  66. data/lib/polars/string_expr.rb +42 -67
  67. data/lib/polars/string_name_space.rb +5 -4
  68. data/lib/polars/testing.rb +2 -2
  69. data/lib/polars/utils/constants.rb +9 -0
  70. data/lib/polars/utils/convert.rb +97 -0
  71. data/lib/polars/utils/parse.rb +89 -0
  72. data/lib/polars/utils/various.rb +76 -0
  73. data/lib/polars/utils/wrap.rb +19 -0
  74. data/lib/polars/utils.rb +4 -330
  75. data/lib/polars/version.rb +1 -1
  76. data/lib/polars/whenthen.rb +6 -6
  77. data/lib/polars.rb +11 -0
  78. metadata +9 -4
  79. data/ext/polars/src/conversion/anyvalue.rs +0 -186
@@ -63,7 +63,7 @@ module Polars
63
63
  # df.columns
64
64
  # # => ["foo", "bar"]
65
65
  def columns
66
- _ldf.columns
66
+ _ldf.collect_schema.keys
67
67
  end
68
68
 
69
69
  # Get dtypes of columns in LazyFrame.
@@ -81,7 +81,7 @@ module Polars
81
81
  # lf.dtypes
82
82
  # # => [Polars::Int64, Polars::Float64, Polars::String]
83
83
  def dtypes
84
- _ldf.dtypes
84
+ _ldf.collect_schema.values
85
85
  end
86
86
 
87
87
  # Get the schema.
@@ -99,7 +99,7 @@ module Polars
99
99
  # lf.schema
100
100
  # # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String}
101
101
  def schema
102
- _ldf.schema
102
+ _ldf.collect_schema
103
103
  end
104
104
 
105
105
  # Get the width of the LazyFrame.
@@ -111,7 +111,7 @@ module Polars
111
111
  # lf.width
112
112
  # # => 2
113
113
  def width
114
- _ldf.width
114
+ _ldf.collect_schema.length
115
115
  end
116
116
 
117
117
  # Check if LazyFrame includes key.
@@ -261,16 +261,23 @@ module Polars
261
261
  # # │ 2 ┆ 7.0 ┆ b │
262
262
  # # │ 1 ┆ 6.0 ┆ a │
263
263
  # # └─────┴─────┴─────┘
264
- def sort(by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
265
- if by.is_a?(::String)
266
- return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order, multithreaded))
267
- end
268
- if Utils.bool?(reverse)
269
- reverse = [reverse]
264
+ def sort(by, *more_by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
265
+ if by.is_a?(::String) && more_by.empty?
266
+ return _from_rbldf(
267
+ _ldf.sort(
268
+ by, reverse, nulls_last, maintain_order, multithreaded
269
+ )
270
+ )
270
271
  end
271
272
 
272
- by = Utils.selection_to_rbexpr_list(by)
273
- _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order, multithreaded))
273
+ by = Utils.parse_into_list_of_expressions(by, *more_by)
274
+ reverse = Utils.extend_bool(reverse, by.length, "reverse", "by")
275
+ nulls_last = Utils.extend_bool(nulls_last, by.length, "nulls_last", "by")
276
+ _from_rbldf(
277
+ _ldf.sort_by_exprs(
278
+ by, reverse, nulls_last, maintain_order, multithreaded
279
+ )
280
+ )
274
281
  end
275
282
 
276
283
  # def profile
@@ -415,7 +422,7 @@ module Polars
415
422
  path,
416
423
  compression: "zstd",
417
424
  compression_level: nil,
418
- statistics: false,
425
+ statistics: true,
419
426
  row_group_size: nil,
420
427
  data_pagesize_limit: nil,
421
428
  maintain_order: true,
@@ -435,6 +442,24 @@ module Polars
435
442
  no_optimization: no_optimization
436
443
  )
437
444
 
445
+ if statistics == true
446
+ statistics = {
447
+ min: true,
448
+ max: true,
449
+ distinct_count: false,
450
+ null_count: true
451
+ }
452
+ elsif statistics == false
453
+ statistics = {}
454
+ elsif statistics == "full"
455
+ statistics = {
456
+ min: true,
457
+ max: true,
458
+ distinct_count: true,
459
+ null_count: true
460
+ }
461
+ end
462
+
438
463
  lf.sink_parquet(
439
464
  path,
440
465
  compression,
@@ -589,6 +614,7 @@ module Polars
589
614
  datetime_format: nil,
590
615
  date_format: nil,
591
616
  time_format: nil,
617
+ float_scientific: nil,
592
618
  float_precision: nil,
593
619
  null_value: nil,
594
620
  quote_style: nil,
@@ -623,6 +649,7 @@ module Polars
623
649
  datetime_format,
624
650
  date_format,
625
651
  time_format,
652
+ float_scientific,
626
653
  float_precision,
627
654
  null_value,
628
655
  quote_style,
@@ -907,7 +934,7 @@ module Polars
907
934
  def filter(predicate)
908
935
  _from_rbldf(
909
936
  _ldf.filter(
910
- Utils.expr_to_lit_or_expr(predicate, str_to_lit: false)._rbexpr
937
+ Utils.parse_into_expression(predicate, str_as_lit: false)
911
938
  )
912
939
  )
913
940
  end
@@ -1003,7 +1030,7 @@ module Polars
1003
1030
  def select(*exprs, **named_exprs)
1004
1031
  structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
1005
1032
 
1006
- rbexprs = Utils.parse_as_list_of_expressions(
1033
+ rbexprs = Utils.parse_into_list_of_expressions(
1007
1034
  *exprs, **named_exprs, __structify: structify
1008
1035
  )
1009
1036
  _from_rbldf(_ldf.select(rbexprs))
@@ -1011,12 +1038,14 @@ module Polars
1011
1038
 
1012
1039
  # Start a group by operation.
1013
1040
  #
1014
- # @param by [Object]
1041
+ # @param by [Array]
1015
1042
  # Column(s) to group by.
1016
1043
  # @param maintain_order [Boolean]
1017
1044
  # Make sure that the order of the groups remains consistent. This is more
1018
1045
  # expensive than a default group by.
1019
- #
1046
+ # @param named_by [Hash]
1047
+ # Additional columns to group by, specified as keyword arguments.
1048
+ # The columns will be renamed to the keyword used.
1020
1049
  # @return [LazyGroupBy]
1021
1050
  #
1022
1051
  # @example
@@ -1039,9 +1068,9 @@ module Polars
1039
1068
  # # │ b ┆ 11 │
1040
1069
  # # │ c ┆ 6 │
1041
1070
  # # └─────┴─────┘
1042
- def group_by(by, maintain_order: false)
1043
- rbexprs_by = Utils.selection_to_rbexpr_list(by)
1044
- lgb = _ldf.group_by(rbexprs_by, maintain_order)
1071
+ def group_by(*by, maintain_order: false, **named_by)
1072
+ exprs = Utils.parse_into_list_of_expressions(*by, **named_by)
1073
+ lgb = _ldf.group_by(exprs, maintain_order)
1045
1074
  LazyGroupBy.new(lgb)
1046
1075
  end
1047
1076
  alias_method :groupby, :group_by
@@ -1095,12 +1124,6 @@ module Polars
1095
1124
  # Define whether the temporal window interval is closed or not.
1096
1125
  # @param by [Object]
1097
1126
  # Also group by this column/these columns.
1098
- # @param check_sorted [Boolean]
1099
- # When the `by` argument is given, polars can not check sortedness
1100
- # by the metadata and has to do a full scan on the index column to
1101
- # verify data is sorted. This is expensive. If you are sure the
1102
- # data within the by groups is sorted, you can set this to `false`.
1103
- # Doing so incorrectly will lead to incorrect output
1104
1127
  #
1105
1128
  # @return [LazyFrame]
1106
1129
  #
@@ -1142,21 +1165,20 @@ module Polars
1142
1165
  period:,
1143
1166
  offset: nil,
1144
1167
  closed: "right",
1145
- by: nil,
1146
- check_sorted: true
1168
+ by: nil
1147
1169
  )
1148
- index_column = Utils.parse_as_expression(index_column)
1170
+ index_column = Utils.parse_into_expression(index_column)
1149
1171
  if offset.nil?
1150
- offset = "-#{period}"
1172
+ offset = Utils.negate_duration_string(Utils.parse_as_duration_string(period))
1151
1173
  end
1152
1174
 
1153
- rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
1154
- period = Utils._timedelta_to_pl_duration(period)
1155
- offset = Utils._timedelta_to_pl_duration(offset)
1156
-
1157
- lgb = _ldf.rolling(
1158
- index_column, period, offset, closed, rbexprs_by, check_sorted
1175
+ rbexprs_by = (
1176
+ !by.nil? ? Utils.parse_into_list_of_expressions(by) : []
1159
1177
  )
1178
+ period = Utils.parse_as_duration_string(period)
1179
+ offset = Utils.parse_as_duration_string(offset)
1180
+
1181
+ lgb = _ldf.rolling(index_column, period, offset, closed, rbexprs_by)
1160
1182
  LazyGroupBy.new(lgb)
1161
1183
  end
1162
1184
  alias_method :group_by_rolling, :rolling
@@ -1224,22 +1246,18 @@ module Polars
1224
1246
  # Define whether the temporal window interval is closed or not.
1225
1247
  # @param by [Object]
1226
1248
  # Also group by this column/these columns
1227
- # @param check_sorted [Boolean]
1228
- # When the `by` argument is given, polars can not check sortedness
1229
- # by the metadata and has to do a full scan on the index column to
1230
- # verify data is sorted. This is expensive. If you are sure the
1231
- # data within the by groups is sorted, you can set this to `false`.
1232
- # Doing so incorrectly will lead to incorrect output.
1233
1249
  #
1234
1250
  # @return [DataFrame]
1235
1251
  #
1236
1252
  # @example
1237
1253
  # df = Polars::DataFrame.new(
1238
1254
  # {
1239
- # "time" => Polars.date_range(
1255
+ # "time" => Polars.datetime_range(
1240
1256
  # DateTime.new(2021, 12, 16),
1241
1257
  # DateTime.new(2021, 12, 16, 3),
1242
- # "30m"
1258
+ # "30m",
1259
+ # time_unit: "us",
1260
+ # eager: true
1243
1261
  # ),
1244
1262
  # "n" => 0..6
1245
1263
  # }
@@ -1338,10 +1356,12 @@ module Polars
1338
1356
  # @example Dynamic group bys can also be combined with grouping on normal keys.
1339
1357
  # df = Polars::DataFrame.new(
1340
1358
  # {
1341
- # "time" => Polars.date_range(
1359
+ # "time" => Polars.datetime_range(
1342
1360
  # DateTime.new(2021, 12, 16),
1343
1361
  # DateTime.new(2021, 12, 16, 3),
1344
- # "30m"
1362
+ # "30m",
1363
+ # time_unit: "us",
1364
+ # eager: true
1345
1365
  # ),
1346
1366
  # "groups" => ["a", "a", "a", "b", "b", "a", "a"]
1347
1367
  # }
@@ -1405,14 +1425,13 @@ module Polars
1405
1425
  closed: "left",
1406
1426
  label: "left",
1407
1427
  by: nil,
1408
- start_by: "window",
1409
- check_sorted: true
1428
+ start_by: "window"
1410
1429
  )
1411
1430
  if !truncate.nil?
1412
1431
  label = truncate ? "left" : "datapoint"
1413
1432
  end
1414
1433
 
1415
- index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
1434
+ index_column = Utils.parse_into_expression(index_column, str_as_lit: false)
1416
1435
  if offset.nil?
1417
1436
  offset = period.nil? ? "-#{every}" : "0ns"
1418
1437
  end
@@ -1421,13 +1440,13 @@ module Polars
1421
1440
  period = every
1422
1441
  end
1423
1442
 
1424
- period = Utils._timedelta_to_pl_duration(period)
1425
- offset = Utils._timedelta_to_pl_duration(offset)
1426
- every = Utils._timedelta_to_pl_duration(every)
1443
+ period = Utils.parse_as_duration_string(period)
1444
+ offset = Utils.parse_as_duration_string(offset)
1445
+ every = Utils.parse_as_duration_string(every)
1427
1446
 
1428
- rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
1447
+ rbexprs_by = by.nil? ? [] : Utils.parse_into_list_of_expressions(by)
1429
1448
  lgb = _ldf.group_by_dynamic(
1430
- index_column._rbexpr,
1449
+ index_column,
1431
1450
  every,
1432
1451
  period,
1433
1452
  offset,
@@ -1435,8 +1454,7 @@ module Polars
1435
1454
  include_boundaries,
1436
1455
  closed,
1437
1456
  rbexprs_by,
1438
- start_by,
1439
- check_sorted
1457
+ start_by
1440
1458
  )
1441
1459
  LazyGroupBy.new(lgb)
1442
1460
  end
@@ -1587,7 +1605,7 @@ module Polars
1587
1605
  # @param on [Object]
1588
1606
  # Join column of both DataFrames. If set, `left_on` and `right_on` should be
1589
1607
  # `nil`.
1590
- # @param how ["inner", "left", "outer", "semi", "anti", "cross"]
1608
+ # @param how ["inner", "left", "full", "semi", "anti", "cross"]
1591
1609
  # Join strategy.
1592
1610
  # @param suffix [String]
1593
1611
  # Suffix to append to columns with a duplicate name.
@@ -1629,7 +1647,7 @@ module Polars
1629
1647
  # # └─────┴─────┴─────┴───────┘
1630
1648
  #
1631
1649
  # @example
1632
- # df.join(other_df, on: "ham", how: "outer").collect
1650
+ # df.join(other_df, on: "ham", how: "full").collect
1633
1651
  # # =>
1634
1652
  # # shape: (4, 5)
1635
1653
  # # ┌──────┬──────┬──────┬───────┬───────────┐
@@ -1696,7 +1714,9 @@ module Polars
1696
1714
  raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
1697
1715
  end
1698
1716
 
1699
- if how == "cross"
1717
+ if how == "outer"
1718
+ how = "full"
1719
+ elsif how == "cross"
1700
1720
  return _from_rbldf(
1701
1721
  _ldf.join(
1702
1722
  other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
@@ -1705,12 +1725,12 @@ module Polars
1705
1725
  end
1706
1726
 
1707
1727
  if !on.nil?
1708
- rbexprs = Utils.selection_to_rbexpr_list(on)
1728
+ rbexprs = Utils.parse_into_list_of_expressions(on)
1709
1729
  rbexprs_left = rbexprs
1710
1730
  rbexprs_right = rbexprs
1711
1731
  elsif !left_on.nil? && !right_on.nil?
1712
- rbexprs_left = Utils.selection_to_rbexpr_list(left_on)
1713
- rbexprs_right = Utils.selection_to_rbexpr_list(right_on)
1732
+ rbexprs_left = Utils.parse_into_list_of_expressions(left_on)
1733
+ rbexprs_right = Utils.parse_into_list_of_expressions(right_on)
1714
1734
  else
1715
1735
  raise ArgumentError, "must specify `on` OR `left_on` and `right_on`"
1716
1736
  end
@@ -1765,7 +1785,8 @@ module Polars
1765
1785
  # # └─────┴──────┴───────┴─────┴──────┴───────┘
1766
1786
  def with_columns(*exprs, **named_exprs)
1767
1787
  structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
1768
- rbexprs = Utils.parse_as_list_of_expressions(*exprs, **named_exprs, __structify: structify)
1788
+
1789
+ rbexprs = Utils.parse_into_list_of_expressions(*exprs, **named_exprs, __structify: structify)
1769
1790
 
1770
1791
  _from_rbldf(_ldf.with_columns(rbexprs))
1771
1792
  end
@@ -1926,9 +1947,9 @@ module Polars
1926
1947
  # # └──────┴──────┘
1927
1948
  def shift(n, fill_value: nil)
1928
1949
  if !fill_value.nil?
1929
- fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
1950
+ fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
1930
1951
  end
1931
- n = Utils.parse_as_expression(n)
1952
+ n = Utils.parse_into_expression(n)
1932
1953
  _from_rbldf(_ldf.shift(n, fill_value))
1933
1954
  end
1934
1955
 
@@ -2125,7 +2146,7 @@ module Polars
2125
2146
  # # │ 3 ┆ 7 │
2126
2147
  # # └─────┴─────┘
2127
2148
  def take_every(n)
2128
- select(Utils.col("*").take_every(n))
2149
+ select(F.col("*").take_every(n))
2129
2150
  end
2130
2151
 
2131
2152
  # Fill null values using the specified value or strategy.
@@ -2168,7 +2189,7 @@ module Polars
2168
2189
  # # └──────┴──────┘
2169
2190
  def fill_nan(fill_value)
2170
2191
  if !fill_value.is_a?(Expr)
2171
- fill_value = Utils.lit(fill_value)
2192
+ fill_value = F.lit(fill_value)
2172
2193
  end
2173
2194
  _from_rbldf(_ldf.fill_nan(fill_value._rbexpr))
2174
2195
  end
@@ -2359,8 +2380,8 @@ module Polars
2359
2380
  # # │ 3.0 ┆ 1.0 │
2360
2381
  # # └─────┴─────┘
2361
2382
  def quantile(quantile, interpolation: "nearest")
2362
- quantile = Utils.expr_to_lit_or_expr(quantile, str_to_lit: false)
2363
- _from_rbldf(_ldf.quantile(quantile._rbexpr, interpolation))
2383
+ quantile = Utils.parse_into_expression(quantile, str_as_lit: false)
2384
+ _from_rbldf(_ldf.quantile(quantile, interpolation))
2364
2385
  end
2365
2386
 
2366
2387
  # Explode lists to long format.
@@ -2392,7 +2413,7 @@ module Polars
2392
2413
  # # │ c ┆ 8 │
2393
2414
  # # └─────────┴─────────┘
2394
2415
  def explode(columns)
2395
- columns = Utils.selection_to_rbexpr_list(columns)
2416
+ columns = Utils.parse_into_list_of_expressions(columns)
2396
2417
  _from_rbldf(_ldf.explode(columns))
2397
2418
  end
2398
2419
 
@@ -2455,35 +2476,35 @@ module Polars
2455
2476
  # Optionally leaves identifiers set.
2456
2477
  #
2457
2478
  # This function is useful to massage a DataFrame into a format where one or more
2458
- # columns are identifier variables (id_vars), while all other columns, considered
2459
- # measured variables (value_vars), are "unpivoted" to the row axis, leaving just
2479
+ # columns are identifier variables (index) while all other columns, considered
2480
+ # measured variables (on), are "unpivoted" to the row axis leaving just
2460
2481
  # two non-identifier columns, 'variable' and 'value'.
2461
2482
  #
2462
- # @param id_vars [Object]
2463
- # Columns to use as identifier variables.
2464
- # @param value_vars [Object]
2465
- # Values to use as identifier variables.
2466
- # If `value_vars` is empty all columns that are not in `id_vars` will be used.
2483
+ # @param on [Object]
2484
+ # Column(s) or selector(s) to use as value variables; if `on`
2485
+ # is empty all columns that are not in `index` will be used.
2486
+ # @param index [Object]
2487
+ # Column(s) or selector(s) to use as identifier variables.
2467
2488
  # @param variable_name [String]
2468
- # Name to give to the `value` column. Defaults to "variable"
2489
+ # Name to give to the `variable` column. Defaults to "variable"
2469
2490
  # @param value_name [String]
2470
2491
  # Name to give to the `value` column. Defaults to "value"
2471
2492
  # @param streamable [Boolean]
2472
2493
  # Allow this node to run in the streaming engine.
2473
- # If this runs in streaming, the output of the melt operation
2494
+ # If this runs in streaming, the output of the unpivot operation
2474
2495
  # will not have a stable ordering.
2475
2496
  #
2476
2497
  # @return [LazyFrame]
2477
2498
  #
2478
2499
  # @example
2479
- # df = Polars::DataFrame.new(
2500
+ # lf = Polars::LazyFrame.new(
2480
2501
  # {
2481
2502
  # "a" => ["x", "y", "z"],
2482
2503
  # "b" => [1, 3, 5],
2483
2504
  # "c" => [2, 4, 6]
2484
2505
  # }
2485
- # ).lazy
2486
- # df.melt(id_vars: "a", value_vars: ["b", "c"]).collect
2506
+ # )
2507
+ # lf.unpivot(Polars::Selectors.numeric, index: "a").collect
2487
2508
  # # =>
2488
2509
  # # shape: (6, 3)
2489
2510
  # # ┌─────┬──────────┬───────┐
@@ -2498,23 +2519,21 @@ module Polars
2498
2519
  # # │ y ┆ c ┆ 4 │
2499
2520
  # # │ z ┆ c ┆ 6 │
2500
2521
  # # └─────┴──────────┴───────┘
2501
- def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil, streamable: true)
2502
- if value_vars.is_a?(::String)
2503
- value_vars = [value_vars]
2504
- end
2505
- if id_vars.is_a?(::String)
2506
- id_vars = [id_vars]
2507
- end
2508
- if value_vars.nil?
2509
- value_vars = []
2510
- end
2511
- if id_vars.nil?
2512
- id_vars = []
2513
- end
2522
+ def unpivot(
2523
+ on,
2524
+ index: nil,
2525
+ variable_name: nil,
2526
+ value_name: nil,
2527
+ streamable: true
2528
+ )
2529
+ on = on.nil? ? [] : Utils._expand_selectors(self, on)
2530
+ index = index.nil? ? [] : Utils._expand_selectors(self, index)
2531
+
2514
2532
  _from_rbldf(
2515
- _ldf.melt(id_vars, value_vars, value_name, variable_name, streamable)
2533
+ _ldf.unpivot(on, index, value_name, variable_name, streamable)
2516
2534
  )
2517
2535
  end
2536
+ alias_method :melt, :unpivot
2518
2537
 
2519
2538
  # def map
2520
2539
  # end
@@ -2545,7 +2564,7 @@ module Polars
2545
2564
  # # │ 10.0 ┆ null ┆ 9.0 │
2546
2565
  # # └──────┴──────┴──────────┘
2547
2566
  def interpolate
2548
- select(Utils.col("*").interpolate)
2567
+ select(F.col("*").interpolate)
2549
2568
  end
2550
2569
 
2551
2570
  # Decompose a struct into its fields.
@@ -2652,24 +2671,19 @@ module Polars
2652
2671
  #
2653
2672
  # @param column [Object]
2654
2673
  # Column that is sorted
2655
- # @param more_columns [Object]
2656
- # Additional columns that are sorted, specified as positional arguments.
2657
2674
  # @param descending [Boolean]
2658
2675
  # Whether the columns are sorted in descending order.
2659
2676
  #
2660
2677
  # @return [LazyFrame]
2661
2678
  def set_sorted(
2662
2679
  column,
2663
- *more_columns,
2664
2680
  descending: false
2665
2681
  )
2666
- columns = Utils.selection_to_rbexpr_list(column)
2667
- if more_columns.any?
2668
- columns.concat(Utils.selection_to_rbexpr_list(more_columns))
2682
+ if !Utils.strlike?(column)
2683
+ msg = "expected a 'str' for argument 'column' in 'set_sorted'"
2684
+ raise TypeError, msg
2669
2685
  end
2670
- with_columns(
2671
- columns.map { |e| Utils.wrap_expr(e).set_sorted(descending: descending) }
2672
- )
2686
+ with_columns(F.col(column).set_sorted(descending: descending))
2673
2687
  end
2674
2688
 
2675
2689
  # TODO
@@ -107,7 +107,7 @@ module Polars
107
107
  # # │ b ┆ 5 ┆ 10.0 │
108
108
  # # └─────┴───────┴────────────────┘
109
109
  def agg(*aggs, **named_aggs)
110
- rbexprs = Utils.parse_as_list_of_expressions(*aggs, **named_aggs)
110
+ rbexprs = Utils.parse_into_list_of_expressions(*aggs, **named_aggs)
111
111
  Utils.wrap_ldf(@lgb.agg(rbexprs))
112
112
  end
113
113
 
@@ -146,7 +146,7 @@ module Polars
146
146
  end
147
147
 
148
148
  if !fraction.nil?
149
- fraction = Utils.parse_as_expression(fraction)
149
+ fraction = Utils.parse_into_expression(fraction)
150
150
  return Utils.wrap_expr(
151
151
  _rbexpr.list_sample_fraction(
152
152
  fraction, with_replacement, shuffle, seed
@@ -155,7 +155,7 @@ module Polars
155
155
  end
156
156
 
157
157
  n = 1 if n.nil?
158
- n = Utils.parse_as_expression(n)
158
+ n = Utils.parse_into_expression(n)
159
159
  Utils.wrap_expr(_rbexpr.list_sample_n(n, with_replacement, shuffle, seed))
160
160
  end
161
161
 
@@ -387,7 +387,7 @@ module Polars
387
387
  # # │ 1 │
388
388
  # # └──────┘
389
389
  def get(index, null_on_oob: true)
390
- index = Utils.parse_as_expression(index)
390
+ index = Utils.parse_into_expression(index)
391
391
  Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
392
392
  end
393
393
 
@@ -431,7 +431,7 @@ module Polars
431
431
  if index.is_a?(::Array)
432
432
  index = Series.new(index)
433
433
  end
434
- index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
434
+ index = Utils.parse_into_expression(index, str_as_lit: false)
435
435
  Utils.wrap_expr(_rbexpr.list_gather(index, null_on_oob))
436
436
  end
437
437
  alias_method :take, :gather
@@ -502,7 +502,7 @@ module Polars
502
502
  # # │ true │
503
503
  # # └───────┘
504
504
  def contains(item)
505
- Utils.wrap_expr(_rbexpr.list_contains(Utils.expr_to_lit_or_expr(item)._rbexpr))
505
+ Utils.wrap_expr(_rbexpr.list_contains(Utils.parse_into_expression(item)))
506
506
  end
507
507
 
508
508
  # Join all string items in a sublist and place a separator between them.
@@ -530,7 +530,7 @@ module Polars
530
530
  # # │ x y │
531
531
  # # └───────┘
532
532
  def join(separator, ignore_nulls: true)
533
- separator = Utils.parse_as_expression(separator, str_as_lit: true)
533
+ separator = Utils.parse_into_expression(separator, str_as_lit: true)
534
534
  Utils.wrap_expr(_rbexpr.list_join(separator, ignore_nulls))
535
535
  end
536
536
 
@@ -625,7 +625,7 @@ module Polars
625
625
  # # [null, 10, 2]
626
626
  # # ]
627
627
  def shift(n = 1)
628
- n = Utils.parse_as_expression(n)
628
+ n = Utils.parse_into_expression(n)
629
629
  Utils.wrap_expr(_rbexpr.list_shift(n))
630
630
  end
631
631
 
@@ -650,8 +650,8 @@ module Polars
650
650
  # # [2, 1]
651
651
  # # ]
652
652
  def slice(offset, length = nil)
653
- offset = Utils.expr_to_lit_or_expr(offset, str_to_lit: false)._rbexpr
654
- length = Utils.expr_to_lit_or_expr(length, str_to_lit: false)._rbexpr
653
+ offset = Utils.parse_into_expression(offset, str_as_lit: false)
654
+ length = Utils.parse_into_expression(length, str_as_lit: false)
655
655
  Utils.wrap_expr(_rbexpr.list_slice(offset, length))
656
656
  end
657
657
 
@@ -694,7 +694,7 @@ module Polars
694
694
  # # [2, 1]
695
695
  # # ]
696
696
  def tail(n = 5)
697
- n = Utils.parse_as_expression(n)
697
+ n = Utils.parse_into_expression(n)
698
698
  Utils.wrap_expr(_rbexpr.list_tail(n))
699
699
  end
700
700
 
@@ -722,7 +722,7 @@ module Polars
722
722
  # # │ 0 │
723
723
  # # └────────────────┘
724
724
  def count_matches(element)
725
- Utils.wrap_expr(_rbexpr.list_count_matches(Utils.expr_to_lit_or_expr(element)._rbexpr))
725
+ Utils.wrap_expr(_rbexpr.list_count_matches(Utils.parse_into_expression(element)))
726
726
  end
727
727
  alias_method :count_match, :count_matches
728
728
 
@@ -197,9 +197,13 @@ module Polars
197
197
  #
198
198
  # @param index [Integer]
199
199
  # Index to return per sublist
200
+ # @param null_on_oob [Boolean]
201
+ # Behavior if an index is out of bounds:
202
+ # true -> set as null
203
+ # false -> raise an error
200
204
  #
201
205
  # @return [Series]
202
- def get(index)
206
+ def get(index, null_on_oob: false)
203
207
  super
204
208
  end
205
209
 
@@ -10,25 +10,23 @@ module Polars
10
10
  period,
11
11
  offset,
12
12
  closed,
13
- by,
14
- check_sorted
13
+ group_by
15
14
  )
16
- period = Utils._timedelta_to_pl_duration(period)
17
- offset = Utils._timedelta_to_pl_duration(offset)
15
+ period = Utils.parse_as_duration_string(period)
16
+ offset = Utils.parse_as_duration_string(offset)
18
17
 
19
18
  @df = df
20
19
  @time_column = index_column
21
20
  @period = period
22
21
  @offset = offset
23
22
  @closed = closed
24
- @by = by
25
- @check_sorted = check_sorted
23
+ @group_by = group_by
26
24
  end
27
25
 
28
26
  def agg(*aggs, **named_aggs)
29
27
  @df.lazy
30
28
  .group_by_rolling(
31
- index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by, check_sorted: @check_sorted
29
+ index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @group_by
32
30
  )
33
31
  .agg(*aggs, **named_aggs)
34
32
  .collect(no_optimization: true, string_cache: false)