polars-df 0.11.0-aarch64-linux → 0.12.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/Cargo.lock +360 -361
  4. data/LICENSE-THIRD-PARTY.txt +1065 -878
  5. data/lib/polars/3.1/polars.so +0 -0
  6. data/lib/polars/3.2/polars.so +0 -0
  7. data/lib/polars/3.3/polars.so +0 -0
  8. data/lib/polars/array_expr.rb +4 -4
  9. data/lib/polars/batched_csv_reader.rb +2 -2
  10. data/lib/polars/cat_expr.rb +0 -36
  11. data/lib/polars/cat_name_space.rb +0 -37
  12. data/lib/polars/data_frame.rb +93 -101
  13. data/lib/polars/data_types.rb +1 -1
  14. data/lib/polars/date_time_expr.rb +525 -573
  15. data/lib/polars/date_time_name_space.rb +263 -464
  16. data/lib/polars/dynamic_group_by.rb +3 -3
  17. data/lib/polars/exceptions.rb +3 -0
  18. data/lib/polars/expr.rb +367 -330
  19. data/lib/polars/expr_dispatch.rb +1 -1
  20. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  21. data/lib/polars/functions/as_datatype.rb +63 -40
  22. data/lib/polars/functions/lazy.rb +63 -14
  23. data/lib/polars/functions/lit.rb +1 -1
  24. data/lib/polars/functions/range/date_range.rb +18 -77
  25. data/lib/polars/functions/range/datetime_range.rb +4 -4
  26. data/lib/polars/functions/range/int_range.rb +2 -2
  27. data/lib/polars/functions/range/time_range.rb +4 -4
  28. data/lib/polars/functions/repeat.rb +1 -1
  29. data/lib/polars/functions/whenthen.rb +1 -1
  30. data/lib/polars/io/csv.rb +8 -8
  31. data/lib/polars/io/ipc.rb +3 -3
  32. data/lib/polars/io/json.rb +13 -2
  33. data/lib/polars/io/ndjson.rb +15 -4
  34. data/lib/polars/io/parquet.rb +5 -4
  35. data/lib/polars/lazy_frame.rb +120 -106
  36. data/lib/polars/lazy_group_by.rb +1 -1
  37. data/lib/polars/list_expr.rb +11 -11
  38. data/lib/polars/list_name_space.rb +5 -1
  39. data/lib/polars/rolling_group_by.rb +5 -7
  40. data/lib/polars/series.rb +105 -189
  41. data/lib/polars/string_expr.rb +42 -67
  42. data/lib/polars/string_name_space.rb +5 -4
  43. data/lib/polars/testing.rb +2 -2
  44. data/lib/polars/utils/constants.rb +9 -0
  45. data/lib/polars/utils/convert.rb +97 -0
  46. data/lib/polars/utils/parse.rb +89 -0
  47. data/lib/polars/utils/various.rb +76 -0
  48. data/lib/polars/utils/wrap.rb +19 -0
  49. data/lib/polars/utils.rb +4 -330
  50. data/lib/polars/version.rb +1 -1
  51. data/lib/polars/whenthen.rb +6 -6
  52. data/lib/polars.rb +11 -0
  53. metadata +7 -2
@@ -63,7 +63,7 @@ module Polars
63
63
  # df.columns
64
64
  # # => ["foo", "bar"]
65
65
  def columns
66
- _ldf.columns
66
+ _ldf.collect_schema.keys
67
67
  end
68
68
 
69
69
  # Get dtypes of columns in LazyFrame.
@@ -81,7 +81,7 @@ module Polars
81
81
  # lf.dtypes
82
82
  # # => [Polars::Int64, Polars::Float64, Polars::String]
83
83
  def dtypes
84
- _ldf.dtypes
84
+ _ldf.collect_schema.values
85
85
  end
86
86
 
87
87
  # Get the schema.
@@ -99,7 +99,7 @@ module Polars
99
99
  # lf.schema
100
100
  # # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String}
101
101
  def schema
102
- _ldf.schema
102
+ _ldf.collect_schema
103
103
  end
104
104
 
105
105
  # Get the width of the LazyFrame.
@@ -111,7 +111,7 @@ module Polars
111
111
  # lf.width
112
112
  # # => 2
113
113
  def width
114
- _ldf.width
114
+ _ldf.collect_schema.length
115
115
  end
116
116
 
117
117
  # Check if LazyFrame includes key.
@@ -261,16 +261,23 @@ module Polars
261
261
  # # │ 2 ┆ 7.0 ┆ b │
262
262
  # # │ 1 ┆ 6.0 ┆ a │
263
263
  # # └─────┴─────┴─────┘
264
- def sort(by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
265
- if by.is_a?(::String)
266
- return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order, multithreaded))
267
- end
268
- if Utils.bool?(reverse)
269
- reverse = [reverse]
264
+ def sort(by, *more_by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
265
+ if by.is_a?(::String) && more_by.empty?
266
+ return _from_rbldf(
267
+ _ldf.sort(
268
+ by, reverse, nulls_last, maintain_order, multithreaded
269
+ )
270
+ )
270
271
  end
271
272
 
272
- by = Utils.selection_to_rbexpr_list(by)
273
- _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order, multithreaded))
273
+ by = Utils.parse_into_list_of_expressions(by, *more_by)
274
+ reverse = Utils.extend_bool(reverse, by.length, "reverse", "by")
275
+ nulls_last = Utils.extend_bool(nulls_last, by.length, "nulls_last", "by")
276
+ _from_rbldf(
277
+ _ldf.sort_by_exprs(
278
+ by, reverse, nulls_last, maintain_order, multithreaded
279
+ )
280
+ )
274
281
  end
275
282
 
276
283
  # def profile
@@ -415,7 +422,7 @@ module Polars
415
422
  path,
416
423
  compression: "zstd",
417
424
  compression_level: nil,
418
- statistics: false,
425
+ statistics: true,
419
426
  row_group_size: nil,
420
427
  data_pagesize_limit: nil,
421
428
  maintain_order: true,
@@ -435,6 +442,24 @@ module Polars
435
442
  no_optimization: no_optimization
436
443
  )
437
444
 
445
+ if statistics == true
446
+ statistics = {
447
+ min: true,
448
+ max: true,
449
+ distinct_count: false,
450
+ null_count: true
451
+ }
452
+ elsif statistics == false
453
+ statistics = {}
454
+ elsif statistics == "full"
455
+ statistics = {
456
+ min: true,
457
+ max: true,
458
+ distinct_count: true,
459
+ null_count: true
460
+ }
461
+ end
462
+
438
463
  lf.sink_parquet(
439
464
  path,
440
465
  compression,
@@ -589,6 +614,7 @@ module Polars
589
614
  datetime_format: nil,
590
615
  date_format: nil,
591
616
  time_format: nil,
617
+ float_scientific: nil,
592
618
  float_precision: nil,
593
619
  null_value: nil,
594
620
  quote_style: nil,
@@ -623,6 +649,7 @@ module Polars
623
649
  datetime_format,
624
650
  date_format,
625
651
  time_format,
652
+ float_scientific,
626
653
  float_precision,
627
654
  null_value,
628
655
  quote_style,
@@ -907,7 +934,7 @@ module Polars
907
934
  def filter(predicate)
908
935
  _from_rbldf(
909
936
  _ldf.filter(
910
- Utils.expr_to_lit_or_expr(predicate, str_to_lit: false)._rbexpr
937
+ Utils.parse_into_expression(predicate, str_as_lit: false)
911
938
  )
912
939
  )
913
940
  end
@@ -1003,7 +1030,7 @@ module Polars
1003
1030
  def select(*exprs, **named_exprs)
1004
1031
  structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
1005
1032
 
1006
- rbexprs = Utils.parse_as_list_of_expressions(
1033
+ rbexprs = Utils.parse_into_list_of_expressions(
1007
1034
  *exprs, **named_exprs, __structify: structify
1008
1035
  )
1009
1036
  _from_rbldf(_ldf.select(rbexprs))
@@ -1011,12 +1038,14 @@ module Polars
1011
1038
 
1012
1039
  # Start a group by operation.
1013
1040
  #
1014
- # @param by [Object]
1041
+ # @param by [Array]
1015
1042
  # Column(s) to group by.
1016
1043
  # @param maintain_order [Boolean]
1017
1044
  # Make sure that the order of the groups remain consistent. This is more
1018
1045
  # expensive than a default group by.
1019
- #
1046
+ # @param named_by [Hash]
1047
+ # Additional columns to group by, specified as keyword arguments.
1048
+ # The columns will be renamed to the keyword used.
1020
1049
  # @return [LazyGroupBy]
1021
1050
  #
1022
1051
  # @example
@@ -1039,9 +1068,9 @@ module Polars
1039
1068
  # # │ b ┆ 11 │
1040
1069
  # # │ c ┆ 6 │
1041
1070
  # # └─────┴─────┘
1042
- def group_by(by, maintain_order: false)
1043
- rbexprs_by = Utils.selection_to_rbexpr_list(by)
1044
- lgb = _ldf.group_by(rbexprs_by, maintain_order)
1071
+ def group_by(*by, maintain_order: false, **named_by)
1072
+ exprs = Utils.parse_into_list_of_expressions(*by, **named_by)
1073
+ lgb = _ldf.group_by(exprs, maintain_order)
1045
1074
  LazyGroupBy.new(lgb)
1046
1075
  end
1047
1076
  alias_method :groupby, :group_by
@@ -1095,12 +1124,6 @@ module Polars
1095
1124
  # Define whether the temporal window interval is closed or not.
1096
1125
  # @param by [Object]
1097
1126
  # Also group by this column/these columns.
1098
- # @param check_sorted [Boolean]
1099
- # When the `by` argument is given, polars can not check sortedness
1100
- # by the metadata and has to do a full scan on the index column to
1101
- # verify data is sorted. This is expensive. If you are sure the
1102
- # data within the by groups is sorted, you can set this to `false`.
1103
- # Doing so incorrectly will lead to incorrect output
1104
1127
  #
1105
1128
  # @return [LazyFrame]
1106
1129
  #
@@ -1142,21 +1165,20 @@ module Polars
1142
1165
  period:,
1143
1166
  offset: nil,
1144
1167
  closed: "right",
1145
- by: nil,
1146
- check_sorted: true
1168
+ by: nil
1147
1169
  )
1148
- index_column = Utils.parse_as_expression(index_column)
1170
+ index_column = Utils.parse_into_expression(index_column)
1149
1171
  if offset.nil?
1150
- offset = "-#{period}"
1172
+ offset = Utils.negate_duration_string(Utils.parse_as_duration_string(period))
1151
1173
  end
1152
1174
 
1153
- rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
1154
- period = Utils._timedelta_to_pl_duration(period)
1155
- offset = Utils._timedelta_to_pl_duration(offset)
1156
-
1157
- lgb = _ldf.rolling(
1158
- index_column, period, offset, closed, rbexprs_by, check_sorted
1175
+ rbexprs_by = (
1176
+ !by.nil? ? Utils.parse_into_list_of_expressions(by) : []
1159
1177
  )
1178
+ period = Utils.parse_as_duration_string(period)
1179
+ offset = Utils.parse_as_duration_string(offset)
1180
+
1181
+ lgb = _ldf.rolling(index_column, period, offset, closed, rbexprs_by)
1160
1182
  LazyGroupBy.new(lgb)
1161
1183
  end
1162
1184
  alias_method :group_by_rolling, :rolling
@@ -1224,22 +1246,18 @@ module Polars
1224
1246
  # Define whether the temporal window interval is closed or not.
1225
1247
  # @param by [Object]
1226
1248
  # Also group by this column/these columns
1227
- # @param check_sorted [Boolean]
1228
- # When the `by` argument is given, polars can not check sortedness
1229
- # by the metadata and has to do a full scan on the index column to
1230
- # verify data is sorted. This is expensive. If you are sure the
1231
- # data within the by groups is sorted, you can set this to `false`.
1232
- # Doing so incorrectly will lead to incorrect output.
1233
1249
  #
1234
1250
  # @return [DataFrame]
1235
1251
  #
1236
1252
  # @example
1237
1253
  # df = Polars::DataFrame.new(
1238
1254
  # {
1239
- # "time" => Polars.date_range(
1255
+ # "time" => Polars.datetime_range(
1240
1256
  # DateTime.new(2021, 12, 16),
1241
1257
  # DateTime.new(2021, 12, 16, 3),
1242
- # "30m"
1258
+ # "30m",
1259
+ # time_unit: "us",
1260
+ # eager: true
1243
1261
  # ),
1244
1262
  # "n" => 0..6
1245
1263
  # }
@@ -1338,10 +1356,12 @@ module Polars
1338
1356
  # @example Dynamic group bys can also be combined with grouping on normal keys.
1339
1357
  # df = Polars::DataFrame.new(
1340
1358
  # {
1341
- # "time" => Polars.date_range(
1359
+ # "time" => Polars.datetime_range(
1342
1360
  # DateTime.new(2021, 12, 16),
1343
1361
  # DateTime.new(2021, 12, 16, 3),
1344
- # "30m"
1362
+ # "30m",
1363
+ # time_unit: "us",
1364
+ # eager: true
1345
1365
  # ),
1346
1366
  # "groups" => ["a", "a", "a", "b", "b", "a", "a"]
1347
1367
  # }
@@ -1405,14 +1425,13 @@ module Polars
1405
1425
  closed: "left",
1406
1426
  label: "left",
1407
1427
  by: nil,
1408
- start_by: "window",
1409
- check_sorted: true
1428
+ start_by: "window"
1410
1429
  )
1411
1430
  if !truncate.nil?
1412
1431
  label = truncate ? "left" : "datapoint"
1413
1432
  end
1414
1433
 
1415
- index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
1434
+ index_column = Utils.parse_into_expression(index_column, str_as_lit: false)
1416
1435
  if offset.nil?
1417
1436
  offset = period.nil? ? "-#{every}" : "0ns"
1418
1437
  end
@@ -1421,13 +1440,13 @@ module Polars
1421
1440
  period = every
1422
1441
  end
1423
1442
 
1424
- period = Utils._timedelta_to_pl_duration(period)
1425
- offset = Utils._timedelta_to_pl_duration(offset)
1426
- every = Utils._timedelta_to_pl_duration(every)
1443
+ period = Utils.parse_as_duration_string(period)
1444
+ offset = Utils.parse_as_duration_string(offset)
1445
+ every = Utils.parse_as_duration_string(every)
1427
1446
 
1428
- rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
1447
+ rbexprs_by = by.nil? ? [] : Utils.parse_into_list_of_expressions(by)
1429
1448
  lgb = _ldf.group_by_dynamic(
1430
- index_column._rbexpr,
1449
+ index_column,
1431
1450
  every,
1432
1451
  period,
1433
1452
  offset,
@@ -1435,8 +1454,7 @@ module Polars
1435
1454
  include_boundaries,
1436
1455
  closed,
1437
1456
  rbexprs_by,
1438
- start_by,
1439
- check_sorted
1457
+ start_by
1440
1458
  )
1441
1459
  LazyGroupBy.new(lgb)
1442
1460
  end
@@ -1587,7 +1605,7 @@ module Polars
1587
1605
  # @param on Object
1588
1606
  # Join column of both DataFrames. If set, `left_on` and `right_on` should be
1589
1607
  # None.
1590
- # @param how ["inner", "left", "outer", "semi", "anti", "cross"]
1608
+ # @param how ["inner", "left", "full", "semi", "anti", "cross"]
1591
1609
  # Join strategy.
1592
1610
  # @param suffix [String]
1593
1611
  # Suffix to append to columns with a duplicate name.
@@ -1629,7 +1647,7 @@ module Polars
1629
1647
  # # └─────┴─────┴─────┴───────┘
1630
1648
  #
1631
1649
  # @example
1632
- # df.join(other_df, on: "ham", how: "outer").collect
1650
+ # df.join(other_df, on: "ham", how: "full").collect
1633
1651
  # # =>
1634
1652
  # # shape: (4, 5)
1635
1653
  # # ┌──────┬──────┬──────┬───────┬───────────┐
@@ -1696,7 +1714,9 @@ module Polars
1696
1714
  raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
1697
1715
  end
1698
1716
 
1699
- if how == "cross"
1717
+ if how == "outer"
1718
+ how = "full"
1719
+ elsif how == "cross"
1700
1720
  return _from_rbldf(
1701
1721
  _ldf.join(
1702
1722
  other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
@@ -1705,12 +1725,12 @@ module Polars
1705
1725
  end
1706
1726
 
1707
1727
  if !on.nil?
1708
- rbexprs = Utils.selection_to_rbexpr_list(on)
1728
+ rbexprs = Utils.parse_into_list_of_expressions(on)
1709
1729
  rbexprs_left = rbexprs
1710
1730
  rbexprs_right = rbexprs
1711
1731
  elsif !left_on.nil? && !right_on.nil?
1712
- rbexprs_left = Utils.selection_to_rbexpr_list(left_on)
1713
- rbexprs_right = Utils.selection_to_rbexpr_list(right_on)
1732
+ rbexprs_left = Utils.parse_into_list_of_expressions(left_on)
1733
+ rbexprs_right = Utils.parse_into_list_of_expressions(right_on)
1714
1734
  else
1715
1735
  raise ArgumentError, "must specify `on` OR `left_on` and `right_on`"
1716
1736
  end
@@ -1765,7 +1785,8 @@ module Polars
1765
1785
  # # └─────┴──────┴───────┴─────┴──────┴───────┘
1766
1786
  def with_columns(*exprs, **named_exprs)
1767
1787
  structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
1768
- rbexprs = Utils.parse_as_list_of_expressions(*exprs, **named_exprs, __structify: structify)
1788
+
1789
+ rbexprs = Utils.parse_into_list_of_expressions(*exprs, **named_exprs, __structify: structify)
1769
1790
 
1770
1791
  _from_rbldf(_ldf.with_columns(rbexprs))
1771
1792
  end
@@ -1926,9 +1947,9 @@ module Polars
1926
1947
  # # └──────┴──────┘
1927
1948
  def shift(n, fill_value: nil)
1928
1949
  if !fill_value.nil?
1929
- fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
1950
+ fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
1930
1951
  end
1931
- n = Utils.parse_as_expression(n)
1952
+ n = Utils.parse_into_expression(n)
1932
1953
  _from_rbldf(_ldf.shift(n, fill_value))
1933
1954
  end
1934
1955
 
@@ -2125,7 +2146,7 @@ module Polars
2125
2146
  # # │ 3 ┆ 7 │
2126
2147
  # # └─────┴─────┘
2127
2148
  def take_every(n)
2128
- select(Utils.col("*").take_every(n))
2149
+ select(F.col("*").take_every(n))
2129
2150
  end
2130
2151
 
2131
2152
  # Fill null values using the specified value or strategy.
@@ -2168,7 +2189,7 @@ module Polars
2168
2189
  # # └──────┴──────┘
2169
2190
  def fill_nan(fill_value)
2170
2191
  if !fill_value.is_a?(Expr)
2171
- fill_value = Utils.lit(fill_value)
2192
+ fill_value = F.lit(fill_value)
2172
2193
  end
2173
2194
  _from_rbldf(_ldf.fill_nan(fill_value._rbexpr))
2174
2195
  end
@@ -2359,8 +2380,8 @@ module Polars
2359
2380
  # # │ 3.0 ┆ 1.0 │
2360
2381
  # # └─────┴─────┘
2361
2382
  def quantile(quantile, interpolation: "nearest")
2362
- quantile = Utils.expr_to_lit_or_expr(quantile, str_to_lit: false)
2363
- _from_rbldf(_ldf.quantile(quantile._rbexpr, interpolation))
2383
+ quantile = Utils.parse_into_expression(quantile, str_as_lit: false)
2384
+ _from_rbldf(_ldf.quantile(quantile, interpolation))
2364
2385
  end
2365
2386
 
2366
2387
  # Explode lists to long format.
@@ -2392,7 +2413,7 @@ module Polars
2392
2413
  # # │ c ┆ 8 │
2393
2414
  # # └─────────┴─────────┘
2394
2415
  def explode(columns)
2395
- columns = Utils.selection_to_rbexpr_list(columns)
2416
+ columns = Utils.parse_into_list_of_expressions(columns)
2396
2417
  _from_rbldf(_ldf.explode(columns))
2397
2418
  end
2398
2419
 
@@ -2455,35 +2476,35 @@ module Polars
2455
2476
  # Optionally leaves identifiers set.
2456
2477
  #
2457
2478
  # This function is useful to massage a DataFrame into a format where one or more
2458
- # columns are identifier variables (id_vars), while all other columns, considered
2459
- # measured variables (value_vars), are "unpivoted" to the row axis, leaving just
2479
+ # columns are identifier variables (index) while all other columns, considered
2480
+ # measured variables (on), are "unpivoted" to the row axis leaving just
2460
2481
  # two non-identifier columns, 'variable' and 'value'.
2461
2482
  #
2462
- # @param id_vars [Object]
2463
- # Columns to use as identifier variables.
2464
- # @param value_vars [Object]
2465
- # Values to use as identifier variables.
2466
- # If `value_vars` is empty all columns that are not in `id_vars` will be used.
2483
+ # @param on [Object]
2484
+ # Column(s) or selector(s) to use as values variables; if `on`
2485
+ # is empty all columns that are not in `index` will be used.
2486
+ # @param index [Object]
2487
+ # Column(s) or selector(s) to use as identifier variables.
2467
2488
  # @param variable_name [String]
2468
- # Name to give to the `value` column. Defaults to "variable"
2489
+ # Name to give to the `variable` column. Defaults to "variable"
2469
2490
  # @param value_name [String]
2470
2491
  # Name to give to the `value` column. Defaults to "value"
2471
2492
  # @param streamable [Boolean]
2472
2493
  # Allow this node to run in the streaming engine.
2473
- # If this runs in streaming, the output of the melt operation
2494
+ # If this runs in streaming, the output of the unpivot operation
2474
2495
  # will not have a stable ordering.
2475
2496
  #
2476
2497
  # @return [LazyFrame]
2477
2498
  #
2478
2499
  # @example
2479
- # df = Polars::DataFrame.new(
2500
+ # lf = Polars::LazyFrame.new(
2480
2501
  # {
2481
2502
  # "a" => ["x", "y", "z"],
2482
2503
  # "b" => [1, 3, 5],
2483
2504
  # "c" => [2, 4, 6]
2484
2505
  # }
2485
- # ).lazy
2486
- # df.melt(id_vars: "a", value_vars: ["b", "c"]).collect
2506
+ # )
2507
+ # lf.unpivot(Polars::Selectors.numeric, index: "a").collect
2487
2508
  # # =>
2488
2509
  # # shape: (6, 3)
2489
2510
  # # ┌─────┬──────────┬───────┐
@@ -2498,23 +2519,21 @@ module Polars
2498
2519
  # # │ y ┆ c ┆ 4 │
2499
2520
  # # │ z ┆ c ┆ 6 │
2500
2521
  # # └─────┴──────────┴───────┘
2501
- def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil, streamable: true)
2502
- if value_vars.is_a?(::String)
2503
- value_vars = [value_vars]
2504
- end
2505
- if id_vars.is_a?(::String)
2506
- id_vars = [id_vars]
2507
- end
2508
- if value_vars.nil?
2509
- value_vars = []
2510
- end
2511
- if id_vars.nil?
2512
- id_vars = []
2513
- end
2522
+ def unpivot(
2523
+ on,
2524
+ index: nil,
2525
+ variable_name: nil,
2526
+ value_name: nil,
2527
+ streamable: true
2528
+ )
2529
+ on = on.nil? ? [] : Utils._expand_selectors(self, on)
2530
+ index = index.nil? ? [] : Utils._expand_selectors(self, index)
2531
+
2514
2532
  _from_rbldf(
2515
- _ldf.melt(id_vars, value_vars, value_name, variable_name, streamable)
2533
+ _ldf.unpivot(on, index, value_name, variable_name, streamable)
2516
2534
  )
2517
2535
  end
2536
+ alias_method :melt, :unpivot
2518
2537
 
2519
2538
  # def map
2520
2539
  # end
@@ -2545,7 +2564,7 @@ module Polars
2545
2564
  # # │ 10.0 ┆ null ┆ 9.0 │
2546
2565
  # # └──────┴──────┴──────────┘
2547
2566
  def interpolate
2548
- select(Utils.col("*").interpolate)
2567
+ select(F.col("*").interpolate)
2549
2568
  end
2550
2569
 
2551
2570
  # Decompose a struct into its fields.
@@ -2652,24 +2671,19 @@ module Polars
2652
2671
  #
2653
2672
  # @param column [Object]
2654
2673
  # Columns that are sorted
2655
- # @param more_columns [Object]
2656
- # Additional columns that are sorted, specified as positional arguments.
2657
2674
  # @param descending [Boolean]
2658
2675
  # Whether the columns are sorted in descending order.
2659
2676
  #
2660
2677
  # @return [LazyFrame]
2661
2678
  def set_sorted(
2662
2679
  column,
2663
- *more_columns,
2664
2680
  descending: false
2665
2681
  )
2666
- columns = Utils.selection_to_rbexpr_list(column)
2667
- if more_columns.any?
2668
- columns.concat(Utils.selection_to_rbexpr_list(more_columns))
2682
+ if !Utils.strlike?(column)
2683
+ msg = "expected a 'str' for argument 'column' in 'set_sorted'"
2684
+ raise TypeError, msg
2669
2685
  end
2670
- with_columns(
2671
- columns.map { |e| Utils.wrap_expr(e).set_sorted(descending: descending) }
2672
- )
2686
+ with_columns(F.col(column).set_sorted(descending: descending))
2673
2687
  end
2674
2688
 
2675
2689
  # TODO
@@ -107,7 +107,7 @@ module Polars
107
107
  # # │ b ┆ 5 ┆ 10.0 │
108
108
  # # └─────┴───────┴────────────────┘
109
109
  def agg(*aggs, **named_aggs)
110
- rbexprs = Utils.parse_as_list_of_expressions(*aggs, **named_aggs)
110
+ rbexprs = Utils.parse_into_list_of_expressions(*aggs, **named_aggs)
111
111
  Utils.wrap_ldf(@lgb.agg(rbexprs))
112
112
  end
113
113
 
@@ -146,7 +146,7 @@ module Polars
146
146
  end
147
147
 
148
148
  if !fraction.nil?
149
- fraction = Utils.parse_as_expression(fraction)
149
+ fraction = Utils.parse_into_expression(fraction)
150
150
  return Utils.wrap_expr(
151
151
  _rbexpr.list_sample_fraction(
152
152
  fraction, with_replacement, shuffle, seed
@@ -155,7 +155,7 @@ module Polars
155
155
  end
156
156
 
157
157
  n = 1 if n.nil?
158
- n = Utils.parse_as_expression(n)
158
+ n = Utils.parse_into_expression(n)
159
159
  Utils.wrap_expr(_rbexpr.list_sample_n(n, with_replacement, shuffle, seed))
160
160
  end
161
161
 
@@ -387,7 +387,7 @@ module Polars
387
387
  # # │ 1 │
388
388
  # # └──────┘
389
389
  def get(index, null_on_oob: true)
390
- index = Utils.parse_as_expression(index)
390
+ index = Utils.parse_into_expression(index)
391
391
  Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
392
392
  end
393
393
 
@@ -431,7 +431,7 @@ module Polars
431
431
  if index.is_a?(::Array)
432
432
  index = Series.new(index)
433
433
  end
434
- index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
434
+ index = Utils.parse_into_expression(index, str_as_lit: false)
435
435
  Utils.wrap_expr(_rbexpr.list_gather(index, null_on_oob))
436
436
  end
437
437
  alias_method :take, :gather
@@ -502,7 +502,7 @@ module Polars
502
502
  # # │ true │
503
503
  # # └───────┘
504
504
  def contains(item)
505
- Utils.wrap_expr(_rbexpr.list_contains(Utils.expr_to_lit_or_expr(item)._rbexpr))
505
+ Utils.wrap_expr(_rbexpr.list_contains(Utils.parse_into_expression(item)))
506
506
  end
507
507
 
508
508
  # Join all string items in a sublist and place a separator between them.
@@ -530,7 +530,7 @@ module Polars
530
530
  # # │ x y │
531
531
  # # └───────┘
532
532
  def join(separator, ignore_nulls: true)
533
- separator = Utils.parse_as_expression(separator, str_as_lit: true)
533
+ separator = Utils.parse_into_expression(separator, str_as_lit: true)
534
534
  Utils.wrap_expr(_rbexpr.list_join(separator, ignore_nulls))
535
535
  end
536
536
 
@@ -625,7 +625,7 @@ module Polars
625
625
  # # [null, 10, 2]
626
626
  # # ]
627
627
  def shift(n = 1)
628
- n = Utils.parse_as_expression(n)
628
+ n = Utils.parse_into_expression(n)
629
629
  Utils.wrap_expr(_rbexpr.list_shift(n))
630
630
  end
631
631
 
@@ -650,8 +650,8 @@ module Polars
650
650
  # # [2, 1]
651
651
  # # ]
652
652
  def slice(offset, length = nil)
653
- offset = Utils.expr_to_lit_or_expr(offset, str_to_lit: false)._rbexpr
654
- length = Utils.expr_to_lit_or_expr(length, str_to_lit: false)._rbexpr
653
+ offset = Utils.parse_into_expression(offset, str_as_lit: false)
654
+ length = Utils.parse_into_expression(length, str_as_lit: false)
655
655
  Utils.wrap_expr(_rbexpr.list_slice(offset, length))
656
656
  end
657
657
 
@@ -694,7 +694,7 @@ module Polars
694
694
  # # [2, 1]
695
695
  # # ]
696
696
  def tail(n = 5)
697
- n = Utils.parse_as_expression(n)
697
+ n = Utils.parse_into_expression(n)
698
698
  Utils.wrap_expr(_rbexpr.list_tail(n))
699
699
  end
700
700
 
@@ -722,7 +722,7 @@ module Polars
722
722
  # # │ 0 │
723
723
  # # └────────────────┘
724
724
  def count_matches(element)
725
- Utils.wrap_expr(_rbexpr.list_count_matches(Utils.expr_to_lit_or_expr(element)._rbexpr))
725
+ Utils.wrap_expr(_rbexpr.list_count_matches(Utils.parse_into_expression(element)))
726
726
  end
727
727
  alias_method :count_match, :count_matches
728
728
 
@@ -197,9 +197,13 @@ module Polars
197
197
  #
198
198
  # @param index [Integer]
199
199
  # Index to return per sublist
200
+ # @param null_on_oob [Boolean]
201
+ # Behavior if an index is out of bounds:
202
+ # true -> set as null
203
+ # false -> raise an error
200
204
  #
201
205
  # @return [Series]
202
- def get(index)
206
+ def get(index, null_on_oob: false)
203
207
  super
204
208
  end
205
209
 
@@ -10,25 +10,23 @@ module Polars
10
10
  period,
11
11
  offset,
12
12
  closed,
13
- by,
14
- check_sorted
13
+ group_by
15
14
  )
16
- period = Utils._timedelta_to_pl_duration(period)
17
- offset = Utils._timedelta_to_pl_duration(offset)
15
+ period = Utils.parse_as_duration_string(period)
16
+ offset = Utils.parse_as_duration_string(offset)
18
17
 
19
18
  @df = df
20
19
  @time_column = index_column
21
20
  @period = period
22
21
  @offset = offset
23
22
  @closed = closed
24
- @by = by
25
- @check_sorted = check_sorted
23
+ @group_by = group_by
26
24
  end
27
25
 
28
26
  def agg(*aggs, **named_aggs)
29
27
  @df.lazy
30
28
  .group_by_rolling(
31
- index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by, check_sorted: @check_sorted
29
+ index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @group_by
32
30
  )
33
31
  .agg(*aggs, **named_aggs)
34
32
  .collect(no_optimization: true, string_cache: false)