polars-df 0.10.0-x86_64-darwin → 0.12.0-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +392 -351
  4. data/LICENSE-THIRD-PARTY.txt +1127 -867
  5. data/README.md +6 -6
  6. data/lib/polars/3.1/polars.bundle +0 -0
  7. data/lib/polars/3.2/polars.bundle +0 -0
  8. data/lib/polars/3.3/polars.bundle +0 -0
  9. data/lib/polars/array_expr.rb +4 -4
  10. data/lib/polars/batched_csv_reader.rb +11 -5
  11. data/lib/polars/cat_expr.rb +0 -36
  12. data/lib/polars/cat_name_space.rb +0 -37
  13. data/lib/polars/convert.rb +6 -1
  14. data/lib/polars/data_frame.rb +176 -403
  15. data/lib/polars/data_types.rb +1 -1
  16. data/lib/polars/date_time_expr.rb +525 -572
  17. data/lib/polars/date_time_name_space.rb +263 -460
  18. data/lib/polars/dynamic_group_by.rb +5 -5
  19. data/lib/polars/exceptions.rb +7 -0
  20. data/lib/polars/expr.rb +1394 -243
  21. data/lib/polars/expr_dispatch.rb +1 -1
  22. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  23. data/lib/polars/functions/as_datatype.rb +63 -40
  24. data/lib/polars/functions/lazy.rb +63 -14
  25. data/lib/polars/functions/lit.rb +1 -1
  26. data/lib/polars/functions/range/date_range.rb +90 -57
  27. data/lib/polars/functions/range/datetime_range.rb +149 -0
  28. data/lib/polars/functions/range/int_range.rb +2 -2
  29. data/lib/polars/functions/range/time_range.rb +141 -0
  30. data/lib/polars/functions/repeat.rb +1 -1
  31. data/lib/polars/functions/whenthen.rb +1 -1
  32. data/lib/polars/group_by.rb +88 -23
  33. data/lib/polars/io/avro.rb +24 -0
  34. data/lib/polars/{io.rb → io/csv.rb} +299 -493
  35. data/lib/polars/io/database.rb +73 -0
  36. data/lib/polars/io/ipc.rb +247 -0
  37. data/lib/polars/io/json.rb +29 -0
  38. data/lib/polars/io/ndjson.rb +80 -0
  39. data/lib/polars/io/parquet.rb +227 -0
  40. data/lib/polars/lazy_frame.rb +143 -272
  41. data/lib/polars/lazy_group_by.rb +100 -3
  42. data/lib/polars/list_expr.rb +11 -11
  43. data/lib/polars/list_name_space.rb +5 -1
  44. data/lib/polars/rolling_group_by.rb +7 -9
  45. data/lib/polars/series.rb +103 -187
  46. data/lib/polars/string_expr.rb +78 -102
  47. data/lib/polars/string_name_space.rb +5 -4
  48. data/lib/polars/testing.rb +2 -2
  49. data/lib/polars/utils/constants.rb +9 -0
  50. data/lib/polars/utils/convert.rb +97 -0
  51. data/lib/polars/utils/parse.rb +89 -0
  52. data/lib/polars/utils/various.rb +76 -0
  53. data/lib/polars/utils/wrap.rb +19 -0
  54. data/lib/polars/utils.rb +8 -300
  55. data/lib/polars/version.rb +1 -1
  56. data/lib/polars/whenthen.rb +6 -6
  57. data/lib/polars.rb +20 -1
  58. metadata +17 -4
@@ -27,149 +27,6 @@ module Polars
27
27
  ldf
28
28
  end
29
29
 
30
- # @private
31
- def self._scan_csv(
32
- file,
33
- has_header: true,
34
- sep: ",",
35
- comment_char: nil,
36
- quote_char: '"',
37
- skip_rows: 0,
38
- dtypes: nil,
39
- null_values: nil,
40
- ignore_errors: false,
41
- cache: true,
42
- with_column_names: nil,
43
- infer_schema_length: 100,
44
- n_rows: nil,
45
- encoding: "utf8",
46
- low_memory: false,
47
- rechunk: true,
48
- skip_rows_after_header: 0,
49
- row_count_name: nil,
50
- row_count_offset: 0,
51
- parse_dates: false,
52
- eol_char: "\n",
53
- truncate_ragged_lines: true
54
- )
55
- dtype_list = nil
56
- if !dtypes.nil?
57
- dtype_list = []
58
- dtypes.each do |k, v|
59
- dtype_list << [k, Utils.rb_type_to_dtype(v)]
60
- end
61
- end
62
- processed_null_values = Utils._process_null_values(null_values)
63
-
64
- _from_rbldf(
65
- RbLazyFrame.new_from_csv(
66
- file,
67
- sep,
68
- has_header,
69
- ignore_errors,
70
- skip_rows,
71
- n_rows,
72
- cache,
73
- dtype_list,
74
- low_memory,
75
- comment_char,
76
- quote_char,
77
- processed_null_values,
78
- infer_schema_length,
79
- with_column_names,
80
- rechunk,
81
- skip_rows_after_header,
82
- encoding,
83
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
84
- parse_dates,
85
- eol_char,
86
- truncate_ragged_lines
87
- )
88
- )
89
- end
90
-
91
- # @private
92
- def self._scan_parquet(
93
- file,
94
- n_rows: nil,
95
- cache: true,
96
- parallel: "auto",
97
- rechunk: true,
98
- row_count_name: nil,
99
- row_count_offset: 0,
100
- storage_options: nil,
101
- low_memory: false,
102
- use_statistics: true,
103
- hive_partitioning: true
104
- )
105
- _from_rbldf(
106
- RbLazyFrame.new_from_parquet(
107
- file,
108
- [],
109
- n_rows,
110
- cache,
111
- parallel,
112
- rechunk,
113
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
114
- low_memory,
115
- use_statistics,
116
- hive_partitioning,
117
- nil
118
- )
119
- )
120
- end
121
-
122
- # @private
123
- def self._scan_ipc(
124
- file,
125
- n_rows: nil,
126
- cache: true,
127
- rechunk: true,
128
- row_count_name: nil,
129
- row_count_offset: 0,
130
- storage_options: nil,
131
- memory_map: true
132
- )
133
- if Utils.pathlike?(file)
134
- file = Utils.normalise_filepath(file)
135
- end
136
-
137
- _from_rbldf(
138
- RbLazyFrame.new_from_ipc(
139
- file,
140
- n_rows,
141
- cache,
142
- rechunk,
143
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
144
- memory_map
145
- )
146
- )
147
- end
148
-
149
- # @private
150
- def self._scan_ndjson(
151
- file,
152
- infer_schema_length: nil,
153
- batch_size: nil,
154
- n_rows: nil,
155
- low_memory: false,
156
- rechunk: true,
157
- row_count_name: nil,
158
- row_count_offset: 0
159
- )
160
- _from_rbldf(
161
- RbLazyFrame.new_from_ndjson(
162
- file,
163
- infer_schema_length,
164
- batch_size,
165
- n_rows,
166
- low_memory,
167
- rechunk,
168
- Utils._prepare_row_count_args(row_count_name, row_count_offset)
169
- )
170
- )
171
- end
172
-
173
30
  # def self.from_json
174
31
  # end
175
32
 
@@ -181,7 +38,7 @@ module Polars
181
38
  # @return [LazyFrame]
182
39
  def self.read_json(file)
183
40
  if Utils.pathlike?(file)
184
- file = Utils.normalise_filepath(file)
41
+ file = Utils.normalize_filepath(file)
185
42
  end
186
43
 
187
44
  Utils.wrap_ldf(RbLazyFrame.read_json(file))
@@ -206,7 +63,7 @@ module Polars
206
63
  # df.columns
207
64
  # # => ["foo", "bar"]
208
65
  def columns
209
- _ldf.columns
66
+ _ldf.collect_schema.keys
210
67
  end
211
68
 
212
69
  # Get dtypes of columns in LazyFrame.
@@ -224,7 +81,7 @@ module Polars
224
81
  # lf.dtypes
225
82
  # # => [Polars::Int64, Polars::Float64, Polars::String]
226
83
  def dtypes
227
- _ldf.dtypes
84
+ _ldf.collect_schema.values
228
85
  end
229
86
 
230
87
  # Get the schema.
@@ -242,7 +99,7 @@ module Polars
242
99
  # lf.schema
243
100
  # # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String}
244
101
  def schema
245
- _ldf.schema
102
+ _ldf.collect_schema
246
103
  end
247
104
 
248
105
  # Get the width of the LazyFrame.
@@ -254,7 +111,7 @@ module Polars
254
111
  # lf.width
255
112
  # # => 2
256
113
  def width
257
- _ldf.width
114
+ _ldf.collect_schema.length
258
115
  end
259
116
 
260
117
  # Check if LazyFrame includes key.
@@ -288,7 +145,7 @@ module Polars
288
145
  # @return [nil]
289
146
  def write_json(file)
290
147
  if Utils.pathlike?(file)
291
- file = Utils.normalise_filepath(file)
148
+ file = Utils.normalize_filepath(file)
292
149
  end
293
150
  _ldf.write_json(file)
294
151
  nil
@@ -404,16 +261,23 @@ module Polars
404
261
  # # │ 2 ┆ 7.0 ┆ b │
405
262
  # # │ 1 ┆ 6.0 ┆ a │
406
263
  # # └─────┴─────┴─────┘
407
- def sort(by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
408
- if by.is_a?(::String)
409
- return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order, multithreaded))
410
- end
411
- if Utils.bool?(reverse)
412
- reverse = [reverse]
264
+ def sort(by, *more_by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
265
+ if by.is_a?(::String) && more_by.empty?
266
+ return _from_rbldf(
267
+ _ldf.sort(
268
+ by, reverse, nulls_last, maintain_order, multithreaded
269
+ )
270
+ )
413
271
  end
414
272
 
415
- by = Utils.selection_to_rbexpr_list(by)
416
- _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order, multithreaded))
273
+ by = Utils.parse_into_list_of_expressions(by, *more_by)
274
+ reverse = Utils.extend_bool(reverse, by.length, "reverse", "by")
275
+ nulls_last = Utils.extend_bool(nulls_last, by.length, "nulls_last", "by")
276
+ _from_rbldf(
277
+ _ldf.sort_by_exprs(
278
+ by, reverse, nulls_last, maintain_order, multithreaded
279
+ )
280
+ )
417
281
  end
418
282
 
419
283
  # def profile
@@ -558,7 +422,7 @@ module Polars
558
422
  path,
559
423
  compression: "zstd",
560
424
  compression_level: nil,
561
- statistics: false,
425
+ statistics: true,
562
426
  row_group_size: nil,
563
427
  data_pagesize_limit: nil,
564
428
  maintain_order: true,
@@ -578,6 +442,24 @@ module Polars
578
442
  no_optimization: no_optimization
579
443
  )
580
444
 
445
+ if statistics == true
446
+ statistics = {
447
+ min: true,
448
+ max: true,
449
+ distinct_count: false,
450
+ null_count: true
451
+ }
452
+ elsif statistics == false
453
+ statistics = {}
454
+ elsif statistics == "full"
455
+ statistics = {
456
+ min: true,
457
+ max: true,
458
+ distinct_count: true,
459
+ null_count: true
460
+ }
461
+ end
462
+
581
463
  lf.sink_parquet(
582
464
  path,
583
465
  compression,
@@ -732,6 +614,7 @@ module Polars
732
614
  datetime_format: nil,
733
615
  date_format: nil,
734
616
  time_format: nil,
617
+ float_scientific: nil,
735
618
  float_precision: nil,
736
619
  null_value: nil,
737
620
  quote_style: nil,
@@ -766,6 +649,7 @@ module Polars
766
649
  datetime_format,
767
650
  date_format,
768
651
  time_format,
652
+ float_scientific,
769
653
  float_precision,
770
654
  null_value,
771
655
  quote_style,
@@ -1050,7 +934,7 @@ module Polars
1050
934
  def filter(predicate)
1051
935
  _from_rbldf(
1052
936
  _ldf.filter(
1053
- Utils.expr_to_lit_or_expr(predicate, str_to_lit: false)._rbexpr
937
+ Utils.parse_into_expression(predicate, str_as_lit: false)
1054
938
  )
1055
939
  )
1056
940
  end
@@ -1137,7 +1021,7 @@ module Polars
1137
1021
  # # ┌─────────┐
1138
1022
  # # │ literal │
1139
1023
  # # │ --- │
1140
- # # │ i64
1024
+ # # │ i32
1141
1025
  # # ╞═════════╡
1142
1026
  # # │ 0 │
1143
1027
  # # │ 0 │
@@ -1146,7 +1030,7 @@ module Polars
1146
1030
  def select(*exprs, **named_exprs)
1147
1031
  structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
1148
1032
 
1149
- rbexprs = Utils.parse_as_list_of_expressions(
1033
+ rbexprs = Utils.parse_into_list_of_expressions(
1150
1034
  *exprs, **named_exprs, __structify: structify
1151
1035
  )
1152
1036
  _from_rbldf(_ldf.select(rbexprs))
@@ -1154,12 +1038,14 @@ module Polars
1154
1038
 
1155
1039
  # Start a group by operation.
1156
1040
  #
1157
- # @param by [Object]
1041
+ # @param by [Array]
1158
1042
  # Column(s) to group by.
1159
1043
  # @param maintain_order [Boolean]
1160
1044
  # Make sure that the order of the groups remain consistent. This is more
1161
1045
  # expensive than a default group by.
1162
- #
1046
+ # @param named_by [Hash]
1047
+ # Additional columns to group by, specified as keyword arguments.
1048
+ # The columns will be renamed to the keyword used.
1163
1049
  # @return [LazyGroupBy]
1164
1050
  #
1165
1051
  # @example
@@ -1182,9 +1068,9 @@ module Polars
1182
1068
  # # │ b ┆ 11 │
1183
1069
  # # │ c ┆ 6 │
1184
1070
  # # └─────┴─────┘
1185
- def group_by(by, maintain_order: false)
1186
- rbexprs_by = Utils.selection_to_rbexpr_list(by)
1187
- lgb = _ldf.group_by(rbexprs_by, maintain_order)
1071
+ def group_by(*by, maintain_order: false, **named_by)
1072
+ exprs = Utils.parse_into_list_of_expressions(*by, **named_by)
1073
+ lgb = _ldf.group_by(exprs, maintain_order)
1188
1074
  LazyGroupBy.new(lgb)
1189
1075
  end
1190
1076
  alias_method :groupby, :group_by
@@ -1238,12 +1124,6 @@ module Polars
1238
1124
  # Define whether the temporal window interval is closed or not.
1239
1125
  # @param by [Object]
1240
1126
  # Also group by this column/these columns.
1241
- # @param check_sorted [Boolean]
1242
- # When the `by` argument is given, polars can not check sortedness
1243
- # by the metadata and has to do a full scan on the index column to
1244
- # verify data is sorted. This is expensive. If you are sure the
1245
- # data within the by groups is sorted, you can set this to `false`.
1246
- # Doing so incorrectly will lead to incorrect output
1247
1127
  #
1248
1128
  # @return [LazyFrame]
1249
1129
  #
@@ -1285,21 +1165,20 @@ module Polars
1285
1165
  period:,
1286
1166
  offset: nil,
1287
1167
  closed: "right",
1288
- by: nil,
1289
- check_sorted: true
1168
+ by: nil
1290
1169
  )
1291
- index_column = Utils.parse_as_expression(index_column)
1170
+ index_column = Utils.parse_into_expression(index_column)
1292
1171
  if offset.nil?
1293
- offset = "-#{period}"
1172
+ offset = Utils.negate_duration_string(Utils.parse_as_duration_string(period))
1294
1173
  end
1295
1174
 
1296
- rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
1297
- period = Utils._timedelta_to_pl_duration(period)
1298
- offset = Utils._timedelta_to_pl_duration(offset)
1299
-
1300
- lgb = _ldf.rolling(
1301
- index_column, period, offset, closed, rbexprs_by, check_sorted
1175
+ rbexprs_by = (
1176
+ !by.nil? ? Utils.parse_into_list_of_expressions(by) : []
1302
1177
  )
1178
+ period = Utils.parse_as_duration_string(period)
1179
+ offset = Utils.parse_as_duration_string(offset)
1180
+
1181
+ lgb = _ldf.rolling(index_column, period, offset, closed, rbexprs_by)
1303
1182
  LazyGroupBy.new(lgb)
1304
1183
  end
1305
1184
  alias_method :group_by_rolling, :rolling
@@ -1367,22 +1246,18 @@ module Polars
1367
1246
  # Define whether the temporal window interval is closed or not.
1368
1247
  # @param by [Object]
1369
1248
  # Also group by this column/these columns
1370
- # @param check_sorted [Boolean]
1371
- # When the `by` argument is given, polars can not check sortedness
1372
- # by the metadata and has to do a full scan on the index column to
1373
- # verify data is sorted. This is expensive. If you are sure the
1374
- # data within the by groups is sorted, you can set this to `false`.
1375
- # Doing so incorrectly will lead to incorrect output.
1376
1249
  #
1377
1250
  # @return [DataFrame]
1378
1251
  #
1379
1252
  # @example
1380
1253
  # df = Polars::DataFrame.new(
1381
1254
  # {
1382
- # "time" => Polars.date_range(
1255
+ # "time" => Polars.datetime_range(
1383
1256
  # DateTime.new(2021, 12, 16),
1384
1257
  # DateTime.new(2021, 12, 16, 3),
1385
- # "30m"
1258
+ # "30m",
1259
+ # time_unit: "us",
1260
+ # eager: true
1386
1261
  # ),
1387
1262
  # "n" => 0..6
1388
1263
  # }
@@ -1449,16 +1324,16 @@ module Polars
1449
1324
  # )
1450
1325
  # # =>
1451
1326
  # # shape: (4, 3)
1452
- # # ┌─────────────────────┬────────────┬───────────────────────────────────┐
1453
- # # │ time ┆ time_count ┆ time_agg_list
1454
- # # │ --- ┆ --- ┆ ---
1455
- # # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]]
1456
- # # ╞═════════════════════╪════════════╪═══════════════════════════════════╡
1457
- # # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16…
1458
- # # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16…
1459
- # # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16…
1460
- # # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00]
1461
- # # └─────────────────────┴────────────┴───────────────────────────────────┘
1327
+ # # ┌─────────────────────┬────────────┬─────────────────────────────────┐
1328
+ # # │ time ┆ time_count ┆ time_agg_list
1329
+ # # │ --- ┆ --- ┆ ---
1330
+ # # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]]
1331
+ # # ╞═════════════════════╪════════════╪═════════════════════════════════╡
1332
+ # # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-…
1333
+ # # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-…
1334
+ # # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-…
1335
+ # # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00]
1336
+ # # └─────────────────────┴────────────┴─────────────────────────────────┘
1462
1337
  #
1463
1338
  # @example When closed="both" the time values at the window boundaries belong to 2 groups.
1464
1339
  # df.group_by_dynamic("time", every: "1h", closed: "both").agg(
@@ -1481,10 +1356,12 @@ module Polars
1481
1356
  # @example Dynamic group bys can also be combined with grouping on normal keys.
1482
1357
  # df = Polars::DataFrame.new(
1483
1358
  # {
1484
- # "time" => Polars.date_range(
1359
+ # "time" => Polars.datetime_range(
1485
1360
  # DateTime.new(2021, 12, 16),
1486
1361
  # DateTime.new(2021, 12, 16, 3),
1487
- # "30m"
1362
+ # "30m",
1363
+ # time_unit: "us",
1364
+ # eager: true
1488
1365
  # ),
1489
1366
  # "groups" => ["a", "a", "a", "b", "b", "a", "a"]
1490
1367
  # }
@@ -1548,14 +1425,13 @@ module Polars
1548
1425
  closed: "left",
1549
1426
  label: "left",
1550
1427
  by: nil,
1551
- start_by: "window",
1552
- check_sorted: true
1428
+ start_by: "window"
1553
1429
  )
1554
1430
  if !truncate.nil?
1555
1431
  label = truncate ? "left" : "datapoint"
1556
1432
  end
1557
1433
 
1558
- index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
1434
+ index_column = Utils.parse_into_expression(index_column, str_as_lit: false)
1559
1435
  if offset.nil?
1560
1436
  offset = period.nil? ? "-#{every}" : "0ns"
1561
1437
  end
@@ -1564,13 +1440,13 @@ module Polars
1564
1440
  period = every
1565
1441
  end
1566
1442
 
1567
- period = Utils._timedelta_to_pl_duration(period)
1568
- offset = Utils._timedelta_to_pl_duration(offset)
1569
- every = Utils._timedelta_to_pl_duration(every)
1443
+ period = Utils.parse_as_duration_string(period)
1444
+ offset = Utils.parse_as_duration_string(offset)
1445
+ every = Utils.parse_as_duration_string(every)
1570
1446
 
1571
- rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
1447
+ rbexprs_by = by.nil? ? [] : Utils.parse_into_list_of_expressions(by)
1572
1448
  lgb = _ldf.group_by_dynamic(
1573
- index_column._rbexpr,
1449
+ index_column,
1574
1450
  every,
1575
1451
  period,
1576
1452
  offset,
@@ -1578,8 +1454,7 @@ module Polars
1578
1454
  include_boundaries,
1579
1455
  closed,
1580
1456
  rbexprs_by,
1581
- start_by,
1582
- check_sorted
1457
+ start_by
1583
1458
  )
1584
1459
  LazyGroupBy.new(lgb)
1585
1460
  end
@@ -1730,7 +1605,7 @@ module Polars
1730
1605
  # @param on Object
1731
1606
  # Join column of both DataFrames. If set, `left_on` and `right_on` should be
1732
1607
  # None.
1733
- # @param how ["inner", "left", "outer", "semi", "anti", "cross"]
1608
+ # @param how ["inner", "left", "full", "semi", "anti", "cross"]
1734
1609
  # Join strategy.
1735
1610
  # @param suffix [String]
1736
1611
  # Suffix to append to columns with a duplicate name.
@@ -1772,7 +1647,7 @@ module Polars
1772
1647
  # # └─────┴─────┴─────┴───────┘
1773
1648
  #
1774
1649
  # @example
1775
- # df.join(other_df, on: "ham", how: "outer").collect
1650
+ # df.join(other_df, on: "ham", how: "full").collect
1776
1651
  # # =>
1777
1652
  # # shape: (4, 5)
1778
1653
  # # ┌──────┬──────┬──────┬───────┬───────────┐
@@ -1839,7 +1714,9 @@ module Polars
1839
1714
  raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
1840
1715
  end
1841
1716
 
1842
- if how == "cross"
1717
+ if how == "outer"
1718
+ how = "full"
1719
+ elsif how == "cross"
1843
1720
  return _from_rbldf(
1844
1721
  _ldf.join(
1845
1722
  other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
@@ -1848,12 +1725,12 @@ module Polars
1848
1725
  end
1849
1726
 
1850
1727
  if !on.nil?
1851
- rbexprs = Utils.selection_to_rbexpr_list(on)
1728
+ rbexprs = Utils.parse_into_list_of_expressions(on)
1852
1729
  rbexprs_left = rbexprs
1853
1730
  rbexprs_right = rbexprs
1854
1731
  elsif !left_on.nil? && !right_on.nil?
1855
- rbexprs_left = Utils.selection_to_rbexpr_list(left_on)
1856
- rbexprs_right = Utils.selection_to_rbexpr_list(right_on)
1732
+ rbexprs_left = Utils.parse_into_list_of_expressions(left_on)
1733
+ rbexprs_right = Utils.parse_into_list_of_expressions(right_on)
1857
1734
  else
1858
1735
  raise ArgumentError, "must specify `on` OR `left_on` and `right_on`"
1859
1736
  end
@@ -1908,7 +1785,8 @@ module Polars
1908
1785
  # # └─────┴──────┴───────┴─────┴──────┴───────┘
1909
1786
  def with_columns(*exprs, **named_exprs)
1910
1787
  structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
1911
- rbexprs = Utils.parse_as_list_of_expressions(*exprs, **named_exprs, __structify: structify)
1788
+
1789
+ rbexprs = Utils.parse_into_list_of_expressions(*exprs, **named_exprs, __structify: structify)
1912
1790
 
1913
1791
  _from_rbldf(_ldf.with_columns(rbexprs))
1914
1792
  end
@@ -2069,9 +1947,9 @@ module Polars
2069
1947
  # # └──────┴──────┘
2070
1948
  def shift(n, fill_value: nil)
2071
1949
  if !fill_value.nil?
2072
- fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
1950
+ fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
2073
1951
  end
2074
- n = Utils.parse_as_expression(n)
1952
+ n = Utils.parse_into_expression(n)
2075
1953
  _from_rbldf(_ldf.shift(n, fill_value))
2076
1954
  end
2077
1955
 
@@ -2236,16 +2114,16 @@ module Polars
2236
2114
  # df.with_row_index.collect
2237
2115
  # # =>
2238
2116
  # # shape: (3, 3)
2239
- # # ┌────────┬─────┬─────┐
2240
- # # │ row_nr ┆ a ┆ b │
2241
- # # │ --- ┆ --- ┆ --- │
2242
- # # │ u32 ┆ i64 ┆ i64 │
2243
- # # ╞════════╪═════╪═════╡
2244
- # # │ 0 ┆ 1 ┆ 2 │
2245
- # # │ 1 ┆ 3 ┆ 4 │
2246
- # # │ 2 ┆ 5 ┆ 6 │
2247
- # # └────────┴─────┴─────┘
2248
- def with_row_index(name: "row_nr", offset: 0)
2117
+ # # ┌───────┬─────┬─────┐
2118
+ # # │ index ┆ a ┆ b │
2119
+ # # │ --- ┆ --- ┆ --- │
2120
+ # # │ u32 ┆ i64 ┆ i64 │
2121
+ # # ╞═══════╪═════╪═════╡
2122
+ # # │ 0 ┆ 1 ┆ 2 │
2123
+ # # │ 1 ┆ 3 ┆ 4 │
2124
+ # # │ 2 ┆ 5 ┆ 6 │
2125
+ # # └───────┴─────┴─────┘
2126
+ def with_row_index(name: "index", offset: 0)
2249
2127
  _from_rbldf(_ldf.with_row_index(name, offset))
2250
2128
  end
2251
2129
  alias_method :with_row_count, :with_row_index
@@ -2268,7 +2146,7 @@ module Polars
2268
2146
  # # │ 3 ┆ 7 │
2269
2147
  # # └─────┴─────┘
2270
2148
  def take_every(n)
2271
- select(Utils.col("*").take_every(n))
2149
+ select(F.col("*").take_every(n))
2272
2150
  end
2273
2151
 
2274
2152
  # Fill null values using the specified value or strategy.
@@ -2311,7 +2189,7 @@ module Polars
2311
2189
  # # └──────┴──────┘
2312
2190
  def fill_nan(fill_value)
2313
2191
  if !fill_value.is_a?(Expr)
2314
- fill_value = Utils.lit(fill_value)
2192
+ fill_value = F.lit(fill_value)
2315
2193
  end
2316
2194
  _from_rbldf(_ldf.fill_nan(fill_value._rbexpr))
2317
2195
  end
@@ -2502,8 +2380,8 @@ module Polars
2502
2380
  # # │ 3.0 ┆ 1.0 │
2503
2381
  # # └─────┴─────┘
2504
2382
  def quantile(quantile, interpolation: "nearest")
2505
- quantile = Utils.expr_to_lit_or_expr(quantile, str_to_lit: false)
2506
- _from_rbldf(_ldf.quantile(quantile._rbexpr, interpolation))
2383
+ quantile = Utils.parse_into_expression(quantile, str_as_lit: false)
2384
+ _from_rbldf(_ldf.quantile(quantile, interpolation))
2507
2385
  end
2508
2386
 
2509
2387
  # Explode lists to long format.
@@ -2535,7 +2413,7 @@ module Polars
2535
2413
  # # │ c ┆ 8 │
2536
2414
  # # └─────────┴─────────┘
2537
2415
  def explode(columns)
2538
- columns = Utils.selection_to_rbexpr_list(columns)
2416
+ columns = Utils.parse_into_list_of_expressions(columns)
2539
2417
  _from_rbldf(_ldf.explode(columns))
2540
2418
  end
2541
2419
 
@@ -2598,35 +2476,35 @@ module Polars
2598
2476
  # Optionally leaves identifiers set.
2599
2477
  #
2600
2478
  # This function is useful to massage a DataFrame into a format where one or more
2601
- # columns are identifier variables (id_vars), while all other columns, considered
2602
- # measured variables (value_vars), are "unpivoted" to the row axis, leaving just
2479
+ # columns are identifier variables (index) while all other columns, considered
2480
+ # measured variables (on), are "unpivoted" to the row axis leaving just
2603
2481
  # two non-identifier columns, 'variable' and 'value'.
2604
2482
  #
2605
- # @param id_vars [Object]
2606
- # Columns to use as identifier variables.
2607
- # @param value_vars [Object]
2608
- # Values to use as identifier variables.
2609
- # If `value_vars` is empty all columns that are not in `id_vars` will be used.
2483
+ # @param on [Object]
2484
+ # Column(s) or selector(s) to use as values variables; if `on`
2485
+ # is empty all columns that are not in `index` will be used.
2486
+ # @param index [Object]
2487
+ # Column(s) or selector(s) to use as identifier variables.
2610
2488
  # @param variable_name [String]
2611
- # Name to give to the `value` column. Defaults to "variable"
2489
+ # Name to give to the `variable` column. Defaults to "variable"
2612
2490
  # @param value_name [String]
2613
2491
  # Name to give to the `value` column. Defaults to "value"
2614
2492
  # @param streamable [Boolean]
2615
2493
  # Allow this node to run in the streaming engine.
2616
- # If this runs in streaming, the output of the melt operation
2494
+ # If this runs in streaming, the output of the unpivot operation
2617
2495
  # will not have a stable ordering.
2618
2496
  #
2619
2497
  # @return [LazyFrame]
2620
2498
  #
2621
2499
  # @example
2622
- # df = Polars::DataFrame.new(
2500
+ # lf = Polars::LazyFrame.new(
2623
2501
  # {
2624
2502
  # "a" => ["x", "y", "z"],
2625
2503
  # "b" => [1, 3, 5],
2626
2504
  # "c" => [2, 4, 6]
2627
2505
  # }
2628
- # ).lazy
2629
- # df.melt(id_vars: "a", value_vars: ["b", "c"]).collect
2506
+ # )
2507
+ # lf.unpivot(Polars::Selectors.numeric, index: "a").collect
2630
2508
  # # =>
2631
2509
  # # shape: (6, 3)
2632
2510
  # # ┌─────┬──────────┬───────┐
@@ -2641,23 +2519,21 @@ module Polars
2641
2519
  # # │ y ┆ c ┆ 4 │
2642
2520
  # # │ z ┆ c ┆ 6 │
2643
2521
  # # └─────┴──────────┴───────┘
2644
- def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil, streamable: true)
2645
- if value_vars.is_a?(::String)
2646
- value_vars = [value_vars]
2647
- end
2648
- if id_vars.is_a?(::String)
2649
- id_vars = [id_vars]
2650
- end
2651
- if value_vars.nil?
2652
- value_vars = []
2653
- end
2654
- if id_vars.nil?
2655
- id_vars = []
2656
- end
2522
+ def unpivot(
2523
+ on,
2524
+ index: nil,
2525
+ variable_name: nil,
2526
+ value_name: nil,
2527
+ streamable: true
2528
+ )
2529
+ on = on.nil? ? [] : Utils._expand_selectors(self, on)
2530
+ index = index.nil? ? [] : Utils._expand_selectors(self, index)
2531
+
2657
2532
  _from_rbldf(
2658
- _ldf.melt(id_vars, value_vars, value_name, variable_name, streamable)
2533
+ _ldf.unpivot(on, index, value_name, variable_name, streamable)
2659
2534
  )
2660
2535
  end
2536
+ alias_method :melt, :unpivot
2661
2537
 
2662
2538
  # def map
2663
2539
  # end
@@ -2688,7 +2564,7 @@ module Polars
2688
2564
  # # │ 10.0 ┆ null ┆ 9.0 │
2689
2565
  # # └──────┴──────┴──────────┘
2690
2566
  def interpolate
2691
- select(Utils.col("*").interpolate)
2567
+ select(F.col("*").interpolate)
2692
2568
  end
2693
2569
 
2694
2570
  # Decompose a struct into its fields.
@@ -2795,24 +2671,19 @@ module Polars
2795
2671
  #
2796
2672
  # @param column [Object]
2797
2673
  # Columns that are sorted
2798
- # @param more_columns [Object]
2799
- # Additional columns that are sorted, specified as positional arguments.
2800
2674
  # @param descending [Boolean]
2801
2675
  # Whether the columns are sorted in descending order.
2802
2676
  #
2803
2677
  # @return [LazyFrame]
2804
2678
  def set_sorted(
2805
2679
  column,
2806
- *more_columns,
2807
2680
  descending: false
2808
2681
  )
2809
- columns = Utils.selection_to_rbexpr_list(column)
2810
- if more_columns.any?
2811
- columns.concat(Utils.selection_to_rbexpr_list(more_columns))
2682
+ if !Utils.strlike?(column)
2683
+ msg = "expected a 'str' for argument 'column' in 'set_sorted'"
2684
+ raise TypeError, msg
2812
2685
  end
2813
- with_columns(
2814
- columns.map { |e| Utils.wrap_expr(e).set_sorted(descending: descending) }
2815
- )
2686
+ with_columns(F.col(column).set_sorted(descending: descending))
2816
2687
  end
2817
2688
 
2818
2689
  # TODO