polars-df 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/Cargo.lock +90 -45
  4. data/README.md +1 -0
  5. data/ext/polars/Cargo.toml +8 -6
  6. data/ext/polars/src/batched_csv.rs +3 -1
  7. data/ext/polars/src/conversion/anyvalue.rs +3 -2
  8. data/ext/polars/src/conversion/mod.rs +18 -7
  9. data/ext/polars/src/dataframe.rs +40 -14
  10. data/ext/polars/src/expr/array.rs +6 -2
  11. data/ext/polars/src/expr/datetime.rs +7 -2
  12. data/ext/polars/src/expr/general.rs +22 -3
  13. data/ext/polars/src/expr/list.rs +6 -2
  14. data/ext/polars/src/expr/string.rs +3 -3
  15. data/ext/polars/src/file.rs +158 -11
  16. data/ext/polars/src/functions/lazy.rs +18 -3
  17. data/ext/polars/src/functions/whenthen.rs +47 -17
  18. data/ext/polars/src/lazyframe/mod.rs +58 -19
  19. data/ext/polars/src/lib.rs +23 -14
  20. data/ext/polars/src/map/dataframe.rs +17 -9
  21. data/ext/polars/src/series/mod.rs +12 -2
  22. data/lib/polars/array_expr.rb +6 -2
  23. data/lib/polars/batched_csv_reader.rb +4 -2
  24. data/lib/polars/data_frame.rb +148 -74
  25. data/lib/polars/date_time_expr.rb +10 -4
  26. data/lib/polars/date_time_name_space.rb +9 -3
  27. data/lib/polars/expr.rb +37 -34
  28. data/lib/polars/functions/lazy.rb +3 -3
  29. data/lib/polars/functions/whenthen.rb +74 -5
  30. data/lib/polars/io.rb +18 -6
  31. data/lib/polars/lazy_frame.rb +39 -36
  32. data/lib/polars/list_expr.rb +6 -2
  33. data/lib/polars/series.rb +12 -10
  34. data/lib/polars/string_expr.rb +1 -0
  35. data/lib/polars/utils.rb +54 -0
  36. data/lib/polars/version.rb +1 -1
  37. data/lib/polars/whenthen.rb +83 -0
  38. data/lib/polars.rb +1 -2
  39. metadata +4 -5
  40. data/lib/polars/when.rb +0 -16
  41. data/lib/polars/when_then.rb +0 -19
data/lib/polars/expr.rb CHANGED
@@ -1544,16 +1544,14 @@ module Polars
1544
1544
  # # │ one │
1545
1545
  # # │ two │
1546
1546
  # # └───────┘
1547
- def sort_by(by, reverse: false)
1548
- if !by.is_a?(::Array)
1549
- by = [by]
1550
- end
1547
+ def sort_by(by, *more_by, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false)
1548
+ by = Utils.parse_as_list_of_expressions(by, *more_by)
1551
1549
  if !reverse.is_a?(::Array)
1552
1550
  reverse = [reverse]
1551
+ elsif by.length != reverse.length
1552
+ raise ArgumentError, "the length of `reverse` (#{reverse.length}) does not match the length of `by` (#{by.length})"
1553
1553
  end
1554
- by = Utils.selection_to_rbexpr_list(by)
1555
-
1556
- _from_rbexpr(_rbexpr.sort_by(by, reverse))
1554
+ _from_rbexpr(_rbexpr.sort_by(by, reverse, nulls_last, multithreaded, maintain_order))
1557
1555
  end
1558
1556
 
1559
1557
  # Take values by index.
@@ -3515,20 +3513,23 @@ module Polars
3515
3513
  # @return [Expr]
3516
3514
  #
3517
3515
  # @example
3518
- # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
3519
- # df.select(Polars.col("foo").pow(3))
3516
+ # df = Polars::DataFrame.new({"x" => [1, 2, 4, 8]})
3517
+ # df.with_columns(
3518
+ # Polars.col("x").pow(3).alias("cube"),
3519
+ # Polars.col("x").pow(Polars.col("x").log(2)).alias("x ** xlog2")
3520
+ # )
3520
3521
  # # =>
3521
- # # shape: (4, 1)
3522
- # # ┌──────┐
3523
- # # │ foo
3524
- # # │ --- │
3525
- # # │ f64
3526
- # # ╞══════╡
3527
- # # │ 1.0
3528
- # # │ 8.0
3529
- # # │ 27.0
3530
- # # │ 64.0
3531
- # # └──────┘
3522
+ # # shape: (4, 3)
3523
+ # # ┌─────┬──────┬────────────┐
3524
+ # # │ x ┆ cube ┆ x ** xlog2
3525
+ # # │ --- ┆ --- ┆ ---
3526
+ # # │ i64 ┆ i64 ┆ f64
3527
+ # # ╞═════╪══════╪════════════╡
3528
+ # # │ 1 ┆ 1 ┆ 1.0
3529
+ # # │ 2 ┆ 8 ┆ 2.0
3530
+ # # │ 4 ┆ 64 ┆ 16.0
3531
+ # # │ 8 ┆ 512 ┆ 512.0
3532
+ # # └─────┴──────┴────────────┘
3532
3533
  def pow(exponent)
3533
3534
  self**exponent
3534
3535
  end
@@ -3933,7 +3934,7 @@ module Polars
3933
3934
  min_periods: nil,
3934
3935
  center: false,
3935
3936
  by: nil,
3936
- closed: "left"
3937
+ closed: nil
3937
3938
  )
3938
3939
  window_size, min_periods = _prepare_rolling_window_args(
3939
3940
  window_size, min_periods
@@ -4022,7 +4023,7 @@ module Polars
4022
4023
  min_periods: nil,
4023
4024
  center: false,
4024
4025
  by: nil,
4025
- closed: "left"
4026
+ closed: nil
4026
4027
  )
4027
4028
  window_size, min_periods = _prepare_rolling_window_args(
4028
4029
  window_size, min_periods
@@ -4111,7 +4112,7 @@ module Polars
4111
4112
  min_periods: nil,
4112
4113
  center: false,
4113
4114
  by: nil,
4114
- closed: "left"
4115
+ closed: nil
4115
4116
  )
4116
4117
  window_size, min_periods = _prepare_rolling_window_args(
4117
4118
  window_size, min_periods
@@ -4200,7 +4201,7 @@ module Polars
4200
4201
  min_periods: nil,
4201
4202
  center: false,
4202
4203
  by: nil,
4203
- closed: "left"
4204
+ closed: nil
4204
4205
  )
4205
4206
  window_size, min_periods = _prepare_rolling_window_args(
4206
4207
  window_size, min_periods
@@ -4289,7 +4290,7 @@ module Polars
4289
4290
  min_periods: nil,
4290
4291
  center: false,
4291
4292
  by: nil,
4292
- closed: "left",
4293
+ closed: nil,
4293
4294
  ddof: 1,
4294
4295
  warn_if_unsorted: true
4295
4296
  )
@@ -4380,7 +4381,7 @@ module Polars
4380
4381
  min_periods: nil,
4381
4382
  center: false,
4382
4383
  by: nil,
4383
- closed: "left",
4384
+ closed: nil,
4384
4385
  ddof: 1,
4385
4386
  warn_if_unsorted: true
4386
4387
  )
@@ -4467,7 +4468,7 @@ module Polars
4467
4468
  min_periods: nil,
4468
4469
  center: false,
4469
4470
  by: nil,
4470
- closed: "left",
4471
+ closed: nil,
4471
4472
  warn_if_unsorted: true
4472
4473
  )
4473
4474
  window_size, min_periods = _prepare_rolling_window_args(
@@ -4559,7 +4560,7 @@ module Polars
4559
4560
  min_periods: nil,
4560
4561
  center: false,
4561
4562
  by: nil,
4562
- closed: "left",
4563
+ closed: nil,
4563
4564
  warn_if_unsorted: true
4564
4565
  )
4565
4566
  window_size, min_periods = _prepare_rolling_window_args(
@@ -4730,6 +4731,8 @@ module Polars
4730
4731
  # on the order that the values occur in the Series.
4731
4732
  # @param reverse [Boolean]
4732
4733
  # Reverse the operation.
4734
+ # @param seed [Integer]
4735
+ # If `method: "random"`, use this as seed.
4733
4736
  #
4734
4737
  # @return [Expr]
4735
4738
  #
@@ -5711,13 +5714,13 @@ module Polars
5711
5714
  # # ┌────────┐
5712
5715
  # # │ values │
5713
5716
  # # │ --- │
5714
- # # │ f64
5717
+ # # │ i64
5715
5718
  # # ╞════════╡
5716
- # # │ 0.0
5717
- # # │ -3.0
5718
- # # │ -8.0
5719
- # # │ -15.0
5720
- # # │ -24.0
5719
+ # # │ 0
5720
+ # # │ -3
5721
+ # # │ -8
5722
+ # # │ -15
5723
+ # # │ -24
5721
5724
  # # └────────┘
5722
5725
  def cumulative_eval(expr, min_periods: 1, parallel: false)
5723
5726
  _from_rbexpr(
data/lib/polars/functions/lazy.rb CHANGED
@@ -1264,10 +1264,10 @@ module Polars
1264
1264
  # # ┌─────┬─────┬───────┐
1265
1265
  # # │ a ┆ a_a ┆ a_txt │
1266
1266
  # # │ --- ┆ --- ┆ --- │
1267
- # # │ i64 ┆ f64 ┆ str │
1267
+ # # │ i64 ┆ i64 ┆ str │
1268
1268
  # # ╞═════╪═════╪═══════╡
1269
- # # │ 2 ┆ 4.0 ┆ 2 │
1270
- # # │ 1 ┆ 1.0 ┆ 1 │
1269
+ # # │ 2 ┆ 4 ┆ 2 │
1270
+ # # │ 1 ┆ 1 ┆ 1 │
1271
1271
  # # └─────┴─────┴───────┘
1272
1272
  def sql_expr(sql)
1273
1273
  if sql.is_a?(::String)
data/lib/polars/functions/whenthen.rb CHANGED
@@ -4,7 +4,7 @@ module Polars
4
4
  #
5
5
  # @return [When]
6
6
  #
7
- # @example
7
+ # @example Below we add a column with the value 1, where column "foo" > 2 and the value -1 where it isn't.
8
8
  # df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
9
9
  # df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
10
10
  # # =>
@@ -18,10 +18,79 @@ module Polars
18
18
  # # │ 3 ┆ 4 ┆ 1 │
19
19
  # # │ 4 ┆ 0 ┆ 1 │
20
20
  # # └─────┴─────┴─────────┘
21
- def when(expr)
22
- expr = Utils.expr_to_lit_or_expr(expr)
23
- pw = Plr.when(expr._rbexpr)
24
- When.new(pw)
21
+ #
22
+ # @example Or with multiple when-then operations chained:
23
+ # df.with_columns(
24
+ # Polars.when(Polars.col("foo") > 2)
25
+ # .then(1)
26
+ # .when(Polars.col("bar") > 2)
27
+ # .then(4)
28
+ # .otherwise(-1)
29
+ # .alias("val")
30
+ # )
31
+ # # =>
32
+ # # shape: (3, 3)
33
+ # # ┌─────┬─────┬─────┐
34
+ # # │ foo ┆ bar ┆ val │
35
+ # # │ --- ┆ --- ┆ --- │
36
+ # # │ i64 ┆ i64 ┆ i32 │
37
+ # # ╞═════╪═════╪═════╡
38
+ # # │ 1 ┆ 3 ┆ 4 │
39
+ # # │ 3 ┆ 4 ┆ 1 │
40
+ # # │ 4 ┆ 0 ┆ 1 │
41
+ # # └─────┴─────┴─────┘
42
+ #
43
+ # @example The `otherwise` at the end is optional. If left out, any rows where none of the `when` expressions evaluate to True, are set to `null`:
44
+ # df.with_columns(Polars.when(Polars.col("foo") > 2).then(1).alias("val"))
45
+ # # =>
46
+ # # shape: (3, 3)
47
+ # # ┌─────┬─────┬──────┐
48
+ # # │ foo ┆ bar ┆ val │
49
+ # # │ --- ┆ --- ┆ --- │
50
+ # # │ i64 ┆ i64 ┆ i32 │
51
+ # # ╞═════╪═════╪══════╡
52
+ # # │ 1 ┆ 3 ┆ null │
53
+ # # │ 3 ┆ 4 ┆ 1 │
54
+ # # │ 4 ┆ 0 ┆ 1 │
55
+ # # └─────┴─────┴──────┘
56
+ #
57
+ # @example Pass multiple predicates, each of which must be met:
58
+ # df.with_columns(
59
+ # val: Polars.when(
60
+ # Polars.col("bar") > 0,
61
+ # Polars.col("foo") % 2 != 0
62
+ # )
63
+ # .then(99)
64
+ # .otherwise(-1)
65
+ # )
66
+ # # =>
67
+ # # shape: (3, 3)
68
+ # # ┌─────┬─────┬─────┐
69
+ # # │ foo ┆ bar ┆ val │
70
+ # # │ --- ┆ --- ┆ --- │
71
+ # # │ i64 ┆ i64 ┆ i32 │
72
+ # # ╞═════╪═════╪═════╡
73
+ # # │ 1 ┆ 3 ┆ 99 │
74
+ # # │ 3 ┆ 4 ┆ 99 │
75
+ # # │ 4 ┆ 0 ┆ -1 │
76
+ # # └─────┴─────┴─────┘
77
+ #
78
+ # @example Pass conditions as keyword arguments:
79
+ # df.with_columns(val: Polars.when(foo: 4, bar: 0).then(99).otherwise(-1))
80
+ # # =>
81
+ # # shape: (3, 3)
82
+ # # ┌─────┬─────┬─────┐
83
+ # # │ foo ┆ bar ┆ val │
84
+ # # │ --- ┆ --- ┆ --- │
85
+ # # │ i64 ┆ i64 ┆ i32 │
86
+ # # ╞═════╪═════╪═════╡
87
+ # # │ 1 ┆ 3 ┆ -1 │
88
+ # # │ 3 ┆ 4 ┆ -1 │
89
+ # # │ 4 ┆ 0 ┆ 99 │
90
+ # # └─────┴─────┴─────┘
91
+ def when(*predicates, **constraints)
92
+ condition = Utils.parse_when_inputs(*predicates, **constraints)
93
+ When.new(Plr.when(condition))
25
94
  end
26
95
  end
27
96
  end
data/lib/polars/io.rb CHANGED
@@ -80,6 +80,8 @@ module Polars
80
80
  # allocation needed.
81
81
  # @param eol_char [String]
82
82
  # Single byte end of line character.
83
+ # @param truncate_ragged_lines [Boolean]
84
+ # Truncate lines that are longer than the schema.
83
85
  #
84
86
  # @return [DataFrame]
85
87
  #
@@ -113,7 +115,8 @@ module Polars
113
115
  row_count_name: nil,
114
116
  row_count_offset: 0,
115
117
  sample_size: 1024,
116
- eol_char: "\n"
118
+ eol_char: "\n",
119
+ truncate_ragged_lines: false
117
120
  )
118
121
  Utils._check_arg_is_1byte("sep", sep, false)
119
122
  Utils._check_arg_is_1byte("comment_char", comment_char, false)
@@ -161,7 +164,8 @@ module Polars
161
164
  row_count_name: row_count_name,
162
165
  row_count_offset: row_count_offset,
163
166
  sample_size: sample_size,
164
- eol_char: eol_char
167
+ eol_char: eol_char,
168
+ truncate_ragged_lines: truncate_ragged_lines
165
169
  )
166
170
  end
167
171
 
@@ -239,6 +243,8 @@ module Polars
239
243
  # the column remains of data type `:str`.
240
244
  # @param eol_char [String]
241
245
  # Single byte end of line character.
246
+ # @param truncate_ragged_lines [Boolean]
247
+ # Truncate lines that are longer than the schema.
242
248
  #
243
249
  # @return [LazyFrame]
244
250
  def scan_csv(
@@ -262,7 +268,8 @@ module Polars
262
268
  row_count_name: nil,
263
269
  row_count_offset: 0,
264
270
  parse_dates: false,
265
- eol_char: "\n"
271
+ eol_char: "\n",
272
+ truncate_ragged_lines: false
266
273
  )
267
274
  Utils._check_arg_is_1byte("sep", sep, false)
268
275
  Utils._check_arg_is_1byte("comment_char", comment_char, false)
@@ -294,6 +301,7 @@ module Polars
294
301
  row_count_offset: row_count_offset,
295
302
  parse_dates: parse_dates,
296
303
  eol_char: eol_char,
304
+ truncate_ragged_lines: truncate_ragged_lines
297
305
  )
298
306
  end
299
307
 
@@ -520,7 +528,7 @@ module Polars
520
528
 
521
529
  # Read into a DataFrame from a parquet file.
522
530
  #
523
- # @param source [Object]
531
+ # @param source [String, Pathname, StringIO]
524
532
  # Path to a file or a file-like object.
525
533
  # @param columns [Object]
526
534
  # Columns to select. Accepts a list of column indices (starting at zero) or a list
@@ -755,6 +763,8 @@ module Polars
755
763
  # allocation needed.
756
764
  # @param eol_char [String]
757
765
  # Single byte end of line character.
766
+ # @param truncate_ragged_lines [Boolean]
767
+ # Truncate lines that are longer than the schema.
758
768
  #
759
769
  # @return [BatchedCsvReader]
760
770
  #
@@ -787,7 +797,8 @@ module Polars
787
797
  row_count_name: nil,
788
798
  row_count_offset: 0,
789
799
  sample_size: 1024,
790
- eol_char: "\n"
800
+ eol_char: "\n",
801
+ truncate_ragged_lines: false
791
802
  )
792
803
  projection, columns = Utils.handle_projection_columns(columns)
793
804
 
@@ -827,7 +838,8 @@ module Polars
827
838
  row_count_offset: row_count_offset,
828
839
  sample_size: sample_size,
829
840
  eol_char: eol_char,
830
- new_columns: new_columns
841
+ new_columns: new_columns,
842
+ truncate_ragged_lines: truncate_ragged_lines
831
843
  )
832
844
  end
833
845
 
data/lib/polars/lazy_frame.rb CHANGED
@@ -49,7 +49,8 @@ module Polars
49
49
  row_count_name: nil,
50
50
  row_count_offset: 0,
51
51
  parse_dates: false,
52
- eol_char: "\n"
52
+ eol_char: "\n",
53
+ truncate_ragged_lines: true
53
54
  )
54
55
  dtype_list = nil
55
56
  if !dtypes.nil?
@@ -81,7 +82,8 @@ module Polars
81
82
  encoding,
82
83
  Utils._prepare_row_count_args(row_count_name, row_count_offset),
83
84
  parse_dates,
84
- eol_char
85
+ eol_char,
86
+ truncate_ragged_lines
85
87
  )
86
88
  )
87
89
  end
@@ -103,6 +105,7 @@ module Polars
103
105
  _from_rbldf(
104
106
  RbLazyFrame.new_from_parquet(
105
107
  file,
108
+ [],
106
109
  n_rows,
107
110
  cache,
108
111
  parallel,
@@ -110,7 +113,8 @@ module Polars
110
113
  Utils._prepare_row_count_args(row_count_name, row_count_offset),
111
114
  low_memory,
112
115
  use_statistics,
113
- hive_partitioning
116
+ hive_partitioning,
117
+ nil
114
118
  )
115
119
  )
116
120
  end
@@ -400,16 +404,16 @@ module Polars
400
404
  # # │ 2 ┆ 7.0 ┆ b │
401
405
  # # │ 1 ┆ 6.0 ┆ a │
402
406
  # # └─────┴─────┴─────┘
403
- def sort(by, reverse: false, nulls_last: false, maintain_order: false)
407
+ def sort(by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
404
408
  if by.is_a?(::String)
405
- return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order))
409
+ return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order, multithreaded))
406
410
  end
407
411
  if Utils.bool?(reverse)
408
412
  reverse = [reverse]
409
413
  end
410
414
 
411
415
  by = Utils.selection_to_rbexpr_list(by)
412
- _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order))
416
+ _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order, multithreaded))
413
417
  end
414
418
 
415
419
  # def profile
@@ -1523,12 +1527,13 @@ module Polars
1523
1527
  # closed: "right"
1524
1528
  # ).agg(Polars.col("A").alias("A_agg_list"))
1525
1529
  # # =>
1526
- # # shape: (3, 4)
1530
+ # # shape: (4, 4)
1527
1531
  # # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
1528
1532
  # # │ _lower_boundary ┆ _upper_boundary ┆ idx ┆ A_agg_list │
1529
1533
  # # │ --- ┆ --- ┆ --- ┆ --- │
1530
1534
  # # │ i64 ┆ i64 ┆ i64 ┆ list[str] │
1531
1535
  # # ╞═════════════════╪═════════════════╪═════╪═════════════════╡
1536
+ # # │ -2 ┆ 1 ┆ -2 ┆ ["A", "A"] │
1532
1537
  # # │ 0 ┆ 3 ┆ 0 ┆ ["A", "B", "B"] │
1533
1538
  # # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
1534
1539
  # # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
@@ -1837,7 +1842,7 @@ module Polars
1837
1842
  if how == "cross"
1838
1843
  return _from_rbldf(
1839
1844
  _ldf.join(
1840
- other._ldf, [], [], allow_parallel, force_parallel, how, suffix
1845
+ other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
1841
1846
  )
1842
1847
  )
1843
1848
  end
@@ -1891,16 +1896,16 @@ module Polars
1891
1896
  # ).collect
1892
1897
  # # =>
1893
1898
  # # shape: (4, 6)
1894
- # # ┌─────┬──────┬───────┬──────┬──────┬───────┐
1895
- # # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
1896
- # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
1897
- # # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │
1898
- # # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
1899
- # # │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │
1900
- # # │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │
1901
- # # │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
1902
- # # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
1903
- # # └─────┴──────┴───────┴──────┴──────┴───────┘
1899
+ # # ┌─────┬──────┬───────┬─────┬──────┬───────┐
1900
+ # # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
1901
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
1902
+ # # │ i64 ┆ f64 ┆ bool ┆ i64 ┆ f64 ┆ bool │
1903
+ # # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡
1904
+ # # │ 1 ┆ 0.5 ┆ true ┆ 1 ┆ 0.25 ┆ false │
1905
+ # # │ 2 ┆ 4.0 ┆ true ┆ 4 ┆ 2.0 ┆ false │
1906
+ # # │ 3 ┆ 10.0 ┆ false ┆ 9 ┆ 5.0 ┆ true │
1907
+ # # │ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │
1908
+ # # └─────┴──────┴───────┴─────┴──────┴───────┘
1904
1909
  def with_columns(*exprs, **named_exprs)
1905
1910
  structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
1906
1911
  rbexprs = Utils.parse_as_list_of_expressions(*exprs, **named_exprs, __structify: structify)
@@ -1965,26 +1970,26 @@ module Polars
1965
1970
  # # ┌─────┬─────┬───────────┐
1966
1971
  # # │ a ┆ b ┆ b_squared │
1967
1972
  # # │ --- ┆ --- ┆ --- │
1968
- # # │ i64 ┆ i64 ┆ f64
1973
+ # # │ i64 ┆ i64 ┆ i64
1969
1974
  # # ╞═════╪═════╪═══════════╡
1970
- # # │ 1 ┆ 2 ┆ 4.0
1971
- # # │ 3 ┆ 4 ┆ 16.0
1972
- # # │ 5 ┆ 6 ┆ 36.0
1975
+ # # │ 1 ┆ 2 ┆ 4
1976
+ # # │ 3 ┆ 4 ┆ 16
1977
+ # # │ 5 ┆ 6 ┆ 36
1973
1978
  # # └─────┴─────┴───────────┘
1974
1979
  #
1975
1980
  # @example
1976
1981
  # df.with_column(Polars.col("a") ** 2).collect
1977
1982
  # # =>
1978
1983
  # # shape: (3, 2)
1979
- # # ┌──────┬─────┐
1980
- # # │ a ┆ b │
1981
- # # │ --- ┆ --- │
1982
- # # │ f64 ┆ i64 │
1983
- # # ╞══════╪═════╡
1984
- # # │ 1.0 ┆ 2 │
1985
- # # │ 9.0 ┆ 4 │
1986
- # # │ 25.0 ┆ 6 │
1987
- # # └──────┴─────┘
1984
+ # # ┌─────┬─────┐
1985
+ # # │ a ┆ b │
1986
+ # # │ --- ┆ --- │
1987
+ # # │ i64 ┆ i64 │
1988
+ # # ╞═════╪═════╡
1989
+ # # │ 1 ┆ 2 │
1990
+ # # │ 9 ┆ 4 │
1991
+ # # │ 25 ┆ 6 │
1992
+ # # └─────┴─────┘
1988
1993
  def with_column(column)
1989
1994
  with_columns([column])
1990
1995
  end
@@ -1996,11 +2001,9 @@ module Polars
1996
2001
  # - List of column names.
1997
2002
  #
1998
2003
  # @return [LazyFrame]
1999
- def drop(columns)
2000
- if columns.is_a?(::String)
2001
- columns = [columns]
2002
- end
2003
- _from_rbldf(_ldf.drop(columns))
2004
+ def drop(*columns)
2005
+ drop_cols = Utils._expand_selectors(self, *columns)
2006
+ _from_rbldf(_ldf.drop(drop_cols))
2004
2007
  end
2005
2008
 
2006
2009
  # Rename column names.
data/lib/polars/list_expr.rb CHANGED
@@ -365,6 +365,10 @@ module Polars
365
365
  #
366
366
  # @param index [Integer]
367
367
  # Index to return per sublist
368
+ # @param null_on_oob [Boolean]
369
+ # Behavior if an index is out of bounds:
370
+ # true -> set as null
371
+ # false -> raise an error
368
372
  #
369
373
  # @return [Expr]
370
374
  #
@@ -382,9 +386,9 @@ module Polars
382
386
  # # │ null │
383
387
  # # │ 1 │
384
388
  # # └──────┘
385
- def get(index)
389
+ def get(index, null_on_oob: true)
386
390
  index = Utils.parse_as_expression(index)
387
- Utils.wrap_expr(_rbexpr.list_get(index))
391
+ Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
388
392
  end
389
393
 
390
394
  # Get the value by index in the sublists.
data/lib/polars/series.rb CHANGED
@@ -1155,13 +1155,13 @@ module Polars
1155
1155
  # s.cumulative_eval(Polars.element.first - Polars.element.last ** 2)
1156
1156
  # # =>
1157
1157
  # # shape: (5,)
1158
- # # Series: 'values' [f64]
1158
+ # # Series: 'values' [i64]
1159
1159
  # # [
1160
- # # 0.0
1161
- # # -3.0
1162
- # # -8.0
1163
- # # -15.0
1164
- # # -24.0
1160
+ # # 0
1161
+ # # -3
1162
+ # # -8
1163
+ # # -15
1164
+ # # -24
1165
1165
  # # ]
1166
1166
  def cumulative_eval(expr, min_periods: 1, parallel: false)
1167
1167
  super
@@ -1567,12 +1567,12 @@ module Polars
1567
1567
  # # 2
1568
1568
  # # 1
1569
1569
  # # ]
1570
- def sort(reverse: false, nulls_last: false, in_place: false)
1570
+ def sort(reverse: false, nulls_last: false, multithreaded: true, in_place: false)
1571
1571
  if in_place
1572
- self._s = _s.sort(reverse, nulls_last)
1572
+ self._s = _s.sort(reverse, nulls_last, multithreaded)
1573
1573
  self
1574
1574
  else
1575
- Utils.wrap_s(_s.sort(reverse, nulls_last))
1575
+ Utils.wrap_s(_s.sort(reverse, nulls_last, multithreaded))
1576
1576
  end
1577
1577
  end
1578
1578
 
@@ -3646,6 +3646,8 @@ module Polars
3646
3646
  # on the order that the values occur in the Series.
3647
3647
  # @param reverse [Boolean]
3648
3648
  # Reverse the operation.
3649
+ # @param seed [Integer]
3650
+ # If `method: "random"`, use this as seed.
3649
3651
  #
3650
3652
  # @return [Series]
3651
3653
  #
@@ -3676,7 +3678,7 @@ module Polars
3676
3678
  # # 2
3677
3679
  # # 5
3678
3680
  # # ]
3679
- def rank(method: "average", reverse: false)
3681
+ def rank(method: "average", reverse: false, seed: nil)
3680
3682
  super
3681
3683
  end
3682
3684
 
data/lib/polars/string_expr.rb CHANGED
@@ -1354,6 +1354,7 @@ module Polars
1354
1354
  # # │ null ┆ null │
1355
1355
  # # └──────┴────────┘
1356
1356
  def to_integer(base: 10, strict: true)
1357
+ base = Utils.parse_as_expression(base, str_as_lit: false)
1357
1358
  Utils.wrap_expr(_rbexpr.str_to_integer(base, strict))
1358
1359
  end
1359
1360
 
data/lib/polars/utils.rb CHANGED
@@ -364,5 +364,59 @@ module Polars
364
364
  end
365
365
  end
366
366
  end
367
+
368
+ def self._expand_selectors(frame, *items)
369
+ items_iter = _parse_inputs_as_iterable(items)
370
+
371
+ expanded = []
372
+ items_iter.each do |item|
373
+ if is_selector(item)
374
+ selector_cols = expand_selector(frame, item)
375
+ expanded.concat(selector_cols)
376
+ else
377
+ expanded << item
378
+ end
379
+ end
380
+ expanded
381
+ end
382
+
383
+ # TODO
384
+ def self.is_selector(obj)
385
+ false
386
+ end
387
+
388
+ def self.parse_predicates_constraints_as_expression(*predicates, **constraints)
389
+ all_predicates = _parse_positional_inputs(predicates)
390
+
391
+ if constraints.any?
392
+ constraint_predicates = _parse_constraints(constraints)
393
+ all_predicates.concat(constraint_predicates)
394
+ end
395
+
396
+ _combine_predicates(all_predicates)
397
+ end
398
+
399
+ def self._parse_constraints(constraints)
400
+ constraints.map do |name, value|
401
+ Polars.col(name).eq(value)._rbexpr
402
+ end
403
+ end
404
+
405
+ def self._combine_predicates(predicates)
406
+ if !predicates.any?
407
+ msg = "at least one predicate or constraint must be provided"
408
+ raise TypeError, msg
409
+ end
410
+
411
+ if predicates.length == 1
412
+ return predicates[0]
413
+ end
414
+
415
+ Plr.all_horizontal(predicates)
416
+ end
417
+
418
+ def self.parse_when_inputs(*predicates, **constraints)
419
+ parse_predicates_constraints_as_expression(*predicates, **constraints)
420
+ end
367
421
  end
368
422
  end
data/lib/polars/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.9.0"
3
+ VERSION = "0.10.0"
4
4
  end