polars-df 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +90 -45
- data/README.md +1 -0
- data/ext/polars/Cargo.toml +8 -6
- data/ext/polars/src/batched_csv.rs +3 -1
- data/ext/polars/src/conversion/anyvalue.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +18 -7
- data/ext/polars/src/dataframe.rs +40 -14
- data/ext/polars/src/expr/array.rs +6 -2
- data/ext/polars/src/expr/datetime.rs +7 -2
- data/ext/polars/src/expr/general.rs +22 -3
- data/ext/polars/src/expr/list.rs +6 -2
- data/ext/polars/src/expr/string.rs +3 -3
- data/ext/polars/src/file.rs +158 -11
- data/ext/polars/src/functions/lazy.rs +18 -3
- data/ext/polars/src/functions/whenthen.rs +47 -17
- data/ext/polars/src/lazyframe/mod.rs +58 -19
- data/ext/polars/src/lib.rs +23 -14
- data/ext/polars/src/map/dataframe.rs +17 -9
- data/ext/polars/src/series/mod.rs +12 -2
- data/lib/polars/array_expr.rb +6 -2
- data/lib/polars/batched_csv_reader.rb +4 -2
- data/lib/polars/data_frame.rb +148 -74
- data/lib/polars/date_time_expr.rb +10 -4
- data/lib/polars/date_time_name_space.rb +9 -3
- data/lib/polars/expr.rb +37 -34
- data/lib/polars/functions/lazy.rb +3 -3
- data/lib/polars/functions/whenthen.rb +74 -5
- data/lib/polars/io.rb +18 -6
- data/lib/polars/lazy_frame.rb +39 -36
- data/lib/polars/list_expr.rb +6 -2
- data/lib/polars/series.rb +12 -10
- data/lib/polars/string_expr.rb +1 -0
- data/lib/polars/utils.rb +54 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +1 -2
- metadata +4 -5
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
data/lib/polars/expr.rb
CHANGED
@@ -1544,16 +1544,14 @@ module Polars
|
|
1544
1544
|
# # │ one │
|
1545
1545
|
# # │ two │
|
1546
1546
|
# # └───────┘
|
1547
|
-
def sort_by(by, reverse: false)
|
1548
|
-
|
1549
|
-
by = [by]
|
1550
|
-
end
|
1547
|
+
def sort_by(by, *more_by, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false)
|
1548
|
+
by = Utils.parse_as_list_of_expressions(by, *more_by)
|
1551
1549
|
if !reverse.is_a?(::Array)
|
1552
1550
|
reverse = [reverse]
|
1551
|
+
elsif by.length != reverse.length
|
1552
|
+
raise ArgumentError, "the length of `reverse` (#{reverse.length}) does not match the length of `by` (#{by.length})"
|
1553
1553
|
end
|
1554
|
-
by
|
1555
|
-
|
1556
|
-
_from_rbexpr(_rbexpr.sort_by(by, reverse))
|
1554
|
+
_from_rbexpr(_rbexpr.sort_by(by, reverse, nulls_last, multithreaded, maintain_order))
|
1557
1555
|
end
|
1558
1556
|
|
1559
1557
|
# Take values by index.
|
@@ -3515,20 +3513,23 @@ module Polars
|
|
3515
3513
|
# @return [Expr]
|
3516
3514
|
#
|
3517
3515
|
# @example
|
3518
|
-
# df = Polars::DataFrame.new({"
|
3519
|
-
# df.
|
3516
|
+
# df = Polars::DataFrame.new({"x" => [1, 2, 4, 8]})
|
3517
|
+
# df.with_columns(
|
3518
|
+
# Polars.col("x").pow(3).alias("cube"),
|
3519
|
+
# Polars.col("x").pow(Polars.col("x").log(2)).alias("x ** xlog2")
|
3520
|
+
# )
|
3520
3521
|
# # =>
|
3521
|
-
# # shape: (4,
|
3522
|
-
# #
|
3523
|
-
# # │
|
3524
|
-
# # │ --- │
|
3525
|
-
# # │ f64
|
3526
|
-
# #
|
3527
|
-
# # │ 1.0
|
3528
|
-
# # │ 8.0
|
3529
|
-
# # │
|
3530
|
-
# # │
|
3531
|
-
# #
|
3522
|
+
# # shape: (4, 3)
|
3523
|
+
# # ┌─────┬──────┬────────────┐
|
3524
|
+
# # │ x ┆ cube ┆ x ** xlog2 │
|
3525
|
+
# # │ --- ┆ --- ┆ --- │
|
3526
|
+
# # │ i64 ┆ i64 ┆ f64 │
|
3527
|
+
# # ╞═════╪══════╪════════════╡
|
3528
|
+
# # │ 1 ┆ 1 ┆ 1.0 │
|
3529
|
+
# # │ 2 ┆ 8 ┆ 2.0 │
|
3530
|
+
# # │ 4 ┆ 64 ┆ 16.0 │
|
3531
|
+
# # │ 8 ┆ 512 ┆ 512.0 │
|
3532
|
+
# # └─────┴──────┴────────────┘
|
3532
3533
|
def pow(exponent)
|
3533
3534
|
self**exponent
|
3534
3535
|
end
|
@@ -3933,7 +3934,7 @@ module Polars
|
|
3933
3934
|
min_periods: nil,
|
3934
3935
|
center: false,
|
3935
3936
|
by: nil,
|
3936
|
-
closed:
|
3937
|
+
closed: nil
|
3937
3938
|
)
|
3938
3939
|
window_size, min_periods = _prepare_rolling_window_args(
|
3939
3940
|
window_size, min_periods
|
@@ -4022,7 +4023,7 @@ module Polars
|
|
4022
4023
|
min_periods: nil,
|
4023
4024
|
center: false,
|
4024
4025
|
by: nil,
|
4025
|
-
closed:
|
4026
|
+
closed: nil
|
4026
4027
|
)
|
4027
4028
|
window_size, min_periods = _prepare_rolling_window_args(
|
4028
4029
|
window_size, min_periods
|
@@ -4111,7 +4112,7 @@ module Polars
|
|
4111
4112
|
min_periods: nil,
|
4112
4113
|
center: false,
|
4113
4114
|
by: nil,
|
4114
|
-
closed:
|
4115
|
+
closed: nil
|
4115
4116
|
)
|
4116
4117
|
window_size, min_periods = _prepare_rolling_window_args(
|
4117
4118
|
window_size, min_periods
|
@@ -4200,7 +4201,7 @@ module Polars
|
|
4200
4201
|
min_periods: nil,
|
4201
4202
|
center: false,
|
4202
4203
|
by: nil,
|
4203
|
-
closed:
|
4204
|
+
closed: nil
|
4204
4205
|
)
|
4205
4206
|
window_size, min_periods = _prepare_rolling_window_args(
|
4206
4207
|
window_size, min_periods
|
@@ -4289,7 +4290,7 @@ module Polars
|
|
4289
4290
|
min_periods: nil,
|
4290
4291
|
center: false,
|
4291
4292
|
by: nil,
|
4292
|
-
closed:
|
4293
|
+
closed: nil,
|
4293
4294
|
ddof: 1,
|
4294
4295
|
warn_if_unsorted: true
|
4295
4296
|
)
|
@@ -4380,7 +4381,7 @@ module Polars
|
|
4380
4381
|
min_periods: nil,
|
4381
4382
|
center: false,
|
4382
4383
|
by: nil,
|
4383
|
-
closed:
|
4384
|
+
closed: nil,
|
4384
4385
|
ddof: 1,
|
4385
4386
|
warn_if_unsorted: true
|
4386
4387
|
)
|
@@ -4467,7 +4468,7 @@ module Polars
|
|
4467
4468
|
min_periods: nil,
|
4468
4469
|
center: false,
|
4469
4470
|
by: nil,
|
4470
|
-
closed:
|
4471
|
+
closed: nil,
|
4471
4472
|
warn_if_unsorted: true
|
4472
4473
|
)
|
4473
4474
|
window_size, min_periods = _prepare_rolling_window_args(
|
@@ -4559,7 +4560,7 @@ module Polars
|
|
4559
4560
|
min_periods: nil,
|
4560
4561
|
center: false,
|
4561
4562
|
by: nil,
|
4562
|
-
closed:
|
4563
|
+
closed: nil,
|
4563
4564
|
warn_if_unsorted: true
|
4564
4565
|
)
|
4565
4566
|
window_size, min_periods = _prepare_rolling_window_args(
|
@@ -4730,6 +4731,8 @@ module Polars
|
|
4730
4731
|
# on the order that the values occur in the Series.
|
4731
4732
|
# @param reverse [Boolean]
|
4732
4733
|
# Reverse the operation.
|
4734
|
+
# @param seed [Integer]
|
4735
|
+
# If `method: "random"`, use this as seed.
|
4733
4736
|
#
|
4734
4737
|
# @return [Expr]
|
4735
4738
|
#
|
@@ -5711,13 +5714,13 @@ module Polars
|
|
5711
5714
|
# # ┌────────┐
|
5712
5715
|
# # │ values │
|
5713
5716
|
# # │ --- │
|
5714
|
-
# # │
|
5717
|
+
# # │ i64 │
|
5715
5718
|
# # ╞════════╡
|
5716
|
-
# # │ 0
|
5717
|
-
# # │ -3
|
5718
|
-
# # │ -8
|
5719
|
-
# # │ -15
|
5720
|
-
# # │ -24
|
5719
|
+
# # │ 0 │
|
5720
|
+
# # │ -3 │
|
5721
|
+
# # │ -8 │
|
5722
|
+
# # │ -15 │
|
5723
|
+
# # │ -24 │
|
5721
5724
|
# # └────────┘
|
5722
5725
|
def cumulative_eval(expr, min_periods: 1, parallel: false)
|
5723
5726
|
_from_rbexpr(
|
data/lib/polars/functions/lazy.rb
CHANGED
@@ -1264,10 +1264,10 @@ module Polars
|
|
1264
1264
|
# # ┌─────┬─────┬───────┐
|
1265
1265
|
# # │ a ┆ a_a ┆ a_txt │
|
1266
1266
|
# # │ --- ┆ --- ┆ --- │
|
1267
|
-
# # │ i64 ┆
|
1267
|
+
# # │ i64 ┆ i64 ┆ str │
|
1268
1268
|
# # ╞═════╪═════╪═══════╡
|
1269
|
-
# # │ 2 ┆ 4
|
1270
|
-
# # │ 1 ┆ 1
|
1269
|
+
# # │ 2 ┆ 4 ┆ 2 │
|
1270
|
+
# # │ 1 ┆ 1 ┆ 1 │
|
1271
1271
|
# # └─────┴─────┴───────┘
|
1272
1272
|
def sql_expr(sql)
|
1273
1273
|
if sql.is_a?(::String)
|
data/lib/polars/functions/whenthen.rb
CHANGED
@@ -4,7 +4,7 @@ module Polars
|
|
4
4
|
#
|
5
5
|
# @return [When]
|
6
6
|
#
|
7
|
-
# @example
|
7
|
+
# @example Below we add a column with the value 1, where column "foo" > 2 and the value -1 where it isn't.
|
8
8
|
# df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
|
9
9
|
# df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
|
10
10
|
# # =>
|
@@ -18,10 +18,79 @@ module Polars
|
|
18
18
|
# # │ 3 ┆ 4 ┆ 1 │
|
19
19
|
# # │ 4 ┆ 0 ┆ 1 │
|
20
20
|
# # └─────┴─────┴─────────┘
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
21
|
+
#
|
22
|
+
# @example Or with multiple when-then operations chained:
|
23
|
+
# df.with_columns(
|
24
|
+
# Polars.when(Polars.col("foo") > 2)
|
25
|
+
# .then(1)
|
26
|
+
# .when(Polars.col("bar") > 2)
|
27
|
+
# .then(4)
|
28
|
+
# .otherwise(-1)
|
29
|
+
# .alias("val")
|
30
|
+
# )
|
31
|
+
# # =>
|
32
|
+
# # shape: (3, 3)
|
33
|
+
# # ┌─────┬─────┬─────┐
|
34
|
+
# # │ foo ┆ bar ┆ val │
|
35
|
+
# # │ --- ┆ --- ┆ --- │
|
36
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
37
|
+
# # ╞═════╪═════╪═════╡
|
38
|
+
# # │ 1 ┆ 3 ┆ 4 │
|
39
|
+
# # │ 3 ┆ 4 ┆ 1 │
|
40
|
+
# # │ 4 ┆ 0 ┆ 1 │
|
41
|
+
# # └─────┴─────┴─────┘
|
42
|
+
#
|
43
|
+
# @example The `otherwise` at the end is optional. If left out, any rows where none of the `when` expressions evaluate to True, are set to `null`:
|
44
|
+
# df.with_columns(Polars.when(Polars.col("foo") > 2).then(1).alias("val"))
|
45
|
+
# # =>
|
46
|
+
# # shape: (3, 3)
|
47
|
+
# # ┌─────┬─────┬──────┐
|
48
|
+
# # │ foo ┆ bar ┆ val │
|
49
|
+
# # │ --- ┆ --- ┆ --- │
|
50
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
51
|
+
# # ╞═════╪═════╪══════╡
|
52
|
+
# # │ 1 ┆ 3 ┆ null │
|
53
|
+
# # │ 3 ┆ 4 ┆ 1 │
|
54
|
+
# # │ 4 ┆ 0 ┆ 1 │
|
55
|
+
# # └─────┴─────┴──────┘
|
56
|
+
#
|
57
|
+
# @example Pass multiple predicates, each of which must be met:
|
58
|
+
# df.with_columns(
|
59
|
+
# val: Polars.when(
|
60
|
+
# Polars.col("bar") > 0,
|
61
|
+
# Polars.col("foo") % 2 != 0
|
62
|
+
# )
|
63
|
+
# .then(99)
|
64
|
+
# .otherwise(-1)
|
65
|
+
# )
|
66
|
+
# # =>
|
67
|
+
# # shape: (3, 3)
|
68
|
+
# # ┌─────┬─────┬─────┐
|
69
|
+
# # │ foo ┆ bar ┆ val │
|
70
|
+
# # │ --- ┆ --- ┆ --- │
|
71
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
72
|
+
# # ╞═════╪═════╪═════╡
|
73
|
+
# # │ 1 ┆ 3 ┆ 99 │
|
74
|
+
# # │ 3 ┆ 4 ┆ 99 │
|
75
|
+
# # │ 4 ┆ 0 ┆ -1 │
|
76
|
+
# # └─────┴─────┴─────┘
|
77
|
+
#
|
78
|
+
# @example Pass conditions as keyword arguments:
|
79
|
+
# df.with_columns(val: Polars.when(foo: 4, bar: 0).then(99).otherwise(-1))
|
80
|
+
# # =>
|
81
|
+
# # shape: (3, 3)
|
82
|
+
# # ┌─────┬─────┬─────┐
|
83
|
+
# # │ foo ┆ bar ┆ val │
|
84
|
+
# # │ --- ┆ --- ┆ --- │
|
85
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
86
|
+
# # ╞═════╪═════╪═════╡
|
87
|
+
# # │ 1 ┆ 3 ┆ -1 │
|
88
|
+
# # │ 3 ┆ 4 ┆ -1 │
|
89
|
+
# # │ 4 ┆ 0 ┆ 99 │
|
90
|
+
# # └─────┴─────┴─────┘
|
91
|
+
def when(*predicates, **constraints)
|
92
|
+
condition = Utils.parse_when_inputs(*predicates, **constraints)
|
93
|
+
When.new(Plr.when(condition))
|
25
94
|
end
|
26
95
|
end
|
27
96
|
end
|
data/lib/polars/io.rb
CHANGED
@@ -80,6 +80,8 @@ module Polars
|
|
80
80
|
# allocation needed.
|
81
81
|
# @param eol_char [String]
|
82
82
|
# Single byte end of line character.
|
83
|
+
# @param truncate_ragged_lines [Boolean]
|
84
|
+
# Truncate lines that are longer than the schema.
|
83
85
|
#
|
84
86
|
# @return [DataFrame]
|
85
87
|
#
|
@@ -113,7 +115,8 @@ module Polars
|
|
113
115
|
row_count_name: nil,
|
114
116
|
row_count_offset: 0,
|
115
117
|
sample_size: 1024,
|
116
|
-
eol_char: "\n"
|
118
|
+
eol_char: "\n",
|
119
|
+
truncate_ragged_lines: false
|
117
120
|
)
|
118
121
|
Utils._check_arg_is_1byte("sep", sep, false)
|
119
122
|
Utils._check_arg_is_1byte("comment_char", comment_char, false)
|
@@ -161,7 +164,8 @@ module Polars
|
|
161
164
|
row_count_name: row_count_name,
|
162
165
|
row_count_offset: row_count_offset,
|
163
166
|
sample_size: sample_size,
|
164
|
-
eol_char: eol_char
|
167
|
+
eol_char: eol_char,
|
168
|
+
truncate_ragged_lines: truncate_ragged_lines
|
165
169
|
)
|
166
170
|
end
|
167
171
|
|
@@ -239,6 +243,8 @@ module Polars
|
|
239
243
|
# the column remains of data type `:str`.
|
240
244
|
# @param eol_char [String]
|
241
245
|
# Single byte end of line character.
|
246
|
+
# @param truncate_ragged_lines [Boolean]
|
247
|
+
# Truncate lines that are longer than the schema.
|
242
248
|
#
|
243
249
|
# @return [LazyFrame]
|
244
250
|
def scan_csv(
|
@@ -262,7 +268,8 @@ module Polars
|
|
262
268
|
row_count_name: nil,
|
263
269
|
row_count_offset: 0,
|
264
270
|
parse_dates: false,
|
265
|
-
eol_char: "\n"
|
271
|
+
eol_char: "\n",
|
272
|
+
truncate_ragged_lines: false
|
266
273
|
)
|
267
274
|
Utils._check_arg_is_1byte("sep", sep, false)
|
268
275
|
Utils._check_arg_is_1byte("comment_char", comment_char, false)
|
@@ -294,6 +301,7 @@ module Polars
|
|
294
301
|
row_count_offset: row_count_offset,
|
295
302
|
parse_dates: parse_dates,
|
296
303
|
eol_char: eol_char,
|
304
|
+
truncate_ragged_lines: truncate_ragged_lines
|
297
305
|
)
|
298
306
|
end
|
299
307
|
|
@@ -520,7 +528,7 @@ module Polars
|
|
520
528
|
|
521
529
|
# Read into a DataFrame from a parquet file.
|
522
530
|
#
|
523
|
-
# @param source [
|
531
|
+
# @param source [String, Pathname, StringIO]
|
524
532
|
# Path to a file or a file-like object.
|
525
533
|
# @param columns [Object]
|
526
534
|
# Columns to select. Accepts a list of column indices (starting at zero) or a list
|
@@ -755,6 +763,8 @@ module Polars
|
|
755
763
|
# allocation needed.
|
756
764
|
# @param eol_char [String]
|
757
765
|
# Single byte end of line character.
|
766
|
+
# @param truncate_ragged_lines [Boolean]
|
767
|
+
# Truncate lines that are longer than the schema.
|
758
768
|
#
|
759
769
|
# @return [BatchedCsvReader]
|
760
770
|
#
|
@@ -787,7 +797,8 @@ module Polars
|
|
787
797
|
row_count_name: nil,
|
788
798
|
row_count_offset: 0,
|
789
799
|
sample_size: 1024,
|
790
|
-
eol_char: "\n"
|
800
|
+
eol_char: "\n",
|
801
|
+
truncate_ragged_lines: false
|
791
802
|
)
|
792
803
|
projection, columns = Utils.handle_projection_columns(columns)
|
793
804
|
|
@@ -827,7 +838,8 @@ module Polars
|
|
827
838
|
row_count_offset: row_count_offset,
|
828
839
|
sample_size: sample_size,
|
829
840
|
eol_char: eol_char,
|
830
|
-
new_columns: new_columns
|
841
|
+
new_columns: new_columns,
|
842
|
+
truncate_ragged_lines: truncate_ragged_lines
|
831
843
|
)
|
832
844
|
end
|
833
845
|
|
data/lib/polars/lazy_frame.rb
CHANGED
@@ -49,7 +49,8 @@ module Polars
|
|
49
49
|
row_count_name: nil,
|
50
50
|
row_count_offset: 0,
|
51
51
|
parse_dates: false,
|
52
|
-
eol_char: "\n"
|
52
|
+
eol_char: "\n",
|
53
|
+
truncate_ragged_lines: true
|
53
54
|
)
|
54
55
|
dtype_list = nil
|
55
56
|
if !dtypes.nil?
|
@@ -81,7 +82,8 @@ module Polars
|
|
81
82
|
encoding,
|
82
83
|
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
83
84
|
parse_dates,
|
84
|
-
eol_char
|
85
|
+
eol_char,
|
86
|
+
truncate_ragged_lines
|
85
87
|
)
|
86
88
|
)
|
87
89
|
end
|
@@ -103,6 +105,7 @@ module Polars
|
|
103
105
|
_from_rbldf(
|
104
106
|
RbLazyFrame.new_from_parquet(
|
105
107
|
file,
|
108
|
+
[],
|
106
109
|
n_rows,
|
107
110
|
cache,
|
108
111
|
parallel,
|
@@ -110,7 +113,8 @@ module Polars
|
|
110
113
|
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
111
114
|
low_memory,
|
112
115
|
use_statistics,
|
113
|
-
hive_partitioning
|
116
|
+
hive_partitioning,
|
117
|
+
nil
|
114
118
|
)
|
115
119
|
)
|
116
120
|
end
|
@@ -400,16 +404,16 @@ module Polars
|
|
400
404
|
# # │ 2 ┆ 7.0 ┆ b │
|
401
405
|
# # │ 1 ┆ 6.0 ┆ a │
|
402
406
|
# # └─────┴─────┴─────┘
|
403
|
-
def sort(by, reverse: false, nulls_last: false, maintain_order: false)
|
407
|
+
def sort(by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
|
404
408
|
if by.is_a?(::String)
|
405
|
-
return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order))
|
409
|
+
return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order, multithreaded))
|
406
410
|
end
|
407
411
|
if Utils.bool?(reverse)
|
408
412
|
reverse = [reverse]
|
409
413
|
end
|
410
414
|
|
411
415
|
by = Utils.selection_to_rbexpr_list(by)
|
412
|
-
_from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order))
|
416
|
+
_from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order, multithreaded))
|
413
417
|
end
|
414
418
|
|
415
419
|
# def profile
|
@@ -1523,12 +1527,13 @@ module Polars
|
|
1523
1527
|
# closed: "right"
|
1524
1528
|
# ).agg(Polars.col("A").alias("A_agg_list"))
|
1525
1529
|
# # =>
|
1526
|
-
# # shape: (
|
1530
|
+
# # shape: (4, 4)
|
1527
1531
|
# # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
|
1528
1532
|
# # │ _lower_boundary ┆ _upper_boundary ┆ idx ┆ A_agg_list │
|
1529
1533
|
# # │ --- ┆ --- ┆ --- ┆ --- │
|
1530
1534
|
# # │ i64 ┆ i64 ┆ i64 ┆ list[str] │
|
1531
1535
|
# # ╞═════════════════╪═════════════════╪═════╪═════════════════╡
|
1536
|
+
# # │ -2 ┆ 1 ┆ -2 ┆ ["A", "A"] │
|
1532
1537
|
# # │ 0 ┆ 3 ┆ 0 ┆ ["A", "B", "B"] │
|
1533
1538
|
# # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
|
1534
1539
|
# # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
|
@@ -1837,7 +1842,7 @@ module Polars
|
|
1837
1842
|
if how == "cross"
|
1838
1843
|
return _from_rbldf(
|
1839
1844
|
_ldf.join(
|
1840
|
-
other._ldf, [], [], allow_parallel, force_parallel, how, suffix
|
1845
|
+
other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
|
1841
1846
|
)
|
1842
1847
|
)
|
1843
1848
|
end
|
@@ -1891,16 +1896,16 @@ module Polars
|
|
1891
1896
|
# ).collect
|
1892
1897
|
# # =>
|
1893
1898
|
# # shape: (4, 6)
|
1894
|
-
# #
|
1895
|
-
# # │ a ┆ b ┆ c ┆ a^2
|
1896
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
1897
|
-
# # │ i64 ┆ f64 ┆ bool ┆
|
1898
|
-
# #
|
1899
|
-
# # │ 1 ┆ 0.5 ┆ true ┆ 1
|
1900
|
-
# # │ 2 ┆ 4.0 ┆ true ┆ 4
|
1901
|
-
# # │ 3 ┆ 10.0 ┆ false ┆ 9
|
1902
|
-
# # │ 4 ┆ 13.0 ┆ true ┆ 16
|
1903
|
-
# #
|
1899
|
+
# # ┌─────┬──────┬───────┬─────┬──────┬───────┐
|
1900
|
+
# # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
|
1901
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
1902
|
+
# # │ i64 ┆ f64 ┆ bool ┆ i64 ┆ f64 ┆ bool │
|
1903
|
+
# # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡
|
1904
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 1 ┆ 0.25 ┆ false │
|
1905
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 4 ┆ 2.0 ┆ false │
|
1906
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 9 ┆ 5.0 ┆ true │
|
1907
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │
|
1908
|
+
# # └─────┴──────┴───────┴─────┴──────┴───────┘
|
1904
1909
|
def with_columns(*exprs, **named_exprs)
|
1905
1910
|
structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
|
1906
1911
|
rbexprs = Utils.parse_as_list_of_expressions(*exprs, **named_exprs, __structify: structify)
|
@@ -1965,26 +1970,26 @@ module Polars
|
|
1965
1970
|
# # ┌─────┬─────┬───────────┐
|
1966
1971
|
# # │ a ┆ b ┆ b_squared │
|
1967
1972
|
# # │ --- ┆ --- ┆ --- │
|
1968
|
-
# # │ i64 ┆ i64 ┆
|
1973
|
+
# # │ i64 ┆ i64 ┆ i64 │
|
1969
1974
|
# # ╞═════╪═════╪═══════════╡
|
1970
|
-
# # │ 1 ┆ 2 ┆ 4
|
1971
|
-
# # │ 3 ┆ 4 ┆ 16
|
1972
|
-
# # │ 5 ┆ 6 ┆ 36
|
1975
|
+
# # │ 1 ┆ 2 ┆ 4 │
|
1976
|
+
# # │ 3 ┆ 4 ┆ 16 │
|
1977
|
+
# # │ 5 ┆ 6 ┆ 36 │
|
1973
1978
|
# # └─────┴─────┴───────────┘
|
1974
1979
|
#
|
1975
1980
|
# @example
|
1976
1981
|
# df.with_column(Polars.col("a") ** 2).collect
|
1977
1982
|
# # =>
|
1978
1983
|
# # shape: (3, 2)
|
1979
|
-
# #
|
1980
|
-
# # │ a
|
1981
|
-
# # │ ---
|
1982
|
-
# # │
|
1983
|
-
# #
|
1984
|
-
# # │ 1
|
1985
|
-
# # │ 9
|
1986
|
-
# # │ 25
|
1987
|
-
# #
|
1984
|
+
# # ┌─────┬─────┐
|
1985
|
+
# # │ a ┆ b │
|
1986
|
+
# # │ --- ┆ --- │
|
1987
|
+
# # │ i64 ┆ i64 │
|
1988
|
+
# # ╞═════╪═════╡
|
1989
|
+
# # │ 1 ┆ 2 │
|
1990
|
+
# # │ 9 ┆ 4 │
|
1991
|
+
# # │ 25 ┆ 6 │
|
1992
|
+
# # └─────┴─────┘
|
1988
1993
|
def with_column(column)
|
1989
1994
|
with_columns([column])
|
1990
1995
|
end
|
@@ -1996,11 +2001,9 @@ module Polars
|
|
1996
2001
|
# - List of column names.
|
1997
2002
|
#
|
1998
2003
|
# @return [LazyFrame]
|
1999
|
-
def drop(columns)
|
2000
|
-
|
2001
|
-
|
2002
|
-
end
|
2003
|
-
_from_rbldf(_ldf.drop(columns))
|
2004
|
+
def drop(*columns)
|
2005
|
+
drop_cols = Utils._expand_selectors(self, *columns)
|
2006
|
+
_from_rbldf(_ldf.drop(drop_cols))
|
2004
2007
|
end
|
2005
2008
|
|
2006
2009
|
# Rename column names.
|
data/lib/polars/list_expr.rb
CHANGED
@@ -365,6 +365,10 @@ module Polars
|
|
365
365
|
#
|
366
366
|
# @param index [Integer]
|
367
367
|
# Index to return per sublist
|
368
|
+
# @param null_on_oob [Boolean]
|
369
|
+
# Behavior if an index is out of bounds:
|
370
|
+
# true -> set as null
|
371
|
+
# false -> raise an error
|
368
372
|
#
|
369
373
|
# @return [Expr]
|
370
374
|
#
|
@@ -382,9 +386,9 @@ module Polars
|
|
382
386
|
# # │ null │
|
383
387
|
# # │ 1 │
|
384
388
|
# # └──────┘
|
385
|
-
def get(index)
|
389
|
+
def get(index, null_on_oob: true)
|
386
390
|
index = Utils.parse_as_expression(index)
|
387
|
-
Utils.wrap_expr(_rbexpr.list_get(index))
|
391
|
+
Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
|
388
392
|
end
|
389
393
|
|
390
394
|
# Get the value by index in the sublists.
|
data/lib/polars/series.rb
CHANGED
@@ -1155,13 +1155,13 @@ module Polars
|
|
1155
1155
|
# s.cumulative_eval(Polars.element.first - Polars.element.last ** 2)
|
1156
1156
|
# # =>
|
1157
1157
|
# # shape: (5,)
|
1158
|
-
# # Series: 'values' [
|
1158
|
+
# # Series: 'values' [i64]
|
1159
1159
|
# # [
|
1160
|
-
# # 0
|
1161
|
-
# # -3
|
1162
|
-
# # -8
|
1163
|
-
# # -15
|
1164
|
-
# # -24
|
1160
|
+
# # 0
|
1161
|
+
# # -3
|
1162
|
+
# # -8
|
1163
|
+
# # -15
|
1164
|
+
# # -24
|
1165
1165
|
# # ]
|
1166
1166
|
def cumulative_eval(expr, min_periods: 1, parallel: false)
|
1167
1167
|
super
|
@@ -1567,12 +1567,12 @@ module Polars
|
|
1567
1567
|
# # 2
|
1568
1568
|
# # 1
|
1569
1569
|
# # ]
|
1570
|
-
def sort(reverse: false, nulls_last: false, in_place: false)
|
1570
|
+
def sort(reverse: false, nulls_last: false, multithreaded: true, in_place: false)
|
1571
1571
|
if in_place
|
1572
|
-
self._s = _s.sort(reverse, nulls_last)
|
1572
|
+
self._s = _s.sort(reverse, nulls_last, multithreaded)
|
1573
1573
|
self
|
1574
1574
|
else
|
1575
|
-
Utils.wrap_s(_s.sort(reverse, nulls_last))
|
1575
|
+
Utils.wrap_s(_s.sort(reverse, nulls_last, multithreaded))
|
1576
1576
|
end
|
1577
1577
|
end
|
1578
1578
|
|
@@ -3646,6 +3646,8 @@ module Polars
|
|
3646
3646
|
# on the order that the values occur in the Series.
|
3647
3647
|
# @param reverse [Boolean]
|
3648
3648
|
# Reverse the operation.
|
3649
|
+
# @param seed [Integer]
|
3650
|
+
# If `method: "random"`, use this as seed.
|
3649
3651
|
#
|
3650
3652
|
# @return [Series]
|
3651
3653
|
#
|
@@ -3676,7 +3678,7 @@ module Polars
|
|
3676
3678
|
# # 2
|
3677
3679
|
# # 5
|
3678
3680
|
# # ]
|
3679
|
-
def rank(method: "average", reverse: false)
|
3681
|
+
def rank(method: "average", reverse: false, seed: nil)
|
3680
3682
|
super
|
3681
3683
|
end
|
3682
3684
|
|
data/lib/polars/string_expr.rb
CHANGED
data/lib/polars/utils.rb
CHANGED
@@ -364,5 +364,59 @@ module Polars
|
|
364
364
|
end
|
365
365
|
end
|
366
366
|
end
|
367
|
+
|
368
|
+
def self._expand_selectors(frame, *items)
|
369
|
+
items_iter = _parse_inputs_as_iterable(items)
|
370
|
+
|
371
|
+
expanded = []
|
372
|
+
items_iter.each do |item|
|
373
|
+
if is_selector(item)
|
374
|
+
selector_cols = expand_selector(frame, item)
|
375
|
+
expanded.concat(selector_cols)
|
376
|
+
else
|
377
|
+
expanded << item
|
378
|
+
end
|
379
|
+
end
|
380
|
+
expanded
|
381
|
+
end
|
382
|
+
|
383
|
+
# TODO
|
384
|
+
def self.is_selector(obj)
|
385
|
+
false
|
386
|
+
end
|
387
|
+
|
388
|
+
def self.parse_predicates_constraints_as_expression(*predicates, **constraints)
|
389
|
+
all_predicates = _parse_positional_inputs(predicates)
|
390
|
+
|
391
|
+
if constraints.any?
|
392
|
+
constraint_predicates = _parse_constraints(constraints)
|
393
|
+
all_predicates.concat(constraint_predicates)
|
394
|
+
end
|
395
|
+
|
396
|
+
_combine_predicates(all_predicates)
|
397
|
+
end
|
398
|
+
|
399
|
+
def self._parse_constraints(constraints)
|
400
|
+
constraints.map do |name, value|
|
401
|
+
Polars.col(name).eq(value)._rbexpr
|
402
|
+
end
|
403
|
+
end
|
404
|
+
|
405
|
+
def self._combine_predicates(predicates)
|
406
|
+
if !predicates.any?
|
407
|
+
msg = "at least one predicate or constraint must be provided"
|
408
|
+
raise TypeError, msg
|
409
|
+
end
|
410
|
+
|
411
|
+
if predicates.length == 1
|
412
|
+
return predicates[0]
|
413
|
+
end
|
414
|
+
|
415
|
+
Plr.all_horizontal(predicates)
|
416
|
+
end
|
417
|
+
|
418
|
+
def self.parse_when_inputs(*predicates, **constraints)
|
419
|
+
parse_predicates_constraints_as_expression(*predicates, **constraints)
|
420
|
+
end
|
367
421
|
end
|
368
422
|
end
|
data/lib/polars/version.rb
CHANGED