polars-df 0.9.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +90 -45
- data/README.md +1 -0
- data/ext/polars/Cargo.toml +8 -6
- data/ext/polars/src/batched_csv.rs +3 -1
- data/ext/polars/src/conversion/anyvalue.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +18 -7
- data/ext/polars/src/dataframe.rs +40 -14
- data/ext/polars/src/expr/array.rs +6 -2
- data/ext/polars/src/expr/datetime.rs +7 -2
- data/ext/polars/src/expr/general.rs +22 -3
- data/ext/polars/src/expr/list.rs +6 -2
- data/ext/polars/src/expr/string.rs +3 -3
- data/ext/polars/src/file.rs +158 -11
- data/ext/polars/src/functions/lazy.rs +18 -3
- data/ext/polars/src/functions/whenthen.rs +47 -17
- data/ext/polars/src/lazyframe/mod.rs +58 -19
- data/ext/polars/src/lib.rs +23 -14
- data/ext/polars/src/map/dataframe.rs +17 -9
- data/ext/polars/src/series/mod.rs +12 -2
- data/lib/polars/array_expr.rb +6 -2
- data/lib/polars/batched_csv_reader.rb +4 -2
- data/lib/polars/data_frame.rb +148 -74
- data/lib/polars/date_time_expr.rb +10 -4
- data/lib/polars/date_time_name_space.rb +9 -3
- data/lib/polars/expr.rb +37 -34
- data/lib/polars/functions/lazy.rb +3 -3
- data/lib/polars/functions/whenthen.rb +74 -5
- data/lib/polars/io.rb +18 -6
- data/lib/polars/lazy_frame.rb +39 -36
- data/lib/polars/list_expr.rb +6 -2
- data/lib/polars/series.rb +12 -10
- data/lib/polars/string_expr.rb +1 -0
- data/lib/polars/utils.rb +54 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +1 -2
- metadata +4 -5
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
data/lib/polars/expr.rb
CHANGED
@@ -1544,16 +1544,14 @@ module Polars
|
|
1544
1544
|
# # │ one │
|
1545
1545
|
# # │ two │
|
1546
1546
|
# # └───────┘
|
1547
|
-
def sort_by(by, reverse: false)
|
1548
|
-
|
1549
|
-
by = [by]
|
1550
|
-
end
|
1547
|
+
def sort_by(by, *more_by, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false)
|
1548
|
+
by = Utils.parse_as_list_of_expressions(by, *more_by)
|
1551
1549
|
if !reverse.is_a?(::Array)
|
1552
1550
|
reverse = [reverse]
|
1551
|
+
elsif by.length != reverse.length
|
1552
|
+
raise ArgumentError, "the length of `reverse` (#{reverse.length}) does not match the length of `by` (#{by.length})"
|
1553
1553
|
end
|
1554
|
-
by
|
1555
|
-
|
1556
|
-
_from_rbexpr(_rbexpr.sort_by(by, reverse))
|
1554
|
+
_from_rbexpr(_rbexpr.sort_by(by, reverse, nulls_last, multithreaded, maintain_order))
|
1557
1555
|
end
|
1558
1556
|
|
1559
1557
|
# Take values by index.
|
@@ -3515,20 +3513,23 @@ module Polars
|
|
3515
3513
|
# @return [Expr]
|
3516
3514
|
#
|
3517
3515
|
# @example
|
3518
|
-
# df = Polars::DataFrame.new({"
|
3519
|
-
# df.
|
3516
|
+
# df = Polars::DataFrame.new({"x" => [1, 2, 4, 8]})
|
3517
|
+
# df.with_columns(
|
3518
|
+
# Polars.col("x").pow(3).alias("cube"),
|
3519
|
+
# Polars.col("x").pow(Polars.col("x").log(2)).alias("x ** xlog2")
|
3520
|
+
# )
|
3520
3521
|
# # =>
|
3521
|
-
# # shape: (4,
|
3522
|
-
# #
|
3523
|
-
# # │
|
3524
|
-
# # │ --- │
|
3525
|
-
# # │ f64
|
3526
|
-
# #
|
3527
|
-
# # │ 1.0
|
3528
|
-
# # │ 8.0
|
3529
|
-
# # │
|
3530
|
-
# # │
|
3531
|
-
# #
|
3522
|
+
# # shape: (4, 3)
|
3523
|
+
# # ┌─────┬──────┬────────────┐
|
3524
|
+
# # │ x ┆ cube ┆ x ** xlog2 │
|
3525
|
+
# # │ --- ┆ --- ┆ --- │
|
3526
|
+
# # │ i64 ┆ i64 ┆ f64 │
|
3527
|
+
# # ╞═════╪══════╪════════════╡
|
3528
|
+
# # │ 1 ┆ 1 ┆ 1.0 │
|
3529
|
+
# # │ 2 ┆ 8 ┆ 2.0 │
|
3530
|
+
# # │ 4 ┆ 64 ┆ 16.0 │
|
3531
|
+
# # │ 8 ┆ 512 ┆ 512.0 │
|
3532
|
+
# # └─────┴──────┴────────────┘
|
3532
3533
|
def pow(exponent)
|
3533
3534
|
self**exponent
|
3534
3535
|
end
|
@@ -3933,7 +3934,7 @@ module Polars
|
|
3933
3934
|
min_periods: nil,
|
3934
3935
|
center: false,
|
3935
3936
|
by: nil,
|
3936
|
-
closed:
|
3937
|
+
closed: nil
|
3937
3938
|
)
|
3938
3939
|
window_size, min_periods = _prepare_rolling_window_args(
|
3939
3940
|
window_size, min_periods
|
@@ -4022,7 +4023,7 @@ module Polars
|
|
4022
4023
|
min_periods: nil,
|
4023
4024
|
center: false,
|
4024
4025
|
by: nil,
|
4025
|
-
closed:
|
4026
|
+
closed: nil
|
4026
4027
|
)
|
4027
4028
|
window_size, min_periods = _prepare_rolling_window_args(
|
4028
4029
|
window_size, min_periods
|
@@ -4111,7 +4112,7 @@ module Polars
|
|
4111
4112
|
min_periods: nil,
|
4112
4113
|
center: false,
|
4113
4114
|
by: nil,
|
4114
|
-
closed:
|
4115
|
+
closed: nil
|
4115
4116
|
)
|
4116
4117
|
window_size, min_periods = _prepare_rolling_window_args(
|
4117
4118
|
window_size, min_periods
|
@@ -4200,7 +4201,7 @@ module Polars
|
|
4200
4201
|
min_periods: nil,
|
4201
4202
|
center: false,
|
4202
4203
|
by: nil,
|
4203
|
-
closed:
|
4204
|
+
closed: nil
|
4204
4205
|
)
|
4205
4206
|
window_size, min_periods = _prepare_rolling_window_args(
|
4206
4207
|
window_size, min_periods
|
@@ -4289,7 +4290,7 @@ module Polars
|
|
4289
4290
|
min_periods: nil,
|
4290
4291
|
center: false,
|
4291
4292
|
by: nil,
|
4292
|
-
closed:
|
4293
|
+
closed: nil,
|
4293
4294
|
ddof: 1,
|
4294
4295
|
warn_if_unsorted: true
|
4295
4296
|
)
|
@@ -4380,7 +4381,7 @@ module Polars
|
|
4380
4381
|
min_periods: nil,
|
4381
4382
|
center: false,
|
4382
4383
|
by: nil,
|
4383
|
-
closed:
|
4384
|
+
closed: nil,
|
4384
4385
|
ddof: 1,
|
4385
4386
|
warn_if_unsorted: true
|
4386
4387
|
)
|
@@ -4467,7 +4468,7 @@ module Polars
|
|
4467
4468
|
min_periods: nil,
|
4468
4469
|
center: false,
|
4469
4470
|
by: nil,
|
4470
|
-
closed:
|
4471
|
+
closed: nil,
|
4471
4472
|
warn_if_unsorted: true
|
4472
4473
|
)
|
4473
4474
|
window_size, min_periods = _prepare_rolling_window_args(
|
@@ -4559,7 +4560,7 @@ module Polars
|
|
4559
4560
|
min_periods: nil,
|
4560
4561
|
center: false,
|
4561
4562
|
by: nil,
|
4562
|
-
closed:
|
4563
|
+
closed: nil,
|
4563
4564
|
warn_if_unsorted: true
|
4564
4565
|
)
|
4565
4566
|
window_size, min_periods = _prepare_rolling_window_args(
|
@@ -4730,6 +4731,8 @@ module Polars
|
|
4730
4731
|
# on the order that the values occur in the Series.
|
4731
4732
|
# @param reverse [Boolean]
|
4732
4733
|
# Reverse the operation.
|
4734
|
+
# @param seed [Integer]
|
4735
|
+
# If `method: "random"`, use this as seed.
|
4733
4736
|
#
|
4734
4737
|
# @return [Expr]
|
4735
4738
|
#
|
@@ -5711,13 +5714,13 @@ module Polars
|
|
5711
5714
|
# # ┌────────┐
|
5712
5715
|
# # │ values │
|
5713
5716
|
# # │ --- │
|
5714
|
-
# # │
|
5717
|
+
# # │ i64 │
|
5715
5718
|
# # ╞════════╡
|
5716
|
-
# # │ 0
|
5717
|
-
# # │ -3
|
5718
|
-
# # │ -8
|
5719
|
-
# # │ -15
|
5720
|
-
# # │ -24
|
5719
|
+
# # │ 0 │
|
5720
|
+
# # │ -3 │
|
5721
|
+
# # │ -8 │
|
5722
|
+
# # │ -15 │
|
5723
|
+
# # │ -24 │
|
5721
5724
|
# # └────────┘
|
5722
5725
|
def cumulative_eval(expr, min_periods: 1, parallel: false)
|
5723
5726
|
_from_rbexpr(
|
@@ -1264,10 +1264,10 @@ module Polars
|
|
1264
1264
|
# # ┌─────┬─────┬───────┐
|
1265
1265
|
# # │ a ┆ a_a ┆ a_txt │
|
1266
1266
|
# # │ --- ┆ --- ┆ --- │
|
1267
|
-
# # │ i64 ┆
|
1267
|
+
# # │ i64 ┆ i64 ┆ str │
|
1268
1268
|
# # ╞═════╪═════╪═══════╡
|
1269
|
-
# # │ 2 ┆ 4
|
1270
|
-
# # │ 1 ┆ 1
|
1269
|
+
# # │ 2 ┆ 4 ┆ 2 │
|
1270
|
+
# # │ 1 ┆ 1 ┆ 1 │
|
1271
1271
|
# # └─────┴─────┴───────┘
|
1272
1272
|
def sql_expr(sql)
|
1273
1273
|
if sql.is_a?(::String)
|
@@ -4,7 +4,7 @@ module Polars
|
|
4
4
|
#
|
5
5
|
# @return [When]
|
6
6
|
#
|
7
|
-
# @example
|
7
|
+
# @example Below we add a column with the value 1, where column "foo" > 2 and the value -1 where it isn't.
|
8
8
|
# df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
|
9
9
|
# df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
|
10
10
|
# # =>
|
@@ -18,10 +18,79 @@ module Polars
|
|
18
18
|
# # │ 3 ┆ 4 ┆ 1 │
|
19
19
|
# # │ 4 ┆ 0 ┆ 1 │
|
20
20
|
# # └─────┴─────┴─────────┘
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
21
|
+
#
|
22
|
+
# @example Or with multiple when-then operations chained:
|
23
|
+
# df.with_columns(
|
24
|
+
# Polars.when(Polars.col("foo") > 2)
|
25
|
+
# .then(1)
|
26
|
+
# .when(Polars.col("bar") > 2)
|
27
|
+
# .then(4)
|
28
|
+
# .otherwise(-1)
|
29
|
+
# .alias("val")
|
30
|
+
# )
|
31
|
+
# # =>
|
32
|
+
# # shape: (3, 3)
|
33
|
+
# # ┌─────┬─────┬─────┐
|
34
|
+
# # │ foo ┆ bar ┆ val │
|
35
|
+
# # │ --- ┆ --- ┆ --- │
|
36
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
37
|
+
# # ╞═════╪═════╪═════╡
|
38
|
+
# # │ 1 ┆ 3 ┆ 4 │
|
39
|
+
# # │ 3 ┆ 4 ┆ 1 │
|
40
|
+
# # │ 4 ┆ 0 ┆ 1 │
|
41
|
+
# # └─────┴─────┴─────┘
|
42
|
+
#
|
43
|
+
# @example The `otherwise` at the end is optional. If left out, any rows where none of the `when` expressions evaluate to True, are set to `null`:
|
44
|
+
# df.with_columns(Polars.when(Polars.col("foo") > 2).then(1).alias("val"))
|
45
|
+
# # =>
|
46
|
+
# # shape: (3, 3)
|
47
|
+
# # ┌─────┬─────┬──────┐
|
48
|
+
# # │ foo ┆ bar ┆ val │
|
49
|
+
# # │ --- ┆ --- ┆ --- │
|
50
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
51
|
+
# # ╞═════╪═════╪══════╡
|
52
|
+
# # │ 1 ┆ 3 ┆ null │
|
53
|
+
# # │ 3 ┆ 4 ┆ 1 │
|
54
|
+
# # │ 4 ┆ 0 ┆ 1 │
|
55
|
+
# # └─────┴─────┴──────┘
|
56
|
+
#
|
57
|
+
# @example Pass multiple predicates, each of which must be met:
|
58
|
+
# df.with_columns(
|
59
|
+
# val: Polars.when(
|
60
|
+
# Polars.col("bar") > 0,
|
61
|
+
# Polars.col("foo") % 2 != 0
|
62
|
+
# )
|
63
|
+
# .then(99)
|
64
|
+
# .otherwise(-1)
|
65
|
+
# )
|
66
|
+
# # =>
|
67
|
+
# # shape: (3, 3)
|
68
|
+
# # ┌─────┬─────┬─────┐
|
69
|
+
# # │ foo ┆ bar ┆ val │
|
70
|
+
# # │ --- ┆ --- ┆ --- │
|
71
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
72
|
+
# # ╞═════╪═════╪═════╡
|
73
|
+
# # │ 1 ┆ 3 ┆ 99 │
|
74
|
+
# # │ 3 ┆ 4 ┆ 99 │
|
75
|
+
# # │ 4 ┆ 0 ┆ -1 │
|
76
|
+
# # └─────┴─────┴─────┘
|
77
|
+
#
|
78
|
+
# @example Pass conditions as keyword arguments:
|
79
|
+
# df.with_columns(val: Polars.when(foo: 4, bar: 0).then(99).otherwise(-1))
|
80
|
+
# # =>
|
81
|
+
# # shape: (3, 3)
|
82
|
+
# # ┌─────┬─────┬─────┐
|
83
|
+
# # │ foo ┆ bar ┆ val │
|
84
|
+
# # │ --- ┆ --- ┆ --- │
|
85
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
86
|
+
# # ╞═════╪═════╪═════╡
|
87
|
+
# # │ 1 ┆ 3 ┆ -1 │
|
88
|
+
# # │ 3 ┆ 4 ┆ -1 │
|
89
|
+
# # │ 4 ┆ 0 ┆ 99 │
|
90
|
+
# # └─────┴─────┴─────┘
|
91
|
+
def when(*predicates, **constraints)
|
92
|
+
condition = Utils.parse_when_inputs(*predicates, **constraints)
|
93
|
+
When.new(Plr.when(condition))
|
25
94
|
end
|
26
95
|
end
|
27
96
|
end
|
data/lib/polars/io.rb
CHANGED
@@ -80,6 +80,8 @@ module Polars
|
|
80
80
|
# allocation needed.
|
81
81
|
# @param eol_char [String]
|
82
82
|
# Single byte end of line character.
|
83
|
+
# @param truncate_ragged_lines [Boolean]
|
84
|
+
# Truncate lines that are longer than the schema.
|
83
85
|
#
|
84
86
|
# @return [DataFrame]
|
85
87
|
#
|
@@ -113,7 +115,8 @@ module Polars
|
|
113
115
|
row_count_name: nil,
|
114
116
|
row_count_offset: 0,
|
115
117
|
sample_size: 1024,
|
116
|
-
eol_char: "\n"
|
118
|
+
eol_char: "\n",
|
119
|
+
truncate_ragged_lines: false
|
117
120
|
)
|
118
121
|
Utils._check_arg_is_1byte("sep", sep, false)
|
119
122
|
Utils._check_arg_is_1byte("comment_char", comment_char, false)
|
@@ -161,7 +164,8 @@ module Polars
|
|
161
164
|
row_count_name: row_count_name,
|
162
165
|
row_count_offset: row_count_offset,
|
163
166
|
sample_size: sample_size,
|
164
|
-
eol_char: eol_char
|
167
|
+
eol_char: eol_char,
|
168
|
+
truncate_ragged_lines: truncate_ragged_lines
|
165
169
|
)
|
166
170
|
end
|
167
171
|
|
@@ -239,6 +243,8 @@ module Polars
|
|
239
243
|
# the column remains of data type `:str`.
|
240
244
|
# @param eol_char [String]
|
241
245
|
# Single byte end of line character.
|
246
|
+
# @param truncate_ragged_lines [Boolean]
|
247
|
+
# Truncate lines that are longer than the schema.
|
242
248
|
#
|
243
249
|
# @return [LazyFrame]
|
244
250
|
def scan_csv(
|
@@ -262,7 +268,8 @@ module Polars
|
|
262
268
|
row_count_name: nil,
|
263
269
|
row_count_offset: 0,
|
264
270
|
parse_dates: false,
|
265
|
-
eol_char: "\n"
|
271
|
+
eol_char: "\n",
|
272
|
+
truncate_ragged_lines: false
|
266
273
|
)
|
267
274
|
Utils._check_arg_is_1byte("sep", sep, false)
|
268
275
|
Utils._check_arg_is_1byte("comment_char", comment_char, false)
|
@@ -294,6 +301,7 @@ module Polars
|
|
294
301
|
row_count_offset: row_count_offset,
|
295
302
|
parse_dates: parse_dates,
|
296
303
|
eol_char: eol_char,
|
304
|
+
truncate_ragged_lines: truncate_ragged_lines
|
297
305
|
)
|
298
306
|
end
|
299
307
|
|
@@ -520,7 +528,7 @@ module Polars
|
|
520
528
|
|
521
529
|
# Read into a DataFrame from a parquet file.
|
522
530
|
#
|
523
|
-
# @param source [
|
531
|
+
# @param source [String, Pathname, StringIO]
|
524
532
|
# Path to a file or a file-like object.
|
525
533
|
# @param columns [Object]
|
526
534
|
# Columns to select. Accepts a list of column indices (starting at zero) or a list
|
@@ -755,6 +763,8 @@ module Polars
|
|
755
763
|
# allocation needed.
|
756
764
|
# @param eol_char [String]
|
757
765
|
# Single byte end of line character.
|
766
|
+
# @param truncate_ragged_lines [Boolean]
|
767
|
+
# Truncate lines that are longer than the schema.
|
758
768
|
#
|
759
769
|
# @return [BatchedCsvReader]
|
760
770
|
#
|
@@ -787,7 +797,8 @@ module Polars
|
|
787
797
|
row_count_name: nil,
|
788
798
|
row_count_offset: 0,
|
789
799
|
sample_size: 1024,
|
790
|
-
eol_char: "\n"
|
800
|
+
eol_char: "\n",
|
801
|
+
truncate_ragged_lines: false
|
791
802
|
)
|
792
803
|
projection, columns = Utils.handle_projection_columns(columns)
|
793
804
|
|
@@ -827,7 +838,8 @@ module Polars
|
|
827
838
|
row_count_offset: row_count_offset,
|
828
839
|
sample_size: sample_size,
|
829
840
|
eol_char: eol_char,
|
830
|
-
new_columns: new_columns
|
841
|
+
new_columns: new_columns,
|
842
|
+
truncate_ragged_lines: truncate_ragged_lines
|
831
843
|
)
|
832
844
|
end
|
833
845
|
|
data/lib/polars/lazy_frame.rb
CHANGED
@@ -49,7 +49,8 @@ module Polars
|
|
49
49
|
row_count_name: nil,
|
50
50
|
row_count_offset: 0,
|
51
51
|
parse_dates: false,
|
52
|
-
eol_char: "\n"
|
52
|
+
eol_char: "\n",
|
53
|
+
truncate_ragged_lines: true
|
53
54
|
)
|
54
55
|
dtype_list = nil
|
55
56
|
if !dtypes.nil?
|
@@ -81,7 +82,8 @@ module Polars
|
|
81
82
|
encoding,
|
82
83
|
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
83
84
|
parse_dates,
|
84
|
-
eol_char
|
85
|
+
eol_char,
|
86
|
+
truncate_ragged_lines
|
85
87
|
)
|
86
88
|
)
|
87
89
|
end
|
@@ -103,6 +105,7 @@ module Polars
|
|
103
105
|
_from_rbldf(
|
104
106
|
RbLazyFrame.new_from_parquet(
|
105
107
|
file,
|
108
|
+
[],
|
106
109
|
n_rows,
|
107
110
|
cache,
|
108
111
|
parallel,
|
@@ -110,7 +113,8 @@ module Polars
|
|
110
113
|
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
111
114
|
low_memory,
|
112
115
|
use_statistics,
|
113
|
-
hive_partitioning
|
116
|
+
hive_partitioning,
|
117
|
+
nil
|
114
118
|
)
|
115
119
|
)
|
116
120
|
end
|
@@ -400,16 +404,16 @@ module Polars
|
|
400
404
|
# # │ 2 ┆ 7.0 ┆ b │
|
401
405
|
# # │ 1 ┆ 6.0 ┆ a │
|
402
406
|
# # └─────┴─────┴─────┘
|
403
|
-
def sort(by, reverse: false, nulls_last: false, maintain_order: false)
|
407
|
+
def sort(by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
|
404
408
|
if by.is_a?(::String)
|
405
|
-
return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order))
|
409
|
+
return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order, multithreaded))
|
406
410
|
end
|
407
411
|
if Utils.bool?(reverse)
|
408
412
|
reverse = [reverse]
|
409
413
|
end
|
410
414
|
|
411
415
|
by = Utils.selection_to_rbexpr_list(by)
|
412
|
-
_from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order))
|
416
|
+
_from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order, multithreaded))
|
413
417
|
end
|
414
418
|
|
415
419
|
# def profile
|
@@ -1523,12 +1527,13 @@ module Polars
|
|
1523
1527
|
# closed: "right"
|
1524
1528
|
# ).agg(Polars.col("A").alias("A_agg_list"))
|
1525
1529
|
# # =>
|
1526
|
-
# # shape: (
|
1530
|
+
# # shape: (4, 4)
|
1527
1531
|
# # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
|
1528
1532
|
# # │ _lower_boundary ┆ _upper_boundary ┆ idx ┆ A_agg_list │
|
1529
1533
|
# # │ --- ┆ --- ┆ --- ┆ --- │
|
1530
1534
|
# # │ i64 ┆ i64 ┆ i64 ┆ list[str] │
|
1531
1535
|
# # ╞═════════════════╪═════════════════╪═════╪═════════════════╡
|
1536
|
+
# # │ -2 ┆ 1 ┆ -2 ┆ ["A", "A"] │
|
1532
1537
|
# # │ 0 ┆ 3 ┆ 0 ┆ ["A", "B", "B"] │
|
1533
1538
|
# # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
|
1534
1539
|
# # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
|
@@ -1837,7 +1842,7 @@ module Polars
|
|
1837
1842
|
if how == "cross"
|
1838
1843
|
return _from_rbldf(
|
1839
1844
|
_ldf.join(
|
1840
|
-
other._ldf, [], [], allow_parallel, force_parallel, how, suffix
|
1845
|
+
other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
|
1841
1846
|
)
|
1842
1847
|
)
|
1843
1848
|
end
|
@@ -1891,16 +1896,16 @@ module Polars
|
|
1891
1896
|
# ).collect
|
1892
1897
|
# # =>
|
1893
1898
|
# # shape: (4, 6)
|
1894
|
-
# #
|
1895
|
-
# # │ a ┆ b ┆ c ┆ a^2
|
1896
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
1897
|
-
# # │ i64 ┆ f64 ┆ bool ┆
|
1898
|
-
# #
|
1899
|
-
# # │ 1 ┆ 0.5 ┆ true ┆ 1
|
1900
|
-
# # │ 2 ┆ 4.0 ┆ true ┆ 4
|
1901
|
-
# # │ 3 ┆ 10.0 ┆ false ┆ 9
|
1902
|
-
# # │ 4 ┆ 13.0 ┆ true ┆ 16
|
1903
|
-
# #
|
1899
|
+
# # ┌─────┬──────┬───────┬─────┬──────┬───────┐
|
1900
|
+
# # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
|
1901
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
1902
|
+
# # │ i64 ┆ f64 ┆ bool ┆ i64 ┆ f64 ┆ bool │
|
1903
|
+
# # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡
|
1904
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 1 ┆ 0.25 ┆ false │
|
1905
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 4 ┆ 2.0 ┆ false │
|
1906
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 9 ┆ 5.0 ┆ true │
|
1907
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │
|
1908
|
+
# # └─────┴──────┴───────┴─────┴──────┴───────┘
|
1904
1909
|
def with_columns(*exprs, **named_exprs)
|
1905
1910
|
structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
|
1906
1911
|
rbexprs = Utils.parse_as_list_of_expressions(*exprs, **named_exprs, __structify: structify)
|
@@ -1965,26 +1970,26 @@ module Polars
|
|
1965
1970
|
# # ┌─────┬─────┬───────────┐
|
1966
1971
|
# # │ a ┆ b ┆ b_squared │
|
1967
1972
|
# # │ --- ┆ --- ┆ --- │
|
1968
|
-
# # │ i64 ┆ i64 ┆
|
1973
|
+
# # │ i64 ┆ i64 ┆ i64 │
|
1969
1974
|
# # ╞═════╪═════╪═══════════╡
|
1970
|
-
# # │ 1 ┆ 2 ┆ 4
|
1971
|
-
# # │ 3 ┆ 4 ┆ 16
|
1972
|
-
# # │ 5 ┆ 6 ┆ 36
|
1975
|
+
# # │ 1 ┆ 2 ┆ 4 │
|
1976
|
+
# # │ 3 ┆ 4 ┆ 16 │
|
1977
|
+
# # │ 5 ┆ 6 ┆ 36 │
|
1973
1978
|
# # └─────┴─────┴───────────┘
|
1974
1979
|
#
|
1975
1980
|
# @example
|
1976
1981
|
# df.with_column(Polars.col("a") ** 2).collect
|
1977
1982
|
# # =>
|
1978
1983
|
# # shape: (3, 2)
|
1979
|
-
# #
|
1980
|
-
# # │ a
|
1981
|
-
# # │ ---
|
1982
|
-
# # │
|
1983
|
-
# #
|
1984
|
-
# # │ 1
|
1985
|
-
# # │ 9
|
1986
|
-
# # │ 25
|
1987
|
-
# #
|
1984
|
+
# # ┌─────┬─────┐
|
1985
|
+
# # │ a ┆ b │
|
1986
|
+
# # │ --- ┆ --- │
|
1987
|
+
# # │ i64 ┆ i64 │
|
1988
|
+
# # ╞═════╪═════╡
|
1989
|
+
# # │ 1 ┆ 2 │
|
1990
|
+
# # │ 9 ┆ 4 │
|
1991
|
+
# # │ 25 ┆ 6 │
|
1992
|
+
# # └─────┴─────┘
|
1988
1993
|
def with_column(column)
|
1989
1994
|
with_columns([column])
|
1990
1995
|
end
|
@@ -1996,11 +2001,9 @@ module Polars
|
|
1996
2001
|
# - List of column names.
|
1997
2002
|
#
|
1998
2003
|
# @return [LazyFrame]
|
1999
|
-
def drop(columns)
|
2000
|
-
|
2001
|
-
|
2002
|
-
end
|
2003
|
-
_from_rbldf(_ldf.drop(columns))
|
2004
|
+
def drop(*columns)
|
2005
|
+
drop_cols = Utils._expand_selectors(self, *columns)
|
2006
|
+
_from_rbldf(_ldf.drop(drop_cols))
|
2004
2007
|
end
|
2005
2008
|
|
2006
2009
|
# Rename column names.
|
data/lib/polars/list_expr.rb
CHANGED
@@ -365,6 +365,10 @@ module Polars
|
|
365
365
|
#
|
366
366
|
# @param index [Integer]
|
367
367
|
# Index to return per sublist
|
368
|
+
# @param null_on_oob [Boolean]
|
369
|
+
# Behavior if an index is out of bounds:
|
370
|
+
# true -> set as null
|
371
|
+
# false -> raise an error
|
368
372
|
#
|
369
373
|
# @return [Expr]
|
370
374
|
#
|
@@ -382,9 +386,9 @@ module Polars
|
|
382
386
|
# # │ null │
|
383
387
|
# # │ 1 │
|
384
388
|
# # └──────┘
|
385
|
-
def get(index)
|
389
|
+
def get(index, null_on_oob: true)
|
386
390
|
index = Utils.parse_as_expression(index)
|
387
|
-
Utils.wrap_expr(_rbexpr.list_get(index))
|
391
|
+
Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
|
388
392
|
end
|
389
393
|
|
390
394
|
# Get the value by index in the sublists.
|
data/lib/polars/series.rb
CHANGED
@@ -1155,13 +1155,13 @@ module Polars
|
|
1155
1155
|
# s.cumulative_eval(Polars.element.first - Polars.element.last ** 2)
|
1156
1156
|
# # =>
|
1157
1157
|
# # shape: (5,)
|
1158
|
-
# # Series: 'values' [
|
1158
|
+
# # Series: 'values' [i64]
|
1159
1159
|
# # [
|
1160
|
-
# # 0
|
1161
|
-
# # -3
|
1162
|
-
# # -8
|
1163
|
-
# # -15
|
1164
|
-
# # -24
|
1160
|
+
# # 0
|
1161
|
+
# # -3
|
1162
|
+
# # -8
|
1163
|
+
# # -15
|
1164
|
+
# # -24
|
1165
1165
|
# # ]
|
1166
1166
|
def cumulative_eval(expr, min_periods: 1, parallel: false)
|
1167
1167
|
super
|
@@ -1567,12 +1567,12 @@ module Polars
|
|
1567
1567
|
# # 2
|
1568
1568
|
# # 1
|
1569
1569
|
# # ]
|
1570
|
-
def sort(reverse: false, nulls_last: false, in_place: false)
|
1570
|
+
def sort(reverse: false, nulls_last: false, multithreaded: true, in_place: false)
|
1571
1571
|
if in_place
|
1572
|
-
self._s = _s.sort(reverse, nulls_last)
|
1572
|
+
self._s = _s.sort(reverse, nulls_last, multithreaded)
|
1573
1573
|
self
|
1574
1574
|
else
|
1575
|
-
Utils.wrap_s(_s.sort(reverse, nulls_last))
|
1575
|
+
Utils.wrap_s(_s.sort(reverse, nulls_last, multithreaded))
|
1576
1576
|
end
|
1577
1577
|
end
|
1578
1578
|
|
@@ -3646,6 +3646,8 @@ module Polars
|
|
3646
3646
|
# on the order that the values occur in the Series.
|
3647
3647
|
# @param reverse [Boolean]
|
3648
3648
|
# Reverse the operation.
|
3649
|
+
# @param seed [Integer]
|
3650
|
+
# If `method: "random"`, use this as seed.
|
3649
3651
|
#
|
3650
3652
|
# @return [Series]
|
3651
3653
|
#
|
@@ -3676,7 +3678,7 @@ module Polars
|
|
3676
3678
|
# # 2
|
3677
3679
|
# # 5
|
3678
3680
|
# # ]
|
3679
|
-
def rank(method: "average", reverse: false)
|
3681
|
+
def rank(method: "average", reverse: false, seed: nil)
|
3680
3682
|
super
|
3681
3683
|
end
|
3682
3684
|
|
data/lib/polars/string_expr.rb
CHANGED
data/lib/polars/utils.rb
CHANGED
@@ -364,5 +364,59 @@ module Polars
|
|
364
364
|
end
|
365
365
|
end
|
366
366
|
end
|
367
|
+
|
368
|
+
def self._expand_selectors(frame, *items)
|
369
|
+
items_iter = _parse_inputs_as_iterable(items)
|
370
|
+
|
371
|
+
expanded = []
|
372
|
+
items_iter.each do |item|
|
373
|
+
if is_selector(item)
|
374
|
+
selector_cols = expand_selector(frame, item)
|
375
|
+
expanded.concat(selector_cols)
|
376
|
+
else
|
377
|
+
expanded << item
|
378
|
+
end
|
379
|
+
end
|
380
|
+
expanded
|
381
|
+
end
|
382
|
+
|
383
|
+
# TODO
|
384
|
+
def self.is_selector(obj)
|
385
|
+
false
|
386
|
+
end
|
387
|
+
|
388
|
+
def self.parse_predicates_constraints_as_expression(*predicates, **constraints)
|
389
|
+
all_predicates = _parse_positional_inputs(predicates)
|
390
|
+
|
391
|
+
if constraints.any?
|
392
|
+
constraint_predicates = _parse_constraints(constraints)
|
393
|
+
all_predicates.concat(constraint_predicates)
|
394
|
+
end
|
395
|
+
|
396
|
+
_combine_predicates(all_predicates)
|
397
|
+
end
|
398
|
+
|
399
|
+
def self._parse_constraints(constraints)
|
400
|
+
constraints.map do |name, value|
|
401
|
+
Polars.col(name).eq(value)._rbexpr
|
402
|
+
end
|
403
|
+
end
|
404
|
+
|
405
|
+
def self._combine_predicates(predicates)
|
406
|
+
if !predicates.any?
|
407
|
+
msg = "at least one predicate or constraint must be provided"
|
408
|
+
raise TypeError, msg
|
409
|
+
end
|
410
|
+
|
411
|
+
if predicates.length == 1
|
412
|
+
return predicates[0]
|
413
|
+
end
|
414
|
+
|
415
|
+
Plr.all_horizontal(predicates)
|
416
|
+
end
|
417
|
+
|
418
|
+
def self.parse_when_inputs(*predicates, **constraints)
|
419
|
+
parse_predicates_constraints_as_expression(*predicates, **constraints)
|
420
|
+
end
|
367
421
|
end
|
368
422
|
end
|
data/lib/polars/version.rb
CHANGED