polars-df 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -1
- data/Cargo.lock +72 -58
- data/README.md +31 -27
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +35 -39
- data/ext/polars/src/c_api/allocator.rs +7 -0
- data/ext/polars/src/c_api/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +123 -101
- data/ext/polars/src/conversion/any_value.rs +13 -17
- data/ext/polars/src/conversion/chunked_array.rs +5 -5
- data/ext/polars/src/conversion/datetime.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +50 -45
- data/ext/polars/src/dataframe/export.rs +13 -13
- data/ext/polars/src/dataframe/general.rs +223 -223
- data/ext/polars/src/dataframe/io.rs +27 -141
- data/ext/polars/src/dataframe/mod.rs +13 -5
- data/ext/polars/src/dataframe/serde.rs +1 -1
- data/ext/polars/src/error.rs +44 -7
- data/ext/polars/src/exceptions.rs +45 -12
- data/ext/polars/src/expr/array.rs +12 -0
- data/ext/polars/src/expr/datatype.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +4 -5
- data/ext/polars/src/expr/general.rs +49 -13
- data/ext/polars/src/expr/list.rs +4 -0
- data/ext/polars/src/expr/meta.rs +8 -3
- data/ext/polars/src/expr/mod.rs +22 -6
- data/ext/polars/src/expr/name.rs +19 -8
- data/ext/polars/src/expr/rolling.rs +50 -1
- data/ext/polars/src/expr/string.rs +0 -1
- data/ext/polars/src/expr/struct.rs +7 -2
- data/ext/polars/src/file.rs +136 -103
- data/ext/polars/src/functions/aggregation.rs +9 -8
- data/ext/polars/src/functions/io.rs +81 -10
- data/ext/polars/src/functions/lazy.rs +95 -21
- data/ext/polars/src/functions/mod.rs +2 -0
- data/ext/polars/src/functions/range.rs +19 -3
- data/ext/polars/src/functions/strings.rs +6 -0
- data/ext/polars/src/functions/utils.rs +6 -0
- data/ext/polars/src/interop/arrow/mod.rs +50 -1
- data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
- data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/exitable.rs +39 -0
- data/ext/polars/src/lazyframe/general.rs +340 -236
- data/ext/polars/src/lazyframe/mod.rs +46 -10
- data/ext/polars/src/lazyframe/optflags.rs +5 -4
- data/ext/polars/src/lazyframe/serde.rs +11 -3
- data/ext/polars/src/lazyframe/sink.rs +10 -5
- data/ext/polars/src/lazygroupby.rs +6 -7
- data/ext/polars/src/lib.rs +141 -76
- data/ext/polars/src/map/dataframe.rs +12 -12
- data/ext/polars/src/map/lazy.rs +7 -5
- data/ext/polars/src/map/mod.rs +15 -8
- data/ext/polars/src/map/series.rs +3 -3
- data/ext/polars/src/on_startup.rs +16 -8
- data/ext/polars/src/prelude.rs +1 -0
- data/ext/polars/src/rb_modules.rs +19 -49
- data/ext/polars/src/series/aggregation.rs +79 -140
- data/ext/polars/src/series/arithmetic.rs +16 -22
- data/ext/polars/src/series/comparison.rs +101 -222
- data/ext/polars/src/series/construction.rs +17 -18
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +254 -289
- data/ext/polars/src/series/import.rs +17 -0
- data/ext/polars/src/series/map.rs +178 -160
- data/ext/polars/src/series/mod.rs +28 -12
- data/ext/polars/src/series/scatter.rs +12 -9
- data/ext/polars/src/sql.rs +16 -9
- data/ext/polars/src/testing/frame.rs +31 -0
- data/ext/polars/src/testing/mod.rs +5 -0
- data/ext/polars/src/testing/series.rs +31 -0
- data/ext/polars/src/timeout.rs +105 -0
- data/ext/polars/src/utils.rs +159 -1
- data/lib/polars/array_expr.rb +81 -12
- data/lib/polars/array_name_space.rb +74 -7
- data/lib/polars/batched_csv_reader.rb +21 -21
- data/lib/polars/binary_name_space.rb +1 -1
- data/lib/polars/cat_expr.rb +7 -7
- data/lib/polars/config.rb +1 -1
- data/lib/polars/convert.rb +189 -34
- data/lib/polars/data_frame.rb +1066 -831
- data/lib/polars/data_frame_plot.rb +173 -0
- data/lib/polars/data_type_group.rb +1 -0
- data/lib/polars/data_types.rb +31 -12
- data/lib/polars/date_time_expr.rb +51 -69
- data/lib/polars/date_time_name_space.rb +80 -112
- data/lib/polars/dynamic_group_by.rb +7 -7
- data/lib/polars/exceptions.rb +50 -10
- data/lib/polars/expr.rb +470 -517
- data/lib/polars/functions/aggregation/horizontal.rb +0 -1
- data/lib/polars/functions/aggregation/vertical.rb +2 -3
- data/lib/polars/functions/as_datatype.rb +290 -8
- data/lib/polars/functions/eager.rb +204 -10
- data/lib/polars/functions/escape_regex.rb +21 -0
- data/lib/polars/functions/lazy.rb +409 -169
- data/lib/polars/functions/lit.rb +17 -1
- data/lib/polars/functions/range/int_range.rb +74 -2
- data/lib/polars/functions/range/linear_space.rb +77 -0
- data/lib/polars/functions/range/time_range.rb +1 -1
- data/lib/polars/functions/repeat.rb +3 -12
- data/lib/polars/functions/whenthen.rb +2 -2
- data/lib/polars/group_by.rb +72 -20
- data/lib/polars/iceberg_dataset.rb +1 -6
- data/lib/polars/in_process_query.rb +37 -0
- data/lib/polars/io/cloud.rb +18 -0
- data/lib/polars/io/csv.rb +265 -126
- data/lib/polars/io/database.rb +0 -1
- data/lib/polars/io/delta.rb +15 -7
- data/lib/polars/io/ipc.rb +24 -17
- data/lib/polars/io/ndjson.rb +161 -24
- data/lib/polars/io/parquet.rb +101 -38
- data/lib/polars/lazy_frame.rb +849 -558
- data/lib/polars/lazy_group_by.rb +327 -2
- data/lib/polars/list_expr.rb +94 -16
- data/lib/polars/list_name_space.rb +88 -24
- data/lib/polars/meta_expr.rb +42 -1
- data/lib/polars/name_expr.rb +41 -4
- data/lib/polars/query_opt_flags.rb +198 -2
- data/lib/polars/rolling_group_by.rb +3 -3
- data/lib/polars/schema.rb +21 -3
- data/lib/polars/selector.rb +37 -2
- data/lib/polars/selectors.rb +45 -9
- data/lib/polars/series.rb +1156 -728
- data/lib/polars/series_plot.rb +72 -0
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/sql_context.rb +11 -4
- data/lib/polars/string_expr.rb +59 -68
- data/lib/polars/string_name_space.rb +51 -87
- data/lib/polars/struct_expr.rb +36 -18
- data/lib/polars/testing.rb +24 -273
- data/lib/polars/utils/constants.rb +2 -0
- data/lib/polars/utils/construction/data_frame.rb +410 -0
- data/lib/polars/utils/construction/series.rb +364 -0
- data/lib/polars/utils/construction/utils.rb +9 -0
- data/lib/polars/utils/deprecation.rb +11 -0
- data/lib/polars/utils/serde.rb +8 -3
- data/lib/polars/utils/unstable.rb +19 -0
- data/lib/polars/utils/various.rb +59 -0
- data/lib/polars/utils.rb +46 -47
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +47 -1
- metadata +25 -6
- data/ext/polars/src/allocator.rs +0 -13
- data/lib/polars/plot.rb +0 -109
data/lib/polars/functions/lit.rb
CHANGED
|
@@ -3,7 +3,23 @@ module Polars
|
|
|
3
3
|
# Return an expression representing a literal value.
|
|
4
4
|
#
|
|
5
5
|
# @return [Expr]
|
|
6
|
-
|
|
6
|
+
#
|
|
7
|
+
# @example Literal scalar values:
|
|
8
|
+
# Polars.lit(1)
|
|
9
|
+
# Polars.lit(5.5)
|
|
10
|
+
# Polars.lit(nil)
|
|
11
|
+
# Polars.lit("foo_bar")
|
|
12
|
+
# Polars.lit(Date.new(2021, 1, 20))
|
|
13
|
+
# Polars.lit(DateTime.new(2023, 3, 31, 10, 30, 45))
|
|
14
|
+
#
|
|
15
|
+
# @example Literal list/Series data (1D):
|
|
16
|
+
# Polars.lit([1, 2, 3])
|
|
17
|
+
# Polars.lit(Polars::Series.new("x", [1, 2, 3]))
|
|
18
|
+
#
|
|
19
|
+
# @example Literal list/Series data (2D):
|
|
20
|
+
# Polars.lit([[1, 2], [3, 4]])
|
|
21
|
+
# Polars.lit(Polars::Series.new("y", [[1, 2], [3, 4]]))
|
|
22
|
+
def lit(value, dtype: nil, allow_object: false)
|
|
7
23
|
if value.is_a?(::Time) || value.is_a?(::DateTime)
|
|
8
24
|
time_unit = dtype&.time_unit || "ns"
|
|
9
25
|
time_zone = dtype.&time_zone
|
|
@@ -12,7 +12,7 @@ module Polars
|
|
|
12
12
|
# @param step [Integer]
|
|
13
13
|
# Step size of the range.
|
|
14
14
|
# @param eager [Boolean]
|
|
15
|
-
# If eager evaluation is `
|
|
15
|
+
# If eager evaluation is `true`, a Series is returned instead of an Expr.
|
|
16
16
|
# @param dtype [Symbol]
|
|
17
17
|
# Apply an explicit integer dtype to the resulting expression (default is `Int64`).
|
|
18
18
|
#
|
|
@@ -28,7 +28,7 @@ module Polars
|
|
|
28
28
|
# # 1
|
|
29
29
|
# # 2
|
|
30
30
|
# # ]
|
|
31
|
-
def int_range(start, stop = nil, step: 1, eager: false, dtype:
|
|
31
|
+
def int_range(start = 0, stop = nil, step: 1, eager: false, dtype: Int64)
|
|
32
32
|
if stop.nil?
|
|
33
33
|
stop = start
|
|
34
34
|
start = 0
|
|
@@ -47,5 +47,77 @@ module Polars
|
|
|
47
47
|
result
|
|
48
48
|
end
|
|
49
49
|
alias_method :arange, :int_range
|
|
50
|
+
|
|
51
|
+
# Generate a range of integers for each row of the input columns.
|
|
52
|
+
#
|
|
53
|
+
# @param start [Integer, Expr, Series]
|
|
54
|
+
# Start of the range (inclusive). Defaults to 0.
|
|
55
|
+
# @param stop [Integer, Expr, Series]
|
|
56
|
+
# End of the range (exclusive). If set to `nil` (default),
|
|
57
|
+
# the value of `start` is used and `start` is set to `0`.
|
|
58
|
+
# @param step [Integer]
|
|
59
|
+
# Step size of the range.
|
|
60
|
+
# @param dtype [Object]
|
|
61
|
+
# Integer data type of the ranges. Defaults to `Int64`.
|
|
62
|
+
# @param eager [Boolean]
|
|
63
|
+
# Evaluate immediately and return a `Series`.
|
|
64
|
+
# If set to `false` (default), return an expression instead.
|
|
65
|
+
#
|
|
66
|
+
# @return [Expr, Series]
|
|
67
|
+
#
|
|
68
|
+
# @example
|
|
69
|
+
# df = Polars::DataFrame.new({"start" => [1, -1], "end" => [3, 2]})
|
|
70
|
+
# df.with_columns(int_range: Polars.int_ranges("start", "end"))
|
|
71
|
+
# # =>
|
|
72
|
+
# # shape: (2, 3)
|
|
73
|
+
# # ┌───────┬─────┬────────────┐
|
|
74
|
+
# # │ start ┆ end ┆ int_range │
|
|
75
|
+
# # │ --- ┆ --- ┆ --- │
|
|
76
|
+
# # │ i64 ┆ i64 ┆ list[i64] │
|
|
77
|
+
# # ╞═══════╪═════╪════════════╡
|
|
78
|
+
# # │ 1 ┆ 3 ┆ [1, 2] │
|
|
79
|
+
# # │ -1 ┆ 2 ┆ [-1, 0, 1] │
|
|
80
|
+
# # └───────┴─────┴────────────┘
|
|
81
|
+
#
|
|
82
|
+
# @example `end` can be omitted for a shorter syntax.
|
|
83
|
+
# df.select("end", int_range: Polars.int_ranges("end"))
|
|
84
|
+
# # =>
|
|
85
|
+
# # shape: (2, 2)
|
|
86
|
+
# # ┌─────┬───────────┐
|
|
87
|
+
# # │ end ┆ int_range │
|
|
88
|
+
# # │ --- ┆ --- │
|
|
89
|
+
# # │ i64 ┆ list[i64] │
|
|
90
|
+
# # ╞═════╪═══════════╡
|
|
91
|
+
# # │ 3 ┆ [0, 1, 2] │
|
|
92
|
+
# # │ 2 ┆ [0, 1] │
|
|
93
|
+
# # └─────┴───────────┘
|
|
94
|
+
def int_ranges(
|
|
95
|
+
start = 0,
|
|
96
|
+
stop = nil,
|
|
97
|
+
step: 1,
|
|
98
|
+
dtype: Int64,
|
|
99
|
+
eager: false
|
|
100
|
+
)
|
|
101
|
+
if stop.nil?
|
|
102
|
+
stop = start
|
|
103
|
+
start = 0
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
dtype_expr = Utils.parse_into_datatype_expr(dtype)
|
|
107
|
+
start_rbexpr = Utils.parse_into_expression(start)
|
|
108
|
+
end_rbexpr = Utils.parse_into_expression(stop)
|
|
109
|
+
step_rbexpr = Utils.parse_into_expression(step)
|
|
110
|
+
result = Utils.wrap_expr(
|
|
111
|
+
Plr.int_ranges(
|
|
112
|
+
start_rbexpr, end_rbexpr, step_rbexpr, dtype_expr._rbdatatype_expr
|
|
113
|
+
)
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
if eager
|
|
117
|
+
return F.select(result).to_series
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
result
|
|
121
|
+
end
|
|
50
122
|
end
|
|
51
123
|
end
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
module Polars
|
|
2
|
+
module Functions
|
|
3
|
+
# Generate a sequence of evenly-spaced values for each row between `start` and `end`.
|
|
4
|
+
#
|
|
5
|
+
# The number of values in each sequence is determined by `num_samples`.
|
|
6
|
+
#
|
|
7
|
+
# @param start [Object]
|
|
8
|
+
# Lower bound of the range.
|
|
9
|
+
# @param stop [Object]
|
|
10
|
+
# Upper bound of the range.
|
|
11
|
+
# @param num_samples [Integer]
|
|
12
|
+
# Number of samples in the output sequence.
|
|
13
|
+
# @param closed ['both', 'left', 'right', 'none']
|
|
14
|
+
# Define which sides of the interval are closed (inclusive).
|
|
15
|
+
# @param as_array [Boolean]
|
|
16
|
+
# Return result as a fixed-length `Array`. `num_samples` must be a constant.
|
|
17
|
+
# @param eager [Boolean]
|
|
18
|
+
# Evaluate immediately and return a `Series`.
|
|
19
|
+
# If set to `false` (default), return an expression instead.
|
|
20
|
+
#
|
|
21
|
+
# @return [Expr, Series]
|
|
22
|
+
#
|
|
23
|
+
# @note
|
|
24
|
+
# This functionality is experimental. It may be changed at any point without it
|
|
25
|
+
# being considered a breaking change.
|
|
26
|
+
#
|
|
27
|
+
# @example
|
|
28
|
+
# df = Polars::DataFrame.new({"start" => [1, -1], "end" => [3, 2], "num_samples" => [4, 5]})
|
|
29
|
+
# df.with_columns(ls: Polars.linear_spaces("start", "end", "num_samples"))
|
|
30
|
+
# # =>
|
|
31
|
+
# # shape: (2, 4)
|
|
32
|
+
# # ┌───────┬─────┬─────────────┬────────────────────────┐
|
|
33
|
+
# # │ start ┆ end ┆ num_samples ┆ ls │
|
|
34
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
|
35
|
+
# # │ i64 ┆ i64 ┆ i64 ┆ list[f64] │
|
|
36
|
+
# # ╞═══════╪═════╪═════════════╪════════════════════════╡
|
|
37
|
+
# # │ 1 ┆ 3 ┆ 4 ┆ [1.0, 1.666667, … 3.0] │
|
|
38
|
+
# # │ -1 ┆ 2 ┆ 5 ┆ [-1.0, -0.25, … 2.0] │
|
|
39
|
+
# # └───────┴─────┴─────────────┴────────────────────────┘
|
|
40
|
+
#
|
|
41
|
+
# @example
|
|
42
|
+
# df.with_columns(ls: Polars.linear_spaces("start", "end", 3, as_array: true))
|
|
43
|
+
# # =>
|
|
44
|
+
# # shape: (2, 4)
|
|
45
|
+
# # ┌───────┬─────┬─────────────┬──────────────────┐
|
|
46
|
+
# # │ start ┆ end ┆ num_samples ┆ ls │
|
|
47
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
|
48
|
+
# # │ i64 ┆ i64 ┆ i64 ┆ array[f64, 3] │
|
|
49
|
+
# # ╞═══════╪═════╪═════════════╪══════════════════╡
|
|
50
|
+
# # │ 1 ┆ 3 ┆ 4 ┆ [1.0, 2.0, 3.0] │
|
|
51
|
+
# # │ -1 ┆ 2 ┆ 5 ┆ [-1.0, 0.5, 2.0] │
|
|
52
|
+
# # └───────┴─────┴─────────────┴──────────────────┘
|
|
53
|
+
def linear_spaces(
|
|
54
|
+
start,
|
|
55
|
+
stop,
|
|
56
|
+
num_samples,
|
|
57
|
+
closed: "both",
|
|
58
|
+
as_array: false,
|
|
59
|
+
eager: false
|
|
60
|
+
)
|
|
61
|
+
start_rbexpr = Utils.parse_into_expression(start)
|
|
62
|
+
end_rbexpr = Utils.parse_into_expression(stop)
|
|
63
|
+
num_samples_rbexpr = Utils.parse_into_expression(num_samples)
|
|
64
|
+
result = Utils.wrap_expr(
|
|
65
|
+
Plr.linear_spaces(
|
|
66
|
+
start_rbexpr, end_rbexpr, num_samples_rbexpr, closed, as_array
|
|
67
|
+
)
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
if eager
|
|
71
|
+
return F.select(result).to_series
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
result
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
end
|
|
@@ -12,7 +12,7 @@ module Polars
|
|
|
12
12
|
# Define which sides of the range are closed (inclusive).
|
|
13
13
|
# @param eager [Boolean]
|
|
14
14
|
# Evaluate immediately and return a `Series`.
|
|
15
|
-
# If set to `
|
|
15
|
+
# If set to `false` (default), return an expression instead.
|
|
16
16
|
#
|
|
17
17
|
# @return [Object]
|
|
18
18
|
#
|
|
@@ -12,8 +12,6 @@ module Polars
|
|
|
12
12
|
# Int64 is required to fit the given value. Defaults to Float64 for float values.
|
|
13
13
|
# @param eager [Boolean]
|
|
14
14
|
# Run eagerly and collect into a `Series`.
|
|
15
|
-
# @param name [String]
|
|
16
|
-
# Only used in `eager` mode. As expression, use `alias`.
|
|
17
15
|
#
|
|
18
16
|
# @return [Object]
|
|
19
17
|
#
|
|
@@ -38,20 +36,13 @@ module Polars
|
|
|
38
36
|
# # 3
|
|
39
37
|
# # 3
|
|
40
38
|
# # ]
|
|
41
|
-
def repeat(value, n, dtype: nil, eager: false, name: nil)
|
|
42
|
-
if !name.nil?
|
|
43
|
-
warn "the `name` argument is deprecated. Use the `alias` method instead."
|
|
44
|
-
end
|
|
45
|
-
|
|
39
|
+
def repeat(value, n, dtype: nil, eager: false)
|
|
46
40
|
if n.is_a?(Integer)
|
|
47
41
|
n = lit(n)
|
|
48
42
|
end
|
|
49
43
|
|
|
50
44
|
value = Utils.parse_into_expression(value, str_as_lit: true)
|
|
51
45
|
expr = Utils.wrap_expr(Plr.repeat(value, n._rbexpr, dtype))
|
|
52
|
-
if !name.nil?
|
|
53
|
-
expr = expr.alias(name)
|
|
54
|
-
end
|
|
55
46
|
if eager
|
|
56
47
|
return select(expr).to_series
|
|
57
48
|
end
|
|
@@ -82,7 +73,7 @@ module Polars
|
|
|
82
73
|
# # 1
|
|
83
74
|
# # 1
|
|
84
75
|
# # ]
|
|
85
|
-
def ones(n, dtype:
|
|
76
|
+
def ones(n, dtype: Float64, eager: false)
|
|
86
77
|
if (zero = _one_or_zero_by_dtype(1, dtype)).nil?
|
|
87
78
|
msg = "invalid dtype for `ones`; found #{dtype}"
|
|
88
79
|
raise TypeError, msg
|
|
@@ -115,7 +106,7 @@ module Polars
|
|
|
115
106
|
# # 0
|
|
116
107
|
# # 0
|
|
117
108
|
# # ]
|
|
118
|
-
def zeros(n, dtype:
|
|
109
|
+
def zeros(n, dtype: Float64, eager: false)
|
|
119
110
|
if (zero = _one_or_zero_by_dtype(0, dtype)).nil?
|
|
120
111
|
msg = "invalid dtype for `zeros`; found #{dtype}"
|
|
121
112
|
raise TypeError, msg
|
|
@@ -6,7 +6,7 @@ module Polars
|
|
|
6
6
|
#
|
|
7
7
|
# @example Below we add a column with the value 1, where column "foo" > 2 and the value -1 where it isn't.
|
|
8
8
|
# df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
|
|
9
|
-
# df.
|
|
9
|
+
# df.with_columns(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
|
|
10
10
|
# # =>
|
|
11
11
|
# # shape: (3, 3)
|
|
12
12
|
# # ┌─────┬─────┬─────────┐
|
|
@@ -40,7 +40,7 @@ module Polars
|
|
|
40
40
|
# # │ 4 ┆ 0 ┆ 1 │
|
|
41
41
|
# # └─────┴─────┴─────┘
|
|
42
42
|
#
|
|
43
|
-
# @example The `otherwise` at the end is optional. If left out, any rows where none of the `when` expressions evaluate to
|
|
43
|
+
# @example The `otherwise` at the end is optional. If left out, any rows where none of the `when` expressions evaluate to true, are set to `null`:
|
|
44
44
|
# df.with_columns(Polars.when(Polars.col("foo") > 2).then(1).alias("val"))
|
|
45
45
|
# # =>
|
|
46
46
|
# # shape: (3, 3)
|
data/lib/polars/group_by.rb
CHANGED
|
@@ -2,9 +2,10 @@ module Polars
|
|
|
2
2
|
# Starts a new GroupBy operation.
|
|
3
3
|
class GroupBy
|
|
4
4
|
# @private
|
|
5
|
-
def initialize(df, by, maintain_order
|
|
5
|
+
def initialize(df, by, maintain_order:, **named_by)
|
|
6
6
|
@df = df
|
|
7
7
|
@by = by
|
|
8
|
+
@named_by = named_by
|
|
8
9
|
@maintain_order = maintain_order
|
|
9
10
|
end
|
|
10
11
|
|
|
@@ -39,9 +40,9 @@ module Polars
|
|
|
39
40
|
groups_df =
|
|
40
41
|
@df.lazy
|
|
41
42
|
.with_row_index(name: temp_col)
|
|
42
|
-
.group_by(@by, maintain_order: @maintain_order)
|
|
43
|
+
.group_by(@by, **@named_by, maintain_order: @maintain_order)
|
|
43
44
|
.agg(Polars.col(temp_col))
|
|
44
|
-
.collect(
|
|
45
|
+
.collect(optimizations: QueryOptFlags.none)
|
|
45
46
|
|
|
46
47
|
group_names = groups_df.select(Polars.all.exclude(temp_col))
|
|
47
48
|
|
|
@@ -202,9 +203,9 @@ module Polars
|
|
|
202
203
|
# # └─────┴───────┴────────────────┘
|
|
203
204
|
def agg(*aggs, **named_aggs)
|
|
204
205
|
@df.lazy
|
|
205
|
-
.group_by(@by, maintain_order: @maintain_order)
|
|
206
|
+
.group_by(@by, **@named_by, maintain_order: @maintain_order)
|
|
206
207
|
.agg(*aggs, **named_aggs)
|
|
207
|
-
.collect(
|
|
208
|
+
.collect(optimizations: QueryOptFlags.none)
|
|
208
209
|
end
|
|
209
210
|
|
|
210
211
|
# Get the first `n` rows of each group.
|
|
@@ -253,9 +254,9 @@ module Polars
|
|
|
253
254
|
# # └─────────┴─────┘
|
|
254
255
|
def head(n = 5)
|
|
255
256
|
@df.lazy
|
|
256
|
-
.group_by(@by, maintain_order: @maintain_order)
|
|
257
|
+
.group_by(@by, **@named_by, maintain_order: @maintain_order)
|
|
257
258
|
.head(n)
|
|
258
|
-
.collect(
|
|
259
|
+
.collect(optimizations: QueryOptFlags._eager)
|
|
259
260
|
end
|
|
260
261
|
|
|
261
262
|
# Get the last `n` rows of each group.
|
|
@@ -304,9 +305,71 @@ module Polars
|
|
|
304
305
|
# # └─────────┴─────┘
|
|
305
306
|
def tail(n = 5)
|
|
306
307
|
@df.lazy
|
|
307
|
-
.group_by(@by, maintain_order: @maintain_order)
|
|
308
|
+
.group_by(@by, **@named_by, maintain_order: @maintain_order)
|
|
308
309
|
.tail(n)
|
|
309
|
-
.collect(
|
|
310
|
+
.collect(optimizations: QueryOptFlags.none)
|
|
311
|
+
end
|
|
312
|
+
|
|
313
|
+
# Aggregate the groups into Series.
|
|
314
|
+
#
|
|
315
|
+
# @return [DataFrame]
|
|
316
|
+
#
|
|
317
|
+
# @example
|
|
318
|
+
# df = Polars::DataFrame.new({"a" => ["one", "two", "one", "two"], "b" => [1, 2, 3, 4]})
|
|
319
|
+
# df.group_by("a", maintain_order: true).all
|
|
320
|
+
# # =>
|
|
321
|
+
# # shape: (2, 2)
|
|
322
|
+
# # ┌─────┬───────────┐
|
|
323
|
+
# # │ a ┆ b │
|
|
324
|
+
# # │ --- ┆ --- │
|
|
325
|
+
# # │ str ┆ list[i64] │
|
|
326
|
+
# # ╞═════╪═══════════╡
|
|
327
|
+
# # │ one ┆ [1, 3] │
|
|
328
|
+
# # │ two ┆ [2, 4] │
|
|
329
|
+
# # └─────┴───────────┘
|
|
330
|
+
def all
|
|
331
|
+
agg(F.all)
|
|
332
|
+
end
|
|
333
|
+
|
|
334
|
+
# Return the number of rows in each group.
|
|
335
|
+
#
|
|
336
|
+
# @param name [String]
|
|
337
|
+
# Assign a name to the resulting column; if unset, defaults to "len".
|
|
338
|
+
#
|
|
339
|
+
# @return [DataFrame]
|
|
340
|
+
#
|
|
341
|
+
# @example
|
|
342
|
+
# df = Polars::DataFrame.new({"a" => ["Apple", "Apple", "Orange"], "b" => [1, nil, 2]})
|
|
343
|
+
# df.group_by("a").len
|
|
344
|
+
# # =>
|
|
345
|
+
# # shape: (2, 2)
|
|
346
|
+
# # ┌────────┬─────┐
|
|
347
|
+
# # │ a ┆ len │
|
|
348
|
+
# # │ --- ┆ --- │
|
|
349
|
+
# # │ str ┆ u32 │
|
|
350
|
+
# # ╞════════╪═════╡
|
|
351
|
+
# # │ Apple ┆ 2 │
|
|
352
|
+
# # │ Orange ┆ 1 │
|
|
353
|
+
# # └────────┴─────┘
|
|
354
|
+
#
|
|
355
|
+
# @example
|
|
356
|
+
# df.group_by("a").len(name: "n")
|
|
357
|
+
# # =>
|
|
358
|
+
# # shape: (2, 2)
|
|
359
|
+
# # ┌────────┬─────┐
|
|
360
|
+
# # │ a ┆ n │
|
|
361
|
+
# # │ --- ┆ --- │
|
|
362
|
+
# # │ str ┆ u32 │
|
|
363
|
+
# # ╞════════╪═════╡
|
|
364
|
+
# # │ Apple ┆ 2 │
|
|
365
|
+
# # │ Orange ┆ 1 │
|
|
366
|
+
# # └────────┴─────┘
|
|
367
|
+
def len(name: nil)
|
|
368
|
+
len_expr = F.len
|
|
369
|
+
if !name.nil?
|
|
370
|
+
len_expr = len_expr.alias(name)
|
|
371
|
+
end
|
|
372
|
+
agg(len_expr)
|
|
310
373
|
end
|
|
311
374
|
|
|
312
375
|
# Aggregate the first values in the group.
|
|
@@ -598,16 +661,5 @@ module Polars
|
|
|
598
661
|
def median
|
|
599
662
|
agg(Polars.all.median)
|
|
600
663
|
end
|
|
601
|
-
|
|
602
|
-
# Plot data.
|
|
603
|
-
#
|
|
604
|
-
# @return [Vega::LiteChart]
|
|
605
|
-
def plot(*args, **options)
|
|
606
|
-
raise ArgumentError, "Multiple groups not supported" if @by.is_a?(::Array) && @by.size > 1
|
|
607
|
-
# same message as Ruby
|
|
608
|
-
raise ArgumentError, "unknown keyword: :group" if options.key?(:group)
|
|
609
|
-
|
|
610
|
-
@df.plot(*args, **options, group: @by)
|
|
611
|
-
end
|
|
612
664
|
end
|
|
613
665
|
end
|
|
@@ -12,11 +12,6 @@ module Polars
|
|
|
12
12
|
end
|
|
13
13
|
|
|
14
14
|
def to_lazyframe
|
|
15
|
-
# for iceberg < 0.1.3
|
|
16
|
-
if !@source.respond_to?(:scan)
|
|
17
|
-
return @source.to_polars(snapshot_id: @snapshot_id, storage_options: @storage_options)
|
|
18
|
-
end
|
|
19
|
-
|
|
20
15
|
scan = @source.scan(snapshot_id: @snapshot_id)
|
|
21
16
|
files = scan.plan_files
|
|
22
17
|
|
|
@@ -66,7 +61,7 @@ module Polars
|
|
|
66
61
|
scan_options = {
|
|
67
62
|
storage_options: @storage_options,
|
|
68
63
|
cast_options: Polars::ScanCastOptions._default_iceberg,
|
|
69
|
-
|
|
64
|
+
missing_columns: "insert",
|
|
70
65
|
extra_columns: "ignore",
|
|
71
66
|
_column_mapping: column_mapping,
|
|
72
67
|
_deletion_files: deletion_files
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
module Polars
|
|
2
|
+
# A placeholder for an in process query.
|
|
3
|
+
#
|
|
4
|
+
# This can be used to do something else while a query is running.
|
|
5
|
+
# The queries can be cancelled. You can peek if the query is finished,
|
|
6
|
+
# or you can await the result.
|
|
7
|
+
class InProcessQuery
|
|
8
|
+
# @private
|
|
9
|
+
attr_accessor :_inner
|
|
10
|
+
|
|
11
|
+
def initialize(ipq)
|
|
12
|
+
self._inner = ipq
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
# Cancel the query at earliest convenience.
|
|
16
|
+
def cancel
|
|
17
|
+
_inner.cancel
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# Fetch the result.
|
|
21
|
+
#
|
|
22
|
+
# If it is ready, a materialized DataFrame is returned.
|
|
23
|
+
# If it is not ready it will return `nil`.
|
|
24
|
+
def fetch
|
|
25
|
+
if !(out = _inner.fetch).nil?
|
|
26
|
+
Utils.wrap_df(out)
|
|
27
|
+
else
|
|
28
|
+
nil
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
# Await the result synchronously.
|
|
33
|
+
def fetch_blocking
|
|
34
|
+
Utils.wrap_df(_inner.fetch_blocking)
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
module Polars
|
|
2
|
+
module IO
|
|
3
|
+
private
|
|
4
|
+
|
|
5
|
+
def _init_credential_provider_builder(
|
|
6
|
+
credential_provider,
|
|
7
|
+
source,
|
|
8
|
+
storage_options,
|
|
9
|
+
caller_name
|
|
10
|
+
)
|
|
11
|
+
if credential_provider && credential_provider != "auto"
|
|
12
|
+
raise Todo
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
nil
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|