polars-df 0.10.0-aarch64-linux → 0.12.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +392 -351
- data/LICENSE-THIRD-PARTY.txt +1125 -865
- data/README.md +6 -6
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +11 -5
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +176 -403
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -572
- data/lib/polars/date_time_name_space.rb +263 -460
- data/lib/polars/dynamic_group_by.rb +5 -5
- data/lib/polars/exceptions.rb +7 -0
- data/lib/polars/expr.rb +1394 -243
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +90 -57
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +299 -493
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +227 -0
- data/lib/polars/lazy_frame.rb +143 -272
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +7 -9
- data/lib/polars/series.rb +103 -187
- data/lib/polars/string_expr.rb +78 -102
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +8 -300
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +20 -1
- metadata +17 -4
data/lib/polars/lazy_group_by.rb
CHANGED
@@ -6,11 +6,108 @@ module Polars
|
|
6
6
|
@lgb = lgb
|
7
7
|
end
|
8
8
|
|
9
|
-
#
|
9
|
+
# Compute aggregations for each group of a group by operation.
|
10
|
+
#
|
11
|
+
# @param aggs [Array]
|
12
|
+
# Aggregations to compute for each group of the group by operation,
|
13
|
+
# specified as positional arguments.
|
14
|
+
# Accepts expression input. Strings are parsed as column names.
|
15
|
+
# @param named_aggs [Hash]
|
16
|
+
# Additional aggregations, specified as keyword arguments.
|
17
|
+
# The resulting columns will be renamed to the keyword used.
|
10
18
|
#
|
11
19
|
# @return [LazyFrame]
|
12
|
-
|
13
|
-
|
20
|
+
#
|
21
|
+
# @example Compute the aggregation of the columns for each group.
|
22
|
+
# ldf = Polars::DataFrame.new(
|
23
|
+
# {
|
24
|
+
# "a" => ["a", "b", "a", "b", "c"],
|
25
|
+
# "b" => [1, 2, 1, 3, 3],
|
26
|
+
# "c" => [5, 4, 3, 2, 1]
|
27
|
+
# }
|
28
|
+
# ).lazy
|
29
|
+
# ldf.group_by("a").agg(
|
30
|
+
# [Polars.col("b"), Polars.col("c")]
|
31
|
+
# ).collect
|
32
|
+
# # =>
|
33
|
+
# # shape: (3, 3)
|
34
|
+
# # ┌─────┬───────────┬───────────┐
|
35
|
+
# # │ a ┆ b ┆ c │
|
36
|
+
# # │ --- ┆ --- ┆ --- │
|
37
|
+
# # │ str ┆ list[i64] ┆ list[i64] │
|
38
|
+
# # ╞═════╪═══════════╪═══════════╡
|
39
|
+
# # │ a ┆ [1, 1] ┆ [5, 3] │
|
40
|
+
# # │ b ┆ [2, 3] ┆ [4, 2] │
|
41
|
+
# # │ c ┆ [3] ┆ [1] │
|
42
|
+
# # └─────┴───────────┴───────────┘
|
43
|
+
#
|
44
|
+
# @example Compute the sum of a column for each group.
|
45
|
+
# ldf.group_by("a").agg(
|
46
|
+
# Polars.col("b").sum
|
47
|
+
# ).collect
|
48
|
+
# # =>
|
49
|
+
# # shape: (3, 2)
|
50
|
+
# # ┌─────┬─────┐
|
51
|
+
# # │ a ┆ b │
|
52
|
+
# # │ --- ┆ --- │
|
53
|
+
# # │ str ┆ i64 │
|
54
|
+
# # ╞═════╪═════╡
|
55
|
+
# # │ a ┆ 2 │
|
56
|
+
# # │ b ┆ 5 │
|
57
|
+
# # │ c ┆ 3 │
|
58
|
+
# # └─────┴─────┘
|
59
|
+
#
|
60
|
+
# @example Compute multiple aggregates at once by passing a list of expressions.
|
61
|
+
# ldf.group_by("a").agg(
|
62
|
+
# [Polars.sum("b"), Polars.mean("c")]
|
63
|
+
# ).collect
|
64
|
+
# # =>
|
65
|
+
# # shape: (3, 3)
|
66
|
+
# # ┌─────┬─────┬─────┐
|
67
|
+
# # │ a ┆ b ┆ c │
|
68
|
+
# # │ --- ┆ --- ┆ --- │
|
69
|
+
# # │ str ┆ i64 ┆ f64 │
|
70
|
+
# # ╞═════╪═════╪═════╡
|
71
|
+
# # │ c ┆ 3 ┆ 1.0 │
|
72
|
+
# # │ a ┆ 2 ┆ 4.0 │
|
73
|
+
# # │ b ┆ 5 ┆ 3.0 │
|
74
|
+
# # └─────┴─────┴─────┘
|
75
|
+
#
|
76
|
+
# @example Or use positional arguments to compute multiple aggregations in the same way.
|
77
|
+
# ldf.group_by("a").agg(
|
78
|
+
# Polars.sum("b").name.suffix("_sum"),
|
79
|
+
# (Polars.col("c") ** 2).mean.name.suffix("_mean_squared")
|
80
|
+
# ).collect
|
81
|
+
# # =>
|
82
|
+
# # shape: (3, 3)
|
83
|
+
# # ┌─────┬───────┬────────────────┐
|
84
|
+
# # │ a ┆ b_sum ┆ c_mean_squared │
|
85
|
+
# # │ --- ┆ --- ┆ --- │
|
86
|
+
# # │ str ┆ i64 ┆ f64 │
|
87
|
+
# # ╞═════╪═══════╪════════════════╡
|
88
|
+
# # │ a ┆ 2 ┆ 17.0 │
|
89
|
+
# # │ c ┆ 3 ┆ 1.0 │
|
90
|
+
# # │ b ┆ 5 ┆ 10.0 │
|
91
|
+
# # └─────┴───────┴────────────────┘
|
92
|
+
#
|
93
|
+
# @example Use keyword arguments to easily name your expression inputs.
|
94
|
+
# ldf.group_by("a").agg(
|
95
|
+
# b_sum: Polars.sum("b"),
|
96
|
+
# c_mean_squared: (Polars.col("c") ** 2).mean
|
97
|
+
# ).collect
|
98
|
+
# # =>
|
99
|
+
# # shape: (3, 3)
|
100
|
+
# # ┌─────┬───────┬────────────────┐
|
101
|
+
# # │ a ┆ b_sum ┆ c_mean_squared │
|
102
|
+
# # │ --- ┆ --- ┆ --- │
|
103
|
+
# # │ str ┆ i64 ┆ f64 │
|
104
|
+
# # ╞═════╪═══════╪════════════════╡
|
105
|
+
# # │ a ┆ 2 ┆ 17.0 │
|
106
|
+
# # │ c ┆ 3 ┆ 1.0 │
|
107
|
+
# # │ b ┆ 5 ┆ 10.0 │
|
108
|
+
# # └─────┴───────┴────────────────┘
|
109
|
+
def agg(*aggs, **named_aggs)
|
110
|
+
rbexprs = Utils.parse_into_list_of_expressions(*aggs, **named_aggs)
|
14
111
|
Utils.wrap_ldf(@lgb.agg(rbexprs))
|
15
112
|
end
|
16
113
|
|
data/lib/polars/list_expr.rb
CHANGED
@@ -146,7 +146,7 @@ module Polars
|
|
146
146
|
end
|
147
147
|
|
148
148
|
if !fraction.nil?
|
149
|
-
fraction = Utils.
|
149
|
+
fraction = Utils.parse_into_expression(fraction)
|
150
150
|
return Utils.wrap_expr(
|
151
151
|
_rbexpr.list_sample_fraction(
|
152
152
|
fraction, with_replacement, shuffle, seed
|
@@ -155,7 +155,7 @@ module Polars
|
|
155
155
|
end
|
156
156
|
|
157
157
|
n = 1 if n.nil?
|
158
|
-
n = Utils.
|
158
|
+
n = Utils.parse_into_expression(n)
|
159
159
|
Utils.wrap_expr(_rbexpr.list_sample_n(n, with_replacement, shuffle, seed))
|
160
160
|
end
|
161
161
|
|
@@ -387,7 +387,7 @@ module Polars
|
|
387
387
|
# # │ 1 │
|
388
388
|
# # └──────┘
|
389
389
|
def get(index, null_on_oob: true)
|
390
|
-
index = Utils.
|
390
|
+
index = Utils.parse_into_expression(index)
|
391
391
|
Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
|
392
392
|
end
|
393
393
|
|
@@ -431,7 +431,7 @@ module Polars
|
|
431
431
|
if index.is_a?(::Array)
|
432
432
|
index = Series.new(index)
|
433
433
|
end
|
434
|
-
index = Utils.
|
434
|
+
index = Utils.parse_into_expression(index, str_as_lit: false)
|
435
435
|
Utils.wrap_expr(_rbexpr.list_gather(index, null_on_oob))
|
436
436
|
end
|
437
437
|
alias_method :take, :gather
|
@@ -502,7 +502,7 @@ module Polars
|
|
502
502
|
# # │ true │
|
503
503
|
# # └───────┘
|
504
504
|
def contains(item)
|
505
|
-
Utils.wrap_expr(_rbexpr.list_contains(Utils.
|
505
|
+
Utils.wrap_expr(_rbexpr.list_contains(Utils.parse_into_expression(item)))
|
506
506
|
end
|
507
507
|
|
508
508
|
# Join all string items in a sublist and place a separator between them.
|
@@ -530,7 +530,7 @@ module Polars
|
|
530
530
|
# # │ x y │
|
531
531
|
# # └───────┘
|
532
532
|
def join(separator, ignore_nulls: true)
|
533
|
-
separator = Utils.
|
533
|
+
separator = Utils.parse_into_expression(separator, str_as_lit: true)
|
534
534
|
Utils.wrap_expr(_rbexpr.list_join(separator, ignore_nulls))
|
535
535
|
end
|
536
536
|
|
@@ -625,7 +625,7 @@ module Polars
|
|
625
625
|
# # [null, 10, 2]
|
626
626
|
# # ]
|
627
627
|
def shift(n = 1)
|
628
|
-
n = Utils.
|
628
|
+
n = Utils.parse_into_expression(n)
|
629
629
|
Utils.wrap_expr(_rbexpr.list_shift(n))
|
630
630
|
end
|
631
631
|
|
@@ -650,8 +650,8 @@ module Polars
|
|
650
650
|
# # [2, 1]
|
651
651
|
# # ]
|
652
652
|
def slice(offset, length = nil)
|
653
|
-
offset = Utils.
|
654
|
-
length = Utils.
|
653
|
+
offset = Utils.parse_into_expression(offset, str_as_lit: false)
|
654
|
+
length = Utils.parse_into_expression(length, str_as_lit: false)
|
655
655
|
Utils.wrap_expr(_rbexpr.list_slice(offset, length))
|
656
656
|
end
|
657
657
|
|
@@ -694,7 +694,7 @@ module Polars
|
|
694
694
|
# # [2, 1]
|
695
695
|
# # ]
|
696
696
|
def tail(n = 5)
|
697
|
-
n = Utils.
|
697
|
+
n = Utils.parse_into_expression(n)
|
698
698
|
Utils.wrap_expr(_rbexpr.list_tail(n))
|
699
699
|
end
|
700
700
|
|
@@ -722,7 +722,7 @@ module Polars
|
|
722
722
|
# # │ 0 │
|
723
723
|
# # └────────────────┘
|
724
724
|
def count_matches(element)
|
725
|
-
Utils.wrap_expr(_rbexpr.list_count_matches(Utils.
|
725
|
+
Utils.wrap_expr(_rbexpr.list_count_matches(Utils.parse_into_expression(element)))
|
726
726
|
end
|
727
727
|
alias_method :count_match, :count_matches
|
728
728
|
|
data/lib/polars/list_name_space.rb
CHANGED
@@ -197,9 +197,13 @@ module Polars
|
|
197
197
|
#
|
198
198
|
# @param index [Integer]
|
199
199
|
# Index to return per sublist
|
200
|
+
# @param null_on_oob [Boolean]
|
201
|
+
# Behavior if an index is out of bounds:
|
202
|
+
# true -> set as null
|
203
|
+
# false -> raise an error
|
200
204
|
#
|
201
205
|
# @return [Series]
|
202
|
-
def get(index)
|
206
|
+
def get(index, null_on_oob: false)
|
203
207
|
super
|
204
208
|
end
|
205
209
|
|
data/lib/polars/rolling_group_by.rb
CHANGED
@@ -10,27 +10,25 @@ module Polars
|
|
10
10
|
period,
|
11
11
|
offset,
|
12
12
|
closed,
|
13
|
-
|
14
|
-
check_sorted
|
13
|
+
group_by
|
15
14
|
)
|
16
|
-
period = Utils.
|
17
|
-
offset = Utils.
|
15
|
+
period = Utils.parse_as_duration_string(period)
|
16
|
+
offset = Utils.parse_as_duration_string(offset)
|
18
17
|
|
19
18
|
@df = df
|
20
19
|
@time_column = index_column
|
21
20
|
@period = period
|
22
21
|
@offset = offset
|
23
22
|
@closed = closed
|
24
|
-
@
|
25
|
-
@check_sorted = check_sorted
|
23
|
+
@group_by = group_by
|
26
24
|
end
|
27
25
|
|
28
|
-
def agg(aggs)
|
26
|
+
def agg(*aggs, **named_aggs)
|
29
27
|
@df.lazy
|
30
28
|
.group_by_rolling(
|
31
|
-
index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @
|
29
|
+
index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @group_by
|
32
30
|
)
|
33
|
-
.agg(aggs)
|
31
|
+
.agg(*aggs, **named_aggs)
|
34
32
|
.collect(no_optimization: true, string_cache: false)
|
35
33
|
end
|
36
34
|
end
|