polars-df 0.13.0-x64-mingw-ucrt
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +208 -0
- data/Cargo.lock +2556 -0
- data/Cargo.toml +6 -0
- data/LICENSE-THIRD-PARTY.txt +39278 -0
- data/LICENSE.txt +20 -0
- data/README.md +437 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +537 -0
- data/lib/polars/array_name_space.rb +423 -0
- data/lib/polars/batched_csv_reader.rb +104 -0
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/cat_expr.rb +36 -0
- data/lib/polars/cat_name_space.rb +88 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/convert.rb +98 -0
- data/lib/polars/data_frame.rb +5191 -0
- data/lib/polars/data_types.rb +466 -0
- data/lib/polars/date_time_expr.rb +1397 -0
- data/lib/polars/date_time_name_space.rb +1287 -0
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/exceptions.rb +38 -0
- data/lib/polars/expr.rb +7256 -0
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +271 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1329 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +136 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +57 -0
- data/lib/polars/group_by.rb +613 -0
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/io/csv.rb +696 -0
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +275 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +233 -0
- data/lib/polars/lazy_frame.rb +2708 -0
- data/lib/polars/lazy_group_by.rb +181 -0
- data/lib/polars/list_expr.rb +791 -0
- data/lib/polars/list_name_space.rb +449 -0
- data/lib/polars/meta_expr.rb +222 -0
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/plot.rb +109 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +4444 -0
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +1495 -0
- data/lib/polars/string_name_space.rb +811 -0
- data/lib/polars/struct_expr.rb +98 -0
- data/lib/polars/struct_name_space.rb +96 -0
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +130 -0
- data/lib/polars/version.rb +4 -0
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +91 -0
- metadata +138 -0
@@ -0,0 +1,144 @@
|
|
1
|
+
module Polars
|
2
|
+
module Functions
|
3
|
+
# Repeat a single value n times.
|
4
|
+
#
|
5
|
+
# @param value [Object]
|
6
|
+
# Value to repeat.
|
7
|
+
# @param n [Integer]
|
8
|
+
# Repeat `n` times.
|
9
|
+
# @param eager [Boolean]
|
10
|
+
# Run eagerly and collect into a `Series`.
|
11
|
+
# @param name [String]
|
12
|
+
# Only used in `eager` mode. As expression, use `alias`.
|
13
|
+
#
|
14
|
+
# @return [Object]
|
15
|
+
#
|
16
|
+
# @example Construct a column with a repeated value in a lazy context.
|
17
|
+
# Polars.select(Polars.repeat("z", 3)).to_series
|
18
|
+
# # =>
|
19
|
+
# # shape: (3,)
|
20
|
+
# # Series: 'repeat' [str]
|
21
|
+
# # [
|
22
|
+
# # "z"
|
23
|
+
# # "z"
|
24
|
+
# # "z"
|
25
|
+
# # ]
|
26
|
+
#
|
27
|
+
# @example Generate a Series directly by setting `eager: true`.
|
28
|
+
# Polars.repeat(3, 3, dtype: Polars::Int8, eager: true)
|
29
|
+
# # =>
|
30
|
+
# # shape: (3,)
|
31
|
+
# # Series: 'repeat' [i8]
|
32
|
+
# # [
|
33
|
+
# # 3
|
34
|
+
# # 3
|
35
|
+
# # 3
|
36
|
+
# # ]
|
37
|
+
def repeat(value, n, dtype: nil, eager: false, name: nil)
  # `name` is deprecated: it is still applied as an alias further down,
  # but passing it emits a warning.
  has_name = !name.nil?
  warn "the `name` argument is deprecated. Use the `alias` method instead." if has_name

  # A plain Integer count is wrapped with `lit` so that `_rbexpr` can be
  # read from it below.
  n = lit(n) if n.is_a?(Integer)

  value_expr = Utils.parse_into_expression(value, str_as_lit: true)
  repeated = Utils.wrap_expr(Plr.repeat(value_expr, n._rbexpr, dtype))
  repeated = repeated.alias(name) if has_name

  # Eager mode runs the expression through `select` and returns the result
  # of `to_series`; otherwise the expression itself is returned.
  eager ? select(repeated).to_series : repeated
end
|
56
|
+
|
57
|
+
# Construct a column of length `n` filled with ones.
|
58
|
+
#
|
59
|
+
# This is syntactic sugar for the `repeat` function.
|
60
|
+
#
|
61
|
+
# @param n [Integer]
|
62
|
+
# Length of the resulting column.
|
63
|
+
# @param dtype [Object]
|
64
|
+
# Data type of the resulting column. Defaults to Float64.
|
65
|
+
# @param eager [Boolean]
|
66
|
+
# Evaluate immediately and return a `Series`. If set to `false`,
|
67
|
+
# return an expression instead.
|
68
|
+
#
|
69
|
+
# @return [Object]
|
70
|
+
#
|
71
|
+
# @example
|
72
|
+
# Polars.ones(3, dtype: Polars::Int8, eager: true)
|
73
|
+
# # =>
|
74
|
+
# # shape: (3,)
|
75
|
+
# # Series: 'ones' [i8]
|
76
|
+
# # [
|
77
|
+
# # 1
|
78
|
+
# # 1
|
79
|
+
# # 1
|
80
|
+
# # ]
|
81
|
+
def ones(n, dtype: nil, eager: true)
  # The documented default dtype is Float64. Without this fallback a nil
  # dtype reaches `_one_or_zero_by_dtype`, which calls `integer?` on it and
  # fails with NoMethodError instead of producing a Float64 column.
  dtype ||= Float64

  # `one` is the literal matching the requested dtype (nil if unsupported).
  one = _one_or_zero_by_dtype(1, dtype)
  raise TypeError, "invalid dtype for `ones`; found #{dtype}" if one.nil?

  repeat(one, n, dtype: dtype, eager: eager).alias("ones")
end
|
89
|
+
|
90
|
+
# Construct a column of length `n` filled with zeros.
|
91
|
+
#
|
92
|
+
# This is syntactic sugar for the `repeat` function.
|
93
|
+
#
|
94
|
+
# @param n [Integer]
|
95
|
+
# Length of the resulting column.
|
96
|
+
# @param dtype [Object]
|
97
|
+
# Data type of the resulting column. Defaults to Float64.
|
98
|
+
# @param eager [Boolean]
|
99
|
+
# Evaluate immediately and return a `Series`. If set to `false`,
|
100
|
+
# return an expression instead.
|
101
|
+
#
|
102
|
+
# @return [Object]
|
103
|
+
#
|
104
|
+
# @example
|
105
|
+
# Polars.zeros(3, dtype: Polars::Int8, eager: true)
|
106
|
+
# # =>
|
107
|
+
# # shape: (3,)
|
108
|
+
# # Series: 'zeros' [i8]
|
109
|
+
# # [
|
110
|
+
# # 0
|
111
|
+
# # 0
|
112
|
+
# # 0
|
113
|
+
# # ]
|
114
|
+
def zeros(n, dtype: nil, eager: true)
  # The documented default dtype is Float64. Without this fallback a nil
  # dtype reaches `_one_or_zero_by_dtype`, which calls `integer?` on it and
  # fails with NoMethodError instead of producing a Float64 column.
  dtype ||= Float64

  # `zero` is the literal matching the requested dtype (nil if unsupported).
  zero = _one_or_zero_by_dtype(0, dtype)
  raise TypeError, "invalid dtype for `zeros`; found #{dtype}" if zero.nil?

  repeat(zero, n, dtype: dtype, eager: eager).alias("zeros")
end
|
122
|
+
|
123
|
+
private
|
124
|
+
|
125
|
+
# Convert `value` (the callers in this file pass 0 or 1) into a literal that
# fits `dtype`.
#
# @param value [Integer]
#   The number to convert.
# @param dtype [Object]
#   Target data type; must respond to `integer?` and `float?`.
#
# @return [Object, nil]
#   The converted literal, or `nil` when no branch handles `dtype`.
def _one_or_zero_by_dtype(value, dtype)
  if dtype.integer?
    value
  elsif dtype.float?
    value.to_f
  elsif dtype == Boolean
    value != 0
  elsif dtype == Utf8
    value.to_s
  elsif dtype == Decimal
    # `Decimal` is used as a dtype constant here, not a conversion method, so
    # calling `Decimal(...)` raises NoMethodError. Ruby's decimal constructor
    # is `BigDecimal(...)`; the require is a no-op once the library is loaded.
    require "bigdecimal"
    BigDecimal(value.to_s)
  elsif [List, Array].include?(dtype)
    # Nested types: build one inner literal and repeat it `width` times
    # (1 when the dtype exposes no `width`).
    arr_width = dtype.respond_to?(:width) ? dtype.width : 1
    [_one_or_zero_by_dtype(value, dtype.inner)] * arr_width
  end
end
|
143
|
+
end
|
144
|
+
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
module Polars
|
2
|
+
module Functions
|
3
|
+
# Start a "when, then, otherwise" expression.
|
4
|
+
#
|
5
|
+
# @return [When]
|
6
|
+
#
|
7
|
+
# @example Below we add a column with the value 1, where column "foo" > 2 and the value -1 where it isn't.
|
8
|
+
# df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
|
9
|
+
# df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
|
10
|
+
# # =>
|
11
|
+
# # shape: (3, 3)
|
12
|
+
# # ┌─────┬─────┬─────────┐
|
13
|
+
# # │ foo ┆ bar ┆ literal │
|
14
|
+
# # │ --- ┆ --- ┆ --- │
|
15
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
16
|
+
# # ╞═════╪═════╪═════════╡
|
17
|
+
# # │ 1 ┆ 3 ┆ -1 │
|
18
|
+
# # │ 3 ┆ 4 ┆ 1 │
|
19
|
+
# # │ 4 ┆ 0 ┆ 1 │
|
20
|
+
# # └─────┴─────┴─────────┘
|
21
|
+
#
|
22
|
+
# @example Or with multiple when-then operations chained:
|
23
|
+
# df.with_columns(
|
24
|
+
# Polars.when(Polars.col("foo") > 2)
|
25
|
+
# .then(1)
|
26
|
+
# .when(Polars.col("bar") > 2)
|
27
|
+
# .then(4)
|
28
|
+
# .otherwise(-1)
|
29
|
+
# .alias("val")
|
30
|
+
# )
|
31
|
+
# # =>
|
32
|
+
# # shape: (3, 3)
|
33
|
+
# # ┌─────┬─────┬─────┐
|
34
|
+
# # │ foo ┆ bar ┆ val │
|
35
|
+
# # │ --- ┆ --- ┆ --- │
|
36
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
37
|
+
# # ╞═════╪═════╪═════╡
|
38
|
+
# # │ 1 ┆ 3 ┆ 4 │
|
39
|
+
# # │ 3 ┆ 4 ┆ 1 │
|
40
|
+
# # │ 4 ┆ 0 ┆ 1 │
|
41
|
+
# # └─────┴─────┴─────┘
|
42
|
+
#
|
43
|
+
# @example The `otherwise` at the end is optional. If left out, any rows where none of the `when` expressions evaluate to `true` are set to `null`:
|
44
|
+
# df.with_columns(Polars.when(Polars.col("foo") > 2).then(1).alias("val"))
|
45
|
+
# # =>
|
46
|
+
# # shape: (3, 3)
|
47
|
+
# # ┌─────┬─────┬──────┐
|
48
|
+
# # │ foo ┆ bar ┆ val │
|
49
|
+
# # │ --- ┆ --- ┆ --- │
|
50
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
51
|
+
# # ╞═════╪═════╪══════╡
|
52
|
+
# # │ 1 ┆ 3 ┆ null │
|
53
|
+
# # │ 3 ┆ 4 ┆ 1 │
|
54
|
+
# # │ 4 ┆ 0 ┆ 1 │
|
55
|
+
# # └─────┴─────┴──────┘
|
56
|
+
#
|
57
|
+
# @example Pass multiple predicates, each of which must be met:
|
58
|
+
# df.with_columns(
|
59
|
+
# val: Polars.when(
|
60
|
+
# Polars.col("bar") > 0,
|
61
|
+
# Polars.col("foo") % 2 != 0
|
62
|
+
# )
|
63
|
+
# .then(99)
|
64
|
+
# .otherwise(-1)
|
65
|
+
# )
|
66
|
+
# # =>
|
67
|
+
# # shape: (3, 3)
|
68
|
+
# # ┌─────┬─────┬─────┐
|
69
|
+
# # │ foo ┆ bar ┆ val │
|
70
|
+
# # │ --- ┆ --- ┆ --- │
|
71
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
72
|
+
# # ╞═════╪═════╪═════╡
|
73
|
+
# # │ 1 ┆ 3 ┆ 99 │
|
74
|
+
# # │ 3 ┆ 4 ┆ 99 │
|
75
|
+
# # │ 4 ┆ 0 ┆ -1 │
|
76
|
+
# # └─────┴─────┴─────┘
|
77
|
+
#
|
78
|
+
# @example Pass conditions as keyword arguments:
|
79
|
+
# df.with_columns(val: Polars.when(foo: 4, bar: 0).then(99).otherwise(-1))
|
80
|
+
# # =>
|
81
|
+
# # shape: (3, 3)
|
82
|
+
# # ┌─────┬─────┬─────┐
|
83
|
+
# # │ foo ┆ bar ┆ val │
|
84
|
+
# # │ --- ┆ --- ┆ --- │
|
85
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
86
|
+
# # ╞═════╪═════╪═════╡
|
87
|
+
# # │ 1 ┆ 3 ┆ -1 │
|
88
|
+
# # │ 3 ┆ 4 ┆ -1 │
|
89
|
+
# # │ 4 ┆ 0 ┆ 99 │
|
90
|
+
# # └─────┴─────┴─────┘
|
91
|
+
def when(*predicates, **constraints)
  # Positional predicates and keyword constraints are both handed to the
  # Utils helper; its single result is the condition for the builder.
  combined = Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
  plr_when = Plr.when(combined)
  When.new(plr_when)
end
|
95
|
+
end
|
96
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module Polars
|
2
|
+
module Functions
|
3
|
+
# Convert categorical variables into dummy/indicator variables.
|
4
|
+
#
|
5
|
+
# @param df [DataFrame]
|
6
|
+
# DataFrame to convert.
|
7
|
+
# @param columns [Array, nil]
|
8
|
+
# A subset of columns to convert to dummy variables. `nil` means
|
9
|
+
# "all columns".
|
10
|
+
#
|
11
|
+
# @return [DataFrame]
|
12
|
+
def get_dummies(df, columns: nil)
  # Thin wrapper: the conversion is done by the frame's own `to_dummies`.
  options = { columns: columns }
  df.to_dummies(**options)
end
|
15
|
+
|
16
|
+
# Aggregate to list.
|
17
|
+
#
|
18
|
+
# @return [Expr]
|
19
|
+
def to_list(name)
  # NOTE(review): this returns whatever `list` yields on the column
  # expression; confirm that it aggregates into a list rather than exposing
  # a list namespace object.
  column = col(name)
  column.list
end
|
22
|
+
|
23
|
+
# Compute the spearman rank correlation between two columns.
|
24
|
+
#
|
25
|
+
# Missing data will be excluded from the computation.
|
26
|
+
#
|
27
|
+
# @param a [Object]
|
28
|
+
# Column name or Expression.
|
29
|
+
# @param b [Object]
|
30
|
+
# Column name or Expression.
|
31
|
+
# @param ddof [Integer]
|
32
|
+
# Delta degrees of freedom
|
33
|
+
# @param propagate_nans [Boolean]
|
34
|
+
# If `true`, any `NaN` encountered will lead to `NaN` in the output.
|
35
|
+
# Defaults to `false`, where `NaN` values are regarded as larger than any finite number
|
36
|
+
# and thus lead to the highest rank.
|
37
|
+
#
|
38
|
+
# @return [Expr]
|
39
|
+
def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
  # Delegates to `corr` with the method pinned to "spearman".
  options = { method: "spearman", ddof: ddof, propagate_nans: propagate_nans }
  corr(a, b, **options)
end
|
42
|
+
|
43
|
+
# Compute the pearson's correlation between two columns.
|
44
|
+
#
|
45
|
+
# @param a [Object]
|
46
|
+
# Column name or Expression.
|
47
|
+
# @param b [Object]
|
48
|
+
# Column name or Expression.
|
49
|
+
# @param ddof [Integer]
|
50
|
+
# Delta degrees of freedom
|
51
|
+
#
|
52
|
+
# @return [Expr]
|
53
|
+
def pearson_corr(a, b, ddof: 1)
  # Delegates to `corr` with the method pinned to "pearson".
  options = { method: "pearson", ddof: ddof }
  corr(a, b, **options)
end
|
56
|
+
end
|
57
|
+
end
|