polars-df 0.13.0-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +208 -0
- data/Cargo.lock +2556 -0
- data/Cargo.toml +6 -0
- data/LICENSE-THIRD-PARTY.txt +39278 -0
- data/LICENSE.txt +20 -0
- data/README.md +437 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +537 -0
- data/lib/polars/array_name_space.rb +423 -0
- data/lib/polars/batched_csv_reader.rb +104 -0
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/cat_expr.rb +36 -0
- data/lib/polars/cat_name_space.rb +88 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/convert.rb +98 -0
- data/lib/polars/data_frame.rb +5191 -0
- data/lib/polars/data_types.rb +466 -0
- data/lib/polars/date_time_expr.rb +1397 -0
- data/lib/polars/date_time_name_space.rb +1287 -0
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/exceptions.rb +38 -0
- data/lib/polars/expr.rb +7256 -0
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +271 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1329 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +136 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +57 -0
- data/lib/polars/group_by.rb +613 -0
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/io/csv.rb +696 -0
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +275 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +233 -0
- data/lib/polars/lazy_frame.rb +2708 -0
- data/lib/polars/lazy_group_by.rb +181 -0
- data/lib/polars/list_expr.rb +791 -0
- data/lib/polars/list_name_space.rb +449 -0
- data/lib/polars/meta_expr.rb +222 -0
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/plot.rb +109 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +4444 -0
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +1495 -0
- data/lib/polars/string_name_space.rb +811 -0
- data/lib/polars/struct_expr.rb +98 -0
- data/lib/polars/struct_name_space.rb +96 -0
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +130 -0
- data/lib/polars/version.rb +4 -0
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +91 -0
- metadata +138 -0
@@ -0,0 +1,144 @@
|
|
1
|
+
module Polars
|
2
|
+
module Functions
|
3
|
+
# Repeat a single value n times.
|
4
|
+
#
|
5
|
+
# @param value [Object]
|
6
|
+
# Value to repeat.
|
7
|
+
# @param n [Integer]
|
8
|
+
# Repeat `n` times.
|
9
|
+
# @param eager [Boolean]
|
10
|
+
# Run eagerly and collect into a `Series`.
|
11
|
+
# @param name [String]
|
12
|
+
# Only used in `eager` mode. As expression, use `alias`.
|
13
|
+
#
|
14
|
+
# @return [Object]
|
15
|
+
#
|
16
|
+
# @example Construct a column with a repeated value in a lazy context.
|
17
|
+
# Polars.select(Polars.repeat("z", 3)).to_series
|
18
|
+
# # =>
|
19
|
+
# # shape: (3,)
|
20
|
+
# # Series: 'repeat' [str]
|
21
|
+
# # [
|
22
|
+
# # "z"
|
23
|
+
# # "z"
|
24
|
+
# # "z"
|
25
|
+
# # ]
|
26
|
+
#
|
27
|
+
# @example Generate a Series directly by setting `eager: true`.
|
28
|
+
# Polars.repeat(3, 3, dtype: Polars::Int8, eager: true)
|
29
|
+
# # =>
|
30
|
+
# # shape: (3,)
|
31
|
+
# # Series: 'repeat' [i8]
|
32
|
+
# # [
|
33
|
+
# # 3
|
34
|
+
# # 3
|
35
|
+
# # 3
|
36
|
+
# # ]
|
37
|
+
def repeat(value, n, dtype: nil, eager: false, name: nil)
  # `name` is still applied below, but callers are pointed at `alias` instead.
  named = !name.nil?
  warn "the `name` argument is deprecated. Use the `alias` method instead." if named

  # A plain Integer count is promoted to a literal expression first.
  count = n.is_a?(Integer) ? lit(n) : n

  parsed_value = Utils.parse_into_expression(value, str_as_lit: true)
  repeated = Utils.wrap_expr(Plr.repeat(parsed_value, count._rbexpr, dtype))
  repeated = repeated.alias(name) if named

  # Eager mode evaluates right away and hands back a Series; otherwise the
  # expression itself is returned.
  eager ? select(repeated).to_series : repeated
end
|
56
|
+
|
57
|
+
# Construct a column of length `n` filled with ones.
|
58
|
+
#
|
59
|
+
# This is syntactic sugar for the `repeat` function.
|
60
|
+
#
|
61
|
+
# @param n [Integer]
|
62
|
+
# Length of the resulting column.
|
63
|
+
# @param dtype [Object]
|
64
|
+
# Data type of the resulting column. Defaults to Float64.
|
65
|
+
# @param eager [Boolean]
|
66
|
+
# Evaluate immediately and return a `Series`. If set to `false`,
|
67
|
+
# return an expression instead.
|
68
|
+
#
|
69
|
+
# @return [Object]
|
70
|
+
#
|
71
|
+
# @example
|
72
|
+
# Polars.ones(3, dtype: Polars::Int8, eager: true)
|
73
|
+
# # =>
|
74
|
+
# # shape: (3,)
|
75
|
+
# # Series: 'ones' [i8]
|
76
|
+
# # [
|
77
|
+
# # 1
|
78
|
+
# # 1
|
79
|
+
# # 1
|
80
|
+
# # ]
|
81
|
+
def ones(n, dtype: nil, eager: true)
  # Apply the documented default (Float64) when no dtype is given, so the
  # fill value and the resulting column dtype agree.
  dtype ||= Float64

  # The helper returns nil for dtypes it cannot build a "one" for.
  one = _one_or_zero_by_dtype(1, dtype)
  raise TypeError, "invalid dtype for `ones`; found #{dtype}" if one.nil?

  repeat(one, n, dtype: dtype, eager: eager).alias("ones")
end
|
89
|
+
|
90
|
+
# Construct a column of length `n` filled with zeros.
|
91
|
+
#
|
92
|
+
# This is syntactic sugar for the `repeat` function.
|
93
|
+
#
|
94
|
+
# @param n [Integer]
|
95
|
+
# Length of the resulting column.
|
96
|
+
# @param dtype [Object]
|
97
|
+
# Data type of the resulting column. Defaults to Float64.
|
98
|
+
# @param eager [Boolean]
|
99
|
+
# Evaluate immediately and return a `Series`. If set to `false`,
|
100
|
+
# return an expression instead.
|
101
|
+
#
|
102
|
+
# @return [Object]
|
103
|
+
#
|
104
|
+
# @example
|
105
|
+
# Polars.zeros(3, dtype: Polars::Int8, eager: true)
|
106
|
+
# # =>
|
107
|
+
# # shape: (3,)
|
108
|
+
# # Series: 'zeros' [i8]
|
109
|
+
# # [
|
110
|
+
# # 0
|
111
|
+
# # 0
|
112
|
+
# # 0
|
113
|
+
# # ]
|
114
|
+
def zeros(n, dtype: nil, eager: true)
  # Apply the documented default (Float64) when no dtype is given, so the
  # fill value and the resulting column dtype agree.
  dtype ||= Float64

  # The helper returns nil for dtypes it cannot build a "zero" for.
  zero = _one_or_zero_by_dtype(0, dtype)
  raise TypeError, "invalid dtype for `zeros`; found #{dtype}" if zero.nil?

  repeat(zero, n, dtype: dtype, eager: eager).alias("zeros")
end
|
122
|
+
|
123
|
+
private
|
124
|
+
|
125
|
+
# Pick the Ruby fill value matching `dtype` for the 0/1 passed as `value`.
#
# @param value [Integer]
#   The number to convert (callers in this file pass 0 or 1).
# @param dtype [Object, nil]
#   Target data type; `nil` is treated as the Float64 default documented
#   on `ones`/`zeros`.
#
# @return [Object, nil]
#   The converted fill value, or `nil` when the dtype is not supported.
def _one_or_zero_by_dtype(value, dtype)
  # Guard first: nil does not respond to `integer?`/`float?`.
  return value.to_f if dtype.nil?

  if dtype.integer?
    value
  elsif dtype.float?
    value.to_f
  elsif dtype == Boolean
    value != 0
  elsif dtype == Utf8
    value.to_s
  elsif dtype == Decimal
    # `Decimal` is used above as a dtype constant; build the value with the
    # standard library's BigDecimal rather than calling `Decimal(...)`.
    # NOTE(review): confirm no `Decimal()` conversion method exists elsewhere.
    require "bigdecimal"
    BigDecimal(value.to_s)
  elsif [List, Array].include?(dtype)
    # Nested dtypes: build one inner fill value and repeat it `width` times
    # (a dtype that does not expose `width` gets a single element).
    arr_width = dtype.respond_to?(:width) ? dtype.width : 1
    [_one_or_zero_by_dtype(value, dtype.inner)] * arr_width
  end
end
|
143
|
+
end
|
144
|
+
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
module Polars
|
2
|
+
module Functions
|
3
|
+
# Start a "when, then, otherwise" expression.
|
4
|
+
#
|
5
|
+
# @return [When]
|
6
|
+
#
|
7
|
+
# @example Below we add a column with the value 1, where column "foo" > 2 and the value -1 where it isn't.
|
8
|
+
# df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
|
9
|
+
# df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
|
10
|
+
# # =>
|
11
|
+
# # shape: (3, 3)
|
12
|
+
# # ┌─────┬─────┬─────────┐
|
13
|
+
# # │ foo ┆ bar ┆ literal │
|
14
|
+
# # │ --- ┆ --- ┆ --- │
|
15
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
16
|
+
# # ╞═════╪═════╪═════════╡
|
17
|
+
# # │ 1 ┆ 3 ┆ -1 │
|
18
|
+
# # │ 3 ┆ 4 ┆ 1 │
|
19
|
+
# # │ 4 ┆ 0 ┆ 1 │
|
20
|
+
# # └─────┴─────┴─────────┘
|
21
|
+
#
|
22
|
+
# @example Or with multiple when-then operations chained:
|
23
|
+
# df.with_columns(
|
24
|
+
# Polars.when(Polars.col("foo") > 2)
|
25
|
+
# .then(1)
|
26
|
+
# .when(Polars.col("bar") > 2)
|
27
|
+
# .then(4)
|
28
|
+
# .otherwise(-1)
|
29
|
+
# .alias("val")
|
30
|
+
# )
|
31
|
+
# # =>
|
32
|
+
# # shape: (3, 3)
|
33
|
+
# # ┌─────┬─────┬─────┐
|
34
|
+
# # │ foo ┆ bar ┆ val │
|
35
|
+
# # │ --- ┆ --- ┆ --- │
|
36
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
37
|
+
# # ╞═════╪═════╪═════╡
|
38
|
+
# # │ 1 ┆ 3 ┆ 4 │
|
39
|
+
# # │ 3 ┆ 4 ┆ 1 │
|
40
|
+
# # │ 4 ┆ 0 ┆ 1 │
|
41
|
+
# # └─────┴─────┴─────┘
|
42
|
+
#
|
43
|
+
# @example The `otherwise` at the end is optional. If left out, any rows where none of the `when` expressions evaluate to `true` are set to `null`:
|
44
|
+
# df.with_columns(Polars.when(Polars.col("foo") > 2).then(1).alias("val"))
|
45
|
+
# # =>
|
46
|
+
# # shape: (3, 3)
|
47
|
+
# # ┌─────┬─────┬──────┐
|
48
|
+
# # │ foo ┆ bar ┆ val │
|
49
|
+
# # │ --- ┆ --- ┆ --- │
|
50
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
51
|
+
# # ╞═════╪═════╪══════╡
|
52
|
+
# # │ 1 ┆ 3 ┆ null │
|
53
|
+
# # │ 3 ┆ 4 ┆ 1 │
|
54
|
+
# # │ 4 ┆ 0 ┆ 1 │
|
55
|
+
# # └─────┴─────┴──────┘
|
56
|
+
#
|
57
|
+
# @example Pass multiple predicates, each of which must be met:
|
58
|
+
# df.with_columns(
|
59
|
+
# val: Polars.when(
|
60
|
+
# Polars.col("bar") > 0,
|
61
|
+
# Polars.col("foo") % 2 != 0
|
62
|
+
# )
|
63
|
+
# .then(99)
|
64
|
+
# .otherwise(-1)
|
65
|
+
# )
|
66
|
+
# # =>
|
67
|
+
# # shape: (3, 3)
|
68
|
+
# # ┌─────┬─────┬─────┐
|
69
|
+
# # │ foo ┆ bar ┆ val │
|
70
|
+
# # │ --- ┆ --- ┆ --- │
|
71
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
72
|
+
# # ╞═════╪═════╪═════╡
|
73
|
+
# # │ 1 ┆ 3 ┆ 99 │
|
74
|
+
# # │ 3 ┆ 4 ┆ 99 │
|
75
|
+
# # │ 4 ┆ 0 ┆ -1 │
|
76
|
+
# # └─────┴─────┴─────┘
|
77
|
+
#
|
78
|
+
# @example Pass conditions as keyword arguments:
|
79
|
+
# df.with_columns(val: Polars.when(foo: 4, bar: 0).then(99).otherwise(-1))
|
80
|
+
# # =>
|
81
|
+
# # shape: (3, 3)
|
82
|
+
# # ┌─────┬─────┬─────┐
|
83
|
+
# # │ foo ┆ bar ┆ val │
|
84
|
+
# # │ --- ┆ --- ┆ --- │
|
85
|
+
# # │ i64 ┆ i64 ┆ i32 │
|
86
|
+
# # ╞═════╪═════╪═════╡
|
87
|
+
# # │ 1 ┆ 3 ┆ -1 │
|
88
|
+
# # │ 3 ┆ 4 ┆ -1 │
|
89
|
+
# # │ 4 ┆ 0 ┆ 99 │
|
90
|
+
# # └─────┴─────┴─────┘
|
91
|
+
def when(*predicates, **constraints)
  # Positional predicates and keyword constraints are merged into a single
  # condition, which then seeds the `When` builder.
  When.new(
    Plr.when(
      Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
    )
  )
end
|
95
|
+
end
|
96
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module Polars
|
2
|
+
module Functions
|
3
|
+
# Convert categorical variables into dummy/indicator variables.
|
4
|
+
#
|
5
|
+
# @param df [DataFrame]
|
6
|
+
# DataFrame to convert.
|
7
|
+
# @param columns [Array, nil]
|
8
|
+
# A subset of columns to convert to dummy variables. `nil` means
|
9
|
+
# "all columns".
|
10
|
+
#
|
11
|
+
# @return [DataFrame]
|
12
|
+
def get_dummies(df, columns: nil)
  # Pure delegation: the frame performs the conversion itself.
  subset = columns
  df.to_dummies(columns: subset)
end
|
15
|
+
|
16
|
+
# Aggregate to list.
|
17
|
+
#
|
18
|
+
# @return [Expr]
|
19
|
+
def to_list(name)
  # NOTE(review): this relies on `.list` performing the aggregation described
  # in the docs; confirm it does not merely return the list namespace.
  column = col(name)
  column.list
end
|
22
|
+
|
23
|
+
# Compute the Spearman rank correlation between two columns.
|
24
|
+
#
|
25
|
+
# Missing data will be excluded from the computation.
|
26
|
+
#
|
27
|
+
# @param a [Object]
|
28
|
+
# Column name or Expression.
|
29
|
+
# @param b [Object]
|
30
|
+
# Column name or Expression.
|
31
|
+
# @param ddof [Integer]
|
32
|
+
# Delta degrees of freedom
|
33
|
+
# @param propagate_nans [Boolean]
|
34
|
+
# If `true`, any `NaN` encountered will lead to `NaN` in the output.
|
35
|
+
# Defaults to `false`, where `NaN` values are regarded as larger than any finite number
|
36
|
+
# and thus lead to the highest rank.
|
37
|
+
#
|
38
|
+
# @return [Expr]
|
39
|
+
def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
  # Forward to the general `corr` with the method fixed to "spearman".
  corr(
    a,
    b,
    propagate_nans: propagate_nans,
    ddof: ddof,
    method: "spearman"
  )
end
|
42
|
+
|
43
|
+
# Compute the Pearson correlation between two columns.
|
44
|
+
#
|
45
|
+
# @param a [Object]
|
46
|
+
# Column name or Expression.
|
47
|
+
# @param b [Object]
|
48
|
+
# Column name or Expression.
|
49
|
+
# @param ddof [Integer]
|
50
|
+
# Delta degrees of freedom
|
51
|
+
#
|
52
|
+
# @return [Expr]
|
53
|
+
def pearson_corr(a, b, ddof: 1)
  # Forward to the general `corr` with the method fixed to "pearson".
  corr(
    a,
    b,
    ddof: ddof,
    method: "pearson"
  )
end
|
56
|
+
end
|
57
|
+
end
|