polars-df 0.8.0-aarch64-linux → 0.9.0-aarch64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -1
- data/Cargo.lock +107 -59
- data/Cargo.toml +0 -3
- data/LICENSE-THIRD-PARTY.txt +1726 -754
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +449 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +179 -43
- data/lib/polars/data_types.rb +191 -28
- data/lib/polars/date_time_expr.rb +31 -14
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +866 -186
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +27 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +2 -2
- data/lib/polars/io.rb +18 -25
- data/lib/polars/lazy_frame.rb +367 -53
- data/lib/polars/list_expr.rb +152 -6
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +273 -34
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +412 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +52 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -2
- metadata +33 -4
- data/lib/polars/lazy_functions.rb +0 -1181
@@ -0,0 +1,49 @@
|
|
1
|
+
module Polars
  module Functions
    # Build an expression that yields the number of rows in the current context.
    #
    # This is the counterpart of `COUNT(*)` in SQL.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [1, 2, nil],
    #       "b" => [3, nil, nil],
    #       "c" => ["foo", "bar", "foo"]
    #     }
    #   )
    #   df.select(Polars.len)
    #   # =>
    #   # shape: (1, 1)
    #   # ┌─────┐
    #   # │ len │
    #   # │ --- │
    #   # │ u32 │
    #   # ╞═════╡
    #   # │ 3   │
    #   # └─────┘
    #
    # @example Generate an index column by using `len` in conjunction with `int_range`.
    #   df.select([
    #     Polars.int_range(Polars.len, dtype: Polars::UInt32).alias("index"),
    #     Polars.all
    #   ])
    #   # =>
    #   # shape: (3, 4)
    #   # ┌───────┬──────┬──────┬─────┐
    #   # │ index ┆ a    ┆ b    ┆ c   │
    #   # │ ---   ┆ ---  ┆ ---  ┆ --- │
    #   # │ u32   ┆ i64  ┆ i64  ┆ str │
    #   # ╞═══════╪══════╪══════╪═════╡
    #   # │ 0     ┆ 1    ┆ 3    ┆ foo │
    #   # │ 1     ┆ 2    ┆ null ┆ bar │
    #   # │ 2     ┆ null ┆ null ┆ foo │
    #   # └───────┴──────┴──────┴─────┘
    def len
      rbexpr = Plr.len
      Utils.wrap_expr(rbexpr)
    end
    # `length` is kept as a synonym for `len`.
    alias_method :length, :len
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Polars
  module Functions
    # Return an expression representing a literal value.
    #
    # Some Ruby values are normalised before they reach the native layer:
    #
    # - `Time` / `DateTime`: converted with `Utils._datetime_to_pl_timestamp`
    #   and cast to `Datetime` using the time unit read from `dtype`
    #   (`"ns"` when `dtype` is nil or has no unit). If `dtype` carries a
    #   time zone, it is applied with `dt.replace_time_zone`.
    # - `Date`: routed through a midnight-UTC `Time` literal, then cast to `Date`.
    # - `Polars::Series`: wrapped as-is and aliased to the series name unless
    #   that name is empty.
    # - `Array` / `Numo::NArray`: wrapped in an unnamed `Series` first.
    #
    # @param value [Object]
    #   Value to turn into a literal expression.
    # @param dtype [Object, nil]
    #   Optional data type. For temporal values only its `time_unit` and
    #   `time_zone` are consulted; for other scalar values the literal is
    #   cast to it.
    # @param allow_object [Boolean, nil]
    #   Forwarded unchanged to the native `Plr.lit` call.
    #
    # @return [Expr]
    def lit(value, dtype: nil, allow_object: nil)
      if value.is_a?(::Time) || value.is_a?(::DateTime)
        time_unit = dtype&.time_unit || "ns"
        # Safe navigation is required here: `dtype` is nil unless the caller
        # passed one. (The previous `dtype.&time_zone` invoked the `&`
        # operator and never read the time zone.)
        time_zone = dtype&.time_zone
        e = lit(Utils._datetime_to_pl_timestamp(value, time_unit)).cast(Datetime.new(time_unit))
        return time_zone ? e.dt.replace_time_zone(time_zone.to_s) : e
      elsif value.is_a?(::Date)
        return lit(::Time.utc(value.year, value.month, value.day)).cast(Date)
      elsif value.is_a?(Polars::Series)
        name = value.name
        e = Utils.wrap_expr(Plr.lit(value._s, allow_object))
        # An empty series name means "unnamed": leave the expression unaliased.
        return name == "" ? e : e.alias(name)
      elsif (defined?(Numo::NArray) && value.is_a?(Numo::NArray)) || value.is_a?(::Array)
        return lit(Series.new("", value))
      elsif dtype
        return Utils.wrap_expr(Plr.lit(value, allow_object)).cast(dtype)
      end

      Utils.wrap_expr(Plr.lit(value, allow_object))
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Polars
  module Functions
    # Seed the global random number generator used by Polars.
    #
    # The seed influences operations such as shuffle ordering.
    #
    # @param seed [Integer]
    #   A non-negative integer < 2**64 used to seed the internal global
    #   random number generator.
    #
    # @return [nil]
    def set_random_seed(seed)
      # Pure delegation: the value is handed to the native layer untouched.
      Plr.set_random_seed(seed)
    end
  end
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
module Polars
  module Functions
    # Create a range of type `Datetime` (or `Date`).
    #
    # @param start [Object]
    #   Lower bound of the date range.
    # @param stop [Object]
    #   Upper bound of the date range.
    # @param interval [Object]
    #   Interval periods. It can be a polars duration string, such as `3d12h4m25s`
    #   representing 3 days, 12 hours, 4 minutes, and 25 seconds. Spaces inside
    #   the string are removed before use.
    # @param lazy [Boolean]
    #   Return an expression instead of an evaluated Series.
    # @param closed ["both", "left", "right", "none"]
    #   Define whether the temporal window interval is closed or not.
    # @param name [String]
    #   Name of the output Series.
    # @param time_unit [nil, "ns", "us", "ms"]
    #   Set the time unit. When nil, `"ns"` is chosen if the interval string
    #   contains `ns`, otherwise `"us"`.
    # @param time_zone [String]
    #   Optional timezone
    #
    # @return [Object]
    #
    # @note
    #   If both `start` and `stop` are passed as date types (not datetime), and the
    #   interval granularity is no finer than 1d, the returned range is also of
    #   type date. All other permutations return a datetime Series.
    #
    # @example Using polars duration string to specify the interval
    #   Polars.date_range(Date.new(2022, 1, 1), Date.new(2022, 3, 1), "1mo", name: "drange")
    #   # =>
    #   # shape: (3,)
    #   # Series: 'drange' [date]
    #   # [
    #   #         2022-01-01
    #   #         2022-02-01
    #   #         2022-03-01
    #   # ]
    #
    # @example Using a compound duration string together with an explicit time unit
    #   Polars.date_range(
    #     DateTime.new(1985, 1, 1),
    #     DateTime.new(1985, 1, 10),
    #     "1d12h",
    #     time_unit: "ms"
    #   )
    #   # =>
    #   # shape: (7,)
    #   # Series: '' [datetime[ms]]
    #   # [
    #   #         1985-01-01 00:00:00
    #   #         1985-01-02 12:00:00
    #   #         1985-01-04 00:00:00
    #   #         1985-01-05 12:00:00
    #   #         1985-01-07 00:00:00
    #   #         1985-01-08 12:00:00
    #   #         1985-01-10 00:00:00
    #   # ]
    def date_range(
      start,
      stop,
      interval,
      lazy: false,
      closed: "both",
      name: nil,
      time_unit: nil,
      time_zone: nil
    )
      # ActiveSupport durations are recognised but not supported yet.
      if defined?(ActiveSupport::Duration) && interval.is_a?(ActiveSupport::Duration)
        raise Todo
      end

      # Duration strings may be written with spaces ("1d 12h"); strip them.
      interval = interval.to_s.delete(" ")

      # Without an explicit unit, infer it from the interval's granularity.
      time_unit = interval.include?("ns") ? "ns" : "us" if time_unit.nil?

      lower = Utils.parse_as_expression(start)
      upper = Utils.parse_as_expression(stop)
      range_expr = Utils.wrap_expr(
        Plr.date_range(lower, upper, interval, closed, time_unit, time_zone)
      ).alias(name.to_s)

      return range_expr if lazy

      select(range_expr).to_series
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module Polars
  module Functions
    # Create a range expression (or Series).
    #
    # This can be used in a `select`, `with_column`, etc. Be sure that the resulting
    # range size is equal to the length of the DataFrame you are collecting.
    #
    # @param start [Integer, Expr, Series]
    #   Lower bound of range. When `stop` is omitted, this value is used as the
    #   upper bound instead and the range starts at 0.
    # @param stop [Integer, Expr, Series]
    #   Upper bound of range.
    # @param step [Integer]
    #   Step size of the range.
    # @param eager [Boolean]
    #   If `true`, a Series is returned instead of an Expr.
    # @param dtype [Symbol]
    #   Apply an explicit integer dtype to the resulting expression (default is `Int64`).
    #
    # @return [Expr, Series]
    #
    # @example
    #   Polars.arange(0, 3, eager: true)
    #   # =>
    #   # shape: (3,)
    #   # Series: 'arange' [i64]
    #   # [
    #   #         0
    #   #         1
    #   #         2
    #   # ]
    def int_range(start, stop = nil, step: 1, eager: false, dtype: nil)
      # Single-argument form: the lone value is the upper bound, counting from 0.
      start, stop = 0, start if stop.nil?

      lower = Utils.parse_as_expression(start)
      upper = Utils.parse_as_expression(stop)

      dtype ||= Int64
      # Symbol dtypes are handed to the native call as strings.
      dtype = dtype.to_s if dtype.is_a?(Symbol)

      range_expr = Utils.wrap_expr(Plr.int_range(lower, upper, step, dtype)).alias("arange")

      eager ? select(range_expr).to_series : range_expr
    end
    # `arange` is kept as a synonym for `int_range`.
    alias_method :arange, :int_range
  end
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
require "bigdecimal"

module Polars
  module Functions
    # Repeat a single value n times.
    #
    # @param value [Object]
    #   Value to repeat.
    # @param n [Integer]
    #   Repeat `n` times. An Integer is wrapped with `lit`; any other object
    #   is used as given and must respond to `_rbexpr`.
    # @param dtype [Object]
    #   Data type forwarded to the native `Plr.repeat` call.
    # @param eager [Boolean]
    #   Run eagerly and collect into a `Series`.
    # @param name [String]
    #   Deprecated (a warning is emitted when given). Use `alias` on the
    #   result instead.
    #
    # @return [Object]
    #
    # @example Construct a column with a repeated value in a lazy context.
    #   Polars.select(Polars.repeat("z", 3)).to_series
    #   # =>
    #   # shape: (3,)
    #   # Series: 'repeat' [str]
    #   # [
    #   #         "z"
    #   #         "z"
    #   #         "z"
    #   # ]
    #
    # @example Generate a Series directly by setting `eager: true`.
    #   Polars.repeat(3, 3, dtype: Polars::Int8, eager: true)
    #   # =>
    #   # shape: (3,)
    #   # Series: 'repeat' [i8]
    #   # [
    #   #         3
    #   #         3
    #   #         3
    #   # ]
    def repeat(value, n, dtype: nil, eager: false, name: nil)
      unless name.nil?
        warn "the `name` argument is deprecated. Use the `alias` method instead."
      end

      n = lit(n) if n.is_a?(Integer)

      value = Utils.parse_as_expression(value, str_as_lit: true)
      expr = Utils.wrap_expr(Plr.repeat(value, n._rbexpr, dtype))
      expr = expr.alias(name) unless name.nil?
      return select(expr).to_series if eager

      expr
    end

    # Construct a column of length `n` filled with ones.
    #
    # This is syntactic sugar for the `repeat` function.
    #
    # @param n [Integer]
    #   Length of the resulting column.
    # @param dtype [Object]
    #   Data type of the resulting column. Defaults to Float64.
    # @param eager [Boolean]
    #   Evaluate immediately and return a `Series` (the default). If set to
    #   `false`, return an expression instead.
    #
    # @return [Object]
    #
    # @raise [TypeError]
    #   If no "one" value can be derived for `dtype`.
    #
    # @example
    #   Polars.ones(3, dtype: Polars::Int8, eager: true)
    #   # =>
    #   # shape: (3,)
    #   # Series: 'ones' [i8]
    #   # [
    #   #         1
    #   #         1
    #   #         1
    #   # ]
    def ones(n, dtype: nil, eager: true)
      # Apply the documented default; a nil dtype previously crashed on
      # `nil.integer?` inside `_one_or_zero_by_dtype`.
      dtype ||= Float64
      if (one = _one_or_zero_by_dtype(1, dtype)).nil?
        msg = "invalid dtype for `ones`; found #{dtype}"
        raise TypeError, msg
      end

      repeat(one, n, dtype: dtype, eager: eager).alias("ones")
    end

    # Construct a column of length `n` filled with zeros.
    #
    # This is syntactic sugar for the `repeat` function.
    #
    # @param n [Integer]
    #   Length of the resulting column.
    # @param dtype [Object]
    #   Data type of the resulting column. Defaults to Float64.
    # @param eager [Boolean]
    #   Evaluate immediately and return a `Series` (the default). If set to
    #   `false`, return an expression instead.
    #
    # @return [Object]
    #
    # @raise [TypeError]
    #   If no "zero" value can be derived for `dtype`.
    #
    # @example
    #   Polars.zeros(3, dtype: Polars::Int8, eager: true)
    #   # =>
    #   # shape: (3,)
    #   # Series: 'zeros' [i8]
    #   # [
    #   #         0
    #   #         0
    #   #         0
    #   # ]
    def zeros(n, dtype: nil, eager: true)
      # Apply the documented default; a nil dtype previously crashed on
      # `nil.integer?` inside `_one_or_zero_by_dtype`.
      dtype ||= Float64
      if (zero = _one_or_zero_by_dtype(0, dtype)).nil?
        msg = "invalid dtype for `zeros`; found #{dtype}"
        raise TypeError, msg
      end

      repeat(zero, n, dtype: dtype, eager: eager).alias("zeros")
    end

    private

    # Translate the integer `value` (0 or 1) into the Ruby object that
    # represents it for `dtype`. Returns nil when `dtype` is not one of the
    # handled kinds, which callers treat as "invalid dtype".
    def _one_or_zero_by_dtype(value, dtype)
      if dtype.integer?
        value
      elsif dtype.float?
        value.to_f
      elsif dtype == Boolean
        value != 0
      elsif dtype == Utf8
        value.to_s
      elsif dtype == Decimal
        # `Decimal` is the Polars data type, not a conversion method, so the
        # Ruby-side value is built with stdlib BigDecimal.
        # NOTE(review): confirm the native literal conversion accepts BigDecimal.
        BigDecimal(value.to_s)
      elsif [List, Array].include?(dtype)
        # Nested types: build one row, repeating the inner value `width`
        # times when the dtype exposes a width, otherwise once.
        arr_width = dtype.respond_to?(:width) ? dtype.width : 1
        [_one_or_zero_by_dtype(value, dtype.inner)] * arr_width
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Polars
  module Functions
    # Begin a "when, then, otherwise" conditional expression.
    #
    # @param expr [Object]
    #   Condition; it is passed through `Utils.expr_to_lit_or_expr` before use.
    #
    # @return [When]
    #
    # @example
    #   df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
    #   df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
    #   # =>
    #   # shape: (3, 3)
    #   # ┌─────┬─────┬─────────┐
    #   # │ foo ┆ bar ┆ literal │
    #   # │ --- ┆ --- ┆ ---     │
    #   # │ i64 ┆ i64 ┆ i32     │
    #   # ╞═════╪═════╪═════════╡
    #   # │ 1   ┆ 3   ┆ -1      │
    #   # │ 3   ┆ 4   ┆ 1       │
    #   # │ 4   ┆ 0   ┆ 1       │
    #   # └─────┴─────┴─────────┘
    def when(expr)
      predicate = Utils.expr_to_lit_or_expr(expr)
      When.new(Plr.when(predicate._rbexpr))
    end
  end
end
|