polars-df 0.13.0-x64-mingw-ucrt
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +208 -0
- data/Cargo.lock +2556 -0
- data/Cargo.toml +6 -0
- data/LICENSE-THIRD-PARTY.txt +39278 -0
- data/LICENSE.txt +20 -0
- data/README.md +437 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +537 -0
- data/lib/polars/array_name_space.rb +423 -0
- data/lib/polars/batched_csv_reader.rb +104 -0
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/cat_expr.rb +36 -0
- data/lib/polars/cat_name_space.rb +88 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/convert.rb +98 -0
- data/lib/polars/data_frame.rb +5191 -0
- data/lib/polars/data_types.rb +466 -0
- data/lib/polars/date_time_expr.rb +1397 -0
- data/lib/polars/date_time_name_space.rb +1287 -0
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/exceptions.rb +38 -0
- data/lib/polars/expr.rb +7256 -0
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +271 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1329 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +136 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +57 -0
- data/lib/polars/group_by.rb +613 -0
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/io/csv.rb +696 -0
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +275 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +233 -0
- data/lib/polars/lazy_frame.rb +2708 -0
- data/lib/polars/lazy_group_by.rb +181 -0
- data/lib/polars/list_expr.rb +791 -0
- data/lib/polars/list_name_space.rb +449 -0
- data/lib/polars/meta_expr.rb +222 -0
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/plot.rb +109 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +4444 -0
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +1495 -0
- data/lib/polars/string_name_space.rb +811 -0
- data/lib/polars/struct_expr.rb +98 -0
- data/lib/polars/struct_name_space.rb +96 -0
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +130 -0
- data/lib/polars/version.rb +4 -0
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +91 -0
- metadata +138 -0
@@ -0,0 +1,49 @@
|
|
1
|
+
module Polars
|
2
|
+
module Functions
|
3
|
+
# Return the number of rows in the context.
|
4
|
+
#
|
5
|
+
# This is similar to `COUNT(*)` in SQL.
|
6
|
+
#
|
7
|
+
# @return [Expr]
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# df = Polars::DataFrame.new(
|
11
|
+
# {
|
12
|
+
# "a" => [1, 2, nil],
|
13
|
+
# "b" => [3, nil, nil],
|
14
|
+
# "c" => ["foo", "bar", "foo"]
|
15
|
+
# }
|
16
|
+
# )
|
17
|
+
# df.select(Polars.len)
|
18
|
+
# # =>
|
19
|
+
# # shape: (1, 1)
|
20
|
+
# # ┌─────┐
|
21
|
+
# # │ len │
|
22
|
+
# # │ --- │
|
23
|
+
# # │ u32 │
|
24
|
+
# # ╞═════╡
|
25
|
+
# # │ 3 │
|
26
|
+
# # └─────┘
|
27
|
+
#
|
28
|
+
# @example Generate an index column by using `len` in conjunction with `int_range`.
|
29
|
+
# df.select([
|
30
|
+
# Polars.int_range(Polars.len, dtype: Polars::UInt32).alias("index"),
|
31
|
+
# Polars.all
|
32
|
+
# ])
|
33
|
+
# # =>
|
34
|
+
# # shape: (3, 4)
|
35
|
+
# # ┌───────┬──────┬──────┬─────┐
|
36
|
+
# # │ index ┆ a ┆ b ┆ c │
|
37
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
38
|
+
# # │ u32 ┆ i64 ┆ i64 ┆ str │
|
39
|
+
# # ╞═══════╪══════╪══════╪═════╡
|
40
|
+
# # │ 0 ┆ 1 ┆ 3 ┆ foo │
|
41
|
+
# # │ 1 ┆ 2 ┆ null ┆ bar │
|
42
|
+
# # │ 2 ┆ null ┆ null ┆ foo │
|
43
|
+
# # └───────┴──────┴──────┴─────┘
|
44
|
+
def len
  # Obtain the native row-count expression, then wrap it in the Ruby Expr type.
  native_len = Plr.len
  Utils.wrap_expr(native_len)
end
|
47
|
+
alias_method :length, :len
|
48
|
+
end
|
49
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Polars
|
2
|
+
module Functions
|
3
|
+
# Return an expression representing a literal value.
|
4
|
+
#
|
5
|
+
# @return [Expr]
|
6
|
+
def lit(value, dtype: nil, allow_object: nil)
  if value.is_a?(::Time) || value.is_a?(::DateTime)
    # Temporal values are converted to an integer via Utils.datetime_to_int and
    # cast back to a Datetime of the requested unit ("ns" when no dtype is given).
    time_unit = dtype&.time_unit || "ns"
    # Safe navigation (`&.`) is required here: `dtype.&time_zone` would invoke
    # the `&` operator method on dtype rather than read its time zone.
    time_zone = dtype&.time_zone
    e = lit(Utils.datetime_to_int(value, time_unit)).cast(Datetime.new(time_unit))
    # Only attach a time zone when the dtype actually carries one.
    return time_zone ? e.dt.replace_time_zone(time_zone.to_s) : e
  elsif value.is_a?(::Date)
    # Dates go through the Time branch (midnight UTC) and are cast to Date.
    return lit(::Time.utc(value.year, value.month, value.day)).cast(Date)
  elsif value.is_a?(Polars::Series)
    name = value.name
    e = Utils.wrap_expr(Plr.lit(value._s, allow_object))
    # An unnamed Series yields an unaliased expression.
    return name == "" ? e : e.alias(name)
  elsif (defined?(Numo::NArray) && value.is_a?(Numo::NArray)) || value.is_a?(::Array)
    # Array-like input is wrapped in an unnamed Series and handled by the branch above.
    return lit(Series.new("", value))
  elsif dtype
    return Utils.wrap_expr(Plr.lit(value, allow_object)).cast(dtype)
  end

  Utils.wrap_expr(Plr.lit(value, allow_object))
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Polars
|
2
|
+
module Functions
|
3
|
+
# Set the global random seed for Polars.
|
4
|
+
#
|
5
|
+
# This random seed is used to determine things such as shuffle ordering.
|
6
|
+
#
|
7
|
+
# @param seed [Integer]
|
8
|
+
# A non-negative integer < 2**64 used to seed the internal global
|
9
|
+
# random number generator.
|
10
|
+
#
|
11
|
+
# @return [nil]
|
12
|
+
def set_random_seed(seed)
  # Forward the seed unchanged to the native Plr.set_random_seed.
  Plr.set_random_seed(seed)
end
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,136 @@
|
|
1
|
+
module Polars
|
2
|
+
module Functions
|
3
|
+
# Create a range of type `Datetime` (or `Date`).
|
4
|
+
#
|
5
|
+
# @param start [Object]
|
6
|
+
# Lower bound of the date range.
|
7
|
+
# @param stop [Object]
|
8
|
+
# Upper bound of the date range.
|
9
|
+
# @param interval [Object]
|
10
|
+
# Interval periods. It can be a polars duration string, such as `3d12h4m25s`
|
11
|
+
# representing 3 days, 12 hours, 4 minutes, and 25 seconds.
|
12
|
+
# @param closed ["both", "left", "right", "none"]
|
13
|
+
# Define whether the temporal window interval is closed or not.
|
14
|
+
# @param eager [Boolean]
|
15
|
+
# Evaluate immediately and return a `Series`.
|
16
|
+
# If set to `false` (default), return an expression instead.
|
17
|
+
#
|
18
|
+
# @return [Object]
|
19
|
+
#
|
20
|
+
# @note
|
21
|
+
# If both `start` and `stop` are passed as date types (not datetime), and the
|
22
|
+
# interval granularity is no finer than 1d, the returned range is also of
|
23
|
+
# type date. All other permutations return a datetime Series.
|
24
|
+
#
|
25
|
+
# @example Using polars duration string to specify the interval
|
26
|
+
# Polars.date_range(Date.new(2022, 1, 1), Date.new(2022, 3, 1), "1mo", eager: true).alias(
|
27
|
+
# "date"
|
28
|
+
# )
|
29
|
+
# # =>
|
30
|
+
# # shape: (3,)
|
31
|
+
# # Series: 'date' [date]
|
32
|
+
# # [
|
33
|
+
# # 2022-01-01
|
34
|
+
# # 2022-02-01
|
35
|
+
# # 2022-03-01
|
36
|
+
# # ]
|
37
|
+
def date_range(start, stop, interval = "1d", closed: "both", eager: false)
  # Normalise the interval first, then turn both bounds into native expressions.
  interval = Utils.parse_interval_argument(interval)
  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)

  range_expr = Utils.wrap_expr(Plr.date_range(lower, upper, interval, closed))

  # Eager callers receive an evaluated Series; otherwise the expression is returned.
  eager ? F.select(range_expr).to_series : range_expr
end
|
59
|
+
|
60
|
+
# Create a column of date ranges.
|
61
|
+
#
|
62
|
+
# @param start [Object]
|
63
|
+
# Lower bound of the date range.
|
64
|
+
# @param stop [Object]
|
65
|
+
# Upper bound of the date range.
|
66
|
+
# @param interval [Object]
|
67
|
+
# Interval of the range periods, specified using the Polars duration string language (see "Notes" section below).
|
68
|
+
# @param closed ["both", "left", "right", "none"]
|
69
|
+
# Define which sides of the range are closed (inclusive).
|
70
|
+
# @param eager [Boolean]
|
71
|
+
# Evaluate immediately and return a `Series`.
|
72
|
+
# If set to `false` (default), return an expression instead.
|
73
|
+
#
|
74
|
+
# @return [Object]
|
75
|
+
#
|
76
|
+
# @note
|
77
|
+
# `interval` is created according to the following string language:
|
78
|
+
#
|
79
|
+
# - 1ns (1 nanosecond)
|
80
|
+
# - 1us (1 microsecond)
|
81
|
+
# - 1ms (1 millisecond)
|
82
|
+
# - 1s (1 second)
|
83
|
+
# - 1m (1 minute)
|
84
|
+
# - 1h (1 hour)
|
85
|
+
# - 1d (1 calendar day)
|
86
|
+
# - 1w (1 calendar week)
|
87
|
+
# - 1mo (1 calendar month)
|
88
|
+
# - 1q (1 calendar quarter)
|
89
|
+
# - 1y (1 calendar year)
|
90
|
+
#
|
91
|
+
# Or combine them:
|
92
|
+
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
|
93
|
+
#
|
94
|
+
# By "calendar day", we mean the corresponding time on the next day (which may
|
95
|
+
# not be 24 hours, due to daylight savings). Similarly for "calendar week",
|
96
|
+
# "calendar month", "calendar quarter", and "calendar year".
|
97
|
+
#
|
98
|
+
# @example
|
99
|
+
# df = Polars::DataFrame.new(
|
100
|
+
# {
|
101
|
+
# "start" => [Date.new(2022, 1, 1), Date.new(2022, 1, 2)],
|
102
|
+
# "end" => Date.new(2022, 1, 3)
|
103
|
+
# }
|
104
|
+
# )
|
105
|
+
# df.with_columns(date_range: Polars.date_ranges("start", "end"))
|
106
|
+
# # =>
|
107
|
+
# # shape: (2, 3)
|
108
|
+
# # ┌────────────┬────────────┬─────────────────────────────────┐
|
109
|
+
# # │ start ┆ end ┆ date_range │
|
110
|
+
# # │ --- ┆ --- ┆ --- │
|
111
|
+
# # │ date ┆ date ┆ list[date] │
|
112
|
+
# # ╞════════════╪════════════╪═════════════════════════════════╡
|
113
|
+
# # │ 2022-01-01 ┆ 2022-01-03 ┆ [2022-01-01, 2022-01-02, 2022-… │
|
114
|
+
# # │ 2022-01-02 ┆ 2022-01-03 ┆ [2022-01-02, 2022-01-03] │
|
115
|
+
# # └────────────┴────────────┴─────────────────────────────────┘
|
116
|
+
def date_ranges(start, stop, interval = "1d", closed: "both", eager: false)
  # Normalise the interval first, then turn both bounds into native expressions.
  interval = Utils.parse_interval_argument(interval)
  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)

  ranges_expr = Utils.wrap_expr(Plr.date_ranges(lower, upper, interval, closed))

  # Eager callers receive an evaluated Series; otherwise the expression is returned.
  eager ? F.select(ranges_expr).to_series : ranges_expr
end
|
135
|
+
end
|
136
|
+
end
|
@@ -0,0 +1,149 @@
|
|
1
|
+
module Polars
|
2
|
+
module Functions
|
3
|
+
# Generate a datetime range.
|
4
|
+
#
|
5
|
+
# @param start [Object]
|
6
|
+
# Lower bound of the datetime range.
|
7
|
+
# @param stop [Object]
|
8
|
+
# Upper bound of the datetime range.
|
9
|
+
# @param interval [String]
|
10
|
+
# Interval of the range periods, specified using the Polars duration string language.
|
11
|
+
# @param closed ['both', 'left', 'right', 'none']
|
12
|
+
# Define which sides of the range are closed (inclusive).
|
13
|
+
# @param time_unit [nil, 'ns', 'us', 'ms']
|
14
|
+
# Time unit of the resulting `Datetime` data type.
|
15
|
+
# @param time_zone [String]
|
16
|
+
# Time zone of the resulting `Datetime` data type.
|
17
|
+
# @param eager [Boolean]
|
18
|
+
# Evaluate immediately and return a `Series`.
|
19
|
+
# If set to `false` (default), return an expression instead.
|
20
|
+
#
|
21
|
+
# @return [Object]
|
22
|
+
#
|
23
|
+
# @example Using Polars duration string to specify the interval:
|
24
|
+
# Polars.datetime_range(
|
25
|
+
# DateTime.new(2022, 1, 1), DateTime.new(2022, 3, 1), "1mo", eager: true
|
26
|
+
# ).alias("datetime")
|
27
|
+
# # =>
|
28
|
+
# # shape: (3,)
|
29
|
+
# # Series: 'datetime' [datetime[ns]]
|
30
|
+
# # [
|
31
|
+
# # 2022-01-01 00:00:00
|
32
|
+
# # 2022-02-01 00:00:00
|
33
|
+
# # 2022-03-01 00:00:00
|
34
|
+
# # ]
|
35
|
+
#
|
36
|
+
# @example Specifying a time zone:
|
37
|
+
# Polars.datetime_range(
|
38
|
+
# DateTime.new(2022, 1, 1),
|
39
|
+
# DateTime.new(2022, 3, 1),
|
40
|
+
# "1mo",
|
41
|
+
# time_zone: "America/New_York",
|
42
|
+
# eager: true
|
43
|
+
# ).alias("datetime")
|
44
|
+
# # =>
|
45
|
+
# # shape: (3,)
|
46
|
+
# # Series: 'datetime' [datetime[ns, America/New_York]]
|
47
|
+
# # [
|
48
|
+
# # 2022-01-01 00:00:00 EST
|
49
|
+
# # 2022-02-01 00:00:00 EST
|
50
|
+
# # 2022-03-01 00:00:00 EST
|
51
|
+
# # ]
|
52
|
+
def datetime_range(start, stop, interval = "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false)
  interval = Utils.parse_interval_argument(interval)
  # An interval mentioning "ns" selects nanosecond precision unless a unit was given.
  time_unit = "ns" if time_unit.nil? && interval.include?("ns")

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)
  native_range = Plr.datetime_range(lower, upper, interval, closed, time_unit, time_zone)
  range_expr = Utils.wrap_expr(native_range)

  # Eager callers receive an evaluated Series; otherwise the expression is returned.
  eager ? Polars.select(range_expr).to_series : range_expr
end
|
80
|
+
|
81
|
+
# Create a column of datetime ranges.
|
82
|
+
#
|
83
|
+
# @param start [Object]
|
84
|
+
# Lower bound of the datetime range.
|
85
|
+
# @param stop [Object]
|
86
|
+
# Upper bound of the datetime range.
|
87
|
+
# @param interval [String]
|
88
|
+
# Interval of the range periods, specified using the Polars duration string language.
|
89
|
+
# @param closed ['both', 'left', 'right', 'none']
|
90
|
+
# Define which sides of the range are closed (inclusive).
|
91
|
+
# @param time_unit [nil, 'ns', 'us', 'ms']
|
92
|
+
# Time unit of the resulting `Datetime` data type.
|
93
|
+
# @param time_zone [String]
|
94
|
+
# Time zone of the resulting `Datetime` data type.
|
95
|
+
# @param eager [Boolean]
|
96
|
+
# Evaluate immediately and return a `Series`.
|
97
|
+
# If set to `false` (default), return an expression instead.
|
98
|
+
#
|
99
|
+
# @return [Object]
|
100
|
+
#
|
101
|
+
# @example
|
102
|
+
# df = Polars::DataFrame.new(
|
103
|
+
# {
|
104
|
+
# "start" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
|
105
|
+
# "end" => DateTime.new(2022, 1, 3),
|
106
|
+
# }
|
107
|
+
# )
|
108
|
+
# df.select(datetime_range: Polars.datetime_ranges("start", "end"))
|
109
|
+
# # =>
|
110
|
+
# # shape: (2, 1)
|
111
|
+
# # ┌─────────────────────────────────┐
|
112
|
+
# # │ datetime_range │
|
113
|
+
# # │ --- │
|
114
|
+
# # │ list[datetime[ns]] │
|
115
|
+
# # ╞═════════════════════════════════╡
|
116
|
+
# # │ [2022-01-01 00:00:00, 2022-01-… │
|
117
|
+
# # │ [2022-01-02 00:00:00, 2022-01-… │
|
118
|
+
# # └─────────────────────────────────┘
|
119
|
+
def datetime_ranges(start, stop, interval: "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false)
  interval = Utils.parse_interval_argument(interval)
  # An interval mentioning "ns" selects nanosecond precision unless a unit was given.
  time_unit = "ns" if time_unit.nil? && interval.include?("ns")

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)
  native_ranges = Plr.datetime_ranges(lower, upper, interval, closed, time_unit, time_zone)
  ranges_expr = Utils.wrap_expr(native_ranges)

  # Eager callers receive an evaluated Series; otherwise the expression is returned.
  eager ? Polars.select(ranges_expr).to_series : ranges_expr
end
|
148
|
+
end
|
149
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module Polars
|
2
|
+
module Functions
|
3
|
+
# Create a range expression (or Series).
|
4
|
+
#
|
5
|
+
# This can be used in a `select`, `with_column`, etc. Be sure that the resulting
|
6
|
+
# range size is equal to the length of the DataFrame you are collecting.
|
7
|
+
#
|
8
|
+
# @param start [Integer, Expr, Series]
|
9
|
+
# Lower bound of range.
|
10
|
+
# @param stop [Integer, Expr, Series]
|
11
|
+
# Upper bound of range.
|
12
|
+
# @param step [Integer]
|
13
|
+
# Step size of the range.
|
14
|
+
# @param eager [Boolean]
|
15
|
+
# If eager evaluation is `true`, a Series is returned instead of an Expr.
|
16
|
+
# @param dtype [Symbol]
|
17
|
+
# Apply an explicit integer dtype to the resulting expression (default is `Int64`).
|
18
|
+
#
|
19
|
+
# @return [Expr, Series]
|
20
|
+
#
|
21
|
+
# @example
|
22
|
+
# Polars.arange(0, 3, eager: true)
|
23
|
+
# # =>
|
24
|
+
# # shape: (3,)
|
25
|
+
# # Series: 'arange' [i64]
|
26
|
+
# # [
|
27
|
+
# # 0
|
28
|
+
# # 1
|
29
|
+
# # 2
|
30
|
+
# # ]
|
31
|
+
def int_range(start, stop = nil, step: 1, eager: false, dtype: nil)
  # Single-bound call: the lone argument becomes the upper bound and the range starts at 0.
  start, stop = 0, start if stop.nil?

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)

  # Fall back to Int64 when no dtype is given; symbol dtypes are passed on as strings.
  dtype ||= Int64
  dtype = dtype.to_s if dtype.is_a?(Symbol)

  range_expr = Utils.wrap_expr(Plr.int_range(lower, upper, step, dtype)).alias("arange")

  # Eager callers receive an evaluated Series; otherwise the expression is returned.
  eager ? select(range_expr).to_series : range_expr
end
|
49
|
+
alias_method :arange, :int_range
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,141 @@
|
|
1
|
+
module Polars
|
2
|
+
module Functions
|
3
|
+
# Generate a time range.
|
4
|
+
#
|
5
|
+
# @param start [Object]
|
6
|
+
# Lower bound of the time range.
|
7
|
+
# @param stop [Object]
|
8
|
+
# Upper bound of the time range.
|
9
|
+
# @param interval [String]
|
10
|
+
# Interval of the range periods, specified using the Polars duration string language.
|
11
|
+
# @param closed ['both', 'left', 'right', 'none']
|
12
|
+
# Define which sides of the range are closed (inclusive).
|
13
|
+
# @param eager [Boolean]
|
14
|
+
# Evaluate immediately and return a `Series`.
|
15
|
+
# If set to `false` (default), return an expression instead.
|
16
|
+
#
|
17
|
+
# @return [Object]
|
18
|
+
#
|
19
|
+
# @example
|
20
|
+
# Polars.time_range(
|
21
|
+
# time(14, 0),
|
22
|
+
# nil,
|
23
|
+
# "3h15m",
|
24
|
+
# eager: true
|
25
|
+
# ).alias("time")
|
26
|
+
# # =>
|
27
|
+
# # shape: (4,)
|
28
|
+
# # Series: 'time' [time]
|
29
|
+
# # [
|
30
|
+
# # 14:00:00
|
31
|
+
# # 17:15:00
|
32
|
+
# # 20:30:00
|
33
|
+
# # 23:45:00
|
34
|
+
# # ]
|
35
|
+
def time_range(start = nil, stop = nil, interval = "1h", closed: "both", eager: false)
  interval = Utils.parse_interval_argument(interval)

  # Reject intervals that contain a year, month, week, or day unit.
  bad_unit = ["y", "mo", "w", "d"].find { |unit| interval.include?(unit) }
  raise ArgumentError, "invalid interval unit for time_range: found #{bad_unit.inspect}" if bad_unit

  # Default bounds (intended: time(0, 0, 0) and time(23, 59, 59, 999999)) are
  # not implemented yet, so both ends must be supplied.
  raise Todo if start.nil? || stop.nil?

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)
  range_expr = Utils.wrap_expr(Plr.time_range(lower, upper, interval, closed))

  # Eager callers receive an evaluated Series; otherwise the expression is returned.
  eager ? Polars.select(range_expr).to_series : range_expr
end
|
70
|
+
|
71
|
+
# Create a column of time ranges.
|
72
|
+
#
|
73
|
+
# @param start [Object]
|
74
|
+
# Lower bound of the time range.
|
75
|
+
# @param stop [Object]
|
76
|
+
# Upper bound of the time range.
|
77
|
+
# @param interval [String]
|
78
|
+
# Interval of the range periods, specified using the Polars duration string language.
|
79
|
+
# @param closed ['both', 'left', 'right', 'none']
|
80
|
+
# Define which sides of the range are closed (inclusive).
|
81
|
+
# @param eager [Boolean]
|
82
|
+
# Evaluate immediately and return a `Series`.
|
83
|
+
# If set to `false` (default), return an expression instead.
|
84
|
+
#
|
85
|
+
# @return [Object]
|
86
|
+
#
|
87
|
+
# @example
|
88
|
+
# df = Polars::DataFrame.new(
|
89
|
+
# {
|
90
|
+
# "start" => [time(9, 0), time(10, 0)],
|
91
|
+
# "end" => time(11, 0)
|
92
|
+
# }
|
93
|
+
# )
|
94
|
+
# df.with_columns(time_range: Polars.time_ranges("start", "end"))
|
95
|
+
# # =>
|
96
|
+
# # shape: (2, 3)
|
97
|
+
# # ┌──────────┬──────────┬────────────────────────────────┐
|
98
|
+
# # │ start ┆ end ┆ time_range │
|
99
|
+
# # │ --- ┆ --- ┆ --- │
|
100
|
+
# # │ time ┆ time ┆ list[time] │
|
101
|
+
# # ╞══════════╪══════════╪════════════════════════════════╡
|
102
|
+
# # │ 09:00:00 ┆ 11:00:00 ┆ [09:00:00, 10:00:00, 11:00:00] │
|
103
|
+
# # │ 10:00:00 ┆ 11:00:00 ┆ [10:00:00, 11:00:00] │
|
104
|
+
# # └──────────┴──────────┴────────────────────────────────┘
|
105
|
+
def time_ranges(start = nil, stop = nil, interval = "1h", closed: "both", eager: false)
  interval = Utils.parse_interval_argument(interval)

  # Reject intervals that contain a year, month, week, or day unit.
  bad_unit = ["y", "mo", "w", "d"].find { |unit| interval.include?(unit) }
  raise ArgumentError, "invalid interval unit for time_range: found #{bad_unit.inspect}" if bad_unit

  # Default bounds (intended: time(0, 0, 0) and time(23, 59, 59, 999999)) are
  # not implemented yet, so both ends must be supplied.
  raise Todo if start.nil? || stop.nil?

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)
  ranges_expr = Utils.wrap_expr(Plr.time_ranges(lower, upper, interval, closed))

  # Eager callers receive an evaluated Series; otherwise the expression is returned.
  eager ? Polars.select(ranges_expr).to_series : ranges_expr
end
|
140
|
+
end
|
141
|
+
end
|