prophet-rb 0.1.1 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/LICENSE.txt +1 -1
- data/README.md +58 -21
- data/lib/prophet.rb +64 -1
- data/lib/prophet/forecaster.rb +135 -130
- data/lib/prophet/holidays.rb +2 -2
- data/lib/prophet/plot.rb +31 -31
- data/lib/prophet/stan_backend.rb +1 -1
- data/lib/prophet/version.rb +1 -1
- data/stan/unix/prophet.stan +27 -16
- data/stan/win/prophet.stan +15 -2
- metadata +12 -82
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 506ab7cfb738d7f7289db812134b2fb64d6371da66e7586a1bcc254b26c6fa1c
|
4
|
+
data.tar.gz: ed97303bb3563bdebe86cf9865ec04142e9b3fa0e3f7bfdea8ae6a8a2ff8570f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5339ac3f8e7f26539137dc23b40d481013bc1ec082a008221edff00ea574ab08617956849e7cd221af40e9b12105630cbe294f8b5e2cfa1a33670d9c1fcd970c
|
7
|
+
data.tar.gz: 63b64fbef8414f65dfb39b7c266e780790f28b42cc6f2a0da415885b293c57ba309d50243e9d9ad2d61d93fe034017a31871cef26eed5e33b01cc651816eed3c
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,24 @@
|
|
1
|
+
## 0.2.4 (2021-04-02)
|
2
|
+
|
3
|
+
- Added support for flat growth
|
4
|
+
|
5
|
+
## 0.2.3 (2020-10-14)
|
6
|
+
|
7
|
+
- Added support for times to `forecast` method
|
8
|
+
|
9
|
+
## 0.2.2 (2020-07-26)
|
10
|
+
|
11
|
+
- Fixed error with constant series
|
12
|
+
- Fixed error with no changepoints
|
13
|
+
|
14
|
+
## 0.2.1 (2020-07-15)
|
15
|
+
|
16
|
+
- Added `forecast` method
|
17
|
+
|
18
|
+
## 0.2.0 (2020-05-13)
|
19
|
+
|
20
|
+
- Switched from Daru to Rover
|
21
|
+
|
1
22
|
## 0.1.1 (2020-04-10)
|
2
23
|
|
3
24
|
- Added `add_changepoints_to_plot`
|
data/LICENSE.txt
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
MIT License
|
2
2
|
|
3
|
-
Copyright (c) 2020 Andrew Kane
|
4
3
|
Copyright (c) Facebook, Inc. and its affiliates.
|
4
|
+
Copyright (c) 2020 Andrew Kane
|
5
5
|
|
6
6
|
Permission is hereby granted, free of charge, to any person obtaining
|
7
7
|
a copy of this software and associated documentation files (the
|
data/README.md
CHANGED
@@ -10,7 +10,7 @@ Supports:
|
|
10
10
|
|
11
11
|
And gracefully handles missing data
|
12
12
|
|
13
|
-
[![Build Status](https://
|
13
|
+
[![Build Status](https://github.com/ankane/prophet/workflows/build/badge.svg?branch=master)](https://github.com/ankane/prophet/actions)
|
14
14
|
|
15
15
|
## Installation
|
16
16
|
|
@@ -20,19 +20,47 @@ Add this line to your application’s Gemfile:
|
|
20
20
|
gem 'prophet-rb'
|
21
21
|
```
|
22
22
|
|
23
|
-
##
|
23
|
+
## Simple API
|
24
24
|
|
25
|
-
|
25
|
+
Get future predictions for a time series
|
26
26
|
|
27
|
-
|
27
|
+
```ruby
|
28
|
+
series = {
|
29
|
+
Date.parse("2020-01-01") => 100,
|
30
|
+
Date.parse("2020-01-02") => 150,
|
31
|
+
Date.parse("2020-01-03") => 136,
|
32
|
+
# ...
|
33
|
+
}
|
34
|
+
|
35
|
+
Prophet.forecast(series)
|
36
|
+
```
|
37
|
+
|
38
|
+
Specify the number of predictions to return
|
39
|
+
|
40
|
+
```ruby
|
41
|
+
Prophet.forecast(series, count: 3)
|
42
|
+
```
|
43
|
+
|
44
|
+
Works great with [Groupdate](https://github.com/ankane/groupdate)
|
45
|
+
|
46
|
+
```ruby
|
47
|
+
series = User.group_by_day(:created_at).count
|
48
|
+
Prophet.forecast(series)
|
49
|
+
```
|
50
|
+
|
51
|
+
## Advanced API
|
52
|
+
|
53
|
+
Check out the [Prophet documentation](https://facebook.github.io/prophet/docs/quick_start.html) for a great explanation of all of the features. The advanced API follows the Python API and supports the same features. It uses [Rover](https://github.com/ankane/rover) for data frames.
|
54
|
+
|
55
|
+
## Advanced Quick Start
|
28
56
|
|
29
57
|
[Explanation](https://facebook.github.io/prophet/docs/quick_start.html)
|
30
58
|
|
31
59
|
Create a data frame with `ds` and `y` columns - here’s [an example](examples/example_wp_log_peyton_manning.csv) you can use
|
32
60
|
|
33
61
|
```ruby
|
34
|
-
df =
|
35
|
-
df.head
|
62
|
+
df = Rover.read_csv("example_wp_log_peyton_manning.csv")
|
63
|
+
df.head
|
36
64
|
```
|
37
65
|
|
38
66
|
ds | y
|
@@ -54,7 +82,7 @@ Make a data frame with a `ds` column for future predictions
|
|
54
82
|
|
55
83
|
```ruby
|
56
84
|
future = m.make_future_dataframe(periods: 365)
|
57
|
-
future.tail
|
85
|
+
future.tail
|
58
86
|
```
|
59
87
|
|
60
88
|
ds |
|
@@ -69,7 +97,7 @@ Make predictions
|
|
69
97
|
|
70
98
|
```ruby
|
71
99
|
forecast = m.predict(future)
|
72
|
-
forecast["ds", "yhat", "yhat_lower", "yhat_upper"].tail
|
100
|
+
forecast[["ds", "yhat", "yhat_lower", "yhat_upper"]].tail
|
73
101
|
```
|
74
102
|
|
75
103
|
ds | yhat | yhat_lower | yhat_upper
|
@@ -90,7 +118,7 @@ Plot the forecast
|
|
90
118
|
m.plot(forecast).savefig("forecast.png")
|
91
119
|
```
|
92
120
|
|
93
|
-
![Forecast](https://blazer.dokkuapp.com/assets/prophet/forecast-
|
121
|
+
![Forecast](https://blazer.dokkuapp.com/assets/prophet/forecast-77cf453fda67d1b462c6c22aee3a02572203b71c4517fedecc1f438cd374a876.png)
|
94
122
|
|
95
123
|
Plot components
|
96
124
|
|
@@ -98,7 +126,7 @@ Plot components
|
|
98
126
|
m.plot_components(forecast).savefig("components.png")
|
99
127
|
```
|
100
128
|
|
101
|
-
![Components](https://blazer.dokkuapp.com/assets/prophet/components-
|
129
|
+
![Components](https://blazer.dokkuapp.com/assets/prophet/components-2cdd260e23bc89824ecca25f6bfe394deb5821d60b7e0e551469c90d204acd67.png)
|
102
130
|
|
103
131
|
## Saturating Forecasts
|
104
132
|
|
@@ -107,7 +135,7 @@ m.plot_components(forecast).savefig("components.png")
|
|
107
135
|
Forecast logistic growth instead of linear
|
108
136
|
|
109
137
|
```ruby
|
110
|
-
df =
|
138
|
+
df = Rover.read_csv("example_wp_log_R.csv")
|
111
139
|
df["cap"] = 8.5
|
112
140
|
m = Prophet.new(growth: "logistic")
|
113
141
|
m.fit(df)
|
@@ -146,21 +174,21 @@ m = Prophet.new(changepoints: ["2014-01-01"])
|
|
146
174
|
Create a data frame with `holiday` and `ds` columns. Include all occurrences in your past data and future occurrences you’d like to forecast.
|
147
175
|
|
148
176
|
```ruby
|
149
|
-
playoffs =
|
150
|
-
"holiday" =>
|
177
|
+
playoffs = Rover::DataFrame.new(
|
178
|
+
"holiday" => "playoff",
|
151
179
|
"ds" => ["2008-01-13", "2009-01-03", "2010-01-16",
|
152
180
|
"2010-01-24", "2010-02-07", "2011-01-08",
|
153
181
|
"2013-01-12", "2014-01-12", "2014-01-19",
|
154
182
|
"2014-02-02", "2015-01-11", "2016-01-17",
|
155
183
|
"2016-01-24", "2016-02-07"],
|
156
|
-
"lower_window" =>
|
157
|
-
"upper_window" =>
|
184
|
+
"lower_window" => 0,
|
185
|
+
"upper_window" => 1
|
158
186
|
)
|
159
|
-
superbowls =
|
160
|
-
"holiday" =>
|
187
|
+
superbowls = Rover::DataFrame.new(
|
188
|
+
"holiday" => "superbowl",
|
161
189
|
"ds" => ["2010-02-07", "2014-02-02", "2016-02-07"],
|
162
|
-
"lower_window" =>
|
163
|
-
"upper_window" =>
|
190
|
+
"lower_window" => 0,
|
191
|
+
"upper_window" => 1
|
164
192
|
)
|
165
193
|
holidays = playoffs.concat(superbowls)
|
166
194
|
|
@@ -208,7 +236,7 @@ forecast = m.predict(future)
|
|
208
236
|
[Explanation](https://facebook.github.io/prophet/docs/multiplicative_seasonality.html)
|
209
237
|
|
210
238
|
```ruby
|
211
|
-
df =
|
239
|
+
df = Rover.read_csv("example_air_passengers.csv")
|
212
240
|
m = Prophet.new(seasonality_mode: "multiplicative")
|
213
241
|
m.fit(df)
|
214
242
|
future = m.make_future_dataframe(periods: 50, freq: "MS")
|
@@ -236,7 +264,7 @@ Prophet.new(mcmc_samples: 300)
|
|
236
264
|
Sub-daily data
|
237
265
|
|
238
266
|
```ruby
|
239
|
-
df =
|
267
|
+
df = Rover.read_csv("example_yosemite_temps.csv")
|
240
268
|
m = Prophet.new(changepoint_prior_scale: 0.01).fit(df)
|
241
269
|
future = m.make_future_dataframe(periods: 300, freq: "H")
|
242
270
|
forecast = m.predict(future)
|
@@ -246,6 +274,15 @@ forecast = m.predict(future)
|
|
246
274
|
|
247
275
|
- [Forecasting at Scale](https://peerj.com/preprints/3190.pdf)
|
248
276
|
|
277
|
+
## Upgrading
|
278
|
+
|
279
|
+
### 0.2.0
|
280
|
+
|
281
|
+
Prophet now uses [Rover](https://github.com/ankane/rover) instead of Daru. Two changes you may need to make are:
|
282
|
+
|
283
|
+
- `Rover.read_csv` instead of `Daru::DataFrame.from_csv`
|
284
|
+
- `df[["ds", "yhat"]]` instead of `df["ds", "yhat"]`
|
285
|
+
|
249
286
|
## Credits
|
250
287
|
|
251
288
|
This library was ported from the [Prophet Python library](https://github.com/facebook/prophet) and is available under the same license.
|
data/lib/prophet.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# dependencies
|
2
2
|
require "cmdstan"
|
3
|
-
require "
|
3
|
+
require "rover"
|
4
4
|
require "numo/narray"
|
5
5
|
|
6
6
|
# stdlib
|
@@ -20,4 +20,67 @@ module Prophet
|
|
20
20
|
def self.new(**kwargs)
|
21
21
|
Forecaster.new(**kwargs)
|
22
22
|
end
|
23
|
+
|
24
|
+
def self.forecast(series, count: 10)
|
25
|
+
raise ArgumentError, "Series must have at least 10 data points" if series.size < 10
|
26
|
+
|
27
|
+
# check type to determine output format
|
28
|
+
# check before converting to time
|
29
|
+
keys = series.keys
|
30
|
+
dates = keys.all? { |k| k.is_a?(Date) }
|
31
|
+
time_zone = keys.first.time_zone if keys.first.respond_to?(:time_zone)
|
32
|
+
utc = keys.first.utc? if keys.first.respond_to?(:utc?)
|
33
|
+
times = keys.map(&:to_time)
|
34
|
+
|
35
|
+
day = times.all? { |t| t.hour == 0 && t.min == 0 && t.sec == 0 && t.nsec == 0 }
|
36
|
+
week = day && times.map { |k| k.wday }.uniq.size == 1
|
37
|
+
month = day && times.all? { |k| k.day == 1 }
|
38
|
+
quarter = month && times.all? { |k| k.month % 3 == 1 }
|
39
|
+
year = quarter && times.all? { |k| k.month == 1 }
|
40
|
+
|
41
|
+
freq =
|
42
|
+
if year
|
43
|
+
"YS"
|
44
|
+
elsif quarter
|
45
|
+
"QS"
|
46
|
+
elsif month
|
47
|
+
"MS"
|
48
|
+
elsif week
|
49
|
+
"W"
|
50
|
+
elsif day
|
51
|
+
"D"
|
52
|
+
else
|
53
|
+
diff = Rover::Vector.new(times).sort.diff.to_numo[1..-1]
|
54
|
+
min_diff = diff.min.to_i
|
55
|
+
|
56
|
+
# could be another common divisor
|
57
|
+
# but keep it simple for now
|
58
|
+
raise "Unknown frequency" unless (diff % min_diff).eq(0).all?
|
59
|
+
|
60
|
+
"#{min_diff}S"
|
61
|
+
end
|
62
|
+
|
63
|
+
# use series, not times, so dates are handled correctly
|
64
|
+
df = Rover::DataFrame.new({"ds" => series.keys, "y" => series.values})
|
65
|
+
|
66
|
+
m = Prophet.new
|
67
|
+
m.logger.level = ::Logger::FATAL # no logging
|
68
|
+
m.fit(df)
|
69
|
+
|
70
|
+
future = m.make_future_dataframe(periods: count, include_history: false, freq: freq)
|
71
|
+
forecast = m.predict(future)
|
72
|
+
result = forecast[["ds", "yhat"]].to_a
|
73
|
+
|
74
|
+
# use the same format as input
|
75
|
+
if dates
|
76
|
+
result.each { |v| v["ds"] = v["ds"].to_date }
|
77
|
+
elsif time_zone
|
78
|
+
result.each { |v| v["ds"] = v["ds"].in_time_zone(time_zone) }
|
79
|
+
elsif utc
|
80
|
+
result.each { |v| v["ds"] = v["ds"].utc }
|
81
|
+
else
|
82
|
+
result.each { |v| v["ds"] = v["ds"].localtime }
|
83
|
+
end
|
84
|
+
result.map { |v| [v["ds"], v["yhat"]] }.to_h
|
85
|
+
end
|
23
86
|
end
|
data/lib/prophet/forecaster.rb
CHANGED
@@ -75,19 +75,19 @@ module Prophet
|
|
75
75
|
end
|
76
76
|
|
77
77
|
def validate_inputs
|
78
|
-
if !["linear", "logistic"].include?(@growth)
|
79
|
-
raise ArgumentError, "Parameter \"growth\" should be \"linear\" or \"
|
78
|
+
if !["linear", "logistic", "flat"].include?(@growth)
|
79
|
+
raise ArgumentError, "Parameter \"growth\" should be \"linear\", \"logistic\", or \"flat\"."
|
80
80
|
end
|
81
81
|
if @changepoint_range < 0 || @changepoint_range > 1
|
82
82
|
raise ArgumentError, "Parameter \"changepoint_range\" must be in [0, 1]"
|
83
83
|
end
|
84
84
|
if @holidays
|
85
|
-
if !@holidays.is_a?(
|
85
|
+
if !@holidays.is_a?(Rover::DataFrame) && @holidays.include?("ds") && @holidays.include?("holiday")
|
86
86
|
raise ArgumentError, "holidays must be a DataFrame with \"ds\" and \"holiday\" columns."
|
87
87
|
end
|
88
88
|
@holidays["ds"] = to_datetime(@holidays["ds"])
|
89
|
-
has_lower = @holidays.
|
90
|
-
has_upper = @holidays.
|
89
|
+
has_lower = @holidays.include?("lower_window")
|
90
|
+
has_upper = @holidays.include?("upper_window")
|
91
91
|
if has_lower ^ has_upper # xor
|
92
92
|
raise ArgumentError, "Holidays must have both lower_window and upper_window, or neither"
|
93
93
|
end
|
@@ -141,7 +141,7 @@ module Prophet
|
|
141
141
|
end
|
142
142
|
|
143
143
|
def setup_dataframe(df, initialize_scales: false)
|
144
|
-
if df.
|
144
|
+
if df.include?("y")
|
145
145
|
df["y"] = df["y"].map(&:to_f)
|
146
146
|
raise ArgumentError "Found infinity in column y." unless df["y"].all?(&:finite?)
|
147
147
|
end
|
@@ -152,7 +152,7 @@ module Prophet
|
|
152
152
|
raise ArgumentError, "Found NaN in column ds." if df["ds"].any?(&:nil?)
|
153
153
|
|
154
154
|
@extra_regressors.each_key do |name|
|
155
|
-
if !df.
|
155
|
+
if !df.include?(name)
|
156
156
|
raise ArgumentError, "Regressor #{name.inspect} missing from dataframe"
|
157
157
|
end
|
158
158
|
df[name] = df[name].map(&:to_f)
|
@@ -163,7 +163,7 @@ module Prophet
|
|
163
163
|
@seasonalities.values.each do |props|
|
164
164
|
condition_name = props[:condition_name]
|
165
165
|
if condition_name
|
166
|
-
if !df.
|
166
|
+
if !df.include?(condition_name)
|
167
167
|
raise ArgumentError, "Condition #{condition_name.inspect} missing from dataframe"
|
168
168
|
end
|
169
169
|
if df.where(!df[condition_name].in([true, false])).any?
|
@@ -172,36 +172,33 @@ module Prophet
|
|
172
172
|
end
|
173
173
|
end
|
174
174
|
|
175
|
-
|
176
|
-
df.index.name = nil
|
177
|
-
end
|
178
|
-
df = df.sort(["ds"])
|
175
|
+
df = df.sort_by { |r| r["ds"] }
|
179
176
|
|
180
177
|
initialize_scales(initialize_scales, df)
|
181
178
|
|
182
|
-
if @logistic_floor && !df.
|
179
|
+
if @logistic_floor && !df.include?("floor")
|
183
180
|
raise ArgumentError, "Expected column \"floor\"."
|
184
181
|
else
|
185
182
|
df["floor"] = 0
|
186
183
|
end
|
187
184
|
|
188
185
|
if @growth == "logistic"
|
189
|
-
unless df.
|
186
|
+
unless df.include?("cap")
|
190
187
|
raise ArgumentError, "Capacities must be supplied for logistic growth in column \"cap\""
|
191
188
|
end
|
192
|
-
if df
|
189
|
+
if df[df["cap"] <= df["floor"]].size > 0
|
193
190
|
raise ArgumentError, "cap must be greater than floor (which defaults to 0)."
|
194
191
|
end
|
195
|
-
df["cap_scaled"] = (df["cap"] - df["floor"]) / @y_scale
|
192
|
+
df["cap_scaled"] = (df["cap"] - df["floor"]) / @y_scale.to_f
|
196
193
|
end
|
197
194
|
|
198
195
|
df["t"] = (df["ds"] - @start) / @t_scale.to_f
|
199
|
-
if df.
|
200
|
-
df["y_scaled"] = (df["y"] - df["floor"]) / @y_scale
|
196
|
+
if df.include?("y")
|
197
|
+
df["y_scaled"] = (df["y"] - df["floor"]) / @y_scale.to_f
|
201
198
|
end
|
202
199
|
|
203
200
|
@extra_regressors.each do |name, props|
|
204
|
-
df[name] = (
|
201
|
+
df[name] = (df[name] - props[:mu]) / props[:std].to_f
|
205
202
|
end
|
206
203
|
|
207
204
|
df
|
@@ -237,21 +234,21 @@ module Prophet
|
|
237
234
|
if @n_changepoints > 0
|
238
235
|
step = (hist_size - 1) / @n_changepoints.to_f
|
239
236
|
cp_indexes = (@n_changepoints + 1).times.map { |i| (i * step).round }
|
240
|
-
@changepoints =
|
237
|
+
@changepoints = Rover::Vector.new(@history["ds"].to_a.values_at(*cp_indexes)).tail(-1)
|
241
238
|
else
|
242
239
|
@changepoints = []
|
243
240
|
end
|
244
241
|
end
|
245
242
|
|
246
243
|
if @changepoints.size > 0
|
247
|
-
@changepoints_t = (
|
244
|
+
@changepoints_t = (@changepoints.map(&:to_i).sort.to_numo.cast_to(Numo::DFloat) - @start.to_i) / @t_scale.to_f
|
248
245
|
else
|
249
246
|
@changepoints_t = Numo::NArray.asarray([0])
|
250
247
|
end
|
251
248
|
end
|
252
249
|
|
253
250
|
def fourier_series(dates, period, series_order)
|
254
|
-
t =
|
251
|
+
t = dates.map(&:to_i).to_numo / (3600 * 24.0)
|
255
252
|
|
256
253
|
# no need for column_stack
|
257
254
|
series_order.times.flat_map do |i|
|
@@ -263,11 +260,11 @@ module Prophet
|
|
263
260
|
|
264
261
|
def make_seasonality_features(dates, period, series_order, prefix)
|
265
262
|
features = fourier_series(dates, period, series_order)
|
266
|
-
|
263
|
+
Rover::DataFrame.new(features.map.with_index { |v, i| ["#{prefix}_delim_#{i + 1}", v] }.to_h)
|
267
264
|
end
|
268
265
|
|
269
266
|
def construct_holiday_dataframe(dates)
|
270
|
-
all_holidays =
|
267
|
+
all_holidays = Rover::DataFrame.new
|
271
268
|
if @holidays
|
272
269
|
all_holidays = @holidays.dup
|
273
270
|
end
|
@@ -279,12 +276,12 @@ module Prophet
|
|
279
276
|
# Drop future holidays not previously seen in training data
|
280
277
|
if @train_holiday_names
|
281
278
|
# Remove holiday names that didn't show up in fit
|
282
|
-
all_holidays = all_holidays
|
279
|
+
all_holidays = all_holidays[all_holidays["holiday"].in?(@train_holiday_names)]
|
283
280
|
|
284
281
|
# Add holiday names in fit but not in predict with ds as NA
|
285
|
-
holidays_to_add =
|
286
|
-
"holiday" => @train_holiday_names
|
287
|
-
)
|
282
|
+
holidays_to_add = Rover::DataFrame.new({
|
283
|
+
"holiday" => @train_holiday_names[!@train_holiday_names.in?(all_holidays["holiday"])]
|
284
|
+
})
|
288
285
|
all_holidays = all_holidays.concat(holidays_to_add)
|
289
286
|
end
|
290
287
|
|
@@ -318,7 +315,7 @@ module Prophet
|
|
318
315
|
|
319
316
|
lw.upto(uw).each do |offset|
|
320
317
|
occurrence = dt ? dt + offset : nil
|
321
|
-
loc = occurrence ? row_index.index(occurrence) : nil
|
318
|
+
loc = occurrence ? row_index.to_a.index(occurrence) : nil
|
322
319
|
key = "#{row["holiday"]}_delim_#{offset >= 0 ? "+" : "-"}#{offset.abs}"
|
323
320
|
if loc
|
324
321
|
expanded_holidays[key][loc] = 1.0
|
@@ -327,14 +324,14 @@ module Prophet
|
|
327
324
|
end
|
328
325
|
end
|
329
326
|
end
|
330
|
-
holiday_features =
|
331
|
-
#
|
332
|
-
holiday_features = holiday_features[
|
333
|
-
prior_scale_list = holiday_features.
|
327
|
+
holiday_features = Rover::DataFrame.new(expanded_holidays)
|
328
|
+
# Make sure column order is consistent
|
329
|
+
holiday_features = holiday_features[holiday_features.vector_names.sort]
|
330
|
+
prior_scale_list = holiday_features.vector_names.map { |h| prior_scales[h.split("_delim_")[0]] }
|
334
331
|
holiday_names = prior_scales.keys
|
335
332
|
# Store holiday names used in fit
|
336
|
-
if
|
337
|
-
@train_holiday_names =
|
333
|
+
if @train_holiday_names.nil?
|
334
|
+
@train_holiday_names = Rover::Vector.new(holiday_names)
|
338
335
|
end
|
339
336
|
[holiday_features, prior_scale_list, holiday_names]
|
340
337
|
end
|
@@ -432,16 +429,16 @@ module Prophet
|
|
432
429
|
modes[@seasonality_mode].concat(holiday_names)
|
433
430
|
end
|
434
431
|
|
435
|
-
#
|
432
|
+
# Additional regressors
|
436
433
|
@extra_regressors.each do |name, props|
|
437
|
-
seasonal_features << df[name]
|
434
|
+
seasonal_features << Rover::DataFrame.new({name => df[name]})
|
438
435
|
prior_scales << props[:prior_scale]
|
439
436
|
modes[props[:mode]] << name
|
440
437
|
end
|
441
438
|
|
442
|
-
#
|
439
|
+
# Dummy to prevent empty X
|
443
440
|
if seasonal_features.size == 0
|
444
|
-
seasonal_features <<
|
441
|
+
seasonal_features << Rover::DataFrame.new({"zeros" => [0] * df.shape[0]})
|
445
442
|
prior_scales << 1.0
|
446
443
|
end
|
447
444
|
|
@@ -453,16 +450,16 @@ module Prophet
|
|
453
450
|
end
|
454
451
|
|
455
452
|
def regressor_column_matrix(seasonal_features, modes)
|
456
|
-
components =
|
453
|
+
components = Rover::DataFrame.new(
|
457
454
|
"col" => seasonal_features.shape[1].times.to_a,
|
458
|
-
"component" => seasonal_features.
|
455
|
+
"component" => seasonal_features.vector_names.map { |x| x.split("_delim_")[0] }
|
459
456
|
)
|
460
457
|
|
461
|
-
#
|
458
|
+
# Add total for holidays
|
462
459
|
if @train_holiday_names
|
463
460
|
components = add_group_component(components, "holidays", @train_holiday_names.uniq)
|
464
461
|
end
|
465
|
-
#
|
462
|
+
# Add totals for additive and multiplicative components, and regressors
|
466
463
|
["additive", "multiplicative"].each do |mode|
|
467
464
|
components = add_group_component(components, mode + "_terms", modes[mode])
|
468
465
|
regressors_by_mode = @extra_regressors.select { |r, props| props[:mode] == mode }
|
@@ -473,20 +470,15 @@ module Prophet
|
|
473
470
|
modes[mode] << mode + "_terms"
|
474
471
|
modes[mode] << "extra_regressors_" + mode
|
475
472
|
end
|
476
|
-
#
|
473
|
+
# After all of the additive/multiplicative groups have been added, add "holidays" to the list for the seasonality mode
|
477
474
|
modes[@seasonality_mode] << "holidays"
|
478
|
-
#
|
479
|
-
component_cols =
|
480
|
-
|
481
|
-
)
|
482
|
-
component_cols.each_vector do |v|
|
483
|
-
v.map! { |vi| vi.nil? ? 0 : vi }
|
484
|
-
end
|
485
|
-
component_cols.rename_vectors(:_id => "col")
|
475
|
+
# Convert to a binary matrix
|
476
|
+
component_cols = components["col"].crosstab(components["component"])
|
477
|
+
component_cols["col"] = component_cols.delete("_")
|
486
478
|
|
487
479
|
# Add columns for additive and multiplicative terms, if missing
|
488
480
|
["additive_terms", "multiplicative_terms"].each do |name|
|
489
|
-
component_cols[name] = 0 unless component_cols.
|
481
|
+
component_cols[name] = 0 unless component_cols.include?(name)
|
490
482
|
end
|
491
483
|
|
492
484
|
# TODO validation
|
@@ -495,10 +487,10 @@ module Prophet
|
|
495
487
|
end
|
496
488
|
|
497
489
|
def add_group_component(components, name, group)
|
498
|
-
new_comp = components
|
490
|
+
new_comp = components[components["component"].in?(group)].dup
|
499
491
|
group_cols = new_comp["col"].uniq
|
500
492
|
if group_cols.size > 0
|
501
|
-
new_comp =
|
493
|
+
new_comp = Rover::DataFrame.new({"col" => group_cols, "component" => name})
|
502
494
|
components = components.concat(new_comp)
|
503
495
|
end
|
504
496
|
components
|
@@ -574,8 +566,8 @@ module Prophet
|
|
574
566
|
end
|
575
567
|
|
576
568
|
def linear_growth_init(df)
|
577
|
-
i0 =
|
578
|
-
i1 = df
|
569
|
+
i0 = 0
|
570
|
+
i1 = df.size - 1
|
579
571
|
t = df["t"][i1] - df["t"][i0]
|
580
572
|
k = (df["y_scaled"][i1] - df["y_scaled"][i0]) / t
|
581
573
|
m = df["y_scaled"][i0] - k * df["t"][i0]
|
@@ -583,8 +575,8 @@ module Prophet
|
|
583
575
|
end
|
584
576
|
|
585
577
|
def logistic_growth_init(df)
|
586
|
-
i0 =
|
587
|
-
i1 = df
|
578
|
+
i0 = 0
|
579
|
+
i1 = df.size - 1
|
588
580
|
t = df["t"][i1] - df["t"][i0]
|
589
581
|
|
590
582
|
# Force valid values, in case y > cap or y < 0
|
@@ -610,11 +602,22 @@ module Prophet
|
|
610
602
|
[k, m]
|
611
603
|
end
|
612
604
|
|
605
|
+
def flat_growth_init(df)
|
606
|
+
k = 0
|
607
|
+
m = df["y_scaled"].mean
|
608
|
+
[k, m]
|
609
|
+
end
|
610
|
+
|
613
611
|
def fit(df, **kwargs)
|
614
612
|
raise Error, "Prophet object can only be fit once" if @history
|
615
613
|
|
616
|
-
|
617
|
-
|
614
|
+
if defined?(Daru::DataFrame) && df.is_a?(Daru::DataFrame)
|
615
|
+
df = Rover::DataFrame.new(df.to_h)
|
616
|
+
end
|
617
|
+
raise ArgumentError, "Must be a data frame" unless df.is_a?(Rover::DataFrame)
|
618
|
+
|
619
|
+
history = df[!df["y"].missing]
|
620
|
+
raise Error, "Data has less than 2 non-nil rows" if history.size < 2
|
618
621
|
|
619
622
|
@history_dates = to_datetime(df["ds"]).sort
|
620
623
|
history = setup_dataframe(history, initialize_scales: true)
|
@@ -627,6 +630,8 @@ module Prophet
|
|
627
630
|
|
628
631
|
set_changepoints
|
629
632
|
|
633
|
+
trend_indicator = {"linear" => 0, "logistic" => 1, "flat" => 2}
|
634
|
+
|
630
635
|
dat = {
|
631
636
|
"T" => history.shape[0],
|
632
637
|
"K" => seasonal_features.shape[1],
|
@@ -637,7 +642,7 @@ module Prophet
|
|
637
642
|
"X" => seasonal_features,
|
638
643
|
"sigmas" => prior_scales,
|
639
644
|
"tau" => @changepoint_prior_scale,
|
640
|
-
"trend_indicator" => @growth
|
645
|
+
"trend_indicator" => trend_indicator[@growth],
|
641
646
|
"s_a" => component_cols["additive_terms"],
|
642
647
|
"s_m" => component_cols["multiplicative_terms"]
|
643
648
|
}
|
@@ -645,6 +650,9 @@ module Prophet
|
|
645
650
|
if @growth == "linear"
|
646
651
|
dat["cap"] = Numo::DFloat.zeros(@history.shape[0])
|
647
652
|
kinit = linear_growth_init(history)
|
653
|
+
elsif @growth == "flat"
|
654
|
+
dat["cap"] = Numo::DFloat.zeros(@history.shape[0])
|
655
|
+
kinit = flat_growth_init(history)
|
648
656
|
else
|
649
657
|
dat["cap"] = history["cap_scaled"]
|
650
658
|
kinit = logistic_growth_init(history)
|
@@ -658,12 +666,12 @@ module Prophet
|
|
658
666
|
"sigma_obs" => 1
|
659
667
|
}
|
660
668
|
|
661
|
-
if history["y"].min == history["y"].max && @growth == "linear"
|
669
|
+
if history["y"].min == history["y"].max && (@growth == "linear" || @growth == "flat")
|
662
670
|
# Nothing to fit.
|
663
671
|
@params = stan_init
|
664
672
|
@params["sigma_obs"] = 1e-9
|
665
|
-
@params.each do |par|
|
666
|
-
@params[par] = Numo::NArray.asarray(@params[par])
|
673
|
+
@params.each do |par, _|
|
674
|
+
@params[par] = Numo::NArray.asarray([@params[par]])
|
667
675
|
end
|
668
676
|
elsif @mcmc_samples > 0
|
669
677
|
@params = @stan_backend.sampling(stan_init, dat, @mcmc_samples, **kwargs)
|
@@ -674,8 +682,10 @@ module Prophet
|
|
674
682
|
# If no changepoints were requested, replace delta with 0s
|
675
683
|
if @changepoints.size == 0
|
676
684
|
# Fold delta into the base rate k
|
677
|
-
|
678
|
-
|
685
|
+
# Numo doesn't support -1 with reshape
|
686
|
+
negative_one = @params["delta"].shape.inject(&:*)
|
687
|
+
@params["k"] = @params["k"] + @params["delta"].reshape(negative_one)
|
688
|
+
@params["delta"] = Numo::DFloat.zeros(@params["delta"].shape).reshape(negative_one, 1)
|
679
689
|
end
|
680
690
|
|
681
691
|
self
|
@@ -701,10 +711,10 @@ module Prophet
|
|
701
711
|
|
702
712
|
# Drop columns except ds, cap, floor, and trend
|
703
713
|
cols = ["ds", "trend"]
|
704
|
-
cols << "cap" if df.
|
714
|
+
cols << "cap" if df.include?("cap")
|
705
715
|
cols << "floor" if @logistic_floor
|
706
716
|
# Add in forecast components
|
707
|
-
df2 = df_concat_axis_one([df[
|
717
|
+
df2 = df_concat_axis_one([df[cols], intervals, seasonal_components])
|
708
718
|
df2["yhat"] = df2["trend"] * (df2["multiplicative_terms"] + 1) + df2["additive_terms"]
|
709
719
|
df2
|
710
720
|
end
|
@@ -739,8 +749,12 @@ module Prophet
|
|
739
749
|
k_t[indx] += deltas[s]
|
740
750
|
m_t[indx] += gammas[s]
|
741
751
|
end
|
742
|
-
|
743
|
-
|
752
|
+
cap.to_numo / (1 + Numo::NMath.exp(-k_t * (t - m_t)))
|
753
|
+
end
|
754
|
+
|
755
|
+
def flat_trend(t, m)
|
756
|
+
m_t = m * t.new_ones
|
757
|
+
m_t
|
744
758
|
end
|
745
759
|
|
746
760
|
def predict_trend(df)
|
@@ -751,9 +765,11 @@ module Prophet
|
|
751
765
|
t = Numo::NArray.asarray(df["t"].to_a)
|
752
766
|
if @growth == "linear"
|
753
767
|
trend = piecewise_linear(t, deltas, k, m, @changepoints_t)
|
754
|
-
|
768
|
+
elsif @growth == "logistic"
|
755
769
|
cap = df["cap_scaled"]
|
756
770
|
trend = piecewise_logistic(t, cap, deltas, k, m, @changepoints_t)
|
771
|
+
elsif @growth == "flat"
|
772
|
+
trend = flat_trend(t, m)
|
757
773
|
end
|
758
774
|
|
759
775
|
trend * @y_scale + Numo::NArray.asarray(df["floor"].to_a)
|
@@ -766,10 +782,10 @@ module Prophet
|
|
766
782
|
upper_p = 100 * (1.0 + @interval_width) / 2
|
767
783
|
end
|
768
784
|
|
769
|
-
x =
|
785
|
+
x = seasonal_features.to_numo
|
770
786
|
data = {}
|
771
|
-
component_cols.
|
772
|
-
beta_c =
|
787
|
+
component_cols.vector_names.each do |component|
|
788
|
+
beta_c = @params["beta"] * component_cols[component].to_numo
|
773
789
|
|
774
790
|
comp = x.dot(beta_c.transpose)
|
775
791
|
if @component_modes["additive"].include?(component)
|
@@ -777,11 +793,11 @@ module Prophet
|
|
777
793
|
end
|
778
794
|
data[component] = comp.mean(axis: 1, nan: true)
|
779
795
|
if @uncertainty_samples
|
780
|
-
data[component + "_lower"] = percentile(
|
781
|
-
data[component + "_upper"] = percentile(
|
796
|
+
data[component + "_lower"] = comp.percentile(lower_p, axis: 1)
|
797
|
+
data[component + "_upper"] = comp.percentile(upper_p, axis: 1)
|
782
798
|
end
|
783
799
|
end
|
784
|
-
|
800
|
+
Rover::DataFrame.new(data)
|
785
801
|
end
|
786
802
|
|
787
803
|
def sample_posterior_predictive(df)
|
@@ -792,9 +808,9 @@ module Prophet
|
|
792
808
|
seasonal_features, _, component_cols, _ = make_all_seasonality_features(df)
|
793
809
|
|
794
810
|
# convert to Numo for performance
|
795
|
-
seasonal_features =
|
796
|
-
additive_terms =
|
797
|
-
multiplicative_terms =
|
811
|
+
seasonal_features = seasonal_features.to_numo
|
812
|
+
additive_terms = component_cols["additive_terms"].to_numo
|
813
|
+
multiplicative_terms = component_cols["multiplicative_terms"].to_numo
|
798
814
|
|
799
815
|
sim_values = {"yhat" => [], "trend" => []}
|
800
816
|
n_iterations.times do |i|
|
@@ -831,11 +847,11 @@ module Prophet
|
|
831
847
|
|
832
848
|
series = {}
|
833
849
|
["yhat", "trend"].each do |key|
|
834
|
-
series["#{key}_lower"] =
|
835
|
-
series["#{key}_upper"] =
|
850
|
+
series["#{key}_lower"] = sim_values[key].percentile(lower_p, axis: 1)
|
851
|
+
series["#{key}_upper"] = sim_values[key].percentile(upper_p, axis: 1)
|
836
852
|
end
|
837
853
|
|
838
|
-
|
854
|
+
Rover::DataFrame.new(series)
|
839
855
|
end
|
840
856
|
|
841
857
|
def sample_model(df, seasonal_features, iteration, s_a, s_m)
|
@@ -889,35 +905,25 @@ module Prophet
|
|
889
905
|
|
890
906
|
if @growth == "linear"
|
891
907
|
trend = piecewise_linear(t, deltas, k, m, changepoint_ts)
|
892
|
-
|
908
|
+
elsif @growth == "logistic"
|
893
909
|
cap = df["cap_scaled"]
|
894
910
|
trend = piecewise_logistic(t, cap, deltas, k, m, changepoint_ts)
|
911
|
+
elsif @growth == "flat"
|
912
|
+
trend = flat_trend(t, m)
|
895
913
|
end
|
896
914
|
|
897
915
|
trend * @y_scale + Numo::NArray.asarray(df["floor"].to_a)
|
898
916
|
end
|
899
917
|
|
900
|
-
def percentile(a, percentile, axis:)
|
901
|
-
raise Error, "Axis must be 1" if axis != 1
|
902
|
-
|
903
|
-
sorted = a.sort(axis: axis)
|
904
|
-
x = percentile / 100.0 * (sorted.shape[axis] - 1)
|
905
|
-
r = x % 1
|
906
|
-
i = x.floor
|
907
|
-
# this should use axis, but we only need axis: 1
|
908
|
-
if i == sorted.shape[axis] - 1
|
909
|
-
sorted[true, -1]
|
910
|
-
else
|
911
|
-
sorted[true, i] + r * (sorted[true, i + 1] - sorted[true, i])
|
912
|
-
end
|
913
|
-
end
|
914
|
-
|
915
918
|
def make_future_dataframe(periods:, freq: "D", include_history: true)
|
916
919
|
raise Error, "Model has not been fit" unless @history_dates
|
917
920
|
last_date = @history_dates.max
|
918
921
|
# TODO add more freq
|
919
922
|
# https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
|
920
923
|
case freq
|
924
|
+
when /\A\d+S\z/
|
925
|
+
secs = freq.to_i
|
926
|
+
dates = (periods + 1).times.map { |i| last_date + i * secs }
|
921
927
|
when "H"
|
922
928
|
hour = 3600
|
923
929
|
dates = (periods + 1).times.map { |i| last_date + i * hour }
|
@@ -930,56 +936,60 @@ module Prophet
|
|
930
936
|
dates = (periods + 1).times.map { |i| last_date + i * week }
|
931
937
|
when "MS"
|
932
938
|
dates = [last_date]
|
939
|
+
# TODO reset day from last date, but keep time
|
933
940
|
periods.times do
|
934
941
|
dates << dates.last.to_datetime.next_month.to_time.utc
|
935
942
|
end
|
943
|
+
when "QS"
|
944
|
+
dates = [last_date]
|
945
|
+
# TODO reset day and month from last date, but keep time
|
946
|
+
periods.times do
|
947
|
+
dates << dates.last.to_datetime.next_month.next_month.next_month.to_time.utc
|
948
|
+
end
|
949
|
+
when "YS"
|
950
|
+
dates = [last_date]
|
951
|
+
# TODO reset day and month from last date, but keep time
|
952
|
+
periods.times do
|
953
|
+
dates << dates.last.to_datetime.next_year.to_time.utc
|
954
|
+
end
|
936
955
|
else
|
937
956
|
raise ArgumentError, "Unknown freq: #{freq}"
|
938
957
|
end
|
939
958
|
dates.select! { |d| d > last_date }
|
940
959
|
dates = dates.last(periods)
|
941
|
-
dates = @history_dates
|
942
|
-
|
960
|
+
dates = @history_dates.to_numo.concatenate(Numo::NArray.cast(dates)) if include_history
|
961
|
+
Rover::DataFrame.new({"ds" => dates})
|
943
962
|
end
|
944
963
|
|
945
964
|
private
|
946
965
|
|
947
|
-
# Time is
|
966
|
+
# Time is preferred over DateTime in Ruby docs
|
948
967
|
# use UTC to be consistent with Python
|
949
968
|
# and so days have equal length (no DST)
|
950
969
|
def to_datetime(vec)
|
951
970
|
return if vec.nil?
|
952
|
-
vec
|
953
|
-
|
954
|
-
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
971
|
+
vec =
|
972
|
+
vec.map do |v|
|
973
|
+
case v
|
974
|
+
when Time
|
975
|
+
v.utc
|
976
|
+
when Date
|
977
|
+
v.to_datetime.to_time.utc
|
978
|
+
else
|
979
|
+
DateTime.parse(v.to_s).to_time.utc
|
980
|
+
end
|
960
981
|
end
|
961
|
-
|
982
|
+
Rover::Vector.new(vec)
|
962
983
|
end
|
963
984
|
|
964
985
|
# okay to do in-place
|
965
986
|
def df_concat_axis_one(dfs)
|
966
987
|
dfs[1..-1].each do |df|
|
967
|
-
df
|
968
|
-
dfs[0][k] = v
|
969
|
-
end
|
988
|
+
dfs[0].merge!(df)
|
970
989
|
end
|
971
990
|
dfs[0]
|
972
991
|
end
|
973
992
|
|
974
|
-
def df_values(df)
|
975
|
-
if df.is_a?(Daru::Vector)
|
976
|
-
Numo::NArray.asarray(df.to_a)
|
977
|
-
else
|
978
|
-
# TODO make more performant
|
979
|
-
Numo::NArray.asarray(df.to_matrix.to_a)
|
980
|
-
end
|
981
|
-
end
|
982
|
-
|
983
993
|
# https://en.wikipedia.org/wiki/Poisson_distribution#Generating_Poisson-distributed_random_variables
|
984
994
|
def poisson(lam)
|
985
995
|
l = Math.exp(-lam)
|
@@ -994,13 +1004,8 @@ module Prophet
|
|
994
1004
|
|
995
1005
|
# https://en.wikipedia.org/wiki/Laplace_distribution#Generating_values_from_the_Laplace_distribution
|
996
1006
|
def laplace(loc, scale, size)
|
997
|
-
u = Numo::DFloat.new(size).rand
|
1007
|
+
u = Numo::DFloat.new(size).rand(-0.5, 0.5)
|
998
1008
|
loc - scale * u.sign * Numo::NMath.log(1 - 2 * u.abs)
|
999
1009
|
end
|
1000
|
-
|
1001
|
-
def ensure_arr(value)
|
1002
|
-
value = [value] unless value.is_a?(Array)
|
1003
|
-
value
|
1004
|
-
end
|
1005
1010
|
end
|
1006
1011
|
end
|