prophet-rb 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +23 -0
- data/README.md +202 -0
- data/data-raw/generated_holidays.csv +96474 -0
- data/ext/prophet/Makefile +5 -0
- data/ext/prophet/extconf.rb +18 -0
- data/lib/prophet-rb.rb +1 -0
- data/lib/prophet.rb +23 -0
- data/lib/prophet/forecaster.rb +986 -0
- data/lib/prophet/holidays.rb +27 -0
- data/lib/prophet/plot.rb +269 -0
- data/lib/prophet/stan_backend.rb +136 -0
- data/lib/prophet/version.rb +3 -0
- data/stan/unix/prophet.stan +131 -0
- data/stan/win/prophet.stan +162 -0
- metadata +170 -0
data/ext/prophet/extconf.rb
ADDED
@@ -0,0 +1,18 @@
+require "cmdstan"
+require "fileutils"
+require "tmpdir"
+
+platform = Gem.win_platform? ? "win" : "unix"
+stan_file = File.expand_path("../../stan/#{platform}/prophet.stan", __dir__)
+
+# copy to avoid temp file in repo
+temp_file = "#{Dir.tmpdir}/prophet.stan"
+FileUtils.cp(stan_file, temp_file)
+
+# compile
+sm = CmdStan::Model.new(stan_file: temp_file)
+
+# save
+target_dir = File.expand_path("../../stan_model", __dir__)
+FileUtils.mkdir_p(target_dir)
+FileUtils.cp(sm.exe_file, "#{target_dir}/prophet_model.bin")
data/lib/prophet-rb.rb
ADDED
@@ -0,0 +1 @@
+require "prophet"
data/lib/prophet.rb
ADDED
@@ -0,0 +1,23 @@
+# dependencies
+require "cmdstan"
+require "daru"
+require "numo/narray"
+
+# stdlib
+require "logger"
+require "set"
+
+# modules
+require "prophet/holidays"
+require "prophet/plot"
+require "prophet/forecaster"
+require "prophet/stan_backend"
+require "prophet/version"
+
+module Prophet
+  class Error < StandardError; end
+
+  def self.new(**kwargs)
+    Forecaster.new(**kwargs)
+  end
+end
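Prophet.new is shorthand for Prophet::Forecaster.new, mirroring the Python API. A minimal end-to-end sketch using only methods defined in forecaster.rb below (synthetic data, chosen for illustration):

  require "prophet"
  require "daru"

  # 60 days of synthetic data with a weekly cycle
  dates = (0...60).map { |i| Time.utc(2019, 1, 1) + i * 86400 }
  values = dates.map.with_index { |_, i| 10 + Math.sin(i * 2 * Math::PI / 7) }
  df = Daru::DataFrame.new("ds" => dates, "y" => values)

  m = Prophet.new
  m.fit(df)
  future = m.make_future_dataframe(periods: 7)
  forecast = m.predict(future)
  forecast["yhat"].last(7)  # point forecasts for the 7 new days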
data/lib/prophet/forecaster.rb
ADDED
@@ -0,0 +1,986 @@
+module Prophet
+  class Forecaster
+    include Holidays
+    include Plot
+
+    attr_reader :logger, :params, :train_holiday_names
+
+    def initialize(
+      growth: "linear",
+      changepoints: nil,
+      n_changepoints: 25,
+      changepoint_range: 0.8,
+      yearly_seasonality: "auto",
+      weekly_seasonality: "auto",
+      daily_seasonality: "auto",
+      holidays: nil,
+      seasonality_mode: "additive",
+      seasonality_prior_scale: 10.0,
+      holidays_prior_scale: 10.0,
+      changepoint_prior_scale: 0.05,
+      mcmc_samples: 0,
+      interval_width: 0.80,
+      uncertainty_samples: 1000
+    )
+      @growth = growth
+
+      @changepoints = to_datetime(changepoints)
+      if !@changepoints.nil?
+        @n_changepoints = @changepoints.size
+        @specified_changepoints = true
+      else
+        @n_changepoints = n_changepoints
+        @specified_changepoints = false
+      end
+
+      @changepoint_range = changepoint_range
+      @yearly_seasonality = yearly_seasonality
+      @weekly_seasonality = weekly_seasonality
+      @daily_seasonality = daily_seasonality
+      @holidays = holidays
+
+      @seasonality_mode = seasonality_mode
+      @seasonality_prior_scale = seasonality_prior_scale.to_f
+      @changepoint_prior_scale = changepoint_prior_scale.to_f
+      @holidays_prior_scale = holidays_prior_scale.to_f
+
+      @mcmc_samples = mcmc_samples
+      @interval_width = interval_width
+      @uncertainty_samples = uncertainty_samples
+
+      # Set during fitting or by other methods
+      @start = nil
+      @y_scale = nil
+      @logistic_floor = false
+      @t_scale = nil
+      @changepoints_t = nil
+      @seasonalities = {}
+      @extra_regressors = {}
+      @country_holidays = nil
+      @stan_fit = nil
+      @params = {}
+      @history = nil
+      @history_dates = nil
+      @train_component_cols = nil
+      @component_modes = nil
+      @train_holiday_names = nil
+      @fit_kwargs = {}
+      validate_inputs
+
+      @logger = ::Logger.new($stderr)
+      @logger.formatter = proc do |severity, datetime, progname, msg|
+        "[prophet] #{msg}\n"
+      end
+      @stan_backend = StanBackend.new(@logger)
+    end
+
+    def validate_inputs
+      if !["linear", "logistic"].include?(@growth)
+        raise ArgumentError, "Parameter \"growth\" should be \"linear\" or \"logistic\"."
+      end
+      if @changepoint_range < 0 || @changepoint_range > 1
+        raise ArgumentError, "Parameter \"changepoint_range\" must be in [0, 1]"
+      end
+      if @holidays
+        if !(@holidays.is_a?(Daru::DataFrame) && @holidays.vectors.include?("ds") && @holidays.vectors.include?("holiday"))
+          raise ArgumentError, "holidays must be a DataFrame with \"ds\" and \"holiday\" columns."
+        end
+        @holidays["ds"] = to_datetime(@holidays["ds"])
+        has_lower = @holidays.vectors.include?("lower_window")
+        has_upper = @holidays.vectors.include?("upper_window")
+        if has_lower ^ has_upper # xor
+          raise ArgumentError, "Holidays must have both lower_window and upper_window, or neither"
+        end
+        if has_lower
+          if @holidays["lower_window"].max > 0
+            raise ArgumentError, "Holiday lower_window should be <= 0"
+          end
+          if @holidays["upper_window"].min < 0
+            raise ArgumentError, "Holiday upper_window should be >= 0"
+          end
+        end
+        @holidays["holiday"].uniq.each do |h|
+          validate_column_name(h, check_holidays: false)
+        end
+      end
+
+      if !["additive", "multiplicative"].include?(@seasonality_mode)
+        raise ArgumentError, "seasonality_mode must be \"additive\" or \"multiplicative\""
+      end
+    end
+
+    def validate_column_name(name, check_holidays: true, check_seasonalities: true, check_regressors: true)
+      if name.include?("_delim_")
+        raise ArgumentError, "Name cannot contain \"_delim_\""
+      end
+      reserved_names = [
+        "trend", "additive_terms", "daily", "weekly", "yearly",
+        "holidays", "zeros", "extra_regressors_additive", "yhat",
+        "extra_regressors_multiplicative", "multiplicative_terms",
+      ]
+      rn_l = reserved_names.map { |n| n + "_lower" }
+      rn_u = reserved_names.map { |n| n + "_upper" }
+      reserved_names.concat(rn_l)
+      reserved_names.concat(rn_u)
+      reserved_names.concat(["ds", "y", "cap", "floor", "y_scaled", "cap_scaled"])
+      if reserved_names.include?(name)
+        raise ArgumentError, "Name #{name.inspect} is reserved."
+      end
+      if check_holidays && @holidays && @holidays["holiday"].uniq.include?(name)
+        raise ArgumentError, "Name #{name.inspect} already used for a holiday."
+      end
+      if check_holidays && @country_holidays && get_holiday_names(@country_holidays).include?(name)
+        raise ArgumentError, "Name #{name.inspect} is a holiday name in #{@country_holidays.inspect}."
+      end
+      if check_seasonalities && @seasonalities[name]
+        raise ArgumentError, "Name #{name.inspect} already used for a seasonality."
+      end
+      if check_regressors && @extra_regressors[name]
+        raise ArgumentError, "Name #{name.inspect} already used for an added regressor."
+      end
+    end
+
+    def setup_dataframe(df, initialize_scales: false)
+      if df.vectors.include?("y")
+        df["y"] = df["y"].map(&:to_f)
+        raise ArgumentError, "Found infinity in column y." unless df["y"].all?(&:finite?)
+      end
+      # TODO support integers
+
+      df["ds"] = to_datetime(df["ds"])
+
+      raise ArgumentError, "Found NaN in column ds." if df["ds"].any?(&:nil?)
+
+      @extra_regressors.each_key do |name|
+        if !df.vectors.include?(name)
+          raise ArgumentError, "Regressor #{name.inspect} missing from dataframe"
+        end
+        df[name] = df[name].map(&:to_f)
+        if df[name].any?(&:nil?)
+          raise ArgumentError, "Found NaN in column #{name.inspect}"
+        end
+      end
+      @seasonalities.values.each do |props|
+        condition_name = props[:condition_name]
+        if condition_name
+          if !df.vectors.include?(condition_name)
+            raise ArgumentError, "Condition #{condition_name.inspect} missing from dataframe"
+          end
+          if df.where(!df[condition_name].in([true, false])).any?
+            raise ArgumentError, "Found non-boolean in column #{condition_name.inspect}"
+          end
+        end
+      end
+
+      if df.index.name == "ds"
+        df.index.name = nil
+      end
+      df = df.sort(["ds"])
+
+      initialize_scales(initialize_scales, df)
+
+      if @logistic_floor
+        raise ArgumentError, "Expected column \"floor\"." unless df.vectors.include?("floor")
+      else
+        df["floor"] = 0
+      end
+
+      if @growth == "logistic"
+        unless df.vectors.include?("cap")
+          raise ArgumentError, "Capacities must be supplied for logistic growth in column \"cap\""
+        end
+        if df.where(df["cap"] <= df["floor"]).size > 0
+          raise ArgumentError, "cap must be greater than floor (which defaults to 0)."
+        end
+        df["cap_scaled"] = (df["cap"] - df["floor"]) / @y_scale
+      end
+
+      df["t"] = (df["ds"] - @start) / @t_scale.to_f
+      if df.vectors.include?("y")
+        df["y_scaled"] = (df["y"] - df["floor"]) / @y_scale
+      end
+
+      @extra_regressors.each do |name, props|
+        df[name] = (df[name] - props[:mu]) / props[:std]
+      end
+
+      df
+    end
+
+    def initialize_scales(initialize_scales, df)
+      return unless initialize_scales
+
+      floor = 0
+      @y_scale = (df["y"] - floor).abs.max
+      @y_scale = 1 if @y_scale == 0
+      @start = df["ds"].min
+      @t_scale = df["ds"].max - @start
+    end
+
+    def set_changepoints
+      hist_size = (@history.shape[0] * @changepoint_range).floor
+
+      if @n_changepoints + 1 > hist_size
+        @n_changepoints = hist_size - 1
+        logger.info "n_changepoints greater than number of observations. Using #{@n_changepoints}"
+      end
+
+      if @n_changepoints > 0
+        step = (hist_size - 1) / @n_changepoints.to_f
+        cp_indexes = (@n_changepoints + 1).times.map { |i| (i * step).round }
+        @changepoints = @history["ds"][*cp_indexes][1..-1]
+      else
+        @changepoints = []
+      end
+
+      if @changepoints.size > 0
+        @changepoints_t = Numo::NArray.asarray(((@changepoints - @start) / @t_scale.to_f).to_a).sort
+      else
+        @changepoints_t = Numo::NArray.asarray([0])
+      end
+    end
+
+    def fourier_series(dates, period, series_order)
+      start = Time.utc(1970).to_i
+      # uses to_datetime first so we get UTC
+      t = Numo::DFloat.asarray(dates.map { |v| v.to_i - start }) / (3600 * 24.0)
+
+      # no need for column_stack
+      series_order.times.flat_map do |i|
+        [Numo::DFloat::Math.method(:sin), Numo::DFloat::Math.method(:cos)].map do |fun|
+          fun.call(2.0 * (i + 1) * Math::PI * t / period)
+        end
+      end
+    end
+
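fourier_series expands each timestamp into 2 * series_order sine/cosine columns, the standard Fourier basis Prophet uses to model smooth periodic effects. A quick shape check (illustrative; any UTC times work):

  dates = (0...14).map { |i| Time.utc(2020, 1, 1) + i * 86400 }
  features = Prophet.new.fourier_series(dates, 7, 3)
  features.size      # => 6 vectors (sin and cos for orders 1..3)
  features[0].shape  # => [14], one value per date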
+    def make_seasonality_features(dates, period, series_order, prefix)
+      features = fourier_series(dates, period, series_order)
+      Daru::DataFrame.new(features.map.with_index { |v, i| ["#{prefix}_delim_#{i + 1}", v] }.to_h)
+    end
+
+    def construct_holiday_dataframe(dates)
+      all_holidays = Daru::DataFrame.new
+      if @holidays
+        all_holidays = @holidays.dup
+      end
+      if @country_holidays
+        year_list = dates.map(&:year)
+        country_holidays_df = make_holidays_df(year_list, @country_holidays)
+        all_holidays = all_holidays.concat(country_holidays_df)
+      end
+      # Drop future holidays not previously seen in training data
+      if @train_holiday_names
+        # Remove holiday names that didn't show up in fit
+        all_holidays = all_holidays.where(all_holidays["holiday"].in(@train_holiday_names))
+
+        # Add holiday names in fit but not in predict with ds as NA
+        holidays_to_add = Daru::DataFrame.new(
+          "holiday" => @train_holiday_names.where(!@train_holiday_names.in(all_holidays["holiday"]))
+        )
+        all_holidays = all_holidays.concat(holidays_to_add)
+      end
+
+      all_holidays
+    end
+
+    def make_holiday_features(dates, holidays)
+      expanded_holidays = Hash.new { |hash, key| hash[key] = Numo::DFloat.zeros(dates.size) }
+      prior_scales = {}
+      # Makes an index so we can perform `get_loc` below.
+      # Strip to just dates.
+      row_index = dates.map(&:to_date)
+
+      holidays.each_row do |row|
+        dt = row["ds"]
+        lw = nil
+        uw = nil
+        begin
+          lw = row["lower_window"].to_i
+          uw = row["upper_window"].to_i
+        rescue IndexError
+          lw = 0
+          uw = 0
+        end
+        ps = @holidays_prior_scale
+        if prior_scales[row["holiday"]] && prior_scales[row["holiday"]] != ps
+          raise ArgumentError, "Holiday #{row["holiday"].inspect} does not have consistent prior scale specification."
+        end
+        raise ArgumentError, "Prior scale must be > 0" if ps <= 0
+        prior_scales[row["holiday"]] = ps
+
+        lw.upto(uw).each do |offset|
+          # offset is in days, so add it to a Date (matching row_index above)
+          occurrence = dt ? dt.to_date + offset : nil
+          loc = occurrence ? row_index.index(occurrence) : nil
+          key = "#{row["holiday"]}_delim_#{offset >= 0 ? "+" : "-"}#{offset.abs}"
+          if loc
+            expanded_holidays[key][loc] = 1.0
+          else
+            expanded_holidays[key] # Access key to generate value
+          end
+        end
+      end
+      holiday_features = Daru::DataFrame.new(expanded_holidays)
+      # Make sure column order is consistent
+      holiday_features = holiday_features[*holiday_features.vectors.sort]
+      prior_scale_list = holiday_features.vectors.map { |h| prior_scales[h.split("_delim_")[0]] }
+      holiday_names = prior_scales.keys
+      # Store holiday names used in fit
+      if !@train_holiday_names
+        @train_holiday_names = Daru::Vector.new(holiday_names)
+      end
+      [holiday_features, prior_scale_list, holiday_names]
+    end
+
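Custom holidays are expanded into one indicator column per (holiday, day-offset) pair, e.g. playoff_delim_+1 for the day after an event; lower_window/upper_window widen the effect around each date. A sketch of the expected input shape (values are illustrative):

  holidays = Daru::DataFrame.new(
    "holiday" => ["playoff", "playoff"],
    "ds" => ["2019-01-13", "2020-01-12"],
    "lower_window" => [0, 0],
    "upper_window" => [1, 1]
  )
  m = Prophet.new(holidays: holidays)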
+    def add_regressor(name, prior_scale: nil, standardize: "auto", mode: nil)
+      raise Error, "Regressors must be added prior to model fitting." if @history
+      validate_column_name(name, check_regressors: false)
+      prior_scale ||= @holidays_prior_scale.to_f
+      mode ||= @seasonality_mode
+      raise ArgumentError, "Prior scale must be > 0" if prior_scale <= 0
+      if !["additive", "multiplicative"].include?(mode)
+        raise ArgumentError, "mode must be \"additive\" or \"multiplicative\""
+      end
+      @extra_regressors[name] = {
+        prior_scale: prior_scale,
+        standardize: standardize,
+        mu: 0.0,
+        std: 1.0,
+        mode: mode
+      }
+      self
+    end
+
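add_regressor registers an extra column that must then be present, with no missing values, in both the training and prediction dataframes. For example (with a hypothetical "temperature" column added to df):

  m = Prophet.new
  m.add_regressor("temperature", mode: "additive")
  m.fit(df)  # df must now include a "temperature" vector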
+    def add_seasonality(name:, period:, fourier_order:, prior_scale: nil, mode: nil, condition_name: nil)
+      raise Error, "Seasonality must be added prior to model fitting." if @history
+
+      if !["daily", "weekly", "yearly"].include?(name)
+        # Allow overwriting built-in seasonalities
+        validate_column_name(name, check_seasonalities: false)
+      end
+      if prior_scale.nil?
+        ps = @seasonality_prior_scale
+      else
+        ps = prior_scale.to_f
+      end
+      raise ArgumentError, "Prior scale must be > 0" if ps <= 0
+      raise ArgumentError, "Fourier Order must be > 0" if fourier_order <= 0
+      mode ||= @seasonality_mode
+      if !["additive", "multiplicative"].include?(mode)
+        raise ArgumentError, "mode must be \"additive\" or \"multiplicative\""
+      end
+      validate_column_name(condition_name) if condition_name
+      @seasonalities[name] = {
+        period: period,
+        fourier_order: fourier_order,
+        prior_scale: ps,
+        mode: mode,
+        condition_name: condition_name
+      }
+      self
+    end
+
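add_seasonality covers periods the auto-detection doesn't, with period given in days. For instance, a monthly component (period and order here follow the values commonly shown in Prophet's documentation):

  m = Prophet.new(weekly_seasonality: false)
  m.add_seasonality(name: "monthly", period: 30.5, fourier_order: 5)
  m.fit(df)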
+    def add_country_holidays(country_name)
+      raise Error, "Country holidays must be added prior to model fitting." if @history
+      # Validate names.
+      get_holiday_names(country_name).each do |name|
+        # Allow merging with existing holidays
+        validate_column_name(name, check_holidays: false)
+      end
+      # Set the holidays.
+      if @country_holidays
+        logger.warn "Changing country holidays from #{@country_holidays.inspect} to #{country_name.inspect}."
+      end
+      @country_holidays = country_name
+      self
+    end
+
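Country holidays come from the bundled data-raw/generated_holidays.csv (via the Holidays module) and are keyed by country code. A one-liner is enough (assuming "US" is among the codes in the bundled CSV):

  m = Prophet.new
  m.add_country_holidays("US")
  m.fit(df)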
+    def make_all_seasonality_features(df)
+      seasonal_features = []
+      prior_scales = []
+      modes = {"additive" => [], "multiplicative" => []}
+
+      # Seasonality features
+      @seasonalities.each do |name, props|
+        features = make_seasonality_features(
+          df["ds"],
+          props[:period],
+          props[:fourier_order],
+          name
+        )
+        if props[:condition_name]
+          features[!df.where(props[:condition_name])] = 0
+        end
+        seasonal_features << features
+        prior_scales.concat([props[:prior_scale]] * features.shape[1])
+        modes[props[:mode]] << name
+      end
+
+      # Holiday features
+      holidays = construct_holiday_dataframe(df["ds"])
+      if holidays.size > 0
+        features, holiday_priors, holiday_names = make_holiday_features(df["ds"], holidays)
+        seasonal_features << features
+        prior_scales.concat(holiday_priors)
+        modes[@seasonality_mode].concat(holiday_names)
+      end
+
+      # Additional regressors
+      @extra_regressors.each do |name, props|
+        seasonal_features << df[name].to_df
+        prior_scales << props[:prior_scale]
+        modes[props[:mode]] << name
+      end
+
+      # Dummy to prevent empty X
+      if seasonal_features.size == 0
+        seasonal_features << Daru::DataFrame.new("zeros" => [0] * df.shape[0])
+        prior_scales << 1.0
+      end
+
+      seasonal_features = df_concat_axis_one(seasonal_features)
+
+      component_cols, modes = regressor_column_matrix(seasonal_features, modes)
+
+      [seasonal_features, prior_scales, component_cols, modes]
+    end
+
+    def regressor_column_matrix(seasonal_features, modes)
+      components = Daru::DataFrame.new(
+        "col" => seasonal_features.shape[1].times.to_a,
+        "component" => seasonal_features.vectors.map { |x| x.split("_delim_")[0] }
+      )
+
+      # Add total for holidays
+      if @train_holiday_names
+        components = add_group_component(components, "holidays", @train_holiday_names.uniq)
+      end
+      # Add totals for additive and multiplicative components, and regressors
+      ["additive", "multiplicative"].each do |mode|
+        components = add_group_component(components, mode + "_terms", modes[mode])
+        regressors_by_mode = @extra_regressors.select { |r, props| props[:mode] == mode }
+          .map { |r, props| r }
+        components = add_group_component(components, "extra_regressors_" + mode, regressors_by_mode)
+
+        # Add combination components to modes
+        modes[mode] << mode + "_terms"
+        modes[mode] << "extra_regressors_" + mode
+      end
+      # After all of the additive/multiplicative groups have been added,
+      # assign holidays to the model's seasonality mode
+      modes[@seasonality_mode] << "holidays"
+      # Convert to a binary matrix
+      component_cols = Daru::DataFrame.crosstab_by_assignation(
+        components["col"], components["component"], [1] * components.size
+      )
+      component_cols.each_vector do |v|
+        v.map! { |vi| vi.nil? ? 0 : vi }
+      end
+      component_cols.rename_vectors(:_id => "col")
+
+      # Add columns for additive and multiplicative terms, if missing
+      ["additive_terms", "multiplicative_terms"].each do |name|
+        component_cols[name] = 0 unless component_cols.vectors.include?(name)
+      end
+
+      # TODO validation
+
+      [component_cols, modes]
+    end
+
+    def add_group_component(components, name, group)
+      new_comp = components.where(components["component"].in(group)).dup
+      group_cols = new_comp["col"].uniq
+      if group_cols.size > 0
+        new_comp = Daru::DataFrame.new("col" => group_cols, "component" => [name] * group_cols.size)
+        components = components.concat(new_comp)
+      end
+      components
+    end
+
+    def parse_seasonality_args(name, arg, auto_disable, default_order)
+      case arg
+      when "auto"
+        fourier_order = 0
+        if @seasonalities.include?(name)
+          logger.info "Found custom seasonality named #{name.inspect}, disabling built-in #{name.inspect} seasonality."
+        elsif auto_disable
+          logger.info "Disabling #{name} seasonality. Run prophet with #{name}_seasonality: true to override this."
+        else
+          fourier_order = default_order
+        end
+      when true
+        fourier_order = default_order
+      when false
+        fourier_order = 0
+      else
+        fourier_order = arg.to_i
+      end
+      fourier_order
+    end
+
+    def set_auto_seasonalities
+      first = @history["ds"].min
+      last = @history["ds"].max
+      dt = @history["ds"].diff
+      min_dt = dt.min
+
+      days = 86400
+
+      # Yearly seasonality
+      yearly_disable = last - first < 370 * days
+      fourier_order = parse_seasonality_args("yearly", @yearly_seasonality, yearly_disable, 10)
+      if fourier_order > 0
+        @seasonalities["yearly"] = {
+          period: 365.25,
+          fourier_order: fourier_order,
+          prior_scale: @seasonality_prior_scale,
+          mode: @seasonality_mode,
+          condition_name: nil
+        }
+      end
+
+      # Weekly seasonality
+      weekly_disable = last - first < 14 * days || min_dt >= 7 * days
+      fourier_order = parse_seasonality_args("weekly", @weekly_seasonality, weekly_disable, 3)
+      if fourier_order > 0
+        @seasonalities["weekly"] = {
+          period: 7,
+          fourier_order: fourier_order,
+          prior_scale: @seasonality_prior_scale,
+          mode: @seasonality_mode,
+          condition_name: nil
+        }
+      end
+
+      # Daily seasonality
+      daily_disable = last - first < 2 * days || min_dt >= 1 * days
+      fourier_order = parse_seasonality_args("daily", @daily_seasonality, daily_disable, 4)
+      if fourier_order > 0
+        @seasonalities["daily"] = {
+          period: 1,
+          fourier_order: fourier_order,
+          prior_scale: @seasonality_prior_scale,
+          mode: @seasonality_mode,
+          condition_name: nil
+        }
+      end
+    end
+
+    def linear_growth_init(df)
+      i0 = df["ds"].index.min
+      i1 = df["ds"].index.max
+      t = df["t"][i1] - df["t"][i0]
+      k = (df["y_scaled"][i1] - df["y_scaled"][i0]) / t
+      m = df["y_scaled"][i0] - k * df["t"][i0]
+      [k, m]
+    end
+
+    def logistic_growth_init(df)
+      i0 = df["ds"].index.min
+      i1 = df["ds"].index.max
+      t = df["t"][i1] - df["t"][i0]
+
+      # Force valid values, in case y > cap or y < 0
+      c0 = df["cap_scaled"][i0]
+      c1 = df["cap_scaled"][i1]
+      y0 = [0.01 * c0, [0.99 * c0, df["y_scaled"][i0]].min].max
+      y1 = [0.01 * c1, [0.99 * c1, df["y_scaled"][i1]].min].max
+
+      r0 = c0 / y0
+      r1 = c1 / y1
+
+      if (r0 - r1).abs <= 0.01
+        r0 = 1.05 * r0
+      end
+
+      l0 = Math.log(r0 - 1)
+      l1 = Math.log(r1 - 1)
+
+      # Initialize the offset
+      m = l0 * t / (l0 - l1)
+      # And the rate
+      k = (l0 - l1) / t
+      [k, m]
+    end
+
+    def fit(df, **kwargs)
+      raise Error, "Prophet object can only be fit once" if @history
+
+      history = df.where(!df["y"].in([nil, Float::NAN]))
+      raise Error, "Data has less than 2 non-nil rows" if history.shape[0] < 2
+
+      @history_dates = to_datetime(df["ds"]).sort
+      history = setup_dataframe(history, initialize_scales: true)
+      @history = history
+      set_auto_seasonalities
+      seasonal_features, prior_scales, component_cols, modes = make_all_seasonality_features(history)
+      @train_component_cols = component_cols
+      @component_modes = modes
+      @fit_kwargs = kwargs.dup # TODO deep dup?
+
+      set_changepoints
+
+      dat = {
+        "T" => history.shape[0],
+        "K" => seasonal_features.shape[1],
+        "S" => @changepoints_t.size,
+        "y" => history["y_scaled"],
+        "t" => history["t"],
+        "t_change" => @changepoints_t,
+        "X" => seasonal_features,
+        "sigmas" => prior_scales,
+        "tau" => @changepoint_prior_scale,
+        "trend_indicator" => @growth == "logistic" ? 1 : 0,
+        "s_a" => component_cols["additive_terms"],
+        "s_m" => component_cols["multiplicative_terms"]
+      }
+
+      if @growth == "linear"
+        dat["cap"] = Numo::DFloat.zeros(@history.shape[0])
+        kinit = linear_growth_init(history)
+      else
+        dat["cap"] = history["cap_scaled"]
+        kinit = logistic_growth_init(history)
+      end
+
+      stan_init = {
+        "k" => kinit[0],
+        "m" => kinit[1],
+        "delta" => Numo::DFloat.zeros(@changepoints_t.size),
+        "beta" => Numo::DFloat.zeros(seasonal_features.shape[1]),
+        "sigma_obs" => 1
+      }
+
+      if history["y"].min == history["y"].max && @growth == "linear"
+        # Nothing to fit.
+        @params = stan_init
+        @params["sigma_obs"] = 1e-9
+        @params.each_key do |par|
+          @params[par] = Numo::NArray.asarray(@params[par])
+        end
+      elsif @mcmc_samples > 0
+        @params = @stan_backend.sampling(stan_init, dat, @mcmc_samples, **kwargs)
+      else
+        @params = @stan_backend.fit(stan_init, dat, **kwargs)
+      end
+
+      # If no changepoints were requested, replace delta with 0s
+      if @changepoints.size == 0
+        # Fold delta into the base rate k
+        @params["k"] = @params["k"] + @params["delta"].reshape(-1)
+        @params["delta"] = Numo::DFloat.zeros(@params["delta"].shape).reshape(-1, 1)
+      end
+
+      self
+    end
+
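fit runs MAP optimization through the Stan backend by default; setting mcmc_samples switches to full Bayesian sampling, which is slower but yields posterior uncertainty in the seasonal components as well. Sketch:

  # fast point estimate via the CmdStan optimizer
  Prophet.new.fit(df)

  # full MCMC, e.g. 300 samples (an illustrative count)
  Prophet.new(mcmc_samples: 300).fit(df)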
+    def predict(df = nil)
+      raise Error, "Model has not been fit." unless @history
+
+      if df.nil?
+        df = @history.dup
+      else
+        raise ArgumentError, "Dataframe has no rows." if df.shape[0] == 0
+        df = setup_dataframe(df.dup)
+      end
+
+      df["trend"] = predict_trend(df)
+      seasonal_components = predict_seasonal_components(df)
+      if @uncertainty_samples > 0
+        intervals = predict_uncertainty(df)
+      else
+        intervals = nil
+      end
+
+      # Drop columns except ds, cap, floor, and trend
+      cols = ["ds", "trend"]
+      cols << "cap" if df.vectors.include?("cap")
+      cols << "floor" if @logistic_floor
+      # Add in forecast components
+      df2 = df_concat_axis_one([df[*cols], intervals, seasonal_components])
+      df2["yhat"] = df2["trend"] * (df2["multiplicative_terms"] + 1) + df2["additive_terms"]
+      df2
+    end
+
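predict combines the pieces as yhat = trend * (1 + multiplicative_terms) + additive_terms, so multiplicative seasonality scales the trend while additive terms shift it. The returned frame carries ds, trend, each component with its _lower/_upper interval columns, and yhat:

  forecast = m.predict(future)
  forecast["yhat"]        # point forecast
  forecast["yhat_lower"]  # lower edge of the interval_width band
  forecast["yhat_upper"]  # upper edge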
+    def piecewise_linear(t, deltas, k, m, changepoint_ts)
+      # Intercept changes
+      gammas = -changepoint_ts * deltas
+      # Get cumulative slope and intercept at each t
+      k_t = t.new_ones * k
+      m_t = t.new_ones * m
+      changepoint_ts.each_with_index do |t_s, s|
+        indx = t >= t_s
+        k_t[indx] += deltas[s]
+        m_t[indx] += gammas[s]
+      end
+      k_t * t + m_t
+    end
+
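The trend at time t is (k + sum of delta_s over changepoints with t_s <= t) * t + (m + sum of gamma_s). Choosing gamma_s = -t_s * delta_s makes the linear pieces meet at each changepoint: just before t_s the value is k * t_s + m; just after, it is (k + delta_s) * t_s + m - t_s * delta_s, the same point, so only the slope jumps.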
+    def piecewise_logistic(t, cap, deltas, k, m, changepoint_ts)
+      k_1d = Numo::NArray.asarray(k)
+      k_1d = k_1d.reshape(1) if k_1d.ndim < 1
+      k_cum = k_1d.concatenate(deltas.cumsum + k)
+      gammas = Numo::DFloat.zeros(changepoint_ts.size)
+      changepoint_ts.each_with_index do |t_s, i|
+        gammas[i] = (t_s - m - gammas.sum) * (1 - k_cum[i] / k_cum[i + 1])
+      end
+      # Get cumulative rate and offset at each t
+      k_t = t.new_ones * k
+      m_t = t.new_ones * m
+      changepoint_ts.each_with_index do |t_s, s|
+        indx = t >= t_s
+        k_t[indx] += deltas[s]
+        m_t[indx] += gammas[s]
+      end
+      # need df_values to prevent memory from blowing up
+      df_values(cap) / (1 + Numo::NMath.exp(-k_t * (t - m_t)))
+    end
+
+    def predict_trend(df)
+      k = @params["k"].mean(nan: true)
+      m = @params["m"].mean(nan: true)
+      deltas = @params["delta"].mean(axis: 0, nan: true)
+
+      t = Numo::NArray.asarray(df["t"].to_a)
+      if @growth == "linear"
+        trend = piecewise_linear(t, deltas, k, m, @changepoints_t)
+      else
+        cap = df["cap_scaled"]
+        trend = piecewise_logistic(t, cap, deltas, k, m, @changepoints_t)
+      end
+
+      trend * @y_scale + Numo::NArray.asarray(df["floor"].to_a)
+    end
+
+    def predict_seasonal_components(df)
+      seasonal_features, _, component_cols, _ = make_all_seasonality_features(df)
+      if @uncertainty_samples > 0
+        lower_p = 100 * (1.0 - @interval_width) / 2
+        upper_p = 100 * (1.0 + @interval_width) / 2
+      end
+
+      x = df_values(seasonal_features)
+      data = {}
+      component_cols.vectors.each do |component|
+        beta_c = @params["beta"] * Numo::NArray.asarray(component_cols[component].to_a)
+
+        comp = x.dot(beta_c.transpose)
+        if @component_modes["additive"].include?(component)
+          comp *= @y_scale
+        end
+        data[component] = comp.mean(axis: 1, nan: true)
+        if @uncertainty_samples > 0
+          data[component + "_lower"] = percentile(comp, lower_p, axis: 1)
+          data[component + "_upper"] = percentile(comp, upper_p, axis: 1)
+        end
+      end
+      Daru::DataFrame.new(data)
+    end
+
+    def sample_posterior_predictive(df)
+      n_iterations = @params["k"].shape[0]
+      samp_per_iter = [1, (@uncertainty_samples / n_iterations.to_f).ceil].max
+
+      # Generate seasonality features once so we can re-use them.
+      seasonal_features, _, component_cols, _ = make_all_seasonality_features(df)
+
+      # convert to Numo for performance
+      seasonal_features = df_values(seasonal_features)
+      additive_terms = df_values(component_cols["additive_terms"])
+      multiplicative_terms = df_values(component_cols["multiplicative_terms"])
+
+      sim_values = {"yhat" => [], "trend" => []}
+      n_iterations.times do |i|
+        samp_per_iter.times do
+          sim = sample_model(
+            df,
+            seasonal_features,
+            i,
+            additive_terms,
+            multiplicative_terms
+          )
+          sim_values.each_key do |key|
+            sim_values[key] << sim[key]
+          end
+        end
+      end
+      sim_values.each do |k, v|
+        sim_values[k] = Numo::NArray.column_stack(v)
+      end
+      sim_values
+    end
+
+    def predictive_samples(df)
+      df = setup_dataframe(df.dup)
+      sim_values = sample_posterior_predictive(df)
+      sim_values
+    end
+
+    def predict_uncertainty(df)
+      sim_values = sample_posterior_predictive(df)
+
+      lower_p = 100 * (1.0 - @interval_width) / 2
+      upper_p = 100 * (1.0 + @interval_width) / 2
+
+      series = {}
+      ["yhat", "trend"].each do |key|
+        series["#{key}_lower"] = percentile(sim_values[key], lower_p, axis: 1)
+        series["#{key}_upper"] = percentile(sim_values[key], upper_p, axis: 1)
+      end
+
+      Daru::DataFrame.new(series)
+    end
+
+    def sample_model(df, seasonal_features, iteration, s_a, s_m)
+      trend = sample_predictive_trend(df, iteration)
+
+      beta = @params["beta"][iteration, true]
+      xb_a = seasonal_features.dot(beta * s_a) * @y_scale
+      xb_m = seasonal_features.dot(beta * s_m)
+
+      sigma = @params["sigma_obs"][iteration]
+      noise = Numo::DFloat.new(df.shape[0]).rand_norm(0, sigma) * @y_scale
+
+      # skip data frame for performance
+      {
+        "yhat" => trend * (1 + xb_m) + xb_a + noise,
+        "trend" => trend
+      }
+    end
+
+    def sample_predictive_trend(df, iteration)
+      k = @params["k"][iteration, true]
+      m = @params["m"][iteration, true]
+      deltas = @params["delta"][iteration, true]
+
+      t = Numo::NArray.asarray(df["t"].to_a)
+      upper_t = t.max
+
+      # New changepoints from a Poisson process with rate S on [1, T]
+      if upper_t > 1
+        s = @changepoints_t.size
+        n_changes = poisson(s * (upper_t - 1))
+      else
+        n_changes = 0
+      end
+      if n_changes > 0
+        changepoint_ts_new = 1 + Numo::DFloat.new(n_changes).rand * (upper_t - 1)
+        changepoint_ts_new = changepoint_ts_new.sort
+      else
+        changepoint_ts_new = []
+      end
+
+      # Get the empirical scale of the deltas, plus epsilon to avoid NaNs.
+      lambda_ = deltas.abs.mean + 1e-8
+
+      # Sample deltas
+      deltas_new = laplace(0, lambda_, n_changes)
+
+      # Prepend the times and deltas from the history
+      changepoint_ts = @changepoints_t.concatenate(changepoint_ts_new)
+      deltas = deltas.concatenate(deltas_new)
+
+      if @growth == "linear"
+        trend = piecewise_linear(t, deltas, k, m, changepoint_ts)
+      else
+        cap = df["cap_scaled"]
+        trend = piecewise_logistic(t, cap, deltas, k, m, changepoint_ts)
+      end
+
+      trend * @y_scale + Numo::NArray.asarray(df["floor"].to_a)
+    end
+
+    def percentile(a, percentile, axis:)
+      raise Error, "Axis must be 1" if axis != 1
+
+      sorted = a.sort(axis: axis)
+      x = percentile / 100.0 * (sorted.shape[axis] - 1)
+      r = x % 1
+      i = x.floor
+      # this should use axis, but we only need axis: 1
+      if i == sorted.shape[axis] - 1
+        sorted[true, -1]
+      else
+        sorted[true, i] + r * (sorted[true, i + 1] - sorted[true, i])
+      end
+    end
+
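percentile mirrors numpy.percentile's default linear interpolation along axis 1. For a row [1, 2, 3, 4], the 90th percentile sits at index 0.9 * 3 = 2.7, giving 3 + 0.7 * (4 - 3) = 3.7. A quick check:

  a = Numo::DFloat[[1, 2, 3, 4]]
  Prophet.new.percentile(a, 90, axis: 1)  # => Numo::DFloat[3.7]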
+    def make_future_dataframe(periods:, freq: "D", include_history: true)
+      raise Error, "Model has not been fit" unless @history_dates
+      last_date = @history_dates.max
+      case freq
+      when "D"
+        # days have constant length with UTC (no DST or leap seconds)
+        dates = (periods + 1).times.map { |i| last_date + i * 86400 }
+      when "H"
+        dates = (periods + 1).times.map { |i| last_date + i * 3600 }
+      when "MS"
+        dates = [last_date]
+        periods.times do
+          dates << dates.last.to_datetime.next_month.to_time.utc
+        end
+      else
+        raise ArgumentError, "Unknown freq: #{freq}"
+      end
+      dates.select! { |d| d > last_date }
+      dates = dates.last(periods)
+      dates = @history_dates + dates if include_history
+      Daru::DataFrame.new("ds" => dates)
+    end
+
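Only three pandas-style frequency aliases are supported here: "D" (daily), "H" (hourly), and "MS" (month start). For example, 24 future hours without the training rows:

  m.make_future_dataframe(periods: 24, freq: "H", include_history: false)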
+    private
+
+    # Time is preferred over DateTime in Ruby
+    # use UTC to be consistent with Python
+    # and so days have equal length (no DST)
+    def to_datetime(vec)
+      return if vec.nil?
+      vec.map do |v|
+        case v
+        when Time
+          v.utc
+        when Date
+          v.to_datetime.to_time.utc
+        else
+          DateTime.parse(v.to_s).to_time.utc
+        end
+      end
+    end
+
+    # okay to do in-place
+    def df_concat_axis_one(dfs)
+      # ignore nil entries (predict passes intervals = nil when uncertainty is disabled)
+      dfs = dfs.compact
+      dfs[1..-1].each do |df|
+        df.each_vector_with_index do |v, k|
+          dfs[0][k] = v
+        end
+      end
+      dfs[0]
+    end
+
+    def df_values(df)
+      if df.is_a?(Daru::Vector)
+        Numo::NArray.asarray(df.to_a)
+      else
+        # TODO make more performant
+        Numo::NArray.asarray(df.to_matrix.to_a)
+      end
+    end
+
+    # https://en.wikipedia.org/wiki/Poisson_distribution#Generating_Poisson-distributed_random_variables
+    def poisson(lam)
+      l = Math.exp(-lam)
+      k = 0
+      p = 1
+      while p > l
+        k += 1
+        p *= rand
+      end
+      k - 1
+    end
+
+    # https://en.wikipedia.org/wiki/Laplace_distribution#Generating_values_from_the_Laplace_distribution
+    def laplace(loc, scale, size)
+      u = Numo::DFloat.new(size).rand - 0.5
+      loc - scale * u.sign * Numo::NMath.log(1 - 2 * u.abs)
+    end
+  end
+end
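Both helpers avoid a stats dependency: poisson uses Knuth's product-of-uniforms method (multiply uniforms until the product drops below e^-lambda), and laplace is inverse-CDF sampling, mapping a uniform u in (-0.5, 0.5) to loc - scale * sign(u) * ln(1 - 2|u|). For instance, five standard Laplace draws:

  u = Numo::DFloat.new(5).rand - 0.5
  draws = -u.sign * Numo::NMath.log(1 - 2 * u.abs)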