prophet-rb 0.1.0
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +23 -0
- data/README.md +202 -0
- data/data-raw/generated_holidays.csv +96474 -0
- data/ext/prophet/Makefile +5 -0
- data/ext/prophet/extconf.rb +18 -0
- data/lib/prophet-rb.rb +1 -0
- data/lib/prophet.rb +23 -0
- data/lib/prophet/forecaster.rb +986 -0
- data/lib/prophet/holidays.rb +27 -0
- data/lib/prophet/plot.rb +269 -0
- data/lib/prophet/stan_backend.rb +136 -0
- data/lib/prophet/version.rb +3 -0
- data/stan/unix/prophet.stan +131 -0
- data/stan/win/prophet.stan +162 -0
- metadata +170 -0
data/ext/prophet/extconf.rb
ADDED
@@ -0,0 +1,18 @@
+require "cmdstan"
+require "fileutils"
+require "tmpdir"
+
+platform = Gem.win_platform? ? "win" : "unix"
+stan_file = File.expand_path("../../stan/#{platform}/prophet.stan", __dir__)
+
+# copy to avoid temp file in repo
+temp_file = "#{Dir.tmpdir}/prophet.stan"
+FileUtils.cp(stan_file, temp_file)
+
+# compile
+sm = CmdStan::Model.new(stan_file: temp_file)
+
+# save
+target_dir = File.expand_path("../../stan_model", __dir__)
+FileUtils.mkdir_p(target_dir)
+FileUtils.cp(sm.exe_file, "#{target_dir}/prophet_model.bin")
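The extconf above compiles the bundled Stan program at gem install time via the cmdstan gem (the `# compile` step) and caches the resulting binary under stan_model/. The same compile-and-cache flow can be reproduced by hand; a minimal sketch, assuming CmdStan is installed and using an illustrative local path:

    require "cmdstan"
    require "fileutils"

    # CmdStan::Model compiles the program when initialized, as in the extconf above
    sm = CmdStan::Model.new(stan_file: "prophet.stan")

    # cache the compiled executable so later runs skip compilation
    FileUtils.mkdir_p("stan_model")
    FileUtils.cp(sm.exe_file, "stan_model/prophet_model.bin")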
data/lib/prophet-rb.rb
ADDED
@@ -0,0 +1 @@
+require "prophet"
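lib/prophet-rb.rb is a one-line shim so that Bundler's default require, which matches the gem name, still loads the library. Both require paths load the same code:

    # Gemfile
    gem "prophet-rb"

    # either of these works in application code
    require "prophet-rb"
    require "prophet"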
data/lib/prophet.rb
ADDED
@@ -0,0 +1,23 @@
+# dependencies
+require "cmdstan"
+require "daru"
+require "numo/narray"
+
+# stdlib
+require "logger"
+require "set"
+
+# modules
+require "prophet/holidays"
+require "prophet/plot"
+require "prophet/forecaster"
+require "prophet/stan_backend"
+require "prophet/version"
+
+module Prophet
+  class Error < StandardError; end
+
+  def self.new(**kwargs)
+    Forecaster.new(**kwargs)
+  end
+end
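lib/prophet.rb wires the gem together and defines Prophet.new as a shorthand for Prophet::Forecaster.new, so the whole API hangs off one entry point. A minimal end-to-end sketch of that API (the CSV file name is illustrative; the frame needs "ds" and "y" columns):

    require "prophet"

    df = Daru::DataFrame.from_csv("example.csv") # columns: ds, y
    m = Prophet.new(interval_width: 0.95)
    m.fit(df)

    future = m.make_future_dataframe(periods: 30)
    forecast = m.predict(future)
    puts forecast["yhat"].to_a.last(5).inspect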
data/lib/prophet/forecaster.rb
ADDED
@@ -0,0 +1,986 @@
+module Prophet
+  class Forecaster
+    include Holidays
+    include Plot
+
+    attr_reader :logger, :params, :train_holiday_names
+
+    def initialize(
+      growth: "linear",
+      changepoints: nil,
+      n_changepoints: 25,
+      changepoint_range: 0.8,
+      yearly_seasonality: "auto",
+      weekly_seasonality: "auto",
+      daily_seasonality: "auto",
+      holidays: nil,
+      seasonality_mode: "additive",
+      seasonality_prior_scale: 10.0,
+      holidays_prior_scale: 10.0,
+      changepoint_prior_scale: 0.05,
+      mcmc_samples: 0,
+      interval_width: 0.80,
+      uncertainty_samples: 1000
+    )
+      @growth = growth
+
+      @changepoints = to_datetime(changepoints)
+      if !@changepoints.nil?
+        @n_changepoints = @changepoints.size
+        @specified_changepoints = true
+      else
+        @n_changepoints = n_changepoints
+        @specified_changepoints = false
+      end
+
+      @changepoint_range = changepoint_range
+      @yearly_seasonality = yearly_seasonality
+      @weekly_seasonality = weekly_seasonality
+      @daily_seasonality = daily_seasonality
+      @holidays = holidays
+
+      @seasonality_mode = seasonality_mode
+      @seasonality_prior_scale = seasonality_prior_scale.to_f
+      @changepoint_prior_scale = changepoint_prior_scale.to_f
+      @holidays_prior_scale = holidays_prior_scale.to_f
+
+      @mcmc_samples = mcmc_samples
+      @interval_width = interval_width
+      @uncertainty_samples = uncertainty_samples
+
+      # Set during fitting or by other methods
+      @start = nil
+      @y_scale = nil
+      @logistic_floor = false
+      @t_scale = nil
+      @changepoints_t = nil
+      @seasonalities = {}
+      @extra_regressors = {}
+      @country_holidays = nil
+      @stan_fit = nil
+      @params = {}
+      @history = nil
+      @history_dates = nil
+      @train_component_cols = nil
+      @component_modes = nil
+      @train_holiday_names = nil
+      @fit_kwargs = {}
+      validate_inputs
+
+      @logger = ::Logger.new($stderr)
+      @logger.formatter = proc do |severity, datetime, progname, msg|
+        "[prophet] #{msg}\n"
+      end
+      @stan_backend = StanBackend.new(@logger)
+    end
+
+    def validate_inputs
+      if !["linear", "logistic"].include?(@growth)
+        raise ArgumentError, "Parameter \"growth\" should be \"linear\" or \"logistic\"."
+      end
+      if @changepoint_range < 0 || @changepoint_range > 1
+        raise ArgumentError, "Parameter \"changepoint_range\" must be in [0, 1]"
+      end
+      if @holidays
+        if !(@holidays.is_a?(Daru::DataFrame) && @holidays.vectors.include?("ds") && @holidays.vectors.include?("holiday"))
+          raise ArgumentError, "holidays must be a DataFrame with \"ds\" and \"holiday\" columns."
+        end
+        @holidays["ds"] = to_datetime(@holidays["ds"])
+        has_lower = @holidays.vectors.include?("lower_window")
+        has_upper = @holidays.vectors.include?("upper_window")
+        if has_lower ^ has_upper # xor
+          raise ArgumentError, "Holidays must have both lower_window and upper_window, or neither"
+        end
+        if has_lower
+          if @holidays["lower_window"].max > 0
+            raise ArgumentError, "Holiday lower_window should be <= 0"
+          end
+          if @holidays["upper_window"].min < 0
+            raise ArgumentError, "Holiday upper_window should be >= 0"
+          end
+        end
+        @holidays["holiday"].uniq.each do |h|
+          validate_column_name(h, check_holidays: false)
+        end
+      end
+
+      if !["additive", "multiplicative"].include?(@seasonality_mode)
+        raise ArgumentError, "seasonality_mode must be \"additive\" or \"multiplicative\""
+      end
+    end
+
+    def validate_column_name(name, check_holidays: true, check_seasonalities: true, check_regressors: true)
+      if name.include?("_delim_")
+        raise ArgumentError, "Name cannot contain \"_delim_\""
+      end
+      reserved_names = [
+        "trend", "additive_terms", "daily", "weekly", "yearly",
+        "holidays", "zeros", "extra_regressors_additive", "yhat",
+        "extra_regressors_multiplicative", "multiplicative_terms",
+      ]
+      rn_l = reserved_names.map { |n| n + "_lower" }
+      rn_u = reserved_names.map { |n| n + "_upper" }
+      reserved_names.concat(rn_l)
+      reserved_names.concat(rn_u)
+      reserved_names.concat(["ds", "y", "cap", "floor", "y_scaled", "cap_scaled"])
+      if reserved_names.include?(name)
+        raise ArgumentError, "Name #{name.inspect} is reserved."
+      end
+      if check_holidays && @holidays && @holidays["holiday"].uniq.include?(name)
+        raise ArgumentError, "Name #{name.inspect} already used for a holiday."
+      end
+      if check_holidays && @country_holidays && get_holiday_names(@country_holidays).include?(name)
+        raise ArgumentError, "Name #{name.inspect} is a holiday name in #{@country_holidays.inspect}."
+      end
+      if check_seasonalities && @seasonalities[name]
+        raise ArgumentError, "Name #{name.inspect} already used for a seasonality."
+      end
+      if check_regressors && @extra_regressors[name]
+        raise ArgumentError, "Name #{name.inspect} already used for an added regressor."
+      end
+    end
+
+    def setup_dataframe(df, initialize_scales: false)
+      if df.vectors.include?("y")
+        df["y"] = df["y"].map(&:to_f)
+        raise ArgumentError, "Found infinity in column y." unless df["y"].all?(&:finite?)
+      end
+      # TODO support integers
+
+      df["ds"] = to_datetime(df["ds"])
+
+      raise ArgumentError, "Found NaN in column ds." if df["ds"].any?(&:nil?)
+
+      @extra_regressors.each_key do |name|
+        if !df.vectors.include?(name)
+          raise ArgumentError, "Regressor #{name.inspect} missing from dataframe"
+        end
+        df[name] = df[name].map(&:to_f)
+        if df[name].any?(&:nil?)
+          raise ArgumentError, "Found NaN in column #{name.inspect}"
+        end
+      end
+      @seasonalities.values.each do |props|
+        condition_name = props[:condition_name]
+        if condition_name
+          if !df.vectors.include?(condition_name)
+            raise ArgumentError, "Condition #{condition_name.inspect} missing from dataframe"
+          end
+          if df.where(!df[condition_name].in([true, false])).any?
+            raise ArgumentError, "Found non-boolean in column #{condition_name.inspect}"
+          end
+        end
+      end
+
+      if df.index.name == "ds"
+        df.index.name = nil
+      end
+      df = df.sort(["ds"])
+
+      initialize_scales(initialize_scales, df)
+
+      if @logistic_floor
+        raise ArgumentError, "Expected column \"floor\"." unless df.vectors.include?("floor")
+      else
+        df["floor"] = 0
+      end
+
+      if @growth == "logistic"
+        unless df.vectors.include?("cap")
+          raise ArgumentError, "Capacities must be supplied for logistic growth in column \"cap\""
+        end
+        if df.where(df["cap"] <= df["floor"]).size > 0
+          raise ArgumentError, "cap must be greater than floor (which defaults to 0)."
+        end
+        df["cap_scaled"] = (df["cap"] - df["floor"]) / @y_scale
+      end
+
+      df["t"] = (df["ds"] - @start) / @t_scale.to_f
+      if df.vectors.include?("y")
+        df["y_scaled"] = (df["y"] - df["floor"]) / @y_scale
+      end
+
+      @extra_regressors.each do |name, props|
+        df[name] = (df[name] - props[:mu]) / props[:std]
+      end
+
+      df
+    end
+
+    def initialize_scales(initialize_scales, df)
+      return unless initialize_scales
+
+      floor = 0
+      @y_scale = (df["y"] - floor).abs.max
+      @y_scale = 1 if @y_scale == 0
+      @start = df["ds"].min
+      @t_scale = df["ds"].max - @start
+    end
+
+    def set_changepoints
+      hist_size = (@history.shape[0] * @changepoint_range).floor
+
+      if @n_changepoints + 1 > hist_size
+        @n_changepoints = hist_size - 1
+        logger.info "n_changepoints greater than number of observations. Using #{@n_changepoints}"
+      end
+
+      if @n_changepoints > 0
+        step = (hist_size - 1) / @n_changepoints.to_f
+        cp_indexes = (@n_changepoints + 1).times.map { |i| (i * step).round }
+        @changepoints = @history["ds"][*cp_indexes][1..-1]
+      else
+        @changepoints = []
+      end
+
+      if @changepoints.size > 0
+        @changepoints_t = Numo::NArray.asarray(((@changepoints - @start) / @t_scale.to_f).to_a).sort
+      else
+        @changepoints_t = Numo::NArray.asarray([0])
+      end
+    end
+
+    def fourier_series(dates, period, series_order)
+      start = Time.utc(1970).to_i
+      # uses to_datetime first so we get UTC
+      t = Numo::DFloat.asarray(dates.map { |v| v.to_i - start }) / (3600 * 24.0)
+
+      # no need for column_stack
+      series_order.times.flat_map do |i|
+        [Numo::DFloat::Math.method(:sin), Numo::DFloat::Math.method(:cos)].map do |fun|
+          fun.call(2.0 * (i + 1) * Math::PI * t / period)
+        end
+      end
+    end
+
+    def make_seasonality_features(dates, period, series_order, prefix)
+      features = fourier_series(dates, period, series_order)
+      Daru::DataFrame.new(features.map.with_index { |v, i| ["#{prefix}_delim_#{i + 1}", v] }.to_h)
+    end
+
+    def construct_holiday_dataframe(dates)
+      all_holidays = Daru::DataFrame.new
+      if @holidays
+        all_holidays = @holidays.dup
+      end
+      if @country_holidays
+        year_list = dates.map(&:year)
+        country_holidays_df = make_holidays_df(year_list, @country_holidays)
+        all_holidays = all_holidays.concat(country_holidays_df)
+      end
+      # Drop future holidays not previously seen in training data
+      if @train_holiday_names
+        # Remove holiday names that didn't show up in fit
+        all_holidays = all_holidays.where(all_holidays["holiday"].in(@train_holiday_names))
+
+        # Add holiday names in fit but not in predict with ds as NA
+        holidays_to_add = Daru::DataFrame.new(
+          "holiday" => @train_holiday_names.where(!@train_holiday_names.in(all_holidays["holiday"]))
+        )
+        all_holidays = all_holidays.concat(holidays_to_add)
+      end
+
+      all_holidays
+    end
+
+    def make_holiday_features(dates, holidays)
+      expanded_holidays = Hash.new { |hash, key| hash[key] = Numo::DFloat.zeros(dates.size) }
+      prior_scales = {}
+      # Makes an index so we can perform `get_loc` below.
+      # Strip to just dates.
+      row_index = dates.map(&:to_date)
+
+      holidays.each_row do |row|
+        dt = row["ds"]
+        lw = nil
+        uw = nil
+        begin
+          lw = row["lower_window"].to_i
+          uw = row["upper_window"].to_i
+        rescue IndexError
+          lw = 0
+          uw = 0
+        end
+        ps = @holidays_prior_scale
+        if prior_scales[row["holiday"]] && prior_scales[row["holiday"]] != ps
+          raise ArgumentError, "Holiday #{row["holiday"].inspect} does not have consistent prior scale specification."
+        end
+        raise ArgumentError, "Prior scale must be > 0" if ps <= 0
+        prior_scales[row["holiday"]] = ps
+
+        lw.upto(uw).each do |offset|
+          occurrence = dt ? dt.to_date + offset : nil
+          loc = occurrence ? row_index.index(occurrence) : nil
+          key = "#{row["holiday"]}_delim_#{offset >= 0 ? "+" : "-"}#{offset.abs}"
+          if loc
+            expanded_holidays[key][loc] = 1.0
+          else
+            expanded_holidays[key] # Access key to generate value
+          end
+        end
+      end
+      holiday_features = Daru::DataFrame.new(expanded_holidays)
+      # Make sure column order is consistent
+      holiday_features = holiday_features[*holiday_features.vectors.sort]
+      prior_scale_list = holiday_features.vectors.map { |h| prior_scales[h.split("_delim_")[0]] }
+      holiday_names = prior_scales.keys
+      # Store holiday names used in fit
+      if !@train_holiday_names
+        @train_holiday_names = Daru::Vector.new(holiday_names)
+      end
+      [holiday_features, prior_scale_list, holiday_names]
+    end
+
+    def add_regressor(name, prior_scale: nil, standardize: "auto", mode: nil)
+      raise Error, "Regressors must be added prior to model fitting." if @history
+      validate_column_name(name, check_regressors: false)
+      prior_scale ||= @holidays_prior_scale.to_f
+      mode ||= @seasonality_mode
+      raise ArgumentError, "Prior scale must be > 0" if prior_scale <= 0
+      if !["additive", "multiplicative"].include?(mode)
+        raise ArgumentError, "mode must be \"additive\" or \"multiplicative\""
+      end
+      @extra_regressors[name] = {
+        prior_scale: prior_scale,
+        standardize: standardize,
+        mu: 0.0,
+        std: 1.0,
+        mode: mode
+      }
+      self
+    end
+
+    def add_seasonality(name:, period:, fourier_order:, prior_scale: nil, mode: nil, condition_name: nil)
+      raise Error, "Seasonality must be added prior to model fitting." if @history
+
+      if !["daily", "weekly", "yearly"].include?(name)
+        # Allow overwriting built-in seasonalities
+        validate_column_name(name, check_seasonalities: false)
+      end
+      if prior_scale.nil?
+        ps = @seasonality_prior_scale
+      else
+        ps = prior_scale.to_f
+      end
+      raise ArgumentError, "Prior scale must be > 0" if ps <= 0
+      raise ArgumentError, "Fourier Order must be > 0" if fourier_order <= 0
+      mode ||= @seasonality_mode
+      if !["additive", "multiplicative"].include?(mode)
+        raise ArgumentError, "mode must be \"additive\" or \"multiplicative\""
+      end
+      validate_column_name(condition_name) if condition_name
+      @seasonalities[name] = {
+        period: period,
+        fourier_order: fourier_order,
+        prior_scale: ps,
+        mode: mode,
+        condition_name: condition_name
+      }
+      self
+    end
+
+    def add_country_holidays(country_name)
+      raise Error, "Country holidays must be added prior to model fitting." if @history
+      # Validate names.
+      get_holiday_names(country_name).each do |name|
+        # Allow merging with existing holidays
+        validate_column_name(name, check_holidays: false)
+      end
+      # Set the holidays.
+      if @country_holidays
+        logger.warn "Changing country holidays from #{@country_holidays.inspect} to #{country_name.inspect}."
+      end
+      @country_holidays = country_name
+      self
+    end
+
+    def make_all_seasonality_features(df)
+      seasonal_features = []
+      prior_scales = []
+      modes = {"additive" => [], "multiplicative" => []}
+
+      # Seasonality features
+      @seasonalities.each do |name, props|
+        features = make_seasonality_features(
+          df["ds"],
+          props[:period],
+          props[:fourier_order],
+          name
+        )
+        if props[:condition_name]
+          features[!df.where(props[:condition_name])] = 0
+        end
+        seasonal_features << features
+        prior_scales.concat([props[:prior_scale]] * features.shape[1])
+        modes[props[:mode]] << name
+      end
+
+      # Holiday features
+      holidays = construct_holiday_dataframe(df["ds"])
+      if holidays.size > 0
+        features, holiday_priors, holiday_names = make_holiday_features(df["ds"], holidays)
+        seasonal_features << features
+        prior_scales.concat(holiday_priors)
+        modes[@seasonality_mode].concat(holiday_names)
+      end
+
+      # Additional regressors
+      @extra_regressors.each do |name, props|
+        seasonal_features << df[name].to_df
+        prior_scales << props[:prior_scale]
+        modes[props[:mode]] << name
+      end
+
+      # Dummy to prevent empty X
+      if seasonal_features.size == 0
+        seasonal_features << Daru::DataFrame.new("zeros" => [0] * df.shape[0])
+        prior_scales << 1.0
+      end
+
+      seasonal_features = df_concat_axis_one(seasonal_features)
+
+      component_cols, modes = regressor_column_matrix(seasonal_features, modes)
+
+      [seasonal_features, prior_scales, component_cols, modes]
+    end
+
+    def regressor_column_matrix(seasonal_features, modes)
+      components = Daru::DataFrame.new(
+        "col" => seasonal_features.shape[1].times.to_a,
+        "component" => seasonal_features.vectors.map { |x| x.split("_delim_")[0] }
+      )
+
+      # Add total for holidays
+      if @train_holiday_names
+        components = add_group_component(components, "holidays", @train_holiday_names.uniq)
+      end
+      # Add totals for additive and multiplicative components, and regressors
+      ["additive", "multiplicative"].each do |mode|
+        components = add_group_component(components, mode + "_terms", modes[mode])
+        regressors_by_mode = @extra_regressors.select { |r, props| props[:mode] == mode }
+          .map { |r, props| r }
+        components = add_group_component(components, "extra_regressors_" + mode, regressors_by_mode)
+
+        # Add combination components to modes
+        modes[mode] << mode + "_terms"
+        modes[mode] << "extra_regressors_" + mode
+      end
+      # After all of the additive/multiplicative groups have been added, count holidays under the seasonality mode
+      modes[@seasonality_mode] << "holidays"
+      # Convert to a binary matrix
+      component_cols = Daru::DataFrame.crosstab_by_assignation(
+        components["col"], components["component"], [1] * components.size
+      )
+      component_cols.each_vector do |v|
+        v.map! { |vi| vi.nil? ? 0 : vi }
+      end
+      component_cols.rename_vectors(:_id => "col")
+
+      # Add columns for additive and multiplicative terms, if missing
+      ["additive_terms", "multiplicative_terms"].each do |name|
+        component_cols[name] = 0 unless component_cols.vectors.include?(name)
+      end
+
+      # TODO validation
+
+      [component_cols, modes]
+    end
+
+    def add_group_component(components, name, group)
+      new_comp = components.where(components["component"].in(group)).dup
+      group_cols = new_comp["col"].uniq
+      if group_cols.size > 0
+        new_comp = Daru::DataFrame.new("col" => group_cols, "component" => [name] * group_cols.size)
+        components = components.concat(new_comp)
+      end
+      components
+    end
+
+    def parse_seasonality_args(name, arg, auto_disable, default_order)
+      case arg
+      when "auto"
+        fourier_order = 0
+        if @seasonalities.include?(name)
+          logger.info "Found custom seasonality named #{name.inspect}, disabling built-in #{name.inspect} seasonality."
+        elsif auto_disable
+          logger.info "Disabling #{name} seasonality. Run prophet with #{name}_seasonality: true to override this."
+        else
+          fourier_order = default_order
+        end
+      when true
+        fourier_order = default_order
+      when false
+        fourier_order = 0
+      else
+        fourier_order = arg.to_i
+      end
+      fourier_order
+    end
+
+    def set_auto_seasonalities
+      first = @history["ds"].min
+      last = @history["ds"].max
+      dt = @history["ds"].diff
+      min_dt = dt.min
+
+      days = 86400
+
+      # Yearly seasonality
+      yearly_disable = last - first < 370 * days
+      fourier_order = parse_seasonality_args("yearly", @yearly_seasonality, yearly_disable, 10)
+      if fourier_order > 0
+        @seasonalities["yearly"] = {
+          period: 365.25,
+          fourier_order: fourier_order,
+          prior_scale: @seasonality_prior_scale,
+          mode: @seasonality_mode,
+          condition_name: nil
+        }
+      end
+
+      # Weekly seasonality
+      weekly_disable = last - first < 14 * days || min_dt >= 7 * days
+      fourier_order = parse_seasonality_args("weekly", @weekly_seasonality, weekly_disable, 3)
+      if fourier_order > 0
+        @seasonalities["weekly"] = {
+          period: 7,
+          fourier_order: fourier_order,
+          prior_scale: @seasonality_prior_scale,
+          mode: @seasonality_mode,
+          condition_name: nil
+        }
+      end
+
+      # Daily seasonality
+      daily_disable = last - first < 2 * days || min_dt >= 1 * days
+      fourier_order = parse_seasonality_args("daily", @daily_seasonality, daily_disable, 4)
+      if fourier_order > 0
+        @seasonalities["daily"] = {
+          period: 1,
+          fourier_order: fourier_order,
+          prior_scale: @seasonality_prior_scale,
+          mode: @seasonality_mode,
+          condition_name: nil
+        }
+      end
+    end
+
+    def linear_growth_init(df)
+      i0 = df["ds"].index.min
+      i1 = df["ds"].index.max
+      t = df["t"][i1] - df["t"][i0]
+      k = (df["y_scaled"][i1] - df["y_scaled"][i0]) / t
+      m = df["y_scaled"][i0] - k * df["t"][i0]
+      [k, m]
+    end
+
+    def logistic_growth_init(df)
+      i0 = df["ds"].index.min
+      i1 = df["ds"].index.max
+      t = df["t"][i1] - df["t"][i0]
+
+      # Force valid values, in case y > cap or y < 0
+      c0 = df["cap_scaled"][i0]
+      c1 = df["cap_scaled"][i1]
+      y0 = [0.01 * c0, [0.99 * c0, df["y_scaled"][i0]].min].max
+      y1 = [0.01 * c1, [0.99 * c1, df["y_scaled"][i1]].min].max
+
+      r0 = c0 / y0
+      r1 = c1 / y1
+
+      if (r0 - r1).abs <= 0.01
+        r0 = 1.05 * r0
+      end
+
+      l0 = Math.log(r0 - 1)
+      l1 = Math.log(r1 - 1)
+
+      # Initialize the offset
+      m = l0 * t / (l0 - l1)
+      # And the rate
+      k = (l0 - l1) / t
+      [k, m]
+    end
+
+    def fit(df, **kwargs)
+      raise Error, "Prophet object can only be fit once" if @history
+
+      history = df.where(!df["y"].in([nil, Float::NAN]))
+      raise Error, "Data has less than 2 non-nil rows" if history.shape[0] < 2
+
+      @history_dates = to_datetime(df["ds"]).sort
+      history = setup_dataframe(history, initialize_scales: true)
+      @history = history
+      set_auto_seasonalities
+      seasonal_features, prior_scales, component_cols, modes = make_all_seasonality_features(history)
+      @train_component_cols = component_cols
+      @component_modes = modes
+      @fit_kwargs = kwargs.dup # TODO deep dup?
+
+      set_changepoints
+
+      dat = {
+        "T" => history.shape[0],
+        "K" => seasonal_features.shape[1],
+        "S" => @changepoints_t.size,
+        "y" => history["y_scaled"],
+        "t" => history["t"],
+        "t_change" => @changepoints_t,
+        "X" => seasonal_features,
+        "sigmas" => prior_scales,
+        "tau" => @changepoint_prior_scale,
+        "trend_indicator" => @growth == "logistic" ? 1 : 0,
+        "s_a" => component_cols["additive_terms"],
+        "s_m" => component_cols["multiplicative_terms"]
+      }
+
+      if @growth == "linear"
+        dat["cap"] = Numo::DFloat.zeros(@history.shape[0])
+        kinit = linear_growth_init(history)
+      else
+        dat["cap"] = history["cap_scaled"]
+        kinit = logistic_growth_init(history)
+      end
+
+      stan_init = {
+        "k" => kinit[0],
+        "m" => kinit[1],
+        "delta" => Numo::DFloat.zeros(@changepoints_t.size),
+        "beta" => Numo::DFloat.zeros(seasonal_features.shape[1]),
+        "sigma_obs" => 1
+      }
+
+      if history["y"].min == history["y"].max && @growth == "linear"
+        # Nothing to fit.
+        @params = stan_init
+        @params["sigma_obs"] = 1e-9
+        @params.each_key do |par|
+          @params[par] = Numo::NArray.asarray(@params[par])
+        end
+      elsif @mcmc_samples > 0
+        @params = @stan_backend.sampling(stan_init, dat, @mcmc_samples, **kwargs)
+      else
+        @params = @stan_backend.fit(stan_init, dat, **kwargs)
+      end
+
+      # If no changepoints were requested, replace delta with 0s
+      if @changepoints.size == 0
+        # Fold delta into the base rate k
+        @params["k"] = @params["k"] + @params["delta"].reshape(-1)
+        @params["delta"] = Numo::DFloat.zeros(@params["delta"].shape).reshape(-1, 1)
+      end
+
+      self
+    end
+
+    def predict(df = nil)
+      raise Error, "Model has not been fit." unless @history
+
+      if df.nil?
+        df = @history.dup
+      else
+        raise ArgumentError, "Dataframe has no rows." if df.shape[0] == 0
+        df = setup_dataframe(df.dup)
+      end
+
+      df["trend"] = predict_trend(df)
+      seasonal_components = predict_seasonal_components(df)
+      if @uncertainty_samples
+        intervals = predict_uncertainty(df)
+      else
+        intervals = nil
+      end
+
+      # Drop columns except ds, cap, floor, and trend
+      cols = ["ds", "trend"]
+      cols << "cap" if df.vectors.include?("cap")
+      cols << "floor" if @logistic_floor
+      # Add in forecast components
+      df2 = df_concat_axis_one([df[*cols], intervals, seasonal_components].compact)
+      df2["yhat"] = df2["trend"] * (df2["multiplicative_terms"] + 1) + df2["additive_terms"]
+      df2
+    end
+
+    def piecewise_linear(t, deltas, k, m, changepoint_ts)
+      # Intercept changes
+      gammas = -changepoint_ts * deltas
+      # Get cumulative slope and intercept at each t
+      k_t = t.new_ones * k
+      m_t = t.new_ones * m
+      changepoint_ts.each_with_index do |t_s, s|
+        indx = t >= t_s
+        k_t[indx] += deltas[s]
+        m_t[indx] += gammas[s]
+      end
+      k_t * t + m_t
+    end
+
+    def piecewise_logistic(t, cap, deltas, k, m, changepoint_ts)
+      k_1d = Numo::NArray.asarray(k)
+      k_1d = k_1d.reshape(1) if k_1d.ndim < 1
+      k_cum = k_1d.concatenate(deltas.cumsum + k)
+      gammas = Numo::DFloat.zeros(changepoint_ts.size)
+      changepoint_ts.each_with_index do |t_s, i|
+        gammas[i] = (t_s - m - gammas.sum) * (1 - k_cum[i] / k_cum[i + 1])
+      end
+      # Get cumulative rate and offset at each t
+      k_t = t.new_ones * k
+      m_t = t.new_ones * m
+      changepoint_ts.each_with_index do |t_s, s|
+        indx = t >= t_s
+        k_t[indx] += deltas[s]
+        m_t[indx] += gammas[s]
+      end
+      # need df_values to prevent memory from blowing up
+      df_values(cap) / (1 + Numo::NMath.exp(-k_t * (t - m_t)))
+    end
+
+    def predict_trend(df)
+      k = @params["k"].mean(nan: true)
+      m = @params["m"].mean(nan: true)
+      deltas = @params["delta"].mean(axis: 0, nan: true)
+
+      t = Numo::NArray.asarray(df["t"].to_a)
+      if @growth == "linear"
+        trend = piecewise_linear(t, deltas, k, m, @changepoints_t)
+      else
+        cap = df["cap_scaled"]
+        trend = piecewise_logistic(t, cap, deltas, k, m, @changepoints_t)
+      end
+
+      trend * @y_scale + Numo::NArray.asarray(df["floor"].to_a)
+    end
+
+    def predict_seasonal_components(df)
+      seasonal_features, _, component_cols, _ = make_all_seasonality_features(df)
+      if @uncertainty_samples
+        lower_p = 100 * (1.0 - @interval_width) / 2
+        upper_p = 100 * (1.0 + @interval_width) / 2
+      end
+
+      x = df_values(seasonal_features)
+      data = {}
+      component_cols.vectors.each do |component|
+        beta_c = @params["beta"] * Numo::NArray.asarray(component_cols[component].to_a)
+
+        comp = x.dot(beta_c.transpose)
+        if @component_modes["additive"].include?(component)
+          comp *= @y_scale
+        end
+        data[component] = comp.mean(axis: 1, nan: true)
+        if @uncertainty_samples
+          data[component + "_lower"] = percentile(comp, lower_p, axis: 1)
+          data[component + "_upper"] = percentile(comp, upper_p, axis: 1)
+        end
+      end
+      Daru::DataFrame.new(data)
+    end
+
+    def sample_posterior_predictive(df)
+      n_iterations = @params["k"].shape[0]
+      samp_per_iter = [1, (@uncertainty_samples / n_iterations.to_f).ceil].max
+
+      # Generate seasonality features once so we can re-use them.
+      seasonal_features, _, component_cols, _ = make_all_seasonality_features(df)
+
+      # convert to Numo for performance
+      seasonal_features = df_values(seasonal_features)
+      additive_terms = df_values(component_cols["additive_terms"])
+      multiplicative_terms = df_values(component_cols["multiplicative_terms"])
+
+      sim_values = {"yhat" => [], "trend" => []}
+      n_iterations.times do |i|
+        samp_per_iter.times do
+          sim = sample_model(
+            df,
+            seasonal_features,
+            i,
+            additive_terms,
+            multiplicative_terms
+          )
+          sim_values.each_key do |key|
+            sim_values[key] << sim[key]
+          end
+        end
+      end
+      sim_values.each do |k, v|
+        sim_values[k] = Numo::NArray.column_stack(v)
+      end
+      sim_values
+    end
+
+    def predictive_samples(df)
+      df = setup_dataframe(df.dup)
+      sim_values = sample_posterior_predictive(df)
+      sim_values
+    end
+
+    def predict_uncertainty(df)
+      sim_values = sample_posterior_predictive(df)
+
+      lower_p = 100 * (1.0 - @interval_width) / 2
+      upper_p = 100 * (1.0 + @interval_width) / 2
+
+      series = {}
+      ["yhat", "trend"].each do |key|
+        series["#{key}_lower"] = percentile(sim_values[key], lower_p, axis: 1)
+        series["#{key}_upper"] = percentile(sim_values[key], upper_p, axis: 1)
+      end
+
+      Daru::DataFrame.new(series)
+    end
+
+    def sample_model(df, seasonal_features, iteration, s_a, s_m)
+      trend = sample_predictive_trend(df, iteration)
+
+      beta = @params["beta"][iteration, true]
+      xb_a = seasonal_features.dot(beta * s_a) * @y_scale
+      xb_m = seasonal_features.dot(beta * s_m)
+
+      sigma = @params["sigma_obs"][iteration]
+      noise = Numo::DFloat.new(*df.shape[0]).rand_norm(0, sigma) * @y_scale
+
+      # skip data frame for performance
+      {
+        "yhat" => trend * (1 + xb_m) + xb_a + noise,
+        "trend" => trend
+      }
+    end
+
+    def sample_predictive_trend(df, iteration)
+      k = @params["k"][iteration, true]
+      m = @params["m"][iteration, true]
+      deltas = @params["delta"][iteration, true]
+
+      t = Numo::NArray.asarray(df["t"].to_a)
+      upper_t = t.max
+
+      # New changepoints from a Poisson process with rate S on [1, T]
+      if upper_t > 1
+        s = @changepoints_t.size
+        n_changes = poisson(s * (upper_t - 1))
+      else
+        n_changes = 0
+      end
+      if n_changes > 0
+        changepoint_ts_new = 1 + Numo::DFloat.new(n_changes).rand * (upper_t - 1)
+        changepoint_ts_new = changepoint_ts_new.sort
+      else
+        changepoint_ts_new = []
+      end
+
+      # Get the empirical scale of the deltas, plus epsilon to avoid NaNs.
+      lambda_ = deltas.abs.mean + 1e-8
+
+      # Sample deltas
+      deltas_new = laplace(0, lambda_, n_changes)
+
+      # Prepend the times and deltas from the history
+      changepoint_ts = @changepoints_t.concatenate(changepoint_ts_new)
+      deltas = deltas.concatenate(deltas_new)
+
+      if @growth == "linear"
+        trend = piecewise_linear(t, deltas, k, m, changepoint_ts)
+      else
+        cap = df["cap_scaled"]
+        trend = piecewise_logistic(t, cap, deltas, k, m, changepoint_ts)
+      end
+
+      trend * @y_scale + Numo::NArray.asarray(df["floor"].to_a)
+    end
+
+    def percentile(a, percentile, axis:)
+      raise Error, "Axis must be 1" if axis != 1
+
+      sorted = a.sort(axis: axis)
+      x = percentile / 100.0 * (sorted.shape[axis] - 1)
+      r = x % 1
+      i = x.floor
+      # this should use axis, but we only need axis: 1
+      if i == sorted.shape[axis] - 1
+        sorted[true, -1]
+      else
+        sorted[true, i] + r * (sorted[true, i + 1] - sorted[true, i])
+      end
+    end
+
+    def make_future_dataframe(periods:, freq: "D", include_history: true)
+      raise Error, "Model has not been fit" unless @history_dates
+      last_date = @history_dates.max
+      case freq
+      when "D"
+        # days have constant length with UTC (no DST or leap seconds)
+        dates = (periods + 1).times.map { |i| last_date + i * 86400 }
+      when "H"
+        dates = (periods + 1).times.map { |i| last_date + i * 3600 }
+      when "MS"
+        dates = [last_date]
+        periods.times do
+          dates << dates.last.to_datetime.next_month.to_time.utc
+        end
+      else
+        raise ArgumentError, "Unknown freq: #{freq}"
+      end
+      dates.select! { |d| d > last_date }
+      dates = dates.last(periods)
+      dates = @history_dates + dates if include_history
+      Daru::DataFrame.new("ds" => dates)
+    end
+
+    private
+
+    # Time is preferred over DateTime in Ruby
+    # use UTC to be consistent with Python
+    # and so days have equal length (no DST)
+    def to_datetime(vec)
+      return if vec.nil?
+      vec.map do |v|
+        case v
+        when Time
+          v.utc
+        when Date
+          v.to_datetime.to_time.utc
+        else
+          DateTime.parse(v.to_s).to_time.utc
+        end
+      end
+    end
+
+    # okay to do in-place
+    def df_concat_axis_one(dfs)
+      dfs[1..-1].each do |df|
+        df.each_vector_with_index do |v, k|
+          dfs[0][k] = v
+        end
+      end
+      dfs[0]
+    end
+
+    def df_values(df)
+      if df.is_a?(Daru::Vector)
+        Numo::NArray.asarray(df.to_a)
+      else
+        # TODO make more performant
+        Numo::NArray.asarray(df.to_matrix.to_a)
+      end
+    end
+
+    # https://en.wikipedia.org/wiki/Poisson_distribution#Generating_Poisson-distributed_random_variables
+    def poisson(lam)
+      l = Math.exp(-lam)
+      k = 0
+      p = 1
+      while p > l
+        k += 1
+        p *= rand
+      end
+      k - 1
+    end
+
+    # https://en.wikipedia.org/wiki/Laplace_distribution#Generating_values_from_the_Laplace_distribution
+    def laplace(loc, scale, size)
+      u = Numo::DFloat.new(size).rand - 0.5
+      loc - scale * u.sign * Numo::NMath.log(1 - 2 * u.abs)
+    end
+  end
+end
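Beyond the basic fit/predict cycle, the Forecaster above supports custom seasonalities, country holidays, and extra regressors; each must be configured before fit is called, since all three raise once @history is set. A short configuration sketch, given a Daru::DataFrame df with "ds" and "y" columns (the column names and the "US" country code are illustrative; the country must exist in the bundled holiday table):

    m = Prophet::Forecaster.new(growth: "logistic")
    m.add_seasonality(name: "monthly", period: 30.5, fourier_order: 5)
    m.add_country_holidays("US")
    m.add_regressor("promo", mode: "additive")

    # logistic growth requires a "cap" column in both the training
    # frame and any future frame passed to predict
    df["cap"] = 1000
    m.fit(df)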