prophet-rb 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +7 -7
- data/lib/prophet/forecaster.rb +87 -120
- data/lib/prophet/holidays.rb +2 -2
- data/lib/prophet/plot.rb +31 -31
- data/lib/prophet/stan_backend.rb +1 -1
- data/lib/prophet/version.rb +1 -1
- data/lib/prophet.rb +1 -1
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2b37dc1a6be57b67cd740727e0bf4ac0b3a4cf2e27ed19647b631094696787da
|
4
|
+
data.tar.gz: 892af24ebdd897d7dba904ed7b11a83390533666de13bb3f57172e26839ade3a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 53bc289290cf1a7861419634a057253f3727994c2e911247af0adf7e39688a15c019e180274cb8dd1e2a140e94dafc90ef7ff996319446b535c5f08a9090a990
|
7
|
+
data.tar.gz: 6b4ecdfcfb03f3e9da68f244fd8d25f5f9bee9a854bdc4d9f68d4907f59260a485df323eb70cc60cdc68e1c1e0d5e9b5431313785bd22da72758fd90fbda7bd2
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -10,7 +10,7 @@ Supports:
|
|
10
10
|
|
11
11
|
And gracefully handles missing data
|
12
12
|
|
13
|
-
[![Build Status](https://travis-ci.org/ankane/prophet.svg?branch=master)](https://travis-ci.org/ankane/prophet)
|
13
|
+
[![Build Status](https://travis-ci.org/ankane/prophet.svg?branch=master)](https://travis-ci.org/ankane/prophet) [![Build status](https://ci.appveyor.com/api/projects/status/8ahmsvvhum4ivnmv/branch/master?svg=true)](https://ci.appveyor.com/project/ankane/prophet/branch/master)
|
14
14
|
|
15
15
|
## Installation
|
16
16
|
|
@@ -31,7 +31,7 @@ Check out the [Prophet documentation](https://facebook.github.io/prophet/docs/qu
|
|
31
31
|
Create a data frame with `ds` and `y` columns - here’s [an example](examples/example_wp_log_peyton_manning.csv) you can use
|
32
32
|
|
33
33
|
```ruby
|
34
|
-
df =
|
34
|
+
df = Rover.read_csv("example_wp_log_peyton_manning.csv")
|
35
35
|
df.head(5)
|
36
36
|
```
|
37
37
|
|
@@ -107,7 +107,7 @@ m.plot_components(forecast).savefig("components.png")
|
|
107
107
|
Forecast logistic growth instead of linear
|
108
108
|
|
109
109
|
```ruby
|
110
|
-
df =
|
110
|
+
df = Rover.read_csv("example_wp_log_R.csv")
|
111
111
|
df["cap"] = 8.5
|
112
112
|
m = Prophet.new(growth: "logistic")
|
113
113
|
m.fit(df)
|
@@ -146,7 +146,7 @@ m = Prophet.new(changepoints: ["2014-01-01"])
|
|
146
146
|
Create a data frame with `holiday` and `ds` columns. Include all occurrences in your past data and future occurrences you’d like to forecast.
|
147
147
|
|
148
148
|
```ruby
|
149
|
-
playoffs =
|
149
|
+
playoffs = Rover::DataFrame.new(
|
150
150
|
"holiday" => ["playoff"] * 14,
|
151
151
|
"ds" => ["2008-01-13", "2009-01-03", "2010-01-16",
|
152
152
|
"2010-01-24", "2010-02-07", "2011-01-08",
|
@@ -156,7 +156,7 @@ playoffs = Daru::DataFrame.new(
|
|
156
156
|
"lower_window" => [0] * 14,
|
157
157
|
"upper_window" => [1] * 14
|
158
158
|
)
|
159
|
-
superbowls =
|
159
|
+
superbowls = Rover::DataFrame.new(
|
160
160
|
"holiday" => ["superbowl"] * 3,
|
161
161
|
"ds" => ["2010-02-07", "2014-02-02", "2016-02-07"],
|
162
162
|
"lower_window" => [0] * 3,
|
@@ -208,7 +208,7 @@ forecast = m.predict(future)
|
|
208
208
|
[Explanation](https://facebook.github.io/prophet/docs/multiplicative_seasonality.html)
|
209
209
|
|
210
210
|
```ruby
|
211
|
-
df =
|
211
|
+
df = Rover.read_csv("example_air_passengers.csv")
|
212
212
|
m = Prophet.new(seasonality_mode: "multiplicative")
|
213
213
|
m.fit(df)
|
214
214
|
future = m.make_future_dataframe(periods: 50, freq: "MS")
|
@@ -236,7 +236,7 @@ Prophet.new(mcmc_samples: 300)
|
|
236
236
|
Sub-daily data
|
237
237
|
|
238
238
|
```ruby
|
239
|
-
df =
|
239
|
+
df = Rover.read_csv("example_yosemite_temps.csv")
|
240
240
|
m = Prophet.new(changepoint_prior_scale: 0.01).fit(df)
|
241
241
|
future = m.make_future_dataframe(periods: 300, freq: "H")
|
242
242
|
forecast = m.predict(future)
|
data/lib/prophet/forecaster.rb
CHANGED
@@ -82,12 +82,12 @@ module Prophet
|
|
82
82
|
raise ArgumentError, "Parameter \"changepoint_range\" must be in [0, 1]"
|
83
83
|
end
|
84
84
|
if @holidays
|
85
|
-
if !@holidays.is_a?(
|
85
|
+
if !@holidays.is_a?(Rover::DataFrame) && @holidays.include?("ds") && @holidays.include?("holiday")
|
86
86
|
raise ArgumentError, "holidays must be a DataFrame with \"ds\" and \"holiday\" columns."
|
87
87
|
end
|
88
88
|
@holidays["ds"] = to_datetime(@holidays["ds"])
|
89
|
-
has_lower = @holidays.
|
90
|
-
has_upper = @holidays.
|
89
|
+
has_lower = @holidays.include?("lower_window")
|
90
|
+
has_upper = @holidays.include?("upper_window")
|
91
91
|
if has_lower ^ has_upper # xor
|
92
92
|
raise ArgumentError, "Holidays must have both lower_window and upper_window, or neither"
|
93
93
|
end
|
@@ -141,7 +141,7 @@ module Prophet
|
|
141
141
|
end
|
142
142
|
|
143
143
|
def setup_dataframe(df, initialize_scales: false)
|
144
|
-
if df.
|
144
|
+
if df.include?("y")
|
145
145
|
df["y"] = df["y"].map(&:to_f)
|
146
146
|
raise ArgumentError "Found infinity in column y." unless df["y"].all?(&:finite?)
|
147
147
|
end
|
@@ -152,7 +152,7 @@ module Prophet
|
|
152
152
|
raise ArgumentError, "Found NaN in column ds." if df["ds"].any?(&:nil?)
|
153
153
|
|
154
154
|
@extra_regressors.each_key do |name|
|
155
|
-
if !df.
|
155
|
+
if !df.include?(name)
|
156
156
|
raise ArgumentError, "Regressor #{name.inspect} missing from dataframe"
|
157
157
|
end
|
158
158
|
df[name] = df[name].map(&:to_f)
|
@@ -163,7 +163,7 @@ module Prophet
|
|
163
163
|
@seasonalities.values.each do |props|
|
164
164
|
condition_name = props[:condition_name]
|
165
165
|
if condition_name
|
166
|
-
if !df.
|
166
|
+
if !df.include?(condition_name)
|
167
167
|
raise ArgumentError, "Condition #{condition_name.inspect} missing from dataframe"
|
168
168
|
end
|
169
169
|
if df.where(!df[condition_name].in([true, false])).any?
|
@@ -172,36 +172,33 @@ module Prophet
|
|
172
172
|
end
|
173
173
|
end
|
174
174
|
|
175
|
-
|
176
|
-
df.index.name = nil
|
177
|
-
end
|
178
|
-
df = df.sort(["ds"])
|
175
|
+
df = df.sort_by { |r| r["ds"] }
|
179
176
|
|
180
177
|
initialize_scales(initialize_scales, df)
|
181
178
|
|
182
|
-
if @logistic_floor && !df.
|
179
|
+
if @logistic_floor && !df.include?("floor")
|
183
180
|
raise ArgumentError, "Expected column \"floor\"."
|
184
181
|
else
|
185
182
|
df["floor"] = 0
|
186
183
|
end
|
187
184
|
|
188
185
|
if @growth == "logistic"
|
189
|
-
unless df.
|
186
|
+
unless df.include?("cap")
|
190
187
|
raise ArgumentError, "Capacities must be supplied for logistic growth in column \"cap\""
|
191
188
|
end
|
192
|
-
if df
|
189
|
+
if df[df["cap"] <= df["floor"]].size > 0
|
193
190
|
raise ArgumentError, "cap must be greater than floor (which defaults to 0)."
|
194
191
|
end
|
195
|
-
df["cap_scaled"] = (df["cap"] - df["floor"]) / @y_scale
|
192
|
+
df["cap_scaled"] = (df["cap"] - df["floor"]) / @y_scale.to_f
|
196
193
|
end
|
197
194
|
|
198
195
|
df["t"] = (df["ds"] - @start) / @t_scale.to_f
|
199
|
-
if df.
|
200
|
-
df["y_scaled"] = (df["y"] - df["floor"]) / @y_scale
|
196
|
+
if df.include?("y")
|
197
|
+
df["y_scaled"] = (df["y"] - df["floor"]) / @y_scale.to_f
|
201
198
|
end
|
202
199
|
|
203
200
|
@extra_regressors.each do |name, props|
|
204
|
-
df[name] = (
|
201
|
+
df[name] = (df[name] - props[:mu]) / props[:std].to_f
|
205
202
|
end
|
206
203
|
|
207
204
|
df
|
@@ -237,21 +234,21 @@ module Prophet
|
|
237
234
|
if @n_changepoints > 0
|
238
235
|
step = (hist_size - 1) / @n_changepoints.to_f
|
239
236
|
cp_indexes = (@n_changepoints + 1).times.map { |i| (i * step).round }
|
240
|
-
@changepoints =
|
237
|
+
@changepoints = Rover::Vector.new(@history["ds"].to_a.values_at(*cp_indexes)).tail(-1)
|
241
238
|
else
|
242
239
|
@changepoints = []
|
243
240
|
end
|
244
241
|
end
|
245
242
|
|
246
243
|
if @changepoints.size > 0
|
247
|
-
@changepoints_t = (
|
244
|
+
@changepoints_t = (@changepoints.map(&:to_i).sort.to_numo.cast_to(Numo::DFloat) - @start.to_i) / @t_scale.to_f
|
248
245
|
else
|
249
246
|
@changepoints_t = Numo::NArray.asarray([0])
|
250
247
|
end
|
251
248
|
end
|
252
249
|
|
253
250
|
def fourier_series(dates, period, series_order)
|
254
|
-
t =
|
251
|
+
t = dates.map(&:to_i).to_numo / (3600 * 24.0)
|
255
252
|
|
256
253
|
# no need for column_stack
|
257
254
|
series_order.times.flat_map do |i|
|
@@ -263,11 +260,11 @@ module Prophet
|
|
263
260
|
|
264
261
|
def make_seasonality_features(dates, period, series_order, prefix)
|
265
262
|
features = fourier_series(dates, period, series_order)
|
266
|
-
|
263
|
+
Rover::DataFrame.new(features.map.with_index { |v, i| ["#{prefix}_delim_#{i + 1}", v] }.to_h)
|
267
264
|
end
|
268
265
|
|
269
266
|
def construct_holiday_dataframe(dates)
|
270
|
-
all_holidays =
|
267
|
+
all_holidays = Rover::DataFrame.new
|
271
268
|
if @holidays
|
272
269
|
all_holidays = @holidays.dup
|
273
270
|
end
|
@@ -279,12 +276,12 @@ module Prophet
|
|
279
276
|
# Drop future holidays not previously seen in training data
|
280
277
|
if @train_holiday_names
|
281
278
|
# Remove holiday names didn't show up in fit
|
282
|
-
all_holidays = all_holidays
|
279
|
+
all_holidays = all_holidays[all_holidays["holiday"].in?(@train_holiday_names)]
|
283
280
|
|
284
281
|
# Add holiday names in fit but not in predict with ds as NA
|
285
|
-
holidays_to_add =
|
286
|
-
"holiday" => @train_holiday_names
|
287
|
-
)
|
282
|
+
holidays_to_add = Rover::DataFrame.new({
|
283
|
+
"holiday" => @train_holiday_names[!@train_holiday_names.in?(all_holidays["holiday"])]
|
284
|
+
})
|
288
285
|
all_holidays = all_holidays.concat(holidays_to_add)
|
289
286
|
end
|
290
287
|
|
@@ -318,7 +315,7 @@ module Prophet
|
|
318
315
|
|
319
316
|
lw.upto(uw).each do |offset|
|
320
317
|
occurrence = dt ? dt + offset : nil
|
321
|
-
loc = occurrence ? row_index.index(occurrence) : nil
|
318
|
+
loc = occurrence ? row_index.to_a.index(occurrence) : nil
|
322
319
|
key = "#{row["holiday"]}_delim_#{offset >= 0 ? "+" : "-"}#{offset.abs}"
|
323
320
|
if loc
|
324
321
|
expanded_holidays[key][loc] = 1.0
|
@@ -327,14 +324,14 @@ module Prophet
|
|
327
324
|
end
|
328
325
|
end
|
329
326
|
end
|
330
|
-
holiday_features =
|
331
|
-
#
|
332
|
-
holiday_features = holiday_features[
|
333
|
-
prior_scale_list = holiday_features.
|
327
|
+
holiday_features = Rover::DataFrame.new(expanded_holidays)
|
328
|
+
# Make sure column order is consistent
|
329
|
+
holiday_features = holiday_features[holiday_features.vector_names.sort]
|
330
|
+
prior_scale_list = holiday_features.vector_names.map { |h| prior_scales[h.split("_delim_")[0]] }
|
334
331
|
holiday_names = prior_scales.keys
|
335
332
|
# Store holiday names used in fit
|
336
|
-
if
|
337
|
-
@train_holiday_names =
|
333
|
+
if @train_holiday_names.nil?
|
334
|
+
@train_holiday_names = Rover::Vector.new(holiday_names)
|
338
335
|
end
|
339
336
|
[holiday_features, prior_scale_list, holiday_names]
|
340
337
|
end
|
@@ -432,16 +429,16 @@ module Prophet
|
|
432
429
|
modes[@seasonality_mode].concat(holiday_names)
|
433
430
|
end
|
434
431
|
|
435
|
-
#
|
432
|
+
# Additional regressors
|
436
433
|
@extra_regressors.each do |name, props|
|
437
|
-
seasonal_features << df[name]
|
434
|
+
seasonal_features << Rover::DataFrame.new({name => df[name]})
|
438
435
|
prior_scales << props[:prior_scale]
|
439
436
|
modes[props[:mode]] << name
|
440
437
|
end
|
441
438
|
|
442
|
-
#
|
439
|
+
# Dummy to prevent empty X
|
443
440
|
if seasonal_features.size == 0
|
444
|
-
seasonal_features <<
|
441
|
+
seasonal_features << Rover::DataFrame.new({"zeros" => [0] * df.shape[0]})
|
445
442
|
prior_scales << 1.0
|
446
443
|
end
|
447
444
|
|
@@ -453,16 +450,16 @@ module Prophet
|
|
453
450
|
end
|
454
451
|
|
455
452
|
def regressor_column_matrix(seasonal_features, modes)
|
456
|
-
components =
|
453
|
+
components = Rover::DataFrame.new(
|
457
454
|
"col" => seasonal_features.shape[1].times.to_a,
|
458
|
-
"component" => seasonal_features.
|
455
|
+
"component" => seasonal_features.vector_names.map { |x| x.split("_delim_")[0] }
|
459
456
|
)
|
460
457
|
|
461
|
-
#
|
458
|
+
# Add total for holidays
|
462
459
|
if @train_holiday_names
|
463
460
|
components = add_group_component(components, "holidays", @train_holiday_names.uniq)
|
464
461
|
end
|
465
|
-
#
|
462
|
+
# Add totals additive and multiplicative components, and regressors
|
466
463
|
["additive", "multiplicative"].each do |mode|
|
467
464
|
components = add_group_component(components, mode + "_terms", modes[mode])
|
468
465
|
regressors_by_mode = @extra_regressors.select { |r, props| props[:mode] == mode }
|
@@ -473,20 +470,15 @@ module Prophet
|
|
473
470
|
modes[mode] << mode + "_terms"
|
474
471
|
modes[mode] << "extra_regressors_" + mode
|
475
472
|
end
|
476
|
-
#
|
473
|
+
# After all of the additive/multiplicative groups have been added,
|
477
474
|
modes[@seasonality_mode] << "holidays"
|
478
|
-
#
|
479
|
-
component_cols =
|
480
|
-
|
481
|
-
)
|
482
|
-
component_cols.each_vector do |v|
|
483
|
-
v.map! { |vi| vi.nil? ? 0 : vi }
|
484
|
-
end
|
485
|
-
component_cols.rename_vectors(:_id => "col")
|
475
|
+
# Convert to a binary matrix
|
476
|
+
component_cols = components["col"].crosstab(components["component"])
|
477
|
+
component_cols["col"] = component_cols.delete("_")
|
486
478
|
|
487
479
|
# Add columns for additive and multiplicative terms, if missing
|
488
480
|
["additive_terms", "multiplicative_terms"].each do |name|
|
489
|
-
component_cols[name] = 0 unless component_cols.
|
481
|
+
component_cols[name] = 0 unless component_cols.include?(name)
|
490
482
|
end
|
491
483
|
|
492
484
|
# TODO validation
|
@@ -495,10 +487,10 @@ module Prophet
|
|
495
487
|
end
|
496
488
|
|
497
489
|
def add_group_component(components, name, group)
|
498
|
-
new_comp = components
|
490
|
+
new_comp = components[components["component"].in?(group)].dup
|
499
491
|
group_cols = new_comp["col"].uniq
|
500
492
|
if group_cols.size > 0
|
501
|
-
new_comp =
|
493
|
+
new_comp = Rover::DataFrame.new({"col" => group_cols, "component" => name})
|
502
494
|
components = components.concat(new_comp)
|
503
495
|
end
|
504
496
|
components
|
@@ -574,8 +566,8 @@ module Prophet
|
|
574
566
|
end
|
575
567
|
|
576
568
|
def linear_growth_init(df)
|
577
|
-
i0 =
|
578
|
-
i1 = df
|
569
|
+
i0 = 0
|
570
|
+
i1 = df.size - 1
|
579
571
|
t = df["t"][i1] - df["t"][i0]
|
580
572
|
k = (df["y_scaled"][i1] - df["y_scaled"][i0]) / t
|
581
573
|
m = df["y_scaled"][i0] - k * df["t"][i0]
|
@@ -583,8 +575,8 @@ module Prophet
|
|
583
575
|
end
|
584
576
|
|
585
577
|
def logistic_growth_init(df)
|
586
|
-
i0 =
|
587
|
-
i1 = df
|
578
|
+
i0 = 0
|
579
|
+
i1 = df.size - 1
|
588
580
|
t = df["t"][i1] - df["t"][i0]
|
589
581
|
|
590
582
|
# Force valid values, in case y > cap or y < 0
|
@@ -613,8 +605,13 @@ module Prophet
|
|
613
605
|
def fit(df, **kwargs)
|
614
606
|
raise Error, "Prophet object can only be fit once" if @history
|
615
607
|
|
616
|
-
|
617
|
-
|
608
|
+
if defined?(Daru::DataFrame) && df.is_a?(Daru::DataFrame)
|
609
|
+
df = Rover::DataFrame.new(df.to_h)
|
610
|
+
end
|
611
|
+
raise ArgumentError, "Must be a data frame" unless df.is_a?(Rover::DataFrame)
|
612
|
+
|
613
|
+
history = df[!df["y"].missing]
|
614
|
+
raise Error, "Data has less than 2 non-nil rows" if history.size < 2
|
618
615
|
|
619
616
|
@history_dates = to_datetime(df["ds"]).sort
|
620
617
|
history = setup_dataframe(history, initialize_scales: true)
|
@@ -701,10 +698,10 @@ module Prophet
|
|
701
698
|
|
702
699
|
# Drop columns except ds, cap, floor, and trend
|
703
700
|
cols = ["ds", "trend"]
|
704
|
-
cols << "cap" if df.
|
701
|
+
cols << "cap" if df.include?("cap")
|
705
702
|
cols << "floor" if @logistic_floor
|
706
703
|
# Add in forecast components
|
707
|
-
df2 = df_concat_axis_one([df[
|
704
|
+
df2 = df_concat_axis_one([df[cols], intervals, seasonal_components])
|
708
705
|
df2["yhat"] = df2["trend"] * (df2["multiplicative_terms"] + 1) + df2["additive_terms"]
|
709
706
|
df2
|
710
707
|
end
|
@@ -739,8 +736,7 @@ module Prophet
|
|
739
736
|
k_t[indx] += deltas[s]
|
740
737
|
m_t[indx] += gammas[s]
|
741
738
|
end
|
742
|
-
|
743
|
-
df_values(cap) / (1 + Numo::NMath.exp(-k_t * (t - m_t)))
|
739
|
+
cap.to_numo / (1 + Numo::NMath.exp(-k_t * (t - m_t)))
|
744
740
|
end
|
745
741
|
|
746
742
|
def predict_trend(df)
|
@@ -766,10 +762,10 @@ module Prophet
|
|
766
762
|
upper_p = 100 * (1.0 + @interval_width) / 2
|
767
763
|
end
|
768
764
|
|
769
|
-
x =
|
765
|
+
x = seasonal_features.to_numo
|
770
766
|
data = {}
|
771
|
-
component_cols.
|
772
|
-
beta_c =
|
767
|
+
component_cols.vector_names.each do |component|
|
768
|
+
beta_c = @params["beta"] * component_cols[component].to_numo
|
773
769
|
|
774
770
|
comp = x.dot(beta_c.transpose)
|
775
771
|
if @component_modes["additive"].include?(component)
|
@@ -777,11 +773,11 @@ module Prophet
|
|
777
773
|
end
|
778
774
|
data[component] = comp.mean(axis: 1, nan: true)
|
779
775
|
if @uncertainty_samples
|
780
|
-
data[component + "_lower"] = percentile(
|
781
|
-
data[component + "_upper"] = percentile(
|
776
|
+
data[component + "_lower"] = comp.percentile(lower_p, axis: 1)
|
777
|
+
data[component + "_upper"] = comp.percentile(upper_p, axis: 1)
|
782
778
|
end
|
783
779
|
end
|
784
|
-
|
780
|
+
Rover::DataFrame.new(data)
|
785
781
|
end
|
786
782
|
|
787
783
|
def sample_posterior_predictive(df)
|
@@ -792,9 +788,9 @@ module Prophet
|
|
792
788
|
seasonal_features, _, component_cols, _ = make_all_seasonality_features(df)
|
793
789
|
|
794
790
|
# convert to Numo for performance
|
795
|
-
seasonal_features =
|
796
|
-
additive_terms =
|
797
|
-
multiplicative_terms =
|
791
|
+
seasonal_features = seasonal_features.to_numo
|
792
|
+
additive_terms = component_cols["additive_terms"].to_numo
|
793
|
+
multiplicative_terms = component_cols["multiplicative_terms"].to_numo
|
798
794
|
|
799
795
|
sim_values = {"yhat" => [], "trend" => []}
|
800
796
|
n_iterations.times do |i|
|
@@ -831,11 +827,11 @@ module Prophet
|
|
831
827
|
|
832
828
|
series = {}
|
833
829
|
["yhat", "trend"].each do |key|
|
834
|
-
series["#{key}_lower"] =
|
835
|
-
series["#{key}_upper"] =
|
830
|
+
series["#{key}_lower"] = sim_values[key].percentile(lower_p, axis: 1)
|
831
|
+
series["#{key}_upper"] = sim_values[key].percentile(upper_p, axis: 1)
|
836
832
|
end
|
837
833
|
|
838
|
-
|
834
|
+
Rover::DataFrame.new(series)
|
839
835
|
end
|
840
836
|
|
841
837
|
def sample_model(df, seasonal_features, iteration, s_a, s_m)
|
@@ -897,21 +893,6 @@ module Prophet
|
|
897
893
|
trend * @y_scale + Numo::NArray.asarray(df["floor"].to_a)
|
898
894
|
end
|
899
895
|
|
900
|
-
def percentile(a, percentile, axis:)
|
901
|
-
raise Error, "Axis must be 1" if axis != 1
|
902
|
-
|
903
|
-
sorted = a.sort(axis: axis)
|
904
|
-
x = percentile / 100.0 * (sorted.shape[axis] - 1)
|
905
|
-
r = x % 1
|
906
|
-
i = x.floor
|
907
|
-
# this should use axis, but we only need axis: 1
|
908
|
-
if i == sorted.shape[axis] - 1
|
909
|
-
sorted[true, -1]
|
910
|
-
else
|
911
|
-
sorted[true, i] + r * (sorted[true, i + 1] - sorted[true, i])
|
912
|
-
end
|
913
|
-
end
|
914
|
-
|
915
896
|
def make_future_dataframe(periods:, freq: "D", include_history: true)
|
916
897
|
raise Error, "Model has not been fit" unless @history_dates
|
917
898
|
last_date = @history_dates.max
|
@@ -938,48 +919,39 @@ module Prophet
|
|
938
919
|
end
|
939
920
|
dates.select! { |d| d > last_date }
|
940
921
|
dates = dates.last(periods)
|
941
|
-
dates = @history_dates
|
942
|
-
|
922
|
+
dates = @history_dates.to_numo.concatenate(Numo::NArray.cast(dates)) if include_history
|
923
|
+
Rover::DataFrame.new({"ds" => dates})
|
943
924
|
end
|
944
925
|
|
945
926
|
private
|
946
927
|
|
947
|
-
# Time is
|
928
|
+
# Time is preferred over DateTime in Ruby docs
|
948
929
|
# use UTC to be consistent with Python
|
949
930
|
# and so days have equal length (no DST)
|
950
931
|
def to_datetime(vec)
|
951
932
|
return if vec.nil?
|
952
|
-
vec
|
953
|
-
|
954
|
-
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
933
|
+
vec =
|
934
|
+
vec.map do |v|
|
935
|
+
case v
|
936
|
+
when Time
|
937
|
+
v.utc
|
938
|
+
when Date
|
939
|
+
v.to_datetime.to_time.utc
|
940
|
+
else
|
941
|
+
DateTime.parse(v.to_s).to_time.utc
|
942
|
+
end
|
960
943
|
end
|
961
|
-
|
944
|
+
Rover::Vector.new(vec)
|
962
945
|
end
|
963
946
|
|
964
947
|
# okay to do in-place
|
965
948
|
def df_concat_axis_one(dfs)
|
966
949
|
dfs[1..-1].each do |df|
|
967
|
-
df
|
968
|
-
dfs[0][k] = v
|
969
|
-
end
|
950
|
+
dfs[0].merge!(df)
|
970
951
|
end
|
971
952
|
dfs[0]
|
972
953
|
end
|
973
954
|
|
974
|
-
def df_values(df)
|
975
|
-
if df.is_a?(Daru::Vector)
|
976
|
-
Numo::NArray.asarray(df.to_a)
|
977
|
-
else
|
978
|
-
# TODO make more performant
|
979
|
-
Numo::NArray.asarray(df.to_matrix.to_a)
|
980
|
-
end
|
981
|
-
end
|
982
|
-
|
983
955
|
# https://en.wikipedia.org/wiki/Poisson_distribution#Generating_Poisson-distributed_random_variables
|
984
956
|
def poisson(lam)
|
985
957
|
l = Math.exp(-lam)
|
@@ -994,13 +966,8 @@ module Prophet
|
|
994
966
|
|
995
967
|
# https://en.wikipedia.org/wiki/Laplace_distribution#Generating_values_from_the_Laplace_distribution
|
996
968
|
def laplace(loc, scale, size)
|
997
|
-
u = Numo::DFloat.new(size).rand
|
969
|
+
u = Numo::DFloat.new(size).rand(-0.5, 0.5)
|
998
970
|
loc - scale * u.sign * Numo::NMath.log(1 - 2 * u.abs)
|
999
971
|
end
|
1000
|
-
|
1001
|
-
def ensure_arr(value)
|
1002
|
-
value = [value] unless value.is_a?(Array)
|
1003
|
-
value
|
1004
|
-
end
|
1005
972
|
end
|
1006
973
|
end
|
data/lib/prophet/holidays.rb
CHANGED
@@ -6,7 +6,7 @@ module Prophet
|
|
6
6
|
end
|
7
7
|
|
8
8
|
def make_holidays_df(year_list, country)
|
9
|
-
holidays_df
|
9
|
+
holidays_df[(holidays_df["country"] == country) & (holidays_df["year"].in?(year_list))][["ds", "holiday"]]
|
10
10
|
end
|
11
11
|
|
12
12
|
# TODO marshal on installation
|
@@ -20,7 +20,7 @@ module Prophet
|
|
20
20
|
holidays["country"] << row["country"]
|
21
21
|
holidays["year"] << row["year"]
|
22
22
|
end
|
23
|
-
|
23
|
+
Rover::DataFrame.new(holidays)
|
24
24
|
end
|
25
25
|
end
|
26
26
|
end
|
data/lib/prophet/plot.rb
CHANGED
@@ -8,16 +8,16 @@ module Prophet
|
|
8
8
|
fig = ax.get_figure
|
9
9
|
end
|
10
10
|
fcst_t = to_pydatetime(fcst["ds"])
|
11
|
-
ax.plot(to_pydatetime(@history["ds"]), @history["y"].
|
12
|
-
ax.plot(fcst_t, fcst["yhat"].
|
13
|
-
if fcst.
|
14
|
-
ax.plot(fcst_t, fcst["cap"].
|
11
|
+
ax.plot(to_pydatetime(@history["ds"]), @history["y"].to_a, "k.")
|
12
|
+
ax.plot(fcst_t, fcst["yhat"].to_a, ls: "-", c: "#0072B2")
|
13
|
+
if fcst.include?("cap") && plot_cap
|
14
|
+
ax.plot(fcst_t, fcst["cap"].to_a, ls: "--", c: "k")
|
15
15
|
end
|
16
|
-
if @logistic_floor && fcst.
|
17
|
-
ax.plot(fcst_t, fcst["floor"].
|
16
|
+
if @logistic_floor && fcst.include?("floor") && plot_cap
|
17
|
+
ax.plot(fcst_t, fcst["floor"].to_a, ls: "--", c: "k")
|
18
18
|
end
|
19
19
|
if uncertainty && @uncertainty_samples
|
20
|
-
ax.fill_between(fcst_t, fcst["yhat_lower"].
|
20
|
+
ax.fill_between(fcst_t, fcst["yhat_lower"].to_a, fcst["yhat_upper"].to_a, color: "#0072B2", alpha: 0.2)
|
21
21
|
end
|
22
22
|
# Specify formatting to workaround matplotlib issue #12925
|
23
23
|
locator = dates.AutoDateLocator.new(interval_multiples: false)
|
@@ -33,25 +33,25 @@ module Prophet
|
|
33
33
|
|
34
34
|
def plot_components(fcst, uncertainty: true, plot_cap: true, weekly_start: 0, yearly_start: 0, figsize: nil)
|
35
35
|
components = ["trend"]
|
36
|
-
if @train_holiday_names && fcst.
|
36
|
+
if @train_holiday_names && fcst.include?("holidays")
|
37
37
|
components << "holidays"
|
38
38
|
end
|
39
39
|
# Plot weekly seasonality, if present
|
40
|
-
if @seasonalities["weekly"] && fcst.
|
40
|
+
if @seasonalities["weekly"] && fcst.include?("weekly")
|
41
41
|
components << "weekly"
|
42
42
|
end
|
43
43
|
# Yearly if present
|
44
|
-
if @seasonalities["yearly"] && fcst.
|
44
|
+
if @seasonalities["yearly"] && fcst.include?("yearly")
|
45
45
|
components << "yearly"
|
46
46
|
end
|
47
47
|
# Other seasonalities
|
48
|
-
components.concat(@seasonalities.keys.select { |name| fcst.
|
48
|
+
components.concat(@seasonalities.keys.select { |name| fcst.include?(name) && !["weekly", "yearly"].include?(name) }.sort)
|
49
49
|
regressors = {"additive" => false, "multiplicative" => false}
|
50
50
|
@extra_regressors.each do |name, props|
|
51
51
|
regressors[props[:mode]] = true
|
52
52
|
end
|
53
53
|
["additive", "multiplicative"].each do |mode|
|
54
|
-
if regressors[mode] && fcst.
|
54
|
+
if regressors[mode] && fcst.include?("extra_regressors_#{mode}")
|
55
55
|
components << "extra_regressors_#{mode}"
|
56
56
|
end
|
57
57
|
end
|
@@ -97,11 +97,11 @@ module Prophet
|
|
97
97
|
def add_changepoints_to_plot(ax, fcst, threshold: 0.01, cp_color: "r", cp_linestyle: "--", trend: true)
|
98
98
|
artists = []
|
99
99
|
if trend
|
100
|
-
artists << ax.plot(to_pydatetime(fcst["ds"]), fcst["trend"].
|
100
|
+
artists << ax.plot(to_pydatetime(fcst["ds"]), fcst["trend"].to_a, c: cp_color)
|
101
101
|
end
|
102
102
|
signif_changepoints =
|
103
103
|
if @changepoints.size > 0
|
104
|
-
(@params["delta"].mean(axis: 0, nan: true).abs >= threshold).mask(@changepoints)
|
104
|
+
(@params["delta"].mean(axis: 0, nan: true).abs >= threshold).mask(@changepoints.to_numo)
|
105
105
|
else
|
106
106
|
[]
|
107
107
|
end
|
@@ -120,15 +120,15 @@ module Prophet
|
|
120
120
|
ax = fig.add_subplot(111)
|
121
121
|
end
|
122
122
|
fcst_t = to_pydatetime(fcst["ds"])
|
123
|
-
artists += ax.plot(fcst_t, fcst[name].
|
124
|
-
if fcst.
|
125
|
-
artists += ax.plot(fcst_t, fcst["cap"].
|
123
|
+
artists += ax.plot(fcst_t, fcst[name].to_a, ls: "-", c: "#0072B2")
|
124
|
+
if fcst.include?("cap") && plot_cap
|
125
|
+
artists += ax.plot(fcst_t, fcst["cap"].to_a, ls: "--", c: "k")
|
126
126
|
end
|
127
|
-
if @logistic_floor && fcst.
|
128
|
-
ax.plot(fcst_t, fcst["floor"].
|
127
|
+
if @logistic_floor && fcst.include?("floor") && plot_cap
|
128
|
+
ax.plot(fcst_t, fcst["floor"].to_a, ls: "--", c: "k")
|
129
129
|
end
|
130
130
|
if uncertainty && @uncertainty_samples
|
131
|
-
artists += [ax.fill_between(fcst_t, fcst[name + "_lower"].
|
131
|
+
artists += [ax.fill_between(fcst_t, fcst[name + "_lower"].to_a, fcst[name + "_upper"].to_a, color: "#0072B2", alpha: 0.2)]
|
132
132
|
end
|
133
133
|
# Specify formatting to workaround matplotlib issue #12925
|
134
134
|
locator = dates.AutoDateLocator.new(interval_multiples: false)
|
@@ -145,17 +145,17 @@ module Prophet
|
|
145
145
|
end
|
146
146
|
|
147
147
|
def seasonality_plot_df(ds)
|
148
|
-
df_dict = {"ds" => ds, "cap" =>
|
148
|
+
df_dict = {"ds" => ds, "cap" => 1.0, "floor" => 0.0}
|
149
149
|
@extra_regressors.each_key do |name|
|
150
|
-
df_dict[name] =
|
150
|
+
df_dict[name] = 0.0
|
151
151
|
end
|
152
152
|
# Activate all conditional seasonality columns
|
153
153
|
@seasonalities.values.each do |props|
|
154
154
|
if props[:condition_name]
|
155
|
-
df_dict[props[:condition_name]] =
|
155
|
+
df_dict[props[:condition_name]] = true
|
156
156
|
end
|
157
157
|
end
|
158
|
-
df =
|
158
|
+
df = Rover::DataFrame.new(df_dict)
|
159
159
|
df = setup_dataframe(df)
|
160
160
|
df
|
161
161
|
end
|
@@ -172,9 +172,9 @@ module Prophet
|
|
172
172
|
df_w = seasonality_plot_df(days)
|
173
173
|
seas = predict_seasonal_components(df_w)
|
174
174
|
days = days.map { |v| v.strftime("%A") }
|
175
|
-
artists += ax.plot(days.size.times.to_a, seas[name].
|
175
|
+
artists += ax.plot(days.size.times.to_a, seas[name].to_a, ls: "-", c: "#0072B2")
|
176
176
|
if uncertainty && @uncertainty_samples
|
177
|
-
artists += [ax.fill_between(days.size.times.to_a, seas[name + "_lower"].
|
177
|
+
artists += [ax.fill_between(days.size.times.to_a, seas[name + "_lower"].to_a, seas[name + "_upper"].to_a, color: "#0072B2", alpha: 0.2)]
|
178
178
|
end
|
179
179
|
ax.grid(true, which: "major", c: "gray", ls: "-", lw: 1, alpha: 0.2)
|
180
180
|
ax.set_xticks(days.size.times.to_a)
|
@@ -198,9 +198,9 @@ module Prophet
|
|
198
198
|
days = 365.times.map { |i| start + i + yearly_start }
|
199
199
|
df_y = seasonality_plot_df(days)
|
200
200
|
seas = predict_seasonal_components(df_y)
|
201
|
-
artists += ax.plot(to_pydatetime(df_y["ds"]), seas[name].
|
201
|
+
artists += ax.plot(to_pydatetime(df_y["ds"]), seas[name].to_a, ls: "-", c: "#0072B2")
|
202
202
|
if uncertainty && @uncertainty_samples
|
203
|
-
artists += [ax.fill_between(to_pydatetime(df_y["ds"]), seas[name + "_lower"].
|
203
|
+
artists += [ax.fill_between(to_pydatetime(df_y["ds"]), seas[name + "_lower"].to_a, seas[name + "_upper"].to_a, color: "#0072B2", alpha: 0.2)]
|
204
204
|
end
|
205
205
|
ax.grid(true, which: "major", c: "gray", ls: "-", lw: 1, alpha: 0.2)
|
206
206
|
months = dates.MonthLocator.new((1..12).to_a, bymonthday: 1, interval: 2)
|
@@ -231,9 +231,9 @@ module Prophet
|
|
231
231
|
days = plot_points.times.map { |i| Time.at(start + i * step).utc }
|
232
232
|
df_y = seasonality_plot_df(days)
|
233
233
|
seas = predict_seasonal_components(df_y)
|
234
|
-
artists += ax.plot(to_pydatetime(df_y["ds"]), seas[name].
|
234
|
+
artists += ax.plot(to_pydatetime(df_y["ds"]), seas[name].to_a, ls: "-", c: "#0072B2")
|
235
235
|
if uncertainty && @uncertainty_samples
|
236
|
-
artists += [ax.fill_between(to_pydatetime(df_y["ds"]), seas[name + "_lower"].
|
236
|
+
artists += [ax.fill_between(to_pydatetime(df_y["ds"]), seas[name + "_lower"].to_a, seas[name + "_upper"].to_a, color: "#0072B2", alpha: 0.2)]
|
237
237
|
end
|
238
238
|
ax.grid(true, which: "major", c: "gray", ls: "-", lw: 1, alpha: 0.2)
|
239
239
|
step = (finish - start) / (7 - 1).to_f
|
@@ -281,7 +281,7 @@ module Prophet
|
|
281
281
|
|
282
282
|
def to_pydatetime(v)
|
283
283
|
datetime = PyCall.import_module("datetime")
|
284
|
-
v.map { |v| datetime.datetime.utcfromtimestamp(v.to_i) }
|
284
|
+
v.map { |v| datetime.datetime.utcfromtimestamp(v.to_i) }.to_a
|
285
285
|
end
|
286
286
|
end
|
287
287
|
end
|
data/lib/prophet/stan_backend.rb
CHANGED
@@ -127,7 +127,7 @@ module Prophet
|
|
127
127
|
stan_data["t_change"] = stan_data["t_change"].to_a
|
128
128
|
stan_data["s_a"] = stan_data["s_a"].to_a
|
129
129
|
stan_data["s_m"] = stan_data["s_m"].to_a
|
130
|
-
stan_data["X"] = stan_data["X"].
|
130
|
+
stan_data["X"] = stan_data["X"].to_numo.to_a
|
131
131
|
stan_init["delta"] = stan_init["delta"].to_a
|
132
132
|
stan_init["beta"] = stan_init["beta"].to_a
|
133
133
|
[stan_init, stan_data]
|
data/lib/prophet/version.rb
CHANGED
data/lib/prophet.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: prophet-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-05-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cmdstan
|
@@ -25,21 +25,21 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 0.1.2
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: numo-narray
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 0.9.1.7
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 0.9.1.7
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: rover-df
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
@@ -95,7 +95,7 @@ dependencies:
|
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '5'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
98
|
+
name: daru
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - ">="
|
@@ -109,7 +109,7 @@ dependencies:
|
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
112
|
+
name: matplotlib
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
115
|
- - ">="
|