prophet-rb 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +7 -7
- data/lib/prophet/forecaster.rb +87 -120
- data/lib/prophet/holidays.rb +2 -2
- data/lib/prophet/plot.rb +31 -31
- data/lib/prophet/stan_backend.rb +1 -1
- data/lib/prophet/version.rb +1 -1
- data/lib/prophet.rb +1 -1
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2b37dc1a6be57b67cd740727e0bf4ac0b3a4cf2e27ed19647b631094696787da
|
4
|
+
data.tar.gz: 892af24ebdd897d7dba904ed7b11a83390533666de13bb3f57172e26839ade3a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 53bc289290cf1a7861419634a057253f3727994c2e911247af0adf7e39688a15c019e180274cb8dd1e2a140e94dafc90ef7ff996319446b535c5f08a9090a990
|
7
|
+
data.tar.gz: 6b4ecdfcfb03f3e9da68f244fd8d25f5f9bee9a854bdc4d9f68d4907f59260a485df323eb70cc60cdc68e1c1e0d5e9b5431313785bd22da72758fd90fbda7bd2
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -10,7 +10,7 @@ Supports:
|
|
10
10
|
|
11
11
|
And gracefully handles missing data
|
12
12
|
|
13
|
-
[](https://travis-ci.org/ankane/prophet)
|
13
|
+
[](https://travis-ci.org/ankane/prophet) [](https://ci.appveyor.com/project/ankane/prophet/branch/master)
|
14
14
|
|
15
15
|
## Installation
|
16
16
|
|
@@ -31,7 +31,7 @@ Check out the [Prophet documentation](https://facebook.github.io/prophet/docs/qu
|
|
31
31
|
Create a data frame with `ds` and `y` columns - here’s [an example](examples/example_wp_log_peyton_manning.csv) you can use
|
32
32
|
|
33
33
|
```ruby
|
34
|
-
df =
|
34
|
+
df = Rover.read_csv("example_wp_log_peyton_manning.csv")
|
35
35
|
df.head(5)
|
36
36
|
```
|
37
37
|
|
@@ -107,7 +107,7 @@ m.plot_components(forecast).savefig("components.png")
|
|
107
107
|
Forecast logistic growth instead of linear
|
108
108
|
|
109
109
|
```ruby
|
110
|
-
df =
|
110
|
+
df = Rover.read_csv("example_wp_log_R.csv")
|
111
111
|
df["cap"] = 8.5
|
112
112
|
m = Prophet.new(growth: "logistic")
|
113
113
|
m.fit(df)
|
@@ -146,7 +146,7 @@ m = Prophet.new(changepoints: ["2014-01-01"])
|
|
146
146
|
Create a data frame with `holiday` and `ds` columns. Include all occurrences in your past data and future occurrences you’d like to forecast.
|
147
147
|
|
148
148
|
```ruby
|
149
|
-
playoffs =
|
149
|
+
playoffs = Rover::DataFrame.new(
|
150
150
|
"holiday" => ["playoff"] * 14,
|
151
151
|
"ds" => ["2008-01-13", "2009-01-03", "2010-01-16",
|
152
152
|
"2010-01-24", "2010-02-07", "2011-01-08",
|
@@ -156,7 +156,7 @@ playoffs = Daru::DataFrame.new(
|
|
156
156
|
"lower_window" => [0] * 14,
|
157
157
|
"upper_window" => [1] * 14
|
158
158
|
)
|
159
|
-
superbowls =
|
159
|
+
superbowls = Rover::DataFrame.new(
|
160
160
|
"holiday" => ["superbowl"] * 3,
|
161
161
|
"ds" => ["2010-02-07", "2014-02-02", "2016-02-07"],
|
162
162
|
"lower_window" => [0] * 3,
|
@@ -208,7 +208,7 @@ forecast = m.predict(future)
|
|
208
208
|
[Explanation](https://facebook.github.io/prophet/docs/multiplicative_seasonality.html)
|
209
209
|
|
210
210
|
```ruby
|
211
|
-
df =
|
211
|
+
df = Rover.read_csv("example_air_passengers.csv")
|
212
212
|
m = Prophet.new(seasonality_mode: "multiplicative")
|
213
213
|
m.fit(df)
|
214
214
|
future = m.make_future_dataframe(periods: 50, freq: "MS")
|
@@ -236,7 +236,7 @@ Prophet.new(mcmc_samples: 300)
|
|
236
236
|
Sub-daily data
|
237
237
|
|
238
238
|
```ruby
|
239
|
-
df =
|
239
|
+
df = Rover.read_csv("example_yosemite_temps.csv")
|
240
240
|
m = Prophet.new(changepoint_prior_scale: 0.01).fit(df)
|
241
241
|
future = m.make_future_dataframe(periods: 300, freq: "H")
|
242
242
|
forecast = m.predict(future)
|
data/lib/prophet/forecaster.rb
CHANGED
@@ -82,12 +82,12 @@ module Prophet
|
|
82
82
|
raise ArgumentError, "Parameter \"changepoint_range\" must be in [0, 1]"
|
83
83
|
end
|
84
84
|
if @holidays
|
85
|
-
if !@holidays.is_a?(
|
85
|
+
if !@holidays.is_a?(Rover::DataFrame) && @holidays.include?("ds") && @holidays.include?("holiday")
|
86
86
|
raise ArgumentError, "holidays must be a DataFrame with \"ds\" and \"holiday\" columns."
|
87
87
|
end
|
88
88
|
@holidays["ds"] = to_datetime(@holidays["ds"])
|
89
|
-
has_lower = @holidays.
|
90
|
-
has_upper = @holidays.
|
89
|
+
has_lower = @holidays.include?("lower_window")
|
90
|
+
has_upper = @holidays.include?("upper_window")
|
91
91
|
if has_lower ^ has_upper # xor
|
92
92
|
raise ArgumentError, "Holidays must have both lower_window and upper_window, or neither"
|
93
93
|
end
|
@@ -141,7 +141,7 @@ module Prophet
|
|
141
141
|
end
|
142
142
|
|
143
143
|
def setup_dataframe(df, initialize_scales: false)
|
144
|
-
if df.
|
144
|
+
if df.include?("y")
|
145
145
|
df["y"] = df["y"].map(&:to_f)
|
146
146
|
raise ArgumentError "Found infinity in column y." unless df["y"].all?(&:finite?)
|
147
147
|
end
|
@@ -152,7 +152,7 @@ module Prophet
|
|
152
152
|
raise ArgumentError, "Found NaN in column ds." if df["ds"].any?(&:nil?)
|
153
153
|
|
154
154
|
@extra_regressors.each_key do |name|
|
155
|
-
if !df.
|
155
|
+
if !df.include?(name)
|
156
156
|
raise ArgumentError, "Regressor #{name.inspect} missing from dataframe"
|
157
157
|
end
|
158
158
|
df[name] = df[name].map(&:to_f)
|
@@ -163,7 +163,7 @@ module Prophet
|
|
163
163
|
@seasonalities.values.each do |props|
|
164
164
|
condition_name = props[:condition_name]
|
165
165
|
if condition_name
|
166
|
-
if !df.
|
166
|
+
if !df.include?(condition_name)
|
167
167
|
raise ArgumentError, "Condition #{condition_name.inspect} missing from dataframe"
|
168
168
|
end
|
169
169
|
if df.where(!df[condition_name].in([true, false])).any?
|
@@ -172,36 +172,33 @@ module Prophet
|
|
172
172
|
end
|
173
173
|
end
|
174
174
|
|
175
|
-
|
176
|
-
df.index.name = nil
|
177
|
-
end
|
178
|
-
df = df.sort(["ds"])
|
175
|
+
df = df.sort_by { |r| r["ds"] }
|
179
176
|
|
180
177
|
initialize_scales(initialize_scales, df)
|
181
178
|
|
182
|
-
if @logistic_floor && !df.
|
179
|
+
if @logistic_floor && !df.include?("floor")
|
183
180
|
raise ArgumentError, "Expected column \"floor\"."
|
184
181
|
else
|
185
182
|
df["floor"] = 0
|
186
183
|
end
|
187
184
|
|
188
185
|
if @growth == "logistic"
|
189
|
-
unless df.
|
186
|
+
unless df.include?("cap")
|
190
187
|
raise ArgumentError, "Capacities must be supplied for logistic growth in column \"cap\""
|
191
188
|
end
|
192
|
-
if df
|
189
|
+
if df[df["cap"] <= df["floor"]].size > 0
|
193
190
|
raise ArgumentError, "cap must be greater than floor (which defaults to 0)."
|
194
191
|
end
|
195
|
-
df["cap_scaled"] = (df["cap"] - df["floor"]) / @y_scale
|
192
|
+
df["cap_scaled"] = (df["cap"] - df["floor"]) / @y_scale.to_f
|
196
193
|
end
|
197
194
|
|
198
195
|
df["t"] = (df["ds"] - @start) / @t_scale.to_f
|
199
|
-
if df.
|
200
|
-
df["y_scaled"] = (df["y"] - df["floor"]) / @y_scale
|
196
|
+
if df.include?("y")
|
197
|
+
df["y_scaled"] = (df["y"] - df["floor"]) / @y_scale.to_f
|
201
198
|
end
|
202
199
|
|
203
200
|
@extra_regressors.each do |name, props|
|
204
|
-
df[name] = (
|
201
|
+
df[name] = (df[name] - props[:mu]) / props[:std].to_f
|
205
202
|
end
|
206
203
|
|
207
204
|
df
|
@@ -237,21 +234,21 @@ module Prophet
|
|
237
234
|
if @n_changepoints > 0
|
238
235
|
step = (hist_size - 1) / @n_changepoints.to_f
|
239
236
|
cp_indexes = (@n_changepoints + 1).times.map { |i| (i * step).round }
|
240
|
-
@changepoints =
|
237
|
+
@changepoints = Rover::Vector.new(@history["ds"].to_a.values_at(*cp_indexes)).tail(-1)
|
241
238
|
else
|
242
239
|
@changepoints = []
|
243
240
|
end
|
244
241
|
end
|
245
242
|
|
246
243
|
if @changepoints.size > 0
|
247
|
-
@changepoints_t = (
|
244
|
+
@changepoints_t = (@changepoints.map(&:to_i).sort.to_numo.cast_to(Numo::DFloat) - @start.to_i) / @t_scale.to_f
|
248
245
|
else
|
249
246
|
@changepoints_t = Numo::NArray.asarray([0])
|
250
247
|
end
|
251
248
|
end
|
252
249
|
|
253
250
|
def fourier_series(dates, period, series_order)
|
254
|
-
t =
|
251
|
+
t = dates.map(&:to_i).to_numo / (3600 * 24.0)
|
255
252
|
|
256
253
|
# no need for column_stack
|
257
254
|
series_order.times.flat_map do |i|
|
@@ -263,11 +260,11 @@ module Prophet
|
|
263
260
|
|
264
261
|
def make_seasonality_features(dates, period, series_order, prefix)
|
265
262
|
features = fourier_series(dates, period, series_order)
|
266
|
-
|
263
|
+
Rover::DataFrame.new(features.map.with_index { |v, i| ["#{prefix}_delim_#{i + 1}", v] }.to_h)
|
267
264
|
end
|
268
265
|
|
269
266
|
def construct_holiday_dataframe(dates)
|
270
|
-
all_holidays =
|
267
|
+
all_holidays = Rover::DataFrame.new
|
271
268
|
if @holidays
|
272
269
|
all_holidays = @holidays.dup
|
273
270
|
end
|
@@ -279,12 +276,12 @@ module Prophet
|
|
279
276
|
# Drop future holidays not previously seen in training data
|
280
277
|
if @train_holiday_names
|
281
278
|
# Remove holiday names didn't show up in fit
|
282
|
-
all_holidays = all_holidays
|
279
|
+
all_holidays = all_holidays[all_holidays["holiday"].in?(@train_holiday_names)]
|
283
280
|
|
284
281
|
# Add holiday names in fit but not in predict with ds as NA
|
285
|
-
holidays_to_add =
|
286
|
-
"holiday" => @train_holiday_names
|
287
|
-
)
|
282
|
+
holidays_to_add = Rover::DataFrame.new({
|
283
|
+
"holiday" => @train_holiday_names[!@train_holiday_names.in?(all_holidays["holiday"])]
|
284
|
+
})
|
288
285
|
all_holidays = all_holidays.concat(holidays_to_add)
|
289
286
|
end
|
290
287
|
|
@@ -318,7 +315,7 @@ module Prophet
|
|
318
315
|
|
319
316
|
lw.upto(uw).each do |offset|
|
320
317
|
occurrence = dt ? dt + offset : nil
|
321
|
-
loc = occurrence ? row_index.index(occurrence) : nil
|
318
|
+
loc = occurrence ? row_index.to_a.index(occurrence) : nil
|
322
319
|
key = "#{row["holiday"]}_delim_#{offset >= 0 ? "+" : "-"}#{offset.abs}"
|
323
320
|
if loc
|
324
321
|
expanded_holidays[key][loc] = 1.0
|
@@ -327,14 +324,14 @@ module Prophet
|
|
327
324
|
end
|
328
325
|
end
|
329
326
|
end
|
330
|
-
holiday_features =
|
331
|
-
#
|
332
|
-
holiday_features = holiday_features[
|
333
|
-
prior_scale_list = holiday_features.
|
327
|
+
holiday_features = Rover::DataFrame.new(expanded_holidays)
|
328
|
+
# Make sure column order is consistent
|
329
|
+
holiday_features = holiday_features[holiday_features.vector_names.sort]
|
330
|
+
prior_scale_list = holiday_features.vector_names.map { |h| prior_scales[h.split("_delim_")[0]] }
|
334
331
|
holiday_names = prior_scales.keys
|
335
332
|
# Store holiday names used in fit
|
336
|
-
if
|
337
|
-
@train_holiday_names =
|
333
|
+
if @train_holiday_names.nil?
|
334
|
+
@train_holiday_names = Rover::Vector.new(holiday_names)
|
338
335
|
end
|
339
336
|
[holiday_features, prior_scale_list, holiday_names]
|
340
337
|
end
|
@@ -432,16 +429,16 @@ module Prophet
|
|
432
429
|
modes[@seasonality_mode].concat(holiday_names)
|
433
430
|
end
|
434
431
|
|
435
|
-
#
|
432
|
+
# Additional regressors
|
436
433
|
@extra_regressors.each do |name, props|
|
437
|
-
seasonal_features << df[name]
|
434
|
+
seasonal_features << Rover::DataFrame.new({name => df[name]})
|
438
435
|
prior_scales << props[:prior_scale]
|
439
436
|
modes[props[:mode]] << name
|
440
437
|
end
|
441
438
|
|
442
|
-
#
|
439
|
+
# Dummy to prevent empty X
|
443
440
|
if seasonal_features.size == 0
|
444
|
-
seasonal_features <<
|
441
|
+
seasonal_features << Rover::DataFrame.new({"zeros" => [0] * df.shape[0]})
|
445
442
|
prior_scales << 1.0
|
446
443
|
end
|
447
444
|
|
@@ -453,16 +450,16 @@ module Prophet
|
|
453
450
|
end
|
454
451
|
|
455
452
|
def regressor_column_matrix(seasonal_features, modes)
|
456
|
-
components =
|
453
|
+
components = Rover::DataFrame.new(
|
457
454
|
"col" => seasonal_features.shape[1].times.to_a,
|
458
|
-
"component" => seasonal_features.
|
455
|
+
"component" => seasonal_features.vector_names.map { |x| x.split("_delim_")[0] }
|
459
456
|
)
|
460
457
|
|
461
|
-
#
|
458
|
+
# Add total for holidays
|
462
459
|
if @train_holiday_names
|
463
460
|
components = add_group_component(components, "holidays", @train_holiday_names.uniq)
|
464
461
|
end
|
465
|
-
#
|
462
|
+
# Add totals additive and multiplicative components, and regressors
|
466
463
|
["additive", "multiplicative"].each do |mode|
|
467
464
|
components = add_group_component(components, mode + "_terms", modes[mode])
|
468
465
|
regressors_by_mode = @extra_regressors.select { |r, props| props[:mode] == mode }
|
@@ -473,20 +470,15 @@ module Prophet
|
|
473
470
|
modes[mode] << mode + "_terms"
|
474
471
|
modes[mode] << "extra_regressors_" + mode
|
475
472
|
end
|
476
|
-
#
|
473
|
+
# After all of the additive/multiplicative groups have been added,
|
477
474
|
modes[@seasonality_mode] << "holidays"
|
478
|
-
#
|
479
|
-
component_cols =
|
480
|
-
|
481
|
-
)
|
482
|
-
component_cols.each_vector do |v|
|
483
|
-
v.map! { |vi| vi.nil? ? 0 : vi }
|
484
|
-
end
|
485
|
-
component_cols.rename_vectors(:_id => "col")
|
475
|
+
# Convert to a binary matrix
|
476
|
+
component_cols = components["col"].crosstab(components["component"])
|
477
|
+
component_cols["col"] = component_cols.delete("_")
|
486
478
|
|
487
479
|
# Add columns for additive and multiplicative terms, if missing
|
488
480
|
["additive_terms", "multiplicative_terms"].each do |name|
|
489
|
-
component_cols[name] = 0 unless component_cols.
|
481
|
+
component_cols[name] = 0 unless component_cols.include?(name)
|
490
482
|
end
|
491
483
|
|
492
484
|
# TODO validation
|
@@ -495,10 +487,10 @@ module Prophet
|
|
495
487
|
end
|
496
488
|
|
497
489
|
def add_group_component(components, name, group)
|
498
|
-
new_comp = components
|
490
|
+
new_comp = components[components["component"].in?(group)].dup
|
499
491
|
group_cols = new_comp["col"].uniq
|
500
492
|
if group_cols.size > 0
|
501
|
-
new_comp =
|
493
|
+
new_comp = Rover::DataFrame.new({"col" => group_cols, "component" => name})
|
502
494
|
components = components.concat(new_comp)
|
503
495
|
end
|
504
496
|
components
|
@@ -574,8 +566,8 @@ module Prophet
|
|
574
566
|
end
|
575
567
|
|
576
568
|
def linear_growth_init(df)
|
577
|
-
i0 =
|
578
|
-
i1 = df
|
569
|
+
i0 = 0
|
570
|
+
i1 = df.size - 1
|
579
571
|
t = df["t"][i1] - df["t"][i0]
|
580
572
|
k = (df["y_scaled"][i1] - df["y_scaled"][i0]) / t
|
581
573
|
m = df["y_scaled"][i0] - k * df["t"][i0]
|
@@ -583,8 +575,8 @@ module Prophet
|
|
583
575
|
end
|
584
576
|
|
585
577
|
def logistic_growth_init(df)
|
586
|
-
i0 =
|
587
|
-
i1 = df
|
578
|
+
i0 = 0
|
579
|
+
i1 = df.size - 1
|
588
580
|
t = df["t"][i1] - df["t"][i0]
|
589
581
|
|
590
582
|
# Force valid values, in case y > cap or y < 0
|
@@ -613,8 +605,13 @@ module Prophet
|
|
613
605
|
def fit(df, **kwargs)
|
614
606
|
raise Error, "Prophet object can only be fit once" if @history
|
615
607
|
|
616
|
-
|
617
|
-
|
608
|
+
if defined?(Daru::DataFrame) && df.is_a?(Daru::DataFrame)
|
609
|
+
df = Rover::DataFrame.new(df.to_h)
|
610
|
+
end
|
611
|
+
raise ArgumentError, "Must be a data frame" unless df.is_a?(Rover::DataFrame)
|
612
|
+
|
613
|
+
history = df[!df["y"].missing]
|
614
|
+
raise Error, "Data has less than 2 non-nil rows" if history.size < 2
|
618
615
|
|
619
616
|
@history_dates = to_datetime(df["ds"]).sort
|
620
617
|
history = setup_dataframe(history, initialize_scales: true)
|
@@ -701,10 +698,10 @@ module Prophet
|
|
701
698
|
|
702
699
|
# Drop columns except ds, cap, floor, and trend
|
703
700
|
cols = ["ds", "trend"]
|
704
|
-
cols << "cap" if df.
|
701
|
+
cols << "cap" if df.include?("cap")
|
705
702
|
cols << "floor" if @logistic_floor
|
706
703
|
# Add in forecast components
|
707
|
-
df2 = df_concat_axis_one([df[
|
704
|
+
df2 = df_concat_axis_one([df[cols], intervals, seasonal_components])
|
708
705
|
df2["yhat"] = df2["trend"] * (df2["multiplicative_terms"] + 1) + df2["additive_terms"]
|
709
706
|
df2
|
710
707
|
end
|
@@ -739,8 +736,7 @@ module Prophet
|
|
739
736
|
k_t[indx] += deltas[s]
|
740
737
|
m_t[indx] += gammas[s]
|
741
738
|
end
|
742
|
-
|
743
|
-
df_values(cap) / (1 + Numo::NMath.exp(-k_t * (t - m_t)))
|
739
|
+
cap.to_numo / (1 + Numo::NMath.exp(-k_t * (t - m_t)))
|
744
740
|
end
|
745
741
|
|
746
742
|
def predict_trend(df)
|
@@ -766,10 +762,10 @@ module Prophet
|
|
766
762
|
upper_p = 100 * (1.0 + @interval_width) / 2
|
767
763
|
end
|
768
764
|
|
769
|
-
x =
|
765
|
+
x = seasonal_features.to_numo
|
770
766
|
data = {}
|
771
|
-
component_cols.
|
772
|
-
beta_c =
|
767
|
+
component_cols.vector_names.each do |component|
|
768
|
+
beta_c = @params["beta"] * component_cols[component].to_numo
|
773
769
|
|
774
770
|
comp = x.dot(beta_c.transpose)
|
775
771
|
if @component_modes["additive"].include?(component)
|
@@ -777,11 +773,11 @@ module Prophet
|
|
777
773
|
end
|
778
774
|
data[component] = comp.mean(axis: 1, nan: true)
|
779
775
|
if @uncertainty_samples
|
780
|
-
data[component + "_lower"] = percentile(
|
781
|
-
data[component + "_upper"] = percentile(
|
776
|
+
data[component + "_lower"] = comp.percentile(lower_p, axis: 1)
|
777
|
+
data[component + "_upper"] = comp.percentile(upper_p, axis: 1)
|
782
778
|
end
|
783
779
|
end
|
784
|
-
|
780
|
+
Rover::DataFrame.new(data)
|
785
781
|
end
|
786
782
|
|
787
783
|
def sample_posterior_predictive(df)
|
@@ -792,9 +788,9 @@ module Prophet
|
|
792
788
|
seasonal_features, _, component_cols, _ = make_all_seasonality_features(df)
|
793
789
|
|
794
790
|
# convert to Numo for performance
|
795
|
-
seasonal_features =
|
796
|
-
additive_terms =
|
797
|
-
multiplicative_terms =
|
791
|
+
seasonal_features = seasonal_features.to_numo
|
792
|
+
additive_terms = component_cols["additive_terms"].to_numo
|
793
|
+
multiplicative_terms = component_cols["multiplicative_terms"].to_numo
|
798
794
|
|
799
795
|
sim_values = {"yhat" => [], "trend" => []}
|
800
796
|
n_iterations.times do |i|
|
@@ -831,11 +827,11 @@ module Prophet
|
|
831
827
|
|
832
828
|
series = {}
|
833
829
|
["yhat", "trend"].each do |key|
|
834
|
-
series["#{key}_lower"] =
|
835
|
-
series["#{key}_upper"] =
|
830
|
+
series["#{key}_lower"] = sim_values[key].percentile(lower_p, axis: 1)
|
831
|
+
series["#{key}_upper"] = sim_values[key].percentile(upper_p, axis: 1)
|
836
832
|
end
|
837
833
|
|
838
|
-
|
834
|
+
Rover::DataFrame.new(series)
|
839
835
|
end
|
840
836
|
|
841
837
|
def sample_model(df, seasonal_features, iteration, s_a, s_m)
|
@@ -897,21 +893,6 @@ module Prophet
|
|
897
893
|
trend * @y_scale + Numo::NArray.asarray(df["floor"].to_a)
|
898
894
|
end
|
899
895
|
|
900
|
-
def percentile(a, percentile, axis:)
|
901
|
-
raise Error, "Axis must be 1" if axis != 1
|
902
|
-
|
903
|
-
sorted = a.sort(axis: axis)
|
904
|
-
x = percentile / 100.0 * (sorted.shape[axis] - 1)
|
905
|
-
r = x % 1
|
906
|
-
i = x.floor
|
907
|
-
# this should use axis, but we only need axis: 1
|
908
|
-
if i == sorted.shape[axis] - 1
|
909
|
-
sorted[true, -1]
|
910
|
-
else
|
911
|
-
sorted[true, i] + r * (sorted[true, i + 1] - sorted[true, i])
|
912
|
-
end
|
913
|
-
end
|
914
|
-
|
915
896
|
def make_future_dataframe(periods:, freq: "D", include_history: true)
|
916
897
|
raise Error, "Model has not been fit" unless @history_dates
|
917
898
|
last_date = @history_dates.max
|
@@ -938,48 +919,39 @@ module Prophet
|
|
938
919
|
end
|
939
920
|
dates.select! { |d| d > last_date }
|
940
921
|
dates = dates.last(periods)
|
941
|
-
dates = @history_dates
|
942
|
-
|
922
|
+
dates = @history_dates.to_numo.concatenate(Numo::NArray.cast(dates)) if include_history
|
923
|
+
Rover::DataFrame.new({"ds" => dates})
|
943
924
|
end
|
944
925
|
|
945
926
|
private
|
946
927
|
|
947
|
-
# Time is
|
928
|
+
# Time is preferred over DateTime in Ruby docs
|
948
929
|
# use UTC to be consistent with Python
|
949
930
|
# and so days have equal length (no DST)
|
950
931
|
def to_datetime(vec)
|
951
932
|
return if vec.nil?
|
952
|
-
vec
|
953
|
-
|
954
|
-
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
933
|
+
vec =
|
934
|
+
vec.map do |v|
|
935
|
+
case v
|
936
|
+
when Time
|
937
|
+
v.utc
|
938
|
+
when Date
|
939
|
+
v.to_datetime.to_time.utc
|
940
|
+
else
|
941
|
+
DateTime.parse(v.to_s).to_time.utc
|
942
|
+
end
|
960
943
|
end
|
961
|
-
|
944
|
+
Rover::Vector.new(vec)
|
962
945
|
end
|
963
946
|
|
964
947
|
# okay to do in-place
|
965
948
|
def df_concat_axis_one(dfs)
|
966
949
|
dfs[1..-1].each do |df|
|
967
|
-
df
|
968
|
-
dfs[0][k] = v
|
969
|
-
end
|
950
|
+
dfs[0].merge!(df)
|
970
951
|
end
|
971
952
|
dfs[0]
|
972
953
|
end
|
973
954
|
|
974
|
-
def df_values(df)
|
975
|
-
if df.is_a?(Daru::Vector)
|
976
|
-
Numo::NArray.asarray(df.to_a)
|
977
|
-
else
|
978
|
-
# TODO make more performant
|
979
|
-
Numo::NArray.asarray(df.to_matrix.to_a)
|
980
|
-
end
|
981
|
-
end
|
982
|
-
|
983
955
|
# https://en.wikipedia.org/wiki/Poisson_distribution#Generating_Poisson-distributed_random_variables
|
984
956
|
def poisson(lam)
|
985
957
|
l = Math.exp(-lam)
|
@@ -994,13 +966,8 @@ module Prophet
|
|
994
966
|
|
995
967
|
# https://en.wikipedia.org/wiki/Laplace_distribution#Generating_values_from_the_Laplace_distribution
|
996
968
|
def laplace(loc, scale, size)
|
997
|
-
u = Numo::DFloat.new(size).rand
|
969
|
+
u = Numo::DFloat.new(size).rand(-0.5, 0.5)
|
998
970
|
loc - scale * u.sign * Numo::NMath.log(1 - 2 * u.abs)
|
999
971
|
end
|
1000
|
-
|
1001
|
-
def ensure_arr(value)
|
1002
|
-
value = [value] unless value.is_a?(Array)
|
1003
|
-
value
|
1004
|
-
end
|
1005
972
|
end
|
1006
973
|
end
|
data/lib/prophet/holidays.rb
CHANGED
@@ -6,7 +6,7 @@ module Prophet
|
|
6
6
|
end
|
7
7
|
|
8
8
|
def make_holidays_df(year_list, country)
|
9
|
-
holidays_df
|
9
|
+
holidays_df[(holidays_df["country"] == country) & (holidays_df["year"].in?(year_list))][["ds", "holiday"]]
|
10
10
|
end
|
11
11
|
|
12
12
|
# TODO marshal on installation
|
@@ -20,7 +20,7 @@ module Prophet
|
|
20
20
|
holidays["country"] << row["country"]
|
21
21
|
holidays["year"] << row["year"]
|
22
22
|
end
|
23
|
-
|
23
|
+
Rover::DataFrame.new(holidays)
|
24
24
|
end
|
25
25
|
end
|
26
26
|
end
|
data/lib/prophet/plot.rb
CHANGED
@@ -8,16 +8,16 @@ module Prophet
|
|
8
8
|
fig = ax.get_figure
|
9
9
|
end
|
10
10
|
fcst_t = to_pydatetime(fcst["ds"])
|
11
|
-
ax.plot(to_pydatetime(@history["ds"]), @history["y"].
|
12
|
-
ax.plot(fcst_t, fcst["yhat"].
|
13
|
-
if fcst.
|
14
|
-
ax.plot(fcst_t, fcst["cap"].
|
11
|
+
ax.plot(to_pydatetime(@history["ds"]), @history["y"].to_a, "k.")
|
12
|
+
ax.plot(fcst_t, fcst["yhat"].to_a, ls: "-", c: "#0072B2")
|
13
|
+
if fcst.include?("cap") && plot_cap
|
14
|
+
ax.plot(fcst_t, fcst["cap"].to_a, ls: "--", c: "k")
|
15
15
|
end
|
16
|
-
if @logistic_floor && fcst.
|
17
|
-
ax.plot(fcst_t, fcst["floor"].
|
16
|
+
if @logistic_floor && fcst.include?("floor") && plot_cap
|
17
|
+
ax.plot(fcst_t, fcst["floor"].to_a, ls: "--", c: "k")
|
18
18
|
end
|
19
19
|
if uncertainty && @uncertainty_samples
|
20
|
-
ax.fill_between(fcst_t, fcst["yhat_lower"].
|
20
|
+
ax.fill_between(fcst_t, fcst["yhat_lower"].to_a, fcst["yhat_upper"].to_a, color: "#0072B2", alpha: 0.2)
|
21
21
|
end
|
22
22
|
# Specify formatting to workaround matplotlib issue #12925
|
23
23
|
locator = dates.AutoDateLocator.new(interval_multiples: false)
|
@@ -33,25 +33,25 @@ module Prophet
|
|
33
33
|
|
34
34
|
def plot_components(fcst, uncertainty: true, plot_cap: true, weekly_start: 0, yearly_start: 0, figsize: nil)
|
35
35
|
components = ["trend"]
|
36
|
-
if @train_holiday_names && fcst.
|
36
|
+
if @train_holiday_names && fcst.include?("holidays")
|
37
37
|
components << "holidays"
|
38
38
|
end
|
39
39
|
# Plot weekly seasonality, if present
|
40
|
-
if @seasonalities["weekly"] && fcst.
|
40
|
+
if @seasonalities["weekly"] && fcst.include?("weekly")
|
41
41
|
components << "weekly"
|
42
42
|
end
|
43
43
|
# Yearly if present
|
44
|
-
if @seasonalities["yearly"] && fcst.
|
44
|
+
if @seasonalities["yearly"] && fcst.include?("yearly")
|
45
45
|
components << "yearly"
|
46
46
|
end
|
47
47
|
# Other seasonalities
|
48
|
-
components.concat(@seasonalities.keys.select { |name| fcst.
|
48
|
+
components.concat(@seasonalities.keys.select { |name| fcst.include?(name) && !["weekly", "yearly"].include?(name) }.sort)
|
49
49
|
regressors = {"additive" => false, "multiplicative" => false}
|
50
50
|
@extra_regressors.each do |name, props|
|
51
51
|
regressors[props[:mode]] = true
|
52
52
|
end
|
53
53
|
["additive", "multiplicative"].each do |mode|
|
54
|
-
if regressors[mode] && fcst.
|
54
|
+
if regressors[mode] && fcst.include?("extra_regressors_#{mode}")
|
55
55
|
components << "extra_regressors_#{mode}"
|
56
56
|
end
|
57
57
|
end
|
@@ -97,11 +97,11 @@ module Prophet
|
|
97
97
|
def add_changepoints_to_plot(ax, fcst, threshold: 0.01, cp_color: "r", cp_linestyle: "--", trend: true)
|
98
98
|
artists = []
|
99
99
|
if trend
|
100
|
-
artists << ax.plot(to_pydatetime(fcst["ds"]), fcst["trend"].
|
100
|
+
artists << ax.plot(to_pydatetime(fcst["ds"]), fcst["trend"].to_a, c: cp_color)
|
101
101
|
end
|
102
102
|
signif_changepoints =
|
103
103
|
if @changepoints.size > 0
|
104
|
-
(@params["delta"].mean(axis: 0, nan: true).abs >= threshold).mask(@changepoints)
|
104
|
+
(@params["delta"].mean(axis: 0, nan: true).abs >= threshold).mask(@changepoints.to_numo)
|
105
105
|
else
|
106
106
|
[]
|
107
107
|
end
|
@@ -120,15 +120,15 @@ module Prophet
|
|
120
120
|
ax = fig.add_subplot(111)
|
121
121
|
end
|
122
122
|
fcst_t = to_pydatetime(fcst["ds"])
|
123
|
-
artists += ax.plot(fcst_t, fcst[name].
|
124
|
-
if fcst.
|
125
|
-
artists += ax.plot(fcst_t, fcst["cap"].
|
123
|
+
artists += ax.plot(fcst_t, fcst[name].to_a, ls: "-", c: "#0072B2")
|
124
|
+
if fcst.include?("cap") && plot_cap
|
125
|
+
artists += ax.plot(fcst_t, fcst["cap"].to_a, ls: "--", c: "k")
|
126
126
|
end
|
127
|
-
if @logistic_floor && fcst.
|
128
|
-
ax.plot(fcst_t, fcst["floor"].
|
127
|
+
if @logistic_floor && fcst.include?("floor") && plot_cap
|
128
|
+
ax.plot(fcst_t, fcst["floor"].to_a, ls: "--", c: "k")
|
129
129
|
end
|
130
130
|
if uncertainty && @uncertainty_samples
|
131
|
-
artists += [ax.fill_between(fcst_t, fcst[name + "_lower"].
|
131
|
+
artists += [ax.fill_between(fcst_t, fcst[name + "_lower"].to_a, fcst[name + "_upper"].to_a, color: "#0072B2", alpha: 0.2)]
|
132
132
|
end
|
133
133
|
# Specify formatting to workaround matplotlib issue #12925
|
134
134
|
locator = dates.AutoDateLocator.new(interval_multiples: false)
|
@@ -145,17 +145,17 @@ module Prophet
|
|
145
145
|
end
|
146
146
|
|
147
147
|
def seasonality_plot_df(ds)
|
148
|
-
df_dict = {"ds" => ds, "cap" =>
|
148
|
+
df_dict = {"ds" => ds, "cap" => 1.0, "floor" => 0.0}
|
149
149
|
@extra_regressors.each_key do |name|
|
150
|
-
df_dict[name] =
|
150
|
+
df_dict[name] = 0.0
|
151
151
|
end
|
152
152
|
# Activate all conditional seasonality columns
|
153
153
|
@seasonalities.values.each do |props|
|
154
154
|
if props[:condition_name]
|
155
|
-
df_dict[props[:condition_name]] =
|
155
|
+
df_dict[props[:condition_name]] = true
|
156
156
|
end
|
157
157
|
end
|
158
|
-
df =
|
158
|
+
df = Rover::DataFrame.new(df_dict)
|
159
159
|
df = setup_dataframe(df)
|
160
160
|
df
|
161
161
|
end
|
@@ -172,9 +172,9 @@ module Prophet
|
|
172
172
|
df_w = seasonality_plot_df(days)
|
173
173
|
seas = predict_seasonal_components(df_w)
|
174
174
|
days = days.map { |v| v.strftime("%A") }
|
175
|
-
artists += ax.plot(days.size.times.to_a, seas[name].
|
175
|
+
artists += ax.plot(days.size.times.to_a, seas[name].to_a, ls: "-", c: "#0072B2")
|
176
176
|
if uncertainty && @uncertainty_samples
|
177
|
-
artists += [ax.fill_between(days.size.times.to_a, seas[name + "_lower"].
|
177
|
+
artists += [ax.fill_between(days.size.times.to_a, seas[name + "_lower"].to_a, seas[name + "_upper"].to_a, color: "#0072B2", alpha: 0.2)]
|
178
178
|
end
|
179
179
|
ax.grid(true, which: "major", c: "gray", ls: "-", lw: 1, alpha: 0.2)
|
180
180
|
ax.set_xticks(days.size.times.to_a)
|
@@ -198,9 +198,9 @@ module Prophet
|
|
198
198
|
days = 365.times.map { |i| start + i + yearly_start }
|
199
199
|
df_y = seasonality_plot_df(days)
|
200
200
|
seas = predict_seasonal_components(df_y)
|
201
|
-
artists += ax.plot(to_pydatetime(df_y["ds"]), seas[name].
|
201
|
+
artists += ax.plot(to_pydatetime(df_y["ds"]), seas[name].to_a, ls: "-", c: "#0072B2")
|
202
202
|
if uncertainty && @uncertainty_samples
|
203
|
-
artists += [ax.fill_between(to_pydatetime(df_y["ds"]), seas[name + "_lower"].
|
203
|
+
artists += [ax.fill_between(to_pydatetime(df_y["ds"]), seas[name + "_lower"].to_a, seas[name + "_upper"].to_a, color: "#0072B2", alpha: 0.2)]
|
204
204
|
end
|
205
205
|
ax.grid(true, which: "major", c: "gray", ls: "-", lw: 1, alpha: 0.2)
|
206
206
|
months = dates.MonthLocator.new((1..12).to_a, bymonthday: 1, interval: 2)
|
@@ -231,9 +231,9 @@ module Prophet
|
|
231
231
|
days = plot_points.times.map { |i| Time.at(start + i * step).utc }
|
232
232
|
df_y = seasonality_plot_df(days)
|
233
233
|
seas = predict_seasonal_components(df_y)
|
234
|
-
artists += ax.plot(to_pydatetime(df_y["ds"]), seas[name].
|
234
|
+
artists += ax.plot(to_pydatetime(df_y["ds"]), seas[name].to_a, ls: "-", c: "#0072B2")
|
235
235
|
if uncertainty && @uncertainty_samples
|
236
|
-
artists += [ax.fill_between(to_pydatetime(df_y["ds"]), seas[name + "_lower"].
|
236
|
+
artists += [ax.fill_between(to_pydatetime(df_y["ds"]), seas[name + "_lower"].to_a, seas[name + "_upper"].to_a, color: "#0072B2", alpha: 0.2)]
|
237
237
|
end
|
238
238
|
ax.grid(true, which: "major", c: "gray", ls: "-", lw: 1, alpha: 0.2)
|
239
239
|
step = (finish - start) / (7 - 1).to_f
|
@@ -281,7 +281,7 @@ module Prophet
|
|
281
281
|
|
282
282
|
def to_pydatetime(v)
|
283
283
|
datetime = PyCall.import_module("datetime")
|
284
|
-
v.map { |v| datetime.datetime.utcfromtimestamp(v.to_i) }
|
284
|
+
v.map { |v| datetime.datetime.utcfromtimestamp(v.to_i) }.to_a
|
285
285
|
end
|
286
286
|
end
|
287
287
|
end
|
data/lib/prophet/stan_backend.rb
CHANGED
@@ -127,7 +127,7 @@ module Prophet
|
|
127
127
|
stan_data["t_change"] = stan_data["t_change"].to_a
|
128
128
|
stan_data["s_a"] = stan_data["s_a"].to_a
|
129
129
|
stan_data["s_m"] = stan_data["s_m"].to_a
|
130
|
-
stan_data["X"] = stan_data["X"].
|
130
|
+
stan_data["X"] = stan_data["X"].to_numo.to_a
|
131
131
|
stan_init["delta"] = stan_init["delta"].to_a
|
132
132
|
stan_init["beta"] = stan_init["beta"].to_a
|
133
133
|
[stan_init, stan_data]
|
data/lib/prophet/version.rb
CHANGED
data/lib/prophet.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: prophet-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-05-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cmdstan
|
@@ -25,21 +25,21 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 0.1.2
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: numo-narray
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 0.9.1.7
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 0.9.1.7
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: rover-df
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
@@ -95,7 +95,7 @@ dependencies:
|
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '5'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
98
|
+
name: daru
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - ">="
|
@@ -109,7 +109,7 @@ dependencies:
|
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
112
|
+
name: matplotlib
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
115
|
- - ">="
|