prophet-rb 0.1.1 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e97133837196d4e1c97d69687d42f2d97e552d7be897f5e7a805efb5bab73e32
4
- data.tar.gz: 4450d57d2c3da8632011f9f5a802891586b3e19347abe377e533ba5e8922708f
3
+ metadata.gz: 506ab7cfb738d7f7289db812134b2fb64d6371da66e7586a1bcc254b26c6fa1c
4
+ data.tar.gz: ed97303bb3563bdebe86cf9865ec04142e9b3fa0e3f7bfdea8ae6a8a2ff8570f
5
5
  SHA512:
6
- metadata.gz: 6f0ed88d1a93d2f15e9750640833dbd889d8dea86255c8ec29c0fdc608ce27d17a0f617cbcaaee0be4b469b8e945f0ead9161875907a44a0555173e0f1a2c984
7
- data.tar.gz: 485b4742b5267a8540445a87d59320a6ba5cc5589192369d22d69bfc1002d1ae2cb822a88a547ab63ff113e44b5ba47db51c45acb4bedc84079afd57210ea4ed
6
+ metadata.gz: 5339ac3f8e7f26539137dc23b40d481013bc1ec082a008221edff00ea574ab08617956849e7cd221af40e9b12105630cbe294f8b5e2cfa1a33670d9c1fcd970c
7
+ data.tar.gz: 63b64fbef8414f65dfb39b7c266e780790f28b42cc6f2a0da415885b293c57ba309d50243e9d9ad2d61d93fe034017a31871cef26eed5e33b01cc651816eed3c
data/CHANGELOG.md CHANGED
@@ -1,3 +1,24 @@
1
+ ## 0.2.4 (2021-04-02)
2
+
3
+ - Added support for flat growth
4
+
5
+ ## 0.2.3 (2020-10-14)
6
+
7
+ - Added support for times to `forecast` method
8
+
9
+ ## 0.2.2 (2020-07-26)
10
+
11
+ - Fixed error with constant series
12
+ - Fixed error with no changepoints
13
+
14
+ ## 0.2.1 (2020-07-15)
15
+
16
+ - Added `forecast` method
17
+
18
+ ## 0.2.0 (2020-05-13)
19
+
20
+ - Switched from Daru to Rover
21
+
1
22
  ## 0.1.1 (2020-04-10)
2
23
 
3
24
  - Added `add_changepoints_to_plot`
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2020 Andrew Kane
4
3
  Copyright (c) Facebook, Inc. and its affiliates.
4
+ Copyright (c) 2020 Andrew Kane
5
5
 
6
6
  Permission is hereby granted, free of charge, to any person obtaining
7
7
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -10,7 +10,7 @@ Supports:
10
10
 
11
11
  And gracefully handles missing data
12
12
 
13
- [![Build Status](https://travis-ci.org/ankane/prophet.svg?branch=master)](https://travis-ci.org/ankane/prophet)
13
+ [![Build Status](https://github.com/ankane/prophet/workflows/build/badge.svg?branch=master)](https://github.com/ankane/prophet/actions)
14
14
 
15
15
  ## Installation
16
16
 
@@ -20,19 +20,47 @@ Add this line to your application’s Gemfile:
20
20
  gem 'prophet-rb'
21
21
  ```
22
22
 
23
- ## Documentation
23
+ ## Simple API
24
24
 
25
- Check out the [Prophet documentation](https://facebook.github.io/prophet/docs/quick_start.html) for a great explanation of all of the features. The Ruby API follows the Python API and supports the same features.
25
+ Get future predictions for a time series
26
26
 
27
- ## Quick Start
27
+ ```ruby
28
+ series = {
29
+ Date.parse("2020-01-01") => 100,
30
+ Date.parse("2020-01-02") => 150,
31
+ Date.parse("2020-01-03") => 136,
32
+ # ...
33
+ }
34
+
35
+ Prophet.forecast(series)
36
+ ```
37
+
38
+ Specify the number of predictions to return
39
+
40
+ ```ruby
41
+ Prophet.forecast(series, count: 3)
42
+ ```
43
+
44
+ Works great with [Groupdate](https://github.com/ankane/groupdate)
45
+
46
+ ```ruby
47
+ series = User.group_by_day(:created_at).count
48
+ Prophet.forecast(series)
49
+ ```
50
+
51
+ ## Advanced API
52
+
53
+ Check out the [Prophet documentation](https://facebook.github.io/prophet/docs/quick_start.html) for a great explanation of all of the features. The advanced API follows the Python API and supports the same features. It uses [Rover](https://github.com/ankane/rover) for data frames.
54
+
55
+ ## Advanced Quick Start
28
56
 
29
57
  [Explanation](https://facebook.github.io/prophet/docs/quick_start.html)
30
58
 
31
59
  Create a data frame with `ds` and `y` columns - here’s [an example](examples/example_wp_log_peyton_manning.csv) you can use
32
60
 
33
61
  ```ruby
34
- df = Daru::DataFrame.from_csv("example_wp_log_peyton_manning.csv")
35
- df.head(5)
62
+ df = Rover.read_csv("example_wp_log_peyton_manning.csv")
63
+ df.head
36
64
  ```
37
65
 
38
66
  ds | y
@@ -54,7 +82,7 @@ Make a data frame with a `ds` column for future predictions
54
82
 
55
83
  ```ruby
56
84
  future = m.make_future_dataframe(periods: 365)
57
- future.tail(5)
85
+ future.tail
58
86
  ```
59
87
 
60
88
  ds |
@@ -69,7 +97,7 @@ Make predictions
69
97
 
70
98
  ```ruby
71
99
  forecast = m.predict(future)
72
- forecast["ds", "yhat", "yhat_lower", "yhat_upper"].tail(5)
100
+ forecast[["ds", "yhat", "yhat_lower", "yhat_upper"]].tail
73
101
  ```
74
102
 
75
103
  ds | yhat | yhat_lower | yhat_upper
@@ -90,7 +118,7 @@ Plot the forecast
90
118
  m.plot(forecast).savefig("forecast.png")
91
119
  ```
92
120
 
93
- ![Forecast](https://blazer.dokkuapp.com/assets/prophet/forecast-a9d43195b8ad23703eda7bb8b52b8a758efb4699e2313f32d7bbdfaa2f4275f6.png)
121
+ ![Forecast](https://blazer.dokkuapp.com/assets/prophet/forecast-77cf453fda67d1b462c6c22aee3a02572203b71c4517fedecc1f438cd374a876.png)
94
122
 
95
123
  Plot components
96
124
 
@@ -98,7 +126,7 @@ Plot components
98
126
  m.plot_components(forecast).savefig("components.png")
99
127
  ```
100
128
 
101
- ![Components](https://blazer.dokkuapp.com/assets/prophet/components-b9e31bfcf77e57bbd503c0bcff5e5544e66085b90709b06dd96c5f622a87d84f.png)
129
+ ![Components](https://blazer.dokkuapp.com/assets/prophet/components-2cdd260e23bc89824ecca25f6bfe394deb5821d60b7e0e551469c90d204acd67.png)
102
130
 
103
131
  ## Saturating Forecasts
104
132
 
@@ -107,7 +135,7 @@ m.plot_components(forecast).savefig("components.png")
107
135
  Forecast logistic growth instead of linear
108
136
 
109
137
  ```ruby
110
- df = Daru::DataFrame.from_csv("example_wp_log_R.csv")
138
+ df = Rover.read_csv("example_wp_log_R.csv")
111
139
  df["cap"] = 8.5
112
140
  m = Prophet.new(growth: "logistic")
113
141
  m.fit(df)
@@ -146,21 +174,21 @@ m = Prophet.new(changepoints: ["2014-01-01"])
146
174
  Create a data frame with `holiday` and `ds` columns. Include all occurrences in your past data and future occurrences you’d like to forecast.
147
175
 
148
176
  ```ruby
149
- playoffs = Daru::DataFrame.new(
150
- "holiday" => ["playoff"] * 14,
177
+ playoffs = Rover::DataFrame.new(
178
+ "holiday" => "playoff",
151
179
  "ds" => ["2008-01-13", "2009-01-03", "2010-01-16",
152
180
  "2010-01-24", "2010-02-07", "2011-01-08",
153
181
  "2013-01-12", "2014-01-12", "2014-01-19",
154
182
  "2014-02-02", "2015-01-11", "2016-01-17",
155
183
  "2016-01-24", "2016-02-07"],
156
- "lower_window" => [0] * 14,
157
- "upper_window" => [1] * 14
184
+ "lower_window" => 0,
185
+ "upper_window" => 1
158
186
  )
159
- superbowls = Daru::DataFrame.new(
160
- "holiday" => ["superbowl"] * 3,
187
+ superbowls = Rover::DataFrame.new(
188
+ "holiday" => "superbowl",
161
189
  "ds" => ["2010-02-07", "2014-02-02", "2016-02-07"],
162
- "lower_window" => [0] * 3,
163
- "upper_window" => [1] * 3
190
+ "lower_window" => 0,
191
+ "upper_window" => 1
164
192
  )
165
193
  holidays = playoffs.concat(superbowls)
166
194
 
@@ -208,7 +236,7 @@ forecast = m.predict(future)
208
236
  [Explanation](https://facebook.github.io/prophet/docs/multiplicative_seasonality.html)
209
237
 
210
238
  ```ruby
211
- df = Daru::DataFrame.from_csv("example_air_passengers.csv")
239
+ df = Rover.read_csv("example_air_passengers.csv")
212
240
  m = Prophet.new(seasonality_mode: "multiplicative")
213
241
  m.fit(df)
214
242
  future = m.make_future_dataframe(periods: 50, freq: "MS")
@@ -236,7 +264,7 @@ Prophet.new(mcmc_samples: 300)
236
264
  Sub-daily data
237
265
 
238
266
  ```ruby
239
- df = Daru::DataFrame.from_csv("example_yosemite_temps.csv")
267
+ df = Rover.read_csv("example_yosemite_temps.csv")
240
268
  m = Prophet.new(changepoint_prior_scale: 0.01).fit(df)
241
269
  future = m.make_future_dataframe(periods: 300, freq: "H")
242
270
  forecast = m.predict(future)
@@ -246,6 +274,15 @@ forecast = m.predict(future)
246
274
 
247
275
  - [Forecasting at Scale](https://peerj.com/preprints/3190.pdf)
248
276
 
277
+ ## Upgrading
278
+
279
+ ### 0.2.0
280
+
281
+ Prophet now uses [Rover](https://github.com/ankane/rover) instead of Daru. Two changes you may need to make are:
282
+
283
+ - `Rover.read_csv` instead of `Daru::DataFrame.from_csv`
284
+ - `df[["ds", "yhat"]]` instead of `df["ds", "yhat"]`
285
+
249
286
  ## Credits
250
287
 
251
288
  This library was ported from the [Prophet Python library](https://github.com/facebook/prophet) and is available under the same license.
data/lib/prophet.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # dependencies
2
2
  require "cmdstan"
3
- require "daru"
3
+ require "rover"
4
4
  require "numo/narray"
5
5
 
6
6
  # stdlib
@@ -20,4 +20,67 @@ module Prophet
20
20
  def self.new(**kwargs)
21
21
  Forecaster.new(**kwargs)
22
22
  end
23
+
24
+ def self.forecast(series, count: 10)
25
+ raise ArgumentError, "Series must have at least 10 data points" if series.size < 10
26
+
27
+ # check type to determine output format
28
+ # check before converting to time
29
+ keys = series.keys
30
+ dates = keys.all? { |k| k.is_a?(Date) }
31
+ time_zone = keys.first.time_zone if keys.first.respond_to?(:time_zone)
32
+ utc = keys.first.utc? if keys.first.respond_to?(:utc?)
33
+ times = keys.map(&:to_time)
34
+
35
+ day = times.all? { |t| t.hour == 0 && t.min == 0 && t.sec == 0 && t.nsec == 0 }
36
+ week = day && times.map { |k| k.wday }.uniq.size == 1
37
+ month = day && times.all? { |k| k.day == 1 }
38
+ quarter = month && times.all? { |k| k.month % 3 == 1 }
39
+ year = quarter && times.all? { |k| k.month == 1 }
40
+
41
+ freq =
42
+ if year
43
+ "YS"
44
+ elsif quarter
45
+ "QS"
46
+ elsif month
47
+ "MS"
48
+ elsif week
49
+ "W"
50
+ elsif day
51
+ "D"
52
+ else
53
+ diff = Rover::Vector.new(times).sort.diff.to_numo[1..-1]
54
+ min_diff = diff.min.to_i
55
+
56
+ # could be another common divisor
57
+ # but keep it simple for now
58
+ raise "Unknown frequency" unless (diff % min_diff).eq(0).all?
59
+
60
+ "#{min_diff}S"
61
+ end
62
+
63
+ # use series, not times, so dates are handled correctly
64
+ df = Rover::DataFrame.new({"ds" => series.keys, "y" => series.values})
65
+
66
+ m = Prophet.new
67
+ m.logger.level = ::Logger::FATAL # no logging
68
+ m.fit(df)
69
+
70
+ future = m.make_future_dataframe(periods: count, include_history: false, freq: freq)
71
+ forecast = m.predict(future)
72
+ result = forecast[["ds", "yhat"]].to_a
73
+
74
+ # use the same format as input
75
+ if dates
76
+ result.each { |v| v["ds"] = v["ds"].to_date }
77
+ elsif time_zone
78
+ result.each { |v| v["ds"] = v["ds"].in_time_zone(time_zone) }
79
+ elsif utc
80
+ result.each { |v| v["ds"] = v["ds"].utc }
81
+ else
82
+ result.each { |v| v["ds"] = v["ds"].localtime }
83
+ end
84
+ result.map { |v| [v["ds"], v["yhat"]] }.to_h
85
+ end
23
86
  end
@@ -75,19 +75,19 @@ module Prophet
75
75
  end
76
76
 
77
77
  def validate_inputs
78
- if !["linear", "logistic"].include?(@growth)
79
- raise ArgumentError, "Parameter \"growth\" should be \"linear\" or \"logistic\"."
78
+ if !["linear", "logistic", "flat"].include?(@growth)
79
+ raise ArgumentError, "Parameter \"growth\" should be \"linear\", \"logistic\", or \"flat\"."
80
80
  end
81
81
  if @changepoint_range < 0 || @changepoint_range > 1
82
82
  raise ArgumentError, "Parameter \"changepoint_range\" must be in [0, 1]"
83
83
  end
84
84
  if @holidays
85
- if !@holidays.is_a?(Daru::DataFrame) && @holidays.vectors.include?("ds") && @holidays.vectors.include?("holiday")
85
+ if !@holidays.is_a?(Rover::DataFrame) && @holidays.include?("ds") && @holidays.include?("holiday")
86
86
  raise ArgumentError, "holidays must be a DataFrame with \"ds\" and \"holiday\" columns."
87
87
  end
88
88
  @holidays["ds"] = to_datetime(@holidays["ds"])
89
- has_lower = @holidays.vectors.include?("lower_window")
90
- has_upper = @holidays.vectors.include?("upper_window")
89
+ has_lower = @holidays.include?("lower_window")
90
+ has_upper = @holidays.include?("upper_window")
91
91
  if has_lower ^ has_upper # xor
92
92
  raise ArgumentError, "Holidays must have both lower_window and upper_window, or neither"
93
93
  end
@@ -141,7 +141,7 @@ module Prophet
141
141
  end
142
142
 
143
143
  def setup_dataframe(df, initialize_scales: false)
144
- if df.vectors.include?("y")
144
+ if df.include?("y")
145
145
  df["y"] = df["y"].map(&:to_f)
146
146
  raise ArgumentError "Found infinity in column y." unless df["y"].all?(&:finite?)
147
147
  end
@@ -152,7 +152,7 @@ module Prophet
152
152
  raise ArgumentError, "Found NaN in column ds." if df["ds"].any?(&:nil?)
153
153
 
154
154
  @extra_regressors.each_key do |name|
155
- if !df.vectors.include?(name)
155
+ if !df.include?(name)
156
156
  raise ArgumentError, "Regressor #{name.inspect} missing from dataframe"
157
157
  end
158
158
  df[name] = df[name].map(&:to_f)
@@ -163,7 +163,7 @@ module Prophet
163
163
  @seasonalities.values.each do |props|
164
164
  condition_name = props[:condition_name]
165
165
  if condition_name
166
- if !df.vectors.include?(condition_name)
166
+ if !df.include?(condition_name)
167
167
  raise ArgumentError, "Condition #{condition_name.inspect} missing from dataframe"
168
168
  end
169
169
  if df.where(!df[condition_name].in([true, false])).any?
@@ -172,36 +172,33 @@ module Prophet
172
172
  end
173
173
  end
174
174
 
175
- if df.index.name == "ds"
176
- df.index.name = nil
177
- end
178
- df = df.sort(["ds"])
175
+ df = df.sort_by { |r| r["ds"] }
179
176
 
180
177
  initialize_scales(initialize_scales, df)
181
178
 
182
- if @logistic_floor && !df.vectors.include?("floor")
179
+ if @logistic_floor && !df.include?("floor")
183
180
  raise ArgumentError, "Expected column \"floor\"."
184
181
  else
185
182
  df["floor"] = 0
186
183
  end
187
184
 
188
185
  if @growth == "logistic"
189
- unless df.vectors.include?("cap")
186
+ unless df.include?("cap")
190
187
  raise ArgumentError, "Capacities must be supplied for logistic growth in column \"cap\""
191
188
  end
192
- if df.where(df["cap"] <= df["floor"]).size > 0
189
+ if df[df["cap"] <= df["floor"]].size > 0
193
190
  raise ArgumentError, "cap must be greater than floor (which defaults to 0)."
194
191
  end
195
- df["cap_scaled"] = (df["cap"] - df["floor"]) / @y_scale
192
+ df["cap_scaled"] = (df["cap"] - df["floor"]) / @y_scale.to_f
196
193
  end
197
194
 
198
195
  df["t"] = (df["ds"] - @start) / @t_scale.to_f
199
- if df.vectors.include?("y")
200
- df["y_scaled"] = (df["y"] - df["floor"]) / @y_scale
196
+ if df.include?("y")
197
+ df["y_scaled"] = (df["y"] - df["floor"]) / @y_scale.to_f
201
198
  end
202
199
 
203
200
  @extra_regressors.each do |name, props|
204
- df[name] = ((df[name] - props[:mu]) / props[:std])
201
+ df[name] = (df[name] - props[:mu]) / props[:std].to_f
205
202
  end
206
203
 
207
204
  df
@@ -237,21 +234,21 @@ module Prophet
237
234
  if @n_changepoints > 0
238
235
  step = (hist_size - 1) / @n_changepoints.to_f
239
236
  cp_indexes = (@n_changepoints + 1).times.map { |i| (i * step).round }
240
- @changepoints = ensure_arr(@history["ds"][*cp_indexes].to_a.last(cp_indexes.size - 1))
237
+ @changepoints = Rover::Vector.new(@history["ds"].to_a.values_at(*cp_indexes)).tail(-1)
241
238
  else
242
239
  @changepoints = []
243
240
  end
244
241
  end
245
242
 
246
243
  if @changepoints.size > 0
247
- @changepoints_t = (Numo::DFloat.cast(@changepoints.map(&:to_i).sort) - @start.to_i) / @t_scale.to_f
244
+ @changepoints_t = (@changepoints.map(&:to_i).sort.to_numo.cast_to(Numo::DFloat) - @start.to_i) / @t_scale.to_f
248
245
  else
249
246
  @changepoints_t = Numo::NArray.asarray([0])
250
247
  end
251
248
  end
252
249
 
253
250
  def fourier_series(dates, period, series_order)
254
- t = Numo::DFloat.asarray(dates.map(&:to_i)) / (3600 * 24.0)
251
+ t = dates.map(&:to_i).to_numo / (3600 * 24.0)
255
252
 
256
253
  # no need for column_stack
257
254
  series_order.times.flat_map do |i|
@@ -263,11 +260,11 @@ module Prophet
263
260
 
264
261
  def make_seasonality_features(dates, period, series_order, prefix)
265
262
  features = fourier_series(dates, period, series_order)
266
- Daru::DataFrame.new(features.map.with_index { |v, i| ["#{prefix}_delim_#{i + 1}", v] }.to_h)
263
+ Rover::DataFrame.new(features.map.with_index { |v, i| ["#{prefix}_delim_#{i + 1}", v] }.to_h)
267
264
  end
268
265
 
269
266
  def construct_holiday_dataframe(dates)
270
- all_holidays = Daru::DataFrame.new
267
+ all_holidays = Rover::DataFrame.new
271
268
  if @holidays
272
269
  all_holidays = @holidays.dup
273
270
  end
@@ -279,12 +276,12 @@ module Prophet
279
276
  # Drop future holidays not previously seen in training data
280
277
  if @train_holiday_names
281
278
  # Remove holiday names that didn't show up in fit
282
- all_holidays = all_holidays.where(all_holidays["holiday"].in(@train_holiday_names))
279
+ all_holidays = all_holidays[all_holidays["holiday"].in?(@train_holiday_names)]
283
280
 
284
281
  # Add holiday names in fit but not in predict with ds as NA
285
- holidays_to_add = Daru::DataFrame.new(
286
- "holiday" => @train_holiday_names.where(!@train_holiday_names.in(all_holidays["holiday"]))
287
- )
282
+ holidays_to_add = Rover::DataFrame.new({
283
+ "holiday" => @train_holiday_names[!@train_holiday_names.in?(all_holidays["holiday"])]
284
+ })
288
285
  all_holidays = all_holidays.concat(holidays_to_add)
289
286
  end
290
287
 
@@ -318,7 +315,7 @@ module Prophet
318
315
 
319
316
  lw.upto(uw).each do |offset|
320
317
  occurrence = dt ? dt + offset : nil
321
- loc = occurrence ? row_index.index(occurrence) : nil
318
+ loc = occurrence ? row_index.to_a.index(occurrence) : nil
322
319
  key = "#{row["holiday"]}_delim_#{offset >= 0 ? "+" : "-"}#{offset.abs}"
323
320
  if loc
324
321
  expanded_holidays[key][loc] = 1.0
@@ -327,14 +324,14 @@ module Prophet
327
324
  end
328
325
  end
329
326
  end
330
- holiday_features = Daru::DataFrame.new(expanded_holidays)
331
- # # Make sure column order is consistent
332
- holiday_features = holiday_features[*holiday_features.vectors.sort]
333
- prior_scale_list = holiday_features.vectors.map { |h| prior_scales[h.split("_delim_")[0]] }
327
+ holiday_features = Rover::DataFrame.new(expanded_holidays)
328
+ # Make sure column order is consistent
329
+ holiday_features = holiday_features[holiday_features.vector_names.sort]
330
+ prior_scale_list = holiday_features.vector_names.map { |h| prior_scales[h.split("_delim_")[0]] }
334
331
  holiday_names = prior_scales.keys
335
332
  # Store holiday names used in fit
336
- if !@train_holiday_names
337
- @train_holiday_names = Daru::Vector.new(holiday_names)
333
+ if @train_holiday_names.nil?
334
+ @train_holiday_names = Rover::Vector.new(holiday_names)
338
335
  end
339
336
  [holiday_features, prior_scale_list, holiday_names]
340
337
  end
@@ -432,16 +429,16 @@ module Prophet
432
429
  modes[@seasonality_mode].concat(holiday_names)
433
430
  end
434
431
 
435
- # # Additional regressors
432
+ # Additional regressors
436
433
  @extra_regressors.each do |name, props|
437
- seasonal_features << df[name].to_df
434
+ seasonal_features << Rover::DataFrame.new({name => df[name]})
438
435
  prior_scales << props[:prior_scale]
439
436
  modes[props[:mode]] << name
440
437
  end
441
438
 
442
- # # Dummy to prevent empty X
439
+ # Dummy to prevent empty X
443
440
  if seasonal_features.size == 0
444
- seasonal_features << Daru::DataFrame.new("zeros" => [0] * df.shape[0])
441
+ seasonal_features << Rover::DataFrame.new({"zeros" => [0] * df.shape[0]})
445
442
  prior_scales << 1.0
446
443
  end
447
444
 
@@ -453,16 +450,16 @@ module Prophet
453
450
  end
454
451
 
455
452
  def regressor_column_matrix(seasonal_features, modes)
456
- components = Daru::DataFrame.new(
453
+ components = Rover::DataFrame.new(
457
454
  "col" => seasonal_features.shape[1].times.to_a,
458
- "component" => seasonal_features.vectors.map { |x| x.split("_delim_")[0] }
455
+ "component" => seasonal_features.vector_names.map { |x| x.split("_delim_")[0] }
459
456
  )
460
457
 
461
- # # Add total for holidays
458
+ # Add total for holidays
462
459
  if @train_holiday_names
463
460
  components = add_group_component(components, "holidays", @train_holiday_names.uniq)
464
461
  end
465
- # # Add totals additive and multiplicative components, and regressors
462
+ # Add totals additive and multiplicative components, and regressors
466
463
  ["additive", "multiplicative"].each do |mode|
467
464
  components = add_group_component(components, mode + "_terms", modes[mode])
468
465
  regressors_by_mode = @extra_regressors.select { |r, props| props[:mode] == mode }
@@ -473,20 +470,15 @@ module Prophet
473
470
  modes[mode] << mode + "_terms"
474
471
  modes[mode] << "extra_regressors_" + mode
475
472
  end
476
- # # After all of the additive/multiplicative groups have been added,
473
+ # After all of the additive/multiplicative groups have been added, add holidays to the seasonality mode
477
474
  modes[@seasonality_mode] << "holidays"
478
- # # Convert to a binary matrix
479
- component_cols = Daru::DataFrame.crosstab_by_assignation(
480
- components["col"], components["component"], [1] * components.size
481
- )
482
- component_cols.each_vector do |v|
483
- v.map! { |vi| vi.nil? ? 0 : vi }
484
- end
485
- component_cols.rename_vectors(:_id => "col")
475
+ # Convert to a binary matrix
476
+ component_cols = components["col"].crosstab(components["component"])
477
+ component_cols["col"] = component_cols.delete("_")
486
478
 
487
479
  # Add columns for additive and multiplicative terms, if missing
488
480
  ["additive_terms", "multiplicative_terms"].each do |name|
489
- component_cols[name] = 0 unless component_cols.vectors.include?(name)
481
+ component_cols[name] = 0 unless component_cols.include?(name)
490
482
  end
491
483
 
492
484
  # TODO validation
@@ -495,10 +487,10 @@ module Prophet
495
487
  end
496
488
 
497
489
  def add_group_component(components, name, group)
498
- new_comp = components.where(components["component"].in(group)).dup
490
+ new_comp = components[components["component"].in?(group)].dup
499
491
  group_cols = new_comp["col"].uniq
500
492
  if group_cols.size > 0
501
- new_comp = Daru::DataFrame.new("col" => group_cols, "component" => [name] * group_cols.size)
493
+ new_comp = Rover::DataFrame.new({"col" => group_cols, "component" => name})
502
494
  components = components.concat(new_comp)
503
495
  end
504
496
  components
@@ -574,8 +566,8 @@ module Prophet
574
566
  end
575
567
 
576
568
  def linear_growth_init(df)
577
- i0 = df["ds"].index.min
578
- i1 = df["ds"].index.max
569
+ i0 = 0
570
+ i1 = df.size - 1
579
571
  t = df["t"][i1] - df["t"][i0]
580
572
  k = (df["y_scaled"][i1] - df["y_scaled"][i0]) / t
581
573
  m = df["y_scaled"][i0] - k * df["t"][i0]
@@ -583,8 +575,8 @@ module Prophet
583
575
  end
584
576
 
585
577
  def logistic_growth_init(df)
586
- i0 = df["ds"].index.min
587
- i1 = df["ds"].index.max
578
+ i0 = 0
579
+ i1 = df.size - 1
588
580
  t = df["t"][i1] - df["t"][i0]
589
581
 
590
582
  # Force valid values, in case y > cap or y < 0
@@ -610,11 +602,22 @@ module Prophet
610
602
  [k, m]
611
603
  end
612
604
 
605
+ def flat_growth_init(df)
606
+ k = 0
607
+ m = df["y_scaled"].mean
608
+ [k, m]
609
+ end
610
+
613
611
  def fit(df, **kwargs)
614
612
  raise Error, "Prophet object can only be fit once" if @history
615
613
 
616
- history = df.where(!df["y"].in([nil, Float::NAN]))
617
- raise Error, "Data has less than 2 non-nil rows" if history.shape[0] < 2
614
+ if defined?(Daru::DataFrame) && df.is_a?(Daru::DataFrame)
615
+ df = Rover::DataFrame.new(df.to_h)
616
+ end
617
+ raise ArgumentError, "Must be a data frame" unless df.is_a?(Rover::DataFrame)
618
+
619
+ history = df[!df["y"].missing]
620
+ raise Error, "Data has less than 2 non-nil rows" if history.size < 2
618
621
 
619
622
  @history_dates = to_datetime(df["ds"]).sort
620
623
  history = setup_dataframe(history, initialize_scales: true)
@@ -627,6 +630,8 @@ module Prophet
627
630
 
628
631
  set_changepoints
629
632
 
633
+ trend_indicator = {"linear" => 0, "logistic" => 1, "flat" => 2}
634
+
630
635
  dat = {
631
636
  "T" => history.shape[0],
632
637
  "K" => seasonal_features.shape[1],
@@ -637,7 +642,7 @@ module Prophet
637
642
  "X" => seasonal_features,
638
643
  "sigmas" => prior_scales,
639
644
  "tau" => @changepoint_prior_scale,
640
- "trend_indicator" => @growth == "logistic" ? 1 : 0,
645
+ "trend_indicator" => trend_indicator[@growth],
641
646
  "s_a" => component_cols["additive_terms"],
642
647
  "s_m" => component_cols["multiplicative_terms"]
643
648
  }
@@ -645,6 +650,9 @@ module Prophet
645
650
  if @growth == "linear"
646
651
  dat["cap"] = Numo::DFloat.zeros(@history.shape[0])
647
652
  kinit = linear_growth_init(history)
653
+ elsif @growth == "flat"
654
+ dat["cap"] = Numo::DFloat.zeros(@history.shape[0])
655
+ kinit = flat_growth_init(history)
648
656
  else
649
657
  dat["cap"] = history["cap_scaled"]
650
658
  kinit = logistic_growth_init(history)
@@ -658,12 +666,12 @@ module Prophet
658
666
  "sigma_obs" => 1
659
667
  }
660
668
 
661
- if history["y"].min == history["y"].max && @growth == "linear"
669
+ if history["y"].min == history["y"].max && (@growth == "linear" || @growth == "flat")
662
670
  # Nothing to fit.
663
671
  @params = stan_init
664
672
  @params["sigma_obs"] = 1e-9
665
- @params.each do |par|
666
- @params[par] = Numo::NArray.asarray(@params[par])
673
+ @params.each do |par, _|
674
+ @params[par] = Numo::NArray.asarray([@params[par]])
667
675
  end
668
676
  elsif @mcmc_samples > 0
669
677
  @params = @stan_backend.sampling(stan_init, dat, @mcmc_samples, **kwargs)
@@ -674,8 +682,10 @@ module Prophet
674
682
  # If no changepoints were requested, replace delta with 0s
675
683
  if @changepoints.size == 0
676
684
  # Fold delta into the base rate k
677
- @params["k"] = @params["k"] + @params["delta"].reshape(-1)
678
- @params["delta"] = Numo::DFloat.zeros(@params["delta"].shape).reshape(-1, 1)
685
+ # Numo doesn't support -1 with reshape
686
+ negative_one = @params["delta"].shape.inject(&:*)
687
+ @params["k"] = @params["k"] + @params["delta"].reshape(negative_one)
688
+ @params["delta"] = Numo::DFloat.zeros(@params["delta"].shape).reshape(negative_one, 1)
679
689
  end
680
690
 
681
691
  self
@@ -701,10 +711,10 @@ module Prophet
701
711
 
702
712
  # Drop columns except ds, cap, floor, and trend
703
713
  cols = ["ds", "trend"]
704
- cols << "cap" if df.vectors.include?("cap")
714
+ cols << "cap" if df.include?("cap")
705
715
  cols << "floor" if @logistic_floor
706
716
  # Add in forecast components
707
- df2 = df_concat_axis_one([df[*cols], intervals, seasonal_components])
717
+ df2 = df_concat_axis_one([df[cols], intervals, seasonal_components])
708
718
  df2["yhat"] = df2["trend"] * (df2["multiplicative_terms"] + 1) + df2["additive_terms"]
709
719
  df2
710
720
  end
@@ -739,8 +749,12 @@ module Prophet
739
749
  k_t[indx] += deltas[s]
740
750
  m_t[indx] += gammas[s]
741
751
  end
742
- # need df_values to prevent memory from blowing up
743
- df_values(cap) / (1 + Numo::NMath.exp(-k_t * (t - m_t)))
752
+ cap.to_numo / (1 + Numo::NMath.exp(-k_t * (t - m_t)))
753
+ end
754
+
755
+ def flat_trend(t, m)
756
+ m_t = m * t.new_ones
757
+ m_t
744
758
  end
745
759
 
746
760
  def predict_trend(df)
@@ -751,9 +765,11 @@ module Prophet
751
765
  t = Numo::NArray.asarray(df["t"].to_a)
752
766
  if @growth == "linear"
753
767
  trend = piecewise_linear(t, deltas, k, m, @changepoints_t)
754
- else
768
+ elsif @growth == "logistic"
755
769
  cap = df["cap_scaled"]
756
770
  trend = piecewise_logistic(t, cap, deltas, k, m, @changepoints_t)
771
+ elsif @growth == "flat"
772
+ trend = flat_trend(t, m)
757
773
  end
758
774
 
759
775
  trend * @y_scale + Numo::NArray.asarray(df["floor"].to_a)
@@ -766,10 +782,10 @@ module Prophet
766
782
  upper_p = 100 * (1.0 + @interval_width) / 2
767
783
  end
768
784
 
769
- x = df_values(seasonal_features)
785
+ x = seasonal_features.to_numo
770
786
  data = {}
771
- component_cols.vectors.each do |component|
772
- beta_c = @params["beta"] * Numo::NArray.asarray(component_cols[component].to_a)
787
+ component_cols.vector_names.each do |component|
788
+ beta_c = @params["beta"] * component_cols[component].to_numo
773
789
 
774
790
  comp = x.dot(beta_c.transpose)
775
791
  if @component_modes["additive"].include?(component)
@@ -777,11 +793,11 @@ module Prophet
777
793
  end
778
794
  data[component] = comp.mean(axis: 1, nan: true)
779
795
  if @uncertainty_samples
780
- data[component + "_lower"] = percentile(comp, lower_p, axis: 1)
781
- data[component + "_upper"] = percentile(comp, upper_p, axis: 1)
796
+ data[component + "_lower"] = comp.percentile(lower_p, axis: 1)
797
+ data[component + "_upper"] = comp.percentile(upper_p, axis: 1)
782
798
  end
783
799
  end
784
- Daru::DataFrame.new(data)
800
+ Rover::DataFrame.new(data)
785
801
  end
786
802
 
787
803
  def sample_posterior_predictive(df)
@@ -792,9 +808,9 @@ module Prophet
792
808
  seasonal_features, _, component_cols, _ = make_all_seasonality_features(df)
793
809
 
794
810
  # convert to Numo for performance
795
- seasonal_features = df_values(seasonal_features)
796
- additive_terms = df_values(component_cols["additive_terms"])
797
- multiplicative_terms = df_values(component_cols["multiplicative_terms"])
811
+ seasonal_features = seasonal_features.to_numo
812
+ additive_terms = component_cols["additive_terms"].to_numo
813
+ multiplicative_terms = component_cols["multiplicative_terms"].to_numo
798
814
 
799
815
  sim_values = {"yhat" => [], "trend" => []}
800
816
  n_iterations.times do |i|
@@ -831,11 +847,11 @@ module Prophet
831
847
 
832
848
  series = {}
833
849
  ["yhat", "trend"].each do |key|
834
- series["#{key}_lower"] = percentile(sim_values[key], lower_p, axis: 1)
835
- series["#{key}_upper"] = percentile(sim_values[key], upper_p, axis: 1)
850
+ series["#{key}_lower"] = sim_values[key].percentile(lower_p, axis: 1)
851
+ series["#{key}_upper"] = sim_values[key].percentile(upper_p, axis: 1)
836
852
  end
837
853
 
838
- Daru::DataFrame.new(series)
854
+ Rover::DataFrame.new(series)
839
855
  end
840
856
 
841
857
  def sample_model(df, seasonal_features, iteration, s_a, s_m)
@@ -889,35 +905,25 @@ module Prophet
889
905
 
890
906
  if @growth == "linear"
891
907
  trend = piecewise_linear(t, deltas, k, m, changepoint_ts)
892
- else
908
+ elsif @growth == "logistic"
893
909
  cap = df["cap_scaled"]
894
910
  trend = piecewise_logistic(t, cap, deltas, k, m, changepoint_ts)
911
+ elsif @growth == "flat"
912
+ trend = flat_trend(t, m)
895
913
  end
896
914
 
897
915
  trend * @y_scale + Numo::NArray.asarray(df["floor"].to_a)
898
916
  end
899
917
 
900
- def percentile(a, percentile, axis:)
901
- raise Error, "Axis must be 1" if axis != 1
902
-
903
- sorted = a.sort(axis: axis)
904
- x = percentile / 100.0 * (sorted.shape[axis] - 1)
905
- r = x % 1
906
- i = x.floor
907
- # this should use axis, but we only need axis: 1
908
- if i == sorted.shape[axis] - 1
909
- sorted[true, -1]
910
- else
911
- sorted[true, i] + r * (sorted[true, i + 1] - sorted[true, i])
912
- end
913
- end
914
-
915
918
  def make_future_dataframe(periods:, freq: "D", include_history: true)
916
919
  raise Error, "Model has not been fit" unless @history_dates
917
920
  last_date = @history_dates.max
918
921
  # TODO add more freq
919
922
  # https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
920
923
  case freq
924
+ when /\A\d+S\z/
925
+ secs = freq.to_i
926
+ dates = (periods + 1).times.map { |i| last_date + i * secs }
921
927
  when "H"
922
928
  hour = 3600
923
929
  dates = (periods + 1).times.map { |i| last_date + i * hour }
@@ -930,56 +936,60 @@ module Prophet
930
936
  dates = (periods + 1).times.map { |i| last_date + i * week }
931
937
  when "MS"
932
938
  dates = [last_date]
939
+ # TODO reset day from last date, but keep time
933
940
  periods.times do
934
941
  dates << dates.last.to_datetime.next_month.to_time.utc
935
942
  end
943
+ when "QS"
944
+ dates = [last_date]
945
+ # TODO reset day and month from last date, but keep time
946
+ periods.times do
947
+ dates << dates.last.to_datetime.next_month.next_month.next_month.to_time.utc
948
+ end
949
+ when "YS"
950
+ dates = [last_date]
951
+ # TODO reset day and month from last date, but keep time
952
+ periods.times do
953
+ dates << dates.last.to_datetime.next_year.to_time.utc
954
+ end
936
955
  else
937
956
  raise ArgumentError, "Unknown freq: #{freq}"
938
957
  end
939
958
  dates.select! { |d| d > last_date }
940
959
  dates = dates.last(periods)
941
- dates = @history_dates + dates if include_history
942
- Daru::DataFrame.new("ds" => dates)
960
+ dates = @history_dates.to_numo.concatenate(Numo::NArray.cast(dates)) if include_history
961
+ Rover::DataFrame.new({"ds" => dates})
943
962
  end
944
963
 
945
964
  private
946
965
 
947
- # Time is prefer over DateTime Ruby
966
+ # Time is preferred over DateTime in Ruby docs
948
967
  # use UTC to be consistent with Python
949
968
  # and so days have equal length (no DST)
950
969
  def to_datetime(vec)
951
970
  return if vec.nil?
952
- vec.map do |v|
953
- case v
954
- when Time
955
- v.utc
956
- when Date
957
- v.to_datetime.to_time.utc
958
- else
959
- DateTime.parse(v.to_s).to_time.utc
971
+ vec =
972
+ vec.map do |v|
973
+ case v
974
+ when Time
975
+ v.utc
976
+ when Date
977
+ v.to_datetime.to_time.utc
978
+ else
979
+ DateTime.parse(v.to_s).to_time.utc
980
+ end
960
981
  end
961
- end
982
+ Rover::Vector.new(vec)
962
983
  end
963
984
 
964
985
  # okay to do in-place
965
986
  def df_concat_axis_one(dfs)
966
987
  dfs[1..-1].each do |df|
967
- df.each_vector_with_index do |v, k|
968
- dfs[0][k] = v
969
- end
988
+ dfs[0].merge!(df)
970
989
  end
971
990
  dfs[0]
972
991
  end
973
992
 
974
- def df_values(df)
975
- if df.is_a?(Daru::Vector)
976
- Numo::NArray.asarray(df.to_a)
977
- else
978
- # TODO make more performant
979
- Numo::NArray.asarray(df.to_matrix.to_a)
980
- end
981
- end
982
-
983
993
  # https://en.wikipedia.org/wiki/Poisson_distribution#Generating_Poisson-distributed_random_variables
984
994
  def poisson(lam)
985
995
  l = Math.exp(-lam)
@@ -994,13 +1004,8 @@ module Prophet
994
1004
 
995
1005
  # https://en.wikipedia.org/wiki/Laplace_distribution#Generating_values_from_the_Laplace_distribution
996
1006
  def laplace(loc, scale, size)
997
- u = Numo::DFloat.new(size).rand - 0.5
1007
+ u = Numo::DFloat.new(size).rand(-0.5, 0.5)
998
1008
  loc - scale * u.sign * Numo::NMath.log(1 - 2 * u.abs)
999
1009
  end
1000
-
1001
- def ensure_arr(value)
1002
- value = [value] unless value.is_a?(Array)
1003
- value
1004
- end
1005
1010
  end
1006
1011
  end