prophet-rb 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e97133837196d4e1c97d69687d42f2d97e552d7be897f5e7a805efb5bab73e32
4
- data.tar.gz: 4450d57d2c3da8632011f9f5a802891586b3e19347abe377e533ba5e8922708f
3
+ metadata.gz: 2b37dc1a6be57b67cd740727e0bf4ac0b3a4cf2e27ed19647b631094696787da
4
+ data.tar.gz: 892af24ebdd897d7dba904ed7b11a83390533666de13bb3f57172e26839ade3a
5
5
  SHA512:
6
- metadata.gz: 6f0ed88d1a93d2f15e9750640833dbd889d8dea86255c8ec29c0fdc608ce27d17a0f617cbcaaee0be4b469b8e945f0ead9161875907a44a0555173e0f1a2c984
7
- data.tar.gz: 485b4742b5267a8540445a87d59320a6ba5cc5589192369d22d69bfc1002d1ae2cb822a88a547ab63ff113e44b5ba47db51c45acb4bedc84079afd57210ea4ed
6
+ metadata.gz: 53bc289290cf1a7861419634a057253f3727994c2e911247af0adf7e39688a15c019e180274cb8dd1e2a140e94dafc90ef7ff996319446b535c5f08a9090a990
7
+ data.tar.gz: 6b4ecdfcfb03f3e9da68f244fd8d25f5f9bee9a854bdc4d9f68d4907f59260a485df323eb70cc60cdc68e1c1e0d5e9b5431313785bd22da72758fd90fbda7bd2
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.0 (2020-05-13)
2
+
3
+ - Switched from Daru to Rover
4
+
1
5
  ## 0.1.1 (2020-04-10)
2
6
 
3
7
  - Added `add_changepoints_to_plot`
data/README.md CHANGED
@@ -10,7 +10,7 @@ Supports:
10
10
 
11
11
  And gracefully handles missing data
12
12
 
13
- [![Build Status](https://travis-ci.org/ankane/prophet.svg?branch=master)](https://travis-ci.org/ankane/prophet)
13
+ [![Build Status](https://travis-ci.org/ankane/prophet.svg?branch=master)](https://travis-ci.org/ankane/prophet) [![Build status](https://ci.appveyor.com/api/projects/status/8ahmsvvhum4ivnmv/branch/master?svg=true)](https://ci.appveyor.com/project/ankane/prophet/branch/master)
14
14
 
15
15
  ## Installation
16
16
 
@@ -31,7 +31,7 @@ Check out the [Prophet documentation](https://facebook.github.io/prophet/docs/qu
31
31
  Create a data frame with `ds` and `y` columns - here’s [an example](examples/example_wp_log_peyton_manning.csv) you can use
32
32
 
33
33
  ```ruby
34
- df = Daru::DataFrame.from_csv("example_wp_log_peyton_manning.csv")
34
+ df = Rover.read_csv("example_wp_log_peyton_manning.csv")
35
35
  df.head(5)
36
36
  ```
37
37
 
@@ -107,7 +107,7 @@ m.plot_components(forecast).savefig("components.png")
107
107
  Forecast logistic growth instead of linear
108
108
 
109
109
  ```ruby
110
- df = Daru::DataFrame.from_csv("example_wp_log_R.csv")
110
+ df = Rover.read_csv("example_wp_log_R.csv")
111
111
  df["cap"] = 8.5
112
112
  m = Prophet.new(growth: "logistic")
113
113
  m.fit(df)
@@ -146,7 +146,7 @@ m = Prophet.new(changepoints: ["2014-01-01"])
146
146
  Create a data frame with `holiday` and `ds` columns. Include all occurrences in your past data and future occurrences you’d like to forecast.
147
147
 
148
148
  ```ruby
149
- playoffs = Daru::DataFrame.new(
149
+ playoffs = Rover::DataFrame.new(
150
150
  "holiday" => ["playoff"] * 14,
151
151
  "ds" => ["2008-01-13", "2009-01-03", "2010-01-16",
152
152
  "2010-01-24", "2010-02-07", "2011-01-08",
@@ -156,7 +156,7 @@ playoffs = Daru::DataFrame.new(
156
156
  "lower_window" => [0] * 14,
157
157
  "upper_window" => [1] * 14
158
158
  )
159
- superbowls = Daru::DataFrame.new(
159
+ superbowls = Rover::DataFrame.new(
160
160
  "holiday" => ["superbowl"] * 3,
161
161
  "ds" => ["2010-02-07", "2014-02-02", "2016-02-07"],
162
162
  "lower_window" => [0] * 3,
@@ -208,7 +208,7 @@ forecast = m.predict(future)
208
208
  [Explanation](https://facebook.github.io/prophet/docs/multiplicative_seasonality.html)
209
209
 
210
210
  ```ruby
211
- df = Daru::DataFrame.from_csv("example_air_passengers.csv")
211
+ df = Rover.read_csv("example_air_passengers.csv")
212
212
  m = Prophet.new(seasonality_mode: "multiplicative")
213
213
  m.fit(df)
214
214
  future = m.make_future_dataframe(periods: 50, freq: "MS")
@@ -236,7 +236,7 @@ Prophet.new(mcmc_samples: 300)
236
236
  Sub-daily data
237
237
 
238
238
  ```ruby
239
- df = Daru::DataFrame.from_csv("example_yosemite_temps.csv")
239
+ df = Rover.read_csv("example_yosemite_temps.csv")
240
240
  m = Prophet.new(changepoint_prior_scale: 0.01).fit(df)
241
241
  future = m.make_future_dataframe(periods: 300, freq: "H")
242
242
  forecast = m.predict(future)
@@ -82,12 +82,12 @@ module Prophet
82
82
  raise ArgumentError, "Parameter \"changepoint_range\" must be in [0, 1]"
83
83
  end
84
84
  if @holidays
85
- if !@holidays.is_a?(Daru::DataFrame) && @holidays.vectors.include?("ds") && @holidays.vectors.include?("holiday")
85
+ if !@holidays.is_a?(Rover::DataFrame) && @holidays.include?("ds") && @holidays.include?("holiday")
86
86
  raise ArgumentError, "holidays must be a DataFrame with \"ds\" and \"holiday\" columns."
87
87
  end
88
88
  @holidays["ds"] = to_datetime(@holidays["ds"])
89
- has_lower = @holidays.vectors.include?("lower_window")
90
- has_upper = @holidays.vectors.include?("upper_window")
89
+ has_lower = @holidays.include?("lower_window")
90
+ has_upper = @holidays.include?("upper_window")
91
91
  if has_lower ^ has_upper # xor
92
92
  raise ArgumentError, "Holidays must have both lower_window and upper_window, or neither"
93
93
  end
@@ -141,7 +141,7 @@ module Prophet
141
141
  end
142
142
 
143
143
  def setup_dataframe(df, initialize_scales: false)
144
- if df.vectors.include?("y")
144
+ if df.include?("y")
145
145
  df["y"] = df["y"].map(&:to_f)
146
146
  raise ArgumentError "Found infinity in column y." unless df["y"].all?(&:finite?)
147
147
  end
@@ -152,7 +152,7 @@ module Prophet
152
152
  raise ArgumentError, "Found NaN in column ds." if df["ds"].any?(&:nil?)
153
153
 
154
154
  @extra_regressors.each_key do |name|
155
- if !df.vectors.include?(name)
155
+ if !df.include?(name)
156
156
  raise ArgumentError, "Regressor #{name.inspect} missing from dataframe"
157
157
  end
158
158
  df[name] = df[name].map(&:to_f)
@@ -163,7 +163,7 @@ module Prophet
163
163
  @seasonalities.values.each do |props|
164
164
  condition_name = props[:condition_name]
165
165
  if condition_name
166
- if !df.vectors.include?(condition_name)
166
+ if !df.include?(condition_name)
167
167
  raise ArgumentError, "Condition #{condition_name.inspect} missing from dataframe"
168
168
  end
169
169
  if df.where(!df[condition_name].in([true, false])).any?
@@ -172,36 +172,33 @@ module Prophet
172
172
  end
173
173
  end
174
174
 
175
- if df.index.name == "ds"
176
- df.index.name = nil
177
- end
178
- df = df.sort(["ds"])
175
+ df = df.sort_by { |r| r["ds"] }
179
176
 
180
177
  initialize_scales(initialize_scales, df)
181
178
 
182
- if @logistic_floor && !df.vectors.include?("floor")
179
+ if @logistic_floor && !df.include?("floor")
183
180
  raise ArgumentError, "Expected column \"floor\"."
184
181
  else
185
182
  df["floor"] = 0
186
183
  end
187
184
 
188
185
  if @growth == "logistic"
189
- unless df.vectors.include?("cap")
186
+ unless df.include?("cap")
190
187
  raise ArgumentError, "Capacities must be supplied for logistic growth in column \"cap\""
191
188
  end
192
- if df.where(df["cap"] <= df["floor"]).size > 0
189
+ if df[df["cap"] <= df["floor"]].size > 0
193
190
  raise ArgumentError, "cap must be greater than floor (which defaults to 0)."
194
191
  end
195
- df["cap_scaled"] = (df["cap"] - df["floor"]) / @y_scale
192
+ df["cap_scaled"] = (df["cap"] - df["floor"]) / @y_scale.to_f
196
193
  end
197
194
 
198
195
  df["t"] = (df["ds"] - @start) / @t_scale.to_f
199
- if df.vectors.include?("y")
200
- df["y_scaled"] = (df["y"] - df["floor"]) / @y_scale
196
+ if df.include?("y")
197
+ df["y_scaled"] = (df["y"] - df["floor"]) / @y_scale.to_f
201
198
  end
202
199
 
203
200
  @extra_regressors.each do |name, props|
204
- df[name] = ((df[name] - props[:mu]) / props[:std])
201
+ df[name] = (df[name] - props[:mu]) / props[:std].to_f
205
202
  end
206
203
 
207
204
  df
@@ -237,21 +234,21 @@ module Prophet
237
234
  if @n_changepoints > 0
238
235
  step = (hist_size - 1) / @n_changepoints.to_f
239
236
  cp_indexes = (@n_changepoints + 1).times.map { |i| (i * step).round }
240
- @changepoints = ensure_arr(@history["ds"][*cp_indexes].to_a.last(cp_indexes.size - 1))
237
+ @changepoints = Rover::Vector.new(@history["ds"].to_a.values_at(*cp_indexes)).tail(-1)
241
238
  else
242
239
  @changepoints = []
243
240
  end
244
241
  end
245
242
 
246
243
  if @changepoints.size > 0
247
- @changepoints_t = (Numo::DFloat.cast(@changepoints.map(&:to_i).sort) - @start.to_i) / @t_scale.to_f
244
+ @changepoints_t = (@changepoints.map(&:to_i).sort.to_numo.cast_to(Numo::DFloat) - @start.to_i) / @t_scale.to_f
248
245
  else
249
246
  @changepoints_t = Numo::NArray.asarray([0])
250
247
  end
251
248
  end
252
249
 
253
250
  def fourier_series(dates, period, series_order)
254
- t = Numo::DFloat.asarray(dates.map(&:to_i)) / (3600 * 24.0)
251
+ t = dates.map(&:to_i).to_numo / (3600 * 24.0)
255
252
 
256
253
  # no need for column_stack
257
254
  series_order.times.flat_map do |i|
@@ -263,11 +260,11 @@ module Prophet
263
260
 
264
261
  def make_seasonality_features(dates, period, series_order, prefix)
265
262
  features = fourier_series(dates, period, series_order)
266
- Daru::DataFrame.new(features.map.with_index { |v, i| ["#{prefix}_delim_#{i + 1}", v] }.to_h)
263
+ Rover::DataFrame.new(features.map.with_index { |v, i| ["#{prefix}_delim_#{i + 1}", v] }.to_h)
267
264
  end
268
265
 
269
266
  def construct_holiday_dataframe(dates)
270
- all_holidays = Daru::DataFrame.new
267
+ all_holidays = Rover::DataFrame.new
271
268
  if @holidays
272
269
  all_holidays = @holidays.dup
273
270
  end
@@ -279,12 +276,12 @@ module Prophet
279
276
  # Drop future holidays not previously seen in training data
280
277
  if @train_holiday_names
281
278
  # Remove holiday names that didn't show up in fit
282
- all_holidays = all_holidays.where(all_holidays["holiday"].in(@train_holiday_names))
279
+ all_holidays = all_holidays[all_holidays["holiday"].in?(@train_holiday_names)]
283
280
 
284
281
  # Add holiday names in fit but not in predict with ds as NA
285
- holidays_to_add = Daru::DataFrame.new(
286
- "holiday" => @train_holiday_names.where(!@train_holiday_names.in(all_holidays["holiday"]))
287
- )
282
+ holidays_to_add = Rover::DataFrame.new({
283
+ "holiday" => @train_holiday_names[!@train_holiday_names.in?(all_holidays["holiday"])]
284
+ })
288
285
  all_holidays = all_holidays.concat(holidays_to_add)
289
286
  end
290
287
 
@@ -318,7 +315,7 @@ module Prophet
318
315
 
319
316
  lw.upto(uw).each do |offset|
320
317
  occurrence = dt ? dt + offset : nil
321
- loc = occurrence ? row_index.index(occurrence) : nil
318
+ loc = occurrence ? row_index.to_a.index(occurrence) : nil
322
319
  key = "#{row["holiday"]}_delim_#{offset >= 0 ? "+" : "-"}#{offset.abs}"
323
320
  if loc
324
321
  expanded_holidays[key][loc] = 1.0
@@ -327,14 +324,14 @@ module Prophet
327
324
  end
328
325
  end
329
326
  end
330
- holiday_features = Daru::DataFrame.new(expanded_holidays)
331
- # # Make sure column order is consistent
332
- holiday_features = holiday_features[*holiday_features.vectors.sort]
333
- prior_scale_list = holiday_features.vectors.map { |h| prior_scales[h.split("_delim_")[0]] }
327
+ holiday_features = Rover::DataFrame.new(expanded_holidays)
328
+ # Make sure column order is consistent
329
+ holiday_features = holiday_features[holiday_features.vector_names.sort]
330
+ prior_scale_list = holiday_features.vector_names.map { |h| prior_scales[h.split("_delim_")[0]] }
334
331
  holiday_names = prior_scales.keys
335
332
  # Store holiday names used in fit
336
- if !@train_holiday_names
337
- @train_holiday_names = Daru::Vector.new(holiday_names)
333
+ if @train_holiday_names.nil?
334
+ @train_holiday_names = Rover::Vector.new(holiday_names)
338
335
  end
339
336
  [holiday_features, prior_scale_list, holiday_names]
340
337
  end
@@ -432,16 +429,16 @@ module Prophet
432
429
  modes[@seasonality_mode].concat(holiday_names)
433
430
  end
434
431
 
435
- # # Additional regressors
432
+ # Additional regressors
436
433
  @extra_regressors.each do |name, props|
437
- seasonal_features << df[name].to_df
434
+ seasonal_features << Rover::DataFrame.new({name => df[name]})
438
435
  prior_scales << props[:prior_scale]
439
436
  modes[props[:mode]] << name
440
437
  end
441
438
 
442
- # # Dummy to prevent empty X
439
+ # Dummy to prevent empty X
443
440
  if seasonal_features.size == 0
444
- seasonal_features << Daru::DataFrame.new("zeros" => [0] * df.shape[0])
441
+ seasonal_features << Rover::DataFrame.new({"zeros" => [0] * df.shape[0]})
445
442
  prior_scales << 1.0
446
443
  end
447
444
 
@@ -453,16 +450,16 @@ module Prophet
453
450
  end
454
451
 
455
452
  def regressor_column_matrix(seasonal_features, modes)
456
- components = Daru::DataFrame.new(
453
+ components = Rover::DataFrame.new(
457
454
  "col" => seasonal_features.shape[1].times.to_a,
458
- "component" => seasonal_features.vectors.map { |x| x.split("_delim_")[0] }
455
+ "component" => seasonal_features.vector_names.map { |x| x.split("_delim_")[0] }
459
456
  )
460
457
 
461
- # # Add total for holidays
458
+ # Add total for holidays
462
459
  if @train_holiday_names
463
460
  components = add_group_component(components, "holidays", @train_holiday_names.uniq)
464
461
  end
465
- # # Add totals additive and multiplicative components, and regressors
462
+ # Add totals additive and multiplicative components, and regressors
466
463
  ["additive", "multiplicative"].each do |mode|
467
464
  components = add_group_component(components, mode + "_terms", modes[mode])
468
465
  regressors_by_mode = @extra_regressors.select { |r, props| props[:mode] == mode }
@@ -473,20 +470,15 @@ module Prophet
473
470
  modes[mode] << mode + "_terms"
474
471
  modes[mode] << "extra_regressors_" + mode
475
472
  end
476
- # # After all of the additive/multiplicative groups have been added,
473
+ # After all of the additive/multiplicative groups have been added,
477
474
  modes[@seasonality_mode] << "holidays"
478
- # # Convert to a binary matrix
479
- component_cols = Daru::DataFrame.crosstab_by_assignation(
480
- components["col"], components["component"], [1] * components.size
481
- )
482
- component_cols.each_vector do |v|
483
- v.map! { |vi| vi.nil? ? 0 : vi }
484
- end
485
- component_cols.rename_vectors(:_id => "col")
475
+ # Convert to a binary matrix
476
+ component_cols = components["col"].crosstab(components["component"])
477
+ component_cols["col"] = component_cols.delete("_")
486
478
 
487
479
  # Add columns for additive and multiplicative terms, if missing
488
480
  ["additive_terms", "multiplicative_terms"].each do |name|
489
- component_cols[name] = 0 unless component_cols.vectors.include?(name)
481
+ component_cols[name] = 0 unless component_cols.include?(name)
490
482
  end
491
483
 
492
484
  # TODO validation
@@ -495,10 +487,10 @@ module Prophet
495
487
  end
496
488
 
497
489
  def add_group_component(components, name, group)
498
- new_comp = components.where(components["component"].in(group)).dup
490
+ new_comp = components[components["component"].in?(group)].dup
499
491
  group_cols = new_comp["col"].uniq
500
492
  if group_cols.size > 0
501
- new_comp = Daru::DataFrame.new("col" => group_cols, "component" => [name] * group_cols.size)
493
+ new_comp = Rover::DataFrame.new({"col" => group_cols, "component" => name})
502
494
  components = components.concat(new_comp)
503
495
  end
504
496
  components
@@ -574,8 +566,8 @@ module Prophet
574
566
  end
575
567
 
576
568
  def linear_growth_init(df)
577
- i0 = df["ds"].index.min
578
- i1 = df["ds"].index.max
569
+ i0 = 0
570
+ i1 = df.size - 1
579
571
  t = df["t"][i1] - df["t"][i0]
580
572
  k = (df["y_scaled"][i1] - df["y_scaled"][i0]) / t
581
573
  m = df["y_scaled"][i0] - k * df["t"][i0]
@@ -583,8 +575,8 @@ module Prophet
583
575
  end
584
576
 
585
577
  def logistic_growth_init(df)
586
- i0 = df["ds"].index.min
587
- i1 = df["ds"].index.max
578
+ i0 = 0
579
+ i1 = df.size - 1
588
580
  t = df["t"][i1] - df["t"][i0]
589
581
 
590
582
  # Force valid values, in case y > cap or y < 0
@@ -613,8 +605,13 @@ module Prophet
613
605
  def fit(df, **kwargs)
614
606
  raise Error, "Prophet object can only be fit once" if @history
615
607
 
616
- history = df.where(!df["y"].in([nil, Float::NAN]))
617
- raise Error, "Data has less than 2 non-nil rows" if history.shape[0] < 2
608
+ if defined?(Daru::DataFrame) && df.is_a?(Daru::DataFrame)
609
+ df = Rover::DataFrame.new(df.to_h)
610
+ end
611
+ raise ArgumentError, "Must be a data frame" unless df.is_a?(Rover::DataFrame)
612
+
613
+ history = df[!df["y"].missing]
614
+ raise Error, "Data has less than 2 non-nil rows" if history.size < 2
618
615
 
619
616
  @history_dates = to_datetime(df["ds"]).sort
620
617
  history = setup_dataframe(history, initialize_scales: true)
@@ -701,10 +698,10 @@ module Prophet
701
698
 
702
699
  # Drop columns except ds, cap, floor, and trend
703
700
  cols = ["ds", "trend"]
704
- cols << "cap" if df.vectors.include?("cap")
701
+ cols << "cap" if df.include?("cap")
705
702
  cols << "floor" if @logistic_floor
706
703
  # Add in forecast components
707
- df2 = df_concat_axis_one([df[*cols], intervals, seasonal_components])
704
+ df2 = df_concat_axis_one([df[cols], intervals, seasonal_components])
708
705
  df2["yhat"] = df2["trend"] * (df2["multiplicative_terms"] + 1) + df2["additive_terms"]
709
706
  df2
710
707
  end
@@ -739,8 +736,7 @@ module Prophet
739
736
  k_t[indx] += deltas[s]
740
737
  m_t[indx] += gammas[s]
741
738
  end
742
- # need df_values to prevent memory from blowing up
743
- df_values(cap) / (1 + Numo::NMath.exp(-k_t * (t - m_t)))
739
+ cap.to_numo / (1 + Numo::NMath.exp(-k_t * (t - m_t)))
744
740
  end
745
741
 
746
742
  def predict_trend(df)
@@ -766,10 +762,10 @@ module Prophet
766
762
  upper_p = 100 * (1.0 + @interval_width) / 2
767
763
  end
768
764
 
769
- x = df_values(seasonal_features)
765
+ x = seasonal_features.to_numo
770
766
  data = {}
771
- component_cols.vectors.each do |component|
772
- beta_c = @params["beta"] * Numo::NArray.asarray(component_cols[component].to_a)
767
+ component_cols.vector_names.each do |component|
768
+ beta_c = @params["beta"] * component_cols[component].to_numo
773
769
 
774
770
  comp = x.dot(beta_c.transpose)
775
771
  if @component_modes["additive"].include?(component)
@@ -777,11 +773,11 @@ module Prophet
777
773
  end
778
774
  data[component] = comp.mean(axis: 1, nan: true)
779
775
  if @uncertainty_samples
780
- data[component + "_lower"] = percentile(comp, lower_p, axis: 1)
781
- data[component + "_upper"] = percentile(comp, upper_p, axis: 1)
776
+ data[component + "_lower"] = comp.percentile(lower_p, axis: 1)
777
+ data[component + "_upper"] = comp.percentile(upper_p, axis: 1)
782
778
  end
783
779
  end
784
- Daru::DataFrame.new(data)
780
+ Rover::DataFrame.new(data)
785
781
  end
786
782
 
787
783
  def sample_posterior_predictive(df)
@@ -792,9 +788,9 @@ module Prophet
792
788
  seasonal_features, _, component_cols, _ = make_all_seasonality_features(df)
793
789
 
794
790
  # convert to Numo for performance
795
- seasonal_features = df_values(seasonal_features)
796
- additive_terms = df_values(component_cols["additive_terms"])
797
- multiplicative_terms = df_values(component_cols["multiplicative_terms"])
791
+ seasonal_features = seasonal_features.to_numo
792
+ additive_terms = component_cols["additive_terms"].to_numo
793
+ multiplicative_terms = component_cols["multiplicative_terms"].to_numo
798
794
 
799
795
  sim_values = {"yhat" => [], "trend" => []}
800
796
  n_iterations.times do |i|
@@ -831,11 +827,11 @@ module Prophet
831
827
 
832
828
  series = {}
833
829
  ["yhat", "trend"].each do |key|
834
- series["#{key}_lower"] = percentile(sim_values[key], lower_p, axis: 1)
835
- series["#{key}_upper"] = percentile(sim_values[key], upper_p, axis: 1)
830
+ series["#{key}_lower"] = sim_values[key].percentile(lower_p, axis: 1)
831
+ series["#{key}_upper"] = sim_values[key].percentile(upper_p, axis: 1)
836
832
  end
837
833
 
838
- Daru::DataFrame.new(series)
834
+ Rover::DataFrame.new(series)
839
835
  end
840
836
 
841
837
  def sample_model(df, seasonal_features, iteration, s_a, s_m)
@@ -897,21 +893,6 @@ module Prophet
897
893
  trend * @y_scale + Numo::NArray.asarray(df["floor"].to_a)
898
894
  end
899
895
 
900
- def percentile(a, percentile, axis:)
901
- raise Error, "Axis must be 1" if axis != 1
902
-
903
- sorted = a.sort(axis: axis)
904
- x = percentile / 100.0 * (sorted.shape[axis] - 1)
905
- r = x % 1
906
- i = x.floor
907
- # this should use axis, but we only need axis: 1
908
- if i == sorted.shape[axis] - 1
909
- sorted[true, -1]
910
- else
911
- sorted[true, i] + r * (sorted[true, i + 1] - sorted[true, i])
912
- end
913
- end
914
-
915
896
  def make_future_dataframe(periods:, freq: "D", include_history: true)
916
897
  raise Error, "Model has not been fit" unless @history_dates
917
898
  last_date = @history_dates.max
@@ -938,48 +919,39 @@ module Prophet
938
919
  end
939
920
  dates.select! { |d| d > last_date }
940
921
  dates = dates.last(periods)
941
- dates = @history_dates + dates if include_history
942
- Daru::DataFrame.new("ds" => dates)
922
+ dates = @history_dates.to_numo.concatenate(Numo::NArray.cast(dates)) if include_history
923
+ Rover::DataFrame.new({"ds" => dates})
943
924
  end
944
925
 
945
926
  private
946
927
 
947
- # Time is prefer over DateTime Ruby
928
+ # Time is preferred over DateTime in Ruby docs
948
929
  # use UTC to be consistent with Python
949
930
  # and so days have equal length (no DST)
950
931
  def to_datetime(vec)
951
932
  return if vec.nil?
952
- vec.map do |v|
953
- case v
954
- when Time
955
- v.utc
956
- when Date
957
- v.to_datetime.to_time.utc
958
- else
959
- DateTime.parse(v.to_s).to_time.utc
933
+ vec =
934
+ vec.map do |v|
935
+ case v
936
+ when Time
937
+ v.utc
938
+ when Date
939
+ v.to_datetime.to_time.utc
940
+ else
941
+ DateTime.parse(v.to_s).to_time.utc
942
+ end
960
943
  end
961
- end
944
+ Rover::Vector.new(vec)
962
945
  end
963
946
 
964
947
  # okay to do in-place
965
948
  def df_concat_axis_one(dfs)
966
949
  dfs[1..-1].each do |df|
967
- df.each_vector_with_index do |v, k|
968
- dfs[0][k] = v
969
- end
950
+ dfs[0].merge!(df)
970
951
  end
971
952
  dfs[0]
972
953
  end
973
954
 
974
- def df_values(df)
975
- if df.is_a?(Daru::Vector)
976
- Numo::NArray.asarray(df.to_a)
977
- else
978
- # TODO make more performant
979
- Numo::NArray.asarray(df.to_matrix.to_a)
980
- end
981
- end
982
-
983
955
  # https://en.wikipedia.org/wiki/Poisson_distribution#Generating_Poisson-distributed_random_variables
984
956
  def poisson(lam)
985
957
  l = Math.exp(-lam)
@@ -994,13 +966,8 @@ module Prophet
994
966
 
995
967
  # https://en.wikipedia.org/wiki/Laplace_distribution#Generating_values_from_the_Laplace_distribution
996
968
  def laplace(loc, scale, size)
997
- u = Numo::DFloat.new(size).rand - 0.5
969
+ u = Numo::DFloat.new(size).rand(-0.5, 0.5)
998
970
  loc - scale * u.sign * Numo::NMath.log(1 - 2 * u.abs)
999
971
  end
1000
-
1001
- def ensure_arr(value)
1002
- value = [value] unless value.is_a?(Array)
1003
- value
1004
- end
1005
972
  end
1006
973
  end
@@ -6,7 +6,7 @@ module Prophet
6
6
  end
7
7
 
8
8
  def make_holidays_df(year_list, country)
9
- holidays_df.where(holidays_df["country"].eq(country) & holidays_df["year"].in(year_list))["ds", "holiday"]
9
+ holidays_df[(holidays_df["country"] == country) & (holidays_df["year"].in?(year_list))][["ds", "holiday"]]
10
10
  end
11
11
 
12
12
  # TODO marshal on installation
@@ -20,7 +20,7 @@ module Prophet
20
20
  holidays["country"] << row["country"]
21
21
  holidays["year"] << row["year"]
22
22
  end
23
- Daru::DataFrame.new(holidays)
23
+ Rover::DataFrame.new(holidays)
24
24
  end
25
25
  end
26
26
  end
data/lib/prophet/plot.rb CHANGED
@@ -8,16 +8,16 @@ module Prophet
8
8
  fig = ax.get_figure
9
9
  end
10
10
  fcst_t = to_pydatetime(fcst["ds"])
11
- ax.plot(to_pydatetime(@history["ds"]), @history["y"].map(&:to_f), "k.")
12
- ax.plot(fcst_t, fcst["yhat"].map(&:to_f), ls: "-", c: "#0072B2")
13
- if fcst.vectors.include?("cap") && plot_cap
14
- ax.plot(fcst_t, fcst["cap"].map(&:to_f), ls: "--", c: "k")
11
+ ax.plot(to_pydatetime(@history["ds"]), @history["y"].to_a, "k.")
12
+ ax.plot(fcst_t, fcst["yhat"].to_a, ls: "-", c: "#0072B2")
13
+ if fcst.include?("cap") && plot_cap
14
+ ax.plot(fcst_t, fcst["cap"].to_a, ls: "--", c: "k")
15
15
  end
16
- if @logistic_floor && fcst.vectors.include?("floor") && plot_cap
17
- ax.plot(fcst_t, fcst["floor"].map(&:to_f), ls: "--", c: "k")
16
+ if @logistic_floor && fcst.include?("floor") && plot_cap
17
+ ax.plot(fcst_t, fcst["floor"].to_a, ls: "--", c: "k")
18
18
  end
19
19
  if uncertainty && @uncertainty_samples
20
- ax.fill_between(fcst_t, fcst["yhat_lower"].map(&:to_f), fcst["yhat_upper"].map(&:to_f), color: "#0072B2", alpha: 0.2)
20
+ ax.fill_between(fcst_t, fcst["yhat_lower"].to_a, fcst["yhat_upper"].to_a, color: "#0072B2", alpha: 0.2)
21
21
  end
22
22
  # Specify formatting to workaround matplotlib issue #12925
23
23
  locator = dates.AutoDateLocator.new(interval_multiples: false)
@@ -33,25 +33,25 @@ module Prophet
33
33
 
34
34
  def plot_components(fcst, uncertainty: true, plot_cap: true, weekly_start: 0, yearly_start: 0, figsize: nil)
35
35
  components = ["trend"]
36
- if @train_holiday_names && fcst.vectors.include?("holidays")
36
+ if @train_holiday_names && fcst.include?("holidays")
37
37
  components << "holidays"
38
38
  end
39
39
  # Plot weekly seasonality, if present
40
- if @seasonalities["weekly"] && fcst.vectors.include?("weekly")
40
+ if @seasonalities["weekly"] && fcst.include?("weekly")
41
41
  components << "weekly"
42
42
  end
43
43
  # Yearly if present
44
- if @seasonalities["yearly"] && fcst.vectors.include?("yearly")
44
+ if @seasonalities["yearly"] && fcst.include?("yearly")
45
45
  components << "yearly"
46
46
  end
47
47
  # Other seasonalities
48
- components.concat(@seasonalities.keys.select { |name| fcst.vectors.include?(name) && !["weekly", "yearly"].include?(name) }.sort)
48
+ components.concat(@seasonalities.keys.select { |name| fcst.include?(name) && !["weekly", "yearly"].include?(name) }.sort)
49
49
  regressors = {"additive" => false, "multiplicative" => false}
50
50
  @extra_regressors.each do |name, props|
51
51
  regressors[props[:mode]] = true
52
52
  end
53
53
  ["additive", "multiplicative"].each do |mode|
54
- if regressors[mode] && fcst.vectors.include?("extra_regressors_#{mode}")
54
+ if regressors[mode] && fcst.include?("extra_regressors_#{mode}")
55
55
  components << "extra_regressors_#{mode}"
56
56
  end
57
57
  end
@@ -97,11 +97,11 @@ module Prophet
97
97
  def add_changepoints_to_plot(ax, fcst, threshold: 0.01, cp_color: "r", cp_linestyle: "--", trend: true)
98
98
  artists = []
99
99
  if trend
100
- artists << ax.plot(to_pydatetime(fcst["ds"]), fcst["trend"].map(&:to_f), c: cp_color)
100
+ artists << ax.plot(to_pydatetime(fcst["ds"]), fcst["trend"].to_a, c: cp_color)
101
101
  end
102
102
  signif_changepoints =
103
103
  if @changepoints.size > 0
104
- (@params["delta"].mean(axis: 0, nan: true).abs >= threshold).mask(@changepoints)
104
+ (@params["delta"].mean(axis: 0, nan: true).abs >= threshold).mask(@changepoints.to_numo)
105
105
  else
106
106
  []
107
107
  end
@@ -120,15 +120,15 @@ module Prophet
120
120
  ax = fig.add_subplot(111)
121
121
  end
122
122
  fcst_t = to_pydatetime(fcst["ds"])
123
- artists += ax.plot(fcst_t, fcst[name].map(&:to_f), ls: "-", c: "#0072B2")
124
- if fcst.vectors.include?("cap") && plot_cap
125
- artists += ax.plot(fcst_t, fcst["cap"].map(&:to_f), ls: "--", c: "k")
123
+ artists += ax.plot(fcst_t, fcst[name].to_a, ls: "-", c: "#0072B2")
124
+ if fcst.include?("cap") && plot_cap
125
+ artists += ax.plot(fcst_t, fcst["cap"].to_a, ls: "--", c: "k")
126
126
  end
127
- if @logistic_floor && fcst.vectors.include?("floor") && plot_cap
128
- ax.plot(fcst_t, fcst["floor"].map(&:to_f), ls: "--", c: "k")
127
+ if @logistic_floor && fcst.include?("floor") && plot_cap
128
+ ax.plot(fcst_t, fcst["floor"].to_a, ls: "--", c: "k")
129
129
  end
130
130
  if uncertainty && @uncertainty_samples
131
- artists += [ax.fill_between(fcst_t, fcst[name + "_lower"].map(&:to_f), fcst[name + "_upper"].map(&:to_f), color: "#0072B2", alpha: 0.2)]
131
+ artists += [ax.fill_between(fcst_t, fcst[name + "_lower"].to_a, fcst[name + "_upper"].to_a, color: "#0072B2", alpha: 0.2)]
132
132
  end
133
133
  # Specify formatting to workaround matplotlib issue #12925
134
134
  locator = dates.AutoDateLocator.new(interval_multiples: false)
@@ -145,17 +145,17 @@ module Prophet
145
145
  end
146
146
 
147
147
  def seasonality_plot_df(ds)
148
- df_dict = {"ds" => ds, "cap" => [1.0] * ds.size, "floor" => [0.0] * ds.size}
148
+ df_dict = {"ds" => ds, "cap" => 1.0, "floor" => 0.0}
149
149
  @extra_regressors.each_key do |name|
150
- df_dict[name] = [0.0] * ds.size
150
+ df_dict[name] = 0.0
151
151
  end
152
152
  # Activate all conditional seasonality columns
153
153
  @seasonalities.values.each do |props|
154
154
  if props[:condition_name]
155
- df_dict[props[:condition_name]] = [true] * ds.size
155
+ df_dict[props[:condition_name]] = true
156
156
  end
157
157
  end
158
- df = Daru::DataFrame.new(df_dict)
158
+ df = Rover::DataFrame.new(df_dict)
159
159
  df = setup_dataframe(df)
160
160
  df
161
161
  end
@@ -172,9 +172,9 @@ module Prophet
172
172
  df_w = seasonality_plot_df(days)
173
173
  seas = predict_seasonal_components(df_w)
174
174
  days = days.map { |v| v.strftime("%A") }
175
- artists += ax.plot(days.size.times.to_a, seas[name].map(&:to_f), ls: "-", c: "#0072B2")
175
+ artists += ax.plot(days.size.times.to_a, seas[name].to_a, ls: "-", c: "#0072B2")
176
176
  if uncertainty && @uncertainty_samples
177
- artists += [ax.fill_between(days.size.times.to_a, seas[name + "_lower"].map(&:to_f), seas[name + "_upper"].map(&:to_f), color: "#0072B2", alpha: 0.2)]
177
+ artists += [ax.fill_between(days.size.times.to_a, seas[name + "_lower"].to_a, seas[name + "_upper"].to_a, color: "#0072B2", alpha: 0.2)]
178
178
  end
179
179
  ax.grid(true, which: "major", c: "gray", ls: "-", lw: 1, alpha: 0.2)
180
180
  ax.set_xticks(days.size.times.to_a)
@@ -198,9 +198,9 @@ module Prophet
198
198
  days = 365.times.map { |i| start + i + yearly_start }
199
199
  df_y = seasonality_plot_df(days)
200
200
  seas = predict_seasonal_components(df_y)
201
- artists += ax.plot(to_pydatetime(df_y["ds"]), seas[name].map(&:to_f), ls: "-", c: "#0072B2")
201
+ artists += ax.plot(to_pydatetime(df_y["ds"]), seas[name].to_a, ls: "-", c: "#0072B2")
202
202
  if uncertainty && @uncertainty_samples
203
- artists += [ax.fill_between(to_pydatetime(df_y["ds"]), seas[name + "_lower"].map(&:to_f), seas[name + "_upper"].map(&:to_f), color: "#0072B2", alpha: 0.2)]
203
+ artists += [ax.fill_between(to_pydatetime(df_y["ds"]), seas[name + "_lower"].to_a, seas[name + "_upper"].to_a, color: "#0072B2", alpha: 0.2)]
204
204
  end
205
205
  ax.grid(true, which: "major", c: "gray", ls: "-", lw: 1, alpha: 0.2)
206
206
  months = dates.MonthLocator.new((1..12).to_a, bymonthday: 1, interval: 2)
@@ -231,9 +231,9 @@ module Prophet
231
231
  days = plot_points.times.map { |i| Time.at(start + i * step).utc }
232
232
  df_y = seasonality_plot_df(days)
233
233
  seas = predict_seasonal_components(df_y)
234
- artists += ax.plot(to_pydatetime(df_y["ds"]), seas[name].map(&:to_f), ls: "-", c: "#0072B2")
234
+ artists += ax.plot(to_pydatetime(df_y["ds"]), seas[name].to_a, ls: "-", c: "#0072B2")
235
235
  if uncertainty && @uncertainty_samples
236
- artists += [ax.fill_between(to_pydatetime(df_y["ds"]), seas[name + "_lower"].map(&:to_f), seas[name + "_upper"].map(&:to_f), color: "#0072B2", alpha: 0.2)]
236
+ artists += [ax.fill_between(to_pydatetime(df_y["ds"]), seas[name + "_lower"].to_a, seas[name + "_upper"].to_a, color: "#0072B2", alpha: 0.2)]
237
237
  end
238
238
  ax.grid(true, which: "major", c: "gray", ls: "-", lw: 1, alpha: 0.2)
239
239
  step = (finish - start) / (7 - 1).to_f
@@ -281,7 +281,7 @@ module Prophet
281
281
 
282
282
  def to_pydatetime(v)
283
283
  datetime = PyCall.import_module("datetime")
284
- v.map { |v| datetime.datetime.utcfromtimestamp(v.to_i) }
284
+ v.map { |v| datetime.datetime.utcfromtimestamp(v.to_i) }.to_a
285
285
  end
286
286
  end
287
287
  end
@@ -127,7 +127,7 @@ module Prophet
127
127
  stan_data["t_change"] = stan_data["t_change"].to_a
128
128
  stan_data["s_a"] = stan_data["s_a"].to_a
129
129
  stan_data["s_m"] = stan_data["s_m"].to_a
130
- stan_data["X"] = stan_data["X"].to_matrix.to_a
130
+ stan_data["X"] = stan_data["X"].to_numo.to_a
131
131
  stan_init["delta"] = stan_init["delta"].to_a
132
132
  stan_init["beta"] = stan_init["beta"].to_a
133
133
  [stan_init, stan_data]
@@ -1,3 +1,3 @@
1
1
  module Prophet
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/prophet.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # dependencies
2
2
  require "cmdstan"
3
- require "daru"
3
+ require "rover"
4
4
  require "numo/narray"
5
5
 
6
6
  # stdlib
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prophet-rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-10 00:00:00.000000000 Z
11
+ date: 2020-05-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cmdstan
@@ -25,21 +25,21 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: 0.1.2
27
27
  - !ruby/object:Gem::Dependency
28
- name: daru
28
+ name: numo-narray
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: 0.9.1.7
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
40
+ version: 0.9.1.7
41
41
  - !ruby/object:Gem::Dependency
42
- name: numo-narray
42
+ name: rover-df
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -95,7 +95,7 @@ dependencies:
95
95
  - !ruby/object:Gem::Version
96
96
  version: '5'
97
97
  - !ruby/object:Gem::Dependency
98
- name: matplotlib
98
+ name: daru
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - ">="
@@ -109,7 +109,7 @@ dependencies:
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
111
  - !ruby/object:Gem::Dependency
112
- name: ruby-prof
112
+ name: matplotlib
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
115
  - - ">="