prophet-rb 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e97133837196d4e1c97d69687d42f2d97e552d7be897f5e7a805efb5bab73e32
4
- data.tar.gz: 4450d57d2c3da8632011f9f5a802891586b3e19347abe377e533ba5e8922708f
3
+ metadata.gz: 2b37dc1a6be57b67cd740727e0bf4ac0b3a4cf2e27ed19647b631094696787da
4
+ data.tar.gz: 892af24ebdd897d7dba904ed7b11a83390533666de13bb3f57172e26839ade3a
5
5
  SHA512:
6
- metadata.gz: 6f0ed88d1a93d2f15e9750640833dbd889d8dea86255c8ec29c0fdc608ce27d17a0f617cbcaaee0be4b469b8e945f0ead9161875907a44a0555173e0f1a2c984
7
- data.tar.gz: 485b4742b5267a8540445a87d59320a6ba5cc5589192369d22d69bfc1002d1ae2cb822a88a547ab63ff113e44b5ba47db51c45acb4bedc84079afd57210ea4ed
6
+ metadata.gz: 53bc289290cf1a7861419634a057253f3727994c2e911247af0adf7e39688a15c019e180274cb8dd1e2a140e94dafc90ef7ff996319446b535c5f08a9090a990
7
+ data.tar.gz: 6b4ecdfcfb03f3e9da68f244fd8d25f5f9bee9a854bdc4d9f68d4907f59260a485df323eb70cc60cdc68e1c1e0d5e9b5431313785bd22da72758fd90fbda7bd2
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.0 (2020-05-13)
2
+
3
+ - Switched from Daru to Rover
4
+
1
5
  ## 0.1.1 (2020-04-10)
2
6
 
3
7
  - Added `add_changepoints_to_plot`
data/README.md CHANGED
@@ -10,7 +10,7 @@ Supports:
10
10
 
11
11
  And gracefully handles missing data
12
12
 
13
- [![Build Status](https://travis-ci.org/ankane/prophet.svg?branch=master)](https://travis-ci.org/ankane/prophet)
13
+ [![Build Status](https://travis-ci.org/ankane/prophet.svg?branch=master)](https://travis-ci.org/ankane/prophet) [![Build status](https://ci.appveyor.com/api/projects/status/8ahmsvvhum4ivnmv/branch/master?svg=true)](https://ci.appveyor.com/project/ankane/prophet/branch/master)
14
14
 
15
15
  ## Installation
16
16
 
@@ -31,7 +31,7 @@ Check out the [Prophet documentation](https://facebook.github.io/prophet/docs/qu
31
31
  Create a data frame with `ds` and `y` columns - here’s [an example](examples/example_wp_log_peyton_manning.csv) you can use
32
32
 
33
33
  ```ruby
34
- df = Daru::DataFrame.from_csv("example_wp_log_peyton_manning.csv")
34
+ df = Rover.read_csv("example_wp_log_peyton_manning.csv")
35
35
  df.head(5)
36
36
  ```
37
37
 
@@ -107,7 +107,7 @@ m.plot_components(forecast).savefig("components.png")
107
107
  Forecast logistic growth instead of linear
108
108
 
109
109
  ```ruby
110
- df = Daru::DataFrame.from_csv("example_wp_log_R.csv")
110
+ df = Rover.read_csv("example_wp_log_R.csv")
111
111
  df["cap"] = 8.5
112
112
  m = Prophet.new(growth: "logistic")
113
113
  m.fit(df)
@@ -146,7 +146,7 @@ m = Prophet.new(changepoints: ["2014-01-01"])
146
146
  Create a data frame with `holiday` and `ds` columns. Include all occurrences in your past data and future occurrences you’d like to forecast.
147
147
 
148
148
  ```ruby
149
- playoffs = Daru::DataFrame.new(
149
+ playoffs = Rover::DataFrame.new(
150
150
  "holiday" => ["playoff"] * 14,
151
151
  "ds" => ["2008-01-13", "2009-01-03", "2010-01-16",
152
152
  "2010-01-24", "2010-02-07", "2011-01-08",
@@ -156,7 +156,7 @@ playoffs = Daru::DataFrame.new(
156
156
  "lower_window" => [0] * 14,
157
157
  "upper_window" => [1] * 14
158
158
  )
159
- superbowls = Daru::DataFrame.new(
159
+ superbowls = Rover::DataFrame.new(
160
160
  "holiday" => ["superbowl"] * 3,
161
161
  "ds" => ["2010-02-07", "2014-02-02", "2016-02-07"],
162
162
  "lower_window" => [0] * 3,
@@ -208,7 +208,7 @@ forecast = m.predict(future)
208
208
  [Explanation](https://facebook.github.io/prophet/docs/multiplicative_seasonality.html)
209
209
 
210
210
  ```ruby
211
- df = Daru::DataFrame.from_csv("example_air_passengers.csv")
211
+ df = Rover.read_csv("example_air_passengers.csv")
212
212
  m = Prophet.new(seasonality_mode: "multiplicative")
213
213
  m.fit(df)
214
214
  future = m.make_future_dataframe(periods: 50, freq: "MS")
@@ -236,7 +236,7 @@ Prophet.new(mcmc_samples: 300)
236
236
  Sub-daily data
237
237
 
238
238
  ```ruby
239
- df = Daru::DataFrame.from_csv("example_yosemite_temps.csv")
239
+ df = Rover.read_csv("example_yosemite_temps.csv")
240
240
  m = Prophet.new(changepoint_prior_scale: 0.01).fit(df)
241
241
  future = m.make_future_dataframe(periods: 300, freq: "H")
242
242
  forecast = m.predict(future)
@@ -82,12 +82,12 @@ module Prophet
82
82
  raise ArgumentError, "Parameter \"changepoint_range\" must be in [0, 1]"
83
83
  end
84
84
  if @holidays
85
- if !@holidays.is_a?(Daru::DataFrame) && @holidays.vectors.include?("ds") && @holidays.vectors.include?("holiday")
85
+ if !@holidays.is_a?(Rover::DataFrame) && @holidays.include?("ds") && @holidays.include?("holiday")
86
86
  raise ArgumentError, "holidays must be a DataFrame with \"ds\" and \"holiday\" columns."
87
87
  end
88
88
  @holidays["ds"] = to_datetime(@holidays["ds"])
89
- has_lower = @holidays.vectors.include?("lower_window")
90
- has_upper = @holidays.vectors.include?("upper_window")
89
+ has_lower = @holidays.include?("lower_window")
90
+ has_upper = @holidays.include?("upper_window")
91
91
  if has_lower ^ has_upper # xor
92
92
  raise ArgumentError, "Holidays must have both lower_window and upper_window, or neither"
93
93
  end
@@ -141,7 +141,7 @@ module Prophet
141
141
  end
142
142
 
143
143
  def setup_dataframe(df, initialize_scales: false)
144
- if df.vectors.include?("y")
144
+ if df.include?("y")
145
145
  df["y"] = df["y"].map(&:to_f)
146
146
  raise ArgumentError "Found infinity in column y." unless df["y"].all?(&:finite?)
147
147
  end
@@ -152,7 +152,7 @@ module Prophet
152
152
  raise ArgumentError, "Found NaN in column ds." if df["ds"].any?(&:nil?)
153
153
 
154
154
  @extra_regressors.each_key do |name|
155
- if !df.vectors.include?(name)
155
+ if !df.include?(name)
156
156
  raise ArgumentError, "Regressor #{name.inspect} missing from dataframe"
157
157
  end
158
158
  df[name] = df[name].map(&:to_f)
@@ -163,7 +163,7 @@ module Prophet
163
163
  @seasonalities.values.each do |props|
164
164
  condition_name = props[:condition_name]
165
165
  if condition_name
166
- if !df.vectors.include?(condition_name)
166
+ if !df.include?(condition_name)
167
167
  raise ArgumentError, "Condition #{condition_name.inspect} missing from dataframe"
168
168
  end
169
169
  if df.where(!df[condition_name].in([true, false])).any?
@@ -172,36 +172,33 @@ module Prophet
172
172
  end
173
173
  end
174
174
 
175
- if df.index.name == "ds"
176
- df.index.name = nil
177
- end
178
- df = df.sort(["ds"])
175
+ df = df.sort_by { |r| r["ds"] }
179
176
 
180
177
  initialize_scales(initialize_scales, df)
181
178
 
182
- if @logistic_floor && !df.vectors.include?("floor")
179
+ if @logistic_floor && !df.include?("floor")
183
180
  raise ArgumentError, "Expected column \"floor\"."
184
181
  else
185
182
  df["floor"] = 0
186
183
  end
187
184
 
188
185
  if @growth == "logistic"
189
- unless df.vectors.include?("cap")
186
+ unless df.include?("cap")
190
187
  raise ArgumentError, "Capacities must be supplied for logistic growth in column \"cap\""
191
188
  end
192
- if df.where(df["cap"] <= df["floor"]).size > 0
189
+ if df[df["cap"] <= df["floor"]].size > 0
193
190
  raise ArgumentError, "cap must be greater than floor (which defaults to 0)."
194
191
  end
195
- df["cap_scaled"] = (df["cap"] - df["floor"]) / @y_scale
192
+ df["cap_scaled"] = (df["cap"] - df["floor"]) / @y_scale.to_f
196
193
  end
197
194
 
198
195
  df["t"] = (df["ds"] - @start) / @t_scale.to_f
199
- if df.vectors.include?("y")
200
- df["y_scaled"] = (df["y"] - df["floor"]) / @y_scale
196
+ if df.include?("y")
197
+ df["y_scaled"] = (df["y"] - df["floor"]) / @y_scale.to_f
201
198
  end
202
199
 
203
200
  @extra_regressors.each do |name, props|
204
- df[name] = ((df[name] - props[:mu]) / props[:std])
201
+ df[name] = (df[name] - props[:mu]) / props[:std].to_f
205
202
  end
206
203
 
207
204
  df
@@ -237,21 +234,21 @@ module Prophet
237
234
  if @n_changepoints > 0
238
235
  step = (hist_size - 1) / @n_changepoints.to_f
239
236
  cp_indexes = (@n_changepoints + 1).times.map { |i| (i * step).round }
240
- @changepoints = ensure_arr(@history["ds"][*cp_indexes].to_a.last(cp_indexes.size - 1))
237
+ @changepoints = Rover::Vector.new(@history["ds"].to_a.values_at(*cp_indexes)).tail(-1)
241
238
  else
242
239
  @changepoints = []
243
240
  end
244
241
  end
245
242
 
246
243
  if @changepoints.size > 0
247
- @changepoints_t = (Numo::DFloat.cast(@changepoints.map(&:to_i).sort) - @start.to_i) / @t_scale.to_f
244
+ @changepoints_t = (@changepoints.map(&:to_i).sort.to_numo.cast_to(Numo::DFloat) - @start.to_i) / @t_scale.to_f
248
245
  else
249
246
  @changepoints_t = Numo::NArray.asarray([0])
250
247
  end
251
248
  end
252
249
 
253
250
  def fourier_series(dates, period, series_order)
254
- t = Numo::DFloat.asarray(dates.map(&:to_i)) / (3600 * 24.0)
251
+ t = dates.map(&:to_i).to_numo / (3600 * 24.0)
255
252
 
256
253
  # no need for column_stack
257
254
  series_order.times.flat_map do |i|
@@ -263,11 +260,11 @@ module Prophet
263
260
 
264
261
  def make_seasonality_features(dates, period, series_order, prefix)
265
262
  features = fourier_series(dates, period, series_order)
266
- Daru::DataFrame.new(features.map.with_index { |v, i| ["#{prefix}_delim_#{i + 1}", v] }.to_h)
263
+ Rover::DataFrame.new(features.map.with_index { |v, i| ["#{prefix}_delim_#{i + 1}", v] }.to_h)
267
264
  end
268
265
 
269
266
  def construct_holiday_dataframe(dates)
270
- all_holidays = Daru::DataFrame.new
267
+ all_holidays = Rover::DataFrame.new
271
268
  if @holidays
272
269
  all_holidays = @holidays.dup
273
270
  end
@@ -279,12 +276,12 @@ module Prophet
279
276
  # Drop future holidays not previously seen in training data
280
277
  if @train_holiday_names
281
278
  # Remove holiday names didn't show up in fit
282
- all_holidays = all_holidays.where(all_holidays["holiday"].in(@train_holiday_names))
279
+ all_holidays = all_holidays[all_holidays["holiday"].in?(@train_holiday_names)]
283
280
 
284
281
  # Add holiday names in fit but not in predict with ds as NA
285
- holidays_to_add = Daru::DataFrame.new(
286
- "holiday" => @train_holiday_names.where(!@train_holiday_names.in(all_holidays["holiday"]))
287
- )
282
+ holidays_to_add = Rover::DataFrame.new({
283
+ "holiday" => @train_holiday_names[!@train_holiday_names.in?(all_holidays["holiday"])]
284
+ })
288
285
  all_holidays = all_holidays.concat(holidays_to_add)
289
286
  end
290
287
 
@@ -318,7 +315,7 @@ module Prophet
318
315
 
319
316
  lw.upto(uw).each do |offset|
320
317
  occurrence = dt ? dt + offset : nil
321
- loc = occurrence ? row_index.index(occurrence) : nil
318
+ loc = occurrence ? row_index.to_a.index(occurrence) : nil
322
319
  key = "#{row["holiday"]}_delim_#{offset >= 0 ? "+" : "-"}#{offset.abs}"
323
320
  if loc
324
321
  expanded_holidays[key][loc] = 1.0
@@ -327,14 +324,14 @@ module Prophet
327
324
  end
328
325
  end
329
326
  end
330
- holiday_features = Daru::DataFrame.new(expanded_holidays)
331
- # # Make sure column order is consistent
332
- holiday_features = holiday_features[*holiday_features.vectors.sort]
333
- prior_scale_list = holiday_features.vectors.map { |h| prior_scales[h.split("_delim_")[0]] }
327
+ holiday_features = Rover::DataFrame.new(expanded_holidays)
328
+ # Make sure column order is consistent
329
+ holiday_features = holiday_features[holiday_features.vector_names.sort]
330
+ prior_scale_list = holiday_features.vector_names.map { |h| prior_scales[h.split("_delim_")[0]] }
334
331
  holiday_names = prior_scales.keys
335
332
  # Store holiday names used in fit
336
- if !@train_holiday_names
337
- @train_holiday_names = Daru::Vector.new(holiday_names)
333
+ if @train_holiday_names.nil?
334
+ @train_holiday_names = Rover::Vector.new(holiday_names)
338
335
  end
339
336
  [holiday_features, prior_scale_list, holiday_names]
340
337
  end
@@ -432,16 +429,16 @@ module Prophet
432
429
  modes[@seasonality_mode].concat(holiday_names)
433
430
  end
434
431
 
435
- # # Additional regressors
432
+ # Additional regressors
436
433
  @extra_regressors.each do |name, props|
437
- seasonal_features << df[name].to_df
434
+ seasonal_features << Rover::DataFrame.new({name => df[name]})
438
435
  prior_scales << props[:prior_scale]
439
436
  modes[props[:mode]] << name
440
437
  end
441
438
 
442
- # # Dummy to prevent empty X
439
+ # Dummy to prevent empty X
443
440
  if seasonal_features.size == 0
444
- seasonal_features << Daru::DataFrame.new("zeros" => [0] * df.shape[0])
441
+ seasonal_features << Rover::DataFrame.new({"zeros" => [0] * df.shape[0]})
445
442
  prior_scales << 1.0
446
443
  end
447
444
 
@@ -453,16 +450,16 @@ module Prophet
453
450
  end
454
451
 
455
452
  def regressor_column_matrix(seasonal_features, modes)
456
- components = Daru::DataFrame.new(
453
+ components = Rover::DataFrame.new(
457
454
  "col" => seasonal_features.shape[1].times.to_a,
458
- "component" => seasonal_features.vectors.map { |x| x.split("_delim_")[0] }
455
+ "component" => seasonal_features.vector_names.map { |x| x.split("_delim_")[0] }
459
456
  )
460
457
 
461
- # # Add total for holidays
458
+ # Add total for holidays
462
459
  if @train_holiday_names
463
460
  components = add_group_component(components, "holidays", @train_holiday_names.uniq)
464
461
  end
465
- # # Add totals additive and multiplicative components, and regressors
462
+ # Add totals additive and multiplicative components, and regressors
466
463
  ["additive", "multiplicative"].each do |mode|
467
464
  components = add_group_component(components, mode + "_terms", modes[mode])
468
465
  regressors_by_mode = @extra_regressors.select { |r, props| props[:mode] == mode }
@@ -473,20 +470,15 @@ module Prophet
473
470
  modes[mode] << mode + "_terms"
474
471
  modes[mode] << "extra_regressors_" + mode
475
472
  end
476
- # # After all of the additive/multiplicative groups have been added,
473
+ # After all of the additive/multiplicative groups have been added, append "holidays" to the current seasonality mode's list
477
474
  modes[@seasonality_mode] << "holidays"
478
- # # Convert to a binary matrix
479
- component_cols = Daru::DataFrame.crosstab_by_assignation(
480
- components["col"], components["component"], [1] * components.size
481
- )
482
- component_cols.each_vector do |v|
483
- v.map! { |vi| vi.nil? ? 0 : vi }
484
- end
485
- component_cols.rename_vectors(:_id => "col")
475
+ # Convert to a binary matrix
476
+ component_cols = components["col"].crosstab(components["component"])
477
+ component_cols["col"] = component_cols.delete("_")
486
478
 
487
479
  # Add columns for additive and multiplicative terms, if missing
488
480
  ["additive_terms", "multiplicative_terms"].each do |name|
489
- component_cols[name] = 0 unless component_cols.vectors.include?(name)
481
+ component_cols[name] = 0 unless component_cols.include?(name)
490
482
  end
491
483
 
492
484
  # TODO validation
@@ -495,10 +487,10 @@ module Prophet
495
487
  end
496
488
 
497
489
  def add_group_component(components, name, group)
498
- new_comp = components.where(components["component"].in(group)).dup
490
+ new_comp = components[components["component"].in?(group)].dup
499
491
  group_cols = new_comp["col"].uniq
500
492
  if group_cols.size > 0
501
- new_comp = Daru::DataFrame.new("col" => group_cols, "component" => [name] * group_cols.size)
493
+ new_comp = Rover::DataFrame.new({"col" => group_cols, "component" => name})
502
494
  components = components.concat(new_comp)
503
495
  end
504
496
  components
@@ -574,8 +566,8 @@ module Prophet
574
566
  end
575
567
 
576
568
  def linear_growth_init(df)
577
- i0 = df["ds"].index.min
578
- i1 = df["ds"].index.max
569
+ i0 = 0
570
+ i1 = df.size - 1
579
571
  t = df["t"][i1] - df["t"][i0]
580
572
  k = (df["y_scaled"][i1] - df["y_scaled"][i0]) / t
581
573
  m = df["y_scaled"][i0] - k * df["t"][i0]
@@ -583,8 +575,8 @@ module Prophet
583
575
  end
584
576
 
585
577
  def logistic_growth_init(df)
586
- i0 = df["ds"].index.min
587
- i1 = df["ds"].index.max
578
+ i0 = 0
579
+ i1 = df.size - 1
588
580
  t = df["t"][i1] - df["t"][i0]
589
581
 
590
582
  # Force valid values, in case y > cap or y < 0
@@ -613,8 +605,13 @@ module Prophet
613
605
  def fit(df, **kwargs)
614
606
  raise Error, "Prophet object can only be fit once" if @history
615
607
 
616
- history = df.where(!df["y"].in([nil, Float::NAN]))
617
- raise Error, "Data has less than 2 non-nil rows" if history.shape[0] < 2
608
+ if defined?(Daru::DataFrame) && df.is_a?(Daru::DataFrame)
609
+ df = Rover::DataFrame.new(df.to_h)
610
+ end
611
+ raise ArgumentError, "Must be a data frame" unless df.is_a?(Rover::DataFrame)
612
+
613
+ history = df[!df["y"].missing]
614
+ raise Error, "Data has less than 2 non-nil rows" if history.size < 2
618
615
 
619
616
  @history_dates = to_datetime(df["ds"]).sort
620
617
  history = setup_dataframe(history, initialize_scales: true)
@@ -701,10 +698,10 @@ module Prophet
701
698
 
702
699
  # Drop columns except ds, cap, floor, and trend
703
700
  cols = ["ds", "trend"]
704
- cols << "cap" if df.vectors.include?("cap")
701
+ cols << "cap" if df.include?("cap")
705
702
  cols << "floor" if @logistic_floor
706
703
  # Add in forecast components
707
- df2 = df_concat_axis_one([df[*cols], intervals, seasonal_components])
704
+ df2 = df_concat_axis_one([df[cols], intervals, seasonal_components])
708
705
  df2["yhat"] = df2["trend"] * (df2["multiplicative_terms"] + 1) + df2["additive_terms"]
709
706
  df2
710
707
  end
@@ -739,8 +736,7 @@ module Prophet
739
736
  k_t[indx] += deltas[s]
740
737
  m_t[indx] += gammas[s]
741
738
  end
742
- # need df_values to prevent memory from blowing up
743
- df_values(cap) / (1 + Numo::NMath.exp(-k_t * (t - m_t)))
739
+ cap.to_numo / (1 + Numo::NMath.exp(-k_t * (t - m_t)))
744
740
  end
745
741
 
746
742
  def predict_trend(df)
@@ -766,10 +762,10 @@ module Prophet
766
762
  upper_p = 100 * (1.0 + @interval_width) / 2
767
763
  end
768
764
 
769
- x = df_values(seasonal_features)
765
+ x = seasonal_features.to_numo
770
766
  data = {}
771
- component_cols.vectors.each do |component|
772
- beta_c = @params["beta"] * Numo::NArray.asarray(component_cols[component].to_a)
767
+ component_cols.vector_names.each do |component|
768
+ beta_c = @params["beta"] * component_cols[component].to_numo
773
769
 
774
770
  comp = x.dot(beta_c.transpose)
775
771
  if @component_modes["additive"].include?(component)
@@ -777,11 +773,11 @@ module Prophet
777
773
  end
778
774
  data[component] = comp.mean(axis: 1, nan: true)
779
775
  if @uncertainty_samples
780
- data[component + "_lower"] = percentile(comp, lower_p, axis: 1)
781
- data[component + "_upper"] = percentile(comp, upper_p, axis: 1)
776
+ data[component + "_lower"] = comp.percentile(lower_p, axis: 1)
777
+ data[component + "_upper"] = comp.percentile(upper_p, axis: 1)
782
778
  end
783
779
  end
784
- Daru::DataFrame.new(data)
780
+ Rover::DataFrame.new(data)
785
781
  end
786
782
 
787
783
  def sample_posterior_predictive(df)
@@ -792,9 +788,9 @@ module Prophet
792
788
  seasonal_features, _, component_cols, _ = make_all_seasonality_features(df)
793
789
 
794
790
  # convert to Numo for performance
795
- seasonal_features = df_values(seasonal_features)
796
- additive_terms = df_values(component_cols["additive_terms"])
797
- multiplicative_terms = df_values(component_cols["multiplicative_terms"])
791
+ seasonal_features = seasonal_features.to_numo
792
+ additive_terms = component_cols["additive_terms"].to_numo
793
+ multiplicative_terms = component_cols["multiplicative_terms"].to_numo
798
794
 
799
795
  sim_values = {"yhat" => [], "trend" => []}
800
796
  n_iterations.times do |i|
@@ -831,11 +827,11 @@ module Prophet
831
827
 
832
828
  series = {}
833
829
  ["yhat", "trend"].each do |key|
834
- series["#{key}_lower"] = percentile(sim_values[key], lower_p, axis: 1)
835
- series["#{key}_upper"] = percentile(sim_values[key], upper_p, axis: 1)
830
+ series["#{key}_lower"] = sim_values[key].percentile(lower_p, axis: 1)
831
+ series["#{key}_upper"] = sim_values[key].percentile(upper_p, axis: 1)
836
832
  end
837
833
 
838
- Daru::DataFrame.new(series)
834
+ Rover::DataFrame.new(series)
839
835
  end
840
836
 
841
837
  def sample_model(df, seasonal_features, iteration, s_a, s_m)
@@ -897,21 +893,6 @@ module Prophet
897
893
  trend * @y_scale + Numo::NArray.asarray(df["floor"].to_a)
898
894
  end
899
895
 
900
- def percentile(a, percentile, axis:)
901
- raise Error, "Axis must be 1" if axis != 1
902
-
903
- sorted = a.sort(axis: axis)
904
- x = percentile / 100.0 * (sorted.shape[axis] - 1)
905
- r = x % 1
906
- i = x.floor
907
- # this should use axis, but we only need axis: 1
908
- if i == sorted.shape[axis] - 1
909
- sorted[true, -1]
910
- else
911
- sorted[true, i] + r * (sorted[true, i + 1] - sorted[true, i])
912
- end
913
- end
914
-
915
896
  def make_future_dataframe(periods:, freq: "D", include_history: true)
916
897
  raise Error, "Model has not been fit" unless @history_dates
917
898
  last_date = @history_dates.max
@@ -938,48 +919,39 @@ module Prophet
938
919
  end
939
920
  dates.select! { |d| d > last_date }
940
921
  dates = dates.last(periods)
941
- dates = @history_dates + dates if include_history
942
- Daru::DataFrame.new("ds" => dates)
922
+ dates = @history_dates.to_numo.concatenate(Numo::NArray.cast(dates)) if include_history
923
+ Rover::DataFrame.new({"ds" => dates})
943
924
  end
944
925
 
945
926
  private
946
927
 
947
- # Time is prefer over DateTime Ruby
928
+ # Time is preferred over DateTime in Ruby docs
948
929
  # use UTC to be consistent with Python
949
930
  # and so days have equal length (no DST)
950
931
  def to_datetime(vec)
951
932
  return if vec.nil?
952
- vec.map do |v|
953
- case v
954
- when Time
955
- v.utc
956
- when Date
957
- v.to_datetime.to_time.utc
958
- else
959
- DateTime.parse(v.to_s).to_time.utc
933
+ vec =
934
+ vec.map do |v|
935
+ case v
936
+ when Time
937
+ v.utc
938
+ when Date
939
+ v.to_datetime.to_time.utc
940
+ else
941
+ DateTime.parse(v.to_s).to_time.utc
942
+ end
960
943
  end
961
- end
944
+ Rover::Vector.new(vec)
962
945
  end
963
946
 
964
947
  # okay to do in-place
965
948
  def df_concat_axis_one(dfs)
966
949
  dfs[1..-1].each do |df|
967
- df.each_vector_with_index do |v, k|
968
- dfs[0][k] = v
969
- end
950
+ dfs[0].merge!(df)
970
951
  end
971
952
  dfs[0]
972
953
  end
973
954
 
974
- def df_values(df)
975
- if df.is_a?(Daru::Vector)
976
- Numo::NArray.asarray(df.to_a)
977
- else
978
- # TODO make more performant
979
- Numo::NArray.asarray(df.to_matrix.to_a)
980
- end
981
- end
982
-
983
955
  # https://en.wikipedia.org/wiki/Poisson_distribution#Generating_Poisson-distributed_random_variables
984
956
  def poisson(lam)
985
957
  l = Math.exp(-lam)
@@ -994,13 +966,8 @@ module Prophet
994
966
 
995
967
  # https://en.wikipedia.org/wiki/Laplace_distribution#Generating_values_from_the_Laplace_distribution
996
968
  def laplace(loc, scale, size)
997
- u = Numo::DFloat.new(size).rand - 0.5
969
+ u = Numo::DFloat.new(size).rand(-0.5, 0.5)
998
970
  loc - scale * u.sign * Numo::NMath.log(1 - 2 * u.abs)
999
971
  end
1000
-
1001
- def ensure_arr(value)
1002
- value = [value] unless value.is_a?(Array)
1003
- value
1004
- end
1005
972
  end
1006
973
  end
@@ -6,7 +6,7 @@ module Prophet
6
6
  end
7
7
 
8
8
  def make_holidays_df(year_list, country)
9
- holidays_df.where(holidays_df["country"].eq(country) & holidays_df["year"].in(year_list))["ds", "holiday"]
9
+ holidays_df[(holidays_df["country"] == country) & (holidays_df["year"].in?(year_list))][["ds", "holiday"]]
10
10
  end
11
11
 
12
12
  # TODO marshal on installation
@@ -20,7 +20,7 @@ module Prophet
20
20
  holidays["country"] << row["country"]
21
21
  holidays["year"] << row["year"]
22
22
  end
23
- Daru::DataFrame.new(holidays)
23
+ Rover::DataFrame.new(holidays)
24
24
  end
25
25
  end
26
26
  end
data/lib/prophet/plot.rb CHANGED
@@ -8,16 +8,16 @@ module Prophet
8
8
  fig = ax.get_figure
9
9
  end
10
10
  fcst_t = to_pydatetime(fcst["ds"])
11
- ax.plot(to_pydatetime(@history["ds"]), @history["y"].map(&:to_f), "k.")
12
- ax.plot(fcst_t, fcst["yhat"].map(&:to_f), ls: "-", c: "#0072B2")
13
- if fcst.vectors.include?("cap") && plot_cap
14
- ax.plot(fcst_t, fcst["cap"].map(&:to_f), ls: "--", c: "k")
11
+ ax.plot(to_pydatetime(@history["ds"]), @history["y"].to_a, "k.")
12
+ ax.plot(fcst_t, fcst["yhat"].to_a, ls: "-", c: "#0072B2")
13
+ if fcst.include?("cap") && plot_cap
14
+ ax.plot(fcst_t, fcst["cap"].to_a, ls: "--", c: "k")
15
15
  end
16
- if @logistic_floor && fcst.vectors.include?("floor") && plot_cap
17
- ax.plot(fcst_t, fcst["floor"].map(&:to_f), ls: "--", c: "k")
16
+ if @logistic_floor && fcst.include?("floor") && plot_cap
17
+ ax.plot(fcst_t, fcst["floor"].to_a, ls: "--", c: "k")
18
18
  end
19
19
  if uncertainty && @uncertainty_samples
20
- ax.fill_between(fcst_t, fcst["yhat_lower"].map(&:to_f), fcst["yhat_upper"].map(&:to_f), color: "#0072B2", alpha: 0.2)
20
+ ax.fill_between(fcst_t, fcst["yhat_lower"].to_a, fcst["yhat_upper"].to_a, color: "#0072B2", alpha: 0.2)
21
21
  end
22
22
  # Specify formatting to workaround matplotlib issue #12925
23
23
  locator = dates.AutoDateLocator.new(interval_multiples: false)
@@ -33,25 +33,25 @@ module Prophet
33
33
 
34
34
  def plot_components(fcst, uncertainty: true, plot_cap: true, weekly_start: 0, yearly_start: 0, figsize: nil)
35
35
  components = ["trend"]
36
- if @train_holiday_names && fcst.vectors.include?("holidays")
36
+ if @train_holiday_names && fcst.include?("holidays")
37
37
  components << "holidays"
38
38
  end
39
39
  # Plot weekly seasonality, if present
40
- if @seasonalities["weekly"] && fcst.vectors.include?("weekly")
40
+ if @seasonalities["weekly"] && fcst.include?("weekly")
41
41
  components << "weekly"
42
42
  end
43
43
  # Yearly if present
44
- if @seasonalities["yearly"] && fcst.vectors.include?("yearly")
44
+ if @seasonalities["yearly"] && fcst.include?("yearly")
45
45
  components << "yearly"
46
46
  end
47
47
  # Other seasonalities
48
- components.concat(@seasonalities.keys.select { |name| fcst.vectors.include?(name) && !["weekly", "yearly"].include?(name) }.sort)
48
+ components.concat(@seasonalities.keys.select { |name| fcst.include?(name) && !["weekly", "yearly"].include?(name) }.sort)
49
49
  regressors = {"additive" => false, "multiplicative" => false}
50
50
  @extra_regressors.each do |name, props|
51
51
  regressors[props[:mode]] = true
52
52
  end
53
53
  ["additive", "multiplicative"].each do |mode|
54
- if regressors[mode] && fcst.vectors.include?("extra_regressors_#{mode}")
54
+ if regressors[mode] && fcst.include?("extra_regressors_#{mode}")
55
55
  components << "extra_regressors_#{mode}"
56
56
  end
57
57
  end
@@ -97,11 +97,11 @@ module Prophet
97
97
  def add_changepoints_to_plot(ax, fcst, threshold: 0.01, cp_color: "r", cp_linestyle: "--", trend: true)
98
98
  artists = []
99
99
  if trend
100
- artists << ax.plot(to_pydatetime(fcst["ds"]), fcst["trend"].map(&:to_f), c: cp_color)
100
+ artists << ax.plot(to_pydatetime(fcst["ds"]), fcst["trend"].to_a, c: cp_color)
101
101
  end
102
102
  signif_changepoints =
103
103
  if @changepoints.size > 0
104
- (@params["delta"].mean(axis: 0, nan: true).abs >= threshold).mask(@changepoints)
104
+ (@params["delta"].mean(axis: 0, nan: true).abs >= threshold).mask(@changepoints.to_numo)
105
105
  else
106
106
  []
107
107
  end
@@ -120,15 +120,15 @@ module Prophet
120
120
  ax = fig.add_subplot(111)
121
121
  end
122
122
  fcst_t = to_pydatetime(fcst["ds"])
123
- artists += ax.plot(fcst_t, fcst[name].map(&:to_f), ls: "-", c: "#0072B2")
124
- if fcst.vectors.include?("cap") && plot_cap
125
- artists += ax.plot(fcst_t, fcst["cap"].map(&:to_f), ls: "--", c: "k")
123
+ artists += ax.plot(fcst_t, fcst[name].to_a, ls: "-", c: "#0072B2")
124
+ if fcst.include?("cap") && plot_cap
125
+ artists += ax.plot(fcst_t, fcst["cap"].to_a, ls: "--", c: "k")
126
126
  end
127
- if @logistic_floor && fcst.vectors.include?("floor") && plot_cap
128
- ax.plot(fcst_t, fcst["floor"].map(&:to_f), ls: "--", c: "k")
127
+ if @logistic_floor && fcst.include?("floor") && plot_cap
128
+ ax.plot(fcst_t, fcst["floor"].to_a, ls: "--", c: "k")
129
129
  end
130
130
  if uncertainty && @uncertainty_samples
131
- artists += [ax.fill_between(fcst_t, fcst[name + "_lower"].map(&:to_f), fcst[name + "_upper"].map(&:to_f), color: "#0072B2", alpha: 0.2)]
131
+ artists += [ax.fill_between(fcst_t, fcst[name + "_lower"].to_a, fcst[name + "_upper"].to_a, color: "#0072B2", alpha: 0.2)]
132
132
  end
133
133
  # Specify formatting to workaround matplotlib issue #12925
134
134
  locator = dates.AutoDateLocator.new(interval_multiples: false)
@@ -145,17 +145,17 @@ module Prophet
145
145
  end
146
146
 
147
147
  def seasonality_plot_df(ds)
148
- df_dict = {"ds" => ds, "cap" => [1.0] * ds.size, "floor" => [0.0] * ds.size}
148
+ df_dict = {"ds" => ds, "cap" => 1.0, "floor" => 0.0}
149
149
  @extra_regressors.each_key do |name|
150
- df_dict[name] = [0.0] * ds.size
150
+ df_dict[name] = 0.0
151
151
  end
152
152
  # Activate all conditional seasonality columns
153
153
  @seasonalities.values.each do |props|
154
154
  if props[:condition_name]
155
- df_dict[props[:condition_name]] = [true] * ds.size
155
+ df_dict[props[:condition_name]] = true
156
156
  end
157
157
  end
158
- df = Daru::DataFrame.new(df_dict)
158
+ df = Rover::DataFrame.new(df_dict)
159
159
  df = setup_dataframe(df)
160
160
  df
161
161
  end
@@ -172,9 +172,9 @@ module Prophet
172
172
  df_w = seasonality_plot_df(days)
173
173
  seas = predict_seasonal_components(df_w)
174
174
  days = days.map { |v| v.strftime("%A") }
175
- artists += ax.plot(days.size.times.to_a, seas[name].map(&:to_f), ls: "-", c: "#0072B2")
175
+ artists += ax.plot(days.size.times.to_a, seas[name].to_a, ls: "-", c: "#0072B2")
176
176
  if uncertainty && @uncertainty_samples
177
- artists += [ax.fill_between(days.size.times.to_a, seas[name + "_lower"].map(&:to_f), seas[name + "_upper"].map(&:to_f), color: "#0072B2", alpha: 0.2)]
177
+ artists += [ax.fill_between(days.size.times.to_a, seas[name + "_lower"].to_a, seas[name + "_upper"].to_a, color: "#0072B2", alpha: 0.2)]
178
178
  end
179
179
  ax.grid(true, which: "major", c: "gray", ls: "-", lw: 1, alpha: 0.2)
180
180
  ax.set_xticks(days.size.times.to_a)
@@ -198,9 +198,9 @@ module Prophet
198
198
  days = 365.times.map { |i| start + i + yearly_start }
199
199
  df_y = seasonality_plot_df(days)
200
200
  seas = predict_seasonal_components(df_y)
201
- artists += ax.plot(to_pydatetime(df_y["ds"]), seas[name].map(&:to_f), ls: "-", c: "#0072B2")
201
+ artists += ax.plot(to_pydatetime(df_y["ds"]), seas[name].to_a, ls: "-", c: "#0072B2")
202
202
  if uncertainty && @uncertainty_samples
203
- artists += [ax.fill_between(to_pydatetime(df_y["ds"]), seas[name + "_lower"].map(&:to_f), seas[name + "_upper"].map(&:to_f), color: "#0072B2", alpha: 0.2)]
203
+ artists += [ax.fill_between(to_pydatetime(df_y["ds"]), seas[name + "_lower"].to_a, seas[name + "_upper"].to_a, color: "#0072B2", alpha: 0.2)]
204
204
  end
205
205
  ax.grid(true, which: "major", c: "gray", ls: "-", lw: 1, alpha: 0.2)
206
206
  months = dates.MonthLocator.new((1..12).to_a, bymonthday: 1, interval: 2)
@@ -231,9 +231,9 @@ module Prophet
231
231
  days = plot_points.times.map { |i| Time.at(start + i * step).utc }
232
232
  df_y = seasonality_plot_df(days)
233
233
  seas = predict_seasonal_components(df_y)
234
- artists += ax.plot(to_pydatetime(df_y["ds"]), seas[name].map(&:to_f), ls: "-", c: "#0072B2")
234
+ artists += ax.plot(to_pydatetime(df_y["ds"]), seas[name].to_a, ls: "-", c: "#0072B2")
235
235
  if uncertainty && @uncertainty_samples
236
- artists += [ax.fill_between(to_pydatetime(df_y["ds"]), seas[name + "_lower"].map(&:to_f), seas[name + "_upper"].map(&:to_f), color: "#0072B2", alpha: 0.2)]
236
+ artists += [ax.fill_between(to_pydatetime(df_y["ds"]), seas[name + "_lower"].to_a, seas[name + "_upper"].to_a, color: "#0072B2", alpha: 0.2)]
237
237
  end
238
238
  ax.grid(true, which: "major", c: "gray", ls: "-", lw: 1, alpha: 0.2)
239
239
  step = (finish - start) / (7 - 1).to_f
@@ -281,7 +281,7 @@ module Prophet
281
281
 
282
282
  def to_pydatetime(v)
283
283
  datetime = PyCall.import_module("datetime")
284
- v.map { |v| datetime.datetime.utcfromtimestamp(v.to_i) }
284
+ v.map { |v| datetime.datetime.utcfromtimestamp(v.to_i) }.to_a
285
285
  end
286
286
  end
287
287
  end
@@ -127,7 +127,7 @@ module Prophet
127
127
  stan_data["t_change"] = stan_data["t_change"].to_a
128
128
  stan_data["s_a"] = stan_data["s_a"].to_a
129
129
  stan_data["s_m"] = stan_data["s_m"].to_a
130
- stan_data["X"] = stan_data["X"].to_matrix.to_a
130
+ stan_data["X"] = stan_data["X"].to_numo.to_a
131
131
  stan_init["delta"] = stan_init["delta"].to_a
132
132
  stan_init["beta"] = stan_init["beta"].to_a
133
133
  [stan_init, stan_data]
@@ -1,3 +1,3 @@
1
1
  module Prophet
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/prophet.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # dependencies
2
2
  require "cmdstan"
3
- require "daru"
3
+ require "rover"
4
4
  require "numo/narray"
5
5
 
6
6
  # stdlib
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prophet-rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-10 00:00:00.000000000 Z
11
+ date: 2020-05-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cmdstan
@@ -25,21 +25,21 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: 0.1.2
27
27
  - !ruby/object:Gem::Dependency
28
- name: daru
28
+ name: numo-narray
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: 0.9.1.7
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
40
+ version: 0.9.1.7
41
41
  - !ruby/object:Gem::Dependency
42
- name: numo-narray
42
+ name: rover-df
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -95,7 +95,7 @@ dependencies:
95
95
  - !ruby/object:Gem::Version
96
96
  version: '5'
97
97
  - !ruby/object:Gem::Dependency
98
- name: matplotlib
98
+ name: daru
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - ">="
@@ -109,7 +109,7 @@ dependencies:
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
111
  - !ruby/object:Gem::Dependency
112
- name: ruby-prof
112
+ name: matplotlib
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
115
  - - ">="