google-meridian 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,6 +28,7 @@ from meridian import constants as c
28
28
  from meridian.analysis import analyzer
29
29
  from meridian.analysis import formatter
30
30
  from meridian.analysis import summary_text
31
+ from meridian.data import time_coordinates as tc
31
32
  from meridian.model import model
32
33
  import numpy as np
33
34
  import pandas as pd
@@ -37,6 +38,7 @@ import xarray as xr
37
38
 
38
39
  __all__ = [
39
40
  'BudgetOptimizer',
41
+ 'OptimizationGrid',
40
42
  'OptimizationResults',
41
43
  ]
42
44
 
@@ -92,10 +94,14 @@ class OptimizationGrid:
92
94
  Attributes:
93
95
  historical_spend: ndarray of shape `(n_paid_channels,)` containing
94
96
  aggregated historical spend allocation for spend for all media and RF
95
- channels. The order matches `InputData.get_all_paid_channels`.
97
+ channels.
96
98
  use_kpi: Whether using generic KPI or revenue.
97
99
  use_posterior: Whether posterior distributions were used, or prior.
98
100
  use_optimal_frequency: Whether optimal frequency was used.
101
+ gtol: Float indicating the acceptable relative error for the budget used in
102
+ the grid setup. The budget is rounded by `10*n`, where `n` is the smallest
103
+ integer such that `(budget - rounded_budget)` is less than or equal to
104
+ `(budget * gtol)`.
99
105
  round_factor: The round factor used for the optimization grid.
100
106
  optimal_frequency: Optional ndarray of shape `(n_paid_channels,)`,
101
107
  containing the optimal frequency per channel. Value is `None` if the model
@@ -111,9 +117,10 @@ class OptimizationGrid:
111
117
  use_kpi: bool
112
118
  use_posterior: bool
113
119
  use_optimal_frequency: bool
120
+ gtol: float
114
121
  round_factor: int
115
122
  optimal_frequency: np.ndarray | None
116
- selected_times: list[str] | None
123
+ selected_times: Sequence[str] | Sequence[bool] | None
117
124
 
118
125
  @property
119
126
  def grid_dataset(self) -> xr.Dataset:
@@ -142,35 +149,149 @@ class OptimizationGrid:
142
149
  """The spend step size."""
143
150
  return self.grid_dataset.attrs[c.SPEND_STEP_SIZE]
144
151
 
145
- # TODO: b/402950014 - Add per-channel constraints parameter.
152
+ @property
153
+ def channels(self) -> list[str]:
154
+ """The spend channels in the grid."""
155
+ return self.grid_dataset.channel.data.tolist()
156
+
146
157
  def optimize(
147
158
  self,
148
159
  scenario: FixedBudgetScenario | FlexibleBudgetScenario,
160
+ pct_of_spend: Sequence[float] | None = None,
161
+ spend_constraint_lower: _SpendConstraint | None = None,
162
+ spend_constraint_upper: _SpendConstraint | None = None,
163
+ ) -> xr.Dataset:
164
+ """Finds the optimal budget allocation that maximizes outcome.
165
+
166
+ Args:
167
+ scenario: The optimization scenario with corresponding parameters.
168
+ pct_of_spend: Numeric list of size `channels` containing the percentage
169
+ allocation for spend for all channels. The values must be between 0-1,
170
+ summing to 1. By default, the historical allocation is used. Budget and
171
+ allocation are used in conjunction to determine the non-optimized
172
+ media-level spend, which is used to calculate the non-optimized
173
+ performance metrics (for example, ROI) and construct the feasible range
174
+ of media-level spend with the spend constraints.
175
+ spend_constraint_lower: Numeric list of size `channels` or float (same
176
+ constraint for all channels) indicating the lower bound of media-level
177
+ spend. If given as a channel-indexed array, the order must match
178
+ `channels`. The lower bound of media-level spend is `(1 -
179
+ spend_constraint_lower) * budget * allocation`. The value must be
180
+ between 0-1. Defaults to `0.3` for fixed budget and `1` for flexible.
181
+ spend_constraint_upper: Numeric list of size `channels` or float (same
182
+ constraint for all channels) indicating the upper bound of media-level
183
+ spend. If given as a channel-indexed array, the order must match
184
+ `channels`. The upper bound of media-level spend is `(1 +
185
+ spend_constraint_upper) * budget * allocation`. Defaults to `0.3` for
186
+ fixed budget and `1` for flexible.
187
+
188
+ Returns:
189
+ An xarray Dataset with `channel` as the coordinate and the following data
190
+ variables:
191
+ * `optimized`: media spend that maximizes incremental outcome based
192
+ on spend constraints for all media and RF channels.
193
+ * `non_optimized`: Channel-level spend.
194
+
195
+ Raises:
196
+ A warning if the budget's rounding should be different from the grid's
197
+ round factor.
198
+ ValueError: If spend allocation is not within the grid coverage.
199
+ """
200
+ total_budget = (
201
+ scenario.total_budget
202
+ if isinstance(scenario, FixedBudgetScenario)
203
+ else None
204
+ )
205
+ budget = total_budget or np.sum(self.historical_spend)
206
+ valid_pct_of_spend = _validate_pct_of_spend(
207
+ n_channels=len(self.channels),
208
+ hist_spend=self.historical_spend,
209
+ pct_of_spend=pct_of_spend,
210
+ )
211
+ spend = budget * valid_pct_of_spend
212
+ spend_constraint_default = (
213
+ c.SPEND_CONSTRAINT_DEFAULT_FIXED_BUDGET
214
+ if isinstance(scenario, FixedBudgetScenario)
215
+ else c.SPEND_CONSTRAINT_DEFAULT_FLEXIBLE_BUDGET
216
+ )
217
+ if spend_constraint_lower is None:
218
+ spend_constraint_lower = spend_constraint_default
219
+ if spend_constraint_upper is None:
220
+ spend_constraint_upper = spend_constraint_default
221
+ (optimization_lower_bound, optimization_upper_bound) = (
222
+ _get_optimization_bounds(
223
+ n_channels=len(self.channels),
224
+ spend=spend,
225
+ round_factor=self.round_factor,
226
+ spend_constraint_lower=spend_constraint_lower,
227
+ spend_constraint_upper=spend_constraint_upper,
228
+ )
229
+ )
230
+ self._check_optimization_bounds(
231
+ lower_bound=optimization_lower_bound,
232
+ upper_bound=optimization_upper_bound,
233
+ )
234
+ round_factor = _get_round_factor(budget, self.gtol)
235
+ if round_factor != self.round_factor:
236
+ warnings.warn(
237
+ 'Optimization accuracy may suffer owing to budget level differences.'
238
+ ' Consider creating a new grid with smaller `gtol` if you intend to'
239
+ " shrink budgets significantly. It's only a problem when you use a"
240
+ ' smaller budget, for which the intended step size is meant to be'
241
+ ' smaller for one or more channels.'
242
+ )
243
+ (spend_grid, incremental_outcome_grid) = self._trim_grid(
244
+ spend_bound_lower=optimization_lower_bound,
245
+ spend_bound_upper=optimization_upper_bound,
246
+ )
247
+ if isinstance(scenario, FixedBudgetScenario):
248
+ rounded_spend = np.round(spend, self.round_factor)
249
+ scenario = dataclasses.replace(
250
+ scenario, total_budget=np.sum(rounded_spend)
251
+ )
252
+ optimal_spend = self._grid_search(
253
+ spend_grid=spend_grid,
254
+ incremental_outcome_grid=incremental_outcome_grid,
255
+ scenario=scenario,
256
+ )
257
+
258
+ return xr.Dataset(
259
+ coords={c.CHANNEL: self.channels},
260
+ data_vars={
261
+ c.OPTIMIZED: ([c.CHANNEL], optimal_spend.data),
262
+ c.NON_OPTIMIZED: ([c.CHANNEL], spend),
263
+ },
264
+ )
265
+
266
+ def _grid_search(
267
+ self,
268
+ spend_grid: np.ndarray,
269
+ incremental_outcome_grid: np.ndarray,
270
+ scenario: FixedBudgetScenario | FlexibleBudgetScenario,
149
271
  ) -> np.ndarray:
150
272
  """Hill-climbing search algorithm for budget optimization.
151
273
 
152
274
  Args:
275
+ spend_grid: Discrete grid with dimensions (`grid_length` x
276
+ `n_total_channels`) containing spend by channel for all media and RF
277
+ channels, used in the hill-climbing search algorithm.
278
+ incremental_outcome_grid: Discrete grid with dimensions (`grid_length` x
279
+ `n_total_channels`) containing incremental outcome by channel for all
280
+ media and RF channels, used in the hill-climbing search algorithm.
153
281
  scenario: The optimization scenario with corresponding parameters.
154
282
 
155
283
  Returns:
156
- optimal_spend: `np.ndarray` with shape `(n_paid_channels,)` containing the
157
- media spend that maximizes incremental outcome based on spend
284
+ optimal_spend: `np.ndarray` of dimension (`n_total_channels`) containing
285
+ the media spend that maximizes incremental outcome based on spend
158
286
  constraints for all media and RF channels.
287
+ optimal_inc_outcome: `np.ndarray` of dimension (`n_total_channels`)
288
+ containing the post optimization incremental outcome per channel for all
289
+ media and RF channels.
159
290
  """
160
- if (
161
- isinstance(scenario, FixedBudgetScenario)
162
- and scenario.total_budget is None
163
- ):
164
- rounded_spend = np.round(self.historical_spend, self.round_factor).astype(
165
- int
166
- )
167
- budget = np.sum(rounded_spend)
168
- scenario = dataclasses.replace(scenario, total_budget=budget)
169
-
170
- spend = self.spend_grid[0, :].copy()
171
- incremental_outcome = self.incremental_outcome_grid[0, :].copy()
172
- spend_grid = self.spend_grid[1:, :]
173
- incremental_outcome_grid = self.incremental_outcome_grid[1:, :]
291
+ spend = spend_grid[0, :].copy()
292
+ incremental_outcome = incremental_outcome_grid[0, :].copy()
293
+ spend_grid = spend_grid[1:, :]
294
+ incremental_outcome_grid = incremental_outcome_grid[1:, :]
174
295
  iterative_roi_grid = np.round(
175
296
  tf.math.divide_no_nan(
176
297
  incremental_outcome_grid - incremental_outcome, spend_grid - spend
@@ -211,9 +332,97 @@ class OptimizationGrid:
211
332
  ),
212
333
  decimals=8,
213
334
  )
214
-
215
335
  return spend_optimal
216
336
 
337
+ def _trim_grid(
338
+ self,
339
+ spend_bound_lower: np.ndarray,
340
+ spend_bound_upper: np.ndarray,
341
+ ) -> tuple[np.ndarray, np.ndarray]:
342
+ """Trim the grids based on a more restricted spend bound.
343
+
344
+ It is assumed that spend bounds are validated: their values are within the
345
+ grid coverage and they are rounded using this grid's round factor.
346
+
347
+ Args:
348
+ spend_bound_lower: The lower bound of spend for each channel.
349
+ spend_bound_upper: The upper bound of spend for each channel.
350
+
351
+ Returns:
352
+ updated_spend: The updated spend grid with valid spend values moved up to
353
+ the first row and invalid spend values filled with NaN.
354
+ updated_incremental_outcome: The updated incremental outcome grid with the
355
+ corresponding incremental outcome values moved up to the first row and
356
+ invalid incremental outcome values filled with NaN.
357
+ """
358
+ spend_grid = self.spend_grid
359
+ updated_spend = self.spend_grid.copy()
360
+ updated_incremental_outcome = self.incremental_outcome_grid.copy()
361
+
362
+ for ch in range(len(self.channels)):
363
+ valid_indices = np.where(
364
+ (spend_grid[:, ch] >= spend_bound_lower[ch])
365
+ & (spend_grid[:, ch] <= spend_bound_upper[ch])
366
+ )[0]
367
+ first_valid_index = valid_indices[0]
368
+ last_valid_index = valid_indices[-1]
369
+
370
+ # Move the smallest spend to the first row.
371
+ updated_spend[:, ch] = np.roll(
372
+ updated_spend[:, ch], shift=-first_valid_index
373
+ )
374
+ # Move the corresponding incremental outcome to the first row.
375
+ updated_incremental_outcome[:, ch] = np.roll(
376
+ updated_incremental_outcome[:, ch], shift=-first_valid_index
377
+ )
378
+
379
+ # Fill the invalid indices with NaN.
380
+ nan_indices = last_valid_index - first_valid_index + 1
381
+ updated_spend[nan_indices:, ch] = np.nan
382
+ updated_incremental_outcome[nan_indices:, ch] = np.nan
383
+
384
+ return (updated_spend, updated_incremental_outcome)
385
+
386
+ def _check_optimization_bounds(
387
+ self,
388
+ lower_bound: np.ndarray,
389
+ upper_bound: np.ndarray,
390
+ ) -> None:
391
+ """Checks if the optimization bounds fit within the spend grid.
392
+
393
+ Args:
394
+ lower_bound: `np.ndarray` of shape `(n_channels,)` containing the lower
395
+ bound for each channel.
396
+ upper_bound: `np.ndarray` of shape `(n_channels,)` containing the upper
397
+ bound for each channel.
398
+
399
+ Raises:
400
+ ValueError: If the optimization bounds do not fit within the spend grid.
401
+ """
402
+ min_spend = np.min(self.spend_grid, axis=0)
403
+ max_spend = np.max(self.spend_grid, axis=0)
404
+ errors = []
405
+ for i, channel_min_spend in enumerate(min_spend.data):
406
+ if lower_bound[i] < channel_min_spend:
407
+ errors.append(
408
+ f'Lower bound {lower_bound[i]} for channel'
409
+ f' {self.channels[i]} is below the mimimum spend of the grid'
410
+ f' {channel_min_spend}.'
411
+ )
412
+ for i, channel_max_spend in enumerate(max_spend.data):
413
+ if upper_bound[i] > channel_max_spend:
414
+ errors.append(
415
+ f'Upper bound {upper_bound[i]} for channel'
416
+ f' {self.channels[i]} is above the maximum spend of the grid'
417
+ f' {channel_max_spend}.'
418
+ )
419
+
420
+ if errors:
421
+ raise ValueError(
422
+ 'Spend allocation is not within the grid coverage:\n'
423
+ + '\n'.join(errors)
424
+ )
425
+
217
426
 
218
427
  @dataclasses.dataclass(frozen=True)
219
428
  class OptimizationResults:
@@ -413,7 +622,7 @@ class OptimizationResults:
413
622
  # by adjusting the domain of the y-axis so that the incremental outcome does
414
623
  # not start at 0. Calculate the total decrease in incremental outcome to pad
415
624
  # the y-axis from the non-optimized total incremental outcome value.
416
- sum_decr = sum(df[df.incremental_outcome < 0].incremental_outcome)
625
+ sum_decr = df[df.incremental_outcome < 0].incremental_outcome.sum()
417
626
  y_padding = float(f'1e{int(math.log10(-sum_decr))}') if sum_decr < 0 else 2
418
627
  domain_scale = [
419
628
  self.nonoptimized_data.total_incremental_outcome + sum_decr - y_padding,
@@ -490,7 +699,7 @@ class OptimizationResults:
490
699
  title=formatter.custom_title_params(
491
700
  summary_text.SPEND_ALLOCATION_CHART_TITLE
492
701
  ),
493
- width=c.VEGALITE_FACET_DEFAULT_WIDTH
702
+ width=c.VEGALITE_FACET_DEFAULT_WIDTH,
494
703
  )
495
704
  )
496
705
 
@@ -698,6 +907,7 @@ class OptimizationResults:
698
907
  use_posterior=self.optimization_grid.use_posterior,
699
908
  selected_times=selected_times,
700
909
  by_reach=True,
910
+ use_kpi=not self.nonoptimized_data.attrs[c.IS_REVENUE_KPI],
701
911
  use_optimal_frequency=self.optimization_grid.use_optimal_frequency,
702
912
  )
703
913
 
@@ -807,8 +1017,16 @@ class OptimizationResults:
807
1017
 
808
1018
  def _gen_optimization_summary(self) -> str:
809
1019
  """Generates HTML optimization summary output (as sanitized content str)."""
810
- self.template_env.globals[c.START_DATE] = self.optimized_data.start_date
811
- self.template_env.globals[c.END_DATE] = self.optimized_data.end_date
1020
+ start_date = tc.normalize_date(self.optimized_data.start_date)
1021
+ self.template_env.globals[c.START_DATE] = start_date.strftime(
1022
+ f'%b {start_date.day}, %Y'
1023
+ )
1024
+ interval_days = self.meridian.input_data.time_coordinates.interval_days
1025
+ end_date = tc.normalize_date(self.optimized_data.end_date)
1026
+ end_date_adjusted = end_date + pd.Timedelta(days=interval_days)
1027
+ self.template_env.globals[c.END_DATE] = end_date_adjusted.strftime(
1028
+ f'%b {end_date_adjusted.day}, %Y'
1029
+ )
812
1030
 
813
1031
  html_template = self.template_env.get_template('summary.html.jinja')
814
1032
  return html_template.render(
@@ -1056,6 +1274,7 @@ class BudgetOptimizer:
1056
1274
 
1057
1275
  def optimize(
1058
1276
  self,
1277
+ new_data: analyzer.DataTensors | None = None,
1059
1278
  use_posterior: bool = True,
1060
1279
  selected_times: tuple[str | None, str | None] | None = None,
1061
1280
  fixed_budget: bool = True,
@@ -1073,18 +1292,50 @@ class BudgetOptimizer:
1073
1292
  ) -> OptimizationResults:
1074
1293
  """Finds the optimal budget allocation that maximizes outcome.
1075
1294
 
1076
- Outcome is typically revenue, but when the KPI is not revenue and "revenue
1077
- per KPI" data is not available, then Meridian defines the Outcome to be the
1078
- KPI itself.
1295
+ Optimization depends on the following:
1296
+ 1. Flighting pattern (the relative allocation of a channel's media units
1297
+ across geos and time periods, which is held fixed for each channel)
1298
+ 2. Cost per media unit (This is assumed to be constant for each channel, and
1299
+ can optionally vary by geo and/or time period)
1300
+ 3. `pct_of_spend` (center of the spend box constraint for each channel)
1301
+ 4. `budget` (total budget used for fixed budget scenarios)
1302
+
1303
+ By default, these values are assigned based on the historical data. The
1304
+ `pct_of_spend` and `budget` are optimization arguments that can be
1305
+ overridden directly. Passing `new_data.media` (or `new_data.reach` or
1306
+ `new_data.frequency`) will override both the flighting pattern and cost per
1307
+ media unit. Passing `new_data.spend` (or `new_data.rf_spend`) will only
1308
+ override the cost per media unit.
1309
+
1310
+ If `new_data` is passed with a different number of time periods than the
1311
+ historical data, then all of the optimization parameters will be inferred
1312
+ from it. Default values for `pct_of_spend` and `budget` (if
1313
+ `fixed_budget=True`) will be inferred from the `new_data`, but can be
1314
+ overridden using the `pct_of_spend` and `budget` arguments.
1315
+
1316
+ If `selected_times` is specified, then the default values are inferred based
1317
+ on the subset of time periods specified.
1079
1318
 
1080
1319
  Args:
1320
+ new_data: An optional `DataTensors` container with optional tensors:
1321
+ `media`, `reach`, `frequency`, `media_spend`, `rf_spend`,
1322
+ `revenue_per_kpi`, and `time`. If `None`, the original tensors from the
1323
+ Meridian object are used. If `new_data` is provided, the optimization is
1324
+ run on the versions of the tensors in `new_data` and the original
1325
+ versions of all the remaining tensors. If any of the tensors in
1326
+ `new_data` is provided with a different number of time periods than in
1327
+ `InputData`, then all tensors must be provided with the same number of
1328
+ time periods and the `time` tensor must be provided.
1081
1329
  use_posterior: Boolean. If `True`, then the budget is optimized based on
1082
1330
  the posterior distribution of the model. Otherwise, the prior
1083
1331
  distribution is used.
1084
1332
  selected_times: Tuple containing the start and end time dimension
1085
1333
  coordinates for the duration to run the optimization on. Selected time
1086
1334
  values should align with the Meridian time dimension coordinates in the
1087
- underlying model. By default, all times periods are used. Either start
1335
+ underlying model if optimizing the original data. If `new_data` is
1336
+ provided with a different number of time periods than in `InputData`,
1337
+ then the start and end time coordinates must match the time dimensions
1338
+ in `new_data.time`. By default, all time periods are used. Either start
1088
1339
  or end time component can be `None` to represent the first or the last
1089
1340
  time coordinate, respectively.
1090
1341
  fixed_budget: Boolean indicating whether it's a fixed budget optimization
@@ -1101,7 +1352,7 @@ class BudgetOptimizer:
1101
1352
  performance metrics (for example, ROI) and construct the feasible range
1102
1353
  of media-level spend with the spend constraints. Consider using
1103
1354
  `InputData.get_paid_channels_argument_builder()` to construct this
1104
- argument.
1355
+ argument. If using `new_data`, this argument is ignored.
1105
1356
  spend_constraint_lower: Numeric list of size `n_paid_channels` or float
1106
1357
  (same constraint for all channels) indicating the lower bound of
1107
1358
  media-level spend. If given as a channel-indexed array, the order must
@@ -1149,66 +1400,30 @@ class BudgetOptimizer:
1149
1400
  target_roi=target_roi,
1150
1401
  target_mroi=target_mroi,
1151
1402
  )
1152
-
1153
- if selected_times is not None:
1154
- start_date, end_date = selected_times
1155
- selected_time_dims = self._meridian.expand_selected_time_dims(
1156
- start_date=start_date,
1157
- end_date=end_date,
1158
- )
1159
- else:
1160
- selected_time_dims = None
1161
- hist_spend = self._analyzer.get_historical_spend(
1162
- selected_time_dims,
1163
- include_media=self._meridian.n_media_channels > 0,
1164
- include_rf=self._meridian.n_rf_channels > 0,
1165
- ).data
1166
-
1167
- use_historical_budget = budget is None or round(budget) == round(
1168
- np.sum(hist_spend)
1169
- )
1170
- budget = budget or np.sum(hist_spend)
1171
- pct_of_spend = self._validate_pct_of_spend(hist_spend, pct_of_spend)
1172
- spend = budget * pct_of_spend
1173
- round_factor = _get_round_factor(budget, gtol)
1174
- rounded_spend = np.round(spend, round_factor).astype(int)
1175
- if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
1176
- optimal_frequency = tf.convert_to_tensor(
1177
- self._analyzer.optimal_freq(
1178
- use_posterior=use_posterior,
1179
- selected_times=selected_time_dims,
1180
- use_kpi=use_kpi,
1181
- ).optimal_frequency,
1182
- dtype=tf.float32,
1183
- )
1184
- else:
1185
- optimal_frequency = None
1186
-
1187
- (optimization_lower_bound, optimization_upper_bound, spend_bounds) = (
1188
- self._get_optimization_bounds(
1189
- spend=rounded_spend,
1190
- spend_constraint_lower=spend_constraint_lower,
1191
- spend_constraint_upper=spend_constraint_upper,
1192
- round_factor=round_factor,
1193
- fixed_budget=fixed_budget,
1194
- )
1403
+ spend_constraint_default = (
1404
+ c.SPEND_CONSTRAINT_DEFAULT_FIXED_BUDGET
1405
+ if fixed_budget
1406
+ else c.SPEND_CONSTRAINT_DEFAULT_FLEXIBLE_BUDGET
1195
1407
  )
1408
+ if spend_constraint_lower is None:
1409
+ spend_constraint_lower = spend_constraint_default
1410
+ if spend_constraint_upper is None:
1411
+ spend_constraint_upper = spend_constraint_default
1196
1412
  optimization_grid = self.create_optimization_grid(
1197
- historical_spend=hist_spend,
1198
- spend_bound_lower=optimization_lower_bound,
1199
- spend_bound_upper=optimization_upper_bound,
1200
- selected_times=selected_time_dims,
1201
- round_factor=round_factor,
1413
+ new_data=new_data,
1414
+ selected_times=selected_times,
1415
+ budget=budget,
1416
+ pct_of_spend=pct_of_spend,
1417
+ spend_constraint_lower=spend_constraint_lower,
1418
+ spend_constraint_upper=spend_constraint_upper,
1419
+ gtol=gtol,
1202
1420
  use_posterior=use_posterior,
1203
1421
  use_kpi=use_kpi,
1204
1422
  use_optimal_frequency=use_optimal_frequency,
1205
- optimal_frequency=optimal_frequency,
1206
1423
  batch_size=batch_size,
1207
1424
  )
1208
-
1209
1425
  if fixed_budget:
1210
- total_budget = None if use_historical_budget else np.sum(rounded_spend)
1211
- scenario = FixedBudgetScenario(total_budget=total_budget)
1426
+ scenario = FixedBudgetScenario(total_budget=budget)
1212
1427
  elif target_roi:
1213
1428
  scenario = FlexibleBudgetScenario(
1214
1429
  target_metric=c.ROI, target_value=target_roi
@@ -1217,27 +1432,38 @@ class BudgetOptimizer:
1217
1432
  scenario = FlexibleBudgetScenario(
1218
1433
  target_metric=c.MROI, target_value=target_mroi
1219
1434
  )
1220
-
1221
- optimal_spend = optimization_grid.optimize(
1435
+ spend = optimization_grid.optimize(
1222
1436
  scenario=scenario,
1437
+ pct_of_spend=pct_of_spend,
1438
+ spend_constraint_lower=spend_constraint_lower,
1439
+ spend_constraint_upper=spend_constraint_upper,
1440
+ )
1441
+
1442
+ use_historical_budget = budget is None or np.isclose(
1443
+ budget, np.sum(optimization_grid.historical_spend)
1223
1444
  )
1445
+ rounded_spend = np.round(
1446
+ spend.non_optimized, optimization_grid.round_factor
1447
+ ).astype(int)
1224
1448
  nonoptimized_data = self._create_budget_dataset(
1449
+ new_data=new_data,
1225
1450
  use_posterior=use_posterior,
1226
1451
  use_kpi=use_kpi,
1227
- hist_spend=hist_spend,
1452
+ hist_spend=optimization_grid.historical_spend,
1228
1453
  spend=rounded_spend,
1229
- selected_times=selected_time_dims,
1454
+ selected_times=optimization_grid.selected_times,
1230
1455
  confidence_level=confidence_level,
1231
1456
  batch_size=batch_size,
1232
1457
  use_historical_budget=use_historical_budget,
1233
1458
  )
1234
1459
  nonoptimized_data_with_optimal_freq = self._create_budget_dataset(
1460
+ new_data=new_data,
1235
1461
  use_posterior=use_posterior,
1236
1462
  use_kpi=use_kpi,
1237
- hist_spend=hist_spend,
1463
+ hist_spend=optimization_grid.historical_spend,
1238
1464
  spend=rounded_spend,
1239
- selected_times=selected_time_dims,
1240
- optimal_frequency=optimal_frequency,
1465
+ selected_times=optimization_grid.selected_times,
1466
+ optimal_frequency=optimization_grid.optimal_frequency,
1241
1467
  confidence_level=confidence_level,
1242
1468
  batch_size=batch_size,
1243
1469
  use_historical_budget=use_historical_budget,
@@ -1250,12 +1476,13 @@ class BudgetOptimizer:
1250
1476
  elif target_mroi:
1251
1477
  constraints[c.TARGET_MROI] = target_mroi
1252
1478
  optimized_data = self._create_budget_dataset(
1479
+ new_data=new_data,
1253
1480
  use_posterior=use_posterior,
1254
1481
  use_kpi=use_kpi,
1255
- hist_spend=hist_spend,
1256
- spend=optimal_spend,
1257
- selected_times=selected_time_dims,
1258
- optimal_frequency=optimal_frequency,
1482
+ hist_spend=optimization_grid.historical_spend,
1483
+ spend=spend.optimized,
1484
+ selected_times=optimization_grid.selected_times,
1485
+ optimal_frequency=optimization_grid.optimal_frequency,
1259
1486
  attrs=constraints,
1260
1487
  confidence_level=confidence_level,
1261
1488
  batch_size=batch_size,
@@ -1263,17 +1490,23 @@ class BudgetOptimizer:
1263
1490
  )
1264
1491
 
1265
1492
  if not fixed_budget:
1266
- self._raise_warning_if_target_constraints_not_met(
1493
+ _raise_warning_if_target_constraints_not_met(
1267
1494
  target_roi=target_roi,
1268
1495
  target_mroi=target_mroi,
1269
1496
  optimized_data=optimized_data,
1270
1497
  )
1271
1498
 
1272
1499
  spend_ratio = np.divide(
1273
- spend,
1274
- hist_spend,
1275
- out=np.zeros_like(hist_spend, dtype=float),
1276
- where=hist_spend != 0,
1500
+ spend.non_optimized,
1501
+ optimization_grid.historical_spend,
1502
+ out=np.zeros_like(optimization_grid.historical_spend, dtype=float),
1503
+ where=optimization_grid.historical_spend != 0,
1504
+ )
1505
+ n_paid_channels = len(self._meridian.input_data.get_all_paid_channels())
1506
+ spend_bounds = _get_spend_bounds(
1507
+ n_channels=n_paid_channels,
1508
+ spend_constraint_lower=spend_constraint_lower,
1509
+ spend_constraint_upper=spend_constraint_upper,
1277
1510
  )
1278
1511
 
1279
1512
  return OptimizationResults(
@@ -1287,71 +1520,81 @@ class BudgetOptimizer:
1287
1520
  _optimization_grid=optimization_grid,
1288
1521
  )
1289
1522
 
1290
- def _raise_warning_if_target_constraints_not_met(
1291
- self,
1292
- target_roi: float | None,
1293
- target_mroi: float | None,
1294
- optimized_data: xr.Dataset,
1295
- ) -> None:
1296
- """Raises a warning if the target constraints are not met."""
1297
- if target_roi:
1298
- # Total ROI is a scalar value.
1299
- optimized_roi = optimized_data.attrs[c.TOTAL_ROI]
1300
- if optimized_roi < target_roi:
1301
- warnings.warn(
1302
- f'Target ROI constraint was not met. The target ROI is {target_roi}'
1303
- f', but the actual ROI is {optimized_roi}.'
1304
- )
1305
- elif target_mroi:
1306
- # Compare each channel's marginal ROI to the target.
1307
- # optimized_data[c.MROI] is an array of shape (n_channels, 4), where the
1308
- # last dimension is [mean, median, ci_lo, ci_hi].
1309
- optimized_mroi = optimized_data[c.MROI][:, 0]
1310
- if np.any(optimized_mroi < target_mroi):
1311
- warnings.warn(
1312
- 'Target marginal ROI constraint was not met. The target marginal'
1313
- f' ROI is {target_mroi}, but the actual channel marginal ROIs are'
1314
- f' {optimized_mroi}.'
1315
- )
1316
-
1317
1523
  def create_optimization_grid(
1318
1524
  self,
1319
- historical_spend: np.ndarray,
1320
- spend_bound_lower: np.ndarray,
1321
- spend_bound_upper: np.ndarray,
1322
- selected_times: Sequence[str] | None,
1323
- round_factor: int,
1525
+ new_data: xr.Dataset | None = None,
1324
1526
  use_posterior: bool = True,
1527
+ selected_times: tuple[str | None, str | None] | None = None,
1528
+ budget: float | None = None,
1529
+ pct_of_spend: Sequence[float] | None = None,
1530
+ spend_constraint_lower: _SpendConstraint = c.SPEND_CONSTRAINT_DEFAULT,
1531
+ spend_constraint_upper: _SpendConstraint = c.SPEND_CONSTRAINT_DEFAULT,
1532
+ gtol: float = 0.0001,
1533
+ use_optimal_frequency: bool = True,
1325
1534
  use_kpi: bool = False,
1326
- use_optimal_frequency: bool = False,
1327
- optimal_frequency: xr.DataArray | None = None,
1328
1535
  batch_size: int = c.DEFAULT_BATCH_SIZE,
1329
1536
  ) -> OptimizationGrid:
1330
1537
  """Creates a OptimizationGrid for optimization.
1331
1538
 
1332
1539
  Args:
1333
- historical_spend: ndarray of shape `(n_paid_channels,)` with arrgegated
1334
- historical spend per paid channel.
1335
- spend_bound_lower: ndarray of dimension `(n_total_channels,)` containing
1336
- the lower constraint spend for each channel.
1337
- spend_bound_upper: ndarray of dimension `(n_total_channels,)` containing
1338
- the upper constraint spend for each channel.
1339
- selected_times: Sequence of strings representing the time dimensions in
1340
- `meridian.input_data.time` to use for optimization.
1341
- round_factor: The round factor used for the optimization grid.
1540
+ new_data: An optional `DataTensors` container with optional tensors:
1541
+ `media`, `reach`, `frequency`, `media_spend`, `rf_spend`,
1542
+ `revenue_per_kpi`, and `time`. If `None`, the original tensors from the
1543
+ Meridian object are used. If `new_data` is provided, the grid is created
1544
+ using the versions of the tensors in `new_data` and the original
1545
+ versions of all the remaining tensors. If any of the tensors in
1546
+ `new_data` is provided with a different number of time periods than in
1547
+ `InputData`, then all tensors must be provided with the same number of
1548
+ time periods and the `time` tensor must be provided.
1342
1549
  use_posterior: Boolean. If `True`, then the incremental outcome is derived
1343
1550
  from the posterior distribution of the model. Otherwise, the prior
1344
1551
  distribution is used.
1552
+ selected_times: Tuple containing the start and end time dimension
1553
+ coordinates for the duration to run the optimization on. Selected time
1554
+ values should align with the Meridian time dimension coordinates in the
1555
+ underlying model if optimizing the original data. If `new_data` is
1556
+ provided with a different number of time periods than in `InputData`,
1557
+ then the start and end time coordinates must match the time dimensions
1558
+ in `new_data.time`. By default, all time periods are used. Either start
1559
+ or end time component can be `None` to represent the first or the last
1560
+ time coordinate, respectively.
1561
+ budget: Number indicating the total budget for the fixed budget scenario.
1562
+ Defaults to the historical budget.
1563
+ pct_of_spend: Numeric list of size `n_paid_channels` containing the
1564
+ percentage allocation for spend for all media and RF channels. The order
1565
+ must match `(InputData.media + InputData.reach)` with values between
1566
+ 0-1, summing to 1. By default, the historical allocation is used. Budget
1567
+ and allocation are used in conjunction to determine the non-optimized
1568
+ media-level spend, which is used to calculate the non-optimized
1569
+ performance metrics (for example, ROI) and construct the feasible range
1570
+ of media-level spend with the spend constraints. Consider using
1571
+ `InputData.get_paid_channels_argument_builder()` to construct this
1572
+ argument. If using `new_data`, this argument is ignored.
1573
+ spend_constraint_lower: Numeric list of size `n_paid_channels` or float
1574
+ (same constraint for all channels) indicating the lower bound of
1575
+ media-level spend. If given as a channel-indexed array, the order must
1576
+ match `(InputData.media + InputData.reach)`. The lower bound of
1577
+ media-level spend is `(1 - spend_constraint_lower) * budget *
1578
+ allocation`. The value must be between 0-1. Defaults to `0.3` for fixed
1579
+ budget and `1` for flexible. Consider using
1580
+ `InputData.get_paid_channels_argument_builder()` to construct this
1581
+ argument.
1582
+ spend_constraint_upper: Numeric list of size `n_paid_channels` or float
1583
+ (same constraint for all channels) indicating the upper bound of
1584
+ media-level spend. If given as a channel-indexed array, the order must
1585
+ match `(InputData.media + InputData.reach)`. The upper bound of
1586
+ media-level spend is `(1 + spend_constraint_upper) * budget *
1587
+ allocation`. Defaults to `0.3` for fixed budget and `1` for flexible.
1588
+ Consider using `InputData.get_paid_channels_argument_builder()` to
1589
+ construct this argument.
1590
+ gtol: Float indicating the acceptable relative error for the budget used
1591
+ in the grid setup. The budget will be rounded by `10*n`, where `n` is
1592
+ the smallest integer such that `(budget - rounded_budget)` is less than
1593
+ or equal to `(budget * gtol)`. `gtol` must be less than 1.
1594
+ use_optimal_frequency: Boolean. Whether optimal frequency was used.
1345
1595
  use_kpi: Boolean. If `True`, then the incremental outcome is derived from
1346
1596
  the KPI impact. Otherwise, the incremental outcome is derived from the
1347
1597
  revenue impact.
1348
- use_optimal_frequency: Boolean. Whether optimal frequency was used.
1349
- optimal_frequency: `xr.DataArray` with dimension `n_rf_channels`,
1350
- containing the optimal frequency per channel, that maximizes mean ROI
1351
- over the corresponding prior/posterior distribution. Value is `None` if
1352
- the model does not contain reach and frequency data, or if the model
1353
- does contain reach and frequency data, but historical frequency is used
1354
- for the optimization scenario.
1355
1598
  batch_size: Max draws per chain in each batch. The calculation is run in
1356
1599
  batches to avoid memory exhaustion. If a memory error occurs, try
1357
1600
  reducing `batch_size`. The calculation will generally be faster with
@@ -1361,14 +1604,62 @@ class BudgetOptimizer:
1361
1604
  An OptimizationGrid object containing the grid data for optimization.
1362
1605
  """
1363
1606
  self._validate_model_fit(use_posterior)
1607
+ if new_data is None:
1608
+ new_data = analyzer.DataTensors()
1609
+
1610
+ required_tensors = c.PERFORMANCE_DATA + (c.TIME,)
1611
+ filled_data = new_data.validate_and_fill_missing_data(
1612
+ required_tensors_names=required_tensors, meridian=self._meridian
1613
+ )
1614
+
1615
+ selected_time_dims = self._validate_selected_times(
1616
+ selected_times, filled_data
1617
+ )
1618
+ hist_spend = self._analyzer.get_aggregated_spend(
1619
+ new_data=filled_data.filter_fields(c.PAID_CHANNELS + c.SPEND_DATA),
1620
+ selected_times=selected_time_dims,
1621
+ include_media=self._meridian.n_media_channels > 0,
1622
+ include_rf=self._meridian.n_rf_channels > 0,
1623
+ ).data
1624
+ n_paid_channels = len(self._meridian.input_data.get_all_paid_channels())
1625
+ budget = budget or np.sum(hist_spend)
1626
+ valid_pct_of_spend = _validate_pct_of_spend(
1627
+ n_channels=n_paid_channels,
1628
+ hist_spend=hist_spend,
1629
+ pct_of_spend=pct_of_spend,
1630
+ )
1631
+ spend = budget * valid_pct_of_spend
1632
+ round_factor = _get_round_factor(budget, gtol)
1633
+ (optimization_lower_bound, optimization_upper_bound) = (
1634
+ _get_optimization_bounds(
1635
+ n_channels=n_paid_channels,
1636
+ spend=spend,
1637
+ round_factor=round_factor,
1638
+ spend_constraint_lower=spend_constraint_lower,
1639
+ spend_constraint_upper=spend_constraint_upper,
1640
+ )
1641
+ )
1642
+ if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
1643
+ optimal_frequency = tf.convert_to_tensor(
1644
+ self._analyzer.optimal_freq(
1645
+ new_data=filled_data.filter_fields(c.RF_DATA),
1646
+ use_posterior=use_posterior,
1647
+ selected_times=selected_time_dims,
1648
+ use_kpi=use_kpi,
1649
+ ).optimal_frequency,
1650
+ dtype=tf.float32,
1651
+ )
1652
+ else:
1653
+ optimal_frequency = None
1364
1654
 
1365
1655
  step_size = 10 ** (-round_factor)
1366
1656
  (spend_grid, incremental_outcome_grid) = self._create_grids(
1367
- spend=historical_spend,
1368
- spend_bound_lower=spend_bound_lower,
1369
- spend_bound_upper=spend_bound_upper,
1657
+ spend=hist_spend,
1658
+ spend_bound_lower=optimization_lower_bound,
1659
+ spend_bound_upper=optimization_upper_bound,
1370
1660
  step_size=step_size,
1371
- selected_times=selected_times,
1661
+ selected_times=selected_time_dims,
1662
+ new_data=filled_data.filter_fields(c.PAID_DATA),
1372
1663
  use_posterior=use_posterior,
1373
1664
  use_kpi=use_kpi,
1374
1665
  optimal_frequency=optimal_frequency,
@@ -1382,13 +1673,14 @@ class BudgetOptimizer:
1382
1673
 
1383
1674
  return OptimizationGrid(
1384
1675
  _grid_dataset=grid_dataset,
1385
- historical_spend=historical_spend,
1676
+ historical_spend=hist_spend,
1386
1677
  use_kpi=use_kpi,
1387
1678
  use_posterior=use_posterior,
1388
1679
  use_optimal_frequency=use_optimal_frequency,
1680
+ gtol=gtol,
1389
1681
  round_factor=round_factor,
1390
1682
  optimal_frequency=optimal_frequency,
1391
- selected_times=selected_times,
1683
+ selected_times=selected_time_dims,
1392
1684
  )
1393
1685
 
1394
1686
  def _create_grid_dataset(
@@ -1425,82 +1717,46 @@ class BudgetOptimizer:
1425
1717
  return xr.Dataset(
1426
1718
  data_vars=data_vars,
1427
1719
  coords={
1428
- c.GRID_SPEND_INDEX: (
1429
- [c.GRID_SPEND_INDEX],
1430
- np.arange(0, len(spend_grid)),
1431
- ),
1432
- c.CHANNEL: (
1433
- [c.CHANNEL],
1434
- self._meridian.input_data.get_all_paid_channels(),
1435
- ),
1720
+ c.GRID_SPEND_INDEX: np.arange(0, len(spend_grid)),
1721
+ c.CHANNEL: self._meridian.input_data.get_all_paid_channels(),
1436
1722
  },
1437
1723
  attrs={c.SPEND_STEP_SIZE: spend_step_size},
1438
1724
  )
1439
1725
 
1440
- def _validate_pct_of_spend(
1441
- self, hist_spend: np.ndarray, pct_of_spend: Sequence[float] | None
1442
- ) -> np.ndarray:
1443
- """Validates and returns the percent of spend."""
1444
- if pct_of_spend is not None:
1445
- if len(pct_of_spend) != len(
1446
- self._meridian.input_data.get_all_paid_channels()
1447
- ):
1448
- raise ValueError('Percent of spend must be specified for all channels.')
1449
- if not math.isclose(np.sum(pct_of_spend), 1.0, abs_tol=0.001):
1450
- raise ValueError('Percent of spend must sum to one.')
1451
- return np.array(pct_of_spend)
1452
- else:
1453
- return hist_spend / np.sum(hist_spend)
1454
-
1455
- def _validate_spend_constraints(
1726
+ def _validate_selected_times(
1456
1727
  self,
1457
- fixed_budget: bool,
1458
- const_lower: _SpendConstraint | None,
1459
- const_upper: _SpendConstraint | None,
1460
- ) -> tuple[np.ndarray, np.ndarray]:
1461
- """Validates and returns the spend constraint requirements."""
1462
-
1463
- def get_const_array(const: _SpendConstraint | None) -> np.ndarray:
1464
- if const is None:
1465
- const = (
1466
- np.array([c.SPEND_CONSTRAINT_DEFAULT_FIXED_BUDGET])
1467
- if fixed_budget
1468
- else np.array([c.SPEND_CONSTRAINT_DEFAULT_FLEXIBLE_BUDGET])
1469
- )
1470
- elif isinstance(const, (float, int)):
1471
- const = np.array([const])
1472
- else:
1473
- const = np.array(const)
1474
- return const
1475
-
1476
- const_lower = get_const_array(const_lower)
1477
- const_upper = get_const_array(const_upper)
1478
-
1479
- if any(
1480
- len(const)
1481
- not in (1, len(self._meridian.input_data.get_all_paid_channels()))
1482
- for const in [const_lower, const_upper]
1483
- ):
1484
- raise ValueError(
1485
- 'Spend constraints must be either a single constraint or be specified'
1486
- ' for all channels.'
1728
+ selected_times: tuple[str | None, str | None] | None,
1729
+ new_data: analyzer.DataTensors | None,
1730
+ ) -> Sequence[str] | Sequence[bool] | None:
1731
+ """Validates and returns the selected times."""
1732
+ if selected_times is None:
1733
+ return None
1734
+ start_date, end_date = selected_times
1735
+ if start_date is None and end_date is None:
1736
+ return None
1737
+
1738
+ new_data = new_data or analyzer.DataTensors()
1739
+ if new_data.get_modified_times(self._meridian) is None:
1740
+ return self._meridian.expand_selected_time_dims(
1741
+ start_date=start_date,
1742
+ end_date=end_date,
1487
1743
  )
1488
-
1489
- for const in const_lower:
1490
- if not 0.0 <= const <= 1.0:
1491
- raise ValueError(
1492
- 'The lower spend constraint must be between 0 and 1 inclusive.'
1493
- )
1494
- for const in const_upper:
1495
- if const < 0:
1496
- raise ValueError('The upper spend constraint must be positive.')
1497
-
1498
- return (const_lower, const_upper)
1744
+ else:
1745
+ assert new_data.time is not None
1746
+ new_times_str = new_data.time.numpy().astype(str).tolist()
1747
+ time_coordinates = tc.TimeCoordinates.from_dates(new_times_str)
1748
+ expanded_dates = time_coordinates.expand_selected_time_dims(
1749
+ start_date=start_date,
1750
+ end_date=end_date,
1751
+ )
1752
+ expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
1753
+ return [x in expanded_str for x in new_times_str]
1499
1754
 
1500
1755
  def _get_incremental_outcome_tensors(
1501
1756
  self,
1502
1757
  hist_spend: np.ndarray,
1503
1758
  spend: np.ndarray,
1759
+ new_data: analyzer.DataTensors | None = None,
1504
1760
  optimal_frequency: Sequence[float] | None = None,
1505
1761
  ) -> tuple[
1506
1762
  tf.Tensor | None,
@@ -1525,6 +1781,11 @@ class BudgetOptimizer:
1525
1781
  Args:
1526
1782
  hist_spend: historical spend data.
1527
1783
  spend: new optimized spend data.
1784
+ new_data: An optional `DataTensors` object containing the new `media`,
1785
+ `reach`, and `frequency` tensors. If `None`, the existing tensors from
1786
+ the Meridian object are used. If any of the tensors is provided with a
1787
+ different number of time periods than in `InputData`, then all tensors
1788
+ must be provided with the same number of time periods.
1528
1789
  optimal_frequency: xr.DataArray with dimension `n_rf_channels`, containing
1529
1790
  the optimal frequency per channel, that maximizes posterior mean roi.
1530
1791
  Value is `None` if the model does not contain reach and frequency data,
@@ -1535,13 +1796,18 @@ class BudgetOptimizer:
1535
1796
  Tuple of tf.tensors (new_media, new_media_spend, new_reach, new_frequency,
1536
1797
  new_rf_spend).
1537
1798
  """
1799
+ new_data = new_data or analyzer.DataTensors()
1800
+ filled_data = new_data.validate_and_fill_missing_data(
1801
+ c.PAID_CHANNELS,
1802
+ self._meridian,
1803
+ )
1538
1804
  if self._meridian.n_media_channels > 0:
1539
1805
  new_media = (
1540
1806
  tf.math.divide_no_nan(
1541
1807
  spend[: self._meridian.n_media_channels],
1542
1808
  hist_spend[: self._meridian.n_media_channels],
1543
1809
  )
1544
- * self._meridian.media_tensors.media
1810
+ * filled_data.media
1545
1811
  )
1546
1812
  new_media_spend = tf.convert_to_tensor(
1547
1813
  spend[: self._meridian.n_media_channels]
@@ -1550,9 +1816,7 @@ class BudgetOptimizer:
1550
1816
  new_media = None
1551
1817
  new_media_spend = None
1552
1818
  if self._meridian.n_rf_channels > 0:
1553
- rf_media = (
1554
- self._meridian.rf_tensors.reach * self._meridian.rf_tensors.frequency
1555
- )
1819
+ rf_media = filled_data.reach * filled_data.frequency
1556
1820
  new_rf_media = (
1557
1821
  tf.math.divide_no_nan(
1558
1822
  spend[-self._meridian.n_rf_channels :],
@@ -1561,7 +1825,7 @@ class BudgetOptimizer:
1561
1825
  * rf_media
1562
1826
  )
1563
1827
  frequency = (
1564
- self._meridian.rf_tensors.frequency
1828
+ filled_data.frequency
1565
1829
  if optimal_frequency is None
1566
1830
  else optimal_frequency
1567
1831
  )
@@ -1581,9 +1845,10 @@ class BudgetOptimizer:
1581
1845
  self,
1582
1846
  hist_spend: np.ndarray,
1583
1847
  spend: np.ndarray,
1848
+ new_data: analyzer.DataTensors | None = None,
1584
1849
  use_posterior: bool = True,
1585
1850
  use_kpi: bool = False,
1586
- selected_times: Sequence[str] | None = None,
1851
+ selected_times: Sequence[str] | Sequence[bool] | None = None,
1587
1852
  optimal_frequency: Sequence[float] | None = None,
1588
1853
  attrs: Mapping[str, Any] | None = None,
1589
1854
  confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
@@ -1591,15 +1856,22 @@ class BudgetOptimizer:
1591
1856
  use_historical_budget: bool = True,
1592
1857
  ) -> xr.Dataset:
1593
1858
  """Creates the budget dataset."""
1859
+ new_data = new_data or analyzer.DataTensors()
1860
+ filled_data = new_data.validate_and_fill_missing_data(
1861
+ c.PAID_DATA + (c.TIME,),
1862
+ self._meridian,
1863
+ )
1594
1864
  spend = tf.convert_to_tensor(spend, dtype=tf.float32)
1595
1865
  hist_spend = tf.convert_to_tensor(hist_spend, dtype=tf.float32)
1596
1866
  (new_media, new_media_spend, new_reach, new_frequency, new_rf_spend) = (
1597
1867
  self._get_incremental_outcome_tensors(
1598
- hist_spend, spend, optimal_frequency
1868
+ hist_spend,
1869
+ spend,
1870
+ new_data=filled_data.filter_fields(c.PAID_CHANNELS),
1871
+ optimal_frequency=optimal_frequency,
1599
1872
  )
1600
1873
  )
1601
1874
  budget = np.sum(spend)
1602
- all_times = self._meridian.input_data.time.values.tolist()
1603
1875
 
1604
1876
  # incremental_outcome here is a tensor with the shape
1605
1877
  # (n_chains, n_draws, n_channels)
@@ -1609,6 +1881,7 @@ class BudgetOptimizer:
1609
1881
  media=new_media,
1610
1882
  reach=new_reach,
1611
1883
  frequency=new_frequency,
1884
+ revenue_per_kpi=filled_data.revenue_per_kpi,
1612
1885
  ),
1613
1886
  selected_times=selected_times,
1614
1887
  use_kpi=use_kpi,
@@ -1631,6 +1904,9 @@ class BudgetOptimizer:
1631
1904
  )
1632
1905
 
1633
1906
  aggregated_impressions = self._analyzer.get_aggregated_impressions(
1907
+ new_data=analyzer.DataTensors(
1908
+ media=new_media, reach=new_reach, frequency=new_frequency
1909
+ ),
1634
1910
  selected_times=selected_times,
1635
1911
  selected_geos=None,
1636
1912
  aggregate_times=True,
@@ -1638,10 +1914,11 @@ class BudgetOptimizer:
1638
1914
  optimal_frequency=optimal_frequency,
1639
1915
  include_non_paid_channels=False,
1640
1916
  )
1641
- effectiveness = incremental_outcome / aggregated_impressions
1642
1917
  effectiveness_with_mean_median_and_ci = (
1643
1918
  analyzer.get_central_tendency_and_ci(
1644
- data=effectiveness,
1919
+ data=tf.math.divide_no_nan(
1920
+ incremental_outcome, aggregated_impressions
1921
+ ),
1645
1922
  confidence_level=confidence_level,
1646
1923
  include_median=True,
1647
1924
  )
@@ -1661,6 +1938,7 @@ class BudgetOptimizer:
1661
1938
  frequency=new_frequency,
1662
1939
  media_spend=new_media_spend,
1663
1940
  rf_spend=new_rf_spend,
1941
+ revenue_per_kpi=filled_data.revenue_per_kpi,
1664
1942
  ),
1665
1943
  selected_times=selected_times,
1666
1944
  batch_size=batch_size,
@@ -1699,6 +1977,18 @@ class BudgetOptimizer:
1699
1977
  c.CPIK: ([c.CHANNEL, c.METRIC], cpik),
1700
1978
  }
1701
1979
 
1980
+ all_times = (
1981
+ filled_data.time.numpy().astype(str).tolist()
1982
+ if filled_data.time is not None
1983
+ else self._meridian.input_data.time.values.tolist()
1984
+ )
1985
+ if selected_times is not None and all(
1986
+ isinstance(time, bool) for time in selected_times
1987
+ ):
1988
+ selected_times = [
1989
+ time for time, selected in zip(all_times, selected_times) if selected
1990
+ ]
1991
+
1702
1992
  attributes = {
1703
1993
  c.START_DATE: min(selected_times) if selected_times else all_times[0],
1704
1994
  c.END_DATE: max(selected_times) if selected_times else all_times[-1],
@@ -1717,73 +2007,19 @@ class BudgetOptimizer:
1717
2007
  return xr.Dataset(
1718
2008
  data_vars=data_vars,
1719
2009
  coords={
1720
- c.CHANNEL: (
1721
- [c.CHANNEL],
1722
- self._meridian.input_data.get_all_paid_channels(),
1723
- ),
1724
- c.METRIC: (
1725
- [c.METRIC],
1726
- [c.MEAN, c.MEDIAN, c.CI_LO, c.CI_HI],
1727
- ),
2010
+ c.CHANNEL: self._meridian.input_data.get_all_paid_channels(),
2011
+ c.METRIC: [c.MEAN, c.MEDIAN, c.CI_LO, c.CI_HI],
1728
2012
  },
1729
2013
  attrs=attributes | (attrs or {}),
1730
2014
  )
1731
2015
 
1732
- def _get_optimization_bounds(
1733
- self,
1734
- spend: np.ndarray,
1735
- spend_constraint_lower: _SpendConstraint | None,
1736
- spend_constraint_upper: _SpendConstraint | None,
1737
- round_factor: int,
1738
- fixed_budget: bool,
1739
- ) -> tuple[np.ndarray, np.ndarray, tuple[np.ndarray, np.ndarray]]:
1740
- """Get optimization bounds from spend and spend constraints.
1741
-
1742
- Args:
1743
- spend: np.ndarray with size `n_total_channels` containing media-level
1744
- spend for all media and RF channels.
1745
- spend_constraint_lower: Numeric list of size `n_total_channels` or float
1746
- (same constraint for all media) indicating the lower bound of
1747
- media-level spend. The lower bound of media-level spend is `(1 -
1748
- spend_constraint_lower) * budget * allocation)`. The value must be
1749
- between 0-1.
1750
- spend_constraint_upper: Numeric list of size `n_total_channels` or float
1751
- (same constraint for all media) indicating the upper bound of
1752
- media-level spend. The upper bound of media-level spend is `(1 +
1753
- spend_constraint_upper) * budget * allocation)`.
1754
- round_factor: Integer number of digits to round optimization bounds.
1755
- fixed_budget: Boolean indicating whether it's a fixed budget optimization
1756
- or flexible budget optimization.
1757
-
1758
- Returns:
1759
- lower_bound: np.ndarray of size `n_total_channels` containing the treated
1760
- lower bound spend for each media and RF channel.
1761
- upper_bound: np.ndarray of size `n_total_channels` containing the treated
1762
- upper bound spend for each media and RF channel.
1763
- spend_bounds: tuple of np.ndarray of size `n_total_channels` containing
1764
- the untreated lower and upper bound spend for each media and RF channel.
1765
- """
1766
- (spend_const_lower, spend_const_upper) = self._validate_spend_constraints(
1767
- fixed_budget, spend_constraint_lower, spend_constraint_upper
1768
- )
1769
- spend_bounds = (
1770
- np.maximum((1 - spend_const_lower), 0),
1771
- (1 + spend_const_upper),
1772
- )
1773
-
1774
- lower_bound = np.round(
1775
- (spend_bounds[0] * spend),
1776
- round_factor,
1777
- ).astype(int)
1778
- upper_bound = np.round(spend_bounds[1] * spend, round_factor).astype(int)
1779
- return (lower_bound, upper_bound, spend_bounds)
1780
-
1781
2016
  def _update_incremental_outcome_grid(
1782
2017
  self,
1783
2018
  i: int,
1784
2019
  incremental_outcome_grid: np.ndarray,
1785
2020
  multipliers_grid: tf.Tensor,
1786
- selected_times: Sequence[str],
2021
+ new_data: analyzer.DataTensors | None = None,
2022
+ selected_times: Sequence[str] | Sequence[bool] | None = None,
1787
2023
  use_posterior: bool = True,
1788
2024
  use_kpi: bool = False,
1789
2025
  optimal_frequency: xr.DataArray | None = None,
@@ -1798,8 +2034,16 @@ class BudgetOptimizer:
1798
2034
  number of columns is equal to the number of total channels, containing
1799
2035
  incremental outcome by channel.
1800
2036
  multipliers_grid: A grid derived from spend.
1801
- selected_times: Sequence of strings representing the time dimensions in
1802
- `meridian.input_data.time` to use for optimization.
2037
+ new_data: An optional `DataTensors` object containing the new `media`,
2038
+ `reach`, `frequency`, and `revenue_per_kpi` tensors. If `None`, the
2039
+ existing tensors from the Meridian object are used. If any of the
2040
+ tensors is provided with a different number of time periods than in
2041
+ `InputData`, then all tensors must be provided with the same number of
2042
+ time periods.
2043
+ selected_times: Optional list of times to optimize. This can either be a
2044
+ string list containing a subset of time dimension coordinates from
2045
+ `InputData.time` or a boolean list with length equal to the time
2046
+ dimension of the tensor. By default, all time periods are included.
1803
2047
  use_posterior: Boolean. If `True`, then the incremental outcome is derived
1804
2048
  from the posterior distribution of the model. Otherwise, the prior
1805
2049
  distribution is used.
@@ -1816,10 +2060,14 @@ class BudgetOptimizer:
1816
2060
  reducing `batch_size`. The calculation will generally be faster with
1817
2061
  larger `batch_size` values.
1818
2062
  """
2063
+ new_data = new_data or analyzer.DataTensors()
2064
+ filled_data = new_data.validate_and_fill_missing_data(
2065
+ c.PAID_DATA, self._meridian
2066
+ )
1819
2067
  if self._meridian.n_media_channels > 0:
1820
2068
  new_media = (
1821
2069
  multipliers_grid[i, : self._meridian.n_media_channels]
1822
- * self._meridian.media_tensors.media
2070
+ * filled_data.media
1823
2071
  )
1824
2072
  else:
1825
2073
  new_media = None
@@ -1828,20 +2076,18 @@ class BudgetOptimizer:
1828
2076
  new_frequency = None
1829
2077
  new_reach = None
1830
2078
  elif optimal_frequency is not None:
1831
- new_frequency = (
1832
- tf.ones_like(self._meridian.rf_tensors.frequency) * optimal_frequency
1833
- )
2079
+ new_frequency = tf.ones_like(filled_data.frequency) * optimal_frequency
1834
2080
  new_reach = tf.math.divide_no_nan(
1835
2081
  multipliers_grid[i, -self._meridian.n_rf_channels :]
1836
- * self._meridian.rf_tensors.reach
1837
- * self._meridian.rf_tensors.frequency,
2082
+ * filled_data.reach
2083
+ * filled_data.frequency,
1838
2084
  new_frequency,
1839
2085
  )
1840
2086
  else:
1841
- new_frequency = self._meridian.rf_tensors.frequency
2087
+ new_frequency = filled_data.frequency
1842
2088
  new_reach = (
1843
2089
  multipliers_grid[i, -self._meridian.n_rf_channels :]
1844
- * self._meridian.rf_tensors.reach
2090
+ * filled_data.reach
1845
2091
  )
1846
2092
 
1847
2093
  # incremental_outcome returns a three dimensional tensor with dims
@@ -1854,6 +2100,7 @@ class BudgetOptimizer:
1854
2100
  media=new_media,
1855
2101
  reach=new_reach,
1856
2102
  frequency=new_frequency,
2103
+ revenue_per_kpi=filled_data.revenue_per_kpi,
1857
2104
  ),
1858
2105
  selected_times=selected_times,
1859
2106
  use_kpi=use_kpi,
@@ -1870,7 +2117,8 @@ class BudgetOptimizer:
1870
2117
  spend_bound_lower: np.ndarray,
1871
2118
  spend_bound_upper: np.ndarray,
1872
2119
  step_size: int,
1873
- selected_times: Sequence[str],
2120
+ new_data: analyzer.DataTensors | None = None,
2121
+ selected_times: Sequence[str] | Sequence[bool] | None = None,
1874
2122
  use_posterior: bool = True,
1875
2123
  use_kpi: bool = False,
1876
2124
  optimal_frequency: xr.DataArray | None = None,
@@ -1886,8 +2134,16 @@ class BudgetOptimizer:
1886
2134
  containing the upper constraint spend for each channel.
1887
2135
  step_size: Integer indicating the step size, or interval, between values
1888
2136
  in the spend grid. All media channels have the same step size.
1889
- selected_times: Sequence of strings representing the time dimensions in
1890
- `meridian.input_data.time` to use for optimization.
2137
+ new_data: An optional `DataTensors` object containing the new `media`,
2138
+ `reach`, `frequency`, and `revenue_per_kpi` tensors. If `None`, the
2139
+ existing tensors from the Meridian object are used. If any of the
2140
+ tensors is provided with a different number of time periods than in
2141
+ `InputData`, then all tensors must be provided with the same number of
2142
+ time periods.
2143
+ selected_times: Optional list of times to optimize. This can either be a
2144
+ string list containing a subset of time dimension coordinates from
2145
+ `InputData.time` or a boolean list with length equal to the time
2146
+ dimension of the tensor. By default, all time periods are included.
1891
2147
  use_posterior: Boolean. If `True`, then the incremental outcome is derived
1892
2148
  from the posterior distribution of the model. Otherwise, the prior
1893
2149
  distribution is used.
@@ -1941,6 +2197,7 @@ class BudgetOptimizer:
1941
2197
  incremental_outcome_grid=incremental_outcome_grid,
1942
2198
  multipliers_grid=multipliers_grid,
1943
2199
  selected_times=selected_times,
2200
+ new_data=new_data,
1944
2201
  use_posterior=use_posterior,
1945
2202
  use_kpi=use_kpi,
1946
2203
  optimal_frequency=optimal_frequency,
@@ -1967,6 +2224,135 @@ class BudgetOptimizer:
1967
2224
  return (spend_grid, incremental_outcome_grid)
1968
2225
 
1969
2226
 
2227
+ def _validate_pct_of_spend(
2228
+ n_channels: int,
2229
+ hist_spend: np.ndarray,
2230
+ pct_of_spend: Sequence[float] | None,
2231
+ ) -> np.ndarray:
2232
+ """Validates and returns the percent of spend."""
2233
+ if pct_of_spend is not None:
2234
+ if len(pct_of_spend) != n_channels:
2235
+ raise ValueError('Percent of spend must be specified for all channels.')
2236
+ if not math.isclose(np.sum(pct_of_spend), 1.0, abs_tol=0.001):
2237
+ raise ValueError('Percent of spend must sum to one.')
2238
+ return np.array(pct_of_spend)
2239
+ else:
2240
+ return hist_spend / np.sum(hist_spend)
2241
+
2242
+
2243
def _validate_spend_constraints(
    n_channels: int,
    const_lower: _SpendConstraint,
    const_upper: _SpendConstraint,
) -> tuple[np.ndarray, np.ndarray]:
  """Validates the spend constraints and returns them as 1-D arrays.

  Args:
    n_channels: Number of channels the constraints apply to.
    const_lower: Lower spend constraint, either a single number (applied to
      every channel) or one value per channel. Each value must be between 0
      and 1 inclusive.
    const_upper: Upper spend constraint, either a single number (applied to
      every channel) or one value per channel. Values must not be negative.

  Returns:
    A `(const_lower, const_upper)` tuple of arrays, each of length `1` or
    `n_channels`.

  Raises:
    ValueError: If a constraint is neither a single value nor one value per
      channel, or if any value is outside its allowed range.
  """

  def get_const_array(const: _SpendConstraint) -> np.ndarray:
    # `np.atleast_1d` promotes every scalar flavour (Python numbers, NumPy
    # scalars such as `np.float32`, and 0-d arrays) to shape `(1,)`, so the
    # length check below never hits `len()` of an unsized array.
    return np.atleast_1d(np.array(const))

  const_lower = get_const_array(const_lower)
  const_upper = get_const_array(const_upper)

  if any(
      const.ndim != 1 or len(const) not in (1, n_channels)
      for const in (const_lower, const_upper)
  ):
    raise ValueError(
        'Spend constraints must be either a single constraint or be specified'
        ' for all channels.'
    )

  # Written as a negated `all` so that NaN entries are rejected, exactly like
  # the element-wise `not 0.0 <= const <= 1.0` comparison would.
  if not np.all((const_lower >= 0.0) & (const_lower <= 1.0)):
    raise ValueError(
        'The lower spend constraint must be between 0 and 1 inclusive.'
    )
  if np.any(const_upper < 0):
    raise ValueError('The upper spend constraint must be positive.')

  return (const_lower, const_upper)
2278
+
2279
+
2280
def _get_spend_bounds(
    n_channels: int,
    spend_constraint_lower: _SpendConstraint,
    spend_constraint_upper: _SpendConstraint,
) -> tuple[np.ndarray, np.ndarray]:
  """Converts spend constraints into lower and upper spend multipliers.

  Args:
    n_channels: Integer number of total channels.
    spend_constraint_lower: Single number (same constraint for all channels) or
      numeric list with one entry per channel. The lower multiplier is
      `1 - spend_constraint_lower`, floored at zero.
    spend_constraint_upper: Single number (same constraint for all channels) or
      numeric list with one entry per channel. The upper multiplier is
      `1 + spend_constraint_upper`.

  Returns:
    A `(lower, upper)` tuple of arrays holding the untreated (unrounded)
    multipliers, computed from the constraints after they have been checked by
    `_validate_spend_constraints`.
  """
  lower_const, upper_const = _validate_spend_constraints(
      n_channels, spend_constraint_lower, spend_constraint_upper
  )
  lower_multiplier = np.maximum(1 - lower_const, 0)
  upper_multiplier = 1 + upper_const
  return (lower_multiplier, upper_multiplier)
2313
+
2314
+
2315
def _get_optimization_bounds(
    n_channels: int,
    spend: np.ndarray,
    round_factor: int,
    spend_constraint_lower: _SpendConstraint,
    spend_constraint_upper: _SpendConstraint,
) -> tuple[np.ndarray, np.ndarray]:
  """Computes rounded lower and upper spend bounds per channel.

  The spend is first rounded with `round_factor` and cast to integers. The
  multipliers from `_get_spend_bounds` are then applied to that rounded spend,
  and each product is rounded and cast to integers again.

  Args:
    n_channels: Integer number of total channels.
    spend: Array of channel-level spend.
    round_factor: Integer passed to `np.round` as the number of digits used to
      round the spend and the resulting bounds.
    spend_constraint_lower: Single number (same constraint for all channels) or
      numeric list with one entry per channel; forwarded to
      `_get_spend_bounds`.
    spend_constraint_upper: Single number (same constraint for all channels) or
      numeric list with one entry per channel; forwarded to
      `_get_spend_bounds`.

  Returns:
    A `(lower, upper)` tuple of integer arrays containing the treated lower
    and upper spend bound for each channel.
  """
  lower_multiplier, upper_multiplier = _get_spend_bounds(
      n_channels, spend_constraint_lower, spend_constraint_upper
  )

  def snap(values: np.ndarray) -> np.ndarray:
    # Round with the shared factor, then truncate to an integer array.
    return np.round(values, round_factor).astype(int)

  base_spend = snap(spend)
  lower_bound = snap(lower_multiplier * base_spend)
  upper_bound = snap(upper_multiplier * base_spend)
  return (lower_bound, upper_bound)
2354
+
2355
+
1970
2356
  def _validate_budget(
1971
2357
  fixed_budget: bool,
1972
2358
  budget: float | None,
@@ -2063,3 +2449,30 @@ def _exceeds_optimization_constraints(
2063
2449
  return cur_total_roi < target_value and roi_grid_point < cur_total_roi
2064
2450
  else:
2065
2451
  return roi_grid_point < scenario.target_value
2452
+
2453
+
2454
def _raise_warning_if_target_constraints_not_met(
    target_roi: float | None,
    target_mroi: float | None,
    optimized_data: xr.Dataset,
) -> None:
  """Emits a warning if the optimized result misses the target constraint.

  Only one target is checked: `target_roi` takes precedence, and `target_mroi`
  is only considered when `target_roi` is `None`.

  Args:
    target_roi: Optional target total ROI. `None` means no ROI target.
    target_mroi: Optional target marginal ROI. `None` means no marginal ROI
      target.
    optimized_data: Dataset with the optimized results. Its `c.TOTAL_ROI`
      attribute and `c.MROI` data variable are read.
  """
  # Compare against `None` explicitly: a target of `0.0` is falsy but is still
  # a target that has to be checked.
  if target_roi is not None:
    # Total ROI is a scalar value.
    optimized_roi = optimized_data.attrs[c.TOTAL_ROI]
    if optimized_roi < target_roi:
      warnings.warn(
          f'Target ROI constraint was not met. The target ROI is {target_roi}'
          f', but the actual ROI is {optimized_roi}.'
      )
  elif target_mroi is not None:
    # Compare each channel's marginal ROI to the target.
    # optimized_data[c.MROI] is an array of shape (n_channels, 4), where the
    # last dimension is [mean, median, ci_lo, ci_hi].
    optimized_mroi = optimized_data[c.MROI][:, 0]
    if np.any(optimized_mroi < target_mroi):
      warnings.warn(
          'Target marginal ROI constraint was not met. The target marginal'
          f' ROI is {target_mroi}, but the actual channel marginal ROIs are'
          f' {optimized_mroi}.'
      )