google-meridian 1.0.8__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,6 +28,7 @@ from meridian import constants as c
28
28
  from meridian.analysis import analyzer
29
29
  from meridian.analysis import formatter
30
30
  from meridian.analysis import summary_text
31
+ from meridian.data import time_coordinates as tc
31
32
  from meridian.model import model
32
33
  import numpy as np
33
34
  import pandas as pd
@@ -97,6 +98,8 @@ class OptimizationGrid:
97
98
  use_kpi: Whether using generic KPI or revenue.
98
99
  use_posterior: Whether posterior distributions were used, or prior.
99
100
  use_optimal_frequency: Whether optimal frequency was used.
101
+ start_date: The start date of the optimization period.
102
+ end_date: The end date of the optimization period.
100
103
  gtol: Float indicating the acceptable relative error for the budget used in
101
104
  the grid setup. The budget is rounded by `10*n`, where `n` is the smallest
102
105
  integer such that `(budget - rounded_budget)` is less than or equal to
@@ -116,10 +119,12 @@ class OptimizationGrid:
116
119
  use_kpi: bool
117
120
  use_posterior: bool
118
121
  use_optimal_frequency: bool
122
+ start_date: tc.Date
123
+ end_date: tc.Date
119
124
  gtol: float
120
125
  round_factor: int
121
126
  optimal_frequency: np.ndarray | None
122
- selected_times: list[str] | None
127
+ selected_times: Sequence[str] | None
123
128
 
124
129
  @property
125
130
  def grid_dataset(self) -> xr.Dataset:
@@ -189,7 +194,7 @@ class OptimizationGrid:
189
194
  variables:
190
195
  * `optimized`: media spend that maximizes incremental outcome based
191
196
  on spend constraints for all media and RF channels.
192
- * `non_optimized`: Channel-level spend.
197
+ * `non_optimized`: rounded channel-level spend.
193
198
 
194
199
  Raises:
195
200
  A warning if the budget's rounding should be different from the grid's
@@ -226,7 +231,7 @@ class OptimizationGrid:
226
231
  spend_constraint_upper=spend_constraint_upper,
227
232
  )
228
233
  )
229
- self._check_optimization_bounds(
234
+ self.check_optimization_bounds(
230
235
  lower_bound=optimization_lower_bound,
231
236
  upper_bound=optimization_upper_bound,
232
237
  )
@@ -235,16 +240,16 @@ class OptimizationGrid:
235
240
  warnings.warn(
236
241
  'Optimization accuracy may suffer owing to budget level differences.'
237
242
  ' Consider creating a new grid with smaller `gtol` if you intend to'
238
- " shrink budgets significantly. It's only a problem when you use a"
239
- ' smaller budget, for which the intended step size is meant to be'
240
- ' smaller for one or more channels.'
243
+ ' shrink total budget significantly across optimization runs.'
244
+ ' It is only a problem when you use a much smaller budget, '
245
+ ' for which the intended step size is smaller. '
241
246
  )
242
247
  (spend_grid, incremental_outcome_grid) = self._trim_grid(
243
248
  spend_bound_lower=optimization_lower_bound,
244
249
  spend_bound_upper=optimization_upper_bound,
245
250
  )
251
+ rounded_spend = np.round(spend, self.round_factor).astype(int)
246
252
  if isinstance(scenario, FixedBudgetScenario):
247
- rounded_spend = np.round(spend, self.round_factor)
248
253
  scenario = dataclasses.replace(
249
254
  scenario, total_budget=np.sum(rounded_spend)
250
255
  )
@@ -258,7 +263,7 @@ class OptimizationGrid:
258
263
  coords={c.CHANNEL: self.channels},
259
264
  data_vars={
260
265
  c.OPTIMIZED: ([c.CHANNEL], optimal_spend.data),
261
- c.NON_OPTIMIZED: ([c.CHANNEL], spend),
266
+ c.NON_OPTIMIZED: ([c.CHANNEL], rounded_spend),
262
267
  },
263
268
  )
264
269
 
@@ -344,8 +349,10 @@ class OptimizationGrid:
344
349
  grid coverage and they are rounded using this grid's round factor.
345
350
 
346
351
  Args:
347
- spend_bound_lower: The lower bound of spend for each channel.
348
- spend_bound_upper: The upper bound of spend for each channel.
352
+ spend_bound_lower: The lower bound of spend for each channel. Must be in
353
+ the same order as `self.channels`.
354
+ spend_bound_upper: The upper bound of spend for each channel. Must be in
355
+ the same order as `self.channels`.
349
356
 
350
357
  Returns:
351
358
  updated_spend: The updated spend grid with valid spend values moved up to
@@ -382,7 +389,7 @@ class OptimizationGrid:
382
389
 
383
390
  return (updated_spend, updated_incremental_outcome)
384
391
 
385
- def _check_optimization_bounds(
392
+ def check_optimization_bounds(
386
393
  self,
387
394
  lower_bound: np.ndarray,
388
395
  upper_bound: np.ndarray,
@@ -391,9 +398,9 @@ class OptimizationGrid:
391
398
 
392
399
  Args:
393
400
  lower_bound: `np.ndarray` of shape `(n_channels,)` containing the lower
394
- bound for each channel.
401
+ bound for each channel. Must be in the same order as `self.channels`.
395
402
  upper_bound: `np.ndarray` of shape `(n_channels,)` containing the upper
396
- bound for each channel.
403
+ bound for each channel. Must be in the same order as `self.channels`.
397
404
 
398
405
  Raises:
399
406
  ValueError: If the spend grid does not fit within the optimization bounds.
@@ -621,7 +628,7 @@ class OptimizationResults:
621
628
  # by adjusting the domain of the y-axis so that the incremental outcome does
622
629
  # not start at 0. Calculate the total decrease in incremental outcome to pad
623
630
  # the y-axis from the non-optimized total incremental outcome value.
624
- sum_decr = sum(df[df.incremental_outcome < 0].incremental_outcome)
631
+ sum_decr = df[df.incremental_outcome < 0].incremental_outcome.sum()
625
632
  y_padding = float(f'1e{int(math.log10(-sum_decr))}') if sum_decr < 0 else 2
626
633
  domain_scale = [
627
634
  self.nonoptimized_data.total_incremental_outcome + sum_decr - y_padding,
@@ -1016,8 +1023,16 @@ class OptimizationResults:
1016
1023
 
1017
1024
  def _gen_optimization_summary(self) -> str:
1018
1025
  """Generates HTML optimization summary output (as sanitized content str)."""
1019
- self.template_env.globals[c.START_DATE] = self.optimized_data.start_date
1020
- self.template_env.globals[c.END_DATE] = self.optimized_data.end_date
1026
+ start_date = tc.normalize_date(self.optimized_data.start_date)
1027
+ self.template_env.globals[c.START_DATE] = start_date.strftime(
1028
+ f'%b {start_date.day}, %Y'
1029
+ )
1030
+ interval_days = self.meridian.input_data.time_coordinates.interval_days
1031
+ end_date = tc.normalize_date(self.optimized_data.end_date)
1032
+ end_date_adjusted = end_date + pd.Timedelta(days=interval_days)
1033
+ self.template_env.globals[c.END_DATE] = end_date_adjusted.strftime(
1034
+ f'%b {end_date_adjusted.day}, %Y'
1035
+ )
1021
1036
 
1022
1037
  html_template = self.template_env.get_template('summary.html.jinja')
1023
1038
  return html_template.render(
@@ -1129,21 +1144,26 @@ class OptimizationResults:
1129
1144
  - self.nonoptimized_data.total_incremental_outcome
1130
1145
  )
1131
1146
  inc_outcome_prefix = '+' if inc_outcome_diff > 0 else ''
1147
+ currency = '$' if outcome == c.REVENUE else ''
1132
1148
  non_optimized_inc_outcome = formatter.StatsSpec(
1133
1149
  title=summary_text.NON_OPTIMIZED_INC_OUTCOME_LABEL.format(
1134
1150
  outcome=outcome
1135
1151
  ),
1136
- stat=formatter.format_monetary_num(
1137
- self.nonoptimized_data.total_incremental_outcome,
1152
+ stat=formatter.compact_number(
1153
+ n=self.nonoptimized_data.total_incremental_outcome,
1154
+ precision=0,
1155
+ currency=currency,
1138
1156
  ),
1139
1157
  )
1140
1158
  optimized_inc_outcome = formatter.StatsSpec(
1141
1159
  title=summary_text.OPTIMIZED_INC_OUTCOME_LABEL.format(outcome=outcome),
1142
- stat=formatter.format_monetary_num(
1143
- self.optimized_data.total_incremental_outcome,
1160
+ stat=formatter.compact_number(
1161
+ n=self.optimized_data.total_incremental_outcome,
1162
+ precision=0,
1163
+ currency=currency,
1144
1164
  ),
1145
1165
  delta=inc_outcome_prefix
1146
- + formatter.format_monetary_num(inc_outcome_diff),
1166
+ + formatter.compact_number(inc_outcome_diff, 0, currency),
1147
1167
  )
1148
1168
  return [
1149
1169
  non_optimized_budget,
@@ -1265,8 +1285,12 @@ class BudgetOptimizer:
1265
1285
 
1266
1286
  def optimize(
1267
1287
  self,
1288
+ new_data: analyzer.DataTensors | None = None,
1268
1289
  use_posterior: bool = True,
1290
+ # TODO: b/409550413 - Remove this argument.
1269
1291
  selected_times: tuple[str | None, str | None] | None = None,
1292
+ start_date: tc.Date = None,
1293
+ end_date: tc.Date = None,
1270
1294
  fixed_budget: bool = True,
1271
1295
  budget: float | None = None,
1272
1296
  pct_of_spend: Sequence[float] | None = None,
@@ -1279,23 +1303,61 @@ class BudgetOptimizer:
1279
1303
  use_kpi: bool = False,
1280
1304
  confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
1281
1305
  batch_size: int = c.DEFAULT_BATCH_SIZE,
1306
+ optimization_grid: OptimizationGrid | None = None,
1282
1307
  ) -> OptimizationResults:
1283
1308
  """Finds the optimal budget allocation that maximizes outcome.
1284
1309
 
1285
- Outcome is typically revenue, but when the KPI is not revenue and "revenue
1286
- per KPI" data is not available, then Meridian defines the Outcome to be the
1287
- KPI itself.
1310
+ Optimization depends on the following:
1311
+ 1. Flighting pattern (the relative allocation of a channel's media units
1312
+ across geos and time periods, which is held fixed for each channel)
1313
+ 2. Cost per media unit (This is assumed to be constant for each channel, and
1314
+ can optionally vary by geo and/or time period)
1315
+ 3. `pct_of_spend` (center of the spend box constraint for each channel)
1316
+ 4. `budget` (total budget used for fixed budget scenarios)
1317
+
1318
+ By default, these values are assigned based on the historical data. The
1319
+ `pct_of_spend` and `budget` are optimization arguments that can be
1320
+ overridden directly. Passing `new_data.media` (or `new_data.reach` or
1321
+ `new_data.frequency`) will override both the flighting pattern and cost per
1322
+ media unit. Passing `new_data.spend` (or `new_data.rf_spend`) will only
1323
+ override the cost per media unit.
1324
+
1325
+ If `new_data` is passed with a different number of time periods than the
1326
+ historical data, then all of the optimization parameters will be inferred
1327
+ from it. Default values for `pct_of_spend` and `budget` (if
1328
+ `fixed_budget=True`) will be inferred from the `new_data`, but can be
1329
+ overridden using the `pct_of_spend` and `budget` arguments.
1330
+
1331
+ If `start_date` or `end_date` is specified, then the default values are
1332
+ inferred based on the subset of time periods specified. Both start and end
1333
+ time selectors should align with the Meridian time dimension coordinates in
1334
+ the underlying model if optimizing the original data. If `new_data` is
1335
+ provided with a different number of time periods than in `InputData`, then
1336
+ the start and end time coordinates must match the time dimensions in
1337
+ `new_data.time`. By default, all time periods are used. Either start or
1338
+ end time component can be `None` to represent the first or the last time
1339
+ coordinate, respectively.
1288
1340
 
1289
1341
  Args:
1342
+ new_data: An optional `DataTensors` container with optional tensors:
1343
+ `media`, `reach`, `frequency`, `media_spend`, `rf_spend`,
1344
+ `revenue_per_kpi`, and `time`. If `None`, the original tensors from the
1345
+ Meridian object are used. If `new_data` is provided, the optimization is
1346
+ run on the versions of the tensors in `new_data` and the original
1347
+ versions of all the remaining tensors. If any of the tensors in
1348
+ `new_data` is provided with a different number of time periods than in
1349
+ `InputData`, then all tensors must be provided with the same number of
1350
+ time periods and the `time` tensor must be provided.
1290
1351
  use_posterior: Boolean. If `True`, then the budget is optimized based on
1291
1352
  the posterior distribution of the model. Otherwise, the prior
1292
1353
  distribution is used.
1293
- selected_times: Tuple containing the start and end time dimension
1294
- coordinates for the duration to run the optimization on. Selected time
1295
- values should align with the Meridian time dimension coordinates in the
1296
- underlying model. By default, all times periods are used. Either start
1297
- or end time component can be `None` to represent the first or the last
1298
- time coordinate, respectively.
1354
+ selected_times: Deprecated. Tuple containing the start and end time
1355
+ dimension coordinates for the duration to run the optimization on.
1356
+ Please use `start_date` and `end_date` instead.
1357
+ start_date: Optional start date selector, *inclusive*, in _yyyy-mm-dd_
1358
+ format. Default is `None`, i.e. the first time period.
1359
+ end_date: Optional end date selector, *inclusive*, in _yyyy-mm-dd_ format.
1360
+ Default is `None`, i.e. the last time period.
1299
1361
  fixed_budget: Boolean indicating whether it's a fixed budget optimization
1300
1362
  or flexible budget optimization. Defaults to `True`. If `False`, must
1301
1363
  specify either `target_roi` or `target_mroi`.
@@ -1347,11 +1409,27 @@ class BudgetOptimizer:
1347
1409
  in batches to avoid memory exhaustion. If a memory error occurs, try
1348
1410
  reducing `batch_size`. The calculation will generally be faster with
1349
1411
  larger `batch_size` values.
1412
+ optimization_grid: An `OptimizationGrid` object containing the grid
1413
+ information. Grid creation is a time-consuming part of optimization.
1414
+ Creating one grid and running various optimizations on it can save time.
1415
+ If `None` or grid doesn't match the optimization arguments, a new grid
1416
+ will be created.
1350
1417
 
1351
1418
  Returns:
1352
1419
  An `OptimizationResults` object containing optimized budget allocation
1353
1420
  datasets, along with some of the intermediate values used to derive them.
1354
1421
  """
1422
+ if selected_times is not None:
1423
+ warnings.warn(
1424
+ '`selected_times` is deprecated. Please use `start_date` and'
1425
+ ' `end_date` instead.',
1426
+ DeprecationWarning,
1427
+ stacklevel=2,
1428
+ )
1429
+ deprecated_start_date, deprecated_end_date = selected_times
1430
+ start_date = start_date or deprecated_start_date
1431
+ end_date = end_date or deprecated_end_date
1432
+
1355
1433
  _validate_budget(
1356
1434
  fixed_budget=fixed_budget,
1357
1435
  budget=budget,
@@ -1367,18 +1445,36 @@ class BudgetOptimizer:
1367
1445
  spend_constraint_lower = spend_constraint_default
1368
1446
  if spend_constraint_upper is None:
1369
1447
  spend_constraint_upper = spend_constraint_default
1370
- optimization_grid = self.create_optimization_grid(
1371
- selected_times=selected_times,
1448
+ use_grid_arg = optimization_grid is not None and self._validate_grid(
1449
+ new_data=new_data,
1450
+ use_posterior=use_posterior,
1451
+ start_date=start_date,
1452
+ end_date=end_date,
1372
1453
  budget=budget,
1373
1454
  pct_of_spend=pct_of_spend,
1374
1455
  spend_constraint_lower=spend_constraint_lower,
1375
1456
  spend_constraint_upper=spend_constraint_upper,
1376
1457
  gtol=gtol,
1377
- use_posterior=use_posterior,
1378
- use_kpi=use_kpi,
1379
1458
  use_optimal_frequency=use_optimal_frequency,
1380
- batch_size=batch_size,
1459
+ use_kpi=use_kpi,
1460
+ optimization_grid=optimization_grid,
1381
1461
  )
1462
+ if optimization_grid is None or not use_grid_arg:
1463
+ optimization_grid = self.create_optimization_grid(
1464
+ new_data=new_data,
1465
+ start_date=start_date,
1466
+ end_date=end_date,
1467
+ budget=budget,
1468
+ pct_of_spend=pct_of_spend,
1469
+ spend_constraint_lower=spend_constraint_lower,
1470
+ spend_constraint_upper=spend_constraint_upper,
1471
+ gtol=gtol,
1472
+ use_posterior=use_posterior,
1473
+ use_kpi=use_kpi,
1474
+ use_optimal_frequency=use_optimal_frequency,
1475
+ batch_size=batch_size,
1476
+ )
1477
+
1382
1478
  if fixed_budget:
1383
1479
  scenario = FixedBudgetScenario(total_budget=budget)
1384
1480
  elif target_roi:
@@ -1399,24 +1495,24 @@ class BudgetOptimizer:
1399
1495
  use_historical_budget = budget is None or np.isclose(
1400
1496
  budget, np.sum(optimization_grid.historical_spend)
1401
1497
  )
1402
- rounded_spend = np.round(
1403
- spend.non_optimized, optimization_grid.round_factor
1404
- ).astype(int)
1498
+ new_data = new_data or analyzer.DataTensors()
1405
1499
  nonoptimized_data = self._create_budget_dataset(
1500
+ new_data=new_data.filter_fields(c.PAID_DATA + (c.TIME,)),
1406
1501
  use_posterior=use_posterior,
1407
1502
  use_kpi=use_kpi,
1408
1503
  hist_spend=optimization_grid.historical_spend,
1409
- spend=rounded_spend,
1504
+ spend=spend.non_optimized,
1410
1505
  selected_times=optimization_grid.selected_times,
1411
1506
  confidence_level=confidence_level,
1412
1507
  batch_size=batch_size,
1413
1508
  use_historical_budget=use_historical_budget,
1414
1509
  )
1415
1510
  nonoptimized_data_with_optimal_freq = self._create_budget_dataset(
1511
+ new_data=new_data.filter_fields(c.PAID_DATA + (c.TIME,)),
1416
1512
  use_posterior=use_posterior,
1417
1513
  use_kpi=use_kpi,
1418
1514
  hist_spend=optimization_grid.historical_spend,
1419
- spend=rounded_spend,
1515
+ spend=spend.non_optimized,
1420
1516
  selected_times=optimization_grid.selected_times,
1421
1517
  optimal_frequency=optimization_grid.optimal_frequency,
1422
1518
  confidence_level=confidence_level,
@@ -1431,6 +1527,7 @@ class BudgetOptimizer:
1431
1527
  elif target_mroi:
1432
1528
  constraints[c.TARGET_MROI] = target_mroi
1433
1529
  optimized_data = self._create_budget_dataset(
1530
+ new_data=new_data.filter_fields(c.PAID_DATA + (c.TIME,)),
1434
1531
  use_posterior=use_posterior,
1435
1532
  use_kpi=use_kpi,
1436
1533
  hist_spend=optimization_grid.historical_spend,
@@ -1474,10 +1571,139 @@ class BudgetOptimizer:
1474
1571
  _optimization_grid=optimization_grid,
1475
1572
  )
1476
1573
 
1574
+ def _validate_grid(
1575
+ self,
1576
+ new_data: analyzer.DataTensors | None,
1577
+ use_posterior: bool,
1578
+ start_date: tc.Date,
1579
+ end_date: tc.Date,
1580
+ budget: float | None,
1581
+ pct_of_spend: Sequence[float] | None,
1582
+ spend_constraint_lower: _SpendConstraint,
1583
+ spend_constraint_upper: _SpendConstraint,
1584
+ gtol: float,
1585
+ use_optimal_frequency: bool,
1586
+ use_kpi: bool,
1587
+ optimization_grid: OptimizationGrid,
1588
+ ) -> bool:
1589
+ """Checks if the grid is valid for the optimization scenario."""
1590
+
1591
+ if use_posterior != optimization_grid.use_posterior:
1592
+ warnings.warn(
1593
+ 'Given optimization grid was created with `use_posterior` ='
1594
+ f' {optimization_grid.use_posterior}, but optimization was called'
1595
+ f' with `use_posterior` = {use_posterior}. A new grid will be'
1596
+ ' created.'
1597
+ )
1598
+ return False
1599
+
1600
+ if use_kpi != optimization_grid.use_kpi:
1601
+ warnings.warn(
1602
+ 'Given optimization grid was created with `use_kpi` ='
1603
+ f' {optimization_grid.use_kpi}, but optimization was called'
1604
+ f' with `use_kpi` = {use_kpi}. A new grid will be'
1605
+ ' created.'
1606
+ )
1607
+ return False
1608
+
1609
+ if use_optimal_frequency != optimization_grid.use_optimal_frequency:
1610
+ warnings.warn(
1611
+ 'Given optimization grid was created with `use_optimal_frequency` ='
1612
+ f' {optimization_grid.use_optimal_frequency}, but optimization was'
1613
+ f' called with `use_optimal_frequency` = {use_optimal_frequency}. A'
1614
+ ' new grid will be created.'
1615
+ )
1616
+ return False
1617
+
1618
+ if (
1619
+ start_date != optimization_grid.start_date
1620
+ or end_date != optimization_grid.end_date
1621
+ ):
1622
+ warnings.warn(
1623
+ 'Given optimization grid was created with `start_date` ='
1624
+ f' {optimization_grid.start_date} and `end_date` ='
1625
+ f' {optimization_grid.end_date}, but optimization was called with'
1626
+ f' `start_date` = {start_date} and `end_date` = {end_date}. A new'
1627
+ ' grid will be created.'
1628
+ )
1629
+ return False
1630
+
1631
+ if new_data is None:
1632
+ new_data = analyzer.DataTensors()
1633
+ required_tensors = c.PERFORMANCE_DATA + (c.TIME,)
1634
+ filled_data = new_data.validate_and_fill_missing_data(
1635
+ required_tensors_names=required_tensors, meridian=self._meridian
1636
+ )
1637
+ paid_channels = self._meridian.input_data.get_all_paid_channels()
1638
+ if not np.array_equal(paid_channels, optimization_grid.channels):
1639
+ warnings.warn(
1640
+ 'Given optimization grid was created with `channels` ='
1641
+ f' {optimization_grid.channels}, but optimization request was'
1642
+ f' resolved with `channels` = {paid_channels}. A new grid will be'
1643
+ ' created.'
1644
+ )
1645
+ return False
1646
+
1647
+ n_channels = len(optimization_grid.channels)
1648
+ selected_times = self._validate_selected_times(
1649
+ start_date=start_date,
1650
+ end_date=end_date,
1651
+ new_data=new_data,
1652
+ )
1653
+ hist_spend = self._analyzer.get_aggregated_spend(
1654
+ new_data=filled_data.filter_fields(c.PAID_CHANNELS + c.SPEND_DATA),
1655
+ selected_times=selected_times,
1656
+ include_media=self._meridian.n_media_channels > 0,
1657
+ include_rf=self._meridian.n_rf_channels > 0,
1658
+ ).data
1659
+ budget = budget or np.sum(hist_spend)
1660
+ valid_pct_of_spend = _validate_pct_of_spend(
1661
+ n_channels=n_channels,
1662
+ hist_spend=hist_spend,
1663
+ pct_of_spend=pct_of_spend,
1664
+ )
1665
+ spend = budget * valid_pct_of_spend
1666
+ (optimization_lower_bound, optimization_upper_bound) = (
1667
+ _get_optimization_bounds(
1668
+ n_channels=n_channels,
1669
+ spend=spend,
1670
+ round_factor=optimization_grid.round_factor,
1671
+ spend_constraint_lower=spend_constraint_lower,
1672
+ spend_constraint_upper=spend_constraint_upper,
1673
+ )
1674
+ )
1675
+ try:
1676
+ optimization_grid.check_optimization_bounds(
1677
+ lower_bound=optimization_lower_bound,
1678
+ upper_bound=optimization_upper_bound,
1679
+ )
1680
+ except ValueError as e:
1681
+ warnings.warn(
1682
+ 'Optimization called with bounds that are not within the grid. A new'
1683
+ f' grid will be created. Error message: {str(e)}'
1684
+ )
1685
+ return False
1686
+
1687
+ round_factor = _get_round_factor(budget, gtol)
1688
+ if round_factor != optimization_grid.round_factor:
1689
+ warnings.warn(
1690
+ 'Optimization accuracy may suffer owing to budget level differences.'
1691
+ ' Consider creating a new grid with smaller `gtol` if you intend to'
1692
+ ' shrink total budget significantly across optimization runs.'
1693
+ ' It is only a problem when you use a much smaller budget, '
1694
+ ' for which the intended step size is smaller.'
1695
+ )
1696
+
1697
+ return True
1698
+
1477
1699
  def create_optimization_grid(
1478
1700
  self,
1701
+ new_data: xr.Dataset | None = None,
1479
1702
  use_posterior: bool = True,
1703
+ # TODO: b/409550413 - Remove this argument.
1480
1704
  selected_times: tuple[str | None, str | None] | None = None,
1705
+ start_date: tc.Date = None,
1706
+ end_date: tc.Date = None,
1481
1707
  budget: float | None = None,
1482
1708
  pct_of_spend: Sequence[float] | None = None,
1483
1709
  spend_constraint_lower: _SpendConstraint = c.SPEND_CONSTRAINT_DEFAULT,
@@ -1489,16 +1715,35 @@ class BudgetOptimizer:
1489
1715
  ) -> OptimizationGrid:
1490
1716
  """Creates a OptimizationGrid for optimization.
1491
1717
 
1718
+ If `start_date` or `end_date` is specified, then the default values are
1719
+ inferred based on the subset of time periods specified. Both start and end
1720
+ time selectors should align with the Meridian time dimension coordinates in
1721
+ the underlying model if optimizing the original data. If `new_data` is
1722
+ provided with a different number of time periods than in `InputData`, then
1723
+ the start and end time coordinates must match the time dimensions in
1724
+ `new_data.time`. By default, all time periods are used. Either start or
1725
+ end time component can be `None` to represent the first or the last time
1726
+ coordinate, respectively.
1727
+
1492
1728
  Args:
1729
+ new_data: An optional `DataTensors` container with optional tensors:
1730
+ `media`, `reach`, `frequency`, `media_spend`, `rf_spend`,
1731
+ `revenue_per_kpi`, and `time`. If `None`, the original tensors from the
1732
+ Meridian object are used. If `new_data` is provided, the grid is created
1733
+ using the versions of the tensors in `new_data` and the original
1734
+ versions of all the remaining tensors. If any of the tensors in
1735
+ `new_data` is provided with a different number of time periods than in
1736
+ `InputData`, then all tensors must be provided with the same number of
1737
+ time periods and the `time` tensor must be provided.
1493
1738
  use_posterior: Boolean. If `True`, then the incremental outcome is derived
1494
1739
  from the posterior distribution of the model. Otherwise, the prior
1495
1740
  distribution is used.
1496
- selected_times: Tuple containing the start and end time dimension
1497
- coordinates for the duration to run the optimization on. Selected time
1498
- values should align with the Meridian time dimension coordinates in the
1499
- underlying model. By default, all times periods are used. Either start
1500
- or end time component can be `None` to represent the first or the last
1501
- time coordinate, respectively.
1741
+ selected_times: Deprecated. Tuple containing the start and end time
1742
+ dimension coordinates. Please use `start_date` and `end_date` instead.
1743
+ start_date: Optional start date selector, *inclusive*, in _yyyy-mm-dd_
1744
+ format. Default is `None`, i.e. the first time period.
1745
+ end_date: Optional end date selector, *inclusive*, in _yyyy-mm-dd_ format.
1746
+ Default is `None`, i.e. the last time period.
1502
1747
  budget: Number indicating the total budget for the fixed budget scenario.
1503
1748
  Defaults to the historical budget.
1504
1749
  pct_of_spend: Numeric list of size `n_paid_channels` containing the
@@ -1545,16 +1790,32 @@ class BudgetOptimizer:
1545
1790
  An OptimizationGrid object containing the grid data for optimization.
1546
1791
  """
1547
1792
  self._validate_model_fit(use_posterior)
1793
+ if new_data is None:
1794
+ new_data = analyzer.DataTensors()
1795
+
1548
1796
  if selected_times is not None:
1549
- start_date, end_date = selected_times
1550
- selected_time_dims = self._meridian.expand_selected_time_dims(
1551
- start_date=start_date,
1552
- end_date=end_date,
1797
+ warnings.warn(
1798
+ '`selected_times` is deprecated. Please use `start_date` and'
1799
+ ' `end_date` instead.',
1800
+ DeprecationWarning,
1801
+ stacklevel=2,
1553
1802
  )
1554
- else:
1555
- selected_time_dims = None
1556
- hist_spend = self._analyzer.get_historical_spend(
1557
- selected_time_dims,
1803
+ deprecated_start_date, deprecated_end_date = selected_times
1804
+ start_date = start_date or deprecated_start_date
1805
+ end_date = end_date or deprecated_end_date
1806
+
1807
+ required_tensors = c.PERFORMANCE_DATA + (c.TIME,)
1808
+ filled_data = new_data.validate_and_fill_missing_data(
1809
+ required_tensors_names=required_tensors, meridian=self._meridian
1810
+ )
1811
+ selected_times = self._validate_selected_times(
1812
+ start_date=start_date,
1813
+ end_date=end_date,
1814
+ new_data=filled_data,
1815
+ )
1816
+ hist_spend = self._analyzer.get_aggregated_spend(
1817
+ new_data=filled_data.filter_fields(c.PAID_CHANNELS + c.SPEND_DATA),
1818
+ selected_times=selected_times,
1558
1819
  include_media=self._meridian.n_media_channels > 0,
1559
1820
  include_rf=self._meridian.n_rf_channels > 0,
1560
1821
  ).data
@@ -1579,8 +1840,9 @@ class BudgetOptimizer:
1579
1840
  if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
1580
1841
  optimal_frequency = tf.convert_to_tensor(
1581
1842
  self._analyzer.optimal_freq(
1843
+ new_data=filled_data.filter_fields(c.RF_DATA),
1582
1844
  use_posterior=use_posterior,
1583
- selected_times=selected_time_dims,
1845
+ selected_times=selected_times,
1584
1846
  use_kpi=use_kpi,
1585
1847
  ).optimal_frequency,
1586
1848
  dtype=tf.float32,
@@ -1594,7 +1856,8 @@ class BudgetOptimizer:
1594
1856
  spend_bound_lower=optimization_lower_bound,
1595
1857
  spend_bound_upper=optimization_upper_bound,
1596
1858
  step_size=step_size,
1597
- selected_times=selected_time_dims,
1859
+ selected_times=selected_times,
1860
+ new_data=filled_data.filter_fields(c.PAID_DATA),
1598
1861
  use_posterior=use_posterior,
1599
1862
  use_kpi=use_kpi,
1600
1863
  optimal_frequency=optimal_frequency,
@@ -1612,10 +1875,12 @@ class BudgetOptimizer:
1612
1875
  use_kpi=use_kpi,
1613
1876
  use_posterior=use_posterior,
1614
1877
  use_optimal_frequency=use_optimal_frequency,
1878
+ start_date=start_date,
1879
+ end_date=end_date,
1615
1880
  gtol=gtol,
1616
1881
  round_factor=round_factor,
1617
1882
  optimal_frequency=optimal_frequency,
1618
- selected_times=selected_time_dims,
1883
+ selected_times=selected_times,
1619
1884
  )
1620
1885
 
1621
1886
  def _create_grid_dataset(
@@ -1658,10 +1923,38 @@ class BudgetOptimizer:
1658
1923
  attrs={c.SPEND_STEP_SIZE: spend_step_size},
1659
1924
  )
1660
1925
 
1926
+ def _validate_selected_times(
1927
+ self,
1928
+ start_date: tc.Date,
1929
+ end_date: tc.Date,
1930
+ new_data: analyzer.DataTensors | None,
1931
+ ) -> Sequence[str] | Sequence[bool] | None:
1932
+ """Validates and returns the selected times."""
1933
+ if start_date is None and end_date is None:
1934
+ return None
1935
+
1936
+ new_data = new_data or analyzer.DataTensors()
1937
+ if new_data.get_modified_times(self._meridian) is None:
1938
+ return self._meridian.expand_selected_time_dims(
1939
+ start_date=start_date,
1940
+ end_date=end_date,
1941
+ )
1942
+ else:
1943
+ assert new_data.time is not None
1944
+ new_times_str = new_data.time.numpy().astype(str).tolist()
1945
+ time_coordinates = tc.TimeCoordinates.from_dates(new_times_str)
1946
+ expanded_dates = time_coordinates.expand_selected_time_dims(
1947
+ start_date=start_date,
1948
+ end_date=end_date,
1949
+ )
1950
+ expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
1951
+ return [x in expanded_str for x in new_times_str]
1952
+
1661
1953
  def _get_incremental_outcome_tensors(
1662
1954
  self,
1663
1955
  hist_spend: np.ndarray,
1664
1956
  spend: np.ndarray,
1957
+ new_data: analyzer.DataTensors | None = None,
1665
1958
  optimal_frequency: Sequence[float] | None = None,
1666
1959
  ) -> tuple[
1667
1960
  tf.Tensor | None,
@@ -1686,6 +1979,11 @@ class BudgetOptimizer:
1686
1979
  Args:
1687
1980
  hist_spend: historical spend data.
1688
1981
  spend: new optimized spend data.
1982
+ new_data: An optional `DataTensors` object containing the new `media`,
1983
+ `reach`, and `frequency` tensors. If `None`, the existing tensors from
1984
+ the Meridian object are used. If any of the tensors is provided with a
1985
+ different number of time periods than in `InputData`, then all tensors
1986
+ must be provided with the same number of time periods.
1689
1987
  optimal_frequency: xr.DataArray with dimension `n_rf_channels`, containing
1690
1988
  the optimal frequency per channel, that maximizes posterior mean roi.
1691
1989
  Value is `None` if the model does not contain reach and frequency data,
@@ -1696,13 +1994,18 @@ class BudgetOptimizer:
1696
1994
  Tuple of tf.tensors (new_media, new_media_spend, new_reach, new_frequency,
1697
1995
  new_rf_spend).
1698
1996
  """
1997
+ new_data = new_data or analyzer.DataTensors()
1998
+ filled_data = new_data.validate_and_fill_missing_data(
1999
+ c.PAID_CHANNELS,
2000
+ self._meridian,
2001
+ )
1699
2002
  if self._meridian.n_media_channels > 0:
1700
2003
  new_media = (
1701
2004
  tf.math.divide_no_nan(
1702
2005
  spend[: self._meridian.n_media_channels],
1703
2006
  hist_spend[: self._meridian.n_media_channels],
1704
2007
  )
1705
- * self._meridian.media_tensors.media
2008
+ * filled_data.media
1706
2009
  )
1707
2010
  new_media_spend = tf.convert_to_tensor(
1708
2011
  spend[: self._meridian.n_media_channels]
@@ -1711,9 +2014,7 @@ class BudgetOptimizer:
1711
2014
  new_media = None
1712
2015
  new_media_spend = None
1713
2016
  if self._meridian.n_rf_channels > 0:
1714
- rf_media = (
1715
- self._meridian.rf_tensors.reach * self._meridian.rf_tensors.frequency
1716
- )
2017
+ rf_media = filled_data.reach * filled_data.frequency
1717
2018
  new_rf_media = (
1718
2019
  tf.math.divide_no_nan(
1719
2020
  spend[-self._meridian.n_rf_channels :],
@@ -1722,7 +2023,7 @@ class BudgetOptimizer:
1722
2023
  * rf_media
1723
2024
  )
1724
2025
  frequency = (
1725
- self._meridian.rf_tensors.frequency
2026
+ filled_data.frequency
1726
2027
  if optimal_frequency is None
1727
2028
  else optimal_frequency
1728
2029
  )
@@ -1742,9 +2043,10 @@ class BudgetOptimizer:
1742
2043
  self,
1743
2044
  hist_spend: np.ndarray,
1744
2045
  spend: np.ndarray,
2046
+ new_data: analyzer.DataTensors | None = None,
1745
2047
  use_posterior: bool = True,
1746
2048
  use_kpi: bool = False,
1747
- selected_times: Sequence[str] | None = None,
2049
+ selected_times: Sequence[str] | Sequence[bool] | None = None,
1748
2050
  optimal_frequency: Sequence[float] | None = None,
1749
2051
  attrs: Mapping[str, Any] | None = None,
1750
2052
  confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
@@ -1752,15 +2054,22 @@ class BudgetOptimizer:
1752
2054
  use_historical_budget: bool = True,
1753
2055
  ) -> xr.Dataset:
1754
2056
  """Creates the budget dataset."""
2057
+ new_data = new_data or analyzer.DataTensors()
2058
+ filled_data = new_data.validate_and_fill_missing_data(
2059
+ c.PAID_DATA + (c.TIME,),
2060
+ self._meridian,
2061
+ )
1755
2062
  spend = tf.convert_to_tensor(spend, dtype=tf.float32)
1756
2063
  hist_spend = tf.convert_to_tensor(hist_spend, dtype=tf.float32)
1757
2064
  (new_media, new_media_spend, new_reach, new_frequency, new_rf_spend) = (
1758
2065
  self._get_incremental_outcome_tensors(
1759
- hist_spend, spend, optimal_frequency
2066
+ hist_spend,
2067
+ spend,
2068
+ new_data=filled_data.filter_fields(c.PAID_CHANNELS),
2069
+ optimal_frequency=optimal_frequency,
1760
2070
  )
1761
2071
  )
1762
2072
  budget = np.sum(spend)
1763
- all_times = self._meridian.input_data.time.values.tolist()
1764
2073
 
1765
2074
  # incremental_outcome here is a tensor with the shape
1766
2075
  # (n_chains, n_draws, n_channels)
@@ -1770,6 +2079,7 @@ class BudgetOptimizer:
1770
2079
  media=new_media,
1771
2080
  reach=new_reach,
1772
2081
  frequency=new_frequency,
2082
+ revenue_per_kpi=filled_data.revenue_per_kpi,
1773
2083
  ),
1774
2084
  selected_times=selected_times,
1775
2085
  use_kpi=use_kpi,
@@ -1792,6 +2102,9 @@ class BudgetOptimizer:
1792
2102
  )
1793
2103
 
1794
2104
  aggregated_impressions = self._analyzer.get_aggregated_impressions(
2105
+ new_data=analyzer.DataTensors(
2106
+ media=new_media, reach=new_reach, frequency=new_frequency
2107
+ ),
1795
2108
  selected_times=selected_times,
1796
2109
  selected_geos=None,
1797
2110
  aggregate_times=True,
@@ -1799,10 +2112,11 @@ class BudgetOptimizer:
1799
2112
  optimal_frequency=optimal_frequency,
1800
2113
  include_non_paid_channels=False,
1801
2114
  )
1802
- effectiveness = incremental_outcome / aggregated_impressions
1803
2115
  effectiveness_with_mean_median_and_ci = (
1804
2116
  analyzer.get_central_tendency_and_ci(
1805
- data=effectiveness,
2117
+ data=tf.math.divide_no_nan(
2118
+ incremental_outcome, aggregated_impressions
2119
+ ),
1806
2120
  confidence_level=confidence_level,
1807
2121
  include_median=True,
1808
2122
  )
@@ -1822,6 +2136,7 @@ class BudgetOptimizer:
1822
2136
  frequency=new_frequency,
1823
2137
  media_spend=new_media_spend,
1824
2138
  rf_spend=new_rf_spend,
2139
+ revenue_per_kpi=filled_data.revenue_per_kpi,
1825
2140
  ),
1826
2141
  selected_times=selected_times,
1827
2142
  batch_size=batch_size,
@@ -1860,6 +2175,18 @@ class BudgetOptimizer:
1860
2175
  c.CPIK: ([c.CHANNEL, c.METRIC], cpik),
1861
2176
  }
1862
2177
 
2178
+ all_times = (
2179
+ filled_data.time.numpy().astype(str).tolist()
2180
+ if filled_data.time is not None
2181
+ else self._meridian.input_data.time.values.tolist()
2182
+ )
2183
+ if selected_times is not None and all(
2184
+ isinstance(time, bool) for time in selected_times
2185
+ ):
2186
+ selected_times = [
2187
+ time for time, selected in zip(all_times, selected_times) if selected
2188
+ ]
2189
+
1863
2190
  attributes = {
1864
2191
  c.START_DATE: min(selected_times) if selected_times else all_times[0],
1865
2192
  c.END_DATE: max(selected_times) if selected_times else all_times[-1],
@@ -1889,7 +2216,8 @@ class BudgetOptimizer:
1889
2216
  i: int,
1890
2217
  incremental_outcome_grid: np.ndarray,
1891
2218
  multipliers_grid: tf.Tensor,
1892
- selected_times: Sequence[str],
2219
+ new_data: analyzer.DataTensors | None = None,
2220
+ selected_times: Sequence[str] | Sequence[bool] | None = None,
1893
2221
  use_posterior: bool = True,
1894
2222
  use_kpi: bool = False,
1895
2223
  optimal_frequency: xr.DataArray | None = None,
@@ -1904,8 +2232,16 @@ class BudgetOptimizer:
1904
2232
  number of columns is equal to the number of total channels, containing
1905
2233
  incremental outcome by channel.
1906
2234
  multipliers_grid: A grid derived from spend.
1907
- selected_times: Sequence of strings representing the time dimensions in
1908
- `meridian.input_data.time` to use for optimization.
2235
+ new_data: An optional `DataTensors` object containing the new `media`,
2236
+ `reach`, `frequency`, and `revenue_per_kpi` tensors. If `None`, the
2237
+ existing tensors from the Meridian object are used. If any of the
2238
+ tensors is provided with a different number of time periods than in
2239
+ `InputData`, then all tensors must be provided with the same number of
2240
+ time periods.
2241
+ selected_times: Optional list of times to optimize. This can either be a
2242
+ string list containing a subset of time dimension coordinates from
2243
+ `InputData.time` or a boolean list with length equal to the time
2244
+ dimension of the tensor. By default, all time periods are included.
1909
2245
  use_posterior: Boolean. If `True`, then the incremental outcome is derived
1910
2246
  from the posterior distribution of the model. Otherwise, the prior
1911
2247
  distribution is used.
@@ -1922,10 +2258,14 @@ class BudgetOptimizer:
1922
2258
  reducing `batch_size`. The calculation will generally be faster with
1923
2259
  larger `batch_size` values.
1924
2260
  """
2261
+ new_data = new_data or analyzer.DataTensors()
2262
+ filled_data = new_data.validate_and_fill_missing_data(
2263
+ c.PAID_DATA, self._meridian
2264
+ )
1925
2265
  if self._meridian.n_media_channels > 0:
1926
2266
  new_media = (
1927
2267
  multipliers_grid[i, : self._meridian.n_media_channels]
1928
- * self._meridian.media_tensors.media
2268
+ * filled_data.media
1929
2269
  )
1930
2270
  else:
1931
2271
  new_media = None
@@ -1934,20 +2274,18 @@ class BudgetOptimizer:
1934
2274
  new_frequency = None
1935
2275
  new_reach = None
1936
2276
  elif optimal_frequency is not None:
1937
- new_frequency = (
1938
- tf.ones_like(self._meridian.rf_tensors.frequency) * optimal_frequency
1939
- )
2277
+ new_frequency = tf.ones_like(filled_data.frequency) * optimal_frequency
1940
2278
  new_reach = tf.math.divide_no_nan(
1941
2279
  multipliers_grid[i, -self._meridian.n_rf_channels :]
1942
- * self._meridian.rf_tensors.reach
1943
- * self._meridian.rf_tensors.frequency,
2280
+ * filled_data.reach
2281
+ * filled_data.frequency,
1944
2282
  new_frequency,
1945
2283
  )
1946
2284
  else:
1947
- new_frequency = self._meridian.rf_tensors.frequency
2285
+ new_frequency = filled_data.frequency
1948
2286
  new_reach = (
1949
2287
  multipliers_grid[i, -self._meridian.n_rf_channels :]
1950
- * self._meridian.rf_tensors.reach
2288
+ * filled_data.reach
1951
2289
  )
1952
2290
 
1953
2291
  # incremental_outcome returns a three dimensional tensor with dims
@@ -1960,6 +2298,7 @@ class BudgetOptimizer:
1960
2298
  media=new_media,
1961
2299
  reach=new_reach,
1962
2300
  frequency=new_frequency,
2301
+ revenue_per_kpi=filled_data.revenue_per_kpi,
1963
2302
  ),
1964
2303
  selected_times=selected_times,
1965
2304
  use_kpi=use_kpi,
@@ -1976,7 +2315,8 @@ class BudgetOptimizer:
1976
2315
  spend_bound_lower: np.ndarray,
1977
2316
  spend_bound_upper: np.ndarray,
1978
2317
  step_size: int,
1979
- selected_times: Sequence[str],
2318
+ new_data: analyzer.DataTensors | None = None,
2319
+ selected_times: Sequence[str] | Sequence[bool] | None = None,
1980
2320
  use_posterior: bool = True,
1981
2321
  use_kpi: bool = False,
1982
2322
  optimal_frequency: xr.DataArray | None = None,
@@ -1992,8 +2332,16 @@ class BudgetOptimizer:
1992
2332
  containing the upper constraint spend for each channel.
1993
2333
  step_size: Integer indicating the step size, or interval, between values
1994
2334
  in the spend grid. All media channels have the same step size.
1995
- selected_times: Sequence of strings representing the time dimensions in
1996
- `meridian.input_data.time` to use for optimization.
2335
+ new_data: An optional `DataTensors` object containing the new `media`,
2336
+ `reach`, `frequency`, and `revenue_per_kpi` tensors. If `None`, the
2337
+ existing tensors from the Meridian object are used. If any of the
2338
+ tensors is provided with a different number of time periods than in
2339
+ `InputData`, then all tensors must be provided with the same number of
2340
+ time periods.
2341
+ selected_times: Optional list of times to optimize. This can either be a
2342
+ string list containing a subset of time dimension coordinates from
2343
+ `InputData.time` or a boolean list with length equal to the time
2344
+ dimension of the tensor. By default, all time periods are included.
1997
2345
  use_posterior: Boolean. If `True`, then the incremental outcome is derived
1998
2346
  from the posterior distribution of the model. Otherwise, the prior
1999
2347
  distribution is used.
@@ -2047,6 +2395,7 @@ class BudgetOptimizer:
2047
2395
  incremental_outcome_grid=incremental_outcome_grid,
2048
2396
  multipliers_grid=multipliers_grid,
2049
2397
  selected_times=selected_times,
2398
+ new_data=new_data,
2050
2399
  use_posterior=use_posterior,
2051
2400
  use_kpi=use_kpi,
2052
2401
  optimal_frequency=optimal_frequency,
@@ -2207,7 +2556,7 @@ def _validate_budget(
2207
2556
  budget: float | None,
2208
2557
  target_roi: float | None,
2209
2558
  target_mroi: float | None,
2210
- ):
2559
+ ) -> None:
2211
2560
  """Validates the budget optimization arguments."""
2212
2561
  if fixed_budget:
2213
2562
  if target_roi is not None: