google-meridian 1.2.1__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. google_meridian-1.3.1.dist-info/METADATA +209 -0
  2. google_meridian-1.3.1.dist-info/RECORD +76 -0
  3. {google_meridian-1.2.1.dist-info → google_meridian-1.3.1.dist-info}/top_level.txt +1 -0
  4. meridian/analysis/__init__.py +2 -0
  5. meridian/analysis/analyzer.py +179 -105
  6. meridian/analysis/formatter.py +2 -2
  7. meridian/analysis/optimizer.py +227 -87
  8. meridian/analysis/review/__init__.py +20 -0
  9. meridian/analysis/review/checks.py +721 -0
  10. meridian/analysis/review/configs.py +110 -0
  11. meridian/analysis/review/constants.py +40 -0
  12. meridian/analysis/review/results.py +544 -0
  13. meridian/analysis/review/reviewer.py +186 -0
  14. meridian/analysis/summarizer.py +21 -34
  15. meridian/analysis/templates/chips.html.jinja +12 -0
  16. meridian/analysis/test_utils.py +27 -5
  17. meridian/analysis/visualizer.py +41 -57
  18. meridian/backend/__init__.py +457 -118
  19. meridian/backend/test_utils.py +162 -0
  20. meridian/constants.py +39 -3
  21. meridian/model/__init__.py +1 -0
  22. meridian/model/eda/__init__.py +3 -0
  23. meridian/model/eda/constants.py +21 -0
  24. meridian/model/eda/eda_engine.py +1309 -196
  25. meridian/model/eda/eda_outcome.py +200 -0
  26. meridian/model/eda/eda_spec.py +84 -0
  27. meridian/model/eda/meridian_eda.py +220 -0
  28. meridian/model/knots.py +55 -49
  29. meridian/model/media.py +10 -8
  30. meridian/model/model.py +79 -16
  31. meridian/model/model_test_data.py +53 -0
  32. meridian/model/posterior_sampler.py +39 -32
  33. meridian/model/prior_distribution.py +12 -2
  34. meridian/model/prior_sampler.py +146 -90
  35. meridian/model/spec.py +7 -8
  36. meridian/model/transformers.py +11 -3
  37. meridian/version.py +1 -1
  38. schema/__init__.py +18 -0
  39. schema/serde/__init__.py +26 -0
  40. schema/serde/constants.py +48 -0
  41. schema/serde/distribution.py +515 -0
  42. schema/serde/eda_spec.py +192 -0
  43. schema/serde/function_registry.py +143 -0
  44. schema/serde/hyperparameters.py +363 -0
  45. schema/serde/inference_data.py +105 -0
  46. schema/serde/marketing_data.py +1321 -0
  47. schema/serde/meridian_serde.py +413 -0
  48. schema/serde/serde.py +47 -0
  49. schema/serde/test_data.py +4608 -0
  50. schema/utils/__init__.py +17 -0
  51. schema/utils/time_record.py +156 -0
  52. google_meridian-1.2.1.dist-info/METADATA +0 -409
  53. google_meridian-1.2.1.dist-info/RECORD +0 -52
  54. {google_meridian-1.2.1.dist-info → google_meridian-1.3.1.dist-info}/WHEEL +0 -0
  55. {google_meridian-1.2.1.dist-info → google_meridian-1.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -102,6 +102,7 @@ class OptimizationGrid:
102
102
  use_kpi: Whether using generic KPI or revenue.
103
103
  use_posterior: Whether posterior distributions were used, or prior.
104
104
  use_optimal_frequency: Whether optimal frequency was used.
105
+ max_frequency: The maximum frequency for reach and frequency channels.
105
106
  start_date: The start date of the optimization period.
106
107
  end_date: The end date of the optimization period.
107
108
  gtol: Float indicating the acceptable relative error for the budget used in
@@ -114,9 +115,10 @@ class OptimizationGrid:
114
115
  does not contain reach and frequency data, or if the model does contain
115
116
  reach and frequency data, but historical frequency is used for the
116
117
  optimization scenario.
118
+ selected_geos: The geo coordinates from the model used in this grid.
117
119
  selected_times: The time coordinates from the model used in this grid. If
118
- new data with modified time coordinates is used for optimization, this
119
- is a list of booleans indicating which time coordinates are selected.
120
+ new data with modified time coordinates is used for optimization, this is
121
+ a list of booleans indicating which time coordinates are selected.
120
122
  Otherwise, this is a list of strings indicating the time coordinates used
121
123
  in this grid.
122
124
  """
@@ -132,7 +134,9 @@ class OptimizationGrid:
132
134
  gtol: float
133
135
  round_factor: int
134
136
  optimal_frequency: np.ndarray | None
137
+ selected_geos: Sequence[str] | None
135
138
  selected_times: Sequence[str] | Sequence[bool] | None
139
+ max_frequency: float | None = None
136
140
 
137
141
  @property
138
142
  def grid_dataset(self) -> xr.Dataset:
@@ -266,7 +270,7 @@ class OptimizationGrid:
266
270
  return xr.Dataset(
267
271
  coords={c.CHANNEL: self.channels},
268
272
  data_vars={
269
- c.OPTIMIZED: ([c.CHANNEL], optimal_spend.data),
273
+ c.OPTIMIZED: ([c.CHANNEL], optimal_spend),
270
274
  c.NON_OPTIMIZED: ([c.CHANNEL], rounded_spend),
271
275
  },
272
276
  )
@@ -390,16 +394,26 @@ class OptimizationGrid:
390
394
  media spend that maximizes incremental outcome based on spend constraints
391
395
  for all media and RF channels.
392
396
  """
393
- spend = spend_grid[0, :].copy()
394
- incremental_outcome = incremental_outcome_grid[0, :].copy()
395
- spend_grid = spend_grid[1:, :]
396
- incremental_outcome_grid = incremental_outcome_grid[1:, :]
397
- iterative_roi_grid = np.round(
398
- backend.divide_no_nan(
399
- incremental_outcome_grid - incremental_outcome, spend_grid - spend
400
- ),
401
- decimals=8,
397
+ spend_grid_values = np.array(spend_grid.values, dtype=np.float64)
398
+ incremental_outcome_grid_values = np.array(
399
+ incremental_outcome_grid.values, dtype=np.float64
400
+ )
401
+
402
+ spend = spend_grid_values[0, :].copy()
403
+ incremental_outcome = incremental_outcome_grid_values[0, :].copy()
404
+ spend_grid_values = spend_grid_values[1:, :]
405
+ incremental_outcome_grid_values = incremental_outcome_grid_values[1:, :]
406
+
407
+ numerator = incremental_outcome_grid_values - incremental_outcome
408
+ denominator = spend_grid_values - spend
409
+ iterative_roi_grid = np.divide(
410
+ numerator,
411
+ denominator,
412
+ out=np.zeros_like(numerator),
413
+ where=(denominator != 0),
402
414
  )
415
+ iterative_roi_grid = np.round(iterative_roi_grid, decimals=8)
416
+
403
417
  while True:
404
418
  spend_optimal = spend.astype(int)
405
419
  # If none of the exit criteria are met roi_grid will eventually be filled
@@ -411,8 +425,8 @@ class OptimizationGrid:
411
425
  )
412
426
  row_idx = point[0]
413
427
  media_idx = point[1]
414
- spend[media_idx] = spend_grid[row_idx, media_idx]
415
- incremental_outcome[media_idx] = incremental_outcome_grid[
428
+ spend[media_idx] = spend_grid_values[row_idx, media_idx]
429
+ incremental_outcome[media_idx] = incremental_outcome_grid_values[
416
430
  row_idx, media_idx
417
431
  ]
418
432
  roi_grid_point = iterative_roi_grid[row_idx, media_idx]
@@ -425,14 +439,23 @@ class OptimizationGrid:
425
439
  break
426
440
 
427
441
  iterative_roi_grid[0 : row_idx + 1, media_idx] = np.nan
442
+
443
+ num_col = (
444
+ incremental_outcome_grid_values[row_idx + 1 :, media_idx]
445
+ - incremental_outcome_grid_values[row_idx, media_idx]
446
+ )
447
+ den_col = (
448
+ spend_grid_values[row_idx + 1 :, media_idx]
449
+ - spend_grid_values[row_idx, media_idx]
450
+ )
451
+ new_roi_col = np.divide(
452
+ num_col,
453
+ den_col,
454
+ out=np.zeros_like(num_col),
455
+ where=(den_col != 0),
456
+ )
428
457
  iterative_roi_grid[row_idx + 1 :, media_idx] = np.round(
429
- backend.divide_no_nan(
430
- incremental_outcome_grid[row_idx + 1 :, media_idx]
431
- - incremental_outcome_grid[row_idx, media_idx],
432
- spend_grid[row_idx + 1 :, media_idx]
433
- - spend_grid[row_idx, media_idx],
434
- ),
435
- decimals=8,
458
+ new_roi_col, decimals=8
436
459
  )
437
460
  return spend_optimal
438
461
 
@@ -559,11 +582,16 @@ class OptimizationResults:
559
582
  """The grid information used for optimization."""
560
583
  return self._optimization_grid
561
584
 
562
- def output_optimization_summary(self, filename: str, filepath: str):
585
+ def output_optimization_summary(
586
+ self,
587
+ filename: str,
588
+ filepath: str,
589
+ currency: str = c.DEFAULT_CURRENCY,
590
+ ):
563
591
  """Generates and saves the HTML optimization summary output."""
564
592
  os.makedirs(filepath, exist_ok=True)
565
593
  with open(os.path.join(filepath, filename), 'w') as f:
566
- f.write(self._gen_optimization_summary())
594
+ f.write(self._gen_optimization_summary(currency))
567
595
 
568
596
  def plot_incremental_outcome_delta(self) -> alt.Chart:
569
597
  """Plots a waterfall chart showing the change in incremental outcome."""
@@ -713,7 +741,7 @@ class OptimizationResults:
713
741
  )
714
742
  )
715
743
 
716
- def plot_spend_delta(self) -> alt.Chart:
744
+ def plot_spend_delta(self, currency: str = c.DEFAULT_CURRENCY) -> alt.Chart:
717
745
  """Plots a bar chart showing the optimized change in spend per channel."""
718
746
  df = self._get_delta_data(c.SPEND)
719
747
  base = (
@@ -734,7 +762,7 @@ class OptimizationResults:
734
762
  y=alt.Y(
735
763
  f'{c.SPEND}:Q',
736
764
  axis=alt.Axis(
737
- title='$',
765
+ title=currency,
738
766
  domain=False,
739
767
  labelExpr=formatter.compact_number_expr(),
740
768
  **formatter.AXIS_CONFIG,
@@ -919,6 +947,7 @@ class OptimizationResults:
919
947
  new_data=self.new_data,
920
948
  spend_multipliers=spend_multiplier,
921
949
  use_posterior=self.optimization_grid.use_posterior,
950
+ selected_geos=self.optimization_grid.selected_geos,
922
951
  selected_times=selected_times,
923
952
  by_reach=True,
924
953
  use_kpi=not self.nonoptimized_data.attrs[c.IS_REVENUE_KPI],
@@ -1029,7 +1058,7 @@ class OptimizationResults:
1029
1058
  sorted_df.sort_index(inplace=True)
1030
1059
  return sorted_df
1031
1060
 
1032
- def _gen_optimization_summary(self) -> str:
1061
+ def _gen_optimization_summary(self, currency: str) -> str:
1033
1062
  """Generates HTML optimization summary output (as sanitized content str)."""
1034
1063
  start_date = tc.normalize_date(self.optimized_data.start_date)
1035
1064
  self.template_env.globals[c.START_DATE] = start_date.strftime(
@@ -1041,22 +1070,25 @@ class OptimizationResults:
1041
1070
  self.template_env.globals[c.END_DATE] = end_date_adjusted.strftime(
1042
1071
  f'%b {end_date_adjusted.day}, %Y'
1043
1072
  )
1073
+ self.template_env.globals[c.SELECTED_GEOS] = (
1074
+ self.optimization_grid.selected_geos
1075
+ )
1044
1076
 
1045
1077
  html_template = self.template_env.get_template('summary.html.jinja')
1046
1078
  return html_template.render(
1047
1079
  title=summary_text.OPTIMIZATION_TITLE,
1048
- cards=self._create_output_sections(),
1080
+ cards=self._create_output_sections(currency),
1049
1081
  )
1050
1082
 
1051
- def _create_output_sections(self) -> Sequence[str]:
1083
+ def _create_output_sections(self, currency: str) -> Sequence[str]:
1052
1084
  """Creates the HTML snippets for cards in the summary page."""
1053
1085
  return [
1054
- self._create_scenario_plan_section(),
1055
- self._create_budget_allocation_section(),
1086
+ self._create_scenario_plan_section(currency),
1087
+ self._create_budget_allocation_section(currency),
1056
1088
  self._create_response_curves_section(),
1057
1089
  ]
1058
1090
 
1059
- def _create_scenario_plan_section(self) -> str:
1091
+ def _create_scenario_plan_section(self, currency: str) -> str:
1060
1092
  """Creates the HTML card snippet for the scenario plan section."""
1061
1093
  card_spec = formatter.CardSpec(
1062
1094
  id=summary_text.SCENARIO_PLAN_CARD_ID,
@@ -1099,22 +1131,32 @@ class OptimizationResults:
1099
1131
  self.template_env,
1100
1132
  card_spec,
1101
1133
  insights,
1102
- stats_specs=self._create_scenario_stats_specs(),
1134
+ stats_specs=self._create_scenario_stats_specs(currency),
1103
1135
  )
1104
1136
 
1105
- def _create_scenario_stats_specs(self) -> Sequence[formatter.StatsSpec]:
1137
+ def _create_scenario_stats_specs(
1138
+ self, currency: str
1139
+ ) -> Sequence[formatter.StatsSpec]:
1106
1140
  """Creates the stats to fill the scenario plan section."""
1107
1141
  outcome = self._kpi_or_revenue
1108
1142
  budget_diff = self.optimized_data.budget - self.nonoptimized_data.budget
1109
1143
  budget_prefix = '+' if budget_diff > 0 else ''
1110
1144
  non_optimized_budget = formatter.StatsSpec(
1111
1145
  title=summary_text.NON_OPTIMIZED_BUDGET_LABEL,
1112
- stat=formatter.format_monetary_num(self.nonoptimized_data.budget),
1146
+ stat=formatter.format_monetary_num(
1147
+ num=self.nonoptimized_data.budget,
1148
+ currency=currency,
1149
+ ),
1113
1150
  )
1114
1151
  optimized_budget = formatter.StatsSpec(
1115
1152
  title=summary_text.OPTIMIZED_BUDGET_LABEL,
1116
- stat=formatter.format_monetary_num(self.optimized_data.budget),
1117
- delta=(budget_prefix + formatter.format_monetary_num(budget_diff)),
1153
+ stat=formatter.format_monetary_num(
1154
+ num=self.optimized_data.budget, currency=currency
1155
+ ),
1156
+ delta=(
1157
+ budget_prefix
1158
+ + formatter.format_monetary_num(num=budget_diff, currency=currency)
1159
+ ),
1118
1160
  )
1119
1161
 
1120
1162
  if outcome == c.REVENUE:
@@ -1136,7 +1178,7 @@ class OptimizationResults:
1136
1178
  )
1137
1179
  optimized_performance_title = summary_text.OPTIMIZED_CPIK_LABEL
1138
1180
  optimized_performance_stat = f'${self.optimized_data.total_cpik:.2f}'
1139
- optimized_performance_diff = formatter.compact_number(diff, 2, '$')
1181
+ optimized_performance_diff = formatter.compact_number(diff, 2, currency)
1140
1182
  non_optimized_performance = formatter.StatsSpec(
1141
1183
  title=non_optimized_performance_title,
1142
1184
  stat=non_optimized_performance_stat,
@@ -1152,7 +1194,7 @@ class OptimizationResults:
1152
1194
  - self.nonoptimized_data.total_incremental_outcome
1153
1195
  )
1154
1196
  inc_outcome_prefix = '+' if inc_outcome_diff > 0 else ''
1155
- currency = '$' if outcome == c.REVENUE else ''
1197
+ currency = currency if outcome == c.REVENUE else ''
1156
1198
  non_optimized_inc_outcome = formatter.StatsSpec(
1157
1199
  title=summary_text.NON_OPTIMIZED_INC_OUTCOME_LABEL.format(
1158
1200
  outcome=outcome
@@ -1182,7 +1224,7 @@ class OptimizationResults:
1182
1224
  optimized_inc_outcome,
1183
1225
  ]
1184
1226
 
1185
- def _create_budget_allocation_section(self) -> str:
1227
+ def _create_budget_allocation_section(self, currency: str) -> str:
1186
1228
  """Creates the HTML card snippet for the budget allocation section."""
1187
1229
  outcome = self._kpi_or_revenue
1188
1230
  card_spec = formatter.CardSpec(
@@ -1192,7 +1234,7 @@ class OptimizationResults:
1192
1234
  spend_delta = formatter.ChartSpec(
1193
1235
  id=summary_text.SPEND_DELTA_CHART_ID,
1194
1236
  description=summary_text.SPEND_DELTA_CHART_INSIGHTS,
1195
- chart_json=self.plot_spend_delta().to_json(),
1237
+ chart_json=self.plot_spend_delta(currency).to_json(),
1196
1238
  )
1197
1239
  spend_allocation = formatter.ChartSpec(
1198
1240
  id=summary_text.SPEND_ALLOCATION_CHART_ID,
@@ -1295,6 +1337,7 @@ class BudgetOptimizer:
1295
1337
  self,
1296
1338
  new_data: analyzer_module.DataTensors | None = None,
1297
1339
  use_posterior: bool = True,
1340
+ selected_geos: Sequence[str] | None = None,
1298
1341
  # TODO: b/409550413 - Remove this argument.
1299
1342
  selected_times: tuple[str | None, str | None] | None = None,
1300
1343
  start_date: tc.Date = None,
@@ -1307,7 +1350,10 @@ class BudgetOptimizer:
1307
1350
  target_roi: float | None = None,
1308
1351
  target_mroi: float | None = None,
1309
1352
  gtol: float = 0.0001,
1353
+ # TODO:
1354
+ # Consider merging use_optimal_frequency and max_frequency into a single argument.
1310
1355
  use_optimal_frequency: bool = True,
1356
+ max_frequency: float | None = None,
1311
1357
  use_kpi: bool = False,
1312
1358
  confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
1313
1359
  batch_size: int = c.DEFAULT_BATCH_SIZE,
@@ -1383,6 +1429,9 @@ class BudgetOptimizer:
1383
1429
  use_posterior: Boolean. If `True`, then the budget is optimized based on
1384
1430
  the posterior distribution of the model. Otherwise, the prior
1385
1431
  distribution is used.
1432
+ selected_geos: Optional list containing a subset of geos to include. By
1433
+ default, all geos are included. The selected geos should match those in
1434
+ `InputData.geo`.
1386
1435
  selected_times: Deprecated. Tuple containing the start and end time
1387
1436
  dimension coordinates for the duration to run the optimization on.
1388
1437
  Please use `start_date` and `end_date` instead.
@@ -1439,6 +1488,10 @@ class BudgetOptimizer:
1439
1488
  use_optimal_frequency: If `True`, uses `optimal_frequency` calculated by
1440
1489
  trained Meridian model for optimization. If `False`, uses historical
1441
1490
  frequency or `new_data.frequency` if provided.
1491
+ max_frequency: Float indicating the frequency upper bound for the optimal
1492
+ frequency search space. If `None` when `use_optimal_frequency` is
1493
+ `True`, the max frequency of the input data is used. If
1494
+ `use_optimal_frequency` is `False`, `max_frequency` is ignored.
1442
1495
  use_kpi: If `True`, runs the optimization on KPI. Defaults to revenue.
1443
1496
  confidence_level: The threshold for computing the confidence intervals.
1444
1497
  batch_size: Maximum draws per chain in each batch. The calculation is run
@@ -1484,6 +1537,7 @@ class BudgetOptimizer:
1484
1537
  use_grid_arg = optimization_grid is not None and self._validate_grid(
1485
1538
  new_data=new_data,
1486
1539
  use_posterior=use_posterior,
1540
+ selected_geos=selected_geos,
1487
1541
  start_date=start_date,
1488
1542
  end_date=end_date,
1489
1543
  budget=budget,
@@ -1492,12 +1546,14 @@ class BudgetOptimizer:
1492
1546
  spend_constraint_upper=spend_constraint_upper,
1493
1547
  gtol=gtol,
1494
1548
  use_optimal_frequency=use_optimal_frequency,
1549
+ max_frequency=max_frequency,
1495
1550
  use_kpi=use_kpi,
1496
1551
  optimization_grid=optimization_grid,
1497
1552
  )
1498
1553
  if optimization_grid is None or not use_grid_arg:
1499
1554
  optimization_grid = self.create_optimization_grid(
1500
1555
  new_data=new_data,
1556
+ selected_geos=selected_geos,
1501
1557
  start_date=start_date,
1502
1558
  end_date=end_date,
1503
1559
  budget=budget,
@@ -1508,6 +1564,7 @@ class BudgetOptimizer:
1508
1564
  use_posterior=use_posterior,
1509
1565
  use_kpi=use_kpi,
1510
1566
  use_optimal_frequency=use_optimal_frequency,
1567
+ max_frequency=max_frequency,
1511
1568
  batch_size=batch_size,
1512
1569
  )
1513
1570
 
@@ -1538,6 +1595,7 @@ class BudgetOptimizer:
1538
1595
  use_kpi=use_kpi,
1539
1596
  hist_spend=optimization_grid.historical_spend,
1540
1597
  spend=spend.non_optimized,
1598
+ selected_geos=selected_geos,
1541
1599
  start_date=start_date,
1542
1600
  end_date=end_date,
1543
1601
  confidence_level=confidence_level,
@@ -1550,6 +1608,7 @@ class BudgetOptimizer:
1550
1608
  use_kpi=use_kpi,
1551
1609
  hist_spend=optimization_grid.historical_spend,
1552
1610
  spend=spend.non_optimized,
1611
+ selected_geos=selected_geos,
1553
1612
  start_date=start_date,
1554
1613
  end_date=end_date,
1555
1614
  optimal_frequency=optimization_grid.optimal_frequency,
@@ -1570,6 +1629,7 @@ class BudgetOptimizer:
1570
1629
  use_kpi=use_kpi,
1571
1630
  hist_spend=optimization_grid.historical_spend,
1572
1631
  spend=spend.optimized,
1632
+ selected_geos=selected_geos,
1573
1633
  start_date=start_date,
1574
1634
  end_date=end_date,
1575
1635
  optimal_frequency=optimization_grid.optimal_frequency,
@@ -1690,7 +1750,11 @@ class BudgetOptimizer:
1690
1750
  A `DataTensors` object with optional tensors `media`, `reach`,
1691
1751
  `frequency`, `media_spend`, `rf_spend`, `revenue_per_kpi`, and `time`.
1692
1752
  """
1753
+ n_times = time.shape[0] if isinstance(time, backend.Tensor) else len(time)
1754
+ n_geos = self._meridian.n_geos
1693
1755
  self._validate_optimization_tensors(
1756
+ expected_n_geos=n_geos,
1757
+ expected_n_times=n_times,
1694
1758
  cpmu=cpmu,
1695
1759
  cprf=cprf,
1696
1760
  media=media,
@@ -1701,13 +1765,6 @@ class BudgetOptimizer:
1701
1765
  revenue_per_kpi=revenue_per_kpi,
1702
1766
  use_optimal_frequency=use_optimal_frequency,
1703
1767
  )
1704
- n_times = time.shape[0] if isinstance(time, backend.Tensor) else len(time)
1705
- n_geos = self._meridian.n_geos
1706
- revenue_per_kpi = (
1707
- _expand_tensor(revenue_per_kpi, (n_geos, n_times))
1708
- if revenue_per_kpi is not None
1709
- else None
1710
- )
1711
1768
 
1712
1769
  tensors = {}
1713
1770
  if media is not None:
@@ -1743,7 +1800,9 @@ class BudgetOptimizer:
1743
1800
  impressions, tensors[c.FREQUENCY]
1744
1801
  )
1745
1802
  if revenue_per_kpi is not None:
1746
- tensors[c.REVENUE_PER_KPI] = revenue_per_kpi
1803
+ tensors[c.REVENUE_PER_KPI] = _expand_tensor(
1804
+ revenue_per_kpi, (n_geos, n_times)
1805
+ )
1747
1806
  tensors[c.TIME] = backend.to_tensor(time)
1748
1807
  return analyzer_module.DataTensors(**tensors)
1749
1808
 
@@ -1751,6 +1810,7 @@ class BudgetOptimizer:
1751
1810
  self,
1752
1811
  new_data: analyzer_module.DataTensors | None,
1753
1812
  use_posterior: bool,
1813
+ selected_geos: Sequence[str] | None,
1754
1814
  start_date: tc.Date,
1755
1815
  end_date: tc.Date,
1756
1816
  budget: float | None,
@@ -1759,6 +1819,7 @@ class BudgetOptimizer:
1759
1819
  spend_constraint_upper: _SpendConstraint,
1760
1820
  gtol: float,
1761
1821
  use_optimal_frequency: bool,
1822
+ max_frequency: float | None,
1762
1823
  use_kpi: bool,
1763
1824
  optimization_grid: OptimizationGrid,
1764
1825
  ) -> bool:
@@ -1791,6 +1852,15 @@ class BudgetOptimizer:
1791
1852
  )
1792
1853
  return False
1793
1854
 
1855
+ if max_frequency != optimization_grid.max_frequency:
1856
+ warnings.warn(
1857
+ 'Given optimization grid was created with `use_optimal_frequency` ='
1858
+ f' {optimization_grid.max_frequency}, but optimization was'
1859
+ f' called with `max_frequency` = {max_frequency}. A'
1860
+ ' new grid will be created.'
1861
+ )
1862
+ return False
1863
+
1794
1864
  if (
1795
1865
  start_date != optimization_grid.start_date
1796
1866
  or end_date != optimization_grid.end_date
@@ -1820,6 +1890,17 @@ class BudgetOptimizer:
1820
1890
  )
1821
1891
  return False
1822
1892
 
1893
+ s_geos = sorted(selected_geos or [])
1894
+ g_geos = sorted(optimization_grid.selected_geos or [])
1895
+ if s_geos != g_geos:
1896
+ warnings.warn(
1897
+ 'Given optimization grid was created with `selected_geos` ='
1898
+ f' {optimization_grid.selected_geos}, but optimization request was'
1899
+ f' called with `selected_geos` = {selected_geos}. A new grid will be'
1900
+ ' created.'
1901
+ )
1902
+ return False
1903
+
1823
1904
  n_channels = len(optimization_grid.channels)
1824
1905
  selected_times = _expand_selected_times(
1825
1906
  meridian=self._meridian,
@@ -1877,6 +1958,7 @@ class BudgetOptimizer:
1877
1958
  self,
1878
1959
  new_data: xr.Dataset | None = None,
1879
1960
  use_posterior: bool = True,
1961
+ selected_geos: Sequence[str] | None = None,
1880
1962
  # TODO: b/409550413 - Remove this argument.
1881
1963
  selected_times: tuple[str | None, str | None] | None = None,
1882
1964
  start_date: tc.Date = None,
@@ -1887,6 +1969,7 @@ class BudgetOptimizer:
1887
1969
  spend_constraint_upper: _SpendConstraint = c.SPEND_CONSTRAINT_DEFAULT,
1888
1970
  gtol: float = 0.0001,
1889
1971
  use_optimal_frequency: bool = True,
1972
+ max_frequency: float | None = None,
1890
1973
  use_kpi: bool = False,
1891
1974
  batch_size: int = c.DEFAULT_BATCH_SIZE,
1892
1975
  ) -> OptimizationGrid:
@@ -1915,6 +1998,9 @@ class BudgetOptimizer:
1915
1998
  use_posterior: Boolean. If `True`, then the incremental outcome is derived
1916
1999
  from the posterior distribution of the model. Otherwise, the prior
1917
2000
  distribution is used.
2001
+ selected_geos: Optional list containing a subset of geos to include. By
2002
+ default, all geos are included. The selected geos should match those in
2003
+ `InputData.geo`.
1918
2004
  selected_times: Deprecated. Tuple containing the start and end time
1919
2005
  dimension coordinates. Please use `start_date` and `end_date` instead.
1920
2006
  start_date: Optional start date selector, *inclusive*, in _yyyy-mm-dd_
@@ -1955,6 +2041,10 @@ class BudgetOptimizer:
1955
2041
  the smallest integer such that `(budget - rounded_budget)` is less than
1956
2042
  or equal to `(budget * gtol)`. `gtol` must be less than 1.
1957
2043
  use_optimal_frequency: Boolean. Whether optimal frequency was used.
2044
+ max_frequency: Float indicating the frequency upper bound for the optimal
2045
+ frequency search space. If `None` when `use_optimal_frequency` is
2046
+ `True`, the max frequency of the input data is used. If
2047
+ `use_optimal_frequency` is `False`, `max_frequency` is ignored.
1958
2048
  use_kpi: Boolean. If `True`, then the incremental outcome is derived from
1959
2049
  the KPI impact. Otherwise, the incremental outcome is derived from the
1960
2050
  revenue impact.
@@ -1969,7 +2059,8 @@ class BudgetOptimizer:
1969
2059
  self._validate_model_fit(use_posterior)
1970
2060
  if new_data is None:
1971
2061
  new_data = analyzer_module.DataTensors()
1972
-
2062
+ if selected_geos is not None and not selected_geos:
2063
+ raise ValueError('`selected_geos` must not be empty.')
1973
2064
  if selected_times is not None:
1974
2065
  warnings.warn(
1975
2066
  '`selected_times` is deprecated. Please use `start_date` and'
@@ -1993,6 +2084,7 @@ class BudgetOptimizer:
1993
2084
  )
1994
2085
  hist_spend = self._analyzer.get_aggregated_spend(
1995
2086
  new_data=filled_data.filter_fields(c.PAID_CHANNELS + c.SPEND_DATA),
2087
+ selected_geos=selected_geos,
1996
2088
  selected_times=selected_times,
1997
2089
  include_media=self._meridian.n_media_channels > 0,
1998
2090
  include_rf=self._meridian.n_rf_channels > 0,
@@ -2025,8 +2117,10 @@ class BudgetOptimizer:
2025
2117
  self._analyzer.optimal_freq(
2026
2118
  new_data=opt_freq_data,
2027
2119
  use_posterior=use_posterior,
2120
+ selected_geos=selected_geos,
2028
2121
  selected_times=selected_times,
2029
2122
  use_kpi=use_kpi,
2123
+ max_frequency=max_frequency,
2030
2124
  ).optimal_frequency,
2031
2125
  dtype=backend.float32,
2032
2126
  )
@@ -2039,6 +2133,7 @@ class BudgetOptimizer:
2039
2133
  spend_bound_lower=optimization_lower_bound,
2040
2134
  spend_bound_upper=optimization_upper_bound,
2041
2135
  step_size=step_size,
2136
+ selected_geos=selected_geos,
2042
2137
  selected_times=selected_times,
2043
2138
  new_data=filled_data.filter_fields(c.PAID_DATA),
2044
2139
  use_posterior=use_posterior,
@@ -2058,11 +2153,13 @@ class BudgetOptimizer:
2058
2153
  use_kpi=use_kpi,
2059
2154
  use_posterior=use_posterior,
2060
2155
  use_optimal_frequency=use_optimal_frequency,
2156
+ max_frequency=max_frequency,
2061
2157
  start_date=start_date,
2062
2158
  end_date=end_date,
2063
2159
  gtol=gtol,
2064
2160
  round_factor=round_factor,
2065
2161
  optimal_frequency=optimal_frequency,
2162
+ selected_geos=selected_geos,
2066
2163
  selected_times=selected_times,
2067
2164
  )
2068
2165
 
@@ -2190,6 +2287,7 @@ class BudgetOptimizer:
2190
2287
  new_data: analyzer_module.DataTensors | None = None,
2191
2288
  use_posterior: bool = True,
2192
2289
  use_kpi: bool = False,
2290
+ selected_geos: Sequence[str] | None = None,
2193
2291
  start_date: tc.Date = None,
2194
2292
  end_date: tc.Date = None,
2195
2293
  optimal_frequency: Sequence[float] | None = None,
@@ -2233,6 +2331,7 @@ class BudgetOptimizer:
2233
2331
  incremental_outcome = self._analyzer.incremental_outcome(
2234
2332
  use_posterior=use_posterior,
2235
2333
  new_data=inc_outcome_data,
2334
+ selected_geos=selected_geos,
2236
2335
  selected_times=selected_times,
2237
2336
  use_kpi=use_kpi,
2238
2337
  batch_size=batch_size,
@@ -2241,6 +2340,7 @@ class BudgetOptimizer:
2241
2340
  incremental_increase = 0.01
2242
2341
  mroi_numerator = self._analyzer.incremental_outcome(
2243
2342
  new_data=inc_outcome_data,
2343
+ selected_geos=selected_geos,
2244
2344
  selected_times=selected_times,
2245
2345
  scaling_factor0=1.0,
2246
2346
  scaling_factor1=1 + incremental_increase,
@@ -2269,7 +2369,7 @@ class BudgetOptimizer:
2269
2369
  media=new_media, reach=new_reach, frequency=new_frequency
2270
2370
  ),
2271
2371
  selected_times=selected_times,
2272
- selected_geos=None,
2372
+ selected_geos=selected_geos,
2273
2373
  aggregate_times=True,
2274
2374
  aggregate_geos=True,
2275
2375
  optimal_frequency=optimal_frequency,
@@ -2312,19 +2412,27 @@ class BudgetOptimizer:
2312
2412
  total_spend = np.sum(spend) if np.sum(spend) > 0 else 1
2313
2413
  pct_of_spend = spend / total_spend
2314
2414
  data_vars = {
2315
- c.SPEND: ([c.CHANNEL], spend.data),
2316
- c.PCT_OF_SPEND: ([c.CHANNEL], pct_of_spend.data),
2415
+ c.SPEND: ([c.CHANNEL], np.array(spend.data, dtype=np.float64)),
2416
+ c.PCT_OF_SPEND: (
2417
+ [c.CHANNEL],
2418
+ np.array(pct_of_spend.data, dtype=np.float64),
2419
+ ),
2317
2420
  c.INCREMENTAL_OUTCOME: (
2318
2421
  [c.CHANNEL, c.METRIC],
2319
- incremental_outcome_with_mean_median_and_ci,
2422
+ np.array(
2423
+ incremental_outcome_with_mean_median_and_ci, dtype=np.float64
2424
+ ),
2320
2425
  ),
2321
2426
  c.EFFECTIVENESS: (
2322
2427
  [c.CHANNEL, c.METRIC],
2323
- effectiveness_with_mean_median_and_ci,
2428
+ np.array(effectiveness_with_mean_median_and_ci, dtype=np.float64),
2429
+ ),
2430
+ c.ROI: ([c.CHANNEL, c.METRIC], np.array(roi, dtype=np.float64)),
2431
+ c.MROI: (
2432
+ [c.CHANNEL, c.METRIC],
2433
+ np.array(marginal_roi, dtype=np.float64),
2324
2434
  ),
2325
- c.ROI: ([c.CHANNEL, c.METRIC], roi),
2326
- c.MROI: ([c.CHANNEL, c.METRIC], marginal_roi),
2327
- c.CPIK: ([c.CHANNEL, c.METRIC], cpik),
2435
+ c.CPIK: ([c.CHANNEL, c.METRIC], np.array(cpik, dtype=np.float64)),
2328
2436
  }
2329
2437
 
2330
2438
  all_times = np.asarray(filled_data.time).astype(str).tolist()
@@ -2359,6 +2467,7 @@ class BudgetOptimizer:
2359
2467
  incremental_outcome_grid: np.ndarray,
2360
2468
  multipliers_grid: backend.Tensor,
2361
2469
  new_data: analyzer_module.DataTensors | None = None,
2470
+ selected_geos: Sequence[str] | None = None,
2362
2471
  selected_times: Sequence[str] | Sequence[bool] | None = None,
2363
2472
  use_posterior: bool = True,
2364
2473
  use_kpi: bool = False,
@@ -2380,6 +2489,9 @@ class BudgetOptimizer:
2380
2489
  tensors is provided with a different number of time periods than in
2381
2490
  `InputData`, then all tensors must be provided with the same number of
2382
2491
  time periods.
2492
+ selected_geos: Optional list containing a subset of geos to include. By
2493
+ default, all geos are included. The selected geos should match those in
2494
+ `InputData.geo`.
2383
2495
  selected_times: Optional list of times to optimize. This can either be a
2384
2496
  string list containing a subset of time dimension coordinates from
2385
2497
  `InputData.time` or a boolean list with length equal to the time
@@ -2445,6 +2557,7 @@ class BudgetOptimizer:
2445
2557
  frequency=new_frequency,
2446
2558
  revenue_per_kpi=filled_data.revenue_per_kpi,
2447
2559
  ),
2560
+ selected_geos=selected_geos,
2448
2561
  selected_times=selected_times,
2449
2562
  use_kpi=use_kpi,
2450
2563
  include_non_paid_channels=False,
@@ -2462,6 +2575,7 @@ class BudgetOptimizer:
2462
2575
  spend_bound_upper: np.ndarray,
2463
2576
  step_size: int,
2464
2577
  new_data: analyzer_module.DataTensors | None = None,
2578
+ selected_geos: Sequence[str] | None = None,
2465
2579
  selected_times: Sequence[str] | Sequence[bool] | None = None,
2466
2580
  use_posterior: bool = True,
2467
2581
  use_kpi: bool = False,
@@ -2484,6 +2598,9 @@ class BudgetOptimizer:
2484
2598
  tensors is provided with a different number of time periods than in
2485
2599
  `InputData`, then all tensors must be provided with the same number of
2486
2600
  time periods.
2601
+ selected_geos: Optional list containing a subset of geos to include. By
2602
+ default, all geos are included. The selected geos should match those in
2603
+ `InputData.geo`.
2487
2604
  selected_times: Optional list of times to optimize. This can either be a
2488
2605
  string list containing a subset of time dimension coordinates from
2489
2606
  `InputData.time` or a boolean list with length equal to the time
@@ -2540,6 +2657,7 @@ class BudgetOptimizer:
2540
2657
  i=i,
2541
2658
  incremental_outcome_grid=incremental_outcome_grid,
2542
2659
  multipliers_grid=multipliers_grid,
2660
+ selected_geos=selected_geos,
2543
2661
  selected_times=selected_times,
2544
2662
  new_data=new_data,
2545
2663
  use_posterior=use_posterior,
@@ -2555,20 +2673,15 @@ class BudgetOptimizer:
2555
2673
  # we use the following code to fix it, and ensure incremental_outcome/spend
2556
2674
  # is always same for RF channels.
2557
2675
  if self._meridian.n_rf_channels > 0:
2558
- rf_incremental_outcome_max = np.nanmax(
2559
- incremental_outcome_grid[:, -self._meridian.n_rf_channels :], axis=0
2560
- )
2561
- rf_spend_max = np.nanmax(
2562
- spend_grid[:, -self._meridian.n_rf_channels :], axis=0
2563
- )
2564
- rf_roi = backend.divide_no_nan(rf_incremental_outcome_max, rf_spend_max)
2565
- incremental_outcome_grid[:, -self._meridian.n_rf_channels :] = (
2566
- rf_roi * spend_grid[:, -self._meridian.n_rf_channels :]
2676
+ incremental_outcome_grid = backend.stabilize_rf_roi_grid(
2677
+ spend_grid, incremental_outcome_grid, self._meridian.n_rf_channels
2567
2678
  )
2568
2679
  return (spend_grid, incremental_outcome_grid)
2569
2680
 
2570
2681
  def _validate_optimization_tensors(
2571
2682
  self,
2683
+ expected_n_geos: int,
2684
+ expected_n_times: int,
2572
2685
  cpmu: backend.Tensor | None = None,
2573
2686
  cprf: backend.Tensor | None = None,
2574
2687
  media: backend.Tensor | None = None,
@@ -2585,11 +2698,21 @@ class BudgetOptimizer:
2585
2698
  'If `media` or `media_spend` is provided, then `cpmu` must also be'
2586
2699
  ' provided.'
2587
2700
  )
2701
+ if (media is None and media_spend is None) and cpmu is not None:
2702
+ raise ValueError(
2703
+ 'If `cpmu` is provided, then one of `media` or `media_spend` must'
2704
+ ' also be provided.'
2705
+ )
2588
2706
  if (rf_impressions is not None or rf_spend is not None) and cprf is None:
2589
2707
  raise ValueError(
2590
2708
  'If `reach` and `frequency` or `rf_spend` is provided, then `cprf`'
2591
2709
  ' must also be provided.'
2592
2710
  )
2711
+ if (rf_impressions is None and rf_spend is None) and cprf is not None:
2712
+ raise ValueError(
2713
+ 'If `cprf` is provided, then one of `rf_impressions` or `rf_spend`'
2714
+ ' must also be provided.'
2715
+ )
2593
2716
  if media is not None and media_spend is not None:
2594
2717
  raise ValueError('Only one of `media` or `media_spend` can be provided.')
2595
2718
  if rf_impressions is not None and rf_spend is not None:
@@ -2607,26 +2730,44 @@ class BudgetOptimizer:
2607
2730
  'If `use_optimal_frequency` is `False`, then `frequency` must be'
2608
2731
  ' provided.'
2609
2732
  )
2610
-
2611
- n_geos = [
2612
- t.shape[0]
2613
- for t in [
2614
- cpmu,
2615
- cprf,
2616
- media,
2617
- rf_impressions,
2618
- frequency,
2619
- media_spend,
2620
- rf_spend,
2621
- ]
2622
- if t is not None and t.ndim == 3
2733
+ n_geos_list = []
2734
+ n_times_list = []
2735
+ tensor_list = [
2736
+ cpmu,
2737
+ cprf,
2738
+ media,
2739
+ rf_impressions,
2740
+ frequency,
2741
+ media_spend,
2742
+ rf_spend,
2623
2743
  ]
2744
+ for t in tensor_list:
2745
+ # `(n_geos, T, n_channels)` shape
2746
+ if t is not None and t.ndim == 3:
2747
+ n_geos_list.append(t.shape[0])
2748
+ n_times_list.append(t.shape[1])
2749
+ # `(T, n_channels)` shape
2750
+ elif t is not None and t.ndim == 2:
2751
+ n_times_list.append(t.shape[0])
2752
+
2753
+ # `(n_geos, T)` shape
2624
2754
  if revenue_per_kpi is not None and revenue_per_kpi.ndim == 2:
2625
- n_geos.append(revenue_per_kpi.shape[0])
2626
- if any(n_geo != self._meridian.n_geos for n_geo in n_geos):
2755
+ n_geos_list.append(revenue_per_kpi.shape[0])
2756
+ n_times_list.append(revenue_per_kpi.shape[1])
2757
+ # `(T)` shape
2758
+ elif revenue_per_kpi is not None and revenue_per_kpi.ndim == 1:
2759
+ n_times_list.append(revenue_per_kpi.shape[0])
2760
+
2761
+ if any(n_geo != expected_n_geos for n_geo in n_geos_list):
2762
+ raise ValueError(
2763
+ 'All tensors with a geo dimension must have'
2764
+ f' {expected_n_geos} geos (as defined in `meridian.InputData`).'
2765
+ )
2766
+
2767
+ if any(n_time != expected_n_times for n_time in n_times_list):
2627
2768
  raise ValueError(
2628
- 'All tensors with a geo dimension must have the same number of geos'
2629
- ' as in `meridian.InputData`.'
2769
+ 'All tensors with a time dimension must have'
2770
+ f' {expected_n_times} times (as defined in `time` argument).'
2630
2771
  )
2631
2772
 
2632
2773
  def _allocate_tensor_by_population(
@@ -2993,12 +3134,11 @@ def _expand_selected_times(
2993
3134
  start_date=start_date,
2994
3135
  end_date=end_date,
2995
3136
  )
3137
+ if expanded_dates is None:
3138
+ expanded_dates = time_coordinates.all_dates
3139
+ expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
2996
3140
  if return_flexible_str:
2997
- if expanded_dates is None:
2998
- expanded_dates = time_coordinates.all_dates
2999
- expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
3000
3141
  return [x for x in new_times_str if x in expanded_str]
3001
3142
  # TODO: Remove once every method uses `new_data.time`.
3002
3143
  else:
3003
- expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
3004
3144
  return [x in expanded_str for x in new_times_str]