google-meridian 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {google_meridian-1.2.0.dist-info → google_meridian-1.3.0.dist-info}/METADATA +10 -10
  2. google_meridian-1.3.0.dist-info/RECORD +62 -0
  3. meridian/analysis/__init__.py +2 -0
  4. meridian/analysis/analyzer.py +280 -142
  5. meridian/analysis/formatter.py +2 -2
  6. meridian/analysis/optimizer.py +353 -169
  7. meridian/analysis/review/__init__.py +20 -0
  8. meridian/analysis/review/checks.py +721 -0
  9. meridian/analysis/review/configs.py +110 -0
  10. meridian/analysis/review/constants.py +40 -0
  11. meridian/analysis/review/results.py +544 -0
  12. meridian/analysis/review/reviewer.py +186 -0
  13. meridian/analysis/summarizer.py +14 -12
  14. meridian/analysis/templates/chips.html.jinja +12 -0
  15. meridian/analysis/test_utils.py +27 -5
  16. meridian/analysis/visualizer.py +45 -50
  17. meridian/backend/__init__.py +698 -55
  18. meridian/backend/config.py +75 -16
  19. meridian/backend/test_utils.py +127 -1
  20. meridian/constants.py +52 -11
  21. meridian/data/input_data.py +7 -2
  22. meridian/data/test_utils.py +5 -3
  23. meridian/mlflow/autolog.py +2 -2
  24. meridian/model/__init__.py +1 -0
  25. meridian/model/adstock_hill.py +10 -9
  26. meridian/model/eda/__init__.py +3 -0
  27. meridian/model/eda/constants.py +21 -0
  28. meridian/model/eda/eda_engine.py +1580 -84
  29. meridian/model/eda/eda_outcome.py +200 -0
  30. meridian/model/eda/eda_spec.py +84 -0
  31. meridian/model/eda/meridian_eda.py +220 -0
  32. meridian/model/knots.py +56 -50
  33. meridian/model/media.py +10 -8
  34. meridian/model/model.py +79 -16
  35. meridian/model/model_test_data.py +53 -9
  36. meridian/model/posterior_sampler.py +398 -391
  37. meridian/model/prior_distribution.py +114 -39
  38. meridian/model/prior_sampler.py +146 -90
  39. meridian/model/spec.py +7 -8
  40. meridian/model/transformers.py +16 -8
  41. meridian/version.py +1 -1
  42. google_meridian-1.2.0.dist-info/RECORD +0 -52
  43. {google_meridian-1.2.0.dist-info → google_meridian-1.3.0.dist-info}/WHEEL +0 -0
  44. {google_meridian-1.2.0.dist-info → google_meridian-1.3.0.dist-info}/licenses/LICENSE +0 -0
  45. {google_meridian-1.2.0.dist-info → google_meridian-1.3.0.dist-info}/top_level.txt +0 -0
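The optimizer.py changes below add geo filtering (`selected_geos`), a frequency cap for the optimal-frequency search (`max_frequency`), and a configurable currency label for the HTML summary (`currency`). The following is a minimal usage sketch based only on the signatures visible in the diff below, not an official example: `mmm` is assumed to be an already fitted `meridian.model.model.Meridian` instance, and the geo names, dates, frequency cap, and output paths are placeholder values.

    from meridian.analysis import optimizer

    budget_optimizer = optimizer.BudgetOptimizer(mmm)  # `mmm`: a fitted Meridian model (assumed)
    results = budget_optimizer.optimize(
        selected_geos=['Geo A', 'Geo B'],  # new in 1.3.0: restrict optimization to a geo subset
        start_date='2024-01-01',
        end_date='2024-06-30',
        use_optimal_frequency=True,
        max_frequency=10.0,  # new in 1.3.0: upper bound for the optimal-frequency search space
    )
    # new in 1.3.0: currency label used in the HTML summary instead of a hard-coded '$'
    results.output_optimization_summary(
        filename='optimization_summary.html',
        filepath='./output',
        currency='$',
    )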
meridian/analysis/optimizer.py

@@ -26,7 +26,7 @@ import altair as alt
  import jinja2
  from meridian import backend
  from meridian import constants as c
- from meridian.analysis import analyzer
+ from meridian.analysis import analyzer as analyzer_module
  from meridian.analysis import formatter
  from meridian.analysis import summary_text
  from meridian.data import time_coordinates as tc
@@ -102,6 +102,7 @@ class OptimizationGrid:
  use_kpi: Whether using generic KPI or revenue.
  use_posterior: Whether posterior distributions were used, or prior.
  use_optimal_frequency: Whether optimal frequency was used.
+ max_frequency: The maximum frequency for reach and frequency channels.
  start_date: The start date of the optimization period.
  end_date: The end date of the optimization period.
  gtol: Float indicating the acceptable relative error for the budget used in
@@ -114,7 +115,12 @@ class OptimizationGrid:
  does not contain reach and frequency data, or if the model does contain
  reach and frequency data, but historical frequency is used for the
  optimization scenario.
- selected_times: The time coordinates from the model used in this grid.
+ selected_geos: The geo coordinates from the model used in this grid.
+ selected_times: The time coordinates from the model used in this grid. If
+ new data with modified time coordinates is used for optimization, this is
+ a list of booleans indicating which time coordinates are selected.
+ Otherwise, this is a list of strings indicating the time coordinates used
+ in this grid.
  """

  _grid_dataset: xr.Dataset
@@ -128,7 +134,9 @@ class OptimizationGrid:
  gtol: float
  round_factor: int
  optimal_frequency: np.ndarray | None
- selected_times: Sequence[str] | None
+ selected_geos: Sequence[str] | None
+ selected_times: Sequence[str] | Sequence[bool] | None
+ max_frequency: float | None = None

  @property
  def grid_dataset(self) -> xr.Dataset:
@@ -262,7 +270,7 @@ class OptimizationGrid:
  return xr.Dataset(
  coords={c.CHANNEL: self.channels},
  data_vars={
- c.OPTIMIZED: ([c.CHANNEL], optimal_spend.data),
+ c.OPTIMIZED: ([c.CHANNEL], optimal_spend),
  c.NON_OPTIMIZED: ([c.CHANNEL], rounded_spend),
  },
  )
@@ -386,16 +394,26 @@ class OptimizationGrid:
  media spend that maximizes incremental outcome based on spend constraints
  for all media and RF channels.
  """
- spend = spend_grid[0, :].copy()
- incremental_outcome = incremental_outcome_grid[0, :].copy()
- spend_grid = spend_grid[1:, :]
- incremental_outcome_grid = incremental_outcome_grid[1:, :]
- iterative_roi_grid = np.round(
- backend.divide_no_nan(
- incremental_outcome_grid - incremental_outcome, spend_grid - spend
- ),
- decimals=8,
+ spend_grid_values = np.array(spend_grid.values, dtype=np.float64)
+ incremental_outcome_grid_values = np.array(
+ incremental_outcome_grid.values, dtype=np.float64
+ )
+
+ spend = spend_grid_values[0, :].copy()
+ incremental_outcome = incremental_outcome_grid_values[0, :].copy()
+ spend_grid_values = spend_grid_values[1:, :]
+ incremental_outcome_grid_values = incremental_outcome_grid_values[1:, :]
+
+ numerator = incremental_outcome_grid_values - incremental_outcome
+ denominator = spend_grid_values - spend
+ iterative_roi_grid = np.divide(
+ numerator,
+ denominator,
+ out=np.zeros_like(numerator),
+ where=(denominator != 0),
  )
+ iterative_roi_grid = np.round(iterative_roi_grid, decimals=8)
+
  while True:
  spend_optimal = spend.astype(int)
  # If none of the exit criteria are met roi_grid will eventually be filled
@@ -407,8 +425,8 @@ class OptimizationGrid:
  )
  row_idx = point[0]
  media_idx = point[1]
- spend[media_idx] = spend_grid[row_idx, media_idx]
- incremental_outcome[media_idx] = incremental_outcome_grid[
+ spend[media_idx] = spend_grid_values[row_idx, media_idx]
+ incremental_outcome[media_idx] = incremental_outcome_grid_values[
  row_idx, media_idx
  ]
  roi_grid_point = iterative_roi_grid[row_idx, media_idx]
@@ -421,14 +439,23 @@ class OptimizationGrid:
  break

  iterative_roi_grid[0 : row_idx + 1, media_idx] = np.nan
+
+ num_col = (
+ incremental_outcome_grid_values[row_idx + 1 :, media_idx]
+ - incremental_outcome_grid_values[row_idx, media_idx]
+ )
+ den_col = (
+ spend_grid_values[row_idx + 1 :, media_idx]
+ - spend_grid_values[row_idx, media_idx]
+ )
+ new_roi_col = np.divide(
+ num_col,
+ den_col,
+ out=np.zeros_like(num_col),
+ where=(den_col != 0),
+ )
  iterative_roi_grid[row_idx + 1 :, media_idx] = np.round(
- backend.divide_no_nan(
- incremental_outcome_grid[row_idx + 1 :, media_idx]
- - incremental_outcome_grid[row_idx, media_idx],
- spend_grid[row_idx + 1 :, media_idx]
- - spend_grid[row_idx, media_idx],
- ),
- decimals=8,
+ new_roi_col, decimals=8
  )
  return spend_optimal

@@ -438,40 +465,33 @@ class OptimizationResults:
  """The optimized budget allocation.

  This is a dataclass object containing datasets output from `BudgetOptimizer`.
- These datasets include:
-
- - `nonoptimized_data`: The non-optimized budget metrics (based on historical
- frequency).
- - `nonoptimized_data_with_optimal_freq`: The non-optimized budget metrics
- based on optimal frequency.
- - `optimized_data`: The optimized budget metrics.
- - `optimization_grid`: The grid information used for optimization.
-
- The metrics (data variables) are: ROI, mROI, incremental outcome, CPIK.

- Additionally, some intermediate values and referecences to the source fitted
- model and analyzer are also stored here. These are useful for visualizing and
- debugging.
+ The performance metrics (data variables) are: spend, percentage of spend, ROI,
+ mROI, incremental outcome, CPIK, and effectiveness.

  Attributes:
  meridian: The fitted Meridian model that was used to create this budget
  allocation.
  analyzer: The analyzer bound to the model above.
- spend_ratio: The spend ratio used to scale the non-optimized budget metrics
- to the optimized budget metrics.
- spend_bounds: The spend bounds used to scale the non-optimized budget
- metrics to the optimized budget metrics.
- nonoptimized_data: The non-optimized budget metrics (based on historical
- frequency).
- nonoptimized_data_with_optimal_freq: The non-optimized budget metrics based
- on optimal frequency.
- optimized_data: The optimized budget metrics.
+ spend_ratio: The spend ratio used to scale the non-optimized performance
+ metrics to the optimized performance metrics.
+ spend_bounds: The spend bounds used to scale the non-optimized performance
+ metrics to the optimized performance metrics.
+ nonoptimized_data: Performance metrics under the non-optimized budget. For
+ R&F channels, the non-optimized frequency is used.
+ nonoptimized_data_with_optimal_freq: Performance metrics under the
+ non-optimized budget. For R&F channels, the optimal frequency is used if
+ frequency was optimized.
+ optimized_data: Performance metrics under the optimized budget. For R&F
+ channels, the optimal frequency is used if frequency was optimized.
  optimization_grid: The grid information used for optimization.
+ new_data: The optional `DataTensors` container that was used to create this
+ budget allocation.
  """

  meridian: model.Meridian
  # The analyzer bound to the model above.
- analyzer: analyzer.Analyzer
+ analyzer: analyzer_module.Analyzer
  spend_ratio: np.ndarray # spend / historical spend
  spend_bounds: tuple[np.ndarray, np.ndarray]

@@ -481,6 +501,10 @@ class OptimizationResults:
  _optimized_data: xr.Dataset
  _optimization_grid: OptimizationGrid

+ # The optional `DataTensors` container to use if optimization was performed
+ # on data different from the original `input_data`.
+ new_data: analyzer_module.DataTensors | None = None
+
  # TODO: Move this, and the plotting methods, to a summarizer.
  @functools.cached_property
  def template_env(self) -> jinja2.Environment:
@@ -497,10 +521,10 @@ class OptimizationResults:

  @property
  def nonoptimized_data(self) -> xr.Dataset:
- """Dataset holding the non-optimized budget metrics.
+ """Dataset holding the non-optimized performance metrics.

  For channels that have reach and frequency data, their performance metrics
- (ROI, mROI, incremental outcome, CPIK) are based on historical frequency.
+ are based on historical frequency.

  The dataset contains the following:

@@ -519,10 +543,10 @@ class OptimizationResults:

  @property
  def nonoptimized_data_with_optimal_freq(self) -> xr.Dataset:
- """Dataset holding the non-optimized budget metrics.
+ """Dataset holding the non-optimized performance metrics.

  For channels that have reach and frequency data, their performance metrics
- (ROI, mROI, incremental outcome, CPIK) are based on optimal frequency.
+ are based on optimal frequency.

  The dataset contains the following:

@@ -537,10 +561,10 @@ class OptimizationResults:

  @property
  def optimized_data(self) -> xr.Dataset:
- """Dataset holding the optimized budget metrics.
+ """Dataset holding the optimized performance metrics.

  For channels that have reach and frequency data, their performance metrics
- (ROI, mROI, incremental outcome) are based on optimal frequency.
+ are based on optimal frequency.

  The dataset contains the following:

@@ -558,11 +582,16 @@ class OptimizationResults:
  """The grid information used for optimization."""
  return self._optimization_grid

- def output_optimization_summary(self, filename: str, filepath: str):
+ def output_optimization_summary(
+ self,
+ filename: str,
+ filepath: str,
+ currency: str = c.DEFAULT_CURRENCY,
+ ):
  """Generates and saves the HTML optimization summary output."""
  os.makedirs(filepath, exist_ok=True)
  with open(os.path.join(filepath, filename), 'w') as f:
- f.write(self._gen_optimization_summary())
+ f.write(self._gen_optimization_summary(currency))

  def plot_incremental_outcome_delta(self) -> alt.Chart:
  """Plots a waterfall chart showing the change in incremental outcome."""
@@ -712,7 +741,7 @@ class OptimizationResults:
  )
  )

- def plot_spend_delta(self) -> alt.Chart:
+ def plot_spend_delta(self, currency: str = c.DEFAULT_CURRENCY) -> alt.Chart:
  """Plots a bar chart showing the optimized change in spend per channel."""
  df = self._get_delta_data(c.SPEND)
  base = (
@@ -733,7 +762,7 @@ class OptimizationResults:
  y=alt.Y(
  f'{c.SPEND}:Q',
  axis=alt.Axis(
- title='$',
+ title=currency,
  domain=False,
  labelExpr=formatter.compact_number_expr(),
  **formatter.AXIS_CONFIG,
@@ -894,9 +923,12 @@ class OptimizationResults:
  returned this result.
  """
  channels = self.optimized_data.channel.values
- selected_times = self.meridian.expand_selected_time_dims(
+ selected_times = _expand_selected_times(
+ meridian=self.meridian,
  start_date=self.optimized_data.start_date,
  end_date=self.optimized_data.end_date,
+ new_data=self.new_data,
+ return_flexible_str=True,
  )
  _, ubounds = self.spend_bounds
  upper_bound = (
@@ -912,8 +944,10 @@ class OptimizationResults:
  # WARN: If `selected_times` is not None (i.e. a subset time range), this
  # response curve computation might take a significant amount of time.
  return self.analyzer.response_curves(
+ new_data=self.new_data,
  spend_multipliers=spend_multiplier,
  use_posterior=self.optimization_grid.use_posterior,
+ selected_geos=self.optimization_grid.selected_geos,
  selected_times=selected_times,
  by_reach=True,
  use_kpi=not self.nonoptimized_data.attrs[c.IS_REVENUE_KPI],
@@ -1024,7 +1058,7 @@ class OptimizationResults:
  sorted_df.sort_index(inplace=True)
  return sorted_df

- def _gen_optimization_summary(self) -> str:
+ def _gen_optimization_summary(self, currency: str) -> str:
  """Generates HTML optimization summary output (as sanitized content str)."""
  start_date = tc.normalize_date(self.optimized_data.start_date)
  self.template_env.globals[c.START_DATE] = start_date.strftime(
@@ -1036,22 +1070,25 @@ class OptimizationResults:
  self.template_env.globals[c.END_DATE] = end_date_adjusted.strftime(
  f'%b {end_date_adjusted.day}, %Y'
  )
+ self.template_env.globals[c.SELECTED_GEOS] = (
+ self.optimization_grid.selected_geos
+ )

  html_template = self.template_env.get_template('summary.html.jinja')
  return html_template.render(
  title=summary_text.OPTIMIZATION_TITLE,
- cards=self._create_output_sections(),
+ cards=self._create_output_sections(currency),
  )

- def _create_output_sections(self) -> Sequence[str]:
+ def _create_output_sections(self, currency: str) -> Sequence[str]:
  """Creates the HTML snippets for cards in the summary page."""
  return [
- self._create_scenario_plan_section(),
- self._create_budget_allocation_section(),
+ self._create_scenario_plan_section(currency),
+ self._create_budget_allocation_section(currency),
  self._create_response_curves_section(),
  ]

- def _create_scenario_plan_section(self) -> str:
+ def _create_scenario_plan_section(self, currency: str) -> str:
  """Creates the HTML card snippet for the scenario plan section."""
  card_spec = formatter.CardSpec(
  id=summary_text.SCENARIO_PLAN_CARD_ID,
@@ -1094,22 +1131,32 @@ class OptimizationResults:
  self.template_env,
  card_spec,
  insights,
- stats_specs=self._create_scenario_stats_specs(),
+ stats_specs=self._create_scenario_stats_specs(currency),
  )

- def _create_scenario_stats_specs(self) -> Sequence[formatter.StatsSpec]:
+ def _create_scenario_stats_specs(
+ self, currency: str
+ ) -> Sequence[formatter.StatsSpec]:
  """Creates the stats to fill the scenario plan section."""
  outcome = self._kpi_or_revenue
  budget_diff = self.optimized_data.budget - self.nonoptimized_data.budget
  budget_prefix = '+' if budget_diff > 0 else ''
  non_optimized_budget = formatter.StatsSpec(
  title=summary_text.NON_OPTIMIZED_BUDGET_LABEL,
- stat=formatter.format_monetary_num(self.nonoptimized_data.budget),
+ stat=formatter.format_monetary_num(
+ num=self.nonoptimized_data.budget,
+ currency=currency,
+ ),
  )
  optimized_budget = formatter.StatsSpec(
  title=summary_text.OPTIMIZED_BUDGET_LABEL,
- stat=formatter.format_monetary_num(self.optimized_data.budget),
- delta=(budget_prefix + formatter.format_monetary_num(budget_diff)),
+ stat=formatter.format_monetary_num(
+ num=self.optimized_data.budget, currency=currency
+ ),
+ delta=(
+ budget_prefix
+ + formatter.format_monetary_num(num=budget_diff, currency=currency)
+ ),
  )

  if outcome == c.REVENUE:
@@ -1131,7 +1178,7 @@ class OptimizationResults:
  )
  optimized_performance_title = summary_text.OPTIMIZED_CPIK_LABEL
  optimized_performance_stat = f'${self.optimized_data.total_cpik:.2f}'
- optimized_performance_diff = formatter.compact_number(diff, 2, '$')
+ optimized_performance_diff = formatter.compact_number(diff, 2, currency)
  non_optimized_performance = formatter.StatsSpec(
  title=non_optimized_performance_title,
  stat=non_optimized_performance_stat,
@@ -1147,7 +1194,7 @@ class OptimizationResults:
  - self.nonoptimized_data.total_incremental_outcome
  )
  inc_outcome_prefix = '+' if inc_outcome_diff > 0 else ''
- currency = '$' if outcome == c.REVENUE else ''
+ currency = currency if outcome == c.REVENUE else ''
  non_optimized_inc_outcome = formatter.StatsSpec(
  title=summary_text.NON_OPTIMIZED_INC_OUTCOME_LABEL.format(
  outcome=outcome
@@ -1177,7 +1224,7 @@ class OptimizationResults:
  optimized_inc_outcome,
  ]

- def _create_budget_allocation_section(self) -> str:
+ def _create_budget_allocation_section(self, currency: str) -> str:
  """Creates the HTML card snippet for the budget allocation section."""
  outcome = self._kpi_or_revenue
  card_spec = formatter.CardSpec(
@@ -1187,7 +1234,7 @@ class OptimizationResults:
  spend_delta = formatter.ChartSpec(
  id=summary_text.SPEND_DELTA_CHART_ID,
  description=summary_text.SPEND_DELTA_CHART_INSIGHTS,
- chart_json=self.plot_spend_delta().to_json(),
+ chart_json=self.plot_spend_delta(currency).to_json(),
  )
  spend_allocation = formatter.ChartSpec(
  id=summary_text.SPEND_ALLOCATION_CHART_ID,
@@ -1276,7 +1323,7 @@ class BudgetOptimizer:

  def __init__(self, meridian: model.Meridian):
  self._meridian = meridian
- self._analyzer = analyzer.Analyzer(self._meridian)
+ self._analyzer = analyzer_module.Analyzer(self._meridian)

  def _validate_model_fit(self, use_posterior: bool):
  """Validates that the model is fit."""
@@ -1288,8 +1335,9 @@ class BudgetOptimizer:

  def optimize(
  self,
- new_data: analyzer.DataTensors | None = None,
+ new_data: analyzer_module.DataTensors | None = None,
  use_posterior: bool = True,
+ selected_geos: Sequence[str] | None = None,
  # TODO: b/409550413 - Remove this argument.
  selected_times: tuple[str | None, str | None] | None = None,
  start_date: tc.Date = None,
@@ -1302,7 +1350,10 @@ class BudgetOptimizer:
  target_roi: float | None = None,
  target_mroi: float | None = None,
  gtol: float = 0.0001,
+ # TODO:
+ # merging use_optimal_frequency and max_frequency into a single argument.
  use_optimal_frequency: bool = True,
+ max_frequency: float | None = None,
  use_kpi: bool = False,
  confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
  batch_size: int = c.DEFAULT_BATCH_SIZE,
@@ -1378,6 +1429,9 @@ class BudgetOptimizer:
  use_posterior: Boolean. If `True`, then the budget is optimized based on
  the posterior distribution of the model. Otherwise, the prior
  distribution is used.
+ selected_geos: Optional list containing a subset of geos to include. By
+ default, all geos are included. The selected geos should match those in
+ `InputData.geo`.
  selected_times: Deprecated. Tuple containing the start and end time
  dimension coordinates for the duration to run the optimization on.
  Please Use `start_date` and `end_date` instead.
@@ -1434,6 +1488,10 @@ class BudgetOptimizer:
  use_optimal_frequency: If `True`, uses `optimal_frequency` calculated by
  trained Meridian model for optimization. If `False`, uses historical
  frequency or `new_data.frequency` if provided.
+ max_frequency: Float indicating the frequency upper bound for the optimal
+ frequency search space. If `None` when `use_optimal_frequency` is
+ `True`, the max frequency of the input data is used. If
+ `use_optimal_frequency` is `False`, `max_frequency` is ignored.
  use_kpi: If `True`, runs the optimization on KPI. Defaults to revenue.
  confidence_level: The threshold for computing the confidence intervals.
  batch_size: Maximum draws per chain in each batch. The calculation is run
@@ -1479,6 +1537,7 @@ class BudgetOptimizer:
  use_grid_arg = optimization_grid is not None and self._validate_grid(
  new_data=new_data,
  use_posterior=use_posterior,
+ selected_geos=selected_geos,
  start_date=start_date,
  end_date=end_date,
  budget=budget,
@@ -1487,12 +1546,14 @@ class BudgetOptimizer:
  spend_constraint_upper=spend_constraint_upper,
  gtol=gtol,
  use_optimal_frequency=use_optimal_frequency,
+ max_frequency=max_frequency,
  use_kpi=use_kpi,
  optimization_grid=optimization_grid,
  )
  if optimization_grid is None or not use_grid_arg:
  optimization_grid = self.create_optimization_grid(
  new_data=new_data,
+ selected_geos=selected_geos,
  start_date=start_date,
  end_date=end_date,
  budget=budget,
@@ -1503,6 +1564,7 @@ class BudgetOptimizer:
  use_posterior=use_posterior,
  use_kpi=use_kpi,
  use_optimal_frequency=use_optimal_frequency,
+ max_frequency=max_frequency,
  batch_size=batch_size,
  )

@@ -1526,13 +1588,14 @@ class BudgetOptimizer:
  use_historical_budget = budget is None or np.isclose(
  budget, np.sum(optimization_grid.historical_spend)
  )
- new_data = new_data or analyzer.DataTensors()
+ new_data = new_data or analyzer_module.DataTensors()
  nonoptimized_data = self._create_budget_dataset(
  new_data=new_data.filter_fields(c.PAID_DATA + (c.TIME,)),
  use_posterior=use_posterior,
  use_kpi=use_kpi,
  hist_spend=optimization_grid.historical_spend,
  spend=spend.non_optimized,
+ selected_geos=selected_geos,
  start_date=start_date,
  end_date=end_date,
  confidence_level=confidence_level,
@@ -1545,6 +1608,7 @@ class BudgetOptimizer:
  use_kpi=use_kpi,
  hist_spend=optimization_grid.historical_spend,
  spend=spend.non_optimized,
+ selected_geos=selected_geos,
  start_date=start_date,
  end_date=end_date,
  optimal_frequency=optimization_grid.optimal_frequency,
@@ -1565,6 +1629,7 @@ class BudgetOptimizer:
  use_kpi=use_kpi,
  hist_spend=optimization_grid.historical_spend,
  spend=spend.optimized,
+ selected_geos=selected_geos,
  start_date=start_date,
  end_date=end_date,
  optimal_frequency=optimization_grid.optimal_frequency,
@@ -1595,6 +1660,7 @@ class BudgetOptimizer:
  )

  return OptimizationResults(
+ new_data=new_data,
  meridian=self._meridian,
  analyzer=self._analyzer,
  spend_ratio=spend_ratio,
@@ -1617,7 +1683,7 @@ class BudgetOptimizer:
  rf_spend: backend.Tensor | None = None,
  revenue_per_kpi: backend.Tensor | None = None,
  use_optimal_frequency: bool = True,
- ) -> analyzer.DataTensors:
+ ) -> analyzer_module.DataTensors:
  """Creates a `DataTensors` for optimizations from CPM and flighting data.

  CPM is broken down into cost per media unit, `cpmu`, for the media channels
@@ -1684,7 +1750,11 @@ class BudgetOptimizer:
  A `DataTensors` object with optional tensors `media`, `reach`,
  `frequency`, `media_spend`, `rf_spend`, `revenue_per_kpi`, and `time`.
  """
+ n_times = time.shape[0] if isinstance(time, backend.Tensor) else len(time)
+ n_geos = self._meridian.n_geos
  self._validate_optimization_tensors(
+ expected_n_geos=n_geos,
+ expected_n_times=n_times,
  cpmu=cpmu,
  cprf=cprf,
  media=media,
@@ -1695,13 +1765,6 @@ class BudgetOptimizer:
  revenue_per_kpi=revenue_per_kpi,
  use_optimal_frequency=use_optimal_frequency,
  )
- n_times = time.shape[0] if isinstance(time, backend.Tensor) else len(time)
- n_geos = self._meridian.n_geos
- revenue_per_kpi = (
- _expand_tensor(revenue_per_kpi, (n_geos, n_times))
- if revenue_per_kpi is not None
- else None
- )

  tensors = {}
  if media is not None:
@@ -1737,14 +1800,17 @@ class BudgetOptimizer:
  impressions, tensors[c.FREQUENCY]
  )
  if revenue_per_kpi is not None:
- tensors[c.REVENUE_PER_KPI] = revenue_per_kpi
+ tensors[c.REVENUE_PER_KPI] = _expand_tensor(
+ revenue_per_kpi, (n_geos, n_times)
+ )
  tensors[c.TIME] = backend.to_tensor(time)
- return analyzer.DataTensors(**tensors)
+ return analyzer_module.DataTensors(**tensors)

  def _validate_grid(
  self,
- new_data: analyzer.DataTensors | None,
+ new_data: analyzer_module.DataTensors | None,
  use_posterior: bool,
+ selected_geos: Sequence[str] | None,
  start_date: tc.Date,
  end_date: tc.Date,
  budget: float | None,
@@ -1753,6 +1819,7 @@ class BudgetOptimizer:
  spend_constraint_upper: _SpendConstraint,
  gtol: float,
  use_optimal_frequency: bool,
+ max_frequency: float | None,
  use_kpi: bool,
  optimization_grid: OptimizationGrid,
  ) -> bool:
@@ -1785,6 +1852,15 @@ class BudgetOptimizer:
  )
  return False

+ if max_frequency != optimization_grid.max_frequency:
+ warnings.warn(
+ 'Given optimization grid was created with `use_optimal_frequency` ='
+ f' {optimization_grid.max_frequency}, but optimization was'
+ f' called with `max_frequency` = {max_frequency}. A'
+ ' new grid will be created.'
+ )
+ return False
+
  if (
  start_date != optimization_grid.start_date
  or end_date != optimization_grid.end_date
@@ -1799,7 +1875,7 @@ class BudgetOptimizer:
  return False

  if new_data is None:
- new_data = analyzer.DataTensors()
+ new_data = analyzer_module.DataTensors()
  required_tensors = c.PERFORMANCE_DATA + (c.TIME,)
  filled_data = new_data.validate_and_fill_missing_data(
  required_tensors_names=required_tensors, meridian=self._meridian
@@ -1814,8 +1890,20 @@ class BudgetOptimizer:
  )
  return False

+ s_geos = sorted(selected_geos or [])
+ g_geos = sorted(optimization_grid.selected_geos or [])
+ if s_geos != g_geos:
+ warnings.warn(
+ 'Given optimization grid was created with `selected_geos` ='
+ f' {optimization_grid.selected_geos}, but optimization request was'
+ f' called with `selected_geos` = {selected_geos}. A new grid will be'
+ ' created.'
+ )
+ return False
+
  n_channels = len(optimization_grid.channels)
- selected_times = self._validate_selected_times(
+ selected_times = _expand_selected_times(
+ meridian=self._meridian,
  start_date=start_date,
  end_date=end_date,
  new_data=new_data,
@@ -1870,6 +1958,7 @@ class BudgetOptimizer:
  self,
  new_data: xr.Dataset | None = None,
  use_posterior: bool = True,
+ selected_geos: Sequence[str] | None = None,
  # TODO: b/409550413 - Remove this argument.
  selected_times: tuple[str | None, str | None] | None = None,
  start_date: tc.Date = None,
@@ -1880,6 +1969,7 @@ class BudgetOptimizer:
  spend_constraint_upper: _SpendConstraint = c.SPEND_CONSTRAINT_DEFAULT,
  gtol: float = 0.0001,
  use_optimal_frequency: bool = True,
+ max_frequency: float | None = None,
  use_kpi: bool = False,
  batch_size: int = c.DEFAULT_BATCH_SIZE,
  ) -> OptimizationGrid:
@@ -1908,6 +1998,9 @@ class BudgetOptimizer:
  use_posterior: Boolean. If `True`, then the incremental outcome is derived
  from the posterior distribution of the model. Otherwise, the prior
  distribution is used.
+ selected_geos: Optional list containing a subset of geos to include. By
+ default, all geos are included. The selected geos should match those in
+ `InputData.geo`.
  selected_times: Deprecated. Tuple containing the start and end time
  dimension coordinates. Please Use `start_date` and `end_date` instead.
  start_date: Optional start date selector, *inclusive*, in _yyyy-mm-dd_
@@ -1948,6 +2041,10 @@ class BudgetOptimizer:
  the smallest integer such that `(budget - rounded_budget)` is less than
  or equal to `(budget * gtol)`. `gtol` must be less than 1.
  use_optimal_frequency: Boolean. Whether optimal frequency was used.
+ max_frequency: Float indicating the frequency upper bound for the optimal
+ frequency search space. If `None` when `use_optimal_frequency` is
+ `True`, the max frequency of the input data is used. If
+ `use_optimal_frequency` is `False`, `max_frequency` is ignored.
  use_kpi: Boolean. If `True`, then the incremental outcome is derived from
  the KPI impact. Otherwise, the incremental outcome is derived from the
  revenue impact.
@@ -1961,8 +2058,9 @@ class BudgetOptimizer:
  """
  self._validate_model_fit(use_posterior)
  if new_data is None:
- new_data = analyzer.DataTensors()
-
+ new_data = analyzer_module.DataTensors()
+ if selected_geos is not None and not selected_geos:
+ raise ValueError('`selected_geos` must not be empty.')
  if selected_times is not None:
  warnings.warn(
  '`selected_times` is deprecated. Please use `start_date` and'
@@ -1978,13 +2076,15 @@ class BudgetOptimizer:
  filled_data = new_data.validate_and_fill_missing_data(
  required_tensors_names=required_tensors, meridian=self._meridian
  )
- selected_times = self._validate_selected_times(
+ selected_times = _expand_selected_times(
+ meridian=self._meridian,
  start_date=start_date,
  end_date=end_date,
  new_data=filled_data,
  )
  hist_spend = self._analyzer.get_aggregated_spend(
  new_data=filled_data.filter_fields(c.PAID_CHANNELS + c.SPEND_DATA),
+ selected_geos=selected_geos,
  selected_times=selected_times,
  include_media=self._meridian.n_media_channels > 0,
  include_rf=self._meridian.n_rf_channels > 0,
@@ -2008,7 +2108,7 @@ class BudgetOptimizer:
  )
  )
  if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
- opt_freq_data = analyzer.DataTensors(
+ opt_freq_data = analyzer_module.DataTensors(
  rf_impressions=filled_data.reach * filled_data.frequency,
  rf_spend=filled_data.rf_spend,
  revenue_per_kpi=filled_data.revenue_per_kpi,
@@ -2017,8 +2117,10 @@ class BudgetOptimizer:
  self._analyzer.optimal_freq(
  new_data=opt_freq_data,
  use_posterior=use_posterior,
+ selected_geos=selected_geos,
  selected_times=selected_times,
  use_kpi=use_kpi,
+ max_frequency=max_frequency,
  ).optimal_frequency,
  dtype=backend.float32,
  )
@@ -2031,6 +2133,7 @@ class BudgetOptimizer:
  spend_bound_lower=optimization_lower_bound,
  spend_bound_upper=optimization_upper_bound,
  step_size=step_size,
+ selected_geos=selected_geos,
  selected_times=selected_times,
  new_data=filled_data.filter_fields(c.PAID_DATA),
  use_posterior=use_posterior,
@@ -2050,11 +2153,13 @@ class BudgetOptimizer:
  use_kpi=use_kpi,
  use_posterior=use_posterior,
  use_optimal_frequency=use_optimal_frequency,
+ max_frequency=max_frequency,
  start_date=start_date,
  end_date=end_date,
  gtol=gtol,
  round_factor=round_factor,
  optimal_frequency=optimal_frequency,
+ selected_geos=selected_geos,
  selected_times=selected_times,
  )

@@ -2098,38 +2203,11 @@ class BudgetOptimizer:
  attrs={c.SPEND_STEP_SIZE: spend_step_size},
  )

- def _validate_selected_times(
- self,
- start_date: tc.Date,
- end_date: tc.Date,
- new_data: analyzer.DataTensors | None,
- ) -> Sequence[str] | Sequence[bool] | None:
- """Validates and returns the selected times."""
- if start_date is None and end_date is None:
- return None
-
- new_data = new_data or analyzer.DataTensors()
- if new_data.get_modified_times(self._meridian) is None:
- return self._meridian.expand_selected_time_dims(
- start_date=start_date,
- end_date=end_date,
- )
- else:
- assert new_data.time is not None
- new_times_str = np.asarray(new_data.time).astype(str).tolist()
- time_coordinates = tc.TimeCoordinates.from_dates(new_times_str)
- expanded_dates = time_coordinates.expand_selected_time_dims(
- start_date=start_date,
- end_date=end_date,
- )
- expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
- return [x in expanded_str for x in new_times_str]
-
  def _get_incremental_outcome_tensors(
  self,
  hist_spend: np.ndarray,
  spend: np.ndarray,
- new_data: analyzer.DataTensors | None = None,
+ new_data: analyzer_module.DataTensors | None = None,
  optimal_frequency: Sequence[float] | None = None,
  ) -> tuple[
  backend.Tensor | None,
@@ -2165,7 +2243,7 @@ class BudgetOptimizer:
  Returns:
  Tuple of backend.tensors (new_media, new_reach, new_frequency).
  """
- new_data = new_data or analyzer.DataTensors()
+ new_data = new_data or analyzer_module.DataTensors()
  filled_data = new_data.validate_and_fill_missing_data(
  c.PAID_CHANNELS,
  self._meridian,
@@ -2206,9 +2284,10 @@ class BudgetOptimizer:
  self,
  hist_spend: np.ndarray,
  spend: np.ndarray,
- new_data: analyzer.DataTensors | None = None,
+ new_data: analyzer_module.DataTensors | None = None,
  use_posterior: bool = True,
  use_kpi: bool = False,
+ selected_geos: Sequence[str] | None = None,
  start_date: tc.Date = None,
  end_date: tc.Date = None,
  optimal_frequency: Sequence[float] | None = None,
@@ -2218,13 +2297,16 @@ class BudgetOptimizer:
  use_historical_budget: bool = True,
  ) -> xr.Dataset:
  """Creates the budget dataset."""
- new_data = new_data or analyzer.DataTensors()
+ new_data = new_data or analyzer_module.DataTensors()
  filled_data = new_data.validate_and_fill_missing_data(
  c.PAID_DATA + (c.TIME,),
  self._meridian,
  )
- selected_times = self._validate_selected_times(
- start_date=start_date, end_date=end_date, new_data=new_data
+ selected_times = _expand_selected_times(
+ meridian=self._meridian,
+ start_date=start_date,
+ end_date=end_date,
+ new_data=new_data,
  )
  spend_tensor = backend.to_tensor(spend, dtype=backend.float32)
  hist_spend = backend.to_tensor(hist_spend, dtype=backend.float32)
@@ -2237,7 +2319,7 @@ class BudgetOptimizer:
  )
  )
  budget = np.sum(spend_tensor)
- inc_outcome_data = analyzer.DataTensors(
+ inc_outcome_data = analyzer_module.DataTensors(
  media=new_media,
  reach=new_reach,
  frequency=new_frequency,
@@ -2249,6 +2331,7 @@ class BudgetOptimizer:
  incremental_outcome = self._analyzer.incremental_outcome(
  use_posterior=use_posterior,
  new_data=inc_outcome_data,
+ selected_geos=selected_geos,
  selected_times=selected_times,
  use_kpi=use_kpi,
  batch_size=batch_size,
@@ -2257,6 +2340,7 @@ class BudgetOptimizer:
  incremental_increase = 0.01
  mroi_numerator = self._analyzer.incremental_outcome(
  new_data=inc_outcome_data,
+ selected_geos=selected_geos,
  selected_times=selected_times,
  scaling_factor0=1.0,
  scaling_factor1=1 + incremental_increase,
@@ -2269,7 +2353,7 @@ class BudgetOptimizer:
  # shape (n_channels, n_metrics) where n_metrics = 4 for (mean, median,
  # ci_lo, and ci_hi)
  incremental_outcome_with_mean_median_and_ci = (
- analyzer.get_central_tendency_and_ci(
+ analyzer_module.get_central_tendency_and_ci(
  data=incremental_outcome,
  confidence_level=confidence_level,
  include_median=True,
@@ -2281,18 +2365,18 @@ class BudgetOptimizer:
  )

  aggregated_impressions = self._analyzer.get_aggregated_impressions(
- new_data=analyzer.DataTensors(
+ new_data=analyzer_module.DataTensors(
  media=new_media, reach=new_reach, frequency=new_frequency
  ),
  selected_times=selected_times,
- selected_geos=None,
+ selected_geos=selected_geos,
  aggregate_times=True,
  aggregate_geos=True,
  optimal_frequency=optimal_frequency,
  include_non_paid_channels=False,
  )
  effectiveness_with_mean_median_and_ci = (
- analyzer.get_central_tendency_and_ci(
+ analyzer_module.get_central_tendency_and_ci(
  data=backend.divide_no_nan(
  incremental_outcome, aggregated_impressions
  ),
@@ -2301,12 +2385,12 @@ class BudgetOptimizer:
  )
  )

- roi = analyzer.get_central_tendency_and_ci(
+ roi = analyzer_module.get_central_tendency_and_ci(
  data=backend.divide_no_nan(incremental_outcome, spend_tensor),
  confidence_level=confidence_level,
  include_median=True,
  )
- marginal_roi = analyzer.get_central_tendency_and_ci(
+ marginal_roi = analyzer_module.get_central_tendency_and_ci(
  data=backend.divide_no_nan(
  mroi_numerator, spend_tensor * incremental_increase
  ),
@@ -2314,7 +2398,7 @@ class BudgetOptimizer:
  include_median=True,
  )

- cpik = analyzer.get_central_tendency_and_ci(
+ cpik = analyzer_module.get_central_tendency_and_ci(
  data=backend.divide_no_nan(spend_tensor, incremental_outcome),
  confidence_level=confidence_level,
  include_median=True,
@@ -2328,19 +2412,27 @@ class BudgetOptimizer:
  total_spend = np.sum(spend) if np.sum(spend) > 0 else 1
  pct_of_spend = spend / total_spend
  data_vars = {
- c.SPEND: ([c.CHANNEL], spend.data),
- c.PCT_OF_SPEND: ([c.CHANNEL], pct_of_spend.data),
+ c.SPEND: ([c.CHANNEL], np.array(spend.data, dtype=np.float64)),
+ c.PCT_OF_SPEND: (
+ [c.CHANNEL],
+ np.array(pct_of_spend.data, dtype=np.float64),
+ ),
  c.INCREMENTAL_OUTCOME: (
  [c.CHANNEL, c.METRIC],
- incremental_outcome_with_mean_median_and_ci,
+ np.array(
+ incremental_outcome_with_mean_median_and_ci, dtype=np.float64
+ ),
  ),
  c.EFFECTIVENESS: (
  [c.CHANNEL, c.METRIC],
- effectiveness_with_mean_median_and_ci,
+ np.array(effectiveness_with_mean_median_and_ci, dtype=np.float64),
+ ),
+ c.ROI: ([c.CHANNEL, c.METRIC], np.array(roi, dtype=np.float64)),
+ c.MROI: (
+ [c.CHANNEL, c.METRIC],
+ np.array(marginal_roi, dtype=np.float64),
  ),
- c.ROI: ([c.CHANNEL, c.METRIC], roi),
- c.MROI: ([c.CHANNEL, c.METRIC], marginal_roi),
- c.CPIK: ([c.CHANNEL, c.METRIC], cpik),
+ c.CPIK: ([c.CHANNEL, c.METRIC], np.array(cpik, dtype=np.float64)),
  }

  all_times = np.asarray(filled_data.time).astype(str).tolist()
@@ -2374,7 +2466,8 @@ class BudgetOptimizer:
  i: int,
  incremental_outcome_grid: np.ndarray,
  multipliers_grid: backend.Tensor,
- new_data: analyzer.DataTensors | None = None,
+ new_data: analyzer_module.DataTensors | None = None,
+ selected_geos: Sequence[str] | None = None,
  selected_times: Sequence[str] | Sequence[bool] | None = None,
  use_posterior: bool = True,
  use_kpi: bool = False,
@@ -2396,6 +2489,9 @@ class BudgetOptimizer:
  tensors is provided with a different number of time periods than in
  `InputData`, then all tensors must be provided with the same number of
  time periods.
+ selected_geos: Optional list containing a subset of geos to include. By
+ default, all geos are included. The selected geos should match those in
+ `InputData.geo`.
  selected_times: Optional list of times to optimize. This can either be a
  string list containing a subset of time dimension coordinates from
  `InputData.time` or a boolean list with length equal to the time
@@ -2416,7 +2512,7 @@ class BudgetOptimizer:
  reducing `batch_size`. The calculation will generally be faster with
  larger `batch_size` values.
  """
- new_data = new_data or analyzer.DataTensors()
+ new_data = new_data or analyzer_module.DataTensors()
  filled_data = new_data.validate_and_fill_missing_data(
  c.PAID_DATA, self._meridian
  )
@@ -2455,12 +2551,13 @@ class BudgetOptimizer:
  np.asarray(
  self._analyzer.incremental_outcome(
  use_posterior=use_posterior,
- new_data=analyzer.DataTensors(
+ new_data=analyzer_module.DataTensors(
  media=new_media,
  reach=new_reach,
  frequency=new_frequency,
  revenue_per_kpi=filled_data.revenue_per_kpi,
  ),
+ selected_geos=selected_geos,
  selected_times=selected_times,
  use_kpi=use_kpi,
  include_non_paid_channels=False,
@@ -2477,7 +2574,8 @@ class BudgetOptimizer:
  spend_bound_lower: np.ndarray,
  spend_bound_upper: np.ndarray,
  step_size: int,
- new_data: analyzer.DataTensors | None = None,
+ new_data: analyzer_module.DataTensors | None = None,
+ selected_geos: Sequence[str] | None = None,
  selected_times: Sequence[str] | Sequence[bool] | None = None,
  use_posterior: bool = True,
  use_kpi: bool = False,
@@ -2500,6 +2598,9 @@ class BudgetOptimizer:
  tensors is provided with a different number of time periods than in
  `InputData`, then all tensors must be provided with the same number of
  time periods.
+ selected_geos: Optional list containing a subset of geos to include. By
+ default, all geos are included. The selected geos should match those in
+ `InputData.geo`.
  selected_times: Optional list of times to optimize. This can either be a
  string list containing a subset of time dimension coordinates from
  `InputData.time` or a boolean list with length equal to the time
@@ -2556,6 +2657,7 @@ class BudgetOptimizer:
  i=i,
  incremental_outcome_grid=incremental_outcome_grid,
  multipliers_grid=multipliers_grid,
+ selected_geos=selected_geos,
  selected_times=selected_times,
  new_data=new_data,
  use_posterior=use_posterior,
@@ -2571,20 +2673,15 @@ class BudgetOptimizer:
  # we use the following code to fix it, and ensure incremental_outcome/spend
  # is always same for RF channels.
  if self._meridian.n_rf_channels > 0:
- rf_incremental_outcome_max = np.nanmax(
- incremental_outcome_grid[:, -self._meridian.n_rf_channels :], axis=0
- )
- rf_spend_max = np.nanmax(
- spend_grid[:, -self._meridian.n_rf_channels :], axis=0
- )
- rf_roi = backend.divide_no_nan(rf_incremental_outcome_max, rf_spend_max)
- incremental_outcome_grid[:, -self._meridian.n_rf_channels :] = (
- rf_roi * spend_grid[:, -self._meridian.n_rf_channels :]
+ incremental_outcome_grid = backend.stabilize_rf_roi_grid(
+ spend_grid, incremental_outcome_grid, self._meridian.n_rf_channels
  )
  return (spend_grid, incremental_outcome_grid)

  def _validate_optimization_tensors(
  self,
+ expected_n_geos: int,
+ expected_n_times: int,
  cpmu: backend.Tensor | None = None,
  cprf: backend.Tensor | None = None,
  media: backend.Tensor | None = None,
@@ -2601,11 +2698,21 @@ class BudgetOptimizer:
  'If `media` or `media_spend` is provided, then `cpmu` must also be'
  ' provided.'
  )
+ if (media is None and media_spend is None) and cpmu is not None:
+ raise ValueError(
+ 'If `cpmu` is provided, then one of `media` or `media_spend` must'
+ ' also be provided.'
+ )
  if (rf_impressions is not None or rf_spend is not None) and cprf is None:
  raise ValueError(
  'If `reach` and `frequency` or `rf_spend` is provided, then `cprf`'
  ' must also be provided.'
  )
+ if (rf_impressions is None and rf_spend is None) and cprf is not None:
+ raise ValueError(
+ 'If `cprf` is provided, then one of `rf_impressions` or `rf_spend`'
+ ' must also be provided.'
+ )
  if media is not None and media_spend is not None:
  raise ValueError('Only one of `media` or `media_spend` can be provided.')
  if rf_impressions is not None and rf_spend is not None:
@@ -2623,26 +2730,44 @@ class BudgetOptimizer:
  'If `use_optimal_frequency` is `False`, then `frequency` must be'
  ' provided.'
  )
-
- n_geos = [
- t.shape[0]
- for t in [
- cpmu,
- cprf,
- media,
- rf_impressions,
- frequency,
- media_spend,
- rf_spend,
- ]
- if t is not None and t.ndim == 3
+ n_geos_list = []
+ n_times_list = []
+ tensor_list = [
+ cpmu,
+ cprf,
+ media,
+ rf_impressions,
+ frequency,
+ media_spend,
+ rf_spend,
  ]
+ for t in tensor_list:
+ # `(n_geos, T, n_channels)` shape
+ if t is not None and t.ndim == 3:
+ n_geos_list.append(t.shape[0])
+ n_times_list.append(t.shape[1])
+ # `(T, n_channels)` shape
+ elif t is not None and t.ndim == 2:
+ n_times_list.append(t.shape[0])
+
+ # `(n_geos, T)` shape
  if revenue_per_kpi is not None and revenue_per_kpi.ndim == 2:
- n_geos.append(revenue_per_kpi.shape[0])
- if any(n_geo != self._meridian.n_geos for n_geo in n_geos):
+ n_geos_list.append(revenue_per_kpi.shape[0])
+ n_times_list.append(revenue_per_kpi.shape[1])
+ # `(T)` shape
+ elif revenue_per_kpi is not None and revenue_per_kpi.ndim == 1:
+ n_times_list.append(revenue_per_kpi.shape[0])
+
+ if any(n_geo != expected_n_geos for n_geo in n_geos_list):
+ raise ValueError(
+ 'All tensors with a geo dimension must have'
+ f' {expected_n_geos} geos (as defined in `meridian.InputData`).'
+ )
+
+ if any(n_time != expected_n_times for n_time in n_times_list):
  raise ValueError(
- 'All tensors with a geo dimension must have the same number of geos'
- ' as in `meridian.InputData`.'
+ 'All tensors with a time dimension must have'
+ f' {expected_n_times} times (as defined in `time` argument).'
  )

  def _allocate_tensor_by_population(
@@ -2958,3 +3083,62 @@ def _expand_tensor(tensor: backend.Tensor, required_shape: tuple[int, ...]):
  f'Cannot expand tensor with shape {tensor.shape} to target'
  f' {required_shape}.'
  )
+
+
+ def _expand_selected_times(
+ meridian: model.Meridian,
+ start_date: tc.Date,
+ end_date: tc.Date,
+ new_data: analyzer_module.DataTensors | None,
+ return_flexible_str: bool = False,
+ ) -> Sequence[str] | Sequence[bool] | None:
+ """Creates selected_times from start_date and end_date.
+
+ This function creates `selected_times` argument based on `start_date`,
+ `end_date` and `new_data`. If `new_data` is not used or used with unmodified
+ times, dates are selected from `meridian.input_data.time`. In the flexible
+ time scenario, when `new_data` is provided with modified times, dates are
+ selected from `new_data.time`. In this case, `new_data.time` must be provided
+ and the function returns a list of booleans.
+
+ Args:
+ meridian: The `Meridian` object with original data.
+ start_date: Start date of the selected time period.
+ end_date: End date of the selected time period.
+ new_data: The optional `DataTensors` object. If times are modified in
+ `new_data`, then `new_data.time` must be provided.
+ return_flexible_str: Whether to return a list of strings or a list of
+ booleans in case time is modified in `new_data`.
+
+ Returns:
+ If both `start_date` and `end_date` are `None`, returns `None`. If
+ `new_data` is not used or used with unmodified times, returns a list of
+ strings with selected dates. If `new_data` is used with modified times,
+ returns a list of strings or a list of booleans depending on the
+ `return_flexible_str` argument.
+ """
+ if start_date is None and end_date is None:
+ return None
+
+ new_data = new_data or analyzer_module.DataTensors()
+ if new_data.get_modified_times(meridian) is None:
+ return meridian.expand_selected_time_dims(
+ start_date=start_date,
+ end_date=end_date,
+ )
+ else:
+ assert new_data.time is not None
+ new_times_str = np.asarray(new_data.time).astype(str).tolist()
+ time_coordinates = tc.TimeCoordinates.from_dates(new_times_str)
+ expanded_dates = time_coordinates.expand_selected_time_dims(
+ start_date=start_date,
+ end_date=end_date,
+ )
+ if expanded_dates is None:
+ expanded_dates = time_coordinates.all_dates
+ expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
+ if return_flexible_str:
+ return [x for x in new_times_str if x in expanded_str]
+ # TODO: Remove once every method uses `new_data.time`.
+ else:
+ return [x in expanded_str for x in new_times_str]
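The `iterative_roi_grid` hunks earlier in this diff replace `backend.divide_no_nan` with NumPy's `np.divide` using `out=` and `where=`, which writes 0 wherever the spend delta is zero instead of producing NaN or inf. The following is a standalone sketch of that division pattern using made-up toy values, not data or code from the package itself.

    import numpy as np

    # Toy spend / incremental-outcome grids: rows are spend steps, columns are channels.
    spend_grid = np.array([[100.0, 50.0], [120.0, 50.0], [140.0, 50.0]])
    incremental_outcome_grid = np.array([[10.0, 5.0], [14.0, 5.0], [16.0, 5.0]])

    # Marginal ROI of each step relative to the first row; zero spend deltas yield 0, not NaN.
    numerator = incremental_outcome_grid[1:, :] - incremental_outcome_grid[0, :]
    denominator = spend_grid[1:, :] - spend_grid[0, :]
    iterative_roi = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=(denominator != 0),
    )
    print(np.round(iterative_roi, decimals=8))  # [[0.2, 0.0], [0.15, 0.0]] for these toy values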