PyPI - google-meridian - Versions diffs - 1.2.1__py3-none-any.whl → 1.3.1__py3-none-any.whl - Mend

google-meridian 1.2.1__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

google_meridian-1.3.1.dist-info/METADATA +209 -0
google_meridian-1.3.1.dist-info/RECORD +76 -0
{google_meridian-1.2.1.dist-info → google_meridian-1.3.1.dist-info}/top_level.txt +1 -0
meridian/analysis/__init__.py +2 -0
meridian/analysis/analyzer.py +179 -105
meridian/analysis/formatter.py +2 -2
meridian/analysis/optimizer.py +227 -87
meridian/analysis/review/__init__.py +20 -0
meridian/analysis/review/checks.py +721 -0
meridian/analysis/review/configs.py +110 -0
meridian/analysis/review/constants.py +40 -0
meridian/analysis/review/results.py +544 -0
meridian/analysis/review/reviewer.py +186 -0
meridian/analysis/summarizer.py +21 -34
meridian/analysis/templates/chips.html.jinja +12 -0
meridian/analysis/test_utils.py +27 -5
meridian/analysis/visualizer.py +41 -57
meridian/backend/__init__.py +457 -118
meridian/backend/test_utils.py +162 -0
meridian/constants.py +39 -3
meridian/model/__init__.py +1 -0
meridian/model/eda/__init__.py +3 -0
meridian/model/eda/constants.py +21 -0
meridian/model/eda/eda_engine.py +1309 -196
meridian/model/eda/eda_outcome.py +200 -0
meridian/model/eda/eda_spec.py +84 -0
meridian/model/eda/meridian_eda.py +220 -0
meridian/model/knots.py +55 -49
meridian/model/media.py +10 -8
meridian/model/model.py +79 -16
meridian/model/model_test_data.py +53 -0
meridian/model/posterior_sampler.py +39 -32
meridian/model/prior_distribution.py +12 -2
meridian/model/prior_sampler.py +146 -90
meridian/model/spec.py +7 -8
meridian/model/transformers.py +11 -3
meridian/version.py +1 -1
schema/__init__.py +18 -0
schema/serde/__init__.py +26 -0
schema/serde/constants.py +48 -0
schema/serde/distribution.py +515 -0
schema/serde/eda_spec.py +192 -0
schema/serde/function_registry.py +143 -0
schema/serde/hyperparameters.py +363 -0
schema/serde/inference_data.py +105 -0
schema/serde/marketing_data.py +1321 -0
schema/serde/meridian_serde.py +413 -0
schema/serde/serde.py +47 -0
schema/serde/test_data.py +4608 -0
schema/utils/__init__.py +17 -0
schema/utils/time_record.py +156 -0
google_meridian-1.2.1.dist-info/METADATA +0 -409
google_meridian-1.2.1.dist-info/RECORD +0 -52
{google_meridian-1.2.1.dist-info → google_meridian-1.3.1.dist-info}/WHEEL +0 -0
{google_meridian-1.2.1.dist-info → google_meridian-1.3.1.dist-info}/licenses/LICENSE +0 -0

meridian/analysis/optimizer.py CHANGED Viewed

@@ -102,6 +102,7 @@ class OptimizationGrid:
     use_kpi: Whether using generic KPI or revenue.
     use_posterior: Whether posterior distributions were used, or prior.
     use_optimal_frequency: Whether optimal frequency was used.
+    max_frequency: The maximum frequency for reach and frequency channels.
     start_date: The start date of the optimization period.
     end_date: The end date of the optimization period.
     gtol: Float indicating the acceptable relative error for the budget used in
@@ -114,9 +115,10 @@ class OptimizationGrid:
       does not contain reach and frequency data, or if the model does contain
       reach and frequency data, but historical frequency is used for the
       optimization scenario.
+    selected_geos: The geo coordinates from the model used in this grid.
     selected_times: The time coordinates from the model used in this grid. If
-      new data with modified time coordinates is used for optimization, this
-      is a list of booleans indicating which time coordinates are selected.
+      new data with modified time coordinates is used for optimization, this is
+      a list of booleans indicating which time coordinates are selected.
       Otherwise, this is a list of strings indicating the time coordinates used
       in this grid.
   """
@@ -132,7 +134,9 @@ class OptimizationGrid:
   gtol: float
   round_factor: int
   optimal_frequency: np.ndarray | None
+  selected_geos: Sequence[str] | None
   selected_times: Sequence[str] | Sequence[bool] | None
+  max_frequency: float | None = None
   @property
   def grid_dataset(self) -> xr.Dataset:
@@ -266,7 +270,7 @@ class OptimizationGrid:
     return xr.Dataset(
         coords={c.CHANNEL: self.channels},
         data_vars={
-            c.OPTIMIZED: ([c.CHANNEL], optimal_spend.data),
+            c.OPTIMIZED: ([c.CHANNEL], optimal_spend),
             c.NON_OPTIMIZED: ([c.CHANNEL], rounded_spend),
         },
     )
@@ -390,16 +394,26 @@ class OptimizationGrid:
       media spend that maximizes incremental outcome based on spend constraints
       for all media and RF channels.
     """
-    spend = spend_grid[0, :].copy()
-    incremental_outcome = incremental_outcome_grid[0, :].copy()
-    spend_grid = spend_grid[1:, :]
-    incremental_outcome_grid = incremental_outcome_grid[1:, :]
-    iterative_roi_grid = np.round(
-        backend.divide_no_nan(
-            incremental_outcome_grid - incremental_outcome, spend_grid - spend
-        ),
-        decimals=8,
+    spend_grid_values = np.array(spend_grid.values, dtype=np.float64)
+    incremental_outcome_grid_values = np.array(
+        incremental_outcome_grid.values, dtype=np.float64
+    )
+    spend = spend_grid_values[0, :].copy()
+    incremental_outcome = incremental_outcome_grid_values[0, :].copy()
+    spend_grid_values = spend_grid_values[1:, :]
+    incremental_outcome_grid_values = incremental_outcome_grid_values[1:, :]
+    numerator = incremental_outcome_grid_values - incremental_outcome
+    denominator = spend_grid_values - spend
+    iterative_roi_grid = np.divide(
+        numerator,
+        denominator,
+        out=np.zeros_like(numerator),
+        where=(denominator != 0),
     )
+    iterative_roi_grid = np.round(iterative_roi_grid, decimals=8)
     while True:
       spend_optimal = spend.astype(int)
       # If none of the exit criteria are met roi_grid will eventually be filled
@@ -411,8 +425,8 @@ class OptimizationGrid:
       )
       row_idx = point[0]
       media_idx = point[1]
-      spend[media_idx] = spend_grid[row_idx, media_idx]
-      incremental_outcome[media_idx] = incremental_outcome_grid[
+      spend[media_idx] = spend_grid_values[row_idx, media_idx]
+      incremental_outcome[media_idx] = incremental_outcome_grid_values[
           row_idx, media_idx
       ]
       roi_grid_point = iterative_roi_grid[row_idx, media_idx]
@@ -425,14 +439,23 @@ class OptimizationGrid:
         break
       iterative_roi_grid[0 : row_idx + 1, media_idx] = np.nan
+      num_col = (
+          incremental_outcome_grid_values[row_idx + 1 :, media_idx]
+          - incremental_outcome_grid_values[row_idx, media_idx]
+      )
+      den_col = (
+          spend_grid_values[row_idx + 1 :, media_idx]
+          - spend_grid_values[row_idx, media_idx]
+      )
+      new_roi_col = np.divide(
+          num_col,
+          den_col,
+          out=np.zeros_like(num_col),
+          where=(den_col != 0),
+      )
       iterative_roi_grid[row_idx + 1 :, media_idx] = np.round(
-          backend.divide_no_nan(
-              incremental_outcome_grid[row_idx + 1 :, media_idx]
-              - incremental_outcome_grid[row_idx, media_idx],
-              spend_grid[row_idx + 1 :, media_idx]
-              - spend_grid[row_idx, media_idx],
-          ),
-          decimals=8,
+          new_roi_col, decimals=8
       )
     return spend_optimal
@@ -559,11 +582,16 @@ class OptimizationResults:
     """The grid information used for optimization."""
     return self._optimization_grid
-  def output_optimization_summary(self, filename: str, filepath: str):
+  def output_optimization_summary(
+      self,
+      filename: str,
+      filepath: str,
+      currency: str = c.DEFAULT_CURRENCY,
+  ):
     """Generates and saves the HTML optimization summary output."""
     os.makedirs(filepath, exist_ok=True)
     with open(os.path.join(filepath, filename), 'w') as f:
-      f.write(self._gen_optimization_summary())
+      f.write(self._gen_optimization_summary(currency))
   def plot_incremental_outcome_delta(self) -> alt.Chart:
     """Plots a waterfall chart showing the change in incremental outcome."""
@@ -713,7 +741,7 @@ class OptimizationResults:
         )
     )
-  def plot_spend_delta(self) -> alt.Chart:
+  def plot_spend_delta(self, currency: str = c.DEFAULT_CURRENCY) -> alt.Chart:
     """Plots a bar chart showing the optimized change in spend per channel."""
     df = self._get_delta_data(c.SPEND)
     base = (
@@ -734,7 +762,7 @@ class OptimizationResults:
             y=alt.Y(
                 f'{c.SPEND}:Q',
                 axis=alt.Axis(
-                    title='$',
+                    title=currency,
                     domain=False,
                     labelExpr=formatter.compact_number_expr(),
                     **formatter.AXIS_CONFIG,
@@ -919,6 +947,7 @@ class OptimizationResults:
         new_data=self.new_data,
         spend_multipliers=spend_multiplier,
         use_posterior=self.optimization_grid.use_posterior,
+        selected_geos=self.optimization_grid.selected_geos,
         selected_times=selected_times,
         by_reach=True,
         use_kpi=not self.nonoptimized_data.attrs[c.IS_REVENUE_KPI],
@@ -1029,7 +1058,7 @@ class OptimizationResults:
     sorted_df.sort_index(inplace=True)
     return sorted_df
-  def _gen_optimization_summary(self) -> str:
+  def _gen_optimization_summary(self, currency: str) -> str:
     """Generates HTML optimization summary output (as sanitized content str)."""
     start_date = tc.normalize_date(self.optimized_data.start_date)
     self.template_env.globals[c.START_DATE] = start_date.strftime(
@@ -1041,22 +1070,25 @@ class OptimizationResults:
     self.template_env.globals[c.END_DATE] = end_date_adjusted.strftime(
         f'%b {end_date_adjusted.day}, %Y'
     )
+    self.template_env.globals[c.SELECTED_GEOS] = (
+        self.optimization_grid.selected_geos
+    )
     html_template = self.template_env.get_template('summary.html.jinja')
     return html_template.render(
         title=summary_text.OPTIMIZATION_TITLE,
-        cards=self._create_output_sections(),
+        cards=self._create_output_sections(currency),
     )
-  def _create_output_sections(self) -> Sequence[str]:
+  def _create_output_sections(self, currency: str) -> Sequence[str]:
     """Creates the HTML snippets for cards in the summary page."""
     return [
-        self._create_scenario_plan_section(),
-        self._create_budget_allocation_section(),
+        self._create_scenario_plan_section(currency),
+        self._create_budget_allocation_section(currency),
         self._create_response_curves_section(),
     ]
-  def _create_scenario_plan_section(self) -> str:
+  def _create_scenario_plan_section(self, currency: str) -> str:
     """Creates the HTML card snippet for the scenario plan section."""
     card_spec = formatter.CardSpec(
         id=summary_text.SCENARIO_PLAN_CARD_ID,
@@ -1099,22 +1131,32 @@ class OptimizationResults:
         self.template_env,
         card_spec,
         insights,
-        stats_specs=self._create_scenario_stats_specs(),
+        stats_specs=self._create_scenario_stats_specs(currency),
     )
-  def _create_scenario_stats_specs(self) -> Sequence[formatter.StatsSpec]:
+  def _create_scenario_stats_specs(
+      self, currency: str
+  ) -> Sequence[formatter.StatsSpec]:
     """Creates the stats to fill the scenario plan section."""
     outcome = self._kpi_or_revenue
     budget_diff = self.optimized_data.budget - self.nonoptimized_data.budget
     budget_prefix = '+' if budget_diff > 0 else ''
     non_optimized_budget = formatter.StatsSpec(
         title=summary_text.NON_OPTIMIZED_BUDGET_LABEL,
-        stat=formatter.format_monetary_num(self.nonoptimized_data.budget),
+        stat=formatter.format_monetary_num(
+            num=self.nonoptimized_data.budget,
+            currency=currency,
+        ),
     )
     optimized_budget = formatter.StatsSpec(
         title=summary_text.OPTIMIZED_BUDGET_LABEL,
-        stat=formatter.format_monetary_num(self.optimized_data.budget),
-        delta=(budget_prefix + formatter.format_monetary_num(budget_diff)),
+        stat=formatter.format_monetary_num(
+            num=self.optimized_data.budget, currency=currency
+        ),
+        delta=(
+            budget_prefix
+            + formatter.format_monetary_num(num=budget_diff, currency=currency)
+        ),
     )
     if outcome == c.REVENUE:
@@ -1136,7 +1178,7 @@ class OptimizationResults:
       )
       optimized_performance_title = summary_text.OPTIMIZED_CPIK_LABEL
       optimized_performance_stat = f'${self.optimized_data.total_cpik:.2f}'
-      optimized_performance_diff = formatter.compact_number(diff, 2, '$')
+      optimized_performance_diff = formatter.compact_number(diff, 2, currency)
     non_optimized_performance = formatter.StatsSpec(
         title=non_optimized_performance_title,
         stat=non_optimized_performance_stat,
@@ -1152,7 +1194,7 @@ class OptimizationResults:
         - self.nonoptimized_data.total_incremental_outcome
     )
     inc_outcome_prefix = '+' if inc_outcome_diff > 0 else ''
-    currency = '$' if outcome == c.REVENUE else ''
+    currency = currency if outcome == c.REVENUE else ''
     non_optimized_inc_outcome = formatter.StatsSpec(
         title=summary_text.NON_OPTIMIZED_INC_OUTCOME_LABEL.format(
             outcome=outcome
@@ -1182,7 +1224,7 @@ class OptimizationResults:
         optimized_inc_outcome,
     ]
-  def _create_budget_allocation_section(self) -> str:
+  def _create_budget_allocation_section(self, currency: str) -> str:
     """Creates the HTML card snippet for the budget allocation section."""
     outcome = self._kpi_or_revenue
     card_spec = formatter.CardSpec(
@@ -1192,7 +1234,7 @@ class OptimizationResults:
     spend_delta = formatter.ChartSpec(
         id=summary_text.SPEND_DELTA_CHART_ID,
         description=summary_text.SPEND_DELTA_CHART_INSIGHTS,
-        chart_json=self.plot_spend_delta().to_json(),
+        chart_json=self.plot_spend_delta(currency).to_json(),
     )
     spend_allocation = formatter.ChartSpec(
         id=summary_text.SPEND_ALLOCATION_CHART_ID,
@@ -1295,6 +1337,7 @@ class BudgetOptimizer:
       self,
       new_data: analyzer_module.DataTensors | None = None,
       use_posterior: bool = True,
+      selected_geos: Sequence[str] | None = None,
       # TODO: b/409550413 - Remove this argument.
       selected_times: tuple[str | None, str | None] | None = None,
       start_date: tc.Date = None,
@@ -1307,7 +1350,10 @@ class BudgetOptimizer:
       target_roi: float | None = None,
       target_mroi: float | None = None,
       gtol: float = 0.0001,
+      # TODO: Consider merging `use_optimal_frequency` and `max_frequency` into
+      # a single argument.
       use_optimal_frequency: bool = True,
+      max_frequency: float | None = None,
       use_kpi: bool = False,
       confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
       batch_size: int = c.DEFAULT_BATCH_SIZE,
@@ -1383,6 +1429,9 @@ class BudgetOptimizer:
       use_posterior: Boolean. If `True`, then the budget is optimized based on
         the posterior distribution of the model. Otherwise, the prior
         distribution is used.
+      selected_geos: Optional list containing a subset of geos to include. By
+        default, all geos are included. The selected geos should match those in
+        `InputData.geo`.
       selected_times: Deprecated. Tuple containing the start and end time
         dimension coordinates for the duration to run the optimization on.
         Please use `start_date` and `end_date` instead.
@@ -1439,6 +1488,10 @@ class BudgetOptimizer:
       use_optimal_frequency: If `True`, uses `optimal_frequency` calculated by
         trained Meridian model for optimization. If `False`, uses historical
         frequency or `new_data.frequency` if provided.
+      max_frequency: Float indicating the frequency upper bound for the optimal
+        frequency search space. If `None` when `use_optimal_frequency` is
+        `True`, the max frequency of the input data is used. If
+        `use_optimal_frequency` is `False`, `max_frequency` is ignored.
       use_kpi: If `True`, runs the optimization on KPI. Defaults to revenue.
       confidence_level: The threshold for computing the confidence intervals.
       batch_size: Maximum draws per chain in each batch. The calculation is run
@@ -1484,6 +1537,7 @@ class BudgetOptimizer:
     use_grid_arg = optimization_grid is not None and self._validate_grid(
         new_data=new_data,
         use_posterior=use_posterior,
+        selected_geos=selected_geos,
         start_date=start_date,
         end_date=end_date,
         budget=budget,
@@ -1492,12 +1546,14 @@ class BudgetOptimizer:
         spend_constraint_upper=spend_constraint_upper,
         gtol=gtol,
         use_optimal_frequency=use_optimal_frequency,
+        max_frequency=max_frequency,
         use_kpi=use_kpi,
         optimization_grid=optimization_grid,
     )
     if optimization_grid is None or not use_grid_arg:
       optimization_grid = self.create_optimization_grid(
           new_data=new_data,
+          selected_geos=selected_geos,
           start_date=start_date,
           end_date=end_date,
           budget=budget,
@@ -1508,6 +1564,7 @@ class BudgetOptimizer:
           use_posterior=use_posterior,
           use_kpi=use_kpi,
           use_optimal_frequency=use_optimal_frequency,
+          max_frequency=max_frequency,
           batch_size=batch_size,
       )
@@ -1538,6 +1595,7 @@ class BudgetOptimizer:
         use_kpi=use_kpi,
         hist_spend=optimization_grid.historical_spend,
         spend=spend.non_optimized,
+        selected_geos=selected_geos,
         start_date=start_date,
         end_date=end_date,
         confidence_level=confidence_level,
@@ -1550,6 +1608,7 @@ class BudgetOptimizer:
         use_kpi=use_kpi,
         hist_spend=optimization_grid.historical_spend,
         spend=spend.non_optimized,
+        selected_geos=selected_geos,
         start_date=start_date,
         end_date=end_date,
         optimal_frequency=optimization_grid.optimal_frequency,
@@ -1570,6 +1629,7 @@ class BudgetOptimizer:
         use_kpi=use_kpi,
         hist_spend=optimization_grid.historical_spend,
         spend=spend.optimized,
+        selected_geos=selected_geos,
         start_date=start_date,
         end_date=end_date,
         optimal_frequency=optimization_grid.optimal_frequency,
@@ -1690,7 +1750,11 @@ class BudgetOptimizer:
       A `DataTensors` object with optional tensors `media`, `reach`,
       `frequency`, `media_spend`, `rf_spend`, `revenue_per_kpi`, and `time`.
     """
+    n_times = time.shape[0] if isinstance(time, backend.Tensor) else len(time)
+    n_geos = self._meridian.n_geos
     self._validate_optimization_tensors(
+        expected_n_geos=n_geos,
+        expected_n_times=n_times,
         cpmu=cpmu,
         cprf=cprf,
         media=media,
@@ -1701,13 +1765,6 @@ class BudgetOptimizer:
         revenue_per_kpi=revenue_per_kpi,
         use_optimal_frequency=use_optimal_frequency,
     )
-    n_times = time.shape[0] if isinstance(time, backend.Tensor) else len(time)
-    n_geos = self._meridian.n_geos
-    revenue_per_kpi = (
-        _expand_tensor(revenue_per_kpi, (n_geos, n_times))
-        if revenue_per_kpi is not None
-        else None
-    )
     tensors = {}
     if media is not None:
@@ -1743,7 +1800,9 @@ class BudgetOptimizer:
           impressions, tensors[c.FREQUENCY]
       )
     if revenue_per_kpi is not None:
-      tensors[c.REVENUE_PER_KPI] = revenue_per_kpi
+      tensors[c.REVENUE_PER_KPI] = _expand_tensor(
+          revenue_per_kpi, (n_geos, n_times)
+      )
     tensors[c.TIME] = backend.to_tensor(time)
     return analyzer_module.DataTensors(**tensors)
@@ -1751,6 +1810,7 @@ class BudgetOptimizer:
       self,
       new_data: analyzer_module.DataTensors | None,
       use_posterior: bool,
+      selected_geos: Sequence[str] | None,
       start_date: tc.Date,
       end_date: tc.Date,
       budget: float | None,
@@ -1759,6 +1819,7 @@ class BudgetOptimizer:
       spend_constraint_upper: _SpendConstraint,
       gtol: float,
       use_optimal_frequency: bool,
+      max_frequency: float | None,
       use_kpi: bool,
       optimization_grid: OptimizationGrid,
   ) -> bool:
@@ -1791,6 +1852,15 @@ class BudgetOptimizer:
       )
       return False
+    if max_frequency != optimization_grid.max_frequency:
+      warnings.warn(
+          'Given optimization grid was created with `max_frequency` ='
+          f' {optimization_grid.max_frequency}, but optimization was'
+          f' called with `max_frequency` = {max_frequency}. A'
+          ' new grid will be created.'
+      )
+      return False
     if (
         start_date != optimization_grid.start_date
         or end_date != optimization_grid.end_date
@@ -1820,6 +1890,17 @@ class BudgetOptimizer:
       )
       return False
+    s_geos = sorted(selected_geos or [])
+    g_geos = sorted(optimization_grid.selected_geos or [])
+    if s_geos != g_geos:
+      warnings.warn(
+          'Given optimization grid was created with `selected_geos` ='
+          f' {optimization_grid.selected_geos}, but optimization request was'
+          f' called with `selected_geos` = {selected_geos}. A new grid will be'
+          ' created.'
+      )
+      return False
     n_channels = len(optimization_grid.channels)
     selected_times = _expand_selected_times(
         meridian=self._meridian,
@@ -1877,6 +1958,7 @@ class BudgetOptimizer:
       self,
       new_data: xr.Dataset | None = None,
       use_posterior: bool = True,
+      selected_geos: Sequence[str] | None = None,
       # TODO: b/409550413 - Remove this argument.
       selected_times: tuple[str | None, str | None] | None = None,
       start_date: tc.Date = None,
@@ -1887,6 +1969,7 @@ class BudgetOptimizer:
       spend_constraint_upper: _SpendConstraint = c.SPEND_CONSTRAINT_DEFAULT,
       gtol: float = 0.0001,
       use_optimal_frequency: bool = True,
+      max_frequency: float | None = None,
       use_kpi: bool = False,
       batch_size: int = c.DEFAULT_BATCH_SIZE,
   ) -> OptimizationGrid:
@@ -1915,6 +1998,9 @@ class BudgetOptimizer:
       use_posterior: Boolean. If `True`, then the incremental outcome is derived
         from the posterior distribution of the model. Otherwise, the prior
         distribution is used.
+      selected_geos: Optional list containing a subset of geos to include. By
+        default, all geos are included. The selected geos should match those in
+        `InputData.geo`.
       selected_times: Deprecated. Tuple containing the start and end time
         dimension coordinates. Please use `start_date` and `end_date` instead.
       start_date: Optional start date selector, *inclusive*, in _yyyy-mm-dd_
@@ -1955,6 +2041,10 @@ class BudgetOptimizer:
         the smallest integer such that `(budget - rounded_budget)` is less than
         or equal to `(budget * gtol)`. `gtol` must be less than 1.
       use_optimal_frequency: Boolean. Whether optimal frequency was used.
+      max_frequency: Float indicating the frequency upper bound for the optimal
+        frequency search space. If `None` when `use_optimal_frequency` is
+        `True`, the max frequency of the input data is used. If
+        `use_optimal_frequency` is `False`, `max_frequency` is ignored.
       use_kpi: Boolean. If `True`, then the incremental outcome is derived from
         the KPI impact. Otherwise, the incremental outcome is derived from the
         revenue impact.
@@ -1969,7 +2059,8 @@ class BudgetOptimizer:
     self._validate_model_fit(use_posterior)
     if new_data is None:
       new_data = analyzer_module.DataTensors()
+    if selected_geos is not None and not selected_geos:
+      raise ValueError('`selected_geos` must not be empty.')
     if selected_times is not None:
       warnings.warn(
           '`selected_times` is deprecated. Please use `start_date` and'
@@ -1993,6 +2084,7 @@ class BudgetOptimizer:
     )
     hist_spend = self._analyzer.get_aggregated_spend(
         new_data=filled_data.filter_fields(c.PAID_CHANNELS + c.SPEND_DATA),
+        selected_geos=selected_geos,
         selected_times=selected_times,
         include_media=self._meridian.n_media_channels > 0,
         include_rf=self._meridian.n_rf_channels > 0,
@@ -2025,8 +2117,10 @@ class BudgetOptimizer:
           self._analyzer.optimal_freq(
               new_data=opt_freq_data,
               use_posterior=use_posterior,
+              selected_geos=selected_geos,
               selected_times=selected_times,
               use_kpi=use_kpi,
+              max_frequency=max_frequency,
           ).optimal_frequency,
           dtype=backend.float32,
       )
@@ -2039,6 +2133,7 @@ class BudgetOptimizer:
         spend_bound_lower=optimization_lower_bound,
         spend_bound_upper=optimization_upper_bound,
         step_size=step_size,
+        selected_geos=selected_geos,
         selected_times=selected_times,
         new_data=filled_data.filter_fields(c.PAID_DATA),
         use_posterior=use_posterior,
@@ -2058,11 +2153,13 @@ class BudgetOptimizer:
         use_kpi=use_kpi,
         use_posterior=use_posterior,
         use_optimal_frequency=use_optimal_frequency,
+        max_frequency=max_frequency,
         start_date=start_date,
         end_date=end_date,
         gtol=gtol,
         round_factor=round_factor,
         optimal_frequency=optimal_frequency,
+        selected_geos=selected_geos,
         selected_times=selected_times,
     )
@@ -2190,6 +2287,7 @@ class BudgetOptimizer:
       new_data: analyzer_module.DataTensors | None = None,
       use_posterior: bool = True,
       use_kpi: bool = False,
+      selected_geos: Sequence[str] | None = None,
       start_date: tc.Date = None,
       end_date: tc.Date = None,
       optimal_frequency: Sequence[float] | None = None,
@@ -2233,6 +2331,7 @@ class BudgetOptimizer:
     incremental_outcome = self._analyzer.incremental_outcome(
         use_posterior=use_posterior,
         new_data=inc_outcome_data,
+        selected_geos=selected_geos,
         selected_times=selected_times,
         use_kpi=use_kpi,
         batch_size=batch_size,
@@ -2241,6 +2340,7 @@ class BudgetOptimizer:
     incremental_increase = 0.01
     mroi_numerator = self._analyzer.incremental_outcome(
         new_data=inc_outcome_data,
+        selected_geos=selected_geos,
         selected_times=selected_times,
         scaling_factor0=1.0,
         scaling_factor1=1 + incremental_increase,
@@ -2269,7 +2369,7 @@ class BudgetOptimizer:
             media=new_media, reach=new_reach, frequency=new_frequency
         ),
         selected_times=selected_times,
-        selected_geos=None,
+        selected_geos=selected_geos,
         aggregate_times=True,
         aggregate_geos=True,
         optimal_frequency=optimal_frequency,
@@ -2312,19 +2412,27 @@ class BudgetOptimizer:
     total_spend = np.sum(spend) if np.sum(spend) > 0 else 1
     pct_of_spend = spend / total_spend
     data_vars = {
-        c.SPEND: ([c.CHANNEL], spend.data),
-        c.PCT_OF_SPEND: ([c.CHANNEL], pct_of_spend.data),
+        c.SPEND: ([c.CHANNEL], np.array(spend.data, dtype=np.float64)),
+        c.PCT_OF_SPEND: (
+            [c.CHANNEL],
+            np.array(pct_of_spend.data, dtype=np.float64),
+        ),
         c.INCREMENTAL_OUTCOME: (
             [c.CHANNEL, c.METRIC],
-            incremental_outcome_with_mean_median_and_ci,
+            np.array(
+                incremental_outcome_with_mean_median_and_ci, dtype=np.float64
+            ),
         ),
         c.EFFECTIVENESS: (
             [c.CHANNEL, c.METRIC],
-            effectiveness_with_mean_median_and_ci,
+            np.array(effectiveness_with_mean_median_and_ci, dtype=np.float64),
+        ),
+        c.ROI: ([c.CHANNEL, c.METRIC], np.array(roi, dtype=np.float64)),
+        c.MROI: (
+            [c.CHANNEL, c.METRIC],
+            np.array(marginal_roi, dtype=np.float64),
         ),
-        c.ROI: ([c.CHANNEL, c.METRIC], roi),
-        c.MROI: ([c.CHANNEL, c.METRIC], marginal_roi),
-        c.CPIK: ([c.CHANNEL, c.METRIC], cpik),
+        c.CPIK: ([c.CHANNEL, c.METRIC], np.array(cpik, dtype=np.float64)),
     }
     all_times = np.asarray(filled_data.time).astype(str).tolist()
@@ -2359,6 +2467,7 @@ class BudgetOptimizer:
       incremental_outcome_grid: np.ndarray,
       multipliers_grid: backend.Tensor,
       new_data: analyzer_module.DataTensors | None = None,
+      selected_geos: Sequence[str] | None = None,
       selected_times: Sequence[str] | Sequence[bool] | None = None,
       use_posterior: bool = True,
       use_kpi: bool = False,
@@ -2380,6 +2489,9 @@ class BudgetOptimizer:
         tensors is provided with a different number of time periods than in
         `InputData`, then all tensors must be provided with the same number of
         time periods.
+      selected_geos: Optional list containing a subset of geos to include. By
+        default, all geos are included. The selected geos should match those in
+        `InputData.geo`.
       selected_times: Optional list of times to optimize. This can either be a
         string list containing a subset of time dimension coordinates from
         `InputData.time` or a boolean list with length equal to the time
@@ -2445,6 +2557,7 @@ class BudgetOptimizer:
                     frequency=new_frequency,
                     revenue_per_kpi=filled_data.revenue_per_kpi,
                 ),
+                selected_geos=selected_geos,
                 selected_times=selected_times,
                 use_kpi=use_kpi,
                 include_non_paid_channels=False,
@@ -2462,6 +2575,7 @@ class BudgetOptimizer:
       spend_bound_upper: np.ndarray,
       step_size: int,
       new_data: analyzer_module.DataTensors | None = None,
+      selected_geos: Sequence[str] | None = None,
       selected_times: Sequence[str] | Sequence[bool] | None = None,
       use_posterior: bool = True,
       use_kpi: bool = False,
@@ -2484,6 +2598,9 @@ class BudgetOptimizer:
         tensors is provided with a different number of time periods than in
         `InputData`, then all tensors must be provided with the same number of
         time periods.
+      selected_geos: Optional list containing a subset of geos to include. By
+        default, all geos are included. The selected geos should match those in
+        `InputData.geo`.
       selected_times: Optional list of times to optimize. This can either be a
         string list containing a subset of time dimension coordinates from
         `InputData.time` or a boolean list with length equal to the time
@@ -2540,6 +2657,7 @@ class BudgetOptimizer:
           i=i,
           incremental_outcome_grid=incremental_outcome_grid,
           multipliers_grid=multipliers_grid,
+          selected_geos=selected_geos,
           selected_times=selected_times,
           new_data=new_data,
           use_posterior=use_posterior,
@@ -2555,20 +2673,15 @@ class BudgetOptimizer:
     # we use the following code to fix it, and ensure incremental_outcome/spend
     # is always same for RF channels.
     if self._meridian.n_rf_channels > 0:
-      rf_incremental_outcome_max = np.nanmax(
-          incremental_outcome_grid[:, -self._meridian.n_rf_channels :], axis=0
-      )
-      rf_spend_max = np.nanmax(
-          spend_grid[:, -self._meridian.n_rf_channels :], axis=0
-      )
-      rf_roi = backend.divide_no_nan(rf_incremental_outcome_max, rf_spend_max)
-      incremental_outcome_grid[:, -self._meridian.n_rf_channels :] = (
-          rf_roi * spend_grid[:, -self._meridian.n_rf_channels :]
+      incremental_outcome_grid = backend.stabilize_rf_roi_grid(
+          spend_grid, incremental_outcome_grid, self._meridian.n_rf_channels
       )
     return (spend_grid, incremental_outcome_grid)
   def _validate_optimization_tensors(
       self,
+      expected_n_geos: int,
+      expected_n_times: int,
       cpmu: backend.Tensor | None = None,
       cprf: backend.Tensor | None = None,
       media: backend.Tensor | None = None,
@@ -2585,11 +2698,21 @@ class BudgetOptimizer:
           'If `media` or `media_spend` is provided, then `cpmu` must also be'
           ' provided.'
       )
+    if (media is None and media_spend is None) and cpmu is not None:
+      raise ValueError(
+          'If `cpmu` is provided, then one of `media` or `media_spend` must'
+          ' also be provided.'
+      )
     if (rf_impressions is not None or rf_spend is not None) and cprf is None:
       raise ValueError(
           'If `reach` and `frequency` or `rf_spend` is provided, then `cprf`'
           ' must also be provided.'
       )
+    if (rf_impressions is None and rf_spend is None) and cprf is not None:
+      raise ValueError(
+          'If `cprf` is provided, then one of `rf_impressions` or `rf_spend`'
+          ' must also be provided.'
+      )
     if media is not None and media_spend is not None:
       raise ValueError('Only one of `media` or `media_spend` can be provided.')
     if rf_impressions is not None and rf_spend is not None:
@@ -2607,26 +2730,44 @@ class BudgetOptimizer:
             'If `use_optimal_frequency` is `False`, then `frequency` must be'
             ' provided.'
         )
-    n_geos = [
-        t.shape[0]
-        for t in [
-            cpmu,
-            cprf,
-            media,
-            rf_impressions,
-            frequency,
-            media_spend,
-            rf_spend,
-        ]
-        if t is not None and t.ndim == 3
+    n_geos_list = []
+    n_times_list = []
+    tensor_list = [
+        cpmu,
+        cprf,
+        media,
+        rf_impressions,
+        frequency,
+        media_spend,
+        rf_spend,
     ]
+    for t in tensor_list:
+      # `(n_geos, T, n_channels)` shape
+      if t is not None and t.ndim == 3:
+        n_geos_list.append(t.shape[0])
+        n_times_list.append(t.shape[1])
+      # `(T, n_channels)` shape
+      elif t is not None and t.ndim == 2:
+        n_times_list.append(t.shape[0])
+    # `(n_geos, T)` shape
     if revenue_per_kpi is not None and revenue_per_kpi.ndim == 2:
-      n_geos.append(revenue_per_kpi.shape[0])
-    if any(n_geo != self._meridian.n_geos for n_geo in n_geos):
+      n_geos_list.append(revenue_per_kpi.shape[0])
+      n_times_list.append(revenue_per_kpi.shape[1])
+    # `(T)` shape
+    elif revenue_per_kpi is not None and revenue_per_kpi.ndim == 1:
+      n_times_list.append(revenue_per_kpi.shape[0])
+    if any(n_geo != expected_n_geos for n_geo in n_geos_list):
+      raise ValueError(
+          'All tensors with a geo dimension must have'
+          f' {expected_n_geos} geos (as defined in `meridian.InputData`).'
+      )
+    if any(n_time != expected_n_times for n_time in n_times_list):
       raise ValueError(
-          'All tensors with a geo dimension must have the same number of geos'
-          ' as in `meridian.InputData`.'
+          'All tensors with a time dimension must have'
+          f' {expected_n_times} times (as defined in `time` argument).'
       )
   def _allocate_tensor_by_population(
@@ -2993,12 +3134,11 @@ def _expand_selected_times(
         start_date=start_date,
         end_date=end_date,
     )
+    if expanded_dates is None:
+      expanded_dates = time_coordinates.all_dates
+    expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
     if return_flexible_str:
-      if expanded_dates is None:
-        expanded_dates = time_coordinates.all_dates
-      expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
       return [x for x in new_times_str if x in expanded_str]
     # TODO: Remove once every method uses `new_data.time`.
     else:
-      expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
       return [x in expanded_str for x in new_times_str]

google-meridian 1.2.1__py3-none-any.whl → 1.3.1__py3-none-any.whl

google-meridian 1.2.1py3-none-any.whl → 1.3.1py3-none-any.whl