google-meridian 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {google_meridian-1.2.0.dist-info → google_meridian-1.3.0.dist-info}/METADATA +10 -10
  2. google_meridian-1.3.0.dist-info/RECORD +62 -0
  3. meridian/analysis/__init__.py +2 -0
  4. meridian/analysis/analyzer.py +280 -142
  5. meridian/analysis/formatter.py +2 -2
  6. meridian/analysis/optimizer.py +353 -169
  7. meridian/analysis/review/__init__.py +20 -0
  8. meridian/analysis/review/checks.py +721 -0
  9. meridian/analysis/review/configs.py +110 -0
  10. meridian/analysis/review/constants.py +40 -0
  11. meridian/analysis/review/results.py +544 -0
  12. meridian/analysis/review/reviewer.py +186 -0
  13. meridian/analysis/summarizer.py +14 -12
  14. meridian/analysis/templates/chips.html.jinja +12 -0
  15. meridian/analysis/test_utils.py +27 -5
  16. meridian/analysis/visualizer.py +45 -50
  17. meridian/backend/__init__.py +698 -55
  18. meridian/backend/config.py +75 -16
  19. meridian/backend/test_utils.py +127 -1
  20. meridian/constants.py +52 -11
  21. meridian/data/input_data.py +7 -2
  22. meridian/data/test_utils.py +5 -3
  23. meridian/mlflow/autolog.py +2 -2
  24. meridian/model/__init__.py +1 -0
  25. meridian/model/adstock_hill.py +10 -9
  26. meridian/model/eda/__init__.py +3 -0
  27. meridian/model/eda/constants.py +21 -0
  28. meridian/model/eda/eda_engine.py +1580 -84
  29. meridian/model/eda/eda_outcome.py +200 -0
  30. meridian/model/eda/eda_spec.py +84 -0
  31. meridian/model/eda/meridian_eda.py +220 -0
  32. meridian/model/knots.py +56 -50
  33. meridian/model/media.py +10 -8
  34. meridian/model/model.py +79 -16
  35. meridian/model/model_test_data.py +53 -9
  36. meridian/model/posterior_sampler.py +398 -391
  37. meridian/model/prior_distribution.py +114 -39
  38. meridian/model/prior_sampler.py +146 -90
  39. meridian/model/spec.py +7 -8
  40. meridian/model/transformers.py +16 -8
  41. meridian/version.py +1 -1
  42. google_meridian-1.2.0.dist-info/RECORD +0 -52
  43. {google_meridian-1.2.0.dist-info → google_meridian-1.3.0.dist-info}/WHEEL +0 -0
  44. {google_meridian-1.2.0.dist-info → google_meridian-1.3.0.dist-info}/licenses/LICENSE +0 -0
  45. {google_meridian-1.2.0.dist-info → google_meridian-1.3.0.dist-info}/top_level.txt +0 -0
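The optimizer.py changes below add geo filtering (`selected_geos`), a frequency cap for the optimal-frequency search (`max_frequency`), and a configurable currency label for the HTML summary (`currency`). The following is a minimal usage sketch based only on the signatures visible in the diff below, not an official example: `mmm` is assumed to be an already fitted `meridian.model.model.Meridian` instance, and the geo names, dates, frequency cap, and output paths are placeholder values.

    from meridian.analysis import optimizer

    budget_optimizer = optimizer.BudgetOptimizer(mmm)  # `mmm`: a fitted Meridian model (assumed)
    results = budget_optimizer.optimize(
        selected_geos=['Geo A', 'Geo B'],  # new in 1.3.0: restrict optimization to a geo subset
        start_date='2024-01-01',
        end_date='2024-06-30',
        use_optimal_frequency=True,
        max_frequency=10.0,  # new in 1.3.0: upper bound for the optimal-frequency search space
    )
    # new in 1.3.0: currency label used in the HTML summary instead of a hard-coded '$'
    results.output_optimization_summary(
        filename='optimization_summary.html',
        filepath='./output',
        currency='$',
    )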
meridian/analysis/optimizer.py

@@ -26,7 +26,7 @@ import altair as alt
  import jinja2
  from meridian import backend
  from meridian import constants as c
- from meridian.analysis import analyzer
+ from meridian.analysis import analyzer as analyzer_module
  from meridian.analysis import formatter
  from meridian.analysis import summary_text
  from meridian.data import time_coordinates as tc
@@ -102,6 +102,7 @@ class OptimizationGrid:
  use_kpi: Whether using generic KPI or revenue.
  use_posterior: Whether posterior distributions were used, or prior.
  use_optimal_frequency: Whether optimal frequency was used.
+ max_frequency: The maximum frequency for reach and frequency channels.
  start_date: The start date of the optimization period.
  end_date: The end date of the optimization period.
  gtol: Float indicating the acceptable relative error for the budget used in
@@ -114,7 +115,12 @@ class OptimizationGrid:
  does not contain reach and frequency data, or if the model does contain
  reach and frequency data, but historical frequency is used for the
  optimization scenario.
- selected_times: The time coordinates from the model used in this grid.
+ selected_geos: The geo coordinates from the model used in this grid.
+ selected_times: The time coordinates from the model used in this grid. If
+ new data with modified time coordinates is used for optimization, this is
+ a list of booleans indicating which time coordinates are selected.
+ Otherwise, this is a list of strings indicating the time coordinates used
+ in this grid.
  """

  _grid_dataset: xr.Dataset
@@ -128,7 +134,9 @@ class OptimizationGrid:
  gtol: float
  round_factor: int
  optimal_frequency: np.ndarray | None
- selected_times: Sequence[str] | None
+ selected_geos: Sequence[str] | None
+ selected_times: Sequence[str] | Sequence[bool] | None
+ max_frequency: float | None = None

  @property
  def grid_dataset(self) -> xr.Dataset:
@@ -262,7 +270,7 @@ class OptimizationGrid:
  return xr.Dataset(
  coords={c.CHANNEL: self.channels},
  data_vars={
- c.OPTIMIZED: ([c.CHANNEL], optimal_spend.data),
+ c.OPTIMIZED: ([c.CHANNEL], optimal_spend),
  c.NON_OPTIMIZED: ([c.CHANNEL], rounded_spend),
  },
  )
@@ -386,16 +394,26 @@ class OptimizationGrid:
  media spend that maximizes incremental outcome based on spend constraints
  for all media and RF channels.
  """
- spend = spend_grid[0, :].copy()
- incremental_outcome = incremental_outcome_grid[0, :].copy()
- spend_grid = spend_grid[1:, :]
- incremental_outcome_grid = incremental_outcome_grid[1:, :]
- iterative_roi_grid = np.round(
- backend.divide_no_nan(
- incremental_outcome_grid - incremental_outcome, spend_grid - spend
- ),
- decimals=8,
+ spend_grid_values = np.array(spend_grid.values, dtype=np.float64)
+ incremental_outcome_grid_values = np.array(
+ incremental_outcome_grid.values, dtype=np.float64
+ )
+
+ spend = spend_grid_values[0, :].copy()
+ incremental_outcome = incremental_outcome_grid_values[0, :].copy()
+ spend_grid_values = spend_grid_values[1:, :]
+ incremental_outcome_grid_values = incremental_outcome_grid_values[1:, :]
+
+ numerator = incremental_outcome_grid_values - incremental_outcome
+ denominator = spend_grid_values - spend
+ iterative_roi_grid = np.divide(
+ numerator,
+ denominator,
+ out=np.zeros_like(numerator),
+ where=(denominator != 0),
  )
+ iterative_roi_grid = np.round(iterative_roi_grid, decimals=8)
+
  while True:
  spend_optimal = spend.astype(int)
  # If none of the exit criteria are met roi_grid will eventually be filled
@@ -407,8 +425,8 @@ class OptimizationGrid:
  )
  row_idx = point[0]
  media_idx = point[1]
- spend[media_idx] = spend_grid[row_idx, media_idx]
- incremental_outcome[media_idx] = incremental_outcome_grid[
+ spend[media_idx] = spend_grid_values[row_idx, media_idx]
+ incremental_outcome[media_idx] = incremental_outcome_grid_values[
  row_idx, media_idx
  ]
  roi_grid_point = iterative_roi_grid[row_idx, media_idx]
@@ -421,14 +439,23 @@ class OptimizationGrid:
  break

  iterative_roi_grid[0 : row_idx + 1, media_idx] = np.nan
+
+ num_col = (
+ incremental_outcome_grid_values[row_idx + 1 :, media_idx]
+ - incremental_outcome_grid_values[row_idx, media_idx]
+ )
+ den_col = (
+ spend_grid_values[row_idx + 1 :, media_idx]
+ - spend_grid_values[row_idx, media_idx]
+ )
+ new_roi_col = np.divide(
+ num_col,
+ den_col,
+ out=np.zeros_like(num_col),
+ where=(den_col != 0),
+ )
  iterative_roi_grid[row_idx + 1 :, media_idx] = np.round(
- backend.divide_no_nan(
- incremental_outcome_grid[row_idx + 1 :, media_idx]
- - incremental_outcome_grid[row_idx, media_idx],
- spend_grid[row_idx + 1 :, media_idx]
- - spend_grid[row_idx, media_idx],
- ),
- decimals=8,
+ new_roi_col, decimals=8
  )
  return spend_optimal

@@ -438,40 +465,33 @@ class OptimizationResults:
  """The optimized budget allocation.

  This is a dataclass object containing datasets output from `BudgetOptimizer`.
- These datasets include:
-
- - `nonoptimized_data`: The non-optimized budget metrics (based on historical
- frequency).
- - `nonoptimized_data_with_optimal_freq`: The non-optimized budget metrics
- based on optimal frequency.
- - `optimized_data`: The optimized budget metrics.
- - `optimization_grid`: The grid information used for optimization.
-
- The metrics (data variables) are: ROI, mROI, incremental outcome, CPIK.

- Additionally, some intermediate values and referecences to the source fitted
- model and analyzer are also stored here. These are useful for visualizing and
- debugging.
+ The performance metrics (data variables) are: spend, percentage of spend, ROI,
+ mROI, incremental outcome, CPIK, and effectiveness.

  Attributes:
  meridian: The fitted Meridian model that was used to create this budget
  allocation.
  analyzer: The analyzer bound to the model above.
- spend_ratio: The spend ratio used to scale the non-optimized budget metrics
- to the optimized budget metrics.
- spend_bounds: The spend bounds used to scale the non-optimized budget
- metrics to the optimized budget metrics.
- nonoptimized_data: The non-optimized budget metrics (based on historical
- frequency).
- nonoptimized_data_with_optimal_freq: The non-optimized budget metrics based
- on optimal frequency.
- optimized_data: The optimized budget metrics.
+ spend_ratio: The spend ratio used to scale the non-optimized performance
+ metrics to the optimized performance metrics.
+ spend_bounds: The spend bounds used to scale the non-optimized performance
+ metrics to the optimized performance metrics.
+ nonoptimized_data: Performance metrics under the non-optimized budget. For
+ R&F channels, the non-optimized frequency is used.
+ nonoptimized_data_with_optimal_freq: Performance metrics under the
+ non-optimized budget. For R&F channels, the optimal frequency is used if
+ frequency was optimized.
+ optimized_data: Performance metrics under the optimized budget. For R&F
+ channels, the optimal frequency is used if frequency was optimized.
  optimization_grid: The grid information used for optimization.
+ new_data: The optional `DataTensors` container that was used to create this
+ budget allocation.
  """

  meridian: model.Meridian
  # The analyzer bound to the model above.
- analyzer: analyzer.Analyzer
+ analyzer: analyzer_module.Analyzer
  spend_ratio: np.ndarray # spend / historical spend
  spend_bounds: tuple[np.ndarray, np.ndarray]

@@ -481,6 +501,10 @@ class OptimizationResults:
  _optimized_data: xr.Dataset
  _optimization_grid: OptimizationGrid

+ # The optional `DataTensors` container to use if optimization was performed
+ # on data different from the original `input_data`.
+ new_data: analyzer_module.DataTensors | None = None
+
  # TODO: Move this, and the plotting methods, to a summarizer.
  @functools.cached_property
  def template_env(self) -> jinja2.Environment:
@@ -497,10 +521,10 @@ class OptimizationResults:

  @property
  def nonoptimized_data(self) -> xr.Dataset:
- """Dataset holding the non-optimized budget metrics.
+ """Dataset holding the non-optimized performance metrics.

  For channels that have reach and frequency data, their performance metrics
- (ROI, mROI, incremental outcome, CPIK) are based on historical frequency.
+ are based on historical frequency.

  The dataset contains the following:

@@ -519,10 +543,10 @@ class OptimizationResults:

  @property
  def nonoptimized_data_with_optimal_freq(self) -> xr.Dataset:
- """Dataset holding the non-optimized budget metrics.
+ """Dataset holding the non-optimized performance metrics.

  For channels that have reach and frequency data, their performance metrics
- (ROI, mROI, incremental outcome, CPIK) are based on optimal frequency.
+ are based on optimal frequency.

  The dataset contains the following:

@@ -537,10 +561,10 @@ class OptimizationResults:

  @property
  def optimized_data(self) -> xr.Dataset:
- """Dataset holding the optimized budget metrics.
+ """Dataset holding the optimized performance metrics.

  For channels that have reach and frequency data, their performance metrics
- (ROI, mROI, incremental outcome) are based on optimal frequency.
+ are based on optimal frequency.

  The dataset contains the following:

@@ -558,11 +582,16 @@ class OptimizationResults:
  """The grid information used for optimization."""
  return self._optimization_grid

- def output_optimization_summary(self, filename: str, filepath: str):
+ def output_optimization_summary(
+ self,
+ filename: str,
+ filepath: str,
+ currency: str = c.DEFAULT_CURRENCY,
+ ):
  """Generates and saves the HTML optimization summary output."""
  os.makedirs(filepath, exist_ok=True)
  with open(os.path.join(filepath, filename), 'w') as f:
- f.write(self._gen_optimization_summary())
+ f.write(self._gen_optimization_summary(currency))

  def plot_incremental_outcome_delta(self) -> alt.Chart:
  """Plots a waterfall chart showing the change in incremental outcome."""
@@ -712,7 +741,7 @@ class OptimizationResults:
  )
  )

- def plot_spend_delta(self) -> alt.Chart:
+ def plot_spend_delta(self, currency: str = c.DEFAULT_CURRENCY) -> alt.Chart:
  """Plots a bar chart showing the optimized change in spend per channel."""
  df = self._get_delta_data(c.SPEND)
  base = (
@@ -733,7 +762,7 @@ class OptimizationResults:
  y=alt.Y(
  f'{c.SPEND}:Q',
  axis=alt.Axis(
- title='$',
+ title=currency,
  domain=False,
  labelExpr=formatter.compact_number_expr(),
  **formatter.AXIS_CONFIG,
@@ -894,9 +923,12 @@ class OptimizationResults:
  returned this result.
  """
  channels = self.optimized_data.channel.values
- selected_times = self.meridian.expand_selected_time_dims(
+ selected_times = _expand_selected_times(
+ meridian=self.meridian,
  start_date=self.optimized_data.start_date,
  end_date=self.optimized_data.end_date,
+ new_data=self.new_data,
+ return_flexible_str=True,
  )
  _, ubounds = self.spend_bounds
  upper_bound = (
@@ -912,8 +944,10 @@ class OptimizationResults:
  # WARN: If `selected_times` is not None (i.e. a subset time range), this
  # response curve computation might take a significant amount of time.
  return self.analyzer.response_curves(
+ new_data=self.new_data,
  spend_multipliers=spend_multiplier,
  use_posterior=self.optimization_grid.use_posterior,
+ selected_geos=self.optimization_grid.selected_geos,
  selected_times=selected_times,
  by_reach=True,
  use_kpi=not self.nonoptimized_data.attrs[c.IS_REVENUE_KPI],
@@ -1024,7 +1058,7 @@ class OptimizationResults:
  sorted_df.sort_index(inplace=True)
  return sorted_df

- def _gen_optimization_summary(self) -> str:
+ def _gen_optimization_summary(self, currency: str) -> str:
  """Generates HTML optimization summary output (as sanitized content str)."""
  start_date = tc.normalize_date(self.optimized_data.start_date)
  self.template_env.globals[c.START_DATE] = start_date.strftime(
@@ -1036,22 +1070,25 @@ class OptimizationResults:
  self.template_env.globals[c.END_DATE] = end_date_adjusted.strftime(
  f'%b {end_date_adjusted.day}, %Y'
  )
+ self.template_env.globals[c.SELECTED_GEOS] = (
+ self.optimization_grid.selected_geos
+ )

  html_template = self.template_env.get_template('summary.html.jinja')
  return html_template.render(
  title=summary_text.OPTIMIZATION_TITLE,
- cards=self._create_output_sections(),
+ cards=self._create_output_sections(currency),
  )

- def _create_output_sections(self) -> Sequence[str]:
+ def _create_output_sections(self, currency: str) -> Sequence[str]:
  """Creates the HTML snippets for cards in the summary page."""
  return [
- self._create_scenario_plan_section(),
- self._create_budget_allocation_section(),
+ self._create_scenario_plan_section(currency),
+ self._create_budget_allocation_section(currency),
  self._create_response_curves_section(),
  ]

- def _create_scenario_plan_section(self) -> str:
+ def _create_scenario_plan_section(self, currency: str) -> str:
  """Creates the HTML card snippet for the scenario plan section."""
  card_spec = formatter.CardSpec(
  id=summary_text.SCENARIO_PLAN_CARD_ID,
@@ -1094,22 +1131,32 @@ class OptimizationResults:
  self.template_env,
  card_spec,
  insights,
- stats_specs=self._create_scenario_stats_specs(),
+ stats_specs=self._create_scenario_stats_specs(currency),
  )

- def _create_scenario_stats_specs(self) -> Sequence[formatter.StatsSpec]:
+ def _create_scenario_stats_specs(
+ self, currency: str
+ ) -> Sequence[formatter.StatsSpec]:
  """Creates the stats to fill the scenario plan section."""
  outcome = self._kpi_or_revenue
  budget_diff = self.optimized_data.budget - self.nonoptimized_data.budget
  budget_prefix = '+' if budget_diff > 0 else ''
  non_optimized_budget = formatter.StatsSpec(
  title=summary_text.NON_OPTIMIZED_BUDGET_LABEL,
- stat=formatter.format_monetary_num(self.nonoptimized_data.budget),
+ stat=formatter.format_monetary_num(
+ num=self.nonoptimized_data.budget,
+ currency=currency,
+ ),
  )
  optimized_budget = formatter.StatsSpec(
  title=summary_text.OPTIMIZED_BUDGET_LABEL,
- stat=formatter.format_monetary_num(self.optimized_data.budget),
- delta=(budget_prefix + formatter.format_monetary_num(budget_diff)),
+ stat=formatter.format_monetary_num(
+ num=self.optimized_data.budget, currency=currency
+ ),
+ delta=(
+ budget_prefix
+ + formatter.format_monetary_num(num=budget_diff, currency=currency)
+ ),
  )

  if outcome == c.REVENUE:
@@ -1131,7 +1178,7 @@ class OptimizationResults:
  )
  optimized_performance_title = summary_text.OPTIMIZED_CPIK_LABEL
  optimized_performance_stat = f'${self.optimized_data.total_cpik:.2f}'
- optimized_performance_diff = formatter.compact_number(diff, 2, '$')
+ optimized_performance_diff = formatter.compact_number(diff, 2, currency)
  non_optimized_performance = formatter.StatsSpec(
  title=non_optimized_performance_title,
  stat=non_optimized_performance_stat,
@@ -1147,7 +1194,7 @@ class OptimizationResults:
  - self.nonoptimized_data.total_incremental_outcome
  )
  inc_outcome_prefix = '+' if inc_outcome_diff > 0 else ''
- currency = '$' if outcome == c.REVENUE else ''
+ currency = currency if outcome == c.REVENUE else ''
  non_optimized_inc_outcome = formatter.StatsSpec(
  title=summary_text.NON_OPTIMIZED_INC_OUTCOME_LABEL.format(
  outcome=outcome
@@ -1177,7 +1224,7 @@ class OptimizationResults:
  optimized_inc_outcome,
  ]

- def _create_budget_allocation_section(self) -> str:
+ def _create_budget_allocation_section(self, currency: str) -> str:
  """Creates the HTML card snippet for the budget allocation section."""
  outcome = self._kpi_or_revenue
  card_spec = formatter.CardSpec(
@@ -1187,7 +1234,7 @@ class OptimizationResults:
  spend_delta = formatter.ChartSpec(
  id=summary_text.SPEND_DELTA_CHART_ID,
  description=summary_text.SPEND_DELTA_CHART_INSIGHTS,
- chart_json=self.plot_spend_delta().to_json(),
+ chart_json=self.plot_spend_delta(currency).to_json(),
  )
  spend_allocation = formatter.ChartSpec(
  id=summary_text.SPEND_ALLOCATION_CHART_ID,
@@ -1276,7 +1323,7 @@ class BudgetOptimizer:

  def __init__(self, meridian: model.Meridian):
  self._meridian = meridian
- self._analyzer = analyzer.Analyzer(self._meridian)
+ self._analyzer = analyzer_module.Analyzer(self._meridian)

  def _validate_model_fit(self, use_posterior: bool):
  """Validates that the model is fit."""
@@ -1288,8 +1335,9 @@ class BudgetOptimizer:

  def optimize(
  self,
- new_data: analyzer.DataTensors | None = None,
+ new_data: analyzer_module.DataTensors | None = None,
  use_posterior: bool = True,
+ selected_geos: Sequence[str] | None = None,
  # TODO: b/409550413 - Remove this argument.
  selected_times: tuple[str | None, str | None] | None = None,
  start_date: tc.Date = None,
@@ -1302,7 +1350,10 @@ class BudgetOptimizer:
  target_roi: float | None = None,
  target_mroi: float | None = None,
  gtol: float = 0.0001,
+ # TODO:
+ # merging use_optimal_frequency and max_frequency into a single argument.
  use_optimal_frequency: bool = True,
+ max_frequency: float | None = None,
  use_kpi: bool = False,
  confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
  batch_size: int = c.DEFAULT_BATCH_SIZE,
@@ -1378,6 +1429,9 @@ class BudgetOptimizer:
  use_posterior: Boolean. If `True`, then the budget is optimized based on
  the posterior distribution of the model. Otherwise, the prior
  distribution is used.
+ selected_geos: Optional list containing a subset of geos to include. By
+ default, all geos are included. The selected geos should match those in
+ `InputData.geo`.
  selected_times: Deprecated. Tuple containing the start and end time
  dimension coordinates for the duration to run the optimization on.
  Please Use `start_date` and `end_date` instead.
@@ -1434,6 +1488,10 @@ class BudgetOptimizer:
  use_optimal_frequency: If `True`, uses `optimal_frequency` calculated by
  trained Meridian model for optimization. If `False`, uses historical
  frequency or `new_data.frequency` if provided.
+ max_frequency: Float indicating the frequency upper bound for the optimal
+ frequency search space. If `None` when `use_optimal_frequency` is
+ `True`, the max frequency of the input data is used. If
+ `use_optimal_frequency` is `False`, `max_frequency` is ignored.
  use_kpi: If `True`, runs the optimization on KPI. Defaults to revenue.
  confidence_level: The threshold for computing the confidence intervals.
  batch_size: Maximum draws per chain in each batch. The calculation is run
@@ -1479,6 +1537,7 @@ class BudgetOptimizer:
  use_grid_arg = optimization_grid is not None and self._validate_grid(
  new_data=new_data,
  use_posterior=use_posterior,
+ selected_geos=selected_geos,
  start_date=start_date,
  end_date=end_date,
  budget=budget,
@@ -1487,12 +1546,14 @@ class BudgetOptimizer:
  spend_constraint_upper=spend_constraint_upper,
  gtol=gtol,
  use_optimal_frequency=use_optimal_frequency,
+ max_frequency=max_frequency,
  use_kpi=use_kpi,
  optimization_grid=optimization_grid,
  )
  if optimization_grid is None or not use_grid_arg:
  optimization_grid = self.create_optimization_grid(
  new_data=new_data,
+ selected_geos=selected_geos,
  start_date=start_date,
  end_date=end_date,
  budget=budget,
@@ -1503,6 +1564,7 @@ class BudgetOptimizer:
  use_posterior=use_posterior,
  use_kpi=use_kpi,
  use_optimal_frequency=use_optimal_frequency,
+ max_frequency=max_frequency,
  batch_size=batch_size,
  )

@@ -1526,13 +1588,14 @@ class BudgetOptimizer:
  use_historical_budget = budget is None or np.isclose(
  budget, np.sum(optimization_grid.historical_spend)
  )
- new_data = new_data or analyzer.DataTensors()
+ new_data = new_data or analyzer_module.DataTensors()
  nonoptimized_data = self._create_budget_dataset(
  new_data=new_data.filter_fields(c.PAID_DATA + (c.TIME,)),
  use_posterior=use_posterior,
  use_kpi=use_kpi,
  hist_spend=optimization_grid.historical_spend,
  spend=spend.non_optimized,
+ selected_geos=selected_geos,
  start_date=start_date,
  end_date=end_date,
  confidence_level=confidence_level,
@@ -1545,6 +1608,7 @@ class BudgetOptimizer:
  use_kpi=use_kpi,
  hist_spend=optimization_grid.historical_spend,
  spend=spend.non_optimized,
+ selected_geos=selected_geos,
  start_date=start_date,
  end_date=end_date,
  optimal_frequency=optimization_grid.optimal_frequency,
@@ -1565,6 +1629,7 @@ class BudgetOptimizer:
  use_kpi=use_kpi,
  hist_spend=optimization_grid.historical_spend,
  spend=spend.optimized,
+ selected_geos=selected_geos,
  start_date=start_date,
  end_date=end_date,
  optimal_frequency=optimization_grid.optimal_frequency,
@@ -1595,6 +1660,7 @@ class BudgetOptimizer:
  )

  return OptimizationResults(
+ new_data=new_data,
  meridian=self._meridian,
  analyzer=self._analyzer,
  spend_ratio=spend_ratio,
@@ -1617,7 +1683,7 @@ class BudgetOptimizer:
  rf_spend: backend.Tensor | None = None,
  revenue_per_kpi: backend.Tensor | None = None,
  use_optimal_frequency: bool = True,
- ) -> analyzer.DataTensors:
+ ) -> analyzer_module.DataTensors:
  """Creates a `DataTensors` for optimizations from CPM and flighting data.

  CPM is broken down into cost per media unit, `cpmu`, for the media channels
@@ -1684,7 +1750,11 @@ class BudgetOptimizer:
  A `DataTensors` object with optional tensors `media`, `reach`,
  `frequency`, `media_spend`, `rf_spend`, `revenue_per_kpi`, and `time`.
  """
+ n_times = time.shape[0] if isinstance(time, backend.Tensor) else len(time)
+ n_geos = self._meridian.n_geos
  self._validate_optimization_tensors(
+ expected_n_geos=n_geos,
+ expected_n_times=n_times,
  cpmu=cpmu,
  cprf=cprf,
  media=media,
@@ -1695,13 +1765,6 @@ class BudgetOptimizer:
  revenue_per_kpi=revenue_per_kpi,
  use_optimal_frequency=use_optimal_frequency,
  )
- n_times = time.shape[0] if isinstance(time, backend.Tensor) else len(time)
- n_geos = self._meridian.n_geos
- revenue_per_kpi = (
- _expand_tensor(revenue_per_kpi, (n_geos, n_times))
- if revenue_per_kpi is not None
- else None
- )

  tensors = {}
  if media is not None:
@@ -1737,14 +1800,17 @@ class BudgetOptimizer:
  impressions, tensors[c.FREQUENCY]
  )
  if revenue_per_kpi is not None:
- tensors[c.REVENUE_PER_KPI] = revenue_per_kpi
+ tensors[c.REVENUE_PER_KPI] = _expand_tensor(
+ revenue_per_kpi, (n_geos, n_times)
+ )
  tensors[c.TIME] = backend.to_tensor(time)
- return analyzer.DataTensors(**tensors)
+ return analyzer_module.DataTensors(**tensors)

  def _validate_grid(
  self,
- new_data: analyzer.DataTensors | None,
+ new_data: analyzer_module.DataTensors | None,
  use_posterior: bool,
+ selected_geos: Sequence[str] | None,
  start_date: tc.Date,
  end_date: tc.Date,
  budget: float | None,
@@ -1753,6 +1819,7 @@ class BudgetOptimizer:
  spend_constraint_upper: _SpendConstraint,
  gtol: float,
  use_optimal_frequency: bool,
+ max_frequency: float | None,
  use_kpi: bool,
  optimization_grid: OptimizationGrid,
  ) -> bool:
@@ -1785,6 +1852,15 @@ class BudgetOptimizer:
  )
  return False

+ if max_frequency != optimization_grid.max_frequency:
+ warnings.warn(
+ 'Given optimization grid was created with `use_optimal_frequency` ='
+ f' {optimization_grid.max_frequency}, but optimization was'
+ f' called with `max_frequency` = {max_frequency}. A'
+ ' new grid will be created.'
+ )
+ return False
+
  if (
  start_date != optimization_grid.start_date
  or end_date != optimization_grid.end_date
@@ -1799,7 +1875,7 @@ class BudgetOptimizer:
  return False

  if new_data is None:
- new_data = analyzer.DataTensors()
+ new_data = analyzer_module.DataTensors()
  required_tensors = c.PERFORMANCE_DATA + (c.TIME,)
  filled_data = new_data.validate_and_fill_missing_data(
  required_tensors_names=required_tensors, meridian=self._meridian
@@ -1814,8 +1890,20 @@ class BudgetOptimizer:
  )
  return False

+ s_geos = sorted(selected_geos or [])
+ g_geos = sorted(optimization_grid.selected_geos or [])
+ if s_geos != g_geos:
+ warnings.warn(
+ 'Given optimization grid was created with `selected_geos` ='
+ f' {optimization_grid.selected_geos}, but optimization request was'
+ f' called with `selected_geos` = {selected_geos}. A new grid will be'
+ ' created.'
+ )
+ return False
+
  n_channels = len(optimization_grid.channels)
- selected_times = self._validate_selected_times(
+ selected_times = _expand_selected_times(
+ meridian=self._meridian,
  start_date=start_date,
  end_date=end_date,
  new_data=new_data,
@@ -1870,6 +1958,7 @@ class BudgetOptimizer:
  self,
  new_data: xr.Dataset | None = None,
  use_posterior: bool = True,
+ selected_geos: Sequence[str] | None = None,
  # TODO: b/409550413 - Remove this argument.
  selected_times: tuple[str | None, str | None] | None = None,
  start_date: tc.Date = None,
@@ -1880,6 +1969,7 @@ class BudgetOptimizer:
  spend_constraint_upper: _SpendConstraint = c.SPEND_CONSTRAINT_DEFAULT,
  gtol: float = 0.0001,
  use_optimal_frequency: bool = True,
+ max_frequency: float | None = None,
  use_kpi: bool = False,
  batch_size: int = c.DEFAULT_BATCH_SIZE,
  ) -> OptimizationGrid:
@@ -1908,6 +1998,9 @@ class BudgetOptimizer:
  use_posterior: Boolean. If `True`, then the incremental outcome is derived
  from the posterior distribution of the model. Otherwise, the prior
  distribution is used.
+ selected_geos: Optional list containing a subset of geos to include. By
+ default, all geos are included. The selected geos should match those in
+ `InputData.geo`.
  selected_times: Deprecated. Tuple containing the start and end time
  dimension coordinates. Please Use `start_date` and `end_date` instead.
  start_date: Optional start date selector, *inclusive*, in _yyyy-mm-dd_
@@ -1948,6 +2041,10 @@ class BudgetOptimizer:
  the smallest integer such that `(budget - rounded_budget)` is less than
  or equal to `(budget * gtol)`. `gtol` must be less than 1.
  use_optimal_frequency: Boolean. Whether optimal frequency was used.
+ max_frequency: Float indicating the frequency upper bound for the optimal
+ frequency search space. If `None` when `use_optimal_frequency` is
+ `True`, the max frequency of the input data is used. If
+ `use_optimal_frequency` is `False`, `max_frequency` is ignored.
  use_kpi: Boolean. If `True`, then the incremental outcome is derived from
  the KPI impact. Otherwise, the incremental outcome is derived from the
  revenue impact.
@@ -1961,8 +2058,9 @@ class BudgetOptimizer:
  """
  self._validate_model_fit(use_posterior)
  if new_data is None:
- new_data = analyzer.DataTensors()
-
+ new_data = analyzer_module.DataTensors()
+ if selected_geos is not None and not selected_geos:
+ raise ValueError('`selected_geos` must not be empty.')
  if selected_times is not None:
  warnings.warn(
  '`selected_times` is deprecated. Please use `start_date` and'
@@ -1978,13 +2076,15 @@ class BudgetOptimizer:
  filled_data = new_data.validate_and_fill_missing_data(
  required_tensors_names=required_tensors, meridian=self._meridian
  )
- selected_times = self._validate_selected_times(
+ selected_times = _expand_selected_times(
+ meridian=self._meridian,
  start_date=start_date,
  end_date=end_date,
  new_data=filled_data,
  )
  hist_spend = self._analyzer.get_aggregated_spend(
  new_data=filled_data.filter_fields(c.PAID_CHANNELS + c.SPEND_DATA),
+ selected_geos=selected_geos,
  selected_times=selected_times,
  include_media=self._meridian.n_media_channels > 0,
  include_rf=self._meridian.n_rf_channels > 0,
@@ -2008,7 +2108,7 @@ class BudgetOptimizer:
  )
  )
  if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
- opt_freq_data = analyzer.DataTensors(
+ opt_freq_data = analyzer_module.DataTensors(
  rf_impressions=filled_data.reach * filled_data.frequency,
  rf_spend=filled_data.rf_spend,
  revenue_per_kpi=filled_data.revenue_per_kpi,
@@ -2017,8 +2117,10 @@ class BudgetOptimizer:
  self._analyzer.optimal_freq(
  new_data=opt_freq_data,
  use_posterior=use_posterior,
+ selected_geos=selected_geos,
  selected_times=selected_times,
  use_kpi=use_kpi,
+ max_frequency=max_frequency,
  ).optimal_frequency,
  dtype=backend.float32,
  )
@@ -2031,6 +2133,7 @@ class BudgetOptimizer:
  spend_bound_lower=optimization_lower_bound,
  spend_bound_upper=optimization_upper_bound,
  step_size=step_size,
+ selected_geos=selected_geos,
  selected_times=selected_times,
  new_data=filled_data.filter_fields(c.PAID_DATA),
  use_posterior=use_posterior,
@@ -2050,11 +2153,13 @@ class BudgetOptimizer:
  use_kpi=use_kpi,
  use_posterior=use_posterior,
  use_optimal_frequency=use_optimal_frequency,
+ max_frequency=max_frequency,
  start_date=start_date,
  end_date=end_date,
  gtol=gtol,
  round_factor=round_factor,
  optimal_frequency=optimal_frequency,
+ selected_geos=selected_geos,
  selected_times=selected_times,
  )

@@ -2098,38 +2203,11 @@ class BudgetOptimizer:
  attrs={c.SPEND_STEP_SIZE: spend_step_size},
  )

- def _validate_selected_times(
- self,
- start_date: tc.Date,
- end_date: tc.Date,
- new_data: analyzer.DataTensors | None,
- ) -> Sequence[str] | Sequence[bool] | None:
- """Validates and returns the selected times."""
- if start_date is None and end_date is None:
- return None
-
- new_data = new_data or analyzer.DataTensors()
- if new_data.get_modified_times(self._meridian) is None:
- return self._meridian.expand_selected_time_dims(
- start_date=start_date,
- end_date=end_date,
- )
- else:
- assert new_data.time is not None
- new_times_str = np.asarray(new_data.time).astype(str).tolist()
- time_coordinates = tc.TimeCoordinates.from_dates(new_times_str)
- expanded_dates = time_coordinates.expand_selected_time_dims(
- start_date=start_date,
- end_date=end_date,
- )
- expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
- return [x in expanded_str for x in new_times_str]
-
  def _get_incremental_outcome_tensors(
  self,
  hist_spend: np.ndarray,
  spend: np.ndarray,
- new_data: analyzer.DataTensors | None = None,
+ new_data: analyzer_module.DataTensors | None = None,
  optimal_frequency: Sequence[float] | None = None,
  ) -> tuple[
  backend.Tensor | None,
@@ -2165,7 +2243,7 @@ class BudgetOptimizer:
  Returns:
  Tuple of backend.tensors (new_media, new_reach, new_frequency).
  """
- new_data = new_data or analyzer.DataTensors()
+ new_data = new_data or analyzer_module.DataTensors()
  filled_data = new_data.validate_and_fill_missing_data(
  c.PAID_CHANNELS,
  self._meridian,
@@ -2206,9 +2284,10 @@ class BudgetOptimizer:
  self,
  hist_spend: np.ndarray,
  spend: np.ndarray,
- new_data: analyzer.DataTensors | None = None,
+ new_data: analyzer_module.DataTensors | None = None,
  use_posterior: bool = True,
  use_kpi: bool = False,
+ selected_geos: Sequence[str] | None = None,
  start_date: tc.Date = None,
  end_date: tc.Date = None,
  optimal_frequency: Sequence[float] | None = None,
@@ -2218,13 +2297,16 @@ class BudgetOptimizer:
  use_historical_budget: bool = True,
  ) -> xr.Dataset:
  """Creates the budget dataset."""
- new_data = new_data or analyzer.DataTensors()
+ new_data = new_data or analyzer_module.DataTensors()
  filled_data = new_data.validate_and_fill_missing_data(
  c.PAID_DATA + (c.TIME,),
  self._meridian,
  )
- selected_times = self._validate_selected_times(
- start_date=start_date, end_date=end_date, new_data=new_data
+ selected_times = _expand_selected_times(
+ meridian=self._meridian,
+ start_date=start_date,
+ end_date=end_date,
+ new_data=new_data,
  )
  spend_tensor = backend.to_tensor(spend, dtype=backend.float32)
  hist_spend = backend.to_tensor(hist_spend, dtype=backend.float32)
@@ -2237,7 +2319,7 @@ class BudgetOptimizer:
  )
  )
  budget = np.sum(spend_tensor)
- inc_outcome_data = analyzer.DataTensors(
+ inc_outcome_data = analyzer_module.DataTensors(
  media=new_media,
  reach=new_reach,
  frequency=new_frequency,
@@ -2249,6 +2331,7 @@ class BudgetOptimizer:
  incremental_outcome = self._analyzer.incremental_outcome(
  use_posterior=use_posterior,
  new_data=inc_outcome_data,
+ selected_geos=selected_geos,
  selected_times=selected_times,
  use_kpi=use_kpi,
  batch_size=batch_size,
@@ -2257,6 +2340,7 @@ class BudgetOptimizer:
  incremental_increase = 0.01
  mroi_numerator = self._analyzer.incremental_outcome(
  new_data=inc_outcome_data,
+ selected_geos=selected_geos,
  selected_times=selected_times,
  scaling_factor0=1.0,
  scaling_factor1=1 + incremental_increase,
@@ -2269,7 +2353,7 @@ class BudgetOptimizer:
  # shape (n_channels, n_metrics) where n_metrics = 4 for (mean, median,
  # ci_lo, and ci_hi)
  incremental_outcome_with_mean_median_and_ci = (
- analyzer.get_central_tendency_and_ci(
+ analyzer_module.get_central_tendency_and_ci(
  data=incremental_outcome,
  confidence_level=confidence_level,
  include_median=True,
@@ -2281,18 +2365,18 @@ class BudgetOptimizer:
  )

  aggregated_impressions = self._analyzer.get_aggregated_impressions(
- new_data=analyzer.DataTensors(
+ new_data=analyzer_module.DataTensors(
  media=new_media, reach=new_reach, frequency=new_frequency
  ),
  selected_times=selected_times,
- selected_geos=None,
+ selected_geos=selected_geos,
  aggregate_times=True,
  aggregate_geos=True,
  optimal_frequency=optimal_frequency,
  include_non_paid_channels=False,
  )
  effectiveness_with_mean_median_and_ci = (
- analyzer.get_central_tendency_and_ci(
+ analyzer_module.get_central_tendency_and_ci(
  data=backend.divide_no_nan(
  incremental_outcome, aggregated_impressions
  ),
@@ -2301,12 +2385,12 @@ class BudgetOptimizer:
  )
  )

- roi = analyzer.get_central_tendency_and_ci(
+ roi = analyzer_module.get_central_tendency_and_ci(
  data=backend.divide_no_nan(incremental_outcome, spend_tensor),
  confidence_level=confidence_level,
  include_median=True,
  )
- marginal_roi = analyzer.get_central_tendency_and_ci(
+ marginal_roi = analyzer_module.get_central_tendency_and_ci(
  data=backend.divide_no_nan(
  mroi_numerator, spend_tensor * incremental_increase
  ),
@@ -2314,7 +2398,7 @@ class BudgetOptimizer:
  include_median=True,
  )

- cpik = analyzer.get_central_tendency_and_ci(
+ cpik = analyzer_module.get_central_tendency_and_ci(
  data=backend.divide_no_nan(spend_tensor, incremental_outcome),
  confidence_level=confidence_level,
  include_median=True,
@@ -2328,19 +2412,27 @@ class BudgetOptimizer:
  total_spend = np.sum(spend) if np.sum(spend) > 0 else 1
  pct_of_spend = spend / total_spend
  data_vars = {
- c.SPEND: ([c.CHANNEL], spend.data),
- c.PCT_OF_SPEND: ([c.CHANNEL], pct_of_spend.data),
+ c.SPEND: ([c.CHANNEL], np.array(spend.data, dtype=np.float64)),
+ c.PCT_OF_SPEND: (
+ [c.CHANNEL],
+ np.array(pct_of_spend.data, dtype=np.float64),
+ ),
  c.INCREMENTAL_OUTCOME: (
  [c.CHANNEL, c.METRIC],
- incremental_outcome_with_mean_median_and_ci,
+ np.array(
+ incremental_outcome_with_mean_median_and_ci, dtype=np.float64
+ ),
  ),
  c.EFFECTIVENESS: (
  [c.CHANNEL, c.METRIC],
- effectiveness_with_mean_median_and_ci,
+ np.array(effectiveness_with_mean_median_and_ci, dtype=np.float64),
+ ),
+ c.ROI: ([c.CHANNEL, c.METRIC], np.array(roi, dtype=np.float64)),
+ c.MROI: (
+ [c.CHANNEL, c.METRIC],
+ np.array(marginal_roi, dtype=np.float64),
  ),
- c.ROI: ([c.CHANNEL, c.METRIC], roi),
- c.MROI: ([c.CHANNEL, c.METRIC], marginal_roi),
- c.CPIK: ([c.CHANNEL, c.METRIC], cpik),
+ c.CPIK: ([c.CHANNEL, c.METRIC], np.array(cpik, dtype=np.float64)),
  }

  all_times = np.asarray(filled_data.time).astype(str).tolist()
@@ -2374,7 +2466,8 @@ class BudgetOptimizer:
  i: int,
  incremental_outcome_grid: np.ndarray,
  multipliers_grid: backend.Tensor,
- new_data: analyzer.DataTensors | None = None,
+ new_data: analyzer_module.DataTensors | None = None,
+ selected_geos: Sequence[str] | None = None,
  selected_times: Sequence[str] | Sequence[bool] | None = None,
  use_posterior: bool = True,
  use_kpi: bool = False,
@@ -2396,6 +2489,9 @@ class BudgetOptimizer:
  tensors is provided with a different number of time periods than in
  `InputData`, then all tensors must be provided with the same number of
  time periods.
+ selected_geos: Optional list containing a subset of geos to include. By
+ default, all geos are included. The selected geos should match those in
+ `InputData.geo`.
  selected_times: Optional list of times to optimize. This can either be a
  string list containing a subset of time dimension coordinates from
  `InputData.time` or a boolean list with length equal to the time
@@ -2416,7 +2512,7 @@ class BudgetOptimizer:
  reducing `batch_size`. The calculation will generally be faster with
  larger `batch_size` values.
  """
- new_data = new_data or analyzer.DataTensors()
+ new_data = new_data or analyzer_module.DataTensors()
  filled_data = new_data.validate_and_fill_missing_data(
  c.PAID_DATA, self._meridian
  )
@@ -2455,12 +2551,13 @@ class BudgetOptimizer:
  np.asarray(
  self._analyzer.incremental_outcome(
  use_posterior=use_posterior,
- new_data=analyzer.DataTensors(
+ new_data=analyzer_module.DataTensors(
  media=new_media,
  reach=new_reach,
  frequency=new_frequency,
  revenue_per_kpi=filled_data.revenue_per_kpi,
  ),
+ selected_geos=selected_geos,
  selected_times=selected_times,
  use_kpi=use_kpi,
  include_non_paid_channels=False,
@@ -2477,7 +2574,8 @@ class BudgetOptimizer:
  spend_bound_lower: np.ndarray,
  spend_bound_upper: np.ndarray,
  step_size: int,
- new_data: analyzer.DataTensors | None = None,
+ new_data: analyzer_module.DataTensors | None = None,
+ selected_geos: Sequence[str] | None = None,
  selected_times: Sequence[str] | Sequence[bool] | None = None,
  use_posterior: bool = True,
  use_kpi: bool = False,
@@ -2500,6 +2598,9 @@ class BudgetOptimizer:
  tensors is provided with a different number of time periods than in
  `InputData`, then all tensors must be provided with the same number of
  time periods.
+ selected_geos: Optional list containing a subset of geos to include. By
+ default, all geos are included. The selected geos should match those in
+ `InputData.geo`.
  selected_times: Optional list of times to optimize. This can either be a
  string list containing a subset of time dimension coordinates from
  `InputData.time` or a boolean list with length equal to the time
@@ -2556,6 +2657,7 @@ class BudgetOptimizer:
  i=i,
  incremental_outcome_grid=incremental_outcome_grid,
  multipliers_grid=multipliers_grid,
+ selected_geos=selected_geos,
  selected_times=selected_times,
  new_data=new_data,
  use_posterior=use_posterior,
@@ -2571,20 +2673,15 @@ class BudgetOptimizer:
  # we use the following code to fix it, and ensure incremental_outcome/spend
  # is always same for RF channels.
  if self._meridian.n_rf_channels > 0:
- rf_incremental_outcome_max = np.nanmax(
- incremental_outcome_grid[:, -self._meridian.n_rf_channels :], axis=0
- )
- rf_spend_max = np.nanmax(
- spend_grid[:, -self._meridian.n_rf_channels :], axis=0
- )
- rf_roi = backend.divide_no_nan(rf_incremental_outcome_max, rf_spend_max)
- incremental_outcome_grid[:, -self._meridian.n_rf_channels :] = (
- rf_roi * spend_grid[:, -self._meridian.n_rf_channels :]
+ incremental_outcome_grid = backend.stabilize_rf_roi_grid(
+ spend_grid, incremental_outcome_grid, self._meridian.n_rf_channels
  )
  return (spend_grid, incremental_outcome_grid)

  def _validate_optimization_tensors(
  self,
+ expected_n_geos: int,
+ expected_n_times: int,
  cpmu: backend.Tensor | None = None,
  cprf: backend.Tensor | None = None,
  media: backend.Tensor | None = None,
@@ -2601,11 +2698,21 @@ class BudgetOptimizer:
  'If `media` or `media_spend` is provided, then `cpmu` must also be'
  ' provided.'
  )
+ if (media is None and media_spend is None) and cpmu is not None:
+ raise ValueError(
+ 'If `cpmu` is provided, then one of `media` or `media_spend` must'
+ ' also be provided.'
+ )
  if (rf_impressions is not None or rf_spend is not None) and cprf is None:
  raise ValueError(
  'If `reach` and `frequency` or `rf_spend` is provided, then `cprf`'
  ' must also be provided.'
  )
+ if (rf_impressions is None and rf_spend is None) and cprf is not None:
+ raise ValueError(
+ 'If `cprf` is provided, then one of `rf_impressions` or `rf_spend`'
+ ' must also be provided.'
+ )
  if media is not None and media_spend is not None:
  raise ValueError('Only one of `media` or `media_spend` can be provided.')
  if rf_impressions is not None and rf_spend is not None:
@@ -2623,26 +2730,44 @@ class BudgetOptimizer:
  'If `use_optimal_frequency` is `False`, then `frequency` must be'
  ' provided.'
  )
-
- n_geos = [
- t.shape[0]
- for t in [
- cpmu,
- cprf,
- media,
- rf_impressions,
- frequency,
- media_spend,
- rf_spend,
- ]
- if t is not None and t.ndim == 3
+ n_geos_list = []
+ n_times_list = []
+ tensor_list = [
+ cpmu,
+ cprf,
+ media,
+ rf_impressions,
+ frequency,
+ media_spend,
+ rf_spend,
  ]
+ for t in tensor_list:
+ # `(n_geos, T, n_channels)` shape
+ if t is not None and t.ndim == 3:
+ n_geos_list.append(t.shape[0])
+ n_times_list.append(t.shape[1])
+ # `(T, n_channels)` shape
+ elif t is not None and t.ndim == 2:
+ n_times_list.append(t.shape[0])
+
+ # `(n_geos, T)` shape
  if revenue_per_kpi is not None and revenue_per_kpi.ndim == 2:
- n_geos.append(revenue_per_kpi.shape[0])
- if any(n_geo != self._meridian.n_geos for n_geo in n_geos):
+ n_geos_list.append(revenue_per_kpi.shape[0])
+ n_times_list.append(revenue_per_kpi.shape[1])
+ # `(T)` shape
+ elif revenue_per_kpi is not None and revenue_per_kpi.ndim == 1:
+ n_times_list.append(revenue_per_kpi.shape[0])
+
+ if any(n_geo != expected_n_geos for n_geo in n_geos_list):
+ raise ValueError(
+ 'All tensors with a geo dimension must have'
+ f' {expected_n_geos} geos (as defined in `meridian.InputData`).'
+ )
+
+ if any(n_time != expected_n_times for n_time in n_times_list):
  raise ValueError(
- 'All tensors with a geo dimension must have the same number of geos'
- ' as in `meridian.InputData`.'
+ 'All tensors with a time dimension must have'
+ f' {expected_n_times} times (as defined in `time` argument).'
  )

  def _allocate_tensor_by_population(
@@ -2958,3 +3083,62 @@ def _expand_tensor(tensor: backend.Tensor, required_shape: tuple[int, ...]):
  f'Cannot expand tensor with shape {tensor.shape} to target'
  f' {required_shape}.'
  )
+
+
+ def _expand_selected_times(
+ meridian: model.Meridian,
+ start_date: tc.Date,
+ end_date: tc.Date,
+ new_data: analyzer_module.DataTensors | None,
+ return_flexible_str: bool = False,
+ ) -> Sequence[str] | Sequence[bool] | None:
+ """Creates selected_times from start_date and end_date.
+
+ This function creates `selected_times` argument based on `start_date`,
+ `end_date` and `new_data`. If `new_data` is not used or used with unmodified
+ times, dates are selected from `meridian.input_data.time`. In the flexible
+ time scenario, when `new_data` is provided with modified times, dates are
+ selected from `new_data.time`. In this case, `new_data.time` must be provided
+ and the function returns a list of booleans.
+
+ Args:
+ meridian: The `Meridian` object with original data.
+ start_date: Start date of the selected time period.
+ end_date: End date of the selected time period.
+ new_data: The optional `DataTensors` object. If times are modified in
+ `new_data`, then `new_data.time` must be provided.
+ return_flexible_str: Whether to return a list of strings or a list of
+ booleans in case time is modified in `new_data`.
+
+ Returns:
+ If both `start_date` and `end_date` are `None`, returns `None`. If
+ `new_data` is not used or used with unmodified times, returns a list of
+ strings with selected dates. If `new_data` is used with modified times,
+ returns a list of strings or a list of booleans depending on the
+ `return_flexible_str` argument.
+ """
+ if start_date is None and end_date is None:
+ return None
+
+ new_data = new_data or analyzer_module.DataTensors()
+ if new_data.get_modified_times(meridian) is None:
+ return meridian.expand_selected_time_dims(
+ start_date=start_date,
+ end_date=end_date,
+ )
+ else:
+ assert new_data.time is not None
+ new_times_str = np.asarray(new_data.time).astype(str).tolist()
+ time_coordinates = tc.TimeCoordinates.from_dates(new_times_str)
+ expanded_dates = time_coordinates.expand_selected_time_dims(
+ start_date=start_date,
+ end_date=end_date,
+ )
+ if expanded_dates is None:
+ expanded_dates = time_coordinates.all_dates
+ expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
+ if return_flexible_str:
+ return [x for x in new_times_str if x in expanded_str]
+ # TODO: Remove once every method uses `new_data.time`.
+ else:
+ return [x in expanded_str for x in new_times_str]
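The `iterative_roi_grid` hunks earlier in this diff replace `backend.divide_no_nan` with NumPy's `np.divide` using `out=` and `where=`, which writes 0 wherever the spend delta is zero instead of producing NaN or inf. The following is a standalone sketch of that division pattern using made-up toy values, not data or code from the package itself.

    import numpy as np

    # Toy spend / incremental-outcome grids: rows are spend steps, columns are channels.
    spend_grid = np.array([[100.0, 50.0], [120.0, 50.0], [140.0, 50.0]])
    incremental_outcome_grid = np.array([[10.0, 5.0], [14.0, 5.0], [16.0, 5.0]])

    # Marginal ROI of each step relative to the first row; zero spend deltas yield 0, not NaN.
    numerator = incremental_outcome_grid[1:, :] - incremental_outcome_grid[0, :]
    denominator = spend_grid[1:, :] - spend_grid[0, :]
    iterative_roi = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=(denominator != 0),
    )
    print(np.round(iterative_roi, decimals=8))  # [[0.2, 0.0], [0.15, 0.0]] for these toy values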