PyPI - google-meridian - Versions diffs - 1.0.8__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

google-meridian 1.0.8py3-none-any.whl → 1.1.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

{google_meridian-1.0.8.dist-info → google_meridian-1.1.0.dist-info}/METADATA +2 -2
google_meridian-1.1.0.dist-info/RECORD +41 -0
{google_meridian-1.0.8.dist-info → google_meridian-1.1.0.dist-info}/WHEEL +1 -1
meridian/__init__.py +1 -1
meridian/analysis/analyzer.py +303 -207
meridian/analysis/optimizer.py +431 -82
meridian/analysis/summarizer.py +25 -7
meridian/analysis/test_utils.py +81 -81
meridian/analysis/visualizer.py +81 -39
meridian/constants.py +111 -26
meridian/data/input_data.py +115 -19
meridian/data/test_utils.py +116 -5
meridian/data/time_coordinates.py +3 -3
meridian/model/media.py +133 -98
meridian/model/model.py +457 -52
meridian/model/model_test_data.py +11 -0
meridian/model/posterior_sampler.py +120 -43
meridian/model/prior_distribution.py +95 -29
meridian/model/prior_sampler.py +179 -209
meridian/model/spec.py +196 -36
meridian/model/transformers.py +15 -3
google_meridian-1.0.8.dist-info/RECORD +0 -41
{google_meridian-1.0.8.dist-info → google_meridian-1.1.0.dist-info}/licenses/LICENSE +0 -0
{google_meridian-1.0.8.dist-info → google_meridian-1.1.0.dist-info}/top_level.txt +0 -0

meridian/analysis/optimizer.py CHANGED Viewed

@@ -28,6 +28,7 @@ from meridian import constants as c
 from meridian.analysis import analyzer
 from meridian.analysis import formatter
 from meridian.analysis import summary_text
+from meridian.data import time_coordinates as tc
 from meridian.model import model
 import numpy as np
 import pandas as pd
@@ -97,6 +98,8 @@ class OptimizationGrid:
     use_kpi: Whether using generic KPI or revenue.
     use_posterior: Whether posterior distributions were used, or prior.
     use_optimal_frequency: Whether optimal frequency was used.
+    start_date: The start date of the optimization period.
+    end_date: The end date of the optimization period.
     gtol: Float indicating the acceptable relative error for the budget used in
       the grid setup. The budget is rounded by `10*n`, where `n` is the smallest
       integer such that `(budget - rounded_budget)` is less than or equal to
@@ -116,10 +119,12 @@ class OptimizationGrid:
   use_kpi: bool
   use_posterior: bool
   use_optimal_frequency: bool
+  start_date: tc.Date
+  end_date: tc.Date
   gtol: float
   round_factor: int
   optimal_frequency: np.ndarray | None
-  selected_times: list[str] | None
+  selected_times: Sequence[str] | None
   @property
   def grid_dataset(self) -> xr.Dataset:
@@ -189,7 +194,7 @@ class OptimizationGrid:
       variables:
         * `optimized`: media spend that maximizes incremental outcome based
         on spend constraints for all media and RF channels.
-        * `non_optimized`: Channel-level spend.
+        * `non_optimized`: rounded channel-level spend.
     Raises:
       A warning if the budget's rounding should be different from the grid's
@@ -226,7 +231,7 @@ class OptimizationGrid:
             spend_constraint_upper=spend_constraint_upper,
         )
     )
-    self._check_optimization_bounds(
+    self.check_optimization_bounds(
         lower_bound=optimization_lower_bound,
         upper_bound=optimization_upper_bound,
     )
@@ -235,16 +240,16 @@ class OptimizationGrid:
       warnings.warn(
           'Optimization accuracy may suffer owing to budget level differences.'
           ' Consider creating a new grid with smaller `gtol` if you intend to'
-          " shrink budgets significantly. It's only a problem when you use a"
-          ' smaller budget, for which the intended step size is meant to be'
-          ' smaller for one or more channels.'
+          ' shrink total budget significantly across optimization runs.'
+          ' It is only a problem when you use a much smaller budget, '
+          ' for which the intended step size is smaller. '
       )
     (spend_grid, incremental_outcome_grid) = self._trim_grid(
         spend_bound_lower=optimization_lower_bound,
         spend_bound_upper=optimization_upper_bound,
     )
+    rounded_spend = np.round(spend, self.round_factor).astype(int)
     if isinstance(scenario, FixedBudgetScenario):
-      rounded_spend = np.round(spend, self.round_factor)
       scenario = dataclasses.replace(
           scenario, total_budget=np.sum(rounded_spend)
       )
@@ -258,7 +263,7 @@ class OptimizationGrid:
         coords={c.CHANNEL: self.channels},
         data_vars={
             c.OPTIMIZED: ([c.CHANNEL], optimal_spend.data),
-            c.NON_OPTIMIZED: ([c.CHANNEL], spend),
+            c.NON_OPTIMIZED: ([c.CHANNEL], rounded_spend),
         },
     )
@@ -344,8 +349,10 @@ class OptimizationGrid:
     grid coverage and they are rounded using this grid's round factor.
     Args:
-      spend_bound_lower: The lower bound of spend for each channel.
-      spend_bound_upper: The upper bound of spend for each channel.
+      spend_bound_lower: The lower bound of spend for each channel. Must be in
+        the same order as `self.channels`.
+      spend_bound_upper: The upper bound of spend for each channel. Must be in
+        the same order as `self.channels`.
     Returns:
       updated_spend: The updated spend grid with valid spend values moved up to
@@ -382,7 +389,7 @@ class OptimizationGrid:
     return (updated_spend, updated_incremental_outcome)
-  def _check_optimization_bounds(
+  def check_optimization_bounds(
       self,
       lower_bound: np.ndarray,
       upper_bound: np.ndarray,
@@ -391,9 +398,9 @@ class OptimizationGrid:
     Args:
       lower_bound: `np.ndarray` of shape `(n_channels,)` containing the lower
-        bound for each channel.
+        bound for each channel. Must be in the same order as `self.channels`.
       upper_bound: `np.ndarray` of shape `(n_channels,)` containing the upper
-        bound for each channel.
+        bound for each channel. Must be in the same order as `self.channels`.
     Raises:
       ValueError: If the spend grid does not fit within the optimization bounds.
@@ -621,7 +628,7 @@ class OptimizationResults:
     # by adjusting the domain of the y-axis so that the incremental outcome does
     # not start at 0. Calculate the total decrease in incremental outcome to pad
     # the y-axis from the non-optimized total incremental outcome value.
-    sum_decr = sum(df[df.incremental_outcome < 0].incremental_outcome)
+    sum_decr = df[df.incremental_outcome < 0].incremental_outcome.sum()
     y_padding = float(f'1e{int(math.log10(-sum_decr))}') if sum_decr < 0 else 2
     domain_scale = [
         self.nonoptimized_data.total_incremental_outcome + sum_decr - y_padding,
@@ -1016,8 +1023,16 @@ class OptimizationResults:
   def _gen_optimization_summary(self) -> str:
     """Generates HTML optimization summary output (as sanitized content str)."""
-    self.template_env.globals[c.START_DATE] = self.optimized_data.start_date
-    self.template_env.globals[c.END_DATE] = self.optimized_data.end_date
+    start_date = tc.normalize_date(self.optimized_data.start_date)
+    self.template_env.globals[c.START_DATE] = start_date.strftime(
+        f'%b {start_date.day}, %Y'
+    )
+    interval_days = self.meridian.input_data.time_coordinates.interval_days
+    end_date = tc.normalize_date(self.optimized_data.end_date)
+    end_date_adjusted = end_date + pd.Timedelta(days=interval_days)
+    self.template_env.globals[c.END_DATE] = end_date_adjusted.strftime(
+        f'%b {end_date_adjusted.day}, %Y'
+    )
     html_template = self.template_env.get_template('summary.html.jinja')
     return html_template.render(
@@ -1129,21 +1144,26 @@ class OptimizationResults:
         - self.nonoptimized_data.total_incremental_outcome
     )
     inc_outcome_prefix = '+' if inc_outcome_diff > 0 else ''
+    currency = '$' if outcome == c.REVENUE else ''
     non_optimized_inc_outcome = formatter.StatsSpec(
         title=summary_text.NON_OPTIMIZED_INC_OUTCOME_LABEL.format(
             outcome=outcome
         ),
-        stat=formatter.format_monetary_num(
-            self.nonoptimized_data.total_incremental_outcome,
+        stat=formatter.compact_number(
+            n=self.nonoptimized_data.total_incremental_outcome,
+            precision=0,
+            currency=currency,
         ),
     )
     optimized_inc_outcome = formatter.StatsSpec(
         title=summary_text.OPTIMIZED_INC_OUTCOME_LABEL.format(outcome=outcome),
-        stat=formatter.format_monetary_num(
-            self.optimized_data.total_incremental_outcome,
+        stat=formatter.compact_number(
+            n=self.optimized_data.total_incremental_outcome,
+            precision=0,
+            currency=currency,
         ),
         delta=inc_outcome_prefix
-        + formatter.format_monetary_num(inc_outcome_diff),
+        + formatter.compact_number(inc_outcome_diff, 0, currency),
     )
     return [
         non_optimized_budget,
@@ -1265,8 +1285,12 @@ class BudgetOptimizer:
   def optimize(
       self,
+      new_data: analyzer.DataTensors | None = None,
       use_posterior: bool = True,
+      # TODO: b/409550413 - Remove this argument.
       selected_times: tuple[str | None, str | None] | None = None,
+      start_date: tc.Date = None,
+      end_date: tc.Date = None,
       fixed_budget: bool = True,
       budget: float | None = None,
       pct_of_spend: Sequence[float] | None = None,
@@ -1279,23 +1303,61 @@ class BudgetOptimizer:
       use_kpi: bool = False,
       confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
       batch_size: int = c.DEFAULT_BATCH_SIZE,
+      optimization_grid: OptimizationGrid | None = None,
   ) -> OptimizationResults:
     """Finds the optimal budget allocation that maximizes outcome.
-    Outcome is typically revenue, but when the KPI is not revenue and "revenue
-    per KPI" data is not available, then Meridian defines the Outcome to be the
-    KPI itself.
+    Optimization depends on the following:
+    1. Flighting pattern (the relative allocation of a channel's media units
+       across geos and time periods, which is held fixed for each channel)
+    2. Cost per media unit (This is assumed to be constant for each channel, and
+       can optionally vary by geo and/or time period)
+    3. `pct_of_spend` (center of the spend box constraint for each channel)
+    4. `budget` (total budget used for fixed budget scenarios)
+    By default, these values are assigned based on the historical data. The
+    `pct_of_spend` and `budget` are optimization arguments that can be
+    overridden directly. Passing `new_data.media` (or `new_data.reach` or
+    `new_data.frequency`) will override both the flighting pattern and cost per
+    media unit. Passing `new_data.media_spend` (or `new_data.rf_spend`) will
+    only override the cost per media unit.
+    If `new_data` is passed with a different number of time periods than the
+    historical data, then all of the optimization parameters will be inferred
+    from it. Default values for `pct_of_spend` and `budget` (if
+    `fixed_budget=True`) will be inferred from the `new_data`, but can be
+    overridden using the `pct_of_spend` and `budget` arguments.
+    If `start_date` or `end_date` is specified, then the default values are
+    inferred based on the subset of time periods specified. Both start and end
+    time selectors should align with the Meridian time dimension coordinates in
+    the underlying model if optimizing the original data. If `new_data` is
+    provided with a different number of time periods than in `InputData`, then
+    the start and end time coordinates must match the time dimensions in
+    `new_data.time`. By default, all time periods are used. Either start or
+    end time component can be `None` to represent the first or the last time
+    coordinate, respectively.
     Args:
+      new_data: An optional `DataTensors` container with optional tensors:
+        `media`, `reach`, `frequency`, `media_spend`, `rf_spend`,
+        `revenue_per_kpi`, and `time`. If `None`, the original tensors from the
+        Meridian object are used. If `new_data` is provided, the optimization is
+        run on the versions of the tensors in `new_data` and the original
+        versions of all the remaining tensors. If any of the tensors in
+        `new_data` is provided with a different number of time periods than in
+        `InputData`, then all tensors must be provided with the same number of
+        time periods and the `time` tensor must be provided.
       use_posterior: Boolean. If `True`, then the budget is optimized based on
         the posterior distribution of the model. Otherwise, the prior
         distribution is used.
-      selected_times: Tuple containing the start and end time dimension
-        coordinates for the duration to run the optimization on. Selected time
-        values should align with the Meridian time dimension coordinates in the
-        underlying model. By default, all times periods are used. Either start
-        or end time component can be `None` to represent the first or the last
-        time coordinate, respectively.
+      selected_times: Deprecated. Tuple containing the start and end time
+        dimension coordinates for the duration to run the optimization on.
+        Please use `start_date` and `end_date` instead.
+      start_date: Optional start date selector, *inclusive*, in _yyyy-mm-dd_
+        format. Default is `None`, i.e. the first time period.
+      end_date: Optional end date selector, *inclusive*, in _yyyy-mm-dd_ format.
+        Default is `None`, i.e. the last time period.
       fixed_budget: Boolean indicating whether it's a fixed budget optimization
         or flexible budget optimization. Defaults to `True`. If `False`, must
         specify either `target_roi` or `target_mroi`.
@@ -1347,11 +1409,27 @@ class BudgetOptimizer:
         in batches to avoid memory exhaustion. If a memory error occurs, try
         reducing `batch_size`. The calculation will generally be faster with
         larger `batch_size` values.
+      optimization_grid: An `OptimizationGrid` object containing the grid
+        information. Grid creation is a time-consuming part of optimization.
+        Creating one grid and running various optimizations on it can save time.
+        If `None`, or if the grid doesn't match the optimization arguments, a
+        new grid will be created.
     Returns:
       An `OptimizationResults` object containing optimized budget allocation
       datasets, along with some of the intermediate values used to derive them.
     """
+    if selected_times is not None:
+      warnings.warn(
+          '`selected_times` is deprecated. Please use `start_date` and'
+          ' `end_date` instead.',
+          DeprecationWarning,
+          stacklevel=2,
+      )
+      deprecated_start_date, deprecated_end_date = selected_times
+      start_date = start_date or deprecated_start_date
+      end_date = end_date or deprecated_end_date
     _validate_budget(
         fixed_budget=fixed_budget,
         budget=budget,
@@ -1367,18 +1445,36 @@ class BudgetOptimizer:
       spend_constraint_lower = spend_constraint_default
     if spend_constraint_upper is None:
       spend_constraint_upper = spend_constraint_default
-    optimization_grid = self.create_optimization_grid(
-        selected_times=selected_times,
+    use_grid_arg = optimization_grid is not None and self._validate_grid(
+        new_data=new_data,
+        use_posterior=use_posterior,
+        start_date=start_date,
+        end_date=end_date,
         budget=budget,
         pct_of_spend=pct_of_spend,
         spend_constraint_lower=spend_constraint_lower,
         spend_constraint_upper=spend_constraint_upper,
         gtol=gtol,
-        use_posterior=use_posterior,
-        use_kpi=use_kpi,
         use_optimal_frequency=use_optimal_frequency,
-        batch_size=batch_size,
+        use_kpi=use_kpi,
+        optimization_grid=optimization_grid,
     )
+    if optimization_grid is None or not use_grid_arg:
+      optimization_grid = self.create_optimization_grid(
+          new_data=new_data,
+          start_date=start_date,
+          end_date=end_date,
+          budget=budget,
+          pct_of_spend=pct_of_spend,
+          spend_constraint_lower=spend_constraint_lower,
+          spend_constraint_upper=spend_constraint_upper,
+          gtol=gtol,
+          use_posterior=use_posterior,
+          use_kpi=use_kpi,
+          use_optimal_frequency=use_optimal_frequency,
+          batch_size=batch_size,
+      )
     if fixed_budget:
       scenario = FixedBudgetScenario(total_budget=budget)
     elif target_roi:
@@ -1399,24 +1495,24 @@ class BudgetOptimizer:
     use_historical_budget = budget is None or np.isclose(
         budget, np.sum(optimization_grid.historical_spend)
     )
-    rounded_spend = np.round(
-        spend.non_optimized, optimization_grid.round_factor
-    ).astype(int)
+    new_data = new_data or analyzer.DataTensors()
     nonoptimized_data = self._create_budget_dataset(
+        new_data=new_data.filter_fields(c.PAID_DATA + (c.TIME,)),
         use_posterior=use_posterior,
         use_kpi=use_kpi,
         hist_spend=optimization_grid.historical_spend,
-        spend=rounded_spend,
+        spend=spend.non_optimized,
         selected_times=optimization_grid.selected_times,
         confidence_level=confidence_level,
         batch_size=batch_size,
         use_historical_budget=use_historical_budget,
     )
     nonoptimized_data_with_optimal_freq = self._create_budget_dataset(
+        new_data=new_data.filter_fields(c.PAID_DATA + (c.TIME,)),
         use_posterior=use_posterior,
         use_kpi=use_kpi,
         hist_spend=optimization_grid.historical_spend,
-        spend=rounded_spend,
+        spend=spend.non_optimized,
         selected_times=optimization_grid.selected_times,
         optimal_frequency=optimization_grid.optimal_frequency,
         confidence_level=confidence_level,
@@ -1431,6 +1527,7 @@ class BudgetOptimizer:
     elif target_mroi:
       constraints[c.TARGET_MROI] = target_mroi
     optimized_data = self._create_budget_dataset(
+        new_data=new_data.filter_fields(c.PAID_DATA + (c.TIME,)),
         use_posterior=use_posterior,
         use_kpi=use_kpi,
         hist_spend=optimization_grid.historical_spend,
@@ -1474,10 +1571,139 @@ class BudgetOptimizer:
         _optimization_grid=optimization_grid,
     )
+  def _validate_grid(
+      self,
+      new_data: analyzer.DataTensors | None,
+      use_posterior: bool,
+      start_date: tc.Date,
+      end_date: tc.Date,
+      budget: float | None,
+      pct_of_spend: Sequence[float] | None,
+      spend_constraint_lower: _SpendConstraint,
+      spend_constraint_upper: _SpendConstraint,
+      gtol: float,
+      use_optimal_frequency: bool,
+      use_kpi: bool,
+      optimization_grid: OptimizationGrid,
+  ) -> bool:
+    """Checks if the grid is valid for the optimization scenario."""
+    if use_posterior != optimization_grid.use_posterior:
+      warnings.warn(
+          'Given optimization grid was created with `use_posterior` ='
+          f' {optimization_grid.use_posterior}, but optimization was called'
+          f' with `use_posterior` = {use_posterior}. A new grid will be'
+          ' created.'
+      )
+      return False
+    if use_kpi != optimization_grid.use_kpi:
+      warnings.warn(
+          'Given optimization grid was created with `use_kpi` ='
+          f' {optimization_grid.use_kpi}, but optimization was called'
+          f' with `use_kpi` = {use_kpi}. A new grid will be'
+          ' created.'
+      )
+      return False
+    if use_optimal_frequency != optimization_grid.use_optimal_frequency:
+      warnings.warn(
+          'Given optimization grid was created with `use_optimal_frequency` ='
+          f' {optimization_grid.use_optimal_frequency}, but optimization was'
+          f' called with `use_optimal_frequency` = {use_optimal_frequency}. A'
+          ' new grid will be created.'
+      )
+      return False
+    if (
+        start_date != optimization_grid.start_date
+        or end_date != optimization_grid.end_date
+    ):
+      warnings.warn(
+          'Given optimization grid was created with `start_date` ='
+          f' {optimization_grid.start_date} and `end_date` ='
+          f' {optimization_grid.end_date}, but optimization was called with'
+          f' `start_date` = {start_date} and `end_date` = {end_date}. A new'
+          ' grid will be created.'
+      )
+      return False
+    if new_data is None:
+      new_data = analyzer.DataTensors()
+    required_tensors = c.PERFORMANCE_DATA + (c.TIME,)
+    filled_data = new_data.validate_and_fill_missing_data(
+        required_tensors_names=required_tensors, meridian=self._meridian
+    )
+    paid_channels = self._meridian.input_data.get_all_paid_channels()
+    if not np.array_equal(paid_channels, optimization_grid.channels):
+      warnings.warn(
+          'Given optimization grid was created with `channels` ='
+          f' {optimization_grid.channels}, but optimization request was'
+          f' resolved with `channels` = {paid_channels}. A new grid will be'
+          ' created.'
+      )
+      return False
+    n_channels = len(optimization_grid.channels)
+    selected_times = self._validate_selected_times(
+        start_date=start_date,
+        end_date=end_date,
+        new_data=new_data,
+    )
+    hist_spend = self._analyzer.get_aggregated_spend(
+        new_data=filled_data.filter_fields(c.PAID_CHANNELS + c.SPEND_DATA),
+        selected_times=selected_times,
+        include_media=self._meridian.n_media_channels > 0,
+        include_rf=self._meridian.n_rf_channels > 0,
+    ).data
+    budget = budget or np.sum(hist_spend)
+    valid_pct_of_spend = _validate_pct_of_spend(
+        n_channels=n_channels,
+        hist_spend=hist_spend,
+        pct_of_spend=pct_of_spend,
+    )
+    spend = budget * valid_pct_of_spend
+    (optimization_lower_bound, optimization_upper_bound) = (
+        _get_optimization_bounds(
+            n_channels=n_channels,
+            spend=spend,
+            round_factor=optimization_grid.round_factor,
+            spend_constraint_lower=spend_constraint_lower,
+            spend_constraint_upper=spend_constraint_upper,
+        )
+    )
+    try:
+      optimization_grid.check_optimization_bounds(
+          lower_bound=optimization_lower_bound,
+          upper_bound=optimization_upper_bound,
+      )
+    except ValueError as e:
+      warnings.warn(
+          'Optimization called with bounds that are not within the grid. A new'
+          f' grid will be created. Error message: {str(e)}'
+      )
+      return False
+    round_factor = _get_round_factor(budget, gtol)
+    if round_factor != optimization_grid.round_factor:
+      warnings.warn(
+          'Optimization accuracy may suffer owing to budget level differences.'
+          ' Consider creating a new grid with smaller `gtol` if you intend to'
+          ' shrink total budget significantly across optimization runs.'
+          ' It is only a problem when you use a much smaller budget, '
+          ' for which the intended step size is smaller.'
+      )
+    return True
   def create_optimization_grid(
       self,
+      new_data: xr.Dataset | None = None,
       use_posterior: bool = True,
+      # TODO: b/409550413 - Remove this argument.
       selected_times: tuple[str | None, str | None] | None = None,
+      start_date: tc.Date = None,
+      end_date: tc.Date = None,
       budget: float | None = None,
       pct_of_spend: Sequence[float] | None = None,
       spend_constraint_lower: _SpendConstraint = c.SPEND_CONSTRAINT_DEFAULT,
@@ -1489,16 +1715,35 @@ class BudgetOptimizer:
   ) -> OptimizationGrid:
     """Creates a OptimizationGrid for optimization.
+    If `start_date` or `end_date` is specified, then the default values are
+    inferred based on the subset of time periods specified. Both start and end
+    time selectors should align with the Meridian time dimension coordinates in
+    the underlying model if optimizing the original data. If `new_data` is
+    provided with a different number of time periods than in `InputData`, then
+    the start and end time coordinates must match the time dimensions in
+    `new_data.time`. By default, all time periods are used. Either start or
+    end time component can be `None` to represent the first or the last time
+    coordinate, respectively.
     Args:
+      new_data: An optional `DataTensors` container with optional tensors:
+        `media`, `reach`, `frequency`, `media_spend`, `rf_spend`,
+        `revenue_per_kpi`, and `time`. If `None`, the original tensors from the
+        Meridian object are used. If `new_data` is provided, the grid is created
+        using the versions of the tensors in `new_data` and the original
+        versions of all the remaining tensors. If any of the tensors in
+        `new_data` is provided with a different number of time periods than in
+        `InputData`, then all tensors must be provided with the same number of
+        time periods and the `time` tensor must be provided.
       use_posterior: Boolean. If `True`, then the incremental outcome is derived
         from the posterior distribution of the model. Otherwise, the prior
         distribution is used.
-      selected_times: Tuple containing the start and end time dimension
-        coordinates for the duration to run the optimization on. Selected time
-        values should align with the Meridian time dimension coordinates in the
-        underlying model. By default, all times periods are used. Either start
-        or end time component can be `None` to represent the first or the last
-        time coordinate, respectively.
+      selected_times: Deprecated. Tuple containing the start and end time
+        dimension coordinates. Please use `start_date` and `end_date` instead.
+      start_date: Optional start date selector, *inclusive*, in _yyyy-mm-dd_
+        format. Default is `None`, i.e. the first time period.
+      end_date: Optional end date selector, *inclusive*, in _yyyy-mm-dd_ format.
+        Default is `None`, i.e. the last time period.
       budget: Number indicating the total budget for the fixed budget scenario.
         Defaults to the historical budget.
       pct_of_spend: Numeric list of size `n_paid_channels` containing the
@@ -1545,16 +1790,32 @@ class BudgetOptimizer:
       An OptimizationGrid object containing the grid data for optimization.
     """
     self._validate_model_fit(use_posterior)
+    if new_data is None:
+      new_data = analyzer.DataTensors()
     if selected_times is not None:
-      start_date, end_date = selected_times
-      selected_time_dims = self._meridian.expand_selected_time_dims(
-          start_date=start_date,
-          end_date=end_date,
+      warnings.warn(
+          '`selected_times` is deprecated. Please use `start_date` and'
+          ' `end_date` instead.',
+          DeprecationWarning,
+          stacklevel=2,
       )
-    else:
-      selected_time_dims = None
-    hist_spend = self._analyzer.get_historical_spend(
-        selected_time_dims,
+      deprecated_start_date, deprecated_end_date = selected_times
+      start_date = start_date or deprecated_start_date
+      end_date = end_date or deprecated_end_date
+    required_tensors = c.PERFORMANCE_DATA + (c.TIME,)
+    filled_data = new_data.validate_and_fill_missing_data(
+        required_tensors_names=required_tensors, meridian=self._meridian
+    )
+    selected_times = self._validate_selected_times(
+        start_date=start_date,
+        end_date=end_date,
+        new_data=filled_data,
+    )
+    hist_spend = self._analyzer.get_aggregated_spend(
+        new_data=filled_data.filter_fields(c.PAID_CHANNELS + c.SPEND_DATA),
+        selected_times=selected_times,
         include_media=self._meridian.n_media_channels > 0,
         include_rf=self._meridian.n_rf_channels > 0,
     ).data
@@ -1579,8 +1840,9 @@ class BudgetOptimizer:
     if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
       optimal_frequency = tf.convert_to_tensor(
           self._analyzer.optimal_freq(
+              new_data=filled_data.filter_fields(c.RF_DATA),
               use_posterior=use_posterior,
-              selected_times=selected_time_dims,
+              selected_times=selected_times,
               use_kpi=use_kpi,
           ).optimal_frequency,
           dtype=tf.float32,
@@ -1594,7 +1856,8 @@ class BudgetOptimizer:
         spend_bound_lower=optimization_lower_bound,
         spend_bound_upper=optimization_upper_bound,
         step_size=step_size,
-        selected_times=selected_time_dims,
+        selected_times=selected_times,
+        new_data=filled_data.filter_fields(c.PAID_DATA),
         use_posterior=use_posterior,
         use_kpi=use_kpi,
         optimal_frequency=optimal_frequency,
@@ -1612,10 +1875,12 @@ class BudgetOptimizer:
         use_kpi=use_kpi,
         use_posterior=use_posterior,
         use_optimal_frequency=use_optimal_frequency,
+        start_date=start_date,
+        end_date=end_date,
         gtol=gtol,
         round_factor=round_factor,
         optimal_frequency=optimal_frequency,
-        selected_times=selected_time_dims,
+        selected_times=selected_times,
     )
   def _create_grid_dataset(
@@ -1658,10 +1923,38 @@ class BudgetOptimizer:
         attrs={c.SPEND_STEP_SIZE: spend_step_size},
     )
+  def _validate_selected_times(
+      self,
+      start_date: tc.Date,
+      end_date: tc.Date,
+      new_data: analyzer.DataTensors | None,
+  ) -> Sequence[str] | Sequence[bool] | None:
+    """Validates and returns the selected times."""
+    if start_date is None and end_date is None:
+      return None
+    new_data = new_data or analyzer.DataTensors()
+    if new_data.get_modified_times(self._meridian) is None:
+      return self._meridian.expand_selected_time_dims(
+          start_date=start_date,
+          end_date=end_date,
+      )
+    else:
+      assert new_data.time is not None
+      new_times_str = new_data.time.numpy().astype(str).tolist()
+      time_coordinates = tc.TimeCoordinates.from_dates(new_times_str)
+      expanded_dates = time_coordinates.expand_selected_time_dims(
+          start_date=start_date,
+          end_date=end_date,
+      )
+      expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
+      return [x in expanded_str for x in new_times_str]
   def _get_incremental_outcome_tensors(
       self,
       hist_spend: np.ndarray,
       spend: np.ndarray,
+      new_data: analyzer.DataTensors | None = None,
       optimal_frequency: Sequence[float] | None = None,
   ) -> tuple[
       tf.Tensor | None,
@@ -1686,6 +1979,11 @@ class BudgetOptimizer:
     Args:
       hist_spend: historical spend data.
       spend: new optimized spend data.
+      new_data: An optional `DataTensors` object containing the new `media`,
+        `reach`, and `frequency` tensors. If `None`, the existing tensors from
+        the Meridian object are used. If any of the tensors is provided with a
+        different number of time periods than in `InputData`, then all tensors
+        must be provided with the same number of time periods.
       optimal_frequency: xr.DataArray with dimension `n_rf_channels`, containing
         the optimal frequency per channel, that maximizes posterior mean roi.
         Value is `None` if the model does not contain reach and frequency data,
@@ -1696,13 +1994,18 @@ class BudgetOptimizer:
       Tuple of tf.tensors (new_media, new_media_spend, new_reach, new_frequency,
       new_rf_spend).
     """
+    new_data = new_data or analyzer.DataTensors()
+    filled_data = new_data.validate_and_fill_missing_data(
+        c.PAID_CHANNELS,
+        self._meridian,
+    )
     if self._meridian.n_media_channels > 0:
       new_media = (
           tf.math.divide_no_nan(
               spend[: self._meridian.n_media_channels],
               hist_spend[: self._meridian.n_media_channels],
           )
-          * self._meridian.media_tensors.media
+          * filled_data.media
       )
       new_media_spend = tf.convert_to_tensor(
           spend[: self._meridian.n_media_channels]
@@ -1711,9 +2014,7 @@ class BudgetOptimizer:
       new_media = None
       new_media_spend = None
     if self._meridian.n_rf_channels > 0:
-      rf_media = (
-          self._meridian.rf_tensors.reach * self._meridian.rf_tensors.frequency
-      )
+      rf_media = filled_data.reach * filled_data.frequency
       new_rf_media = (
           tf.math.divide_no_nan(
               spend[-self._meridian.n_rf_channels :],
@@ -1722,7 +2023,7 @@ class BudgetOptimizer:
           * rf_media
       )
       frequency = (
-          self._meridian.rf_tensors.frequency
+          filled_data.frequency
           if optimal_frequency is None
           else optimal_frequency
       )
@@ -1742,9 +2043,10 @@ class BudgetOptimizer:
       self,
       hist_spend: np.ndarray,
       spend: np.ndarray,
+      new_data: analyzer.DataTensors | None = None,
       use_posterior: bool = True,
       use_kpi: bool = False,
-      selected_times: Sequence[str] | None = None,
+      selected_times: Sequence[str] | Sequence[bool] | None = None,
       optimal_frequency: Sequence[float] | None = None,
       attrs: Mapping[str, Any] | None = None,
       confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
@@ -1752,15 +2054,22 @@ class BudgetOptimizer:
       use_historical_budget: bool = True,
   ) -> xr.Dataset:
     """Creates the budget dataset."""
+    new_data = new_data or analyzer.DataTensors()
+    filled_data = new_data.validate_and_fill_missing_data(
+        c.PAID_DATA + (c.TIME,),
+        self._meridian,
+    )
     spend = tf.convert_to_tensor(spend, dtype=tf.float32)
     hist_spend = tf.convert_to_tensor(hist_spend, dtype=tf.float32)
     (new_media, new_media_spend, new_reach, new_frequency, new_rf_spend) = (
         self._get_incremental_outcome_tensors(
-            hist_spend, spend, optimal_frequency
+            hist_spend,
+            spend,
+            new_data=filled_data.filter_fields(c.PAID_CHANNELS),
+            optimal_frequency=optimal_frequency,
         )
     )
     budget = np.sum(spend)
-    all_times = self._meridian.input_data.time.values.tolist()
     # incremental_outcome here is a tensor with the shape
     # (n_chains, n_draws, n_channels)
@@ -1770,6 +2079,7 @@ class BudgetOptimizer:
             media=new_media,
             reach=new_reach,
             frequency=new_frequency,
+            revenue_per_kpi=filled_data.revenue_per_kpi,
         ),
         selected_times=selected_times,
         use_kpi=use_kpi,
@@ -1792,6 +2102,9 @@ class BudgetOptimizer:
     )
     aggregated_impressions = self._analyzer.get_aggregated_impressions(
+        new_data=analyzer.DataTensors(
+            media=new_media, reach=new_reach, frequency=new_frequency
+        ),
         selected_times=selected_times,
         selected_geos=None,
         aggregate_times=True,
@@ -1799,10 +2112,11 @@ class BudgetOptimizer:
         optimal_frequency=optimal_frequency,
         include_non_paid_channels=False,
     )
-    effectiveness = incremental_outcome / aggregated_impressions
     effectiveness_with_mean_median_and_ci = (
         analyzer.get_central_tendency_and_ci(
-            data=effectiveness,
+            data=tf.math.divide_no_nan(
+                incremental_outcome, aggregated_impressions
+            ),
             confidence_level=confidence_level,
             include_median=True,
         )
@@ -1822,6 +2136,7 @@ class BudgetOptimizer:
                 frequency=new_frequency,
                 media_spend=new_media_spend,
                 rf_spend=new_rf_spend,
+                revenue_per_kpi=filled_data.revenue_per_kpi,
             ),
             selected_times=selected_times,
             batch_size=batch_size,
@@ -1860,6 +2175,18 @@ class BudgetOptimizer:
         c.CPIK: ([c.CHANNEL, c.METRIC], cpik),
     }
+    all_times = (
+        filled_data.time.numpy().astype(str).tolist()
+        if filled_data.time is not None
+        else self._meridian.input_data.time.values.tolist()
+    )
+    if selected_times is not None and all(
+        isinstance(time, bool) for time in selected_times
+    ):
+      selected_times = [
+          time for time, selected in zip(all_times, selected_times) if selected
+      ]
     attributes = {
         c.START_DATE: min(selected_times) if selected_times else all_times[0],
         c.END_DATE: max(selected_times) if selected_times else all_times[-1],
@@ -1889,7 +2216,8 @@ class BudgetOptimizer:
       i: int,
       incremental_outcome_grid: np.ndarray,
       multipliers_grid: tf.Tensor,
-      selected_times: Sequence[str],
+      new_data: analyzer.DataTensors | None = None,
+      selected_times: Sequence[str] | Sequence[bool] | None = None,
       use_posterior: bool = True,
       use_kpi: bool = False,
       optimal_frequency: xr.DataArray | None = None,
@@ -1904,8 +2232,16 @@ class BudgetOptimizer:
         number of columns is equal to the number of total channels, containing
         incremental outcome by channel.
       multipliers_grid: A grid derived from spend.
-      selected_times: Sequence of strings representing the time dimensions in
-        `meridian.input_data.time` to use for optimization.
+      new_data: An optional `DataTensors` object containing the new `media`,
+        `reach`, `frequency`, and `revenue_per_kpi` tensors. If `None`, the
+        existing tensors from the Meridian object are used. If any of the
+        tensors is provided with a different number of time periods than in
+        `InputData`, then all tensors must be provided with the same number of
+        time periods.
+      selected_times: Optional list of times to optimize. This can either be a
+        string list containing a subset of time dimension coordinates from
+        `InputData.time` or a boolean list with length equal to the time
+        dimension of the tensor. By default, all time periods are included.
       use_posterior: Boolean. If `True`, then the incremental outcome is derived
         from the posterior distribution of the model. Otherwise, the prior
         distribution is used.
@@ -1922,10 +2258,14 @@ class BudgetOptimizer:
         reducing `batch_size`. The calculation will generally be faster with
         larger `batch_size` values.
     """
+    new_data = new_data or analyzer.DataTensors()
+    filled_data = new_data.validate_and_fill_missing_data(
+        c.PAID_DATA, self._meridian
+    )
     if self._meridian.n_media_channels > 0:
       new_media = (
           multipliers_grid[i, : self._meridian.n_media_channels]
-          * self._meridian.media_tensors.media
+          * filled_data.media
       )
     else:
       new_media = None
@@ -1934,20 +2274,18 @@ class BudgetOptimizer:
       new_frequency = None
       new_reach = None
     elif optimal_frequency is not None:
-      new_frequency = (
-          tf.ones_like(self._meridian.rf_tensors.frequency) * optimal_frequency
-      )
+      new_frequency = tf.ones_like(filled_data.frequency) * optimal_frequency
       new_reach = tf.math.divide_no_nan(
           multipliers_grid[i, -self._meridian.n_rf_channels :]
-          * self._meridian.rf_tensors.reach
-          * self._meridian.rf_tensors.frequency,
+          * filled_data.reach
+          * filled_data.frequency,
           new_frequency,
       )
     else:
-      new_frequency = self._meridian.rf_tensors.frequency
+      new_frequency = filled_data.frequency
       new_reach = (
           multipliers_grid[i, -self._meridian.n_rf_channels :]
-          * self._meridian.rf_tensors.reach
+          * filled_data.reach
       )
     # incremental_outcome returns a three dimensional tensor with dims
@@ -1960,6 +2298,7 @@ class BudgetOptimizer:
                 media=new_media,
                 reach=new_reach,
                 frequency=new_frequency,
+                revenue_per_kpi=filled_data.revenue_per_kpi,
             ),
             selected_times=selected_times,
             use_kpi=use_kpi,
@@ -1976,7 +2315,8 @@ class BudgetOptimizer:
       spend_bound_lower: np.ndarray,
       spend_bound_upper: np.ndarray,
       step_size: int,
-      selected_times: Sequence[str],
+      new_data: analyzer.DataTensors | None = None,
+      selected_times: Sequence[str] | Sequence[bool] | None = None,
       use_posterior: bool = True,
       use_kpi: bool = False,
       optimal_frequency: xr.DataArray | None = None,
@@ -1992,8 +2332,16 @@ class BudgetOptimizer:
         containing the upper constraint spend for each channel.
       step_size: Integer indicating the step size, or interval, between values
         in the spend grid. All media channels have the same step size.
-      selected_times: Sequence of strings representing the time dimensions in
-        `meridian.input_data.time` to use for optimization.
+      new_data: An optional `DataTensors` object containing the new `media`,
+        `reach`, `frequency`, and `revenue_per_kpi` tensors. If `None`, the
+        existing tensors from the Meridian object are used. If any of the
+        tensors is provided with a different number of time periods than in
+        `InputData`, then all tensors must be provided with the same number of
+        time periods.
+      selected_times: Optional list of times to optimize. This can either be a
+        string list containing a subset of time dimension coordinates from
+        `InputData.time` or a boolean list with length equal to the time
+        dimension of the tensor. By default, all time periods are included.
       use_posterior: Boolean. If `True`, then the incremental outcome is derived
         from the posterior distribution of the model. Otherwise, the prior
         distribution is used.
@@ -2047,6 +2395,7 @@ class BudgetOptimizer:
           incremental_outcome_grid=incremental_outcome_grid,
           multipliers_grid=multipliers_grid,
           selected_times=selected_times,
+          new_data=new_data,
           use_posterior=use_posterior,
           use_kpi=use_kpi,
           optimal_frequency=optimal_frequency,
@@ -2207,7 +2556,7 @@ def _validate_budget(
     budget: float | None,
     target_roi: float | None,
     target_mroi: float | None,
-):
+) -> None:
   """Validates the budget optimization arguments."""
   if fixed_budget:
     if target_roi is not None:

google-meridian 1.0.8__py3-none-any.whl → 1.1.0__py3-none-any.whl

google-meridian 1.0.8py3-none-any.whl → 1.1.0py3-none-any.whl