google-meridian 1.0.8__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {google_meridian-1.0.8.dist-info → google_meridian-1.1.0.dist-info}/METADATA +2 -2
- google_meridian-1.1.0.dist-info/RECORD +41 -0
- {google_meridian-1.0.8.dist-info → google_meridian-1.1.0.dist-info}/WHEEL +1 -1
- meridian/__init__.py +1 -1
- meridian/analysis/analyzer.py +303 -207
- meridian/analysis/optimizer.py +431 -82
- meridian/analysis/summarizer.py +25 -7
- meridian/analysis/test_utils.py +81 -81
- meridian/analysis/visualizer.py +81 -39
- meridian/constants.py +111 -26
- meridian/data/input_data.py +115 -19
- meridian/data/test_utils.py +116 -5
- meridian/data/time_coordinates.py +3 -3
- meridian/model/media.py +133 -98
- meridian/model/model.py +457 -52
- meridian/model/model_test_data.py +11 -0
- meridian/model/posterior_sampler.py +120 -43
- meridian/model/prior_distribution.py +95 -29
- meridian/model/prior_sampler.py +179 -209
- meridian/model/spec.py +196 -36
- meridian/model/transformers.py +15 -3
- google_meridian-1.0.8.dist-info/RECORD +0 -41
- {google_meridian-1.0.8.dist-info → google_meridian-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {google_meridian-1.0.8.dist-info → google_meridian-1.1.0.dist-info}/top_level.txt +0 -0
meridian/analysis/analyzer.py
CHANGED
@@ -16,6 +16,7 @@

 from collections.abc import Mapping, Sequence
 import itertools
+import numbers
 from typing import Any, Optional
 import warnings

@@ -37,6 +38,20 @@ __all__ = [
 ]


+def _validate_non_media_baseline_values_numbers(
+    non_media_baseline_values: Sequence[str | float] | None,
+):
+  if non_media_baseline_values is None:
+    return
+
+  for value in non_media_baseline_values:
+    if not isinstance(value, numbers.Number):
+      raise ValueError(
+          f"Invalid `non_media_baseline_values` value: '{value}'. Only float"
+          " numbers are supported."
+      )
+
+
 # TODO: Refactor the related unit tests to be under DataTensors.
 class DataTensors(tf.experimental.ExtensionType):
   """Container for data variable arguments of Analyzer methods.
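The new module-level helper above tightens what analyzer methods accept for `non_media_baseline_values`: only real numbers (or `None`) pass, while string baselines raise. A small behaviour sketch, calling the module-private function directly purely for illustration:

```python
from meridian.analysis import analyzer

# Floats (and None) pass silently.
analyzer._validate_non_media_baseline_values_numbers([0.0, 1.5])
analyzer._validate_non_media_baseline_values_numbers(None)

# String baselines are rejected by this validator.
try:
  analyzer._validate_non_media_baseline_values_numbers(["min", 2.0])
except ValueError as err:
  print(err)  # Invalid `non_media_baseline_values` value: 'min'. Only float numbers are supported.
```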
@@ -63,6 +78,8 @@ class DataTensors(tf.experimental.ExtensionType):
     controls: Optional tensor with dimensions `(n_geos, n_times, n_controls)`.
     revenue_per_kpi: Optional tensor with dimensions `(n_geos, T)` for any time
       dimension `T`.
+    time: Optional tensor of time coordinates in the "YYYY-mm-dd" string format
+      for time dimension `T`.
   """

   media: Optional[tf.Tensor]
@@ -76,6 +93,7 @@ class DataTensors(tf.experimental.ExtensionType):
   non_media_treatments: Optional[tf.Tensor]
   controls: Optional[tf.Tensor]
   revenue_per_kpi: Optional[tf.Tensor]
+  time: Optional[tf.Tensor]

   def __init__(
       self,
@@ -90,6 +108,7 @@ class DataTensors(tf.experimental.ExtensionType):
       non_media_treatments: Optional[tf.Tensor] = None,
       controls: Optional[tf.Tensor] = None,
       revenue_per_kpi: Optional[tf.Tensor] = None,
+      time: Optional[Sequence[str] | tf.Tensor] = None,
   ):
     self.media = tf.cast(media, tf.float32) if media is not None else None
     self.media_spend = (
@@ -130,6 +149,7 @@ class DataTensors(tf.experimental.ExtensionType):
         if revenue_per_kpi is not None
         else None
     )
+    self.time = tf.cast(time, tf.string) if time is not None else None

   def __validate__(self):
     self._validate_n_dims()
@@ -176,6 +196,7 @@ class DataTensors(tf.experimental.ExtensionType):
           new_tensor is not None
           and old_tensor is not None
           and new_tensor.ndim > 1
+          and old_tensor.ndim > 1
           and new_tensor.shape[1] != old_tensor.shape[1]
       ):
         return new_tensor.shape[1]
@@ -241,6 +262,8 @@ class DataTensors(tf.experimental.ExtensionType):
             f"New `{field.name}` must have 1 or 3 dimensions. Found"
             f" {tensor.ndim} dimensions."
         )
+      elif field.name == constants.TIME:
+        _check_n_dims(tensor, field.name, 1)
       else:
         _check_n_dims(tensor, field.name, 3)

@@ -283,7 +306,7 @@ class DataTensors(tf.experimental.ExtensionType):
     for var_name in required_fields:
       new_tensor = getattr(self, var_name)
       if new_tensor is not None and new_tensor.shape[0] != meridian.n_geos:
-        # Skip spend data with only 1 dimension
+        # Skip spend and time data with only 1 dimension.
         if new_tensor.ndim == 1:
           continue
         raise ValueError(
@@ -296,7 +319,7 @@ class DataTensors(tf.experimental.ExtensionType):
   ):
     """Validates the channel dimension of the specified data variables."""
     for var_name in required_fields:
-      if var_name
+      if var_name in [constants.REVENUE_PER_KPI, constants.TIME]:
         continue
       new_tensor = getattr(self, var_name)
       old_tensor = getattr(meridian.input_data, var_name)
@@ -317,12 +340,24 @@ class DataTensors(tf.experimental.ExtensionType):
       old_tensor = getattr(meridian.input_data, var_name)

       # Skip spend data with only 1 dimension of (n_channels).
-      if
+      if (
+          var_name in [constants.MEDIA_SPEND, constants.RF_SPEND]
+          and new_tensor is not None
+          and new_tensor.ndim == 1
+      ):
         continue

       if new_tensor is not None:
         assert old_tensor is not None
-        if
+        if (
+            var_name == constants.TIME
+            and new_tensor.shape[0] != old_tensor.shape[0]
+        ):
+          raise ValueError(
+              f"New `{var_name}` is expected to have {old_tensor.shape[0]}"
+              f" time periods. Found {new_tensor.shape[0]} time periods."
+          )
+        elif new_tensor.ndim > 1 and new_tensor.shape[1] != old_tensor.shape[1]:
           raise ValueError(
               f"New `{var_name}` is expected to have {old_tensor.shape[1]}"
               f" time periods. Found {new_tensor.shape[1]} time periods."
@@ -345,12 +380,24 @@ class DataTensors(tf.experimental.ExtensionType):
       if old_tensor is None:
         continue
       # Skip spend data with only 1 dimension of (n_channels).
-      if
+      if (
+          var_name in [constants.MEDIA_SPEND, constants.RF_SPEND]
+          and new_tensor is not None
+          and new_tensor.ndim == 1
+      ):
         continue

       if new_tensor is None:
         missing_params.append(var_name)
-      elif new_tensor.shape[
+      elif var_name == constants.TIME and new_tensor.shape[0] != new_n_times:
+        raise ValueError(
+            "If the time dimension of any variable in `new_data` is "
+            "modified, then all variables must be provided with the same "
+            f"number of time periods. `{var_name}` has {new_tensor.shape[1]} "
+            "time periods, which does not match the modified number of time "
+            f"periods, {new_n_times}.",
+        )
+      elif new_tensor.ndim > 1 and new_tensor.shape[1] != new_n_times:
         raise ValueError(
             "If the time dimension of any variable in `new_data` is "
             "modified, then all variables must be provided with the same "
@@ -390,6 +437,10 @@ class DataTensors(tf.experimental.ExtensionType):
         old_tensor = meridian.controls
       elif var_name == constants.REVENUE_PER_KPI:
         old_tensor = meridian.revenue_per_kpi
+      elif var_name == constants.TIME:
+        old_tensor = tf.convert_to_tensor(
+            meridian.input_data.time.values.tolist(), dtype=tf.string
+        )
       else:
         continue

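Taken together, the hunks above add an optional `time` field to `DataTensors`: a 1-D tensor of "YYYY-mm-dd" strings that is validated as one-dimensional, checked against the model's time dimension, and compared to `meridian.input_data.time` when filling missing data. A minimal construction sketch; the dimension sizes, dates, and tensor values below are toy assumptions, not taken from the diff:

```python
import tensorflow as tf
from meridian.analysis import analyzer

n_geos, n_new_times, n_media_channels = 5, 4, 3  # assumed toy dimensions
new_times = ["2024-01-01", "2024-01-08", "2024-01-15", "2024-01-22"]

new_data = analyzer.DataTensors(
    media=tf.ones((n_geos, n_new_times, n_media_channels)),
    time=new_times,  # cast to a 1-D tf.string tensor by __init__
)
print(new_data.time.dtype)  # tf.string
```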
@@ -618,22 +669,16 @@ def _scale_tensors_by_multiplier(
     data: DataTensors,
     multiplier: float,
     by_reach: bool,
-    non_media_treatments_baseline: tf.Tensor | None = None,
 ) -> DataTensors:
   """Get scaled tensors for incremental outcome calculation.

   Args:
     data: DataTensors object containing the optional tensors to scale. Only
-      `media`, `reach`, `frequency`, `organic_media`, `organic_reach`,
-      `organic_frequency
-      remain unchanged.
+      `media`, `reach`, `frequency`, `organic_media`, `organic_reach`, and
+      `organic_frequency` are scaled. The other tensors remain unchanged.
     multiplier: Float indicating the factor to scale tensors by.
     by_reach: Boolean indicating whether to scale reach or frequency when rf
       data is available.
-    non_media_treatments_baseline: Optional tensor to overwrite
-      `data.non_media_treatments` in the output. Used to compute the
-      conterfactual values for incremental outcome calculation. If not used, the
-      unmodified `data.non_media_treatments` tensor is returned in the output.

   Returns:
     A `DataTensors` object containing scaled tensor parameters. The original
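The revised docstring above states the contract plainly: only the execution tensors are multiplied, spend and controls pass through, and `by_reach` picks which half of a reach-and-frequency pair gets scaled. A conceptual sketch of that switch with toy tensors (this is not the function's implementation):

```python
import tensorflow as tf

reach = tf.ones((2, 10, 1))            # (n_geos, n_times, n_rf_channels)
frequency = 3.0 * tf.ones((2, 10, 1))
multiplier, by_reach = 1.2, True

scaled_reach = reach * multiplier if by_reach else reach
scaled_frequency = frequency if by_reach else frequency * multiplier
# Either way, reach * frequency changes by the same overall factor.
```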
@@ -662,14 +707,9 @@ def _scale_tensors_by_multiplier(
     incremented_data[constants.ORGANIC_FREQUENCY] = (
         data.organic_frequency * multiplier
     )
-  if non_media_treatments_baseline is not None:
-    incremented_data[constants.NON_MEDIA_TREATMENTS] = (
-        non_media_treatments_baseline
-    )
-  else:
-    incremented_data[constants.NON_MEDIA_TREATMENTS] = data.non_media_treatments

   # Include the original data that does not get scaled.
+  incremented_data[constants.NON_MEDIA_TREATMENTS] = data.non_media_treatments
   incremented_data[constants.MEDIA_SPEND] = data.media_spend
   incremented_data[constants.RF_SPEND] = data.rf_spend
   incremented_data[constants.CONTROLS] = data.controls
@@ -719,79 +759,6 @@ def _central_tendency_and_ci_by_prior_and_posterior(
   return xr.Dataset(data_vars=xr_data, coords=xr_coords)


-def _compute_non_media_baseline(
-    non_media_treatments: tf.Tensor,
-    non_media_baseline_values: Sequence[float | str] | None = None,
-    non_media_selected_times: Sequence[bool] | None = None,
-) -> tf.Tensor:
-  """Computes the baseline for each non-media treatment channel.
-
-  Args:
-    non_media_treatments: The non-media treatment input data.
-    non_media_baseline_values: Optional list of shape (n_non_media_channels,).
-      Each element is either a float (which means that the fixed value will be
-      used as baseline for the given channel) or one of the strings "min" or
-      "max" (which mean that the global minimum or maximum value will be used as
-      baseline for the values of the given non_media treatment channel). If
-      None, the minimum value is used as baseline for each non_media treatment
-      channel.
-    non_media_selected_times: Optional list of shape (n_times,). Each element is
-      a boolean indicating whether the corresponding time period should be
-      included in the baseline computation.
-
-  Returns:
-    A tensor of shape (n_geos, n_times, n_non_media_channels) containing the
-    baseline values for each non-media treatment channel.
-  """
-
-  if non_media_selected_times is None:
-    non_media_selected_times = [True] * non_media_treatments.shape[-2]
-
-  if non_media_baseline_values is None:
-    # If non_media_baseline_values is not provided, use the minimum value for
-    # each non_media treatment channel as the baseline.
-    non_media_baseline_values_filled = [
-        constants.NON_MEDIA_BASELINE_MIN
-    ] * non_media_treatments.shape[-1]
-  else:
-    non_media_baseline_values_filled = non_media_baseline_values
-
-  if non_media_treatments.shape[-1] != len(non_media_baseline_values_filled):
-    raise ValueError(
-        "The number of non-media channels"
-        f" ({non_media_treatments.shape[-1]}) does not match the number"
-        f" of baseline types ({len(non_media_baseline_values_filled)})."
-    )
-
-  baseline_list = []
-  for channel in range(non_media_treatments.shape[-1]):
-    baseline_value = non_media_baseline_values_filled[channel]
-
-    if baseline_value == constants.NON_MEDIA_BASELINE_MIN:
-      baseline_for_channel = tf.reduce_min(
-          non_media_treatments[..., channel], axis=[0, 1]
-      )
-    elif baseline_value == constants.NON_MEDIA_BASELINE_MAX:
-      baseline_for_channel = tf.reduce_max(
-          non_media_treatments[..., channel], axis=[0, 1]
-      )
-    elif isinstance(baseline_value, float):
-      baseline_for_channel = tf.cast(baseline_value, tf.float32)
-    else:
-      raise ValueError(
-          f"Invalid non_media_baseline_values value: '{baseline_value}'. Only"
-          " float numbers and strings 'min' and 'max' are supported."
-      )
-
-    baseline_list.append(
-        baseline_for_channel
-        * tf.ones_like(non_media_treatments[..., channel])
-        * non_media_selected_times
-    )
-
-  return tf.stack(baseline_list, axis=-1)
-
-
 class Analyzer:
   """Runs calculations to analyze the raw data after fitting the model."""

@@ -818,7 +785,7 @@ class Analyzer:
         `media`, `reach`, `frequency`, `organic_media`, `organic_reach`,
         `organic_frequency`, `non_media_treatments`, `controls`. The `media`,
         `reach`, `organic_media`, `organic_reach` and `non_media_treatments`
-        tensors are
+        tensors are expected to be scaled by their corresponding transformers.
       dist_tensors: A `DistributionTensors` container with the distribution
         tensors for media, RF, organic media, organic RF, non-media treatments,
         and controls.
@@ -1029,7 +996,7 @@ class Analyzer:
         organic_media=self._meridian.organic_media_tensors.organic_media_scaled,
         organic_reach=self._meridian.organic_rf_tensors.organic_reach_scaled,
         organic_frequency=self._meridian.organic_rf_tensors.organic_frequency,
-        non_media_treatments=self._meridian.
+        non_media_treatments=self._meridian.non_media_treatments_normalized,
        controls=self._meridian.controls_scaled,
        revenue_per_kpi=self._meridian.revenue_per_kpi,
    )
@@ -1078,10 +1045,10 @@ class Analyzer:
        if new_data.organic_frequency is not None
        else self._meridian.organic_rf_tensors.organic_frequency
    )
-
+    non_media_treatments_normalized = _transformed_new_or_scaled(
        new_variable=new_data.non_media_treatments,
        transformer=self._meridian.non_media_transformer,
-        scaled_variable=self._meridian.
+        scaled_variable=self._meridian.non_media_treatments_normalized,
    )
    return DataTensors(
        media=media_scaled,
@@ -1090,7 +1057,7 @@ class Analyzer:
        organic_media=organic_media_scaled,
        organic_reach=organic_reach_scaled,
        organic_frequency=organic_frequency,
-        non_media_treatments=
+        non_media_treatments=non_media_treatments_normalized,
        controls=controls_scaled,
        revenue_per_kpi=revenue_per_kpi,
    )
@@ -1559,7 +1526,7 @@ class Analyzer:
      self,
      data_tensors: DataTensors,
      dist_tensors: DistributionTensors,
-
+      non_media_treatments_baseline_normalized: Sequence[float] | None = None,
  ) -> tf.Tensor:
    """Computes incremental KPI distribution.

@@ -1573,17 +1540,26 @@ class Analyzer:
      dist_tensors: A `DistributionTensors` container with the distribution
        tensors for media, RF, organic media, organic RF and non-media
        treatments channels.
-
-        Each element is
-
-
-
-
-
+      non_media_treatments_baseline_normalized: Optional list of shape
+        `(n_non_media_channels,)`. Each element is a float that will be used as
+        baseline for the given channel. The values are expected to be scaled by
+        population for channels where
+        `model_spec.non_media_population_scaling_id` is `True` and normalized by
+        centering and scaling using means and standard deviations. This argument
+        is required if the data contains non-media treatments.

    Returns:
      Tensor of incremental KPI distribution.
    """
+    if (
+        data_tensors.non_media_treatments is not None
+        and non_media_treatments_baseline_normalized is None
+    ):
+      raise ValueError(
+          "`non_media_treatments_baseline_normalized` must be passed to"
+          " `_get_incremental_kpi` when `non_media_treatments` data is"
+          " present."
+      )
    n_media_times = self._meridian.n_media_times
    if data_tensors.media is not None:
      n_times = data_tensors.media.shape[1]  # pytype: disable=attribute-error
@@ -1606,13 +1582,10 @@ class Analyzer:
        combined_beta,
    )
    if data_tensors.non_media_treatments is not None:
-      non_media_scaled_baseline = _compute_non_media_baseline(
-          non_media_treatments=data_tensors.non_media_treatments,
-          non_media_baseline_values=non_media_baseline_values,
-      )
      non_media_kpi = tf.einsum(
          "gtn,...gn->...gtn",
-          data_tensors.non_media_treatments
+          data_tensors.non_media_treatments
+          - non_media_treatments_baseline_normalized,
          dist_tensors.gamma_gn,
      )
    return tf.concat([combined_media_kpi, non_media_kpi], axis=-1)
@@ -1662,7 +1635,7 @@ class Analyzer:
      self,
      data_tensors: DataTensors,
      dist_tensors: DistributionTensors,
-
+      non_media_treatments_baseline_normalized: Sequence[float] | None = None,
      inverse_transform_outcome: bool | None = None,
      use_kpi: bool | None = None,
      selected_geos: Sequence[str] | None = None,
@@ -1687,20 +1660,21 @@ class Analyzer:
        poulation. Shape (n_geos x T x n_organic_rf_channels), for any time
        dimension T. `organic_frequency`: `organic frequency data` with shape
        (n_geos x T x n_organic_rf_channels), for any time dimension T.
-        `non_media_treatments`: `non_media_treatments` data
-
-
-        dimension `
-
-
-
-
-
-
-
-
-
-
+        `non_media_treatments`: `non_media_treatments` data scaled by population
+        for the selected channels and normalized by means and standard
+        deviations with shape (n_geos x T x n_non_media_channels), for any time
+        dimension T. `revenue_per_kpi`: Contains revenue per kpi data with shape
+        `(n_geos x T)`, for any time dimension `T`.
+      dist_tensors: A `DistributionTensors` container with the distribution
+        tensors for media, RF, organic media, organic RF and non-media
+        treatments channels.
+      non_media_treatments_baseline_normalized: Optional list of shape
+        `(n_non_media_channels,)`. Each element is a float that will be used as
+        baseline for the given channel. The values are expected to be scaled by
+        population for channels where
+        `model_spec.non_media_population_scaling_id` is `True` and normalized by
+        centering and scaling using means and standard deviations. This argument
+        is required if the data contains non-media treatments.
      inverse_transform_outcome: Boolean. If `True`, returns the expected
        outcome in the original KPI or revenue (depending on what is passed to
        `use_kpi`), as it was passed to `InputData`. If False, returns the
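The new `non_media_treatments_baseline_normalized` argument documented above expects baselines already pushed through the same transformation as the treatment data: population scaling where `non_media_population_scaling_id` is set, then centering and scaling by per-channel means and standard deviations. A toy illustration of that normalization step only (all numbers are made up):

```python
import numpy as np

baseline = np.array([0.0, 2.5], dtype=np.float32)  # per-channel baselines
means = np.array([1.2, 3.0], dtype=np.float32)     # assumed channel means
stdevs = np.array([0.4, 1.5], dtype=np.float32)    # assumed channel stdevs

baseline_normalized = (baseline - means) / stdevs
print(baseline_normalized)  # [-3.0, -0.333...]
```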
@@ -1725,10 +1699,20 @@ class Analyzer:
      Tensor containing the incremental outcome distribution.
    """
    self._check_revenue_data_exists(use_kpi)
+    if (
+        data_tensors.non_media_treatments is not None
+        and non_media_treatments_baseline_normalized is None
+    ):
+      raise ValueError(
+          "`non_media_treatments_baseline_normalized` must be passed to"
+          " `_incremental_outcome_impl` when `non_media_treatments` data is"
+          " present."
+      )
+
    transformed_outcome = self._get_incremental_kpi(
        data_tensors=data_tensors,
        dist_tensors=dist_tensors,
-
+        non_media_treatments_baseline_normalized=non_media_treatments_baseline_normalized,
    )
    if inverse_transform_outcome:
      incremental_outcome = self._inverse_outcome(
@@ -1752,7 +1736,7 @@ class Analyzer:
      self,
      use_posterior: bool = True,
      new_data: DataTensors | None = None,
-      non_media_baseline_values: Sequence[float
+      non_media_baseline_values: Sequence[float] | None = None,
      scaling_factor0: float = 0.0,
      scaling_factor1: float = 1.0,
      selected_geos: Sequence[str] | None = None,
@@ -1771,15 +1755,26 @@ class Analyzer:
    This calculates the media outcome of each media channel for each posterior
    or prior parameter draw. Incremental outcome is defined as:

-    `E(Outcome|
+    `E(Outcome|Treatment_1, Controls)` minus `E(Outcome|Treatment_0, Controls)`
+
+    For paid & organic channels (without reach and frequency data),
+    `Treatment_1` means that media execution for a given channel is multiplied
+    by
+    `scaling_factor1` (1.0 by default) for the set of time periods specified
+    by `media_selected_times`. Similarly, `Treatment_0` means that media
+    execution is multiplied by `scaling_factor0` (0.0 by default) for these time
+    periods.
+
+    For paid & organic channels with reach and frequency data, either reach or
+    frequency is held fixed while the other is scaled, depending on the
+    `by_reach` argument.

-
-
-
-
+    For non-media treatments, `Treatment_1` means that the variable is set to
+    historical values. `Treatment_0` means that the variable is set to its
+    baseline value for all geos and time periods. Note that the scaling factors
+    (`scaling_factor0` and `scaling_factor1`) are not applicable to non-media
+    treatments.

-    For channels with reach and frequency data, either reach or frequency is
-    held fixed while the other is scaled, depending on the `by_reach` argument.
    "Outcome" refers to either `revenue` if `use_kpi=False`, or `kpi` if
    `use_kpi=True`. When `revenue_per_kpi` is not defined, `use_kpi` cannot be
    False.
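The rewritten docstring above spells out the counterfactual: `Treatment_1` multiplies execution by `scaling_factor1` and `Treatment_0` by `scaling_factor0` over `media_selected_times`, R&F channels scale reach or frequency depending on `by_reach`, and non-media treatments fall back to their baselines. A hedged call sketch, assuming `mmm` is an already-fitted `meridian.model.Meridian` and that `Analyzer` wraps it; the dates are placeholders:

```python
from meridian.analysis import analyzer

an = analyzer.Analyzer(mmm)  # assumption: Analyzer wraps a fitted model
outcome = an.incremental_outcome(
    use_posterior=True,
    scaling_factor0=0.5,   # Treatment_0: execution scaled to 50%
    scaling_factor1=1.0,   # Treatment_1: historical execution
    media_selected_times=["2024-01-01", "2024-01-08"],
    use_kpi=False,         # report revenue rather than KPI
)
```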
@@ -1821,13 +1816,13 @@ class Analyzer:
        any of the tensors in `new_data` is provided with a different number of
        time periods than in `InputData`, then all tensors must be provided with
        the same number of time periods.
-      non_media_baseline_values: Optional list of shape
-        Each element is
-        used as baseline for the given channel
-
-
-
-        each non_media
+      non_media_baseline_values: Optional list of shape
+        `(n_non_media_channels,)`. Each element is a float which means that the
+        fixed value will be used as baseline for the given channel. It is
+        expected that they are scaled by population for the channels where
+        `model_spec.non_media_population_scaling_id` is `True`. If `None`, the
+        `model_spec.non_media_baseline_values` is used, which defaults to the
+        minimum value for each non_media treatment channel.
      scaling_factor0: Float. The factor by which to scale the counterfactual
        scenario "Media_0" during the time periods specified in
        `media_selected_times`. Must be non-negative and less than
@@ -1909,6 +1904,7 @@ class Analyzer:
        aggregate_geos=aggregate_geos,
        selected_geos=selected_geos,
    )
+    _validate_non_media_baseline_values_numbers(non_media_baseline_values)
    dist_type = constants.POSTERIOR if use_posterior else constants.PRIOR

    if dist_type not in mmm.inference_data.groups():
@@ -1967,7 +1963,6 @@ class Analyzer:
      media_selected_times = [
          x in media_selected_times for x in mmm.input_data.media_time
      ]
-      non_media_selected_times = media_selected_times[-mmm.n_times :]

    # Set counterfactual tensors based on the scaling factors and the media
    # selected times.
@@ -1979,28 +1974,52 @@ class Analyzer:
    )[:, None]

    if data_tensors.non_media_treatments is not None:
-
-
-
-
+      non_media_treatments_baseline_scaled = (
+          self._meridian.compute_non_media_treatments_baseline(
+              non_media_baseline_values=non_media_baseline_values,
+          )
+      )
+      non_media_treatments_baseline_normalized = self._meridian.non_media_transformer.forward(  # pytype: disable=attribute-error
+          non_media_treatments_baseline_scaled,
+          apply_population_scaling=False,
+      )
+      non_media_treatments0 = tf.broadcast_to(
+          tf.constant(
+              non_media_treatments_baseline_normalized, dtype=tf.float32
+          )[tf.newaxis, tf.newaxis, :],
+          self._meridian.non_media_treatments.shape,  # pytype: disable=attribute-error
      )
    else:
-
+      non_media_treatments_baseline_normalized = None
+      non_media_treatments0 = None

    incremented_data0 = _scale_tensors_by_multiplier(
        data=data_tensors,
        multiplier=counterfactual0,
        by_reach=by_reach,
-        non_media_treatments_baseline=new_non_media_treatments0,
    )
    incremented_data1 = _scale_tensors_by_multiplier(
        data=data_tensors, multiplier=counterfactual1, by_reach=by_reach
    )

-
+    scaled_data0 = self._get_scaled_data_tensors(
        new_data=incremented_data0,
        include_non_paid_channels=include_non_paid_channels,
    )
+    # TODO: b/415198977 - Verify the computation of outcome of non-media
+    # treatments with `media_selected_times` and scale factors.
+
+    data_tensors0 = DataTensors(
+        media=scaled_data0.media,
+        reach=scaled_data0.reach,
+        frequency=scaled_data0.frequency,
+        organic_media=scaled_data0.organic_media,
+        organic_reach=scaled_data0.organic_reach,
+        organic_frequency=scaled_data0.organic_frequency,
+        revenue_per_kpi=scaled_data0.revenue_per_kpi,
+        non_media_treatments=non_media_treatments0,
+    )
+
    data_tensors1 = self._get_scaled_data_tensors(
        new_data=incremented_data1,
        include_non_paid_channels=include_non_paid_channels,
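The counterfactual branch above broadcasts a per-channel normalized baseline vector out to the full `(n_geos, n_times, n_non_media_channels)` tensor before it is handed to `DataTensors`. The shape mechanics in isolation, with assumed toy sizes:

```python
import tensorflow as tf

baseline_normalized = tf.constant([0.1, -0.7, 1.3])  # (n_non_media_channels,)
target_shape = (4, 52, 3)                            # (n_geos, n_times, n_channels)

baseline_full = tf.broadcast_to(
    baseline_normalized[tf.newaxis, tf.newaxis, :], target_shape
)
assert baseline_full.shape == target_shape
```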
@@ -2027,7 +2046,9 @@ class Analyzer:
    incremental_outcome_kwargs = {
        "inverse_transform_outcome": inverse_transform_outcome,
        "use_kpi": use_kpi,
-        "
+        "non_media_treatments_baseline_normalized": (
+            non_media_treatments_baseline_normalized
+        ),
    }
    for i, start_index in enumerate(batch_starting_indices):
      stop_index = np.min([n_draws, start_index + batch_size])
@@ -2503,7 +2524,7 @@ class Analyzer:
      aggregate_geos: bool = False,
      aggregate_times: bool = False,
      split_by_holdout_id: bool = False,
-      non_media_baseline_values: Sequence[
+      non_media_baseline_values: Sequence[float] | None = None,
      confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
  ) -> xr.Dataset:
    """Calculates the data for the expected versus actual outcome over time.
@@ -2515,19 +2536,20 @@ class Analyzer:
        are summed over all of the time periods.
      split_by_holdout_id: Boolean. If `True` and `holdout_id` exists, the data
        is split into `'Train'`, `'Test'`, and `'All Data'` subsections.
-      non_media_baseline_values: Optional list of shape
-        Each element is
-        used as baseline for the given channel
-
-
-
-        channel.
+      non_media_baseline_values: Optional list of shape
+        `(n_non_media_channels,)`. Each element is a float which means that the
+        fixed value will be used as baseline for the given channel. It is
+        expected that they are scaled by population for the channels where
+        `model_spec.non_media_population_scaling_id` is `True`. If `None`, the
+        `model_spec.non_media_baseline_values` is used, which defaults to the
+        minimum value for each non_media treatment channel.
      confidence_level: Confidence level for expected outcome credible
        intervals, represented as a value between zero and one. Default: `0.9`.

    Returns:
      A dataset with the expected, baseline, and actual outcome metrics.
    """
+    _validate_non_media_baseline_values_numbers(non_media_baseline_values)
    mmm = self._meridian
    use_kpi = self._meridian.input_data.revenue_per_kpi is None
    can_split_by_holdout = self._can_split_by_holdout_id(split_by_holdout_id)
@@ -2597,7 +2619,7 @@ class Analyzer:

  def _calculate_baseline_expected_outcome(
      self,
-      non_media_baseline_values: Sequence[
+      non_media_baseline_values: Sequence[float] | None = None,
      **expected_outcome_kwargs,
  ) -> tf.Tensor:
    """Calculates either the posterior or prior expected outcome of baseline.
@@ -2609,20 +2631,19 @@ class Analyzer:
    3) `new_organic_media` is set to all zeros
    4) `new_organic_reach` is set to all zeros
    5) `new_non_media_treatments` is set to the counterfactual values
-      according to the
-      `non_media_baseline_values` argument
+      according to the `non_media_baseline_values` argument
    6) `new_controls` are set to historical values

    All other arguments of `expected_outcome` can be passed to this method.

    Args:
-      non_media_baseline_values: Optional list of shape
-        Each element is
-        used as baseline for the given channel
-
-
-
-        channel.
+      non_media_baseline_values: Optional list of shape
+        `(n_non_media_channels,)`. Each element is a float which means that the
+        fixed value will be used as baseline for the given channel. It is
+        expected that they are scaled by population for the channels where
+        `model_spec.non_media_population_scaling_id` is `True`. If `None`, the
+        `model_spec.non_media_baseline_values` is used, which defaults to the
+        minimum value for each non_media treatment channel.
      **expected_outcome_kwargs: kwargs to pass to `expected_outcome`, which
        could contain use_posterior, selected_geos, selected_times,
        aggregate_geos, aggregate_times, inverse_transform_outcome, use_kpi,
@@ -2655,10 +2676,27 @@ class Analyzer:
        else None
    )
    if self._meridian.non_media_treatments is not None:
-
-
+      if self._meridian.model_spec.non_media_population_scaling_id is not None:
+        scaling_factors = tf.where(
+            self._meridian.model_spec.non_media_population_scaling_id,
+            self._meridian.population[:, tf.newaxis, tf.newaxis],
+            tf.ones_like(self._meridian.population)[:, tf.newaxis, tf.newaxis],
+        )
+      else:
+        scaling_factors = tf.ones_like(self._meridian.population)[
+            :, tf.newaxis, tf.newaxis
+        ]
+
+      baseline = self._meridian.compute_non_media_treatments_baseline(
          non_media_baseline_values=non_media_baseline_values,
      )
+      new_non_media_treatments_population_scaled = tf.broadcast_to(
+          tf.constant(baseline, dtype=tf.float32)[tf.newaxis, tf.newaxis, :],
+          self._meridian.non_media_treatments.shape,
+      )
+      new_non_media_treatments = (
+          new_non_media_treatments_population_scaled * scaling_factors
+      )
    else:
      new_non_media_treatments = None
    new_controls = self._meridian.controls
@@ -2679,7 +2717,7 @@ class Analyzer:
      new_data: DataTensors | None = None,
      use_kpi: bool | None = None,
      include_non_paid_channels: bool = True,
-      non_media_baseline_values: Sequence[
+      non_media_baseline_values: Sequence[float] | None = None,
      **kwargs,
  ) -> tf.Tensor:
    """Aggregates the incremental outcome of the media channels.
@@ -2707,13 +2745,13 @@ class Analyzer:
      include_non_paid_channels: Boolean. If `True`, then non-media treatments
        and organic effects are included in the calculation. If `False`, then
        only the paid media and RF effects are included.
-      non_media_baseline_values: Optional list of shape
-        Each element is
-        used as baseline for the given channel
-
-
-
-        each non_media
+      non_media_baseline_values: Optional list of shape
+        `(n_non_media_channels,)`. Each element is a float which means that the
+        fixed value will be used as baseline for the given channel. It is
+        expected that they are scaled by population for the channels where
+        `model_spec.non_media_population_scaling_id` is `True`. If `None`, the
+        `model_spec.non_media_baseline_values` is used, which defaults to the
+        minimum value for each non_media treatment channel.
      **kwargs: kwargs to pass to `incremental_outcome`, which could contain
        selected_geos, selected_times, aggregate_geos, aggregate_times,
        batch_size.
@@ -2723,6 +2761,7 @@ class Analyzer:
      of the channel dimension is incremented by one, with the new component at
      the end containing the total incremental outcome of all channels.
    """
+    _validate_non_media_baseline_values_numbers(non_media_baseline_values)
    use_kpi = use_kpi or self._meridian.input_data.revenue_per_kpi is None
    incremental_outcome_m = self.incremental_outcome(
        use_posterior=use_posterior,
@@ -2755,7 +2794,7 @@ class Analyzer:
      confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
      batch_size: int = constants.DEFAULT_BATCH_SIZE,
      include_non_paid_channels: bool = False,
-      non_media_baseline_values: Sequence[
+      non_media_baseline_values: Sequence[float] | None = None,
  ) -> xr.Dataset:
    """Returns summary metrics.

@@ -2831,13 +2870,13 @@ class Analyzer:
        reported. If `False`, only the paid channels (media, reach and
        frequency) are included but the summary contains also the metrics
        dependent on spend. Default: `False`.
-      non_media_baseline_values: Optional list of shape
-        Each element is
-        used as baseline for the given channel
-
-
-
-        channel.
+      non_media_baseline_values: Optional list of shape
+        `(n_non_media_channels,)`. Each element is a float which means that the
+        fixed value will be used as baseline for the given channel. It is
+        expected that they are scaled by population for the channels where
+        `model_spec.non_media_population_scaling_id` is `True`. If `None`, the
+        `model_spec.non_media_baseline_values` is used, which defaults to the
+        minimum value for each non_media treatment channel.

    Returns:
      An `xr.Dataset` with coordinates: `channel`, `metric` (`mean`, `median`,
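Across these docstrings, `non_media_baseline_values` is now a list of floats only, one per non-media channel, already population-scaled where the model spec requires it; `None` falls back to `model_spec.non_media_baseline_values`. A hedged sketch, assuming the summary method documented above is `Analyzer.summary_metrics` and that `an` wraps a fitted model with two non-media channels:

```python
# `an` is assumed to be analyzer.Analyzer(mmm) for an already-fitted model.
metrics = an.summary_metrics(
    include_non_paid_channels=True,
    non_media_baseline_values=[0.0, 120.0],  # one float per non-media channel
)
```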
@@ -2851,6 +2890,7 @@ class Analyzer:
      when `aggregate_times=False` because they do not have a clear
      interpretation by time period.
    """
+    _validate_non_media_baseline_values_numbers(non_media_baseline_values)
    dim_kwargs = {
        "selected_geos": selected_geos,
        "selected_times": selected_times,
@@ -3239,7 +3279,7 @@ class Analyzer:
      selected_times: Sequence[str] | None = None,
      aggregate_geos: bool = True,
      aggregate_times: bool = True,
-      non_media_baseline_values: Sequence[float
+      non_media_baseline_values: Sequence[float] | None = None,
      confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
      batch_size: int = constants.DEFAULT_BATCH_SIZE,
  ) -> xr.Dataset:
@@ -3254,13 +3294,13 @@ class Analyzer:
        all of the regions.
      aggregate_times: Boolean. If `True`, the expected outcome is summed over
        all of the time periods.
-      non_media_baseline_values: Optional list of shape
-        Each element is
-        used as baseline for the given channel
-
-
-
-        channel.
+      non_media_baseline_values: Optional list of shape
+        `(n_non_media_channels,)`. Each element is a float which means that the
+        fixed value will be used as baseline for the given channel. It is
+        expected that they are scaled by population for the channels where
+        `model_spec.non_media_population_scaling_id` is `True`. If `None`, the
+        `model_spec.non_media_baseline_values` is used, which defaults to the
+        minimum value for each non_media treatment channel.
      confidence_level: Confidence level for media summary metrics credible
        intervals, represented as a value between zero and one.
      batch_size: Integer representing the maximum draws per chain in each
@@ -3273,6 +3313,7 @@ class Analyzer:
      `ci_low`,`ci_high`),`distribution` (prior, posterior) and contains the
      following data variables: `baseline_outcome`, `pct_of_contribution`.
    """
+    _validate_non_media_baseline_values_numbers(non_media_baseline_values)
    # TODO: Change "pct_of_contribution" to a more accurate term.

    use_kpi = self._meridian.input_data.revenue_per_kpi is None
@@ -4663,11 +4704,11 @@ class Analyzer:

  def get_historical_spend(
      self,
-      selected_times: Sequence[str] | None,
+      selected_times: Sequence[str] | None = None,
      include_media: bool = True,
      include_rf: bool = True,
  ) -> xr.DataArray:
-    """Gets the aggregated historical spend based on the time
+    """Deprecated. Gets the aggregated historical spend based on the time.

    Args:
      selected_times: The time period to get the historical spends. If None, the
@@ -4681,6 +4722,51 @@ class Analyzer:
      An `xr.DataArray` with the coordinate `channel` and contains the data
      variable `spend`.

+    Raises:
+      ValueError: A ValueError is raised when `include_media` and `include_rf`
+        are both False.
+    """
+    warnings.warn(
+        "`get_historical_spend` is deprecated. Please use "
+        "`get_aggregated_spend` with `new_data=None` instead.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    return self.get_aggregated_spend(
+        selected_times=selected_times,
+        include_media=include_media,
+        include_rf=include_rf,
+    )
+
+  def get_aggregated_spend(
+      self,
+      new_data: DataTensors | None = None,
+      selected_times: Sequence[str] | Sequence[bool] | None = None,
+      include_media: bool = True,
+      include_rf: bool = True,
+  ) -> xr.DataArray:
+    """Gets the aggregated spend based on the selected time.
+
+    Args:
+      new_data: An optional `DataTensors` object containing the new `media`,
+        `media_spend`, `reach`, `frequency`, `rf_spend` tensors. If `None`, the
+        existing tensors from the Meridian object are used. If `new_data`
+        argument is used, then the aggregated spend is computed using the values
+        of the tensors passed in the `new_data` argument and the original values
+        of all the remaining tensors. If any of the tensors in `new_data` is
+        provided with a different number of time periods than in `InputData`,
+        then all tensors must be provided with the same number of time periods.
+      selected_times: The time period to get the aggregated spends. If None, the
+        spend will be aggregated over all time periods.
+      include_media: Whether to include spends for paid media channels that do
+        not have R&F data.
+      include_rf: Whether to include spends for paid media channels with R&F
+        data.
+
+    Returns:
+      An `xr.DataArray` with the coordinate `channel` and contains the data
+      variable `spend`.
+
    Raises:
      ValueError: A ValueError is raised when `include_media` and `include_rf`
        are both False.
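`get_historical_spend` now only warns and forwards, so existing callers keep working; new code can call `get_aggregated_spend` directly and, optionally, swap in new spend tensors via `new_data`. A migration sketch, assuming `an` is an `Analyzer` over a fitted model:

```python
# Old call (still works, but emits a DeprecationWarning):
spend = an.get_historical_spend(selected_times=None)

# New equivalent:
spend = an.get_aggregated_spend(
    new_data=None,          # use the tensors already in the model
    selected_times=None,    # aggregate over all time periods
    include_media=True,
    include_rf=True,
)
```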
@@ -4689,6 +4775,11 @@ class Analyzer:
      raise ValueError(
          "At least one of include_media or include_rf must be True."
      )
+    new_data = new_data or DataTensors()
+    required_tensors_names = constants.PAID_CHANNELS + constants.SPEND_DATA
+    filled_data = new_data.validate_and_fill_missing_data(
+        required_tensors_names, self._meridian
+    )

    empty_da = xr.DataArray(
        dims=[constants.CHANNEL], coords={constants.CHANNEL: []}
@@ -4709,8 +4800,8 @@ class Analyzer:
      else:
        aggregated_media_spend = self._impute_and_aggregate_spend(
            selected_times,
-
-
+            filled_data.media,
+            filled_data.media_spend,
            list(self._meridian.input_data.media_channel.values),
        )

@@ -4723,18 +4814,16 @@ class Analyzer:
          or self._meridian.rf_tensors.rf_spend is None
      ):
        warnings.warn(
-            "Requested spends for paid media channels with R&F data, but
+            "Requested spends for paid media channels with R&F data, but the"
            " channels are not available.",
        )
        aggregated_rf_spend = empty_da
      else:
-        rf_execution_values =
-            self._meridian.rf_tensors.reach * self._meridian.rf_tensors.frequency
-        )
+        rf_execution_values = filled_data.reach * filled_data.frequency
        aggregated_rf_spend = self._impute_and_aggregate_spend(
            selected_times,
            rf_execution_values,
-
+            filled_data.rf_spend,
            list(self._meridian.input_data.rf_channel.values),
        )

@@ -4744,7 +4833,7 @@ class Analyzer:

  def _impute_and_aggregate_spend(
      self,
-      selected_times: Sequence[str] | None,
+      selected_times: Sequence[str] | Sequence[bool] | None,
      media_execution_values: tf.Tensor,
      channel_spend: tf.Tensor,
      channel_names: Sequence[str],
@@ -4759,7 +4848,7 @@ class Analyzer:
      argument, its values only affect the output when imputation is required.

    Args:
-      selected_times: The time period to get the
+      selected_times: The time period to get the aggregated spend.
      media_execution_values: The media execution values over all time points.
      channel_spend: The spend over all time points. Its shape can be `(n_geos,
        n_times, n_media_channels)` or `(n_media_channels,)` if the data is
@@ -4775,17 +4864,24 @@ class Analyzer:
        "selected_times": selected_times,
        "aggregate_geos": True,
        "aggregate_times": True,
+        "flexible_time_dim": True,
    }

    if channel_spend.ndim == 3:
      aggregated_spend = self.filter_and_aggregate_geos_and_times(
          channel_spend,
+          has_media_dim=True,
          **dim_kwargs,
      ).numpy()
    # channel_spend.ndim can only be 3 or 1.
    else:
      # media spend can have more time points than the model time points
-
+      if media_execution_values.shape[1] == self._meridian.n_media_times:
+        media_exe_values = media_execution_values[
+            :, -self._meridian.n_times :, :
+        ]
+      else:
+        media_exe_values = media_execution_values
      # Calculates CPM over all times and geos if the spend does not have time
      # and geo dimensions.
      target_media_exe_values = self.filter_and_aggregate_geos_and_times(
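The final hunk aligns media execution data that spans `n_media_times` (model time periods plus lead-in periods) with the `n_times` model periods by keeping only the trailing slice before aggregation. The slicing in isolation, with assumed sizes:

```python
import tensorflow as tf

n_geos, n_media_times, n_times, n_channels = 4, 60, 52, 3  # assumed sizes
media_execution_values = tf.random.normal((n_geos, n_media_times, n_channels))

# Keep only the trailing n_times periods that the model actually covers.
media_exe_values = media_execution_values[:, -n_times:, :]
assert media_exe_values.shape == (n_geos, n_times, n_channels)
```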
|