PyPI - google-meridian - Versions diffs - 1.0.9__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

google-meridian 1.0.9__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

{google_meridian-1.0.9.dist-info → google_meridian-1.1.0.dist-info}/METADATA +2 -2
google_meridian-1.1.0.dist-info/RECORD +41 -0
{google_meridian-1.0.9.dist-info → google_meridian-1.1.0.dist-info}/WHEEL +1 -1
meridian/__init__.py +1 -1
meridian/analysis/analyzer.py +195 -189
meridian/analysis/optimizer.py +263 -65
meridian/analysis/summarizer.py +4 -4
meridian/analysis/test_utils.py +81 -81
meridian/analysis/visualizer.py +12 -16
meridian/constants.py +100 -16
meridian/data/input_data.py +115 -19
meridian/data/test_utils.py +116 -5
meridian/data/time_coordinates.py +3 -3
meridian/model/media.py +133 -98
meridian/model/model.py +447 -57
meridian/model/model_test_data.py +11 -0
meridian/model/posterior_sampler.py +120 -43
meridian/model/prior_distribution.py +96 -51
meridian/model/prior_sampler.py +179 -209
meridian/model/spec.py +196 -36
meridian/model/transformers.py +15 -3
google_meridian-1.0.9.dist-info/RECORD +0 -41
{google_meridian-1.0.9.dist-info → google_meridian-1.1.0.dist-info}/licenses/LICENSE +0 -0
{google_meridian-1.0.9.dist-info → google_meridian-1.1.0.dist-info}/top_level.txt +0 -0

meridian/data/input_data.py CHANGED Viewed

@@ -60,7 +60,7 @@ def _check_dim_collection(
     )
-def _check_dim_match(dim: str, arrays: Sequence[xr.DataArray]):
+def _check_dim_match(dim: str, arrays: Sequence[xr.DataArray | None]):
   """Verifies that the dimensions of the appropriate arrays match."""
   lengths = [len(array.coords[dim]) for array in arrays if array is not None]
   names = [array.name for array in arrays if array is not None]
@@ -83,6 +83,31 @@ def _check_coords_match(dim: str, arrays: Sequence[xr.DataArray]):
       )
+def _aggregate_spend(
+    spend: xr.DataArray, calibration_period: np.ndarray | None
+) -> np.ndarray | None:
+  """Aggregates spend for each channel over the calibration period.
+  Args:
+    spend: An array with shape `(n_geos, n_times, n_channels)` to aggregate.
+    calibration_period: An optional boolean array of shape `(n_media_times,
+      n_channels)`. If provided, spend is filtered according to this period.
+  Returns:
+    A 1-D array of aggregated media spend per channel, or `None` if `spend` is
+    `None`.
+  """
+  if spend is None:
+    return None
+  if calibration_period is None:
+    return np.sum(spend, axis=(0, 1))
+  # Select the last `n_times` from the `calibration_period`
+  factors = np.where(calibration_period[-spend.shape[1] :, :], 1, 0)
+  return np.einsum("gtm,tm->m", spend, factors)
 @dataclasses.dataclass
 class InputData:
   """A data container for advertising data in a format supported by Meridian.
@@ -120,8 +145,14 @@ class InputData:
       in the same order. If either of these arguments is passed, then the other
       is not optional.
     media_spend: An optional `DataArray` containing the cost of each media
-      channel. This is used as the denominator for ROI calculations. The
-      DataArray shape can be `(n_geos, n_times, n_media_channels)` or
+      channel. This is used as the denominator for ROI calculations. It is also
+      used to calculate an assumed cost per media unit for post-modeling
+      analysis such as response curves and budget optimization. Only the
+      aggregate spend (across geos and time periods) is required for these
+      calculations. However, a spend breakdown by geo and time period is
+      required if `roi_calibration_period` is specified or if conducting
+      post-modeling analysis on a specific subset of geos and/or time periods.
+      The DataArray shape can be `(n_geos, n_times, n_media_channels)` or
       `(n_media_channels,)` if the data is aggregated over `geo` and `time`
       dimensions. We recommend that the spend total aligns with the time window
       of the `kpi` and `controls` data, which is the time window over which
@@ -131,7 +162,9 @@ class InputData:
       time window of media executed during the time window. `media` and
       `media_spend` must contain the same number of media channels in the same
       order. If either of these arguments is passed, then the other is not
-      optional.
+      optional. If a tensor of shape `(n_media_channels,)` is passed as
+      `media_spend`, then it will be automatically allocated across geos and
+      times proportinally to `media`.
     reach: An optional `DataArray` of dimensions `(n_geos, n_media_times,
       n_rf_channels)` containing non-negative `reach` values. It is required
       that `n_media_times` ≥ `n_times`, and the final `n_times` time periods
@@ -164,18 +197,26 @@ class InputData:
       others are not optional.
     rf_spend: An optional `DataArray` containing the cost of each reach and
       frequency channel. This is used as the denominator for ROI calculations.
-      The DataArray shape can be `(n_rf_channels,)`, `(n_geos, n_times,
-      n_rf_channels)`, or `(n_geos, n_rf_channels)`. The spend should be
-      aggregated over geo and/or time dimensions that are not represented. We
-      recommend that the spend total aligns with the time window of the `kpi`
-      and `controls` data, which is the time window over which incremental
-      outcome of the ROI numerator is calculated. However, note that incremental
-      outcome is influenced by media execution prior to this time window,
-      through lagged effects, and excludes lagged effects beyond the time window
-      of media executed during the time window. If only `media` data is used,
-      `rf_spend` will be `None`. `reach`, `frequency`, and `rf_spend` must
-      contain the same number of media channels in the same order. If any of
-      these arguments is passed, then the others are not optional.
+      It is also used to calculate an assumed cost per media unit for
+      post-modeling analysis such as response curves and budget optimization.
+      Only the aggregate spend (across geos and time periods) is required for
+      these calculations. However, a spend breakdown by geo and time period is
+      required if `rf_roi_calibration_period` is specified or if conducting
+      post-modeling analysis on a specific subset of geos and/or time periods.
+      The DataArray shape can be `(n_rf_channels,)` or `(n_geos, n_times,
+      n_rf_channels)`. The spend should be aggregated over geo and/or time
+      dimensions that are not represented. We recommend that the spend total
+      aligns with the time window of the `kpi` and `controls` data, which is the
+      time window over which incremental outcome of the ROI numerator is
+      calculated. However, note that incremental outcome is influenced by media
+      execution prior to this time window, through lagged effects, and excludes
+      lagged effects beyond the time window of media executed during the time
+      window. If only `media` data is used, `rf_spend` will be `None`. `reach`,
+      `frequency`, and `rf_spend` must contain the same number of media channels
+      in the same order. If any of these arguments is passed, then the others
+      are not optional. If a tensor of shape `(n_rf_channels,)` is passed as
+      `rf_spend`, then it will be automatically allocated across geos and times
+      proportionally to `(reach * frequency)`.
     organic_media: An optional `DataArray` of dimensions `(n_geos,
       n_media_times, n_organic_media_channels)` containing non-negative organic
       media values. Organic media variables are media activities that have no
@@ -265,6 +306,40 @@ class InputData:
       if isinstance(array, xr.DataArray) and constants.GEO in array.dims:
         array.coords[constants.GEO] = array.coords[constants.GEO].astype(str)
+  # TODO: b/416775065 - Combine with Analyzer._impute_and_aggregate_spend
+  @functools.cached_property
+  def allocated_media_spend(self) -> xr.DataArray | None:
+    """Returns the allocated media spend for each geo and time."""
+    if self.media_spend is not None and len(self.media_spend.shape) == 1:
+      return self._allocate_spend(self.media_spend, self.media)
+    else:
+      return self.media_spend
+  @property
+  def allocated_rf_spend(self) -> xr.DataArray | None:
+    """Returns the allocated RF spend for each geo and time."""
+    if self.rf_spend is not None and len(self.rf_spend.shape) == 1:
+      return self._allocate_spend(self.rf_spend, self.reach * self.frequency)
+    else:
+      return self.rf_spend
+  def aggregate_media_spend(
+      self, calibration_period: np.ndarray | None = None
+  ) -> np.ndarray | None:
+    """Aggregates media spend by channel over the calibration period."""
+    return _aggregate_spend(
+        spend=self.allocated_media_spend, calibration_period=calibration_period
+    )
+  def aggregate_rf_spend(
+      self, calibration_period: np.ndarray | None = None
+  ) -> np.ndarray | None:
+    """Aggregates RF spend by channel over the calibration period."""
+    return _aggregate_spend(
+        spend=self.allocated_rf_spend,
+        calibration_period=calibration_period,
+    )
   @property
   def geo(self) -> xr.DataArray:
     """Returns the geo dimension."""
@@ -424,7 +499,8 @@ class InputData:
   def _validate_names(self):
     """Verifies that the names of the data arrays are correct."""
-    arrays = [
+    # Must match the order of constants.POSSIBLE_INPUT_DATA_ARRAY_NAMES!
+    arrays = (
         self.kpi,
         self.controls,
         self.population,
@@ -438,7 +514,7 @@ class InputData:
         self.reach,
         self.frequency,
         self.rf_spend,
-    ]
+    )
     for array, name in zip(arrays, constants.POSSIBLE_INPUT_DATA_ARRAY_NAMES):
       if array is not None and array.name != name:
@@ -479,7 +555,6 @@ class InputData:
         [
             [constants.RF_CHANNEL],
             [constants.GEO, constants.TIME, constants.RF_CHANNEL],
-            [constants.GEO, constants.RF_CHANNEL],
         ],
     )
     _check_dim_collection(
@@ -848,3 +923,24 @@ class InputData:
       return self.media_spend.values
     else:
       raise ValueError("Both RF and Media are missing.")
+  def get_total_outcome(self) -> np.ndarray:
+    """Returns total outcome, aggregated over geos and times."""
+    if self.revenue_per_kpi is None:
+      return np.sum(self.kpi.values)
+    return np.sum(self.kpi.values * self.revenue_per_kpi.values)
+  def _allocate_spend(self, spend: xr.DataArray, media_units: xr.DataArray):
+    """Allocates spend across geo and time proportionally to media units."""
+    n_times = len(self.kpi.coords[constants.TIME])
+    selected_media_units = media_units.isel(media_time=slice(-n_times, None))
+    total_media_units_per_channel = selected_media_units.sum(
+        dim=["geo", "media_time"]
+    )
+    proportions = selected_media_units / total_media_units_per_channel
+    expanded_spend = spend.expand_dims({
+        "geo": selected_media_units["geo"],
+        "media_time": selected_media_units["media_time"],
+    })
+    allocated_spend = expanded_spend * proportions
+    return allocated_spend.rename({"media_time": "time"})

meridian/data/test_utils.py CHANGED Viewed

@@ -65,12 +65,24 @@ _REQUIRED_COORDS = immutabledict.immutabledict({
     c.MEDIA_TIME: _sample_times(n_times=3),
     c.CONTROL_VARIABLE: ['control_0', 'control_1'],
 })
+_NON_MEDIA_COORDS = immutabledict.immutabledict(
+    {c.NON_MEDIA_CHANNEL: ['non_media_channel_0', 'non_media_channel_1']}
+)
 _MEDIA_COORDS = immutabledict.immutabledict(
     {c.MEDIA_CHANNEL: ['media_channel_0', 'media_channel_1', 'media_channel_2']}
 )
+_ORGANIC_MEDIA_COORDS = immutabledict.immutabledict({
+    c.ORGANIC_MEDIA_CHANNEL: [
+        'organic_media_channel_0',
+        'organic_media_channel_1',
+    ]
+})
 _RF_COORDS = immutabledict.immutabledict(
     {c.RF_CHANNEL: ['rf_channel_0', 'rf_channel_1']}
 )
+_ORGANIC_RF_COORDS = immutabledict.immutabledict(
+    {c.ORGANIC_RF_CHANNEL: ['organic_rf_channel_0', 'organic_rf_channel_1']}
+)
 _REQUIRED_DATA_VARS = immutabledict.immutabledict({
     c.KPI: (['geo', 'time'], [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]),
@@ -376,6 +388,70 @@ DATASET_WITHOUT_TIME_VARIATION_IN_REACH = xr.Dataset(
     },
 )
+DATASET_WITHOUT_TIME_VARIATION_IN_ORGANIC_MEDIA = xr.Dataset(
+    coords=_REQUIRED_COORDS
+    | _MEDIA_COORDS
+    | _RF_COORDS
+    | _ORGANIC_MEDIA_COORDS,
+    data_vars=_REQUIRED_DATA_VARS
+    | _MEDIA_DATA_VARS
+    | _RF_DATA_VARS
+    | _OPTIONAL_DATA_VARS
+    | {
+        c.ORGANIC_MEDIA: (
+            ['geo', 'media_time', 'organic_media_channel'],
+            [
+                [[2.1, 2.2], [2.1, 2.21], [2.1, 2.2]],
+                [[2.7, 2.8], [2.7, 2.8], [2.7, 2.8]],
+            ],
+        ),
+    },
+)
+DATASET_WITHOUT_TIME_VARIATION_IN_ORGANIC_REACH = xr.Dataset(
+    coords=_REQUIRED_COORDS
+    | _MEDIA_COORDS
+    | _RF_COORDS
+    | _ORGANIC_RF_COORDS,
+    data_vars=_REQUIRED_DATA_VARS
+    | _MEDIA_DATA_VARS
+    | _RF_DATA_VARS
+    | _OPTIONAL_DATA_VARS
+    | {
+        c.ORGANIC_REACH: (
+            ['geo', 'media_time', 'organic_rf_channel'],
+            [
+                [[2.1, 2.2], [2.11, 2.2], [2.1, 2.2]],
+                [[2.7, 2.8], [2.7, 2.8], [2.7, 2.8]],
+            ],
+        ),
+        c.ORGANIC_FREQUENCY: (
+            ['geo', 'media_time', 'organic_rf_channel'],
+            [
+                [[7.1, 7.2], [7.3, 7.4], [7.5, 7.6]],
+                [[7.11, 7.21], [7.31, 7.41], [7.51, 7.61]],
+            ],
+        ),
+    },
+)
+DATASET_WITHOUT_TIME_VARIATION_IN_NON_MEDIA_TREATMENTS = xr.Dataset(
+    coords=_REQUIRED_COORDS | _MEDIA_COORDS | _RF_COORDS | _NON_MEDIA_COORDS,
+    data_vars=_REQUIRED_DATA_VARS
+    | _MEDIA_DATA_VARS
+    | _RF_DATA_VARS
+    | _OPTIONAL_DATA_VARS
+    | {
+        c.NON_MEDIA_TREATMENTS: (
+            ['geo', 'time', 'non_media_channel'],
+            [
+                [[2.1, 2.2], [2.1, 2.2], [2.1, 2.2]],
+                [[2.7, 2.8], [2.7, 2.8], [2.7, 2.8]],
+            ],
+        ),
+    },
+)
 _NATIONAL_COORDS = immutabledict.immutabledict({
     c.TIME: [
         _SAMPLE_START_DATE.strftime(c.DATE_FORMAT),
@@ -1491,17 +1567,52 @@ def sample_input_data_from_dataset(
     dataset: xr.Dataset, kpi_type: str
 ) -> input_data.InputData:
   """Generates a sample `InputData` from a full xarray Dataset."""
+  media = dataset.media if c.MEDIA in dataset.data_vars.keys() else None
+  media_spend = (
+      dataset.media_spend if c.MEDIA_SPEND in dataset.data_vars.keys() else None
+  )
+  reach = dataset.reach if c.REACH in dataset.data_vars.keys() else None
+  frequency = (
+      dataset.frequency if c.FREQUENCY in dataset.data_vars.keys() else None
+  )
+  rf_spend = (
+      dataset.rf_spend if c.RF_SPEND in dataset.data_vars.keys() else None
+  )
+  organic_media = (
+      dataset.organic_media
+      if c.ORGANIC_MEDIA in dataset.data_vars.keys()
+      else None
+  )
+  organic_reach = (
+      dataset.organic_reach
+      if c.ORGANIC_REACH in dataset.data_vars.keys()
+      else None
+  )
+  organic_frequency = (
+      dataset.organic_frequency
+      if c.ORGANIC_FREQUENCY in dataset.data_vars.keys()
+      else None
+  )
+  non_media_treatments = (
+      dataset.non_media_treatments
+      if c.NON_MEDIA_TREATMENTS in dataset.data_vars.keys()
+      else None
+  )
   return input_data.InputData(
       kpi=dataset.kpi,
       kpi_type=kpi_type,
       revenue_per_kpi=dataset.revenue_per_kpi,
       population=dataset.population,
       controls=dataset.controls,
-      media=dataset.media,
-      media_spend=dataset.media_spend,
-      reach=dataset.reach,
-      frequency=dataset.frequency,
-      rf_spend=dataset.rf_spend,
+      media=media,
+      media_spend=media_spend,
+      reach=reach,
+      frequency=frequency,
+      rf_spend=rf_spend,
+      organic_media=organic_media,
+      organic_reach=organic_reach,
+      organic_frequency=organic_frequency,
+      non_media_treatments=non_media_treatments,
   )

meridian/data/time_coordinates.py CHANGED Viewed

@@ -36,7 +36,7 @@ __all__ = [
 # A type alias for a polymorphic "date" type.
-Date: TypeAlias = str | datetime.datetime | datetime.date | np.datetime64
+Date: TypeAlias = str | datetime.datetime | datetime.date | np.datetime64 | None
 # A type alias for a polymorphic "date interval" type. In all variants it is
 # always a tuple of (start_date, end_date).
@@ -236,8 +236,8 @@ class TimeCoordinates:
   def expand_selected_time_dims(
       self,
-      start_date: Date | None = None,
-      end_date: Date | None = None,
+      start_date: Date = None,
+      end_date: Date = None,
   ) -> list[datetime.date] | None:
     """Validates and returns time dimension values based on the selected times.

google-meridian 1.0.9__py3-none-any.whl → 1.1.0__py3-none-any.whl

google-meridian 1.0.9__py3-none-any.whl → 1.1.0__py3-none-any.whl