PyPI - google-meridian - Versions diffs - 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl - Mend

google-meridian 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

{google_meridian-1.2.0.dist-info → google_meridian-1.3.0.dist-info}/METADATA +10 -10
google_meridian-1.3.0.dist-info/RECORD +62 -0
meridian/analysis/__init__.py +2 -0
meridian/analysis/analyzer.py +280 -142
meridian/analysis/formatter.py +2 -2
meridian/analysis/optimizer.py +353 -169
meridian/analysis/review/__init__.py +20 -0
meridian/analysis/review/checks.py +721 -0
meridian/analysis/review/configs.py +110 -0
meridian/analysis/review/constants.py +40 -0
meridian/analysis/review/results.py +544 -0
meridian/analysis/review/reviewer.py +186 -0
meridian/analysis/summarizer.py +14 -12
meridian/analysis/templates/chips.html.jinja +12 -0
meridian/analysis/test_utils.py +27 -5
meridian/analysis/visualizer.py +45 -50
meridian/backend/__init__.py +698 -55
meridian/backend/config.py +75 -16
meridian/backend/test_utils.py +127 -1
meridian/constants.py +52 -11
meridian/data/input_data.py +7 -2
meridian/data/test_utils.py +5 -3
meridian/mlflow/autolog.py +2 -2
meridian/model/__init__.py +1 -0
meridian/model/adstock_hill.py +10 -9
meridian/model/eda/__init__.py +3 -0
meridian/model/eda/constants.py +21 -0
meridian/model/eda/eda_engine.py +1580 -84
meridian/model/eda/eda_outcome.py +200 -0
meridian/model/eda/eda_spec.py +84 -0
meridian/model/eda/meridian_eda.py +220 -0
meridian/model/knots.py +56 -50
meridian/model/media.py +10 -8
meridian/model/model.py +79 -16
meridian/model/model_test_data.py +53 -9
meridian/model/posterior_sampler.py +398 -391
meridian/model/prior_distribution.py +114 -39
meridian/model/prior_sampler.py +146 -90
meridian/model/spec.py +7 -8
meridian/model/transformers.py +16 -8
meridian/version.py +1 -1
google_meridian-1.2.0.dist-info/RECORD +0 -52
{google_meridian-1.2.0.dist-info → google_meridian-1.3.0.dist-info}/WHEEL +0 -0
{google_meridian-1.2.0.dist-info → google_meridian-1.3.0.dist-info}/licenses/LICENSE +0 -0
{google_meridian-1.2.0.dist-info → google_meridian-1.3.0.dist-info}/top_level.txt +0 -0

meridian/model/media.py CHANGED Viewed

@@ -63,8 +63,8 @@ class MediaTensors:
     media_spend: A tensor constructed from `InputData.media_spend`.
     media_transformer: A `MediaTransformer` to scale media tensors using the
       model's media data.
-    media_scaled: The media tensor normalized by population and by the median
-      value.
+    media_scaled: The media tensor after pre-modeling transformations including
+      population scaling and scaling by the median non-zero value.
     prior_media_scaled_counterfactual: A tensor containing `media_scaled` values
       corresponding to the counterfactual scenario required for the prior
       calculation. For ROI priors, the counterfactual scenario is where media is
@@ -169,8 +169,9 @@ class OrganicMediaTensors:
     organic_media: A tensor constructed from `InputData.organic_media`.
     organic_media_transformer: A `MediaTransformer` to scale media tensors using
       the model's organic media data.
-    organic_media_scaled: The organic media tensor normalized by population and
-      by the median value.
+    organic_media_scaled: The organic media tensor after pre-modeling
+      transformations including population scaling and scaling by the median
+      non-zero value.
   """
   organic_media: backend.Tensor | None = None
@@ -214,8 +215,8 @@ class RfTensors:
     rf_spend: A tensor constructed from `InputData.rf_spend`.
     reach_transformer: A `MediaTransformer` to scale RF tensors using the
       model's RF data.
-    reach_scaled: A reach tensor normalized by population and by the median
-      value.
+    reach_scaled: A reach tensor after pre-modeling transformations including
+      population scaling and scaling by the median non-zero value.
     prior_reach_scaled_counterfactual: A tensor containing `reach_scaled` values
       corresponding to the counterfactual scenario required for the prior
       calculation. For ROI priors, the counterfactual scenario is where reach is
@@ -324,8 +325,9 @@ class OrganicRfTensors:
     organic_frequency: A tensor constructed from `InputData.organic_frequency`.
     organic_reach_transformer: A `MediaTransformer` to scale organic RF tensors
       using the model's organic RF data.
-    organic_reach_scaled: An organic reach tensor normalized by population and
-      by the median value.
+    organic_reach_scaled: An organic reach tensor after pre-modeling
+      transformations including population scaling and scaling by the median
+      non-zero value.
   """
   organic_reach: backend.Tensor | None = None

meridian/model/model.py CHANGED Viewed

@@ -14,6 +14,7 @@
 """Meridian module for the geo-level Bayesian hierarchical media mix model."""
+import collections
 from collections.abc import Mapping, Sequence
 import functools
 import numbers
@@ -34,19 +35,26 @@ from meridian.model import prior_distribution
 from meridian.model import prior_sampler
 from meridian.model import spec
 from meridian.model import transformers
+from meridian.model.eda import eda_engine
+from meridian.model.eda import eda_outcome
+from meridian.model.eda import eda_spec as eda_spec_module
 import numpy as np
 __all__ = [
     "MCMCSamplingError",
     "MCMCOOMError",
     "Meridian",
+    "ModelFittingError",
     "NotFittedModelError",
     "save_mmm",
     "load_mmm",
 ]
+class ModelFittingError(Exception):
+  """Model has critical issues preventing fitting."""
 class NotFittedModelError(Exception):
   """Model has not been fitted."""
@@ -91,6 +99,10 @@ class Meridian:
     model_spec: A `ModelSpec` object containing the model specification.
     inference_data: A _mutable_ `arviz.InferenceData` object containing the
       resulting data from fitting the model.
+    eda_engine: An `EDAEngine` object containing the EDA engine.
+    eda_spec: An `EDASpec` object containing the EDA specification.
+    eda_outcomes: A list of `EDAOutcome` objects containing the outcomes from
+      running critical EDA checks.
     n_geos: Number of geos in the data.
     n_media_channels: Number of media channels in the data.
     n_rf_channels: Number of reach and frequency (RF) channels in the data.
@@ -126,11 +138,17 @@ class Meridian:
       treatment tensors using the model's non-media treatment data.
     kpi_transformer: A `KpiTransformer` to scale KPI tensors using the model's
       KPI data.
-    controls_scaled: The controls tensor normalized by population and by the
-      median value.
-    non_media_treatments_scaled: The non-media treatment tensor normalized by
-      population and by the median value.
-    kpi_scaled: The KPI tensor normalized by population and by the median value.
+    controls_scaled: The controls tensor after pre-modeling transformations
+      including population scaling (for variables with
+      `ModelSpec.control_population_scaling_id` set to `True`), centering by the
+      mean, and scaling by the standard deviation.
+    non_media_treatments_scaled: The non-media treatment tensor after
+      pre-modeling transformations including population scaling (for variables
+      with `ModelSpec.non_media_population_scaling_id` set to `True`), centering
+      by the mean, and scaling by the standard deviation.
+    kpi_scaled: The KPI tensor after pre-modeling transformations including
+      population scaling, centering by the mean, and scaling by the standard
+      deviation.
     media_effects_dist: A string to specify the distribution of media random
       effects across geos.
     unique_sigma_for_each_geo: A boolean indicating whether to use a unique
@@ -148,6 +166,7 @@ class Meridian:
       inference_data: (
           az.InferenceData | None
       ) = None,  # for deserializer use only
+      eda_spec: eda_spec_module.EDASpec = eda_spec_module.EDASpec(),
   ):
     self._input_data = input_data
     self._model_spec = model_spec if model_spec else spec.ModelSpec()
@@ -155,6 +174,8 @@ class Meridian:
         inference_data if inference_data else az.InferenceData()
     )
+    self._eda_spec = eda_spec
     self._validate_data_dependent_model_spec()
     self._validate_injected_inference_data()
@@ -184,6 +205,18 @@ class Meridian:
   def inference_data(self) -> az.InferenceData:
     return self._inference_data
+  @functools.cached_property
+  def eda_engine(self) -> eda_engine.EDAEngine:
+    return eda_engine.EDAEngine(self, spec=self._eda_spec)
+  @property
+  def eda_spec(self) -> eda_spec_module.EDASpec:
+    return self._eda_spec
+  @property
+  def eda_outcomes(self) -> Sequence[eda_outcome.EDAOutcome]:
+    return self.eda_engine.run_all_critical_checks()
   @functools.cached_property
   def media_tensors(self) -> media.MediaTensors:
     return media.build_media_tensors(self.input_data, self.model_spec)
@@ -444,7 +477,8 @@ class Meridian:
           f" {tuple(self.model_spec.adstock_decay_spec.keys())}. Keys should"
           " either contain only channel_names"
           f" {tuple(self.input_data.get_all_adstock_hill_channels().tolist())} or"
-          " be one or more of {'media', 'rf', 'organic_media', 'organic_rf'}."
+          " be one or more of {'media', 'rf', 'organic_media',"
+          " 'organic_rf'}."
       ) from e
   @functools.cached_property
@@ -561,7 +595,9 @@ class Meridian:
             non_media_treatments_population_scaled[..., channel], axis=[0, 1]
         )
       elif isinstance(baseline_value, numbers.Number):
-        baseline_for_channel = backend.cast(baseline_value, backend.float32)
+        baseline_for_channel = backend.to_tensor(
+            baseline_value, dtype=backend.float32
+        )
       else:
         raise ValueError(
             f"Invalid non_media_baseline_values value: '{baseline_value}'. Only"
@@ -1135,16 +1171,11 @@ class Meridian:
             " time."
         )
-  def _kpi_has_variability(self):
-    """Returns True if the KPI has variability across geos and times."""
-    return self.kpi_transformer.population_scaled_stdev != 0
   def _validate_kpi_transformer(self):
     """Validates the KPI transformer."""
-    if self._kpi_has_variability():
+    if self.eda_engine.kpi_has_variability:
       return
-    kpi = "kpi" if self.is_national else "population_scaled_kpi"
+    kpi = self.eda_engine.kpi_scaled_da.name
     if (
         self.n_media_channels > 0
@@ -1569,6 +1600,36 @@ class Meridian:
     """
     self.prior_sampler_callable(n_draws=n_draws, seed=seed)
+  def _run_model_fitting_guardrail(self):
+    """Raises an error if the model has critical EDA issues."""
+    error_findings_by_type: dict[eda_outcome.EDACheckType, list[str]] = (
+        collections.defaultdict(list)
+    )
+    for outcome in self.eda_outcomes:
+      error_findings = [
+          finding
+          for finding in outcome.findings
+          if finding.severity == eda_outcome.EDASeverity.ERROR
+      ]
+      if error_findings:
+        error_findings_by_type[outcome.check_type].extend(
+            [finding.explanation for finding in error_findings]
+        )
+    if error_findings_by_type:
+      error_message_lines = [
+          "Model has critical EDA issues. Please fix before running"
+          " `sample_posterior`.\n"
+      ]
+      for check_type, explanations in error_findings_by_type.items():
+        error_message_lines.append(f"Check type: {check_type.name}")
+        for explanation in explanations:
+          error_message_lines.append(f"- {explanation}")
+      error_message_lines.append(
+          "For further details, please refer to `Meridian.eda_outcomes`."
+      )
+      raise ModelFittingError("\n".join(error_message_lines))
   def sample_posterior(
       self,
       n_chains: Sequence[int] | int,
@@ -1644,8 +1705,10 @@ class Meridian:
         a list of integers as `n_chains` to sample chains serially. For more
         information, see
         [ResourceExhaustedError when running Meridian.sample_posterior]
-        (https://developers.google.com/meridian/docs/advanced-modeling/model-debugging#gpu-oom-error).
+        (https://developers.google.com/meridian/docs/post-modeling/model-debugging#gpu-oom-error).
     """
+    self._run_model_fitting_guardrail()
     self.posterior_sampler_callable(
         n_chains=n_chains,
         n_adapt=n_adapt,

meridian/model/model_test_data.py CHANGED Viewed

@@ -52,7 +52,9 @@ def _convert_with_swap(array: xr.DataArray, n_burnin: int) -> backend.Tensor:
   else:
     pad_value = 0.0 if array.dtype.kind == "f" else 0
-  burnin = backend.fill([n_burnin] + transposed_tensor.shape[1:], pad_value)
+  burnin = backend.fill(
+      [n_burnin] + list(transposed_tensor.shape[1:]), pad_value
+  )
   return backend.concatenate(
       [burnin, transposed_tensor],
       axis=0,
@@ -122,18 +124,13 @@ class WithInputDataSamples:
   _N_MEDIA_CHANNELS = 3
   _N_RF_CHANNELS = 2
   _N_CONTROLS = 2
-  _ROI_CALIBRATION_PERIOD = backend.cast(
-      backend.ones((_N_MEDIA_TIMES_SHORT, _N_MEDIA_CHANNELS)),
-      dtype=backend.bool_,
-  )
-  _RF_ROI_CALIBRATION_PERIOD = backend.cast(
-      backend.ones((_N_MEDIA_TIMES_SHORT, _N_RF_CHANNELS)),
-      dtype=backend.bool_,
-  )
   _N_ORGANIC_MEDIA_CHANNELS = 4
   _N_ORGANIC_RF_CHANNELS = 1
   _N_NON_MEDIA_CHANNELS = 2
+  _ROI_CALIBRATION_PERIOD: backend.Tensor
+  _RF_ROI_CALIBRATION_PERIOD: backend.Tensor
   # Private class variables to hold the base test data.
   _input_data_non_revenue_no_revenue_per_kpi: input_data.InputData
   _input_data_media_and_rf_non_revenue_no_revenue_per_kpi: input_data.InputData
@@ -159,6 +156,8 @@ class WithInputDataSamples:
   _short_input_data_non_media_and_organic: input_data.InputData
   _short_input_data_non_media: input_data.InputData
   _input_data_non_media_and_organic_same_time_dims: input_data.InputData
+  _input_data_organic_only: input_data.InputData
+  _national_input_data_organic_only: input_data.InputData
   # The following NamedTuples and their attributes are immutable, so they can
   # be accessed directly.
@@ -170,6 +169,15 @@ class WithInputDataSamples:
   @classmethod
   def setup(cls):
     """Sets up input data samples."""
+    cls._ROI_CALIBRATION_PERIOD = backend.cast(
+        backend.ones((cls._N_MEDIA_TIMES_SHORT, cls._N_MEDIA_CHANNELS)),
+        dtype=backend.bool_,
+    )
+    cls._RF_ROI_CALIBRATION_PERIOD = backend.cast(
+        backend.ones((cls._N_MEDIA_TIMES_SHORT, cls._N_RF_CHANNELS)),
+        dtype=backend.bool_,
+    )
     cls._input_data_non_revenue_no_revenue_per_kpi = (
         test_utils.sample_input_data_non_revenue_no_revenue_per_kpi(
             n_geos=cls._N_GEOS,
@@ -490,6 +498,34 @@ class WithInputDataSamples:
             seed=0,
         )
     )
+    cls._input_data_organic_only = (
+        test_utils.sample_input_data_non_revenue_revenue_per_kpi(
+            n_geos=cls._N_GEOS,
+            n_times=cls._N_TIMES,
+            n_media_times=cls._N_MEDIA_TIMES,
+            n_controls=cls._N_CONTROLS,
+            n_non_media_channels=0,
+            n_media_channels=cls._N_MEDIA_CHANNELS,
+            n_rf_channels=0,
+            n_organic_media_channels=cls._N_ORGANIC_MEDIA_CHANNELS,
+            n_organic_rf_channels=cls._N_ORGANIC_RF_CHANNELS,
+            seed=0,
+        )
+    )
+    cls._national_input_data_organic_only = (
+        test_utils.sample_input_data_non_revenue_revenue_per_kpi(
+            n_geos=cls._N_GEOS_NATIONAL,
+            n_times=cls._N_TIMES,
+            n_media_times=cls._N_MEDIA_TIMES,
+            n_controls=cls._N_CONTROLS,
+            n_non_media_channels=0,
+            n_media_channels=cls._N_MEDIA_CHANNELS,
+            n_rf_channels=0,
+            n_organic_media_channels=cls._N_ORGANIC_MEDIA_CHANNELS,
+            n_organic_rf_channels=cls._N_ORGANIC_RF_CHANNELS,
+            seed=0,
+        )
+    )
   @property
   def input_data_non_revenue_no_revenue_per_kpi(self) -> input_data.InputData:
@@ -600,3 +636,11 @@ class WithInputDataSamples:
       self,
   ) -> input_data.InputData:
     return self._input_data_non_media_and_organic_same_time_dims.copy(deep=True)
+  @property
+  def input_data_organic_only(self) -> input_data.InputData:
+    return self._input_data_organic_only.copy(deep=True)
+  @property
+  def national_input_data_organic_only(self) -> input_data.InputData:
+    return self._national_input_data_organic_only.copy(deep=True)

google-meridian 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

google-meridian 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl