PyPI - google-meridian - Versions diffs - 1.2.1__py3-none-any.whl → 1.3.1__py3-none-any.whl - Mend

google-meridian 1.2.1__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

google_meridian-1.3.1.dist-info/METADATA +209 -0
google_meridian-1.3.1.dist-info/RECORD +76 -0
{google_meridian-1.2.1.dist-info → google_meridian-1.3.1.dist-info}/top_level.txt +1 -0
meridian/analysis/__init__.py +2 -0
meridian/analysis/analyzer.py +179 -105
meridian/analysis/formatter.py +2 -2
meridian/analysis/optimizer.py +227 -87
meridian/analysis/review/__init__.py +20 -0
meridian/analysis/review/checks.py +721 -0
meridian/analysis/review/configs.py +110 -0
meridian/analysis/review/constants.py +40 -0
meridian/analysis/review/results.py +544 -0
meridian/analysis/review/reviewer.py +186 -0
meridian/analysis/summarizer.py +21 -34
meridian/analysis/templates/chips.html.jinja +12 -0
meridian/analysis/test_utils.py +27 -5
meridian/analysis/visualizer.py +41 -57
meridian/backend/__init__.py +457 -118
meridian/backend/test_utils.py +162 -0
meridian/constants.py +39 -3
meridian/model/__init__.py +1 -0
meridian/model/eda/__init__.py +3 -0
meridian/model/eda/constants.py +21 -0
meridian/model/eda/eda_engine.py +1309 -196
meridian/model/eda/eda_outcome.py +200 -0
meridian/model/eda/eda_spec.py +84 -0
meridian/model/eda/meridian_eda.py +220 -0
meridian/model/knots.py +55 -49
meridian/model/media.py +10 -8
meridian/model/model.py +79 -16
meridian/model/model_test_data.py +53 -0
meridian/model/posterior_sampler.py +39 -32
meridian/model/prior_distribution.py +12 -2
meridian/model/prior_sampler.py +146 -90
meridian/model/spec.py +7 -8
meridian/model/transformers.py +11 -3
meridian/version.py +1 -1
schema/__init__.py +18 -0
schema/serde/__init__.py +26 -0
schema/serde/constants.py +48 -0
schema/serde/distribution.py +515 -0
schema/serde/eda_spec.py +192 -0
schema/serde/function_registry.py +143 -0
schema/serde/hyperparameters.py +363 -0
schema/serde/inference_data.py +105 -0
schema/serde/marketing_data.py +1321 -0
schema/serde/meridian_serde.py +413 -0
schema/serde/serde.py +47 -0
schema/serde/test_data.py +4608 -0
schema/utils/__init__.py +17 -0
schema/utils/time_record.py +156 -0
google_meridian-1.2.1.dist-info/METADATA +0 -409
google_meridian-1.2.1.dist-info/RECORD +0 -52
{google_meridian-1.2.1.dist-info → google_meridian-1.3.1.dist-info}/WHEEL +0 -0
{google_meridian-1.2.1.dist-info → google_meridian-1.3.1.dist-info}/licenses/LICENSE +0 -0

meridian/model/model.py CHANGED Viewed

@@ -14,6 +14,7 @@
 """Meridian module for the geo-level Bayesian hierarchical media mix model."""
+import collections
 from collections.abc import Mapping, Sequence
 import functools
 import numbers
@@ -34,19 +35,26 @@ from meridian.model import prior_distribution
 from meridian.model import prior_sampler
 from meridian.model import spec
 from meridian.model import transformers
+from meridian.model.eda import eda_engine
+from meridian.model.eda import eda_outcome
+from meridian.model.eda import eda_spec as eda_spec_module
 import numpy as np
 __all__ = [
     "MCMCSamplingError",
     "MCMCOOMError",
     "Meridian",
+    "ModelFittingError",
     "NotFittedModelError",
     "save_mmm",
     "load_mmm",
 ]
+class ModelFittingError(Exception):
+  """Model has critical issues preventing fitting."""
 class NotFittedModelError(Exception):
   """Model has not been fitted."""
@@ -91,6 +99,10 @@ class Meridian:
     model_spec: A `ModelSpec` object containing the model specification.
     inference_data: A _mutable_ `arviz.InferenceData` object containing the
       resulting data from fitting the model.
+    eda_engine: An `EDAEngine` object containing the EDA engine.
+    eda_spec: An `EDASpec` object containing the EDA specification.
+    eda_outcomes: A list of `EDAOutcome` objects containing the outcomes from
+      running critical EDA checks.
     n_geos: Number of geos in the data.
     n_media_channels: Number of media channels in the data.
     n_rf_channels: Number of reach and frequency (RF) channels in the data.
@@ -126,11 +138,17 @@ class Meridian:
       treatmenttensors using the model's non-media treatment data.
     kpi_transformer: A `KpiTransformer` to scale KPI tensors using the model's
       KPI data.
-    controls_scaled: The controls tensor normalized by population and by the
-      median value.
-    non_media_treatments_scaled: The non-media treatment tensor normalized by
-      population and by the median value.
-    kpi_scaled: The KPI tensor normalized by population and by the median value.
+    controls_scaled: The controls tensor after pre-modeling transformations
+      including population scaling (for variables with
+      `ModelSpec.control_population_scaling_id` set to `True`), centering by the
+      mean, and scaling by the standard deviation.
+    non_media_treatments_scaled: The non-media treatment tensor after
+      pre-modeling transformations including population scaling (for variables
+      with `ModelSpec.non_media_population_scaling_id` set to `True`), centering
+      by the mean, and scaling by the standard deviation.
+    kpi_scaled: The KPI tensor after pre-modeling transformations including
+      population scaling, centering by the mean, and scaling by the standard
+      deviation.
     media_effects_dist: A string to specify the distribution of media random
       effects across geos.
     unique_sigma_for_each_geo: A boolean indicating whether to use a unique
@@ -148,6 +166,7 @@ class Meridian:
       inference_data: (
           az.InferenceData | None
       ) = None,  # for deserializer use only
+      eda_spec: eda_spec_module.EDASpec = eda_spec_module.EDASpec(),
   ):
     self._input_data = input_data
     self._model_spec = model_spec if model_spec else spec.ModelSpec()
@@ -155,6 +174,8 @@ class Meridian:
         inference_data if inference_data else az.InferenceData()
     )
+    self._eda_spec = eda_spec
     self._validate_data_dependent_model_spec()
     self._validate_injected_inference_data()
@@ -184,6 +205,18 @@ class Meridian:
   def inference_data(self) -> az.InferenceData:
     return self._inference_data
+  @functools.cached_property
+  def eda_engine(self) -> eda_engine.EDAEngine:
+    return eda_engine.EDAEngine(self, spec=self._eda_spec)
+  @property
+  def eda_spec(self) -> eda_spec_module.EDASpec:
+    return self._eda_spec
+  @property
+  def eda_outcomes(self) -> Sequence[eda_outcome.EDAOutcome]:
+    return self.eda_engine.run_all_critical_checks()
   @functools.cached_property
   def media_tensors(self) -> media.MediaTensors:
     return media.build_media_tensors(self.input_data, self.model_spec)
@@ -444,7 +477,8 @@ class Meridian:
           f" {tuple(self.model_spec.adstock_decay_spec.keys())}. Keys should"
           " either contain only channel_names"
           f" {tuple(self.input_data.get_all_adstock_hill_channels().tolist())} or"
-          " be one or more of {'media', 'rf', 'organic_media', 'organic_rf'}."
+          " be one or more of {'media', 'rf', 'organic_media',"
+          " 'organic_rf'}."
       ) from e
   @functools.cached_property
@@ -561,7 +595,9 @@ class Meridian:
             non_media_treatments_population_scaled[..., channel], axis=[0, 1]
         )
       elif isinstance(baseline_value, numbers.Number):
-        baseline_for_channel = backend.cast(baseline_value, backend.float32)
+        baseline_for_channel = backend.to_tensor(
+            baseline_value, dtype=backend.float32
+        )
       else:
         raise ValueError(
             f"Invalid non_media_baseline_values value: '{baseline_value}'. Only"
@@ -1135,16 +1171,11 @@ class Meridian:
             " time."
         )
-  def _kpi_has_variability(self):
-    """Returns True if the KPI has variability across geos and times."""
-    return self.kpi_transformer.population_scaled_stdev != 0
   def _validate_kpi_transformer(self):
     """Validates the KPI transformer."""
-    if self._kpi_has_variability():
+    if self.eda_engine.kpi_has_variability:
       return
-    kpi = "kpi" if self.is_national else "population_scaled_kpi"
+    kpi = self.eda_engine.kpi_scaled_da.name
     if (
         self.n_media_channels > 0
@@ -1569,6 +1600,36 @@ class Meridian:
     """
     self.prior_sampler_callable(n_draws=n_draws, seed=seed)
+  def _run_model_fitting_guardrail(self):
+    """Raises an error if the model has critical EDA issues."""
+    error_findings_by_type: dict[eda_outcome.EDACheckType, list[str]] = (
+        collections.defaultdict(list)
+    )
+    for outcome in self.eda_outcomes:
+      error_findings = [
+          finding
+          for finding in outcome.findings
+          if finding.severity == eda_outcome.EDASeverity.ERROR
+      ]
+      if error_findings:
+        error_findings_by_type[outcome.check_type].extend(
+            [finding.explanation for finding in error_findings]
+        )
+    if error_findings_by_type:
+      error_message_lines = [
+          "Model has critical EDA issues. Please fix before running"
+          " `sample_posterior`.\n"
+      ]
+      for check_type, explanations in error_findings_by_type.items():
+        error_message_lines.append(f"Check type: {check_type.name}")
+        for explanation in explanations:
+          error_message_lines.append(f"- {explanation}")
+      error_message_lines.append(
+          "For further details, please refer to `Meridian.eda_outcomes`."
+      )
+      raise ModelFittingError("\n".join(error_message_lines))
   def sample_posterior(
       self,
       n_chains: Sequence[int] | int,
@@ -1644,8 +1705,10 @@ class Meridian:
         a list of integers as `n_chains` to sample chains serially. For more
         information, see
         [ResourceExhaustedError when running Meridian.sample_posterior]
-        (https://developers.google.com/meridian/docs/advanced-modeling/model-debugging#gpu-oom-error).
+        (https://developers.google.com/meridian/docs/post-modeling/model-debugging#gpu-oom-error).
     """
+    self._run_model_fitting_guardrail()
     self.posterior_sampler_callable(
         n_chains=n_chains,
         n_adapt=n_adapt,

meridian/model/model_test_data.py CHANGED Viewed

@@ -143,6 +143,7 @@ class WithInputDataSamples:
   _short_input_data_with_rf_only: input_data.InputData
   _short_input_data_with_media_and_rf: input_data.InputData
   _national_input_data_media_only: input_data.InputData
+  _national_input_data_rf_only: input_data.InputData
   _national_input_data_media_and_rf: input_data.InputData
   _test_dist_media_and_rf: collections.OrderedDict[str, backend.Tensor]
   _test_dist_media_only: collections.OrderedDict[str, backend.Tensor]
@@ -156,6 +157,8 @@ class WithInputDataSamples:
   _short_input_data_non_media_and_organic: input_data.InputData
   _short_input_data_non_media: input_data.InputData
   _input_data_non_media_and_organic_same_time_dims: input_data.InputData
+  _input_data_organic_only: input_data.InputData
+  _national_input_data_organic_only: input_data.InputData
   # The following NamedTuples and their attributes are immutable, so they can
   # be accessed directly.
@@ -280,6 +283,16 @@ class WithInputDataSamples:
             seed=0,
         )
     )
+    cls._national_input_data_rf_only = (
+        test_utils.sample_input_data_non_revenue_revenue_per_kpi(
+            n_geos=cls._N_GEOS_NATIONAL,
+            n_times=cls._N_TIMES,
+            n_media_times=cls._N_MEDIA_TIMES,
+            n_controls=cls._N_CONTROLS,
+            n_rf_channels=cls._N_RF_CHANNELS,
+            seed=0,
+        )
+    )
     cls._national_input_data_media_only = (
         test_utils.sample_input_data_non_revenue_revenue_per_kpi(
             n_geos=cls._N_GEOS_NATIONAL,
@@ -496,6 +509,34 @@ class WithInputDataSamples:
             seed=0,
         )
     )
+    cls._input_data_organic_only = (
+        test_utils.sample_input_data_non_revenue_revenue_per_kpi(
+            n_geos=cls._N_GEOS,
+            n_times=cls._N_TIMES,
+            n_media_times=cls._N_MEDIA_TIMES,
+            n_controls=cls._N_CONTROLS,
+            n_non_media_channels=0,
+            n_media_channels=cls._N_MEDIA_CHANNELS,
+            n_rf_channels=0,
+            n_organic_media_channels=cls._N_ORGANIC_MEDIA_CHANNELS,
+            n_organic_rf_channels=cls._N_ORGANIC_RF_CHANNELS,
+            seed=0,
+        )
+    )
+    cls._national_input_data_organic_only = (
+        test_utils.sample_input_data_non_revenue_revenue_per_kpi(
+            n_geos=cls._N_GEOS_NATIONAL,
+            n_times=cls._N_TIMES,
+            n_media_times=cls._N_MEDIA_TIMES,
+            n_controls=cls._N_CONTROLS,
+            n_non_media_channels=0,
+            n_media_channels=cls._N_MEDIA_CHANNELS,
+            n_rf_channels=0,
+            n_organic_media_channels=cls._N_ORGANIC_MEDIA_CHANNELS,
+            n_organic_rf_channels=cls._N_ORGANIC_RF_CHANNELS,
+            seed=0,
+        )
+    )
   @property
   def input_data_non_revenue_no_revenue_per_kpi(self) -> input_data.InputData:
@@ -551,6 +592,10 @@ class WithInputDataSamples:
   def national_input_data_media_only(self) -> input_data.InputData:
     return self._national_input_data_media_only.copy(deep=True)
+  @property
+  def national_input_data_rf_only(self) -> input_data.InputData:
+    return self._national_input_data_rf_only.copy(deep=True)
   @property
   def national_input_data_media_and_rf(self) -> input_data.InputData:
     return self._national_input_data_media_and_rf.copy(deep=True)
@@ -606,3 +651,11 @@ class WithInputDataSamples:
       self,
   ) -> input_data.InputData:
     return self._input_data_non_media_and_organic_same_time_dims.copy(deep=True)
+  @property
+  def input_data_organic_only(self) -> input_data.InputData:
+    return self._input_data_organic_only.copy(deep=True)
+  @property
+  def national_input_data_organic_only(self) -> input_data.InputData:
+    return self._national_input_data_organic_only.copy(deep=True)

meridian/model/posterior_sampler.py CHANGED Viewed

@@ -72,12 +72,6 @@ def _get_tau_g(
   return backend.tfd.Deterministic(tau_g, name="tau_g")
-@backend.function(autograph=False, jit_compile=True)
-def _xla_windowed_adaptive_nuts(**kwargs):
-  """XLA wrapper for windowed_adaptive_nuts."""
-  return backend.experimental.mcmc.windowed_adaptive_nuts(**kwargs)
 def _joint_dist_unpinned(mmm: "model.Meridian"):
   """Returns unpinned joint distribution."""
@@ -447,26 +441,44 @@ class PosteriorMCMCSampler:
   def __init__(self, meridian: "model.Meridian"):
     self._meridian = meridian
+    self._joint_dist = None
+  def __getstate__(self):
+    state = self.__dict__.copy()
+    # Exclude unpickleable objects.
+    if "_joint_dist" in state:
+      del state["_joint_dist"]
+    return state
+  def __setstate__(self, state):
+    self.__dict__.update(state)
+    self._joint_dist = None
   @property
   def model(self) -> "model.Meridian":
     return self._meridian
+  def _joint_dist_unpinned_fn(self):
+    return _joint_dist_unpinned(self.model)
   def _get_joint_dist_unpinned(self) -> backend.tfd.Distribution:
-    """Returns a `JointDistributionCoroutineAutoBatched` function for MCMC."""
+    """Builds a `JointDistributionCoroutineAutoBatched` function for MCMC."""
     mmm = self.model
     mmm.populate_cached_properties()
-    fn = lambda: _joint_dist_unpinned(mmm)
-    return backend.tfd.JointDistributionCoroutineAutoBatched(fn)
+    return backend.tfd.JointDistributionCoroutineAutoBatched(
+        self._joint_dist_unpinned_fn
+    )
   def _get_joint_dist(self) -> backend.tfd.Distribution:
-    mmm = self.model
-    y = (
-        backend.where(mmm.holdout_id, 0.0, mmm.kpi_scaled)
-        if mmm.holdout_id is not None
-        else mmm.kpi_scaled
-    )
-    return self._get_joint_dist_unpinned().experimental_pin(y=y)
+    if self._joint_dist is None:
+      mmm = self.model
+      y = (
+          backend.where(mmm.holdout_id, 0.0, mmm.kpi_scaled)
+          if mmm.holdout_id is not None
+          else mmm.kpi_scaled
+      )
+      self._joint_dist = self._get_joint_dist_unpinned().experimental_pin(y=y)
+    return self._joint_dist
   def __call__(
       self,
@@ -541,26 +553,22 @@ class PosteriorMCMCSampler:
         a list of integers as `n_chains` to sample chains serially. For more
         information, see
         [ResourceExhaustedError when running Meridian.sample_posterior]
-        (https://developers.google.com/meridian/docs/advanced-modeling/model-debugging#gpu-oom-error).
+        (https://developers.google.com/meridian/docs/post-modeling/model-debugging#gpu-oom-error).
     """
-    if seed is not None and isinstance(seed, Sequence) and len(seed) != 2:
-      raise ValueError(
-          "Invalid seed: Must be either a single integer (stateful seed) or a"
-          " pair of two integers (stateless seed). See"
-          " [tfp.random.sanitize_seed](https://www.tensorflow.org/probability/api_docs/python/tfp/random/sanitize_seed)"
-          " for details."
-      )
-    if seed is not None and isinstance(seed, int):
-      seed = (seed, seed)
-    seed = backend.random.sanitize_seed(seed) if seed is not None else None
+    rng_handler = backend.RNGHandler(seed)
     n_chains_list = [n_chains] if isinstance(n_chains, int) else n_chains
     total_chains = np.sum(n_chains_list)
+    # Clear joint distribution cache prior to sampling.
+    self._joint_dist = None
     states = []
     traces = []
     for n_chains_batch in n_chains_list:
+      kernel_seed = rng_handler.get_kernel_seed()
       try:
-        mcmc = _xla_windowed_adaptive_nuts(
+        mcmc = backend.xla_windowed_adaptive_nuts(
             n_draws=n_burnin + n_keep,
             joint_dist=self._get_joint_dist(),
             n_chains=n_chains_batch,
@@ -572,17 +580,16 @@ class PosteriorMCMCSampler:
             max_energy_diff=max_energy_diff,
             unrolled_leapfrog_steps=unrolled_leapfrog_steps,
             parallel_iterations=parallel_iterations,
-            seed=seed,
+            seed=kernel_seed,
             **pins,
         )
       except backend.errors.ResourceExhaustedError as error:
         raise MCMCOOMError(
             "ERROR: Out of memory. Try reducing `n_keep` or pass a list of"
             " integers as `n_chains` to sample chains serially (see"
-            " https://developers.google.com/meridian/docs/advanced-modeling/model-debugging#gpu-oom-error)"
+            " https://developers.google.com/meridian/docs/post-modeling/model-debugging#gpu-oom-error)"
         ) from error
-      if seed is not None:
-        seed += 1
+      rng_handler = rng_handler.advance_handler()
       states.append(mcmc.all_states._asdict())
       traces.append(mcmc.trace)

meridian/model/prior_distribution.py CHANGED Viewed

@@ -35,6 +35,7 @@ __all__ = [
     'PriorDistribution',
     'distributions_are_equal',
     'lognormal_dist_from_mean_std',
+    'lognormal_dist_from_range',
 ]
@@ -1195,7 +1196,7 @@ def lognormal_dist_from_range(
   """Define a LogNormal distribution from a specified range.
   This function parameterizes lognormal distributions by the bounds of a range,
-  so that the specificed probability mass falls within the bounds defined by
+  so that the specified probability mass falls within the bounds defined by
   `low` and `high`. The probability mass is symmetric about the median. For
   example, to define a lognormal distribution with a 95% probability mass of
   (1, 10), use:
@@ -1210,7 +1211,7 @@ def lognormal_dist_from_range(
     high: Float or array-like denoting the upper bound of range. Values must be
       non-negative.
     mass_percent: Float or array-like denoting the probability mass. Values must
-      be between 0 and 1 (exlusive). Default: 0.95.
+      be between 0 and 1 (exclusive). Default: 0.95.
   Returns:
     A `backend.tfd.LogNormal` object with the input percentage mass falling
@@ -1341,6 +1342,15 @@ def _validate_support(
             f'{parameter_name} was assigned a point mass (deterministic) prior'
             f' at {bounds[i]}, which is not allowed.'
         )
+  elif isinstance(tfp_dist, backend.tfd.TruncatedNormal):
+    # TruncatedNormal quantile method is not reliable, particularly when the
+    # `low` or `high` value falls into extreme percentile of the untruncated
+    # distribution. Note that
+    # `TruncatedNormal.experimental_default_event_space_bijector()([-inf, inf])`
+    # returns the correct support range, so this method could be used if the
+    # `quantile` method is found to be unreliable for other distributions.
+    support_min_vals = tfp_dist.low
+    support_max_vals = tfp_dist.high
   else:
     try:
       support_min_vals = tfp_dist.quantile(0)

google-meridian 1.2.1__py3-none-any.whl → 1.3.1__py3-none-any.whl

google-meridian 1.2.1__py3-none-any.whl → 1.3.1__py3-none-any.whl