PyPI - google-meridian - Versions diffs - 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl - Mend

google-meridian 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

{google_meridian-1.1.1.dist-info → google_meridian-1.1.3.dist-info}/METADATA +6 -2
{google_meridian-1.1.1.dist-info → google_meridian-1.1.3.dist-info}/RECORD +23 -17
meridian/__init__.py +6 -4
meridian/analysis/analyzer.py +61 -19
meridian/analysis/optimizer.py +75 -44
meridian/analysis/visualizer.py +15 -5
meridian/constants.py +1 -0
meridian/data/__init__.py +3 -0
meridian/data/data_frame_input_data_builder.py +614 -0
meridian/data/input_data_builder.py +823 -0
meridian/data/load.py +138 -402
meridian/data/nd_array_input_data_builder.py +509 -0
meridian/mlflow/__init__.py +17 -0
meridian/mlflow/autolog.py +206 -0
meridian/model/media.py +7 -0
meridian/model/model.py +32 -26
meridian/model/posterior_sampler.py +13 -9
meridian/model/prior_sampler.py +4 -6
meridian/model/spec.py +17 -7
meridian/version.py +17 -0
{google_meridian-1.1.1.dist-info → google_meridian-1.1.3.dist-info}/WHEEL +0 -0
{google_meridian-1.1.1.dist-info → google_meridian-1.1.3.dist-info}/licenses/LICENSE +0 -0
{google_meridian-1.1.1.dist-info → google_meridian-1.1.3.dist-info}/top_level.txt +0 -0

meridian/mlflow/autolog.py ADDED Viewed

@@ -0,0 +1,206 @@
+# Copyright 2025 The Meridian Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""MLflow autologging integration for Meridian.
+This module enables MLflow tracking for Meridian. When enabled via `autolog()`,
+parameters and other metadata are automatically logged to MLflow; model metrics
+are logged as well when `log_metrics=True` is passed. This allows for improved
+experiment tracking and analysis.
+To enable MLflow autologging for your Meridian workflows, simply call
+`autolog.autolog()` once before your model run.
+Example usage:
+```python
+import mlflow
+from meridian.data import load
+from meridian.mlflow import autolog
+from meridian.model import model
+# Enable autologging (call this once per session)
+autolog.autolog(log_metrics=True)
+# Start an MLflow run (optionally name it for better grouping)
+with mlflow.start_run(run_name="my_run"):
+  # Load data
+  data = load.CsvDataLoader(...).load()
+  # Initialize Meridian model
+  mmm = model.Meridian(input_data=data)
+  # Run Meridian sampling processes
+  mmm.sample_prior(n_draws=100, seed=123)
+  mmm.sample_posterior(
+      n_chains=7, n_adapt=500, n_burnin=500, n_keep=1000, seed=1
+  )
+# After the run completes, you can retrieve run results using the MLflow client.
+client = mlflow.tracking.MlflowClient()
+# Get the experiment ID for the run you just launched
+experiment_id = "0"
+# Search for runs matching the run name
+runs = client.search_runs(
+    experiment_id,
+    max_results=1000,
+    filter_string="attributes.run_name = 'my_run'"
+)
+# Print details of the run
+if runs:
+  print(runs[0])
+else:
+  print("No runs found.")
+```
+"""
+import dataclasses
+import inspect
+import json
+from typing import Any, Callable
+import arviz as az
+from meridian.analysis import visualizer
+import mlflow
+from mlflow.utils.autologging_utils import autologging_integration, safe_patch
+from meridian.model import model
+from meridian.model import posterior_sampler
+from meridian.model import prior_sampler
+from meridian.model import spec
+from meridian.version import __version__
+import numpy as np
+import tensorflow_probability as tfp
+# Flavor name used for this integration; it is passed to
+# `autologging_integration` and to every `safe_patch` call in `autolog`.
+FLAVOR_NAME = "meridian"
+# `autolog` is the only name exported by this module.
+__all__ = ["autolog"]
+def _log_versions() -> None:
+  """Records the Meridian and ArviZ package versions as MLflow params."""
+  # (param name, version string) pairs, logged in this order.
+  versions = (
+      ("meridian_version", __version__),
+      ("arviz_version", az.__version__),
+  )
+  for param_name, version in versions:
+    mlflow.log_param(param_name, version)
+def _log_model_spec(model_spec: spec.ModelSpec) -> None:
+  """Logs each `ModelSpec` field, except `prior`, as a `spec.<field>` param."""
+  # TODO: Replace with serde api when it's available.
+  # PriorDistribution is logged separately.
+  skipped_names = ("prior",)
+  for spec_field in dataclasses.fields(model_spec):
+    name = spec_field.name
+    if name not in skipped_names:
+      value = getattr(model_spec, name)
+      # NumPy arrays are logged as JSON-encoded nested lists.
+      if isinstance(value, np.ndarray):
+        value = json.dumps(value.tolist())
+      mlflow.log_param(f"spec.{name}", value)
+def _log_priors(model_spec: spec.ModelSpec) -> None:
+  """Logs each field of `model_spec.prior` as a `prior.<field>` param."""
+  # TODO: Replace with serde api when it's available.
+  prior = model_spec.prior
+  for prior_field in dataclasses.fields(prior):
+    name = prior_field.name
+    value = getattr(prior, name)
+    if isinstance(value, tfp.distributions.Distribution):
+      # Distributions are logged via their `str()` representation.
+      logged_value = str(value)
+    elif isinstance(value, np.ndarray):
+      # NumPy arrays are logged as JSON-encoded nested lists.
+      logged_value = json.dumps(value.tolist())
+    else:
+      # Anything else is handed to MLflow unchanged.
+      logged_value = value
+    mlflow.log_param(f"prior.{name}", logged_value)
+@autologging_integration(FLAVOR_NAME)
+def autolog(
+    disable: bool = False,  # pylint: disable=unused-argument
+    silent: bool = False,  # pylint: disable=unused-argument
+    log_metrics: bool = False,
+) -> None:
+  """Enables MLflow tracking for Meridian.
+  Patches `Meridian.__init__`, `PriorDistributionSampler.__call__` and
+  `PosteriorMCMCSampler.__call__` via `safe_patch` so that package versions,
+  `ModelSpec` fields, priors and sampling arguments are logged as MLflow
+  params (`spec.*`, `prior.*`, `sample_prior.*`, `sample_posterior.*`).
+  See https://mlflow.org/docs/latest/tracking/
+  Args:
+    disable: Whether to disable autologging. Not read in this function body;
+      presumably consumed by the `autologging_integration` decorator (confirm
+      against the MLflow documentation).
+    silent: Whether to suppress all event logs and warnings from MLflow. Not
+      read in this function body; presumably consumed by the decorator as well.
+    log_metrics: Whether model metrics should be logged. Enabling this option
+      involves the creation of post-modeling objects to compute relevant
+      performance metrics. Metrics include R-Squared, MAPE, and wMAPE values.
+  """
+  # Wraps `Meridian.__init__`: versions are logged before construction; the
+  # spec and priors are read from `self.model_spec` only after `original` ran.
+  def patch_meridian_init(
+      original: Callable[..., Any], self, *args, **kwargs
+  ) -> model.Meridian:
+    _log_versions()
+    mmm = original(self, *args, **kwargs)
+    _log_model_spec(self.model_spec)
+    _log_priors(self.model_spec)
+    return mmm
+  # Wraps prior sampling. Only `kwargs` is inspected, so an argument that is
+  # omitted or passed positionally is logged as the literal string "default".
+  def patch_prior_sampling(original: Callable[..., Any], self, *args, **kwargs):
+    mlflow.log_param("sample_prior.n_draws", kwargs.get("n_draws", "default"))
+    mlflow.log_param("sample_prior.seed", kwargs.get("seed", "default"))
+    return original(self, *args, **kwargs)
+  # Wraps posterior sampling: logs the call arguments, runs the sampler and
+  # optionally logs accuracy metrics. The result of `original` is not returned.
+  def patch_posterior_sampling(
+      original: Callable[..., Any], self, *args, **kwargs
+  ):
+    excluded_fields = ["current_state", "pins"]
+    # Names of the positional-or-keyword parameters of `original`, without
+    # `self` and the excluded fields.
+    params = [
+        name
+        for name, value in inspect.signature(original).parameters.items()
+        if name != "self"
+        and value.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD
+        and name not in excluded_fields
+    ]
+    # As above, only keyword arguments are seen; anything else is logged as
+    # "default".
+    for param in params:
+      mlflow.log_param(
+          f"sample_posterior.{param}", kwargs.get(param, "default")
+      )
+    original(self, *args, **kwargs)
+    if log_metrics:
+      # `self` is the patched sampler; `self.model` is passed to the
+      # diagnostics object that produces the predictive accuracy table.
+      model_diagnostics = visualizer.ModelDiagnostics(self.model)
+      df_diag = model_diagnostics.predictive_accuracy_table()
+      # Takes the first row whose `metric` column equals `n`.
+      get_metric = lambda n: df_diag[df_diag.metric == n].value.to_list()[0]
+      mlflow.log_metric("R_Squared", get_metric("R_Squared"))
+      mlflow.log_metric("MAPE", get_metric("MAPE"))
+      mlflow.log_metric("wMAPE", get_metric("wMAPE"))
+  # Install the three patches under this integration's flavor name.
+  safe_patch(FLAVOR_NAME, model.Meridian, "__init__", patch_meridian_init)
+  safe_patch(
+      FLAVOR_NAME,
+      prior_sampler.PriorDistributionSampler,
+      "__call__",
+      patch_prior_sampling,
+  )
+  safe_patch(
+      FLAVOR_NAME,
+      posterior_sampler.PosteriorMCMCSampler,
+      "__call__",
+      patch_posterior_sampling,
+  )

meridian/model/media.py CHANGED Viewed

@@ -207,6 +207,8 @@ class RfTensors:
   Attributes:
     reach: A tensor constructed from `InputData.reach`.
     frequency: A tensor constructed from `InputData.frequency`.
+    rf_impressions: A tensor constructed from `InputData.reach` *
+      `InputData.frequency`.
     rf_spend: A tensor constructed from `InputData.rf_spend`.
     reach_transformer: A `MediaTransformer` to scale RF tensors using the
       model's RF data.
@@ -233,6 +235,7 @@ class RfTensors:
   reach: tf.Tensor | None = None
   frequency: tf.Tensor | None = None
+  rf_impressions: tf.Tensor | None = None
   rf_spend: tf.Tensor | None = None
   reach_transformer: transformers.MediaTransformer | None = None
   reach_scaled: tf.Tensor | None = None
@@ -250,6 +253,9 @@ def build_rf_tensors(
   reach = tf.convert_to_tensor(input_data.reach, dtype=tf.float32)
   frequency = tf.convert_to_tensor(input_data.frequency, dtype=tf.float32)
+  rf_impressions = (
+      reach * frequency if reach is not None and frequency is not None else None
+  )
   rf_spend = tf.convert_to_tensor(input_data.rf_spend, dtype=tf.float32)
   reach_transformer = transformers.MediaTransformer(
       reach, tf.convert_to_tensor(input_data.population, dtype=tf.float32)
@@ -292,6 +298,7 @@ def build_rf_tensors(
   return RfTensors(
       reach=reach,
       frequency=frequency,
+      rf_impressions=rf_impressions,
       rf_spend=rf_spend,
       reach_transformer=reach_transformer,
       reach_scaled=reach_scaled,

meridian/model/model.py CHANGED Viewed

@@ -1046,16 +1046,24 @@ class Meridian:
     mask = tf.equal(counts, self.n_geos)
     col_idx_bad = tf.boolean_mask(col_idx_unique, mask)
     dims_bad = tf.gather(data_dims, col_idx_bad)
-    if col_idx_bad.shape[0] and not self.is_national:
-      raise ValueError(
-          f"The following {data_name} variables do not vary across time, making"
-          f" a model with geo main effects unidentifiable: {dims_bad}. This can"
-          " lead to poor model convergence. Since these variables only vary"
-          " across geo and not across time, they are collinear with geo and"
-          " redundant in a model with geo main effects. To address this, drop"
-          " the listed variables that do not vary across time."
-      )
+    if col_idx_bad.shape[0]:
+      if self.is_national:
+        raise ValueError(
+            f"The following {data_name} variables do not vary across time,"
+            " which is equivalent to no signal at all in a national model:"
+            f" {dims_bad}.  This can lead to poor model convergence. To address"
+            " this, drop the listed variables that do not vary across time."
+        )
+      else:
+        raise ValueError(
+            f"The following {data_name} variables do not vary across time,"
+            f" making a model with geo main effects unidentifiable: {dims_bad}."
+            " This can lead to poor model convergence. Since these variables"
+            " only vary across geo and not across time, they are collinear"
+            " with geo and redundant in a model with geo main effects. To"
+            " address this, drop the listed variables that do not vary across"
+            " time."
+        )
   def _validate_kpi_transformer(self):
     """Validates the KPI transformer."""
@@ -1439,8 +1447,7 @@ class Meridian:
         see [PRNGS and seeds]
         (https://github.com/tensorflow/probability/blob/main/PRNGS.md).
     """
-    prior_inference_data = self.prior_sampler_callable(n_draws, seed)
-    self.inference_data.extend(prior_inference_data, join="right")
+    self.prior_sampler_callable(n_draws=n_draws, seed=seed)
   def sample_posterior(
       self,
@@ -1519,22 +1526,21 @@ class Meridian:
         [ResourceExhaustedError when running Meridian.sample_posterior]
         (https://developers.google.com/meridian/docs/advanced-modeling/model-debugging#gpu-oom-error).
     """
-    posterior_inference_data = self.posterior_sampler_callable(
-        n_chains,
-        n_adapt,
-        n_burnin,
-        n_keep,
-        current_state,
-        init_step_size,
-        dual_averaging_kwargs,
-        max_tree_depth,
-        max_energy_diff,
-        unrolled_leapfrog_steps,
-        parallel_iterations,
-        seed,
+    self.posterior_sampler_callable(
+        n_chains=n_chains,
+        n_adapt=n_adapt,
+        n_burnin=n_burnin,
+        n_keep=n_keep,
+        current_state=current_state,
+        init_step_size=init_step_size,
+        dual_averaging_kwargs=dual_averaging_kwargs,
+        max_tree_depth=max_tree_depth,
+        max_energy_diff=max_energy_diff,
+        unrolled_leapfrog_steps=unrolled_leapfrog_steps,
+        parallel_iterations=parallel_iterations,
+        seed=seed,
         **pins,
     )
-    self.inference_data.extend(posterior_inference_data, join="right")
 def save_mmm(mmm: Meridian, file_path: str):

meridian/model/posterior_sampler.py CHANGED Viewed

@@ -85,9 +85,13 @@ class PosteriorMCMCSampler:
   def __init__(self, meridian: "model.Meridian"):
     self._meridian = meridian
+  @property
+  def model(self) -> "model.Meridian":
+    return self._meridian
   def _get_joint_dist_unpinned(self) -> tfp.distributions.Distribution:
     """Returns a `JointDistributionCoroutineAutoBatched` function for MCMC."""
-    mmm = self._meridian
+    mmm = self.model
     mmm.populate_cached_properties()
     # This lists all the derived properties and states of this Meridian object
@@ -453,7 +457,7 @@ class PosteriorMCMCSampler:
     return joint_dist_unpinned
   def _get_joint_dist(self) -> tfp.distributions.Distribution:
-    mmm = self._meridian
+    mmm = self.model
     y = (
         tf.where(mmm.holdout_id, 0.0, mmm.kpi_scaled)
         if mmm.holdout_id is not None
@@ -476,7 +480,7 @@ class PosteriorMCMCSampler:
       parallel_iterations: int = 10,
       seed: Sequence[int] | int | None = None,
       **pins,
-  ) -> az.InferenceData:
+  ) -> None:
     """Runs Markov Chain Monte Carlo (MCMC) sampling of posterior distributions.
     For more information about the arguments, see [`windowed_adaptive_nuts`]
@@ -529,9 +533,6 @@ class PosteriorMCMCSampler:
       **pins: These are used to condition the provided joint distribution, and
         are passed directly to `joint_dist.experimental_pin(**pins)`.
-    Returns:
-      An Arviz `InferenceData` object containing posterior samples only.
     Throws:
       MCMCOOMError: If the model is out of memory. Try reducing `n_keep` or pass
         a list of integers as `n_chains` to sample chains serially. For more
@@ -589,10 +590,10 @@ class PosteriorMCMCSampler:
         if k not in constants.UNSAVED_PARAMETERS
     }
     # Create Arviz InferenceData for posterior draws.
-    posterior_coords = self._meridian.create_inference_data_coords(
+    posterior_coords = self.model.create_inference_data_coords(
         total_chains, n_keep
     )
-    posterior_dims = self._meridian.create_inference_data_dims()
+    posterior_dims = self.model.create_inference_data_dims()
     infdata_posterior = az.convert_to_inference_data(
         mcmc_states, coords=posterior_coords, dims=posterior_dims
     )
@@ -654,4 +655,7 @@ class PosteriorMCMCSampler:
         dims=sample_stats_dims,
         group="sample_stats",
     )
-    return az.concat(infdata_posterior, infdata_trace, infdata_sample_stats)
+    posterior_inference_data = az.concat(
+        infdata_posterior, infdata_trace, infdata_sample_stats
+    )
+    self.model.inference_data.extend(posterior_inference_data, join="right")

meridian/model/prior_sampler.py CHANGED Viewed

@@ -588,22 +588,20 @@ class PriorDistributionSampler:
         | non_media_treatments_vars
     )
-  def __call__(self, n_draws: int, seed: int | None = None) -> az.InferenceData:
+  def __call__(self, n_draws: int, seed: int | None = None) -> None:
     """Draws samples from prior distributions.
-    Returns:
-      An Arviz `InferenceData` object containing prior samples only.
     Args:
       n_draws: Number of samples drawn from the prior distribution.
       seed: Used to set the seed for reproducible results. For more information,
         see [PRNGS and seeds]
         (https://github.com/tensorflow/probability/blob/main/PRNGS.md).
     """
-    prior_draws = self._sample_prior(n_draws, seed=seed)
+    prior_draws = self._sample_prior(n_draws=n_draws, seed=seed)
     # Create Arviz InferenceData for prior draws.
     prior_coords = self._meridian.create_inference_data_coords(1, n_draws)
     prior_dims = self._meridian.create_inference_data_dims()
-    return az.convert_to_inference_data(
+    prior_inference_data = az.convert_to_inference_data(
         prior_draws, coords=prior_coords, dims=prior_dims, group=constants.PRIOR
     )
+    self._meridian.inference_data.extend(prior_inference_data, join="right")

meridian/model/spec.py CHANGED Viewed

@@ -109,17 +109,27 @@ class ModelSpec:
     roi_calibration_period: An optional boolean array of shape `(n_media_times,
       n_media_channels)` indicating the subset of `time` that the ROI value of
       the `roi_m` prior applies to. The ROI numerator is the incremental outcome
-      generated during this time period, and the denominator is the spend during
-      this time period. (Spend data by time period is required). If `None`, all
-      times are used. Only used if `media_prior_type` is `'roi'`.
+      generated by media executed during the calibration period. More precisely,
+      it is the difference in expected outcome between the counterfactual where
+      media is set to historical values versus the counterfactual where media is
+      set to zero during the calibration period and set to historical values for
+      all other time periods. The denominator is the channel spend during
+      calibration period (excluding any calibration time periods prior to the
+      first KPI time period). Spend data by time period is required. If `None`,
+      all times are used. Only used if `media_prior_type` is `'roi'`.
       Default: `None`.
     rf_roi_calibration_period: An optional boolean array of shape
       `(n_media_times, n_rf_channels)` indicating the subset of `time` that the
       ROI value of the `roi_rf` prior applies to. The ROI numerator is the
-      incremental outcome generated during this time period, and the denominator
-      is the spend during this time period. (Spend data by time period is
-      required). If `None`, all times are used. Only used if `rf_prior_type` is
-      `'roi'`. Default: `None`.
+      incremental outcome generated by media executed during the calibration
+      period. More precisely, it is the difference in expected outcome between
+      the counterfactual where reach and frequency is set to historical values
+      versus the counterfactual where reach is set to zero during the
+      calibration period and set to historical values for all other time
+      periods. The denominator is the channel spend during calibration period
+      (excluding any calibration time periods prior to the first KPI time
+      period). Spend data by time period is required. If `None`, all times are
+      used. Only used if `rf_prior_type` is `'roi'`. Default: `None`.
     organic_media_prior_type: A string to specify the prior type for the organic
       media coefficients. Allowed values: `'contribution'`, `'coefficient'`.
       `PriorDistribution` contains `contribution_om` and `beta_om`, but only one

meridian/version.py ADDED Viewed

@@ -0,0 +1,17 @@
+# Copyright 2025 The Meridian Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Module for Meridian version."""
+# Version string of this Meridian release.
+__version__ = "1.1.3"

{google_meridian-1.1.1.dist-info → google_meridian-1.1.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{google_meridian-1.1.1.dist-info → google_meridian-1.1.3.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{google_meridian-1.1.1.dist-info → google_meridian-1.1.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

google-meridian 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl

google-meridian 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl