google-meridian 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,206 @@
1
+ # Copyright 2025 The Meridian Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """MLflow autologging integration for Meridian.
16
+
17
+ This module enables MLflow tracking for Meridian. When enabled via `autolog()`,
18
+ parameters, metrics, and other metadata will be automatically logged to MLflow,
19
+ allowing for improved experiment tracking and analysis.
20
+
21
+ To enable MLflow autologging for your Meridian workflows, simply call
22
+ `autolog.autolog()` once before your model run.
23
+
24
+ Example usage:
25
+
26
+ ```python
27
+ import mlflow
28
+ from meridian.data import load
29
+ from meridian.mlflow import autolog
30
+ from meridian.model import model
31
+
32
+ # Enable autologging (call this once per session)
33
+ autolog.autolog(log_metrics=True)
34
+
35
+ # Start an MLflow run (optionally name it for better grouping)
36
+ with mlflow.start_run(run_name="my_run"):
37
+ # Load data
38
+ data = load.CsvDataLoader(...).load()
39
+
40
+ # Initialize Meridian model
41
+ mmm = model.Meridian(input_data=data)
42
+
43
+ # Run Meridian sampling processes
44
+ mmm.sample_prior(n_draws=100, seed=123)
45
+ mmm.sample_posterior(n_chains=7, n_adapt=500, n_burnin=500, n_keep=1000,
46
+ seed=1)
47
+
48
+ # After the run completes, you can retrieve run results using the MLflow client.
49
+ client = mlflow.tracking.MlflowClient()
50
+
51
+ # Experiment ID of the run you just launched ("0" is MLflow's default experiment)
52
+ experiment_id = "0"
53
+
54
+ # Search for runs matching the run name
55
+ runs = client.search_runs(
56
+ experiment_id,
57
+ max_results=1000,
58
+ filter_string="attributes.run_name = 'my_run'"
59
+ )
60
+
61
+ # Print details of the run
62
+ if runs:
63
+ print(runs[0])
64
+ else:
65
+ print("No runs found.")
66
+ ```
67
+ """
68
+
69
+ import dataclasses
70
+ import inspect
71
+ import json
72
+ from typing import Any, Callable
73
+
74
+ import arviz as az
75
+ from meridian.analysis import visualizer
76
+ import mlflow
77
+ from mlflow.utils.autologging_utils import autologging_integration, safe_patch
78
+ from meridian.model import model
79
+ from meridian.model import posterior_sampler
80
+ from meridian.model import prior_sampler
81
+ from meridian.model import spec
82
+ from meridian.version import __version__
83
+ import numpy as np
84
+ import tensorflow_probability as tfp
85
+
86
+
87
+ FLAVOR_NAME = "meridian"
88
+
89
+
90
+ __all__ = ["autolog"]
91
+
92
+
93
def _log_versions() -> None:
  """Records the Meridian and ArviZ package versions as MLflow params."""
  versions = {
      "meridian_version": __version__,
      "arviz_version": az.__version__,
  }
  # Dict order is insertion order, so Meridian is logged before ArviZ.
  for param_name, version in versions.items():
    mlflow.log_param(param_name, version)
97
+
98
+
99
def _log_model_spec(model_spec: spec.ModelSpec) -> None:
  """Logs every `ModelSpec` field except `prior` as an MLflow param.

  Each field is logged under the key `spec.<field name>`. Numpy array values
  are converted to a JSON string of nested lists first; every other value is
  handed to MLflow unchanged.

  Args:
    model_spec: The model specification whose dataclass fields are logged.
  """
  # TODO: Replace with serde api when it's available.
  for spec_field in dataclasses.fields(model_spec):
    # PriorDistribution is logged separately.
    if spec_field.name == "prior":
      continue

    value = getattr(model_spec, spec_field.name)
    if isinstance(value, np.ndarray):
      # Stringify numpy arrays before logging.
      value = json.dumps(value.tolist())

    mlflow.log_param(f"spec.{spec_field.name}", value)
116
+
117
+
118
def _log_priors(model_spec: spec.ModelSpec) -> None:
  """Logs every field of `model_spec.prior` as an MLflow param.

  Each field is logged under the key `prior.<field name>`. TFP distributions
  are logged as their `str()` form and numpy arrays as a JSON string of nested
  lists; every other value is handed to MLflow unchanged.

  Args:
    model_spec: The model specification whose `prior` dataclass is logged.
  """
  # TODO: Replace with serde api when it's available.

  def _to_loggable(value):
    """Stringifies Distributions and numpy arrays; passes the rest through."""
    if isinstance(value, tfp.distributions.Distribution):
      return str(value)
    if isinstance(value, np.ndarray):
      return json.dumps(value.tolist())
    return value

  prior_container = model_spec.prior
  for prior_field in dataclasses.fields(prior_container):
    mlflow.log_param(
        f"prior.{prior_field.name}",
        _to_loggable(getattr(prior_container, prior_field.name)),
    )
132
+
133
+
134
@autologging_integration(FLAVOR_NAME)
def autolog(
    disable: bool = False,  # pylint: disable=unused-argument
    silent: bool = False,  # pylint: disable=unused-argument
    log_metrics: bool = False,
) -> None:
  """Enables MLflow tracking for Meridian.

  See https://mlflow.org/docs/latest/tracking/

  Patches `Meridian.__init__`, `PriorDistributionSampler.__call__` and
  `PosteriorMCMCSampler.__call__` so that package versions, the model spec,
  the priors and the sampling arguments are logged as MLflow params. Sampling
  arguments that the caller did not pass explicitly are logged as the string
  `"default"`.

  Args:
    disable: Whether to disable autologging.
    silent: Whether to suppress all event logs and warnings from MLflow.
    log_metrics: Whether model metrics should be logged. Enabling this option
      involves the creation of post-modeling objects to compute relevant
      performance metrics. Metrics include R-Squared, MAPE, and wMAPE values.
  """

  def explicit_arguments(
      original: Callable[..., Any], self, args, kwargs
  ) -> dict[str, Any]:
    """Maps the arguments the caller actually passed to parameter names.

    Binding against the signature resolves positional arguments too, so they
    are no longer reported as "default" just because they are absent from
    `kwargs`.
    """
    try:
      bound = inspect.signature(original).bind_partial(self, *args, **kwargs)
    except (TypeError, ValueError):
      # The arguments do not fit the signature (or no signature is available).
      # Fall back to keyword lookup and let `original` report the call error.
      return dict(kwargs)
    return dict(bound.arguments)

  def patch_meridian_init(
      original: Callable[..., Any], self, *args, **kwargs
  ) -> None:
    _log_versions()
    # `__init__` returns None; the result is forwarded only for fidelity.
    init_result = original(self, *args, **kwargs)
    # The spec is read from the instance, so it is logged after construction.
    _log_model_spec(self.model_spec)
    _log_priors(self.model_spec)
    return init_result

  def patch_prior_sampling(original: Callable[..., Any], self, *args, **kwargs):
    passed = explicit_arguments(original, self, args, kwargs)
    mlflow.log_param("sample_prior.n_draws", passed.get("n_draws", "default"))
    mlflow.log_param("sample_prior.seed", passed.get("seed", "default"))
    return original(self, *args, **kwargs)

  def patch_posterior_sampling(
      original: Callable[..., Any], self, *args, **kwargs
  ):
    excluded_fields = ("current_state", "pins")
    params = [
        name
        for name, value in inspect.signature(original).parameters.items()
        if name != "self"
        and value.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD
        and name not in excluded_fields
    ]

    passed = explicit_arguments(original, self, args, kwargs)
    for param in params:
      mlflow.log_param(
          f"sample_posterior.{param}", passed.get(param, "default")
      )

    # Keep the wrapped call's result so the patch stays transparent to callers.
    result = original(self, *args, **kwargs)
    if log_metrics:
      model_diagnostics = visualizer.ModelDiagnostics(self.model)
      df_diag = model_diagnostics.predictive_accuracy_table()

      for metric_name in ("R_Squared", "MAPE", "wMAPE"):
        # The first matching row of the accuracy table is the one logged.
        metric_value = df_diag[df_diag.metric == metric_name].value.to_list()[0]
        mlflow.log_metric(metric_name, metric_value)
    return result

  safe_patch(FLAVOR_NAME, model.Meridian, "__init__", patch_meridian_init)
  safe_patch(
      FLAVOR_NAME,
      prior_sampler.PriorDistributionSampler,
      "__call__",
      patch_prior_sampling,
  )
  safe_patch(
      FLAVOR_NAME,
      posterior_sampler.PosteriorMCMCSampler,
      "__call__",
      patch_posterior_sampling,
  )
meridian/model/media.py CHANGED
@@ -207,6 +207,8 @@ class RfTensors:
207
207
  Attributes:
208
208
  reach: A tensor constructed from `InputData.reach`.
209
209
  frequency: A tensor constructed from `InputData.frequency`.
210
+ rf_impressions: A tensor constructed from `InputData.reach` *
211
+ `InputData.frequency`.
210
212
  rf_spend: A tensor constructed from `InputData.rf_spend`.
211
213
  reach_transformer: A `MediaTransformer` to scale RF tensors using the
212
214
  model's RF data.
@@ -233,6 +235,7 @@ class RfTensors:
233
235
 
234
236
  reach: tf.Tensor | None = None
235
237
  frequency: tf.Tensor | None = None
238
+ rf_impressions: tf.Tensor | None = None
236
239
  rf_spend: tf.Tensor | None = None
237
240
  reach_transformer: transformers.MediaTransformer | None = None
238
241
  reach_scaled: tf.Tensor | None = None
@@ -250,6 +253,9 @@ def build_rf_tensors(
250
253
 
251
254
  reach = tf.convert_to_tensor(input_data.reach, dtype=tf.float32)
252
255
  frequency = tf.convert_to_tensor(input_data.frequency, dtype=tf.float32)
256
+ rf_impressions = (
257
+ reach * frequency if reach is not None and frequency is not None else None
258
+ )
253
259
  rf_spend = tf.convert_to_tensor(input_data.rf_spend, dtype=tf.float32)
254
260
  reach_transformer = transformers.MediaTransformer(
255
261
  reach, tf.convert_to_tensor(input_data.population, dtype=tf.float32)
@@ -292,6 +298,7 @@ def build_rf_tensors(
292
298
  return RfTensors(
293
299
  reach=reach,
294
300
  frequency=frequency,
301
+ rf_impressions=rf_impressions,
295
302
  rf_spend=rf_spend,
296
303
  reach_transformer=reach_transformer,
297
304
  reach_scaled=reach_scaled,
meridian/model/model.py CHANGED
@@ -1046,16 +1046,24 @@ class Meridian:
1046
1046
  mask = tf.equal(counts, self.n_geos)
1047
1047
  col_idx_bad = tf.boolean_mask(col_idx_unique, mask)
1048
1048
  dims_bad = tf.gather(data_dims, col_idx_bad)
1049
-
1050
- if col_idx_bad.shape[0] and not self.is_national:
1051
- raise ValueError(
1052
- f"The following {data_name} variables do not vary across time, making"
1053
- f" a model with geo main effects unidentifiable: {dims_bad}. This can"
1054
- " lead to poor model convergence. Since these variables only vary"
1055
- " across geo and not across time, they are collinear with geo and"
1056
- " redundant in a model with geo main effects. To address this, drop"
1057
- " the listed variables that do not vary across time."
1058
- )
1049
+ if col_idx_bad.shape[0]:
1050
+ if self.is_national:
1051
+ raise ValueError(
1052
+ f"The following {data_name} variables do not vary across time,"
1053
+ " which is equivalent to no signal at all in a national model:"
1054
+ f" {dims_bad}. This can lead to poor model convergence. To address"
1055
+ " this, drop the listed variables that do not vary across time."
1056
+ )
1057
+ else:
1058
+ raise ValueError(
1059
+ f"The following {data_name} variables do not vary across time,"
1060
+ f" making a model with geo main effects unidentifiable: {dims_bad}."
1061
+ " This can lead to poor model convergence. Since these variables"
1062
+ " only vary across geo and not across time, they are collinear"
1063
+ " with geo and redundant in a model with geo main effects. To"
1064
+ " address this, drop the listed variables that do not vary across"
1065
+ " time."
1066
+ )
1059
1067
 
1060
1068
  def _validate_kpi_transformer(self):
1061
1069
  """Validates the KPI transformer."""
@@ -1439,8 +1447,7 @@ class Meridian:
1439
1447
  see [PRNGS and seeds]
1440
1448
  (https://github.com/tensorflow/probability/blob/main/PRNGS.md).
1441
1449
  """
1442
- prior_inference_data = self.prior_sampler_callable(n_draws, seed)
1443
- self.inference_data.extend(prior_inference_data, join="right")
1450
+ self.prior_sampler_callable(n_draws=n_draws, seed=seed)
1444
1451
 
1445
1452
  def sample_posterior(
1446
1453
  self,
@@ -1519,22 +1526,21 @@ class Meridian:
1519
1526
  [ResourceExhaustedError when running Meridian.sample_posterior]
1520
1527
  (https://developers.google.com/meridian/docs/advanced-modeling/model-debugging#gpu-oom-error).
1521
1528
  """
1522
- posterior_inference_data = self.posterior_sampler_callable(
1523
- n_chains,
1524
- n_adapt,
1525
- n_burnin,
1526
- n_keep,
1527
- current_state,
1528
- init_step_size,
1529
- dual_averaging_kwargs,
1530
- max_tree_depth,
1531
- max_energy_diff,
1532
- unrolled_leapfrog_steps,
1533
- parallel_iterations,
1534
- seed,
1529
+ self.posterior_sampler_callable(
1530
+ n_chains=n_chains,
1531
+ n_adapt=n_adapt,
1532
+ n_burnin=n_burnin,
1533
+ n_keep=n_keep,
1534
+ current_state=current_state,
1535
+ init_step_size=init_step_size,
1536
+ dual_averaging_kwargs=dual_averaging_kwargs,
1537
+ max_tree_depth=max_tree_depth,
1538
+ max_energy_diff=max_energy_diff,
1539
+ unrolled_leapfrog_steps=unrolled_leapfrog_steps,
1540
+ parallel_iterations=parallel_iterations,
1541
+ seed=seed,
1535
1542
  **pins,
1536
1543
  )
1537
- self.inference_data.extend(posterior_inference_data, join="right")
1538
1544
 
1539
1545
 
1540
1546
  def save_mmm(mmm: Meridian, file_path: str):
@@ -85,9 +85,13 @@ class PosteriorMCMCSampler:
85
85
  def __init__(self, meridian: "model.Meridian"):
86
86
  self._meridian = meridian
87
87
 
88
+ @property
89
+ def model(self) -> "model.Meridian":
90
+ return self._meridian
91
+
88
92
  def _get_joint_dist_unpinned(self) -> tfp.distributions.Distribution:
89
93
  """Returns a `JointDistributionCoroutineAutoBatched` function for MCMC."""
90
- mmm = self._meridian
94
+ mmm = self.model
91
95
  mmm.populate_cached_properties()
92
96
 
93
97
  # This lists all the derived properties and states of this Meridian object
@@ -453,7 +457,7 @@ class PosteriorMCMCSampler:
453
457
  return joint_dist_unpinned
454
458
 
455
459
  def _get_joint_dist(self) -> tfp.distributions.Distribution:
456
- mmm = self._meridian
460
+ mmm = self.model
457
461
  y = (
458
462
  tf.where(mmm.holdout_id, 0.0, mmm.kpi_scaled)
459
463
  if mmm.holdout_id is not None
@@ -476,7 +480,7 @@ class PosteriorMCMCSampler:
476
480
  parallel_iterations: int = 10,
477
481
  seed: Sequence[int] | int | None = None,
478
482
  **pins,
479
- ) -> az.InferenceData:
483
+ ) -> None:
480
484
  """Runs Markov Chain Monte Carlo (MCMC) sampling of posterior distributions.
481
485
 
482
486
  For more information about the arguments, see [`windowed_adaptive_nuts`]
@@ -529,9 +533,6 @@ class PosteriorMCMCSampler:
529
533
  **pins: These are used to condition the provided joint distribution, and
530
534
  are passed directly to `joint_dist.experimental_pin(**pins)`.
531
535
 
532
- Returns:
533
- An Arviz `InferenceData` object containing posterior samples only.
534
-
535
536
  Throws:
536
537
  MCMCOOMError: If the model is out of memory. Try reducing `n_keep` or pass
537
538
  a list of integers as `n_chains` to sample chains serially. For more
@@ -589,10 +590,10 @@ class PosteriorMCMCSampler:
589
590
  if k not in constants.UNSAVED_PARAMETERS
590
591
  }
591
592
  # Create Arviz InferenceData for posterior draws.
592
- posterior_coords = self._meridian.create_inference_data_coords(
593
+ posterior_coords = self.model.create_inference_data_coords(
593
594
  total_chains, n_keep
594
595
  )
595
- posterior_dims = self._meridian.create_inference_data_dims()
596
+ posterior_dims = self.model.create_inference_data_dims()
596
597
  infdata_posterior = az.convert_to_inference_data(
597
598
  mcmc_states, coords=posterior_coords, dims=posterior_dims
598
599
  )
@@ -654,4 +655,7 @@ class PosteriorMCMCSampler:
654
655
  dims=sample_stats_dims,
655
656
  group="sample_stats",
656
657
  )
657
- return az.concat(infdata_posterior, infdata_trace, infdata_sample_stats)
658
+ posterior_inference_data = az.concat(
659
+ infdata_posterior, infdata_trace, infdata_sample_stats
660
+ )
661
+ self.model.inference_data.extend(posterior_inference_data, join="right")
@@ -588,22 +588,20 @@ class PriorDistributionSampler:
588
588
  | non_media_treatments_vars
589
589
  )
590
590
 
591
- def __call__(self, n_draws: int, seed: int | None = None) -> az.InferenceData:
591
+ def __call__(self, n_draws: int, seed: int | None = None) -> None:
592
592
  """Draws samples from prior distributions.
593
593
 
594
- Returns:
595
- An Arviz `InferenceData` object containing prior samples only.
596
-
597
594
  Args:
598
595
  n_draws: Number of samples drawn from the prior distribution.
599
596
  seed: Used to set the seed for reproducible results. For more information,
600
597
  see [PRNGS and seeds]
601
598
  (https://github.com/tensorflow/probability/blob/main/PRNGS.md).
602
599
  """
603
- prior_draws = self._sample_prior(n_draws, seed=seed)
600
+ prior_draws = self._sample_prior(n_draws=n_draws, seed=seed)
604
601
  # Create Arviz InferenceData for prior draws.
605
602
  prior_coords = self._meridian.create_inference_data_coords(1, n_draws)
606
603
  prior_dims = self._meridian.create_inference_data_dims()
607
- return az.convert_to_inference_data(
604
+ prior_inference_data = az.convert_to_inference_data(
608
605
  prior_draws, coords=prior_coords, dims=prior_dims, group=constants.PRIOR
609
606
  )
607
+ self._meridian.inference_data.extend(prior_inference_data, join="right")
meridian/model/spec.py CHANGED
@@ -109,17 +109,27 @@ class ModelSpec:
109
109
  roi_calibration_period: An optional boolean array of shape `(n_media_times,
110
110
  n_media_channels)` indicating the subset of `time` that the ROI value of
111
111
  the `roi_m` prior applies to. The ROI numerator is the incremental outcome
112
- generated during this time period, and the denominator is the spend during
113
- this time period. (Spend data by time period is required). If `None`, all
114
- times are used. Only used if `media_prior_type` is `'roi'`.
112
+ generated by media executed during the calibration period. More precisely,
113
+ it is the difference in expected outcome between the counterfactual where
114
+ media is set to historical values versus the counterfactual where media is
115
+ set to zero during the calibration period and set to historical values for
116
+ all other time periods. The denominator is the channel spend during
117
+ the calibration period (excluding any calibration time periods prior to the
118
+ first KPI time period). Spend data by time period is required. If `None`,
119
+ all times are used. Only used if `media_prior_type` is `'roi'`.
115
120
  Default: `None`.
116
121
  rf_roi_calibration_period: An optional boolean array of shape
117
122
  `(n_media_times, n_rf_channels)` indicating the subset of `time` that the
118
123
  ROI value of the `roi_rf` prior applies to. The ROI numerator is the
119
- incremental outcome generated during this time period, and the denominator
120
- is the spend during this time period. (Spend data by time period is
121
- required). If `None`, all times are used. Only used if `rf_prior_type` is
122
- `'roi'`. Default: `None`.
124
+ incremental outcome generated by media executed during the calibration
125
+ period. More precisely, it is the difference in expected outcome between
126
+ the counterfactual where reach and frequency are set to historical values
127
+ versus the counterfactual where reach is set to zero during the
128
+ calibration period and set to historical values for all other time
129
+ periods. The denominator is the channel spend during the calibration period
130
+ (excluding any calibration time periods prior to the first KPI time
131
+ period). Spend data by time period is required. If `None`, all times are
132
+ used. Only used if `rf_prior_type` is `'roi'`. Default: `None`.
123
133
  organic_media_prior_type: A string to specify the prior type for the organic
124
134
  media coefficients. Allowed values: `'contribution'`, `'coefficient'`.
125
135
  `PriorDistribution` contains `contribution_om` and `beta_om`, but only one
meridian/version.py ADDED
@@ -0,0 +1,17 @@
1
+ # Copyright 2025 The Meridian Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Module for Meridian version."""
16
+
17
+ __version__ = "1.1.3"