google-meridian 1.1.2__py3-none-any.whl → 1.1.4__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- {google_meridian-1.1.2.dist-info → google_meridian-1.1.4.dist-info}/METADATA +2 -2
- {google_meridian-1.1.2.dist-info → google_meridian-1.1.4.dist-info}/RECORD +18 -17
- meridian/__init__.py +6 -4
- meridian/analysis/analyzer.py +68 -25
- meridian/analysis/optimizer.py +298 -48
- meridian/constants.py +3 -0
- meridian/data/data_frame_input_data_builder.py +41 -0
- meridian/data/input_data_builder.py +12 -4
- meridian/data/load.py +262 -346
- meridian/mlflow/autolog.py +158 -6
- meridian/model/media.py +7 -0
- meridian/model/model.py +14 -16
- meridian/model/posterior_sampler.py +13 -9
- meridian/model/prior_sampler.py +4 -6
- meridian/version.py +17 -0
- {google_meridian-1.1.2.dist-info → google_meridian-1.1.4.dist-info}/WHEEL +0 -0
- {google_meridian-1.1.2.dist-info → google_meridian-1.1.4.dist-info}/licenses/LICENSE +0 -0
- {google_meridian-1.1.2.dist-info → google_meridian-1.1.4.dist-info}/top_level.txt +0 -0
meridian/mlflow/autolog.py
CHANGED
@@ -12,29 +12,130 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""MLflow autologging integration for Meridian.
+"""MLflow autologging integration for Meridian.
 
+This module enables MLflow tracking for Meridian. When enabled via `autolog()`,
+parameters, metrics, and other metadata will be automatically logged to MLflow,
+allowing for improved experiment tracking and analysis.
+
+To enable MLflow autologging for your Meridian workflows, simply call
+`autolog.autolog()` once before your model run.
+
+Example usage:
+
+```python
+import mlflow
+from meridian.data import load
+from meridian.mlflow import autolog
+from meridian.model import model
+
+# Enable autologging (call this once per session)
+autolog.autolog(log_metrics=True)
+
+# Start an MLflow run (optionally name it for better grouping)
+with mlflow.start_run(run_name="my_run"):
+  # Load data
+  data = load.CsvDataLoader(...).load()
+
+  # Initialize Meridian model
+  mmm = model.Meridian(input_data=data)
+
+  # Run Meridian sampling processes
+  mmm.sample_prior(n_draws=100, seed=123)
+  mmm.sample_posterior(n_chains=7, n_adapt=500, n_burnin=500, n_keep=1000,
+                       seed=1)
+
+# After the run completes, you can retrieve run results using the MLflow client.
+client = mlflow.tracking.MlflowClient()
+
+# Get the experiment ID for the run you just launched
+experiment_id = "0"
+
+# Search for runs matching the run name
+runs = client.search_runs(
+    experiment_id,
+    max_results=1000,
+    filter_string=f"attributes.run_name = 'my_run'"
+)
+
+# Print details of the run
+if runs:
+  print(runs[0])
+else:
+  print("No runs found.")
+```
+"""
+
+import dataclasses
+import inspect
+import json
 from typing import Any, Callable
 
 import arviz as az
-import
+from meridian.analysis import visualizer
 import mlflow
 from mlflow.utils.autologging_utils import autologging_integration, safe_patch
 from meridian.model import model
+from meridian.model import posterior_sampler
+from meridian.model import prior_sampler
+from meridian.model import spec
+from meridian.version import __version__
+import numpy as np
+import tensorflow_probability as tfp
+
 
 
 FLAVOR_NAME = "meridian"
 
+__all__ = ["autolog"]
+
+
 def _log_versions() -> None:
   """Logs Meridian and ArviZ versions."""
-  mlflow.log_param("meridian_version",
+  mlflow.log_param("meridian_version", __version__)
   mlflow.log_param("arviz_version", az.__version__)
 
 
+def _log_model_spec(model_spec: spec.ModelSpec) -> None:
+  """Logs the `ModelSpec` object."""
+  # TODO: Replace with serde api when it's available.
+  # PriorDistribution is logged separately.
+  excluded_fields = ["prior"]
+
+  for field in dataclasses.fields(model_spec):
+    if field.name in excluded_fields:
+      continue
+
+    field_value = getattr(model_spec, field.name)
+
+    # Stringify numpy arrays before logging.
+    if isinstance(field_value, np.ndarray):
+      field_value = json.dumps(field_value.tolist())
+
+    mlflow.log_param(f"spec.{field.name}", field_value)
+
+
+def _log_priors(model_spec: spec.ModelSpec) -> None:
+  """Logs the `PriorDistribution` object."""
+  # TODO: Replace with serde api when it's available.
+  priors = model_spec.prior
+  for field in dataclasses.fields(priors):
+    field_value = getattr(priors, field.name)
+
+    # Stringify Distributions and numpy arrays.
+    if isinstance(field_value, tfp.distributions.Distribution):
+      field_value = str(field_value)
+    elif isinstance(field_value, np.ndarray):
+      field_value = json.dumps(field_value.tolist())
+
+    mlflow.log_param(f"prior.{field.name}", field_value)
+
+
 @autologging_integration(FLAVOR_NAME)
 def autolog(
     disable: bool = False,  # pylint: disable=unused-argument
     silent: bool = False,  # pylint: disable=unused-argument
+    log_metrics: bool = False,
 ) -> None:
   """Enables MLflow tracking for Meridian.
 
@@ -43,12 +144,63 @@ def autolog(
   Args:
     disable: Whether to disable autologging.
     silent: Whether to suppress all event logs and warnings from MLflow.
+    log_metrics: Whether model metrics should be logged. Enabling this option
+      involves the creation of post-modeling objects to compute relevant
+      performance metrics. Metrics include R-Squared, MAPE, and wMAPE values.
   """
 
   def patch_meridian_init(
-      original: Callable[..., Any], *args, **kwargs
-  ) ->
+      original: Callable[..., Any], self, *args, **kwargs
+  ) -> model.Meridian:
    _log_versions()
-
+    mmm = original(self, *args, **kwargs)
+    _log_model_spec(self.model_spec)
+    _log_priors(self.model_spec)
+    return mmm
+
+  def patch_prior_sampling(original: Callable[..., Any], self, *args, **kwargs):
+    mlflow.log_param("sample_prior.n_draws", kwargs.get("n_draws", "default"))
+    mlflow.log_param("sample_prior.seed", kwargs.get("seed", "default"))
+    return original(self, *args, **kwargs)
+
+  def patch_posterior_sampling(
+      original: Callable[..., Any], self, *args, **kwargs
+  ):
+    excluded_fields = ["current_state", "pins"]
+    params = [
+        name
+        for name, value in inspect.signature(original).parameters.items()
+        if name != "self"
+        and value.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD
+        and name not in excluded_fields
+    ]
+
+    for param in params:
+      mlflow.log_param(
+          f"sample_posterior.{param}", kwargs.get(param, "default")
+      )
+
+    original(self, *args, **kwargs)
+    if log_metrics:
+      model_diagnostics = visualizer.ModelDiagnostics(self.model)
+      df_diag = model_diagnostics.predictive_accuracy_table()
+
+      get_metric = lambda n: df_diag[df_diag.metric == n].value.to_list()[0]
+
+      mlflow.log_metric("R_Squared", get_metric("R_Squared"))
+      mlflow.log_metric("MAPE", get_metric("MAPE"))
+      mlflow.log_metric("wMAPE", get_metric("wMAPE"))
 
   safe_patch(FLAVOR_NAME, model.Meridian, "__init__", patch_meridian_init)
+  safe_patch(
+      FLAVOR_NAME,
+      prior_sampler.PriorDistributionSampler,
+      "__call__",
+      patch_prior_sampling,
+  )
+  safe_patch(
+      FLAVOR_NAME,
+      posterior_sampler.PosteriorMCMCSampler,
+      "__call__",
+      patch_posterior_sampling,
+  )
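Taken together, the new patches log the `ModelSpec` fields as `spec.*` params, the `PriorDistribution` fields as `prior.*` params, the sampling arguments as `sample_prior.*` / `sample_posterior.*` params, and, when `log_metrics=True`, the R_Squared, MAPE, and wMAPE metrics from `ModelDiagnostics.predictive_accuracy_table()`. A minimal sketch of reading these back with the standard MLflow client; the run name `"my_run"` and experiment ID `"0"` are assumptions carried over from the docstring example above:

```python
import mlflow

client = mlflow.tracking.MlflowClient()

# Assumes a run named "my_run" in the default experiment ("0"), produced with
# autolog.autolog(log_metrics=True) enabled, as in the docstring example.
runs = client.search_runs(
    ["0"],
    filter_string="attributes.run_name = 'my_run'",
    max_results=1,
)

if runs:
  run = runs[0]
  # Parameters written by the patches: spec.*, prior.*, sample_posterior.*, ...
  for key, value in sorted(run.data.params.items()):
    print(f"{key} = {value}")
  # Metrics written when log_metrics=True.
  for key, value in run.data.metrics.items():
    print(f"{key} = {value}")
else:
  print("No runs found.")
```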
meridian/model/media.py
CHANGED
@@ -207,6 +207,8 @@ class RfTensors:
   Attributes:
     reach: A tensor constructed from `InputData.reach`.
     frequency: A tensor constructed from `InputData.frequency`.
+    rf_impressions: A tensor constructed from `InputData.reach` *
+      `InputData.frequency`.
     rf_spend: A tensor constructed from `InputData.rf_spend`.
     reach_transformer: A `MediaTransformer` to scale RF tensors using the
       model's RF data.
@@ -233,6 +235,7 @@ class RfTensors:
 
   reach: tf.Tensor | None = None
   frequency: tf.Tensor | None = None
+  rf_impressions: tf.Tensor | None = None
   rf_spend: tf.Tensor | None = None
   reach_transformer: transformers.MediaTransformer | None = None
   reach_scaled: tf.Tensor | None = None
@@ -250,6 +253,9 @@ def build_rf_tensors(
 
   reach = tf.convert_to_tensor(input_data.reach, dtype=tf.float32)
   frequency = tf.convert_to_tensor(input_data.frequency, dtype=tf.float32)
+  rf_impressions = (
+      reach * frequency if reach is not None and frequency is not None else None
+  )
   rf_spend = tf.convert_to_tensor(input_data.rf_spend, dtype=tf.float32)
   reach_transformer = transformers.MediaTransformer(
       reach, tf.convert_to_tensor(input_data.population, dtype=tf.float32)
@@ -292,6 +298,7 @@ def build_rf_tensors(
   return RfTensors(
       reach=reach,
       frequency=frequency,
+      rf_impressions=rf_impressions,
       rf_spend=rf_spend,
       reach_transformer=reach_transformer,
       reach_scaled=reach_scaled,
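The new `rf_impressions` field is simply the element-wise product of the reach and frequency tensors, computed once in `build_rf_tensors` so downstream code no longer has to multiply them itself. A toy sketch of the same computation; the `(geo, time, rf_channel)` shape is an assumption about how the RF inputs are indexed:

```python
import tensorflow as tf

# Toy reach and frequency values with an assumed (geo, time, rf_channel) shape.
reach = tf.constant([[[1000.0, 2000.0], [1500.0, 2500.0]]])  # shape (1, 2, 2)
frequency = tf.constant([[[2.0, 3.0], [2.5, 1.5]]])          # shape (1, 2, 2)

# The same element-wise product that build_rf_tensors now stores on RfTensors.
rf_impressions = reach * frequency
print(rf_impressions.numpy())  # [[[2000. 6000.] [3750. 3750.]]]
```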
meridian/model/model.py
CHANGED
@@ -1447,8 +1447,7 @@ class Meridian:
         see [PRNGS and seeds]
         (https://github.com/tensorflow/probability/blob/main/PRNGS.md).
     """
-
-    self.inference_data.extend(prior_inference_data, join="right")
+    self.prior_sampler_callable(n_draws=n_draws, seed=seed)
 
   def sample_posterior(
       self,
@@ -1527,22 +1526,21 @@ class Meridian:
       [ResourceExhaustedError when running Meridian.sample_posterior]
       (https://developers.google.com/meridian/docs/advanced-modeling/model-debugging#gpu-oom-error).
     """
-
-        n_chains,
-        n_adapt,
-        n_burnin,
-        n_keep,
-        current_state,
-        init_step_size,
-        dual_averaging_kwargs,
-        max_tree_depth,
-        max_energy_diff,
-        unrolled_leapfrog_steps,
-        parallel_iterations,
-        seed,
+    self.posterior_sampler_callable(
+        n_chains=n_chains,
+        n_adapt=n_adapt,
+        n_burnin=n_burnin,
+        n_keep=n_keep,
+        current_state=current_state,
+        init_step_size=init_step_size,
+        dual_averaging_kwargs=dual_averaging_kwargs,
+        max_tree_depth=max_tree_depth,
+        max_energy_diff=max_energy_diff,
+        unrolled_leapfrog_steps=unrolled_leapfrog_steps,
+        parallel_iterations=parallel_iterations,
+        seed=seed,
         **pins,
     )
-    self.inference_data.extend(posterior_inference_data, join="right")
 
 
 def save_mmm(mmm: Meridian, file_path: str):
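At the call site nothing changes: `sample_prior` and `sample_posterior` still populate `Meridian.inference_data`, they just delegate the work, and the `InferenceData` bookkeeping, to the sampler callables shown below. A short usage sketch under that assumption, with data loading elided:

```python
from meridian.model import model

# `data` is assumed to be an InputData instance built elsewhere
# (e.g. via load.CsvDataLoader(...).load()).
mmm = model.Meridian(input_data=data)

mmm.sample_prior(n_draws=100, seed=123)
mmm.sample_posterior(
    n_chains=4, n_adapt=500, n_burnin=500, n_keep=1000, seed=1
)

# Both samplers extend the same arviz.InferenceData held by the model.
print(mmm.inference_data.groups())
```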
meridian/model/posterior_sampler.py
CHANGED

@@ -85,9 +85,13 @@ class PosteriorMCMCSampler:
   def __init__(self, meridian: "model.Meridian"):
     self._meridian = meridian
 
+  @property
+  def model(self) -> "model.Meridian":
+    return self._meridian
+
   def _get_joint_dist_unpinned(self) -> tfp.distributions.Distribution:
     """Returns a `JointDistributionCoroutineAutoBatched` function for MCMC."""
-    mmm = self.
+    mmm = self.model
     mmm.populate_cached_properties()
 
     # This lists all the derived properties and states of this Meridian object
@@ -453,7 +457,7 @@ class PosteriorMCMCSampler:
     return joint_dist_unpinned
 
   def _get_joint_dist(self) -> tfp.distributions.Distribution:
-    mmm = self.
+    mmm = self.model
     y = (
         tf.where(mmm.holdout_id, 0.0, mmm.kpi_scaled)
         if mmm.holdout_id is not None
@@ -476,7 +480,7 @@ class PosteriorMCMCSampler:
       parallel_iterations: int = 10,
       seed: Sequence[int] | int | None = None,
       **pins,
-  ) ->
+  ) -> None:
     """Runs Markov Chain Monte Carlo (MCMC) sampling of posterior distributions.
 
     For more information about the arguments, see [`windowed_adaptive_nuts`]
@@ -529,9 +533,6 @@ class PosteriorMCMCSampler:
       **pins: These are used to condition the provided joint distribution, and
         are passed directly to `joint_dist.experimental_pin(**pins)`.
 
-    Returns:
-      An Arviz `InferenceData` object containing posterior samples only.
-
     Throws:
       MCMCOOMError: If the model is out of memory. Try reducing `n_keep` or pass
         a list of integers as `n_chains` to sample chains serially. For more
@@ -589,10 +590,10 @@ class PosteriorMCMCSampler:
         if k not in constants.UNSAVED_PARAMETERS
     }
     # Create Arviz InferenceData for posterior draws.
-    posterior_coords = self.
+    posterior_coords = self.model.create_inference_data_coords(
         total_chains, n_keep
     )
-    posterior_dims = self.
+    posterior_dims = self.model.create_inference_data_dims()
     infdata_posterior = az.convert_to_inference_data(
         mcmc_states, coords=posterior_coords, dims=posterior_dims
     )
@@ -654,4 +655,7 @@ class PosteriorMCMCSampler:
         dims=sample_stats_dims,
         group="sample_stats",
     )
-
+    posterior_inference_data = az.concat(
+        infdata_posterior, infdata_trace, infdata_sample_stats
+    )
+    self.model.inference_data.extend(posterior_inference_data, join="right")
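The new read-only `model` property is what the MLflow patch above relies on: inside the patched `__call__`, `self.model` returns the owning `Meridian` instance so post-sampling diagnostics can be computed, and `__call__` itself now returns `None` after extending that model's `inference_data`. A minimal sketch of the access pattern; constructing the sampler directly is illustrative, and `mmm.sample_posterior(...)` remains the usual entry point:

```python
from meridian.analysis import visualizer
from meridian.model import model, posterior_sampler

# `data` is assumed to be an InputData instance built elsewhere.
mmm = model.Meridian(input_data=data)
sampler = posterior_sampler.PosteriorMCMCSampler(mmm)

assert sampler.model is mmm  # the sampler exposes its owning model

# Sampling returns None; results land in mmm.inference_data (posterior,
# trace, and sample_stats groups concatenated via az.concat).
sampler(n_chains=4, n_adapt=500, n_burnin=500, n_keep=1000, seed=1)

# Post-sampling diagnostics, mirroring what autolog does when log_metrics=True.
diagnostics = visualizer.ModelDiagnostics(sampler.model)
print(diagnostics.predictive_accuracy_table())
```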
meridian/model/prior_sampler.py
CHANGED
@@ -588,22 +588,20 @@ class PriorDistributionSampler:
         | non_media_treatments_vars
     )
 
-  def __call__(self, n_draws: int, seed: int | None = None) ->
+  def __call__(self, n_draws: int, seed: int | None = None) -> None:
     """Draws samples from prior distributions.
 
-    Returns:
-      An Arviz `InferenceData` object containing prior samples only.
-
     Args:
       n_draws: Number of samples drawn from the prior distribution.
       seed: Used to set the seed for reproducible results. For more information,
         see [PRNGS and seeds]
         (https://github.com/tensorflow/probability/blob/main/PRNGS.md).
     """
-    prior_draws = self._sample_prior(n_draws, seed=seed)
+    prior_draws = self._sample_prior(n_draws=n_draws, seed=seed)
     # Create Arviz InferenceData for prior draws.
     prior_coords = self._meridian.create_inference_data_coords(1, n_draws)
     prior_dims = self._meridian.create_inference_data_dims()
-
+    prior_inference_data = az.convert_to_inference_data(
         prior_draws, coords=prior_coords, dims=prior_dims, group=constants.PRIOR
     )
+    self._meridian.inference_data.extend(prior_inference_data, join="right")
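Mirroring the posterior sampler, `PriorDistributionSampler.__call__` now returns `None` and writes its draws into the owning model's `inference_data` rather than returning an `InferenceData`. A small sketch of reading the prior draws back afterwards, assuming `mmm.sample_prior(n_draws=100, seed=123)` has already run on a fitted `Meridian` instance `mmm`:

```python
# The prior draws now live in the shared InferenceData rather than in a
# returned object; the prior group has a single chain and n_draws draws.
prior = mmm.inference_data.prior          # xarray.Dataset of prior draws
print(prior.sizes)                        # e.g. {'chain': 1, 'draw': 100, ...}
print(list(prior.data_vars)[:5])          # a few of the sampled parameters
```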
meridian/version.py
ADDED
@@ -0,0 +1,17 @@
+# Copyright 2025 The Meridian Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Module for Meridian version."""
+
+__version__ = "1.1.4"
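The package version now lives in its own module, which is also what the MLflow integration imports and logs as `meridian_version`. A trivial usage sketch:

```python
from meridian.version import __version__

print(__version__)  # "1.1.4" for this release
```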