google-meridian 1.3.0__py3-none-any.whl → 1.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- google_meridian-1.3.2.dist-info/METADATA +209 -0
- google_meridian-1.3.2.dist-info/RECORD +76 -0
- {google_meridian-1.3.0.dist-info → google_meridian-1.3.2.dist-info}/top_level.txt +1 -0
- meridian/analysis/__init__.py +1 -2
- meridian/analysis/analyzer.py +0 -1
- meridian/analysis/optimizer.py +5 -3
- meridian/analysis/review/checks.py +81 -30
- meridian/analysis/review/constants.py +4 -0
- meridian/analysis/review/results.py +40 -9
- meridian/analysis/summarizer.py +1 -1
- meridian/analysis/visualizer.py +1 -1
- meridian/backend/__init__.py +229 -24
- meridian/backend/test_utils.py +194 -0
- meridian/constants.py +1 -0
- meridian/data/load.py +2 -0
- meridian/model/eda/__init__.py +0 -1
- meridian/model/eda/constants.py +12 -2
- meridian/model/eda/eda_engine.py +353 -45
- meridian/model/eda/eda_outcome.py +21 -1
- meridian/model/knots.py +17 -0
- meridian/model/model_test_data.py +15 -0
- meridian/{analysis/templates → templates}/card.html.jinja +1 -1
- meridian/{analysis/templates → templates}/chart.html.jinja +1 -1
- meridian/{analysis/templates → templates}/chips.html.jinja +1 -1
- meridian/{analysis → templates}/formatter.py +12 -1
- meridian/templates/formatter_test.py +216 -0
- meridian/{analysis/templates → templates}/insights.html.jinja +1 -1
- meridian/{analysis/templates → templates}/stats.html.jinja +1 -1
- meridian/{analysis/templates → templates}/style.css +1 -1
- meridian/{analysis/templates → templates}/style.scss +1 -1
- meridian/{analysis/templates → templates}/summary.html.jinja +4 -2
- meridian/{analysis/templates → templates}/table.html.jinja +1 -1
- meridian/version.py +1 -1
- schema/__init__.py +30 -0
- schema/serde/__init__.py +26 -0
- schema/serde/constants.py +48 -0
- schema/serde/distribution.py +515 -0
- schema/serde/eda_spec.py +192 -0
- schema/serde/function_registry.py +143 -0
- schema/serde/hyperparameters.py +363 -0
- schema/serde/inference_data.py +105 -0
- schema/serde/marketing_data.py +1321 -0
- schema/serde/meridian_serde.py +413 -0
- schema/serde/serde.py +47 -0
- schema/serde/test_data.py +4608 -0
- schema/utils/__init__.py +17 -0
- schema/utils/time_record.py +156 -0
- google_meridian-1.3.0.dist-info/METADATA +0 -409
- google_meridian-1.3.0.dist-info/RECORD +0 -62
- meridian/model/eda/meridian_eda.py +0 -220
- {google_meridian-1.3.0.dist-info → google_meridian-1.3.2.dist-info}/WHEEL +0 -0
- {google_meridian-1.3.0.dist-info → google_meridian-1.3.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,220 +0,0 @@
|
|
|
1
|
-
# Copyright 2025 The Meridian Authors.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
|
|
15
|
-
"""Module containing Meridian related exploratory data analysis (EDA) functionalities."""
|
|
16
|
-
from __future__ import annotations
|
|
17
|
-
|
|
18
|
-
from typing import Literal, TYPE_CHECKING, Union
|
|
19
|
-
|
|
20
|
-
import altair as alt
|
|
21
|
-
from meridian import constants
|
|
22
|
-
from meridian.model.eda import constants as eda_constants
|
|
23
|
-
import pandas as pd
|
|
24
|
-
|
|
25
|
-
if TYPE_CHECKING:
|
|
26
|
-
from meridian.model import model # pylint: disable=g-bad-import-order,g-import-not-at-top
|
|
27
|
-
|
|
28
|
-
__all__ = [
|
|
29
|
-
'MeridianEDA',
|
|
30
|
-
]
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class MeridianEDA:
|
|
34
|
-
"""Class for running pre-modeling exploratory data analysis for Meridian InputData."""
|
|
35
|
-
|
|
36
|
-
_PAIRWISE_CORR_COLOR_SCALE = alt.Scale(
|
|
37
|
-
domain=[-1.0, 0.0, 1.0],
|
|
38
|
-
range=['#1f78b4', '#f7f7f7', '#e34a33'], # Blue-light grey-Orange
|
|
39
|
-
type='linear',
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
def __init__(
|
|
43
|
-
self,
|
|
44
|
-
meridian: model.Meridian,
|
|
45
|
-
):
|
|
46
|
-
self._meridian = meridian
|
|
47
|
-
|
|
48
|
-
def generate_and_save_report(self, filename: str, filepath: str):
|
|
49
|
-
"""Generates and saves the 2 page HTML report containing findings in EDA about given InputData.
|
|
50
|
-
|
|
51
|
-
Args:
|
|
52
|
-
filename: The filename for the generated HTML output.
|
|
53
|
-
filepath: The path to the directory where the file will be saved.
|
|
54
|
-
"""
|
|
55
|
-
# TODO: Implement.
|
|
56
|
-
raise NotImplementedError()
|
|
57
|
-
|
|
58
|
-
def plot_pairwise_correlation(
|
|
59
|
-
self, geos: Union[int, list[str], Literal['nationalize']] = 1
|
|
60
|
-
) -> alt.Chart:
|
|
61
|
-
"""Plots the Pairwise Correlation data.
|
|
62
|
-
|
|
63
|
-
Args:
|
|
64
|
-
geos: Defines which geos to plot. - int: The number of top geos to plot,
|
|
65
|
-
ranked by population. - list[str]: A specific list of geo names to plot.
|
|
66
|
-
- 'nationalize': Aggregates all geos into a single national view.
|
|
67
|
-
Defaults to 1 (plotting the top geo). If the data is already at a
|
|
68
|
-
national level, this parameter is ignored and a national plot is
|
|
69
|
-
generated.
|
|
70
|
-
|
|
71
|
-
Returns:
|
|
72
|
-
Altair chart(s) of the Pairwise Correlation data.
|
|
73
|
-
"""
|
|
74
|
-
geos_to_plot = self._validate_and_get_geos_to_plot(geos)
|
|
75
|
-
is_national = self._meridian.is_national
|
|
76
|
-
nationalize_geos = geos == 'nationalize'
|
|
77
|
-
|
|
78
|
-
if is_national or nationalize_geos:
|
|
79
|
-
pairwise_corr_artifact = (
|
|
80
|
-
self._meridian.eda_engine.check_national_pairwise_corr().get_national_artifact
|
|
81
|
-
)
|
|
82
|
-
if pairwise_corr_artifact is None:
|
|
83
|
-
raise ValueError('EDAOutcome does not have national artifact.')
|
|
84
|
-
else:
|
|
85
|
-
pairwise_corr_artifact = (
|
|
86
|
-
self._meridian.eda_engine.check_geo_pairwise_corr().get_geo_artifact
|
|
87
|
-
)
|
|
88
|
-
if pairwise_corr_artifact is None:
|
|
89
|
-
raise ValueError('EDAOutcome does not have geo artifact.')
|
|
90
|
-
pairwise_corr_data = pairwise_corr_artifact.corr_matrix.to_dataframe()
|
|
91
|
-
|
|
92
|
-
charts = []
|
|
93
|
-
for geo_to_plot in geos_to_plot:
|
|
94
|
-
title = (
|
|
95
|
-
'Pairwise correlations among all treatments and controls for'
|
|
96
|
-
f' {geo_to_plot}'
|
|
97
|
-
)
|
|
98
|
-
|
|
99
|
-
if not (is_national or nationalize_geos):
|
|
100
|
-
plot_data = (
|
|
101
|
-
pairwise_corr_data.xs(geo_to_plot, level=constants.GEO)
|
|
102
|
-
.rename_axis(
|
|
103
|
-
index=[eda_constants.VARIABLE_1, eda_constants.VARIABLE_2]
|
|
104
|
-
)
|
|
105
|
-
.reset_index()
|
|
106
|
-
)
|
|
107
|
-
else:
|
|
108
|
-
plot_data = pairwise_corr_data.rename_axis(
|
|
109
|
-
index=[eda_constants.VARIABLE_1, eda_constants.VARIABLE_2]
|
|
110
|
-
).reset_index()
|
|
111
|
-
plot_data.columns = [
|
|
112
|
-
eda_constants.VARIABLE_1,
|
|
113
|
-
eda_constants.VARIABLE_2,
|
|
114
|
-
eda_constants.CORRELATION,
|
|
115
|
-
]
|
|
116
|
-
unique_variables = plot_data[eda_constants.VARIABLE_1].unique()
|
|
117
|
-
variable_to_index = {name: i for i, name in enumerate(unique_variables)}
|
|
118
|
-
|
|
119
|
-
plot_data['idx1'] = plot_data[eda_constants.VARIABLE_1].map(
|
|
120
|
-
variable_to_index
|
|
121
|
-
)
|
|
122
|
-
plot_data['idx2'] = plot_data[eda_constants.VARIABLE_2].map(
|
|
123
|
-
variable_to_index
|
|
124
|
-
)
|
|
125
|
-
lower_triangle_data = plot_data[plot_data['idx2'] > plot_data['idx1']]
|
|
126
|
-
|
|
127
|
-
charts.append(
|
|
128
|
-
self._plot_2d_heatmap(lower_triangle_data, title, unique_variables)
|
|
129
|
-
)
|
|
130
|
-
final_chart = (
|
|
131
|
-
alt.vconcat(*charts)
|
|
132
|
-
.resolve_legend(color='independent')
|
|
133
|
-
.configure_axis(labelAngle=315)
|
|
134
|
-
.configure_title(anchor='start')
|
|
135
|
-
.configure_view(stroke=None)
|
|
136
|
-
)
|
|
137
|
-
return final_chart
|
|
138
|
-
|
|
139
|
-
def _plot_2d_heatmap(
|
|
140
|
-
self, data: pd.DataFrame, title: str, unique_variables: list[str]
|
|
141
|
-
) -> alt.Chart:
|
|
142
|
-
"""Plots a 2D heatmap."""
|
|
143
|
-
# Base chart with position encodings
|
|
144
|
-
base = (
|
|
145
|
-
alt.Chart(data)
|
|
146
|
-
.encode(
|
|
147
|
-
x=alt.X(
|
|
148
|
-
f'{eda_constants.VARIABLE_1}:N',
|
|
149
|
-
title=None,
|
|
150
|
-
sort=unique_variables,
|
|
151
|
-
scale=alt.Scale(domain=unique_variables),
|
|
152
|
-
),
|
|
153
|
-
y=alt.Y(
|
|
154
|
-
f'{eda_constants.VARIABLE_2}:N',
|
|
155
|
-
title=None,
|
|
156
|
-
sort=unique_variables,
|
|
157
|
-
scale=alt.Scale(domain=unique_variables),
|
|
158
|
-
),
|
|
159
|
-
)
|
|
160
|
-
.properties(title=title)
|
|
161
|
-
)
|
|
162
|
-
|
|
163
|
-
# Heatmap layer (rectangles)
|
|
164
|
-
heatmap = base.mark_rect().encode(
|
|
165
|
-
color=alt.Color(
|
|
166
|
-
f'{eda_constants.CORRELATION}:Q',
|
|
167
|
-
scale=self._PAIRWISE_CORR_COLOR_SCALE,
|
|
168
|
-
legend=alt.Legend(title=eda_constants.CORRELATION),
|
|
169
|
-
),
|
|
170
|
-
tooltip=[
|
|
171
|
-
eda_constants.VARIABLE_1,
|
|
172
|
-
eda_constants.VARIABLE_2,
|
|
173
|
-
alt.Tooltip(f'{eda_constants.CORRELATION}:Q', format='.3f'),
|
|
174
|
-
],
|
|
175
|
-
)
|
|
176
|
-
|
|
177
|
-
# Text annotation layer (values)
|
|
178
|
-
text = base.mark_text().encode(
|
|
179
|
-
text=alt.Text(f'{eda_constants.CORRELATION}:Q', format='.3f'),
|
|
180
|
-
color=alt.value('black'),
|
|
181
|
-
)
|
|
182
|
-
|
|
183
|
-
# Combine layers and apply final configurations
|
|
184
|
-
chart = (heatmap + text).properties(width=350, height=350)
|
|
185
|
-
|
|
186
|
-
return chart
|
|
187
|
-
|
|
188
|
-
def _generate_pairwise_correlation_report(self) -> str:
|
|
189
|
-
"""Creates the HTML snippet for Pairwise Correlation report section."""
|
|
190
|
-
# TODO: Implement.
|
|
191
|
-
raise NotImplementedError()
|
|
192
|
-
|
|
193
|
-
def _validate_and_get_geos_to_plot(
|
|
194
|
-
self, geos: Union[int, list[str], Literal['nationalize']]
|
|
195
|
-
) -> list[str]:
|
|
196
|
-
"""Validates and returns the geos to plot."""
|
|
197
|
-
## Validate
|
|
198
|
-
is_national = self._meridian.is_national
|
|
199
|
-
if is_national or geos == 'nationalize':
|
|
200
|
-
geos_to_plot = [constants.NATIONAL_MODEL_DEFAULT_GEO_NAME]
|
|
201
|
-
elif isinstance(geos, int):
|
|
202
|
-
if geos > len(self._meridian.input_data.geo) or geos <= 0:
|
|
203
|
-
raise ValueError(
|
|
204
|
-
'geos must be a positive integer less than or equal to the number'
|
|
205
|
-
' of geos in the data.'
|
|
206
|
-
)
|
|
207
|
-
geos_to_plot = self._meridian.input_data.get_n_top_largest_geos(geos)
|
|
208
|
-
else:
|
|
209
|
-
geos_to_plot = geos
|
|
210
|
-
|
|
211
|
-
if (
|
|
212
|
-
not is_national and geos != 'nationalize'
|
|
213
|
-
): # if national then geos_to_plot will be ignored
|
|
214
|
-
for geo in geos_to_plot:
|
|
215
|
-
if geo not in self._meridian.input_data.geo:
|
|
216
|
-
raise ValueError(f'Geo {geo} does not exist in the data.')
|
|
217
|
-
if len(geos_to_plot) != len(set(geos_to_plot)):
|
|
218
|
-
raise ValueError('geos must not contain duplicate values.')
|
|
219
|
-
|
|
220
|
-
return geos_to_plot
|
|
File without changes
|
|
File without changes
|