google-meridian 1.4.0__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {google_meridian-1.4.0.dist-info → google_meridian-1.5.1.dist-info}/METADATA +14 -11
- {google_meridian-1.4.0.dist-info → google_meridian-1.5.1.dist-info}/RECORD +50 -46
- {google_meridian-1.4.0.dist-info → google_meridian-1.5.1.dist-info}/WHEEL +1 -1
- meridian/analysis/analyzer.py +558 -398
- meridian/analysis/optimizer.py +90 -68
- meridian/analysis/review/checks.py +118 -116
- meridian/analysis/review/constants.py +3 -3
- meridian/analysis/review/results.py +131 -68
- meridian/analysis/review/reviewer.py +8 -23
- meridian/analysis/summarizer.py +6 -1
- meridian/analysis/test_utils.py +2898 -2538
- meridian/analysis/visualizer.py +28 -9
- meridian/backend/__init__.py +106 -0
- meridian/constants.py +1 -0
- meridian/data/input_data.py +30 -52
- meridian/data/input_data_builder.py +2 -9
- meridian/data/test_utils.py +25 -41
- meridian/data/validator.py +48 -0
- meridian/mlflow/autolog.py +19 -9
- meridian/model/adstock_hill.py +3 -5
- meridian/model/context.py +134 -0
- meridian/model/eda/constants.py +334 -4
- meridian/model/eda/eda_engine.py +724 -312
- meridian/model/eda/eda_outcome.py +177 -33
- meridian/model/model.py +159 -110
- meridian/model/model_test_data.py +38 -0
- meridian/model/posterior_sampler.py +103 -62
- meridian/model/prior_sampler.py +114 -94
- meridian/model/spec.py +23 -14
- meridian/templates/card.html.jinja +9 -7
- meridian/templates/chart.html.jinja +1 -6
- meridian/templates/finding.html.jinja +19 -0
- meridian/templates/findings.html.jinja +33 -0
- meridian/templates/formatter.py +41 -5
- meridian/templates/formatter_test.py +127 -0
- meridian/templates/style.css +66 -9
- meridian/templates/style.scss +85 -4
- meridian/templates/table.html.jinja +1 -0
- meridian/version.py +1 -1
- scenarioplanner/linkingapi/constants.py +1 -1
- scenarioplanner/mmm_ui_proto_generator.py +1 -0
- schema/processors/marketing_processor.py +11 -10
- schema/processors/model_processor.py +4 -1
- schema/serde/distribution.py +12 -7
- schema/serde/hyperparameters.py +54 -107
- schema/serde/meridian_serde.py +12 -3
- schema/utils/__init__.py +1 -0
- schema/utils/proto_enum_converter.py +127 -0
- {google_meridian-1.4.0.dist-info → google_meridian-1.5.1.dist-info}/licenses/LICENSE +0 -0
- {google_meridian-1.4.0.dist-info → google_meridian-1.5.1.dist-info}/top_level.txt +0 -0
meridian/model/context.py
CHANGED
|
@@ -21,6 +21,7 @@ import warnings
|
|
|
21
21
|
from meridian import backend
|
|
22
22
|
from meridian import constants
|
|
23
23
|
from meridian.data import input_data as data
|
|
24
|
+
from meridian.data import time_coordinates as tc
|
|
24
25
|
from meridian.model import adstock_hill
|
|
25
26
|
from meridian.model import knots
|
|
26
27
|
from meridian.model import media
|
|
@@ -59,6 +60,8 @@ class ModelContext:
|
|
|
59
60
|
self._check_media_prior_support()
|
|
60
61
|
self._validate_geo_invariants()
|
|
61
62
|
self._validate_time_invariants()
|
|
63
|
+
self._validate_media_spend_for_paid_channels()
|
|
64
|
+
self._validate_rf_spend_for_paid_channels()
|
|
62
65
|
|
|
63
66
|
def _validate_data_dependent_model_spec(self):
|
|
64
67
|
"""Validates that the data dependent model specs have correct shapes."""
|
|
@@ -307,6 +310,41 @@ class ModelContext:
|
|
|
307
310
|
].values,
|
|
308
311
|
)
|
|
309
312
|
|
|
313
|
+
def _validate_media_spend_for_paid_channels(self) -> None:
  """Runs the paid-channel spend check on the aggregated media spend.

  Passes the result of `input_data.aggregate_media_spend()` together with the
  media channel dimension name to `_validate_spend_for_paid_channels`.
  """
  media_spend = self.input_data.aggregate_media_spend()
  self._validate_spend_for_paid_channels(media_spend, constants.MEDIA_CHANNEL)
|
|
317
|
+
|
|
318
|
+
def _validate_rf_spend_for_paid_channels(self) -> None:
  """Runs the paid-channel spend check on the aggregated RF spend.

  Passes the result of `input_data.aggregate_rf_spend()` together with the RF
  channel dimension name to `_validate_spend_for_paid_channels`.
  """
  rf_spend = self.input_data.aggregate_rf_spend()
  self._validate_spend_for_paid_channels(rf_spend, constants.RF_CHANNEL)
|
|
322
|
+
|
|
323
|
+
def _validate_spend_for_paid_channels(
|
|
324
|
+
self,
|
|
325
|
+
spend: np.ndarray | None,
|
|
326
|
+
dim: str,
|
|
327
|
+
) -> None:
|
|
328
|
+
"""Validates non-zero media spend for paid media channels.
|
|
329
|
+
|
|
330
|
+
Args:
|
|
331
|
+
spend: The media spend data to validate.
|
|
332
|
+
dim: The dimension name of the spend data.
|
|
333
|
+
|
|
334
|
+
Raises:
|
|
335
|
+
ValueError if any paid media channel has zero total spend.
|
|
336
|
+
"""
|
|
337
|
+
if spend is None:
|
|
338
|
+
return
|
|
339
|
+
zero_spend_channels = spend.coords[dim].where(spend == 0, drop=True).values
|
|
340
|
+
|
|
341
|
+
if zero_spend_channels.size > 0:
|
|
342
|
+
raise ValueError(
|
|
343
|
+
"Zero total spend detected for paid channels:"
|
|
344
|
+
f" {', '.join(zero_spend_channels)}. If data is correct and this is"
|
|
345
|
+
" expected, please consider modeling the data as organic media."
|
|
346
|
+
)
|
|
347
|
+
|
|
310
348
|
def _check_if_no_time_variation(
|
|
311
349
|
self,
|
|
312
350
|
scaled_data: backend.Tensor,
|
|
@@ -905,6 +943,68 @@ class ModelContext:
|
|
|
905
943
|
organic_rf=organic_rf_adstock_function,
|
|
906
944
|
)
|
|
907
945
|
|
|
946
|
+
def create_inference_data_coords(
    self, n_chains: int, n_draws: int
) -> Mapping[str, np.ndarray | Sequence[str]]:
  """Builds the coordinate values for each inference data dimension.

  Args:
    n_chains: Length of the chain coordinate, built as `np.arange(n_chains)`.
    n_draws: Length of the draw coordinate, built as `np.arange(n_draws)`.

  Returns:
    A mapping from dimension name to its coordinate values. Channel and
    control dimensions whose input data attribute is None map to an empty
    array.
  """

  def _or_empty(values):
    # Dimensions absent from the input data still get an (empty) coordinate.
    return values if values is not None else np.array([])

  inputs = self.input_data
  media_channels = _or_empty(inputs.media_channel)
  rf_channels = _or_empty(inputs.rf_channel)
  organic_media_channels = _or_empty(inputs.organic_media_channel)
  organic_rf_channels = _or_empty(inputs.organic_rf_channel)
  non_media_channels = _or_empty(inputs.non_media_channel)
  control_variables = _or_empty(inputs.control_variable)

  coords = {}
  coords[constants.CHAIN] = np.arange(n_chains)
  coords[constants.DRAW] = np.arange(n_draws)
  coords[constants.GEO] = inputs.geo
  coords[constants.TIME] = inputs.time
  coords[constants.MEDIA_TIME] = inputs.media_time
  coords[constants.KNOTS] = np.arange(self.knot_info.n_knots)
  coords[constants.CONTROL_VARIABLE] = control_variables
  coords[constants.NON_MEDIA_CHANNEL] = non_media_channels
  coords[constants.MEDIA_CHANNEL] = media_channels
  coords[constants.RF_CHANNEL] = rf_channels
  coords[constants.ORGANIC_MEDIA_CHANNEL] = organic_media_channels
  coords[constants.ORGANIC_RF_CHANNEL] = organic_rf_channels
  return coords
|
|
994
|
+
|
|
995
|
+
def create_inference_data_dims(self) -> Mapping[str, Sequence[str]]:
  """Builds the dimension names for each inference data parameter.

  Returns:
    A mapping from parameter name to its dimension names. Every entry starts
    with the chain and draw dimensions, followed by the parameter's own
    dimensions from `constants.INFERENCE_DIMS`. The sigma parameter gets the
    geo dimension only when `unique_sigma_for_each_geo` is truthy.
  """
  # Copy so the shared constant mapping is never mutated.
  dims_by_param = dict(constants.INFERENCE_DIMS)
  dims_by_param[constants.SIGMA] = (
      [constants.GEO] if self.unique_sigma_for_each_geo else []
  )

  full_dims = {}
  for param, param_dims in dims_by_param.items():
    full_dims[param] = [constants.CHAIN, constants.DRAW] + list(param_dims)
  return full_dims
|
|
1007
|
+
|
|
908
1008
|
def populate_cached_properties(self):
|
|
909
1009
|
"""Eagerly activates all cached properties.
|
|
910
1010
|
|
|
@@ -923,3 +1023,37 @@ class ModelContext:
|
|
|
923
1023
|
]
|
|
924
1024
|
for attr in cached_properties:
|
|
925
1025
|
_ = getattr(self, attr)
|
|
1026
|
+
|
|
1027
|
+
def expand_selected_time_dims(
    self,
    start_date: tc.Date = None,
    end_date: tc.Date = None,
) -> list[str] | None:
  """Expands a selected date range into formatted time dimension values.

  The selection is delegated to
  `input_data.time_coordinates.expand_selected_time_dims`. When specified,
  `start_date` and `end_date` are both inclusive and must be present in the
  time coordinates of the input data.

  Args:
    start_date: Start date of the selected time period. If None, implies the
      earliest time dimension value in the input data.
    end_date: End date of the selected time period. If None, implies the
      latest time dimension value in the input data.

  Returns:
    The time dimension values within the selected period, each formatted with
    `constants.DATE_FORMAT`. Returns None when both arguments are None, or
    when the selection corresponds to the entire time range in the input
    data.

  Raises:
    ValueError: If `start_date` or `end_date` is not in the input data time
      dimensions.
  """
  selected_times = self.input_data.time_coordinates.expand_selected_time_dims(
      start_date=start_date, end_date=end_date
  )
  if selected_times is not None:
    return [t.strftime(constants.DATE_FORMAT) for t in selected_times]
  return None
|
meridian/model/eda/constants.py
CHANGED
|
@@ -13,20 +13,350 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
"""Constants specific to MeridianEDA."""
|
|
16
|
+
from typing import Literal
|
|
17
|
+
import altair as alt
|
|
18
|
+
import immutabledict
|
|
19
|
+
from meridian import constants
|
|
20
|
+
import numpy as np
|
|
16
21
|
|
|
17
|
-
|
|
22
|
+
##### EDA Engine constants #####
|
|
23
|
+
DEFAULT_DA_VAR_AGG_FUNCTION = np.sum
|
|
18
24
|
COST_PER_MEDIA_UNIT = 'cost_per_media_unit'
|
|
19
|
-
|
|
25
|
+
ABS_COST_PER_MEDIA_UNIT = 'abs_cost_per_media_unit'
|
|
26
|
+
RSQUARED_GEO = 'rsquared_geo'
|
|
27
|
+
RSQUARED_TIME = 'rsquared_time'
|
|
20
28
|
VARIABLE_1 = 'var1'
|
|
21
29
|
VARIABLE_2 = 'var2'
|
|
22
30
|
CORRELATION = 'correlation'
|
|
23
31
|
ABS_CORRELATION_COL_NAME = 'abs_correlation'
|
|
32
|
+
CORRELATION_MATRIX_NAME = 'correlation_matrix'
|
|
33
|
+
OVERALL_PAIRWISE_CORR_THRESHOLD = 0.999
|
|
34
|
+
GEO_PAIRWISE_CORR_THRESHOLD = 0.999
|
|
35
|
+
NATIONAL_PAIRWISE_CORR_THRESHOLD = 0.999
|
|
36
|
+
Q1_THRESHOLD = 0.25
|
|
37
|
+
Q3_THRESHOLD = 0.75
|
|
38
|
+
IQR_MULTIPLIER = 1.5
|
|
39
|
+
STD_WITH_OUTLIERS_VAR_NAME = 'std_with_outliers'
|
|
40
|
+
STD_WITHOUT_OUTLIERS_VAR_NAME = 'std_without_outliers'
|
|
41
|
+
STD_THRESHOLD = 1e-4
|
|
42
|
+
OUTLIERS_COL_NAME = 'outliers'
|
|
43
|
+
ABS_OUTLIERS_COL_NAME = 'abs_outliers'
|
|
44
|
+
VIF_COL_NAME = 'VIF'
|
|
45
|
+
EXTREME_CORRELATION_WITH = 'extreme_correlation_with'
|
|
46
|
+
TIME_AND_GEO_AGGREGATION = 'times and geos'
|
|
47
|
+
TIME_AGGREGATION = 'times'
|
|
48
|
+
PRIOR_CONTRIBUTION = 'prior_contribution'
|
|
24
49
|
|
|
25
|
-
|
|
50
|
+
##### EDA Plotting properties #####
|
|
51
|
+
CORRELATION_RED = '#d73027'
|
|
52
|
+
CORRELATION_WHITE = '#f7f7f7'
|
|
53
|
+
CORRELATION_BLUE = '#4575b4'
|
|
54
|
+
CORRELATION_LEGEND_TITLE = 'correlation (blue=OK, red=bad)'
|
|
26
55
|
VARIABLE = 'var'
|
|
27
56
|
VALUE = 'value'
|
|
28
|
-
NATIONALIZE = 'nationalize'
|
|
57
|
+
NATIONALIZE: Literal['nationalize'] = 'nationalize'
|
|
29
58
|
MEDIA_IMPRESSIONS_SCALED = 'media_impressions_scaled'
|
|
30
59
|
IMPRESSION_SHARE_SCALED = 'impression_share_scaled'
|
|
31
60
|
SPEND_SHARE = 'spend_share'
|
|
32
61
|
LABEL = 'label'
|
|
62
|
+
DEFAULT_CHART_COLOR = '#4C78A8'
|
|
63
|
+
PAIRWISE_CORR_COLOR_SCALE = alt.Scale(
|
|
64
|
+
domain=[-1.0, -0.5, 0.0, 0.5, 1.0],
|
|
65
|
+
range=[
|
|
66
|
+
CORRELATION_RED,
|
|
67
|
+
CORRELATION_WHITE,
|
|
68
|
+
CORRELATION_BLUE,
|
|
69
|
+
CORRELATION_WHITE,
|
|
70
|
+
CORRELATION_RED,
|
|
71
|
+
],
|
|
72
|
+
type='linear',
|
|
73
|
+
)
|
|
74
|
+
POPULATION_CORRELATION_LEGEND_CONFIGS = immutabledict.immutabledict({
|
|
75
|
+
'title': CORRELATION_LEGEND_TITLE,
|
|
76
|
+
'orient': 'bottom',
|
|
77
|
+
})
|
|
78
|
+
POPULATION_RAW_MEDIA_CORRELATION_ENCODINGS = immutabledict.immutabledict({
|
|
79
|
+
'x': alt.X(
|
|
80
|
+
f'{VARIABLE}:N',
|
|
81
|
+
sort=None,
|
|
82
|
+
title=constants.CHANNEL,
|
|
83
|
+
axis=alt.Axis(labelAngle=-45),
|
|
84
|
+
),
|
|
85
|
+
'y': alt.Y(
|
|
86
|
+
f'{VALUE}:Q', title=CORRELATION, scale=alt.Scale(domain=[-1, 1])
|
|
87
|
+
),
|
|
88
|
+
'color': alt.Color(
|
|
89
|
+
f'{VALUE}:Q',
|
|
90
|
+
scale=alt.Scale(
|
|
91
|
+
domain=[-1, 0, 1],
|
|
92
|
+
range=[CORRELATION_RED, CORRELATION_WHITE, CORRELATION_BLUE],
|
|
93
|
+
),
|
|
94
|
+
legend=alt.Legend(**POPULATION_CORRELATION_LEGEND_CONFIGS),
|
|
95
|
+
),
|
|
96
|
+
})
|
|
97
|
+
POPULATION_TREATMENT_CORRELATION_ENCODINGS = immutabledict.immutabledict({
|
|
98
|
+
'x': alt.X(
|
|
99
|
+
f'{VARIABLE}:N',
|
|
100
|
+
sort=None,
|
|
101
|
+
title=constants.CHANNEL,
|
|
102
|
+
axis=alt.Axis(labelAngle=-45),
|
|
103
|
+
),
|
|
104
|
+
'y': alt.Y(
|
|
105
|
+
f'{VALUE}:Q', title=CORRELATION, scale=alt.Scale(domain=[-1, 1])
|
|
106
|
+
),
|
|
107
|
+
'color': alt.Color(
|
|
108
|
+
f'{VALUE}:Q',
|
|
109
|
+
scale=alt.Scale(
|
|
110
|
+
domain=[-1, -0.5, 0, 0.5, 1],
|
|
111
|
+
range=[
|
|
112
|
+
CORRELATION_RED,
|
|
113
|
+
CORRELATION_WHITE,
|
|
114
|
+
CORRELATION_BLUE,
|
|
115
|
+
CORRELATION_WHITE,
|
|
116
|
+
CORRELATION_RED,
|
|
117
|
+
],
|
|
118
|
+
),
|
|
119
|
+
legend=alt.Legend(**POPULATION_CORRELATION_LEGEND_CONFIGS),
|
|
120
|
+
),
|
|
121
|
+
})
|
|
122
|
+
PRIOR_MEAN_ENCODINGS = immutabledict.immutabledict({
|
|
123
|
+
'x': alt.X(
|
|
124
|
+
f'{VARIABLE}:N',
|
|
125
|
+
sort=None,
|
|
126
|
+
title=constants.CHANNEL,
|
|
127
|
+
axis=alt.Axis(labelAngle=-45),
|
|
128
|
+
),
|
|
129
|
+
'y': alt.Y(f'{VALUE}:Q', title=PRIOR_CONTRIBUTION),
|
|
130
|
+
})
|
|
131
|
+
CHANNEL_TYPE_TO_COLOR = immutabledict.immutabledict({
|
|
132
|
+
constants.MEDIA_UNITS: '#4285F4',
|
|
133
|
+
constants.MEDIA_CHANNEL: '#4285F4',
|
|
134
|
+
constants.SPEND: '#FBBC04',
|
|
135
|
+
COST_PER_MEDIA_UNIT: '#A142F4',
|
|
136
|
+
constants.ORGANIC_MEDIA_CHANNEL: '#F29900',
|
|
137
|
+
constants.RF_CHANNEL: '#EA4335',
|
|
138
|
+
constants.ORGANIC_RF_CHANNEL: '#FBBC04',
|
|
139
|
+
constants.CONTROL_VARIABLE: '#34A853',
|
|
140
|
+
constants.NON_MEDIA_CHANNEL: '#12939A',
|
|
141
|
+
})
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
##### Report constants #####
|
|
145
|
+
REPORT_TITLE = 'Meridian Exploratory Data Analysis Report'
|
|
146
|
+
DISPLAY_LIMIT_MESSAGE = (
|
|
147
|
+
'<br/>(Due to space constraints, this table only displays the 5 most severe'
|
|
148
|
+
' cases. Please use {function} to review {to_review}.)'
|
|
149
|
+
)
|
|
150
|
+
DISPLAY_LIMIT = 5
|
|
151
|
+
TIME_SERIES_LIMIT = 2
|
|
152
|
+
POPULATION_CORRELATION_BARCHART_LIMIT = PRIOR_MEAN_BARCHART_LIMIT = 15
|
|
153
|
+
# category 1
|
|
154
|
+
SPEND_AND_MEDIA_UNIT_CARD_ID = 'spend-and-media-unit'
|
|
155
|
+
SPEND_AND_MEDIA_UNIT_CARD_TITLE = 'Spend and Media Unit'
|
|
156
|
+
RELATIVE_SPEND_SHARE_CHART_ID = 'relative-spend-share-chart'
|
|
157
|
+
SPEND_PER_MEDIA_UNIT_CHART_ID = 'spend-per-media-unit-chart'
|
|
158
|
+
INCONSISTENT_DATA_TABLE_ID = 'inconsistent-data-table'
|
|
159
|
+
COST_PER_MEDIA_UNIT_OUTLIER_TABLE_ID = 'cost-per-media-unit-outlier-table'
|
|
160
|
+
# category 2
|
|
161
|
+
RESPONSE_VARIABLES_CARD_ID = 'response-variables'
|
|
162
|
+
RESPONSE_VARIABLES_CARD_TITLE = 'Individual Explanatory/Response Variables'
|
|
163
|
+
TREATMENTS_CHART_ID = 'treatments-chart'
|
|
164
|
+
CONTROLS_AND_NON_MEDIA_CHART_ID = 'controls-and-non-media-chart'
|
|
165
|
+
KPI_CHART_ID = 'kpi-chart'
|
|
166
|
+
TREATMENT_CONTROL_VARIABILITY_TABLE_ID = 'treatment-control-variability-table'
|
|
167
|
+
TREATMENT_CONTROL_OUTLIER_TABLE_ID = 'treatment-control-outlier-table'
|
|
168
|
+
# category 3
|
|
169
|
+
POPULATION_SCALING_CARD_ID = 'population-scaling'
|
|
170
|
+
POPULATION_SCALING_CARD_TITLE = 'Population Scaling of Explanatory Variables'
|
|
171
|
+
POPULATION_RAW_MEDIA_CHART_ID = 'population-raw-media-chart'
|
|
172
|
+
POPULATION_TREATMENT_CHART_ID = 'population-treatment-chart'
|
|
173
|
+
# category 4
|
|
174
|
+
RELATIONSHIP_BETWEEN_VARIABLES_CARD_ID = 'relationship-among-variables'
|
|
175
|
+
RELATIONSHIP_BETWEEN_VARIABLES_CARD_TITLE = 'Relationship Among the Variables'
|
|
176
|
+
PAIRWISE_CORRELATION_CHART_ID = 'pairwise-correlation-chart'
|
|
177
|
+
EXTREME_VIF_ERROR_TABLE_ID = 'extreme-vif-error-table'
|
|
178
|
+
EXTREME_VIF_ATTENTION_TABLE_ID = 'extreme-vif-attention-table'
|
|
179
|
+
R_SQUARED_TIME_TABLE_ID = 'r-squared-time-table'
|
|
180
|
+
R_SQUARED_GEO_TABLE_ID = 'r-squared-geo-table'
|
|
181
|
+
# category 5
|
|
182
|
+
PRIOR_SPECIFICATIONS_CARD_ID = 'prior-specifications'
|
|
183
|
+
PRIOR_SPECIFICATIONS_CARD_TITLE = 'Prior Specifications'
|
|
184
|
+
PRIOR_CHART_ID = 'prior-chart'
|
|
185
|
+
# summary
|
|
186
|
+
SUMMARY_CARD_ID = 'summary'
|
|
187
|
+
SUMMARY_CARD_TITLE = 'Summary'
|
|
188
|
+
SUMMARY_TABLE_ID = 'summary-table'
|
|
189
|
+
CATEGORY = 'Category'
|
|
190
|
+
FINDING = 'Finding'
|
|
191
|
+
RECOMMENDED_NEXT_STEP = 'Recommended Next Step'
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
##### Finding messages #####
|
|
195
|
+
SUMMARY_TABLE_SUMMARY_INFO = (
|
|
196
|
+
'Review the full report to investigate the health of your dataset and'
|
|
197
|
+
' confirm findings align with your expectations.'
|
|
198
|
+
)
|
|
199
|
+
SUMMARY_TABLE_SUMMARY_FINDING = (
|
|
200
|
+
'Review the health of your dataset below. Resolve all FAILS and investigate'
|
|
201
|
+
' REVIEW flags in the detailed sections to ensure your data is ready for'
|
|
202
|
+
' modeling.'
|
|
203
|
+
)
|
|
204
|
+
SUMMARY_TABLE_SPEND_AND_MEDIA_UNIT_INFO = (
|
|
205
|
+
'No automated issues detected. See <a'
|
|
206
|
+
f' href="#{SPEND_AND_MEDIA_UNIT_CARD_ID}">Spend and Media Units</a> for'
|
|
207
|
+
' more details.'
|
|
208
|
+
)
|
|
209
|
+
SUMMARY_TABLE_SPEND_AND_MEDIA_UNIT_FINDING = (
|
|
210
|
+
f'See <a href="#{SPEND_AND_MEDIA_UNIT_CARD_ID}">Spend and Media Units</a>.'
|
|
211
|
+
' Where applicable, verify that spend and media units align across'
|
|
212
|
+
' channels, and review outliers in cost per media unit.'
|
|
213
|
+
)
|
|
214
|
+
SUMMARY_TABLE_RESPONSE_VARIABLES_INFO = (
|
|
215
|
+
'No automated issues detected. See <a'
|
|
216
|
+
f' href="#{RESPONSE_VARIABLES_CARD_ID}">Individual Explanatory/Response'
|
|
217
|
+
' Variables</a> for more details.'
|
|
218
|
+
)
|
|
219
|
+
SUMMARY_TABLE_RESPONSE_VARIABLES_FINDING = (
|
|
220
|
+
f'See <a href="#{RESPONSE_VARIABLES_CARD_ID}">Individual'
|
|
221
|
+
' Explanatory/Response Variables</a>. Where applicable, review any'
|
|
222
|
+
' variables with low signal or with outliers.'
|
|
223
|
+
)
|
|
224
|
+
SUMMARY_TABLE_POPULATION_SCALING_INFO = (
|
|
225
|
+
'No automated issues detected. See <a'
|
|
226
|
+
f' href="#{POPULATION_SCALING_CARD_ID}">Population Scaling</a> for more'
|
|
227
|
+
' details.'
|
|
228
|
+
)
|
|
229
|
+
SUMMARY_TABLE_RELATIONSHIP_BETWEEN_VARIABLES_INFO = (
|
|
230
|
+
'No automated issues detected. See <a'
|
|
231
|
+
f' href="#{RELATIONSHIP_BETWEEN_VARIABLES_CARD_ID}">Relationship Among the'
|
|
232
|
+
' Variables</a> for more details.'
|
|
233
|
+
)
|
|
234
|
+
SUMMARY_TABLE_RELATIONSHIP_BETWEEN_VARIABLES_FINDING = (
|
|
235
|
+
f'See <a href="#{RELATIONSHIP_BETWEEN_VARIABLES_CARD_ID}">Relationship'
|
|
236
|
+
' Among the Variables</a>. Check for high multicollinearity among the'
|
|
237
|
+
' variables that could lead to model convergence issues.'
|
|
238
|
+
)
|
|
239
|
+
SUMMARY_TABLE_PRIOR_SPECIFICATIONS_INFO = (
|
|
240
|
+
'No automated issues detected. See <a'
|
|
241
|
+
f' href="#{PRIOR_SPECIFICATIONS_CARD_ID}">Prior Specifications</a> for more'
|
|
242
|
+
' details. Assess the likelihood of a negative baseline occurring.'
|
|
243
|
+
)
|
|
244
|
+
SPEND_PER_MEDIA_UNIT_INFO = (
|
|
245
|
+
'Please review the patterns for spend, media units, and'
|
|
246
|
+
' cost-per-media unit. Any erratic or unexpected patterns warrant a data'
|
|
247
|
+
' review.'
|
|
248
|
+
)
|
|
249
|
+
VARIABILITY_PLOT_INFO = (
|
|
250
|
+
'Please review the variability of the explanatory and response variables'
|
|
251
|
+
' illustrated by the boxplots. Note that variables with very low'
|
|
252
|
+
' variability could be difficult to estimate and could hurt model'
|
|
253
|
+
' convergence. Consider merging or replacing them with other variables,'
|
|
254
|
+
' dropping them if they are negligibly small, or using a custom prior if'
|
|
255
|
+
' you have relevant information. If outliers exist, check your data input'
|
|
256
|
+
' to determine if they are genuine or erroneous.'
|
|
257
|
+
)
|
|
258
|
+
RELATIVE_SPEND_SHARE_INFO = (
|
|
259
|
+
"Please review the channel's share of spend. Channels with a very small"
|
|
260
|
+
' share of spend might be difficult to estimate. You might want to combine'
|
|
261
|
+
' them with other channels. Meanwhile, a channel with a huge spend share'
|
|
262
|
+
' would increase the risk of producing a negative baseline if it also has a'
|
|
263
|
+
' high ROI.'
|
|
264
|
+
)
|
|
265
|
+
PAIRWISE_CORRELATION_CHECK_INFO = (
|
|
266
|
+
'Please review the computed pairwise correlations. Note that'
|
|
267
|
+
' high pairwise correlation may cause model identifiability'
|
|
268
|
+
' and convergence issues. Consider combining the variables if'
|
|
269
|
+
' high correlation exists.'
|
|
270
|
+
)
|
|
271
|
+
MULTICOLLINEARITY_ERROR = (
|
|
272
|
+
'Some variables have extreme multicollinearity (VIF'
|
|
273
|
+
' > {threshold}) across all {aggregation}. Note that'
|
|
274
|
+
' a common cause of multicollinearity is perfect pairwise'
|
|
275
|
+
' correlation. To address multicollinearity, please drop any'
|
|
276
|
+
' variable that is a linear combination of other variables.'
|
|
277
|
+
' Otherwise, consider combining variables.{additional_info}'
|
|
278
|
+
)
|
|
279
|
+
MULTICOLLINEARITY_ATTENTION = (
|
|
280
|
+
'Some variables have extreme multicollinearity (VIF >'
|
|
281
|
+
' {threshold}) in certain geo(s). Note that a common'
|
|
282
|
+
' cause of multicollinearity is perfect pairwise'
|
|
283
|
+
" correlation. While this geo-level issue isn't necessarily"
|
|
284
|
+
' problematic due to hierarchical modeling in Meridian, it'
|
|
285
|
+
' may be a data issue that could lead to poor inference or'
|
|
286
|
+
' even poor convergence. Consider checking your data or'
|
|
287
|
+
' combining these variables, especially if they also have'
|
|
288
|
+
' high VIF in other geos.{additional_info}'
|
|
289
|
+
)
|
|
290
|
+
R_SQUARED_TIME_INFO = (
|
|
291
|
+
'This check regresses each variable against time as a'
|
|
292
|
+
' categorical variable. In this case, high R-squared indicates'
|
|
293
|
+
' low geo variation of a variable. This could lead to a weakly'
|
|
294
|
+
' identifiable and non-converging model if a large number of'
|
|
295
|
+
' knots are used. Consider dropping the variable with very high'
|
|
296
|
+
' R-squared or reducing `knots` argument in `ModelSpec`.'
|
|
297
|
+
)
|
|
298
|
+
R_SQUARED_GEO_INFO = (
|
|
299
|
+
'This check regresses each variable against geo as a'
|
|
300
|
+
' categorical variable. In this case, high R-squared indicates'
|
|
301
|
+
' low time variation of a variable. This could lead to a weakly'
|
|
302
|
+
' identifiable and non-converging model due to geo main'
|
|
303
|
+
' effects. Consider dropping the variable with very high'
|
|
304
|
+
' R-squared.'
|
|
305
|
+
)
|
|
306
|
+
POPULATION_CORRELATION_SCALED_TREATMENT_CONTROL_INFO = (
|
|
307
|
+
'Please review the Spearman correlation between population and scaled'
|
|
308
|
+
' treatment units or scaled controls.<br/><br/>For controls and non-media'
|
|
309
|
+
" channels: Meridian doesn't population-scale these variables by default."
|
|
310
|
+
' High correlation indicates that users should population-scale these'
|
|
311
|
+
' variables using the `control_population_scaling_id` or'
|
|
312
|
+
' `non_media_population_scaling_id` argument in `ModelSpec`.<br/><br/>For'
|
|
313
|
+
' paid and organic media channels: Meridian automatically population-scales'
|
|
314
|
+
' these media channels by default. High correlation indicates that the'
|
|
315
|
+
' variable may have been population-scaled before being passed to Meridian.'
|
|
316
|
+
' Please check your data input.'
|
|
317
|
+
)
|
|
318
|
+
POPULATION_CORRELATION_RAW_MEDIA_INFO = (
|
|
319
|
+
'Please review the Spearman correlation between population and raw paid and'
|
|
320
|
+
' organic media variables. These raw media variables are expected to have'
|
|
321
|
+
' positive correlation with population. If there is low or negative'
|
|
322
|
+
' correlation, please check your data input.'
|
|
323
|
+
)
|
|
324
|
+
PRIOR_PROBABILITY_REPORT_INFO = (
|
|
325
|
+
'Negative baseline is equivalent to the treatment effects getting too much'
|
|
326
|
+
' credit. Please review the prior probability of negative baseline together'
|
|
327
|
+
' with the bar chart for channel-level prior mean of contribution. If the'
|
|
328
|
+
' prior probability of negative baseline is high, consider custom treatment'
|
|
329
|
+
' priors. In particular, a custom `contribution prior` type may be'
|
|
330
|
+
' appropriate.<br/><br/>'
|
|
331
|
+
)
|
|
332
|
+
# The boolean keys indicate whether findings were detected (True) or
|
|
333
|
+
# not (False), and the values are the corresponding message that should be
|
|
334
|
+
# displayed. Example, if there were errors or reviews in the spend and media
|
|
335
|
+
# unit card (True), then we want to display the finding message,
|
|
336
|
+
# otherwise (False) we display the info message.
|
|
337
|
+
CATEGORY_TO_MESSAGE_BY_STATUS = immutabledict.immutabledict({
|
|
338
|
+
SUMMARY_CARD_TITLE: immutabledict.immutabledict({
|
|
339
|
+
False: SUMMARY_TABLE_SUMMARY_INFO,
|
|
340
|
+
True: SUMMARY_TABLE_SUMMARY_FINDING,
|
|
341
|
+
}),
|
|
342
|
+
SPEND_AND_MEDIA_UNIT_CARD_TITLE: immutabledict.immutabledict({
|
|
343
|
+
False: SUMMARY_TABLE_SPEND_AND_MEDIA_UNIT_INFO,
|
|
344
|
+
True: SUMMARY_TABLE_SPEND_AND_MEDIA_UNIT_FINDING,
|
|
345
|
+
}),
|
|
346
|
+
RESPONSE_VARIABLES_CARD_TITLE: immutabledict.immutabledict({
|
|
347
|
+
False: SUMMARY_TABLE_RESPONSE_VARIABLES_INFO,
|
|
348
|
+
True: SUMMARY_TABLE_RESPONSE_VARIABLES_FINDING,
|
|
349
|
+
}),
|
|
350
|
+
POPULATION_SCALING_CARD_TITLE: immutabledict.immutabledict({
|
|
351
|
+
False: SUMMARY_TABLE_POPULATION_SCALING_INFO,
|
|
352
|
+
True: '', # currently there are no findings for this card
|
|
353
|
+
}),
|
|
354
|
+
RELATIONSHIP_BETWEEN_VARIABLES_CARD_TITLE: immutabledict.immutabledict({
|
|
355
|
+
False: SUMMARY_TABLE_RELATIONSHIP_BETWEEN_VARIABLES_INFO,
|
|
356
|
+
True: SUMMARY_TABLE_RELATIONSHIP_BETWEEN_VARIABLES_FINDING,
|
|
357
|
+
}),
|
|
358
|
+
PRIOR_SPECIFICATIONS_CARD_TITLE: immutabledict.immutabledict({
|
|
359
|
+
False: SUMMARY_TABLE_PRIOR_SPECIFICATIONS_INFO,
|
|
360
|
+
True: '', # currently there are no findings for this card
|
|
361
|
+
}),
|
|
362
|
+
})
|