google-meridian 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {google_meridian-1.4.0.dist-info → google_meridian-1.5.0.dist-info}/METADATA +14 -11
  2. {google_meridian-1.4.0.dist-info → google_meridian-1.5.0.dist-info}/RECORD +47 -43
  3. {google_meridian-1.4.0.dist-info → google_meridian-1.5.0.dist-info}/WHEEL +1 -1
  4. meridian/analysis/analyzer.py +558 -398
  5. meridian/analysis/optimizer.py +90 -68
  6. meridian/analysis/review/reviewer.py +4 -1
  7. meridian/analysis/summarizer.py +6 -1
  8. meridian/analysis/test_utils.py +2898 -2538
  9. meridian/analysis/visualizer.py +28 -9
  10. meridian/backend/__init__.py +106 -0
  11. meridian/constants.py +1 -0
  12. meridian/data/input_data.py +30 -52
  13. meridian/data/input_data_builder.py +2 -9
  14. meridian/data/test_utils.py +25 -41
  15. meridian/data/validator.py +48 -0
  16. meridian/mlflow/autolog.py +19 -9
  17. meridian/model/adstock_hill.py +3 -5
  18. meridian/model/context.py +134 -0
  19. meridian/model/eda/constants.py +334 -4
  20. meridian/model/eda/eda_engine.py +723 -312
  21. meridian/model/eda/eda_outcome.py +177 -33
  22. meridian/model/model.py +159 -110
  23. meridian/model/model_test_data.py +38 -0
  24. meridian/model/posterior_sampler.py +103 -62
  25. meridian/model/prior_sampler.py +114 -94
  26. meridian/model/spec.py +23 -14
  27. meridian/templates/card.html.jinja +9 -7
  28. meridian/templates/chart.html.jinja +1 -6
  29. meridian/templates/finding.html.jinja +19 -0
  30. meridian/templates/findings.html.jinja +33 -0
  31. meridian/templates/formatter.py +41 -5
  32. meridian/templates/formatter_test.py +127 -0
  33. meridian/templates/style.css +66 -9
  34. meridian/templates/style.scss +85 -4
  35. meridian/templates/table.html.jinja +1 -0
  36. meridian/version.py +1 -1
  37. scenarioplanner/linkingapi/constants.py +1 -1
  38. scenarioplanner/mmm_ui_proto_generator.py +1 -0
  39. schema/processors/marketing_processor.py +11 -10
  40. schema/processors/model_processor.py +4 -1
  41. schema/serde/distribution.py +12 -7
  42. schema/serde/hyperparameters.py +54 -107
  43. schema/serde/meridian_serde.py +6 -1
  44. schema/utils/__init__.py +1 -0
  45. schema/utils/proto_enum_converter.py +127 -0
  46. {google_meridian-1.4.0.dist-info → google_meridian-1.5.0.dist-info}/licenses/LICENSE +0 -0
  47. {google_meridian-1.4.0.dist-info → google_meridian-1.5.0.dist-info}/top_level.txt +0 -0
meridian/model/context.py CHANGED
@@ -21,6 +21,7 @@ import warnings
21
21
  from meridian import backend
22
22
  from meridian import constants
23
23
  from meridian.data import input_data as data
24
+ from meridian.data import time_coordinates as tc
24
25
  from meridian.model import adstock_hill
25
26
  from meridian.model import knots
26
27
  from meridian.model import media
@@ -59,6 +60,8 @@ class ModelContext:
59
60
  self._check_media_prior_support()
60
61
  self._validate_geo_invariants()
61
62
  self._validate_time_invariants()
63
+ self._validate_media_spend_for_paid_channels()
64
+ self._validate_rf_spend_for_paid_channels()
62
65
 
63
66
  def _validate_data_dependent_model_spec(self):
64
67
  """Validates that the data dependent model specs have correct shapes."""
@@ -307,6 +310,41 @@ class ModelContext:
307
310
  ].values,
308
311
  )
309
312
 
def _validate_media_spend_for_paid_channels(self) -> None:
  """Checks the aggregated media spend for channels with zero total spend.

  Delegates to `_validate_spend_for_paid_channels` using the media channel
  dimension.
  """
  media_spend = self.input_data.aggregate_media_spend()
  self._validate_spend_for_paid_channels(media_spend, constants.MEDIA_CHANNEL)
317
+
def _validate_rf_spend_for_paid_channels(self) -> None:
  """Checks the aggregated RF spend for channels with zero total spend.

  Delegates to `_validate_spend_for_paid_channels` using the RF channel
  dimension.
  """
  rf_spend = self.input_data.aggregate_rf_spend()
  self._validate_spend_for_paid_channels(rf_spend, constants.RF_CHANNEL)
322
+
323
+ def _validate_spend_for_paid_channels(
324
+ self,
325
+ spend: np.ndarray | None,
326
+ dim: str,
327
+ ) -> None:
328
+ """Validates non-zero media spend for paid media channels.
329
+
330
+ Args:
331
+ spend: The media spend data to validate.
332
+ dim: The dimension name of the spend data.
333
+
334
+ Raises:
335
+ ValueError if any paid media channel has zero total spend.
336
+ """
337
+ if spend is None:
338
+ return
339
+ zero_spend_channels = spend.coords[dim].where(spend == 0, drop=True).values
340
+
341
+ if zero_spend_channels.size > 0:
342
+ raise ValueError(
343
+ "Zero total spend detected for paid channels:"
344
+ f" {', '.join(zero_spend_channels)}. If data is correct and this is"
345
+ " expected, please consider modeling the data as organic media."
346
+ )
347
+
310
348
  def _check_if_no_time_variation(
311
349
  self,
312
350
  scaled_data: backend.Tensor,
@@ -905,6 +943,68 @@ class ModelContext:
905
943
  organic_rf=organic_rf_adstock_function,
906
944
  )
907
945
 
def create_inference_data_coords(
    self, n_chains: int, n_draws: int
) -> Mapping[str, np.ndarray | Sequence[str]]:
  """Builds the coordinate values for every inference data dimension.

  Args:
    n_chains: Number of chains; the chain coordinate is `arange(n_chains)`.
    n_draws: Number of draws; the draw coordinate is `arange(n_draws)`.

  Returns:
    A mapping from dimension name to its coordinate values. Channel and
    variable dimensions that are None in the input data map to an empty
    array.
  """
  inputs = self.input_data

  def names_or_empty(names):
    # A dimension missing from the input data gets an empty coordinate array.
    return np.array([]) if names is None else names

  (
      media_names,
      rf_names,
      organic_media_names,
      organic_rf_names,
      non_media_names,
      control_names,
  ) = (
      names_or_empty(inputs.media_channel),
      names_or_empty(inputs.rf_channel),
      names_or_empty(inputs.organic_media_channel),
      names_or_empty(inputs.organic_rf_channel),
      names_or_empty(inputs.non_media_channel),
      names_or_empty(inputs.control_variable),
  )

  coords = {
      constants.CHAIN: np.arange(n_chains),
      constants.DRAW: np.arange(n_draws),
      constants.GEO: inputs.geo,
      constants.TIME: inputs.time,
      constants.MEDIA_TIME: inputs.media_time,
      constants.KNOTS: np.arange(self.knot_info.n_knots),
      constants.CONTROL_VARIABLE: control_names,
      constants.NON_MEDIA_CHANNEL: non_media_names,
      constants.MEDIA_CHANNEL: media_names,
      constants.RF_CHANNEL: rf_names,
      constants.ORGANIC_MEDIA_CHANNEL: organic_media_names,
      constants.ORGANIC_RF_CHANNEL: organic_rf_names,
  }
  return coords
994
+
def create_inference_data_dims(self) -> Mapping[str, Sequence[str]]:
  """Builds the dimension names of each inference data parameter.

  Returns:
    A mapping from parameter name to its dimensions: chain and draw first,
    followed by the parameter's own dimensions from
    `constants.INFERENCE_DIMS`. Sigma gets a geo dimension only when
    `unique_sigma_for_each_geo` is truthy, and no extra dimension otherwise.
  """
  dims_by_param = dict(constants.INFERENCE_DIMS)
  dims_by_param[constants.SIGMA] = (
      [constants.GEO] if self.unique_sigma_for_each_geo else []
  )

  sample_dims = (constants.CHAIN, constants.DRAW)
  return {
      param: [*sample_dims, *param_dims]
      for param, param_dims in dims_by_param.items()
  }
1007
+
908
1008
  def populate_cached_properties(self):
909
1009
  """Eagerly activates all cached properties.
910
1010
 
@@ -923,3 +1023,37 @@ class ModelContext:
923
1023
  ]
924
1024
  for attr in cached_properties:
925
1025
  _ = getattr(self, attr)
1026
+
def expand_selected_time_dims(
    self,
    start_date: tc.Date = None,
    end_date: tc.Date = None,
) -> list[str] | None:
  """Expands a selected date range into the matching time dimension values.

  Both bounds are inclusive and, when given, must be present in the time
  coordinates of the input data. The expansion itself is delegated to the
  input data's time coordinates; this method only formats the result.

  Args:
    start_date: First date of the selected period. None means the earliest
      time dimension value in the input data.
    end_date: Last date of the selected period. None means the latest time
      dimension value in the input data.

  Returns:
    The time dimension values inside the selected period, formatted with
    `constants.DATE_FORMAT`. None is passed through when both arguments are
    None or when the selection spans the entire time range of the input data.

  Raises:
    ValueError: If `start_date` or `end_date` is not in the input data time
      dimensions.
  """
  time_coordinates = self.input_data.time_coordinates
  selected_dates = time_coordinates.expand_selected_time_dims(
      start_date=start_date, end_date=end_date
  )
  if selected_dates is not None:
    return [day.strftime(constants.DATE_FORMAT) for day in selected_dates]
  return None
meridian/model/eda/constants.py CHANGED
@@ -13,20 +13,350 @@
13
13
  # limitations under the License.
14
14
 
15
15
  """Constants specific to MeridianEDA."""
16
+ from typing import Literal
17
+ import altair as alt
18
+ import immutabledict
19
+ from meridian import constants
20
+ import numpy as np
16
21
 
##### EDA Engine constants #####
# Default aggregation function for data-array variables.
DEFAULT_DA_VAR_AGG_FUNCTION = np.sum

# Variable, column and result names.
COST_PER_MEDIA_UNIT = 'cost_per_media_unit'
ABS_COST_PER_MEDIA_UNIT = 'abs_cost_per_media_unit'
RSQUARED_GEO = 'rsquared_geo'
RSQUARED_TIME = 'rsquared_time'
VARIABLE_1 = 'var1'
VARIABLE_2 = 'var2'
CORRELATION = 'correlation'
ABS_CORRELATION_COL_NAME = 'abs_correlation'
CORRELATION_MATRIX_NAME = 'correlation_matrix'
STD_WITH_OUTLIERS_VAR_NAME = 'std_with_outliers'
STD_WITHOUT_OUTLIERS_VAR_NAME = 'std_without_outliers'
OUTLIERS_COL_NAME = 'outliers'
ABS_OUTLIERS_COL_NAME = 'abs_outliers'
VIF_COL_NAME = 'VIF'
EXTREME_CORRELATION_WITH = 'extreme_correlation_with'
PRIOR_CONTRIBUTION = 'prior_contribution'

# Aggregation descriptions.
TIME_AND_GEO_AGGREGATION = 'times and geos'
TIME_AGGREGATION = 'times'

# Pairwise correlation thresholds.
OVERALL_PAIRWISE_CORR_THRESHOLD = 0.999
GEO_PAIRWISE_CORR_THRESHOLD = 0.999
NATIONAL_PAIRWISE_CORR_THRESHOLD = 0.999

# Quartile, IQR and standard deviation settings.
Q1_THRESHOLD = 0.25
Q3_THRESHOLD = 0.75
IQR_MULTIPLIER = 1.5
STD_THRESHOLD = 1e-4
24
49
 
##### EDA Plotting properties #####
# Hex colors used by the correlation color scales below.
CORRELATION_RED = '#d73027'
CORRELATION_WHITE = '#f7f7f7'
CORRELATION_BLUE = '#4575b4'
CORRELATION_LEGEND_TITLE = 'correlation (blue=OK, red=bad)'
DEFAULT_CHART_COLOR = '#4C78A8'

VARIABLE = 'var'
VALUE = 'value'
NATIONALIZE: Literal['nationalize'] = 'nationalize'
MEDIA_IMPRESSIONS_SCALED = 'media_impressions_scaled'
IMPRESSION_SHARE_SCALED = 'impression_share_scaled'
SPEND_SHARE = 'spend_share'
LABEL = 'label'


def _channel_x_encoding() -> alt.X:
  """Returns a new unsorted nominal x encoding of `VARIABLE`."""
  return alt.X(
      f'{VARIABLE}:N',
      sort=None,
      title=constants.CHANNEL,
      axis=alt.Axis(labelAngle=-45),
  )


def _correlation_y_encoding() -> alt.Y:
  """Returns a new quantitative y encoding of `VALUE` on the [-1, 1] domain."""
  return alt.Y(
      f'{VALUE}:Q', title=CORRELATION, scale=alt.Scale(domain=[-1, 1])
  )


def _correlation_color_encoding(domain, palette) -> alt.Color:
  """Returns a new color encoding of `VALUE` with the shared legend config."""
  return alt.Color(
      f'{VALUE}:Q',
      scale=alt.Scale(domain=domain, range=palette),
      legend=alt.Legend(**POPULATION_CORRELATION_LEGEND_CONFIGS),
  )


# Red at both extremes, blue at zero, white at the midpoints.
PAIRWISE_CORR_COLOR_SCALE = alt.Scale(
    domain=[-1.0, -0.5, 0.0, 0.5, 1.0],
    range=[
        CORRELATION_RED,
        CORRELATION_WHITE,
        CORRELATION_BLUE,
        CORRELATION_WHITE,
        CORRELATION_RED,
    ],
    type='linear',
)
POPULATION_CORRELATION_LEGEND_CONFIGS = immutabledict.immutabledict({
    'title': CORRELATION_LEGEND_TITLE,
    'orient': 'bottom',
})
# Red at -1, white at 0, blue at +1.
POPULATION_RAW_MEDIA_CORRELATION_ENCODINGS = immutabledict.immutabledict({
    'x': _channel_x_encoding(),
    'y': _correlation_y_encoding(),
    'color': _correlation_color_encoding(
        domain=[-1, 0, 1],
        palette=[CORRELATION_RED, CORRELATION_WHITE, CORRELATION_BLUE],
    ),
})
# Red at both extremes, blue at zero, white at the midpoints.
POPULATION_TREATMENT_CORRELATION_ENCODINGS = immutabledict.immutabledict({
    'x': _channel_x_encoding(),
    'y': _correlation_y_encoding(),
    'color': _correlation_color_encoding(
        domain=[-1, -0.5, 0, 0.5, 1],
        palette=[
            CORRELATION_RED,
            CORRELATION_WHITE,
            CORRELATION_BLUE,
            CORRELATION_WHITE,
            CORRELATION_RED,
        ],
    ),
})
PRIOR_MEAN_ENCODINGS = immutabledict.immutabledict({
    'x': _channel_x_encoding(),
    'y': alt.Y(f'{VALUE}:Q', title=PRIOR_CONTRIBUTION),
})
CHANNEL_TYPE_TO_COLOR = immutabledict.immutabledict({
    constants.MEDIA_UNITS: '#4285F4',
    constants.MEDIA_CHANNEL: '#4285F4',
    constants.SPEND: '#FBBC04',
    COST_PER_MEDIA_UNIT: '#A142F4',
    constants.ORGANIC_MEDIA_CHANNEL: '#F29900',
    constants.RF_CHANNEL: '#EA4335',
    constants.ORGANIC_RF_CHANNEL: '#FBBC04',
    constants.CONTROL_VARIABLE: '#34A853',
    constants.NON_MEDIA_CHANNEL: '#12939A',
})
142
+
143
+
##### Report constants #####
REPORT_TITLE = 'Meridian Exploratory Data Analysis Report'
# Number of cases the display-limit message below says a table shows. It is
# defined before the message so the advertised count is derived from it
# instead of being hard-coded a second time.
DISPLAY_LIMIT = 5
# Template with two `str.format` placeholders, `{function}` and `{to_review}`.
# Only the limit is filled in here, hence the doubled braces around `function`
# inside the f-string piece.
DISPLAY_LIMIT_MESSAGE = (
    '<br/>(Due to space constraints, this table only displays the'
    f' {DISPLAY_LIMIT} most severe cases. Please use {{function}} to review'
    ' {to_review}.)'
)
TIME_SERIES_LIMIT = 2
POPULATION_CORRELATION_BARCHART_LIMIT = PRIOR_MEAN_BARCHART_LIMIT = 15
# category 1
SPEND_AND_MEDIA_UNIT_CARD_ID = 'spend-and-media-unit'
SPEND_AND_MEDIA_UNIT_CARD_TITLE = 'Spend and Media Unit'
RELATIVE_SPEND_SHARE_CHART_ID = 'relative-spend-share-chart'
SPEND_PER_MEDIA_UNIT_CHART_ID = 'spend-per-media-unit-chart'
INCONSISTENT_DATA_TABLE_ID = 'inconsistent-data-table'
COST_PER_MEDIA_UNIT_OUTLIER_TABLE_ID = 'cost-per-media-unit-outlier-table'
# category 2
RESPONSE_VARIABLES_CARD_ID = 'response-variables'
RESPONSE_VARIABLES_CARD_TITLE = 'Individual Explanatory/Response Variables'
TREATMENTS_CHART_ID = 'treatments-chart'
CONTROLS_AND_NON_MEDIA_CHART_ID = 'controls-and-non-media-chart'
KPI_CHART_ID = 'kpi-chart'
TREATMENT_CONTROL_VARIABILITY_TABLE_ID = 'treatment-control-variability-table'
TREATMENT_CONTROL_OUTLIER_TABLE_ID = 'treatment-control-outlier-table'
# category 3
POPULATION_SCALING_CARD_ID = 'population-scaling'
POPULATION_SCALING_CARD_TITLE = 'Population Scaling of Explanatory Variables'
POPULATION_RAW_MEDIA_CHART_ID = 'population-raw-media-chart'
POPULATION_TREATMENT_CHART_ID = 'population-treatment-chart'
# category 4
RELATIONSHIP_BETWEEN_VARIABLES_CARD_ID = 'relationship-among-variables'
RELATIONSHIP_BETWEEN_VARIABLES_CARD_TITLE = 'Relationship Among the Variables'
PAIRWISE_CORRELATION_CHART_ID = 'pairwise-correlation-chart'
EXTREME_VIF_ERROR_TABLE_ID = 'extreme-vif-error-table'
EXTREME_VIF_ATTENTION_TABLE_ID = 'extreme-vif-attention-table'
R_SQUARED_TIME_TABLE_ID = 'r-squared-time-table'
R_SQUARED_GEO_TABLE_ID = 'r-squared-geo-table'
# category 5
PRIOR_SPECIFICATIONS_CARD_ID = 'prior-specifications'
PRIOR_SPECIFICATIONS_CARD_TITLE = 'Prior Specifications'
PRIOR_CHART_ID = 'prior-chart'
# summary
SUMMARY_CARD_ID = 'summary'
SUMMARY_CARD_TITLE = 'Summary'
SUMMARY_TABLE_ID = 'summary-table'
CATEGORY = 'Category'
FINDING = 'Finding'
RECOMMENDED_NEXT_STEP = 'Recommended Next Step'
192
+
193
+
##### Finding messages #####
# Summary table messages, one INFO/FINDING text per report card.
SUMMARY_TABLE_SUMMARY_INFO = (
    'Review the full report to investigate the health of your dataset '
    'and confirm findings align with your expectations.'
)
SUMMARY_TABLE_SUMMARY_FINDING = (
    'Review the health of your dataset below. Resolve all FAILS and '
    'investigate REVIEW flags in the detailed sections to ensure your data '
    'is ready for modeling.'
)
SUMMARY_TABLE_SPEND_AND_MEDIA_UNIT_INFO = (
    'No automated issues detected. See '
    f'<a href="#{SPEND_AND_MEDIA_UNIT_CARD_ID}">Spend and Media Units</a> '
    'for more details.'
)
SUMMARY_TABLE_SPEND_AND_MEDIA_UNIT_FINDING = (
    'See '
    f'<a href="#{SPEND_AND_MEDIA_UNIT_CARD_ID}">Spend and Media Units</a>. '
    'Where applicable, verify that spend and media units align across '
    'channels, and review outliers in cost per media unit.'
)
SUMMARY_TABLE_RESPONSE_VARIABLES_INFO = (
    'No automated issues detected. See '
    f'<a href="#{RESPONSE_VARIABLES_CARD_ID}">Individual '
    'Explanatory/Response Variables</a> for more details.'
)
SUMMARY_TABLE_RESPONSE_VARIABLES_FINDING = (
    f'See <a href="#{RESPONSE_VARIABLES_CARD_ID}">Individual '
    'Explanatory/Response Variables</a>. Where applicable, review any '
    'variables with low signal or with outliers.'
)
SUMMARY_TABLE_POPULATION_SCALING_INFO = (
    'No automated issues detected. See '
    f'<a href="#{POPULATION_SCALING_CARD_ID}">Population Scaling</a> for '
    'more details.'
)
SUMMARY_TABLE_RELATIONSHIP_BETWEEN_VARIABLES_INFO = (
    'No automated issues detected. See '
    f'<a href="#{RELATIONSHIP_BETWEEN_VARIABLES_CARD_ID}">Relationship '
    'Among the Variables</a> for more details.'
)
SUMMARY_TABLE_RELATIONSHIP_BETWEEN_VARIABLES_FINDING = (
    f'See <a href="#{RELATIONSHIP_BETWEEN_VARIABLES_CARD_ID}">Relationship '
    'Among the Variables</a>. Check for high multicollinearity among the '
    'variables that could lead to model convergence issues.'
)
SUMMARY_TABLE_PRIOR_SPECIFICATIONS_INFO = (
    'No automated issues detected. See '
    f'<a href="#{PRIOR_SPECIFICATIONS_CARD_ID}">Prior Specifications</a> for '
    'more details. Assess the likelihood of a negative baseline occurring.'
)

# Informational texts for individual checks and charts.
SPEND_PER_MEDIA_UNIT_INFO = (
    'Please review the patterns for spend, media units, and '
    'cost-per-media unit. Any erratic or unexpected patterns warrant a data '
    'review.'
)
VARIABILITY_PLOT_INFO = (
    'Please review the variability of the explanatory and response variables '
    'illustrated by the boxplots. Note that variables with very low '
    'variability could be difficult to estimate and could hurt model '
    'convergence. Consider merging or replacing them with other variables, '
    'dropping them if they are negligibly small, or using a custom prior if '
    'you have relevant information. If outliers exist, check your data input '
    'to determine if they are genuine or erroneous.'
)
RELATIVE_SPEND_SHARE_INFO = (
    "Please review the channel's share of spend. Channels with a very small "
    'share of spend might be difficult to estimate. You might want to combine '
    'them with other channels. Meanwhile, a channel with a huge spend share '
    'would increase the risk of producing a negative baseline if it also has a '
    'high ROI.'
)
PAIRWISE_CORRELATION_CHECK_INFO = (
    'Please review the computed pairwise correlations. Note that '
    'high pairwise correlation may cause model identifiability '
    'and convergence issues. Consider combining the variables if '
    'high correlation exists.'
)
# The multicollinearity messages are `str.format` templates; their braces are
# placeholders, so these pieces are deliberately not f-strings.
MULTICOLLINEARITY_ERROR = (
    'Some variables have extreme multicollinearity (VIF '
    '> {threshold}) across all {aggregation}. Note that '
    'a common cause of multicollinearity is perfect pairwise '
    'correlation. To address multicollinearity, please drop any '
    'variable that is a linear combination of other variables. '
    'Otherwise, consider combining variables.{additional_info}'
)
MULTICOLLINEARITY_ATTENTION = (
    'Some variables have extreme multicollinearity (VIF > '
    '{threshold}) in certain geo(s). Note that a common '
    'cause of multicollinearity is perfect pairwise '
    "correlation. While this geo-level issue isn't necessarily "
    'problematic due to hierarchical modeling in Meridian, it '
    'may be a data issue that could lead to poor inference or '
    'even poor convergence. Consider checking your data or '
    'combining these variables, especially if they also have '
    'high VIF in other geos.{additional_info}'
)
R_SQUARED_TIME_INFO = (
    'This check regresses each variable against time as a '
    'categorical variable. In this case, high R-squared indicates '
    'low geo variation of a variable. This could lead to a weakly '
    'identifiable and non-converging model if a large number of '
    'knots are used. Consider dropping the variable with very high '
    'R-squared or reducing `knots` argument in `ModelSpec`.'
)
R_SQUARED_GEO_INFO = (
    'This check regresses each variable against geo as a '
    'categorical variable. In this case, high R-squared indicates '
    'low time variation of a variable. This could lead to a weakly '
    'identifiable and non-converging model due to geo main '
    'effects. Consider dropping the variable with very high '
    'R-squared.'
)
POPULATION_CORRELATION_SCALED_TREATMENT_CONTROL_INFO = (
    'Please review the Spearman correlation between population and scaled '
    'treatment units or scaled controls.<br/><br/>For controls and non-media '
    "channels: Meridian doesn't population-scale these variables by default. "
    'High correlation indicates that users should population-scale these '
    'variables using the `control_population_scaling_id` or '
    '`non_media_population_scaling_id` argument in `ModelSpec`.<br/><br/>For '
    'paid and organic media channels: Meridian automatically population-scales '
    'these media channels by default. High correlation indicates that the '
    'variable may have been population-scaled before being passed to Meridian. '
    'Please check your data input.'
)
POPULATION_CORRELATION_RAW_MEDIA_INFO = (
    'Please review the Spearman correlation between population and raw paid '
    'and organic media variables. These raw media variables are expected to '
    'have positive correlation with population. If there is low or negative '
    'correlation, please check your data input.'
)
PRIOR_PROBABILITY_REPORT_INFO = (
    'Negative baseline is equivalent to the treatment effects getting too much '
    'credit. Please review the prior probability of negative baseline together '
    'with the bar chart for channel-level prior mean of contribution. If the '
    'prior probability of negative baseline is high, consider custom treatment '
    'priors. In particular, a custom `contribution prior` type may be '
    'appropriate.<br/><br/>'
)
# Maps each card title to its two summary messages, keyed by whether findings
# were detected for that card: True selects the finding message and False the
# info message. For example, if the spend and media unit card has errors or
# reviews (True), the finding message is displayed; otherwise (False) the info
# message is displayed.
CATEGORY_TO_MESSAGE_BY_STATUS = immutabledict.immutabledict({
    card_title: immutabledict.immutabledict({
        False: info_message,
        True: finding_message,
    })
    for card_title, info_message, finding_message in (
        (
            SUMMARY_CARD_TITLE,
            SUMMARY_TABLE_SUMMARY_INFO,
            SUMMARY_TABLE_SUMMARY_FINDING,
        ),
        (
            SPEND_AND_MEDIA_UNIT_CARD_TITLE,
            SUMMARY_TABLE_SPEND_AND_MEDIA_UNIT_INFO,
            SUMMARY_TABLE_SPEND_AND_MEDIA_UNIT_FINDING,
        ),
        (
            RESPONSE_VARIABLES_CARD_TITLE,
            SUMMARY_TABLE_RESPONSE_VARIABLES_INFO,
            SUMMARY_TABLE_RESPONSE_VARIABLES_FINDING,
        ),
        (
            POPULATION_SCALING_CARD_TITLE,
            SUMMARY_TABLE_POPULATION_SCALING_INFO,
            '',  # currently there are no findings for this card
        ),
        (
            RELATIONSHIP_BETWEEN_VARIABLES_CARD_TITLE,
            SUMMARY_TABLE_RELATIONSHIP_BETWEEN_VARIABLES_INFO,
            SUMMARY_TABLE_RELATIONSHIP_BETWEEN_VARIABLES_FINDING,
        ),
        (
            PRIOR_SPECIFICATIONS_CARD_TITLE,
            SUMMARY_TABLE_PRIOR_SPECIFICATIONS_INFO,
            '',  # currently there are no findings for this card
        ),
    )
})