google-meridian 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {google_meridian-1.0.6.dist-info → google_meridian-1.0.8.dist-info}/METADATA +11 -10
- {google_meridian-1.0.6.dist-info → google_meridian-1.0.8.dist-info}/RECORD +18 -18
- {google_meridian-1.0.6.dist-info → google_meridian-1.0.8.dist-info}/WHEEL +1 -1
- meridian/__init__.py +1 -1
- meridian/analysis/analyzer.py +383 -320
- meridian/analysis/optimizer.py +531 -269
- meridian/analysis/summarizer.py +21 -3
- meridian/analysis/summary_text.py +20 -1
- meridian/analysis/templates/chart.html.jinja +1 -0
- meridian/analysis/test_utils.py +47 -99
- meridian/analysis/visualizer.py +407 -83
- meridian/constants.py +31 -0
- meridian/data/input_data.py +49 -5
- meridian/data/load.py +10 -7
- meridian/model/model.py +5 -4
- meridian/model/posterior_sampler.py +15 -5
- {google_meridian-1.0.6.dist-info → google_meridian-1.0.8.dist-info/licenses}/LICENSE +0 -0
- {google_meridian-1.0.6.dist-info → google_meridian-1.0.8.dist-info}/top_level.txt +0 -0
meridian/analysis/analyzer.py
CHANGED
|
@@ -181,6 +181,13 @@ class DataTensors(tf.experimental.ExtensionType):
|
|
|
181
181
|
return new_tensor.shape[1]
|
|
182
182
|
return None
|
|
183
183
|
|
|
184
|
+
def filter_fields(self, fields: Sequence[str]) -> Self:
|
|
185
|
+
"""Returns a new DataTensors object with only the specified fields."""
|
|
186
|
+
output = {}
|
|
187
|
+
for field in fields:
|
|
188
|
+
output[field] = getattr(self, field)
|
|
189
|
+
return DataTensors(**output)
|
|
190
|
+
|
|
184
191
|
def validate_and_fill_missing_data(
|
|
185
192
|
self,
|
|
186
193
|
required_tensors_names: Sequence[str],
|
|
@@ -896,8 +903,8 @@ class Analyzer:
|
|
|
896
903
|
"""Computes decayed effect means and CIs for media or RF channels.
|
|
897
904
|
|
|
898
905
|
Args:
|
|
899
|
-
channel_type: Specifies `media` or `
|
|
900
|
-
posterior decayed effects.
|
|
906
|
+
channel_type: Specifies `media`, `reach`, or `organic_media` for computing
|
|
907
|
+
prior and posterior decayed effects.
|
|
901
908
|
l_range: The range of time across which the adstock effect is computed.
|
|
902
909
|
xr_dims: A list of dimensions for the output dataset.
|
|
903
910
|
xr_coords: A dictionary with the coordinates for the output dataset.
|
|
@@ -914,12 +921,22 @@ class Analyzer:
|
|
|
914
921
|
self._meridian.inference_data.posterior.alpha_m.values,
|
|
915
922
|
(-1, self._meridian.n_media_channels),
|
|
916
923
|
)
|
|
917
|
-
|
|
924
|
+
elif channel_type is constants.REACH:
|
|
918
925
|
prior = self._meridian.inference_data.prior.alpha_rf.values[0]
|
|
919
926
|
posterior = np.reshape(
|
|
920
927
|
self._meridian.inference_data.posterior.alpha_rf.values,
|
|
921
928
|
(-1, self._meridian.n_rf_channels),
|
|
922
929
|
)
|
|
930
|
+
elif channel_type is constants.ORGANIC_MEDIA:
|
|
931
|
+
prior = self._meridian.inference_data.prior.alpha_om.values[0]
|
|
932
|
+
posterior = np.reshape(
|
|
933
|
+
self._meridian.inference_data.posterior.alpha_om.values,
|
|
934
|
+
(-1, self._meridian.n_organic_media_channels),
|
|
935
|
+
)
|
|
936
|
+
else:
|
|
937
|
+
raise ValueError(
|
|
938
|
+
f"Unsupported channel type for adstock decay: '{channel_type}'. "
|
|
939
|
+
)
|
|
923
940
|
|
|
924
941
|
decayed_effect_prior = (
|
|
925
942
|
prior[np.newaxis, ...] ** l_range[:, np.newaxis, np.newaxis, np.newaxis]
|
|
@@ -1455,16 +1472,7 @@ class Analyzer:
|
|
|
1455
1472
|
if new_data is None:
|
|
1456
1473
|
new_data = DataTensors()
|
|
1457
1474
|
|
|
1458
|
-
required_fields =
|
|
1459
|
-
constants.CONTROLS,
|
|
1460
|
-
constants.MEDIA,
|
|
1461
|
-
constants.REACH,
|
|
1462
|
-
constants.FREQUENCY,
|
|
1463
|
-
constants.ORGANIC_MEDIA,
|
|
1464
|
-
constants.ORGANIC_REACH,
|
|
1465
|
-
constants.ORGANIC_FREQUENCY,
|
|
1466
|
-
constants.NON_MEDIA_TREATMENTS,
|
|
1467
|
-
]
|
|
1475
|
+
required_fields = constants.NON_REVENUE_DATA
|
|
1468
1476
|
filled_tensors = new_data.validate_and_fill_missing_data(
|
|
1469
1477
|
required_tensors_names=required_fields,
|
|
1470
1478
|
meridian=self._meridian,
|
|
@@ -1841,8 +1849,8 @@ class Analyzer:
|
|
|
1841
1849
|
include or booleans with length equal to the number of time periods in
|
|
1842
1850
|
`new_data`, if provided. If `new_data` is provided,
|
|
1843
1851
|
`media_selected_times` can select any subset of time periods in
|
|
1844
|
-
`new_data`. If `new_data is not provided, `media_selected_times`
|
|
1845
|
-
from `InputData.time`. The incremental outcome corresponds to
|
|
1852
|
+
`new_data`. If `new_data` is not provided, `media_selected_times`
|
|
1853
|
+
selects from `InputData.time`. The incremental outcome corresponds to
|
|
1846
1854
|
incremental KPI generated during the `selected_times` arg by treatment
|
|
1847
1855
|
variables executed during the `media_selected_times` arg. For each
|
|
1848
1856
|
channel, the incremental outcome is defined as the difference between
|
|
@@ -1922,16 +1930,9 @@ class Analyzer:
|
|
|
1922
1930
|
if new_data is None:
|
|
1923
1931
|
new_data = DataTensors()
|
|
1924
1932
|
|
|
1925
|
-
required_params =
|
|
1926
|
-
|
|
1927
|
-
|
|
1928
|
-
constants.FREQUENCY,
|
|
1929
|
-
constants.ORGANIC_MEDIA,
|
|
1930
|
-
constants.ORGANIC_REACH,
|
|
1931
|
-
constants.ORGANIC_FREQUENCY,
|
|
1932
|
-
constants.NON_MEDIA_TREATMENTS,
|
|
1933
|
-
constants.REVENUE_PER_KPI,
|
|
1934
|
-
]
|
|
1933
|
+
required_params = constants.PAID_DATA
|
|
1934
|
+
if include_non_paid_channels:
|
|
1935
|
+
required_params += constants.NON_PAID_DATA
|
|
1935
1936
|
data_tensors = new_data.validate_and_fill_missing_data(
|
|
1936
1937
|
required_tensors_names=required_params, meridian=self._meridian
|
|
1937
1938
|
)
|
|
@@ -2193,14 +2194,7 @@ class Analyzer:
|
|
|
2193
2194
|
}
|
|
2194
2195
|
self._check_revenue_data_exists(use_kpi)
|
|
2195
2196
|
self._validate_geo_and_time_granularity(**dim_kwargs)
|
|
2196
|
-
required_values =
|
|
2197
|
-
constants.MEDIA,
|
|
2198
|
-
constants.MEDIA_SPEND,
|
|
2199
|
-
constants.REACH,
|
|
2200
|
-
constants.FREQUENCY,
|
|
2201
|
-
constants.RF_SPEND,
|
|
2202
|
-
constants.REVENUE_PER_KPI,
|
|
2203
|
-
]
|
|
2197
|
+
required_values = constants.PERFORMANCE_DATA
|
|
2204
2198
|
if not new_data:
|
|
2205
2199
|
new_data = DataTensors()
|
|
2206
2200
|
filled_data = new_data.validate_and_fill_missing_data(
|
|
@@ -2208,7 +2202,7 @@ class Analyzer:
|
|
|
2208
2202
|
meridian=self._meridian,
|
|
2209
2203
|
)
|
|
2210
2204
|
numerator = self.incremental_outcome(
|
|
2211
|
-
new_data=filled_data,
|
|
2205
|
+
new_data=filled_data.filter_fields(constants.PAID_DATA),
|
|
2212
2206
|
scaling_factor0=1,
|
|
2213
2207
|
scaling_factor1=1 + incremental_increase,
|
|
2214
2208
|
inverse_transform_outcome=True,
|
|
@@ -2322,14 +2316,7 @@ class Analyzer:
|
|
|
2322
2316
|
}
|
|
2323
2317
|
self._check_revenue_data_exists(use_kpi)
|
|
2324
2318
|
self._validate_geo_and_time_granularity(**dim_kwargs)
|
|
2325
|
-
required_values =
|
|
2326
|
-
constants.MEDIA,
|
|
2327
|
-
constants.MEDIA_SPEND,
|
|
2328
|
-
constants.REACH,
|
|
2329
|
-
constants.FREQUENCY,
|
|
2330
|
-
constants.RF_SPEND,
|
|
2331
|
-
constants.REVENUE_PER_KPI,
|
|
2332
|
-
]
|
|
2319
|
+
required_values = constants.PERFORMANCE_DATA
|
|
2333
2320
|
if not new_data:
|
|
2334
2321
|
new_data = DataTensors()
|
|
2335
2322
|
filled_data = new_data.validate_and_fill_missing_data(
|
|
@@ -2337,7 +2324,7 @@ class Analyzer:
|
|
|
2337
2324
|
meridian=self._meridian,
|
|
2338
2325
|
)
|
|
2339
2326
|
incremental_outcome = self.incremental_outcome(
|
|
2340
|
-
new_data=filled_data,
|
|
2327
|
+
new_data=filled_data.filter_fields(constants.PAID_DATA),
|
|
2341
2328
|
**incremental_outcome_kwargs,
|
|
2342
2329
|
**dim_kwargs,
|
|
2343
2330
|
)
|
|
@@ -2579,21 +2566,15 @@ class Analyzer:
|
|
|
2579
2566
|
|
|
2580
2567
|
# Set up the coordinates.
|
|
2581
2568
|
coords = {
|
|
2582
|
-
constants.METRIC:
|
|
2583
|
-
[constants.METRIC],
|
|
2584
|
-
[constants.MEAN, constants.CI_LO, constants.CI_HI],
|
|
2585
|
-
),
|
|
2569
|
+
constants.METRIC: [constants.MEAN, constants.CI_LO, constants.CI_HI],
|
|
2586
2570
|
}
|
|
2587
2571
|
|
|
2588
2572
|
if not aggregate_geos:
|
|
2589
|
-
coords[constants.GEO] =
|
|
2573
|
+
coords[constants.GEO] = mmm.input_data.geo.data
|
|
2590
2574
|
if not aggregate_times:
|
|
2591
|
-
coords[constants.TIME] =
|
|
2575
|
+
coords[constants.TIME] = mmm.input_data.time.data
|
|
2592
2576
|
if can_split_by_holdout:
|
|
2593
|
-
coords[constants.EVALUATION_SET_VAR] = (
|
|
2594
|
-
[constants.EVALUATION_SET_VAR],
|
|
2595
|
-
list(constants.EVALUATION_SET),
|
|
2596
|
-
)
|
|
2577
|
+
coords[constants.EVALUATION_SET_VAR] = list(constants.EVALUATION_SET)
|
|
2597
2578
|
|
|
2598
2579
|
# Set up the dimensions.
|
|
2599
2580
|
actual_dims = ((constants.GEO,) if not aggregate_geos else ()) + (
|
|
@@ -2879,7 +2860,7 @@ class Analyzer:
|
|
|
2879
2860
|
batched_kwargs = {"batch_size": batch_size}
|
|
2880
2861
|
new_data = new_data or DataTensors()
|
|
2881
2862
|
aggregated_impressions = self.get_aggregated_impressions(
|
|
2882
|
-
new_data=new_data,
|
|
2863
|
+
new_data=new_data.filter_fields(constants.IMPRESSIONS_DATA),
|
|
2883
2864
|
optimal_frequency=optimal_frequency,
|
|
2884
2865
|
include_non_paid_channels=include_non_paid_channels,
|
|
2885
2866
|
**dim_kwargs,
|
|
@@ -2892,9 +2873,12 @@ class Analyzer:
|
|
|
2892
2873
|
axis=-1,
|
|
2893
2874
|
)
|
|
2894
2875
|
|
|
2876
|
+
incremental_outcome_fields = list(
|
|
2877
|
+
constants.PAID_DATA + constants.NON_PAID_DATA
|
|
2878
|
+
)
|
|
2895
2879
|
incremental_outcome_prior = self.compute_incremental_outcome_aggregate(
|
|
2896
2880
|
use_posterior=False,
|
|
2897
|
-
new_data=new_data,
|
|
2881
|
+
new_data=new_data.filter_fields(incremental_outcome_fields),
|
|
2898
2882
|
use_kpi=use_kpi,
|
|
2899
2883
|
include_non_paid_channels=include_non_paid_channels,
|
|
2900
2884
|
non_media_baseline_values=non_media_baseline_values,
|
|
@@ -2903,7 +2887,7 @@ class Analyzer:
|
|
|
2903
2887
|
)
|
|
2904
2888
|
incremental_outcome_posterior = self.compute_incremental_outcome_aggregate(
|
|
2905
2889
|
use_posterior=True,
|
|
2906
|
-
new_data=new_data,
|
|
2890
|
+
new_data=new_data.filter_fields(incremental_outcome_fields),
|
|
2907
2891
|
use_kpi=use_kpi,
|
|
2908
2892
|
include_non_paid_channels=include_non_paid_channels,
|
|
2909
2893
|
non_media_baseline_values=non_media_baseline_values,
|
|
@@ -2912,7 +2896,7 @@ class Analyzer:
|
|
|
2912
2896
|
)
|
|
2913
2897
|
incremental_outcome_mroi_prior = self.compute_incremental_outcome_aggregate(
|
|
2914
2898
|
use_posterior=False,
|
|
2915
|
-
new_data=new_data,
|
|
2899
|
+
new_data=new_data.filter_fields(incremental_outcome_fields),
|
|
2916
2900
|
use_kpi=use_kpi,
|
|
2917
2901
|
by_reach=marginal_roi_by_reach,
|
|
2918
2902
|
scaling_factor0=1,
|
|
@@ -2925,7 +2909,7 @@ class Analyzer:
|
|
|
2925
2909
|
incremental_outcome_mroi_posterior = (
|
|
2926
2910
|
self.compute_incremental_outcome_aggregate(
|
|
2927
2911
|
use_posterior=True,
|
|
2928
|
-
new_data=new_data,
|
|
2912
|
+
new_data=new_data.filter_fields(incremental_outcome_fields),
|
|
2929
2913
|
use_kpi=use_kpi,
|
|
2930
2914
|
by_reach=marginal_roi_by_reach,
|
|
2931
2915
|
scaling_factor0=1,
|
|
@@ -2947,19 +2931,14 @@ class Analyzer:
|
|
|
2947
2931
|
if include_non_paid_channels
|
|
2948
2932
|
else self._meridian.input_data.get_all_paid_channels()
|
|
2949
2933
|
)
|
|
2950
|
-
xr_coords = {
|
|
2951
|
-
constants.CHANNEL: (
|
|
2952
|
-
[constants.CHANNEL],
|
|
2953
|
-
list(channels) + [constants.ALL_CHANNELS],
|
|
2954
|
-
),
|
|
2955
|
-
}
|
|
2934
|
+
xr_coords = {constants.CHANNEL: list(channels) + [constants.ALL_CHANNELS]}
|
|
2956
2935
|
if not aggregate_geos:
|
|
2957
2936
|
geo_dims = (
|
|
2958
2937
|
self._meridian.input_data.geo.data
|
|
2959
2938
|
if selected_geos is None
|
|
2960
2939
|
else selected_geos
|
|
2961
2940
|
)
|
|
2962
|
-
xr_coords[constants.GEO] =
|
|
2941
|
+
xr_coords[constants.GEO] = geo_dims
|
|
2963
2942
|
if not aggregate_times:
|
|
2964
2943
|
# Get the time coordinates for flexible time dimensions.
|
|
2965
2944
|
modified_times = new_data.get_modified_times(self._meridian)
|
|
@@ -2975,25 +2954,19 @@ class Analyzer:
|
|
|
2975
2954
|
time_dims = times[indices]
|
|
2976
2955
|
else:
|
|
2977
2956
|
time_dims = selected_times
|
|
2978
|
-
xr_coords[constants.TIME] =
|
|
2957
|
+
xr_coords[constants.TIME] = time_dims
|
|
2979
2958
|
xr_dims_with_ci_and_distribution = xr_dims + (
|
|
2980
2959
|
constants.METRIC,
|
|
2981
2960
|
constants.DISTRIBUTION,
|
|
2982
2961
|
)
|
|
2983
2962
|
xr_coords_with_ci_and_distribution = {
|
|
2984
|
-
constants.METRIC:
|
|
2985
|
-
|
|
2986
|
-
|
|
2987
|
-
|
|
2988
|
-
|
|
2989
|
-
|
|
2990
|
-
|
|
2991
|
-
],
|
|
2992
|
-
),
|
|
2993
|
-
constants.DISTRIBUTION: (
|
|
2994
|
-
[constants.DISTRIBUTION],
|
|
2995
|
-
[constants.PRIOR, constants.POSTERIOR],
|
|
2996
|
-
),
|
|
2963
|
+
constants.METRIC: [
|
|
2964
|
+
constants.MEAN,
|
|
2965
|
+
constants.MEDIAN,
|
|
2966
|
+
constants.CI_LO,
|
|
2967
|
+
constants.CI_HI,
|
|
2968
|
+
],
|
|
2969
|
+
constants.DISTRIBUTION: [constants.PRIOR, constants.POSTERIOR],
|
|
2997
2970
|
**xr_coords,
|
|
2998
2971
|
}
|
|
2999
2972
|
incremental_outcome = _central_tendency_and_ci_by_prior_and_posterior(
|
|
@@ -3022,14 +2995,14 @@ class Analyzer:
|
|
|
3022
2995
|
if new_data.get_modified_times(self._meridian) is None:
|
|
3023
2996
|
expected_outcome_prior = self.expected_outcome(
|
|
3024
2997
|
use_posterior=False,
|
|
3025
|
-
new_data=new_data,
|
|
2998
|
+
new_data=new_data.filter_fields(constants.NON_REVENUE_DATA),
|
|
3026
2999
|
use_kpi=use_kpi,
|
|
3027
3000
|
**dim_kwargs,
|
|
3028
3001
|
**batched_kwargs,
|
|
3029
3002
|
)
|
|
3030
3003
|
expected_outcome_posterior = self.expected_outcome(
|
|
3031
3004
|
use_posterior=True,
|
|
3032
|
-
new_data=new_data,
|
|
3005
|
+
new_data=new_data.filter_fields(constants.NON_REVENUE_DATA),
|
|
3033
3006
|
use_kpi=use_kpi,
|
|
3034
3007
|
**dim_kwargs,
|
|
3035
3008
|
**batched_kwargs,
|
|
@@ -3070,11 +3043,9 @@ class Analyzer:
|
|
|
3070
3043
|
# If non-paid channels are not included, return all metrics, paid and
|
|
3071
3044
|
# non-paid.
|
|
3072
3045
|
spend_list = []
|
|
3073
|
-
|
|
3074
|
-
|
|
3075
|
-
|
|
3076
|
-
[constants.MEDIA_SPEND, constants.RF_SPEND], self._meridian
|
|
3077
|
-
)
|
|
3046
|
+
new_spend_tensors = new_data.filter_fields(
|
|
3047
|
+
constants.SPEND_DATA
|
|
3048
|
+
).validate_and_fill_missing_data(constants.SPEND_DATA, self._meridian)
|
|
3078
3049
|
if self._meridian.n_media_channels > 0:
|
|
3079
3050
|
spend_list.append(new_spend_tensors.media_spend)
|
|
3080
3051
|
if self._meridian.n_rf_channels > 0:
|
|
@@ -3136,7 +3107,7 @@ class Analyzer:
|
|
|
3136
3107
|
cpik = self._compute_cpik_aggregate(
|
|
3137
3108
|
incremental_kpi_prior=self.compute_incremental_outcome_aggregate(
|
|
3138
3109
|
use_posterior=False,
|
|
3139
|
-
new_data=new_data,
|
|
3110
|
+
new_data=new_data.filter_fields(incremental_outcome_fields),
|
|
3140
3111
|
use_kpi=True,
|
|
3141
3112
|
include_non_paid_channels=False,
|
|
3142
3113
|
**dim_kwargs,
|
|
@@ -3144,7 +3115,7 @@ class Analyzer:
|
|
|
3144
3115
|
),
|
|
3145
3116
|
incremental_kpi_posterior=self.compute_incremental_outcome_aggregate(
|
|
3146
3117
|
use_posterior=True,
|
|
3147
|
-
new_data=new_data,
|
|
3118
|
+
new_data=new_data.filter_fields(incremental_outcome_fields),
|
|
3148
3119
|
use_kpi=True,
|
|
3149
3120
|
include_non_paid_channels=False,
|
|
3150
3121
|
**dim_kwargs,
|
|
@@ -3207,18 +3178,13 @@ class Analyzer:
|
|
|
3207
3178
|
(or `(n_channels,)` if geos and times are aggregated) with aggregate
|
|
3208
3179
|
impression values per channel.
|
|
3209
3180
|
"""
|
|
3210
|
-
tensor_names_list =
|
|
3181
|
+
tensor_names_list = (
|
|
3211
3182
|
constants.MEDIA,
|
|
3212
3183
|
constants.REACH,
|
|
3213
3184
|
constants.FREQUENCY,
|
|
3214
|
-
|
|
3185
|
+
)
|
|
3215
3186
|
if include_non_paid_channels:
|
|
3216
|
-
tensor_names_list.
|
|
3217
|
-
constants.ORGANIC_MEDIA,
|
|
3218
|
-
constants.ORGANIC_REACH,
|
|
3219
|
-
constants.ORGANIC_FREQUENCY,
|
|
3220
|
-
constants.NON_MEDIA_TREATMENTS,
|
|
3221
|
-
])
|
|
3187
|
+
tensor_names_list += constants.NON_PAID_DATA
|
|
3222
3188
|
if new_data is None:
|
|
3223
3189
|
new_data = DataTensors()
|
|
3224
3190
|
data_tensors = new_data.validate_and_fill_missing_data(
|
|
@@ -3323,41 +3289,33 @@ class Analyzer:
|
|
|
3323
3289
|
+ ((constants.TIME,) if not aggregate_times else ())
|
|
3324
3290
|
+ (constants.CHANNEL,)
|
|
3325
3291
|
)
|
|
3326
|
-
xr_coords = {
|
|
3327
|
-
constants.CHANNEL: ([constants.CHANNEL], [constants.BASELINE]),
|
|
3328
|
-
}
|
|
3292
|
+
xr_coords = {constants.CHANNEL: [constants.BASELINE]}
|
|
3329
3293
|
if not aggregate_geos:
|
|
3330
3294
|
geo_dims = (
|
|
3331
3295
|
self._meridian.input_data.geo.data
|
|
3332
3296
|
if selected_geos is None
|
|
3333
3297
|
else selected_geos
|
|
3334
3298
|
)
|
|
3335
|
-
xr_coords[constants.GEO] =
|
|
3299
|
+
xr_coords[constants.GEO] = geo_dims
|
|
3336
3300
|
if not aggregate_times:
|
|
3337
3301
|
time_dims = (
|
|
3338
3302
|
self._meridian.input_data.time.data
|
|
3339
3303
|
if selected_times is None
|
|
3340
3304
|
else selected_times
|
|
3341
3305
|
)
|
|
3342
|
-
xr_coords[constants.TIME] =
|
|
3306
|
+
xr_coords[constants.TIME] = time_dims
|
|
3343
3307
|
xr_dims_with_ci_and_distribution = xr_dims + (
|
|
3344
3308
|
constants.METRIC,
|
|
3345
3309
|
constants.DISTRIBUTION,
|
|
3346
3310
|
)
|
|
3347
3311
|
xr_coords_with_ci_and_distribution = {
|
|
3348
|
-
constants.METRIC:
|
|
3349
|
-
|
|
3350
|
-
|
|
3351
|
-
|
|
3352
|
-
|
|
3353
|
-
|
|
3354
|
-
|
|
3355
|
-
],
|
|
3356
|
-
),
|
|
3357
|
-
constants.DISTRIBUTION: (
|
|
3358
|
-
[constants.DISTRIBUTION],
|
|
3359
|
-
[constants.PRIOR, constants.POSTERIOR],
|
|
3360
|
-
),
|
|
3312
|
+
constants.METRIC: [
|
|
3313
|
+
constants.MEAN,
|
|
3314
|
+
constants.MEDIAN,
|
|
3315
|
+
constants.CI_LO,
|
|
3316
|
+
constants.CI_HI,
|
|
3317
|
+
],
|
|
3318
|
+
constants.DISTRIBUTION: [constants.PRIOR, constants.POSTERIOR],
|
|
3361
3319
|
**xr_coords,
|
|
3362
3320
|
}
|
|
3363
3321
|
|
|
@@ -3414,11 +3372,12 @@ class Analyzer:
|
|
|
3414
3372
|
|
|
3415
3373
|
def optimal_freq(
|
|
3416
3374
|
self,
|
|
3375
|
+
new_data: DataTensors | None = None,
|
|
3417
3376
|
freq_grid: Sequence[float] | None = None,
|
|
3418
3377
|
use_posterior: bool = True,
|
|
3419
3378
|
use_kpi: bool = False,
|
|
3420
3379
|
selected_geos: Sequence[str | int] | None = None,
|
|
3421
|
-
selected_times: Sequence[str | int] | None = None,
|
|
3380
|
+
selected_times: Sequence[str | int | bool] | None = None,
|
|
3422
3381
|
confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
|
|
3423
3382
|
) -> xr.Dataset:
|
|
3424
3383
|
"""Calculates the optimal frequency that maximizes posterior mean ROI.
|
|
@@ -3429,10 +3388,27 @@ class Analyzer:
|
|
|
3429
3388
|
number of impressions remains unchanged as frequency varies. Meridian solves
|
|
3430
3389
|
for the frequency at which posterior mean ROI is optimized.
|
|
3431
3390
|
|
|
3391
|
+
If `new_data=None`, this method calculates the optimal frequency on the
|
|
3392
|
+
values of the paid RF variables that the Meridian object was initialized
|
|
3393
|
+
with. The user can override this historical data through the `new_data`
|
|
3394
|
+
argument. For example,
|
|
3395
|
+
|
|
3396
|
+
```python
|
|
3397
|
+
new_data = DataTensors(reach=new_reach, frequency=new_frequency)
|
|
3398
|
+
```
|
|
3399
|
+
|
|
3432
3400
|
Note: The ROI numerator is revenue if `use_kpi` is `False`, otherwise, the
|
|
3433
3401
|
ROI numerator is KPI units.
|
|
3434
3402
|
|
|
3435
3403
|
Args:
|
|
3404
|
+
new_data: Optional `DataTensors` object containing `reach`, `frequency`,
|
|
3405
|
+
`rf_spend`, and `revenue_per_kpi`. If provided, the optimal frequency is
|
|
3406
|
+
calculated using the values of the tensors passed in `new_data` and the
|
|
3407
|
+
original values of all the remaining tensors. If `None`, the historical
|
|
3408
|
+
data used to initialize the Meridian object is used. If any of the
|
|
3409
|
+
tensors in `new_data` is provided with a different number of time
|
|
3410
|
+
periods than in `InputData`, then all tensors must be provided with the
|
|
3411
|
+
same number of time periods.
|
|
3436
3412
|
freq_grid: List of frequency values. The ROI of each channel is calculated
|
|
3437
3413
|
for each frequency value in the list. By default, the list includes
|
|
3438
3414
|
numbers from `1.0` to the maximum frequency in increments of `0.1`.
|
|
@@ -3443,8 +3419,10 @@ class Analyzer:
|
|
|
3443
3419
|
revenue.
|
|
3444
3420
|
selected_geos: Optional list containing a subset of geos to include. By
|
|
3445
3421
|
default, all geos are included.
|
|
3446
|
-
selected_times: Optional list containing a subset of
|
|
3447
|
-
|
|
3422
|
+
selected_times: Optional list containing either a subset of dates to
|
|
3423
|
+
include or booleans with length equal to the number of time periods in
|
|
3424
|
+
the `new_data` args, if provided. By default, all time periods are
|
|
3425
|
+
included.
|
|
3448
3426
|
confidence_level: Confidence level for prior and posterior credible
|
|
3449
3427
|
intervals, represented as a value between zero and one.
|
|
3450
3428
|
|
|
@@ -3475,6 +3453,7 @@ class Analyzer:
|
|
|
3475
3453
|
ValueError: If there are no channels with reach and frequency data.
|
|
3476
3454
|
"""
|
|
3477
3455
|
dist_type = constants.POSTERIOR if use_posterior else constants.PRIOR
|
|
3456
|
+
new_data = new_data or DataTensors()
|
|
3478
3457
|
if self._meridian.n_rf_channels == 0:
|
|
3479
3458
|
raise ValueError(
|
|
3480
3459
|
"Must have at least one channel with reach and frequency data."
|
|
@@ -3484,7 +3463,29 @@ class Analyzer:
|
|
|
3484
3463
|
f"sample_{dist_type}() must be called prior to calling this method."
|
|
3485
3464
|
)
|
|
3486
3465
|
|
|
3487
|
-
|
|
3466
|
+
filled_data = new_data.validate_and_fill_missing_data(
|
|
3467
|
+
constants.RF_DATA,
|
|
3468
|
+
self._meridian,
|
|
3469
|
+
)
|
|
3470
|
+
# TODO: Once treatment type filtering is added, remove adding
|
|
3471
|
+
# dummy media and media spend to `roi()` and `summary_metrics()`. This is a
|
|
3472
|
+
# hack to use `roi()` and `summary_metrics()` for RF only analysis.
|
|
3473
|
+
has_media = self._meridian.n_media_channels > 0
|
|
3474
|
+
n_media_times = (
|
|
3475
|
+
filled_data.get_modified_times(self._meridian)
|
|
3476
|
+
or self._meridian.n_media_times
|
|
3477
|
+
)
|
|
3478
|
+
n_times = (
|
|
3479
|
+
filled_data.get_modified_times(self._meridian) or self._meridian.n_times
|
|
3480
|
+
)
|
|
3481
|
+
dummy_media = tf.ones(
|
|
3482
|
+
(self._meridian.n_geos, n_media_times, self._meridian.n_media_channels)
|
|
3483
|
+
)
|
|
3484
|
+
dummy_media_spend = tf.ones(
|
|
3485
|
+
(self._meridian.n_geos, n_times, self._meridian.n_media_channels)
|
|
3486
|
+
)
|
|
3487
|
+
|
|
3488
|
+
max_freq = np.max(np.array(filled_data.frequency))
|
|
3488
3489
|
if freq_grid is None:
|
|
3489
3490
|
freq_grid = np.arange(1, max_freq, 0.1)
|
|
3490
3491
|
|
|
@@ -3494,14 +3495,18 @@ class Analyzer:
|
|
|
3494
3495
|
metric_grid = np.zeros((len(freq_grid), self._meridian.n_rf_channels, 4))
|
|
3495
3496
|
|
|
3496
3497
|
for i, freq in enumerate(freq_grid):
|
|
3497
|
-
new_frequency = tf.ones_like(
|
|
3498
|
-
new_reach =
|
|
3499
|
-
|
|
3500
|
-
|
|
3501
|
-
|
|
3498
|
+
new_frequency = tf.ones_like(filled_data.frequency) * freq
|
|
3499
|
+
new_reach = filled_data.frequency * filled_data.reach / new_frequency
|
|
3500
|
+
new_roi_data = DataTensors(
|
|
3501
|
+
reach=new_reach,
|
|
3502
|
+
frequency=new_frequency,
|
|
3503
|
+
rf_spend=filled_data.rf_spend,
|
|
3504
|
+
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
3505
|
+
media=dummy_media if has_media else None,
|
|
3506
|
+
media_spend=dummy_media_spend if has_media else None,
|
|
3502
3507
|
)
|
|
3503
3508
|
metric_grid_temp = self.roi(
|
|
3504
|
-
new_data=
|
|
3509
|
+
new_data=new_roi_data,
|
|
3505
3510
|
use_posterior=use_posterior,
|
|
3506
3511
|
selected_geos=selected_geos,
|
|
3507
3512
|
selected_times=selected_times,
|
|
@@ -3521,20 +3526,25 @@ class Analyzer:
|
|
|
3521
3526
|
|
|
3522
3527
|
optimal_frequency = [freq_grid[i] for i in optimal_freq_idx]
|
|
3523
3528
|
optimal_frequency_tensor = tf.convert_to_tensor(
|
|
3524
|
-
tf.ones_like(
|
|
3529
|
+
tf.ones_like(filled_data.frequency) * optimal_frequency,
|
|
3525
3530
|
tf.float32,
|
|
3526
3531
|
)
|
|
3527
3532
|
optimal_reach = (
|
|
3528
|
-
|
|
3529
|
-
|
|
3530
|
-
|
|
3533
|
+
filled_data.frequency * filled_data.reach / optimal_frequency_tensor
|
|
3534
|
+
)
|
|
3535
|
+
|
|
3536
|
+
new_summary_metrics_data = DataTensors(
|
|
3537
|
+
reach=optimal_reach,
|
|
3538
|
+
frequency=optimal_frequency_tensor,
|
|
3539
|
+
rf_spend=filled_data.rf_spend,
|
|
3540
|
+
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
3541
|
+
media=dummy_media if has_media else None,
|
|
3542
|
+
media_spend=dummy_media_spend if has_media else None,
|
|
3531
3543
|
)
|
|
3532
3544
|
|
|
3533
3545
|
# Compute the optimized metrics based on the optimal frequency.
|
|
3534
3546
|
optimized_metrics_by_reach = self.summary_metrics(
|
|
3535
|
-
new_data=
|
|
3536
|
-
reach=optimal_reach, frequency=optimal_frequency_tensor
|
|
3537
|
-
),
|
|
3547
|
+
new_data=new_summary_metrics_data,
|
|
3538
3548
|
marginal_roi_by_reach=True,
|
|
3539
3549
|
selected_geos=selected_geos,
|
|
3540
3550
|
selected_times=selected_times,
|
|
@@ -3544,9 +3554,7 @@ class Analyzer:
|
|
|
3544
3554
|
constants.DISTRIBUTION: dist_type,
|
|
3545
3555
|
})
|
|
3546
3556
|
optimized_metrics_by_frequency = self.summary_metrics(
|
|
3547
|
-
new_data=
|
|
3548
|
-
reach=optimal_reach, frequency=optimal_frequency_tensor
|
|
3549
|
-
),
|
|
3557
|
+
new_data=new_summary_metrics_data,
|
|
3550
3558
|
marginal_roi_by_reach=False,
|
|
3551
3559
|
selected_geos=selected_geos,
|
|
3552
3560
|
selected_times=selected_times,
|
|
@@ -3594,17 +3602,14 @@ class Analyzer:
|
|
|
3594
3602
|
return xr.Dataset(
|
|
3595
3603
|
data_vars=data_vars,
|
|
3596
3604
|
coords={
|
|
3597
|
-
constants.FREQUENCY:
|
|
3598
|
-
constants.RF_CHANNEL:
|
|
3599
|
-
constants.METRIC:
|
|
3600
|
-
|
|
3601
|
-
|
|
3602
|
-
|
|
3603
|
-
|
|
3604
|
-
|
|
3605
|
-
constants.CI_HI,
|
|
3606
|
-
],
|
|
3607
|
-
),
|
|
3605
|
+
constants.FREQUENCY: freq_grid,
|
|
3606
|
+
constants.RF_CHANNEL: rf_channel_values,
|
|
3607
|
+
constants.METRIC: [
|
|
3608
|
+
constants.MEAN,
|
|
3609
|
+
constants.MEDIAN,
|
|
3610
|
+
constants.CI_LO,
|
|
3611
|
+
constants.CI_HI,
|
|
3612
|
+
],
|
|
3608
3613
|
},
|
|
3609
3614
|
attrs={
|
|
3610
3615
|
constants.CONFIDENCE_LEVEL: confidence_level,
|
|
@@ -3677,14 +3682,12 @@ class Analyzer:
|
|
|
3677
3682
|
|
|
3678
3683
|
xr_dims = [constants.METRIC, constants.GEO_GRANULARITY]
|
|
3679
3684
|
xr_coords = {
|
|
3680
|
-
constants.METRIC:
|
|
3681
|
-
|
|
3682
|
-
|
|
3683
|
-
|
|
3684
|
-
|
|
3685
|
-
|
|
3686
|
-
[constants.GEO, constants.NATIONAL],
|
|
3687
|
-
),
|
|
3685
|
+
constants.METRIC: [
|
|
3686
|
+
constants.R_SQUARED,
|
|
3687
|
+
constants.MAPE,
|
|
3688
|
+
constants.WMAPE,
|
|
3689
|
+
],
|
|
3690
|
+
constants.GEO_GRANULARITY: [constants.GEO, constants.NATIONAL],
|
|
3688
3691
|
}
|
|
3689
3692
|
if self._meridian.revenue_per_kpi is not None:
|
|
3690
3693
|
input_tensor = self._meridian.kpi * self._meridian.revenue_per_kpi
|
|
@@ -3715,10 +3718,7 @@ class Analyzer:
|
|
|
3715
3718
|
dataset = xr.Dataset(data_vars=xr_data, coords=xr_coords)
|
|
3716
3719
|
else:
|
|
3717
3720
|
xr_dims.append(constants.EVALUATION_SET_VAR)
|
|
3718
|
-
xr_coords[constants.EVALUATION_SET_VAR] = (
|
|
3719
|
-
[constants.EVALUATION_SET_VAR],
|
|
3720
|
-
list(constants.EVALUATION_SET),
|
|
3721
|
-
)
|
|
3721
|
+
xr_coords[constants.EVALUATION_SET_VAR] = list(constants.EVALUATION_SET)
|
|
3722
3722
|
|
|
3723
3723
|
holdout_id = self._filter_holdout_id_for_selected_geos_and_times(
|
|
3724
3724
|
self._meridian.model_spec.holdout_id, selected_geos, selected_times
|
|
@@ -3913,6 +3913,7 @@ class Analyzer:
|
|
|
3913
3913
|
selected_times: Sequence[str] | None = None,
|
|
3914
3914
|
by_reach: bool = True,
|
|
3915
3915
|
use_optimal_frequency: bool = False,
|
|
3916
|
+
use_kpi: bool = False,
|
|
3916
3917
|
confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
|
|
3917
3918
|
batch_size: int = constants.DEFAULT_BATCH_SIZE,
|
|
3918
3919
|
) -> xr.Dataset:
|
|
@@ -3940,6 +3941,8 @@ class Analyzer:
|
|
|
3940
3941
|
frequency.
|
|
3941
3942
|
use_optimal_frequency: If `True`, uses the optimal frequency to plot the
|
|
3942
3943
|
response curves. Defaults to `False`.
|
|
3944
|
+
use_kpi: A boolean flag indicating whether to use KPI instead of revenue
|
|
3945
|
+
to generate the response curves. Defaults to `False`.
|
|
3943
3946
|
confidence_level: Confidence level for prior and posterior credible
|
|
3944
3947
|
intervals, represented as a value between zero and one.
|
|
3945
3948
|
batch_size: Integer representing the maximum draws per chain in each
|
|
@@ -3951,7 +3954,6 @@ class Analyzer:
|
|
|
3951
3954
|
An `xarray.Dataset` containing the data needed to visualize response
|
|
3952
3955
|
curves.
|
|
3953
3956
|
"""
|
|
3954
|
-
use_kpi = self._meridian.input_data.revenue_per_kpi is None
|
|
3955
3957
|
if self._meridian.is_national:
|
|
3956
3958
|
_warn_if_geo_arg_in_kwargs(
|
|
3957
3959
|
selected_geos=selected_geos,
|
|
@@ -4004,7 +4006,7 @@ class Analyzer:
|
|
|
4004
4006
|
)
|
|
4005
4007
|
inc_outcome_temp = self.incremental_outcome(
|
|
4006
4008
|
use_posterior=use_posterior,
|
|
4007
|
-
new_data=new_data,
|
|
4009
|
+
new_data=new_data.filter_fields(constants.PAID_DATA),
|
|
4008
4010
|
inverse_transform_outcome=True,
|
|
4009
4011
|
batch_size=batch_size,
|
|
4010
4012
|
use_kpi=use_kpi,
|
|
@@ -4035,22 +4037,13 @@ class Analyzer:
|
|
|
4035
4037
|
)
|
|
4036
4038
|
spend_einsum = tf.einsum("k,m->km", np.array(spend_multipliers), spend)
|
|
4037
4039
|
xr_coords = {
|
|
4038
|
-
constants.CHANNEL: (
|
|
4039
|
-
|
|
4040
|
-
|
|
4041
|
-
|
|
4042
|
-
|
|
4043
|
-
|
|
4044
|
-
|
|
4045
|
-
constants.MEAN,
|
|
4046
|
-
constants.CI_LO,
|
|
4047
|
-
constants.CI_HI,
|
|
4048
|
-
],
|
|
4049
|
-
),
|
|
4050
|
-
constants.SPEND_MULTIPLIER: (
|
|
4051
|
-
[constants.SPEND_MULTIPLIER],
|
|
4052
|
-
spend_multipliers,
|
|
4053
|
-
),
|
|
4040
|
+
constants.CHANNEL: self._meridian.input_data.get_all_paid_channels(),
|
|
4041
|
+
constants.METRIC: [
|
|
4042
|
+
constants.MEAN,
|
|
4043
|
+
constants.CI_LO,
|
|
4044
|
+
constants.CI_HI,
|
|
4045
|
+
],
|
|
4046
|
+
constants.SPEND_MULTIPLIER: spend_multipliers,
|
|
4054
4047
|
}
|
|
4055
4048
|
xr_data_vars = {
|
|
4056
4049
|
constants.SPEND: (
|
|
@@ -4068,14 +4061,14 @@ class Analyzer:
|
|
|
4068
4061
|
def adstock_decay(
|
|
4069
4062
|
self, confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL
|
|
4070
4063
|
) -> pd.DataFrame:
|
|
4071
|
-
"""Calculates adstock decay for media and
|
|
4064
|
+
"""Calculates adstock decay for paid media, RF, and organic media channels.
|
|
4072
4065
|
|
|
4073
4066
|
Args:
|
|
4074
4067
|
confidence_level: Confidence level for prior and posterior credible
|
|
4075
4068
|
intervals, represented as a value between zero and one.
|
|
4076
4069
|
|
|
4077
4070
|
Returns:
|
|
4078
|
-
Pandas DataFrame containing the channel
|
|
4071
|
+
Pandas DataFrame containing the `channel`, `time_units`, `distribution`,
|
|
4079
4072
|
`ci_hi`, `ci_lo`, and `mean` for the Adstock function.
|
|
4080
4073
|
"""
|
|
4081
4074
|
if (
|
|
@@ -4100,63 +4093,75 @@ class Analyzer:
|
|
|
4100
4093
|
step_size = 1 / steps_per_time_period
|
|
4101
4094
|
l_range = np.arange(0, max_lag, step_size)
|
|
4102
4095
|
|
|
4103
|
-
rf_channel_values = (
|
|
4104
|
-
self._meridian.input_data.rf_channel.values
|
|
4105
|
-
if self._meridian.input_data.rf_channel is not None
|
|
4106
|
-
else []
|
|
4107
|
-
)
|
|
4108
|
-
|
|
4109
|
-
media_channel_values = (
|
|
4110
|
-
self._meridian.input_data.media_channel.values
|
|
4111
|
-
if self._meridian.input_data.media_channel is not None
|
|
4112
|
-
else []
|
|
4113
|
-
)
|
|
4114
|
-
|
|
4115
4096
|
xr_dims = [
|
|
4116
4097
|
constants.TIME_UNITS,
|
|
4117
4098
|
constants.CHANNEL,
|
|
4118
4099
|
constants.METRIC,
|
|
4119
4100
|
constants.DISTRIBUTION,
|
|
4120
4101
|
]
|
|
4121
|
-
|
|
4122
|
-
constants.TIME_UNITS:
|
|
4123
|
-
constants.
|
|
4124
|
-
|
|
4125
|
-
rf_channel_values,
|
|
4126
|
-
),
|
|
4127
|
-
constants.DISTRIBUTION: (
|
|
4128
|
-
[constants.DISTRIBUTION],
|
|
4129
|
-
[constants.PRIOR, constants.POSTERIOR],
|
|
4130
|
-
),
|
|
4131
|
-
constants.METRIC: (
|
|
4132
|
-
[constants.METRIC],
|
|
4133
|
-
[constants.MEAN, constants.CI_LO, constants.CI_HI],
|
|
4134
|
-
),
|
|
4102
|
+
base_xr_coords = {
|
|
4103
|
+
constants.TIME_UNITS: l_range,
|
|
4104
|
+
constants.DISTRIBUTION: [constants.PRIOR, constants.POSTERIOR],
|
|
4105
|
+
constants.METRIC: [constants.MEAN, constants.CI_LO, constants.CI_HI],
|
|
4135
4106
|
}
|
|
4136
|
-
|
|
4107
|
+
final_df_list = []
|
|
4108
|
+
|
|
4109
|
+
if self._meridian.n_media_channels > 0:
|
|
4110
|
+
media_channel_values = (
|
|
4111
|
+
self._meridian.input_data.media_channel.values
|
|
4112
|
+
if self._meridian.input_data.media_channel is not None
|
|
4113
|
+
else []
|
|
4114
|
+
)
|
|
4115
|
+
media_xr_coords = base_xr_coords | {
|
|
4116
|
+
constants.CHANNEL: media_channel_values
|
|
4117
|
+
}
|
|
4118
|
+
adstock_df_m = self._get_adstock_dataframe(
|
|
4119
|
+
constants.MEDIA,
|
|
4120
|
+
l_range,
|
|
4121
|
+
xr_dims,
|
|
4122
|
+
media_xr_coords,
|
|
4123
|
+
confidence_level,
|
|
4124
|
+
)
|
|
4125
|
+
if not adstock_df_m.empty:
|
|
4126
|
+
final_df_list.append(adstock_df_m)
|
|
4137
4127
|
|
|
4138
4128
|
if self._meridian.n_rf_channels > 0:
|
|
4129
|
+
rf_channel_values = (
|
|
4130
|
+
self._meridian.input_data.rf_channel.values
|
|
4131
|
+
if self._meridian.input_data.rf_channel is not None
|
|
4132
|
+
else []
|
|
4133
|
+
)
|
|
4134
|
+
rf_xr_coords = base_xr_coords | {constants.CHANNEL: rf_channel_values}
|
|
4139
4135
|
adstock_df_rf = self._get_adstock_dataframe(
|
|
4140
4136
|
constants.REACH,
|
|
4141
4137
|
l_range,
|
|
4142
4138
|
xr_dims,
|
|
4143
|
-
|
|
4139
|
+
rf_xr_coords,
|
|
4144
4140
|
confidence_level,
|
|
4145
4141
|
)
|
|
4146
|
-
|
|
4147
|
-
|
|
4148
|
-
|
|
4149
|
-
|
|
4150
|
-
|
|
4142
|
+
if not adstock_df_rf.empty:
|
|
4143
|
+
final_df_list.append(adstock_df_rf)
|
|
4144
|
+
|
|
4145
|
+
if self._meridian.n_organic_media_channels > 0:
|
|
4146
|
+
organic_media_channel_values = (
|
|
4147
|
+
self._meridian.input_data.organic_media_channel.values
|
|
4148
|
+
if self._meridian.input_data.organic_media_channel is not None
|
|
4149
|
+
else []
|
|
4150
|
+
)
|
|
4151
|
+
organic_media_xr_coords = base_xr_coords | {
|
|
4152
|
+
constants.CHANNEL: organic_media_channel_values
|
|
4153
|
+
}
|
|
4154
|
+
adstock_df_om = self._get_adstock_dataframe(
|
|
4155
|
+
constants.ORGANIC_MEDIA,
|
|
4151
4156
|
l_range,
|
|
4152
4157
|
xr_dims,
|
|
4153
|
-
|
|
4158
|
+
organic_media_xr_coords,
|
|
4154
4159
|
confidence_level,
|
|
4155
4160
|
)
|
|
4156
|
-
|
|
4157
|
-
|
|
4158
|
-
)
|
|
4161
|
+
if not adstock_df_om.empty:
|
|
4162
|
+
final_df_list.append(adstock_df_om)
|
|
4159
4163
|
|
|
4164
|
+
final_df = pd.concat(final_df_list, ignore_index=True)
|
|
4160
4165
|
# Adding an extra column that indicates whether time_units is an integer
|
|
4161
4166
|
# for marking the discrete points on the plot.
|
|
4162
4167
|
final_df[constants.IS_INT_TIME_UNIT] = final_df[constants.TIME_UNITS].apply(
|
|
@@ -4172,14 +4177,14 @@ class Analyzer:
|
|
|
4172
4177
|
"""Computes the point-wise mean and credible intervals for the Hill curves.
|
|
4173
4178
|
|
|
4174
4179
|
Args:
|
|
4175
|
-
channel_type: Type of channel, either `media` or `
|
|
4180
|
+
channel_type: Type of channel, either `media`, `rf`, or `organic_media`.
|
|
4176
4181
|
confidence_level: Confidence level for `posterior` and `prior` credible
|
|
4177
4182
|
intervals, represented as a value between zero and one.
|
|
4178
4183
|
|
|
4179
4184
|
Returns:
|
|
4180
4185
|
A DataFrame with data needed to plot the Hill curves, with columns:
|
|
4181
4186
|
|
|
4182
|
-
* `channel`: `media` or `
|
|
4187
|
+
* `channel`: `media`, `rf`, or `organic_media` channel name.
|
|
4183
4188
|
* `media_units`: Media (for `media` channels) or average frequency (for
|
|
4184
4189
|
`rf` channels) units.
|
|
4185
4190
|
* `distribution`: Indication of `posterior` or `prior` draw.
|
|
@@ -4188,7 +4193,12 @@ class Analyzer:
|
|
|
4188
4193
|
* `ci_lo`: Lower bound of the credible interval of the value of the Hill
|
|
4189
4194
|
function.
|
|
4190
4195
|
* `mean`: Point-wise mean of the value of the Hill function per draw.
|
|
4191
|
-
* channel_type: Indication of a `media` or `
|
|
4196
|
+
* channel_type: Indication of a `media`, `rf`, or `organic_media`
|
|
4197
|
+
channel.
|
|
4198
|
+
|
|
4199
|
+
Raises:
|
|
4200
|
+
ValueError: If `channel_type` is not one of the recognized constants
|
|
4201
|
+
`media`, `rf`, or `organic_media`.
|
|
4192
4202
|
"""
|
|
4193
4203
|
if (
|
|
4194
4204
|
channel_type == constants.MEDIA
|
|
@@ -4196,31 +4206,46 @@ class Analyzer:
|
|
|
4196
4206
|
):
|
|
4197
4207
|
ec = constants.EC_M
|
|
4198
4208
|
slope = constants.SLOPE_M
|
|
4199
|
-
linspace = np.linspace(
|
|
4200
|
-
0,
|
|
4201
|
-
np.max(
|
|
4202
|
-
np.array(self._meridian.media_tensors.media_scaled), axis=(0, 1)
|
|
4203
|
-
),
|
|
4204
|
-
constants.HILL_NUM_STEPS,
|
|
4205
|
-
)
|
|
4206
4209
|
channels = self._meridian.input_data.media_channel.values
|
|
4210
|
+
transformer = self._meridian.media_tensors.media_transformer
|
|
4211
|
+
linspace_max_values = np.max(
|
|
4212
|
+
np.array(self._meridian.media_tensors.media_scaled), axis=(0, 1)
|
|
4213
|
+
)
|
|
4207
4214
|
elif (
|
|
4208
4215
|
channel_type == constants.RF
|
|
4209
4216
|
and self._meridian.input_data.rf_channel is not None
|
|
4210
4217
|
):
|
|
4211
4218
|
ec = constants.EC_RF
|
|
4212
4219
|
slope = constants.SLOPE_RF
|
|
4213
|
-
linspace = np.linspace(
|
|
4214
|
-
0,
|
|
4215
|
-
np.max(np.array(self._meridian.rf_tensors.frequency), axis=(0, 1)),
|
|
4216
|
-
constants.HILL_NUM_STEPS,
|
|
4217
|
-
)
|
|
4218
4220
|
channels = self._meridian.input_data.rf_channel.values
|
|
4221
|
+
transformer = None
|
|
4222
|
+
linspace_max_values = np.max(
|
|
4223
|
+
np.array(self._meridian.rf_tensors.frequency), axis=(0, 1)
|
|
4224
|
+
)
|
|
4225
|
+
elif (
|
|
4226
|
+
channel_type == constants.ORGANIC_MEDIA
|
|
4227
|
+
and self._meridian.input_data.organic_media_channel is not None
|
|
4228
|
+
):
|
|
4229
|
+
ec = constants.EC_OM
|
|
4230
|
+
slope = constants.SLOPE_OM
|
|
4231
|
+
channels = self._meridian.input_data.organic_media_channel.values
|
|
4232
|
+
transformer = (
|
|
4233
|
+
self._meridian.organic_media_tensors.organic_media_transformer
|
|
4234
|
+
)
|
|
4235
|
+
linspace_max_values = np.max(
|
|
4236
|
+
np.array(self._meridian.organic_media_tensors.organic_media_scaled),
|
|
4237
|
+
axis=(0, 1),
|
|
4238
|
+
)
|
|
4219
4239
|
else:
|
|
4220
4240
|
raise ValueError(
|
|
4221
|
-
f"Unsupported channel type: {channel_type} or the"
|
|
4222
|
-
"
|
|
4241
|
+
f"Unsupported channel type: {channel_type} or the requested type of"
|
|
4242
|
+
" channels (`media`, `rf`, or `organic_media`) are not present."
|
|
4223
4243
|
)
|
|
4244
|
+
linspace = np.linspace(
|
|
4245
|
+
0,
|
|
4246
|
+
linspace_max_values,
|
|
4247
|
+
constants.HILL_NUM_STEPS,
|
|
4248
|
+
)
|
|
4224
4249
|
linspace_filler = np.linspace(0, 1, constants.HILL_NUM_STEPS)
|
|
4225
4250
|
xr_dims = [
|
|
4226
4251
|
constants.MEDIA_UNITS,
|
|
@@ -4229,19 +4254,10 @@ class Analyzer:
|
|
|
4229
4254
|
constants.DISTRIBUTION,
|
|
4230
4255
|
]
|
|
4231
4256
|
xr_coords = {
|
|
4232
|
-
constants.MEDIA_UNITS:
|
|
4233
|
-
constants.CHANNEL: (
|
|
4234
|
-
|
|
4235
|
-
|
|
4236
|
-
),
|
|
4237
|
-
constants.DISTRIBUTION: (
|
|
4238
|
-
[constants.DISTRIBUTION],
|
|
4239
|
-
[constants.PRIOR, constants.POSTERIOR],
|
|
4240
|
-
),
|
|
4241
|
-
constants.METRIC: (
|
|
4242
|
-
[constants.METRIC],
|
|
4243
|
-
[constants.MEAN, constants.CI_LO, constants.CI_HI],
|
|
4244
|
-
),
|
|
4257
|
+
constants.MEDIA_UNITS: linspace_filler,
|
|
4258
|
+
constants.CHANNEL: list(channels),
|
|
4259
|
+
constants.DISTRIBUTION: [constants.PRIOR, constants.POSTERIOR],
|
|
4260
|
+
constants.METRIC: [constants.MEAN, constants.CI_LO, constants.CI_HI],
|
|
4245
4261
|
}
|
|
4246
4262
|
# Expanding the linspace by one dimension since the HillTransformer requires
|
|
4247
4263
|
# 3-dimensional input as (geo, time, channel).
|
|
@@ -4285,13 +4301,10 @@ class Analyzer:
|
|
|
4285
4301
|
|
|
4286
4302
|
# Fill media_units or frequency x-axis with the correct range.
|
|
4287
4303
|
media_units_arr = []
|
|
4288
|
-
if
|
|
4289
|
-
|
|
4290
|
-
self._meridian.media_tensors.media, self._meridian.population
|
|
4291
|
-
)
|
|
4292
|
-
population_scaled_median_m = media_transformers.population_scaled_median_m
|
|
4304
|
+
if transformer is not None:
|
|
4305
|
+
population_scaled_median = transformer.population_scaled_median_m
|
|
4293
4306
|
x_range_full_shape = linspace * tf.transpose(
|
|
4294
|
-
|
|
4307
|
+
population_scaled_median[:, np.newaxis]
|
|
4295
4308
|
)
|
|
4296
4309
|
else:
|
|
4297
4310
|
x_range_full_shape = linspace
|
|
@@ -4312,8 +4325,68 @@ class Analyzer:
|
|
|
4312
4325
|
df[constants.MEDIA_UNITS] = media_units_arr
|
|
4313
4326
|
return df
|
|
4314
4327
|
|
|
4328
|
+
def _get_channel_hill_histogram_dataframe(
|
|
4329
|
+
self,
|
|
4330
|
+
channel_type: str,
|
|
4331
|
+
data_to_histogram: tf.Tensor,
|
|
4332
|
+
channel_names: Sequence[str],
|
|
4333
|
+
n_bins: int,
|
|
4334
|
+
) -> pd.DataFrame:
|
|
4335
|
+
"""Calculates hill histogram dataframe for a given channel type's values.
|
|
4336
|
+
|
|
4337
|
+
Args:
|
|
4338
|
+
channel_type: The type of channel (e.g., 'rf', 'media', 'organic_media').
|
|
4339
|
+
data_to_histogram: The 2D tensor (observations, channels). containing the
|
|
4340
|
+
data whose distribution needs to be histogrammed for each channel.
|
|
4341
|
+
channel_names: The names corresponding to the channels in
|
|
4342
|
+
data_to_histogram.
|
|
4343
|
+
n_bins: The number of bins for the histogram.
|
|
4344
|
+
|
|
4345
|
+
Returns:
|
|
4346
|
+
A Pandas DataFrame containing the calculated histogram data for all
|
|
4347
|
+
channels of the given type. Returns an empty DataFrame if no valid
|
|
4348
|
+
data is found for any channel.
|
|
4349
|
+
"""
|
|
4350
|
+
channels_data = {
|
|
4351
|
+
constants.CHANNEL: [],
|
|
4352
|
+
constants.CHANNEL_TYPE: [],
|
|
4353
|
+
constants.SCALED_COUNT_HISTOGRAM: [],
|
|
4354
|
+
constants.COUNT_HISTOGRAM: [],
|
|
4355
|
+
constants.START_INTERVAL_HISTOGRAM: [],
|
|
4356
|
+
constants.END_INTERVAL_HISTOGRAM: [],
|
|
4357
|
+
}
|
|
4358
|
+
|
|
4359
|
+
for i, channel_name in enumerate(channel_names):
|
|
4360
|
+
channel_data_np = data_to_histogram[:, i].numpy()
|
|
4361
|
+
channel_data_np = channel_data_np[~np.isnan(channel_data_np)]
|
|
4362
|
+
if channel_data_np.size == 0:
|
|
4363
|
+
continue
|
|
4364
|
+
|
|
4365
|
+
counts_per_bucket, buckets = np.histogram(
|
|
4366
|
+
channel_data_np, bins=n_bins, density=True
|
|
4367
|
+
)
|
|
4368
|
+
max_counts = (
|
|
4369
|
+
np.max(counts_per_bucket) if np.max(counts_per_bucket) > 0 else 1.0
|
|
4370
|
+
)
|
|
4371
|
+
|
|
4372
|
+
num_buckets = len(counts_per_bucket)
|
|
4373
|
+
channels_data[constants.CHANNEL].extend([channel_name] * num_buckets)
|
|
4374
|
+
channels_data[constants.CHANNEL_TYPE].extend([channel_type] * num_buckets)
|
|
4375
|
+
channels_data[constants.SCALED_COUNT_HISTOGRAM].extend(
|
|
4376
|
+
counts_per_bucket / max_counts
|
|
4377
|
+
)
|
|
4378
|
+
channels_data[constants.COUNT_HISTOGRAM].extend(counts_per_bucket)
|
|
4379
|
+
channels_data[constants.START_INTERVAL_HISTOGRAM].extend(buckets[:-1])
|
|
4380
|
+
channels_data[constants.END_INTERVAL_HISTOGRAM].extend(buckets[1:])
|
|
4381
|
+
|
|
4382
|
+
return pd.DataFrame(channels_data)
|
|
4383
|
+
|
|
4315
4384
|
def _get_hill_histogram_dataframe(self, n_bins: int) -> pd.DataFrame:
|
|
4316
|
-
"""
|
|
4385
|
+
"""Calculates histogram data for a given channel type's values.
|
|
4386
|
+
|
|
4387
|
+
Computes histogram data for the distribution of media units (for media or
|
|
4388
|
+
organic media channels) or frequency (for RF channels) across
|
|
4389
|
+
observations.
|
|
4317
4390
|
|
|
4318
4391
|
Args:
|
|
4319
4392
|
n_bins: Number of equal-width bins to include in the histogram for the
|
|
@@ -4339,73 +4412,64 @@ class Analyzer:
|
|
|
4339
4412
|
"""
|
|
4340
4413
|
n_geos = self._meridian.n_geos
|
|
4341
4414
|
n_media_times = self._meridian.n_media_times
|
|
4342
|
-
|
|
4343
|
-
|
|
4344
|
-
|
|
4345
|
-
(
|
|
4346
|
-
channels,
|
|
4347
|
-
scaled_count,
|
|
4348
|
-
channel_type_arr,
|
|
4349
|
-
start_interval_histogram,
|
|
4350
|
-
end_interval_histogram,
|
|
4351
|
-
count,
|
|
4352
|
-
) = ([], [], [], [], [], [])
|
|
4415
|
+
|
|
4416
|
+
df_list = []
|
|
4353
4417
|
|
|
4354
4418
|
# RF.
|
|
4355
4419
|
if self._meridian.input_data.rf_channel is not None:
|
|
4356
|
-
frequency =
|
|
4357
|
-
|
|
4358
|
-
|
|
4359
|
-
|
|
4360
|
-
|
|
4361
|
-
|
|
4362
|
-
|
|
4363
|
-
|
|
4364
|
-
|
|
4365
|
-
|
|
4420
|
+
frequency = self._meridian.rf_tensors.frequency
|
|
4421
|
+
if frequency is not None:
|
|
4422
|
+
reshaped_frequency = tf.reshape(
|
|
4423
|
+
frequency, (n_geos * n_media_times, self._meridian.n_rf_channels)
|
|
4424
|
+
)
|
|
4425
|
+
rf_hist_data = self._get_channel_hill_histogram_dataframe(
|
|
4426
|
+
channel_type=constants.RF,
|
|
4427
|
+
data_to_histogram=reshaped_frequency,
|
|
4428
|
+
channel_names=self._meridian.input_data.rf_channel.values,
|
|
4429
|
+
n_bins=n_bins,
|
|
4366
4430
|
)
|
|
4367
|
-
|
|
4368
|
-
channel_type_arr.extend([constants.RF] * len(counts_per_bucket))
|
|
4369
|
-
scaled_count.extend(counts_per_bucket / max(counts_per_bucket))
|
|
4370
|
-
count.extend(counts_per_bucket)
|
|
4371
|
-
start_interval_histogram.extend(buckets[:-1])
|
|
4372
|
-
end_interval_histogram.extend(buckets[1:])
|
|
4431
|
+
df_list.append(pd.DataFrame(rf_hist_data))
|
|
4373
4432
|
|
|
4374
4433
|
# Media.
|
|
4375
4434
|
if self._meridian.input_data.media_channel is not None:
|
|
4376
|
-
transformer =
|
|
4377
|
-
|
|
4378
|
-
|
|
4379
|
-
|
|
4380
|
-
|
|
4381
|
-
|
|
4382
|
-
|
|
4383
|
-
|
|
4384
|
-
|
|
4385
|
-
|
|
4435
|
+
transformer = self._meridian.media_tensors.media_transformer
|
|
4436
|
+
scaled = self._meridian.media_tensors.media_scaled
|
|
4437
|
+
if transformer is not None and scaled is not None:
|
|
4438
|
+
population_scaled_median = transformer.population_scaled_median_m
|
|
4439
|
+
scaled_media_units = scaled * population_scaled_median
|
|
4440
|
+
reshaped_scaled_media_units = tf.reshape(
|
|
4441
|
+
scaled_media_units,
|
|
4442
|
+
(n_geos * n_media_times, self._meridian.n_media_channels),
|
|
4443
|
+
)
|
|
4444
|
+
media_hist_data = self._get_channel_hill_histogram_dataframe(
|
|
4445
|
+
channel_type=constants.MEDIA,
|
|
4446
|
+
data_to_histogram=reshaped_scaled_media_units,
|
|
4447
|
+
channel_names=self._meridian.input_data.media_channel.values,
|
|
4448
|
+
n_bins=n_bins,
|
|
4449
|
+
)
|
|
4450
|
+
df_list.append(pd.DataFrame(media_hist_data))
|
|
4451
|
+
# Organic media.
|
|
4452
|
+
if self._meridian.input_data.organic_media_channel is not None:
|
|
4453
|
+
transformer_om = (
|
|
4454
|
+
self._meridian.organic_media_tensors.organic_media_transformer
|
|
4386
4455
|
)
|
|
4387
|
-
|
|
4388
|
-
|
|
4389
|
-
|
|
4390
|
-
|
|
4391
|
-
|
|
4392
|
-
|
|
4456
|
+
scaled_om = self._meridian.organic_media_tensors.organic_media_scaled
|
|
4457
|
+
if transformer_om is not None and scaled_om is not None:
|
|
4458
|
+
population_scaled_median_om = transformer_om.population_scaled_median_m
|
|
4459
|
+
scaled_organic_media_units = scaled_om * population_scaled_median_om
|
|
4460
|
+
reshaped_scaled_organic_media_units = tf.reshape(
|
|
4461
|
+
scaled_organic_media_units,
|
|
4462
|
+
(n_geos * n_media_times, self._meridian.n_organic_media_channels),
|
|
4393
4463
|
)
|
|
4394
|
-
|
|
4395
|
-
|
|
4396
|
-
|
|
4397
|
-
|
|
4398
|
-
|
|
4399
|
-
|
|
4400
|
-
|
|
4401
|
-
|
|
4402
|
-
|
|
4403
|
-
constants.CHANNEL_TYPE: channel_type_arr,
|
|
4404
|
-
constants.SCALED_COUNT_HISTOGRAM: scaled_count,
|
|
4405
|
-
constants.COUNT_HISTOGRAM: count,
|
|
4406
|
-
constants.START_INTERVAL_HISTOGRAM: start_interval_histogram,
|
|
4407
|
-
constants.END_INTERVAL_HISTOGRAM: end_interval_histogram,
|
|
4408
|
-
})
|
|
4464
|
+
organic_media_hist_data = self._get_channel_hill_histogram_dataframe(
|
|
4465
|
+
channel_type=constants.ORGANIC_MEDIA,
|
|
4466
|
+
data_to_histogram=reshaped_scaled_organic_media_units,
|
|
4467
|
+
channel_names=self._meridian.input_data.organic_media_channel.values,
|
|
4468
|
+
n_bins=n_bins,
|
|
4469
|
+
)
|
|
4470
|
+
df_list.append(pd.DataFrame(organic_media_hist_data))
|
|
4471
|
+
|
|
4472
|
+
return pd.concat(df_list, ignore_index=True)
|
|
4409
4473
|
|
|
4410
4474
|
def hill_curves(
|
|
4411
4475
|
self,
|
|
@@ -4453,17 +4517,16 @@ class Analyzer:
|
|
|
4453
4517
|
)
|
|
4454
4518
|
|
|
4455
4519
|
final_dfs = [pd.DataFrame()]
|
|
4456
|
-
|
|
4457
|
-
|
|
4458
|
-
|
|
4459
|
-
|
|
4460
|
-
|
|
4461
|
-
|
|
4462
|
-
|
|
4463
|
-
|
|
4464
|
-
|
|
4465
|
-
|
|
4466
|
-
final_dfs.append(hill_df_rf)
|
|
4520
|
+
for n_channels, channel_type in [
|
|
4521
|
+
(self._meridian.n_media_channels, constants.MEDIA),
|
|
4522
|
+
(self._meridian.n_rf_channels, constants.RF),
|
|
4523
|
+
(self._meridian.n_organic_media_channels, constants.ORGANIC_MEDIA),
|
|
4524
|
+
]:
|
|
4525
|
+
if n_channels > 0:
|
|
4526
|
+
hill_df = self._get_hill_curves_dataframe(
|
|
4527
|
+
channel_type, confidence_level
|
|
4528
|
+
)
|
|
4529
|
+
final_dfs.append(hill_df)
|
|
4467
4530
|
|
|
4468
4531
|
final_dfs.append(self._get_hill_histogram_dataframe(n_bins=n_bins))
|
|
4469
4532
|
return pd.concat(final_dfs)
|