google-meridian 1.1.5__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {google_meridian-1.1.5.dist-info → google_meridian-1.2.0.dist-info}/METADATA +8 -2
- google_meridian-1.2.0.dist-info/RECORD +52 -0
- meridian/__init__.py +1 -0
- meridian/analysis/analyzer.py +526 -362
- meridian/analysis/optimizer.py +275 -267
- meridian/analysis/test_utils.py +96 -94
- meridian/analysis/visualizer.py +37 -49
- meridian/backend/__init__.py +514 -0
- meridian/backend/config.py +59 -0
- meridian/backend/test_utils.py +95 -0
- meridian/constants.py +59 -3
- meridian/data/input_data.py +94 -0
- meridian/data/test_utils.py +144 -12
- meridian/model/adstock_hill.py +279 -33
- meridian/model/eda/__init__.py +17 -0
- meridian/model/eda/eda_engine.py +306 -0
- meridian/model/knots.py +525 -2
- meridian/model/media.py +62 -54
- meridian/model/model.py +224 -97
- meridian/model/model_test_data.py +323 -157
- meridian/model/posterior_sampler.py +84 -77
- meridian/model/prior_distribution.py +538 -168
- meridian/model/prior_sampler.py +65 -65
- meridian/model/spec.py +23 -3
- meridian/model/transformers.py +53 -47
- meridian/version.py +1 -1
- google_meridian-1.1.5.dist-info/RECORD +0 -47
- {google_meridian-1.1.5.dist-info → google_meridian-1.2.0.dist-info}/WHEEL +0 -0
- {google_meridian-1.1.5.dist-info → google_meridian-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {google_meridian-1.1.5.dist-info → google_meridian-1.2.0.dist-info}/top_level.txt +0 -0
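
The dominant change in this release is visible in the file list: a new `meridian/backend` package (514 added lines in `backend/__init__.py`, plus `config.py` and `test_utils.py`), with `analyzer.py`, `optimizer.py`, and the model modules rewritten to call `backend.*` wrappers instead of `tensorflow`/`tensorflow_probability` directly. The following is a hypothetical sketch of such a shim, inferred only from the call sites visible in the `analyzer.py` diff below (`backend.cast`, `backend.concatenate`, `backend.to_tensor`, `backend.mcmc`, and so on); it is not the actual `meridian/backend/__init__.py`, which presumably also handles backend selection via `backend/config.py`.

    # Hypothetical sketch, not Meridian's real backend module: re-export
    # TensorFlow/TFP names under the backend-neutral aliases used in the diff.
    import tensorflow as tf
    import tensorflow_probability as tfp

    Tensor = tf.Tensor
    TensorShape = tf.TensorShape
    ExtensionType = tf.experimental.ExtensionType
    float32 = tf.float32
    newaxis = tf.newaxis
    mcmc = tfp.mcmc  # backend.mcmc.potential_scale_reduction in get_rhat()

    cast = tf.cast
    concatenate = tf.concat  # note the rename: tf.concat -> backend.concatenate
    einsum = tf.einsum
    expand_dims = tf.expand_dims
    boolean_mask = tf.boolean_mask
    broadcast_to = tf.broadcast_to
    reduce_sum = tf.reduce_sum
    transpose = tf.transpose
    where = tf.where
    rank = tf.rank
    zeros, zeros_like = tf.zeros, tf.zeros_like
    ones, ones_like = tf.ones, tf.ones_like
    divide_no_nan = tf.math.divide_no_nan  # tf.math.* flattened to backend.*
    function = tf.function  # used below as @backend.function(jit_compile=True)

    def to_tensor(value, dtype=None):
      # The diff passes dtype="string"; string dtype aliases are presumably
      # resolved to concrete backend dtypes inside the real module.
      if dtype == "string":
        dtype = tf.string
      return tf.convert_to_tensor(value, dtype=dtype)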
meridian/analysis/analyzer.py
CHANGED
@@ -20,14 +20,13 @@ import numbers
 from typing import Any, Optional
 import warnings

+from meridian import backend
 from meridian import constants
 from meridian.model import adstock_hill
 from meridian.model import model
 from meridian.model import transformers
 import numpy as np
 import pandas as pd
-import tensorflow as tf
-import tensorflow_probability as tfp
 from typing_extensions import Self
 import xarray as xr

@@ -35,6 +34,7 @@ __all__ = [
     "Analyzer",
     "DataTensors",
     "DistributionTensors",
+    "get_central_tendency_and_ci",
 ]


@@ -53,7 +53,7 @@ def _validate_non_media_baseline_values_numbers(


 # TODO: Refactor the related unit tests to be under DataTensors.
-class DataTensors(tf.experimental.ExtensionType):
+class DataTensors(backend.ExtensionType):
   """Container for data variable arguments of Analyzer methods.

   Attributes:
@@ -88,86 +88,100 @@ class DataTensors(tf.experimental.ExtensionType):
       for time dimension `T`.
   """

-  media: Optional[tf.Tensor]
-  media_spend: Optional[tf.Tensor]
-  reach: Optional[tf.Tensor]
-  frequency: Optional[tf.Tensor]
-  rf_impressions: Optional[tf.Tensor]
-  rf_spend: Optional[tf.Tensor]
-  organic_media: Optional[tf.Tensor]
-  organic_reach: Optional[tf.Tensor]
-  organic_frequency: Optional[tf.Tensor]
-  non_media_treatments: Optional[tf.Tensor]
-  controls: Optional[tf.Tensor]
-  revenue_per_kpi: Optional[tf.Tensor]
-  time: Optional[tf.Tensor]
+  media: Optional[backend.Tensor]
+  media_spend: Optional[backend.Tensor]
+  reach: Optional[backend.Tensor]
+  frequency: Optional[backend.Tensor]
+  rf_impressions: Optional[backend.Tensor]
+  rf_spend: Optional[backend.Tensor]
+  organic_media: Optional[backend.Tensor]
+  organic_reach: Optional[backend.Tensor]
+  organic_frequency: Optional[backend.Tensor]
+  non_media_treatments: Optional[backend.Tensor]
+  controls: Optional[backend.Tensor]
+  revenue_per_kpi: Optional[backend.Tensor]
+  time: Optional[backend.Tensor]

   def __init__(
       self,
-      media: Optional[tf.Tensor] = None,
-      media_spend: Optional[tf.Tensor] = None,
-      reach: Optional[tf.Tensor] = None,
-      frequency: Optional[tf.Tensor] = None,
-      rf_impressions: Optional[tf.Tensor] = None,
-      rf_spend: Optional[tf.Tensor] = None,
-      organic_media: Optional[tf.Tensor] = None,
-      organic_reach: Optional[tf.Tensor] = None,
-      organic_frequency: Optional[tf.Tensor] = None,
-      non_media_treatments: Optional[tf.Tensor] = None,
-      controls: Optional[tf.Tensor] = None,
-      revenue_per_kpi: Optional[tf.Tensor] = None,
-      time: Optional[Sequence[str] | tf.Tensor] = None,
+      media: Optional[backend.Tensor] = None,
+      media_spend: Optional[backend.Tensor] = None,
+      reach: Optional[backend.Tensor] = None,
+      frequency: Optional[backend.Tensor] = None,
+      rf_impressions: Optional[backend.Tensor] = None,
+      rf_spend: Optional[backend.Tensor] = None,
+      organic_media: Optional[backend.Tensor] = None,
+      organic_reach: Optional[backend.Tensor] = None,
+      organic_frequency: Optional[backend.Tensor] = None,
+      non_media_treatments: Optional[backend.Tensor] = None,
+      controls: Optional[backend.Tensor] = None,
+      revenue_per_kpi: Optional[backend.Tensor] = None,
+      time: Optional[Sequence[str] | backend.Tensor] = None,
   ):
-    self.media = tf.cast(media, tf.float32) if media is not None else None
+    self.media = (
+        backend.cast(media, backend.float32) if media is not None else None
+    )
     self.media_spend = (
-        tf.cast(media_spend, tf.float32) if media_spend is not None else None
+        backend.cast(media_spend, backend.float32)
+        if media_spend is not None
+        else None
+    )
+    self.reach = (
+        backend.cast(reach, backend.float32) if reach is not None else None
     )
-    self.reach = tf.cast(reach, tf.float32) if reach is not None else None
     self.frequency = (
-        tf.cast(frequency, tf.float32) if frequency is not None else None
+        backend.cast(frequency, backend.float32)
+        if frequency is not None
+        else None
     )
     self.rf_impressions = (
-        tf.cast(rf_impressions, tf.float32)
+        backend.cast(rf_impressions, backend.float32)
         if rf_impressions is not None
         else None
     )
     self.rf_spend = (
-        tf.cast(rf_spend, tf.float32) if rf_spend is not None else None
+        backend.cast(rf_spend, backend.float32)
+        if rf_spend is not None
+        else None
     )
     self.organic_media = (
-        tf.cast(organic_media, tf.float32)
+        backend.cast(organic_media, backend.float32)
         if organic_media is not None
         else None
     )
     self.organic_reach = (
-        tf.cast(organic_reach, tf.float32)
+        backend.cast(organic_reach, backend.float32)
         if organic_reach is not None
         else None
     )
     self.organic_frequency = (
-        tf.cast(organic_frequency, tf.float32)
+        backend.cast(organic_frequency, backend.float32)
        if organic_frequency is not None
        else None
    )
     self.non_media_treatments = (
-        tf.cast(non_media_treatments, tf.float32)
+        backend.cast(non_media_treatments, backend.float32)
         if non_media_treatments is not None
         else None
     )
     self.controls = (
-        tf.cast(controls, tf.float32) if controls is not None else None
+        backend.cast(controls, backend.float32)
+        if controls is not None
+        else None
     )
     self.revenue_per_kpi = (
-        tf.cast(revenue_per_kpi, tf.float32)
+        backend.cast(revenue_per_kpi, backend.float32)
         if revenue_per_kpi is not None
         else None
     )
-    self.time = tf.convert_to_tensor(time) if time is not None else None
+    self.time = (
+        backend.to_tensor(time, dtype="string") if time is not None else None
+    )

   def __validate__(self):
     self._validate_n_dims()

-  def total_spend(self) -> tf.Tensor | None:
+  def total_spend(self) -> backend.Tensor | None:
     """Returns the total spend tensor.

     Returns:
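
Every field assignment in the `DataTensors.__init__` hunk above repeats one guard-then-cast idiom: cast to float32 when a value is given, propagate `None` otherwise. A minimal library-agnostic sketch of that idiom (NumPy stands in for the backend; `_maybe_cast` is an illustrative name, not a Meridian helper):

    from typing import Optional
    import numpy as np

    def _maybe_cast(x: Optional[np.ndarray]) -> Optional[np.ndarray]:
      # None means "field not provided" and must survive unchanged so that
      # downstream code can still branch on `is not None`.
      return None if x is None else np.asarray(x, dtype=np.float32)

    assert _maybe_cast(None) is None
    assert _maybe_cast(np.array([1, 2])).dtype == np.float32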
@@ -180,7 +194,9 @@ class DataTensors(tf.experimental.ExtensionType):
       spend_tensors.append(self.media_spend)
     if self.rf_spend is not None:
       spend_tensors.append(self.rf_spend)
-    return tf.concat(spend_tensors, axis=-1) if spend_tensors else None
+    return (
+        backend.concatenate(spend_tensors, axis=-1) if spend_tensors else None
+    )

   def get_modified_times(self, meridian: model.Meridian) -> int | None:
     """Returns `n_times` of any tensor where `n_times` has been modified.
@@ -472,8 +488,8 @@ class DataTensors(tf.experimental.ExtensionType):
       elif var_name == constants.REVENUE_PER_KPI:
         old_tensor = meridian.revenue_per_kpi
       elif var_name == constants.TIME:
-        old_tensor = tf.convert_to_tensor(
-            meridian.input_data.time.values.tolist(), dtype=tf.string
+        old_tensor = backend.to_tensor(
+            meridian.input_data.time.values.tolist(), dtype="string"
         )
       else:
         continue
@@ -484,63 +500,38 @@ class DataTensors(tf.experimental.ExtensionType):
     return DataTensors(**output)


-class DistributionTensors(tf.experimental.ExtensionType):
+class DistributionTensors(backend.ExtensionType):
   """Container for parameters distributions arguments of Analyzer methods."""

-  alpha_m: Optional[tf.Tensor] = None
-  alpha_rf: Optional[tf.Tensor] = None
-  alpha_om: Optional[tf.Tensor] = None
-  alpha_orf: Optional[tf.Tensor] = None
-  ec_m: Optional[tf.Tensor] = None
-  ec_rf: Optional[tf.Tensor] = None
-  ec_om: Optional[tf.Tensor] = None
-  ec_orf: Optional[tf.Tensor] = None
-  slope_m: Optional[tf.Tensor] = None
-  slope_rf: Optional[tf.Tensor] = None
-  slope_om: Optional[tf.Tensor] = None
-  slope_orf: Optional[tf.Tensor] = None
-  beta_gm: Optional[tf.Tensor] = None
-  beta_grf: Optional[tf.Tensor] = None
-  beta_gom: Optional[tf.Tensor] = None
-  beta_gorf: Optional[tf.Tensor] = None
-  mu_t: Optional[tf.Tensor] = None
-  tau_g: Optional[tf.Tensor] = None
-  gamma_gc: Optional[tf.Tensor] = None
-  gamma_gn: Optional[tf.Tensor] = None
-
-
-def _transformed_new_or_scaled(
-    new_variable: tf.Tensor | None,
-    transformer: transformers.TensorTransformer | None,
-    scaled_variable: tf.Tensor | None,
-) -> tf.Tensor | None:
-  """Returns the transformed new variable or the scaled variable.
-
-  If the `new_variable` is present, returns
-  `transformer.forward(new_variable)`. Otherwise, returns the
-  `scaled_variable`.
-
-  Args:
-    new_variable: Optional tensor to be transformed..
-    transformer: Optional DataTransformer.
-    scaled_variable: Tensor to be returned if `new_variable` is None.
-
-  Returns:
-    The transformed new variable (if the new variable is present) or the
-    original scaled variable from the input data otherwise.
-  """
-  if new_variable is None or transformer is None:
-    return scaled_variable
-  return transformer.forward(new_variable)
+  alpha_m: Optional[backend.Tensor] = None
+  alpha_rf: Optional[backend.Tensor] = None
+  alpha_om: Optional[backend.Tensor] = None
+  alpha_orf: Optional[backend.Tensor] = None
+  ec_m: Optional[backend.Tensor] = None
+  ec_rf: Optional[backend.Tensor] = None
+  ec_om: Optional[backend.Tensor] = None
+  ec_orf: Optional[backend.Tensor] = None
+  slope_m: Optional[backend.Tensor] = None
+  slope_rf: Optional[backend.Tensor] = None
+  slope_om: Optional[backend.Tensor] = None
+  slope_orf: Optional[backend.Tensor] = None
+  beta_gm: Optional[backend.Tensor] = None
+  beta_grf: Optional[backend.Tensor] = None
+  beta_gom: Optional[backend.Tensor] = None
+  beta_gorf: Optional[backend.Tensor] = None
+  mu_t: Optional[backend.Tensor] = None
+  tau_g: Optional[backend.Tensor] = None
+  gamma_gc: Optional[backend.Tensor] = None
+  gamma_gn: Optional[backend.Tensor] = None


 def get_central_tendency_and_ci(
-    data: np.ndarray | tf.Tensor,
+    data: np.ndarray | backend.Tensor,
     confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
     axis: tuple[int, ...] = (0, 1),
     include_median=False,
 ) -> np.ndarray:
-  """Calculates
+  """Calculates mean and credible intervals for the given data.

   Args:
     data: Data for the metric.
@@ -551,8 +542,8 @@ def get_central_tendency_and_ci(
       the median in the output Dataset (default: False).

   Returns:
-    A numpy array or
+    A numpy array or backend.Tensor containing the mean and credible intervals
+    for the given data. Optionally, it also includes the median.
   """
   mean = np.mean(data, axis=axis, keepdims=False)
   ci_lo = np.quantile(data, (1 - confidence_level) / 2, axis=axis)
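
The body of `get_central_tendency_and_ci` shown in context is plain NumPy, so the newly exported function is backend-independent. A self-contained illustration of the stacking it performs, assuming a 0.9 confidence level and that the upper bound mirrors the `ci_lo` quantile shown above:

    import numpy as np

    draws = np.random.randn(4, 1000, 3)  # e.g. (chain, draw, channel)
    confidence_level = 0.9
    mean = np.mean(draws, axis=(0, 1))
    ci_lo = np.quantile(draws, (1 - confidence_level) / 2, axis=(0, 1))
    ci_hi = np.quantile(draws, (1 + confidence_level) / 2, axis=(0, 1))
    # One row per channel: [mean, ci_lo, ci_hi].
    print(np.stack([mean, ci_lo, ci_hi], axis=-1).shape)  # (3, 3)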
@@ -565,6 +556,31 @@ def get_central_tendency_and_ci(
   return np.stack([mean, ci_lo, ci_hi], axis=-1)


+def _transformed_new_or_scaled(
+    new_variable: backend.Tensor | None,
+    transformer: transformers.TensorTransformer | None,
+    scaled_variable: backend.Tensor | None,
+) -> backend.Tensor | None:
+  """Returns the transformed new variable or the scaled variable.
+
+  If the `new_variable` is present, returns
+  `transformer.forward(new_variable)`. Otherwise, returns the
+  `scaled_variable`.
+
+  Args:
+    new_variable: Optional tensor to be transformed..
+    transformer: Optional DataTransformer.
+    scaled_variable: Tensor to be returned if `new_variable` is None.
+
+  Returns:
+    The transformed new variable (if the new variable is present) or the
+    original scaled variable from the input data otherwise.
+  """
+  if new_variable is None or transformer is None:
+    return scaled_variable
+  return transformer.forward(new_variable)
+
+
 def _calc_rsquared(expected, actual):
   """Calculates r-squared between actual and expected outcome."""
   return 1 - np.nanmean((expected - actual) ** 2) / np.nanvar(actual)
@@ -594,7 +610,7 @@ def _warn_if_geo_arg_in_kwargs(**kwargs):
   )


-def _check_n_dims(tensor: tf.Tensor, name: str, n_dims: int):
+def _check_n_dims(tensor: backend.Tensor, name: str, n_dims: int):
   """Raises an error if the tensor has the wrong number of dimensions."""
   if tensor.ndim != n_dims:
     raise ValueError(
@@ -753,8 +769,8 @@ def _scale_tensors_by_multiplier(


 def _central_tendency_and_ci_by_prior_and_posterior(
-    prior: tf.Tensor,
-    posterior: tf.Tensor,
+    prior: backend.Tensor,
+    posterior: backend.Tensor,
     metric_name: str,
     xr_dims: Sequence[str],
     xr_coords: Mapping[str, tuple[Sequence[str], Sequence[str]]],
@@ -799,16 +815,16 @@ class Analyzer:
   def __init__(self, meridian: model.Meridian):
     self._meridian = meridian
     # Make the meridian object ready for methods in this analyzer that create
-    # tf.function computation graphs: it should be frozen for no more internal
-    # states mutation before those graphs execute.
+    # backend.function computation graphs: it should be frozen for no more
+    # internal states mutation before those graphs execute.
     self._meridian.populate_cached_properties()

-  @tf.function(jit_compile=True)
+  @backend.function(jit_compile=True)
   def _get_kpi_means(
       self,
       data_tensors: DataTensors,
       dist_tensors: DistributionTensors,
-  ) -> tf.Tensor:
+  ) -> backend.Tensor:
     """Computes batched KPI means.

     Note that the output array has the same number of time periods as the media
@@ -827,7 +843,7 @@ class Analyzer:
     Returns:
       Tensor representing computed kpi means.
     """
-    tau_gt = tf.expand_dims(dist_tensors.tau_g, -1) + tf.expand_dims(
+    tau_gt = backend.expand_dims(dist_tensors.tau_g, -1) + backend.expand_dims(
         dist_tensors.mu_t, -2
     )
     combined_media_transformed, combined_beta = (
@@ -837,17 +853,17 @@ class Analyzer:
       )
     )

-    result = tau_gt + tf.einsum(
+    result = tau_gt + backend.einsum(
         "...gtm,...gm->...gt", combined_media_transformed, combined_beta
     )
     if self._meridian.controls is not None:
-      result += tf.einsum(
+      result += backend.einsum(
          "...gtc,...gc->...gt",
          data_tensors.controls,
          dist_tensors.gamma_gc,
      )
     if data_tensors.non_media_treatments is not None:
-      result += tf.einsum(
+      result += backend.einsum(
          "...gtm,...gm->...gt",
          data_tensors.non_media_treatments,
          dist_tensors.gamma_gn,
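
The einsum signatures in `_get_kpi_means` contract the channel axis away while leaving the leading `(chain, draw)` batch dimensions untouched. A standalone shape check of the first contraction (all dimension sizes invented for illustration):

    import numpy as np

    # g=geos, t=times, m=channels; "..." covers the (chain, draw) batch dims.
    media = np.random.rand(2, 5, 3, 4, 6)  # (chain, draw, g, t, m)
    beta = np.random.rand(2, 5, 3, 6)      # (chain, draw, g, m)
    kpi = np.einsum("...gtm,...gm->...gt", media, beta)
    print(kpi.shape)  # (2, 5, 3, 4): the channel axis is summed out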
@@ -902,8 +918,8 @@ class Analyzer:
     """Computes decayed effect means and CIs for media or RF channels.

     Args:
-      channel_type: Specifies `media`, `rf`, or `organic_media` for computing
-        prior and posterior decayed effects.
+      channel_type: Specifies `media`, `rf`, `organic_media`, or `organic_rf`
+        for computing prior and posterior decayed effects.
       l_range: The range of time across which the adstock effect is computed.
       xr_dims: A list of dimensions for the output dataset.
       xr_coords: A dictionary with the coordinates for the output dataset.
@@ -914,42 +930,66 @@ class Analyzer:
       Pandas DataFrame containing the channel, time_units, distribution, ci_hi,
       ci_lo, and mean decayed effects for either media or RF channel types.
     """
-    if channel_type == constants.MEDIA:
+    window_size = min(
+        self._meridian.model_spec.max_lag + 1, self._meridian.n_media_times
+    )
+    if channel_type == constants.MEDIA:
       prior = self._meridian.inference_data.prior.alpha_m.values[0]
       posterior = np.reshape(
           self._meridian.inference_data.posterior.alpha_m.values,
           (-1, self._meridian.n_media_channels),
       )
-    elif channel_type == constants.RF:
+      decay_functions = self._meridian.adstock_decay_spec.media
+    elif channel_type == constants.RF:
       prior = self._meridian.inference_data.prior.alpha_rf.values[0]
       posterior = np.reshape(
          self._meridian.inference_data.posterior.alpha_rf.values,
          (-1, self._meridian.n_rf_channels),
      )
-    elif channel_type == constants.ORGANIC_MEDIA:
+      decay_functions = self._meridian.adstock_decay_spec.rf
+    elif channel_type == constants.ORGANIC_MEDIA:
       prior = self._meridian.inference_data.prior.alpha_om.values[0]
       posterior = np.reshape(
           self._meridian.inference_data.posterior.alpha_om.values,
           (-1, self._meridian.n_organic_media_channels),
       )
+      decay_functions = self._meridian.adstock_decay_spec.organic_media
+    elif channel_type == constants.ORGANIC_RF:
+      prior = self._meridian.inference_data.prior.alpha_orf.values[0]
+      posterior = np.reshape(
+          self._meridian.inference_data.posterior.alpha_orf.values,
+          (-1, self._meridian.n_organic_rf_channels),
+      )
+      decay_functions = self._meridian.adstock_decay_spec.organic_rf
     else:
       raise ValueError(
           f"Unsupported channel type for adstock decay: '{channel_type}'. "
       )

-    decayed_effect_prior = (
+    decayed_effect_prior = adstock_hill.compute_decay_weights(
+        alpha=backend.to_tensor(
+            prior[backend.newaxis, ...], dtype=backend.float32
+        ),
+        l_range=backend.to_tensor(l_range, dtype=backend.float32),
+        window_size=window_size,
+        decay_functions=decay_functions,
+        normalize=False,
+    )
+    decayed_effect_posterior = adstock_hill.compute_decay_weights(
+        alpha=backend.to_tensor(
+            posterior[backend.newaxis, ...], dtype=backend.float32
+        ),
+        l_range=backend.to_tensor(l_range, dtype=backend.float32),
+        window_size=window_size,
+        decay_functions=decay_functions,
+        normalize=False,
     )

-    decayed_effect_prior_transpose = tf.transpose(
-        decayed_effect_prior, perm=[0, 1, 3, 2]
+    decayed_effect_prior_transpose = backend.transpose(
+        decayed_effect_prior, perm=[0, 1, 3, 2]
     )
-    decayed_effect_posterior_transpose = tf.transpose(
-        decayed_effect_posterior, perm=[0, 1, 3, 2]
+    decayed_effect_posterior_transpose = backend.transpose(
+        decayed_effect_posterior, perm=[0, 1, 3, 2]
     )
     adstock_dataset = _central_tendency_and_ci_by_prior_and_posterior(
         decayed_effect_prior_transpose,
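
Where 1.1.5 computed decayed effects inline, 1.2.0 delegates to `adstock_hill.compute_decay_weights(...)` with a per-channel-type `decay_functions` spec (see the expanded `adstock_hill.py` in the file list). For intuition only, a sketch of the classic geometric adstock weights that an unnormalized decay reduces to; this is an illustration, not the `compute_decay_weights` implementation:

    import numpy as np

    def geometric_decay_weights(alpha, l_range):
      # alpha: (n_channels,) retention rates in [0, 1); l_range: (n_lags,) lags.
      # The weight of a media impulse after lag l is alpha ** l.
      return alpha[np.newaxis, :] ** l_range[:, np.newaxis]  # (n_lags, n_channels)

    w = geometric_decay_weights(np.array([0.5, 0.9]), np.arange(4.0))
    print(w[:, 1])  # [1.    0.9   0.81  0.729]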
@@ -1157,7 +1197,7 @@ class Analyzer:
       data_tensors: DataTensors,
       dist_tensors: DistributionTensors,
       n_times_output: int | None = None,
-  ) -> tuple[tf.Tensor | None, tf.Tensor | None]:
+  ) -> tuple[backend.Tensor | None, backend.Tensor | None]:
     """Function for transforming media using adstock and hill functions.

     This transforms the media tensor using the adstock and hill functions, in
@@ -1185,6 +1225,7 @@ class Analyzer:
               alpha=dist_tensors.alpha_m,
               ec=dist_tensors.ec_m,
               slope=dist_tensors.slope_m,
+              decay_functions=self._meridian.adstock_decay_spec.media,
               n_times_output=n_times_output,
           )
       )
@@ -1198,6 +1239,7 @@ class Analyzer:
               alpha=dist_tensors.alpha_rf,
               ec=dist_tensors.ec_rf,
               slope=dist_tensors.slope_rf,
+              decay_functions=self._meridian.adstock_decay_spec.rf,
               n_times_output=n_times_output,
           )
       )
@@ -1209,6 +1251,7 @@ class Analyzer:
               alpha=dist_tensors.alpha_om,
               ec=dist_tensors.ec_om,
               slope=dist_tensors.slope_om,
+              decay_functions=self._meridian.adstock_decay_spec.organic_media,
               n_times_output=n_times_output,
           )
       )
@@ -1221,25 +1264,26 @@ class Analyzer:
               alpha=dist_tensors.alpha_orf,
               ec=dist_tensors.ec_orf,
               slope=dist_tensors.slope_orf,
+              decay_functions=self._meridian.adstock_decay_spec.organic_rf,
               n_times_output=n_times_output,
           )
       )
       combined_betas.append(dist_tensors.beta_gorf)

-    combined_media_transformed = tf.concat(combined_medias, axis=-1)
-    combined_beta = tf.concat(combined_betas, axis=-1)
+    combined_media_transformed = backend.concatenate(combined_medias, axis=-1)
+    combined_beta = backend.concatenate(combined_betas, axis=-1)
     return combined_media_transformed, combined_beta

   def filter_and_aggregate_geos_and_times(
       self,
-      tensor: tf.Tensor,
+      tensor: backend.Tensor,
       selected_geos: Sequence[str] | None = None,
       selected_times: Sequence[str] | Sequence[bool] | None = None,
       aggregate_geos: bool = True,
       aggregate_times: bool = True,
       flexible_time_dim: bool = False,
       has_media_dim: bool = True,
-  ) -> tf.Tensor:
+  ) -> backend.Tensor:
     """Filters and/or aggregates geo and time dimensions of a tensor.

     Args:
@@ -1299,12 +1343,12 @@ class Analyzer:
           c + 1 for c in allowed_n_channels
       ]
       expected_shapes_w_media = [
-          tf.TensorShape(shape)
+          backend.TensorShape(shape)
           for shape in itertools.product(
               [mmm.n_geos], [n_times], allowed_channel_dim
           )
       ]
-      expected_shape_wo_media = tf.TensorShape([mmm.n_geos, n_times])
+      expected_shape_wo_media = backend.TensorShape([mmm.n_geos, n_times])
     if not flexible_time_dim:
       if tensor.shape[-3:] in expected_shapes_w_media:
         has_media_dim = True
@@ -1338,7 +1382,7 @@ class Analyzer:
             "meridian.InputData."
         )
       geo_mask = [x in selected_geos for x in mmm.input_data.geo]
-      tensor = tf.boolean_mask(tensor, geo_mask, axis=geo_dim)
+      tensor = backend.boolean_mask(tensor, geo_mask, axis=geo_dim)

     if selected_times is not None:
       _validate_selected_times(
@@ -1350,9 +1394,9 @@ class Analyzer:
       )
       if _is_str_list(selected_times):
         time_mask = [x in selected_times for x in mmm.input_data.time]
-        tensor = tf.boolean_mask(tensor, time_mask, axis=time_dim)
+        tensor = backend.boolean_mask(tensor, time_mask, axis=time_dim)
       elif _is_bool_list(selected_times):
-        tensor = tf.boolean_mask(tensor, selected_times, axis=time_dim)
+        tensor = backend.boolean_mask(tensor, selected_times, axis=time_dim)

     tensor_dims = "...gt" + "m" * has_media_dim
     output_dims = (
@@ -1360,7 +1404,7 @@ class Analyzer:
         + "t" * (not aggregate_times)
         + "m" * has_media_dim
     )
-    return tf.einsum(f"{tensor_dims}->...{output_dims}", tensor)
+    return backend.einsum(f"{tensor_dims}->...{output_dims}", tensor)

   def expected_outcome(
       self,
@@ -1373,7 +1417,7 @@ class Analyzer:
       inverse_transform_outcome: bool = True,
       use_kpi: bool = False,
       batch_size: int = constants.DEFAULT_BATCH_SIZE,
-  ) -> tf.Tensor:
+  ) -> backend.Tensor:
     """Calculates either prior or posterior expected outcome.

     This calculates `E(Outcome|Media, RF, Organic media, Organic RF, Non-media
@@ -1492,7 +1536,7 @@ class Analyzer:

     n_draws = params.draw.size
     n_chains = params.chain.size
-    outcome_means = tf.zeros(
+    outcome_means = backend.zeros(
         (n_chains, 0, self._meridian.n_geos, self._meridian.n_times)
     )
     batch_starting_indices = np.arange(n_draws, step=batch_size)
@@ -1508,7 +1552,7 @@ class Analyzer:
     for start_index in batch_starting_indices:
       stop_index = np.min([n_draws, start_index + batch_size])
       batch_dists = {
-          k: tf.convert_to_tensor(params[k][:, start_index:stop_index, ...])
+          k: backend.to_tensor(params[k][:, start_index:stop_index, ...])
          for k in param_list
      }
      dist_tensors = DistributionTensors(**batch_dists)
@@ -1519,7 +1563,9 @@ class Analyzer:
               dist_tensors=dist_tensors,
           )
       )
-      outcome_means = tf.concat([outcome_means, *outcome_means_temps], axis=1)
+      outcome_means = backend.concatenate(
+          [outcome_means, *outcome_means_temps], axis=1
+      )
     if inverse_transform_outcome:
       outcome_means = self._meridian.kpi_transformer.inverse(outcome_means)
       if not use_kpi:
@@ -1562,7 +1608,7 @@ class Analyzer:
       data_tensors: DataTensors,
       dist_tensors: DistributionTensors,
       non_media_treatments_baseline_normalized: Sequence[float] | None = None,
-  ) -> tf.Tensor:
+  ) -> backend.Tensor:
     """Computes incremental KPI distribution.

     Args:
@@ -1611,28 +1657,28 @@ class Analyzer:
             n_times_output=n_times_output,
         )
     )
-    combined_media_kpi = tf.einsum(
+    combined_media_kpi = backend.einsum(
         "...gtm,...gm->...gtm",
         combined_media_transformed,
         combined_beta,
     )
     if data_tensors.non_media_treatments is not None:
-      non_media_kpi = tf.einsum(
+      non_media_kpi = backend.einsum(
          "gtn,...gn->...gtn",
          data_tensors.non_media_treatments
          - non_media_treatments_baseline_normalized,
          dist_tensors.gamma_gn,
      )
-      return tf.concat([combined_media_kpi, non_media_kpi], axis=-1)
+      return backend.concatenate([combined_media_kpi, non_media_kpi], axis=-1)
     else:
       return combined_media_kpi

   def _inverse_outcome(
       self,
-      modeled_incremental_outcome: tf.Tensor,
+      modeled_incremental_outcome: backend.Tensor,
       use_kpi: bool,
-      revenue_per_kpi: tf.Tensor | None,
-  ) -> tf.Tensor:
+      revenue_per_kpi: backend.Tensor | None,
+  ) -> backend.Tensor:
     """Inverses incremental outcome (revenue or KPI).

     This method assumes that additive changes on the model kpi scale
@@ -1656,16 +1702,16 @@ class Analyzer:
     if revenue_per_kpi is None:
       revenue_per_kpi = self._meridian.revenue_per_kpi
     t1 = self._meridian.kpi_transformer.inverse(
-        tf.einsum("...m->m...", modeled_incremental_outcome)
+        backend.einsum("...m->m...", modeled_incremental_outcome)
     )
-    t2 = self._meridian.kpi_transformer.inverse(tf.zeros_like(t1))
-    kpi = tf.einsum("m...->...m", t1 - t2)
+    t2 = self._meridian.kpi_transformer.inverse(backend.zeros_like(t1))
+    kpi = backend.einsum("m...->...m", t1 - t2)

     if use_kpi:
       return kpi
-    return tf.einsum("gt,...gtm->...gtm", revenue_per_kpi, kpi)
+    return backend.einsum("gt,...gtm->...gtm", revenue_per_kpi, kpi)

-  @tf.function(jit_compile=True)
+  @backend.function(jit_compile=True)
   def _incremental_outcome_impl(
       self,
       data_tensors: DataTensors,
@@ -1677,7 +1723,7 @@ class Analyzer:
       selected_times: Sequence[str] | Sequence[bool] | None = None,
       aggregate_geos: bool = True,
       aggregate_times: bool = True,
-  ) -> tf.Tensor:
+  ) -> backend.Tensor:
     """Computes incremental outcome (revenue or KPI) on a batch of data.

     Args:
@@ -1722,9 +1768,11 @@ class Analyzer:
       selected_geos: Contains a subset of geos to include. By default, all geos
         are included.
       selected_times: An optional string list containing a subset of
+        `input_data.time` to include or a boolean list with length equal to the
+        number of time periods in `data_tensors` if time is modified in
+        `data_tensors`, or `input_data.n_times` otherwise. If time in
+        `data_tensors` is modified, then only the boolean list can be used as
+        `selected_times`. By default, all time periods are included.
       aggregate_geos: If True, then incremental outcome is summed over all
         regions.
       aggregate_times: If True, then incremental outcome is summed over all time
@@ -1767,6 +1815,7 @@ class Analyzer:
         has_media_dim=True,
     )

+  # TODO: b/407847021 - Add support for `new_data.time`.
   def incremental_outcome(
       self,
       use_posterior: bool = True,
@@ -1784,7 +1833,7 @@ class Analyzer:
       by_reach: bool = True,
       include_non_paid_channels: bool = True,
       batch_size: int = constants.DEFAULT_BATCH_SIZE,
-  ) -> tf.Tensor:
+  ) -> backend.Tensor:
     """Calculates either the posterior or prior incremental outcome.

     This calculates the media outcome of each media channel for each posterior
@@ -1869,26 +1918,27 @@ class Analyzer:
         default, all geos are included.
       selected_times: Optional list containing either a subset of dates to
         include or booleans with length equal to the number of time periods in
+        `new_data` if time is modified in `new_data`, or `input_data.n_times`
+        otherwise. The incremental outcome corresponds to incremental KPI
+        generated during the `selected_times` arg by media executed during the
+        `media_selected_times` arg. Note that if `use_kpi=False`, then
+        `selected_times` can only include the time periods that have
+        `revenue_per_kpi` input data. By default, all time periods are included
+        where `revenue_per_kpi` data is available.
       media_selected_times: Optional list containing either a subset of dates to
         include or booleans with length equal to the number of time periods in
+        KPI data or number of time periods in the `new_data` args, if provided.
+        If `new_data` is provided, `media_selected_times` can select any subset
+        of time periods in `new_data`. If `new_data` is not provided,
+        `media_selected_times` selects from `InputData.time`. The incremental
+        outcome corresponds to incremental KPI generated during the
+        `selected_times` arg by treatment variables executed during the
+        `media_selected_times` arg. For each channel, the incremental outcome is
+        defined as the difference between expected KPI when treatment variables
+        execution is scaled by `scaling_factor1` and `scaling_factor0` during
+        these specified time periods. By default, the difference is between
+        treatment variables at historical execution levels, or as provided in
+        `new_data`, versus zero execution. Defaults to include all time periods.
       aggregate_geos: Boolean. If `True`, then incremental outcome is summed
         over all regions.
       aggregate_times: Boolean. If `True`, then incremental outcome is summed
@@ -2018,11 +2068,11 @@ class Analyzer:
           non_media_treatments_baseline_scaled,
           apply_population_scaling=False,
       )
-      non_media_treatments0 = tf.broadcast_to(
-          tf.convert_to_tensor(
-              non_media_treatments_baseline_normalized, dtype=tf.float32
-          )[tf.newaxis, tf.newaxis, :],
-          data_tensors.non_media_treatments.shape,
+      non_media_treatments0 = backend.broadcast_to(
+          backend.to_tensor(
+              non_media_treatments_baseline_normalized, dtype=backend.float32
+          )[backend.newaxis, backend.newaxis, :],
+          data_tensors.non_media_treatments.shape,  # pytype: disable=attribute-error
       )
     else:
       non_media_treatments_baseline_normalized = None
@@ -2088,7 +2138,7 @@ class Analyzer:
     for i, start_index in enumerate(batch_starting_indices):
       stop_index = np.min([n_draws, start_index + batch_size])
       batch_dists = {
-          k: tf.convert_to_tensor(params[k][:, start_index:stop_index, ...])
+          k: backend.to_tensor(params[k][:, start_index:stop_index, ...])
          for k in param_list
      }
      dist_tensors = DistributionTensors(**batch_dists)
@@ -2106,12 +2156,12 @@ class Analyzer:
         **dim_kwargs,
         **incremental_outcome_kwargs,
     )
-    return tf.concat(incremental_outcome_temps, axis=1)
+    return backend.concatenate(incremental_outcome_temps, axis=1)

   def _validate_geo_and_time_granularity(
       self,
       selected_geos: Sequence[str] | None = None,
-      selected_times: Sequence[str] | None = None,
+      selected_times: Sequence[str] | Sequence[bool] | None = None,
       aggregate_geos: bool = True,
   ):
     """Validates the geo and time granularity arguments for ROI analysis.
@@ -2119,8 +2169,9 @@ class Analyzer:
     Args:
       selected_geos: Optional. Contains a subset of geos to include. By default,
         all geos are included.
-      selected_times: Optional. Contains a subset of times to include. By
-        default, all time periods are included.
+      selected_times: Optional. Contains a subset of times to include or
+        booleans with length `input_data.n_times`. By default, all time periods
+        are included.
       aggregate_geos: If `True`, then expected revenue is summed over all
         regions.

@@ -2180,7 +2231,7 @@ class Analyzer:
       by_reach: bool = True,
       use_kpi: bool = False,
       batch_size: int = constants.DEFAULT_BATCH_SIZE,
-  ) -> tf.Tensor:
+  ) -> backend.Tensor:
     """Calculates the marginal ROI prior or posterior distribution.

     The marginal ROI (mROI) numerator is the change in expected outcome (`kpi`
@@ -2290,7 +2341,7 @@ class Analyzer:
           "dimension."
       )
       denominator = spend_inc
-    return tf.math.divide_no_nan(numerator, denominator)
+    return backend.divide_no_nan(numerator, denominator)

   def roi(
       self,
@@ -2301,7 +2352,7 @@ class Analyzer:
       aggregate_geos: bool = True,
       use_kpi: bool = False,
       batch_size: int = constants.DEFAULT_BATCH_SIZE,
-  ) -> tf.Tensor:
+  ) -> backend.Tensor:
     """Calculates ROI prior or posterior distribution for each media channel.

     The ROI numerator is the change in expected outcome (`kpi` or `kpi *
@@ -2406,7 +2457,7 @@ class Analyzer:
           "dimension."
       )
       denominator = spend
-    return tf.math.divide_no_nan(incremental_outcome, denominator)
+    return backend.divide_no_nan(incremental_outcome, denominator)

   def cpik(
       self,
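
`mroi` and `roi` above (and `cpik` below) now return through `backend.divide_no_nan`, replacing `tf.math.divide_no_nan`. A NumPy sketch of those semantics, under which a zero denominator yields 0 rather than inf/NaN (so zero-spend channels report ROI 0):

    import numpy as np

    def divide_no_nan(num, den):
      # Matches tf.math.divide_no_nan: return 0 wherever the denominator is 0.
      safe_den = np.where(den == 0, 1.0, den)
      return np.where(den == 0, 0.0, num / safe_den)

    print(divide_no_nan(np.array([6.0, 3.0]), np.array([2.0, 0.0])))  # [3. 0.]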
@@ -2416,7 +2467,7 @@ class Analyzer:
       selected_times: Sequence[str] | Sequence[bool] | None = None,
       aggregate_geos: bool = True,
       batch_size: int = constants.DEFAULT_BATCH_SIZE,
-  ) -> tf.Tensor:
+  ) -> backend.Tensor:
     """Calculates the cost per incremental KPI distribution for each channel.

     The CPIK numerator is the total spend on the channel. The CPIK denominator
@@ -2481,11 +2532,11 @@ class Analyzer:
         aggregate_geos=aggregate_geos,
         batch_size=batch_size,
     )
-    return tf.math.divide_no_nan(1.0, roi)
+    return backend.divide_no_nan(1.0, roi)

   def _mean_and_ci_by_eval_set(
       self,
-      draws: tf.Tensor,
+      draws: backend.Tensor,
       split_by_holdout: bool,
       aggregate_geos: bool = True,
       aggregate_times: bool = True,
@@ -2656,7 +2707,7 @@ class Analyzer:
       self,
       non_media_baseline_values: Sequence[float] | None = None,
       **expected_outcome_kwargs,
-  ) -> tf.Tensor:
+  ) -> backend.Tensor:
     """Calculates either the posterior or prior expected outcome of baseline.

     This is a wrapper for expected_outcome() that automatically sets the
@@ -2673,8 +2724,8 @@ class Analyzer:

     Args:
       non_media_baseline_values: Optional list of shape
-        `(n_non_media_channels,)`. Each element is a float
+        `(n_non_media_channels,)`. Each element is a float denoting a fixed
+        value that will be used as the baseline for the given channel. It is
         expected that they are scaled by population for the channels where
         `model_spec.non_media_population_scaling_id` is `True`. If `None`, the
         `model_spec.non_media_baseline_values` is used, which defaults to the
@@ -2690,43 +2741,47 @@ class Analyzer:
       dropped if `aggregate_geos=True` or `aggregate_time=True`, respectively.
     """
     new_media = (
-        tf.zeros_like(self._meridian.media_tensors.media)
+        backend.zeros_like(self._meridian.media_tensors.media)
         if self._meridian.media_tensors.media is not None
         else None
     )
     # Frequency is not needed because the reach is zero.
     new_reach = (
-        tf.zeros_like(self._meridian.rf_tensors.reach)
+        backend.zeros_like(self._meridian.rf_tensors.reach)
         if self._meridian.rf_tensors.reach is not None
         else None
     )
     new_organic_media = (
-        tf.zeros_like(self._meridian.organic_media_tensors.organic_media)
+        backend.zeros_like(self._meridian.organic_media_tensors.organic_media)
         if self._meridian.organic_media_tensors.organic_media is not None
         else None
     )
     new_organic_reach = (
-        tf.zeros_like(self._meridian.organic_rf_tensors.organic_reach)
+        backend.zeros_like(self._meridian.organic_rf_tensors.organic_reach)
         if self._meridian.organic_rf_tensors.organic_reach is not None
         else None
     )
     if self._meridian.non_media_treatments is not None:
       if self._meridian.model_spec.non_media_population_scaling_id is not None:
-        scaling_factors = tf.where(
+        scaling_factors = backend.where(
             self._meridian.model_spec.non_media_population_scaling_id,
-            self._meridian.population[:, tf.newaxis, tf.newaxis],
-            tf.ones_like(self._meridian.population)[:, tf.newaxis, tf.newaxis],
+            self._meridian.population[:, backend.newaxis, backend.newaxis],
+            backend.ones_like(self._meridian.population)[
+                :, backend.newaxis, backend.newaxis
+            ],
         )
       else:
-        scaling_factors = tf.ones_like(self._meridian.population)[
-            :, tf.newaxis, tf.newaxis
+        scaling_factors = backend.ones_like(self._meridian.population)[
+            :, backend.newaxis, backend.newaxis
         ]

       baseline = self._meridian.compute_non_media_treatments_baseline(
           non_media_baseline_values=non_media_baseline_values,
       )
-      new_non_media_treatments_population_scaled = tf.broadcast_to(
-          tf.convert_to_tensor(baseline, dtype=tf.float32)[tf.newaxis, tf.newaxis, :],
+      new_non_media_treatments_population_scaled = backend.broadcast_to(
+          backend.to_tensor(baseline, dtype=backend.float32)[
+              backend.newaxis, backend.newaxis, :
+          ],
          self._meridian.non_media_treatments.shape,
      )
      new_non_media_treatments = (
@@ -2754,7 +2809,7 @@ class Analyzer:
       include_non_paid_channels: bool = True,
       non_media_baseline_values: Sequence[float] | None = None,
       **kwargs,
-  ) -> tf.Tensor:
+  ) -> backend.Tensor:
     """Aggregates the incremental outcome of the media channels.

     Args:
@@ -2806,11 +2861,11 @@ class Analyzer:
         non_media_baseline_values=non_media_baseline_values,
         **kwargs,
     )
-    incremental_outcome_total = tf.reduce_sum(
+    incremental_outcome_total = backend.reduce_sum(
         incremental_outcome_m, axis=-1, keepdims=True
     )

-    return tf.concat(
+    return backend.concatenate(
         [incremental_outcome_m, incremental_outcome_total],
         axis=-1,
     )
@@ -2940,10 +2995,10 @@ class Analyzer:
         include_non_paid_channels=include_non_paid_channels,
         **dim_kwargs,
     )
-    impressions_with_total = tf.concat(
+    impressions_with_total = backend.concatenate(
         [
             aggregated_impressions,
-            tf.reduce_sum(aggregated_impressions, -1, keepdims=True),
+            backend.reduce_sum(aggregated_impressions, -1, keepdims=True),
         ],
         axis=-1,
     )
@@ -3127,12 +3182,15 @@ class Analyzer:
       spend_list.append(new_spend_tensors.rf_spend)
     # TODO Add support for 1-dimensional spend.
     aggregated_spend = self.filter_and_aggregate_geos_and_times(
-        tensor=tf.concat(spend_list, axis=-1),
+        tensor=backend.concatenate(spend_list, axis=-1),
         flexible_time_dim=True,
         **dim_kwargs,
     )
-    spend_with_total = tf.concat(
-        [aggregated_spend, tf.reduce_sum(aggregated_spend, -1, keepdims=True)],
+    spend_with_total = backend.concatenate(
+        [
+            aggregated_spend,
+            backend.reduce_sum(aggregated_spend, -1, keepdims=True),
+        ],
         axis=-1,
     )
     spend_data = self._compute_spend_data_aggregate(
@@ -3220,7 +3278,7 @@ class Analyzer:
       aggregate_times: bool = True,
       optimal_frequency: Sequence[float] | None = None,
       include_non_paid_channels: bool = True,
-  ) -> tf.Tensor:
+  ) -> backend.Tensor:
     """Computes aggregated impressions values in the data across all channels.

     Args:
@@ -3277,7 +3335,9 @@ class Analyzer:
       if optimal_frequency is None:
         new_frequency = data_tensors.frequency
       else:
-        new_frequency = tf.ones_like(data_tensors.frequency) * optimal_frequency
+        new_frequency = (
+            backend.ones_like(data_tensors.frequency) * optimal_frequency
+        )
       impressions_list.append(
           data_tensors.reach[:, -n_times:, :] * new_frequency[:, -n_times:, :]
       )
@@ -3290,7 +3350,8 @@ class Analyzer:
         new_organic_frequency = data_tensors.organic_frequency
       else:
         new_organic_frequency = (
-            tf.ones_like(data_tensors.organic_frequency) * optimal_frequency
+            backend.ones_like(data_tensors.organic_frequency)
+            * optimal_frequency
         )
       impressions_list.append(
           data_tensors.organic_reach[:, -n_times:, :]
@@ -3300,7 +3361,7 @@ class Analyzer:
       impressions_list.append(data_tensors.non_media_treatments)

     return self.filter_and_aggregate_geos_and_times(
-        tensor=tf.concat(impressions_list, axis=-1),
+        tensor=backend.concatenate(impressions_list, axis=-1),
         selected_geos=selected_geos,
         selected_times=selected_times,
         aggregate_geos=aggregate_geos,
@@ -3402,7 +3463,7 @@ class Analyzer:
         use_posterior=True, use_kpi=use_kpi, **outcome_kwargs
     )

-    baseline_expected_outcome_prior = tf.expand_dims(
+    baseline_expected_outcome_prior = backend.expand_dims(
         self._calculate_baseline_expected_outcome(
             use_posterior=False,
             use_kpi=use_kpi,
@@ -3411,7 +3472,7 @@ class Analyzer:
         ),
         axis=-1,
     )
-    baseline_expected_outcome_posterior = tf.expand_dims(
+    baseline_expected_outcome_posterior = backend.expand_dims(
         self._calculate_baseline_expected_outcome(
             use_posterior=True,
             use_kpi=use_kpi,
@@ -3453,8 +3514,8 @@ class Analyzer:
       freq_grid: Sequence[float] | None = None,
       use_posterior: bool = True,
       use_kpi: bool = False,
-      selected_geos: Sequence[str | int] | None = None,
-      selected_times: Sequence[str | int] | None = None,
+      selected_geos: Sequence[str] | None = None,
+      selected_times: Sequence[str] | Sequence[bool] | None = None,
       confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
   ) -> xr.Dataset:
     """Calculates the optimal frequency that maximizes posterior mean ROI.
@@ -3502,8 +3563,8 @@ class Analyzer:
       default, all geos are included.
       selected_times: Optional list containing either a subset of dates to
         include or booleans with length equal to the number of time periods in
-        included.
+        `new_data` if time is modified in `new_data`, or `input_data.n_times`
+        otherwise. By default, all time periods are included.
       confidence_level: Confidence level for prior and posterior credible
         intervals, represented as a value between zero and one.

@@ -3563,10 +3624,10 @@ class Analyzer:
     n_times = (
         filled_data.get_modified_times(self._meridian) or self._meridian.n_times
     )
-    dummy_media = tf.ones(
+    dummy_media = backend.ones(
         (self._meridian.n_geos, n_media_times, self._meridian.n_media_channels)
     )
-    dummy_media_spend = tf.ones(
+    dummy_media_spend = backend.ones(
         (self._meridian.n_geos, n_times, self._meridian.n_media_channels)
     )

@@ -3582,7 +3643,7 @@ class Analyzer:
     metric_grid = np.zeros((len(freq_grid), self._meridian.n_rf_channels, 4))

     for i, freq in enumerate(freq_grid):
-      new_frequency = tf.ones_like(filled_data.rf_impressions) * freq
+      new_frequency = backend.ones_like(filled_data.rf_impressions) * freq
       new_reach = filled_data.rf_impressions / new_frequency
       new_roi_data = DataTensors(
           reach=new_reach,
@@ -3612,9 +3673,9 @@ class Analyzer:
     )

     optimal_frequency = [freq_grid[i] for i in optimal_freq_idx]
-    optimal_frequency_tensor = tf.convert_to_tensor(
-        tf.ones_like(filled_data.rf_impressions) * optimal_frequency,
-        tf.float32,
+    optimal_frequency_tensor = backend.to_tensor(
+        backend.ones_like(filled_data.rf_impressions) * optimal_frequency,
+        backend.float32,
     )
     optimal_reach = filled_data.rf_impressions / optimal_frequency_tensor

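
The `optimal_freq` loop above holds RF impressions fixed while scanning candidate frequencies; since impressions = reach × frequency, each grid point trades reach against frequency before the ROI metric is evaluated. A toy version of that reallocation step (numbers invented):

    import numpy as np

    rf_impressions = np.array([1000.0, 2000.0])  # fixed per channel
    freq_grid = np.arange(1.0, 5.0, 0.5)
    for freq in freq_grid:
      new_frequency = np.ones_like(rf_impressions) * freq
      new_reach = rf_impressions / new_frequency  # reach falls as frequency rises
      assert np.allclose(new_reach * new_frequency, rf_impressions)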
@@ -3778,10 +3839,12 @@ class Analyzer:
       input_tensor = self._meridian.kpi * self._meridian.revenue_per_kpi
     else:
       input_tensor = self._meridian.kpi
-    actual = self.filter_and_aggregate_geos_and_times(
-        tensor=input_tensor,
-        **dims_kwargs,
-    ).numpy()
+    actual = np.asarray(
+        self.filter_and_aggregate_geos_and_times(
+            tensor=input_tensor,
+            **dims_kwargs,
+        )
+    )
     expected = np.mean(
         self.expected_outcome(
             batch_size=batch_size, use_kpi=use_kpi, **dims_kwargs
@@ -3888,7 +3951,7 @@ class Analyzer:

     return holdout_id

-  def get_rhat(self) -> Mapping[str, tf.Tensor]:
+  def get_rhat(self) -> Mapping[str, backend.Tensor]:
     """Computes the R-hat values for each parameter in the model.

     Returns:
@@ -3904,12 +3967,12 @@ class Analyzer:
           "sample_posterior() must be called prior to calling this method."
       )

-    def _transpose_first_two_dims(x: tf.Tensor) -> tf.Tensor:
+    def _transpose_first_two_dims(x: backend.Tensor) -> backend.Tensor:
       n_dim = len(x.shape)
       perm = [1, 0] + list(range(2, n_dim))
-      return tf.transpose(x, perm)
+      return backend.transpose(x, perm)

-    rhat = tfp.mcmc.potential_scale_reduction({
+    rhat = backend.mcmc.potential_scale_reduction({
         k: _transpose_first_two_dims(v)
         for k, v in self._meridian.inference_data.posterior.data_vars.items()
     })
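
`backend.mcmc.potential_scale_reduction` stands in for the former direct `tfp.mcmc` call; the inner transpose exists because the `InferenceData` posterior stores draws as `(chain, draw, ...)` while TFP expects the sample dimension first. A runnable sketch with synthetic draws (assumes TensorFlow Probability is installed):

    import numpy as np
    import tensorflow_probability as tfp

    samples = np.random.randn(4, 1000, 3).astype(np.float32)  # (chain, draw, param)
    rhat = tfp.mcmc.potential_scale_reduction(np.transpose(samples, (1, 0, 2)))
    print(rhat.numpy())  # one R-hat per parameter; values near 1.0 indicate convergence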
@@ -3966,12 +4029,13 @@ class Analyzer:
|
|
|
3966
4029
|
if self._meridian.prior_broadcast.has_deterministic_param(param):
|
|
3967
4030
|
continue
|
|
3968
4031
|
|
|
3969
|
-
|
|
3970
|
-
|
|
3971
|
-
|
|
3972
|
-
|
|
3973
|
-
row_idx = bad_idx[0]
|
|
4032
|
+
if rhat[param].ndim == 2:
|
|
4033
|
+
row_idx, col_idx = np.where(rhat[param] > bad_rhat_threshold)
|
|
4034
|
+
elif rhat[param].ndim == 1:
|
|
4035
|
+
row_idx = np.where(rhat[param] > bad_rhat_threshold)[0]
|
|
3974
4036
|
col_idx = []
|
|
4037
|
+
elif rhat[param].ndim == 0:
|
|
4038
|
+
row_idx = col_idx = []
|
|
3975
4039
|
else:
|
|
3976
4040
|
raise ValueError(f"Unexpected dimension for parameter {param}.")
|
|
3977
4041
|
|
|
@@ -4010,7 +4074,7 @@ class Analyzer:
|
|
|
4010
4074
|
|
|
4011
4075
|
A list of multipliers is applied to each media channel's total historical
|
|
4012
4076
|
spend within `selected_geos` and `selected_times` to obtain the x-axis
|
|
4013
|
-
values. The y-axis values are the incremental
|
|
4077
|
+
values. The y-axis values are the incremental outcome generated by each
|
|
4014
4078
|
channel within `selected_geos` and `selected_times` under the counterfactual
|
|
4015
4079
|
where media units in each geo and time period are scaled by the
|
|
4016
4080
|
corresponding multiplier. (Media units for time periods prior to
|
|
@@ -4024,9 +4088,8 @@ class Analyzer:
|
|
|
4024
4088
|
generated. If `False`, prior response curves are generated.
|
|
4025
4089
|
selected_geos: Optional list containing a subset of geos to include. By
|
|
4026
4090
|
default, all geos are included.
|
|
4027
|
-
selected_times: Optional list
|
|
4028
|
-
|
|
4029
|
-
strings and integers must align with the `Meridian.n_times`.
|
|
4091
|
+
selected_times: Optional list containing a subset of dates to include. By
|
|
4092
|
+
default, all time periods are included.
|
|
4030
4093
|
by_reach: Boolean. For channels with reach and frequency. If `True`, plots
|
|
4031
4094
|
the response curve by reach. If `False`, plots the response curve by
|
|
4032
4095
|
frequency.
|
|
@@ -4056,17 +4119,17 @@ class Analyzer:
|
|
|
4056
4119
|
"aggregate_times": True,
|
|
4057
4120
|
}
|
|
4058
4121
|
if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
|
|
4059
|
-
frequency =
|
|
4122
|
+
frequency = backend.ones_like(
|
|
4060
4123
|
self._meridian.rf_tensors.frequency
|
|
4061
|
-
) *
|
|
4124
|
+
) * backend.to_tensor(
|
|
4062
4125
|
self.optimal_freq(
|
|
4063
4126
|
selected_geos=selected_geos,
|
|
4064
4127
|
selected_times=selected_times,
|
|
4065
4128
|
use_kpi=use_kpi,
|
|
4066
4129
|
).optimal_frequency,
|
|
4067
|
-
dtype=
|
|
4130
|
+
dtype=backend.float32,
|
|
4068
4131
|
)
|
|
4069
|
-
reach =
|
|
4132
|
+
reach = backend.divide_no_nan(
|
|
4070
4133
|
self._meridian.rf_tensors.reach * self._meridian.rf_tensors.frequency,
|
|
4071
4134
|
frequency,
|
|
4072
4135
|
)
|
|
@@ -4082,7 +4145,7 @@ class Analyzer:
|
|
|
4082
4145
|
))
|
|
4083
4146
|
for i, multiplier in enumerate(spend_multipliers):
|
|
4084
4147
|
if multiplier == 0:
|
|
4085
|
-
incremental_outcome[i, :, :] =
|
|
4148
|
+
incremental_outcome[i, :, :] = backend.zeros(
|
|
4086
4149
|
(len(self._meridian.input_data.get_all_paid_channels()), 3)
|
|
4087
4150
|
) # Last dimension = 3 for the mean, ci_lo and ci_hi.
|
|
4088
4151
|
continue
|
|
@@ -4109,7 +4172,7 @@ class Analyzer:
|
|
|
4109
4172
|
)
|
|
4110
4173
|
|
|
4111
4174
|
if self._meridian.n_media_channels > 0 and self._meridian.n_rf_channels > 0:
|
|
4112
|
-
spend =
|
|
4175
|
+
spend = backend.concatenate(
|
|
4113
4176
|
[
|
|
4114
4177
|
self._meridian.media_tensors.media_spend,
|
|
4115
4178
|
self._meridian.rf_tensors.rf_spend,
|
|
@@ -4121,12 +4184,12 @@ class Analyzer:
     else:
       spend = self._meridian.rf_tensors.rf_spend
 
-    if tf.rank(spend) == 3:
+    if backend.rank(spend) == 3:
       spend = self.filter_and_aggregate_geos_and_times(
           tensor=spend,
           **dim_kwargs,
       )
-    spend_einsum = tf.einsum("k,m->km", np.array(spend_multipliers), spend)
+    spend_einsum = backend.einsum("k,m->km", np.array(spend_multipliers), spend)
     xr_coords = {
         constants.CHANNEL: self._meridian.input_data.get_all_paid_channels(),
         constants.METRIC: [
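The `"k,m->km"` einsum is an outer product: it scales every channel's aggregated spend by every multiplier at once. A NumPy sketch with illustrative values:

    import numpy as np

    spend_multipliers = np.array([0.0, 0.5, 1.0, 1.5])  # axis k
    spend = np.array([100.0, 300.0])                    # axis m (per channel)

    grid = np.einsum("k,m->km", spend_multipliers, spend)
    print(grid.shape)  # (4, 2): one spend level per (multiplier, channel)
    print(grid[1])     # [ 50. 150.]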
@@ -4197,60 +4260,45 @@ class Analyzer:
     }
     final_df_list = []
 
-    …
+    def _add_adstock_decay_for_channel(
+        n_channels: int,
+        channel_data: xr.DataArray | None,
+        adstock_channel_type: str,
+    ) -> None:
+      """Helper to compute and append adstock decay data for a channel type."""
+      if n_channels > 0:
+        channel_values = channel_data.values if channel_data is not None else []
+        xr_coords = base_xr_coords | {constants.CHANNEL: channel_values}
+        adstock_df = self._get_adstock_dataframe(
+            adstock_channel_type,
+            l_range,
+            xr_dims,
+            xr_coords,
+            confidence_level,
+        )
+        if not adstock_df.empty:
+          final_df_list.append(adstock_df)
 
-    …
-          if self._meridian.input_data.organic_media_channel is not None
-          else []
-      )
-      organic_media_xr_coords = base_xr_coords | {
-          constants.CHANNEL: organic_media_channel_values
-      }
-      adstock_df_om = self._get_adstock_dataframe(
-          constants.ORGANIC_MEDIA,
-          l_range,
-          xr_dims,
-          organic_media_xr_coords,
-          confidence_level,
-      )
-      if not adstock_df_om.empty:
-        final_df_list.append(adstock_df_om)
+    _add_adstock_decay_for_channel(
+        self._meridian.n_media_channels,
+        self._meridian.input_data.media_channel,
+        constants.MEDIA,
+    )
+    _add_adstock_decay_for_channel(
+        self._meridian.n_rf_channels,
+        self._meridian.input_data.rf_channel,
+        constants.RF,
+    )
+    _add_adstock_decay_for_channel(
+        self._meridian.n_organic_media_channels,
+        self._meridian.input_data.organic_media_channel,
+        constants.ORGANIC_MEDIA,
+    )
+    _add_adstock_decay_for_channel(
+        self._meridian.n_organic_rf_channels,
+        self._meridian.input_data.organic_rf_channel,
+        constants.ORGANIC_RF,
+    )
 
     final_df = pd.concat(final_df_list, ignore_index=True)
     # Adding an extra column that indicates whether time_units is an integer
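The refactor replaces four near-identical per-channel-type blocks with one nested helper that closes over `final_df_list` and the shared coordinates. A generic sketch of that closure pattern (names are illustrative, unrelated to the package):

    def build_report(sections):
      parts = []

      def add_section(title, rows):
        # Closes over `parts`, the way _add_adstock_decay_for_channel
        # closes over final_df_list and base_xr_coords.
        if rows:
          parts.append((title, rows))

      for title, rows in sections:
        add_section(title, rows)
      return parts

    print(build_report([("media", [1, 2]), ("rf", []), ("organic_media", [3])]))
    # -> [('media', [1, 2]), ('organic_media', [3])]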
@@ -4275,7 +4323,8 @@ class Analyzer:
     Returns:
       A DataFrame with data needed to plot the Hill curves, with columns:
 
-      * `channel`: `media`, `rf`, or `
+      * `channel`: `media`, `rf`, `organic_media`, or `organic_rf` channel
+        name.
       * `media_units`: Media (for `media` channels) or average frequency (for
         `rf` channels) units.
       * `distribution`: Indication of `posterior` or `prior` draw.
@@ -4284,12 +4333,12 @@ class Analyzer:
       * `ci_lo`: Lower bound of the credible interval of the value of the Hill
         function.
       * `mean`: Point-wise mean of the value of the Hill function per draw.
-      * channel_type: Indication of a `media`, `rf`,
-        channel
+      * channel_type: Indication of a `media`, `rf`, `organic_media`
+        channel, or `organic_rf`.
 
     Raises:
       ValueError: If `channel_type` is not one of the recognized constants
-        `media`, `rf`, or `
+        `media`, `rf`, `organic_media`, or `organic_rf`.
     """
     if (
         channel_type == constants.MEDIA
@@ -4327,10 +4376,23 @@ class Analyzer:
           np.array(self._meridian.organic_media_tensors.organic_media_scaled),
           axis=(0, 1),
       )
+    elif (
+        channel_type == constants.ORGANIC_RF
+        and self._meridian.input_data.organic_rf_channel is not None
+    ):
+      ec = constants.EC_ORF
+      slope = constants.SLOPE_ORF
+      channels = self._meridian.input_data.organic_rf_channel.values
+      transformer = None
+      linspace_max_values = np.max(
+          np.array(self._meridian.organic_rf_tensors.organic_frequency),
+          axis=(0, 1),
+      )
     else:
       raise ValueError(
           f"Unsupported channel type: {channel_type} or the requested type of"
-          " channels (`media`, `rf`, or `
+          " channels (`media`, `rf`, `organic_media`, or `organic_rf`) are not"
+          " present."
       )
     linspace = np.linspace(
         0,
@@ -4352,7 +4414,7 @@ class Analyzer:
     }
     # Expanding the linspace by one dimension since the HillTransformer requires
     # 3-dimensional input as (geo, time, channel).
-    expanded_linspace = tf.expand_dims(linspace, axis=0)
+    expanded_linspace = backend.expand_dims(linspace, axis=0)
     # Including [:, :, 0, :, :] in the output of the Hill Function to reduce the
     # tensors by the geo dimension. Original Hill dimension shape is (n_chains,
     # n_draws, n_geos, n_times, n_channels), and we want to plot the
@@ -4374,36 +4436,44 @@ class Analyzer:
         xr_coords,
         confidence_level,
     )
-    df = (
+
+    df_raw = (
         hill_dataset[constants.HILL_SATURATION_LEVEL]
         .to_dataframe()
         .reset_index()
-        .pivot(
-            index=[
-                constants.CHANNEL,
-                constants.MEDIA_UNITS,
-                constants.DISTRIBUTION,
-            ],
-            columns=constants.METRIC,
-            values=constants.HILL_SATURATION_LEVEL,
-        )
-        .reset_index()
     )
 
+    # Ensure the channel order matches the tensor order (defined by 'channels')
+    # by using a Categorical type before pivoting. This prevents pivot from
+    # sorting alphabetically, which can cause misalignment between channel names
+    # and the calculated media units derived later from the tensor order.
+    df_raw[constants.CHANNEL] = pd.Categorical(
+        df_raw[constants.CHANNEL], categories=channels
+    )
+    df = df_raw.pivot(
+        index=[
+            constants.CHANNEL,
+            constants.MEDIA_UNITS,
+            constants.DISTRIBUTION,
+        ],
+        columns=constants.METRIC,
+        values=constants.HILL_SATURATION_LEVEL,
+    ).reset_index()
+
     # Fill media_units or frequency x-axis with the correct range.
     media_units_arr = []
     if transformer is not None:
       population_scaled_median = transformer.population_scaled_median_m
-      x_range_full_shape = linspace * tf.transpose(
-          population_scaled_median[:, tf.newaxis]
+      x_range_full_shape = linspace * backend.transpose(
+          population_scaled_median[:, backend.newaxis]
       )
     else:
       x_range_full_shape = linspace
 
     # Flatten this into a list.
-    x_range_list = (
-        tf.reshape(tf.transpose(x_range_full_shape), [-1]).numpy().tolist()
-    )
+    x_range_list = np.asarray(
+        backend.reshape(backend.transpose(x_range_full_shape), [-1])
+    ).tolist()
 
     # Doubles each value in the list to account for alternating prior
     # and posterior.
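The new comment describes a genuine pandas pitfall: `DataFrame.pivot` orders the result by sorting the index, so channel rows can come back alphabetically rather than in tensor order. A standalone sketch of the fix, with made-up data:

    import pandas as pd

    channels = ["tv", "search", "display"]  # tensor order, not alphabetical
    df = pd.DataFrame({
        "channel": ["tv", "search", "display"] * 2,
        "metric": ["mean"] * 3 + ["ci_hi"] * 3,
        "value": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
    })

    # Without this line, pivot would reorder rows display/search/tv.
    df["channel"] = pd.Categorical(df["channel"], categories=channels)
    wide = df.pivot(index="channel", columns="metric", values="value")
    print(wide.reset_index()["channel"].tolist())  # ['tv', 'search', 'display']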
@@ -4419,14 +4489,15 @@ class Analyzer:
   def _get_channel_hill_histogram_dataframe(
       self,
       channel_type: str,
-      data_to_histogram: tf.Tensor,
+      data_to_histogram: backend.Tensor,
       channel_names: Sequence[str],
       n_bins: int,
   ) -> pd.DataFrame:
     """Calculates hill histogram dataframe for a given channel type's values.
 
     Args:
-      channel_type: The type of channel (e.g., 'rf', 'media', 'organic_media'
+      channel_type: The type of channel (e.g., 'rf', 'media', 'organic_media',
+        'organic_rf').
       data_to_histogram: The 2D tensor (observations, channels). containing the
         data whose distribution needs to be histogrammed for each channel.
       channel_names: The names corresponding to the channels in
@@ -4448,7 +4519,7 @@ class Analyzer:
     }
 
     for i, channel_name in enumerate(channel_names):
-      channel_data_np = data_to_histogram[:, i]
+      channel_data_np = np.asarray(data_to_histogram[:, i])
       channel_data_np = channel_data_np[~np.isnan(channel_data_np)]
       if channel_data_np.size == 0:
         continue
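`np.asarray` keeps the histogram code backend-agnostic: both TensorFlow eager tensors and JAX arrays support the NumPy array protocol, so the same conversion works for either backend. A quick illustration, assuming TensorFlow is installed (the JAX case is analogous):

    import numpy as np
    import tensorflow as tf

    t = tf.constant([[1.0, float("nan")], [3.0, 4.0]])
    col = np.asarray(t[:, 1])       # conversion via the __array__ protocol
    col = col[~np.isnan(col)]       # pure NumPy from here on
    print(col)                      # [4.]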
@@ -4510,7 +4581,7 @@ class Analyzer:
     if self._meridian.input_data.rf_channel is not None:
       frequency = self._meridian.rf_tensors.frequency
       if frequency is not None:
-        reshaped_frequency = tf.reshape(
+        reshaped_frequency = backend.reshape(
             frequency, (n_geos * n_media_times, self._meridian.n_rf_channels)
         )
         rf_hist_data = self._get_channel_hill_histogram_dataframe(
@@ -4528,7 +4599,7 @@ class Analyzer:
     if transformer is not None and scaled is not None:
       population_scaled_median = transformer.population_scaled_median_m
       scaled_media_units = scaled * population_scaled_median
-      reshaped_scaled_media_units = tf.reshape(
+      reshaped_scaled_media_units = backend.reshape(
           scaled_media_units,
           (n_geos * n_media_times, self._meridian.n_media_channels),
       )
@@ -4548,7 +4619,7 @@ class Analyzer:
     if transformer_om is not None and scaled_om is not None:
       population_scaled_median_om = transformer_om.population_scaled_median_m
       scaled_organic_media_units = scaled_om * population_scaled_median_om
-      reshaped_scaled_organic_media_units = tf.reshape(
+      reshaped_scaled_organic_media_units = backend.reshape(
           scaled_organic_media_units,
           (n_geos * n_media_times, self._meridian.n_organic_media_channels),
       )
@@ -4560,6 +4631,21 @@ class Analyzer:
         )
       df_list.append(pd.DataFrame(organic_media_hist_data))
 
+    # Organic RF.
+    if self._meridian.input_data.organic_rf_channel is not None:
+      frequency = self._meridian.organic_rf_tensors.organic_frequency
+      if frequency is not None:
+        reshaped_frequency = backend.reshape(
+            frequency,
+            (n_geos * n_media_times, self._meridian.n_organic_rf_channels),
+        )
+        organic_rf_hist_data = self._get_channel_hill_histogram_dataframe(
+            channel_type=constants.ORGANIC_RF,
+            data_to_histogram=reshaped_frequency,
+            channel_names=self._meridian.input_data.organic_rf_channel.values,
+            n_bins=n_bins,
+        )
+        df_list.append(pd.DataFrame(organic_rf_hist_data))
     return pd.concat(df_list, ignore_index=True)
 
   def hill_curves(
@@ -4612,6 +4698,7 @@ class Analyzer:
         (self._meridian.n_media_channels, constants.MEDIA),
         (self._meridian.n_rf_channels, constants.RF),
         (self._meridian.n_organic_media_channels, constants.ORGANIC_MEDIA),
+        (self._meridian.n_organic_rf_channels, constants.ORGANIC_RF),
     ]:
       if n_channels > 0:
         hill_df = self._get_hill_curves_dataframe(
@@ -4624,11 +4711,11 @@ class Analyzer:
 
   def _compute_roi_aggregate(
       self,
-      incremental_outcome_prior: tf.Tensor,
-      incremental_outcome_posterior: tf.Tensor,
+      incremental_outcome_prior: backend.Tensor,
+      incremental_outcome_posterior: backend.Tensor,
       xr_dims: Sequence[str],
       xr_coords: Mapping[str, tuple[Sequence[str], Sequence[str]]],
-      spend_with_total: tf.Tensor,
+      spend_with_total: backend.Tensor,
       confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
       metric_name: str = constants.ROI,
   ) -> xr.Dataset:
@@ -4645,8 +4732,8 @@ class Analyzer:
 
   def _compute_spend_data_aggregate(
       self,
-      spend_with_total: tf.Tensor,
-      impressions_with_total: tf.Tensor,
+      spend_with_total: backend.Tensor,
+      impressions_with_total: backend.Tensor,
       xr_dims: Sequence[str],
       xr_coords: Mapping[str, tuple[Sequence[str], Sequence[str]]],
   ) -> xr.Dataset:
@@ -4682,9 +4769,9 @@ class Analyzer:
 
   def _compute_effectiveness_aggregate(
       self,
-      incremental_outcome_prior: tf.Tensor,
-      incremental_outcome_posterior: tf.Tensor,
-      impressions_with_total: tf.Tensor,
+      incremental_outcome_prior: backend.Tensor,
+      incremental_outcome_posterior: backend.Tensor,
+      impressions_with_total: backend.Tensor,
       xr_dims: Sequence[str],
       xr_coords: Mapping[str, tuple[Sequence[str], Sequence[str]]],
       confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
@@ -4701,9 +4788,9 @@ class Analyzer:
 
   def _compute_cpik_aggregate(
       self,
-      incremental_kpi_prior: tf.Tensor,
-      incremental_kpi_posterior: tf.Tensor,
-      spend_with_total: tf.Tensor,
+      incremental_kpi_prior: backend.Tensor,
+      incremental_kpi_posterior: backend.Tensor,
+      spend_with_total: backend.Tensor,
       xr_dims: Sequence[str],
       xr_coords: Mapping[str, tuple[Sequence[str], Sequence[str]]],
       confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
@@ -4720,17 +4807,19 @@ class Analyzer:
 
   def _compute_pct_of_contribution(
       self,
-      incremental_outcome_prior: tf.Tensor,
-      incremental_outcome_posterior: tf.Tensor,
-      expected_outcome_prior: tf.Tensor,
-      expected_outcome_posterior: tf.Tensor,
+      incremental_outcome_prior: backend.Tensor,
+      incremental_outcome_posterior: backend.Tensor,
+      expected_outcome_prior: backend.Tensor,
+      expected_outcome_posterior: backend.Tensor,
       xr_dims: Sequence[str],
       xr_coords: Mapping[str, tuple[Sequence[str], Sequence[str]]],
       confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
   ) -> xr.Dataset:
     """Computes the parts of `MediaSummary` related to mean expected outcome."""
-    mean_expected_outcome_prior = tf.reduce_mean(expected_outcome_prior, (0, 1))
-    mean_expected_outcome_posterior = tf.reduce_mean(
+    mean_expected_outcome_prior = backend.reduce_mean(
+        expected_outcome_prior, (0, 1)
+    )
+    mean_expected_outcome_posterior = backend.reduce_mean(
         expected_outcome_posterior, (0, 1)
     )
 
@@ -4806,8 +4895,9 @@ class Analyzer:
         of all the remaining tensors. If any of the tensors in `new_data` is
         provided with a different number of time periods than in `InputData`,
         then all tensors must be provided with the same number of time periods.
-      selected_times:
-        …
+      selected_times: Optional list containing either a subset of dates to
+        include or booleans with length equal to the number of time periods in
+        KPI data. By default, all time periods are included.
       include_media: Whether to include spends for paid media channels that do
         not have R&F data.
       include_rf: Whether to include spends for paid media channels with R&F
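The widened docstring means `selected_times` accepts either time coordinates or a boolean mask over the time dimension. A sketch of the two call forms, using a hypothetical `compute` stand-in for the method documented above:

    times = ["2024-01-01", "2024-01-08", "2024-01-15", "2024-01-22"]

    def compute(selected_times=None):  # hypothetical stand-in
      print(selected_times)

    # Form 1: a subset of the time coordinates.
    compute(selected_times=["2024-01-15", "2024-01-22"])

    # Form 2: a boolean mask, one entry per time period in the KPI data.
    compute(selected_times=[t >= "2024-01-15" for t in times])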
@@ -4884,8 +4974,8 @@ class Analyzer:
   def _impute_and_aggregate_spend(
       self,
       selected_times: Sequence[str] | Sequence[bool] | None,
-      media_execution_values: tf.Tensor,
-      channel_spend: tf.Tensor,
+      media_execution_values: backend.Tensor,
+      channel_spend: backend.Tensor,
       channel_names: Sequence[str],
   ) -> xr.DataArray:
     """Imputes and aggregates the spend over the selected time period.
@@ -4898,7 +4988,9 @@ class Analyzer:
     argument, its values only affect the output when imputation is required.
 
     Args:
-      selected_times:
+      selected_times: Optional list containing either a subset of dates to
+        include or booleans with length equal to the number of time periods in
+        KPI data. By default, all time periods are included.
       media_execution_values: The media execution values over all time points.
       channel_spend: The spend over all time points. Its shape can be `(n_geos,
         n_times, n_media_channels)` or `(n_media_channels,)` if the data is
|
     }
 
     if channel_spend.ndim == 3:
-      aggregated_spend = self.filter_and_aggregate_geos_and_times(
-          channel_spend,
-          has_media_dim=True,
-          **dim_kwargs,
-      ).numpy()
+      aggregated_spend = np.asarray(
+          self.filter_and_aggregate_geos_and_times(
+              channel_spend,
+              has_media_dim=True,
+              **dim_kwargs,
+          )
+      )
     # channel_spend.ndim can only be 3 or 1.
     else:
       # media spend can have more time points than the model time points
@@ -4938,14 +5032,84 @@ class Analyzer:
           media_exe_values,
           **dim_kwargs,
       )
-      imputed_cpmu = tf.math.divide_no_nan(
+      imputed_cpmu = backend.divide_no_nan(
           channel_spend,
           np.sum(media_exe_values, (0, 1)),
       )
-      aggregated_spend = (target_media_exe_values * imputed_cpmu)
+      aggregated_spend = np.asarray(target_media_exe_values * imputed_cpmu)
 
     return xr.DataArray(
         data=aggregated_spend,
         dims=[constants.CHANNEL],
         coords={constants.CHANNEL: channel_names},
     )
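When spend is only known as per-channel totals, a cost per media unit (total spend divided by total media units) is imputed and applied to the media units inside the selected window. A NumPy sketch of that arithmetic with illustrative numbers:

    import numpy as np

    # (geo, time, channel) media units: 1 geo, 3 weeks, 2 channels.
    media_units = np.array([[[10.0, 4.0], [20.0, 8.0], [10.0, 8.0]]])
    channel_spend = np.array([1000.0, 600.0])  # only channel totals are known

    # Impute cost per media unit from the totals over all time points.
    cpmu = channel_spend / media_units.sum(axis=(0, 1))  # [25., 30.]

    # Spend attributed to the first two weeks of the window.
    window_units = media_units[:, :2].sum(axis=(0, 1))   # [30., 12.]
    print(window_units * cpmu)                           # [750. 360.]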
+
+  def negative_baseline_probability(
+      self,
+      non_media_baseline_values: Sequence[float] | None = None,
+      use_posterior: bool = True,
+      selected_geos: Sequence[str] | None = None,
+      selected_times: Sequence[str] | None = None,
+      use_kpi: bool = False,
+      batch_size: int = constants.DEFAULT_BATCH_SIZE,
+  ) -> np.floating:
+    """Calculates either prior or posterior negative baseline probability.
+
+    This calculates either the prior or posterior probability that the baseline,
+    aggregated over the supplied time window, is negative.
+
+    The baseline is calculated by computing `expected_outcome` with the
+    following assumptions:
+    1) `media` is set to all zeros,
+    2) `reach` is set to all zeros,
+    3) `organic_media` is set to all zeros,
+    4) `organic_reach` is set to all zeros,
+    5) `non_media_treatments` is set to the counterfactual values according
+       to the `non_media_baseline_values` argument,
+    6) `controls` are set to historical values.
+
+    Args:
+      non_media_baseline_values: Optional list of shape
+        `(n_non_media_channels,)`. Each element is a float denoting a fixed
+        value that will be used as the baseline for the given channel. It is
+        expected that they are scaled by population for the channels where
+        `model_spec.non_media_population_scaling_id` is `True`. If `None`, the
+        `model_spec.non_media_baseline_values` is used, which defaults to the
+        minimum value for each non_media treatment channel.
+      use_posterior: Boolean. If `True`, then the expected outcome posterior
+        distribution is calculated. Otherwise, the prior distribution is
+        calculated.
+      selected_geos: Optional list of containing a subset of geos to include. By
+        default, all geos are included.
+      selected_times: Optional list of containing a subset of dates to include.
+        The values accepted here must match time dimension coordinates from
+        `InputData.time`. By default, all time periods are included.
+      use_kpi: Boolean. If `use_kpi = True`, the expected KPI is calculated;
+        otherwise the expected revenue `(kpi * revenue_per_kpi)` is calculated.
+        It is required that `use_kpi = True` if `revenue_per_kpi` is not defined
+        or if `inverse_transform_outcome = False`.
+      batch_size: Integer representing the maximum draws per chain in each
+        batch. The calculation is run in batches to avoid memory exhaustion. If
+        a memory error occurs, try reducing `batch_size`. The calculation will
+        generally be faster with larger `batch_size` values.
+
+    Returns:
+      A float representing the prior or posterior negative baseline probability
+      over the supplied time window.
+    Raises:
+      NotFittedModelError: if `sample_posterior()` (for `use_posterior=True`)
+      or `sample_prior()` (for `use_posterior=False`) has not been called
+      prior to calling this method.
+    """
+
+    baseline_draws = self._calculate_baseline_expected_outcome(
+        non_media_baseline_values=non_media_baseline_values,
+        use_posterior=use_posterior,
+        selected_geos=selected_geos,
+        selected_times=selected_times,
+        aggregate_geos=True,
+        aggregate_times=True,
+        use_kpi=use_kpi,
+        batch_size=batch_size,
+    )
+    return np.mean(baseline_draws < 0)
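A usage sketch for the new method, assuming `mmm` is a `Meridian` model on which `sample_prior()` or `sample_posterior()` has already been run (geo names are hypothetical):

    from meridian.analysis import analyzer

    ana = analyzer.Analyzer(mmm)

    # Posterior probability that the aggregate baseline is negative.
    p_neg = ana.negative_baseline_probability()

    # Prior version, restricted to two (hypothetical) geos, on the KPI scale.
    p_neg_prior = ana.negative_baseline_probability(
        use_posterior=False,
        selected_geos=["geo_1", "geo_2"],
        use_kpi=True,
    )
    print(f"P(baseline < 0 | data) = {p_neg:.2%}")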