google-meridian 1.1.6__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {google_meridian-1.1.6.dist-info → google_meridian-1.2.1.dist-info}/METADATA +8 -2
- google_meridian-1.2.1.dist-info/RECORD +52 -0
- meridian/__init__.py +1 -0
- meridian/analysis/analyzer.py +621 -393
- meridian/analysis/optimizer.py +403 -351
- meridian/analysis/summarizer.py +31 -16
- meridian/analysis/test_utils.py +96 -94
- meridian/analysis/visualizer.py +53 -54
- meridian/backend/__init__.py +975 -0
- meridian/backend/config.py +118 -0
- meridian/backend/test_utils.py +181 -0
- meridian/constants.py +71 -10
- meridian/data/input_data.py +99 -0
- meridian/data/test_utils.py +146 -12
- meridian/mlflow/autolog.py +2 -2
- meridian/model/adstock_hill.py +280 -33
- meridian/model/eda/__init__.py +17 -0
- meridian/model/eda/eda_engine.py +735 -0
- meridian/model/knots.py +525 -2
- meridian/model/media.py +62 -54
- meridian/model/model.py +224 -97
- meridian/model/model_test_data.py +331 -159
- meridian/model/posterior_sampler.py +388 -383
- meridian/model/prior_distribution.py +612 -177
- meridian/model/prior_sampler.py +65 -65
- meridian/model/spec.py +23 -3
- meridian/model/transformers.py +55 -49
- meridian/version.py +1 -1
- google_meridian-1.1.6.dist-info/RECORD +0 -47
- {google_meridian-1.1.6.dist-info → google_meridian-1.2.1.dist-info}/WHEEL +0 -0
- {google_meridian-1.1.6.dist-info → google_meridian-1.2.1.dist-info}/licenses/LICENSE +0 -0
- {google_meridian-1.1.6.dist-info → google_meridian-1.2.1.dist-info}/top_level.txt +0 -0
meridian/analysis/analyzer.py
CHANGED
|
@@ -20,14 +20,13 @@ import numbers
|
|
|
20
20
|
from typing import Any, Optional
|
|
21
21
|
import warnings
|
|
22
22
|
|
|
23
|
+
from meridian import backend
|
|
23
24
|
from meridian import constants
|
|
24
25
|
from meridian.model import adstock_hill
|
|
25
26
|
from meridian.model import model
|
|
26
27
|
from meridian.model import transformers
|
|
27
28
|
import numpy as np
|
|
28
29
|
import pandas as pd
|
|
29
|
-
import tensorflow as tf
|
|
30
|
-
import tensorflow_probability as tfp
|
|
31
30
|
from typing_extensions import Self
|
|
32
31
|
import xarray as xr
|
|
33
32
|
|
|
@@ -35,6 +34,7 @@ __all__ = [
|
|
|
35
34
|
"Analyzer",
|
|
36
35
|
"DataTensors",
|
|
37
36
|
"DistributionTensors",
|
|
37
|
+
"get_central_tendency_and_ci",
|
|
38
38
|
]
|
|
39
39
|
|
|
40
40
|
|
|
@@ -53,7 +53,7 @@ def _validate_non_media_baseline_values_numbers(
|
|
|
53
53
|
|
|
54
54
|
|
|
55
55
|
# TODO: Refactor the related unit tests to be under DataTensors.
|
|
56
|
-
class DataTensors(
|
|
56
|
+
class DataTensors(backend.ExtensionType):
|
|
57
57
|
"""Container for data variable arguments of Analyzer methods.
|
|
58
58
|
|
|
59
59
|
Attributes:
|
|
@@ -88,86 +88,100 @@ class DataTensors(tf.experimental.ExtensionType):
|
|
|
88
88
|
for time dimension `T`.
|
|
89
89
|
"""
|
|
90
90
|
|
|
91
|
-
media: Optional[
|
|
92
|
-
media_spend: Optional[
|
|
93
|
-
reach: Optional[
|
|
94
|
-
frequency: Optional[
|
|
95
|
-
rf_impressions: Optional[
|
|
96
|
-
rf_spend: Optional[
|
|
97
|
-
organic_media: Optional[
|
|
98
|
-
organic_reach: Optional[
|
|
99
|
-
organic_frequency: Optional[
|
|
100
|
-
non_media_treatments: Optional[
|
|
101
|
-
controls: Optional[
|
|
102
|
-
revenue_per_kpi: Optional[
|
|
103
|
-
time: Optional[
|
|
91
|
+
media: Optional[backend.Tensor]
|
|
92
|
+
media_spend: Optional[backend.Tensor]
|
|
93
|
+
reach: Optional[backend.Tensor]
|
|
94
|
+
frequency: Optional[backend.Tensor]
|
|
95
|
+
rf_impressions: Optional[backend.Tensor]
|
|
96
|
+
rf_spend: Optional[backend.Tensor]
|
|
97
|
+
organic_media: Optional[backend.Tensor]
|
|
98
|
+
organic_reach: Optional[backend.Tensor]
|
|
99
|
+
organic_frequency: Optional[backend.Tensor]
|
|
100
|
+
non_media_treatments: Optional[backend.Tensor]
|
|
101
|
+
controls: Optional[backend.Tensor]
|
|
102
|
+
revenue_per_kpi: Optional[backend.Tensor]
|
|
103
|
+
time: Optional[backend.Tensor]
|
|
104
104
|
|
|
105
105
|
def __init__(
|
|
106
106
|
self,
|
|
107
|
-
media: Optional[
|
|
108
|
-
media_spend: Optional[
|
|
109
|
-
reach: Optional[
|
|
110
|
-
frequency: Optional[
|
|
111
|
-
rf_impressions: Optional[
|
|
112
|
-
rf_spend: Optional[
|
|
113
|
-
organic_media: Optional[
|
|
114
|
-
organic_reach: Optional[
|
|
115
|
-
organic_frequency: Optional[
|
|
116
|
-
non_media_treatments: Optional[
|
|
117
|
-
controls: Optional[
|
|
118
|
-
revenue_per_kpi: Optional[
|
|
119
|
-
time: Optional[Sequence[str] |
|
|
107
|
+
media: Optional[backend.Tensor] = None,
|
|
108
|
+
media_spend: Optional[backend.Tensor] = None,
|
|
109
|
+
reach: Optional[backend.Tensor] = None,
|
|
110
|
+
frequency: Optional[backend.Tensor] = None,
|
|
111
|
+
rf_impressions: Optional[backend.Tensor] = None,
|
|
112
|
+
rf_spend: Optional[backend.Tensor] = None,
|
|
113
|
+
organic_media: Optional[backend.Tensor] = None,
|
|
114
|
+
organic_reach: Optional[backend.Tensor] = None,
|
|
115
|
+
organic_frequency: Optional[backend.Tensor] = None,
|
|
116
|
+
non_media_treatments: Optional[backend.Tensor] = None,
|
|
117
|
+
controls: Optional[backend.Tensor] = None,
|
|
118
|
+
revenue_per_kpi: Optional[backend.Tensor] = None,
|
|
119
|
+
time: Optional[Sequence[str] | backend.Tensor] = None,
|
|
120
120
|
):
|
|
121
|
-
self.media =
|
|
121
|
+
self.media = (
|
|
122
|
+
backend.cast(media, backend.float32) if media is not None else None
|
|
123
|
+
)
|
|
122
124
|
self.media_spend = (
|
|
123
|
-
|
|
125
|
+
backend.cast(media_spend, backend.float32)
|
|
126
|
+
if media_spend is not None
|
|
127
|
+
else None
|
|
128
|
+
)
|
|
129
|
+
self.reach = (
|
|
130
|
+
backend.cast(reach, backend.float32) if reach is not None else None
|
|
124
131
|
)
|
|
125
|
-
self.reach = tf.cast(reach, tf.float32) if reach is not None else None
|
|
126
132
|
self.frequency = (
|
|
127
|
-
|
|
133
|
+
backend.cast(frequency, backend.float32)
|
|
134
|
+
if frequency is not None
|
|
135
|
+
else None
|
|
128
136
|
)
|
|
129
137
|
self.rf_impressions = (
|
|
130
|
-
|
|
138
|
+
backend.cast(rf_impressions, backend.float32)
|
|
131
139
|
if rf_impressions is not None
|
|
132
140
|
else None
|
|
133
141
|
)
|
|
134
142
|
self.rf_spend = (
|
|
135
|
-
|
|
143
|
+
backend.cast(rf_spend, backend.float32)
|
|
144
|
+
if rf_spend is not None
|
|
145
|
+
else None
|
|
136
146
|
)
|
|
137
147
|
self.organic_media = (
|
|
138
|
-
|
|
148
|
+
backend.cast(organic_media, backend.float32)
|
|
139
149
|
if organic_media is not None
|
|
140
150
|
else None
|
|
141
151
|
)
|
|
142
152
|
self.organic_reach = (
|
|
143
|
-
|
|
153
|
+
backend.cast(organic_reach, backend.float32)
|
|
144
154
|
if organic_reach is not None
|
|
145
155
|
else None
|
|
146
156
|
)
|
|
147
157
|
self.organic_frequency = (
|
|
148
|
-
|
|
158
|
+
backend.cast(organic_frequency, backend.float32)
|
|
149
159
|
if organic_frequency is not None
|
|
150
160
|
else None
|
|
151
161
|
)
|
|
152
162
|
self.non_media_treatments = (
|
|
153
|
-
|
|
163
|
+
backend.cast(non_media_treatments, backend.float32)
|
|
154
164
|
if non_media_treatments is not None
|
|
155
165
|
else None
|
|
156
166
|
)
|
|
157
167
|
self.controls = (
|
|
158
|
-
|
|
168
|
+
backend.cast(controls, backend.float32)
|
|
169
|
+
if controls is not None
|
|
170
|
+
else None
|
|
159
171
|
)
|
|
160
172
|
self.revenue_per_kpi = (
|
|
161
|
-
|
|
173
|
+
backend.cast(revenue_per_kpi, backend.float32)
|
|
162
174
|
if revenue_per_kpi is not None
|
|
163
175
|
else None
|
|
164
176
|
)
|
|
165
|
-
self.time =
|
|
177
|
+
self.time = (
|
|
178
|
+
backend.to_tensor(time, dtype="string") if time is not None else None
|
|
179
|
+
)
|
|
166
180
|
|
|
167
181
|
def __validate__(self):
|
|
168
182
|
self._validate_n_dims()
|
|
169
183
|
|
|
170
|
-
def total_spend(self) ->
|
|
184
|
+
def total_spend(self) -> backend.Tensor | None:
|
|
171
185
|
"""Returns the total spend tensor.
|
|
172
186
|
|
|
173
187
|
Returns:
|
|
@@ -180,7 +194,9 @@ class DataTensors(tf.experimental.ExtensionType):
|
|
|
180
194
|
spend_tensors.append(self.media_spend)
|
|
181
195
|
if self.rf_spend is not None:
|
|
182
196
|
spend_tensors.append(self.rf_spend)
|
|
183
|
-
return
|
|
197
|
+
return (
|
|
198
|
+
backend.concatenate(spend_tensors, axis=-1) if spend_tensors else None
|
|
199
|
+
)
|
|
184
200
|
|
|
185
201
|
def get_modified_times(self, meridian: model.Meridian) -> int | None:
|
|
186
202
|
"""Returns `n_times` of any tensor where `n_times` has been modified.
|
|
@@ -472,8 +488,8 @@ class DataTensors(tf.experimental.ExtensionType):
|
|
|
472
488
|
elif var_name == constants.REVENUE_PER_KPI:
|
|
473
489
|
old_tensor = meridian.revenue_per_kpi
|
|
474
490
|
elif var_name == constants.TIME:
|
|
475
|
-
old_tensor =
|
|
476
|
-
meridian.input_data.time.values.tolist(), dtype=
|
|
491
|
+
old_tensor = backend.to_tensor(
|
|
492
|
+
meridian.input_data.time.values.tolist(), dtype="string"
|
|
477
493
|
)
|
|
478
494
|
else:
|
|
479
495
|
continue
|
|
@@ -484,63 +500,38 @@ class DataTensors(tf.experimental.ExtensionType):
|
|
|
484
500
|
return DataTensors(**output)
|
|
485
501
|
|
|
486
502
|
|
|
487
|
-
class DistributionTensors(
|
|
503
|
+
class DistributionTensors(backend.ExtensionType):
|
|
488
504
|
"""Container for parameters distributions arguments of Analyzer methods."""
|
|
489
505
|
|
|
490
|
-
alpha_m: Optional[
|
|
491
|
-
alpha_rf: Optional[
|
|
492
|
-
alpha_om: Optional[
|
|
493
|
-
alpha_orf: Optional[
|
|
494
|
-
ec_m: Optional[
|
|
495
|
-
ec_rf: Optional[
|
|
496
|
-
ec_om: Optional[
|
|
497
|
-
ec_orf: Optional[
|
|
498
|
-
slope_m: Optional[
|
|
499
|
-
slope_rf: Optional[
|
|
500
|
-
slope_om: Optional[
|
|
501
|
-
slope_orf: Optional[
|
|
502
|
-
beta_gm: Optional[
|
|
503
|
-
beta_grf: Optional[
|
|
504
|
-
beta_gom: Optional[
|
|
505
|
-
beta_gorf: Optional[
|
|
506
|
-
mu_t: Optional[
|
|
507
|
-
tau_g: Optional[
|
|
508
|
-
gamma_gc: Optional[
|
|
509
|
-
gamma_gn: Optional[
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
def _transformed_new_or_scaled(
|
|
513
|
-
new_variable: tf.Tensor | None,
|
|
514
|
-
transformer: transformers.TensorTransformer | None,
|
|
515
|
-
scaled_variable: tf.Tensor | None,
|
|
516
|
-
) -> tf.Tensor | None:
|
|
517
|
-
"""Returns the transformed new variable or the scaled variable.
|
|
518
|
-
|
|
519
|
-
If the `new_variable` is present, returns
|
|
520
|
-
`transformer.forward(new_variable)`. Otherwise, returns the
|
|
521
|
-
`scaled_variable`.
|
|
522
|
-
|
|
523
|
-
Args:
|
|
524
|
-
new_variable: Optional tensor to be transformed..
|
|
525
|
-
transformer: Optional DataTransformer.
|
|
526
|
-
scaled_variable: Tensor to be returned if `new_variable` is None.
|
|
527
|
-
|
|
528
|
-
Returns:
|
|
529
|
-
The transformed new variable (if the new variable is present) or the
|
|
530
|
-
original scaled variable from the input data otherwise.
|
|
531
|
-
"""
|
|
532
|
-
if new_variable is None or transformer is None:
|
|
533
|
-
return scaled_variable
|
|
534
|
-
return transformer.forward(new_variable)
|
|
506
|
+
alpha_m: Optional[backend.Tensor] = None
|
|
507
|
+
alpha_rf: Optional[backend.Tensor] = None
|
|
508
|
+
alpha_om: Optional[backend.Tensor] = None
|
|
509
|
+
alpha_orf: Optional[backend.Tensor] = None
|
|
510
|
+
ec_m: Optional[backend.Tensor] = None
|
|
511
|
+
ec_rf: Optional[backend.Tensor] = None
|
|
512
|
+
ec_om: Optional[backend.Tensor] = None
|
|
513
|
+
ec_orf: Optional[backend.Tensor] = None
|
|
514
|
+
slope_m: Optional[backend.Tensor] = None
|
|
515
|
+
slope_rf: Optional[backend.Tensor] = None
|
|
516
|
+
slope_om: Optional[backend.Tensor] = None
|
|
517
|
+
slope_orf: Optional[backend.Tensor] = None
|
|
518
|
+
beta_gm: Optional[backend.Tensor] = None
|
|
519
|
+
beta_grf: Optional[backend.Tensor] = None
|
|
520
|
+
beta_gom: Optional[backend.Tensor] = None
|
|
521
|
+
beta_gorf: Optional[backend.Tensor] = None
|
|
522
|
+
mu_t: Optional[backend.Tensor] = None
|
|
523
|
+
tau_g: Optional[backend.Tensor] = None
|
|
524
|
+
gamma_gc: Optional[backend.Tensor] = None
|
|
525
|
+
gamma_gn: Optional[backend.Tensor] = None
|
|
535
526
|
|
|
536
527
|
|
|
537
528
|
def get_central_tendency_and_ci(
|
|
538
|
-
data: np.ndarray |
|
|
529
|
+
data: np.ndarray | backend.Tensor,
|
|
539
530
|
confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
|
|
540
531
|
axis: tuple[int, ...] = (0, 1),
|
|
541
532
|
include_median=False,
|
|
542
533
|
) -> np.ndarray:
|
|
543
|
-
"""Calculates
|
|
534
|
+
"""Calculates mean and credible intervals for the given data.
|
|
544
535
|
|
|
545
536
|
Args:
|
|
546
537
|
data: Data for the metric.
|
|
@@ -551,8 +542,8 @@ def get_central_tendency_and_ci(
|
|
|
551
542
|
the median in the output Dataset (default: False).
|
|
552
543
|
|
|
553
544
|
Returns:
|
|
554
|
-
A numpy array or
|
|
555
|
-
|
|
545
|
+
A numpy array or backend.Tensor containing the mean and credible intervals
|
|
546
|
+
for the given data. Optionally, it also includes the median.
|
|
556
547
|
"""
|
|
557
548
|
mean = np.mean(data, axis=axis, keepdims=False)
|
|
558
549
|
ci_lo = np.quantile(data, (1 - confidence_level) / 2, axis=axis)
|
|
@@ -565,6 +556,31 @@ def get_central_tendency_and_ci(
|
|
|
565
556
|
return np.stack([mean, ci_lo, ci_hi], axis=-1)
|
|
566
557
|
|
|
567
558
|
|
|
559
|
+
def _transformed_new_or_scaled(
|
|
560
|
+
new_variable: backend.Tensor | None,
|
|
561
|
+
transformer: transformers.TensorTransformer | None,
|
|
562
|
+
scaled_variable: backend.Tensor | None,
|
|
563
|
+
) -> backend.Tensor | None:
|
|
564
|
+
"""Returns the transformed new variable or the scaled variable.
|
|
565
|
+
|
|
566
|
+
If the `new_variable` is present, returns
|
|
567
|
+
`transformer.forward(new_variable)`. Otherwise, returns the
|
|
568
|
+
`scaled_variable`.
|
|
569
|
+
|
|
570
|
+
Args:
|
|
571
|
+
new_variable: Optional tensor to be transformed..
|
|
572
|
+
transformer: Optional DataTransformer.
|
|
573
|
+
scaled_variable: Tensor to be returned if `new_variable` is None.
|
|
574
|
+
|
|
575
|
+
Returns:
|
|
576
|
+
The transformed new variable (if the new variable is present) or the
|
|
577
|
+
original scaled variable from the input data otherwise.
|
|
578
|
+
"""
|
|
579
|
+
if new_variable is None or transformer is None:
|
|
580
|
+
return scaled_variable
|
|
581
|
+
return transformer.forward(new_variable)
|
|
582
|
+
|
|
583
|
+
|
|
568
584
|
def _calc_rsquared(expected, actual):
|
|
569
585
|
"""Calculates r-squared between actual and expected outcome."""
|
|
570
586
|
return 1 - np.nanmean((expected - actual) ** 2) / np.nanvar(actual)
|
|
@@ -594,7 +610,7 @@ def _warn_if_geo_arg_in_kwargs(**kwargs):
|
|
|
594
610
|
)
|
|
595
611
|
|
|
596
612
|
|
|
597
|
-
def _check_n_dims(tensor:
|
|
613
|
+
def _check_n_dims(tensor: backend.Tensor, name: str, n_dims: int):
|
|
598
614
|
"""Raises an error if the tensor has the wrong number of dimensions."""
|
|
599
615
|
if tensor.ndim != n_dims:
|
|
600
616
|
raise ValueError(
|
|
@@ -659,43 +675,66 @@ def _validate_flexible_selected_times(
|
|
|
659
675
|
selected_times: Sequence[str] | Sequence[bool] | None,
|
|
660
676
|
media_selected_times: Sequence[str] | Sequence[bool] | None,
|
|
661
677
|
new_n_media_times: int,
|
|
678
|
+
new_time: Sequence[str] | None = None,
|
|
662
679
|
):
|
|
663
680
|
"""Raises an error if selected times or media selected times is invalid.
|
|
664
681
|
|
|
665
|
-
This checks that the `selected_times` and `media_selected_times` arguments
|
|
666
|
-
are lists of booleans with the same number of elements as `new_n_media_times
|
|
667
|
-
|
|
668
|
-
`
|
|
682
|
+
This checks that (1) the `selected_times` and `media_selected_times` arguments
|
|
683
|
+
are lists of booleans with the same number of elements as `new_n_media_times`,
|
|
684
|
+
or (2) the `selected_times` and `media_selected_times` arguments are lists of
|
|
685
|
+
strings and the `new_time` list is provided and `selected_times` and
|
|
686
|
+
`media_selected_times` are subsets of `new_time`. This is only relevant if the
|
|
687
|
+
time dimension of any of the variables in `new_data` used in the analysis is
|
|
688
|
+
modified.
|
|
669
689
|
|
|
670
690
|
Args:
|
|
671
691
|
selected_times: Optional list of times to validate.
|
|
672
692
|
media_selected_times: Optional list of media times to validate.
|
|
673
693
|
new_n_media_times: The number of time periods in the new data.
|
|
694
|
+
new_time: The optional time dimension of the new data.
|
|
674
695
|
"""
|
|
675
696
|
if selected_times and (
|
|
676
|
-
not
|
|
677
|
-
|
|
697
|
+
not (
|
|
698
|
+
_is_bool_list(selected_times)
|
|
699
|
+
and len(selected_times) == new_n_media_times
|
|
700
|
+
)
|
|
701
|
+
and not (
|
|
702
|
+
_is_str_list(selected_times)
|
|
703
|
+
and new_time is not None
|
|
704
|
+
and set(selected_times) <= set(new_time)
|
|
705
|
+
)
|
|
678
706
|
):
|
|
679
707
|
raise ValueError(
|
|
680
708
|
"If `media`, `reach`, `frequency`, `organic_media`,"
|
|
681
709
|
" `organic_reach`, `organic_frequency`, `non_media_treatments`, or"
|
|
682
710
|
" `revenue_per_kpi` is provided with a different number of time"
|
|
683
|
-
" periods than in `InputData`, then `selected_times` must be a list"
|
|
711
|
+
" periods than in `InputData`, then (1) `selected_times` must be a list"
|
|
684
712
|
" of booleans with length equal to the number of time periods in"
|
|
685
|
-
" the new data
|
|
713
|
+
" the new data, or (2) `selected_times` must be a list of strings and"
|
|
714
|
+
" `new_time` must be provided and `selected_times` must be a subset of"
|
|
715
|
+
" `new_time`."
|
|
686
716
|
)
|
|
687
717
|
|
|
688
718
|
if media_selected_times and (
|
|
689
|
-
not
|
|
690
|
-
|
|
719
|
+
not (
|
|
720
|
+
_is_bool_list(media_selected_times)
|
|
721
|
+
and len(media_selected_times) == new_n_media_times
|
|
722
|
+
)
|
|
723
|
+
and not (
|
|
724
|
+
_is_str_list(media_selected_times)
|
|
725
|
+
and new_time is not None
|
|
726
|
+
and set(media_selected_times) <= set(new_time)
|
|
727
|
+
)
|
|
691
728
|
):
|
|
692
729
|
raise ValueError(
|
|
693
730
|
"If `media`, `reach`, `frequency`, `organic_media`,"
|
|
694
731
|
" `organic_reach`, `organic_frequency`, `non_media_treatments`, or"
|
|
695
732
|
" `revenue_per_kpi` is provided with a different number of time"
|
|
696
|
-
" periods than in `InputData`, then `media_selected_times` must be"
|
|
733
|
+
" periods than in `InputData`, then (1) `media_selected_times` must be"
|
|
697
734
|
" a list of booleans with length equal to the number of time"
|
|
698
|
-
" periods in the new data
|
|
735
|
+
" periods in the new data, or (2) `media_selected_times` must be a list"
|
|
736
|
+
" of strings and `new_time` must be provided and"
|
|
737
|
+
" `media_selected_times` must be a subset of `new_time`."
|
|
699
738
|
)
|
|
700
739
|
|
|
701
740
|
|
|
@@ -753,8 +792,8 @@ def _scale_tensors_by_multiplier(
|
|
|
753
792
|
|
|
754
793
|
|
|
755
794
|
def _central_tendency_and_ci_by_prior_and_posterior(
|
|
756
|
-
prior:
|
|
757
|
-
posterior:
|
|
795
|
+
prior: backend.Tensor,
|
|
796
|
+
posterior: backend.Tensor,
|
|
758
797
|
metric_name: str,
|
|
759
798
|
xr_dims: Sequence[str],
|
|
760
799
|
xr_coords: Mapping[str, tuple[Sequence[str], Sequence[str]]],
|
|
@@ -799,16 +838,16 @@ class Analyzer:
|
|
|
799
838
|
def __init__(self, meridian: model.Meridian):
|
|
800
839
|
self._meridian = meridian
|
|
801
840
|
# Make the meridian object ready for methods in this analyzer that create
|
|
802
|
-
#
|
|
803
|
-
# states mutation before those graphs execute.
|
|
841
|
+
# backend.function computation graphs: it should be frozen for no more
|
|
842
|
+
# internal states mutation before those graphs execute.
|
|
804
843
|
self._meridian.populate_cached_properties()
|
|
805
844
|
|
|
806
|
-
@
|
|
845
|
+
@backend.function(jit_compile=True)
|
|
807
846
|
def _get_kpi_means(
|
|
808
847
|
self,
|
|
809
848
|
data_tensors: DataTensors,
|
|
810
849
|
dist_tensors: DistributionTensors,
|
|
811
|
-
) ->
|
|
850
|
+
) -> backend.Tensor:
|
|
812
851
|
"""Computes batched KPI means.
|
|
813
852
|
|
|
814
853
|
Note that the output array has the same number of time periods as the media
|
|
@@ -827,7 +866,7 @@ class Analyzer:
|
|
|
827
866
|
Returns:
|
|
828
867
|
Tensor representing computed kpi means.
|
|
829
868
|
"""
|
|
830
|
-
tau_gt =
|
|
869
|
+
tau_gt = backend.expand_dims(dist_tensors.tau_g, -1) + backend.expand_dims(
|
|
831
870
|
dist_tensors.mu_t, -2
|
|
832
871
|
)
|
|
833
872
|
combined_media_transformed, combined_beta = (
|
|
@@ -837,17 +876,17 @@ class Analyzer:
|
|
|
837
876
|
)
|
|
838
877
|
)
|
|
839
878
|
|
|
840
|
-
result = tau_gt +
|
|
879
|
+
result = tau_gt + backend.einsum(
|
|
841
880
|
"...gtm,...gm->...gt", combined_media_transformed, combined_beta
|
|
842
881
|
)
|
|
843
882
|
if self._meridian.controls is not None:
|
|
844
|
-
result +=
|
|
883
|
+
result += backend.einsum(
|
|
845
884
|
"...gtc,...gc->...gt",
|
|
846
885
|
data_tensors.controls,
|
|
847
886
|
dist_tensors.gamma_gc,
|
|
848
887
|
)
|
|
849
888
|
if data_tensors.non_media_treatments is not None:
|
|
850
|
-
result +=
|
|
889
|
+
result += backend.einsum(
|
|
851
890
|
"...gtm,...gm->...gt",
|
|
852
891
|
data_tensors.non_media_treatments,
|
|
853
892
|
dist_tensors.gamma_gn,
|
|
@@ -902,8 +941,8 @@ class Analyzer:
|
|
|
902
941
|
"""Computes decayed effect means and CIs for media or RF channels.
|
|
903
942
|
|
|
904
943
|
Args:
|
|
905
|
-
channel_type: Specifies `media`, `
|
|
906
|
-
prior and posterior decayed effects.
|
|
944
|
+
channel_type: Specifies `media`, `rf`, `organic_media`, or `organic_rf`
|
|
945
|
+
for computing prior and posterior decayed effects.
|
|
907
946
|
l_range: The range of time across which the adstock effect is computed.
|
|
908
947
|
xr_dims: A list of dimensions for the output dataset.
|
|
909
948
|
xr_coords: A dictionary with the coordinates for the output dataset.
|
|
@@ -914,42 +953,66 @@ class Analyzer:
|
|
|
914
953
|
Pandas DataFrame containing the channel, time_units, distribution, ci_hi,
|
|
915
954
|
ci_lo, and mean decayed effects for either media or RF channel types.
|
|
916
955
|
"""
|
|
917
|
-
|
|
956
|
+
window_size = min(
|
|
957
|
+
self._meridian.model_spec.max_lag + 1, self._meridian.n_media_times
|
|
958
|
+
)
|
|
959
|
+
if channel_type == constants.MEDIA:
|
|
918
960
|
prior = self._meridian.inference_data.prior.alpha_m.values[0]
|
|
919
961
|
posterior = np.reshape(
|
|
920
962
|
self._meridian.inference_data.posterior.alpha_m.values,
|
|
921
963
|
(-1, self._meridian.n_media_channels),
|
|
922
964
|
)
|
|
923
|
-
|
|
965
|
+
decay_functions = self._meridian.adstock_decay_spec.media
|
|
966
|
+
elif channel_type == constants.RF:
|
|
924
967
|
prior = self._meridian.inference_data.prior.alpha_rf.values[0]
|
|
925
968
|
posterior = np.reshape(
|
|
926
969
|
self._meridian.inference_data.posterior.alpha_rf.values,
|
|
927
970
|
(-1, self._meridian.n_rf_channels),
|
|
928
971
|
)
|
|
929
|
-
|
|
972
|
+
decay_functions = self._meridian.adstock_decay_spec.rf
|
|
973
|
+
elif channel_type == constants.ORGANIC_MEDIA:
|
|
930
974
|
prior = self._meridian.inference_data.prior.alpha_om.values[0]
|
|
931
975
|
posterior = np.reshape(
|
|
932
976
|
self._meridian.inference_data.posterior.alpha_om.values,
|
|
933
977
|
(-1, self._meridian.n_organic_media_channels),
|
|
934
978
|
)
|
|
979
|
+
decay_functions = self._meridian.adstock_decay_spec.organic_media
|
|
980
|
+
elif channel_type == constants.ORGANIC_RF:
|
|
981
|
+
prior = self._meridian.inference_data.prior.alpha_orf.values[0]
|
|
982
|
+
posterior = np.reshape(
|
|
983
|
+
self._meridian.inference_data.posterior.alpha_orf.values,
|
|
984
|
+
(-1, self._meridian.n_organic_rf_channels),
|
|
985
|
+
)
|
|
986
|
+
decay_functions = self._meridian.adstock_decay_spec.organic_rf
|
|
935
987
|
else:
|
|
936
988
|
raise ValueError(
|
|
937
989
|
f"Unsupported channel type for adstock decay: '{channel_type}'. "
|
|
938
990
|
)
|
|
939
991
|
|
|
940
|
-
decayed_effect_prior = (
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
992
|
+
decayed_effect_prior = adstock_hill.compute_decay_weights(
|
|
993
|
+
alpha=backend.to_tensor(
|
|
994
|
+
prior[backend.newaxis, ...], dtype=backend.float32
|
|
995
|
+
),
|
|
996
|
+
l_range=backend.to_tensor(l_range, dtype=backend.float32),
|
|
997
|
+
window_size=window_size,
|
|
998
|
+
decay_functions=decay_functions,
|
|
999
|
+
normalize=False,
|
|
1000
|
+
)
|
|
1001
|
+
decayed_effect_posterior = adstock_hill.compute_decay_weights(
|
|
1002
|
+
alpha=backend.to_tensor(
|
|
1003
|
+
posterior[backend.newaxis, ...], dtype=backend.float32
|
|
1004
|
+
),
|
|
1005
|
+
l_range=backend.to_tensor(l_range, dtype=backend.float32),
|
|
1006
|
+
window_size=window_size,
|
|
1007
|
+
decay_functions=decay_functions,
|
|
1008
|
+
normalize=False,
|
|
946
1009
|
)
|
|
947
1010
|
|
|
948
|
-
decayed_effect_prior_transpose =
|
|
949
|
-
decayed_effect_prior, perm=[
|
|
1011
|
+
decayed_effect_prior_transpose = backend.transpose(
|
|
1012
|
+
decayed_effect_prior, perm=[0, 1, 3, 2]
|
|
950
1013
|
)
|
|
951
|
-
decayed_effect_posterior_transpose =
|
|
952
|
-
decayed_effect_posterior, perm=[
|
|
1014
|
+
decayed_effect_posterior_transpose = backend.transpose(
|
|
1015
|
+
decayed_effect_posterior, perm=[0, 1, 3, 2]
|
|
953
1016
|
)
|
|
954
1017
|
adstock_dataset = _central_tendency_and_ci_by_prior_and_posterior(
|
|
955
1018
|
decayed_effect_prior_transpose,
|
|
@@ -1157,7 +1220,7 @@ class Analyzer:
|
|
|
1157
1220
|
data_tensors: DataTensors,
|
|
1158
1221
|
dist_tensors: DistributionTensors,
|
|
1159
1222
|
n_times_output: int | None = None,
|
|
1160
|
-
) -> tuple[
|
|
1223
|
+
) -> tuple[backend.Tensor | None, backend.Tensor | None]:
|
|
1161
1224
|
"""Function for transforming media using adstock and hill functions.
|
|
1162
1225
|
|
|
1163
1226
|
This transforms the media tensor using the adstock and hill functions, in
|
|
@@ -1185,6 +1248,7 @@ class Analyzer:
|
|
|
1185
1248
|
alpha=dist_tensors.alpha_m,
|
|
1186
1249
|
ec=dist_tensors.ec_m,
|
|
1187
1250
|
slope=dist_tensors.slope_m,
|
|
1251
|
+
decay_functions=self._meridian.adstock_decay_spec.media,
|
|
1188
1252
|
n_times_output=n_times_output,
|
|
1189
1253
|
)
|
|
1190
1254
|
)
|
|
@@ -1198,6 +1262,7 @@ class Analyzer:
|
|
|
1198
1262
|
alpha=dist_tensors.alpha_rf,
|
|
1199
1263
|
ec=dist_tensors.ec_rf,
|
|
1200
1264
|
slope=dist_tensors.slope_rf,
|
|
1265
|
+
decay_functions=self._meridian.adstock_decay_spec.rf,
|
|
1201
1266
|
n_times_output=n_times_output,
|
|
1202
1267
|
)
|
|
1203
1268
|
)
|
|
@@ -1209,6 +1274,7 @@ class Analyzer:
|
|
|
1209
1274
|
alpha=dist_tensors.alpha_om,
|
|
1210
1275
|
ec=dist_tensors.ec_om,
|
|
1211
1276
|
slope=dist_tensors.slope_om,
|
|
1277
|
+
decay_functions=self._meridian.adstock_decay_spec.organic_media,
|
|
1212
1278
|
n_times_output=n_times_output,
|
|
1213
1279
|
)
|
|
1214
1280
|
)
|
|
@@ -1221,25 +1287,26 @@ class Analyzer:
|
|
|
1221
1287
|
alpha=dist_tensors.alpha_orf,
|
|
1222
1288
|
ec=dist_tensors.ec_orf,
|
|
1223
1289
|
slope=dist_tensors.slope_orf,
|
|
1290
|
+
decay_functions=self._meridian.adstock_decay_spec.organic_rf,
|
|
1224
1291
|
n_times_output=n_times_output,
|
|
1225
1292
|
)
|
|
1226
1293
|
)
|
|
1227
1294
|
combined_betas.append(dist_tensors.beta_gorf)
|
|
1228
1295
|
|
|
1229
|
-
combined_media_transformed =
|
|
1230
|
-
combined_beta =
|
|
1296
|
+
combined_media_transformed = backend.concatenate(combined_medias, axis=-1)
|
|
1297
|
+
combined_beta = backend.concatenate(combined_betas, axis=-1)
|
|
1231
1298
|
return combined_media_transformed, combined_beta
|
|
1232
1299
|
|
|
1233
1300
|
def filter_and_aggregate_geos_and_times(
|
|
1234
1301
|
self,
|
|
1235
|
-
tensor:
|
|
1302
|
+
tensor: backend.Tensor,
|
|
1236
1303
|
selected_geos: Sequence[str] | None = None,
|
|
1237
1304
|
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
1238
1305
|
aggregate_geos: bool = True,
|
|
1239
1306
|
aggregate_times: bool = True,
|
|
1240
1307
|
flexible_time_dim: bool = False,
|
|
1241
1308
|
has_media_dim: bool = True,
|
|
1242
|
-
) ->
|
|
1309
|
+
) -> backend.Tensor:
|
|
1243
1310
|
"""Filters and/or aggregates geo and time dimensions of a tensor.
|
|
1244
1311
|
|
|
1245
1312
|
Args:
|
|
@@ -1299,12 +1366,12 @@ class Analyzer:
|
|
|
1299
1366
|
c + 1 for c in allowed_n_channels
|
|
1300
1367
|
]
|
|
1301
1368
|
expected_shapes_w_media = [
|
|
1302
|
-
|
|
1369
|
+
backend.TensorShape(shape)
|
|
1303
1370
|
for shape in itertools.product(
|
|
1304
1371
|
[mmm.n_geos], [n_times], allowed_channel_dim
|
|
1305
1372
|
)
|
|
1306
1373
|
]
|
|
1307
|
-
expected_shape_wo_media =
|
|
1374
|
+
expected_shape_wo_media = backend.TensorShape([mmm.n_geos, n_times])
|
|
1308
1375
|
if not flexible_time_dim:
|
|
1309
1376
|
if tensor.shape[-3:] in expected_shapes_w_media:
|
|
1310
1377
|
has_media_dim = True
|
|
@@ -1338,7 +1405,7 @@ class Analyzer:
|
|
|
1338
1405
|
"meridian.InputData."
|
|
1339
1406
|
)
|
|
1340
1407
|
geo_mask = [x in selected_geos for x in mmm.input_data.geo]
|
|
1341
|
-
tensor =
|
|
1408
|
+
tensor = backend.boolean_mask(tensor, geo_mask, axis=geo_dim)
|
|
1342
1409
|
|
|
1343
1410
|
if selected_times is not None:
|
|
1344
1411
|
_validate_selected_times(
|
|
@@ -1350,9 +1417,9 @@ class Analyzer:
|
|
|
1350
1417
|
)
|
|
1351
1418
|
if _is_str_list(selected_times):
|
|
1352
1419
|
time_mask = [x in selected_times for x in mmm.input_data.time]
|
|
1353
|
-
tensor =
|
|
1420
|
+
tensor = backend.boolean_mask(tensor, time_mask, axis=time_dim)
|
|
1354
1421
|
elif _is_bool_list(selected_times):
|
|
1355
|
-
tensor =
|
|
1422
|
+
tensor = backend.boolean_mask(tensor, selected_times, axis=time_dim)
|
|
1356
1423
|
|
|
1357
1424
|
tensor_dims = "...gt" + "m" * has_media_dim
|
|
1358
1425
|
output_dims = (
|
|
@@ -1360,7 +1427,7 @@ class Analyzer:
|
|
|
1360
1427
|
+ "t" * (not aggregate_times)
|
|
1361
1428
|
+ "m" * has_media_dim
|
|
1362
1429
|
)
|
|
1363
|
-
return
|
|
1430
|
+
return backend.einsum(f"{tensor_dims}->...{output_dims}", tensor)
|
|
1364
1431
|
|
|
1365
1432
|
def expected_outcome(
|
|
1366
1433
|
self,
|
|
@@ -1373,7 +1440,7 @@ class Analyzer:
|
|
|
1373
1440
|
inverse_transform_outcome: bool = True,
|
|
1374
1441
|
use_kpi: bool = False,
|
|
1375
1442
|
batch_size: int = constants.DEFAULT_BATCH_SIZE,
|
|
1376
|
-
) ->
|
|
1443
|
+
) -> backend.Tensor:
|
|
1377
1444
|
"""Calculates either prior or posterior expected outcome.
|
|
1378
1445
|
|
|
1379
1446
|
This calculates `E(Outcome|Media, RF, Organic media, Organic RF, Non-media
|
|
@@ -1492,7 +1559,7 @@ class Analyzer:
|
|
|
1492
1559
|
|
|
1493
1560
|
n_draws = params.draw.size
|
|
1494
1561
|
n_chains = params.chain.size
|
|
1495
|
-
outcome_means =
|
|
1562
|
+
outcome_means = backend.zeros(
|
|
1496
1563
|
(n_chains, 0, self._meridian.n_geos, self._meridian.n_times)
|
|
1497
1564
|
)
|
|
1498
1565
|
batch_starting_indices = np.arange(n_draws, step=batch_size)
|
|
@@ -1508,7 +1575,7 @@ class Analyzer:
|
|
|
1508
1575
|
for start_index in batch_starting_indices:
|
|
1509
1576
|
stop_index = np.min([n_draws, start_index + batch_size])
|
|
1510
1577
|
batch_dists = {
|
|
1511
|
-
k:
|
|
1578
|
+
k: backend.to_tensor(params[k][:, start_index:stop_index, ...])
|
|
1512
1579
|
for k in param_list
|
|
1513
1580
|
}
|
|
1514
1581
|
dist_tensors = DistributionTensors(**batch_dists)
|
|
@@ -1519,7 +1586,9 @@ class Analyzer:
|
|
|
1519
1586
|
dist_tensors=dist_tensors,
|
|
1520
1587
|
)
|
|
1521
1588
|
)
|
|
1522
|
-
outcome_means =
|
|
1589
|
+
outcome_means = backend.concatenate(
|
|
1590
|
+
[outcome_means, *outcome_means_temps], axis=1
|
|
1591
|
+
)
|
|
1523
1592
|
if inverse_transform_outcome:
|
|
1524
1593
|
outcome_means = self._meridian.kpi_transformer.inverse(outcome_means)
|
|
1525
1594
|
if not use_kpi:
|
|
@@ -1562,7 +1631,7 @@ class Analyzer:
|
|
|
1562
1631
|
data_tensors: DataTensors,
|
|
1563
1632
|
dist_tensors: DistributionTensors,
|
|
1564
1633
|
non_media_treatments_baseline_normalized: Sequence[float] | None = None,
|
|
1565
|
-
) ->
|
|
1634
|
+
) -> backend.Tensor:
|
|
1566
1635
|
"""Computes incremental KPI distribution.
|
|
1567
1636
|
|
|
1568
1637
|
Args:
|
|
@@ -1611,28 +1680,28 @@ class Analyzer:
|
|
|
1611
1680
|
n_times_output=n_times_output,
|
|
1612
1681
|
)
|
|
1613
1682
|
)
|
|
1614
|
-
combined_media_kpi =
|
|
1683
|
+
combined_media_kpi = backend.einsum(
|
|
1615
1684
|
"...gtm,...gm->...gtm",
|
|
1616
1685
|
combined_media_transformed,
|
|
1617
1686
|
combined_beta,
|
|
1618
1687
|
)
|
|
1619
1688
|
if data_tensors.non_media_treatments is not None:
|
|
1620
|
-
non_media_kpi =
|
|
1689
|
+
non_media_kpi = backend.einsum(
|
|
1621
1690
|
"gtn,...gn->...gtn",
|
|
1622
1691
|
data_tensors.non_media_treatments
|
|
1623
1692
|
- non_media_treatments_baseline_normalized,
|
|
1624
1693
|
dist_tensors.gamma_gn,
|
|
1625
1694
|
)
|
|
1626
|
-
return
|
|
1695
|
+
return backend.concatenate([combined_media_kpi, non_media_kpi], axis=-1)
|
|
1627
1696
|
else:
|
|
1628
1697
|
return combined_media_kpi
|
|
1629
1698
|
|
|
1630
1699
|
def _inverse_outcome(
|
|
1631
1700
|
self,
|
|
1632
|
-
modeled_incremental_outcome:
|
|
1701
|
+
modeled_incremental_outcome: backend.Tensor,
|
|
1633
1702
|
use_kpi: bool,
|
|
1634
|
-
revenue_per_kpi:
|
|
1635
|
-
) ->
|
|
1703
|
+
revenue_per_kpi: backend.Tensor | None,
|
|
1704
|
+
) -> backend.Tensor:
|
|
1636
1705
|
"""Inverses incremental outcome (revenue or KPI).
|
|
1637
1706
|
|
|
1638
1707
|
This method assumes that additive changes on the model kpi scale
|
|
@@ -1656,16 +1725,16 @@ class Analyzer:
|
|
|
1656
1725
|
if revenue_per_kpi is None:
|
|
1657
1726
|
revenue_per_kpi = self._meridian.revenue_per_kpi
|
|
1658
1727
|
t1 = self._meridian.kpi_transformer.inverse(
|
|
1659
|
-
|
|
1728
|
+
backend.einsum("...m->m...", modeled_incremental_outcome)
|
|
1660
1729
|
)
|
|
1661
|
-
t2 = self._meridian.kpi_transformer.inverse(
|
|
1662
|
-
kpi =
|
|
1730
|
+
t2 = self._meridian.kpi_transformer.inverse(backend.zeros_like(t1))
|
|
1731
|
+
kpi = backend.einsum("m...->...m", t1 - t2)
|
|
1663
1732
|
|
|
1664
1733
|
if use_kpi:
|
|
1665
1734
|
return kpi
|
|
1666
|
-
return
|
|
1735
|
+
return backend.einsum("gt,...gtm->...gtm", revenue_per_kpi, kpi)
|
|
1667
1736
|
|
|
1668
|
-
@
|
|
1737
|
+
@backend.function(jit_compile=True)
|
|
1669
1738
|
def _incremental_outcome_impl(
|
|
1670
1739
|
self,
|
|
1671
1740
|
data_tensors: DataTensors,
|
|
@@ -1677,7 +1746,7 @@ class Analyzer:
|
|
|
1677
1746
|
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
1678
1747
|
aggregate_geos: bool = True,
|
|
1679
1748
|
aggregate_times: bool = True,
|
|
1680
|
-
) ->
|
|
1749
|
+
) -> backend.Tensor:
|
|
1681
1750
|
"""Computes incremental outcome (revenue or KPI) on a batch of data.
|
|
1682
1751
|
|
|
1683
1752
|
Args:
|
|
@@ -1722,9 +1791,11 @@ class Analyzer:
|
|
|
1722
1791
|
selected_geos: Contains a subset of geos to include. By default, all geos
|
|
1723
1792
|
are included.
|
|
1724
1793
|
selected_times: An optional string list containing a subset of
|
|
1725
|
-
`
|
|
1726
|
-
number of time periods in `
|
|
1727
|
-
|
|
1794
|
+
`input_data.time` to include or a boolean list with length equal to the
|
|
1795
|
+
number of time periods in `data_tensors` if time is modified in
|
|
1796
|
+
`data_tensors`, or `input_data.n_times` otherwise. If time in
|
|
1797
|
+
`data_tensors` is modified, then only the boolean list can be used as
|
|
1798
|
+
`selected_times`. By default, all time periods are included.
|
|
1728
1799
|
aggregate_geos: If True, then incremental outcome is summed over all
|
|
1729
1800
|
regions.
|
|
1730
1801
|
aggregate_times: If True, then incremental outcome is summed over all time
|
|
@@ -1767,6 +1838,7 @@ class Analyzer:
|
|
|
1767
1838
|
has_media_dim=True,
|
|
1768
1839
|
)
|
|
1769
1840
|
|
|
1841
|
+
# TODO: b/407847021 - Add support for `new_data.time`.
|
|
1770
1842
|
def incremental_outcome(
|
|
1771
1843
|
self,
|
|
1772
1844
|
use_posterior: bool = True,
|
|
@@ -1784,7 +1856,7 @@ class Analyzer:
|
|
|
1784
1856
|
by_reach: bool = True,
|
|
1785
1857
|
include_non_paid_channels: bool = True,
|
|
1786
1858
|
batch_size: int = constants.DEFAULT_BATCH_SIZE,
|
|
1787
|
-
) ->
|
|
1859
|
+
) -> backend.Tensor:
|
|
1788
1860
|
"""Calculates either the posterior or prior incremental outcome.
|
|
1789
1861
|
|
|
1790
1862
|
This calculates the media outcome of each media channel for each posterior
|
|
@@ -1869,26 +1941,27 @@ class Analyzer:
|
|
|
1869
1941
|
default, all geos are included.
|
|
1870
1942
|
selected_times: Optional list containing either a subset of dates to
|
|
1871
1943
|
include or booleans with length equal to the number of time periods in
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
`
|
|
1876
|
-
|
|
1877
|
-
|
|
1944
|
+
`new_data` if time is modified in `new_data`, or `input_data.n_times`
|
|
1945
|
+
otherwise. The incremental outcome corresponds to incremental KPI
|
|
1946
|
+
generated during the `selected_times` arg by media executed during the
|
|
1947
|
+
`media_selected_times` arg. Note that if `use_kpi=False`, then
|
|
1948
|
+
`selected_times` can only include the time periods that have
|
|
1949
|
+
`revenue_per_kpi` input data. By default, all time periods are included
|
|
1950
|
+
where `revenue_per_kpi` data is available.
|
|
1878
1951
|
media_selected_times: Optional list containing either a subset of dates to
|
|
1879
1952
|
include or booleans with length equal to the number of time periods in
|
|
1880
|
-
|
|
1881
|
-
`media_selected_times` can select any subset
|
|
1882
|
-
`new_data`. If `new_data` is not provided,
|
|
1883
|
-
selects from `InputData.time`. The incremental
|
|
1884
|
-
incremental KPI generated during the
|
|
1885
|
-
variables executed during the
|
|
1886
|
-
channel, the incremental outcome is
|
|
1887
|
-
expected KPI when treatment variables
|
|
1888
|
-
`scaling_factor1` and `scaling_factor0` during
|
|
1889
|
-
periods. By default, the difference is between
|
|
1890
|
-
historical execution levels, or as provided in
|
|
1891
|
-
execution. Defaults to include all time periods.
|
|
1953
|
+
KPI data or number of time periods in the `new_data` args, if provided.
|
|
1954
|
+
If `new_data` is provided, `media_selected_times` can select any subset
|
|
1955
|
+
of time periods in `new_data`. If `new_data` is not provided,
|
|
1956
|
+
`media_selected_times` selects from `InputData.time`. The incremental
|
|
1957
|
+
outcome corresponds to incremental KPI generated during the
|
|
1958
|
+
`selected_times` arg by treatment variables executed during the
|
|
1959
|
+
`media_selected_times` arg. For each channel, the incremental outcome is
|
|
1960
|
+
defined as the difference between expected KPI when treatment variables
|
|
1961
|
+
execution is scaled by `scaling_factor1` and `scaling_factor0` during
|
|
1962
|
+
these specified time periods. By default, the difference is between
|
|
1963
|
+
treatment variables at historical execution levels, or as provided in
|
|
1964
|
+
`new_data`, versus zero execution. Defaults to include all time periods.
|
|
1892
1965
|
aggregate_geos: Boolean. If `True`, then incremental outcome is summed
|
|
1893
1966
|
over all regions.
|
|
1894
1967
|
aggregate_times: Boolean. If `True`, then incremental outcome is summed
|
|
@@ -2018,11 +2091,11 @@ class Analyzer:
|
|
|
2018
2091
|
non_media_treatments_baseline_scaled,
|
|
2019
2092
|
apply_population_scaling=False,
|
|
2020
2093
|
)
|
|
2021
|
-
non_media_treatments0 =
|
|
2022
|
-
|
|
2023
|
-
non_media_treatments_baseline_normalized, dtype=
|
|
2024
|
-
)[
|
|
2025
|
-
|
|
2094
|
+
non_media_treatments0 = backend.broadcast_to(
|
|
2095
|
+
backend.to_tensor(
|
|
2096
|
+
non_media_treatments_baseline_normalized, dtype=backend.float32
|
|
2097
|
+
)[backend.newaxis, backend.newaxis, :],
|
|
2098
|
+
data_tensors.non_media_treatments.shape, # pytype: disable=attribute-error
|
|
2026
2099
|
)
|
|
2027
2100
|
else:
|
|
2028
2101
|
non_media_treatments_baseline_normalized = None
|
|
@@ -2088,7 +2161,7 @@ class Analyzer:
|
|
|
2088
2161
|
for i, start_index in enumerate(batch_starting_indices):
|
|
2089
2162
|
stop_index = np.min([n_draws, start_index + batch_size])
|
|
2090
2163
|
batch_dists = {
|
|
2091
|
-
k:
|
|
2164
|
+
k: backend.to_tensor(params[k][:, start_index:stop_index, ...])
|
|
2092
2165
|
for k in param_list
|
|
2093
2166
|
}
|
|
2094
2167
|
dist_tensors = DistributionTensors(**batch_dists)
|
|
@@ -2106,12 +2179,12 @@ class Analyzer:
|
|
|
2106
2179
|
**dim_kwargs,
|
|
2107
2180
|
**incremental_outcome_kwargs,
|
|
2108
2181
|
)
|
|
2109
|
-
return
|
|
2182
|
+
return backend.concatenate(incremental_outcome_temps, axis=1)
|
|
2110
2183
|
|
|
2111
2184
|
def _validate_geo_and_time_granularity(
|
|
2112
2185
|
self,
|
|
2113
2186
|
selected_geos: Sequence[str] | None = None,
|
|
2114
|
-
selected_times: Sequence[str] | None = None,
|
|
2187
|
+
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
2115
2188
|
aggregate_geos: bool = True,
|
|
2116
2189
|
):
|
|
2117
2190
|
"""Validates the geo and time granularity arguments for ROI analysis.
|
|
@@ -2119,8 +2192,9 @@ class Analyzer:
|
|
|
2119
2192
|
Args:
|
|
2120
2193
|
selected_geos: Optional. Contains a subset of geos to include. By default,
|
|
2121
2194
|
all geos are included.
|
|
2122
|
-
selected_times: Optional. Contains a subset of times to include
|
|
2123
|
-
default, all time periods
|
|
2195
|
+
selected_times: Optional. Contains a subset of times to include or
|
|
2196
|
+
booleans with length `input_data.n_times`. By default, all time periods
|
|
2197
|
+
are included.
|
|
2124
2198
|
aggregate_geos: If `True`, then expected revenue is summed over all
|
|
2125
2199
|
regions.
|
|
2126
2200
|
|
|
@@ -2180,7 +2254,7 @@ class Analyzer:
|
|
|
2180
2254
|
by_reach: bool = True,
|
|
2181
2255
|
use_kpi: bool = False,
|
|
2182
2256
|
batch_size: int = constants.DEFAULT_BATCH_SIZE,
|
|
2183
|
-
) ->
|
|
2257
|
+
) -> backend.Tensor:
|
|
2184
2258
|
"""Calculates the marginal ROI prior or posterior distribution.
|
|
2185
2259
|
|
|
2186
2260
|
The marginal ROI (mROI) numerator is the change in expected outcome (`kpi`
|
|
@@ -2290,7 +2364,7 @@ class Analyzer:
|
|
|
2290
2364
|
"dimension."
|
|
2291
2365
|
)
|
|
2292
2366
|
denominator = spend_inc
|
|
2293
|
-
return
|
|
2367
|
+
return backend.divide_no_nan(numerator, denominator)
|
|
2294
2368
|
|
|
2295
2369
|
def roi(
|
|
2296
2370
|
self,
|
|
@@ -2301,7 +2375,7 @@ class Analyzer:
|
|
|
2301
2375
|
aggregate_geos: bool = True,
|
|
2302
2376
|
use_kpi: bool = False,
|
|
2303
2377
|
batch_size: int = constants.DEFAULT_BATCH_SIZE,
|
|
2304
|
-
) ->
|
|
2378
|
+
) -> backend.Tensor:
|
|
2305
2379
|
"""Calculates ROI prior or posterior distribution for each media channel.
|
|
2306
2380
|
|
|
2307
2381
|
The ROI numerator is the change in expected outcome (`kpi` or `kpi *
|
|
@@ -2406,7 +2480,7 @@ class Analyzer:
|
|
|
2406
2480
|
"dimension."
|
|
2407
2481
|
)
|
|
2408
2482
|
denominator = spend
|
|
2409
|
-
return
|
|
2483
|
+
return backend.divide_no_nan(incremental_outcome, denominator)
|
|
2410
2484
|
|
|
2411
2485
|
def cpik(
|
|
2412
2486
|
self,
|
|
@@ -2416,7 +2490,7 @@ class Analyzer:
|
|
|
2416
2490
|
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
2417
2491
|
aggregate_geos: bool = True,
|
|
2418
2492
|
batch_size: int = constants.DEFAULT_BATCH_SIZE,
|
|
2419
|
-
) ->
|
|
2493
|
+
) -> backend.Tensor:
|
|
2420
2494
|
"""Calculates the cost per incremental KPI distribution for each channel.
|
|
2421
2495
|
|
|
2422
2496
|
The CPIK numerator is the total spend on the channel. The CPIK denominator
|
|
@@ -2481,11 +2555,11 @@ class Analyzer:
|
|
|
2481
2555
|
aggregate_geos=aggregate_geos,
|
|
2482
2556
|
batch_size=batch_size,
|
|
2483
2557
|
)
|
|
2484
|
-
return
|
|
2558
|
+
return backend.divide_no_nan(1.0, roi)
|
|
2485
2559
|
|
|
2486
2560
|
def _mean_and_ci_by_eval_set(
|
|
2487
2561
|
self,
|
|
2488
|
-
draws:
|
|
2562
|
+
draws: backend.Tensor,
|
|
2489
2563
|
split_by_holdout: bool,
|
|
2490
2564
|
aggregate_geos: bool = True,
|
|
2491
2565
|
aggregate_times: bool = True,
|
|
@@ -2656,7 +2730,7 @@ class Analyzer:
|
|
|
2656
2730
|
self,
|
|
2657
2731
|
non_media_baseline_values: Sequence[float] | None = None,
|
|
2658
2732
|
**expected_outcome_kwargs,
|
|
2659
|
-
) ->
|
|
2733
|
+
) -> backend.Tensor:
|
|
2660
2734
|
"""Calculates either the posterior or prior expected outcome of baseline.
|
|
2661
2735
|
|
|
2662
2736
|
This is a wrapper for expected_outcome() that automatically sets the
|
|
@@ -2673,8 +2747,8 @@ class Analyzer:
|
|
|
2673
2747
|
|
|
2674
2748
|
Args:
|
|
2675
2749
|
non_media_baseline_values: Optional list of shape
|
|
2676
|
-
`(n_non_media_channels,)`. Each element is a float
|
|
2677
|
-
|
|
2750
|
+
`(n_non_media_channels,)`. Each element is a float denoting a fixed
|
|
2751
|
+
value that will be used as the baseline for the given channel. It is
|
|
2678
2752
|
expected that they are scaled by population for the channels where
|
|
2679
2753
|
`model_spec.non_media_population_scaling_id` is `True`. If `None`, the
|
|
2680
2754
|
`model_spec.non_media_baseline_values` is used, which defaults to the
|
|
@@ -2690,43 +2764,47 @@ class Analyzer:
|
|
|
2690
2764
|
dropped if `aggregate_geos=True` or `aggregate_time=True`, respectively.
|
|
2691
2765
|
"""
|
|
2692
2766
|
new_media = (
|
|
2693
|
-
|
|
2767
|
+
backend.zeros_like(self._meridian.media_tensors.media)
|
|
2694
2768
|
if self._meridian.media_tensors.media is not None
|
|
2695
2769
|
else None
|
|
2696
2770
|
)
|
|
2697
2771
|
# Frequency is not needed because the reach is zero.
|
|
2698
2772
|
new_reach = (
|
|
2699
|
-
|
|
2773
|
+
backend.zeros_like(self._meridian.rf_tensors.reach)
|
|
2700
2774
|
if self._meridian.rf_tensors.reach is not None
|
|
2701
2775
|
else None
|
|
2702
2776
|
)
|
|
2703
2777
|
new_organic_media = (
|
|
2704
|
-
|
|
2778
|
+
backend.zeros_like(self._meridian.organic_media_tensors.organic_media)
|
|
2705
2779
|
if self._meridian.organic_media_tensors.organic_media is not None
|
|
2706
2780
|
else None
|
|
2707
2781
|
)
|
|
2708
2782
|
new_organic_reach = (
|
|
2709
|
-
|
|
2783
|
+
backend.zeros_like(self._meridian.organic_rf_tensors.organic_reach)
|
|
2710
2784
|
if self._meridian.organic_rf_tensors.organic_reach is not None
|
|
2711
2785
|
else None
|
|
2712
2786
|
)
|
|
2713
2787
|
if self._meridian.non_media_treatments is not None:
|
|
2714
2788
|
if self._meridian.model_spec.non_media_population_scaling_id is not None:
|
|
2715
|
-
scaling_factors =
|
|
2789
|
+
scaling_factors = backend.where(
|
|
2716
2790
|
self._meridian.model_spec.non_media_population_scaling_id,
|
|
2717
|
-
self._meridian.population[:,
|
|
2718
|
-
|
|
2791
|
+
self._meridian.population[:, backend.newaxis, backend.newaxis],
|
|
2792
|
+
backend.ones_like(self._meridian.population)[
|
|
2793
|
+
:, backend.newaxis, backend.newaxis
|
|
2794
|
+
],
|
|
2719
2795
|
)
|
|
2720
2796
|
else:
|
|
2721
|
-
scaling_factors =
|
|
2722
|
-
:,
|
|
2797
|
+
scaling_factors = backend.ones_like(self._meridian.population)[
|
|
2798
|
+
:, backend.newaxis, backend.newaxis
|
|
2723
2799
|
]
|
|
2724
2800
|
|
|
2725
2801
|
baseline = self._meridian.compute_non_media_treatments_baseline(
|
|
2726
2802
|
non_media_baseline_values=non_media_baseline_values,
|
|
2727
2803
|
)
|
|
2728
|
-
new_non_media_treatments_population_scaled =
|
|
2729
|
-
|
|
2804
|
+
new_non_media_treatments_population_scaled = backend.broadcast_to(
|
|
2805
|
+
backend.to_tensor(baseline, dtype=backend.float32)[
|
|
2806
|
+
backend.newaxis, backend.newaxis, :
|
|
2807
|
+
],
|
|
2730
2808
|
self._meridian.non_media_treatments.shape,
|
|
2731
2809
|
)
|
|
2732
2810
|
new_non_media_treatments = (
|
|
@@ -2754,7 +2832,7 @@ class Analyzer:
|
|
|
2754
2832
|
include_non_paid_channels: bool = True,
|
|
2755
2833
|
non_media_baseline_values: Sequence[float] | None = None,
|
|
2756
2834
|
**kwargs,
|
|
2757
|
-
) ->
|
|
2835
|
+
) -> backend.Tensor:
|
|
2758
2836
|
"""Aggregates the incremental outcome of the media channels.
|
|
2759
2837
|
|
|
2760
2838
|
Args:
|
|
@@ -2806,11 +2884,11 @@ class Analyzer:
|
|
|
2806
2884
|
non_media_baseline_values=non_media_baseline_values,
|
|
2807
2885
|
**kwargs,
|
|
2808
2886
|
)
|
|
2809
|
-
incremental_outcome_total =
|
|
2887
|
+
incremental_outcome_total = backend.reduce_sum(
|
|
2810
2888
|
incremental_outcome_m, axis=-1, keepdims=True
|
|
2811
2889
|
)
|
|
2812
2890
|
|
|
2813
|
-
return
|
|
2891
|
+
return backend.concatenate(
|
|
2814
2892
|
[incremental_outcome_m, incremental_outcome_total],
|
|
2815
2893
|
axis=-1,
|
|
2816
2894
|
)
|
|
@@ -2940,10 +3018,10 @@ class Analyzer:
|
|
|
2940
3018
|
include_non_paid_channels=include_non_paid_channels,
|
|
2941
3019
|
**dim_kwargs,
|
|
2942
3020
|
)
|
|
2943
|
-
impressions_with_total =
|
|
3021
|
+
impressions_with_total = backend.concatenate(
|
|
2944
3022
|
[
|
|
2945
3023
|
aggregated_impressions,
|
|
2946
|
-
|
|
3024
|
+
backend.reduce_sum(aggregated_impressions, -1, keepdims=True),
|
|
2947
3025
|
],
|
|
2948
3026
|
axis=-1,
|
|
2949
3027
|
)
|
|
@@ -3127,12 +3205,15 @@ class Analyzer:
|
|
|
3127
3205
|
spend_list.append(new_spend_tensors.rf_spend)
|
|
3128
3206
|
# TODO Add support for 1-dimensional spend.
|
|
3129
3207
|
aggregated_spend = self.filter_and_aggregate_geos_and_times(
|
|
3130
|
-
tensor=
|
|
3208
|
+
tensor=backend.concatenate(spend_list, axis=-1),
|
|
3131
3209
|
flexible_time_dim=True,
|
|
3132
3210
|
**dim_kwargs,
|
|
3133
3211
|
)
|
|
3134
|
-
spend_with_total =
|
|
3135
|
-
[
|
|
3212
|
+
spend_with_total = backend.concatenate(
|
|
3213
|
+
[
|
|
3214
|
+
aggregated_spend,
|
|
3215
|
+
backend.reduce_sum(aggregated_spend, -1, keepdims=True),
|
|
3216
|
+
],
|
|
3136
3217
|
axis=-1,
|
|
3137
3218
|
)
|
|
3138
3219
|
spend_data = self._compute_spend_data_aggregate(
|
|
@@ -3220,7 +3301,7 @@ class Analyzer:
|
|
|
3220
3301
|
aggregate_times: bool = True,
|
|
3221
3302
|
optimal_frequency: Sequence[float] | None = None,
|
|
3222
3303
|
include_non_paid_channels: bool = True,
|
|
3223
|
-
) ->
|
|
3304
|
+
) -> backend.Tensor:
|
|
3224
3305
|
"""Computes aggregated impressions values in the data across all channels.
|
|
3225
3306
|
|
|
3226
3307
|
Args:
|
|
@@ -3277,7 +3358,9 @@ class Analyzer:
|
|
|
3277
3358
|
if optimal_frequency is None:
|
|
3278
3359
|
new_frequency = data_tensors.frequency
|
|
3279
3360
|
else:
|
|
3280
|
-
new_frequency =
|
|
3361
|
+
new_frequency = (
|
|
3362
|
+
backend.ones_like(data_tensors.frequency) * optimal_frequency
|
|
3363
|
+
)
|
|
3281
3364
|
impressions_list.append(
|
|
3282
3365
|
data_tensors.reach[:, -n_times:, :] * new_frequency[:, -n_times:, :]
|
|
3283
3366
|
)
|
|
@@ -3290,7 +3373,8 @@ class Analyzer:
|
|
|
3290
3373
|
new_organic_frequency = data_tensors.organic_frequency
|
|
3291
3374
|
else:
|
|
3292
3375
|
new_organic_frequency = (
|
|
3293
|
-
|
|
3376
|
+
backend.ones_like(data_tensors.organic_frequency)
|
|
3377
|
+
* optimal_frequency
|
|
3294
3378
|
)
|
|
3295
3379
|
impressions_list.append(
|
|
3296
3380
|
data_tensors.organic_reach[:, -n_times:, :]
|
|
@@ -3300,7 +3384,7 @@ class Analyzer:
|
|
|
3300
3384
|
impressions_list.append(data_tensors.non_media_treatments)
|
|
3301
3385
|
|
|
3302
3386
|
return self.filter_and_aggregate_geos_and_times(
|
|
3303
|
-
tensor=
|
|
3387
|
+
tensor=backend.concatenate(impressions_list, axis=-1),
|
|
3304
3388
|
selected_geos=selected_geos,
|
|
3305
3389
|
selected_times=selected_times,
|
|
3306
3390
|
aggregate_geos=aggregate_geos,
|
|
@@ -3402,7 +3486,7 @@ class Analyzer:
|
|
|
3402
3486
|
use_posterior=True, use_kpi=use_kpi, **outcome_kwargs
|
|
3403
3487
|
)
|
|
3404
3488
|
|
|
3405
|
-
baseline_expected_outcome_prior =
|
|
3489
|
+
baseline_expected_outcome_prior = backend.expand_dims(
|
|
3406
3490
|
self._calculate_baseline_expected_outcome(
|
|
3407
3491
|
use_posterior=False,
|
|
3408
3492
|
use_kpi=use_kpi,
|
|
@@ -3411,7 +3495,7 @@ class Analyzer:
|
|
|
3411
3495
|
),
|
|
3412
3496
|
axis=-1,
|
|
3413
3497
|
)
|
|
3414
|
-
baseline_expected_outcome_posterior =
|
|
3498
|
+
baseline_expected_outcome_posterior = backend.expand_dims(
|
|
3415
3499
|
self._calculate_baseline_expected_outcome(
|
|
3416
3500
|
use_posterior=True,
|
|
3417
3501
|
use_kpi=use_kpi,
|
|
@@ -3453,8 +3537,8 @@ class Analyzer:
|
|
|
3453
3537
|
freq_grid: Sequence[float] | None = None,
|
|
3454
3538
|
use_posterior: bool = True,
|
|
3455
3539
|
use_kpi: bool = False,
|
|
3456
|
-
selected_geos: Sequence[str
|
|
3457
|
-
selected_times: Sequence[str |
|
|
3540
|
+
selected_geos: Sequence[str] | None = None,
|
|
3541
|
+
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
3458
3542
|
confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
|
|
3459
3543
|
) -> xr.Dataset:
|
|
3460
3544
|
"""Calculates the optimal frequency that maximizes posterior mean ROI.
|
|
@@ -3502,8 +3586,8 @@ class Analyzer:
|
|
|
3502
3586
|
default, all geos are included.
|
|
3503
3587
|
selected_times: Optional list containing either a subset of dates to
|
|
3504
3588
|
include or booleans with length equal to the number of time periods in
|
|
3505
|
-
|
|
3506
|
-
included.
|
|
3589
|
+
`new_data` if time is modified in `new_data`, or `input_data.n_times`
|
|
3590
|
+
otherwise. By default, all time periods are included.
|
|
3507
3591
|
confidence_level: Confidence level for prior and posterior credible
|
|
3508
3592
|
intervals, represented as a value between zero and one.
|
|
3509
3593
|
|
|
@@ -3563,10 +3647,10 @@ class Analyzer:
|
|
|
3563
3647
|
n_times = (
|
|
3564
3648
|
filled_data.get_modified_times(self._meridian) or self._meridian.n_times
|
|
3565
3649
|
)
|
|
3566
|
-
dummy_media =
|
|
3650
|
+
dummy_media = backend.ones(
|
|
3567
3651
|
(self._meridian.n_geos, n_media_times, self._meridian.n_media_channels)
|
|
3568
3652
|
)
|
|
3569
|
-
dummy_media_spend =
|
|
3653
|
+
dummy_media_spend = backend.ones(
|
|
3570
3654
|
(self._meridian.n_geos, n_times, self._meridian.n_media_channels)
|
|
3571
3655
|
)
|
|
3572
3656
|
|
|
@@ -3582,7 +3666,7 @@ class Analyzer:
|
|
|
3582
3666
|
metric_grid = np.zeros((len(freq_grid), self._meridian.n_rf_channels, 4))
|
|
3583
3667
|
|
|
3584
3668
|
for i, freq in enumerate(freq_grid):
|
|
3585
|
-
new_frequency =
|
|
3669
|
+
new_frequency = backend.ones_like(filled_data.rf_impressions) * freq
|
|
3586
3670
|
new_reach = filled_data.rf_impressions / new_frequency
|
|
3587
3671
|
new_roi_data = DataTensors(
|
|
3588
3672
|
reach=new_reach,
|
|
@@ -3612,9 +3696,9 @@ class Analyzer:
|
|
|
3612
3696
|
)
|
|
3613
3697
|
|
|
3614
3698
|
optimal_frequency = [freq_grid[i] for i in optimal_freq_idx]
|
|
3615
|
-
optimal_frequency_tensor =
|
|
3616
|
-
|
|
3617
|
-
|
|
3699
|
+
optimal_frequency_tensor = backend.to_tensor(
|
|
3700
|
+
backend.ones_like(filled_data.rf_impressions) * optimal_frequency,
|
|
3701
|
+
backend.float32,
|
|
3618
3702
|
)
|
|
3619
3703
|
optimal_reach = filled_data.rf_impressions / optimal_frequency_tensor
|
|
3620
3704
|
|
|
@@ -3778,10 +3862,12 @@ class Analyzer:
|
|
|
3778
3862
|
input_tensor = self._meridian.kpi * self._meridian.revenue_per_kpi
|
|
3779
3863
|
else:
|
|
3780
3864
|
input_tensor = self._meridian.kpi
|
|
3781
|
-
actual =
|
|
3782
|
-
|
|
3783
|
-
|
|
3784
|
-
|
|
3865
|
+
actual = np.asarray(
|
|
3866
|
+
self.filter_and_aggregate_geos_and_times(
|
|
3867
|
+
tensor=input_tensor,
|
|
3868
|
+
**dims_kwargs,
|
|
3869
|
+
)
|
|
3870
|
+
)
|
|
3785
3871
|
expected = np.mean(
|
|
3786
3872
|
self.expected_outcome(
|
|
3787
3873
|
batch_size=batch_size, use_kpi=use_kpi, **dims_kwargs
|
|
@@ -3888,7 +3974,7 @@ class Analyzer:
|
|
|
3888
3974
|
|
|
3889
3975
|
return holdout_id
|
|
3890
3976
|
|
|
3891
|
-
def get_rhat(self) -> Mapping[str,
|
|
3977
|
+
def get_rhat(self) -> Mapping[str, backend.Tensor]:
|
|
3892
3978
|
"""Computes the R-hat values for each parameter in the model.
|
|
3893
3979
|
|
|
3894
3980
|
Returns:
|
|
@@ -3904,12 +3990,12 @@ class Analyzer:
|
|
|
3904
3990
|
"sample_posterior() must be called prior to calling this method."
|
|
3905
3991
|
)
|
|
3906
3992
|
|
|
3907
|
-
def _transpose_first_two_dims(x:
|
|
3993
|
+
def _transpose_first_two_dims(x: backend.Tensor) -> backend.Tensor:
|
|
3908
3994
|
n_dim = len(x.shape)
|
|
3909
3995
|
perm = [1, 0] + list(range(2, n_dim))
|
|
3910
|
-
return
|
|
3996
|
+
return backend.transpose(x, perm)
|
|
3911
3997
|
|
|
3912
|
-
rhat =
|
|
3998
|
+
rhat = backend.mcmc.potential_scale_reduction({
|
|
3913
3999
|
k: _transpose_first_two_dims(v)
|
|
3914
4000
|
for k, v in self._meridian.inference_data.posterior.data_vars.items()
|
|
3915
4001
|
})
|
|
@@ -3966,12 +4052,13 @@ class Analyzer:
|
|
|
3966
4052
|
if self._meridian.prior_broadcast.has_deterministic_param(param):
|
|
3967
4053
|
continue
|
|
3968
4054
|
|
|
3969
|
-
|
|
3970
|
-
|
|
3971
|
-
|
|
3972
|
-
|
|
3973
|
-
row_idx = bad_idx[0]
|
|
4055
|
+
if rhat[param].ndim == 2:
|
|
4056
|
+
row_idx, col_idx = np.where(rhat[param] > bad_rhat_threshold)
|
|
4057
|
+
elif rhat[param].ndim == 1:
|
|
4058
|
+
row_idx = np.where(rhat[param] > bad_rhat_threshold)[0]
|
|
3974
4059
|
col_idx = []
|
|
4060
|
+
elif rhat[param].ndim == 0:
|
|
4061
|
+
row_idx = col_idx = []
|
|
3975
4062
|
else:
|
|
3976
4063
|
raise ValueError(f"Unexpected dimension for parameter {param}.")
|
|
3977
4064
|
|
|
@@ -3992,6 +4079,7 @@ class Analyzer:
|
|
|
3992
4079
|
|
|
3993
4080
|
def response_curves(
|
|
3994
4081
|
self,
|
|
4082
|
+
new_data: DataTensors | None = None,
|
|
3995
4083
|
spend_multipliers: list[float] | None = None,
|
|
3996
4084
|
use_posterior: bool = True,
|
|
3997
4085
|
selected_geos: Sequence[str] | None = None,
|
|
@@ -4010,13 +4098,22 @@ class Analyzer:
|
|
|
4010
4098
|
|
|
4011
4099
|
A list of multipliers is applied to each media channel's total historical
|
|
4012
4100
|
spend within `selected_geos` and `selected_times` to obtain the x-axis
|
|
4013
|
-
values. The y-axis values are the incremental
|
|
4101
|
+
values. The y-axis values are the incremental outcome generated by each
|
|
4014
4102
|
channel within `selected_geos` and `selected_times` under the counterfactual
|
|
4015
4103
|
where media units in each geo and time period are scaled by the
|
|
4016
4104
|
corresponding multiplier. (Media units for time periods prior to
|
|
4017
4105
|
`selected_times` are also scaled by the multiplier.)
|
|
4018
4106
|
|
|
4019
4107
|
Args:
|
|
4108
|
+
new_data: Optional `DataTensors` object with optional new tensors:
|
|
4109
|
+
`media`, `reach`, `frequency`, `media_spend`, `rf_spend`,
|
|
4110
|
+
`revenue_per_kpi`, `times`. If provided, the response curves are
|
|
4111
|
+
calculated using the values of the tensors passed in `new_data` and the
|
|
4112
|
+
original values of all the remaining tensors. If `None`, the response
|
|
4113
|
+
curves are calculated using the original values of all the tensors. If
|
|
4114
|
+
any of the tensors in `new_data` is provided with a different number of
|
|
4115
|
+
time periods than in `InputData`, then all tensors must be provided with
|
|
4116
|
+
the same number of time periods and the `time` tensor must be provided.
|
|
4020
4117
|
spend_multipliers: List of multipliers. Each channel's total spend is
|
|
4021
4118
|
multiplied by these factors to obtain the values at which the curve is
|
|
4022
4119
|
calculated for that channel.
|
|
@@ -4024,9 +4121,11 @@ class Analyzer:
|
|
|
4024
4121
|
generated. If `False`, prior response curves are generated.
|
|
4025
4122
|
selected_geos: Optional list containing a subset of geos to include. By
|
|
4026
4123
|
default, all geos are included.
|
|
4027
|
-
selected_times: Optional list
|
|
4028
|
-
|
|
4029
|
-
|
|
4124
|
+
selected_times: Optional list containing a subset of dates to include. If
|
|
4125
|
+
`new_data` is provided with modified time periods, then `selected_times`
|
|
4126
|
+
must be a subset of `new_data.times`. Otherwise, `selected_times` must
|
|
4127
|
+
be a subset of `self._meridian.input_data.time`. By default, all time
|
|
4128
|
+
periods are included.
|
|
4030
4129
|
by_reach: Boolean. For channels with reach and frequency. If `True`, plots
|
|
4031
4130
|
the response curve by reach. If `False`, plots the response curve by
|
|
4032
4131
|
frequency.
|
|
@@ -4055,24 +4154,62 @@ class Analyzer:
|
|
|
4055
4154
|
"aggregate_geos": True,
|
|
4056
4155
|
"aggregate_times": True,
|
|
4057
4156
|
}
|
|
4157
|
+
if new_data is None:
|
|
4158
|
+
new_data = DataTensors()
|
|
4159
|
+
# TODO: b/442920356 - Support flexible time without providing exact dates.
|
|
4160
|
+
required_tensors_names = constants.PERFORMANCE_DATA + (constants.TIME,)
|
|
4161
|
+
filled_data = new_data.validate_and_fill_missing_data(
|
|
4162
|
+
required_tensors_names=required_tensors_names,
|
|
4163
|
+
meridian=self._meridian,
|
|
4164
|
+
allow_modified_times=True,
|
|
4165
|
+
)
|
|
4166
|
+
new_n_media_times = filled_data.get_modified_times(self._meridian)
|
|
4167
|
+
|
|
4168
|
+
if new_n_media_times is None:
|
|
4169
|
+
_validate_selected_times(
|
|
4170
|
+
selected_times=selected_times,
|
|
4171
|
+
input_times=self._meridian.input_data.time,
|
|
4172
|
+
n_times=self._meridian.n_times,
|
|
4173
|
+
arg_name="selected_times",
|
|
4174
|
+
comparison_arg_name="the input data",
|
|
4175
|
+
)
|
|
4176
|
+
else:
|
|
4177
|
+
new_time = np.asarray(filled_data.time).astype(str).tolist()
|
|
4178
|
+
_validate_flexible_selected_times(
|
|
4179
|
+
selected_times=selected_times,
|
|
4180
|
+
media_selected_times=None,
|
|
4181
|
+
new_n_media_times=new_n_media_times,
|
|
4182
|
+
new_time=new_time,
|
|
4183
|
+
)
|
|
4184
|
+
# TODO: b/407847021 - Switch to Sequence[str] once it is supported.
|
|
4185
|
+
if selected_times is not None:
|
|
4186
|
+
selected_times = [x in selected_times for x in new_time]
|
|
4187
|
+
dim_kwargs["selected_times"] = selected_times
|
|
4188
|
+
|
|
4058
4189
|
if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
|
|
4059
|
-
|
|
4060
|
-
|
|
4061
|
-
|
|
4190
|
+
opt_freq_data = DataTensors(
|
|
4191
|
+
media=filled_data.media,
|
|
4192
|
+
rf_impressions=filled_data.reach * filled_data.frequency,
|
|
4193
|
+
media_spend=filled_data.media_spend,
|
|
4194
|
+
rf_spend=filled_data.rf_spend,
|
|
4195
|
+
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
4196
|
+
)
|
|
4197
|
+
frequency = backend.ones_like(filled_data.frequency) * backend.to_tensor(
|
|
4062
4198
|
self.optimal_freq(
|
|
4199
|
+
new_data=opt_freq_data,
|
|
4063
4200
|
selected_geos=selected_geos,
|
|
4064
4201
|
selected_times=selected_times,
|
|
4065
4202
|
use_kpi=use_kpi,
|
|
4066
4203
|
).optimal_frequency,
|
|
4067
|
-
dtype=
|
|
4204
|
+
dtype=backend.float32,
|
|
4068
4205
|
)
|
|
4069
|
-
reach =
|
|
4070
|
-
|
|
4206
|
+
reach = backend.divide_no_nan(
|
|
4207
|
+
filled_data.reach * filled_data.frequency,
|
|
4071
4208
|
frequency,
|
|
4072
4209
|
)
|
|
4073
4210
|
else:
|
|
4074
|
-
frequency =
|
|
4075
|
-
reach =
|
|
4211
|
+
frequency = filled_data.frequency
|
|
4212
|
+
reach = filled_data.reach
|
|
4076
4213
|
if spend_multipliers is None:
|
|
4077
4214
|
spend_multipliers = list(np.arange(0, 2.2, 0.2))
|
|
4078
4215
|
incremental_outcome = np.zeros((
|
|
@@ -4082,22 +4219,23 @@ class Analyzer:
|
|
|
4082
4219
|
))
|
|
4083
4220
|
for i, multiplier in enumerate(spend_multipliers):
|
|
4084
4221
|
if multiplier == 0:
|
|
4085
|
-
incremental_outcome[i, :, :] =
|
|
4222
|
+
incremental_outcome[i, :, :] = backend.zeros(
|
|
4086
4223
|
(len(self._meridian.input_data.get_all_paid_channels()), 3)
|
|
4087
4224
|
) # Last dimension = 3 for the mean, ci_lo and ci_hi.
|
|
4088
4225
|
continue
|
|
4089
|
-
|
|
4226
|
+
scaled_data = _scale_tensors_by_multiplier(
|
|
4090
4227
|
data=DataTensors(
|
|
4091
|
-
media=
|
|
4228
|
+
media=filled_data.media,
|
|
4092
4229
|
reach=reach,
|
|
4093
4230
|
frequency=frequency,
|
|
4231
|
+
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
4094
4232
|
),
|
|
4095
4233
|
multiplier=multiplier,
|
|
4096
4234
|
by_reach=by_reach,
|
|
4097
4235
|
)
|
|
4098
4236
|
inc_outcome_temp = self.incremental_outcome(
|
|
4099
4237
|
use_posterior=use_posterior,
|
|
4100
|
-
new_data=
|
|
4238
|
+
new_data=scaled_data.filter_fields(constants.PAID_DATA),
|
|
4101
4239
|
inverse_transform_outcome=True,
|
|
4102
4240
|
batch_size=batch_size,
|
|
4103
4241
|
use_kpi=use_kpi,
|
|
@@ -4108,25 +4246,14 @@ class Analyzer:
|
|
|
4108
4246
|
inc_outcome_temp, confidence_level
|
|
4109
4247
|
)
|
|
4110
4248
|
|
|
4111
|
-
|
|
4112
|
-
|
|
4113
|
-
[
|
|
4114
|
-
self._meridian.media_tensors.media_spend,
|
|
4115
|
-
self._meridian.rf_tensors.rf_spend,
|
|
4116
|
-
],
|
|
4117
|
-
axis=-1,
|
|
4118
|
-
)
|
|
4119
|
-
elif self._meridian.n_media_channels > 0:
|
|
4120
|
-
spend = self._meridian.media_tensors.media_spend
|
|
4121
|
-
else:
|
|
4122
|
-
spend = self._meridian.rf_tensors.rf_spend
|
|
4123
|
-
|
|
4124
|
-
if tf.rank(spend) == 3:
|
|
4249
|
+
spend = filled_data.total_spend()
|
|
4250
|
+
if spend is not None and spend.ndim == 3:
|
|
4125
4251
|
spend = self.filter_and_aggregate_geos_and_times(
|
|
4126
4252
|
tensor=spend,
|
|
4253
|
+
flexible_time_dim=True,
|
|
4127
4254
|
**dim_kwargs,
|
|
4128
4255
|
)
|
|
4129
|
-
spend_einsum =
|
|
4256
|
+
spend_einsum = backend.einsum("k,m->km", np.array(spend_multipliers), spend)
|
|
4130
4257
|
xr_coords = {
|
|
4131
4258
|
constants.CHANNEL: self._meridian.input_data.get_all_paid_channels(),
|
|
4132
4259
|
constants.METRIC: [
|
|
@@ -4197,60 +4324,45 @@ class Analyzer:
|
|
|
4197
4324
|
}
|
|
4198
4325
|
final_df_list = []
|
|
4199
4326
|
|
|
4200
|
-
|
|
4201
|
-
|
|
4202
|
-
|
|
4203
|
-
|
|
4204
|
-
|
|
4205
|
-
|
|
4206
|
-
|
|
4207
|
-
|
|
4208
|
-
|
|
4209
|
-
|
|
4210
|
-
|
|
4211
|
-
|
|
4212
|
-
|
|
4213
|
-
|
|
4214
|
-
|
|
4215
|
-
|
|
4216
|
-
|
|
4217
|
-
|
|
4327
|
+
def _add_adstock_decay_for_channel(
|
|
4328
|
+
n_channels: int,
|
|
4329
|
+
channel_data: xr.DataArray | None,
|
|
4330
|
+
adstock_channel_type: str,
|
|
4331
|
+
) -> None:
|
|
4332
|
+
"""Helper to compute and append adstock decay data for a channel type."""
|
|
4333
|
+
if n_channels > 0:
|
|
4334
|
+
channel_values = channel_data.values if channel_data is not None else []
|
|
4335
|
+
xr_coords = base_xr_coords | {constants.CHANNEL: channel_values}
|
|
4336
|
+
adstock_df = self._get_adstock_dataframe(
|
|
4337
|
+
adstock_channel_type,
|
|
4338
|
+
l_range,
|
|
4339
|
+
xr_dims,
|
|
4340
|
+
xr_coords,
|
|
4341
|
+
confidence_level,
|
|
4342
|
+
)
|
|
4343
|
+
if not adstock_df.empty:
|
|
4344
|
+
final_df_list.append(adstock_df)
|
|
4218
4345
|
|
|
4219
|
-
|
|
4220
|
-
|
|
4221
|
-
|
|
4222
|
-
|
|
4223
|
-
|
|
4224
|
-
|
|
4225
|
-
|
|
4226
|
-
|
|
4227
|
-
|
|
4228
|
-
|
|
4229
|
-
|
|
4230
|
-
|
|
4231
|
-
|
|
4232
|
-
|
|
4233
|
-
|
|
4234
|
-
|
|
4235
|
-
|
|
4236
|
-
|
|
4237
|
-
|
|
4238
|
-
|
|
4239
|
-
if self._meridian.input_data.organic_media_channel is not None
|
|
4240
|
-
else []
|
|
4241
|
-
)
|
|
4242
|
-
organic_media_xr_coords = base_xr_coords | {
|
|
4243
|
-
constants.CHANNEL: organic_media_channel_values
|
|
4244
|
-
}
|
|
4245
|
-
adstock_df_om = self._get_adstock_dataframe(
|
|
4246
|
-
constants.ORGANIC_MEDIA,
|
|
4247
|
-
l_range,
|
|
4248
|
-
xr_dims,
|
|
4249
|
-
organic_media_xr_coords,
|
|
4250
|
-
confidence_level,
|
|
4251
|
-
)
|
|
4252
|
-
if not adstock_df_om.empty:
|
|
4253
|
-
final_df_list.append(adstock_df_om)
|
|
4346
|
+
_add_adstock_decay_for_channel(
|
|
4347
|
+
self._meridian.n_media_channels,
|
|
4348
|
+
self._meridian.input_data.media_channel,
|
|
4349
|
+
constants.MEDIA,
|
|
4350
|
+
)
|
|
4351
|
+
_add_adstock_decay_for_channel(
|
|
4352
|
+
self._meridian.n_rf_channels,
|
|
4353
|
+
self._meridian.input_data.rf_channel,
|
|
4354
|
+
constants.RF,
|
|
4355
|
+
)
|
|
4356
|
+
_add_adstock_decay_for_channel(
|
|
4357
|
+
self._meridian.n_organic_media_channels,
|
|
4358
|
+
self._meridian.input_data.organic_media_channel,
|
|
4359
|
+
constants.ORGANIC_MEDIA,
|
|
4360
|
+
)
|
|
4361
|
+
_add_adstock_decay_for_channel(
|
|
4362
|
+
self._meridian.n_organic_rf_channels,
|
|
4363
|
+
self._meridian.input_data.organic_rf_channel,
|
|
4364
|
+
constants.ORGANIC_RF,
|
|
4365
|
+
)
|
|
4254
4366
|
|
|
4255
4367
|
final_df = pd.concat(final_df_list, ignore_index=True)
|
|
4256
4368
|
# Adding an extra column that indicates whether time_units is an integer
|
|
@@ -4275,7 +4387,8 @@ class Analyzer:
|
|
|
4275
4387
|
Returns:
|
|
4276
4388
|
A DataFrame with data needed to plot the Hill curves, with columns:
|
|
4277
4389
|
|
|
4278
|
-
* `channel`: `media`, `rf`, or `
|
|
4390
|
+
* `channel`: `media`, `rf`, `organic_media`, or `organic_rf` channel
|
|
4391
|
+
name.
|
|
4279
4392
|
* `media_units`: Media (for `media` channels) or average frequency (for
|
|
4280
4393
|
`rf` channels) units.
|
|
4281
4394
|
* `distribution`: Indication of `posterior` or `prior` draw.
|
|
@@ -4284,12 +4397,12 @@ class Analyzer:
|
|
|
4284
4397
|
* `ci_lo`: Lower bound of the credible interval of the value of the Hill
|
|
4285
4398
|
function.
|
|
4286
4399
|
* `mean`: Point-wise mean of the value of the Hill function per draw.
|
|
4287
|
-
* channel_type: Indication of a `media`, `rf`,
|
|
4288
|
-
channel
|
|
4400
|
+
* channel_type: Indication of a `media`, `rf`, `organic_media`
|
|
4401
|
+
channel, or `organic_rf`.
|
|
4289
4402
|
|
|
4290
4403
|
Raises:
|
|
4291
4404
|
ValueError: If `channel_type` is not one of the recognized constants
|
|
4292
|
-
`media`, `rf`, or `
|
|
4405
|
+
`media`, `rf`, `organic_media`, or `organic_rf`.
|
|
4293
4406
|
"""
|
|
4294
4407
|
if (
|
|
4295
4408
|
channel_type == constants.MEDIA
|
|
@@ -4327,10 +4440,23 @@ class Analyzer:
|
|
|
4327
4440
|
np.array(self._meridian.organic_media_tensors.organic_media_scaled),
|
|
4328
4441
|
axis=(0, 1),
|
|
4329
4442
|
)
|
|
4443
|
+
elif (
|
|
4444
|
+
channel_type == constants.ORGANIC_RF
|
|
4445
|
+
and self._meridian.input_data.organic_rf_channel is not None
|
|
4446
|
+
):
|
|
4447
|
+
ec = constants.EC_ORF
|
|
4448
|
+
slope = constants.SLOPE_ORF
|
|
4449
|
+
channels = self._meridian.input_data.organic_rf_channel.values
|
|
4450
|
+
transformer = None
|
|
4451
|
+
linspace_max_values = np.max(
|
|
4452
|
+
np.array(self._meridian.organic_rf_tensors.organic_frequency),
|
|
4453
|
+
axis=(0, 1),
|
|
4454
|
+
)
|
|
4330
4455
|
else:
|
|
4331
4456
|
raise ValueError(
|
|
4332
4457
|
f"Unsupported channel type: {channel_type} or the requested type of"
|
|
4333
|
-
" channels (`media`, `rf`, or `
|
|
4458
|
+
" channels (`media`, `rf`, `organic_media`, or `organic_rf`) are not"
|
|
4459
|
+
" present."
|
|
4334
4460
|
)
|
|
4335
4461
|
linspace = np.linspace(
|
|
4336
4462
|
0,
|
|
@@ -4352,7 +4478,7 @@ class Analyzer:
|
|
|
4352
4478
|
}
|
|
4353
4479
|
# Expanding the linspace by one dimension since the HillTransformer requires
|
|
4354
4480
|
# 3-dimensional input as (geo, time, channel).
|
|
4355
|
-
expanded_linspace =
|
|
4481
|
+
expanded_linspace = backend.expand_dims(linspace, axis=0)
|
|
4356
4482
|
# Including [:, :, 0, :, :] in the output of the Hill Function to reduce the
|
|
4357
4483
|
# tensors by the geo dimension. Original Hill dimension shape is (n_chains,
|
|
4358
4484
|
# n_draws, n_geos, n_times, n_channels), and we want to plot the
|
|
@@ -4374,36 +4500,44 @@ class Analyzer:
|
|
|
4374
4500
|
xr_coords,
|
|
4375
4501
|
confidence_level,
|
|
4376
4502
|
)
|
|
4377
|
-
|
|
4503
|
+
|
|
4504
|
+
df_raw = (
|
|
4378
4505
|
hill_dataset[constants.HILL_SATURATION_LEVEL]
|
|
4379
4506
|
.to_dataframe()
|
|
4380
4507
|
.reset_index()
|
|
4381
|
-
.pivot(
|
|
4382
|
-
index=[
|
|
4383
|
-
constants.CHANNEL,
|
|
4384
|
-
constants.MEDIA_UNITS,
|
|
4385
|
-
constants.DISTRIBUTION,
|
|
4386
|
-
],
|
|
4387
|
-
columns=constants.METRIC,
|
|
4388
|
-
values=constants.HILL_SATURATION_LEVEL,
|
|
4389
|
-
)
|
|
4390
|
-
.reset_index()
|
|
4391
4508
|
)
|
|
4392
4509
|
|
|
4510
|
+
# Ensure the channel order matches the tensor order (defined by 'channels')
|
|
4511
|
+
# by using a Categorical type before pivoting. This prevents pivot from
|
|
4512
|
+
# sorting alphabetically, which can cause misalignment between channel names
|
|
4513
|
+
# and the calculated media units derived later from the tensor order.
|
|
4514
|
+
df_raw[constants.CHANNEL] = pd.Categorical(
|
|
4515
|
+
df_raw[constants.CHANNEL], categories=channels
|
|
4516
|
+
)
|
|
4517
|
+
df = df_raw.pivot(
|
|
4518
|
+
index=[
|
|
4519
|
+
constants.CHANNEL,
|
|
4520
|
+
constants.MEDIA_UNITS,
|
|
4521
|
+
constants.DISTRIBUTION,
|
|
4522
|
+
],
|
|
4523
|
+
columns=constants.METRIC,
|
|
4524
|
+
values=constants.HILL_SATURATION_LEVEL,
|
|
4525
|
+
).reset_index()
|
|
4526
|
+
|
|
4393
4527
|
# Fill media_units or frequency x-axis with the correct range.
|
|
4394
4528
|
media_units_arr = []
|
|
4395
4529
|
if transformer is not None:
|
|
4396
4530
|
population_scaled_median = transformer.population_scaled_median_m
|
|
4397
|
-
x_range_full_shape = linspace *
|
|
4398
|
-
population_scaled_median[:,
|
|
4531
|
+
x_range_full_shape = linspace * backend.transpose(
|
|
4532
|
+
population_scaled_median[:, backend.newaxis]
|
|
4399
4533
|
)
|
|
4400
4534
|
else:
|
|
4401
4535
|
x_range_full_shape = linspace
|
|
4402
4536
|
|
|
4403
4537
|
# Flatten this into a list.
|
|
4404
|
-
x_range_list = (
|
|
4405
|
-
|
|
4406
|
-
)
|
|
4538
|
+
x_range_list = np.asarray(
|
|
4539
|
+
backend.reshape(backend.transpose(x_range_full_shape), [-1])
|
|
4540
|
+
).tolist()
|
|
4407
4541
|
|
|
4408
4542
|
# Doubles each value in the list to account for alternating prior
|
|
4409
4543
|
# and posterior.
|
|
@@ -4419,14 +4553,15 @@ class Analyzer:
|
|
|
4419
4553
|
def _get_channel_hill_histogram_dataframe(
|
|
4420
4554
|
self,
|
|
4421
4555
|
channel_type: str,
|
|
4422
|
-
data_to_histogram:
|
|
4556
|
+
data_to_histogram: backend.Tensor,
|
|
4423
4557
|
channel_names: Sequence[str],
|
|
4424
4558
|
n_bins: int,
|
|
4425
4559
|
) -> pd.DataFrame:
|
|
4426
4560
|
"""Calculates hill histogram dataframe for a given channel type's values.
|
|
4427
4561
|
|
|
4428
4562
|
Args:
|
|
4429
|
-
channel_type: The type of channel (e.g., 'rf', 'media', 'organic_media'
|
|
4563
|
+
channel_type: The type of channel (e.g., 'rf', 'media', 'organic_media',
|
|
4564
|
+
'organic_rf').
|
|
4430
4565
|
data_to_histogram: The 2D tensor (observations, channels). containing the
|
|
4431
4566
|
data whose distribution needs to be histogrammed for each channel.
|
|
4432
4567
|
channel_names: The names corresponding to the channels in
|
|
@@ -4448,7 +4583,7 @@ class Analyzer:
|
|
|
4448
4583
|
}
|
|
4449
4584
|
|
|
4450
4585
|
for i, channel_name in enumerate(channel_names):
|
|
4451
|
-
channel_data_np = data_to_histogram[:, i]
|
|
4586
|
+
channel_data_np = np.asarray(data_to_histogram[:, i])
|
|
4452
4587
|
channel_data_np = channel_data_np[~np.isnan(channel_data_np)]
|
|
4453
4588
|
if channel_data_np.size == 0:
|
|
4454
4589
|
continue
|
|
@@ -4510,7 +4645,7 @@ class Analyzer:
|
|
|
4510
4645
|
if self._meridian.input_data.rf_channel is not None:
|
|
4511
4646
|
frequency = self._meridian.rf_tensors.frequency
|
|
4512
4647
|
if frequency is not None:
|
|
4513
|
-
reshaped_frequency =
|
|
4648
|
+
reshaped_frequency = backend.reshape(
|
|
4514
4649
|
frequency, (n_geos * n_media_times, self._meridian.n_rf_channels)
|
|
4515
4650
|
)
|
|
4516
4651
|
rf_hist_data = self._get_channel_hill_histogram_dataframe(
|
|
@@ -4528,7 +4663,7 @@ class Analyzer:
|
|
|
4528
4663
|
if transformer is not None and scaled is not None:
|
|
4529
4664
|
population_scaled_median = transformer.population_scaled_median_m
|
|
4530
4665
|
scaled_media_units = scaled * population_scaled_median
|
|
4531
|
-
reshaped_scaled_media_units =
|
|
4666
|
+
reshaped_scaled_media_units = backend.reshape(
|
|
4532
4667
|
scaled_media_units,
|
|
4533
4668
|
(n_geos * n_media_times, self._meridian.n_media_channels),
|
|
4534
4669
|
)
|
|
@@ -4548,7 +4683,7 @@ class Analyzer:
|
|
|
4548
4683
|
if transformer_om is not None and scaled_om is not None:
|
|
4549
4684
|
population_scaled_median_om = transformer_om.population_scaled_median_m
|
|
4550
4685
|
scaled_organic_media_units = scaled_om * population_scaled_median_om
|
|
4551
|
-
reshaped_scaled_organic_media_units =
|
|
4686
|
+
reshaped_scaled_organic_media_units = backend.reshape(
|
|
4552
4687
|
scaled_organic_media_units,
|
|
4553
4688
|
(n_geos * n_media_times, self._meridian.n_organic_media_channels),
|
|
4554
4689
|
)
|
|
@@ -4560,6 +4695,21 @@ class Analyzer:
|
|
|
4560
4695
|
)
|
|
4561
4696
|
df_list.append(pd.DataFrame(organic_media_hist_data))
|
|
4562
4697
|
|
|
4698
|
+
# Organic RF.
|
|
4699
|
+
if self._meridian.input_data.organic_rf_channel is not None:
|
|
4700
|
+
frequency = self._meridian.organic_rf_tensors.organic_frequency
|
|
4701
|
+
if frequency is not None:
|
|
4702
|
+
reshaped_frequency = backend.reshape(
|
|
4703
|
+
frequency,
|
|
4704
|
+
(n_geos * n_media_times, self._meridian.n_organic_rf_channels),
|
|
4705
|
+
)
|
|
4706
|
+
organic_rf_hist_data = self._get_channel_hill_histogram_dataframe(
|
|
4707
|
+
channel_type=constants.ORGANIC_RF,
|
|
4708
|
+
data_to_histogram=reshaped_frequency,
|
|
4709
|
+
channel_names=self._meridian.input_data.organic_rf_channel.values,
|
|
4710
|
+
n_bins=n_bins,
|
|
4711
|
+
)
|
|
4712
|
+
df_list.append(pd.DataFrame(organic_rf_hist_data))
|
|
4563
4713
|
return pd.concat(df_list, ignore_index=True)
|
|
4564
4714
|
|
|
4565
4715
|
def hill_curves(
|
|
@@ -4612,6 +4762,7 @@ class Analyzer:
|
|
|
4612
4762
|
(self._meridian.n_media_channels, constants.MEDIA),
|
|
4613
4763
|
(self._meridian.n_rf_channels, constants.RF),
|
|
4614
4764
|
(self._meridian.n_organic_media_channels, constants.ORGANIC_MEDIA),
|
|
4765
|
+
(self._meridian.n_organic_rf_channels, constants.ORGANIC_RF),
|
|
4615
4766
|
]:
|
|
4616
4767
|
if n_channels > 0:
|
|
4617
4768
|
hill_df = self._get_hill_curves_dataframe(
|
|
@@ -4624,11 +4775,11 @@ class Analyzer:
|
|
|
4624
4775
|
|
|
4625
4776
|
def _compute_roi_aggregate(
|
|
4626
4777
|
self,
|
|
4627
|
-
incremental_outcome_prior:
|
|
4628
|
-
incremental_outcome_posterior:
|
|
4778
|
+
incremental_outcome_prior: backend.Tensor,
|
|
4779
|
+
incremental_outcome_posterior: backend.Tensor,
|
|
4629
4780
|
xr_dims: Sequence[str],
|
|
4630
4781
|
xr_coords: Mapping[str, tuple[Sequence[str], Sequence[str]]],
|
|
4631
|
-
spend_with_total:
|
|
4782
|
+
spend_with_total: backend.Tensor,
|
|
4632
4783
|
confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
|
|
4633
4784
|
metric_name: str = constants.ROI,
|
|
4634
4785
|
) -> xr.Dataset:
|
|
@@ -4645,8 +4796,8 @@ class Analyzer:
|
|
|
4645
4796
|
|
|
4646
4797
|
def _compute_spend_data_aggregate(
|
|
4647
4798
|
self,
|
|
4648
|
-
spend_with_total:
|
|
4649
|
-
impressions_with_total:
|
|
4799
|
+
spend_with_total: backend.Tensor,
|
|
4800
|
+
impressions_with_total: backend.Tensor,
|
|
4650
4801
|
xr_dims: Sequence[str],
|
|
4651
4802
|
xr_coords: Mapping[str, tuple[Sequence[str], Sequence[str]]],
|
|
4652
4803
|
) -> xr.Dataset:
|
|
@@ -4682,9 +4833,9 @@ class Analyzer:
|
|
|
4682
4833
|
|
|
4683
4834
|
def _compute_effectiveness_aggregate(
|
|
4684
4835
|
self,
|
|
4685
|
-
incremental_outcome_prior:
|
|
4686
|
-
incremental_outcome_posterior:
|
|
4687
|
-
impressions_with_total:
|
|
4836
|
+
incremental_outcome_prior: backend.Tensor,
|
|
4837
|
+
incremental_outcome_posterior: backend.Tensor,
|
|
4838
|
+
impressions_with_total: backend.Tensor,
|
|
4688
4839
|
xr_dims: Sequence[str],
|
|
4689
4840
|
xr_coords: Mapping[str, tuple[Sequence[str], Sequence[str]]],
|
|
4690
4841
|
confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
|
|
@@ -4701,9 +4852,9 @@ class Analyzer:
|
|
|
4701
4852
|
|
|
4702
4853
|
def _compute_cpik_aggregate(
|
|
4703
4854
|
self,
|
|
4704
|
-
incremental_kpi_prior:
|
|
4705
|
-
incremental_kpi_posterior:
|
|
4706
|
-
spend_with_total:
|
|
4855
|
+
incremental_kpi_prior: backend.Tensor,
|
|
4856
|
+
incremental_kpi_posterior: backend.Tensor,
|
|
4857
|
+
spend_with_total: backend.Tensor,
|
|
4707
4858
|
xr_dims: Sequence[str],
|
|
4708
4859
|
xr_coords: Mapping[str, tuple[Sequence[str], Sequence[str]]],
|
|
4709
4860
|
confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
|
|
@@ -4720,17 +4871,19 @@ class Analyzer:
|
|
|
4720
4871
|
|
|
4721
4872
|
def _compute_pct_of_contribution(
|
|
4722
4873
|
self,
|
|
4723
|
-
incremental_outcome_prior:
|
|
4724
|
-
incremental_outcome_posterior:
|
|
4725
|
-
expected_outcome_prior:
|
|
4726
|
-
expected_outcome_posterior:
|
|
4874
|
+
incremental_outcome_prior: backend.Tensor,
|
|
4875
|
+
incremental_outcome_posterior: backend.Tensor,
|
|
4876
|
+
expected_outcome_prior: backend.Tensor,
|
|
4877
|
+
expected_outcome_posterior: backend.Tensor,
|
|
4727
4878
|
xr_dims: Sequence[str],
|
|
4728
4879
|
xr_coords: Mapping[str, tuple[Sequence[str], Sequence[str]]],
|
|
4729
4880
|
confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
|
|
4730
4881
|
) -> xr.Dataset:
|
|
4731
4882
|
"""Computes the parts of `MediaSummary` related to mean expected outcome."""
|
|
4732
|
-
mean_expected_outcome_prior =
|
|
4733
|
-
|
|
4883
|
+
mean_expected_outcome_prior = backend.reduce_mean(
|
|
4884
|
+
expected_outcome_prior, (0, 1)
|
|
4885
|
+
)
|
|
4886
|
+
mean_expected_outcome_posterior = backend.reduce_mean(
|
|
4734
4887
|
expected_outcome_posterior, (0, 1)
|
|
4735
4888
|
)
|
|
4736
4889
|
|
|
@@ -4806,8 +4959,9 @@ class Analyzer:
|
|
|
4806
4959
|
of all the remaining tensors. If any of the tensors in `new_data` is
|
|
4807
4960
|
provided with a different number of time periods than in `InputData`,
|
|
4808
4961
|
then all tensors must be provided with the same number of time periods.
|
|
4809
|
-
selected_times:
|
|
4810
|
-
|
|
4962
|
+
selected_times: Optional list containing either a subset of dates to
|
|
4963
|
+
include or booleans with length equal to the number of time periods in
|
|
4964
|
+
KPI data. By default, all time periods are included.
|
|
4811
4965
|
include_media: Whether to include spends for paid media channels that do
|
|
4812
4966
|
not have R&F data.
|
|
4813
4967
|
include_rf: Whether to include spends for paid media channels with R&F
|
|
@@ -4884,8 +5038,8 @@ class Analyzer:
|
|
|
4884
5038
|
def _impute_and_aggregate_spend(
|
|
4885
5039
|
self,
|
|
4886
5040
|
selected_times: Sequence[str] | Sequence[bool] | None,
|
|
4887
|
-
media_execution_values:
|
|
4888
|
-
channel_spend:
|
|
5041
|
+
media_execution_values: backend.Tensor,
|
|
5042
|
+
channel_spend: backend.Tensor,
|
|
4889
5043
|
channel_names: Sequence[str],
|
|
4890
5044
|
) -> xr.DataArray:
|
|
4891
5045
|
"""Imputes and aggregates the spend over the selected time period.
|
|
@@ -4898,7 +5052,9 @@ class Analyzer:
|
|
|
4898
5052
|
argument, its values only affect the output when imputation is required.
|
|
4899
5053
|
|
|
4900
5054
|
Args:
|
|
4901
|
-
selected_times:
|
|
5055
|
+
selected_times: Optional list containing either a subset of dates to
|
|
5056
|
+
include or booleans with length equal to the number of time periods in
|
|
5057
|
+
KPI data. By default, all time periods are included.
|
|
4902
5058
|
media_execution_values: The media execution values over all time points.
|
|
4903
5059
|
channel_spend: The spend over all time points. Its shape can be `(n_geos,
|
|
4904
5060
|
n_times, n_media_channels)` or `(n_media_channels,)` if the data is
|
|
@@ -4918,11 +5074,13 @@ class Analyzer:
|
|
|
4918
5074
|
}
|
|
4919
5075
|
|
|
4920
5076
|
if channel_spend.ndim == 3:
|
|
4921
|
-
aggregated_spend =
|
|
4922
|
-
|
|
4923
|
-
|
|
4924
|
-
|
|
4925
|
-
|
|
5077
|
+
aggregated_spend = np.asarray(
|
|
5078
|
+
self.filter_and_aggregate_geos_and_times(
|
|
5079
|
+
channel_spend,
|
|
5080
|
+
has_media_dim=True,
|
|
5081
|
+
**dim_kwargs,
|
|
5082
|
+
)
|
|
5083
|
+
)
|
|
4926
5084
|
# channel_spend.ndim can only be 3 or 1.
|
|
4927
5085
|
else:
|
|
4928
5086
|
# media spend can have more time points than the model time points
|
|
@@ -4938,14 +5096,84 @@ class Analyzer:
|
|
|
4938
5096
|
media_exe_values,
|
|
4939
5097
|
**dim_kwargs,
|
|
4940
5098
|
)
|
|
4941
|
-
imputed_cpmu =
|
|
5099
|
+
imputed_cpmu = backend.divide_no_nan(
|
|
4942
5100
|
channel_spend,
|
|
4943
5101
|
np.sum(media_exe_values, (0, 1)),
|
|
4944
5102
|
)
|
|
4945
|
-
aggregated_spend = (target_media_exe_values * imputed_cpmu)
|
|
5103
|
+
aggregated_spend = np.asarray(target_media_exe_values * imputed_cpmu)
|
|
4946
5104
|
|
|
4947
5105
|
return xr.DataArray(
|
|
4948
5106
|
data=aggregated_spend,
|
|
4949
5107
|
dims=[constants.CHANNEL],
|
|
4950
5108
|
coords={constants.CHANNEL: channel_names},
|
|
4951
5109
|
)
|
|
5110
|
+
|
|
5111
|
+
def negative_baseline_probability(
|
|
5112
|
+
self,
|
|
5113
|
+
non_media_baseline_values: Sequence[float] | None = None,
|
|
5114
|
+
use_posterior: bool = True,
|
|
5115
|
+
selected_geos: Sequence[str] | None = None,
|
|
5116
|
+
selected_times: Sequence[str] | None = None,
|
|
5117
|
+
use_kpi: bool = False,
|
|
5118
|
+
batch_size: int = constants.DEFAULT_BATCH_SIZE,
|
|
5119
|
+
) -> np.floating:
|
|
5120
|
+
"""Calculates either prior or posterior negative baseline probability.
|
|
5121
|
+
|
|
5122
|
+
This calculates either the prior or posterior probability that the baseline,
|
|
5123
|
+
aggregated over the supplied time window, is negative.
|
|
5124
|
+
|
|
5125
|
+
The baseline is calculated by computing `expected_outcome` with the
|
|
5126
|
+
following assumptions:
|
|
5127
|
+
1) `media` is set to all zeros,
|
|
5128
|
+
2) `reach` is set to all zeros,
|
|
5129
|
+
3) `organic_media` is set to all zeros,
|
|
5130
|
+
4) `organic_reach` is set to all zeros,
|
|
5131
|
+
5) `non_media_treatments` is set to the counterfactual values according
|
|
5132
|
+
to the `non_media_baseline_values` argument,
|
|
5133
|
+
6) `controls` are set to historical values.
|
|
5134
|
+
|
|
5135
|
+
Args:
|
|
5136
|
+
non_media_baseline_values: Optional list of shape
|
|
5137
|
+
`(n_non_media_channels,)`. Each element is a float denoting a fixed
|
|
5138
|
+
value that will be used as the baseline for the given channel. It is
|
|
5139
|
+
expected that they are scaled by population for the channels where
|
|
5140
|
+
`model_spec.non_media_population_scaling_id` is `True`. If `None`, the
|
|
5141
|
+
`model_spec.non_media_baseline_values` is used, which defaults to the
|
|
5142
|
+
minimum value for each non_media treatment channel.
|
|
5143
|
+
use_posterior: Boolean. If `True`, then the expected outcome posterior
|
|
5144
|
+
distribution is calculated. Otherwise, the prior distribution is
|
|
5145
|
+
calculated.
|
|
5146
|
+
selected_geos: Optional list of containing a subset of geos to include. By
|
|
5147
|
+
default, all geos are included.
|
|
5148
|
+
selected_times: Optional list of containing a subset of dates to include.
|
|
5149
|
+
The values accepted here must match time dimension coordinates from
|
|
5150
|
+
`InputData.time`. By default, all time periods are included.
|
|
5151
|
+
use_kpi: Boolean. If `use_kpi = True`, the expected KPI is calculated;
|
|
5152
|
+
otherwise the expected revenue `(kpi * revenue_per_kpi)` is calculated.
|
|
5153
|
+
It is required that `use_kpi = True` if `revenue_per_kpi` is not defined
|
|
5154
|
+
or if `inverse_transform_outcome = False`.
|
|
5155
|
+
batch_size: Integer representing the maximum draws per chain in each
|
|
5156
|
+
batch. The calculation is run in batches to avoid memory exhaustion. If
|
|
5157
|
+
a memory error occurs, try reducing `batch_size`. The calculation will
|
|
5158
|
+
generally be faster with larger `batch_size` values.
|
|
5159
|
+
|
|
5160
|
+
Returns:
|
|
5161
|
+
A float representing the prior or posterior negative baseline probability
|
|
5162
|
+
over the supplied time window.
|
|
5163
|
+
Raises:
|
|
5164
|
+
NotFittedModelError: if `sample_posterior()` (for `use_posterior=True`)
|
|
5165
|
+
or `sample_prior()` (for `use_posterior=False`) has not been called
|
|
5166
|
+
prior to calling this method.
|
|
5167
|
+
"""
|
|
5168
|
+
|
|
5169
|
+
baseline_draws = self._calculate_baseline_expected_outcome(
|
|
5170
|
+
non_media_baseline_values=non_media_baseline_values,
|
|
5171
|
+
use_posterior=use_posterior,
|
|
5172
|
+
selected_geos=selected_geos,
|
|
5173
|
+
selected_times=selected_times,
|
|
5174
|
+
aggregate_geos=True,
|
|
5175
|
+
aggregate_times=True,
|
|
5176
|
+
use_kpi=use_kpi,
|
|
5177
|
+
batch_size=batch_size,
|
|
5178
|
+
)
|
|
5179
|
+
return np.mean(baseline_draws < 0)
|