google-meridian 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {google_meridian-1.0.8.dist-info → google_meridian-1.0.9.dist-info}/METADATA +2 -2
- {google_meridian-1.0.8.dist-info → google_meridian-1.0.9.dist-info}/RECORD +13 -13
- meridian/__init__.py +1 -1
- meridian/analysis/analyzer.py +108 -18
- meridian/analysis/optimizer.py +196 -45
- meridian/analysis/summarizer.py +21 -3
- meridian/analysis/visualizer.py +69 -23
- meridian/constants.py +12 -11
- meridian/model/model.py +15 -0
- meridian/model/prior_distribution.py +22 -1
- {google_meridian-1.0.8.dist-info → google_meridian-1.0.9.dist-info}/WHEEL +0 -0
- {google_meridian-1.0.8.dist-info → google_meridian-1.0.9.dist-info}/licenses/LICENSE +0 -0
- {google_meridian-1.0.8.dist-info → google_meridian-1.0.9.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: google-meridian
|
|
3
|
-
Version: 1.0.8
|
|
3
|
+
Version: 1.0.9
|
|
4
4
|
Summary: Google's open source mixed marketing model library, helps you understand your return on investment and direct your ad spend with confidence.
|
|
5
5
|
Author-email: The Meridian Authors <no-reply@google.com>
|
|
6
6
|
License:
|
|
@@ -393,7 +393,7 @@ To cite this repository:
|
|
|
393
393
|
author = {Google Meridian Marketing Mix Modeling Team},
|
|
394
394
|
title = {Meridian: Marketing Mix Modeling},
|
|
395
395
|
url = {https://github.com/google/meridian},
|
|
396
|
-
version = {1.0.8},
|
|
396
|
+
version = {1.0.9},
|
|
397
397
|
year = {2025},
|
|
398
398
|
}
|
|
399
399
|
```
|
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
google_meridian-1.0.
|
|
2
|
-
meridian/__init__.py,sha256=
|
|
3
|
-
meridian/constants.py,sha256=
|
|
1
|
+
google_meridian-1.0.9.dist-info/licenses/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
|
2
|
+
meridian/__init__.py,sha256=b7KL3QTlpVq4JvFAzhv7fcEUz6iiBqeGwZrzLv_JLjI,714
|
|
3
|
+
meridian/constants.py,sha256=OHzrSdGcburXB1miI9oPZ_6mdf6wokKq2zhzwo9YA84,15434
|
|
4
4
|
meridian/analysis/__init__.py,sha256=-FooDZ5OzePpyTVkvRoWQx_xBaRR_hjVLny9H8-kkyQ,836
|
|
5
|
-
meridian/analysis/analyzer.py,sha256=
|
|
5
|
+
meridian/analysis/analyzer.py,sha256=nFODFwnrR2QC2FiEX11UgMGrVHkud_KzUsTpKUtQiSo,203965
|
|
6
6
|
meridian/analysis/formatter.py,sha256=F8OYxD2bH13zV10JY63j2ugCOj-DpTXhyJr43n5ukr8,7270
|
|
7
|
-
meridian/analysis/optimizer.py,sha256=
|
|
8
|
-
meridian/analysis/summarizer.py,sha256=
|
|
7
|
+
meridian/analysis/optimizer.py,sha256=SVZJjO0nZjWL62PoeuIBf5_iPdDMdgmA3fuY1R8pEsU,98126
|
|
8
|
+
meridian/analysis/summarizer.py,sha256=PPin1hKvcdGuzNOlXOsCtCO0JzawyBb26g4LEFptRh0,18883
|
|
9
9
|
meridian/analysis/summary_text.py,sha256=n6a-DTZxtS3WvdI_pDEK7lvO3MRUX3h83GzuVnG6sQ4,12438
|
|
10
10
|
meridian/analysis/test_utils.py,sha256=xai8oxXu51PDsiQ-ZYTnN_eSLsGu0BUOS8rDTcc6v-E,77719
|
|
11
|
-
meridian/analysis/visualizer.py,sha256=
|
|
11
|
+
meridian/analysis/visualizer.py,sha256=KgqdqbYkvo1vY0u-JGuIYEpwMR1xUvJToG1QcIaVuPo,94138
|
|
12
12
|
meridian/analysis/templates/card.html.jinja,sha256=pv4MVbQ25CcvtZY-LH7bFW0OSeHobkeEkAleB1sfQ14,1284
|
|
13
13
|
meridian/analysis/templates/chart.html.jinja,sha256=87i0xnXHRBoLLxBpKv2i960TLToWq4r1aVQZqaXIeMQ,1086
|
|
14
14
|
meridian/analysis/templates/chips.html.jinja,sha256=Az0tQwF_-b03JDLyOzpeH-8fb-6jgJgbNfnUUSm-q6E,645
|
|
@@ -28,14 +28,14 @@ meridian/model/__init__.py,sha256=bvx8vvXolktsCTDKViU9U1v85pgNWF3haDowTKy11d4,98
|
|
|
28
28
|
meridian/model/adstock_hill.py,sha256=b_YYhqci6ndgi602FFXmx2f12ceC4N0tp338nMMtm54,9283
|
|
29
29
|
meridian/model/knots.py,sha256=r7PPaJM96d5pkoOeV9crIOgkM0-rh24mWMvypMiV4aQ,8054
|
|
30
30
|
meridian/model/media.py,sha256=Gjr4jm0y_6pFy7aa_oKIuuZ8P7F56e3ZB-3o6msApeA,11876
|
|
31
|
-
meridian/model/model.py,sha256=
|
|
31
|
+
meridian/model/model.py,sha256=CgBzyR8KWE3lPecaCTg0FF16booUOpsE3ARNcm5KrFc,43875
|
|
32
32
|
meridian/model/model_test_data.py,sha256=dqS_vDQUg811UGmyr8ZgWp8VTIra-krA7A2erQlfPlU,12488
|
|
33
33
|
meridian/model/posterior_sampler.py,sha256=uUNMdxyoK0LT6hNKiAxEEl-1X0SyBMz-o_Sao5q5Ts8,23228
|
|
34
|
-
meridian/model/prior_distribution.py,sha256=
|
|
34
|
+
meridian/model/prior_distribution.py,sha256=h-L6hLOC-bM9ciYCvbZbDN7-3-30AwHBbo06KsSwDiY,39934
|
|
35
35
|
meridian/model/prior_sampler.py,sha256=zGSAQviFO3s2GcVbfG9EfXxo_SNFBFbTQC3e-QBFzio,23079
|
|
36
36
|
meridian/model/spec.py,sha256=xaHxfCLWLnWMAkMy2ouDoqGBHI_4tzzX8AaJOsKdu7Q,8878
|
|
37
37
|
meridian/model/transformers.py,sha256=te3OJixprWLtv7O00a9GZWE4waTS94NNLVo3tWIl1-k,7420
|
|
38
|
-
google_meridian-1.0.
|
|
39
|
-
google_meridian-1.0.
|
|
40
|
-
google_meridian-1.0.
|
|
41
|
-
google_meridian-1.0.
|
|
38
|
+
google_meridian-1.0.9.dist-info/METADATA,sha256=N6Y923SR6L6T5Py0xVpNwZm0lBl-mXWC2oCnYq903pM,22055
|
|
39
|
+
google_meridian-1.0.9.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
40
|
+
google_meridian-1.0.9.dist-info/top_level.txt,sha256=nwaCebZvvU34EopTKZsjK0OMTFjVnkf4FfnBN_TAc0g,9
|
|
41
|
+
google_meridian-1.0.9.dist-info/RECORD,,
|
meridian/__init__.py
CHANGED
meridian/analysis/analyzer.py
CHANGED
|
@@ -63,6 +63,8 @@ class DataTensors(tf.experimental.ExtensionType):
|
|
|
63
63
|
controls: Optional tensor with dimensions `(n_geos, n_times, n_controls)`.
|
|
64
64
|
revenue_per_kpi: Optional tensor with dimensions `(n_geos, T)` for any time
|
|
65
65
|
dimension `T`.
|
|
66
|
+
time: Optional tensor of time coordinates in the "YYYY-mm-dd" string format
|
|
67
|
+
for time dimension `T`.
|
|
66
68
|
"""
|
|
67
69
|
|
|
68
70
|
media: Optional[tf.Tensor]
|
|
@@ -76,6 +78,7 @@ class DataTensors(tf.experimental.ExtensionType):
|
|
|
76
78
|
non_media_treatments: Optional[tf.Tensor]
|
|
77
79
|
controls: Optional[tf.Tensor]
|
|
78
80
|
revenue_per_kpi: Optional[tf.Tensor]
|
|
81
|
+
time: Optional[tf.Tensor]
|
|
79
82
|
|
|
80
83
|
def __init__(
|
|
81
84
|
self,
|
|
@@ -90,6 +93,7 @@ class DataTensors(tf.experimental.ExtensionType):
|
|
|
90
93
|
non_media_treatments: Optional[tf.Tensor] = None,
|
|
91
94
|
controls: Optional[tf.Tensor] = None,
|
|
92
95
|
revenue_per_kpi: Optional[tf.Tensor] = None,
|
|
96
|
+
time: Optional[Sequence[str] | tf.Tensor] = None,
|
|
93
97
|
):
|
|
94
98
|
self.media = tf.cast(media, tf.float32) if media is not None else None
|
|
95
99
|
self.media_spend = (
|
|
@@ -130,6 +134,7 @@ class DataTensors(tf.experimental.ExtensionType):
|
|
|
130
134
|
if revenue_per_kpi is not None
|
|
131
135
|
else None
|
|
132
136
|
)
|
|
137
|
+
self.time = tf.cast(time, tf.string) if time is not None else None
|
|
133
138
|
|
|
134
139
|
def __validate__(self):
|
|
135
140
|
self._validate_n_dims()
|
|
@@ -241,6 +246,8 @@ class DataTensors(tf.experimental.ExtensionType):
|
|
|
241
246
|
f"New `{field.name}` must have 1 or 3 dimensions. Found"
|
|
242
247
|
f" {tensor.ndim} dimensions."
|
|
243
248
|
)
|
|
249
|
+
elif field.name == constants.TIME:
|
|
250
|
+
_check_n_dims(tensor, field.name, 1)
|
|
244
251
|
else:
|
|
245
252
|
_check_n_dims(tensor, field.name, 3)
|
|
246
253
|
|
|
@@ -283,7 +290,7 @@ class DataTensors(tf.experimental.ExtensionType):
|
|
|
283
290
|
for var_name in required_fields:
|
|
284
291
|
new_tensor = getattr(self, var_name)
|
|
285
292
|
if new_tensor is not None and new_tensor.shape[0] != meridian.n_geos:
|
|
286
|
-
# Skip spend data with only 1 dimension
|
|
293
|
+
# Skip spend and time data with only 1 dimension.
|
|
287
294
|
if new_tensor.ndim == 1:
|
|
288
295
|
continue
|
|
289
296
|
raise ValueError(
|
|
@@ -296,7 +303,7 @@ class DataTensors(tf.experimental.ExtensionType):
|
|
|
296
303
|
):
|
|
297
304
|
"""Validates the channel dimension of the specified data variables."""
|
|
298
305
|
for var_name in required_fields:
|
|
299
|
-
if var_name
|
|
306
|
+
if var_name in [constants.REVENUE_PER_KPI, constants.TIME]:
|
|
300
307
|
continue
|
|
301
308
|
new_tensor = getattr(self, var_name)
|
|
302
309
|
old_tensor = getattr(meridian.input_data, var_name)
|
|
@@ -317,12 +324,24 @@ class DataTensors(tf.experimental.ExtensionType):
|
|
|
317
324
|
old_tensor = getattr(meridian.input_data, var_name)
|
|
318
325
|
|
|
319
326
|
# Skip spend data with only 1 dimension of (n_channels).
|
|
320
|
-
if
|
|
327
|
+
if (
|
|
328
|
+
var_name in [constants.MEDIA_SPEND, constants.RF_SPEND]
|
|
329
|
+
and new_tensor is not None
|
|
330
|
+
and new_tensor.ndim == 1
|
|
331
|
+
):
|
|
321
332
|
continue
|
|
322
333
|
|
|
323
334
|
if new_tensor is not None:
|
|
324
335
|
assert old_tensor is not None
|
|
325
|
-
if
|
|
336
|
+
if (
|
|
337
|
+
var_name == constants.TIME
|
|
338
|
+
and new_tensor.shape[0] != old_tensor.shape[0]
|
|
339
|
+
):
|
|
340
|
+
raise ValueError(
|
|
341
|
+
f"New `{var_name}` is expected to have {old_tensor.shape[0]}"
|
|
342
|
+
f" time periods. Found {new_tensor.shape[0]} time periods."
|
|
343
|
+
)
|
|
344
|
+
elif new_tensor.ndim > 1 and new_tensor.shape[1] != old_tensor.shape[1]:
|
|
326
345
|
raise ValueError(
|
|
327
346
|
f"New `{var_name}` is expected to have {old_tensor.shape[1]}"
|
|
328
347
|
f" time periods. Found {new_tensor.shape[1]} time periods."
|
|
@@ -345,12 +364,24 @@ class DataTensors(tf.experimental.ExtensionType):
|
|
|
345
364
|
if old_tensor is None:
|
|
346
365
|
continue
|
|
347
366
|
# Skip spend data with only 1 dimension of (n_channels).
|
|
348
|
-
if
|
|
367
|
+
if (
|
|
368
|
+
var_name in [constants.MEDIA_SPEND, constants.RF_SPEND]
|
|
369
|
+
and new_tensor is not None
|
|
370
|
+
and new_tensor.ndim == 1
|
|
371
|
+
):
|
|
349
372
|
continue
|
|
350
373
|
|
|
351
374
|
if new_tensor is None:
|
|
352
375
|
missing_params.append(var_name)
|
|
353
|
-
elif new_tensor.shape[
|
|
376
|
+
elif var_name == constants.TIME and new_tensor.shape[0] != new_n_times:
|
|
377
|
+
raise ValueError(
|
|
378
|
+
"If the time dimension of any variable in `new_data` is "
|
|
379
|
+
"modified, then all variables must be provided with the same "
|
|
380
|
+
f"number of time periods. `{var_name}` has {new_tensor.shape[1]} "
|
|
381
|
+
"time periods, which does not match the modified number of time "
|
|
382
|
+
f"periods, {new_n_times}.",
|
|
383
|
+
)
|
|
384
|
+
elif new_tensor.ndim > 1 and new_tensor.shape[1] != new_n_times:
|
|
354
385
|
raise ValueError(
|
|
355
386
|
"If the time dimension of any variable in `new_data` is "
|
|
356
387
|
"modified, then all variables must be provided with the same "
|
|
@@ -390,6 +421,10 @@ class DataTensors(tf.experimental.ExtensionType):
|
|
|
390
421
|
old_tensor = meridian.controls
|
|
391
422
|
elif var_name == constants.REVENUE_PER_KPI:
|
|
392
423
|
old_tensor = meridian.revenue_per_kpi
|
|
424
|
+
elif var_name == constants.TIME:
|
|
425
|
+
old_tensor = tf.convert_to_tensor(
|
|
426
|
+
meridian.input_data.time.values.tolist(), dtype=tf.string
|
|
427
|
+
)
|
|
393
428
|
else:
|
|
394
429
|
continue
|
|
395
430
|
|
|
@@ -4663,11 +4698,11 @@ class Analyzer:
|
|
|
4663
4698
|
|
|
4664
4699
|
def get_historical_spend(
|
|
4665
4700
|
self,
|
|
4666
|
-
selected_times: Sequence[str] | None,
|
|
4701
|
+
selected_times: Sequence[str] | None = None,
|
|
4667
4702
|
include_media: bool = True,
|
|
4668
4703
|
include_rf: bool = True,
|
|
4669
4704
|
) -> xr.DataArray:
|
|
4670
|
-
"""Gets the aggregated historical spend based on the time
|
|
4705
|
+
"""Deprecated. Gets the aggregated historical spend based on the time.
|
|
4671
4706
|
|
|
4672
4707
|
Args:
|
|
4673
4708
|
selected_times: The time period to get the historical spends. If None, the
|
|
@@ -4681,6 +4716,51 @@ class Analyzer:
|
|
|
4681
4716
|
An `xr.DataArray` with the coordinate `channel` and contains the data
|
|
4682
4717
|
variable `spend`.
|
|
4683
4718
|
|
|
4719
|
+
Raises:
|
|
4720
|
+
ValueError: A ValueError is raised when `include_media` and `include_rf`
|
|
4721
|
+
are both False.
|
|
4722
|
+
"""
|
|
4723
|
+
warnings.warn(
|
|
4724
|
+
"`get_historical_spend` is deprecated. Please use "
|
|
4725
|
+
"`get_aggregated_spend` with `new_data=None` instead.",
|
|
4726
|
+
DeprecationWarning,
|
|
4727
|
+
stacklevel=2,
|
|
4728
|
+
)
|
|
4729
|
+
return self.get_aggregated_spend(
|
|
4730
|
+
selected_times=selected_times,
|
|
4731
|
+
include_media=include_media,
|
|
4732
|
+
include_rf=include_rf,
|
|
4733
|
+
)
|
|
4734
|
+
|
|
4735
|
+
def get_aggregated_spend(
|
|
4736
|
+
self,
|
|
4737
|
+
new_data: DataTensors | None = None,
|
|
4738
|
+
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
4739
|
+
include_media: bool = True,
|
|
4740
|
+
include_rf: bool = True,
|
|
4741
|
+
) -> xr.DataArray:
|
|
4742
|
+
"""Gets the aggregated spend based on the selected time.
|
|
4743
|
+
|
|
4744
|
+
Args:
|
|
4745
|
+
new_data: An optional `DataTensors` object containing the new `media`,
|
|
4746
|
+
`media_spend`, `reach`, `frequency`, `rf_spend` tensors. If `None`, the
|
|
4747
|
+
existing tensors from the Meridian object are used. If `new_data`
|
|
4748
|
+
argument is used, then the aggregated spend is computed using the values
|
|
4749
|
+
of the tensors passed in the `new_data` argument and the original values
|
|
4750
|
+
of all the remaining tensors. If any of the tensors in `new_data` is
|
|
4751
|
+
provided with a different number of time periods than in `InputData`,
|
|
4752
|
+
then all tensors must be provided with the same number of time periods.
|
|
4753
|
+
selected_times: The time period to get the aggregated spends. If None, the
|
|
4754
|
+
spend will be aggregated over all time periods.
|
|
4755
|
+
include_media: Whether to include spends for paid media channels that do
|
|
4756
|
+
not have R&F data.
|
|
4757
|
+
include_rf: Whether to include spends for paid media channels with R&F
|
|
4758
|
+
data.
|
|
4759
|
+
|
|
4760
|
+
Returns:
|
|
4761
|
+
An `xr.DataArray` with the coordinate `channel` and contains the data
|
|
4762
|
+
variable `spend`.
|
|
4763
|
+
|
|
4684
4764
|
Raises:
|
|
4685
4765
|
ValueError: A ValueError is raised when `include_media` and `include_rf`
|
|
4686
4766
|
are both False.
|
|
@@ -4689,6 +4769,11 @@ class Analyzer:
|
|
|
4689
4769
|
raise ValueError(
|
|
4690
4770
|
"At least one of include_media or include_rf must be True."
|
|
4691
4771
|
)
|
|
4772
|
+
new_data = new_data or DataTensors()
|
|
4773
|
+
required_tensors_names = constants.PAID_CHANNELS + constants.SPEND_DATA
|
|
4774
|
+
filled_data = new_data.validate_and_fill_missing_data(
|
|
4775
|
+
required_tensors_names, self._meridian
|
|
4776
|
+
)
|
|
4692
4777
|
|
|
4693
4778
|
empty_da = xr.DataArray(
|
|
4694
4779
|
dims=[constants.CHANNEL], coords={constants.CHANNEL: []}
|
|
@@ -4709,8 +4794,8 @@ class Analyzer:
|
|
|
4709
4794
|
else:
|
|
4710
4795
|
aggregated_media_spend = self._impute_and_aggregate_spend(
|
|
4711
4796
|
selected_times,
|
|
4712
|
-
|
|
4713
|
-
|
|
4797
|
+
filled_data.media,
|
|
4798
|
+
filled_data.media_spend,
|
|
4714
4799
|
list(self._meridian.input_data.media_channel.values),
|
|
4715
4800
|
)
|
|
4716
4801
|
|
|
@@ -4723,18 +4808,16 @@ class Analyzer:
|
|
|
4723
4808
|
or self._meridian.rf_tensors.rf_spend is None
|
|
4724
4809
|
):
|
|
4725
4810
|
warnings.warn(
|
|
4726
|
-
"Requested spends for paid media channels with R&F data, but
|
|
4811
|
+
"Requested spends for paid media channels with R&F data, but the"
|
|
4727
4812
|
" channels are not available.",
|
|
4728
4813
|
)
|
|
4729
4814
|
aggregated_rf_spend = empty_da
|
|
4730
4815
|
else:
|
|
4731
|
-
rf_execution_values =
|
|
4732
|
-
self._meridian.rf_tensors.reach * self._meridian.rf_tensors.frequency
|
|
4733
|
-
)
|
|
4816
|
+
rf_execution_values = filled_data.reach * filled_data.frequency
|
|
4734
4817
|
aggregated_rf_spend = self._impute_and_aggregate_spend(
|
|
4735
4818
|
selected_times,
|
|
4736
4819
|
rf_execution_values,
|
|
4737
|
-
|
|
4820
|
+
filled_data.rf_spend,
|
|
4738
4821
|
list(self._meridian.input_data.rf_channel.values),
|
|
4739
4822
|
)
|
|
4740
4823
|
|
|
@@ -4744,7 +4827,7 @@ class Analyzer:
|
|
|
4744
4827
|
|
|
4745
4828
|
def _impute_and_aggregate_spend(
|
|
4746
4829
|
self,
|
|
4747
|
-
selected_times: Sequence[str] | None,
|
|
4830
|
+
selected_times: Sequence[str] | Sequence[bool] | None,
|
|
4748
4831
|
media_execution_values: tf.Tensor,
|
|
4749
4832
|
channel_spend: tf.Tensor,
|
|
4750
4833
|
channel_names: Sequence[str],
|
|
@@ -4759,7 +4842,7 @@ class Analyzer:
|
|
|
4759
4842
|
argument, its values only affect the output when imputation is required.
|
|
4760
4843
|
|
|
4761
4844
|
Args:
|
|
4762
|
-
selected_times: The time period to get the
|
|
4845
|
+
selected_times: The time period to get the aggregated spend.
|
|
4763
4846
|
media_execution_values: The media execution values over all time points.
|
|
4764
4847
|
channel_spend: The spend over all time points. Its shape can be `(n_geos,
|
|
4765
4848
|
n_times, n_media_channels)` or `(n_media_channels,)` if the data is
|
|
@@ -4775,17 +4858,24 @@ class Analyzer:
|
|
|
4775
4858
|
"selected_times": selected_times,
|
|
4776
4859
|
"aggregate_geos": True,
|
|
4777
4860
|
"aggregate_times": True,
|
|
4861
|
+
"flexible_time_dim": True,
|
|
4778
4862
|
}
|
|
4779
4863
|
|
|
4780
4864
|
if channel_spend.ndim == 3:
|
|
4781
4865
|
aggregated_spend = self.filter_and_aggregate_geos_and_times(
|
|
4782
4866
|
channel_spend,
|
|
4867
|
+
has_media_dim=True,
|
|
4783
4868
|
**dim_kwargs,
|
|
4784
4869
|
).numpy()
|
|
4785
4870
|
# channel_spend.ndim can only be 3 or 1.
|
|
4786
4871
|
else:
|
|
4787
4872
|
# media spend can have more time points than the model time points
|
|
4788
|
-
|
|
4873
|
+
if media_execution_values.shape[1] == self._meridian.n_media_times:
|
|
4874
|
+
media_exe_values = media_execution_values[
|
|
4875
|
+
:, -self._meridian.n_times :, :
|
|
4876
|
+
]
|
|
4877
|
+
else:
|
|
4878
|
+
media_exe_values = media_execution_values
|
|
4789
4879
|
# Calculates CPM over all times and geos if the spend does not have time
|
|
4790
4880
|
# and geo dimensions.
|
|
4791
4881
|
target_media_exe_values = self.filter_and_aggregate_geos_and_times(
|
meridian/analysis/optimizer.py
CHANGED
|
@@ -28,6 +28,7 @@ from meridian import constants as c
|
|
|
28
28
|
from meridian.analysis import analyzer
|
|
29
29
|
from meridian.analysis import formatter
|
|
30
30
|
from meridian.analysis import summary_text
|
|
31
|
+
from meridian.data import time_coordinates as tc
|
|
31
32
|
from meridian.model import model
|
|
32
33
|
import numpy as np
|
|
33
34
|
import pandas as pd
|
|
@@ -119,7 +120,7 @@ class OptimizationGrid:
|
|
|
119
120
|
gtol: float
|
|
120
121
|
round_factor: int
|
|
121
122
|
optimal_frequency: np.ndarray | None
|
|
122
|
-
selected_times:
|
|
123
|
+
selected_times: Sequence[str] | Sequence[bool] | None
|
|
123
124
|
|
|
124
125
|
@property
|
|
125
126
|
def grid_dataset(self) -> xr.Dataset:
|
|
@@ -621,7 +622,7 @@ class OptimizationResults:
|
|
|
621
622
|
# by adjusting the domain of the y-axis so that the incremental outcome does
|
|
622
623
|
# not start at 0. Calculate the total decrease in incremental outcome to pad
|
|
623
624
|
# the y-axis from the non-optimized total incremental outcome value.
|
|
624
|
-
sum_decr =
|
|
625
|
+
sum_decr = df[df.incremental_outcome < 0].incremental_outcome.sum()
|
|
625
626
|
y_padding = float(f'1e{int(math.log10(-sum_decr))}') if sum_decr < 0 else 2
|
|
626
627
|
domain_scale = [
|
|
627
628
|
self.nonoptimized_data.total_incremental_outcome + sum_decr - y_padding,
|
|
@@ -1016,8 +1017,16 @@ class OptimizationResults:
|
|
|
1016
1017
|
|
|
1017
1018
|
def _gen_optimization_summary(self) -> str:
|
|
1018
1019
|
"""Generates HTML optimization summary output (as sanitized content str)."""
|
|
1019
|
-
|
|
1020
|
-
self.template_env.globals[c.
|
|
1020
|
+
start_date = tc.normalize_date(self.optimized_data.start_date)
|
|
1021
|
+
self.template_env.globals[c.START_DATE] = start_date.strftime(
|
|
1022
|
+
f'%b {start_date.day}, %Y'
|
|
1023
|
+
)
|
|
1024
|
+
interval_days = self.meridian.input_data.time_coordinates.interval_days
|
|
1025
|
+
end_date = tc.normalize_date(self.optimized_data.end_date)
|
|
1026
|
+
end_date_adjusted = end_date + pd.Timedelta(days=interval_days)
|
|
1027
|
+
self.template_env.globals[c.END_DATE] = end_date_adjusted.strftime(
|
|
1028
|
+
f'%b {end_date_adjusted.day}, %Y'
|
|
1029
|
+
)
|
|
1021
1030
|
|
|
1022
1031
|
html_template = self.template_env.get_template('summary.html.jinja')
|
|
1023
1032
|
return html_template.render(
|
|
@@ -1265,6 +1274,7 @@ class BudgetOptimizer:
|
|
|
1265
1274
|
|
|
1266
1275
|
def optimize(
|
|
1267
1276
|
self,
|
|
1277
|
+
new_data: analyzer.DataTensors | None = None,
|
|
1268
1278
|
use_posterior: bool = True,
|
|
1269
1279
|
selected_times: tuple[str | None, str | None] | None = None,
|
|
1270
1280
|
fixed_budget: bool = True,
|
|
@@ -1282,18 +1292,50 @@ class BudgetOptimizer:
|
|
|
1282
1292
|
) -> OptimizationResults:
|
|
1283
1293
|
"""Finds the optimal budget allocation that maximizes outcome.
|
|
1284
1294
|
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1295
|
+
Optimization depends on the following:
|
|
1296
|
+
1. Flighting pattern (the relative allocation of a channel's media units
|
|
1297
|
+
across geos and time periods, which is held fixed for each channel)
|
|
1298
|
+
2. Cost per media unit (This is assumed to be constant for each channel, and
|
|
1299
|
+
can optionally vary by geo and/or time period)
|
|
1300
|
+
3. `pct_of_spend` (center of the spend box constraint for each channel)
|
|
1301
|
+
4. `budget` (total budget used for fixed budget scenarios)
|
|
1302
|
+
|
|
1303
|
+
By default, these values are assigned based on the historical data. The
|
|
1304
|
+
`pct_of_spend` and `budget` are optimization arguments that can be
|
|
1305
|
+
overridden directly. Passing `new_data.media` (or `new_data.reach` or
|
|
1306
|
+
`new_data.frequency`) will override both the flighting pattern and cost per
|
|
1307
|
+
media unit. Passing `new_data.media_spend` (or `new_data.rf_spend`) will only
|
|
1308
|
+
override the cost per media unit.
|
|
1309
|
+
|
|
1310
|
+
If `new_data` is passed with a different number of time periods than the
|
|
1311
|
+
historical data, then all of the optimization parameters will be inferred
|
|
1312
|
+
from it. Default values for `pct_of_spend` and `budget` (if
|
|
1313
|
+
`fixed_budget=True`) will be inferred from the `new_data`, but can be
|
|
1314
|
+
overridden using the `pct_of_spend` and `budget` arguments.
|
|
1315
|
+
|
|
1316
|
+
If `selected_times` is specified, then the default values are inferred based
|
|
1317
|
+
on the subset of time periods specified.
|
|
1288
1318
|
|
|
1289
1319
|
Args:
|
|
1320
|
+
new_data: An optional `DataTensors` container with optional tensors:
|
|
1321
|
+
`media`, `reach`, `frequency`, `media_spend`, `rf_spend`,
|
|
1322
|
+
`revenue_per_kpi`, and `time`. If `None`, the original tensors from the
|
|
1323
|
+
Meridian object are used. If `new_data` is provided, the optimization is
|
|
1324
|
+
run on the versions of the tensors in `new_data` and the original
|
|
1325
|
+
versions of all the remaining tensors. If any of the tensors in
|
|
1326
|
+
`new_data` is provided with a different number of time periods than in
|
|
1327
|
+
`InputData`, then all tensors must be provided with the same number of
|
|
1328
|
+
time periods and the `time` tensor must be provided.
|
|
1290
1329
|
use_posterior: Boolean. If `True`, then the budget is optimized based on
|
|
1291
1330
|
the posterior distribution of the model. Otherwise, the prior
|
|
1292
1331
|
distribution is used.
|
|
1293
1332
|
selected_times: Tuple containing the start and end time dimension
|
|
1294
1333
|
coordinates for the duration to run the optimization on. Selected time
|
|
1295
1334
|
values should align with the Meridian time dimension coordinates in the
|
|
1296
|
-
underlying model
|
|
1335
|
+
underlying model if optimizing the original data. If `new_data` is
|
|
1336
|
+
provided with a different number of time periods than in `InputData`,
|
|
1337
|
+
then the start and end time coordinates must match the time dimensions
|
|
1338
|
+
in `new_data.time`. By default, all time periods are used. Either start
|
|
1297
1339
|
or end time component can be `None` to represent the first or the last
|
|
1298
1340
|
time coordinate, respectively.
|
|
1299
1341
|
fixed_budget: Boolean indicating whether it's a fixed budget optimization
|
|
@@ -1310,7 +1352,7 @@ class BudgetOptimizer:
|
|
|
1310
1352
|
performance metrics (for example, ROI) and construct the feasible range
|
|
1311
1353
|
of media-level spend with the spend constraints. Consider using
|
|
1312
1354
|
`InputData.get_paid_channels_argument_builder()` to construct this
|
|
1313
|
-
argument.
|
|
1355
|
+
argument. If using `new_data`, this argument is ignored.
|
|
1314
1356
|
spend_constraint_lower: Numeric list of size `n_paid_channels` or float
|
|
1315
1357
|
(same constraint for all channels) indicating the lower bound of
|
|
1316
1358
|
media-level spend. If given as a channel-indexed array, the order must
|
|
@@ -1368,6 +1410,7 @@ class BudgetOptimizer:
|
|
|
1368
1410
|
if spend_constraint_upper is None:
|
|
1369
1411
|
spend_constraint_upper = spend_constraint_default
|
|
1370
1412
|
optimization_grid = self.create_optimization_grid(
|
|
1413
|
+
new_data=new_data,
|
|
1371
1414
|
selected_times=selected_times,
|
|
1372
1415
|
budget=budget,
|
|
1373
1416
|
pct_of_spend=pct_of_spend,
|
|
@@ -1403,6 +1446,7 @@ class BudgetOptimizer:
|
|
|
1403
1446
|
spend.non_optimized, optimization_grid.round_factor
|
|
1404
1447
|
).astype(int)
|
|
1405
1448
|
nonoptimized_data = self._create_budget_dataset(
|
|
1449
|
+
new_data=new_data,
|
|
1406
1450
|
use_posterior=use_posterior,
|
|
1407
1451
|
use_kpi=use_kpi,
|
|
1408
1452
|
hist_spend=optimization_grid.historical_spend,
|
|
@@ -1413,6 +1457,7 @@ class BudgetOptimizer:
|
|
|
1413
1457
|
use_historical_budget=use_historical_budget,
|
|
1414
1458
|
)
|
|
1415
1459
|
nonoptimized_data_with_optimal_freq = self._create_budget_dataset(
|
|
1460
|
+
new_data=new_data,
|
|
1416
1461
|
use_posterior=use_posterior,
|
|
1417
1462
|
use_kpi=use_kpi,
|
|
1418
1463
|
hist_spend=optimization_grid.historical_spend,
|
|
@@ -1431,6 +1476,7 @@ class BudgetOptimizer:
|
|
|
1431
1476
|
elif target_mroi:
|
|
1432
1477
|
constraints[c.TARGET_MROI] = target_mroi
|
|
1433
1478
|
optimized_data = self._create_budget_dataset(
|
|
1479
|
+
new_data=new_data,
|
|
1434
1480
|
use_posterior=use_posterior,
|
|
1435
1481
|
use_kpi=use_kpi,
|
|
1436
1482
|
hist_spend=optimization_grid.historical_spend,
|
|
@@ -1476,6 +1522,7 @@ class BudgetOptimizer:
|
|
|
1476
1522
|
|
|
1477
1523
|
def create_optimization_grid(
|
|
1478
1524
|
self,
|
|
1525
|
+
new_data: xr.Dataset | None = None,
|
|
1479
1526
|
use_posterior: bool = True,
|
|
1480
1527
|
selected_times: tuple[str | None, str | None] | None = None,
|
|
1481
1528
|
budget: float | None = None,
|
|
@@ -1490,13 +1537,25 @@ class BudgetOptimizer:
|
|
|
1490
1537
|
"""Creates a OptimizationGrid for optimization.
|
|
1491
1538
|
|
|
1492
1539
|
Args:
|
|
1540
|
+
new_data: An optional `DataTensors` container with optional tensors:
|
|
1541
|
+
`media`, `reach`, `frequency`, `media_spend`, `rf_spend`,
|
|
1542
|
+
`revenue_per_kpi`, and `time`. If `None`, the original tensors from the
|
|
1543
|
+
Meridian object are used. If `new_data` is provided, the grid is created
|
|
1544
|
+
using the versions of the tensors in `new_data` and the original
|
|
1545
|
+
versions of all the remaining tensors. If any of the tensors in
|
|
1546
|
+
`new_data` is provided with a different number of time periods than in
|
|
1547
|
+
`InputData`, then all tensors must be provided with the same number of
|
|
1548
|
+
time periods and the `time` tensor must be provided.
|
|
1493
1549
|
use_posterior: Boolean. If `True`, then the incremental outcome is derived
|
|
1494
1550
|
from the posterior distribution of the model. Otherwise, the prior
|
|
1495
1551
|
distribution is used.
|
|
1496
1552
|
selected_times: Tuple containing the start and end time dimension
|
|
1497
1553
|
coordinates for the duration to run the optimization on. Selected time
|
|
1498
1554
|
values should align with the Meridian time dimension coordinates in the
|
|
1499
|
-
underlying model
|
|
1555
|
+
underlying model if optimizing the original data. If `new_data` is
|
|
1556
|
+
provided with a different number of time periods than in `InputData`,
|
|
1557
|
+
then the start and end time coordinates must match the time dimensions
|
|
1558
|
+
in `new_data.time`. By default, all time periods are used. Either start
|
|
1500
1559
|
or end time component can be `None` to represent the first or the last
|
|
1501
1560
|
time coordinate, respectively.
|
|
1502
1561
|
budget: Number indicating the total budget for the fixed budget scenario.
|
|
@@ -1510,7 +1569,7 @@ class BudgetOptimizer:
|
|
|
1510
1569
|
performance metrics (for example, ROI) and construct the feasible range
|
|
1511
1570
|
of media-level spend with the spend constraints. Consider using
|
|
1512
1571
|
`InputData.get_paid_channels_argument_builder()` to construct this
|
|
1513
|
-
argument.
|
|
1572
|
+
argument. If using `new_data`, this argument is ignored.
|
|
1514
1573
|
spend_constraint_lower: Numeric list of size `n_paid_channels` or float
|
|
1515
1574
|
(same constraint for all channels) indicating the lower bound of
|
|
1516
1575
|
media-level spend. If given as a channel-indexed array, the order must
|
|
@@ -1545,16 +1604,20 @@ class BudgetOptimizer:
|
|
|
1545
1604
|
An OptimizationGrid object containing the grid data for optimization.
|
|
1546
1605
|
"""
|
|
1547
1606
|
self._validate_model_fit(use_posterior)
|
|
1548
|
-
if
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1607
|
+
if new_data is None:
|
|
1608
|
+
new_data = analyzer.DataTensors()
|
|
1609
|
+
|
|
1610
|
+
required_tensors = c.PERFORMANCE_DATA + (c.TIME,)
|
|
1611
|
+
filled_data = new_data.validate_and_fill_missing_data(
|
|
1612
|
+
required_tensors_names=required_tensors, meridian=self._meridian
|
|
1613
|
+
)
|
|
1614
|
+
|
|
1615
|
+
selected_time_dims = self._validate_selected_times(
|
|
1616
|
+
selected_times, filled_data
|
|
1617
|
+
)
|
|
1618
|
+
hist_spend = self._analyzer.get_aggregated_spend(
|
|
1619
|
+
new_data=filled_data.filter_fields(c.PAID_CHANNELS + c.SPEND_DATA),
|
|
1620
|
+
selected_times=selected_time_dims,
|
|
1558
1621
|
include_media=self._meridian.n_media_channels > 0,
|
|
1559
1622
|
include_rf=self._meridian.n_rf_channels > 0,
|
|
1560
1623
|
).data
|
|
@@ -1579,6 +1642,7 @@ class BudgetOptimizer:
|
|
|
1579
1642
|
if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
|
|
1580
1643
|
optimal_frequency = tf.convert_to_tensor(
|
|
1581
1644
|
self._analyzer.optimal_freq(
|
|
1645
|
+
new_data=filled_data.filter_fields(c.RF_DATA),
|
|
1582
1646
|
use_posterior=use_posterior,
|
|
1583
1647
|
selected_times=selected_time_dims,
|
|
1584
1648
|
use_kpi=use_kpi,
|
|
@@ -1595,6 +1659,7 @@ class BudgetOptimizer:
|
|
|
1595
1659
|
spend_bound_upper=optimization_upper_bound,
|
|
1596
1660
|
step_size=step_size,
|
|
1597
1661
|
selected_times=selected_time_dims,
|
|
1662
|
+
new_data=filled_data.filter_fields(c.PAID_DATA),
|
|
1598
1663
|
use_posterior=use_posterior,
|
|
1599
1664
|
use_kpi=use_kpi,
|
|
1600
1665
|
optimal_frequency=optimal_frequency,
|
|
@@ -1658,10 +1723,40 @@ class BudgetOptimizer:
|
|
|
1658
1723
|
attrs={c.SPEND_STEP_SIZE: spend_step_size},
|
|
1659
1724
|
)
|
|
1660
1725
|
|
|
1726
|
+
def _validate_selected_times(
|
|
1727
|
+
self,
|
|
1728
|
+
selected_times: tuple[str | None, str | None] | None,
|
|
1729
|
+
new_data: analyzer.DataTensors | None,
|
|
1730
|
+
) -> Sequence[str] | Sequence[bool] | None:
|
|
1731
|
+
"""Validates and returns the selected times."""
|
|
1732
|
+
if selected_times is None:
|
|
1733
|
+
return None
|
|
1734
|
+
start_date, end_date = selected_times
|
|
1735
|
+
if start_date is None and end_date is None:
|
|
1736
|
+
return None
|
|
1737
|
+
|
|
1738
|
+
new_data = new_data or analyzer.DataTensors()
|
|
1739
|
+
if new_data.get_modified_times(self._meridian) is None:
|
|
1740
|
+
return self._meridian.expand_selected_time_dims(
|
|
1741
|
+
start_date=start_date,
|
|
1742
|
+
end_date=end_date,
|
|
1743
|
+
)
|
|
1744
|
+
else:
|
|
1745
|
+
assert new_data.time is not None
|
|
1746
|
+
new_times_str = new_data.time.numpy().astype(str).tolist()
|
|
1747
|
+
time_coordinates = tc.TimeCoordinates.from_dates(new_times_str)
|
|
1748
|
+
expanded_dates = time_coordinates.expand_selected_time_dims(
|
|
1749
|
+
start_date=start_date,
|
|
1750
|
+
end_date=end_date,
|
|
1751
|
+
)
|
|
1752
|
+
expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
|
|
1753
|
+
return [x in expanded_str for x in new_times_str]
|
|
1754
|
+
|
|
1661
1755
|
def _get_incremental_outcome_tensors(
|
|
1662
1756
|
self,
|
|
1663
1757
|
hist_spend: np.ndarray,
|
|
1664
1758
|
spend: np.ndarray,
|
|
1759
|
+
new_data: analyzer.DataTensors | None = None,
|
|
1665
1760
|
optimal_frequency: Sequence[float] | None = None,
|
|
1666
1761
|
) -> tuple[
|
|
1667
1762
|
tf.Tensor | None,
|
|
@@ -1686,6 +1781,11 @@ class BudgetOptimizer:
|
|
|
1686
1781
|
Args:
|
|
1687
1782
|
hist_spend: historical spend data.
|
|
1688
1783
|
spend: new optimized spend data.
|
|
1784
|
+
new_data: An optional `DataTensors` object containing the new `media`,
|
|
1785
|
+
`reach`, and `frequency` tensors. If `None`, the existing tensors from
|
|
1786
|
+
the Meridian object are used. If any of the tensors is provided with a
|
|
1787
|
+
different number of time periods than in `InputData`, then all tensors
|
|
1788
|
+
must be provided with the same number of time periods.
|
|
1689
1789
|
optimal_frequency: xr.DataArray with dimension `n_rf_channels`, containing
|
|
1690
1790
|
the optimal frequency per channel, that maximizes posterior mean roi.
|
|
1691
1791
|
Value is `None` if the model does not contain reach and frequency data,
|
|
@@ -1696,13 +1796,18 @@ class BudgetOptimizer:
|
|
|
1696
1796
|
Tuple of tf.tensors (new_media, new_media_spend, new_reach, new_frequency,
|
|
1697
1797
|
new_rf_spend).
|
|
1698
1798
|
"""
|
|
1799
|
+
new_data = new_data or analyzer.DataTensors()
|
|
1800
|
+
filled_data = new_data.validate_and_fill_missing_data(
|
|
1801
|
+
c.PAID_CHANNELS,
|
|
1802
|
+
self._meridian,
|
|
1803
|
+
)
|
|
1699
1804
|
if self._meridian.n_media_channels > 0:
|
|
1700
1805
|
new_media = (
|
|
1701
1806
|
tf.math.divide_no_nan(
|
|
1702
1807
|
spend[: self._meridian.n_media_channels],
|
|
1703
1808
|
hist_spend[: self._meridian.n_media_channels],
|
|
1704
1809
|
)
|
|
1705
|
-
*
|
|
1810
|
+
* filled_data.media
|
|
1706
1811
|
)
|
|
1707
1812
|
new_media_spend = tf.convert_to_tensor(
|
|
1708
1813
|
spend[: self._meridian.n_media_channels]
|
|
@@ -1711,9 +1816,7 @@ class BudgetOptimizer:
|
|
|
1711
1816
|
new_media = None
|
|
1712
1817
|
new_media_spend = None
|
|
1713
1818
|
if self._meridian.n_rf_channels > 0:
|
|
1714
|
-
rf_media =
|
|
1715
|
-
self._meridian.rf_tensors.reach * self._meridian.rf_tensors.frequency
|
|
1716
|
-
)
|
|
1819
|
+
rf_media = filled_data.reach * filled_data.frequency
|
|
1717
1820
|
new_rf_media = (
|
|
1718
1821
|
tf.math.divide_no_nan(
|
|
1719
1822
|
spend[-self._meridian.n_rf_channels :],
|
|
@@ -1722,7 +1825,7 @@ class BudgetOptimizer:
|
|
|
1722
1825
|
* rf_media
|
|
1723
1826
|
)
|
|
1724
1827
|
frequency = (
|
|
1725
|
-
|
|
1828
|
+
filled_data.frequency
|
|
1726
1829
|
if optimal_frequency is None
|
|
1727
1830
|
else optimal_frequency
|
|
1728
1831
|
)
|
|
@@ -1742,9 +1845,10 @@ class BudgetOptimizer:
|
|
|
1742
1845
|
self,
|
|
1743
1846
|
hist_spend: np.ndarray,
|
|
1744
1847
|
spend: np.ndarray,
|
|
1848
|
+
new_data: analyzer.DataTensors | None = None,
|
|
1745
1849
|
use_posterior: bool = True,
|
|
1746
1850
|
use_kpi: bool = False,
|
|
1747
|
-
selected_times: Sequence[str] | None = None,
|
|
1851
|
+
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
1748
1852
|
optimal_frequency: Sequence[float] | None = None,
|
|
1749
1853
|
attrs: Mapping[str, Any] | None = None,
|
|
1750
1854
|
confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
|
|
@@ -1752,15 +1856,22 @@ class BudgetOptimizer:
|
|
|
1752
1856
|
use_historical_budget: bool = True,
|
|
1753
1857
|
) -> xr.Dataset:
|
|
1754
1858
|
"""Creates the budget dataset."""
|
|
1859
|
+
new_data = new_data or analyzer.DataTensors()
|
|
1860
|
+
filled_data = new_data.validate_and_fill_missing_data(
|
|
1861
|
+
c.PAID_DATA + (c.TIME,),
|
|
1862
|
+
self._meridian,
|
|
1863
|
+
)
|
|
1755
1864
|
spend = tf.convert_to_tensor(spend, dtype=tf.float32)
|
|
1756
1865
|
hist_spend = tf.convert_to_tensor(hist_spend, dtype=tf.float32)
|
|
1757
1866
|
(new_media, new_media_spend, new_reach, new_frequency, new_rf_spend) = (
|
|
1758
1867
|
self._get_incremental_outcome_tensors(
|
|
1759
|
-
hist_spend,
|
|
1868
|
+
hist_spend,
|
|
1869
|
+
spend,
|
|
1870
|
+
new_data=filled_data.filter_fields(c.PAID_CHANNELS),
|
|
1871
|
+
optimal_frequency=optimal_frequency,
|
|
1760
1872
|
)
|
|
1761
1873
|
)
|
|
1762
1874
|
budget = np.sum(spend)
|
|
1763
|
-
all_times = self._meridian.input_data.time.values.tolist()
|
|
1764
1875
|
|
|
1765
1876
|
# incremental_outcome here is a tensor with the shape
|
|
1766
1877
|
# (n_chains, n_draws, n_channels)
|
|
@@ -1770,6 +1881,7 @@ class BudgetOptimizer:
|
|
|
1770
1881
|
media=new_media,
|
|
1771
1882
|
reach=new_reach,
|
|
1772
1883
|
frequency=new_frequency,
|
|
1884
|
+
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
1773
1885
|
),
|
|
1774
1886
|
selected_times=selected_times,
|
|
1775
1887
|
use_kpi=use_kpi,
|
|
@@ -1792,6 +1904,9 @@ class BudgetOptimizer:
|
|
|
1792
1904
|
)
|
|
1793
1905
|
|
|
1794
1906
|
aggregated_impressions = self._analyzer.get_aggregated_impressions(
|
|
1907
|
+
new_data=analyzer.DataTensors(
|
|
1908
|
+
media=new_media, reach=new_reach, frequency=new_frequency
|
|
1909
|
+
),
|
|
1795
1910
|
selected_times=selected_times,
|
|
1796
1911
|
selected_geos=None,
|
|
1797
1912
|
aggregate_times=True,
|
|
@@ -1799,10 +1914,11 @@ class BudgetOptimizer:
|
|
|
1799
1914
|
optimal_frequency=optimal_frequency,
|
|
1800
1915
|
include_non_paid_channels=False,
|
|
1801
1916
|
)
|
|
1802
|
-
effectiveness = incremental_outcome / aggregated_impressions
|
|
1803
1917
|
effectiveness_with_mean_median_and_ci = (
|
|
1804
1918
|
analyzer.get_central_tendency_and_ci(
|
|
1805
|
-
data=
|
|
1919
|
+
data=tf.math.divide_no_nan(
|
|
1920
|
+
incremental_outcome, aggregated_impressions
|
|
1921
|
+
),
|
|
1806
1922
|
confidence_level=confidence_level,
|
|
1807
1923
|
include_median=True,
|
|
1808
1924
|
)
|
|
@@ -1822,6 +1938,7 @@ class BudgetOptimizer:
|
|
|
1822
1938
|
frequency=new_frequency,
|
|
1823
1939
|
media_spend=new_media_spend,
|
|
1824
1940
|
rf_spend=new_rf_spend,
|
|
1941
|
+
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
1825
1942
|
),
|
|
1826
1943
|
selected_times=selected_times,
|
|
1827
1944
|
batch_size=batch_size,
|
|
@@ -1860,6 +1977,18 @@ class BudgetOptimizer:
|
|
|
1860
1977
|
c.CPIK: ([c.CHANNEL, c.METRIC], cpik),
|
|
1861
1978
|
}
|
|
1862
1979
|
|
|
1980
|
+
all_times = (
|
|
1981
|
+
filled_data.time.numpy().astype(str).tolist()
|
|
1982
|
+
if filled_data.time is not None
|
|
1983
|
+
else self._meridian.input_data.time.values.tolist()
|
|
1984
|
+
)
|
|
1985
|
+
if selected_times is not None and all(
|
|
1986
|
+
isinstance(time, bool) for time in selected_times
|
|
1987
|
+
):
|
|
1988
|
+
selected_times = [
|
|
1989
|
+
time for time, selected in zip(all_times, selected_times) if selected
|
|
1990
|
+
]
|
|
1991
|
+
|
|
1863
1992
|
attributes = {
|
|
1864
1993
|
c.START_DATE: min(selected_times) if selected_times else all_times[0],
|
|
1865
1994
|
c.END_DATE: max(selected_times) if selected_times else all_times[-1],
|
|
@@ -1889,7 +2018,8 @@ class BudgetOptimizer:
|
|
|
1889
2018
|
i: int,
|
|
1890
2019
|
incremental_outcome_grid: np.ndarray,
|
|
1891
2020
|
multipliers_grid: tf.Tensor,
|
|
1892
|
-
|
|
2021
|
+
new_data: analyzer.DataTensors | None = None,
|
|
2022
|
+
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
1893
2023
|
use_posterior: bool = True,
|
|
1894
2024
|
use_kpi: bool = False,
|
|
1895
2025
|
optimal_frequency: xr.DataArray | None = None,
|
|
@@ -1904,8 +2034,16 @@ class BudgetOptimizer:
|
|
|
1904
2034
|
number of columns is equal to the number of total channels, containing
|
|
1905
2035
|
incremental outcome by channel.
|
|
1906
2036
|
multipliers_grid: A grid derived from spend.
|
|
1907
|
-
|
|
1908
|
-
`
|
|
2037
|
+
new_data: An optional `DataTensors` object containing the new `media`,
|
|
2038
|
+
`reach`, `frequency`, and `revenue_per_kpi` tensors. If `None`, the
|
|
2039
|
+
existing tensors from the Meridian object are used. If any of the
|
|
2040
|
+
tensors is provided with a different number of time periods than in
|
|
2041
|
+
`InputData`, then all tensors must be provided with the same number of
|
|
2042
|
+
time periods.
|
|
2043
|
+
selected_times: Optional list of times to optimize. This can either be a
|
|
2044
|
+
string list containing a subset of time dimension coordinates from
|
|
2045
|
+
`InputData.time` or a boolean list with length equal to the time
|
|
2046
|
+
dimension of the tensor. By default, all time periods are included.
|
|
1909
2047
|
use_posterior: Boolean. If `True`, then the incremental outcome is derived
|
|
1910
2048
|
from the posterior distribution of the model. Otherwise, the prior
|
|
1911
2049
|
distribution is used.
|
|
@@ -1922,10 +2060,14 @@ class BudgetOptimizer:
|
|
|
1922
2060
|
reducing `batch_size`. The calculation will generally be faster with
|
|
1923
2061
|
larger `batch_size` values.
|
|
1924
2062
|
"""
|
|
2063
|
+
new_data = new_data or analyzer.DataTensors()
|
|
2064
|
+
filled_data = new_data.validate_and_fill_missing_data(
|
|
2065
|
+
c.PAID_DATA, self._meridian
|
|
2066
|
+
)
|
|
1925
2067
|
if self._meridian.n_media_channels > 0:
|
|
1926
2068
|
new_media = (
|
|
1927
2069
|
multipliers_grid[i, : self._meridian.n_media_channels]
|
|
1928
|
-
*
|
|
2070
|
+
* filled_data.media
|
|
1929
2071
|
)
|
|
1930
2072
|
else:
|
|
1931
2073
|
new_media = None
|
|
@@ -1934,20 +2076,18 @@ class BudgetOptimizer:
|
|
|
1934
2076
|
new_frequency = None
|
|
1935
2077
|
new_reach = None
|
|
1936
2078
|
elif optimal_frequency is not None:
|
|
1937
|
-
new_frequency = (
|
|
1938
|
-
tf.ones_like(self._meridian.rf_tensors.frequency) * optimal_frequency
|
|
1939
|
-
)
|
|
2079
|
+
new_frequency = tf.ones_like(filled_data.frequency) * optimal_frequency
|
|
1940
2080
|
new_reach = tf.math.divide_no_nan(
|
|
1941
2081
|
multipliers_grid[i, -self._meridian.n_rf_channels :]
|
|
1942
|
-
*
|
|
1943
|
-
*
|
|
2082
|
+
* filled_data.reach
|
|
2083
|
+
* filled_data.frequency,
|
|
1944
2084
|
new_frequency,
|
|
1945
2085
|
)
|
|
1946
2086
|
else:
|
|
1947
|
-
new_frequency =
|
|
2087
|
+
new_frequency = filled_data.frequency
|
|
1948
2088
|
new_reach = (
|
|
1949
2089
|
multipliers_grid[i, -self._meridian.n_rf_channels :]
|
|
1950
|
-
*
|
|
2090
|
+
* filled_data.reach
|
|
1951
2091
|
)
|
|
1952
2092
|
|
|
1953
2093
|
# incremental_outcome returns a three dimensional tensor with dims
|
|
@@ -1960,6 +2100,7 @@ class BudgetOptimizer:
|
|
|
1960
2100
|
media=new_media,
|
|
1961
2101
|
reach=new_reach,
|
|
1962
2102
|
frequency=new_frequency,
|
|
2103
|
+
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
1963
2104
|
),
|
|
1964
2105
|
selected_times=selected_times,
|
|
1965
2106
|
use_kpi=use_kpi,
|
|
@@ -1976,7 +2117,8 @@ class BudgetOptimizer:
|
|
|
1976
2117
|
spend_bound_lower: np.ndarray,
|
|
1977
2118
|
spend_bound_upper: np.ndarray,
|
|
1978
2119
|
step_size: int,
|
|
1979
|
-
|
|
2120
|
+
new_data: analyzer.DataTensors | None = None,
|
|
2121
|
+
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
1980
2122
|
use_posterior: bool = True,
|
|
1981
2123
|
use_kpi: bool = False,
|
|
1982
2124
|
optimal_frequency: xr.DataArray | None = None,
|
|
@@ -1992,8 +2134,16 @@ class BudgetOptimizer:
|
|
|
1992
2134
|
containing the upper constraint spend for each channel.
|
|
1993
2135
|
step_size: Integer indicating the step size, or interval, between values
|
|
1994
2136
|
in the spend grid. All media channels have the same step size.
|
|
1995
|
-
|
|
1996
|
-
`
|
|
2137
|
+
new_data: An optional `DataTensors` object containing the new `media`,
|
|
2138
|
+
`reach`, `frequency`, and `revenue_per_kpi` tensors. If `None`, the
|
|
2139
|
+
existing tensors from the Meridian object are used. If any of the
|
|
2140
|
+
tensors is provided with a different number of time periods than in
|
|
2141
|
+
`InputData`, then all tensors must be provided with the same number of
|
|
2142
|
+
time periods.
|
|
2143
|
+
selected_times: Optional list of times to optimize. This can either be a
|
|
2144
|
+
string list containing a subset of time dimension coordinates from
|
|
2145
|
+
`InputData.time` or a boolean list with length equal to the time
|
|
2146
|
+
dimension of the tensor. By default, all time periods are included.
|
|
1997
2147
|
use_posterior: Boolean. If `True`, then the incremental outcome is derived
|
|
1998
2148
|
from the posterior distribution of the model. Otherwise, the prior
|
|
1999
2149
|
distribution is used.
|
|
@@ -2047,6 +2197,7 @@ class BudgetOptimizer:
|
|
|
2047
2197
|
incremental_outcome_grid=incremental_outcome_grid,
|
|
2048
2198
|
multipliers_grid=multipliers_grid,
|
|
2049
2199
|
selected_times=selected_times,
|
|
2200
|
+
new_data=new_data,
|
|
2050
2201
|
use_posterior=use_posterior,
|
|
2051
2202
|
use_kpi=use_kpi,
|
|
2052
2203
|
optimal_frequency=optimal_frequency,
|
meridian/analysis/summarizer.py
CHANGED
|
@@ -167,7 +167,9 @@ class Summarizer:
|
|
|
167
167
|
self._create_model_fit_card_html(
|
|
168
168
|
template_env, selected_times=selected_times
|
|
169
169
|
),
|
|
170
|
-
self._create_outcome_contrib_card_html(
|
|
170
|
+
self._create_outcome_contrib_card_html(
|
|
171
|
+
template_env, media_summary, selected_times=selected_times
|
|
172
|
+
),
|
|
171
173
|
self._create_performance_breakdown_card_html(
|
|
172
174
|
template_env, media_summary
|
|
173
175
|
),
|
|
@@ -267,16 +269,30 @@ class Summarizer:
|
|
|
267
269
|
self,
|
|
268
270
|
template_env: jinja2.Environment,
|
|
269
271
|
media_summary: visualizer.MediaSummary,
|
|
272
|
+
selected_times: Sequence[str] | None,
|
|
270
273
|
) -> str:
|
|
271
274
|
"""Creates the HTML snippet for the Outcome Contrib card."""
|
|
272
275
|
outcome = self._kpi_or_revenue()
|
|
273
276
|
|
|
277
|
+
num_selected_times = (
|
|
278
|
+
self._meridian.n_times
|
|
279
|
+
if selected_times is None
|
|
280
|
+
else len(selected_times)
|
|
281
|
+
)
|
|
282
|
+
time_granularity = (
|
|
283
|
+
c.WEEKLY
|
|
284
|
+
if num_selected_times < c.QUARTERLY_SUMMARY_THRESHOLD_WEEKS
|
|
285
|
+
else c.QUARTERLY
|
|
286
|
+
)
|
|
287
|
+
|
|
274
288
|
channel_contrib_area_chart = formatter.ChartSpec(
|
|
275
289
|
id=summary_text.CHANNEL_CONTRIB_BY_TIME_CHART_ID,
|
|
276
290
|
description=summary_text.CHANNEL_CONTRIB_BY_TIME_CHART_DESCRIPTION.format(
|
|
277
291
|
outcome=outcome
|
|
278
292
|
),
|
|
279
|
-
chart_json=media_summary.plot_channel_contribution_area_chart(
|
|
293
|
+
chart_json=media_summary.plot_channel_contribution_area_chart(
|
|
294
|
+
time_granularity=time_granularity
|
|
295
|
+
).to_json(),
|
|
280
296
|
)
|
|
281
297
|
|
|
282
298
|
channel_contrib_bump_chart = formatter.ChartSpec(
|
|
@@ -284,7 +300,9 @@ class Summarizer:
|
|
|
284
300
|
description=summary_text.CHANNEL_CONTRIB_RANK_CHART_DESCRIPTION.format(
|
|
285
301
|
outcome=outcome
|
|
286
302
|
),
|
|
287
|
-
chart_json=media_summary.plot_channel_contribution_bump_chart(
|
|
303
|
+
chart_json=media_summary.plot_channel_contribution_bump_chart(
|
|
304
|
+
time_granularity=time_granularity
|
|
305
|
+
).to_json(),
|
|
288
306
|
)
|
|
289
307
|
channel_drivers_chart = formatter.ChartSpec(
|
|
290
308
|
id=summary_text.CHANNEL_DRIVERS_CHART_ID,
|
meridian/analysis/visualizer.py
CHANGED
|
@@ -465,14 +465,14 @@ class ModelFit:
|
|
|
465
465
|
else:
|
|
466
466
|
y_axis_label = summary_text.KPI_LABEL
|
|
467
467
|
plot = (
|
|
468
|
-
alt.Chart(model_fit_df, width=c.
|
|
468
|
+
alt.Chart(model_fit_df, width=c.VEGALITE_FACET_EXTRA_LARGE_WIDTH)
|
|
469
469
|
.mark_line()
|
|
470
470
|
.encode(
|
|
471
471
|
x=alt.X(
|
|
472
472
|
f'{c.TIME}:T',
|
|
473
473
|
title='Time period',
|
|
474
474
|
axis=alt.Axis(
|
|
475
|
-
format=
|
|
475
|
+
format=c.QUARTER_FORMAT,
|
|
476
476
|
grid=False,
|
|
477
477
|
tickCount=8,
|
|
478
478
|
domainColor=c.GREY_300,
|
|
@@ -1657,18 +1657,36 @@ class MediaSummary:
|
|
|
1657
1657
|
self._marginal_roi_by_reach = marginal_roi_by_reach
|
|
1658
1658
|
self._non_media_baseline_values = non_media_baseline_values
|
|
1659
1659
|
|
|
1660
|
-
def plot_channel_contribution_area_chart(
|
|
1660
|
+
def plot_channel_contribution_area_chart(
|
|
1661
|
+
self, time_granularity: str = c.QUARTERLY
|
|
1662
|
+
) -> alt.Chart:
|
|
1661
1663
|
"""Plots a stacked area chart of the contribution share per channel by time.
|
|
1662
1664
|
|
|
1665
|
+
Args:
|
|
1666
|
+
time_granularity: The granularity for the time axis. Options are `weekly`
|
|
1667
|
+
or `quarterly`. Defaults to `quarterly`.
|
|
1668
|
+
|
|
1663
1669
|
Returns:
|
|
1664
1670
|
An Altair plot showing the contribution share per channel by time.
|
|
1671
|
+
|
|
1672
|
+
Raises:
|
|
1673
|
+
ValueError: If time_granularity is not one of the allowed constants.
|
|
1665
1674
|
"""
|
|
1675
|
+
if time_granularity not in c.TIME_GRANULARITIES:
|
|
1676
|
+
raise ValueError(
|
|
1677
|
+
f'time_granularity must be one of {c.TIME_GRANULARITIES}'
|
|
1678
|
+
)
|
|
1679
|
+
|
|
1680
|
+
x_axis_format = (
|
|
1681
|
+
c.DATE_FORMAT if time_granularity == c.WEEKLY else c.QUARTER_FORMAT
|
|
1682
|
+
)
|
|
1683
|
+
|
|
1666
1684
|
outcome_df = self._transform_contribution_metrics(
|
|
1667
1685
|
include_non_paid=True, aggregate_times=False
|
|
1668
1686
|
)
|
|
1669
1687
|
|
|
1670
1688
|
# Ensure proper ordering for the stacked area chart. Baseline should be at
|
|
1671
|
-
# the bottom.
|
|
1689
|
+
# the bottom. Separate the *stacking* order from the *legend* order.
|
|
1672
1690
|
stack_order = sorted([
|
|
1673
1691
|
channel
|
|
1674
1692
|
for channel in outcome_df[c.CHANNEL].unique()
|
|
@@ -1691,7 +1709,7 @@ class MediaSummary:
|
|
|
1691
1709
|
)
|
|
1692
1710
|
|
|
1693
1711
|
plot = (
|
|
1694
|
-
alt.Chart(outcome_df, width=c.
|
|
1712
|
+
alt.Chart(outcome_df, width=c.VEGALITE_FACET_EXTRA_LARGE_WIDTH)
|
|
1695
1713
|
.mark_area()
|
|
1696
1714
|
.transform_calculate(
|
|
1697
1715
|
sort_channel=f'indexof({stack_order}, datum.channel)'
|
|
@@ -1701,7 +1719,7 @@ class MediaSummary:
|
|
|
1701
1719
|
f'{c.TIME}:T',
|
|
1702
1720
|
title='Time period',
|
|
1703
1721
|
axis=alt.Axis(
|
|
1704
|
-
format=
|
|
1722
|
+
format=x_axis_format,
|
|
1705
1723
|
grid=False,
|
|
1706
1724
|
tickCount=8,
|
|
1707
1725
|
domainColor=c.GREY_300,
|
|
@@ -1730,12 +1748,13 @@ class MediaSummary:
|
|
|
1730
1748
|
labelFontSize=c.AXIS_FONT_SIZE,
|
|
1731
1749
|
labelFont=c.FONT_ROBOTO,
|
|
1732
1750
|
title=None,
|
|
1751
|
+
orient='bottom',
|
|
1733
1752
|
),
|
|
1734
1753
|
scale=alt.Scale(domain=legend_order),
|
|
1735
1754
|
sort=legend_order,
|
|
1736
1755
|
),
|
|
1737
1756
|
tooltip=[
|
|
1738
|
-
alt.Tooltip(f'{c.TIME}:T', format=
|
|
1757
|
+
alt.Tooltip(f'{c.TIME}:T', format=c.DATE_FORMAT),
|
|
1739
1758
|
c.CHANNEL,
|
|
1740
1759
|
alt.Tooltip(f'{c.INCREMENTAL_OUTCOME}:Q', format=',.2f'),
|
|
1741
1760
|
],
|
|
@@ -1751,16 +1770,31 @@ class MediaSummary:
|
|
|
1751
1770
|
)
|
|
1752
1771
|
return plot
|
|
1753
1772
|
|
|
1754
|
-
def plot_channel_contribution_bump_chart(
|
|
1755
|
-
|
|
1773
|
+
def plot_channel_contribution_bump_chart(
|
|
1774
|
+
self, time_granularity: str = c.QUARTERLY
|
|
1775
|
+
) -> alt.Chart:
|
|
1776
|
+
"""Plots a bump chart of channel contribution rank over time.
|
|
1756
1777
|
|
|
1757
1778
|
This chart shows the relative rank of each channel's contribution,
|
|
1758
|
-
including the baseline, based on incremental outcome
|
|
1779
|
+
including the baseline, based on incremental outcome. Depending on the
|
|
1780
|
+
time_granularity, ranks are shown either weekly or at the end of each
|
|
1759
1781
|
quarter. Rank 1 represents the highest contribution.
|
|
1760
1782
|
|
|
1783
|
+
Args:
|
|
1784
|
+
time_granularity: The granularity for the time axis. Options are `weekly`
|
|
1785
|
+
or `quarterly`. Defaults to `quarterly`.
|
|
1786
|
+
|
|
1761
1787
|
Returns:
|
|
1762
|
-
An Altair plot showing the contribution rank per channel by
|
|
1788
|
+
An Altair plot showing the contribution rank per channel by time.
|
|
1789
|
+
|
|
1790
|
+
Raises:
|
|
1791
|
+
ValueError: If time_granularity is not one of the allowed constants.
|
|
1763
1792
|
"""
|
|
1793
|
+
if time_granularity not in c.TIME_GRANULARITIES:
|
|
1794
|
+
raise ValueError(
|
|
1795
|
+
f'time_granularity must be one of {c.TIME_GRANULARITIES}'
|
|
1796
|
+
)
|
|
1797
|
+
|
|
1764
1798
|
outcome_df = self._transform_contribution_metrics(
|
|
1765
1799
|
include_non_paid=True, aggregate_times=False
|
|
1766
1800
|
)
|
|
@@ -1770,30 +1804,37 @@ class MediaSummary:
|
|
|
1770
1804
|
method='first', ascending=False
|
|
1771
1805
|
)
|
|
1772
1806
|
|
|
1773
|
-
|
|
1774
|
-
|
|
1775
|
-
|
|
1776
|
-
|
|
1777
|
-
|
|
1778
|
-
|
|
1779
|
-
|
|
1780
|
-
|
|
1807
|
+
if time_granularity == c.QUARTERLY:
|
|
1808
|
+
# Filter data to keep only the last available date within each quarter
|
|
1809
|
+
# for a quarterly view of ranking changes.
|
|
1810
|
+
unique_times = pd.Series(outcome_df[c.TIME].unique()).sort_values()
|
|
1811
|
+
quarters = unique_times.dt.to_period('Q')
|
|
1812
|
+
quarterly_dates = unique_times[~quarters.duplicated(keep='last')]
|
|
1813
|
+
plot_df = outcome_df[outcome_df[c.TIME].isin(quarterly_dates)].copy()
|
|
1814
|
+
x_axis_format = c.QUARTER_FORMAT
|
|
1815
|
+
tooltip_time_format = c.QUARTER_FORMAT
|
|
1816
|
+
tooltip_time_title = 'Quarter'
|
|
1817
|
+
else:
|
|
1818
|
+
plot_df = outcome_df.copy()
|
|
1819
|
+
x_axis_format = c.DATE_FORMAT
|
|
1820
|
+
tooltip_time_format = c.DATE_FORMAT
|
|
1821
|
+
tooltip_time_title = 'Week'
|
|
1781
1822
|
|
|
1782
1823
|
legend_order = [c.BASELINE] + sorted([
|
|
1783
1824
|
channel
|
|
1784
|
-
for channel in
|
|
1825
|
+
for channel in plot_df[c.CHANNEL].unique()
|
|
1785
1826
|
if channel != c.BASELINE
|
|
1786
1827
|
])
|
|
1787
1828
|
|
|
1788
1829
|
plot = (
|
|
1789
|
-
alt.Chart(
|
|
1830
|
+
alt.Chart(plot_df, width=c.VEGALITE_FACET_EXTRA_LARGE_WIDTH)
|
|
1790
1831
|
.mark_line(point=True)
|
|
1791
1832
|
.encode(
|
|
1792
1833
|
x=alt.X(
|
|
1793
1834
|
f'{c.TIME}:T',
|
|
1794
1835
|
title='Time period',
|
|
1795
1836
|
axis=alt.Axis(
|
|
1796
|
-
format=
|
|
1837
|
+
format=x_axis_format,
|
|
1797
1838
|
grid=False,
|
|
1798
1839
|
domainColor=c.GREY_300,
|
|
1799
1840
|
),
|
|
@@ -1819,12 +1860,17 @@ class MediaSummary:
|
|
|
1819
1860
|
labelFontSize=c.AXIS_FONT_SIZE,
|
|
1820
1861
|
labelFont=c.FONT_ROBOTO,
|
|
1821
1862
|
title=None,
|
|
1863
|
+
orient='bottom',
|
|
1822
1864
|
),
|
|
1823
1865
|
scale=alt.Scale(domain=legend_order),
|
|
1824
1866
|
sort=legend_order,
|
|
1825
1867
|
),
|
|
1826
1868
|
tooltip=[
|
|
1827
|
-
alt.Tooltip(
|
|
1869
|
+
alt.Tooltip(
|
|
1870
|
+
f'{c.TIME}:T',
|
|
1871
|
+
format=tooltip_time_format,
|
|
1872
|
+
title=tooltip_time_title,
|
|
1873
|
+
),
|
|
1828
1874
|
alt.Tooltip(f'{c.CHANNEL}:N', title='Channel'),
|
|
1829
1875
|
alt.Tooltip('rank:O', title='Rank'),
|
|
1830
1876
|
alt.Tooltip(
|
meridian/constants.py
CHANGED
|
@@ -51,6 +51,8 @@ GREY_300 = '#DADCE0'
|
|
|
51
51
|
|
|
52
52
|
# Example: "2024-01-09"
|
|
53
53
|
DATE_FORMAT = '%Y-%m-%d'
|
|
54
|
+
# Example: "2024 Apr"
|
|
55
|
+
QUARTER_FORMAT = '%Y %b'
|
|
54
56
|
|
|
55
57
|
# Input data variables.
|
|
56
58
|
KPI = 'kpi'
|
|
@@ -95,12 +97,8 @@ POSSIBLE_INPUT_DATA_ARRAY_NAMES = (
|
|
|
95
97
|
+ MEDIA_INPUT_DATA_ARRAY_NAMES
|
|
96
98
|
+ RF_INPUT_DATA_ARRAY_NAMES
|
|
97
99
|
)
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
REACH,
|
|
101
|
-
FREQUENCY,
|
|
102
|
-
REVENUE_PER_KPI,
|
|
103
|
-
)
|
|
100
|
+
PAID_CHANNELS = (MEDIA, REACH, FREQUENCY)
|
|
101
|
+
PAID_DATA = PAID_CHANNELS + (REVENUE_PER_KPI,)
|
|
104
102
|
NON_PAID_DATA = (
|
|
105
103
|
ORGANIC_MEDIA,
|
|
106
104
|
ORGANIC_REACH,
|
|
@@ -112,11 +110,7 @@ SPEND_DATA = (
|
|
|
112
110
|
RF_SPEND,
|
|
113
111
|
)
|
|
114
112
|
PERFORMANCE_DATA = PAID_DATA + SPEND_DATA
|
|
115
|
-
IMPRESSIONS_DATA =
|
|
116
|
-
MEDIA,
|
|
117
|
-
REACH,
|
|
118
|
-
FREQUENCY,
|
|
119
|
-
) + NON_PAID_DATA
|
|
113
|
+
IMPRESSIONS_DATA = PAID_CHANNELS + NON_PAID_DATA
|
|
120
114
|
RF_DATA = (
|
|
121
115
|
REACH,
|
|
122
116
|
FREQUENCY,
|
|
@@ -622,3 +616,10 @@ CARD_STATS = 'stats'
|
|
|
622
616
|
# VegaLite common params.
|
|
623
617
|
VEGALITE_FACET_DEFAULT_WIDTH = 400
|
|
624
618
|
VEGALITE_FACET_LARGE_WIDTH = 500
|
|
619
|
+
VEGALITE_FACET_EXTRA_LARGE_WIDTH = 900
|
|
620
|
+
|
|
621
|
+
# Time Granularity Constants
|
|
622
|
+
WEEKLY = 'weekly'
|
|
623
|
+
QUARTERLY = 'quarterly'
|
|
624
|
+
TIME_GRANULARITIES = frozenset({WEEKLY, QUARTERLY})
|
|
625
|
+
QUARTERLY_SUMMARY_THRESHOLD_WEEKS = 52
|
meridian/model/model.py
CHANGED
|
@@ -149,6 +149,7 @@ class Meridian:
|
|
|
149
149
|
self._validate_paid_media_prior_type()
|
|
150
150
|
self._validate_geo_invariants()
|
|
151
151
|
self._validate_time_invariants()
|
|
152
|
+
self._validate_kpi_transformer()
|
|
152
153
|
|
|
153
154
|
@property
|
|
154
155
|
def input_data(self) -> data.InputData:
|
|
@@ -410,6 +411,7 @@ class Meridian:
|
|
|
410
411
|
set_total_media_contribution_prior=set_total_media_contribution_prior,
|
|
411
412
|
kpi=np.sum(self.input_data.kpi.values),
|
|
412
413
|
total_spend=agg_total_spend,
|
|
414
|
+
media_effects_dist=self.media_effects_dist,
|
|
413
415
|
)
|
|
414
416
|
|
|
415
417
|
@functools.cached_property
|
|
@@ -825,6 +827,19 @@ class Meridian:
|
|
|
825
827
|
" the listed variables that do not vary across time."
|
|
826
828
|
)
|
|
827
829
|
|
|
830
|
+
def _validate_kpi_transformer(self):
|
|
831
|
+
"""Validates the KPI transformer."""
|
|
832
|
+
if (
|
|
833
|
+
self.kpi_transformer.population_scaled_stdev == 0
|
|
834
|
+
and self.model_spec.paid_media_prior_type
|
|
835
|
+
in constants.PAID_MEDIA_ROI_PRIOR_TYPES
|
|
836
|
+
):
|
|
837
|
+
kpi = "kpi" if self.is_national else "population_scaled_kpi"
|
|
838
|
+
raise ValueError(
|
|
839
|
+
f"`{kpi}` cannot be constant with"
|
|
840
|
+
f" {self.model_spec.paid_media_prior_type} prior type."
|
|
841
|
+
)
|
|
842
|
+
|
|
828
843
|
def adstock_hill_media(
|
|
829
844
|
self,
|
|
830
845
|
media: tf.Tensor, # pylint: disable=redefined-outer-name
|
|
@@ -455,6 +455,7 @@ class PriorDistribution:
|
|
|
455
455
|
set_total_media_contribution_prior: bool,
|
|
456
456
|
kpi: float,
|
|
457
457
|
total_spend: np.ndarray,
|
|
458
|
+
media_effects_dist: str,
|
|
458
459
|
) -> PriorDistribution:
|
|
459
460
|
"""Returns a new `PriorDistribution` with broadcast distribution attributes.
|
|
460
461
|
|
|
@@ -480,6 +481,8 @@ class PriorDistribution:
|
|
|
480
481
|
`set_total_media_contribution_prior=True`.
|
|
481
482
|
total_spend: Spend per media channel summed across geos and time. Required
|
|
482
483
|
if `set_total_media_contribution_prior=True`.
|
|
484
|
+
media_effects_dist: A string to specify the distribution of media random
|
|
485
|
+
effects across geos.
|
|
483
486
|
|
|
484
487
|
Returns:
|
|
485
488
|
A new `PriorDistribution` broadcast from this prior distribution,
|
|
@@ -757,6 +760,7 @@ class PriorDistribution:
|
|
|
757
760
|
)
|
|
758
761
|
else:
|
|
759
762
|
roi_m_converted = self.roi_m
|
|
763
|
+
_check_for_negative_effect(roi_m_converted, media_effects_dist)
|
|
760
764
|
roi_m = tfp.distributions.BatchBroadcast(
|
|
761
765
|
roi_m_converted, n_media_channels, name=constants.ROI_M
|
|
762
766
|
)
|
|
@@ -777,13 +781,15 @@ class PriorDistribution:
|
|
|
777
781
|
)
|
|
778
782
|
else:
|
|
779
783
|
roi_rf_converted = self.roi_rf
|
|
784
|
+
_check_for_negative_effect(roi_rf_converted, media_effects_dist)
|
|
780
785
|
roi_rf = tfp.distributions.BatchBroadcast(
|
|
781
786
|
roi_rf_converted, n_rf_channels, name=constants.ROI_RF
|
|
782
787
|
)
|
|
783
|
-
|
|
788
|
+
_check_for_negative_effect(self.mroi_m, media_effects_dist)
|
|
784
789
|
mroi_m = tfp.distributions.BatchBroadcast(
|
|
785
790
|
self.mroi_m, n_media_channels, name=constants.MROI_M
|
|
786
791
|
)
|
|
792
|
+
_check_for_negative_effect(self.mroi_rf, media_effects_dist)
|
|
787
793
|
mroi_rf = tfp.distributions.BatchBroadcast(
|
|
788
794
|
self.mroi_rf, n_rf_channels, name=constants.MROI_RF
|
|
789
795
|
)
|
|
@@ -885,6 +891,21 @@ def _get_total_media_contribution_prior(
|
|
|
885
891
|
return tfp.distributions.LogNormal(lognormal_mu, lognormal_sigma, name=name)
|
|
886
892
|
|
|
887
893
|
|
|
894
|
+
def _check_for_negative_effect(
|
|
895
|
+
dist: tfp.distributions.Distribution, media_effects_dist: str
|
|
896
|
+
):
|
|
897
|
+
"""Checks for negative effect in the model."""
|
|
898
|
+
if (
|
|
899
|
+
media_effects_dist == constants.MEDIA_EFFECTS_LOG_NORMAL
|
|
900
|
+
and np.any(dist.cdf(0)) > 0
|
|
901
|
+
):
|
|
902
|
+
raise ValueError(
|
|
903
|
+
'Media priors must have non-negative support when'
|
|
904
|
+
f' `media_effects_dist`="{media_effects_dist}". Found negative effect'
|
|
905
|
+
f' in {dist.name}.'
|
|
906
|
+
)
|
|
907
|
+
|
|
908
|
+
|
|
888
909
|
def distributions_are_equal(
|
|
889
910
|
a: tfp.distributions.Distribution, b: tfp.distributions.Distribution
|
|
890
911
|
) -> bool:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|