google-meridian 1.0.9__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {google_meridian-1.0.9.dist-info → google_meridian-1.1.1.dist-info}/METADATA +2 -2
- google_meridian-1.1.1.dist-info/RECORD +41 -0
- {google_meridian-1.0.9.dist-info → google_meridian-1.1.1.dist-info}/WHEEL +1 -1
- meridian/__init__.py +2 -2
- meridian/analysis/__init__.py +1 -1
- meridian/analysis/analyzer.py +213 -206
- meridian/analysis/formatter.py +1 -1
- meridian/analysis/optimizer.py +264 -66
- meridian/analysis/summarizer.py +5 -5
- meridian/analysis/summary_text.py +1 -1
- meridian/analysis/test_utils.py +82 -82
- meridian/analysis/visualizer.py +14 -19
- meridian/constants.py +103 -19
- meridian/data/__init__.py +1 -1
- meridian/data/arg_builder.py +1 -1
- meridian/data/input_data.py +127 -27
- meridian/data/load.py +53 -40
- meridian/data/test_utils.py +172 -44
- meridian/data/time_coordinates.py +4 -4
- meridian/model/__init__.py +1 -1
- meridian/model/adstock_hill.py +1 -1
- meridian/model/knots.py +1 -1
- meridian/model/media.py +134 -99
- meridian/model/model.py +494 -84
- meridian/model/model_test_data.py +86 -1
- meridian/model/posterior_sampler.py +139 -58
- meridian/model/prior_distribution.py +97 -52
- meridian/model/prior_sampler.py +209 -233
- meridian/model/spec.py +197 -37
- meridian/model/transformers.py +16 -4
- google_meridian-1.0.9.dist-info/RECORD +0 -41
- {google_meridian-1.0.9.dist-info → google_meridian-1.1.1.dist-info}/licenses/LICENSE +0 -0
- {google_meridian-1.0.9.dist-info → google_meridian-1.1.1.dist-info}/top_level.txt +0 -0
meridian/analysis/analyzer.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2025 The Meridian Authors.
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
from collections.abc import Mapping, Sequence
|
|
18
18
|
import itertools
|
|
19
|
+
import numbers
|
|
19
20
|
from typing import Any, Optional
|
|
20
21
|
import warnings
|
|
21
22
|
|
|
@@ -37,6 +38,20 @@ __all__ = [
|
|
|
37
38
|
]
|
|
38
39
|
|
|
39
40
|
|
|
41
|
+
def _validate_non_media_baseline_values_numbers(
|
|
42
|
+
non_media_baseline_values: Sequence[str | float] | None,
|
|
43
|
+
):
|
|
44
|
+
if non_media_baseline_values is None:
|
|
45
|
+
return
|
|
46
|
+
|
|
47
|
+
for value in non_media_baseline_values:
|
|
48
|
+
if not isinstance(value, numbers.Number):
|
|
49
|
+
raise ValueError(
|
|
50
|
+
f"Invalid `non_media_baseline_values` value: '{value}'. Only float"
|
|
51
|
+
" numbers are supported."
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
|
|
40
55
|
# TODO: Refactor the related unit tests to be under DataTensors.
|
|
41
56
|
class DataTensors(tf.experimental.ExtensionType):
|
|
42
57
|
"""Container for data variable arguments of Analyzer methods.
|
|
@@ -181,6 +196,7 @@ class DataTensors(tf.experimental.ExtensionType):
|
|
|
181
196
|
new_tensor is not None
|
|
182
197
|
and old_tensor is not None
|
|
183
198
|
and new_tensor.ndim > 1
|
|
199
|
+
and old_tensor.ndim > 1
|
|
184
200
|
and new_tensor.shape[1] != old_tensor.shape[1]
|
|
185
201
|
):
|
|
186
202
|
return new_tensor.shape[1]
|
|
@@ -653,22 +669,16 @@ def _scale_tensors_by_multiplier(
|
|
|
653
669
|
data: DataTensors,
|
|
654
670
|
multiplier: float,
|
|
655
671
|
by_reach: bool,
|
|
656
|
-
non_media_treatments_baseline: tf.Tensor | None = None,
|
|
657
672
|
) -> DataTensors:
|
|
658
673
|
"""Get scaled tensors for incremental outcome calculation.
|
|
659
674
|
|
|
660
675
|
Args:
|
|
661
676
|
data: DataTensors object containing the optional tensors to scale. Only
|
|
662
|
-
`media`, `reach`, `frequency`, `organic_media`, `organic_reach`,
|
|
663
|
-
`organic_frequency
|
|
664
|
-
remain unchanged.
|
|
677
|
+
`media`, `reach`, `frequency`, `organic_media`, `organic_reach`, and
|
|
678
|
+
`organic_frequency` are scaled. The other tensors remain unchanged.
|
|
665
679
|
multiplier: Float indicating the factor to scale tensors by.
|
|
666
680
|
by_reach: Boolean indicating whether to scale reach or frequency when rf
|
|
667
681
|
data is available.
|
|
668
|
-
non_media_treatments_baseline: Optional tensor to overwrite
|
|
669
|
-
`data.non_media_treatments` in the output. Used to compute the
|
|
670
|
-
conterfactual values for incremental outcome calculation. If not used, the
|
|
671
|
-
unmodified `data.non_media_treatments` tensor is returned in the output.
|
|
672
682
|
|
|
673
683
|
Returns:
|
|
674
684
|
A `DataTensors` object containing scaled tensor parameters. The original
|
|
@@ -697,14 +707,9 @@ def _scale_tensors_by_multiplier(
|
|
|
697
707
|
incremented_data[constants.ORGANIC_FREQUENCY] = (
|
|
698
708
|
data.organic_frequency * multiplier
|
|
699
709
|
)
|
|
700
|
-
if non_media_treatments_baseline is not None:
|
|
701
|
-
incremented_data[constants.NON_MEDIA_TREATMENTS] = (
|
|
702
|
-
non_media_treatments_baseline
|
|
703
|
-
)
|
|
704
|
-
else:
|
|
705
|
-
incremented_data[constants.NON_MEDIA_TREATMENTS] = data.non_media_treatments
|
|
706
710
|
|
|
707
711
|
# Include the original data that does not get scaled.
|
|
712
|
+
incremented_data[constants.NON_MEDIA_TREATMENTS] = data.non_media_treatments
|
|
708
713
|
incremented_data[constants.MEDIA_SPEND] = data.media_spend
|
|
709
714
|
incremented_data[constants.RF_SPEND] = data.rf_spend
|
|
710
715
|
incremented_data[constants.CONTROLS] = data.controls
|
|
@@ -754,79 +759,6 @@ def _central_tendency_and_ci_by_prior_and_posterior(
|
|
|
754
759
|
return xr.Dataset(data_vars=xr_data, coords=xr_coords)
|
|
755
760
|
|
|
756
761
|
|
|
757
|
-
def _compute_non_media_baseline(
|
|
758
|
-
non_media_treatments: tf.Tensor,
|
|
759
|
-
non_media_baseline_values: Sequence[float | str] | None = None,
|
|
760
|
-
non_media_selected_times: Sequence[bool] | None = None,
|
|
761
|
-
) -> tf.Tensor:
|
|
762
|
-
"""Computes the baseline for each non-media treatment channel.
|
|
763
|
-
|
|
764
|
-
Args:
|
|
765
|
-
non_media_treatments: The non-media treatment input data.
|
|
766
|
-
non_media_baseline_values: Optional list of shape (n_non_media_channels,).
|
|
767
|
-
Each element is either a float (which means that the fixed value will be
|
|
768
|
-
used as baseline for the given channel) or one of the strings "min" or
|
|
769
|
-
"max" (which mean that the global minimum or maximum value will be used as
|
|
770
|
-
baseline for the values of the given non_media treatment channel). If
|
|
771
|
-
None, the minimum value is used as baseline for each non_media treatment
|
|
772
|
-
channel.
|
|
773
|
-
non_media_selected_times: Optional list of shape (n_times,). Each element is
|
|
774
|
-
a boolean indicating whether the corresponding time period should be
|
|
775
|
-
included in the baseline computation.
|
|
776
|
-
|
|
777
|
-
Returns:
|
|
778
|
-
A tensor of shape (n_geos, n_times, n_non_media_channels) containing the
|
|
779
|
-
baseline values for each non-media treatment channel.
|
|
780
|
-
"""
|
|
781
|
-
|
|
782
|
-
if non_media_selected_times is None:
|
|
783
|
-
non_media_selected_times = [True] * non_media_treatments.shape[-2]
|
|
784
|
-
|
|
785
|
-
if non_media_baseline_values is None:
|
|
786
|
-
# If non_media_baseline_values is not provided, use the minimum value for
|
|
787
|
-
# each non_media treatment channel as the baseline.
|
|
788
|
-
non_media_baseline_values_filled = [
|
|
789
|
-
constants.NON_MEDIA_BASELINE_MIN
|
|
790
|
-
] * non_media_treatments.shape[-1]
|
|
791
|
-
else:
|
|
792
|
-
non_media_baseline_values_filled = non_media_baseline_values
|
|
793
|
-
|
|
794
|
-
if non_media_treatments.shape[-1] != len(non_media_baseline_values_filled):
|
|
795
|
-
raise ValueError(
|
|
796
|
-
"The number of non-media channels"
|
|
797
|
-
f" ({non_media_treatments.shape[-1]}) does not match the number"
|
|
798
|
-
f" of baseline types ({len(non_media_baseline_values_filled)})."
|
|
799
|
-
)
|
|
800
|
-
|
|
801
|
-
baseline_list = []
|
|
802
|
-
for channel in range(non_media_treatments.shape[-1]):
|
|
803
|
-
baseline_value = non_media_baseline_values_filled[channel]
|
|
804
|
-
|
|
805
|
-
if baseline_value == constants.NON_MEDIA_BASELINE_MIN:
|
|
806
|
-
baseline_for_channel = tf.reduce_min(
|
|
807
|
-
non_media_treatments[..., channel], axis=[0, 1]
|
|
808
|
-
)
|
|
809
|
-
elif baseline_value == constants.NON_MEDIA_BASELINE_MAX:
|
|
810
|
-
baseline_for_channel = tf.reduce_max(
|
|
811
|
-
non_media_treatments[..., channel], axis=[0, 1]
|
|
812
|
-
)
|
|
813
|
-
elif isinstance(baseline_value, float):
|
|
814
|
-
baseline_for_channel = tf.cast(baseline_value, tf.float32)
|
|
815
|
-
else:
|
|
816
|
-
raise ValueError(
|
|
817
|
-
f"Invalid non_media_baseline_values value: '{baseline_value}'. Only"
|
|
818
|
-
" float numbers and strings 'min' and 'max' are supported."
|
|
819
|
-
)
|
|
820
|
-
|
|
821
|
-
baseline_list.append(
|
|
822
|
-
baseline_for_channel
|
|
823
|
-
* tf.ones_like(non_media_treatments[..., channel])
|
|
824
|
-
* non_media_selected_times
|
|
825
|
-
)
|
|
826
|
-
|
|
827
|
-
return tf.stack(baseline_list, axis=-1)
|
|
828
|
-
|
|
829
|
-
|
|
830
762
|
class Analyzer:
|
|
831
763
|
"""Runs calculations to analyze the raw data after fitting the model."""
|
|
832
764
|
|
|
@@ -853,10 +785,10 @@ class Analyzer:
|
|
|
853
785
|
`media`, `reach`, `frequency`, `organic_media`, `organic_reach`,
|
|
854
786
|
`organic_frequency`, `non_media_treatments`, `controls`. The `media`,
|
|
855
787
|
`reach`, `organic_media`, `organic_reach` and `non_media_treatments`
|
|
856
|
-
tensors are
|
|
788
|
+
tensors are expected to be scaled by their corresponding transformers.
|
|
857
789
|
dist_tensors: A `DistributionTensors` container with the distribution
|
|
858
790
|
tensors for media, RF, organic media, organic RF, non-media treatments,
|
|
859
|
-
and controls.
|
|
791
|
+
and controls (if available).
|
|
860
792
|
|
|
861
793
|
Returns:
|
|
862
794
|
Tensor representing computed kpi means.
|
|
@@ -871,17 +803,15 @@ class Analyzer:
|
|
|
871
803
|
)
|
|
872
804
|
)
|
|
873
805
|
|
|
874
|
-
result = (
|
|
875
|
-
|
|
876
|
-
+ tf.einsum(
|
|
877
|
-
"...gtm,...gm->...gt", combined_media_transformed, combined_beta
|
|
878
|
-
)
|
|
879
|
-
+ tf.einsum(
|
|
880
|
-
"...gtc,...gc->...gt",
|
|
881
|
-
data_tensors.controls,
|
|
882
|
-
dist_tensors.gamma_gc,
|
|
883
|
-
)
|
|
806
|
+
result = tau_gt + tf.einsum(
|
|
807
|
+
"...gtm,...gm->...gt", combined_media_transformed, combined_beta
|
|
884
808
|
)
|
|
809
|
+
if self._meridian.controls is not None:
|
|
810
|
+
result += tf.einsum(
|
|
811
|
+
"...gtc,...gc->...gt",
|
|
812
|
+
data_tensors.controls,
|
|
813
|
+
dist_tensors.gamma_gc,
|
|
814
|
+
)
|
|
885
815
|
if data_tensors.non_media_treatments is not None:
|
|
886
816
|
result += tf.einsum(
|
|
887
817
|
"...gtm,...gm->...gt",
|
|
@@ -1064,7 +994,7 @@ class Analyzer:
|
|
|
1064
994
|
organic_media=self._meridian.organic_media_tensors.organic_media_scaled,
|
|
1065
995
|
organic_reach=self._meridian.organic_rf_tensors.organic_reach_scaled,
|
|
1066
996
|
organic_frequency=self._meridian.organic_rf_tensors.organic_frequency,
|
|
1067
|
-
non_media_treatments=self._meridian.
|
|
997
|
+
non_media_treatments=self._meridian.non_media_treatments_normalized,
|
|
1068
998
|
controls=self._meridian.controls_scaled,
|
|
1069
999
|
revenue_per_kpi=self._meridian.revenue_per_kpi,
|
|
1070
1000
|
)
|
|
@@ -1113,10 +1043,10 @@ class Analyzer:
|
|
|
1113
1043
|
if new_data.organic_frequency is not None
|
|
1114
1044
|
else self._meridian.organic_rf_tensors.organic_frequency
|
|
1115
1045
|
)
|
|
1116
|
-
|
|
1046
|
+
non_media_treatments_normalized = _transformed_new_or_scaled(
|
|
1117
1047
|
new_variable=new_data.non_media_treatments,
|
|
1118
1048
|
transformer=self._meridian.non_media_transformer,
|
|
1119
|
-
scaled_variable=self._meridian.
|
|
1049
|
+
scaled_variable=self._meridian.non_media_treatments_normalized,
|
|
1120
1050
|
)
|
|
1121
1051
|
return DataTensors(
|
|
1122
1052
|
media=media_scaled,
|
|
@@ -1125,7 +1055,7 @@ class Analyzer:
|
|
|
1125
1055
|
organic_media=organic_media_scaled,
|
|
1126
1056
|
organic_reach=organic_reach_scaled,
|
|
1127
1057
|
organic_frequency=organic_frequency,
|
|
1128
|
-
non_media_treatments=
|
|
1058
|
+
non_media_treatments=non_media_treatments_normalized,
|
|
1129
1059
|
controls=controls_scaled,
|
|
1130
1060
|
revenue_per_kpi=revenue_per_kpi,
|
|
1131
1061
|
)
|
|
@@ -1532,11 +1462,14 @@ class Analyzer:
|
|
|
1532
1462
|
(n_chains, 0, self._meridian.n_geos, self._meridian.n_times)
|
|
1533
1463
|
)
|
|
1534
1464
|
batch_starting_indices = np.arange(n_draws, step=batch_size)
|
|
1535
|
-
param_list =
|
|
1536
|
-
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
|
|
1465
|
+
param_list = (
|
|
1466
|
+
[
|
|
1467
|
+
constants.MU_T,
|
|
1468
|
+
constants.TAU_G,
|
|
1469
|
+
]
|
|
1470
|
+
+ ([constants.GAMMA_GC] if self._meridian.n_controls else [])
|
|
1471
|
+
+ self._get_causal_param_names(include_non_paid_channels=True)
|
|
1472
|
+
)
|
|
1540
1473
|
outcome_means_temps = []
|
|
1541
1474
|
for start_index in batch_starting_indices:
|
|
1542
1475
|
stop_index = np.min([n_draws, start_index + batch_size])
|
|
@@ -1594,7 +1527,7 @@ class Analyzer:
|
|
|
1594
1527
|
self,
|
|
1595
1528
|
data_tensors: DataTensors,
|
|
1596
1529
|
dist_tensors: DistributionTensors,
|
|
1597
|
-
|
|
1530
|
+
non_media_treatments_baseline_normalized: Sequence[float] | None = None,
|
|
1598
1531
|
) -> tf.Tensor:
|
|
1599
1532
|
"""Computes incremental KPI distribution.
|
|
1600
1533
|
|
|
@@ -1608,17 +1541,26 @@ class Analyzer:
|
|
|
1608
1541
|
dist_tensors: A `DistributionTensors` container with the distribution
|
|
1609
1542
|
tensors for media, RF, organic media, organic RF and non-media
|
|
1610
1543
|
treatments channels.
|
|
1611
|
-
|
|
1612
|
-
Each element is
|
|
1613
|
-
|
|
1614
|
-
|
|
1615
|
-
|
|
1616
|
-
|
|
1617
|
-
|
|
1544
|
+
non_media_treatments_baseline_normalized: Optional list of shape
|
|
1545
|
+
`(n_non_media_channels,)`. Each element is a float that will be used as
|
|
1546
|
+
baseline for the given channel. The values are expected to be scaled by
|
|
1547
|
+
population for channels where
|
|
1548
|
+
`model_spec.non_media_population_scaling_id` is `True` and normalized by
|
|
1549
|
+
centering and scaling using means and standard deviations. This argument
|
|
1550
|
+
is required if the data contains non-media treatments.
|
|
1618
1551
|
|
|
1619
1552
|
Returns:
|
|
1620
1553
|
Tensor of incremental KPI distribution.
|
|
1621
1554
|
"""
|
|
1555
|
+
if (
|
|
1556
|
+
data_tensors.non_media_treatments is not None
|
|
1557
|
+
and non_media_treatments_baseline_normalized is None
|
|
1558
|
+
):
|
|
1559
|
+
raise ValueError(
|
|
1560
|
+
"`non_media_treatments_baseline_normalized` must be passed to"
|
|
1561
|
+
" `_get_incremental_kpi` when `non_media_treatments` data is"
|
|
1562
|
+
" present."
|
|
1563
|
+
)
|
|
1622
1564
|
n_media_times = self._meridian.n_media_times
|
|
1623
1565
|
if data_tensors.media is not None:
|
|
1624
1566
|
n_times = data_tensors.media.shape[1] # pytype: disable=attribute-error
|
|
@@ -1641,13 +1583,10 @@ class Analyzer:
|
|
|
1641
1583
|
combined_beta,
|
|
1642
1584
|
)
|
|
1643
1585
|
if data_tensors.non_media_treatments is not None:
|
|
1644
|
-
non_media_scaled_baseline = _compute_non_media_baseline(
|
|
1645
|
-
non_media_treatments=data_tensors.non_media_treatments,
|
|
1646
|
-
non_media_baseline_values=non_media_baseline_values,
|
|
1647
|
-
)
|
|
1648
1586
|
non_media_kpi = tf.einsum(
|
|
1649
1587
|
"gtn,...gn->...gtn",
|
|
1650
|
-
data_tensors.non_media_treatments
|
|
1588
|
+
data_tensors.non_media_treatments
|
|
1589
|
+
- non_media_treatments_baseline_normalized,
|
|
1651
1590
|
dist_tensors.gamma_gn,
|
|
1652
1591
|
)
|
|
1653
1592
|
return tf.concat([combined_media_kpi, non_media_kpi], axis=-1)
|
|
@@ -1697,7 +1636,7 @@ class Analyzer:
|
|
|
1697
1636
|
self,
|
|
1698
1637
|
data_tensors: DataTensors,
|
|
1699
1638
|
dist_tensors: DistributionTensors,
|
|
1700
|
-
|
|
1639
|
+
non_media_treatments_baseline_normalized: Sequence[float] | None = None,
|
|
1701
1640
|
inverse_transform_outcome: bool | None = None,
|
|
1702
1641
|
use_kpi: bool | None = None,
|
|
1703
1642
|
selected_geos: Sequence[str] | None = None,
|
|
@@ -1722,20 +1661,21 @@ class Analyzer:
|
|
|
1722
1661
|
population. Shape (n_geos x T x n_organic_rf_channels), for any time
|
|
1723
1662
|
dimension T. `organic_frequency`: `organic frequency data` with shape
|
|
1724
1663
|
(n_geos x T x n_organic_rf_channels), for any time dimension T.
|
|
1725
|
-
`non_media_treatments`: `non_media_treatments` data
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
dimension `
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1664
|
+
`non_media_treatments`: `non_media_treatments` data scaled by population
|
|
1665
|
+
for the selected channels and normalized by means and standard
|
|
1666
|
+
deviations with shape (n_geos x T x n_non_media_channels), for any time
|
|
1667
|
+
dimension T. `revenue_per_kpi`: Contains revenue per kpi data with shape
|
|
1668
|
+
`(n_geos x T)`, for any time dimension `T`.
|
|
1669
|
+
dist_tensors: A `DistributionTensors` container with the distribution
|
|
1670
|
+
tensors for media, RF, organic media, organic RF and non-media
|
|
1671
|
+
treatments channels.
|
|
1672
|
+
non_media_treatments_baseline_normalized: Optional list of shape
|
|
1673
|
+
`(n_non_media_channels,)`. Each element is a float that will be used as
|
|
1674
|
+
baseline for the given channel. The values are expected to be scaled by
|
|
1675
|
+
population for channels where
|
|
1676
|
+
`model_spec.non_media_population_scaling_id` is `True` and normalized by
|
|
1677
|
+
centering and scaling using means and standard deviations. This argument
|
|
1678
|
+
is required if the data contains non-media treatments.
|
|
1739
1679
|
inverse_transform_outcome: Boolean. If `True`, returns the expected
|
|
1740
1680
|
outcome in the original KPI or revenue (depending on what is passed to
|
|
1741
1681
|
`use_kpi`), as it was passed to `InputData`. If False, returns the
|
|
@@ -1760,10 +1700,20 @@ class Analyzer:
|
|
|
1760
1700
|
Tensor containing the incremental outcome distribution.
|
|
1761
1701
|
"""
|
|
1762
1702
|
self._check_revenue_data_exists(use_kpi)
|
|
1703
|
+
if (
|
|
1704
|
+
data_tensors.non_media_treatments is not None
|
|
1705
|
+
and non_media_treatments_baseline_normalized is None
|
|
1706
|
+
):
|
|
1707
|
+
raise ValueError(
|
|
1708
|
+
"`non_media_treatments_baseline_normalized` must be passed to"
|
|
1709
|
+
" `_incremental_outcome_impl` when `non_media_treatments` data is"
|
|
1710
|
+
" present."
|
|
1711
|
+
)
|
|
1712
|
+
|
|
1763
1713
|
transformed_outcome = self._get_incremental_kpi(
|
|
1764
1714
|
data_tensors=data_tensors,
|
|
1765
1715
|
dist_tensors=dist_tensors,
|
|
1766
|
-
|
|
1716
|
+
non_media_treatments_baseline_normalized=non_media_treatments_baseline_normalized,
|
|
1767
1717
|
)
|
|
1768
1718
|
if inverse_transform_outcome:
|
|
1769
1719
|
incremental_outcome = self._inverse_outcome(
|
|
@@ -1787,7 +1737,7 @@ class Analyzer:
|
|
|
1787
1737
|
self,
|
|
1788
1738
|
use_posterior: bool = True,
|
|
1789
1739
|
new_data: DataTensors | None = None,
|
|
1790
|
-
non_media_baseline_values: Sequence[float
|
|
1740
|
+
non_media_baseline_values: Sequence[float] | None = None,
|
|
1791
1741
|
scaling_factor0: float = 0.0,
|
|
1792
1742
|
scaling_factor1: float = 1.0,
|
|
1793
1743
|
selected_geos: Sequence[str] | None = None,
|
|
@@ -1806,15 +1756,26 @@ class Analyzer:
|
|
|
1806
1756
|
This calculates the media outcome of each media channel for each posterior
|
|
1807
1757
|
or prior parameter draw. Incremental outcome is defined as:
|
|
1808
1758
|
|
|
1809
|
-
`E(Outcome|
|
|
1759
|
+
`E(Outcome|Treatment_1, Controls)` minus `E(Outcome|Treatment_0, Controls)`
|
|
1760
|
+
|
|
1761
|
+
For paid & organic channels (without reach and frequency data),
|
|
1762
|
+
`Treatment_1` means that media execution for a given channel is multiplied
|
|
1763
|
+
by
|
|
1764
|
+
`scaling_factor1` (1.0 by default) for the set of time periods specified
|
|
1765
|
+
by `media_selected_times`. Similarly, `Treatment_0` means that media
|
|
1766
|
+
execution is multiplied by `scaling_factor0` (0.0 by default) for these time
|
|
1767
|
+
periods.
|
|
1810
1768
|
|
|
1811
|
-
|
|
1812
|
-
|
|
1813
|
-
|
|
1814
|
-
|
|
1769
|
+
For paid & organic channels with reach and frequency data, either reach or
|
|
1770
|
+
frequency is held fixed while the other is scaled, depending on the
|
|
1771
|
+
`by_reach` argument.
|
|
1772
|
+
|
|
1773
|
+
For non-media treatments, `Treatment_1` means that the variable is set to
|
|
1774
|
+
historical values. `Treatment_0` means that the variable is set to its
|
|
1775
|
+
baseline value for all geos and time periods. Note that the scaling factors
|
|
1776
|
+
(`scaling_factor0` and `scaling_factor1`) are not applicable to non-media
|
|
1777
|
+
treatments.
|
|
1815
1778
|
|
|
1816
|
-
For channels with reach and frequency data, either reach or frequency is
|
|
1817
|
-
held fixed while the other is scaled, depending on the `by_reach` argument.
|
|
1818
1779
|
"Outcome" refers to either `revenue` if `use_kpi=False`, or `kpi` if
|
|
1819
1780
|
`use_kpi=True`. When `revenue_per_kpi` is not defined, `use_kpi` cannot be
|
|
1820
1781
|
False.
|
|
@@ -1856,13 +1817,13 @@ class Analyzer:
|
|
|
1856
1817
|
any of the tensors in `new_data` is provided with a different number of
|
|
1857
1818
|
time periods than in `InputData`, then all tensors must be provided with
|
|
1858
1819
|
the same number of time periods.
|
|
1859
|
-
non_media_baseline_values: Optional list of shape
|
|
1860
|
-
Each element is
|
|
1861
|
-
used as baseline for the given channel
|
|
1862
|
-
|
|
1863
|
-
|
|
1864
|
-
|
|
1865
|
-
each non_media
|
|
1820
|
+
non_media_baseline_values: Optional list of shape
|
|
1821
|
+
`(n_non_media_channels,)`. Each element is a float which means that the
|
|
1822
|
+
fixed value will be used as baseline for the given channel. It is
|
|
1823
|
+
expected that they are scaled by population for the channels where
|
|
1824
|
+
`model_spec.non_media_population_scaling_id` is `True`. If `None`, the
|
|
1825
|
+
`model_spec.non_media_baseline_values` is used, which defaults to the
|
|
1826
|
+
minimum value for each non_media treatment channel.
|
|
1866
1827
|
scaling_factor0: Float. The factor by which to scale the counterfactual
|
|
1867
1828
|
scenario "Media_0" during the time periods specified in
|
|
1868
1829
|
`media_selected_times`. Must be non-negative and less than
|
|
@@ -1944,6 +1905,7 @@ class Analyzer:
|
|
|
1944
1905
|
aggregate_geos=aggregate_geos,
|
|
1945
1906
|
selected_geos=selected_geos,
|
|
1946
1907
|
)
|
|
1908
|
+
_validate_non_media_baseline_values_numbers(non_media_baseline_values)
|
|
1947
1909
|
dist_type = constants.POSTERIOR if use_posterior else constants.PRIOR
|
|
1948
1910
|
|
|
1949
1911
|
if dist_type not in mmm.inference_data.groups():
|
|
@@ -2002,7 +1964,6 @@ class Analyzer:
|
|
|
2002
1964
|
media_selected_times = [
|
|
2003
1965
|
x in media_selected_times for x in mmm.input_data.media_time
|
|
2004
1966
|
]
|
|
2005
|
-
non_media_selected_times = media_selected_times[-mmm.n_times :]
|
|
2006
1967
|
|
|
2007
1968
|
# Set counterfactual tensors based on the scaling factors and the media
|
|
2008
1969
|
# selected times.
|
|
@@ -2014,28 +1975,52 @@ class Analyzer:
|
|
|
2014
1975
|
)[:, None]
|
|
2015
1976
|
|
|
2016
1977
|
if data_tensors.non_media_treatments is not None:
|
|
2017
|
-
|
|
2018
|
-
|
|
2019
|
-
|
|
2020
|
-
|
|
1978
|
+
non_media_treatments_baseline_scaled = (
|
|
1979
|
+
self._meridian.compute_non_media_treatments_baseline(
|
|
1980
|
+
non_media_baseline_values=non_media_baseline_values,
|
|
1981
|
+
)
|
|
1982
|
+
)
|
|
1983
|
+
non_media_treatments_baseline_normalized = self._meridian.non_media_transformer.forward( # pytype: disable=attribute-error
|
|
1984
|
+
non_media_treatments_baseline_scaled,
|
|
1985
|
+
apply_population_scaling=False,
|
|
1986
|
+
)
|
|
1987
|
+
non_media_treatments0 = tf.broadcast_to(
|
|
1988
|
+
tf.constant(
|
|
1989
|
+
non_media_treatments_baseline_normalized, dtype=tf.float32
|
|
1990
|
+
)[tf.newaxis, tf.newaxis, :],
|
|
1991
|
+
self._meridian.non_media_treatments.shape, # pytype: disable=attribute-error
|
|
2021
1992
|
)
|
|
2022
1993
|
else:
|
|
2023
|
-
|
|
1994
|
+
non_media_treatments_baseline_normalized = None
|
|
1995
|
+
non_media_treatments0 = None
|
|
2024
1996
|
|
|
2025
1997
|
incremented_data0 = _scale_tensors_by_multiplier(
|
|
2026
1998
|
data=data_tensors,
|
|
2027
1999
|
multiplier=counterfactual0,
|
|
2028
2000
|
by_reach=by_reach,
|
|
2029
|
-
non_media_treatments_baseline=new_non_media_treatments0,
|
|
2030
2001
|
)
|
|
2031
2002
|
incremented_data1 = _scale_tensors_by_multiplier(
|
|
2032
2003
|
data=data_tensors, multiplier=counterfactual1, by_reach=by_reach
|
|
2033
2004
|
)
|
|
2034
2005
|
|
|
2035
|
-
|
|
2006
|
+
scaled_data0 = self._get_scaled_data_tensors(
|
|
2036
2007
|
new_data=incremented_data0,
|
|
2037
2008
|
include_non_paid_channels=include_non_paid_channels,
|
|
2038
2009
|
)
|
|
2010
|
+
# TODO: b/415198977 - Verify the computation of outcome of non-media
|
|
2011
|
+
# treatments with `media_selected_times` and scale factors.
|
|
2012
|
+
|
|
2013
|
+
data_tensors0 = DataTensors(
|
|
2014
|
+
media=scaled_data0.media,
|
|
2015
|
+
reach=scaled_data0.reach,
|
|
2016
|
+
frequency=scaled_data0.frequency,
|
|
2017
|
+
organic_media=scaled_data0.organic_media,
|
|
2018
|
+
organic_reach=scaled_data0.organic_reach,
|
|
2019
|
+
organic_frequency=scaled_data0.organic_frequency,
|
|
2020
|
+
revenue_per_kpi=scaled_data0.revenue_per_kpi,
|
|
2021
|
+
non_media_treatments=non_media_treatments0,
|
|
2022
|
+
)
|
|
2023
|
+
|
|
2039
2024
|
data_tensors1 = self._get_scaled_data_tensors(
|
|
2040
2025
|
new_data=incremented_data1,
|
|
2041
2026
|
include_non_paid_channels=include_non_paid_channels,
|
|
@@ -2062,7 +2047,9 @@ class Analyzer:
|
|
|
2062
2047
|
incremental_outcome_kwargs = {
|
|
2063
2048
|
"inverse_transform_outcome": inverse_transform_outcome,
|
|
2064
2049
|
"use_kpi": use_kpi,
|
|
2065
|
-
"
|
|
2050
|
+
"non_media_treatments_baseline_normalized": (
|
|
2051
|
+
non_media_treatments_baseline_normalized
|
|
2052
|
+
),
|
|
2066
2053
|
}
|
|
2067
2054
|
for i, start_index in enumerate(batch_starting_indices):
|
|
2068
2055
|
stop_index = np.min([n_draws, start_index + batch_size])
|
|
@@ -2538,7 +2525,7 @@ class Analyzer:
|
|
|
2538
2525
|
aggregate_geos: bool = False,
|
|
2539
2526
|
aggregate_times: bool = False,
|
|
2540
2527
|
split_by_holdout_id: bool = False,
|
|
2541
|
-
non_media_baseline_values: Sequence[
|
|
2528
|
+
non_media_baseline_values: Sequence[float] | None = None,
|
|
2542
2529
|
confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
|
|
2543
2530
|
) -> xr.Dataset:
|
|
2544
2531
|
"""Calculates the data for the expected versus actual outcome over time.
|
|
@@ -2550,19 +2537,20 @@ class Analyzer:
|
|
|
2550
2537
|
are summed over all of the time periods.
|
|
2551
2538
|
split_by_holdout_id: Boolean. If `True` and `holdout_id` exists, the data
|
|
2552
2539
|
is split into `'Train'`, `'Test'`, and `'All Data'` subsections.
|
|
2553
|
-
non_media_baseline_values: Optional list of shape
|
|
2554
|
-
Each element is
|
|
2555
|
-
used as baseline for the given channel
|
|
2556
|
-
|
|
2557
|
-
|
|
2558
|
-
|
|
2559
|
-
channel.
|
|
2540
|
+
non_media_baseline_values: Optional list of shape
|
|
2541
|
+
`(n_non_media_channels,)`. Each element is a float which means that the
|
|
2542
|
+
fixed value will be used as baseline for the given channel. It is
|
|
2543
|
+
expected that they are scaled by population for the channels where
|
|
2544
|
+
`model_spec.non_media_population_scaling_id` is `True`. If `None`, the
|
|
2545
|
+
`model_spec.non_media_baseline_values` is used, which defaults to the
|
|
2546
|
+
minimum value for each non_media treatment channel.
|
|
2560
2547
|
confidence_level: Confidence level for expected outcome credible
|
|
2561
2548
|
intervals, represented as a value between zero and one. Default: `0.9`.
|
|
2562
2549
|
|
|
2563
2550
|
Returns:
|
|
2564
2551
|
A dataset with the expected, baseline, and actual outcome metrics.
|
|
2565
2552
|
"""
|
|
2553
|
+
_validate_non_media_baseline_values_numbers(non_media_baseline_values)
|
|
2566
2554
|
mmm = self._meridian
|
|
2567
2555
|
use_kpi = self._meridian.input_data.revenue_per_kpi is None
|
|
2568
2556
|
can_split_by_holdout = self._can_split_by_holdout_id(split_by_holdout_id)
|
|
@@ -2632,7 +2620,7 @@ class Analyzer:
|
|
|
2632
2620
|
|
|
2633
2621
|
def _calculate_baseline_expected_outcome(
|
|
2634
2622
|
self,
|
|
2635
|
-
non_media_baseline_values: Sequence[
|
|
2623
|
+
non_media_baseline_values: Sequence[float] | None = None,
|
|
2636
2624
|
**expected_outcome_kwargs,
|
|
2637
2625
|
) -> tf.Tensor:
|
|
2638
2626
|
"""Calculates either the posterior or prior expected outcome of baseline.
|
|
@@ -2644,20 +2632,19 @@ class Analyzer:
|
|
|
2644
2632
|
3) `new_organic_media` is set to all zeros
|
|
2645
2633
|
4) `new_organic_reach` is set to all zeros
|
|
2646
2634
|
5) `new_non_media_treatments` is set to the counterfactual values
|
|
2647
|
-
according to the
|
|
2648
|
-
`non_media_baseline_values` argument
|
|
2635
|
+
according to the `non_media_baseline_values` argument
|
|
2649
2636
|
6) `new_controls` are set to historical values
|
|
2650
2637
|
|
|
2651
2638
|
All other arguments of `expected_outcome` can be passed to this method.
|
|
2652
2639
|
|
|
2653
2640
|
Args:
|
|
2654
|
-
non_media_baseline_values: Optional list of shape
|
|
2655
|
-
Each element is
|
|
2656
|
-
used as baseline for the given channel
|
|
2657
|
-
|
|
2658
|
-
|
|
2659
|
-
|
|
2660
|
-
channel.
|
|
2641
|
+
non_media_baseline_values: Optional list of shape
|
|
2642
|
+
`(n_non_media_channels,)`. Each element is a float which means that the
|
|
2643
|
+
fixed value will be used as baseline for the given channel. It is
|
|
2644
|
+
expected that they are scaled by population for the channels where
|
|
2645
|
+
`model_spec.non_media_population_scaling_id` is `True`. If `None`, the
|
|
2646
|
+
`model_spec.non_media_baseline_values` is used, which defaults to the
|
|
2647
|
+
minimum value for each non_media treatment channel.
|
|
2661
2648
|
**expected_outcome_kwargs: kwargs to pass to `expected_outcome`, which
|
|
2662
2649
|
could contain use_posterior, selected_geos, selected_times,
|
|
2663
2650
|
aggregate_geos, aggregate_times, inverse_transform_outcome, use_kpi,
|
|
@@ -2690,10 +2677,27 @@ class Analyzer:
|
|
|
2690
2677
|
else None
|
|
2691
2678
|
)
|
|
2692
2679
|
if self._meridian.non_media_treatments is not None:
|
|
2693
|
-
|
|
2694
|
-
|
|
2680
|
+
if self._meridian.model_spec.non_media_population_scaling_id is not None:
|
|
2681
|
+
scaling_factors = tf.where(
|
|
2682
|
+
self._meridian.model_spec.non_media_population_scaling_id,
|
|
2683
|
+
self._meridian.population[:, tf.newaxis, tf.newaxis],
|
|
2684
|
+
tf.ones_like(self._meridian.population)[:, tf.newaxis, tf.newaxis],
|
|
2685
|
+
)
|
|
2686
|
+
else:
|
|
2687
|
+
scaling_factors = tf.ones_like(self._meridian.population)[
|
|
2688
|
+
:, tf.newaxis, tf.newaxis
|
|
2689
|
+
]
|
|
2690
|
+
|
|
2691
|
+
baseline = self._meridian.compute_non_media_treatments_baseline(
|
|
2695
2692
|
non_media_baseline_values=non_media_baseline_values,
|
|
2696
2693
|
)
|
|
2694
|
+
new_non_media_treatments_population_scaled = tf.broadcast_to(
|
|
2695
|
+
tf.constant(baseline, dtype=tf.float32)[tf.newaxis, tf.newaxis, :],
|
|
2696
|
+
self._meridian.non_media_treatments.shape,
|
|
2697
|
+
)
|
|
2698
|
+
new_non_media_treatments = (
|
|
2699
|
+
new_non_media_treatments_population_scaled * scaling_factors
|
|
2700
|
+
)
|
|
2697
2701
|
else:
|
|
2698
2702
|
new_non_media_treatments = None
|
|
2699
2703
|
new_controls = self._meridian.controls
|
|
@@ -2714,7 +2718,7 @@ class Analyzer:
|
|
|
2714
2718
|
new_data: DataTensors | None = None,
|
|
2715
2719
|
use_kpi: bool | None = None,
|
|
2716
2720
|
include_non_paid_channels: bool = True,
|
|
2717
|
-
non_media_baseline_values: Sequence[
|
|
2721
|
+
non_media_baseline_values: Sequence[float] | None = None,
|
|
2718
2722
|
**kwargs,
|
|
2719
2723
|
) -> tf.Tensor:
|
|
2720
2724
|
"""Aggregates the incremental outcome of the media channels.
|
|
@@ -2742,13 +2746,13 @@ class Analyzer:
|
|
|
2742
2746
|
include_non_paid_channels: Boolean. If `True`, then non-media treatments
|
|
2743
2747
|
and organic effects are included in the calculation. If `False`, then
|
|
2744
2748
|
only the paid media and RF effects are included.
|
|
2745
|
-
non_media_baseline_values: Optional list of shape
|
|
2746
|
-
Each element is
|
|
2747
|
-
used as baseline for the given channel
|
|
2748
|
-
|
|
2749
|
-
|
|
2750
|
-
|
|
2751
|
-
each non_media
|
|
2749
|
+
non_media_baseline_values: Optional list of shape
|
|
2750
|
+
`(n_non_media_channels,)`. Each element is a float which means that the
|
|
2751
|
+
fixed value will be used as baseline for the given channel. It is
|
|
2752
|
+
expected that they are scaled by population for the channels where
|
|
2753
|
+
`model_spec.non_media_population_scaling_id` is `True`. If `None`, the
|
|
2754
|
+
`model_spec.non_media_baseline_values` is used, which defaults to the
|
|
2755
|
+
minimum value for each non_media treatment channel.
|
|
2752
2756
|
**kwargs: kwargs to pass to `incremental_outcome`, which could contain
|
|
2753
2757
|
selected_geos, selected_times, aggregate_geos, aggregate_times,
|
|
2754
2758
|
batch_size.
|
|
@@ -2758,6 +2762,7 @@ class Analyzer:
|
|
|
2758
2762
|
of the channel dimension is incremented by one, with the new component at
|
|
2759
2763
|
the end containing the total incremental outcome of all channels.
|
|
2760
2764
|
"""
|
|
2765
|
+
_validate_non_media_baseline_values_numbers(non_media_baseline_values)
|
|
2761
2766
|
use_kpi = use_kpi or self._meridian.input_data.revenue_per_kpi is None
|
|
2762
2767
|
incremental_outcome_m = self.incremental_outcome(
|
|
2763
2768
|
use_posterior=use_posterior,
|
|
@@ -2790,7 +2795,7 @@ class Analyzer:
|
|
|
2790
2795
|
confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
|
|
2791
2796
|
batch_size: int = constants.DEFAULT_BATCH_SIZE,
|
|
2792
2797
|
include_non_paid_channels: bool = False,
|
|
2793
|
-
non_media_baseline_values: Sequence[
|
|
2798
|
+
non_media_baseline_values: Sequence[float] | None = None,
|
|
2794
2799
|
) -> xr.Dataset:
|
|
2795
2800
|
"""Returns summary metrics.
|
|
2796
2801
|
|
|
@@ -2866,13 +2871,13 @@ class Analyzer:
|
|
|
2866
2871
|
reported. If `False`, only the paid channels (media, reach and
|
|
2867
2872
|
frequency) are included but the summary contains also the metrics
|
|
2868
2873
|
dependent on spend. Default: `False`.
|
|
2869
|
-
non_media_baseline_values: Optional list of shape
|
|
2870
|
-
Each element is
|
|
2871
|
-
used as baseline for the given channel
|
|
2872
|
-
|
|
2873
|
-
|
|
2874
|
-
|
|
2875
|
-
channel.
|
|
2874
|
+
non_media_baseline_values: Optional list of shape
|
|
2875
|
+
`(n_non_media_channels,)`. Each element is a float which means that the
|
|
2876
|
+
fixed value will be used as baseline for the given channel. It is
|
|
2877
|
+
expected that they are scaled by population for the channels where
|
|
2878
|
+
`model_spec.non_media_population_scaling_id` is `True`. If `None`, the
|
|
2879
|
+
`model_spec.non_media_baseline_values` is used, which defaults to the
|
|
2880
|
+
minimum value for each non_media treatment channel.
|
|
2876
2881
|
|
|
2877
2882
|
Returns:
|
|
2878
2883
|
An `xr.Dataset` with coordinates: `channel`, `metric` (`mean`, `median`,
|
|
@@ -2886,6 +2891,7 @@ class Analyzer:
|
|
|
2886
2891
|
when `aggregate_times=False` because they do not have a clear
|
|
2887
2892
|
interpretation by time period.
|
|
2888
2893
|
"""
|
|
2894
|
+
_validate_non_media_baseline_values_numbers(non_media_baseline_values)
|
|
2889
2895
|
dim_kwargs = {
|
|
2890
2896
|
"selected_geos": selected_geos,
|
|
2891
2897
|
"selected_times": selected_times,
|
|
@@ -3274,7 +3280,7 @@ class Analyzer:
|
|
|
3274
3280
|
selected_times: Sequence[str] | None = None,
|
|
3275
3281
|
aggregate_geos: bool = True,
|
|
3276
3282
|
aggregate_times: bool = True,
|
|
3277
|
-
non_media_baseline_values: Sequence[float
|
|
3283
|
+
non_media_baseline_values: Sequence[float] | None = None,
|
|
3278
3284
|
confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
|
|
3279
3285
|
batch_size: int = constants.DEFAULT_BATCH_SIZE,
|
|
3280
3286
|
) -> xr.Dataset:
|
|
@@ -3289,13 +3295,13 @@ class Analyzer:
|
|
|
3289
3295
|
all of the regions.
|
|
3290
3296
|
aggregate_times: Boolean. If `True`, the expected outcome is summed over
|
|
3291
3297
|
all of the time periods.
|
|
3292
|
-
non_media_baseline_values: Optional list of shape
|
|
3293
|
-
Each element is
|
|
3294
|
-
used as baseline for the given channel
|
|
3295
|
-
|
|
3296
|
-
|
|
3297
|
-
|
|
3298
|
-
channel.
|
|
3298
|
+
non_media_baseline_values: Optional list of shape
|
|
3299
|
+
`(n_non_media_channels,)`. Each element is a float which means that the
|
|
3300
|
+
fixed value will be used as baseline for the given channel. It is
|
|
3301
|
+
expected that they are scaled by population for the channels where
|
|
3302
|
+
`model_spec.non_media_population_scaling_id` is `True`. If `None`, the
|
|
3303
|
+
`model_spec.non_media_baseline_values` is used, which defaults to the
|
|
3304
|
+
minimum value for each non_media treatment channel.
|
|
3299
3305
|
confidence_level: Confidence level for media summary metrics credible
|
|
3300
3306
|
intervals, represented as a value between zero and one.
|
|
3301
3307
|
batch_size: Integer representing the maximum draws per chain in each
|
|
@@ -3308,6 +3314,7 @@ class Analyzer:
|
|
|
3308
3314
|
`ci_low`,`ci_high`),`distribution` (prior, posterior) and contains the
|
|
3309
3315
|
following data variables: `baseline_outcome`, `pct_of_contribution`.
|
|
3310
3316
|
"""
|
|
3317
|
+
_validate_non_media_baseline_values_numbers(non_media_baseline_values)
|
|
3311
3318
|
# TODO: Change "pct_of_contribution" to a more accurate term.
|
|
3312
3319
|
|
|
3313
3320
|
use_kpi = self._meridian.input_data.revenue_per_kpi is None
|