google-meridian 1.0.9__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {google_meridian-1.0.9.dist-info → google_meridian-1.1.0.dist-info}/METADATA +2 -2
- google_meridian-1.1.0.dist-info/RECORD +41 -0
- {google_meridian-1.0.9.dist-info → google_meridian-1.1.0.dist-info}/WHEEL +1 -1
- meridian/__init__.py +1 -1
- meridian/analysis/analyzer.py +195 -189
- meridian/analysis/optimizer.py +263 -65
- meridian/analysis/summarizer.py +4 -4
- meridian/analysis/test_utils.py +81 -81
- meridian/analysis/visualizer.py +12 -16
- meridian/constants.py +100 -16
- meridian/data/input_data.py +115 -19
- meridian/data/test_utils.py +116 -5
- meridian/data/time_coordinates.py +3 -3
- meridian/model/media.py +133 -98
- meridian/model/model.py +447 -57
- meridian/model/model_test_data.py +11 -0
- meridian/model/posterior_sampler.py +120 -43
- meridian/model/prior_distribution.py +96 -51
- meridian/model/prior_sampler.py +179 -209
- meridian/model/spec.py +196 -36
- meridian/model/transformers.py +15 -3
- google_meridian-1.0.9.dist-info/RECORD +0 -41
- {google_meridian-1.0.9.dist-info → google_meridian-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {google_meridian-1.0.9.dist-info → google_meridian-1.1.0.dist-info}/top_level.txt +0 -0
meridian/analysis/analyzer.py
CHANGED
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
from collections.abc import Mapping, Sequence
|
|
18
18
|
import itertools
|
|
19
|
+
import numbers
|
|
19
20
|
from typing import Any, Optional
|
|
20
21
|
import warnings
|
|
21
22
|
|
|
@@ -37,6 +38,20 @@ __all__ = [
|
|
|
37
38
|
]
|
|
38
39
|
|
|
39
40
|
|
|
41
|
+
def _validate_non_media_baseline_values_numbers(
|
|
42
|
+
non_media_baseline_values: Sequence[str | float] | None,
|
|
43
|
+
):
|
|
44
|
+
if non_media_baseline_values is None:
|
|
45
|
+
return
|
|
46
|
+
|
|
47
|
+
for value in non_media_baseline_values:
|
|
48
|
+
if not isinstance(value, numbers.Number):
|
|
49
|
+
raise ValueError(
|
|
50
|
+
f"Invalid `non_media_baseline_values` value: '{value}'. Only float"
|
|
51
|
+
" numbers are supported."
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
|
|
40
55
|
# TODO: Refactor the related unit tests to be under DataTensors.
|
|
41
56
|
class DataTensors(tf.experimental.ExtensionType):
|
|
42
57
|
"""Container for data variable arguments of Analyzer methods.
|
|
@@ -181,6 +196,7 @@ class DataTensors(tf.experimental.ExtensionType):
|
|
|
181
196
|
new_tensor is not None
|
|
182
197
|
and old_tensor is not None
|
|
183
198
|
and new_tensor.ndim > 1
|
|
199
|
+
and old_tensor.ndim > 1
|
|
184
200
|
and new_tensor.shape[1] != old_tensor.shape[1]
|
|
185
201
|
):
|
|
186
202
|
return new_tensor.shape[1]
|
|
@@ -653,22 +669,16 @@ def _scale_tensors_by_multiplier(
|
|
|
653
669
|
data: DataTensors,
|
|
654
670
|
multiplier: float,
|
|
655
671
|
by_reach: bool,
|
|
656
|
-
non_media_treatments_baseline: tf.Tensor | None = None,
|
|
657
672
|
) -> DataTensors:
|
|
658
673
|
"""Get scaled tensors for incremental outcome calculation.
|
|
659
674
|
|
|
660
675
|
Args:
|
|
661
676
|
data: DataTensors object containing the optional tensors to scale. Only
|
|
662
|
-
`media`, `reach`, `frequency`, `organic_media`, `organic_reach`,
|
|
663
|
-
`organic_frequency
|
|
664
|
-
remain unchanged.
|
|
677
|
+
`media`, `reach`, `frequency`, `organic_media`, `organic_reach`, and
|
|
678
|
+
`organic_frequency` are scaled. The other tensors remain unchanged.
|
|
665
679
|
multiplier: Float indicating the factor to scale tensors by.
|
|
666
680
|
by_reach: Boolean indicating whether to scale reach or frequency when rf
|
|
667
681
|
data is available.
|
|
668
|
-
non_media_treatments_baseline: Optional tensor to overwrite
|
|
669
|
-
`data.non_media_treatments` in the output. Used to compute the
|
|
670
|
-
conterfactual values for incremental outcome calculation. If not used, the
|
|
671
|
-
unmodified `data.non_media_treatments` tensor is returned in the output.
|
|
672
682
|
|
|
673
683
|
Returns:
|
|
674
684
|
A `DataTensors` object containing scaled tensor parameters. The original
|
|
@@ -697,14 +707,9 @@ def _scale_tensors_by_multiplier(
|
|
|
697
707
|
incremented_data[constants.ORGANIC_FREQUENCY] = (
|
|
698
708
|
data.organic_frequency * multiplier
|
|
699
709
|
)
|
|
700
|
-
if non_media_treatments_baseline is not None:
|
|
701
|
-
incremented_data[constants.NON_MEDIA_TREATMENTS] = (
|
|
702
|
-
non_media_treatments_baseline
|
|
703
|
-
)
|
|
704
|
-
else:
|
|
705
|
-
incremented_data[constants.NON_MEDIA_TREATMENTS] = data.non_media_treatments
|
|
706
710
|
|
|
707
711
|
# Include the original data that does not get scaled.
|
|
712
|
+
incremented_data[constants.NON_MEDIA_TREATMENTS] = data.non_media_treatments
|
|
708
713
|
incremented_data[constants.MEDIA_SPEND] = data.media_spend
|
|
709
714
|
incremented_data[constants.RF_SPEND] = data.rf_spend
|
|
710
715
|
incremented_data[constants.CONTROLS] = data.controls
|
|
@@ -754,79 +759,6 @@ def _central_tendency_and_ci_by_prior_and_posterior(
|
|
|
754
759
|
return xr.Dataset(data_vars=xr_data, coords=xr_coords)
|
|
755
760
|
|
|
756
761
|
|
|
757
|
-
def _compute_non_media_baseline(
|
|
758
|
-
non_media_treatments: tf.Tensor,
|
|
759
|
-
non_media_baseline_values: Sequence[float | str] | None = None,
|
|
760
|
-
non_media_selected_times: Sequence[bool] | None = None,
|
|
761
|
-
) -> tf.Tensor:
|
|
762
|
-
"""Computes the baseline for each non-media treatment channel.
|
|
763
|
-
|
|
764
|
-
Args:
|
|
765
|
-
non_media_treatments: The non-media treatment input data.
|
|
766
|
-
non_media_baseline_values: Optional list of shape (n_non_media_channels,).
|
|
767
|
-
Each element is either a float (which means that the fixed value will be
|
|
768
|
-
used as baseline for the given channel) or one of the strings "min" or
|
|
769
|
-
"max" (which mean that the global minimum or maximum value will be used as
|
|
770
|
-
baseline for the values of the given non_media treatment channel). If
|
|
771
|
-
None, the minimum value is used as baseline for each non_media treatment
|
|
772
|
-
channel.
|
|
773
|
-
non_media_selected_times: Optional list of shape (n_times,). Each element is
|
|
774
|
-
a boolean indicating whether the corresponding time period should be
|
|
775
|
-
included in the baseline computation.
|
|
776
|
-
|
|
777
|
-
Returns:
|
|
778
|
-
A tensor of shape (n_geos, n_times, n_non_media_channels) containing the
|
|
779
|
-
baseline values for each non-media treatment channel.
|
|
780
|
-
"""
|
|
781
|
-
|
|
782
|
-
if non_media_selected_times is None:
|
|
783
|
-
non_media_selected_times = [True] * non_media_treatments.shape[-2]
|
|
784
|
-
|
|
785
|
-
if non_media_baseline_values is None:
|
|
786
|
-
# If non_media_baseline_values is not provided, use the minimum value for
|
|
787
|
-
# each non_media treatment channel as the baseline.
|
|
788
|
-
non_media_baseline_values_filled = [
|
|
789
|
-
constants.NON_MEDIA_BASELINE_MIN
|
|
790
|
-
] * non_media_treatments.shape[-1]
|
|
791
|
-
else:
|
|
792
|
-
non_media_baseline_values_filled = non_media_baseline_values
|
|
793
|
-
|
|
794
|
-
if non_media_treatments.shape[-1] != len(non_media_baseline_values_filled):
|
|
795
|
-
raise ValueError(
|
|
796
|
-
"The number of non-media channels"
|
|
797
|
-
f" ({non_media_treatments.shape[-1]}) does not match the number"
|
|
798
|
-
f" of baseline types ({len(non_media_baseline_values_filled)})."
|
|
799
|
-
)
|
|
800
|
-
|
|
801
|
-
baseline_list = []
|
|
802
|
-
for channel in range(non_media_treatments.shape[-1]):
|
|
803
|
-
baseline_value = non_media_baseline_values_filled[channel]
|
|
804
|
-
|
|
805
|
-
if baseline_value == constants.NON_MEDIA_BASELINE_MIN:
|
|
806
|
-
baseline_for_channel = tf.reduce_min(
|
|
807
|
-
non_media_treatments[..., channel], axis=[0, 1]
|
|
808
|
-
)
|
|
809
|
-
elif baseline_value == constants.NON_MEDIA_BASELINE_MAX:
|
|
810
|
-
baseline_for_channel = tf.reduce_max(
|
|
811
|
-
non_media_treatments[..., channel], axis=[0, 1]
|
|
812
|
-
)
|
|
813
|
-
elif isinstance(baseline_value, float):
|
|
814
|
-
baseline_for_channel = tf.cast(baseline_value, tf.float32)
|
|
815
|
-
else:
|
|
816
|
-
raise ValueError(
|
|
817
|
-
f"Invalid non_media_baseline_values value: '{baseline_value}'. Only"
|
|
818
|
-
" float numbers and strings 'min' and 'max' are supported."
|
|
819
|
-
)
|
|
820
|
-
|
|
821
|
-
baseline_list.append(
|
|
822
|
-
baseline_for_channel
|
|
823
|
-
* tf.ones_like(non_media_treatments[..., channel])
|
|
824
|
-
* non_media_selected_times
|
|
825
|
-
)
|
|
826
|
-
|
|
827
|
-
return tf.stack(baseline_list, axis=-1)
|
|
828
|
-
|
|
829
|
-
|
|
830
762
|
class Analyzer:
|
|
831
763
|
"""Runs calculations to analyze the raw data after fitting the model."""
|
|
832
764
|
|
|
@@ -853,7 +785,7 @@ class Analyzer:
|
|
|
853
785
|
`media`, `reach`, `frequency`, `organic_media`, `organic_reach`,
|
|
854
786
|
`organic_frequency`, `non_media_treatments`, `controls`. The `media`,
|
|
855
787
|
`reach`, `organic_media`, `organic_reach` and `non_media_treatments`
|
|
856
|
-
tensors are
|
|
788
|
+
tensors are expected to be scaled by their corresponding transformers.
|
|
857
789
|
dist_tensors: A `DistributionTensors` container with the distribution
|
|
858
790
|
tensors for media, RF, organic media, organic RF, non-media treatments,
|
|
859
791
|
and controls.
|
|
@@ -1064,7 +996,7 @@ class Analyzer:
|
|
|
1064
996
|
organic_media=self._meridian.organic_media_tensors.organic_media_scaled,
|
|
1065
997
|
organic_reach=self._meridian.organic_rf_tensors.organic_reach_scaled,
|
|
1066
998
|
organic_frequency=self._meridian.organic_rf_tensors.organic_frequency,
|
|
1067
|
-
non_media_treatments=self._meridian.
|
|
999
|
+
non_media_treatments=self._meridian.non_media_treatments_normalized,
|
|
1068
1000
|
controls=self._meridian.controls_scaled,
|
|
1069
1001
|
revenue_per_kpi=self._meridian.revenue_per_kpi,
|
|
1070
1002
|
)
|
|
@@ -1113,10 +1045,10 @@ class Analyzer:
|
|
|
1113
1045
|
if new_data.organic_frequency is not None
|
|
1114
1046
|
else self._meridian.organic_rf_tensors.organic_frequency
|
|
1115
1047
|
)
|
|
1116
|
-
|
|
1048
|
+
non_media_treatments_normalized = _transformed_new_or_scaled(
|
|
1117
1049
|
new_variable=new_data.non_media_treatments,
|
|
1118
1050
|
transformer=self._meridian.non_media_transformer,
|
|
1119
|
-
scaled_variable=self._meridian.
|
|
1051
|
+
scaled_variable=self._meridian.non_media_treatments_normalized,
|
|
1120
1052
|
)
|
|
1121
1053
|
return DataTensors(
|
|
1122
1054
|
media=media_scaled,
|
|
@@ -1125,7 +1057,7 @@ class Analyzer:
|
|
|
1125
1057
|
organic_media=organic_media_scaled,
|
|
1126
1058
|
organic_reach=organic_reach_scaled,
|
|
1127
1059
|
organic_frequency=organic_frequency,
|
|
1128
|
-
non_media_treatments=
|
|
1060
|
+
non_media_treatments=non_media_treatments_normalized,
|
|
1129
1061
|
controls=controls_scaled,
|
|
1130
1062
|
revenue_per_kpi=revenue_per_kpi,
|
|
1131
1063
|
)
|
|
@@ -1594,7 +1526,7 @@ class Analyzer:
|
|
|
1594
1526
|
self,
|
|
1595
1527
|
data_tensors: DataTensors,
|
|
1596
1528
|
dist_tensors: DistributionTensors,
|
|
1597
|
-
|
|
1529
|
+
non_media_treatments_baseline_normalized: Sequence[float] | None = None,
|
|
1598
1530
|
) -> tf.Tensor:
|
|
1599
1531
|
"""Computes incremental KPI distribution.
|
|
1600
1532
|
|
|
@@ -1608,17 +1540,26 @@ class Analyzer:
|
|
|
1608
1540
|
dist_tensors: A `DistributionTensors` container with the distribution
|
|
1609
1541
|
tensors for media, RF, organic media, organic RF and non-media
|
|
1610
1542
|
treatments channels.
|
|
1611
|
-
|
|
1612
|
-
Each element is
|
|
1613
|
-
|
|
1614
|
-
|
|
1615
|
-
|
|
1616
|
-
|
|
1617
|
-
|
|
1543
|
+
non_media_treatments_baseline_normalized: Optional list of shape
|
|
1544
|
+
`(n_non_media_channels,)`. Each element is a float that will be used as
|
|
1545
|
+
baseline for the given channel. The values are expected to be scaled by
|
|
1546
|
+
population for channels where
|
|
1547
|
+
`model_spec.non_media_population_scaling_id` is `True` and normalized by
|
|
1548
|
+
centering and scaling using means and standard deviations. This argument
|
|
1549
|
+
is required if the data contains non-media treatments.
|
|
1618
1550
|
|
|
1619
1551
|
Returns:
|
|
1620
1552
|
Tensor of incremental KPI distribution.
|
|
1621
1553
|
"""
|
|
1554
|
+
if (
|
|
1555
|
+
data_tensors.non_media_treatments is not None
|
|
1556
|
+
and non_media_treatments_baseline_normalized is None
|
|
1557
|
+
):
|
|
1558
|
+
raise ValueError(
|
|
1559
|
+
"`non_media_treatments_baseline_normalized` must be passed to"
|
|
1560
|
+
" `_get_incremental_kpi` when `non_media_treatments` data is"
|
|
1561
|
+
" present."
|
|
1562
|
+
)
|
|
1622
1563
|
n_media_times = self._meridian.n_media_times
|
|
1623
1564
|
if data_tensors.media is not None:
|
|
1624
1565
|
n_times = data_tensors.media.shape[1] # pytype: disable=attribute-error
|
|
@@ -1641,13 +1582,10 @@ class Analyzer:
|
|
|
1641
1582
|
combined_beta,
|
|
1642
1583
|
)
|
|
1643
1584
|
if data_tensors.non_media_treatments is not None:
|
|
1644
|
-
non_media_scaled_baseline = _compute_non_media_baseline(
|
|
1645
|
-
non_media_treatments=data_tensors.non_media_treatments,
|
|
1646
|
-
non_media_baseline_values=non_media_baseline_values,
|
|
1647
|
-
)
|
|
1648
1585
|
non_media_kpi = tf.einsum(
|
|
1649
1586
|
"gtn,...gn->...gtn",
|
|
1650
|
-
data_tensors.non_media_treatments
|
|
1587
|
+
data_tensors.non_media_treatments
|
|
1588
|
+
- non_media_treatments_baseline_normalized,
|
|
1651
1589
|
dist_tensors.gamma_gn,
|
|
1652
1590
|
)
|
|
1653
1591
|
return tf.concat([combined_media_kpi, non_media_kpi], axis=-1)
|
|
@@ -1697,7 +1635,7 @@ class Analyzer:
|
|
|
1697
1635
|
self,
|
|
1698
1636
|
data_tensors: DataTensors,
|
|
1699
1637
|
dist_tensors: DistributionTensors,
|
|
1700
|
-
|
|
1638
|
+
non_media_treatments_baseline_normalized: Sequence[float] | None = None,
|
|
1701
1639
|
inverse_transform_outcome: bool | None = None,
|
|
1702
1640
|
use_kpi: bool | None = None,
|
|
1703
1641
|
selected_geos: Sequence[str] | None = None,
|
|
@@ -1722,20 +1660,21 @@ class Analyzer:
|
|
|
1722
1660
|
poulation. Shape (n_geos x T x n_organic_rf_channels), for any time
|
|
1723
1661
|
dimension T. `organic_frequency`: `organic frequency data` with shape
|
|
1724
1662
|
(n_geos x T x n_organic_rf_channels), for any time dimension T.
|
|
1725
|
-
`non_media_treatments`: `non_media_treatments` data
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
dimension `
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1663
|
+
`non_media_treatments`: `non_media_treatments` data scaled by population
|
|
1664
|
+
for the selected channels and normalized by means and standard
|
|
1665
|
+
deviations with shape (n_geos x T x n_non_media_channels), for any time
|
|
1666
|
+
dimension T. `revenue_per_kpi`: Contains revenue per kpi data with shape
|
|
1667
|
+
`(n_geos x T)`, for any time dimension `T`.
|
|
1668
|
+
dist_tensors: A `DistributionTensors` container with the distribution
|
|
1669
|
+
tensors for media, RF, organic media, organic RF and non-media
|
|
1670
|
+
treatments channels.
|
|
1671
|
+
non_media_treatments_baseline_normalized: Optional list of shape
|
|
1672
|
+
`(n_non_media_channels,)`. Each element is a float that will be used as
|
|
1673
|
+
baseline for the given channel. The values are expected to be scaled by
|
|
1674
|
+
population for channels where
|
|
1675
|
+
`model_spec.non_media_population_scaling_id` is `True` and normalized by
|
|
1676
|
+
centering and scaling using means and standard deviations. This argument
|
|
1677
|
+
is required if the data contains non-media treatments.
|
|
1739
1678
|
inverse_transform_outcome: Boolean. If `True`, returns the expected
|
|
1740
1679
|
outcome in the original KPI or revenue (depending on what is passed to
|
|
1741
1680
|
`use_kpi`), as it was passed to `InputData`. If False, returns the
|
|
@@ -1760,10 +1699,20 @@ class Analyzer:
|
|
|
1760
1699
|
Tensor containing the incremental outcome distribution.
|
|
1761
1700
|
"""
|
|
1762
1701
|
self._check_revenue_data_exists(use_kpi)
|
|
1702
|
+
if (
|
|
1703
|
+
data_tensors.non_media_treatments is not None
|
|
1704
|
+
and non_media_treatments_baseline_normalized is None
|
|
1705
|
+
):
|
|
1706
|
+
raise ValueError(
|
|
1707
|
+
"`non_media_treatments_baseline_normalized` must be passed to"
|
|
1708
|
+
" `_incremental_outcome_impl` when `non_media_treatments` data is"
|
|
1709
|
+
" present."
|
|
1710
|
+
)
|
|
1711
|
+
|
|
1763
1712
|
transformed_outcome = self._get_incremental_kpi(
|
|
1764
1713
|
data_tensors=data_tensors,
|
|
1765
1714
|
dist_tensors=dist_tensors,
|
|
1766
|
-
|
|
1715
|
+
non_media_treatments_baseline_normalized=non_media_treatments_baseline_normalized,
|
|
1767
1716
|
)
|
|
1768
1717
|
if inverse_transform_outcome:
|
|
1769
1718
|
incremental_outcome = self._inverse_outcome(
|
|
@@ -1787,7 +1736,7 @@ class Analyzer:
|
|
|
1787
1736
|
self,
|
|
1788
1737
|
use_posterior: bool = True,
|
|
1789
1738
|
new_data: DataTensors | None = None,
|
|
1790
|
-
non_media_baseline_values: Sequence[float
|
|
1739
|
+
non_media_baseline_values: Sequence[float] | None = None,
|
|
1791
1740
|
scaling_factor0: float = 0.0,
|
|
1792
1741
|
scaling_factor1: float = 1.0,
|
|
1793
1742
|
selected_geos: Sequence[str] | None = None,
|
|
@@ -1806,15 +1755,26 @@ class Analyzer:
|
|
|
1806
1755
|
This calculates the media outcome of each media channel for each posterior
|
|
1807
1756
|
or prior parameter draw. Incremental outcome is defined as:
|
|
1808
1757
|
|
|
1809
|
-
`E(Outcome|
|
|
1758
|
+
`E(Outcome|Treatment_1, Controls)` minus `E(Outcome|Treatment_0, Controls)`
|
|
1810
1759
|
|
|
1811
|
-
|
|
1812
|
-
|
|
1813
|
-
by
|
|
1814
|
-
|
|
1760
|
+
For paid & organic channels (without reach and frequency data),
|
|
1761
|
+
`Treatment_1` means that media execution for a given channel is multiplied
|
|
1762
|
+
by
|
|
1763
|
+
`scaling_factor1` (1.0 by default) for the set of time periods specified
|
|
1764
|
+
by `media_selected_times`. Similarly, `Treatment_0` means that media
|
|
1765
|
+
execution is multiplied by `scaling_factor0` (0.0 by default) for these time
|
|
1766
|
+
periods.
|
|
1767
|
+
|
|
1768
|
+
For paid & organic channels with reach and frequency data, either reach or
|
|
1769
|
+
frequency is held fixed while the other is scaled, depending on the
|
|
1770
|
+
`by_reach` argument.
|
|
1771
|
+
|
|
1772
|
+
For non-media treatments, `Treatment_1` means that the variable is set to
|
|
1773
|
+
historical values. `Treatment_0` means that the variable is set to its
|
|
1774
|
+
baseline value for all geos and time periods. Note that the scaling factors
|
|
1775
|
+
(`scaling_factor0` and `scaling_factor1`) are not applicable to non-media
|
|
1776
|
+
treatments.
|
|
1815
1777
|
|
|
1816
|
-
For channels with reach and frequency data, either reach or frequency is
|
|
1817
|
-
held fixed while the other is scaled, depending on the `by_reach` argument.
|
|
1818
1778
|
"Outcome" refers to either `revenue` if `use_kpi=False`, or `kpi` if
|
|
1819
1779
|
`use_kpi=True`. When `revenue_per_kpi` is not defined, `use_kpi` cannot be
|
|
1820
1780
|
False.
|
|
@@ -1856,13 +1816,13 @@ class Analyzer:
|
|
|
1856
1816
|
any of the tensors in `new_data` is provided with a different number of
|
|
1857
1817
|
time periods than in `InputData`, then all tensors must be provided with
|
|
1858
1818
|
the same number of time periods.
|
|
1859
|
-
non_media_baseline_values: Optional list of shape
|
|
1860
|
-
Each element is
|
|
1861
|
-
used as baseline for the given channel
|
|
1862
|
-
|
|
1863
|
-
|
|
1864
|
-
|
|
1865
|
-
each non_media
|
|
1819
|
+
non_media_baseline_values: Optional list of shape
|
|
1820
|
+
`(n_non_media_channels,)`. Each element is a float which means that the
|
|
1821
|
+
fixed value will be used as baseline for the given channel. It is
|
|
1822
|
+
expected that they are scaled by population for the channels where
|
|
1823
|
+
`model_spec.non_media_population_scaling_id` is `True`. If `None`, the
|
|
1824
|
+
`model_spec.non_media_baseline_values` is used, which defaults to the
|
|
1825
|
+
minimum value for each non_media treatment channel.
|
|
1866
1826
|
scaling_factor0: Float. The factor by which to scale the counterfactual
|
|
1867
1827
|
scenario "Media_0" during the time periods specified in
|
|
1868
1828
|
`media_selected_times`. Must be non-negative and less than
|
|
@@ -1944,6 +1904,7 @@ class Analyzer:
|
|
|
1944
1904
|
aggregate_geos=aggregate_geos,
|
|
1945
1905
|
selected_geos=selected_geos,
|
|
1946
1906
|
)
|
|
1907
|
+
_validate_non_media_baseline_values_numbers(non_media_baseline_values)
|
|
1947
1908
|
dist_type = constants.POSTERIOR if use_posterior else constants.PRIOR
|
|
1948
1909
|
|
|
1949
1910
|
if dist_type not in mmm.inference_data.groups():
|
|
@@ -2002,7 +1963,6 @@ class Analyzer:
|
|
|
2002
1963
|
media_selected_times = [
|
|
2003
1964
|
x in media_selected_times for x in mmm.input_data.media_time
|
|
2004
1965
|
]
|
|
2005
|
-
non_media_selected_times = media_selected_times[-mmm.n_times :]
|
|
2006
1966
|
|
|
2007
1967
|
# Set counterfactual tensors based on the scaling factors and the media
|
|
2008
1968
|
# selected times.
|
|
@@ -2014,28 +1974,52 @@ class Analyzer:
|
|
|
2014
1974
|
)[:, None]
|
|
2015
1975
|
|
|
2016
1976
|
if data_tensors.non_media_treatments is not None:
|
|
2017
|
-
|
|
2018
|
-
|
|
2019
|
-
|
|
2020
|
-
|
|
1977
|
+
non_media_treatments_baseline_scaled = (
|
|
1978
|
+
self._meridian.compute_non_media_treatments_baseline(
|
|
1979
|
+
non_media_baseline_values=non_media_baseline_values,
|
|
1980
|
+
)
|
|
1981
|
+
)
|
|
1982
|
+
non_media_treatments_baseline_normalized = self._meridian.non_media_transformer.forward( # pytype: disable=attribute-error
|
|
1983
|
+
non_media_treatments_baseline_scaled,
|
|
1984
|
+
apply_population_scaling=False,
|
|
1985
|
+
)
|
|
1986
|
+
non_media_treatments0 = tf.broadcast_to(
|
|
1987
|
+
tf.constant(
|
|
1988
|
+
non_media_treatments_baseline_normalized, dtype=tf.float32
|
|
1989
|
+
)[tf.newaxis, tf.newaxis, :],
|
|
1990
|
+
self._meridian.non_media_treatments.shape, # pytype: disable=attribute-error
|
|
2021
1991
|
)
|
|
2022
1992
|
else:
|
|
2023
|
-
|
|
1993
|
+
non_media_treatments_baseline_normalized = None
|
|
1994
|
+
non_media_treatments0 = None
|
|
2024
1995
|
|
|
2025
1996
|
incremented_data0 = _scale_tensors_by_multiplier(
|
|
2026
1997
|
data=data_tensors,
|
|
2027
1998
|
multiplier=counterfactual0,
|
|
2028
1999
|
by_reach=by_reach,
|
|
2029
|
-
non_media_treatments_baseline=new_non_media_treatments0,
|
|
2030
2000
|
)
|
|
2031
2001
|
incremented_data1 = _scale_tensors_by_multiplier(
|
|
2032
2002
|
data=data_tensors, multiplier=counterfactual1, by_reach=by_reach
|
|
2033
2003
|
)
|
|
2034
2004
|
|
|
2035
|
-
|
|
2005
|
+
scaled_data0 = self._get_scaled_data_tensors(
|
|
2036
2006
|
new_data=incremented_data0,
|
|
2037
2007
|
include_non_paid_channels=include_non_paid_channels,
|
|
2038
2008
|
)
|
|
2009
|
+
# TODO: b/415198977 - Verify the computation of outcome of non-media
|
|
2010
|
+
# treatments with `media_selected_times` and scale factors.
|
|
2011
|
+
|
|
2012
|
+
data_tensors0 = DataTensors(
|
|
2013
|
+
media=scaled_data0.media,
|
|
2014
|
+
reach=scaled_data0.reach,
|
|
2015
|
+
frequency=scaled_data0.frequency,
|
|
2016
|
+
organic_media=scaled_data0.organic_media,
|
|
2017
|
+
organic_reach=scaled_data0.organic_reach,
|
|
2018
|
+
organic_frequency=scaled_data0.organic_frequency,
|
|
2019
|
+
revenue_per_kpi=scaled_data0.revenue_per_kpi,
|
|
2020
|
+
non_media_treatments=non_media_treatments0,
|
|
2021
|
+
)
|
|
2022
|
+
|
|
2039
2023
|
data_tensors1 = self._get_scaled_data_tensors(
|
|
2040
2024
|
new_data=incremented_data1,
|
|
2041
2025
|
include_non_paid_channels=include_non_paid_channels,
|
|
@@ -2062,7 +2046,9 @@ class Analyzer:
|
|
|
2062
2046
|
incremental_outcome_kwargs = {
|
|
2063
2047
|
"inverse_transform_outcome": inverse_transform_outcome,
|
|
2064
2048
|
"use_kpi": use_kpi,
|
|
2065
|
-
"
|
|
2049
|
+
"non_media_treatments_baseline_normalized": (
|
|
2050
|
+
non_media_treatments_baseline_normalized
|
|
2051
|
+
),
|
|
2066
2052
|
}
|
|
2067
2053
|
for i, start_index in enumerate(batch_starting_indices):
|
|
2068
2054
|
stop_index = np.min([n_draws, start_index + batch_size])
|
|
@@ -2538,7 +2524,7 @@ class Analyzer:
|
|
|
2538
2524
|
aggregate_geos: bool = False,
|
|
2539
2525
|
aggregate_times: bool = False,
|
|
2540
2526
|
split_by_holdout_id: bool = False,
|
|
2541
|
-
non_media_baseline_values: Sequence[
|
|
2527
|
+
non_media_baseline_values: Sequence[float] | None = None,
|
|
2542
2528
|
confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
|
|
2543
2529
|
) -> xr.Dataset:
|
|
2544
2530
|
"""Calculates the data for the expected versus actual outcome over time.
|
|
@@ -2550,19 +2536,20 @@ class Analyzer:
|
|
|
2550
2536
|
are summed over all of the time periods.
|
|
2551
2537
|
split_by_holdout_id: Boolean. If `True` and `holdout_id` exists, the data
|
|
2552
2538
|
is split into `'Train'`, `'Test'`, and `'All Data'` subsections.
|
|
2553
|
-
non_media_baseline_values: Optional list of shape
|
|
2554
|
-
Each element is
|
|
2555
|
-
used as baseline for the given channel
|
|
2556
|
-
|
|
2557
|
-
|
|
2558
|
-
|
|
2559
|
-
channel.
|
|
2539
|
+
non_media_baseline_values: Optional list of shape
|
|
2540
|
+
`(n_non_media_channels,)`. Each element is a float which means that the
|
|
2541
|
+
fixed value will be used as baseline for the given channel. It is
|
|
2542
|
+
expected that they are scaled by population for the channels where
|
|
2543
|
+
`model_spec.non_media_population_scaling_id` is `True`. If `None`, the
|
|
2544
|
+
`model_spec.non_media_baseline_values` is used, which defaults to the
|
|
2545
|
+
minimum value for each non_media treatment channel.
|
|
2560
2546
|
confidence_level: Confidence level for expected outcome credible
|
|
2561
2547
|
intervals, represented as a value between zero and one. Default: `0.9`.
|
|
2562
2548
|
|
|
2563
2549
|
Returns:
|
|
2564
2550
|
A dataset with the expected, baseline, and actual outcome metrics.
|
|
2565
2551
|
"""
|
|
2552
|
+
_validate_non_media_baseline_values_numbers(non_media_baseline_values)
|
|
2566
2553
|
mmm = self._meridian
|
|
2567
2554
|
use_kpi = self._meridian.input_data.revenue_per_kpi is None
|
|
2568
2555
|
can_split_by_holdout = self._can_split_by_holdout_id(split_by_holdout_id)
|
|
@@ -2632,7 +2619,7 @@ class Analyzer:
|
|
|
2632
2619
|
|
|
2633
2620
|
def _calculate_baseline_expected_outcome(
|
|
2634
2621
|
self,
|
|
2635
|
-
non_media_baseline_values: Sequence[
|
|
2622
|
+
non_media_baseline_values: Sequence[float] | None = None,
|
|
2636
2623
|
**expected_outcome_kwargs,
|
|
2637
2624
|
) -> tf.Tensor:
|
|
2638
2625
|
"""Calculates either the posterior or prior expected outcome of baseline.
|
|
@@ -2644,20 +2631,19 @@ class Analyzer:
|
|
|
2644
2631
|
3) `new_organic_media` is set to all zeros
|
|
2645
2632
|
4) `new_organic_reach` is set to all zeros
|
|
2646
2633
|
5) `new_non_media_treatments` is set to the counterfactual values
|
|
2647
|
-
according to the
|
|
2648
|
-
`non_media_baseline_values` argument
|
|
2634
|
+
according to the `non_media_baseline_values` argument
|
|
2649
2635
|
6) `new_controls` are set to historical values
|
|
2650
2636
|
|
|
2651
2637
|
All other arguments of `expected_outcome` can be passed to this method.
|
|
2652
2638
|
|
|
2653
2639
|
Args:
|
|
2654
|
-
non_media_baseline_values: Optional list of shape
|
|
2655
|
-
Each element is
|
|
2656
|
-
used as baseline for the given channel
|
|
2657
|
-
|
|
2658
|
-
|
|
2659
|
-
|
|
2660
|
-
channel.
|
|
2640
|
+
non_media_baseline_values: Optional list of shape
|
|
2641
|
+
`(n_non_media_channels,)`. Each element is a float which means that the
|
|
2642
|
+
fixed value will be used as baseline for the given channel. It is
|
|
2643
|
+
expected that they are scaled by population for the channels where
|
|
2644
|
+
`model_spec.non_media_population_scaling_id` is `True`. If `None`, the
|
|
2645
|
+
`model_spec.non_media_baseline_values` is used, which defaults to the
|
|
2646
|
+
minimum value for each non_media treatment channel.
|
|
2661
2647
|
**expected_outcome_kwargs: kwargs to pass to `expected_outcome`, which
|
|
2662
2648
|
could contain use_posterior, selected_geos, selected_times,
|
|
2663
2649
|
aggregate_geos, aggregate_times, inverse_transform_outcome, use_kpi,
|
|
@@ -2690,10 +2676,27 @@ class Analyzer:
|
|
|
2690
2676
|
else None
|
|
2691
2677
|
)
|
|
2692
2678
|
if self._meridian.non_media_treatments is not None:
|
|
2693
|
-
|
|
2694
|
-
|
|
2679
|
+
if self._meridian.model_spec.non_media_population_scaling_id is not None:
|
|
2680
|
+
scaling_factors = tf.where(
|
|
2681
|
+
self._meridian.model_spec.non_media_population_scaling_id,
|
|
2682
|
+
self._meridian.population[:, tf.newaxis, tf.newaxis],
|
|
2683
|
+
tf.ones_like(self._meridian.population)[:, tf.newaxis, tf.newaxis],
|
|
2684
|
+
)
|
|
2685
|
+
else:
|
|
2686
|
+
scaling_factors = tf.ones_like(self._meridian.population)[
|
|
2687
|
+
:, tf.newaxis, tf.newaxis
|
|
2688
|
+
]
|
|
2689
|
+
|
|
2690
|
+
baseline = self._meridian.compute_non_media_treatments_baseline(
|
|
2695
2691
|
non_media_baseline_values=non_media_baseline_values,
|
|
2696
2692
|
)
|
|
2693
|
+
new_non_media_treatments_population_scaled = tf.broadcast_to(
|
|
2694
|
+
tf.constant(baseline, dtype=tf.float32)[tf.newaxis, tf.newaxis, :],
|
|
2695
|
+
self._meridian.non_media_treatments.shape,
|
|
2696
|
+
)
|
|
2697
|
+
new_non_media_treatments = (
|
|
2698
|
+
new_non_media_treatments_population_scaled * scaling_factors
|
|
2699
|
+
)
|
|
2697
2700
|
else:
|
|
2698
2701
|
new_non_media_treatments = None
|
|
2699
2702
|
new_controls = self._meridian.controls
|
|
@@ -2714,7 +2717,7 @@ class Analyzer:
|
|
|
2714
2717
|
new_data: DataTensors | None = None,
|
|
2715
2718
|
use_kpi: bool | None = None,
|
|
2716
2719
|
include_non_paid_channels: bool = True,
|
|
2717
|
-
non_media_baseline_values: Sequence[
|
|
2720
|
+
non_media_baseline_values: Sequence[float] | None = None,
|
|
2718
2721
|
**kwargs,
|
|
2719
2722
|
) -> tf.Tensor:
|
|
2720
2723
|
"""Aggregates the incremental outcome of the media channels.
|
|
@@ -2742,13 +2745,13 @@ class Analyzer:
|
|
|
2742
2745
|
include_non_paid_channels: Boolean. If `True`, then non-media treatments
|
|
2743
2746
|
and organic effects are included in the calculation. If `False`, then
|
|
2744
2747
|
only the paid media and RF effects are included.
|
|
2745
|
-
non_media_baseline_values: Optional list of shape
|
|
2746
|
-
Each element is
|
|
2747
|
-
used as baseline for the given channel
|
|
2748
|
-
|
|
2749
|
-
|
|
2750
|
-
|
|
2751
|
-
each non_media
|
|
2748
|
+
non_media_baseline_values: Optional list of shape
|
|
2749
|
+
`(n_non_media_channels,)`. Each element is a float which means that the
|
|
2750
|
+
fixed value will be used as baseline for the given channel. It is
|
|
2751
|
+
expected that they are scaled by population for the channels where
|
|
2752
|
+
`model_spec.non_media_population_scaling_id` is `True`. If `None`, the
|
|
2753
|
+
`model_spec.non_media_baseline_values` is used, which defaults to the
|
|
2754
|
+
minimum value for each non_media treatment channel.
|
|
2752
2755
|
**kwargs: kwargs to pass to `incremental_outcome`, which could contain
|
|
2753
2756
|
selected_geos, selected_times, aggregate_geos, aggregate_times,
|
|
2754
2757
|
batch_size.
|
|
@@ -2758,6 +2761,7 @@ class Analyzer:
|
|
|
2758
2761
|
of the channel dimension is incremented by one, with the new component at
|
|
2759
2762
|
the end containing the total incremental outcome of all channels.
|
|
2760
2763
|
"""
|
|
2764
|
+
_validate_non_media_baseline_values_numbers(non_media_baseline_values)
|
|
2761
2765
|
use_kpi = use_kpi or self._meridian.input_data.revenue_per_kpi is None
|
|
2762
2766
|
incremental_outcome_m = self.incremental_outcome(
|
|
2763
2767
|
use_posterior=use_posterior,
|
|
@@ -2790,7 +2794,7 @@ class Analyzer:
|
|
|
2790
2794
|
confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
|
|
2791
2795
|
batch_size: int = constants.DEFAULT_BATCH_SIZE,
|
|
2792
2796
|
include_non_paid_channels: bool = False,
|
|
2793
|
-
non_media_baseline_values: Sequence[
|
|
2797
|
+
non_media_baseline_values: Sequence[float] | None = None,
|
|
2794
2798
|
) -> xr.Dataset:
|
|
2795
2799
|
"""Returns summary metrics.
|
|
2796
2800
|
|
|
@@ -2866,13 +2870,13 @@ class Analyzer:
|
|
|
2866
2870
|
reported. If `False`, only the paid channels (media, reach and
|
|
2867
2871
|
frequency) are included but the summary contains also the metrics
|
|
2868
2872
|
dependent on spend. Default: `False`.
|
|
2869
|
-
non_media_baseline_values: Optional list of shape
|
|
2870
|
-
Each element is
|
|
2871
|
-
used as baseline for the given channel
|
|
2872
|
-
|
|
2873
|
-
|
|
2874
|
-
|
|
2875
|
-
channel.
|
|
2873
|
+
non_media_baseline_values: Optional list of shape
|
|
2874
|
+
`(n_non_media_channels,)`. Each element is a float which means that the
|
|
2875
|
+
fixed value will be used as baseline for the given channel. It is
|
|
2876
|
+
expected that they are scaled by population for the channels where
|
|
2877
|
+
`model_spec.non_media_population_scaling_id` is `True`. If `None`, the
|
|
2878
|
+
`model_spec.non_media_baseline_values` is used, which defaults to the
|
|
2879
|
+
minimum value for each non_media treatment channel.
|
|
2876
2880
|
|
|
2877
2881
|
Returns:
|
|
2878
2882
|
An `xr.Dataset` with coordinates: `channel`, `metric` (`mean`, `median`,
|
|
@@ -2886,6 +2890,7 @@ class Analyzer:
|
|
|
2886
2890
|
when `aggregate_times=False` because they do not have a clear
|
|
2887
2891
|
interpretation by time period.
|
|
2888
2892
|
"""
|
|
2893
|
+
_validate_non_media_baseline_values_numbers(non_media_baseline_values)
|
|
2889
2894
|
dim_kwargs = {
|
|
2890
2895
|
"selected_geos": selected_geos,
|
|
2891
2896
|
"selected_times": selected_times,
|
|
@@ -3274,7 +3279,7 @@ class Analyzer:
|
|
|
3274
3279
|
selected_times: Sequence[str] | None = None,
|
|
3275
3280
|
aggregate_geos: bool = True,
|
|
3276
3281
|
aggregate_times: bool = True,
|
|
3277
|
-
non_media_baseline_values: Sequence[float
|
|
3282
|
+
non_media_baseline_values: Sequence[float] | None = None,
|
|
3278
3283
|
confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
|
|
3279
3284
|
batch_size: int = constants.DEFAULT_BATCH_SIZE,
|
|
3280
3285
|
) -> xr.Dataset:
|
|
@@ -3289,13 +3294,13 @@ class Analyzer:
|
|
|
3289
3294
|
all of the regions.
|
|
3290
3295
|
aggregate_times: Boolean. If `True`, the expected outcome is summed over
|
|
3291
3296
|
all of the time periods.
|
|
3292
|
-
non_media_baseline_values: Optional list of shape
|
|
3293
|
-
Each element is
|
|
3294
|
-
used as baseline for the given channel
|
|
3295
|
-
|
|
3296
|
-
|
|
3297
|
-
|
|
3298
|
-
channel.
|
|
3297
|
+
non_media_baseline_values: Optional list of shape
|
|
3298
|
+
`(n_non_media_channels,)`. Each element is a float which means that the
|
|
3299
|
+
fixed value will be used as baseline for the given channel. It is
|
|
3300
|
+
expected that they are scaled by population for the channels where
|
|
3301
|
+
`model_spec.non_media_population_scaling_id` is `True`. If `None`, the
|
|
3302
|
+
`model_spec.non_media_baseline_values` is used, which defaults to the
|
|
3303
|
+
minimum value for each non_media treatment channel.
|
|
3299
3304
|
confidence_level: Confidence level for media summary metrics credible
|
|
3300
3305
|
intervals, represented as a value between zero and one.
|
|
3301
3306
|
batch_size: Integer representing the maximum draws per chain in each
|
|
@@ -3308,6 +3313,7 @@ class Analyzer:
|
|
|
3308
3313
|
`ci_low`,`ci_high`),`distribution` (prior, posterior) and contains the
|
|
3309
3314
|
following data variables: `baseline_outcome`, `pct_of_contribution`.
|
|
3310
3315
|
"""
|
|
3316
|
+
_validate_non_media_baseline_values_numbers(non_media_baseline_values)
|
|
3311
3317
|
# TODO: Change "pct_of_contribution" to a more accurate term.
|
|
3312
3318
|
|
|
3313
3319
|
use_kpi = self._meridian.input_data.revenue_per_kpi is None
|