google-meridian 1.0.9__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,6 +16,7 @@
16
16
 
17
17
  from collections.abc import Mapping, Sequence
18
18
  import itertools
19
+ import numbers
19
20
  from typing import Any, Optional
20
21
  import warnings
21
22
 
@@ -37,6 +38,20 @@ __all__ = [
37
38
  ]
38
39
 
39
40
 
41
+ def _validate_non_media_baseline_values_numbers(
42
+ non_media_baseline_values: Sequence[str | float] | None,
43
+ ):
44
+ if non_media_baseline_values is None:
45
+ return
46
+
47
+ for value in non_media_baseline_values:
48
+ if not isinstance(value, numbers.Number):
49
+ raise ValueError(
50
+ f"Invalid `non_media_baseline_values` value: '{value}'. Only float"
51
+ " numbers are supported."
52
+ )
53
+
54
+
40
55
  # TODO: Refactor the related unit tests to be under DataTensors.
41
56
  class DataTensors(tf.experimental.ExtensionType):
42
57
  """Container for data variable arguments of Analyzer methods.
@@ -181,6 +196,7 @@ class DataTensors(tf.experimental.ExtensionType):
181
196
  new_tensor is not None
182
197
  and old_tensor is not None
183
198
  and new_tensor.ndim > 1
199
+ and old_tensor.ndim > 1
184
200
  and new_tensor.shape[1] != old_tensor.shape[1]
185
201
  ):
186
202
  return new_tensor.shape[1]
@@ -653,22 +669,16 @@ def _scale_tensors_by_multiplier(
653
669
  data: DataTensors,
654
670
  multiplier: float,
655
671
  by_reach: bool,
656
- non_media_treatments_baseline: tf.Tensor | None = None,
657
672
  ) -> DataTensors:
658
673
  """Get scaled tensors for incremental outcome calculation.
659
674
 
660
675
  Args:
661
676
  data: DataTensors object containing the optional tensors to scale. Only
662
- `media`, `reach`, `frequency`, `organic_media`, `organic_reach`,
663
- `organic_frequency`, `non_media_treatments` are scaled. The other tensors
664
- remain unchanged.
677
+ `media`, `reach`, `frequency`, `organic_media`, `organic_reach`, and
678
+ `organic_frequency` are scaled. The other tensors remain unchanged.
665
679
  multiplier: Float indicating the factor to scale tensors by.
666
680
  by_reach: Boolean indicating whether to scale reach or frequency when rf
667
681
  data is available.
668
- non_media_treatments_baseline: Optional tensor to overwrite
669
- `data.non_media_treatments` in the output. Used to compute the
670
- counterfactual values for incremental outcome calculation. If not used, the
671
- unmodified `data.non_media_treatments` tensor is returned in the output.
672
682
 
673
683
  Returns:
674
684
  A `DataTensors` object containing scaled tensor parameters. The original
@@ -697,14 +707,9 @@ def _scale_tensors_by_multiplier(
697
707
  incremented_data[constants.ORGANIC_FREQUENCY] = (
698
708
  data.organic_frequency * multiplier
699
709
  )
700
- if non_media_treatments_baseline is not None:
701
- incremented_data[constants.NON_MEDIA_TREATMENTS] = (
702
- non_media_treatments_baseline
703
- )
704
- else:
705
- incremented_data[constants.NON_MEDIA_TREATMENTS] = data.non_media_treatments
706
710
 
707
711
  # Include the original data that does not get scaled.
712
+ incremented_data[constants.NON_MEDIA_TREATMENTS] = data.non_media_treatments
708
713
  incremented_data[constants.MEDIA_SPEND] = data.media_spend
709
714
  incremented_data[constants.RF_SPEND] = data.rf_spend
710
715
  incremented_data[constants.CONTROLS] = data.controls
@@ -754,79 +759,6 @@ def _central_tendency_and_ci_by_prior_and_posterior(
754
759
  return xr.Dataset(data_vars=xr_data, coords=xr_coords)
755
760
 
756
761
 
757
- def _compute_non_media_baseline(
758
- non_media_treatments: tf.Tensor,
759
- non_media_baseline_values: Sequence[float | str] | None = None,
760
- non_media_selected_times: Sequence[bool] | None = None,
761
- ) -> tf.Tensor:
762
- """Computes the baseline for each non-media treatment channel.
763
-
764
- Args:
765
- non_media_treatments: The non-media treatment input data.
766
- non_media_baseline_values: Optional list of shape (n_non_media_channels,).
767
- Each element is either a float (which means that the fixed value will be
768
- used as baseline for the given channel) or one of the strings "min" or
769
- "max" (which mean that the global minimum or maximum value will be used as
770
- baseline for the values of the given non_media treatment channel). If
771
- None, the minimum value is used as baseline for each non_media treatment
772
- channel.
773
- non_media_selected_times: Optional list of shape (n_times,). Each element is
774
- a boolean indicating whether the corresponding time period should be
775
- included in the baseline computation.
776
-
777
- Returns:
778
- A tensor of shape (n_geos, n_times, n_non_media_channels) containing the
779
- baseline values for each non-media treatment channel.
780
- """
781
-
782
- if non_media_selected_times is None:
783
- non_media_selected_times = [True] * non_media_treatments.shape[-2]
784
-
785
- if non_media_baseline_values is None:
786
- # If non_media_baseline_values is not provided, use the minimum value for
787
- # each non_media treatment channel as the baseline.
788
- non_media_baseline_values_filled = [
789
- constants.NON_MEDIA_BASELINE_MIN
790
- ] * non_media_treatments.shape[-1]
791
- else:
792
- non_media_baseline_values_filled = non_media_baseline_values
793
-
794
- if non_media_treatments.shape[-1] != len(non_media_baseline_values_filled):
795
- raise ValueError(
796
- "The number of non-media channels"
797
- f" ({non_media_treatments.shape[-1]}) does not match the number"
798
- f" of baseline types ({len(non_media_baseline_values_filled)})."
799
- )
800
-
801
- baseline_list = []
802
- for channel in range(non_media_treatments.shape[-1]):
803
- baseline_value = non_media_baseline_values_filled[channel]
804
-
805
- if baseline_value == constants.NON_MEDIA_BASELINE_MIN:
806
- baseline_for_channel = tf.reduce_min(
807
- non_media_treatments[..., channel], axis=[0, 1]
808
- )
809
- elif baseline_value == constants.NON_MEDIA_BASELINE_MAX:
810
- baseline_for_channel = tf.reduce_max(
811
- non_media_treatments[..., channel], axis=[0, 1]
812
- )
813
- elif isinstance(baseline_value, float):
814
- baseline_for_channel = tf.cast(baseline_value, tf.float32)
815
- else:
816
- raise ValueError(
817
- f"Invalid non_media_baseline_values value: '{baseline_value}'. Only"
818
- " float numbers and strings 'min' and 'max' are supported."
819
- )
820
-
821
- baseline_list.append(
822
- baseline_for_channel
823
- * tf.ones_like(non_media_treatments[..., channel])
824
- * non_media_selected_times
825
- )
826
-
827
- return tf.stack(baseline_list, axis=-1)
828
-
829
-
830
762
  class Analyzer:
831
763
  """Runs calculations to analyze the raw data after fitting the model."""
832
764
 
@@ -853,7 +785,7 @@ class Analyzer:
853
785
  `media`, `reach`, `frequency`, `organic_media`, `organic_reach`,
854
786
  `organic_frequency`, `non_media_treatments`, `controls`. The `media`,
855
787
  `reach`, `organic_media`, `organic_reach` and `non_media_treatments`
856
- tensors are assumed to be scaled by their corresponding transformers.
788
+ tensors are expected to be scaled by their corresponding transformers.
857
789
  dist_tensors: A `DistributionTensors` container with the distribution
858
790
  tensors for media, RF, organic media, organic RF, non-media treatments,
859
791
  and controls.
@@ -1064,7 +996,7 @@ class Analyzer:
1064
996
  organic_media=self._meridian.organic_media_tensors.organic_media_scaled,
1065
997
  organic_reach=self._meridian.organic_rf_tensors.organic_reach_scaled,
1066
998
  organic_frequency=self._meridian.organic_rf_tensors.organic_frequency,
1067
- non_media_treatments=self._meridian.non_media_treatments_scaled,
999
+ non_media_treatments=self._meridian.non_media_treatments_normalized,
1068
1000
  controls=self._meridian.controls_scaled,
1069
1001
  revenue_per_kpi=self._meridian.revenue_per_kpi,
1070
1002
  )
@@ -1113,10 +1045,10 @@ class Analyzer:
1113
1045
  if new_data.organic_frequency is not None
1114
1046
  else self._meridian.organic_rf_tensors.organic_frequency
1115
1047
  )
1116
- non_media_treatments_scaled = _transformed_new_or_scaled(
1048
+ non_media_treatments_normalized = _transformed_new_or_scaled(
1117
1049
  new_variable=new_data.non_media_treatments,
1118
1050
  transformer=self._meridian.non_media_transformer,
1119
- scaled_variable=self._meridian.non_media_treatments_scaled,
1051
+ scaled_variable=self._meridian.non_media_treatments_normalized,
1120
1052
  )
1121
1053
  return DataTensors(
1122
1054
  media=media_scaled,
@@ -1125,7 +1057,7 @@ class Analyzer:
1125
1057
  organic_media=organic_media_scaled,
1126
1058
  organic_reach=organic_reach_scaled,
1127
1059
  organic_frequency=organic_frequency,
1128
- non_media_treatments=non_media_treatments_scaled,
1060
+ non_media_treatments=non_media_treatments_normalized,
1129
1061
  controls=controls_scaled,
1130
1062
  revenue_per_kpi=revenue_per_kpi,
1131
1063
  )
@@ -1594,7 +1526,7 @@ class Analyzer:
1594
1526
  self,
1595
1527
  data_tensors: DataTensors,
1596
1528
  dist_tensors: DistributionTensors,
1597
- non_media_baseline_values: Sequence[float | str] | None = None,
1529
+ non_media_treatments_baseline_normalized: Sequence[float] | None = None,
1598
1530
  ) -> tf.Tensor:
1599
1531
  """Computes incremental KPI distribution.
1600
1532
 
@@ -1608,17 +1540,26 @@ class Analyzer:
1608
1540
  dist_tensors: A `DistributionTensors` container with the distribution
1609
1541
  tensors for media, RF, organic media, organic RF and non-media
1610
1542
  treatments channels.
1611
- non_media_baseline_values: Optional list of shape (n_non_media_channels,).
1612
- Each element is either a float (which means that the fixed value will be
1613
- used as baseline for the given channel) or one of the strings "min" or
1614
- "max" (which mean that the global minimum or maximum value will be used
1615
- as baseline for the scaled values of the given non_media treatments
1616
- channel). If None, the minimum value is used as baseline for each
1617
- non_media treatments channel.
1543
+ non_media_treatments_baseline_normalized: Optional list of shape
1544
+ `(n_non_media_channels,)`. Each element is a float that will be used as
1545
+ baseline for the given channel. The values are expected to be scaled by
1546
+ population for channels where
1547
+ `model_spec.non_media_population_scaling_id` is `True` and normalized by
1548
+ centering and scaling using means and standard deviations. This argument
1549
+ is required if the data contains non-media treatments.
1618
1550
 
1619
1551
  Returns:
1620
1552
  Tensor of incremental KPI distribution.
1621
1553
  """
1554
+ if (
1555
+ data_tensors.non_media_treatments is not None
1556
+ and non_media_treatments_baseline_normalized is None
1557
+ ):
1558
+ raise ValueError(
1559
+ "`non_media_treatments_baseline_normalized` must be passed to"
1560
+ " `_get_incremental_kpi` when `non_media_treatments` data is"
1561
+ " present."
1562
+ )
1622
1563
  n_media_times = self._meridian.n_media_times
1623
1564
  if data_tensors.media is not None:
1624
1565
  n_times = data_tensors.media.shape[1] # pytype: disable=attribute-error
@@ -1641,13 +1582,10 @@ class Analyzer:
1641
1582
  combined_beta,
1642
1583
  )
1643
1584
  if data_tensors.non_media_treatments is not None:
1644
- non_media_scaled_baseline = _compute_non_media_baseline(
1645
- non_media_treatments=data_tensors.non_media_treatments,
1646
- non_media_baseline_values=non_media_baseline_values,
1647
- )
1648
1585
  non_media_kpi = tf.einsum(
1649
1586
  "gtn,...gn->...gtn",
1650
- data_tensors.non_media_treatments - non_media_scaled_baseline,
1587
+ data_tensors.non_media_treatments
1588
+ - non_media_treatments_baseline_normalized,
1651
1589
  dist_tensors.gamma_gn,
1652
1590
  )
1653
1591
  return tf.concat([combined_media_kpi, non_media_kpi], axis=-1)
@@ -1697,7 +1635,7 @@ class Analyzer:
1697
1635
  self,
1698
1636
  data_tensors: DataTensors,
1699
1637
  dist_tensors: DistributionTensors,
1700
- non_media_baseline_values: Sequence[float | str] | None = None,
1638
+ non_media_treatments_baseline_normalized: Sequence[float] | None = None,
1701
1639
  inverse_transform_outcome: bool | None = None,
1702
1640
  use_kpi: bool | None = None,
1703
1641
  selected_geos: Sequence[str] | None = None,
@@ -1722,20 +1660,21 @@ class Analyzer:
1722
1660
  population. Shape (n_geos x T x n_organic_rf_channels), for any time
1723
1661
  dimension T. `organic_frequency`: `organic frequency data` with shape
1724
1662
  (n_geos x T x n_organic_rf_channels), for any time dimension T.
1725
- `non_media_treatments`: `non_media_treatments` data with shape (n_geos x
1726
- T x n_non_media_channels), for any time dimension T. `revenue_per_kpi`:
1727
- Contains revenue per kpi data with shape `(n_geos x T)`, for any time
1728
- dimension `T`.
1729
- dist_tensors: A `DistributionTensors` container with the distribution
1730
- tensors for media, RF, organic media, organic RF and non-media treatments
1731
- channels.
1732
- non_media_baseline_values: Optional list of shape (n_non_media_channels,).
1733
- Each element is either a float (which means that the fixed value will be
1734
- used as baseline for the given channel) or one of the strings "min" or
1735
- "max" (which mean that the global minimum or maximum value will be used
1736
- as baseline for the scaled values of the given non_media treatments
1737
- channel). If None, the minimum value is used as baseline for each
1738
- non_media treatments channel.
1663
+ `non_media_treatments`: `non_media_treatments` data scaled by population
1664
+ for the selected channels and normalized by means and standard
1665
+ deviations with shape (n_geos x T x n_non_media_channels), for any time
1666
+ dimension T. `revenue_per_kpi`: Contains revenue per kpi data with shape
1667
+ `(n_geos x T)`, for any time dimension `T`.
1668
+ dist_tensors: A `DistributionTensors` container with the distribution
1669
+ tensors for media, RF, organic media, organic RF and non-media
1670
+ treatments channels.
1671
+ non_media_treatments_baseline_normalized: Optional list of shape
1672
+ `(n_non_media_channels,)`. Each element is a float that will be used as
1673
+ baseline for the given channel. The values are expected to be scaled by
1674
+ population for channels where
1675
+ `model_spec.non_media_population_scaling_id` is `True` and normalized by
1676
+ centering and scaling using means and standard deviations. This argument
1677
+ is required if the data contains non-media treatments.
1739
1678
  inverse_transform_outcome: Boolean. If `True`, returns the expected
1740
1679
  outcome in the original KPI or revenue (depending on what is passed to
1741
1680
  `use_kpi`), as it was passed to `InputData`. If False, returns the
@@ -1760,10 +1699,20 @@ class Analyzer:
1760
1699
  Tensor containing the incremental outcome distribution.
1761
1700
  """
1762
1701
  self._check_revenue_data_exists(use_kpi)
1702
+ if (
1703
+ data_tensors.non_media_treatments is not None
1704
+ and non_media_treatments_baseline_normalized is None
1705
+ ):
1706
+ raise ValueError(
1707
+ "`non_media_treatments_baseline_normalized` must be passed to"
1708
+ " `_incremental_outcome_impl` when `non_media_treatments` data is"
1709
+ " present."
1710
+ )
1711
+
1763
1712
  transformed_outcome = self._get_incremental_kpi(
1764
1713
  data_tensors=data_tensors,
1765
1714
  dist_tensors=dist_tensors,
1766
- non_media_baseline_values=non_media_baseline_values,
1715
+ non_media_treatments_baseline_normalized=non_media_treatments_baseline_normalized,
1767
1716
  )
1768
1717
  if inverse_transform_outcome:
1769
1718
  incremental_outcome = self._inverse_outcome(
@@ -1787,7 +1736,7 @@ class Analyzer:
1787
1736
  self,
1788
1737
  use_posterior: bool = True,
1789
1738
  new_data: DataTensors | None = None,
1790
- non_media_baseline_values: Sequence[float | str] | None = None,
1739
+ non_media_baseline_values: Sequence[float] | None = None,
1791
1740
  scaling_factor0: float = 0.0,
1792
1741
  scaling_factor1: float = 1.0,
1793
1742
  selected_geos: Sequence[str] | None = None,
@@ -1806,15 +1755,26 @@ class Analyzer:
1806
1755
  This calculates the media outcome of each media channel for each posterior
1807
1756
  or prior parameter draw. Incremental outcome is defined as:
1808
1757
 
1809
- `E(Outcome|Media_1, Controls)` minus `E(Outcome|Media_0, Controls)`
1758
+ `E(Outcome|Treatment_1, Controls)` minus `E(Outcome|Treatment_0, Controls)`
1810
1759
 
1811
- Here, `Media_1` means that media execution for a given channel is multiplied
1812
- by `scaling_factor1` (1.0 by default) for the set of time periods specified
1813
- by `media_selected_times`. Similarly, `Media_0` means that media execution
1814
- is multiplied by `scaling_factor0` (0.0 by default) for these time periods.
1760
+ For paid & organic channels (without reach and frequency data),
1761
+ `Treatment_1` means that media execution for a given channel is multiplied
1762
+ by
1763
+ `scaling_factor1` (1.0 by default) for the set of time periods specified
1764
+ by `media_selected_times`. Similarly, `Treatment_0` means that media
1765
+ execution is multiplied by `scaling_factor0` (0.0 by default) for these time
1766
+ periods.
1767
+
1768
+ For paid & organic channels with reach and frequency data, either reach or
1769
+ frequency is held fixed while the other is scaled, depending on the
1770
+ `by_reach` argument.
1771
+
1772
+ For non-media treatments, `Treatment_1` means that the variable is set to
1773
+ historical values. `Treatment_0` means that the variable is set to its
1774
+ baseline value for all geos and time periods. Note that the scaling factors
1775
+ (`scaling_factor0` and `scaling_factor1`) are not applicable to non-media
1776
+ treatments.
1815
1777
 
1816
- For channels with reach and frequency data, either reach or frequency is
1817
- held fixed while the other is scaled, depending on the `by_reach` argument.
1818
1778
  "Outcome" refers to either `revenue` if `use_kpi=False`, or `kpi` if
1819
1779
  `use_kpi=True`. When `revenue_per_kpi` is not defined, `use_kpi` cannot be
1820
1780
  False.
@@ -1856,13 +1816,13 @@ class Analyzer:
1856
1816
  any of the tensors in `new_data` is provided with a different number of
1857
1817
  time periods than in `InputData`, then all tensors must be provided with
1858
1818
  the same number of time periods.
1859
- non_media_baseline_values: Optional list of shape (n_non_media_channels,).
1860
- Each element is either a float (which means that the fixed value will be
1861
- used as baseline for the given channel) or one of the strings "min" or
1862
- "max" (which mean that the global minimum or maximum value will be used
1863
- as baseline for the scaled values of the given non_media treatments
1864
- channel). If not provided, the minimum value is used as the baseline for
1865
- each non_media treatments channel.
1819
+ non_media_baseline_values: Optional list of shape
1820
+ `(n_non_media_channels,)`. Each element is a float which means that the
1821
+ fixed value will be used as baseline for the given channel. It is
1822
+ expected that they are scaled by population for the channels where
1823
+ `model_spec.non_media_population_scaling_id` is `True`. If `None`, the
1824
+ `model_spec.non_media_baseline_values` is used, which defaults to the
1825
+ minimum value for each non_media treatment channel.
1866
1826
  scaling_factor0: Float. The factor by which to scale the counterfactual
1867
1827
  scenario "Media_0" during the time periods specified in
1868
1828
  `media_selected_times`. Must be non-negative and less than
@@ -1944,6 +1904,7 @@ class Analyzer:
1944
1904
  aggregate_geos=aggregate_geos,
1945
1905
  selected_geos=selected_geos,
1946
1906
  )
1907
+ _validate_non_media_baseline_values_numbers(non_media_baseline_values)
1947
1908
  dist_type = constants.POSTERIOR if use_posterior else constants.PRIOR
1948
1909
 
1949
1910
  if dist_type not in mmm.inference_data.groups():
@@ -2002,7 +1963,6 @@ class Analyzer:
2002
1963
  media_selected_times = [
2003
1964
  x in media_selected_times for x in mmm.input_data.media_time
2004
1965
  ]
2005
- non_media_selected_times = media_selected_times[-mmm.n_times :]
2006
1966
 
2007
1967
  # Set counterfactual tensors based on the scaling factors and the media
2008
1968
  # selected times.
@@ -2014,28 +1974,52 @@ class Analyzer:
2014
1974
  )[:, None]
2015
1975
 
2016
1976
  if data_tensors.non_media_treatments is not None:
2017
- new_non_media_treatments0 = _compute_non_media_baseline(
2018
- non_media_treatments=data_tensors.non_media_treatments,
2019
- non_media_baseline_values=non_media_baseline_values,
2020
- non_media_selected_times=non_media_selected_times,
1977
+ non_media_treatments_baseline_scaled = (
1978
+ self._meridian.compute_non_media_treatments_baseline(
1979
+ non_media_baseline_values=non_media_baseline_values,
1980
+ )
1981
+ )
1982
+ non_media_treatments_baseline_normalized = self._meridian.non_media_transformer.forward( # pytype: disable=attribute-error
1983
+ non_media_treatments_baseline_scaled,
1984
+ apply_population_scaling=False,
1985
+ )
1986
+ non_media_treatments0 = tf.broadcast_to(
1987
+ tf.constant(
1988
+ non_media_treatments_baseline_normalized, dtype=tf.float32
1989
+ )[tf.newaxis, tf.newaxis, :],
1990
+ self._meridian.non_media_treatments.shape, # pytype: disable=attribute-error
2021
1991
  )
2022
1992
  else:
2023
- new_non_media_treatments0 = None
1993
+ non_media_treatments_baseline_normalized = None
1994
+ non_media_treatments0 = None
2024
1995
 
2025
1996
  incremented_data0 = _scale_tensors_by_multiplier(
2026
1997
  data=data_tensors,
2027
1998
  multiplier=counterfactual0,
2028
1999
  by_reach=by_reach,
2029
- non_media_treatments_baseline=new_non_media_treatments0,
2030
2000
  )
2031
2001
  incremented_data1 = _scale_tensors_by_multiplier(
2032
2002
  data=data_tensors, multiplier=counterfactual1, by_reach=by_reach
2033
2003
  )
2034
2004
 
2035
- data_tensors0 = self._get_scaled_data_tensors(
2005
+ scaled_data0 = self._get_scaled_data_tensors(
2036
2006
  new_data=incremented_data0,
2037
2007
  include_non_paid_channels=include_non_paid_channels,
2038
2008
  )
2009
+ # TODO: b/415198977 - Verify the computation of outcome of non-media
2010
+ # treatments with `media_selected_times` and scale factors.
2011
+
2012
+ data_tensors0 = DataTensors(
2013
+ media=scaled_data0.media,
2014
+ reach=scaled_data0.reach,
2015
+ frequency=scaled_data0.frequency,
2016
+ organic_media=scaled_data0.organic_media,
2017
+ organic_reach=scaled_data0.organic_reach,
2018
+ organic_frequency=scaled_data0.organic_frequency,
2019
+ revenue_per_kpi=scaled_data0.revenue_per_kpi,
2020
+ non_media_treatments=non_media_treatments0,
2021
+ )
2022
+
2039
2023
  data_tensors1 = self._get_scaled_data_tensors(
2040
2024
  new_data=incremented_data1,
2041
2025
  include_non_paid_channels=include_non_paid_channels,
@@ -2062,7 +2046,9 @@ class Analyzer:
2062
2046
  incremental_outcome_kwargs = {
2063
2047
  "inverse_transform_outcome": inverse_transform_outcome,
2064
2048
  "use_kpi": use_kpi,
2065
- "non_media_baseline_values": non_media_baseline_values,
2049
+ "non_media_treatments_baseline_normalized": (
2050
+ non_media_treatments_baseline_normalized
2051
+ ),
2066
2052
  }
2067
2053
  for i, start_index in enumerate(batch_starting_indices):
2068
2054
  stop_index = np.min([n_draws, start_index + batch_size])
@@ -2538,7 +2524,7 @@ class Analyzer:
2538
2524
  aggregate_geos: bool = False,
2539
2525
  aggregate_times: bool = False,
2540
2526
  split_by_holdout_id: bool = False,
2541
- non_media_baseline_values: Sequence[str | float] | None = None,
2527
+ non_media_baseline_values: Sequence[float] | None = None,
2542
2528
  confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
2543
2529
  ) -> xr.Dataset:
2544
2530
  """Calculates the data for the expected versus actual outcome over time.
@@ -2550,19 +2536,20 @@ class Analyzer:
2550
2536
  are summed over all of the time periods.
2551
2537
  split_by_holdout_id: Boolean. If `True` and `holdout_id` exists, the data
2552
2538
  is split into `'Train'`, `'Test'`, and `'All Data'` subsections.
2553
- non_media_baseline_values: Optional list of shape (n_non_media_channels,).
2554
- Each element is either a float (which means that the fixed value will be
2555
- used as baseline for the given channel) or one of the strings "min" or
2556
- "max" (which mean that the global minimum or maximum value will be used
2557
- as baseline for the values of the given non_media treatment channel). If
2558
- None, the minimum value is used as baseline for each non_media treatment
2559
- channel.
2539
+ non_media_baseline_values: Optional list of shape
2540
+ `(n_non_media_channels,)`. Each element is a float which means that the
2541
+ fixed value will be used as baseline for the given channel. It is
2542
+ expected that they are scaled by population for the channels where
2543
+ `model_spec.non_media_population_scaling_id` is `True`. If `None`, the
2544
+ `model_spec.non_media_baseline_values` is used, which defaults to the
2545
+ minimum value for each non_media treatment channel.
2560
2546
  confidence_level: Confidence level for expected outcome credible
2561
2547
  intervals, represented as a value between zero and one. Default: `0.9`.
2562
2548
 
2563
2549
  Returns:
2564
2550
  A dataset with the expected, baseline, and actual outcome metrics.
2565
2551
  """
2552
+ _validate_non_media_baseline_values_numbers(non_media_baseline_values)
2566
2553
  mmm = self._meridian
2567
2554
  use_kpi = self._meridian.input_data.revenue_per_kpi is None
2568
2555
  can_split_by_holdout = self._can_split_by_holdout_id(split_by_holdout_id)
@@ -2632,7 +2619,7 @@ class Analyzer:
2632
2619
 
2633
2620
  def _calculate_baseline_expected_outcome(
2634
2621
  self,
2635
- non_media_baseline_values: Sequence[str | float] | None = None,
2622
+ non_media_baseline_values: Sequence[float] | None = None,
2636
2623
  **expected_outcome_kwargs,
2637
2624
  ) -> tf.Tensor:
2638
2625
  """Calculates either the posterior or prior expected outcome of baseline.
@@ -2644,20 +2631,19 @@ class Analyzer:
2644
2631
  3) `new_organic_media` is set to all zeros
2645
2632
  4) `new_organic_reach` is set to all zeros
2646
2633
  5) `new_non_media_treatments` is set to the counterfactual values
2647
- according to the
2648
- `non_media_baseline_values` argument
2634
+ according to the `non_media_baseline_values` argument
2649
2635
  6) `new_controls` are set to historical values
2650
2636
 
2651
2637
  All other arguments of `expected_outcome` can be passed to this method.
2652
2638
 
2653
2639
  Args:
2654
- non_media_baseline_values: Optional list of shape (n_non_media_channels,).
2655
- Each element is either a float (which means that the fixed value will be
2656
- used as baseline for the given channel) or one of the strings "min" or
2657
- "max" (which mean that the global minimum or maximum value will be used
2658
- as baseline for the values of the given non_media treatment channel). If
2659
- None, the minimum value is used as baseline for each non_media treatment
2660
- channel.
2640
+ non_media_baseline_values: Optional list of shape
2641
+ `(n_non_media_channels,)`. Each element is a float which means that the
2642
+ fixed value will be used as baseline for the given channel. It is
2643
+ expected that they are scaled by population for the channels where
2644
+ `model_spec.non_media_population_scaling_id` is `True`. If `None`, the
2645
+ `model_spec.non_media_baseline_values` is used, which defaults to the
2646
+ minimum value for each non_media treatment channel.
2661
2647
  **expected_outcome_kwargs: kwargs to pass to `expected_outcome`, which
2662
2648
  could contain use_posterior, selected_geos, selected_times,
2663
2649
  aggregate_geos, aggregate_times, inverse_transform_outcome, use_kpi,
@@ -2690,10 +2676,27 @@ class Analyzer:
2690
2676
  else None
2691
2677
  )
2692
2678
  if self._meridian.non_media_treatments is not None:
2693
- new_non_media_treatments = _compute_non_media_baseline(
2694
- non_media_treatments=self._meridian.non_media_treatments,
2679
+ if self._meridian.model_spec.non_media_population_scaling_id is not None:
2680
+ scaling_factors = tf.where(
2681
+ self._meridian.model_spec.non_media_population_scaling_id,
2682
+ self._meridian.population[:, tf.newaxis, tf.newaxis],
2683
+ tf.ones_like(self._meridian.population)[:, tf.newaxis, tf.newaxis],
2684
+ )
2685
+ else:
2686
+ scaling_factors = tf.ones_like(self._meridian.population)[
2687
+ :, tf.newaxis, tf.newaxis
2688
+ ]
2689
+
2690
+ baseline = self._meridian.compute_non_media_treatments_baseline(
2695
2691
  non_media_baseline_values=non_media_baseline_values,
2696
2692
  )
2693
+ new_non_media_treatments_population_scaled = tf.broadcast_to(
2694
+ tf.constant(baseline, dtype=tf.float32)[tf.newaxis, tf.newaxis, :],
2695
+ self._meridian.non_media_treatments.shape,
2696
+ )
2697
+ new_non_media_treatments = (
2698
+ new_non_media_treatments_population_scaled * scaling_factors
2699
+ )
2697
2700
  else:
2698
2701
  new_non_media_treatments = None
2699
2702
  new_controls = self._meridian.controls
@@ -2714,7 +2717,7 @@ class Analyzer:
2714
2717
  new_data: DataTensors | None = None,
2715
2718
  use_kpi: bool | None = None,
2716
2719
  include_non_paid_channels: bool = True,
2717
- non_media_baseline_values: Sequence[str | float] | None = None,
2720
+ non_media_baseline_values: Sequence[float] | None = None,
2718
2721
  **kwargs,
2719
2722
  ) -> tf.Tensor:
2720
2723
  """Aggregates the incremental outcome of the media channels.
@@ -2742,13 +2745,13 @@ class Analyzer:
2742
2745
  include_non_paid_channels: Boolean. If `True`, then non-media treatments
2743
2746
  and organic effects are included in the calculation. If `False`, then
2744
2747
  only the paid media and RF effects are included.
2745
- non_media_baseline_values: Optional list of shape (n_non_media_channels,).
2746
- Each element is either a float (which means that the fixed value will be
2747
- used as baseline for the given channel) or one of the strings "min" or
2748
- "max" (which mean that the global minimum or maximum value will be used
2749
- as baseline for the scaled values of the given non_media treatments
2750
- channel). If not provided, the minimum value is used as the baseline for
2751
- each non_media treatments channel.
2748
+ non_media_baseline_values: Optional list of shape
2749
+ `(n_non_media_channels,)`. Each element is a float which means that the
2750
+ fixed value will be used as baseline for the given channel. It is
2751
+ expected that they are scaled by population for the channels where
2752
+ `model_spec.non_media_population_scaling_id` is `True`. If `None`, the
2753
+ `model_spec.non_media_baseline_values` is used, which defaults to the
2754
+ minimum value for each non_media treatment channel.
2752
2755
  **kwargs: kwargs to pass to `incremental_outcome`, which could contain
2753
2756
  selected_geos, selected_times, aggregate_geos, aggregate_times,
2754
2757
  batch_size.
@@ -2758,6 +2761,7 @@ class Analyzer:
2758
2761
  of the channel dimension is incremented by one, with the new component at
2759
2762
  the end containing the total incremental outcome of all channels.
2760
2763
  """
2764
+ _validate_non_media_baseline_values_numbers(non_media_baseline_values)
2761
2765
  use_kpi = use_kpi or self._meridian.input_data.revenue_per_kpi is None
2762
2766
  incremental_outcome_m = self.incremental_outcome(
2763
2767
  use_posterior=use_posterior,
@@ -2790,7 +2794,7 @@ class Analyzer:
2790
2794
  confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
2791
2795
  batch_size: int = constants.DEFAULT_BATCH_SIZE,
2792
2796
  include_non_paid_channels: bool = False,
2793
- non_media_baseline_values: Sequence[str | float] | None = None,
2797
+ non_media_baseline_values: Sequence[float] | None = None,
2794
2798
  ) -> xr.Dataset:
2795
2799
  """Returns summary metrics.
2796
2800
 
@@ -2866,13 +2870,13 @@ class Analyzer:
2866
2870
  reported. If `False`, only the paid channels (media, reach and
2867
2871
  frequency) are included but the summary also contains the metrics
2868
2872
  dependent on spend. Default: `False`.
2869
- non_media_baseline_values: Optional list of shape (n_non_media_channels,).
2870
- Each element is either a float (which means that the fixed value will be
2871
- used as baseline for the given channel) or one of the strings "min" or
2872
- "max" (which mean that the global minimum or maximum value will be used
2873
- as baseline for the values of the given non_media treatment channel). If
2874
- None, the minimum value is used as baseline for each non_media treatment
2875
- channel.
2873
+ non_media_baseline_values: Optional list of shape
2874
+ `(n_non_media_channels,)`. Each element is a float which means that the
2875
+ fixed value will be used as baseline for the given channel. It is
2876
+ expected that these values are scaled by population for the channels where
2877
+ `model_spec.non_media_population_scaling_id` is `True`. If `None`, the
2878
+ `model_spec.non_media_baseline_values` is used, which defaults to the
2879
+ minimum value for each non_media treatment channel.
2876
2880
 
2877
2881
  Returns:
2878
2882
  An `xr.Dataset` with coordinates: `channel`, `metric` (`mean`, `median`,
@@ -2886,6 +2890,7 @@ class Analyzer:
2886
2890
  when `aggregate_times=False` because they do not have a clear
2887
2891
  interpretation by time period.
2888
2892
  """
2893
+ _validate_non_media_baseline_values_numbers(non_media_baseline_values)
2889
2894
  dim_kwargs = {
2890
2895
  "selected_geos": selected_geos,
2891
2896
  "selected_times": selected_times,
@@ -3274,7 +3279,7 @@ class Analyzer:
3274
3279
  selected_times: Sequence[str] | None = None,
3275
3280
  aggregate_geos: bool = True,
3276
3281
  aggregate_times: bool = True,
3277
- non_media_baseline_values: Sequence[float | str] | None = None,
3282
+ non_media_baseline_values: Sequence[float] | None = None,
3278
3283
  confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
3279
3284
  batch_size: int = constants.DEFAULT_BATCH_SIZE,
3280
3285
  ) -> xr.Dataset:
@@ -3289,13 +3294,13 @@ class Analyzer:
3289
3294
  all of the regions.
3290
3295
  aggregate_times: Boolean. If `True`, the expected outcome is summed over
3291
3296
  all of the time periods.
3292
- non_media_baseline_values: Optional list of shape (n_non_media_channels,).
3293
- Each element is either a float (which means that the fixed value will be
3294
- used as baseline for the given channel) or one of the strings "min" or
3295
- "max" (which mean that the global minimum or maximum value will be used
3296
- as baseline for the values of the given non_media treatment channel). If
3297
- None, the minimum value is used as baseline for each non_media treatment
3298
- channel.
3297
+ non_media_baseline_values: Optional list of shape
3298
+ `(n_non_media_channels,)`. Each element is a float which means that the
3299
+ fixed value will be used as baseline for the given channel. It is
3300
+ expected that these values are scaled by population for the channels where
3301
+ `model_spec.non_media_population_scaling_id` is `True`. If `None`, the
3302
+ `model_spec.non_media_baseline_values` is used, which defaults to the
3303
+ minimum value for each non_media treatment channel.
3299
3304
  confidence_level: Confidence level for media summary metrics credible
3300
3305
  intervals, represented as a value between zero and one.
3301
3306
  batch_size: Integer representing the maximum draws per chain in each
@@ -3308,6 +3313,7 @@ class Analyzer:
3308
3313
  `ci_low`,`ci_high`),`distribution` (prior, posterior) and contains the
3309
3314
  following data variables: `baseline_outcome`, `pct_of_contribution`.
3310
3315
  """
3316
+ _validate_non_media_baseline_values_numbers(non_media_baseline_values)
3311
3317
  # TODO: Change "pct_of_contribution" to a more accurate term.
3312
3318
 
3313
3319
  use_kpi = self._meridian.input_data.revenue_per_kpi is None