google-meridian 1.0.9__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The Meridian Authors.
1
+ # Copyright 2025 The Meridian Authors.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@
16
16
 
17
17
  from collections.abc import Mapping, Sequence
18
18
  import itertools
19
+ import numbers
19
20
  from typing import Any, Optional
20
21
  import warnings
21
22
 
@@ -37,6 +38,20 @@ __all__ = [
37
38
  ]
38
39
 
39
40
 
41
+ def _validate_non_media_baseline_values_numbers(
42
+ non_media_baseline_values: Sequence[str | float] | None,
43
+ ):
44
+ if non_media_baseline_values is None:
45
+ return
46
+
47
+ for value in non_media_baseline_values:
48
+ if not isinstance(value, numbers.Number):
49
+ raise ValueError(
50
+ f"Invalid `non_media_baseline_values` value: '{value}'. Only float"
51
+ " numbers are supported."
52
+ )
53
+
54
+
40
55
  # TODO: Refactor the related unit tests to be under DataTensors.
41
56
  class DataTensors(tf.experimental.ExtensionType):
42
57
  """Container for data variable arguments of Analyzer methods.
@@ -181,6 +196,7 @@ class DataTensors(tf.experimental.ExtensionType):
181
196
  new_tensor is not None
182
197
  and old_tensor is not None
183
198
  and new_tensor.ndim > 1
199
+ and old_tensor.ndim > 1
184
200
  and new_tensor.shape[1] != old_tensor.shape[1]
185
201
  ):
186
202
  return new_tensor.shape[1]
@@ -653,22 +669,16 @@ def _scale_tensors_by_multiplier(
653
669
  data: DataTensors,
654
670
  multiplier: float,
655
671
  by_reach: bool,
656
- non_media_treatments_baseline: tf.Tensor | None = None,
657
672
  ) -> DataTensors:
658
673
  """Get scaled tensors for incremental outcome calculation.
659
674
 
660
675
  Args:
661
676
  data: DataTensors object containing the optional tensors to scale. Only
662
- `media`, `reach`, `frequency`, `organic_media`, `organic_reach`,
663
- `organic_frequency`, `non_media_treatments` are scaled. The other tensors
664
- remain unchanged.
677
+ `media`, `reach`, `frequency`, `organic_media`, `organic_reach`, and
678
+ `organic_frequency` are scaled. The other tensors remain unchanged.
665
679
  multiplier: Float indicating the factor to scale tensors by.
666
680
  by_reach: Boolean indicating whether to scale reach or frequency when rf
667
681
  data is available.
668
- non_media_treatments_baseline: Optional tensor to overwrite
669
- `data.non_media_treatments` in the output. Used to compute the
670
- conterfactual values for incremental outcome calculation. If not used, the
671
- unmodified `data.non_media_treatments` tensor is returned in the output.
672
682
 
673
683
  Returns:
674
684
  A `DataTensors` object containing scaled tensor parameters. The original
@@ -697,14 +707,9 @@ def _scale_tensors_by_multiplier(
697
707
  incremented_data[constants.ORGANIC_FREQUENCY] = (
698
708
  data.organic_frequency * multiplier
699
709
  )
700
- if non_media_treatments_baseline is not None:
701
- incremented_data[constants.NON_MEDIA_TREATMENTS] = (
702
- non_media_treatments_baseline
703
- )
704
- else:
705
- incremented_data[constants.NON_MEDIA_TREATMENTS] = data.non_media_treatments
706
710
 
707
711
  # Include the original data that does not get scaled.
712
+ incremented_data[constants.NON_MEDIA_TREATMENTS] = data.non_media_treatments
708
713
  incremented_data[constants.MEDIA_SPEND] = data.media_spend
709
714
  incremented_data[constants.RF_SPEND] = data.rf_spend
710
715
  incremented_data[constants.CONTROLS] = data.controls
@@ -754,79 +759,6 @@ def _central_tendency_and_ci_by_prior_and_posterior(
754
759
  return xr.Dataset(data_vars=xr_data, coords=xr_coords)
755
760
 
756
761
 
757
- def _compute_non_media_baseline(
758
- non_media_treatments: tf.Tensor,
759
- non_media_baseline_values: Sequence[float | str] | None = None,
760
- non_media_selected_times: Sequence[bool] | None = None,
761
- ) -> tf.Tensor:
762
- """Computes the baseline for each non-media treatment channel.
763
-
764
- Args:
765
- non_media_treatments: The non-media treatment input data.
766
- non_media_baseline_values: Optional list of shape (n_non_media_channels,).
767
- Each element is either a float (which means that the fixed value will be
768
- used as baseline for the given channel) or one of the strings "min" or
769
- "max" (which mean that the global minimum or maximum value will be used as
770
- baseline for the values of the given non_media treatment channel). If
771
- None, the minimum value is used as baseline for each non_media treatment
772
- channel.
773
- non_media_selected_times: Optional list of shape (n_times,). Each element is
774
- a boolean indicating whether the corresponding time period should be
775
- included in the baseline computation.
776
-
777
- Returns:
778
- A tensor of shape (n_geos, n_times, n_non_media_channels) containing the
779
- baseline values for each non-media treatment channel.
780
- """
781
-
782
- if non_media_selected_times is None:
783
- non_media_selected_times = [True] * non_media_treatments.shape[-2]
784
-
785
- if non_media_baseline_values is None:
786
- # If non_media_baseline_values is not provided, use the minimum value for
787
- # each non_media treatment channel as the baseline.
788
- non_media_baseline_values_filled = [
789
- constants.NON_MEDIA_BASELINE_MIN
790
- ] * non_media_treatments.shape[-1]
791
- else:
792
- non_media_baseline_values_filled = non_media_baseline_values
793
-
794
- if non_media_treatments.shape[-1] != len(non_media_baseline_values_filled):
795
- raise ValueError(
796
- "The number of non-media channels"
797
- f" ({non_media_treatments.shape[-1]}) does not match the number"
798
- f" of baseline types ({len(non_media_baseline_values_filled)})."
799
- )
800
-
801
- baseline_list = []
802
- for channel in range(non_media_treatments.shape[-1]):
803
- baseline_value = non_media_baseline_values_filled[channel]
804
-
805
- if baseline_value == constants.NON_MEDIA_BASELINE_MIN:
806
- baseline_for_channel = tf.reduce_min(
807
- non_media_treatments[..., channel], axis=[0, 1]
808
- )
809
- elif baseline_value == constants.NON_MEDIA_BASELINE_MAX:
810
- baseline_for_channel = tf.reduce_max(
811
- non_media_treatments[..., channel], axis=[0, 1]
812
- )
813
- elif isinstance(baseline_value, float):
814
- baseline_for_channel = tf.cast(baseline_value, tf.float32)
815
- else:
816
- raise ValueError(
817
- f"Invalid non_media_baseline_values value: '{baseline_value}'. Only"
818
- " float numbers and strings 'min' and 'max' are supported."
819
- )
820
-
821
- baseline_list.append(
822
- baseline_for_channel
823
- * tf.ones_like(non_media_treatments[..., channel])
824
- * non_media_selected_times
825
- )
826
-
827
- return tf.stack(baseline_list, axis=-1)
828
-
829
-
830
762
  class Analyzer:
831
763
  """Runs calculations to analyze the raw data after fitting the model."""
832
764
 
@@ -853,10 +785,10 @@ class Analyzer:
853
785
  `media`, `reach`, `frequency`, `organic_media`, `organic_reach`,
854
786
  `organic_frequency`, `non_media_treatments`, `controls`. The `media`,
855
787
  `reach`, `organic_media`, `organic_reach` and `non_media_treatments`
856
- tensors are assumed to be scaled by their corresponding transformers.
788
+ tensors are expected to be scaled by their corresponding transformers.
857
789
  dist_tensors: A `DistributionTensors` container with the distribution
858
790
  tensors for media, RF, organic media, organic RF, non-media treatments,
859
- and controls.
791
+ and controls (if available).
860
792
 
861
793
  Returns:
862
794
  Tensor representing computed kpi means.
@@ -871,17 +803,15 @@ class Analyzer:
871
803
  )
872
804
  )
873
805
 
874
- result = (
875
- tau_gt
876
- + tf.einsum(
877
- "...gtm,...gm->...gt", combined_media_transformed, combined_beta
878
- )
879
- + tf.einsum(
880
- "...gtc,...gc->...gt",
881
- data_tensors.controls,
882
- dist_tensors.gamma_gc,
883
- )
806
+ result = tau_gt + tf.einsum(
807
+ "...gtm,...gm->...gt", combined_media_transformed, combined_beta
884
808
  )
809
+ if self._meridian.controls is not None:
810
+ result += tf.einsum(
811
+ "...gtc,...gc->...gt",
812
+ data_tensors.controls,
813
+ dist_tensors.gamma_gc,
814
+ )
885
815
  if data_tensors.non_media_treatments is not None:
886
816
  result += tf.einsum(
887
817
  "...gtm,...gm->...gt",
@@ -1064,7 +994,7 @@ class Analyzer:
1064
994
  organic_media=self._meridian.organic_media_tensors.organic_media_scaled,
1065
995
  organic_reach=self._meridian.organic_rf_tensors.organic_reach_scaled,
1066
996
  organic_frequency=self._meridian.organic_rf_tensors.organic_frequency,
1067
- non_media_treatments=self._meridian.non_media_treatments_scaled,
997
+ non_media_treatments=self._meridian.non_media_treatments_normalized,
1068
998
  controls=self._meridian.controls_scaled,
1069
999
  revenue_per_kpi=self._meridian.revenue_per_kpi,
1070
1000
  )
@@ -1113,10 +1043,10 @@ class Analyzer:
1113
1043
  if new_data.organic_frequency is not None
1114
1044
  else self._meridian.organic_rf_tensors.organic_frequency
1115
1045
  )
1116
- non_media_treatments_scaled = _transformed_new_or_scaled(
1046
+ non_media_treatments_normalized = _transformed_new_or_scaled(
1117
1047
  new_variable=new_data.non_media_treatments,
1118
1048
  transformer=self._meridian.non_media_transformer,
1119
- scaled_variable=self._meridian.non_media_treatments_scaled,
1049
+ scaled_variable=self._meridian.non_media_treatments_normalized,
1120
1050
  )
1121
1051
  return DataTensors(
1122
1052
  media=media_scaled,
@@ -1125,7 +1055,7 @@ class Analyzer:
1125
1055
  organic_media=organic_media_scaled,
1126
1056
  organic_reach=organic_reach_scaled,
1127
1057
  organic_frequency=organic_frequency,
1128
- non_media_treatments=non_media_treatments_scaled,
1058
+ non_media_treatments=non_media_treatments_normalized,
1129
1059
  controls=controls_scaled,
1130
1060
  revenue_per_kpi=revenue_per_kpi,
1131
1061
  )
@@ -1532,11 +1462,14 @@ class Analyzer:
1532
1462
  (n_chains, 0, self._meridian.n_geos, self._meridian.n_times)
1533
1463
  )
1534
1464
  batch_starting_indices = np.arange(n_draws, step=batch_size)
1535
- param_list = [
1536
- constants.MU_T,
1537
- constants.TAU_G,
1538
- constants.GAMMA_GC,
1539
- ] + self._get_causal_param_names(include_non_paid_channels=True)
1465
+ param_list = (
1466
+ [
1467
+ constants.MU_T,
1468
+ constants.TAU_G,
1469
+ ]
1470
+ + ([constants.GAMMA_GC] if self._meridian.n_controls else [])
1471
+ + self._get_causal_param_names(include_non_paid_channels=True)
1472
+ )
1540
1473
  outcome_means_temps = []
1541
1474
  for start_index in batch_starting_indices:
1542
1475
  stop_index = np.min([n_draws, start_index + batch_size])
@@ -1594,7 +1527,7 @@ class Analyzer:
1594
1527
  self,
1595
1528
  data_tensors: DataTensors,
1596
1529
  dist_tensors: DistributionTensors,
1597
- non_media_baseline_values: Sequence[float | str] | None = None,
1530
+ non_media_treatments_baseline_normalized: Sequence[float] | None = None,
1598
1531
  ) -> tf.Tensor:
1599
1532
  """Computes incremental KPI distribution.
1600
1533
 
@@ -1608,17 +1541,26 @@ class Analyzer:
1608
1541
  dist_tensors: A `DistributionTensors` container with the distribution
1609
1542
  tensors for media, RF, organic media, organic RF and non-media
1610
1543
  treatments channels.
1611
- non_media_baseline_values: Optional list of shape (n_non_media_channels,).
1612
- Each element is either a float (which means that the fixed value will be
1613
- used as baseline for the given channel) or one of the strings "min" or
1614
- "max" (which mean that the global minimum or maximum value will be used
1615
- as baseline for the scaled values of the given non_media treatments
1616
- channel). If None, the minimum value is used as baseline for each
1617
- non_media treatments channel.
1544
+ non_media_treatments_baseline_normalized: Optional list of shape
1545
+ `(n_non_media_channels,)`. Each element is a float that will be used as
1546
+ baseline for the given channel. The values are expected to be scaled by
1547
+ population for channels where
1548
+ `model_spec.non_media_population_scaling_id` is `True` and normalized by
1549
+ centering and scaling using means and standard deviations. This argument
1550
+ is required if the data contains non-media treatments.
1618
1551
 
1619
1552
  Returns:
1620
1553
  Tensor of incremental KPI distribution.
1621
1554
  """
1555
+ if (
1556
+ data_tensors.non_media_treatments is not None
1557
+ and non_media_treatments_baseline_normalized is None
1558
+ ):
1559
+ raise ValueError(
1560
+ "`non_media_treatments_baseline_normalized` must be passed to"
1561
+ " `_get_incremental_kpi` when `non_media_treatments` data is"
1562
+ " present."
1563
+ )
1622
1564
  n_media_times = self._meridian.n_media_times
1623
1565
  if data_tensors.media is not None:
1624
1566
  n_times = data_tensors.media.shape[1] # pytype: disable=attribute-error
@@ -1641,13 +1583,10 @@ class Analyzer:
1641
1583
  combined_beta,
1642
1584
  )
1643
1585
  if data_tensors.non_media_treatments is not None:
1644
- non_media_scaled_baseline = _compute_non_media_baseline(
1645
- non_media_treatments=data_tensors.non_media_treatments,
1646
- non_media_baseline_values=non_media_baseline_values,
1647
- )
1648
1586
  non_media_kpi = tf.einsum(
1649
1587
  "gtn,...gn->...gtn",
1650
- data_tensors.non_media_treatments - non_media_scaled_baseline,
1588
+ data_tensors.non_media_treatments
1589
+ - non_media_treatments_baseline_normalized,
1651
1590
  dist_tensors.gamma_gn,
1652
1591
  )
1653
1592
  return tf.concat([combined_media_kpi, non_media_kpi], axis=-1)
@@ -1697,7 +1636,7 @@ class Analyzer:
1697
1636
  self,
1698
1637
  data_tensors: DataTensors,
1699
1638
  dist_tensors: DistributionTensors,
1700
- non_media_baseline_values: Sequence[float | str] | None = None,
1639
+ non_media_treatments_baseline_normalized: Sequence[float] | None = None,
1701
1640
  inverse_transform_outcome: bool | None = None,
1702
1641
  use_kpi: bool | None = None,
1703
1642
  selected_geos: Sequence[str] | None = None,
@@ -1722,20 +1661,21 @@ class Analyzer:
1722
1661
  population. Shape (n_geos x T x n_organic_rf_channels), for any time
1723
1662
  dimension T. `organic_frequency`: `organic frequency data` with shape
1724
1663
  (n_geos x T x n_organic_rf_channels), for any time dimension T.
1725
- `non_media_treatments`: `non_media_treatments` data with shape (n_geos x
1726
- T x n_non_media_channels), for any time dimension T. `revenue_per_kpi`:
1727
- Contains revenue per kpi data with shape `(n_geos x T)`, for any time
1728
- dimension `T`.
1729
- dist_tensors: A `DistributionTensors` container with the distribution
1730
- tensors for media, RF, organic media, organic RF and non-media treatments
1731
- channels.
1732
- non_media_baseline_values: Optional list of shape (n_non_media_channels,).
1733
- Each element is either a float (which means that the fixed value will be
1734
- used as baseline for the given channel) or one of the strings "min" or
1735
- "max" (which mean that the global minimum or maximum value will be used
1736
- as baseline for the scaled values of the given non_media treatments
1737
- channel). If None, the minimum value is used as baseline for each
1738
- non_media treatments channel.
1664
+ `non_media_treatments`: `non_media_treatments` data scaled by population
1665
+ for the selected channels and normalized by means and standard
1666
+ deviations with shape (n_geos x T x n_non_media_channels), for any time
1667
+ dimension T. `revenue_per_kpi`: Contains revenue per kpi data with shape
1668
+ `(n_geos x T)`, for any time dimension `T`.
1669
+ dist_tensors: A `DistributionTensors` container with the distribution
1670
+ tensors for media, RF, organic media, organic RF and non-media
1671
+ treatments channels.
1672
+ non_media_treatments_baseline_normalized: Optional list of shape
1673
+ `(n_non_media_channels,)`. Each element is a float that will be used as
1674
+ baseline for the given channel. The values are expected to be scaled by
1675
+ population for channels where
1676
+ `model_spec.non_media_population_scaling_id` is `True` and normalized by
1677
+ centering and scaling using means and standard deviations. This argument
1678
+ is required if the data contains non-media treatments.
1739
1679
  inverse_transform_outcome: Boolean. If `True`, returns the expected
1740
1680
  outcome in the original KPI or revenue (depending on what is passed to
1741
1681
  `use_kpi`), as it was passed to `InputData`. If False, returns the
@@ -1760,10 +1700,20 @@ class Analyzer:
1760
1700
  Tensor containing the incremental outcome distribution.
1761
1701
  """
1762
1702
  self._check_revenue_data_exists(use_kpi)
1703
+ if (
1704
+ data_tensors.non_media_treatments is not None
1705
+ and non_media_treatments_baseline_normalized is None
1706
+ ):
1707
+ raise ValueError(
1708
+ "`non_media_treatments_baseline_normalized` must be passed to"
1709
+ " `_incremental_outcome_impl` when `non_media_treatments` data is"
1710
+ " present."
1711
+ )
1712
+
1763
1713
  transformed_outcome = self._get_incremental_kpi(
1764
1714
  data_tensors=data_tensors,
1765
1715
  dist_tensors=dist_tensors,
1766
- non_media_baseline_values=non_media_baseline_values,
1716
+ non_media_treatments_baseline_normalized=non_media_treatments_baseline_normalized,
1767
1717
  )
1768
1718
  if inverse_transform_outcome:
1769
1719
  incremental_outcome = self._inverse_outcome(
@@ -1787,7 +1737,7 @@ class Analyzer:
1787
1737
  self,
1788
1738
  use_posterior: bool = True,
1789
1739
  new_data: DataTensors | None = None,
1790
- non_media_baseline_values: Sequence[float | str] | None = None,
1740
+ non_media_baseline_values: Sequence[float] | None = None,
1791
1741
  scaling_factor0: float = 0.0,
1792
1742
  scaling_factor1: float = 1.0,
1793
1743
  selected_geos: Sequence[str] | None = None,
@@ -1806,15 +1756,26 @@ class Analyzer:
1806
1756
  This calculates the media outcome of each media channel for each posterior
1807
1757
  or prior parameter draw. Incremental outcome is defined as:
1808
1758
 
1809
- `E(Outcome|Media_1, Controls)` minus `E(Outcome|Media_0, Controls)`
1759
+ `E(Outcome|Treatment_1, Controls)` minus `E(Outcome|Treatment_0, Controls)`
1760
+
1761
+ For paid & organic channels (without reach and frequency data),
1762
+ `Treatment_1` means that media execution for a given channel is multiplied
1763
+ by
1764
+ `scaling_factor1` (1.0 by default) for the set of time periods specified
1765
+ by `media_selected_times`. Similarly, `Treatment_0` means that media
1766
+ execution is multiplied by `scaling_factor0` (0.0 by default) for these time
1767
+ periods.
1810
1768
 
1811
- Here, `Media_1` means that media execution for a given channel is multiplied
1812
- by `scaling_factor1` (1.0 by default) for the set of time periods specified
1813
- by `media_selected_times`. Similarly, `Media_0` means that media execution
1814
- is multiplied by `scaling_factor0` (0.0 by default) for these time periods.
1769
+ For paid & organic channels with reach and frequency data, either reach or
1770
+ frequency is held fixed while the other is scaled, depending on the
1771
+ `by_reach` argument.
1772
+
1773
+ For non-media treatments, `Treatment_1` means that the variable is set to
1774
+ historical values. `Treatment_0` means that the variable is set to its
1775
+ baseline value for all geos and time periods. Note that the scaling factors
1776
+ (`scaling_factor0` and `scaling_factor1`) are not applicable to non-media
1777
+ treatments.
1815
1778
 
1816
- For channels with reach and frequency data, either reach or frequency is
1817
- held fixed while the other is scaled, depending on the `by_reach` argument.
1818
1779
  "Outcome" refers to either `revenue` if `use_kpi=False`, or `kpi` if
1819
1780
  `use_kpi=True`. When `revenue_per_kpi` is not defined, `use_kpi` cannot be
1820
1781
  False.
@@ -1856,13 +1817,13 @@ class Analyzer:
1856
1817
  any of the tensors in `new_data` is provided with a different number of
1857
1818
  time periods than in `InputData`, then all tensors must be provided with
1858
1819
  the same number of time periods.
1859
- non_media_baseline_values: Optional list of shape (n_non_media_channels,).
1860
- Each element is either a float (which means that the fixed value will be
1861
- used as baseline for the given channel) or one of the strings "min" or
1862
- "max" (which mean that the global minimum or maximum value will be used
1863
- as baseline for the scaled values of the given non_media treatments
1864
- channel). If not provided, the minimum value is used as the baseline for
1865
- each non_media treatments channel.
1820
+ non_media_baseline_values: Optional list of shape
1821
+ `(n_non_media_channels,)`. Each element is a float which means that the
1822
+ fixed value will be used as baseline for the given channel. It is
1823
+ expected that they are scaled by population for the channels where
1824
+ `model_spec.non_media_population_scaling_id` is `True`. If `None`, the
1825
+ `model_spec.non_media_baseline_values` is used, which defaults to the
1826
+ minimum value for each non_media treatment channel.
1866
1827
  scaling_factor0: Float. The factor by which to scale the counterfactual
1867
1828
  scenario "Treatment_0" during the time periods specified in
1868
1829
  `media_selected_times`. Must be non-negative and less than
@@ -1944,6 +1905,7 @@ class Analyzer:
1944
1905
  aggregate_geos=aggregate_geos,
1945
1906
  selected_geos=selected_geos,
1946
1907
  )
1908
+ _validate_non_media_baseline_values_numbers(non_media_baseline_values)
1947
1909
  dist_type = constants.POSTERIOR if use_posterior else constants.PRIOR
1948
1910
 
1949
1911
  if dist_type not in mmm.inference_data.groups():
@@ -2002,7 +1964,6 @@ class Analyzer:
2002
1964
  media_selected_times = [
2003
1965
  x in media_selected_times for x in mmm.input_data.media_time
2004
1966
  ]
2005
- non_media_selected_times = media_selected_times[-mmm.n_times :]
2006
1967
 
2007
1968
  # Set counterfactual tensors based on the scaling factors and the media
2008
1969
  # selected times.
@@ -2014,28 +1975,52 @@ class Analyzer:
2014
1975
  )[:, None]
2015
1976
 
2016
1977
  if data_tensors.non_media_treatments is not None:
2017
- new_non_media_treatments0 = _compute_non_media_baseline(
2018
- non_media_treatments=data_tensors.non_media_treatments,
2019
- non_media_baseline_values=non_media_baseline_values,
2020
- non_media_selected_times=non_media_selected_times,
1978
+ non_media_treatments_baseline_scaled = (
1979
+ self._meridian.compute_non_media_treatments_baseline(
1980
+ non_media_baseline_values=non_media_baseline_values,
1981
+ )
1982
+ )
1983
+ non_media_treatments_baseline_normalized = self._meridian.non_media_transformer.forward( # pytype: disable=attribute-error
1984
+ non_media_treatments_baseline_scaled,
1985
+ apply_population_scaling=False,
1986
+ )
1987
+ non_media_treatments0 = tf.broadcast_to(
1988
+ tf.constant(
1989
+ non_media_treatments_baseline_normalized, dtype=tf.float32
1990
+ )[tf.newaxis, tf.newaxis, :],
1991
+ self._meridian.non_media_treatments.shape, # pytype: disable=attribute-error
2021
1992
  )
2022
1993
  else:
2023
- new_non_media_treatments0 = None
1994
+ non_media_treatments_baseline_normalized = None
1995
+ non_media_treatments0 = None
2024
1996
 
2025
1997
  incremented_data0 = _scale_tensors_by_multiplier(
2026
1998
  data=data_tensors,
2027
1999
  multiplier=counterfactual0,
2028
2000
  by_reach=by_reach,
2029
- non_media_treatments_baseline=new_non_media_treatments0,
2030
2001
  )
2031
2002
  incremented_data1 = _scale_tensors_by_multiplier(
2032
2003
  data=data_tensors, multiplier=counterfactual1, by_reach=by_reach
2033
2004
  )
2034
2005
 
2035
- data_tensors0 = self._get_scaled_data_tensors(
2006
+ scaled_data0 = self._get_scaled_data_tensors(
2036
2007
  new_data=incremented_data0,
2037
2008
  include_non_paid_channels=include_non_paid_channels,
2038
2009
  )
2010
+ # TODO: b/415198977 - Verify the computation of outcome of non-media
2011
+ # treatments with `media_selected_times` and scale factors.
2012
+
2013
+ data_tensors0 = DataTensors(
2014
+ media=scaled_data0.media,
2015
+ reach=scaled_data0.reach,
2016
+ frequency=scaled_data0.frequency,
2017
+ organic_media=scaled_data0.organic_media,
2018
+ organic_reach=scaled_data0.organic_reach,
2019
+ organic_frequency=scaled_data0.organic_frequency,
2020
+ revenue_per_kpi=scaled_data0.revenue_per_kpi,
2021
+ non_media_treatments=non_media_treatments0,
2022
+ )
2023
+
2039
2024
  data_tensors1 = self._get_scaled_data_tensors(
2040
2025
  new_data=incremented_data1,
2041
2026
  include_non_paid_channels=include_non_paid_channels,
@@ -2062,7 +2047,9 @@ class Analyzer:
2062
2047
  incremental_outcome_kwargs = {
2063
2048
  "inverse_transform_outcome": inverse_transform_outcome,
2064
2049
  "use_kpi": use_kpi,
2065
- "non_media_baseline_values": non_media_baseline_values,
2050
+ "non_media_treatments_baseline_normalized": (
2051
+ non_media_treatments_baseline_normalized
2052
+ ),
2066
2053
  }
2067
2054
  for i, start_index in enumerate(batch_starting_indices):
2068
2055
  stop_index = np.min([n_draws, start_index + batch_size])
@@ -2538,7 +2525,7 @@ class Analyzer:
2538
2525
  aggregate_geos: bool = False,
2539
2526
  aggregate_times: bool = False,
2540
2527
  split_by_holdout_id: bool = False,
2541
- non_media_baseline_values: Sequence[str | float] | None = None,
2528
+ non_media_baseline_values: Sequence[float] | None = None,
2542
2529
  confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
2543
2530
  ) -> xr.Dataset:
2544
2531
  """Calculates the data for the expected versus actual outcome over time.
@@ -2550,19 +2537,20 @@ class Analyzer:
2550
2537
  are summed over all of the time periods.
2551
2538
  split_by_holdout_id: Boolean. If `True` and `holdout_id` exists, the data
2552
2539
  is split into `'Train'`, `'Test'`, and `'All Data'` subsections.
2553
- non_media_baseline_values: Optional list of shape (n_non_media_channels,).
2554
- Each element is either a float (which means that the fixed value will be
2555
- used as baseline for the given channel) or one of the strings "min" or
2556
- "max" (which mean that the global minimum or maximum value will be used
2557
- as baseline for the values of the given non_media treatment channel). If
2558
- None, the minimum value is used as baseline for each non_media treatment
2559
- channel.
2540
+ non_media_baseline_values: Optional list of shape
2541
+ `(n_non_media_channels,)`. Each element is a float which means that the
2542
+ fixed value will be used as baseline for the given channel. It is
2543
+ expected that they are scaled by population for the channels where
2544
+ `model_spec.non_media_population_scaling_id` is `True`. If `None`, the
2545
+ `model_spec.non_media_baseline_values` is used, which defaults to the
2546
+ minimum value for each non_media treatment channel.
2560
2547
  confidence_level: Confidence level for expected outcome credible
2561
2548
  intervals, represented as a value between zero and one. Default: `0.9`.
2562
2549
 
2563
2550
  Returns:
2564
2551
  A dataset with the expected, baseline, and actual outcome metrics.
2565
2552
  """
2553
+ _validate_non_media_baseline_values_numbers(non_media_baseline_values)
2566
2554
  mmm = self._meridian
2567
2555
  use_kpi = self._meridian.input_data.revenue_per_kpi is None
2568
2556
  can_split_by_holdout = self._can_split_by_holdout_id(split_by_holdout_id)
@@ -2632,7 +2620,7 @@ class Analyzer:
2632
2620
 
2633
2621
  def _calculate_baseline_expected_outcome(
2634
2622
  self,
2635
- non_media_baseline_values: Sequence[str | float] | None = None,
2623
+ non_media_baseline_values: Sequence[float] | None = None,
2636
2624
  **expected_outcome_kwargs,
2637
2625
  ) -> tf.Tensor:
2638
2626
  """Calculates either the posterior or prior expected outcome of baseline.
@@ -2644,20 +2632,19 @@ class Analyzer:
2644
2632
  3) `new_organic_media` is set to all zeros
2645
2633
  4) `new_organic_reach` is set to all zeros
2646
2634
  5) `new_non_media_treatments` is set to the counterfactual values
2647
- according to the
2648
- `non_media_baseline_values` argument
2635
+ according to the `non_media_baseline_values` argument
2649
2636
  6) `new_controls` are set to historical values
2650
2637
 
2651
2638
  All other arguments of `expected_outcome` can be passed to this method.
2652
2639
 
2653
2640
  Args:
2654
- non_media_baseline_values: Optional list of shape (n_non_media_channels,).
2655
- Each element is either a float (which means that the fixed value will be
2656
- used as baseline for the given channel) or one of the strings "min" or
2657
- "max" (which mean that the global minimum or maximum value will be used
2658
- as baseline for the values of the given non_media treatment channel). If
2659
- None, the minimum value is used as baseline for each non_media treatment
2660
- channel.
2641
+ non_media_baseline_values: Optional list of shape
2642
+ `(n_non_media_channels,)`. Each element is a float which means that the
2643
+ fixed value will be used as baseline for the given channel. It is
2644
+ expected that they are scaled by population for the channels where
2645
+ `model_spec.non_media_population_scaling_id` is `True`. If `None`, the
2646
+ `model_spec.non_media_baseline_values` is used, which defaults to the
2647
+ minimum value for each non_media treatment channel.
2661
2648
  **expected_outcome_kwargs: kwargs to pass to `expected_outcome`, which
2662
2649
  could contain use_posterior, selected_geos, selected_times,
2663
2650
  aggregate_geos, aggregate_times, inverse_transform_outcome, use_kpi,
@@ -2690,10 +2677,27 @@ class Analyzer:
2690
2677
  else None
2691
2678
  )
2692
2679
  if self._meridian.non_media_treatments is not None:
2693
- new_non_media_treatments = _compute_non_media_baseline(
2694
- non_media_treatments=self._meridian.non_media_treatments,
2680
+ if self._meridian.model_spec.non_media_population_scaling_id is not None:
2681
+ scaling_factors = tf.where(
2682
+ self._meridian.model_spec.non_media_population_scaling_id,
2683
+ self._meridian.population[:, tf.newaxis, tf.newaxis],
2684
+ tf.ones_like(self._meridian.population)[:, tf.newaxis, tf.newaxis],
2685
+ )
2686
+ else:
2687
+ scaling_factors = tf.ones_like(self._meridian.population)[
2688
+ :, tf.newaxis, tf.newaxis
2689
+ ]
2690
+
2691
+ baseline = self._meridian.compute_non_media_treatments_baseline(
2695
2692
  non_media_baseline_values=non_media_baseline_values,
2696
2693
  )
2694
+ new_non_media_treatments_population_scaled = tf.broadcast_to(
2695
+ tf.constant(baseline, dtype=tf.float32)[tf.newaxis, tf.newaxis, :],
2696
+ self._meridian.non_media_treatments.shape,
2697
+ )
2698
+ new_non_media_treatments = (
2699
+ new_non_media_treatments_population_scaled * scaling_factors
2700
+ )
2697
2701
  else:
2698
2702
  new_non_media_treatments = None
2699
2703
  new_controls = self._meridian.controls
@@ -2714,7 +2718,7 @@ class Analyzer:
2714
2718
  new_data: DataTensors | None = None,
2715
2719
  use_kpi: bool | None = None,
2716
2720
  include_non_paid_channels: bool = True,
2717
- non_media_baseline_values: Sequence[str | float] | None = None,
2721
+ non_media_baseline_values: Sequence[float] | None = None,
2718
2722
  **kwargs,
2719
2723
  ) -> tf.Tensor:
2720
2724
  """Aggregates the incremental outcome of the media channels.
@@ -2742,13 +2746,13 @@ class Analyzer:
2742
2746
  include_non_paid_channels: Boolean. If `True`, then non-media treatments
2743
2747
  and organic effects are included in the calculation. If `False`, then
2744
2748
  only the paid media and RF effects are included.
2745
- non_media_baseline_values: Optional list of shape (n_non_media_channels,).
2746
- Each element is either a float (which means that the fixed value will be
2747
- used as baseline for the given channel) or one of the strings "min" or
2748
- "max" (which mean that the global minimum or maximum value will be used
2749
- as baseline for the scaled values of the given non_media treatments
2750
- channel). If not provided, the minimum value is used as the baseline for
2751
- each non_media treatments channel.
2749
+ non_media_baseline_values: Optional list of shape
2750
+ `(n_non_media_channels,)`. Each element is a float that is used as the
2751
+ fixed baseline value for the corresponding channel. The values are
2752
+ expected to be scaled by population for the channels where
2753
+ `model_spec.non_media_population_scaling_id` is `True`. If `None`,
2754
+ `model_spec.non_media_baseline_values` is used, which defaults to the
2755
+ minimum value for each non_media treatment channel.
2752
2756
  **kwargs: kwargs to pass to `incremental_outcome`, which could contain
2753
2757
  selected_geos, selected_times, aggregate_geos, aggregate_times,
2754
2758
  batch_size.
@@ -2758,6 +2762,7 @@ class Analyzer:
2758
2762
  of the channel dimension is incremented by one, with the new component at
2759
2763
  the end containing the total incremental outcome of all channels.
2760
2764
  """
2765
+ _validate_non_media_baseline_values_numbers(non_media_baseline_values)
2761
2766
  use_kpi = use_kpi or self._meridian.input_data.revenue_per_kpi is None
2762
2767
  incremental_outcome_m = self.incremental_outcome(
2763
2768
  use_posterior=use_posterior,
@@ -2790,7 +2795,7 @@ class Analyzer:
2790
2795
  confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
2791
2796
  batch_size: int = constants.DEFAULT_BATCH_SIZE,
2792
2797
  include_non_paid_channels: bool = False,
2793
- non_media_baseline_values: Sequence[str | float] | None = None,
2798
+ non_media_baseline_values: Sequence[float] | None = None,
2794
2799
  ) -> xr.Dataset:
2795
2800
  """Returns summary metrics.
2796
2801
 
@@ -2866,13 +2871,13 @@ class Analyzer:
2866
2871
  reported. If `False`, only the paid channels (media, reach and
2867
2872
  frequency) are included but the summary contains also the metrics
2868
2873
  dependent on spend. Default: `False`.
2869
- non_media_baseline_values: Optional list of shape (n_non_media_channels,).
2870
- Each element is either a float (which means that the fixed value will be
2871
- used as baseline for the given channel) or one of the strings "min" or
2872
- "max" (which mean that the global minimum or maximum value will be used
2873
- as baseline for the values of the given non_media treatment channel). If
2874
- None, the minimum value is used as baseline for each non_media treatment
2875
- channel.
2874
+ non_media_baseline_values: Optional list of shape
2875
+ `(n_non_media_channels,)`. Each element is a float that is used as the
2876
+ fixed baseline value for the corresponding channel. The values are
2877
+ expected to be scaled by population for the channels where
2878
+ `model_spec.non_media_population_scaling_id` is `True`. If `None`,
2879
+ `model_spec.non_media_baseline_values` is used, which defaults to the
2880
+ minimum value for each non_media treatment channel.
2876
2881
 
2877
2882
  Returns:
2878
2883
  An `xr.Dataset` with coordinates: `channel`, `metric` (`mean`, `median`,
@@ -2886,6 +2891,7 @@ class Analyzer:
2886
2891
  when `aggregate_times=False` because they do not have a clear
2887
2892
  interpretation by time period.
2888
2893
  """
2894
+ _validate_non_media_baseline_values_numbers(non_media_baseline_values)
2889
2895
  dim_kwargs = {
2890
2896
  "selected_geos": selected_geos,
2891
2897
  "selected_times": selected_times,
@@ -3274,7 +3280,7 @@ class Analyzer:
3274
3280
  selected_times: Sequence[str] | None = None,
3275
3281
  aggregate_geos: bool = True,
3276
3282
  aggregate_times: bool = True,
3277
- non_media_baseline_values: Sequence[float | str] | None = None,
3283
+ non_media_baseline_values: Sequence[float] | None = None,
3278
3284
  confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
3279
3285
  batch_size: int = constants.DEFAULT_BATCH_SIZE,
3280
3286
  ) -> xr.Dataset:
@@ -3289,13 +3295,13 @@ class Analyzer:
3289
3295
  all of the regions.
3290
3296
  aggregate_times: Boolean. If `True`, the expected outcome is summed over
3291
3297
  all of the time periods.
3292
- non_media_baseline_values: Optional list of shape (n_non_media_channels,).
3293
- Each element is either a float (which means that the fixed value will be
3294
- used as baseline for the given channel) or one of the strings "min" or
3295
- "max" (which mean that the global minimum or maximum value will be used
3296
- as baseline for the values of the given non_media treatment channel). If
3297
- None, the minimum value is used as baseline for each non_media treatment
3298
- channel.
3298
+ non_media_baseline_values: Optional list of shape
3299
+ `(n_non_media_channels,)`. Each element is a float that is used as the
3300
+ fixed baseline value for the corresponding channel. The values are
3301
+ expected to be scaled by population for the channels where
3302
+ `model_spec.non_media_population_scaling_id` is `True`. If `None`,
3303
+ `model_spec.non_media_baseline_values` is used, which defaults to the
3304
+ minimum value for each non_media treatment channel.
3299
3305
  confidence_level: Confidence level for media summary metrics credible
3300
3306
  intervals, represented as a value between zero and one.
3301
3307
  batch_size: Integer representing the maximum draws per chain in each
@@ -3308,6 +3314,7 @@ class Analyzer:
3308
3314
  `ci_low`,`ci_high`),`distribution` (prior, posterior) and contains the
3309
3315
  following data variables: `baseline_outcome`, `pct_of_contribution`.
3310
3316
  """
3317
+ _validate_non_media_baseline_values_numbers(non_media_baseline_values)
3311
3318
  # TODO: Change "pct_of_contribution" to a more accurate term.
3312
3319
 
3313
3320
  use_kpi = self._meridian.input_data.revenue_per_kpi is None