google-meridian 1.1.2__py3-none-any.whl → 1.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {google_meridian-1.1.2.dist-info → google_meridian-1.1.4.dist-info}/METADATA +2 -2
- {google_meridian-1.1.2.dist-info → google_meridian-1.1.4.dist-info}/RECORD +18 -17
- meridian/__init__.py +6 -4
- meridian/analysis/analyzer.py +68 -25
- meridian/analysis/optimizer.py +298 -48
- meridian/constants.py +3 -0
- meridian/data/data_frame_input_data_builder.py +41 -0
- meridian/data/input_data_builder.py +12 -4
- meridian/data/load.py +262 -346
- meridian/mlflow/autolog.py +158 -6
- meridian/model/media.py +7 -0
- meridian/model/model.py +14 -16
- meridian/model/posterior_sampler.py +13 -9
- meridian/model/prior_sampler.py +4 -6
- meridian/version.py +17 -0
- {google_meridian-1.1.2.dist-info → google_meridian-1.1.4.dist-info}/WHEEL +0 -0
- {google_meridian-1.1.2.dist-info → google_meridian-1.1.4.dist-info}/licenses/LICENSE +0 -0
- {google_meridian-1.1.2.dist-info → google_meridian-1.1.4.dist-info}/top_level.txt +0 -0
meridian/analysis/optimizer.py
CHANGED
|
@@ -1368,7 +1368,10 @@ class BudgetOptimizer:
|
|
|
1368
1368
|
versions of all the remaining tensors. If any of the tensors in
|
|
1369
1369
|
`new_data` is provided with a different number of time periods than in
|
|
1370
1370
|
`InputData`, then all tensors must be provided with the same number of
|
|
1371
|
-
time periods and the `time` tensor must be provided.
|
|
1371
|
+
time periods and the `time` tensor must be provided. In this case, spend
|
|
1372
|
+
tensors must be provided with `geo` and `time` granularity. If
|
|
1373
|
+
`use_optimal_frequency` is `True`, `new_data.frequency` does not need to
|
|
1374
|
+
be provided and is ignored. The optimal frequency is used instead.
|
|
1372
1375
|
use_posterior: Boolean. If `True`, then the budget is optimized based on
|
|
1373
1376
|
the posterior distribution of the model. Otherwise, the prior
|
|
1374
1377
|
distribution is used.
|
|
@@ -1427,7 +1430,7 @@ class BudgetOptimizer:
|
|
|
1427
1430
|
or equal to `(budget * gtol)`. `gtol` must be less than 1.
|
|
1428
1431
|
use_optimal_frequency: If `True`, uses `optimal_frequency` calculated by
|
|
1429
1432
|
trained Meridian model for optimization. If `False`, uses historical
|
|
1430
|
-
frequency.
|
|
1433
|
+
frequency or `new_data.frequency` if provided.
|
|
1431
1434
|
use_kpi: If `True`, runs the optimization on KPI. Defaults to revenue.
|
|
1432
1435
|
confidence_level: The threshold for computing the confidence intervals.
|
|
1433
1436
|
batch_size: Maximum draws per chain in each batch. The calculation is run
|
|
@@ -1596,6 +1599,142 @@ class BudgetOptimizer:
|
|
|
1596
1599
|
_optimization_grid=optimization_grid,
|
|
1597
1600
|
)
|
|
1598
1601
|
|
|
1602
|
+
def create_optimization_tensors(
    self,
    time: Sequence[str] | tf.Tensor,
    cpmu: tf.Tensor | None = None,
    media: tf.Tensor | None = None,
    media_spend: tf.Tensor | None = None,
    cprf: tf.Tensor | None = None,
    rf_impressions: tf.Tensor | None = None,
    frequency: tf.Tensor | None = None,
    rf_spend: tf.Tensor | None = None,
    revenue_per_kpi: tf.Tensor | None = None,
    use_optimal_frequency: bool = True,
) -> analyzer.DataTensors:
  """Creates a `DataTensors` for optimizations from CPM and flighting data.

  CPM is broken down into cost per media unit, `cpmu`, for the media channels
  and cost per impression (reach * frequency), `cprf`, for the reach and
  frequency channels.

  The flighting pattern can be specified as the spend flighting or the media
  units flighting pattern at the time or geo and time granularity. If data is
  passed without a geo dimension, then the values are interpreted as
  national-level totals. If the model is a geo-level model, then the values
  are allocated across geos based on the population used in the model.

  Below are the different combinations of tensors that can be provided:

  For media:
    1) `media`, `cpmu` (media units flighting pattern)
    2) `media_spend`, `cpmu` (spend flighting pattern)

  For R&F:
    If `use_optimal_frequency=True`, `frequency` should not be provided.
    Frequency input is not required for the optimization, so the new
    `DataTensors` object will be created with `frequency` arbitrarily set to
    1 and `reach=rf_impressions`.
      1) `rf_impressions`, `cprf` (impressions flighting pattern)
      2) `rf_spend`, `cprf` (spend flighting pattern)

    If `use_optimal_frequency=False`:
      1) `rf_impressions`, `frequency`, `cprf` (impressions flighting pattern)
      2) `rf_spend`, `frequency`, `cprf` (spend flighting pattern)

  Args:
    time: A sequence or tensor of time coordinates in the "YYYY-mm-dd" string
      format.
    cpmu: A tensor of cost per media unit with dimensions
      `(n_media_channels)`, `(T, n_media_channels)` or `(n_geos, T,
      n_media_channels)` for any time dimension `T`.
    media: An optional tensor of media unit values with dimensions `(T,
      n_media_channels)` or `(n_geos, T, n_media_channels)` for any time
      dimension `T`.
    media_spend: A tensor of media spend values with dimensions `(T,
      n_media_channels)` or `(n_geos, T, n_media_channels)` for any time
      dimension `T`.
    cprf: A tensor of cost per impression (reach * frequency) with dimensions
      `(n_rf_channels)`, `(T, n_rf_channels)` or `(n_geos, T, n_rf_channels)`
      for any time dimension `T`.
    rf_impressions: A tensor of impressions (reach * frequency) values with
      dimensions `(T, n_rf_channels)` or `(n_geos, T, n_rf_channels)` for any
      time dimension `T`.
    frequency: A tensor of frequency values with dimensions `(n_rf_channels)`,
      `(T, n_rf_channels)` or `(n_geos, T, n_rf_channels)` for any time
      dimension `T`. If `use_optimal_frequency=True`, then this tensor should
      not be provided and the optimal frequency will be calculated and used.
    rf_spend: A tensor of rf spend values with dimensions `(T, n_rf_channels)`
      or `(n_geos, T, n_rf_channels)` for any time dimension `T`.
    revenue_per_kpi: A tensor of revenue per KPI values with dimensions `()`,
      `(T)`, or `(n_geos, T)` for any time dimension `T`.
    use_optimal_frequency: Boolean. If `True`, the optimal frequency will be
      used in the optimization and a frequency value should not be provided.
      In this case, `reach=rf_impressions` and `frequency=1` (by arbitrary
      convention) in the new data. If `False`, the frequency value must be
      provided.

  Returns:
    A `DataTensors` object with optional tensors `media`, `reach`,
    `frequency`, `media_spend`, `rf_spend`, `revenue_per_kpi`, and `time`.

  Raises:
    ValueError: If the combination of provided tensors is rejected by
      `_validate_optimization_tensors`, or if a tensor cannot be expanded to
      the `(n_geos, T, n_channels)` shape it is combined with.
  """
  self._validate_optimization_tensors(
      cpmu=cpmu,
      cprf=cprf,
      media=media,
      rf_impressions=rf_impressions,
      frequency=frequency,
      media_spend=media_spend,
      rf_spend=rf_spend,
      revenue_per_kpi=revenue_per_kpi,
      use_optimal_frequency=use_optimal_frequency,
  )
  n_times = time.shape[0] if isinstance(time, tf.Tensor) else len(time)
  n_geos = self._meridian.n_geos
  revenue_per_kpi = (
      _expand_tensor(revenue_per_kpi, (n_geos, n_times))
      if revenue_per_kpi is not None
      else None
  )

  tensors = {}
  if media is not None:
    # Media units flighting: spend is derived as units * cost per unit.
    cpmu = _expand_tensor(cpmu, (n_geos, n_times, media.shape[-1]))
    tensors[c.MEDIA] = self._allocate_tensor_by_population(media)
    tensors[c.MEDIA_SPEND] = tensors[c.MEDIA] * cpmu
  if media_spend is not None:
    # Spend flighting: units are derived as spend / cost per unit.
    cpmu = _expand_tensor(cpmu, (n_geos, n_times, media_spend.shape[-1]))
    tensors[c.MEDIA_SPEND] = self._allocate_tensor_by_population(media_spend)
    # `divide_no_nan` keeps this consistent with the R&F branches below: a
    # zero cost per media unit yields zero media units instead of inf/NaN.
    tensors[c.MEDIA] = tf.math.divide_no_nan(tensors[c.MEDIA_SPEND], cpmu)
  if rf_impressions is not None:
    shape = (n_geos, n_times, rf_impressions.shape[-1])
    cprf = _expand_tensor(cprf, shape)
    allocated_impressions = self._allocate_tensor_by_population(
        rf_impressions
    )
    tensors[c.RF_SPEND] = allocated_impressions * cprf
    if use_optimal_frequency:
      # Frequency is a placeholder here; by convention it is set to 1 so that
      # reach equals impressions.
      frequency = tf.ones_like(allocated_impressions)
    tensors[c.FREQUENCY] = _expand_tensor(frequency, shape)
    tensors[c.REACH] = tf.math.divide_no_nan(
        allocated_impressions, tensors[c.FREQUENCY]
    )
  if rf_spend is not None:
    shape = (n_geos, n_times, rf_spend.shape[-1])
    cprf = _expand_tensor(cprf, shape)
    tensors[c.RF_SPEND] = self._allocate_tensor_by_population(rf_spend)
    impressions = tf.math.divide_no_nan(tensors[c.RF_SPEND], cprf)
    if use_optimal_frequency:
      # Same placeholder convention as the impressions branch above.
      frequency = tf.ones_like(impressions)
    tensors[c.FREQUENCY] = _expand_tensor(frequency, shape)
    tensors[c.REACH] = tf.math.divide_no_nan(
        impressions, tensors[c.FREQUENCY]
    )
  if revenue_per_kpi is not None:
    tensors[c.REVENUE_PER_KPI] = revenue_per_kpi
  tensors[c.TIME] = tf.convert_to_tensor(time)
  return analyzer.DataTensors(**tensors)
|
|
1737
|
+
|
|
1599
1738
|
def _validate_grid(
|
|
1600
1739
|
self,
|
|
1601
1740
|
new_data: analyzer.DataTensors | None,
|
|
@@ -1863,9 +2002,14 @@ class BudgetOptimizer:
|
|
|
1863
2002
|
)
|
|
1864
2003
|
)
|
|
1865
2004
|
if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
|
|
2005
|
+
opt_freq_data = analyzer.DataTensors(
|
|
2006
|
+
rf_impressions=filled_data.reach * filled_data.frequency,
|
|
2007
|
+
rf_spend=filled_data.rf_spend,
|
|
2008
|
+
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
2009
|
+
)
|
|
1866
2010
|
optimal_frequency = tf.convert_to_tensor(
|
|
1867
2011
|
self._analyzer.optimal_freq(
|
|
1868
|
-
new_data=
|
|
2012
|
+
new_data=opt_freq_data,
|
|
1869
2013
|
use_posterior=use_posterior,
|
|
1870
2014
|
selected_times=selected_times,
|
|
1871
2015
|
use_kpi=use_kpi,
|
|
@@ -1985,8 +2129,6 @@ class BudgetOptimizer:
|
|
|
1985
2129
|
tf.Tensor | None,
|
|
1986
2130
|
tf.Tensor | None,
|
|
1987
2131
|
tf.Tensor | None,
|
|
1988
|
-
tf.Tensor | None,
|
|
1989
|
-
tf.Tensor | None,
|
|
1990
2132
|
]:
|
|
1991
2133
|
"""Gets the tensors for incremental outcome, based on spend data.
|
|
1992
2134
|
|
|
@@ -1994,12 +2136,11 @@ class BudgetOptimizer:
|
|
|
1994
2136
|
incremental_outcome() for creating budget data. new_media is calculated
|
|
1995
2137
|
assuming a constant cpm between historical spend and optimization spend.
|
|
1996
2138
|
new_reach and new_frequency are calculated by first multiplying them
|
|
1997
|
-
together and getting
|
|
1998
|
-
|
|
1999
|
-
optimal_frequency if optimal_frequency is not
|
|
2000
|
-
self._meridian.rf_tensors.frequency otherwise. new_reach is calculated
|
|
2001
|
-
|
|
2002
|
-
their respective indexes in spend.
|
|
2139
|
+
together and getting `rf_impressions`, and then calculating
|
|
2140
|
+
`new_rf_impressions` given the same formula for `new_media`. `new_frequency`
|
|
2141
|
+
is `optimal_frequency` if `optimal_frequency` is not None, and
|
|
2142
|
+
`self._meridian.rf_tensors.frequency` otherwise. `new_reach` is calculated
|
|
2143
|
+
using `new_rf_impressions / new_frequency`.
|
|
2003
2144
|
|
|
2004
2145
|
Args:
|
|
2005
2146
|
hist_spend: historical spend data.
|
|
@@ -2016,8 +2157,7 @@ class BudgetOptimizer:
|
|
|
2016
2157
|
frequency is used for the optimization scenario.
|
|
2017
2158
|
|
|
2018
2159
|
Returns:
|
|
2019
|
-
Tuple of tf.tensors (new_media,
|
|
2020
|
-
new_rf_spend).
|
|
2160
|
+
Tuple of tf.tensors (new_media, new_reach, new_frequency).
|
|
2021
2161
|
"""
|
|
2022
2162
|
new_data = new_data or analyzer.DataTensors()
|
|
2023
2163
|
filled_data = new_data.validate_and_fill_missing_data(
|
|
@@ -2032,37 +2172,29 @@ class BudgetOptimizer:
|
|
|
2032
2172
|
)
|
|
2033
2173
|
* filled_data.media
|
|
2034
2174
|
)
|
|
2035
|
-
new_media_spend = tf.convert_to_tensor(
|
|
2036
|
-
spend[: self._meridian.n_media_channels]
|
|
2037
|
-
)
|
|
2038
2175
|
else:
|
|
2039
2176
|
new_media = None
|
|
2040
|
-
new_media_spend = None
|
|
2041
2177
|
if self._meridian.n_rf_channels > 0:
|
|
2042
|
-
|
|
2043
|
-
|
|
2178
|
+
rf_impressions = filled_data.reach * filled_data.frequency
|
|
2179
|
+
new_rf_impressions = (
|
|
2044
2180
|
tf.math.divide_no_nan(
|
|
2045
2181
|
spend[-self._meridian.n_rf_channels :],
|
|
2046
2182
|
hist_spend[-self._meridian.n_rf_channels :],
|
|
2047
2183
|
)
|
|
2048
|
-
*
|
|
2184
|
+
* rf_impressions
|
|
2049
2185
|
)
|
|
2050
2186
|
frequency = (
|
|
2051
2187
|
filled_data.frequency
|
|
2052
2188
|
if optimal_frequency is None
|
|
2053
2189
|
else optimal_frequency
|
|
2054
2190
|
)
|
|
2055
|
-
new_reach = tf.math.divide_no_nan(
|
|
2056
|
-
new_frequency = tf.math.divide_no_nan(
|
|
2057
|
-
new_rf_spend = tf.convert_to_tensor(
|
|
2058
|
-
spend[-self._meridian.n_rf_channels :]
|
|
2059
|
-
)
|
|
2191
|
+
new_reach = tf.math.divide_no_nan(new_rf_impressions, frequency)
|
|
2192
|
+
new_frequency = tf.math.divide_no_nan(new_rf_impressions, new_reach)
|
|
2060
2193
|
else:
|
|
2061
2194
|
new_reach = None
|
|
2062
2195
|
new_frequency = None
|
|
2063
|
-
new_rf_spend = None
|
|
2064
2196
|
|
|
2065
|
-
return (new_media,
|
|
2197
|
+
return (new_media, new_reach, new_frequency)
|
|
2066
2198
|
|
|
2067
2199
|
def _create_budget_dataset(
|
|
2068
2200
|
self,
|
|
@@ -2086,7 +2218,7 @@ class BudgetOptimizer:
|
|
|
2086
2218
|
)
|
|
2087
2219
|
spend_tensor = tf.convert_to_tensor(spend, dtype=tf.float32)
|
|
2088
2220
|
hist_spend = tf.convert_to_tensor(hist_spend, dtype=tf.float32)
|
|
2089
|
-
(new_media,
|
|
2221
|
+
(new_media, new_reach, new_frequency) = (
|
|
2090
2222
|
self._get_incremental_outcome_tensors(
|
|
2091
2223
|
hist_spend,
|
|
2092
2224
|
spend_tensor,
|
|
@@ -2095,22 +2227,34 @@ class BudgetOptimizer:
|
|
|
2095
2227
|
)
|
|
2096
2228
|
)
|
|
2097
2229
|
budget = np.sum(spend_tensor)
|
|
2230
|
+
inc_outcome_data = analyzer.DataTensors(
|
|
2231
|
+
media=new_media,
|
|
2232
|
+
reach=new_reach,
|
|
2233
|
+
frequency=new_frequency,
|
|
2234
|
+
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
2235
|
+
)
|
|
2098
2236
|
|
|
2099
2237
|
# incremental_outcome here is a tensor with the shape
|
|
2100
2238
|
# (n_chains, n_draws, n_channels)
|
|
2101
2239
|
incremental_outcome = self._analyzer.incremental_outcome(
|
|
2102
2240
|
use_posterior=use_posterior,
|
|
2103
|
-
new_data=
|
|
2104
|
-
media=new_media,
|
|
2105
|
-
reach=new_reach,
|
|
2106
|
-
frequency=new_frequency,
|
|
2107
|
-
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
2108
|
-
),
|
|
2241
|
+
new_data=inc_outcome_data,
|
|
2109
2242
|
selected_times=selected_times,
|
|
2110
2243
|
use_kpi=use_kpi,
|
|
2111
2244
|
batch_size=batch_size,
|
|
2112
2245
|
include_non_paid_channels=False,
|
|
2113
2246
|
)
|
|
2247
|
+
incremental_increase = 0.01
|
|
2248
|
+
mroi_numerator = self._analyzer.incremental_outcome(
|
|
2249
|
+
new_data=inc_outcome_data,
|
|
2250
|
+
selected_times=selected_times,
|
|
2251
|
+
scaling_factor0=1.0,
|
|
2252
|
+
scaling_factor1=1 + incremental_increase,
|
|
2253
|
+
use_posterior=use_posterior,
|
|
2254
|
+
use_kpi=use_kpi,
|
|
2255
|
+
batch_size=batch_size,
|
|
2256
|
+
include_non_paid_channels=False,
|
|
2257
|
+
)
|
|
2114
2258
|
# incremental_outcome_with_mean_median_and_ci here is an ndarray with the
|
|
2115
2259
|
# shape (n_channels, n_metrics) where n_metrics = 4 for (mean, median,
|
|
2116
2260
|
# ci_lo, and ci_hi)
|
|
@@ -2153,20 +2297,8 @@ class BudgetOptimizer:
|
|
|
2153
2297
|
include_median=True,
|
|
2154
2298
|
)
|
|
2155
2299
|
marginal_roi = analyzer.get_central_tendency_and_ci(
|
|
2156
|
-
data=
|
|
2157
|
-
|
|
2158
|
-
new_data=analyzer.DataTensors(
|
|
2159
|
-
media=new_media,
|
|
2160
|
-
reach=new_reach,
|
|
2161
|
-
frequency=new_frequency,
|
|
2162
|
-
media_spend=new_media_spend,
|
|
2163
|
-
rf_spend=new_rf_spend,
|
|
2164
|
-
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
2165
|
-
),
|
|
2166
|
-
selected_times=selected_times,
|
|
2167
|
-
batch_size=batch_size,
|
|
2168
|
-
by_reach=True,
|
|
2169
|
-
use_kpi=use_kpi,
|
|
2300
|
+
data=tf.math.divide_no_nan(
|
|
2301
|
+
mroi_numerator, spend_tensor * incremental_increase
|
|
2170
2302
|
),
|
|
2171
2303
|
confidence_level=confidence_level,
|
|
2172
2304
|
include_median=True,
|
|
@@ -2447,6 +2579,100 @@ class BudgetOptimizer:
|
|
|
2447
2579
|
)
|
|
2448
2580
|
return (spend_grid, incremental_outcome_grid)
|
|
2449
2581
|
|
|
2582
|
+
def _validate_optimization_tensors(
|
|
2583
|
+
self,
|
|
2584
|
+
cpmu: tf.Tensor | None = None,
|
|
2585
|
+
cprf: tf.Tensor | None = None,
|
|
2586
|
+
media: tf.Tensor | None = None,
|
|
2587
|
+
rf_impressions: tf.Tensor | None = None,
|
|
2588
|
+
frequency: tf.Tensor | None = None,
|
|
2589
|
+
media_spend: tf.Tensor | None = None,
|
|
2590
|
+
rf_spend: tf.Tensor | None = None,
|
|
2591
|
+
revenue_per_kpi: tf.Tensor | None = None,
|
|
2592
|
+
use_optimal_frequency: bool = True,
|
|
2593
|
+
):
|
|
2594
|
+
"""Validates the tensors needed for optimization."""
|
|
2595
|
+
if (media is not None or media_spend is not None) and cpmu is None:
|
|
2596
|
+
raise ValueError(
|
|
2597
|
+
'If `media` or `media_spend` is provided, then `cpmu` must also be'
|
|
2598
|
+
' provided.'
|
|
2599
|
+
)
|
|
2600
|
+
if (rf_impressions is not None or rf_spend is not None) and cprf is None:
|
|
2601
|
+
raise ValueError(
|
|
2602
|
+
'If `reach` and `frequency` or `rf_spend` is provided, then `cprf`'
|
|
2603
|
+
' must also be provided.'
|
|
2604
|
+
)
|
|
2605
|
+
if media is not None and media_spend is not None:
|
|
2606
|
+
raise ValueError('Only one of `media` or `media_spend` can be provided.')
|
|
2607
|
+
if rf_impressions is not None and rf_spend is not None:
|
|
2608
|
+
raise ValueError(
|
|
2609
|
+
'Only one of `rf_impressions` or `rf_spend` can be provided.'
|
|
2610
|
+
)
|
|
2611
|
+
if use_optimal_frequency and frequency is not None:
|
|
2612
|
+
raise ValueError(
|
|
2613
|
+
'If `use_optimal_frequency` is `True`, then `frequency` must not be'
|
|
2614
|
+
' provided.'
|
|
2615
|
+
)
|
|
2616
|
+
if not use_optimal_frequency and frequency is None:
|
|
2617
|
+
if rf_impressions is not None or rf_spend is not None:
|
|
2618
|
+
raise ValueError(
|
|
2619
|
+
'If `use_optimal_frequency` is `False`, then `frequency` must be'
|
|
2620
|
+
' provided.'
|
|
2621
|
+
)
|
|
2622
|
+
|
|
2623
|
+
n_geos = [
|
|
2624
|
+
t.shape[0]
|
|
2625
|
+
for t in [
|
|
2626
|
+
cpmu,
|
|
2627
|
+
cprf,
|
|
2628
|
+
media,
|
|
2629
|
+
rf_impressions,
|
|
2630
|
+
frequency,
|
|
2631
|
+
media_spend,
|
|
2632
|
+
rf_spend,
|
|
2633
|
+
]
|
|
2634
|
+
if t is not None and t.ndim == 3
|
|
2635
|
+
]
|
|
2636
|
+
if revenue_per_kpi is not None and revenue_per_kpi.ndim == 2:
|
|
2637
|
+
n_geos.append(revenue_per_kpi.shape[0])
|
|
2638
|
+
if any(n_geo != self._meridian.n_geos for n_geo in n_geos):
|
|
2639
|
+
raise ValueError(
|
|
2640
|
+
'All tensors with a geo dimension must have the same number of geos'
|
|
2641
|
+
' as in `meridian.InputData`.'
|
|
2642
|
+
)
|
|
2643
|
+
|
|
2644
|
+
def _allocate_tensor_by_population(
|
|
2645
|
+
self, tensor: tf.Tensor, required_ndim: int = 3
|
|
2646
|
+
):
|
|
2647
|
+
"""Allocates a tensor of shape (time,) or (time, channel) by the population.
|
|
2648
|
+
|
|
2649
|
+
Args:
|
|
2650
|
+
tensor: A tensor of shape (time,) or (time, channel).
|
|
2651
|
+
required_ndim: The required number of dimensions for the tensor.
|
|
2652
|
+
|
|
2653
|
+
Returns:
|
|
2654
|
+
The scaled tensor of shape (geo, time) or (geo, time, channel).
|
|
2655
|
+
"""
|
|
2656
|
+
if tensor.ndim == required_ndim:
|
|
2657
|
+
return tensor
|
|
2658
|
+
|
|
2659
|
+
if tensor.ndim != required_ndim - 1:
|
|
2660
|
+
raise ValueError(
|
|
2661
|
+
'Tensor must have 1 less than the required number of dimensions, '
|
|
2662
|
+
f'{required_ndim}, in order to be allocated by population. Found '
|
|
2663
|
+
f'{tensor.ndim} dimensions.'
|
|
2664
|
+
)
|
|
2665
|
+
|
|
2666
|
+
population = self._meridian.population
|
|
2667
|
+
normalized_population = population / tf.reduce_sum(population)
|
|
2668
|
+
if tensor.ndim == 1:
|
|
2669
|
+
reshaped_population = normalized_population[:, tf.newaxis]
|
|
2670
|
+
reshaped_tensor = tensor[tf.newaxis, :]
|
|
2671
|
+
else:
|
|
2672
|
+
reshaped_population = normalized_population[:, tf.newaxis, tf.newaxis]
|
|
2673
|
+
reshaped_tensor = tensor[tf.newaxis, :, :]
|
|
2674
|
+
return reshaped_tensor * reshaped_population
|
|
2675
|
+
|
|
2450
2676
|
|
|
2451
2677
|
def _validate_pct_of_spend(
|
|
2452
2678
|
n_channels: int,
|
|
@@ -2700,3 +2926,27 @@ def _raise_warning_if_target_constraints_not_met(
|
|
|
2700
2926
|
f' ROI is {target_mroi}, but the actual channel marginal ROIs are'
|
|
2701
2927
|
f' {optimized_mroi}.'
|
|
2702
2928
|
)
|
|
2929
|
+
|
|
2930
|
+
|
|
2931
|
+
def _expand_tensor(tensor: tf.Tensor, required_shape: tuple[int, ...]):
|
|
2932
|
+
"""Expands a tensor to the required number of dimensions."""
|
|
2933
|
+
if tensor.shape == required_shape:
|
|
2934
|
+
return tensor
|
|
2935
|
+
if tensor.ndim == 0:
|
|
2936
|
+
return tf.fill(required_shape, tensor)
|
|
2937
|
+
|
|
2938
|
+
# Tensor must be less than or equal to the required number of dimensions and
|
|
2939
|
+
# the shape must match the required shape excluding the difference in number
|
|
2940
|
+
# of dims.
|
|
2941
|
+
if tensor.ndim <= len(required_shape) and list(tensor.shape) == list(
|
|
2942
|
+
required_shape[-tensor.ndim :]
|
|
2943
|
+
):
|
|
2944
|
+
n_tile_dims = len(required_shape) - tensor.ndim
|
|
2945
|
+
repeats = list(required_shape[:n_tile_dims]) + [1] * tensor.ndim
|
|
2946
|
+
reshaped_tensor = tf.reshape(tensor, [1] * n_tile_dims + list(tensor.shape))
|
|
2947
|
+
return tf.tile(reshaped_tensor, repeats)
|
|
2948
|
+
|
|
2949
|
+
raise ValueError(
|
|
2950
|
+
f'Cannot expand tensor with shape {tensor.shape} to target'
|
|
2951
|
+
f' {required_shape}.'
|
|
2952
|
+
)
|
meridian/constants.py
CHANGED
|
@@ -63,6 +63,7 @@ CONTROLS = 'controls'
|
|
|
63
63
|
POPULATION = 'population'
|
|
64
64
|
REACH = 'reach'
|
|
65
65
|
FREQUENCY = 'frequency'
|
|
66
|
+
RF_IMPRESSIONS = 'rf_impressions'
|
|
66
67
|
RF_SPEND = 'rf_spend'
|
|
67
68
|
ORGANIC_MEDIA = 'organic_media'
|
|
68
69
|
ORGANIC_REACH = 'organic_reach'
|
|
@@ -70,6 +71,8 @@ ORGANIC_FREQUENCY = 'organic_frequency'
|
|
|
70
71
|
NON_MEDIA_TREATMENTS = 'non_media_treatments'
|
|
71
72
|
REVENUE = 'revenue'
|
|
72
73
|
NON_REVENUE = 'non_revenue'
|
|
74
|
+
CPMU = 'cpmu'
|
|
75
|
+
CPRF = 'cprf'
|
|
73
76
|
REQUIRED_INPUT_DATA_ARRAY_NAMES = (
|
|
74
77
|
KPI,
|
|
75
78
|
POPULATION,
|
|
@@ -92,6 +92,10 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
92
92
|
Returns:
|
|
93
93
|
The `DataFrameInputDataBuilder` with the added controls data.
|
|
94
94
|
"""
|
|
95
|
+
if not control_cols:
|
|
96
|
+
warnings.warn('No control columns provided. Not adding controls data.')
|
|
97
|
+
return self
|
|
98
|
+
|
|
95
99
|
controls_df = df.copy()
|
|
96
100
|
|
|
97
101
|
### Validate ###
|
|
@@ -236,6 +240,12 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
236
240
|
Returns:
|
|
237
241
|
The `DataFrameInputDataBuilder` with the added media and media spend data.
|
|
238
242
|
"""
|
|
243
|
+
if not media_cols or not media_spend_cols or not media_channels:
|
|
244
|
+
raise ValueError(
|
|
245
|
+
'`media_cols`, `media_spend_cols`, and `media_channels` must not be '
|
|
246
|
+
'empty.'
|
|
247
|
+
)
|
|
248
|
+
|
|
239
249
|
media_df = df.copy()
|
|
240
250
|
|
|
241
251
|
### Validate ###
|
|
@@ -308,6 +318,17 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
308
318
|
The `DataFrameInputDataBuilder` with the added reach, frequency, and rf
|
|
309
319
|
spend data.
|
|
310
320
|
"""
|
|
321
|
+
if (
|
|
322
|
+
not reach_cols
|
|
323
|
+
or not frequency_cols
|
|
324
|
+
or not rf_spend_cols
|
|
325
|
+
or not rf_channels
|
|
326
|
+
):
|
|
327
|
+
raise ValueError(
|
|
328
|
+
'`reach_cols`, `frequency_cols`, `rf_spend_cols`, and `rf_channels` '
|
|
329
|
+
'must not be empty.'
|
|
330
|
+
)
|
|
331
|
+
|
|
311
332
|
reach_df = df.copy()
|
|
312
333
|
|
|
313
334
|
### Validate ###
|
|
@@ -392,6 +413,9 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
392
413
|
Returns:
|
|
393
414
|
The `DataFrameInputDataBuilder` with the added organic media data.
|
|
394
415
|
"""
|
|
416
|
+
if not organic_media_cols:
|
|
417
|
+
raise ValueError('`organic_media_cols` must not be empty.')
|
|
418
|
+
|
|
395
419
|
organic_media_df = df.copy()
|
|
396
420
|
|
|
397
421
|
### Validate ###
|
|
@@ -458,6 +482,16 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
458
482
|
The `DataFrameInputDataBuilder` with the added organic reach and organic
|
|
459
483
|
frequency data.
|
|
460
484
|
"""
|
|
485
|
+
if (
|
|
486
|
+
not organic_reach_cols
|
|
487
|
+
or not organic_frequency_cols
|
|
488
|
+
or not organic_rf_channels
|
|
489
|
+
):
|
|
490
|
+
raise ValueError(
|
|
491
|
+
'`organic_reach_cols`, `organic_frequency_cols`, and'
|
|
492
|
+
' `organic_rf_channels` must not be empty.'
|
|
493
|
+
)
|
|
494
|
+
|
|
461
495
|
organic_reach_frequency_df = df.copy()
|
|
462
496
|
|
|
463
497
|
### Validate ###
|
|
@@ -526,6 +560,13 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
526
560
|
Returns:
|
|
527
561
|
The `DataFrameInputDataBuilder` with the added non-media treatments data.
|
|
528
562
|
"""
|
|
563
|
+
if not non_media_treatment_cols:
|
|
564
|
+
warnings.warn(
|
|
565
|
+
'No non-media treatment columns were provided. Not adding non-media '
|
|
566
|
+
'treatments data.'
|
|
567
|
+
)
|
|
568
|
+
return self
|
|
569
|
+
|
|
529
570
|
non_media_treatments_df = df.copy()
|
|
530
571
|
|
|
531
572
|
### Validate ###
|
|
@@ -134,7 +134,9 @@ class InputDataBuilder(abc.ABC):
|
|
|
134
134
|
if len(value) != len(set(value)):
|
|
135
135
|
raise ValueError('Geos must be unique.')
|
|
136
136
|
if self.geos is not None and set(self.geos) != set(value):
|
|
137
|
-
raise ValueError(
|
|
137
|
+
raise ValueError(
|
|
138
|
+
f'geos already set to {self.geos}. Cannot reassign to {value}.'
|
|
139
|
+
)
|
|
138
140
|
self._geos = value
|
|
139
141
|
|
|
140
142
|
@property
|
|
@@ -646,12 +648,13 @@ class InputDataBuilder(abc.ABC):
|
|
|
646
648
|
"""Normalizes the given `DataArray`'s coordinates in Meridian convention.
|
|
647
649
|
|
|
648
650
|
Validates that time values are in the conventional Meridian format and
|
|
649
|
-
that geos have national name if national.
|
|
651
|
+
that geos have national name if national. If geo coordinates are not string-
|
|
652
|
+
typed, they will be converted to strings.
|
|
650
653
|
|
|
651
654
|
Args:
|
|
652
655
|
da: The DataArray to normalize.
|
|
653
|
-
time_dimension_name: The name of the time dimension. If None, the
|
|
654
|
-
|
|
656
|
+
time_dimension_name: The name of the time dimension. If None, this will
|
|
657
|
+
skip time normalization.
|
|
655
658
|
|
|
656
659
|
Returns:
|
|
657
660
|
The normalized DataArray.
|
|
@@ -686,6 +689,11 @@ class InputDataBuilder(abc.ABC):
|
|
|
686
689
|
da = da.assign_coords(
|
|
687
690
|
{constants.GEO: [constants.NATIONAL_MODEL_DEFAULT_GEO_NAME]},
|
|
688
691
|
)
|
|
692
|
+
else:
|
|
693
|
+
da = da.assign_coords(
|
|
694
|
+
{constants.GEO: da.coords[constants.GEO].astype(str)}
|
|
695
|
+
)
|
|
696
|
+
|
|
689
697
|
return da
|
|
690
698
|
|
|
691
699
|
def _validate_set(self, component: str, da: xr.DataArray):
|