google-meridian 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {google_meridian-1.0.7.dist-info → google_meridian-1.0.9.dist-info}/METADATA +2 -2
- {google_meridian-1.0.7.dist-info → google_meridian-1.0.9.dist-info}/RECORD +18 -18
- {google_meridian-1.0.7.dist-info → google_meridian-1.0.9.dist-info}/WHEEL +1 -1
- meridian/__init__.py +1 -1
- meridian/analysis/analyzer.py +491 -338
- meridian/analysis/optimizer.py +712 -299
- meridian/analysis/summarizer.py +40 -4
- meridian/analysis/summary_text.py +20 -1
- meridian/analysis/templates/chart.html.jinja +1 -0
- meridian/analysis/test_utils.py +47 -99
- meridian/analysis/visualizer.py +455 -85
- meridian/constants.py +32 -0
- meridian/data/input_data.py +49 -5
- meridian/model/model.py +20 -4
- meridian/model/posterior_sampler.py +15 -5
- meridian/model/prior_distribution.py +22 -1
- {google_meridian-1.0.7.dist-info → google_meridian-1.0.9.dist-info}/licenses/LICENSE +0 -0
- {google_meridian-1.0.7.dist-info → google_meridian-1.0.9.dist-info}/top_level.txt +0 -0
meridian/analysis/optimizer.py
CHANGED
|
@@ -28,6 +28,7 @@ from meridian import constants as c
|
|
|
28
28
|
from meridian.analysis import analyzer
|
|
29
29
|
from meridian.analysis import formatter
|
|
30
30
|
from meridian.analysis import summary_text
|
|
31
|
+
from meridian.data import time_coordinates as tc
|
|
31
32
|
from meridian.model import model
|
|
32
33
|
import numpy as np
|
|
33
34
|
import pandas as pd
|
|
@@ -37,6 +38,7 @@ import xarray as xr
|
|
|
37
38
|
|
|
38
39
|
__all__ = [
|
|
39
40
|
'BudgetOptimizer',
|
|
41
|
+
'OptimizationGrid',
|
|
40
42
|
'OptimizationResults',
|
|
41
43
|
]
|
|
42
44
|
|
|
@@ -92,10 +94,14 @@ class OptimizationGrid:
|
|
|
92
94
|
Attributes:
|
|
93
95
|
historical_spend: ndarray of shape `(n_paid_channels,)` containing
|
|
94
96
|
aggregated historical spend allocation for spend for all media and RF
|
|
95
|
-
channels.
|
|
97
|
+
channels.
|
|
96
98
|
use_kpi: Whether using generic KPI or revenue.
|
|
97
99
|
use_posterior: Whether posterior distributions were used, or prior.
|
|
98
100
|
use_optimal_frequency: Whether optimal frequency was used.
|
|
101
|
+
gtol: Float indicating the acceptable relative error for the budget used in
|
|
102
|
+
the grid setup. The budget is rounded by `10*n`, where `n` is the smallest
|
|
103
|
+
integer such that `(budget - rounded_budget)` is less than or equal to
|
|
104
|
+
`(budget * gtol)`.
|
|
99
105
|
round_factor: The round factor used for the optimization grid.
|
|
100
106
|
optimal_frequency: Optional ndarray of shape `(n_paid_channels,)`,
|
|
101
107
|
containing the optimal frequency per channel. Value is `None` if the model
|
|
@@ -111,9 +117,10 @@ class OptimizationGrid:
|
|
|
111
117
|
use_kpi: bool
|
|
112
118
|
use_posterior: bool
|
|
113
119
|
use_optimal_frequency: bool
|
|
120
|
+
gtol: float
|
|
114
121
|
round_factor: int
|
|
115
122
|
optimal_frequency: np.ndarray | None
|
|
116
|
-
selected_times:
|
|
123
|
+
selected_times: Sequence[str] | Sequence[bool] | None
|
|
117
124
|
|
|
118
125
|
@property
|
|
119
126
|
def grid_dataset(self) -> xr.Dataset:
|
|
@@ -142,35 +149,149 @@ class OptimizationGrid:
|
|
|
142
149
|
"""The spend step size."""
|
|
143
150
|
return self.grid_dataset.attrs[c.SPEND_STEP_SIZE]
|
|
144
151
|
|
|
145
|
-
|
|
152
|
+
@property
|
|
153
|
+
def channels(self) -> list[str]:
|
|
154
|
+
"""The spend channels in the grid."""
|
|
155
|
+
return self.grid_dataset.channel.data.tolist()
|
|
156
|
+
|
|
146
157
|
def optimize(
|
|
147
158
|
self,
|
|
148
159
|
scenario: FixedBudgetScenario | FlexibleBudgetScenario,
|
|
160
|
+
pct_of_spend: Sequence[float] | None = None,
|
|
161
|
+
spend_constraint_lower: _SpendConstraint | None = None,
|
|
162
|
+
spend_constraint_upper: _SpendConstraint | None = None,
|
|
163
|
+
) -> xr.Dataset:
|
|
164
|
+
"""Finds the optimal budget allocation that maximizes outcome.
|
|
165
|
+
|
|
166
|
+
Args:
|
|
167
|
+
scenario: The optimization scenario with corresponding parameters.
|
|
168
|
+
pct_of_spend: Numeric list of size `channels` containing the percentage
|
|
169
|
+
allocation for spend for all channels. The values must be between 0-1,
|
|
170
|
+
summing to 1. By default, the historical allocation is used. Budget and
|
|
171
|
+
allocation are used in conjunction to determine the non-optimized
|
|
172
|
+
media-level spend, which is used to calculate the non-optimized
|
|
173
|
+
performance metrics (for example, ROI) and construct the feasible range
|
|
174
|
+
of media-level spend with the spend constraints.
|
|
175
|
+
spend_constraint_lower: Numeric list of size `channels` or float (same
|
|
176
|
+
constraint for all channels) indicating the lower bound of media-level
|
|
177
|
+
spend. If given as a channel-indexed array, the order must match
|
|
178
|
+
`channels`. The lower bound of media-level spend is `(1 -
|
|
179
|
+
spend_constraint_lower) * budget * allocation`. The value must be
|
|
180
|
+
between 0-1. Defaults to `0.3` for fixed budget and `1` for flexible.
|
|
181
|
+
spend_constraint_upper: Numeric list of size `channels` or float (same
|
|
182
|
+
constraint for all channels) indicating the upper bound of media-level
|
|
183
|
+
spend. If given as a channel-indexed array, the order must match
|
|
184
|
+
`channels`. The upper bound of media-level spend is `(1 +
|
|
185
|
+
spend_constraint_upper) * budget * allocation`. Defaults to `0.3` for
|
|
186
|
+
fixed budget and `1` for flexible.
|
|
187
|
+
|
|
188
|
+
Returns:
|
|
189
|
+
An xarray Dataset with `channel` as the coordinate and the following data
|
|
190
|
+
variables:
|
|
191
|
+
* `optimized`: media spend that maximizes incremental outcome based
|
|
192
|
+
on spend constraints for all media and RF channels.
|
|
193
|
+
* `non_optimized`: Channel-level spend.
|
|
194
|
+
|
|
195
|
+
Raises:
|
|
196
|
+
A warning if the budget's rounding should be different from the grid's
|
|
197
|
+
round factor.
|
|
198
|
+
ValueError: If spend allocation is not within the grid coverage.
|
|
199
|
+
"""
|
|
200
|
+
total_budget = (
|
|
201
|
+
scenario.total_budget
|
|
202
|
+
if isinstance(scenario, FixedBudgetScenario)
|
|
203
|
+
else None
|
|
204
|
+
)
|
|
205
|
+
budget = total_budget or np.sum(self.historical_spend)
|
|
206
|
+
valid_pct_of_spend = _validate_pct_of_spend(
|
|
207
|
+
n_channels=len(self.channels),
|
|
208
|
+
hist_spend=self.historical_spend,
|
|
209
|
+
pct_of_spend=pct_of_spend,
|
|
210
|
+
)
|
|
211
|
+
spend = budget * valid_pct_of_spend
|
|
212
|
+
spend_constraint_default = (
|
|
213
|
+
c.SPEND_CONSTRAINT_DEFAULT_FIXED_BUDGET
|
|
214
|
+
if isinstance(scenario, FixedBudgetScenario)
|
|
215
|
+
else c.SPEND_CONSTRAINT_DEFAULT_FLEXIBLE_BUDGET
|
|
216
|
+
)
|
|
217
|
+
if spend_constraint_lower is None:
|
|
218
|
+
spend_constraint_lower = spend_constraint_default
|
|
219
|
+
if spend_constraint_upper is None:
|
|
220
|
+
spend_constraint_upper = spend_constraint_default
|
|
221
|
+
(optimization_lower_bound, optimization_upper_bound) = (
|
|
222
|
+
_get_optimization_bounds(
|
|
223
|
+
n_channels=len(self.channels),
|
|
224
|
+
spend=spend,
|
|
225
|
+
round_factor=self.round_factor,
|
|
226
|
+
spend_constraint_lower=spend_constraint_lower,
|
|
227
|
+
spend_constraint_upper=spend_constraint_upper,
|
|
228
|
+
)
|
|
229
|
+
)
|
|
230
|
+
self._check_optimization_bounds(
|
|
231
|
+
lower_bound=optimization_lower_bound,
|
|
232
|
+
upper_bound=optimization_upper_bound,
|
|
233
|
+
)
|
|
234
|
+
round_factor = _get_round_factor(budget, self.gtol)
|
|
235
|
+
if round_factor != self.round_factor:
|
|
236
|
+
warnings.warn(
|
|
237
|
+
'Optimization accuracy may suffer owing to budget level differences.'
|
|
238
|
+
' Consider creating a new grid with smaller `gtol` if you intend to'
|
|
239
|
+
" shrink budgets significantly. It's only a problem when you use a"
|
|
240
|
+
' smaller budget, for which the intended step size is meant to be'
|
|
241
|
+
' smaller for one or more channels.'
|
|
242
|
+
)
|
|
243
|
+
(spend_grid, incremental_outcome_grid) = self._trim_grid(
|
|
244
|
+
spend_bound_lower=optimization_lower_bound,
|
|
245
|
+
spend_bound_upper=optimization_upper_bound,
|
|
246
|
+
)
|
|
247
|
+
if isinstance(scenario, FixedBudgetScenario):
|
|
248
|
+
rounded_spend = np.round(spend, self.round_factor)
|
|
249
|
+
scenario = dataclasses.replace(
|
|
250
|
+
scenario, total_budget=np.sum(rounded_spend)
|
|
251
|
+
)
|
|
252
|
+
optimal_spend = self._grid_search(
|
|
253
|
+
spend_grid=spend_grid,
|
|
254
|
+
incremental_outcome_grid=incremental_outcome_grid,
|
|
255
|
+
scenario=scenario,
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
return xr.Dataset(
|
|
259
|
+
coords={c.CHANNEL: self.channels},
|
|
260
|
+
data_vars={
|
|
261
|
+
c.OPTIMIZED: ([c.CHANNEL], optimal_spend.data),
|
|
262
|
+
c.NON_OPTIMIZED: ([c.CHANNEL], spend),
|
|
263
|
+
},
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
def _grid_search(
|
|
267
|
+
self,
|
|
268
|
+
spend_grid: np.ndarray,
|
|
269
|
+
incremental_outcome_grid: np.ndarray,
|
|
270
|
+
scenario: FixedBudgetScenario | FlexibleBudgetScenario,
|
|
149
271
|
) -> np.ndarray:
|
|
150
272
|
"""Hill-climbing search algorithm for budget optimization.
|
|
151
273
|
|
|
152
274
|
Args:
|
|
275
|
+
spend_grid: Discrete grid with dimensions (`grid_length` x
|
|
276
|
+
`n_total_channels`) containing spend by channel for all media and RF
|
|
277
|
+
channels, used in the hill-climbing search algorithm.
|
|
278
|
+
incremental_outcome_grid: Discrete grid with dimensions (`grid_length` x
|
|
279
|
+
`n_total_channels`) containing incremental outcome by channel for all
|
|
280
|
+
media and RF channels, used in the hill-climbing search algorithm.
|
|
153
281
|
scenario: The optimization scenario with corresponding parameters.
|
|
154
282
|
|
|
155
283
|
Returns:
|
|
156
|
-
optimal_spend: `np.ndarray`
|
|
157
|
-
media spend that maximizes incremental outcome based on spend
|
|
284
|
+
optimal_spend: `np.ndarray` of dimension (`n_total_channels`) containing
|
|
285
|
+
the media spend that maximizes incremental outcome based on spend
|
|
158
286
|
constraints for all media and RF channels.
|
|
287
|
+
optimal_inc_outcome: `np.ndarray` of dimension (`n_total_channels`)
|
|
288
|
+
containing the post optimization incremental outcome per channel for all
|
|
289
|
+
media and RF channels.
|
|
159
290
|
"""
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
rounded_spend = np.round(self.historical_spend, self.round_factor).astype(
|
|
165
|
-
int
|
|
166
|
-
)
|
|
167
|
-
budget = np.sum(rounded_spend)
|
|
168
|
-
scenario = dataclasses.replace(scenario, total_budget=budget)
|
|
169
|
-
|
|
170
|
-
spend = self.spend_grid[0, :].copy()
|
|
171
|
-
incremental_outcome = self.incremental_outcome_grid[0, :].copy()
|
|
172
|
-
spend_grid = self.spend_grid[1:, :]
|
|
173
|
-
incremental_outcome_grid = self.incremental_outcome_grid[1:, :]
|
|
291
|
+
spend = spend_grid[0, :].copy()
|
|
292
|
+
incremental_outcome = incremental_outcome_grid[0, :].copy()
|
|
293
|
+
spend_grid = spend_grid[1:, :]
|
|
294
|
+
incremental_outcome_grid = incremental_outcome_grid[1:, :]
|
|
174
295
|
iterative_roi_grid = np.round(
|
|
175
296
|
tf.math.divide_no_nan(
|
|
176
297
|
incremental_outcome_grid - incremental_outcome, spend_grid - spend
|
|
@@ -211,9 +332,97 @@ class OptimizationGrid:
|
|
|
211
332
|
),
|
|
212
333
|
decimals=8,
|
|
213
334
|
)
|
|
214
|
-
|
|
215
335
|
return spend_optimal
|
|
216
336
|
|
|
337
|
+
def _trim_grid(
|
|
338
|
+
self,
|
|
339
|
+
spend_bound_lower: np.ndarray,
|
|
340
|
+
spend_bound_upper: np.ndarray,
|
|
341
|
+
) -> tuple[np.ndarray, np.ndarray]:
|
|
342
|
+
"""Trim the grids based on a more restricted spend bound.
|
|
343
|
+
|
|
344
|
+
It is assumed that spend bounds are validated: their values are within the
|
|
345
|
+
grid coverage and they are rounded using this grid's round factor.
|
|
346
|
+
|
|
347
|
+
Args:
|
|
348
|
+
spend_bound_lower: The lower bound of spend for each channel.
|
|
349
|
+
spend_bound_upper: The upper bound of spend for each channel.
|
|
350
|
+
|
|
351
|
+
Returns:
|
|
352
|
+
updated_spend: The updated spend grid with valid spend values moved up to
|
|
353
|
+
the first row and invalid spend values filled with NaN.
|
|
354
|
+
updated_incremental_outcome: The updated incremental outcome grid with the
|
|
355
|
+
corresponding incremental outcome values moved up to the first row and
|
|
356
|
+
invalid incremental outcome values filled with NaN.
|
|
357
|
+
"""
|
|
358
|
+
spend_grid = self.spend_grid
|
|
359
|
+
updated_spend = self.spend_grid.copy()
|
|
360
|
+
updated_incremental_outcome = self.incremental_outcome_grid.copy()
|
|
361
|
+
|
|
362
|
+
for ch in range(len(self.channels)):
|
|
363
|
+
valid_indices = np.where(
|
|
364
|
+
(spend_grid[:, ch] >= spend_bound_lower[ch])
|
|
365
|
+
& (spend_grid[:, ch] <= spend_bound_upper[ch])
|
|
366
|
+
)[0]
|
|
367
|
+
first_valid_index = valid_indices[0]
|
|
368
|
+
last_valid_index = valid_indices[-1]
|
|
369
|
+
|
|
370
|
+
# Move the smallest spend to the first row.
|
|
371
|
+
updated_spend[:, ch] = np.roll(
|
|
372
|
+
updated_spend[:, ch], shift=-first_valid_index
|
|
373
|
+
)
|
|
374
|
+
# Move the corresponding incremental outcome to the first row.
|
|
375
|
+
updated_incremental_outcome[:, ch] = np.roll(
|
|
376
|
+
updated_incremental_outcome[:, ch], shift=-first_valid_index
|
|
377
|
+
)
|
|
378
|
+
|
|
379
|
+
# Fill the invalid indices with NaN.
|
|
380
|
+
nan_indices = last_valid_index - first_valid_index + 1
|
|
381
|
+
updated_spend[nan_indices:, ch] = np.nan
|
|
382
|
+
updated_incremental_outcome[nan_indices:, ch] = np.nan
|
|
383
|
+
|
|
384
|
+
return (updated_spend, updated_incremental_outcome)
|
|
385
|
+
|
|
386
|
+
def _check_optimization_bounds(
|
|
387
|
+
self,
|
|
388
|
+
lower_bound: np.ndarray,
|
|
389
|
+
upper_bound: np.ndarray,
|
|
390
|
+
) -> None:
|
|
391
|
+
"""Checks if the spend grid fits within the optimization bounds.
|
|
392
|
+
|
|
393
|
+
Args:
|
|
394
|
+
lower_bound: `np.ndarray` of shape `(n_channels,)` containing the lower
|
|
395
|
+
bound for each channel.
|
|
396
|
+
upper_bound: `np.ndarray` of shape `(n_channels,)` containing the upper
|
|
397
|
+
bound for each channel.
|
|
398
|
+
|
|
399
|
+
Raises:
|
|
400
|
+
ValueError: If the spend grid does not fit within the optimization bounds.
|
|
401
|
+
"""
|
|
402
|
+
min_spend = np.min(self.spend_grid, axis=0)
|
|
403
|
+
max_spend = np.max(self.spend_grid, axis=0)
|
|
404
|
+
errors = []
|
|
405
|
+
for i, channel_min_spend in enumerate(min_spend.data):
|
|
406
|
+
if lower_bound[i] < channel_min_spend:
|
|
407
|
+
errors.append(
|
|
408
|
+
f'Lower bound {lower_bound[i]} for channel'
|
|
409
|
+
f' {self.channels[i]} is below the mimimum spend of the grid'
|
|
410
|
+
f' {channel_min_spend}.'
|
|
411
|
+
)
|
|
412
|
+
for i, channel_max_spend in enumerate(max_spend.data):
|
|
413
|
+
if upper_bound[i] > channel_max_spend:
|
|
414
|
+
errors.append(
|
|
415
|
+
f'Upper bound {upper_bound[i]} for channel'
|
|
416
|
+
f' {self.channels[i]} is above the maximum spend of the grid'
|
|
417
|
+
f' {channel_max_spend}.'
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
if errors:
|
|
421
|
+
raise ValueError(
|
|
422
|
+
'Spend allocation is not within the grid coverage:\n'
|
|
423
|
+
+ '\n'.join(errors)
|
|
424
|
+
)
|
|
425
|
+
|
|
217
426
|
|
|
218
427
|
@dataclasses.dataclass(frozen=True)
|
|
219
428
|
class OptimizationResults:
|
|
@@ -413,7 +622,7 @@ class OptimizationResults:
|
|
|
413
622
|
# by adjusting the domain of the y-axis so that the incremental outcome does
|
|
414
623
|
# not start at 0. Calculate the total decrease in incremental outcome to pad
|
|
415
624
|
# the y-axis from the non-optimized total incremental outcome value.
|
|
416
|
-
sum_decr =
|
|
625
|
+
sum_decr = df[df.incremental_outcome < 0].incremental_outcome.sum()
|
|
417
626
|
y_padding = float(f'1e{int(math.log10(-sum_decr))}') if sum_decr < 0 else 2
|
|
418
627
|
domain_scale = [
|
|
419
628
|
self.nonoptimized_data.total_incremental_outcome + sum_decr - y_padding,
|
|
@@ -490,7 +699,7 @@ class OptimizationResults:
|
|
|
490
699
|
title=formatter.custom_title_params(
|
|
491
700
|
summary_text.SPEND_ALLOCATION_CHART_TITLE
|
|
492
701
|
),
|
|
493
|
-
width=c.VEGALITE_FACET_DEFAULT_WIDTH
|
|
702
|
+
width=c.VEGALITE_FACET_DEFAULT_WIDTH,
|
|
494
703
|
)
|
|
495
704
|
)
|
|
496
705
|
|
|
@@ -698,6 +907,7 @@ class OptimizationResults:
|
|
|
698
907
|
use_posterior=self.optimization_grid.use_posterior,
|
|
699
908
|
selected_times=selected_times,
|
|
700
909
|
by_reach=True,
|
|
910
|
+
use_kpi=not self.nonoptimized_data.attrs[c.IS_REVENUE_KPI],
|
|
701
911
|
use_optimal_frequency=self.optimization_grid.use_optimal_frequency,
|
|
702
912
|
)
|
|
703
913
|
|
|
@@ -807,8 +1017,16 @@ class OptimizationResults:
|
|
|
807
1017
|
|
|
808
1018
|
def _gen_optimization_summary(self) -> str:
|
|
809
1019
|
"""Generates HTML optimization summary output (as sanitized content str)."""
|
|
810
|
-
|
|
811
|
-
self.template_env.globals[c.
|
|
1020
|
+
start_date = tc.normalize_date(self.optimized_data.start_date)
|
|
1021
|
+
self.template_env.globals[c.START_DATE] = start_date.strftime(
|
|
1022
|
+
f'%b {start_date.day}, %Y'
|
|
1023
|
+
)
|
|
1024
|
+
interval_days = self.meridian.input_data.time_coordinates.interval_days
|
|
1025
|
+
end_date = tc.normalize_date(self.optimized_data.end_date)
|
|
1026
|
+
end_date_adjusted = end_date + pd.Timedelta(days=interval_days)
|
|
1027
|
+
self.template_env.globals[c.END_DATE] = end_date_adjusted.strftime(
|
|
1028
|
+
f'%b {end_date_adjusted.day}, %Y'
|
|
1029
|
+
)
|
|
812
1030
|
|
|
813
1031
|
html_template = self.template_env.get_template('summary.html.jinja')
|
|
814
1032
|
return html_template.render(
|
|
@@ -1056,6 +1274,7 @@ class BudgetOptimizer:
|
|
|
1056
1274
|
|
|
1057
1275
|
def optimize(
|
|
1058
1276
|
self,
|
|
1277
|
+
new_data: analyzer.DataTensors | None = None,
|
|
1059
1278
|
use_posterior: bool = True,
|
|
1060
1279
|
selected_times: tuple[str | None, str | None] | None = None,
|
|
1061
1280
|
fixed_budget: bool = True,
|
|
@@ -1073,18 +1292,50 @@ class BudgetOptimizer:
|
|
|
1073
1292
|
) -> OptimizationResults:
|
|
1074
1293
|
"""Finds the optimal budget allocation that maximizes outcome.
|
|
1075
1294
|
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1295
|
+
Optimization depends on the following:
|
|
1296
|
+
1. Flighting pattern (the relative allocation of a channel's media units
|
|
1297
|
+
across geos and time periods, which is held fixed for each channel)
|
|
1298
|
+
2. Cost per media unit (This is assumed to be constant for each channel, and
|
|
1299
|
+
can optionally vary by geo and/or time period)
|
|
1300
|
+
3. `pct_of_spend` (center of the spend box constraint for each channel)
|
|
1301
|
+
4. `budget` (total budget used for fixed budget scenarios)
|
|
1302
|
+
|
|
1303
|
+
By default, these values are assigned based on the historical data. The
|
|
1304
|
+
`pct_of_spend` and `budget` are optimization arguments that can be
|
|
1305
|
+
overridden directly. Passing `new_data.media` (or `new_data.reach` or
|
|
1306
|
+
`new_data.frequency`) will override both the flighting pattern and cost per
|
|
1307
|
+
media unit. Passing `new_data.spend` (or `new_data.rf_spend`) will only
|
|
1308
|
+
override the cost per media unit.
|
|
1309
|
+
|
|
1310
|
+
If `new_data` is passed with a different number of time periods than the
|
|
1311
|
+
historical data, then all of the optimization parameters will be inferred
|
|
1312
|
+
from it. Default values for `pct_of_spend` and `budget` (if
|
|
1313
|
+
`fixed_budget=True`) will be inferred from the `new_data`, but can be
|
|
1314
|
+
overridden using the `pct_of_spend` and `budget` arguments.
|
|
1315
|
+
|
|
1316
|
+
If `selected_times` is specified, then the default values are inferred based
|
|
1317
|
+
on the subset of time periods specified.
|
|
1079
1318
|
|
|
1080
1319
|
Args:
|
|
1320
|
+
new_data: An optional `DataTensors` container with optional tensors:
|
|
1321
|
+
`media`, `reach`, `frequency`, `media_spend`, `rf_spend`,
|
|
1322
|
+
`revenue_per_kpi`, and `time`. If `None`, the original tensors from the
|
|
1323
|
+
Meridian object are used. If `new_data` is provided, the optimization is
|
|
1324
|
+
run on the versions of the tensors in `new_data` and the original
|
|
1325
|
+
versions of all the remaining tensors. If any of the tensors in
|
|
1326
|
+
`new_data` is provided with a different number of time periods than in
|
|
1327
|
+
`InputData`, then all tensors must be provided with the same number of
|
|
1328
|
+
time periods and the `time` tensor must be provided.
|
|
1081
1329
|
use_posterior: Boolean. If `True`, then the budget is optimized based on
|
|
1082
1330
|
the posterior distribution of the model. Otherwise, the prior
|
|
1083
1331
|
distribution is used.
|
|
1084
1332
|
selected_times: Tuple containing the start and end time dimension
|
|
1085
1333
|
coordinates for the duration to run the optimization on. Selected time
|
|
1086
1334
|
values should align with the Meridian time dimension coordinates in the
|
|
1087
|
-
underlying model
|
|
1335
|
+
underlying model if optimizing the original data. If `new_data` is
|
|
1336
|
+
provided with a different number of time periods than in `InputData`,
|
|
1337
|
+
then the start and end time coordinates must match the time dimensions
|
|
1338
|
+
in `new_data.time`. By default, all time periods are used. Either start
|
|
1088
1339
|
or end time component can be `None` to represent the first or the last
|
|
1089
1340
|
time coordinate, respectively.
|
|
1090
1341
|
fixed_budget: Boolean indicating whether it's a fixed budget optimization
|
|
@@ -1101,7 +1352,7 @@ class BudgetOptimizer:
|
|
|
1101
1352
|
performance metrics (for example, ROI) and construct the feasible range
|
|
1102
1353
|
of media-level spend with the spend constraints. Consider using
|
|
1103
1354
|
`InputData.get_paid_channels_argument_builder()` to construct this
|
|
1104
|
-
argument.
|
|
1355
|
+
argument. If using `new_data`, this argument is ignored.
|
|
1105
1356
|
spend_constraint_lower: Numeric list of size `n_paid_channels` or float
|
|
1106
1357
|
(same constraint for all channels) indicating the lower bound of
|
|
1107
1358
|
media-level spend. If given as a channel-indexed array, the order must
|
|
@@ -1149,66 +1400,30 @@ class BudgetOptimizer:
|
|
|
1149
1400
|
target_roi=target_roi,
|
|
1150
1401
|
target_mroi=target_mroi,
|
|
1151
1402
|
)
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
start_date=start_date,
|
|
1157
|
-
end_date=end_date,
|
|
1158
|
-
)
|
|
1159
|
-
else:
|
|
1160
|
-
selected_time_dims = None
|
|
1161
|
-
hist_spend = self._analyzer.get_historical_spend(
|
|
1162
|
-
selected_time_dims,
|
|
1163
|
-
include_media=self._meridian.n_media_channels > 0,
|
|
1164
|
-
include_rf=self._meridian.n_rf_channels > 0,
|
|
1165
|
-
).data
|
|
1166
|
-
|
|
1167
|
-
use_historical_budget = budget is None or round(budget) == round(
|
|
1168
|
-
np.sum(hist_spend)
|
|
1169
|
-
)
|
|
1170
|
-
budget = budget or np.sum(hist_spend)
|
|
1171
|
-
pct_of_spend = self._validate_pct_of_spend(hist_spend, pct_of_spend)
|
|
1172
|
-
spend = budget * pct_of_spend
|
|
1173
|
-
round_factor = _get_round_factor(budget, gtol)
|
|
1174
|
-
rounded_spend = np.round(spend, round_factor).astype(int)
|
|
1175
|
-
if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
|
|
1176
|
-
optimal_frequency = tf.convert_to_tensor(
|
|
1177
|
-
self._analyzer.optimal_freq(
|
|
1178
|
-
use_posterior=use_posterior,
|
|
1179
|
-
selected_times=selected_time_dims,
|
|
1180
|
-
use_kpi=use_kpi,
|
|
1181
|
-
).optimal_frequency,
|
|
1182
|
-
dtype=tf.float32,
|
|
1183
|
-
)
|
|
1184
|
-
else:
|
|
1185
|
-
optimal_frequency = None
|
|
1186
|
-
|
|
1187
|
-
(optimization_lower_bound, optimization_upper_bound, spend_bounds) = (
|
|
1188
|
-
self._get_optimization_bounds(
|
|
1189
|
-
spend=rounded_spend,
|
|
1190
|
-
spend_constraint_lower=spend_constraint_lower,
|
|
1191
|
-
spend_constraint_upper=spend_constraint_upper,
|
|
1192
|
-
round_factor=round_factor,
|
|
1193
|
-
fixed_budget=fixed_budget,
|
|
1194
|
-
)
|
|
1403
|
+
spend_constraint_default = (
|
|
1404
|
+
c.SPEND_CONSTRAINT_DEFAULT_FIXED_BUDGET
|
|
1405
|
+
if fixed_budget
|
|
1406
|
+
else c.SPEND_CONSTRAINT_DEFAULT_FLEXIBLE_BUDGET
|
|
1195
1407
|
)
|
|
1408
|
+
if spend_constraint_lower is None:
|
|
1409
|
+
spend_constraint_lower = spend_constraint_default
|
|
1410
|
+
if spend_constraint_upper is None:
|
|
1411
|
+
spend_constraint_upper = spend_constraint_default
|
|
1196
1412
|
optimization_grid = self.create_optimization_grid(
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1413
|
+
new_data=new_data,
|
|
1414
|
+
selected_times=selected_times,
|
|
1415
|
+
budget=budget,
|
|
1416
|
+
pct_of_spend=pct_of_spend,
|
|
1417
|
+
spend_constraint_lower=spend_constraint_lower,
|
|
1418
|
+
spend_constraint_upper=spend_constraint_upper,
|
|
1419
|
+
gtol=gtol,
|
|
1202
1420
|
use_posterior=use_posterior,
|
|
1203
1421
|
use_kpi=use_kpi,
|
|
1204
1422
|
use_optimal_frequency=use_optimal_frequency,
|
|
1205
|
-
optimal_frequency=optimal_frequency,
|
|
1206
1423
|
batch_size=batch_size,
|
|
1207
1424
|
)
|
|
1208
|
-
|
|
1209
1425
|
if fixed_budget:
|
|
1210
|
-
|
|
1211
|
-
scenario = FixedBudgetScenario(total_budget=total_budget)
|
|
1426
|
+
scenario = FixedBudgetScenario(total_budget=budget)
|
|
1212
1427
|
elif target_roi:
|
|
1213
1428
|
scenario = FlexibleBudgetScenario(
|
|
1214
1429
|
target_metric=c.ROI, target_value=target_roi
|
|
@@ -1217,27 +1432,38 @@ class BudgetOptimizer:
|
|
|
1217
1432
|
scenario = FlexibleBudgetScenario(
|
|
1218
1433
|
target_metric=c.MROI, target_value=target_mroi
|
|
1219
1434
|
)
|
|
1220
|
-
|
|
1221
|
-
optimal_spend = optimization_grid.optimize(
|
|
1435
|
+
spend = optimization_grid.optimize(
|
|
1222
1436
|
scenario=scenario,
|
|
1437
|
+
pct_of_spend=pct_of_spend,
|
|
1438
|
+
spend_constraint_lower=spend_constraint_lower,
|
|
1439
|
+
spend_constraint_upper=spend_constraint_upper,
|
|
1440
|
+
)
|
|
1441
|
+
|
|
1442
|
+
use_historical_budget = budget is None or np.isclose(
|
|
1443
|
+
budget, np.sum(optimization_grid.historical_spend)
|
|
1223
1444
|
)
|
|
1445
|
+
rounded_spend = np.round(
|
|
1446
|
+
spend.non_optimized, optimization_grid.round_factor
|
|
1447
|
+
).astype(int)
|
|
1224
1448
|
nonoptimized_data = self._create_budget_dataset(
|
|
1449
|
+
new_data=new_data,
|
|
1225
1450
|
use_posterior=use_posterior,
|
|
1226
1451
|
use_kpi=use_kpi,
|
|
1227
|
-
hist_spend=
|
|
1452
|
+
hist_spend=optimization_grid.historical_spend,
|
|
1228
1453
|
spend=rounded_spend,
|
|
1229
|
-
selected_times=
|
|
1454
|
+
selected_times=optimization_grid.selected_times,
|
|
1230
1455
|
confidence_level=confidence_level,
|
|
1231
1456
|
batch_size=batch_size,
|
|
1232
1457
|
use_historical_budget=use_historical_budget,
|
|
1233
1458
|
)
|
|
1234
1459
|
nonoptimized_data_with_optimal_freq = self._create_budget_dataset(
|
|
1460
|
+
new_data=new_data,
|
|
1235
1461
|
use_posterior=use_posterior,
|
|
1236
1462
|
use_kpi=use_kpi,
|
|
1237
|
-
hist_spend=
|
|
1463
|
+
hist_spend=optimization_grid.historical_spend,
|
|
1238
1464
|
spend=rounded_spend,
|
|
1239
|
-
selected_times=
|
|
1240
|
-
optimal_frequency=optimal_frequency,
|
|
1465
|
+
selected_times=optimization_grid.selected_times,
|
|
1466
|
+
optimal_frequency=optimization_grid.optimal_frequency,
|
|
1241
1467
|
confidence_level=confidence_level,
|
|
1242
1468
|
batch_size=batch_size,
|
|
1243
1469
|
use_historical_budget=use_historical_budget,
|
|
@@ -1250,12 +1476,13 @@ class BudgetOptimizer:
|
|
|
1250
1476
|
elif target_mroi:
|
|
1251
1477
|
constraints[c.TARGET_MROI] = target_mroi
|
|
1252
1478
|
optimized_data = self._create_budget_dataset(
|
|
1479
|
+
new_data=new_data,
|
|
1253
1480
|
use_posterior=use_posterior,
|
|
1254
1481
|
use_kpi=use_kpi,
|
|
1255
|
-
hist_spend=
|
|
1256
|
-
spend=
|
|
1257
|
-
selected_times=
|
|
1258
|
-
optimal_frequency=optimal_frequency,
|
|
1482
|
+
hist_spend=optimization_grid.historical_spend,
|
|
1483
|
+
spend=spend.optimized,
|
|
1484
|
+
selected_times=optimization_grid.selected_times,
|
|
1485
|
+
optimal_frequency=optimization_grid.optimal_frequency,
|
|
1259
1486
|
attrs=constraints,
|
|
1260
1487
|
confidence_level=confidence_level,
|
|
1261
1488
|
batch_size=batch_size,
|
|
@@ -1263,17 +1490,23 @@ class BudgetOptimizer:
|
|
|
1263
1490
|
)
|
|
1264
1491
|
|
|
1265
1492
|
if not fixed_budget:
|
|
1266
|
-
|
|
1493
|
+
_raise_warning_if_target_constraints_not_met(
|
|
1267
1494
|
target_roi=target_roi,
|
|
1268
1495
|
target_mroi=target_mroi,
|
|
1269
1496
|
optimized_data=optimized_data,
|
|
1270
1497
|
)
|
|
1271
1498
|
|
|
1272
1499
|
spend_ratio = np.divide(
|
|
1273
|
-
spend,
|
|
1274
|
-
|
|
1275
|
-
out=np.zeros_like(
|
|
1276
|
-
where=
|
|
1500
|
+
spend.non_optimized,
|
|
1501
|
+
optimization_grid.historical_spend,
|
|
1502
|
+
out=np.zeros_like(optimization_grid.historical_spend, dtype=float),
|
|
1503
|
+
where=optimization_grid.historical_spend != 0,
|
|
1504
|
+
)
|
|
1505
|
+
n_paid_channels = len(self._meridian.input_data.get_all_paid_channels())
|
|
1506
|
+
spend_bounds = _get_spend_bounds(
|
|
1507
|
+
n_channels=n_paid_channels,
|
|
1508
|
+
spend_constraint_lower=spend_constraint_lower,
|
|
1509
|
+
spend_constraint_upper=spend_constraint_upper,
|
|
1277
1510
|
)
|
|
1278
1511
|
|
|
1279
1512
|
return OptimizationResults(
|
|
@@ -1287,71 +1520,81 @@ class BudgetOptimizer:
|
|
|
1287
1520
|
_optimization_grid=optimization_grid,
|
|
1288
1521
|
)
|
|
1289
1522
|
|
|
1290
|
-
def _raise_warning_if_target_constraints_not_met(
|
|
1291
|
-
self,
|
|
1292
|
-
target_roi: float | None,
|
|
1293
|
-
target_mroi: float | None,
|
|
1294
|
-
optimized_data: xr.Dataset,
|
|
1295
|
-
) -> None:
|
|
1296
|
-
"""Raises a warning if the target constraints are not met."""
|
|
1297
|
-
if target_roi:
|
|
1298
|
-
# Total ROI is a scalar value.
|
|
1299
|
-
optimized_roi = optimized_data.attrs[c.TOTAL_ROI]
|
|
1300
|
-
if optimized_roi < target_roi:
|
|
1301
|
-
warnings.warn(
|
|
1302
|
-
f'Target ROI constraint was not met. The target ROI is {target_roi}'
|
|
1303
|
-
f', but the actual ROI is {optimized_roi}.'
|
|
1304
|
-
)
|
|
1305
|
-
elif target_mroi:
|
|
1306
|
-
# Compare each channel's marginal ROI to the target.
|
|
1307
|
-
# optimized_data[c.MROI] is an array of shape (n_channels, 4), where the
|
|
1308
|
-
# last dimension is [mean, median, ci_lo, ci_hi].
|
|
1309
|
-
optimized_mroi = optimized_data[c.MROI][:, 0]
|
|
1310
|
-
if np.any(optimized_mroi < target_mroi):
|
|
1311
|
-
warnings.warn(
|
|
1312
|
-
'Target marginal ROI constraint was not met. The target marginal'
|
|
1313
|
-
f' ROI is {target_mroi}, but the actual channel marginal ROIs are'
|
|
1314
|
-
f' {optimized_mroi}.'
|
|
1315
|
-
)
|
|
1316
|
-
|
|
1317
1523
|
def create_optimization_grid(
|
|
1318
1524
|
self,
|
|
1319
|
-
|
|
1320
|
-
spend_bound_lower: np.ndarray,
|
|
1321
|
-
spend_bound_upper: np.ndarray,
|
|
1322
|
-
selected_times: Sequence[str] | None,
|
|
1323
|
-
round_factor: int,
|
|
1525
|
+
new_data: analyzer.DataTensors | None = None,
|
|
1324
1526
|
use_posterior: bool = True,
|
|
1527
|
+
selected_times: tuple[str | None, str | None] | None = None,
|
|
1528
|
+
budget: float | None = None,
|
|
1529
|
+
pct_of_spend: Sequence[float] | None = None,
|
|
1530
|
+
spend_constraint_lower: _SpendConstraint = c.SPEND_CONSTRAINT_DEFAULT,
|
|
1531
|
+
spend_constraint_upper: _SpendConstraint = c.SPEND_CONSTRAINT_DEFAULT,
|
|
1532
|
+
gtol: float = 0.0001,
|
|
1533
|
+
use_optimal_frequency: bool = True,
|
|
1325
1534
|
use_kpi: bool = False,
|
|
1326
|
-
use_optimal_frequency: bool = False,
|
|
1327
|
-
optimal_frequency: xr.DataArray | None = None,
|
|
1328
1535
|
batch_size: int = c.DEFAULT_BATCH_SIZE,
|
|
1329
1536
|
) -> OptimizationGrid:
|
|
1330
1537
|
"""Creates a OptimizationGrid for optimization.
|
|
1331
1538
|
|
|
1332
1539
|
Args:
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
the
|
|
1339
|
-
|
|
1340
|
-
`
|
|
1341
|
-
|
|
1540
|
+
new_data: An optional `DataTensors` container with optional tensors:
|
|
1541
|
+
`media`, `reach`, `frequency`, `media_spend`, `rf_spend`,
|
|
1542
|
+
`revenue_per_kpi`, and `time`. If `None`, the original tensors from the
|
|
1543
|
+
Meridian object are used. If `new_data` is provided, the grid is created
|
|
1544
|
+
using the versions of the tensors in `new_data` and the original
|
|
1545
|
+
versions of all the remaining tensors. If any of the tensors in
|
|
1546
|
+
`new_data` is provided with a different number of time periods than in
|
|
1547
|
+
`InputData`, then all tensors must be provided with the same number of
|
|
1548
|
+
time periods and the `time` tensor must be provided.
|
|
1342
1549
|
use_posterior: Boolean. If `True`, then the incremental outcome is derived
|
|
1343
1550
|
from the posterior distribution of the model. Otherwise, the prior
|
|
1344
1551
|
distribution is used.
|
|
1552
|
+
selected_times: Tuple containing the start and end time dimension
|
|
1553
|
+
coordinates for the duration to run the optimization on. Selected time
|
|
1554
|
+
values should align with the Meridian time dimension coordinates in the
|
|
1555
|
+
underlying model if optimizing the original data. If `new_data` is
|
|
1556
|
+
provided with a different number of time periods than in `InputData`,
|
|
1557
|
+
then the start and end time coordinates must match the time dimensions
|
|
1558
|
+
in `new_data.time`. By default, all time periods are used. Either start
|
|
1559
|
+
or end time component can be `None` to represent the first or the last
|
|
1560
|
+
time coordinate, respectively.
|
|
1561
|
+
budget: Number indicating the total budget for the fixed budget scenario.
|
|
1562
|
+
Defaults to the historical budget.
|
|
1563
|
+
pct_of_spend: Numeric list of size `n_paid_channels` containing the
|
|
1564
|
+
percentage allocation for spend for all media and RF channels. The order
|
|
1565
|
+
must match `(InputData.media + InputData.reach)` with values between
|
|
1566
|
+
0-1, summing to 1. By default, the historical allocation is used. Budget
|
|
1567
|
+
and allocation are used in conjunction to determine the non-optimized
|
|
1568
|
+
media-level spend, which is used to calculate the non-optimized
|
|
1569
|
+
performance metrics (for example, ROI) and construct the feasible range
|
|
1570
|
+
of media-level spend with the spend constraints. Consider using
|
|
1571
|
+
`InputData.get_paid_channels_argument_builder()` to construct this
|
|
1572
|
+
argument. If using `new_data`, this argument is ignored.
|
|
1573
|
+
spend_constraint_lower: Numeric list of size `n_paid_channels` or float
|
|
1574
|
+
(same constraint for all channels) indicating the lower bound of
|
|
1575
|
+
media-level spend. If given as a channel-indexed array, the order must
|
|
1576
|
+
match `(InputData.media + InputData.reach)`. The lower bound of
|
|
1577
|
+
media-level spend is `(1 - spend_constraint_lower) * budget *
|
|
1578
|
+
allocation`. The value must be between 0-1. Defaults to `0.3` for fixed
|
|
1579
|
+
budget and `1` for flexible. Consider using
|
|
1580
|
+
`InputData.get_paid_channels_argument_builder()` to construct this
|
|
1581
|
+
argument.
|
|
1582
|
+
spend_constraint_upper: Numeric list of size `n_paid_channels` or float
|
|
1583
|
+
(same constraint for all channels) indicating the upper bound of
|
|
1584
|
+
media-level spend. If given as a channel-indexed array, the order must
|
|
1585
|
+
match `(InputData.media + InputData.reach)`. The upper bound of
|
|
1586
|
+
media-level spend is `(1 + spend_constraint_upper) * budget *
|
|
1587
|
+
allocation`. Defaults to `0.3` for fixed budget and `1` for flexible.
|
|
1588
|
+
Consider using `InputData.get_paid_channels_argument_builder()` to
|
|
1589
|
+
construct this argument.
|
|
1590
|
+
gtol: Float indicating the acceptable relative error for the budget used
|
|
1591
|
+
in the grid setup. The budget will be rounded by `10**n`, where `n` is
|
|
1592
|
+
the smallest integer such that `(budget - rounded_budget)` is less than
|
|
1593
|
+
or equal to `(budget * gtol)`. `gtol` must be less than 1.
|
|
1594
|
+
use_optimal_frequency: Boolean. Whether optimal frequency was used.
|
|
1345
1595
|
use_kpi: Boolean. If `True`, then the incremental outcome is derived from
|
|
1346
1596
|
the KPI impact. Otherwise, the incremental outcome is derived from the
|
|
1347
1597
|
revenue impact.
|
|
1348
|
-
use_optimal_frequency: Boolean. Whether optimal frequency was used.
|
|
1349
|
-
optimal_frequency: `xr.DataArray` with dimension `n_rf_channels`,
|
|
1350
|
-
containing the optimal frequency per channel, that maximizes mean ROI
|
|
1351
|
-
over the corresponding prior/posterior distribution. Value is `None` if
|
|
1352
|
-
the model does not contain reach and frequency data, or if the model
|
|
1353
|
-
does contain reach and frequency data, but historical frequency is used
|
|
1354
|
-
for the optimization scenario.
|
|
1355
1598
|
batch_size: Max draws per chain in each batch. The calculation is run in
|
|
1356
1599
|
batches to avoid memory exhaustion. If a memory error occurs, try
|
|
1357
1600
|
reducing `batch_size`. The calculation will generally be faster with
|
|
@@ -1361,14 +1604,62 @@ class BudgetOptimizer:
|
|
|
1361
1604
|
An OptimizationGrid object containing the grid data for optimization.
|
|
1362
1605
|
"""
|
|
1363
1606
|
self._validate_model_fit(use_posterior)
|
|
1607
|
+
if new_data is None:
|
|
1608
|
+
new_data = analyzer.DataTensors()
|
|
1609
|
+
|
|
1610
|
+
required_tensors = c.PERFORMANCE_DATA + (c.TIME,)
|
|
1611
|
+
filled_data = new_data.validate_and_fill_missing_data(
|
|
1612
|
+
required_tensors_names=required_tensors, meridian=self._meridian
|
|
1613
|
+
)
|
|
1614
|
+
|
|
1615
|
+
selected_time_dims = self._validate_selected_times(
|
|
1616
|
+
selected_times, filled_data
|
|
1617
|
+
)
|
|
1618
|
+
hist_spend = self._analyzer.get_aggregated_spend(
|
|
1619
|
+
new_data=filled_data.filter_fields(c.PAID_CHANNELS + c.SPEND_DATA),
|
|
1620
|
+
selected_times=selected_time_dims,
|
|
1621
|
+
include_media=self._meridian.n_media_channels > 0,
|
|
1622
|
+
include_rf=self._meridian.n_rf_channels > 0,
|
|
1623
|
+
).data
|
|
1624
|
+
n_paid_channels = len(self._meridian.input_data.get_all_paid_channels())
|
|
1625
|
+
budget = budget or np.sum(hist_spend)
|
|
1626
|
+
valid_pct_of_spend = _validate_pct_of_spend(
|
|
1627
|
+
n_channels=n_paid_channels,
|
|
1628
|
+
hist_spend=hist_spend,
|
|
1629
|
+
pct_of_spend=pct_of_spend,
|
|
1630
|
+
)
|
|
1631
|
+
spend = budget * valid_pct_of_spend
|
|
1632
|
+
round_factor = _get_round_factor(budget, gtol)
|
|
1633
|
+
(optimization_lower_bound, optimization_upper_bound) = (
|
|
1634
|
+
_get_optimization_bounds(
|
|
1635
|
+
n_channels=n_paid_channels,
|
|
1636
|
+
spend=spend,
|
|
1637
|
+
round_factor=round_factor,
|
|
1638
|
+
spend_constraint_lower=spend_constraint_lower,
|
|
1639
|
+
spend_constraint_upper=spend_constraint_upper,
|
|
1640
|
+
)
|
|
1641
|
+
)
|
|
1642
|
+
if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
|
|
1643
|
+
optimal_frequency = tf.convert_to_tensor(
|
|
1644
|
+
self._analyzer.optimal_freq(
|
|
1645
|
+
new_data=filled_data.filter_fields(c.RF_DATA),
|
|
1646
|
+
use_posterior=use_posterior,
|
|
1647
|
+
selected_times=selected_time_dims,
|
|
1648
|
+
use_kpi=use_kpi,
|
|
1649
|
+
).optimal_frequency,
|
|
1650
|
+
dtype=tf.float32,
|
|
1651
|
+
)
|
|
1652
|
+
else:
|
|
1653
|
+
optimal_frequency = None
|
|
1364
1654
|
|
|
1365
1655
|
step_size = 10 ** (-round_factor)
|
|
1366
1656
|
(spend_grid, incremental_outcome_grid) = self._create_grids(
|
|
1367
|
-
spend=
|
|
1368
|
-
spend_bound_lower=
|
|
1369
|
-
spend_bound_upper=
|
|
1657
|
+
spend=hist_spend,
|
|
1658
|
+
spend_bound_lower=optimization_lower_bound,
|
|
1659
|
+
spend_bound_upper=optimization_upper_bound,
|
|
1370
1660
|
step_size=step_size,
|
|
1371
|
-
selected_times=
|
|
1661
|
+
selected_times=selected_time_dims,
|
|
1662
|
+
new_data=filled_data.filter_fields(c.PAID_DATA),
|
|
1372
1663
|
use_posterior=use_posterior,
|
|
1373
1664
|
use_kpi=use_kpi,
|
|
1374
1665
|
optimal_frequency=optimal_frequency,
|
|
@@ -1382,13 +1673,14 @@ class BudgetOptimizer:
|
|
|
1382
1673
|
|
|
1383
1674
|
return OptimizationGrid(
|
|
1384
1675
|
_grid_dataset=grid_dataset,
|
|
1385
|
-
historical_spend=
|
|
1676
|
+
historical_spend=hist_spend,
|
|
1386
1677
|
use_kpi=use_kpi,
|
|
1387
1678
|
use_posterior=use_posterior,
|
|
1388
1679
|
use_optimal_frequency=use_optimal_frequency,
|
|
1680
|
+
gtol=gtol,
|
|
1389
1681
|
round_factor=round_factor,
|
|
1390
1682
|
optimal_frequency=optimal_frequency,
|
|
1391
|
-
selected_times=
|
|
1683
|
+
selected_times=selected_time_dims,
|
|
1392
1684
|
)
|
|
1393
1685
|
|
|
1394
1686
|
def _create_grid_dataset(
|
|
@@ -1425,82 +1717,46 @@ class BudgetOptimizer:
|
|
|
1425
1717
|
return xr.Dataset(
|
|
1426
1718
|
data_vars=data_vars,
|
|
1427
1719
|
coords={
|
|
1428
|
-
c.GRID_SPEND_INDEX: (
|
|
1429
|
-
|
|
1430
|
-
np.arange(0, len(spend_grid)),
|
|
1431
|
-
),
|
|
1432
|
-
c.CHANNEL: (
|
|
1433
|
-
[c.CHANNEL],
|
|
1434
|
-
self._meridian.input_data.get_all_paid_channels(),
|
|
1435
|
-
),
|
|
1720
|
+
c.GRID_SPEND_INDEX: np.arange(0, len(spend_grid)),
|
|
1721
|
+
c.CHANNEL: self._meridian.input_data.get_all_paid_channels(),
|
|
1436
1722
|
},
|
|
1437
1723
|
attrs={c.SPEND_STEP_SIZE: spend_step_size},
|
|
1438
1724
|
)
|
|
1439
1725
|
|
|
1440
|
-
def
|
|
1441
|
-
self, hist_spend: np.ndarray, pct_of_spend: Sequence[float] | None
|
|
1442
|
-
) -> np.ndarray:
|
|
1443
|
-
"""Validates and returns the percent of spend."""
|
|
1444
|
-
if pct_of_spend is not None:
|
|
1445
|
-
if len(pct_of_spend) != len(
|
|
1446
|
-
self._meridian.input_data.get_all_paid_channels()
|
|
1447
|
-
):
|
|
1448
|
-
raise ValueError('Percent of spend must be specified for all channels.')
|
|
1449
|
-
if not math.isclose(np.sum(pct_of_spend), 1.0, abs_tol=0.001):
|
|
1450
|
-
raise ValueError('Percent of spend must sum to one.')
|
|
1451
|
-
return np.array(pct_of_spend)
|
|
1452
|
-
else:
|
|
1453
|
-
return hist_spend / np.sum(hist_spend)
|
|
1454
|
-
|
|
1455
|
-
def _validate_spend_constraints(
|
|
1726
|
+
def _validate_selected_times(
    self,
    selected_times: tuple[str | None, str | None] | None,
    new_data: analyzer.DataTensors | None,
) -> Sequence[str] | Sequence[bool] | None:
  """Resolves a `(start, end)` time range into a concrete time selection.

  Args:
    selected_times: Optional `(start_date, end_date)` tuple. Either component
      may be `None`.
    new_data: Optional `DataTensors`. When `None`, an empty `DataTensors()` is
      used in its place.

  Returns:
    `None` if `selected_times` is `None` or both of its components are `None`.
    Otherwise, when `new_data.get_modified_times(self._meridian)` is `None`,
    the result of `self._meridian.expand_selected_time_dims(...)`. In the
    remaining case, a list of booleans with one entry per element of
    `new_data.time`, `True` where that time falls inside the expanded range.
  """
  if selected_times is None:
    return None
  start_date, end_date = selected_times
  if start_date is None and end_date is None:
    return None

  new_data = new_data or analyzer.DataTensors()
  if new_data.get_modified_times(self._meridian) is None:
    # Times are unchanged, so the model's own time coordinates apply.
    return self._meridian.expand_selected_time_dims(
        start_date=start_date,
        end_date=end_date,
    )

  # Modified times imply that a `time` tensor was supplied; the assert narrows
  # the optional type for the accesses below.
  assert new_data.time is not None
  new_times_str = new_data.time.numpy().astype(str).tolist()
  time_coordinates = tc.TimeCoordinates.from_dates(new_times_str)
  expanded_dates = time_coordinates.expand_selected_time_dims(
      start_date=start_date,
      end_date=end_date,
  )
  # A set keeps each membership test O(1); the resulting mask is identical to
  # testing against a list, without the quadratic scan.
  selected_strs = {date.strftime(c.DATE_FORMAT) for date in expanded_dates}
  return [time_str in selected_strs for time_str in new_times_str]
|
|
1499
1754
|
|
|
1500
1755
|
def _get_incremental_outcome_tensors(
|
|
1501
1756
|
self,
|
|
1502
1757
|
hist_spend: np.ndarray,
|
|
1503
1758
|
spend: np.ndarray,
|
|
1759
|
+
new_data: analyzer.DataTensors | None = None,
|
|
1504
1760
|
optimal_frequency: Sequence[float] | None = None,
|
|
1505
1761
|
) -> tuple[
|
|
1506
1762
|
tf.Tensor | None,
|
|
@@ -1525,6 +1781,11 @@ class BudgetOptimizer:
|
|
|
1525
1781
|
Args:
|
|
1526
1782
|
hist_spend: historical spend data.
|
|
1527
1783
|
spend: new optimized spend data.
|
|
1784
|
+
new_data: An optional `DataTensors` object containing the new `media`,
|
|
1785
|
+
`reach`, and `frequency` tensors. If `None`, the existing tensors from
|
|
1786
|
+
the Meridian object are used. If any of the tensors is provided with a
|
|
1787
|
+
different number of time periods than in `InputData`, then all tensors
|
|
1788
|
+
must be provided with the same number of time periods.
|
|
1528
1789
|
optimal_frequency: xr.DataArray with dimension `n_rf_channels`, containing
|
|
1529
1790
|
the optimal frequency per channel, that maximizes posterior mean roi.
|
|
1530
1791
|
Value is `None` if the model does not contain reach and frequency data,
|
|
@@ -1535,13 +1796,18 @@ class BudgetOptimizer:
|
|
|
1535
1796
|
Tuple of tf.tensors (new_media, new_media_spend, new_reach, new_frequency,
|
|
1536
1797
|
new_rf_spend).
|
|
1537
1798
|
"""
|
|
1799
|
+
new_data = new_data or analyzer.DataTensors()
|
|
1800
|
+
filled_data = new_data.validate_and_fill_missing_data(
|
|
1801
|
+
c.PAID_CHANNELS,
|
|
1802
|
+
self._meridian,
|
|
1803
|
+
)
|
|
1538
1804
|
if self._meridian.n_media_channels > 0:
|
|
1539
1805
|
new_media = (
|
|
1540
1806
|
tf.math.divide_no_nan(
|
|
1541
1807
|
spend[: self._meridian.n_media_channels],
|
|
1542
1808
|
hist_spend[: self._meridian.n_media_channels],
|
|
1543
1809
|
)
|
|
1544
|
-
*
|
|
1810
|
+
* filled_data.media
|
|
1545
1811
|
)
|
|
1546
1812
|
new_media_spend = tf.convert_to_tensor(
|
|
1547
1813
|
spend[: self._meridian.n_media_channels]
|
|
@@ -1550,9 +1816,7 @@ class BudgetOptimizer:
|
|
|
1550
1816
|
new_media = None
|
|
1551
1817
|
new_media_spend = None
|
|
1552
1818
|
if self._meridian.n_rf_channels > 0:
|
|
1553
|
-
rf_media =
|
|
1554
|
-
self._meridian.rf_tensors.reach * self._meridian.rf_tensors.frequency
|
|
1555
|
-
)
|
|
1819
|
+
rf_media = filled_data.reach * filled_data.frequency
|
|
1556
1820
|
new_rf_media = (
|
|
1557
1821
|
tf.math.divide_no_nan(
|
|
1558
1822
|
spend[-self._meridian.n_rf_channels :],
|
|
@@ -1561,7 +1825,7 @@ class BudgetOptimizer:
|
|
|
1561
1825
|
* rf_media
|
|
1562
1826
|
)
|
|
1563
1827
|
frequency = (
|
|
1564
|
-
|
|
1828
|
+
filled_data.frequency
|
|
1565
1829
|
if optimal_frequency is None
|
|
1566
1830
|
else optimal_frequency
|
|
1567
1831
|
)
|
|
@@ -1581,9 +1845,10 @@ class BudgetOptimizer:
|
|
|
1581
1845
|
self,
|
|
1582
1846
|
hist_spend: np.ndarray,
|
|
1583
1847
|
spend: np.ndarray,
|
|
1848
|
+
new_data: analyzer.DataTensors | None = None,
|
|
1584
1849
|
use_posterior: bool = True,
|
|
1585
1850
|
use_kpi: bool = False,
|
|
1586
|
-
selected_times: Sequence[str] | None = None,
|
|
1851
|
+
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
1587
1852
|
optimal_frequency: Sequence[float] | None = None,
|
|
1588
1853
|
attrs: Mapping[str, Any] | None = None,
|
|
1589
1854
|
confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
|
|
@@ -1591,15 +1856,22 @@ class BudgetOptimizer:
|
|
|
1591
1856
|
use_historical_budget: bool = True,
|
|
1592
1857
|
) -> xr.Dataset:
|
|
1593
1858
|
"""Creates the budget dataset."""
|
|
1859
|
+
new_data = new_data or analyzer.DataTensors()
|
|
1860
|
+
filled_data = new_data.validate_and_fill_missing_data(
|
|
1861
|
+
c.PAID_DATA + (c.TIME,),
|
|
1862
|
+
self._meridian,
|
|
1863
|
+
)
|
|
1594
1864
|
spend = tf.convert_to_tensor(spend, dtype=tf.float32)
|
|
1595
1865
|
hist_spend = tf.convert_to_tensor(hist_spend, dtype=tf.float32)
|
|
1596
1866
|
(new_media, new_media_spend, new_reach, new_frequency, new_rf_spend) = (
|
|
1597
1867
|
self._get_incremental_outcome_tensors(
|
|
1598
|
-
hist_spend,
|
|
1868
|
+
hist_spend,
|
|
1869
|
+
spend,
|
|
1870
|
+
new_data=filled_data.filter_fields(c.PAID_CHANNELS),
|
|
1871
|
+
optimal_frequency=optimal_frequency,
|
|
1599
1872
|
)
|
|
1600
1873
|
)
|
|
1601
1874
|
budget = np.sum(spend)
|
|
1602
|
-
all_times = self._meridian.input_data.time.values.tolist()
|
|
1603
1875
|
|
|
1604
1876
|
# incremental_outcome here is a tensor with the shape
|
|
1605
1877
|
# (n_chains, n_draws, n_channels)
|
|
@@ -1609,6 +1881,7 @@ class BudgetOptimizer:
|
|
|
1609
1881
|
media=new_media,
|
|
1610
1882
|
reach=new_reach,
|
|
1611
1883
|
frequency=new_frequency,
|
|
1884
|
+
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
1612
1885
|
),
|
|
1613
1886
|
selected_times=selected_times,
|
|
1614
1887
|
use_kpi=use_kpi,
|
|
@@ -1631,6 +1904,9 @@ class BudgetOptimizer:
|
|
|
1631
1904
|
)
|
|
1632
1905
|
|
|
1633
1906
|
aggregated_impressions = self._analyzer.get_aggregated_impressions(
|
|
1907
|
+
new_data=analyzer.DataTensors(
|
|
1908
|
+
media=new_media, reach=new_reach, frequency=new_frequency
|
|
1909
|
+
),
|
|
1634
1910
|
selected_times=selected_times,
|
|
1635
1911
|
selected_geos=None,
|
|
1636
1912
|
aggregate_times=True,
|
|
@@ -1638,10 +1914,11 @@ class BudgetOptimizer:
|
|
|
1638
1914
|
optimal_frequency=optimal_frequency,
|
|
1639
1915
|
include_non_paid_channels=False,
|
|
1640
1916
|
)
|
|
1641
|
-
effectiveness = incremental_outcome / aggregated_impressions
|
|
1642
1917
|
effectiveness_with_mean_median_and_ci = (
|
|
1643
1918
|
analyzer.get_central_tendency_and_ci(
|
|
1644
|
-
data=
|
|
1919
|
+
data=tf.math.divide_no_nan(
|
|
1920
|
+
incremental_outcome, aggregated_impressions
|
|
1921
|
+
),
|
|
1645
1922
|
confidence_level=confidence_level,
|
|
1646
1923
|
include_median=True,
|
|
1647
1924
|
)
|
|
@@ -1661,6 +1938,7 @@ class BudgetOptimizer:
|
|
|
1661
1938
|
frequency=new_frequency,
|
|
1662
1939
|
media_spend=new_media_spend,
|
|
1663
1940
|
rf_spend=new_rf_spend,
|
|
1941
|
+
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
1664
1942
|
),
|
|
1665
1943
|
selected_times=selected_times,
|
|
1666
1944
|
batch_size=batch_size,
|
|
@@ -1699,6 +1977,18 @@ class BudgetOptimizer:
|
|
|
1699
1977
|
c.CPIK: ([c.CHANNEL, c.METRIC], cpik),
|
|
1700
1978
|
}
|
|
1701
1979
|
|
|
1980
|
+
all_times = (
|
|
1981
|
+
filled_data.time.numpy().astype(str).tolist()
|
|
1982
|
+
if filled_data.time is not None
|
|
1983
|
+
else self._meridian.input_data.time.values.tolist()
|
|
1984
|
+
)
|
|
1985
|
+
if selected_times is not None and all(
|
|
1986
|
+
isinstance(time, bool) for time in selected_times
|
|
1987
|
+
):
|
|
1988
|
+
selected_times = [
|
|
1989
|
+
time for time, selected in zip(all_times, selected_times) if selected
|
|
1990
|
+
]
|
|
1991
|
+
|
|
1702
1992
|
attributes = {
|
|
1703
1993
|
c.START_DATE: min(selected_times) if selected_times else all_times[0],
|
|
1704
1994
|
c.END_DATE: max(selected_times) if selected_times else all_times[-1],
|
|
@@ -1717,73 +2007,19 @@ class BudgetOptimizer:
|
|
|
1717
2007
|
return xr.Dataset(
|
|
1718
2008
|
data_vars=data_vars,
|
|
1719
2009
|
coords={
|
|
1720
|
-
c.CHANNEL: (
|
|
1721
|
-
|
|
1722
|
-
self._meridian.input_data.get_all_paid_channels(),
|
|
1723
|
-
),
|
|
1724
|
-
c.METRIC: (
|
|
1725
|
-
[c.METRIC],
|
|
1726
|
-
[c.MEAN, c.MEDIAN, c.CI_LO, c.CI_HI],
|
|
1727
|
-
),
|
|
2010
|
+
c.CHANNEL: self._meridian.input_data.get_all_paid_channels(),
|
|
2011
|
+
c.METRIC: [c.MEAN, c.MEDIAN, c.CI_LO, c.CI_HI],
|
|
1728
2012
|
},
|
|
1729
2013
|
attrs=attributes | (attrs or {}),
|
|
1730
2014
|
)
|
|
1731
2015
|
|
|
1732
|
-
def _get_optimization_bounds(
|
|
1733
|
-
self,
|
|
1734
|
-
spend: np.ndarray,
|
|
1735
|
-
spend_constraint_lower: _SpendConstraint | None,
|
|
1736
|
-
spend_constraint_upper: _SpendConstraint | None,
|
|
1737
|
-
round_factor: int,
|
|
1738
|
-
fixed_budget: bool,
|
|
1739
|
-
) -> tuple[np.ndarray, np.ndarray, tuple[np.ndarray, np.ndarray]]:
|
|
1740
|
-
"""Get optimization bounds from spend and spend constraints.
|
|
1741
|
-
|
|
1742
|
-
Args:
|
|
1743
|
-
spend: np.ndarray with size `n_total_channels` containing media-level
|
|
1744
|
-
spend for all media and RF channels.
|
|
1745
|
-
spend_constraint_lower: Numeric list of size `n_total_channels` or float
|
|
1746
|
-
(same constraint for all media) indicating the lower bound of
|
|
1747
|
-
media-level spend. The lower bound of media-level spend is `(1 -
|
|
1748
|
-
spend_constraint_lower) * budget * allocation)`. The value must be
|
|
1749
|
-
between 0-1.
|
|
1750
|
-
spend_constraint_upper: Numeric list of size `n_total_channels` or float
|
|
1751
|
-
(same constraint for all media) indicating the upper bound of
|
|
1752
|
-
media-level spend. The upper bound of media-level spend is `(1 +
|
|
1753
|
-
spend_constraint_upper) * budget * allocation)`.
|
|
1754
|
-
round_factor: Integer number of digits to round optimization bounds.
|
|
1755
|
-
fixed_budget: Boolean indicating whether it's a fixed budget optimization
|
|
1756
|
-
or flexible budget optimization.
|
|
1757
|
-
|
|
1758
|
-
Returns:
|
|
1759
|
-
lower_bound: np.ndarray of size `n_total_channels` containing the treated
|
|
1760
|
-
lower bound spend for each media and RF channel.
|
|
1761
|
-
upper_bound: np.ndarray of size `n_total_channels` containing the treated
|
|
1762
|
-
upper bound spend for each media and RF channel.
|
|
1763
|
-
spend_bounds: tuple of np.ndarray of size `n_total_channels` containing
|
|
1764
|
-
the untreated lower and upper bound spend for each media and RF channel.
|
|
1765
|
-
"""
|
|
1766
|
-
(spend_const_lower, spend_const_upper) = self._validate_spend_constraints(
|
|
1767
|
-
fixed_budget, spend_constraint_lower, spend_constraint_upper
|
|
1768
|
-
)
|
|
1769
|
-
spend_bounds = (
|
|
1770
|
-
np.maximum((1 - spend_const_lower), 0),
|
|
1771
|
-
(1 + spend_const_upper),
|
|
1772
|
-
)
|
|
1773
|
-
|
|
1774
|
-
lower_bound = np.round(
|
|
1775
|
-
(spend_bounds[0] * spend),
|
|
1776
|
-
round_factor,
|
|
1777
|
-
).astype(int)
|
|
1778
|
-
upper_bound = np.round(spend_bounds[1] * spend, round_factor).astype(int)
|
|
1779
|
-
return (lower_bound, upper_bound, spend_bounds)
|
|
1780
|
-
|
|
1781
2016
|
def _update_incremental_outcome_grid(
|
|
1782
2017
|
self,
|
|
1783
2018
|
i: int,
|
|
1784
2019
|
incremental_outcome_grid: np.ndarray,
|
|
1785
2020
|
multipliers_grid: tf.Tensor,
|
|
1786
|
-
|
|
2021
|
+
new_data: analyzer.DataTensors | None = None,
|
|
2022
|
+
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
1787
2023
|
use_posterior: bool = True,
|
|
1788
2024
|
use_kpi: bool = False,
|
|
1789
2025
|
optimal_frequency: xr.DataArray | None = None,
|
|
@@ -1798,8 +2034,16 @@ class BudgetOptimizer:
|
|
|
1798
2034
|
number of columns is equal to the number of total channels, containing
|
|
1799
2035
|
incremental outcome by channel.
|
|
1800
2036
|
multipliers_grid: A grid derived from spend.
|
|
1801
|
-
|
|
1802
|
-
`
|
|
2037
|
+
new_data: An optional `DataTensors` object containing the new `media`,
|
|
2038
|
+
`reach`, `frequency`, and `revenue_per_kpi` tensors. If `None`, the
|
|
2039
|
+
existing tensors from the Meridian object are used. If any of the
|
|
2040
|
+
tensors is provided with a different number of time periods than in
|
|
2041
|
+
`InputData`, then all tensors must be provided with the same number of
|
|
2042
|
+
time periods.
|
|
2043
|
+
selected_times: Optional list of times to optimize. This can either be a
|
|
2044
|
+
string list containing a subset of time dimension coordinates from
|
|
2045
|
+
`InputData.time` or a boolean list with length equal to the time
|
|
2046
|
+
dimension of the tensor. By default, all time periods are included.
|
|
1803
2047
|
use_posterior: Boolean. If `True`, then the incremental outcome is derived
|
|
1804
2048
|
from the posterior distribution of the model. Otherwise, the prior
|
|
1805
2049
|
distribution is used.
|
|
@@ -1816,10 +2060,14 @@ class BudgetOptimizer:
|
|
|
1816
2060
|
reducing `batch_size`. The calculation will generally be faster with
|
|
1817
2061
|
larger `batch_size` values.
|
|
1818
2062
|
"""
|
|
2063
|
+
new_data = new_data or analyzer.DataTensors()
|
|
2064
|
+
filled_data = new_data.validate_and_fill_missing_data(
|
|
2065
|
+
c.PAID_DATA, self._meridian
|
|
2066
|
+
)
|
|
1819
2067
|
if self._meridian.n_media_channels > 0:
|
|
1820
2068
|
new_media = (
|
|
1821
2069
|
multipliers_grid[i, : self._meridian.n_media_channels]
|
|
1822
|
-
*
|
|
2070
|
+
* filled_data.media
|
|
1823
2071
|
)
|
|
1824
2072
|
else:
|
|
1825
2073
|
new_media = None
|
|
@@ -1828,20 +2076,18 @@ class BudgetOptimizer:
|
|
|
1828
2076
|
new_frequency = None
|
|
1829
2077
|
new_reach = None
|
|
1830
2078
|
elif optimal_frequency is not None:
|
|
1831
|
-
new_frequency = (
|
|
1832
|
-
tf.ones_like(self._meridian.rf_tensors.frequency) * optimal_frequency
|
|
1833
|
-
)
|
|
2079
|
+
new_frequency = tf.ones_like(filled_data.frequency) * optimal_frequency
|
|
1834
2080
|
new_reach = tf.math.divide_no_nan(
|
|
1835
2081
|
multipliers_grid[i, -self._meridian.n_rf_channels :]
|
|
1836
|
-
*
|
|
1837
|
-
*
|
|
2082
|
+
* filled_data.reach
|
|
2083
|
+
* filled_data.frequency,
|
|
1838
2084
|
new_frequency,
|
|
1839
2085
|
)
|
|
1840
2086
|
else:
|
|
1841
|
-
new_frequency =
|
|
2087
|
+
new_frequency = filled_data.frequency
|
|
1842
2088
|
new_reach = (
|
|
1843
2089
|
multipliers_grid[i, -self._meridian.n_rf_channels :]
|
|
1844
|
-
*
|
|
2090
|
+
* filled_data.reach
|
|
1845
2091
|
)
|
|
1846
2092
|
|
|
1847
2093
|
# incremental_outcome returns a three dimensional tensor with dims
|
|
@@ -1854,6 +2100,7 @@ class BudgetOptimizer:
|
|
|
1854
2100
|
media=new_media,
|
|
1855
2101
|
reach=new_reach,
|
|
1856
2102
|
frequency=new_frequency,
|
|
2103
|
+
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
1857
2104
|
),
|
|
1858
2105
|
selected_times=selected_times,
|
|
1859
2106
|
use_kpi=use_kpi,
|
|
@@ -1870,7 +2117,8 @@ class BudgetOptimizer:
|
|
|
1870
2117
|
spend_bound_lower: np.ndarray,
|
|
1871
2118
|
spend_bound_upper: np.ndarray,
|
|
1872
2119
|
step_size: int,
|
|
1873
|
-
|
|
2120
|
+
new_data: analyzer.DataTensors | None = None,
|
|
2121
|
+
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
1874
2122
|
use_posterior: bool = True,
|
|
1875
2123
|
use_kpi: bool = False,
|
|
1876
2124
|
optimal_frequency: xr.DataArray | None = None,
|
|
@@ -1886,8 +2134,16 @@ class BudgetOptimizer:
|
|
|
1886
2134
|
containing the upper constraint spend for each channel.
|
|
1887
2135
|
step_size: Integer indicating the step size, or interval, between values
|
|
1888
2136
|
in the spend grid. All media channels have the same step size.
|
|
1889
|
-
|
|
1890
|
-
`
|
|
2137
|
+
new_data: An optional `DataTensors` object containing the new `media`,
|
|
2138
|
+
`reach`, `frequency`, and `revenue_per_kpi` tensors. If `None`, the
|
|
2139
|
+
existing tensors from the Meridian object are used. If any of the
|
|
2140
|
+
tensors is provided with a different number of time periods than in
|
|
2141
|
+
`InputData`, then all tensors must be provided with the same number of
|
|
2142
|
+
time periods.
|
|
2143
|
+
selected_times: Optional list of times to optimize. This can either be a
|
|
2144
|
+
string list containing a subset of time dimension coordinates from
|
|
2145
|
+
`InputData.time` or a boolean list with length equal to the time
|
|
2146
|
+
dimension of the tensor. By default, all time periods are included.
|
|
1891
2147
|
use_posterior: Boolean. If `True`, then the incremental outcome is derived
|
|
1892
2148
|
from the posterior distribution of the model. Otherwise, the prior
|
|
1893
2149
|
distribution is used.
|
|
@@ -1941,6 +2197,7 @@ class BudgetOptimizer:
|
|
|
1941
2197
|
incremental_outcome_grid=incremental_outcome_grid,
|
|
1942
2198
|
multipliers_grid=multipliers_grid,
|
|
1943
2199
|
selected_times=selected_times,
|
|
2200
|
+
new_data=new_data,
|
|
1944
2201
|
use_posterior=use_posterior,
|
|
1945
2202
|
use_kpi=use_kpi,
|
|
1946
2203
|
optimal_frequency=optimal_frequency,
|
|
@@ -1967,6 +2224,135 @@ class BudgetOptimizer:
|
|
|
1967
2224
|
return (spend_grid, incremental_outcome_grid)
|
|
1968
2225
|
|
|
1969
2226
|
|
|
2227
|
+
def _validate_pct_of_spend(
    n_channels: int,
    hist_spend: np.ndarray,
    pct_of_spend: Sequence[float] | None,
) -> np.ndarray:
  """Validates and returns the per-channel share of total spend.

  Args:
    n_channels: Number of channels an explicit `pct_of_spend` must cover.
    hist_spend: Per-channel historical spend, used to derive the allocation
      when `pct_of_spend` is `None`.
    pct_of_spend: Optional explicit allocation. Must have exactly `n_channels`
      entries that sum to one (absolute tolerance `0.001`).

  Returns:
    `pct_of_spend` as an array when it is given; otherwise `hist_spend`
    divided by its total.

  Raises:
    ValueError: If `pct_of_spend` has the wrong length or does not sum to one,
      or if it is `None` and `hist_spend` sums to zero.
  """
  if pct_of_spend is not None:
    if len(pct_of_spend) != n_channels:
      raise ValueError('Percent of spend must be specified for all channels.')
    if not math.isclose(np.sum(pct_of_spend), 1.0, abs_tol=0.001):
      raise ValueError('Percent of spend must sum to one.')
    return np.array(pct_of_spend)

  total_hist_spend = np.sum(hist_spend)
  # Dividing by a zero total would silently yield NaN/inf allocations that
  # then propagate into the spend and bound computations, so fail loudly.
  if total_hist_spend == 0:
    raise ValueError(
        'Percent of spend cannot be derived from historical spend that sums'
        ' to zero. Specify `pct_of_spend` explicitly.'
    )
  return hist_spend / total_hist_spend
|
|
2241
|
+
|
|
2242
|
+
|
|
2243
|
+
def _validate_spend_constraints(
    n_channels: int,
    const_lower: _SpendConstraint,
    const_upper: _SpendConstraint,
) -> tuple[np.ndarray, np.ndarray]:
  """Validates the spend constraints and returns them as arrays.

  Args:
    n_channels: Number of channels a per-channel constraint must cover.
    const_lower: Lower spend constraint, either a single number or one value
      per channel. Every value must lie in `[0, 1]`.
    const_upper: Upper spend constraint, either a single number or one value
      per channel. No value may be negative.

  Returns:
    Tuple `(const_lower, const_upper)` of arrays, each with either one entry
    or `n_channels` entries.

  Raises:
    ValueError: If a constraint has a length other than 1 or `n_channels`, if
      a lower constraint is outside `[0, 1]`, or if an upper constraint is
      negative.
  """

  def get_const_array(const: _SpendConstraint) -> np.ndarray:
    # `np.atleast_1d` promotes any scalar to a one-element array. This covers
    # NumPy scalars such as `np.float32` and `np.int64`, which an
    # `isinstance(const, (float, int))` check misses and which would otherwise
    # become unsized 0-d arrays. `np.array` keeps the copy semantics.
    return np.atleast_1d(np.array(const))

  const_lower = get_const_array(const_lower)
  const_upper = get_const_array(const_upper)

  if any(
      len(const) not in (1, n_channels) for const in (const_lower, const_upper)
  ):
    raise ValueError(
        'Spend constraints must be either a single constraint or be specified'
        ' for all channels.'
    )

  # The negated chained comparison also rejects NaN lower constraints.
  if any(not 0.0 <= const <= 1.0 for const in const_lower):
    raise ValueError(
        'The lower spend constraint must be between 0 and 1 inclusive.'
    )
  if any(const < 0 for const in const_upper):
    raise ValueError('The upper spend constraint must be positive.')

  return (const_lower, const_upper)
|
|
2278
|
+
|
|
2279
|
+
|
|
2280
|
+
def _get_spend_bounds(
    n_channels: int,
    spend_constraint_lower: _SpendConstraint,
    spend_constraint_upper: _SpendConstraint,
) -> tuple[np.ndarray, np.ndarray]:
  """Turns relative spend constraints into lower/upper spend multipliers.

  Args:
    n_channels: Integer number of total channels.
    spend_constraint_lower: Numeric list with one value per channel, or a
      single float shared by all channels, giving the largest allowed relative
      decrease in channel-level spend. The resulting lower multiplier is
      `1 - spend_constraint_lower`. Values must be between 0 and 1.
    spend_constraint_upper: Numeric list with one value per channel, or a
      single float shared by all channels, giving the largest allowed relative
      increase in channel-level spend. The resulting upper multiplier is
      `1 + spend_constraint_upper`.

  Returns:
    A `(lower, upper)` tuple of arrays holding the untreated (not yet rounded
    or scaled by spend) multipliers. Each array has one entry per channel, or a
    single entry when the matching constraint was given as a scalar.
  """
  lower_const, upper_const = _validate_spend_constraints(
      n_channels,
      spend_constraint_lower,
      spend_constraint_upper,
  )
  # The lower multiplier is floored at zero so it can never go negative.
  lower_multiplier = np.maximum(1 - lower_const, 0)
  upper_multiplier = 1 + upper_const
  return (lower_multiplier, upper_multiplier)
|
|
2313
|
+
|
|
2314
|
+
|
|
2315
|
+
def _get_optimization_bounds(
    n_channels: int,
    spend: np.ndarray,
    round_factor: int,
    spend_constraint_lower: _SpendConstraint,
    spend_constraint_upper: _SpendConstraint,
) -> tuple[np.ndarray, np.ndarray]:
  """Computes rounded per-channel spend bounds for the optimization.

  Args:
    n_channels: Integer number of total channels.
    spend: np.ndarray holding the channel-level spend for every media and RF
      channel.
    round_factor: Integer number of decimals passed to `np.round` when rounding
      the spend and the resulting bounds.
    spend_constraint_lower: Numeric list with one value per channel, or a
      single float shared by all channels, giving the largest allowed relative
      decrease in channel-level spend. Values must be between 0 and 1.
    spend_constraint_upper: Numeric list with one value per channel, or a
      single float shared by all channels, giving the largest allowed relative
      increase in channel-level spend.

  Returns:
    lower_bound: np.ndarray of integers with the treated (scaled by the rounded
      spend, then rounded again) lower bound spend for each channel.
    upper_bound: np.ndarray of integers with the treated upper bound spend for
      each channel.
  """

  def _round_to_int(values: np.ndarray) -> np.ndarray:
    # Same rounding precision everywhere, followed by an integer cast.
    return np.round(values, round_factor).astype(int)

  lower_multiplier, upper_multiplier = _get_spend_bounds(
      n_channels=n_channels,
      spend_constraint_lower=spend_constraint_lower,
      spend_constraint_upper=spend_constraint_upper,
  )
  # Bounds are derived from the already-rounded spend, not the raw spend.
  rounded_spend = _round_to_int(spend)
  lower_bound = _round_to_int(lower_multiplier * rounded_spend)
  upper_bound = _round_to_int(upper_multiplier * rounded_spend)
  return (lower_bound, upper_bound)
|
|
2354
|
+
|
|
2355
|
+
|
|
1970
2356
|
def _validate_budget(
|
|
1971
2357
|
fixed_budget: bool,
|
|
1972
2358
|
budget: float | None,
|
|
@@ -2063,3 +2449,30 @@ def _exceeds_optimization_constraints(
|
|
|
2063
2449
|
return cur_total_roi < target_value and roi_grid_point < cur_total_roi
|
|
2064
2450
|
else:
|
|
2065
2451
|
return roi_grid_point < scenario.target_value
|
|
2452
|
+
|
|
2453
|
+
|
|
2454
|
+
def _raise_warning_if_target_constraints_not_met(
    target_roi: float | None,
    target_mroi: float | None,
    optimized_data: xr.Dataset,
) -> None:
  """Emits a warning if the optimized data misses the requested target.

  The ROI target takes precedence: the marginal ROI target is only checked when
  `target_roi` is `None`. Nothing happens when both targets are `None`.

  Args:
    target_roi: Target total ROI, or `None` if no ROI target was requested.
    target_mroi: Target marginal ROI, or `None` if no marginal ROI target was
      requested.
    optimized_data: Dataset holding the optimized results. The total ROI is
      read from `attrs[c.TOTAL_ROI]` and the channel marginal ROIs from the
      `c.MROI` variable.
  """
  # Compare against `None` rather than relying on truthiness, so an explicit
  # target of 0.0 is still checked instead of being treated as "no target".
  if target_roi is not None:
    # Total ROI is a scalar value.
    optimized_roi = optimized_data.attrs[c.TOTAL_ROI]
    if optimized_roi < target_roi:
      warnings.warn(
          f'Target ROI constraint was not met. The target ROI is {target_roi}'
          f', but the actual ROI is {optimized_roi}.'
      )
  elif target_mroi is not None:
    # Compare each channel's marginal ROI to the target.
    # optimized_data[c.MROI] is an array of shape (n_channels, 4), where the
    # last dimension is [mean, median, ci_lo, ci_hi]; column 0 is the mean.
    optimized_mroi = optimized_data[c.MROI][:, 0]
    if np.any(optimized_mroi < target_mroi):
      warnings.warn(
          'Target marginal ROI constraint was not met. The target marginal'
          f' ROI is {target_mroi}, but the actual channel marginal ROIs are'
          f' {optimized_mroi}.'
      )
|