google-meridian 1.0.8__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {google_meridian-1.0.8.dist-info → google_meridian-1.1.0.dist-info}/METADATA +2 -2
- google_meridian-1.1.0.dist-info/RECORD +41 -0
- {google_meridian-1.0.8.dist-info → google_meridian-1.1.0.dist-info}/WHEEL +1 -1
- meridian/__init__.py +1 -1
- meridian/analysis/analyzer.py +303 -207
- meridian/analysis/optimizer.py +431 -82
- meridian/analysis/summarizer.py +25 -7
- meridian/analysis/test_utils.py +81 -81
- meridian/analysis/visualizer.py +81 -39
- meridian/constants.py +111 -26
- meridian/data/input_data.py +115 -19
- meridian/data/test_utils.py +116 -5
- meridian/data/time_coordinates.py +3 -3
- meridian/model/media.py +133 -98
- meridian/model/model.py +457 -52
- meridian/model/model_test_data.py +11 -0
- meridian/model/posterior_sampler.py +120 -43
- meridian/model/prior_distribution.py +95 -29
- meridian/model/prior_sampler.py +179 -209
- meridian/model/spec.py +196 -36
- meridian/model/transformers.py +15 -3
- google_meridian-1.0.8.dist-info/RECORD +0 -41
- {google_meridian-1.0.8.dist-info → google_meridian-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {google_meridian-1.0.8.dist-info → google_meridian-1.1.0.dist-info}/top_level.txt +0 -0
meridian/analysis/optimizer.py
CHANGED
|
@@ -28,6 +28,7 @@ from meridian import constants as c
|
|
|
28
28
|
from meridian.analysis import analyzer
|
|
29
29
|
from meridian.analysis import formatter
|
|
30
30
|
from meridian.analysis import summary_text
|
|
31
|
+
from meridian.data import time_coordinates as tc
|
|
31
32
|
from meridian.model import model
|
|
32
33
|
import numpy as np
|
|
33
34
|
import pandas as pd
|
|
@@ -97,6 +98,8 @@ class OptimizationGrid:
|
|
|
97
98
|
use_kpi: Whether using generic KPI or revenue.
|
|
98
99
|
use_posterior: Whether posterior distributions were used, or prior.
|
|
99
100
|
use_optimal_frequency: Whether optimal frequency was used.
|
|
101
|
+
start_date: The start date of the optimization period.
|
|
102
|
+
end_date: The end date of the optimization period.
|
|
100
103
|
gtol: Float indicating the acceptable relative error for the budget used in
|
|
101
104
|
the grid setup. The budget is rounded by `10*n`, where `n` is the smallest
|
|
102
105
|
integer such that `(budget - rounded_budget)` is less than or equal to
|
|
@@ -116,10 +119,12 @@ class OptimizationGrid:
|
|
|
116
119
|
use_kpi: bool
|
|
117
120
|
use_posterior: bool
|
|
118
121
|
use_optimal_frequency: bool
|
|
122
|
+
start_date: tc.Date
|
|
123
|
+
end_date: tc.Date
|
|
119
124
|
gtol: float
|
|
120
125
|
round_factor: int
|
|
121
126
|
optimal_frequency: np.ndarray | None
|
|
122
|
-
selected_times:
|
|
127
|
+
selected_times: Sequence[str] | None
|
|
123
128
|
|
|
124
129
|
@property
|
|
125
130
|
def grid_dataset(self) -> xr.Dataset:
|
|
@@ -189,7 +194,7 @@ class OptimizationGrid:
|
|
|
189
194
|
variables:
|
|
190
195
|
* `optimized`: media spend that maximizes incremental outcome based
|
|
191
196
|
on spend constraints for all media and RF channels.
|
|
192
|
-
* `non_optimized`:
|
|
197
|
+
* `non_optimized`: rounded channel-level spend.
|
|
193
198
|
|
|
194
199
|
Raises:
|
|
195
200
|
A warning if the budget's rounding should be different from the grid's
|
|
@@ -226,7 +231,7 @@ class OptimizationGrid:
|
|
|
226
231
|
spend_constraint_upper=spend_constraint_upper,
|
|
227
232
|
)
|
|
228
233
|
)
|
|
229
|
-
self.
|
|
234
|
+
self.check_optimization_bounds(
|
|
230
235
|
lower_bound=optimization_lower_bound,
|
|
231
236
|
upper_bound=optimization_upper_bound,
|
|
232
237
|
)
|
|
@@ -235,16 +240,16 @@ class OptimizationGrid:
|
|
|
235
240
|
warnings.warn(
|
|
236
241
|
'Optimization accuracy may suffer owing to budget level differences.'
|
|
237
242
|
' Consider creating a new grid with smaller `gtol` if you intend to'
|
|
238
|
-
|
|
239
|
-
'
|
|
240
|
-
'
|
|
243
|
+
' shrink total budget significantly across optimization runs.'
|
|
244
|
+
' It is only a problem when you use a much smaller budget, '
|
|
245
|
+
' for which the intended step size is smaller. '
|
|
241
246
|
)
|
|
242
247
|
(spend_grid, incremental_outcome_grid) = self._trim_grid(
|
|
243
248
|
spend_bound_lower=optimization_lower_bound,
|
|
244
249
|
spend_bound_upper=optimization_upper_bound,
|
|
245
250
|
)
|
|
251
|
+
rounded_spend = np.round(spend, self.round_factor).astype(int)
|
|
246
252
|
if isinstance(scenario, FixedBudgetScenario):
|
|
247
|
-
rounded_spend = np.round(spend, self.round_factor)
|
|
248
253
|
scenario = dataclasses.replace(
|
|
249
254
|
scenario, total_budget=np.sum(rounded_spend)
|
|
250
255
|
)
|
|
@@ -258,7 +263,7 @@ class OptimizationGrid:
|
|
|
258
263
|
coords={c.CHANNEL: self.channels},
|
|
259
264
|
data_vars={
|
|
260
265
|
c.OPTIMIZED: ([c.CHANNEL], optimal_spend.data),
|
|
261
|
-
c.NON_OPTIMIZED: ([c.CHANNEL],
|
|
266
|
+
c.NON_OPTIMIZED: ([c.CHANNEL], rounded_spend),
|
|
262
267
|
},
|
|
263
268
|
)
|
|
264
269
|
|
|
@@ -344,8 +349,10 @@ class OptimizationGrid:
|
|
|
344
349
|
grid coverage and they are rounded using this grid's round factor.
|
|
345
350
|
|
|
346
351
|
Args:
|
|
347
|
-
spend_bound_lower: The lower bound of spend for each channel.
|
|
348
|
-
|
|
352
|
+
spend_bound_lower: The lower bound of spend for each channel. Must be in
|
|
353
|
+
the same order as `self.channels`.
|
|
354
|
+
spend_bound_upper: The upper bound of spend for each channel. Must be in
|
|
355
|
+
the same order as `self.channels`.
|
|
349
356
|
|
|
350
357
|
Returns:
|
|
351
358
|
updated_spend: The updated spend grid with valid spend values moved up to
|
|
@@ -382,7 +389,7 @@ class OptimizationGrid:
|
|
|
382
389
|
|
|
383
390
|
return (updated_spend, updated_incremental_outcome)
|
|
384
391
|
|
|
385
|
-
def
|
|
392
|
+
def check_optimization_bounds(
|
|
386
393
|
self,
|
|
387
394
|
lower_bound: np.ndarray,
|
|
388
395
|
upper_bound: np.ndarray,
|
|
@@ -391,9 +398,9 @@ class OptimizationGrid:
|
|
|
391
398
|
|
|
392
399
|
Args:
|
|
393
400
|
lower_bound: `np.ndarray` of shape `(n_channels,)` containing the lower
|
|
394
|
-
bound for each channel.
|
|
401
|
+
bound for each channel. Must be in the same order as `self.channels`.
|
|
395
402
|
upper_bound: `np.ndarray` of shape `(n_channels,)` containing the upper
|
|
396
|
-
bound for each channel.
|
|
403
|
+
bound for each channel. Must be in the same order as `self.channels`.
|
|
397
404
|
|
|
398
405
|
Raises:
|
|
399
406
|
ValueError: If the spend grid does not fit within the optimization bounds.
|
|
@@ -621,7 +628,7 @@ class OptimizationResults:
|
|
|
621
628
|
# by adjusting the domain of the y-axis so that the incremental outcome does
|
|
622
629
|
# not start at 0. Calculate the total decrease in incremental outcome to pad
|
|
623
630
|
# the y-axis from the non-optimized total incremental outcome value.
|
|
624
|
-
sum_decr =
|
|
631
|
+
sum_decr = df[df.incremental_outcome < 0].incremental_outcome.sum()
|
|
625
632
|
y_padding = float(f'1e{int(math.log10(-sum_decr))}') if sum_decr < 0 else 2
|
|
626
633
|
domain_scale = [
|
|
627
634
|
self.nonoptimized_data.total_incremental_outcome + sum_decr - y_padding,
|
|
@@ -1016,8 +1023,16 @@ class OptimizationResults:
|
|
|
1016
1023
|
|
|
1017
1024
|
def _gen_optimization_summary(self) -> str:
|
|
1018
1025
|
"""Generates HTML optimization summary output (as sanitized content str)."""
|
|
1019
|
-
|
|
1020
|
-
self.template_env.globals[c.
|
|
1026
|
+
start_date = tc.normalize_date(self.optimized_data.start_date)
|
|
1027
|
+
self.template_env.globals[c.START_DATE] = start_date.strftime(
|
|
1028
|
+
f'%b {start_date.day}, %Y'
|
|
1029
|
+
)
|
|
1030
|
+
interval_days = self.meridian.input_data.time_coordinates.interval_days
|
|
1031
|
+
end_date = tc.normalize_date(self.optimized_data.end_date)
|
|
1032
|
+
end_date_adjusted = end_date + pd.Timedelta(days=interval_days)
|
|
1033
|
+
self.template_env.globals[c.END_DATE] = end_date_adjusted.strftime(
|
|
1034
|
+
f'%b {end_date_adjusted.day}, %Y'
|
|
1035
|
+
)
|
|
1021
1036
|
|
|
1022
1037
|
html_template = self.template_env.get_template('summary.html.jinja')
|
|
1023
1038
|
return html_template.render(
|
|
@@ -1129,21 +1144,26 @@ class OptimizationResults:
|
|
|
1129
1144
|
- self.nonoptimized_data.total_incremental_outcome
|
|
1130
1145
|
)
|
|
1131
1146
|
inc_outcome_prefix = '+' if inc_outcome_diff > 0 else ''
|
|
1147
|
+
currency = '$' if outcome == c.REVENUE else ''
|
|
1132
1148
|
non_optimized_inc_outcome = formatter.StatsSpec(
|
|
1133
1149
|
title=summary_text.NON_OPTIMIZED_INC_OUTCOME_LABEL.format(
|
|
1134
1150
|
outcome=outcome
|
|
1135
1151
|
),
|
|
1136
|
-
stat=formatter.
|
|
1137
|
-
self.nonoptimized_data.total_incremental_outcome,
|
|
1152
|
+
stat=formatter.compact_number(
|
|
1153
|
+
n=self.nonoptimized_data.total_incremental_outcome,
|
|
1154
|
+
precision=0,
|
|
1155
|
+
currency=currency,
|
|
1138
1156
|
),
|
|
1139
1157
|
)
|
|
1140
1158
|
optimized_inc_outcome = formatter.StatsSpec(
|
|
1141
1159
|
title=summary_text.OPTIMIZED_INC_OUTCOME_LABEL.format(outcome=outcome),
|
|
1142
|
-
stat=formatter.
|
|
1143
|
-
self.optimized_data.total_incremental_outcome,
|
|
1160
|
+
stat=formatter.compact_number(
|
|
1161
|
+
n=self.optimized_data.total_incremental_outcome,
|
|
1162
|
+
precision=0,
|
|
1163
|
+
currency=currency,
|
|
1144
1164
|
),
|
|
1145
1165
|
delta=inc_outcome_prefix
|
|
1146
|
-
+ formatter.
|
|
1166
|
+
+ formatter.compact_number(inc_outcome_diff, 0, currency),
|
|
1147
1167
|
)
|
|
1148
1168
|
return [
|
|
1149
1169
|
non_optimized_budget,
|
|
@@ -1265,8 +1285,12 @@ class BudgetOptimizer:
|
|
|
1265
1285
|
|
|
1266
1286
|
def optimize(
|
|
1267
1287
|
self,
|
|
1288
|
+
new_data: analyzer.DataTensors | None = None,
|
|
1268
1289
|
use_posterior: bool = True,
|
|
1290
|
+
# TODO: b/409550413 - Remove this argument.
|
|
1269
1291
|
selected_times: tuple[str | None, str | None] | None = None,
|
|
1292
|
+
start_date: tc.Date = None,
|
|
1293
|
+
end_date: tc.Date = None,
|
|
1270
1294
|
fixed_budget: bool = True,
|
|
1271
1295
|
budget: float | None = None,
|
|
1272
1296
|
pct_of_spend: Sequence[float] | None = None,
|
|
@@ -1279,23 +1303,61 @@ class BudgetOptimizer:
|
|
|
1279
1303
|
use_kpi: bool = False,
|
|
1280
1304
|
confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
|
|
1281
1305
|
batch_size: int = c.DEFAULT_BATCH_SIZE,
|
|
1306
|
+
optimization_grid: OptimizationGrid | None = None,
|
|
1282
1307
|
) -> OptimizationResults:
|
|
1283
1308
|
"""Finds the optimal budget allocation that maximizes outcome.
|
|
1284
1309
|
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1310
|
+
Optimization depends on the following:
|
|
1311
|
+
1. Flighting pattern (the relative allocation of a channel's media units
|
|
1312
|
+
across geos and time periods, which is held fixed for each channel)
|
|
1313
|
+
2. Cost per media unit (This is assumed to be constant for each channel, and
|
|
1314
|
+
can optionally vary by geo and/or time period)
|
|
1315
|
+
3. `pct_of_spend` (center of the spend box constraint for each channel)
|
|
1316
|
+
4. `budget` (total budget used for fixed budget scenarios)
|
|
1317
|
+
|
|
1318
|
+
By default, these values are assigned based on the historical data. The
|
|
1319
|
+
`pct_of_spend` and `budget` are optimization arguments that can be
|
|
1320
|
+
overridden directly. Passing `new_data.media` (or `new_data.reach` or
|
|
1321
|
+
`new_data.frequency`) will override both the flighting pattern and cost per
|
|
1322
|
+
media unit. Passing `new_data.media_spend` (or `new_data.rf_spend`) will only
|
|
1323
|
+
override the cost per media unit.
|
|
1324
|
+
|
|
1325
|
+
If `new_data` is passed with a different number of time periods than the
|
|
1326
|
+
historical data, then all of the optimization parameters will be inferred
|
|
1327
|
+
from it. Default values for `pct_of_spend` and `budget` (if
|
|
1328
|
+
`fixed_budget=True`) will be inferred from the `new_data`, but can be
|
|
1329
|
+
overridden using the `pct_of_spend` and `budget` arguments.
|
|
1330
|
+
|
|
1331
|
+
If `start_date` or `end_date` is specified, then the default values are
|
|
1332
|
+
inferred based on the subset of time periods specified. Both start and end
|
|
1333
|
+
time selectors should align with the Meridian time dimension coordinates in
|
|
1334
|
+
the underlying model if optimizing the original data. If `new_data` is
|
|
1335
|
+
provided with a different number of time periods than in `InputData`, then
|
|
1336
|
+
the start and end time coordinates must match the time dimensions in
|
|
1337
|
+
`new_data.time`. By default, all time periods are used. Either start or
|
|
1338
|
+
end time component can be `None` to represent the first or the last time
|
|
1339
|
+
coordinate, respectively.
|
|
1288
1340
|
|
|
1289
1341
|
Args:
|
|
1342
|
+
new_data: An optional `DataTensors` container with optional tensors:
|
|
1343
|
+
`media`, `reach`, `frequency`, `media_spend`, `rf_spend`,
|
|
1344
|
+
`revenue_per_kpi`, and `time`. If `None`, the original tensors from the
|
|
1345
|
+
Meridian object are used. If `new_data` is provided, the optimization is
|
|
1346
|
+
run on the versions of the tensors in `new_data` and the original
|
|
1347
|
+
versions of all the remaining tensors. If any of the tensors in
|
|
1348
|
+
`new_data` is provided with a different number of time periods than in
|
|
1349
|
+
`InputData`, then all tensors must be provided with the same number of
|
|
1350
|
+
time periods and the `time` tensor must be provided.
|
|
1290
1351
|
use_posterior: Boolean. If `True`, then the budget is optimized based on
|
|
1291
1352
|
the posterior distribution of the model. Otherwise, the prior
|
|
1292
1353
|
distribution is used.
|
|
1293
|
-
selected_times: Tuple containing the start and end time
|
|
1294
|
-
coordinates for the duration to run the optimization on.
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1354
|
+
selected_times: Deprecated. Tuple containing the start and end time
|
|
1355
|
+
dimension coordinates for the duration to run the optimization on.
|
|
1356
|
+
Please use `start_date` and `end_date` instead.
|
|
1357
|
+
start_date: Optional start date selector, *inclusive*, in _yyyy-mm-dd_
|
|
1358
|
+
format. Default is `None`, i.e. the first time period.
|
|
1359
|
+
end_date: Optional end date selector, *inclusive*, in _yyyy-mm-dd_ format.
|
|
1360
|
+
Default is `None`, i.e. the last time period.
|
|
1299
1361
|
fixed_budget: Boolean indicating whether it's a fixed budget optimization
|
|
1300
1362
|
or flexible budget optimization. Defaults to `True`. If `False`, must
|
|
1301
1363
|
specify either `target_roi` or `target_mroi`.
|
|
@@ -1347,11 +1409,27 @@ class BudgetOptimizer:
|
|
|
1347
1409
|
in batches to avoid memory exhaustion. If a memory error occurs, try
|
|
1348
1410
|
reducing `batch_size`. The calculation will generally be faster with
|
|
1349
1411
|
larger `batch_size` values.
|
|
1412
|
+
optimization_grid: An `OptimizationGrid` object containing the grid
|
|
1413
|
+
information. Grid creation is a time-consuming part of optimization.
|
|
1414
|
+
Creating one grid and running various optimizations on it can save time.
|
|
1415
|
+
If `None` or grid doesn't match the optimization arguments, a new grid
|
|
1416
|
+
will be created.
|
|
1350
1417
|
|
|
1351
1418
|
Returns:
|
|
1352
1419
|
An `OptimizationResults` object containing optimized budget allocation
|
|
1353
1420
|
datasets, along with some of the intermediate values used to derive them.
|
|
1354
1421
|
"""
|
|
1422
|
+
if selected_times is not None:
|
|
1423
|
+
warnings.warn(
|
|
1424
|
+
'`selected_times` is deprecated. Please use `start_date` and'
|
|
1425
|
+
' `end_date` instead.',
|
|
1426
|
+
DeprecationWarning,
|
|
1427
|
+
stacklevel=2,
|
|
1428
|
+
)
|
|
1429
|
+
deprecated_start_date, deprecated_end_date = selected_times
|
|
1430
|
+
start_date = start_date or deprecated_start_date
|
|
1431
|
+
end_date = end_date or deprecated_end_date
|
|
1432
|
+
|
|
1355
1433
|
_validate_budget(
|
|
1356
1434
|
fixed_budget=fixed_budget,
|
|
1357
1435
|
budget=budget,
|
|
@@ -1367,18 +1445,36 @@ class BudgetOptimizer:
|
|
|
1367
1445
|
spend_constraint_lower = spend_constraint_default
|
|
1368
1446
|
if spend_constraint_upper is None:
|
|
1369
1447
|
spend_constraint_upper = spend_constraint_default
|
|
1370
|
-
|
|
1371
|
-
|
|
1448
|
+
use_grid_arg = optimization_grid is not None and self._validate_grid(
|
|
1449
|
+
new_data=new_data,
|
|
1450
|
+
use_posterior=use_posterior,
|
|
1451
|
+
start_date=start_date,
|
|
1452
|
+
end_date=end_date,
|
|
1372
1453
|
budget=budget,
|
|
1373
1454
|
pct_of_spend=pct_of_spend,
|
|
1374
1455
|
spend_constraint_lower=spend_constraint_lower,
|
|
1375
1456
|
spend_constraint_upper=spend_constraint_upper,
|
|
1376
1457
|
gtol=gtol,
|
|
1377
|
-
use_posterior=use_posterior,
|
|
1378
|
-
use_kpi=use_kpi,
|
|
1379
1458
|
use_optimal_frequency=use_optimal_frequency,
|
|
1380
|
-
|
|
1459
|
+
use_kpi=use_kpi,
|
|
1460
|
+
optimization_grid=optimization_grid,
|
|
1381
1461
|
)
|
|
1462
|
+
if optimization_grid is None or not use_grid_arg:
|
|
1463
|
+
optimization_grid = self.create_optimization_grid(
|
|
1464
|
+
new_data=new_data,
|
|
1465
|
+
start_date=start_date,
|
|
1466
|
+
end_date=end_date,
|
|
1467
|
+
budget=budget,
|
|
1468
|
+
pct_of_spend=pct_of_spend,
|
|
1469
|
+
spend_constraint_lower=spend_constraint_lower,
|
|
1470
|
+
spend_constraint_upper=spend_constraint_upper,
|
|
1471
|
+
gtol=gtol,
|
|
1472
|
+
use_posterior=use_posterior,
|
|
1473
|
+
use_kpi=use_kpi,
|
|
1474
|
+
use_optimal_frequency=use_optimal_frequency,
|
|
1475
|
+
batch_size=batch_size,
|
|
1476
|
+
)
|
|
1477
|
+
|
|
1382
1478
|
if fixed_budget:
|
|
1383
1479
|
scenario = FixedBudgetScenario(total_budget=budget)
|
|
1384
1480
|
elif target_roi:
|
|
@@ -1399,24 +1495,24 @@ class BudgetOptimizer:
|
|
|
1399
1495
|
use_historical_budget = budget is None or np.isclose(
|
|
1400
1496
|
budget, np.sum(optimization_grid.historical_spend)
|
|
1401
1497
|
)
|
|
1402
|
-
|
|
1403
|
-
spend.non_optimized, optimization_grid.round_factor
|
|
1404
|
-
).astype(int)
|
|
1498
|
+
new_data = new_data or analyzer.DataTensors()
|
|
1405
1499
|
nonoptimized_data = self._create_budget_dataset(
|
|
1500
|
+
new_data=new_data.filter_fields(c.PAID_DATA + (c.TIME,)),
|
|
1406
1501
|
use_posterior=use_posterior,
|
|
1407
1502
|
use_kpi=use_kpi,
|
|
1408
1503
|
hist_spend=optimization_grid.historical_spend,
|
|
1409
|
-
spend=
|
|
1504
|
+
spend=spend.non_optimized,
|
|
1410
1505
|
selected_times=optimization_grid.selected_times,
|
|
1411
1506
|
confidence_level=confidence_level,
|
|
1412
1507
|
batch_size=batch_size,
|
|
1413
1508
|
use_historical_budget=use_historical_budget,
|
|
1414
1509
|
)
|
|
1415
1510
|
nonoptimized_data_with_optimal_freq = self._create_budget_dataset(
|
|
1511
|
+
new_data=new_data.filter_fields(c.PAID_DATA + (c.TIME,)),
|
|
1416
1512
|
use_posterior=use_posterior,
|
|
1417
1513
|
use_kpi=use_kpi,
|
|
1418
1514
|
hist_spend=optimization_grid.historical_spend,
|
|
1419
|
-
spend=
|
|
1515
|
+
spend=spend.non_optimized,
|
|
1420
1516
|
selected_times=optimization_grid.selected_times,
|
|
1421
1517
|
optimal_frequency=optimization_grid.optimal_frequency,
|
|
1422
1518
|
confidence_level=confidence_level,
|
|
@@ -1431,6 +1527,7 @@ class BudgetOptimizer:
|
|
|
1431
1527
|
elif target_mroi:
|
|
1432
1528
|
constraints[c.TARGET_MROI] = target_mroi
|
|
1433
1529
|
optimized_data = self._create_budget_dataset(
|
|
1530
|
+
new_data=new_data.filter_fields(c.PAID_DATA + (c.TIME,)),
|
|
1434
1531
|
use_posterior=use_posterior,
|
|
1435
1532
|
use_kpi=use_kpi,
|
|
1436
1533
|
hist_spend=optimization_grid.historical_spend,
|
|
@@ -1474,10 +1571,139 @@ class BudgetOptimizer:
|
|
|
1474
1571
|
_optimization_grid=optimization_grid,
|
|
1475
1572
|
)
|
|
1476
1573
|
|
|
1574
|
+
def _validate_grid(
|
|
1575
|
+
self,
|
|
1576
|
+
new_data: analyzer.DataTensors | None,
|
|
1577
|
+
use_posterior: bool,
|
|
1578
|
+
start_date: tc.Date,
|
|
1579
|
+
end_date: tc.Date,
|
|
1580
|
+
budget: float | None,
|
|
1581
|
+
pct_of_spend: Sequence[float] | None,
|
|
1582
|
+
spend_constraint_lower: _SpendConstraint,
|
|
1583
|
+
spend_constraint_upper: _SpendConstraint,
|
|
1584
|
+
gtol: float,
|
|
1585
|
+
use_optimal_frequency: bool,
|
|
1586
|
+
use_kpi: bool,
|
|
1587
|
+
optimization_grid: OptimizationGrid,
|
|
1588
|
+
) -> bool:
|
|
1589
|
+
"""Checks if the grid is valid for the optimization scenario."""
|
|
1590
|
+
|
|
1591
|
+
if use_posterior != optimization_grid.use_posterior:
|
|
1592
|
+
warnings.warn(
|
|
1593
|
+
'Given optimization grid was created with `use_posterior` ='
|
|
1594
|
+
f' {optimization_grid.use_posterior}, but optimization was called'
|
|
1595
|
+
f' with `use_posterior` = {use_posterior}. A new grid will be'
|
|
1596
|
+
' created.'
|
|
1597
|
+
)
|
|
1598
|
+
return False
|
|
1599
|
+
|
|
1600
|
+
if use_kpi != optimization_grid.use_kpi:
|
|
1601
|
+
warnings.warn(
|
|
1602
|
+
'Given optimization grid was created with `use_kpi` ='
|
|
1603
|
+
f' {optimization_grid.use_kpi}, but optimization was called'
|
|
1604
|
+
f' with `use_kpi` = {use_kpi}. A new grid will be'
|
|
1605
|
+
' created.'
|
|
1606
|
+
)
|
|
1607
|
+
return False
|
|
1608
|
+
|
|
1609
|
+
if use_optimal_frequency != optimization_grid.use_optimal_frequency:
|
|
1610
|
+
warnings.warn(
|
|
1611
|
+
'Given optimization grid was created with `use_optimal_frequency` ='
|
|
1612
|
+
f' {optimization_grid.use_optimal_frequency}, but optimization was'
|
|
1613
|
+
f' called with `use_optimal_frequency` = {use_optimal_frequency}. A'
|
|
1614
|
+
' new grid will be created.'
|
|
1615
|
+
)
|
|
1616
|
+
return False
|
|
1617
|
+
|
|
1618
|
+
if (
|
|
1619
|
+
start_date != optimization_grid.start_date
|
|
1620
|
+
or end_date != optimization_grid.end_date
|
|
1621
|
+
):
|
|
1622
|
+
warnings.warn(
|
|
1623
|
+
'Given optimization grid was created with `start_date` ='
|
|
1624
|
+
f' {optimization_grid.start_date} and `end_date` ='
|
|
1625
|
+
f' {optimization_grid.end_date}, but optimization was called with'
|
|
1626
|
+
f' `start_date` = {start_date} and `end_date` = {end_date}. A new'
|
|
1627
|
+
' grid will be created.'
|
|
1628
|
+
)
|
|
1629
|
+
return False
|
|
1630
|
+
|
|
1631
|
+
if new_data is None:
|
|
1632
|
+
new_data = analyzer.DataTensors()
|
|
1633
|
+
required_tensors = c.PERFORMANCE_DATA + (c.TIME,)
|
|
1634
|
+
filled_data = new_data.validate_and_fill_missing_data(
|
|
1635
|
+
required_tensors_names=required_tensors, meridian=self._meridian
|
|
1636
|
+
)
|
|
1637
|
+
paid_channels = self._meridian.input_data.get_all_paid_channels()
|
|
1638
|
+
if not np.array_equal(paid_channels, optimization_grid.channels):
|
|
1639
|
+
warnings.warn(
|
|
1640
|
+
'Given optimization grid was created with `channels` ='
|
|
1641
|
+
f' {optimization_grid.channels}, but optimization request was'
|
|
1642
|
+
f' resolved with `channels` = {paid_channels}. A new grid will be'
|
|
1643
|
+
' created.'
|
|
1644
|
+
)
|
|
1645
|
+
return False
|
|
1646
|
+
|
|
1647
|
+
n_channels = len(optimization_grid.channels)
|
|
1648
|
+
selected_times = self._validate_selected_times(
|
|
1649
|
+
start_date=start_date,
|
|
1650
|
+
end_date=end_date,
|
|
1651
|
+
new_data=new_data,
|
|
1652
|
+
)
|
|
1653
|
+
hist_spend = self._analyzer.get_aggregated_spend(
|
|
1654
|
+
new_data=filled_data.filter_fields(c.PAID_CHANNELS + c.SPEND_DATA),
|
|
1655
|
+
selected_times=selected_times,
|
|
1656
|
+
include_media=self._meridian.n_media_channels > 0,
|
|
1657
|
+
include_rf=self._meridian.n_rf_channels > 0,
|
|
1658
|
+
).data
|
|
1659
|
+
budget = budget or np.sum(hist_spend)
|
|
1660
|
+
valid_pct_of_spend = _validate_pct_of_spend(
|
|
1661
|
+
n_channels=n_channels,
|
|
1662
|
+
hist_spend=hist_spend,
|
|
1663
|
+
pct_of_spend=pct_of_spend,
|
|
1664
|
+
)
|
|
1665
|
+
spend = budget * valid_pct_of_spend
|
|
1666
|
+
(optimization_lower_bound, optimization_upper_bound) = (
|
|
1667
|
+
_get_optimization_bounds(
|
|
1668
|
+
n_channels=n_channels,
|
|
1669
|
+
spend=spend,
|
|
1670
|
+
round_factor=optimization_grid.round_factor,
|
|
1671
|
+
spend_constraint_lower=spend_constraint_lower,
|
|
1672
|
+
spend_constraint_upper=spend_constraint_upper,
|
|
1673
|
+
)
|
|
1674
|
+
)
|
|
1675
|
+
try:
|
|
1676
|
+
optimization_grid.check_optimization_bounds(
|
|
1677
|
+
lower_bound=optimization_lower_bound,
|
|
1678
|
+
upper_bound=optimization_upper_bound,
|
|
1679
|
+
)
|
|
1680
|
+
except ValueError as e:
|
|
1681
|
+
warnings.warn(
|
|
1682
|
+
'Optimization called with bounds that are not within the grid. A new'
|
|
1683
|
+
f' grid will be created. Error message: {str(e)}'
|
|
1684
|
+
)
|
|
1685
|
+
return False
|
|
1686
|
+
|
|
1687
|
+
round_factor = _get_round_factor(budget, gtol)
|
|
1688
|
+
if round_factor != optimization_grid.round_factor:
|
|
1689
|
+
warnings.warn(
|
|
1690
|
+
'Optimization accuracy may suffer owing to budget level differences.'
|
|
1691
|
+
' Consider creating a new grid with smaller `gtol` if you intend to'
|
|
1692
|
+
' shrink total budget significantly across optimization runs.'
|
|
1693
|
+
' It is only a problem when you use a much smaller budget, '
|
|
1694
|
+
' for which the intended step size is smaller.'
|
|
1695
|
+
)
|
|
1696
|
+
|
|
1697
|
+
return True
|
|
1698
|
+
|
|
1477
1699
|
def create_optimization_grid(
|
|
1478
1700
|
self,
|
|
1701
|
+
new_data: xr.Dataset | None = None,
|
|
1479
1702
|
use_posterior: bool = True,
|
|
1703
|
+
# TODO: b/409550413 - Remove this argument.
|
|
1480
1704
|
selected_times: tuple[str | None, str | None] | None = None,
|
|
1705
|
+
start_date: tc.Date = None,
|
|
1706
|
+
end_date: tc.Date = None,
|
|
1481
1707
|
budget: float | None = None,
|
|
1482
1708
|
pct_of_spend: Sequence[float] | None = None,
|
|
1483
1709
|
spend_constraint_lower: _SpendConstraint = c.SPEND_CONSTRAINT_DEFAULT,
|
|
@@ -1489,16 +1715,35 @@ class BudgetOptimizer:
|
|
|
1489
1715
|
) -> OptimizationGrid:
|
|
1490
1716
|
"""Creates an `OptimizationGrid` for optimization.
|
|
1491
1717
|
|
|
1718
|
+
If `start_date` or `end_date` is specified, then the default values are
|
|
1719
|
+
inferred based on the subset of time periods specified. Both start and end
|
|
1720
|
+
time selectors should align with the Meridian time dimension coordinates in
|
|
1721
|
+
the underlying model if optimizing the original data. If `new_data` is
|
|
1722
|
+
provided with a different number of time periods than in `InputData`, then
|
|
1723
|
+
the start and end time coordinates must match the time dimensions in
|
|
1724
|
+
`new_data.time`. By default, all time periods are used. Either start or
|
|
1725
|
+
end time component can be `None` to represent the first or the last time
|
|
1726
|
+
coordinate, respectively.
|
|
1727
|
+
|
|
1492
1728
|
Args:
|
|
1729
|
+
new_data: An optional `DataTensors` container with optional tensors:
|
|
1730
|
+
`media`, `reach`, `frequency`, `media_spend`, `rf_spend`,
|
|
1731
|
+
`revenue_per_kpi`, and `time`. If `None`, the original tensors from the
|
|
1732
|
+
Meridian object are used. If `new_data` is provided, the grid is created
|
|
1733
|
+
using the versions of the tensors in `new_data` and the original
|
|
1734
|
+
versions of all the remaining tensors. If any of the tensors in
|
|
1735
|
+
`new_data` is provided with a different number of time periods than in
|
|
1736
|
+
`InputData`, then all tensors must be provided with the same number of
|
|
1737
|
+
time periods and the `time` tensor must be provided.
|
|
1493
1738
|
use_posterior: Boolean. If `True`, then the incremental outcome is derived
|
|
1494
1739
|
from the posterior distribution of the model. Otherwise, the prior
|
|
1495
1740
|
distribution is used.
|
|
1496
|
-
selected_times: Tuple containing the start and end time
|
|
1497
|
-
coordinates
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1741
|
+
selected_times: Deprecated. Tuple containing the start and end time
|
|
1742
|
+
dimension coordinates. Please use `start_date` and `end_date` instead.
|
|
1743
|
+
start_date: Optional start date selector, *inclusive*, in _yyyy-mm-dd_
|
|
1744
|
+
format. Default is `None`, i.e. the first time period.
|
|
1745
|
+
end_date: Optional end date selector, *inclusive*, in _yyyy-mm-dd_ format.
|
|
1746
|
+
Default is `None`, i.e. the last time period.
|
|
1502
1747
|
budget: Number indicating the total budget for the fixed budget scenario.
|
|
1503
1748
|
Defaults to the historical budget.
|
|
1504
1749
|
pct_of_spend: Numeric list of size `n_paid_channels` containing the
|
|
@@ -1545,16 +1790,32 @@ class BudgetOptimizer:
|
|
|
1545
1790
|
An OptimizationGrid object containing the grid data for optimization.
|
|
1546
1791
|
"""
|
|
1547
1792
|
self._validate_model_fit(use_posterior)
|
|
1793
|
+
if new_data is None:
|
|
1794
|
+
new_data = analyzer.DataTensors()
|
|
1795
|
+
|
|
1548
1796
|
if selected_times is not None:
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1797
|
+
warnings.warn(
|
|
1798
|
+
'`selected_times` is deprecated. Please use `start_date` and'
|
|
1799
|
+
' `end_date` instead.',
|
|
1800
|
+
DeprecationWarning,
|
|
1801
|
+
stacklevel=2,
|
|
1553
1802
|
)
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1803
|
+
deprecated_start_date, deprecated_end_date = selected_times
|
|
1804
|
+
start_date = start_date or deprecated_start_date
|
|
1805
|
+
end_date = end_date or deprecated_end_date
|
|
1806
|
+
|
|
1807
|
+
required_tensors = c.PERFORMANCE_DATA + (c.TIME,)
|
|
1808
|
+
filled_data = new_data.validate_and_fill_missing_data(
|
|
1809
|
+
required_tensors_names=required_tensors, meridian=self._meridian
|
|
1810
|
+
)
|
|
1811
|
+
selected_times = self._validate_selected_times(
|
|
1812
|
+
start_date=start_date,
|
|
1813
|
+
end_date=end_date,
|
|
1814
|
+
new_data=filled_data,
|
|
1815
|
+
)
|
|
1816
|
+
hist_spend = self._analyzer.get_aggregated_spend(
|
|
1817
|
+
new_data=filled_data.filter_fields(c.PAID_CHANNELS + c.SPEND_DATA),
|
|
1818
|
+
selected_times=selected_times,
|
|
1558
1819
|
include_media=self._meridian.n_media_channels > 0,
|
|
1559
1820
|
include_rf=self._meridian.n_rf_channels > 0,
|
|
1560
1821
|
).data
|
|
@@ -1579,8 +1840,9 @@ class BudgetOptimizer:
|
|
|
1579
1840
|
if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
|
|
1580
1841
|
optimal_frequency = tf.convert_to_tensor(
|
|
1581
1842
|
self._analyzer.optimal_freq(
|
|
1843
|
+
new_data=filled_data.filter_fields(c.RF_DATA),
|
|
1582
1844
|
use_posterior=use_posterior,
|
|
1583
|
-
selected_times=
|
|
1845
|
+
selected_times=selected_times,
|
|
1584
1846
|
use_kpi=use_kpi,
|
|
1585
1847
|
).optimal_frequency,
|
|
1586
1848
|
dtype=tf.float32,
|
|
@@ -1594,7 +1856,8 @@ class BudgetOptimizer:
|
|
|
1594
1856
|
spend_bound_lower=optimization_lower_bound,
|
|
1595
1857
|
spend_bound_upper=optimization_upper_bound,
|
|
1596
1858
|
step_size=step_size,
|
|
1597
|
-
selected_times=
|
|
1859
|
+
selected_times=selected_times,
|
|
1860
|
+
new_data=filled_data.filter_fields(c.PAID_DATA),
|
|
1598
1861
|
use_posterior=use_posterior,
|
|
1599
1862
|
use_kpi=use_kpi,
|
|
1600
1863
|
optimal_frequency=optimal_frequency,
|
|
@@ -1612,10 +1875,12 @@ class BudgetOptimizer:
|
|
|
1612
1875
|
use_kpi=use_kpi,
|
|
1613
1876
|
use_posterior=use_posterior,
|
|
1614
1877
|
use_optimal_frequency=use_optimal_frequency,
|
|
1878
|
+
start_date=start_date,
|
|
1879
|
+
end_date=end_date,
|
|
1615
1880
|
gtol=gtol,
|
|
1616
1881
|
round_factor=round_factor,
|
|
1617
1882
|
optimal_frequency=optimal_frequency,
|
|
1618
|
-
selected_times=
|
|
1883
|
+
selected_times=selected_times,
|
|
1619
1884
|
)
|
|
1620
1885
|
|
|
1621
1886
|
def _create_grid_dataset(
|
|
@@ -1658,10 +1923,38 @@ class BudgetOptimizer:
|
|
|
1658
1923
|
attrs={c.SPEND_STEP_SIZE: spend_step_size},
|
|
1659
1924
|
)
|
|
1660
1925
|
|
|
1926
|
+
def _validate_selected_times(
|
|
1927
|
+
self,
|
|
1928
|
+
start_date: tc.Date,
|
|
1929
|
+
end_date: tc.Date,
|
|
1930
|
+
new_data: analyzer.DataTensors | None,
|
|
1931
|
+
) -> Sequence[str] | Sequence[bool] | None:
|
|
1932
|
+
"""Validates and returns the selected times."""
|
|
1933
|
+
if start_date is None and end_date is None:
|
|
1934
|
+
return None
|
|
1935
|
+
|
|
1936
|
+
new_data = new_data or analyzer.DataTensors()
|
|
1937
|
+
if new_data.get_modified_times(self._meridian) is None:
|
|
1938
|
+
return self._meridian.expand_selected_time_dims(
|
|
1939
|
+
start_date=start_date,
|
|
1940
|
+
end_date=end_date,
|
|
1941
|
+
)
|
|
1942
|
+
else:
|
|
1943
|
+
assert new_data.time is not None
|
|
1944
|
+
new_times_str = new_data.time.numpy().astype(str).tolist()
|
|
1945
|
+
time_coordinates = tc.TimeCoordinates.from_dates(new_times_str)
|
|
1946
|
+
expanded_dates = time_coordinates.expand_selected_time_dims(
|
|
1947
|
+
start_date=start_date,
|
|
1948
|
+
end_date=end_date,
|
|
1949
|
+
)
|
|
1950
|
+
expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
|
|
1951
|
+
return [x in expanded_str for x in new_times_str]
|
|
1952
|
+
|
|
1661
1953
|
def _get_incremental_outcome_tensors(
|
|
1662
1954
|
self,
|
|
1663
1955
|
hist_spend: np.ndarray,
|
|
1664
1956
|
spend: np.ndarray,
|
|
1957
|
+
new_data: analyzer.DataTensors | None = None,
|
|
1665
1958
|
optimal_frequency: Sequence[float] | None = None,
|
|
1666
1959
|
) -> tuple[
|
|
1667
1960
|
tf.Tensor | None,
|
|
@@ -1686,6 +1979,11 @@ class BudgetOptimizer:
|
|
|
1686
1979
|
Args:
|
|
1687
1980
|
hist_spend: historical spend data.
|
|
1688
1981
|
spend: new optimized spend data.
|
|
1982
|
+
new_data: An optional `DataTensors` object containing the new `media`,
|
|
1983
|
+
`reach`, and `frequency` tensors. If `None`, the existing tensors from
|
|
1984
|
+
the Meridian object are used. If any of the tensors is provided with a
|
|
1985
|
+
different number of time periods than in `InputData`, then all tensors
|
|
1986
|
+
must be provided with the same number of time periods.
|
|
1689
1987
|
optimal_frequency: xr.DataArray with dimension `n_rf_channels`, containing
|
|
1690
1988
|
the optimal frequency per channel, that maximizes posterior mean roi.
|
|
1691
1989
|
Value is `None` if the model does not contain reach and frequency data,
|
|
@@ -1696,13 +1994,18 @@ class BudgetOptimizer:
|
|
|
1696
1994
|
Tuple of tf.tensors (new_media, new_media_spend, new_reach, new_frequency,
|
|
1697
1995
|
new_rf_spend).
|
|
1698
1996
|
"""
|
|
1997
|
+
new_data = new_data or analyzer.DataTensors()
|
|
1998
|
+
filled_data = new_data.validate_and_fill_missing_data(
|
|
1999
|
+
c.PAID_CHANNELS,
|
|
2000
|
+
self._meridian,
|
|
2001
|
+
)
|
|
1699
2002
|
if self._meridian.n_media_channels > 0:
|
|
1700
2003
|
new_media = (
|
|
1701
2004
|
tf.math.divide_no_nan(
|
|
1702
2005
|
spend[: self._meridian.n_media_channels],
|
|
1703
2006
|
hist_spend[: self._meridian.n_media_channels],
|
|
1704
2007
|
)
|
|
1705
|
-
*
|
|
2008
|
+
* filled_data.media
|
|
1706
2009
|
)
|
|
1707
2010
|
new_media_spend = tf.convert_to_tensor(
|
|
1708
2011
|
spend[: self._meridian.n_media_channels]
|
|
@@ -1711,9 +2014,7 @@ class BudgetOptimizer:
|
|
|
1711
2014
|
new_media = None
|
|
1712
2015
|
new_media_spend = None
|
|
1713
2016
|
if self._meridian.n_rf_channels > 0:
|
|
1714
|
-
rf_media =
|
|
1715
|
-
self._meridian.rf_tensors.reach * self._meridian.rf_tensors.frequency
|
|
1716
|
-
)
|
|
2017
|
+
rf_media = filled_data.reach * filled_data.frequency
|
|
1717
2018
|
new_rf_media = (
|
|
1718
2019
|
tf.math.divide_no_nan(
|
|
1719
2020
|
spend[-self._meridian.n_rf_channels :],
|
|
@@ -1722,7 +2023,7 @@ class BudgetOptimizer:
|
|
|
1722
2023
|
* rf_media
|
|
1723
2024
|
)
|
|
1724
2025
|
frequency = (
|
|
1725
|
-
|
|
2026
|
+
filled_data.frequency
|
|
1726
2027
|
if optimal_frequency is None
|
|
1727
2028
|
else optimal_frequency
|
|
1728
2029
|
)
|
|
@@ -1742,9 +2043,10 @@ class BudgetOptimizer:
|
|
|
1742
2043
|
self,
|
|
1743
2044
|
hist_spend: np.ndarray,
|
|
1744
2045
|
spend: np.ndarray,
|
|
2046
|
+
new_data: analyzer.DataTensors | None = None,
|
|
1745
2047
|
use_posterior: bool = True,
|
|
1746
2048
|
use_kpi: bool = False,
|
|
1747
|
-
selected_times: Sequence[str] | None = None,
|
|
2049
|
+
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
1748
2050
|
optimal_frequency: Sequence[float] | None = None,
|
|
1749
2051
|
attrs: Mapping[str, Any] | None = None,
|
|
1750
2052
|
confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
|
|
@@ -1752,15 +2054,22 @@ class BudgetOptimizer:
|
|
|
1752
2054
|
use_historical_budget: bool = True,
|
|
1753
2055
|
) -> xr.Dataset:
|
|
1754
2056
|
"""Creates the budget dataset."""
|
|
2057
|
+
new_data = new_data or analyzer.DataTensors()
|
|
2058
|
+
filled_data = new_data.validate_and_fill_missing_data(
|
|
2059
|
+
c.PAID_DATA + (c.TIME,),
|
|
2060
|
+
self._meridian,
|
|
2061
|
+
)
|
|
1755
2062
|
spend = tf.convert_to_tensor(spend, dtype=tf.float32)
|
|
1756
2063
|
hist_spend = tf.convert_to_tensor(hist_spend, dtype=tf.float32)
|
|
1757
2064
|
(new_media, new_media_spend, new_reach, new_frequency, new_rf_spend) = (
|
|
1758
2065
|
self._get_incremental_outcome_tensors(
|
|
1759
|
-
hist_spend,
|
|
2066
|
+
hist_spend,
|
|
2067
|
+
spend,
|
|
2068
|
+
new_data=filled_data.filter_fields(c.PAID_CHANNELS),
|
|
2069
|
+
optimal_frequency=optimal_frequency,
|
|
1760
2070
|
)
|
|
1761
2071
|
)
|
|
1762
2072
|
budget = np.sum(spend)
|
|
1763
|
-
all_times = self._meridian.input_data.time.values.tolist()
|
|
1764
2073
|
|
|
1765
2074
|
# incremental_outcome here is a tensor with the shape
|
|
1766
2075
|
# (n_chains, n_draws, n_channels)
|
|
@@ -1770,6 +2079,7 @@ class BudgetOptimizer:
|
|
|
1770
2079
|
media=new_media,
|
|
1771
2080
|
reach=new_reach,
|
|
1772
2081
|
frequency=new_frequency,
|
|
2082
|
+
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
1773
2083
|
),
|
|
1774
2084
|
selected_times=selected_times,
|
|
1775
2085
|
use_kpi=use_kpi,
|
|
@@ -1792,6 +2102,9 @@ class BudgetOptimizer:
|
|
|
1792
2102
|
)
|
|
1793
2103
|
|
|
1794
2104
|
aggregated_impressions = self._analyzer.get_aggregated_impressions(
|
|
2105
|
+
new_data=analyzer.DataTensors(
|
|
2106
|
+
media=new_media, reach=new_reach, frequency=new_frequency
|
|
2107
|
+
),
|
|
1795
2108
|
selected_times=selected_times,
|
|
1796
2109
|
selected_geos=None,
|
|
1797
2110
|
aggregate_times=True,
|
|
@@ -1799,10 +2112,11 @@ class BudgetOptimizer:
|
|
|
1799
2112
|
optimal_frequency=optimal_frequency,
|
|
1800
2113
|
include_non_paid_channels=False,
|
|
1801
2114
|
)
|
|
1802
|
-
effectiveness = incremental_outcome / aggregated_impressions
|
|
1803
2115
|
effectiveness_with_mean_median_and_ci = (
|
|
1804
2116
|
analyzer.get_central_tendency_and_ci(
|
|
1805
|
-
data=
|
|
2117
|
+
data=tf.math.divide_no_nan(
|
|
2118
|
+
incremental_outcome, aggregated_impressions
|
|
2119
|
+
),
|
|
1806
2120
|
confidence_level=confidence_level,
|
|
1807
2121
|
include_median=True,
|
|
1808
2122
|
)
|
|
@@ -1822,6 +2136,7 @@ class BudgetOptimizer:
|
|
|
1822
2136
|
frequency=new_frequency,
|
|
1823
2137
|
media_spend=new_media_spend,
|
|
1824
2138
|
rf_spend=new_rf_spend,
|
|
2139
|
+
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
1825
2140
|
),
|
|
1826
2141
|
selected_times=selected_times,
|
|
1827
2142
|
batch_size=batch_size,
|
|
@@ -1860,6 +2175,18 @@ class BudgetOptimizer:
|
|
|
1860
2175
|
c.CPIK: ([c.CHANNEL, c.METRIC], cpik),
|
|
1861
2176
|
}
|
|
1862
2177
|
|
|
2178
|
+
all_times = (
|
|
2179
|
+
filled_data.time.numpy().astype(str).tolist()
|
|
2180
|
+
if filled_data.time is not None
|
|
2181
|
+
else self._meridian.input_data.time.values.tolist()
|
|
2182
|
+
)
|
|
2183
|
+
if selected_times is not None and all(
|
|
2184
|
+
isinstance(time, bool) for time in selected_times
|
|
2185
|
+
):
|
|
2186
|
+
selected_times = [
|
|
2187
|
+
time for time, selected in zip(all_times, selected_times) if selected
|
|
2188
|
+
]
|
|
2189
|
+
|
|
1863
2190
|
attributes = {
|
|
1864
2191
|
c.START_DATE: min(selected_times) if selected_times else all_times[0],
|
|
1865
2192
|
c.END_DATE: max(selected_times) if selected_times else all_times[-1],
|
|
@@ -1889,7 +2216,8 @@ class BudgetOptimizer:
|
|
|
1889
2216
|
i: int,
|
|
1890
2217
|
incremental_outcome_grid: np.ndarray,
|
|
1891
2218
|
multipliers_grid: tf.Tensor,
|
|
1892
|
-
|
|
2219
|
+
new_data: analyzer.DataTensors | None = None,
|
|
2220
|
+
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
1893
2221
|
use_posterior: bool = True,
|
|
1894
2222
|
use_kpi: bool = False,
|
|
1895
2223
|
optimal_frequency: xr.DataArray | None = None,
|
|
@@ -1904,8 +2232,16 @@ class BudgetOptimizer:
|
|
|
1904
2232
|
number of columns is equal to the number of total channels, containing
|
|
1905
2233
|
incremental outcome by channel.
|
|
1906
2234
|
multipliers_grid: A grid derived from spend.
|
|
1907
|
-
|
|
1908
|
-
`
|
|
2235
|
+
new_data: An optional `DataTensors` object containing the new `media`,
|
|
2236
|
+
`reach`, `frequency`, and `revenue_per_kpi` tensors. If `None`, the
|
|
2237
|
+
existing tensors from the Meridian object are used. If any of the
|
|
2238
|
+
tensors is provided with a different number of time periods than in
|
|
2239
|
+
`InputData`, then all tensors must be provided with the same number of
|
|
2240
|
+
time periods.
|
|
2241
|
+
selected_times: Optional list of times to optimize. This can either be a
|
|
2242
|
+
string list containing a subset of time dimension coordinates from
|
|
2243
|
+
`InputData.time` or a boolean list with length equal to the time
|
|
2244
|
+
dimension of the tensor. By default, all time periods are included.
|
|
1909
2245
|
use_posterior: Boolean. If `True`, then the incremental outcome is derived
|
|
1910
2246
|
from the posterior distribution of the model. Otherwise, the prior
|
|
1911
2247
|
distribution is used.
|
|
@@ -1922,10 +2258,14 @@ class BudgetOptimizer:
|
|
|
1922
2258
|
reducing `batch_size`. The calculation will generally be faster with
|
|
1923
2259
|
larger `batch_size` values.
|
|
1924
2260
|
"""
|
|
2261
|
+
new_data = new_data or analyzer.DataTensors()
|
|
2262
|
+
filled_data = new_data.validate_and_fill_missing_data(
|
|
2263
|
+
c.PAID_DATA, self._meridian
|
|
2264
|
+
)
|
|
1925
2265
|
if self._meridian.n_media_channels > 0:
|
|
1926
2266
|
new_media = (
|
|
1927
2267
|
multipliers_grid[i, : self._meridian.n_media_channels]
|
|
1928
|
-
*
|
|
2268
|
+
* filled_data.media
|
|
1929
2269
|
)
|
|
1930
2270
|
else:
|
|
1931
2271
|
new_media = None
|
|
@@ -1934,20 +2274,18 @@ class BudgetOptimizer:
|
|
|
1934
2274
|
new_frequency = None
|
|
1935
2275
|
new_reach = None
|
|
1936
2276
|
elif optimal_frequency is not None:
|
|
1937
|
-
new_frequency = (
|
|
1938
|
-
tf.ones_like(self._meridian.rf_tensors.frequency) * optimal_frequency
|
|
1939
|
-
)
|
|
2277
|
+
new_frequency = tf.ones_like(filled_data.frequency) * optimal_frequency
|
|
1940
2278
|
new_reach = tf.math.divide_no_nan(
|
|
1941
2279
|
multipliers_grid[i, -self._meridian.n_rf_channels :]
|
|
1942
|
-
*
|
|
1943
|
-
*
|
|
2280
|
+
* filled_data.reach
|
|
2281
|
+
* filled_data.frequency,
|
|
1944
2282
|
new_frequency,
|
|
1945
2283
|
)
|
|
1946
2284
|
else:
|
|
1947
|
-
new_frequency =
|
|
2285
|
+
new_frequency = filled_data.frequency
|
|
1948
2286
|
new_reach = (
|
|
1949
2287
|
multipliers_grid[i, -self._meridian.n_rf_channels :]
|
|
1950
|
-
*
|
|
2288
|
+
* filled_data.reach
|
|
1951
2289
|
)
|
|
1952
2290
|
|
|
1953
2291
|
# incremental_outcome returns a three dimensional tensor with dims
|
|
@@ -1960,6 +2298,7 @@ class BudgetOptimizer:
|
|
|
1960
2298
|
media=new_media,
|
|
1961
2299
|
reach=new_reach,
|
|
1962
2300
|
frequency=new_frequency,
|
|
2301
|
+
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
1963
2302
|
),
|
|
1964
2303
|
selected_times=selected_times,
|
|
1965
2304
|
use_kpi=use_kpi,
|
|
@@ -1976,7 +2315,8 @@ class BudgetOptimizer:
|
|
|
1976
2315
|
spend_bound_lower: np.ndarray,
|
|
1977
2316
|
spend_bound_upper: np.ndarray,
|
|
1978
2317
|
step_size: int,
|
|
1979
|
-
|
|
2318
|
+
new_data: analyzer.DataTensors | None = None,
|
|
2319
|
+
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
1980
2320
|
use_posterior: bool = True,
|
|
1981
2321
|
use_kpi: bool = False,
|
|
1982
2322
|
optimal_frequency: xr.DataArray | None = None,
|
|
@@ -1992,8 +2332,16 @@ class BudgetOptimizer:
|
|
|
1992
2332
|
containing the upper constraint spend for each channel.
|
|
1993
2333
|
step_size: Integer indicating the step size, or interval, between values
|
|
1994
2334
|
in the spend grid. All media channels have the same step size.
|
|
1995
|
-
|
|
1996
|
-
`
|
|
2335
|
+
new_data: An optional `DataTensors` object containing the new `media`,
|
|
2336
|
+
`reach`, `frequency`, and `revenue_per_kpi` tensors. If `None`, the
|
|
2337
|
+
existing tensors from the Meridian object are used. If any of the
|
|
2338
|
+
tensors is provided with a different number of time periods than in
|
|
2339
|
+
`InputData`, then all tensors must be provided with the same number of
|
|
2340
|
+
time periods.
|
|
2341
|
+
selected_times: Optional list of times to optimize. This can either be a
|
|
2342
|
+
string list containing a subset of time dimension coordinates from
|
|
2343
|
+
`InputData.time` or a boolean list with length equal to the time
|
|
2344
|
+
dimension of the tensor. By default, all time periods are included.
|
|
1997
2345
|
use_posterior: Boolean. If `True`, then the incremental outcome is derived
|
|
1998
2346
|
from the posterior distribution of the model. Otherwise, the prior
|
|
1999
2347
|
distribution is used.
|
|
@@ -2047,6 +2395,7 @@ class BudgetOptimizer:
|
|
|
2047
2395
|
incremental_outcome_grid=incremental_outcome_grid,
|
|
2048
2396
|
multipliers_grid=multipliers_grid,
|
|
2049
2397
|
selected_times=selected_times,
|
|
2398
|
+
new_data=new_data,
|
|
2050
2399
|
use_posterior=use_posterior,
|
|
2051
2400
|
use_kpi=use_kpi,
|
|
2052
2401
|
optimal_frequency=optimal_frequency,
|
|
@@ -2207,7 +2556,7 @@ def _validate_budget(
|
|
|
2207
2556
|
budget: float | None,
|
|
2208
2557
|
target_roi: float | None,
|
|
2209
2558
|
target_mroi: float | None,
|
|
2210
|
-
):
|
|
2559
|
+
) -> None:
|
|
2211
2560
|
"""Validates the budget optimization arguments."""
|
|
2212
2561
|
if fixed_budget:
|
|
2213
2562
|
if target_roi is not None:
|