google-meridian 1.0.9__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {google_meridian-1.0.9.dist-info → google_meridian-1.1.0.dist-info}/METADATA +2 -2
- google_meridian-1.1.0.dist-info/RECORD +41 -0
- {google_meridian-1.0.9.dist-info → google_meridian-1.1.0.dist-info}/WHEEL +1 -1
- meridian/__init__.py +1 -1
- meridian/analysis/analyzer.py +195 -189
- meridian/analysis/optimizer.py +263 -65
- meridian/analysis/summarizer.py +4 -4
- meridian/analysis/test_utils.py +81 -81
- meridian/analysis/visualizer.py +12 -16
- meridian/constants.py +100 -16
- meridian/data/input_data.py +115 -19
- meridian/data/test_utils.py +116 -5
- meridian/data/time_coordinates.py +3 -3
- meridian/model/media.py +133 -98
- meridian/model/model.py +447 -57
- meridian/model/model_test_data.py +11 -0
- meridian/model/posterior_sampler.py +120 -43
- meridian/model/prior_distribution.py +96 -51
- meridian/model/prior_sampler.py +179 -209
- meridian/model/spec.py +196 -36
- meridian/model/transformers.py +15 -3
- google_meridian-1.0.9.dist-info/RECORD +0 -41
- {google_meridian-1.0.9.dist-info → google_meridian-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {google_meridian-1.0.9.dist-info → google_meridian-1.1.0.dist-info}/top_level.txt +0 -0
meridian/analysis/optimizer.py
CHANGED
|
@@ -98,6 +98,8 @@ class OptimizationGrid:
|
|
|
98
98
|
use_kpi: Whether using generic KPI or revenue.
|
|
99
99
|
use_posterior: Whether posterior distributions were used, or prior.
|
|
100
100
|
use_optimal_frequency: Whether optimal frequency was used.
|
|
101
|
+
start_date: The start date of the optimization period.
|
|
102
|
+
end_date: The end date of the optimization period.
|
|
101
103
|
gtol: Float indicating the acceptable relative error for the budget used in
|
|
102
104
|
the grid setup. The budget is rounded by `10*n`, where `n` is the smallest
|
|
103
105
|
integer such that `(budget - rounded_budget)` is less than or equal to
|
|
@@ -117,10 +119,12 @@ class OptimizationGrid:
|
|
|
117
119
|
use_kpi: bool
|
|
118
120
|
use_posterior: bool
|
|
119
121
|
use_optimal_frequency: bool
|
|
122
|
+
start_date: tc.Date
|
|
123
|
+
end_date: tc.Date
|
|
120
124
|
gtol: float
|
|
121
125
|
round_factor: int
|
|
122
126
|
optimal_frequency: np.ndarray | None
|
|
123
|
-
selected_times: Sequence[str] |
|
|
127
|
+
selected_times: Sequence[str] | None
|
|
124
128
|
|
|
125
129
|
@property
|
|
126
130
|
def grid_dataset(self) -> xr.Dataset:
|
|
@@ -190,7 +194,7 @@ class OptimizationGrid:
|
|
|
190
194
|
variables:
|
|
191
195
|
* `optimized`: media spend that maximizes incremental outcome based
|
|
192
196
|
on spend constraints for all media and RF channels.
|
|
193
|
-
* `non_optimized`:
|
|
197
|
+
* `non_optimized`: rounded channel-level spend.
|
|
194
198
|
|
|
195
199
|
Raises:
|
|
196
200
|
A warning if the budget's rounding should be different from the grid's
|
|
@@ -227,7 +231,7 @@ class OptimizationGrid:
|
|
|
227
231
|
spend_constraint_upper=spend_constraint_upper,
|
|
228
232
|
)
|
|
229
233
|
)
|
|
230
|
-
self.
|
|
234
|
+
self.check_optimization_bounds(
|
|
231
235
|
lower_bound=optimization_lower_bound,
|
|
232
236
|
upper_bound=optimization_upper_bound,
|
|
233
237
|
)
|
|
@@ -236,16 +240,16 @@ class OptimizationGrid:
|
|
|
236
240
|
warnings.warn(
|
|
237
241
|
'Optimization accuracy may suffer owing to budget level differences.'
|
|
238
242
|
' Consider creating a new grid with smaller `gtol` if you intend to'
|
|
239
|
-
|
|
240
|
-
'
|
|
241
|
-
'
|
|
243
|
+
' shrink total budget significantly across optimization runs.'
|
|
244
|
+
' It is only a problem when you use a much smaller budget, '
|
|
245
|
+
' for which the intended step size is smaller. '
|
|
242
246
|
)
|
|
243
247
|
(spend_grid, incremental_outcome_grid) = self._trim_grid(
|
|
244
248
|
spend_bound_lower=optimization_lower_bound,
|
|
245
249
|
spend_bound_upper=optimization_upper_bound,
|
|
246
250
|
)
|
|
251
|
+
rounded_spend = np.round(spend, self.round_factor).astype(int)
|
|
247
252
|
if isinstance(scenario, FixedBudgetScenario):
|
|
248
|
-
rounded_spend = np.round(spend, self.round_factor)
|
|
249
253
|
scenario = dataclasses.replace(
|
|
250
254
|
scenario, total_budget=np.sum(rounded_spend)
|
|
251
255
|
)
|
|
@@ -259,7 +263,7 @@ class OptimizationGrid:
|
|
|
259
263
|
coords={c.CHANNEL: self.channels},
|
|
260
264
|
data_vars={
|
|
261
265
|
c.OPTIMIZED: ([c.CHANNEL], optimal_spend.data),
|
|
262
|
-
c.NON_OPTIMIZED: ([c.CHANNEL],
|
|
266
|
+
c.NON_OPTIMIZED: ([c.CHANNEL], rounded_spend),
|
|
263
267
|
},
|
|
264
268
|
)
|
|
265
269
|
|
|
@@ -345,8 +349,10 @@ class OptimizationGrid:
|
|
|
345
349
|
grid coverage and they are rounded using this grid's round factor.
|
|
346
350
|
|
|
347
351
|
Args:
|
|
348
|
-
spend_bound_lower: The lower bound of spend for each channel.
|
|
349
|
-
|
|
352
|
+
spend_bound_lower: The lower bound of spend for each channel. Must be in
|
|
353
|
+
the same order as `self.channels`.
|
|
354
|
+
spend_bound_upper: The upper bound of spend for each channel. Must be in
|
|
355
|
+
the same order as `self.channels`.
|
|
350
356
|
|
|
351
357
|
Returns:
|
|
352
358
|
updated_spend: The updated spend grid with valid spend values moved up to
|
|
@@ -383,7 +389,7 @@ class OptimizationGrid:
|
|
|
383
389
|
|
|
384
390
|
return (updated_spend, updated_incremental_outcome)
|
|
385
391
|
|
|
386
|
-
def
|
|
392
|
+
def check_optimization_bounds(
|
|
387
393
|
self,
|
|
388
394
|
lower_bound: np.ndarray,
|
|
389
395
|
upper_bound: np.ndarray,
|
|
@@ -392,9 +398,9 @@ class OptimizationGrid:
|
|
|
392
398
|
|
|
393
399
|
Args:
|
|
394
400
|
lower_bound: `np.ndarray` of shape `(n_channels,)` containing the lower
|
|
395
|
-
bound for each channel.
|
|
401
|
+
bound for each channel. Must be in the same order as `self.channels`.
|
|
396
402
|
upper_bound: `np.ndarray` of shape `(n_channels,)` containing the upper
|
|
397
|
-
bound for each channel.
|
|
403
|
+
bound for each channel. Must be in the same order as `self.channels`.
|
|
398
404
|
|
|
399
405
|
Raises:
|
|
400
406
|
ValueError: If the spend grid does not fit within the optimization bounds.
|
|
@@ -1138,21 +1144,26 @@ class OptimizationResults:
|
|
|
1138
1144
|
- self.nonoptimized_data.total_incremental_outcome
|
|
1139
1145
|
)
|
|
1140
1146
|
inc_outcome_prefix = '+' if inc_outcome_diff > 0 else ''
|
|
1147
|
+
currency = '$' if outcome == c.REVENUE else ''
|
|
1141
1148
|
non_optimized_inc_outcome = formatter.StatsSpec(
|
|
1142
1149
|
title=summary_text.NON_OPTIMIZED_INC_OUTCOME_LABEL.format(
|
|
1143
1150
|
outcome=outcome
|
|
1144
1151
|
),
|
|
1145
|
-
stat=formatter.
|
|
1146
|
-
self.nonoptimized_data.total_incremental_outcome,
|
|
1152
|
+
stat=formatter.compact_number(
|
|
1153
|
+
n=self.nonoptimized_data.total_incremental_outcome,
|
|
1154
|
+
precision=0,
|
|
1155
|
+
currency=currency,
|
|
1147
1156
|
),
|
|
1148
1157
|
)
|
|
1149
1158
|
optimized_inc_outcome = formatter.StatsSpec(
|
|
1150
1159
|
title=summary_text.OPTIMIZED_INC_OUTCOME_LABEL.format(outcome=outcome),
|
|
1151
|
-
stat=formatter.
|
|
1152
|
-
self.optimized_data.total_incremental_outcome,
|
|
1160
|
+
stat=formatter.compact_number(
|
|
1161
|
+
n=self.optimized_data.total_incremental_outcome,
|
|
1162
|
+
precision=0,
|
|
1163
|
+
currency=currency,
|
|
1153
1164
|
),
|
|
1154
1165
|
delta=inc_outcome_prefix
|
|
1155
|
-
+ formatter.
|
|
1166
|
+
+ formatter.compact_number(inc_outcome_diff, 0, currency),
|
|
1156
1167
|
)
|
|
1157
1168
|
return [
|
|
1158
1169
|
non_optimized_budget,
|
|
@@ -1276,7 +1287,10 @@ class BudgetOptimizer:
|
|
|
1276
1287
|
self,
|
|
1277
1288
|
new_data: analyzer.DataTensors | None = None,
|
|
1278
1289
|
use_posterior: bool = True,
|
|
1290
|
+
# TODO: b/409550413 - Remove this argument.
|
|
1279
1291
|
selected_times: tuple[str | None, str | None] | None = None,
|
|
1292
|
+
start_date: tc.Date = None,
|
|
1293
|
+
end_date: tc.Date = None,
|
|
1280
1294
|
fixed_budget: bool = True,
|
|
1281
1295
|
budget: float | None = None,
|
|
1282
1296
|
pct_of_spend: Sequence[float] | None = None,
|
|
@@ -1289,6 +1303,7 @@ class BudgetOptimizer:
|
|
|
1289
1303
|
use_kpi: bool = False,
|
|
1290
1304
|
confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
|
|
1291
1305
|
batch_size: int = c.DEFAULT_BATCH_SIZE,
|
|
1306
|
+
optimization_grid: OptimizationGrid | None = None,
|
|
1292
1307
|
) -> OptimizationResults:
|
|
1293
1308
|
"""Finds the optimal budget allocation that maximizes outcome.
|
|
1294
1309
|
|
|
@@ -1313,8 +1328,15 @@ class BudgetOptimizer:
|
|
|
1313
1328
|
`fixed_budget=True`) will be inferred from the `new_data`, but can be
|
|
1314
1329
|
overridden using the `pct_of_spend` and `budget` arguments.
|
|
1315
1330
|
|
|
1316
|
-
If `
|
|
1317
|
-
on the subset of time periods specified.
|
|
1331
|
+
If `start_date` or `end_date` is specified, then the default values are
|
|
1332
|
+
inferred based on the subset of time periods specified. Both start and end
|
|
1333
|
+
time selectors should align with the Meridian time dimension coordinates in
|
|
1334
|
+
the underlying model if optimizing the original data. If `new_data` is
|
|
1335
|
+
provided with a different number of time periods than in `InputData`, then
|
|
1336
|
+
the start and end time coordinates must match the time dimensions in
|
|
1337
|
+
`new_data.time`. By default, all time periods are used. Either start or
|
|
1338
|
+
end time component can be `None` to represent the first or the last time
|
|
1339
|
+
coordinate, respectively.
|
|
1318
1340
|
|
|
1319
1341
|
Args:
|
|
1320
1342
|
new_data: An optional `DataTensors` container with optional tensors:
|
|
@@ -1329,15 +1351,13 @@ class BudgetOptimizer:
|
|
|
1329
1351
|
use_posterior: Boolean. If `True`, then the budget is optimized based on
|
|
1330
1352
|
the posterior distribution of the model. Otherwise, the prior
|
|
1331
1353
|
distribution is used.
|
|
1332
|
-
selected_times: Tuple containing the start and end time
|
|
1333
|
-
coordinates for the duration to run the optimization on.
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
or end time component can be `None` to represent the first or the last
|
|
1340
|
-
time coordinate, respectively.
|
|
1354
|
+
selected_times: Deprecated. Tuple containing the start and end time
|
|
1355
|
+
dimension coordinates for the duration to run the optimization on.
|
|
1356
|
+
Please use `start_date` and `end_date` instead.
|
|
1357
|
+
start_date: Optional start date selector, *inclusive*, in _yyyy-mm-dd_
|
|
1358
|
+
format. Default is `None`, i.e. the first time period.
|
|
1359
|
+
end_date: Optional end date selector, *inclusive* in _yyyy-mm-dd_ format.
|
|
1360
|
+
Default is `None`, i.e. the last time period.
|
|
1341
1361
|
fixed_budget: Boolean indicating whether it's a fixed budget optimization
|
|
1342
1362
|
or flexible budget optimization. Defaults to `True`. If `False`, must
|
|
1343
1363
|
specify either `target_roi` or `target_mroi`.
|
|
@@ -1352,7 +1372,7 @@ class BudgetOptimizer:
|
|
|
1352
1372
|
performance metrics (for example, ROI) and construct the feasible range
|
|
1353
1373
|
of media-level spend with the spend constraints. Consider using
|
|
1354
1374
|
`InputData.get_paid_channels_argument_builder()` to construct this
|
|
1355
|
-
argument.
|
|
1375
|
+
argument.
|
|
1356
1376
|
spend_constraint_lower: Numeric list of size `n_paid_channels` or float
|
|
1357
1377
|
(same constraint for all channels) indicating the lower bound of
|
|
1358
1378
|
media-level spend. If given as a channel-indexed array, the order must
|
|
@@ -1389,11 +1409,27 @@ class BudgetOptimizer:
|
|
|
1389
1409
|
in batches to avoid memory exhaustion. If a memory error occurs, try
|
|
1390
1410
|
reducing `batch_size`. The calculation will generally be faster with
|
|
1391
1411
|
larger `batch_size` values.
|
|
1412
|
+
optimization_grid: An `OptimizationGrid` object containing the grid
|
|
1413
|
+
information. Grid creation is a time-consuming part of optimization.
|
|
1414
|
+
Creating one grid and running various optimizations on it can save time.
|
|
1415
|
+
If `None` or grid doesn't match the optimization arguments, a new grid
|
|
1416
|
+
will be created.
|
|
1392
1417
|
|
|
1393
1418
|
Returns:
|
|
1394
1419
|
An `OptimizationResults` object containing optimized budget allocation
|
|
1395
1420
|
datasets, along with some of the intermediate values used to derive them.
|
|
1396
1421
|
"""
|
|
1422
|
+
if selected_times is not None:
|
|
1423
|
+
warnings.warn(
|
|
1424
|
+
'`selected_times` is deprecated. Please use `start_date` and'
|
|
1425
|
+
' `end_date` instead.',
|
|
1426
|
+
DeprecationWarning,
|
|
1427
|
+
stacklevel=2,
|
|
1428
|
+
)
|
|
1429
|
+
deprecated_start_date, deprecated_end_date = selected_times
|
|
1430
|
+
start_date = start_date or deprecated_start_date
|
|
1431
|
+
end_date = end_date or deprecated_end_date
|
|
1432
|
+
|
|
1397
1433
|
_validate_budget(
|
|
1398
1434
|
fixed_budget=fixed_budget,
|
|
1399
1435
|
budget=budget,
|
|
@@ -1409,19 +1445,36 @@ class BudgetOptimizer:
|
|
|
1409
1445
|
spend_constraint_lower = spend_constraint_default
|
|
1410
1446
|
if spend_constraint_upper is None:
|
|
1411
1447
|
spend_constraint_upper = spend_constraint_default
|
|
1412
|
-
|
|
1448
|
+
use_grid_arg = optimization_grid is not None and self._validate_grid(
|
|
1413
1449
|
new_data=new_data,
|
|
1414
|
-
|
|
1450
|
+
use_posterior=use_posterior,
|
|
1451
|
+
start_date=start_date,
|
|
1452
|
+
end_date=end_date,
|
|
1415
1453
|
budget=budget,
|
|
1416
1454
|
pct_of_spend=pct_of_spend,
|
|
1417
1455
|
spend_constraint_lower=spend_constraint_lower,
|
|
1418
1456
|
spend_constraint_upper=spend_constraint_upper,
|
|
1419
1457
|
gtol=gtol,
|
|
1420
|
-
use_posterior=use_posterior,
|
|
1421
|
-
use_kpi=use_kpi,
|
|
1422
1458
|
use_optimal_frequency=use_optimal_frequency,
|
|
1423
|
-
|
|
1459
|
+
use_kpi=use_kpi,
|
|
1460
|
+
optimization_grid=optimization_grid,
|
|
1424
1461
|
)
|
|
1462
|
+
if optimization_grid is None or not use_grid_arg:
|
|
1463
|
+
optimization_grid = self.create_optimization_grid(
|
|
1464
|
+
new_data=new_data,
|
|
1465
|
+
start_date=start_date,
|
|
1466
|
+
end_date=end_date,
|
|
1467
|
+
budget=budget,
|
|
1468
|
+
pct_of_spend=pct_of_spend,
|
|
1469
|
+
spend_constraint_lower=spend_constraint_lower,
|
|
1470
|
+
spend_constraint_upper=spend_constraint_upper,
|
|
1471
|
+
gtol=gtol,
|
|
1472
|
+
use_posterior=use_posterior,
|
|
1473
|
+
use_kpi=use_kpi,
|
|
1474
|
+
use_optimal_frequency=use_optimal_frequency,
|
|
1475
|
+
batch_size=batch_size,
|
|
1476
|
+
)
|
|
1477
|
+
|
|
1425
1478
|
if fixed_budget:
|
|
1426
1479
|
scenario = FixedBudgetScenario(total_budget=budget)
|
|
1427
1480
|
elif target_roi:
|
|
@@ -1442,26 +1495,24 @@ class BudgetOptimizer:
|
|
|
1442
1495
|
use_historical_budget = budget is None or np.isclose(
|
|
1443
1496
|
budget, np.sum(optimization_grid.historical_spend)
|
|
1444
1497
|
)
|
|
1445
|
-
|
|
1446
|
-
spend.non_optimized, optimization_grid.round_factor
|
|
1447
|
-
).astype(int)
|
|
1498
|
+
new_data = new_data or analyzer.DataTensors()
|
|
1448
1499
|
nonoptimized_data = self._create_budget_dataset(
|
|
1449
|
-
new_data=new_data,
|
|
1500
|
+
new_data=new_data.filter_fields(c.PAID_DATA + (c.TIME,)),
|
|
1450
1501
|
use_posterior=use_posterior,
|
|
1451
1502
|
use_kpi=use_kpi,
|
|
1452
1503
|
hist_spend=optimization_grid.historical_spend,
|
|
1453
|
-
spend=
|
|
1504
|
+
spend=spend.non_optimized,
|
|
1454
1505
|
selected_times=optimization_grid.selected_times,
|
|
1455
1506
|
confidence_level=confidence_level,
|
|
1456
1507
|
batch_size=batch_size,
|
|
1457
1508
|
use_historical_budget=use_historical_budget,
|
|
1458
1509
|
)
|
|
1459
1510
|
nonoptimized_data_with_optimal_freq = self._create_budget_dataset(
|
|
1460
|
-
new_data=new_data,
|
|
1511
|
+
new_data=new_data.filter_fields(c.PAID_DATA + (c.TIME,)),
|
|
1461
1512
|
use_posterior=use_posterior,
|
|
1462
1513
|
use_kpi=use_kpi,
|
|
1463
1514
|
hist_spend=optimization_grid.historical_spend,
|
|
1464
|
-
spend=
|
|
1515
|
+
spend=spend.non_optimized,
|
|
1465
1516
|
selected_times=optimization_grid.selected_times,
|
|
1466
1517
|
optimal_frequency=optimization_grid.optimal_frequency,
|
|
1467
1518
|
confidence_level=confidence_level,
|
|
@@ -1476,7 +1527,7 @@ class BudgetOptimizer:
|
|
|
1476
1527
|
elif target_mroi:
|
|
1477
1528
|
constraints[c.TARGET_MROI] = target_mroi
|
|
1478
1529
|
optimized_data = self._create_budget_dataset(
|
|
1479
|
-
new_data=new_data,
|
|
1530
|
+
new_data=new_data.filter_fields(c.PAID_DATA + (c.TIME,)),
|
|
1480
1531
|
use_posterior=use_posterior,
|
|
1481
1532
|
use_kpi=use_kpi,
|
|
1482
1533
|
hist_spend=optimization_grid.historical_spend,
|
|
@@ -1520,11 +1571,139 @@ class BudgetOptimizer:
|
|
|
1520
1571
|
_optimization_grid=optimization_grid,
|
|
1521
1572
|
)
|
|
1522
1573
|
|
|
1574
|
+
def _validate_grid(
|
|
1575
|
+
self,
|
|
1576
|
+
new_data: analyzer.DataTensors | None,
|
|
1577
|
+
use_posterior: bool,
|
|
1578
|
+
start_date: tc.Date,
|
|
1579
|
+
end_date: tc.Date,
|
|
1580
|
+
budget: float | None,
|
|
1581
|
+
pct_of_spend: Sequence[float] | None,
|
|
1582
|
+
spend_constraint_lower: _SpendConstraint,
|
|
1583
|
+
spend_constraint_upper: _SpendConstraint,
|
|
1584
|
+
gtol: float,
|
|
1585
|
+
use_optimal_frequency: bool,
|
|
1586
|
+
use_kpi: bool,
|
|
1587
|
+
optimization_grid: OptimizationGrid,
|
|
1588
|
+
) -> bool:
|
|
1589
|
+
"""Checks if the grid is valid for the optimization scenario."""
|
|
1590
|
+
|
|
1591
|
+
if use_posterior != optimization_grid.use_posterior:
|
|
1592
|
+
warnings.warn(
|
|
1593
|
+
'Given optimization grid was created with `use_posterior` ='
|
|
1594
|
+
f' {optimization_grid.use_posterior}, but optimization was called'
|
|
1595
|
+
f' with `use_posterior` = {use_posterior}. A new grid will be'
|
|
1596
|
+
' created.'
|
|
1597
|
+
)
|
|
1598
|
+
return False
|
|
1599
|
+
|
|
1600
|
+
if use_kpi != optimization_grid.use_kpi:
|
|
1601
|
+
warnings.warn(
|
|
1602
|
+
'Given optimization grid was created with `use_kpi` ='
|
|
1603
|
+
f' {optimization_grid.use_kpi}, but optimization was called'
|
|
1604
|
+
f' with `use_kpi` = {use_kpi}. A new grid will be'
|
|
1605
|
+
' created.'
|
|
1606
|
+
)
|
|
1607
|
+
return False
|
|
1608
|
+
|
|
1609
|
+
if use_optimal_frequency != optimization_grid.use_optimal_frequency:
|
|
1610
|
+
warnings.warn(
|
|
1611
|
+
'Given optimization grid was created with `use_optimal_frequency` ='
|
|
1612
|
+
f' {optimization_grid.use_optimal_frequency}, but optimization was'
|
|
1613
|
+
f' called with `use_optimal_frequency` = {use_optimal_frequency}. A'
|
|
1614
|
+
' new grid will be created.'
|
|
1615
|
+
)
|
|
1616
|
+
return False
|
|
1617
|
+
|
|
1618
|
+
if (
|
|
1619
|
+
start_date != optimization_grid.start_date
|
|
1620
|
+
or end_date != optimization_grid.end_date
|
|
1621
|
+
):
|
|
1622
|
+
warnings.warn(
|
|
1623
|
+
'Given optimization grid was created with `start_date` ='
|
|
1624
|
+
f' {optimization_grid.start_date} and `end_date` ='
|
|
1625
|
+
f' {optimization_grid.end_date}, but optimization was called with'
|
|
1626
|
+
f' `start_date` = {start_date} and `end_date` = {end_date}. A new'
|
|
1627
|
+
' grid will be created.'
|
|
1628
|
+
)
|
|
1629
|
+
return False
|
|
1630
|
+
|
|
1631
|
+
if new_data is None:
|
|
1632
|
+
new_data = analyzer.DataTensors()
|
|
1633
|
+
required_tensors = c.PERFORMANCE_DATA + (c.TIME,)
|
|
1634
|
+
filled_data = new_data.validate_and_fill_missing_data(
|
|
1635
|
+
required_tensors_names=required_tensors, meridian=self._meridian
|
|
1636
|
+
)
|
|
1637
|
+
paid_channels = self._meridian.input_data.get_all_paid_channels()
|
|
1638
|
+
if not np.array_equal(paid_channels, optimization_grid.channels):
|
|
1639
|
+
warnings.warn(
|
|
1640
|
+
'Given optimization grid was created with `channels` ='
|
|
1641
|
+
f' {optimization_grid.channels}, but optimization request was'
|
|
1642
|
+
f' resolved with `channels` = {paid_channels}. A new grid will be'
|
|
1643
|
+
' created.'
|
|
1644
|
+
)
|
|
1645
|
+
return False
|
|
1646
|
+
|
|
1647
|
+
n_channels = len(optimization_grid.channels)
|
|
1648
|
+
selected_times = self._validate_selected_times(
|
|
1649
|
+
start_date=start_date,
|
|
1650
|
+
end_date=end_date,
|
|
1651
|
+
new_data=new_data,
|
|
1652
|
+
)
|
|
1653
|
+
hist_spend = self._analyzer.get_aggregated_spend(
|
|
1654
|
+
new_data=filled_data.filter_fields(c.PAID_CHANNELS + c.SPEND_DATA),
|
|
1655
|
+
selected_times=selected_times,
|
|
1656
|
+
include_media=self._meridian.n_media_channels > 0,
|
|
1657
|
+
include_rf=self._meridian.n_rf_channels > 0,
|
|
1658
|
+
).data
|
|
1659
|
+
budget = budget or np.sum(hist_spend)
|
|
1660
|
+
valid_pct_of_spend = _validate_pct_of_spend(
|
|
1661
|
+
n_channels=n_channels,
|
|
1662
|
+
hist_spend=hist_spend,
|
|
1663
|
+
pct_of_spend=pct_of_spend,
|
|
1664
|
+
)
|
|
1665
|
+
spend = budget * valid_pct_of_spend
|
|
1666
|
+
(optimization_lower_bound, optimization_upper_bound) = (
|
|
1667
|
+
_get_optimization_bounds(
|
|
1668
|
+
n_channels=n_channels,
|
|
1669
|
+
spend=spend,
|
|
1670
|
+
round_factor=optimization_grid.round_factor,
|
|
1671
|
+
spend_constraint_lower=spend_constraint_lower,
|
|
1672
|
+
spend_constraint_upper=spend_constraint_upper,
|
|
1673
|
+
)
|
|
1674
|
+
)
|
|
1675
|
+
try:
|
|
1676
|
+
optimization_grid.check_optimization_bounds(
|
|
1677
|
+
lower_bound=optimization_lower_bound,
|
|
1678
|
+
upper_bound=optimization_upper_bound,
|
|
1679
|
+
)
|
|
1680
|
+
except ValueError as e:
|
|
1681
|
+
warnings.warn(
|
|
1682
|
+
'Optimization called with bounds that are not within the grid. A new'
|
|
1683
|
+
f' grid will be created. Error message: {str(e)}'
|
|
1684
|
+
)
|
|
1685
|
+
return False
|
|
1686
|
+
|
|
1687
|
+
round_factor = _get_round_factor(budget, gtol)
|
|
1688
|
+
if round_factor != optimization_grid.round_factor:
|
|
1689
|
+
warnings.warn(
|
|
1690
|
+
'Optimization accuracy may suffer owing to budget level differences.'
|
|
1691
|
+
' Consider creating a new grid with smaller `gtol` if you intend to'
|
|
1692
|
+
' shrink total budget significantly across optimization runs.'
|
|
1693
|
+
' It is only a problem when you use a much smaller budget, '
|
|
1694
|
+
' for which the intended step size is smaller.'
|
|
1695
|
+
)
|
|
1696
|
+
|
|
1697
|
+
return True
|
|
1698
|
+
|
|
1523
1699
|
def create_optimization_grid(
|
|
1524
1700
|
self,
|
|
1525
1701
|
new_data: xr.Dataset | None = None,
|
|
1526
1702
|
use_posterior: bool = True,
|
|
1703
|
+
# TODO: b/409550413 - Remove this argument.
|
|
1527
1704
|
selected_times: tuple[str | None, str | None] | None = None,
|
|
1705
|
+
start_date: tc.Date = None,
|
|
1706
|
+
end_date: tc.Date = None,
|
|
1528
1707
|
budget: float | None = None,
|
|
1529
1708
|
pct_of_spend: Sequence[float] | None = None,
|
|
1530
1709
|
spend_constraint_lower: _SpendConstraint = c.SPEND_CONSTRAINT_DEFAULT,
|
|
@@ -1536,6 +1715,16 @@ class BudgetOptimizer:
|
|
|
1536
1715
|
) -> OptimizationGrid:
|
|
1537
1716
|
"""Creates a OptimizationGrid for optimization.
|
|
1538
1717
|
|
|
1718
|
+
If `start_date` or `end_date` is specified, then the default values are
|
|
1719
|
+
inferred based on the subset of time periods specified. Both start and end
|
|
1720
|
+
time selectors should align with the Meridian time dimension coordinates in
|
|
1721
|
+
the underlying model if optimizing the original data. If `new_data` is
|
|
1722
|
+
provided with a different number of time periods than in `InputData`, then
|
|
1723
|
+
the start and end time coordinates must match the time dimensions in
|
|
1724
|
+
`new_data.time`. By default, all time periods are used. Either start or
|
|
1725
|
+
end time component can be `None` to represent the first or the last time
|
|
1726
|
+
coordinate, respectively.
|
|
1727
|
+
|
|
1539
1728
|
Args:
|
|
1540
1729
|
new_data: An optional `DataTensors` container with optional tensors:
|
|
1541
1730
|
`media`, `reach`, `frequency`, `media_spend`, `rf_spend`,
|
|
@@ -1549,15 +1738,12 @@ class BudgetOptimizer:
|
|
|
1549
1738
|
use_posterior: Boolean. If `True`, then the incremental outcome is derived
|
|
1550
1739
|
from the posterior distribution of the model. Otherwise, the prior
|
|
1551
1740
|
distribution is used.
|
|
1552
|
-
selected_times: Tuple containing the start and end time
|
|
1553
|
-
coordinates
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
in `new_data.time`. By default, all times periods are used. Either start
|
|
1559
|
-
or end time component can be `None` to represent the first or the last
|
|
1560
|
-
time coordinate, respectively.
|
|
1741
|
+
selected_times: Deprecated. Tuple containing the start and end time
|
|
1742
|
+
dimension coordinates. Please use `start_date` and `end_date` instead.
|
|
1743
|
+
start_date: Optional start date selector, *inclusive*, in _yyyy-mm-dd_
|
|
1744
|
+
format. Default is `None`, i.e. the first time period.
|
|
1745
|
+
end_date: Optional end date selector, *inclusive* in _yyyy-mm-dd_ format.
|
|
1746
|
+
Default is `None`, i.e. the last time period.
|
|
1561
1747
|
budget: Number indicating the total budget for the fixed budget scenario.
|
|
1562
1748
|
Defaults to the historical budget.
|
|
1563
1749
|
pct_of_spend: Numeric list of size `n_paid_channels` containing the
|
|
@@ -1569,7 +1755,7 @@ class BudgetOptimizer:
|
|
|
1569
1755
|
performance metrics (for example, ROI) and construct the feasible range
|
|
1570
1756
|
of media-level spend with the spend constraints. Consider using
|
|
1571
1757
|
`InputData.get_paid_channels_argument_builder()` to construct this
|
|
1572
|
-
argument.
|
|
1758
|
+
argument.
|
|
1573
1759
|
spend_constraint_lower: Numeric list of size `n_paid_channels` or float
|
|
1574
1760
|
(same constraint for all channels) indicating the lower bound of
|
|
1575
1761
|
media-level spend. If given as a channel-indexed array, the order must
|
|
@@ -1607,17 +1793,29 @@ class BudgetOptimizer:
|
|
|
1607
1793
|
if new_data is None:
|
|
1608
1794
|
new_data = analyzer.DataTensors()
|
|
1609
1795
|
|
|
1796
|
+
if selected_times is not None:
|
|
1797
|
+
warnings.warn(
|
|
1798
|
+
'`selected_times` is deprecated. Please use `start_date` and'
|
|
1799
|
+
' `end_date` instead.',
|
|
1800
|
+
DeprecationWarning,
|
|
1801
|
+
stacklevel=2,
|
|
1802
|
+
)
|
|
1803
|
+
deprecated_start_date, deprecated_end_date = selected_times
|
|
1804
|
+
start_date = start_date or deprecated_start_date
|
|
1805
|
+
end_date = end_date or deprecated_end_date
|
|
1806
|
+
|
|
1610
1807
|
required_tensors = c.PERFORMANCE_DATA + (c.TIME,)
|
|
1611
1808
|
filled_data = new_data.validate_and_fill_missing_data(
|
|
1612
1809
|
required_tensors_names=required_tensors, meridian=self._meridian
|
|
1613
1810
|
)
|
|
1614
|
-
|
|
1615
|
-
|
|
1616
|
-
|
|
1811
|
+
selected_times = self._validate_selected_times(
|
|
1812
|
+
start_date=start_date,
|
|
1813
|
+
end_date=end_date,
|
|
1814
|
+
new_data=filled_data,
|
|
1617
1815
|
)
|
|
1618
1816
|
hist_spend = self._analyzer.get_aggregated_spend(
|
|
1619
1817
|
new_data=filled_data.filter_fields(c.PAID_CHANNELS + c.SPEND_DATA),
|
|
1620
|
-
selected_times=
|
|
1818
|
+
selected_times=selected_times,
|
|
1621
1819
|
include_media=self._meridian.n_media_channels > 0,
|
|
1622
1820
|
include_rf=self._meridian.n_rf_channels > 0,
|
|
1623
1821
|
).data
|
|
@@ -1644,7 +1842,7 @@ class BudgetOptimizer:
|
|
|
1644
1842
|
self._analyzer.optimal_freq(
|
|
1645
1843
|
new_data=filled_data.filter_fields(c.RF_DATA),
|
|
1646
1844
|
use_posterior=use_posterior,
|
|
1647
|
-
selected_times=
|
|
1845
|
+
selected_times=selected_times,
|
|
1648
1846
|
use_kpi=use_kpi,
|
|
1649
1847
|
).optimal_frequency,
|
|
1650
1848
|
dtype=tf.float32,
|
|
@@ -1658,7 +1856,7 @@ class BudgetOptimizer:
|
|
|
1658
1856
|
spend_bound_lower=optimization_lower_bound,
|
|
1659
1857
|
spend_bound_upper=optimization_upper_bound,
|
|
1660
1858
|
step_size=step_size,
|
|
1661
|
-
selected_times=
|
|
1859
|
+
selected_times=selected_times,
|
|
1662
1860
|
new_data=filled_data.filter_fields(c.PAID_DATA),
|
|
1663
1861
|
use_posterior=use_posterior,
|
|
1664
1862
|
use_kpi=use_kpi,
|
|
@@ -1677,10 +1875,12 @@ class BudgetOptimizer:
|
|
|
1677
1875
|
use_kpi=use_kpi,
|
|
1678
1876
|
use_posterior=use_posterior,
|
|
1679
1877
|
use_optimal_frequency=use_optimal_frequency,
|
|
1878
|
+
start_date=start_date,
|
|
1879
|
+
end_date=end_date,
|
|
1680
1880
|
gtol=gtol,
|
|
1681
1881
|
round_factor=round_factor,
|
|
1682
1882
|
optimal_frequency=optimal_frequency,
|
|
1683
|
-
selected_times=
|
|
1883
|
+
selected_times=selected_times,
|
|
1684
1884
|
)
|
|
1685
1885
|
|
|
1686
1886
|
def _create_grid_dataset(
|
|
@@ -1725,13 +1925,11 @@ class BudgetOptimizer:
|
|
|
1725
1925
|
|
|
1726
1926
|
def _validate_selected_times(
|
|
1727
1927
|
self,
|
|
1728
|
-
|
|
1928
|
+
start_date: tc.Date,
|
|
1929
|
+
end_date: tc.Date,
|
|
1729
1930
|
new_data: analyzer.DataTensors | None,
|
|
1730
1931
|
) -> Sequence[str] | Sequence[bool] | None:
|
|
1731
1932
|
"""Validates and returns the selected times."""
|
|
1732
|
-
if selected_times is None:
|
|
1733
|
-
return None
|
|
1734
|
-
start_date, end_date = selected_times
|
|
1735
1933
|
if start_date is None and end_date is None:
|
|
1736
1934
|
return None
|
|
1737
1935
|
|
|
@@ -2358,7 +2556,7 @@ def _validate_budget(
|
|
|
2358
2556
|
budget: float | None,
|
|
2359
2557
|
target_roi: float | None,
|
|
2360
2558
|
target_mroi: float | None,
|
|
2361
|
-
):
|
|
2559
|
+
) -> None:
|
|
2362
2560
|
"""Validates the budget optimization arguments."""
|
|
2363
2561
|
if fixed_budget:
|
|
2364
2562
|
if target_roi is not None:
|
meridian/analysis/summarizer.py
CHANGED
|
@@ -75,8 +75,8 @@ class Summarizer:
|
|
|
75
75
|
self,
|
|
76
76
|
filename: str,
|
|
77
77
|
filepath: str,
|
|
78
|
-
start_date: tc.Date
|
|
79
|
-
end_date: tc.Date
|
|
78
|
+
start_date: tc.Date = None,
|
|
79
|
+
end_date: tc.Date = None,
|
|
80
80
|
):
|
|
81
81
|
"""Generates and saves the HTML results summary output.
|
|
82
82
|
|
|
@@ -93,8 +93,8 @@ class Summarizer:
|
|
|
93
93
|
|
|
94
94
|
def _gen_model_results_summary(
|
|
95
95
|
self,
|
|
96
|
-
start_date: tc.Date
|
|
97
|
-
end_date: tc.Date
|
|
96
|
+
start_date: tc.Date = None,
|
|
97
|
+
end_date: tc.Date = None,
|
|
98
98
|
) -> str:
|
|
99
99
|
"""Generate HTML results summary output (as sanitized content str)."""
|
|
100
100
|
all_dates = self._meridian.input_data.time_coordinates.all_dates
|