google-meridian 1.1.6__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {google_meridian-1.1.6.dist-info → google_meridian-1.2.1.dist-info}/METADATA +8 -2
- google_meridian-1.2.1.dist-info/RECORD +52 -0
- meridian/__init__.py +1 -0
- meridian/analysis/analyzer.py +621 -393
- meridian/analysis/optimizer.py +403 -351
- meridian/analysis/summarizer.py +31 -16
- meridian/analysis/test_utils.py +96 -94
- meridian/analysis/visualizer.py +53 -54
- meridian/backend/__init__.py +975 -0
- meridian/backend/config.py +118 -0
- meridian/backend/test_utils.py +181 -0
- meridian/constants.py +71 -10
- meridian/data/input_data.py +99 -0
- meridian/data/test_utils.py +146 -12
- meridian/mlflow/autolog.py +2 -2
- meridian/model/adstock_hill.py +280 -33
- meridian/model/eda/__init__.py +17 -0
- meridian/model/eda/eda_engine.py +735 -0
- meridian/model/knots.py +525 -2
- meridian/model/media.py +62 -54
- meridian/model/model.py +224 -97
- meridian/model/model_test_data.py +331 -159
- meridian/model/posterior_sampler.py +388 -383
- meridian/model/prior_distribution.py +612 -177
- meridian/model/prior_sampler.py +65 -65
- meridian/model/spec.py +23 -3
- meridian/model/transformers.py +55 -49
- meridian/version.py +1 -1
- google_meridian-1.1.6.dist-info/RECORD +0 -47
- {google_meridian-1.1.6.dist-info → google_meridian-1.2.1.dist-info}/WHEEL +0 -0
- {google_meridian-1.1.6.dist-info → google_meridian-1.2.1.dist-info}/licenses/LICENSE +0 -0
- {google_meridian-1.1.6.dist-info → google_meridian-1.2.1.dist-info}/top_level.txt +0 -0
meridian/analysis/optimizer.py
CHANGED
|
@@ -24,15 +24,15 @@ import warnings
|
|
|
24
24
|
|
|
25
25
|
import altair as alt
|
|
26
26
|
import jinja2
|
|
27
|
+
from meridian import backend
|
|
27
28
|
from meridian import constants as c
|
|
28
|
-
from meridian.analysis import analyzer
|
|
29
|
+
from meridian.analysis import analyzer as analyzer_module
|
|
29
30
|
from meridian.analysis import formatter
|
|
30
31
|
from meridian.analysis import summary_text
|
|
31
32
|
from meridian.data import time_coordinates as tc
|
|
32
33
|
from meridian.model import model
|
|
33
34
|
import numpy as np
|
|
34
35
|
import pandas as pd
|
|
35
|
-
import tensorflow as tf
|
|
36
36
|
import xarray as xr
|
|
37
37
|
|
|
38
38
|
|
|
@@ -40,6 +40,10 @@ __all__ = [
|
|
|
40
40
|
'BudgetOptimizer',
|
|
41
41
|
'OptimizationGrid',
|
|
42
42
|
'OptimizationResults',
|
|
43
|
+
'FixedBudgetScenario',
|
|
44
|
+
'FlexibleBudgetScenario',
|
|
45
|
+
'get_optimization_bounds',
|
|
46
|
+
'get_round_factor',
|
|
43
47
|
]
|
|
44
48
|
|
|
45
49
|
# Disable max row limitations in Altair.
|
|
@@ -110,7 +114,11 @@ class OptimizationGrid:
|
|
|
110
114
|
does not contain reach and frequency data, or if the model does contain
|
|
111
115
|
reach and frequency data, but historical frequency is used for the
|
|
112
116
|
optimization scenario.
|
|
113
|
-
selected_times: The time coordinates from the model used in this grid.
|
|
117
|
+
selected_times: The time coordinates from the model used in this grid. If
|
|
118
|
+
new data with modified time coordinates is used for optimization, this
|
|
119
|
+
is a list of booleans indicating which time coordinates are selected.
|
|
120
|
+
Otherwise, this is a list of strings indicating the time coordinates used
|
|
121
|
+
in this grid.
|
|
114
122
|
"""
|
|
115
123
|
|
|
116
124
|
_grid_dataset: xr.Dataset
|
|
@@ -124,7 +132,7 @@ class OptimizationGrid:
|
|
|
124
132
|
gtol: float
|
|
125
133
|
round_factor: int
|
|
126
134
|
optimal_frequency: np.ndarray | None
|
|
127
|
-
selected_times: Sequence[str] | None
|
|
135
|
+
selected_times: Sequence[str] | Sequence[bool] | None
|
|
128
136
|
|
|
129
137
|
@property
|
|
130
138
|
def grid_dataset(self) -> xr.Dataset:
|
|
@@ -139,12 +147,12 @@ class OptimizationGrid:
|
|
|
139
147
|
return self._grid_dataset
|
|
140
148
|
|
|
141
149
|
@property
|
|
142
|
-
def spend_grid(self) ->
|
|
150
|
+
def spend_grid(self) -> xr.DataArray:
|
|
143
151
|
"""The spend grid."""
|
|
144
152
|
return self.grid_dataset.spend_grid
|
|
145
153
|
|
|
146
154
|
@property
|
|
147
|
-
def incremental_outcome_grid(self) ->
|
|
155
|
+
def incremental_outcome_grid(self) -> xr.DataArray:
|
|
148
156
|
"""The incremental outcome grid."""
|
|
149
157
|
return self.grid_dataset.incremental_outcome_grid
|
|
150
158
|
|
|
@@ -231,11 +239,7 @@ class OptimizationGrid:
|
|
|
231
239
|
spend_constraint_upper=spend_constraint_upper,
|
|
232
240
|
)
|
|
233
241
|
)
|
|
234
|
-
self.
|
|
235
|
-
lower_bound=optimization_lower_bound,
|
|
236
|
-
upper_bound=optimization_upper_bound,
|
|
237
|
-
)
|
|
238
|
-
round_factor = _get_round_factor(budget, self.gtol)
|
|
242
|
+
round_factor = get_round_factor(budget, self.gtol)
|
|
239
243
|
if round_factor != self.round_factor:
|
|
240
244
|
warnings.warn(
|
|
241
245
|
'Optimization accuracy may suffer owing to budget level differences.'
|
|
@@ -244,7 +248,7 @@ class OptimizationGrid:
|
|
|
244
248
|
' It is only a problem when you use a much smaller budget, '
|
|
245
249
|
' for which the intended step size is smaller. '
|
|
246
250
|
)
|
|
247
|
-
(spend_grid, incremental_outcome_grid) = self.
|
|
251
|
+
(spend_grid, incremental_outcome_grid) = self.trim_grids(
|
|
248
252
|
spend_bound_lower=optimization_lower_bound,
|
|
249
253
|
spend_bound_upper=optimization_upper_bound,
|
|
250
254
|
)
|
|
@@ -267,86 +271,12 @@ class OptimizationGrid:
|
|
|
267
271
|
},
|
|
268
272
|
)
|
|
269
273
|
|
|
270
|
-
def
|
|
271
|
-
self,
|
|
272
|
-
spend_grid: np.ndarray,
|
|
273
|
-
incremental_outcome_grid: np.ndarray,
|
|
274
|
-
scenario: FixedBudgetScenario | FlexibleBudgetScenario,
|
|
275
|
-
) -> np.ndarray:
|
|
276
|
-
"""Hill-climbing search algorithm for budget optimization.
|
|
277
|
-
|
|
278
|
-
Args:
|
|
279
|
-
spend_grid: Discrete grid with dimensions (`grid_length` x
|
|
280
|
-
`n_total_channels`) containing spend by channel for all media and RF
|
|
281
|
-
channels, used in the hill-climbing search algorithm.
|
|
282
|
-
incremental_outcome_grid: Discrete grid with dimensions (`grid_length` x
|
|
283
|
-
`n_total_channels`) containing incremental outcome by channel for all
|
|
284
|
-
media and RF channels, used in the hill-climbing search algorithm.
|
|
285
|
-
scenario: The optimization scenario with corresponding parameters.
|
|
286
|
-
|
|
287
|
-
Returns:
|
|
288
|
-
optimal_spend: `np.ndarray` of dimension (`n_total_channels`) containing
|
|
289
|
-
the media spend that maximizes incremental outcome based on spend
|
|
290
|
-
constraints for all media and RF channels.
|
|
291
|
-
optimal_inc_outcome: `np.ndarray` of dimension (`n_total_channels`)
|
|
292
|
-
containing the post optimization incremental outcome per channel for all
|
|
293
|
-
media and RF channels.
|
|
294
|
-
"""
|
|
295
|
-
spend = spend_grid[0, :].copy()
|
|
296
|
-
incremental_outcome = incremental_outcome_grid[0, :].copy()
|
|
297
|
-
spend_grid = spend_grid[1:, :]
|
|
298
|
-
incremental_outcome_grid = incremental_outcome_grid[1:, :]
|
|
299
|
-
iterative_roi_grid = np.round(
|
|
300
|
-
tf.math.divide_no_nan(
|
|
301
|
-
incremental_outcome_grid - incremental_outcome, spend_grid - spend
|
|
302
|
-
),
|
|
303
|
-
decimals=8,
|
|
304
|
-
)
|
|
305
|
-
while True:
|
|
306
|
-
spend_optimal = spend.astype(int)
|
|
307
|
-
# If none of the exit criteria are met roi_grid will eventually be filled
|
|
308
|
-
# with all nans.
|
|
309
|
-
if np.isnan(iterative_roi_grid).all():
|
|
310
|
-
break
|
|
311
|
-
point = np.unravel_index(
|
|
312
|
-
np.nanargmax(iterative_roi_grid), iterative_roi_grid.shape
|
|
313
|
-
)
|
|
314
|
-
row_idx = point[0]
|
|
315
|
-
media_idx = point[1]
|
|
316
|
-
spend[media_idx] = spend_grid[row_idx, media_idx]
|
|
317
|
-
incremental_outcome[media_idx] = incremental_outcome_grid[
|
|
318
|
-
row_idx, media_idx
|
|
319
|
-
]
|
|
320
|
-
roi_grid_point = iterative_roi_grid[row_idx, media_idx]
|
|
321
|
-
if _exceeds_optimization_constraints(
|
|
322
|
-
spend=spend,
|
|
323
|
-
incremental_outcome=incremental_outcome,
|
|
324
|
-
roi_grid_point=roi_grid_point,
|
|
325
|
-
scenario=scenario,
|
|
326
|
-
):
|
|
327
|
-
break
|
|
328
|
-
|
|
329
|
-
iterative_roi_grid[0 : row_idx + 1, media_idx] = np.nan
|
|
330
|
-
iterative_roi_grid[row_idx + 1 :, media_idx] = np.round(
|
|
331
|
-
tf.math.divide_no_nan(
|
|
332
|
-
incremental_outcome_grid[row_idx + 1 :, media_idx]
|
|
333
|
-
- incremental_outcome_grid[row_idx, media_idx],
|
|
334
|
-
spend_grid[row_idx + 1 :, media_idx]
|
|
335
|
-
- spend_grid[row_idx, media_idx],
|
|
336
|
-
),
|
|
337
|
-
decimals=8,
|
|
338
|
-
)
|
|
339
|
-
return spend_optimal
|
|
340
|
-
|
|
341
|
-
def _trim_grid(
|
|
274
|
+
def trim_grids(
|
|
342
275
|
self,
|
|
343
276
|
spend_bound_lower: np.ndarray,
|
|
344
277
|
spend_bound_upper: np.ndarray,
|
|
345
|
-
) -> tuple[
|
|
346
|
-
"""
|
|
347
|
-
|
|
348
|
-
It is assumed that spend bounds are validated: their values are within the
|
|
349
|
-
grid coverage and they are rounded using this grid's round factor.
|
|
278
|
+
) -> tuple[xr.DataArray, xr.DataArray]:
|
|
279
|
+
"""Trims the grids based on a more restricted spend bound.
|
|
350
280
|
|
|
351
281
|
Args:
|
|
352
282
|
spend_bound_lower: The lower bound of spend for each channel. Must be in
|
|
@@ -355,12 +285,15 @@ class OptimizationGrid:
|
|
|
355
285
|
the same order as `self.channels`.
|
|
356
286
|
|
|
357
287
|
Returns:
|
|
358
|
-
updated_spend: The updated spend grid with valid spend values
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
invalid incremental outcome values filled with NaN.
|
|
288
|
+
updated_spend: The updated spend grid with only valid spend values.
|
|
289
|
+
updated_incremental_outcome: The updated incremental outcome grid
|
|
290
|
+
containing only the corresponding incremental outcome values for the
|
|
291
|
+
updated spend grid.
|
|
363
292
|
"""
|
|
293
|
+
self.check_optimization_bounds(
|
|
294
|
+
lower_bound=spend_bound_lower,
|
|
295
|
+
upper_bound=spend_bound_upper,
|
|
296
|
+
)
|
|
364
297
|
spend_grid = self.spend_grid
|
|
365
298
|
updated_spend = self.spend_grid.copy()
|
|
366
299
|
updated_incremental_outcome = self.incremental_outcome_grid.copy()
|
|
@@ -387,6 +320,12 @@ class OptimizationGrid:
|
|
|
387
320
|
updated_spend[nan_indices:, ch] = np.nan
|
|
388
321
|
updated_incremental_outcome[nan_indices:, ch] = np.nan
|
|
389
322
|
|
|
323
|
+
# Drop the rows with all NaN values.
|
|
324
|
+
updated_spend = updated_spend.dropna(dim=c.GRID_SPEND_INDEX, how='all')
|
|
325
|
+
updated_incremental_outcome = updated_incremental_outcome.dropna(
|
|
326
|
+
dim=c.GRID_SPEND_INDEX, how='all'
|
|
327
|
+
)
|
|
328
|
+
|
|
390
329
|
return (updated_spend, updated_incremental_outcome)
|
|
391
330
|
|
|
392
331
|
def check_optimization_bounds(
|
|
@@ -429,46 +368,107 @@ class OptimizationGrid:
|
|
|
429
368
|
+ '\n'.join(errors)
|
|
430
369
|
)
|
|
431
370
|
|
|
371
|
+
def _grid_search(
|
|
372
|
+
self,
|
|
373
|
+
spend_grid: xr.DataArray,
|
|
374
|
+
incremental_outcome_grid: xr.DataArray,
|
|
375
|
+
scenario: FixedBudgetScenario | FlexibleBudgetScenario,
|
|
376
|
+
) -> np.ndarray:
|
|
377
|
+
"""Hill-climbing search algorithm for budget optimization.
|
|
378
|
+
|
|
379
|
+
Args:
|
|
380
|
+
spend_grid: Discrete grid with dimensions (`grid_length` x
|
|
381
|
+
`n_total_channels`) containing spend by channel for all media and RF
|
|
382
|
+
channels, used in the hill-climbing search algorithm.
|
|
383
|
+
incremental_outcome_grid: Discrete grid with dimensions (`grid_length` x
|
|
384
|
+
`n_total_channels`) containing incremental outcome by channel for all
|
|
385
|
+
media and RF channels, used in the hill-climbing search algorithm.
|
|
386
|
+
scenario: The optimization scenario with corresponding parameters.
|
|
387
|
+
|
|
388
|
+
Returns:
|
|
389
|
+
`np.ndarray` of dimension (`n_total_channels`) containing the optimal
|
|
390
|
+
media spend that maximizes incremental outcome based on spend constraints
|
|
391
|
+
for all media and RF channels.
|
|
392
|
+
"""
|
|
393
|
+
spend = spend_grid[0, :].copy()
|
|
394
|
+
incremental_outcome = incremental_outcome_grid[0, :].copy()
|
|
395
|
+
spend_grid = spend_grid[1:, :]
|
|
396
|
+
incremental_outcome_grid = incremental_outcome_grid[1:, :]
|
|
397
|
+
iterative_roi_grid = np.round(
|
|
398
|
+
backend.divide_no_nan(
|
|
399
|
+
incremental_outcome_grid - incremental_outcome, spend_grid - spend
|
|
400
|
+
),
|
|
401
|
+
decimals=8,
|
|
402
|
+
)
|
|
403
|
+
while True:
|
|
404
|
+
spend_optimal = spend.astype(int)
|
|
405
|
+
# If none of the exit criteria are met roi_grid will eventually be filled
|
|
406
|
+
# with all nans.
|
|
407
|
+
if np.isnan(iterative_roi_grid).all():
|
|
408
|
+
break
|
|
409
|
+
point = np.unravel_index(
|
|
410
|
+
np.nanargmax(iterative_roi_grid), iterative_roi_grid.shape
|
|
411
|
+
)
|
|
412
|
+
row_idx = point[0]
|
|
413
|
+
media_idx = point[1]
|
|
414
|
+
spend[media_idx] = spend_grid[row_idx, media_idx]
|
|
415
|
+
incremental_outcome[media_idx] = incremental_outcome_grid[
|
|
416
|
+
row_idx, media_idx
|
|
417
|
+
]
|
|
418
|
+
roi_grid_point = iterative_roi_grid[row_idx, media_idx]
|
|
419
|
+
if _exceeds_optimization_constraints(
|
|
420
|
+
spend=spend,
|
|
421
|
+
incremental_outcome=incremental_outcome,
|
|
422
|
+
roi_grid_point=roi_grid_point,
|
|
423
|
+
scenario=scenario,
|
|
424
|
+
):
|
|
425
|
+
break
|
|
426
|
+
|
|
427
|
+
iterative_roi_grid[0 : row_idx + 1, media_idx] = np.nan
|
|
428
|
+
iterative_roi_grid[row_idx + 1 :, media_idx] = np.round(
|
|
429
|
+
backend.divide_no_nan(
|
|
430
|
+
incremental_outcome_grid[row_idx + 1 :, media_idx]
|
|
431
|
+
- incremental_outcome_grid[row_idx, media_idx],
|
|
432
|
+
spend_grid[row_idx + 1 :, media_idx]
|
|
433
|
+
- spend_grid[row_idx, media_idx],
|
|
434
|
+
),
|
|
435
|
+
decimals=8,
|
|
436
|
+
)
|
|
437
|
+
return spend_optimal
|
|
438
|
+
|
|
432
439
|
|
|
433
440
|
@dataclasses.dataclass(frozen=True)
|
|
434
441
|
class OptimizationResults:
|
|
435
442
|
"""The optimized budget allocation.
|
|
436
443
|
|
|
437
444
|
This is a dataclass object containing datasets output from `BudgetOptimizer`.
|
|
438
|
-
These datasets include:
|
|
439
|
-
|
|
440
|
-
- `nonoptimized_data`: The non-optimized budget metrics (based on historical
|
|
441
|
-
frequency).
|
|
442
|
-
- `nonoptimized_data_with_optimal_freq`: The non-optimized budget metrics
|
|
443
|
-
based on optimal frequency.
|
|
444
|
-
- `optimized_data`: The optimized budget metrics.
|
|
445
|
-
- `optimization_grid`: The grid information used for optimization.
|
|
446
445
|
|
|
447
|
-
The metrics (data variables) are:
|
|
448
|
-
|
|
449
|
-
Additionally, some intermediate values and referecences to the source fitted
|
|
450
|
-
model and analyzer are also stored here. These are useful for visualizing and
|
|
451
|
-
debugging.
|
|
446
|
+
The performance metrics (data variables) are: spend, percentage of spend, ROI,
|
|
447
|
+
mROI, incremental outcome, CPIK, and effectiveness.
|
|
452
448
|
|
|
453
449
|
Attributes:
|
|
454
450
|
meridian: The fitted Meridian model that was used to create this budget
|
|
455
451
|
allocation.
|
|
456
452
|
analyzer: The analyzer bound to the model above.
|
|
457
|
-
spend_ratio: The spend ratio used to scale the non-optimized
|
|
458
|
-
to the optimized
|
|
459
|
-
spend_bounds: The spend bounds used to scale the non-optimized
|
|
460
|
-
metrics to the optimized
|
|
461
|
-
nonoptimized_data:
|
|
462
|
-
frequency
|
|
463
|
-
nonoptimized_data_with_optimal_freq:
|
|
464
|
-
|
|
465
|
-
|
|
453
|
+
spend_ratio: The spend ratio used to scale the non-optimized performance
|
|
454
|
+
metrics to the optimized performance metrics.
|
|
455
|
+
spend_bounds: The spend bounds used to scale the non-optimized performance
|
|
456
|
+
metrics to the optimized performance metrics.
|
|
457
|
+
nonoptimized_data: Performance metrics under the non-optimized budget. For
|
|
458
|
+
R&F channels, the non-optimized frequency is used.
|
|
459
|
+
nonoptimized_data_with_optimal_freq: Performance metrics under the
|
|
460
|
+
non-optimized budget. For R&F channels, the optimal frequency is used if
|
|
461
|
+
frequency was optimized.
|
|
462
|
+
optimized_data: Performance metrics under the optimized budget. For R&F
|
|
463
|
+
channels, the optimal frequency is used if frequency was optimized.
|
|
466
464
|
optimization_grid: The grid information used for optimization.
|
|
465
|
+
new_data: The optional `DataTensors` container that was used to create this
|
|
466
|
+
budget allocation.
|
|
467
467
|
"""
|
|
468
468
|
|
|
469
469
|
meridian: model.Meridian
|
|
470
470
|
# The analyzer bound to the model above.
|
|
471
|
-
analyzer:
|
|
471
|
+
analyzer: analyzer_module.Analyzer
|
|
472
472
|
spend_ratio: np.ndarray # spend / historical spend
|
|
473
473
|
spend_bounds: tuple[np.ndarray, np.ndarray]
|
|
474
474
|
|
|
@@ -478,6 +478,10 @@ class OptimizationResults:
|
|
|
478
478
|
_optimized_data: xr.Dataset
|
|
479
479
|
_optimization_grid: OptimizationGrid
|
|
480
480
|
|
|
481
|
+
# The optional `DataTensors` container to use if optimization was performed
|
|
482
|
+
# on data different from the original `input_data`.
|
|
483
|
+
new_data: analyzer_module.DataTensors | None = None
|
|
484
|
+
|
|
481
485
|
# TODO: Move this, and the plotting methods, to a summarizer.
|
|
482
486
|
@functools.cached_property
|
|
483
487
|
def template_env(self) -> jinja2.Environment:
|
|
@@ -494,10 +498,10 @@ class OptimizationResults:
|
|
|
494
498
|
|
|
495
499
|
@property
|
|
496
500
|
def nonoptimized_data(self) -> xr.Dataset:
|
|
497
|
-
"""Dataset holding the non-optimized
|
|
501
|
+
"""Dataset holding the non-optimized performance metrics.
|
|
498
502
|
|
|
499
503
|
For channels that have reach and frequency data, their performance metrics
|
|
500
|
-
|
|
504
|
+
are based on historical frequency.
|
|
501
505
|
|
|
502
506
|
The dataset contains the following:
|
|
503
507
|
|
|
@@ -516,10 +520,10 @@ class OptimizationResults:
|
|
|
516
520
|
|
|
517
521
|
@property
|
|
518
522
|
def nonoptimized_data_with_optimal_freq(self) -> xr.Dataset:
|
|
519
|
-
"""Dataset holding the non-optimized
|
|
523
|
+
"""Dataset holding the non-optimized performance metrics.
|
|
520
524
|
|
|
521
525
|
For channels that have reach and frequency data, their performance metrics
|
|
522
|
-
|
|
526
|
+
are based on optimal frequency.
|
|
523
527
|
|
|
524
528
|
The dataset contains the following:
|
|
525
529
|
|
|
@@ -534,10 +538,10 @@ class OptimizationResults:
|
|
|
534
538
|
|
|
535
539
|
@property
|
|
536
540
|
def optimized_data(self) -> xr.Dataset:
|
|
537
|
-
"""Dataset holding the optimized
|
|
541
|
+
"""Dataset holding the optimized performance metrics.
|
|
538
542
|
|
|
539
543
|
For channels that have reach and frequency data, their performance metrics
|
|
540
|
-
|
|
544
|
+
are based on optimal frequency.
|
|
541
545
|
|
|
542
546
|
The dataset contains the following:
|
|
543
547
|
|
|
@@ -881,9 +885,9 @@ class OptimizationResults:
|
|
|
881
885
|
In particular:
|
|
882
886
|
|
|
883
887
|
1. `spend_multiplier` matches the discrete optimization grid, considering
|
|
884
|
-
|
|
888
|
+
the grid step size and any channel-level constraint bounds.
|
|
885
889
|
2. `selected_times`, `by_reach`, and `use_optimal_frequency` match the
|
|
886
|
-
|
|
890
|
+
values set in `BudgetOptimizer.optimize()`.
|
|
887
891
|
|
|
888
892
|
Returns:
|
|
889
893
|
A dataset returned by `Analyzer.response_curves()`, per budget
|
|
@@ -891,9 +895,12 @@ class OptimizationResults:
|
|
|
891
895
|
returned this result.
|
|
892
896
|
"""
|
|
893
897
|
channels = self.optimized_data.channel.values
|
|
894
|
-
selected_times =
|
|
898
|
+
selected_times = _expand_selected_times(
|
|
899
|
+
meridian=self.meridian,
|
|
895
900
|
start_date=self.optimized_data.start_date,
|
|
896
901
|
end_date=self.optimized_data.end_date,
|
|
902
|
+
new_data=self.new_data,
|
|
903
|
+
return_flexible_str=True,
|
|
897
904
|
)
|
|
898
905
|
_, ubounds = self.spend_bounds
|
|
899
906
|
upper_bound = (
|
|
@@ -909,6 +916,7 @@ class OptimizationResults:
|
|
|
909
916
|
# WARN: If `selected_times` is not None (i.e. a subset time range), this
|
|
910
917
|
# response curve computation might take a significant amount of time.
|
|
911
918
|
return self.analyzer.response_curves(
|
|
919
|
+
new_data=self.new_data,
|
|
912
920
|
spend_multipliers=spend_multiplier,
|
|
913
921
|
use_posterior=self.optimization_grid.use_posterior,
|
|
914
922
|
selected_times=selected_times,
|
|
@@ -1273,7 +1281,7 @@ class BudgetOptimizer:
|
|
|
1273
1281
|
|
|
1274
1282
|
def __init__(self, meridian: model.Meridian):
|
|
1275
1283
|
self._meridian = meridian
|
|
1276
|
-
self._analyzer =
|
|
1284
|
+
self._analyzer = analyzer_module.Analyzer(self._meridian)
|
|
1277
1285
|
|
|
1278
1286
|
def _validate_model_fit(self, use_posterior: bool):
|
|
1279
1287
|
"""Validates that the model is fit."""
|
|
@@ -1285,7 +1293,7 @@ class BudgetOptimizer:
|
|
|
1285
1293
|
|
|
1286
1294
|
def optimize(
|
|
1287
1295
|
self,
|
|
1288
|
-
new_data:
|
|
1296
|
+
new_data: analyzer_module.DataTensors | None = None,
|
|
1289
1297
|
use_posterior: bool = True,
|
|
1290
1298
|
# TODO: b/409550413 - Remove this argument.
|
|
1291
1299
|
selected_times: tuple[str | None, str | None] | None = None,
|
|
@@ -1329,25 +1337,25 @@ class BudgetOptimizer:
|
|
|
1329
1337
|
The following optimization parameters are assigned default values based on
|
|
1330
1338
|
the model input data:
|
|
1331
1339
|
1. Flighting pattern. This is the relative allocation of a channel's media
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1340
|
+
units across geos and time periods. By default, the historical flighting
|
|
1341
|
+
pattern is used. The default can be overridden by passing
|
|
1342
|
+
`new_data.media`. The flighting pattern is held constant during
|
|
1343
|
+
optimization and does not depend on the overall budget assigned to the
|
|
1344
|
+
channel.
|
|
1337
1345
|
2. Cost per media unit. By default, the historical spend divided by
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1346
|
+
historical media units is used. This can optionally vary by geo or time
|
|
1347
|
+
period or both depending on whether the spend data has geo and time
|
|
1348
|
+
dimensions. The default can be overridden by passing `new_data.spend`.
|
|
1349
|
+
The cost per media unit is held constant during optimization and does not
|
|
1350
|
+
depend on the overall budget assigned to the channel.
|
|
1343
1351
|
3. Center of the spend box constraint for each channel. By default, the
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1352
|
+
historical percentage of spend within `selected_geos` and between
|
|
1353
|
+
`start_date` and `end_date` is used. This can be overridden by passing
|
|
1354
|
+
`pct_of_spend`.
|
|
1347
1355
|
4. Total budget to be allocated (for fixed budget scenarios only). By
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1356
|
+
default, the historical spend within `selected_geos` and between
|
|
1357
|
+
`start_date` and `end_date` is used. This can be overridden by passing
|
|
1358
|
+
`budget`.
|
|
1351
1359
|
|
|
1352
1360
|
Passing `new_data.media` (or `new_data.reach` or `new_data.frequency`) will
|
|
1353
1361
|
override both the flighting pattern and cost per media unit. Passing
|
|
@@ -1523,14 +1531,15 @@ class BudgetOptimizer:
|
|
|
1523
1531
|
use_historical_budget = budget is None or np.isclose(
|
|
1524
1532
|
budget, np.sum(optimization_grid.historical_spend)
|
|
1525
1533
|
)
|
|
1526
|
-
new_data = new_data or
|
|
1534
|
+
new_data = new_data or analyzer_module.DataTensors()
|
|
1527
1535
|
nonoptimized_data = self._create_budget_dataset(
|
|
1528
1536
|
new_data=new_data.filter_fields(c.PAID_DATA + (c.TIME,)),
|
|
1529
1537
|
use_posterior=use_posterior,
|
|
1530
1538
|
use_kpi=use_kpi,
|
|
1531
1539
|
hist_spend=optimization_grid.historical_spend,
|
|
1532
1540
|
spend=spend.non_optimized,
|
|
1533
|
-
|
|
1541
|
+
start_date=start_date,
|
|
1542
|
+
end_date=end_date,
|
|
1534
1543
|
confidence_level=confidence_level,
|
|
1535
1544
|
batch_size=batch_size,
|
|
1536
1545
|
use_historical_budget=use_historical_budget,
|
|
@@ -1541,7 +1550,8 @@ class BudgetOptimizer:
|
|
|
1541
1550
|
use_kpi=use_kpi,
|
|
1542
1551
|
hist_spend=optimization_grid.historical_spend,
|
|
1543
1552
|
spend=spend.non_optimized,
|
|
1544
|
-
|
|
1553
|
+
start_date=start_date,
|
|
1554
|
+
end_date=end_date,
|
|
1545
1555
|
optimal_frequency=optimization_grid.optimal_frequency,
|
|
1546
1556
|
confidence_level=confidence_level,
|
|
1547
1557
|
batch_size=batch_size,
|
|
@@ -1560,7 +1570,8 @@ class BudgetOptimizer:
|
|
|
1560
1570
|
use_kpi=use_kpi,
|
|
1561
1571
|
hist_spend=optimization_grid.historical_spend,
|
|
1562
1572
|
spend=spend.optimized,
|
|
1563
|
-
|
|
1573
|
+
start_date=start_date,
|
|
1574
|
+
end_date=end_date,
|
|
1564
1575
|
optimal_frequency=optimization_grid.optimal_frequency,
|
|
1565
1576
|
attrs=constraints,
|
|
1566
1577
|
confidence_level=confidence_level,
|
|
@@ -1589,6 +1600,7 @@ class BudgetOptimizer:
|
|
|
1589
1600
|
)
|
|
1590
1601
|
|
|
1591
1602
|
return OptimizationResults(
|
|
1603
|
+
new_data=new_data,
|
|
1592
1604
|
meridian=self._meridian,
|
|
1593
1605
|
analyzer=self._analyzer,
|
|
1594
1606
|
spend_ratio=spend_ratio,
|
|
@@ -1601,17 +1613,17 @@ class BudgetOptimizer:
|
|
|
1601
1613
|
|
|
1602
1614
|
def create_optimization_tensors(
|
|
1603
1615
|
self,
|
|
1604
|
-
time: Sequence[str] |
|
|
1605
|
-
cpmu:
|
|
1606
|
-
media:
|
|
1607
|
-
media_spend:
|
|
1608
|
-
cprf:
|
|
1609
|
-
rf_impressions:
|
|
1610
|
-
frequency:
|
|
1611
|
-
rf_spend:
|
|
1612
|
-
revenue_per_kpi:
|
|
1616
|
+
time: Sequence[str] | backend.Tensor,
|
|
1617
|
+
cpmu: backend.Tensor | None = None,
|
|
1618
|
+
media: backend.Tensor | None = None,
|
|
1619
|
+
media_spend: backend.Tensor | None = None,
|
|
1620
|
+
cprf: backend.Tensor | None = None,
|
|
1621
|
+
rf_impressions: backend.Tensor | None = None,
|
|
1622
|
+
frequency: backend.Tensor | None = None,
|
|
1623
|
+
rf_spend: backend.Tensor | None = None,
|
|
1624
|
+
revenue_per_kpi: backend.Tensor | None = None,
|
|
1613
1625
|
use_optimal_frequency: bool = True,
|
|
1614
|
-
) ->
|
|
1626
|
+
) -> analyzer_module.DataTensors:
|
|
1615
1627
|
"""Creates a `DataTensors` for optimizations from CPM and flighting data.
|
|
1616
1628
|
|
|
1617
1629
|
CPM is broken down into cost per media unit, `cpmu`, for the media channels
|
|
@@ -1689,7 +1701,7 @@ class BudgetOptimizer:
|
|
|
1689
1701
|
revenue_per_kpi=revenue_per_kpi,
|
|
1690
1702
|
use_optimal_frequency=use_optimal_frequency,
|
|
1691
1703
|
)
|
|
1692
|
-
n_times = time.shape[0] if isinstance(time,
|
|
1704
|
+
n_times = time.shape[0] if isinstance(time, backend.Tensor) else len(time)
|
|
1693
1705
|
n_geos = self._meridian.n_geos
|
|
1694
1706
|
revenue_per_kpi = (
|
|
1695
1707
|
_expand_tensor(revenue_per_kpi, (n_geos, n_times))
|
|
@@ -1714,30 +1726,30 @@ class BudgetOptimizer:
|
|
|
1714
1726
|
)
|
|
1715
1727
|
tensors[c.RF_SPEND] = allocated_impressions * cprf
|
|
1716
1728
|
if use_optimal_frequency:
|
|
1717
|
-
frequency =
|
|
1729
|
+
frequency = backend.ones_like(allocated_impressions)
|
|
1718
1730
|
tensors[c.FREQUENCY] = _expand_tensor(frequency, shape)
|
|
1719
|
-
tensors[c.REACH] =
|
|
1731
|
+
tensors[c.REACH] = backend.divide_no_nan(
|
|
1720
1732
|
allocated_impressions, tensors[c.FREQUENCY]
|
|
1721
1733
|
)
|
|
1722
1734
|
if rf_spend is not None:
|
|
1723
1735
|
shape = (n_geos, n_times, rf_spend.shape[-1])
|
|
1724
1736
|
cprf = _expand_tensor(cprf, shape)
|
|
1725
1737
|
tensors[c.RF_SPEND] = self._allocate_tensor_by_population(rf_spend)
|
|
1726
|
-
impressions =
|
|
1738
|
+
impressions = backend.divide_no_nan(tensors[c.RF_SPEND], cprf)
|
|
1727
1739
|
if use_optimal_frequency:
|
|
1728
|
-
frequency =
|
|
1740
|
+
frequency = backend.ones_like(impressions)
|
|
1729
1741
|
tensors[c.FREQUENCY] = _expand_tensor(frequency, shape)
|
|
1730
|
-
tensors[c.REACH] =
|
|
1742
|
+
tensors[c.REACH] = backend.divide_no_nan(
|
|
1731
1743
|
impressions, tensors[c.FREQUENCY]
|
|
1732
1744
|
)
|
|
1733
1745
|
if revenue_per_kpi is not None:
|
|
1734
1746
|
tensors[c.REVENUE_PER_KPI] = revenue_per_kpi
|
|
1735
|
-
tensors[c.TIME] =
|
|
1736
|
-
return
|
|
1747
|
+
tensors[c.TIME] = backend.to_tensor(time)
|
|
1748
|
+
return analyzer_module.DataTensors(**tensors)
|
|
1737
1749
|
|
|
1738
1750
|
def _validate_grid(
|
|
1739
1751
|
self,
|
|
1740
|
-
new_data:
|
|
1752
|
+
new_data: analyzer_module.DataTensors | None,
|
|
1741
1753
|
use_posterior: bool,
|
|
1742
1754
|
start_date: tc.Date,
|
|
1743
1755
|
end_date: tc.Date,
|
|
@@ -1793,7 +1805,7 @@ class BudgetOptimizer:
|
|
|
1793
1805
|
return False
|
|
1794
1806
|
|
|
1795
1807
|
if new_data is None:
|
|
1796
|
-
new_data =
|
|
1808
|
+
new_data = analyzer_module.DataTensors()
|
|
1797
1809
|
required_tensors = c.PERFORMANCE_DATA + (c.TIME,)
|
|
1798
1810
|
filled_data = new_data.validate_and_fill_missing_data(
|
|
1799
1811
|
required_tensors_names=required_tensors, meridian=self._meridian
|
|
@@ -1809,7 +1821,8 @@ class BudgetOptimizer:
|
|
|
1809
1821
|
return False
|
|
1810
1822
|
|
|
1811
1823
|
n_channels = len(optimization_grid.channels)
|
|
1812
|
-
selected_times =
|
|
1824
|
+
selected_times = _expand_selected_times(
|
|
1825
|
+
meridian=self._meridian,
|
|
1813
1826
|
start_date=start_date,
|
|
1814
1827
|
end_date=end_date,
|
|
1815
1828
|
new_data=new_data,
|
|
@@ -1848,7 +1861,7 @@ class BudgetOptimizer:
|
|
|
1848
1861
|
)
|
|
1849
1862
|
return False
|
|
1850
1863
|
|
|
1851
|
-
round_factor =
|
|
1864
|
+
round_factor = get_round_factor(budget, gtol)
|
|
1852
1865
|
if round_factor != optimization_grid.round_factor:
|
|
1853
1866
|
warnings.warn(
|
|
1854
1867
|
'Optimization accuracy may suffer owing to budget level differences.'
|
|
@@ -1955,7 +1968,7 @@ class BudgetOptimizer:
|
|
|
1955
1968
|
"""
|
|
1956
1969
|
self._validate_model_fit(use_posterior)
|
|
1957
1970
|
if new_data is None:
|
|
1958
|
-
new_data =
|
|
1971
|
+
new_data = analyzer_module.DataTensors()
|
|
1959
1972
|
|
|
1960
1973
|
if selected_times is not None:
|
|
1961
1974
|
warnings.warn(
|
|
@@ -1972,7 +1985,8 @@ class BudgetOptimizer:
|
|
|
1972
1985
|
filled_data = new_data.validate_and_fill_missing_data(
|
|
1973
1986
|
required_tensors_names=required_tensors, meridian=self._meridian
|
|
1974
1987
|
)
|
|
1975
|
-
selected_times =
|
|
1988
|
+
selected_times = _expand_selected_times(
|
|
1989
|
+
meridian=self._meridian,
|
|
1976
1990
|
start_date=start_date,
|
|
1977
1991
|
end_date=end_date,
|
|
1978
1992
|
new_data=filled_data,
|
|
@@ -1991,7 +2005,7 @@ class BudgetOptimizer:
|
|
|
1991
2005
|
pct_of_spend=pct_of_spend,
|
|
1992
2006
|
)
|
|
1993
2007
|
spend = budget * valid_pct_of_spend
|
|
1994
|
-
round_factor =
|
|
2008
|
+
round_factor = get_round_factor(budget, gtol)
|
|
1995
2009
|
(optimization_lower_bound, optimization_upper_bound) = (
|
|
1996
2010
|
get_optimization_bounds(
|
|
1997
2011
|
n_channels=n_paid_channels,
|
|
@@ -2002,19 +2016,19 @@ class BudgetOptimizer:
|
|
|
2002
2016
|
)
|
|
2003
2017
|
)
|
|
2004
2018
|
if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
|
|
2005
|
-
opt_freq_data =
|
|
2019
|
+
opt_freq_data = analyzer_module.DataTensors(
|
|
2006
2020
|
rf_impressions=filled_data.reach * filled_data.frequency,
|
|
2007
2021
|
rf_spend=filled_data.rf_spend,
|
|
2008
2022
|
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
2009
2023
|
)
|
|
2010
|
-
optimal_frequency =
|
|
2024
|
+
optimal_frequency = backend.to_tensor(
|
|
2011
2025
|
self._analyzer.optimal_freq(
|
|
2012
2026
|
new_data=opt_freq_data,
|
|
2013
2027
|
use_posterior=use_posterior,
|
|
2014
2028
|
selected_times=selected_times,
|
|
2015
2029
|
use_kpi=use_kpi,
|
|
2016
2030
|
).optimal_frequency,
|
|
2017
|
-
dtype=
|
|
2031
|
+
dtype=backend.float32,
|
|
2018
2032
|
)
|
|
2019
2033
|
else:
|
|
2020
2034
|
optimal_frequency = None
|
|
@@ -2092,43 +2106,16 @@ class BudgetOptimizer:
|
|
|
2092
2106
|
attrs={c.SPEND_STEP_SIZE: spend_step_size},
|
|
2093
2107
|
)
|
|
2094
2108
|
|
|
2095
|
-
def _validate_selected_times(
|
|
2096
|
-
self,
|
|
2097
|
-
start_date: tc.Date,
|
|
2098
|
-
end_date: tc.Date,
|
|
2099
|
-
new_data: analyzer.DataTensors | None,
|
|
2100
|
-
) -> Sequence[str] | Sequence[bool] | None:
|
|
2101
|
-
"""Validates and returns the selected times."""
|
|
2102
|
-
if start_date is None and end_date is None:
|
|
2103
|
-
return None
|
|
2104
|
-
|
|
2105
|
-
new_data = new_data or analyzer.DataTensors()
|
|
2106
|
-
if new_data.get_modified_times(self._meridian) is None:
|
|
2107
|
-
return self._meridian.expand_selected_time_dims(
|
|
2108
|
-
start_date=start_date,
|
|
2109
|
-
end_date=end_date,
|
|
2110
|
-
)
|
|
2111
|
-
else:
|
|
2112
|
-
assert new_data.time is not None
|
|
2113
|
-
new_times_str = new_data.time.numpy().astype(str).tolist()
|
|
2114
|
-
time_coordinates = tc.TimeCoordinates.from_dates(new_times_str)
|
|
2115
|
-
expanded_dates = time_coordinates.expand_selected_time_dims(
|
|
2116
|
-
start_date=start_date,
|
|
2117
|
-
end_date=end_date,
|
|
2118
|
-
)
|
|
2119
|
-
expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
|
|
2120
|
-
return [x in expanded_str for x in new_times_str]
|
|
2121
|
-
|
|
2122
2109
|
def _get_incremental_outcome_tensors(
|
|
2123
2110
|
self,
|
|
2124
2111
|
hist_spend: np.ndarray,
|
|
2125
2112
|
spend: np.ndarray,
|
|
2126
|
-
new_data:
|
|
2113
|
+
new_data: analyzer_module.DataTensors | None = None,
|
|
2127
2114
|
optimal_frequency: Sequence[float] | None = None,
|
|
2128
2115
|
) -> tuple[
|
|
2129
|
-
|
|
2130
|
-
|
|
2131
|
-
|
|
2116
|
+
backend.Tensor | None,
|
|
2117
|
+
backend.Tensor | None,
|
|
2118
|
+
backend.Tensor | None,
|
|
2132
2119
|
]:
|
|
2133
2120
|
"""Gets the tensors for incremental outcome, based on spend data.
|
|
2134
2121
|
|
|
@@ -2157,16 +2144,16 @@ class BudgetOptimizer:
|
|
|
2157
2144
|
frequency is used for the optimization scenario.
|
|
2158
2145
|
|
|
2159
2146
|
Returns:
|
|
2160
|
-
Tuple of
|
|
2147
|
+
Tuple of backend.tensors (new_media, new_reach, new_frequency).
|
|
2161
2148
|
"""
|
|
2162
|
-
new_data = new_data or
|
|
2149
|
+
new_data = new_data or analyzer_module.DataTensors()
|
|
2163
2150
|
filled_data = new_data.validate_and_fill_missing_data(
|
|
2164
2151
|
c.PAID_CHANNELS,
|
|
2165
2152
|
self._meridian,
|
|
2166
2153
|
)
|
|
2167
2154
|
if self._meridian.n_media_channels > 0:
|
|
2168
2155
|
new_media = (
|
|
2169
|
-
|
|
2156
|
+
backend.divide_no_nan(
|
|
2170
2157
|
spend[: self._meridian.n_media_channels],
|
|
2171
2158
|
hist_spend[: self._meridian.n_media_channels],
|
|
2172
2159
|
)
|
|
@@ -2177,7 +2164,7 @@ class BudgetOptimizer:
|
|
|
2177
2164
|
if self._meridian.n_rf_channels > 0:
|
|
2178
2165
|
rf_impressions = filled_data.reach * filled_data.frequency
|
|
2179
2166
|
new_rf_impressions = (
|
|
2180
|
-
|
|
2167
|
+
backend.divide_no_nan(
|
|
2181
2168
|
spend[-self._meridian.n_rf_channels :],
|
|
2182
2169
|
hist_spend[-self._meridian.n_rf_channels :],
|
|
2183
2170
|
)
|
|
@@ -2188,8 +2175,8 @@ class BudgetOptimizer:
|
|
|
2188
2175
|
if optimal_frequency is None
|
|
2189
2176
|
else optimal_frequency
|
|
2190
2177
|
)
|
|
2191
|
-
new_reach =
|
|
2192
|
-
new_frequency =
|
|
2178
|
+
new_reach = backend.divide_no_nan(new_rf_impressions, frequency)
|
|
2179
|
+
new_frequency = backend.divide_no_nan(new_rf_impressions, new_reach)
|
|
2193
2180
|
else:
|
|
2194
2181
|
new_reach = None
|
|
2195
2182
|
new_frequency = None
|
|
@@ -2200,10 +2187,11 @@ class BudgetOptimizer:
|
|
|
2200
2187
|
self,
|
|
2201
2188
|
hist_spend: np.ndarray,
|
|
2202
2189
|
spend: np.ndarray,
|
|
2203
|
-
new_data:
|
|
2190
|
+
new_data: analyzer_module.DataTensors | None = None,
|
|
2204
2191
|
use_posterior: bool = True,
|
|
2205
2192
|
use_kpi: bool = False,
|
|
2206
|
-
|
|
2193
|
+
start_date: tc.Date = None,
|
|
2194
|
+
end_date: tc.Date = None,
|
|
2207
2195
|
optimal_frequency: Sequence[float] | None = None,
|
|
2208
2196
|
attrs: Mapping[str, Any] | None = None,
|
|
2209
2197
|
confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
|
|
@@ -2211,13 +2199,19 @@ class BudgetOptimizer:
|
|
|
2211
2199
|
use_historical_budget: bool = True,
|
|
2212
2200
|
) -> xr.Dataset:
|
|
2213
2201
|
"""Creates the budget dataset."""
|
|
2214
|
-
new_data = new_data or
|
|
2202
|
+
new_data = new_data or analyzer_module.DataTensors()
|
|
2215
2203
|
filled_data = new_data.validate_and_fill_missing_data(
|
|
2216
2204
|
c.PAID_DATA + (c.TIME,),
|
|
2217
2205
|
self._meridian,
|
|
2218
2206
|
)
|
|
2219
|
-
|
|
2220
|
-
|
|
2207
|
+
selected_times = _expand_selected_times(
|
|
2208
|
+
meridian=self._meridian,
|
|
2209
|
+
start_date=start_date,
|
|
2210
|
+
end_date=end_date,
|
|
2211
|
+
new_data=new_data,
|
|
2212
|
+
)
|
|
2213
|
+
spend_tensor = backend.to_tensor(spend, dtype=backend.float32)
|
|
2214
|
+
hist_spend = backend.to_tensor(hist_spend, dtype=backend.float32)
|
|
2221
2215
|
(new_media, new_reach, new_frequency) = (
|
|
2222
2216
|
self._get_incremental_outcome_tensors(
|
|
2223
2217
|
hist_spend,
|
|
@@ -2227,7 +2221,7 @@ class BudgetOptimizer:
|
|
|
2227
2221
|
)
|
|
2228
2222
|
)
|
|
2229
2223
|
budget = np.sum(spend_tensor)
|
|
2230
|
-
inc_outcome_data =
|
|
2224
|
+
inc_outcome_data = analyzer_module.DataTensors(
|
|
2231
2225
|
media=new_media,
|
|
2232
2226
|
reach=new_reach,
|
|
2233
2227
|
frequency=new_frequency,
|
|
@@ -2259,7 +2253,7 @@ class BudgetOptimizer:
|
|
|
2259
2253
|
# shape (n_channels, n_metrics) where n_metrics = 4 for (mean, median,
|
|
2260
2254
|
# ci_lo, and ci_hi)
|
|
2261
2255
|
incremental_outcome_with_mean_median_and_ci = (
|
|
2262
|
-
|
|
2256
|
+
analyzer_module.get_central_tendency_and_ci(
|
|
2263
2257
|
data=incremental_outcome,
|
|
2264
2258
|
confidence_level=confidence_level,
|
|
2265
2259
|
include_median=True,
|
|
@@ -2271,7 +2265,7 @@ class BudgetOptimizer:
|
|
|
2271
2265
|
)
|
|
2272
2266
|
|
|
2273
2267
|
aggregated_impressions = self._analyzer.get_aggregated_impressions(
|
|
2274
|
-
new_data=
|
|
2268
|
+
new_data=analyzer_module.DataTensors(
|
|
2275
2269
|
media=new_media, reach=new_reach, frequency=new_frequency
|
|
2276
2270
|
),
|
|
2277
2271
|
selected_times=selected_times,
|
|
@@ -2282,8 +2276,8 @@ class BudgetOptimizer:
|
|
|
2282
2276
|
include_non_paid_channels=False,
|
|
2283
2277
|
)
|
|
2284
2278
|
effectiveness_with_mean_median_and_ci = (
|
|
2285
|
-
|
|
2286
|
-
data=
|
|
2279
|
+
analyzer_module.get_central_tendency_and_ci(
|
|
2280
|
+
data=backend.divide_no_nan(
|
|
2287
2281
|
incremental_outcome, aggregated_impressions
|
|
2288
2282
|
),
|
|
2289
2283
|
confidence_level=confidence_level,
|
|
@@ -2291,27 +2285,27 @@ class BudgetOptimizer:
|
|
|
2291
2285
|
)
|
|
2292
2286
|
)
|
|
2293
2287
|
|
|
2294
|
-
roi =
|
|
2295
|
-
data=
|
|
2288
|
+
roi = analyzer_module.get_central_tendency_and_ci(
|
|
2289
|
+
data=backend.divide_no_nan(incremental_outcome, spend_tensor),
|
|
2296
2290
|
confidence_level=confidence_level,
|
|
2297
2291
|
include_median=True,
|
|
2298
2292
|
)
|
|
2299
|
-
marginal_roi =
|
|
2300
|
-
data=
|
|
2293
|
+
marginal_roi = analyzer_module.get_central_tendency_and_ci(
|
|
2294
|
+
data=backend.divide_no_nan(
|
|
2301
2295
|
mroi_numerator, spend_tensor * incremental_increase
|
|
2302
2296
|
),
|
|
2303
2297
|
confidence_level=confidence_level,
|
|
2304
2298
|
include_median=True,
|
|
2305
2299
|
)
|
|
2306
2300
|
|
|
2307
|
-
cpik =
|
|
2308
|
-
data=
|
|
2301
|
+
cpik = analyzer_module.get_central_tendency_and_ci(
|
|
2302
|
+
data=backend.divide_no_nan(spend_tensor, incremental_outcome),
|
|
2309
2303
|
confidence_level=confidence_level,
|
|
2310
2304
|
include_median=True,
|
|
2311
2305
|
)
|
|
2312
|
-
total_inc_outcome =
|
|
2313
|
-
total_cpik =
|
|
2314
|
-
|
|
2306
|
+
total_inc_outcome = backend.reduce_sum(incremental_outcome, -1)
|
|
2307
|
+
total_cpik = backend.reduce_mean(
|
|
2308
|
+
backend.divide_no_nan(budget, total_inc_outcome),
|
|
2315
2309
|
axis=(0, 1),
|
|
2316
2310
|
)
|
|
2317
2311
|
|
|
@@ -2333,21 +2327,11 @@ class BudgetOptimizer:
|
|
|
2333
2327
|
c.CPIK: ([c.CHANNEL, c.METRIC], cpik),
|
|
2334
2328
|
}
|
|
2335
2329
|
|
|
2336
|
-
all_times = (
|
|
2337
|
-
filled_data.time.numpy().astype(str).tolist()
|
|
2338
|
-
if filled_data.time is not None
|
|
2339
|
-
else self._meridian.input_data.time.values.tolist()
|
|
2340
|
-
)
|
|
2341
|
-
if selected_times is not None and all(
|
|
2342
|
-
isinstance(time, bool) for time in selected_times
|
|
2343
|
-
):
|
|
2344
|
-
selected_times = [
|
|
2345
|
-
time for time, selected in zip(all_times, selected_times) if selected
|
|
2346
|
-
]
|
|
2330
|
+
all_times = np.asarray(filled_data.time).astype(str).tolist()
|
|
2347
2331
|
|
|
2348
2332
|
attributes = {
|
|
2349
|
-
c.START_DATE:
|
|
2350
|
-
c.END_DATE:
|
|
2333
|
+
c.START_DATE: start_date if start_date else all_times[0],
|
|
2334
|
+
c.END_DATE: end_date if end_date else all_times[-1],
|
|
2351
2335
|
c.BUDGET: budget,
|
|
2352
2336
|
c.PROFIT: total_incremental_outcome - budget,
|
|
2353
2337
|
c.TOTAL_INCREMENTAL_OUTCOME: total_incremental_outcome,
|
|
@@ -2373,8 +2357,8 @@ class BudgetOptimizer:
|
|
|
2373
2357
|
self,
|
|
2374
2358
|
i: int,
|
|
2375
2359
|
incremental_outcome_grid: np.ndarray,
|
|
2376
|
-
multipliers_grid:
|
|
2377
|
-
new_data:
|
|
2360
|
+
multipliers_grid: backend.Tensor,
|
|
2361
|
+
new_data: analyzer_module.DataTensors | None = None,
|
|
2378
2362
|
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
2379
2363
|
use_posterior: bool = True,
|
|
2380
2364
|
use_kpi: bool = False,
|
|
@@ -2416,7 +2400,7 @@ class BudgetOptimizer:
|
|
|
2416
2400
|
reducing `batch_size`. The calculation will generally be faster with
|
|
2417
2401
|
larger `batch_size` values.
|
|
2418
2402
|
"""
|
|
2419
|
-
new_data = new_data or
|
|
2403
|
+
new_data = new_data or analyzer_module.DataTensors()
|
|
2420
2404
|
filled_data = new_data.validate_and_fill_missing_data(
|
|
2421
2405
|
c.PAID_DATA, self._meridian
|
|
2422
2406
|
)
|
|
@@ -2432,8 +2416,10 @@ class BudgetOptimizer:
|
|
|
2432
2416
|
new_frequency = None
|
|
2433
2417
|
new_reach = None
|
|
2434
2418
|
elif optimal_frequency is not None:
|
|
2435
|
-
new_frequency =
|
|
2436
|
-
|
|
2419
|
+
new_frequency = (
|
|
2420
|
+
backend.ones_like(filled_data.frequency) * optimal_frequency
|
|
2421
|
+
)
|
|
2422
|
+
new_reach = backend.divide_no_nan(
|
|
2437
2423
|
multipliers_grid[i, -self._meridian.n_rf_channels :]
|
|
2438
2424
|
* filled_data.reach
|
|
2439
2425
|
* filled_data.frequency,
|
|
@@ -2450,20 +2436,22 @@ class BudgetOptimizer:
|
|
|
2450
2436
|
# (n_chains x n_draws x n_total_channels). Incremental_outcome_grid requires
|
|
2451
2437
|
# incremental outcome by channel.
|
|
2452
2438
|
incremental_outcome_grid[i, :] = np.mean(
|
|
2453
|
-
|
|
2454
|
-
|
|
2455
|
-
|
|
2456
|
-
|
|
2457
|
-
|
|
2458
|
-
|
|
2459
|
-
|
|
2460
|
-
|
|
2461
|
-
|
|
2462
|
-
|
|
2463
|
-
|
|
2464
|
-
|
|
2439
|
+
np.asarray(
|
|
2440
|
+
self._analyzer.incremental_outcome(
|
|
2441
|
+
use_posterior=use_posterior,
|
|
2442
|
+
new_data=analyzer_module.DataTensors(
|
|
2443
|
+
media=new_media,
|
|
2444
|
+
reach=new_reach,
|
|
2445
|
+
frequency=new_frequency,
|
|
2446
|
+
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
2447
|
+
),
|
|
2448
|
+
selected_times=selected_times,
|
|
2449
|
+
use_kpi=use_kpi,
|
|
2450
|
+
include_non_paid_channels=False,
|
|
2451
|
+
batch_size=batch_size,
|
|
2452
|
+
)
|
|
2465
2453
|
),
|
|
2466
|
-
(c.CHAINS_DIMENSION, c.DRAWS_DIMENSION),
|
|
2454
|
+
axis=(c.CHAINS_DIMENSION, c.DRAWS_DIMENSION),
|
|
2467
2455
|
dtype=np.float64,
|
|
2468
2456
|
)
|
|
2469
2457
|
|
|
@@ -2473,7 +2461,7 @@ class BudgetOptimizer:
|
|
|
2473
2461
|
spend_bound_lower: np.ndarray,
|
|
2474
2462
|
spend_bound_upper: np.ndarray,
|
|
2475
2463
|
step_size: int,
|
|
2476
|
-
new_data:
|
|
2464
|
+
new_data: analyzer_module.DataTensors | None = None,
|
|
2477
2465
|
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
2478
2466
|
use_posterior: bool = True,
|
|
2479
2467
|
use_kpi: bool = False,
|
|
@@ -2541,8 +2529,8 @@ class BudgetOptimizer:
|
|
|
2541
2529
|
)
|
|
2542
2530
|
spend_grid[: len(spend_grid_m), i] = spend_grid_m
|
|
2543
2531
|
incremental_outcome_grid = np.full([n_grid_rows, n_grid_columns], np.nan)
|
|
2544
|
-
multipliers_grid_base =
|
|
2545
|
-
|
|
2532
|
+
multipliers_grid_base = backend.cast(
|
|
2533
|
+
backend.divide_no_nan(spend_grid, spend), dtype=backend.float32
|
|
2546
2534
|
)
|
|
2547
2535
|
multipliers_grid = np.where(
|
|
2548
2536
|
np.isnan(spend_grid), np.nan, multipliers_grid_base
|
|
@@ -2573,7 +2561,7 @@ class BudgetOptimizer:
|
|
|
2573
2561
|
rf_spend_max = np.nanmax(
|
|
2574
2562
|
spend_grid[:, -self._meridian.n_rf_channels :], axis=0
|
|
2575
2563
|
)
|
|
2576
|
-
rf_roi =
|
|
2564
|
+
rf_roi = backend.divide_no_nan(rf_incremental_outcome_max, rf_spend_max)
|
|
2577
2565
|
incremental_outcome_grid[:, -self._meridian.n_rf_channels :] = (
|
|
2578
2566
|
rf_roi * spend_grid[:, -self._meridian.n_rf_channels :]
|
|
2579
2567
|
)
|
|
@@ -2581,14 +2569,14 @@ class BudgetOptimizer:
|
|
|
2581
2569
|
|
|
2582
2570
|
def _validate_optimization_tensors(
|
|
2583
2571
|
self,
|
|
2584
|
-
cpmu:
|
|
2585
|
-
cprf:
|
|
2586
|
-
media:
|
|
2587
|
-
rf_impressions:
|
|
2588
|
-
frequency:
|
|
2589
|
-
media_spend:
|
|
2590
|
-
rf_spend:
|
|
2591
|
-
revenue_per_kpi:
|
|
2572
|
+
cpmu: backend.Tensor | None = None,
|
|
2573
|
+
cprf: backend.Tensor | None = None,
|
|
2574
|
+
media: backend.Tensor | None = None,
|
|
2575
|
+
rf_impressions: backend.Tensor | None = None,
|
|
2576
|
+
frequency: backend.Tensor | None = None,
|
|
2577
|
+
media_spend: backend.Tensor | None = None,
|
|
2578
|
+
rf_spend: backend.Tensor | None = None,
|
|
2579
|
+
revenue_per_kpi: backend.Tensor | None = None,
|
|
2592
2580
|
use_optimal_frequency: bool = True,
|
|
2593
2581
|
):
|
|
2594
2582
|
"""Validates the tensors needed for optimization."""
|
|
@@ -2642,7 +2630,7 @@ class BudgetOptimizer:
|
|
|
2642
2630
|
)
|
|
2643
2631
|
|
|
2644
2632
|
def _allocate_tensor_by_population(
|
|
2645
|
-
self, tensor:
|
|
2633
|
+
self, tensor: backend.Tensor, required_ndim: int = 3
|
|
2646
2634
|
):
|
|
2647
2635
|
"""Allocates a tensor of shape (time,) or (time, channel) by the population.
|
|
2648
2636
|
|
|
@@ -2664,16 +2652,83 @@ class BudgetOptimizer:
|
|
|
2664
2652
|
)
|
|
2665
2653
|
|
|
2666
2654
|
population = self._meridian.population
|
|
2667
|
-
normalized_population = population /
|
|
2655
|
+
normalized_population = population / backend.reduce_sum(population)
|
|
2668
2656
|
if tensor.ndim == 1:
|
|
2669
|
-
reshaped_population = normalized_population[:,
|
|
2670
|
-
reshaped_tensor = tensor[
|
|
2657
|
+
reshaped_population = normalized_population[:, backend.newaxis]
|
|
2658
|
+
reshaped_tensor = tensor[backend.newaxis, :]
|
|
2671
2659
|
else:
|
|
2672
|
-
reshaped_population = normalized_population[
|
|
2673
|
-
|
|
2660
|
+
reshaped_population = normalized_population[
|
|
2661
|
+
:, backend.newaxis, backend.newaxis
|
|
2662
|
+
]
|
|
2663
|
+
reshaped_tensor = tensor[backend.newaxis, :, :]
|
|
2674
2664
|
return reshaped_tensor * reshaped_population
|
|
2675
2665
|
|
|
2676
2666
|
|
|
2667
|
+
def get_optimization_bounds(
|
|
2668
|
+
n_channels: int,
|
|
2669
|
+
spend: np.ndarray,
|
|
2670
|
+
round_factor: int,
|
|
2671
|
+
spend_constraint_lower: _SpendConstraint,
|
|
2672
|
+
spend_constraint_upper: _SpendConstraint,
|
|
2673
|
+
) -> tuple[np.ndarray, np.ndarray]:
|
|
2674
|
+
"""Get optimization bounds from spend and spend constraints.
|
|
2675
|
+
|
|
2676
|
+
Args:
|
|
2677
|
+
n_channels: Integer number of total channels.
|
|
2678
|
+
spend: np.ndarray with size `n_total_channels` containing media-level spend
|
|
2679
|
+
for all media and RF channels.
|
|
2680
|
+
round_factor: Integer number of digits to round optimization bounds.
|
|
2681
|
+
spend_constraint_lower: Numeric list of size `n_total_channels` or float
|
|
2682
|
+
(same constraint for all media) indicating the lower bound of media-level
|
|
2683
|
+
spend. The lower bound of media-level spend is `(1 -
|
|
2684
|
+
spend_constraint_lower) * budget * allocation)`. The value must be between
|
|
2685
|
+
0-1.
|
|
2686
|
+
spend_constraint_upper: Numeric list of size `n_total_channels` or float
|
|
2687
|
+
(same constraint for all media) indicating the upper bound of media-level
|
|
2688
|
+
spend. The upper bound of media-level spend is `(1 +
|
|
2689
|
+
spend_constraint_upper) * budget * allocation)`.
|
|
2690
|
+
|
|
2691
|
+
Returns:
|
|
2692
|
+
lower_bound: np.ndarray of size `n_total_channels` containing the treated
|
|
2693
|
+
lower bound spend for each media and RF channel.
|
|
2694
|
+
upper_bound: np.ndarray of size `n_total_channels` containing the treated
|
|
2695
|
+
upper bound spend for each media and RF channel.
|
|
2696
|
+
"""
|
|
2697
|
+
spend_bounds = _get_spend_bounds(
|
|
2698
|
+
n_channels=n_channels,
|
|
2699
|
+
spend_constraint_lower=spend_constraint_lower,
|
|
2700
|
+
spend_constraint_upper=spend_constraint_upper,
|
|
2701
|
+
)
|
|
2702
|
+
rounded_spend = np.round(spend, round_factor).astype(int)
|
|
2703
|
+
lower = np.round((spend_bounds[0] * rounded_spend), round_factor).astype(int)
|
|
2704
|
+
upper = np.round(spend_bounds[1] * rounded_spend, round_factor).astype(int)
|
|
2705
|
+
return (lower, upper)
|
|
2706
|
+
|
|
2707
|
+
|
|
2708
|
+
def get_round_factor(budget: float, gtol: float) -> int:
|
|
2709
|
+
"""Gets the number of integer digits to round off of budget.
|
|
2710
|
+
|
|
2711
|
+
Args:
|
|
2712
|
+
budget: Float number for total advertising budget.
|
|
2713
|
+
gtol: Float indicating the acceptable relative error for the budget used in
|
|
2714
|
+
the grid setup. The budget will be rounded by `10*n`, where `n` is the
|
|
2715
|
+
smallest int such that `(budget - rounded_budget) <= (budget * gtol)`.
|
|
2716
|
+
`gtol` must be less than 1.
|
|
2717
|
+
|
|
2718
|
+
Returns:
|
|
2719
|
+
Integer number of digits to round budget to.
|
|
2720
|
+
"""
|
|
2721
|
+
tolerance = budget * gtol
|
|
2722
|
+
if gtol >= 1.0:
|
|
2723
|
+
raise ValueError('gtol must be less than one.')
|
|
2724
|
+
elif budget <= 0.0:
|
|
2725
|
+
raise ValueError('`budget` must be greater than zero.')
|
|
2726
|
+
elif tolerance < 1.0:
|
|
2727
|
+
return 0
|
|
2728
|
+
else:
|
|
2729
|
+
return -int(math.log10(tolerance)) - 1
|
|
2730
|
+
|
|
2731
|
+
|
|
2677
2732
|
def _validate_pct_of_spend(
|
|
2678
2733
|
n_channels: int,
|
|
2679
2734
|
hist_spend: np.ndarray,
|
|
@@ -2748,7 +2803,7 @@ def _get_spend_bounds(
|
|
|
2748
2803
|
|
|
2749
2804
|
Returns:
|
|
2750
2805
|
spend_bounds: tuple of np.ndarray of size `n_total_channels` containing
|
|
2751
|
-
|
|
2806
|
+
the untreated lower and upper bound spend for each media and RF channel.
|
|
2752
2807
|
"""
|
|
2753
2808
|
(spend_const_lower, spend_const_upper) = _validate_spend_constraints(
|
|
2754
2809
|
n_channels,
|
|
@@ -2762,47 +2817,6 @@ def _get_spend_bounds(
|
|
|
2762
2817
|
return spend_bounds
|
|
2763
2818
|
|
|
2764
2819
|
|
|
2765
|
-
def get_optimization_bounds(
|
|
2766
|
-
n_channels: int,
|
|
2767
|
-
spend: np.ndarray,
|
|
2768
|
-
round_factor: int,
|
|
2769
|
-
spend_constraint_lower: _SpendConstraint,
|
|
2770
|
-
spend_constraint_upper: _SpendConstraint,
|
|
2771
|
-
) -> tuple[np.ndarray, np.ndarray]:
|
|
2772
|
-
"""Get optimization bounds from spend and spend constraints.
|
|
2773
|
-
|
|
2774
|
-
Args:
|
|
2775
|
-
n_channels: Integer number of total channels.
|
|
2776
|
-
spend: np.ndarray with size `n_total_channels` containing media-level spend
|
|
2777
|
-
for all media and RF channels.
|
|
2778
|
-
round_factor: Integer number of digits to round optimization bounds.
|
|
2779
|
-
spend_constraint_lower: Numeric list of size `n_total_channels` or float
|
|
2780
|
-
(same constraint for all media) indicating the lower bound of media-level
|
|
2781
|
-
spend. The lower bound of media-level spend is `(1 -
|
|
2782
|
-
spend_constraint_lower) * budget * allocation)`. The value must be between
|
|
2783
|
-
0-1.
|
|
2784
|
-
spend_constraint_upper: Numeric list of size `n_total_channels` or float
|
|
2785
|
-
(same constraint for all media) indicating the upper bound of media-level
|
|
2786
|
-
spend. The upper bound of media-level spend is `(1 +
|
|
2787
|
-
spend_constraint_upper) * budget * allocation)`.
|
|
2788
|
-
|
|
2789
|
-
Returns:
|
|
2790
|
-
lower_bound: np.ndarray of size `n_total_channels` containing the treated
|
|
2791
|
-
lower bound spend for each media and RF channel.
|
|
2792
|
-
upper_bound: np.ndarray of size `n_total_channels` containing the treated
|
|
2793
|
-
upper bound spend for each media and RF channel.
|
|
2794
|
-
"""
|
|
2795
|
-
spend_bounds = _get_spend_bounds(
|
|
2796
|
-
n_channels=n_channels,
|
|
2797
|
-
spend_constraint_lower=spend_constraint_lower,
|
|
2798
|
-
spend_constraint_upper=spend_constraint_upper,
|
|
2799
|
-
)
|
|
2800
|
-
rounded_spend = np.round(spend, round_factor).astype(int)
|
|
2801
|
-
lower = np.round((spend_bounds[0] * rounded_spend), round_factor).astype(int)
|
|
2802
|
-
upper = np.round(spend_bounds[1] * rounded_spend, round_factor).astype(int)
|
|
2803
|
-
return (lower, upper)
|
|
2804
|
-
|
|
2805
|
-
|
|
2806
2820
|
def _validate_budget(
|
|
2807
2821
|
fixed_budget: bool,
|
|
2808
2822
|
budget: float | None,
|
|
@@ -2836,30 +2850,6 @@ def _validate_budget(
|
|
|
2836
2850
|
)
|
|
2837
2851
|
|
|
2838
2852
|
|
|
2839
|
-
def _get_round_factor(budget: float, gtol: float) -> int:
|
|
2840
|
-
"""Function for obtaining number of integer digits to round off of budget.
|
|
2841
|
-
|
|
2842
|
-
Args:
|
|
2843
|
-
budget: float total advertising budget.
|
|
2844
|
-
gtol: float indicating the acceptable relative error for the udget used in
|
|
2845
|
-
the grid setup. The budget will be rounded by 10*n, where n is the
|
|
2846
|
-
smallest int such that (budget - rounded_budget) is less than or equal to
|
|
2847
|
-
(budget * gtol). gtol must be less than 1.
|
|
2848
|
-
|
|
2849
|
-
Returns:
|
|
2850
|
-
int number of integer digits to round budget to.
|
|
2851
|
-
"""
|
|
2852
|
-
tolerance = budget * gtol
|
|
2853
|
-
if gtol >= 1.0:
|
|
2854
|
-
raise ValueError('gtol must be less than one.')
|
|
2855
|
-
elif budget <= 0.0:
|
|
2856
|
-
raise ValueError('`budget` must be greater than zero.')
|
|
2857
|
-
elif tolerance < 1.0:
|
|
2858
|
-
return 0
|
|
2859
|
-
else:
|
|
2860
|
-
return -int(math.log10(tolerance)) - 1
|
|
2861
|
-
|
|
2862
|
-
|
|
2863
2853
|
def _exceeds_optimization_constraints(
|
|
2864
2854
|
spend: np.ndarray,
|
|
2865
2855
|
incremental_outcome: np.ndarray,
|
|
@@ -2928,12 +2918,12 @@ def _raise_warning_if_target_constraints_not_met(
|
|
|
2928
2918
|
)
|
|
2929
2919
|
|
|
2930
2920
|
|
|
2931
|
-
def _expand_tensor(tensor:
|
|
2921
|
+
def _expand_tensor(tensor: backend.Tensor, required_shape: tuple[int, ...]):
|
|
2932
2922
|
"""Expands a tensor to the required number of dimensions."""
|
|
2933
2923
|
if tensor.shape == required_shape:
|
|
2934
2924
|
return tensor
|
|
2935
2925
|
if tensor.ndim == 0:
|
|
2936
|
-
return
|
|
2926
|
+
return backend.fill(required_shape, tensor)
|
|
2937
2927
|
|
|
2938
2928
|
# Tensor must be less than or equal to the required number of dimensions and
|
|
2939
2929
|
# the shape must match the required shape excluding the difference in number
|
|
@@ -2943,10 +2933,72 @@ def _expand_tensor(tensor: tf.Tensor, required_shape: tuple[int, ...]):
|
|
|
2943
2933
|
):
|
|
2944
2934
|
n_tile_dims = len(required_shape) - tensor.ndim
|
|
2945
2935
|
repeats = list(required_shape[:n_tile_dims]) + [1] * tensor.ndim
|
|
2946
|
-
reshaped_tensor =
|
|
2947
|
-
|
|
2936
|
+
reshaped_tensor = backend.reshape(
|
|
2937
|
+
tensor, [1] * n_tile_dims + list(tensor.shape)
|
|
2938
|
+
)
|
|
2939
|
+
return backend.tile(reshaped_tensor, repeats)
|
|
2948
2940
|
|
|
2949
2941
|
raise ValueError(
|
|
2950
2942
|
f'Cannot expand tensor with shape {tensor.shape} to target'
|
|
2951
2943
|
f' {required_shape}.'
|
|
2952
2944
|
)
|
|
2945
|
+
|
|
2946
|
+
|
|
2947
|
+
def _expand_selected_times(
|
|
2948
|
+
meridian: model.Meridian,
|
|
2949
|
+
start_date: tc.Date,
|
|
2950
|
+
end_date: tc.Date,
|
|
2951
|
+
new_data: analyzer_module.DataTensors | None,
|
|
2952
|
+
return_flexible_str: bool = False,
|
|
2953
|
+
) -> Sequence[str] | Sequence[bool] | None:
|
|
2954
|
+
"""Creates selected_times from start_date and end_date.
|
|
2955
|
+
|
|
2956
|
+
This function creates `selected_times` argument based on `start_date`,
|
|
2957
|
+
`end_date` and `new_data`. If `new_data` is not used or used with unmodified
|
|
2958
|
+
times, dates are selected from `meridian.input_data.time`. In the flexible
|
|
2959
|
+
time scenario, when `new_data` is provided with modified times, dates are
|
|
2960
|
+
selected from `new_data.time`. In this case, `new_data.time` must be provided
|
|
2961
|
+
and the function returns a list of booleans.
|
|
2962
|
+
|
|
2963
|
+
Args:
|
|
2964
|
+
meridian: The `Meridian` object with original data.
|
|
2965
|
+
start_date: Start date of the selected time period.
|
|
2966
|
+
end_date: End date of the selected time period.
|
|
2967
|
+
new_data: The optional `DataTensors` object. If times are modified in
|
|
2968
|
+
`new_data`, then `new_data.time` must be provided.
|
|
2969
|
+
return_flexible_str: Whether to return a list of strings or a list of
|
|
2970
|
+
booleans in case time is modified in `new_data`.
|
|
2971
|
+
|
|
2972
|
+
Returns:
|
|
2973
|
+
If both `start_date` and `end_date` are `None`, returns `None`. If
|
|
2974
|
+
`new_data` is not used or used with unmodified times, returns a list of
|
|
2975
|
+
strings with selected dates. If `new_data` is used with modified times,
|
|
2976
|
+
returns a list of strings or a list of booleans depending on the
|
|
2977
|
+
`return_flexible_str` argument.
|
|
2978
|
+
"""
|
|
2979
|
+
if start_date is None and end_date is None:
|
|
2980
|
+
return None
|
|
2981
|
+
|
|
2982
|
+
new_data = new_data or analyzer_module.DataTensors()
|
|
2983
|
+
if new_data.get_modified_times(meridian) is None:
|
|
2984
|
+
return meridian.expand_selected_time_dims(
|
|
2985
|
+
start_date=start_date,
|
|
2986
|
+
end_date=end_date,
|
|
2987
|
+
)
|
|
2988
|
+
else:
|
|
2989
|
+
assert new_data.time is not None
|
|
2990
|
+
new_times_str = np.asarray(new_data.time).astype(str).tolist()
|
|
2991
|
+
time_coordinates = tc.TimeCoordinates.from_dates(new_times_str)
|
|
2992
|
+
expanded_dates = time_coordinates.expand_selected_time_dims(
|
|
2993
|
+
start_date=start_date,
|
|
2994
|
+
end_date=end_date,
|
|
2995
|
+
)
|
|
2996
|
+
if return_flexible_str:
|
|
2997
|
+
if expanded_dates is None:
|
|
2998
|
+
expanded_dates = time_coordinates.all_dates
|
|
2999
|
+
expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
|
|
3000
|
+
return [x for x in new_times_str if x in expanded_str]
|
|
3001
|
+
# TODO: Remove once every method uses `new_data.time`.
|
|
3002
|
+
else:
|
|
3003
|
+
expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
|
|
3004
|
+
return [x in expanded_str for x in new_times_str]
|