google-meridian 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {google_meridian-1.2.0.dist-info → google_meridian-1.3.0.dist-info}/METADATA +10 -10
- google_meridian-1.3.0.dist-info/RECORD +62 -0
- meridian/analysis/__init__.py +2 -0
- meridian/analysis/analyzer.py +280 -142
- meridian/analysis/formatter.py +2 -2
- meridian/analysis/optimizer.py +353 -169
- meridian/analysis/review/__init__.py +20 -0
- meridian/analysis/review/checks.py +721 -0
- meridian/analysis/review/configs.py +110 -0
- meridian/analysis/review/constants.py +40 -0
- meridian/analysis/review/results.py +544 -0
- meridian/analysis/review/reviewer.py +186 -0
- meridian/analysis/summarizer.py +14 -12
- meridian/analysis/templates/chips.html.jinja +12 -0
- meridian/analysis/test_utils.py +27 -5
- meridian/analysis/visualizer.py +45 -50
- meridian/backend/__init__.py +698 -55
- meridian/backend/config.py +75 -16
- meridian/backend/test_utils.py +127 -1
- meridian/constants.py +52 -11
- meridian/data/input_data.py +7 -2
- meridian/data/test_utils.py +5 -3
- meridian/mlflow/autolog.py +2 -2
- meridian/model/__init__.py +1 -0
- meridian/model/adstock_hill.py +10 -9
- meridian/model/eda/__init__.py +3 -0
- meridian/model/eda/constants.py +21 -0
- meridian/model/eda/eda_engine.py +1580 -84
- meridian/model/eda/eda_outcome.py +200 -0
- meridian/model/eda/eda_spec.py +84 -0
- meridian/model/eda/meridian_eda.py +220 -0
- meridian/model/knots.py +56 -50
- meridian/model/media.py +10 -8
- meridian/model/model.py +79 -16
- meridian/model/model_test_data.py +53 -9
- meridian/model/posterior_sampler.py +398 -391
- meridian/model/prior_distribution.py +114 -39
- meridian/model/prior_sampler.py +146 -90
- meridian/model/spec.py +7 -8
- meridian/model/transformers.py +16 -8
- meridian/version.py +1 -1
- google_meridian-1.2.0.dist-info/RECORD +0 -52
- {google_meridian-1.2.0.dist-info → google_meridian-1.3.0.dist-info}/WHEEL +0 -0
- {google_meridian-1.2.0.dist-info → google_meridian-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {google_meridian-1.2.0.dist-info → google_meridian-1.3.0.dist-info}/top_level.txt +0 -0
meridian/analysis/optimizer.py
CHANGED
|
@@ -26,7 +26,7 @@ import altair as alt
|
|
|
26
26
|
import jinja2
|
|
27
27
|
from meridian import backend
|
|
28
28
|
from meridian import constants as c
|
|
29
|
-
from meridian.analysis import analyzer
|
|
29
|
+
from meridian.analysis import analyzer as analyzer_module
|
|
30
30
|
from meridian.analysis import formatter
|
|
31
31
|
from meridian.analysis import summary_text
|
|
32
32
|
from meridian.data import time_coordinates as tc
|
|
@@ -102,6 +102,7 @@ class OptimizationGrid:
|
|
|
102
102
|
use_kpi: Whether using generic KPI or revenue.
|
|
103
103
|
use_posterior: Whether posterior distributions were used, or prior.
|
|
104
104
|
use_optimal_frequency: Whether optimal frequency was used.
|
|
105
|
+
max_frequency: The maximum frequency for reach and frequency channels.
|
|
105
106
|
start_date: The start date of the optimization period.
|
|
106
107
|
end_date: The end date of the optimization period.
|
|
107
108
|
gtol: Float indicating the acceptable relative error for the budget used in
|
|
@@ -114,7 +115,12 @@ class OptimizationGrid:
|
|
|
114
115
|
does not contain reach and frequency data, or if the model does contain
|
|
115
116
|
reach and frequency data, but historical frequency is used for the
|
|
116
117
|
optimization scenario.
|
|
117
|
-
|
|
118
|
+
selected_geos: The geo coordinates from the model used in this grid.
|
|
119
|
+
selected_times: The time coordinates from the model used in this grid. If
|
|
120
|
+
new data with modified time coordinates is used for optimization, this is
|
|
121
|
+
a list of booleans indicating which time coordinates are selected.
|
|
122
|
+
Otherwise, this is a list of strings indicating the time coordinates used
|
|
123
|
+
in this grid.
|
|
118
124
|
"""
|
|
119
125
|
|
|
120
126
|
_grid_dataset: xr.Dataset
|
|
@@ -128,7 +134,9 @@ class OptimizationGrid:
|
|
|
128
134
|
gtol: float
|
|
129
135
|
round_factor: int
|
|
130
136
|
optimal_frequency: np.ndarray | None
|
|
131
|
-
|
|
137
|
+
selected_geos: Sequence[str] | None
|
|
138
|
+
selected_times: Sequence[str] | Sequence[bool] | None
|
|
139
|
+
max_frequency: float | None = None
|
|
132
140
|
|
|
133
141
|
@property
|
|
134
142
|
def grid_dataset(self) -> xr.Dataset:
|
|
@@ -262,7 +270,7 @@ class OptimizationGrid:
|
|
|
262
270
|
return xr.Dataset(
|
|
263
271
|
coords={c.CHANNEL: self.channels},
|
|
264
272
|
data_vars={
|
|
265
|
-
c.OPTIMIZED: ([c.CHANNEL], optimal_spend
|
|
273
|
+
c.OPTIMIZED: ([c.CHANNEL], optimal_spend),
|
|
266
274
|
c.NON_OPTIMIZED: ([c.CHANNEL], rounded_spend),
|
|
267
275
|
},
|
|
268
276
|
)
|
|
@@ -386,16 +394,26 @@ class OptimizationGrid:
|
|
|
386
394
|
media spend that maximizes incremental outcome based on spend constraints
|
|
387
395
|
for all media and RF channels.
|
|
388
396
|
"""
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
397
|
+
spend_grid_values = np.array(spend_grid.values, dtype=np.float64)
|
|
398
|
+
incremental_outcome_grid_values = np.array(
|
|
399
|
+
incremental_outcome_grid.values, dtype=np.float64
|
|
400
|
+
)
|
|
401
|
+
|
|
402
|
+
spend = spend_grid_values[0, :].copy()
|
|
403
|
+
incremental_outcome = incremental_outcome_grid_values[0, :].copy()
|
|
404
|
+
spend_grid_values = spend_grid_values[1:, :]
|
|
405
|
+
incremental_outcome_grid_values = incremental_outcome_grid_values[1:, :]
|
|
406
|
+
|
|
407
|
+
numerator = incremental_outcome_grid_values - incremental_outcome
|
|
408
|
+
denominator = spend_grid_values - spend
|
|
409
|
+
iterative_roi_grid = np.divide(
|
|
410
|
+
numerator,
|
|
411
|
+
denominator,
|
|
412
|
+
out=np.zeros_like(numerator),
|
|
413
|
+
where=(denominator != 0),
|
|
398
414
|
)
|
|
415
|
+
iterative_roi_grid = np.round(iterative_roi_grid, decimals=8)
|
|
416
|
+
|
|
399
417
|
while True:
|
|
400
418
|
spend_optimal = spend.astype(int)
|
|
401
419
|
# If none of the exit criteria are met roi_grid will eventually be filled
|
|
@@ -407,8 +425,8 @@ class OptimizationGrid:
|
|
|
407
425
|
)
|
|
408
426
|
row_idx = point[0]
|
|
409
427
|
media_idx = point[1]
|
|
410
|
-
spend[media_idx] =
|
|
411
|
-
incremental_outcome[media_idx] =
|
|
428
|
+
spend[media_idx] = spend_grid_values[row_idx, media_idx]
|
|
429
|
+
incremental_outcome[media_idx] = incremental_outcome_grid_values[
|
|
412
430
|
row_idx, media_idx
|
|
413
431
|
]
|
|
414
432
|
roi_grid_point = iterative_roi_grid[row_idx, media_idx]
|
|
@@ -421,14 +439,23 @@ class OptimizationGrid:
|
|
|
421
439
|
break
|
|
422
440
|
|
|
423
441
|
iterative_roi_grid[0 : row_idx + 1, media_idx] = np.nan
|
|
442
|
+
|
|
443
|
+
num_col = (
|
|
444
|
+
incremental_outcome_grid_values[row_idx + 1 :, media_idx]
|
|
445
|
+
- incremental_outcome_grid_values[row_idx, media_idx]
|
|
446
|
+
)
|
|
447
|
+
den_col = (
|
|
448
|
+
spend_grid_values[row_idx + 1 :, media_idx]
|
|
449
|
+
- spend_grid_values[row_idx, media_idx]
|
|
450
|
+
)
|
|
451
|
+
new_roi_col = np.divide(
|
|
452
|
+
num_col,
|
|
453
|
+
den_col,
|
|
454
|
+
out=np.zeros_like(num_col),
|
|
455
|
+
where=(den_col != 0),
|
|
456
|
+
)
|
|
424
457
|
iterative_roi_grid[row_idx + 1 :, media_idx] = np.round(
|
|
425
|
-
|
|
426
|
-
incremental_outcome_grid[row_idx + 1 :, media_idx]
|
|
427
|
-
- incremental_outcome_grid[row_idx, media_idx],
|
|
428
|
-
spend_grid[row_idx + 1 :, media_idx]
|
|
429
|
-
- spend_grid[row_idx, media_idx],
|
|
430
|
-
),
|
|
431
|
-
decimals=8,
|
|
458
|
+
new_roi_col, decimals=8
|
|
432
459
|
)
|
|
433
460
|
return spend_optimal
|
|
434
461
|
|
|
@@ -438,40 +465,33 @@ class OptimizationResults:
|
|
|
438
465
|
"""The optimized budget allocation.
|
|
439
466
|
|
|
440
467
|
This is a dataclass object containing datasets output from `BudgetOptimizer`.
|
|
441
|
-
These datasets include:
|
|
442
|
-
|
|
443
|
-
- `nonoptimized_data`: The non-optimized budget metrics (based on historical
|
|
444
|
-
frequency).
|
|
445
|
-
- `nonoptimized_data_with_optimal_freq`: The non-optimized budget metrics
|
|
446
|
-
based on optimal frequency.
|
|
447
|
-
- `optimized_data`: The optimized budget metrics.
|
|
448
|
-
- `optimization_grid`: The grid information used for optimization.
|
|
449
|
-
|
|
450
|
-
The metrics (data variables) are: ROI, mROI, incremental outcome, CPIK.
|
|
451
468
|
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
debugging.
|
|
469
|
+
The performance metrics (data variables) are: spend, percentage of spend, ROI,
|
|
470
|
+
mROI, incremental outcome, CPIK, and effectiveness.
|
|
455
471
|
|
|
456
472
|
Attributes:
|
|
457
473
|
meridian: The fitted Meridian model that was used to create this budget
|
|
458
474
|
allocation.
|
|
459
475
|
analyzer: The analyzer bound to the model above.
|
|
460
|
-
spend_ratio: The spend ratio used to scale the non-optimized
|
|
461
|
-
to the optimized
|
|
462
|
-
spend_bounds: The spend bounds used to scale the non-optimized
|
|
463
|
-
metrics to the optimized
|
|
464
|
-
nonoptimized_data:
|
|
465
|
-
frequency
|
|
466
|
-
nonoptimized_data_with_optimal_freq:
|
|
467
|
-
|
|
468
|
-
|
|
476
|
+
spend_ratio: The spend ratio used to scale the non-optimized performance
|
|
477
|
+
metrics to the optimized performance metrics.
|
|
478
|
+
spend_bounds: The spend bounds used to scale the non-optimized performance
|
|
479
|
+
metrics to the optimized performance metrics.
|
|
480
|
+
nonoptimized_data: Performance metrics under the non-optimized budget. For
|
|
481
|
+
R&F channels, the non-optimized frequency is used.
|
|
482
|
+
nonoptimized_data_with_optimal_freq: Performance metrics under the
|
|
483
|
+
non-optimized budget. For R&F channels, the optimal frequency is used if
|
|
484
|
+
frequency was optimized.
|
|
485
|
+
optimized_data: Performance metrics under the optimized budget. For R&F
|
|
486
|
+
channels, the optimal frequency is used if frequency was optimized.
|
|
469
487
|
optimization_grid: The grid information used for optimization.
|
|
488
|
+
new_data: The optional `DataTensors` container that was used to create this
|
|
489
|
+
budget allocation.
|
|
470
490
|
"""
|
|
471
491
|
|
|
472
492
|
meridian: model.Meridian
|
|
473
493
|
# The analyzer bound to the model above.
|
|
474
|
-
analyzer:
|
|
494
|
+
analyzer: analyzer_module.Analyzer
|
|
475
495
|
spend_ratio: np.ndarray # spend / historical spend
|
|
476
496
|
spend_bounds: tuple[np.ndarray, np.ndarray]
|
|
477
497
|
|
|
@@ -481,6 +501,10 @@ class OptimizationResults:
|
|
|
481
501
|
_optimized_data: xr.Dataset
|
|
482
502
|
_optimization_grid: OptimizationGrid
|
|
483
503
|
|
|
504
|
+
# The optional `DataTensors` container to use if optimization was performed
|
|
505
|
+
# on data different from the original `input_data`.
|
|
506
|
+
new_data: analyzer_module.DataTensors | None = None
|
|
507
|
+
|
|
484
508
|
# TODO: Move this, and the plotting methods, to a summarizer.
|
|
485
509
|
@functools.cached_property
|
|
486
510
|
def template_env(self) -> jinja2.Environment:
|
|
@@ -497,10 +521,10 @@ class OptimizationResults:
|
|
|
497
521
|
|
|
498
522
|
@property
|
|
499
523
|
def nonoptimized_data(self) -> xr.Dataset:
|
|
500
|
-
"""Dataset holding the non-optimized
|
|
524
|
+
"""Dataset holding the non-optimized performance metrics.
|
|
501
525
|
|
|
502
526
|
For channels that have reach and frequency data, their performance metrics
|
|
503
|
-
|
|
527
|
+
are based on historical frequency.
|
|
504
528
|
|
|
505
529
|
The dataset contains the following:
|
|
506
530
|
|
|
@@ -519,10 +543,10 @@ class OptimizationResults:
|
|
|
519
543
|
|
|
520
544
|
@property
|
|
521
545
|
def nonoptimized_data_with_optimal_freq(self) -> xr.Dataset:
|
|
522
|
-
"""Dataset holding the non-optimized
|
|
546
|
+
"""Dataset holding the non-optimized performance metrics.
|
|
523
547
|
|
|
524
548
|
For channels that have reach and frequency data, their performance metrics
|
|
525
|
-
|
|
549
|
+
are based on optimal frequency.
|
|
526
550
|
|
|
527
551
|
The dataset contains the following:
|
|
528
552
|
|
|
@@ -537,10 +561,10 @@ class OptimizationResults:
|
|
|
537
561
|
|
|
538
562
|
@property
|
|
539
563
|
def optimized_data(self) -> xr.Dataset:
|
|
540
|
-
"""Dataset holding the optimized
|
|
564
|
+
"""Dataset holding the optimized performance metrics.
|
|
541
565
|
|
|
542
566
|
For channels that have reach and frequency data, their performance metrics
|
|
543
|
-
|
|
567
|
+
are based on optimal frequency.
|
|
544
568
|
|
|
545
569
|
The dataset contains the following:
|
|
546
570
|
|
|
@@ -558,11 +582,16 @@ class OptimizationResults:
|
|
|
558
582
|
"""The grid information used for optimization."""
|
|
559
583
|
return self._optimization_grid
|
|
560
584
|
|
|
561
|
-
def output_optimization_summary(
|
|
585
|
+
def output_optimization_summary(
|
|
586
|
+
self,
|
|
587
|
+
filename: str,
|
|
588
|
+
filepath: str,
|
|
589
|
+
currency: str = c.DEFAULT_CURRENCY,
|
|
590
|
+
):
|
|
562
591
|
"""Generates and saves the HTML optimization summary output."""
|
|
563
592
|
os.makedirs(filepath, exist_ok=True)
|
|
564
593
|
with open(os.path.join(filepath, filename), 'w') as f:
|
|
565
|
-
f.write(self._gen_optimization_summary())
|
|
594
|
+
f.write(self._gen_optimization_summary(currency))
|
|
566
595
|
|
|
567
596
|
def plot_incremental_outcome_delta(self) -> alt.Chart:
|
|
568
597
|
"""Plots a waterfall chart showing the change in incremental outcome."""
|
|
@@ -712,7 +741,7 @@ class OptimizationResults:
|
|
|
712
741
|
)
|
|
713
742
|
)
|
|
714
743
|
|
|
715
|
-
def plot_spend_delta(self) -> alt.Chart:
|
|
744
|
+
def plot_spend_delta(self, currency: str = c.DEFAULT_CURRENCY) -> alt.Chart:
|
|
716
745
|
"""Plots a bar chart showing the optimized change in spend per channel."""
|
|
717
746
|
df = self._get_delta_data(c.SPEND)
|
|
718
747
|
base = (
|
|
@@ -733,7 +762,7 @@ class OptimizationResults:
|
|
|
733
762
|
y=alt.Y(
|
|
734
763
|
f'{c.SPEND}:Q',
|
|
735
764
|
axis=alt.Axis(
|
|
736
|
-
title=
|
|
765
|
+
title=currency,
|
|
737
766
|
domain=False,
|
|
738
767
|
labelExpr=formatter.compact_number_expr(),
|
|
739
768
|
**formatter.AXIS_CONFIG,
|
|
@@ -894,9 +923,12 @@ class OptimizationResults:
|
|
|
894
923
|
returned this result.
|
|
895
924
|
"""
|
|
896
925
|
channels = self.optimized_data.channel.values
|
|
897
|
-
selected_times =
|
|
926
|
+
selected_times = _expand_selected_times(
|
|
927
|
+
meridian=self.meridian,
|
|
898
928
|
start_date=self.optimized_data.start_date,
|
|
899
929
|
end_date=self.optimized_data.end_date,
|
|
930
|
+
new_data=self.new_data,
|
|
931
|
+
return_flexible_str=True,
|
|
900
932
|
)
|
|
901
933
|
_, ubounds = self.spend_bounds
|
|
902
934
|
upper_bound = (
|
|
@@ -912,8 +944,10 @@ class OptimizationResults:
|
|
|
912
944
|
# WARN: If `selected_times` is not None (i.e. a subset time range), this
|
|
913
945
|
# response curve computation might take a significant amount of time.
|
|
914
946
|
return self.analyzer.response_curves(
|
|
947
|
+
new_data=self.new_data,
|
|
915
948
|
spend_multipliers=spend_multiplier,
|
|
916
949
|
use_posterior=self.optimization_grid.use_posterior,
|
|
950
|
+
selected_geos=self.optimization_grid.selected_geos,
|
|
917
951
|
selected_times=selected_times,
|
|
918
952
|
by_reach=True,
|
|
919
953
|
use_kpi=not self.nonoptimized_data.attrs[c.IS_REVENUE_KPI],
|
|
@@ -1024,7 +1058,7 @@ class OptimizationResults:
|
|
|
1024
1058
|
sorted_df.sort_index(inplace=True)
|
|
1025
1059
|
return sorted_df
|
|
1026
1060
|
|
|
1027
|
-
def _gen_optimization_summary(self) -> str:
|
|
1061
|
+
def _gen_optimization_summary(self, currency: str) -> str:
|
|
1028
1062
|
"""Generates HTML optimization summary output (as sanitized content str)."""
|
|
1029
1063
|
start_date = tc.normalize_date(self.optimized_data.start_date)
|
|
1030
1064
|
self.template_env.globals[c.START_DATE] = start_date.strftime(
|
|
@@ -1036,22 +1070,25 @@ class OptimizationResults:
|
|
|
1036
1070
|
self.template_env.globals[c.END_DATE] = end_date_adjusted.strftime(
|
|
1037
1071
|
f'%b {end_date_adjusted.day}, %Y'
|
|
1038
1072
|
)
|
|
1073
|
+
self.template_env.globals[c.SELECTED_GEOS] = (
|
|
1074
|
+
self.optimization_grid.selected_geos
|
|
1075
|
+
)
|
|
1039
1076
|
|
|
1040
1077
|
html_template = self.template_env.get_template('summary.html.jinja')
|
|
1041
1078
|
return html_template.render(
|
|
1042
1079
|
title=summary_text.OPTIMIZATION_TITLE,
|
|
1043
|
-
cards=self._create_output_sections(),
|
|
1080
|
+
cards=self._create_output_sections(currency),
|
|
1044
1081
|
)
|
|
1045
1082
|
|
|
1046
|
-
def _create_output_sections(self) -> Sequence[str]:
|
|
1083
|
+
def _create_output_sections(self, currency: str) -> Sequence[str]:
|
|
1047
1084
|
"""Creates the HTML snippets for cards in the summary page."""
|
|
1048
1085
|
return [
|
|
1049
|
-
self._create_scenario_plan_section(),
|
|
1050
|
-
self._create_budget_allocation_section(),
|
|
1086
|
+
self._create_scenario_plan_section(currency),
|
|
1087
|
+
self._create_budget_allocation_section(currency),
|
|
1051
1088
|
self._create_response_curves_section(),
|
|
1052
1089
|
]
|
|
1053
1090
|
|
|
1054
|
-
def _create_scenario_plan_section(self) -> str:
|
|
1091
|
+
def _create_scenario_plan_section(self, currency: str) -> str:
|
|
1055
1092
|
"""Creates the HTML card snippet for the scenario plan section."""
|
|
1056
1093
|
card_spec = formatter.CardSpec(
|
|
1057
1094
|
id=summary_text.SCENARIO_PLAN_CARD_ID,
|
|
@@ -1094,22 +1131,32 @@ class OptimizationResults:
|
|
|
1094
1131
|
self.template_env,
|
|
1095
1132
|
card_spec,
|
|
1096
1133
|
insights,
|
|
1097
|
-
stats_specs=self._create_scenario_stats_specs(),
|
|
1134
|
+
stats_specs=self._create_scenario_stats_specs(currency),
|
|
1098
1135
|
)
|
|
1099
1136
|
|
|
1100
|
-
def _create_scenario_stats_specs(
|
|
1137
|
+
def _create_scenario_stats_specs(
|
|
1138
|
+
self, currency: str
|
|
1139
|
+
) -> Sequence[formatter.StatsSpec]:
|
|
1101
1140
|
"""Creates the stats to fill the scenario plan section."""
|
|
1102
1141
|
outcome = self._kpi_or_revenue
|
|
1103
1142
|
budget_diff = self.optimized_data.budget - self.nonoptimized_data.budget
|
|
1104
1143
|
budget_prefix = '+' if budget_diff > 0 else ''
|
|
1105
1144
|
non_optimized_budget = formatter.StatsSpec(
|
|
1106
1145
|
title=summary_text.NON_OPTIMIZED_BUDGET_LABEL,
|
|
1107
|
-
stat=formatter.format_monetary_num(
|
|
1146
|
+
stat=formatter.format_monetary_num(
|
|
1147
|
+
num=self.nonoptimized_data.budget,
|
|
1148
|
+
currency=currency,
|
|
1149
|
+
),
|
|
1108
1150
|
)
|
|
1109
1151
|
optimized_budget = formatter.StatsSpec(
|
|
1110
1152
|
title=summary_text.OPTIMIZED_BUDGET_LABEL,
|
|
1111
|
-
stat=formatter.format_monetary_num(
|
|
1112
|
-
|
|
1153
|
+
stat=formatter.format_monetary_num(
|
|
1154
|
+
num=self.optimized_data.budget, currency=currency
|
|
1155
|
+
),
|
|
1156
|
+
delta=(
|
|
1157
|
+
budget_prefix
|
|
1158
|
+
+ formatter.format_monetary_num(num=budget_diff, currency=currency)
|
|
1159
|
+
),
|
|
1113
1160
|
)
|
|
1114
1161
|
|
|
1115
1162
|
if outcome == c.REVENUE:
|
|
@@ -1131,7 +1178,7 @@ class OptimizationResults:
|
|
|
1131
1178
|
)
|
|
1132
1179
|
optimized_performance_title = summary_text.OPTIMIZED_CPIK_LABEL
|
|
1133
1180
|
optimized_performance_stat = f'${self.optimized_data.total_cpik:.2f}'
|
|
1134
|
-
optimized_performance_diff = formatter.compact_number(diff, 2,
|
|
1181
|
+
optimized_performance_diff = formatter.compact_number(diff, 2, currency)
|
|
1135
1182
|
non_optimized_performance = formatter.StatsSpec(
|
|
1136
1183
|
title=non_optimized_performance_title,
|
|
1137
1184
|
stat=non_optimized_performance_stat,
|
|
@@ -1147,7 +1194,7 @@ class OptimizationResults:
|
|
|
1147
1194
|
- self.nonoptimized_data.total_incremental_outcome
|
|
1148
1195
|
)
|
|
1149
1196
|
inc_outcome_prefix = '+' if inc_outcome_diff > 0 else ''
|
|
1150
|
-
currency =
|
|
1197
|
+
currency = currency if outcome == c.REVENUE else ''
|
|
1151
1198
|
non_optimized_inc_outcome = formatter.StatsSpec(
|
|
1152
1199
|
title=summary_text.NON_OPTIMIZED_INC_OUTCOME_LABEL.format(
|
|
1153
1200
|
outcome=outcome
|
|
@@ -1177,7 +1224,7 @@ class OptimizationResults:
|
|
|
1177
1224
|
optimized_inc_outcome,
|
|
1178
1225
|
]
|
|
1179
1226
|
|
|
1180
|
-
def _create_budget_allocation_section(self) -> str:
|
|
1227
|
+
def _create_budget_allocation_section(self, currency: str) -> str:
|
|
1181
1228
|
"""Creates the HTML card snippet for the budget allocation section."""
|
|
1182
1229
|
outcome = self._kpi_or_revenue
|
|
1183
1230
|
card_spec = formatter.CardSpec(
|
|
@@ -1187,7 +1234,7 @@ class OptimizationResults:
|
|
|
1187
1234
|
spend_delta = formatter.ChartSpec(
|
|
1188
1235
|
id=summary_text.SPEND_DELTA_CHART_ID,
|
|
1189
1236
|
description=summary_text.SPEND_DELTA_CHART_INSIGHTS,
|
|
1190
|
-
chart_json=self.plot_spend_delta().to_json(),
|
|
1237
|
+
chart_json=self.plot_spend_delta(currency).to_json(),
|
|
1191
1238
|
)
|
|
1192
1239
|
spend_allocation = formatter.ChartSpec(
|
|
1193
1240
|
id=summary_text.SPEND_ALLOCATION_CHART_ID,
|
|
@@ -1276,7 +1323,7 @@ class BudgetOptimizer:
|
|
|
1276
1323
|
|
|
1277
1324
|
def __init__(self, meridian: model.Meridian):
|
|
1278
1325
|
self._meridian = meridian
|
|
1279
|
-
self._analyzer =
|
|
1326
|
+
self._analyzer = analyzer_module.Analyzer(self._meridian)
|
|
1280
1327
|
|
|
1281
1328
|
def _validate_model_fit(self, use_posterior: bool):
|
|
1282
1329
|
"""Validates that the model is fit."""
|
|
@@ -1288,8 +1335,9 @@ class BudgetOptimizer:
|
|
|
1288
1335
|
|
|
1289
1336
|
def optimize(
|
|
1290
1337
|
self,
|
|
1291
|
-
new_data:
|
|
1338
|
+
new_data: analyzer_module.DataTensors | None = None,
|
|
1292
1339
|
use_posterior: bool = True,
|
|
1340
|
+
selected_geos: Sequence[str] | None = None,
|
|
1293
1341
|
# TODO: b/409550413 - Remove this argument.
|
|
1294
1342
|
selected_times: tuple[str | None, str | None] | None = None,
|
|
1295
1343
|
start_date: tc.Date = None,
|
|
@@ -1302,7 +1350,10 @@ class BudgetOptimizer:
|
|
|
1302
1350
|
target_roi: float | None = None,
|
|
1303
1351
|
target_mroi: float | None = None,
|
|
1304
1352
|
gtol: float = 0.0001,
|
|
1353
|
+
# TODO: Consider
|
|
1354
|
+
# merging use_optimal_frequency and max_frequency into a single argument.
|
|
1305
1355
|
use_optimal_frequency: bool = True,
|
|
1356
|
+
max_frequency: float | None = None,
|
|
1306
1357
|
use_kpi: bool = False,
|
|
1307
1358
|
confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
|
|
1308
1359
|
batch_size: int = c.DEFAULT_BATCH_SIZE,
|
|
@@ -1378,6 +1429,9 @@ class BudgetOptimizer:
|
|
|
1378
1429
|
use_posterior: Boolean. If `True`, then the budget is optimized based on
|
|
1379
1430
|
the posterior distribution of the model. Otherwise, the prior
|
|
1380
1431
|
distribution is used.
|
|
1432
|
+
selected_geos: Optional list containing a subset of geos to include. By
|
|
1433
|
+
default, all geos are included. The selected geos should match those in
|
|
1434
|
+
`InputData.geo`.
|
|
1381
1435
|
selected_times: Deprecated. Tuple containing the start and end time
|
|
1382
1436
|
dimension coordinates for the duration to run the optimization on.
|
|
1383
1437
|
Please use `start_date` and `end_date` instead.
|
|
@@ -1434,6 +1488,10 @@ class BudgetOptimizer:
|
|
|
1434
1488
|
use_optimal_frequency: If `True`, uses `optimal_frequency` calculated by
|
|
1435
1489
|
trained Meridian model for optimization. If `False`, uses historical
|
|
1436
1490
|
frequency or `new_data.frequency` if provided.
|
|
1491
|
+
max_frequency: Float indicating the frequency upper bound for the optimal
|
|
1492
|
+
frequency search space. If `None` when `use_optimal_frequency` is
|
|
1493
|
+
`True`, the max frequency of the input data is used. If
|
|
1494
|
+
`use_optimal_frequency` is `False`, `max_frequency` is ignored.
|
|
1437
1495
|
use_kpi: If `True`, runs the optimization on KPI. Defaults to revenue.
|
|
1438
1496
|
confidence_level: The threshold for computing the confidence intervals.
|
|
1439
1497
|
batch_size: Maximum draws per chain in each batch. The calculation is run
|
|
@@ -1479,6 +1537,7 @@ class BudgetOptimizer:
|
|
|
1479
1537
|
use_grid_arg = optimization_grid is not None and self._validate_grid(
|
|
1480
1538
|
new_data=new_data,
|
|
1481
1539
|
use_posterior=use_posterior,
|
|
1540
|
+
selected_geos=selected_geos,
|
|
1482
1541
|
start_date=start_date,
|
|
1483
1542
|
end_date=end_date,
|
|
1484
1543
|
budget=budget,
|
|
@@ -1487,12 +1546,14 @@ class BudgetOptimizer:
|
|
|
1487
1546
|
spend_constraint_upper=spend_constraint_upper,
|
|
1488
1547
|
gtol=gtol,
|
|
1489
1548
|
use_optimal_frequency=use_optimal_frequency,
|
|
1549
|
+
max_frequency=max_frequency,
|
|
1490
1550
|
use_kpi=use_kpi,
|
|
1491
1551
|
optimization_grid=optimization_grid,
|
|
1492
1552
|
)
|
|
1493
1553
|
if optimization_grid is None or not use_grid_arg:
|
|
1494
1554
|
optimization_grid = self.create_optimization_grid(
|
|
1495
1555
|
new_data=new_data,
|
|
1556
|
+
selected_geos=selected_geos,
|
|
1496
1557
|
start_date=start_date,
|
|
1497
1558
|
end_date=end_date,
|
|
1498
1559
|
budget=budget,
|
|
@@ -1503,6 +1564,7 @@ class BudgetOptimizer:
|
|
|
1503
1564
|
use_posterior=use_posterior,
|
|
1504
1565
|
use_kpi=use_kpi,
|
|
1505
1566
|
use_optimal_frequency=use_optimal_frequency,
|
|
1567
|
+
max_frequency=max_frequency,
|
|
1506
1568
|
batch_size=batch_size,
|
|
1507
1569
|
)
|
|
1508
1570
|
|
|
@@ -1526,13 +1588,14 @@ class BudgetOptimizer:
|
|
|
1526
1588
|
use_historical_budget = budget is None or np.isclose(
|
|
1527
1589
|
budget, np.sum(optimization_grid.historical_spend)
|
|
1528
1590
|
)
|
|
1529
|
-
new_data = new_data or
|
|
1591
|
+
new_data = new_data or analyzer_module.DataTensors()
|
|
1530
1592
|
nonoptimized_data = self._create_budget_dataset(
|
|
1531
1593
|
new_data=new_data.filter_fields(c.PAID_DATA + (c.TIME,)),
|
|
1532
1594
|
use_posterior=use_posterior,
|
|
1533
1595
|
use_kpi=use_kpi,
|
|
1534
1596
|
hist_spend=optimization_grid.historical_spend,
|
|
1535
1597
|
spend=spend.non_optimized,
|
|
1598
|
+
selected_geos=selected_geos,
|
|
1536
1599
|
start_date=start_date,
|
|
1537
1600
|
end_date=end_date,
|
|
1538
1601
|
confidence_level=confidence_level,
|
|
@@ -1545,6 +1608,7 @@ class BudgetOptimizer:
|
|
|
1545
1608
|
use_kpi=use_kpi,
|
|
1546
1609
|
hist_spend=optimization_grid.historical_spend,
|
|
1547
1610
|
spend=spend.non_optimized,
|
|
1611
|
+
selected_geos=selected_geos,
|
|
1548
1612
|
start_date=start_date,
|
|
1549
1613
|
end_date=end_date,
|
|
1550
1614
|
optimal_frequency=optimization_grid.optimal_frequency,
|
|
@@ -1565,6 +1629,7 @@ class BudgetOptimizer:
|
|
|
1565
1629
|
use_kpi=use_kpi,
|
|
1566
1630
|
hist_spend=optimization_grid.historical_spend,
|
|
1567
1631
|
spend=spend.optimized,
|
|
1632
|
+
selected_geos=selected_geos,
|
|
1568
1633
|
start_date=start_date,
|
|
1569
1634
|
end_date=end_date,
|
|
1570
1635
|
optimal_frequency=optimization_grid.optimal_frequency,
|
|
@@ -1595,6 +1660,7 @@ class BudgetOptimizer:
|
|
|
1595
1660
|
)
|
|
1596
1661
|
|
|
1597
1662
|
return OptimizationResults(
|
|
1663
|
+
new_data=new_data,
|
|
1598
1664
|
meridian=self._meridian,
|
|
1599
1665
|
analyzer=self._analyzer,
|
|
1600
1666
|
spend_ratio=spend_ratio,
|
|
@@ -1617,7 +1683,7 @@ class BudgetOptimizer:
|
|
|
1617
1683
|
rf_spend: backend.Tensor | None = None,
|
|
1618
1684
|
revenue_per_kpi: backend.Tensor | None = None,
|
|
1619
1685
|
use_optimal_frequency: bool = True,
|
|
1620
|
-
) ->
|
|
1686
|
+
) -> analyzer_module.DataTensors:
|
|
1621
1687
|
"""Creates a `DataTensors` for optimizations from CPM and flighting data.
|
|
1622
1688
|
|
|
1623
1689
|
CPM is broken down into cost per media unit, `cpmu`, for the media channels
|
|
@@ -1684,7 +1750,11 @@ class BudgetOptimizer:
|
|
|
1684
1750
|
A `DataTensors` object with optional tensors `media`, `reach`,
|
|
1685
1751
|
`frequency`, `media_spend`, `rf_spend`, `revenue_per_kpi`, and `time`.
|
|
1686
1752
|
"""
|
|
1753
|
+
n_times = time.shape[0] if isinstance(time, backend.Tensor) else len(time)
|
|
1754
|
+
n_geos = self._meridian.n_geos
|
|
1687
1755
|
self._validate_optimization_tensors(
|
|
1756
|
+
expected_n_geos=n_geos,
|
|
1757
|
+
expected_n_times=n_times,
|
|
1688
1758
|
cpmu=cpmu,
|
|
1689
1759
|
cprf=cprf,
|
|
1690
1760
|
media=media,
|
|
@@ -1695,13 +1765,6 @@ class BudgetOptimizer:
|
|
|
1695
1765
|
revenue_per_kpi=revenue_per_kpi,
|
|
1696
1766
|
use_optimal_frequency=use_optimal_frequency,
|
|
1697
1767
|
)
|
|
1698
|
-
n_times = time.shape[0] if isinstance(time, backend.Tensor) else len(time)
|
|
1699
|
-
n_geos = self._meridian.n_geos
|
|
1700
|
-
revenue_per_kpi = (
|
|
1701
|
-
_expand_tensor(revenue_per_kpi, (n_geos, n_times))
|
|
1702
|
-
if revenue_per_kpi is not None
|
|
1703
|
-
else None
|
|
1704
|
-
)
|
|
1705
1768
|
|
|
1706
1769
|
tensors = {}
|
|
1707
1770
|
if media is not None:
|
|
@@ -1737,14 +1800,17 @@ class BudgetOptimizer:
|
|
|
1737
1800
|
impressions, tensors[c.FREQUENCY]
|
|
1738
1801
|
)
|
|
1739
1802
|
if revenue_per_kpi is not None:
|
|
1740
|
-
tensors[c.REVENUE_PER_KPI] =
|
|
1803
|
+
tensors[c.REVENUE_PER_KPI] = _expand_tensor(
|
|
1804
|
+
revenue_per_kpi, (n_geos, n_times)
|
|
1805
|
+
)
|
|
1741
1806
|
tensors[c.TIME] = backend.to_tensor(time)
|
|
1742
|
-
return
|
|
1807
|
+
return analyzer_module.DataTensors(**tensors)
|
|
1743
1808
|
|
|
1744
1809
|
def _validate_grid(
|
|
1745
1810
|
self,
|
|
1746
|
-
new_data:
|
|
1811
|
+
new_data: analyzer_module.DataTensors | None,
|
|
1747
1812
|
use_posterior: bool,
|
|
1813
|
+
selected_geos: Sequence[str] | None,
|
|
1748
1814
|
start_date: tc.Date,
|
|
1749
1815
|
end_date: tc.Date,
|
|
1750
1816
|
budget: float | None,
|
|
@@ -1753,6 +1819,7 @@ class BudgetOptimizer:
|
|
|
1753
1819
|
spend_constraint_upper: _SpendConstraint,
|
|
1754
1820
|
gtol: float,
|
|
1755
1821
|
use_optimal_frequency: bool,
|
|
1822
|
+
max_frequency: float | None,
|
|
1756
1823
|
use_kpi: bool,
|
|
1757
1824
|
optimization_grid: OptimizationGrid,
|
|
1758
1825
|
) -> bool:
|
|
@@ -1785,6 +1852,15 @@ class BudgetOptimizer:
|
|
|
1785
1852
|
)
|
|
1786
1853
|
return False
|
|
1787
1854
|
|
|
1855
|
+
if max_frequency != optimization_grid.max_frequency:
|
|
1856
|
+
warnings.warn(
|
|
1857
|
+
'Given optimization grid was created with `use_optimal_frequency` ='
|
|
1858
|
+
f' {optimization_grid.max_frequency}, but optimization was'
|
|
1859
|
+
f' called with `max_frequency` = {max_frequency}. A'
|
|
1860
|
+
' new grid will be created.'
|
|
1861
|
+
)
|
|
1862
|
+
return False
|
|
1863
|
+
|
|
1788
1864
|
if (
|
|
1789
1865
|
start_date != optimization_grid.start_date
|
|
1790
1866
|
or end_date != optimization_grid.end_date
|
|
@@ -1799,7 +1875,7 @@ class BudgetOptimizer:
|
|
|
1799
1875
|
return False
|
|
1800
1876
|
|
|
1801
1877
|
if new_data is None:
|
|
1802
|
-
new_data =
|
|
1878
|
+
new_data = analyzer_module.DataTensors()
|
|
1803
1879
|
required_tensors = c.PERFORMANCE_DATA + (c.TIME,)
|
|
1804
1880
|
filled_data = new_data.validate_and_fill_missing_data(
|
|
1805
1881
|
required_tensors_names=required_tensors, meridian=self._meridian
|
|
@@ -1814,8 +1890,20 @@ class BudgetOptimizer:
|
|
|
1814
1890
|
)
|
|
1815
1891
|
return False
|
|
1816
1892
|
|
|
1893
|
+
s_geos = sorted(selected_geos or [])
|
|
1894
|
+
g_geos = sorted(optimization_grid.selected_geos or [])
|
|
1895
|
+
if s_geos != g_geos:
|
|
1896
|
+
warnings.warn(
|
|
1897
|
+
'Given optimization grid was created with `selected_geos` ='
|
|
1898
|
+
f' {optimization_grid.selected_geos}, but optimization request was'
|
|
1899
|
+
f' called with `selected_geos` = {selected_geos}. A new grid will be'
|
|
1900
|
+
' created.'
|
|
1901
|
+
)
|
|
1902
|
+
return False
|
|
1903
|
+
|
|
1817
1904
|
n_channels = len(optimization_grid.channels)
|
|
1818
|
-
selected_times =
|
|
1905
|
+
selected_times = _expand_selected_times(
|
|
1906
|
+
meridian=self._meridian,
|
|
1819
1907
|
start_date=start_date,
|
|
1820
1908
|
end_date=end_date,
|
|
1821
1909
|
new_data=new_data,
|
|
@@ -1870,6 +1958,7 @@ class BudgetOptimizer:
|
|
|
1870
1958
|
self,
|
|
1871
1959
|
new_data: xr.Dataset | None = None,
|
|
1872
1960
|
use_posterior: bool = True,
|
|
1961
|
+
selected_geos: Sequence[str] | None = None,
|
|
1873
1962
|
# TODO: b/409550413 - Remove this argument.
|
|
1874
1963
|
selected_times: tuple[str | None, str | None] | None = None,
|
|
1875
1964
|
start_date: tc.Date = None,
|
|
@@ -1880,6 +1969,7 @@ class BudgetOptimizer:
|
|
|
1880
1969
|
spend_constraint_upper: _SpendConstraint = c.SPEND_CONSTRAINT_DEFAULT,
|
|
1881
1970
|
gtol: float = 0.0001,
|
|
1882
1971
|
use_optimal_frequency: bool = True,
|
|
1972
|
+
max_frequency: float | None = None,
|
|
1883
1973
|
use_kpi: bool = False,
|
|
1884
1974
|
batch_size: int = c.DEFAULT_BATCH_SIZE,
|
|
1885
1975
|
) -> OptimizationGrid:
|
|
@@ -1908,6 +1998,9 @@ class BudgetOptimizer:
|
|
|
1908
1998
|
use_posterior: Boolean. If `True`, then the incremental outcome is derived
|
|
1909
1999
|
from the posterior distribution of the model. Otherwise, the prior
|
|
1910
2000
|
distribution is used.
|
|
2001
|
+
selected_geos: Optional list containing a subset of geos to include. By
|
|
2002
|
+
default, all geos are included. The selected geos should match those in
|
|
2003
|
+
`InputData.geo`.
|
|
1911
2004
|
selected_times: Deprecated. Tuple containing the start and end time
|
|
1912
2005
|
dimension coordinates. Please Use `start_date` and `end_date` instead.
|
|
1913
2006
|
start_date: Optional start date selector, *inclusive*, in _yyyy-mm-dd_
|
|
@@ -1948,6 +2041,10 @@ class BudgetOptimizer:
|
|
|
1948
2041
|
the smallest integer such that `(budget - rounded_budget)` is less than
|
|
1949
2042
|
or equal to `(budget * gtol)`. `gtol` must be less than 1.
|
|
1950
2043
|
use_optimal_frequency: Boolean. Whether optimal frequency was used.
|
|
2044
|
+
max_frequency: Float indicating the frequency upper bound for the optimal
|
|
2045
|
+
frequency search space. If `None` when `use_optimal_frequency` is
|
|
2046
|
+
`True`, the max frequency of the input data is used. If
|
|
2047
|
+
`use_optimal_frequency` is `False`, `max_frequency` is ignored.
|
|
1951
2048
|
use_kpi: Boolean. If `True`, then the incremental outcome is derived from
|
|
1952
2049
|
the KPI impact. Otherwise, the incremental outcome is derived from the
|
|
1953
2050
|
revenue impact.
|
|
@@ -1961,8 +2058,9 @@ class BudgetOptimizer:
|
|
|
1961
2058
|
"""
|
|
1962
2059
|
self._validate_model_fit(use_posterior)
|
|
1963
2060
|
if new_data is None:
|
|
1964
|
-
new_data =
|
|
1965
|
-
|
|
2061
|
+
new_data = analyzer_module.DataTensors()
|
|
2062
|
+
if selected_geos is not None and not selected_geos:
|
|
2063
|
+
raise ValueError('`selected_geos` must not be empty.')
|
|
1966
2064
|
if selected_times is not None:
|
|
1967
2065
|
warnings.warn(
|
|
1968
2066
|
'`selected_times` is deprecated. Please use `start_date` and'
|
|
@@ -1978,13 +2076,15 @@ class BudgetOptimizer:
|
|
|
1978
2076
|
filled_data = new_data.validate_and_fill_missing_data(
|
|
1979
2077
|
required_tensors_names=required_tensors, meridian=self._meridian
|
|
1980
2078
|
)
|
|
1981
|
-
selected_times =
|
|
2079
|
+
selected_times = _expand_selected_times(
|
|
2080
|
+
meridian=self._meridian,
|
|
1982
2081
|
start_date=start_date,
|
|
1983
2082
|
end_date=end_date,
|
|
1984
2083
|
new_data=filled_data,
|
|
1985
2084
|
)
|
|
1986
2085
|
hist_spend = self._analyzer.get_aggregated_spend(
|
|
1987
2086
|
new_data=filled_data.filter_fields(c.PAID_CHANNELS + c.SPEND_DATA),
|
|
2087
|
+
selected_geos=selected_geos,
|
|
1988
2088
|
selected_times=selected_times,
|
|
1989
2089
|
include_media=self._meridian.n_media_channels > 0,
|
|
1990
2090
|
include_rf=self._meridian.n_rf_channels > 0,
|
|
@@ -2008,7 +2108,7 @@ class BudgetOptimizer:
|
|
|
2008
2108
|
)
|
|
2009
2109
|
)
|
|
2010
2110
|
if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
|
|
2011
|
-
opt_freq_data =
|
|
2111
|
+
opt_freq_data = analyzer_module.DataTensors(
|
|
2012
2112
|
rf_impressions=filled_data.reach * filled_data.frequency,
|
|
2013
2113
|
rf_spend=filled_data.rf_spend,
|
|
2014
2114
|
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
@@ -2017,8 +2117,10 @@ class BudgetOptimizer:
|
|
|
2017
2117
|
self._analyzer.optimal_freq(
|
|
2018
2118
|
new_data=opt_freq_data,
|
|
2019
2119
|
use_posterior=use_posterior,
|
|
2120
|
+
selected_geos=selected_geos,
|
|
2020
2121
|
selected_times=selected_times,
|
|
2021
2122
|
use_kpi=use_kpi,
|
|
2123
|
+
max_frequency=max_frequency,
|
|
2022
2124
|
).optimal_frequency,
|
|
2023
2125
|
dtype=backend.float32,
|
|
2024
2126
|
)
|
|
@@ -2031,6 +2133,7 @@ class BudgetOptimizer:
|
|
|
2031
2133
|
spend_bound_lower=optimization_lower_bound,
|
|
2032
2134
|
spend_bound_upper=optimization_upper_bound,
|
|
2033
2135
|
step_size=step_size,
|
|
2136
|
+
selected_geos=selected_geos,
|
|
2034
2137
|
selected_times=selected_times,
|
|
2035
2138
|
new_data=filled_data.filter_fields(c.PAID_DATA),
|
|
2036
2139
|
use_posterior=use_posterior,
|
|
@@ -2050,11 +2153,13 @@ class BudgetOptimizer:
|
|
|
2050
2153
|
use_kpi=use_kpi,
|
|
2051
2154
|
use_posterior=use_posterior,
|
|
2052
2155
|
use_optimal_frequency=use_optimal_frequency,
|
|
2156
|
+
max_frequency=max_frequency,
|
|
2053
2157
|
start_date=start_date,
|
|
2054
2158
|
end_date=end_date,
|
|
2055
2159
|
gtol=gtol,
|
|
2056
2160
|
round_factor=round_factor,
|
|
2057
2161
|
optimal_frequency=optimal_frequency,
|
|
2162
|
+
selected_geos=selected_geos,
|
|
2058
2163
|
selected_times=selected_times,
|
|
2059
2164
|
)
|
|
2060
2165
|
|
|
@@ -2098,38 +2203,11 @@ class BudgetOptimizer:
|
|
|
2098
2203
|
attrs={c.SPEND_STEP_SIZE: spend_step_size},
|
|
2099
2204
|
)
|
|
2100
2205
|
|
|
2101
|
-
def _validate_selected_times(
|
|
2102
|
-
self,
|
|
2103
|
-
start_date: tc.Date,
|
|
2104
|
-
end_date: tc.Date,
|
|
2105
|
-
new_data: analyzer.DataTensors | None,
|
|
2106
|
-
) -> Sequence[str] | Sequence[bool] | None:
|
|
2107
|
-
"""Validates and returns the selected times."""
|
|
2108
|
-
if start_date is None and end_date is None:
|
|
2109
|
-
return None
|
|
2110
|
-
|
|
2111
|
-
new_data = new_data or analyzer.DataTensors()
|
|
2112
|
-
if new_data.get_modified_times(self._meridian) is None:
|
|
2113
|
-
return self._meridian.expand_selected_time_dims(
|
|
2114
|
-
start_date=start_date,
|
|
2115
|
-
end_date=end_date,
|
|
2116
|
-
)
|
|
2117
|
-
else:
|
|
2118
|
-
assert new_data.time is not None
|
|
2119
|
-
new_times_str = np.asarray(new_data.time).astype(str).tolist()
|
|
2120
|
-
time_coordinates = tc.TimeCoordinates.from_dates(new_times_str)
|
|
2121
|
-
expanded_dates = time_coordinates.expand_selected_time_dims(
|
|
2122
|
-
start_date=start_date,
|
|
2123
|
-
end_date=end_date,
|
|
2124
|
-
)
|
|
2125
|
-
expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
|
|
2126
|
-
return [x in expanded_str for x in new_times_str]
|
|
2127
|
-
|
|
2128
2206
|
def _get_incremental_outcome_tensors(
|
|
2129
2207
|
self,
|
|
2130
2208
|
hist_spend: np.ndarray,
|
|
2131
2209
|
spend: np.ndarray,
|
|
2132
|
-
new_data:
|
|
2210
|
+
new_data: analyzer_module.DataTensors | None = None,
|
|
2133
2211
|
optimal_frequency: Sequence[float] | None = None,
|
|
2134
2212
|
) -> tuple[
|
|
2135
2213
|
backend.Tensor | None,
|
|
@@ -2165,7 +2243,7 @@ class BudgetOptimizer:
|
|
|
2165
2243
|
Returns:
|
|
2166
2244
|
Tuple of backend.tensors (new_media, new_reach, new_frequency).
|
|
2167
2245
|
"""
|
|
2168
|
-
new_data = new_data or
|
|
2246
|
+
new_data = new_data or analyzer_module.DataTensors()
|
|
2169
2247
|
filled_data = new_data.validate_and_fill_missing_data(
|
|
2170
2248
|
c.PAID_CHANNELS,
|
|
2171
2249
|
self._meridian,
|
|
@@ -2206,9 +2284,10 @@ class BudgetOptimizer:
|
|
|
2206
2284
|
self,
|
|
2207
2285
|
hist_spend: np.ndarray,
|
|
2208
2286
|
spend: np.ndarray,
|
|
2209
|
-
new_data:
|
|
2287
|
+
new_data: analyzer_module.DataTensors | None = None,
|
|
2210
2288
|
use_posterior: bool = True,
|
|
2211
2289
|
use_kpi: bool = False,
|
|
2290
|
+
selected_geos: Sequence[str] | None = None,
|
|
2212
2291
|
start_date: tc.Date = None,
|
|
2213
2292
|
end_date: tc.Date = None,
|
|
2214
2293
|
optimal_frequency: Sequence[float] | None = None,
|
|
@@ -2218,13 +2297,16 @@ class BudgetOptimizer:
|
|
|
2218
2297
|
use_historical_budget: bool = True,
|
|
2219
2298
|
) -> xr.Dataset:
|
|
2220
2299
|
"""Creates the budget dataset."""
|
|
2221
|
-
new_data = new_data or
|
|
2300
|
+
new_data = new_data or analyzer_module.DataTensors()
|
|
2222
2301
|
filled_data = new_data.validate_and_fill_missing_data(
|
|
2223
2302
|
c.PAID_DATA + (c.TIME,),
|
|
2224
2303
|
self._meridian,
|
|
2225
2304
|
)
|
|
2226
|
-
selected_times =
|
|
2227
|
-
|
|
2305
|
+
selected_times = _expand_selected_times(
|
|
2306
|
+
meridian=self._meridian,
|
|
2307
|
+
start_date=start_date,
|
|
2308
|
+
end_date=end_date,
|
|
2309
|
+
new_data=new_data,
|
|
2228
2310
|
)
|
|
2229
2311
|
spend_tensor = backend.to_tensor(spend, dtype=backend.float32)
|
|
2230
2312
|
hist_spend = backend.to_tensor(hist_spend, dtype=backend.float32)
|
|
@@ -2237,7 +2319,7 @@ class BudgetOptimizer:
|
|
|
2237
2319
|
)
|
|
2238
2320
|
)
|
|
2239
2321
|
budget = np.sum(spend_tensor)
|
|
2240
|
-
inc_outcome_data =
|
|
2322
|
+
inc_outcome_data = analyzer_module.DataTensors(
|
|
2241
2323
|
media=new_media,
|
|
2242
2324
|
reach=new_reach,
|
|
2243
2325
|
frequency=new_frequency,
|
|
@@ -2249,6 +2331,7 @@ class BudgetOptimizer:
|
|
|
2249
2331
|
incremental_outcome = self._analyzer.incremental_outcome(
|
|
2250
2332
|
use_posterior=use_posterior,
|
|
2251
2333
|
new_data=inc_outcome_data,
|
|
2334
|
+
selected_geos=selected_geos,
|
|
2252
2335
|
selected_times=selected_times,
|
|
2253
2336
|
use_kpi=use_kpi,
|
|
2254
2337
|
batch_size=batch_size,
|
|
@@ -2257,6 +2340,7 @@ class BudgetOptimizer:
|
|
|
2257
2340
|
incremental_increase = 0.01
|
|
2258
2341
|
mroi_numerator = self._analyzer.incremental_outcome(
|
|
2259
2342
|
new_data=inc_outcome_data,
|
|
2343
|
+
selected_geos=selected_geos,
|
|
2260
2344
|
selected_times=selected_times,
|
|
2261
2345
|
scaling_factor0=1.0,
|
|
2262
2346
|
scaling_factor1=1 + incremental_increase,
|
|
@@ -2269,7 +2353,7 @@ class BudgetOptimizer:
|
|
|
2269
2353
|
# shape (n_channels, n_metrics) where n_metrics = 4 for (mean, median,
|
|
2270
2354
|
# ci_lo, and ci_hi)
|
|
2271
2355
|
incremental_outcome_with_mean_median_and_ci = (
|
|
2272
|
-
|
|
2356
|
+
analyzer_module.get_central_tendency_and_ci(
|
|
2273
2357
|
data=incremental_outcome,
|
|
2274
2358
|
confidence_level=confidence_level,
|
|
2275
2359
|
include_median=True,
|
|
@@ -2281,18 +2365,18 @@ class BudgetOptimizer:
|
|
|
2281
2365
|
)
|
|
2282
2366
|
|
|
2283
2367
|
aggregated_impressions = self._analyzer.get_aggregated_impressions(
|
|
2284
|
-
new_data=
|
|
2368
|
+
new_data=analyzer_module.DataTensors(
|
|
2285
2369
|
media=new_media, reach=new_reach, frequency=new_frequency
|
|
2286
2370
|
),
|
|
2287
2371
|
selected_times=selected_times,
|
|
2288
|
-
selected_geos=
|
|
2372
|
+
selected_geos=selected_geos,
|
|
2289
2373
|
aggregate_times=True,
|
|
2290
2374
|
aggregate_geos=True,
|
|
2291
2375
|
optimal_frequency=optimal_frequency,
|
|
2292
2376
|
include_non_paid_channels=False,
|
|
2293
2377
|
)
|
|
2294
2378
|
effectiveness_with_mean_median_and_ci = (
|
|
2295
|
-
|
|
2379
|
+
analyzer_module.get_central_tendency_and_ci(
|
|
2296
2380
|
data=backend.divide_no_nan(
|
|
2297
2381
|
incremental_outcome, aggregated_impressions
|
|
2298
2382
|
),
|
|
@@ -2301,12 +2385,12 @@ class BudgetOptimizer:
|
|
|
2301
2385
|
)
|
|
2302
2386
|
)
|
|
2303
2387
|
|
|
2304
|
-
roi =
|
|
2388
|
+
roi = analyzer_module.get_central_tendency_and_ci(
|
|
2305
2389
|
data=backend.divide_no_nan(incremental_outcome, spend_tensor),
|
|
2306
2390
|
confidence_level=confidence_level,
|
|
2307
2391
|
include_median=True,
|
|
2308
2392
|
)
|
|
2309
|
-
marginal_roi =
|
|
2393
|
+
marginal_roi = analyzer_module.get_central_tendency_and_ci(
|
|
2310
2394
|
data=backend.divide_no_nan(
|
|
2311
2395
|
mroi_numerator, spend_tensor * incremental_increase
|
|
2312
2396
|
),
|
|
@@ -2314,7 +2398,7 @@ class BudgetOptimizer:
|
|
|
2314
2398
|
include_median=True,
|
|
2315
2399
|
)
|
|
2316
2400
|
|
|
2317
|
-
cpik =
|
|
2401
|
+
cpik = analyzer_module.get_central_tendency_and_ci(
|
|
2318
2402
|
data=backend.divide_no_nan(spend_tensor, incremental_outcome),
|
|
2319
2403
|
confidence_level=confidence_level,
|
|
2320
2404
|
include_median=True,
|
|
@@ -2328,19 +2412,27 @@ class BudgetOptimizer:
|
|
|
2328
2412
|
total_spend = np.sum(spend) if np.sum(spend) > 0 else 1
|
|
2329
2413
|
pct_of_spend = spend / total_spend
|
|
2330
2414
|
data_vars = {
|
|
2331
|
-
c.SPEND: ([c.CHANNEL], spend.data),
|
|
2332
|
-
c.PCT_OF_SPEND: (
|
|
2415
|
+
c.SPEND: ([c.CHANNEL], np.array(spend.data, dtype=np.float64)),
|
|
2416
|
+
c.PCT_OF_SPEND: (
|
|
2417
|
+
[c.CHANNEL],
|
|
2418
|
+
np.array(pct_of_spend.data, dtype=np.float64),
|
|
2419
|
+
),
|
|
2333
2420
|
c.INCREMENTAL_OUTCOME: (
|
|
2334
2421
|
[c.CHANNEL, c.METRIC],
|
|
2335
|
-
|
|
2422
|
+
np.array(
|
|
2423
|
+
incremental_outcome_with_mean_median_and_ci, dtype=np.float64
|
|
2424
|
+
),
|
|
2336
2425
|
),
|
|
2337
2426
|
c.EFFECTIVENESS: (
|
|
2338
2427
|
[c.CHANNEL, c.METRIC],
|
|
2339
|
-
effectiveness_with_mean_median_and_ci,
|
|
2428
|
+
np.array(effectiveness_with_mean_median_and_ci, dtype=np.float64),
|
|
2429
|
+
),
|
|
2430
|
+
c.ROI: ([c.CHANNEL, c.METRIC], np.array(roi, dtype=np.float64)),
|
|
2431
|
+
c.MROI: (
|
|
2432
|
+
[c.CHANNEL, c.METRIC],
|
|
2433
|
+
np.array(marginal_roi, dtype=np.float64),
|
|
2340
2434
|
),
|
|
2341
|
-
c.
|
|
2342
|
-
c.MROI: ([c.CHANNEL, c.METRIC], marginal_roi),
|
|
2343
|
-
c.CPIK: ([c.CHANNEL, c.METRIC], cpik),
|
|
2435
|
+
c.CPIK: ([c.CHANNEL, c.METRIC], np.array(cpik, dtype=np.float64)),
|
|
2344
2436
|
}
|
|
2345
2437
|
|
|
2346
2438
|
all_times = np.asarray(filled_data.time).astype(str).tolist()
|
|
@@ -2374,7 +2466,8 @@ class BudgetOptimizer:
|
|
|
2374
2466
|
i: int,
|
|
2375
2467
|
incremental_outcome_grid: np.ndarray,
|
|
2376
2468
|
multipliers_grid: backend.Tensor,
|
|
2377
|
-
new_data:
|
|
2469
|
+
new_data: analyzer_module.DataTensors | None = None,
|
|
2470
|
+
selected_geos: Sequence[str] | None = None,
|
|
2378
2471
|
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
2379
2472
|
use_posterior: bool = True,
|
|
2380
2473
|
use_kpi: bool = False,
|
|
@@ -2396,6 +2489,9 @@ class BudgetOptimizer:
|
|
|
2396
2489
|
tensors is provided with a different number of time periods than in
|
|
2397
2490
|
`InputData`, then all tensors must be provided with the same number of
|
|
2398
2491
|
time periods.
|
|
2492
|
+
selected_geos: Optional list containing a subset of geos to include. By
|
|
2493
|
+
default, all geos are included. The selected geos should match those in
|
|
2494
|
+
`InputData.geo`.
|
|
2399
2495
|
selected_times: Optional list of times to optimize. This can either be a
|
|
2400
2496
|
string list containing a subset of time dimension coordinates from
|
|
2401
2497
|
`InputData.time` or a boolean list with length equal to the time
|
|
@@ -2416,7 +2512,7 @@ class BudgetOptimizer:
|
|
|
2416
2512
|
reducing `batch_size`. The calculation will generally be faster with
|
|
2417
2513
|
larger `batch_size` values.
|
|
2418
2514
|
"""
|
|
2419
|
-
new_data = new_data or
|
|
2515
|
+
new_data = new_data or analyzer_module.DataTensors()
|
|
2420
2516
|
filled_data = new_data.validate_and_fill_missing_data(
|
|
2421
2517
|
c.PAID_DATA, self._meridian
|
|
2422
2518
|
)
|
|
@@ -2455,12 +2551,13 @@ class BudgetOptimizer:
|
|
|
2455
2551
|
np.asarray(
|
|
2456
2552
|
self._analyzer.incremental_outcome(
|
|
2457
2553
|
use_posterior=use_posterior,
|
|
2458
|
-
new_data=
|
|
2554
|
+
new_data=analyzer_module.DataTensors(
|
|
2459
2555
|
media=new_media,
|
|
2460
2556
|
reach=new_reach,
|
|
2461
2557
|
frequency=new_frequency,
|
|
2462
2558
|
revenue_per_kpi=filled_data.revenue_per_kpi,
|
|
2463
2559
|
),
|
|
2560
|
+
selected_geos=selected_geos,
|
|
2464
2561
|
selected_times=selected_times,
|
|
2465
2562
|
use_kpi=use_kpi,
|
|
2466
2563
|
include_non_paid_channels=False,
|
|
@@ -2477,7 +2574,8 @@ class BudgetOptimizer:
|
|
|
2477
2574
|
spend_bound_lower: np.ndarray,
|
|
2478
2575
|
spend_bound_upper: np.ndarray,
|
|
2479
2576
|
step_size: int,
|
|
2480
|
-
new_data:
|
|
2577
|
+
new_data: analyzer_module.DataTensors | None = None,
|
|
2578
|
+
selected_geos: Sequence[str] | None = None,
|
|
2481
2579
|
selected_times: Sequence[str] | Sequence[bool] | None = None,
|
|
2482
2580
|
use_posterior: bool = True,
|
|
2483
2581
|
use_kpi: bool = False,
|
|
@@ -2500,6 +2598,9 @@ class BudgetOptimizer:
|
|
|
2500
2598
|
tensors is provided with a different number of time periods than in
|
|
2501
2599
|
`InputData`, then all tensors must be provided with the same number of
|
|
2502
2600
|
time periods.
|
|
2601
|
+
selected_geos: Optional list containing a subset of geos to include. By
|
|
2602
|
+
default, all geos are included. The selected geos should match those in
|
|
2603
|
+
`InputData.geo`.
|
|
2503
2604
|
selected_times: Optional list of times to optimize. This can either be a
|
|
2504
2605
|
string list containing a subset of time dimension coordinates from
|
|
2505
2606
|
`InputData.time` or a boolean list with length equal to the time
|
|
@@ -2556,6 +2657,7 @@ class BudgetOptimizer:
|
|
|
2556
2657
|
i=i,
|
|
2557
2658
|
incremental_outcome_grid=incremental_outcome_grid,
|
|
2558
2659
|
multipliers_grid=multipliers_grid,
|
|
2660
|
+
selected_geos=selected_geos,
|
|
2559
2661
|
selected_times=selected_times,
|
|
2560
2662
|
new_data=new_data,
|
|
2561
2663
|
use_posterior=use_posterior,
|
|
@@ -2571,20 +2673,15 @@ class BudgetOptimizer:
|
|
|
2571
2673
|
# we use the following code to fix it, and ensure incremental_outcome/spend
|
|
2572
2674
|
# is always same for RF channels.
|
|
2573
2675
|
if self._meridian.n_rf_channels > 0:
|
|
2574
|
-
|
|
2575
|
-
incremental_outcome_grid
|
|
2576
|
-
)
|
|
2577
|
-
rf_spend_max = np.nanmax(
|
|
2578
|
-
spend_grid[:, -self._meridian.n_rf_channels :], axis=0
|
|
2579
|
-
)
|
|
2580
|
-
rf_roi = backend.divide_no_nan(rf_incremental_outcome_max, rf_spend_max)
|
|
2581
|
-
incremental_outcome_grid[:, -self._meridian.n_rf_channels :] = (
|
|
2582
|
-
rf_roi * spend_grid[:, -self._meridian.n_rf_channels :]
|
|
2676
|
+
incremental_outcome_grid = backend.stabilize_rf_roi_grid(
|
|
2677
|
+
spend_grid, incremental_outcome_grid, self._meridian.n_rf_channels
|
|
2583
2678
|
)
|
|
2584
2679
|
return (spend_grid, incremental_outcome_grid)
|
|
2585
2680
|
|
|
2586
2681
|
def _validate_optimization_tensors(
|
|
2587
2682
|
self,
|
|
2683
|
+
expected_n_geos: int,
|
|
2684
|
+
expected_n_times: int,
|
|
2588
2685
|
cpmu: backend.Tensor | None = None,
|
|
2589
2686
|
cprf: backend.Tensor | None = None,
|
|
2590
2687
|
media: backend.Tensor | None = None,
|
|
@@ -2601,11 +2698,21 @@ class BudgetOptimizer:
|
|
|
2601
2698
|
'If `media` or `media_spend` is provided, then `cpmu` must also be'
|
|
2602
2699
|
' provided.'
|
|
2603
2700
|
)
|
|
2701
|
+
if (media is None and media_spend is None) and cpmu is not None:
|
|
2702
|
+
raise ValueError(
|
|
2703
|
+
'If `cpmu` is provided, then one of `media` or `media_spend` must'
|
|
2704
|
+
' also be provided.'
|
|
2705
|
+
)
|
|
2604
2706
|
if (rf_impressions is not None or rf_spend is not None) and cprf is None:
|
|
2605
2707
|
raise ValueError(
|
|
2606
2708
|
'If `reach` and `frequency` or `rf_spend` is provided, then `cprf`'
|
|
2607
2709
|
' must also be provided.'
|
|
2608
2710
|
)
|
|
2711
|
+
if (rf_impressions is None and rf_spend is None) and cprf is not None:
|
|
2712
|
+
raise ValueError(
|
|
2713
|
+
'If `cprf` is provided, then one of `rf_impressions` or `rf_spend`'
|
|
2714
|
+
' must also be provided.'
|
|
2715
|
+
)
|
|
2609
2716
|
if media is not None and media_spend is not None:
|
|
2610
2717
|
raise ValueError('Only one of `media` or `media_spend` can be provided.')
|
|
2611
2718
|
if rf_impressions is not None and rf_spend is not None:
|
|
@@ -2623,26 +2730,44 @@ class BudgetOptimizer:
|
|
|
2623
2730
|
'If `use_optimal_frequency` is `False`, then `frequency` must be'
|
|
2624
2731
|
' provided.'
|
|
2625
2732
|
)
|
|
2626
|
-
|
|
2627
|
-
|
|
2628
|
-
|
|
2629
|
-
|
|
2630
|
-
|
|
2631
|
-
|
|
2632
|
-
|
|
2633
|
-
|
|
2634
|
-
|
|
2635
|
-
|
|
2636
|
-
rf_spend,
|
|
2637
|
-
]
|
|
2638
|
-
if t is not None and t.ndim == 3
|
|
2733
|
+
n_geos_list = []
|
|
2734
|
+
n_times_list = []
|
|
2735
|
+
tensor_list = [
|
|
2736
|
+
cpmu,
|
|
2737
|
+
cprf,
|
|
2738
|
+
media,
|
|
2739
|
+
rf_impressions,
|
|
2740
|
+
frequency,
|
|
2741
|
+
media_spend,
|
|
2742
|
+
rf_spend,
|
|
2639
2743
|
]
|
|
2744
|
+
for t in tensor_list:
|
|
2745
|
+
# `(n_geos, T, n_channels)` shape
|
|
2746
|
+
if t is not None and t.ndim == 3:
|
|
2747
|
+
n_geos_list.append(t.shape[0])
|
|
2748
|
+
n_times_list.append(t.shape[1])
|
|
2749
|
+
# `(T, n_channels)` shape
|
|
2750
|
+
elif t is not None and t.ndim == 2:
|
|
2751
|
+
n_times_list.append(t.shape[0])
|
|
2752
|
+
|
|
2753
|
+
# `(n_geos, T)` shape
|
|
2640
2754
|
if revenue_per_kpi is not None and revenue_per_kpi.ndim == 2:
|
|
2641
|
-
|
|
2642
|
-
|
|
2755
|
+
n_geos_list.append(revenue_per_kpi.shape[0])
|
|
2756
|
+
n_times_list.append(revenue_per_kpi.shape[1])
|
|
2757
|
+
# `(T)` shape
|
|
2758
|
+
elif revenue_per_kpi is not None and revenue_per_kpi.ndim == 1:
|
|
2759
|
+
n_times_list.append(revenue_per_kpi.shape[0])
|
|
2760
|
+
|
|
2761
|
+
if any(n_geo != expected_n_geos for n_geo in n_geos_list):
|
|
2762
|
+
raise ValueError(
|
|
2763
|
+
'All tensors with a geo dimension must have'
|
|
2764
|
+
f' {expected_n_geos} geos (as defined in `meridian.InputData`).'
|
|
2765
|
+
)
|
|
2766
|
+
|
|
2767
|
+
if any(n_time != expected_n_times for n_time in n_times_list):
|
|
2643
2768
|
raise ValueError(
|
|
2644
|
-
'All tensors with a
|
|
2645
|
-
' as in `
|
|
2769
|
+
'All tensors with a time dimension must have'
|
|
2770
|
+
f' {expected_n_times} times (as defined in `time` argument).'
|
|
2646
2771
|
)
|
|
2647
2772
|
|
|
2648
2773
|
def _allocate_tensor_by_population(
|
|
@@ -2958,3 +3083,62 @@ def _expand_tensor(tensor: backend.Tensor, required_shape: tuple[int, ...]):
|
|
|
2958
3083
|
f'Cannot expand tensor with shape {tensor.shape} to target'
|
|
2959
3084
|
f' {required_shape}.'
|
|
2960
3085
|
)
|
|
3086
|
+
|
|
3087
|
+
|
|
3088
|
+
def _expand_selected_times(
    meridian: model.Meridian,
    start_date: tc.Date,
    end_date: tc.Date,
    new_data: analyzer_module.DataTensors | None,
    return_flexible_str: bool = False,
) -> Sequence[str] | Sequence[bool] | None:
  """Creates `selected_times` from `start_date` and `end_date`.

  If `new_data` is not used, or is used with unmodified times, the dates are
  selected by `meridian.expand_selected_time_dims`. In the flexible time
  scenario, when `new_data` is provided with modified times, the dates are
  selected from `new_data.time`; in that case `new_data.time` must be provided.

  Args:
    meridian: The `Meridian` object with original data.
    start_date: Start date of the selected time period.
    end_date: End date of the selected time period.
    new_data: The optional `DataTensors` object. If times are modified in
      `new_data`, then `new_data.time` must be provided.
    return_flexible_str: Only relevant when times are modified in `new_data`.
      If `True`, the selected entries of `new_data.time` are returned as
      strings. If `False` (default), a boolean mask with one entry per element
      of `new_data.time` is returned.

  Returns:
    `None` if both `start_date` and `end_date` are `None`. If `new_data` is not
    used or used with unmodified times, the result of
    `meridian.expand_selected_time_dims`. If `new_data` is used with modified
    times, a list of strings or a list of booleans depending on the
    `return_flexible_str` argument.

  Raises:
    ValueError: If times are modified in `new_data` but `new_data.time` is
      `None`.
  """
  if start_date is None and end_date is None:
    return None

  new_data = new_data or analyzer_module.DataTensors()
  if new_data.get_modified_times(meridian) is None:
    return meridian.expand_selected_time_dims(
        start_date=start_date,
        end_date=end_date,
    )

  # Explicit check instead of `assert`: assertions are stripped under
  # `python -O`, which would let a missing `time` slip through and fail later
  # with an unrelated error.
  if new_data.time is None:
    raise ValueError(
        '`new_data.time` must be provided when times are modified in'
        ' `new_data`.'
    )

  new_times_str = np.asarray(new_data.time).astype(str).tolist()
  time_coordinates = tc.TimeCoordinates.from_dates(new_times_str)
  expanded_dates = time_coordinates.expand_selected_time_dims(
      start_date=start_date,
      end_date=end_date,
  )
  if expanded_dates is None:
    # No expansion was produced; fall back to every date in `new_data.time`.
    expanded_dates = time_coordinates.all_dates

  # Build the lookup once as a set so each membership test below is O(1)
  # rather than a linear scan of a list.
  selected = {date.strftime(c.DATE_FORMAT) for date in expanded_dates}
  if return_flexible_str:
    return [x for x in new_times_str if x in selected]
  # TODO: Remove once every method uses `new_data.time`.
  return [x in selected for x in new_times_str]
|