google-meridian 1.2.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {google_meridian-1.2.0.dist-info → google_meridian-1.2.1.dist-info}/METADATA +2 -2
- {google_meridian-1.2.0.dist-info → google_meridian-1.2.1.dist-info}/RECORD +24 -24
- meridian/analysis/analyzer.py +101 -37
- meridian/analysis/optimizer.py +132 -88
- meridian/analysis/summarizer.py +31 -16
- meridian/analysis/visualizer.py +16 -5
- meridian/backend/__init__.py +475 -14
- meridian/backend/config.py +75 -16
- meridian/backend/test_utils.py +87 -1
- meridian/constants.py +14 -9
- meridian/data/input_data.py +7 -2
- meridian/data/test_utils.py +5 -3
- meridian/mlflow/autolog.py +2 -2
- meridian/model/adstock_hill.py +10 -9
- meridian/model/eda/eda_engine.py +440 -11
- meridian/model/knots.py +1 -1
- meridian/model/model_test_data.py +15 -9
- meridian/model/posterior_sampler.py +365 -365
- meridian/model/prior_distribution.py +104 -39
- meridian/model/transformers.py +5 -5
- meridian/version.py +1 -1
- {google_meridian-1.2.0.dist-info → google_meridian-1.2.1.dist-info}/WHEEL +0 -0
- {google_meridian-1.2.0.dist-info → google_meridian-1.2.1.dist-info}/licenses/LICENSE +0 -0
- {google_meridian-1.2.0.dist-info → google_meridian-1.2.1.dist-info}/top_level.txt +0 -0
meridian/analysis/optimizer.py
CHANGED
@@ -26,7 +26,7 @@ import altair as alt
 import jinja2
 from meridian import backend
 from meridian import constants as c
-from meridian.analysis import analyzer
+from meridian.analysis import analyzer as analyzer_module
 from meridian.analysis import formatter
 from meridian.analysis import summary_text
 from meridian.data import time_coordinates as tc
@@ -114,7 +114,11 @@ class OptimizationGrid:
       does not contain reach and frequency data, or if the model does contain
       reach and frequency data, but historical frequency is used for the
       optimization scenario.
-    selected_times: The time coordinates from the model used in this grid.
+    selected_times: The time coordinates from the model used in this grid. If
+      new data with modified time coordinates is used for optimization, this
+      is a list of booleans indicating which time coordinates are selected.
+      Otherwise, this is a list of strings indicating the time coordinates used
+      in this grid.
   """

   _grid_dataset: xr.Dataset
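For orientation (not part of the diff), a hedged sketch of the two shapes `selected_times` can now take; the values below are made up for illustration:

```python
# Illustrative values only; the real coordinates come from the model or from
# `new_data.time`.

# Unmodified time coordinates: a list of date strings from the model.
selected_times_as_strings = ["2024-01-01", "2024-01-08", "2024-01-15"]

# Modified time coordinates supplied via `new_data`: a boolean mask over
# `new_data.time` marking which coordinates are selected.
selected_times_as_mask = [False, True, True]
```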
@@ -128,7 +132,7 @@ class OptimizationGrid:
   gtol: float
   round_factor: int
   optimal_frequency: np.ndarray | None
-  selected_times: Sequence[str] | None
+  selected_times: Sequence[str] | Sequence[bool] | None

   @property
   def grid_dataset(self) -> xr.Dataset:
@@ -438,40 +442,33 @@ class OptimizationResults:
   """The optimized budget allocation.

   This is a dataclass object containing datasets output from `BudgetOptimizer`.
-  These datasets include:

-
-
-  - `nonoptimized_data_with_optimal_freq`: The non-optimized budget metrics
-    based on optimal frequency.
-  - `optimized_data`: The optimized budget metrics.
-  - `optimization_grid`: The grid information used for optimization.
-
-  The metrics (data variables) are: ROI, mROI, incremental outcome, CPIK.
-
-  Additionally, some intermediate values and referecences to the source fitted
-  model and analyzer are also stored here. These are useful for visualizing and
-  debugging.
+  The performance metrics (data variables) are: spend, percentage of spend, ROI,
+  mROI, incremental outcome, CPIK, and effectiveness.

   Attributes:
     meridian: The fitted Meridian model that was used to create this budget
       allocation.
     analyzer: The analyzer bound to the model above.
-    spend_ratio: The spend ratio used to scale the non-optimized
-      to the optimized
-    spend_bounds: The spend bounds used to scale the non-optimized
-      metrics to the optimized
-    nonoptimized_data:
-      frequency
-    nonoptimized_data_with_optimal_freq:
-
-
+    spend_ratio: The spend ratio used to scale the non-optimized performance
+      metrics to the optimized performance metrics.
+    spend_bounds: The spend bounds used to scale the non-optimized performance
+      metrics to the optimized performance metrics.
+    nonoptimized_data: Performance metrics under the non-optimized budget. For
+      R&F channels, the non-optimized frequency is used.
+    nonoptimized_data_with_optimal_freq: Performance metrics under the
+      non-optimized budget. For R&F channels, the optimal frequency is used if
+      frequency was optimized.
+    optimized_data: Performance metrics under the optimized budget. For R&F
+      channels, the optimal frequency is used if frequency was optimized.
     optimization_grid: The grid information used for optimization.
+    new_data: The optional `DataTensors` container that was used to create this
+      budget allocation.
   """

   meridian: model.Meridian
   # The analyzer bound to the model above.
-  analyzer: analyzer.Analyzer
+  analyzer: analyzer_module.Analyzer
   spend_ratio: np.ndarray  # spend / historical spend
   spend_bounds: tuple[np.ndarray, np.ndarray]
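As a hedged orientation sketch (not part of the diff), the datasets described in this docstring are read like ordinary xarray datasets; `results` stands for an `OptimizationResults` instance returned by `BudgetOptimizer.optimize()`, and the metric variable names follow `meridian.constants` rather than being spelled out here:

```python
# `results` is assumed to be an OptimizationResults instance.
optimized = results.optimized_data                              # xarray.Dataset
baseline = results.nonoptimized_data                            # historical frequency for R&F channels
baseline_opt_freq = results.nonoptimized_data_with_optimal_freq # optimal frequency for R&F channels

print(list(optimized.data_vars))  # spend, ROI, mROI, incremental outcome, CPIK, ...
print(results.new_data)           # DataTensors used for this run, or None
```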
@@ -481,6 +478,10 @@ class OptimizationResults:
   _optimized_data: xr.Dataset
   _optimization_grid: OptimizationGrid

+  # The optional `DataTensors` container to use if optimization was performed
+  # on data different from the original `input_data`.
+  new_data: analyzer_module.DataTensors | None = None
+
   # TODO: Move this, and the plotting methods, to a summarizer.
   @functools.cached_property
   def template_env(self) -> jinja2.Environment:
@@ -497,10 +498,10 @@ class OptimizationResults:

   @property
   def nonoptimized_data(self) -> xr.Dataset:
-    """Dataset holding the non-optimized
+    """Dataset holding the non-optimized performance metrics.

     For channels that have reach and frequency data, their performance metrics
-
+    are based on historical frequency.

     The dataset contains the following:

@@ -519,10 +520,10 @@ class OptimizationResults:

   @property
   def nonoptimized_data_with_optimal_freq(self) -> xr.Dataset:
-    """Dataset holding the non-optimized
+    """Dataset holding the non-optimized performance metrics.

     For channels that have reach and frequency data, their performance metrics
-
+    are based on optimal frequency.

     The dataset contains the following:

@@ -537,10 +538,10 @@ class OptimizationResults:

   @property
   def optimized_data(self) -> xr.Dataset:
-    """Dataset holding the optimized
+    """Dataset holding the optimized performance metrics.

     For channels that have reach and frequency data, their performance metrics
-
+    are based on optimal frequency.

     The dataset contains the following:

@@ -894,9 +895,12 @@ class OptimizationResults:
       returned this result.
     """
     channels = self.optimized_data.channel.values
-    selected_times =
+    selected_times = _expand_selected_times(
+        meridian=self.meridian,
         start_date=self.optimized_data.start_date,
         end_date=self.optimized_data.end_date,
+        new_data=self.new_data,
+        return_flexible_str=True,
     )
     _, ubounds = self.spend_bounds
     upper_bound = (
@@ -912,6 +916,7 @@ class OptimizationResults:
     # WARN: If `selected_times` is not None (i.e. a subset time range), this
     # response curve computation might take a significant amount of time.
     return self.analyzer.response_curves(
+        new_data=self.new_data,
         spend_multipliers=spend_multiplier,
         use_posterior=self.optimization_grid.use_posterior,
         selected_times=selected_times,
@@ -1276,7 +1281,7 @@ class BudgetOptimizer:

   def __init__(self, meridian: model.Meridian):
     self._meridian = meridian
-    self._analyzer = analyzer.Analyzer(self._meridian)
+    self._analyzer = analyzer_module.Analyzer(self._meridian)

   def _validate_model_fit(self, use_posterior: bool):
     """Validates that the model is fit."""
@@ -1288,7 +1293,7 @@ class BudgetOptimizer:

   def optimize(
       self,
-      new_data: analyzer.DataTensors | None = None,
+      new_data: analyzer_module.DataTensors | None = None,
       use_posterior: bool = True,
       # TODO: b/409550413 - Remove this argument.
       selected_times: tuple[str | None, str | None] | None = None,
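For context (not part of the diff), a hedged sketch of the flexible-data call path this signature enables. It assumes `mmm` is an already fitted `Meridian` model, the placeholder tensors (`future_media`, `future_media_spend`, `future_time`) are user-supplied flighting data with the shapes the model expects, and `media_spend` as a `DataTensors` field is inferred from the `c.PAID_DATA` fields the optimizer filters on:

```python
from meridian.analysis import analyzer, optimizer

budget_optimizer = optimizer.BudgetOptimizer(mmm)

# Optimize over data that differs from the training `input_data`, e.g. a
# future planning window. `time` must be supplied when the time coordinates
# differ from the original input data.
results = budget_optimizer.optimize(
    new_data=analyzer.DataTensors(
        media=future_media,
        media_spend=future_media_spend,
        time=future_time,
    ),
    use_posterior=True,
)
print(results.optimized_data)
```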
@@ -1526,7 +1531,7 @@ class BudgetOptimizer:
     use_historical_budget = budget is None or np.isclose(
         budget, np.sum(optimization_grid.historical_spend)
     )
-    new_data = new_data or analyzer.DataTensors()
+    new_data = new_data or analyzer_module.DataTensors()
     nonoptimized_data = self._create_budget_dataset(
         new_data=new_data.filter_fields(c.PAID_DATA + (c.TIME,)),
         use_posterior=use_posterior,
@@ -1595,6 +1600,7 @@ class BudgetOptimizer:
     )

     return OptimizationResults(
+        new_data=new_data,
         meridian=self._meridian,
         analyzer=self._analyzer,
         spend_ratio=spend_ratio,
@@ -1617,7 +1623,7 @@ class BudgetOptimizer:
       rf_spend: backend.Tensor | None = None,
       revenue_per_kpi: backend.Tensor | None = None,
       use_optimal_frequency: bool = True,
-  ) -> analyzer.DataTensors:
+  ) -> analyzer_module.DataTensors:
     """Creates a `DataTensors` for optimizations from CPM and flighting data.

     CPM is broken down into cost per media unit, `cpmu`, for the media channels
@@ -1739,11 +1745,11 @@ class BudgetOptimizer:
     if revenue_per_kpi is not None:
       tensors[c.REVENUE_PER_KPI] = revenue_per_kpi
     tensors[c.TIME] = backend.to_tensor(time)
-    return analyzer.DataTensors(**tensors)
+    return analyzer_module.DataTensors(**tensors)

   def _validate_grid(
       self,
-      new_data: analyzer.DataTensors | None,
+      new_data: analyzer_module.DataTensors | None,
       use_posterior: bool,
       start_date: tc.Date,
       end_date: tc.Date,
@@ -1799,7 +1805,7 @@ class BudgetOptimizer:
       return False

     if new_data is None:
-      new_data = analyzer.DataTensors()
+      new_data = analyzer_module.DataTensors()
     required_tensors = c.PERFORMANCE_DATA + (c.TIME,)
     filled_data = new_data.validate_and_fill_missing_data(
         required_tensors_names=required_tensors, meridian=self._meridian
@@ -1815,7 +1821,8 @@ class BudgetOptimizer:
       return False

     n_channels = len(optimization_grid.channels)
-    selected_times = self._validate_selected_times(
+    selected_times = _expand_selected_times(
+        meridian=self._meridian,
         start_date=start_date,
         end_date=end_date,
         new_data=new_data,
@@ -1961,7 +1968,7 @@ class BudgetOptimizer:
     """
     self._validate_model_fit(use_posterior)
     if new_data is None:
-      new_data = analyzer.DataTensors()
+      new_data = analyzer_module.DataTensors()

     if selected_times is not None:
       warnings.warn(
@@ -1978,7 +1985,8 @@ class BudgetOptimizer:
     filled_data = new_data.validate_and_fill_missing_data(
         required_tensors_names=required_tensors, meridian=self._meridian
     )
-    selected_times = self._validate_selected_times(
+    selected_times = _expand_selected_times(
+        meridian=self._meridian,
         start_date=start_date,
         end_date=end_date,
         new_data=filled_data,
@@ -2008,7 +2016,7 @@ class BudgetOptimizer:
         )
     )
     if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
-      opt_freq_data = analyzer.DataTensors(
+      opt_freq_data = analyzer_module.DataTensors(
          rf_impressions=filled_data.reach * filled_data.frequency,
          rf_spend=filled_data.rf_spend,
          revenue_per_kpi=filled_data.revenue_per_kpi,
@@ -2098,38 +2106,11 @@ class BudgetOptimizer:
         attrs={c.SPEND_STEP_SIZE: spend_step_size},
     )

-  def _validate_selected_times(
-      self,
-      start_date: tc.Date,
-      end_date: tc.Date,
-      new_data: analyzer.DataTensors | None,
-  ) -> Sequence[str] | Sequence[bool] | None:
-    """Validates and returns the selected times."""
-    if start_date is None and end_date is None:
-      return None
-
-    new_data = new_data or analyzer.DataTensors()
-    if new_data.get_modified_times(self._meridian) is None:
-      return self._meridian.expand_selected_time_dims(
-          start_date=start_date,
-          end_date=end_date,
-      )
-    else:
-      assert new_data.time is not None
-      new_times_str = np.asarray(new_data.time).astype(str).tolist()
-      time_coordinates = tc.TimeCoordinates.from_dates(new_times_str)
-      expanded_dates = time_coordinates.expand_selected_time_dims(
-          start_date=start_date,
-          end_date=end_date,
-      )
-      expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
-      return [x in expanded_str for x in new_times_str]
-
   def _get_incremental_outcome_tensors(
       self,
       hist_spend: np.ndarray,
       spend: np.ndarray,
-      new_data: analyzer.DataTensors | None = None,
+      new_data: analyzer_module.DataTensors | None = None,
       optimal_frequency: Sequence[float] | None = None,
   ) -> tuple[
       backend.Tensor | None,
@@ -2165,7 +2146,7 @@ class BudgetOptimizer:
     Returns:
       Tuple of backend.tensors (new_media, new_reach, new_frequency).
     """
-    new_data = new_data or analyzer.DataTensors()
+    new_data = new_data or analyzer_module.DataTensors()
     filled_data = new_data.validate_and_fill_missing_data(
         c.PAID_CHANNELS,
         self._meridian,
@@ -2206,7 +2187,7 @@ class BudgetOptimizer:
       self,
       hist_spend: np.ndarray,
       spend: np.ndarray,
-      new_data: analyzer.DataTensors | None = None,
+      new_data: analyzer_module.DataTensors | None = None,
       use_posterior: bool = True,
       use_kpi: bool = False,
       start_date: tc.Date = None,
@@ -2218,13 +2199,16 @@ class BudgetOptimizer:
       use_historical_budget: bool = True,
   ) -> xr.Dataset:
     """Creates the budget dataset."""
-    new_data = new_data or analyzer.DataTensors()
+    new_data = new_data or analyzer_module.DataTensors()
     filled_data = new_data.validate_and_fill_missing_data(
         c.PAID_DATA + (c.TIME,),
         self._meridian,
     )
-    selected_times = self._validate_selected_times(
-
+    selected_times = _expand_selected_times(
+        meridian=self._meridian,
+        start_date=start_date,
+        end_date=end_date,
+        new_data=new_data,
     )
     spend_tensor = backend.to_tensor(spend, dtype=backend.float32)
     hist_spend = backend.to_tensor(hist_spend, dtype=backend.float32)
@@ -2237,7 +2221,7 @@ class BudgetOptimizer:
         )
     )
     budget = np.sum(spend_tensor)
-    inc_outcome_data = analyzer.DataTensors(
+    inc_outcome_data = analyzer_module.DataTensors(
         media=new_media,
         reach=new_reach,
         frequency=new_frequency,
@@ -2269,7 +2253,7 @@ class BudgetOptimizer:
     # shape (n_channels, n_metrics) where n_metrics = 4 for (mean, median,
     # ci_lo, and ci_hi)
     incremental_outcome_with_mean_median_and_ci = (
-        analyzer.get_central_tendency_and_ci(
+        analyzer_module.get_central_tendency_and_ci(
             data=incremental_outcome,
             confidence_level=confidence_level,
             include_median=True,
@@ -2281,7 +2265,7 @@ class BudgetOptimizer:
     )

     aggregated_impressions = self._analyzer.get_aggregated_impressions(
-        new_data=analyzer.DataTensors(
+        new_data=analyzer_module.DataTensors(
            media=new_media, reach=new_reach, frequency=new_frequency
        ),
        selected_times=selected_times,
@@ -2292,7 +2276,7 @@ class BudgetOptimizer:
         include_non_paid_channels=False,
     )
     effectiveness_with_mean_median_and_ci = (
-        analyzer.get_central_tendency_and_ci(
+        analyzer_module.get_central_tendency_and_ci(
             data=backend.divide_no_nan(
                 incremental_outcome, aggregated_impressions
             ),
@@ -2301,12 +2285,12 @@ class BudgetOptimizer:
         )
     )

-    roi = analyzer.get_central_tendency_and_ci(
+    roi = analyzer_module.get_central_tendency_and_ci(
         data=backend.divide_no_nan(incremental_outcome, spend_tensor),
         confidence_level=confidence_level,
         include_median=True,
     )
-    marginal_roi = analyzer.get_central_tendency_and_ci(
+    marginal_roi = analyzer_module.get_central_tendency_and_ci(
         data=backend.divide_no_nan(
             mroi_numerator, spend_tensor * incremental_increase
         ),
@@ -2314,7 +2298,7 @@ class BudgetOptimizer:
         include_median=True,
     )

-    cpik = analyzer.get_central_tendency_and_ci(
+    cpik = analyzer_module.get_central_tendency_and_ci(
         data=backend.divide_no_nan(spend_tensor, incremental_outcome),
         confidence_level=confidence_level,
         include_median=True,
@@ -2374,7 +2358,7 @@ class BudgetOptimizer:
       i: int,
       incremental_outcome_grid: np.ndarray,
       multipliers_grid: backend.Tensor,
-      new_data: analyzer.DataTensors | None = None,
+      new_data: analyzer_module.DataTensors | None = None,
       selected_times: Sequence[str] | Sequence[bool] | None = None,
       use_posterior: bool = True,
       use_kpi: bool = False,
@@ -2416,7 +2400,7 @@ class BudgetOptimizer:
         reducing `batch_size`. The calculation will generally be faster with
         larger `batch_size` values.
     """
-    new_data = new_data or analyzer.DataTensors()
+    new_data = new_data or analyzer_module.DataTensors()
     filled_data = new_data.validate_and_fill_missing_data(
         c.PAID_DATA, self._meridian
     )
@@ -2455,7 +2439,7 @@ class BudgetOptimizer:
         np.asarray(
             self._analyzer.incremental_outcome(
                 use_posterior=use_posterior,
-                new_data=analyzer.DataTensors(
+                new_data=analyzer_module.DataTensors(
                     media=new_media,
                     reach=new_reach,
                     frequency=new_frequency,
@@ -2477,7 +2461,7 @@ class BudgetOptimizer:
       spend_bound_lower: np.ndarray,
      spend_bound_upper: np.ndarray,
      step_size: int,
-      new_data: analyzer.DataTensors | None = None,
+      new_data: analyzer_module.DataTensors | None = None,
       selected_times: Sequence[str] | Sequence[bool] | None = None,
       use_posterior: bool = True,
       use_kpi: bool = False,
@@ -2958,3 +2942,63 @@ def _expand_tensor(tensor: backend.Tensor, required_shape: tuple[int, ...]):
         f'Cannot expand tensor with shape {tensor.shape} to target'
         f' {required_shape}.'
     )
+
+
+def _expand_selected_times(
+    meridian: model.Meridian,
+    start_date: tc.Date,
+    end_date: tc.Date,
+    new_data: analyzer_module.DataTensors | None,
+    return_flexible_str: bool = False,
+) -> Sequence[str] | Sequence[bool] | None:
+  """Creates selected_times from start_date and end_date.
+
+  This function creates `selected_times` argument based on `start_date`,
+  `end_date` and `new_data`. If `new_data` is not used or used with unmodified
+  times, dates are selected from `meridian.input_data.time`. In the flexible
+  time scenario, when `new_data` is provided with modified times, dates are
+  selected from `new_data.time`. In this case, `new_data.time` must be provided
+  and the function returns a list of booleans.
+
+  Args:
+    meridian: The `Meridian` object with original data.
+    start_date: Start date of the selected time period.
+    end_date: End date of the selected time period.
+    new_data: The optional `DataTensors` object. If times are modified in
+      `new_data`, then `new_data.time` must be provided.
+    return_flexible_str: Whether to return a list of strings or a list of
+      booleans in case time is modified in `new_data`.
+
+  Returns:
+    If both `start_date` and `end_date` are `None`, returns `None`. If
+    `new_data` is not used or used with unmodified times, returns a list of
+    strings with selected dates. If `new_data` is used with modified times,
+    returns a list of strings or a list of booleans depending on the
+    `return_flexible_str` argument.
+  """
+  if start_date is None and end_date is None:
+    return None
+
+  new_data = new_data or analyzer_module.DataTensors()
+  if new_data.get_modified_times(meridian) is None:
+    return meridian.expand_selected_time_dims(
+        start_date=start_date,
+        end_date=end_date,
+    )
+  else:
+    assert new_data.time is not None
+    new_times_str = np.asarray(new_data.time).astype(str).tolist()
+    time_coordinates = tc.TimeCoordinates.from_dates(new_times_str)
+    expanded_dates = time_coordinates.expand_selected_time_dims(
+        start_date=start_date,
+        end_date=end_date,
+    )
+    if return_flexible_str:
+      if expanded_dates is None:
+        expanded_dates = time_coordinates.all_dates
+      expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
+      return [x for x in new_times_str if x in expanded_str]
+    # TODO: Remove once every method uses `new_data.time`.
+    else:
+      expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
+      return [x in expanded_str for x in new_times_str]
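To make the boolean-mask branch above concrete, here is a hedged, self-contained sketch of the same selection logic on made-up dates; it mirrors the helper rather than calling the private function:

```python
# ISO date strings compare in chronological order, so a plain range check
# reproduces the mask the helper builds over `new_data.time`.
new_times = ["2024-01-01", "2024-01-08", "2024-01-15", "2024-01-22"]
start_date, end_date = "2024-01-08", "2024-01-15"

mask = [start_date <= t <= end_date for t in new_times]
print(mask)  # [False, True, True, False]
```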
meridian/analysis/summarizer.py
CHANGED
@@ -77,6 +77,7 @@ class Summarizer:
       filepath: str,
       start_date: tc.Date = None,
       end_date: tc.Date = None,
+      use_kpi: bool = False,
   ):
     """Generates and saves the HTML results summary output.

@@ -86,15 +87,18 @@ class Summarizer:
       start_date: Optional start date selector, *inclusive*, in _yyyy-mm-dd_
         format.
       end_date: Optional end date selector, *inclusive* in _yyyy-mm-dd_ format.
+      use_kpi: If `True`, calculate the incremental KPI. Otherwise, calculate
+        the incremental revenue using the revenue per KPI (if available).
     """
     os.makedirs(filepath, exist_ok=True)
     with open(os.path.join(filepath, filename), 'w') as f:
-      f.write(self._gen_model_results_summary(start_date, end_date))
+      f.write(self._gen_model_results_summary(start_date, end_date, use_kpi))

   def _gen_model_results_summary(
       self,
       start_date: tc.Date = None,
       end_date: tc.Date = None,
+      use_kpi: bool = False,
   ) -> str:
     """Generate HTML results summary output (as sanitized content str)."""
     all_dates = self._meridian.input_data.time_coordinates.all_dates
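For context (not part of the diff), a hedged usage sketch of the new flag, assuming `mmm` is a fitted Meridian model and that the `filename`/`filepath` parameters preceding this hunk are unchanged from 1.2.0:

```python
from meridian.analysis import summarizer

results_summarizer = summarizer.Summarizer(mmm)

# With use_kpi=True the summary cards are computed and labeled in KPI units
# even when `revenue_per_kpi` is available; the default (False) keeps the
# previous revenue-based behavior.
results_summarizer.output_model_results_summary(
    filename='results_summary.html',
    filepath='/tmp/meridian_reports',
    use_kpi=True,
)
```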
@@ -140,6 +144,7 @@ class Summarizer:
     cards_htmls = self._create_cards_htmls(
         template_env,
         selected_times=selected_times,
+        use_kpi=use_kpi,
     )

     return html_template.render(
@@ -150,6 +155,7 @@ class Summarizer:
       self,
       template_env: jinja2.Environment,
       selected_times: Sequence[str] | None,
+      use_kpi: bool,
   ) -> Sequence[str]:
     """Creates the HTML snippets for cards in the summary page."""
     media_summary = visualizer.MediaSummary(
@@ -165,10 +171,13 @@ class Summarizer:
     )
     cards = [
         self._create_model_fit_card_html(
-            template_env, selected_times=selected_times
+            template_env, selected_times=selected_times, use_kpi=use_kpi
         ),
         self._create_outcome_contrib_card_html(
-            template_env,
+            template_env,
+            media_summary,
+            selected_times=selected_times,
+            use_kpi=use_kpi,
         ),
         self._create_performance_breakdown_card_html(
             template_env, media_summary
@@ -179,16 +188,17 @@ class Summarizer:
             media_summary=media_summary,
             media_effects=media_effects,
             reach_frequency=reach_frequency,
+            use_kpi=use_kpi,
         ),
     ]
     return cards

   def _create_model_fit_card_html(
-      self, template_env: jinja2.Environment, **kwargs
+      self, template_env: jinja2.Environment, use_kpi: bool, **kwargs
   ) -> str:
     """Creates the HTML snippet for the Model Fit card."""
     model_fit = self._model_fit
-    outcome = self._kpi_or_revenue()
+    outcome = self._kpi_or_revenue(use_kpi)
     expected_actual_outcome_chart = formatter.ChartSpec(
         id=summary_text.EXPECTED_ACTUAL_OUTCOME_CHART_ID,
         description=summary_text.EXPECTED_ACTUAL_OUTCOME_CHART_DESCRIPTION_FORMAT.format(
@@ -197,7 +207,9 @@ class Summarizer:
         chart_json=model_fit.plot_model_fit(**kwargs).to_json(),
     )

-    predictive_accuracy_table = self._predictive_accuracy_table_spec(
+    predictive_accuracy_table = self._predictive_accuracy_table_spec(
+        use_kpi=use_kpi, **kwargs
+    )
     insights = summary_text.MODEL_FIT_INSIGHTS_FORMAT

     return formatter.create_card_html(
@@ -207,9 +219,11 @@ class Summarizer:
         [expected_actual_outcome_chart, predictive_accuracy_table],
     )

-  def _predictive_accuracy_table_spec(
+  def _predictive_accuracy_table_spec(
+      self, use_kpi: bool, **kwargs
+  ) -> formatter.TableSpec:
     """Creates the HTML snippet for the predictive accuracy table."""
-    outcome = self._kpi_or_revenue()
+    outcome = self._kpi_or_revenue(use_kpi)
     model_diag = self._model_diagnostics
     table = model_diag.predictive_accuracy_table(column_var=c.METRIC, **kwargs)

@@ -270,9 +284,10 @@ class Summarizer:
       template_env: jinja2.Environment,
       media_summary: visualizer.MediaSummary,
       selected_times: Sequence[str] | None,
+      use_kpi: bool,
   ) -> str:
     """Creates the HTML snippet for the Outcome Contrib card."""
-    outcome = self._kpi_or_revenue()
+    outcome = self._kpi_or_revenue(use_kpi)

     num_selected_times = (
         self._meridian.n_times
@@ -442,9 +457,10 @@ class Summarizer:
       media_summary: visualizer.MediaSummary,
       media_effects: visualizer.MediaEffects,
       reach_frequency: visualizer.ReachAndFrequency | None,
+      use_kpi: bool,
   ) -> str:
     """Creates the HTML snippet for the Optimal Analyst card."""
-    outcome = self._kpi_or_revenue()
+    outcome = self._kpi_or_revenue(use_kpi)
     charts = []
     charts.append(
         formatter.ChartSpec(
@@ -457,6 +473,7 @@ class Summarizer:
             selected_times=(
                 frozenset(selected_times) if selected_times else None
             ),
+            use_kpi=use_kpi,
             plot_separately=False,
             include_ci=False,
             num_channels_displayed=7,
@@ -524,9 +541,7 @@ class Summarizer:
         rf_channel=most_spend_rf_channel
     ).optimal_frequency

-  def _kpi_or_revenue(self) -> str:
-    if self._meridian.input_data.revenue_per_kpi is
-
-
-      outcome_str = c.KPI.upper()
-    return outcome_str
+  def _kpi_or_revenue(self, use_kpi: bool) -> str:
+    if use_kpi or self._meridian.input_data.revenue_per_kpi is None:
+      return c.KPI.upper()
+    return c.REVENUE