google-meridian 1.3.2__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. {google_meridian-1.3.2.dist-info → google_meridian-1.5.0.dist-info}/METADATA +18 -11
  2. google_meridian-1.5.0.dist-info/RECORD +112 -0
  3. {google_meridian-1.3.2.dist-info → google_meridian-1.5.0.dist-info}/WHEEL +1 -1
  4. {google_meridian-1.3.2.dist-info → google_meridian-1.5.0.dist-info}/top_level.txt +1 -0
  5. meridian/analysis/analyzer.py +558 -398
  6. meridian/analysis/optimizer.py +90 -68
  7. meridian/analysis/review/reviewer.py +4 -1
  8. meridian/analysis/summarizer.py +13 -3
  9. meridian/analysis/test_utils.py +2911 -2102
  10. meridian/analysis/visualizer.py +37 -14
  11. meridian/backend/__init__.py +106 -0
  12. meridian/constants.py +2 -0
  13. meridian/data/input_data.py +30 -52
  14. meridian/data/input_data_builder.py +2 -9
  15. meridian/data/test_utils.py +107 -51
  16. meridian/data/validator.py +48 -0
  17. meridian/mlflow/autolog.py +19 -9
  18. meridian/model/__init__.py +2 -0
  19. meridian/model/adstock_hill.py +3 -5
  20. meridian/model/context.py +1059 -0
  21. meridian/model/eda/constants.py +335 -4
  22. meridian/model/eda/eda_engine.py +723 -312
  23. meridian/model/eda/eda_outcome.py +177 -33
  24. meridian/model/equations.py +418 -0
  25. meridian/model/knots.py +58 -47
  26. meridian/model/model.py +228 -878
  27. meridian/model/model_test_data.py +38 -0
  28. meridian/model/posterior_sampler.py +103 -62
  29. meridian/model/prior_sampler.py +114 -94
  30. meridian/model/spec.py +23 -14
  31. meridian/templates/card.html.jinja +9 -7
  32. meridian/templates/chart.html.jinja +1 -6
  33. meridian/templates/finding.html.jinja +19 -0
  34. meridian/templates/findings.html.jinja +33 -0
  35. meridian/templates/formatter.py +41 -5
  36. meridian/templates/formatter_test.py +127 -0
  37. meridian/templates/style.css +66 -9
  38. meridian/templates/style.scss +85 -4
  39. meridian/templates/table.html.jinja +1 -0
  40. meridian/version.py +1 -1
  41. scenarioplanner/__init__.py +42 -0
  42. scenarioplanner/converters/__init__.py +25 -0
  43. scenarioplanner/converters/dataframe/__init__.py +28 -0
  44. scenarioplanner/converters/dataframe/budget_opt_converters.py +383 -0
  45. scenarioplanner/converters/dataframe/common.py +71 -0
  46. scenarioplanner/converters/dataframe/constants.py +137 -0
  47. scenarioplanner/converters/dataframe/converter.py +42 -0
  48. scenarioplanner/converters/dataframe/dataframe_model_converter.py +70 -0
  49. scenarioplanner/converters/dataframe/marketing_analyses_converters.py +543 -0
  50. scenarioplanner/converters/dataframe/rf_opt_converters.py +314 -0
  51. scenarioplanner/converters/mmm.py +743 -0
  52. scenarioplanner/converters/mmm_converter.py +58 -0
  53. scenarioplanner/converters/sheets.py +156 -0
  54. scenarioplanner/converters/test_data.py +714 -0
  55. scenarioplanner/linkingapi/__init__.py +47 -0
  56. scenarioplanner/linkingapi/constants.py +27 -0
  57. scenarioplanner/linkingapi/url_generator.py +131 -0
  58. scenarioplanner/mmm_ui_proto_generator.py +355 -0
  59. schema/__init__.py +5 -2
  60. schema/mmm_proto_generator.py +71 -0
  61. schema/model_consumer.py +133 -0
  62. schema/processors/__init__.py +77 -0
  63. schema/processors/budget_optimization_processor.py +832 -0
  64. schema/processors/common.py +64 -0
  65. schema/processors/marketing_processor.py +1137 -0
  66. schema/processors/model_fit_processor.py +367 -0
  67. schema/processors/model_kernel_processor.py +117 -0
  68. schema/processors/model_processor.py +415 -0
  69. schema/processors/reach_frequency_optimization_processor.py +584 -0
  70. schema/serde/distribution.py +12 -7
  71. schema/serde/hyperparameters.py +54 -107
  72. schema/serde/meridian_serde.py +6 -1
  73. schema/test_data.py +380 -0
  74. schema/utils/__init__.py +2 -0
  75. schema/utils/date_range_bucketing.py +117 -0
  76. schema/utils/proto_enum_converter.py +127 -0
  77. google_meridian-1.3.2.dist-info/RECORD +0 -76
  78. {google_meridian-1.3.2.dist-info → google_meridian-1.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1137 @@
1
+ # Copyright 2025 The Meridian Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Meridian module for analyzing marketing data in a Meridian model.
16
+
17
+ This module provides a `MarketingProcessor`, designed to extract key marketing
18
+ insights from a trained Meridian model. It allows users to understand the impact
19
+ of different marketing channels, calculate return on investment (ROI), and
20
+ generate response curves.
21
+
22
+ The processor uses specifications defined in `MarketingAnalysisSpec` to control
23
+ the analysis. Users can request:
24
+
25
+ 1. **Media Summary Metrics:** Aggregated performance metrics for each media
26
+ channel, including spend, contribution, ROI, and effectiveness.
27
+ 2. **Incremental Outcomes:** The additional KPI or revenue driven by marketing
28
+ activities, calculated by comparing against a baseline scenario (e.g., zero
29
+ spend).
30
+ 3. **Response Curves:** Visualizations of how the predicted KPI or revenue
31
+ changes as spend on a particular channel increases, helping to identify
32
+ diminishing returns.
33
+
34
+ The results are output as a `MarketingAnalysisList` protobuf message, containing
35
+ detailed breakdowns per channel and for the baseline.
36
+
37
+ Key Classes:
38
+
39
+ - `MediaSummarySpec`: Configures the calculation of summary metrics like ROI.
40
+ - `IncrementalOutcomeSpec`: Configures the calculation of incremental impact.
41
+ - `ResponseCurveSpec`: Configures response curve generation.
42
+ - `MarketingAnalysisSpec`: The main specification to combine the above,
43
+ define date ranges, and set confidence levels.
44
+ - `MarketingProcessor`: The processor class that executes the analysis based
45
+ on the provided specs.
46
+
47
+ Example Usage:
48
+
49
+ 1. **Get Media Summary Metrics for a specific period:**
50
+
51
+ ```python
52
+ from schema.processors import marketing_processor
53
+ import datetime
54
+
55
+ # Assuming 'trained_model' is a loaded Meridian model object
56
+
57
+ spec = marketing_processor.MarketingAnalysisSpec(
58
+ analysis_name="q1_summary",
59
+ start_date=datetime.date(2023, 1, 1),
60
+ end_date=datetime.date(2023, 3, 31),
61
+ media_summary_spec=marketing_processor.MediaSummarySpec(
62
+ aggregate_times=True
63
+ ),
64
+ response_curve_spec=marketing_processor.ResponseCurveSpec(),
65
+ confidence_level=0.9,
66
+ )
67
+
68
+ processor = marketing_processor.MarketingProcessor(trained_model)
69
+ # `result` is a `marketing_analysis_pb2.MarketingAnalysisList` proto
70
+ result = processor.execute([spec])
71
+ ```
72
+
73
+ 2. **Calculate Incremental Outcome with new spend data:**
74
+
75
+ ```python
76
+ from schema.processors import marketing_processor
77
+ from meridian.analysis import analyzer
78
+ import datetime
79
+ import numpy as np
80
+
81
+ # Assuming 'trained_model' is a loaded Meridian model object
82
+ # Assuming 'new_media_spend' is a numpy array with shape (time, channels)
83
+
84
+ # Create DataTensors for the new data
85
+ # Example:
86
+ # new_data = analyzer.DataTensors(
87
+ # media=new_media_spend,
88
+ # time=new_time_index,
89
+ # )
90
+
91
+ spec = marketing_processor.MarketingAnalysisSpec(
92
+ analysis_name="what_if_scenario",
93
+ # NOTE: Dates must align with `new_data.time`
94
+ start_date=datetime.date(2023, 1, 1),
95
+ end_date=datetime.date(2023, 1, 31),
96
+ incremental_outcome_spec=marketing_processor.IncrementalOutcomeSpec(
97
+ new_data=new_data,
98
+ aggregate_times=True,
99
+ ),
100
+ )
101
+
102
+ processor = marketing_processor.MarketingProcessor(trained_model)
103
+ result = processor.execute([spec])
104
+
105
+ print(f"Incremental Outcome for {spec.analysis_name}:")
106
+ # Process results from result.marketing_analyses
107
+ ```
108
+
109
+ Note: You can provide the processor with multiple specs. This would result in
110
+ multiple marketing analysis results in the output.
111
+ """
112
+
113
+ from collections.abc import Sequence
114
+ import dataclasses
115
+ import datetime
116
+ import functools
117
+ import warnings
118
+
119
+ from meridian import constants
120
+ from meridian.analysis import analyzer
121
+ from meridian.data import time_coordinates
122
+ from mmm.v1 import mmm_pb2
123
+ from mmm.v1.common import date_interval_pb2
124
+ from mmm.v1.common import kpi_type_pb2
125
+ from mmm.v1.marketing.analysis import marketing_analysis_pb2
126
+ from mmm.v1.marketing.analysis import media_analysis_pb2
127
+ from mmm.v1.marketing.analysis import non_media_analysis_pb2
128
+ from mmm.v1.marketing.analysis import outcome_pb2
129
+ from mmm.v1.marketing.analysis import response_curve_pb2
130
+ from schema.processors import common
131
+ from schema.processors import model_processor
132
+ import numpy as np
133
+ import xarray as xr
134
+
135
# Public API of this module; names not listed here are internal.
__all__ = [
    "MediaSummarySpec",
    "IncrementalOutcomeSpec",
    "ResponseCurveSpec",
    "MarketingAnalysisSpec",
    "MarketingProcessor",
]
142
+
143
+
144
@dataclasses.dataclass(frozen=True, kw_only=True)
class MediaSummarySpec(model_processor.Spec):
  """Stores parameters needed for creating media summary metrics.

  Attributes:
    aggregate_times: Boolean. If `True`, the media summary metrics are
      aggregated over time. Defaults to `True`.
    marginal_roi_by_reach: Boolean. Marginal ROI (mROI) is defined as the return
      on the next dollar spent. If this argument is `True`, the assumption is
      that the next dollar spent only impacts reach, holding frequency constant.
      If this argument is `False`, the assumption is that the next dollar spent
      only impacts frequency, holding reach constant. Defaults to `True`.
    include_non_paid_channels: Boolean. If `True`, the media summary metrics
      include non-paid channels. Defaults to `False`.
    new_data: Optional `DataTensors` container with optional tensors: `media`,
      `reach`, `frequency`, `organic_media`, `organic_reach`,
      `organic_frequency`, `non_media_treatments` and `revenue_per_kpi`. If
      `None`, the metrics are calculated using the `InputData` provided to the
      Meridian object. If `new_data` is provided, the metrics are calculated
      using the new tensors in `new_data` and the original values of the
      remaining tensors.
    media_selected_times: Optional list containing booleans with length equal to
      the number of time periods in `new_data`, if provided. If `new_data` is
      provided, `media_selected_times` can select any subset of time periods in
      `new_data`. If `new_data` is not provided, `media_selected_times` selects
      from model's original media data.
  """

  aggregate_times: bool = True
  marginal_roi_by_reach: bool = True
  include_non_paid_channels: bool = False
  new_data: analyzer.DataTensors | None = None
  # NOTE(review): `media_selected_times` is declared here but does not appear
  # to be consumed by the visible media-summary code path — verify upstream.
  media_selected_times: Sequence[bool] | None = None

  def validate(self):
    """No-op: this spec enforces no constraints (and skips base validation)."""
    pass
180
+
181
+
182
@dataclasses.dataclass(frozen=True, kw_only=True)
class IncrementalOutcomeSpec(model_processor.Spec):
  """Parameters controlling an incremental-outcome marketing analysis.

  Attributes:
    aggregate_times: If `True` (the default), the computed metrics are
      aggregated over all selected time periods.
    new_data: Optional `DataTensors` container with any of the tensors:
      `media`, `reach`, `frequency`, `organic_media`, `organic_reach`,
      `organic_frequency`, `non_media_treatments` and `revenue_per_kpi`. When
      `None`, the incremental outcome is computed from the `InputData` held by
      the Meridian object. When provided, the supplied tensors replace the
      originals and all remaining tensors keep their original values. If any
      supplied tensor has a different number of time periods than `InputData`,
      every tensor must be supplied with that same number of time periods.
    media_selected_times: Optional boolean mask over time periods. When
      `new_data` is provided, the mask selects time periods within `new_data`
      and must match its length; otherwise it selects from (and must match the
      length of) the model's original media data.
    include_non_paid_channels: If `True`, non-paid channels are included in
      the incremental outcome. Defaults to `False`.
  """

  aggregate_times: bool = True
  new_data: analyzer.DataTensors | None = None
  media_selected_times: Sequence[bool] | None = None
  include_non_paid_channels: bool = False

  def validate(self):
    """Validates the spec; requires `time` whenever `new_data` is supplied."""
    super().validate()
    if self.new_data is not None and self.new_data.time is None:
      raise ValueError("`time` must be provided in `new_data`.")
222
+
223
+
224
# NOTE(review): unlike the sibling specs this dataclass is not `kw_only`;
# adding it now could break positional callers — confirm before changing.
@dataclasses.dataclass(frozen=True)
class ResponseCurveSpec(model_processor.Spec):
  """Stores parameters needed for creating response curves.

  Attributes:
    by_reach: Boolean. For channels with reach and frequency. If `True`, plots
      the response curve by reach. If `False`, plots the response curve by
      frequency.
  """

  by_reach: bool = True

  def validate(self):
    """No-op: this spec enforces no constraints (and skips base validation)."""
    pass
238
+
239
+
240
@dataclasses.dataclass(frozen=True, kw_only=True)
class MarketingAnalysisSpec(model_processor.DatedSpec):
  """Top-level spec for turning a model into `MarketingAnalysis` protos.

  Exactly one of `media_summary_spec` or `incremental_outcome_spec` must be
  set; providing both, or neither, is a validation error.

  Attributes:
    media_summary_spec: Parameters for creating media summary metrics.
      Mutually exclusive with `incremental_outcome_spec`.
    incremental_outcome_spec: Parameters for creating incremental outcome.
      Mutually exclusive with `media_summary_spec`. When its `new_data` is
      set, this spec's start and end dates must fall within `new_data.time`.
    response_curve_spec: Parameters for creating response curves. Response
      curves are only computed for specs that aggregate times and have a
      `media_summary_spec` selected.
    confidence_level: Credible-interval confidence level, strictly between
      zero and one. Defaults to 0.9.
  """

  media_summary_spec: MediaSummarySpec | None = None
  incremental_outcome_spec: IncrementalOutcomeSpec | None = None
  response_curve_spec: ResponseCurveSpec = dataclasses.field(
      default_factory=ResponseCurveSpec
  )
  confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL

  def validate(self):
    """Validates the date range, confidence level, and sub-spec exclusivity."""
    super().validate()
    if not 0 < self.confidence_level < 1:
      raise ValueError(
          "Confidence level must be greater than 0 and less than 1."
      )
    has_media_summary = self.media_summary_spec is not None
    has_incremental_outcome = self.incremental_outcome_spec is not None
    if not has_media_summary and not has_incremental_outcome:
      raise ValueError(
          "At least one of `media_summary_spec` or `incremental_outcome_spec`"
          " must be provided."
      )
    if has_media_summary and has_incremental_outcome:
      raise ValueError(
          "Only one of `media_summary_spec` or `incremental_outcome_spec` can"
          " be provided."
      )
290
+
291
+
292
+ class MarketingProcessor(
293
+ model_processor.ModelProcessor[
294
+ MarketingAnalysisSpec, marketing_analysis_pb2.MarketingAnalysisList
295
+ ]
296
+ ):
297
+ """Generates `MarketingAnalysis` protos for a given trained Meridian model.
298
+
299
+ A `MarketingAnalysis` proto is generated for each spec supplied to
300
+ `execute()`. Within each `MarketingAnalysis` proto, a `MediaAnalysis` proto
301
+ is created for each channel in the model. One `NonMediaAnalysis` proto is also
302
+ created for the model's baseline data.
303
+ """
304
+
305
  def __init__(
      self,
      trained_model: model_processor.ModelType,
  ):
    """Initializes the processor from a trained Meridian model.

    Args:
      trained_model: A trained Meridian model, normalized through
        `model_processor.ensure_trained_model`.
    """
    trained_model = model_processor.ensure_trained_model(trained_model)
    self._analyzer = trained_model.internal_analyzer
    self._meridian = trained_model.mmm
    self._model_time_coordinates = trained_model.time_coordinates
    # Interval length in days, as exposed by the model's time coordinates.
    self._interval_length = self._model_time_coordinates.interval_days

    # If the input data KPI type is "revenue", then the `revenue_per_kpi` tensor
    # must exist, and general-KPI type outcomes should not be defined.
    self._revenue_kpi_type = (
        trained_model.mmm.input_data.kpi_type == constants.REVENUE
    )
    # `_kpi_only` is TRUE iff the input data KPI type is "non-revenue" AND the
    # `revenue_per_kpi` tensor is None.
    self._kpi_only = trained_model.mmm.input_data.revenue_per_kpi is None
323
+
324
  @classmethod
  def spec_type(cls) -> type[MarketingAnalysisSpec]:
    """Returns the spec class this processor consumes."""
    return MarketingAnalysisSpec
327
+
328
  @classmethod
  def output_type(cls) -> type[marketing_analysis_pb2.MarketingAnalysisList]:
    """Returns the proto message class this processor produces."""
    return marketing_analysis_pb2.MarketingAnalysisList
331
+
332
  def _set_output(
      self,
      output: mmm_pb2.Mmm,
      result: marketing_analysis_pb2.MarketingAnalysisList,
  ):
    """Copies `result` into the `marketing_analysis_list` field of `output`."""
    output.marketing_analysis_list.CopyFrom(result)
338
+
339
+ def execute(
340
+ self, marketing_analysis_specs: Sequence[MarketingAnalysisSpec]
341
+ ) -> marketing_analysis_pb2.MarketingAnalysisList:
342
+ """Runs a marketing analysis on the model based on the given specs.
343
+
344
+ A `MarketingAnalysis` proto is created for each of the given specs. Each
345
+ `MarketingAnalysis` proto contains a list of `MediaAnalysis` protos and a
346
+ singleton `NonMediaAnalysis` proto for the baseline analysis. The analysis
347
+ covers the time period bounded by the spec's start and end dates.
348
+
349
+ The singleton non-media analysis is performed on the model's baseline data,
350
+ and contains metrics such as incremental outcome and baseline percent of
351
+ contribution across media and non-media.
352
+
353
+ A media analysis is performed for each channel in the model, plus an
354
+ "All Channels" synthetic channel. The media analysis contains metrics such
355
+ as spend, percent of spend, incremental outcome, percent of contribution,
356
+ and effectiveness. Depending on the type of data (revenue-based or
357
+ non-revenue-based) in the model, the analysis also contains CPIK
358
+ (non-revenue-based) or ROI and MROI (revenue-based).
359
+
360
+ Args:
361
+ marketing_analysis_specs: A sequence of MarketingAnalysisSpec objects.
362
+
363
+ Returns:
364
+ A MarketingAnalysisList proto containing the results of the marketing
365
+ analysis for each spec.
366
+ """
367
+ marketing_analysis_list: list[marketing_analysis_pb2.MarketingAnalysis] = []
368
+
369
+ for spec in marketing_analysis_specs:
370
+ if spec.incremental_outcome_spec is not None:
371
+ new_data = spec.incremental_outcome_spec.new_data
372
+ elif spec.media_summary_spec is not None:
373
+ new_data = spec.media_summary_spec.new_data
374
+ else:
375
+ new_data = None
376
+
377
+ if new_data is not None and new_data.time is not None:
378
+ new_time_coords = time_coordinates.TimeCoordinates.from_dates(
379
+ np.asarray(new_data.time).astype(str).tolist()
380
+ )
381
+ resolver = spec.resolver(new_time_coords)
382
+ else:
383
+ resolver = spec.resolver(self._model_time_coordinates)
384
+ media_summary_marketing_analyses = (
385
+ self._generate_marketing_analyses_for_media_summary_spec(
386
+ spec, resolver
387
+ )
388
+ )
389
+ incremental_outcome_marketing_analyses = (
390
+ self._generate_marketing_analyses_for_incremental_outcome_spec(
391
+ spec, resolver
392
+ )
393
+ )
394
+ marketing_analysis_list.extend(
395
+ media_summary_marketing_analyses
396
+ + incremental_outcome_marketing_analyses
397
+ )
398
+
399
+ return marketing_analysis_pb2.MarketingAnalysisList(
400
+ marketing_analyses=marketing_analysis_list
401
+ )
402
+
403
  def _generate_marketing_analyses_for_media_summary_spec(
      self,
      marketing_analysis_spec: MarketingAnalysisSpec,
      resolver: model_processor.DatedSpecResolver,
  ) -> list[marketing_analysis_pb2.MarketingAnalysis]:
    """Creates a list of MarketingAnalysis protos based on the given spec.

    If spec's `aggregate_times` is True, then only one MarketingAnalysis proto
    is created. Otherwise, one MarketingAnalysis proto is created for each date
    interval in the spec.

    Args:
      marketing_analysis_spec: An instance of MarketingAnalysisSpec.
      resolver: A DatedSpecResolver instance.

    Returns:
      A list of `MarketingAnalysis` protos containing the results of the
      marketing analysis for the given spec. Empty if the spec has no
      `media_summary_spec`.
    """
    media_summary_spec = marketing_analysis_spec.media_summary_spec
    if media_summary_spec is None:
      return []

    selected_times = resolver.resolve_to_enumerated_selected_times()
    # This contains either a revenue-based KPI or a non-revenue KPI analysis.
    # The final positional argument is `use_kpi`: pure-KPI outcomes are used
    # only when the model has no revenue data at all (`_kpi_only`).
    media_summary_metrics, non_media_summary_metrics = (
        self._generate_media_and_non_media_summary_metrics(
            media_summary_spec,
            selected_times,
            marketing_analysis_spec.confidence_level,
            self._kpi_only,
        )
    )

    secondary_non_revenue_kpi_metrics = None
    secondary_non_revenue_kpi_non_media_metrics = None
    # If the input data KPI type is "non-revenue", and we calculated its
    # revenue-based KPI outcomes above, then we should also compute its
    # non-revenue KPI outcomes.
    if not self._revenue_kpi_type and not self._kpi_only:
      (
          secondary_non_revenue_kpi_metrics,
          secondary_non_revenue_kpi_non_media_metrics,
      ) = self._generate_media_and_non_media_summary_metrics(
          media_summary_spec,
          selected_times,
          marketing_analysis_spec.confidence_level,
          use_kpi=True,
      )

    # Note: baseline_summary_metrics() prefers computing revenue (scaled from
    # generic KPI with `revenue_per_kpi` when defined) baseline outcome here.
    # TODO: Baseline outcomes for both revenue and non-revenue
    # KPI types should be computed, when possible.
    baseline_outcome = self._analyzer.baseline_summary_metrics(
        confidence_level=marketing_analysis_spec.confidence_level,
        aggregate_times=media_summary_spec.aggregate_times,
        selected_times=selected_times,
    ).sel(distribution=constants.POSTERIOR)

    # Response curves are only computed for specs that aggregate times.
    if media_summary_spec.aggregate_times:
      response_curve_spec = marketing_analysis_spec.response_curve_spec
      response_curves = self._analyzer.response_curves(
          confidence_level=marketing_analysis_spec.confidence_level,
          use_posterior=True,
          selected_times=selected_times,
          use_kpi=self._kpi_only,
          by_reach=response_curve_spec.by_reach,
      )
    else:
      response_curves = None
      warnings.warn(
          "Response curves are not computed for non-aggregated time periods."
      )

    # One interval when aggregating; one per time period otherwise.
    date_intervals = self._build_time_intervals(
        aggregate_times=media_summary_spec.aggregate_times,
        resolver=resolver,
    )

    return self._marketing_metrics_to_protos(
        media_summary_metrics,
        non_media_summary_metrics,
        baseline_outcome,
        secondary_non_revenue_kpi_metrics,
        secondary_non_revenue_kpi_non_media_metrics,
        response_curves,
        marketing_analysis_spec,
        date_intervals,
    )
494
+
495
+ def _generate_media_and_non_media_summary_metrics(
496
+ self,
497
+ media_summary_spec: MediaSummarySpec,
498
+ selected_times: list[str] | None,
499
+ confidence_level: float,
500
+ use_kpi: bool,
501
+ ) -> tuple[xr.Dataset | None, xr.Dataset | None]:
502
+ if media_summary_spec is None:
503
+ return (None, None)
504
+ compute_media_summary_metrics = functools.partial(
505
+ self._analyzer.summary_metrics,
506
+ marginal_roi_by_reach=media_summary_spec.marginal_roi_by_reach,
507
+ selected_times=selected_times,
508
+ aggregate_geos=True,
509
+ aggregate_times=media_summary_spec.aggregate_times,
510
+ new_data=media_summary_spec.new_data,
511
+ confidence_level=confidence_level,
512
+ )
513
+
514
+ media_summary_metrics = compute_media_summary_metrics(
515
+ use_kpi=use_kpi,
516
+ include_non_paid_channels=False,
517
+ ).sel(distribution=constants.POSTERIOR)
518
+ # TODO:Produce one metrics for both paid and non-paid channels.
519
+ non_media_summary_metrics = None
520
+ if media_summary_spec.include_non_paid_channels:
521
+ media_summary_metrics = media_summary_metrics.drop_sel(
522
+ channel=constants.ALL_CHANNELS
523
+ )
524
+ non_media_summary_metrics = (
525
+ compute_media_summary_metrics(
526
+ use_kpi=use_kpi,
527
+ include_non_paid_channels=True,
528
+ )
529
+ .sel(distribution=constants.POSTERIOR)
530
+ .drop_sel(
531
+ channel=media_summary_metrics.coords[constants.CHANNEL].data
532
+ )
533
+ )
534
+ return media_summary_metrics, non_media_summary_metrics
535
+
536
+ def _generate_marketing_analyses_for_incremental_outcome_spec(
537
+ self,
538
+ marketing_analysis_spec: MarketingAnalysisSpec,
539
+ resolver: model_processor.DatedSpecResolver,
540
+ ) -> list[marketing_analysis_pb2.MarketingAnalysis]:
541
+ """Creates a list of `MarketingAnalysis` protos based on the given spec.
542
+
543
+ If the spec's `aggregate_times` is True, then only one `MarketingAnalysis`
544
+ proto is created. Otherwise, one `MarketingAnalysis` proto is created for
545
+ each date interval in the spec.
546
+
547
+ Args:
548
+ marketing_analysis_spec: An instance of MarketingAnalysisSpec.
549
+ resolver: A DatedSpecResolver instance.
550
+
551
+ Returns:
552
+ A list of `MarketingAnalysis` protos containing the results of the
553
+ marketing analysis for the given spec.
554
+ """
555
+ incremental_outcome_spec = marketing_analysis_spec.incremental_outcome_spec
556
+ if incremental_outcome_spec is None:
557
+ return []
558
+
559
+ compute_incremental_outcome = functools.partial(
560
+ self._incremental_outcome_dataset,
561
+ resolver=resolver,
562
+ new_data=incremental_outcome_spec.new_data,
563
+ media_selected_times=incremental_outcome_spec.media_selected_times,
564
+ aggregate_geos=True,
565
+ aggregate_times=incremental_outcome_spec.aggregate_times,
566
+ confidence_level=marketing_analysis_spec.confidence_level,
567
+ include_non_paid_channels=False,
568
+ )
569
+ # This contains either a revenue-based KPI or a non-revenue KPI analysis.
570
+ incremental_outcome = compute_incremental_outcome(use_kpi=self._kpi_only)
571
+
572
+ secondary_non_revenue_kpi_metrics = None
573
+ # If the input data KPI type is "non-revenue", and we calculated its
574
+ # revenue-based KPI outcomes above, then we should also compute its
575
+ # non-revenue KPI outcomes.
576
+ if not self._revenue_kpi_type and not self._kpi_only:
577
+ secondary_non_revenue_kpi_metrics = compute_incremental_outcome(
578
+ use_kpi=True
579
+ )
580
+
581
+ date_intervals = self._build_time_intervals(
582
+ aggregate_times=incremental_outcome_spec.aggregate_times,
583
+ resolver=resolver,
584
+ )
585
+
586
+ return self._marketing_metrics_to_protos(
587
+ metrics=incremental_outcome,
588
+ non_media_metrics=None,
589
+ baseline_outcome=None,
590
+ secondary_non_revenue_kpi_metrics=secondary_non_revenue_kpi_metrics,
591
+ secondary_non_revenue_kpi_non_media_metrics=None,
592
+ response_curves=None,
593
+ marketing_analysis_spec=marketing_analysis_spec,
594
+ date_intervals=date_intervals,
595
+ )
596
+
597
+ def _build_time_intervals(
598
+ self,
599
+ aggregate_times: bool,
600
+ resolver: model_processor.DatedSpecResolver,
601
+ ) -> list[date_interval_pb2.DateInterval]:
602
+ """Creates a list of `DateInterval` protos for the given spec.
603
+
604
+ Args:
605
+ aggregate_times: Whether to aggregate times.
606
+ resolver: A DatedSpecResolver instance.
607
+
608
+ Returns:
609
+ A list of `DateInterval` protos for the given spec.
610
+ """
611
+ if aggregate_times:
612
+ date_interval = resolver.collapse_to_date_interval_proto()
613
+ # This means metrics are aggregated over time, only one date interval is
614
+ # needed.
615
+ return [date_interval]
616
+
617
+ # This list will contain all date intervals for the given spec. All dates
618
+ # in this list will share a common tag.
619
+ return resolver.transform_to_date_interval_protos()
620
+
621
+ def _marketing_metrics_to_protos(
622
+ self,
623
+ metrics: xr.Dataset,
624
+ non_media_metrics: xr.Dataset | None,
625
+ baseline_outcome: xr.Dataset | None,
626
+ secondary_non_revenue_kpi_metrics: xr.Dataset | None,
627
+ secondary_non_revenue_kpi_non_media_metrics: xr.Dataset | None,
628
+ response_curves: xr.Dataset | None,
629
+ marketing_analysis_spec: MarketingAnalysisSpec,
630
+ date_intervals: Sequence[date_interval_pb2.DateInterval],
631
+ ) -> list[marketing_analysis_pb2.MarketingAnalysis]:
632
+ """Creates a list of MarketingAnalysis protos from datasets."""
633
+ if metrics is None:
634
+ raise ValueError("metrics is None")
635
+
636
+ media_channels = list(metrics.coords[constants.CHANNEL].data)
637
+ non_media_channels = (
638
+ list(non_media_metrics.coords[constants.CHANNEL].data)
639
+ if non_media_metrics
640
+ else []
641
+ )
642
+ channels = media_channels + non_media_channels
643
+ channels_with_response_curve = (
644
+ response_curves.coords[constants.CHANNEL].data
645
+ if response_curves
646
+ else []
647
+ )
648
+ marketing_analyses = []
649
+ for date_interval in date_intervals:
650
+ start_date = date_interval.start_date
651
+ start_date_str = datetime.date(
652
+ start_date.year, start_date.month, start_date.day
653
+ ).strftime(constants.DATE_FORMAT)
654
+ media_analyses: list[media_analysis_pb2.MediaAnalysis] = []
655
+ non_media_analyses: list[non_media_analysis_pb2.NonMediaAnalysis] = []
656
+
657
+ # For all channels reported in the media summary metrics
658
+ for channel_name in channels:
659
+ channel_response_curve = None
660
+ if response_curves and (channel_name in channels_with_response_curve):
661
+ channel_response_curve = response_curves.sel(
662
+ {constants.CHANNEL: channel_name}
663
+ )
664
+ is_media_channel = channel_name in media_channels
665
+
666
+ channel_analysis = self._get_channel_metrics(
667
+ marketing_analysis_spec,
668
+ channel_name,
669
+ start_date_str,
670
+ metrics if is_media_channel else non_media_metrics,
671
+ secondary_non_revenue_kpi_metrics
672
+ if is_media_channel
673
+ else secondary_non_revenue_kpi_non_media_metrics,
674
+ channel_response_curve,
675
+ is_media_channel,
676
+ )
677
+ if isinstance(channel_analysis, media_analysis_pb2.MediaAnalysis):
678
+ media_analyses.append(channel_analysis)
679
+
680
+ if isinstance(
681
+ channel_analysis, non_media_analysis_pb2.NonMediaAnalysis
682
+ ):
683
+ non_media_analyses.append(channel_analysis)
684
+
685
+ marketing_analysis = marketing_analysis_pb2.MarketingAnalysis(
686
+ date_interval=date_interval,
687
+ media_analyses=media_analyses,
688
+ non_media_analyses=non_media_analyses,
689
+ )
690
+ if baseline_outcome is not None:
691
+ baseline_analysis = self._get_baseline_metrics(
692
+ marketing_analysis_spec=marketing_analysis_spec,
693
+ baseline_outcome=baseline_outcome,
694
+ start_date=start_date_str,
695
+ )
696
+ marketing_analysis.non_media_analyses.append(baseline_analysis)
697
+
698
+ marketing_analyses.append(marketing_analysis)
699
+
700
+ return marketing_analyses
701
+
702
def _get_channel_metrics(
    self,
    marketing_analysis_spec: MarketingAnalysisSpec,
    channel_name: str,
    start_date_str: str,
    metrics: xr.Dataset,
    secondary_metrics: xr.Dataset | None,
    channel_response_curves: xr.Dataset | None,
    is_media_channel: bool,
) -> (
    media_analysis_pb2.MediaAnalysis | non_media_analysis_pb2.NonMediaAnalysis
):
  """Returns a MediaAnalysis proto for the given channel."""
  # Filter on channel, and additionally on time when the metrics dataset is
  # time-resolved.
  selector = {constants.CHANNEL: channel_name}
  if constants.TIME in metrics.coords:
    selector[constants.TIME] = start_date_str

  secondary_selection = (
      None if secondary_metrics is None else secondary_metrics.sel(selector)
  )

  return self._channel_metrics_to_proto(
      metrics.sel(selector),
      secondary_selection,
      channel_response_curves,
      channel_name,
      is_media_channel,
      marketing_analysis_spec.confidence_level,
  )
737
+
738
def _channel_metrics_to_proto(
    self,
    channel_media_summary_metrics: xr.Dataset,
    channel_secondary_non_revenue_metrics: xr.Dataset | None,
    channel_response_curve: xr.Dataset | None,
    channel_name: str,
    is_media_channel: bool,
    confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
) -> (
    media_analysis_pb2.MediaAnalysis | non_media_analysis_pb2.NonMediaAnalysis
):
  """Builds a (media or non-media) analysis proto for one channel.

  Args:
    channel_media_summary_metrics: The model's media summary metrics,
      pre-filtered to `channel_name`. Contains revenue-based metrics when the
      model's input data is revenue-based or when `revenue_per_kpi` is
      defined; otherwise contains non-revenue generic KPI metrics.
    channel_secondary_non_revenue_metrics: The model's non-revenue-based media
      summary metrics. Defined iff the input data is non-revenue type AND
      `revenue_per_kpi` is available; in that case
      `channel_media_summary_metrics` holds revenue-based metrics computed
      from `KPI * revenue_per_kpi`, while this dataset holds metrics based on
      the generic KPI alone. `None` in all other cases.
    channel_response_curve: Data needed to generate a response curve,
      pre-filtered to `channel_name`. `None` when no curve is available.
    channel_name: The name of the channel to analyze.
    is_media_channel: Whether the channel is a media channel.
    confidence_level: Confidence level for credible intervals, represented as
      a value between zero and one.

  Returns:
    A proto containing the media analysis results for the given channel.
  """
  is_all_channels = channel_name == constants.ALL_CHANNELS

  # Primary outcome: revenue-typed unless the model is KPI-only.
  outcomes = [
      self._compute_outcome(
          channel_media_summary_metrics,
          is_revenue_type=(not self._kpi_only),
          is_all_channels=is_all_channels,
          confidence_level=confidence_level,
      )
  ]
  # When the primary metrics were revenue-typed over non-revenue input data,
  # a generic-KPI counterpart dataset is provided too; report it as a second,
  # non-revenue outcome.
  if channel_secondary_non_revenue_metrics is not None:
    outcomes.append(
        self._compute_outcome(
            channel_secondary_non_revenue_metrics,
            is_revenue_type=False,
            is_all_channels=is_all_channels,
            confidence_level=confidence_level,
        )
    )

  if not is_media_channel:
    return non_media_analysis_pb2.NonMediaAnalysis(
        non_media_name=channel_name,
        non_media_outcomes=outcomes,
    )

  media_analysis = media_analysis_pb2.MediaAnalysis(
      channel_name=channel_name,
      media_outcomes=outcomes,
  )

  spend_info = _compute_spend(channel_media_summary_metrics)
  if spend_info is not None:
    media_analysis.spend_info.CopyFrom(spend_info)

  if channel_response_curve is not None:
    media_analysis.response_curve.CopyFrom(
        self._compute_response_curve(channel_response_curve)
    )

  return media_analysis
824
+
825
def _get_baseline_metrics(
    self,
    marketing_analysis_spec: MarketingAnalysisSpec,
    baseline_outcome: xr.Dataset,
    start_date: str,
) -> non_media_analysis_pb2.NonMediaAnalysis:
  """Analyzes "baseline" pseudo-channel outcomes over the given time points.

  Args:
    marketing_analysis_spec: A user input parameter specs for this analysis.
    baseline_outcome: A dataset containing the model's baseline summary
      metrics.
    start_date: The date of the analysis.

  Returns:
    A `NonMediaAnalysis` representing baseline analysis.
  """
  if constants.TIME in baseline_outcome.coords:
    # Use the constants-based selector for consistency with the rest of this
    # module (previously this hard-coded the `time` keyword).
    baseline_outcome = baseline_outcome.sel({constants.TIME: start_date})
  incremental_outcome = baseline_outcome[constants.BASELINE_OUTCOME]
  # Convert percentage to decimal.
  contribution_share = baseline_outcome[constants.PCT_OF_CONTRIBUTION] / 100

  contribution = outcome_pb2.Contribution(
      value=common.to_estimate(
          incremental_outcome, marketing_analysis_spec.confidence_level
      ),
      share=common.to_estimate(
          contribution_share, marketing_analysis_spec.confidence_level
      ),
  )
  baseline_analysis = non_media_analysis_pb2.NonMediaAnalysis(
      non_media_name=constants.BASELINE,
  )
  # Use a distinct name for the proto to avoid shadowing the
  # `baseline_outcome` dataset parameter above.
  outcome_proto = outcome_pb2.Outcome(
      contribution=contribution,
      # Baseline outcome is always revenue-based, unless `revenue_per_kpi`
      # is undefined.
      # TODO: kpi_type here is synced with what is used inside
      # `baseline_summary_metrics()`. Ideally, really, we should inject this
      # value into that function rather than re-deriving it here.
      kpi_type=(
          kpi_type_pb2.KpiType.NON_REVENUE
          if self._kpi_only
          else kpi_type_pb2.KpiType.REVENUE
      ),
  )
  baseline_analysis.non_media_outcomes.append(outcome_proto)

  return baseline_analysis
877
+
878
def _compute_outcome(
    self,
    media_summary_metrics: xr.Dataset,
    is_revenue_type: bool,
    is_all_channels: bool,
    confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
) -> outcome_pb2.Outcome:
  """Returns an `Outcome` proto for the given channel's media analysis.

  Args:
    media_summary_metrics: A dataset containing the model's media summary
      metrics.
    is_revenue_type: Whether the media summary metrics above are revenue
      based.
    is_all_channels: If True, the given media summary represents the aggregate
      "All Channels". Omit `effectiveness` and `mroi` in this case.
    confidence_level: Confidence level for credible intervals, represented as
      a value between zero and one.

  Returns:
    An `Outcome` proto populated with whichever metrics are present in the
    dataset (contribution always; effectiveness/mroi only for individual
    channels; cpik/roi when available).
  """
  available = media_summary_metrics.data_vars

  # Contribution is always reported.
  contribution = outcome_pb2.Contribution(
      value=common.to_estimate(
          media_summary_metrics[constants.INCREMENTAL_OUTCOME],
          confidence_level,
      ),
  )
  if constants.PCT_OF_CONTRIBUTION in available:
    # Convert percentage to decimal.
    contribution.share.CopyFrom(
        common.to_estimate(
            media_summary_metrics[constants.PCT_OF_CONTRIBUTION] / 100,
            confidence_level,
        )
    )

  # Effectiveness and marginal ROI are meaningless for the aggregate
  # "All Channels" row and are omitted there.
  effectiveness = None
  if constants.EFFECTIVENESS in available and not is_all_channels:
    effectiveness = outcome_pb2.Effectiveness(
        media_unit=constants.IMPRESSIONS,
        value=common.to_estimate(
            media_summary_metrics[constants.EFFECTIVENESS],
            confidence_level,
        ),
    )

  mroi = None
  if constants.MROI in available and not is_all_channels:
    mroi = common.to_estimate(
        media_summary_metrics[constants.MROI],
        confidence_level,
    )

  cpik = None
  if constants.CPIK in available:
    cpik = common.to_estimate(
        media_summary_metrics[constants.CPIK],
        confidence_level,
        metric=constants.MEDIAN,
    )

  roi = None
  if constants.ROI in available:
    roi = common.to_estimate(
        media_summary_metrics[constants.ROI],
        confidence_level,
    )

  return outcome_pb2.Outcome(
      kpi_type=(
          kpi_type_pb2.KpiType.REVENUE
          if is_revenue_type
          else kpi_type_pb2.KpiType.NON_REVENUE
      ),
      contribution=contribution,
      effectiveness=effectiveness,
      cost_per_contribution=cpik,
      roi=roi,
      marginal_roi=mroi,
  )
958
+
959
def _compute_response_curve(
    self,
    response_curve_dataset: xr.Dataset,
) -> response_curve_pb2.ResponseCurve:
  """Returns a `ResponseCurve` proto for the given channel.

  Args:
    response_curve_dataset: A dataset containing the data needed to generate a
      response curve.
  """
  # Hoist the DataArrays once; each loop iteration only selects from them.
  spend_da = response_curve_dataset[constants.SPEND]
  outcome_da = response_curve_dataset[constants.INCREMENTAL_OUTCOME]

  response_points: list[response_curve_pb2.ResponsePoint] = []
  for multiplier in (
      response_curve_dataset.coords[constants.SPEND_MULTIPLIER].data
  ):
    spend_value = spend_da.sel(spend_multiplier=multiplier).data.item()
    # The curve is drawn through the posterior mean outcome at each
    # spend multiplier.
    mean_outcome = (
        outcome_da.sel(
            spend_multiplier=multiplier,
            metric=constants.MEAN,
        )
        .data.item()
    )
    response_points.append(
        response_curve_pb2.ResponsePoint(
            input_value=spend_value,
            incremental_kpi=mean_outcome,
        )
    )

  return response_curve_pb2.ResponseCurve(
      input_name=constants.SPEND,
      response_points=response_points,
  )
1000
+
1001
+ # TODO: Create an abstraction/container around these inference
1002
+ # parameters.
1003
# TODO: Create an abstraction/container around these inference
# parameters.
def _incremental_outcome_dataset(
    self,
    resolver: model_processor.DatedSpecResolver,
    new_data: analyzer.DataTensors | None = None,
    media_selected_times: Sequence[bool] | None = None,
    selected_geos: Sequence[str] | None = None,
    aggregate_geos: bool = True,
    aggregate_times: bool = True,
    use_kpi: bool = False,
    confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
    batch_size: int = constants.DEFAULT_BATCH_SIZE,
    include_non_paid_channels: bool = False,
) -> xr.Dataset:
  """Returns incremental outcome for each channel with dimensions.

  Args:
    resolver: A `DatedSpecResolver` instance.
    new_data: A dataset containing the new data to use in the analysis.
    media_selected_times: A boolean array of length `n_times` indicating which
      time periods are media-active.
    selected_geos: Optional list containing a subset of geos to include. By
      default, all geos are included.
    aggregate_geos: Boolean. If `True`, the expected outcome is summed over
      all of the regions.
    aggregate_times: Boolean. If `True`, the expected outcome is summed over
      all of the time periods.
    use_kpi: Boolean. If `True`, the summary metrics are calculated using KPI.
      If `False`, the metrics are calculated using revenue.
    confidence_level: Confidence level for summary metrics credible intervals,
      represented as a value between zero and one.
    batch_size: Integer representing the maximum draws per chain in each
      batch. The calculation is run in batches to avoid memory exhaustion. If
      a memory error occurs, try reducing `batch_size`. The calculation will
      generally be faster with larger `batch_size` values.
    include_non_paid_channels: Boolean. If `True`, non-paid channels (organic
      media, organic reach and frequency, and non-media treatments) are
      included in the summary but only the metrics independent of spend are
      reported. If `False`, only the paid channels (media, reach and
      frequency) are included but the summary contains also the metrics
      dependent on spend. Default: `False`.

  Returns:
    An `xr.Dataset` and containing `incremental_outcome` for each channel. The
    coordinates are: `channel` and `metric` (`mean`, `median`, `ci_low`,
    `ci_high`)
  """
  # Selected times in boolean form are supported by the analyzer with and
  # without the new data.
  selected_times_bool = resolver.resolve_to_bool_selected_times()
  kwargs = {
      "selected_geos": selected_geos,
      "selected_times": selected_times_bool,
      "aggregate_geos": aggregate_geos,
      "aggregate_times": aggregate_times,
      "batch_size": batch_size,
  }
  # Posterior draws only: `use_posterior=True` is fixed for this summary.
  incremental_outcome_posterior = (
      self._analyzer.compute_incremental_outcome_aggregate(
          new_data=new_data,
          media_selected_times=media_selected_times,
          use_posterior=True,
          use_kpi=use_kpi,
          include_non_paid_channels=include_non_paid_channels,
          **kwargs,
      )
  )

  # Dimension order must match the array layout produced by
  # `get_central_tendency_and_ci` below: optional geo, optional time, then
  # channel and metric.
  xr_dims = (
      ((constants.GEO,) if not aggregate_geos else ())
      + ((constants.TIME,) if not aggregate_times else ())
      + (constants.CHANNEL, constants.METRIC)
  )
  channels = (
      self._meridian.input_data.get_all_channels()
      if include_non_paid_channels
      else self._meridian.input_data.get_all_paid_channels()
  )
  # The analyzer appends an "All Channels" aggregate to the channel axis.
  xr_coords = {
      constants.CHANNEL: (
          [constants.CHANNEL],
          list(channels) + [constants.ALL_CHANNELS],
      ),
  }
  if not aggregate_geos:
    geo_dims = (
        self._meridian.input_data.geo.data
        if selected_geos is None
        else selected_geos
    )
    xr_coords[constants.GEO] = ([constants.GEO], geo_dims)
  if not aggregate_times:
    # Prefer explicitly enumerated times from the resolver; fall back to the
    # full date range when none were enumerated.
    selected_times_str = resolver.resolve_to_enumerated_selected_times()
    if selected_times_str is not None:
      time_dims = selected_times_str
    else:
      time_dims = resolver.time_coordinates.all_dates_str
    xr_coords[constants.TIME] = ([constants.TIME], time_dims)
  xr_coords_with_ci = {
      constants.METRIC: (
          [constants.METRIC],
          [
              constants.MEAN,
              constants.MEDIAN,
              constants.CI_LO,
              constants.CI_HI,
          ],
      ),
      **xr_coords,
  }
  metrics = analyzer.get_central_tendency_and_ci(
      incremental_outcome_posterior, confidence_level, include_median=True
  )
  xr_data = {constants.INCREMENTAL_OUTCOME: (xr_dims, metrics)}
  return xr.Dataset(data_vars=xr_data, coords=xr_coords_with_ci)
1117
+
1118
+
1119
def _compute_spend(
    media_summary_metrics: xr.Dataset,
) -> media_analysis_pb2.SpendInfo | None:
  """Returns a `SpendInfo` proto with spend information for the given channel.

  Args:
    media_summary_metrics: A dataset containing the model's media summary
      metrics.

  Returns:
    A `SpendInfo` proto, or `None` when the dataset carries no spend metrics
    (e.g. for non-paid channels).
  """
  data_vars = media_summary_metrics.data_vars
  # Guard both variables: reading `pct_of_spend` unconditionally would raise
  # `KeyError` on a dataset that has `spend` but no share metric.
  if (
      constants.SPEND not in data_vars
      or constants.PCT_OF_SPEND not in data_vars
  ):
    return None

  spend = media_summary_metrics[constants.SPEND].item()
  # Convert percentage to decimal.
  spend_share = media_summary_metrics[constants.PCT_OF_SPEND].item() / 100

  return media_analysis_pb2.SpendInfo(
      spend=spend,
      spend_share=spend_share,
  )