google-meridian 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: google-meridian
3
- Version: 1.0.8
3
+ Version: 1.0.9
4
4
  Summary: Google's open source mixed marketing model library, helps you understand your return on investment and direct your ad spend with confidence.
5
5
  Author-email: The Meridian Authors <no-reply@google.com>
6
6
  License:
@@ -393,7 +393,7 @@ To cite this repository:
393
393
  author = {Google Meridian Marketing Mix Modeling Team},
394
394
  title = {Meridian: Marketing Mix Modeling},
395
395
  url = {https://github.com/google/meridian},
396
- version = {1.0.8},
396
+ version = {1.0.9},
397
397
  year = {2025},
398
398
  }
399
399
  ```
@@ -1,14 +1,14 @@
1
- google_meridian-1.0.8.dist-info/licenses/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
2
- meridian/__init__.py,sha256=d85YKzKshDwbViGr-BG7DJhNJh8a-dVF87y83gnTv7I,714
3
- meridian/constants.py,sha256=vhJI7R3kTGIHkLzkyx3i6ZnpcAXdAo4ath1eBS6cQHQ,15197
1
+ google_meridian-1.0.9.dist-info/licenses/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
2
+ meridian/__init__.py,sha256=b7KL3QTlpVq4JvFAzhv7fcEUz6iiBqeGwZrzLv_JLjI,714
3
+ meridian/constants.py,sha256=OHzrSdGcburXB1miI9oPZ_6mdf6wokKq2zhzwo9YA84,15434
4
4
  meridian/analysis/__init__.py,sha256=-FooDZ5OzePpyTVkvRoWQx_xBaRR_hjVLny9H8-kkyQ,836
5
- meridian/analysis/analyzer.py,sha256=HyFJlTUYsv03skU4SiPvqjwevq7TXabwtD9VhoGObsw,200181
5
+ meridian/analysis/analyzer.py,sha256=nFODFwnrR2QC2FiEX11UgMGrVHkud_KzUsTpKUtQiSo,203965
6
6
  meridian/analysis/formatter.py,sha256=F8OYxD2bH13zV10JY63j2ugCOj-DpTXhyJr43n5ukr8,7270
7
- meridian/analysis/optimizer.py,sha256=NwHb5PBhHye4XtPhh0qv0ZMCq6LwErZXFa86BwmtKLs,90115
8
- meridian/analysis/summarizer.py,sha256=jkESRdbH1U3ij-aBdV1JFTYNVJdfALmji5G4jmK4oMs,18403
7
+ meridian/analysis/optimizer.py,sha256=SVZJjO0nZjWL62PoeuIBf5_iPdDMdgmA3fuY1R8pEsU,98126
8
+ meridian/analysis/summarizer.py,sha256=PPin1hKvcdGuzNOlXOsCtCO0JzawyBb26g4LEFptRh0,18883
9
9
  meridian/analysis/summary_text.py,sha256=n6a-DTZxtS3WvdI_pDEK7lvO3MRUX3h83GzuVnG6sQ4,12438
10
10
  meridian/analysis/test_utils.py,sha256=xai8oxXu51PDsiQ-ZYTnN_eSLsGu0BUOS8rDTcc6v-E,77719
11
- meridian/analysis/visualizer.py,sha256=_40uBa6QMJSjfwsvswcbGRUN3Urr_Vs16XiwpWETAfc,92624
11
+ meridian/analysis/visualizer.py,sha256=KgqdqbYkvo1vY0u-JGuIYEpwMR1xUvJToG1QcIaVuPo,94138
12
12
  meridian/analysis/templates/card.html.jinja,sha256=pv4MVbQ25CcvtZY-LH7bFW0OSeHobkeEkAleB1sfQ14,1284
13
13
  meridian/analysis/templates/chart.html.jinja,sha256=87i0xnXHRBoLLxBpKv2i960TLToWq4r1aVQZqaXIeMQ,1086
14
14
  meridian/analysis/templates/chips.html.jinja,sha256=Az0tQwF_-b03JDLyOzpeH-8fb-6jgJgbNfnUUSm-q6E,645
@@ -28,14 +28,14 @@ meridian/model/__init__.py,sha256=bvx8vvXolktsCTDKViU9U1v85pgNWF3haDowTKy11d4,98
28
28
  meridian/model/adstock_hill.py,sha256=b_YYhqci6ndgi602FFXmx2f12ceC4N0tp338nMMtm54,9283
29
29
  meridian/model/knots.py,sha256=r7PPaJM96d5pkoOeV9crIOgkM0-rh24mWMvypMiV4aQ,8054
30
30
  meridian/model/media.py,sha256=Gjr4jm0y_6pFy7aa_oKIuuZ8P7F56e3ZB-3o6msApeA,11876
31
- meridian/model/model.py,sha256=hA6HSaH2cd7Zgm8_JX3Jd79bWQSk8BtdqfEm5C9e3oQ,43323
31
+ meridian/model/model.py,sha256=CgBzyR8KWE3lPecaCTg0FF16booUOpsE3ARNcm5KrFc,43875
32
32
  meridian/model/model_test_data.py,sha256=dqS_vDQUg811UGmyr8ZgWp8VTIra-krA7A2erQlfPlU,12488
33
33
  meridian/model/posterior_sampler.py,sha256=uUNMdxyoK0LT6hNKiAxEEl-1X0SyBMz-o_Sao5q5Ts8,23228
34
- meridian/model/prior_distribution.py,sha256=6fqx_XIM0DSQICd65XaSRhelsjvZ4ariBfeyOeoKld8,39075
34
+ meridian/model/prior_distribution.py,sha256=h-L6hLOC-bM9ciYCvbZbDN7-3-30AwHBbo06KsSwDiY,39934
35
35
  meridian/model/prior_sampler.py,sha256=zGSAQviFO3s2GcVbfG9EfXxo_SNFBFbTQC3e-QBFzio,23079
36
36
  meridian/model/spec.py,sha256=xaHxfCLWLnWMAkMy2ouDoqGBHI_4tzzX8AaJOsKdu7Q,8878
37
37
  meridian/model/transformers.py,sha256=te3OJixprWLtv7O00a9GZWE4waTS94NNLVo3tWIl1-k,7420
38
- google_meridian-1.0.8.dist-info/METADATA,sha256=DaSRL6L3xb0AiZBw22nbxDbFqvm2thApTpiEzffGe-o,22055
39
- google_meridian-1.0.8.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
40
- google_meridian-1.0.8.dist-info/top_level.txt,sha256=nwaCebZvvU34EopTKZsjK0OMTFjVnkf4FfnBN_TAc0g,9
41
- google_meridian-1.0.8.dist-info/RECORD,,
38
+ google_meridian-1.0.9.dist-info/METADATA,sha256=N6Y923SR6L6T5Py0xVpNwZm0lBl-mXWC2oCnYq903pM,22055
39
+ google_meridian-1.0.9.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
40
+ google_meridian-1.0.9.dist-info/top_level.txt,sha256=nwaCebZvvU34EopTKZsjK0OMTFjVnkf4FfnBN_TAc0g,9
41
+ google_meridian-1.0.9.dist-info/RECORD,,
meridian/__init__.py CHANGED
@@ -14,7 +14,7 @@
14
14
 
15
15
  """Meridian API."""
16
16
 
17
- __version__ = "1.0.8"
17
+ __version__ = "1.0.9"
18
18
 
19
19
 
20
20
  from meridian import analysis
@@ -63,6 +63,8 @@ class DataTensors(tf.experimental.ExtensionType):
63
63
  controls: Optional tensor with dimensions `(n_geos, n_times, n_controls)`.
64
64
  revenue_per_kpi: Optional tensor with dimensions `(n_geos, T)` for any time
65
65
  dimension `T`.
66
+ time: Optional tensor of time coordinates in the "YYYY-mm-dd" string format
67
+ for time dimension `T`.
66
68
  """
67
69
 
68
70
  media: Optional[tf.Tensor]
@@ -76,6 +78,7 @@ class DataTensors(tf.experimental.ExtensionType):
76
78
  non_media_treatments: Optional[tf.Tensor]
77
79
  controls: Optional[tf.Tensor]
78
80
  revenue_per_kpi: Optional[tf.Tensor]
81
+ time: Optional[tf.Tensor]
79
82
 
80
83
  def __init__(
81
84
  self,
@@ -90,6 +93,7 @@ class DataTensors(tf.experimental.ExtensionType):
90
93
  non_media_treatments: Optional[tf.Tensor] = None,
91
94
  controls: Optional[tf.Tensor] = None,
92
95
  revenue_per_kpi: Optional[tf.Tensor] = None,
96
+ time: Optional[Sequence[str] | tf.Tensor] = None,
93
97
  ):
94
98
  self.media = tf.cast(media, tf.float32) if media is not None else None
95
99
  self.media_spend = (
@@ -130,6 +134,7 @@ class DataTensors(tf.experimental.ExtensionType):
130
134
  if revenue_per_kpi is not None
131
135
  else None
132
136
  )
137
+ self.time = tf.cast(time, tf.string) if time is not None else None
133
138
 
134
139
  def __validate__(self):
135
140
  self._validate_n_dims()
@@ -241,6 +246,8 @@ class DataTensors(tf.experimental.ExtensionType):
241
246
  f"New `{field.name}` must have 1 or 3 dimensions. Found"
242
247
  f" {tensor.ndim} dimensions."
243
248
  )
249
+ elif field.name == constants.TIME:
250
+ _check_n_dims(tensor, field.name, 1)
244
251
  else:
245
252
  _check_n_dims(tensor, field.name, 3)
246
253
 
@@ -283,7 +290,7 @@ class DataTensors(tf.experimental.ExtensionType):
283
290
  for var_name in required_fields:
284
291
  new_tensor = getattr(self, var_name)
285
292
  if new_tensor is not None and new_tensor.shape[0] != meridian.n_geos:
286
- # Skip spend data with only 1 dimension of (n_channels).
293
+ # Skip spend and time data with only 1 dimension.
287
294
  if new_tensor.ndim == 1:
288
295
  continue
289
296
  raise ValueError(
@@ -296,7 +303,7 @@ class DataTensors(tf.experimental.ExtensionType):
296
303
  ):
297
304
  """Validates the channel dimension of the specified data variables."""
298
305
  for var_name in required_fields:
299
- if var_name == constants.REVENUE_PER_KPI:
306
+ if var_name in [constants.REVENUE_PER_KPI, constants.TIME]:
300
307
  continue
301
308
  new_tensor = getattr(self, var_name)
302
309
  old_tensor = getattr(meridian.input_data, var_name)
@@ -317,12 +324,24 @@ class DataTensors(tf.experimental.ExtensionType):
317
324
  old_tensor = getattr(meridian.input_data, var_name)
318
325
 
319
326
  # Skip spend data with only 1 dimension of (n_channels).
320
- if new_tensor is not None and new_tensor.ndim == 1:
327
+ if (
328
+ var_name in [constants.MEDIA_SPEND, constants.RF_SPEND]
329
+ and new_tensor is not None
330
+ and new_tensor.ndim == 1
331
+ ):
321
332
  continue
322
333
 
323
334
  if new_tensor is not None:
324
335
  assert old_tensor is not None
325
- if new_tensor.shape[1] != old_tensor.shape[1]:
336
+ if (
337
+ var_name == constants.TIME
338
+ and new_tensor.shape[0] != old_tensor.shape[0]
339
+ ):
340
+ raise ValueError(
341
+ f"New `{var_name}` is expected to have {old_tensor.shape[0]}"
342
+ f" time periods. Found {new_tensor.shape[0]} time periods."
343
+ )
344
+ elif new_tensor.ndim > 1 and new_tensor.shape[1] != old_tensor.shape[1]:
326
345
  raise ValueError(
327
346
  f"New `{var_name}` is expected to have {old_tensor.shape[1]}"
328
347
  f" time periods. Found {new_tensor.shape[1]} time periods."
@@ -345,12 +364,24 @@ class DataTensors(tf.experimental.ExtensionType):
345
364
  if old_tensor is None:
346
365
  continue
347
366
  # Skip spend data with only 1 dimension of (n_channels).
348
- if new_tensor is not None and new_tensor.ndim == 1:
367
+ if (
368
+ var_name in [constants.MEDIA_SPEND, constants.RF_SPEND]
369
+ and new_tensor is not None
370
+ and new_tensor.ndim == 1
371
+ ):
349
372
  continue
350
373
 
351
374
  if new_tensor is None:
352
375
  missing_params.append(var_name)
353
- elif new_tensor.shape[1] != new_n_times:
376
+ elif var_name == constants.TIME and new_tensor.shape[0] != new_n_times:
377
+ raise ValueError(
378
+ "If the time dimension of any variable in `new_data` is "
379
+ "modified, then all variables must be provided with the same "
380
+ f"number of time periods. `{var_name}` has {new_tensor.shape[0]} "
381
+ "time periods, which does not match the modified number of time "
382
+ f"periods, {new_n_times}.",
383
+ )
384
+ elif new_tensor.ndim > 1 and new_tensor.shape[1] != new_n_times:
354
385
  raise ValueError(
355
386
  "If the time dimension of any variable in `new_data` is "
356
387
  "modified, then all variables must be provided with the same "
@@ -390,6 +421,10 @@ class DataTensors(tf.experimental.ExtensionType):
390
421
  old_tensor = meridian.controls
391
422
  elif var_name == constants.REVENUE_PER_KPI:
392
423
  old_tensor = meridian.revenue_per_kpi
424
+ elif var_name == constants.TIME:
425
+ old_tensor = tf.convert_to_tensor(
426
+ meridian.input_data.time.values.tolist(), dtype=tf.string
427
+ )
393
428
  else:
394
429
  continue
395
430
 
@@ -4663,11 +4698,11 @@ class Analyzer:
4663
4698
 
4664
4699
  def get_historical_spend(
4665
4700
  self,
4666
- selected_times: Sequence[str] | None,
4701
+ selected_times: Sequence[str] | None = None,
4667
4702
  include_media: bool = True,
4668
4703
  include_rf: bool = True,
4669
4704
  ) -> xr.DataArray:
4670
- """Gets the aggregated historical spend based on the time period.
4705
+ """Deprecated. Gets the aggregated historical spend based on the time.
4671
4706
 
4672
4707
  Args:
4673
4708
  selected_times: The time period to get the historical spends. If None, the
@@ -4681,6 +4716,51 @@ class Analyzer:
4681
4716
  An `xr.DataArray` with the coordinate `channel` and contains the data
4682
4717
  variable `spend`.
4683
4718
 
4719
+ Raises:
4720
+ ValueError: A ValueError is raised when `include_media` and `include_rf`
4721
+ are both False.
4722
+ """
4723
+ warnings.warn(
4724
+ "`get_historical_spend` is deprecated. Please use "
4725
+ "`get_aggregated_spend` with `new_data=None` instead.",
4726
+ DeprecationWarning,
4727
+ stacklevel=2,
4728
+ )
4729
+ return self.get_aggregated_spend(
4730
+ selected_times=selected_times,
4731
+ include_media=include_media,
4732
+ include_rf=include_rf,
4733
+ )
4734
+
4735
+ def get_aggregated_spend(
4736
+ self,
4737
+ new_data: DataTensors | None = None,
4738
+ selected_times: Sequence[str] | Sequence[bool] | None = None,
4739
+ include_media: bool = True,
4740
+ include_rf: bool = True,
4741
+ ) -> xr.DataArray:
4742
+ """Gets the aggregated spend based on the selected time.
4743
+
4744
+ Args:
4745
+ new_data: An optional `DataTensors` object containing the new `media`,
4746
+ `media_spend`, `reach`, `frequency`, `rf_spend` tensors. If `None`, the
4747
+ existing tensors from the Meridian object are used. If `new_data`
4748
+ argument is used, then the aggregated spend is computed using the values
4749
+ of the tensors passed in the `new_data` argument and the original values
4750
+ of all the remaining tensors. If any of the tensors in `new_data` is
4751
+ provided with a different number of time periods than in `InputData`,
4752
+ then all tensors must be provided with the same number of time periods.
4753
+ selected_times: The time period to get the aggregated spends. If None, the
4754
+ spend will be aggregated over all time periods.
4755
+ include_media: Whether to include spends for paid media channels that do
4756
+ not have R&F data.
4757
+ include_rf: Whether to include spends for paid media channels with R&F
4758
+ data.
4759
+
4760
+ Returns:
4761
+ An `xr.DataArray` with the coordinate `channel` and contains the data
4762
+ variable `spend`.
4763
+
4684
4764
  Raises:
4685
4765
  ValueError: A ValueError is raised when `include_media` and `include_rf`
4686
4766
  are both False.
@@ -4689,6 +4769,11 @@ class Analyzer:
4689
4769
  raise ValueError(
4690
4770
  "At least one of include_media or include_rf must be True."
4691
4771
  )
4772
+ new_data = new_data or DataTensors()
4773
+ required_tensors_names = constants.PAID_CHANNELS + constants.SPEND_DATA
4774
+ filled_data = new_data.validate_and_fill_missing_data(
4775
+ required_tensors_names, self._meridian
4776
+ )
4692
4777
 
4693
4778
  empty_da = xr.DataArray(
4694
4779
  dims=[constants.CHANNEL], coords={constants.CHANNEL: []}
@@ -4709,8 +4794,8 @@ class Analyzer:
4709
4794
  else:
4710
4795
  aggregated_media_spend = self._impute_and_aggregate_spend(
4711
4796
  selected_times,
4712
- self._meridian.media_tensors.media,
4713
- self._meridian.media_tensors.media_spend,
4797
+ filled_data.media,
4798
+ filled_data.media_spend,
4714
4799
  list(self._meridian.input_data.media_channel.values),
4715
4800
  )
4716
4801
 
@@ -4723,18 +4808,16 @@ class Analyzer:
4723
4808
  or self._meridian.rf_tensors.rf_spend is None
4724
4809
  ):
4725
4810
  warnings.warn(
4726
- "Requested spends for paid media channels with R&F data, but but the"
4811
+ "Requested spends for paid media channels with R&F data, but the"
4727
4812
  " channels are not available.",
4728
4813
  )
4729
4814
  aggregated_rf_spend = empty_da
4730
4815
  else:
4731
- rf_execution_values = (
4732
- self._meridian.rf_tensors.reach * self._meridian.rf_tensors.frequency
4733
- )
4816
+ rf_execution_values = filled_data.reach * filled_data.frequency
4734
4817
  aggregated_rf_spend = self._impute_and_aggregate_spend(
4735
4818
  selected_times,
4736
4819
  rf_execution_values,
4737
- self._meridian.rf_tensors.rf_spend,
4820
+ filled_data.rf_spend,
4738
4821
  list(self._meridian.input_data.rf_channel.values),
4739
4822
  )
4740
4823
 
@@ -4744,7 +4827,7 @@ class Analyzer:
4744
4827
 
4745
4828
  def _impute_and_aggregate_spend(
4746
4829
  self,
4747
- selected_times: Sequence[str] | None,
4830
+ selected_times: Sequence[str] | Sequence[bool] | None,
4748
4831
  media_execution_values: tf.Tensor,
4749
4832
  channel_spend: tf.Tensor,
4750
4833
  channel_names: Sequence[str],
@@ -4759,7 +4842,7 @@ class Analyzer:
4759
4842
  argument, its values only affect the output when imputation is required.
4760
4843
 
4761
4844
  Args:
4762
- selected_times: The time period to get the historical spend.
4845
+ selected_times: The time period to get the aggregated spend.
4763
4846
  media_execution_values: The media execution values over all time points.
4764
4847
  channel_spend: The spend over all time points. Its shape can be `(n_geos,
4765
4848
  n_times, n_media_channels)` or `(n_media_channels,)` if the data is
@@ -4775,17 +4858,24 @@ class Analyzer:
4775
4858
  "selected_times": selected_times,
4776
4859
  "aggregate_geos": True,
4777
4860
  "aggregate_times": True,
4861
+ "flexible_time_dim": True,
4778
4862
  }
4779
4863
 
4780
4864
  if channel_spend.ndim == 3:
4781
4865
  aggregated_spend = self.filter_and_aggregate_geos_and_times(
4782
4866
  channel_spend,
4867
+ has_media_dim=True,
4783
4868
  **dim_kwargs,
4784
4869
  ).numpy()
4785
4870
  # channel_spend.ndim can only be 3 or 1.
4786
4871
  else:
4787
4872
  # media spend can have more time points than the model time points
4788
- media_exe_values = media_execution_values[:, -self._meridian.n_times :, :]
4873
+ if media_execution_values.shape[1] == self._meridian.n_media_times:
4874
+ media_exe_values = media_execution_values[
4875
+ :, -self._meridian.n_times :, :
4876
+ ]
4877
+ else:
4878
+ media_exe_values = media_execution_values
4789
4879
  # Calculates CPM over all times and geos if the spend does not have time
4790
4880
  # and geo dimensions.
4791
4881
  target_media_exe_values = self.filter_and_aggregate_geos_and_times(
@@ -28,6 +28,7 @@ from meridian import constants as c
28
28
  from meridian.analysis import analyzer
29
29
  from meridian.analysis import formatter
30
30
  from meridian.analysis import summary_text
31
+ from meridian.data import time_coordinates as tc
31
32
  from meridian.model import model
32
33
  import numpy as np
33
34
  import pandas as pd
@@ -119,7 +120,7 @@ class OptimizationGrid:
119
120
  gtol: float
120
121
  round_factor: int
121
122
  optimal_frequency: np.ndarray | None
122
- selected_times: list[str] | None
123
+ selected_times: Sequence[str] | Sequence[bool] | None
123
124
 
124
125
  @property
125
126
  def grid_dataset(self) -> xr.Dataset:
@@ -621,7 +622,7 @@ class OptimizationResults:
621
622
  # by adjusting the domain of the y-axis so that the incremental outcome does
622
623
  # not start at 0. Calculate the total decrease in incremental outcome to pad
623
624
  # the y-axis from the non-optimized total incremental outcome value.
624
- sum_decr = sum(df[df.incremental_outcome < 0].incremental_outcome)
625
+ sum_decr = df[df.incremental_outcome < 0].incremental_outcome.sum()
625
626
  y_padding = float(f'1e{int(math.log10(-sum_decr))}') if sum_decr < 0 else 2
626
627
  domain_scale = [
627
628
  self.nonoptimized_data.total_incremental_outcome + sum_decr - y_padding,
@@ -1016,8 +1017,16 @@ class OptimizationResults:
1016
1017
 
1017
1018
  def _gen_optimization_summary(self) -> str:
1018
1019
  """Generates HTML optimization summary output (as sanitized content str)."""
1019
- self.template_env.globals[c.START_DATE] = self.optimized_data.start_date
1020
- self.template_env.globals[c.END_DATE] = self.optimized_data.end_date
1020
+ start_date = tc.normalize_date(self.optimized_data.start_date)
1021
+ self.template_env.globals[c.START_DATE] = start_date.strftime(
1022
+ f'%b {start_date.day}, %Y'
1023
+ )
1024
+ interval_days = self.meridian.input_data.time_coordinates.interval_days
1025
+ end_date = tc.normalize_date(self.optimized_data.end_date)
1026
+ end_date_adjusted = end_date + pd.Timedelta(days=interval_days)
1027
+ self.template_env.globals[c.END_DATE] = end_date_adjusted.strftime(
1028
+ f'%b {end_date_adjusted.day}, %Y'
1029
+ )
1021
1030
 
1022
1031
  html_template = self.template_env.get_template('summary.html.jinja')
1023
1032
  return html_template.render(
@@ -1265,6 +1274,7 @@ class BudgetOptimizer:
1265
1274
 
1266
1275
  def optimize(
1267
1276
  self,
1277
+ new_data: analyzer.DataTensors | None = None,
1268
1278
  use_posterior: bool = True,
1269
1279
  selected_times: tuple[str | None, str | None] | None = None,
1270
1280
  fixed_budget: bool = True,
@@ -1282,18 +1292,50 @@ class BudgetOptimizer:
1282
1292
  ) -> OptimizationResults:
1283
1293
  """Finds the optimal budget allocation that maximizes outcome.
1284
1294
 
1285
- Outcome is typically revenue, but when the KPI is not revenue and "revenue
1286
- per KPI" data is not available, then Meridian defines the Outcome to be the
1287
- KPI itself.
1295
+ Optimization depends on the following:
1296
+ 1. Flighting pattern (the relative allocation of a channels' media units
1297
+ across geos and time periods, which is held fixed for each channel)
1298
+ 2. Cost per media unit (This is assumed to be constant for each channel, and
1299
+ can optionally vary by geo and/or time period)
1300
+ 3. `pct_of_spend` (center of the spend box constraint for each channel)
1301
+ 4. `budget` (total budget used for fixed budget scenarios)
1302
+
1303
+ By default, these values are assigned based on the historical data. The
1304
+ `pct_of_spend` and `budget` are optimization arguments that can be
1305
+ overridden directly. Passing `new_data.media` (or `new_data.reach` or
1306
+ `new_data.frequency`) will override both the flighting pattern and cost per
1307
+ media unit. Passing `new_data.spend` (or `new_data.rf_spend) will only
1308
+ override the cost per media unit.
1309
+
1310
+ If `new_data` is passed with a different number of time periods than the
1311
+ historical data, then all of the optimization parameters will be inferred
1312
+ from it. Default values for `pct_of_spend` and `budget` (if
1313
+ `fixed_budget=True`) will be inferred from the `new_data`, but can be
1314
+ overridden using the `pct_of_spend` and `budget` arguments.
1315
+
1316
+ If `selected_times` is specified, then the default values are inferred based
1317
+ on the subset of time periods specified.
1288
1318
 
1289
1319
  Args:
1320
+ new_data: An optional `DataTensors` container with optional tensors:
1321
+ `media`, `reach`, `frequency`, `media_spend`, `rf_spend`,
1322
+ `revenue_per_kpi`, and `time`. If `None`, the original tensors from the
1323
+ Meridian object are used. If `new_data` is provided, the optimization is
1324
+ run on the versions of the tensors in `new_data` and the original
1325
+ versions of all the remaining tensors. If any of the tensors in
1326
+ `new_data` is provided with a different number of time periods than in
1327
+ `InputData`, then all tensors must be provided with the same number of
1328
+ time periods and the `time` tensor must be provided.
1290
1329
  use_posterior: Boolean. If `True`, then the budget is optimized based on
1291
1330
  the posterior distribution of the model. Otherwise, the prior
1292
1331
  distribution is used.
1293
1332
  selected_times: Tuple containing the start and end time dimension
1294
1333
  coordinates for the duration to run the optimization on. Selected time
1295
1334
  values should align with the Meridian time dimension coordinates in the
1296
- underlying model. By default, all times periods are used. Either start
1335
+ underlying model if optimizing the original data. If `new_data` is
1336
+ provided with a different number of time periods than in `InputData`,
1337
+ then the start and end time coordinates must match the time dimensions
1338
+ in `new_data.time`. By default, all times periods are used. Either start
1297
1339
  or end time component can be `None` to represent the first or the last
1298
1340
  time coordinate, respectively.
1299
1341
  fixed_budget: Boolean indicating whether it's a fixed budget optimization
@@ -1310,7 +1352,7 @@ class BudgetOptimizer:
1310
1352
  performance metrics (for example, ROI) and construct the feasible range
1311
1353
  of media-level spend with the spend constraints. Consider using
1312
1354
  `InputData.get_paid_channels_argument_builder()` to construct this
1313
- argument.
1355
+ argument. If using `new_data`, this argument is ignored.
1314
1356
  spend_constraint_lower: Numeric list of size `n_paid_channels` or float
1315
1357
  (same constraint for all channels) indicating the lower bound of
1316
1358
  media-level spend. If given as a channel-indexed array, the order must
@@ -1368,6 +1410,7 @@ class BudgetOptimizer:
1368
1410
  if spend_constraint_upper is None:
1369
1411
  spend_constraint_upper = spend_constraint_default
1370
1412
  optimization_grid = self.create_optimization_grid(
1413
+ new_data=new_data,
1371
1414
  selected_times=selected_times,
1372
1415
  budget=budget,
1373
1416
  pct_of_spend=pct_of_spend,
@@ -1403,6 +1446,7 @@ class BudgetOptimizer:
1403
1446
  spend.non_optimized, optimization_grid.round_factor
1404
1447
  ).astype(int)
1405
1448
  nonoptimized_data = self._create_budget_dataset(
1449
+ new_data=new_data,
1406
1450
  use_posterior=use_posterior,
1407
1451
  use_kpi=use_kpi,
1408
1452
  hist_spend=optimization_grid.historical_spend,
@@ -1413,6 +1457,7 @@ class BudgetOptimizer:
1413
1457
  use_historical_budget=use_historical_budget,
1414
1458
  )
1415
1459
  nonoptimized_data_with_optimal_freq = self._create_budget_dataset(
1460
+ new_data=new_data,
1416
1461
  use_posterior=use_posterior,
1417
1462
  use_kpi=use_kpi,
1418
1463
  hist_spend=optimization_grid.historical_spend,
@@ -1431,6 +1476,7 @@ class BudgetOptimizer:
1431
1476
  elif target_mroi:
1432
1477
  constraints[c.TARGET_MROI] = target_mroi
1433
1478
  optimized_data = self._create_budget_dataset(
1479
+ new_data=new_data,
1434
1480
  use_posterior=use_posterior,
1435
1481
  use_kpi=use_kpi,
1436
1482
  hist_spend=optimization_grid.historical_spend,
@@ -1476,6 +1522,7 @@ class BudgetOptimizer:
1476
1522
 
1477
1523
  def create_optimization_grid(
1478
1524
  self,
1525
+ new_data: analyzer.DataTensors | None = None,
1479
1526
  use_posterior: bool = True,
1480
1527
  selected_times: tuple[str | None, str | None] | None = None,
1481
1528
  budget: float | None = None,
@@ -1490,13 +1537,25 @@ class BudgetOptimizer:
1490
1537
  """Creates a OptimizationGrid for optimization.
1491
1538
 
1492
1539
  Args:
1540
+ new_data: An optional `DataTensors` container with optional tensors:
1541
+ `media`, `reach`, `frequency`, `media_spend`, `rf_spend`,
1542
+ `revenue_per_kpi`, and `time`. If `None`, the original tensors from the
1543
+ Meridian object are used. If `new_data` is provided, the grid is created
1544
+ using the versions of the tensors in `new_data` and the original
1545
+ versions of all the remaining tensors. If any of the tensors in
1546
+ `new_data` is provided with a different number of time periods than in
1547
+ `InputData`, then all tensors must be provided with the same number of
1548
+ time periods and the `time` tensor must be provided.
1493
1549
  use_posterior: Boolean. If `True`, then the incremental outcome is derived
1494
1550
  from the posterior distribution of the model. Otherwise, the prior
1495
1551
  distribution is used.
1496
1552
  selected_times: Tuple containing the start and end time dimension
1497
1553
  coordinates for the duration to run the optimization on. Selected time
1498
1554
  values should align with the Meridian time dimension coordinates in the
1499
- underlying model. By default, all times periods are used. Either start
1555
+ underlying model if optimizing the original data. If `new_data` is
1556
+ provided with a different number of time periods than in `InputData`,
1557
+ then the start and end time coordinates must match the time dimensions
1558
+ in `new_data.time`. By default, all time periods are used. Either start
1500
1559
  or end time component can be `None` to represent the first or the last
1501
1560
  time coordinate, respectively.
1502
1561
  budget: Number indicating the total budget for the fixed budget scenario.
@@ -1510,7 +1569,7 @@ class BudgetOptimizer:
1510
1569
  performance metrics (for example, ROI) and construct the feasible range
1511
1570
  of media-level spend with the spend constraints. Consider using
1512
1571
  `InputData.get_paid_channels_argument_builder()` to construct this
1513
- argument.
1572
+ argument. If using `new_data`, this argument is ignored.
1514
1573
  spend_constraint_lower: Numeric list of size `n_paid_channels` or float
1515
1574
  (same constraint for all channels) indicating the lower bound of
1516
1575
  media-level spend. If given as a channel-indexed array, the order must
@@ -1545,16 +1604,20 @@ class BudgetOptimizer:
1545
1604
  An OptimizationGrid object containing the grid data for optimization.
1546
1605
  """
1547
1606
  self._validate_model_fit(use_posterior)
1548
- if selected_times is not None:
1549
- start_date, end_date = selected_times
1550
- selected_time_dims = self._meridian.expand_selected_time_dims(
1551
- start_date=start_date,
1552
- end_date=end_date,
1553
- )
1554
- else:
1555
- selected_time_dims = None
1556
- hist_spend = self._analyzer.get_historical_spend(
1557
- selected_time_dims,
1607
+ if new_data is None:
1608
+ new_data = analyzer.DataTensors()
1609
+
1610
+ required_tensors = c.PERFORMANCE_DATA + (c.TIME,)
1611
+ filled_data = new_data.validate_and_fill_missing_data(
1612
+ required_tensors_names=required_tensors, meridian=self._meridian
1613
+ )
1614
+
1615
+ selected_time_dims = self._validate_selected_times(
1616
+ selected_times, filled_data
1617
+ )
1618
+ hist_spend = self._analyzer.get_aggregated_spend(
1619
+ new_data=filled_data.filter_fields(c.PAID_CHANNELS + c.SPEND_DATA),
1620
+ selected_times=selected_time_dims,
1558
1621
  include_media=self._meridian.n_media_channels > 0,
1559
1622
  include_rf=self._meridian.n_rf_channels > 0,
1560
1623
  ).data
@@ -1579,6 +1642,7 @@ class BudgetOptimizer:
1579
1642
  if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
1580
1643
  optimal_frequency = tf.convert_to_tensor(
1581
1644
  self._analyzer.optimal_freq(
1645
+ new_data=filled_data.filter_fields(c.RF_DATA),
1582
1646
  use_posterior=use_posterior,
1583
1647
  selected_times=selected_time_dims,
1584
1648
  use_kpi=use_kpi,
@@ -1595,6 +1659,7 @@ class BudgetOptimizer:
1595
1659
  spend_bound_upper=optimization_upper_bound,
1596
1660
  step_size=step_size,
1597
1661
  selected_times=selected_time_dims,
1662
+ new_data=filled_data.filter_fields(c.PAID_DATA),
1598
1663
  use_posterior=use_posterior,
1599
1664
  use_kpi=use_kpi,
1600
1665
  optimal_frequency=optimal_frequency,
@@ -1658,10 +1723,40 @@ class BudgetOptimizer:
1658
1723
  attrs={c.SPEND_STEP_SIZE: spend_step_size},
1659
1724
  )
1660
1725
 
1726
+ def _validate_selected_times(
1727
+ self,
1728
+ selected_times: tuple[str | None, str | None] | None,
1729
+ new_data: analyzer.DataTensors | None,
1730
+ ) -> Sequence[str] | Sequence[bool] | None:
1731
+ """Validates and returns the selected times."""
1732
+ if selected_times is None:
1733
+ return None
1734
+ start_date, end_date = selected_times
1735
+ if start_date is None and end_date is None:
1736
+ return None
1737
+
1738
+ new_data = new_data or analyzer.DataTensors()
1739
+ if new_data.get_modified_times(self._meridian) is None:
1740
+ return self._meridian.expand_selected_time_dims(
1741
+ start_date=start_date,
1742
+ end_date=end_date,
1743
+ )
1744
+ else:
1745
+ assert new_data.time is not None
1746
+ new_times_str = new_data.time.numpy().astype(str).tolist()
1747
+ time_coordinates = tc.TimeCoordinates.from_dates(new_times_str)
1748
+ expanded_dates = time_coordinates.expand_selected_time_dims(
1749
+ start_date=start_date,
1750
+ end_date=end_date,
1751
+ )
1752
+ expanded_str = [date.strftime(c.DATE_FORMAT) for date in expanded_dates]
1753
+ return [x in expanded_str for x in new_times_str]
1754
+
1661
1755
  def _get_incremental_outcome_tensors(
1662
1756
  self,
1663
1757
  hist_spend: np.ndarray,
1664
1758
  spend: np.ndarray,
1759
+ new_data: analyzer.DataTensors | None = None,
1665
1760
  optimal_frequency: Sequence[float] | None = None,
1666
1761
  ) -> tuple[
1667
1762
  tf.Tensor | None,
@@ -1686,6 +1781,11 @@ class BudgetOptimizer:
1686
1781
  Args:
1687
1782
  hist_spend: historical spend data.
1688
1783
  spend: new optimized spend data.
1784
+ new_data: An optional `DataTensors` object containing the new `media`,
1785
+ `reach`, and `frequency` tensors. If `None`, the existing tensors from
1786
+ the Meridian object are used. If any of the tensors is provided with a
1787
+ different number of time periods than in `InputData`, then all tensors
1788
+ must be provided with the same number of time periods.
1689
1789
  optimal_frequency: xr.DataArray with dimension `n_rf_channels`, containing
1690
1790
  the optimal frequency per channel, that maximizes posterior mean roi.
1691
1791
  Value is `None` if the model does not contain reach and frequency data,
@@ -1696,13 +1796,18 @@ class BudgetOptimizer:
1696
1796
  Tuple of tf.tensors (new_media, new_media_spend, new_reach, new_frequency,
1697
1797
  new_rf_spend).
1698
1798
  """
1799
+ new_data = new_data or analyzer.DataTensors()
1800
+ filled_data = new_data.validate_and_fill_missing_data(
1801
+ c.PAID_CHANNELS,
1802
+ self._meridian,
1803
+ )
1699
1804
  if self._meridian.n_media_channels > 0:
1700
1805
  new_media = (
1701
1806
  tf.math.divide_no_nan(
1702
1807
  spend[: self._meridian.n_media_channels],
1703
1808
  hist_spend[: self._meridian.n_media_channels],
1704
1809
  )
1705
- * self._meridian.media_tensors.media
1810
+ * filled_data.media
1706
1811
  )
1707
1812
  new_media_spend = tf.convert_to_tensor(
1708
1813
  spend[: self._meridian.n_media_channels]
@@ -1711,9 +1816,7 @@ class BudgetOptimizer:
1711
1816
  new_media = None
1712
1817
  new_media_spend = None
1713
1818
  if self._meridian.n_rf_channels > 0:
1714
- rf_media = (
1715
- self._meridian.rf_tensors.reach * self._meridian.rf_tensors.frequency
1716
- )
1819
+ rf_media = filled_data.reach * filled_data.frequency
1717
1820
  new_rf_media = (
1718
1821
  tf.math.divide_no_nan(
1719
1822
  spend[-self._meridian.n_rf_channels :],
@@ -1722,7 +1825,7 @@ class BudgetOptimizer:
1722
1825
  * rf_media
1723
1826
  )
1724
1827
  frequency = (
1725
- self._meridian.rf_tensors.frequency
1828
+ filled_data.frequency
1726
1829
  if optimal_frequency is None
1727
1830
  else optimal_frequency
1728
1831
  )
@@ -1742,9 +1845,10 @@ class BudgetOptimizer:
1742
1845
  self,
1743
1846
  hist_spend: np.ndarray,
1744
1847
  spend: np.ndarray,
1848
+ new_data: analyzer.DataTensors | None = None,
1745
1849
  use_posterior: bool = True,
1746
1850
  use_kpi: bool = False,
1747
- selected_times: Sequence[str] | None = None,
1851
+ selected_times: Sequence[str] | Sequence[bool] | None = None,
1748
1852
  optimal_frequency: Sequence[float] | None = None,
1749
1853
  attrs: Mapping[str, Any] | None = None,
1750
1854
  confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
@@ -1752,15 +1856,22 @@ class BudgetOptimizer:
1752
1856
  use_historical_budget: bool = True,
1753
1857
  ) -> xr.Dataset:
1754
1858
  """Creates the budget dataset."""
1859
+ new_data = new_data or analyzer.DataTensors()
1860
+ filled_data = new_data.validate_and_fill_missing_data(
1861
+ c.PAID_DATA + (c.TIME,),
1862
+ self._meridian,
1863
+ )
1755
1864
  spend = tf.convert_to_tensor(spend, dtype=tf.float32)
1756
1865
  hist_spend = tf.convert_to_tensor(hist_spend, dtype=tf.float32)
1757
1866
  (new_media, new_media_spend, new_reach, new_frequency, new_rf_spend) = (
1758
1867
  self._get_incremental_outcome_tensors(
1759
- hist_spend, spend, optimal_frequency
1868
+ hist_spend,
1869
+ spend,
1870
+ new_data=filled_data.filter_fields(c.PAID_CHANNELS),
1871
+ optimal_frequency=optimal_frequency,
1760
1872
  )
1761
1873
  )
1762
1874
  budget = np.sum(spend)
1763
- all_times = self._meridian.input_data.time.values.tolist()
1764
1875
 
1765
1876
  # incremental_outcome here is a tensor with the shape
1766
1877
  # (n_chains, n_draws, n_channels)
@@ -1770,6 +1881,7 @@ class BudgetOptimizer:
1770
1881
  media=new_media,
1771
1882
  reach=new_reach,
1772
1883
  frequency=new_frequency,
1884
+ revenue_per_kpi=filled_data.revenue_per_kpi,
1773
1885
  ),
1774
1886
  selected_times=selected_times,
1775
1887
  use_kpi=use_kpi,
@@ -1792,6 +1904,9 @@ class BudgetOptimizer:
1792
1904
  )
1793
1905
 
1794
1906
  aggregated_impressions = self._analyzer.get_aggregated_impressions(
1907
+ new_data=analyzer.DataTensors(
1908
+ media=new_media, reach=new_reach, frequency=new_frequency
1909
+ ),
1795
1910
  selected_times=selected_times,
1796
1911
  selected_geos=None,
1797
1912
  aggregate_times=True,
@@ -1799,10 +1914,11 @@ class BudgetOptimizer:
1799
1914
  optimal_frequency=optimal_frequency,
1800
1915
  include_non_paid_channels=False,
1801
1916
  )
1802
- effectiveness = incremental_outcome / aggregated_impressions
1803
1917
  effectiveness_with_mean_median_and_ci = (
1804
1918
  analyzer.get_central_tendency_and_ci(
1805
- data=effectiveness,
1919
+ data=tf.math.divide_no_nan(
1920
+ incremental_outcome, aggregated_impressions
1921
+ ),
1806
1922
  confidence_level=confidence_level,
1807
1923
  include_median=True,
1808
1924
  )
@@ -1822,6 +1938,7 @@ class BudgetOptimizer:
1822
1938
  frequency=new_frequency,
1823
1939
  media_spend=new_media_spend,
1824
1940
  rf_spend=new_rf_spend,
1941
+ revenue_per_kpi=filled_data.revenue_per_kpi,
1825
1942
  ),
1826
1943
  selected_times=selected_times,
1827
1944
  batch_size=batch_size,
@@ -1860,6 +1977,18 @@ class BudgetOptimizer:
1860
1977
  c.CPIK: ([c.CHANNEL, c.METRIC], cpik),
1861
1978
  }
1862
1979
 
1980
+ all_times = (
1981
+ filled_data.time.numpy().astype(str).tolist()
1982
+ if filled_data.time is not None
1983
+ else self._meridian.input_data.time.values.tolist()
1984
+ )
1985
+ if selected_times is not None and all(
1986
+ isinstance(time, bool) for time in selected_times
1987
+ ):
1988
+ selected_times = [
1989
+ time for time, selected in zip(all_times, selected_times) if selected
1990
+ ]
1991
+
1863
1992
  attributes = {
1864
1993
  c.START_DATE: min(selected_times) if selected_times else all_times[0],
1865
1994
  c.END_DATE: max(selected_times) if selected_times else all_times[-1],
@@ -1889,7 +2018,8 @@ class BudgetOptimizer:
1889
2018
  i: int,
1890
2019
  incremental_outcome_grid: np.ndarray,
1891
2020
  multipliers_grid: tf.Tensor,
1892
- selected_times: Sequence[str],
2021
+ new_data: analyzer.DataTensors | None = None,
2022
+ selected_times: Sequence[str] | Sequence[bool] | None = None,
1893
2023
  use_posterior: bool = True,
1894
2024
  use_kpi: bool = False,
1895
2025
  optimal_frequency: xr.DataArray | None = None,
@@ -1904,8 +2034,16 @@ class BudgetOptimizer:
1904
2034
  number of columns is equal to the number of total channels, containing
1905
2035
  incremental outcome by channel.
1906
2036
  multipliers_grid: A grid derived from spend.
1907
- selected_times: Sequence of strings representing the time dimensions in
1908
- `meridian.input_data.time` to use for optimization.
2037
+ new_data: An optional `DataTensors` object containing the new `media`,
2038
+ `reach`, `frequency`, and `revenue_per_kpi` tensors. If `None`, the
2039
+ existing tensors from the Meridian object are used. If any of the
2040
+ tensors is provided with a different number of time periods than in
2041
+ `InputData`, then all tensors must be provided with the same number of
2042
+ time periods.
2043
+ selected_times: Optional list of times to optimize. This can either be a
2044
+ string list containing a subset of time dimension coordinates from
2045
+ `InputData.time` or a boolean list with length equal to the time
2046
+ dimension of the tensor. By default, all time periods are included.
1909
2047
  use_posterior: Boolean. If `True`, then the incremental outcome is derived
1910
2048
  from the posterior distribution of the model. Otherwise, the prior
1911
2049
  distribution is used.
@@ -1922,10 +2060,14 @@ class BudgetOptimizer:
1922
2060
  reducing `batch_size`. The calculation will generally be faster with
1923
2061
  larger `batch_size` values.
1924
2062
  """
2063
+ new_data = new_data or analyzer.DataTensors()
2064
+ filled_data = new_data.validate_and_fill_missing_data(
2065
+ c.PAID_DATA, self._meridian
2066
+ )
1925
2067
  if self._meridian.n_media_channels > 0:
1926
2068
  new_media = (
1927
2069
  multipliers_grid[i, : self._meridian.n_media_channels]
1928
- * self._meridian.media_tensors.media
2070
+ * filled_data.media
1929
2071
  )
1930
2072
  else:
1931
2073
  new_media = None
@@ -1934,20 +2076,18 @@ class BudgetOptimizer:
1934
2076
  new_frequency = None
1935
2077
  new_reach = None
1936
2078
  elif optimal_frequency is not None:
1937
- new_frequency = (
1938
- tf.ones_like(self._meridian.rf_tensors.frequency) * optimal_frequency
1939
- )
2079
+ new_frequency = tf.ones_like(filled_data.frequency) * optimal_frequency
1940
2080
  new_reach = tf.math.divide_no_nan(
1941
2081
  multipliers_grid[i, -self._meridian.n_rf_channels :]
1942
- * self._meridian.rf_tensors.reach
1943
- * self._meridian.rf_tensors.frequency,
2082
+ * filled_data.reach
2083
+ * filled_data.frequency,
1944
2084
  new_frequency,
1945
2085
  )
1946
2086
  else:
1947
- new_frequency = self._meridian.rf_tensors.frequency
2087
+ new_frequency = filled_data.frequency
1948
2088
  new_reach = (
1949
2089
  multipliers_grid[i, -self._meridian.n_rf_channels :]
1950
- * self._meridian.rf_tensors.reach
2090
+ * filled_data.reach
1951
2091
  )
1952
2092
 
1953
2093
  # incremental_outcome returns a three dimensional tensor with dims
@@ -1960,6 +2100,7 @@ class BudgetOptimizer:
1960
2100
  media=new_media,
1961
2101
  reach=new_reach,
1962
2102
  frequency=new_frequency,
2103
+ revenue_per_kpi=filled_data.revenue_per_kpi,
1963
2104
  ),
1964
2105
  selected_times=selected_times,
1965
2106
  use_kpi=use_kpi,
@@ -1976,7 +2117,8 @@ class BudgetOptimizer:
1976
2117
  spend_bound_lower: np.ndarray,
1977
2118
  spend_bound_upper: np.ndarray,
1978
2119
  step_size: int,
1979
- selected_times: Sequence[str],
2120
+ new_data: analyzer.DataTensors | None = None,
2121
+ selected_times: Sequence[str] | Sequence[bool] | None = None,
1980
2122
  use_posterior: bool = True,
1981
2123
  use_kpi: bool = False,
1982
2124
  optimal_frequency: xr.DataArray | None = None,
@@ -1992,8 +2134,16 @@ class BudgetOptimizer:
1992
2134
  containing the upper constraint spend for each channel.
1993
2135
  step_size: Integer indicating the step size, or interval, between values
1994
2136
  in the spend grid. All media channels have the same step size.
1995
- selected_times: Sequence of strings representing the time dimensions in
1996
- `meridian.input_data.time` to use for optimization.
2137
+ new_data: An optional `DataTensors` object containing the new `media`,
2138
+ `reach`, `frequency`, and `revenue_per_kpi` tensors. If `None`, the
2139
+ existing tensors from the Meridian object are used. If any of the
2140
+ tensors is provided with a different number of time periods than in
2141
+ `InputData`, then all tensors must be provided with the same number of
2142
+ time periods.
2143
+ selected_times: Optional list of times to optimize. This can either be a
2144
+ string list containing a subset of time dimension coordinates from
2145
+ `InputData.time` or a boolean list with length equal to the time
2146
+ dimension of the tensor. By default, all time periods are included.
1997
2147
  use_posterior: Boolean. If `True`, then the incremental outcome is derived
1998
2148
  from the posterior distribution of the model. Otherwise, the prior
1999
2149
  distribution is used.
@@ -2047,6 +2197,7 @@ class BudgetOptimizer:
2047
2197
  incremental_outcome_grid=incremental_outcome_grid,
2048
2198
  multipliers_grid=multipliers_grid,
2049
2199
  selected_times=selected_times,
2200
+ new_data=new_data,
2050
2201
  use_posterior=use_posterior,
2051
2202
  use_kpi=use_kpi,
2052
2203
  optimal_frequency=optimal_frequency,
@@ -167,7 +167,9 @@ class Summarizer:
167
167
  self._create_model_fit_card_html(
168
168
  template_env, selected_times=selected_times
169
169
  ),
170
- self._create_outcome_contrib_card_html(template_env, media_summary),
170
+ self._create_outcome_contrib_card_html(
171
+ template_env, media_summary, selected_times=selected_times
172
+ ),
171
173
  self._create_performance_breakdown_card_html(
172
174
  template_env, media_summary
173
175
  ),
@@ -267,16 +269,30 @@ class Summarizer:
267
269
  self,
268
270
  template_env: jinja2.Environment,
269
271
  media_summary: visualizer.MediaSummary,
272
+ selected_times: Sequence[str] | None,
270
273
  ) -> str:
271
274
  """Creates the HTML snippet for the Outcome Contrib card."""
272
275
  outcome = self._kpi_or_revenue()
273
276
 
277
+ num_selected_times = (
278
+ self._meridian.n_times
279
+ if selected_times is None
280
+ else len(selected_times)
281
+ )
282
+ time_granularity = (
283
+ c.WEEKLY
284
+ if num_selected_times < c.QUARTERLY_SUMMARY_THRESHOLD_WEEKS
285
+ else c.QUARTERLY
286
+ )
287
+
274
288
  channel_contrib_area_chart = formatter.ChartSpec(
275
289
  id=summary_text.CHANNEL_CONTRIB_BY_TIME_CHART_ID,
276
290
  description=summary_text.CHANNEL_CONTRIB_BY_TIME_CHART_DESCRIPTION.format(
277
291
  outcome=outcome
278
292
  ),
279
- chart_json=media_summary.plot_channel_contribution_area_chart().to_json(),
293
+ chart_json=media_summary.plot_channel_contribution_area_chart(
294
+ time_granularity=time_granularity
295
+ ).to_json(),
280
296
  )
281
297
 
282
298
  channel_contrib_bump_chart = formatter.ChartSpec(
@@ -284,7 +300,9 @@ class Summarizer:
284
300
  description=summary_text.CHANNEL_CONTRIB_RANK_CHART_DESCRIPTION.format(
285
301
  outcome=outcome
286
302
  ),
287
- chart_json=media_summary.plot_channel_contribution_bump_chart().to_json(),
303
+ chart_json=media_summary.plot_channel_contribution_bump_chart(
304
+ time_granularity=time_granularity
305
+ ).to_json(),
288
306
  )
289
307
  channel_drivers_chart = formatter.ChartSpec(
290
308
  id=summary_text.CHANNEL_DRIVERS_CHART_ID,
@@ -465,14 +465,14 @@ class ModelFit:
465
465
  else:
466
466
  y_axis_label = summary_text.KPI_LABEL
467
467
  plot = (
468
- alt.Chart(model_fit_df, width=c.VEGALITE_FACET_DEFAULT_WIDTH)
468
+ alt.Chart(model_fit_df, width=c.VEGALITE_FACET_EXTRA_LARGE_WIDTH)
469
469
  .mark_line()
470
470
  .encode(
471
471
  x=alt.X(
472
472
  f'{c.TIME}:T',
473
473
  title='Time period',
474
474
  axis=alt.Axis(
475
- format='%Y %b',
475
+ format=c.QUARTER_FORMAT,
476
476
  grid=False,
477
477
  tickCount=8,
478
478
  domainColor=c.GREY_300,
@@ -1657,18 +1657,36 @@ class MediaSummary:
1657
1657
  self._marginal_roi_by_reach = marginal_roi_by_reach
1658
1658
  self._non_media_baseline_values = non_media_baseline_values
1659
1659
 
1660
- def plot_channel_contribution_area_chart(self) -> alt.Chart:
1660
+ def plot_channel_contribution_area_chart(
1661
+ self, time_granularity: str = c.QUARTERLY
1662
+ ) -> alt.Chart:
1661
1663
  """Plots a stacked area chart of the contribution share per channel by time.
1662
1664
 
1665
+ Args:
1666
+ time_granularity: The granularity for the time axis. Options are `weekly`
1667
+ or `quarterly`. Defaults to `quarterly`.
1668
+
1663
1669
  Returns:
1664
1670
  An Altair plot showing the contribution share per channel by time.
1671
+
1672
+ Raises:
1673
+ ValueError: If time_granularity is not one of the allowed constants.
1665
1674
  """
1675
+ if time_granularity not in c.TIME_GRANULARITIES:
1676
+ raise ValueError(
1677
+ f'time_granularity must be one of {c.TIME_GRANULARITIES}'
1678
+ )
1679
+
1680
+ x_axis_format = (
1681
+ c.DATE_FORMAT if time_granularity == c.WEEKLY else c.QUARTER_FORMAT
1682
+ )
1683
+
1666
1684
  outcome_df = self._transform_contribution_metrics(
1667
1685
  include_non_paid=True, aggregate_times=False
1668
1686
  )
1669
1687
 
1670
1688
  # Ensure proper ordering for the stacked area chart. Baseline should be at
1671
- # the bottom. Separate the *stacking* order from the *legend* order.
1689
+ # the bottom. Separate the *stacking* order from the *legend* order.
1672
1690
  stack_order = sorted([
1673
1691
  channel
1674
1692
  for channel in outcome_df[c.CHANNEL].unique()
@@ -1691,7 +1709,7 @@ class MediaSummary:
1691
1709
  )
1692
1710
 
1693
1711
  plot = (
1694
- alt.Chart(outcome_df, width=c.VEGALITE_FACET_LARGE_WIDTH)
1712
+ alt.Chart(outcome_df, width=c.VEGALITE_FACET_EXTRA_LARGE_WIDTH)
1695
1713
  .mark_area()
1696
1714
  .transform_calculate(
1697
1715
  sort_channel=f'indexof({stack_order}, datum.channel)'
@@ -1701,7 +1719,7 @@ class MediaSummary:
1701
1719
  f'{c.TIME}:T',
1702
1720
  title='Time period',
1703
1721
  axis=alt.Axis(
1704
- format='%Y Q%q',
1722
+ format=x_axis_format,
1705
1723
  grid=False,
1706
1724
  tickCount=8,
1707
1725
  domainColor=c.GREY_300,
@@ -1730,12 +1748,13 @@ class MediaSummary:
1730
1748
  labelFontSize=c.AXIS_FONT_SIZE,
1731
1749
  labelFont=c.FONT_ROBOTO,
1732
1750
  title=None,
1751
+ orient='bottom',
1733
1752
  ),
1734
1753
  scale=alt.Scale(domain=legend_order),
1735
1754
  sort=legend_order,
1736
1755
  ),
1737
1756
  tooltip=[
1738
- alt.Tooltip(f'{c.TIME}:T', format='%Y-%m-%d'),
1757
+ alt.Tooltip(f'{c.TIME}:T', format=c.DATE_FORMAT),
1739
1758
  c.CHANNEL,
1740
1759
  alt.Tooltip(f'{c.INCREMENTAL_OUTCOME}:Q', format=',.2f'),
1741
1760
  ],
@@ -1751,16 +1770,31 @@ class MediaSummary:
1751
1770
  )
1752
1771
  return plot
1753
1772
 
1754
- def plot_channel_contribution_bump_chart(self) -> alt.Chart:
1755
- """Plots a bump chart of channel contribution rank over time (Quarterly).
1773
+ def plot_channel_contribution_bump_chart(
1774
+ self, time_granularity: str = c.QUARTERLY
1775
+ ) -> alt.Chart:
1776
+ """Plots a bump chart of channel contribution rank over time.
1756
1777
 
1757
1778
  This chart shows the relative rank of each channel's contribution,
1758
- including the baseline, based on incremental outcome at the end of each
1779
+ including the baseline, based on incremental outcome. Depending on the
1780
+ time_granularity, ranks are shown either weekly or at the end of each
1759
1781
  quarter. Rank 1 represents the highest contribution.
1760
1782
 
1783
+ Args:
1784
+ time_granularity: The granularity for the time axis. Options are `weekly`
1785
+ or `quarterly`. Defaults to `quarterly`.
1786
+
1761
1787
  Returns:
1762
- An Altair plot showing the contribution rank per channel by quarter.
1788
+ An Altair plot showing the contribution rank per channel by time.
1789
+
1790
+ Raises:
1791
+ ValueError: If time_granularity is not one of the allowed constants.
1763
1792
  """
1793
+ if time_granularity not in c.TIME_GRANULARITIES:
1794
+ raise ValueError(
1795
+ f'time_granularity must be one of {c.TIME_GRANULARITIES}'
1796
+ )
1797
+
1764
1798
  outcome_df = self._transform_contribution_metrics(
1765
1799
  include_non_paid=True, aggregate_times=False
1766
1800
  )
@@ -1770,30 +1804,37 @@ class MediaSummary:
1770
1804
  method='first', ascending=False
1771
1805
  )
1772
1806
 
1773
- # Filter data to keep only the last available date within each quarter
1774
- # for a quarterly view of ranking changes.
1775
- unique_times = pd.Series(outcome_df[c.TIME].unique()).sort_values()
1776
- quarters = unique_times.dt.to_period('Q')
1777
- quarterly_dates = unique_times[~quarters.duplicated(keep='last')]
1778
- quarterly_rank_df = outcome_df[
1779
- outcome_df[c.TIME].isin(quarterly_dates)
1780
- ].copy()
1807
+ if time_granularity == c.QUARTERLY:
1808
+ # Filter data to keep only the last available date within each quarter
1809
+ # for a quarterly view of ranking changes.
1810
+ unique_times = pd.Series(outcome_df[c.TIME].unique()).sort_values()
1811
+ quarters = unique_times.dt.to_period('Q')
1812
+ quarterly_dates = unique_times[~quarters.duplicated(keep='last')]
1813
+ plot_df = outcome_df[outcome_df[c.TIME].isin(quarterly_dates)].copy()
1814
+ x_axis_format = c.QUARTER_FORMAT
1815
+ tooltip_time_format = c.QUARTER_FORMAT
1816
+ tooltip_time_title = 'Quarter'
1817
+ else:
1818
+ plot_df = outcome_df.copy()
1819
+ x_axis_format = c.DATE_FORMAT
1820
+ tooltip_time_format = c.DATE_FORMAT
1821
+ tooltip_time_title = 'Week'
1781
1822
 
1782
1823
  legend_order = [c.BASELINE] + sorted([
1783
1824
  channel
1784
- for channel in quarterly_rank_df[c.CHANNEL].unique()
1825
+ for channel in plot_df[c.CHANNEL].unique()
1785
1826
  if channel != c.BASELINE
1786
1827
  ])
1787
1828
 
1788
1829
  plot = (
1789
- alt.Chart(quarterly_rank_df, width=c.VEGALITE_FACET_DEFAULT_WIDTH)
1830
+ alt.Chart(plot_df, width=c.VEGALITE_FACET_EXTRA_LARGE_WIDTH)
1790
1831
  .mark_line(point=True)
1791
1832
  .encode(
1792
1833
  x=alt.X(
1793
1834
  f'{c.TIME}:T',
1794
1835
  title='Time period',
1795
1836
  axis=alt.Axis(
1796
- format='%Y Q%q',
1837
+ format=x_axis_format,
1797
1838
  grid=False,
1798
1839
  domainColor=c.GREY_300,
1799
1840
  ),
@@ -1819,12 +1860,17 @@ class MediaSummary:
1819
1860
  labelFontSize=c.AXIS_FONT_SIZE,
1820
1861
  labelFont=c.FONT_ROBOTO,
1821
1862
  title=None,
1863
+ orient='bottom',
1822
1864
  ),
1823
1865
  scale=alt.Scale(domain=legend_order),
1824
1866
  sort=legend_order,
1825
1867
  ),
1826
1868
  tooltip=[
1827
- alt.Tooltip(f'{c.TIME}:T', format='%Y Q%q', title='Quarter'),
1869
+ alt.Tooltip(
1870
+ f'{c.TIME}:T',
1871
+ format=tooltip_time_format,
1872
+ title=tooltip_time_title,
1873
+ ),
1828
1874
  alt.Tooltip(f'{c.CHANNEL}:N', title='Channel'),
1829
1875
  alt.Tooltip('rank:O', title='Rank'),
1830
1876
  alt.Tooltip(
meridian/constants.py CHANGED
@@ -51,6 +51,8 @@ GREY_300 = '#DADCE0'
51
51
 
52
52
  # Example: "2024-01-09"
53
53
  DATE_FORMAT = '%Y-%m-%d'
54
# Example: "2024 Apr"
# NOTE(review): despite the name, this renders year + abbreviated month, not a
# quarter label ("Q1"); the charts that use it appear to rely on axis tick
# placement to land on quarter boundaries — confirm against the Vega-Lite
# axis configs before renaming.
QUARTER_FORMAT = '%Y %b'
54
56
 
55
57
  # Input data variables.
56
58
  KPI = 'kpi'
@@ -95,12 +97,8 @@ POSSIBLE_INPUT_DATA_ARRAY_NAMES = (
95
97
  + MEDIA_INPUT_DATA_ARRAY_NAMES
96
98
  + RF_INPUT_DATA_ARRAY_NAMES
97
99
  )
98
# Channel-execution tensors for paid media (media, reach, frequency).
PAID_CHANNELS = (MEDIA, REACH, FREQUENCY)
# Paid-channel tensors plus the revenue-per-KPI series.
PAID_DATA = PAID_CHANNELS + (REVENUE_PER_KPI,)
104
102
  NON_PAID_DATA = (
105
103
  ORGANIC_MEDIA,
106
104
  ORGANIC_REACH,
@@ -112,11 +110,7 @@ SPEND_DATA = (
112
110
  RF_SPEND,
113
111
  )
114
112
  PERFORMANCE_DATA = PAID_DATA + SPEND_DATA
115
- IMPRESSIONS_DATA = (
116
- MEDIA,
117
- REACH,
118
- FREQUENCY,
119
- ) + NON_PAID_DATA
113
+ IMPRESSIONS_DATA = PAID_CHANNELS + NON_PAID_DATA
120
114
  RF_DATA = (
121
115
  REACH,
122
116
  FREQUENCY,
@@ -622,3 +616,10 @@ CARD_STATS = 'stats'
622
616
  # VegaLite common params.
623
617
  VEGALITE_FACET_DEFAULT_WIDTH = 400
624
618
  VEGALITE_FACET_LARGE_WIDTH = 500
619
# Width (px) for extra-wide faceted Vega-Lite charts.
VEGALITE_FACET_EXTRA_LARGE_WIDTH = 900

# Time-granularity options for summary charts.
WEEKLY = 'weekly'
QUARTERLY = 'quarterly'
# The closed set of values accepted by `time_granularity` arguments.
TIME_GRANULARITIES = frozenset({WEEKLY, QUARTERLY})
# Selections spanning at least this many periods default to quarterly charts.
QUARTERLY_SUMMARY_THRESHOLD_WEEKS = 52
meridian/model/model.py CHANGED
@@ -149,6 +149,7 @@ class Meridian:
149
149
  self._validate_paid_media_prior_type()
150
150
  self._validate_geo_invariants()
151
151
  self._validate_time_invariants()
152
+ self._validate_kpi_transformer()
152
153
 
153
154
  @property
154
155
  def input_data(self) -> data.InputData:
@@ -410,6 +411,7 @@ class Meridian:
410
411
  set_total_media_contribution_prior=set_total_media_contribution_prior,
411
412
  kpi=np.sum(self.input_data.kpi.values),
412
413
  total_spend=agg_total_spend,
414
+ media_effects_dist=self.media_effects_dist,
413
415
  )
414
416
 
415
417
  @functools.cached_property
@@ -825,6 +827,19 @@ class Meridian:
825
827
  " the listed variables that do not vary across time."
826
828
  )
827
829
 
830
+ def _validate_kpi_transformer(self):
831
+ """Validates the KPI transformer."""
832
+ if (
833
+ self.kpi_transformer.population_scaled_stdev == 0
834
+ and self.model_spec.paid_media_prior_type
835
+ in constants.PAID_MEDIA_ROI_PRIOR_TYPES
836
+ ):
837
+ kpi = "kpi" if self.is_national else "population_scaled_kpi"
838
+ raise ValueError(
839
+ f"`{kpi}` cannot be constant with"
840
+ f" {self.model_spec.paid_media_prior_type} prior type."
841
+ )
842
+
828
843
  def adstock_hill_media(
829
844
  self,
830
845
  media: tf.Tensor, # pylint: disable=redefined-outer-name
@@ -455,6 +455,7 @@ class PriorDistribution:
455
455
  set_total_media_contribution_prior: bool,
456
456
  kpi: float,
457
457
  total_spend: np.ndarray,
458
+ media_effects_dist: str,
458
459
  ) -> PriorDistribution:
459
460
  """Returns a new `PriorDistribution` with broadcast distribution attributes.
460
461
 
@@ -480,6 +481,8 @@ class PriorDistribution:
480
481
  `set_total_media_contribution_prior=True`.
481
482
  total_spend: Spend per media channel summed across geos and time. Required
482
483
  if `set_total_media_contribution_prior=True`.
484
+ media_effects_dist: A string to specify the distribution of media random
485
+ effects across geos.
483
486
 
484
487
  Returns:
485
488
  A new `PriorDistribution` broadcast from this prior distribution,
@@ -757,6 +760,7 @@ class PriorDistribution:
757
760
  )
758
761
  else:
759
762
  roi_m_converted = self.roi_m
763
+ _check_for_negative_effect(roi_m_converted, media_effects_dist)
760
764
  roi_m = tfp.distributions.BatchBroadcast(
761
765
  roi_m_converted, n_media_channels, name=constants.ROI_M
762
766
  )
@@ -777,13 +781,15 @@ class PriorDistribution:
777
781
  )
778
782
  else:
779
783
  roi_rf_converted = self.roi_rf
784
+ _check_for_negative_effect(roi_rf_converted, media_effects_dist)
780
785
  roi_rf = tfp.distributions.BatchBroadcast(
781
786
  roi_rf_converted, n_rf_channels, name=constants.ROI_RF
782
787
  )
783
-
788
+ _check_for_negative_effect(self.mroi_m, media_effects_dist)
784
789
  mroi_m = tfp.distributions.BatchBroadcast(
785
790
  self.mroi_m, n_media_channels, name=constants.MROI_M
786
791
  )
792
+ _check_for_negative_effect(self.mroi_rf, media_effects_dist)
787
793
  mroi_rf = tfp.distributions.BatchBroadcast(
788
794
  self.mroi_rf, n_rf_channels, name=constants.MROI_RF
789
795
  )
@@ -885,6 +891,21 @@ def _get_total_media_contribution_prior(
885
891
  return tfp.distributions.LogNormal(lognormal_mu, lognormal_sigma, name=name)
886
892
 
887
893
 
894
def _check_for_negative_effect(
    dist: tfp.distributions.Distribution, media_effects_dist: str
):
  """Validates that a media prior has non-negative support when required.

  When `media_effects_dist` is log-normal, a prior that places probability
  mass at or below zero (CDF at zero greater than zero) is inconsistent with
  the model, so a `ValueError` is raised.

  Args:
    dist: The prior distribution to check.
    media_effects_dist: A string specifying the distribution of media random
      effects across geos.

  Raises:
    ValueError: If `media_effects_dist` is log-normal and `dist` has support
      on non-positive values.
  """
  if media_effects_dist != constants.MEDIA_EFFECTS_LOG_NORMAL:
    return
  # Fix: the `> 0` comparison must be element-wise *inside* the reduction.
  # The previous `np.any(dist.cdf(0)) > 0` compared the reduced boolean to
  # zero and only behaved correctly by accident because CDF values are
  # non-negative.
  if np.any(dist.cdf(0) > 0):
    raise ValueError(
        'Media priors must have non-negative support when'
        f' `media_effects_dist`="{media_effects_dist}". Found negative effect'
        f' in {dist.name}.'
    )
907
+
908
+
888
909
  def distributions_are_equal(
889
910
  a: tfp.distributions.Distribution, b: tfp.distributions.Distribution
890
911
  ) -> bool: