google-meridian 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
meridian/constants.py CHANGED
@@ -51,6 +51,8 @@ GREY_300 = '#DADCE0'
51
51
 
52
52
  # Example: "2024-01-09"
53
53
  DATE_FORMAT = '%Y-%m-%d'
54
+ # Example: "2024 Apr"
55
+ QUARTER_FORMAT = '%Y %b'
54
56
 
55
57
  # Input data variables.
56
58
  KPI = 'kpi'
@@ -95,6 +97,27 @@ POSSIBLE_INPUT_DATA_ARRAY_NAMES = (
95
97
  + MEDIA_INPUT_DATA_ARRAY_NAMES
96
98
  + RF_INPUT_DATA_ARRAY_NAMES
97
99
  )
100
+ PAID_CHANNELS = (MEDIA, REACH, FREQUENCY)
101
+ PAID_DATA = PAID_CHANNELS + (REVENUE_PER_KPI,)
102
+ NON_PAID_DATA = (
103
+ ORGANIC_MEDIA,
104
+ ORGANIC_REACH,
105
+ ORGANIC_FREQUENCY,
106
+ NON_MEDIA_TREATMENTS,
107
+ )
108
+ SPEND_DATA = (
109
+ MEDIA_SPEND,
110
+ RF_SPEND,
111
+ )
112
+ PERFORMANCE_DATA = PAID_DATA + SPEND_DATA
113
+ IMPRESSIONS_DATA = PAID_CHANNELS + NON_PAID_DATA
114
+ RF_DATA = (
115
+ REACH,
116
+ FREQUENCY,
117
+ RF_SPEND,
118
+ REVENUE_PER_KPI,
119
+ )
120
+ NON_REVENUE_DATA = IMPRESSIONS_DATA + (CONTROLS,)
98
121
 
99
122
  # Scaled input data variables.
100
123
  MEDIA_SCALED = 'media_scaled'
@@ -543,6 +566,7 @@ TARGET_ROI = 'target_roi'
543
566
  TARGET_MROI = 'target_mroi'
544
567
  SPEND_CONSTRAINT_DEFAULT_FIXED_BUDGET = 0.3
545
568
  SPEND_CONSTRAINT_DEFAULT_FLEXIBLE_BUDGET = 1.0
569
+ SPEND_CONSTRAINT_DEFAULT = 1.0
546
570
 
547
571
 
548
572
  # Plot constants.
@@ -591,3 +615,11 @@ CARD_STATS = 'stats'
591
615
 
592
616
  # VegaLite common params.
593
617
  VEGALITE_FACET_DEFAULT_WIDTH = 400
618
+ VEGALITE_FACET_LARGE_WIDTH = 500
619
+ VEGALITE_FACET_EXTRA_LARGE_WIDTH = 900
620
+
621
+ # Time Granularity Constants
622
+ WEEKLY = 'weekly'
623
+ QUARTERLY = 'quarterly'
624
+ TIME_GRANULARITIES = frozenset({WEEKLY, QUARTERLY})
625
+ QUARTERLY_SUMMARY_THRESHOLD_WEEKS = 52
@@ -401,6 +401,7 @@ class InputData:
401
401
  )
402
402
 
403
403
  def _validate_kpi(self):
404
+ """Validates the KPI data."""
404
405
  if (
405
406
  self.kpi_type != constants.REVENUE
406
407
  and self.kpi_type != constants.NON_REVENUE
@@ -413,6 +414,14 @@ class InputData:
413
414
  if (self.kpi.values < 0).any():
414
415
  raise ValueError("KPI values must be non-negative.")
415
416
 
417
+ if (
418
+ self.revenue_per_kpi is not None
419
+ and (self.revenue_per_kpi.values <= 0).all()
420
+ ):
421
+ raise ValueError(
422
+ "Revenue per KPI values must not be all zero or negative."
423
+ )
424
+
416
425
  def _validate_names(self):
417
426
  """Verifies that the names of the data arrays are correct."""
418
427
  arrays = [
@@ -534,15 +543,50 @@ class InputData:
534
543
  def _validate_media_channels(self):
535
544
  """Verifies Meridian media channel names invariants.
536
545
 
537
- In the input data, media channel names across `media_channel` and
538
- `rf_channel` must be unique.
546
+ In the input data, channel names across `media_channel`,
547
+ `rf_channel`, `organic_media_channel`, `organic_rf_channel`,
548
+ `non_media_channel` must be unique.
539
549
  """
540
550
  all_channels = self.get_all_channels()
541
551
  if len(np.unique(all_channels)) != all_channels.size:
542
- raise ValueError(
543
- "Media channel names across `media_channel` and `rf_channel` must be"
544
- " unique."
552
+ error_msg = (
553
+ "Channel names across `media_channel`, `rf_channel`,"
554
+ " `organic_media_channel`, `organic_rf_channel`, and"
555
+ " `non_media_channel` must be unique."
545
556
  )
557
+ # For each channel, store all occurrences of the channel in particular
558
+ # channel type.
559
+ from_channel_to_type = {}
560
+ for channel in all_channels:
561
+ if channel not in from_channel_to_type:
562
+ from_channel_to_type[channel] = []
563
+
564
+ # pytype: disable=attribute-error
565
+ if self.media_channel is not None:
566
+ for channel in self.media_channel.values:
567
+ from_channel_to_type[channel].append(constants.MEDIA_CHANNEL)
568
+ if self.rf_channel is not None:
569
+ for channel in self.rf_channel.values:
570
+ from_channel_to_type[channel].append(constants.RF_CHANNEL)
571
+ if self.organic_media_channel is not None:
572
+ for channel in self.organic_media_channel.values:
573
+ from_channel_to_type[channel].append(constants.ORGANIC_MEDIA_CHANNEL)
574
+ if self.organic_rf_channel is not None:
575
+ for channel in self.organic_rf_channel.values:
576
+ from_channel_to_type[channel].append(constants.ORGANIC_RF_CHANNEL)
577
+ if self.non_media_channel is not None:
578
+ for channel in self.non_media_channel.values:
579
+ from_channel_to_type[channel].append(constants.NON_MEDIA_CHANNEL)
580
+ # pytype: enable=attribute-error
581
+
582
+ for channel, types in from_channel_to_type.items():
583
+ if len(types) > 1:
584
+ error_msg += (
585
+ f" Channel `{channel}` is present in multiple channel types:"
586
+ f" {types}."
587
+ )
588
+
589
+ raise ValueError(error_msg)
546
590
 
547
591
  def _validate_times(self):
548
592
  """Validates time coordinate values."""
meridian/model/model.py CHANGED
@@ -149,6 +149,7 @@ class Meridian:
149
149
  self._validate_paid_media_prior_type()
150
150
  self._validate_geo_invariants()
151
151
  self._validate_time_invariants()
152
+ self._validate_kpi_transformer()
152
153
 
153
154
  @property
154
155
  def input_data(self) -> data.InputData:
@@ -410,6 +411,7 @@ class Meridian:
410
411
  set_total_media_contribution_prior=set_total_media_contribution_prior,
411
412
  kpi=np.sum(self.input_data.kpi.values),
412
413
  total_spend=agg_total_spend,
414
+ media_effects_dist=self.media_effects_dist,
413
415
  )
414
416
 
415
417
  @functools.cached_property
@@ -825,6 +827,19 @@ class Meridian:
825
827
  " the listed variables that do not vary across time."
826
828
  )
827
829
 
830
+ def _validate_kpi_transformer(self):
831
+ """Validates the KPI transformer."""
832
+ if (
833
+ self.kpi_transformer.population_scaled_stdev == 0
834
+ and self.model_spec.paid_media_prior_type
835
+ in constants.PAID_MEDIA_ROI_PRIOR_TYPES
836
+ ):
837
+ kpi = "kpi" if self.is_national else "population_scaled_kpi"
838
+ raise ValueError(
839
+ f"`{kpi}` cannot be constant with"
840
+ f" {self.model_spec.paid_media_prior_type} prior type."
841
+ )
842
+
828
843
  def adstock_hill_media(
829
844
  self,
830
845
  media: tf.Tensor, # pylint: disable=redefined-outer-name
@@ -1030,7 +1045,7 @@ class Meridian:
1030
1045
  max_energy_diff: float = 500.0,
1031
1046
  unrolled_leapfrog_steps: int = 1,
1032
1047
  parallel_iterations: int = 10,
1033
- seed: Sequence[int] | None = None,
1048
+ seed: Sequence[int] | int | None = None,
1034
1049
  **pins,
1035
1050
  ):
1036
1051
  """Runs Markov Chain Monte Carlo (MCMC) sampling of posterior distributions.
@@ -1080,9 +1095,10 @@ class Meridian:
1080
1095
  trajectory length implied by `max_tree_depth`. Defaults is `1`.
1081
1096
  parallel_iterations: Number of iterations allowed to run in parallel. Must
1082
1097
  be a positive integer. For more information, see `tf.while_loop`.
1083
- seed: Used to set the seed for reproducible results. For more information,
1084
- see [PRNGS and seeds]
1085
- (https://github.com/tensorflow/probability/blob/main/PRNGS.md).
1098
+ seed: An `int32[2]` Tensor or a Python list or tuple of 2 `int`s, which
1099
+ will be treated as stateless seeds; or a Python `int` or `None`, which
1100
+ will be treated as stateful seeds. See [tfp.random.sanitize_seed]
1101
+ (https://www.tensorflow.org/probability/api_docs/python/tfp/random/sanitize_seed).
1086
1102
  **pins: These are used to condition the provided joint distribution, and
1087
1103
  are passed directly to `joint_dist.experimental_pin(**pins)`.
1088
1104
 
@@ -393,7 +393,7 @@ class PosteriorMCMCSampler:
393
393
  max_energy_diff: float = 500.0,
394
394
  unrolled_leapfrog_steps: int = 1,
395
395
  parallel_iterations: int = 10,
396
- seed: Sequence[int] | None = None,
396
+ seed: Sequence[int] | int | None = None,
397
397
  **pins,
398
398
  ) -> az.InferenceData:
399
399
  """Runs Markov Chain Monte Carlo (MCMC) sampling of posterior distributions.
@@ -441,9 +441,10 @@ class PosteriorMCMCSampler:
441
441
  trajectory length implied by `max_tree_depth`. Defaults is `1`.
442
442
  parallel_iterations: Number of iterations allowed to run in parallel. Must
443
443
  be a positive integer. For more information, see `tf.while_loop`.
444
- seed: Used to set the seed for reproducible results. For more information,
445
- see [PRNGS and seeds]
446
- (https://github.com/tensorflow/probability/blob/main/PRNGS.md).
444
+ seed: An `int32[2]` Tensor or a Python list or tuple of 2 `int`s, which
445
+ will be treated as stateless seeds; or a Python `int` or `None`, which
446
+ will be treated as stateful seeds. See [tfp.random.sanitize_seed]
447
+ (https://www.tensorflow.org/probability/api_docs/python/tfp/random/sanitize_seed).
447
448
  **pins: These are used to condition the provided joint distribution, and
448
449
  are passed directly to `joint_dist.experimental_pin(**pins)`.
449
450
 
@@ -457,7 +458,14 @@ class PosteriorMCMCSampler:
457
458
  [ResourceExhaustedError when running Meridian.sample_posterior]
458
459
  (https://developers.google.com/meridian/docs/advanced-modeling/model-debugging#gpu-oom-error).
459
460
  """
460
- seed = tfp.random.sanitize_seed(seed) if seed else None
461
+ if seed is not None and isinstance(seed, Sequence) and len(seed) != 2:
462
+ raise ValueError(
463
+ "Invalid seed: Must be either a single integer (stateful seed) or a"
464
+ " pair of two integers (stateless seed). See"
465
+ " [tfp.random.sanitize_seed](https://www.tensorflow.org/probability/api_docs/python/tfp/random/sanitize_seed)"
466
+ " for details."
467
+ )
468
+ seed = tfp.random.sanitize_seed(seed) if seed is not None else None
461
469
  n_chains_list = [n_chains] if isinstance(n_chains, int) else n_chains
462
470
  total_chains = np.sum(n_chains_list)
463
471
 
@@ -486,6 +494,8 @@ class PosteriorMCMCSampler:
486
494
  " integers as `n_chains` to sample chains serially (see"
487
495
  " https://developers.google.com/meridian/docs/advanced-modeling/model-debugging#gpu-oom-error)"
488
496
  ) from error
497
+ if seed is not None:
498
+ seed += 1
489
499
  states.append(mcmc.all_states._asdict())
490
500
  traces.append(mcmc.trace)
491
501
 
@@ -455,6 +455,7 @@ class PriorDistribution:
455
455
  set_total_media_contribution_prior: bool,
456
456
  kpi: float,
457
457
  total_spend: np.ndarray,
458
+ media_effects_dist: str,
458
459
  ) -> PriorDistribution:
459
460
  """Returns a new `PriorDistribution` with broadcast distribution attributes.
460
461
 
@@ -480,6 +481,8 @@ class PriorDistribution:
480
481
  `set_total_media_contribution_prior=True`.
481
482
  total_spend: Spend per media channel summed across geos and time. Required
482
483
  if `set_total_media_contribution_prior=True`.
484
+ media_effects_dist: A string to specify the distribution of media random
485
+ effects across geos.
483
486
 
484
487
  Returns:
485
488
  A new `PriorDistribution` broadcast from this prior distribution,
@@ -757,6 +760,7 @@ class PriorDistribution:
757
760
  )
758
761
  else:
759
762
  roi_m_converted = self.roi_m
763
+ _check_for_negative_effect(roi_m_converted, media_effects_dist)
760
764
  roi_m = tfp.distributions.BatchBroadcast(
761
765
  roi_m_converted, n_media_channels, name=constants.ROI_M
762
766
  )
@@ -777,13 +781,15 @@ class PriorDistribution:
777
781
  )
778
782
  else:
779
783
  roi_rf_converted = self.roi_rf
784
+ _check_for_negative_effect(roi_rf_converted, media_effects_dist)
780
785
  roi_rf = tfp.distributions.BatchBroadcast(
781
786
  roi_rf_converted, n_rf_channels, name=constants.ROI_RF
782
787
  )
783
-
788
+ _check_for_negative_effect(self.mroi_m, media_effects_dist)
784
789
  mroi_m = tfp.distributions.BatchBroadcast(
785
790
  self.mroi_m, n_media_channels, name=constants.MROI_M
786
791
  )
792
+ _check_for_negative_effect(self.mroi_rf, media_effects_dist)
787
793
  mroi_rf = tfp.distributions.BatchBroadcast(
788
794
  self.mroi_rf, n_rf_channels, name=constants.MROI_RF
789
795
  )
@@ -885,6 +891,21 @@ def _get_total_media_contribution_prior(
885
891
  return tfp.distributions.LogNormal(lognormal_mu, lognormal_sigma, name=name)
886
892
 
887
893
 
894
def _check_for_negative_effect(
    dist: tfp.distributions.Distribution, media_effects_dist: str
) -> None:
  """Rejects a media prior whose CDF at zero is positive under log-normal effects.

  The check only applies when `media_effects_dist` equals
  `constants.MEDIA_EFFECTS_LOG_NORMAL`; for any other value the function
  returns without evaluating the distribution.

  Args:
    dist: The prior distribution to inspect. Its `cdf` is evaluated at 0 and
      its `name` is used in the error message.
    media_effects_dist: A string specifying the distribution of media random
      effects across geos.

  Raises:
    ValueError: If `media_effects_dist` is log-normal and `dist.cdf(0)` is
      greater than zero for at least one element.
  """
  if media_effects_dist != constants.MEDIA_EFFECTS_LOG_NORMAL:
    return

  # Compare element-wise first and reduce afterwards. Reducing first
  # (`np.any(cdf) > 0`) compares a single bool with 0 and treats any truthy
  # entry, including NaN, as a violation.
  if np.any(dist.cdf(0) > 0):
    raise ValueError(
        'Media priors must have non-negative support when'
        f' `media_effects_dist`="{media_effects_dist}". Found negative effect'
        f' in {dist.name}.'
    )
907
+
908
+
888
909
  def distributions_are_equal(
889
910
  a: tfp.distributions.Distribution, b: tfp.distributions.Distribution
890
911
  ) -> bool: