google-meridian 1.1.5__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
meridian/constants.py CHANGED
@@ -66,6 +66,7 @@ FREQUENCY = 'frequency'
 RF_IMPRESSIONS = 'rf_impressions'
 RF_SPEND = 'rf_spend'
 ORGANIC_MEDIA = 'organic_media'
+ORGANIC_RF = 'organic_rf'
 ORGANIC_REACH = 'organic_reach'
 ORGANIC_FREQUENCY = 'organic_frequency'
 NON_MEDIA_TREATMENTS = 'non_media_treatments'
@@ -143,6 +144,7 @@ MEDIA_CHANNEL = 'media_channel'
 RF_CHANNEL = 'rf_channel'
 CHANNEL = 'channel'
 RF = 'rf'
+ORGANIC_RF = 'organic_rf'
 ORGANIC_MEDIA_CHANNEL = 'organic_media_channel'
 ORGANIC_RF_CHANNEL = 'organic_rf_channel'
 NON_MEDIA_CHANNEL = 'non_media_channel'
@@ -212,9 +214,11 @@ NON_PAID_TREATMENT_PRIOR_TYPES = frozenset({
     TREATMENT_PRIOR_TYPE_COEFFICIENT,
     TREATMENT_PRIOR_TYPE_CONTRIBUTION,
 })
-PAID_MEDIA_ROI_PRIOR_TYPES = frozenset(
-    {TREATMENT_PRIOR_TYPE_ROI, TREATMENT_PRIOR_TYPE_MROI}
-)
+PAID_MEDIA_ROI_PRIOR_TYPES = frozenset({
+    TREATMENT_PRIOR_TYPE_ROI,
+    TREATMENT_PRIOR_TYPE_MROI,
+    TREATMENT_PRIOR_TYPE_CONTRIBUTION,
+})
 # Represents a 1% increase in spend.
 MROI_FACTOR = 1.01
 
@@ -315,6 +319,41 @@ RF_PARAMETER_NAMES = (
     BETA_RF,
     BETA_GRF,
 )
+ORGANIC_MEDIA_PARAMETER_NAMES = (
+    CONTRIBUTION_OM,
+    BETA_OM,
+    ETA_OM,
+    ALPHA_OM,
+    EC_OM,
+    SLOPE_OM,
+    BETA_GOM,
+)
+ORGANIC_RF_PARAMETER_NAMES = (
+    CONTRIBUTION_ORF,
+    BETA_ORF,
+    ETA_ORF,
+    ALPHA_ORF,
+    EC_ORF,
+    SLOPE_ORF,
+    BETA_GORF,
+)
+NON_MEDIA_PARAMETER_NAMES = (
+    CONTRIBUTION_N,
+    GAMMA_N,
+    XI_N,
+    GAMMA_GN,
+)
+ALL_NATIONAL_DETERMINISTIC_PARAMETER_NAMES = (
+    SLOPE_M,
+    SLOPE_OM,
+    XI_N,
+    XI_C,
+    ETA_M,
+    ETA_RF,
+    ETA_OM,
+    ETA_ORF,
+)
+
 
 MEDIA_PARAMETERS = (
     ROI_M,
@@ -501,10 +540,17 @@ ADSTOCK_HILL_FUNCTIONS = frozenset({
     'hill',
 })
 
+# Adstock decay functions.
+GEOMETRIC_DECAY = 'geometric'
+BINOMIAL_DECAY = 'binomial'
+
+ADSTOCK_DECAY_FUNCTIONS = frozenset({GEOMETRIC_DECAY, BINOMIAL_DECAY})
+ADSTOCK_CHANNELS = (MEDIA, RF, ORGANIC_MEDIA, ORGANIC_RF)
 
 # Distribution constants.
 DISTRIBUTION = 'distribution'
 DISTRIBUTION_TYPE = 'distribution_type'
+INDEPENDENT_MULTIVARIATE = 'IndependentMultivariate'
 PRIOR = 'prior'
 POSTERIOR = 'posterior'
 # Prior mean proportion of KPI incremental due to all media.
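
Note: the new GEOMETRIC_DECAY / BINOMIAL_DECAY constants name the two adstock decay curves selectable in 1.2.0. As a rough illustration of why two names exist, the sketch below picks a weight curve from a decay flag; the geometric weights alpha**lag are the standard adstock form, while the "binomial" curve here is only a placeholder assumption, not necessarily Meridian's exact parameterization.

    import numpy as np

    def adstock_weights(alpha: float, max_lag: int, decay: str = 'geometric') -> np.ndarray:
      """Illustrative adstock weights for a given decay flag (not Meridian code)."""
      lags = np.arange(max_lag + 1)
      if decay == 'geometric':
        weights = alpha ** lags  # standard geometric decay
      elif decay == 'binomial':
        # Placeholder polynomial decay (assumed form): drops toward zero at max_lag.
        weights = (1.0 - lags / (max_lag + 1)) ** (1.0 / alpha - 1.0)
      else:
        raise ValueError(f'unknown decay function: {decay}')
      return weights / weights.sum()  # normalize so the weights sum to 1

    print(adstock_weights(0.7, 4, 'geometric'))
    print(adstock_weights(0.7, 4, 'binomial'))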
@@ -710,3 +756,13 @@ WEEKLY = 'weekly'
 QUARTERLY = 'quarterly'
 TIME_GRANULARITIES = frozenset({WEEKLY, QUARTERLY})
 QUARTERLY_SUMMARY_THRESHOLD_WEEKS = 52
+
+# Automatic Knot Selection constants
+KNOTS_SELECTED = 'knots_selected'
+SELECTION_COEFS = 'selection_coefs'
+MODEL = 'model'
+REGRESSION_COEFS = 'regression_coefs'
+SELECTED_MATRIX = 'selected_matrix'
+AIC = 'aic'
+BIC = 'bic'
+EBIC = 'ebic'
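
Note: the new "Automatic Knot Selection" constants name the pieces of a knot-selection routine scored with information criteria. The sketch below is an illustration of that idea, not Meridian's implementation: it scores a candidate knot design matrix with the AIC and BIC named above (EBIC extends BIC with an additional penalty tied to the size of the candidate knot set).

    import numpy as np

    def aic_bic(y: np.ndarray, x: np.ndarray) -> tuple[float, float]:
      """AIC and BIC for an OLS fit of y on the columns of x (Gaussian errors)."""
      n, k = x.shape  # k counts the fitted regression coefficients
      coefs, *_ = np.linalg.lstsq(x, y, rcond=None)
      rss = float(np.sum((y - x @ coefs) ** 2))
      log_lik = -0.5 * n * (np.log(2.0 * np.pi * rss / n) + 1.0)
      return 2.0 * k - 2.0 * log_lik, k * np.log(n) - 2.0 * log_lik

    rng = np.random.default_rng(0)
    x = rng.normal(size=(117, 6))  # e.g. 6 candidate knot columns over 117 weeks
    y = x @ rng.normal(size=6) + rng.normal(size=117)
    print(aic_bic(y, x))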
meridian/data/input_data.py CHANGED
@@ -442,6 +442,54 @@ class InputData:
     """Checks whether the `rf_spend` array has a time dimension."""
     return self.rf_spend is not None and constants.TIME in self.rf_spend.coords
 
+  @property
+  def scaled_centered_kpi(self) -> np.ndarray:
+    """Calculates scaled and centered KPI values.
+
+    Returns:
+      An array of KPI values that have been population-scaled and
+      mean-centered by geo.
+    """
+    kpi = self.kpi.values
+    population = self.population.values[:, np.newaxis]
+
+    population_scaled_kpi = np.divide(
+        kpi, population, out=np.zeros_like(kpi), where=(population != 0)
+    )
+    population_scaled_mean = np.mean(population_scaled_kpi)
+    population_scaled_stdev = np.std(population_scaled_kpi)
+    kpi_scaled = np.divide(
+        population_scaled_kpi - population_scaled_mean,
+        population_scaled_stdev,
+        out=np.zeros_like(population_scaled_kpi - population_scaled_mean),
+        where=(population_scaled_stdev != 0),
+    )
+    return kpi_scaled - np.mean(kpi_scaled, axis=1, keepdims=True)
+
+  def copy(self, deep: bool = True) -> "InputData":
+    """Returns a copy of the InputData instance.
+
+    Args:
+      deep: If True, a deep copy is made, meaning all xarray.DataArray objects
+        are also deepcopied. If False, a shallow copy is made.
+
+    Returns:
+      A new InputData instance.
+    """
+    if not deep:
+      return dataclasses.replace(self)
+
+    copied_fields = {}
+    for field in dataclasses.fields(self):
+      value = getattr(self, field.name)
+      if isinstance(value, xr.DataArray):
+        copied_fields[field.name] = value.copy(deep=True)
+      else:
+        # For other types, dataclasses.replace does a shallow copy.
+        copied_fields[field.name] = value
+
+    return InputData(**copied_fields)
+
   def _validate_scenarios(self):
     """Verifies that calibration and analysis is set correctly."""
     n_geos = len(self.kpi.coords[constants.GEO])
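
Note: the new `scaled_centered_kpi` property applies three steps that are easy to follow with a tiny worked example (made-up numbers): divide each geo's KPI by its population, z-score the result against the global mean and standard deviation, then subtract each geo's own mean so every row is centered. The version in the diff additionally guards against zero population and zero variance via `np.divide(..., where=...)`.

    import numpy as np

    kpi = np.array([[10.0, 20.0, 30.0],
                    [2.0, 4.0, 6.0]])  # 2 geos x 3 time periods
    population = np.array([100.0, 10.0])[:, np.newaxis]

    per_capita = kpi / population  # population-scale each geo
    z = (per_capita - per_capita.mean()) / per_capita.std()  # global z-score
    centered = z - z.mean(axis=1, keepdims=True)  # mean-center within each geo
    print(centered)  # each row now sums to ~0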
@@ -848,6 +896,32 @@ class InputData:
       raise ValueError("Both RF and media channel values are missing.")
     # pytype: enable=attribute-error
 
+  def get_all_adstock_hill_channels(self) -> np.ndarray:
+    """Returns all channel dimensions that adstock hill is applied to.
+
+    RF, organic media and organic RF channels are concatenated to the end of the
+    media channels if they are present.
+    """
+    adstock_hill_channels = []
+
+    if self.media_channel is not None:
+      adstock_hill_channels.append(self.media_channel.values)
+
+    if self.rf_channel is not None:
+      adstock_hill_channels.append(self.rf_channel.values)
+
+    if self.organic_media_channel is not None:
+      adstock_hill_channels.append(self.organic_media_channel.values)
+
+    if self.organic_rf_channel is not None:
+      adstock_hill_channels.append(self.organic_rf_channel.values)
+
+    if not adstock_hill_channels:
+      raise ValueError("Media, RF, organic media and organic RF channels are "
+                       "all missing.")
+
+    return np.concatenate(adstock_hill_channels, axis=None)
+
   def get_paid_channels_argument_builder(
       self,
@@ -870,6 +944,26 @@ class InputData:
       raise ValueError("There are no RF channels in the input data.")
     return arg_builder.OrderedListArgumentBuilder(self.rf_channel.values)
 
+  def get_organic_media_channels_argument_builder(
+      self
+  ) -> arg_builder.OrderedListArgumentBuilder:
+    """Returns an argument builder for *organic* media channels *only*."""
+    if self.organic_media_channel is None:
+      raise ValueError("There are no organic media channels in the input data.")
+    return arg_builder.OrderedListArgumentBuilder(
+        self.organic_media_channel.values
+    )
+
+  def get_organic_rf_channels_argument_builder(
+      self
+  ) -> arg_builder.OrderedListArgumentBuilder:
+    """Returns an argument builder for *organic* RF channels *only*."""
+    if self.organic_rf_channel is None:
+      raise ValueError("There are no organic RF channels in the input data.")
+    return arg_builder.OrderedListArgumentBuilder(
+        self.organic_rf_channel.values
+    )
+
   def get_all_channels(self) -> np.ndarray:
     """Returns all the channel dimensions.
 
meridian/data/test_utils.py CHANGED
@@ -21,6 +21,7 @@ import immutabledict
 from meridian import constants as c
 from meridian.data import input_data
 from meridian.data import load
+from meridian.model import knots
 import numpy as np
 import pandas as pd
 import xarray as xr
@@ -584,6 +585,47 @@ NATIONAL_COORD_TO_COLUMNS_WO_POPULATION_W_GEO = dataclasses.replace(
     geo='geo',
 )
 
+ADSTOCK_DECAY_SPEC_CASES = immutabledict.immutabledict({
+    'media': (
+        {},
+        {
+            'ch_0': c.BINOMIAL_DECAY,
+            'ch_1': c.GEOMETRIC_DECAY,
+            'ch_2': c.GEOMETRIC_DECAY,
+        },
+    ),
+    'rf': (
+        {},
+        {
+            'rf_ch_0': c.BINOMIAL_DECAY,
+            'rf_ch_1': c.GEOMETRIC_DECAY,
+            'rf_ch_2': c.GEOMETRIC_DECAY,
+            'rf_ch_3': c.BINOMIAL_DECAY,
+        },
+    ),
+    'organic_media': (
+        {},
+        {
+            'organic_media_0': c.BINOMIAL_DECAY,
+            'organic_media_1': c.GEOMETRIC_DECAY,
+            'organic_media_2': c.GEOMETRIC_DECAY,
+            'organic_media_3': c.BINOMIAL_DECAY,
+            'organic_media_4': c.GEOMETRIC_DECAY,
+        },
+    ),
+    'organic_rf': (
+        {},
+        {
+            'organic_rf_ch_0': c.BINOMIAL_DECAY,
+            'organic_rf_ch_1': c.GEOMETRIC_DECAY,
+            'organic_rf_ch_2': c.GEOMETRIC_DECAY,
+            'organic_rf_ch_3': c.BINOMIAL_DECAY,
+            'organic_rf_ch_4': c.BINOMIAL_DECAY,
+            'organic_rf_ch_5': c.GEOMETRIC_DECAY,
+        },
+    ),
+})
+
 
 def random_media_da(
     n_geos: int,
@@ -595,6 +637,7 @@ def random_media_da(
     explicit_geo_names: Sequence[str] | None = None,
     explicit_time_index: Sequence[str] | None = None,
     explicit_media_channel_names: Sequence[str] | None = None,
+    media_value_scales: list[tuple[float, float]] | None = None,
     array_name: str = 'media',
     channel_variable_name: str = 'media_channel',
    channel_prefix: str = 'ch_',
@@ -613,6 +656,8 @@ def random_media_da(
     explicit_time_index: If given, ignore `date_format` and use this as is
     explicit_media_channel_names: If given, ignore `n_media_channels` and use
       this as is
+    media_value_scales: A list of (mean, std) tuples, one for each media
+      channel, to control the scale of the generated random values.
     array_name: The name of the array to be created
     channel_variable_name: The name of the channel variable
     channel_prefix: The prefix of the channel names
@@ -628,11 +673,28 @@ def random_media_da(
   if n_times < n_media_times:
     start_date -= datetime.timedelta(weeks=(n_media_times - n_times))
 
-  media = np.round(
-      abs(
-          np.random.normal(5, 5, size=(n_geos, n_media_times, n_media_channels))
+  if media_value_scales:
+    if len(media_value_scales) != n_media_channels:
+      raise ValueError(
+          'Length of media_value_scales must match n_media_channels.'
       )
-  )
+    channel_data = []
+    for mean, std in media_value_scales:
+      channel_data.append(
+          np.round(
+              abs(np.random.normal(mean, std, size=(n_geos, n_media_times)))
+          )
+      )
+    media = np.stack(channel_data, axis=-1)
+  else:
+    media = np.round(
+        abs(
+            np.random.normal(
+                5, 5, size=(n_geos, n_media_times, n_media_channels)
+            )
+        )
+    )
+
   if explicit_geo_names is None:
     geos = sample_geos(n_geos, integer_geos)
   else:
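
Note: a usage sketch for the new `media_value_scales` argument (one (mean, std) pair per channel), assuming this fixture module is importable as `meridian.data.test_utils` and that the leading parameters are named as the hunks above suggest; the values are illustrative.

    from meridian.data import test_utils

    # Two channels with very different magnitudes.
    media = test_utils.random_media_da(
        n_geos=5,
        n_times=52,
        n_media_times=52,
        n_media_channels=2,
        media_value_scales=[(5.0, 5.0), (1000.0, 200.0)],
    )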
@@ -698,6 +760,7 @@ def random_media_spend_nd_da(
     n_media_channels: int | None = None,
     seed=0,
     integer_geos: bool = False,
+    explicit_media_channel_names: Sequence[str] | None = None,
 ) -> xr.DataArray:
   """Generates a sample N-dimensional `media_spend` DataArray.
 
@@ -716,6 +779,8 @@
     n_media_channels: Number of channels in the created `media_spend` array.
     seed: Random seed used by `np.random.seed()`.
     integer_geos: If True, the geos will be integers.
+    explicit_media_channel_names: If given, ignore `n_media_channels` and use
+      this as is.
 
   Returns:
     A DataArray containing the generated `media_spend` data with the given
@@ -733,9 +798,12 @@
   coords['time'] = _sample_times(n_times=n_times)
   if n_media_channels is not None:
     dims.append('media_channel')
-    coords['media_channel'] = _sample_names(
-        prefix='ch_', n_names=n_media_channels
-    )
+    if explicit_media_channel_names is not None:
+      coords['media_channel'] = explicit_media_channel_names
+    else:
+      coords['media_channel'] = _sample_names(
+          prefix='ch_', n_names=n_media_channels
+      )
 
   if dims == ['geo', 'time', 'media_channel']:
     shape = (n_geos, n_times, n_media_channels)
@@ -822,6 +890,7 @@ def random_kpi_da(
     controls: xr.DataArray | None = None,
     seed: int = 0,
     integer_geos: bool = False,
+    kpi_data_pattern: str = '',
 ) -> xr.DataArray:
   """Generates a sample `kpi` DataArray."""
 
@@ -857,6 +926,22 @@
 
   error = np.random.normal(0, 2, size=(n_geos, n_times))
   kpi = abs(media_portion + control_portion + error)
+  if kpi_data_pattern == 'flat':
+    first_col = kpi[:, 0]  # all rows will have value same as first col
+    kpi = (
+        first_col[:, np.newaxis]
+        + np.random.normal(scale=0.02, size=kpi.shape)
+        + 0.04
+    )
+  elif kpi_data_pattern == 'seasonal':
+    for row in kpi:
+      row.sort()
+    kpi = np.sin(kpi) + 5
+  elif kpi_data_pattern == 'peak':
+    peak_index = int(len(kpi[0]) / 2)
+    kpi[:] = kpi[0, 0]
+    for row in kpi:
+      row[peak_index] *= 3
 
   return xr.DataArray(
       kpi,
@@ -891,14 +976,18 @@
 
 
 def random_population(
-    n_geos: int, seed: int = 0, integer_geos: bool = False
+    n_geos: int,
+    seed: int = 0,
+    integer_geos: bool = False,
+    constant_value: float | None = None,
 ) -> xr.DataArray:
   """Generates a sample `population` DataArray."""
 
   np.random.seed(seed)
-
-  population = np.round(10 + abs(np.random.normal(3000, 100, size=n_geos)))
-
+  if constant_value is not None:
+    population = np.full(n_geos, constant_value)
+  else:
+    population = np.round(10 + abs(np.random.normal(3000, 100, size=n_geos)))
   return xr.DataArray(
       population,
       dims=['geo'],
@@ -1170,11 +1259,15 @@ def random_dataset(
     n_organic_media_channels: int | None = None,
     n_organic_rf_channels: int | None = None,
     n_media_channels: int | None = None,
+    explicit_media_channel_names: Sequence[str] | None = None,
+    media_value_scales: list[tuple[float, float]] | None = None,
     n_rf_channels: int | None = None,
     revenue_per_kpi_value: float | None = 3.14,
+    constant_population_value: float | None = None,
     seed: int = 0,
     remove_media_time: bool = False,
     integer_geos: bool = False,
+    kpi_data_pattern: str = '',
 ) -> xr.Dataset:
   """Generates a random dataset."""
   if n_media_channels:
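
Note: the new `random_dataset` arguments can be combined; a usage sketch with illustrative values, under the same module-path assumption as above (the parameter names come from the signature hunks in this file).

    from meridian.data import test_utils

    dataset = test_utils.random_dataset(
        n_geos=10,
        n_times=104,
        n_media_times=104,
        n_controls=2,
        n_media_channels=2,
        explicit_media_channel_names=['tv', 'search'],
        media_value_scales=[(5.0, 5.0), (1000.0, 200.0)],
        constant_population_value=1000.0,
        kpi_data_pattern='seasonal',
    )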
@@ -1185,11 +1278,14 @@
         n_media_channels=n_media_channels,
         seed=seed,
         integer_geos=integer_geos,
+        explicit_media_channel_names=explicit_media_channel_names,
+        media_value_scales=media_value_scales,
     )
     media_spend = random_media_spend_nd_da(
         n_geos=n_geos,
         n_times=n_times,
         n_media_channels=n_media_channels,
+        explicit_media_channel_names=explicit_media_channel_names,
         seed=seed,
         integer_geos=integer_geos,
     )
@@ -1301,9 +1397,13 @@
       n_media_channels=n_media_channels or n_rf_channels or 0,
       n_controls=n_controls,
       integer_geos=integer_geos,
+      kpi_data_pattern=kpi_data_pattern,
   )
   population = random_population(
-      n_geos=n_geos, seed=seed, integer_geos=integer_geos
+      n_geos=n_geos,
+      seed=seed,
+      integer_geos=integer_geos,
+      constant_value=constant_population_value,
   )
 
   dataset = xr.combine_by_coords(
@@ -1644,6 +1744,7 @@
     n_organic_media_channels: int | None = None,
     n_organic_rf_channels: int | None = None,
     seed: int = 0,
+    explicit_media_channel_names: Sequence[str] | None = None,
 ) -> input_data.InputData:
   """Generates sample InputData for `kpi_type='revenue'`."""
   dataset = random_dataset(
@@ -1658,6 +1759,7 @@
       n_organic_rf_channels=n_organic_rf_channels,
       revenue_per_kpi_value=1.0,
       seed=seed,
+      explicit_media_channel_names=explicit_media_channel_names,
   )
   return input_data.InputData(
       kpi=dataset.kpi,
@@ -1773,3 +1875,33 @@
       if n_organic_rf_channels
       else None,
   )
+
+
+def sample_input_data_for_aks_with_expected_knot_info() -> (
+    tuple[input_data.InputData, knots.KnotInfo]
+):
+  """Generates sample InputData and corresponding expected KnotInfo for testing.
+
+  Returns:
+    A tuple containing:
+      - InputData object with sample data.
+      - KnotInfo object with expected knot information.
+  """
+  data = sample_input_data_from_dataset(
+      random_dataset(
+          n_geos=20,
+          n_times=117,
+          n_media_times=117,
+          n_controls=2,
+          n_media_channels=5,
+      ),
+      'non_revenue',
+  )
+  expected_knot_info = knots.KnotInfo(
+      n_knots=6,
+      knot_locations=np.array([38, 39, 41, 48, 50, 55]),
+      weights=knots.l1_distance_weights(
+          117, np.array([38, 39, 41, 48, 50, 55])
+      ),
+  )
+  return data, expected_knot_info
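
Note: a usage sketch for the new fixture, under the same module-path assumption as above and assuming `knots.KnotInfo` exposes its constructor fields as attributes; it pairs a deterministic dataset with the knot locations the automatic selection is expected to produce.

    from meridian.data import test_utils

    data, expected = test_utils.sample_input_data_for_aks_with_expected_knot_info()
    print(expected.n_knots)         # 6
    print(expected.knot_locations)  # [38 39 41 48 50 55]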