google-meridian 1.1.6__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,6 +21,7 @@ import immutabledict
  from meridian import constants as c
  from meridian.data import input_data
  from meridian.data import load
+ from meridian.model import knots
  import numpy as np
  import pandas as pd
  import xarray as xr
@@ -584,6 +585,47 @@ NATIONAL_COORD_TO_COLUMNS_WO_POPULATION_W_GEO = dataclasses.replace(
      geo='geo',
  )

+ ADSTOCK_DECAY_SPEC_CASES = immutabledict.immutabledict({
+     'media': (
+         {},
+         {
+             'ch_0': c.BINOMIAL_DECAY,
+             'ch_1': c.GEOMETRIC_DECAY,
+             'ch_2': c.GEOMETRIC_DECAY,
+         },
+     ),
+     'rf': (
+         {},
+         {
+             'rf_ch_0': c.BINOMIAL_DECAY,
+             'rf_ch_1': c.GEOMETRIC_DECAY,
+             'rf_ch_2': c.GEOMETRIC_DECAY,
+             'rf_ch_3': c.BINOMIAL_DECAY,
+         },
+     ),
+     'organic_media': (
+         {},
+         {
+             'organic_media_0': c.BINOMIAL_DECAY,
+             'organic_media_1': c.GEOMETRIC_DECAY,
+             'organic_media_2': c.GEOMETRIC_DECAY,
+             'organic_media_3': c.BINOMIAL_DECAY,
+             'organic_media_4': c.GEOMETRIC_DECAY,
+         },
+     ),
+     'organic_rf': (
+         {},
+         {
+             'organic_rf_ch_0': c.BINOMIAL_DECAY,
+             'organic_rf_ch_1': c.GEOMETRIC_DECAY,
+             'organic_rf_ch_2': c.GEOMETRIC_DECAY,
+             'organic_rf_ch_3': c.BINOMIAL_DECAY,
+             'organic_rf_ch_4': c.BINOMIAL_DECAY,
+             'organic_rf_ch_5': c.GEOMETRIC_DECAY,
+         },
+     ),
+ })
+

  def random_media_da(
      n_geos: int,
@@ -595,6 +637,7 @@ def random_media_da(
      explicit_geo_names: Sequence[str] | None = None,
      explicit_time_index: Sequence[str] | None = None,
      explicit_media_channel_names: Sequence[str] | None = None,
+     media_value_scales: list[tuple[float, float]] | None = None,
      array_name: str = 'media',
      channel_variable_name: str = 'media_channel',
      channel_prefix: str = 'ch_',
@@ -613,6 +656,8 @@ def random_media_da(
      explicit_time_index: If given, ignore `date_format` and use this as is
      explicit_media_channel_names: If given, ignore `n_media_channels` and use
        this as is
+     media_value_scales: A list of (mean, std) tuples, one for each media
+       channel, to control the scale of the generated random values.
      array_name: The name of the array to be created
      channel_variable_name: The name of the channel variable
      channel_prefix: The prefix of the channel names
@@ -628,11 +673,28 @@ def random_media_da(
    if n_times < n_media_times:
      start_date -= datetime.timedelta(weeks=(n_media_times - n_times))

-   media = np.round(
-       abs(
-           np.random.normal(5, 5, size=(n_geos, n_media_times, n_media_channels))
+   if media_value_scales:
+     if len(media_value_scales) != n_media_channels:
+       raise ValueError(
+           'Length of media_value_scales must match n_media_channels.'
        )
-   )
+     channel_data = []
+     for mean, std in media_value_scales:
+       channel_data.append(
+           np.round(
+               abs(np.random.normal(mean, std, size=(n_geos, n_media_times)))
+           )
+       )
+     media = np.stack(channel_data, axis=-1)
+   else:
+     media = np.round(
+         abs(
+             np.random.normal(
+                 5, 5, size=(n_geos, n_media_times, n_media_channels)
+             )
+         )
+     )
+
    if explicit_geo_names is None:
      geos = sample_geos(n_geos, integer_geos)
    else:
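The new `media_value_scales` argument above replaces the shared N(5, 5) draw with a per-channel (mean, std) pair. A minimal usage sketch, illustrative only: the keyword arguments shown are taken from the function body and signature in this diff, and any arguments not listed are assumed to keep their defaults.

    # Two media channels generated at very different scales; the list length
    # must match n_media_channels or the helper raises ValueError.
    media = random_media_da(
        n_geos=5,
        n_times=52,
        n_media_times=52,
        n_media_channels=2,
        media_value_scales=[(5.0, 5.0), (500.0, 50.0)],
    )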
@@ -698,6 +760,7 @@ def random_media_spend_nd_da(
      n_media_channels: int | None = None,
      seed=0,
      integer_geos: bool = False,
+     explicit_media_channel_names: Sequence[str] | None = None,
  ) -> xr.DataArray:
    """Generates a sample N-dimensional `media_spend` DataArray.

@@ -716,6 +779,8 @@ def random_media_spend_nd_da(
      n_media_channels: Number of channels in the created `media_spend` array.
      seed: Random seed used by `np.random.seed()`.
      integer_geos: If True, the geos will be integers.
+     explicit_media_channel_names: If given, ignore `n_media_channels` and use
+       this as is.

    Returns:
      A DataArray containing the generated `media_spend` data with the given
@@ -733,9 +798,12 @@ def random_media_spend_nd_da(
      coords['time'] = _sample_times(n_times=n_times)
    if n_media_channels is not None:
      dims.append('media_channel')
-     coords['media_channel'] = _sample_names(
-         prefix='ch_', n_names=n_media_channels
-     )
+     if explicit_media_channel_names is not None:
+       coords['media_channel'] = explicit_media_channel_names
+     else:
+       coords['media_channel'] = _sample_names(
+           prefix='ch_', n_names=n_media_channels
+       )

    if dims == ['geo', 'time', 'media_channel']:
      shape = (n_geos, n_times, n_media_channels)
@@ -822,6 +890,7 @@ def random_kpi_da(
      controls: xr.DataArray | None = None,
      seed: int = 0,
      integer_geos: bool = False,
+     kpi_data_pattern: str = '',
  ) -> xr.DataArray:
    """Generates a sample `kpi` DataArray."""

@@ -857,6 +926,22 @@ def random_kpi_da(

    error = np.random.normal(0, 2, size=(n_geos, n_times))
    kpi = abs(media_portion + control_portion + error)
+   if kpi_data_pattern == 'flat':
+     first_col = kpi[:, 0]  # all rows will have value same as first col
+     kpi = (
+         first_col[:, np.newaxis]
+         + np.random.normal(scale=0.02, size=kpi.shape)
+         + 0.04
+     )
+   elif kpi_data_pattern == 'seasonal':
+     for row in kpi:
+       row.sort()
+     kpi = np.sin(kpi) + 5
+   elif kpi_data_pattern == 'peak':
+     peak_index = int(len(kpi[0]) / 2)
+     kpi[:] = kpi[0, 0]
+     for row in kpi:
+       row[peak_index] *= 3

    return xr.DataArray(
        kpi,
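The `kpi_data_pattern` switch above reshapes the KPI after the media/control/error mix: 'flat' pulls every period toward the first column's value plus small noise, 'seasonal' sorts each geo's series and passes it through a sine, and 'peak' flattens the series and triples its midpoint. A hedged sketch of requesting one of the shapes; the `media` and `controls` arguments are assumed to be DataArrays built with the helpers in this file, and the full required signature is not shown in this diff.

    # Illustrative only: request a KPI series with a single mid-series spike.
    kpi_peak = random_kpi_da(
        media=media,
        controls=controls,
        kpi_data_pattern='peak',
    )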
@@ -891,14 +976,18 @@ def constant_revenue_per_kpi(


  def random_population(
-     n_geos: int, seed: int = 0, integer_geos: bool = False
+     n_geos: int,
+     seed: int = 0,
+     integer_geos: bool = False,
+     constant_value: float | None = None,
  ) -> xr.DataArray:
    """Generates a sample `population` DataArray."""

    np.random.seed(seed)
-
-   population = np.round(10 + abs(np.random.normal(3000, 100, size=n_geos)))
-
+   if constant_value is not None:
+     population = np.full(n_geos, constant_value)
+   else:
+     population = np.round(10 + abs(np.random.normal(3000, 100, size=n_geos)))
    return xr.DataArray(
        population,
        dims=['geo'],
@@ -1170,11 +1259,15 @@ def random_dataset(
      n_organic_media_channels: int | None = None,
      n_organic_rf_channels: int | None = None,
      n_media_channels: int | None = None,
+     explicit_media_channel_names: Sequence[str] | None = None,
+     media_value_scales: list[tuple[float, float]] | None = None,
      n_rf_channels: int | None = None,
      revenue_per_kpi_value: float | None = 3.14,
+     constant_population_value: float | None = None,
      seed: int = 0,
      remove_media_time: bool = False,
      integer_geos: bool = False,
+     kpi_data_pattern: str = '',
  ) -> xr.Dataset:
    """Generates a random dataset."""
    if n_media_channels:
@@ -1185,11 +1278,14 @@ def random_dataset(
          n_media_channels=n_media_channels,
          seed=seed,
          integer_geos=integer_geos,
+         explicit_media_channel_names=explicit_media_channel_names,
+         media_value_scales=media_value_scales,
      )
      media_spend = random_media_spend_nd_da(
          n_geos=n_geos,
          n_times=n_times,
          n_media_channels=n_media_channels,
+         explicit_media_channel_names=explicit_media_channel_names,
          seed=seed,
          integer_geos=integer_geos,
      )
@@ -1301,9 +1397,13 @@ def random_dataset(
        n_media_channels=n_media_channels or n_rf_channels or 0,
        n_controls=n_controls,
        integer_geos=integer_geos,
+       kpi_data_pattern=kpi_data_pattern,
    )
    population = random_population(
-       n_geos=n_geos, seed=seed, integer_geos=integer_geos
+       n_geos=n_geos,
+       seed=seed,
+       integer_geos=integer_geos,
+       constant_value=constant_population_value,
    )

    dataset = xr.combine_by_coords(
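Taken together, `random_dataset` now threads these options through to the per-array helpers: explicit channel names reach both the media and media-spend arrays, per-channel scales reach the media generator, the population can be pinned to a constant, and the KPI pattern is forwarded to `random_kpi_da`. An illustrative call, assuming the `n_*` arguments visible elsewhere in this diff and default values for everything not listed:

    # Illustrative only: named channels, per-channel media scales,
    # constant population, and a seasonal KPI pattern.
    dataset = random_dataset(
        n_geos=10,
        n_times=104,
        n_media_times=104,
        n_controls=2,
        n_media_channels=2,
        explicit_media_channel_names=['tv', 'search'],
        media_value_scales=[(5.0, 5.0), (200.0, 30.0)],
        constant_population_value=1000.0,
        kpi_data_pattern='seasonal',
    )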
@@ -1644,6 +1744,7 @@ def sample_input_data_revenue(
      n_organic_media_channels: int | None = None,
      n_organic_rf_channels: int | None = None,
      seed: int = 0,
+     explicit_media_channel_names: Sequence[str] | None = None,
  ) -> input_data.InputData:
    """Generates sample InputData for `kpi_type='revenue'`."""
    dataset = random_dataset(
@@ -1658,6 +1759,7 @@ def sample_input_data_revenue(
        n_organic_rf_channels=n_organic_rf_channels,
        revenue_per_kpi_value=1.0,
        seed=seed,
+       explicit_media_channel_names=explicit_media_channel_names,
    )
    return input_data.InputData(
        kpi=dataset.kpi,
@@ -1773,3 +1875,35 @@ def sample_input_data_non_revenue_no_revenue_per_kpi(
        if n_organic_rf_channels
        else None,
    )
+
+
+ def sample_input_data_for_aks_with_expected_knot_info() -> (
+     tuple[input_data.InputData, knots.KnotInfo]
+ ):
+   """Generates sample InputData and corresponding expected KnotInfo for testing.
+
+   Returns:
+     A tuple containing:
+       - InputData object with sample data.
+       - KnotInfo object with expected knot information.
+   """
+   data = sample_input_data_from_dataset(
+       random_dataset(
+           n_geos=20,
+           n_times=117,
+           n_media_times=117,
+           n_controls=2,
+           n_media_channels=5,
+       ),
+       'non_revenue',
+   )
+   expected_knot_info = knots.KnotInfo(
+       n_knots=13,
+       knot_locations=np.array(
+           [11, 14, 38, 39, 41, 43, 45, 48, 50, 55, 87, 89, 90]
+       ),
+       weights=knots.l1_distance_weights(
+           117, np.array([11, 14, 38, 39, 41, 43, 45, 48, 50, 55, 87, 89, 90])
+       ),
+   )
+   return data, expected_knot_info
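The helper above pairs a fixed 20-geo, 117-week dataset with the knot selection expected for it, so a test can compare an automatic-knot-selection result against a known-good `knots.KnotInfo`. A hedged sketch of consuming the fixture; the assertions use only the fields constructed above, and the call that actually runs knot selection is omitted because its API is not shown in this diff.

    # Illustrative only: unpack the fixture and inspect the expected knots.
    data, expected = sample_input_data_for_aks_with_expected_knot_info()
    assert expected.n_knots == 13
    assert list(expected.knot_locations[:3]) == [11, 14, 38]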
@@ -72,6 +72,7 @@ import json
  from typing import Any, Callable

  import arviz as az
+ from meridian import backend
  from meridian.analysis import visualizer
  import mlflow
  from mlflow.utils.autologging_utils import autologging_integration, safe_patch
@@ -81,7 +82,6 @@ from meridian.model import prior_sampler
  from meridian.model import spec
  from meridian.version import __version__
  import numpy as np
- import tensorflow_probability as tfp


  FLAVOR_NAME = "meridian"
@@ -123,7 +123,7 @@ def _log_priors(model_spec: spec.ModelSpec) -> None:
      field_value = getattr(priors, field.name)

      # Stringify Distributions and numpy arrays.
-     if isinstance(field_value, tfp.distributions.Distribution):
+     if isinstance(field_value, backend.tfd.Distribution):
        field_value = str(field_value)
      elif isinstance(field_value, np.ndarray):
        field_value = json.dumps(field_value.tolist())