google-meridian 1.0.5__py3-none-any.whl → 1.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {google_meridian-1.0.5.dist-info → google_meridian-1.0.7.dist-info}/METADATA +12 -11
- {google_meridian-1.0.5.dist-info → google_meridian-1.0.7.dist-info}/RECORD +16 -16
- {google_meridian-1.0.5.dist-info → google_meridian-1.0.7.dist-info}/WHEEL +1 -1
- meridian/__init__.py +1 -1
- meridian/analysis/analyzer.py +677 -817
- meridian/analysis/optimizer.py +192 -134
- meridian/analysis/summarizer.py +7 -3
- meridian/analysis/test_utils.py +72 -20
- meridian/analysis/visualizer.py +10 -10
- meridian/constants.py +3 -0
- meridian/data/input_data.py +49 -3
- meridian/data/load.py +10 -7
- meridian/data/test_utils.py +18 -11
- meridian/data/time_coordinates.py +38 -17
- {google_meridian-1.0.5.dist-info → google_meridian-1.0.7.dist-info/licenses}/LICENSE +0 -0
- {google_meridian-1.0.5.dist-info → google_meridian-1.0.7.dist-info}/top_level.txt +0 -0
meridian/analysis/visualizer.py
CHANGED

@@ -239,7 +239,7 @@ class ModelDiagnostics:
     groupby = posterior_df.columns.tolist()
     groupby.remove(parameter)
     plot = (
-        alt.Chart(prior_posterior_df)
+        alt.Chart(prior_posterior_df, width=c.VEGALITE_FACET_DEFAULT_WIDTH)
         .transform_density(
             parameter, groupby=groupby, as_=[parameter, 'density']
         )

@@ -332,7 +332,7 @@ class ModelDiagnostics:
     rhat = rhat.dropna(subset=[c.RHAT])

     boxplot = (
-        alt.Chart(rhat)
+        alt.Chart(rhat, width=c.VEGALITE_FACET_DEFAULT_WIDTH)
         .mark_boxplot(median={'color': c.BLUE_300}, outliers={'filled': True})
         .encode(
             x=alt.X(c.PARAMETER, axis=alt.Axis(labelAngle=-45)),

@@ -461,7 +461,7 @@ class ModelFit:
     else:
       y_axis_label = summary_text.KPI_LABEL
     plot = (
-        alt.Chart(model_fit_df)
+        alt.Chart(model_fit_df, width=c.VEGALITE_FACET_DEFAULT_WIDTH)
         .mark_line()
         .encode(
             x=alt.X(

@@ -762,7 +762,7 @@ class ReachAndFrequency:
         range=[c.BLUE_600, c.RED_600],
     )

-    base = alt.Chart().transform_calculate(
+    base = alt.Chart(width=c.VEGALITE_FACET_DEFAULT_WIDTH).transform_calculate(
         optimal_freq=f"'{summary_text.OPTIMAL_FREQ_LABEL}'",
         expected_roi=f"'{summary_text.EXPECTED_ROI_LABEL}'",
     )

@@ -1012,7 +1012,7 @@ class MediaEffects:
     else:
       y_axis_label = summary_text.INC_KPI_LABEL
     base = (
-        alt.Chart(response_curves_df)
+        alt.Chart(response_curves_df, width=c.VEGALITE_FACET_DEFAULT_WIDTH)
        .transform_calculate(
            spend_level=(
                'datum.spend_multiplier >= 1.0 ? "Above current spend" : "Below'

@@ -1099,7 +1099,7 @@ class MediaEffects:
       An Altair plot showing the Adstock decay prior and posterior per media.
     """
     dataframe = self.adstock_decay_dataframe(confidence_level=confidence_level)
-    base = alt.Chart(dataframe)
+    base = alt.Chart(dataframe, width=c.VEGALITE_FACET_DEFAULT_WIDTH)

     scaled_confidence_level = int(confidence_level * 100)

@@ -1254,7 +1254,7 @@ class MediaEffects:
     ]
     range_list = [c.BLUE_700, c.GREY_600]

-    base = alt.Chart(df_channel_type)
+    base = alt.Chart(df_channel_type, width=c.VEGALITE_FACET_DEFAULT_WIDTH)
     color_scale = alt.Scale(
         domain=domain_list,
         range=range_list,

@@ -1274,7 +1274,7 @@ class MediaEffects:
         y2=f'{c.CI_HI}:Q',
         color=alt.Color(f'{c.DISTRIBUTION}:N', scale=color_scale),
     )
-    histogram = base.
+    histogram = base.mark_rect(color=c.GREY_600, opacity=0.4).encode(
         x=f'{c.START_INTERVAL_HISTOGRAM}:Q',
         x2=f'{c.END_INTERVAL_HISTOGRAM}:Q',
         y=alt.Y(f'{c.SCALED_COUNT_HISTOGRAM}:Q'),

@@ -1700,7 +1700,7 @@ class MediaSummary:

     domain = [c.BASELINE, c.ALL_CHANNELS]
     colors = [c.YELLOW_600, c.BLUE_700]
-    base = alt.Chart(outcome_df).encode(
+    base = alt.Chart(outcome_df, width=c.VEGALITE_FACET_DEFAULT_WIDTH).encode(
        alt.Theta(f'{c.PCT_OF_CONTRIBUTION}:Q', stack=True),
        alt.Color(
            f'{c.CHANNEL}:N',

@@ -1985,7 +1985,7 @@ class MediaSummary:
     axes_scale = alt.Scale(domain=(0, max_roi), nice=True)

     plot = (
-        alt.Chart(plot_df)
+        alt.Chart(plot_df, width=c.VEGALITE_FACET_DEFAULT_WIDTH)
        .mark_circle(tooltip=True, size=c.POINT_SIZE)
        .encode(
            x=alt.X(c.ROI, title='ROI', scale=axes_scale),
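The pattern is the same in all ten hunks: every chart that is later faceted now pins an explicit width via the new `c.VEGALITE_FACET_DEFAULT_WIDTH` constant. A minimal sketch of what that keyword does in Altair, using a placeholder value for the constant (the real value lives in meridian/constants.py):

# A minimal sketch of the pattern these hunks introduce, assuming a
# placeholder width; the real constant is VEGALITE_FACET_DEFAULT_WIDTH
# in meridian/constants.py.
import altair as alt
import pandas as pd

VEGALITE_FACET_DEFAULT_WIDTH = 300  # placeholder, not Meridian's actual value

df = pd.DataFrame({
    'x': [0, 1, 2, 0, 1, 2],
    'y': [1.0, 3.0, 2.0, 2.0, 1.0, 3.0],
    'channel': ['ch_0'] * 3 + ['ch_1'] * 3,
})

# Passing width= to alt.Chart fixes the width of each facet panel, rather
# than leaving the sizing to Vega-Lite's defaults.
chart = (
    alt.Chart(df, width=VEGALITE_FACET_DEFAULT_WIDTH)
    .mark_line()
    .encode(x='x:Q', y='y:Q')
    .facet(column='channel:N')
)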
meridian/constants.py
CHANGED
meridian/data/input_data.py
CHANGED

@@ -18,6 +18,7 @@ The `InputData` class is used to store all the input data to the model.
 """

 from collections import abc
+from collections.abc import Sequence
 import dataclasses
 import datetime as dt
 import functools

@@ -59,7 +60,7 @@ def _check_dim_collection(
   )


-def _check_dim_match(dim, arrays):
+def _check_dim_match(dim: str, arrays: Sequence[xr.DataArray]):
   """Verifies that the dimensions of the appropriate arrays match."""
   lengths = [len(array.coords[dim]) for array in arrays if array is not None]
   names = [array.name for array in arrays if array is not None]

@@ -69,6 +70,19 @@ def _check_dim_match(dim, arrays):
   )


+def _check_coords_match(dim: str, arrays: Sequence[xr.DataArray]):
+  """Verifies that the coordinates of the appropriate arrays match."""
+  arrays = [arr for arr in arrays if arr is not None and dim in arr.coords]
+  if not arrays:
+    return
+  first_coords = arrays[0].coords[dim].values
+  for arr in arrays[1:]:
+    if not np.array_equal(arr.coords[dim].values, first_coords):
+      raise ValueError(
+          f"`{dim}` coordinates of array `{arr.name}` don't match."
+      )
+
+
 @dataclasses.dataclass
 class InputData:
   """A data container for advertising data in a format supported by Meridian.

@@ -242,6 +256,7 @@ class InputData:
     self._validate_media_channels()
     self._validate_time_formats()
     self._validate_times()
+    self._validate_geos()

   def _convert_geos_to_strings(self):
     """Converts geo coordinates to strings in all relevant DataArrays."""

@@ -542,11 +557,13 @@ class InputData:
     try:
       _ = self.time_coordinates.interval_days
     except ValueError as exc:
-      raise ValueError("Time coordinates must be
+      raise ValueError("Time coordinates must be regularly spaced.") from exc
     try:
       _ = self.media_time_coordinates.interval_days
     except ValueError as exc:
-      raise ValueError(
+      raise ValueError(
+          "Media time coordinates must be regularly spaced."
+      ) from exc

   def _validate_time(self, array: xr.DataArray | None):
     """Validates the `time` dimension of the given `DataArray`.

@@ -617,6 +634,35 @@ class InputData:
           f" {constants.DATE_FORMAT}"
       ) from exc

+  def _check_unique_names(self, dim: str, array: xr.DataArray | None):
+    """Checks if a DataArray contains unique names on the specified dimension."""
+    if array is not None and dim in array.coords:
+      names = array.coords[dim].values.tolist()
+      if len(names) != len(set(names)):
+        raise ValueError(
+            f"`{dim}` names must be unique within the array `{array.name}`."
+        )
+
+  def _validate_geos(self):
+    """Validates geo coordinates across relevant DataArrays."""
+    arrays_with_geos = [
+        self.kpi,
+        self.revenue_per_kpi,
+        self.media,
+        self.controls,
+        self.population,
+        self.reach,
+        self.frequency,
+        self.organic_media,
+        self.organic_reach,
+        self.organic_frequency,
+        self.non_media_treatments,
+    ]
+    for array in arrays_with_geos:
+      self._check_unique_names(constants.GEO, array)
+
+    _check_coords_match(constants.GEO, arrays_with_geos)
+
   def as_dataset(self) -> xr.Dataset:
     """Returns data as a single `xarray.Dataset` object."""
     data = [
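The net effect is that `InputData` construction now fails fast on duplicated or inconsistent geo labels. A standalone sketch of the coordinate check on toy xarray arrays; the arrays below are illustrative stand-ins, not Meridian's real fields:

# Standalone sketch of the geo-coordinate consistency check added above;
# the toy arrays stand in for InputData's real DataArray fields.
import numpy as np
import xarray as xr

kpi = xr.DataArray(
    [[1.0, 2.0]],
    dims=['time', 'geo'],
    coords={'time': ['2024-01-01'], 'geo': ['geo_0', 'geo_1']},
    name='kpi',
)
population = xr.DataArray(
    [100.0, 200.0],
    dims=['geo'],
    coords={'geo': ['geo_0', 'geo_2']},  # 'geo_2' disagrees with kpi's labels
    name='population',
)

first_coords = kpi.coords['geo'].values
for arr in [population]:
  if not np.array_equal(arr.coords['geo'].values, first_coords):
    # Mirrors _check_coords_match: raises because 'geo_2' != 'geo_1'.
    raise ValueError(f"`geo` coordinates of array `{arr.name}` don't match.")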
meridian/data/load.py
CHANGED

@@ -950,7 +950,7 @@ class DataFrameDataLoader(InputDataLoader):
       raise ValueError('NA values found in the organic_frequency columns.')

     # Determine columns in which NAs are expected in the lagged-media period.
-
+    not_lagged_columns = []
     coords = [
         constants.KPI,
         constants.CONTROLS,

@@ -967,12 +967,12 @@ class DataFrameDataLoader(InputDataLoader):
     for coord in coords:
       columns = getattr(self.coord_to_columns, coord)
       columns = [columns] if isinstance(columns, str) else columns
-
+      not_lagged_columns.extend(columns)

     # Dates with at least one non-NA value in columns different from media,
     # reach, frequency, organic_media, organic_reach, and organic_frequency.
     time_column_name = self.coord_to_columns.time
-    no_na_period = self.df[(~self.df[
+    no_na_period = self.df[(~self.df[not_lagged_columns].isna()).any(axis=1)][
         time_column_name
     ].unique()

@@ -999,13 +999,16 @@ class DataFrameDataLoader(InputDataLoader):
     # organic_frequency.
     not_lagged_data = self.df.loc[
         self.df[time_column_name].isin(no_na_period),
-
+        not_lagged_columns,
     ]
     if not_lagged_data.isna().any(axis=None):
+      incorrect_columns = []
+      for column in not_lagged_columns:
+        if not_lagged_data[column].isna().any(axis=None):
+          incorrect_columns.append(column)
       raise ValueError(
-          'NA values found in
-
-          ' non-media columns).'
+          f'NA values found in columns {incorrect_columns} within the modeling'
+          ' time window (time periods where the KPI is modeled).'
       )

   def load(self) -> input_data.InputData:
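The change is about diagnosability: rather than a generic message, the loader now lists exactly which columns carry NA values inside the modeled time window. A toy reproduction of that reporting step (column names are illustrative, not names the loader requires):

# Toy reproduction of the new NA reporting; 'kpi' and 'control_0' are
# illustrative column names.
import numpy as np
import pandas as pd

not_lagged_columns = ['kpi', 'control_0']
not_lagged_data = pd.DataFrame({
    'kpi': [1.0, np.nan, 3.0],     # NA inside the modeled window
    'control_0': [0.1, 0.2, 0.3],  # clean
})

if not_lagged_data.isna().any(axis=None):
  incorrect_columns = [
      column for column in not_lagged_columns
      if not_lagged_data[column].isna().any()
  ]
  # Raises: "NA values found in columns ['kpi'] within the modeling ..."
  raise ValueError(
      f'NA values found in columns {incorrect_columns} within the modeling'
      ' time window (time periods where the KPI is modeled).'
  )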
meridian/data/test_utils.py
CHANGED

@@ -37,7 +37,7 @@ def _sample_names(prefix: str, n_names: int | None) -> list[str] | None:
   return [prefix + str(n) for n in range(n_names)] if n_names else None


-def
+def sample_geos(
     n_geos: int | None, integer_geos: bool = False
 ) -> list[str] | list[int] | None:
   """Generates a list of sample geos."""

@@ -519,6 +519,7 @@ def random_media_da(
     n_media_channels: int,
     seed: int = 0,
     date_format: str = c.DATE_FORMAT,
+    explicit_geo_names: Sequence[str] | None = None,
     explicit_time_index: Sequence[str] | None = None,
     explicit_media_channel_names: Sequence[str] | None = None,
     array_name: str = 'media',

@@ -535,6 +536,7 @@ def random_media_da(
     n_media_channels: Number of media channels
     seed: Random seed used by `np.random.seed()`
     date_format: The date format to use for time coordinate labels
+    explicit_geo_names: If given, ignore `n_geos` and use this as is.
     explicit_time_index: If given, ignore `date_format` and use this as is
     explicit_media_channel_names: If given, ignore `n_media_channels` and use
       this as is

@@ -558,6 +560,11 @@ def random_media_da(
           np.random.normal(5, 5, size=(n_geos, n_media_times, n_media_channels))
       )
   )
+  if explicit_geo_names is None:
+    geos = sample_geos(n_geos, integer_geos)
+  else:
+    geos = explicit_geo_names
+
   if explicit_time_index is None:
     media_time = _sample_times(
         n_times=n_media_times,

@@ -576,7 +583,7 @@ def random_media_da(
       media,
       dims=['geo', 'media_time', channel_variable_name],
       coords={
-          'geo':
+          'geo': geos,
          'media_time': media_time,
          channel_variable_name: media_channels,
      },

@@ -647,7 +654,7 @@ def random_media_spend_nd_da(
   coords = {}
   if n_geos is not None:
     dims.append('geo')
-    coords['geo'] =
+    coords['geo'] = sample_geos(n_geos, integer_geos)
   if n_times is not None:
     dims.append('time')
     coords['time'] = _sample_times(n_times=n_times)

@@ -719,7 +726,7 @@ def random_controls_da(
       controls,
       dims=['geo', 'time', 'control_variable'],
       coords={
-          'geo':
+          'geo': sample_geos(n_geos, integer_geos),
          'time': (
              _sample_times(n_times=n_times, date_format=date_format)
              if explicit_time_index is None

@@ -775,7 +782,7 @@ def random_kpi_da(
       kpi,
       dims=['geo', 'time'],
       coords={
-          'geo':
+          'geo': sample_geos(n_geos, integer_geos),
          'time': _sample_times(n_times=n_times),
      },
      name=c.KPI,

@@ -796,7 +803,7 @@ def constant_revenue_per_kpi(
       revenue_per_kpi,
       dims=['geo', 'time'],
       coords={
-          'geo':
+          'geo': sample_geos(n_geos, integer_geos),
          'time': _sample_times(n_times=n_times),
      },
      name='revenue_per_kpi',

@@ -815,7 +822,7 @@ def random_population(
   return xr.DataArray(
       population,
       dims=['geo'],
-      coords={'geo':
+      coords={'geo': sample_geos(n_geos, integer_geos)},
      name='population',
  )

@@ -857,7 +864,7 @@ def random_reach_da(
       reach,
       dims=['geo', 'media_time', channel_variable_name],
       coords={
-          'geo':
+          'geo': sample_geos(n_geos, integer_geos),
          'media_time': _sample_times(
              n_times=n_media_times, start_date=start_date
          ),

@@ -925,7 +932,7 @@ def random_frequency_da(
       frequency,
       dims=['geo', 'media_time', channel_variable_name],
       coords={
-          'geo':
+          'geo': sample_geos(n_geos, integer_geos),
          'media_time': _sample_times(
              n_times=n_media_times, start_date=start_date
          ),

@@ -992,7 +999,7 @@ def random_rf_spend_nd_da(
   coords = {}
   if n_geos is not None:
     dims.append('geo')
-    coords['geo'] =
+    coords['geo'] = sample_geos(n_geos, integer_geos)
   if n_times is not None:
     dims.append('time')
     coords['time'] = _sample_times(n_times=n_times)

@@ -1060,7 +1067,7 @@ def random_non_media_treatments_da(
       non_media_treatments,
       dims=['geo', 'time', 'non_media_channel'],
       coords={
-          'geo':
+          'geo': sample_geos(n_geos, integer_geos),
          'time': (
              _sample_times(n_times=n_times, date_format=date_format)
              if explicit_time_index is None
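Taken together, these hunks make geo naming controllable from tests: the `sample_geos` helper is now part of the module's surface, and `random_media_da` accepts `explicit_geo_names`. A hedged usage sketch; the keyword names follow the hunks above, while any parameters not shown in the diff are assumed to keep their defaults:

# Usage sketch for the new geo-naming hooks; parameters not visible in the
# diff are assumed to have defaults.
from meridian.data import test_utils

# Generated labels: derived from n_geos via sample_geos (integer labels
# when integer_geos=True).
media = test_utils.random_media_da(
    n_geos=2, n_media_times=6, n_media_channels=3)

# Explicit labels: override the generated names, so a test can build arrays
# whose geo coordinates deliberately match (or mismatch) other arrays.
media_named = test_utils.random_media_da(
    n_geos=2, n_media_times=6, n_media_channels=3,
    explicit_geo_names=['geo_a', 'geo_b'])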
meridian/data/time_coordinates.py
CHANGED

@@ -19,7 +19,6 @@ import dataclasses
 import datetime
 import functools
 from typing import TypeAlias
-import warnings

 from meridian import constants
 import numpy as np

@@ -145,6 +144,10 @@ class TimeCoordinates:
     return cls(datetime_index=_to_pandas_datetime_index(dates))

   def __post_init__(self):
+    if len(self.datetime_index) <= 1:
+      raise ValueError(
+          "There must be more than one date index in the time coordinates."
+      )
     if not self.datetime_index.is_monotonic_increasing:
       raise ValueError(
           "Time coordinates must be strictly monotonically increasing."

@@ -162,28 +165,46 @@ class TimeCoordinates:

   @functools.cached_property
   def interval_days(self) -> int:
-    """Returns the interval between two neighboring dates in `all_dates`.
+    """Returns the *mean* interval between two neighboring dates in `all_dates`.

     Raises:
-      ValueError if the date index is not regularly spaced.
+      ValueError if the date index is not "regularly spaced".
     """
-
-
+    if not self._is_regular_time_index():
+      raise ValueError("Time coordinates are not regularly spaced!")

-
-
-
-
-        "The time coordinates only have one date. Returning an interval of 0."
-      )
-      return 0
+    # Calculate the difference between consecutive dates, in days.
+    diffs = self._interval_days
+    # Return the rounded mean interval.
+    return int(np.round(np.mean(diffs)))

-
-
-
+  @property
+  def _timedelta_index(self) -> pd.TimedeltaIndex:
+    """Returns the timedeltas between consecutive dates in `datetime_index`."""
+    return self.datetime_index.diff().dropna()

-
-
+  @property
+  def _interval_days(self) -> Sequence[int]:
+    """Converts `_timedelta_index` to a sequence of days for easier compute."""
+    return self._timedelta_index.days.to_numpy()
+
+  def _is_regular_time_index(self) -> bool:
+    """Returns True if the time index is "regularly spaced"."""
+    if np.all(self._interval_days == self._interval_days[0]):
+      # All intervals are regular. Base case.
+      return True
+    # Special cases:
+    # * Monthly cadences
+    if np.all(np.isin(self._interval_days, [28, 29, 30, 31])):
+      return True
+    # * Quarterly cadences
+    if np.all(np.isin(self._interval_days, [90, 91, 92])):
+      return True
+    # * Yearly cadences
+    if np.all(np.isin(self._interval_days, [365, 366])):
+      return True
+
+    return False

   def get_selected_dates(
       self,
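The rewritten `interval_days` accepts calendar cadences whose day counts vary (monthly, quarterly, yearly) and returns the rounded mean spacing. A condensed, standalone version of that logic with the class plumbing omitted; it assumes a pandas recent enough (2.1+) to provide `Index.diff`:

# Condensed, standalone version of _is_regular_time_index plus interval_days;
# assumes pandas >= 2.1 for DatetimeIndex.diff(). Logic mirrors the hunks.
import numpy as np
import pandas as pd

def mean_interval_days(dates: list[str]) -> int:
  diffs = pd.DatetimeIndex(dates).diff().dropna().days.to_numpy()
  regular = (
      np.all(diffs == diffs[0])                    # constant spacing, e.g. weekly
      or np.all(np.isin(diffs, [28, 29, 30, 31]))  # monthly cadence
      or np.all(np.isin(diffs, [90, 91, 92]))      # quarterly cadence
      or np.all(np.isin(diffs, [365, 366]))        # yearly cadence
  )
  if not regular:
    raise ValueError("Time coordinates are not regularly spaced!")
  return int(np.round(np.mean(diffs)))

print(mean_interval_days(['2024-01-01', '2024-02-01', '2024-03-01']))  # 30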
{google_meridian-1.0.5.dist-info → google_meridian-1.0.7.dist-info/licenses}/LICENSE
File without changes

{google_meridian-1.0.5.dist-info → google_meridian-1.0.7.dist-info}/top_level.txt
File without changes