PyPI - google-meridian - Versions diffs - 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl - Mend

google-meridian 1.0.7py3-none-any.whl → 1.0.9py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

{google_meridian-1.0.7.dist-info → google_meridian-1.0.9.dist-info}/METADATA +2 -2
{google_meridian-1.0.7.dist-info → google_meridian-1.0.9.dist-info}/RECORD +18 -18
{google_meridian-1.0.7.dist-info → google_meridian-1.0.9.dist-info}/WHEEL +1 -1
meridian/__init__.py +1 -1
meridian/analysis/analyzer.py +491 -338
meridian/analysis/optimizer.py +712 -299
meridian/analysis/summarizer.py +40 -4
meridian/analysis/summary_text.py +20 -1
meridian/analysis/templates/chart.html.jinja +1 -0
meridian/analysis/test_utils.py +47 -99
meridian/analysis/visualizer.py +455 -85
meridian/constants.py +32 -0
meridian/data/input_data.py +49 -5
meridian/model/model.py +20 -4
meridian/model/posterior_sampler.py +15 -5
meridian/model/prior_distribution.py +22 -1
{google_meridian-1.0.7.dist-info → google_meridian-1.0.9.dist-info}/licenses/LICENSE +0 -0
{google_meridian-1.0.7.dist-info → google_meridian-1.0.9.dist-info}/top_level.txt +0 -0

meridian/analysis/visualizer.py CHANGED Viewed

@@ -16,6 +16,8 @@
 from collections.abc import Sequence
 import functools
+from typing import Mapping
+import warnings
 import altair as alt
 from meridian import constants as c
 from meridian.analysis import analyzer
@@ -32,7 +34,9 @@ import xarray as xr
 __all__ = [
     'ModelDiagnostics',
     'ModelFit',
+    'MediaSummary',
     'ReachAndFrequency',
+    'MediaEffects',
 ]
@@ -246,7 +250,7 @@ class ModelDiagnostics:
         .mark_area(opacity=0.7)
         .encode(
             x=f'{parameter}:Q',
-            y='density:Q',
+            y=alt.Y(shorthand='density:Q', stack=False),
             color=f'{c.DISTRIBUTION}:N',
         )
     )
@@ -461,14 +465,14 @@ class ModelFit:
     else:
       y_axis_label = summary_text.KPI_LABEL
     plot = (
-        alt.Chart(model_fit_df, width=c.VEGALITE_FACET_DEFAULT_WIDTH)
+        alt.Chart(model_fit_df, width=c.VEGALITE_FACET_EXTRA_LARGE_WIDTH)
         .mark_line()
         .encode(
             x=alt.X(
                 f'{c.TIME}:T',
                 title='Time period',
                 axis=alt.Axis(
-                    format='%Y %b',
+                    format=c.QUARTER_FORMAT,
                     grid=False,
                     tickCount=8,
                     domainColor=c.GREY_300,
@@ -883,11 +887,13 @@ class MediaEffects:
       A Dataset displaying the response curves data.
     """
     selected_times_list = list(selected_times) if selected_times else None
+    use_kpi = self._meridian.input_data.revenue_per_kpi is None
     return self._analyzer.response_curves(
         spend_multipliers=list(np.arange(0, 2.2, c.RESPONSE_CURVE_STEP_SIZE)),
         confidence_level=confidence_level,
         selected_times=selected_times_list,
         by_reach=by_reach,
+        use_kpi=use_kpi,
     )
   @functools.lru_cache(maxsize=128)
@@ -1170,7 +1176,7 @@ class MediaEffects:
       confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
       include_prior: bool = True,
       include_ci: bool = True,
-  ) -> alt.Chart | list[alt.Chart]:
+  ) -> Mapping[str, alt.Chart]:
     """Plots the Hill curves for each channel.
     Args:
@@ -1181,22 +1187,23 @@ class MediaEffects:
       include_ci: If `True`, plots the credible interval. Defaults to `True`.
     Returns:
-      A faceted Altair plot showing the histogram, prior and posterior lines,
-      and bands for the Hill saturation curves. When there are both media and
-      RF channels, a list of 2 faceted Altair plots are returned: one
-      for the media channels and another for the RF channels.
+      A dictionary mapping channel type constants (`media`, `rf`, and
+      `organic_media`) to their respective Altair chart objects. Keys are only
+      present if charts for that type were generated (i.e., if the
+      corresponding channels exist in the data). Returns an empty dictionary if
+      no relevant channels are found.
     """
     hill_curves_dataframe = self.hill_curves_dataframe(
         confidence_level=confidence_level
     )
     channel_types = list(set(hill_curves_dataframe[c.CHANNEL_TYPE]))
-    plot_media, plot_rf = None, None
+    plots: dict[str, alt.Chart] = {}
     if c.MEDIA in channel_types:
       media_df = hill_curves_dataframe[
           hill_curves_dataframe[c.CHANNEL_TYPE] == c.MEDIA
       ]
-      plot_media = self._plot_hill_curves_helper(
+      plots[c.MEDIA] = self._plot_hill_curves_helper(
           media_df, include_prior, include_ci
       )
@@ -1204,14 +1211,19 @@ class MediaEffects:
       rf_df = hill_curves_dataframe[
           hill_curves_dataframe[c.CHANNEL_TYPE] == c.RF
       ]
-      plot_rf = self._plot_hill_curves_helper(rf_df, include_prior, include_ci)
+      plots[c.RF] = self._plot_hill_curves_helper(
+          rf_df, include_prior, include_ci
+      )
-    if plot_media and plot_rf:
-      return [plot_media, plot_rf]
-    elif plot_media:
-      return plot_media
-    else:
-      return plot_rf
+    if c.ORGANIC_MEDIA in channel_types:
+      organic_media_df = hill_curves_dataframe[
+          hill_curves_dataframe[c.CHANNEL_TYPE] == c.ORGANIC_MEDIA
+      ]
+      plots[c.ORGANIC_MEDIA] = self._plot_hill_curves_helper(
+          organic_media_df, include_prior, include_ci
+      )
+    return plots
   def _plot_hill_curves_helper(
       self,
@@ -1231,13 +1243,26 @@ class MediaEffects:
     Returns:
       A faceted Altair plot showing the histogram and prior+posterior lines and
       bands for the Hill curves.
+    Raises:
+      ValueError: If the input DataFrame is empty, missing the channel_type
+        column, or contains an unsupported channel type.
     """
-    if c.MEDIA in list(df_channel_type[c.CHANNEL_TYPE]):
+    channel_type = df_channel_type[c.CHANNEL_TYPE].iloc[0]
+    if channel_type == c.MEDIA:
       x_axis_title = summary_text.HILL_X_AXIS_MEDIA_LABEL
       shaded_area_title = summary_text.HILL_SHADED_REGION_MEDIA_LABEL
-    else:
+    elif channel_type == c.RF:
       x_axis_title = summary_text.HILL_X_AXIS_RF_LABEL
       shaded_area_title = summary_text.HILL_SHADED_REGION_RF_LABEL
+    elif channel_type == c.ORGANIC_MEDIA:
+      x_axis_title = summary_text.HILL_X_AXIS_MEDIA_LABEL
+      shaded_area_title = summary_text.HILL_SHADED_REGION_MEDIA_LABEL
+    else:
+      raise ValueError(
+          f"Unsupported channel type '{channel_type}' found in Hill curve data."
+          ' Expected one of: {c.MEDIA}, {c.RF}, {c.ORGANIC_MEDIA}.'
+      )
     domain_list = [
         c.POSTERIOR,
         c.PRIOR,
@@ -1410,18 +1435,35 @@ class MediaSummary:
     self._marginal_roi_by_reach = marginal_roi_by_reach
     self._non_media_baseline_values = non_media_baseline_values
-  @functools.cached_property
-  def paid_summary_metrics(self) -> xr.Dataset:
+  @property
+  def paid_summary_metrics(self):
+    warnings.warn(
+        'The `paid_summary_metrics` property is deprecated. Use the'
+        ' `get_paid_summary_metrics()` method instead.',
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    return self.get_paid_summary_metrics()
+  @functools.lru_cache(maxsize=128)
+  def get_paid_summary_metrics(
+      self, aggregate_times: bool = True
+  ) -> xr.Dataset:
     """Dataset holding the calculated summary metrics for the paid channels.
-    The dataset contains the following:
+    Args:
+      aggregate_times: If `True`, aggregates the metrics across all time
+        periods.  If `False`, returns time-varying metrics.
-    - **Coordinates:** `channel`, `metric` (`mean`, `median`, `ci_lo`, `ci_hi`),
-      `distribution` (`prior`, `posterior`)
-    - **Data variables:** `impressions`, `pct_of_impressions`, `spend`,
-      `pct_of_spend`, `CPM`, `incremental_outcome`, `pct_of_contribution`,
-      `roi`,
-      `effectiveness`, `mroi`.
+    Returns:
+      An `xarray.Dataset` containing the following:
+        - **Coordinates:** `channel`, `metric` (`mean`, `median`, `ci_lo`,
+        `ci_hi`),
+          `distribution` (`prior`, `posterior`)
+        - **Data variables:** `impressions`, `pct_of_impressions`, `spend`,
+          `pct_of_spend`, `CPM`, `incremental_outcome`, `pct_of_contribution`,
+          `roi`,
+          `effectiveness`, `mroi`.
     """
     return self._analyzer.summary_metrics(
         selected_times=self._selected_times,
@@ -1429,18 +1471,34 @@ class MediaSummary:
         use_kpi=self._meridian.input_data.revenue_per_kpi is None,
         confidence_level=self._confidence_level,
         include_non_paid_channels=False,
+        aggregate_times=aggregate_times,
+    )
+  @property
+  def all_summary_metrics(self):
+    warnings.warn(
+        'The `all_summary_metrics` property is deprecated. Use the'
+        ' `get_all_summary_metrics()` method instead.',
+        DeprecationWarning,
+        stacklevel=2,
     )
+    return self.get_all_summary_metrics()
-  @functools.cached_property
-  def all_summary_metrics(self) -> xr.Dataset:
+  @functools.lru_cache(maxsize=128)
+  def get_all_summary_metrics(self, aggregate_times: bool = True) -> xr.Dataset:
     """Dataset holding the calculated summary metrics for all channels.
-    The dataset contains the following:
+    Args:
+      aggregate_times: If `True`, aggregates the metrics across all time
+        periods.  If `False`, returns time-varying metrics.
-    - **Coordinates:** `channel`, `metric` (`mean`, `median`, `ci_lo`, `ci_hi`),
-      `distribution` (`prior`, `posterior`)
-    - **Data variables:** `incremental_outcome`, `pct_of_contribution`,
-      `effectiveness`.
+    Returns:
+      An `xarray.Dataset` containing the following:
+        - **Coordinates:** `channel`, `metric` (`mean`, `median`, `ci_lo`,
+        `ci_hi`),
+          `distribution` (`prior`, `posterior`)
+        - **Data variables:** `incremental_outcome`, `pct_of_contribution`,
+          `effectiveness`.
     """
     return self._analyzer.summary_metrics(
         selected_times=self._selected_times,
@@ -1448,6 +1506,7 @@ class MediaSummary:
         confidence_level=self._confidence_level,
         include_non_paid_channels=True,
         non_media_baseline_values=self._non_media_baseline_values,
+        aggregate_times=aggregate_times,
     )
   def summary_table(
@@ -1488,7 +1547,7 @@ class MediaSummary:
     ]
     if include_non_paid_channels:
       monetary_metrics = [c.INCREMENTAL_OUTCOME] * use_revenue
-      summary_metrics = self.all_summary_metrics
+      summary_metrics = self.get_all_summary_metrics()
       columns_rename_dict = {
           c.PCT_OF_CONTRIBUTION: summary_text.PCT_CONTRIBUTION_COL,
           c.INCREMENTAL_OUTCOME: (
@@ -1512,7 +1571,7 @@ class MediaSummary:
           c.SPEND,
           c.INCREMENTAL_OUTCOME,
       ] * use_revenue
-      summary_metrics = self.paid_summary_metrics
+      summary_metrics = self.get_paid_summary_metrics()
       columns_rename_dict = {
           c.PCT_OF_IMPRESSIONS: summary_text.PCT_IMPRESSIONS_COL,
           c.PCT_OF_SPEND: summary_text.PCT_SPEND_COL,
@@ -1598,6 +1657,240 @@ class MediaSummary:
     self._marginal_roi_by_reach = marginal_roi_by_reach
     self._non_media_baseline_values = non_media_baseline_values
+  def plot_channel_contribution_area_chart(
+      self, time_granularity: str = c.QUARTERLY
+  ) -> alt.Chart:
+    """Plots a stacked area chart of the contribution share per channel by time.
+    Args:
+      time_granularity: The granularity for the time axis. Options are `weekly`
+        or `quarterly`. Defaults to `quarterly`.
+    Returns:
+      An Altair plot showing the contribution share per channel by time.
+    Raises:
+      ValueError: If time_granularity is not one of the allowed constants.
+    """
+    if time_granularity not in c.TIME_GRANULARITIES:
+      raise ValueError(
+          f'time_granularity must be one of {c.TIME_GRANULARITIES}'
+      )
+    x_axis_format = (
+        c.DATE_FORMAT if time_granularity == c.WEEKLY else c.QUARTER_FORMAT
+    )
+    outcome_df = self._transform_contribution_metrics(
+        include_non_paid=True, aggregate_times=False
+    )
+    # Ensure proper ordering for the stacked area chart. Baseline should be at
+    # the bottom. Separate the *stacking* order from the *legend* order.
+    stack_order = sorted([
+        channel
+        for channel in outcome_df[c.CHANNEL].unique()
+        if channel != c.BASELINE
+    ]) + [c.BASELINE]
+    legend_order = [c.BASELINE] + sorted([
+        channel
+        for channel in outcome_df[c.CHANNEL].unique()
+        if channel != c.BASELINE
+    ])
+    # Get the minimum incremental outcome for baseline across all time periods
+    # as the lower bound for the stacked area chart.
+    min_y = (
+        outcome_df[outcome_df[c.CHANNEL] == c.BASELINE]
+        .groupby(c.TIME)[c.INCREMENTAL_OUTCOME]
+        .min()
+        .min()
+    )
+    plot = (
+        alt.Chart(outcome_df, width=c.VEGALITE_FACET_EXTRA_LARGE_WIDTH)
+        .mark_area()
+        .transform_calculate(
+            sort_channel=f'indexof({stack_order}, datum.channel)'
+        )
+        .encode(
+            x=alt.X(
+                f'{c.TIME}:T',
+                title='Time period',
+                axis=alt.Axis(
+                    format=x_axis_format,
+                    grid=False,
+                    tickCount=8,
+                    domainColor=c.GREY_300,
+                ),
+            ),
+            y=alt.Y(
+                f'{c.INCREMENTAL_OUTCOME}:Q',
+                title=(
+                    c.REVENUE.title()
+                    if self._meridian.input_data.revenue_per_kpi is not None
+                    else c.KPI.upper()
+                ),
+                axis=alt.Axis(
+                    ticks=False,
+                    domain=False,
+                    tickCount=5,
+                    labelPadding=c.PADDING_10,
+                    labelExpr=formatter.compact_number_expr(),
+                    **formatter.Y_AXIS_TITLE_CONFIG,
+                ),
+                scale=alt.Scale(domainMin=min_y, clamp=True),
+            ),
+            color=alt.Color(
+                f'{c.CHANNEL}:N',
+                legend=alt.Legend(
+                    labelFontSize=c.AXIS_FONT_SIZE,
+                    labelFont=c.FONT_ROBOTO,
+                    title=None,
+                    orient='bottom',
+                ),
+                scale=alt.Scale(domain=legend_order),
+                sort=legend_order,
+            ),
+            tooltip=[
+                alt.Tooltip(f'{c.TIME}:T', format=c.DATE_FORMAT),
+                c.CHANNEL,
+                alt.Tooltip(f'{c.INCREMENTAL_OUTCOME}:Q', format=',.2f'),
+            ],
+            order=alt.Order('sort_channel:N', sort='descending'),
+        )
+        .properties(
+            title=formatter.custom_title_params(
+                summary_text.CHANNEL_CONTRIB_BY_TIME_CHART_TITLE
+            ),
+        )
+        .configure_axis(titlePadding=c.PADDING_10, **formatter.TEXT_CONFIG)
+        .configure_view(strokeOpacity=0)
+    )
+    return plot
+  def plot_channel_contribution_bump_chart(
+      self, time_granularity: str = c.QUARTERLY
+  ) -> alt.Chart:
+    """Plots a bump chart of channel contribution rank over time.
+    This chart shows the relative rank of each channel's contribution,
+    including the baseline, based on incremental outcome. Depending on the
+    time_granularity, ranks are shown either weekly or at the end of each
+    quarter. Rank 1 represents the highest contribution.
+    Args:
+      time_granularity: The granularity for the time axis. Options are `weekly`
+        or `quarterly`. Defaults to `quarterly`.
+    Returns:
+      An Altair plot showing the contribution rank per channel by time.
+    Raises:
+      ValueError: If time_granularity is not one of the allowed constants.
+    """
+    if time_granularity not in c.TIME_GRANULARITIES:
+      raise ValueError(
+          f'time_granularity must be one of {c.TIME_GRANULARITIES}'
+      )
+    outcome_df = self._transform_contribution_metrics(
+        include_non_paid=True, aggregate_times=False
+    )
+    outcome_df[c.TIME] = pd.to_datetime(outcome_df[c.TIME])
+    outcome_df['rank'] = outcome_df.groupby(c.TIME)[c.INCREMENTAL_OUTCOME].rank(
+        method='first', ascending=False
+    )
+    if time_granularity == c.QUARTERLY:
+      # Filter data to keep only the last available date within each quarter
+      # for a quarterly view of ranking changes.
+      unique_times = pd.Series(outcome_df[c.TIME].unique()).sort_values()
+      quarters = unique_times.dt.to_period('Q')
+      quarterly_dates = unique_times[~quarters.duplicated(keep='last')]
+      plot_df = outcome_df[outcome_df[c.TIME].isin(quarterly_dates)].copy()
+      x_axis_format = c.QUARTER_FORMAT
+      tooltip_time_format = c.QUARTER_FORMAT
+      tooltip_time_title = 'Quarter'
+    else:
+      plot_df = outcome_df.copy()
+      x_axis_format = c.DATE_FORMAT
+      tooltip_time_format = c.DATE_FORMAT
+      tooltip_time_title = 'Week'
+    legend_order = [c.BASELINE] + sorted([
+        channel
+        for channel in plot_df[c.CHANNEL].unique()
+        if channel != c.BASELINE
+    ])
+    plot = (
+        alt.Chart(plot_df, width=c.VEGALITE_FACET_EXTRA_LARGE_WIDTH)
+        .mark_line(point=True)
+        .encode(
+            x=alt.X(
+                f'{c.TIME}:T',
+                title='Time period',
+                axis=alt.Axis(
+                    format=x_axis_format,
+                    grid=False,
+                    domainColor=c.GREY_300,
+                ),
+            ),
+            y=alt.Y(
+                'rank:Q',
+                title='Contribution Rank',
+                axis=alt.Axis(
+                    ticks=False,
+                    domain=False,
+                    labelPadding=c.PADDING_10,
+                    tickMinStep=1,
+                    format='d',
+                ),
+                scale=alt.Scale(
+                    zero=False,
+                    reverse=True,
+                ),
+            ),
+            color=alt.Color(
+                f'{c.CHANNEL}:N',
+                legend=alt.Legend(
+                    labelFontSize=c.AXIS_FONT_SIZE,
+                    labelFont=c.FONT_ROBOTO,
+                    title=None,
+                    orient='bottom',
+                ),
+                scale=alt.Scale(domain=legend_order),
+                sort=legend_order,
+            ),
+            tooltip=[
+                alt.Tooltip(
+                    f'{c.TIME}:T',
+                    format=tooltip_time_format,
+                    title=tooltip_time_title,
+                ),
+                alt.Tooltip(f'{c.CHANNEL}:N', title='Channel'),
+                alt.Tooltip('rank:O', title='Rank'),
+                alt.Tooltip(
+                    f'{c.INCREMENTAL_OUTCOME}:Q',
+                    format=',.0f',
+                    title='Incremental Outcome',
+                ),
+            ],
+        )
+        .properties(
+            title=formatter.custom_title_params(
+                summary_text.CHANNEL_CONTRIB_RANK_CHART_TITLE
+            )
+        )
+        .configure_axis(titlePadding=c.PADDING_10, **formatter.TEXT_CONFIG)
+        .configure_view(strokeOpacity=0)
+    )
+    return plot
   def plot_contribution_waterfall_chart(self) -> alt.Chart:
     """Plots a waterfall chart of the contribution share per channel.
@@ -1621,7 +1914,7 @@ class MediaSummary:
     num_channels = len(outcome_df[c.CHANNEL])
     base = (
-        alt.Chart(outcome_df)
+        alt.Chart(outcome_df, width=c.VEGALITE_FACET_LARGE_WIDTH)
         .transform_window(
             sum_outcome=f'sum({c.PCT_OF_CONTRIBUTION})',
             kwargs=f'lead({c.CHANNEL})',
@@ -1682,7 +1975,6 @@ class MediaSummary:
             ),
             height=c.BAR_SIZE * num_channels
             + c.BAR_SIZE * 2 * c.SCALED_PADDING,
-            width=500,
         )
         .configure_axis(titlePadding=c.PADDING_10, **formatter.TEXT_CONFIG)
         .configure_view(strokeOpacity=0)
@@ -1968,7 +2260,7 @@ class MediaSummary:
       An Altair bubble plot showing the ROI, spend, and another metric.
     """
     if selected_channels:
-      channels = self.paid_summary_metrics.channel
+      channels = self.get_paid_summary_metrics().channel
       if any(channel not in channels for channel in selected_channels):
         raise ValueError(
             '`selected_channels` should match the channel dimension names from '
@@ -2105,16 +2397,20 @@ class MediaSummary:
     Returns:
       A dataframe filtered based on the specifications.
     """
+    paid_summary_metrics = self.get_paid_summary_metrics()
     metrics_df = self._summary_metrics_to_mean_df(
-        metrics=[c.ROI, metric], selected_channels=selected_channels
+        paid_summary_metrics,
+        metrics=[c.ROI, metric],
+        selected_channels=selected_channels,
     )
-    spend_df = self.paid_summary_metrics[c.SPEND].to_dataframe().reset_index()
+    spend_df = paid_summary_metrics[c.SPEND].to_dataframe().reset_index()
     return metrics_df.merge(spend_df, on=c.CHANNEL)
   def _transform_contribution_metrics(
       self,
       selected_channels: Sequence[str] | None = None,
       include_non_paid: bool = False,
+      aggregate_times: bool = True,
   ) -> pd.DataFrame:
     """Transforms the media metrics for the contribution plot.
@@ -2127,56 +2423,133 @@ class MediaSummary:
       selected_channels: Optional list of a subset of channels to filter by.
       include_non_paid: If `True`, includes the organic media, organic RF and
         non-media channels in the contribution plot. Defaults to `False`.
+      aggregate_times: If `True`, aggregates the metrics across all time
+        periods.  If `False`, returns time-varying metrics.
     Returns:
       A dataframe with contributions per channel.
     """
-    total_media_criteria = {
-        c.DISTRIBUTION: c.POSTERIOR,
-        c.METRIC: c.MEAN,
-        c.CHANNEL: c.ALL_CHANNELS,
-    }
     summary_metrics = (
-        self.all_summary_metrics
+        self.get_all_summary_metrics(aggregate_times=aggregate_times)
         if include_non_paid
-        else self.paid_summary_metrics
+        else self.get_paid_summary_metrics(aggregate_times=aggregate_times)
     )
-    total_media_outcome = (
-        summary_metrics[c.INCREMENTAL_OUTCOME].sel(total_media_criteria).item()
+    contribution_df = self._calculate_contribution_dataframe(
+        summary_metrics, selected_channels
     )
-    total_media_pct = (
-        summary_metrics[c.PCT_OF_CONTRIBUTION].sel(total_media_criteria).item()
-        / 100
+    baseline_df = self._calculate_baseline_contribution_dataframe(
+        summary_metrics, aggregate_times
     )
-    total_outcome = total_media_outcome / total_media_pct
-    baseline_pct = 1 - total_media_pct
-    baseline_outcome = total_outcome * baseline_pct
-    baseline_df = pd.DataFrame(
-        {
-            c.CHANNEL: c.BASELINE,
-            c.INCREMENTAL_OUTCOME: baseline_outcome,
-            c.PCT_OF_CONTRIBUTION: baseline_pct,
-        },
-        index=[0],
+    combined_df = pd.concat([baseline_df, contribution_df]).reset_index(
+        drop=True
     )
-    outcome_df = self._summary_metrics_to_mean_df(
+    if aggregate_times:
+      combined_df.sort_values(
+          by=c.INCREMENTAL_OUTCOME, ascending=False, inplace=True
+      )
+    else:
+      combined_df.sort_values(
+          by=[c.TIME, c.INCREMENTAL_OUTCOME],
+          ascending=[True, False],
+          inplace=True,
+      )
+    return combined_df
+  def _calculate_contribution_dataframe(
+      self,
+      summary_metrics: xr.Dataset,
+      selected_channels: Sequence[str] | None,
+  ) -> pd.DataFrame:
+    """Calculates the contribution dataframe.
+    Args:
+      summary_metrics: xarray Dataset of summary metrics.
+      selected_channels: Optional list of channels.
+    Returns:
+      pd.DataFrame: Contribution dataframe.
+        Shape:
+          - If `aggregate_times=True`: (n_channels, 3)
+            Columns: 'channel', 'incremental_outcome', 'pct_of_contribution'
+          - If `aggregate_times=False`: (n_channels * n_times, 4)
+            Columns: 'time', 'channel', 'incremental_outcome',
+            'pct_of_contribution'
+    """
+    contribution_df = self._summary_metrics_to_mean_df(
+        summary_metrics=summary_metrics,
         metrics=[
             c.INCREMENTAL_OUTCOME,
             c.PCT_OF_CONTRIBUTION,
         ],
         selected_channels=selected_channels,
-        include_non_paid=include_non_paid,
     )
     # Convert to percentage values between 0-1.
-    outcome_df[c.PCT_OF_CONTRIBUTION] = outcome_df[c.PCT_OF_CONTRIBUTION].div(
-        100
+    contribution_df[c.PCT_OF_CONTRIBUTION] = contribution_df[
+        c.PCT_OF_CONTRIBUTION
+    ].div(100)
+    return contribution_df
+  def _calculate_baseline_contribution_dataframe(
+      self, summary_metrics: xr.Dataset, aggregate_times: bool
+  ) -> pd.DataFrame:
+    """Calculates the baseline contribution dataframe.
+      Calculates a single total outcome and baseline if aggregating.
+      Calculates time-varying total and baseline if not aggregating.
+    Args:
+      summary_metrics: The summary metrics dataset.
+      aggregate_times: Whether to aggregate times.
+    Returns:
+      A DataFrame containing the baseline metrics.
+        Shape:
+          - If `aggregate_times=True`: (1, 3)
+            Columns: 'channel', 'incremental_outcome', 'pct_of_contribution'
+          - If `aggregate_times=False`: (n_times, 4)
+            Columns: 'time', 'channel', 'incremental_outcome',
+            'pct_of_contribution'
+    """
+    total_media_criteria = {
+        c.DISTRIBUTION: c.POSTERIOR,
+        c.METRIC: c.MEAN,
+        c.CHANNEL: c.ALL_CHANNELS,
+    }
+    if not aggregate_times:
+      total_media_criteria[c.TIME] = (
+          self._selected_times
+          or self.get_all_summary_metrics(aggregate_times=False).time
+      )
+    total_media_outcome = summary_metrics[c.INCREMENTAL_OUTCOME].sel(
+        total_media_criteria
     )
-    outcome_df = pd.concat([baseline_df, outcome_df]).reset_index(drop=True)
-    outcome_df.sort_values(
-        by=c.INCREMENTAL_OUTCOME, ascending=False, inplace=True
+    total_media_pct = (
+        summary_metrics[c.PCT_OF_CONTRIBUTION].sel(total_media_criteria) / 100
     )
-    return outcome_df
+    total_outcome = total_media_outcome / total_media_pct
+    baseline_pct = 1 - total_media_pct
+    baseline_outcome = total_outcome * baseline_pct
+    if aggregate_times:
+      return pd.DataFrame(
+          {
+              c.CHANNEL: c.BASELINE,
+              c.INCREMENTAL_OUTCOME: baseline_outcome.item(),
+              c.PCT_OF_CONTRIBUTION: baseline_pct.item(),
+          },
+          index=[0],
+      )
+    else:
+      return pd.DataFrame({
+          c.TIME: self._selected_times or summary_metrics.time.values,
+          c.CHANNEL: c.BASELINE,
+          c.INCREMENTAL_OUTCOME: baseline_outcome.values,
+          c.PCT_OF_CONTRIBUTION: baseline_pct.values,
+      })
   def _transform_contribution_spend_metrics(self) -> pd.DataFrame:
     """Transforms the media metrics for the spend vs contribution plot.
@@ -2189,12 +2562,13 @@ class MediaSummary:
     Returns:
       A dataframe of spend and outcome percentages and ROI per channel.
     """
+    paid_summary_metrics = self.get_paid_summary_metrics()
     if self._meridian.input_data.revenue_per_kpi is not None:
       outcome = summary_text.REVENUE_LABEL
     else:
       outcome = summary_text.KPI_LABEL
     total_media_outcome = (
-        self.paid_summary_metrics[c.INCREMENTAL_OUTCOME]
+        paid_summary_metrics[c.INCREMENTAL_OUTCOME]
         .sel(
             distribution=c.POSTERIOR,
             metric=c.MEAN,
@@ -2203,7 +2577,7 @@ class MediaSummary:
         .item()
     )
     outcome_pct_df = self._summary_metrics_to_mean_df(
-        metrics=[c.INCREMENTAL_OUTCOME]
+        paid_summary_metrics, metrics=[c.INCREMENTAL_OUTCOME]
     )
     outcome_pct_df[c.PCT] = outcome_pct_df[c.INCREMENTAL_OUTCOME].div(
         total_media_outcome
@@ -2211,7 +2585,7 @@ class MediaSummary:
     outcome_pct_df.drop(columns=[c.INCREMENTAL_OUTCOME], inplace=True)
     outcome_pct_df['label'] = f'% {outcome}'
     spend_pct_df = (
-        self.paid_summary_metrics[c.PCT_OF_SPEND]
+        paid_summary_metrics[c.PCT_OF_SPEND]
         .drop_sel(channel=[c.ALL_CHANNELS])
         .to_dataframe()
         .reset_index()
@@ -2221,7 +2595,9 @@ class MediaSummary:
     spend_pct_df['label'] = '% Spend'
     pct_df = pd.concat([outcome_pct_df, spend_pct_df])
-    roi_df = self._summary_metrics_to_mean_df(metrics=[c.ROI])
+    roi_df = self._summary_metrics_to_mean_df(
+        paid_summary_metrics, metrics=[c.ROI]
+    )
     plot_df = pct_df.merge(roi_df, on=c.CHANNEL)
     scale_factor = plot_df[c.PCT].max() / plot_df[c.ROI].max()
     plot_df[c.ROI_SCALED] = plot_df[c.ROI] * scale_factor
@@ -2230,9 +2606,9 @@ class MediaSummary:
   def _summary_metrics_to_mean_df(
       self,
+      summary_metrics: xr.Dataset,
       metrics: Sequence[str],
       selected_channels: Sequence[str] | None = None,
-      include_non_paid: bool = False,
   ) -> pd.DataFrame:
     """Transforms the summary metrics to a dataframe of mean values.
@@ -2241,20 +2617,14 @@ class MediaSummary:
     channels.
     Args:
+      summary_metrics: The summary metrics dataset.
       metrics: A list of the metrics to include in the dataframe.
       selected_channels: List of channels to include. If None, all media
         channels will be included.
-      include_non_paid: If `True`, includes the organic media, organic RF and
-        non-media channels in the dataframe. Defaults to `False`.
     Returns:
       A dataframe of posterior mean values for the selected metrics and media.
     """
-    summary_metrics = (
-        self.all_summary_metrics
-        if include_non_paid
-        else self.paid_summary_metrics
-    )
     metrics_dataset = summary_metrics[metrics].sel(
         distribution=c.POSTERIOR, metric=c.MEAN
     )
@@ -2284,7 +2654,7 @@ class MediaSummary:
     central_tendency = c.MEDIAN if metric == c.CPIK else c.MEAN
     unused_central_tendency = c.MEAN if metric == c.CPIK else c.MEDIAN
     return (
-        self.paid_summary_metrics[metric]
+        self.get_paid_summary_metrics()[metric]
         .sel(distribution=c.POSTERIOR)
         .drop_sel(
             channel=c.ALL_CHANNELS,

google-meridian 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl

google-meridian 1.0.7py3-none-any.whl → 1.0.9py3-none-any.whl