PyPI - google-meridian - Versions diffs - 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl - Mend

google-meridian 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

{google_meridian-1.0.6.dist-info → google_meridian-1.0.8.dist-info}/METADATA +11 -10
{google_meridian-1.0.6.dist-info → google_meridian-1.0.8.dist-info}/RECORD +18 -18
{google_meridian-1.0.6.dist-info → google_meridian-1.0.8.dist-info}/WHEEL +1 -1
meridian/__init__.py +1 -1
meridian/analysis/analyzer.py +383 -320
meridian/analysis/optimizer.py +531 -269
meridian/analysis/summarizer.py +21 -3
meridian/analysis/summary_text.py +20 -1
meridian/analysis/templates/chart.html.jinja +1 -0
meridian/analysis/test_utils.py +47 -99
meridian/analysis/visualizer.py +407 -83
meridian/constants.py +31 -0
meridian/data/input_data.py +49 -5
meridian/data/load.py +10 -7
meridian/model/model.py +5 -4
meridian/model/posterior_sampler.py +15 -5
{google_meridian-1.0.6.dist-info → google_meridian-1.0.8.dist-info/licenses}/LICENSE +0 -0
{google_meridian-1.0.6.dist-info → google_meridian-1.0.8.dist-info}/top_level.txt +0 -0

meridian/analysis/analyzer.py CHANGED Viewed

@@ -181,6 +181,13 @@ class DataTensors(tf.experimental.ExtensionType):
         return new_tensor.shape[1]
     return None
+  def filter_fields(self, fields: Sequence[str]) -> Self:
+    """Returns a new DataTensors object with only the specified fields."""
+    output = {}
+    for field in fields:
+      output[field] = getattr(self, field)
+    return DataTensors(**output)
   def validate_and_fill_missing_data(
       self,
       required_tensors_names: Sequence[str],
@@ -896,8 +903,8 @@ class Analyzer:
     """Computes decayed effect means and CIs for media or RF channels.
     Args:
-      channel_type: Specifies `media` or `reach` for computing prior and
-        posterior decayed effects.
+      channel_type: Specifies `media`, `reach`, or `organic_media` for computing
+        prior and posterior decayed effects.
       l_range: The range of time across which the adstock effect is computed.
       xr_dims: A list of dimensions for the output dataset.
       xr_coords: A dictionary with the coordinates for the output dataset.
@@ -914,12 +921,22 @@ class Analyzer:
           self._meridian.inference_data.posterior.alpha_m.values,
           (-1, self._meridian.n_media_channels),
       )
-    else:
+    elif channel_type is constants.REACH:
       prior = self._meridian.inference_data.prior.alpha_rf.values[0]
       posterior = np.reshape(
           self._meridian.inference_data.posterior.alpha_rf.values,
           (-1, self._meridian.n_rf_channels),
       )
+    elif channel_type is constants.ORGANIC_MEDIA:
+      prior = self._meridian.inference_data.prior.alpha_om.values[0]
+      posterior = np.reshape(
+          self._meridian.inference_data.posterior.alpha_om.values,
+          (-1, self._meridian.n_organic_media_channels),
+      )
+    else:
+      raise ValueError(
+          f"Unsupported channel type for adstock decay: '{channel_type}'. "
+      )
     decayed_effect_prior = (
         prior[np.newaxis, ...] ** l_range[:, np.newaxis, np.newaxis, np.newaxis]
@@ -1455,16 +1472,7 @@ class Analyzer:
     if new_data is None:
       new_data = DataTensors()
-    required_fields = [
-        constants.CONTROLS,
-        constants.MEDIA,
-        constants.REACH,
-        constants.FREQUENCY,
-        constants.ORGANIC_MEDIA,
-        constants.ORGANIC_REACH,
-        constants.ORGANIC_FREQUENCY,
-        constants.NON_MEDIA_TREATMENTS,
-    ]
+    required_fields = constants.NON_REVENUE_DATA
     filled_tensors = new_data.validate_and_fill_missing_data(
         required_tensors_names=required_fields,
         meridian=self._meridian,
@@ -1841,8 +1849,8 @@ class Analyzer:
         include or booleans with length equal to the number of time periods in
         `new_data`, if provided. If `new_data` is provided,
         `media_selected_times` can select any subset of time periods in
-        `new_data`. If `new_data is not provided, `media_selected_times` selects
-        from `InputData.time`. The incremental outcome corresponds to
+        `new_data`. If `new_data` is not provided, `media_selected_times`
+        selects from `InputData.time`. The incremental outcome corresponds to
         incremental KPI generated during the `selected_times` arg by treatment
         variables executed during the `media_selected_times` arg. For each
         channel, the incremental outcome is defined as the difference between
@@ -1922,16 +1930,9 @@ class Analyzer:
     if new_data is None:
       new_data = DataTensors()
-    required_params = [
-        constants.MEDIA,
-        constants.REACH,
-        constants.FREQUENCY,
-        constants.ORGANIC_MEDIA,
-        constants.ORGANIC_REACH,
-        constants.ORGANIC_FREQUENCY,
-        constants.NON_MEDIA_TREATMENTS,
-        constants.REVENUE_PER_KPI,
-    ]
+    required_params = constants.PAID_DATA
+    if include_non_paid_channels:
+      required_params += constants.NON_PAID_DATA
     data_tensors = new_data.validate_and_fill_missing_data(
         required_tensors_names=required_params, meridian=self._meridian
     )
@@ -2193,14 +2194,7 @@ class Analyzer:
     }
     self._check_revenue_data_exists(use_kpi)
     self._validate_geo_and_time_granularity(**dim_kwargs)
-    required_values = [
-        constants.MEDIA,
-        constants.MEDIA_SPEND,
-        constants.REACH,
-        constants.FREQUENCY,
-        constants.RF_SPEND,
-        constants.REVENUE_PER_KPI,
-    ]
+    required_values = constants.PERFORMANCE_DATA
     if not new_data:
       new_data = DataTensors()
     filled_data = new_data.validate_and_fill_missing_data(
@@ -2208,7 +2202,7 @@ class Analyzer:
         meridian=self._meridian,
     )
     numerator = self.incremental_outcome(
-        new_data=filled_data,
+        new_data=filled_data.filter_fields(constants.PAID_DATA),
         scaling_factor0=1,
         scaling_factor1=1 + incremental_increase,
         inverse_transform_outcome=True,
@@ -2322,14 +2316,7 @@ class Analyzer:
     }
     self._check_revenue_data_exists(use_kpi)
     self._validate_geo_and_time_granularity(**dim_kwargs)
-    required_values = [
-        constants.MEDIA,
-        constants.MEDIA_SPEND,
-        constants.REACH,
-        constants.FREQUENCY,
-        constants.RF_SPEND,
-        constants.REVENUE_PER_KPI,
-    ]
+    required_values = constants.PERFORMANCE_DATA
     if not new_data:
       new_data = DataTensors()
     filled_data = new_data.validate_and_fill_missing_data(
@@ -2337,7 +2324,7 @@ class Analyzer:
         meridian=self._meridian,
     )
     incremental_outcome = self.incremental_outcome(
-        new_data=filled_data,
+        new_data=filled_data.filter_fields(constants.PAID_DATA),
         **incremental_outcome_kwargs,
         **dim_kwargs,
     )
@@ -2579,21 +2566,15 @@ class Analyzer:
     # Set up the coordinates.
     coords = {
-        constants.METRIC: (
-            [constants.METRIC],
-            [constants.MEAN, constants.CI_LO, constants.CI_HI],
-        ),
+        constants.METRIC: [constants.MEAN, constants.CI_LO, constants.CI_HI],
     }
     if not aggregate_geos:
-      coords[constants.GEO] = ([constants.GEO], mmm.input_data.geo.data)
+      coords[constants.GEO] = mmm.input_data.geo.data
     if not aggregate_times:
-      coords[constants.TIME] = ([constants.TIME], mmm.input_data.time.data)
+      coords[constants.TIME] = mmm.input_data.time.data
     if can_split_by_holdout:
-      coords[constants.EVALUATION_SET_VAR] = (
-          [constants.EVALUATION_SET_VAR],
-          list(constants.EVALUATION_SET),
-      )
+      coords[constants.EVALUATION_SET_VAR] = list(constants.EVALUATION_SET)
     # Set up the dimensions.
     actual_dims = ((constants.GEO,) if not aggregate_geos else ()) + (
@@ -2879,7 +2860,7 @@ class Analyzer:
     batched_kwargs = {"batch_size": batch_size}
     new_data = new_data or DataTensors()
     aggregated_impressions = self.get_aggregated_impressions(
-        new_data=new_data,
+        new_data=new_data.filter_fields(constants.IMPRESSIONS_DATA),
         optimal_frequency=optimal_frequency,
         include_non_paid_channels=include_non_paid_channels,
         **dim_kwargs,
@@ -2892,9 +2873,12 @@ class Analyzer:
         axis=-1,
     )
+    incremental_outcome_fields = list(
+        constants.PAID_DATA + constants.NON_PAID_DATA
+    )
     incremental_outcome_prior = self.compute_incremental_outcome_aggregate(
         use_posterior=False,
-        new_data=new_data,
+        new_data=new_data.filter_fields(incremental_outcome_fields),
         use_kpi=use_kpi,
         include_non_paid_channels=include_non_paid_channels,
         non_media_baseline_values=non_media_baseline_values,
@@ -2903,7 +2887,7 @@ class Analyzer:
     )
     incremental_outcome_posterior = self.compute_incremental_outcome_aggregate(
         use_posterior=True,
-        new_data=new_data,
+        new_data=new_data.filter_fields(incremental_outcome_fields),
         use_kpi=use_kpi,
         include_non_paid_channels=include_non_paid_channels,
         non_media_baseline_values=non_media_baseline_values,
@@ -2912,7 +2896,7 @@ class Analyzer:
     )
     incremental_outcome_mroi_prior = self.compute_incremental_outcome_aggregate(
         use_posterior=False,
-        new_data=new_data,
+        new_data=new_data.filter_fields(incremental_outcome_fields),
         use_kpi=use_kpi,
         by_reach=marginal_roi_by_reach,
         scaling_factor0=1,
@@ -2925,7 +2909,7 @@ class Analyzer:
     incremental_outcome_mroi_posterior = (
         self.compute_incremental_outcome_aggregate(
             use_posterior=True,
-            new_data=new_data,
+            new_data=new_data.filter_fields(incremental_outcome_fields),
             use_kpi=use_kpi,
             by_reach=marginal_roi_by_reach,
             scaling_factor0=1,
@@ -2947,19 +2931,14 @@ class Analyzer:
         if include_non_paid_channels
         else self._meridian.input_data.get_all_paid_channels()
     )
-    xr_coords = {
-        constants.CHANNEL: (
-            [constants.CHANNEL],
-            list(channels) + [constants.ALL_CHANNELS],
-        ),
-    }
+    xr_coords = {constants.CHANNEL: list(channels) + [constants.ALL_CHANNELS]}
     if not aggregate_geos:
       geo_dims = (
           self._meridian.input_data.geo.data
           if selected_geos is None
           else selected_geos
       )
-      xr_coords[constants.GEO] = ([constants.GEO], geo_dims)
+      xr_coords[constants.GEO] = geo_dims
     if not aggregate_times:
       # Get the time coordinates for flexible time dimensions.
       modified_times = new_data.get_modified_times(self._meridian)
@@ -2975,25 +2954,19 @@ class Analyzer:
         time_dims = times[indices]
       else:
         time_dims = selected_times
-      xr_coords[constants.TIME] = ([constants.TIME], time_dims)
+      xr_coords[constants.TIME] = time_dims
     xr_dims_with_ci_and_distribution = xr_dims + (
         constants.METRIC,
         constants.DISTRIBUTION,
     )
     xr_coords_with_ci_and_distribution = {
-        constants.METRIC: (
-            [constants.METRIC],
-            [
-                constants.MEAN,
-                constants.MEDIAN,
-                constants.CI_LO,
-                constants.CI_HI,
-            ],
-        ),
-        constants.DISTRIBUTION: (
-            [constants.DISTRIBUTION],
-            [constants.PRIOR, constants.POSTERIOR],
-        ),
+        constants.METRIC: [
+            constants.MEAN,
+            constants.MEDIAN,
+            constants.CI_LO,
+            constants.CI_HI,
+        ],
+        constants.DISTRIBUTION: [constants.PRIOR, constants.POSTERIOR],
         **xr_coords,
     }
     incremental_outcome = _central_tendency_and_ci_by_prior_and_posterior(
@@ -3022,14 +2995,14 @@ class Analyzer:
     if new_data.get_modified_times(self._meridian) is None:
       expected_outcome_prior = self.expected_outcome(
           use_posterior=False,
-          new_data=new_data,
+          new_data=new_data.filter_fields(constants.NON_REVENUE_DATA),
           use_kpi=use_kpi,
           **dim_kwargs,
           **batched_kwargs,
       )
       expected_outcome_posterior = self.expected_outcome(
           use_posterior=True,
-          new_data=new_data,
+          new_data=new_data.filter_fields(constants.NON_REVENUE_DATA),
           use_kpi=use_kpi,
           **dim_kwargs,
           **batched_kwargs,
@@ -3070,11 +3043,9 @@ class Analyzer:
     # If non-paid channels are not included, return all metrics, paid and
     # non-paid.
     spend_list = []
-    if new_data is None:
-      new_data = DataTensors()
-    new_spend_tensors = new_data.validate_and_fill_missing_data(
-        [constants.MEDIA_SPEND, constants.RF_SPEND], self._meridian
-    )
+    new_spend_tensors = new_data.filter_fields(
+        constants.SPEND_DATA
+    ).validate_and_fill_missing_data(constants.SPEND_DATA, self._meridian)
     if self._meridian.n_media_channels > 0:
       spend_list.append(new_spend_tensors.media_spend)
     if self._meridian.n_rf_channels > 0:
@@ -3136,7 +3107,7 @@ class Analyzer:
       cpik = self._compute_cpik_aggregate(
           incremental_kpi_prior=self.compute_incremental_outcome_aggregate(
               use_posterior=False,
-              new_data=new_data,
+              new_data=new_data.filter_fields(incremental_outcome_fields),
               use_kpi=True,
               include_non_paid_channels=False,
               **dim_kwargs,
@@ -3144,7 +3115,7 @@ class Analyzer:
           ),
           incremental_kpi_posterior=self.compute_incremental_outcome_aggregate(
               use_posterior=True,
-              new_data=new_data,
+              new_data=new_data.filter_fields(incremental_outcome_fields),
               use_kpi=True,
               include_non_paid_channels=False,
               **dim_kwargs,
@@ -3207,18 +3178,13 @@ class Analyzer:
       (or `(n_channels,)` if geos and times are aggregated) with aggregate
       impression values per channel.
     """
-    tensor_names_list = [
+    tensor_names_list = (
         constants.MEDIA,
         constants.REACH,
         constants.FREQUENCY,
-    ]
+    )
     if include_non_paid_channels:
-      tensor_names_list.extend([
-          constants.ORGANIC_MEDIA,
-          constants.ORGANIC_REACH,
-          constants.ORGANIC_FREQUENCY,
-          constants.NON_MEDIA_TREATMENTS,
-      ])
+      tensor_names_list += constants.NON_PAID_DATA
     if new_data is None:
       new_data = DataTensors()
     data_tensors = new_data.validate_and_fill_missing_data(
@@ -3323,41 +3289,33 @@ class Analyzer:
         + ((constants.TIME,) if not aggregate_times else ())
         + (constants.CHANNEL,)
     )
-    xr_coords = {
-        constants.CHANNEL: ([constants.CHANNEL], [constants.BASELINE]),
-    }
+    xr_coords = {constants.CHANNEL: [constants.BASELINE]}
     if not aggregate_geos:
       geo_dims = (
           self._meridian.input_data.geo.data
           if selected_geos is None
           else selected_geos
       )
-      xr_coords[constants.GEO] = ([constants.GEO], geo_dims)
+      xr_coords[constants.GEO] = geo_dims
     if not aggregate_times:
       time_dims = (
           self._meridian.input_data.time.data
           if selected_times is None
           else selected_times
       )
-      xr_coords[constants.TIME] = ([constants.TIME], time_dims)
+      xr_coords[constants.TIME] = time_dims
     xr_dims_with_ci_and_distribution = xr_dims + (
         constants.METRIC,
         constants.DISTRIBUTION,
     )
     xr_coords_with_ci_and_distribution = {
-        constants.METRIC: (
-            [constants.METRIC],
-            [
-                constants.MEAN,
-                constants.MEDIAN,
-                constants.CI_LO,
-                constants.CI_HI,
-            ],
-        ),
-        constants.DISTRIBUTION: (
-            [constants.DISTRIBUTION],
-            [constants.PRIOR, constants.POSTERIOR],
-        ),
+        constants.METRIC: [
+            constants.MEAN,
+            constants.MEDIAN,
+            constants.CI_LO,
+            constants.CI_HI,
+        ],
+        constants.DISTRIBUTION: [constants.PRIOR, constants.POSTERIOR],
         **xr_coords,
     }
@@ -3414,11 +3372,12 @@ class Analyzer:
   def optimal_freq(
       self,
+      new_data: DataTensors | None = None,
       freq_grid: Sequence[float] | None = None,
       use_posterior: bool = True,
       use_kpi: bool = False,
       selected_geos: Sequence[str | int] | None = None,
-      selected_times: Sequence[str | int] | None = None,
+      selected_times: Sequence[str | int | bool] | None = None,
       confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
   ) -> xr.Dataset:
     """Calculates the optimal frequency that maximizes posterior mean ROI.
@@ -3429,10 +3388,27 @@ class Analyzer:
     number of impressions remains unchanged as frequency varies. Meridian solves
     for the frequency at which posterior mean ROI is optimized.
+    If `new_data=None`, this method calculates the optimal frequency on the
+    values of the paid RF variables that the Meridian object was initialized
+    with. The user can override this historical data through the `new_data`
+    argument. For example,
+    ```python
+    new_data = DataTensors(reach=new_reach, frequency=new_frequency)
+    ```
     Note: The ROI numerator is revenue if `use_kpi` is `False`, otherwise, the
     ROI numerator is KPI units.
     Args:
+      new_data: Optional `DataTensors` object containing `reach`, `frequency`,
+        `rf_spend`, and `revenue_per_kpi`. If provided, the optimal frequency is
+        calculated using the values of the tensors passed in `new_data` and the
+        original values of all the remaining tensors. If `None`, the historical
+        data used to initialize the Meridian object is used. If any of the
+        tensors in `new_data` is provided with a different number of time
+        periods than in `InputData`, then all tensors must be provided with the
+        same number of time periods.
       freq_grid: List of frequency values. The ROI of each channel is calculated
         for each frequency value in the list. By default, the list includes
         numbers from `1.0` to the maximum frequency in increments of `0.1`.
@@ -3443,8 +3419,10 @@ class Analyzer:
         revenue.
       selected_geos: Optional list containing a subset of geos to include. By
         default, all geos are included.
-      selected_times: Optional list containing a subset of times to include. By
-        default, all time periods are included.
+      selected_times: Optional list containing either a subset of dates to
+        include or booleans with length equal to the number of time periods in
+        the `new_data` args, if provided. By default, all time periods are
+        included.
       confidence_level: Confidence level for prior and posterior credible
         intervals, represented as a value between zero and one.
@@ -3475,6 +3453,7 @@ class Analyzer:
       ValueError: If there are no channels with reach and frequency data.
     """
     dist_type = constants.POSTERIOR if use_posterior else constants.PRIOR
+    new_data = new_data or DataTensors()
     if self._meridian.n_rf_channels == 0:
       raise ValueError(
           "Must have at least one channel with reach and frequency data."
@@ -3484,7 +3463,29 @@ class Analyzer:
           f"sample_{dist_type}() must be called prior to calling this method."
       )
-    max_freq = np.max(np.array(self._meridian.rf_tensors.frequency))
+    filled_data = new_data.validate_and_fill_missing_data(
+        constants.RF_DATA,
+        self._meridian,
+    )
+    # TODO: Once treatment type filtering is added, remove adding
+    # dummy media and media spend to `roi()` and `summary_metrics()`. This is a
+    # hack to use `roi()` and `summary_metrics()` for RF only analysis.
+    has_media = self._meridian.n_media_channels > 0
+    n_media_times = (
+        filled_data.get_modified_times(self._meridian)
+        or self._meridian.n_media_times
+    )
+    n_times = (
+        filled_data.get_modified_times(self._meridian) or self._meridian.n_times
+    )
+    dummy_media = tf.ones(
+        (self._meridian.n_geos, n_media_times, self._meridian.n_media_channels)
+    )
+    dummy_media_spend = tf.ones(
+        (self._meridian.n_geos, n_times, self._meridian.n_media_channels)
+    )
+    max_freq = np.max(np.array(filled_data.frequency))
     if freq_grid is None:
       freq_grid = np.arange(1, max_freq, 0.1)
@@ -3494,14 +3495,18 @@ class Analyzer:
     metric_grid = np.zeros((len(freq_grid), self._meridian.n_rf_channels, 4))
     for i, freq in enumerate(freq_grid):
-      new_frequency = tf.ones_like(self._meridian.rf_tensors.frequency) * freq
-      new_reach = (
-          self._meridian.rf_tensors.frequency
-          * self._meridian.rf_tensors.reach
-          / new_frequency
+      new_frequency = tf.ones_like(filled_data.frequency) * freq
+      new_reach = filled_data.frequency * filled_data.reach / new_frequency
+      new_roi_data = DataTensors(
+          reach=new_reach,
+          frequency=new_frequency,
+          rf_spend=filled_data.rf_spend,
+          revenue_per_kpi=filled_data.revenue_per_kpi,
+          media=dummy_media if has_media else None,
+          media_spend=dummy_media_spend if has_media else None,
       )
       metric_grid_temp = self.roi(
-          new_data=DataTensors(reach=new_reach, frequency=new_frequency),
+          new_data=new_roi_data,
           use_posterior=use_posterior,
           selected_geos=selected_geos,
           selected_times=selected_times,
@@ -3521,20 +3526,25 @@ class Analyzer:
     optimal_frequency = [freq_grid[i] for i in optimal_freq_idx]
     optimal_frequency_tensor = tf.convert_to_tensor(
-        tf.ones_like(self._meridian.rf_tensors.frequency) * optimal_frequency,
+        tf.ones_like(filled_data.frequency) * optimal_frequency,
         tf.float32,
     )
     optimal_reach = (
-        self._meridian.rf_tensors.frequency
-        * self._meridian.rf_tensors.reach
-        / optimal_frequency_tensor
+        filled_data.frequency * filled_data.reach / optimal_frequency_tensor
+    )
+    new_summary_metrics_data = DataTensors(
+        reach=optimal_reach,
+        frequency=optimal_frequency_tensor,
+        rf_spend=filled_data.rf_spend,
+        revenue_per_kpi=filled_data.revenue_per_kpi,
+        media=dummy_media if has_media else None,
+        media_spend=dummy_media_spend if has_media else None,
     )
     # Compute the optimized metrics based on the optimal frequency.
     optimized_metrics_by_reach = self.summary_metrics(
-        new_data=DataTensors(
-            reach=optimal_reach, frequency=optimal_frequency_tensor
-        ),
+        new_data=new_summary_metrics_data,
         marginal_roi_by_reach=True,
         selected_geos=selected_geos,
         selected_times=selected_times,
@@ -3544,9 +3554,7 @@ class Analyzer:
         constants.DISTRIBUTION: dist_type,
     })
     optimized_metrics_by_frequency = self.summary_metrics(
-        new_data=DataTensors(
-            reach=optimal_reach, frequency=optimal_frequency_tensor
-        ),
+        new_data=new_summary_metrics_data,
         marginal_roi_by_reach=False,
         selected_geos=selected_geos,
         selected_times=selected_times,
@@ -3594,17 +3602,14 @@ class Analyzer:
     return xr.Dataset(
         data_vars=data_vars,
         coords={
-            constants.FREQUENCY: ([constants.FREQUENCY], freq_grid),
-            constants.RF_CHANNEL: ([constants.RF_CHANNEL], rf_channel_values),
-            constants.METRIC: (
-                [constants.METRIC],
-                [
-                    constants.MEAN,
-                    constants.MEDIAN,
-                    constants.CI_LO,
-                    constants.CI_HI,
-                ],
-            ),
+            constants.FREQUENCY: freq_grid,
+            constants.RF_CHANNEL: rf_channel_values,
+            constants.METRIC: [
+                constants.MEAN,
+                constants.MEDIAN,
+                constants.CI_LO,
+                constants.CI_HI,
+            ],
         },
         attrs={
             constants.CONFIDENCE_LEVEL: confidence_level,
@@ -3677,14 +3682,12 @@ class Analyzer:
     xr_dims = [constants.METRIC, constants.GEO_GRANULARITY]
     xr_coords = {
-        constants.METRIC: (
-            [constants.METRIC],
-            [constants.R_SQUARED, constants.MAPE, constants.WMAPE],
-        ),
-        constants.GEO_GRANULARITY: (
-            [constants.GEO_GRANULARITY],
-            [constants.GEO, constants.NATIONAL],
-        ),
+        constants.METRIC: [
+            constants.R_SQUARED,
+            constants.MAPE,
+            constants.WMAPE,
+        ],
+        constants.GEO_GRANULARITY: [constants.GEO, constants.NATIONAL],
     }
     if self._meridian.revenue_per_kpi is not None:
       input_tensor = self._meridian.kpi * self._meridian.revenue_per_kpi
@@ -3715,10 +3718,7 @@ class Analyzer:
       dataset = xr.Dataset(data_vars=xr_data, coords=xr_coords)
     else:
       xr_dims.append(constants.EVALUATION_SET_VAR)
-      xr_coords[constants.EVALUATION_SET_VAR] = (
-          [constants.EVALUATION_SET_VAR],
-          list(constants.EVALUATION_SET),
-      )
+      xr_coords[constants.EVALUATION_SET_VAR] = list(constants.EVALUATION_SET)
       holdout_id = self._filter_holdout_id_for_selected_geos_and_times(
           self._meridian.model_spec.holdout_id, selected_geos, selected_times
@@ -3913,6 +3913,7 @@ class Analyzer:
       selected_times: Sequence[str] | None = None,
       by_reach: bool = True,
       use_optimal_frequency: bool = False,
+      use_kpi: bool = False,
       confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
       batch_size: int = constants.DEFAULT_BATCH_SIZE,
   ) -> xr.Dataset:
@@ -3940,6 +3941,8 @@ class Analyzer:
         frequency.
       use_optimal_frequency: If `True`, uses the optimal frequency to plot the
         response curves. Defaults to `False`.
+      use_kpi: A boolean flag indicating whether to use KPI instead of revenue
+        to generate the response curves. Defaults to `False`.
       confidence_level: Confidence level for prior and posterior credible
         intervals, represented as a value between zero and one.
       batch_size: Integer representing the maximum draws per chain in each
@@ -3951,7 +3954,6 @@ class Analyzer:
         An `xarray.Dataset` containing the data needed to visualize response
         curves.
     """
-    use_kpi = self._meridian.input_data.revenue_per_kpi is None
     if self._meridian.is_national:
       _warn_if_geo_arg_in_kwargs(
           selected_geos=selected_geos,
@@ -4004,7 +4006,7 @@ class Analyzer:
       )
       inc_outcome_temp = self.incremental_outcome(
           use_posterior=use_posterior,
-          new_data=new_data,
+          new_data=new_data.filter_fields(constants.PAID_DATA),
           inverse_transform_outcome=True,
           batch_size=batch_size,
           use_kpi=use_kpi,
@@ -4035,22 +4037,13 @@ class Analyzer:
       )
     spend_einsum = tf.einsum("k,m->km", np.array(spend_multipliers), spend)
     xr_coords = {
-        constants.CHANNEL: (
-            [constants.CHANNEL],
-            self._meridian.input_data.get_all_paid_channels(),
-        ),
-        constants.METRIC: (
-            [constants.METRIC],
-            [
-                constants.MEAN,
-                constants.CI_LO,
-                constants.CI_HI,
-            ],
-        ),
-        constants.SPEND_MULTIPLIER: (
-            [constants.SPEND_MULTIPLIER],
-            spend_multipliers,
-        ),
+        constants.CHANNEL: self._meridian.input_data.get_all_paid_channels(),
+        constants.METRIC: [
+            constants.MEAN,
+            constants.CI_LO,
+            constants.CI_HI,
+        ],
+        constants.SPEND_MULTIPLIER: spend_multipliers,
     }
     xr_data_vars = {
         constants.SPEND: (
@@ -4068,14 +4061,14 @@ class Analyzer:
   def adstock_decay(
       self, confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL
   ) -> pd.DataFrame:
-    """Calculates adstock decay for media and reach and frequency channels.
+    """Calculates adstock decay for paid media, RF, and organic media channels.
     Args:
       confidence_level: Confidence level for prior and posterior credible
         intervals, represented as a value between zero and one.
     Returns:
-      Pandas DataFrame containing the channel, `time_units`, distribution,
+      Pandas DataFrame containing the `channel`, `time_units`, `distribution`,
       `ci_hi`, `ci_lo`, and `mean` for the Adstock function.
     """
     if (
@@ -4100,63 +4093,75 @@ class Analyzer:
     step_size = 1 / steps_per_time_period
     l_range = np.arange(0, max_lag, step_size)
-    rf_channel_values = (
-        self._meridian.input_data.rf_channel.values
-        if self._meridian.input_data.rf_channel is not None
-        else []
-    )
-    media_channel_values = (
-        self._meridian.input_data.media_channel.values
-        if self._meridian.input_data.media_channel is not None
-        else []
-    )
     xr_dims = [
         constants.TIME_UNITS,
         constants.CHANNEL,
         constants.METRIC,
         constants.DISTRIBUTION,
     ]
-    xr_coords = {
-        constants.TIME_UNITS: ([constants.TIME_UNITS], l_range),
-        constants.CHANNEL: (
-            [constants.CHANNEL],
-            rf_channel_values,
-        ),
-        constants.DISTRIBUTION: (
-            [constants.DISTRIBUTION],
-            [constants.PRIOR, constants.POSTERIOR],
-        ),
-        constants.METRIC: (
-            [constants.METRIC],
-            [constants.MEAN, constants.CI_LO, constants.CI_HI],
-        ),
+    base_xr_coords = {
+        constants.TIME_UNITS: l_range,
+        constants.DISTRIBUTION: [constants.PRIOR, constants.POSTERIOR],
+        constants.METRIC: [constants.MEAN, constants.CI_LO, constants.CI_HI],
     }
-    final_df = pd.DataFrame()
+    final_df_list = []
+    if self._meridian.n_media_channels > 0:
+      media_channel_values = (
+          self._meridian.input_data.media_channel.values
+          if self._meridian.input_data.media_channel is not None
+          else []
+      )
+      media_xr_coords = base_xr_coords | {
+          constants.CHANNEL: media_channel_values
+      }
+      adstock_df_m = self._get_adstock_dataframe(
+          constants.MEDIA,
+          l_range,
+          xr_dims,
+          media_xr_coords,
+          confidence_level,
+      )
+      if not adstock_df_m.empty:
+        final_df_list.append(adstock_df_m)
     if self._meridian.n_rf_channels > 0:
+      rf_channel_values = (
+          self._meridian.input_data.rf_channel.values
+          if self._meridian.input_data.rf_channel is not None
+          else []
+      )
+      rf_xr_coords = base_xr_coords | {constants.CHANNEL: rf_channel_values}
       adstock_df_rf = self._get_adstock_dataframe(
           constants.REACH,
           l_range,
           xr_dims,
-          xr_coords,
+          rf_xr_coords,
           confidence_level,
       )
-      final_df = pd.concat([final_df, adstock_df_rf], axis=0)
-    if self._meridian.n_media_channels > 0:
-      xr_coords[constants.CHANNEL] = ([constants.CHANNEL], media_channel_values)
-      adstock_df_m = self._get_adstock_dataframe(
-          constants.MEDIA,
+      if not adstock_df_rf.empty:
+        final_df_list.append(adstock_df_rf)
+    if self._meridian.n_organic_media_channels > 0:
+      organic_media_channel_values = (
+          self._meridian.input_data.organic_media_channel.values
+          if self._meridian.input_data.organic_media_channel is not None
+          else []
+      )
+      organic_media_xr_coords = base_xr_coords | {
+          constants.CHANNEL: organic_media_channel_values
+      }
+      adstock_df_om = self._get_adstock_dataframe(
+          constants.ORGANIC_MEDIA,
           l_range,
           xr_dims,
-          xr_coords,
+          organic_media_xr_coords,
           confidence_level,
       )
-      final_df = pd.concat([final_df, adstock_df_m], axis=0).reset_index(
-          drop=True
-      )
+      if not adstock_df_om.empty:
+        final_df_list.append(adstock_df_om)
+    final_df = pd.concat(final_df_list, ignore_index=True)
     # Adding an extra column that indicates whether time_units is an integer
     # for marking the discrete points on the plot.
     final_df[constants.IS_INT_TIME_UNIT] = final_df[constants.TIME_UNITS].apply(
@@ -4172,14 +4177,14 @@ class Analyzer:
     """Computes the point-wise mean and credible intervals for the Hill curves.
     Args:
-      channel_type: Type of channel, either `media` or `rf`.
+      channel_type: Type of channel, either `media`, `rf`, or `organic_media`.
       confidence_level: Confidence level for `posterior` and `prior` credible
         intervals, represented as a value between zero and one.
     Returns:
       A DataFrame with data needed to plot the Hill curves, with columns:
-      *   `channel`: `media` or `rf` channel name.
+      *   `channel`: `media`, `rf`, or `organic_media` channel name.
       *   `media_units`: Media (for `media` channels) or average frequency (for
           `rf` channels) units.
       *   `distribution`: Indication of `posterior` or `prior` draw.
@@ -4188,7 +4193,12 @@ class Analyzer:
       *   `ci_lo`: Lower bound of the credible interval of the value of the Hill
           function.
       *   `mean`: Point-wise mean of the value of the Hill function per draw.
-      *   channel_type: Indication of a `media` or `rf` channel.
+      *   `channel_type`: Indication of a `media`, `rf`, or `organic_media`
+          channel.
+    Raises:
+      ValueError: If `channel_type` is not `media`, `rf`, or `organic_media`,
+        or if channels of the requested type are not present in the data.
     """
     if (
         channel_type == constants.MEDIA
@@ -4196,31 +4206,46 @@ class Analyzer:
     ):
       ec = constants.EC_M
       slope = constants.SLOPE_M
-      linspace = np.linspace(
-          0,
-          np.max(
-              np.array(self._meridian.media_tensors.media_scaled), axis=(0, 1)
-          ),
-          constants.HILL_NUM_STEPS,
-      )
       channels = self._meridian.input_data.media_channel.values
+      transformer = self._meridian.media_tensors.media_transformer
+      linspace_max_values = np.max(
+          np.array(self._meridian.media_tensors.media_scaled), axis=(0, 1)
+      )
     elif (
         channel_type == constants.RF
         and self._meridian.input_data.rf_channel is not None
     ):
       ec = constants.EC_RF
       slope = constants.SLOPE_RF
-      linspace = np.linspace(
-          0,
-          np.max(np.array(self._meridian.rf_tensors.frequency), axis=(0, 1)),
-          constants.HILL_NUM_STEPS,
-      )
       channels = self._meridian.input_data.rf_channel.values
+      transformer = None
+      linspace_max_values = np.max(
+          np.array(self._meridian.rf_tensors.frequency), axis=(0, 1)
+      )
+    elif (
+        channel_type == constants.ORGANIC_MEDIA
+        and self._meridian.input_data.organic_media_channel is not None
+    ):
+      ec = constants.EC_OM
+      slope = constants.SLOPE_OM
+      channels = self._meridian.input_data.organic_media_channel.values
+      transformer = (
+          self._meridian.organic_media_tensors.organic_media_transformer
+      )
+      linspace_max_values = np.max(
+          np.array(self._meridian.organic_media_tensors.organic_media_scaled),
+          axis=(0, 1),
+      )
     else:
       raise ValueError(
-          f"Unsupported channel type: {channel_type} or the"
-          " requested type of channels (`media` or `rf`) are not present."
+          f"Unsupported channel type: {channel_type} or the requested type of"
+          " channels (`media`, `rf`, or `organic_media`) are not present."
       )
+    linspace = np.linspace(
+        0,
+        linspace_max_values,
+        constants.HILL_NUM_STEPS,
+    )
     linspace_filler = np.linspace(0, 1, constants.HILL_NUM_STEPS)
     xr_dims = [
         constants.MEDIA_UNITS,
@@ -4229,19 +4254,10 @@ class Analyzer:
         constants.DISTRIBUTION,
     ]
     xr_coords = {
-        constants.MEDIA_UNITS: ([constants.MEDIA_UNITS], linspace_filler),
-        constants.CHANNEL: (
-            [constants.CHANNEL],
-            list(channels),
-        ),
-        constants.DISTRIBUTION: (
-            [constants.DISTRIBUTION],
-            [constants.PRIOR, constants.POSTERIOR],
-        ),
-        constants.METRIC: (
-            [constants.METRIC],
-            [constants.MEAN, constants.CI_LO, constants.CI_HI],
-        ),
+        constants.MEDIA_UNITS: linspace_filler,
+        constants.CHANNEL: list(channels),
+        constants.DISTRIBUTION: [constants.PRIOR, constants.POSTERIOR],
+        constants.METRIC: [constants.MEAN, constants.CI_LO, constants.CI_HI],
     }
     # Expanding the linspace by one dimension since the HillTransformer requires
     # 3-dimensional input as (geo, time, channel).
@@ -4285,13 +4301,10 @@ class Analyzer:
     # Fill media_units or frequency x-axis with the correct range.
     media_units_arr = []
-    if channel_type == constants.MEDIA:
-      media_transformers = transformers.MediaTransformer(
-          self._meridian.media_tensors.media, self._meridian.population
-      )
-      population_scaled_median_m = media_transformers.population_scaled_median_m
+    if transformer is not None:
+      population_scaled_median = transformer.population_scaled_median_m
       x_range_full_shape = linspace * tf.transpose(
-          population_scaled_median_m[:, np.newaxis]
+          population_scaled_median[:, np.newaxis]
       )
     else:
       x_range_full_shape = linspace
@@ -4312,8 +4325,68 @@ class Analyzer:
     df[constants.MEDIA_UNITS] = media_units_arr
     return df
+  def _get_channel_hill_histogram_dataframe(
+      self,
+      channel_type: str,
+      data_to_histogram: tf.Tensor,
+      channel_names: Sequence[str],
+      n_bins: int,
+  ) -> pd.DataFrame:
+    """Calculates Hill histogram dataframe for a given channel type's values.
+    Args:
+      channel_type: The type of channel (e.g., 'rf', 'media', 'organic_media').
+      data_to_histogram: The 2D tensor (observations, channels) containing the
+        data whose distribution needs to be histogrammed for each channel.
+      channel_names: The names corresponding to the channels in
+        data_to_histogram.
+      n_bins: The number of bins for the histogram.
+    Returns:
+      A Pandas DataFrame containing the calculated histogram data for all
+      channels of the given type. Returns an empty DataFrame if no valid
+      data is found for any channel.
+    """
+    channels_data = {
+        constants.CHANNEL: [],
+        constants.CHANNEL_TYPE: [],
+        constants.SCALED_COUNT_HISTOGRAM: [],
+        constants.COUNT_HISTOGRAM: [],
+        constants.START_INTERVAL_HISTOGRAM: [],
+        constants.END_INTERVAL_HISTOGRAM: [],
+    }
+    for i, channel_name in enumerate(channel_names):
+      channel_data_np = data_to_histogram[:, i].numpy()
+      channel_data_np = channel_data_np[~np.isnan(channel_data_np)]
+      if channel_data_np.size == 0:
+        continue
+      counts_per_bucket, buckets = np.histogram(
+          channel_data_np, bins=n_bins, density=True
+      )
+      max_counts = (
+          np.max(counts_per_bucket) if np.max(counts_per_bucket) > 0 else 1.0
+      )
+      num_buckets = len(counts_per_bucket)
+      channels_data[constants.CHANNEL].extend([channel_name] * num_buckets)
+      channels_data[constants.CHANNEL_TYPE].extend([channel_type] * num_buckets)
+      channels_data[constants.SCALED_COUNT_HISTOGRAM].extend(
+          counts_per_bucket / max_counts
+      )
+      channels_data[constants.COUNT_HISTOGRAM].extend(counts_per_bucket)
+      channels_data[constants.START_INTERVAL_HISTOGRAM].extend(buckets[:-1])
+      channels_data[constants.END_INTERVAL_HISTOGRAM].extend(buckets[1:])
+    return pd.DataFrame(channels_data)
   def _get_hill_histogram_dataframe(self, n_bins: int) -> pd.DataFrame:
-    """Returns the bucketed media_units counts per each `media` or `rf` channel.
+    """Calculates histogram data for all media, RF, and organic media channels.
+    Computes histogram data for the distribution of media units (for media or
+    organic media channels) or frequency (for RF channels) across
+    observations.
     Args:
       n_bins: Number of equal-width bins to include in the histogram for the
@@ -4339,73 +4412,64 @@ class Analyzer:
     """
     n_geos = self._meridian.n_geos
     n_media_times = self._meridian.n_media_times
-    n_rf_channels = self._meridian.n_rf_channels
-    n_media_channels = self._meridian.n_media_channels
-    (
-        channels,
-        scaled_count,
-        channel_type_arr,
-        start_interval_histogram,
-        end_interval_histogram,
-        count,
-    ) = ([], [], [], [], [], [])
+    df_list = []
     # RF.
     if self._meridian.input_data.rf_channel is not None:
-      frequency = (
-          self._meridian.rf_tensors.frequency
-      )  # Shape: (n_geos, n_media_times, n_channels).
-      reshaped_frequency = tf.reshape(
-          frequency, (n_geos * n_media_times, n_rf_channels)
-      )
-      for i, channel in enumerate(self._meridian.input_data.rf_channel.values):
-        # Bucketize the histogram data for RF channels.
-        counts_per_bucket, buckets = np.histogram(
-            reshaped_frequency[:, i], bins=n_bins, density=True
+      frequency = self._meridian.rf_tensors.frequency
+      if frequency is not None:
+        reshaped_frequency = tf.reshape(
+            frequency, (n_geos * n_media_times, self._meridian.n_rf_channels)
+        )
+        rf_hist_data = self._get_channel_hill_histogram_dataframe(
+            channel_type=constants.RF,
+            data_to_histogram=reshaped_frequency,
+            channel_names=self._meridian.input_data.rf_channel.values,
+            n_bins=n_bins,
         )
-        channels.extend([channel] * len(counts_per_bucket))
-        channel_type_arr.extend([constants.RF] * len(counts_per_bucket))
-        scaled_count.extend(counts_per_bucket / max(counts_per_bucket))
-        count.extend(counts_per_bucket)
-        start_interval_histogram.extend(buckets[:-1])
-        end_interval_histogram.extend(buckets[1:])
+        df_list.append(pd.DataFrame(rf_hist_data))
     # Media.
     if self._meridian.input_data.media_channel is not None:
-      transformer = transformers.MediaTransformer(
-          self._meridian.media_tensors.media, self._meridian.population
-      )
-      scaled = (
-          self._meridian.media_tensors.media_scaled
-      )  # Shape: (n_geos, n_media_times, n_channels)
-      population_scaled_median = transformer.population_scaled_median_m
-      scaled_media_units = scaled * population_scaled_median
-      reshaped_scaled_media_units = tf.reshape(
-          scaled_media_units, (n_geos * n_media_times, n_media_channels)
+      transformer = self._meridian.media_tensors.media_transformer
+      scaled = self._meridian.media_tensors.media_scaled
+      if transformer is not None and scaled is not None:
+        population_scaled_median = transformer.population_scaled_median_m
+        scaled_media_units = scaled * population_scaled_median
+        reshaped_scaled_media_units = tf.reshape(
+            scaled_media_units,
+            (n_geos * n_media_times, self._meridian.n_media_channels),
+        )
+        media_hist_data = self._get_channel_hill_histogram_dataframe(
+            channel_type=constants.MEDIA,
+            data_to_histogram=reshaped_scaled_media_units,
+            channel_names=self._meridian.input_data.media_channel.values,
+            n_bins=n_bins,
+        )
+        df_list.append(pd.DataFrame(media_hist_data))
+    # Organic media.
+    if self._meridian.input_data.organic_media_channel is not None:
+      transformer_om = (
+          self._meridian.organic_media_tensors.organic_media_transformer
       )
-      for i, channel in enumerate(
-          self._meridian.input_data.media_channel.values
-      ):
-        # Bucketize the histogram data for media channels.
-        counts_per_bucket, buckets = np.histogram(
-            reshaped_scaled_media_units[:, i], bins=n_bins, density=True
+      scaled_om = self._meridian.organic_media_tensors.organic_media_scaled
+      if transformer_om is not None and scaled_om is not None:
+        population_scaled_median_om = transformer_om.population_scaled_median_m
+        scaled_organic_media_units = scaled_om * population_scaled_median_om
+        reshaped_scaled_organic_media_units = tf.reshape(
+            scaled_organic_media_units,
+            (n_geos * n_media_times, self._meridian.n_organic_media_channels),
         )
-        channel_type_arr.extend([constants.MEDIA] * len(counts_per_bucket))
-        channels.extend([channel] * (len(counts_per_bucket)))
-        scaled_count.extend(counts_per_bucket / max(counts_per_bucket))
-        count.extend(counts_per_bucket)
-        start_interval_histogram.extend(buckets[:-1])
-        end_interval_histogram.extend(buckets[1:])
-    return pd.DataFrame({
-        constants.CHANNEL: channels,
-        constants.CHANNEL_TYPE: channel_type_arr,
-        constants.SCALED_COUNT_HISTOGRAM: scaled_count,
-        constants.COUNT_HISTOGRAM: count,
-        constants.START_INTERVAL_HISTOGRAM: start_interval_histogram,
-        constants.END_INTERVAL_HISTOGRAM: end_interval_histogram,
-    })
+        organic_media_hist_data = self._get_channel_hill_histogram_dataframe(
+            channel_type=constants.ORGANIC_MEDIA,
+            data_to_histogram=reshaped_scaled_organic_media_units,
+            channel_names=self._meridian.input_data.organic_media_channel.values,
+            n_bins=n_bins,
+        )
+        df_list.append(pd.DataFrame(organic_media_hist_data))
+    return pd.concat(df_list, ignore_index=True)
   def hill_curves(
       self,
@@ -4453,17 +4517,16 @@ class Analyzer:
       )
     final_dfs = [pd.DataFrame()]
-    if self._meridian.n_media_channels > 0:
-      hill_df_media = self._get_hill_curves_dataframe(
-          constants.MEDIA, confidence_level
-      )
-      final_dfs.append(hill_df_media)
-    if self._meridian.n_rf_channels > 0:
-      hill_df_rf = self._get_hill_curves_dataframe(
-          constants.RF, confidence_level
-      )
-      final_dfs.append(hill_df_rf)
+    for n_channels, channel_type in [
+        (self._meridian.n_media_channels, constants.MEDIA),
+        (self._meridian.n_rf_channels, constants.RF),
+        (self._meridian.n_organic_media_channels, constants.ORGANIC_MEDIA),
+    ]:
+      if n_channels > 0:
+        hill_df = self._get_hill_curves_dataframe(
+            channel_type, confidence_level
+        )
+        final_dfs.append(hill_df)
     final_dfs.append(self._get_hill_histogram_dataframe(n_bins=n_bins))
     return pd.concat(final_dfs)

google-meridian 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl

google-meridian 1.0.6py3-none-any.whl → 1.0.8py3-none-any.whl