google-meridian 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -181,6 +181,13 @@ class DataTensors(tf.experimental.ExtensionType):
     return new_tensor.shape[1]
   return None
 
+  def filter_fields(self, fields: Sequence[str]) -> Self:
+    """Returns a new DataTensors object with only the specified fields."""
+    output = {}
+    for field in fields:
+      output[field] = getattr(self, field)
+    return DataTensors(**output)
+
   def validate_and_fill_missing_data(
       self,
       required_tensors_names: Sequence[str],
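The new `filter_fields` helper returns a copy of a `DataTensors` container restricted to the named fields; later hunks in this diff use it to hand each computation only the tensors it needs. A minimal usage sketch (the tensor variables are hypothetical placeholders; fields that are not selected are simply not passed to the new object):

```python
# Hypothetical usage sketch, not taken verbatim from the package.
new_data = DataTensors(media=new_media, media_spend=new_media_spend)

# Keep only the media tensor before passing the container on.
media_only = new_data.filter_fields([constants.MEDIA])
```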
@@ -896,8 +903,8 @@ class Analyzer:
     """Computes decayed effect means and CIs for media or RF channels.
 
     Args:
-      channel_type: Specifies `media` or `reach` for computing prior and
-        posterior decayed effects.
+      channel_type: Specifies `media`, `reach`, or `organic_media` for computing
+        prior and posterior decayed effects.
       l_range: The range of time across which the adstock effect is computed.
       xr_dims: A list of dimensions for the output dataset.
       xr_coords: A dictionary with the coordinates for the output dataset.
@@ -914,12 +921,22 @@ class Analyzer:
           self._meridian.inference_data.posterior.alpha_m.values,
           (-1, self._meridian.n_media_channels),
       )
-    else:
+    elif channel_type is constants.REACH:
       prior = self._meridian.inference_data.prior.alpha_rf.values[0]
       posterior = np.reshape(
           self._meridian.inference_data.posterior.alpha_rf.values,
           (-1, self._meridian.n_rf_channels),
       )
+    elif channel_type is constants.ORGANIC_MEDIA:
+      prior = self._meridian.inference_data.prior.alpha_om.values[0]
+      posterior = np.reshape(
+          self._meridian.inference_data.posterior.alpha_om.values,
+          (-1, self._meridian.n_organic_media_channels),
+      )
+    else:
+      raise ValueError(
+          f"Unsupported channel type for adstock decay: '{channel_type}'. "
+      )
 
     decayed_effect_prior = (
         prior[np.newaxis, ...] ** l_range[:, np.newaxis, np.newaxis, np.newaxis]
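For context, the decayed effect tabulated just above is geometric: a retention rate `alpha` contributes `alpha ** l` at lag `l`, and the expression broadcasts every sampled `alpha` against the lag grid in one step. A small NumPy sketch of that broadcasting pattern (the shapes are illustrative, not the package's actual draw dimensions):

```python
import numpy as np

l_range = np.arange(0, 5, 0.5)                # lags at which the decay is evaluated
alpha = np.random.uniform(size=(2, 100, 3))   # e.g. (chains, draws, channels) retention rates

# alpha ** l for every draw and every lag via broadcasting:
decayed = alpha[np.newaxis, ...] ** l_range[:, np.newaxis, np.newaxis, np.newaxis]
print(decayed.shape)                          # (10, 2, 100, 3)
```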
@@ -1455,16 +1472,7 @@ class Analyzer:
     if new_data is None:
       new_data = DataTensors()
 
-    required_fields = [
-        constants.CONTROLS,
-        constants.MEDIA,
-        constants.REACH,
-        constants.FREQUENCY,
-        constants.ORGANIC_MEDIA,
-        constants.ORGANIC_REACH,
-        constants.ORGANIC_FREQUENCY,
-        constants.NON_MEDIA_TREATMENTS,
-    ]
+    required_fields = constants.NON_REVENUE_DATA
     filled_tensors = new_data.validate_and_fill_missing_data(
         required_tensors_names=required_fields,
         meridian=self._meridian,
@@ -1841,8 +1849,8 @@ class Analyzer:
         include or booleans with length equal to the number of time periods in
         `new_data`, if provided. If `new_data` is provided,
         `media_selected_times` can select any subset of time periods in
-        `new_data`. If `new_data is not provided, `media_selected_times` selects
-        from `InputData.time`. The incremental outcome corresponds to
+        `new_data`. If `new_data` is not provided, `media_selected_times`
+        selects from `InputData.time`. The incremental outcome corresponds to
         incremental KPI generated during the `selected_times` arg by treatment
         variables executed during the `media_selected_times` arg. For each
         channel, the incremental outcome is defined as the difference between
@@ -1922,16 +1930,9 @@ class Analyzer:
     if new_data is None:
       new_data = DataTensors()
 
-    required_params = [
-        constants.MEDIA,
-        constants.REACH,
-        constants.FREQUENCY,
-        constants.ORGANIC_MEDIA,
-        constants.ORGANIC_REACH,
-        constants.ORGANIC_FREQUENCY,
-        constants.NON_MEDIA_TREATMENTS,
-        constants.REVENUE_PER_KPI,
-    ]
+    required_params = constants.PAID_DATA
+    if include_non_paid_channels:
+      required_params += constants.NON_PAID_DATA
     data_tensors = new_data.validate_and_fill_missing_data(
         required_tensors_names=required_params, meridian=self._meridian
     )
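This hunk, like several below, swaps hand-written lists of tensor names for shared tuples from the `constants` module (`PAID_DATA`, `NON_PAID_DATA`, `NON_REVENUE_DATA`, and similar). Assuming those constants are plain tuples of field-name strings (their exact contents are not shown in this diff), the `+=` rebinds the local name to a fresh tuple rather than mutating the module-level constant, as in this illustrative sketch:

```python
# Illustrative stand-ins only; the real contents live in the package's constants module.
PAID_DATA = ("media", "media_spend", "reach", "frequency", "rf_spend")
NON_PAID_DATA = ("organic_media", "organic_reach", "organic_frequency", "non_media_treatments")

required_params = PAID_DATA
required_params += NON_PAID_DATA  # builds a new tuple; PAID_DATA itself is unchanged
assert PAID_DATA == ("media", "media_spend", "reach", "frequency", "rf_spend")
```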
@@ -2193,14 +2194,7 @@ class Analyzer:
     }
     self._check_revenue_data_exists(use_kpi)
     self._validate_geo_and_time_granularity(**dim_kwargs)
-    required_values = [
-        constants.MEDIA,
-        constants.MEDIA_SPEND,
-        constants.REACH,
-        constants.FREQUENCY,
-        constants.RF_SPEND,
-        constants.REVENUE_PER_KPI,
-    ]
+    required_values = constants.PERFORMANCE_DATA
     if not new_data:
       new_data = DataTensors()
     filled_data = new_data.validate_and_fill_missing_data(
@@ -2208,7 +2202,7 @@ class Analyzer:
         meridian=self._meridian,
     )
     numerator = self.incremental_outcome(
-        new_data=filled_data,
+        new_data=filled_data.filter_fields(constants.PAID_DATA),
         scaling_factor0=1,
         scaling_factor1=1 + incremental_increase,
         inverse_transform_outcome=True,
@@ -2322,14 +2316,7 @@ class Analyzer:
     }
     self._check_revenue_data_exists(use_kpi)
     self._validate_geo_and_time_granularity(**dim_kwargs)
-    required_values = [
-        constants.MEDIA,
-        constants.MEDIA_SPEND,
-        constants.REACH,
-        constants.FREQUENCY,
-        constants.RF_SPEND,
-        constants.REVENUE_PER_KPI,
-    ]
+    required_values = constants.PERFORMANCE_DATA
     if not new_data:
       new_data = DataTensors()
     filled_data = new_data.validate_and_fill_missing_data(
@@ -2337,7 +2324,7 @@ class Analyzer:
         meridian=self._meridian,
     )
     incremental_outcome = self.incremental_outcome(
-        new_data=filled_data,
+        new_data=filled_data.filter_fields(constants.PAID_DATA),
         **incremental_outcome_kwargs,
         **dim_kwargs,
     )
@@ -2579,21 +2566,15 @@ class Analyzer:
 
     # Set up the coordinates.
     coords = {
-        constants.METRIC: (
-            [constants.METRIC],
-            [constants.MEAN, constants.CI_LO, constants.CI_HI],
-        ),
+        constants.METRIC: [constants.MEAN, constants.CI_LO, constants.CI_HI],
     }
 
     if not aggregate_geos:
-      coords[constants.GEO] = ([constants.GEO], mmm.input_data.geo.data)
+      coords[constants.GEO] = mmm.input_data.geo.data
     if not aggregate_times:
-      coords[constants.TIME] = ([constants.TIME], mmm.input_data.time.data)
+      coords[constants.TIME] = mmm.input_data.time.data
     if can_split_by_holdout:
-      coords[constants.EVALUATION_SET_VAR] = (
-          [constants.EVALUATION_SET_VAR],
-          list(constants.EVALUATION_SET),
-      )
+      coords[constants.EVALUATION_SET_VAR] = list(constants.EVALUATION_SET)
 
     # Set up the dimensions.
     actual_dims = ((constants.GEO,) if not aggregate_geos else ()) + (
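The coordinate simplifications in this and the following hunks rely on xarray's shorthand: when a coordinate's name matches its dimension, passing a bare list of values is equivalent to the explicit `(dims, values)` tuple form. A quick standalone check of that equivalence:

```python
import xarray as xr

values = ["mean", "ci_lo", "ci_hi"]

explicit = xr.Dataset(coords={"metric": (["metric"], values)})
shorthand = xr.Dataset(coords={"metric": values})
assert explicit.identical(shorthand)
```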
@@ -2879,7 +2860,7 @@ class Analyzer:
     batched_kwargs = {"batch_size": batch_size}
     new_data = new_data or DataTensors()
     aggregated_impressions = self.get_aggregated_impressions(
-        new_data=new_data,
+        new_data=new_data.filter_fields(constants.IMPRESSIONS_DATA),
         optimal_frequency=optimal_frequency,
         include_non_paid_channels=include_non_paid_channels,
         **dim_kwargs,
@@ -2892,9 +2873,12 @@ class Analyzer:
         axis=-1,
     )
 
+    incremental_outcome_fields = list(
+        constants.PAID_DATA + constants.NON_PAID_DATA
+    )
     incremental_outcome_prior = self.compute_incremental_outcome_aggregate(
         use_posterior=False,
-        new_data=new_data,
+        new_data=new_data.filter_fields(incremental_outcome_fields),
         use_kpi=use_kpi,
         include_non_paid_channels=include_non_paid_channels,
         non_media_baseline_values=non_media_baseline_values,
@@ -2903,7 +2887,7 @@ class Analyzer:
     )
     incremental_outcome_posterior = self.compute_incremental_outcome_aggregate(
         use_posterior=True,
-        new_data=new_data,
+        new_data=new_data.filter_fields(incremental_outcome_fields),
         use_kpi=use_kpi,
         include_non_paid_channels=include_non_paid_channels,
         non_media_baseline_values=non_media_baseline_values,
@@ -2912,7 +2896,7 @@ class Analyzer:
     )
     incremental_outcome_mroi_prior = self.compute_incremental_outcome_aggregate(
         use_posterior=False,
-        new_data=new_data,
+        new_data=new_data.filter_fields(incremental_outcome_fields),
         use_kpi=use_kpi,
         by_reach=marginal_roi_by_reach,
         scaling_factor0=1,
@@ -2925,7 +2909,7 @@ class Analyzer:
     incremental_outcome_mroi_posterior = (
         self.compute_incremental_outcome_aggregate(
             use_posterior=True,
-            new_data=new_data,
+            new_data=new_data.filter_fields(incremental_outcome_fields),
             use_kpi=use_kpi,
             by_reach=marginal_roi_by_reach,
             scaling_factor0=1,
@@ -2947,19 +2931,14 @@ class Analyzer:
         if include_non_paid_channels
         else self._meridian.input_data.get_all_paid_channels()
     )
-    xr_coords = {
-        constants.CHANNEL: (
-            [constants.CHANNEL],
-            list(channels) + [constants.ALL_CHANNELS],
-        ),
-    }
+    xr_coords = {constants.CHANNEL: list(channels) + [constants.ALL_CHANNELS]}
     if not aggregate_geos:
       geo_dims = (
           self._meridian.input_data.geo.data
           if selected_geos is None
           else selected_geos
       )
-      xr_coords[constants.GEO] = ([constants.GEO], geo_dims)
+      xr_coords[constants.GEO] = geo_dims
     if not aggregate_times:
       # Get the time coordinates for flexible time dimensions.
       modified_times = new_data.get_modified_times(self._meridian)
@@ -2975,25 +2954,19 @@ class Analyzer:
         time_dims = times[indices]
       else:
         time_dims = selected_times
-      xr_coords[constants.TIME] = ([constants.TIME], time_dims)
+      xr_coords[constants.TIME] = time_dims
     xr_dims_with_ci_and_distribution = xr_dims + (
         constants.METRIC,
         constants.DISTRIBUTION,
     )
     xr_coords_with_ci_and_distribution = {
-        constants.METRIC: (
-            [constants.METRIC],
-            [
-                constants.MEAN,
-                constants.MEDIAN,
-                constants.CI_LO,
-                constants.CI_HI,
-            ],
-        ),
-        constants.DISTRIBUTION: (
-            [constants.DISTRIBUTION],
-            [constants.PRIOR, constants.POSTERIOR],
-        ),
+        constants.METRIC: [
+            constants.MEAN,
+            constants.MEDIAN,
+            constants.CI_LO,
+            constants.CI_HI,
+        ],
+        constants.DISTRIBUTION: [constants.PRIOR, constants.POSTERIOR],
         **xr_coords,
     }
     incremental_outcome = _central_tendency_and_ci_by_prior_and_posterior(
@@ -3022,14 +2995,14 @@ class Analyzer:
     if new_data.get_modified_times(self._meridian) is None:
       expected_outcome_prior = self.expected_outcome(
           use_posterior=False,
-          new_data=new_data,
+          new_data=new_data.filter_fields(constants.NON_REVENUE_DATA),
           use_kpi=use_kpi,
           **dim_kwargs,
           **batched_kwargs,
       )
       expected_outcome_posterior = self.expected_outcome(
           use_posterior=True,
-          new_data=new_data,
+          new_data=new_data.filter_fields(constants.NON_REVENUE_DATA),
           use_kpi=use_kpi,
           **dim_kwargs,
           **batched_kwargs,
@@ -3070,11 +3043,9 @@ class Analyzer:
     # If non-paid channels are not included, return all metrics, paid and
     # non-paid.
     spend_list = []
-    if new_data is None:
-      new_data = DataTensors()
-    new_spend_tensors = new_data.validate_and_fill_missing_data(
-        [constants.MEDIA_SPEND, constants.RF_SPEND], self._meridian
-    )
+    new_spend_tensors = new_data.filter_fields(
+        constants.SPEND_DATA
+    ).validate_and_fill_missing_data(constants.SPEND_DATA, self._meridian)
     if self._meridian.n_media_channels > 0:
       spend_list.append(new_spend_tensors.media_spend)
     if self._meridian.n_rf_channels > 0:
@@ -3136,7 +3107,7 @@ class Analyzer:
     cpik = self._compute_cpik_aggregate(
         incremental_kpi_prior=self.compute_incremental_outcome_aggregate(
             use_posterior=False,
-            new_data=new_data,
+            new_data=new_data.filter_fields(incremental_outcome_fields),
            use_kpi=True,
             include_non_paid_channels=False,
             **dim_kwargs,
@@ -3144,7 +3115,7 @@ class Analyzer:
         ),
         incremental_kpi_posterior=self.compute_incremental_outcome_aggregate(
             use_posterior=True,
-            new_data=new_data,
+            new_data=new_data.filter_fields(incremental_outcome_fields),
             use_kpi=True,
             include_non_paid_channels=False,
             **dim_kwargs,
@@ -3207,18 +3178,13 @@ class Analyzer:
       (or `(n_channels,)` if geos and times are aggregated) with aggregate
       impression values per channel.
     """
-    tensor_names_list = [
+    tensor_names_list = (
         constants.MEDIA,
         constants.REACH,
         constants.FREQUENCY,
-    ]
+    )
     if include_non_paid_channels:
-      tensor_names_list.extend([
-          constants.ORGANIC_MEDIA,
-          constants.ORGANIC_REACH,
-          constants.ORGANIC_FREQUENCY,
-          constants.NON_MEDIA_TREATMENTS,
-      ])
+      tensor_names_list += constants.NON_PAID_DATA
     if new_data is None:
       new_data = DataTensors()
     data_tensors = new_data.validate_and_fill_missing_data(
@@ -3323,41 +3289,33 @@ class Analyzer:
         + ((constants.TIME,) if not aggregate_times else ())
         + (constants.CHANNEL,)
     )
-    xr_coords = {
-        constants.CHANNEL: ([constants.CHANNEL], [constants.BASELINE]),
-    }
+    xr_coords = {constants.CHANNEL: [constants.BASELINE]}
     if not aggregate_geos:
       geo_dims = (
           self._meridian.input_data.geo.data
           if selected_geos is None
           else selected_geos
       )
-      xr_coords[constants.GEO] = ([constants.GEO], geo_dims)
+      xr_coords[constants.GEO] = geo_dims
     if not aggregate_times:
       time_dims = (
           self._meridian.input_data.time.data
           if selected_times is None
           else selected_times
       )
-      xr_coords[constants.TIME] = ([constants.TIME], time_dims)
+      xr_coords[constants.TIME] = time_dims
     xr_dims_with_ci_and_distribution = xr_dims + (
         constants.METRIC,
         constants.DISTRIBUTION,
     )
     xr_coords_with_ci_and_distribution = {
-        constants.METRIC: (
-            [constants.METRIC],
-            [
-                constants.MEAN,
-                constants.MEDIAN,
-                constants.CI_LO,
-                constants.CI_HI,
-            ],
-        ),
-        constants.DISTRIBUTION: (
-            [constants.DISTRIBUTION],
-            [constants.PRIOR, constants.POSTERIOR],
-        ),
+        constants.METRIC: [
+            constants.MEAN,
+            constants.MEDIAN,
+            constants.CI_LO,
+            constants.CI_HI,
+        ],
+        constants.DISTRIBUTION: [constants.PRIOR, constants.POSTERIOR],
         **xr_coords,
     }
 
@@ -3414,11 +3372,12 @@ class Analyzer:
 
   def optimal_freq(
       self,
+      new_data: DataTensors | None = None,
       freq_grid: Sequence[float] | None = None,
       use_posterior: bool = True,
       use_kpi: bool = False,
       selected_geos: Sequence[str | int] | None = None,
-      selected_times: Sequence[str | int] | None = None,
+      selected_times: Sequence[str | int | bool] | None = None,
       confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
   ) -> xr.Dataset:
     """Calculates the optimal frequency that maximizes posterior mean ROI.
@@ -3429,10 +3388,27 @@ class Analyzer:
     number of impressions remains unchanged as frequency varies. Meridian solves
     for the frequency at which posterior mean ROI is optimized.
 
+    If `new_data=None`, this method calculates the optimal frequency on the
+    values of the paid RF variables that the Meridian object was initialized
+    with. The user can override this historical data through the `new_data`
+    argument. For example,
+
+    ```python
+    new_data = DataTensors(reach=new_reach, frequency=new_frequency)
+    ```
+
     Note: The ROI numerator is revenue if `use_kpi` is `False`, otherwise, the
     ROI numerator is KPI units.
 
     Args:
+      new_data: Optional `DataTensors` object containing `reach`, `frequency`,
+        `rf_spend`, and `revenue_per_kpi`. If provided, the optimal frequency is
+        calculated using the values of the tensors passed in `new_data` and the
+        original values of all the remaining tensors. If `None`, the historical
+        data used to initialize the Meridian object is used. If any of the
+        tensors in `new_data` is provided with a different number of time
+        periods than in `InputData`, then all tensors must be provided with the
+        same number of time periods.
       freq_grid: List of frequency values. The ROI of each channel is calculated
         for each frequency value in the list. By default, the list includes
         numbers from `1.0` to the maximum frequency in increments of `0.1`.
@@ -3443,8 +3419,10 @@ class Analyzer:
         revenue.
       selected_geos: Optional list containing a subset of geos to include. By
         default, all geos are included.
-      selected_times: Optional list containing a subset of times to include. By
-        default, all time periods are included.
+      selected_times: Optional list containing either a subset of dates to
+        include or booleans with length equal to the number of time periods in
+        the `new_data` arg, if provided. By default, all time periods are
+        included.
       confidence_level: Confidence level for prior and posterior credible
         intervals, represented as a value between zero and one.
 
@@ -3475,6 +3453,7 @@ class Analyzer:
       ValueError: If there are no channels with reach and frequency data.
     """
     dist_type = constants.POSTERIOR if use_posterior else constants.PRIOR
+    new_data = new_data or DataTensors()
     if self._meridian.n_rf_channels == 0:
       raise ValueError(
           "Must have at least one channel with reach and frequency data."
@@ -3484,7 +3463,29 @@ class Analyzer:
         f"sample_{dist_type}() must be called prior to calling this method."
     )
 
-    max_freq = np.max(np.array(self._meridian.rf_tensors.frequency))
+    filled_data = new_data.validate_and_fill_missing_data(
+        constants.RF_DATA,
+        self._meridian,
+    )
+    # TODO: Once treatment type filtering is added, remove adding
+    # dummy media and media spend to `roi()` and `summary_metrics()`. This is a
+    # hack to use `roi()` and `summary_metrics()` for RF only analysis.
+    has_media = self._meridian.n_media_channels > 0
+    n_media_times = (
+        filled_data.get_modified_times(self._meridian)
+        or self._meridian.n_media_times
+    )
+    n_times = (
+        filled_data.get_modified_times(self._meridian) or self._meridian.n_times
+    )
+    dummy_media = tf.ones(
+        (self._meridian.n_geos, n_media_times, self._meridian.n_media_channels)
+    )
+    dummy_media_spend = tf.ones(
+        (self._meridian.n_geos, n_times, self._meridian.n_media_channels)
+    )
+
+    max_freq = np.max(np.array(filled_data.frequency))
     if freq_grid is None:
       freq_grid = np.arange(1, max_freq, 0.1)
 
@@ -3494,14 +3495,18 @@ class Analyzer:
     metric_grid = np.zeros((len(freq_grid), self._meridian.n_rf_channels, 4))
 
     for i, freq in enumerate(freq_grid):
-      new_frequency = tf.ones_like(self._meridian.rf_tensors.frequency) * freq
-      new_reach = (
-          self._meridian.rf_tensors.frequency
-          * self._meridian.rf_tensors.reach
-          / new_frequency
+      new_frequency = tf.ones_like(filled_data.frequency) * freq
+      new_reach = filled_data.frequency * filled_data.reach / new_frequency
+      new_roi_data = DataTensors(
+          reach=new_reach,
+          frequency=new_frequency,
+          rf_spend=filled_data.rf_spend,
+          revenue_per_kpi=filled_data.revenue_per_kpi,
+          media=dummy_media if has_media else None,
+          media_spend=dummy_media_spend if has_media else None,
       )
       metric_grid_temp = self.roi(
-          new_data=DataTensors(reach=new_reach, frequency=new_frequency),
+          new_data=new_roi_data,
          use_posterior=use_posterior,
          selected_geos=selected_geos,
          selected_times=selected_times,
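A note on the reweighting inside the loop above: for each candidate frequency, reach is rescaled so that impressions (reach × frequency) stay fixed, which is exactly the assumption stated in the `optimal_freq` docstring. A toy check of the identity (the numbers are made up):

```python
import numpy as np

reach = np.array([1000.0, 2500.0])
frequency = np.array([2.0, 3.2])
impressions = reach * frequency          # held constant across the frequency grid

candidate_freq = 2.5
new_frequency = np.ones_like(frequency) * candidate_freq
new_reach = frequency * reach / new_frequency

np.testing.assert_allclose(new_reach * new_frequency, impressions)
```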
@@ -3521,20 +3526,25 @@ class Analyzer:
 
     optimal_frequency = [freq_grid[i] for i in optimal_freq_idx]
     optimal_frequency_tensor = tf.convert_to_tensor(
-        tf.ones_like(self._meridian.rf_tensors.frequency) * optimal_frequency,
+        tf.ones_like(filled_data.frequency) * optimal_frequency,
         tf.float32,
     )
     optimal_reach = (
-        self._meridian.rf_tensors.frequency
-        * self._meridian.rf_tensors.reach
-        / optimal_frequency_tensor
+        filled_data.frequency * filled_data.reach / optimal_frequency_tensor
+    )
+
+    new_summary_metrics_data = DataTensors(
+        reach=optimal_reach,
+        frequency=optimal_frequency_tensor,
+        rf_spend=filled_data.rf_spend,
+        revenue_per_kpi=filled_data.revenue_per_kpi,
+        media=dummy_media if has_media else None,
+        media_spend=dummy_media_spend if has_media else None,
     )
 
     # Compute the optimized metrics based on the optimal frequency.
     optimized_metrics_by_reach = self.summary_metrics(
-        new_data=DataTensors(
-            reach=optimal_reach, frequency=optimal_frequency_tensor
-        ),
+        new_data=new_summary_metrics_data,
         marginal_roi_by_reach=True,
         selected_geos=selected_geos,
         selected_times=selected_times,
@@ -3544,9 +3554,7 @@ class Analyzer:
         constants.DISTRIBUTION: dist_type,
     })
     optimized_metrics_by_frequency = self.summary_metrics(
-        new_data=DataTensors(
-            reach=optimal_reach, frequency=optimal_frequency_tensor
-        ),
+        new_data=new_summary_metrics_data,
         marginal_roi_by_reach=False,
         selected_geos=selected_geos,
         selected_times=selected_times,
@@ -3594,17 +3602,14 @@ class Analyzer:
     return xr.Dataset(
         data_vars=data_vars,
         coords={
-            constants.FREQUENCY: ([constants.FREQUENCY], freq_grid),
-            constants.RF_CHANNEL: ([constants.RF_CHANNEL], rf_channel_values),
-            constants.METRIC: (
-                [constants.METRIC],
-                [
-                    constants.MEAN,
-                    constants.MEDIAN,
-                    constants.CI_LO,
-                    constants.CI_HI,
-                ],
-            ),
+            constants.FREQUENCY: freq_grid,
+            constants.RF_CHANNEL: rf_channel_values,
+            constants.METRIC: [
+                constants.MEAN,
+                constants.MEDIAN,
+                constants.CI_LO,
+                constants.CI_HI,
+            ],
         },
         attrs={
             constants.CONFIDENCE_LEVEL: confidence_level,
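Taken together, the `optimal_freq` changes above let callers sweep frequency on overridden RF data instead of only on the data the model was trained with. A hedged usage sketch (the tensor variables are hypothetical placeholders; per the new docstring only `reach`, `frequency`, `rf_spend`, and `revenue_per_kpi` are read from `new_data`):

```python
# Sketch only: `analyzer` is an instantiated Analyzer, and the scenario tensors
# must match the shapes the model was built with.
scenario = DataTensors(
    reach=scenario_reach,
    frequency=scenario_frequency,
    rf_spend=scenario_rf_spend,
)
optimal_freq_ds = analyzer.optimal_freq(new_data=scenario, use_posterior=True)
```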
@@ -3677,14 +3682,12 @@ class Analyzer:
 
     xr_dims = [constants.METRIC, constants.GEO_GRANULARITY]
     xr_coords = {
-        constants.METRIC: (
-            [constants.METRIC],
-            [constants.R_SQUARED, constants.MAPE, constants.WMAPE],
-        ),
-        constants.GEO_GRANULARITY: (
-            [constants.GEO_GRANULARITY],
-            [constants.GEO, constants.NATIONAL],
-        ),
+        constants.METRIC: [
+            constants.R_SQUARED,
+            constants.MAPE,
+            constants.WMAPE,
+        ],
+        constants.GEO_GRANULARITY: [constants.GEO, constants.NATIONAL],
     }
     if self._meridian.revenue_per_kpi is not None:
       input_tensor = self._meridian.kpi * self._meridian.revenue_per_kpi
@@ -3715,10 +3718,7 @@ class Analyzer:
       dataset = xr.Dataset(data_vars=xr_data, coords=xr_coords)
     else:
       xr_dims.append(constants.EVALUATION_SET_VAR)
-      xr_coords[constants.EVALUATION_SET_VAR] = (
-          [constants.EVALUATION_SET_VAR],
-          list(constants.EVALUATION_SET),
-      )
+      xr_coords[constants.EVALUATION_SET_VAR] = list(constants.EVALUATION_SET)
 
       holdout_id = self._filter_holdout_id_for_selected_geos_and_times(
           self._meridian.model_spec.holdout_id, selected_geos, selected_times
@@ -3913,6 +3913,7 @@ class Analyzer:
       selected_times: Sequence[str] | None = None,
       by_reach: bool = True,
       use_optimal_frequency: bool = False,
+      use_kpi: bool = False,
       confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
       batch_size: int = constants.DEFAULT_BATCH_SIZE,
   ) -> xr.Dataset:
@@ -3940,6 +3941,8 @@ class Analyzer:
         frequency.
       use_optimal_frequency: If `True`, uses the optimal frequency to plot the
         response curves. Defaults to `False`.
+      use_kpi: A boolean flag indicating whether to use KPI instead of revenue
+        to generate the response curves. Defaults to `False`.
       confidence_level: Confidence level for prior and posterior credible
         intervals, represented as a value between zero and one.
       batch_size: Integer representing the maximum draws per chain in each
@@ -3951,7 +3954,6 @@ class Analyzer:
       An `xarray.Dataset` containing the data needed to visualize response
       curves.
     """
-    use_kpi = self._meridian.input_data.revenue_per_kpi is None
     if self._meridian.is_national:
       _warn_if_geo_arg_in_kwargs(
           selected_geos=selected_geos,
@@ -4004,7 +4006,7 @@ class Analyzer:
       )
       inc_outcome_temp = self.incremental_outcome(
           use_posterior=use_posterior,
-          new_data=new_data,
+          new_data=new_data.filter_fields(constants.PAID_DATA),
           inverse_transform_outcome=True,
           batch_size=batch_size,
           use_kpi=use_kpi,
@@ -4035,22 +4037,13 @@ class Analyzer:
     )
     spend_einsum = tf.einsum("k,m->km", np.array(spend_multipliers), spend)
     xr_coords = {
-        constants.CHANNEL: (
-            [constants.CHANNEL],
-            self._meridian.input_data.get_all_paid_channels(),
-        ),
-        constants.METRIC: (
-            [constants.METRIC],
-            [
-                constants.MEAN,
-                constants.CI_LO,
-                constants.CI_HI,
-            ],
-        ),
-        constants.SPEND_MULTIPLIER: (
-            [constants.SPEND_MULTIPLIER],
-            spend_multipliers,
-        ),
+        constants.CHANNEL: self._meridian.input_data.get_all_paid_channels(),
+        constants.METRIC: [
+            constants.MEAN,
+            constants.CI_LO,
+            constants.CI_HI,
+        ],
+        constants.SPEND_MULTIPLIER: spend_multipliers,
     }
     xr_data_vars = {
         constants.SPEND: (
@@ -4068,14 +4061,14 @@ class Analyzer:
   def adstock_decay(
       self, confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL
   ) -> pd.DataFrame:
-    """Calculates adstock decay for media and reach and frequency channels.
+    """Calculates adstock decay for paid media, RF, and organic media channels.
 
     Args:
       confidence_level: Confidence level for prior and posterior credible
         intervals, represented as a value between zero and one.
 
     Returns:
-      Pandas DataFrame containing the channel, `time_units`, distribution,
+      Pandas DataFrame containing the `channel`, `time_units`, `distribution`,
       `ci_hi`, `ci_lo`, and `mean` for the Adstock function.
     """
     if (
@@ -4100,63 +4093,75 @@ class Analyzer:
     step_size = 1 / steps_per_time_period
     l_range = np.arange(0, max_lag, step_size)
 
-    rf_channel_values = (
-        self._meridian.input_data.rf_channel.values
-        if self._meridian.input_data.rf_channel is not None
-        else []
-    )
-
-    media_channel_values = (
-        self._meridian.input_data.media_channel.values
-        if self._meridian.input_data.media_channel is not None
-        else []
-    )
-
     xr_dims = [
         constants.TIME_UNITS,
         constants.CHANNEL,
         constants.METRIC,
         constants.DISTRIBUTION,
     ]
-    xr_coords = {
-        constants.TIME_UNITS: ([constants.TIME_UNITS], l_range),
-        constants.CHANNEL: (
-            [constants.CHANNEL],
-            rf_channel_values,
-        ),
-        constants.DISTRIBUTION: (
-            [constants.DISTRIBUTION],
-            [constants.PRIOR, constants.POSTERIOR],
-        ),
-        constants.METRIC: (
-            [constants.METRIC],
-            [constants.MEAN, constants.CI_LO, constants.CI_HI],
-        ),
+    base_xr_coords = {
+        constants.TIME_UNITS: l_range,
+        constants.DISTRIBUTION: [constants.PRIOR, constants.POSTERIOR],
+        constants.METRIC: [constants.MEAN, constants.CI_LO, constants.CI_HI],
     }
-    final_df = pd.DataFrame()
+    final_df_list = []
+
+    if self._meridian.n_media_channels > 0:
+      media_channel_values = (
+          self._meridian.input_data.media_channel.values
+          if self._meridian.input_data.media_channel is not None
+          else []
+      )
+      media_xr_coords = base_xr_coords | {
+          constants.CHANNEL: media_channel_values
+      }
+      adstock_df_m = self._get_adstock_dataframe(
+          constants.MEDIA,
+          l_range,
+          xr_dims,
+          media_xr_coords,
+          confidence_level,
+      )
+      if not adstock_df_m.empty:
+        final_df_list.append(adstock_df_m)
 
     if self._meridian.n_rf_channels > 0:
+      rf_channel_values = (
+          self._meridian.input_data.rf_channel.values
+          if self._meridian.input_data.rf_channel is not None
+          else []
+      )
+      rf_xr_coords = base_xr_coords | {constants.CHANNEL: rf_channel_values}
       adstock_df_rf = self._get_adstock_dataframe(
           constants.REACH,
           l_range,
           xr_dims,
-          xr_coords,
+          rf_xr_coords,
           confidence_level,
       )
-      final_df = pd.concat([final_df, adstock_df_rf], axis=0)
-    if self._meridian.n_media_channels > 0:
-      xr_coords[constants.CHANNEL] = ([constants.CHANNEL], media_channel_values)
-      adstock_df_m = self._get_adstock_dataframe(
-          constants.MEDIA,
+      if not adstock_df_rf.empty:
+        final_df_list.append(adstock_df_rf)
+
+    if self._meridian.n_organic_media_channels > 0:
+      organic_media_channel_values = (
+          self._meridian.input_data.organic_media_channel.values
+          if self._meridian.input_data.organic_media_channel is not None
+          else []
+      )
+      organic_media_xr_coords = base_xr_coords | {
+          constants.CHANNEL: organic_media_channel_values
+      }
+      adstock_df_om = self._get_adstock_dataframe(
+          constants.ORGANIC_MEDIA,
           l_range,
           xr_dims,
-          xr_coords,
+          organic_media_xr_coords,
           confidence_level,
       )
-      final_df = pd.concat([final_df, adstock_df_m], axis=0).reset_index(
-          drop=True
-      )
+      if not adstock_df_om.empty:
+        final_df_list.append(adstock_df_om)
 
+    final_df = pd.concat(final_df_list, ignore_index=True)
     # Adding an extra column that indicates whether time_units is an integer
     # for marking the discrete points on the plot.
     final_df[constants.IS_INT_TIME_UNIT] = final_df[constants.TIME_UNITS].apply(
@@ -4172,14 +4177,14 @@ class Analyzer:
     """Computes the point-wise mean and credible intervals for the Hill curves.
 
     Args:
-      channel_type: Type of channel, either `media` or `rf`.
+      channel_type: Type of channel, either `media`, `rf`, or `organic_media`.
      confidence_level: Confidence level for `posterior` and `prior` credible
         intervals, represented as a value between zero and one.
 
     Returns:
       A DataFrame with data needed to plot the Hill curves, with columns:
 
-      * `channel`: `media` or `rf` channel name.
+      * `channel`: `media`, `rf`, or `organic_media` channel name.
       * `media_units`: Media (for `media` channels) or average frequency (for
         `rf` channels) units.
       * `distribution`: Indication of `posterior` or `prior` draw.
@@ -4188,7 +4193,12 @@ class Analyzer:
       * `ci_lo`: Lower bound of the credible interval of the value of the Hill
         function.
       * `mean`: Point-wise mean of the value of the Hill function per draw.
-      * channel_type: Indication of a `media` or `rf` channel.
+      * channel_type: Indication of a `media`, `rf`, or `organic_media`
+        channel.
+
+    Raises:
+      ValueError: If `channel_type` is not one of the recognized constants
+        `media`, `rf`, or `organic_media`.
     """
     if (
         channel_type == constants.MEDIA
@@ -4196,31 +4206,46 @@ class Analyzer:
     ):
       ec = constants.EC_M
       slope = constants.SLOPE_M
-      linspace = np.linspace(
-          0,
-          np.max(
-              np.array(self._meridian.media_tensors.media_scaled), axis=(0, 1)
-          ),
-          constants.HILL_NUM_STEPS,
-      )
       channels = self._meridian.input_data.media_channel.values
+      transformer = self._meridian.media_tensors.media_transformer
+      linspace_max_values = np.max(
+          np.array(self._meridian.media_tensors.media_scaled), axis=(0, 1)
+      )
     elif (
         channel_type == constants.RF
         and self._meridian.input_data.rf_channel is not None
     ):
       ec = constants.EC_RF
       slope = constants.SLOPE_RF
-      linspace = np.linspace(
-          0,
-          np.max(np.array(self._meridian.rf_tensors.frequency), axis=(0, 1)),
-          constants.HILL_NUM_STEPS,
-      )
       channels = self._meridian.input_data.rf_channel.values
+      transformer = None
+      linspace_max_values = np.max(
+          np.array(self._meridian.rf_tensors.frequency), axis=(0, 1)
+      )
+    elif (
+        channel_type == constants.ORGANIC_MEDIA
+        and self._meridian.input_data.organic_media_channel is not None
+    ):
+      ec = constants.EC_OM
+      slope = constants.SLOPE_OM
+      channels = self._meridian.input_data.organic_media_channel.values
+      transformer = (
+          self._meridian.organic_media_tensors.organic_media_transformer
+      )
+      linspace_max_values = np.max(
+          np.array(self._meridian.organic_media_tensors.organic_media_scaled),
+          axis=(0, 1),
+      )
     else:
       raise ValueError(
-          f"Unsupported channel type: {channel_type} or the"
-          " requested type of channels (`media` or `rf`) are not present."
+          f"Unsupported channel type: {channel_type} or the requested type of"
+          " channels (`media`, `rf`, or `organic_media`) are not present."
       )
+    linspace = np.linspace(
+        0,
+        linspace_max_values,
+        constants.HILL_NUM_STEPS,
+    )
     linspace_filler = np.linspace(0, 1, constants.HILL_NUM_STEPS)
     xr_dims = [
         constants.MEDIA_UNITS,
@@ -4229,19 +4254,10 @@ class Analyzer:
         constants.DISTRIBUTION,
     ]
     xr_coords = {
-        constants.MEDIA_UNITS: ([constants.MEDIA_UNITS], linspace_filler),
-        constants.CHANNEL: (
-            [constants.CHANNEL],
-            list(channels),
-        ),
-        constants.DISTRIBUTION: (
-            [constants.DISTRIBUTION],
-            [constants.PRIOR, constants.POSTERIOR],
-        ),
-        constants.METRIC: (
-            [constants.METRIC],
-            [constants.MEAN, constants.CI_LO, constants.CI_HI],
-        ),
+        constants.MEDIA_UNITS: linspace_filler,
+        constants.CHANNEL: list(channels),
+        constants.DISTRIBUTION: [constants.PRIOR, constants.POSTERIOR],
+        constants.METRIC: [constants.MEAN, constants.CI_LO, constants.CI_HI],
     }
     # Expanding the linspace by one dimension since the HillTransformer requires
     # 3-dimensional input as (geo, time, channel).
@@ -4285,13 +4301,10 @@ class Analyzer:
 
     # Fill media_units or frequency x-axis with the correct range.
     media_units_arr = []
-    if channel_type == constants.MEDIA:
-      media_transformers = transformers.MediaTransformer(
-          self._meridian.media_tensors.media, self._meridian.population
-      )
-      population_scaled_median_m = media_transformers.population_scaled_median_m
+    if transformer is not None:
+      population_scaled_median = transformer.population_scaled_median_m
       x_range_full_shape = linspace * tf.transpose(
-          population_scaled_median_m[:, np.newaxis]
+          population_scaled_median[:, np.newaxis]
       )
     else:
       x_range_full_shape = linspace
@@ -4312,8 +4325,68 @@ class Analyzer:
     df[constants.MEDIA_UNITS] = media_units_arr
     return df
 
+  def _get_channel_hill_histogram_dataframe(
+      self,
+      channel_type: str,
+      data_to_histogram: tf.Tensor,
+      channel_names: Sequence[str],
+      n_bins: int,
+  ) -> pd.DataFrame:
+    """Calculates hill histogram dataframe for a given channel type's values.
+
+    Args:
+      channel_type: The type of channel (e.g., 'rf', 'media', 'organic_media').
+      data_to_histogram: The 2D tensor (observations, channels) containing the
+        data whose distribution needs to be histogrammed for each channel.
+      channel_names: The names corresponding to the channels in
+        data_to_histogram.
+      n_bins: The number of bins for the histogram.
+
+    Returns:
+      A Pandas DataFrame containing the calculated histogram data for all
+      channels of the given type. Returns an empty DataFrame if no valid
+      data is found for any channel.
+    """
+    channels_data = {
+        constants.CHANNEL: [],
+        constants.CHANNEL_TYPE: [],
+        constants.SCALED_COUNT_HISTOGRAM: [],
+        constants.COUNT_HISTOGRAM: [],
+        constants.START_INTERVAL_HISTOGRAM: [],
+        constants.END_INTERVAL_HISTOGRAM: [],
+    }
+
+    for i, channel_name in enumerate(channel_names):
+      channel_data_np = data_to_histogram[:, i].numpy()
+      channel_data_np = channel_data_np[~np.isnan(channel_data_np)]
+      if channel_data_np.size == 0:
+        continue
+
+      counts_per_bucket, buckets = np.histogram(
+          channel_data_np, bins=n_bins, density=True
+      )
+      max_counts = (
+          np.max(counts_per_bucket) if np.max(counts_per_bucket) > 0 else 1.0
+      )
+
+      num_buckets = len(counts_per_bucket)
+      channels_data[constants.CHANNEL].extend([channel_name] * num_buckets)
+      channels_data[constants.CHANNEL_TYPE].extend([channel_type] * num_buckets)
+      channels_data[constants.SCALED_COUNT_HISTOGRAM].extend(
+          counts_per_bucket / max_counts
+      )
+      channels_data[constants.COUNT_HISTOGRAM].extend(counts_per_bucket)
+      channels_data[constants.START_INTERVAL_HISTOGRAM].extend(buckets[:-1])
+      channels_data[constants.END_INTERVAL_HISTOGRAM].extend(buckets[1:])
+
+    return pd.DataFrame(channels_data)
+
   def _get_hill_histogram_dataframe(self, n_bins: int) -> pd.DataFrame:
-    """Returns the bucketed media_units counts per each `media` or `rf` channel.
+    """Calculates histogram data for a given channel type's values.
+
+    Computes histogram data for the distribution of media units (for media or
+    organic media channels) or frequency (for RF channels) across
+    observations.
 
     Args:
       n_bins: Number of equal-width bins to include in the histogram for the
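The new helper buckets each channel's values with `np.histogram(..., density=True)` and then rescales the densities by their maximum so every channel's tallest bar equals 1. A standalone sketch of that normalization step (the data here is synthetic):

```python
import numpy as np

values = np.random.lognormal(mean=0.0, sigma=1.0, size=10_000)

counts_per_bucket, buckets = np.histogram(values, bins=25, density=True)
max_counts = np.max(counts_per_bucket) if np.max(counts_per_bucket) > 0 else 1.0

scaled = counts_per_bucket / max_counts        # the scaled-count histogram column
starts, ends = buckets[:-1], buckets[1:]       # bin edges for plotting
```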
@@ -4339,73 +4412,64 @@ class Analyzer:
     """
     n_geos = self._meridian.n_geos
     n_media_times = self._meridian.n_media_times
-    n_rf_channels = self._meridian.n_rf_channels
-    n_media_channels = self._meridian.n_media_channels
-
-    (
-        channels,
-        scaled_count,
-        channel_type_arr,
-        start_interval_histogram,
-        end_interval_histogram,
-        count,
-    ) = ([], [], [], [], [], [])
+
+    df_list = []
 
     # RF.
     if self._meridian.input_data.rf_channel is not None:
-      frequency = (
-          self._meridian.rf_tensors.frequency
-      )  # Shape: (n_geos, n_media_times, n_channels).
-      reshaped_frequency = tf.reshape(
-          frequency, (n_geos * n_media_times, n_rf_channels)
-      )
-      for i, channel in enumerate(self._meridian.input_data.rf_channel.values):
-        # Bucketize the histogram data for RF channels.
-        counts_per_bucket, buckets = np.histogram(
-            reshaped_frequency[:, i], bins=n_bins, density=True
+      frequency = self._meridian.rf_tensors.frequency
+      if frequency is not None:
+        reshaped_frequency = tf.reshape(
+            frequency, (n_geos * n_media_times, self._meridian.n_rf_channels)
+        )
+        rf_hist_data = self._get_channel_hill_histogram_dataframe(
+            channel_type=constants.RF,
+            data_to_histogram=reshaped_frequency,
+            channel_names=self._meridian.input_data.rf_channel.values,
+            n_bins=n_bins,
         )
-        channels.extend([channel] * len(counts_per_bucket))
-        channel_type_arr.extend([constants.RF] * len(counts_per_bucket))
-        scaled_count.extend(counts_per_bucket / max(counts_per_bucket))
-        count.extend(counts_per_bucket)
-        start_interval_histogram.extend(buckets[:-1])
-        end_interval_histogram.extend(buckets[1:])
+        df_list.append(pd.DataFrame(rf_hist_data))
 
     # Media.
     if self._meridian.input_data.media_channel is not None:
-      transformer = transformers.MediaTransformer(
-          self._meridian.media_tensors.media, self._meridian.population
-      )
-      scaled = (
-          self._meridian.media_tensors.media_scaled
-      )  # Shape: (n_geos, n_media_times, n_channels)
-      population_scaled_median = transformer.population_scaled_median_m
-      scaled_media_units = scaled * population_scaled_median
-      reshaped_scaled_media_units = tf.reshape(
-          scaled_media_units, (n_geos * n_media_times, n_media_channels)
+      transformer = self._meridian.media_tensors.media_transformer
+      scaled = self._meridian.media_tensors.media_scaled
+      if transformer is not None and scaled is not None:
+        population_scaled_median = transformer.population_scaled_median_m
+        scaled_media_units = scaled * population_scaled_median
+        reshaped_scaled_media_units = tf.reshape(
+            scaled_media_units,
+            (n_geos * n_media_times, self._meridian.n_media_channels),
+        )
+        media_hist_data = self._get_channel_hill_histogram_dataframe(
+            channel_type=constants.MEDIA,
+            data_to_histogram=reshaped_scaled_media_units,
+            channel_names=self._meridian.input_data.media_channel.values,
+            n_bins=n_bins,
+        )
+        df_list.append(pd.DataFrame(media_hist_data))
+    # Organic media.
+    if self._meridian.input_data.organic_media_channel is not None:
+      transformer_om = (
+          self._meridian.organic_media_tensors.organic_media_transformer
       )
-      for i, channel in enumerate(
-          self._meridian.input_data.media_channel.values
-      ):
-        # Bucketize the histogram data for media channels.
-        counts_per_bucket, buckets = np.histogram(
-            reshaped_scaled_media_units[:, i], bins=n_bins, density=True
+      scaled_om = self._meridian.organic_media_tensors.organic_media_scaled
+      if transformer_om is not None and scaled_om is not None:
+        population_scaled_median_om = transformer_om.population_scaled_median_m
+        scaled_organic_media_units = scaled_om * population_scaled_median_om
+        reshaped_scaled_organic_media_units = tf.reshape(
+            scaled_organic_media_units,
+            (n_geos * n_media_times, self._meridian.n_organic_media_channels),
        )
-        channel_type_arr.extend([constants.MEDIA] * len(counts_per_bucket))
-        channels.extend([channel] * (len(counts_per_bucket)))
-        scaled_count.extend(counts_per_bucket / max(counts_per_bucket))
-        count.extend(counts_per_bucket)
-        start_interval_histogram.extend(buckets[:-1])
-        end_interval_histogram.extend(buckets[1:])
-
-    return pd.DataFrame({
-        constants.CHANNEL: channels,
-        constants.CHANNEL_TYPE: channel_type_arr,
-        constants.SCALED_COUNT_HISTOGRAM: scaled_count,
-        constants.COUNT_HISTOGRAM: count,
-        constants.START_INTERVAL_HISTOGRAM: start_interval_histogram,
-        constants.END_INTERVAL_HISTOGRAM: end_interval_histogram,
-    })
+        organic_media_hist_data = self._get_channel_hill_histogram_dataframe(
+            channel_type=constants.ORGANIC_MEDIA,
+            data_to_histogram=reshaped_scaled_organic_media_units,
+            channel_names=self._meridian.input_data.organic_media_channel.values,
+            n_bins=n_bins,
+        )
+        df_list.append(pd.DataFrame(organic_media_hist_data))
+
+    return pd.concat(df_list, ignore_index=True)
 
   def hill_curves(
       self,
@@ -4453,17 +4517,16 @@ class Analyzer:
     )
 
     final_dfs = [pd.DataFrame()]
-    if self._meridian.n_media_channels > 0:
-      hill_df_media = self._get_hill_curves_dataframe(
-          constants.MEDIA, confidence_level
-      )
-      final_dfs.append(hill_df_media)
-
-    if self._meridian.n_rf_channels > 0:
-      hill_df_rf = self._get_hill_curves_dataframe(
-          constants.RF, confidence_level
-      )
-      final_dfs.append(hill_df_rf)
+    for n_channels, channel_type in [
+        (self._meridian.n_media_channels, constants.MEDIA),
+        (self._meridian.n_rf_channels, constants.RF),
+        (self._meridian.n_organic_media_channels, constants.ORGANIC_MEDIA),
+    ]:
+      if n_channels > 0:
+        hill_df = self._get_hill_curves_dataframe(
+            channel_type, confidence_level
+        )
+        final_dfs.append(hill_df)
 
     final_dfs.append(self._get_hill_histogram_dataframe(n_bins=n_bins))
     return pd.concat(final_dfs)