google-meridian 1.1.2__py3-none-any.whl → 1.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1368,7 +1368,10 @@ class BudgetOptimizer:
1368
1368
  versions of all the remaining tensors. If any of the tensors in
1369
1369
  `new_data` is provided with a different number of time periods than in
1370
1370
  `InputData`, then all tensors must be provided with the same number of
1371
- time periods and the `time` tensor must be provided.
1371
+ time periods and the `time` tensor must be provided. In this case, spend
1372
+ tensors must be provided with `geo` and `time` granularity. If
1373
+ `use_optimal_frequency` is `True`, `new_data.frequency` does not need to
1374
+ be provided and is ignored. The optimal frequency is used instead.
1372
1375
  use_posterior: Boolean. If `True`, then the budget is optimized based on
1373
1376
  the posterior distribution of the model. Otherwise, the prior
1374
1377
  distribution is used.
@@ -1427,7 +1430,7 @@ class BudgetOptimizer:
1427
1430
  or equal to `(budget * gtol)`. `gtol` must be less than 1.
1428
1431
  use_optimal_frequency: If `True`, uses `optimal_frequency` calculated by
1429
1432
  trained Meridian model for optimization. If `False`, uses historical
1430
- frequency.
1433
+ frequency or `new_data.frequency` if provided.
1431
1434
  use_kpi: If `True`, runs the optimization on KPI. Defaults to revenue.
1432
1435
  confidence_level: The threshold for computing the confidence intervals.
1433
1436
  batch_size: Maximum draws per chain in each batch. The calculation is run
@@ -1596,6 +1599,142 @@ class BudgetOptimizer:
1596
1599
  _optimization_grid=optimization_grid,
1597
1600
  )
1598
1601
 
1602
+ def create_optimization_tensors(
1603
+ self,
1604
+ time: Sequence[str] | tf.Tensor,
1605
+ cpmu: tf.Tensor | None = None,
1606
+ media: tf.Tensor | None = None,
1607
+ media_spend: tf.Tensor | None = None,
1608
+ cprf: tf.Tensor | None = None,
1609
+ rf_impressions: tf.Tensor | None = None,
1610
+ frequency: tf.Tensor | None = None,
1611
+ rf_spend: tf.Tensor | None = None,
1612
+ revenue_per_kpi: tf.Tensor | None = None,
1613
+ use_optimal_frequency: bool = True,
1614
+ ) -> analyzer.DataTensors:
1615
+ """Creates a `DataTensors` for optimizations from CPM and flighting data.
1616
+
1617
+ CPM is broken down into cost per media unit, `cpmu`, for the media channels
1618
+ and cost per impression (reach * frequency), `cprf`, for the reach and
1619
+ frequency channels.
1620
+
1621
+ The flighting pattern can be specified as the spend flighting or the media
1622
+ units flighting pattern at the time or geo and time granularity. If data is
1623
+ passed without a geo dimension, then the values are interpreted as
1624
+ national-level totals. If the model is a geo-level model, then the values
1625
+ are allocated across geos based on the population used in the model.
1626
+
1627
+ Below are the different combinations of tensors that can be provided:
1628
+ For media:
1629
+ 1) `media`, `cpmu` (media units flighting pattern)
1630
+ 2) `media_spend`, `cpmu` (spend flighting pattern)
1631
+
1632
+ For R&F:
1633
+ If `use_optimal_frequency=True`, `frequency` should not be provided.
1634
+ Frequency input is not required for the optimization, so the new
1635
+ `DataTensors` object will be created with `frequency` arbitrarily set to
1636
+ 1 and `reach=rf_impressions`.
1637
+ 1) `rf_impressions`, `cprf` (impressions flighting pattern)
1638
+ 2) `rf_spend`, `cprf` (spend flighting pattern)
1639
+
1640
+ If `use_optimal_frequency=False`:
1641
+ 1) `rf_impressions`, `frequency`, `cprf` (impressions flighting pattern)
1642
+ 2) `rf_spend`, `frequency`, `cprf` (spend flighting pattern)
1643
+
1644
+
1645
+ Args:
1646
+ time: A sequence or tensor of time coordinates in the "YYYY-mm-dd" string
1647
+ format.
1648
+ cpmu: A tensor of cost per media unit with dimensions `(n_media_channels)`,
1649
+ `(T, n_media_channels)` or `(n_geos, T, n_media_channels)` for any time
1650
+ dimension `T`.
1651
+ media: An optional tensor of media unit values with dimensions `(T,
1652
+ n_media_channels)` or `(n_geos, T, n_media_channels)` for any time
1653
+ dimension `T`.
1654
+ media_spend: A tensor of media spend values with dimensions `(T,
1655
+ n_media_channels)` or `(n_geos, T, n_media_channels)` for any time
1656
+ dimension `T`.
1657
+ cprf: A tensor of cost per impression (reach * frequency) with dimensions
1658
+ `(n_rf_channels)`, `(T, n_rf_channels)` or `(n_geos, T, n_rf_channels)`
1659
+ for any time dimension `T`.
1660
+ rf_impressions: A tensor of impressions (reach * frequency) values with
1661
+ dimensions `(T, n_rf_channels)` or `(n_geos, T, n_rf_channels)` for any
1662
+ time dimension `T`.
1663
+ frequency: A tensor of frequency values with dimensions `(n_rf_channels)`,
1664
+ `(T, n_rf_channels)` or `(n_geos, T, n_rf_channels)` for any time
1665
+ dimension `T`. If `use_optimal_frequency=True`, then this tensor should
1666
+ not be provided and the optimal frequency will be calculated and used.
1667
+ rf_spend: A tensor of rf spend values with dimensions `(T, n_rf_channels)`
1668
+ or `(n_geos, T, n_rf_channels)` for any time dimension `T`.
1669
+ revenue_per_kpi: A tensor of revenue per KPI values with dimensions `()`,
1670
+ `(T)`, or `(n_geos, T)` for any time dimension `T`.
1671
+ use_optimal_frequency: Boolean. If `True`, the optimal frequency will be
1672
+ used in the optimization and a frequency value should not be provided.
1673
+ In this case, `reach=rf_impressions` and `frequency=1` (by arbitrary
1674
+ convention) in the new data. If `False`, the frequency value must be
1675
+ provided.
1676
+
1677
+ Returns:
1678
+ A `DataTensors` object with optional tensors `media`, `reach`,
1679
+ `frequency`, `media_spend`, `rf_spend`, `revenue_per_kpi`, and `time`.
1680
+ """
1681
+ self._validate_optimization_tensors(
1682
+ cpmu=cpmu,
1683
+ cprf=cprf,
1684
+ media=media,
1685
+ rf_impressions=rf_impressions,
1686
+ frequency=frequency,
1687
+ media_spend=media_spend,
1688
+ rf_spend=rf_spend,
1689
+ revenue_per_kpi=revenue_per_kpi,
1690
+ use_optimal_frequency=use_optimal_frequency,
1691
+ )
1692
+ n_times = time.shape[0] if isinstance(time, tf.Tensor) else len(time)
1693
+ n_geos = self._meridian.n_geos
1694
+ revenue_per_kpi = (
1695
+ _expand_tensor(revenue_per_kpi, (n_geos, n_times))
1696
+ if revenue_per_kpi is not None
1697
+ else None
1698
+ )
1699
+
1700
+ tensors = {}
1701
+ if media is not None:
1702
+ cpmu = _expand_tensor(cpmu, (n_geos, n_times, media.shape[-1]))
1703
+ tensors[c.MEDIA] = self._allocate_tensor_by_population(media)
1704
+ tensors[c.MEDIA_SPEND] = tensors[c.MEDIA] * cpmu
1705
+ if media_spend is not None:
1706
+ cpmu = _expand_tensor(cpmu, (n_geos, n_times, media_spend.shape[-1]))
1707
+ tensors[c.MEDIA_SPEND] = self._allocate_tensor_by_population(media_spend)
1708
+ tensors[c.MEDIA] = tensors[c.MEDIA_SPEND] / cpmu
1709
+ if rf_impressions is not None:
1710
+ shape = (n_geos, n_times, rf_impressions.shape[-1])
1711
+ cprf = _expand_tensor(cprf, shape)
1712
+ allocated_impressions = self._allocate_tensor_by_population(
1713
+ rf_impressions
1714
+ )
1715
+ tensors[c.RF_SPEND] = allocated_impressions * cprf
1716
+ if use_optimal_frequency:
1717
+ frequency = tf.ones_like(allocated_impressions)
1718
+ tensors[c.FREQUENCY] = _expand_tensor(frequency, shape)
1719
+ tensors[c.REACH] = tf.math.divide_no_nan(
1720
+ allocated_impressions, tensors[c.FREQUENCY]
1721
+ )
1722
+ if rf_spend is not None:
1723
+ shape = (n_geos, n_times, rf_spend.shape[-1])
1724
+ cprf = _expand_tensor(cprf, shape)
1725
+ tensors[c.RF_SPEND] = self._allocate_tensor_by_population(rf_spend)
1726
+ impressions = tf.math.divide_no_nan(tensors[c.RF_SPEND], cprf)
1727
+ if use_optimal_frequency:
1728
+ frequency = tf.ones_like(impressions)
1729
+ tensors[c.FREQUENCY] = _expand_tensor(frequency, shape)
1730
+ tensors[c.REACH] = tf.math.divide_no_nan(
1731
+ impressions, tensors[c.FREQUENCY]
1732
+ )
1733
+ if revenue_per_kpi is not None:
1734
+ tensors[c.REVENUE_PER_KPI] = revenue_per_kpi
1735
+ tensors[c.TIME] = tf.convert_to_tensor(time)
1736
+ return analyzer.DataTensors(**tensors)
1737
+
1599
1738
  def _validate_grid(
1600
1739
  self,
1601
1740
  new_data: analyzer.DataTensors | None,
@@ -1863,9 +2002,14 @@ class BudgetOptimizer:
1863
2002
  )
1864
2003
  )
1865
2004
  if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
2005
+ opt_freq_data = analyzer.DataTensors(
2006
+ rf_impressions=filled_data.reach * filled_data.frequency,
2007
+ rf_spend=filled_data.rf_spend,
2008
+ revenue_per_kpi=filled_data.revenue_per_kpi,
2009
+ )
1866
2010
  optimal_frequency = tf.convert_to_tensor(
1867
2011
  self._analyzer.optimal_freq(
1868
- new_data=filled_data.filter_fields(c.RF_DATA),
2012
+ new_data=opt_freq_data,
1869
2013
  use_posterior=use_posterior,
1870
2014
  selected_times=selected_times,
1871
2015
  use_kpi=use_kpi,
@@ -1985,8 +2129,6 @@ class BudgetOptimizer:
1985
2129
  tf.Tensor | None,
1986
2130
  tf.Tensor | None,
1987
2131
  tf.Tensor | None,
1988
- tf.Tensor | None,
1989
- tf.Tensor | None,
1990
2132
  ]:
1991
2133
  """Gets the tensors for incremental outcome, based on spend data.
1992
2134
 
@@ -1994,12 +2136,11 @@ class BudgetOptimizer:
1994
2136
  incremental_outcome() for creating budget data. new_media is calculated
1995
2137
  assuming a constant cpm between historical spend and optimization spend.
1996
2138
  new_reach and new_frequency are calculated by first multiplying them
1997
- together and getting rf_media(impressions), and then calculating
1998
- new_rf_media given the same formula for new_media. new_frequency is
1999
- optimal_frequency if optimal_frequency is not none, and
2000
- self._meridian.rf_tensors.frequency otherwise. new_reach is calculated using
2001
- (new_rf_media / new_frequency). new_spend and new_rf_spend are taken from
2002
- their respective indexes in spend.
2139
+ together and getting `rf_impressions`, and then calculating
2140
+ `new_rf_impressions` given the same formula for `new_media`. `new_frequency`
2141
+ is `optimal_frequency` if `optimal_frequency` is not None, and
2142
+ `self._meridian.rf_tensors.frequency` otherwise. `new_reach` is calculated
2143
+ using `new_rf_impressions / new_frequency`.
2003
2144
 
2004
2145
  Args:
2005
2146
  hist_spend: historical spend data.
@@ -2016,8 +2157,7 @@ class BudgetOptimizer:
2016
2157
  frequency is used for the optimization scenario.
2017
2158
 
2018
2159
  Returns:
2019
- Tuple of tf.tensors (new_media, new_media_spend, new_reach, new_frequency,
2020
- new_rf_spend).
2160
+ Tuple of tf.tensors (new_media, new_reach, new_frequency).
2021
2161
  """
2022
2162
  new_data = new_data or analyzer.DataTensors()
2023
2163
  filled_data = new_data.validate_and_fill_missing_data(
@@ -2032,37 +2172,29 @@ class BudgetOptimizer:
2032
2172
  )
2033
2173
  * filled_data.media
2034
2174
  )
2035
- new_media_spend = tf.convert_to_tensor(
2036
- spend[: self._meridian.n_media_channels]
2037
- )
2038
2175
  else:
2039
2176
  new_media = None
2040
- new_media_spend = None
2041
2177
  if self._meridian.n_rf_channels > 0:
2042
- rf_media = filled_data.reach * filled_data.frequency
2043
- new_rf_media = (
2178
+ rf_impressions = filled_data.reach * filled_data.frequency
2179
+ new_rf_impressions = (
2044
2180
  tf.math.divide_no_nan(
2045
2181
  spend[-self._meridian.n_rf_channels :],
2046
2182
  hist_spend[-self._meridian.n_rf_channels :],
2047
2183
  )
2048
- * rf_media
2184
+ * rf_impressions
2049
2185
  )
2050
2186
  frequency = (
2051
2187
  filled_data.frequency
2052
2188
  if optimal_frequency is None
2053
2189
  else optimal_frequency
2054
2190
  )
2055
- new_reach = tf.math.divide_no_nan(new_rf_media, frequency)
2056
- new_frequency = tf.math.divide_no_nan(new_rf_media, new_reach)
2057
- new_rf_spend = tf.convert_to_tensor(
2058
- spend[-self._meridian.n_rf_channels :]
2059
- )
2191
+ new_reach = tf.math.divide_no_nan(new_rf_impressions, frequency)
2192
+ new_frequency = tf.math.divide_no_nan(new_rf_impressions, new_reach)
2060
2193
  else:
2061
2194
  new_reach = None
2062
2195
  new_frequency = None
2063
- new_rf_spend = None
2064
2196
 
2065
- return (new_media, new_media_spend, new_reach, new_frequency, new_rf_spend)
2197
+ return (new_media, new_reach, new_frequency)
2066
2198
 
2067
2199
  def _create_budget_dataset(
2068
2200
  self,
@@ -2086,7 +2218,7 @@ class BudgetOptimizer:
2086
2218
  )
2087
2219
  spend_tensor = tf.convert_to_tensor(spend, dtype=tf.float32)
2088
2220
  hist_spend = tf.convert_to_tensor(hist_spend, dtype=tf.float32)
2089
- (new_media, new_media_spend, new_reach, new_frequency, new_rf_spend) = (
2221
+ (new_media, new_reach, new_frequency) = (
2090
2222
  self._get_incremental_outcome_tensors(
2091
2223
  hist_spend,
2092
2224
  spend_tensor,
@@ -2095,22 +2227,34 @@ class BudgetOptimizer:
2095
2227
  )
2096
2228
  )
2097
2229
  budget = np.sum(spend_tensor)
2230
+ inc_outcome_data = analyzer.DataTensors(
2231
+ media=new_media,
2232
+ reach=new_reach,
2233
+ frequency=new_frequency,
2234
+ revenue_per_kpi=filled_data.revenue_per_kpi,
2235
+ )
2098
2236
 
2099
2237
  # incremental_outcome here is a tensor with the shape
2100
2238
  # (n_chains, n_draws, n_channels)
2101
2239
  incremental_outcome = self._analyzer.incremental_outcome(
2102
2240
  use_posterior=use_posterior,
2103
- new_data=analyzer.DataTensors(
2104
- media=new_media,
2105
- reach=new_reach,
2106
- frequency=new_frequency,
2107
- revenue_per_kpi=filled_data.revenue_per_kpi,
2108
- ),
2241
+ new_data=inc_outcome_data,
2109
2242
  selected_times=selected_times,
2110
2243
  use_kpi=use_kpi,
2111
2244
  batch_size=batch_size,
2112
2245
  include_non_paid_channels=False,
2113
2246
  )
2247
+ incremental_increase = 0.01
2248
+ mroi_numerator = self._analyzer.incremental_outcome(
2249
+ new_data=inc_outcome_data,
2250
+ selected_times=selected_times,
2251
+ scaling_factor0=1.0,
2252
+ scaling_factor1=1 + incremental_increase,
2253
+ use_posterior=use_posterior,
2254
+ use_kpi=use_kpi,
2255
+ batch_size=batch_size,
2256
+ include_non_paid_channels=False,
2257
+ )
2114
2258
  # incremental_outcome_with_mean_median_and_ci here is an ndarray with the
2115
2259
  # shape (n_channels, n_metrics) where n_metrics = 4 for (mean, median,
2116
2260
  # ci_lo, and ci_hi)
@@ -2153,20 +2297,8 @@ class BudgetOptimizer:
2153
2297
  include_median=True,
2154
2298
  )
2155
2299
  marginal_roi = analyzer.get_central_tendency_and_ci(
2156
- data=self._analyzer.marginal_roi(
2157
- use_posterior=use_posterior,
2158
- new_data=analyzer.DataTensors(
2159
- media=new_media,
2160
- reach=new_reach,
2161
- frequency=new_frequency,
2162
- media_spend=new_media_spend,
2163
- rf_spend=new_rf_spend,
2164
- revenue_per_kpi=filled_data.revenue_per_kpi,
2165
- ),
2166
- selected_times=selected_times,
2167
- batch_size=batch_size,
2168
- by_reach=True,
2169
- use_kpi=use_kpi,
2300
+ data=tf.math.divide_no_nan(
2301
+ mroi_numerator, spend_tensor * incremental_increase
2170
2302
  ),
2171
2303
  confidence_level=confidence_level,
2172
2304
  include_median=True,
@@ -2447,6 +2579,100 @@ class BudgetOptimizer:
2447
2579
  )
2448
2580
  return (spend_grid, incremental_outcome_grid)
2449
2581
 
2582
+ def _validate_optimization_tensors(
2583
+ self,
2584
+ cpmu: tf.Tensor | None = None,
2585
+ cprf: tf.Tensor | None = None,
2586
+ media: tf.Tensor | None = None,
2587
+ rf_impressions: tf.Tensor | None = None,
2588
+ frequency: tf.Tensor | None = None,
2589
+ media_spend: tf.Tensor | None = None,
2590
+ rf_spend: tf.Tensor | None = None,
2591
+ revenue_per_kpi: tf.Tensor | None = None,
2592
+ use_optimal_frequency: bool = True,
2593
+ ):
2594
+ """Validates the tensors needed for optimization."""
2595
+ if (media is not None or media_spend is not None) and cpmu is None:
2596
+ raise ValueError(
2597
+ 'If `media` or `media_spend` is provided, then `cpmu` must also be'
2598
+ ' provided.'
2599
+ )
2600
+ if (rf_impressions is not None or rf_spend is not None) and cprf is None:
2601
+ raise ValueError(
2602
+ 'If `reach` and `frequency` or `rf_spend` is provided, then `cprf`'
2603
+ ' must also be provided.'
2604
+ )
2605
+ if media is not None and media_spend is not None:
2606
+ raise ValueError('Only one of `media` or `media_spend` can be provided.')
2607
+ if rf_impressions is not None and rf_spend is not None:
2608
+ raise ValueError(
2609
+ 'Only one of `rf_impressions` or `rf_spend` can be provided.'
2610
+ )
2611
+ if use_optimal_frequency and frequency is not None:
2612
+ raise ValueError(
2613
+ 'If `use_optimal_frequency` is `True`, then `frequency` must not be'
2614
+ ' provided.'
2615
+ )
2616
+ if not use_optimal_frequency and frequency is None:
2617
+ if rf_impressions is not None or rf_spend is not None:
2618
+ raise ValueError(
2619
+ 'If `use_optimal_frequency` is `False`, then `frequency` must be'
2620
+ ' provided.'
2621
+ )
2622
+
2623
+ n_geos = [
2624
+ t.shape[0]
2625
+ for t in [
2626
+ cpmu,
2627
+ cprf,
2628
+ media,
2629
+ rf_impressions,
2630
+ frequency,
2631
+ media_spend,
2632
+ rf_spend,
2633
+ ]
2634
+ if t is not None and t.ndim == 3
2635
+ ]
2636
+ if revenue_per_kpi is not None and revenue_per_kpi.ndim == 2:
2637
+ n_geos.append(revenue_per_kpi.shape[0])
2638
+ if any(n_geo != self._meridian.n_geos for n_geo in n_geos):
2639
+ raise ValueError(
2640
+ 'All tensors with a geo dimension must have the same number of geos'
2641
+ ' as in `meridian.InputData`.'
2642
+ )
2643
+
2644
+ def _allocate_tensor_by_population(
2645
+ self, tensor: tf.Tensor, required_ndim: int = 3
2646
+ ):
2647
+ """Allocates a tensor of shape (time,) or (time, channel) by the population.
2648
+
2649
+ Args:
2650
+ tensor: A tensor of shape (time,) or (time, channel).
2651
+ required_ndim: The required number of dimensions for the tensor.
2652
+
2653
+ Returns:
2654
+ The scaled tensor of shape (geo, time) or (geo, time, channel).
2655
+ """
2656
+ if tensor.ndim == required_ndim:
2657
+ return tensor
2658
+
2659
+ if tensor.ndim != required_ndim - 1:
2660
+ raise ValueError(
2661
+ 'Tensor must have 1 less than the required number of dimensions, '
2662
+ f'{required_ndim}, in order to be allocated by population. Found '
2663
+ f'{tensor.ndim} dimensions.'
2664
+ )
2665
+
2666
+ population = self._meridian.population
2667
+ normalized_population = population / tf.reduce_sum(population)
2668
+ if tensor.ndim == 1:
2669
+ reshaped_population = normalized_population[:, tf.newaxis]
2670
+ reshaped_tensor = tensor[tf.newaxis, :]
2671
+ else:
2672
+ reshaped_population = normalized_population[:, tf.newaxis, tf.newaxis]
2673
+ reshaped_tensor = tensor[tf.newaxis, :, :]
2674
+ return reshaped_tensor * reshaped_population
2675
+
2450
2676
 
2451
2677
  def _validate_pct_of_spend(
2452
2678
  n_channels: int,
@@ -2700,3 +2926,27 @@ def _raise_warning_if_target_constraints_not_met(
2700
2926
  f' ROI is {target_mroi}, but the actual channel marginal ROIs are'
2701
2927
  f' {optimized_mroi}.'
2702
2928
  )
2929
+
2930
+
2931
+ def _expand_tensor(tensor: tf.Tensor, required_shape: tuple[int, ...]):
2932
+ """Expands a tensor to the required shape."""
2933
+ if tensor.shape == required_shape:
2934
+ return tensor
2935
+ if tensor.ndim == 0:
2936
+ return tf.fill(required_shape, tensor)
2937
+
2938
+ # Tensor must be less than or equal to the required number of dimensions and
2939
+ # the shape must match the required shape excluding the difference in number
2940
+ # of dims.
2941
+ if tensor.ndim <= len(required_shape) and list(tensor.shape) == list(
2942
+ required_shape[-tensor.ndim :]
2943
+ ):
2944
+ n_tile_dims = len(required_shape) - tensor.ndim
2945
+ repeats = list(required_shape[:n_tile_dims]) + [1] * tensor.ndim
2946
+ reshaped_tensor = tf.reshape(tensor, [1] * n_tile_dims + list(tensor.shape))
2947
+ return tf.tile(reshaped_tensor, repeats)
2948
+
2949
+ raise ValueError(
2950
+ f'Cannot expand tensor with shape {tensor.shape} to target'
2951
+ f' {required_shape}.'
2952
+ )
meridian/constants.py CHANGED
@@ -63,6 +63,7 @@ CONTROLS = 'controls'
63
63
  POPULATION = 'population'
64
64
  REACH = 'reach'
65
65
  FREQUENCY = 'frequency'
66
+ RF_IMPRESSIONS = 'rf_impressions'
66
67
  RF_SPEND = 'rf_spend'
67
68
  ORGANIC_MEDIA = 'organic_media'
68
69
  ORGANIC_REACH = 'organic_reach'
@@ -70,6 +71,8 @@ ORGANIC_FREQUENCY = 'organic_frequency'
70
71
  NON_MEDIA_TREATMENTS = 'non_media_treatments'
71
72
  REVENUE = 'revenue'
72
73
  NON_REVENUE = 'non_revenue'
74
+ CPMU = 'cpmu'
75
+ CPRF = 'cprf'
73
76
  REQUIRED_INPUT_DATA_ARRAY_NAMES = (
74
77
  KPI,
75
78
  POPULATION,
@@ -92,6 +92,10 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
92
92
  Returns:
93
93
  The `DataFrameInputDataBuilder` with the added controls data.
94
94
  """
95
+ if not control_cols:
96
+ warnings.warn('No control columns provided. Not adding controls data.')
97
+ return self
98
+
95
99
  controls_df = df.copy()
96
100
 
97
101
  ### Validate ###
@@ -236,6 +240,12 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
236
240
  Returns:
237
241
  The `DataFrameInputDataBuilder` with the added media and media spend data.
238
242
  """
243
+ if not media_cols or not media_spend_cols or not media_channels:
244
+ raise ValueError(
245
+ '`media_cols`, `media_spend_cols`, and `media_channels` must not be '
246
+ 'empty.'
247
+ )
248
+
239
249
  media_df = df.copy()
240
250
 
241
251
  ### Validate ###
@@ -308,6 +318,17 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
308
318
  The `DataFrameInputDataBuilder` with the added reach, frequency, and rf
309
319
  spend data.
310
320
  """
321
+ if (
322
+ not reach_cols
323
+ or not frequency_cols
324
+ or not rf_spend_cols
325
+ or not rf_channels
326
+ ):
327
+ raise ValueError(
328
+ '`reach_cols`, `frequency_cols`, `rf_spend_cols`, and `rf_channels` '
329
+ 'must not be empty.'
330
+ )
331
+
311
332
  reach_df = df.copy()
312
333
 
313
334
  ### Validate ###
@@ -392,6 +413,9 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
392
413
  Returns:
393
414
  The `DataFrameInputDataBuilder` with the added organic media data.
394
415
  """
416
+ if not organic_media_cols:
417
+ raise ValueError('`organic_media_cols` must not be empty.')
418
+
395
419
  organic_media_df = df.copy()
396
420
 
397
421
  ### Validate ###
@@ -458,6 +482,16 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
458
482
  The `DataFrameInputDataBuilder` with the added organic reach and organic
459
483
  frequency data.
460
484
  """
485
+ if (
486
+ not organic_reach_cols
487
+ or not organic_frequency_cols
488
+ or not organic_rf_channels
489
+ ):
490
+ raise ValueError(
491
+ '`organic_reach_cols`, `organic_frequency_cols`, and'
492
+ ' `organic_rf_channels` must not be empty.'
493
+ )
494
+
461
495
  organic_reach_frequency_df = df.copy()
462
496
 
463
497
  ### Validate ###
@@ -526,6 +560,13 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
526
560
  Returns:
527
561
  The `DataFrameInputDataBuilder` with the added non-media treatments data.
528
562
  """
563
+ if not non_media_treatment_cols:
564
+ warnings.warn(
565
+ 'No non-media treatment columns were provided. Not adding non-media '
566
+ 'treatments data.'
567
+ )
568
+ return self
569
+
529
570
  non_media_treatments_df = df.copy()
530
571
 
531
572
  ### Validate ###
@@ -134,7 +134,9 @@ class InputDataBuilder(abc.ABC):
134
134
  if len(value) != len(set(value)):
135
135
  raise ValueError('Geos must be unique.')
136
136
  if self.geos is not None and set(self.geos) != set(value):
137
- raise ValueError(f'geos already set to {self.geos}.')
137
+ raise ValueError(
138
+ f'geos already set to {self.geos}. Cannot reassign to {value}.'
139
+ )
138
140
  self._geos = value
139
141
 
140
142
  @property
@@ -646,12 +648,13 @@ class InputDataBuilder(abc.ABC):
646
648
  """Normalizes the given `DataArray`'s coordinates in Meridian convention.
647
649
 
648
650
  Validates that time values are in the conventional Meridian format and
649
- that geos have national name if national.
651
+ that geos have national name if national. If geo coordinates are not string-
652
+ typed, they will be converted to strings.
650
653
 
651
654
  Args:
652
655
  da: The DataArray to normalize.
653
- time_dimension_name: The name of the time dimension. If None, the
654
- will skip time normalization.
656
+ time_dimension_name: The name of the time dimension. If None, this will
657
+ skip time normalization.
655
658
 
656
659
  Returns:
657
660
  The normalized DataArray.
@@ -686,6 +689,11 @@ class InputDataBuilder(abc.ABC):
686
689
  da = da.assign_coords(
687
690
  {constants.GEO: [constants.NATIONAL_MODEL_DEFAULT_GEO_NAME]},
688
691
  )
692
+ else:
693
+ da = da.assign_coords(
694
+ {constants.GEO: da.coords[constants.GEO].astype(str)}
695
+ )
696
+
689
697
  return da
690
698
 
691
699
  def _validate_set(self, component: str, da: xr.DataArray):