google-meridian 1.2.1__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. google_meridian-1.3.1.dist-info/METADATA +209 -0
  2. google_meridian-1.3.1.dist-info/RECORD +76 -0
  3. {google_meridian-1.2.1.dist-info → google_meridian-1.3.1.dist-info}/top_level.txt +1 -0
  4. meridian/analysis/__init__.py +2 -0
  5. meridian/analysis/analyzer.py +179 -105
  6. meridian/analysis/formatter.py +2 -2
  7. meridian/analysis/optimizer.py +227 -87
  8. meridian/analysis/review/__init__.py +20 -0
  9. meridian/analysis/review/checks.py +721 -0
  10. meridian/analysis/review/configs.py +110 -0
  11. meridian/analysis/review/constants.py +40 -0
  12. meridian/analysis/review/results.py +544 -0
  13. meridian/analysis/review/reviewer.py +186 -0
  14. meridian/analysis/summarizer.py +21 -34
  15. meridian/analysis/templates/chips.html.jinja +12 -0
  16. meridian/analysis/test_utils.py +27 -5
  17. meridian/analysis/visualizer.py +41 -57
  18. meridian/backend/__init__.py +457 -118
  19. meridian/backend/test_utils.py +162 -0
  20. meridian/constants.py +39 -3
  21. meridian/model/__init__.py +1 -0
  22. meridian/model/eda/__init__.py +3 -0
  23. meridian/model/eda/constants.py +21 -0
  24. meridian/model/eda/eda_engine.py +1309 -196
  25. meridian/model/eda/eda_outcome.py +200 -0
  26. meridian/model/eda/eda_spec.py +84 -0
  27. meridian/model/eda/meridian_eda.py +220 -0
  28. meridian/model/knots.py +55 -49
  29. meridian/model/media.py +10 -8
  30. meridian/model/model.py +79 -16
  31. meridian/model/model_test_data.py +53 -0
  32. meridian/model/posterior_sampler.py +39 -32
  33. meridian/model/prior_distribution.py +12 -2
  34. meridian/model/prior_sampler.py +146 -90
  35. meridian/model/spec.py +7 -8
  36. meridian/model/transformers.py +11 -3
  37. meridian/version.py +1 -1
  38. schema/__init__.py +18 -0
  39. schema/serde/__init__.py +26 -0
  40. schema/serde/constants.py +48 -0
  41. schema/serde/distribution.py +515 -0
  42. schema/serde/eda_spec.py +192 -0
  43. schema/serde/function_registry.py +143 -0
  44. schema/serde/hyperparameters.py +363 -0
  45. schema/serde/inference_data.py +105 -0
  46. schema/serde/marketing_data.py +1321 -0
  47. schema/serde/meridian_serde.py +413 -0
  48. schema/serde/serde.py +47 -0
  49. schema/serde/test_data.py +4608 -0
  50. schema/utils/__init__.py +17 -0
  51. schema/utils/time_record.py +156 -0
  52. google_meridian-1.2.1.dist-info/METADATA +0 -409
  53. google_meridian-1.2.1.dist-info/RECORD +0 -52
  54. {google_meridian-1.2.1.dist-info → google_meridian-1.3.1.dist-info}/WHEEL +0 -0
  55. {google_meridian-1.2.1.dist-info → google_meridian-1.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -46,9 +46,10 @@ alt.data_transformers.disable_max_rows()
46
46
  class ModelDiagnostics:
47
47
  """Generates model diagnostics plots from the Meridian model fitting."""
48
48
 
49
- def __init__(self, meridian: model.Meridian):
49
+ def __init__(self, meridian: model.Meridian, use_kpi: bool = False):
50
50
  self._meridian = meridian
51
51
  self._analyzer = analyzer.Analyzer(meridian)
52
+ self._use_kpi = self._analyzer._use_kpi(use_kpi)
52
53
 
53
54
  @functools.lru_cache(maxsize=128)
54
55
  def _predictive_accuracy_dataset(
@@ -82,6 +83,7 @@ class ModelDiagnostics:
82
83
  return self._analyzer.predictive_accuracy(
83
84
  selected_geos=selected_geos_list,
84
85
  selected_times=selected_times_list,
86
+ use_kpi=self._use_kpi,
85
87
  batch_size=batch_size,
86
88
  )
87
89
 
@@ -366,19 +368,23 @@ class ModelFit:
366
368
  def __init__(
367
369
  self,
368
370
  meridian: model.Meridian,
371
+ use_kpi: bool = False,
369
372
  confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
370
373
  ):
371
374
  """Initializes the dataset based on the model and confidence level.
372
375
 
373
376
  Args:
374
377
  meridian: Media mix model with the raw data from the model fitting.
378
+ use_kpi: If `True`, plots the incremental KPI. Otherwise, plots the
379
+ incremental revenue using the revenue per KPI (if available).
375
380
  confidence_level: Confidence level for expected outcome credible intervals
376
381
  represented as a value between zero and one. Default is `0.9`.
377
382
  """
378
383
  self._meridian = meridian
379
384
  self._analyzer = analyzer.Analyzer(meridian)
385
+ self._use_kpi = self._analyzer._use_kpi(use_kpi)
380
386
  self._model_fit_data = self._analyzer.expected_vs_actual_data(
381
- confidence_level=confidence_level
387
+ use_kpi=self._use_kpi, confidence_level=confidence_level
382
388
  )
383
389
 
384
390
  @property
@@ -430,11 +436,7 @@ class ModelFit:
430
436
  Returns:
431
437
  An Altair plot showing the model fit.
432
438
  """
433
- outcome = (
434
- c.REVENUE
435
- if self._meridian.input_data.revenue_per_kpi is not None
436
- else c.KPI.upper()
437
- )
439
+ outcome = c.KPI.upper() if self._use_kpi else c.REVENUE
438
440
  self._validate_times_to_plot(selected_times)
439
441
  self._validate_geos_to_plot(
440
442
  selected_geos, n_top_largest_geos, show_geo_level
@@ -459,10 +461,10 @@ class ModelFit:
459
461
  title = summary_text.EXPECTED_ACTUAL_OUTCOME_CHART_TITLE.format(
460
462
  outcome=outcome
461
463
  )
462
- if self._meridian.input_data.revenue_per_kpi is not None:
463
- y_axis_label = summary_text.REVENUE_LABEL
464
- else:
464
+ if self._use_kpi:
465
465
  y_axis_label = summary_text.KPI_LABEL
466
+ else:
467
+ y_axis_label = summary_text.REVENUE_LABEL
466
468
  plot = (
467
469
  alt.Chart(model_fit_df, width=c.VEGALITE_FACET_EXTRA_LARGE_WIDTH)
468
470
  .mark_line()
@@ -638,7 +640,7 @@ class ReachAndFrequency:
638
640
  self,
639
641
  meridian: model.Meridian,
640
642
  selected_times: Sequence[str] | None = None,
641
- use_kpi: bool | None = None,
643
+ use_kpi: bool = False,
642
644
  ):
643
645
  """Initializes the reach and frequency dataset for the model data.
644
646
 
@@ -651,15 +653,7 @@ class ReachAndFrequency:
651
653
  self._meridian = meridian
652
654
  self._analyzer = analyzer.Analyzer(meridian)
653
655
  self._selected_times = selected_times
654
- # TODO Adapt the mechanisms to choose between KPI and REVENUE
655
- # from Analyzer.
656
- if use_kpi is None:
657
- self._use_kpi = (
658
- meridian.input_data.kpi_type == c.NON_REVENUE
659
- and meridian.input_data.revenue_per_kpi is None
660
- )
661
- else:
662
- self._use_kpi = use_kpi
656
+ self._use_kpi = self._analyzer._use_kpi(use_kpi)
663
657
  self._optimal_frequency_data = self._analyzer.optimal_freq(
664
658
  selected_times=selected_times,
665
659
  use_kpi=self._use_kpi,
@@ -844,6 +838,7 @@ class MediaEffects:
844
838
  self,
845
839
  meridian: model.Meridian,
846
840
  by_reach: bool = True,
841
+ use_kpi: bool = False,
847
842
  ):
848
843
  """Initializes the Media Effects based on the model data and params.
849
844
 
@@ -852,10 +847,13 @@ class MediaEffects:
852
847
  by_reach: For the channel w/ reach and frequency, return the response
853
848
  curves by reach given fixed frequency if true; return the response
854
849
  curves by frequency given fixed reach if false.
850
+ use_kpi: If `True`, calculate the incremental KPI. Otherwise, calculate
851
+ the incremental revenue using the revenue per KPI (if available).
855
852
  """
856
853
  self._meridian = meridian
857
854
  self._analyzer = analyzer.Analyzer(meridian)
858
855
  self._by_reach = by_reach
856
+ self._use_kpi = self._analyzer._use_kpi(use_kpi)
859
857
 
860
858
  @functools.lru_cache(maxsize=128)
861
859
  def response_curves_data(
@@ -863,7 +861,6 @@ class MediaEffects:
863
861
  confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
864
862
  selected_times: frozenset[str] | None = None,
865
863
  by_reach: bool = True,
866
- use_kpi: bool = False,
867
864
  ) -> xr.Dataset:
868
865
  """Dataset holding the calculated response curves data.
869
866
 
@@ -887,20 +884,17 @@ class MediaEffects:
887
884
  by_reach: For the channel w/ reach and frequency, return the response
888
885
  curves by reach given fixed frequency if true; return the response
889
886
  curves by frequency given fixed reach if false.
890
- use_kpi: If `True`, calculate the incremental KPI. Otherwise, calculate
891
- the incremental revenue using the revenue per KPI (if available).
892
887
 
893
888
  Returns:
894
889
  A Dataset displaying the response curves data.
895
890
  """
896
891
  selected_times_list = list(selected_times) if selected_times else None
897
- use_kpi = use_kpi or self._meridian.input_data.revenue_per_kpi is None
898
892
  return self._analyzer.response_curves(
899
893
  spend_multipliers=list(np.arange(0, 2.2, c.RESPONSE_CURVE_STEP_SIZE)),
900
894
  confidence_level=confidence_level,
901
895
  selected_times=selected_times_list,
902
896
  by_reach=by_reach,
903
- use_kpi=use_kpi,
897
+ use_kpi=self._use_kpi,
904
898
  )
905
899
 
906
900
  @functools.lru_cache(maxsize=128)
@@ -964,7 +958,6 @@ class MediaEffects:
964
958
  confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
965
959
  selected_times: frozenset[str] | None = None,
966
960
  by_reach: bool = True,
967
- use_kpi: bool = False,
968
961
  plot_separately: bool = True,
969
962
  include_ci: bool = True,
970
963
  num_channels_displayed: int | None = None,
@@ -990,8 +983,6 @@ class MediaEffects:
990
983
  by_reach: For the channel w/ reach and frequency, return the response
991
984
  curves by reach given fixed frequency if true; return the response
992
985
  curves by frequency given fixed reach if false.
993
- use_kpi: If `True`, calculate the incremental KPI. Otherwise, calculate
994
- the incremental revenue using the revenue per KPI (if available).
995
986
  plot_separately: If `True`, the plots are faceted. If `False`, the plots
996
987
  are layered to create one plot with all of the channels.
997
988
  include_ci: If `True`, plots the credible interval. Defaults to `True`.
@@ -1027,11 +1018,10 @@ class MediaEffects:
1027
1018
  confidence_level=confidence_level,
1028
1019
  selected_times=selected_times,
1029
1020
  by_reach=by_reach,
1030
- use_kpi=use_kpi,
1031
1021
  )
1032
1022
  y_axis_label = (
1033
1023
  summary_text.INC_KPI_LABEL
1034
- if use_kpi or self._meridian.input_data.revenue_per_kpi is None
1024
+ if self._use_kpi
1035
1025
  else summary_text.INC_OUTCOME_LABEL
1036
1026
  )
1037
1027
  base = (
@@ -1341,7 +1331,6 @@ class MediaEffects:
1341
1331
  selected_times: frozenset[str] | None = None,
1342
1332
  confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
1343
1333
  by_reach: bool = True,
1344
- use_kpi: bool = False,
1345
1334
  ) -> pd.DataFrame:
1346
1335
  """Returns DataFrame with top channels by spend for the layered plot.
1347
1336
 
@@ -1356,7 +1345,6 @@ class MediaEffects:
1356
1345
  by_reach: For the channel w/ reach and frequency, return the response
1357
1346
  curves by reach given fixed frequency if true; return the response
1358
1347
  curves by frequency given fixed reach if false.
1359
- use_kpi: If `True`, use KPI instead of revenue.
1360
1348
 
1361
1349
  Returns:
1362
1350
  A DataFrame containing the top chosen channels
@@ -1367,7 +1355,6 @@ class MediaEffects:
1367
1355
  confidence_level=confidence_level,
1368
1356
  selected_times=selected_times,
1369
1357
  by_reach=by_reach,
1370
- use_kpi=use_kpi,
1371
1358
  )
1372
1359
  list_sorted_channels_cost = list(
1373
1360
  data.sel(spend_multiplier=1)
@@ -1415,6 +1402,7 @@ class MediaSummary:
1415
1402
  selected_times: Sequence[str] | None = None,
1416
1403
  marginal_roi_by_reach: bool = True,
1417
1404
  non_media_baseline_values: Sequence[float] | None = None,
1405
+ use_kpi: bool = False,
1418
1406
  ):
1419
1407
  """Initializes the media summary metrics based on the model data and params.
1420
1408
 
@@ -1434,6 +1422,7 @@ class MediaSummary:
1434
1422
  value which will be used as baseline for the given channel. If `None`,
1435
1423
  the values defined with `ModelSpec.non_media_baseline_values` will be
1436
1424
  used.
1425
+ use_kpi: If `True`, use KPI instead of revenue.
1437
1426
  """
1438
1427
  self._meridian = meridian
1439
1428
  self._analyzer = analyzer.Analyzer(meridian)
@@ -1441,6 +1430,7 @@ class MediaSummary:
1441
1430
  self._selected_times = selected_times
1442
1431
  self._marginal_roi_by_reach = marginal_roi_by_reach
1443
1432
  self._non_media_baseline_values = non_media_baseline_values
1433
+ self._use_kpi = self._analyzer._use_kpi(use_kpi)
1444
1434
 
1445
1435
  @property
1446
1436
  def paid_summary_metrics(self):
@@ -1475,7 +1465,7 @@ class MediaSummary:
1475
1465
  return self._analyzer.summary_metrics(
1476
1466
  selected_times=self._selected_times,
1477
1467
  marginal_roi_by_reach=self._marginal_roi_by_reach,
1478
- use_kpi=self._meridian.input_data.revenue_per_kpi is None,
1468
+ use_kpi=self._use_kpi,
1479
1469
  confidence_level=self._confidence_level,
1480
1470
  include_non_paid_channels=False,
1481
1471
  aggregate_times=aggregate_times,
@@ -1508,7 +1498,7 @@ class MediaSummary:
1508
1498
  """
1509
1499
  return self._analyzer.summary_metrics(
1510
1500
  selected_times=self._selected_times,
1511
- use_kpi=self._meridian.input_data.revenue_per_kpi is None,
1501
+ use_kpi=self._use_kpi,
1512
1502
  confidence_level=self._confidence_level,
1513
1503
  include_non_paid_channels=True,
1514
1504
  non_media_baseline_values=self._non_media_baseline_values,
@@ -1520,6 +1510,7 @@ class MediaSummary:
1520
1510
  include_prior: bool = True,
1521
1511
  include_posterior: bool = True,
1522
1512
  include_non_paid_channels: bool = False,
1513
+ currency: str = c.DEFAULT_CURRENCY,
1523
1514
  ) -> pd.DataFrame:
1524
1515
  """Returns a formatted dataframe table of the summary metrics.
1525
1516
 
@@ -1536,6 +1527,7 @@ class MediaSummary:
1536
1527
  reported. If `False`, only the paid channels (media, reach and
1537
1528
  frequency) are included but the summary contains also the metrics
1538
1529
  dependent on spend. Default: `False`.
1530
+ currency: The currency to use for the monetary values. Default: `'$'`.
1539
1531
 
1540
1532
  Returns:
1541
1533
  pandas.DataFrame of formatted summary metrics.
@@ -1545,7 +1537,7 @@ class MediaSummary:
1545
1537
  'At least one of `include_posterior` or `include_prior` must be True.'
1546
1538
  )
1547
1539
 
1548
- use_revenue = self._meridian.input_data.revenue_per_kpi is not None
1540
+ use_revenue = not self._use_kpi
1549
1541
  distribution = [c.PRIOR] * include_prior + [c.POSTERIOR] * include_posterior
1550
1542
 
1551
1543
  percentage_metrics = [
@@ -1618,7 +1610,7 @@ class MediaSummary:
1618
1610
  # Format monetary values.
1619
1611
  for k in monetary_metrics:
1620
1612
  if k in df.columns:
1621
- df[k] = '$' + df[k].astype(str)
1613
+ df[k] = currency + df[k].astype(str)
1622
1614
 
1623
1615
  # Format the model result data variables as central_tendency (ci_lo, ci_hi).
1624
1616
  index_vars = [c.CHANNEL, c.DISTRIBUTION]
@@ -1731,11 +1723,7 @@ class MediaSummary:
1731
1723
  ),
1732
1724
  y=alt.Y(
1733
1725
  f'{c.INCREMENTAL_OUTCOME}:Q',
1734
- title=(
1735
- c.REVENUE.title()
1736
- if self._meridian.input_data.revenue_per_kpi is not None
1737
- else c.KPI.upper()
1738
- ),
1726
+ title=(c.KPI.upper() if self._use_kpi else c.REVENUE.title()),
1739
1727
  axis=alt.Axis(
1740
1728
  ticks=False,
1741
1729
  domain=False,
@@ -1901,11 +1889,7 @@ class MediaSummary:
1901
1889
  Returns:
1902
1890
  An Altair plot showing the contributions per channel.
1903
1891
  """
1904
- outcome = (
1905
- c.REVENUE.title()
1906
- if self._meridian.input_data.revenue_per_kpi is not None
1907
- else c.KPI.upper()
1908
- )
1892
+ outcome = c.KPI.upper() if self._use_kpi else c.REVENUE.title()
1909
1893
  outcome_df = self.contribution_metrics(include_non_paid=True)
1910
1894
  pct = c.PCT_OF_CONTRIBUTION
1911
1895
  value = c.INCREMENTAL_OUTCOME
@@ -1918,7 +1902,7 @@ class MediaSummary:
1918
1902
  num_channels = len(outcome_df[c.CHANNEL])
1919
1903
 
1920
1904
  base = (
1921
- alt.Chart(outcome_df, width=c.VEGALITE_FACET_LARGE_WIDTH)
1905
+ alt.Chart(outcome_df)
1922
1906
  .transform_window(
1923
1907
  sum_outcome=f'sum({c.PCT_OF_CONTRIBUTION})',
1924
1908
  kwargs=f'lead({c.CHANNEL})',
@@ -1934,7 +1918,10 @@ class MediaSummary:
1934
1918
  y=alt.Y(
1935
1919
  f'{c.CHANNEL}:N',
1936
1920
  axis=alt.Axis(
1937
- ticks=False, labelPadding=c.PADDING_10, domain=False
1921
+ ticks=False,
1922
+ labelPadding=c.PADDING_10,
1923
+ domain=False,
1924
+ labelLimit=0,
1938
1925
  ),
1939
1926
  title=None,
1940
1927
  sort=None,
@@ -1977,6 +1964,7 @@ class MediaSummary:
1977
1964
  title=formatter.custom_title_params(
1978
1965
  summary_text.CHANNEL_DRIVERS_CHART_TITLE
1979
1966
  ),
1967
+ width=c.VEGALITE_FACET_LARGE_WIDTH,
1980
1968
  height=c.BAR_SIZE * num_channels
1981
1969
  + c.BAR_SIZE * 2 * c.SCALED_PADDING,
1982
1970
  )
@@ -2039,11 +2027,7 @@ class MediaSummary:
2039
2027
  Returns:
2040
2028
  An Altair plot showing the spend versus outcome percentages per channel.
2041
2029
  """
2042
- outcome = (
2043
- c.REVENUE
2044
- if self._meridian.input_data.revenue_per_kpi is not None
2045
- else c.KPI.upper()
2046
- )
2030
+ outcome = c.KPI.upper() if self._use_kpi else c.REVENUE
2047
2031
  df = self._transform_contribution_spend_metrics()
2048
2032
  domain = [
2049
2033
  f'% {outcome.title() if outcome == c.REVENUE else outcome}',
@@ -2567,10 +2551,10 @@ class MediaSummary:
2567
2551
  A dataframe of spend and outcome percentages and ROI per channel.
2568
2552
  """
2569
2553
  paid_summary_metrics = self.get_paid_summary_metrics()
2570
- if self._meridian.input_data.revenue_per_kpi is not None:
2571
- outcome = summary_text.REVENUE_LABEL
2572
- else:
2554
+ if self._use_kpi:
2573
2555
  outcome = summary_text.KPI_LABEL
2556
+ else:
2557
+ outcome = summary_text.REVENUE_LABEL
2574
2558
  total_media_outcome = (
2575
2559
  paid_summary_metrics[c.INCREMENTAL_OUTCOME]
2576
2560
  .sel(