mergeron 2024.738953.1__py3-none-any.whl → 2025.739265.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mergeron might be problematic. Click here for more details.

Files changed (39) hide show
  1. mergeron/__init__.py +26 -6
  2. mergeron/core/__init__.py +5 -65
  3. mergeron/core/{damodaran_margin_data.py → empirical_margin_distribution.py} +74 -58
  4. mergeron/core/ftc_merger_investigations_data.py +147 -101
  5. mergeron/core/guidelines_boundaries.py +290 -1078
  6. mergeron/core/guidelines_boundary_functions.py +1128 -0
  7. mergeron/core/{guidelines_boundaries_specialized_functions.py → guidelines_boundary_functions_extra.py} +87 -55
  8. mergeron/core/pseudorandom_numbers.py +16 -22
  9. mergeron/data/__init__.py +3 -0
  10. mergeron/data/damodaran_margin_data.xls +0 -0
  11. mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
  12. mergeron/demo/__init__.py +3 -0
  13. mergeron/demo/visualize_empirical_margin_distribution.py +86 -0
  14. mergeron/gen/__init__.py +258 -246
  15. mergeron/gen/data_generation.py +473 -224
  16. mergeron/gen/data_generation_functions.py +876 -0
  17. mergeron/gen/enforcement_stats.py +355 -0
  18. mergeron/gen/upp_tests.py +171 -259
  19. mergeron-2025.739265.0.dist-info/METADATA +115 -0
  20. mergeron-2025.739265.0.dist-info/RECORD +23 -0
  21. {mergeron-2024.738953.1.dist-info → mergeron-2025.739265.0.dist-info}/WHEEL +1 -1
  22. mergeron/License.txt +0 -16
  23. mergeron/core/InCommon RSA Server CA cert chain.pem +0 -68
  24. mergeron/core/excel_helper.py +0 -257
  25. mergeron/core/proportions_tests.py +0 -520
  26. mergeron/ext/__init__.py +0 -5
  27. mergeron/ext/tol_colors.py +0 -851
  28. mergeron/gen/_data_generation_functions_nonpublic.py +0 -623
  29. mergeron/gen/investigations_stats.py +0 -709
  30. mergeron/jinja_LaTex_templates/clrrate_cis_summary_table_template.tex.jinja2 +0 -121
  31. mergeron/jinja_LaTex_templates/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -82
  32. mergeron/jinja_LaTex_templates/ftcinvdata_summary_table_template.tex.jinja2 +0 -57
  33. mergeron/jinja_LaTex_templates/ftcinvdata_summarypaired_table_template.tex.jinja2 +0 -104
  34. mergeron/jinja_LaTex_templates/mergeron.cls +0 -161
  35. mergeron/jinja_LaTex_templates/mergeron_table_collection_template.tex.jinja2 +0 -90
  36. mergeron/jinja_LaTex_templates/setup_tikz_tables.tex.jinja2 +0 -84
  37. mergeron-2024.738953.1.dist-info/METADATA +0 -93
  38. mergeron-2024.738953.1.dist-info/RECORD +0 -30
  39. /mergeron/{core → data}/ftc_invdata.msgpack +0 -0
@@ -1,520 +0,0 @@
1
- """
2
- Functions to estimate confidence intervals for
3
- (a.) a proportion or multiple proportions, and (b.) contrast between
4
- two independent proportions or two series of independent proportions.
5
-
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- from collections.abc import Sequence
11
- from dataclasses import dataclass
12
- from importlib.metadata import version
13
- from typing import Literal, TypeVar
14
-
15
- import numpy as np
16
- from numpy.typing import NBitBase, NDArray
17
- from scipy.optimize import OptimizeResult, root # type: ignore
18
- from scipy.stats import beta, chi2, norm # type: ignore
19
-
20
- from .. import _PKG_NAME # noqa: TID252
21
-
22
# Version string of the installed distribution named by _PKG_NAME.
__version__ = version(_PKG_NAME)


# Type variable bounded by numpy's NBitBase; it parameterizes the
# np.integer[...] element type in this module's array annotations.
TI = TypeVar("TI", bound=NBitBase)
26
-
27
-
28
- def propn_ci(
29
- _npos: NDArray[np.integer[TI]] | int = 4,
30
- _nobs: NDArray[np.integer[TI]] | int = 10,
31
- /,
32
- *,
33
- alpha: float = 0.05,
34
- method: Literal[
35
- "Agresti-Coull", "Clopper-Pearson", "Exact", "Wilson", "Score"
36
- ] = "Wilson",
37
- ) -> tuple[
38
- NDArray[np.float64] | float,
39
- NDArray[np.float64] | float,
40
- NDArray[np.float64] | float,
41
- NDArray[np.float64] | float,
42
- ]:
43
- """Returns point estimates and confidence interval for a proportion
44
-
45
- Methods "Clopper-Pearson" and "Exact" are synoymous [3]_. Similarly,
46
- "Wilson" and "Score" are synonyms here.
47
-
48
- Parameters
49
- ----------
50
- _npos
51
- Number of positives
52
-
53
- _nobs
54
- Number of observed values
55
-
56
- alpha
57
- Significance level
58
-
59
- method
60
- Method to use for estimating confidence interval
61
-
62
- Returns
63
- -------
64
- Raw and estimated proportions, and bounds of the confidence interval
65
-
66
-
67
- References
68
- ----------
69
-
70
- .. [3] Alan Agresti & Brent A. Coull (1998) Approximate is Better
71
- than “Exact” for Interval Estimation of Binomial Proportions,
72
- The American Statistician, 52:2, 119-126,
73
- https://doi.org/10.1080/00031305.1998.10480550
74
-
75
- """
76
-
77
- for _f in _npos, _nobs:
78
- if not isinstance(_f, int | np.integer):
79
- raise ValueError(
80
- f"Count, {_f!r} must have type that is a subtype of np.integer."
81
- )
82
-
83
- if not _nobs:
84
- return (np.nan, np.nan, np.nan, np.nan)
85
-
86
- _raw_phat: NDArray[np.float64] | float = _npos / _nobs
87
- _est_phat: NDArray[np.float64] | float
88
- _est_ci_l: NDArray[np.float64] | float
89
- _est_ci_u: NDArray[np.float64] | float
90
-
91
- match method:
92
- case "Clopper-Pearson" | "Exact":
93
- _est_ci_l, _est_ci_u = (
94
- beta.ppf(*_f)
95
- for _f in (
96
- (alpha / 2, _npos, _nobs - _npos + 1),
97
- (1 - alpha / 2, _npos + 1, _nobs - _npos),
98
- )
99
- )
100
- _est_phat = 1 / 2 * (_est_ci_l + _est_ci_u)
101
-
102
- case "Agresti-Coull":
103
- _zsc = norm.ppf(1 - alpha / 2)
104
- _zscsq = _zsc * _zsc
105
- _adjmt = 4 if alpha == 0.05 else _zscsq
106
- _est_phat = (_npos + _adjmt / 2) / (_nobs + _adjmt)
107
- _est_ci_l, _est_ci_u = (
108
- _est_phat + _g
109
- for _g in [
110
- _f * _zsc * np.sqrt(_est_phat * (1 - _est_phat) / (_nobs + _adjmt))
111
- for _f in (-1, 1)
112
- ]
113
- )
114
-
115
- case "Wilson" | "Score":
116
- _zsc = norm.ppf(1 - alpha / 2)
117
- _zscsq = _zsc * _zsc
118
- _est_phat = (_npos + _zscsq / 2) / (_nobs + _zscsq)
119
- _est_ci_l, _est_ci_u = (
120
- _est_phat
121
- + _f
122
- * _zsc
123
- * np.sqrt(_nobs * _raw_phat * (1 - _raw_phat) + _zscsq / 4)
124
- / (_nobs + _zscsq)
125
- for _f in (-1, 1)
126
- )
127
-
128
- case _:
129
- raise ValueError(f"Method, {f'"{method}"'} not yet implemented.")
130
-
131
- return _raw_phat, _est_phat, _est_ci_l, _est_ci_u
132
-
133
-
134
- def propn_ci_multinomial(
135
- _counts: NDArray[np.integer[TI]],
136
- /,
137
- *,
138
- alpha: float = 0.05,
139
- method: Literal["goodman", "quesenberry-hurst"] = "goodman",
140
- alternative: Literal["default", "simplified"] = "default",
141
- ) -> NDArray[np.float64]:
142
- """Confidence intervals for multiple proportions.
143
-
144
- Parameters
145
- ----------
146
- _counts
147
- `n x 2` np.array of multinomial counts
148
- alpha
149
- Significance level
150
- method
151
- Method used to computer confidence intervals
152
- alternative
153
- Method used to estimate standard errors, whether "default"
154
- or "simplified"
155
-
156
- Returns
157
- -------
158
- Array of confidence intervals
159
-
160
- """
161
- if method not in (_mli := ("goodman", "quesenberry-hurst")):
162
- raise ValueError(
163
- f'Invalid value {f'"{method}"'} for "method". Must be one of {_mli}.'
164
- )
165
-
166
- _n = np.einsum("j->", _counts).astype(np.int64)
167
- _prob = _counts / _n
168
- _chi2_cr = (
169
- chi2(len(_counts) - 1).ppf(1 - alpha)
170
- if method == "quesenberry-hurst"
171
- else chi2(1).ppf(1 - (alpha / len(_counts)))
172
- )
173
-
174
- if alternative == "default":
175
- _ci_len_half = np.sqrt(_chi2_cr * (_chi2_cr + 4 * _n * _prob * (1 - _prob)))
176
- return np.column_stack([
177
- (_chi2_cr + 2 * _counts + _f * _ci_len_half) / (2 * (_n + _chi2_cr))
178
- for _f in (-1, 1)
179
- ])
180
-
181
- elif alternative == "simplified":
182
- _ci_len_half = np.sqrt(_chi2_cr * _prob * (1 - _prob) / _n)
183
- return np.column_stack([_prob + _f * _ci_len_half for _f in (-1, 1)])
184
-
185
- else:
186
- raise ValueError(
187
- f"Invalid value, {f'"{alternative}"'} for, \"alternative\". "
188
- f"Must be one of '{'("default", "simplified")'}'."
189
- )
190
-
191
-
192
- def propn_diff_ci(
193
- _npos1: int = 4,
194
- _nobs1: int = 10,
195
- _npos2: int = 4,
196
- _nobs2: int = 10,
197
- /,
198
- *,
199
- alpha: float = 0.05,
200
- method: Literal["Agresti-Caffo", "Mee", "M-N", "Newcombe", "Score"] = "M-N",
201
- ) -> tuple[float, float, float, float]:
202
- R"""Confidence intervals for differences in binomial proportions.
203
-
204
- Methods available are Agresti-Caffo [4]_, Mee [5]_, Meitinen-Nurminen [5]_ [6]_
205
- and Newcombe (aka, Score method) [5]_. See also, source code for the
206
- R-language function BinomDiffCI, in the module StatsAndCIs [7]_.
207
-
208
- Parameters
209
- ----------
210
- _npos1, _npos2
211
- Counts of positive outcomes in the respective binomial distributions
212
- _nobs1, _nobs2
213
- Counts of all outcomes in the respective binomial distributions
214
- alpha
215
- Significance level
216
- method
217
- Method used to compute confidence intervals
218
-
219
- Returns
220
- -------
221
- Raw and expected values of estimated difference, with bounds of c.i.
222
-
223
- References
224
- ----------
225
-
226
- .. [4] Agresti, A., & Caffo, T. (2000). Simple and Effective
227
- Confidence Intervals for Proportions and Differences of Proportions
228
- Result from Adding Two Successes and Two Failures.
229
- The American Statistician, 54(4), 280--288. https://doi.org/10.2307/2685779
230
-
231
- .. [5] Newcombe, R.G. (1998). Two-sided confidence intervals for
232
- the single proportion: comparison of seven methods. Statist. Med., 17: 857-872.
233
- https://doi.org/10.1002/(SICI)1097-0258(19980430)17:8%3C857::AID-SIM777%3E3.0.CO;2-E
234
-
235
- .. [6] Miettinen, O. and Nurminen, M. (1985). Comparative analysis of two rates.
236
- Statist. Med., 4: 213-226. https://doi.org/10.1002/sim.4780040211; Appendix I
237
-
238
- .. [7] StatsAndCIs.r, function BinomDiffCI, method, "mn"
239
- https://github.com/cran/DescTools/blob/master/R/StatsAndCIs.r
240
- (R source code is distributed under the CC-BY license.)
241
-
242
- """
243
- for _f in _npos1, _nobs1, _npos1, _nobs2:
244
- if not isinstance(_f, int | np.integer):
245
- raise ValueError(
246
- f"Count, {_f!r} must be of int type or be a subtype of np.integer."
247
- )
248
-
249
- if not min(_nobs1, _nobs2):
250
- return (np.nan, np.nan, np.nan, np.nan)
251
-
252
- match method:
253
- case "Agresti-Caffo":
254
- _res = _propn_diff_ci_agresti_caffo(
255
- _npos1, _nobs1, _npos2, _nobs2, alpha=alpha
256
- )
257
-
258
- case "Newcombe" | "Score":
259
- _res = _propn_diff_ci_newcombe_score(
260
- _npos1, _nobs1, _npos2, _nobs2, alpha=alpha
261
- )
262
-
263
- case "M-N" | "Mee":
264
- _res = _propn_diff_ci_mn(
265
- _npos1, _nobs1, _npos2, _nobs2, alpha=alpha, method=method
266
- )
267
-
268
- case _:
269
- raise ValueError(f"Method, {f'"{method}"'} not implemented.")
270
-
271
- return _res
272
-
273
-
274
- def _propn_diff_ci_agresti_caffo(
275
- _npos1: int = 4,
276
- _nobs1: int = 10,
277
- _npos2: int = 4,
278
- _nobs2: int = 10,
279
- /,
280
- *,
281
- alpha: float = 0.05,
282
- ) -> tuple[float, float, float, float]:
283
- """
284
- Estimate Agresti-Caffo confidence intervals for differences of
285
- multiple proportions.
286
- """
287
-
288
- _diff_hat = _npos1 / _nobs1 - _npos2 / _nobs2
289
-
290
- _zsc = norm.ppf(1 - alpha / 2)
291
- _zscsq = _zsc * _zsc
292
-
293
- _adjmt_t = 2 if alpha == 0.05 else _zscsq / 2
294
- _npos1_ac, _npos2_ac = (_f + _adjmt_t / 2 for _f in (_npos1, _npos2))
295
- _nobs1_ac, _nobs2_ac = (_f + _adjmt_t for _f in (_nobs1, _nobs2))
296
-
297
- _p1_est = _npos1_ac / _nobs1_ac
298
- _p2_est = _npos2_ac / _nobs2_ac
299
- _diff_est = _p1_est - _p2_est
300
- _se_est = np.sqrt(
301
- _p1_est * (1 - _p1_est) / _nobs1_ac + _p2_est * (1 - _p2_est) / _nobs2_ac
302
- )
303
-
304
- _diff_cl_l, _diff_cl_u = (_diff_est + _s * _zsc * _se_est for _s in (-1, 1))
305
-
306
- return _diff_hat, _diff_est, max(-1.0, _diff_cl_l), min(1.0, _diff_cl_u)
307
-
308
-
309
def _propn_diff_ci_newcombe_score(
    _npos1: int = 4,
    _nobs1: int = 10,
    _npos2: int = 4,
    _nobs2: int = 10,
    /,
    *,
    alpha: float = 0.05,
) -> tuple[float, float, float, float]:
    """
    Newcombe (score) confidence interval for a difference of two proportions.

    Combines the Wilson bounds of each sample's proportion into bounds on
    the difference, and reports the interval midpoint as the estimated
    difference. See Newcombe (1998), Agresti-Caffo (2000).
    """
    _wilson_1 = propn_ci(_npos1, _nobs1, alpha=alpha, method="Wilson")
    _wilson_2 = propn_ci(_npos2, _nobs2, alpha=alpha, method="Wilson")
    _lo1, _hi1 = _wilson_1[-2:]
    _lo2, _hi2 = _wilson_2[-2:]

    _z = norm.ppf(1 - alpha / 2)
    _raw_diff = _npos1 / _nobs1 - _npos2 / _nobs2

    # Lower bound pairs sample 1's lower limit with sample 2's upper limit;
    # the upper bound uses the opposite pairing.
    _se_lower = np.sqrt(_lo1 * (1 - _lo1) / _nobs1 + _hi2 * (1 - _hi2) / _nobs2)
    _se_upper = np.sqrt(_hi1 * (1 - _hi1) / _nobs1 + _lo2 * (1 - _lo2) / _nobs2)

    _lower = _raw_diff - _z * _se_lower
    _upper = _raw_diff + _z * _se_upper

    return _raw_diff, (_lower + _upper) / 2, _lower, _upper
335
-
336
-
337
- def _propn_diff_ci_mn(
338
- _npos1: int = 4,
339
- _nobs1: int = 10,
340
- _npos2: int = 4,
341
- _nobs2: int = 10,
342
- /,
343
- *,
344
- alpha: float = 0.05,
345
- method: Literal["M-N", "Mee"] = "M-N",
346
- ) -> tuple[float, float, float, float]:
347
- """
348
- See Miettinen and Nurminen (1985; Newcombe (1998);
349
- and StasAndCIs.r -> BinomDiffCi -> "mn".
350
-
351
- """
352
- for _f in _npos1, _nobs1, _npos1, _nobs2:
353
- if not isinstance(_f, int | np.integer):
354
- raise ValueError(
355
- f"Count, {_f!r} must have type that is a subtype of np.integer."
356
- )
357
-
358
- _chi_sq_cr = chi2.ppf(1 - alpha, 1)
359
- _counts = (_npos1, _nobs1, _npos2, _nobs2)
360
-
361
- _diff_hat = _npos1 / _nobs1 - _npos2 / _nobs2
362
-
363
- _ci_est_start = np.array([(_diff_hat + _s) / 2 for _s in (-1, 1)])
364
- # Avoid potential corner cases
365
- _ci_est_offset = (1 - 1.055e-2, 1)
366
- if _diff_hat == 1.0:
367
- _ci_est_start += _ci_est_offset
368
- elif _diff_hat == -1.0:
369
- _ci_est_start -= _ci_est_offset[::-1]
370
-
371
- def _obj_fn(
372
- _dh: float, _counts: Sequence[int], _cr: float, _method: Literal["M-N", "Mee"]
373
- ) -> float:
374
- return _cr - _propn_diff_chisq_mn(_counts, _dh, method=_method)
375
-
376
- def _get_sol(_sol: OptimizeResult, /) -> float:
377
- return float(_sol.x[0] if _sol.x.shape else _sol.x)
378
-
379
- _diff_cl_l, _diff_cl_u = (
380
- _get_sol(root(_obj_fn, _dh0, args=(_counts, _chi_sq_cr, method)))
381
- for _dh0 in _ci_est_start
382
- )
383
-
384
- _ci_lo, _ci_hi = max(-1.0, _diff_cl_l), min(1.0, _diff_cl_u)
385
- return _diff_hat, (_ci_lo + _ci_hi) / 2, _ci_lo, _ci_hi
386
-
387
-
388
- def _propn_diff_chisq_mn(
389
- _counts: Sequence[int],
390
- _rd: float = 0.0,
391
- /,
392
- *,
393
- method: Literal["M-N", "Mee"] = "M-N",
394
- ) -> float:
395
- R"""Estimate the :math:`\chi^2` statistic for the Meittinen-Nurminen (1985),
396
- and Newcombe (1998) confidence intervals for a difference in binomial proportions.
397
-
398
- Parameters
399
- ----------
400
- _counts
401
- Numbers of positives and observations for (two) samples to be tested
402
-
403
- _rd
404
- Starting value
405
-
406
- method
407
- Specify Meitinen-Nurminen or Mee
408
-
409
- Returns
410
- -------
411
- Chi-square estimate
412
-
413
- """
414
- if _counts is None:
415
- _counts = [1] * 4
416
-
417
- _np1, _no1, _np2, _no2 = _counts
418
- _p1h, _p2h = _np1 / _no1, _np2 / _no2
419
- _diff = _p1h - _p2h - _rd
420
-
421
- if not _diff:
422
- return 0.0
423
-
424
- _np, _no = _np1 + _np2, _no1 + _no2
425
-
426
- _l3 = _no
427
- _l2 = (_no1 + 2 * _no2) * _rd - _no - _np
428
- _l1 = (_no2 * _rd - _no - 2 * _np2) * _rd + _np
429
- _l0 = _np2 * _rd * (1 - _rd)
430
- _l2_to_3l3 = _l2 / (3 * _l3)
431
-
432
- _q = _l2_to_3l3**3 - (_l1 * _l2_to_3l3 - _l0) / (2 * _l3)
433
- _p = np.sign(_q) * np.sqrt(_l2**2 - 3 * _l3 * _l1) / (3 * _l3)
434
- _a = (np.pi + np.arccos(_q / _p**3)) / 3
435
-
436
- _p2t: float = 2 * _p * np.cos(_a) - _l2_to_3l3
437
- _p1t: float = _p2t + _rd
438
-
439
- return _diff**2 / (
440
- (_p1t * (1 - _p1t) / _no1 + _p2t * (1 - _p2t) / _no2)
441
- * (_no / (_no - 1) if method == "M-N" else 1.0)
442
- )
443
-
444
-
445
def propn_diff_ci_multinomial(
    _counts: NDArray[np.integer[TI]], /, *, alpha: float = 0.05
) -> NDArray[np.float64]:
    """Confidence intervals for pair-wise differences in multinomial proportions.

    Differences in multinomial proportions sum to zero.

    Parameters
    ----------
    _counts
        Two dimensional np.array of observed values of multinomial distributions
        (in columns).
    alpha
        Significance level

    Returns
    -------
        Array of confidence intervals, one row per category: lower bounds
        in the first column, upper bounds in the second

    Raises
    ------
    ValueError
        If `_counts` has more than two dimensions.

    """

    if _counts.ndim > 2:
        raise ValueError(
            "This implementation is only valid for estimating confidence intervals "
            "for differences in two (2) sets of multinomial proportions."
        )

    # Proportions within each column (each multinomial sample).
    _col_totals = np.einsum("jk->k", _counts).astype(np.int64)
    _share = _counts / _col_totals

    # NOTE(review): the variance term divides by the cell counts rather than
    # by the column totals -- confirm this is the intended estimator.
    _row_var = np.einsum("jk->j", _share * (1 - _share) / _counts)[:, None]

    _delta = np.diff(_share, axis=1)
    # Critical value at level alpha divided by the number of categories.
    _crit = norm.ppf(1 - (alpha / len(_counts)))
    _half_len = _crit * np.sqrt(_row_var)

    return np.column_stack([_delta - _half_len, _delta + _half_len])
477
-
478
-
479
- @dataclass(slots=True, frozen=True)
480
- class MultinomialPropnsTest:
481
- estimate: np.float64
482
- dof: int
483
- critical_value: np.float64
484
- p_value: np.float64
485
-
486
-
487
def propn_test_multinomial(
    _counts: NDArray[np.integer[TI]], /, *, alpha: float = 0.05
) -> MultinomialPropnsTest:
    """Chi-square test for homogeneity of differences in multinomial proportions.

    Differences in multinomial proportions sum to zero.

    Parameters
    ----------
    _counts
        Two dimensional array of observed values of multinomial distributions
        (in columns).
    alpha
        Significance level

    Returns
    -------
        Estimated statistic, degrees of freedom, critical value, p-value

    """

    _grand_total = np.einsum("jk->", _counts).astype(np.int64)
    _col_totals = np.einsum("jk->k", _counts).astype(np.int64)
    _share = _counts / _col_totals

    # Row-wise pooled proportions, from column totals weighted by the
    # inverse within-column proportions.
    _pooled = _grand_total / np.einsum("jk->j", _col_totals / _share)

    _statistic = _grand_total * ((1 / np.einsum("j->", _pooled)) - 1)
    # Degrees of freedom: product of (size - 1) over the array's dimensions.
    _dof = (np.array(_counts.shape) - 1).prod()
    _ref_dist = chi2(_dof)

    return MultinomialPropnsTest(
        estimate=_statistic,
        dof=_dof,
        critical_value=_ref_dist.ppf(1 - alpha),
        p_value=1 - _ref_dist.cdf(_statistic),
    )
mergeron/ext/__init__.py DELETED
@@ -1,5 +0,0 @@
1
- from importlib.metadata import version
2
-
3
- from .. import _PKG_NAME # noqa: TID252
4
-
5
- __version__ = version(_PKG_NAME)