mergeron_extra-2024.739148.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
+ from __future__ import annotations
+
+ import numpy as np
+ from numpy.typing import NDArray
+
+ VERSION = "2024.739148.7"
+
+ __version__ = VERSION
+
+ np.set_printoptions(precision=24, floatmode="fixed")
+
+ type ArrayBoolean = NDArray[np.bool_]
+ type ArrayFloat = NDArray[np.half | np.single | np.double]
+ type ArrayINT = NDArray[np.intp]
+
+ type ArrayDouble = NDArray[np.double]
+ type ArrayBIGINT = NDArray[np.int64]
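
The aliases above use the PEP 695 `type` statement, so this file requires Python 3.12 or newer. A minimal sketch of how the aliases might appear in annotations, assuming the wheel installs a top-level `mergeron_extra` package; the helper function below is illustrative and not part of the package:

    import numpy as np
    from mergeron_extra import ArrayDouble, ArrayINT

    def to_proportions(counts: ArrayINT) -> ArrayDouble:
        # Illustrative only: divide each count by the total, yielding doubles.
        return counts / np.einsum("j->", counts)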
@@ -0,0 +1,512 @@
+ """
+ Functions to estimate confidence intervals for (a) a proportion or
+ multiple proportions, and (b) the contrast between two independent
+ proportions or two series of independent proportions.
+
+ """
+
+ from __future__ import annotations
+
+ from collections.abc import Sequence
+ from dataclasses import dataclass
+ from typing import Literal
+
+ import numpy as np
+ from scipy.optimize import OptimizeResult, root  # type: ignore
+ from scipy.stats import beta, chi2, norm  # type: ignore
+
+ from . import VERSION, ArrayDouble, ArrayINT
+
+ __version__ = VERSION
+
+
+ def propn_ci(
+     _npos: ArrayINT | int = 4,
+     _nobs: ArrayINT | int = 10,
+     /,
+     *,
+     alpha: float = 0.05,
+     method: Literal[
+         "Agresti-Coull", "Clopper-Pearson", "Exact", "Wilson", "Score"
+     ] = "Wilson",
+ ) -> tuple[
+     ArrayDouble | float, ArrayDouble | float, ArrayDouble | float, ArrayDouble | float
+ ]:
+     """Returns point estimates and confidence interval for a proportion
+
+     Methods "Clopper-Pearson" and "Exact" are synonymous [3]_. Similarly,
+     "Wilson" and "Score" are synonyms here.
+
+     Parameters
+     ----------
+     _npos
+         Number of positives
+
+     _nobs
+         Number of observed values
+
+     alpha
+         Significance level
+
+     method
+         Method to use for estimating confidence interval
+
+     Returns
+     -------
+     Raw and estimated proportions, and bounds of the confidence interval
+
+
+     References
+     ----------
+
+     .. [3] Alan Agresti & Brent A. Coull (1998) Approximate is Better
+        than “Exact” for Interval Estimation of Binomial Proportions,
+        The American Statistician, 52:2, 119-126,
+        https://doi.org/10.1080/00031305.1998.10480550
+
+     """
+
+     for _f in _npos, _nobs:
+         if not isinstance(_f, int | np.integer):
+             raise ValueError(
+                 f"Count, {_f!r} must have type that is a subtype of np.integer."
+             )
+
+     if not _nobs:
+         return (np.nan, np.nan, np.nan, np.nan)
+
+     _raw_phat: ArrayDouble | float = _npos / _nobs
+     _est_phat: ArrayDouble | float
+     _est_ci_l: ArrayDouble | float
+     _est_ci_u: ArrayDouble | float
+
+     match method:
+         case "Clopper-Pearson" | "Exact":
+             _est_ci_l, _est_ci_u = (
+                 beta.ppf(*_f)
+                 for _f in (
+                     (alpha / 2, _npos, _nobs - _npos + 1),
+                     (1 - alpha / 2, _npos + 1, _nobs - _npos),
+                 )
+             )
+             _est_phat = 1 / 2 * (_est_ci_l + _est_ci_u)
+
+         case "Agresti-Coull":
+             _zsc = norm.ppf(1 - alpha / 2)
+             _zscsq = _zsc * _zsc
+             _adjmt = 4 if alpha == 0.05 else _zscsq
+             _est_phat = (_npos + _adjmt / 2) / (_nobs + _adjmt)
+             _est_ci_l, _est_ci_u = (
+                 _est_phat + _g
+                 for _g in [
+                     _f * _zsc * np.sqrt(_est_phat * (1 - _est_phat) / (_nobs + _adjmt))
+                     for _f in (-1, 1)
+                 ]
+             )
+
+         case "Wilson" | "Score":
+             _zsc = norm.ppf(1 - alpha / 2)
+             _zscsq = _zsc * _zsc
+             _est_phat = (_npos + _zscsq / 2) / (_nobs + _zscsq)
+             _est_ci_l, _est_ci_u = (
+                 _est_phat
+                 + _f
+                 * _zsc
+                 * np.sqrt(_nobs * _raw_phat * (1 - _raw_phat) + _zscsq / 4)
+                 / (_nobs + _zscsq)
+                 for _f in (-1, 1)
+             )
+
+         case _:
+             raise ValueError(f'Method, "{method}" not yet implemented.')
+
+     return _raw_phat, _est_phat, _est_ci_l, _est_ci_u
+
+
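A minimal usage sketch for `propn_ci`. The submodule's file name is not shown in this diff, so the import path below is a guess; adjust it to wherever the module above is installed.

    from mergeron_extra.proportions_ci import propn_ci  # hypothetical path

    raw, est, ci_lo, ci_hi = propn_ci(4, 10, alpha=0.05, method="Wilson")
    # raw = 0.4; est is the Wilson mid-point (about 0.43); the 95% Wilson
    # interval works out to roughly (0.17, 0.69).
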
+ def propn_ci_multinomial(
+     _counts: ArrayINT,
+     /,
+     *,
+     alpha: float = 0.05,
+     method: Literal["goodman", "quesenberry-hurst"] = "goodman",
+     alternative: Literal["default", "simplified"] = "default",
+ ) -> ArrayDouble:
+     """Confidence intervals for multiple proportions.
+
+     Parameters
+     ----------
+     _counts
+         1-D np.array of multinomial category counts
+     alpha
+         Significance level
+     method
+         Method used to compute confidence intervals
+     alternative
+         Method used to estimate standard errors, whether "default"
+         or "simplified"
+
+     Returns
+     -------
+     Array of confidence intervals
+
+     """
+     if method not in (_mli := ("goodman", "quesenberry-hurst")):
+         raise ValueError(
+             f'Invalid value "{method}" for "method". Must be one of {_mli}.'
+         )
+
+     _n = np.einsum("j->", _counts).astype(np.int64)
+     _prob = _counts / _n
+     _chi2_cr = (
+         chi2(len(_counts) - 1).ppf(1 - alpha)
+         if method == "quesenberry-hurst"
+         else chi2(1).ppf(1 - (alpha / len(_counts)))
+     )
+
+     if alternative == "default":
+         _ci_len_half = np.sqrt(_chi2_cr * (_chi2_cr + 4 * _n * _prob * (1 - _prob)))
+         return np.column_stack([
+             (_chi2_cr + 2 * _counts + _f * _ci_len_half) / (2 * (_n + _chi2_cr))
+             for _f in (-1, 1)
+         ])
+
+     elif alternative == "simplified":
+         _ci_len_half = np.sqrt(_chi2_cr * _prob * (1 - _prob) / _n)
+         return np.column_stack([_prob + _f * _ci_len_half for _f in (-1, 1)])
+
+     else:
+         raise ValueError(
+             f'Invalid value, "{alternative}" for "alternative". '
+             'Must be one of ("default", "simplified").'
+         )
+
+
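A sketch of calling `propn_ci_multinomial` on a vector of category counts; the counts, and the assumption that the function is already in scope, are illustrative.

    import numpy as np

    counts = np.array([56, 72, 73, 59, 62, 87, 58])
    ci = propn_ci_multinomial(counts, alpha=0.05, method="goodman")
    # One (lower, upper) row per category; each row brackets counts / counts.sum().
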
+ def propn_diff_ci(
+     _npos1: int = 4,
+     _nobs1: int = 10,
+     _npos2: int = 4,
+     _nobs2: int = 10,
+     /,
+     *,
+     alpha: float = 0.05,
+     method: Literal["Agresti-Caffo", "Mee", "M-N", "Newcombe", "Score"] = "M-N",
+ ) -> tuple[float, float, float, float]:
+     R"""Confidence intervals for differences in binomial proportions.
+
+     Methods available are Agresti-Caffo [4]_, Mee [5]_, Miettinen-Nurminen [5]_ [6]_
+     and Newcombe (aka, Score method) [5]_. See also, the source code for the
+     R-language function BinomDiffCI, in the module StatsAndCIs [7]_.
+
+     Parameters
+     ----------
+     _npos1, _npos2
+         Counts of positive outcomes in the respective binomial distributions
+     _nobs1, _nobs2
+         Counts of all outcomes in the respective binomial distributions
+     alpha
+         Significance level
+     method
+         Method used to compute confidence intervals
+
+     Returns
+     -------
+     Raw and expected values of estimated difference, with bounds of c.i.
+
+     References
+     ----------
+
+     .. [4] Agresti, A., & Caffo, B. (2000). Simple and Effective
+        Confidence Intervals for Proportions and Differences of Proportions
+        Result from Adding Two Successes and Two Failures.
+        The American Statistician, 54(4), 280--288. https://doi.org/10.2307/2685779
+
+     .. [5] Newcombe, R.G. (1998). Two-sided confidence intervals for
+        the single proportion: comparison of seven methods. Statist. Med., 17: 857-872.
+        https://doi.org/10.1002/(SICI)1097-0258(19980430)17:8%3C857::AID-SIM777%3E3.0.CO;2-E
+
+     .. [6] Miettinen, O. and Nurminen, M. (1985). Comparative analysis of two rates.
+        Statist. Med., 4: 213-226. https://doi.org/10.1002/sim.4780040211; Appendix I
+
+     .. [7] StatsAndCIs.r, function BinomDiffCI, method, "mn"
+        https://github.com/cran/DescTools/blob/master/R/StatsAndCIs.r
+        (R source code is distributed under the CC-BY license.)
+
+     """
+     for _f in _npos1, _nobs1, _npos2, _nobs2:
+         if not isinstance(_f, int | np.integer):
+             raise ValueError(
+                 f"Count, {_f!r} must be of int type or be a subtype of np.integer."
+             )
+
+     if not min(_nobs1, _nobs2):
+         return (np.nan, np.nan, np.nan, np.nan)
+
+     match method:
+         case "Agresti-Caffo":
+             _res = _propn_diff_ci_agresti_caffo(
+                 _npos1, _nobs1, _npos2, _nobs2, alpha=alpha
+             )
+
+         case "Newcombe" | "Score":
+             _res = _propn_diff_ci_newcombe_score(
+                 _npos1, _nobs1, _npos2, _nobs2, alpha=alpha
+             )
+
+         case "M-N" | "Mee":
+             _res = _propn_diff_ci_mn(
+                 _npos1, _nobs1, _npos2, _nobs2, alpha=alpha, method=method
+             )
+
+         case _:
+             raise ValueError(f'Method, "{method}" not implemented.')
+
+     return _res
+
+
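The method branches can be compared side by side; a short sketch, assuming `propn_diff_ci` is in scope and using made-up counts:

    for m in ("Agresti-Caffo", "Newcombe", "M-N", "Mee"):
        d_raw, d_est, lo, hi = propn_diff_ci(56, 70, 48, 80, alpha=0.05, method=m)
        print(f"{m:>13}: diff={d_raw:.3f}  95% CI=({lo:.3f}, {hi:.3f})")
    # Every interval brackets the raw difference 56/70 - 48/80 = 0.2.
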
+ def _propn_diff_ci_agresti_caffo(
+     _npos1: int = 4,
+     _nobs1: int = 10,
+     _npos2: int = 4,
+     _nobs2: int = 10,
+     /,
+     *,
+     alpha: float = 0.05,
+ ) -> tuple[float, float, float, float]:
+     """
+     Estimate Agresti-Caffo confidence intervals for the difference
+     between two binomial proportions.
+     """
+
+     _diff_hat = _npos1 / _nobs1 - _npos2 / _nobs2
+
+     _zsc = norm.ppf(1 - alpha / 2)
+     _zscsq = _zsc * _zsc
+
+     _adjmt_t = 2 if alpha == 0.05 else _zscsq / 2
+     _npos1_ac, _npos2_ac = (_f + _adjmt_t / 2 for _f in (_npos1, _npos2))
+     _nobs1_ac, _nobs2_ac = (_f + _adjmt_t for _f in (_nobs1, _nobs2))
+
+     _p1_est = _npos1_ac / _nobs1_ac
+     _p2_est = _npos2_ac / _nobs2_ac
+     _diff_est = _p1_est - _p2_est
+     _se_est = np.sqrt(
+         _p1_est * (1 - _p1_est) / _nobs1_ac + _p2_est * (1 - _p2_est) / _nobs2_ac
+     )
+
+     _diff_cl_l, _diff_cl_u = (_diff_est + _s * _zsc * _se_est for _s in (-1, 1))
+
+     return _diff_hat, _diff_est, max(-1.0, _diff_cl_l), min(1.0, _diff_cl_u)
+
+
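At alpha = 0.05 the adjustment term above is 2, i.e. each sample gains one success and one failure before the Wald-style interval is formed. A small worked sketch (counts are illustrative, helper assumed in scope):

    raw, est, lo, hi = _propn_diff_ci_agresti_caffo(4, 10, 2, 10, alpha=0.05)
    # raw = 4/10 - 2/10 = 0.2, while est = 5/12 - 3/12 ≈ 0.167 after the adjustment.
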
+ def _propn_diff_ci_newcombe_score(
+     _npos1: int = 4,
+     _nobs1: int = 10,
+     _npos2: int = 4,
+     _nobs2: int = 10,
+     /,
+     *,
+     alpha: float = 0.05,
+ ) -> tuple[float, float, float, float]:
+     """
+     See Newcombe (1998); Agresti-Caffo (2000).
+     """
+     _l1, _u1 = propn_ci(_npos1, _nobs1, alpha=alpha, method="Wilson")[-2:]
+     _l2, _u2 = propn_ci(_npos2, _nobs2, alpha=alpha, method="Wilson")[-2:]
+
+     _zsc = norm.ppf(1 - alpha / 2)
+     _diff_hat = _npos1 / _nobs1 - _npos2 / _nobs2
+
+     _diff_cl_l = _diff_hat - _zsc * np.sqrt(
+         _l1 * (1 - _l1) / _nobs1 + _u2 * (1 - _u2) / _nobs2
+     )
+     _diff_cl_u = _diff_hat + _zsc * np.sqrt(
+         _u1 * (1 - _u1) / _nobs1 + _l2 * (1 - _l2) / _nobs2
+     )
+
+     return _diff_hat, (_diff_cl_l + _diff_cl_u) / 2, _diff_cl_l, _diff_cl_u
+
+
+ def _propn_diff_ci_mn(
+     _npos1: int = 4,
+     _nobs1: int = 10,
+     _npos2: int = 4,
+     _nobs2: int = 10,
+     /,
+     *,
+     alpha: float = 0.05,
+     method: Literal["M-N", "Mee"] = "M-N",
+ ) -> tuple[float, float, float, float]:
+     """
+     See Miettinen and Nurminen (1985); Newcombe (1998);
+     and StatsAndCIs.r -> BinomDiffCI -> "mn".
+
+     """
+     for _f in _npos1, _nobs1, _npos2, _nobs2:
+         if not isinstance(_f, int | np.integer):
+             raise ValueError(
+                 f"Count, {_f!r} must have type that is a subtype of np.integer."
+             )
+
+     _chi_sq_cr = chi2.ppf(1 - alpha, 1)
+     _counts = (_npos1, _nobs1, _npos2, _nobs2)
+
+     _diff_hat = _npos1 / _nobs1 - _npos2 / _nobs2
+
+     _ci_est_start = np.array([(_diff_hat + _s) / 2 for _s in (-1, 1)])
+     # Avoid potential corner cases
+     _ci_est_offset = (1 - 1.055e-2, 1)
+     if _diff_hat == 1.0:
+         _ci_est_start += _ci_est_offset
+     elif _diff_hat == -1.0:
+         _ci_est_start -= _ci_est_offset[::-1]
+
+     def _obj_fn(
+         _dh: float, _counts: Sequence[int], _cr: float, _method: Literal["M-N", "Mee"]
+     ) -> float:
+         return _cr - _propn_diff_chisq_mn(_counts, _dh, method=_method)
+
+     def _get_sol(_sol: OptimizeResult, /) -> float:
+         return float(_sol.x[0] if _sol.x.shape else _sol.x)
+
+     _diff_cl_l, _diff_cl_u = (
+         _get_sol(root(_obj_fn, _dh0, args=(_counts, _chi_sq_cr, method)))
+         for _dh0 in _ci_est_start
+     )
+
+     _ci_lo, _ci_hi = max(-1.0, _diff_cl_l), min(1.0, _diff_cl_u)
+     return _diff_hat, (_ci_lo + _ci_hi) / 2, _ci_lo, _ci_hi
+
+
+ def _propn_diff_chisq_mn(
+     _counts: Sequence[int],
+     _rd: float = 0.0,
+     /,
+     *,
+     method: Literal["M-N", "Mee"] = "M-N",
+ ) -> float:
+     R"""Estimate the :math:`\chi^2` statistic for the Miettinen-Nurminen (1985)
+     and Newcombe (1998) confidence intervals for a difference in binomial proportions.
+
+     Parameters
+     ----------
+     _counts
+         Numbers of positives and observations for (two) samples to be tested
+
+     _rd
+         Hypothesized difference in proportions at which the statistic is evaluated
+
+     method
+         Specify Miettinen-Nurminen or Mee
+
+     Returns
+     -------
+     Chi-square estimate
+
+     """
+     if _counts is None:
+         _counts = [1] * 4
+
+     _np1, _no1, _np2, _no2 = _counts
+     _p1h, _p2h = _np1 / _no1, _np2 / _no2
+     _diff = _p1h - _p2h - _rd
+
+     if not _diff:
+         return 0.0
+
+     _np, _no = _np1 + _np2, _no1 + _no2
+
+     _l3 = _no
+     _l2 = (_no1 + 2 * _no2) * _rd - _no - _np
+     _l1 = (_no2 * _rd - _no - 2 * _np2) * _rd + _np
+     _l0 = _np2 * _rd * (1 - _rd)
+     _l2_to_3l3 = _l2 / (3 * _l3)
+
+     _q = _l2_to_3l3**3 - (_l1 * _l2_to_3l3 - _l0) / (2 * _l3)
+     _p = np.sign(_q) * np.sqrt(_l2**2 - 3 * _l3 * _l1) / (3 * _l3)
+     _a = (np.pi + np.arccos(_q / _p**3)) / 3
+
+     _p2t: float = 2 * _p * np.cos(_a) - _l2_to_3l3
+     _p1t: float = _p2t + _rd
+
+     return _diff**2 / (
+         (_p1t * (1 - _p1t) / _no1 + _p2t * (1 - _p2t) / _no2)
+         * (_no / (_no - 1) if method == "M-N" else 1.0)
+     )
+
+
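By construction, `_propn_diff_ci_mn` returns bounds at which this statistic matches the chi-square critical value it was solved against. A quick self-check sketch, to be run with the two private helpers in scope (e.g. inside the module); the counts are illustrative:

    from scipy.stats import chi2

    crit = chi2.ppf(0.95, 1)
    _, _, lo, hi = _propn_diff_ci_mn(56, 70, 48, 80, alpha=0.05, method="M-N")
    for bound in (lo, hi):
        stat = _propn_diff_chisq_mn((56, 70, 48, 80), bound, method="M-N")
        assert abs(stat - crit) < 1e-4  # equal up to root-finder tolerance
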
+ def propn_diff_ci_multinomial(
+     _counts: ArrayINT, /, *, alpha: float = 0.05
+ ) -> ArrayDouble:
+     """Estimate confidence intervals of pair-wise differences in multinomial proportions
+
+     Differences in multinomial proportions sum to zero.
+
+     Parameters
+     ----------
+     _counts
+         Two dimensional np.array of observed values of multinomial distributions
+         (in columns).
+     alpha
+         Significance level
+
+     Returns
+     -------
+     Array of confidence intervals
+
+     """
+
+     if len(_counts.shape) > 2:
+         raise ValueError(
+             "This implementation is only valid for estimating confidence intervals "
+             "for differences in two (2) sets of multinomial proportions."
+         )
+
+     _prob = _counts / np.einsum("jk->k", _counts).astype(np.int64)
+     _var = np.einsum("jk->j", _prob * (1 - _prob) / _counts)[:, None]
+
+     _d, _d_cr = np.diff(_prob, axis=1), norm.ppf(1 - (alpha / len(_counts)))
+     return np.column_stack([_d + _f * _d_cr * np.sqrt(_var) for _f in (-1, 1)])
+
+
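A sketch of the expected input layout, assuming `propn_diff_ci_multinomial` is in scope: one column per multinomial sample, one row per category (the counts are made up).

    import numpy as np

    counts = np.array([[30, 45], [50, 40], [20, 15]])
    ci = propn_diff_ci_multinomial(counts, alpha=0.05)
    # One (lower, upper) row per category for the second-column-minus-first-column
    # difference in proportions; here the point differences are 0.15, -0.10, -0.05,
    # which sum to zero.
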
+ @dataclass(slots=True, frozen=True)
+ class MultinomialPropnsTest:
+     estimate: np.float64
+     dof: int
+     critical_value: np.float64
+     p_value: np.float64
+
+
+ def propn_test_multinomial(
+     _counts: ArrayINT, /, *, alpha: float = 0.05
+ ) -> MultinomialPropnsTest:
+     """Chi-square test for homogeneity of differences in multinomial proportions.
+
+     Differences in multinomial proportions sum to zero.
+
+     Parameters
+     ----------
+     _counts
+         Two dimensional array of observed values of multinomial distributions
+         (in columns).
+     alpha
+         Significance level
+
+     Returns
+     -------
+     Estimated statistic, degrees of freedom, critical value, p-value
+
+     """
+
+     _n = np.einsum("jk->", _counts).astype(np.int64)
+     _n_k = np.einsum("jk->k", _counts).astype(np.int64)
+     _prob = _counts / _n_k
+
+     _p_bar = _n / np.einsum("jk->j", _n_k / _prob)
+
+     _y_sq = _n * ((1 / np.einsum("j->", _p_bar)) - 1)
+     _dof = np.array([_s - 1 for _s in _counts.shape]).prod()
+     _chi_rv = chi2(_dof)
+
+     return MultinomialPropnsTest(
+         _y_sq, _dof, _chi_rv.ppf(1 - alpha), 1 - _chi_rv.cdf(_y_sq)
+     )
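
A usage sketch for the homogeneity test, with the same two-column layout as `propn_diff_ci_multinomial` (function assumed in scope; counts are illustrative):

    import numpy as np

    counts = np.array([[30, 45], [50, 40], [20, 15]])
    res = propn_test_multinomial(counts, alpha=0.05)
    # Reject homogeneity at the 5% level when res.estimate > res.critical_value,
    # equivalently when res.p_value < 0.05; res.dof = (3 - 1) * (2 - 1) = 2 here.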
File without changes