lambda-or 0.0.8__tar.gz → 0.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lambda_or
3
- Version: 0.0.8
3
+ Version: 0.0.9
4
4
  Summary: Lambda Odds Ratio
5
5
  Home-page: https://github.com/zeroknowledgediscovery/lambda_or
6
- Download-URL: https://github.com/zeroknowledgediscovery/lambda_or/archive/0.0.8.tar.gz
6
+ Download-URL: https://github.com/zeroknowledgediscovery/lambda_or/archive/0.0.9.tar.gz
7
7
  Author: Ishanu Chattopadhyay
8
8
  Author-email: zeroknowledgediscovery@gmail.com
9
9
  License: LICENSE
@@ -236,4 +236,5 @@ def lambda_or(
236
236
  )
237
237
 
238
238
 
239
- __all__ = ["lambda_or", "pq_from_two_gates", "LambdaORResult"]
239
+ __all__ = ["lambda_or", "pq_from_two_gates", "LambdaORResult", "neglog10_p_from_z"
240
+ ]
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lambda-or
3
- Version: 0.0.8
3
+ Version: 0.0.9
4
4
  Summary: Lambda Odds Ratio
5
5
  Home-page: https://github.com/zeroknowledgediscovery/lambda_or
6
- Download-URL: https://github.com/zeroknowledgediscovery/lambda_or/archive/0.0.8.tar.gz
6
+ Download-URL: https://github.com/zeroknowledgediscovery/lambda_or/archive/0.0.9.tar.gz
7
7
  Author: Ishanu Chattopadhyay
8
8
  Author-email: zeroknowledgediscovery@gmail.com
9
9
  License: LICENSE
@@ -5,7 +5,6 @@ setup.py
5
5
  version.py
6
6
  lambda_or/__init__.py
7
7
  lambda_or/lambda_or.py
8
- lambda_or/lambda_or_upd.py
9
8
  lambda_or.egg-info/PKG-INFO
10
9
  lambda_or.egg-info/SOURCES.txt
11
10
  lambda_or.egg-info/dependency_links.txt
@@ -0,0 +1 @@
1
+ __version__ = '0.0.9'
@@ -1,235 +0,0 @@
1
- """
2
- lambda_or.py — λ-OR (Lambda Odds Ratio) for misclassification-corrected attribution.
3
-
4
- This module implements a ridge-stabilized inversion of a misclassified 2x2 contingency
5
- table to recover corrected counts and compute the corrected log-odds ratio, its
6
- variance (including uncertainty from selection-conditional sensitivity/specificity),
7
- a Wald z-statistic, and a -log10 p-value with a large-|z| tail approximation.
8
-
9
- Core API
10
- --------
11
- lambda_or(tilde_counts, p_sel, q_sel, n_val, lambda_start=1e-6, lambda_max=1e6,
12
- step=10.0, eps=1e-9) -> LambdaORResult
13
-
14
- Helpers
15
- -------
16
- pq_from_two_gates(pi_H1, pi_L1, pi_H0, pi_L0) -> (p_sel, q_sel)
17
-
18
- Notation
19
- --------
20
- - tilde_counts: observed (misclassified) 2x2 counts as [[ã, b̃],[ĉ, d̃]]
21
- - p_sel = P(tilde Y=1 | Y=1, selected), q_sel = P(tilde Y=0 | Y=0, selected)
22
- - n_val: size of validation cohort used to estimate (p_sel, q_sel)
23
-
24
- References
25
- ----------
26
- See manuscript text for derivations and delta-method variance.
27
- """
28
-
29
- from __future__ import annotations
30
- from dataclasses import dataclass
31
- from math import log, sqrt, erfc, pi, log10
32
- from typing import Tuple, Dict, Any
33
- import numpy as np
34
-
35
-
36
- @dataclass
37
- class LambdaORResult:
38
- log_or: float
39
- neglog10_p: float
40
- z: float
41
- se: float
42
- counts: np.ndarray # corrected 2x2 matrix [[a, b], [c, d]]
43
- lambda_used: float
44
- converged: bool
45
- meta: Dict[str, Any]
46
-
47
-
48
- def _normal_two_sided_p(z_abs: float) -> float:
49
- """Two-sided p-value using the complementary error function."""
50
- return erfc(z_abs / sqrt(2.0))
51
-
52
-
53
- def _neglog10_p_from_z(z_abs: float) -> float:
54
- """
55
- Return -log10(two-sided p) from |z|.
56
-
57
- For moderate |z|, use erfc directly. For very large |z|, use the Mills-ratio
58
- approximation for the two-sided normal tail:
59
-
60
- p ≈ 2 * phi(z) / z
61
-
62
- which implies
63
-
64
- -log10 p ≈ z^2 / (2 ln 10) + log10(z) + 0.5 log10(2π) - log10(2).
65
- """
66
- if z_abs <= 7.0:
67
- p_two = max(min(_normal_two_sided_p(z_abs), 1.0), 1e-323)
68
- return -log10(p_two)
69
-
70
- return (
71
- (z_abs * z_abs) / (2.0 * np.log(10.0))
72
- + log10(sqrt(2.0 * pi) * z_abs)
73
- - log10(2.0)
74
- )
75
-
76
-
77
- def pq_from_two_gates(pi_H1: float, pi_L1: float, pi_H0: float, pi_L0: float) -> Tuple[float, float]:
78
- """
79
- Compute selection-conditional sensitivity/specificity from two-gate ROC probabilities.
80
-
81
- Parameters
82
- ----------
83
- pi_H1 : float
84
- P(S in high-specificity gate | Y=1)
85
- pi_L1 : float
86
- P(S in low-score gate | Y=1)
87
- pi_H0 : float
88
- P(S in high-specificity gate | Y=0)
89
- pi_L0 : float
90
- P(S in low-score gate | Y=0)
91
- """
92
- denom1 = pi_H1 + pi_L1
93
- denom0 = pi_H0 + pi_L0
94
- if denom1 <= 0 or denom0 <= 0:
95
- raise ValueError("Invalid gate probabilities: denominators must be positive.")
96
- p_sel = pi_H1 / denom1
97
- q_sel = pi_L0 / denom0
98
- return p_sel, q_sel
99
-
100
-
101
- def lambda_or(
102
- tilde_counts: np.ndarray,
103
- p_sel: float,
104
- q_sel: float,
105
- n_val: int,
106
- lambda_start: float = 1e-6,
107
- lambda_max: float = 1e6,
108
- step: float = 10.0,
109
- eps: float = 1e-9,
110
- ) -> LambdaORResult:
111
- """
112
- Compute λ-OR given a misclassified 2x2 table and selection-conditional (p_sel, q_sel).
113
-
114
- The variance calculation follows the manuscript algorithm: after obtaining the
115
- ridge-corrected table T^(λ) = \tilde T (K^T + λI)^(-1), propagate uncertainty in
116
- (p_sel, q_sel) using the ridge-aware gradient g_p, g_q derived from the corrected
117
- inverse rather than a reduced J-only approximation.
118
- """
119
- Ttil = np.asarray(tilde_counts, dtype=float)
120
- if Ttil.shape != (2, 2):
121
- raise ValueError("tilde_counts must be 2x2.")
122
-
123
- if not (0.0 <= p_sel <= 1.0 and 0.0 <= q_sel <= 1.0):
124
- raise ValueError("p_sel and q_sel must be in [0,1].")
125
- if n_val <= 1:
126
- raise ValueError("n_val must be > 1.")
127
- if lambda_start <= 0.0:
128
- raise ValueError("lambda_start must be positive.")
129
- if lambda_max < lambda_start:
130
- raise ValueError("lambda_max must be >= lambda_start.")
131
- if step <= 1.0:
132
- raise ValueError("step must be > 1.0 for a multiplicative ridge path.")
133
-
134
- # Misclassification matrix K.
135
- K = np.array(
136
- [[p_sel, 1.0 - p_sel],
137
- [1.0 - q_sel, q_sel]],
138
- dtype=float,
139
- )
140
-
141
- lam = float(lambda_start)
142
- converged = False
143
- counts = None
144
- A_inv = None # A = (K^T + λI)^(-1) in the manuscript notation.
145
-
146
- while lam <= lambda_max:
147
- M_lam = K.T + lam * np.eye(2)
148
- try:
149
- A_inv = np.linalg.inv(M_lam)
150
- M = Ttil @ A_inv
151
- except np.linalg.LinAlgError:
152
- lam *= step
153
- continue
154
-
155
- if np.all(M >= eps) and np.all(np.isfinite(M)):
156
- counts = M
157
- converged = True
158
- break
159
- lam *= step
160
-
161
- if not converged or counts is None or A_inv is None:
162
- return LambdaORResult(
163
- log_or=float("nan"),
164
- neglog10_p=float("nan"),
165
- z=float("nan"),
166
- se=float("nan"),
167
- counts=np.full((2, 2), np.nan),
168
- lambda_used=float("nan"),
169
- converged=False,
170
- meta={
171
- "message": "Ridge path failed to find feasible counts.",
172
- "lambda_max": lambda_max,
173
- "p_sel": p_sel,
174
- "q_sel": q_sel,
175
- },
176
- )
177
-
178
- a, b = counts[0, 0], counts[0, 1]
179
- c, d = counts[1, 0], counts[1, 1]
180
-
181
- # Corrected log odds ratio.
182
- log_or = log((a * d) / (b * c))
183
-
184
- # Base variance.
185
- var_base = 1.0 / a + 1.0 / b + 1.0 / c + 1.0 / d
186
-
187
- # Ridge-aware delta-method gradient from the manuscript algorithm.
188
- # Here A_inv = (K^T + λI)^(-1), with entries A_ij in the paper's notation.
189
- A11, A12 = float(A_inv[0, 0]), float(A_inv[0, 1])
190
- A21, A22 = float(A_inv[1, 0]), float(A_inv[1, 1])
191
-
192
- g_p = (a - b) * (A12 / b - A11 / a) + (c - d) * (A11 / c - A12 / d)
193
- g_q = (a - b) * (A21 / a - A22 / b) + (c - d) * (A22 / d - A21 / c)
194
-
195
- var_p = p_sel * (1.0 - p_sel) / float(n_val)
196
- var_q = q_sel * (1.0 - q_sel) / float(n_val)
197
- var_extra = (g_p * g_p) * var_p + (g_q * g_q) * var_q
198
-
199
- se = sqrt(var_base + var_extra)
200
- z = log_or / se if se > 0 else float("inf")
201
- z_abs = abs(z)
202
- neglog10_p = _neglog10_p_from_z(z_abs)
203
-
204
- meta = {
205
- "var_base": var_base,
206
- "var_extra": var_extra,
207
- "var_p": var_p,
208
- "var_q": var_q,
209
- "g_p": g_p,
210
- "g_q": g_q,
211
- "K": K,
212
- "A_inv": A_inv,
213
- "J": p_sel + q_sel - 1.0,
214
- "lambda_path_start": lambda_start,
215
- "lambda_used": lam,
216
- "step": step,
217
- "eps": eps,
218
- "p_sel": p_sel,
219
- "q_sel": q_sel,
220
- "n_val": n_val,
221
- }
222
-
223
- return LambdaORResult(
224
- log_or=log_or,
225
- neglog10_p=neglog10_p,
226
- z=z,
227
- se=se,
228
- counts=counts,
229
- lambda_used=lam,
230
- converged=converged,
231
- meta=meta,
232
- )
233
-
234
-
235
- __all__ = ["lambda_or", "pq_from_two_gates", "LambdaORResult"]
@@ -1 +0,0 @@
1
- __version__ = '0.0.8'
File without changes
File without changes
File without changes
File without changes
File without changes