lambda-or 0.0.3__tar.gz → 0.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lambda_or
3
- Version: 0.0.3
3
+ Version: 0.0.5
4
4
  Summary: Lambda Odds Ratio
5
5
  Home-page: https://github.com/zeroknowledgediscovery/lambda_or
6
- Download-URL: https://github.com/zeroknowledgediscovery/lambda_or/archive/0.0.3.tar.gz
6
+ Download-URL: https://github.com/zeroknowledgediscovery/lambda_or/archive/0.0.5.tar.gz
7
7
  Author: Ishanu Chattopadhyay
8
8
  Author-email: zeroknowledgediscovery@gmail.com
9
9
  License: LICENSE
@@ -24,6 +24,10 @@ License-File: LICENSE
24
24
  Lambda-OR
25
25
  ===============
26
26
 
27
+ .. image:: https://zenodo.org/badge/1063683292.svg
28
+ :target: https://doi.org/10.5281/zenodo.17196710
29
+
30
+
27
31
  .. class:: no-web no-pdf
28
32
 
29
33
  :Info: Draft link will be posted here
@@ -2,6 +2,10 @@
2
2
  Lambda-OR
3
3
  ===============
4
4
 
5
+ .. image:: https://zenodo.org/badge/1063683292.svg
6
+ :target: https://doi.org/10.5281/zenodo.17196710
7
+
8
+
5
9
  .. class:: no-web no-pdf
6
10
 
7
11
  :Info: Draft link will be posted here
@@ -0,0 +1,49 @@
1
+ from lambda_or import lambda_or
2
+ import numpy as np
3
+ from math import erfc, sqrt, log10, pi
4
+
5
def neglog10_p_from_z(z_abs):
    """
    Return -log10 of the two-sided normal p-value for a z-statistic.

    Parameters
    ----------
    z_abs : float
        Magnitude of the z-statistic. The sign is ignored, because the
        two-sided p-value is symmetric in z.

    Returns
    -------
    float
        -log10(p) as a built-in ``float`` on both branches (never ``-0.0``).

    Notes
    -----
    For |z| <= 7 the p-value is evaluated directly as erfc(|z| / sqrt(2)).
    Beyond that the first-order tail approximation p ~ 2 * phi(z) / z is
    used, which gives

        -log10 p ~ z^2 / (2 ln 10) + log10(sqrt(2 pi) * z) - log10(2).
    """
    z_abs = abs(float(z_abs))
    if z_abs <= 7.0:
        # Clamp into (0, 1] so that log10 is always defined.
        p_two = max(min(erfc(z_abs / sqrt(2.0)), 1.0), 1e-323)
        # "0.0 - x" rather than "-x": avoids returning -0.0 when p == 1.
        return 0.0 - log10(p_two)

    # Scalar math only, so the result is a plain float rather than np.float64.
    ln10 = float(np.log(10.0))
    return (z_abs * z_abs) / (2.0 * ln10) + log10(sqrt(2.0 * pi) * z_abs) - log10(2.0)
10
+
11
# Observed 2x2 table of counts.
#   rows    -> X  in {1, 0}
#   columns -> Y~ in {1, 0}
tilde_counts = np.array([[20, 30], [10, 240]], dtype=float)

# --- Naive analysis: use the observed table as it is. ---
(a, b), (c, d) = tilde_counts

naive_or = (a * d) / (b * c)
naive_log_or = np.log(naive_or)
# Standard error of the log odds ratio from the four cell reciprocals.
naive_se = np.sqrt(1/a + 1/b + 1/c + 1/d)
naive_z = naive_log_or / naive_se
naive_neglog10_p = neglog10_p_from_z(abs(naive_z))

# --- Lambda-OR analysis on the same table. ---
res = lambda_or(tilde_counts=tilde_counts, p_sel=0.92, q_sel=0.88, n_val=1000)

# Report the naive estimate first ...
print("Naive OR:", naive_or)
print("Naive log OR:", naive_log_or)
print("Naive z:", naive_z)
print("Naive -log10(p):", naive_neglog10_p)

# ... then the lambda-OR estimate and the ridge value it settled on ...
print("\nLambda-OR:", np.exp(res.log_or))
print("Log Lambda-OR:", res.log_or)
print("SE:", res.se)
print("z:", res.z)
print("Lambda -log10(p):", res.neglog10_p)
print("lambda used:", res.lambda_used)

# ... and finally the corrected 2x2 table itself.
print("\nCorrected contingency table:")
print(res.counts)
@@ -0,0 +1,235 @@
1
+ """
2
+ lambda_or.py — λ-OR (Lambda Odds Ratio) for misclassification-corrected attribution.
3
+
4
+ This module implements a ridge-stabilized inversion of a misclassified 2x2 contingency
5
+ table to recover corrected counts and compute the corrected log-odds ratio, its
6
+ variance (including uncertainty from selection-conditional sensitivity/specificity),
7
+ a Wald z-statistic, and a -log10 p-value with a large-|z| tail approximation.
8
+
9
+ Core API
10
+ --------
11
+ lambda_or(tilde_counts, p_sel, q_sel, n_val, lambda_start=1e-6, lambda_max=1e6,
12
+ step=10.0, eps=1e-9) -> LambdaORResult
13
+
14
+ Helpers
15
+ -------
16
+ pq_from_two_gates(pi_H1, pi_L1, pi_H0, pi_L0) -> (p_sel, q_sel)
17
+
18
+ Notation
19
+ --------
20
+ - tilde_counts: observed (misclassified) 2x2 counts as [[ã, b̃],[c̃, d̃]]
21
+ - p_sel = P(tilde Y=1 | Y=1, selected), q_sel = P(tilde Y=0 | Y=0, selected)
22
+ - n_val: size of validation cohort used to estimate (p_sel, q_sel)
23
+
24
+ References
25
+ ----------
26
+ See manuscript text for derivations and delta-method variance.
27
+ """
28
+
29
+ from __future__ import annotations
30
+ from dataclasses import dataclass
31
+ from math import log, sqrt, erfc, pi, log10
32
+ from typing import Tuple, Dict, Any
33
+ import numpy as np
34
+
35
+
36
@dataclass(eq=False)
class LambdaORResult:
    """
    Structured result returned by ``lambda_or``.

    When the ridge path fails to find feasible counts, ``converged`` is False
    and every numeric field (including all entries of ``counts``) is NaN.

    Equality is implemented by hand: the dataclass-generated ``__eq__`` would
    compare the ``counts`` array (and any arrays stored in ``meta``) with
    ``==`` and raise ``ValueError`` on the ambiguous truth value. Instances
    remain unhashable, exactly as with the generated ``__eq__``.
    """

    log_or: float        # log of the odds ratio computed from the corrected counts
    neglog10_p: float    # -log10 of the two-sided p-value derived from |z|
    z: float             # Wald statistic, log_or / se
    se: float            # standard error of log_or
    counts: np.ndarray   # corrected 2x2 matrix [[a, b], [c, d]]
    lambda_used: float   # ridge value at which feasible counts were found
    converged: bool      # True when the ridge path produced feasible counts
    meta: Dict[str, Any]  # diagnostics (variance parts, gradients, inputs, ...)

    @staticmethod
    def _same_value(left: Any, right: Any) -> bool:
        """Compare two ``meta`` values, element-wise when either is an array."""
        if isinstance(left, np.ndarray) or isinstance(right, np.ndarray):
            return bool(np.array_equal(left, right))
        return bool(left == right)

    def __eq__(self, other: object) -> bool:
        """Field-by-field equality that is safe for array-valued fields."""
        if other.__class__ is not self.__class__:
            return NotImplemented

        scalar_names = ("log_or", "neglog10_p", "z", "se", "lambda_used", "converged")
        if any(getattr(self, name) != getattr(other, name) for name in scalar_names):
            return False
        if not np.array_equal(self.counts, other.counts):
            return False
        if self.meta.keys() != other.meta.keys():
            return False
        return all(self._same_value(self.meta[key], other.meta[key]) for key in self.meta)
46
+
47
+
48
+ def _normal_two_sided_p(z_abs: float) -> float:
49
+ """Two-sided p-value using the complementary error function."""
50
+ return erfc(z_abs / sqrt(2.0))
51
+
52
+
53
+ def _neglog10_p_from_z(z_abs: float) -> float:
54
+ """
55
+ Return -log10(two-sided p) from |z|.
56
+
57
+ For moderate |z|, use erfc directly. For very large |z|, use the Mills-ratio
58
+ approximation for the two-sided normal tail:
59
+
60
+ p ≈ 2 * phi(z) / z
61
+
62
+ which implies
63
+
64
+ -log10 p ≈ z^2 / (2 ln 10) + log10(z) + 0.5 log10(2π) - log10(2).
65
+ """
66
+ if z_abs <= 7.0:
67
+ p_two = max(min(_normal_two_sided_p(z_abs), 1.0), 1e-323)
68
+ return -log10(p_two)
69
+
70
+ return (
71
+ (z_abs * z_abs) / (2.0 * np.log(10.0))
72
+ + log10(sqrt(2.0 * pi) * z_abs)
73
+ - log10(2.0)
74
+ )
75
+
76
+
77
def pq_from_two_gates(pi_H1: float, pi_L1: float, pi_H0: float, pi_L0: float) -> Tuple[float, float]:
    """
    Compute selection-conditional sensitivity/specificity from two-gate ROC probabilities.

    Parameters
    ----------
    pi_H1 : float
        P(S in high-specificity gate | Y=1)
    pi_L1 : float
        P(S in low-score gate | Y=1)
    pi_H0 : float
        P(S in high-specificity gate | Y=0)
    pi_L0 : float
        P(S in low-score gate | Y=0)

    Returns
    -------
    (p_sel, q_sel) : tuple of float
        p_sel = pi_H1 / (pi_H1 + pi_L1) and q_sel = pi_L0 / (pi_H0 + pi_L0).

    Raises
    ------
    ValueError
        If any argument is not a probability in [0, 1] (NaN included), or if
        either denominator pi_H1 + pi_L1 or pi_H0 + pi_L0 is not positive.
    """
    gates = {"pi_H1": pi_H1, "pi_L1": pi_L1, "pi_H0": pi_H0, "pi_L0": pi_L0}
    for name, value in gates.items():
        # The chained comparison is False for NaN, so NaN is rejected as well.
        # Without this check, mixed-sign inputs with a positive sum would
        # silently produce p_sel / q_sel outside [0, 1].
        if not 0.0 <= value <= 1.0:
            raise ValueError(f"{name} must be a probability in [0, 1]; got {value!r}.")

    denom1 = pi_H1 + pi_L1
    denom0 = pi_H0 + pi_L0
    if denom1 <= 0 or denom0 <= 0:
        raise ValueError("Invalid gate probabilities: denominators must be positive.")

    return pi_H1 / denom1, pi_L0 / denom0
99
+
100
+
101
def lambda_or(
    tilde_counts: np.ndarray,
    p_sel: float,
    q_sel: float,
    n_val: int,
    lambda_start: float = 1e-6,
    lambda_max: float = 1e6,
    step: float = 10.0,
    eps: float = 1e-9,
) -> LambdaORResult:
    r"""
    Compute λ-OR given a misclassified 2x2 table and selection-conditional (p_sel, q_sel).

    The observed table is corrected by a ridge-stabilized inversion,
    T^(λ) = \tilde T (K^T + λI)^(-1), where K = [[p_sel, 1 - p_sel],
    [1 - q_sel, q_sel]]. λ starts at ``lambda_start`` and is multiplied by
    ``step`` until every corrected count is finite, strictly positive and at
    least ``eps``, or until λ exceeds ``lambda_max``.

    The variance calculation follows the manuscript algorithm: after obtaining
    the ridge-corrected table, uncertainty in (p_sel, q_sel) is propagated with
    the ridge-aware gradients g_p, g_q derived from the corrected inverse
    rather than a reduced J-only approximation.

    Parameters
    ----------
    tilde_counts : array-like, shape (2, 2)
        Observed (misclassified) counts [[ã, b̃], [c̃, d̃]].
    p_sel, q_sel : float
        Selection-conditional sensitivity and specificity, each in [0, 1].
    n_val : int
        Validation cohort size used to estimate (p_sel, q_sel); it scales the
        binomial variances p(1-p)/n_val and q(1-q)/n_val.
    lambda_start, lambda_max : float
        Start and upper bound of the ridge path.
    step : float
        Multiplicative factor of the ridge path (must be > 1).
    eps : float
        Feasibility floor for the corrected counts. Counts must additionally
        be strictly positive whatever ``eps`` is, so that the log odds ratio
        and the reciprocal-count variance are always defined.

    Returns
    -------
    LambdaORResult
        Fields (log_or, neglog10_p, z, se, counts, lambda_used, converged,
        meta). If no feasible table is found along the ridge path,
        ``converged`` is False, the numeric fields are NaN and ``meta``
        carries a "message" entry.

    Raises
    ------
    ValueError
        If ``tilde_counts`` is not 2x2, ``p_sel``/``q_sel`` lie outside
        [0, 1], ``n_val <= 1``, ``lambda_start <= 0``,
        ``lambda_max < lambda_start`` or ``step <= 1``.
    """
    Ttil = np.asarray(tilde_counts, dtype=float)
    if Ttil.shape != (2, 2):
        raise ValueError("tilde_counts must be 2x2.")

    if not (0.0 <= p_sel <= 1.0 and 0.0 <= q_sel <= 1.0):
        raise ValueError("p_sel and q_sel must be in [0,1].")
    if n_val <= 1:
        raise ValueError("n_val must be > 1.")
    if lambda_start <= 0.0:
        raise ValueError("lambda_start must be positive.")
    if lambda_max < lambda_start:
        raise ValueError("lambda_max must be >= lambda_start.")
    if step <= 1.0:
        raise ValueError("step must be > 1.0 for a multiplicative ridge path.")

    # Misclassification matrix K.
    K = np.array(
        [[p_sel, 1.0 - p_sel],
         [1.0 - q_sel, q_sel]],
        dtype=float,
    )
    identity = np.eye(2)

    lam = float(lambda_start)
    counts = None
    A_inv = None  # A = (K^T + λI)^(-1) in the manuscript notation.

    while lam <= lambda_max:
        try:
            # Only the inversion can raise LinAlgError; keep the try minimal.
            candidate_inv = np.linalg.inv(K.T + lam * identity)
        except np.linalg.LinAlgError:
            lam *= step
            continue

        candidate = Ttil @ candidate_inv
        feasible = (
            np.all(np.isfinite(candidate))
            and np.all(candidate >= eps)
            # Guard against eps <= 0: zero/negative counts would break the
            # log odds ratio and the 1/count variance terms below.
            and np.all(candidate > 0.0)
        )
        if feasible:
            counts, A_inv = candidate, candidate_inv
            break
        lam *= step

    converged = counts is not None
    if not converged or counts is None or A_inv is None:
        return LambdaORResult(
            log_or=float("nan"),
            neglog10_p=float("nan"),
            z=float("nan"),
            se=float("nan"),
            counts=np.full((2, 2), np.nan),
            lambda_used=float("nan"),
            converged=False,
            meta={
                "message": "Ridge path failed to find feasible counts.",
                "lambda_max": lambda_max,
                "p_sel": p_sel,
                "q_sel": q_sel,
            },
        )

    a, b = counts[0, 0], counts[0, 1]
    c, d = counts[1, 0], counts[1, 1]

    # Corrected log odds ratio.
    log_or = log((a * d) / (b * c))

    # Base variance.
    var_base = 1.0 / a + 1.0 / b + 1.0 / c + 1.0 / d

    # Ridge-aware delta-method gradient from the manuscript algorithm.
    # Here A_inv = (K^T + λI)^(-1), with entries A_ij in the paper's notation.
    A11, A12 = float(A_inv[0, 0]), float(A_inv[0, 1])
    A21, A22 = float(A_inv[1, 0]), float(A_inv[1, 1])

    # g_p = d(log OR)/d(p_sel), g_q = d(log OR)/d(q_sel).
    g_p = (a - b) * (A12 / b - A11 / a) + (c - d) * (A11 / c - A12 / d)
    g_q = (a - b) * (A21 / a - A22 / b) + (c - d) * (A22 / d - A21 / c)

    # Binomial variances of the validation-cohort estimates of p_sel and q_sel.
    var_p = p_sel * (1.0 - p_sel) / float(n_val)
    var_q = q_sel * (1.0 - q_sel) / float(n_val)
    var_extra = (g_p * g_p) * var_p + (g_q * g_q) * var_q

    se = sqrt(var_base + var_extra)
    z = log_or / se if se > 0 else float("inf")
    z_abs = abs(z)
    neglog10_p = _neglog10_p_from_z(z_abs)

    meta = {
        "var_base": var_base,
        "var_extra": var_extra,
        "var_p": var_p,
        "var_q": var_q,
        "g_p": g_p,
        "g_q": g_q,
        "K": K,
        "A_inv": A_inv,
        "J": p_sel + q_sel - 1.0,
        "lambda_path_start": lambda_start,
        "lambda_used": lam,
        "step": step,
        "eps": eps,
        "p_sel": p_sel,
        "q_sel": q_sel,
        "n_val": n_val,
    }

    return LambdaORResult(
        log_or=log_or,
        neglog10_p=neglog10_p,
        z=z,
        se=se,
        counts=counts,
        lambda_used=lam,
        converged=converged,
        meta=meta,
    )
233
+
234
+
235
+ __all__ = ["lambda_or", "pq_from_two_gates", "LambdaORResult"]
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lambda-or
3
- Version: 0.0.3
3
+ Version: 0.0.5
4
4
  Summary: Lambda Odds Ratio
5
5
  Home-page: https://github.com/zeroknowledgediscovery/lambda_or
6
- Download-URL: https://github.com/zeroknowledgediscovery/lambda_or/archive/0.0.3.tar.gz
6
+ Download-URL: https://github.com/zeroknowledgediscovery/lambda_or/archive/0.0.5.tar.gz
7
7
  Author: Ishanu Chattopadhyay
8
8
  Author-email: zeroknowledgediscovery@gmail.com
9
9
  License: LICENSE
@@ -24,6 +24,10 @@ License-File: LICENSE
24
24
  Lambda-OR
25
25
  ===============
26
26
 
27
+ .. image:: https://zenodo.org/badge/1063683292.svg
28
+ :target: https://doi.org/10.5281/zenodo.17196710
29
+
30
+
27
31
  .. class:: no-web no-pdf
28
32
 
29
33
  :Info: Draft link will be posted here
@@ -4,6 +4,7 @@ README.rst
4
4
  setup.py
5
5
  version.py
6
6
  lambda_or/__init__.py
7
+ lambda_or/example.py
7
8
  lambda_or/lambda_or.py
8
9
  lambda_or.egg-info/PKG-INFO
9
10
  lambda_or.egg-info/SOURCES.txt
@@ -0,0 +1 @@
1
+ __version__ = '0.0.5'
@@ -1,213 +0,0 @@
1
- """
2
- lambda_or.py — Λ-OR (Lambda Odds Ratio) for misclassification-corrected attribution.
3
-
4
- This module implements a ridge-stabilized inversion of a misclassified 2x2 contingency
5
- table to recover corrected counts and compute the corrected log-odds ratio, its
6
- variance (including uncertainty from selection-conditional sensitivity/specificity),
7
- a Wald z-statistic, and a -log10 p-value with a large-|z| tail approximation.
8
-
9
- Core API
10
- --------
11
- lambda_or(tilde_counts, p_sel, q_sel, n_val, lambda_start=1e-6, lambda_max=1e6,
12
- step=10.0, eps=1e-9) -> dict
13
-
14
- Helpers
15
- -------
16
- pq_from_two_gates(pi_H1, pi_L1, pi_H0, pi_L0) -> (p_sel, q_sel)
17
-
18
- Notation
19
- --------
20
- - tilde_counts: observed (misclassified) 2x2 counts as [[ã, b̃],[ĉ, d̃]]
21
- - p_sel = P(tilde Y=1 | Y=1, selected), q_sel = P(tilde Y=0 | Y=0, selected)
22
- - n_val: size of validation cohort used to estimate (p_sel, q_sel)
23
-
24
- References
25
- ----------
26
- See manuscript text for derivations and delta-method variance.
27
- """
28
-
29
- from __future__ import annotations
30
- from dataclasses import dataclass
31
- from math import log, sqrt, isfinite, erfc, pi, log10
32
- from typing import Tuple, Dict, Any, Optional
33
- import numpy as np
34
-
35
-
36
- @dataclass
37
- class LambdaORResult:
38
- log_or: float
39
- neglog10_p: float
40
- z: float
41
- se: float
42
- counts: np.ndarray # corrected 2x2 matrix [ [a,b], [c,d] ]
43
- lambda_used: float
44
- converged: bool
45
- meta: Dict[str, Any]
46
-
47
-
48
- def _normal_two_sided_p(z_abs: float) -> float:
49
- """Two-sided p-value using complementary error function (numerically stable)."""
50
- # sf(|z|) = 0.5 * erfc(|z|/sqrt(2))
51
- return 2.0 * 0.5 * erfc(z_abs / sqrt(2.0))
52
-
53
-
54
- def pq_from_two_gates(pi_H1: float, pi_L1: float, pi_H0: float, pi_L0: float) -> Tuple[float, float]:
55
- """
56
- Compute selection-conditional sensitivity/specificity from two-gate ROC probabilities:
57
- - pi_H1 = P(S in high-specificity gate | Y=1)
58
- - pi_L1 = P(S in low-specificity gate | Y=1)
59
- - pi_H0 = P(S in high-specificity gate | Y=0)
60
- - pi_L0 = P(S in low-specificity gate | Y=0)
61
- Returns (p_sel, q_sel).
62
- """
63
- denom1 = pi_H1 + pi_L1
64
- denom0 = pi_H0 + pi_L0
65
- if denom1 <= 0 or denom0 <= 0:
66
- raise ValueError("Invalid gate probabilities: denominators must be positive.")
67
- p_sel = pi_H1 / denom1
68
- q_sel = pi_L0 / denom0
69
- return p_sel, q_sel
70
-
71
-
72
- def lambda_or(tilde_counts: np.ndarray,
73
- p_sel: float,
74
- q_sel: float,
75
- n_val: int,
76
- lambda_start: float = 1e-6,
77
- lambda_max: float = 1e6,
78
- step: float = 10.0,
79
- eps: float = 1e-9) -> LambdaORResult:
80
- """
81
- Compute Λ-OR given a misclassified 2x2 table and selection-conditional (p_sel, q_sel).
82
-
83
- Parameters
84
- ----------
85
- tilde_counts : array-like shape (2,2)
86
- Observed (misclassified) counts [[ã, b̃],[ĉ, d̃]].
87
- p_sel, q_sel : float
88
- Selection-conditional sensitivity and specificity (from ROC gates).
89
- n_val : int
90
- Validation cohort size used to estimate (p_sel, q_sel) for variance propagation.
91
- lambda_start, lambda_max : float
92
- Ridge path start and upper bound.
93
- step : float
94
- Multiplicative factor for ridge path (e.g., 10.0).
95
- eps : float
96
- Numerical/feasibility floor for corrected counts.
97
-
98
- Returns
99
- -------
100
- LambdaORResult
101
- Structured result with fields (log_or, neglog10_p, z, se, counts, lambda_used, converged, meta).
102
- """
103
- Ttil = np.asarray(tilde_counts, dtype=float)
104
- if Ttil.shape != (2,2):
105
- raise ValueError("tilde_counts must be 2x2.")
106
-
107
- if not (0.0 <= p_sel <= 1.0 and 0.0 <= q_sel <= 1.0):
108
- raise ValueError("p_sel and q_sel must be in [0,1].")
109
- if n_val <= 1:
110
- raise ValueError("n_val must be > 1.")
111
-
112
- # Misclassification matrix A
113
- A = np.array([[p_sel, 1.0 - p_sel],
114
- [1.0 - q_sel, q_sel]], dtype=float)
115
-
116
- lam = float(lambda_start)
117
- converged = False
118
- counts = None
119
-
120
- while lam <= lambda_max:
121
- A_lam = A + lam * np.eye(2)
122
- try:
123
- # counts = tilde T * (A_lam^{-T})
124
- inv_AT = np.linalg.inv(A_lam).T
125
- M = Ttil @ inv_AT
126
- except np.linalg.LinAlgError:
127
- lam *= step
128
- continue
129
-
130
- if np.all(M > eps) and np.all(np.isfinite(M)):
131
- counts = M
132
- converged = True
133
- break
134
- lam *= step
135
-
136
- if not converged:
137
- return LambdaORResult(
138
- log_or=float('nan'),
139
- neglog10_p=float('nan'),
140
- z=float('nan'),
141
- se=float('nan'),
142
- counts=np.full((2,2), np.nan),
143
- lambda_used=float('nan'),
144
- converged=False,
145
- meta={
146
- "message": "Ridge path failed to find feasible counts.",
147
- "lambda_max": lambda_max,
148
- "p_sel": p_sel,
149
- "q_sel": q_sel
150
- }
151
- )
152
-
153
- a, b = counts[0,0], counts[0,1]
154
- c, d = counts[1,0], counts[1,1]
155
- # Corrected log odds ratio
156
- log_or = log((a * d) / (b * c))
157
-
158
- # Base variance
159
- var_base = 1.0/a + 1.0/b + 1.0/c + 1.0/d
160
-
161
- # Delta-method extra variance from (p_sel, q_sel)
162
- J = p_sel + q_sel - 1.0
163
- # Protect against divide by zero; if J ~ 0, variance will be huge anyway
164
- denom = max(J*J, 1e-18)
165
- ad = a * d
166
- bc = b * c
167
- dlog_dp = ((1.0 - q_sel) * (ad - bc)) / (denom * ad)
168
- dlog_dq = ((1.0 - p_sel) * (bc - ad)) / (denom * ad)
169
-
170
- var_p = p_sel * (1.0 - p_sel) / float(n_val)
171
- var_q = q_sel * (1.0 - q_sel) / float(n_val)
172
- var_extra = (dlog_dp ** 2) * var_p + (dlog_dq ** 2) * var_q
173
-
174
- se = sqrt(var_base + var_extra)
175
- z = log_or / se if se > 0 else float('inf')
176
-
177
- # Two-sided p-value and -log10 p with asymptotic tail
178
- z_abs = abs(z)
179
- if z_abs <= 7.0:
180
- p_two = _normal_two_sided_p(z_abs)
181
- # Guard for underflow
182
- p_two = max(min(p_two, 1.0), 1e-323)
183
- neglog10_p = -log10(p_two)
184
- else:
185
- # Asymptotic: -log10 p ≈ z^2/(2 ln 10) - log10( sqrt(2π)*|z| )
186
- neglog10_p = (z_abs*z_abs) / (2.0 * np.log(10.0)) - log10((2.0*pi)**0.5 * z_abs)
187
-
188
- meta = {
189
- "var_base": var_base,
190
- "var_extra": var_extra,
191
- "J": J,
192
- "lambda_path_start": lambda_start,
193
- "lambda_used": lam,
194
- "step": step,
195
- "eps": eps,
196
- "p_sel": p_sel,
197
- "q_sel": q_sel,
198
- "n_val": n_val
199
- }
200
-
201
- return LambdaORResult(
202
- log_or=log_or,
203
- neglog10_p=neglog10_p,
204
- z=z,
205
- se=se,
206
- counts=counts,
207
- lambda_used=lam,
208
- converged=converged,
209
- meta=meta
210
- )
211
-
212
-
213
- __all__ = ["lambda_or", "pq_from_two_gates", "LambdaORResult"]
@@ -1 +0,0 @@
1
- __version__ = '0.0.3'
File without changes
File without changes
File without changes
File without changes