lambda-or 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lambda_or/__init__.py ADDED
@@ -0,0 +1,4 @@
1
from .lambda_or import lambda_or, pq_from_two_gates, LambdaORResult

__all__ = ["lambda_or", "pq_from_two_gates", "LambdaORResult"]
# Must match the distribution metadata: this wheel is published as 0.0.2.
__version__ = "0.0.2"
4
+
lambda_or/lambda_or.py ADDED
@@ -0,0 +1,213 @@
1
+ """
2
+ lambda_or.py — Λ-OR (Lambda Odds Ratio) for misclassification-corrected attribution.
3
+
4
+ This module implements a ridge-stabilized inversion of a misclassified 2x2 contingency
5
+ table to recover corrected counts and compute the corrected log-odds ratio, its
6
+ variance (including uncertainty from selection-conditional sensitivity/specificity),
7
+ a Wald z-statistic, and a -log10 p-value with a large-|z| tail approximation.
8
+
9
+ Core API
10
+ --------
11
+ lambda_or(tilde_counts, p_sel, q_sel, n_val, lambda_start=1e-6, lambda_max=1e6,
12
+ step=10.0, eps=1e-9) -> LambdaORResult
13
+
14
+ Helpers
15
+ -------
16
+ pq_from_two_gates(pi_H1, pi_L1, pi_H0, pi_L0) -> (p_sel, q_sel)
17
+
18
+ Notation
19
+ --------
20
+ - tilde_counts: observed (misclassified) 2x2 counts as [[ã, b̃],[c̃, d̃]]
21
+ - p_sel = P(tilde Y=1 | Y=1, selected), q_sel = P(tilde Y=0 | Y=0, selected)
22
+ - n_val: size of validation cohort used to estimate (p_sel, q_sel)
23
+
24
+ References
25
+ ----------
26
+ See manuscript text for derivations and delta-method variance.
27
+ """
28
+
29
+ from __future__ import annotations
30
+ from dataclasses import dataclass
31
+ from math import log, sqrt, isfinite, erfc, pi, log10
32
+ from typing import Tuple, Dict, Any, Optional
33
+ import numpy as np
34
+
35
+
36
@dataclass
class LambdaORResult:
    """Structured output of ``lambda_or``.

    When the ridge path finds no feasible corrected table, ``lambda_or``
    returns an instance with ``converged=False`` and NaN in every numeric
    field (including all four entries of ``counts``).
    """
    # NOTE(review): the dataclass-generated ``__eq__`` compares fields as a
    # tuple, and ``counts`` is an ndarray, so ``==`` between two distinct
    # results may raise on the array's ambiguous truth value -- confirm
    # whether value equality is ever needed by callers.
    log_or: float  # corrected log odds ratio, log((a*d)/(b*c))
    neglog10_p: float  # -log10 of the two-sided Wald p-value
    z: float  # Wald statistic, log_or / se
    se: float  # standard error, sqrt(var_base + var_extra)
    counts: np.ndarray # corrected 2x2 matrix [ [a,b], [c,d] ]
    lambda_used: float  # ridge value at which the corrected counts were accepted
    converged: bool  # True when a feasible corrected table was found
    meta: Dict[str, Any]  # diagnostics and echoed inputs (variance parts, J, ridge settings)
46
+
47
+
48
+ def _normal_two_sided_p(z_abs: float) -> float:
49
+ """Two-sided p-value using complementary error function (numerically stable)."""
50
+ # sf(|z|) = 0.5 * erfc(|z|/sqrt(2))
51
+ return 2.0 * 0.5 * erfc(z_abs / sqrt(2.0))
52
+
53
+
54
def pq_from_two_gates(pi_H1: float, pi_L1: float, pi_H0: float, pi_L0: float) -> Tuple[float, float]:
    """
    Compute selection-conditional sensitivity/specificity from two-gate ROC probabilities.

    Parameters
    ----------
    pi_H1 : float
        P(S in high-specificity gate | Y=1).
    pi_L1 : float
        P(S in low-specificity gate | Y=1).
    pi_H0 : float
        P(S in high-specificity gate | Y=0).
    pi_L0 : float
        P(S in low-specificity gate | Y=0).

    Returns
    -------
    (p_sel, q_sel) : tuple of float
        ``p_sel = pi_H1 / (pi_H1 + pi_L1)`` and ``q_sel = pi_L0 / (pi_H0 + pi_L0)``.

    Raises
    ------
    ValueError
        If any input is negative or non-finite, or if either gate pair sums
        to a non-positive value.
    """
    gates = {"pi_H1": pi_H1, "pi_L1": pi_L1, "pi_H0": pi_H0, "pi_L0": pi_L0}
    for name, value in gates.items():
        # A negative or NaN entry can still give a positive denominator and
        # would silently produce a rate outside [0, 1] (or NaN).
        if not isfinite(value) or value < 0:
            raise ValueError(
                f"Invalid gate probability {name}={value!r}: must be finite and non-negative."
            )

    denom1 = pi_H1 + pi_L1
    denom0 = pi_H0 + pi_L0
    if denom1 <= 0 or denom0 <= 0:
        raise ValueError("Invalid gate probabilities: denominators must be positive.")

    p_sel = pi_H1 / denom1
    q_sel = pi_L0 / denom0
    return p_sel, q_sel
70
+
71
+
72
def _neglog10_two_sided_p(z_abs: float) -> float:
    """Return ``-log10`` of the two-sided standard-normal p-value for ``|z|``.

    Up to ``|z| = 7`` the p-value is evaluated directly through
    ``_normal_two_sided_p``. Beyond that the tail expansion
    ``p ~ 2 * phi(z) / z * (1 - 1/z**2)`` is evaluated in log space, so no
    intermediate p-value can underflow to zero.
    """
    if z_abs <= 7.0:
        p_two = _normal_two_sided_p(z_abs)
        # Clamp into (0, 1] so log10 never sees 0 or a value above 1.
        p_two = max(min(p_two, 1.0), 1e-323)
        return -log10(p_two)

    z_sq = z_abs * z_abs
    # -log10 p = z^2/(2 ln 10) + log10(sqrt(2*pi)*|z|) - log10(2) - log10(1 - 1/z^2).
    # The log term is ADDED (phi(z)/z sits in the numerator of p) and the
    # factor 2 accounts for both tails. The (1 - 1/z^2) correction keeps the
    # value from dipping below the exact result at the |z| = 7 switch-over.
    return (z_sq / (2.0 * log(10.0))
            + log10(sqrt(2.0 * pi) * z_abs)
            - log10(2.0)
            - log10(1.0 - 1.0 / z_sq))


def lambda_or(tilde_counts: np.ndarray,
              p_sel: float,
              q_sel: float,
              n_val: int,
              lambda_start: float = 1e-6,
              lambda_max: float = 1e6,
              step: float = 10.0,
              eps: float = 1e-9) -> LambdaORResult:
    """
    Compute Λ-OR given a misclassified 2x2 table and selection-conditional (p_sel, q_sel).

    The misclassification matrix ``A = [[p_sel, 1-p_sel], [1-q_sel, q_sel]]`` is
    ridge-stabilized as ``A + lambda*I`` and inverted along a multiplicative
    ridge path until all corrected counts are finite and exceed ``eps``.

    Parameters
    ----------
    tilde_counts : array-like shape (2,2)
        Observed (misclassified) counts [[ã, b̃],[c̃, d̃]].
    p_sel, q_sel : float
        Selection-conditional sensitivity and specificity (from ROC gates).
    n_val : int
        Validation cohort size used to estimate (p_sel, q_sel) for variance propagation.
    lambda_start, lambda_max : float
        Ridge path start and upper bound.
    step : float
        Multiplicative factor for ridge path (e.g., 10.0).
    eps : float
        Numerical/feasibility floor for corrected counts.

    Returns
    -------
    LambdaORResult
        Structured result with fields (log_or, neglog10_p, z, se, counts, lambda_used, converged, meta).
        If no ridge value on the path yields feasible counts, the result has
        ``converged=False`` and NaN in every numeric field.

    Raises
    ------
    ValueError
        If ``tilde_counts`` is not 2x2, if ``p_sel`` or ``q_sel`` lies outside
        [0, 1], or if ``n_val <= 1``.
    """
    Ttil = np.asarray(tilde_counts, dtype=float)
    if Ttil.shape != (2, 2):
        raise ValueError("tilde_counts must be 2x2.")

    if not (0.0 <= p_sel <= 1.0 and 0.0 <= q_sel <= 1.0):
        raise ValueError("p_sel and q_sel must be in [0,1].")
    if n_val <= 1:
        raise ValueError("n_val must be > 1.")

    # Misclassification matrix A
    A = np.array([[p_sel, 1.0 - p_sel],
                  [1.0 - q_sel, q_sel]], dtype=float)

    lam = float(lambda_start)
    converged = False
    counts = None

    while lam <= lambda_max:
        A_lam = A + lam * np.eye(2)
        try:
            inv_AT = np.linalg.inv(A_lam).T
        except np.linalg.LinAlgError:
            feasible = False
        else:
            # counts = tilde T * (A_lam^{-T})
            # NOTE(review): with A laid out as above, right-multiplying by
            # inv(A_lam).T does not preserve the row totals of tilde_counts
            # unless p_sel == q_sel -- confirm the intended table orientation
            # against the manuscript.
            M = Ttil @ inv_AT
            feasible = bool(np.all(np.isfinite(M)) and np.all(M > eps))

        if feasible:
            counts = M
            converged = True
            break

        next_lam = lam * step
        # Stop when the path cannot advance (lambda_start <= 0, step <= 1,
        # NaN/inf); otherwise an infeasible table would loop forever.
        if not next_lam > lam:
            break
        lam = next_lam

    if not converged:
        return LambdaORResult(
            log_or=float('nan'),
            neglog10_p=float('nan'),
            z=float('nan'),
            se=float('nan'),
            counts=np.full((2, 2), np.nan),
            lambda_used=float('nan'),
            converged=False,
            meta={
                "message": "Ridge path failed to find feasible counts.",
                "lambda_max": lambda_max,
                "p_sel": p_sel,
                "q_sel": q_sel
            }
        )

    a, b = counts[0, 0], counts[0, 1]
    c, d = counts[1, 0], counts[1, 1]
    # Corrected log odds ratio
    log_or = log((a * d) / (b * c))

    # Base (Woolf-type) variance from the corrected cell counts
    var_base = 1.0/a + 1.0/b + 1.0/c + 1.0/d

    # Delta-method extra variance from (p_sel, q_sel); the derivative
    # expressions are taken as implemented (derivation is in the manuscript
    # referenced by the module docstring, not in this file).
    J = p_sel + q_sel - 1.0
    # Protect against divide by zero; if J ~ 0, variance will be huge anyway
    denom = max(J*J, 1e-18)
    ad = a * d
    bc = b * c
    dlog_dp = ((1.0 - q_sel) * (ad - bc)) / (denom * ad)
    dlog_dq = ((1.0 - p_sel) * (bc - ad)) / (denom * ad)

    # Binomial variances of the estimated rates on a validation cohort of size n_val
    var_p = p_sel * (1.0 - p_sel) / float(n_val)
    var_q = q_sel * (1.0 - q_sel) / float(n_val)
    var_extra = (dlog_dp ** 2) * var_p + (dlog_dq ** 2) * var_q

    se = sqrt(var_base + var_extra)
    z = log_or / se if se > 0 else float('inf')

    # Two-sided p-value as -log10 p, with an asymptotic tail for large |z|
    neglog10_p = _neglog10_two_sided_p(abs(z))

    meta = {
        "var_base": var_base,
        "var_extra": var_extra,
        "J": J,
        "lambda_path_start": lambda_start,
        "lambda_used": lam,
        "step": step,
        "eps": eps,
        "p_sel": p_sel,
        "q_sel": q_sel,
        "n_val": n_val
    }

    return LambdaORResult(
        log_or=log_or,
        neglog10_p=neglog10_p,
        z=z,
        se=se,
        counts=counts,
        lambda_used=lam,
        converged=converged,
        meta=meta
    )
211
+
212
+
213
+ __all__ = ["lambda_or", "pq_from_two_gates", "LambdaORResult"]
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2021 ZeD Labs
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,52 @@
1
+ Metadata-Version: 2.1
2
+ Name: lambda-or
3
+ Version: 0.0.2
4
+ Summary: Lambda Odds Ratio
5
+ Home-page: https://github.com/zeroknowledgediscovery/lambda_or
6
+ Download-URL: https://github.com/zeroknowledgediscovery/lambda_or/archive/0.0.2.tar.gz
7
+ Author: Ishanu Chattopadhyay
8
+ Author-email: zeroknowledgediscovery@gmail.com
9
+ License: LICENSE
10
+ Keywords: robust odds ratio,misclassification correction,machine learning,EHR databases
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
15
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
16
+ Classifier: Topic :: Software Development :: Libraries
17
+ Classifier: License :: OSI Approved :: MIT License
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Requires-Python: >=3.6
20
+ Description-Content-Type: text/x-rst
21
+ License-File: LICENSE
22
+ Requires-Dist: scikit-learn
23
+ Requires-Dist: scipy
24
+ Requires-Dist: numpy
25
+ Requires-Dist: pandas
26
+
27
+ ===============
28
+ Lambda-OR
29
+ ===============
30
+
31
+ .. class:: no-web no-pdf
32
+
33
+ :Info: Draft link will be posted here
34
+ :Author: ZeD Lab <zed.createuky.net>
35
+ :Description: Robust Odds Ratio correcting label noise
36
+ :Documentation:
37
+
38
+
39
+ **Usage:**
40
+
41
+ .. code-block::
42
+
43
+ import numpy as np
+ from lambda_or import lambda_or, pq_from_two_gates
44
+
45
+ tilde = np.array([[100, 50],[ 80, 70]], dtype=float)
46
+ # Suppose selection-conditional rates from two-gate ROC:
47
+ p_sel, q_sel = 0.92, 0.90
48
+ res = lambda_or(tilde, p_sel, q_sel, n_val=2000)
49
+
50
+ print(res.log_or, res.neglog10_p, res.z, res.se)
51
+ print(res.counts) # corrected a,b,c,d
52
+
@@ -0,0 +1,7 @@
1
+ lambda_or/__init__.py,sha256=pwS8ViU8b530DoT4JRxpMkzsZtcPyeKfwkDEwJgdAV8,154
2
+ lambda_or/lambda_or.py,sha256=49eI7_bM40vfLoT1n-niO0TD05q9ge3bwyQV8dl_9ds,6659
3
+ lambda_or-0.0.2.dist-info/LICENSE,sha256=KeJJ1GzaC0b5EuE7XmauPozraQDwWxm4oC5AtPlh5M8,1065
4
+ lambda_or-0.0.2.dist-info/METADATA,sha256=GWwLBzFT1986I4JfEb2bqJrObTxqFZXboY3SmF5HCOU,1615
5
+ lambda_or-0.0.2.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
6
+ lambda_or-0.0.2.dist-info/top_level.txt,sha256=Qt2JijQSTXoKOwDNGJHynBeWl8nWf78a5-SL5B0g68A,10
7
+ lambda_or-0.0.2.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.37.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ lambda_or