edgepython 0.2.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {edgepython-0.2.0 → edgepython-0.2.1}/PKG-INFO +1 -1
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/__init__.py +11 -1
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/dispersion.py +152 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/dispersion_lowlevel.py +116 -36
- edgepython-0.2.1/edgepython/voom_lmfit.py +1371 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython.egg-info/PKG-INFO +1 -1
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython.egg-info/SOURCES.txt +4 -1
- {edgepython-0.2.0 → edgepython-0.2.1}/pyproject.toml +1 -1
- {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_dispersion.py +19 -0
- edgepython-0.2.1/tests/test_voom_lmfit.py +160 -0
- edgepython-0.2.1/tests/test_voom_r_parity.py +78 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/LICENSE +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/README.md +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/classes.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/compressed_matrix.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/dgelist.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/exact_test.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/expression.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/filtering.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/gene_sets.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/glm_fit.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/glm_levenberg.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/glm_test.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/io.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/limma_port.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/normalization.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/ql_weights.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/results.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/sc_fit.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/smoothing.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/splicing.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/utils.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/visualization.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/weighted_lowess.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython.egg-info/dependency_links.txt +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython.egg-info/requires.txt +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/edgepython.egg-info/top_level.txt +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/setup.cfg +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_bismark.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_classes.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_dgelist.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_exact_test.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_expression.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_gene_sets.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_glm.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_io.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_r_vs_py.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_sc_eb_sim.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_sc_fit.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_splicing.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_treat.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_utilities.py +0 -0
- {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_visualization.py +0 -0
|
@@ -5,7 +5,7 @@ edgePython: Python port of the edgeR Bioconductor package.
|
|
|
5
5
|
Empirical analysis of digital gene expression data in Python.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
__version__ = "0.1
|
|
8
|
+
__version__ = "0.2.1"
|
|
9
9
|
|
|
10
10
|
# --- Classes ---
|
|
11
11
|
from .classes import DGEList, DGEExact, DGEGLM, DGELRT, TopTags
|
|
@@ -42,6 +42,7 @@ from .dispersion import (
|
|
|
42
42
|
estimate_glm_common_disp,
|
|
43
43
|
estimate_glm_trended_disp,
|
|
44
44
|
estimate_glm_tagwise_disp,
|
|
45
|
+
estimate_glm_robust_disp,
|
|
45
46
|
)
|
|
46
47
|
|
|
47
48
|
# --- GLM fitting ---
|
|
@@ -112,3 +113,12 @@ from .sc_fit import glm_sc_fit, glm_sc_test, shrink_sc_disp
|
|
|
112
113
|
|
|
113
114
|
# --- limma utilities ---
|
|
114
115
|
from .limma_port import squeeze_var
|
|
116
|
+
|
|
117
|
+
# --- voom ---
|
|
118
|
+
from .voom_lmfit import (
|
|
119
|
+
voom,
|
|
120
|
+
voom_lmfit,
|
|
121
|
+
voom_basic,
|
|
122
|
+
array_weights,
|
|
123
|
+
duplicate_correlation,
|
|
124
|
+
)
|
|
@@ -918,3 +918,155 @@ def estimate_glm_tagwise_disp(y, design=None, offset=None, dispersion=None,
|
|
|
918
918
|
ave_log_cpm_vals=ave_log_cpm_vals, weights=weights)
|
|
919
919
|
|
|
920
920
|
return tagwise
|
|
921
|
+
|
|
922
|
+
|
|
923
|
+
def _calc_resid(fit, residual_type='pearson'):
    """Return the matrix of GLM residuals used for robust dispersion weights.

    Parameters
    ----------
    fit : mapping
        Fitted GLM; the entries 'fitted.values', 'counts' and 'dispersion'
        are read. The dispersion is expanded to the shape of the fitted
        values via ``expand_as_matrix``.
    residual_type : str
        One of 'pearson', 'anscombe' or 'deviance' (compared case-insensitively).

    Returns
    -------
    ndarray
        Residuals with the shape of the fitted values. Entries whose fitted
        mean is exactly zero are forced to 0.

    Raises
    ------
    ValueError
        If ``residual_type`` is not one of the supported names.
    """
    kind = str(residual_type).lower()
    if kind not in ('pearson', 'anscombe', 'deviance'):
        raise ValueError("residual_type must be one of ('pearson', 'anscombe', 'deviance')")

    fitted = np.asarray(fit['fitted.values'], dtype=np.float64)
    counts = np.asarray(fit['counts'], dtype=np.float64)
    phi = expand_as_matrix(np.asarray(fit['dispersion'], dtype=np.float64), fitted.shape)

    if kind == 'pearson':
        # Negative-binomial variance mu * (1 + phi * mu), floored to avoid
        # dividing by zero.
        variance = np.maximum(fitted * (1 + phi * fitted), 1e-12)
        resid = (counts - fitted) / np.sqrt(variance)
    elif kind == 'deviance':
        # Counts are nudged away from zero so the logarithm stays finite.
        shifted = counts + 1e-5
        with np.errstate(divide='ignore', invalid='ignore'):
            inv_phi = 1 / np.maximum(phi, 1e-12)
            log_ratio = np.log(np.maximum(shifted, 1e-12) / np.maximum(fitted, 1e-12))
            nb_term = (shifted + inv_phi) * np.log((fitted + inv_phi) / (shifted + inv_phi))
            unit_dev = 2 * (shifted * log_ratio + nb_term)
            # Round-off can push the unit deviance slightly below zero.
            unit_dev = np.maximum(unit_dev, 0)
            resid = np.sign(counts - fitted) * np.sqrt(unit_dev)
    else:
        # Numerical approximation to the Anscombe residual integral used by edgeR.
        from scipy.integrate import quad

        def _anscombe_one(obs, mean, disp_value):
            # Non-positive observations or means get a zero residual.
            if mean <= 0 or obs <= 0:
                return 0.0

            def integrand(x):
                return (x * (1 + disp_value * x)) ** (-1.0 / 3.0)

            scale = integrand(mean) ** 0.5
            if obs == mean:
                return 0.0
            integral, _ = quad(integrand, mean, obs, limit=50)
            return scale * integral

        resid = np.zeros_like(counts, dtype=np.float64)
        for row in range(counts.shape[0]):
            for col in range(counts.shape[1]):
                resid[row, col] = _anscombe_one(counts[row, col],
                                                fitted[row, col],
                                                phi[row, col])

    # A zero fitted mean carries no residual information.
    resid[fitted == 0] = 0
    return resid
|
|
968
|
+
|
|
969
|
+
|
|
970
|
+
def _psi_huber_matrix(u, k=1.345):
    """Huber weights for a residual array.

    Entries with ``|u| <= k`` receive weight 1; larger residuals receive
    ``k / |u|``. Any non-finite weight is replaced by 1.

    Parameters
    ----------
    u : array_like
        Residuals; converted to a float64 array.
    k : float
        Huber tuning constant.

    Returns
    -------
    ndarray
        Weights with the same shape as ``u``.
    """
    resid = np.asarray(u, dtype=np.float64)
    magnitude = np.abs(resid)
    weights = np.ones_like(resid, dtype=np.float64)
    # Only positions beyond the threshold are overwritten; the rest stay at 1.
    np.divide(k, magnitude, out=weights, where=magnitude > k)
    weights[~np.isfinite(weights)] = 1.0
    return weights
|
|
978
|
+
|
|
979
|
+
|
|
980
|
+
def _record_robust_disp_state(y, i, res=None, weights=None, fit=None):
    """Snapshot the current iteration into ``y['record']``.

    Used by estimate_glm_robust_disp(record=True). Each available quantity is
    copied into ``y['record'][<name>]['iteration_<i>']``; quantities that are
    ``None`` are skipped. ``y`` is updated in place and also returned.

    Parameters
    ----------
    y : dict-like
        Object holding the dispersion estimates; 'AveLogCPM',
        'trended.dispersion' and 'tagwise.dispersion' are read with ``get``.
    i : int
        Iteration number used to build the record key.
    res, weights : array_like, optional
        Residuals and weights of this iteration.
    fit : mapping, optional
        Fit whose 'fitted.values' are stored under 'mu'.
    """
    key = f'iteration_{i}'
    field_names = ('AveLogCPM', 'trended.dispersion', 'tagwise.dispersion',
                   'weights', 'res', 'mu')

    rec = y.get('record')
    if rec is None:
        rec = {name: {} for name in field_names}

    # Values to snapshot, keyed by the record slot they belong to.
    current = {
        'AveLogCPM': y.get('AveLogCPM'),
        'trended.dispersion': y.get('trended.dispersion'),
        'tagwise.dispersion': y.get('tagwise.dispersion'),
        'weights': weights,
        'res': res,
        'mu': fit.get('fitted.values') if fit is not None else None,
    }
    for name in field_names:
        value = current[name]
        if value is not None:
            # Copy so later in-place updates do not alter the recorded state.
            rec[name][key] = np.asarray(value).copy()

    y['record'] = rec
    return y
|
|
1009
|
+
|
|
1010
|
+
|
|
1011
|
+
def estimate_glm_robust_disp(y, design=None, prior_df=10, update_trend=True,
                             trend_method='bin.loess', maxit=6, k=1.345,
                             residual_type='pearson', verbose=False,
                             record=False):
    """Robust GLM dispersion estimation via iterative Huber reweighting.

    Port of edgeR's estimateGLMRobustDisp.

    Parameters
    ----------
    y : dict
        DGEList-like dict that must contain 'counts'.
    design : array_like, optional
        Design matrix; resolved through ``_resolve_design`` when omitted.
    prior_df : float
        Prior degrees of freedom passed to the tagwise dispersion estimator.
    update_trend : bool
        If true, the trended dispersion is re-estimated in every iteration.
    trend_method : str
        Method passed to the trended dispersion estimator.
    maxit : int
        Number of reweighting iterations.
    k : float
        Huber tuning constant for the observation weights.
    residual_type : str
        Residual type passed to ``_calc_resid``.
    verbose : bool
        If true, progress messages are printed.
    record : bool
        If true, per-iteration state is stored under ``y['record']``.

    Returns
    -------
    dict
        The DGEList-like object with 'weights', 'AveLogCPM' and the
        dispersion entries updated.

    Raises
    ------
    ValueError
        If ``y`` is not a dict containing 'counts'.
    """
    # Validate the container first so invalid input fails with this message
    # rather than with whatever the design resolution would raise.
    if not (isinstance(y, dict) and 'counts' in y):
        raise ValueError("Input must be a DGEList-like dict with 'counts'.")

    from .utils import _resolve_design
    design = _resolve_design(design, y)

    from .dgelist import valid_dgelist
    y = valid_dgelist(y)

    # Start from unit observation weights; an existing 'weights' entry is
    # overwritten.
    y['weights'] = np.ones_like(np.asarray(y['counts'], dtype=np.float64), dtype=np.float64)

    # Initial dispersions are only estimated when not already present.
    if y.get('trended.dispersion') is None:
        y = estimate_glm_trended_disp(y, design=design, method=trend_method,
                                      weights=y['weights'])
    if y.get('tagwise.dispersion') is None:
        y = estimate_glm_tagwise_disp(y, design=design, prior_df=prior_df,
                                      weights=y['weights'])

    if record:
        y = _record_robust_disp_state(y, i=0, weights=y['weights'])

    from .glm_fit import glm_fit

    for i in range(1, int(maxit) + 1):
        if verbose:
            print(f"Iteration {i}: Re-fitting GLM.")

        # Refit, then turn the residuals into Huber weights for the next
        # round of dispersion estimation.
        fit = glm_fit(y, design=design, prior_count=0)
        res = _calc_resid(fit, residual_type=residual_type)

        y['weights'] = _psi_huber_matrix(res, k=k)
        y['AveLogCPM'] = ave_log_cpm(y, dispersion=y.get('trended.dispersion'))

        if update_trend:
            if verbose:
                print("Re-estimating trended dispersion.")
            y = estimate_glm_trended_disp(y, design=design, method=trend_method,
                                          weights=y['weights'])

        if verbose:
            print("Re-estimating tagwise dispersion.")
        y = estimate_glm_tagwise_disp(y, design=design, prior_df=prior_df,
                                      weights=y['weights'])

        if record:
            y = _record_robust_disp_state(y, i=i, res=res,
                                          weights=y['weights'], fit=fit)

    return y
|
|
1068
|
+
|
|
1069
|
+
|
|
1070
|
+
def estimateGLMRobustDisp(*args, **kwargs):
    """edgeR-style camelCase entry point for estimate_glm_robust_disp.

    All positional and keyword arguments are forwarded unchanged, and the
    callee's result is returned as-is.
    """
    result = estimate_glm_robust_disp(*args, **kwargs)
    return result
|
|
@@ -20,6 +20,27 @@ from .expression import ave_log_cpm
|
|
|
20
20
|
from .limma_port import is_fullrank
|
|
21
21
|
|
|
22
22
|
|
|
23
|
+
def _cox_reid_adjust_from_xtwx(XtWX):
    """Cox-Reid adjustment term, -0.5 * log|XtWX|, computed per matrix via LDL.

    Parameters
    ----------
    XtWX : array_like
        Either a single (ncoefs, ncoefs) matrix or a stack of shape
        (ngenes, ncoefs, ncoefs).

    Returns
    -------
    ndarray
        One adjustment value per matrix, shape (ngenes,). A single 2-D input
        yields a length-1 array.
    """
    from scipy.linalg import ldl

    stack = np.asarray(XtWX, dtype=np.float64)
    if stack.ndim == 2:
        # Promote a single matrix to a stack of one.
        stack = stack[None, :, :]

    def _half_neg_logdet(mat):
        # edgeR's C code uses LAPACK Bunch-Kaufman factorization (dsytrf)
        # and sums half log diagonal terms with clipping; LDL is the same
        # symmetric-indefinite factorization family.
        _, d_factor, _ = ldl(mat, lower=True, hermitian=True)
        pivots = np.abs(np.diag(d_factor))
        # Clip tiny pivots so the logarithm stays finite.
        pivots = np.where(pivots > 1e-10, pivots, 1e-10)
        return -0.5 * np.sum(np.log(pivots))

    adjust = np.zeros(stack.shape[0], dtype=np.float64)
    for idx, mat in enumerate(stack):
        adjust[idx] = _half_neg_logdet(mat)
    return adjust
|
|
42
|
+
|
|
43
|
+
|
|
23
44
|
def adjusted_profile_lik_grid(grid_dispersions, y, design, offset, weights=None):
|
|
24
45
|
"""Evaluate APL at multiple dispersion grid points efficiently.
|
|
25
46
|
|
|
@@ -127,20 +148,45 @@ def adjusted_profile_lik_grid(grid_dispersions, y, design, offset, weights=None)
|
|
|
127
148
|
|
|
128
149
|
XtWX = np.einsum('gj,jk,jl->gkl', working_w, design, design)
|
|
129
150
|
|
|
130
|
-
|
|
131
|
-
logdet = np.log(np.maximum(XtWX[:, 0, 0], 1e-300))
|
|
132
|
-
elif ncoefs == 2:
|
|
133
|
-
det = XtWX[:, 0, 0] * XtWX[:, 1, 1] - XtWX[:, 0, 1] ** 2
|
|
134
|
-
logdet = np.log(np.maximum(det, 1e-300))
|
|
135
|
-
else:
|
|
136
|
-
sign, logdet = np.linalg.slogdet(XtWX)
|
|
137
|
-
logdet = np.where(sign > 0, logdet, 0.0)
|
|
138
|
-
|
|
139
|
-
apl[:, gi] = ll - 0.5 * logdet
|
|
151
|
+
apl[:, gi] = ll + _cox_reid_adjust_from_xtwx(XtWX)
|
|
140
152
|
|
|
141
153
|
return apl
|
|
142
154
|
|
|
143
155
|
|
|
156
|
+
def _apl_sum_oneway_scalar(dispersion, y, design, offset, w, group_cols, lgamma_y1):
    """Fast sum of Cox-Reid adjusted profile log-likelihood for one-way designs.

    Parameters
    ----------
    dispersion : float
        Common dispersion; floored at 1e-300 before use.
    y : array_like
        Count matrix, indexed as ``y[:, cols]`` (genes in rows).
    design : array_like
        Design matrix used to form XtWX for the Cox-Reid term.
    offset : array_like
        Offsets with the same layout as ``y``.
    w : array_like
        Observation weights with the same layout as ``y``.
    group_cols : iterable of index arrays
        Column indices of the libraries belonging to each group.
    lgamma_y1 : array_like
        Precomputed ``gammaln(y + 1)`` term of the log-likelihood.

    Returns
    -------
    float
        Sum over genes of log-likelihood plus Cox-Reid adjustment.
    """
    from .glm_fit import mglm_one_group

    y = np.asarray(y, dtype=np.float64)
    offset = np.asarray(offset, dtype=np.float64)
    w = np.asarray(w, dtype=np.float64)

    d = float(max(dispersion, 1e-300))
    mu = np.empty_like(y, dtype=np.float64)

    # Fit each group independently via one-group Fisher scoring.
    for cols in group_cols:
        y_g = y[:, cols]
        off_g = offset[:, cols]
        w_g = w[:, cols]
        disp_g = np.full_like(y_g, d)
        b = mglm_one_group(y_g, dispersion=disp_g, offset=off_g, weights=w_g)
        # Clip the linear predictor so exp() cannot overflow.
        mu[:, cols] = np.exp(np.clip(b[:, None] + off_g, -500, 500))

    # Weighted negative-binomial log-likelihood per gene, with size r = 1/d.
    mu_safe = np.maximum(mu, 1e-300)
    r = 1.0 / d
    ll = np.sum(w * (gammaln(y + r) - gammaln(r) - lgamma_y1
                     + r * np.log(r) + y * np.log(mu_safe)
                     - (r + y) * np.log(r + mu_safe)), axis=1)

    # Working weights give the per-gene XtWX used in the Cox-Reid term.
    working_w = np.maximum(w * mu_safe / (1.0 + d * mu_safe), 1e-300)
    XtWX = np.einsum('gj,jk,jl->gkl', working_w, design, design)

    return float(np.sum(ll + _cox_reid_adjust_from_xtwx(XtWX)))
|
|
188
|
+
|
|
189
|
+
|
|
144
190
|
def adjusted_profile_lik(dispersion, y, design, offset, weights=None,
|
|
145
191
|
start=None, get_coef=False):
|
|
146
192
|
"""Tagwise Cox-Reid adjusted profile log-likelihoods for the dispersion.
|
|
@@ -195,12 +241,23 @@ def adjusted_profile_lik(dispersion, y, design, offset, weights=None,
|
|
|
195
241
|
else:
|
|
196
242
|
w = np.ones_like(y)
|
|
197
243
|
|
|
198
|
-
# Fit GLM to get mu
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
244
|
+
# Fit GLM to get mu.
|
|
245
|
+
# Fast path for one-way designs avoids glm_fit bookkeeping overhead.
|
|
246
|
+
from .glm_fit import glm_fit, mglm_one_way
|
|
247
|
+
from .utils import design_as_factor
|
|
248
|
+
group = design_as_factor(design)
|
|
249
|
+
is_oneway = (len(np.unique(group)) == ncoefs)
|
|
250
|
+
|
|
251
|
+
if is_oneway:
|
|
252
|
+
fit = mglm_one_way(y, design=design, group=group, dispersion=disp,
|
|
253
|
+
offset=offset, weights=weights, coef_start=start)
|
|
254
|
+
mu = fit['fitted.values']
|
|
255
|
+
beta = fit['coefficients']
|
|
256
|
+
else:
|
|
257
|
+
fit = glm_fit(y, design=design, dispersion=disp, offset=offset,
|
|
258
|
+
weights=weights, prior_count=0, start=start)
|
|
259
|
+
mu = fit['fitted.values']
|
|
260
|
+
beta = fit.get('unshrunk.coefficients', fit['coefficients'])
|
|
204
261
|
|
|
205
262
|
# Compute adjusted profile log-likelihood for all genes (vectorized)
|
|
206
263
|
mu_safe = np.maximum(mu, 1e-300) # (ngenes, nlibs)
|
|
@@ -233,17 +290,7 @@ def adjusted_profile_lik(dispersion, y, design, offset, weights=None,
|
|
|
233
290
|
# XtWX[g, k, l] = sum_j working_w[g,j] * design[j,k] * design[j,l]
|
|
234
291
|
XtWX = np.einsum('gj,jk,jl->gkl', working_w, design, design) # (ngenes, ncoefs, ncoefs)
|
|
235
292
|
|
|
236
|
-
|
|
237
|
-
if ncoefs == 1:
|
|
238
|
-
logdet = np.log(np.maximum(XtWX[:, 0, 0], 1e-300))
|
|
239
|
-
elif ncoefs == 2:
|
|
240
|
-
det = XtWX[:, 0, 0] * XtWX[:, 1, 1] - XtWX[:, 0, 1] ** 2
|
|
241
|
-
logdet = np.log(np.maximum(det, 1e-300))
|
|
242
|
-
else:
|
|
243
|
-
sign, logdet = np.linalg.slogdet(XtWX)
|
|
244
|
-
logdet = np.where(sign > 0, logdet, 0.0)
|
|
245
|
-
|
|
246
|
-
cr_adj = -0.5 * logdet
|
|
293
|
+
cr_adj = _cox_reid_adjust_from_xtwx(XtWX)
|
|
247
294
|
apl = ll + cr_adj
|
|
248
295
|
|
|
249
296
|
if get_coef:
|
|
@@ -555,10 +602,31 @@ def disp_cox_reid(y, design=None, offset=None, weights=None, ave_log_cpm_vals=No
|
|
|
555
602
|
if weights is not None and weights.ndim == 2:
|
|
556
603
|
weights = weights[i]
|
|
557
604
|
|
|
558
|
-
# Function to optimize
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
605
|
+
# Function to optimize.
|
|
606
|
+
# Fast path: one-way designs can evaluate APL sum without generic glm_fit overhead.
|
|
607
|
+
from .utils import design_as_factor
|
|
608
|
+
group = design_as_factor(design)
|
|
609
|
+
is_oneway = len(np.unique(group)) == design.shape[1]
|
|
610
|
+
|
|
611
|
+
if weights is None:
|
|
612
|
+
w = np.ones_like(y)
|
|
613
|
+
else:
|
|
614
|
+
w = np.asarray(weights, dtype=np.float64)
|
|
615
|
+
if w.ndim == 1:
|
|
616
|
+
w = np.tile(w, (y.shape[0], 1))
|
|
617
|
+
|
|
618
|
+
if is_oneway:
|
|
619
|
+
unique_groups = np.unique(group)
|
|
620
|
+
group_cols = [np.where(group == grp)[0] for grp in unique_groups]
|
|
621
|
+
lgamma_y1 = gammaln(y + 1)
|
|
622
|
+
|
|
623
|
+
def fun(par):
|
|
624
|
+
disp = par ** 4
|
|
625
|
+
return -_apl_sum_oneway_scalar(disp, y, design, offset, w, group_cols, lgamma_y1)
|
|
626
|
+
else:
|
|
627
|
+
def fun(par):
|
|
628
|
+
disp = par ** 4
|
|
629
|
+
return -np.sum(adjusted_profile_lik(disp, y, design, offset, weights=weights))
|
|
562
630
|
|
|
563
631
|
# Optimize
|
|
564
632
|
lo = interval[0] ** 0.25
|
|
@@ -930,12 +998,24 @@ def disp_bin_trend(y, design=None, offset=None, df=5, span=0.3,
|
|
|
930
998
|
bin_d[i - 1] = 0.1
|
|
931
999
|
bin_a[i - 1] = np.mean(bin_ave)
|
|
932
1000
|
|
|
933
|
-
# If few bins, use linear interpolation
|
|
1001
|
+
# If few bins, use linear interpolation with R's approxfun(rule=2, ties=mean)
|
|
1002
|
+
# behavior: average duplicate x values and clamp to boundary values outside range.
|
|
934
1003
|
if nbins < 7:
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
1004
|
+
x = np.asarray(bin_a, dtype=np.float64)
|
|
1005
|
+
yv = np.sqrt(np.maximum(np.asarray(bin_d, dtype=np.float64), 0))
|
|
1006
|
+
order = np.argsort(x)
|
|
1007
|
+
x = x[order]
|
|
1008
|
+
yv = yv[order]
|
|
1009
|
+
# ties=mean
|
|
1010
|
+
xu, inv = np.unique(x, return_inverse=True)
|
|
1011
|
+
yu = np.zeros_like(xu, dtype=np.float64)
|
|
1012
|
+
cnt = np.zeros_like(xu, dtype=np.float64)
|
|
1013
|
+
for i, idx in enumerate(inv):
|
|
1014
|
+
yu[idx] += yv[i]
|
|
1015
|
+
cnt[idx] += 1.0
|
|
1016
|
+
yu = yu / np.maximum(cnt, 1.0)
|
|
1017
|
+
y_interp = np.interp(ave_log_cpm_vals, xu, yu, left=yu[0], right=yu[-1])
|
|
1018
|
+
dispersion = np.maximum(y_interp ** 2, 0)
|
|
939
1019
|
return {'AveLogCPM': ave_log_cpm_vals, 'dispersion': dispersion,
|
|
940
1020
|
'bin.AveLogCPM': bin_a, 'bin.dispersion': bin_d}
|
|
941
1021
|
|