edgepython 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {edgepython-0.2.0 → edgepython-0.2.1}/PKG-INFO +1 -1
  2. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/__init__.py +11 -1
  3. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/dispersion.py +152 -0
  4. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/dispersion_lowlevel.py +116 -36
  5. edgepython-0.2.1/edgepython/voom_lmfit.py +1371 -0
  6. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython.egg-info/PKG-INFO +1 -1
  7. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython.egg-info/SOURCES.txt +4 -1
  8. {edgepython-0.2.0 → edgepython-0.2.1}/pyproject.toml +1 -1
  9. {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_dispersion.py +19 -0
  10. edgepython-0.2.1/tests/test_voom_lmfit.py +160 -0
  11. edgepython-0.2.1/tests/test_voom_r_parity.py +78 -0
  12. {edgepython-0.2.0 → edgepython-0.2.1}/LICENSE +0 -0
  13. {edgepython-0.2.0 → edgepython-0.2.1}/README.md +0 -0
  14. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/classes.py +0 -0
  15. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/compressed_matrix.py +0 -0
  16. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/dgelist.py +0 -0
  17. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/exact_test.py +0 -0
  18. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/expression.py +0 -0
  19. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/filtering.py +0 -0
  20. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/gene_sets.py +0 -0
  21. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/glm_fit.py +0 -0
  22. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/glm_levenberg.py +0 -0
  23. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/glm_test.py +0 -0
  24. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/io.py +0 -0
  25. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/limma_port.py +0 -0
  26. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/normalization.py +0 -0
  27. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/ql_weights.py +0 -0
  28. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/results.py +0 -0
  29. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/sc_fit.py +0 -0
  30. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/smoothing.py +0 -0
  31. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/splicing.py +0 -0
  32. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/utils.py +0 -0
  33. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/visualization.py +0 -0
  34. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython/weighted_lowess.py +0 -0
  35. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython.egg-info/dependency_links.txt +0 -0
  36. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython.egg-info/requires.txt +0 -0
  37. {edgepython-0.2.0 → edgepython-0.2.1}/edgepython.egg-info/top_level.txt +0 -0
  38. {edgepython-0.2.0 → edgepython-0.2.1}/setup.cfg +0 -0
  39. {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_bismark.py +0 -0
  40. {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_classes.py +0 -0
  41. {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_dgelist.py +0 -0
  42. {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_exact_test.py +0 -0
  43. {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_expression.py +0 -0
  44. {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_gene_sets.py +0 -0
  45. {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_glm.py +0 -0
  46. {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_io.py +0 -0
  47. {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_r_vs_py.py +0 -0
  48. {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_sc_eb_sim.py +0 -0
  49. {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_sc_fit.py +0 -0
  50. {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_splicing.py +0 -0
  51. {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_treat.py +0 -0
  52. {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_utilities.py +0 -0
  53. {edgepython-0.2.0 → edgepython-0.2.1}/tests/test_visualization.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: edgepython
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: Python port of the edgeR Bioconductor package for differential expression analysis of digital gene expression data.
5
5
  Author: Lior Pachter
6
6
  License-Expression: GPL-3.0-or-later
@@ -5,7 +5,7 @@ edgePython: Python port of the edgeR Bioconductor package.
5
5
  Empirical analysis of digital gene expression data in Python.
6
6
  """
7
7
 
8
- __version__ = "0.1.0"
8
+ __version__ = "0.2.1"
9
9
 
10
10
  # --- Classes ---
11
11
  from .classes import DGEList, DGEExact, DGEGLM, DGELRT, TopTags
@@ -42,6 +42,7 @@ from .dispersion import (
42
42
  estimate_glm_common_disp,
43
43
  estimate_glm_trended_disp,
44
44
  estimate_glm_tagwise_disp,
45
+ estimate_glm_robust_disp,
45
46
  )
46
47
 
47
48
  # --- GLM fitting ---
@@ -112,3 +113,12 @@ from .sc_fit import glm_sc_fit, glm_sc_test, shrink_sc_disp
112
113
 
113
114
  # --- limma utilities ---
114
115
  from .limma_port import squeeze_var
116
+
117
+ # --- voom ---
118
+ from .voom_lmfit import (
119
+ voom,
120
+ voom_lmfit,
121
+ voom_basic,
122
+ array_weights,
123
+ duplicate_correlation,
124
+ )
@@ -918,3 +918,155 @@ def estimate_glm_tagwise_disp(y, design=None, offset=None, dispersion=None,
918
918
  ave_log_cpm_vals=ave_log_cpm_vals, weights=weights)
919
919
 
920
920
  return tagwise
921
+
922
+
923
def _calc_resid(fit, residual_type='pearson'):
    """Compute GLM residual matrix for robust dispersion fitting.

    Parameters
    ----------
    fit : dict
        GLM fit with 'fitted.values', 'counts' and 'dispersion' entries
        (as returned by glm_fit); dispersion is broadcast to the count
        matrix shape via expand_as_matrix.
    residual_type : str
        One of 'pearson', 'anscombe' or 'deviance' (case-insensitive).

    Returns
    -------
    ndarray
        Residual matrix with the same shape as fit['counts'].

    Raises
    ------
    ValueError
        If residual_type is not one of the three supported kinds.
    """
    residual_type = str(residual_type).lower()
    if residual_type not in ('pearson', 'anscombe', 'deviance'):
        raise ValueError("residual_type must be one of ('pearson', 'anscombe', 'deviance')")

    mu = np.asarray(fit['fitted.values'], dtype=np.float64)
    yi = np.asarray(fit['counts'], dtype=np.float64)
    # Per-gene (or scalar) dispersion expanded to the full counts shape.
    disp = expand_as_matrix(np.asarray(fit['dispersion'], dtype=np.float64), mu.shape)

    if residual_type == 'pearson':
        # NB variance is mu * (1 + disp * mu); floor avoids division by zero.
        res = (yi - mu) / np.sqrt(np.maximum(mu * (1 + disp * mu), 1e-12))
    elif residual_type == 'deviance':
        # Small offset keeps log(y) finite for zero counts.
        y_adj = yi + 1e-5
        with np.errstate(divide='ignore', invalid='ignore'):
            # NB unit deviance; 1/disp is the NB size parameter.
            r = 2 * (y_adj * np.log(np.maximum(y_adj, 1e-12) / np.maximum(mu, 1e-12)) +
                     (y_adj + 1 / np.maximum(disp, 1e-12)) *
                     np.log((mu + 1 / np.maximum(disp, 1e-12)) /
                            (y_adj + 1 / np.maximum(disp, 1e-12))))
        # Clamp tiny negative values produced by floating-point cancellation.
        r = np.maximum(r, 0)
        res = np.sign(yi - mu) * np.sqrt(r)
    else:
        # Numerical approximation to the Anscombe residual integral used by edgeR.
        from scipy.integrate import quad

        def _anscombe_scalar(yv, muv, dv):
            # Residual is defined as 0 when either count or mean is non-positive.
            if muv <= 0 or yv <= 0:
                return 0.0

            def ffun(x):
                # Integrand (x * (1 + d*x))^(-1/3) of the Anscombe transform.
                return (x * (1 + dv * x)) ** (-1.0 / 3.0)

            const = ffun(muv) ** 0.5
            if yv == muv:
                return 0.0
            val, _ = quad(ffun, muv, yv, limit=50)
            return const * val

        # Scalar quadrature per entry: O(ngenes * nsamples) integrations.
        res = np.zeros_like(yi, dtype=np.float64)
        for g in range(yi.shape[0]):
            for s in range(yi.shape[1]):
                res[g, s] = _anscombe_scalar(yi[g, s], mu[g, s], disp[g, s])

    # Zero fitted values give no information — force residual to 0.
    # NOTE(review): applied to all residual types here; confirm against edgeR.
    res[mu == 0] = 0
    return res
968
+
969
+
970
+ def _psi_huber_matrix(u, k=1.345):
971
+ """Huber psi weights on a residual matrix."""
972
+ u = np.asarray(u, dtype=np.float64)
973
+ out = np.ones_like(u, dtype=np.float64)
974
+ mask = np.abs(u) > k
975
+ out[mask] = k / np.abs(u[mask])
976
+ out[~np.isfinite(out)] = 1.0
977
+ return out
978
+
979
+
980
+ def _record_robust_disp_state(y, i, res=None, weights=None, fit=None):
981
+ """Store per-iteration state for estimate_glm_robust_disp(record=True)."""
982
+ key = f'iteration_{i}'
983
+ rec = y.get('record')
984
+ if rec is None:
985
+ rec = {
986
+ 'AveLogCPM': {},
987
+ 'trended.dispersion': {},
988
+ 'tagwise.dispersion': {},
989
+ 'weights': {},
990
+ 'res': {},
991
+ 'mu': {}
992
+ }
993
+
994
+ if y.get('AveLogCPM') is not None:
995
+ rec['AveLogCPM'][key] = np.asarray(y['AveLogCPM']).copy()
996
+ if y.get('trended.dispersion') is not None:
997
+ rec['trended.dispersion'][key] = np.asarray(y['trended.dispersion']).copy()
998
+ if y.get('tagwise.dispersion') is not None:
999
+ rec['tagwise.dispersion'][key] = np.asarray(y['tagwise.dispersion']).copy()
1000
+ if weights is not None:
1001
+ rec['weights'][key] = np.asarray(weights).copy()
1002
+ if res is not None:
1003
+ rec['res'][key] = np.asarray(res).copy()
1004
+ if fit is not None and fit.get('fitted.values') is not None:
1005
+ rec['mu'][key] = np.asarray(fit['fitted.values']).copy()
1006
+
1007
+ y['record'] = rec
1008
+ return y
1009
+
1010
+
1011
def estimate_glm_robust_disp(y, design=None, prior_df=10, update_trend=True,
                             trend_method='bin.loess', maxit=6, k=1.345,
                             residual_type='pearson', verbose=False,
                             record=False):
    """Robust GLM dispersion estimation via iterative Huber reweighting.

    Port of edgeR's estimateGLMRobustDisp.

    Parameters
    ----------
    y : dict
        DGEList-like dict; must contain 'counts'. Mutated/extended with
        'weights', 'AveLogCPM', trended/tagwise dispersions and optionally
        'record'.
    design : array-like or None
        Design matrix; resolved via _resolve_design when None.
    prior_df : float
        Prior degrees of freedom for tagwise dispersion shrinkage.
    update_trend : bool
        If True, re-estimate the trended dispersion each iteration.
    trend_method : str
        Trend estimation method passed to estimate_glm_trended_disp.
    maxit : int
        Number of reweighting iterations.
    k : float
        Huber tuning constant for the residual weights.
    residual_type : str
        Residual kind passed to _calc_resid ('pearson', 'anscombe', 'deviance').
    verbose : bool
        Print per-iteration progress.
    record : bool
        If True, snapshot per-iteration state into y['record'].

    Returns
    -------
    dict
        The updated DGEList-like dict.
    """
    from .utils import _resolve_design
    design = _resolve_design(design, y)

    if not (isinstance(y, dict) and 'counts' in y):
        raise ValueError("Input must be a DGEList-like dict with 'counts'.")

    from .dgelist import valid_dgelist
    y = valid_dgelist(y)

    # Start from unit observation weights.
    # NOTE(review): any pre-existing y['weights'] are overwritten here —
    # matches edgeR's estimateGLMRobustDisp initialization; confirm intended.
    y['weights'] = np.ones_like(np.asarray(y['counts'], dtype=np.float64), dtype=np.float64)

    # Seed trended/tagwise dispersions only if the caller has not already.
    if y.get('trended.dispersion') is None:
        y = estimate_glm_trended_disp(y, design=design, method=trend_method,
                                      weights=y['weights'])
    if y.get('tagwise.dispersion') is None:
        y = estimate_glm_tagwise_disp(y, design=design, prior_df=prior_df,
                                      weights=y['weights'])

    if record:
        # Iteration 0 snapshot = state before any reweighting.
        y = _record_robust_disp_state(y, i=0, weights=y['weights'])

    from .glm_fit import glm_fit

    for i in range(1, int(maxit) + 1):
        if verbose:
            print(f"Iteration {i}: Re-fitting GLM.")

        # Refit with current weights/dispersions; prior_count=0 keeps the
        # fitted means unshrunk for residual computation.
        fit = glm_fit(y, design=design, prior_count=0)
        res = _calc_resid(fit, residual_type=residual_type)

        # Downweight outlying observations, then refresh abundance estimates.
        y['weights'] = _psi_huber_matrix(res, k=k)
        y['AveLogCPM'] = ave_log_cpm(y, dispersion=y.get('trended.dispersion'))

        if update_trend:
            if verbose:
                print("Re-estimating trended dispersion.")
            y = estimate_glm_trended_disp(y, design=design, method=trend_method,
                                          weights=y['weights'])

        if verbose:
            print("Re-estimating tagwise dispersion.")
        y = estimate_glm_tagwise_disp(y, design=design, prior_df=prior_df,
                                      weights=y['weights'])

        if record:
            y = _record_robust_disp_state(y, i=i, res=res,
                                          weights=y['weights'], fit=fit)

    return y
1068
+
1069
+
1070
def estimateGLMRobustDisp(*args, **kwargs):
    """edgeR-style camelCase alias for :func:`estimate_glm_robust_disp`."""
    result = estimate_glm_robust_disp(*args, **kwargs)
    return result
@@ -20,6 +20,27 @@ from .expression import ave_log_cpm
20
20
  from .limma_port import is_fullrank
21
21
 
22
22
 
23
+ def _cox_reid_adjust_from_xtwx(XtWX):
24
+ """Return -0.5 * log|XtWX| using LDL, matching edgeR's C path."""
25
+ from scipy.linalg import ldl
26
+
27
+ A = np.asarray(XtWX, dtype=np.float64)
28
+ if A.ndim == 2:
29
+ A = A[None, :, :]
30
+
31
+ ngenes = A.shape[0]
32
+ out = np.zeros(ngenes, dtype=np.float64)
33
+ for g in range(ngenes):
34
+ # edgeR's C code uses LAPACK Bunch-Kaufman factorization (dsytrf)
35
+ # and sums half log diagonal terms with clipping; LDL is the same
36
+ # symmetric-indefinite factorization family.
37
+ _, dmat, _ = ldl(A[g], lower=True, hermitian=True)
38
+ diag = np.abs(np.diag(dmat))
39
+ diag = np.where(diag > 1e-10, diag, 1e-10)
40
+ out[g] = -0.5 * np.sum(np.log(diag))
41
+ return out
42
+
43
+
23
44
  def adjusted_profile_lik_grid(grid_dispersions, y, design, offset, weights=None):
24
45
  """Evaluate APL at multiple dispersion grid points efficiently.
25
46
 
@@ -127,20 +148,45 @@ def adjusted_profile_lik_grid(grid_dispersions, y, design, offset, weights=None)
127
148
 
128
149
  XtWX = np.einsum('gj,jk,jl->gkl', working_w, design, design)
129
150
 
130
- if ncoefs == 1:
131
- logdet = np.log(np.maximum(XtWX[:, 0, 0], 1e-300))
132
- elif ncoefs == 2:
133
- det = XtWX[:, 0, 0] * XtWX[:, 1, 1] - XtWX[:, 0, 1] ** 2
134
- logdet = np.log(np.maximum(det, 1e-300))
135
- else:
136
- sign, logdet = np.linalg.slogdet(XtWX)
137
- logdet = np.where(sign > 0, logdet, 0.0)
138
-
139
- apl[:, gi] = ll - 0.5 * logdet
151
+ apl[:, gi] = ll + _cox_reid_adjust_from_xtwx(XtWX)
140
152
 
141
153
  return apl
142
154
 
143
155
 
156
def _apl_sum_oneway_scalar(dispersion, y, design, offset, w, group_cols, lgamma_y1):
    """Fast sum of Cox-Reid adjusted profile log-likelihood for one-way designs.

    Parameters
    ----------
    dispersion : float
        Scalar NB dispersion at which to evaluate the APL.
    y, offset, w : array-like, shape (ngenes, nlibs)
        Counts, log-offsets and observation weights.
    design : ndarray, shape (nlibs, ncoefs)
        One-way design matrix (one indicator column per group).
    group_cols : sequence of index arrays
        Column indices of `y` belonging to each group.
    lgamma_y1 : ndarray
        Precomputed gammaln(y + 1), hoisted by the caller out of the
        optimizer loop.

    Returns
    -------
    float
        Sum over genes of the Cox-Reid adjusted profile log-likelihood.
    """
    from .glm_fit import mglm_one_group

    y = np.asarray(y, dtype=np.float64)
    offset = np.asarray(offset, dtype=np.float64)
    w = np.asarray(w, dtype=np.float64)

    # Floor keeps 1/d finite for dispersion -> 0.
    d = float(max(dispersion, 1e-300))
    mu = np.empty_like(y, dtype=np.float64)

    # Fit each group independently via one-group Fisher scoring.
    for cols in group_cols:
        y_g = y[:, cols]
        off_g = offset[:, cols]
        w_g = w[:, cols]
        disp_g = np.full_like(y_g, d)
        b = mglm_one_group(y_g, dispersion=disp_g, offset=off_g, weights=w_g)
        # Clip the linear predictor so exp() cannot overflow/underflow.
        mu[:, cols] = np.exp(np.clip(b[:, None] + off_g, -500, 500))

    # NB log-likelihood with size r = 1/d, summed over libraries per gene.
    mu_safe = np.maximum(mu, 1e-300)
    r = 1.0 / d
    ll = np.sum(w * (gammaln(y + r) - gammaln(r) - lgamma_y1
                     + r * np.log(r) + y * np.log(mu_safe)
                     - (r + y) * np.log(r + mu_safe)), axis=1)

    # Cox-Reid adjustment from the weighted information matrix X'WX.
    working_w = np.maximum(w * mu_safe / (1.0 + d * mu_safe), 1e-300)
    XtWX = np.einsum('gj,jk,jl->gkl', working_w, design, design)

    return float(np.sum(ll + _cox_reid_adjust_from_xtwx(XtWX)))
188
+
189
+
144
190
  def adjusted_profile_lik(dispersion, y, design, offset, weights=None,
145
191
  start=None, get_coef=False):
146
192
  """Tagwise Cox-Reid adjusted profile log-likelihoods for the dispersion.
@@ -195,12 +241,23 @@ def adjusted_profile_lik(dispersion, y, design, offset, weights=None,
195
241
  else:
196
242
  w = np.ones_like(y)
197
243
 
198
- # Fit GLM to get mu
199
- from .glm_fit import glm_fit
200
- fit = glm_fit(y, design=design, dispersion=disp, offset=offset,
201
- weights=weights, prior_count=0, start=start)
202
- mu = fit['fitted.values']
203
- beta = fit.get('unshrunk.coefficients', fit['coefficients'])
244
+ # Fit GLM to get mu.
245
+ # Fast path for one-way designs avoids glm_fit bookkeeping overhead.
246
+ from .glm_fit import glm_fit, mglm_one_way
247
+ from .utils import design_as_factor
248
+ group = design_as_factor(design)
249
+ is_oneway = (len(np.unique(group)) == ncoefs)
250
+
251
+ if is_oneway:
252
+ fit = mglm_one_way(y, design=design, group=group, dispersion=disp,
253
+ offset=offset, weights=weights, coef_start=start)
254
+ mu = fit['fitted.values']
255
+ beta = fit['coefficients']
256
+ else:
257
+ fit = glm_fit(y, design=design, dispersion=disp, offset=offset,
258
+ weights=weights, prior_count=0, start=start)
259
+ mu = fit['fitted.values']
260
+ beta = fit.get('unshrunk.coefficients', fit['coefficients'])
204
261
 
205
262
  # Compute adjusted profile log-likelihood for all genes (vectorized)
206
263
  mu_safe = np.maximum(mu, 1e-300) # (ngenes, nlibs)
@@ -233,17 +290,7 @@ def adjusted_profile_lik(dispersion, y, design, offset, weights=None,
233
290
  # XtWX[g, k, l] = sum_j working_w[g,j] * design[j,k] * design[j,l]
234
291
  XtWX = np.einsum('gj,jk,jl->gkl', working_w, design, design) # (ngenes, ncoefs, ncoefs)
235
292
 
236
- # Log determinant for all genes
237
- if ncoefs == 1:
238
- logdet = np.log(np.maximum(XtWX[:, 0, 0], 1e-300))
239
- elif ncoefs == 2:
240
- det = XtWX[:, 0, 0] * XtWX[:, 1, 1] - XtWX[:, 0, 1] ** 2
241
- logdet = np.log(np.maximum(det, 1e-300))
242
- else:
243
- sign, logdet = np.linalg.slogdet(XtWX)
244
- logdet = np.where(sign > 0, logdet, 0.0)
245
-
246
- cr_adj = -0.5 * logdet
293
+ cr_adj = _cox_reid_adjust_from_xtwx(XtWX)
247
294
  apl = ll + cr_adj
248
295
 
249
296
  if get_coef:
@@ -555,10 +602,31 @@ def disp_cox_reid(y, design=None, offset=None, weights=None, ave_log_cpm_vals=No
555
602
  if weights is not None and weights.ndim == 2:
556
603
  weights = weights[i]
557
604
 
558
- # Function to optimize
559
- def fun(par):
560
- disp = par ** 4
561
- return -np.sum(adjusted_profile_lik(disp, y, design, offset, weights=weights))
605
+ # Function to optimize.
606
+ # Fast path: one-way designs can evaluate APL sum without generic glm_fit overhead.
607
+ from .utils import design_as_factor
608
+ group = design_as_factor(design)
609
+ is_oneway = len(np.unique(group)) == design.shape[1]
610
+
611
+ if weights is None:
612
+ w = np.ones_like(y)
613
+ else:
614
+ w = np.asarray(weights, dtype=np.float64)
615
+ if w.ndim == 1:
616
+ w = np.tile(w, (y.shape[0], 1))
617
+
618
+ if is_oneway:
619
+ unique_groups = np.unique(group)
620
+ group_cols = [np.where(group == grp)[0] for grp in unique_groups]
621
+ lgamma_y1 = gammaln(y + 1)
622
+
623
+ def fun(par):
624
+ disp = par ** 4
625
+ return -_apl_sum_oneway_scalar(disp, y, design, offset, w, group_cols, lgamma_y1)
626
+ else:
627
+ def fun(par):
628
+ disp = par ** 4
629
+ return -np.sum(adjusted_profile_lik(disp, y, design, offset, weights=weights))
562
630
 
563
631
  # Optimize
564
632
  lo = interval[0] ** 0.25
@@ -930,12 +998,24 @@ def disp_bin_trend(y, design=None, offset=None, df=5, span=0.3,
930
998
  bin_d[i - 1] = 0.1
931
999
  bin_a[i - 1] = np.mean(bin_ave)
932
1000
 
933
- # If few bins, use linear interpolation
1001
+ # If few bins, use linear interpolation with R's approxfun(rule=2, ties=mean)
1002
+ # behavior: average duplicate x values and clamp to boundary values outside range.
934
1003
  if nbins < 7:
935
- from scipy.interpolate import interp1d
936
- f = interp1d(bin_a, np.sqrt(np.maximum(bin_d, 0)),
937
- fill_value='extrapolate', kind='linear')
938
- dispersion = f(ave_log_cpm_vals) ** 2
1004
+ x = np.asarray(bin_a, dtype=np.float64)
1005
+ yv = np.sqrt(np.maximum(np.asarray(bin_d, dtype=np.float64), 0))
1006
+ order = np.argsort(x)
1007
+ x = x[order]
1008
+ yv = yv[order]
1009
+ # ties=mean
1010
+ xu, inv = np.unique(x, return_inverse=True)
1011
+ yu = np.zeros_like(xu, dtype=np.float64)
1012
+ cnt = np.zeros_like(xu, dtype=np.float64)
1013
+ for i, idx in enumerate(inv):
1014
+ yu[idx] += yv[i]
1015
+ cnt[idx] += 1.0
1016
+ yu = yu / np.maximum(cnt, 1.0)
1017
+ y_interp = np.interp(ave_log_cpm_vals, xu, yu, left=yu[0], right=yu[-1])
1018
+ dispersion = np.maximum(y_interp ** 2, 0)
939
1019
  return {'AveLogCPM': ave_log_cpm_vals, 'dispersion': dispersion,
940
1020
  'bin.AveLogCPM': bin_a, 'bin.dispersion': bin_d}
941
1021