edgepython 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,323 @@
1
+ # This code was written by Claude (Anthropic). The project was directed by Lior Pachter.
2
+ """Port of limma's weightedLowess C code.
3
+
4
+ Weighted local regression with delta-based binning to approximately npts
5
+ seed points. Matches the behavior of limma's weighted_lowess() C function
6
+ in src/weighted_lowess.c.
7
+
8
+ The algorithm:
9
+ 1. Sort data by x
10
+ 2. Select ~npts seed points spaced at least delta apart
11
+ 3. For each seed, find the span window where cumulative weight >= span * total_weight
12
+ 4. Fit local weighted linear regression (tricube kernel) at each seed
13
+ 5. Linearly interpolate between seeds
14
+ 6. Optionally iterate with bisquare robustness weights
15
+ """
16
+
17
+ import numpy as np
18
+ from numba import njit
19
+
20
+ _THRESHOLD = 1e-7
21
+
22
+
23
def weighted_lowess(x, y, weights=None, span=0.3, iterations=4, npts=200, delta=None):
    """Weighted lowess smoothing matching limma's C implementation.

    The data are sorted by ``x``, a subset of seed points spaced more than
    ``delta`` apart is fitted by local weighted linear regression, and the
    remaining points are filled in by linear interpolation between seeds.

    Parameters
    ----------
    x : array-like
        Covariate values (one-dimensional).
    y : array-like
        Response values, same length as ``x``.
    weights : array-like, optional
        Prior weights, same length as ``x`` (default: all ones).
    span : float
        Proportion of total weight to use in each local regression window.
    iterations : int
        Total number of fitting passes (1 = no robustness iterations).
        With ``iterations < 1`` no pass is run and the fitted values are
        left at their initial value of zero.
    npts : int
        Approximate number of seed points for binning. Only used when
        ``delta`` is None.
    delta : float, optional
        Minimum distance between seed points. Computed from npts if None.

    Returns
    -------
    dict with keys 'fitted', 'residuals', 'weights' (robustness weights),
    'delta'. Array entries are in the original (unsorted) order of ``x``.

    Raises
    ------
    ValueError
        If ``x`` is not one-dimensional, has fewer than two points, if ``y``
        or ``weights`` does not match the shape of ``x``, or if ``npts``
        rounds to less than one while ``delta`` has to be derived from it.
    """
    x = np.asarray(x, dtype=np.float64)
    y = np.asarray(y, dtype=np.float64)

    if x.ndim != 1:
        raise ValueError("x must be one-dimensional")
    n = len(x)
    if n < 2:
        raise ValueError("Need at least two points")
    if y.shape != x.shape:
        raise ValueError("x and y are different lengths")

    if weights is None:
        weights = np.ones(n, dtype=np.float64)
    else:
        weights = np.asarray(weights, dtype=np.float64)
        if weights.shape != x.shape:
            raise ValueError("x and weights are different lengths")

    # Sort by x (mergesort for stable ordering matching R's order()).
    # Fancy indexing already yields fresh arrays, so the caller's data is
    # never modified by the in-place work done further down.
    o = np.argsort(x, kind='mergesort')
    xs = x[o]
    ys = y[o]
    ws = weights[o]

    # Compute delta if not provided (matching R wrapper logic)
    if delta is None:
        npts = int(npts + 0.5)
        if npts >= n:
            delta = 0.0
        elif npts < 1:
            raise ValueError("npts must be at least 1")
        else:
            # Sorted gaps between neighbouring x values; the cumulative sum
            # of the smallest gaps bounds how wide each cluster has to be.
            dx = np.sort(np.diff(xs))
            cumrange = np.cumsum(dx)
            numclusters = np.arange(npts)
            # R 1-based to Python 0-based index conversion
            indices = len(dx) - 1 - numclusters
            delta = np.min(cumrange[indices] / (npts - numclusters))

    delta = float(delta)

    # Total weight, the weight each window must accumulate, and the average
    # x spacing used as a scale when deciding whether two seeds coincide.
    total_weight = np.sum(ws)
    span_weight = total_weight * span
    subrange = (xs[-1] - xs[0]) / n

    # Find seed points (binned to ~npts)
    seed_idx, nseeds = _find_seeds(xs, n, delta)

    # Find span limits for each seed
    frame_start, frame_end, max_dist = _find_limits(
        seed_idx, nseeds, xs, ws, n, span_weight)

    # Fitted values and robustness weights are filled in place below
    fitted = np.zeros(n, dtype=np.float64)
    rob_w = np.ones(n, dtype=np.float64)

    # Run iterations in compiled code
    _lowess_iterations(xs, ys, ws, fitted, rob_w, seed_idx, nseeds,
                       frame_start, frame_end, max_dist, total_weight,
                       subrange, iterations)

    # Map back to original (unsorted) order
    fitted_orig = np.empty(n, dtype=np.float64)
    fitted_orig[o] = fitted
    rob_orig = np.empty(n, dtype=np.float64)
    rob_orig[o] = rob_w

    return {
        'fitted': fitted_orig,
        'residuals': y - fitted_orig,
        'weights': rob_orig,
        'delta': delta,
    }
113
+
114
+
115
def _find_seeds(xs, n, delta):
    """Find seed point indices for delta-based binning.

    Always includes first and last points. Interior points are included
    if they are more than delta away from the last included point.

    Parameters
    ----------
    xs : array
        Covariate values sorted in ascending order.
    n : int
        Number of points in ``xs``.
    delta : float
        Minimum gap between consecutive seeds. When ``delta <= 0`` (or
        there are at most two points) every point is a seed.

    Returns
    -------
    tuple
        ``(indices, count)`` where ``indices`` is an ``np.intp`` array of
        seed positions in ``xs`` and ``count`` is its length.
    """
    if delta <= 0 or n <= 2:
        return np.arange(n, dtype=np.intp), n

    seeds = [0]
    # x value of the most recently accepted seed
    anchor = xs[0]
    for pt in range(1, n - 1):
        if xs[pt] - anchor > delta:
            seeds.append(pt)
            anchor = xs[pt]
    # The last point is always a seed, regardless of its gap to the anchor.
    seeds.append(n - 1)

    return np.array(seeds, dtype=np.intp), len(seeds)
133
+
134
+
135
+ @njit(cache=True)
136
def _find_limits(seed_idx, nseeds, xs, ws, n, span_weight):
    """Find span window [start, end] and max distance for each seed.

    For each seed point, extends the window left and right (choosing
    the closer direction each step) until the cumulative weight in the
    window reaches span_weight. Then extends to include ties.

    Parameters
    ----------
    seed_idx : array of int
        Positions of the seed points in ``xs``.
    nseeds : int
        Number of seeds to process.
    xs : array of float
        Covariate values sorted in ascending order.
    ws : array of float
        Prior weights aligned with ``xs``.
    n : int
        Number of points in ``xs``.
    span_weight : float
        Cumulative weight each window has to reach.

    Returns
    -------
    tuple
        ``(frame_start, frame_end, max_dist)``: inclusive window bounds per
        seed and the largest distance from the seed to a point added while
        accumulating weight (points added only by the tie extension do not
        update it).
    """
    frame_start = np.empty(nseeds, dtype=np.intp)
    frame_end = np.empty(nseeds, dtype=np.intp)
    max_dist = np.empty(nseeds, dtype=np.float64)
    last = n - 1

    for s in range(nseeds):
        centre = seed_idx[s]
        x0 = xs[centre]
        lo = centre
        hi = centre
        acc = ws[centre]
        reach = 0.0

        # Grow the window one point at a time while it is still too light
        # and at least one side has room left.
        while acc < span_weight and (lo > 0 or hi < last):
            if hi == last:
                # Can only extend left
                go_left = True
            elif lo == 0:
                # Can only extend right
                go_left = False
            else:
                # Extend in direction of closer point (ties go right)
                go_left = (x0 - xs[lo - 1]) < (xs[hi + 1] - x0)

            if go_left:
                lo -= 1
                acc += ws[lo]
                step = x0 - xs[lo]
            else:
                hi += 1
                acc += ws[hi]
                step = xs[hi] - x0

            if reach < step:
                reach = step

        # Extend to ties
        while lo > 0 and xs[lo] == xs[lo - 1]:
            lo -= 1
        while hi < last and xs[hi] == xs[hi + 1]:
            hi += 1

        frame_start[s] = lo
        frame_end[s] = hi
        max_dist[s] = reach

    return frame_start, frame_end, max_dist
205
+
206
+
207
+ @njit(cache=True)
208
def _lowess_fit(xs, ys, ws, rw, curpt, left, right, dist):
    """Local weighted linear regression at a single point.

    Parameters
    ----------
    xs, ys : array of float
        Covariate and response values.
    ws, rw : array of float
        Prior weights and robustness weights; their product scales the
        kernel weight of every point.
    curpt : int
        Index of the point at which the fit is evaluated.
    left, right : int
        Inclusive bounds of the window used for the fit.
    dist : float
        Kernel bandwidth (distance at which the tricube weight hits zero).

    Returns
    -------
    float
        Fitted value at ``xs[curpt]``. Returns 0.0 when the window carries
        no weight, the weighted mean of ``ys`` when the bandwidth or the
        weighted variance of ``xs`` is below 1e-7, and the value of the
        weighted least-squares line otherwise.
    """
    threshold = 1e-7

    if dist < threshold:
        # Degenerate bandwidth: no kernel, plain weighted mean of the window.
        wsum = 0.0
        wysum = 0.0
        for i in range(left, right + 1):
            wsum += ws[i] * rw[i]
            wysum += ys[i] * ws[i] * rw[i]
        if wsum == 0.0:
            return 0.0
        return wysum / wsum

    x0 = xs[curpt]

    # First pass: weighted means of x and y under the tricube kernel.
    allweight = 0.0
    xmean = 0.0
    ymean = 0.0
    for i in range(left, right + 1):
        u = abs(x0 - xs[i]) / dist
        t = 1.0 - u * u * u
        w = t * t * t * ws[i] * rw[i]
        allweight += w
        xmean += w * xs[i]
        ymean += w * ys[i]

    if allweight == 0.0:
        return 0.0

    xmean /= allweight
    ymean /= allweight

    # Second pass: weighted variance of x and covariance of x with y. The
    # kernel weight is recomputed rather than stored to avoid a scratch
    # allocation per call.
    var = 0.0
    covar = 0.0
    for i in range(left, right + 1):
        u = abs(x0 - xs[i]) / dist
        t = 1.0 - u * u * u
        w = t * t * t * ws[i] * rw[i]
        dxi = xs[i] - xmean
        var += dxi * dxi * w
        covar += dxi * (ys[i] - ymean) * w

    if var < threshold:
        # x is effectively constant in the window; a slope is not estimable.
        return ymean

    slope = covar / var
    return slope * x0 + ymean - slope * xmean
257
+
258
+
259
+ @njit(cache=True)
260
def _lowess_iterations(xs, ys, ws, fitted, rob_w, seed_idx, nseeds,
                       frame_start, frame_end, max_dist, total_weight,
                       subrange, iterations):
    """Run the full lowess iteration loop in compiled code.

    Each pass fits every seed point, linearly interpolates the points
    between consecutive seeds, and then recomputes the robustness weights
    from the absolute residuals. ``fitted`` and ``rob_w`` are updated in
    place; nothing is returned.

    Parameters
    ----------
    xs, ys, ws : array of float
        Sorted covariate values, responses and prior weights.
    fitted : array of float
        Output buffer for fitted values (overwritten).
    rob_w : array of float
        Robustness weights, read by the fit and overwritten after each pass
        that does not terminate early.
    seed_idx : array of int
        Positions of the seed points; the first seed is taken to be index 0.
    nseeds : int
        Number of seeds.
    frame_start, frame_end, max_dist : array
        Per-seed window bounds and bandwidth passed on to ``_lowess_fit``.
    total_weight : float
        Sum of the prior weights; half of it locates the weighted median
        of the absolute residuals.
    subrange : float
        Scale for deciding whether two neighbouring seeds share an x value.
    iterations : int
        Number of passes to run.
    """
    n = len(xs)
    threshold = 1e-7
    # Loop invariants: the weighted-median target and the residual scratch
    # buffer (safe to reuse because indexing by ``ror`` below copies it).
    half_weight = total_weight / 2.0
    abs_resid = np.empty(n)

    for _it in range(iterations):
        fitted[0] = _lowess_fit(xs, ys, ws, rob_w,
                                0, frame_start[0], frame_end[0], max_dist[0])
        last_pt = 0
        for s in range(1, nseeds):
            pt = seed_idx[s]
            fitted[pt] = _lowess_fit(xs, ys, ws, rob_w,
                                     pt, frame_start[s], frame_end[s],
                                     max_dist[s])
            if pt - last_pt > 1:
                # Fill the skipped points between the two seeds.
                dx_interp = xs[pt] - xs[last_pt]
                if dx_interp > threshold * subrange:
                    slope = (fitted[pt] - fitted[last_pt]) / dx_interp
                    intercept = fitted[pt] - slope * xs[pt]
                    for j in range(last_pt + 1, pt):
                        fitted[j] = slope * xs[j] + intercept
                else:
                    # Seeds practically coincide in x: use their average.
                    avg = 0.5 * (fitted[pt] + fitted[last_pt])
                    for j in range(last_pt + 1, pt):
                        fitted[j] = avg
            last_pt = pt

        # Compute absolute residuals and their mean
        resid_sum = 0.0
        for i in range(n):
            abs_resid[i] = abs(ys[i] - fitted[i])
            resid_sum += abs_resid[i]
        resid_scale = resid_sum / n

        # Sort residuals
        ror = np.argsort(abs_resid)
        sorted_resid = abs_resid[ror]

        # Six times the weighted median absolute residual: walk the sorted
        # residuals until half of the total prior weight is accumulated.
        cumw = 0.0
        cmad = 0.0
        for i in range(n):
            cumw += ws[ror[i]]
            if cumw == half_weight and i < n - 1:
                # Exact hit: average this residual with the next one.
                cmad = 3.0 * (sorted_resid[i] + sorted_resid[i + 1])
                break
            elif cumw > half_weight:
                cmad = 6.0 * sorted_resid[i]
                break

        # Residuals are negligible relative to their mean: stop iterating
        # and leave the robustness weights as they are.
        if cmad <= threshold * resid_scale:
            break

        # Bisquare robustness weights; anything at or beyond cmad stays 0.
        for i in range(n):
            rob_w[i] = 0.0
        for i in range(n):
            if sorted_resid[i] < cmad:
                u = sorted_resid[i] / cmad
                rob_w[ror[i]] = (1.0 - u * u) * (1.0 - u * u)
            else:
                break
@@ -0,0 +1,201 @@
1
+ Metadata-Version: 2.4
2
+ Name: edgepython
3
+ Version: 0.2.0
4
+ Summary: Python port of the edgeR Bioconductor package for differential expression analysis of digital gene expression data.
5
+ Author: Lior Pachter
6
+ License-Expression: GPL-3.0-or-later
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: numpy>=1.21
11
+ Requires-Dist: scipy>=1.7
12
+ Requires-Dist: pandas>=1.3
13
+ Requires-Dist: matplotlib>=3.4
14
+ Requires-Dist: statsmodels>=0.13
15
+ Requires-Dist: numba>=0.57
16
+ Provides-Extra: h5
17
+ Requires-Dist: h5py>=3.0; extra == "h5"
18
+ Provides-Extra: anndata
19
+ Requires-Dist: anndata>=0.7; extra == "anndata"
20
+ Provides-Extra: parquet
21
+ Requires-Dist: pyarrow>=8.0; extra == "parquet"
22
+ Provides-Extra: formula
23
+ Requires-Dist: patsy>=0.5; extra == "formula"
24
+ Provides-Extra: all
25
+ Requires-Dist: h5py>=3.0; extra == "all"
26
+ Requires-Dist: anndata>=0.7; extra == "all"
27
+ Requires-Dist: pyarrow>=8.0; extra == "all"
28
+ Requires-Dist: patsy>=0.5; extra == "all"
29
+ Provides-Extra: dev
30
+ Requires-Dist: pytest>=7.0; extra == "dev"
31
+ Requires-Dist: pytest-cov; extra == "dev"
32
+ Dynamic: license-file
33
+
34
+ # edgePython
35
+
36
+ `edgePython` is a Python implementation of the Bioconductor `edgeR` package for differential analysis of genomics count data. It also includes a new single-cell differential expression method that extends the NEBULA-LN negative binomial mixed model with edgeR's TMM normalization and empirical Bayes dispersion shrinkage.
37
+
38
+ ## Installation
39
+
40
+ From source:
41
+
42
+ ```bash
43
+ pip install .
44
+ ```
45
+
46
+ With optional extras:
47
+
48
+ ```bash
49
+ pip install ".[all]"
50
+ ```
51
+
52
+ ## Quick Start
53
+
54
+ ```python
55
+ import numpy as np
56
+ import edgepython as ep
57
+
58
+ # genes x samples count matrix
59
+ counts = np.random.poisson(lam=10, size=(1000, 6))
60
+ group = np.array(["A", "A", "A", "B", "B", "B"])
61
+
62
+ y = ep.make_dgelist(counts=counts, group=group)
63
+ y = ep.calc_norm_factors(y)
64
+ y = ep.estimate_disp(y)
65
+
66
+ design = np.column_stack([np.ones(6), (group == "B").astype(float)])
67
+ fit = ep.glm_ql_fit(y, design)
68
+ res = ep.glm_ql_ftest(fit, coef=1)
69
+ top = ep.top_tags(res, n=10)
70
+ print(top["table"].head())
71
+ ```
72
+
73
+ ## Features
74
+
75
+ ### Data Structures
76
+
77
+ `DGEList`-style data structures (`make_dgelist`, `cbind_dgelist`, `rbind_dgelist`, `valid_dgelist`) with accessor functions (`get_counts`, `get_dispersion`, `get_norm_lib_sizes`, `get_offset`).
78
+
79
+ ### Normalization
80
+
81
+ TMM, TMMwsp, RLE, and upper-quartile normalization via `calc_norm_factors`. Normalized expression values via `cpm`, `rpkm`, `tpm`, `ave_log_cpm`, `cpm_by_group`, and `rpkm_by_group`.
82
+
83
+ ### Filtering
84
+
85
+ Gene filtering by expression level via `filter_by_expr`.
86
+
87
+ ### Dispersion Estimation
88
+
89
+ Common, trended, and tagwise dispersion estimation (`estimate_disp`, `estimate_common_disp`, `estimate_trended_disp`, `estimate_tagwise_disp`) with GLM variants (`estimate_glm_common_disp`, `estimate_glm_trended_disp`, `estimate_glm_tagwise_disp`). Weighted likelihood empirical Bayes shrinkage via `WLEB`.
90
+
91
+ ### Differential Expression Testing
92
+
93
+ - **Exact test**: `exact_test` for two-group comparisons with exact negative binomial tests, plus helpers (`exact_test_double_tail`, `equalize_lib_sizes`, `q2q_nbinom`, `split_into_groups`).
94
+ - **GLM fitting**: `glm_fit`, `glm_ql_fit` for generalized linear model fitting.
95
+ - **GLM testing**: likelihood ratio tests (`glm_lrt`), quasi-likelihood F-tests (`glm_ql_ftest`), and fold-change threshold testing (`glm_treat`).
96
+ - **Results**: `top_tags` for extracting top DE genes with p-value adjustment, `decide_tests` for classifying genes as up/down/unchanged.
97
+
98
+ ### Gene Set Testing
99
+
100
+ Competitive and self-contained gene set tests: `camera`, `fry`, `roast`, `mroast`, `romer`. Gene ontology and KEGG pathway enrichment via `goana` and `kegga`.
101
+
102
+ ### Differential Splicing
103
+
104
+ Differential exon and transcript usage testing via `diff_splice` (GLM-based with LRT or QL tests), `diff_splice_dge` (exact test for two-group comparisons), and `splice_variants` (chi-squared tests for homogeneity of proportions across exons).
105
+
106
+ ### Quantification Uncertainty
107
+
108
+ Reading quantification output with bootstrap or Gibbs sampling uncertainty from Salmon (`catch_salmon`), kallisto (`catch_kallisto`), and RSEM (`catch_rsem`). Overdispersion estimates from quantification uncertainty are used for differential transcript expression following the approach of Baldoni et al. (2024).
109
+
110
+ ### I/O
111
+
112
+ - **Universal reader**: `read_data` with auto-detection for kallisto (H5/TSV), Salmon, oarfish, RSEM, 10X CellRanger, CSV/TSV count tables, AnnData (`.h5ad`), and RDS files.
113
+ - **Specialized readers**: `read_dge` (collates per-sample count files), `read_10x` (10X Genomics output), `feature_counts_to_dgelist` (featureCounts output), `read_bismark2dge` (Bismark methylation coverage).
114
+ - **Single-cell aggregation**: `seurat_to_pb` for pseudo-bulk aggregation.
115
+ - **Export**: `to_anndata` for converting DGEList and results to AnnData format.
116
+
117
+ ### Visualization
118
+
119
+ `plot_md` (mean-difference plots), `plot_bcv` (biological coefficient of variation), `plot_mds` (multidimensional scaling), `plot_ql_disp` (quasi-likelihood dispersion), `plot_smear` (smear plots), `ma_plot` (MA plots), and `gof` (goodness of fit).
120
+
121
+ ### Single-Cell Mixed Model
122
+
123
+ NEBULA-LN-style negative binomial gamma mixed model for multi-subject single-cell data: `glm_sc_fit`, `shrink_sc_disp`, `glm_sc_test`.
124
+
125
+ ### ChIP-Seq
126
+
127
+ ChIP-seq normalization to matched input controls via `normalize_chip_to_input` and `calc_norm_offsets_for_chip`.
128
+
129
+ ### Methylation/RRBS
130
+
131
+ Bismark coverage file reader (`read_bismark2dge`) and methylation-specific design matrix construction (`model_matrix_meth`).
132
+
133
+ ### Utilities
134
+
135
+ Design matrix construction (`model_matrix`), prior count addition (`add_prior_count`), predicted fold changes (`pred_fc`), Good-Turing smoothing (`good_turing`), count thinning/downsampling (`thin_counts`), Gini coefficient (`gini`), sum technical replicates (`sum_tech_reps`), negative binomial z-scores (`zscore_nbinom`), nearest TSS annotation (`nearest_tss`), and variance shrinkage (`squeeze_var`).
136
+
137
+ ## Examples
138
+
139
+ The [examples/mammary](examples/mammary) directory contains two notebooks for the GSE60450 mouse mammary dataset ([Fu et al. 2015](https://www.nature.com/articles/ncb3117)):
140
+
141
+ - [mouse_mammary_tutorial.ipynb](examples/mammary/mouse_mammary_tutorial.ipynb) — edgePython-only tutorial (Colab-ready)
142
+ - [mouse_mammary_R_vs_Python.ipynb](examples/mammary/mouse_mammary_R_vs_Python.ipynb) — side-by-side edgeR vs edgePython comparison
143
+
144
+ The [examples/hoxa1](examples/hoxa1) directory contains two notebooks for the GSE37704 HOXA1 knockdown dataset ([Trapnell et al. 2013](https://doi.org/10.1038/nbt.2594)), with transcript-level quantification by kallisto:
145
+
146
+ - [hoxa1_tutorial.ipynb](examples/hoxa1/hoxa1_tutorial.ipynb) — edgePython-only tutorial with scaled analysis using bootstrap overdispersion (Colab-ready)
147
+ - [hoxa1_R_vs_Python.ipynb](examples/hoxa1/hoxa1_R_vs_Python.ipynb) — side-by-side edgeR vs edgePython comparison reproducing Figure 1 panels
148
+
149
+ The [examples/clytia](examples/clytia) directory contains a notebook for the *Clytia hemisphaerica* single-cell RNA-seq dataset ([Chari et al. 2021](https://doi.org/10.1016/j.celrep.2021.109751)), demonstrating the NEBULA-LN mixed model with empirical Bayes dispersion shrinkage:
150
+
151
+ - [clytia_tutorial.ipynb](examples/clytia/clytia_tutorial.ipynb) — single-cell differential expression of fed vs starved gastrodigestive cells across 10 organisms, reproducing Figure 2 panels (Colab-ready)
152
+
153
+ ## Development
154
+
155
+ Run tests:
156
+
157
+ ```bash
158
+ pytest -q
159
+ ```
160
+
161
+ ## Authorship
162
+
163
+ This code was written by Claude (Anthropic). The project was directed by Lior Pachter.
164
+
165
+ ## edgeR
166
+
167
+ `edgePython` is based on the [edgeR](https://bioconductor.org/packages/edgeR/) Bioconductor package. The edgeR publications are:
168
+
169
+ - Robinson MD, Smyth GK (2007). Moderated statistical tests for assessing differences in tag abundance. *Bioinformatics*, 23(21), 2881-2887. [doi:10.1093/bioinformatics/btm453](https://doi.org/10.1093/bioinformatics/btm453)
170
+
171
+ - Robinson MD, Smyth GK (2008). Small-sample estimation of negative binomial dispersion, with applications to SAGE data. *Biostatistics*, 9(2), 321-332. [doi:10.1093/biostatistics/kxm030](https://doi.org/10.1093/biostatistics/kxm030)
172
+
173
+ - Robinson MD, McCarthy DJ, Smyth GK (2010). edgeR: a Bioconductor package for differential expression analysis of digital gene expression data. *Bioinformatics*, 26(1), 139-140. [doi:10.1093/bioinformatics/btp616](https://doi.org/10.1093/bioinformatics/btp616)
174
+
175
+ - Robinson MD, Oshlack A (2010). A scaling normalization method for differential expression analysis of RNA-seq data. *Genome Biology*, 11(3), R25. [doi:10.1186/gb-2010-11-3-r25](https://doi.org/10.1186/gb-2010-11-3-r25)
176
+
177
+ - McCarthy DJ, Chen Y, Smyth GK (2012). Differential expression analysis of multifactor RNA-Seq experiments with respect to biological variation. *Nucleic Acids Research*, 40(10), 4288-4297. [doi:10.1093/nar/gks042](https://doi.org/10.1093/nar/gks042)
178
+
179
+ - Chen Y, Lun ATL, Smyth GK (2014). Differential expression analysis of complex RNA-seq experiments using edgeR. In *Statistical Analysis of Next Generation Sequencing Data*, Springer, 51-74. [doi:10.1007/978-3-319-07212-8_3](https://doi.org/10.1007/978-3-319-07212-8_3)
180
+
181
+ - Zhou X, Lindsay H, Robinson MD (2014). Robustly detecting differential expression in RNA sequencing data using observation weights. *Nucleic Acids Research*, 42(11), e91. [doi:10.1093/nar/gku310](https://doi.org/10.1093/nar/gku310)
182
+
183
+ - Dai Z, Sheridan JM, Gearing LJ, Moore DL, Su S, Wormald S, Wilcox S, O'Connor L, Dickins RA, Blewitt ME, Ritchie ME (2014). edgeR: a versatile tool for the analysis of shRNA-seq and CRISPR-Cas9 genetic screens. *F1000Research*, 3, 95. [doi:10.12688/f1000research.3928.2](https://doi.org/10.12688/f1000research.3928.2)
184
+
185
+ - Lun ATL, Chen Y, Smyth GK (2016). It's DE-licious: A recipe for differential expression analyses of RNA-seq experiments using quasi-likelihood methods in edgeR. In *Statistical Genomics*, Springer, 391-416. [doi:10.1007/978-1-4939-3578-9_19](https://doi.org/10.1007/978-1-4939-3578-9_19)
186
+
187
+ - Chen Y, Lun ATL, Smyth GK (2016). From reads to genes to pathways: differential expression analysis of RNA-Seq experiments using Rsubread and the edgeR quasi-likelihood pipeline. *F1000Research*, 5, 1438. [doi:10.12688/f1000research.8987.2](https://doi.org/10.12688/f1000research.8987.2)
188
+
189
+ - Chen Y, Pal B, Visvader JE, Smyth GK (2018). Differential methylation analysis of reduced representation bisulfite sequencing experiments using edgeR. *F1000Research*, 6, 2055. [doi:10.12688/f1000research.13196.2](https://doi.org/10.12688/f1000research.13196.2)
190
+
191
+ - Baldoni PL, Chen Y, Hediyeh-zadeh S, Liao Y, Dong X, Ritchie ME, Shi W, Smyth GK (2024). Dividing out quantification uncertainty allows efficient assessment of differential transcript expression with edgeR. *Nucleic Acids Research*, 52(3), e13. [doi:10.1093/nar/gkad1167](https://doi.org/10.1093/nar/gkad1167)
192
+
193
+ - Chen Y, Chen L, Lun ATL, Baldoni PL, Smyth GK (2025). edgeR v4: powerful differential analysis of sequencing data with expanded functionality and improved support for small counts and larger datasets. *Nucleic Acids Research*, 53(2), gkaf018. [doi:10.1093/nar/gkaf018](https://doi.org/10.1093/nar/gkaf018)
194
+
195
+ The single-cell mixed model in `edgePython` is based on NEBULA:
196
+
197
+ - He L, Davila-Velderrain J, Sumida TS, Hafler DA, Kellis M, Kulminski AM (2021). NEBULA is a fast negative binomial mixed model for differential or co-expression analysis of large-scale multi-subject single-cell data. *Communications Biology*, 4, 629. [doi:10.1038/s42003-021-02146-6](https://doi.org/10.1038/s42003-021-02146-6)
198
+
199
+ ## License
200
+
201
+ This project is licensed under the GNU General Public License v3.0 or later (GPL-3.0-or-later).
@@ -0,0 +1,29 @@
1
+ edgepython/__init__.py,sha256=SA-OSJITfGIkrgRKLewTCB3eMTN44t3HkqQjIuYt5h8,2489
2
+ edgepython/classes.py,sha256=tUgpQJFeFdR-48aEYOst0aAMo24eL-LLlXDUbWTgmjQ,16399
3
+ edgepython/compressed_matrix.py,sha256=XugtBswZX_Y8KxRBKe9eDM6FevxVStItw9RNYHYBQ5Q,14768
4
+ edgepython/dgelist.py,sha256=YAi8Sdf1eXdh7e5VUOWXvye9gAr1dF8_kQz0JbGacQU,10682
5
+ edgepython/dispersion.py,sha256=ybA-AQlParEb2GvGJv79me1RXnlc1csgl2uDad7EOBw,31664
6
+ edgepython/dispersion_lowlevel.py,sha256=I-21X-3vdOFo2gWXKLcfNJdonUWzMDxNLXNjpvgUd7g,35368
7
+ edgepython/exact_test.py,sha256=GtWbIpgWOHGuEh6dyPrYL1ASjh56cLCqU6vewcUkEwQ,16461
8
+ edgepython/expression.py,sha256=7QXdle37JSoqCi5TNDLQQo7fT2Av3kSzNjaR_eav8SM,11123
9
+ edgepython/filtering.py,sha256=f5f1imjrNzX3a2m-oI605V3J_D1JIEOlHYr2_DRwXRc,2843
10
+ edgepython/gene_sets.py,sha256=EIi64MDQU7uUuoextCMsuVR8YGWhEGrNDH91JKvKtC0,42208
11
+ edgepython/glm_fit.py,sha256=V5mLHUDSGYyKcQTZRnJMwWzYEfi4gpURFym7Y-kMleg,23250
12
+ edgepython/glm_levenberg.py,sha256=jQkmRtnnYZKj6ZiW_iVxFmgBpvbkdYKzgtP9b8aqAdA,12093
13
+ edgepython/glm_test.py,sha256=QVJHjZrcGqIdULdWVwcO4Qj3fycyX7RLHcoJciYh6mo,12134
14
+ edgepython/io.py,sha256=FjePd6ziup23MCpiCU2ApSsVW0hfpqAHGk3rTKxdLgs,64373
15
+ edgepython/limma_port.py,sha256=sMvphlGWCp-pOrGDDWlPjiS0O3w6BjfaSkHI-4JJ0wM,32453
16
+ edgepython/normalization.py,sha256=5ZIBV1t-XfD7qy-3HNYJ2-wdHYEu59YI2nRkLc_2a6w,18852
17
+ edgepython/ql_weights.py,sha256=XpUEwAS_UYvbFNF-g935jQhzcdfe-UXNkAAuDrx-m-A,68132
18
+ edgepython/results.py,sha256=2M_nNq8efUNp512OGeLZE60XB6_atygSJVwFpmS7Lfc,7197
19
+ edgepython/sc_fit.py,sha256=dN_UcHyFF4wCSwx5pHhorst4qxz96TDDxBD-96aWQd8,48333
20
+ edgepython/smoothing.py,sha256=5rEP0dUKrDr8YaEzekRtJ7tX5Ip3rgBc2lxMywrA9ZU,13751
21
+ edgepython/splicing.py,sha256=Do_JOGxOPawLxj0qQObPgytYBTi9N39tcHlPgYDICDc,19047
22
+ edgepython/utils.py,sha256=mP9wcffw_7cRjPypkKvQFw7kiNj4OCABX5Mrigh4CkE,34647
23
+ edgepython/visualization.py,sha256=VeDeWtSCpPWQ5Pn6oDCWg2gDZMwf4F1yWcEMihMPcR8,12227
24
+ edgepython/weighted_lowess.py,sha256=ldUHZJcKFFY9NdAFeq0pWkJS6e-HgCARAJ5NN7kklc0,10251
25
+ edgepython-0.2.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
26
+ edgepython-0.2.0.dist-info/METADATA,sha256=pQuv9fwnUvhdwd88yhsHsMymLJLkqQ8XeKxzRlChafo,11674
27
+ edgepython-0.2.0.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
28
+ edgepython-0.2.0.dist-info/top_level.txt,sha256=XDJV3dwIsV8JKQQDZX0qoQT7Z0iXfwZnjA0PLr_srBU,11
29
+ edgepython-0.2.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+