microarray 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- microarray/__init__.py +15 -0
- microarray/_version.py +3 -0
- microarray/datasets/__init__.py +3 -0
- microarray/datasets/_arrayexpress.py +1 -0
- microarray/datasets/_cdf_files.py +35 -0
- microarray/datasets/_geo.py +1 -0
- microarray/datasets/_utils.py +143 -0
- microarray/io/__init__.py +17 -0
- microarray/io/_anndata_converter.py +198 -0
- microarray/io/_cdf.py +575 -0
- microarray/io/_cel.py +591 -0
- microarray/io/_read.py +127 -0
- microarray/plotting/__init__.py +28 -0
- microarray/plotting/_base.py +253 -0
- microarray/plotting/_cel.py +75 -0
- microarray/plotting/_de_plots.py +239 -0
- microarray/plotting/_diagnostic_plots.py +268 -0
- microarray/plotting/_heatmap.py +279 -0
- microarray/plotting/_ma_plots.py +136 -0
- microarray/plotting/_pca.py +320 -0
- microarray/plotting/_qc_plots.py +335 -0
- microarray/plotting/_score.py +38 -0
- microarray/plotting/_top_table_heatmap.py +98 -0
- microarray/plotting/_utils.py +280 -0
- microarray/preprocessing/__init__.py +39 -0
- microarray/preprocessing/_background.py +862 -0
- microarray/preprocessing/_log2.py +77 -0
- microarray/preprocessing/_normalize.py +1292 -0
- microarray/preprocessing/_rma.py +243 -0
- microarray/preprocessing/_robust.py +170 -0
- microarray/preprocessing/_summarize.py +318 -0
- microarray/py.typed +0 -0
- microarray/tools/__init__.py +26 -0
- microarray/tools/_biomart.py +416 -0
- microarray/tools/_empirical_bayes.py +401 -0
- microarray/tools/_fdist.py +171 -0
- microarray/tools/_linear_models.py +387 -0
- microarray/tools/_mds.py +101 -0
- microarray/tools/_pca.py +88 -0
- microarray/tools/_score.py +86 -0
- microarray/tools/_toptable.py +360 -0
- microarray-0.1.0.dist-info/METADATA +75 -0
- microarray-0.1.0.dist-info/RECORD +44 -0
- microarray-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
"""Extract and format top differentially expressed genes.
|
|
2
|
+
|
|
3
|
+
This module provides functions for extracting results from empirical Bayes
|
|
4
|
+
moderated differential expression analysis, with multiple testing correction
|
|
5
|
+
and flexible sorting/filtering options.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import warnings
|
|
9
|
+
from typing import Literal
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
import pandas as pd
|
|
13
|
+
from anndata import AnnData
|
|
14
|
+
from scipy import stats
|
|
15
|
+
from statsmodels.stats.multitest import multipletests
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def top_table(
    data: AnnData | dict,
    group: str | None = None,
    coef: int | None = None,
    reference: int | None = None,
    number: int = 10,
    adjust_method: str = "fdr_bh",
    sort_by: Literal["logfc", "avg_expr", "t", "p_value", "b", "none"] = "b",
    p_value: float = 1.0,
    lfc: float = 0.0,
) -> pd.DataFrame:
    """Extract top differentially expressed genes from a moderated fit.

    Creates a summary table of the top genes ranked by evidence of differential
    expression, with multiple testing correction and flexible filtering/sorting.
    This is the final step in limma-style differential expression analysis.

    Parameters
    ----------
    data : AnnData | dict
        Either an :class:`~anndata.AnnData` object whose ``.uns['lm_fit']``
        holds the fit produced by ``lm_fit()``/``ebayes()``, or the fit dict
        itself. The fit must contain:

        - coefficients: coefficient estimates
        - t: moderated t-statistics
        - p_value: raw p-values
        - lods: B-statistics (log-odds)
        - genes: gene identifiers
        - adata: AnnData object for annotation retrieval (dict input only)
    group : str, optional
        Name of the group (design column) to extract, resolved via the
        ``group_to_column``/``design_columns`` metadata stored by
        ``lm_fit(..., groupby=...)``. Preferred over ``coef``.
    coef : int, optional
        Deprecated; use ``group`` instead. Which coefficient/contrast to
        extract. If None, a sensible default is chosen (see Notes).
        Coefficients correspond to columns in the design matrix.
    reference : int, optional
        Reference coefficient index for a pairwise contrast. When provided,
        statistics are computed for (coef - reference). This is useful for
        no-intercept group-mean designs where columns represent groups.
    number : int, default=10
        Maximum number of genes to return.
    adjust_method : str, default="fdr_bh"
        Multiple testing correction method, passed to
        :func:`statsmodels.stats.multitest.multipletests`. Options include
        "bonferroni", "sidak", "holm-sidak", "holm", "simes-hochberg",
        "hommel", "fdr_bh" (Benjamini-Hochberg FDR, default), "fdr_by",
        "fdr_tsbh", and "fdr_tsbky".
    sort_by : {"logfc", "avg_expr", "t", "p_value", "b", "none"}, default="b"
        Column to sort results by:

        - "logfc": Log fold-change (descending absolute value)
        - "avg_expr": Average expression level (descending)
        - "t": Moderated t-statistic (descending absolute value)
        - "p_value": P-value (ascending)
        - "b": B-statistic/log-odds (descending)
        - "none": No sorting (gene order as in original data)
    p_value : float, default=1.0
        Filter genes by adjusted p-value threshold. Only genes with
        p_adj <= p_value are returned.
    lfc : float, default=0.0
        Filter genes by log-fold-change threshold. Only genes with
        |logfc| >= lfc are returned.

    Returns
    -------
    pd.DataFrame
        Results table indexed by gene identifier, with columns:

        - logfc: Log2 fold-change for the selected coefficient/contrast
        - avg_expr: Average expression across all samples
        - t: Moderated t-statistic
        - p_value: Raw p-value
        - p_adj: Adjusted p-value (multiple testing corrected)
        - b: B-statistic (log-odds of differential expression)

        Additional columns from ``adata.var`` are also included if available.

    Raises
    ------
    ValueError
        If the fit object is missing, incomplete (ebayes not run), the
        group/coef/reference selection is invalid, no AnnData is available
        to compute average expression, or ``sort_by`` is unrecognized.

    Notes
    -----
    **Default coefficient selection.** When neither ``group`` nor ``coef``
    is given: for a two-column binary one-hot design (limma's ``~0 + group``)
    the default is the group2 - group1 contrast; otherwise the first
    non-intercept column is used.

    **Multiple testing correction.** When testing thousands of genes
    simultaneously, correction is essential to control FDR or FWER. The
    default "fdr_bh" (Benjamini-Hochberg) controls FDR: among genes called
    significant at level a, we expect proportion a to be false positives.

    **Sorting and filtering.** Filtering (p_value and lfc thresholds) is
    applied before selecting the top ``number`` genes; sorting determines
    which genes appear in the top N.

    **B-statistic.** The B-statistic (log-odds) is often the best ranking
    criterion because it accounts for both effect size and statistical
    significance. Higher B values indicate stronger evidence of
    differential expression.

    Examples
    --------
    >>> import microarray as ma
    >>> import numpy as np
    >>> fit = ma.tl.ebayes(fit)
    >>> results = ma.tl.top_table(fit, coef=1, number=20)
    >>> sig_genes = ma.tl.top_table(
    ...     fit, coef=1, number=1000, p_value=0.05, lfc=1.0, sort_by="logfc"
    ... )
    >>> all_results = ma.tl.top_table(fit, coef=1, number=np.inf, sort_by="p_value")

    References
    ----------
    Smyth, G. K. (2004). Linear models and empirical Bayes methods for
    assessing differential expression in microarray experiments. Statistical
    Applications in Genetics and Molecular Biology, 3(1).

    Benjamini, Y., and Hochberg, Y. (1995). Controlling the false discovery
    rate: a practical and powerful approach to multiple testing. Journal of
    the Royal Statistical Society B, 57, 289-300.

    See Also
    --------
    lm_fit : Fit linear models
    ebayes : Apply empirical Bayes moderation
    """
    # Resolve the fit dict and the AnnData used for annotation/AveExpr.
    if isinstance(data, AnnData):
        adata = data
        fit = adata.uns.get("lm_fit")
        if fit is None:
            raise ValueError("No fit object found in adata.uns['lm_fit']. Run lm_fit and ebayes first.")
    else:
        fit = data
        adata = fit.get("adata") if isinstance(fit, dict) else None

    # Validate that ebayes has been run.
    required_keys = ["t", "p_value", "coefficients", "genes"]
    for key in required_keys:
        if key not in fit:
            raise ValueError(f"Fit object missing '{key}'. Did you run ebayes() before top_table()?")

    # Select coefficient. Limma's topTable avoids using intercept-only tests
    # by default when multiple coefficients are present.
    n_coef = fit["coefficients"].shape[1]
    design = fit.get("design")

    if group is not None:
        # Map the requested group name to a design-matrix column index using
        # metadata stored by lm_fit(groupby=...).
        group_to_column = fit.get("group_to_column")
        design_columns = fit.get("design_columns")
        if group_to_column is None or design_columns is None:
            raise ValueError("Fit object does not contain group metadata. Re-run lm_fit with groupby.")

        if hasattr(group_to_column, "__len__") and len(group_to_column) == 0:
            raise ValueError("Fit object does not contain group metadata. Re-run lm_fit with groupby.")

        if hasattr(design_columns, "__len__") and len(design_columns) == 0:
            raise ValueError("Fit object does not contain group metadata. Re-run lm_fit with groupby.")

        if not isinstance(group_to_column, dict):
            raise ValueError("Fit object contains invalid 'group_to_column' metadata. Re-run lm_fit with groupby.")

        if not isinstance(design_columns, list):
            design_columns = list(design_columns)

        if group not in group_to_column:
            valid_groups = ", ".join(str(x) for x in group_to_column.keys())
            raise ValueError(f"Unknown group '{group}'. Valid groups: {valid_groups}")
        coef = int(design_columns.index(group_to_column[group]))
    elif coef is not None:
        warnings.warn("'coef' is deprecated. Use 'group' instead.", DeprecationWarning, stacklevel=2)

    # Detect a no-intercept group-mean design (every row is a one-hot
    # group-membership indicator) to choose sensible contrast defaults.
    is_binary_one_hot = False
    if design is not None and design.ndim == 2 and design.shape[1] == n_coef:
        is_binary = np.all(np.isclose(design, 0.0) | np.isclose(design, 1.0))
        row_sums = np.sum(design, axis=1)
        one_hot_rows = np.all(np.isclose(row_sums, 1.0))
        nonempty_groups = np.all(np.sum(design, axis=0) > 0)
        is_binary_one_hot = bool(is_binary and one_hot_rows and nonempty_groups)

    if coef is None:
        if is_binary_one_hot and n_coef == 2:
            # Common limma design (~0 + group): default to group2 - group1.
            coef = 1
            reference = 0
        elif n_coef == 1:
            coef = 0
        else:
            coef = 0
            if design is not None and design.ndim == 2 and design.shape[1] == n_coef:
                # Prefer the first non-intercept column (intercept = all-ones).
                intercept_cols = np.where(np.all(np.isclose(design, 1.0), axis=0))[0]
                candidates = [i for i in range(n_coef) if i not in set(intercept_cols)]
                if candidates:
                    coef = candidates[0]

    if coef < 0 or coef >= n_coef:
        raise ValueError(f"Invalid coefficient index {coef}. Valid range: 0 to {fit['coefficients'].shape[1] - 1}")

    # For two-group one-hot designs (no intercept), treat coef as group-vs-group
    # by default, matching expected DE behavior without an explicit contrasts step.
    if reference is None and is_binary_one_hot and n_coef == 2:
        reference = 1 - coef

    if reference is not None:
        if reference < 0 or reference >= n_coef:
            raise ValueError(
                f"Invalid reference index {reference}. Valid range: 0 to {fit['coefficients'].shape[1] - 1}"
            )
        if reference == coef:
            raise ValueError("reference must be different from coef")

    # Extract statistics for the selected coefficient (or compute them for the
    # (coef - reference) contrast from the fitted covariance structure).
    if reference is None:
        logFC = fit["coefficients"][:, coef]
        t_stat = fit["t"][:, coef]
        p_val = fit["p_value"][:, coef]
        B_stat = fit["lods"][:, coef]
    else:
        # Contrast vector c with +1 at coef and -1 at reference.
        cvec = np.zeros(n_coef, dtype=float)
        cvec[coef] = 1.0
        cvec[reference] = -1.0

        logFC = fit["coefficients"] @ cvec

        cov_coef = fit.get("cov_coefficients")
        if cov_coef is None:
            raise ValueError("Fit object missing 'cov_coefficients'.")
        # Unscaled standard deviation of the contrast: sqrt(c' V c).
        c_unscaled = float(np.sqrt(cvec @ cov_coef @ cvec))

        se = c_unscaled * np.sqrt(fit["s2_post"])
        with np.errstate(divide="ignore", invalid="ignore"):
            t_stat = logFC / se
        p_val = 2 * stats.t.sf(np.abs(t_stat), fit["df_total"])

        var_prior = fit.get("var_prior")
        if var_prior is None:
            # Cannot compute log-odds without a prior variance estimate.
            B_stat = np.full_like(logFC, np.nan, dtype=float)
        else:
            var_prior_contrast = float(np.sum((cvec**2) * np.asarray(var_prior)))
            with np.errstate(divide="ignore", invalid="ignore"):
                r = (c_unscaled**2 + var_prior_contrast) / (c_unscaled**2)
            t2 = t_stat**2
            df_prior = fit.get("df_prior", np.inf)
            # Finite prior df uses the exact limma kernel; otherwise the
            # normal-limit approximation.
            if np.isfinite(df_prior) and df_prior <= 1e6:
                kernel = (1.0 + fit["df_total"]) / 2.0 * np.log((t2 + fit["df_total"]) / (t2 / r + fit["df_total"]))
            else:
                kernel = t2 * (1.0 - 1.0 / r) / 2.0
            proportion = fit.get("proportion", 0.01)
            B_stat = np.log(proportion / (1.0 - proportion)) - np.log(r) / 2.0 + kernel

    # Compute average expression across samples.
    # adata.X is samples x genes, so mean over axis 0.
    if adata is None:
        raise ValueError("AnnData object is required to compute AveExpr. Pass AnnData input to top_table.")
    ave_expr = np.mean(adata.X, axis=0)
    if ave_expr.ndim > 1:
        ave_expr = ave_expr.squeeze()

    # Apply multiple testing correction over the non-NaN p-values only;
    # NaN p-values stay NaN in the adjusted column.
    valid_pvals = ~np.isnan(p_val)
    adj_p_val = np.full_like(p_val, np.nan)

    if np.sum(valid_pvals) > 0:
        _, adj_p_val[valid_pvals], _, _ = multipletests(
            p_val[valid_pvals],
            method=adjust_method,
            is_sorted=False,
            returnsorted=False,
        )

    # Create results DataFrame.
    results = pd.DataFrame(
        {
            "logfc": logFC,
            "avg_expr": ave_expr,
            "t": t_stat,
            "p_value": p_val,
            "p_adj": adj_p_val,
            "b": B_stat,
        },
        index=fit["genes"],
    )

    # Add gene annotations from adata.var if available.
    if adata.var.shape[1] > 0:
        # Join with var DataFrame (excluding index which is already used).
        for col in adata.var.columns:
            if col not in results.columns:
                results[col] = adata.var[col].values

    # Apply significance and fold-change filters before ranking.
    mask = (results["p_adj"] <= p_value) & (np.abs(results["logfc"]) >= lfc)
    results = results[mask]

    # Sort results.
    if sort_by == "logfc":
        results = results.iloc[np.argsort(-np.abs(results["logfc"].values))]
    elif sort_by == "avg_expr":
        results = results.sort_values("avg_expr", ascending=False)
    elif sort_by == "t":
        results = results.iloc[np.argsort(-np.abs(results["t"].values))]
    elif sort_by == "p_value":
        results = results.sort_values("p_value", ascending=True)
    elif sort_by == "b":
        results = results.sort_values("b", ascending=False)
    elif sort_by == "none":
        pass  # No sorting
    else:
        raise ValueError(f"Invalid sort_by: {sort_by}. Use 'logfc', 'avg_expr', 't', 'p_value', 'b', or 'none'.")

    # Limit to top N genes (number=np.inf returns everything).
    if number < len(results):
        results = results.iloc[:number]

    return results
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: microarray
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Microarray analysis tools
|
|
5
|
+
Author: harryhaller001
|
|
6
|
+
Author-email: harryhaller001 <harryhaller001@gmail.com>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Science/Research
|
|
10
|
+
Classifier: Natural Language :: English
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
17
|
+
Classifier: Typing :: Typed
|
|
18
|
+
Requires-Dist: adjusttext>=1.3
|
|
19
|
+
Requires-Dist: anndata
|
|
20
|
+
Requires-Dist: click
|
|
21
|
+
Requires-Dist: matplotlib
|
|
22
|
+
Requires-Dist: requests
|
|
23
|
+
Requires-Dist: scikit-learn
|
|
24
|
+
Requires-Dist: scipy
|
|
25
|
+
Requires-Dist: statsmodels
|
|
26
|
+
Requires-Dist: ipython ; extra == 'docs'
|
|
27
|
+
Requires-Dist: myst-parser ; extra == 'docs'
|
|
28
|
+
Requires-Dist: nbsphinx ; extra == 'docs'
|
|
29
|
+
Requires-Dist: sphinx ; extra == 'docs'
|
|
30
|
+
Requires-Dist: sphinx-autoapi ; extra == 'docs'
|
|
31
|
+
Requires-Dist: sphinx-autodoc-typehints ; extra == 'docs'
|
|
32
|
+
Requires-Dist: sphinx-book-theme ; extra == 'docs'
|
|
33
|
+
Requires-Dist: decoupler>=2.1.4 ; extra == 'test'
|
|
34
|
+
Requires-Dist: ipykernel ; extra == 'test'
|
|
35
|
+
Requires-Dist: ipython ; extra == 'test'
|
|
36
|
+
Requires-Dist: ipywidgets ; extra == 'test'
|
|
37
|
+
Requires-Dist: pre-commit ; extra == 'test'
|
|
38
|
+
Requires-Dist: pytest ; extra == 'test'
|
|
39
|
+
Requires-Dist: pytest-cov ; extra == 'test'
|
|
40
|
+
Requires-Dist: responses ; extra == 'test'
|
|
41
|
+
Requires-Dist: ruff ; extra == 'test'
|
|
42
|
+
Requires-Dist: scanpy>=1.11.5 ; extra == 'test'
|
|
43
|
+
Requires-Dist: tqdm>=4.67.3 ; extra == 'test'
|
|
44
|
+
Requires-Dist: twine ; extra == 'test'
|
|
45
|
+
Requires-Dist: ty>=0.0.16 ; extra == 'test'
|
|
46
|
+
Requires-Dist: types-requests ; extra == 'test'
|
|
47
|
+
Maintainer: harryhaller001
|
|
48
|
+
Maintainer-email: harryhaller001 <harryhaller001@gmail.com>
|
|
49
|
+
Requires-Python: >=3.11
|
|
50
|
+
Project-URL: Source, https://github.com/harryhaller001/microarray
|
|
51
|
+
Provides-Extra: docs
|
|
52
|
+
Provides-Extra: test
|
|
53
|
+
Description-Content-Type: text/markdown
|
|
54
|
+
|
|
55
|
+
# microarray
|
|
56
|
+
Processing microarray data in Python
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
## Installation
|
|
60
|
+
|
|
61
|
+
Use `pip` to install the microarray package:
|
|
62
|
+
|
|
63
|
+
`pip install microarray`
|
|
64
|
+
|
|
65
|
+
## Usage
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
import microarray as ma
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
See tutorial notebook for detailed workflow examples.
|
|
72
|
+
|
|
73
|
+
## License
|
|
74
|
+
|
|
75
|
+
MIT
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
microarray/__init__.py,sha256=VNXeP5rIxEHgkCbR69_Yvu-LTMqT-ipV7LXLXrStFF8,301
|
|
2
|
+
microarray/_version.py,sha256=RlPHvA08BJFT7InYKmrqUbpk4No6mfqPzrxOXfUS1bA,81
|
|
3
|
+
microarray/datasets/__init__.py,sha256=KECQdeG_6jM1BLHnolCvC5Bl26odJIMER93F634ifT4,118
|
|
4
|
+
microarray/datasets/_arrayexpress.py,sha256=kcbe8qUInt3rhZ2Ioc1WheeSYvPzLWPVm2lD8fkEOas,134
|
|
5
|
+
microarray/datasets/_cdf_files.py,sha256=E6F2KTA2okDaGe5YdW27JqcYLScJ7gksppMY63iT61Q,1160
|
|
6
|
+
microarray/datasets/_geo.py,sha256=OlUEZfH_w6eDnnENF__3utpQGxc2OztraE3OBAUxptM,111
|
|
7
|
+
microarray/datasets/_utils.py,sha256=G-BlIufEm-KUTlz_mAx8pOC5-6nyyIsZ5gU3rLuHw-M,4310
|
|
8
|
+
microarray/io/__init__.py,sha256=rnBtYn59d0JQt6U3klQYgeaMV7mqJy_iP37zTfMPRiY,419
|
|
9
|
+
microarray/io/_anndata_converter.py,sha256=p3_uJsx___dVVRSink6RcGlyQpyiZK6paxb-1XhVRvw,7579
|
|
10
|
+
microarray/io/_cdf.py,sha256=8qHOYVOxlCx_Do0mwIvU5ahIma67LMinLsa86-8Xcqw,19825
|
|
11
|
+
microarray/io/_cel.py,sha256=JCqYvgWvuTqjrcY9y129YYmwMltDYTkeS6774E98rFw,23440
|
|
12
|
+
microarray/io/_read.py,sha256=uVvNsZUtoBngML-AifRACugofzQi9MfWJTNneXxNjR8,3745
|
|
13
|
+
microarray/plotting/__init__.py,sha256=Fjy-omJEHAIKUD8E1EyOJSJ5hQzlPpNTpRF_EbMQwNM,817
|
|
14
|
+
microarray/plotting/_base.py,sha256=0G1kI6SiF2pQt9MXdIVBrUlfII54ehPne0V4hPTEfjw,6508
|
|
15
|
+
microarray/plotting/_cel.py,sha256=_6TK9n6b2bjPYsq01C69a6g1JLsKUaq6kWmnEWcGP68,1962
|
|
16
|
+
microarray/plotting/_de_plots.py,sha256=IrLhM4XSjLTVn52KbVyzz-MGj2KjSNDxRSxQ6BCGH4E,8961
|
|
17
|
+
microarray/plotting/_diagnostic_plots.py,sha256=cSW89oBfnx7YKOHBiH9-bGgraXde6LGCVHQH5jPW84o,9283
|
|
18
|
+
microarray/plotting/_heatmap.py,sha256=cZLVJ3y9lzlrLmfNZiac0ZTpimfVlyb6sjLY5SF9s68,10694
|
|
19
|
+
microarray/plotting/_ma_plots.py,sha256=Gn5SG_kLRQSUpRb73uOw98rj5MkPDU83uPh97o1NSSs,4615
|
|
20
|
+
microarray/plotting/_pca.py,sha256=nWhdjt9_UUNyzpetGovVEmd1LV18PQXN07UUY-7Ozc8,10565
|
|
21
|
+
microarray/plotting/_qc_plots.py,sha256=ITSVy_Lof8Ek81cK1VBz5vP5BZeW5dTrI747uKefFpk,10963
|
|
22
|
+
microarray/plotting/_score.py,sha256=sR_xJPRmg_aAmQ8WNYHhj1VmqC8Q8d-DNZmwZ91ugeE,1474
|
|
23
|
+
microarray/plotting/_top_table_heatmap.py,sha256=1T0J28X2xU_3T2cz1p-cD3EqUDnoqtBXfko2UhwRoOw,3693
|
|
24
|
+
microarray/plotting/_utils.py,sha256=MVJjkbnFrXzJzJkm5LxcNwzIQZ9PoFi6DfsonH6RIXw,8708
|
|
25
|
+
microarray/preprocessing/__init__.py,sha256=UsLBTcg_ajG4GzqHYJGSImVDU9HYzz2oYrKoRef_vcU,1086
|
|
26
|
+
microarray/preprocessing/_background.py,sha256=tyGScDhgwvUvJmGvzK6w1Ch3i9vlR0gA3Y9rGykMwdg,29002
|
|
27
|
+
microarray/preprocessing/_log2.py,sha256=ih57Z3kcHrF7qGI_f-Z5a4Vcl42kJVUhW1BI898OTn4,2317
|
|
28
|
+
microarray/preprocessing/_normalize.py,sha256=KhV7-1kmLOouDhhEnsFwD6vJm60C7LuuVz2pPfl79W8,45811
|
|
29
|
+
microarray/preprocessing/_rma.py,sha256=oZJ8xj6CDWMlJPRiWTxTHnTZ645l9JPLC_LM2m8SjS4,8674
|
|
30
|
+
microarray/preprocessing/_robust.py,sha256=RsgC6lgCca5Cg1a49WIUgnhdZ7eqDztGMnunWMS2_DM,5529
|
|
31
|
+
microarray/preprocessing/_summarize.py,sha256=ctv-NkZDKMScEyqKDoSKZy3szg_YoVhUyHdOj9wzV8U,10844
|
|
32
|
+
microarray/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
|
+
microarray/tools/__init__.py,sha256=m2WBfDG1-rmgt3fEfEMUp-Z_051fl8tRibDN10UpcVM,677
|
|
34
|
+
microarray/tools/_biomart.py,sha256=4aWBtUwxELLR4uBJHHASEdpIsR1GCbG6kkvV4QJIg8w,15201
|
|
35
|
+
microarray/tools/_empirical_bayes.py,sha256=cQgV7Gx4ii-0v2TCrQpNeIMjDlUOup4kmrKzAhoVvqw,14224
|
|
36
|
+
microarray/tools/_fdist.py,sha256=pA3nM72ywkLlMMDk-YrG9aISdx5ShgcPHGhgaF4vgPU,6065
|
|
37
|
+
microarray/tools/_linear_models.py,sha256=xrkujMD7LUYsJpa_quDRkBGGU4A1MIr-QORzWoTMf48,14265
|
|
38
|
+
microarray/tools/_mds.py,sha256=FDZJajyT81YhE8CL7vLtSoftYR2HpyObvWH1CqRqrK8,3436
|
|
39
|
+
microarray/tools/_pca.py,sha256=hVg172p-jB0vAogPGnfloCljq6r4JphlB2OZu8sIYkM,2837
|
|
40
|
+
microarray/tools/_score.py,sha256=fJQEB2W7tuSoJN9aQ6h-uG7bPvz12qNW-OLfqb0D_ps,2979
|
|
41
|
+
microarray/tools/_toptable.py,sha256=Qy8heiPZsK7YVX_AcwxUw3c4x2kdMl4Vcx_5dWb3S1Q,14278
|
|
42
|
+
microarray-0.1.0.dist-info/WHEEL,sha256=iHtWm8nRfs0VRdCYVXocAWFW8ppjHL-uTJkAdZJKOBM,80
|
|
43
|
+
microarray-0.1.0.dist-info/METADATA,sha256=N8pCMHUqOaZ2xs0Sxu8Xys_1Fw7U8Ee_JxzYR7NuYt4,2320
|
|
44
|
+
microarray-0.1.0.dist-info/RECORD,,
|