microarray 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- microarray/__init__.py +15 -0
- microarray/_version.py +3 -0
- microarray/datasets/__init__.py +3 -0
- microarray/datasets/_arrayexpress.py +1 -0
- microarray/datasets/_cdf_files.py +35 -0
- microarray/datasets/_geo.py +1 -0
- microarray/datasets/_utils.py +143 -0
- microarray/io/__init__.py +17 -0
- microarray/io/_anndata_converter.py +198 -0
- microarray/io/_cdf.py +575 -0
- microarray/io/_cel.py +591 -0
- microarray/io/_read.py +127 -0
- microarray/plotting/__init__.py +28 -0
- microarray/plotting/_base.py +253 -0
- microarray/plotting/_cel.py +75 -0
- microarray/plotting/_de_plots.py +239 -0
- microarray/plotting/_diagnostic_plots.py +268 -0
- microarray/plotting/_heatmap.py +279 -0
- microarray/plotting/_ma_plots.py +136 -0
- microarray/plotting/_pca.py +320 -0
- microarray/plotting/_qc_plots.py +335 -0
- microarray/plotting/_score.py +38 -0
- microarray/plotting/_top_table_heatmap.py +98 -0
- microarray/plotting/_utils.py +280 -0
- microarray/preprocessing/__init__.py +39 -0
- microarray/preprocessing/_background.py +862 -0
- microarray/preprocessing/_log2.py +77 -0
- microarray/preprocessing/_normalize.py +1292 -0
- microarray/preprocessing/_rma.py +243 -0
- microarray/preprocessing/_robust.py +170 -0
- microarray/preprocessing/_summarize.py +318 -0
- microarray/py.typed +0 -0
- microarray/tools/__init__.py +26 -0
- microarray/tools/_biomart.py +416 -0
- microarray/tools/_empirical_bayes.py +401 -0
- microarray/tools/_fdist.py +171 -0
- microarray/tools/_linear_models.py +387 -0
- microarray/tools/_mds.py +101 -0
- microarray/tools/_pca.py +88 -0
- microarray/tools/_score.py +86 -0
- microarray/tools/_toptable.py +360 -0
- microarray-0.1.0.dist-info/METADATA +75 -0
- microarray-0.1.0.dist-info/RECORD +44 -0
- microarray-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
"""PCA plotting for microarray data."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import matplotlib.pyplot as plt
|
|
8
|
+
import numpy as np
|
|
9
|
+
from adjustText import adjust_text
|
|
10
|
+
from anndata import AnnData
|
|
11
|
+
from matplotlib.axes import Axes
|
|
12
|
+
from matplotlib.colors import Colormap
|
|
13
|
+
|
|
14
|
+
from microarray.plotting._utils import get_default_colors
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def pca(
    adata: AnnData,
    obsm_key: str = "X_pca",
    components: tuple[int, int] = (1, 2),
    labels: list[str] | None = None,
    colors: str | None = None,
    cmap: str | Colormap = "viridis",
    xlab: str | None = None,
    ylab: str | None = None,
    title: str = "PCA Plot",
    ax: Axes | None = None,
    **kwargs: Any,
) -> Axes:
    """Plot PCA coordinates stored in ``adata.obsm``.

    The embedding must already be present under ``obsm_key``; this function
    does not compute PCA and raises ``KeyError`` when the key is missing.

    Args:
        adata: AnnData object with the PCA embedding in ``.obsm``.
        obsm_key: Key in ``.obsm`` where PCA coordinates are stored. The same
            key is looked up in ``.uns`` for an optional ``"variance_ratio"``
            entry that is appended to the default axis labels.
        components: 1-based component indices to plot (e.g. ``(1, 2)``).
        labels: Optional labels for each sample. Defaults to ``adata.obs_names``.
            Pass an empty list to draw no labels.
        colors: Optional ``adata.obs`` column used for coloring points.
            Numeric columns are shown with a continuous colormap and a
            colorbar. All other columns are treated as categorical and shown
            with a legend.
        cmap: Colormap used when ``colors`` points to a numeric ``adata.obs`` column.
        xlab: Optional x-axis label. Defaults to ``"PC<n>"``, with the explained
            variance percentage appended when it is available.
        ylab: Optional y-axis label. Same default rule as ``xlab``.
        title: Plot title.
        ax: Existing matplotlib axes to reuse. A new figure is created when omitted.
        **kwargs: Extra arguments forwarded to ``ax.scatter``. They take
            precedence over the default marker styling (``s``, ``alpha``,
            ``edgecolors``, ``linewidth``).

    Returns:
        Matplotlib Axes with PCA scatter plot.

    Raises:
        KeyError: If ``obsm_key`` is missing from ``adata.obsm`` or ``colors``
            is not a column of ``adata.obs``.
        ValueError: If ``components`` does not contain exactly two in-range
            indices, or if ``labels`` is non-empty and its length differs from
            the number of samples.
    """
    # All validation runs before a figure is created so that an invalid call
    # does not leave an empty figure behind.
    if len(components) != 2:
        raise ValueError("components must contain exactly two indices")

    if obsm_key not in adata.obsm:
        raise KeyError(f"AnnData .obsm has no '{obsm_key}' key. Compute PCA first using microarray.tools.pca.")

    coords = np.asarray(adata.obsm[obsm_key])
    n_samples, n_dims = coords.shape

    # Convert the 1-based public indices to 0-based column indices.
    x_idx = components[0] - 1
    y_idx = components[1] - 1
    if x_idx < 0 or y_idx < 0 or x_idx >= n_dims or y_idx >= n_dims:
        raise ValueError(f"components={components} are out of bounds for embedding with {n_dims} dimensions")

    if labels is None:
        labels = list(adata.obs_names) if adata.obs_names is not None else [f"Sample {i}" for i in range(n_samples)]
    elif len(labels) not in (0, n_samples):
        raise ValueError(f"labels has {len(labels)} entries but the embedding has {n_samples} samples")

    if colors is not None and colors not in adata.obs.columns:
        raise KeyError(f"AnnData .obs has no '{colors}' column")

    if ax is None:
        _, ax = plt.subplots(figsize=(8, 7))

    # Caller-supplied kwargs win over the default styling instead of colliding with it.
    scatter_kwargs = {"s": 100, "alpha": 0.7, "edgecolors": "black", "linewidth": 0.5, **kwargs}

    if colors is None:
        ax.scatter(
            coords[:, x_idx],
            coords[:, y_idx],
            c=get_default_colors(1)[0],
            **scatter_kwargs,
        )
    else:
        color_values = np.asarray(adata.obs[colors])

        if np.issubdtype(color_values.dtype, np.number):
            scatter = ax.scatter(
                coords[:, x_idx],
                coords[:, y_idx],
                c=color_values.astype(float),
                cmap=cmap,
                **scatter_kwargs,
            )
            colorbar = ax.figure.colorbar(scatter, ax=ax)
            colorbar.set_label(colors)
        else:
            # Stringify once; the same array serves both grouping and masking.
            group_labels = color_values.astype(str)
            unique_groups = np.unique(group_labels)
            default_colors = get_default_colors(len(unique_groups))
            color_map = dict(zip(unique_groups, default_colors, strict=False))

            for group in unique_groups:
                mask = group_labels == group
                ax.scatter(
                    coords[mask, x_idx],
                    coords[mask, y_idx],
                    c=color_map[group],
                    label=str(group),
                    **scatter_kwargs,
                )
            # Place the legend outside the axes so it never hides points.
            ax.legend(
                loc="upper left",
                bbox_to_anchor=(1.02, 1.0),
                borderaxespad=0.0,
                frameon=False,
                title=colors,
            )

    text_artists = [
        ax.annotate(
            label,
            (coords[i, x_idx], coords[i, y_idx]),
            fontsize=9,
            alpha=0.8,
        )
        for i, label in enumerate(labels)
    ]

    # Nothing to de-overlap when labels were disabled with an empty list.
    if text_artists:
        adjust_text(
            text_artists,
            ax=ax,
            expand=(1.02, 1.05),
            force_text=(0.05, 0.08),
            force_static=(0.05, 0.08),
            force_pull=(0.25, 0.25),
            max_move=10,
            min_arrow_len=0,
            arrowprops={"arrowstyle": "-", "color": "0.35", "lw": 0.6, "alpha": 0.8},
        )

    variance_ratio = None
    if obsm_key in adata.uns and isinstance(adata.uns[obsm_key], dict):
        variance_ratio = adata.uns[obsm_key].get("variance_ratio")

    # Explained variance is only appended to the default labels, never to
    # labels supplied by the caller.
    if xlab is None:
        xlab = f"PC{components[0]}"
        if variance_ratio is not None and len(variance_ratio) > x_idx:
            xlab = f"{xlab} ({100 * float(variance_ratio[x_idx]):.1f}%)"
    if ylab is None:
        ylab = f"PC{components[1]}"
        if variance_ratio is not None and len(variance_ratio) > y_idx:
            ylab = f"{ylab} ({100 * float(variance_ratio[y_idx]):.1f}%)"

    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.set_title(title)

    ax.grid(False)

    return ax
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def pca_variance(
    adata: AnnData,
    obsm_key: str = "X_pca",
    xlab: str = "Component",
    ylab: str = "Cumulative variance explained",
    title: str = "PCA Cumulative Variance",
    ax: Axes | None = None,
    **kwargs: Any,
) -> Axes:
    """Plot cumulative explained variance from a fitted PCA.

    Args:
        adata: AnnData object with PCA variance information in ``adata.uns``.
            ``adata.uns[obsm_key]`` must be a dict with a ``"variance_ratio"`` entry.
        obsm_key: PCA key used in ``adata.uns`` (default: ``"X_pca"``).
        xlab: X-axis label.
        ylab: Y-axis label.
        title: Plot title.
        ax: Existing matplotlib axes to reuse. A new figure is created when omitted.
        **kwargs: Extra arguments forwarded to ``ax.plot``. They take precedence
            over the default ``marker="o"``.

    Returns:
        Matplotlib Axes with cumulative variance line chart.

    Raises:
        KeyError: If variance ratio is unavailable in ``adata.uns``.
    """
    # Validate before creating a figure so a failed call leaves no empty figure.
    if obsm_key not in adata.uns or not isinstance(adata.uns[obsm_key], dict):
        raise KeyError(f"AnnData .uns has no '{obsm_key}' PCA metadata. Compute PCA first using microarray.tools.pca.")

    variance_ratio = adata.uns[obsm_key].get("variance_ratio")
    if variance_ratio is None:
        raise KeyError(
            f"AnnData .uns['{obsm_key}'] has no 'variance_ratio'. Compute PCA first using microarray.tools.pca."
        )

    cumulative = np.cumsum(np.asarray(variance_ratio, dtype=float))
    n_components = cumulative.size
    component_numbers = np.arange(1, n_components + 1)  # 1-based for display

    if ax is None:
        _, ax = plt.subplots(figsize=(6, 6))

    # Let callers override the default marker instead of hitting a duplicate-keyword error.
    plot_kwargs = {"marker": "o", **kwargs}
    ax.plot(component_numbers, cumulative, **plot_kwargs)
    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.set_title(title)
    # With a single component the limits would be identical (1, 1); leave the
    # x-range to autoscaling in that case.
    if n_components > 1:
        ax.set_xlim(1, n_components)
    ax.set_ylim(0.0, 1.05)
    ax.grid(False)

    return ax
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def pca_feature_variance(
    adata: AnnData,
    component: int = 1,
    n_var: int = 20,
    obsm_key: str = "X_pca",
    xlab: str = "Feature rank",
    ylab: str = "Variance contribution",
    title: str | None = None,
    ax: Axes | None = None,
    **kwargs: Any,
) -> Axes:
    """Plot top feature variance contributions for one PCA component.

    This uses squared PCA loadings as per-feature variance contribution within
    a selected component and ranks features in decreasing order.

    Args:
        adata: AnnData object with PCA metadata in ``adata.uns``.
            ``adata.uns[obsm_key]`` must be a dict with a 2-D ``"components"``
            entry, which the code reads as one row per component and one
            column per feature.
        component: 1-based PCA component index to inspect.
        n_var: Number of top-ranked features to display. Capped at the number
            of features.
        obsm_key: PCA key used in ``adata.uns`` (default: ``"X_pca"``).
        xlab: X-axis label.
        ylab: Y-axis label.
        title: Plot title. Defaults to ``"PC{component} Feature Variance"``.
        ax: Existing matplotlib axes to reuse. A new figure is created when omitted.
        **kwargs: Extra arguments forwarded to ``ax.plot``. They take precedence
            over the defaults ``marker="o"`` and ``alpha=0.1``.

    Returns:
        Matplotlib Axes with ranked feature variance contributions.

    Raises:
        KeyError: If PCA metadata/components are unavailable.
        ValueError: If ``component`` or ``n_var`` is invalid, or the stored
            components are not a 2-D array.
    """
    # Validate before creating a figure so a failed call leaves no empty figure.
    if n_var < 1:
        raise ValueError("n_var must be at least 1")
    if component < 1:
        raise ValueError("component must be at least 1")

    if obsm_key not in adata.uns or not isinstance(adata.uns[obsm_key], dict):
        raise KeyError(f"AnnData .uns has no '{obsm_key}' PCA metadata. Compute PCA first using microarray.tools.pca.")

    loadings = adata.uns[obsm_key].get("components")
    if loadings is None:
        raise KeyError(
            f"AnnData .uns['{obsm_key}'] has no 'components'. Recompute PCA with a version that stores loadings."
        )

    loadings = np.asarray(loadings, dtype=float)
    if loadings.ndim != 2:
        raise ValueError(f"PCA components must be a 2-D array, got {loadings.ndim} dimension(s)")
    n_components, n_features = loadings.shape
    if component > n_components:
        raise ValueError(f"component must be <= {n_components}")

    contribution = np.square(loadings[component - 1])
    top_n = min(n_var, n_features)
    # argsort is ascending; reverse it to rank the largest contributions first.
    top_idx = np.argsort(contribution)[::-1][:top_n]
    top_contrib = contribution[top_idx]
    ranks = np.arange(1, top_n + 1)

    feature_names = np.asarray(adata.var_names.astype(str)) if adata.var_names is not None else np.array([])
    # Fall back to generic names when var_names does not line up with the loadings.
    if feature_names.size != n_features:
        feature_names = np.array([f"Feature {i}" for i in range(n_features)])

    if ax is None:
        _, ax = plt.subplots(figsize=(6, 6))

    # Let callers override the default styling instead of hitting a duplicate-keyword error.
    plot_kwargs = {"marker": "o", "alpha": 0.1, **kwargs}
    ax.plot(ranks, top_contrib, **plot_kwargs)

    # Write each feature name vertically on top of its point.
    for rank, value, idx in zip(ranks, top_contrib, top_idx, strict=False):
        ax.text(
            rank,
            value,
            feature_names[idx],
            rotation=90,
            ha="center",
            va="center",
            fontsize=8,
        )

    if title is None:
        title = f"PC{component} Feature Variance"

    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.set_title(title)
    ax.grid(False)

    return ax
|
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
"""Quality control plot functions for microarray data."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import matplotlib.pyplot as plt
|
|
6
|
+
import numpy as np
|
|
7
|
+
from anndata import AnnData
|
|
8
|
+
from matplotlib.axes import Axes
|
|
9
|
+
from scipy import stats
|
|
10
|
+
|
|
11
|
+
from microarray.plotting._utils import get_default_colors
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def densities(
    adata: AnnData,
    arrays: list[int | str] | None = None,
    colors: list[str] | None = None,
    xlab: str = "Log2 intensity",
    ylab: str = "Density",
    title: str = "Intensity Distributions",
    legend: bool | str = "best",
    ax: Axes | None = None,
    **kwargs: Any,
) -> Axes:
    """Plot overlaid density estimates for multiple arrays.

    Displays kernel density estimates of probe intensity distributions
    for quality control. Similar arrays should have similar distributions.

    Values are log2(x + 1)-transformed when the matrix looks like raw
    intensities (no negative values and a range wider than 20). Missing values
    are ignored by that check. Arrays with fewer than two finite values, or
    whose finite values are all identical, are skipped because no density can
    be estimated for them.

    Args:
        adata: AnnData object with probe-level expression data in .X
        arrays: List of array indices/names to plot. If None, plots all arrays.
        colors: List of colors for each array. If None, uses default palette.
            A list that is too short is padded from the default palette.
        xlab: X-axis label
        ylab: Y-axis label
        title: Plot title
        legend: Legend position ('best', 'upper right', etc.) or False to disable
        ax: Existing Axes object. If None, creates new figure.
        **kwargs: Additional arguments passed to ax.plot(). They take
            precedence over the default ``linewidth=2``.

    Returns:
        Axes object with density plot

    Raises:
        ValueError: If a name in ``arrays`` is not present in ``adata.obs_names``.

    Examples:
        >>> import anndata as ad
        >>> import numpy as np
        >>> from microarray.plotting import densities
        >>> data = np.random.randn(1000, 4)
        >>> adata = ad.AnnData(data.T)
        >>> ax = densities(adata)
    """
    # Expression matrix (samples x probes).
    expr = adata.X

    # Resolve the requested arrays first, so a bad name fails before any
    # figure is created. The name list is built once rather than per lookup.
    has_names = adata.obs_names is not None
    obs_names = list(adata.obs_names) if has_names else []
    if arrays is None:
        array_indices = list(range(expr.shape[0]))
        array_names = obs_names if has_names else [f"Array {i}" for i in array_indices]
    else:
        array_indices = []
        array_names = []
        for arr in arrays:
            if isinstance(arr, str):
                try:
                    idx = obs_names.index(arr)
                except ValueError:
                    raise ValueError(f"Array '{arr}' not found in adata.obs_names") from None
                array_indices.append(idx)
                array_names.append(arr)
            else:
                array_indices.append(arr)
                array_names.append(obs_names[arr] if has_names else f"Array {arr}")

    # Convert to log2 if the data looks like raw intensities. The NaN-aware
    # reductions keep a single missing probe from disabling the transform.
    low, high = np.nanmin(expr), np.nanmax(expr)
    if low >= 0 and (high - low) > 20:
        log_expr = np.log2(expr + 1)
    else:
        log_expr = expr

    if colors is None:
        colors = get_default_colors(len(array_indices))
    elif len(colors) < len(array_indices):
        colors = list(colors) + list(get_default_colors(len(array_indices) - len(colors)))

    if ax is None:
        _, ax = plt.subplots(figsize=(10, 6))

    # Let callers override the default line width instead of colliding with it.
    line_kwargs = {"linewidth": 2, **kwargs}

    for idx, name, color in zip(array_indices, array_names, colors, strict=False):
        data = log_expr[idx, :]
        # Drop NaN and infinite values.
        data = data[np.isfinite(data)]

        if len(data) < 2:
            continue

        x_min, x_max = data.min(), data.max()
        x_range = x_max - x_min
        if x_range == 0:
            # Constant data has zero variance, which gaussian_kde cannot handle.
            continue

        kde = stats.gaussian_kde(data)

        # Evaluate slightly beyond the data range so the tails are visible.
        x = np.linspace(x_min - 0.1 * x_range, x_max + 0.1 * x_range, 512)

        ax.plot(x, kde(x), color=color, label=name, **line_kwargs)

    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.set_title(title)

    # A legend is only useful when more than one array is shown.
    if legend and len(array_indices) > 1:
        ax.legend(loc=legend if isinstance(legend, str) else "best", frameon=True)

    ax.grid(True, alpha=0.3, linestyle="--")
    ax.set_ylim(bottom=0)  # Density should start at 0

    return ax
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def boxplot(
    adata: AnnData,
    arrays: list[int | str] | None = None,
    colors: list[str] | str | None = None,
    xlab: str = "Array",
    ylab: str = "Log2 intensity",
    title: str = "Intensity Boxplots",
    show_fliers: bool = False,
    ax: Axes | None = None,
    **kwargs: Any,
) -> Axes:
    """Create boxplots of probe intensities across arrays.

    Displays distribution of probe intensities for each array using boxplots.
    Useful for comparing overall intensity levels and dispersion across arrays.

    Values are log2(x + 1)-transformed when the matrix looks like raw
    intensities (no negative values and a range wider than 20). Missing values
    are ignored by that check.

    Args:
        adata: AnnData object with probe-level expression data in .X
        arrays: List of array indices/names to plot. If None, plots all arrays.
        colors: Color(s) for boxes. Can be single color or list of colors.
            A list that is too short is padded from the default palette.
            If None, the boxes keep Matplotlib's default face color.
        xlab: X-axis label
        ylab: Y-axis label
        title: Plot title
        show_fliers: Whether to show outlier points. Default False.
        ax: Existing Axes object. If None, creates new figure whose width
            grows with the number of plotted arrays.
        **kwargs: Additional arguments passed to ax.boxplot()

    Returns:
        Axes object with boxplot

    Raises:
        ValueError: If a name in ``arrays`` is not present in ``adata.obs_names``.

    Examples:
        >>> import anndata as ad
        >>> import numpy as np
        >>> from microarray.plotting import boxplot
        >>> data = np.random.randn(1000, 4)
        >>> adata = ad.AnnData(data.T)
        >>> ax = boxplot(adata)
    """
    # Expression matrix (samples x probes).
    expr = adata.X

    # Resolve the requested arrays first, so a bad name fails before any
    # figure is created. The name list is built once rather than per lookup.
    has_names = adata.obs_names is not None
    obs_names = list(adata.obs_names) if has_names else []
    if arrays is None:
        array_indices = list(range(expr.shape[0]))
        array_names = obs_names if has_names else [f"Array {i}" for i in array_indices]
    else:
        array_indices = []
        array_names = []
        for arr in arrays:
            if isinstance(arr, str):
                try:
                    idx = obs_names.index(arr)
                except ValueError:
                    raise ValueError(f"Array '{arr}' not found in adata.obs_names") from None
                array_indices.append(idx)
                array_names.append(arr)
            else:
                array_indices.append(arr)
                array_names.append(obs_names[arr] if has_names else f"Array {arr}")

    # Convert to log2 if the data looks like raw intensities. The NaN-aware
    # reductions keep a single missing probe from disabling the transform.
    low, high = np.nanmin(expr), np.nanmax(expr)
    if low >= 0 and (high - low) > 20:
        log_expr = np.log2(expr + 1)
    else:
        log_expr = expr

    if ax is None:
        # Size the figure for the arrays actually drawn, not for every array in adata.
        _, ax = plt.subplots(figsize=(max(8, len(array_indices) * 0.8), 6))

    # One vector of finite values per selected array.
    data_list = []
    for idx in array_indices:
        row = log_expr[idx, :]
        data_list.append(row[np.isfinite(row)])

    # patch_artist=True makes the boxes filled patches so they can be recolored below.
    # NOTE(review): Matplotlib 3.9 renamed ``labels`` to ``tick_labels``; the old
    # name is kept here so that earlier Matplotlib releases continue to work.
    bp = ax.boxplot(data_list, labels=array_names, showfliers=show_fliers, patch_artist=True, **kwargs)

    if isinstance(colors, str):
        # Single color for all boxes.
        box_colors = [colors] * len(bp["boxes"])
    elif colors is not None:
        box_colors = list(colors)
        if len(box_colors) < len(array_indices):
            box_colors += list(get_default_colors(len(array_indices) - len(box_colors)))
    else:
        box_colors = []
    for patch, color in zip(bp["boxes"], box_colors, strict=False):
        patch.set_facecolor(color)

    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.set_title(title)

    # Rotate x-axis labels if many arrays.
    if len(array_indices) > 4:
        ax.set_xticklabels(array_names, rotation=45, ha="right")

    ax.grid(True, alpha=0.3, linestyle="--", axis="y")

    return ax
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def histogram(
    adata: AnnData,
    arrays: list[int | str] | None = None,
    bins: int = 50,
    colors: list[str] | None = None,
    xlab: str = "Log2 intensity",
    ylab: str = "Frequency",
    title: str = "Intensity Histograms",
    alpha: float = 0.6,
    legend: bool | str = "best",
    ax: Axes | None = None,
    **kwargs: Any,
) -> Axes:
    """Plot histograms of probe intensities for multiple arrays.

    Displays probe intensity distributions as histograms.
    Similar to densities() but shows actual counts rather than smoothed density.

    Values are log2(x + 1)-transformed when the matrix looks like raw
    intensities (no negative values and a range wider than 20). Missing values
    are ignored by that check. All arrays share one bin range so their
    histograms are directly comparable.

    Args:
        adata: AnnData object with probe-level expression data in .X
        arrays: List of array indices/names to plot. If None, plots all arrays.
        bins: Number of histogram bins. Default 50.
        colors: List of colors for each array. If None, uses default palette.
            A list that is too short is padded from the default palette.
        xlab: X-axis label
        ylab: Y-axis label
        title: Plot title
        alpha: Transparency of histogram bars (0-1)
        legend: Legend position ('best', 'upper right', etc.) or False to disable
        ax: Existing Axes object. If None, creates new figure.
        **kwargs: Additional arguments passed to ax.hist(). They take
            precedence over the default ``edgecolor="none"``.

    Returns:
        Axes object with histogram

    Raises:
        ValueError: If a name in ``arrays`` is not present in ``adata.obs_names``.

    Examples:
        >>> import anndata as ad
        >>> import numpy as np
        >>> from microarray.plotting import histogram
        >>> data = np.random.randn(1000, 4)
        >>> adata = ad.AnnData(data.T)
        >>> ax = histogram(adata)
    """
    # Expression matrix (samples x probes).
    expr = adata.X

    # Resolve the requested arrays first, so a bad name fails before any
    # figure is created. The name list is built once rather than per lookup.
    has_names = adata.obs_names is not None
    obs_names = list(adata.obs_names) if has_names else []
    if arrays is None:
        array_indices = list(range(expr.shape[0]))
        array_names = obs_names if has_names else [f"Array {i}" for i in array_indices]
    else:
        array_indices = []
        array_names = []
        for arr in arrays:
            if isinstance(arr, str):
                try:
                    idx = obs_names.index(arr)
                except ValueError:
                    raise ValueError(f"Array '{arr}' not found in adata.obs_names") from None
                array_indices.append(idx)
                array_names.append(arr)
            else:
                array_indices.append(arr)
                array_names.append(obs_names[arr] if has_names else f"Array {arr}")

    # Convert to log2 if the data looks like raw intensities. The NaN-aware
    # reductions keep a single missing probe from disabling the transform.
    low, high = np.nanmin(expr), np.nanmax(expr)
    if low >= 0 and (high - low) > 20:
        log_expr = np.log2(expr + 1)
    else:
        log_expr = expr

    if colors is None:
        colors = get_default_colors(len(array_indices))
    elif len(colors) < len(array_indices):
        colors = list(colors) + list(get_default_colors(len(array_indices) - len(colors)))

    # Filter each selected row once and reuse it for both the shared bin
    # range and the plot itself.
    finite_rows = []
    for idx in array_indices:
        row = log_expr[idx, :]
        finite_rows.append(row[np.isfinite(row)])

    # Common bin range from per-row extrema; this avoids copying every probe
    # value into a Python list. With no finite data at all there is nothing to
    # bin, so the range is left unset and the axes are returned without bars.
    populated = [row for row in finite_rows if row.size]
    if populated:
        bin_range = (min(row.min() for row in populated), max(row.max() for row in populated))
    else:
        bin_range = None

    if ax is None:
        _, ax = plt.subplots(figsize=(10, 6))

    # Let callers override the default edge color instead of colliding with it.
    hist_kwargs = {"edgecolor": "none", **kwargs}

    for values, name, color in zip(finite_rows, array_names, colors, strict=False):
        if values.size < 1:
            continue

        ax.hist(values, bins=bins, range=bin_range, color=color, alpha=alpha, label=name, **hist_kwargs)

    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.set_title(title)

    # A legend is only useful when more than one array is shown.
    if legend and len(array_indices) > 1:
        ax.legend(loc=legend if isinstance(legend, str) else "best", frameon=True)

    ax.grid(True, alpha=0.3, linestyle="--", axis="y")

    return ax
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from typing import Literal
|
|
2
|
+
|
|
3
|
+
from anndata import AnnData
|
|
4
|
+
from matplotlib.axes import Axes
|
|
5
|
+
from matplotlib.figure import Figure
|
|
6
|
+
|
|
7
|
+
from ._base import _plot_obs_barplot, _plot_obs_boxplot, _plot_obs_violinplot
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def score(
    adata: AnnData,
    groupby: str | None = None,
    score_name: str = "score",
    kind: Literal["bar", "box", "violin"] = "bar",
    **kwargs,
) -> tuple[Figure, Axes]:
    """Plot gene set scores stored in ``adata.obs``.

    This function does not compute scores. It forwards the ``score_name``
    column to the plotting helper selected by ``kind``, so the scores must
    already have been written to ``adata.obs``.

    Args:
        adata: AnnData object whose ``.obs`` holds the scores to plot.
        groupby: Optional column in ``adata.obs`` to group samples by for plotting.
        score_name: Column name in ``adata.obs`` where scores are stored.
        kind: Type of plot to create. Options are "bar", "box", or "violin".
        **kwargs: Additional keyword arguments passed to the underlying plotting function.

    Returns:
        The return value of the selected plotting helper, annotated as a tuple
        of a matplotlib Figure and Axes.

    Raises:
        ValueError: If ``kind`` is not one of "bar", "box", or "violin".
    """
    # Reject an unknown kind up front, before any plotting helper is looked up.
    if kind not in ("bar", "box", "violin"):
        raise ValueError(f"Invalid plot kind: {kind}. Choose from 'bar', 'box', or 'violin'.")

    plotters = {
        "bar": _plot_obs_barplot,
        "box": _plot_obs_boxplot,
        "violin": _plot_obs_violinplot,
    }
    return plotters[kind](adata, groupby=groupby, values=score_name, **kwargs)
|