microarray 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. microarray/__init__.py +15 -0
  2. microarray/_version.py +3 -0
  3. microarray/datasets/__init__.py +3 -0
  4. microarray/datasets/_arrayexpress.py +1 -0
  5. microarray/datasets/_cdf_files.py +35 -0
  6. microarray/datasets/_geo.py +1 -0
  7. microarray/datasets/_utils.py +143 -0
  8. microarray/io/__init__.py +17 -0
  9. microarray/io/_anndata_converter.py +198 -0
  10. microarray/io/_cdf.py +575 -0
  11. microarray/io/_cel.py +591 -0
  12. microarray/io/_read.py +127 -0
  13. microarray/plotting/__init__.py +28 -0
  14. microarray/plotting/_base.py +253 -0
  15. microarray/plotting/_cel.py +75 -0
  16. microarray/plotting/_de_plots.py +239 -0
  17. microarray/plotting/_diagnostic_plots.py +268 -0
  18. microarray/plotting/_heatmap.py +279 -0
  19. microarray/plotting/_ma_plots.py +136 -0
  20. microarray/plotting/_pca.py +320 -0
  21. microarray/plotting/_qc_plots.py +335 -0
  22. microarray/plotting/_score.py +38 -0
  23. microarray/plotting/_top_table_heatmap.py +98 -0
  24. microarray/plotting/_utils.py +280 -0
  25. microarray/preprocessing/__init__.py +39 -0
  26. microarray/preprocessing/_background.py +862 -0
  27. microarray/preprocessing/_log2.py +77 -0
  28. microarray/preprocessing/_normalize.py +1292 -0
  29. microarray/preprocessing/_rma.py +243 -0
  30. microarray/preprocessing/_robust.py +170 -0
  31. microarray/preprocessing/_summarize.py +318 -0
  32. microarray/py.typed +0 -0
  33. microarray/tools/__init__.py +26 -0
  34. microarray/tools/_biomart.py +416 -0
  35. microarray/tools/_empirical_bayes.py +401 -0
  36. microarray/tools/_fdist.py +171 -0
  37. microarray/tools/_linear_models.py +387 -0
  38. microarray/tools/_mds.py +101 -0
  39. microarray/tools/_pca.py +88 -0
  40. microarray/tools/_score.py +86 -0
  41. microarray/tools/_toptable.py +360 -0
  42. microarray-0.1.0.dist-info/METADATA +75 -0
  43. microarray-0.1.0.dist-info/RECORD +44 -0
  44. microarray-0.1.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,320 @@
1
+ """PCA plotting for microarray data."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ import matplotlib.pyplot as plt
8
+ import numpy as np
9
+ from adjustText import adjust_text
10
+ from anndata import AnnData
11
+ from matplotlib.axes import Axes
12
+ from matplotlib.colors import Colormap
13
+
14
+ from microarray.plotting._utils import get_default_colors
15
+
16
+
17
def pca(
    adata: AnnData,
    obsm_key: str = "X_pca",
    components: tuple[int, int] = (1, 2),
    labels: list[str] | None = None,
    colors: str | None = None,
    cmap: str | Colormap = "viridis",
    xlab: str | None = None,
    ylab: str | None = None,
    title: str = "PCA Plot",
    ax: Axes | None = None,
    **kwargs: Any,
) -> Axes:
    """Plot PCA coordinates stored in ``adata.obsm``.

    The embedding must already exist: this function only reads
    ``adata.obsm[obsm_key]`` and raises ``KeyError`` when the key is missing
    (compute it first with ``microarray.tools.pca``). All arguments are
    validated before any figure is created, so a failing call does not leave
    an empty figure behind.

    Args:
        adata: AnnData object with a PCA embedding in ``.obsm``.
        obsm_key: Key in ``.obsm`` where PCA coordinates are stored. The same
            key is looked up in ``.uns`` for an optional ``"variance_ratio"``
            entry that is appended to the default axis labels.
        components: 1-based component indices to plot (e.g. ``(1, 2)``).
        labels: Optional labels for each sample. Defaults to ``adata.obs_names``.
            If fewer labels than samples are given, only the leading samples
            are annotated.
        colors: Optional ``adata.obs`` column used for coloring points.
            Numeric columns are shown with a continuous colormap and a
            colorbar; every other column is treated as categorical and shown
            with a legend.
        cmap: Colormap used when ``colors`` points to a numeric ``adata.obs`` column.
        xlab: Optional x-axis label. Defaults to ``"PC{i}"``.
        ylab: Optional y-axis label. Defaults to ``"PC{j}"``.
        title: Plot title.
        ax: Existing matplotlib axes to reuse.
        **kwargs: Extra arguments forwarded to ``ax.scatter``. They take
            precedence over the default marker styling (``s``, ``alpha``,
            ``edgecolors``, ``linewidth``).

    Returns:
        Matplotlib Axes with PCA scatter plot.

    Raises:
        KeyError: If ``obsm_key`` is not in ``adata.obsm`` or ``colors`` is
            not a column of ``adata.obs``.
        ValueError: If components are invalid for the available embedding, or
            more labels than samples are supplied.
    """
    if len(components) != 2:
        raise ValueError("components must contain exactly two indices")

    if obsm_key not in adata.obsm:
        raise KeyError(f"AnnData .obsm has no '{obsm_key}' key. Compute PCA first using microarray.tools.pca.")

    coords = np.asarray(adata.obsm[obsm_key])
    n_samples, n_dims = coords.shape

    x_idx = components[0] - 1
    y_idx = components[1] - 1
    if not (0 <= x_idx < n_dims and 0 <= y_idx < n_dims):
        raise ValueError(f"components={components} are out of bounds for embedding with {n_dims} dimensions")

    if labels is None:
        labels = list(adata.obs_names) if adata.obs_names is not None else [f"Sample {i}" for i in range(n_samples)]
    elif len(labels) > n_samples:
        # Fail early instead of hitting an IndexError halfway through annotating.
        raise ValueError(f"Got {len(labels)} labels for {n_samples} samples")

    color_values = None
    if colors is not None:
        if colors not in adata.obs.columns:
            raise KeyError(f"AnnData .obs has no '{colors}' column")
        color_values = np.asarray(adata.obs[colors])

    if ax is None:
        _, ax = plt.subplots(figsize=(8, 7))

    # Defaults first so that caller-supplied kwargs override them instead of
    # raising "got multiple values for keyword argument".
    scatter_style = {"s": 100, "alpha": 0.7, "edgecolors": "black", "linewidth": 0.5, **kwargs}
    xs = coords[:, x_idx]
    ys = coords[:, y_idx]

    if color_values is None:
        ax.scatter(xs, ys, c=get_default_colors(1)[0], **scatter_style)
    elif np.issubdtype(color_values.dtype, np.number):
        scatter = ax.scatter(xs, ys, c=color_values.astype(float), cmap=cmap, **scatter_style)
        colorbar = ax.figure.colorbar(scatter, ax=ax)
        colorbar.set_label(colors)
    else:
        # Categorical column: one scatter call per group so each gets a legend entry.
        group_of_sample = color_values.astype(str)
        unique_groups = np.unique(group_of_sample)
        default_colors = get_default_colors(len(unique_groups))
        color_map = dict(zip(unique_groups, default_colors, strict=False))

        for group in unique_groups:
            mask = group_of_sample == group
            ax.scatter(xs[mask], ys[mask], c=color_map[group], label=str(group), **scatter_style)
        # Legend is placed outside the axes, to the right.
        ax.legend(
            loc="upper left",
            bbox_to_anchor=(1.02, 1.0),
            borderaxespad=0.0,
            frameon=False,
            title=colors,
        )

    text_artists = [
        ax.annotate(label, (coords[i, x_idx], coords[i, y_idx]), fontsize=9, alpha=0.8)
        for i, label in enumerate(labels)
    ]

    # Nudge labels apart so overlapping sample names stay readable.
    adjust_text(
        text_artists,
        ax=ax,
        expand=(1.02, 1.05),
        force_text=(0.05, 0.08),
        force_static=(0.05, 0.08),
        force_pull=(0.25, 0.25),
        max_move=10,
        min_arrow_len=0,
        arrowprops={"arrowstyle": "-", "color": "0.35", "lw": 0.6, "alpha": 0.8},
    )

    variance_ratio = None
    if obsm_key in adata.uns and isinstance(adata.uns[obsm_key], dict):
        variance_ratio = adata.uns[obsm_key].get("variance_ratio")

    def _default_label(component: int, idx: int) -> str:
        """Build ``PC{n}``, with the explained-variance percentage when known."""
        text = f"PC{component}"
        if variance_ratio is not None and len(variance_ratio) > idx:
            text = f"{text} ({100 * float(variance_ratio[idx]):.1f}%)"
        return text

    if xlab is None:
        xlab = _default_label(components[0], x_idx)
    if ylab is None:
        ylab = _default_label(components[1], y_idx)

    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.set_title(title)

    ax.grid(False)

    return ax
178
+
179
+
180
def pca_variance(
    adata: AnnData,
    obsm_key: str = "X_pca",
    xlab: str = "Component",
    ylab: str = "Cumulative variance explained",
    title: str = "PCA Cumulative Variance",
    ax: Axes | None = None,
    **kwargs: Any,
) -> Axes:
    """Plot cumulative explained variance from a fitted PCA.

    Reads ``adata.uns[obsm_key]["variance_ratio"]``, takes its cumulative sum
    and draws it against 1-based component numbers. The metadata is validated
    before a figure is created, so a failing call leaves no empty figure.

    Args:
        adata: AnnData object with PCA variance information in ``adata.uns``.
        obsm_key: PCA key used in ``adata.uns`` (default: ``"X_pca"``).
        xlab: X-axis label.
        ylab: Y-axis label.
        title: Plot title.
        ax: Existing matplotlib axes to reuse.
        **kwargs: Extra arguments forwarded to ``ax.plot``. They override the
            default ``marker="o"``.

    Returns:
        Matplotlib Axes with cumulative variance line chart.

    Raises:
        KeyError: If variance ratio is unavailable in ``adata.uns``.
    """
    if obsm_key not in adata.uns or not isinstance(adata.uns[obsm_key], dict):
        raise KeyError(f"AnnData .uns has no '{obsm_key}' PCA metadata. Compute PCA first using microarray.tools.pca.")

    variance_ratio = adata.uns[obsm_key].get("variance_ratio")
    if variance_ratio is None:
        raise KeyError(
            f"AnnData .uns['{obsm_key}'] has no 'variance_ratio'. Compute PCA first using microarray.tools.pca."
        )

    cumulative = np.cumsum(np.asarray(variance_ratio, dtype=float))
    component_numbers = np.arange(1, cumulative.size + 1)

    if ax is None:
        _, ax = plt.subplots(figsize=(6, 6))

    # Default marker first so a caller-supplied ``marker`` wins instead of colliding.
    line_style = {"marker": "o", **kwargs}
    ax.plot(component_numbers, cumulative, **line_style)
    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.set_title(title)
    if cumulative.size > 1:
        # With fewer than two components the limits would be identical
        # (1, 1); leave the x-axis to autoscale in that case.
        ax.set_xlim(1, cumulative.size)
    ax.set_ylim(0.0, 1.05)
    ax.grid(False)

    return ax
231
+
232
+
233
+ def pca_feature_variance(
234
+ adata: AnnData,
235
+ component: int = 1,
236
+ n_var: int = 20,
237
+ obsm_key: str = "X_pca",
238
+ xlab: str = "Feature rank",
239
+ ylab: str = "Variance contribution",
240
+ title: str | None = None,
241
+ ax: Axes | None = None,
242
+ **kwargs: Any,
243
+ ) -> Axes:
244
+ """Plot top feature variance contributions for one PCA component.
245
+
246
+ This uses squared PCA loadings as per-feature variance contribution within
247
+ a selected component and ranks features in decreasing order.
248
+
249
+ Args:
250
+ adata: AnnData object with PCA metadata in ``adata.uns``.
251
+ component: 1-based PCA component index to inspect.
252
+ n_var: Number of top-ranked features to display.
253
+ obsm_key: PCA key used in ``adata.uns`` (default: ``"X_pca"``).
254
+ xlab: X-axis label.
255
+ ylab: Y-axis label.
256
+ title: Plot title. Defaults to ``"PC{component} Feature Variance"``.
257
+ ax: Existing matplotlib axes to reuse.
258
+ **kwargs: Extra arguments forwarded to ``ax.plot``.
259
+
260
+ Returns:
261
+ Matplotlib Axes with ranked feature variance contributions.
262
+
263
+ Raises:
264
+ KeyError: If PCA metadata/components are unavailable.
265
+ ValueError: If ``component`` or ``n_var`` is invalid.
266
+ """
267
+ if ax is None:
268
+ _, ax = plt.subplots(figsize=(6, 6))
269
+
270
+ if n_var < 1:
271
+ raise ValueError("n_var must be at least 1")
272
+ if component < 1:
273
+ raise ValueError("component must be at least 1")
274
+
275
+ if obsm_key not in adata.uns or not isinstance(adata.uns[obsm_key], dict):
276
+ raise KeyError(f"AnnData .uns has no '{obsm_key}' PCA metadata. Compute PCA first using microarray.tools.pca.")
277
+
278
+ components = adata.uns[obsm_key].get("components")
279
+ if components is None:
280
+ raise KeyError(
281
+ f"AnnData .uns['{obsm_key}'] has no 'components'. Recompute PCA with a version that stores loadings."
282
+ )
283
+
284
+ components = np.asarray(components, dtype=float)
285
+ n_components, n_features = components.shape
286
+ if component > n_components:
287
+ raise ValueError(f"component must be <= {n_components}")
288
+
289
+ contribution = np.square(components[component - 1])
290
+ top_n = min(n_var, n_features)
291
+ top_idx = np.argsort(contribution)[::-1][:top_n]
292
+ top_contrib = contribution[top_idx]
293
+ ranks = np.arange(1, top_n + 1)
294
+
295
+ feature_names = np.asarray(adata.var_names.astype(str)) if adata.var_names is not None else np.array([])
296
+ if feature_names.size != n_features:
297
+ feature_names = np.array([f"Feature {i}" for i in range(n_features)])
298
+
299
+ ax.plot(ranks, top_contrib, marker="o", alpha=0.1, **kwargs)
300
+
301
+ for rank, value, idx in zip(ranks, top_contrib, top_idx, strict=False):
302
+ ax.text(
303
+ rank,
304
+ value,
305
+ feature_names[idx],
306
+ rotation=90,
307
+ ha="center",
308
+ va="center",
309
+ fontsize=8,
310
+ )
311
+
312
+ if title is None:
313
+ title = f"PC{component} Feature Variance"
314
+
315
+ ax.set_xlabel(xlab)
316
+ ax.set_ylabel(ylab)
317
+ ax.set_title(title)
318
+ ax.grid(False)
319
+
320
+ return ax
@@ -0,0 +1,335 @@
1
+ """Quality control plot functions for microarray data."""
2
+
3
+ from typing import Any
4
+
5
+ import matplotlib.pyplot as plt
6
+ import numpy as np
7
+ from anndata import AnnData
8
+ from matplotlib.axes import Axes
9
+ from scipy import stats
10
+
11
+ from microarray.plotting._utils import get_default_colors
12
+
13
+
14
+ def densities(
15
+ adata: AnnData,
16
+ arrays: list[int | str] | None = None,
17
+ colors: list[str] | None = None,
18
+ xlab: str = "Log2 intensity",
19
+ ylab: str = "Density",
20
+ title: str = "Intensity Distributions",
21
+ legend: bool | str = "best",
22
+ ax: Axes | None = None,
23
+ **kwargs: Any,
24
+ ) -> Axes:
25
+ """Plot overlaid density estimates for multiple arrays.
26
+
27
+ Displays kernel density estimates of probe intensity distributions
28
+ for quality control. Similar arrays should have similar distributions.
29
+
30
+ Args:
31
+ adata: AnnData object with probe-level expression data in .X
32
+ arrays: List of array indices/names to plot. If None, plots all arrays.
33
+ colors: List of colors for each array. If None, uses default palette.
34
+ xlab: X-axis label
35
+ ylab: Y-axis label
36
+ title: Plot title
37
+ legend: Legend position ('best', 'upper right', etc.) or False to disable
38
+ ax: Existing Axes object. If None, creates new figure.
39
+ **kwargs: Additional arguments passed to ax.plot()
40
+
41
+ Returns:
42
+ Axes object with density plot
43
+
44
+ Examples:
45
+ >>> import anndata as ad
46
+ >>> import numpy as np
47
+ >>> from microarray.plotting import densities
48
+ >>> data = np.random.randn(1000, 4)
49
+ >>> adata = ad.AnnData(data.T)
50
+ >>> ax = densities(adata)
51
+ """
52
+ if ax is None:
53
+ _, ax = plt.subplots(figsize=(10, 6))
54
+
55
+ # Get expression matrix (samples x probes)
56
+ expr = adata.X
57
+
58
+ # Convert to log2 if not already
59
+ if expr.min() >= 0 and (expr.max() - expr.min()) > 20:
60
+ # Likely raw intensity values
61
+ log_expr = np.log2(expr + 1)
62
+ else:
63
+ log_expr = expr
64
+
65
+ # Select arrays to plot
66
+ if arrays is None:
67
+ array_indices = list(range(expr.shape[0]))
68
+ array_names = list(adata.obs_names) if adata.obs_names is not None else [f"Array {i}" for i in array_indices]
69
+ else:
70
+ array_indices = []
71
+ array_names = []
72
+ for arr in arrays:
73
+ if isinstance(arr, str):
74
+ idx = list(adata.obs_names).index(arr)
75
+ array_indices.append(idx)
76
+ array_names.append(arr)
77
+ else:
78
+ array_indices.append(arr)
79
+ array_names.append(adata.obs_names[arr] if adata.obs_names is not None else f"Array {arr}")
80
+
81
+ # Get colors
82
+ if colors is None:
83
+ colors = get_default_colors(len(array_indices))
84
+ elif len(colors) < len(array_indices):
85
+ colors = colors + get_default_colors(len(array_indices) - len(colors))
86
+
87
+ # Plot density for each array
88
+ for idx, name, color in zip(array_indices, array_names, colors, strict=False):
89
+ data = log_expr[idx, :]
90
+ # Remove NaN values
91
+ data = data[np.isfinite(data)]
92
+
93
+ if len(data) < 2:
94
+ continue
95
+
96
+ # Use kernel density estimation
97
+ kde = stats.gaussian_kde(data)
98
+
99
+ # Create x-axis for density plot
100
+ x_min, x_max = data.min(), data.max()
101
+ x_range = x_max - x_min
102
+ x = np.linspace(x_min - 0.1 * x_range, x_max + 0.1 * x_range, 512)
103
+
104
+ # Compute density
105
+ density = kde(x)
106
+
107
+ # Plot
108
+ ax.plot(x, density, color=color, label=name, linewidth=2, **kwargs)
109
+
110
+ # Set labels and title
111
+ ax.set_xlabel(xlab)
112
+ ax.set_ylabel(ylab)
113
+ ax.set_title(title)
114
+
115
+ # Add legend if requested
116
+ if legend and len(array_indices) > 1:
117
+ ax.legend(loc=legend if isinstance(legend, str) else "best", frameon=True)
118
+
119
+ ax.grid(True, alpha=0.3, linestyle="--")
120
+ ax.set_ylim(bottom=0) # Density should start at 0
121
+
122
+ return ax
123
+
124
+
125
+ def boxplot(
126
+ adata: AnnData,
127
+ arrays: list[int | str] | None = None,
128
+ colors: list[str] | str | None = None,
129
+ xlab: str = "Array",
130
+ ylab: str = "Log2 intensity",
131
+ title: str = "Intensity Boxplots",
132
+ show_fliers: bool = False,
133
+ ax: Axes | None = None,
134
+ **kwargs: Any,
135
+ ) -> Axes:
136
+ """Create boxplots of probe intensities across arrays.
137
+
138
+ Displays distribution of probe intensities for each array using boxplots.
139
+ Useful for comparing overall intensity levels and dispersion across arrays.
140
+
141
+ Args:
142
+ adata: AnnData object with probe-level expression data in .X
143
+ arrays: List of array indices/names to plot. If None, plots all arrays.
144
+ colors: Color(s) for boxes. Can be single color or list of colors.
145
+ xlab: X-axis label
146
+ ylab: Y-axis label
147
+ title: Plot title
148
+ show_fliers: Whether to show outlier points. Default False.
149
+ ax: Existing Axes object. If None, creates new figure.
150
+ **kwargs: Additional arguments passed to ax.boxplot()
151
+
152
+ Returns:
153
+ Axes object with boxplot
154
+
155
+ Examples:
156
+ >>> import anndata as ad
157
+ >>> import numpy as np
158
+ >>> from microarray.plotting import boxplot
159
+ >>> data = np.random.randn(1000, 4)
160
+ >>> adata = ad.AnnData(data.T)
161
+ >>> ax = boxplot(adata)
162
+ """
163
+ if ax is None:
164
+ _, ax = plt.subplots(figsize=(max(8, len(adata.obs_names) * 0.8), 6))
165
+
166
+ # Get expression matrix (samples x probes)
167
+ expr = adata.X
168
+
169
+ # Convert to log2 if not already
170
+ if expr.min() >= 0 and (expr.max() - expr.min()) > 20:
171
+ log_expr = np.log2(expr + 1)
172
+ else:
173
+ log_expr = expr
174
+
175
+ # Select arrays to plot
176
+ if arrays is None:
177
+ array_indices = list(range(expr.shape[0]))
178
+ array_names = list(adata.obs_names) if adata.obs_names is not None else [f"Array {i}" for i in array_indices]
179
+ else:
180
+ array_indices = []
181
+ array_names = []
182
+ for arr in arrays:
183
+ if isinstance(arr, str):
184
+ idx = list(adata.obs_names).index(arr)
185
+ array_indices.append(idx)
186
+ array_names.append(arr)
187
+ else:
188
+ array_indices.append(arr)
189
+ array_names.append(adata.obs_names[arr] if adata.obs_names is not None else f"Array {arr}")
190
+
191
+ # Prepare data for boxplot
192
+ data_list = []
193
+ for idx in array_indices:
194
+ data = log_expr[idx, :]
195
+ # Remove NaN values
196
+ data = data[np.isfinite(data)]
197
+ data_list.append(data)
198
+
199
+ # Create boxplot
200
+ bp = ax.boxplot(data_list, labels=array_names, showfliers=show_fliers, patch_artist=True, **kwargs)
201
+
202
+ # Color boxes
203
+ if colors is not None:
204
+ if isinstance(colors, str):
205
+ # Single color for all boxes
206
+ for patch in bp["boxes"]:
207
+ patch.set_facecolor(colors)
208
+ else:
209
+ # List of colors
210
+ if len(colors) < len(array_indices):
211
+ colors = colors + get_default_colors(len(array_indices) - len(colors))
212
+ for patch, color in zip(bp["boxes"], colors, strict=False):
213
+ patch.set_facecolor(color)
214
+
215
+ # Set labels and title
216
+ ax.set_xlabel(xlab)
217
+ ax.set_ylabel(ylab)
218
+ ax.set_title(title)
219
+
220
+ # Rotate x-axis labels if many arrays
221
+ if len(array_indices) > 4:
222
+ ax.set_xticklabels(array_names, rotation=45, ha="right")
223
+
224
+ ax.grid(True, alpha=0.3, linestyle="--", axis="y")
225
+
226
+ return ax
227
+
228
+
229
+ def histogram(
230
+ adata: AnnData,
231
+ arrays: list[int | str] | None = None,
232
+ bins: int = 50,
233
+ colors: list[str] | None = None,
234
+ xlab: str = "Log2 intensity",
235
+ ylab: str = "Frequency",
236
+ title: str = "Intensity Histograms",
237
+ alpha: float = 0.6,
238
+ legend: bool | str = "best",
239
+ ax: Axes | None = None,
240
+ **kwargs: Any,
241
+ ) -> Axes:
242
+ """Plot histograms of probe intensities for multiple arrays.
243
+
244
+ Displays probe intensity distributions as histograms.
245
+ Similar to densities() but shows actual counts rather than smoothed density.
246
+
247
+ Args:
248
+ adata: AnnData object with probe-level expression data in .X
249
+ arrays: List of array indices/names to plot. If None, plots all arrays.
250
+ bins: Number of histogram bins. Default 50.
251
+ colors: List of colors for each array. If None, uses default palette.
252
+ xlab: X-axis label
253
+ ylab: Y-axis label
254
+ title: Plot title
255
+ alpha: Transparency of histogram bars (0-1)
256
+ legend: Legend position ('best', 'upper right', etc.) or False to disable
257
+ ax: Existing Axes object. If None, creates new figure.
258
+ **kwargs: Additional arguments passed to ax.hist()
259
+
260
+ Returns:
261
+ Axes object with histogram
262
+
263
+ Examples:
264
+ >>> import anndata as ad
265
+ >>> import numpy as np
266
+ >>> from microarray.plotting import histogram
267
+ >>> data = np.random.randn(1000, 4)
268
+ >>> adata = ad.AnnData(data.T)
269
+ >>> ax = histogram(adata)
270
+ """
271
+ if ax is None:
272
+ _, ax = plt.subplots(figsize=(10, 6))
273
+
274
+ # Get expression matrix (samples x probes)
275
+ expr = adata.X
276
+
277
+ # Convert to log2 if not already
278
+ if expr.min() >= 0 and (expr.max() - expr.min()) > 20:
279
+ log_expr = np.log2(expr + 1)
280
+ else:
281
+ log_expr = expr
282
+
283
+ # Select arrays to plot
284
+ if arrays is None:
285
+ array_indices = list(range(expr.shape[0]))
286
+ array_names = list(adata.obs_names) if adata.obs_names is not None else [f"Array {i}" for i in array_indices]
287
+ else:
288
+ array_indices = []
289
+ array_names = []
290
+ for arr in arrays:
291
+ if isinstance(arr, str):
292
+ idx = list(adata.obs_names).index(arr)
293
+ array_indices.append(idx)
294
+ array_names.append(arr)
295
+ else:
296
+ array_indices.append(arr)
297
+ array_names.append(adata.obs_names[arr] if adata.obs_names is not None else f"Array {arr}")
298
+
299
+ # Get colors
300
+ if colors is None:
301
+ colors = get_default_colors(len(array_indices))
302
+ elif len(colors) < len(array_indices):
303
+ colors = colors + get_default_colors(len(array_indices) - len(colors))
304
+
305
+ # Determine common bin range across all arrays
306
+ all_data = []
307
+ for idx in array_indices:
308
+ data = log_expr[idx, :]
309
+ data = data[np.isfinite(data)]
310
+ all_data.extend(data)
311
+
312
+ bin_range = (np.min(all_data), np.max(all_data))
313
+
314
+ # Plot histogram for each array
315
+ for idx, name, color in zip(array_indices, array_names, colors, strict=False):
316
+ data = log_expr[idx, :]
317
+ data = data[np.isfinite(data)]
318
+
319
+ if len(data) < 1:
320
+ continue
321
+
322
+ ax.hist(data, bins=bins, range=bin_range, color=color, alpha=alpha, label=name, edgecolor="none", **kwargs)
323
+
324
+ # Set labels and title
325
+ ax.set_xlabel(xlab)
326
+ ax.set_ylabel(ylab)
327
+ ax.set_title(title)
328
+
329
+ # Add legend if requested
330
+ if legend and len(array_indices) > 1:
331
+ ax.legend(loc=legend if isinstance(legend, str) else "best", frameon=True)
332
+
333
+ ax.grid(True, alpha=0.3, linestyle="--", axis="y")
334
+
335
+ return ax
@@ -0,0 +1,38 @@
1
+ from typing import Literal
2
+
3
+ from anndata import AnnData
4
+ from matplotlib.axes import Axes
5
+ from matplotlib.figure import Figure
6
+
7
+ from ._base import _plot_obs_barplot, _plot_obs_boxplot, _plot_obs_violinplot
8
+
9
+
10
def score(
    adata: AnnData,
    groupby: str | None = None,
    score_name: str = "score",
    kind: Literal["bar", "box", "violin"] = "bar",
    **kwargs,
) -> tuple[Figure, Axes]:
    """Plot gene set scores.

    This function does not compute anything itself: it hands the
    ``adata.obs`` column named ``score_name`` to the bar, box or violin
    plotting helper selected by ``kind``. The scores therefore have to be
    present already when this function is called.

    Args:
        adata: AnnData object passed through to the plotting helper.
        groupby: Optional column in ``adata.obs`` to group samples by for plotting.
        score_name: Column name in ``adata.obs`` where scores are stored.
        kind: Type of plot to create. Options are "bar", "box", or "violin".
        **kwargs: Additional keyword arguments passed to the underlying plotting function.

    Returns:
        A tuple containing a matplotlib Figure and Axes object with the plot of scores.

    Raises:
        ValueError: If ``kind`` is not one of "bar", "box" or "violin".
    """
    if kind not in ("bar", "box", "violin"):
        raise ValueError(f"Invalid plot kind: {kind}. Choose from 'bar', 'box', or 'violin'.")

    plotters = {
        "bar": _plot_obs_barplot,
        "box": _plot_obs_boxplot,
        "violin": _plot_obs_violinplot,
    }
    return plotters[kind](adata, groupby=groupby, values=score_name, **kwargs)