microarray 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. microarray/__init__.py +15 -0
  2. microarray/_version.py +3 -0
  3. microarray/datasets/__init__.py +3 -0
  4. microarray/datasets/_arrayexpress.py +1 -0
  5. microarray/datasets/_cdf_files.py +35 -0
  6. microarray/datasets/_geo.py +1 -0
  7. microarray/datasets/_utils.py +143 -0
  8. microarray/io/__init__.py +17 -0
  9. microarray/io/_anndata_converter.py +198 -0
  10. microarray/io/_cdf.py +575 -0
  11. microarray/io/_cel.py +591 -0
  12. microarray/io/_read.py +127 -0
  13. microarray/plotting/__init__.py +28 -0
  14. microarray/plotting/_base.py +253 -0
  15. microarray/plotting/_cel.py +75 -0
  16. microarray/plotting/_de_plots.py +239 -0
  17. microarray/plotting/_diagnostic_plots.py +268 -0
  18. microarray/plotting/_heatmap.py +279 -0
  19. microarray/plotting/_ma_plots.py +136 -0
  20. microarray/plotting/_pca.py +320 -0
  21. microarray/plotting/_qc_plots.py +335 -0
  22. microarray/plotting/_score.py +38 -0
  23. microarray/plotting/_top_table_heatmap.py +98 -0
  24. microarray/plotting/_utils.py +280 -0
  25. microarray/preprocessing/__init__.py +39 -0
  26. microarray/preprocessing/_background.py +862 -0
  27. microarray/preprocessing/_log2.py +77 -0
  28. microarray/preprocessing/_normalize.py +1292 -0
  29. microarray/preprocessing/_rma.py +243 -0
  30. microarray/preprocessing/_robust.py +170 -0
  31. microarray/preprocessing/_summarize.py +318 -0
  32. microarray/py.typed +0 -0
  33. microarray/tools/__init__.py +26 -0
  34. microarray/tools/_biomart.py +416 -0
  35. microarray/tools/_empirical_bayes.py +401 -0
  36. microarray/tools/_fdist.py +171 -0
  37. microarray/tools/_linear_models.py +387 -0
  38. microarray/tools/_mds.py +101 -0
  39. microarray/tools/_pca.py +88 -0
  40. microarray/tools/_score.py +86 -0
  41. microarray/tools/_toptable.py +360 -0
  42. microarray-0.1.0.dist-info/METADATA +75 -0
  43. microarray-0.1.0.dist-info/RECORD +44 -0
  44. microarray-0.1.0.dist-info/WHEEL +4 -0
microarray/io/_read.py ADDED
@@ -0,0 +1,127 @@
1
+ """High-level readers for raw CEL data."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from glob import glob
7
+ from typing import Literal
8
+
9
+ from anndata import AnnData, concat
10
+ from tqdm import tqdm
11
+
12
+ from microarray.io._anndata_converter import cel_to_anndata
13
+ from microarray.io._cdf import CdfFile
14
+
15
+ DEFAULT_CEL_PATTERNS = ["*.cel", "*.cel.gz"]
16
+
17
+
18
+ def _collect_cel_paths(folder: str, patterns: str | list[str] | None = None) -> list[str]:
19
+ """Collect CEL file paths from a folder using common CEL filename patterns.
20
+
21
+ Parameters
22
+ ----------
23
+ folder : str
24
+ Path to the folder containing CEL files.
25
+ patterns : str or list of str, optional
26
+ Filename patterns to match CEL files. Defaults to common CEL patterns.
27
+
28
+ Returns:
29
+ -------
30
+ list of str
31
+ List of paths to the CEL files found in the folder.
32
+
33
+ Raises:
34
+ ------
35
+ ValueError
36
+ If the specified folder does not exist or if no CEL files are found.
37
+ """
38
+ if not os.path.isdir(folder):
39
+ raise ValueError(f"Not a directory: {folder}")
40
+
41
+ if patterns is None:
42
+ patterns = DEFAULT_CEL_PATTERNS
43
+
44
+ cel_paths = []
45
+ if isinstance(patterns, str):
46
+ patterns = [patterns]
47
+ for pattern in patterns:
48
+ cel_paths.extend(glob(os.path.join(folder, pattern)))
49
+
50
+ if len(cel_paths) == 0:
51
+ raise ValueError(f"No CEL files found in folder: {folder}")
52
+
53
+ return cel_paths
54
+
55
+
56
def read_cel(cel_file: str, cdf_file: str | CdfFile) -> AnnData:
    """Read a single CEL file into an unprocessed AnnData object.

    The sample name is derived from the file name with its extension removed.
    For gzip-compressed files the ``.gz`` suffix is removed as well, so
    ``sample.cel`` and ``sample.cel.gz`` both yield the sample name ``sample``.

    Parameters
    ----------
    cel_file : str
        Path to the CEL file.
    cdf_file : str or CdfFile
        Path to the CDF file or a parsed CdfFile instance. It is forwarded
        unchanged to ``cel_to_anndata``.

    Returns
    -------
    AnnData
        The object returned by ``cel_to_anndata`` for this file (documented
        upstream as a probe-grid AnnData holding one sample).
    """
    sample_name = os.path.basename(cel_file)
    # Compressed files carry a double extension ("sample.cel.gz"); drop the
    # compression suffix first, otherwise splitext would only strip ".gz" and
    # the sample would be named "sample.cel".
    if sample_name.lower().endswith(".gz"):
        sample_name = sample_name[: -len(".gz")]
    sample_name = os.path.splitext(sample_name)[0]
    return cel_to_anndata(cel_file, cdf_file, sample_name=sample_name)
76
+
77
+
78
def read_cel_batch(
    cel_folder: str,
    cdf_file: str | CdfFile,
    patterns: str | list[str] | None = None,
    axis: Literal[0, 1] = 0,
    join: Literal["inner", "outer"] = "outer",
    merge: Literal["same", "unique"] | None = "same",
    verbose: bool = True,
) -> AnnData:
    """Read every CEL file in a folder and concatenate the results.

    Parameters
    ----------
    cel_folder : str
        Path to the folder containing CEL files.
    cdf_file : str or CdfFile
        Path to the CDF file or a parsed CdfFile instance; passed to
        ``read_cel`` for every file.
    patterns : str or list of str, optional
        Filename patterns used to find CEL files. Defaults to the common CEL
        patterns used by ``_collect_cel_paths``.
    axis : {0, 1}, optional
        Axis forwarded to ``anndata.concat``. Defaults to 0.
    join : {"inner", "outer"}, optional
        ``join`` strategy forwarded to ``anndata.concat``. Defaults to "outer".
    merge : {"same", "unique"}, optional
        ``merge`` strategy forwarded to ``anndata.concat``. Defaults to "same".
    verbose : bool, optional
        Show a progress bar while reading. Defaults to True.

    Returns
    -------
    AnnData
        The concatenation of the per-file AnnData objects.

    Raises
    ------
    ValueError
        If a matched path is not a regular file, or if the files do not all
        have the same number of variables.
    """
    progress = tqdm(
        _collect_cel_paths(cel_folder, patterns),
        desc="Reading CEL files",
        disable=not verbose,
    )

    samples: list[AnnData] = []
    for cel_path in progress:
        # A pattern may also match directories; only regular files are read.
        if not os.path.isfile(cel_path):
            raise ValueError(f"File not found: {cel_path}")
        samples.append(read_cel(cel_path, cdf_file))

    # Every sample must have as many variables as the first one.
    expected_n_vars = samples[0].n_vars
    if any(sample.n_vars != expected_n_vars for sample in samples[1:]):
        raise ValueError("CEL files in folder have inconsistent chip dimensions and cannot be stacked.")

    return concat(samples, axis=axis, join=join, merge=merge)
@@ -0,0 +1,28 @@
1
+ from microarray.plotting._cel import intensities, probe_annotations
2
+ from microarray.plotting._de_plots import venn, volcano
3
+ from microarray.plotting._diagnostic_plots import mds, sa
4
+ from microarray.plotting._heatmap import heatmap
5
+ from microarray.plotting._ma_plots import ma
6
+ from microarray.plotting._pca import pca, pca_feature_variance, pca_variance
7
+ from microarray.plotting._qc_plots import boxplot, densities, histogram
8
+ from microarray.plotting._score import score
9
+ from microarray.plotting._top_table_heatmap import top_table_heatmap
10
+
11
+ __all__ = [
12
+ "intensities",
13
+ "probe_annotations",
14
+ "ma",
15
+ "pca",
16
+ "pca_variance",
17
+ "pca_feature_variance",
18
+ "densities",
19
+ "boxplot",
20
+ "histogram",
21
+ "mds",
22
+ "sa",
23
+ "volcano",
24
+ "venn",
25
+ "heatmap",
26
+ "top_table_heatmap",
27
+ "score",
28
+ ]
@@ -0,0 +1,253 @@
1
+ import matplotlib.pyplot as plt
2
+ import numpy as np
3
+ from anndata import AnnData
4
+ from matplotlib.axes import Axes
5
+ from matplotlib.colors import Colormap
6
+ from matplotlib.figure import Figure
7
+
8
+
9
+ def _plot_base(
10
+ ax: Axes | None = None,
11
+ title: str | None = None,
12
+ xlabel: str | None = None,
13
+ ylabel: str | None = None,
14
+ figsize: tuple[float | int, float | int] = (5, 6),
15
+ ) -> tuple[Figure, Axes]:
16
+
17
+ if ax is None:
18
+ fig, ax = plt.subplots(figsize=figsize)
19
+ else:
20
+ fig = ax.figure
21
+
22
+ ax.set_xticks([])
23
+ ax.set_xticklabels([])
24
+ if ylabel is not None:
25
+ ax.set_ylabel(ylabel)
26
+ if xlabel is not None:
27
+ ax.set_xlabel(xlabel)
28
+ if title is not None:
29
+ ax.set_title(title)
30
+
31
+ return fig, ax
32
+
33
+
34
def _plot_obs_barplot(
    adata_obj: AnnData,
    groupby: str,
    values: str,
    ax: Axes | None = None,
    title: str | None = None,
    xlabel: str | None = None,
    ylabel: str | None = None,
    bar_alpha: float = 0.7,
    bar_width: float = 0.9,
    jitter_std: float = 0.05,
    jitter_seed: int = 42,
    figsize: tuple[float | int, float | int] = (5, 6),
    cmap: str | Colormap = "tab10",
    show: bool = True,
    show_legend: bool = True,
) -> tuple[Figure, Axes]:
    """Draw per-group mean bars of an ``obs`` column with jittered sample points.

    Parameters
    ----------
    adata_obj : AnnData
        Object whose ``obs`` table holds the ``groupby`` and ``values`` columns.
    groupby : str
        ``obs`` column defining the groups (one bar per group).
    values : str
        ``obs`` column with the values to average and scatter.
    ax : Axes, optional
        Axes to draw on; a new figure is created when omitted.
    title, xlabel, ylabel : str, optional
        Axes texts. ``xlabel``/``ylabel`` fall back to ``groupby``/``values``.
    bar_alpha, bar_width : float, optional
        Opacity and width of the bars.
    jitter_std : float, optional
        Standard deviation of the horizontal normal jitter of the points.
    jitter_seed : int, optional
        Seed of the jitter random generator, for reproducible plots.
    figsize : tuple, optional
        Size of the figure created when ``ax`` is omitted.
    cmap : str or Colormap, optional
        Colormap; group ``i`` uses ``cmap(i % 10)``.
    show : bool, optional
        Call ``fig.show()`` before returning.
    show_legend : bool, optional
        Draw a legend to the right of the axes.

    Returns
    -------
    tuple of (Figure, Axes)
        The figure and axes holding the plot.
    """
    fig, ax = _plot_base(
        ax=ax,
        title=title,
        xlabel=xlabel,
        ylabel=ylabel,
        figsize=figsize,
    )

    obs = adata_obj.obs[[groupby, values]].copy()
    # observed=True: when ``groupby`` is categorical, only categories that
    # actually occur get a bar. Without it unobserved categories appear with a
    # NaN mean (an invisible bar plus an empty legend entry) and recent pandas
    # versions emit a FutureWarning about the changing default.
    group_means = obs.groupby(groupby, observed=True)[values].mean()

    conditions = group_means.index.tolist()
    x = np.arange(len(conditions))

    cmap = plt.get_cmap(cmap)
    colors = {cond: cmap(i % 10) for i, cond in enumerate(conditions)}

    ax.bar(
        x,
        group_means.values,
        color=[colors[c] for c in conditions],
        alpha=bar_alpha,
        width=bar_width,
        edgecolor="black",
    )

    # Overlay the individual samples on their bar, spread horizontally so that
    # overlapping points stay visible.
    rng = np.random.default_rng(jitter_seed)
    for i, cond in enumerate(conditions):
        y = obs.loc[obs[groupby] == cond, values].values
        jitter = rng.normal(0, jitter_std, size=len(y))
        ax.scatter(
            np.full(len(y), i) + jitter,
            y,
            color=colors[cond],
            edgecolor="black",
            s=45,
            label=cond,
            zorder=3,
        )

    if ylabel is None:
        ax.set_ylabel(values)
    if xlabel is None:
        ax.set_xlabel(groupby)

    if show_legend:
        ax.legend(
            title=None,
            loc="upper left",
            bbox_to_anchor=(1.02, 1),
            borderaxespad=0,
            frameon=False,
        )

    if show:
        fig.show()
    return fig, ax
109
+
110
+
111
def _plot_obs_violinplot(
    adata_obj: AnnData,
    groupby: str,
    values: str,
    ax: Axes | None = None,
    title: str | None = None,
    xlabel: str | None = None,
    ylabel: str | None = None,
    jitter_std: float = 0.05,
    jitter_seed: int = 42,
    figsize: tuple[float | int, float | int] = (5, 6),
    cmap: str | Colormap = "tab10",
    show: bool = True,
    show_legend: bool = True,
) -> tuple[Figure, Axes]:
    """Draw per-group violins of an ``obs`` column with jittered sample points.

    Parameters
    ----------
    adata_obj : AnnData
        Object whose ``obs`` table holds the ``groupby`` and ``values`` columns.
    groupby : str
        ``obs`` column defining the groups (one violin per group, in order of
        first appearance). Samples with a missing group label are skipped.
    values : str
        ``obs`` column with the values to plot.
    ax : Axes, optional
        Axes to draw on; a new figure is created when omitted.
    title, xlabel, ylabel : str, optional
        Axes texts. ``xlabel``/``ylabel`` fall back to ``groupby``/``values``.
    jitter_std : float, optional
        Standard deviation of the horizontal normal jitter of the points.
    jitter_seed : int, optional
        Seed of the jitter random generator, for reproducible plots.
    figsize : tuple, optional
        Size of the figure created when ``ax`` is omitted.
    cmap : str or Colormap, optional
        Colormap; group ``i`` uses ``cmap(i % 10)``.
    show : bool, optional
        Call ``fig.show()`` before returning.
    show_legend : bool, optional
        Draw a legend to the right of the axes.

    Returns
    -------
    tuple of (Figure, Axes)
        The figure and axes holding the plot.
    """
    fig, ax = _plot_base(
        ax=ax,
        title=title,
        xlabel=xlabel,
        ylabel=ylabel,
        figsize=figsize,
    )

    obs = adata_obj.obs[[groupby, values]].copy()
    # Drop missing group labels: NaN never compares equal to itself, so a NaN
    # "condition" would select no rows below and hand an empty dataset to
    # violinplot.
    conditions = obs[groupby].dropna().unique().tolist()

    cmap = plt.get_cmap(cmap)
    colors = {cond: cmap(i % 10) for i, cond in enumerate(conditions)}

    data_to_plot = [obs.loc[obs[groupby] == cond, values].values for cond in conditions]
    violin_parts = ax.violinplot(data_to_plot, showmeans=False, showmedians=False, showextrema=False)

    for part, cond in zip(violin_parts["bodies"], conditions, strict=True):
        part.set_facecolor(colors[cond])
        part.set_edgecolor("black")
        part.set_alpha(0.7)

    rng = np.random.default_rng(jitter_seed)
    for i, cond in enumerate(conditions):
        y = obs.loc[obs[groupby] == cond, values].values
        jitter = rng.normal(0, jitter_std, size=len(y))
        ax.scatter(
            # violinplot uses positions 1..n by default, hence the +1 offset.
            np.full(len(y), i + 1) + jitter,
            y,
            color=colors[cond],
            edgecolor="black",
            s=45,
            label=cond,
            zorder=3,
        )

    if ylabel is None:
        ax.set_ylabel(values)
    if xlabel is None:
        ax.set_xlabel(groupby)

    if show_legend:
        ax.legend(
            title=None,
            loc="upper left",
            bbox_to_anchor=(1.02, 1),
            borderaxespad=0,
            frameon=False,
        )

    if show:
        fig.show()
    return fig, ax
180
+
181
+
182
def _plot_obs_boxplot(
    adata_obj: AnnData,
    groupby: str,
    values: str,
    ax: Axes | None = None,
    title: str | None = None,
    xlabel: str | None = None,
    ylabel: str | None = None,
    jitter_std: float = 0.05,
    jitter_seed: int = 42,
    figsize: tuple[float | int, float | int] = (5, 6),
    cmap: str | Colormap = "tab10",
    show: bool = True,
    show_legend: bool = True,
    widths: float | list[float] = 0.9,
) -> tuple[Figure, Axes]:
    """Draw per-group box plots of an ``obs`` column with jittered sample points.

    Parameters
    ----------
    adata_obj : AnnData
        Object whose ``obs`` table holds the ``groupby`` and ``values`` columns.
    groupby : str
        ``obs`` column defining the groups (one box per group, in order of
        first appearance). Samples with a missing group label are skipped.
    values : str
        ``obs`` column with the values to plot.
    ax : Axes, optional
        Axes to draw on; a new figure is created when omitted.
    title, xlabel, ylabel : str, optional
        Axes texts. ``xlabel``/``ylabel`` fall back to ``groupby``/``values``.
    jitter_std : float, optional
        Standard deviation of the horizontal normal jitter of the points.
    jitter_seed : int, optional
        Seed of the jitter random generator, for reproducible plots.
    figsize : tuple, optional
        Size of the figure created when ``ax`` is omitted.
    cmap : str or Colormap, optional
        Colormap; group ``i`` uses ``cmap(i % 10)``.
    show : bool, optional
        Call ``fig.show()`` before returning.
    show_legend : bool, optional
        Draw a legend to the right of the axes.
    widths : float or list of float, optional
        Box width(s), forwarded to ``Axes.boxplot``.

    Returns
    -------
    tuple of (Figure, Axes)
        The figure and axes holding the plot.
    """
    fig, ax = _plot_base(
        ax=ax,
        title=title,
        xlabel=xlabel,
        ylabel=ylabel,
        figsize=figsize,
    )

    obs = adata_obj.obs[[groupby, values]].copy()
    # Drop missing group labels: NaN never compares equal to itself, so a NaN
    # "condition" would select no rows below and only add an empty box and a
    # "nan" legend entry.
    conditions = obs[groupby].dropna().unique().tolist()

    cmap = plt.get_cmap(cmap)
    colors = {cond: cmap(i % 10) for i, cond in enumerate(conditions)}

    data_to_plot = [obs.loc[obs[groupby] == cond, values].values for cond in conditions]
    bplot = ax.boxplot(data_to_plot, patch_artist=True, widths=widths)

    for patch, cond in zip(bplot["boxes"], conditions, strict=True):
        patch.set_facecolor(colors[cond])
        patch.set_edgecolor("black")

    for median in bplot["medians"]:
        median.set(color="black")

    rng = np.random.default_rng(jitter_seed)
    for i, cond in enumerate(conditions):
        y = obs.loc[obs[groupby] == cond, values].values
        jitter = rng.normal(0, jitter_std, size=len(y))
        ax.scatter(
            # boxplot uses positions 1..n by default, hence the +1 offset.
            np.full(len(y), i + 1) + jitter,
            y,
            color=colors[cond],
            edgecolor="black",
            s=45,
            label=cond,
            zorder=3,
        )

    if ylabel is None:
        ax.set_ylabel(values)
    if xlabel is None:
        ax.set_xlabel(groupby)

    if show_legend:
        ax.legend(
            title=None,
            loc="upper left",
            bbox_to_anchor=(1.02, 1),
            borderaxespad=0,
            frameon=False,
        )

    if show:
        fig.show()
    return fig, ax
@@ -0,0 +1,75 @@
1
+ import matplotlib.pyplot as plt
2
+ from matplotlib.axes import Axes
3
+ from matplotlib.colors import Colormap
4
+ from matplotlib.patches import Patch
5
+
6
+ from microarray.io import CelFile
7
+
8
+
9
def _set_spines(ax: Axes, show_spines: bool) -> None:
    """Show or hide all four spines (top, right, bottom, left) of ``ax``."""
    for side in ("top", "right", "bottom", "left"):
        ax.spines[side].set_visible(show_spines)
14
+
15
+
16
def _remove_ticks(ax: Axes) -> None:
    """Remove the tick labels and then the ticks from both axes of ``ax``."""
    clearers = (
        ax.set_xticklabels,
        ax.set_yticklabels,
        ax.set_xticks,
        ax.set_yticks,
    )
    for clear in clearers:
        clear([])
21
+
22
+
23
def intensities(
    celfile: CelFile,
    show_spines: bool = True,
    show_ticks: bool = False,
    cmap: str | Colormap = "Reds",
    ax: Axes | None = None,
    title: str | None = "Intensities",
) -> Axes:
    """Render ``celfile.intensities`` as an image.

    Parameters
    ----------
    celfile : CelFile
        CEL file whose ``intensities`` attribute is drawn with ``imshow``.
    show_spines : bool, optional
        Whether the four axes spines are visible.
    show_ticks : bool, optional
        Whether ticks and tick labels are kept.
    cmap : str or Colormap, optional
        Colormap passed to ``imshow``.
    ax : Axes, optional
        Axes to draw on; a new 10x10 figure is created when omitted.
    title : str, optional
        Axes title; no title is set when ``None``.

    Returns
    -------
    Axes
        The axes holding the image.
    """
    if ax is None:
        ax = plt.subplots(figsize=(10, 10))[1]

    ax.imshow(celfile.intensities, cmap=cmap)

    if not show_ticks:
        _remove_ticks(ax)
    _set_spines(ax, show_spines)

    if title is not None:
        ax.set_title(title)
    return ax
44
+
45
+
46
def probe_annotations(
    celfile: CelFile,
    show_spines: bool = True,
    show_ticks: bool = False,
    ax: Axes | None = None,
    title: str | None = "Probe annotations",
) -> Axes:
    """Plot which cells of a CEL file carry a probe annotation.

    Cells whose entry in ``celfile.probe_annotation`` is ``None`` are drawn
    white ("No probe"); all other cells are drawn black ("Probe").

    Parameters
    ----------
    celfile : CelFile
        CEL file whose ``probe_annotation`` attribute is inspected.
    show_spines : bool, optional
        Whether the four axes spines are visible.
    show_ticks : bool, optional
        Whether ticks and tick labels are kept.
    ax : Axes, optional
        Axes to draw on; a new 10x10 figure is created when omitted.
    title : str, optional
        Axes title; no title is set when ``None``.

    Returns
    -------
    Axes
        The axes holding the image and its legend.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(10, 10))

    # Element-wise comparison against None is intended here (``is None`` would
    # test the container itself).
    # NOTE(review): assumes probe_annotation is an array-like supporting
    # element-wise ``==`` - confirm against CelFile.
    no_probe = celfile.probe_annotation == None  # noqa: E711

    # Pin the colour limits: with automatic scaling a uniform mask (e.g. no
    # cell annotated at all) has vmin == vmax and is rendered entirely black,
    # which would contradict the legend below.
    ax.imshow(no_probe, cmap="gray", vmin=0, vmax=1)
    if not show_ticks:
        _remove_ticks(ax)

    _set_spines(ax, show_spines)

    if title is not None:
        ax.set_title(title)

    ax.legend(
        handles=[
            Patch(facecolor="black", label="Probe", linewidth=1, edgecolor="black"),
            Patch(facecolor="white", label="No probe", linewidth=1, edgecolor="black"),
        ],
        labels=["Probe", "No probe"],
        loc="upper left",
        bbox_to_anchor=(1, 1),
        frameon=False,
    )
    return ax