multiscoresplot 2.0.0__tar.gz → 2.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/.gitignore +1 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/PKG-INFO +21 -1
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/README.md +20 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/docs/api/index.md +1 -0
- multiscoresplot-2.1.0/docs/api/pipeline.md +3 -0
- multiscoresplot-2.1.0/docs/changelog.md +54 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/docs/examples.md +27 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/docs/pipeline.md +123 -26
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/mkdocs.yml +1 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/pyproject.toml +1 -1
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/src/multiscoresplot/__init__.py +3 -1
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/src/multiscoresplot/_colorspace.py +98 -21
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/src/multiscoresplot/_interactive.py +30 -4
- multiscoresplot-2.1.0/src/multiscoresplot/_pipeline.py +195 -0
- multiscoresplot-2.1.0/src/multiscoresplot/_scoring.py +224 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/uv.lock +1 -1
- multiscoresplot-2.0.0/docs/changelog.md +0 -35
- multiscoresplot-2.0.0/src/multiscoresplot/_scoring.py +0 -99
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/.github/workflows/ci.yml +0 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/.github/workflows/docs.yml +0 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/.github/workflows/publish.yml +0 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/.pre-commit-config.yaml +0 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/LICENSE +0 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/docs/api/colorspace.md +0 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/docs/api/interactive.md +0 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/docs/api/legend.md +0 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/docs/api/plotting.md +0 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/docs/api/scoring.md +0 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/docs/getting-started.md +0 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/docs/index.md +0 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/src/multiscoresplot/_legend.py +0 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/src/multiscoresplot/_plotting.py +0 -0
- {multiscoresplot-2.0.0 → multiscoresplot-2.1.0}/src/multiscoresplot/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: multiscoresplot
|
|
3
|
-
Version: 2.0.0
|
|
3
|
+
Version: 2.1.0
|
|
4
4
|
Summary: Multi-dimensional gene set scoring and visualization for single-cell transcriptomics
|
|
5
5
|
Project-URL: Homepage, https://github.com/AndreMacedo88/multiscoresplot
|
|
6
6
|
Project-URL: Documentation, https://AndreMacedo88.github.io/multiscoresplot/
|
|
@@ -121,6 +121,14 @@ rgb = msp.reduce_to_rgb(scores, method="nmf")
|
|
|
121
121
|
rgb = msp.reduce_to_rgb(scores, method="ica")
|
|
122
122
|
```
|
|
123
123
|
|
|
124
|
+
**Or use a callable directly for one-off reductions:**
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
rgb = msp.reduce_to_rgb(scores, method=my_custom_fn, component_prefix="MY")
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
> **Note:** In reduction mode, RGB channels are learned statistical axes. Similar colors indicate similar *projected* profiles, not necessarily identical biology — see the [Pipeline Guide](https://AndreMacedo88.github.io/multiscoresplot/pipeline/) for interpretation caveats.
|
|
131
|
+
|
|
124
132
|
### Step 3 — Plot embedding
|
|
125
133
|
|
|
126
134
|
Scatter plot of embedding coordinates colored by RGB values, with an integrated color-space legend.
|
|
@@ -180,6 +188,17 @@ msp.render_legend(ax, "pca")
|
|
|
180
188
|
msp.render_legend(ax, "nmf", component_labels=["NMF1", "NMF2", "NMF3"])
|
|
181
189
|
```
|
|
182
190
|
|
|
191
|
+
## One-Step Convenience Function
|
|
192
|
+
|
|
193
|
+
For quick exploration, `plot_scores` wraps the entire pipeline in a single call:
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
scores, rgb, ax = msp.plot_scores(adata, gene_sets, basis="X_umap")
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
It auto-selects `blend_to_rgb` for ≤ 3 gene sets and `reduce_to_rgb(method="pca")` for more.
|
|
200
|
+
All scoring, color-mapping, and plotting parameters are accepted as keyword arguments.
|
|
201
|
+
|
|
183
202
|
## Extensibility — Custom reducers
|
|
184
203
|
|
|
185
204
|
Register your own dimensionality reduction method:
|
|
@@ -216,6 +235,7 @@ Full documentation is available at **[AndreMacedo88.github.io/multiscoresplot](h
|
|
|
216
235
|
|
|
217
236
|
| Function / Class | Description |
|
|
218
237
|
| --------------------------------------------------- | --------------------------------------------------- |
|
|
238
|
+
| `plot_scores(adata, gene_sets)` | One-step convenience: score → RGB → plot |
|
|
219
239
|
| `score_gene_sets(adata, gene_sets)` | Score gene sets per cell via pyUCell |
|
|
220
240
|
| `blend_to_rgb(scores)` | Multiplicative blend to RGB (2–3 sets) → RGBResult |
|
|
221
241
|
| `reduce_to_rgb(scores, method="pca")` | Dimensionality reduction to RGB (2+ sets) → RGBResult |
|
|
@@ -88,6 +88,14 @@ rgb = msp.reduce_to_rgb(scores, method="nmf")
|
|
|
88
88
|
rgb = msp.reduce_to_rgb(scores, method="ica")
|
|
89
89
|
```
|
|
90
90
|
|
|
91
|
+
**Or use a callable directly for one-off reductions:**
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
rgb = msp.reduce_to_rgb(scores, method=my_custom_fn, component_prefix="MY")
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
> **Note:** In reduction mode, RGB channels are learned statistical axes. Similar colors indicate similar *projected* profiles, not necessarily identical biology — see the [Pipeline Guide](https://AndreMacedo88.github.io/multiscoresplot/pipeline/) for interpretation caveats.
|
|
98
|
+
|
|
91
99
|
### Step 3 — Plot embedding
|
|
92
100
|
|
|
93
101
|
Scatter plot of embedding coordinates colored by RGB values, with an integrated color-space legend.
|
|
@@ -147,6 +155,17 @@ msp.render_legend(ax, "pca")
|
|
|
147
155
|
msp.render_legend(ax, "nmf", component_labels=["NMF1", "NMF2", "NMF3"])
|
|
148
156
|
```
|
|
149
157
|
|
|
158
|
+
## One-Step Convenience Function
|
|
159
|
+
|
|
160
|
+
For quick exploration, `plot_scores` wraps the entire pipeline in a single call:
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
scores, rgb, ax = msp.plot_scores(adata, gene_sets, basis="X_umap")
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
It auto-selects `blend_to_rgb` for ≤ 3 gene sets and `reduce_to_rgb(method="pca")` for more.
|
|
167
|
+
All scoring, color-mapping, and plotting parameters are accepted as keyword arguments.
|
|
168
|
+
|
|
150
169
|
## Extensibility — Custom reducers
|
|
151
170
|
|
|
152
171
|
Register your own dimensionality reduction method:
|
|
@@ -183,6 +202,7 @@ Full documentation is available at **[AndreMacedo88.github.io/multiscoresplot](h
|
|
|
183
202
|
|
|
184
203
|
| Function / Class | Description |
|
|
185
204
|
| --------------------------------------------------- | --------------------------------------------------- |
|
|
205
|
+
| `plot_scores(adata, gene_sets)` | One-step convenience: score → RGB → plot |
|
|
186
206
|
| `score_gene_sets(adata, gene_sets)` | Score gene sets per cell via pyUCell |
|
|
187
207
|
| `blend_to_rgb(scores)` | Multiplicative blend to RGB (2–3 sets) → RGBResult |
|
|
188
208
|
| `reduce_to_rgb(scores, method="pca")` | Dimensionality reduction to RGB (2+ sets) → RGBResult |
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
| [`blend_to_rgb`](colorspace.md#multiscoresplot.blend_to_rgb) | Multiplicative blend to RGB (2–3 sets) | Step 2 |
|
|
9
9
|
| [`reduce_to_rgb`](colorspace.md#multiscoresplot.reduce_to_rgb) | Dimensionality reduction to RGB (2+ sets) | Step 2 |
|
|
10
10
|
| [`RGBResult`](colorspace.md#multiscoresplot.RGBResult) | Return type of `blend_to_rgb` / `reduce_to_rgb` with metadata | Step 2 |
|
|
11
|
+
| [`plot_scores`](pipeline.md) | One-step convenience: score → RGB → plot | Steps 1–3 |
|
|
11
12
|
| [`plot_embedding`](plotting.md) | Static matplotlib scatter plot | Step 3 |
|
|
12
13
|
| [`plot_embedding_interactive`](interactive.md) | Interactive Plotly scatter plot | Step 3 |
|
|
13
14
|
| [`render_legend`](legend.md) | Draw color-space legend on axes | Optional |
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 2.1.0
|
|
4
|
+
|
|
5
|
+
### New features
|
|
6
|
+
- **`blend_to_rgb` / `reduce_to_rgb`**: new `prefix` and `suffix` keyword parameters for custom score column naming conventions (e.g., `prefix="msp-"`, `suffix="_v2"`). Defaults match existing `"score-"` behavior.
|
|
7
|
+
- **`RGBResult`**: new `prefix` and `suffix` fields so downstream functions auto-detect the naming convention.
|
|
8
|
+
- **`plot_embedding_interactive`**: new `prefix` and `suffix` keyword parameters for correct hover auto-extraction with custom column names. Defaults inherit from `RGBResult` when available.
|
|
9
|
+
- **`plot_scores`**: new `prefix` and `suffix` keyword parameters forwarded to all pipeline steps (scoring, color mapping, and interactive plotting).
|
|
10
|
+
- **`plot_scores`**: new one-step convenience function that wraps the full score → RGB → plot pipeline. Auto-selects `blend_to_rgb` for ≤ 3 gene sets and `reduce_to_rgb(method="pca")` for more.
|
|
11
|
+
- **`reduce_to_rgb`**: `method` now accepts a callable with signature `(X, n_components, **kwargs) -> NDArray` for one-off custom reductions. New `component_prefix` parameter overrides legend axis labels.
|
|
12
|
+
- **`score_gene_sets`**: emits `UserWarning` listing missing genes per gene set (genes not found in `adata.var_names` are imputed by pyUCell with worst-case rank).
|
|
13
|
+
- **`score_gene_sets`**: emits `UserWarning` when `adata.X` contains negative values (e.g., after `sc.pp.scale()`), since UCell is designed for non-negative counts.
|
|
14
|
+
- **`score_gene_sets`**: automatically copies read-only `adata.X` arrays to prevent crashes inside pyUCell (works around a pyUCell bug with read-only arrays after `sc.pp.scale()`).
|
|
15
|
+
- **`score_gene_sets`**: new `clip_pct` parameter for per-gene-set percentile clipping (winsorization). Accepts a single float for upper-tail clipping or a `(lo, hi)` tuple for both tails.
|
|
16
|
+
- **`score_gene_sets`**: new `normalize` parameter for per-gene-set min-max rescaling to [0, 1]. Applied after clipping.
|
|
17
|
+
|
|
18
|
+
### Documentation
|
|
19
|
+
- Added color interpretation caveats for reduction mode in Pipeline Guide.
|
|
20
|
+
- Added inline callable example in Examples page.
|
|
21
|
+
|
|
22
|
+
## 2.0.0
|
|
23
|
+
|
|
24
|
+
### Breaking changes
|
|
25
|
+
- `blend_to_rgb` and `reduce_to_rgb` now return `RGBResult` (carries RGB array + metadata). The object supports numpy array protocol, so `np.asarray(result)`, indexing, and comparisons still work.
|
|
26
|
+
- `plot_embedding` and `plot_embedding_interactive`: `basis=` now takes the **full obsm key** (e.g. `"X_umap"`, `"umap_consensus"`). The old short form (`basis="umap"`) still works but emits a `DeprecationWarning`.
|
|
27
|
+
- `plot_embedding`: `legend=True` (default) now **requires** a known method. Previously it silently skipped the legend when `method=None`; now it raises `ValueError`. Pass `legend=False` or provide `method=`, or use an `RGBResult`.
|
|
28
|
+
- `plot_embedding_interactive`: `width`/`height` replaced by `figsize` (inches) + `dpi`. Pixel dimensions = `figsize * dpi`.
|
|
29
|
+
|
|
30
|
+
### New features
|
|
31
|
+
- **`RGBResult`**: new dataclass returned by `blend_to_rgb` / `reduce_to_rgb`. Carries `method`, `gene_set_names`, and `colors` metadata that plotting functions auto-detect.
|
|
32
|
+
- **`plot_embedding`**: new params `legend_size`, `legend_resolution`, `dpi`.
|
|
33
|
+
- **`plot_embedding_interactive`**: new param `legend_kwargs`. `figsize`/`dpi` replace `width`/`height`.
|
|
34
|
+
- **Both plotting functions**: consistent `legend_size`, `legend_resolution`, and `legend_kwargs` params.
|
|
35
|
+
- **`hover_columns`** now falls back to `adata.var_names` for gene expression values (sparse matrices supported).
|
|
36
|
+
- **`gene_set_names`** behavior is now consistent between static and interactive plots.
|
|
37
|
+
|
|
38
|
+
## 1.0.3
|
|
39
|
+
|
|
40
|
+
- Fix badge display in README
|
|
41
|
+
|
|
42
|
+
## 1.0.2
|
|
43
|
+
|
|
44
|
+
- Fix legend not being plotted in interactive "direct" methods
|
|
45
|
+
- Fix CI badge path in README
|
|
46
|
+
|
|
47
|
+
## 1.0.1
|
|
48
|
+
|
|
49
|
+
- Initial stable release
|
|
50
|
+
- 5-step pipeline: score, blend, reduce, plot, legend
|
|
51
|
+
- Built-in reducers: PCA, NMF, ICA
|
|
52
|
+
- Pluggable reducer registry
|
|
53
|
+
- Static matplotlib and interactive Plotly plotting
|
|
54
|
+
- Color-space legends for direct and reduction modes
|
|
@@ -43,6 +43,33 @@ rgb = msp.reduce_to_rgb(scores, method="umap")
|
|
|
43
43
|
msp.plot_embedding(adata, rgb, basis="X_umap")
|
|
44
44
|
```
|
|
45
45
|
|
|
46
|
+
## Inline Callable Reducer
|
|
47
|
+
|
|
48
|
+
For one-off custom reductions, pass a callable directly to `reduce_to_rgb` instead of
|
|
49
|
+
registering it:
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
import multiscoresplot as msp
|
|
53
|
+
import umap
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def umap_reducer(X, n_components, **kwargs):
|
|
57
|
+
embedding = umap.UMAP(n_components=n_components, **kwargs).fit_transform(X)
|
|
58
|
+
for j in range(embedding.shape[1]):
|
|
59
|
+
col = embedding[:, j]
|
|
60
|
+
lo, hi = col.min(), col.max()
|
|
61
|
+
if hi > lo:
|
|
62
|
+
embedding[:, j] = (col - lo) / (hi - lo)
|
|
63
|
+
return embedding
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
rgb = msp.reduce_to_rgb(scores, method=umap_reducer, component_prefix="UMAP")
|
|
67
|
+
msp.plot_embedding(adata, rgb, basis="X_umap")
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
This is equivalent to `register_reducer` + `reduce_to_rgb(method="umap")`, but more
|
|
71
|
+
convenient when you only need the reducer once.
|
|
72
|
+
|
|
46
73
|
## Different Embeddings
|
|
47
74
|
|
|
48
75
|
Plot the same RGB coloring on different embeddings to compare:
|
|
@@ -21,9 +21,51 @@ scores = msp.score_gene_sets(
|
|
|
21
21
|
```
|
|
22
22
|
|
|
23
23
|
!!! note
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
24
|
+
`score_gene_sets` wraps pyUCell's ranking-based scoring. The `max_rank` parameter
|
|
25
|
+
controls how many top-ranked genes per cell are considered — set it close to the
|
|
26
|
+
median number of detected genes per cell for best results.
|
|
27
|
+
|
|
28
|
+
!!! tip "Custom column naming"
|
|
29
|
+
All pipeline functions accept `prefix` and `suffix` parameters to customise score
|
|
30
|
+
column names. For example, `prefix="msp-"` produces columns like `msp-Dorsal`
|
|
31
|
+
instead of `score-Dorsal`. Pass the same `prefix`/`suffix` to downstream functions
|
|
32
|
+
(`blend_to_rgb`, `reduce_to_rgb`, `plot_embedding_interactive`) — or use `plot_scores`
|
|
33
|
+
which forwards them automatically. `RGBResult` stores these values so interactive
|
|
34
|
+
plots auto-detect them from the result object.
|
|
35
|
+
|
|
36
|
+
### Score Post-Processing
|
|
37
|
+
|
|
38
|
+
UCell scores are bounded to [0, 1] in theory, but in practice the observed range is
|
|
39
|
+
often narrow (e.g., [0.1, 0.6]) because the theoretical extremes are rare. This
|
|
40
|
+
underutilizes the color space for visualization. Two optional parameters help:
|
|
41
|
+
|
|
42
|
+
- **`clip_pct`** — Percentile clipping (winsorization) to reduce the influence of
|
|
43
|
+
outlier cells. A single float clips the upper tail; a tuple `(lo, hi)` clips both.
|
|
44
|
+
- **`normalize`** — Per-gene-set min-max rescaling so the scores span the full [0, 1]
|
|
45
|
+
range, maximizing use of the color space.
|
|
46
|
+
|
|
47
|
+
Processing order: clip first, then normalize (so normalization stretches the clipped
|
|
48
|
+
range).
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
# Clip upper 1% outliers, then stretch to [0, 1]
|
|
52
|
+
scores = msp.score_gene_sets(
|
|
53
|
+
adata, gene_sets,
|
|
54
|
+
clip_pct=99,
|
|
55
|
+
normalize=True,
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
# Clip both tails
|
|
59
|
+
scores = msp.score_gene_sets(adata, gene_sets, clip_pct=(1, 99))
|
|
60
|
+
|
|
61
|
+
# Only normalize (no clipping)
|
|
62
|
+
scores = msp.score_gene_sets(adata, gene_sets, normalize=True)
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
!!! tip
|
|
66
|
+
For most visualization workflows, `clip_pct=99, normalize=True` is a good default.
|
|
67
|
+
This removes extreme outliers and stretches the remaining scores to fill the full
|
|
68
|
+
color range.
|
|
27
69
|
|
|
28
70
|
## Step 2 — Map Scores to RGB
|
|
29
71
|
|
|
@@ -57,20 +99,23 @@ For **any number of gene sets**, dimensionality reduction projects the score mat
|
|
|
57
99
|
rgb = msp.reduce_to_rgb(scores, method="pca") # default
|
|
58
100
|
rgb = msp.reduce_to_rgb(scores, method="nmf")
|
|
59
101
|
rgb = msp.reduce_to_rgb(scores, method="ica")
|
|
102
|
+
|
|
103
|
+
# Or pass a callable directly for one-off custom reductions
|
|
104
|
+
rgb = msp.reduce_to_rgb(scores, method=my_reducer_fn)
|
|
60
105
|
```
|
|
61
106
|
|
|
62
107
|
#### Choosing a Reduction Method
|
|
63
108
|
|
|
64
|
-
| Method
|
|
65
|
-
|
|
66
|
-
| **PCA** | General use
|
|
67
|
-
| **NMF** | Interpretability
|
|
68
|
-
| **ICA** | Independent signals | Maximizes statistical independence between components. Useful when gene programs are expected to be independent.
|
|
109
|
+
| Method | Best for | Properties |
|
|
110
|
+
| ------- | ------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
|
|
111
|
+
| **PCA** | General use | Linear, orthogonal components, preserves maximum variance. Components can mix positive and negative loadings. |
|
|
112
|
+
| **NMF** | Interpretability | Non-negative components — each RGB channel corresponds to a non-negative combination of gene sets. Often more biologically intuitive. |
|
|
113
|
+
| **ICA** | Independent signals | Maximizes statistical independence between components. Useful when gene programs are expected to be independent. |
|
|
69
114
|
|
|
70
115
|
!!! tip
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
116
|
+
Start with **PCA** for exploration. Switch to **NMF** if you want components that are
|
|
117
|
+
easier to interpret biologically. Use **ICA** when you have prior reason to believe
|
|
118
|
+
the gene programs are driven by separate, non-overlapping regulatory mechanisms.
|
|
74
119
|
|
|
75
120
|
#### PCA — Principal Component Analysis
|
|
76
121
|
|
|
@@ -79,7 +124,7 @@ RGB channels. It is the best default choice because it captures the most informa
|
|
|
79
124
|
(the largest differences between cells) in the fewest components.
|
|
80
125
|
|
|
81
126
|
However, PCA components can have both positive and negative loadings on gene sets. This
|
|
82
|
-
means a single RGB channel might represent "high in gene set A *and* low in gene set B"
|
|
127
|
+
means a single RGB channel might represent "high in gene set A _and_ low in gene set B"
|
|
83
128
|
at the same time, which can make the color mapping less intuitive to interpret. In
|
|
84
129
|
practice, this is rarely a problem for visualization — the overall color patterns still
|
|
85
130
|
reveal meaningful structure — but it does mean the legend's R/G/B labels are abstract
|
|
@@ -91,7 +136,7 @@ to map neatly to a biological concept.
|
|
|
91
136
|
#### NMF — Non-negative Matrix Factorization
|
|
92
137
|
|
|
93
138
|
NMF decomposes the score matrix into non-negative factors. Because both the loadings and
|
|
94
|
-
the coefficients are constrained to be ≥ 0, each RGB channel can only be a *positive*
|
|
139
|
+
the coefficients are constrained to be ≥ 0, each RGB channel can only be a _positive_
|
|
95
140
|
combination of gene sets — it can never represent "high A and low B" in the same
|
|
96
141
|
component. This makes the components more naturally interpretable: each color channel
|
|
97
142
|
tends to capture a distinct group of co-active gene programs.
|
|
@@ -110,7 +155,7 @@ programs, making the plot easier to interpret biologically.
|
|
|
110
155
|
|
|
111
156
|
#### ICA — Independent Component Analysis
|
|
112
157
|
|
|
113
|
-
ICA looks for components that are statistically *independent* — meaning knowing the value
|
|
158
|
+
ICA looks for components that are statistically _independent_ — meaning knowing the value
|
|
114
159
|
of one component tells you nothing about the others. This is a stronger requirement than
|
|
115
160
|
PCA's orthogonality (uncorrelated), which only rules out linear relationships.
|
|
116
161
|
|
|
@@ -129,14 +174,35 @@ components by variance like PCA does, so the R/G/B assignment is less predictabl
|
|
|
129
174
|
regulated biological processes and want the color channels to separate them as cleanly
|
|
130
175
|
as possible.
|
|
131
176
|
|
|
177
|
+
!!! warning "Interpreting Colors in Reduction Mode"
|
|
178
|
+
|
|
179
|
+
1. **Similar colors ≠ similar biology.** Reduction methods (PCA/NMF/ICA) project
|
|
180
|
+
the full score matrix into just 3 dimensions. Only the top 3 axes of variation
|
|
181
|
+
are captured — all other differences between cells are collapsed. Two cells with
|
|
182
|
+
very different gene set score profiles can appear the same color if their
|
|
183
|
+
differences lie along components that were dropped.
|
|
184
|
+
|
|
185
|
+
2. **Same gene set score ≠ same gene expression.** UCell computes a rank-based
|
|
186
|
+
aggregate score per gene set. Two cells can achieve the same UCell score by
|
|
187
|
+
highly expressing different subsets of genes within that gene set. The score
|
|
188
|
+
summarizes overall gene set activity, not the identity of which specific genes
|
|
189
|
+
drive it.
|
|
190
|
+
|
|
191
|
+
3. **RGB channels are abstract in reduction mode.** Unlike `blend_to_rgb` where
|
|
192
|
+
each color maps directly to a specific gene set, the R, G, and B channels in
|
|
193
|
+
reduction mode represent learned linear combinations of all gene set scores.
|
|
194
|
+
The legend labels (e.g., PC1/PC2/PC3) are statistical axes, not biological
|
|
195
|
+
programs. For more interpretable components, use `method="nmf"` (non-negative
|
|
196
|
+
parts-based decomposition) or `blend_to_rgb` for ≤ 3 gene sets.
|
|
197
|
+
|
|
132
198
|
### Blend vs. Reduce — When to Use Which
|
|
133
199
|
|
|
134
|
-
|
|
|
135
|
-
|
|
136
|
-
| **Gene sets**
|
|
137
|
-
| **Color mapping**
|
|
138
|
-
| **Interpretability** | Immediate — colors correspond directly to gene sets | Requires the legend to interpret RGB channels
|
|
139
|
-
| **Best for**
|
|
200
|
+
| | `blend_to_rgb` | `reduce_to_rgb` |
|
|
201
|
+
| -------------------- | --------------------------------------------------- | ------------------------------------------------------- |
|
|
202
|
+
| **Gene sets** | 2–3 only | 2 or more |
|
|
203
|
+
| **Color mapping** | Direct: each gene set has its own color | Learned: RGB channels are linear combinations of scores |
|
|
204
|
+
| **Interpretability** | Immediate — colors correspond directly to gene sets | Requires the legend to interpret RGB channels |
|
|
205
|
+
| **Best for** | Focused comparisons of 2–3 programs | Exploratory analysis of many programs simultaneously |
|
|
140
206
|
|
|
141
207
|
## Step 3 — Plot Embedding
|
|
142
208
|
|
|
@@ -149,9 +215,9 @@ functions detect this metadata automatically, so you don't need to repeat `metho
|
|
|
149
215
|
and `gene_set_names=`.
|
|
150
216
|
|
|
151
217
|
!!! note "Full obsm key"
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
218
|
+
The `basis` parameter now takes the **full obsm key** (e.g., `"X_umap"`,
|
|
219
|
+
`"umap_consensus"`), not a short name. This lets you use any obsm key, not
|
|
220
|
+
just those prefixed with `X_`.
|
|
155
221
|
|
|
156
222
|
```python
|
|
157
223
|
# Basic usage — method and gene_set_names auto-detected from RGBResult
|
|
@@ -206,11 +272,11 @@ msp.plot_embedding_interactive(
|
|
|
206
272
|
```
|
|
207
273
|
|
|
208
274
|
!!! tip "Hover over genes"
|
|
209
|
-
|
|
210
|
-
|
|
275
|
+
`hover_columns` accepts both `adata.obs` column names **and** gene names from
|
|
276
|
+
`adata.var_names`. Gene names display the expression value from `adata.X`.
|
|
211
277
|
|
|
212
278
|
!!! note
|
|
213
|
-
|
|
279
|
+
Interactive plots require the `plotly` extra: `pip install 'multiscoresplot[interactive]'`
|
|
214
280
|
|
|
215
281
|
## Optional — Standalone Legend
|
|
216
282
|
|
|
@@ -230,3 +296,34 @@ msp.render_legend(ax, "direct", gene_set_names=["A", "B", "C"])
|
|
|
230
296
|
msp.render_legend(ax, "pca")
|
|
231
297
|
msp.render_legend(ax, "nmf", component_labels=["NMF1", "NMF2", "NMF3"])
|
|
232
298
|
```
|
|
299
|
+
|
|
300
|
+
## One-Step Convenience Function
|
|
301
|
+
|
|
302
|
+
For quick exploration, `plot_scores` wraps the entire 3-step pipeline in a single call.
|
|
303
|
+
It auto-selects `blend_to_rgb` for ≤ 3 gene sets and `reduce_to_rgb(method="pca")` for
|
|
304
|
+
more.
|
|
305
|
+
|
|
306
|
+
```python
|
|
307
|
+
# All-in-one: score → map to RGB → plot
|
|
308
|
+
scores, rgb, ax = msp.plot_scores(adata, gene_sets, basis="X_umap")
|
|
309
|
+
|
|
310
|
+
# With post-processing and custom method
|
|
311
|
+
scores, rgb, ax = msp.plot_scores(
|
|
312
|
+
adata, gene_sets,
|
|
313
|
+
clip_pct=99,
|
|
314
|
+
normalize=True,
|
|
315
|
+
method="nmf",
|
|
316
|
+
basis="X_umap",
|
|
317
|
+
show=False,
|
|
318
|
+
)
|
|
319
|
+
|
|
320
|
+
# Force blend even with 2 sets (default already does this, but explicit)
|
|
321
|
+
scores, rgb, ax = msp.plot_scores(
|
|
322
|
+
adata, gene_sets_2,
|
|
323
|
+
method="blend",
|
|
324
|
+
basis="X_umap",
|
|
325
|
+
)
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
The return value is a `(scores, rgb, plot_result)` tuple, so you can reuse
|
|
329
|
+
the intermediate outputs for further analysis.
|
|
@@ -11,6 +11,7 @@ from multiscoresplot._colorspace import (
|
|
|
11
11
|
)
|
|
12
12
|
from multiscoresplot._interactive import plot_embedding_interactive
|
|
13
13
|
from multiscoresplot._legend import render_legend
|
|
14
|
+
from multiscoresplot._pipeline import plot_scores
|
|
14
15
|
from multiscoresplot._plotting import plot_embedding
|
|
15
16
|
from multiscoresplot._scoring import score_gene_sets
|
|
16
17
|
|
|
@@ -20,6 +21,7 @@ __all__ = [
|
|
|
20
21
|
"get_component_labels",
|
|
21
22
|
"plot_embedding",
|
|
22
23
|
"plot_embedding_interactive",
|
|
24
|
+
"plot_scores",
|
|
23
25
|
"project_direct",
|
|
24
26
|
"project_pca",
|
|
25
27
|
"reduce_to_rgb",
|
|
@@ -27,4 +29,4 @@ __all__ = [
|
|
|
27
29
|
"render_legend",
|
|
28
30
|
"score_gene_sets",
|
|
29
31
|
]
|
|
30
|
-
__version__ = "2.0.0"
|
|
32
|
+
__version__ = "2.1.0"
|