lotsofcells 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lotsofcells/__init__.py +39 -0
- lotsofcells/_stats.py +279 -0
- lotsofcells/_utils.py +211 -0
- lotsofcells/entropy.py +354 -0
- lotsofcells/lotsofcells.py +330 -0
- lotsofcells/plots.py +681 -0
- lotsofcells-0.3.0.dist-info/METADATA +21 -0
- lotsofcells-0.3.0.dist-info/RECORD +10 -0
- lotsofcells-0.3.0.dist-info/WHEEL +5 -0
- lotsofcells-0.3.0.dist-info/top_level.txt +1 -0
lotsofcells/plots.py
ADDED
|
@@ -0,0 +1,681 @@
|
|
|
1
|
+
"""Visualizations: bar, waffle, polar, density-ridge, dynamics, abundance test.
|
|
2
|
+
|
|
3
|
+
All functions accept either an `AnnData` (or `SpatialData`/`MuData`) or a
|
|
4
|
+
`pandas.DataFrame` containing the metadata.
|
|
5
|
+
|
|
6
|
+
Every plotting function except `waffle_chart` and `dynamics_chart` returns the `matplotlib.axes.Axes` it drew on; those two return `None`.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Optional, Sequence
|
|
11
|
+
|
|
12
|
+
import matplotlib.patches as mpatches
|
|
13
|
+
import matplotlib.pyplot as plt
|
|
14
|
+
import numpy as np
|
|
15
|
+
import pandas as pd
|
|
16
|
+
from matplotlib.colors import LinearSegmentedColormap, to_rgb
|
|
17
|
+
|
|
18
|
+
from ._utils import (
|
|
19
|
+
desaturate,
|
|
20
|
+
get_metadata,
|
|
21
|
+
get_numerical_variable,
|
|
22
|
+
get_palette,
|
|
23
|
+
lighten,
|
|
24
|
+
save_to_pdf,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# ------------------------------------------------------------------
|
|
29
|
+
# Bar chart
|
|
30
|
+
# ------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
def bar_chart(
    sc_object,
    main_variable: str,
    subtype_variable: str,
    sample_id: Optional[str] = None,
    subtype_only: Optional[str] = None,
    contribution: bool = False,
    colors: Optional[Sequence[str]] = None,
    table: Optional[str] = None,
    ax: Optional[plt.Axes] = None,
    figsize=(7, 5),
    pdf_file: Optional[str] = None,
):
    """Stacked barplot of subtype proportions per main_variable level.

    Mirrors the R ``bar_chart``. Pass ``pdf_file="path.pdf"`` to also save
    the figure to disk.

    Parameters
    ----------
    sc_object
        Object passed to ``get_metadata`` (together with ``table``) to
        obtain the metadata table.
    main_variable, subtype_variable
        Metadata columns; both are cast to ``str`` before counting.
    sample_id
        Optional metadata column. When given, one bar is drawn per
        ``"<main group>_<sample>"`` key instead of one per main group.
    subtype_only
        Optional level of ``subtype_variable``. Only that level is plotted
        and the bars are coloured by main group instead of by subtype.
    contribution
        When true and ``sample_id`` is given, the work is delegated to
        ``_bar_chart_contribution``, which creates its own figure (``ax``
        and ``subtype_only`` are not forwarded).
    colors
        Forwarded to ``get_palette`` as ``use_palette``.
    ax
        Axes to draw on; a new figure of size ``figsize`` is created when
        omitted.
    pdf_file
        Forwarded to ``save_to_pdf``.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that were drawn on.
    """
    metadata = get_metadata(sc_object, table=table)
    groups = metadata[main_variable].astype(str)
    covariable = metadata[subtype_variable].astype(str)
    order = list(covariable.value_counts(ascending=True).index)  # smallest first
    palette = get_palette(use_palette=colors, n_colors=len(order))
    color_map = dict(zip(order[::-1], palette))  # most frequent → first color

    if sample_id is not None:
        samples = metadata[sample_id].astype(str)
        if contribution:
            return _bar_chart_contribution(
                groups, covariable, samples, order, color_map,
                main_variable, subtype_variable, sample_id, figsize,
                pdf_file=pdf_file,
            )
        bar_keys = (groups + "_" + samples).to_numpy()
    else:
        bar_keys = groups.to_numpy()

    # Remember which main group every bar key belongs to. Recovering it
    # later with ``key.split("_")[0]`` is wrong as soon as a main group
    # name itself contains an underscore (e.g. "healthy_control").
    main_of = dict(zip(bar_keys, groups.to_numpy()))

    df = pd.DataFrame({"groups": bar_keys, "covariable": covariable.to_numpy()})
    contig = pd.crosstab(df["groups"], df["covariable"])
    contig = contig.div(contig.sum(axis=1), axis=0)  # row-wise proportions
    if subtype_only is not None:
        contig = contig[[subtype_only]]

    # Order bars by main group, then by descending proportion of the
    # reference column (the most frequent subtype, or ``subtype_only``).
    main_levels = sorted(set(main_of.values()))
    level_rank = {level: pos for pos, level in enumerate(main_levels)}
    sort_col = order[-1] if subtype_only is None else subtype_only
    sort_vals = contig[sort_col]
    bar_order = sorted(
        contig.index,
        key=lambda k: (level_rank[main_of[k]], -sort_vals[k]),
    )
    contig = contig.loc[bar_order]
    bar_main = [main_of[k] for k in bar_order]

    # Plot
    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)
    else:
        fig = ax.figure
    positions = range(len(contig))

    if subtype_only is not None:
        # Single subtype: color bars by main group.
        gc_map = dict(zip(main_levels, _group_colors(main_levels)))
        bar_colors = [gc_map[m] for m in bar_main]
        ax.bar(positions, contig[subtype_only].values, color=bar_colors)
    else:
        bottom = np.zeros(len(contig))
        for cov in order[::-1]:  # most frequent subtype at the bottom
            heights = contig[cov].values
            ax.bar(
                positions,
                heights,
                bottom=bottom,
                color=color_map[cov],
                label=cov,
            )
            bottom += heights
        ax.legend(title=f"Class: {subtype_variable}", bbox_to_anchor=(1.02, 1), loc="upper left")

    ax.set_xticks(positions)
    ax.set_xticklabels(contig.index, rotation=45, ha="right")
    ax.set_ylabel("percentage")
    ax.set_yticks(np.linspace(0, 1, 11))
    ax.set_yticklabels([f"{int(100 * v)}" for v in np.linspace(0, 1, 11)])
    title = f"Proportions of {subtype_variable} by {main_variable}"
    if subtype_only:
        ax.set_title(f"{title}\nClass: {subtype_only}")
    elif sample_id:
        ax.set_title(f"{title}\nIndividual sub-level by: {sample_id}")
    else:
        ax.set_title(title)

    # Annotate group bands at the bottom
    _annotate_groups(ax, bar_main, main_levels)
    fig.tight_layout()
    save_to_pdf(fig, pdf_file)
    return ax
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _group_colors(levels):
    """Return one desaturated color per entry of ``levels``.

    A fixed eight-color base list is passed to ``get_palette`` with
    ``n_colors=len(levels)``; every color it returns is then passed through
    ``desaturate(color, 0.16)``.
    """
    base_colors = [
        "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3",
        "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3",
    ]
    softened = []
    for shade in get_palette(use_palette=base_colors, n_colors=len(levels)):
        softened.append(desaturate(shade, 0.16))
    return softened
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _annotate_groups(ax, bar_main, main_levels):
    """Draw a labelled color band under each run of same-group bars.

    ``bar_main`` holds the main group of every bar, in plotting order.
    Consecutive bars with the same group form a run; each run gets one
    rectangle spanning its bars between y = -0.05 and y = -0.02 (drawn
    with clipping disabled) and the group name centered on it. Band
    colors come from ``_group_colors(main_levels)``.
    """
    color_for = dict(zip(main_levels, _group_colors(main_levels)))
    band_low, band_high = -0.05, -0.02
    count = len(bar_main)

    # A run starts at position 0 and wherever the group changes.
    run_starts = [0] + [
        pos for pos in range(1, count) if bar_main[pos] != bar_main[pos - 1]
    ]
    run_ends = [nxt - 1 for nxt in run_starts[1:]] + [count - 1]

    for first, last in zip(run_starts, run_ends):
        group = bar_main[first]
        band = mpatches.Rectangle(
            (first - 0.5, band_low), (last - first + 1), (band_high - band_low),
            color=color_for[group], clip_on=False, zorder=3,
        )
        ax.add_patch(band)
        ax.text(
            (first + last) / 2, (band_low + band_high) / 2, group,
            ha="center", va="center", color="white",
            fontsize=8, style="italic", zorder=4,
        )
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _bar_chart_contribution(
    groups, covariable, samples, order, color_map,
    main_variable, subtype_variable, sample_id, figsize,
    pdf_file=None,
):
    """Draw one stacked bar per main group, split by subtype and by sample.

    For every main group the sample × subtype counts are divided by the
    group's total count. The bar is stacked subtype by subtype (reverse of
    ``order``) and, inside each subtype, sample by sample, each sample
    getting a shade of the subtype color produced by ``lighten`` with
    offsets spread over [-0.2, 0.2]. A new figure is always created.

    Returns the ``matplotlib.axes.Axes`` that was drawn on; the figure is
    also passed to ``save_to_pdf`` with ``pdf_file``.
    """
    fig, ax = plt.subplots(figsize=figsize)
    main_levels = sorted(groups.unique())
    bar_width = 0.7
    stack_order = order[::-1]

    for x_pos, level in enumerate(main_levels):
        in_level = (groups == level)
        level_df = pd.DataFrame({
            "samples": samples[in_level].values,
            "covariable": covariable[in_level].values,
        })
        share = pd.crosstab(level_df["samples"], level_df["covariable"])
        share = share / share.values.sum()

        sample_names = list(share.index)
        if not sample_names:
            continue
        offsets = np.linspace(-0.2, 0.2, len(sample_names))

        running = 0.0
        for cov in stack_order:
            base_color = color_map[cov]
            shades = [lighten(base_color, t) for t in offsets]
            shades = [base_color if c.startswith("-") else c for c in shades]
            cov_present = cov in share.columns
            for name, shade in zip(sample_names, shades):
                height = share.loc[name, cov] if cov_present else 0
                ax.bar(x_pos, height, bar_width, bottom=running, color=shade)
                running += height

    ax.set_xticks(range(len(main_levels)))
    ax.set_xticklabels(main_levels, rotation=45, ha="right")
    ax.set_ylabel("percentage")
    ax.set_title(
        f"Proportions of {subtype_variable} by {main_variable}\n"
        f"Contribution by {sample_id}"
    )
    legend_patches = []
    for cov in stack_order:
        legend_patches.append(mpatches.Patch(color=color_map[cov], label=cov))
    ax.legend(
        handles=legend_patches, title=f"Class: {subtype_variable}",
        bbox_to_anchor=(1.02, 1), loc="upper left",
    )
    fig.tight_layout()
    save_to_pdf(fig, pdf_file)
    return ax
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
# ------------------------------------------------------------------
|
|
230
|
+
# Waffle chart (each tile = 1%)
|
|
231
|
+
# ------------------------------------------------------------------
|
|
232
|
+
|
|
233
|
+
def waffle_chart(
    sc_object,
    main_variable: str,
    subtype_variable: str,
    sample_id: Optional[str] = None,
    subtype_only: Optional[str] = None,
    colors: Optional[Sequence[str]] = None,
    table: Optional[str] = None,
    figsize=None,
    pdf_file: Optional[str] = None,
):
    """Draw one 10x10 waffle (each tile = 1%) per main group or group/sample.

    Parameters
    ----------
    sc_object
        Object passed to ``get_metadata`` (together with ``table``) to
        obtain the metadata table.
    main_variable, subtype_variable
        Metadata columns; both are cast to ``str`` before counting.
    sample_id
        Optional metadata column. When given, one panel is drawn per
        ``"<main group>_<sample>"`` key instead of one per main group.
    subtype_only
        Optional level of ``subtype_variable``. All other levels are
        collapsed into ``"All Other"`` and a fixed built-in palette is
        used (``colors`` is ignored in this mode).
    colors
        Forwarded to ``get_palette`` as ``use_palette`` when
        ``subtype_only`` is not given.
    figsize
        Figure size; derived from the panel grid when omitted.
    pdf_file
        Forwarded to ``save_to_pdf``.

    Returns
    -------
    None
        Under an "inline" matplotlib backend the figure is displayed once
        and closed.

    Raises
    ------
    ValueError
        If ``subtype_only`` is not a level of ``subtype_variable``.
    """
    metadata = get_metadata(sc_object, table=table)
    groups = metadata[main_variable].astype(str)
    covariable = metadata[subtype_variable].astype(str)

    if subtype_only is not None:
        if subtype_only not in covariable.unique():
            raise ValueError(
                f"subtype_only '{subtype_only}' not found in {subtype_variable}."
            )
        cov = np.where(covariable == subtype_only, subtype_only, "All Other")
        order = [subtype_only, "All Other"]
        # alternating dim/main shades per main group
        subtype_palette = [
            desaturate(c, 0.16)
            for c in ("#DBECDA", "#92C791", "#BEDAEC", "#7EB6D9", "#DDC7E2", "#86608E")
        ]
    else:
        cov = covariable.to_numpy()
        order = list(covariable.value_counts(ascending=False).index)[::-1]
        subtype_palette = None

    if sample_id is not None:
        keys = (groups + "_" + metadata[sample_id].astype(str)).to_numpy()
    else:
        keys = groups.to_numpy()
    # Remember the main group of every panel key. Splitting the key on "_"
    # gives the wrong group when a main group name contains an underscore.
    main_of = dict(zip(keys, groups.to_numpy()))

    df = pd.DataFrame({"groups": keys, "covariable": cov})
    contig = pd.crosstab(df["groups"], df["covariable"])
    if subtype_only is not None:
        ncells = contig.get(subtype_only, pd.Series(0, index=contig.index))
    else:
        ncells = contig.sum(axis=1)
    contig = contig.div(contig.sum(axis=1), axis=0)
    contig = contig.reindex(columns=order, fill_value=0)

    if subtype_palette is None:
        palette = get_palette(use_palette=colors, n_colors=len(order))
    else:
        palette = subtype_palette

    n_panels = len(contig)
    ncol = max(1, int(np.ceil(np.sqrt(n_panels))))
    nrow = int(np.ceil(n_panels / ncol))
    if figsize is None:
        # 3 in per waffle column plus 1.8 in for the legend column.
        figsize = (3 * ncol + 1.8, 3 * nrow)

    # GridSpec: nrow x (ncol + 1). The last column is a dedicated, axis-off
    # area where the legend lives, so it never overlaps the waffles.
    fig = plt.figure(figsize=figsize)
    gs = fig.add_gridspec(
        nrow, ncol + 1,
        width_ratios=[1.0] * ncol + [0.45],
        wspace=0.15, hspace=0.25,
    )
    axes = np.empty((nrow, ncol), dtype=object)
    for r in range(nrow):
        for c in range(ncol):
            axes[r, c] = fig.add_subplot(gs[r, c])
    legend_ax = fig.add_subplot(gs[:, -1])
    legend_ax.axis("off")

    # Map main groups → color pair indices for subtype_only mode
    main_order = sorted({main_of[k] for k in contig.index})
    main_idx = {m: pos for pos, m in enumerate(main_order)}

    # Without subtype_only every panel uses the same class → color mapping.
    shared_colors = [palette[pos] for pos in range(len(order))]

    for i, (gname, row) in enumerate(contig.iterrows()):
        ax = axes[i // ncol][i % ncol]
        percentages = (row * 100).round().astype(int).to_numpy()
        percentages = _balance_to_100(percentages)
        if subtype_only is not None:
            pi = main_idx[main_of[gname]] * 2
            # NOTE(review): here the selected subtype gets palette[pi] and
            # "All Other" palette[pi + 1], while the legend below uses
            # palette[1] for the subtype and palette[0] for "All Other".
            # One of the two looks swapped — confirm the intended shade order.
            colors_panel = [palette[pi % len(palette)],
                            palette[(pi + 1) % len(palette)]]
        else:
            colors_panel = shared_colors
        _draw_waffle(ax, percentages, colors_panel, order, gname, ncells.get(gname, 0))
        if subtype_only is not None and len(percentages) >= 1:
            ax.text(4.5, 8, f"{percentages[0]:.0f}%",
                    ha="center", va="center", fontsize=11,
                    fontweight="bold", color=colors_panel[0])

    # Hide unused panels in the bottom-right of the waffle grid.
    for j in range(n_panels, nrow * ncol):
        axes[j // ncol][j % ncol].axis("off")

    handles = [
        mpatches.Patch(
            color=(palette[pos] if subtype_only is None
                   else (palette[1] if o == subtype_only else palette[0])),
            label=o,
        )
        for pos, o in enumerate(order)
    ]
    legend_ax.legend(
        handles=handles,
        title=f"Class: {subtype_variable}",
        loc="center",
        frameon=False,
        borderaxespad=0.0,
        labelspacing=0.8,
    )

    save_to_pdf(fig, pdf_file)

    # Display once in Jupyter inline (the inline backend auto-flushes
    # at cell end AND the Figure has `_repr_*_` that re-renders if
    # returned). Show the figure here, then close it so the cell does
    # not re-display, and return None.
    try:
        from matplotlib import get_backend
        if "inline" in get_backend().lower():
            from IPython.display import display
            display(fig)
            plt.close(fig)
            return None
    except Exception:
        # Best effort only: a failure to display must not break the call.
        pass
    return None
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def _balance_to_100(arr):
    """Return an integer copy of ``arr`` adjusted so that it sums to 100.

    The whole rounding difference (positive or negative) is added to the
    largest entry (the first one on ties). The input is never modified.
    An empty input is returned as an empty integer array, since there is
    no entry to adjust.
    """
    # ``astype`` already returns a fresh array, so no extra copy is needed.
    balanced = np.asarray(arr).astype(int)
    if balanced.size == 0:
        return balanced
    diff = 100 - int(balanced.sum())
    if diff:
        balanced[balanced.argmax()] += diff
    return balanced
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def _draw_waffle(ax, percentages, colors_panel, order, title, ncells):
    """Fill ``ax`` with a 10x10 grid of colored tiles, one tile per percent.

    Tiles are assigned row by row: tile ``k`` (0-99) belongs to the first
    class whose cumulative percentage exceeds ``k``, capped at the last
    entry of ``colors_panel``. The y axis is inverted so the first row is
    at the top. ``title`` becomes the panel title and, when ``ncells`` is
    truthy, the cell count is written as the x label. ``order`` is
    accepted but not used here.
    """
    thresholds = np.cumsum(percentages)
    tile_class = np.searchsorted(thresholds, np.arange(100), side="right")
    tile_class = np.minimum(tile_class, len(colors_panel) - 1).reshape(10, 10)

    for row, col in np.ndindex(10, 10):
        tile = mpatches.Rectangle(
            (col, row), 0.85, 0.85, color=colors_panel[tile_class[row, col]],
        )
        ax.add_patch(tile)

    ax.set_xlim(-0.1, 10)
    ax.set_ylim(-0.1, 10)
    ax.set_aspect("equal")
    ax.invert_yaxis()
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(str(title))
    if ncells:
        ax.set_xlabel(f"n. cells: {int(ncells):,}",
                      fontsize=8, style="italic", color="grey")
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
# ------------------------------------------------------------------
|
|
408
|
+
# Polar / circular barplot
|
|
409
|
+
# ------------------------------------------------------------------
|
|
410
|
+
|
|
411
|
+
def polar_chart(
    sc_object,
    main_variable: str,
    subtype_variable: str,
    sample_id: Optional[str] = None,
    subtype_only: Optional[str] = None,
    colors: Optional[Sequence[str]] = None,
    table: Optional[str] = None,
    figsize=(8, 8),
    pdf_file: Optional[str] = None,
):
    """Circular stacked barplot of subtype counts per main group.

    Parameters
    ----------
    sc_object
        Object passed to ``get_metadata`` (together with ``table``) to
        obtain the metadata table.
    main_variable, subtype_variable
        Metadata columns; both are cast to ``str`` before counting.
    sample_id
        Optional metadata column. When given, one bar is drawn per
        ``"<main group>_<sample>"`` key, ordered by main group and then
        by key.
    subtype_only
        Optional level of ``subtype_variable``; only that level is drawn.
    colors
        Forwarded to ``get_palette`` as ``use_palette``.
    pdf_file
        Forwarded to ``save_to_pdf``.

    Returns
    -------
    matplotlib.axes.Axes
        The polar axes that were drawn on (a new figure is always created).
    """
    metadata = get_metadata(sc_object, table=table)
    groups = metadata[main_variable].astype(str)
    covariable = metadata[subtype_variable].astype(str)
    order = list(covariable.value_counts(ascending=True).index)  # smallest first
    palette = get_palette(use_palette=colors, n_colors=len(order))
    color_map = dict(zip(order[::-1], palette))

    if sample_id is not None:
        keys = (groups + "_" + metadata[sample_id].astype(str)).to_numpy()
    else:
        keys = groups.to_numpy()
    # Remember the main group of every bar key. Splitting the key on "_"
    # gives the wrong group when a main group name contains an underscore.
    main_of = dict(zip(keys, groups.to_numpy()))

    df = pd.DataFrame({"groups": keys, "covariable": covariable.values})
    # NOTE(review): unlike ``bar_chart`` the rows are not normalized, so raw
    # counts are plotted although the title says "Proportions" — confirm
    # whether a row-wise division is intended here.
    contig = pd.crosstab(df["groups"], df["covariable"])
    if subtype_only is not None:
        contig = contig[[subtype_only]]

    # Sort by main group, then by bar key.
    if sample_id is not None:
        main_levels = sorted({main_of[k] for k in contig.index})
        level_rank = {level: pos for pos, level in enumerate(main_levels)}
        contig = contig.reindex(
            sorted(contig.index, key=lambda k: (level_rank[main_of[k]], k))
        )
    else:
        contig = contig.sort_index()

    n_bars = len(contig)
    angles = np.linspace(0, 2 * np.pi, n_bars, endpoint=False)
    width = 2 * np.pi / n_bars * 0.9  # leave a small gap between bars

    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(111, projection="polar")
    bottom = np.zeros(n_bars)
    cov_order = order[::-1] if subtype_only is None else [subtype_only]
    for cov in cov_order:
        if cov not in contig.columns:
            continue
        vals = contig[cov].values
        ax.bar(angles, vals, width=width, bottom=bottom,
               color=color_map.get(cov, "#999999"), label=cov, edgecolor="white")
        bottom += vals

    ax.set_xticks(angles)
    ax.set_xticklabels(contig.index, fontsize=7)
    ax.set_yticklabels([])
    ax.set_title(f"Proportions of {subtype_variable} by {main_variable}",
                 fontsize=14, fontweight="bold")
    ax.legend(bbox_to_anchor=(1.2, 1), loc="upper left",
              title=f"Class: {subtype_variable}")
    fig.tight_layout()
    save_to_pdf(fig, pdf_file)
    return ax
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
# ------------------------------------------------------------------
|
|
478
|
+
# Density (ridge) chart
|
|
479
|
+
# ------------------------------------------------------------------
|
|
480
|
+
|
|
481
|
+
def density_chart(
    sc_object,
    main_variable: str,
    subtype_variable: str,
    numerical_variable: str,
    sample_id: Optional[str] = None,
    colors: Optional[Sequence[str]] = None,
    table: Optional[str] = None,
    figsize=(9, 7),
    pdf_file: Optional[str] = None,
):
    """Ridge-style density plot of a numerical variable across covariate levels.

    The numerical variable can be a column in ``.obs`` or — when ``sc_object``
    is an AnnData — a feature name (gene); expression values from ``.X`` will
    be used.

    One ridge is drawn per ``"<subtype>_<main group>"`` label (with
    ``"_<sample>"`` appended when ``sample_id`` is given). Rows with a
    missing ``subtype_variable`` or a missing numerical value are dropped.
    Labels with fewer than two values, or whose kernel density estimate
    cannot be computed, keep their row on the y axis but get no ridge.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that were drawn on (a new figure is always created); the
        figure is also passed to ``save_to_pdf`` with ``pdf_file``.
    """
    # Imported once here instead of on every loop iteration.
    from scipy.stats import gaussian_kde

    metadata = get_metadata(sc_object, table=table)
    metadata = metadata.dropna(subset=[subtype_variable])
    groups = metadata[main_variable].astype(str)
    covariable = metadata[subtype_variable].astype(str)
    order = list(covariable.value_counts(ascending=True).index)
    palette = get_palette(use_palette=colors, n_colors=len(order))
    color_map = dict(zip(order, palette))

    # Build the ridge labels while every Series still shares the same rows.
    # Mixing Series taken before and after the NaN drop below would
    # misalign them and create NaN labels that cannot be sorted.
    sub_label = covariable + "_" + groups
    if sample_id is not None:
        sub_label = sub_label + "_" + metadata[sample_id].astype(str)

    # Keep the subtype of every label explicitly: ``label.split("_")[0]``
    # is wrong when the subtype name itself contains an underscore.
    levels = []
    cov_of = {}
    for o in order:
        for lbl in sorted(sub_label[covariable == o].unique()):
            levels.append(lbl)
            cov_of[lbl] = o

    values = get_numerical_variable(sc_object, numerical_variable, metadata)
    metadata = metadata.assign(_val=values, _label=sub_label)
    metadata = metadata.dropna(subset=["_val"])
    metadata["_label"] = pd.Categorical(metadata["_label"], categories=levels, ordered=True)

    fig, ax = plt.subplots(figsize=figsize)
    overlap = 0.7  # ridges are scaled to 1 + overlap rows in height
    for y, label in enumerate(levels):
        vals = metadata.loc[metadata["_label"] == label, "_val"].to_numpy()
        if len(vals) < 2:
            continue
        try:
            kde = gaussian_kde(vals)
            xs = np.linspace(np.min(vals), np.max(vals), 200)
            ys = kde(xs)
        except (np.linalg.LinAlgError, ValueError, TypeError):
            # Best effort: e.g. constant values have no usable KDE; skip
            # this ridge but keep its row.
            continue
        color = color_map[cov_of[label]]
        ys = ys / ys.max() * (1 + overlap)
        ax.fill_between(xs, y, y + ys, color=color, alpha=0.6, lw=0)
        ax.plot(xs, y + ys, color=color, lw=0.5)
        med = np.median(vals)
        ax.vlines(med, y, y + np.interp(med, xs, ys), color="black", lw=0.7)

    ax.set_yticks(np.arange(len(levels)) + 0.3)
    ax.set_yticklabels(levels, fontsize=8)
    ax.set_xlabel(numerical_variable)
    title = f"Density distribution of {numerical_variable} across {subtype_variable}"
    if sample_id:
        title += f"\nsplit across {sample_id}"
    ax.set_title(title)
    fig.tight_layout()
    save_to_pdf(fig, pdf_file)
    return ax
|
|
556
|
+
|
|
557
|
+
|
|
558
|
+
# ------------------------------------------------------------------
|
|
559
|
+
# Dynamics chart (proportion trends across >2 conditions)
|
|
560
|
+
# ------------------------------------------------------------------
|
|
561
|
+
|
|
562
|
+
def dynamics_chart(
    gamma_results: pd.DataFrame,
    scale_data: bool = False,
    figsize=(10, 8),
    pdf_file: Optional[str] = None,
):
    """Plot per-row proportion trends and their ``groupGammaCor`` values.

    Every column of ``gamma_results`` other than ``groupGammaCor``,
    ``p.adj``, ``CI95low`` and ``CI95high`` is treated as one x position.
    The top panel draws one line per row (optionally z-scored per row when
    ``scale_data`` is true), annotated with its ``groupGammaCor``; the
    bottom panel shows ``groupGammaCor`` per row in ascending order.

    The figure is passed to ``save_to_pdf`` with ``pdf_file``. Always
    returns ``None``; under an "inline" matplotlib backend the figure is
    displayed once and closed.
    """
    data = gamma_results.copy()
    non_pct = {"groupGammaCor", "p.adj", "CI95low", "CI95high"}
    pct_cols = [col for col in data.columns if col not in non_pct]
    if scale_data:
        def _zscore(row):
            return (row - row.mean()) / row.std(ddof=1)

        data[pct_cols] = data[pct_cols].apply(_zscore, axis=1)

    fig, panels = plt.subplots(
        2, 1, figsize=figsize, gridspec_kw={"height_ratios": [3, 1]}
    )
    trend_ax, cor_ax = panels[0], panels[1]
    line_colors = dict(zip(data.index, get_palette(n_colors=len(data))))
    tick_labels = [col.replace("percent_in_", "proportion ") for col in pct_cols]
    x_positions = range(len(pct_cols))

    # Top panel: one trend line per row, labelled next to its last point.
    for cov in data.index:
        shade = line_colors[cov]
        trend_ax.plot(
            x_positions, data.loc[cov, pct_cols].values,
            marker="s", color=shade, label=cov, lw=1.2,
        )
        trend_ax.text(
            len(pct_cols) - 0.95, data.loc[cov, pct_cols[-1]],
            f"cor. {data.loc[cov, 'groupGammaCor']:.2f}",
            fontsize=8, fontweight="bold", color=shade,
        )
    trend_ax.set_xticks(x_positions)
    trend_ax.set_xticklabels(tick_labels, rotation=20, ha="right")
    trend_ax.set_ylabel("proportion")
    trend_ax.set_title("Proportion dynamics across groups", fontweight="bold")
    trend_ax.legend(bbox_to_anchor=(1.02, 1), loc="upper left", fontsize=8)

    # Bottom panel: correlation per row, lowest first.
    for cov in data.sort_values("groupGammaCor").index.tolist():
        cor_ax.scatter(
            cov, data.loc[cov, "groupGammaCor"],
            s=120, color=line_colors[cov], edgecolor="black", zorder=4,
        )
    cor_ax.axhline(0, color="darkgrey", lw=0.6)
    cor_ax.set_ylim(-1, 1)
    cor_ax.set_ylabel("Kendall correlation")
    cor_ax.tick_params(axis="x", rotation=45)
    cor_ax.grid(True, axis="y", alpha=0.3)

    fig.tight_layout()
    save_to_pdf(fig, pdf_file)
    # Under the Jupyter inline backend, show the figure once explicitly
    # and close it so it is not rendered a second time at cell end.
    try:
        from matplotlib import get_backend
        inline = "inline" in get_backend().lower()
        if inline:
            from IPython.display import display
            display(fig)
            plt.close(fig)
    except Exception:
        pass
    return None
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
# ------------------------------------------------------------------
|
|
621
|
+
# Abundance test plot (after lots_of_cells with 2 groups)
|
|
622
|
+
# ------------------------------------------------------------------
|
|
623
|
+
|
|
624
|
+
def plot_abundance_test(
    table_results: pd.DataFrame,
    subtype_variable: str = "covariable",
    figsize=(8, 6),
    pdf_file: Optional[str] = None,
):
    """Beautiful bubble plot of FC ± Monte-Carlo SD shown as pink ribbon.

    Rows of ``table_results`` are sorted by ``groupFC`` and drawn bottom to
    top. Each row shows a pink band of ± ``sd.montecarlo`` around zero, a
    horizontal line from ``CI95low`` to ``CI95high`` and a point at
    ``groupFC`` colored by ``sign(groupFC) * -log10(p.adj)`` (p-values of
    exactly 0 are replaced by 1e-5 first). The two group names are read
    from the second and third columns, which must contain
    ``"percent_in_"`` followed by the group name.

    ``table_results`` is not modified. The figure is passed to
    ``save_to_pdf`` with ``pdf_file``.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that were drawn on (a new figure is always created).
    """
    df = table_results.sort_values("groupFC").copy()
    cols = list(df.columns)
    on_right = cols[1].split("percent_in_")[1]
    on_left = cols[2].split("percent_in_")[1]
    guide = float(np.ceil(max(np.abs(df[["CI95low", "CI95high"]].fillna(0).to_numpy()).max(), 0))) + 0.5

    # Work on a private, writable copy: under pandas Copy-on-Write the array
    # returned by ``to_numpy()`` is read-only, so assigning into it raises.
    p_adj = df["p.adj"].to_numpy(dtype=float, copy=True)
    p_adj[p_adj == 0] = 1e-5  # avoid log10(0)
    significance = np.sign(df["groupFC"].values) * -np.log10(p_adj)

    cmap = LinearSegmentedColormap.from_list(
        "fc_cmap",
        ["#122A53", "#43587D", "#8BBCD4", "#C1DEEF", "#EEF6FF", "#FDFFFF",
         "#F6F3FF", "#DDCFFF", "#D1AADB", "#76608E", "#463955"],
    )

    fig, ax = plt.subplots(figsize=figsize)
    for i, (_, row) in enumerate(df.iterrows()):
        # SD ribbon (zorder 1, drawn beneath the CI bar)
        ax.add_patch(mpatches.Rectangle(
            (-row["sd.montecarlo"], i - 0.4),
            2 * row["sd.montecarlo"], 0.8,
            color="pink", alpha=0.3, zorder=1,
        ))
        # CI bar
        ax.hlines(i, row["CI95low"], row["CI95high"],
                  colors="#70508E", lw=0.6, zorder=2)
    # Points
    norm = plt.Normalize(-3, 3)
    sc = ax.scatter(
        df["groupFC"], range(len(df)),
        c=significance, cmap=cmap, norm=norm,
        s=140, edgecolors="black", linewidths=0.3, zorder=3,
    )
    ax.axvline(0, color="#86608E", lw=0.6)
    ax.set_yticks(range(len(df)))
    ax.set_yticklabels(df.index)
    ax.set_xlim(-guide, guide)
    ax.set_xlabel(f"log2(proportion_FC) : ({on_right}/{on_left})")
    ax.set_title(
        f"Fold-Change difference in proportion\n"
        f"Monte-Carlo simulation on {subtype_variable}"
    )
    ax.text(-1, -0.6, on_left, color="grey", ha="center")
    ax.text(1, -0.6, on_right, color="grey", ha="center")
    cbar = plt.colorbar(sc, ax=ax, ticks=[-3, -2, -1, 0, 1, 2, 3])
    cbar.set_label("signed -log10(p.adj)")
    fig.tight_layout()
    save_to_pdf(fig, pdf_file)
    return ax
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lotsofcells
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Python port of lotsOfCells: proportion-test statistics and visualization on single-cell metadata. Compatible with scanpy/AnnData and spatial transcriptomics.
|
|
5
|
+
Author: Oscar Gonzalez-Velasco
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Requires-Dist: numpy>=1.23
|
|
10
|
+
Requires-Dist: pandas>=1.5
|
|
11
|
+
Requires-Dist: scipy>=1.9
|
|
12
|
+
Requires-Dist: matplotlib>=3.6
|
|
13
|
+
Requires-Dist: anndata>=0.9
|
|
14
|
+
Provides-Extra: scanpy
|
|
15
|
+
Requires-Dist: scanpy>=1.9; extra == "scanpy"
|
|
16
|
+
Provides-Extra: spatial
|
|
17
|
+
Requires-Dist: spatialdata>=0.1; extra == "spatial"
|
|
18
|
+
Provides-Extra: mudata
|
|
19
|
+
Requires-Dist: mudata>=0.2; extra == "mudata"
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: pytest>=7; extra == "dev"
|