lotsofcells 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lotsofcells/plots.py ADDED
@@ -0,0 +1,681 @@
1
+ """Visualizations: bar, waffle, polar, density-ridge, dynamics, abundance test.
2
+
3
+ All functions accept either an `AnnData` (or `SpatialData`/`MuData`) or a
4
+ `pandas.DataFrame` containing the metadata.
5
+
6
+ Most chart functions return the `matplotlib.axes.Axes` they drew on; `waffle_chart` and `dynamics_chart` return `None`.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ from typing import Optional, Sequence
11
+
12
+ import matplotlib.patches as mpatches
13
+ import matplotlib.pyplot as plt
14
+ import numpy as np
15
+ import pandas as pd
16
+ from matplotlib.colors import LinearSegmentedColormap, to_rgb
17
+
18
+ from ._utils import (
19
+ desaturate,
20
+ get_metadata,
21
+ get_numerical_variable,
22
+ get_palette,
23
+ lighten,
24
+ save_to_pdf,
25
+ )
26
+
27
+
28
+ # ------------------------------------------------------------------
29
+ # Bar chart
30
+ # ------------------------------------------------------------------
31
+
32
def bar_chart(
    sc_object,
    main_variable: str,
    subtype_variable: str,
    sample_id: Optional[str] = None,
    subtype_only: Optional[str] = None,
    contribution: bool = False,
    colors: Optional[Sequence[str]] = None,
    table: Optional[str] = None,
    ax: Optional[plt.Axes] = None,
    figsize=(7, 5),
    pdf_file: Optional[str] = None,
):
    """Stacked barplot of subtype proportions per main_variable level.

    Mirrors the R ``bar_chart``. Pass ``pdf_file="path.pdf"`` to also save
    the figure to disk.

    Parameters
    ----------
    sc_object
        Object handed to ``get_metadata`` to obtain the metadata table.
    main_variable, subtype_variable
        Metadata columns holding the group and the subtype of each row.
    sample_id
        Optional metadata column; when given, one bar is drawn per
        (group, sample) pair instead of one per group.
    subtype_only
        When given, only this subtype's proportion is drawn and bars are
        colored by main group.
    contribution
        With ``sample_id`` set, delegate to ``_bar_chart_contribution``.
        Ignored when ``sample_id`` is ``None``.
    colors
        Forwarded to ``get_palette`` as ``use_palette``.
    table
        Forwarded to ``get_metadata``.
    ax
        Axes to draw on; a new figure of size ``figsize`` is created when
        omitted.
    pdf_file
        Forwarded to ``save_to_pdf``.

    Returns
    -------
    The matplotlib Axes that was drawn on.
    """
    metadata = get_metadata(sc_object, table=table)
    groups = metadata[main_variable].astype(str)
    covariable = metadata[subtype_variable].astype(str)
    order = list(covariable.value_counts(ascending=True).index)  # smallest first
    palette = get_palette(use_palette=colors, n_colors=len(order))
    color_map = dict(zip(order[::-1], palette))  # most frequent subtype → first color

    if sample_id is not None:
        samples = metadata[sample_id].astype(str)
        if contribution:
            return _bar_chart_contribution(
                groups, covariable, samples, order, color_map,
                main_variable, subtype_variable, sample_id, figsize,
                pdf_file=pdf_file,
            )
        bar_keys = (groups + "_" + samples).to_numpy()
    else:
        bar_keys = groups.to_numpy()

    # Remember which main group every bar key belongs to. Splitting the key
    # on "_" afterwards would be wrong as soon as a group name itself
    # contains an underscore (e.g. "ctrl_1").
    main_of = dict(zip(bar_keys, groups.to_numpy()))

    df = pd.DataFrame({"groups": bar_keys, "covariable": covariable.values})
    contig = pd.crosstab(df["groups"], df["covariable"])
    contig = contig.div(contig.sum(axis=1), axis=0)
    if subtype_only is not None:
        contig = contig[[subtype_only]]

    # Order bars by main group, then by descending value of the sort column
    # (the most frequent subtype, or the single requested subtype).
    sort_col = order[-1] if subtype_only is None else subtype_only
    sort_vals = contig[sort_col]
    bar_order = sorted(contig.index, key=lambda k: (main_of[k], -sort_vals[k]))
    contig = contig.loc[bar_order]
    bar_main = [main_of[k] for k in bar_order]
    main_levels = sorted(set(bar_main))

    # Plot
    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)
    else:
        fig = ax.figure
    positions = range(len(contig))

    if subtype_only is not None:
        # Single subtype: color bars by main group instead of by subtype.
        gc_map = dict(zip(main_levels, _group_colors(main_levels)))
        bar_colors = [gc_map[m] for m in bar_main]
        ax.bar(positions, contig[subtype_only].values, color=bar_colors)
    else:
        bottom = np.zeros(len(contig))
        for cov in order[::-1]:  # largest at bottom
            heights = contig[cov].values
            ax.bar(
                positions,
                heights,
                bottom=bottom,
                color=color_map[cov],
                label=cov,
            )
            bottom += heights
        ax.legend(title=f"Class: {subtype_variable}", bbox_to_anchor=(1.02, 1), loc="upper left")

    ax.set_xticks(positions)
    ax.set_xticklabels(contig.index, rotation=45, ha="right")
    ax.set_ylabel("percentage")
    # Bar heights are fractions in [0, 1]; label the ticks as 0..100.
    ax.set_yticks(np.linspace(0, 1, 11))
    ax.set_yticklabels([f"{int(100 * v)}" for v in np.linspace(0, 1, 11)])
    title = f"Proportions of {subtype_variable} by {main_variable}"
    if subtype_only:
        ax.set_title(f"{title}\nClass: {subtype_only}")
    elif sample_id:
        ax.set_title(f"{title}\nIndividual sub-level by: {sample_id}")
    else:
        ax.set_title(title)

    # Annotate group bands at the bottom
    _annotate_groups(ax, bar_main, main_levels)
    fig.tight_layout()
    save_to_pdf(fig, pdf_file)
    return ax
146
+
147
+
148
def _group_colors(levels):
    """Return one desaturated color per entry of ``levels``.

    A fixed eight-color base list is passed to ``get_palette`` together with
    the number of levels; every color it returns is then run through
    ``desaturate`` with a factor of 0.16.
    """
    base = [
        "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3",
        "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3",
    ]
    muted = []
    for shade in get_palette(use_palette=base, n_colors=len(levels)):
        muted.append(desaturate(shade, 0.16))
    return muted
153
+
154
+
155
+ def _annotate_groups(ax, bar_main, main_levels):
156
+ palette = _group_colors(main_levels)
157
+ color_for = dict(zip(main_levels, palette))
158
+ n = len(bar_main)
159
+ ymin, ymax = -0.05, -0.02
160
+ runs = []
161
+ start = 0
162
+ for i in range(1, n):
163
+ if bar_main[i] != bar_main[i - 1]:
164
+ runs.append((start, i - 1, bar_main[start]))
165
+ start = i
166
+ runs.append((start, n - 1, bar_main[start]))
167
+ for s, e, lbl in runs:
168
+ ax.add_patch(mpatches.Rectangle(
169
+ (s - 0.5, ymin), (e - s + 1), (ymax - ymin),
170
+ color=color_for[lbl], clip_on=False, zorder=3,
171
+ ))
172
+ ax.text((s + e) / 2, (ymin + ymax) / 2, lbl,
173
+ ha="center", va="center", color="white",
174
+ fontsize=8, style="italic", zorder=4)
175
+
176
+
177
def _bar_chart_contribution(
    groups, covariable, samples, order, color_map,
    main_variable, subtype_variable, sample_id, figsize,
    pdf_file=None,
):
    """Draw one stacked bar per main group, split by subtype and by sample.

    Within each group the (sample x subtype) counts are divided by the
    group's total count, so each bar stacks to 1. Every subtype segment is
    subdivided into one slice per sample, shaded around the subtype's base
    color via ``lighten``.

    Parameters
    ----------
    groups, covariable, samples
        Aligned string Series with the group, subtype and sample of each row.
    order
        Subtype names, smallest first; segments are stacked in reverse order.
    color_map
        Subtype name → base color.
    main_variable, subtype_variable, sample_id
        Column names, used only for the title and legend.
    figsize
        Size of the newly created figure.
    pdf_file
        Forwarded to ``save_to_pdf``.

    Returns
    -------
    The matplotlib Axes of the new figure.
    """
    fig, ax = plt.subplots(figsize=figsize)
    main_levels = sorted(groups.unique())
    width = 0.7
    cov_order = order[::-1]
    for i, m in enumerate(main_levels):
        sub = (groups == m)
        df = pd.DataFrame({
            "samples": samples[sub].values,
            "covariable": covariable[sub].values,
        })
        contig = pd.crosstab(df["samples"], df["covariable"])
        contig = contig / contig.values.sum()

        # The samples of a group do not depend on the subtype: look them up once.
        samples_present = list(contig.index)
        n_s = len(samples_present)
        if n_s == 0:
            continue
        offsets = np.linspace(-0.2, 0.2, n_s)

        bottom = 0.0
        for cov in cov_order:
            base_color = color_map[cov]
            shades = [lighten(base_color, t) for t in offsets]
            # Fall back to the base color for any shade string starting with "-".
            shades = [c if not c.startswith("-") else base_color for c in shades]
            for idx, s in enumerate(samples_present):
                v = contig.loc[s, cov] if cov in contig.columns else 0
                ax.bar(i, v, width, bottom=bottom, color=shades[idx])
                bottom += v

    ax.set_xticks(range(len(main_levels)))
    ax.set_xticklabels(main_levels, rotation=45, ha="right")
    ax.set_ylabel("percentage")
    # Heights are fractions in [0, 1]; show them as 0..100 so the axis matches
    # its "percentage" label (same convention as ``bar_chart``).
    ax.set_yticks(np.linspace(0, 1, 11))
    ax.set_yticklabels([f"{int(100 * v)}" for v in np.linspace(0, 1, 11)])
    ax.set_title(
        f"Proportions of {subtype_variable} by {main_variable}\n"
        f"Contribution by {sample_id}"
    )
    handles = [
        mpatches.Patch(color=color_map[c], label=c) for c in cov_order
    ]
    ax.legend(
        handles=handles, title=f"Class: {subtype_variable}",
        bbox_to_anchor=(1.02, 1), loc="upper left",
    )
    fig.tight_layout()
    save_to_pdf(fig, pdf_file)
    return ax
227
+
228
+
229
+ # ------------------------------------------------------------------
230
+ # Waffle chart (each tile = 1%)
231
+ # ------------------------------------------------------------------
232
+
233
def waffle_chart(
    sc_object,
    main_variable: str,
    subtype_variable: str,
    sample_id: Optional[str] = None,
    subtype_only: Optional[str] = None,
    colors: Optional[Sequence[str]] = None,
    table: Optional[str] = None,
    figsize=None,
    pdf_file: Optional[str] = None,
):
    """Draw one 10x10 waffle (each tile = 1%) per group or (group, sample).

    Parameters
    ----------
    sc_object
        Object handed to ``get_metadata`` to obtain the metadata table.
    main_variable, subtype_variable
        Metadata columns holding the group and the subtype of each row.
    sample_id
        Optional metadata column; when given, one panel is drawn per
        (group, sample) pair.
    subtype_only
        When given, every other subtype is pooled into "All Other" and each
        main group gets its own pair of colors.
    colors
        Forwarded to ``get_palette`` (ignored in ``subtype_only`` mode).
    table
        Forwarded to ``get_metadata``.
    figsize
        Figure size; derived from the panel grid when omitted.
    pdf_file
        Forwarded to ``save_to_pdf``.

    Returns
    -------
    None. Under a Jupyter inline backend the figure is displayed once and
    then closed.

    Raises
    ------
    ValueError
        If ``subtype_only`` is not a value of ``subtype_variable``.
    """
    metadata = get_metadata(sc_object, table=table)
    groups = metadata[main_variable].astype(str)
    covariable = metadata[subtype_variable].astype(str)

    if subtype_only is not None:
        if subtype_only not in covariable.unique():
            raise ValueError(
                f"subtype_only '{subtype_only}' not found in {subtype_variable}."
            )
        cov = np.where(covariable == subtype_only, subtype_only, "All Other")
        order = [subtype_only, "All Other"]
        # Consecutive color pairs; main group k uses entries 2k and 2k + 1.
        subtype_palette = [
            desaturate(c, 0.16)
            for c in (
                "#DBECDA", "#92C791", "#BEDAEC", "#7EB6D9", "#DDC7E2", "#86608E",
            )
        ]
    else:
        cov = covariable.to_numpy()
        order = list(covariable.value_counts(ascending=False).index)[::-1]
        subtype_palette = None

    if sample_id is not None:
        keys = (groups + "_" + metadata[sample_id].astype(str)).to_numpy()
    else:
        keys = groups.to_numpy()
    # Panel key → main group. Splitting the key on "_" would be wrong for
    # group names that themselves contain an underscore.
    main_of = dict(zip(keys, groups.to_numpy()))

    df = pd.DataFrame({"groups": keys, "covariable": cov})
    contig = pd.crosstab(df["groups"], df["covariable"])
    if subtype_only is not None:
        ncells = contig.get(subtype_only, pd.Series(0, index=contig.index))
    else:
        ncells = contig.sum(axis=1)
    contig = contig.div(contig.sum(axis=1), axis=0)
    contig = contig.reindex(columns=order, fill_value=0)

    palette = (
        get_palette(use_palette=colors, n_colors=len(order))
        if subtype_palette is None
        else subtype_palette
    )
    n_panels = len(contig)
    ncol = max(1, int(np.ceil(np.sqrt(n_panels))))
    nrow = int(np.ceil(n_panels / ncol))
    if figsize is None:
        # 3 in. per waffle column plus 1.8 in. of width for the legend column.
        figsize = (3 * ncol + 1.8, 3 * nrow)

    # GridSpec: nrow x (ncol + 1). The last column is a dedicated, axis-off
    # area where the legend lives, so it never overlaps the waffles.
    fig = plt.figure(figsize=figsize)
    gs = fig.add_gridspec(
        nrow, ncol + 1,
        width_ratios=[1.0] * ncol + [0.45],
        wspace=0.15, hspace=0.25,
    )
    axes = np.empty((nrow, ncol), dtype=object)
    for r in range(nrow):
        for c in range(ncol):
            axes[r, c] = fig.add_subplot(gs[r, c])
    legend_ax = fig.add_subplot(gs[:, -1])
    legend_ax.axis("off")

    # Map main groups → color pair indices for subtype_only mode
    main_order = sorted({main_of[k] for k in contig.index})
    main_idx = {m: i for i, m in enumerate(main_order)}

    for i, (gname, row) in enumerate(contig.iterrows()):
        ax = axes[i // ncol][i % ncol]
        percentages = (row * 100).round().astype(int).to_numpy()
        percentages = _balance_to_100(percentages)
        if subtype_only is not None:
            pi = main_idx[main_of[gname]] * 2
            colors_panel = [palette[pi % len(palette)],
                            palette[(pi + 1) % len(palette)]]
        else:
            colors_panel = [palette[k] for k in range(len(order))]
        _draw_waffle(ax, percentages, colors_panel, order, gname, ncells.get(gname, 0))
        if subtype_only is not None and len(percentages) >= 1:
            ax.text(4.5, 8, f"{percentages[0]:.0f}%",
                    ha="center", va="center", fontsize=11,
                    fontweight="bold", color=colors_panel[0])

    # Hide unused panels in the bottom-right of the waffle grid.
    for j in range(n_panels, nrow * ncol):
        axes[j // ncol][j % ncol].axis("off")

    # Legend swatches use the same position → color rule as the tiles. In
    # subtype_only mode that is the first main group's color pair; the
    # swatches used to be swapped relative to what the panels draw.
    handles = [
        mpatches.Patch(color=palette[pos], label=o)
        for pos, o in enumerate(order)
    ]
    legend_ax.legend(
        handles=handles,
        title=f"Class: {subtype_variable}",
        loc="center",
        frameon=False,
        borderaxespad=0.0,
        labelspacing=0.8,
    )

    save_to_pdf(fig, pdf_file)

    # Display once in Jupyter inline (the inline backend auto-flushes at cell
    # end AND the Figure has `_repr_*_` that re-renders if returned). Show the
    # figure here, then close it so the cell does not re-display. Deliberately
    # best-effort: a display failure must not break the call.
    try:
        from matplotlib import get_backend
        if "inline" in get_backend().lower():
            from IPython.display import display
            display(fig)
            plt.close(fig)
    except Exception:
        pass
    return None
367
+
368
+
369
+ def _balance_to_100(arr):
370
+ arr = arr.astype(int)
371
+ diff = 100 - int(arr.sum())
372
+ if diff == 0:
373
+ return arr
374
+ arr = arr.copy()
375
+ if diff > 0:
376
+ arr[arr.argmax()] += diff
377
+ else:
378
+ arr[arr.argmax()] += diff
379
+ return arr
380
+
381
+
382
def _draw_waffle(ax, percentages, colors_panel, order, title, ncells):
    """Paint a 10x10 grid of square tiles on ``ax``, one tile per percent.

    Tiles are filled row by row: the first ``percentages[0]`` tiles take
    ``colors_panel[0]``, the next ``percentages[1]`` take ``colors_panel[1]``
    and so on. Any tile index past the last color falls back to the last
    color. ``order`` is accepted but not used here. The cell count is shown
    as the x label only when ``ncells`` is truthy.
    """
    boundaries = np.cumsum(percentages)
    # Category index of each of the 100 tiles, capped at the last color.
    tile_class = np.searchsorted(boundaries, np.arange(100), side="right")
    tile_class = np.minimum(tile_class, len(colors_panel) - 1).reshape(10, 10)

    for row_i, col_j in np.ndindex(10, 10):
        tile = mpatches.Rectangle(
            (col_j, row_i), 0.85, 0.85,
            color=colors_panel[tile_class[row_i, col_j]],
        )
        ax.add_patch(tile)

    ax.set_xlim(-0.1, 10)
    ax.set_ylim(-0.1, 10)
    ax.set_aspect("equal")
    ax.invert_yaxis()  # first row of tiles at the top
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(str(title))
    if not ncells:
        return
    ax.set_xlabel(f"n. cells: {int(ncells):,}",
                  fontsize=8, style="italic", color="grey")
405
+
406
+
407
+ # ------------------------------------------------------------------
408
+ # Polar / circular barplot
409
+ # ------------------------------------------------------------------
410
+
411
def polar_chart(
    sc_object,
    main_variable: str,
    subtype_variable: str,
    sample_id: Optional[str] = None,
    subtype_only: Optional[str] = None,
    colors: Optional[Sequence[str]] = None,
    table: Optional[str] = None,
    figsize=(8, 8),
    pdf_file: Optional[str] = None,
):
    """Circular stacked barplot of subtype counts per group or (group, sample).

    Parameters
    ----------
    sc_object
        Object handed to ``get_metadata`` to obtain the metadata table.
    main_variable, subtype_variable
        Metadata columns holding the group and the subtype of each row.
    sample_id
        Optional metadata column; when given, one bar is drawn per
        (group, sample) pair, ordered by group and then by key.
    subtype_only
        When given, only this subtype is drawn.
    colors
        Forwarded to ``get_palette`` as ``use_palette``.
    table
        Forwarded to ``get_metadata``.
    figsize
        Size of the newly created figure.
    pdf_file
        Forwarded to ``save_to_pdf``.

    Returns
    -------
    The polar matplotlib Axes.
    """
    metadata = get_metadata(sc_object, table=table)
    groups = metadata[main_variable].astype(str)
    covariable = metadata[subtype_variable].astype(str)
    order = list(covariable.value_counts(ascending=True).index)  # smallest first
    palette = get_palette(use_palette=colors, n_colors=len(order))
    color_map = dict(zip(order[::-1], palette))

    if sample_id is not None:
        keys = (groups + "_" + metadata[sample_id].astype(str)).to_numpy()
    else:
        keys = groups.to_numpy()
    # Bar key → main group. Splitting the key on "_" would be wrong for
    # group names that themselves contain an underscore.
    main_of = dict(zip(keys, groups.to_numpy()))

    df = pd.DataFrame({"groups": keys, "covariable": covariable.values})
    # NOTE(review): bars show raw crosstab counts although the title says
    # "Proportions" — confirm this is intended.
    contig = pd.crosstab(df["groups"], df["covariable"])
    if subtype_only is not None:
        contig = contig[[subtype_only]]

    # Sort by main group, then by key within the group.
    if sample_id is not None:
        contig = contig.reindex(
            sorted(contig.index, key=lambda k: (main_of[k], k))
        )
    else:
        contig = contig.sort_index()

    n_bars = len(contig)
    angles = np.linspace(0, 2 * np.pi, n_bars, endpoint=False)
    width = 2 * np.pi / n_bars * 0.9  # leave a 10% gap between bars

    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(111, projection="polar")
    bottom = np.zeros(n_bars)
    cov_order = order[::-1] if subtype_only is None else [subtype_only]
    for cov in cov_order:
        if cov not in contig.columns:
            continue
        vals = contig[cov].values
        ax.bar(angles, vals, width=width, bottom=bottom,
               color=color_map.get(cov, "#999999"), label=cov, edgecolor="white")
        bottom += vals

    ax.set_xticks(angles)
    ax.set_xticklabels(contig.index, fontsize=7)
    ax.set_yticklabels([])
    ax.set_title(f"Proportions of {subtype_variable} by {main_variable}",
                 fontsize=14, fontweight="bold")
    ax.legend(bbox_to_anchor=(1.2, 1), loc="upper left",
              title=f"Class: {subtype_variable}")
    fig.tight_layout()
    save_to_pdf(fig, pdf_file)
    return ax
475
+
476
+
477
+ # ------------------------------------------------------------------
478
+ # Density (ridge) chart
479
+ # ------------------------------------------------------------------
480
+
481
def density_chart(
    sc_object,
    main_variable: str,
    subtype_variable: str,
    numerical_variable: str,
    sample_id: Optional[str] = None,
    colors: Optional[Sequence[str]] = None,
    table: Optional[str] = None,
    figsize=(9, 7),
    pdf_file: Optional[str] = None,
):
    """Ridge-style density plot of a numerical variable across covariate levels.

    The numerical variable can be a column in ``.obs`` or — when ``sc_object``
    is an AnnData — a feature name (gene); expression values from ``.X`` will
    be used.

    One ridge is drawn per subtype/group (or subtype/group/sample)
    combination, colored by subtype. Combinations with fewer than two
    non-missing values keep an empty row; combinations whose density cannot
    be estimated are skipped with a warning.

    Parameters
    ----------
    sc_object
        Object handed to ``get_metadata`` and ``get_numerical_variable``.
    main_variable, subtype_variable
        Metadata columns holding the group and the subtype of each row.
    numerical_variable
        Name resolved by ``get_numerical_variable``.
    sample_id
        Optional metadata column used to split ridges further.
    colors
        Forwarded to ``get_palette`` as ``use_palette``.
    table
        Forwarded to ``get_metadata``.
    figsize
        Size of the newly created figure.
    pdf_file
        Forwarded to ``save_to_pdf``.

    Returns
    -------
    The matplotlib Axes of the new figure.
    """
    import warnings

    from scipy.stats import gaussian_kde

    metadata = get_metadata(sc_object, table=table)
    metadata = metadata.dropna(subset=[subtype_variable])
    groups = metadata[main_variable].astype(str)
    covariable = metadata[subtype_variable].astype(str)
    order = list(covariable.value_counts(ascending=True).index)
    palette = get_palette(use_palette=colors, n_colors=len(order))
    color_map = dict(zip(order, palette))

    values = get_numerical_variable(sc_object, numerical_variable, metadata)
    metadata = metadata.assign(_val=values)

    # Build the ridge labels BEFORE dropping rows with a missing value, so
    # that every Series involved shares one index. Building them afterwards
    # mis-aligns the sample column and yields NaN labels that cannot be
    # sorted together with strings.
    sub_label = covariable + "_" + groups
    if sample_id is not None:
        sub_label = sub_label + "_" + metadata[sample_id].astype(str)

    # Ordered ridge labels plus the subtype each one belongs to. The subtype
    # is tracked explicitly: recovering it with label.split("_")[0] breaks
    # for subtype names that contain an underscore.
    levels = []
    cov_of = {}
    for cov in order:
        for label in sorted(sub_label[covariable == cov].unique()):
            if label in cov_of:
                continue  # identical label from another subtype; draw once
            levels.append(label)
            cov_of[label] = cov

    metadata = metadata.assign(_label=sub_label).dropna(subset=["_val"])

    fig, ax = plt.subplots(figsize=figsize)
    overlap = 0.7  # how far a ridge may rise into the row above
    for y, label in enumerate(levels):
        color = color_map[cov_of[label]]
        vals = metadata.loc[metadata["_label"] == label, "_val"].to_numpy()
        if len(vals) < 2:
            continue
        try:
            kde = gaussian_kde(vals)
            xs = np.linspace(np.min(vals), np.max(vals), 200)
            ys = kde(xs)
        except Exception as err:
            # Best-effort: e.g. all-identical values give a singular KDE.
            # Skip this ridge but tell the user instead of hiding it.
            warnings.warn(
                f"density_chart: could not estimate density for '{label}': {err}"
            )
            continue
        ys = ys / ys.max() * (1 + overlap)
        ax.fill_between(xs, y, y + ys, color=color, alpha=0.6, lw=0)
        ax.plot(xs, y + ys, color=color, lw=0.5)
        med = np.median(vals)
        ax.vlines(med, y, y + np.interp(med, xs, ys), color="black", lw=0.7)

    ax.set_yticks(np.arange(len(levels)) + 0.3)
    ax.set_yticklabels(levels, fontsize=8)
    ax.set_xlabel(numerical_variable)
    title = f"Density distribution of {numerical_variable} across {subtype_variable}"
    if sample_id:
        title += f"\nsplit across {sample_id}"
    ax.set_title(title)
    fig.tight_layout()
    save_to_pdf(fig, pdf_file)
    return ax
556
+
557
+
558
+ # ------------------------------------------------------------------
559
+ # Dynamics chart (proportion trends across >2 conditions)
560
+ # ------------------------------------------------------------------
561
+
562
def dynamics_chart(
    gamma_results: pd.DataFrame,
    scale_data: bool = False,
    figsize=(10, 8),
    pdf_file: Optional[str] = None,
):
    """Plot per-row proportion trends and their ``groupGammaCor`` values.

    Every column of ``gamma_results`` other than ``groupGammaCor``, ``p.adj``,
    ``CI95low`` and ``CI95high`` is treated as one x position. The upper
    panel draws one line per row; the lower panel shows each row's
    ``groupGammaCor`` as a dot, sorted ascending. With ``scale_data`` each
    row is standardized (mean 0, sample standard deviation 1) first.

    Always returns ``None``; under a Jupyter inline backend the figure is
    displayed once and then closed.
    """
    table = gamma_results.copy()
    not_percent = {"groupGammaCor", "p.adj", "CI95low", "CI95high"}
    pct_cols = [col for col in table.columns if col not in not_percent]

    if scale_data:
        def _standardize(row):
            return (row - row.mean()) / row.std(ddof=1)

        table[pct_cols] = table[pct_cols].apply(_standardize, axis=1)

    fig, panels = plt.subplots(
        2, 1, figsize=figsize, gridspec_kw={"height_ratios": [3, 1]},
    )
    trend_ax = panels[0]
    cor_ax = panels[1]
    tint_of = dict(zip(table.index, get_palette(n_colors=len(table))))
    n_points = len(pct_cols)
    tick_text = [col.replace("percent_in_", "proportion ") for col in pct_cols]

    # Upper panel: one trend line per row, annotated with its correlation.
    for name in table.index:
        tint = tint_of[name]
        trend_ax.plot(range(n_points), table.loc[name, pct_cols].values,
                      marker="s", color=tint, label=name, lw=1.2)
        trend_ax.text(n_points - 0.95, table.loc[name, pct_cols[-1]],
                      f"cor. {table.loc[name, 'groupGammaCor']:.2f}",
                      fontsize=8, fontweight="bold", color=tint)
    trend_ax.set_xticks(range(n_points))
    trend_ax.set_xticklabels(tick_text, rotation=20, ha="right")
    trend_ax.set_ylabel("proportion")
    trend_ax.set_title("Proportion dynamics across groups", fontweight="bold")
    trend_ax.legend(bbox_to_anchor=(1.02, 1), loc="upper left", fontsize=8)

    # Lower panel: correlation per row, lowest first.
    for name in table.sort_values("groupGammaCor").index.tolist():
        cor_ax.scatter(name, table.loc[name, "groupGammaCor"],
                       s=120, color=tint_of[name], edgecolor="black", zorder=4)
    cor_ax.axhline(0, color="darkgrey", lw=0.6)
    cor_ax.set_ylim(-1, 1)
    cor_ax.set_ylabel("Kendall correlation")
    cor_ax.tick_params(axis="x", rotation=45)
    cor_ax.grid(True, axis="y", alpha=0.3)

    fig.tight_layout()
    save_to_pdf(fig, pdf_file)

    # Under the Jupyter inline backend, show the figure once explicitly and
    # close it so the cell does not render it a second time. Best-effort.
    try:
        from matplotlib import get_backend
        if "inline" in get_backend().lower():
            from IPython.display import display
            display(fig)
            plt.close(fig)
    except Exception:
        pass
    return None
618
+
619
+
620
+ # ------------------------------------------------------------------
621
+ # Abundance test plot (after lots_of_cells with 2 groups)
622
+ # ------------------------------------------------------------------
623
+
624
def plot_abundance_test(
    table_results: pd.DataFrame,
    subtype_variable: str = "covariable",
    figsize=(8, 6),
    pdf_file: Optional[str] = None,
):
    """Bubble plot of per-class fold change with CI bars and an SD ribbon.

    Rows are sorted by ``groupFC``. Each row gets a pink band spanning
    ± ``sd.montecarlo`` around zero, a horizontal line from ``CI95low`` to
    ``CI95high``, and a dot at ``groupFC`` colored by signed
    ``-log10(p.adj)``. Adjusted p-values of exactly 0 are replaced by 1e-5
    before taking the logarithm.

    Parameters
    ----------
    table_results
        Must contain ``groupFC``, ``p.adj``, ``CI95low``, ``CI95high`` and
        ``sd.montecarlo``. Its second and third columns must be named
        ``percent_in_<group>``; they supply the two group names. The frame
        is not modified.
    subtype_variable
        Name shown in the title.
    figsize
        Size of the newly created figure.
    pdf_file
        Forwarded to ``save_to_pdf``.

    Returns
    -------
    The matplotlib Axes of the new figure.
    """
    df = table_results.sort_values("groupFC").copy()
    cols = list(df.columns)
    # Group names are read positionally from the 2nd and 3rd columns.
    on_right = cols[1].split("percent_in_")[1]
    on_left = cols[2].split("percent_in_")[1]

    # Symmetric x range wide enough for the widest confidence interval.
    ci_extent = np.abs(df[["CI95low", "CI95high"]].fillna(0).to_numpy()).max()
    guide = float(np.ceil(ci_extent)) + 0.5

    # Work on a private float copy: the array returned by to_numpy() may be a
    # view of the frame (read-only under pandas Copy-on-Write), and an integer
    # column could not hold 1e-5.
    p_adj = df["p.adj"].to_numpy(dtype=float, copy=True)
    p_adj[p_adj == 0] = 1e-5  # avoid log10(0)
    significance = np.sign(df["groupFC"].values) * -np.log10(p_adj)

    cmap = LinearSegmentedColormap.from_list(
        "fc_cmap",
        ["#122A53", "#43587D", "#8BBCD4", "#C1DEEF", "#EEF6FF", "#FDFFFF",
         "#F6F3FF", "#DDCFFF", "#D1AADB", "#76608E", "#463955"],
    )

    fig, ax = plt.subplots(figsize=figsize)
    # SD ribbon (zorder 1) and CI bars (zorder 2); zorder fixes the layering,
    # so both can be drawn in one pass over the rows.
    for i, (_, row) in enumerate(df.iterrows()):
        sd = row["sd.montecarlo"]
        ax.add_patch(mpatches.Rectangle(
            (-sd, i - 0.4),
            2 * sd, 0.8,
            color="pink", alpha=0.3, zorder=1,
        ))
        ax.hlines(i, row["CI95low"], row["CI95high"],
                  colors="#70508E", lw=0.6, zorder=2)

    # Points
    norm = plt.Normalize(-3, 3)
    sc = ax.scatter(
        df["groupFC"], range(len(df)),
        c=significance, cmap=cmap, norm=norm,
        s=140, edgecolors="black", linewidths=0.3, zorder=3,
    )
    ax.axvline(0, color="#86608E", lw=0.6)
    ax.set_yticks(range(len(df)))
    ax.set_yticklabels(df.index)
    ax.set_xlim(-guide, guide)
    ax.set_xlabel(f"log2(proportion_FC) : ({on_right}/{on_left})")
    ax.set_title(
        f"Fold-Change difference in proportion\n"
        f"Monte-Carlo simulation on {subtype_variable}"
    )
    ax.text(-1, -0.6, on_left, color="grey", ha="center")
    ax.text(1, -0.6, on_right, color="grey", ha="center")
    cbar = fig.colorbar(sc, ax=ax, ticks=[-3, -2, -1, 0, 1, 2, 3])
    cbar.set_label("signed -log10(p.adj)")
    fig.tight_layout()
    save_to_pdf(fig, pdf_file)
    return ax
@@ -0,0 +1,21 @@
1
+ Metadata-Version: 2.4
2
+ Name: lotsofcells
3
+ Version: 0.3.0
4
+ Summary: Python port of lotsOfCells: proportion-test statistics and visualization on single-cell metadata. Compatible with scanpy/AnnData and spatial transcriptomics.
5
+ Author: Oscar Gonzalez-Velasco
6
+ License: MIT
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: numpy>=1.23
10
+ Requires-Dist: pandas>=1.5
11
+ Requires-Dist: scipy>=1.9
12
+ Requires-Dist: matplotlib>=3.6
13
+ Requires-Dist: anndata>=0.9
14
+ Provides-Extra: scanpy
15
+ Requires-Dist: scanpy>=1.9; extra == "scanpy"
16
+ Provides-Extra: spatial
17
+ Requires-Dist: spatialdata>=0.1; extra == "spatial"
18
+ Provides-Extra: mudata
19
+ Requires-Dist: mudata>=0.2; extra == "mudata"
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest>=7; extra == "dev"