resolutiontree 0.1.1__tar.gz → 0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {resolutiontree-0.1.1 → resolutiontree-0.2}/PKG-INFO +17 -5
- {resolutiontree-0.1.1 → resolutiontree-0.2}/README.md +1 -1
- {resolutiontree-0.1.1 → resolutiontree-0.2}/pyproject.toml +33 -6
- resolutiontree-0.2/src/resolutiontree/__init__.py +14 -0
- {resolutiontree-0.1.1 → resolutiontree-0.2}/src/resolutiontree/core.py +35 -6
- {resolutiontree-0.1.1 → resolutiontree-0.2}/src/resolutiontree/utils.py +123 -56
- {resolutiontree-0.1.1 → resolutiontree-0.2}/src/resolutiontree.egg-info/PKG-INFO +17 -5
- {resolutiontree-0.1.1 → resolutiontree-0.2}/src/resolutiontree.egg-info/SOURCES.txt +8 -1
- resolutiontree-0.2/src/resolutiontree.egg-info/requires.txt +13 -0
- {resolutiontree-0.1.1 → resolutiontree-0.2}/tests/test_cluster_resolution.py +10 -11
- {resolutiontree-0.1.1 → resolutiontree-0.2}/tests/test_cluster_tree.py +2 -2
- resolutiontree-0.2/tests/test_install_illumina.py +0 -0
- resolutiontree-0.1.1/tests/test_installation.py → resolutiontree-0.2/tests/test_install_pbmc3k.py +9 -12
- resolutiontree-0.2/tests/test_install_visiumhd.py +0 -0
- resolutiontree-0.2/tests/test_install_xenium.py +59 -0
- resolutiontree-0.2/tests/test_pipeline_pbmc3k.py +73 -0
- resolutiontree-0.2/tests/test_pipeline_xenium.py +69 -0
- {resolutiontree-0.1.1 → resolutiontree-0.2}/setup.cfg +0 -0
- {resolutiontree-0.1.1 → resolutiontree-0.2}/src/resolutiontree.egg-info/dependency_links.txt +0 -0
- {resolutiontree-0.1.1 → resolutiontree-0.2}/src/resolutiontree.egg-info/top_level.txt +0 -0
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: resolutiontree
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.2
|
|
4
4
|
Summary: Systematic exploration of clustering resolutions in single-cell analysis
|
|
5
5
|
Author-email: Joe Hou <joseph.houjue@gmail.com>
|
|
6
6
|
License: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/joe-jhou2/resolutiontree
|
|
8
|
-
Project-URL: Documentation, https://resolutiontree.readthedocs.io/
|
|
9
8
|
Project-URL: Repository, https://github.com/joe-jhou2/resolutiontree
|
|
10
9
|
Project-URL: Bug Tracker, https://github.com/joe-jhou2/resolutiontree/issues
|
|
11
10
|
Keywords: single-cell,clustering,resolution,scanpy,leiden,visualization
|
|
@@ -14,12 +13,25 @@ Classifier: Intended Audience :: Science/Research
|
|
|
14
13
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
15
14
|
Classifier: License :: OSI Approved :: MIT License
|
|
16
15
|
Classifier: Programming Language :: Python :: 3
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
18
16
|
Classifier: Programming Language :: Python :: 3.9
|
|
19
17
|
Classifier: Programming Language :: Python :: 3.10
|
|
20
18
|
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
-
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Requires-Python: >=3.9
|
|
22
21
|
Description-Content-Type: text/markdown
|
|
22
|
+
Requires-Dist: numpy>=1.26
|
|
23
|
+
Requires-Dist: pandas>=2.1
|
|
24
|
+
Requires-Dist: scipy>=1.11
|
|
25
|
+
Requires-Dist: matplotlib>=3.8
|
|
26
|
+
Requires-Dist: seaborn>=0.13
|
|
27
|
+
Requires-Dist: scanpy>=1.9.8
|
|
28
|
+
Requires-Dist: anndata>=0.10
|
|
29
|
+
Requires-Dist: igraph>=0.11
|
|
30
|
+
Requires-Dist: networkx>=3.2
|
|
31
|
+
Requires-Dist: leidenalg>=0.10
|
|
32
|
+
Requires-Dist: umap-learn>=0.5.5
|
|
33
|
+
Requires-Dist: spatialdata
|
|
34
|
+
Requires-Dist: spatialdata[extra]
|
|
23
35
|
|
|
24
36
|
# ResolutionTree
|
|
25
37
|
|
|
@@ -56,7 +68,7 @@ ResolutionTree provides:
|
|
|
56
68
|
### From GitHub Release (Recommended)
|
|
57
69
|
|
|
58
70
|
```bash
|
|
59
|
-
pip install resolutiontree
|
|
71
|
+
pip install resolutiontree==0.2
|
|
60
72
|
```
|
|
61
73
|
|
|
62
74
|
### From Source
|
|
@@ -1,33 +1,60 @@
|
|
|
1
1
|
[build-system]
|
|
2
|
-
requires = ["setuptools>=
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "resolutiontree"
|
|
7
|
-
version = "0.1.1"
|
|
7
|
+
version = "0.2"
|
|
8
8
|
description = "Systematic exploration of clustering resolutions in single-cell analysis"
|
|
9
9
|
readme = "README.md"
|
|
10
|
-
requires-python = ">=3.
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
|
|
11
12
|
license = {text = "MIT"}
|
|
13
|
+
|
|
12
14
|
authors = [
|
|
13
15
|
{name = "Joe Hou", email = "joseph.houjue@gmail.com"},
|
|
14
16
|
]
|
|
15
|
-
|
|
17
|
+
|
|
18
|
+
keywords = [
|
|
19
|
+
"single-cell",
|
|
20
|
+
"clustering",
|
|
21
|
+
"resolution",
|
|
22
|
+
"scanpy",
|
|
23
|
+
"leiden",
|
|
24
|
+
"visualization"
|
|
25
|
+
]
|
|
26
|
+
|
|
16
27
|
classifiers = [
|
|
17
28
|
"Development Status :: 4 - Beta",
|
|
18
29
|
"Intended Audience :: Science/Research",
|
|
19
30
|
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
20
31
|
"License :: OSI Approved :: MIT License",
|
|
32
|
+
|
|
21
33
|
"Programming Language :: Python :: 3",
|
|
22
|
-
"Programming Language :: Python :: 3.8",
|
|
23
34
|
"Programming Language :: Python :: 3.9",
|
|
24
35
|
"Programming Language :: Python :: 3.10",
|
|
25
36
|
"Programming Language :: Python :: 3.11",
|
|
37
|
+
"Programming Language :: Python :: 3.12"
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
dependencies = [
|
|
41
|
+
"numpy>=1.26",
|
|
42
|
+
"pandas>=2.1",
|
|
43
|
+
"scipy>=1.11",
|
|
44
|
+
"matplotlib>=3.8",
|
|
45
|
+
"seaborn>=0.13",
|
|
46
|
+
"scanpy>=1.9.8",
|
|
47
|
+
"anndata>=0.10",
|
|
48
|
+
"igraph>=0.11",
|
|
49
|
+
"networkx>=3.2",
|
|
50
|
+
"leidenalg>=0.10",
|
|
51
|
+
"umap-learn>=0.5.5",
|
|
52
|
+
"spatialdata",
|
|
53
|
+
"spatialdata[extra]"
|
|
26
54
|
]
|
|
27
55
|
|
|
28
56
|
[project.urls]
|
|
29
57
|
Homepage = "https://github.com/joe-jhou2/resolutiontree"
|
|
30
|
-
Documentation = "https://resolutiontree.readthedocs.io/"
|
|
31
58
|
Repository = "https://github.com/joe-jhou2/resolutiontree"
|
|
32
59
|
"Bug Tracker" = "https://github.com/joe-jhou2/resolutiontree/issues"
|
|
33
60
|
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ResolutionTree: Systematic exploration of clustering resolutions in single-cell analysis
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
__version__ = "0.2"
|
|
6
|
+
__author__ = "Joe Hou"
|
|
7
|
+
|
|
8
|
+
from .utils import cluster_resolution_finder
|
|
9
|
+
from .core import cluster_decision_tree
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"cluster_resolution_finder",
|
|
13
|
+
"cluster_decision_tree",
|
|
14
|
+
]
|
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import math
|
|
4
4
|
from typing import TYPE_CHECKING, TypedDict, cast
|
|
5
|
-
|
|
5
|
+
# import sys
|
|
6
6
|
import igraph as ig
|
|
7
7
|
import matplotlib.colors as mcolors
|
|
8
8
|
import matplotlib.pyplot as plt
|
|
@@ -14,6 +14,7 @@ if TYPE_CHECKING:
|
|
|
14
14
|
import networkx as nx
|
|
15
15
|
import pandas as pd
|
|
16
16
|
from anndata import AnnData
|
|
17
|
+
import spatialdata as sd
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
class OutputSettings(TypedDict):
|
|
@@ -112,8 +113,17 @@ class ClusterTreePlotter:
|
|
|
112
113
|
clustering_settings
|
|
113
114
|
Clustering settings (prefix).
|
|
114
115
|
"""
|
|
116
|
+
if isinstance(adata, sd.SpatialData):
|
|
117
|
+
if "table" not in adata.tables:
|
|
118
|
+
raise ValueError("SpatialData must contain 'table'")
|
|
119
|
+
adata = adata.tables["table"]
|
|
120
|
+
|
|
121
|
+
if not isinstance(adata, AnnData):
|
|
122
|
+
raise TypeError("ClusterTreePlotter requires AnnData")
|
|
123
|
+
|
|
115
124
|
self.adata = adata
|
|
116
125
|
self.resolutions = resolutions
|
|
126
|
+
|
|
117
127
|
self.output_settings = self._merge_with_default(
|
|
118
128
|
output_settings, self.default_output_settings()
|
|
119
129
|
)
|
|
@@ -564,6 +574,10 @@ class ClusterTreePlotter:
|
|
|
564
574
|
data = settings["data"]
|
|
565
575
|
prefix = settings["prefix"]
|
|
566
576
|
|
|
577
|
+
# in_jupyter = 'ipykernel' in sys.modules
|
|
578
|
+
# if in_jupyter:
|
|
579
|
+
# print("🔄 Running latest code from editable install")
|
|
580
|
+
|
|
567
581
|
# Step 1: Compute Cluster Sizes, Node Sizes, and Node Colors
|
|
568
582
|
cluster_sizes = self._compute_cluster_sizes(data, prefix, self.resolutions)
|
|
569
583
|
node_sizes = self._scale_node_sizes(
|
|
@@ -579,16 +593,18 @@ class ClusterTreePlotter:
|
|
|
579
593
|
node_colors = self._assign_node_colors(
|
|
580
594
|
data, prefix, self.resolutions, settings["node_color"], color_schemes
|
|
581
595
|
)
|
|
582
|
-
|
|
583
|
-
plt.ioff() # Disable interactive mode
|
|
584
|
-
|
|
585
596
|
# Step 2: Set up the plot figure and axis
|
|
597
|
+
plt.close('all')
|
|
598
|
+
plt.ioff()
|
|
599
|
+
|
|
586
600
|
self.fig = plt.figure(figsize=settings["figsize"], dpi=settings["dpi"])
|
|
587
601
|
self.ax = self.fig.add_subplot(111)
|
|
602
|
+
|
|
588
603
|
# Step 3: Compute Edge Weights, Edge Colors
|
|
589
604
|
edges, weights, edge_colors = self._compute_edge_weights_colors(
|
|
590
605
|
self.G, settings["edge_threshold"], settings["edge_color"], node_colors
|
|
591
606
|
)
|
|
607
|
+
|
|
592
608
|
# Step 4: Draw Nodes and Node Labels
|
|
593
609
|
node_styles = {"colors": node_colors, "sizes": node_sizes}
|
|
594
610
|
node_labels, gene_labels = self._draw_nodes_and_labels(
|
|
@@ -610,6 +626,7 @@ class ClusterTreePlotter:
|
|
|
610
626
|
font_size=int(settings["node_label_fontsize"]),
|
|
611
627
|
font_color="black",
|
|
612
628
|
)
|
|
629
|
+
|
|
613
630
|
# Step 5: Draw Gene Labels
|
|
614
631
|
gene_label_bottoms = {}
|
|
615
632
|
if settings["show_gene_labels"] and gene_labels:
|
|
@@ -622,6 +639,7 @@ class ClusterTreePlotter:
|
|
|
622
639
|
offset=settings["gene_label_offset"],
|
|
623
640
|
fontsize=settings["gene_label_fontsize"],
|
|
624
641
|
)
|
|
642
|
+
|
|
625
643
|
# Step 6: Build and Draw Edge Labels
|
|
626
644
|
edge_labels = self._build_edge_labels(
|
|
627
645
|
self.G, settings["edge_threshold"], settings["edge_label_threshold"]
|
|
@@ -642,6 +660,7 @@ class ClusterTreePlotter:
|
|
|
642
660
|
edge_labels=edge_labels,
|
|
643
661
|
edge_label_style=edge_label_style,
|
|
644
662
|
)
|
|
663
|
+
|
|
645
664
|
# Step 7: Draw Level Labels
|
|
646
665
|
self._draw_level_labels(
|
|
647
666
|
resolutions=self.resolutions,
|
|
@@ -651,15 +670,17 @@ class ClusterTreePlotter:
|
|
|
651
670
|
level_label_offset=settings["level_label_offset"],
|
|
652
671
|
level_label_fontsize=settings["level_label_fontsize"],
|
|
653
672
|
)
|
|
673
|
+
|
|
654
674
|
# Step 8: Final Plot Settings
|
|
655
675
|
self.ax.set_title(settings["title"], fontsize=settings["title_fontsize"])
|
|
656
676
|
self.ax.axis("off")
|
|
677
|
+
|
|
657
678
|
# Save or show the plot
|
|
658
679
|
if settings["output_path"]:
|
|
659
680
|
plt.savefig(settings["output_path"], bbox_inches="tight")
|
|
660
681
|
if settings["draw"]:
|
|
682
|
+
self.fig.canvas.draw()
|
|
661
683
|
plt.show()
|
|
662
|
-
|
|
663
684
|
plt.ion()
|
|
664
685
|
|
|
665
686
|
def _get_draw_settings(self) -> dict:
|
|
@@ -879,7 +900,7 @@ class ClusterTreePlotter:
|
|
|
879
900
|
pos,
|
|
880
901
|
nodelist=[node],
|
|
881
902
|
node_size=size,
|
|
882
|
-
node_color=color,
|
|
903
|
+
node_color=[color],
|
|
883
904
|
edgecolors="none",
|
|
884
905
|
)
|
|
885
906
|
node_labels[node] = str(cluster)
|
|
@@ -1237,6 +1258,14 @@ class ClusterTreePlotter:
|
|
|
1237
1258
|
Directed graph representing the hierarchical clustering.
|
|
1238
1259
|
|
|
1239
1260
|
"""
|
|
1261
|
+
if isinstance(adata, sd.SpatialData):
|
|
1262
|
+
if "table" not in adata.tables:
|
|
1263
|
+
raise ValueError("SpatialData must contain 'table'")
|
|
1264
|
+
adata = adata.tables["table"]
|
|
1265
|
+
|
|
1266
|
+
if not isinstance(adata, AnnData):
|
|
1267
|
+
raise TypeError("Expected AnnData after extraction")
|
|
1268
|
+
|
|
1240
1269
|
# Run all validations
|
|
1241
1270
|
ClusterTreePlotter._validate_parameters(output_settings, node_style, edge_style)
|
|
1242
1271
|
ClusterTreePlotter._validate_clustering_data(
|
|
@@ -5,6 +5,7 @@ from collections.abc import Sequence
|
|
|
5
5
|
from typing import Literal
|
|
6
6
|
from anndata import AnnData
|
|
7
7
|
from scanpy.tools._rank_genes_groups import rank_genes_groups
|
|
8
|
+
import spatialdata as sd
|
|
8
9
|
|
|
9
10
|
def find_cluster_specific_genes(
|
|
10
11
|
adata: AnnData,
|
|
@@ -181,8 +182,61 @@ def find_per_resolution_degs(
|
|
|
181
182
|
return top_genes_dict
|
|
182
183
|
|
|
183
184
|
|
|
185
|
+
""" def _extract_adata(obj):
|
|
186
|
+
import spatialdata as sd
|
|
187
|
+
from anndata import AnnData
|
|
188
|
+
|
|
189
|
+
if isinstance(obj, AnnData):
|
|
190
|
+
return obj
|
|
191
|
+
|
|
192
|
+
if isinstance(obj, sd.SpatialData):
|
|
193
|
+
if "table" not in obj.tables:
|
|
194
|
+
raise ValueError("SpatialData object has no 'table' in .tables")
|
|
195
|
+
return obj.tables["table"]
|
|
196
|
+
|
|
197
|
+
raise TypeError(
|
|
198
|
+
"Input must be AnnData or SpatialData (with .tables['table'])"
|
|
199
|
+
)
|
|
200
|
+
"""
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def extract_adata(obj) -> AnnData:
|
|
204
|
+
"""
|
|
205
|
+
Accepts either AnnData or SpatialData and returns AnnData.
|
|
206
|
+
"""
|
|
207
|
+
if isinstance(obj, AnnData):
|
|
208
|
+
return obj
|
|
209
|
+
|
|
210
|
+
if isinstance(obj, sd.SpatialData):
|
|
211
|
+
if len(obj.tables) == 0:
|
|
212
|
+
raise ValueError("SpatialData has no tables")
|
|
213
|
+
|
|
214
|
+
# default convention: first table is main expression matrix
|
|
215
|
+
return obj.tables.get("table", next(iter(obj.tables.values())))
|
|
216
|
+
|
|
217
|
+
raise TypeError("Input must be AnnData or SpatialData")
|
|
218
|
+
|
|
219
|
+
def write_results(obj, adata: AnnData, results: dict, prefix: str = None):
|
|
220
|
+
"""
|
|
221
|
+
Write results back to either AnnData or SpatialData.
|
|
222
|
+
"""
|
|
223
|
+
|
|
224
|
+
if isinstance(obj, AnnData):
|
|
225
|
+
obj.uns.update(results)
|
|
226
|
+
return obj
|
|
227
|
+
|
|
228
|
+
if isinstance(obj, sd.SpatialData):
|
|
229
|
+
table = obj.tables.get("table", None)
|
|
230
|
+
if table is None:
|
|
231
|
+
raise ValueError("SpatialData has no 'table'")
|
|
232
|
+
|
|
233
|
+
table.uns.update(results)
|
|
234
|
+
return obj
|
|
235
|
+
|
|
236
|
+
raise TypeError("Unsupported object type")
|
|
237
|
+
|
|
184
238
|
def cluster_resolution_finder(
|
|
185
|
-
|
|
239
|
+
data,
|
|
186
240
|
resolutions: list[float],
|
|
187
241
|
*,
|
|
188
242
|
prefix: str = "leiden_res_",
|
|
@@ -255,71 +309,84 @@ def cluster_resolution_finder(
|
|
|
255
309
|
import io
|
|
256
310
|
|
|
257
311
|
from scanpy.tools import leiden
|
|
312
|
+
import warnings
|
|
313
|
+
from anndata._core.views import ImplicitModificationWarning
|
|
314
|
+
|
|
315
|
+
adata = extract_adata(data)
|
|
316
|
+
|
|
317
|
+
with warnings.catch_warnings():
|
|
318
|
+
warnings.simplefilter("ignore", category=ImplicitModificationWarning)
|
|
258
319
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
320
|
+
# Suppress prints if pytest is running
|
|
321
|
+
if "pytest" in sys.modules:
|
|
322
|
+
sys.stdout = io.StringIO()
|
|
262
323
|
|
|
263
|
-
|
|
324
|
+
_validate_cluster_resolution_inputs(adata, resolutions, method, flavor)
|
|
264
325
|
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
326
|
+
# Run Leiden clustering
|
|
327
|
+
for resolution in resolutions:
|
|
328
|
+
res_key = f"{prefix}{resolution}"
|
|
329
|
+
try:
|
|
330
|
+
leiden(
|
|
331
|
+
adata,
|
|
332
|
+
resolution=resolution,
|
|
333
|
+
flavor="igraph",
|
|
334
|
+
n_iterations=n_iterations,
|
|
335
|
+
key_added=res_key,
|
|
336
|
+
)
|
|
337
|
+
if "pytest" not in sys.modules and not hasattr(
|
|
338
|
+
sys, "_called_from_test"
|
|
339
|
+
): # Suppress print in tests
|
|
340
|
+
print(f"Completed Leiden clustering for resolution {resolution}")
|
|
341
|
+
except ValueError as e:
|
|
342
|
+
msg = f"Leiden clustering failed at resolution {resolution} due to invalid value: {e}"
|
|
343
|
+
raise RuntimeError(msg) from None
|
|
344
|
+
except TypeError as e:
|
|
345
|
+
msg = f"Leiden clustering failed at resolution {resolution} due to incorrect type: {e}"
|
|
346
|
+
raise RuntimeError(msg) from None
|
|
347
|
+
except RuntimeError as e:
|
|
348
|
+
msg = f"Leiden clustering failed at resolution {resolution}: {e}"
|
|
349
|
+
raise RuntimeError(msg) from None
|
|
350
|
+
|
|
351
|
+
# Find cluster-specific genes
|
|
352
|
+
top_genes_dict = find_cluster_specific_genes(
|
|
353
|
+
adata=adata,
|
|
354
|
+
resolutions=resolutions,
|
|
355
|
+
prefix=prefix,
|
|
356
|
+
method=method,
|
|
357
|
+
n_top_genes=n_top_genes,
|
|
358
|
+
min_cells=min_cells,
|
|
359
|
+
deg_mode=deg_mode,
|
|
360
|
+
verbose=verbose,
|
|
361
|
+
)
|
|
362
|
+
|
|
363
|
+
# Create DataFrame for clusterDecisionTree
|
|
268
364
|
try:
|
|
269
|
-
|
|
270
|
-
adata
|
|
271
|
-
resolution=resolution,
|
|
272
|
-
flavor="igraph",
|
|
273
|
-
n_iterations=n_iterations,
|
|
274
|
-
key_added=res_key,
|
|
365
|
+
cluster_data = pd.DataFrame(
|
|
366
|
+
{f"{prefix}{r}": adata.obs[f"{prefix}{r}"] for r in resolutions}
|
|
275
367
|
)
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
)
|
|
279
|
-
print(f"Completed Leiden clustering for resolution {resolution}")
|
|
368
|
+
except KeyError as e:
|
|
369
|
+
msg = f"Failed to create cluster_data DataFrame: missing column {e}"
|
|
370
|
+
raise RuntimeError(msg) from None
|
|
280
371
|
except ValueError as e:
|
|
281
|
-
msg = f"
|
|
372
|
+
msg = f"Failed to create cluster_data DataFrame due to invalid value: {e}"
|
|
282
373
|
raise RuntimeError(msg) from None
|
|
283
374
|
except TypeError as e:
|
|
284
|
-
msg = f"
|
|
375
|
+
msg = f"Failed to create cluster_data DataFrame due to incorrect type: {e}"
|
|
285
376
|
raise RuntimeError(msg) from None
|
|
286
|
-
except RuntimeError as e:
|
|
287
|
-
msg = f"Leiden clustering failed at resolution {resolution}: {e}"
|
|
288
|
-
raise RuntimeError(msg) from None
|
|
289
|
-
|
|
290
|
-
# Find cluster-specific genes
|
|
291
|
-
top_genes_dict = find_cluster_specific_genes(
|
|
292
|
-
adata=adata,
|
|
293
|
-
resolutions=resolutions,
|
|
294
|
-
prefix=prefix,
|
|
295
|
-
method=method,
|
|
296
|
-
n_top_genes=n_top_genes,
|
|
297
|
-
min_cells=min_cells,
|
|
298
|
-
deg_mode=deg_mode,
|
|
299
|
-
verbose=verbose,
|
|
300
|
-
)
|
|
301
377
|
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
msg = f"Failed to create cluster_data DataFrame due to incorrect type: {e}"
|
|
315
|
-
raise RuntimeError(msg) from None
|
|
316
|
-
|
|
317
|
-
# Store the results in adata.uns
|
|
318
|
-
adata.uns["cluster_resolution_top_genes"] = top_genes_dict
|
|
319
|
-
# adata.uns["cluster_resolution_top_genes"] = _convert_tuple_keys(top_genes_dict)
|
|
320
|
-
adata.uns["cluster_resolution_cluster_data"] = cluster_data
|
|
321
|
-
|
|
322
|
-
# return adata
|
|
378
|
+
# Store the results in adata.uns
|
|
379
|
+
# adata.uns["cluster_resolution_top_genes"] = top_genes_dict
|
|
380
|
+
# adata.uns["cluster_resolution_cluster_data"] = cluster_data
|
|
381
|
+
# return adata
|
|
382
|
+
|
|
383
|
+
results = {
|
|
384
|
+
"cluster_resolution_top_genes": top_genes_dict,
|
|
385
|
+
"cluster_resolution_cluster_data": cluster_data,
|
|
386
|
+
}
|
|
387
|
+
data = write_results(data, adata, results)
|
|
388
|
+
return data
|
|
389
|
+
|
|
323
390
|
|
|
324
391
|
def _validate_cluster_resolution_inputs(
|
|
325
392
|
adata: AnnData,
|
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: resolutiontree
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.2
|
|
4
4
|
Summary: Systematic exploration of clustering resolutions in single-cell analysis
|
|
5
5
|
Author-email: Joe Hou <joseph.houjue@gmail.com>
|
|
6
6
|
License: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/joe-jhou2/resolutiontree
|
|
8
|
-
Project-URL: Documentation, https://resolutiontree.readthedocs.io/
|
|
9
8
|
Project-URL: Repository, https://github.com/joe-jhou2/resolutiontree
|
|
10
9
|
Project-URL: Bug Tracker, https://github.com/joe-jhou2/resolutiontree/issues
|
|
11
10
|
Keywords: single-cell,clustering,resolution,scanpy,leiden,visualization
|
|
@@ -14,12 +13,25 @@ Classifier: Intended Audience :: Science/Research
|
|
|
14
13
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
15
14
|
Classifier: License :: OSI Approved :: MIT License
|
|
16
15
|
Classifier: Programming Language :: Python :: 3
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
18
16
|
Classifier: Programming Language :: Python :: 3.9
|
|
19
17
|
Classifier: Programming Language :: Python :: 3.10
|
|
20
18
|
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
-
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Requires-Python: >=3.9
|
|
22
21
|
Description-Content-Type: text/markdown
|
|
22
|
+
Requires-Dist: numpy>=1.26
|
|
23
|
+
Requires-Dist: pandas>=2.1
|
|
24
|
+
Requires-Dist: scipy>=1.11
|
|
25
|
+
Requires-Dist: matplotlib>=3.8
|
|
26
|
+
Requires-Dist: seaborn>=0.13
|
|
27
|
+
Requires-Dist: scanpy>=1.9.8
|
|
28
|
+
Requires-Dist: anndata>=0.10
|
|
29
|
+
Requires-Dist: igraph>=0.11
|
|
30
|
+
Requires-Dist: networkx>=3.2
|
|
31
|
+
Requires-Dist: leidenalg>=0.10
|
|
32
|
+
Requires-Dist: umap-learn>=0.5.5
|
|
33
|
+
Requires-Dist: spatialdata
|
|
34
|
+
Requires-Dist: spatialdata[extra]
|
|
23
35
|
|
|
24
36
|
# ResolutionTree
|
|
25
37
|
|
|
@@ -56,7 +68,7 @@ ResolutionTree provides:
|
|
|
56
68
|
### From GitHub Release (Recommended)
|
|
57
69
|
|
|
58
70
|
```bash
|
|
59
|
-
pip install resolutiontree
|
|
71
|
+
pip install resolutiontree==0.2
|
|
60
72
|
```
|
|
61
73
|
|
|
62
74
|
### From Source
|
|
@@ -1,11 +1,18 @@
|
|
|
1
1
|
README.md
|
|
2
2
|
pyproject.toml
|
|
3
|
+
src/resolutiontree/__init__.py
|
|
3
4
|
src/resolutiontree/core.py
|
|
4
5
|
src/resolutiontree/utils.py
|
|
5
6
|
src/resolutiontree.egg-info/PKG-INFO
|
|
6
7
|
src/resolutiontree.egg-info/SOURCES.txt
|
|
7
8
|
src/resolutiontree.egg-info/dependency_links.txt
|
|
9
|
+
src/resolutiontree.egg-info/requires.txt
|
|
8
10
|
src/resolutiontree.egg-info/top_level.txt
|
|
9
11
|
tests/test_cluster_resolution.py
|
|
10
12
|
tests/test_cluster_tree.py
|
|
11
|
-
tests/
|
|
13
|
+
tests/test_install_illumina.py
|
|
14
|
+
tests/test_install_pbmc3k.py
|
|
15
|
+
tests/test_install_visiumhd.py
|
|
16
|
+
tests/test_install_xenium.py
|
|
17
|
+
tests/test_pipeline_pbmc3k.py
|
|
18
|
+
tests/test_pipeline_xenium.py
|
|
@@ -9,7 +9,7 @@ import re
|
|
|
9
9
|
import pandas as pd
|
|
10
10
|
import pytest
|
|
11
11
|
import scanpy as sc
|
|
12
|
-
from src import cluster_resolution_finder
|
|
12
|
+
from src.resolutiontree import cluster_resolution_finder
|
|
13
13
|
from testing.scanpy._helpers.data import pbmc68k_reduced
|
|
14
14
|
|
|
15
15
|
|
|
@@ -28,7 +28,7 @@ def test_cluster_resolution_finder_basic(adata_for_test):
|
|
|
28
28
|
"""Test that cluster_resolution_finder runs without errors and modifies adata."""
|
|
29
29
|
adata = adata_for_test.copy() # Create a copy to avoid modifying the fixture
|
|
30
30
|
resolutions = [0.1, 0.5]
|
|
31
|
-
|
|
31
|
+
cluster_resolution_finder(
|
|
32
32
|
adata,
|
|
33
33
|
resolutions,
|
|
34
34
|
prefix="leiden_res_",
|
|
@@ -42,24 +42,23 @@ def test_cluster_resolution_finder_basic(adata_for_test):
|
|
|
42
42
|
|
|
43
43
|
# Check that clustering columns were added to adata.obs
|
|
44
44
|
for res in resolutions:
|
|
45
|
-
assert f"leiden_res_{res}" in
|
|
45
|
+
assert f"leiden_res_{res}" in adata.obs
|
|
46
46
|
|
|
47
47
|
# Check that top_genes_dict was added to adata.uns
|
|
48
|
-
assert "cluster_resolution_top_genes" in
|
|
49
|
-
top_genes_dict =
|
|
48
|
+
assert "cluster_resolution_top_genes" in adata.uns
|
|
49
|
+
top_genes_dict = adata.uns["cluster_resolution_top_genes"]
|
|
50
50
|
assert isinstance(top_genes_dict, dict)
|
|
51
51
|
assert len(top_genes_dict) > 0
|
|
52
52
|
|
|
53
|
-
for
|
|
54
|
-
parent, child = k.split("_", 1) # Split the combined key back into tuple
|
|
53
|
+
for (parent, child), genes in top_genes_dict.items():
|
|
55
54
|
assert isinstance(parent, str)
|
|
56
55
|
assert isinstance(child, str)
|
|
57
56
|
assert isinstance(genes, list)
|
|
58
57
|
assert all(isinstance(g, str) for g in genes)
|
|
59
58
|
|
|
60
59
|
# Check that cluster_data was added to adata.uns
|
|
61
|
-
assert "cluster_resolution_cluster_data" in
|
|
62
|
-
cluster_data =
|
|
60
|
+
assert "cluster_resolution_cluster_data" in adata.uns
|
|
61
|
+
cluster_data = adata.uns["cluster_resolution_cluster_data"]
|
|
63
62
|
assert isinstance(cluster_data, pd.DataFrame)
|
|
64
63
|
for res in resolutions:
|
|
65
64
|
assert f"leiden_res_{res}" in cluster_data.columns
|
|
@@ -145,13 +144,13 @@ def test_cluster_resolution_finder_n_top_genes(adata_for_test, n_top_genes):
|
|
|
145
144
|
"""Test that n_top_genes bounds the number of genes stored in adata.uns."""
|
|
146
145
|
adata = adata_for_test.copy()
|
|
147
146
|
resolutions = [0.1, 0.5]
|
|
148
|
-
|
|
147
|
+
cluster_resolution_finder(
|
|
149
148
|
adata,
|
|
150
149
|
resolutions,
|
|
151
150
|
n_top_genes=n_top_genes,
|
|
152
151
|
)
|
|
153
152
|
|
|
154
153
|
# Check the number of genes in adata.uns["cluster_resolution_top_genes"]
|
|
155
|
-
top_genes_dict =
|
|
154
|
+
top_genes_dict = adata.uns["cluster_resolution_top_genes"]
|
|
156
155
|
for genes in top_genes_dict.values():
|
|
157
156
|
assert len(genes) <= n_top_genes
|
|
@@ -7,7 +7,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
|
|
7
7
|
import networkx as nx
|
|
8
8
|
import pytest
|
|
9
9
|
from scanpy.tools import leiden
|
|
10
|
-
from src import cluster_decision_tree, cluster_resolution_finder
|
|
10
|
+
from src.resolutiontree import cluster_decision_tree, cluster_resolution_finder
|
|
11
11
|
import scanpy as sc
|
|
12
12
|
from testing.scanpy._helpers.data import pbmc68k_reduced
|
|
13
13
|
|
|
@@ -26,7 +26,7 @@ def adata_with_clusters(adata_for_test):
|
|
|
26
26
|
"""Fixture providing clustering data and top_genes_dict for cluster_decision_tree."""
|
|
27
27
|
adata = adata_for_test.copy()
|
|
28
28
|
resolutions = [0.0, 0.2, 0.5, 1.0, 1.5, 2.0]
|
|
29
|
-
|
|
29
|
+
cluster_resolution_finder(
|
|
30
30
|
adata,
|
|
31
31
|
resolutions,
|
|
32
32
|
prefix="leiden_res_",
|
|
File without changes
|
resolutiontree-0.1.1/tests/test_installation.py → resolutiontree-0.2/tests/test_install_pbmc3k.py
RENAMED
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
import scanpy as sc
|
|
2
|
-
import
|
|
2
|
+
import resolutiontree as rt
|
|
3
3
|
|
|
4
4
|
print("Loading PBMC dataset...")
|
|
5
5
|
adata = sc.datasets.pbmc3k()
|
|
6
6
|
|
|
7
|
-
|
|
8
7
|
print("Running preprocessing...")
|
|
9
8
|
sc.pp.normalize_total(adata, inplace=True)
|
|
10
9
|
sc.pp.log1p(adata)
|
|
@@ -13,20 +12,18 @@ sc.pp.neighbors(adata)
|
|
|
13
12
|
sc.tl.umap(adata)
|
|
14
13
|
|
|
15
14
|
print("Testing cluster_resolution_finder...")
|
|
16
|
-
resolutions = [0.0, 0.2, 0.5, 1.0]
|
|
17
15
|
resolutions = [0.0, 0.2, 0.5, 1.0, 1.5, 2.0]
|
|
18
16
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
)
|
|
17
|
+
rt.cluster_resolution_finder(adata,
|
|
18
|
+
resolutions=resolutions,
|
|
19
|
+
n_top_genes=3,
|
|
20
|
+
min_cells=2,
|
|
21
|
+
deg_mode="within_parent"
|
|
22
|
+
)
|
|
26
23
|
|
|
27
|
-
rt.cluster_decision_tree(
|
|
24
|
+
rt.cluster_decision_tree(adata, resolutions=resolutions,
|
|
28
25
|
output_settings = {
|
|
29
|
-
"output_path": "
|
|
26
|
+
"output_path": "tests/results/test_pypi_pbmc3k.png",
|
|
30
27
|
"draw": False,
|
|
31
28
|
"figsize": (12, 6),
|
|
32
29
|
"dpi": 300
|
|
File without changes
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import pickle as pkl
|
|
2
|
+
import spatialdata as sd
|
|
3
|
+
import resolutiontree as rt
|
|
4
|
+
|
|
5
|
+
sdata = sd.read_zarr("data/CF_8wph_processed.zarr")
|
|
6
|
+
|
|
7
|
+
resolutions = [0.0, 0.5, 1.0, 1.5, 2.0, 2.5]
|
|
8
|
+
|
|
9
|
+
rt.cluster_resolution_finder(sdata['table'],
|
|
10
|
+
resolutions=resolutions,
|
|
11
|
+
n_top_genes=3,
|
|
12
|
+
min_cells=2,
|
|
13
|
+
deg_mode="within_parent")
|
|
14
|
+
|
|
15
|
+
rt.cluster_decision_tree(sdata['table'], resolutions=resolutions,
|
|
16
|
+
output_settings = {
|
|
17
|
+
"output_path": "tests/results/test_xenium.png",
|
|
18
|
+
"draw": False,
|
|
19
|
+
"figsize": (12, 6),
|
|
20
|
+
"dpi": 300
|
|
21
|
+
},
|
|
22
|
+
node_style = {
|
|
23
|
+
"node_size": 500,
|
|
24
|
+
"node_colormap": None,
|
|
25
|
+
"node_label_fontsize": 12
|
|
26
|
+
},
|
|
27
|
+
edge_style = {
|
|
28
|
+
"edge_color": "parent",
|
|
29
|
+
"edge_curvature": 0.01,
|
|
30
|
+
"edge_threshold": 0.01,
|
|
31
|
+
"show_weight": True,
|
|
32
|
+
"edge_label_threshold": 0.05,
|
|
33
|
+
"edge_label_position": 0.8,
|
|
34
|
+
"edge_label_fontsize": 8
|
|
35
|
+
},
|
|
36
|
+
gene_label_settings = {
|
|
37
|
+
"show_gene_labels": True,
|
|
38
|
+
"n_top_genes": 2,
|
|
39
|
+
"gene_label_threshold": 0.001,
|
|
40
|
+
"gene_label_style": {"offset":0.5, "fontsize":8},
|
|
41
|
+
},
|
|
42
|
+
level_label_style = {
|
|
43
|
+
"level_label_offset": 15,
|
|
44
|
+
"level_label_fontsize": 12
|
|
45
|
+
},
|
|
46
|
+
title_style = {
|
|
47
|
+
"title": "Hierarchical Leiden Clustering",
|
|
48
|
+
"title_fontsize": 20
|
|
49
|
+
},
|
|
50
|
+
layout_settings = {
|
|
51
|
+
"node_spacing": 5.0,
|
|
52
|
+
"level_spacing": 1.5
|
|
53
|
+
},
|
|
54
|
+
clustering_settings = {
|
|
55
|
+
"prefix": "leiden_res_",
|
|
56
|
+
"edge_threshold": 0.05
|
|
57
|
+
}
|
|
58
|
+
)
|
|
59
|
+
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import os
|
|
3
|
+
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
|
4
|
+
|
|
5
|
+
import scanpy as sc
|
|
6
|
+
from src.resolutiontree import cluster_resolution_finder, cluster_decision_tree
|
|
7
|
+
|
|
8
|
+
# Load the dataset
|
|
9
|
+
adata = sc.datasets.pbmc3k()
|
|
10
|
+
|
|
11
|
+
# Perform standard preprocessing
|
|
12
|
+
sc.pp.normalize_total(adata)
|
|
13
|
+
sc.pp.log1p(adata)
|
|
14
|
+
sc.pp.pca(adata)
|
|
15
|
+
sc.pp.neighbors(adata)
|
|
16
|
+
sc.tl.umap(adata)
|
|
17
|
+
|
|
18
|
+
resolutions = [0.0, 0.2, 0.5, 1.0, 1.5, 2.0]
|
|
19
|
+
|
|
20
|
+
# Perform hierarchical clustering with different resolutions
|
|
21
|
+
cluster_resolution_finder(adata,
|
|
22
|
+
resolutions=resolutions,
|
|
23
|
+
n_top_genes=3,
|
|
24
|
+
min_cells=2,
|
|
25
|
+
deg_mode="within_parent"
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
cluster_decision_tree(adata, resolutions=resolutions,
|
|
29
|
+
output_settings = {
|
|
30
|
+
"output_path": "tests/results/test_pipeline_pbmc3k.png",
|
|
31
|
+
"draw": False,
|
|
32
|
+
"figsize": (12, 8),
|
|
33
|
+
"dpi": 300
|
|
34
|
+
},
|
|
35
|
+
node_style = {
|
|
36
|
+
"node_size": 500,
|
|
37
|
+
"node_colormap": None,
|
|
38
|
+
"node_label_fontsize": 12
|
|
39
|
+
},
|
|
40
|
+
edge_style = {
|
|
41
|
+
"edge_color": "parent",
|
|
42
|
+
"edge_curvature": 0.01,
|
|
43
|
+
"edge_threshold": 0.01,
|
|
44
|
+
"show_weight": True,
|
|
45
|
+
"edge_label_threshold": 0.05,
|
|
46
|
+
"edge_label_position": 0.8,
|
|
47
|
+
"edge_label_fontsize": 8
|
|
48
|
+
},
|
|
49
|
+
gene_label_settings = {
|
|
50
|
+
"show_gene_labels": True,
|
|
51
|
+
"n_top_genes": 2,
|
|
52
|
+
"gene_label_threshold": 0.001,
|
|
53
|
+
"gene_label_style": {"offset":0.5, "fontsize":8},
|
|
54
|
+
},
|
|
55
|
+
level_label_style = {
|
|
56
|
+
"level_label_offset": 15,
|
|
57
|
+
"level_label_fontsize": 12
|
|
58
|
+
},
|
|
59
|
+
title_style = {
|
|
60
|
+
"title": "Hierarchical Leiden Clustering",
|
|
61
|
+
"title_fontsize": 20
|
|
62
|
+
},
|
|
63
|
+
layout_settings = {
|
|
64
|
+
"node_spacing": 5.0,
|
|
65
|
+
"level_spacing": 1.5
|
|
66
|
+
},
|
|
67
|
+
clustering_settings = {
|
|
68
|
+
"prefix": "leiden_res_",
|
|
69
|
+
"edge_threshold": 0.05
|
|
70
|
+
}
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
# print(adata.uns["cluster_resolution_top_genes"])
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import os
|
|
3
|
+
import spatialdata as sd
|
|
4
|
+
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
|
5
|
+
|
|
6
|
+
import scanpy as sc
|
|
7
|
+
from src.resolutiontree import cluster_resolution_finder, cluster_decision_tree
|
|
8
|
+
|
|
9
|
+
# Load the dataset
|
|
10
|
+
adata = sd.read_zarr("data/CF_8wph_processed.zarr")
|
|
11
|
+
print(adata)
|
|
12
|
+
|
|
13
|
+
# Perform standard preprocessing
|
|
14
|
+
resolutions = [0.0, 0.2, 0.5, 1.0, 1.5, 2.0]
|
|
15
|
+
|
|
16
|
+
# Perform hierarchical clustering with different resolutions
|
|
17
|
+
cluster_resolution_finder(adata,
|
|
18
|
+
resolutions=resolutions,
|
|
19
|
+
n_top_genes=3,
|
|
20
|
+
min_cells=2,
|
|
21
|
+
deg_mode="within_parent"
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
cluster_decision_tree(adata, resolutions=resolutions,
|
|
25
|
+
output_settings = {
|
|
26
|
+
"output_path": "tests/results/test_pipeline_xenium.png",
|
|
27
|
+
"draw": False,
|
|
28
|
+
"figsize": (12, 8),
|
|
29
|
+
"dpi": 300
|
|
30
|
+
},
|
|
31
|
+
node_style = {
|
|
32
|
+
"node_size": 500,
|
|
33
|
+
"node_colormap": None,
|
|
34
|
+
"node_label_fontsize": 12
|
|
35
|
+
},
|
|
36
|
+
edge_style = {
|
|
37
|
+
"edge_color": "parent",
|
|
38
|
+
"edge_curvature": 0.01,
|
|
39
|
+
"edge_threshold": 0.01,
|
|
40
|
+
"show_weight": True,
|
|
41
|
+
"edge_label_threshold": 0.05,
|
|
42
|
+
"edge_label_position": 0.8,
|
|
43
|
+
"edge_label_fontsize": 8
|
|
44
|
+
},
|
|
45
|
+
gene_label_settings = {
|
|
46
|
+
"show_gene_labels": True,
|
|
47
|
+
"n_top_genes": 2,
|
|
48
|
+
"gene_label_threshold": 0.001,
|
|
49
|
+
"gene_label_style": {"offset":0.5, "fontsize":8},
|
|
50
|
+
},
|
|
51
|
+
level_label_style = {
|
|
52
|
+
"level_label_offset": 15,
|
|
53
|
+
"level_label_fontsize": 12
|
|
54
|
+
},
|
|
55
|
+
title_style = {
|
|
56
|
+
"title": "Hierarchical Leiden Clustering",
|
|
57
|
+
"title_fontsize": 20
|
|
58
|
+
},
|
|
59
|
+
layout_settings = {
|
|
60
|
+
"node_spacing": 5.0,
|
|
61
|
+
"level_spacing": 1.5
|
|
62
|
+
},
|
|
63
|
+
clustering_settings = {
|
|
64
|
+
"prefix": "leiden_res_",
|
|
65
|
+
"edge_threshold": 0.05
|
|
66
|
+
}
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
# print(adata['table'].uns["cluster_resolution_top_genes"])
|
|
File without changes
|
{resolutiontree-0.1.1 → resolutiontree-0.2}/src/resolutiontree.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|