sc-graft 0.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sc_graft-0.0.0/PKG-INFO +39 -0
- sc_graft-0.0.0/README.md +18 -0
- sc_graft-0.0.0/pyproject.toml +33 -0
- sc_graft-0.0.0/setup.cfg +4 -0
- sc_graft-0.0.0/src/sc_graft/__init__.py +30 -0
- sc_graft-0.0.0/src/sc_graft/priors.py +124 -0
- sc_graft-0.0.0/src/sc_graft.egg-info/PKG-INFO +39 -0
- sc_graft-0.0.0/src/sc_graft.egg-info/SOURCES.txt +9 -0
- sc_graft-0.0.0/src/sc_graft.egg-info/dependency_links.txt +1 -0
- sc_graft-0.0.0/src/sc_graft.egg-info/requires.txt +3 -0
- sc_graft-0.0.0/src/sc_graft.egg-info/top_level.txt +1 -0
sc_graft-0.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sc-graft
|
|
3
|
+
Version: 0.0.0
|
|
4
|
+
Summary: single-cell Graph of Receptors, pAthways, Factors and Targets — external-prior loaders (early placeholder; core model not yet released)
|
|
5
|
+
Author-email: Ohbin Kwon <ohbin.kwon01@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ObKwon115/sc-graft
|
|
8
|
+
Project-URL: Repository, https://github.com/ObKwon115/sc-graft
|
|
9
|
+
Project-URL: Issues, https://github.com/ObKwon115/sc-graft/issues
|
|
10
|
+
Keywords: single-cell,scRNA-seq,graph-neural-network,cell-signaling,bioinformatics
|
|
11
|
+
Classifier: Development Status :: 1 - Planning
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
17
|
+
Requires-Python: >=3.9
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
Provides-Extra: scenic
|
|
20
|
+
Requires-Dist: pandas>=1.5; extra == "scenic"
|
|
21
|
+
|
|
22
|
+
# sc-graft
|
|
23
|
+
|
|
24
|
+
**Early placeholder release.** `sc-graft` — a single-cell **G**raph of
|
|
25
|
+
**R**eceptors, p**A**thways, **F**actors and **T**argets — is a
|
|
26
|
+
biologically-grounded heterogeneous graph transformer for inferring how receptor
|
|
27
|
+
signaling propagates to target genes in single cells.
|
|
28
|
+
|
|
29
|
+
This pre-release ships only a small slice of the **preprocessing** layer: the
|
|
30
|
+
**external-prior loaders** (STRING physical PPI, MSigDB gene sets, SCENIC
|
|
31
|
+
regulons). The network-construction logic and the model itself are under active
|
|
32
|
+
development and are **not included yet**.
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from sc_graft import load_string_interactions, read_gmt, gmt_path, load_regulons
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
- Source / docs: https://github.com/ObKwon115/sc-graft
|
|
39
|
+
- License: MIT
|
sc_graft-0.0.0/README.md
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# sc-graft
|
|
2
|
+
|
|
3
|
+
**Early placeholder release.** `sc-graft` — a single-cell **G**raph of
|
|
4
|
+
**R**eceptors, p**A**thways, **F**actors and **T**argets — is a
|
|
5
|
+
biologically-grounded heterogeneous graph transformer for inferring how receptor
|
|
6
|
+
signaling propagates to target genes in single cells.
|
|
7
|
+
|
|
8
|
+
This pre-release ships only a small slice of the **preprocessing** layer: the
|
|
9
|
+
**external-prior loaders** (STRING physical PPI, MSigDB gene sets, SCENIC
|
|
10
|
+
regulons). The network-construction logic and the model itself are under active
|
|
11
|
+
development and are **not included yet**.
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from sc_graft import load_string_interactions, read_gmt, gmt_path, load_regulons
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
- Source / docs: https://github.com/ObKwon115/sc-graft
|
|
18
|
+
- License: MIT
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "sc-graft"
|
|
7
|
+
version = "0.0.0"
|
|
8
|
+
description = "single-cell Graph of Receptors, pAthways, Factors and Targets — external-prior loaders (early placeholder; core model not yet released)"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Ohbin Kwon", email = "ohbin.kwon01@gmail.com" }]
|
|
13
|
+
keywords = ["single-cell", "scRNA-seq", "graph-neural-network", "cell-signaling", "bioinformatics"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 1 - Planning",
|
|
16
|
+
"Intended Audience :: Science/Research",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Operating System :: OS Independent",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
21
|
+
]
|
|
22
|
+
dependencies = []
|
|
23
|
+
|
|
24
|
+
[project.optional-dependencies]
|
|
25
|
+
scenic = ["pandas>=1.5"]
|
|
26
|
+
|
|
27
|
+
[project.urls]
|
|
28
|
+
Homepage = "https://github.com/ObKwon115/sc-graft"
|
|
29
|
+
Repository = "https://github.com/ObKwon115/sc-graft"
|
|
30
|
+
Issues = "https://github.com/ObKwon115/sc-graft/issues"
|
|
31
|
+
|
|
32
|
+
[tool.setuptools.packages.find]
|
|
33
|
+
where = ["src"]
|
sc_graft-0.0.0/setup.cfg
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""sc-graft — single-cell Graph of Receptors, pAthways, Factors and Targets.
|
|
2
|
+
|
|
3
|
+
Early placeholder. Only the external-prior loaders are published so far; the
|
|
4
|
+
network-construction logic and the model are not yet released.
|
|
5
|
+
See https://github.com/ObKwon115/sc-graft.
|
|
6
|
+
"""
|
|
7
|
+
from .priors import (
|
|
8
|
+
TAXID,
|
|
9
|
+
MSIGDB_SUFFIX,
|
|
10
|
+
MSIGDB_STEM,
|
|
11
|
+
string_paths,
|
|
12
|
+
load_string_interactions,
|
|
13
|
+
gmt_path,
|
|
14
|
+
read_gmt,
|
|
15
|
+
load_regulons,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
__version__ = "0.0.0"
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"TAXID",
|
|
22
|
+
"MSIGDB_SUFFIX",
|
|
23
|
+
"MSIGDB_STEM",
|
|
24
|
+
"string_paths",
|
|
25
|
+
"load_string_interactions",
|
|
26
|
+
"gmt_path",
|
|
27
|
+
"read_gmt",
|
|
28
|
+
"load_regulons",
|
|
29
|
+
"__version__",
|
|
30
|
+
]
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""External prior loaders (species-aware).
|
|
2
|
+
|
|
3
|
+
All edge priors come from public databases; this module resolves the right
|
|
4
|
+
files for mouse or human and loads them. Nothing here is dataset-specific.
|
|
5
|
+
|
|
6
|
+
Resources:
|
|
7
|
+
STRING physical PPI data/prior/STRING/<taxid>.protein.*.v12.0.txt.gz
|
|
8
|
+
MSigDB pathway gene sets data/prior/misgdb/*.<Mm|Hs>.symbols.gmt
|
|
9
|
+
SCENIC regulons (your run) <proc>/scenic/regulons.csv
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
import gzip
|
|
13
|
+
from collections import defaultdict
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Dict, List, Set
|
|
16
|
+
|
|
17
|
+
TAXID = {"mouse": "10090", "human": "9606"}
|
|
18
|
+
MSIGDB_SUFFIX = {"mouse": "Mm", "human": "Hs"}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# --------------------------------------------------------------------------- #
|
|
22
|
+
# STRING physical PPI
|
|
23
|
+
# --------------------------------------------------------------------------- #
|
|
24
|
+
def string_paths(species: str, prior_root: str | Path) -> Dict[str, Path]:
    """Build the paths of the three STRING v12.0 files for ``species``.

    ``species`` is looked up in ``TAXID``; an unknown species propagates the
    resulting ``KeyError``. The files are expected under
    ``<prior_root>/STRING``; their existence is not checked here.

    Returns a dict with the keys ``"info"``, ``"links"`` and ``"physical"``.
    """
    taxid = TAXID[species]
    string_dir = Path(prior_root) / "STRING"
    # Middle part of each file name, keyed by the name callers use.
    name_parts = {
        "info": "info",
        "links": "links",
        "physical": "physical.links",
    }
    return {
        key: string_dir / f"{taxid}.protein.{part}.v12.0.txt.gz"
        for key, part in name_parts.items()
    }
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def load_string_interactions(
    species: str,
    prior_root: str | Path,
    *,
    gene_universe: Set[str],
    score_thr: int = 700,
    physical: bool = True,
) -> Dict[str, Set[str]]:
    """Gene -> set of genes it interacts with (within ``gene_universe``).

    ``physical=True`` uses the physical-binding subnetwork (expression-
    independent); ``False`` uses the combined score (includes coexpression).

    Args:
        species: Key of ``TAXID`` selecting the STRING files.
        prior_root: Directory that contains the ``STRING`` sub-directory.
        gene_universe: Gene symbols to keep; an edge is reported only when
            both endpoints map to a symbol in this set.
        score_thr: Minimum score (third column of the links file) for an
            edge to be kept.
        physical: Choose the physical-links file instead of the full one.

    Returns:
        A symmetric adjacency mapping. It is a ``defaultdict(set)``, so
        indexing it with an absent gene inserts an empty set.

    Raises:
        FileNotFoundError: If the info file or the selected links file does
            not exist.
        KeyError: If ``species`` is not a key of ``TAXID``.
    """
    paths = string_paths(species, prior_root)
    info = paths["info"]
    links = paths["physical"] if physical else paths["links"]
    # Name the links flavour that was actually requested in the error hint.
    links_kind = "protein.physical.links" if physical else "protein.links"
    for p in (info, links):
        if not p.exists():
            raise FileNotFoundError(
                f"STRING file missing: {p}\n download the STRING protein.info and "
                f"{links_kind} files for taxid {TAXID[species]} from "
                f"https://string-db.org/cgi/download"
            )

    # protein id <-> gene symbol
    name2prot: Dict[str, Set[str]] = defaultdict(set)
    prot2name: Dict[str, str] = {}
    with gzip.open(info, "rt", encoding="utf-8") as f:
        next(f, None)  # skip the header; tolerate an empty file
        for ln in f:
            x = ln.rstrip("\n").split("\t")
            if len(x) < 2:
                continue  # blank or truncated line: no symbol to record
            prot2name[x[0]] = x[1]
            name2prot[x[1]].add(x[0])

    # Protein ids whose symbol is in the requested universe.
    keep = {pr for g in gene_universe for pr in name2prot.get(g, ())}

    inter: Dict[str, Set[str]] = defaultdict(set)
    with gzip.open(links, "rt", encoding="utf-8") as f:
        next(f, None)  # skip the header; tolerate an empty file
        for ln in f:
            fields = ln.split()
            if not fields:
                continue  # blank line (e.g. trailing newline at end of file)
            p1, p2, s = fields
            if int(s) < score_thr:
                continue
            if p1 in keep and p2 in keep:
                a, b = prot2name[p1], prot2name[p2]
                # Record the edge in both directions.
                inter[a].add(b)
                inter[b].add(a)
    return inter
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# --------------------------------------------------------------------------- #
|
|
82
|
+
# MSigDB gene sets (WikiPathways / Reactome / Hallmark)
|
|
83
|
+
# --------------------------------------------------------------------------- #
|
|
84
|
+
# MSigDB collection names differ by species: mouse uses m2/mh/m5, human c2/h/c5.
|
|
85
|
+
MSIGDB_STEM = {
|
|
86
|
+
"mouse": {"wikipathways": "m2.cp.wikipathways", "reactome": "m2.cp.reactome",
|
|
87
|
+
"hallmark": "mh.all", "gobp": "m5.go.bp"},
|
|
88
|
+
"human": {"wikipathways": "c2.cp.wikipathways", "reactome": "c2.cp.reactome",
|
|
89
|
+
"hallmark": "h.all", "gobp": "c5.go.bp"},
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def gmt_path(species: str, prior_root: str | Path, collection: str = "wikipathways",
             version: str = "2026.1") -> Path:
    """Build the path of the MSigDB symbols ``.gmt`` file for a collection.

    The species suffix comes from ``MSIGDB_SUFFIX`` and the collection stem
    from ``MSIGDB_STEM``; an unknown species or collection propagates the
    resulting ``KeyError``. The file's existence is not checked here.

    e.g. mouse  m2.cp.wikipathways.v2026.1.Mm.symbols.gmt
         human  c2.cp.wikipathways.v2026.1.Hs.symbols.gmt

    NOTE(review): the directory name is spelled ``misgdb`` (not ``msigdb``);
    presumably it mirrors the on-disk layout -- confirm before renaming.
    """
    species_tag = MSIGDB_SUFFIX[species]
    collection_stem = MSIGDB_STEM[species][collection]
    gmt_dir = Path(prior_root) / "misgdb"
    file_name = f"{collection_stem}.v{version}.{species_tag}.symbols.gmt"
    return gmt_dir / file_name
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def read_gmt(path: str | Path) -> Dict[str, List[str]]:
    """Parse a GMT gene-set file into ``{set name: [gene, ...]}``.

    Each non-blank line is split on tabs: the first field is the set name,
    the second field is ignored, and the remaining non-empty fields are the
    genes. If a set name occurs more than once, the last occurrence wins.

    Args:
        path: Location of the ``.gmt`` file.

    Returns:
        Mapping from gene-set name to its list of genes, in file order.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"GMT missing: {path}")
    out: Dict[str, List[str]] = {}
    # Context manager closes the handle; explicit encoding avoids depending
    # on the platform's locale default.
    with open(path, encoding="utf-8") as fh:
        for ln in fh:
            if not ln.strip():
                continue  # a blank line would otherwise create a "" gene set
            x = ln.rstrip("\n").split("\t")
            out[x[0]] = [g for g in x[2:] if g]
    return out
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# --------------------------------------------------------------------------- #
|
|
114
|
+
# SCENIC regulons (from the user's own SCENIC run)
|
|
115
|
+
# --------------------------------------------------------------------------- #
|
|
116
|
+
def load_regulons(path: str | Path):
    """Read the SCENIC regulons CSV at ``path`` with ``pandas.read_csv``.

    The table is expected to hold the columns TF, target[, weight]; the
    columns are not validated here. pandas is imported inside the function,
    before the existence check, so the module itself imports without it.

    Raises ``FileNotFoundError`` when ``path`` does not exist.
    """
    import pandas as pd

    regulons_csv = Path(path)
    if regulons_csv.exists():
        return pd.read_csv(regulons_csv)
    raise FileNotFoundError(
        f"SCENIC regulons missing: {regulons_csv}\n run pySCENIC on your counts first"
    )
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sc-graft
|
|
3
|
+
Version: 0.0.0
|
|
4
|
+
Summary: single-cell Graph of Receptors, pAthways, Factors and Targets — external-prior loaders (early placeholder; core model not yet released)
|
|
5
|
+
Author-email: Ohbin Kwon <ohbin.kwon01@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ObKwon115/sc-graft
|
|
8
|
+
Project-URL: Repository, https://github.com/ObKwon115/sc-graft
|
|
9
|
+
Project-URL: Issues, https://github.com/ObKwon115/sc-graft/issues
|
|
10
|
+
Keywords: single-cell,scRNA-seq,graph-neural-network,cell-signaling,bioinformatics
|
|
11
|
+
Classifier: Development Status :: 1 - Planning
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
17
|
+
Requires-Python: >=3.9
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
Provides-Extra: scenic
|
|
20
|
+
Requires-Dist: pandas>=1.5; extra == "scenic"
|
|
21
|
+
|
|
22
|
+
# sc-graft
|
|
23
|
+
|
|
24
|
+
**Early placeholder release.** `sc-graft` — a single-cell **G**raph of
|
|
25
|
+
**R**eceptors, p**A**thways, **F**actors and **T**argets — is a
|
|
26
|
+
biologically-grounded heterogeneous graph transformer for inferring how receptor
|
|
27
|
+
signaling propagates to target genes in single cells.
|
|
28
|
+
|
|
29
|
+
This pre-release ships only a small slice of the **preprocessing** layer: the
|
|
30
|
+
**external-prior loaders** (STRING physical PPI, MSigDB gene sets, SCENIC
|
|
31
|
+
regulons). The network-construction logic and the model itself are under active
|
|
32
|
+
development and are **not included yet**.
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from sc_graft import load_string_interactions, read_gmt, gmt_path, load_regulons
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
- Source / docs: https://github.com/ObKwon115/sc-graft
|
|
39
|
+
- License: MIT
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/sc_graft/__init__.py
|
|
4
|
+
src/sc_graft/priors.py
|
|
5
|
+
src/sc_graft.egg-info/PKG-INFO
|
|
6
|
+
src/sc_graft.egg-info/SOURCES.txt
|
|
7
|
+
src/sc_graft.egg-info/dependency_links.txt
|
|
8
|
+
src/sc_graft.egg-info/requires.txt
|
|
9
|
+
src/sc_graft.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
sc_graft
|