sc-graft 0.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,39 @@
1
+ Metadata-Version: 2.4
2
+ Name: sc-graft
3
+ Version: 0.0.0
4
+ Summary: single-cell Graph of Receptors, pAthways, Factors and Targets — external-prior loaders (early placeholder; core model not yet released)
5
+ Author-email: Ohbin Kwon <ohbin.kwon01@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/ObKwon115/sc-graft
8
+ Project-URL: Repository, https://github.com/ObKwon115/sc-graft
9
+ Project-URL: Issues, https://github.com/ObKwon115/sc-graft/issues
10
+ Keywords: single-cell,scRNA-seq,graph-neural-network,cell-signaling,bioinformatics
11
+ Classifier: Development Status :: 1 - Planning
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
17
+ Requires-Python: >=3.9
18
+ Description-Content-Type: text/markdown
19
+ Provides-Extra: scenic
20
+ Requires-Dist: pandas>=1.5; extra == "scenic"
21
+
22
+ # sc-graft
23
+
24
+ **Early placeholder release.** `sc-graft` — a single-cell **G**raph of
25
+ **R**eceptors, p**A**thways, **F**actors and **T**argets — is a
26
+ biologically-grounded heterogeneous graph transformer for inferring how receptor
27
+ signaling propagates to target genes in single cells.
28
+
29
+ This pre-release ships only a small slice of the **preprocessing** layer: the
30
+ **external-prior loaders** (STRING physical PPI, MSigDB gene sets, SCENIC
31
+ regulons). The network-construction logic and the model itself are under active
32
+ development and are **not included yet**.
33
+
34
+ ```python
35
+ from sc_graft import load_string_interactions, read_gmt, gmt_path, load_regulons
36
+ ```
37
+
38
+ - Source / docs: https://github.com/ObKwon115/sc-graft
39
+ - License: MIT
@@ -0,0 +1,18 @@
1
+ # sc-graft
2
+
3
+ **Early placeholder release.** `sc-graft` — a single-cell **G**raph of
4
+ **R**eceptors, p**A**thways, **F**actors and **T**argets — is a
5
+ biologically-grounded heterogeneous graph transformer for inferring how receptor
6
+ signaling propagates to target genes in single cells.
7
+
8
+ This pre-release ships only a small slice of the **preprocessing** layer: the
9
+ **external-prior loaders** (STRING physical PPI, MSigDB gene sets, SCENIC
10
+ regulons). The network-construction logic and the model itself are under active
11
+ development and are **not included yet**.
12
+
13
+ ```python
14
+ from sc_graft import load_string_interactions, read_gmt, gmt_path, load_regulons
15
+ ```
16
+
17
+ - Source / docs: https://github.com/ObKwon115/sc-graft
18
+ - License: MIT
@@ -0,0 +1,33 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "sc-graft"
7
+ version = "0.0.0"
8
+ description = "single-cell Graph of Receptors, pAthways, Factors and Targets — external-prior loaders (early placeholder; core model not yet released)"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "Ohbin Kwon", email = "ohbin.kwon01@gmail.com" }]
13
+ keywords = ["single-cell", "scRNA-seq", "graph-neural-network", "cell-signaling", "bioinformatics"]
14
+ classifiers = [
15
+ "Development Status :: 1 - Planning",
16
+ "Intended Audience :: Science/Research",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Operating System :: OS Independent",
19
+ "Programming Language :: Python :: 3",
20
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
21
+ ]
22
+ dependencies = []
23
+
24
+ [project.optional-dependencies]
25
+ scenic = ["pandas>=1.5"]
26
+
27
+ [project.urls]
28
+ Homepage = "https://github.com/ObKwon115/sc-graft"
29
+ Repository = "https://github.com/ObKwon115/sc-graft"
30
+ Issues = "https://github.com/ObKwon115/sc-graft/issues"
31
+
32
+ [tool.setuptools.packages.find]
33
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,30 @@
1
+ """sc-graft — single-cell Graph of Receptors, pAthways, Factors and Targets.
2
+
3
+ Early placeholder. Only the external-prior loaders are published so far; the
4
+ network-construction logic and the model are not yet released.
5
+ See https://github.com/ObKwon115/sc-graft.
6
+ """
7
+ from .priors import (
8
+ TAXID,
9
+ MSIGDB_SUFFIX,
10
+ MSIGDB_STEM,
11
+ string_paths,
12
+ load_string_interactions,
13
+ gmt_path,
14
+ read_gmt,
15
+ load_regulons,
16
+ )
17
+
18
+ __version__ = "0.0.0"
19
+
20
+ __all__ = [
21
+ "TAXID",
22
+ "MSIGDB_SUFFIX",
23
+ "MSIGDB_STEM",
24
+ "string_paths",
25
+ "load_string_interactions",
26
+ "gmt_path",
27
+ "read_gmt",
28
+ "load_regulons",
29
+ "__version__",
30
+ ]
@@ -0,0 +1,124 @@
1
+ """External prior loaders (species-aware).
2
+
3
+ All edge priors come from public databases; this module resolves the right
4
+ files for mouse or human and loads them. Nothing here is dataset-specific.
5
+
6
+ Resources:
7
+ STRING physical PPI data/prior/STRING/<taxid>.protein.*.v12.0.txt.gz
8
+ MSigDB pathway gene sets data/prior/misgdb/<stem>.v<version>.<Mm|Hs>.symbols.gmt (the directory name is spelled "misgdb", exactly as gmt_path builds it)
9
+ SCENIC regulons (your run) <proc>/scenic/regulons.csv
10
+ """
11
+ from __future__ import annotations
12
+ import gzip
13
+ from collections import defaultdict
14
+ from pathlib import Path
15
+ from typing import Dict, List, Set
16
+
17
# Species name -> taxonomy id string; used as the filename prefix of the
# STRING files resolved by ``string_paths``.
TAXID = dict(mouse="10090", human="9606")

# Species name -> species tag embedded in MSigDB ``.gmt`` filenames
# (see ``gmt_path``).
MSIGDB_SUFFIX = dict(mouse="Mm", human="Hs")
19
+
20
+
21
+ # --------------------------------------------------------------------------- #
22
+ # STRING physical PPI
23
+ # --------------------------------------------------------------------------- #
24
def string_paths(species: str, prior_root: str | Path) -> Dict[str, Path]:
    """Return the expected locations of the three STRING v12.0 files.

    Args:
        species: Key of ``TAXID`` (``"mouse"`` or ``"human"``).
        prior_root: Directory that contains the ``STRING`` sub-directory.

    Returns:
        A dict with the keys ``"info"``, ``"links"`` and ``"physical"``, each
        mapped to a path under ``<prior_root>/STRING``. The paths are only
        constructed here; their existence is not checked.

    Raises:
        KeyError: If ``species`` is not a key of ``TAXID``.
    """
    taxid = TAXID[species]
    string_dir = Path(prior_root) / "STRING"

    info_name = f"{taxid}.protein.info.v12.0.txt.gz"
    links_name = f"{taxid}.protein.links.v12.0.txt.gz"
    physical_name = f"{taxid}.protein.physical.links.v12.0.txt.gz"

    resolved: Dict[str, Path] = {}
    resolved["info"] = string_dir / info_name
    resolved["links"] = string_dir / links_name
    resolved["physical"] = string_dir / physical_name
    return resolved
33
+
34
+
35
def load_string_interactions(
    species: str,
    prior_root: str | Path,
    *,
    gene_universe: Set[str],
    score_thr: int = 700,
    physical: bool = True,
) -> Dict[str, Set[str]]:
    """Gene -> set of genes it interacts with (within ``gene_universe``).

    ``physical=True`` uses the physical-binding subnetwork (expression-
    independent); ``False`` uses the combined score (includes coexpression).

    Args:
        species: Key of ``TAXID`` (``"mouse"`` or ``"human"``).
        prior_root: Directory containing the ``STRING`` sub-directory.
        gene_universe: Gene names to keep; an edge is retained only when both
            endpoints map to a name in this set.
        score_thr: Minimum integer score (third column of the links file) an
            edge needs in order to be kept.
        physical: Selects the ``physical.links`` file when true, otherwise the
            combined ``links`` file.

    Returns:
        A ``defaultdict(set)`` keyed by gene name. Every kept edge is stored
        in both directions. Names are taken from the second tab-separated
        column of the protein.info file.

    Raises:
        KeyError: If ``species`` is not a key of ``TAXID``.
        FileNotFoundError: If the info file or the selected links file is
            missing.
    """
    paths = string_paths(species, prior_root)
    links_kind = "physical.links" if physical else "links"
    info = paths["info"]
    links = paths["physical"] if physical else paths["links"]
    for p in (info, links):
        if not p.exists():
            # Name the links flavour that was actually requested, so the hint
            # is not misleading when physical=False.
            raise FileNotFoundError(
                f"STRING file missing: {p}\n download the STRING protein.info and "
                f"protein.{links_kind} files for taxid {TAXID[species]} from "
                f"https://string-db.org/cgi/download"
            )

    # protein id <-> gene symbol
    name2prot: Dict[str, Set[str]] = defaultdict(set)
    prot2name: Dict[str, str] = {}
    # Explicit encoding: do not depend on the platform's locale default.
    with gzip.open(info, "rt", encoding="utf-8") as f:
        next(f, None)  # skip the header; tolerate an empty file
        for ln in f:
            if not ln.strip():
                continue  # blank line (e.g. trailing newline at end of file)
            # Strip the newline so a two-column row does not leak "\n" into
            # the gene name.
            x = ln.rstrip("\n").split("\t")
            prot2name[x[0]] = x[1]
            name2prot[x[1]].add(x[0])

    # Protein ids whose gene name is inside the requested universe.
    keep = {pr for g in gene_universe for pr in name2prot.get(g, ())}

    inter: Dict[str, Set[str]] = defaultdict(set)
    with gzip.open(links, "rt", encoding="utf-8") as f:
        next(f, None)  # skip the header; tolerate an empty file
        for ln in f:
            if not ln.strip():
                continue
            p1, p2, s = ln.split()
            if int(s) < score_thr:
                continue
            if p1 in keep and p2 in keep:
                a, b = prot2name[p1], prot2name[p2]
                # Store the edge symmetrically.
                inter[a].add(b)
                inter[b].add(a)
    return inter
79
+
80
+
81
+ # --------------------------------------------------------------------------- #
82
+ # MSigDB gene sets (WikiPathways / Reactome / Hallmark)
83
+ # --------------------------------------------------------------------------- #
84
# MSigDB names its collections differently per species: the mouse files use the
# m2 / mh / m5 prefixes where the human files use c2 / h / c5.
# Layout: species -> collection alias -> filename stem (see ``gmt_path``).
MSIGDB_STEM = {
    "mouse": {
        "wikipathways": "m2.cp.wikipathways",
        "reactome": "m2.cp.reactome",
        "hallmark": "mh.all",
        "gobp": "m5.go.bp",
    },
    "human": {
        "wikipathways": "c2.cp.wikipathways",
        "reactome": "c2.cp.reactome",
        "hallmark": "h.all",
        "gobp": "c5.go.bp",
    },
}
91
+
92
+
93
def gmt_path(species: str, prior_root: str | Path, collection: str = "wikipathways",
             version: str = "2026.1") -> Path:
    """Build the path of an MSigDB ``.gmt`` file for a species and collection.

    Example filenames:
        mouse  m2.cp.wikipathways.v2026.1.Mm.symbols.gmt
        human  c2.cp.wikipathways.v2026.1.Hs.symbols.gmt

    Args:
        species: Key of ``MSIGDB_SUFFIX`` / ``MSIGDB_STEM``.
        prior_root: Directory that contains the ``misgdb`` sub-directory.
        collection: Collection alias, a key of ``MSIGDB_STEM[species]``.
        version: Version string embedded in the filename after ``.v``.

    Returns:
        The constructed path; its existence is not checked here.

    Raises:
        KeyError: If ``species`` or ``collection`` is unknown.
    """
    species_tag = MSIGDB_SUFFIX[species]
    collection_stem = MSIGDB_STEM[species][collection]
    filename = f"{collection_stem}.v{version}.{species_tag}.symbols.gmt"
    return Path(prior_root, "misgdb", filename)
100
+
101
+
102
def read_gmt(path: str | Path) -> Dict[str, List[str]]:
    """Parse a GMT gene-set file into ``{set_name: [gene, ...]}``.

    Each non-blank line is split on tabs: the first field is the set name, the
    second field is ignored, and the remaining non-empty fields are the member
    genes. If a set name occurs more than once, the last occurrence wins.

    Args:
        path: Location of the ``.gmt`` file.

    Returns:
        Dict mapping set name to its list of genes, in file order.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"GMT missing: {path}")
    out: Dict[str, List[str]] = {}
    # Context manager closes the handle deterministically; explicit encoding
    # avoids depending on the platform's locale default.
    with open(path, encoding="utf-8") as fh:
        for ln in fh:
            if not ln.strip():
                # A blank line would otherwise register a bogus "" gene set.
                continue
            fields = ln.rstrip("\n").split("\t")
            # Empty fields come from trailing tabs; drop them.
            out[fields[0]] = [g for g in fields[2:] if g]
    return out
111
+
112
+
113
+ # --------------------------------------------------------------------------- #
114
+ # SCENIC regulons (from the user's own SCENIC run)
115
+ # --------------------------------------------------------------------------- #
116
def load_regulons(path: str | Path):
    """Read a SCENIC regulons CSV and return it as a pandas DataFrame.

    The table is expected to have the columns TF, target[, weight]; the
    columns are not validated here.

    Args:
        path: Location of the regulons CSV.

    Returns:
        The result of ``pandas.read_csv`` on the file.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    # Imported lazily: pandas is only declared as an optional dependency
    # (the "scenic" extra), so the rest of the module must work without it.
    import pandas as pd

    regulon_csv = Path(path)
    if regulon_csv.exists():
        return pd.read_csv(regulon_csv)
    raise FileNotFoundError(
        f"SCENIC regulons missing: {regulon_csv}\n run pySCENIC on your counts first"
    )
@@ -0,0 +1,39 @@
1
+ Metadata-Version: 2.4
2
+ Name: sc-graft
3
+ Version: 0.0.0
4
+ Summary: single-cell Graph of Receptors, pAthways, Factors and Targets — external-prior loaders (early placeholder; core model not yet released)
5
+ Author-email: Ohbin Kwon <ohbin.kwon01@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/ObKwon115/sc-graft
8
+ Project-URL: Repository, https://github.com/ObKwon115/sc-graft
9
+ Project-URL: Issues, https://github.com/ObKwon115/sc-graft/issues
10
+ Keywords: single-cell,scRNA-seq,graph-neural-network,cell-signaling,bioinformatics
11
+ Classifier: Development Status :: 1 - Planning
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
17
+ Requires-Python: >=3.9
18
+ Description-Content-Type: text/markdown
19
+ Provides-Extra: scenic
20
+ Requires-Dist: pandas>=1.5; extra == "scenic"
21
+
22
+ # sc-graft
23
+
24
+ **Early placeholder release.** `sc-graft` — a single-cell **G**raph of
25
+ **R**eceptors, p**A**thways, **F**actors and **T**argets — is a
26
+ biologically-grounded heterogeneous graph transformer for inferring how receptor
27
+ signaling propagates to target genes in single cells.
28
+
29
+ This pre-release ships only a small slice of the **preprocessing** layer: the
30
+ **external-prior loaders** (STRING physical PPI, MSigDB gene sets, SCENIC
31
+ regulons). The network-construction logic and the model itself are under active
32
+ development and are **not included yet**.
33
+
34
+ ```python
35
+ from sc_graft import load_string_interactions, read_gmt, gmt_path, load_regulons
36
+ ```
37
+
38
+ - Source / docs: https://github.com/ObKwon115/sc-graft
39
+ - License: MIT
@@ -0,0 +1,9 @@
1
+ README.md
2
+ pyproject.toml
3
+ src/sc_graft/__init__.py
4
+ src/sc_graft/priors.py
5
+ src/sc_graft.egg-info/PKG-INFO
6
+ src/sc_graft.egg-info/SOURCES.txt
7
+ src/sc_graft.egg-info/dependency_links.txt
8
+ src/sc_graft.egg-info/requires.txt
9
+ src/sc_graft.egg-info/top_level.txt
@@ -0,0 +1,3 @@
1
+
2
+ [scenic]
3
+ pandas>=1.5
@@ -0,0 +1 @@
1
+ sc_graft