gnomon-pgs 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gnomon_pgs-0.1.0/.gitignore +6 -0
- gnomon_pgs-0.1.0/PKG-INFO +142 -0
- gnomon_pgs-0.1.0/README.md +120 -0
- gnomon_pgs-0.1.0/gnomon/__init__.py +76 -0
- gnomon_pgs-0.1.0/gnomon/_api.py +810 -0
- gnomon_pgs-0.1.0/gnomon/calibrate.py +103 -0
- gnomon_pgs-0.1.0/gnomon/map.py +110 -0
- gnomon_pgs-0.1.0/pyproject.toml +33 -0
- gnomon_pgs-0.1.0/tests/test_gnomon.py +607 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gnomon-pgs
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python wrapper for SauersML/gnomon (high-performance polygenic score engine).
|
|
5
|
+
Project-URL: Homepage, https://github.com/SauersML/gnomon
|
|
6
|
+
Project-URL: Issues, https://github.com/SauersML/gnomon/issues
|
|
7
|
+
Author: SauersML
|
|
8
|
+
License: MIT
|
|
9
|
+
Keywords: bioinformatics,genomics,pgs,plink,polygenic-score,vcf
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
16
|
+
Requires-Python: >=3.9
|
|
17
|
+
Provides-Extra: pandas
|
|
18
|
+
Requires-Dist: pandas>=1.5; extra == 'pandas'
|
|
19
|
+
Provides-Extra: test
|
|
20
|
+
Requires-Dist: pytest>=7; extra == 'test'
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
|
|
23
|
+
# gnomon (Python)
|
|
24
|
+
|
|
25
|
+
Python wrapper for the [`SauersML/gnomon`](https://github.com/SauersML/gnomon)
|
|
26
|
+
high-performance polygenic score engine. Each gnomon subcommand is a
|
|
27
|
+
typed Python function with kwargs that mirror the CLI flags one-to-one,
|
|
28
|
+
plus a parsed-result dataclass.
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import gnomon
|
|
32
|
+
|
|
33
|
+
result = gnomon.score(
|
|
34
|
+
"PGS004536,PGS001320,PGS005331",
|
|
35
|
+
"/data/aou_array_plink/arrays",
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
result.output_path # PosixPath('.../arrays_pgs3_<hash>.sscore')
|
|
39
|
+
result.n_samples # 245678
|
|
40
|
+
result.score_names # ('PGS004536', 'PGS001320', 'PGS005331')
|
|
41
|
+
|
|
42
|
+
# Indexable / queryable:
|
|
43
|
+
result.scores["NWD_001"] # {score: {avg, sum, denom}}
|
|
44
|
+
result.scores.score_for("NWD_001", "PGS004536") # float
|
|
45
|
+
result.scores.to_pandas() # DataFrame
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Install
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install gnomon-pgs
|
|
52
|
+
cargo install gnomon
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
The gnomon binary is located via `binary=` or PATH. No env-var indirection — pass
|
|
56
|
+
`binary=` explicitly if it isn't on PATH.
|
|
57
|
+
|
|
58
|
+
## Subcommands
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
gnomon.score(score, input_path, *, keep=None, reference=None, build=None,
|
|
62
|
+
panel=None, inferred_sex=None, ...) -> ScoreResult
|
|
63
|
+
gnomon.terms(genotype_path, *, sex=True) -> TermsResult
|
|
64
|
+
gnomon.run_all(score, input_path, model) -> AllResult
|
|
65
|
+
|
|
66
|
+
import gnomon.map as gmap
|
|
67
|
+
gmap.fit(genotype_path, components=20, ld=True, bp_window=500_000)
|
|
68
|
+
gmap.project(genotype_path, model="hwe_1kg_hgdp_gsa_v3")
|
|
69
|
+
|
|
70
|
+
import gnomon.calibrate as gcal
|
|
71
|
+
gcal.train("train.tsv", num_pcs=10, model_family="gam")
|
|
72
|
+
gcal.infer("test.tsv", model="model.toml")
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Shortcuts: avoid downloads and re-inference
|
|
76
|
+
|
|
77
|
+
Every kwarg the CLI exposes is on the Python API — none of these
|
|
78
|
+
overrides require touching the binary directly.
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
gnomon.score(
|
|
82
|
+
"PGS004536,PGS001320",
|
|
83
|
+
"/data/cohort/arrays",
|
|
84
|
+
reference="/cache/hg38.fa", # skip reference auto-download
|
|
85
|
+
build="38", # skip build auto-detection
|
|
86
|
+
panel="/cache/1kg_panel.vcf", # supply harmonisation panel
|
|
87
|
+
inferred_sex="male", # skip the in-pipeline sex scan
|
|
88
|
+
keep="/data/keep.iids.txt", # restrict to a sample subset
|
|
89
|
+
)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
`inferred_sex` accepts `"male"`, `"female"`, `"unknown"`, the matching
|
|
93
|
+
`InferredSex` enum members, or any string returned by `infer_sex`.
|
|
94
|
+
|
|
95
|
+
## ScoreTable
|
|
96
|
+
|
|
97
|
+
`gnomon.read_sscore(path)` returns a `ScoreTable`. The result of
|
|
98
|
+
`gnomon.score(...)` carries one on `.scores`.
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
table.iids # tuple of sample IDs
|
|
102
|
+
table.score_names # tuple of PGS names (suffix-stripped)
|
|
103
|
+
table.fids # tuple of family IDs
|
|
104
|
+
table.avg / .sum / .denom # tuple-of-tuples or None per column
|
|
105
|
+
|
|
106
|
+
# Membership / lookup:
|
|
107
|
+
"NWD_001" in table
|
|
108
|
+
table.index_of("NWD_001") # int row index
|
|
109
|
+
table["NWD_001"] # {score: {avg, sum, denom}}
|
|
110
|
+
table.score_for("NWD_001", "PGS004536", kind="avg") # float
|
|
111
|
+
|
|
112
|
+
# Pandas adapter (optional, requires `pip install gnomon-pgs[pandas]`):
|
|
113
|
+
table.to_pandas()
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Missing columns (e.g. older builds without `_SUM` / `_DENOM`) return
|
|
117
|
+
`None` — no fake zeros.
|
|
118
|
+
|
|
119
|
+
## Path inference
|
|
120
|
+
|
|
121
|
+
Sscore output filenames follow `score::main::score_output_path` exactly.
|
|
122
|
+
For inline PGS arguments the wrapper computes the same
|
|
123
|
+
`pgs<count>_<fnv1a64_hex8>` suffix the Rust binary writes — you don't
|
|
124
|
+
have to guess where the file lands.
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
from gnomon import expected_sscore_path
|
|
128
|
+
|
|
129
|
+
expected_sscore_path("/data/arrays.vcf.gz", "PGS001,PGS002")
|
|
130
|
+
# PosixPath('/data/arrays_pgs2_<hash>.sscore')
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## Errors
|
|
134
|
+
|
|
135
|
+
* `GnomonBinaryNotFound` — CLI not installed / not on PATH.
|
|
136
|
+
* `InvalidConfig` — argument combination rejected before launching.
|
|
137
|
+
* `GnomonFailed` — CLI exited non-zero. The exception preserves
|
|
138
|
+
`stdout`, `stderr`, `returncode`. Includes the last non-empty stderr
|
|
139
|
+
line in the message so the failure mode is obvious without spelunking.
|
|
140
|
+
* `SscoreParseError` — corrupt or unexpectedly-shaped `.sscore` output.
|
|
141
|
+
|
|
142
|
+
All subclass `GnomonError`.
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# gnomon (Python)
|
|
2
|
+
|
|
3
|
+
Python wrapper for the [`SauersML/gnomon`](https://github.com/SauersML/gnomon)
|
|
4
|
+
high-performance polygenic score engine. Each gnomon subcommand is a
|
|
5
|
+
typed Python function with kwargs that mirror the CLI flags one-to-one,
|
|
6
|
+
plus a parsed-result dataclass.
|
|
7
|
+
|
|
8
|
+
```python
|
|
9
|
+
import gnomon
|
|
10
|
+
|
|
11
|
+
result = gnomon.score(
|
|
12
|
+
"PGS004536,PGS001320,PGS005331",
|
|
13
|
+
"/data/aou_array_plink/arrays",
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
result.output_path # PosixPath('.../arrays_pgs3_<hash>.sscore')
|
|
17
|
+
result.n_samples # 245678
|
|
18
|
+
result.score_names # ('PGS004536', 'PGS001320', 'PGS005331')
|
|
19
|
+
|
|
20
|
+
# Indexable / queryable:
|
|
21
|
+
result.scores["NWD_001"] # {score: {avg, sum, denom}}
|
|
22
|
+
result.scores.score_for("NWD_001", "PGS004536") # float
|
|
23
|
+
result.scores.to_pandas() # DataFrame
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Install
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
pip install gnomon-pgs
|
|
30
|
+
cargo install gnomon
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
The gnomon binary is located via `binary=` or PATH. No env-var indirection — pass
|
|
34
|
+
`binary=` explicitly if it isn't on PATH.
|
|
35
|
+
|
|
36
|
+
## Subcommands
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
gnomon.score(score, input_path, *, keep=None, reference=None, build=None,
|
|
40
|
+
panel=None, inferred_sex=None, ...) -> ScoreResult
|
|
41
|
+
gnomon.terms(genotype_path, *, sex=True) -> TermsResult
|
|
42
|
+
gnomon.run_all(score, input_path, model) -> AllResult
|
|
43
|
+
|
|
44
|
+
import gnomon.map as gmap
|
|
45
|
+
gmap.fit(genotype_path, components=20, ld=True, bp_window=500_000)
|
|
46
|
+
gmap.project(genotype_path, model="hwe_1kg_hgdp_gsa_v3")
|
|
47
|
+
|
|
48
|
+
import gnomon.calibrate as gcal
|
|
49
|
+
gcal.train("train.tsv", num_pcs=10, model_family="gam")
|
|
50
|
+
gcal.infer("test.tsv", model="model.toml")
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Shortcuts: avoid downloads and re-inference
|
|
54
|
+
|
|
55
|
+
Every kwarg the CLI exposes is on the Python API — none of these
|
|
56
|
+
overrides require touching the binary directly.
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
gnomon.score(
|
|
60
|
+
"PGS004536,PGS001320",
|
|
61
|
+
"/data/cohort/arrays",
|
|
62
|
+
reference="/cache/hg38.fa", # skip reference auto-download
|
|
63
|
+
build="38", # skip build auto-detection
|
|
64
|
+
panel="/cache/1kg_panel.vcf", # supply harmonisation panel
|
|
65
|
+
inferred_sex="male", # skip the in-pipeline sex scan
|
|
66
|
+
keep="/data/keep.iids.txt", # restrict to a sample subset
|
|
67
|
+
)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
`inferred_sex` accepts `"male"`, `"female"`, `"unknown"`, the matching
|
|
71
|
+
`InferredSex` enum members, or any string returned by `infer_sex`.
|
|
72
|
+
|
|
73
|
+
## ScoreTable
|
|
74
|
+
|
|
75
|
+
`gnomon.read_sscore(path)` returns a `ScoreTable`. The result of
|
|
76
|
+
`gnomon.score(...)` carries one on `.scores`.
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
table.iids # tuple of sample IDs
|
|
80
|
+
table.score_names # tuple of PGS names (suffix-stripped)
|
|
81
|
+
table.fids # tuple of family IDs
|
|
82
|
+
table.avg / .sum / .denom # tuple-of-tuples or None per column
|
|
83
|
+
|
|
84
|
+
# Membership / lookup:
|
|
85
|
+
"NWD_001" in table
|
|
86
|
+
table.index_of("NWD_001") # int row index
|
|
87
|
+
table["NWD_001"] # {score: {avg, sum, denom}}
|
|
88
|
+
table.score_for("NWD_001", "PGS004536", kind="avg") # float
|
|
89
|
+
|
|
90
|
+
# Pandas adapter (optional, requires `pip install gnomon-pgs[pandas]`):
|
|
91
|
+
table.to_pandas()
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Missing columns (e.g. older builds without `_SUM` / `_DENOM`) return
|
|
95
|
+
`None` — no fake zeros.
|
|
96
|
+
|
|
97
|
+
## Path inference
|
|
98
|
+
|
|
99
|
+
Sscore output filenames follow `score::main::score_output_path` exactly.
|
|
100
|
+
For inline PGS arguments the wrapper computes the same
|
|
101
|
+
`pgs<count>_<fnv1a64_hex8>` suffix the Rust binary writes — you don't
|
|
102
|
+
have to guess where the file lands.
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
from gnomon import expected_sscore_path
|
|
106
|
+
|
|
107
|
+
expected_sscore_path("/data/arrays.vcf.gz", "PGS001,PGS002")
|
|
108
|
+
# PosixPath('/data/arrays_pgs2_<hash>.sscore')
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Errors
|
|
112
|
+
|
|
113
|
+
* `GnomonBinaryNotFound` — CLI not installed / not on PATH.
|
|
114
|
+
* `InvalidConfig` — argument combination rejected before launching.
|
|
115
|
+
* `GnomonFailed` — CLI exited non-zero. The exception preserves
|
|
116
|
+
`stdout`, `stderr`, `returncode`. Includes the last non-empty stderr
|
|
117
|
+
line in the message so the failure mode is obvious without spelunking.
|
|
118
|
+
* `SscoreParseError` — corrupt or unexpectedly-shaped `.sscore` output.
|
|
119
|
+
|
|
120
|
+
All subclass `GnomonError`.
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""gnomon — Python bindings for the SauersML/gnomon CLI.
|
|
2
|
+
|
|
3
|
+
gnomon is a high-performance polygenic score engine. This Python package
|
|
4
|
+
wraps its CLI surface as a small set of typed functions:
|
|
5
|
+
|
|
6
|
+
* ``gnomon.score(score, input_path, ...)`` – compute raw PGS
|
|
7
|
+
* ``gnomon.read_sscore(path)`` – parse a .sscore output
|
|
8
|
+
* ``gnomon.terms(genotype_path, sex=True)`` – terms inference (sex etc.)
|
|
9
|
+
* ``gnomon.map.fit(...)`` / ``gnomon.map.project(...)`` – HWE PCA
|
|
10
|
+
* ``gnomon.calibrate.train(...)`` / ``gnomon.calibrate.infer(...)``
|
|
11
|
+
* ``gnomon.run_all(...)`` – score + project + terms
|
|
12
|
+
|
|
13
|
+
Each subcommand has its own typed kwargs and returns a frozen dataclass
|
|
14
|
+
result (e.g. ``ScoreResult`` carries the output path, the parsed
|
|
15
|
+
``ScoreTable``, plus captured stdout/stderr for inspection).
|
|
16
|
+
|
|
17
|
+
Quick start
|
|
18
|
+
-----------
|
|
19
|
+
|
|
20
|
+
>>> from gnomon import score
|
|
21
|
+
>>> result = score(
|
|
22
|
+
... score="PGS004536,PGS001320,PGS005331",
|
|
23
|
+
... input_path="/data/aou_array_plink/arrays",
|
|
24
|
+
... )
|
|
25
|
+
>>> result.output_path
|
|
26
|
+
PosixPath('/data/aou_array_plink/arrays_pgs3_<hash>.sscore')
|
|
27
|
+
>>> result.scores.shape
|
|
28
|
+
(245678, 4) # IID + 3 scores
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
from ._api import (
|
|
32
|
+
score,
|
|
33
|
+
terms,
|
|
34
|
+
run_all,
|
|
35
|
+
read_sscore,
|
|
36
|
+
expected_sscore_path,
|
|
37
|
+
ScoreResult,
|
|
38
|
+
ScoreTable,
|
|
39
|
+
TermsResult,
|
|
40
|
+
MapResult,
|
|
41
|
+
CalibrateResult,
|
|
42
|
+
AllResult,
|
|
43
|
+
InferredSex,
|
|
44
|
+
GnomonError,
|
|
45
|
+
GnomonBinaryNotFound,
|
|
46
|
+
GnomonFailed,
|
|
47
|
+
InvalidConfig,
|
|
48
|
+
SscoreParseError,
|
|
49
|
+
locate_binary,
|
|
50
|
+
)
|
|
51
|
+
from . import map, calibrate
|
|
52
|
+
|
|
53
|
+
__all__ = [
|
|
54
|
+
"score",
|
|
55
|
+
"terms",
|
|
56
|
+
"run_all",
|
|
57
|
+
"read_sscore",
|
|
58
|
+
"expected_sscore_path",
|
|
59
|
+
"ScoreResult",
|
|
60
|
+
"ScoreTable",
|
|
61
|
+
"TermsResult",
|
|
62
|
+
"MapResult",
|
|
63
|
+
"CalibrateResult",
|
|
64
|
+
"AllResult",
|
|
65
|
+
"InferredSex",
|
|
66
|
+
"GnomonError",
|
|
67
|
+
"GnomonBinaryNotFound",
|
|
68
|
+
"GnomonFailed",
|
|
69
|
+
"InvalidConfig",
|
|
70
|
+
"SscoreParseError",
|
|
71
|
+
"locate_binary",
|
|
72
|
+
"map",
|
|
73
|
+
"calibrate",
|
|
74
|
+
]
|
|
75
|
+
|
|
76
|
+
__version__ = "0.1.0"
|