pysec2pri 0.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysec2pri-0.0.2/LICENSE +21 -0
- pysec2pri-0.0.2/PKG-INFO +181 -0
- pysec2pri-0.0.2/README.md +128 -0
- pysec2pri-0.0.2/pyproject.toml +291 -0
- pysec2pri-0.0.2/src/pysec2pri/__init__.py +78 -0
- pysec2pri-0.0.2/src/pysec2pri/__main__.py +12 -0
- pysec2pri-0.0.2/src/pysec2pri/api.py +871 -0
- pysec2pri-0.0.2/src/pysec2pri/cli.py +719 -0
- pysec2pri-0.0.2/src/pysec2pri/config/__init__.py +1 -0
- pysec2pri-0.0.2/src/pysec2pri/config/chebi.yaml +144 -0
- pysec2pri-0.0.2/src/pysec2pri/config/hgnc.yaml +127 -0
- pysec2pri-0.0.2/src/pysec2pri/config/hmdb.yaml +128 -0
- pysec2pri-0.0.2/src/pysec2pri/config/ncbi.yaml +131 -0
- pysec2pri-0.0.2/src/pysec2pri/config/uniprot.yaml +127 -0
- pysec2pri-0.0.2/src/pysec2pri/config/wikidata.yaml +127 -0
- pysec2pri-0.0.2/src/pysec2pri/constants.py +31 -0
- pysec2pri-0.0.2/src/pysec2pri/diff.py +345 -0
- pysec2pri-0.0.2/src/pysec2pri/download.py +782 -0
- pysec2pri-0.0.2/src/pysec2pri/exports.py +401 -0
- pysec2pri-0.0.2/src/pysec2pri/logging.py +65 -0
- pysec2pri-0.0.2/src/pysec2pri/parsers/__init__.py +38 -0
- pysec2pri-0.0.2/src/pysec2pri/parsers/base.py +1304 -0
- pysec2pri-0.0.2/src/pysec2pri/parsers/chebi.py +655 -0
- pysec2pri-0.0.2/src/pysec2pri/parsers/hgnc.py +383 -0
- pysec2pri-0.0.2/src/pysec2pri/parsers/hmdb.py +293 -0
- pysec2pri-0.0.2/src/pysec2pri/parsers/ncbi.py +274 -0
- pysec2pri-0.0.2/src/pysec2pri/parsers/uniprot.py +210 -0
- pysec2pri-0.0.2/src/pysec2pri/parsers/wikidata.py +524 -0
- pysec2pri-0.0.2/src/pysec2pri/py.typed +1 -0
- pysec2pri-0.0.2/src/pysec2pri/queries/__init__.py +118 -0
- pysec2pri-0.0.2/src/pysec2pri/queries/chemical_redirects.rq +33 -0
- pysec2pri-0.0.2/src/pysec2pri/queries/chemical_redirects_test.rq +34 -0
- pysec2pri-0.0.2/src/pysec2pri/queries/gene_redirects.rq +22 -0
- pysec2pri-0.0.2/src/pysec2pri/queries/gene_redirects_test.rq +23 -0
- pysec2pri-0.0.2/src/pysec2pri/queries/protein_redirects.rq +30 -0
- pysec2pri-0.0.2/src/pysec2pri/queries/protein_redirects_test.rq +31 -0
- pysec2pri-0.0.2/src/pysec2pri/update_ids.py +404 -0
- pysec2pri-0.0.2/src/pysec2pri/version.py +39 -0
pysec2pri-0.0.2/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Javier Millán Acosta
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
pysec2pri-0.0.2/PKG-INFO
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pysec2pri
|
|
3
|
+
Version: 0.0.2
|
|
4
|
+
Summary: Secondary to primary identifier mapping
|
|
5
|
+
Keywords: snekpack,cookiecutter
|
|
6
|
+
Author: Javier Millán Acosta
|
|
7
|
+
Author-email: Javier Millán Acosta <javier.millan.acosta@gmail.com>
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Classifier: Development Status :: 1 - Planning
|
|
10
|
+
Classifier: Environment :: Console
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Framework :: Pytest
|
|
15
|
+
Classifier: Framework :: tox
|
|
16
|
+
Classifier: Framework :: Sphinx
|
|
17
|
+
Classifier: Natural Language :: English
|
|
18
|
+
Classifier: Programming Language :: Python
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
24
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
25
|
+
Classifier: Typing :: Typed
|
|
26
|
+
Requires-Dist: more-itertools
|
|
27
|
+
Requires-Dist: tqdm
|
|
28
|
+
Requires-Dist: click
|
|
29
|
+
Requires-Dist: more-click
|
|
30
|
+
Requires-Dist: typing-extensions
|
|
31
|
+
Requires-Dist: pydantic>=2.0
|
|
32
|
+
Requires-Dist: linkml>=1.10.0
|
|
33
|
+
Requires-Dist: sssom>=0.4.0
|
|
34
|
+
Requires-Dist: curies>=0.7.0
|
|
35
|
+
Requires-Dist: httpx>=0.24.0
|
|
36
|
+
Requires-Dist: polars>=0.20.0
|
|
37
|
+
Requires-Dist: regex==2026.1.15
|
|
38
|
+
Requires-Dist: lxml
|
|
39
|
+
Requires-Dist: defusedxml
|
|
40
|
+
Requires-Dist: pandas
|
|
41
|
+
Requires-Dist: numpy
|
|
42
|
+
Requires-Dist: rdkit
|
|
43
|
+
Requires-Dist: polars-runtime-32
|
|
44
|
+
Maintainer: Javier Millán Acosta
|
|
45
|
+
Maintainer-email: Javier Millán Acosta <javier.millan.acosta@gmail.com>
|
|
46
|
+
Requires-Python: >=3.10
|
|
47
|
+
Project-URL: Bug Tracker, https://github.com/sec2pri/pysec2pri/issues
|
|
48
|
+
Project-URL: Homepage, https://github.com/sec2pri/pysec2pri
|
|
49
|
+
Project-URL: Repository, https://github.com/sec2pri/pysec2pri.git
|
|
50
|
+
Project-URL: Documentation, https://pysec2pri.readthedocs.io
|
|
51
|
+
Project-URL: Funding, https://github.com/sponsors/jmillanacosta
|
|
52
|
+
Description-Content-Type: text/markdown
|
|
53
|
+
|
|
54
|
+
<!--
|
|
55
|
+
<p align="center">
|
|
56
|
+
<img src="https://github.com/jmillanacosta/pysec2pri/raw/main/docs/source/logo.png" height="150">
|
|
57
|
+
</p>
|
|
58
|
+
-->
|
|
59
|
+
|
|
60
|
+
<h1 align="center">
|
|
61
|
+
pySec2Pri
|
|
62
|
+
</h1>
|
|
63
|
+
|
|
64
|
+
<p align="center">
|
|
65
|
+
<a href="https://github.com/jmillanacosta/pysec2pri/actions/workflows/tests.yml">
|
|
66
|
+
<img alt="Tests" src="https://github.com/jmillanacosta/pysec2pri/actions/workflows/tests.yml/badge.svg" /></a>
|
|
67
|
+
<a href="https://pypi.org/project/pysec2pri">
|
|
68
|
+
<img alt="PyPI" src="https://img.shields.io/pypi/v/pysec2pri" /></a>
|
|
69
|
+
<a href="https://pypi.org/project/pysec2pri">
|
|
70
|
+
<img alt="PyPI - Python Version" src="https://img.shields.io/pypi/pyversions/pysec2pri" /></a>
|
|
71
|
+
<a href="https://github.com/jmillanacosta/pysec2pri/blob/main/LICENSE">
|
|
72
|
+
<img alt="PyPI - License" src="https://img.shields.io/pypi/l/pysec2pri" /></a>
|
|
73
|
+
<a href='https://pysec2pri.readthedocs.io/en/latest/?badge=latest'>
|
|
74
|
+
<img src='https://readthedocs.org/projects/pysec2pri/badge/?version=latest' alt='Documentation Status' /></a>
|
|
75
|
+
</p>
|
|
76
|
+
|
|
77
|
+
Create and use mapping files for secondary (retired/withdrawn) biological
|
|
78
|
+
database identifiers and symbols to primary (current) identifiers and symbols.
|
|
79
|
+
|
|
80
|
+
Outputs mappings in [SSSOM format](https://w3id.org/sssom) by default. Subjects
|
|
81
|
+
are secondary, objects are primary.
|
|
82
|
+
|
|
83
|
+
## Installation
|
|
84
|
+
|
|
85
|
+
```console
|
|
86
|
+
uv pip install pysec2pri
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Or install from source:
|
|
90
|
+
|
|
91
|
+
```console
|
|
92
|
+
uv pip install git+https://github.com/jmillanacosta/pysec2pri.git
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Quick Start
|
|
96
|
+
|
|
97
|
+
### Generating mapping sets
|
|
98
|
+
|
|
99
|
+
To obtain the secondary to primary identifier SSSOM mapping set for ChEBI:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
pysec2pri chebi
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
This will automatically download the latest ChEBI release and generate an SSSOM
|
|
106
|
+
mapping file in your current directory.
|
|
107
|
+
|
|
108
|
+
To process a local file and specify the output path:
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
pysec2pri chebi ChEBI_complete_3star.sdf --output my_mappings.sssom.tsv
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
For more options and help on any command:
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
pysec2pri --help
|
|
118
|
+
pysec2pri chebi --help
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
The default output is in [SSSOM](https://mapping-commons.github.io/sssom/)
|
|
122
|
+
(Simple Standard for Sharing Ontology Mappings) TSV format.
|
|
123
|
+
|
|
124
|
+
### Updating IDs and symbols
|
|
125
|
+
|
|
126
|
+
A generated mapping set can be used to update IDs and symbols in Python:
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
from pysec2pri import generate_chebi_synonyms, resolve_symbols
|
|
130
|
+
cs = generate_chebi_synonyms()
|
|
131
|
+
resolve_symbols(["Glucose", "ATP", "Guanine"], cs)
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Or from the command line, given a TSV file `gene_ex.tsv`:
|
|
135
|
+
|
|
136
|
+
```
|
|
137
|
+
gene data
|
|
138
|
+
HGNC:131 3.5
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Resolve the `gene` column to primary HGNC IDs (a new `gene_primary` column is
|
|
142
|
+
added):
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
pysec2pri update-ids gene_ex.tsv hgnc --at gene -o gene_ex_primary.tsv
|
|
146
|
+
# gene data gene_primary
|
|
147
|
+
# HGNC:131 3.5 HGNC:145
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
The same pattern works for symbols with `update-symbols`, and multiple columns
|
|
151
|
+
can be resolved by repeating `--at`:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
pysec2pri update-ids data.tsv hgnc --at gene_id --at related_gene_id
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
To skip regenerating the mapping set, pass a pre-built mapping file:
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
pysec2pri hgnc ids # outputs hgnc_{version}_sssom.tsv
|
|
161
|
+
pysec2pri update-ids gene_ex.tsv hgnc --at gene --mapping hgnc_{version}_sssom.tsv
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## Documentation
|
|
165
|
+
|
|
166
|
+
Full documentation: <https://pysec2pri.readthedocs.io/>
|
|
167
|
+
|
|
168
|
+
## Supported Databases
|
|
169
|
+
|
|
170
|
+
| Datasource | License | Citation |
|
|
171
|
+
| ---------- | ---------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
172
|
+
| ChEBI | [CC BY 4.0](docs/licenses/chebi/LICENSE) | Hastings J, Owen G, Dekker A, et al. ChEBI in 2016: Improved services and an expanding collection of metabolites. Nucleic Acids Research. 2016 Jan;44(D1):D1214-9. DOI: [10.1093/nar/gkv1031](https://doi.org/10.1093/nar/gkv1031). PMID: 26467479; PMCID: PMC4702775. |
|
|
173
|
+
| HMDB | [CC0](https://hmdb.ca/about#compliance:~:text=international%20scientific%20conferences.-,Citing%20the%20HMDB,-HMDB%20is%20offered) | Wishart DS, Guo A, Oler E, Wang F, Anjum A, Peters H, Dizon R, Sayeeda Z, Tian S, Lee BL, Berjanskii M, Mah R, Yamamoto M, Jovel J, Torres-Calzada C, Hiebert-Giesbrecht M, Lui VW, Varshavi D, Varshavi D, Allen D, Arndt D, Khetarpal N, Sivakumaran A, Harford K, Sanford S, Yee K, Cao X, Budinski Z, Liigand J, Zhang L, Zheng J, Mandal R, Karu N, Dambrova M, Schiöth HB, Greiner R, Gautam V. HMDB 5.0: the Human Metabolome Database for 2022. Nucleic Acids Res. 2022 Jan 7;50(D1):D622-D631. doi: [10.1093/nar/gkab1062](https://doi.org/10.1093/nar/gkab1062). PMID: 34986597; PMCID: PMC8728138. |
|
|
174
|
+
| HGNC | [link](https://www.genenames.org/about/license/) | Seal RL, Braschi B, Gray K, Jones TEM, Tweedie S, Haim-Vilmovsky L, Bruford EA. Genenames.org: the HGNC resources in 2023. Nucleic Acids Res. 2023 Jan 6;51(D1):D1003-D1009. doi: [10.1093/nar/gkac888](https://doi.org/10.1093/nar/gkac888). PMID: 36243972; PMCID: PMC9825485. |
|
|
175
|
+
| NCBI | [link](https://www.ncbi.nlm.nih.gov/home/about/policies/) | Sayers EW, Bolton EE, Brister JR, Canese K, Chan J, Comeau DC, Connor R, Funk K, Kelly C, Kim S, Madej T, Marchler-Bauer A, Lanczycki C, Lathrop S, Lu Z, Thibaud-Nissen F, Murphy T, Phan L, Skripchenko Y, Tse T, Wang J, Williams R, Trawick BW, Pruitt KD, Sherry ST. Database resources of the national center for biotechnology information. Nucleic Acids Res. 2022 Jan 7;50(D1):D20-D26. doi: [10.1093/nar/gkab1112](https://doi.org/10.1093/nar/gkab1112). PMID: 34850941; PMCID: PMC8728269. |
|
|
176
|
+
| UniProt | [CC BY 4.0](https://ftp.uniprot.org/pub/docs/licenses/uniprot/current_release/knowledgebase/complete/LICENSE) | UniProt Consortium. UniProt: the universal protein knowledgebase in 2021. Nucleic Acids Res. 2021 Jan 8;49(D1):D480-D489. doi: [10.1093/nar/gkaa1100](https://doi.org/10.1093/nar/gkaa1100). PMID: 33237286; PMCID: PMC7778908. |
|
|
177
|
+
| Wikidata | [CC0](https://creativecommons.org/publicdomain/zero/1.0/) | Vrandečić, D., Krötzsch, M. Wikidata: a free collaborative knowledgebase. Communications of the ACM. 2014. doi: [10.1145/2629489](https://doi.org/10.1145/2629489). |
|
|
178
|
+
|
|
179
|
+
## License
|
|
180
|
+
|
|
181
|
+
MIT License. See [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
<!--
|
|
2
|
+
<p align="center">
|
|
3
|
+
<img src="https://github.com/jmillanacosta/pysec2pri/raw/main/docs/source/logo.png" height="150">
|
|
4
|
+
</p>
|
|
5
|
+
-->
|
|
6
|
+
|
|
7
|
+
<h1 align="center">
|
|
8
|
+
pySec2Pri
|
|
9
|
+
</h1>
|
|
10
|
+
|
|
11
|
+
<p align="center">
|
|
12
|
+
<a href="https://github.com/jmillanacosta/pysec2pri/actions/workflows/tests.yml">
|
|
13
|
+
<img alt="Tests" src="https://github.com/jmillanacosta/pysec2pri/actions/workflows/tests.yml/badge.svg" /></a>
|
|
14
|
+
<a href="https://pypi.org/project/pysec2pri">
|
|
15
|
+
<img alt="PyPI" src="https://img.shields.io/pypi/v/pysec2pri" /></a>
|
|
16
|
+
<a href="https://pypi.org/project/pysec2pri">
|
|
17
|
+
<img alt="PyPI - Python Version" src="https://img.shields.io/pypi/pyversions/pysec2pri" /></a>
|
|
18
|
+
<a href="https://github.com/jmillanacosta/pysec2pri/blob/main/LICENSE">
|
|
19
|
+
<img alt="PyPI - License" src="https://img.shields.io/pypi/l/pysec2pri" /></a>
|
|
20
|
+
<a href='https://pysec2pri.readthedocs.io/en/latest/?badge=latest'>
|
|
21
|
+
<img src='https://readthedocs.org/projects/pysec2pri/badge/?version=latest' alt='Documentation Status' /></a>
|
|
22
|
+
</p>
|
|
23
|
+
|
|
24
|
+
Create and use mapping files for secondary (retired/withdrawn) biological
|
|
25
|
+
database identifiers and symbols to primary (current) identifiers and symbols.
|
|
26
|
+
|
|
27
|
+
Outputs mappings in [SSSOM format](https://w3id.org/sssom) by default. Subjects
|
|
28
|
+
are secondary, objects are primary.
|
|
29
|
+
|
|
30
|
+
## Installation
|
|
31
|
+
|
|
32
|
+
```console
|
|
33
|
+
uv pip install pysec2pri
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Or install from source:
|
|
37
|
+
|
|
38
|
+
```console
|
|
39
|
+
uv pip install git+https://github.com/jmillanacosta/pysec2pri.git
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Quick Start
|
|
43
|
+
|
|
44
|
+
### Generating mapping sets
|
|
45
|
+
|
|
46
|
+
To obtain the secondary to primary identifier SSSOM mapping set for ChEBI:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pysec2pri chebi
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
This will automatically download the latest ChEBI release and generate an SSSOM
|
|
53
|
+
mapping file in your current directory.
|
|
54
|
+
|
|
55
|
+
To process a local file and specify the output path:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pysec2pri chebi ChEBI_complete_3star.sdf --output my_mappings.sssom.tsv
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
For more options and help on any command:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pysec2pri --help
|
|
65
|
+
pysec2pri chebi --help
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
The default output is in [SSSOM](https://mapping-commons.github.io/sssom/)
|
|
69
|
+
(Simple Standard for Sharing Ontology Mappings) TSV format.
|
|
70
|
+
|
|
71
|
+
### Updating IDs and symbols
|
|
72
|
+
|
|
73
|
+
A generated mapping set can be used to update IDs and symbols in Python:
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
from pysec2pri import generate_chebi_synonyms, resolve_symbols
|
|
77
|
+
cs = generate_chebi_synonyms()
|
|
78
|
+
resolve_symbols(["Glucose", "ATP", "Guanine"], cs)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Or from the command line, given a TSV file `gene_ex.tsv`:
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
gene data
|
|
85
|
+
HGNC:131 3.5
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Resolve the `gene` column to primary HGNC IDs (a new `gene_primary` column is
|
|
89
|
+
added):
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
pysec2pri update-ids gene_ex.tsv hgnc --at gene -o gene_ex_primary.tsv
|
|
93
|
+
# gene data gene_primary
|
|
94
|
+
# HGNC:131 3.5 HGNC:145
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
The same pattern works for symbols with `update-symbols`, and multiple columns
|
|
98
|
+
can be resolved by repeating `--at`:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
pysec2pri update-ids data.tsv hgnc --at gene_id --at related_gene_id
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
To skip regenerating the mapping set, pass a pre-built mapping file:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
pysec2pri hgnc ids # outputs hgnc_{version}_sssom.tsv
|
|
108
|
+
pysec2pri update-ids gene_ex.tsv hgnc --at gene --mapping hgnc_{version}_sssom.tsv
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Documentation
|
|
112
|
+
|
|
113
|
+
Full documentation: <https://pysec2pri.readthedocs.io/>
|
|
114
|
+
|
|
115
|
+
## Supported Databases
|
|
116
|
+
|
|
117
|
+
| Datasource | License | Citation |
|
|
118
|
+
| ---------- | ---------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
119
|
+
| ChEBI | [CC BY 4.0](docs/licenses/chebi/LICENSE) | Hastings J, Owen G, Dekker A, et al. ChEBI in 2016: Improved services and an expanding collection of metabolites. Nucleic Acids Research. 2016 Jan;44(D1):D1214-9. DOI: [10.1093/nar/gkv1031](https://doi.org/10.1093/nar/gkv1031). PMID: 26467479; PMCID: PMC4702775. |
|
|
120
|
+
| HMDB | [CC0](https://hmdb.ca/about#compliance:~:text=international%20scientific%20conferences.-,Citing%20the%20HMDB,-HMDB%20is%20offered) | Wishart DS, Guo A, Oler E, Wang F, Anjum A, Peters H, Dizon R, Sayeeda Z, Tian S, Lee BL, Berjanskii M, Mah R, Yamamoto M, Jovel J, Torres-Calzada C, Hiebert-Giesbrecht M, Lui VW, Varshavi D, Varshavi D, Allen D, Arndt D, Khetarpal N, Sivakumaran A, Harford K, Sanford S, Yee K, Cao X, Budinski Z, Liigand J, Zhang L, Zheng J, Mandal R, Karu N, Dambrova M, Schiöth HB, Greiner R, Gautam V. HMDB 5.0: the Human Metabolome Database for 2022. Nucleic Acids Res. 2022 Jan 7;50(D1):D622-D631. doi: [10.1093/nar/gkab1062](https://doi.org/10.1093/nar/gkab1062). PMID: 34986597; PMCID: PMC8728138. |
|
|
121
|
+
| HGNC | [link](https://www.genenames.org/about/license/) | Seal RL, Braschi B, Gray K, Jones TEM, Tweedie S, Haim-Vilmovsky L, Bruford EA. Genenames.org: the HGNC resources in 2023. Nucleic Acids Res. 2023 Jan 6;51(D1):D1003-D1009. doi: [10.1093/nar/gkac888](https://doi.org/10.1093/nar/gkac888). PMID: 36243972; PMCID: PMC9825485. |
|
|
122
|
+
| NCBI | [link](https://www.ncbi.nlm.nih.gov/home/about/policies/) | Sayers EW, Bolton EE, Brister JR, Canese K, Chan J, Comeau DC, Connor R, Funk K, Kelly C, Kim S, Madej T, Marchler-Bauer A, Lanczycki C, Lathrop S, Lu Z, Thibaud-Nissen F, Murphy T, Phan L, Skripchenko Y, Tse T, Wang J, Williams R, Trawick BW, Pruitt KD, Sherry ST. Database resources of the national center for biotechnology information. Nucleic Acids Res. 2022 Jan 7;50(D1):D20-D26. doi: [10.1093/nar/gkab1112](https://doi.org/10.1093/nar/gkab1112). PMID: 34850941; PMCID: PMC8728269. |
|
|
123
|
+
| UniProt | [CC BY 4.0](https://ftp.uniprot.org/pub/docs/licenses/uniprot/current_release/knowledgebase/complete/LICENSE) | UniProt Consortium. UniProt: the universal protein knowledgebase in 2021. Nucleic Acids Res. 2021 Jan 8;49(D1):D480-D489. doi: [10.1093/nar/gkaa1100](https://doi.org/10.1093/nar/gkaa1100). PMID: 33237286; PMCID: PMC7778908. |
|
|
124
|
+
| Wikidata | [CC0](https://creativecommons.org/publicdomain/zero/1.0/) | Vrandečić, D., Krötzsch, M. Wikidata: a free collaborative knowledgebase. Communications of the ACM. 2014. doi: [10.1145/2629489](https://doi.org/10.1145/2629489). |
|
|
125
|
+
|
|
126
|
+
## License
|
|
127
|
+
|
|
128
|
+
MIT License. See [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["uv_build>=0.9.6,<1.0"]
|
|
3
|
+
build-backend = "uv_build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "pysec2pri"
|
|
7
|
+
version = "0.0.2"
|
|
8
|
+
description = "Secondary to primary identifier mapping"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
authors = [
|
|
11
|
+
{ name = "Javier Millán Acosta", email = "javier.millan.acosta@gmail.com" }
|
|
12
|
+
]
|
|
13
|
+
maintainers = [
|
|
14
|
+
{ name = "Javier Millán Acosta", email = "javier.millan.acosta@gmail.com" }
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
# See https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#classifiers
|
|
18
|
+
# Search tags using the controlled vocabulary at https://pypi.org/classifiers
|
|
19
|
+
classifiers = [
|
|
20
|
+
"Development Status :: 1 - Planning",
|
|
21
|
+
"Environment :: Console",
|
|
22
|
+
"Intended Audience :: Developers",
|
|
23
|
+
"License :: OSI Approved :: MIT License",
|
|
24
|
+
"Operating System :: OS Independent",
|
|
25
|
+
"Framework :: Pytest",
|
|
26
|
+
"Framework :: tox",
|
|
27
|
+
"Framework :: Sphinx",
|
|
28
|
+
"Natural Language :: English",
|
|
29
|
+
"Programming Language :: Python",
|
|
30
|
+
"Programming Language :: Python :: 3.10",
|
|
31
|
+
"Programming Language :: Python :: 3.11",
|
|
32
|
+
"Programming Language :: Python :: 3.12",
|
|
33
|
+
"Programming Language :: Python :: 3.13",
|
|
34
|
+
"Programming Language :: Python :: 3.14",
|
|
35
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
36
|
+
"Typing :: Typed",
|
|
37
|
+
# TODO add your topics from the Trove controlled vocabulary (see https://pypi.org/classifiers)
|
|
38
|
+
]
|
|
39
|
+
keywords = [
|
|
40
|
+
"snekpack", # please keep this keyword to credit the cookiecutter-snekpack template
|
|
41
|
+
"cookiecutter",
|
|
42
|
+
# TODO add your own free-text keywords
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
# License Information.
|
|
46
|
+
# See PEP-639 at https://peps.python.org/pep-0639/#add-license-files-key
|
|
47
|
+
license-files = [
|
|
48
|
+
"LICENSE",
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
requires-python = ">=3.10"
|
|
52
|
+
dependencies = [
|
|
53
|
+
"more_itertools", # missing from itertools in stdlib
|
|
54
|
+
"tqdm", # progress bars
|
|
55
|
+
"click",
|
|
56
|
+
"more_click",
|
|
57
|
+
"typing-extensions",
|
|
58
|
+
"pydantic>=2.0", # data validation and models
|
|
59
|
+
"linkml>=1.10.0",
|
|
60
|
+
"sssom>=0.4.0", # SSSOM standard support
|
|
61
|
+
"curies>=0.7.0", # CURIE handling
|
|
62
|
+
"httpx>=0.24.0", # HTTP client for downloads
|
|
63
|
+
"polars>=0.20.0", # Fast dataframe operations for diffs
|
|
64
|
+
"regex==2026.1.15",
|
|
65
|
+
"lxml", # XML parsing for HMDB
|
|
66
|
+
"defusedxml", # Secure XML parsing
|
|
67
|
+
"pandas", # DataFrame support for some workflows
|
|
68
|
+
"numpy", # Required by pandas and polars
|
|
69
|
+
"rdkit", # Chemistry toolkit for SDF parsing
|
|
70
|
+
"polars-runtime-32", # Polars native runtime (standard build with a 32-bit row index; not specific to 32-bit systems)
|
|
71
|
+
|
|
72
|
+
]
|
|
73
|
+
|
|
74
|
+
# see https://peps.python.org/pep-0735/ and https://docs.astral.sh/uv/concepts/dependencies/#dependency-groups
|
|
75
|
+
[dependency-groups]
|
|
76
|
+
tests = [
|
|
77
|
+
"pytest",
|
|
78
|
+
"coverage[toml]",
|
|
79
|
+
"lxml",
|
|
80
|
+
"pandas",
|
|
81
|
+
"numpy",
|
|
82
|
+
"rdkit",
|
|
83
|
+
"polars-runtime-32",
|
|
84
|
+
]
|
|
85
|
+
docs = [
|
|
86
|
+
"sphinx>=8,<9",
|
|
87
|
+
"sphinx-rtd-theme>=3.0","sphinx-click",
|
|
88
|
+
"sphinx_automodapi",
|
|
89
|
+
# Include if your project uses Pydantic:
|
|
90
|
+
# "autodoc_pydantic",
|
|
91
|
+
# To include LaTeX comments easily in your docs.
|
|
92
|
+
# If you uncomment this, don't forget to do the same in docs/conf.py
|
|
93
|
+
# texext
|
|
94
|
+
]
|
|
95
|
+
lint = [
|
|
96
|
+
"ruff",
|
|
97
|
+
]
|
|
98
|
+
typing = [
|
|
99
|
+
{ include-group = "tests" },
|
|
100
|
+
"mypy",
|
|
101
|
+
"pydantic",
|
|
102
|
+
"types-click",
|
|
103
|
+
"types-PyYAML",
|
|
104
|
+
"types-requests",
|
|
105
|
+
# You will probably have to add additional type stubs here, especially if you're using tox-uv
|
|
106
|
+
]
|
|
107
|
+
docs-lint = [
|
|
108
|
+
{ include-group = "docs" },
|
|
109
|
+
"doc8",
|
|
110
|
+
]
|
|
111
|
+
format-docs = [
|
|
112
|
+
{ include-group = "docs" },
|
|
113
|
+
"docstrfmt",
|
|
114
|
+
]
|
|
115
|
+
doctests = [
|
|
116
|
+
"xdoctest",
|
|
117
|
+
"pygments",
|
|
118
|
+
]
|
|
119
|
+
pyroma = [
|
|
120
|
+
"pyroma",
|
|
121
|
+
"pygments",
|
|
122
|
+
]
|
|
123
|
+
|
|
124
|
+
# see https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#dependencies-optional-dependencies
|
|
125
|
+
[project.optional-dependencies]
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# See https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#urls
|
|
129
|
+
# and also https://packaging.python.org/en/latest/specifications/well-known-project-urls/
|
|
130
|
+
[project.urls]
|
|
131
|
+
"Bug Tracker" = "https://github.com/sec2pri/pysec2pri/issues"
|
|
132
|
+
Homepage = "https://github.com/sec2pri/pysec2pri"
|
|
133
|
+
Repository = "https://github.com/sec2pri/pysec2pri.git"
|
|
134
|
+
Documentation = "https://pysec2pri.readthedocs.io"
|
|
135
|
+
Funding = "https://github.com/sponsors/jmillanacosta"
|
|
136
|
+
|
|
137
|
+
[project.scripts]
|
|
138
|
+
pysec2pri = "pysec2pri.cli:main"
|
|
139
|
+
|
|
140
|
+
[tool.cruft]
|
|
141
|
+
skip = [
|
|
142
|
+
"**/__init__.py",
|
|
143
|
+
"tests/*"
|
|
144
|
+
]
|
|
145
|
+
|
|
146
|
+
# MyPy, see https://mypy.readthedocs.io/en/stable/config_file.html
|
|
147
|
+
[tool.mypy]
|
|
148
|
+
plugins = [
|
|
149
|
+
"pydantic.mypy",
|
|
150
|
+
]
|
|
151
|
+
|
|
152
|
+
# Doc8, see https://doc8.readthedocs.io/en/stable/readme.html#ini-file-usage
|
|
153
|
+
[tool.doc8]
|
|
154
|
+
|
|
155
|
+
max-line-length = 120
|
|
156
|
+
ignore = ["D000", "D001"]
|
|
157
|
+
|
|
158
|
+
# Pytest, see https://docs.pytest.org/en/stable/reference/customize.html#pyproject-toml
|
|
159
|
+
[tool.pytest.ini_options]
|
|
160
|
+
markers = [
|
|
161
|
+
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
|
162
|
+
]
|
|
163
|
+
|
|
164
|
+
# Coverage, see https://coverage.readthedocs.io/en/latest/config.html
|
|
165
|
+
[tool.coverage.run]
|
|
166
|
+
branch = true
|
|
167
|
+
source = [
|
|
168
|
+
"pysec2pri",
|
|
169
|
+
]
|
|
170
|
+
omit = [
|
|
171
|
+
"tests/*",
|
|
172
|
+
"docs/*",
|
|
173
|
+
"src/pysec2pri/version.py",
|
|
174
|
+
"src/pysec2pri/__main__.py",
|
|
175
|
+
"src/pysec2pri/cli.py",
|
|
176
|
+
]
|
|
177
|
+
|
|
178
|
+
[tool.coverage.paths]
|
|
179
|
+
source = [
|
|
180
|
+
"src/pysec2pri",
|
|
181
|
+
".tox/*/lib/python*/site-packages/pysec2pri",
|
|
182
|
+
]
|
|
183
|
+
|
|
184
|
+
[tool.coverage.report]
|
|
185
|
+
show_missing = true
|
|
186
|
+
exclude_lines = [
|
|
187
|
+
"pragma: no cover",
|
|
188
|
+
"raise NotImplementedError",
|
|
189
|
+
"if __name__ == \"__main__\":",
|
|
190
|
+
"if TYPE_CHECKING:",
|
|
191
|
+
"def __str__",
|
|
192
|
+
"def __repr__",
|
|
193
|
+
]
|
|
194
|
+
|
|
195
|
+
[tool.ruff]
|
|
196
|
+
line-length = 100
|
|
197
|
+
extend-include = ["*.ipynb"]
|
|
198
|
+
|
|
199
|
+
[tool.ruff.lint]
|
|
200
|
+
# See https://docs.astral.sh/ruff/rules
|
|
201
|
+
extend-select = [
|
|
202
|
+
"F", # pyflakes
|
|
203
|
+
"E", # pycodestyle errors
|
|
204
|
+
"W", # pycodestyle warnings
|
|
205
|
+
"C90", # mccabe
|
|
206
|
+
"I", # isort
|
|
207
|
+
"UP", # pyupgrade
|
|
208
|
+
"D", # pydocstyle
|
|
209
|
+
"DOC", # pydoclint
|
|
210
|
+
"B", # bugbear
|
|
211
|
+
"S", # bandit
|
|
212
|
+
"T20", # print
|
|
213
|
+
"N", # pep8 naming
|
|
214
|
+
"ERA", # eradicate commented out code
|
|
215
|
+
"NPY", # numpy checks
|
|
216
|
+
"RUF", # ruff rules
|
|
217
|
+
"C4", # comprehensions
|
|
218
|
+
]
|
|
219
|
+
ignore = [
|
|
220
|
+
"D105", # Missing docstring in magic method
|
|
221
|
+
"D418", # Function decorated with @overload shouldn't contain a docstring
|
|
222
|
+
"E203", # Whitespace before punctuation; conflicts with Black-style formatting
|
|
223
|
+
"S101", # Use of assert, which is expected in tests
|
|
224
|
+
"RUF001", # Special characters for tests
|
|
225
|
+
]
|
|
226
|
+
|
|
227
|
+
# See https://docs.astral.sh/ruff/settings/#per-file-ignores
|
|
228
|
+
[tool.ruff.lint.per-file-ignores]
|
|
229
|
+
# Ignore the subprocess security checks (S603, S607) in version.py, which are reported inconsistently
|
|
230
|
+
"src/pysec2pri/version.py" = ["S603", "S607"]
|
|
231
|
+
# Ignore commented out code in Sphinx configuration file
|
|
232
|
+
"docs/source/conf.py" = ["ERA001"]
|
|
233
|
+
# Prints are okay in notebooks
|
|
234
|
+
"notebooks/**/*.ipynb" = ["T201"]
|
|
235
|
+
|
|
236
|
+
[tool.ruff.lint.pydocstyle]
|
|
237
|
+
convention = "pep257"
|
|
238
|
+
|
|
239
|
+
[tool.ruff.lint.isort]
|
|
240
|
+
relative-imports-order = "closest-to-furthest"
|
|
241
|
+
known-third-party = [
|
|
242
|
+
"tqdm",
|
|
243
|
+
]
|
|
244
|
+
known-first-party = [
|
|
245
|
+
"pysec2pri",
|
|
246
|
+
"tests",
|
|
247
|
+
]
|
|
248
|
+
|
|
249
|
+
[tool.ruff.format]
|
|
250
|
+
# see https://docs.astral.sh/ruff/settings/#format_docstring-code-format
|
|
251
|
+
docstring-code-format = true
|
|
252
|
+
|
|
253
|
+
[tool.bumpversion]
|
|
254
|
+
current_version = "0.0.2"
|
|
255
|
+
parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\\.[0-9A-Za-z-]+)*))?(?:\\+(?P<build>[0-9A-Za-z-]+(?:\\.[0-9A-Za-z-]+)*))?"
|
|
256
|
+
serialize = [
|
|
257
|
+
"{major}.{minor}.{patch}-{release}+{build}",
|
|
258
|
+
"{major}.{minor}.{patch}+{build}",
|
|
259
|
+
"{major}.{minor}.{patch}-{release}",
|
|
260
|
+
"{major}.{minor}.{patch}",
|
|
261
|
+
]
|
|
262
|
+
commit = true
|
|
263
|
+
tag = false
|
|
264
|
+
|
|
265
|
+
[tool.bumpversion.parts.release]
|
|
266
|
+
optional_value = "production"
|
|
267
|
+
first_value = "dev"
|
|
268
|
+
values = [
|
|
269
|
+
"dev",
|
|
270
|
+
"production",
|
|
271
|
+
]
|
|
272
|
+
|
|
273
|
+
[[tool.bumpversion.files]]
|
|
274
|
+
filename = "pyproject.toml"
|
|
275
|
+
search = "version = \"{current_version}\""
|
|
276
|
+
replace = "version = \"{new_version}\""
|
|
277
|
+
|
|
278
|
+
[[tool.bumpversion.files]]
|
|
279
|
+
filename = "docs/source/conf.py"
|
|
280
|
+
search = "release = \"{current_version}\""
|
|
281
|
+
replace = "release = \"{new_version}\""
|
|
282
|
+
|
|
283
|
+
[[tool.bumpversion.files]]
|
|
284
|
+
filename = "src/pysec2pri/version.py"
|
|
285
|
+
search = "VERSION = \"{current_version}\""
|
|
286
|
+
replace = "VERSION = \"{new_version}\""
|
|
287
|
+
|
|
288
|
+
[[tool.bumpversion.files]]
|
|
289
|
+
filename = "CITATION.cff"
|
|
290
|
+
search = "version: {current_version}"
|
|
291
|
+
replace = "version: {new_version}"
|