pysec2pri 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. pysec2pri-0.0.2/LICENSE +21 -0
  2. pysec2pri-0.0.2/PKG-INFO +181 -0
  3. pysec2pri-0.0.2/README.md +128 -0
  4. pysec2pri-0.0.2/pyproject.toml +291 -0
  5. pysec2pri-0.0.2/src/pysec2pri/__init__.py +78 -0
  6. pysec2pri-0.0.2/src/pysec2pri/__main__.py +12 -0
  7. pysec2pri-0.0.2/src/pysec2pri/api.py +871 -0
  8. pysec2pri-0.0.2/src/pysec2pri/cli.py +719 -0
  9. pysec2pri-0.0.2/src/pysec2pri/config/__init__.py +1 -0
  10. pysec2pri-0.0.2/src/pysec2pri/config/chebi.yaml +144 -0
  11. pysec2pri-0.0.2/src/pysec2pri/config/hgnc.yaml +127 -0
  12. pysec2pri-0.0.2/src/pysec2pri/config/hmdb.yaml +128 -0
  13. pysec2pri-0.0.2/src/pysec2pri/config/ncbi.yaml +131 -0
  14. pysec2pri-0.0.2/src/pysec2pri/config/uniprot.yaml +127 -0
  15. pysec2pri-0.0.2/src/pysec2pri/config/wikidata.yaml +127 -0
  16. pysec2pri-0.0.2/src/pysec2pri/constants.py +31 -0
  17. pysec2pri-0.0.2/src/pysec2pri/diff.py +345 -0
  18. pysec2pri-0.0.2/src/pysec2pri/download.py +782 -0
  19. pysec2pri-0.0.2/src/pysec2pri/exports.py +401 -0
  20. pysec2pri-0.0.2/src/pysec2pri/logging.py +65 -0
  21. pysec2pri-0.0.2/src/pysec2pri/parsers/__init__.py +38 -0
  22. pysec2pri-0.0.2/src/pysec2pri/parsers/base.py +1304 -0
  23. pysec2pri-0.0.2/src/pysec2pri/parsers/chebi.py +655 -0
  24. pysec2pri-0.0.2/src/pysec2pri/parsers/hgnc.py +383 -0
  25. pysec2pri-0.0.2/src/pysec2pri/parsers/hmdb.py +293 -0
  26. pysec2pri-0.0.2/src/pysec2pri/parsers/ncbi.py +274 -0
  27. pysec2pri-0.0.2/src/pysec2pri/parsers/uniprot.py +210 -0
  28. pysec2pri-0.0.2/src/pysec2pri/parsers/wikidata.py +524 -0
  29. pysec2pri-0.0.2/src/pysec2pri/py.typed +1 -0
  30. pysec2pri-0.0.2/src/pysec2pri/queries/__init__.py +118 -0
  31. pysec2pri-0.0.2/src/pysec2pri/queries/chemical_redirects.rq +33 -0
  32. pysec2pri-0.0.2/src/pysec2pri/queries/chemical_redirects_test.rq +34 -0
  33. pysec2pri-0.0.2/src/pysec2pri/queries/gene_redirects.rq +22 -0
  34. pysec2pri-0.0.2/src/pysec2pri/queries/gene_redirects_test.rq +23 -0
  35. pysec2pri-0.0.2/src/pysec2pri/queries/protein_redirects.rq +30 -0
  36. pysec2pri-0.0.2/src/pysec2pri/queries/protein_redirects_test.rq +31 -0
  37. pysec2pri-0.0.2/src/pysec2pri/update_ids.py +404 -0
  38. pysec2pri-0.0.2/src/pysec2pri/version.py +39 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Javier Millán Acosta
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,181 @@
1
+ Metadata-Version: 2.4
2
+ Name: pysec2pri
3
+ Version: 0.0.2
4
+ Summary: Secondary to primary identifier mapping
5
+ Keywords: snekpack,cookiecutter
6
+ Author: Javier Millán Acosta
7
+ Author-email: Javier Millán Acosta <javier.millan.acosta@gmail.com>
8
+ License-File: LICENSE
9
+ Classifier: Development Status :: 1 - Planning
10
+ Classifier: Environment :: Console
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Framework :: Pytest
15
+ Classifier: Framework :: tox
16
+ Classifier: Framework :: Sphinx
17
+ Classifier: Natural Language :: English
18
+ Classifier: Programming Language :: Python
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Classifier: Programming Language :: Python :: 3.14
24
+ Classifier: Programming Language :: Python :: 3 :: Only
25
+ Classifier: Typing :: Typed
26
+ Requires-Dist: more-itertools
27
+ Requires-Dist: tqdm
28
+ Requires-Dist: click
29
+ Requires-Dist: more-click
30
+ Requires-Dist: typing-extensions
31
+ Requires-Dist: pydantic>=2.0
32
+ Requires-Dist: linkml>=1.10.0
33
+ Requires-Dist: sssom>=0.4.0
34
+ Requires-Dist: curies>=0.7.0
35
+ Requires-Dist: httpx>=0.24.0
36
+ Requires-Dist: polars>=0.20.0
37
+ Requires-Dist: regex==2026.1.15
38
+ Requires-Dist: lxml
39
+ Requires-Dist: defusedxml
40
+ Requires-Dist: pandas
41
+ Requires-Dist: numpy
42
+ Requires-Dist: rdkit
43
+ Requires-Dist: polars-runtime-32
44
+ Maintainer: Javier Millán Acosta
45
+ Maintainer-email: Javier Millán Acosta <javier.millan.acosta@gmail.com>
46
+ Requires-Python: >=3.10
47
+ Project-URL: Bug Tracker, https://github.com/sec2pri/pysec2pri/issues
48
+ Project-URL: Homepage, https://github.com/sec2pri/pysec2pri
49
+ Project-URL: Repository, https://github.com/sec2pri/pysec2pri.git
50
+ Project-URL: Documentation, https://pysec2pri.readthedocs.io
51
+ Project-URL: Funding, https://github.com/sponsors/jmillanacosta
52
+ Description-Content-Type: text/markdown
53
+
54
+ <!--
55
+ <p align="center">
56
+ <img src="https://github.com/jmillanacosta/pysec2pri/raw/main/docs/source/logo.png" height="150">
57
+ </p>
58
+ -->
59
+
60
+ <h1 align="center">
61
+ pySec2Pri
62
+ </h1>
63
+
64
+ <p align="center">
65
+ <a href="https://github.com/jmillanacosta/pysec2pri/actions/workflows/tests.yml">
66
+ <img alt="Tests" src="https://github.com/jmillanacosta/pysec2pri/actions/workflows/tests.yml/badge.svg" /></a>
67
+ <a href="https://pypi.org/project/pysec2pri">
68
+ <img alt="PyPI" src="https://img.shields.io/pypi/v/pysec2pri" /></a>
69
+ <a href="https://pypi.org/project/pysec2pri">
70
+ <img alt="PyPI - Python Version" src="https://img.shields.io/pypi/pyversions/pysec2pri" /></a>
71
+ <a href="https://github.com/jmillanacosta/pysec2pri/blob/main/LICENSE">
72
+ <img alt="PyPI - License" src="https://img.shields.io/pypi/l/pysec2pri" /></a>
73
+ <a href='https://pysec2pri.readthedocs.io/en/latest/?badge=latest'>
74
+ <img src='https://readthedocs.org/projects/pysec2pri/badge/?version=latest' alt='Documentation Status' /></a>
75
+ </p>
76
+
77
+ Create and use mapping files that link secondary (retired/withdrawn) biological
78
+ database identifiers and symbols to their primary (current) counterparts.
79
+
80
+ Outputs mappings in [SSSOM format](https://w3id.org/sssom) by default. Subjects
81
+ are secondary, objects are primary.
82
+
83
+ ## Installation
84
+
85
+ ```console
86
+ uv pip install pysec2pri
87
+ ```
88
+
89
+ Or install from source:
90
+
91
+ ```console
92
+ uv pip install git+https://github.com/jmillanacosta/pysec2pri.git
93
+ ```
94
+
95
+ ## Quick Start
96
+
97
+ ### Generating mapping sets
98
+
99
+ To obtain the secondary to primary identifier SSSOM mapping set for ChEBI:
100
+
101
+ ```bash
102
+ pysec2pri chebi
103
+ ```
104
+
105
+ This will automatically download the latest ChEBI release and generate an SSSOM
106
+ mapping file in your current directory.
107
+
108
+ To process a local file and specify the output path:
109
+
110
+ ```bash
111
+ pysec2pri chebi ChEBI_complete_3star.sdf --output my_mappings.sssom.tsv
112
+ ```
113
+
114
+ For more options and help on any command:
115
+
116
+ ```bash
117
+ pysec2pri --help
118
+ pysec2pri chebi --help
119
+ ```
120
+
121
+ The default output is in [SSSOM](https://mapping-commons.github.io/sssom/)
122
+ (Simple Standard for Sharing Ontology Mappings) TSV format.
123
+
124
+ ### Updating IDs and symbols
125
+
126
+ A generated mapping set can be used to update IDs and symbols in Python:
127
+
128
+ ```python
129
+ from pysec2pri import generate_chebi_synonyms, resolve_symbols
130
+ cs = generate_chebi_synonyms()
131
+ resolve_symbols(["Glucose", "ATP", "Guanine"], cs)
132
+ ```
133
+
134
+ Or from the command line, given a TSV file `gene_ex.tsv`:
135
+
136
+ ```
137
+ gene data
138
+ HGNC:131 3.5
139
+ ```
140
+
141
+ Resolve the `gene` column to primary HGNC IDs (a new column with the `_primary`
142
+ suffix, here `gene_primary`, is added):
143
+
144
+ ```bash
145
+ pysec2pri update-ids gene_ex.tsv hgnc --at gene -o gene_ex_primary.tsv
146
+ # gene data gene_primary
147
+ # HGNC:131 3.5 HGNC:145
148
+ ```
149
+
150
+ The same pattern works for symbols with `update-symbols`, and multiple columns
151
+ can be resolved by repeating `--at`:
152
+
153
+ ```bash
154
+ pysec2pri update-ids data.tsv hgnc --at gene_id --at related_gene_id
155
+ ```
156
+
157
+ To skip regenerating the mapping set, pass a pre-built mapping file:
158
+
159
+ ```bash
160
+ pysec2pri hgnc ids # outputs hgnc_{version}_sssom.tsv
161
+ pysec2pri update-ids gene_ex.tsv hgnc --at gene --mapping hgnc_{version}_sssom.tsv
162
+ ```
163
+
164
+ ## Documentation
165
+
166
+ Full documentation: <https://pysec2pri.readthedocs.io/>
167
+
168
+ ## Supported Databases
169
+
170
+ | Datasource | License | Citation |
171
+ | ---------- | ---------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
172
+ | ChEBI | [CC BY 4.0](docs/licenses/chebi/LICENSE) | Hastings J, Owen G, Dekker A, et al. ChEBI in 2016: Improved services and an expanding collection of metabolites. Nucleic Acids Research. 2016 Jan;44(D1):D1214-9. doi: [10.1093/nar/gkv1031](https://doi.org/10.1093/nar/gkv1031). PMID: 26467479; PMCID: PMC4702775. |
173
+ | HMDB | [CC0](https://hmdb.ca/about#compliance:~:text=international%20scientific%20conferences.-,Citing%20the%20HMDB,-HMDB%20is%20offered) | Wishart DS, Guo A, Oler E, Wang F, Anjum A, Peters H, Dizon R, Sayeeda Z, Tian S, Lee BL, Berjanskii M, Mah R, Yamamoto M, Jovel J, Torres-Calzada C, Hiebert-Giesbrecht M, Lui VW, Varshavi D, Varshavi D, Allen D, Arndt D, Khetarpal N, Sivakumaran A, Harford K, Sanford S, Yee K, Cao X, Budinski Z, Liigand J, Zhang L, Zheng J, Mandal R, Karu N, Dambrova M, Schiöth HB, Greiner R, Gautam V. HMDB 5.0: the Human Metabolome Database for 2022. Nucleic Acids Res. 2022 Jan 7;50(D1):D622-D631. doi: [10.1093/nar/gkab1062](https://doi.org/10.1093/nar/gkab1062). PMID: 34986597; PMCID: PMC8728138. |
174
+ | HGNC | [link](https://www.genenames.org/about/license/) | Seal RL, Braschi B, Gray K, Jones TEM, Tweedie S, Haim-Vilmovsky L, Bruford EA. Genenames.org: the HGNC resources in 2023. Nucleic Acids Res. 2023 Jan 6;51(D1):D1003-D1009. doi: [10.1093/nar/gkac888](https://doi.org/10.1093/nar/gkac888). PMID: 36243972; PMCID: PMC9825485. |
175
+ | NCBI | [link](https://www.ncbi.nlm.nih.gov/home/about/policies/) | Sayers EW, Bolton EE, Brister JR, Canese K, Chan J, Comeau DC, Connor R, Funk K, Kelly C, Kim S, Madej T, Marchler-Bauer A, Lanczycki C, Lathrop S, Lu Z, Thibaud-Nissen F, Murphy T, Phan L, Skripchenko Y, Tse T, Wang J, Williams R, Trawick BW, Pruitt KD, Sherry ST. Database resources of the national center for biotechnology information. Nucleic Acids Res. 2022 Jan 7;50(D1):D20-D26. doi: [10.1093/nar/gkab1112](https://doi.org/10.1093/nar/gkab1112). PMID: 34850941; PMCID: PMC8728269. |
176
+ | UniProt | [CC BY 4.0](https://ftp.uniprot.org/pub/docs/licenses/uniprot/current_release/knowledgebase/complete/LICENSE) | UniProt Consortium. UniProt: the universal protein knowledgebase in 2021. Nucleic Acids Res. 2021 Jan 8;49(D1):D480-D489. doi: [10.1093/nar/gkaa1100](https://doi.org/10.1093/nar/gkaa1100). PMID: 33237286; PMCID: PMC7778908. |
177
+ | Wikidata | | Vrandečić, D., Krötzsch, M. Wikidata: a free collaborative knowledgebase. Communications of the ACM. 2014. doi: [10.1145/2629489](https://doi.org/10.1145/2629489). |
178
+
179
+ ## License
180
+
181
+ MIT License. See [LICENSE](LICENSE) for details.
@@ -0,0 +1,128 @@
1
+ <!--
2
+ <p align="center">
3
+ <img src="https://github.com/jmillanacosta/pysec2pri/raw/main/docs/source/logo.png" height="150">
4
+ </p>
5
+ -->
6
+
7
+ <h1 align="center">
8
+ pySec2Pri
9
+ </h1>
10
+
11
+ <p align="center">
12
+ <a href="https://github.com/jmillanacosta/pysec2pri/actions/workflows/tests.yml">
13
+ <img alt="Tests" src="https://github.com/jmillanacosta/pysec2pri/actions/workflows/tests.yml/badge.svg" /></a>
14
+ <a href="https://pypi.org/project/pysec2pri">
15
+ <img alt="PyPI" src="https://img.shields.io/pypi/v/pysec2pri" /></a>
16
+ <a href="https://pypi.org/project/pysec2pri">
17
+ <img alt="PyPI - Python Version" src="https://img.shields.io/pypi/pyversions/pysec2pri" /></a>
18
+ <a href="https://github.com/jmillanacosta/pysec2pri/blob/main/LICENSE">
19
+ <img alt="PyPI - License" src="https://img.shields.io/pypi/l/pysec2pri" /></a>
20
+ <a href='https://pysec2pri.readthedocs.io/en/latest/?badge=latest'>
21
+ <img src='https://readthedocs.org/projects/pysec2pri/badge/?version=latest' alt='Documentation Status' /></a>
22
+ </p>
23
+
24
+ Create and use mapping files that link secondary (retired/withdrawn) biological
25
+ database identifiers and symbols to their primary (current) counterparts.
26
+
27
+ Outputs mappings in [SSSOM format](https://w3id.org/sssom) by default. Subjects
28
+ are secondary, objects are primary.
29
+
30
+ ## Installation
31
+
32
+ ```console
33
+ uv pip install pysec2pri
34
+ ```
35
+
36
+ Or install from source:
37
+
38
+ ```console
39
+ uv pip install git+https://github.com/jmillanacosta/pysec2pri.git
40
+ ```
41
+
42
+ ## Quick Start
43
+
44
+ ### Generating mapping sets
45
+
46
+ To obtain the secondary to primary identifier SSSOM mapping set for ChEBI:
47
+
48
+ ```bash
49
+ pysec2pri chebi
50
+ ```
51
+
52
+ This will automatically download the latest ChEBI release and generate an SSSOM
53
+ mapping file in your current directory.
54
+
55
+ To process a local file and specify the output path:
56
+
57
+ ```bash
58
+ pysec2pri chebi ChEBI_complete_3star.sdf --output my_mappings.sssom.tsv
59
+ ```
60
+
61
+ For more options and help on any command:
62
+
63
+ ```bash
64
+ pysec2pri --help
65
+ pysec2pri chebi --help
66
+ ```
67
+
68
+ The default output is in [SSSOM](https://mapping-commons.github.io/sssom/)
69
+ (Simple Standard for Sharing Ontology Mappings) TSV format.
70
+
71
+ ### Updating IDs and symbols
72
+
73
+ A generated mapping set can be used to update IDs and symbols in Python:
74
+
75
+ ```python
76
+ from pysec2pri import generate_chebi_synonyms, resolve_symbols
77
+ cs = generate_chebi_synonyms()
78
+ resolve_symbols(["Glucose", "ATP", "Guanine"], cs)
79
+ ```
80
+
81
+ Or from the command line, given a TSV file `gene_ex.tsv`:
82
+
83
+ ```
84
+ gene data
85
+ HGNC:131 3.5
86
+ ```
87
+
88
+ Resolve the `gene` column to primary HGNC IDs (a new column with the `_primary`
89
+ suffix, here `gene_primary`, is added):
90
+
91
+ ```bash
92
+ pysec2pri update-ids gene_ex.tsv hgnc --at gene -o gene_ex_primary.tsv
93
+ # gene data gene_primary
94
+ # HGNC:131 3.5 HGNC:145
95
+ ```
96
+
97
+ The same pattern works for symbols with `update-symbols`, and multiple columns
98
+ can be resolved by repeating `--at`:
99
+
100
+ ```bash
101
+ pysec2pri update-ids data.tsv hgnc --at gene_id --at related_gene_id
102
+ ```
103
+
104
+ To skip regenerating the mapping set, pass a pre-built mapping file:
105
+
106
+ ```bash
107
+ pysec2pri hgnc ids # outputs hgnc_{version}_sssom.tsv
108
+ pysec2pri update-ids gene_ex.tsv hgnc --at gene --mapping hgnc_{version}_sssom.tsv
109
+ ```
110
+
111
+ ## Documentation
112
+
113
+ Full documentation: <https://pysec2pri.readthedocs.io/>
114
+
115
+ ## Supported Databases
116
+
117
+ | Datasource | License | Citation |
118
+ | ---------- | ---------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
119
+ | ChEBI | [CC BY 4.0](docs/licenses/chebi/LICENSE) | Hastings J, Owen G, Dekker A, et al. ChEBI in 2016: Improved services and an expanding collection of metabolites. Nucleic Acids Research. 2016 Jan;44(D1):D1214-9. doi: [10.1093/nar/gkv1031](https://doi.org/10.1093/nar/gkv1031). PMID: 26467479; PMCID: PMC4702775. |
120
+ | HMDB | [CC0](https://hmdb.ca/about#compliance:~:text=international%20scientific%20conferences.-,Citing%20the%20HMDB,-HMDB%20is%20offered) | Wishart DS, Guo A, Oler E, Wang F, Anjum A, Peters H, Dizon R, Sayeeda Z, Tian S, Lee BL, Berjanskii M, Mah R, Yamamoto M, Jovel J, Torres-Calzada C, Hiebert-Giesbrecht M, Lui VW, Varshavi D, Varshavi D, Allen D, Arndt D, Khetarpal N, Sivakumaran A, Harford K, Sanford S, Yee K, Cao X, Budinski Z, Liigand J, Zhang L, Zheng J, Mandal R, Karu N, Dambrova M, Schiöth HB, Greiner R, Gautam V. HMDB 5.0: the Human Metabolome Database for 2022. Nucleic Acids Res. 2022 Jan 7;50(D1):D622-D631. doi: [10.1093/nar/gkab1062](https://doi.org/10.1093/nar/gkab1062). PMID: 34986597; PMCID: PMC8728138. |
121
+ | HGNC | [link](https://www.genenames.org/about/license/) | Seal RL, Braschi B, Gray K, Jones TEM, Tweedie S, Haim-Vilmovsky L, Bruford EA. Genenames.org: the HGNC resources in 2023. Nucleic Acids Res. 2023 Jan 6;51(D1):D1003-D1009. doi: [10.1093/nar/gkac888](https://doi.org/10.1093/nar/gkac888). PMID: 36243972; PMCID: PMC9825485. |
122
+ | NCBI | [link](https://www.ncbi.nlm.nih.gov/home/about/policies/) | Sayers EW, Bolton EE, Brister JR, Canese K, Chan J, Comeau DC, Connor R, Funk K, Kelly C, Kim S, Madej T, Marchler-Bauer A, Lanczycki C, Lathrop S, Lu Z, Thibaud-Nissen F, Murphy T, Phan L, Skripchenko Y, Tse T, Wang J, Williams R, Trawick BW, Pruitt KD, Sherry ST. Database resources of the national center for biotechnology information. Nucleic Acids Res. 2022 Jan 7;50(D1):D20-D26. doi: [10.1093/nar/gkab1112](https://doi.org/10.1093/nar/gkab1112). PMID: 34850941; PMCID: PMC8728269. |
123
+ | UniProt | [CC BY 4.0](https://ftp.uniprot.org/pub/docs/licenses/uniprot/current_release/knowledgebase/complete/LICENSE) | UniProt Consortium. UniProt: the universal protein knowledgebase in 2021. Nucleic Acids Res. 2021 Jan 8;49(D1):D480-D489. doi: [10.1093/nar/gkaa1100](https://doi.org/10.1093/nar/gkaa1100). PMID: 33237286; PMCID: PMC7778908. |
124
+ | Wikidata | | Vrandečić, D., Krötzsch, M. Wikidata: a free collaborative knowledgebase. Communications of the ACM. 2014. doi: [10.1145/2629489](https://doi.org/10.1145/2629489). |
125
+
126
+ ## License
127
+
128
+ MIT License. See [LICENSE](LICENSE) for details.
@@ -0,0 +1,291 @@
1
+ [build-system]
2
+ requires = ["uv_build>=0.9.6,<1.0"]
3
+ build-backend = "uv_build"
4
+
5
+ [project]
6
+ name = "pysec2pri"
7
+ version = "0.0.2"
8
+ description = "Secondary to primary identifier mapping"
9
+ readme = "README.md"
10
+ authors = [
11
+ { name = "Javier Millán Acosta", email = "javier.millan.acosta@gmail.com" }
12
+ ]
13
+ maintainers = [
14
+ { name = "Javier Millán Acosta", email = "javier.millan.acosta@gmail.com" }
15
+ ]
16
+
17
+ # See https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#classifiers
18
+ # Search tags using the controlled vocabulary at https://pypi.org/classifiers
19
+ classifiers = [
20
+ "Development Status :: 1 - Planning",
21
+ "Environment :: Console",
22
+ "Intended Audience :: Developers",
23
+ "License :: OSI Approved :: MIT License",
24
+ "Operating System :: OS Independent",
25
+ "Framework :: Pytest",
26
+ "Framework :: tox",
27
+ "Framework :: Sphinx",
28
+ "Natural Language :: English",
29
+ "Programming Language :: Python",
30
+ "Programming Language :: Python :: 3.10",
31
+ "Programming Language :: Python :: 3.11",
32
+ "Programming Language :: Python :: 3.12",
33
+ "Programming Language :: Python :: 3.13",
34
+ "Programming Language :: Python :: 3.14",
35
+ "Programming Language :: Python :: 3 :: Only",
36
+ "Typing :: Typed",
37
+ # TODO add your topics from the Trove controlled vocabulary (see https://pypi.org/classifiers)
38
+ ]
39
+ keywords = [
40
+ "snekpack", # please keep this keyword to credit the cookiecutter-snekpack template
41
+ "cookiecutter",
42
+ # TODO add your own free-text keywords
43
+ ]
44
+
45
+ # License Information.
46
+ # See PEP-639 at https://peps.python.org/pep-0639/#add-license-files-key
47
+ license-files = [
48
+ "LICENSE",
49
+ ]
50
+
51
+ requires-python = ">=3.10"
52
+ dependencies = [
53
+ "more_itertools", # iteration helpers not provided by the stdlib itertools module
54
+ "tqdm", # progress bars
55
+ "click",
56
+ "more_click",
57
+ "typing-extensions",
58
+ "pydantic>=2.0", # data validation and models
59
+ "linkml>=1.10.0",
60
+ "sssom>=0.4.0", # SSSOM standard support
61
+ "curies>=0.7.0", # CURIE handling
62
+ "httpx>=0.24.0", # HTTP client for downloads
63
+ "polars>=0.20.0", # Fast dataframe operations for diffs
64
+ "regex==2026.1.15",
65
+ "lxml", # XML parsing for HMDB
66
+ "defusedxml", # Secure XML parsing
67
+ "pandas", # DataFrame support for some workflows
68
+ "numpy", # Required by pandas and polars
69
+ "rdkit", # Chemistry toolkit for SDF parsing
70
+ "polars-runtime-32", # Polars compiled runtime (32-bit row index build; not specific to 32-bit systems)
71
+
72
+ ]
73
+
74
+ # see https://peps.python.org/pep-0735/ and https://docs.astral.sh/uv/concepts/dependencies/#dependency-groups
75
+ [dependency-groups]
76
+ tests = [
77
+ "pytest",
78
+ "coverage[toml]",
79
+ "lxml",
80
+ "pandas",
81
+ "numpy",
82
+ "rdkit",
83
+ "polars-runtime-32",
84
+ ]
85
+ docs = [
86
+ "sphinx>=8,<9",
87
+ "sphinx-rtd-theme>=3.0","sphinx-click",
88
+ "sphinx_automodapi",
89
+ # Include if your project uses Pydantic:
90
+ # "autodoc_pydantic",
91
+ # To include LaTeX comments easily in your docs.
92
+ # If you uncomment this, don't forget to do the same in docs/conf.py
93
+ # texext
94
+ ]
95
+ lint = [
96
+ "ruff",
97
+ ]
98
+ typing = [
99
+ { include-group = "tests" },
100
+ "mypy",
101
+ "pydantic",
102
+ "types-click",
103
+ "types-PyYAML",
104
+ "types-requests",
105
+ # You will probably have to add additional type stubs here, especially if you're using tox-uv
106
+ ]
107
+ docs-lint = [
108
+ { include-group = "docs" },
109
+ "doc8",
110
+ ]
111
+ format-docs = [
112
+ { include-group = "docs" },
113
+ "docstrfmt",
114
+ ]
115
+ doctests = [
116
+ "xdoctest",
117
+ "pygments",
118
+ ]
119
+ pyroma = [
120
+ "pyroma",
121
+ "pygments",
122
+ ]
123
+
124
+ # see https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#dependencies-optional-dependencies
125
+ [project.optional-dependencies]
126
+
127
+
128
+ # See https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#urls
129
+ # and also https://packaging.python.org/en/latest/specifications/well-known-project-urls/
130
+ [project.urls]
131
+ "Bug Tracker" = "https://github.com/sec2pri/pysec2pri/issues"
132
+ Homepage = "https://github.com/sec2pri/pysec2pri"
133
+ Repository = "https://github.com/sec2pri/pysec2pri.git"
134
+ Documentation = "https://pysec2pri.readthedocs.io"
135
+ Funding = "https://github.com/sponsors/jmillanacosta"
136
+
137
+ [project.scripts]
138
+ pysec2pri = "pysec2pri.cli:main"
139
+
140
+ [tool.cruft]
141
+ skip = [
142
+ "**/__init__.py",
143
+ "tests/*"
144
+ ]
145
+
146
+ # MyPy, see https://mypy.readthedocs.io/en/stable/config_file.html
147
+ [tool.mypy]
148
+ plugins = [
149
+ "pydantic.mypy",
150
+ ]
151
+
152
+ # Doc8, see https://doc8.readthedocs.io/en/stable/readme.html#ini-file-usage
153
+ [tool.doc8]
154
+
155
+ max-line-length = 120
156
+ ignore = ["D000", "D001"]
157
+
158
+ # Pytest, see https://docs.pytest.org/en/stable/reference/customize.html#pyproject-toml
159
+ [tool.pytest.ini_options]
160
+ markers = [
161
+ "slow: marks tests as slow (deselect with '-m \"not slow\"')",
162
+ ]
163
+
164
+ # Coverage, see https://coverage.readthedocs.io/en/latest/config.html
165
+ [tool.coverage.run]
166
+ branch = true
167
+ source = [
168
+ "pysec2pri",
169
+ ]
170
+ omit = [
171
+ "tests/*",
172
+ "docs/*",
173
+ "src/pysec2pri/version.py",
174
+ "src/pysec2pri/__main__.py",
175
+ "src/pysec2pri/cli.py",
176
+ ]
177
+
178
+ [tool.coverage.paths]
179
+ source = [
180
+ "src/pysec2pri",
181
+ ".tox/*/lib/python*/site-packages/pysec2pri",
182
+ ]
183
+
184
+ [tool.coverage.report]
185
+ show_missing = true
186
+ exclude_lines = [
187
+ "pragma: no cover",
188
+ "raise NotImplementedError",
189
+ "if __name__ == \"__main__\":",
190
+ "if TYPE_CHECKING:",
191
+ "def __str__",
192
+ "def __repr__",
193
+ ]
194
+
195
+ [tool.ruff]
196
+ line-length = 100
197
+ extend-include = ["*.ipynb"]
198
+
199
+ [tool.ruff.lint]
200
+ # See https://docs.astral.sh/ruff/rules
201
+ extend-select = [
202
+ "F", # pyflakes
203
+ "E", # pycodestyle errors
204
+ "W", # pycodestyle warnings
205
+ "C90", # mccabe
206
+ "I", # isort
207
+ "UP", # pyupgrade
208
+ "D", # pydocstyle
209
+ "DOC", # pydoclint
210
+ "B", # bugbear
211
+ "S", # bandit
212
+ "T20", # print
213
+ "N", # pep8 naming
214
+ "ERA", # eradicate commented out code
215
+ "NPY", # numpy checks
216
+ "RUF", # ruff rules
217
+ "C4", # comprehensions
218
+ ]
219
+ ignore = [
220
+ "D105", # Missing docstring in magic method
221
+ "D418", # Function decorated with @overload shouldn't contain a docstring
222
+ "E203", # Whitespace before punctuation; conflicts with Black-style slice formatting
223
+ "S101", # Use of assert; asserts are used in tests
224
+ "RUF001", # Special characters for tests
225
+ ]
226
+
227
+ # See https://docs.astral.sh/ruff/settings/#per-file-ignores
228
+ [tool.ruff.lint.per-file-ignores]
229
+ # Ignore subprocess security warnings (S603, S607) in version.py, which are reported inconsistently
230
+ "src/pysec2pri/version.py" = ["S603", "S607"]
231
+ # Ignore commented out code in Sphinx configuration file
232
+ "docs/source/conf.py" = ["ERA001"]
233
+ # Prints are okay in notebooks
234
+ "notebooks/**/*.ipynb" = ["T201"]
235
+
236
+ [tool.ruff.lint.pydocstyle]
237
+ convention = "pep257"
238
+
239
+ [tool.ruff.lint.isort]
240
+ relative-imports-order = "closest-to-furthest"
241
+ known-third-party = [
242
+ "tqdm",
243
+ ]
244
+ known-first-party = [
245
+ "pysec2pri",
246
+ "tests",
247
+ ]
248
+
249
+ [tool.ruff.format]
250
+ # see https://docs.astral.sh/ruff/settings/#format_docstring-code-format
251
+ docstring-code-format = true
252
+
253
+ [tool.bumpversion]
254
+ current_version = "0.0.2"
255
+ parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\\.[0-9A-Za-z-]+)*))?(?:\\+(?P<build>[0-9A-Za-z-]+(?:\\.[0-9A-Za-z-]+)*))?"
256
+ serialize = [
257
+ "{major}.{minor}.{patch}-{release}+{build}",
258
+ "{major}.{minor}.{patch}+{build}",
259
+ "{major}.{minor}.{patch}-{release}",
260
+ "{major}.{minor}.{patch}",
261
+ ]
262
+ commit = true
263
+ tag = false
264
+
265
+ [tool.bumpversion.parts.release]
266
+ optional_value = "production"
267
+ first_value = "dev"
268
+ values = [
269
+ "dev",
270
+ "production",
271
+ ]
272
+
273
+ [[tool.bumpversion.files]]
274
+ filename = "pyproject.toml"
275
+ search = "version = \"{current_version}\""
276
+ replace = "version = \"{new_version}\""
277
+
278
+ [[tool.bumpversion.files]]
279
+ filename = "docs/source/conf.py"
280
+ search = "release = \"{current_version}\""
281
+ replace = "release = \"{new_version}\""
282
+
283
+ [[tool.bumpversion.files]]
284
+ filename = "src/pysec2pri/version.py"
285
+ search = "VERSION = \"{current_version}\""
286
+ replace = "VERSION = \"{new_version}\""
287
+
288
+ [[tool.bumpversion.files]]
289
+ filename = "CITATION.cff"
290
+ search = "version: {current_version}"
291
+ replace = "version: {new_version}"