pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -113
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +108 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +183 -161
- pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +196 -118
- pyobo/gilda_utils.py +79 -200
- pyobo/identifier_utils/__init__.py +41 -0
- pyobo/identifier_utils/api.py +296 -0
- pyobo/identifier_utils/model.py +130 -0
- pyobo/identifier_utils/preprocessing.json +812 -0
- pyobo/identifier_utils/preprocessing.py +61 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +43 -39
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1358 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +0 -5
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +3 -8
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +10 -3
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +270 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1413 -643
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +13 -11
- pyobo/utils/io.py +17 -31
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +76 -70
- pyobo/version.py +3 -3
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
- pyobo-0.12.0.dist-info/RECORD +202 -0
- pyobo-0.12.0.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/reader_utils.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""Utilities for reading OBO files."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import typing as t
|
|
7
|
+
from collections import Counter
|
|
8
|
+
from collections.abc import Mapping, Sequence
|
|
9
|
+
|
|
10
|
+
import click
|
|
11
|
+
from curies import ReferenceTuple
|
|
12
|
+
from curies import vocabulary as v
|
|
13
|
+
|
|
14
|
+
from pyobo.struct.reference import (
|
|
15
|
+
OBOLiteral,
|
|
16
|
+
_obo_parse_identifier,
|
|
17
|
+
_parse_reference_or_uri_literal,
|
|
18
|
+
)
|
|
19
|
+
from pyobo.struct.struct import Reference, SynonymTypeDef, _synonym_typedef_warn
|
|
20
|
+
from pyobo.struct.struct_utils import Annotation
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
TARGET_URI_WARNINGS: Counter[tuple[str, str]] = Counter()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _chomp_specificity(s: str) -> tuple[v.SynonymScope | None, str]:
    """Split a leading synonym scope keyword off the front of a string.

    :param s: Text that may begin with one of the literal values of
        :data:`curies.vocabulary.SynonymScope`. Surrounding whitespace is ignored.

    :returns: A pair of the matched scope (or ``None`` when no scope literal
        prefixes the text) and the remaining text. When a scope is matched, the
        remainder is stripped again after the scope is removed.
    """
    text = s.strip()
    # The first literal (in declaration order) that prefixes the text wins
    scope = next(
        (candidate for candidate in t.get_args(v.SynonymScope) if text.startswith(candidate)),
        None,
    )
    if scope is None:
        return None, text
    return scope, text[len(scope) :].strip()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _chomp_typedef(
|
|
36
|
+
s: str,
|
|
37
|
+
*,
|
|
38
|
+
synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef],
|
|
39
|
+
strict: bool = False,
|
|
40
|
+
node: Reference,
|
|
41
|
+
ontology_prefix: str,
|
|
42
|
+
upgrade: bool,
|
|
43
|
+
) -> tuple[SynonymTypeDef | None, str]:
|
|
44
|
+
if not s:
|
|
45
|
+
# This might happen if a synonym is just given as a string
|
|
46
|
+
return None, ""
|
|
47
|
+
|
|
48
|
+
if s.startswith("[") or s.startswith("{"):
|
|
49
|
+
# there's no typedef reference here, just return
|
|
50
|
+
return None, s
|
|
51
|
+
|
|
52
|
+
try:
|
|
53
|
+
synonym_typedef_id, rest = (x.strip() for x in s.split(" ", 1))
|
|
54
|
+
except ValueError as e:
|
|
55
|
+
if "not enough values to unpack" not in str(e):
|
|
56
|
+
raise
|
|
57
|
+
|
|
58
|
+
# let's just check if this might be a CURIE all by itself.
|
|
59
|
+
# if there's a space, we are out of luck, otherwise, let's
|
|
60
|
+
# try to parse it like a curie
|
|
61
|
+
if " " in s:
|
|
62
|
+
# if there
|
|
63
|
+
return None, s
|
|
64
|
+
|
|
65
|
+
synonym_typedef_id, rest = s, ""
|
|
66
|
+
|
|
67
|
+
reference = _obo_parse_identifier(
|
|
68
|
+
synonym_typedef_id,
|
|
69
|
+
strict=strict,
|
|
70
|
+
node=node,
|
|
71
|
+
ontology_prefix=ontology_prefix,
|
|
72
|
+
upgrade=upgrade,
|
|
73
|
+
)
|
|
74
|
+
if reference is None:
|
|
75
|
+
logger.warning(
|
|
76
|
+
"[%s] unable to parse synonym type `%s` in line %s",
|
|
77
|
+
node.curie,
|
|
78
|
+
synonym_typedef_id,
|
|
79
|
+
click.style(s, fg="yellow"),
|
|
80
|
+
)
|
|
81
|
+
return None, rest
|
|
82
|
+
|
|
83
|
+
synonym_typedef = _synonym_typedef_warn(
|
|
84
|
+
ontology_prefix, predicate=reference, synonym_typedefs=synonym_typedefs
|
|
85
|
+
)
|
|
86
|
+
return synonym_typedef, rest
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
SYNONYM_REFERENCE_WARNED: Counter[tuple[str, str]] = Counter()
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _chomp_references(
|
|
93
|
+
s: str, *, strict: bool = False, node: Reference, ontology_prefix: str, line: str
|
|
94
|
+
) -> tuple[Sequence[Reference | OBOLiteral], str]:
|
|
95
|
+
if not s:
|
|
96
|
+
return [], ""
|
|
97
|
+
if not s.startswith("["):
|
|
98
|
+
if s.startswith("{"):
|
|
99
|
+
# This means there are no reference, but there are some qualifiers
|
|
100
|
+
return [], s
|
|
101
|
+
else:
|
|
102
|
+
logger.debug("[%s] synonym had no references: %s", node.curie, s)
|
|
103
|
+
return [], s
|
|
104
|
+
|
|
105
|
+
if "]" not in s:
|
|
106
|
+
logger.warning(
|
|
107
|
+
"[%s] missing closing square bracket in references: %s",
|
|
108
|
+
node.curie,
|
|
109
|
+
click.style(line, fg="yellow"),
|
|
110
|
+
)
|
|
111
|
+
return [], s
|
|
112
|
+
|
|
113
|
+
first, rest = s.lstrip("[").split("]", 1)
|
|
114
|
+
references = _parse_provenance_list(
|
|
115
|
+
first,
|
|
116
|
+
node=node,
|
|
117
|
+
ontology_prefix=ontology_prefix,
|
|
118
|
+
counter=SYNONYM_REFERENCE_WARNED,
|
|
119
|
+
scope_text="synonym provenance",
|
|
120
|
+
line=line,
|
|
121
|
+
strict=strict,
|
|
122
|
+
)
|
|
123
|
+
return references, rest
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _chomp_axioms(s: str, *, strict: bool = False, node: Reference) -> list[Annotation]:
|
|
127
|
+
return []
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _parse_provenance_list(
|
|
131
|
+
curies_or_uris: str,
|
|
132
|
+
node: Reference,
|
|
133
|
+
ontology_prefix: str,
|
|
134
|
+
counter: Counter[tuple[str, str]],
|
|
135
|
+
scope_text: str,
|
|
136
|
+
line: str,
|
|
137
|
+
strict: bool,
|
|
138
|
+
) -> list[Reference | OBOLiteral]:
|
|
139
|
+
rv = []
|
|
140
|
+
for curie_or_uri_raw in curies_or_uris.strip().split(","):
|
|
141
|
+
curie_or_uri_raw = curie_or_uri_raw.strip()
|
|
142
|
+
if not curie_or_uri_raw:
|
|
143
|
+
continue
|
|
144
|
+
curie_or_uri, _, _ = curie_or_uri_raw.strip().partition(" ")
|
|
145
|
+
if reference_or_literal := _parse_reference_or_uri_literal(
|
|
146
|
+
curie_or_uri,
|
|
147
|
+
node=node,
|
|
148
|
+
ontology_prefix=ontology_prefix,
|
|
149
|
+
counter=counter,
|
|
150
|
+
context=scope_text,
|
|
151
|
+
line=line,
|
|
152
|
+
strict=strict,
|
|
153
|
+
):
|
|
154
|
+
rv.append(reference_or_literal)
|
|
155
|
+
return rv
|
pyobo/resource_utils.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
"""Resource utilities for PyOBO."""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
from collections.abc import Sequence
|
|
4
6
|
from functools import lru_cache
|
|
5
7
|
|
|
@@ -28,15 +30,15 @@ from .constants import (
|
|
|
28
30
|
)
|
|
29
31
|
|
|
30
32
|
__all__ = [
|
|
33
|
+
"ensure_alts",
|
|
34
|
+
"ensure_definitions",
|
|
31
35
|
"ensure_inspector_javert",
|
|
32
36
|
"ensure_inspector_javert_df",
|
|
33
37
|
"ensure_ooh_na_na",
|
|
34
|
-
"ensure_alts",
|
|
35
|
-
"ensure_synonyms",
|
|
36
|
-
"ensure_definitions",
|
|
37
38
|
"ensure_properties",
|
|
38
39
|
"ensure_relations",
|
|
39
40
|
"ensure_species",
|
|
41
|
+
"ensure_synonyms",
|
|
40
42
|
]
|
|
41
43
|
|
|
42
44
|
|
|
@@ -49,14 +51,16 @@ def _get_zenodo() -> Zenodo:
|
|
|
49
51
|
def _get_parts(_concept_rec_id, _record_id, version) -> Sequence[str]:
|
|
50
52
|
"""Get sequence to use in :func:`pystow.ensure`.
|
|
51
53
|
|
|
52
|
-
.. note::
|
|
54
|
+
.. note::
|
|
55
|
+
|
|
56
|
+
Corresponds to :data:`pyobo.constants.DATABASE_MODULE`.
|
|
53
57
|
"""
|
|
54
58
|
return ["pyobo", "database", version]
|
|
55
59
|
|
|
56
60
|
|
|
57
|
-
def _ensure(record_id,
|
|
61
|
+
def _ensure(record_id: str | int, name: str, force: bool = False) -> str:
|
|
58
62
|
rv = _get_zenodo().download_latest(
|
|
59
|
-
record_id=record_id,
|
|
63
|
+
record_id=record_id, name=name, parts=_get_parts, force=force
|
|
60
64
|
)
|
|
61
65
|
return rv.as_posix()
|
|
62
66
|
|
|
@@ -64,17 +68,21 @@ def _ensure(record_id, path, force: bool = False) -> str:
|
|
|
64
68
|
def ensure_ooh_na_na(force: bool = False) -> str:
|
|
65
69
|
"""Ensure that the Ooh Na Na Nomenclature Database is downloaded/built.
|
|
66
70
|
|
|
67
|
-
.. seealso::
|
|
71
|
+
.. seealso::
|
|
72
|
+
|
|
73
|
+
:data:`pyobo.constants.OOH_NA_NA_RECORD`
|
|
68
74
|
"""
|
|
69
|
-
return _ensure(record_id=OOH_NA_NA_RECORD,
|
|
75
|
+
return _ensure(record_id=OOH_NA_NA_RECORD, name=OOH_NA_NA_FILE, force=force)
|
|
70
76
|
|
|
71
77
|
|
|
72
78
|
def ensure_inspector_javert(force: bool = False) -> str:
|
|
73
79
|
"""Ensure that the Inspector Javert's Xref Database is downloaded/built.
|
|
74
80
|
|
|
75
|
-
.. seealso::
|
|
81
|
+
.. seealso::
|
|
82
|
+
|
|
83
|
+
:data:`pyobo.constants.JAVERT_RECORD`
|
|
76
84
|
"""
|
|
77
|
-
return _ensure(record_id=JAVERT_RECORD,
|
|
85
|
+
return _ensure(record_id=JAVERT_RECORD, name=JAVERT_FILE, force=force)
|
|
78
86
|
|
|
79
87
|
|
|
80
88
|
def ensure_inspector_javert_df(force: bool = False) -> pd.DataFrame:
|
|
@@ -86,49 +94,61 @@ def ensure_inspector_javert_df(force: bool = False) -> pd.DataFrame:
|
|
|
86
94
|
def ensure_synonyms(force: bool = False) -> str:
|
|
87
95
|
"""Ensure that the Synonym Database is downloaded/built.
|
|
88
96
|
|
|
89
|
-
.. seealso::
|
|
97
|
+
.. seealso::
|
|
98
|
+
|
|
99
|
+
:data:`pyobo.constants.SYNONYMS_RECORD`
|
|
90
100
|
"""
|
|
91
|
-
return _ensure(record_id=SYNONYMS_RECORD,
|
|
101
|
+
return _ensure(record_id=SYNONYMS_RECORD, name=SYNONYMS_FILE, force=force)
|
|
92
102
|
|
|
93
103
|
|
|
94
104
|
def ensure_alts(force: bool = False) -> str:
|
|
95
105
|
"""Ensure that the alt data is downloaded/built.
|
|
96
106
|
|
|
97
|
-
.. seealso::
|
|
107
|
+
.. seealso::
|
|
108
|
+
|
|
109
|
+
:data:`pyobo.constants.ALTS_DATA_RECORD`
|
|
98
110
|
"""
|
|
99
|
-
return _ensure(record_id=ALTS_DATA_RECORD,
|
|
111
|
+
return _ensure(record_id=ALTS_DATA_RECORD, name=ALTS_FILE, force=force)
|
|
100
112
|
|
|
101
113
|
|
|
102
114
|
def ensure_species(force: bool = False) -> str:
|
|
103
115
|
"""Ensure that the species data is downloaded/built.
|
|
104
116
|
|
|
105
|
-
.. seealso::
|
|
117
|
+
.. seealso::
|
|
118
|
+
|
|
119
|
+
:data:`pyobo.constants.SPECIES_RECORD`
|
|
106
120
|
"""
|
|
107
|
-
return _ensure(record_id=SPECIES_RECORD,
|
|
121
|
+
return _ensure(record_id=SPECIES_RECORD, name=SPECIES_FILE, force=force)
|
|
108
122
|
|
|
109
123
|
|
|
110
124
|
def ensure_definitions(force: bool = False) -> str:
|
|
111
125
|
"""Ensure that the definitions data is downloaded/built.
|
|
112
126
|
|
|
113
|
-
.. seealso::
|
|
127
|
+
.. seealso::
|
|
128
|
+
|
|
129
|
+
:data:`pyobo.constants.DEFINITIONS_RECORD`
|
|
114
130
|
"""
|
|
115
|
-
return _ensure(record_id=DEFINITIONS_RECORD,
|
|
131
|
+
return _ensure(record_id=DEFINITIONS_RECORD, name=DEFINITIONS_FILE, force=force)
|
|
116
132
|
|
|
117
133
|
|
|
118
134
|
def ensure_properties(force: bool = False) -> str:
|
|
119
135
|
"""Ensure that the properties data is downloaded/built.
|
|
120
136
|
|
|
121
|
-
.. seealso::
|
|
137
|
+
.. seealso::
|
|
138
|
+
|
|
139
|
+
:data:`pyobo.constants.PROPERTIES_RECORD`
|
|
122
140
|
"""
|
|
123
|
-
return _ensure(record_id=PROPERTIES_RECORD,
|
|
141
|
+
return _ensure(record_id=PROPERTIES_RECORD, name=PROPERTIES_FILE, force=force)
|
|
124
142
|
|
|
125
143
|
|
|
126
144
|
def ensure_relations(force: bool = False) -> str:
|
|
127
145
|
"""Ensure that the relations data is downloaded/built.
|
|
128
146
|
|
|
129
|
-
.. seealso::
|
|
147
|
+
.. seealso::
|
|
148
|
+
|
|
149
|
+
:data:`pyobo.constants.RELATIONS_RECORD`
|
|
130
150
|
"""
|
|
131
|
-
return _ensure(record_id=RELATIONS_RECORD,
|
|
151
|
+
return _ensure(record_id=RELATIONS_RECORD, name=RELATIONS_FILE, force=force)
|
|
132
152
|
|
|
133
153
|
|
|
134
154
|
@click.command()
|
pyobo/resources/__init__.py
CHANGED
|
File without changes
|
pyobo/resources/goc.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""Get GOC to ORCID CURIE mappings.
|
|
2
|
+
|
|
3
|
+
Due to historical reasons, the Gene Ontology and related resources use an internal
|
|
4
|
+
curator identifier space ``GOC`` instead of ORCID. This namespace is partially mapped to
|
|
5
|
+
ORCID and is version controlled `here
|
|
6
|
+
<https://raw.githubusercontent.com/geneontology/go-site/refs/heads/master/metadata/users.yaml>`_.
|
|
7
|
+
|
|
8
|
+
This module loads that namespace and uses :mod:`orcid_downloader` to try and add
|
|
9
|
+
additional ORCID groundings. Then, this module is loaded in PyOBO's custom CURIE upgrade
|
|
10
|
+
system so GOC CURIEs are seamlessly replaced with ORCID CURIEs, when possible.
|
|
11
|
+
|
|
12
|
+
.. seealso::
|
|
13
|
+
|
|
14
|
+
https://github.com/geneontology/go-ontology/issues/22551
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import csv
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
__all__ = ["load_goc_map"]
|
|
21
|
+
|
|
22
|
+
URL = "https://raw.githubusercontent.com/geneontology/go-site/refs/heads/master/metadata/users.yaml"
|
|
23
|
+
|
|
24
|
+
HERE = Path(__file__).parent.resolve()
|
|
25
|
+
PATH = HERE.joinpath("goc.tsv")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def load_goc_map() -> dict[str, str]:
    """Get GOC to ORCID mappings.

    Reads the tab-separated file at ``PATH``, whose first three columns are the
    GOC CURIE, the curator name, and the ORCID identifier.

    :returns: A dictionary from GOC CURIE to ORCID CURIE (``orcid:<identifier>``).
        Each mapping is stored under both the CURIE as written in the file and
        its upper-cased form.
    """
    rv: dict[str, str] = {}
    with PATH.open() as f:
        reader = csv.reader(f, delimiter="\t")
        # The first row holds the column names, not a mapping
        next(reader, None)
        for row in reader:
            if len(row) < 3:
                # blank or truncated row, nothing to map
                continue
            goc_curie, _, orcid = row[:3]
            orcid_curie = f"orcid:{orcid}"
            rv[goc_curie] = orcid_curie
            rv[goc_curie.upper()] = orcid_curie
    return rv
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def main() -> None:
    """Generate GOC to ORCID mappings.

    Downloads the user records from ``URL`` and rewrites the file at ``PATH``
    with one row per record that has a ``GOC:`` xref and a non-empty URI. When
    the URI points at orcid.org, the ORCID identifier is taken from it.
    Otherwise, the identifier is guessed from the nickname using
    :mod:`orcid_downloader`, and the row is marked as guessed.

    :raises requests.HTTPError: if the download does not succeed, in which case
        the existing file is left untouched.
    """
    import orcid_downloader
    import requests
    import yaml
    from tqdm import tqdm

    columns = ["curie", "name", "orcid", "guessed"]
    orcid_uri_prefixes = ("https://orcid.org/", "http://orcid.org/")

    res = requests.get(URL, timeout=5)
    # Fail before the output file is opened for writing (and truncated)
    res.raise_for_status()
    records = yaml.safe_load(res.text)
    with PATH.open("w") as file:
        print(*columns, sep="\t", file=file)
        for record in tqdm(records, unit="person"):
            goc_curie = record.get("xref")
            if goc_curie is None or not goc_curie.startswith("GOC:"):
                continue

            nickname = record["nickname"]
            uri = record.get("uri", "")
            if not uri:
                continue

            if "orcid.org" in uri:
                guessed = False
                # Strip either URI scheme so only the bare identifier is kept
                orcid = uri
                for prefix in orcid_uri_prefixes:
                    orcid = orcid.removeprefix(prefix)
            else:
                orcid = orcid_downloader.ground_researcher_unambiguous(nickname)
                if not orcid:
                    tqdm.write(f"Could not guess ORCID for {nickname}")
                    continue

                tqdm.write(f"Check if https://orcid.org/{orcid} is correct for {nickname}")
                guessed = True

            print(goc_curie, nickname, orcid, guessed, sep="\t", file=file)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
if __name__ == "__main__":
|
|
75
|
+
main()
|
pyobo/resources/goc.tsv
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
curie name orcid guessed
|
|
2
|
+
GOC:jk Jim Knowles 0009-0009-5100-2472 True
|
|
3
|
+
GOC:amu Anushya Muruganujan 0000-0001-7169-5864 True
|
|
4
|
+
GOC:pt Paul Thomas 0000-0002-9074-3507 False
|
|
5
|
+
GOC:hm Huaiyu Mi 0000-0001-8721-202X False
|
|
6
|
+
GOC:sjc Seth Carbon 0000-0001-8244-1536 False
|
|
7
|
+
GOC:jnx Jeremy Nguyen Xuan 0000-0002-4301-0968 True
|
|
8
|
+
GOC:anm Anna Melidoni 0000-0002-2535-883X True
|
|
9
|
+
GOC:ml2 Mike Livstone 0000-0001-5386-5823 True
|
|
10
|
+
GOC:hd Heiko Dietze 0000-0003-0234-1688 False
|
|
11
|
+
GOC:dp Dexter Pratt 0000-0002-1471-9513 True
|
|
12
|
+
GOC:di Diane Inglis 0000-0003-3166-4638 False
|
|
13
|
+
GOC:cy Courtland Yockey 0000-0003-4917-3490 True
|
|
14
|
+
GOC:hal Hadil Alrohaif 0000-0002-6980-6972 True
|
|
15
|
+
GOC:jbu Jessica Buxton 0000-0002-0918-9335 False
|
|
16
|
+
GOC:kom Klaus Mitchell 0000-0001-9510-5320 False
|
|
17
|
+
GOC:nc Nancy Campbell 0000-0001-9995-0839 False
|
|
18
|
+
GOC:rl Ruth Lovering 0000-0002-9791-0064 False
|
|
19
|
+
GOC:vk Varsha Khodiyar 0000-0002-2743-6918 True
|
|
20
|
+
GOC:amm Anna Maria Masci 0000-0003-1940-6740 True
|
|
21
|
+
GOC:dsd David S. Dougall 0000-0002-9043-2709 True
|
|
22
|
+
GOC:pf Petra Fey 0000-0002-4532-2703 False
|
|
23
|
+
GOC:pg Pascale Gaudet 0000-0003-1813-6857 False
|
|
24
|
+
GOC:rjd Robert Dodson 0000-0002-2757-5950 False
|
|
25
|
+
GOC:jh2 Jim Hu 0000-0001-9016-2684 False
|
|
26
|
+
GOC:nv Nicole Vasilevsky 0000-0001-5208-3432 True
|
|
27
|
+
GOC:ha Helen Attrill 0000-0003-3212-6364 False
|
|
28
|
+
GOC:hb Heather Butler 0000-0003-4454-4889 True
|
|
29
|
+
GOC:mc Marta Costa 0000-0001-5948-3092 False
|
|
30
|
+
GOC:ma Michael Ashburner 0000-0002-6962-2807 False
|
|
31
|
+
GOC:dos David Osumi-Sutherland 0000-0002-7073-9172 False
|
|
32
|
+
GOC:sart Susan Tweedie 0000-0003-1818-8243 False
|
|
33
|
+
GOC:mb Matt Berriman 0000-0002-9581-0377 False
|
|
34
|
+
GOC:bf Rebecca Foulger 0000-0001-8682-8754 False
|
|
35
|
+
GOC:ceb Cath Brooksbank 0000-0001-9395-7001 False
|
|
36
|
+
GOC:jid Jennifer Deegan (nee Clark) 0000-0001-9227-417X False
|
|
37
|
+
GOC:jl Jane Lomax 0000-0001-8865-4321 False
|
|
38
|
+
GOC:mec Melanie Courtot 0000-0002-9551-6370 False
|
|
39
|
+
GOC:pr Paola Roncaglia 0000-0002-2825-0621 False
|
|
40
|
+
GOC:als Alice Dashow 0000-0003-3829-1600 False
|
|
41
|
+
GOC:es Elena Speretta 0000-0003-1506-7438 False
|
|
42
|
+
GOC:gg George Georghiou 0000-0001-5067-3199 False
|
|
43
|
+
GOC:hbye Hema Bye-A-Jee 0000-0003-2464-7688 False
|
|
44
|
+
GOC:pm Prudence Mutowo 0000-0002-4646-4172 True
|
|
45
|
+
GOC:rph Rachael Huntley 0000-0001-6718-3559 False
|
|
46
|
+
GOC:yaf Yasmin Alam-Faruque 0000-0001-8902-0232 True
|
|
47
|
+
GOC:bhm Birgit Meldal 0000-0003-4062-6158 False
|
|
48
|
+
GOC:imk Ingrid Keseler 0000-0003-1738-6117 True
|
|
49
|
+
GOC:dph David Hill 0000-0001-7476-6306 False
|
|
50
|
+
GOC:tfm Terry Meehan 0000-0003-1980-3228 True
|
|
51
|
+
GOC:ajp Tony Planchart 0000-0001-8691-8856 False
|
|
52
|
+
GOC:dms Dmitry Sitnikov 0000-0003-3394-9805 False
|
|
53
|
+
GOC:smb Sue Bello 0000-0003-4606-0597 False
|
|
54
|
+
GOC:hjd Harold Drabkin 0000-0003-2689-5511 False
|
|
55
|
+
GOC:ln Li Ni 0000-0002-9796-7693 False
|
|
56
|
+
GOC:jab Judith Blake 0000-0001-8522-334X False
|
|
57
|
+
GOC:crds Claudia Rato da Silva 0000-0002-3971-046X True
|
|
58
|
+
GOC:tw Trish Whetzel 0000-0002-3458-4839 True
|
|
59
|
+
GOC:rv Randi Vita 0000-0001-8957-7612 True
|
|
60
|
+
GOC:ml Magdalen Lindeberg 0000-0001-6386-4066 True
|
|
61
|
+
GOC:gvg George Gkoutos 0000-0002-2061-091X True
|
|
62
|
+
GOC:al Antonia Lock 0000-0003-1179-5999 False
|
|
63
|
+
GOC:jb Jurg Bahler 0000-0003-4036-1532 True
|
|
64
|
+
GOC:mah Midori Harris 0000-0003-4148-4606 False
|
|
65
|
+
GOC:vw Val Wood 0000-0001-6330-7526 False
|
|
66
|
+
GOC:lc Laurel Cooper 0000-0002-6379-8932 False
|
|
67
|
+
GOC:rw Ramona Walls 0000-0001-8815-0078 True
|
|
68
|
+
GOC:cna Cecilia Arighi 0000-0002-0803-4817 True
|
|
69
|
+
GOC:bj Bijay Jassal 0000-0002-5039-5405 True
|
|
70
|
+
GOC:mg2 Marc Gillespie 0000-0002-5766-1702 True
|
|
71
|
+
GOC:pde Peter D'Eustachio 0000-0002-5494-626X False
|
|
72
|
+
GOC:phg Phani Garapati 0000-0003-0941-2207 False
|
|
73
|
+
GOC:sj Steven Jupe 0000-0001-5807-0069 True
|
|
74
|
+
GOC:vs Veronica Shamovsky 0000-0002-2187-2241 True
|
|
75
|
+
GOC:jsg John Garavelli 0000-0002-4131-735X True
|
|
76
|
+
GOC:sjw Shur-Jen Wang 0000-0001-5256-8683 True
|
|
77
|
+
GOC:sl Stan Laulederkind 0000-0001-5356-4174 False
|
|
78
|
+
GOC:st Simon Twigger 0000-0001-5659-3632 True
|
|
79
|
+
GOC:vp Victoria Petri 0000-0002-5540-8498 True
|
|
80
|
+
GOC:cb Colin Batchelor 0000-0001-5985-7429 True
|
|
81
|
+
GOC:cjm Chris Mungall 0000-0002-6601-2165 False
|
|
82
|
+
GOC:clt Chandra Theesfeld 0000-0002-8379-6600 False
|
|
83
|
+
GOC:dgf Dianna Fisk 0000-0003-4929-9472 False
|
|
84
|
+
GOC:ew Edith Wong 0000-0001-9799-5523 False
|
|
85
|
+
GOC:elh Eurie Hong 0000-0002-1775-4998 False
|
|
86
|
+
GOC:jd Janos Demeter 0000-0002-7301-8055 False
|
|
87
|
+
GOC:jh Jodi Hirschman 0000-0001-8850-9925 False
|
|
88
|
+
GOC:krc Karen Christie 0000-0001-5501-853X False
|
|
89
|
+
GOC:mcc Maria Costanzo 0000-0001-9043-693X False
|
|
90
|
+
GOC:rb Rama Balakrishnan 0000-0003-2468-9933 False
|
|
91
|
+
GOC:rn Rob Nash 0000-0002-3726-7441 False
|
|
92
|
+
GOC:se Stacia Engel 0000-0001-5472-917X False
|
|
93
|
+
GOC:ssd Selina Dwight 0000-0002-8546-7798 False
|
|
94
|
+
GOC:dw Dani Welter 0000-0003-1058-2668 False
|
|
95
|
+
GOC:ask A. S. Karthikeyan 0000-0003-0065-0217 True
|
|
96
|
+
GOC:ct Christopher Tissier 0000-0002-0693-3202 True
|
|
97
|
+
GOC:dhl Donghui Li 0000-0003-3335-4537 False
|
|
98
|
+
GOC:ds David Swarbreck 0000-0002-5453-1013 True
|
|
99
|
+
GOC:kad Kate Dreher 0000-0003-4652-4398 False
|
|
100
|
+
GOC:tb Tanya Berardini 0000-0002-3837-8864 False
|
|
101
|
+
GOC:dh Dan Haft 0000-0001-8101-4938 True
|
|
102
|
+
GOC:lh Linda Hannick 0000-0002-8018-8466 False
|
|
103
|
+
GOC:ef Erika Feltrin 0000-0002-9899-7456 False
|
|
104
|
+
GOC:lm Lorenza Mittempergher 0000-0003-3425-3965 True
|
|
105
|
+
GOC:ar Alan Ruttenberg 0000-0002-1604-3078 True
|
|
106
|
+
GOC:hw Heather Wick 0000-0003-0961-0377 True
|
|
107
|
+
GOC:pad Paul Denny 0000-0003-4659-6893 False
|
|
108
|
+
GOC:mat Mathew Tata 0000-0002-0960-5677 True
|
|
109
|
+
GOC:aa Andrea Auchincloss 0000-0002-5297-5390 True
|
|
110
|
+
GOC:ae Anne Estreicher 0000-0001-6828-2508 True
|
|
111
|
+
GOC:ab Alan Bridge 0000-0003-2148-9135 False
|
|
112
|
+
GOC:ans Andre Stutz 0000-0002-7175-2168 True
|
|
113
|
+
GOC:ag Arnaud Gos 0000-0002-5018-1378 True
|
|
114
|
+
GOC:cr2 Catherine Rivoire 0000-0002-5979-8382 True
|
|
115
|
+
GOC:ch Chantal Hulo 0000-0001-8176-7999 True
|
|
116
|
+
GOC:dl2 Damien Lieberherr 0000-0002-9724-1710 True
|
|
117
|
+
GOC:ecu Elena Cibrian-Uhalte 0000-0002-0987-9862 False
|
|
118
|
+
GOC:fj Florence Jungo 0000-0002-7456-8390 True
|
|
119
|
+
GOC:gc Gayatri Chavali 0000-0001-8575-1847 True
|
|
120
|
+
GOC:gk Guillaume Keller 0000-0001-9497-8269 True
|
|
121
|
+
GOC:ip Ivo Pedruzzi 0000-0001-8561-7170 True
|
|
122
|
+
GOC:kd Kirill Degtyarenko 0000-0003-0058-650X True
|
|
123
|
+
GOC:klp Klemens Pichler 0000-0001-6099-8931 False
|
|
124
|
+
GOC:ka Kristian Axelsen 0000-0003-3889-2879 True
|
|
125
|
+
GOC:mf Marc Feuermann 0000-0002-4187-2863 False
|
|
126
|
+
GOC:mt Michael Tognolli 0000-0002-5278-3321 True
|
|
127
|
+
GOC:mm2 Michele Magrane 0000-0003-3544-996X True
|
|
128
|
+
GOC:pm2 Patrick Masson 0000-0001-7646-0052 False
|
|
129
|
+
GOC:pdr Paula Duek Roggli 0000-0002-0819-0473 True
|
|
130
|
+
GOC:pga Penelope Garmiri 0000-0002-2283-2575 False
|
|
131
|
+
GOC:plm Philippe Le Mercier 0000-0001-8528-090X True
|
|
132
|
+
GOC:reh Reija Hieta 0000-0001-5724-6253 True
|
|
133
|
+
GOC:so Sandra Orchard 0000-0002-8878-3972 True
|
|
134
|
+
GOC:sp Sylvain Poux 0000-0001-7299-6685 False
|
|
135
|
+
GOC:ss Shyamala Sundaram 0000-0003-4209-460X True
|
|
136
|
+
GOC:uh Ursula Hinz 0000-0002-2365-2234 True
|
|
137
|
+
GOC:wmc Wei Mun Chan 0000-0002-9971-813X True
|
|
138
|
+
GOC:nhn Nevila Hyka-Nouspikel 0000-0001-7855-209X True
|
|
139
|
+
GOC:jc Jonas Cicenas 0000-0002-9365-1843 True
|
|
140
|
+
GOC:gap Ghislaine Argoud-Puy 0000-0002-2979-8613 False
|
|
141
|
+
GOC:ppm Pablo Porras Millan 0000-0002-8429-8793 True
|
|
142
|
+
GOC:dsz Dora Szakonyi 0000-0002-9189-629X True
|
|
143
|
+
GOC:dr Daniela Raciti 0000-0002-4945-5837 False
|
|
144
|
+
GOC:kmv Kimberly Van Auken 0000-0002-1706-4196 False
|
|
145
|
+
GOC:rk Ranjana Kishore 0000-0002-1478-7671 False
|
|
146
|
+
GOC:dgh Doug Howe 0000-0001-5831-7439 False
|
|
147
|
+
GOC:dsf David Fashena 0000-0001-9656-0683 False
|
|
148
|
+
GOC:cvs Ceri Van Slyke 0000-0002-2244-7917 False
|
|
149
|
+
GOC:sr Sridhar Ramachandran 0000-0002-2246-3722 False
|
|
150
|
+
GOC:ymb Yvonne M Bradford 0000-0002-9900-7880 False
|
|
151
|
+
GOC:sat Sabrina Toro 0000-0002-4142-7153 False
|
|
152
|
+
GOC:ksf Ken Frazer 0000-0002-6889-0711 False
|
|
153
|
+
GOC:lrz Leyla Ruzicka 0000-0002-1009-339X False
|
|
154
|
+
GOC:ejs Erik Segerdell 0000-0002-9611-1279 True
|
|
155
|
+
GOC:lb Lionel Breuza 0000-0002-8075-8625 False
|
|
156
|
+
GOC:mh Melissa Haendel 0000-0001-9114-8737 False
|
|
157
|
+
GOC:mag Marion Gremse 0000-0003-0350-6392 True
|
|
158
|
+
GOC:hp Helen Parkinson 0000-0003-3035-4195 True
|
|
159
|
+
GOC:sk Scott Kalberer 0000-0003-2101-2484 True
|
|
160
|
+
GOC:md Mickael Desvaux 0000-0003-2986-6417 True
|
|
161
|
+
GOC:expert_db Dominique Bergmann 0000-0003-0873-3543 True
|
|
162
|
+
GOC:expert_ks Kevin Struhl 0000-0002-4181-7856 True
|
|
163
|
+
GOC:expert_jwt Jeremy Thorner 0000-0002-2583-500X True
|
|
164
|
+
GOC:expert_mm Michael Melkonian 0000-0002-5911-6548 True
|
|
165
|
+
GOC:expert_pt Philippa Talmud 0000-0002-5560-1933 True
|
|
166
|
+
GOC:expert_rsh R. Scott Hawley 0000-0002-6478-0494 True
|
|
167
|
+
GOC:expert_tf Tim Formosa 0000-0002-8477-2483 True
|
|
168
|
+
GOC:jal Jamie A. Lee 0000-0001-6182-2372 True
|
|
169
|
+
GOC:mmt Monica Munoz-Torres 0000-0001-8430-6039 False
|
|
170
|
+
GOC:at Anne Thessen 0000-0002-2908-3327 False
|
|
171
|
+
GOC:ga Giulia Antonazzo 0000-0003-0086-5621 False
|
|
172
|
+
GOC:ani Anne Niknejad 0000-0003-3308-6245 True
|
|
173
|
+
GOC:pvn Pim van Nierop 0000-0003-0593-3443 False
|
|
174
|
+
GOC:add Alexander Diehl 0000-0001-9990-8331 False
|
|
175
|
+
GOC:pj Pankaj Jaiswal 0000-0002-1005-8383 False
|
|
176
|
+
GOC:rz Rossana Zaru 0000-0002-3358-4423 False
|
|
177
|
+
GOC:lr Leonore Reiser 0000-0003-0073-0858 False
|
|
178
|
+
GOC:mcc2 Marcus Chibucos 0000-0001-9586-0780 False
|
|
179
|
+
GOC:das Debby Siegele 0000-0001-8935-0696 False
|
|
180
|
+
GOC:bc Barbara Kramarz 0000-0002-3898-1727 False
|
|
181
|
+
GOC:lnp Livia Perfetto 0000-0003-4392-8725 False
|
|
182
|
+
GOC:ach Achchuthan Shanmugasundram 0000-0003-2349-6929 False
|
|
183
|
+
GOC:mch Marie-Claire Harrison 0000-0002-3013-9906 False
|
|
184
|
+
GOC:mlg Michelle Gwinn Giglio 0000-0001-7628-5565 False
|
|
185
|
+
GOC:jja Josh Jaffery 0000-0002-1965-6945 True
|
|
186
|
+
GOC:tc Teresa Chu 0000-0003-4172-1966 True
|
|
187
|
+
GOC:apd Allan P Davis 0000-0002-5741-7128 False
|
|
188
|
+
GOC:sjm Steven Marygold 0000-0003-2759-266X False
|
pyobo/resources/ncbitaxon.py
CHANGED
|
@@ -5,14 +5,13 @@ import gzip
|
|
|
5
5
|
from collections.abc import Mapping
|
|
6
6
|
from functools import lru_cache
|
|
7
7
|
from pathlib import Path
|
|
8
|
-
from typing import Optional, Union
|
|
9
8
|
|
|
10
9
|
import requests
|
|
11
10
|
|
|
12
11
|
__all__ = [
|
|
13
|
-
"load_ncbitaxon",
|
|
14
12
|
"get_ncbitaxon_id",
|
|
15
13
|
"get_ncbitaxon_name",
|
|
14
|
+
"load_ncbitaxon",
|
|
16
15
|
]
|
|
17
16
|
|
|
18
17
|
HERE = Path(__file__).parent.resolve()
|
|
@@ -33,17 +32,17 @@ def load_ncbitaxon_reverse() -> Mapping[str, str]:
|
|
|
33
32
|
return {name: identifier for identifier, name in load_ncbitaxon().items()}
|
|
34
33
|
|
|
35
34
|
|
|
36
|
-
def get_ncbitaxon_name(ncbitaxon_id: str) ->
|
|
35
|
+
def get_ncbitaxon_name(ncbitaxon_id: str) -> str | None:
|
|
37
36
|
"""Get the name from the identifier."""
|
|
38
37
|
return load_ncbitaxon().get(ncbitaxon_id)
|
|
39
38
|
|
|
40
39
|
|
|
41
|
-
def get_ncbitaxon_id(name: str) ->
|
|
40
|
+
def get_ncbitaxon_id(name: str) -> str | None:
|
|
42
41
|
"""Get the identifier from the name."""
|
|
43
42
|
return load_ncbitaxon_reverse().get(name)
|
|
44
43
|
|
|
45
44
|
|
|
46
|
-
def ensure(url: str, path:
|
|
45
|
+
def ensure(url: str, path: str | Path, uri_prefix: str) -> Mapping[str, str]:
|
|
47
46
|
"""Download the latest version of the resource."""
|
|
48
47
|
path = Path(path)
|
|
49
48
|
if path.is_file():
|
pyobo/resources/ncbitaxon.tsv.gz
CHANGED
|
File without changes
|
pyobo/resources/ro.py
CHANGED
|
@@ -6,6 +6,7 @@ from collections.abc import Mapping
|
|
|
6
6
|
from functools import lru_cache
|
|
7
7
|
|
|
8
8
|
import requests
|
|
9
|
+
from curies import ReferenceTuple
|
|
9
10
|
|
|
10
11
|
__all__ = [
|
|
11
12
|
"load_ro",
|
|
@@ -18,13 +19,13 @@ PREFIX = "http://purl.obolibrary.org/obo/"
|
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
@lru_cache(maxsize=1)
|
|
21
|
-
def load_ro() -> Mapping[
|
|
22
|
+
def load_ro() -> Mapping[ReferenceTuple, str]:
|
|
22
23
|
"""Load the relation ontology names."""
|
|
23
24
|
if not os.path.exists(PATH):
|
|
24
25
|
download()
|
|
25
26
|
with open(PATH) as file:
|
|
26
27
|
return {
|
|
27
|
-
(prefix, identifier): name
|
|
28
|
+
ReferenceTuple(prefix, identifier): name
|
|
28
29
|
for prefix, identifier, name in csv.reader(file, delimiter="\t")
|
|
29
30
|
}
|
|
30
31
|
|
pyobo/resources/ro.tsv
CHANGED
|
File without changes
|
pyobo/resources/so.py
CHANGED
|
File without changes
|
pyobo/resources/so.tsv
CHANGED
|
File without changes
|
pyobo/sources/README.md
CHANGED
|
@@ -1,15 +1,19 @@
|
|
|
1
1
|
# Sources
|
|
2
2
|
|
|
3
|
-
1. Create a new module in `pyobo.sources` named with the prefix for the resource
|
|
4
|
-
|
|
3
|
+
1. Create a new module in `pyobo.sources` named with the prefix for the resource
|
|
4
|
+
you're ontologizing
|
|
5
|
+
2. Make sure your resource has a corresponding prefix in
|
|
6
|
+
[the Bioregistry](https://github.com/biopragmatics/bioregistry)
|
|
5
7
|
3. Subclass the `pyobo.Obo` class to represent your resource
|
|
6
8
|
4. Add your resource to the list in `pyobo.sources.__init__`
|
|
7
9
|
|
|
8
10
|
## What is in scope?
|
|
9
11
|
|
|
10
|
-
1. Biomedical, semantic web, bibliographic, life sciences, and related natural
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
12
|
+
1. Biomedical, semantic web, bibliographic, life sciences, and related natural
|
|
13
|
+
sciences resources are welcome
|
|
14
|
+
2. The source you want to ontologize should be an identifier resource, i.e., it
|
|
15
|
+
mints its own identifiers. If you want to ontologize some database that
|
|
16
|
+
reuses some other identifier resource's identifiers, then this isn't the
|
|
17
|
+
right place.
|
|
18
|
+
3. Resources that are not possible to download automatically are not in scope
|
|
19
|
+
for PyOBO. Reproducibility and reusability are core values of this software
|