pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -117
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +107 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +210 -160
- pyobo/cli/database_utils.py +155 -0
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +209 -191
- pyobo/gilda_utils.py +52 -250
- pyobo/identifier_utils/__init__.py +33 -0
- pyobo/identifier_utils/api.py +305 -0
- pyobo/identifier_utils/preprocessing.json +873 -0
- pyobo/identifier_utils/preprocessing.py +27 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +48 -40
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1354 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +9 -6
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +8 -13
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +11 -4
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +272 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1484 -657
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +16 -15
- pyobo/utils/io.py +51 -41
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +73 -70
- pyobo/version.py +3 -3
- pyobo-0.12.1.dist-info/METADATA +671 -0
- pyobo-0.12.1.dist-info/RECORD +201 -0
- pyobo-0.12.1.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo/xrefdb/xrefs_pipeline.py +0 -180
- pyobo-0.11.2.dist-info/METADATA +0 -711
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
|
@@ -6,8 +6,6 @@ import logging
|
|
|
6
6
|
import typing
|
|
7
7
|
from collections import Counter, defaultdict
|
|
8
8
|
from collections.abc import Iterable
|
|
9
|
-
from operator import attrgetter
|
|
10
|
-
from typing import Optional
|
|
11
9
|
|
|
12
10
|
from tabulate import tabulate
|
|
13
11
|
from tqdm.auto import tqdm
|
|
@@ -17,11 +15,13 @@ from pyobo.resources.so import get_so_name
|
|
|
17
15
|
from pyobo.struct import (
|
|
18
16
|
Obo,
|
|
19
17
|
Reference,
|
|
20
|
-
Synonym,
|
|
21
18
|
SynonymTypeDef,
|
|
22
19
|
Term,
|
|
20
|
+
TypeDef,
|
|
21
|
+
default_reference,
|
|
23
22
|
from_species,
|
|
24
23
|
gene_product_member_of,
|
|
24
|
+
has_citation,
|
|
25
25
|
has_gene_product,
|
|
26
26
|
member_of,
|
|
27
27
|
orthologous,
|
|
@@ -42,10 +42,27 @@ DEFINITIONS_URL_FMT = (
|
|
|
42
42
|
"hgnc_complete_set_{version}.json"
|
|
43
43
|
)
|
|
44
44
|
|
|
45
|
-
previous_symbol_type = SynonymTypeDef
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
45
|
+
previous_symbol_type = SynonymTypeDef(
|
|
46
|
+
reference=default_reference(PREFIX, "previous_symbol", name="previous symbol")
|
|
47
|
+
)
|
|
48
|
+
alias_symbol_type = SynonymTypeDef(
|
|
49
|
+
reference=default_reference(PREFIX, "alias_symbol", name="alias symbol")
|
|
50
|
+
)
|
|
51
|
+
previous_name_type = SynonymTypeDef(
|
|
52
|
+
reference=default_reference(PREFIX, "previous_name", name="previous name")
|
|
53
|
+
)
|
|
54
|
+
alias_name_type = SynonymTypeDef(
|
|
55
|
+
reference=default_reference(PREFIX, "alias_name", name="alias name")
|
|
56
|
+
)
|
|
57
|
+
HAS_LOCUS_TYPE = TypeDef(
|
|
58
|
+
reference=default_reference(PREFIX, "locus_type", name="has locus type"), is_metadata_tag=True
|
|
59
|
+
)
|
|
60
|
+
HAS_LOCUS_GROUP = TypeDef(
|
|
61
|
+
reference=default_reference(PREFIX, "locus_group", name="has locus group"), is_metadata_tag=True
|
|
62
|
+
)
|
|
63
|
+
HAS_LOCATION = TypeDef(
|
|
64
|
+
reference=default_reference(PREFIX, "location", name="has location"), is_metadata_tag=True
|
|
65
|
+
)
|
|
49
66
|
|
|
50
67
|
#: First column is MIRIAM prefix, second column is HGNC key
|
|
51
68
|
gene_xrefs = [
|
|
@@ -129,6 +146,7 @@ SKIP_KEYS = {
|
|
|
129
146
|
"cd", # symbol
|
|
130
147
|
"homeodb", # TODO add to bioregistry, though this is defunct
|
|
131
148
|
"mamit-trnadb", # TODO add to bioregistry, though this is defunct
|
|
149
|
+
"mane_select", # TODO
|
|
132
150
|
}
|
|
133
151
|
|
|
134
152
|
#: A mapping from HGNC's locus_type annotations to sequence ontology identifiers
|
|
@@ -167,38 +185,8 @@ LOCUS_TYPE_TO_SO = {
|
|
|
167
185
|
None: "0000704", # gene
|
|
168
186
|
}
|
|
169
187
|
|
|
170
|
-
|
|
171
|
-
prefix
|
|
172
|
-
for prefix in {
|
|
173
|
-
"rgd",
|
|
174
|
-
"mgi",
|
|
175
|
-
"eccode",
|
|
176
|
-
"rnacentral",
|
|
177
|
-
"pubmed",
|
|
178
|
-
"uniprot",
|
|
179
|
-
"mirbase",
|
|
180
|
-
"snornabase",
|
|
181
|
-
"hgnc",
|
|
182
|
-
"hgnc.genegroup",
|
|
183
|
-
"debio",
|
|
184
|
-
"ensembl",
|
|
185
|
-
"NCBIGene",
|
|
186
|
-
"vega",
|
|
187
|
-
"ucsc",
|
|
188
|
-
"ena",
|
|
189
|
-
"ccds",
|
|
190
|
-
"omim",
|
|
191
|
-
"cosmic",
|
|
192
|
-
"merops",
|
|
193
|
-
"orphanet",
|
|
194
|
-
"pseudogene",
|
|
195
|
-
"lncipedia",
|
|
196
|
-
"refseq",
|
|
197
|
-
}
|
|
198
|
-
}
|
|
199
|
-
IDSPACES.update(
|
|
200
|
-
NCBITaxon="http://purl.obolibrary.org/obo/NCBITaxon_",
|
|
201
|
-
SO="http://purl.obolibrary.org/obo/SO_",
|
|
188
|
+
PUBLICATION_TERM = Term(
|
|
189
|
+
reference=Reference(prefix="IAO", identifier="0000013", name="journal article")
|
|
202
190
|
)
|
|
203
191
|
|
|
204
192
|
|
|
@@ -214,8 +202,11 @@ class HGNCGetter(Obo):
|
|
|
214
202
|
orthologous,
|
|
215
203
|
member_of,
|
|
216
204
|
exact_match,
|
|
205
|
+
has_citation,
|
|
206
|
+
HAS_LOCUS_GROUP,
|
|
207
|
+
HAS_LOCUS_TYPE,
|
|
208
|
+
HAS_LOCATION,
|
|
217
209
|
]
|
|
218
|
-
idspaces = IDSPACES
|
|
219
210
|
synonym_typedefs = [
|
|
220
211
|
previous_name_type,
|
|
221
212
|
previous_symbol_type,
|
|
@@ -233,12 +224,7 @@ class HGNCGetter(Obo):
|
|
|
233
224
|
return get_terms(force=force, version=self.data_version)
|
|
234
225
|
|
|
235
226
|
|
|
236
|
-
def
|
|
237
|
-
"""Get HGNC as OBO."""
|
|
238
|
-
return HGNCGetter(force=force)
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
|
|
227
|
+
def get_terms(version: str | None = None, force: bool = False) -> Iterable[Term]:
|
|
242
228
|
"""Get HGNC terms."""
|
|
243
229
|
if version is None:
|
|
244
230
|
version = get_version("hgnc")
|
|
@@ -251,18 +237,15 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
|
|
|
251
237
|
version=version,
|
|
252
238
|
name="hgnc_complete_set.json",
|
|
253
239
|
)
|
|
254
|
-
with open(
|
|
240
|
+
with path.open() as file:
|
|
255
241
|
entries = json.load(file)["response"]["docs"]
|
|
256
242
|
|
|
257
243
|
yield Term.from_triple("NCBITaxon", "9606", "Homo sapiens")
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
},
|
|
264
|
-
key=attrgetter("identifier"),
|
|
265
|
-
)
|
|
244
|
+
_so_ids: set[str] = {s for s in LOCUS_TYPE_TO_SO.values() if s}
|
|
245
|
+
yield from [
|
|
246
|
+
Term(reference=Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id)))
|
|
247
|
+
for so_id in sorted(_so_ids)
|
|
248
|
+
]
|
|
266
249
|
|
|
267
250
|
statuses = set()
|
|
268
251
|
for entry in tqdm(entries, desc=f"Mapping {PREFIX}", unit="gene", unit_scale=True):
|
|
@@ -273,7 +256,7 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
|
|
|
273
256
|
)
|
|
274
257
|
status = entry.pop("status")
|
|
275
258
|
if status == "Approved":
|
|
276
|
-
is_obsolete =
|
|
259
|
+
is_obsolete = None
|
|
277
260
|
elif status not in statuses:
|
|
278
261
|
statuses.add(status)
|
|
279
262
|
tqdm.write(f"[{PREFIX}] unhandled {status}")
|
|
@@ -297,7 +280,7 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
|
|
|
297
280
|
continue # only add concrete annotations
|
|
298
281
|
term.append_relationship(
|
|
299
282
|
gene_product_member_of,
|
|
300
|
-
Reference(prefix="
|
|
283
|
+
Reference(prefix="ec", identifier=ec_code),
|
|
301
284
|
)
|
|
302
285
|
for rna_central_ids in entry.pop("rna_central_id", []):
|
|
303
286
|
for rna_central_id in rna_central_ids.split(","):
|
|
@@ -364,7 +347,7 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
|
|
|
364
347
|
xref_identifiers = entry.pop(key, None)
|
|
365
348
|
if xref_identifiers is None:
|
|
366
349
|
continue
|
|
367
|
-
if isinstance(xref_identifiers,
|
|
350
|
+
if isinstance(xref_identifiers, str | int):
|
|
368
351
|
xref_identifiers = [str(xref_identifiers)]
|
|
369
352
|
|
|
370
353
|
if xref_prefix == "merops.entry":
|
|
@@ -389,7 +372,7 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
|
|
|
389
372
|
|
|
390
373
|
gene_group_ids = entry.pop("gene_group_id", [])
|
|
391
374
|
gene_groups = entry.pop("gene_group", [])
|
|
392
|
-
for gene_group_id, gene_group_label in zip(gene_group_ids, gene_groups):
|
|
375
|
+
for gene_group_id, gene_group_label in zip(gene_group_ids, gene_groups, strict=False):
|
|
393
376
|
term.append_relationship(
|
|
394
377
|
member_of,
|
|
395
378
|
Reference(
|
|
@@ -400,20 +383,20 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
|
|
|
400
383
|
)
|
|
401
384
|
|
|
402
385
|
for alias_symbol in entry.pop("alias_symbol", []):
|
|
403
|
-
term.append_synonym(
|
|
386
|
+
term.append_synonym(alias_symbol, type=alias_symbol_type)
|
|
404
387
|
for alias_name in entry.pop("alias_name", []):
|
|
405
|
-
term.append_synonym(
|
|
388
|
+
term.append_synonym(alias_name, type=alias_name_type)
|
|
406
389
|
for previous_symbol in itt.chain(
|
|
407
390
|
entry.pop("previous_symbol", []), entry.pop("prev_symbol", [])
|
|
408
391
|
):
|
|
409
|
-
term.append_synonym(
|
|
392
|
+
term.append_synonym(previous_symbol, type=previous_symbol_type)
|
|
410
393
|
for previous_name in entry.pop("prev_name", []):
|
|
411
|
-
term.append_synonym(
|
|
394
|
+
term.append_synonym(previous_name, type=previous_name_type)
|
|
412
395
|
|
|
413
|
-
for prop in ["location"]:
|
|
396
|
+
for prop, td in [("location", HAS_LOCATION)]:
|
|
414
397
|
value = entry.pop(prop, None)
|
|
415
398
|
if value:
|
|
416
|
-
term.
|
|
399
|
+
term.annotate_string(td, value)
|
|
417
400
|
|
|
418
401
|
locus_type = entry.pop("locus_type")
|
|
419
402
|
locus_group = entry.pop("locus_group")
|
|
@@ -425,8 +408,8 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
|
|
|
425
408
|
Reference(prefix="SO", identifier="0000704", name=get_so_name("0000704"))
|
|
426
409
|
) # gene
|
|
427
410
|
unhandle_locus_types[locus_type][identifier] = term
|
|
428
|
-
term.
|
|
429
|
-
term.
|
|
411
|
+
term.annotate_string(HAS_LOCUS_TYPE, locus_type)
|
|
412
|
+
term.annotate_string(HAS_LOCUS_GROUP, locus_group)
|
|
430
413
|
|
|
431
414
|
term.set_species(identifier="9606", name="Homo sapiens")
|
|
432
415
|
|
|
@@ -453,9 +436,11 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
|
|
|
453
436
|
hgnc_id,
|
|
454
437
|
term.name,
|
|
455
438
|
term.is_obsolete,
|
|
456
|
-
term.
|
|
439
|
+
f"https://bioregistry.io/{term.curie}",
|
|
457
440
|
", ".join(
|
|
458
|
-
|
|
441
|
+
f"https://bioregistry.io/{p.curie}"
|
|
442
|
+
for p in term.provenance
|
|
443
|
+
if isinstance(p, Reference)
|
|
459
444
|
),
|
|
460
445
|
)
|
|
461
446
|
for hgnc_id, term in sorted(v.items())
|
|
@@ -472,7 +457,8 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
|
|
|
472
457
|
logger.warning(
|
|
473
458
|
"Unhandled locus types:\n%s", tabulate(unhandle_locus_type_counter.most_common())
|
|
474
459
|
)
|
|
475
|
-
|
|
460
|
+
if unhandled_entry_keys:
|
|
461
|
+
logger.warning("Unhandled keys:\n%s", tabulate(unhandled_entry_keys.most_common()))
|
|
476
462
|
|
|
477
463
|
|
|
478
464
|
if __name__ == "__main__":
|
|
@@ -5,16 +5,9 @@ from collections.abc import Iterable, Mapping
|
|
|
5
5
|
|
|
6
6
|
import pandas as pd
|
|
7
7
|
|
|
8
|
-
from
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
Synonym,
|
|
12
|
-
SynonymTypeDef,
|
|
13
|
-
Term,
|
|
14
|
-
enables,
|
|
15
|
-
from_species,
|
|
16
|
-
)
|
|
17
|
-
from ..utils.path import ensure_path
|
|
8
|
+
from ...struct import Obo, Reference, SynonymTypeDef, Term, has_citation
|
|
9
|
+
from ...struct.typedef import enables, exact_match, from_species
|
|
10
|
+
from ...utils.path import ensure_path
|
|
18
11
|
|
|
19
12
|
__all__ = [
|
|
20
13
|
"HGNCGroupGetter",
|
|
@@ -36,18 +29,13 @@ class HGNCGroupGetter(Obo):
|
|
|
36
29
|
ontology = PREFIX
|
|
37
30
|
bioversions_key = "hgnc"
|
|
38
31
|
synonym_typedefs = [symbol_type]
|
|
39
|
-
typedefs = [from_species, enables]
|
|
32
|
+
typedefs = [from_species, enables, exact_match, has_citation]
|
|
40
33
|
|
|
41
34
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
42
35
|
"""Iterate over terms in the ontology."""
|
|
43
36
|
return get_terms(force=force)
|
|
44
37
|
|
|
45
38
|
|
|
46
|
-
def get_obo(force: bool = False) -> Obo:
|
|
47
|
-
"""Get HGNC Gene Groups as OBO."""
|
|
48
|
-
return HGNCGroupGetter(force=force)
|
|
49
|
-
|
|
50
|
-
|
|
51
39
|
def get_hierarchy(force: bool = False) -> Mapping[str, list[str]]:
|
|
52
40
|
"""Get the HGNC Gene Families hierarchy as a dictionary."""
|
|
53
41
|
path = ensure_path(PREFIX, url=HIERARCHY_URL, force=force)
|
|
@@ -99,12 +87,14 @@ def _get_terms_helper(force: bool = False) -> Iterable[Term]:
|
|
|
99
87
|
)
|
|
100
88
|
if pubmed_ids and pd.notna(pubmed_ids):
|
|
101
89
|
for s in pubmed_ids.replace(" ", ",").split(","):
|
|
102
|
-
|
|
90
|
+
s = s.strip()
|
|
91
|
+
if s:
|
|
92
|
+
term.append_provenance(Reference(prefix="pubmed", identifier=s))
|
|
103
93
|
if desc_go and pd.notna(desc_go):
|
|
104
94
|
go_id = desc_go[len("http://purl.uniprot.org/go/") :]
|
|
105
95
|
term.append_relationship(enables, Reference(prefix="GO", identifier=go_id))
|
|
106
96
|
if symbol and pd.notna(symbol):
|
|
107
|
-
term.append_synonym(
|
|
97
|
+
term.append_synonym(symbol, type=symbol_type)
|
|
108
98
|
term.set_species(identifier="9606", name="Homo sapiens")
|
|
109
99
|
yield term
|
|
110
100
|
|
|
@@ -1,24 +1,27 @@
|
|
|
1
1
|
"""Convert ICD-10 to OBO.
|
|
2
2
|
|
|
3
|
-
Run with python -m pyobo.sources.icd10 -v
|
|
3
|
+
Run with ``python -m pyobo.sources.icd10 -v``.
|
|
4
|
+
|
|
5
|
+
.. note::
|
|
6
|
+
|
|
7
|
+
If web requests are stalling, try deleting the ``~/.cachier`` directory.
|
|
4
8
|
"""
|
|
5
9
|
|
|
6
10
|
import logging
|
|
7
11
|
from collections.abc import Iterable, Mapping
|
|
12
|
+
from pathlib import Path
|
|
8
13
|
from typing import Any
|
|
9
14
|
|
|
10
|
-
import click
|
|
11
|
-
from more_click import verbose_option
|
|
12
15
|
from tqdm.auto import tqdm
|
|
13
16
|
|
|
14
|
-
from
|
|
17
|
+
from .icd_utils import (
|
|
15
18
|
ICD10_TOP_LEVEL_URL,
|
|
16
19
|
get_child_identifiers,
|
|
17
|
-
|
|
20
|
+
get_icd_10_top,
|
|
18
21
|
visiter,
|
|
19
22
|
)
|
|
20
|
-
from
|
|
21
|
-
from
|
|
23
|
+
from ...struct import Obo, Reference, Synonym, Term, has_category
|
|
24
|
+
from ...utils.path import prefix_directory_join
|
|
22
25
|
|
|
23
26
|
__all__ = [
|
|
24
27
|
"ICD10Getter",
|
|
@@ -34,37 +37,39 @@ class ICD10Getter(Obo):
|
|
|
34
37
|
"""An ontology representation of ICD-10."""
|
|
35
38
|
|
|
36
39
|
ontology = PREFIX
|
|
37
|
-
|
|
40
|
+
static_version = VERSION
|
|
41
|
+
typedefs = [has_category]
|
|
38
42
|
|
|
39
43
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
40
44
|
"""Iterate over terms in the ontology."""
|
|
41
|
-
return iter_terms()
|
|
45
|
+
return iter_terms(self._version_or_raise)
|
|
42
46
|
|
|
43
47
|
|
|
44
|
-
def
|
|
45
|
-
|
|
46
|
-
|
|
48
|
+
def _get_chapters(version: str, path: Path):
|
|
49
|
+
res_json = get_icd_10_top(version=version, path=path)
|
|
50
|
+
chapter_urls = res_json["child"]
|
|
51
|
+
tqdm.write(f"there are {len(chapter_urls)} chapters")
|
|
52
|
+
identifiers = get_child_identifiers(ICD10_TOP_LEVEL_URL, res_json)
|
|
53
|
+
return identifiers
|
|
47
54
|
|
|
48
55
|
|
|
49
|
-
def iter_terms() -> Iterable[Term]:
|
|
56
|
+
def iter_terms(version: str) -> Iterable[Term]:
|
|
50
57
|
"""Iterate over ICD-10 terms."""
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
directory = prefix_directory_join(PREFIX, version=VERSION)
|
|
55
|
-
|
|
56
|
-
chapter_urls = res_json["child"]
|
|
57
|
-
tqdm.write(f"there are {len(chapter_urls)} chapters")
|
|
58
|
+
directory = prefix_directory_join(PREFIX, version=version)
|
|
59
|
+
identifiers = _get_chapters(version=version, path=directory.joinpath("top.json"))
|
|
58
60
|
|
|
59
61
|
visited_identifiers: set[str] = set()
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
62
|
+
with tqdm(desc=f"[{PREFIX}]") as pbar:
|
|
63
|
+
for identifier in identifiers:
|
|
64
|
+
for term in visiter(
|
|
65
|
+
identifier,
|
|
66
|
+
visited_identifiers,
|
|
67
|
+
directory,
|
|
68
|
+
endpoint=ICD10_TOP_LEVEL_URL,
|
|
69
|
+
converter=_extract_icd10,
|
|
70
|
+
):
|
|
71
|
+
pbar.update(1)
|
|
72
|
+
yield term
|
|
68
73
|
|
|
69
74
|
|
|
70
75
|
def _extract_icd10(res_json: Mapping[str, Any]) -> Term:
|
|
@@ -81,17 +86,10 @@ def _extract_icd10(res_json: Mapping[str, Any]) -> Term:
|
|
|
81
86
|
synonyms=synonyms,
|
|
82
87
|
parents=parents,
|
|
83
88
|
)
|
|
84
|
-
|
|
85
|
-
rv.append_property("class_kind", res_json["classKind"])
|
|
89
|
+
rv.annotate_string(has_category, res_json["classKind"])
|
|
86
90
|
|
|
87
91
|
return rv
|
|
88
92
|
|
|
89
93
|
|
|
90
|
-
@click.command()
|
|
91
|
-
@verbose_option
|
|
92
|
-
def _main():
|
|
93
|
-
get_obo().write_default(use_tqdm=True)
|
|
94
|
-
|
|
95
|
-
|
|
96
94
|
if __name__ == "__main__":
|
|
97
|
-
|
|
95
|
+
ICD10Getter.cli()
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Convert ICD11 to OBO.
|
|
2
|
+
|
|
3
|
+
Run with ``python -m pyobo.sources.icd11 -v``.
|
|
4
|
+
|
|
5
|
+
.. note::
|
|
6
|
+
|
|
7
|
+
If web requests are stalling, try deleting the ``~/.cachier`` directory.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
from collections.abc import Iterable, Mapping
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from tqdm.auto import tqdm
|
|
16
|
+
|
|
17
|
+
from .icd_utils import (
|
|
18
|
+
ICD11_TOP_LEVEL_URL,
|
|
19
|
+
ICDError,
|
|
20
|
+
get_child_identifiers,
|
|
21
|
+
get_icd,
|
|
22
|
+
get_icd_11_mms,
|
|
23
|
+
visiter,
|
|
24
|
+
)
|
|
25
|
+
from ...struct import Obo, Reference, Synonym, Term, TypeDef, default_reference
|
|
26
|
+
from ...utils.path import prefix_directory_join
|
|
27
|
+
|
|
28
|
+
__all__ = [
|
|
29
|
+
"ICD11Getter",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
PREFIX = "icd11"
|
|
35
|
+
CODE_PREFIX = "icd11.code"
|
|
36
|
+
|
|
37
|
+
CODE_PROP = TypeDef(reference=default_reference(PREFIX, "icd_mms_code"), is_metadata_tag=True)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class ICD11Getter(Obo):
    """Ontology getter for ICD-11, backed by :func:`iterate_icd11`."""

    ontology = PREFIX
    # the only typedef declared for this ontology
    typedefs = [CODE_PROP]
    # NOTE(review): presumably tells the base class that the version is
    # discovered while the terms are fetched - confirm against ``Obo``
    dynamic_version = True

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Return an iterable over all ICD-11 terms.

        :param force: accepted for interface compatibility; it is not forwarded
            to :func:`iterate_icd11`
        :returns: the terms produced by :func:`iterate_icd11` with no explicit version
        """
        terms = iterate_icd11()
        return terms
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def iterate_icd11(version: str | None = None) -> Iterable[Term]:
    """Yield ICD-11 foundation terms, adding the MMS code as an exact match when available.

    :param version: release to use; when ``None``, :func:`_get_icd11_terms_helper`
        resolves it
    :returns: an iterable of terms; a term gets an exact match in the ``icd11.code``
        prefix whenever its MMS record carries a ``code``
    """
    # All terms come from the ICD foundation API
    resolved_version, foundation_terms = _get_icd11_terms_helper(version=version)

    # MMS responses are cached one JSON file per term under this directory
    cache_directory = prefix_directory_join(PREFIX, "mms", version=resolved_version)

    # this takes a bit more than 2 hours
    for term in tqdm(foundation_terms, desc="Getting MMS", unit_scale=True):
        cache_path = cache_directory.joinpath(term.identifier).with_suffix(".json")
        if not cache_path.exists():
            try:
                payload = get_icd_11_mms(term.identifier)
            except ICDError:
                # not all terms have MMS entries, so a failed lookup is not
                # reported; an empty record is cached instead
                payload = {}
            cache_path.write_text(json.dumps(payload))
        else:
            payload = json.loads(cache_path.read_text())

        code = payload.get("code")
        if code:
            term.append_exact_match(Reference(prefix=CODE_PREFIX, identifier=code))

        yield term
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _get_icd11_terms_helper(version: str | None = None) -> tuple[str, list[Term]]:
    """Collect all ICD-11 terms together with the release they were stored under.

    The API doesn't seem to have a rate limit, but returns pretty slowly. This means
    that it only gets results at about 5 calls/second. Get ready to be patient - the
    API token expires every hour so there's a caching mechanism with :mod:`cachier`
    that gets a new one every hour.

    :param version: release identifier used for the cache directory. When ``None``,
        the top-level record is downloaded and its ``releaseId`` is used.
    :returns: a pair of the version that was used and the list of terms found by
        walking the hierarchy from the top-level record's children
    """
    if version is None:
        tqdm.write("No version passed, looking up version from ICD11")
        res_json = get_icd(ICD11_TOP_LEVEL_URL).json()
        version = res_json["releaseId"]
        fetched = True
    else:
        res_json = None
        fetched = False

    # From here on the version is known, so both paths share the same layout
    directory = prefix_directory_join(PREFIX, "base", version=version)
    top_path = directory.joinpath("top.json")

    if res_json is None:
        if top_path.is_file():
            res_json = json.loads(top_path.read_text())
        else:
            res_json = get_icd(ICD11_TOP_LEVEL_URL).json()
            fetched = True
            # The request above is not parameterized by ``version``, so the record
            # may belong to a different release than the directory it is cached in.
            release_id = res_json.get("releaseId")
            if release_id is not None and release_id != version:
                logger.warning(
                    "[%s] requested version %s but the API returned release %s",
                    PREFIX,
                    version,
                    release_id,
                )

    if fetched:
        # only freshly downloaded records need to be written to the cache
        top_path.write_text(json.dumps(res_json, indent=2))

    tqdm.write(f"There are {len(res_json['child'])} top level entities")

    visited_identifiers: set[str] = set()
    rv: list[Term] = []
    for identifier in get_child_identifiers(ICD11_TOP_LEVEL_URL, res_json):
        rv.extend(
            visiter(
                identifier,
                visited_identifiers,
                directory,
                endpoint=ICD11_TOP_LEVEL_URL,
                converter=_extract_icd11,
            )
        )

    return version, rv
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _extract_icd11(res_json: Mapping[str, Any]) -> Term:
    """Convert one ICD-11 entity record into a term.

    :param res_json: entity record; ``@id``, ``title`` and ``parent`` are read
        unconditionally, ``definition`` and ``synonym`` only when present
    :returns: a term carrying the record's name, definition, synonyms and parents
    """
    # parent URLs are cut at this fixed offset rather than matched against the prefix
    entity_offset = len("http://id.who.int/icd/entity/")

    identifier = res_json["@id"][len(ICD11_TOP_LEVEL_URL) :].lstrip("/")

    definition = None
    if "definition" in res_json:
        definition = res_json["definition"]["@value"]
        # flatten real line breaks as well as literal backslash-n sequences,
        # stripping before each pass
        for line_break in ("\r\n", "\\n", "\n"):
            definition = definition.strip().replace(line_break, " ")

    name = res_json["title"]["@value"]

    synonyms = []
    for synonym in res_json.get("synonym", []):
        synonyms.append(Synonym(synonym["label"]["@value"]))

    parents = []
    for url in res_json["parent"]:
        parent_identifier = url[entity_offset:]
        # a URL with nothing after the prefix yields no parent
        if parent_identifier:
            parents.append(Reference(prefix=PREFIX, identifier=parent_identifier))

    return Term(
        reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
        definition=definition,
        synonyms=synonyms,
        parents=parents,
    )
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
if __name__ == "__main__":
|
|
148
|
+
ICD11Getter.cli()
|