pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -113
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +108 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +183 -161
- pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +196 -118
- pyobo/gilda_utils.py +79 -200
- pyobo/identifier_utils/__init__.py +41 -0
- pyobo/identifier_utils/api.py +296 -0
- pyobo/identifier_utils/model.py +130 -0
- pyobo/identifier_utils/preprocessing.json +812 -0
- pyobo/identifier_utils/preprocessing.py +61 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +43 -39
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1358 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +0 -5
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +3 -8
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +10 -3
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +270 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1413 -643
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +13 -11
- pyobo/utils/io.py +17 -31
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +76 -70
- pyobo/version.py +3 -3
- {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/METADATA +224 -225
- pyobo-0.12.0.dist-info/RECORD +202 -0
- pyobo-0.12.0.dist-info/WHEEL +4 -0
- {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
- {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info/licenses}/LICENSE +0 -0
- pyobo/apps/__init__.py +0 -3
- pyobo/apps/cli.py +0 -24
- pyobo/apps/gilda/__init__.py +0 -3
- pyobo/apps/gilda/__main__.py +0 -8
- pyobo/apps/gilda/app.py +0 -48
- pyobo/apps/gilda/cli.py +0 -36
- pyobo/apps/gilda/templates/base.html +0 -33
- pyobo/apps/gilda/templates/home.html +0 -11
- pyobo/apps/gilda/templates/matches.html +0 -32
- pyobo/apps/mapper/__init__.py +0 -3
- pyobo/apps/mapper/__main__.py +0 -11
- pyobo/apps/mapper/cli.py +0 -37
- pyobo/apps/mapper/mapper.py +0 -187
- pyobo/apps/mapper/templates/base.html +0 -35
- pyobo/apps/mapper/templates/mapper_home.html +0 -64
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo-0.11.1.dist-info/RECORD +0 -173
- pyobo-0.11.1.dist-info/WHEEL +0 -5
- pyobo-0.11.1.dist-info/top_level.txt +0 -1
pyobo/normalizer.py
DELETED
|
@@ -1,232 +0,0 @@
|
|
|
1
|
-
"""Use synonyms from OBO to normalize names."""
|
|
2
|
-
|
|
3
|
-
import logging
|
|
4
|
-
from abc import ABC, abstractmethod
|
|
5
|
-
from collections.abc import Iterable, Mapping
|
|
6
|
-
from dataclasses import dataclass
|
|
7
|
-
from functools import lru_cache
|
|
8
|
-
from typing import Optional, Union
|
|
9
|
-
|
|
10
|
-
import bioregistry
|
|
11
|
-
|
|
12
|
-
from .api import names
|
|
13
|
-
from .utils.io import multisetdict
|
|
14
|
-
|
|
15
|
-
__all__ = [
|
|
16
|
-
"ground",
|
|
17
|
-
"Normalizer",
|
|
18
|
-
"OboNormalizer",
|
|
19
|
-
"MultiNormalizer",
|
|
20
|
-
"NormalizationResult",
|
|
21
|
-
]
|
|
22
|
-
|
|
23
|
-
logger = logging.getLogger(__name__)
|
|
24
|
-
|
|
25
|
-
NormalizationSuccess = tuple[str, str, str]
|
|
26
|
-
NormalizationFailure = tuple[None, None, str]
|
|
27
|
-
NormalizationResult = Union[NormalizationSuccess, NormalizationFailure]
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class Normalizer(ABC):
|
|
31
|
-
"""A normalizer."""
|
|
32
|
-
|
|
33
|
-
id_to_name: dict[str, str]
|
|
34
|
-
id_to_synonyms: dict[str, list[str]]
|
|
35
|
-
|
|
36
|
-
#: A mapping from all synonyms to the set of identifiers that they point to.
|
|
37
|
-
#: In a perfect world, each would only be a single element.
|
|
38
|
-
synonym_to_identifiers_mapping: dict[str, set[str]]
|
|
39
|
-
#: A mapping from normalized names to the actual ones that they came from
|
|
40
|
-
norm_name_to_name: dict[str, set[str]]
|
|
41
|
-
|
|
42
|
-
def __init__(
|
|
43
|
-
self,
|
|
44
|
-
id_to_name: dict[str, str],
|
|
45
|
-
id_to_synonyms: dict[str, list[str]],
|
|
46
|
-
remove_prefix: Optional[str] = None,
|
|
47
|
-
) -> None:
|
|
48
|
-
"""Initialize the normalizer.
|
|
49
|
-
|
|
50
|
-
:param id_to_name: An identifier to name dictionary.
|
|
51
|
-
:param id_to_synonyms: An identifier to list of synonyms dictionary.
|
|
52
|
-
:param remove_prefix: A prefix to be removed from the identifiers. Useful for nomenclatures like ChEBI.
|
|
53
|
-
"""
|
|
54
|
-
self.id_to_name = id_to_name
|
|
55
|
-
self.id_to_synonyms = id_to_synonyms
|
|
56
|
-
self.synonym_to_identifiers_mapping = multisetdict(
|
|
57
|
-
self._iterate_synonyms_to_identifiers(
|
|
58
|
-
id_to_name=self.id_to_name,
|
|
59
|
-
id_to_synonyms=self.id_to_synonyms,
|
|
60
|
-
remove_prefix=remove_prefix,
|
|
61
|
-
)
|
|
62
|
-
)
|
|
63
|
-
self.norm_name_to_name = self._get_norm_name_to_names(self.synonym_to_identifiers_mapping)
|
|
64
|
-
|
|
65
|
-
@classmethod
|
|
66
|
-
def _get_norm_name_to_names(cls, synonyms: Iterable[str]) -> dict[str, set[str]]:
|
|
67
|
-
return multisetdict((cls._normalize_text(synonym), synonym) for synonym in synonyms)
|
|
68
|
-
|
|
69
|
-
@staticmethod
|
|
70
|
-
def _normalize_text(text: str) -> str:
|
|
71
|
-
text = text.strip().strip('"').strip("'").lower()
|
|
72
|
-
text = normalize_dashes(text)
|
|
73
|
-
text = text.replace("-", "") # remove all dashes
|
|
74
|
-
text = text.replace(" ", "") # remove all spaces
|
|
75
|
-
return text
|
|
76
|
-
|
|
77
|
-
@staticmethod
|
|
78
|
-
def _iterate_synonyms_to_identifiers(
|
|
79
|
-
*,
|
|
80
|
-
id_to_name: Mapping[str, str],
|
|
81
|
-
id_to_synonyms: Mapping[str, Iterable[str]],
|
|
82
|
-
remove_prefix: Optional[str] = None,
|
|
83
|
-
) -> Iterable[tuple[str, str]]:
|
|
84
|
-
if remove_prefix is not None:
|
|
85
|
-
remove_prefix = f'{remove_prefix.lower().rstrip(":")}:'
|
|
86
|
-
|
|
87
|
-
# Add name
|
|
88
|
-
for identifier, name in id_to_name.items():
|
|
89
|
-
if remove_prefix and identifier.lower().startswith(remove_prefix):
|
|
90
|
-
identifier = identifier[len(remove_prefix) :]
|
|
91
|
-
|
|
92
|
-
yield name, identifier
|
|
93
|
-
|
|
94
|
-
# Add synonyms
|
|
95
|
-
for identifier, synonyms in id_to_synonyms.items():
|
|
96
|
-
if remove_prefix and identifier.lower().startswith(remove_prefix):
|
|
97
|
-
identifier = identifier[len(remove_prefix) :]
|
|
98
|
-
|
|
99
|
-
for synonym in synonyms:
|
|
100
|
-
# it might overwrite but this is probably always due to alternate ids
|
|
101
|
-
yield synonym, identifier
|
|
102
|
-
|
|
103
|
-
def get_names(self, query: str) -> list[str]:
|
|
104
|
-
"""Get all names to which the query text maps."""
|
|
105
|
-
norm_text = self._normalize_text(query)
|
|
106
|
-
return list(self.norm_name_to_name.get(norm_text, []))
|
|
107
|
-
|
|
108
|
-
@abstractmethod
|
|
109
|
-
def normalize(self, query: str) -> NormalizationResult:
|
|
110
|
-
"""Try and normalize a name to a identifier and canonical name."""
|
|
111
|
-
raise NotImplementedError
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
@lru_cache
|
|
115
|
-
def get_normalizer(prefix: str) -> Normalizer:
|
|
116
|
-
"""Get an OBO normalizer."""
|
|
117
|
-
norm_prefix = bioregistry.normalize_prefix(prefix)
|
|
118
|
-
if norm_prefix is None:
|
|
119
|
-
raise ValueError(f"unhandled prefix: {prefix}")
|
|
120
|
-
logger.info("getting obo normalizer for %s", norm_prefix)
|
|
121
|
-
normalizer = OboNormalizer(norm_prefix)
|
|
122
|
-
logger.debug(
|
|
123
|
-
"normalizer for %s with %s name lookups",
|
|
124
|
-
normalizer.prefix,
|
|
125
|
-
len(normalizer.norm_name_to_name),
|
|
126
|
-
)
|
|
127
|
-
return normalizer
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
def ground(prefix: Union[str, Iterable[str]], query: str) -> NormalizationResult:
|
|
131
|
-
"""Normalize a string given the prefix's labels and synonyms.
|
|
132
|
-
|
|
133
|
-
:param prefix: If a string, only grounds against that namespace. If a list, will try grounding
|
|
134
|
-
against all in that order
|
|
135
|
-
:param query: The string to try grounding
|
|
136
|
-
"""
|
|
137
|
-
if isinstance(prefix, str):
|
|
138
|
-
normalizer = get_normalizer(prefix)
|
|
139
|
-
return normalizer.normalize(query)
|
|
140
|
-
else:
|
|
141
|
-
for p in prefix:
|
|
142
|
-
norm_prefix, identifier, name = ground(p, query)
|
|
143
|
-
if norm_prefix and identifier and name:
|
|
144
|
-
return norm_prefix, identifier, name
|
|
145
|
-
return None, None, query
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
class OboNormalizer(Normalizer):
|
|
149
|
-
"""A utility for normalizing by names."""
|
|
150
|
-
|
|
151
|
-
def __init__(self, prefix: str) -> None:
|
|
152
|
-
"""Initialize the normalizer by an ontology's Bioregistry prefix."""
|
|
153
|
-
self.prefix = prefix
|
|
154
|
-
self._len_prefix = len(prefix)
|
|
155
|
-
id_to_name = names.get_id_name_mapping(prefix)
|
|
156
|
-
id_to_synonyms = names.get_id_synonyms_mapping(prefix)
|
|
157
|
-
super().__init__(
|
|
158
|
-
id_to_name=dict(id_to_name),
|
|
159
|
-
id_to_synonyms=dict(id_to_synonyms),
|
|
160
|
-
remove_prefix=prefix,
|
|
161
|
-
)
|
|
162
|
-
|
|
163
|
-
def __repr__(self) -> str:
|
|
164
|
-
return f'OboNormalizer(prefix="{self.prefix}")'
|
|
165
|
-
|
|
166
|
-
def normalize(self, query: str) -> NormalizationResult:
|
|
167
|
-
"""Try and normalize a name to a identifier and canonical name."""
|
|
168
|
-
names = self.get_names(query)
|
|
169
|
-
if not names:
|
|
170
|
-
return None, None, query
|
|
171
|
-
|
|
172
|
-
for name in names:
|
|
173
|
-
identifiers = self.synonym_to_identifiers_mapping[name]
|
|
174
|
-
for identifier in identifiers:
|
|
175
|
-
if identifier in self.id_to_name:
|
|
176
|
-
return self.prefix, identifier, self.id_to_name[identifier]
|
|
177
|
-
logger.warning(f"Could not find valid identifier for {name} from {identifiers}")
|
|
178
|
-
|
|
179
|
-
# maybe it happens that one can't be found?
|
|
180
|
-
logger.warning(f"was able to look up name {query}->{names} but not find fresh identifier")
|
|
181
|
-
return None, None, query
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
@dataclass
|
|
185
|
-
class MultiNormalizer:
|
|
186
|
-
"""Multiple normalizers together.
|
|
187
|
-
|
|
188
|
-
If you're looking for taxa of exotic plants, you might use:
|
|
189
|
-
|
|
190
|
-
>>> from pyobo.normalizer import MultiNormalizer
|
|
191
|
-
>>> normalizer = MultiNormalizer(prefixes=["ncbitaxon", "itis"])
|
|
192
|
-
>>> normalizer.normalize("Homo sapiens")
|
|
193
|
-
('ncbitaxon', '9606', 'Homo sapiens')
|
|
194
|
-
>>> normalizer.normalize("Abies bifolia") # variety not listed in NCBI
|
|
195
|
-
('itis', '507501', 'Abies bifolia')
|
|
196
|
-
>>> normalizer.normalize("vulcan") # nice try, nerds
|
|
197
|
-
(None, None, None)
|
|
198
|
-
"""
|
|
199
|
-
|
|
200
|
-
#: The normalizers for each prefix
|
|
201
|
-
normalizers: list[Normalizer]
|
|
202
|
-
|
|
203
|
-
@staticmethod
|
|
204
|
-
def from_prefixes(prefixes: list[str]) -> "MultiNormalizer":
|
|
205
|
-
"""Instantiate normalizers based on the given prefixes, in preferred order.."""
|
|
206
|
-
return MultiNormalizer([get_normalizer(prefix) for prefix in prefixes])
|
|
207
|
-
|
|
208
|
-
def normalize(self, query: str) -> NormalizationResult:
|
|
209
|
-
"""Try and normalize a canonical name using multiple normalizers."""
|
|
210
|
-
for normalizer in self.normalizers:
|
|
211
|
-
prefix, identifier, name = normalizer.normalize(query)
|
|
212
|
-
if prefix and identifier and name: # all not empty
|
|
213
|
-
return prefix, identifier, name
|
|
214
|
-
return None, None, query
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
# See: https://en.wikipedia.org/wiki/Dash
|
|
218
|
-
FIGURE_DASH = b"\xe2\x80\x92".decode("utf-8")
|
|
219
|
-
EN_DASH = b"\xe2\x80\x93".decode("utf-8")
|
|
220
|
-
EM_DASH = b"\xe2\x80\x94".decode("utf-8")
|
|
221
|
-
HORIZONAL_BAR = b"\xe2\x80\x95".decode("utf-8")
|
|
222
|
-
NORMAL_DASH = "-"
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
def normalize_dashes(s: str) -> str:
|
|
226
|
-
"""Normalize dashes in a string."""
|
|
227
|
-
return (
|
|
228
|
-
s.replace(FIGURE_DASH, NORMAL_DASH)
|
|
229
|
-
.replace(EN_DASH, NORMAL_DASH)
|
|
230
|
-
.replace(EM_DASH, NORMAL_DASH)
|
|
231
|
-
.replace(HORIZONAL_BAR, NORMAL_DASH)
|
|
232
|
-
)
|
pyobo/registries/__init__.py
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
"""Extract registry information."""
|
|
2
|
-
|
|
3
|
-
from .metaregistry import ( # noqa: F401
|
|
4
|
-
curie_has_blacklisted_prefix,
|
|
5
|
-
curie_has_blacklisted_suffix,
|
|
6
|
-
curie_is_blacklisted,
|
|
7
|
-
get_remappings_full,
|
|
8
|
-
get_remappings_prefix,
|
|
9
|
-
get_xrefs_blacklist,
|
|
10
|
-
get_xrefs_prefix_blacklist,
|
|
11
|
-
get_xrefs_suffix_blacklist,
|
|
12
|
-
has_no_download,
|
|
13
|
-
iter_cached_obo,
|
|
14
|
-
remap_full,
|
|
15
|
-
remap_prefix,
|
|
16
|
-
)
|