pyobo 0.12.7__py3-none-any.whl → 0.12.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +12 -1
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +2 -1
- pyobo/api/alts.py +0 -0
- pyobo/api/combine.py +0 -0
- pyobo/api/edges.py +0 -0
- pyobo/api/embedding.py +36 -6
- pyobo/api/hierarchy.py +6 -4
- pyobo/api/metadata.py +0 -0
- pyobo/api/names.py +0 -0
- pyobo/api/properties.py +12 -3
- pyobo/api/relations.py +9 -5
- pyobo/api/species.py +0 -0
- pyobo/api/typedefs.py +0 -0
- pyobo/api/utils.py +0 -0
- pyobo/api/xrefs.py +0 -0
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +0 -0
- pyobo/cli/database.py +0 -0
- pyobo/cli/database_utils.py +0 -0
- pyobo/cli/lookup.py +0 -0
- pyobo/cli/utils.py +0 -0
- pyobo/constants.py +0 -0
- pyobo/getters.py +0 -0
- pyobo/gilda_utils.py +0 -0
- pyobo/identifier_utils/__init__.py +0 -0
- pyobo/identifier_utils/api.py +0 -0
- pyobo/identifier_utils/relations/__init__.py +0 -0
- pyobo/identifier_utils/relations/api.py +0 -0
- pyobo/identifier_utils/relations/data.json +0 -0
- pyobo/identifier_utils/relations/data_owl.json +0 -0
- pyobo/identifier_utils/relations/data_rdf.json +0 -0
- pyobo/identifier_utils/relations/data_rdfs.json +0 -0
- pyobo/mocks.py +0 -0
- pyobo/ner/__init__.py +8 -0
- pyobo/ner/api.py +0 -0
- pyobo/ner/normalizer.py +2 -2
- pyobo/ner/scispacy_utils.py +241 -0
- pyobo/plugins.py +0 -0
- pyobo/py.typed +0 -0
- pyobo/resource_utils.py +0 -0
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/ncbitaxon.py +0 -0
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +0 -0
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +0 -0
- pyobo/sources/__init__.py +2 -0
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +0 -0
- pyobo/sources/bigg/__init__.py +0 -0
- pyobo/sources/bigg/bigg_compartment.py +0 -0
- pyobo/sources/bigg/bigg_metabolite.py +0 -0
- pyobo/sources/bigg/bigg_model.py +0 -0
- pyobo/sources/bigg/bigg_reaction.py +0 -0
- pyobo/sources/biogrid.py +0 -0
- pyobo/sources/ccle.py +0 -0
- pyobo/sources/cgnc.py +0 -0
- pyobo/sources/chebi.py +0 -0
- pyobo/sources/chembl/__init__.py +0 -0
- pyobo/sources/chembl/chembl_cell.py +0 -0
- pyobo/sources/chembl/chembl_compound.py +0 -0
- pyobo/sources/chembl/chembl_mechanism.py +0 -0
- pyobo/sources/chembl/chembl_target.py +1 -1
- pyobo/sources/chembl/chembl_tissue.py +0 -0
- pyobo/sources/civic_gene.py +0 -0
- pyobo/sources/clinicaltrials.py +0 -0
- pyobo/sources/complexportal.py +0 -0
- pyobo/sources/conso.py +0 -0
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +0 -0
- pyobo/sources/cvx.py +0 -0
- pyobo/sources/depmap.py +0 -0
- pyobo/sources/dictybase_gene.py +0 -0
- pyobo/sources/drugbank/__init__.py +0 -0
- pyobo/sources/drugbank/drugbank.py +0 -0
- pyobo/sources/drugbank/drugbank_salt.py +0 -0
- pyobo/sources/drugcentral.py +0 -0
- pyobo/sources/expasy.py +4 -1
- pyobo/sources/famplex.py +0 -0
- pyobo/sources/flybase.py +0 -0
- pyobo/sources/gard.py +0 -0
- pyobo/sources/geonames/__init__.py +0 -0
- pyobo/sources/geonames/features.py +0 -0
- pyobo/sources/geonames/geonames.py +0 -0
- pyobo/sources/geonames/utils.py +0 -0
- pyobo/sources/gmt_utils.py +0 -0
- pyobo/sources/go.py +6 -3
- pyobo/sources/gtdb.py +1 -0
- pyobo/sources/gwascentral/__init__.py +0 -0
- pyobo/sources/gwascentral/gwascentral_phenotype.py +0 -0
- pyobo/sources/gwascentral/gwascentral_study.py +0 -0
- pyobo/sources/hgnc/__init__.py +0 -0
- pyobo/sources/hgnc/hgnc.py +0 -0
- pyobo/sources/hgnc/hgncgenefamily.py +0 -0
- pyobo/sources/iana_media_type.py +3 -1
- pyobo/sources/icd/__init__.py +0 -0
- pyobo/sources/icd/icd10.py +0 -0
- pyobo/sources/icd/icd11.py +0 -0
- pyobo/sources/icd/icd_utils.py +0 -0
- pyobo/sources/iconclass.py +55 -0
- pyobo/sources/intact.py +0 -0
- pyobo/sources/interpro.py +0 -0
- pyobo/sources/itis.py +0 -0
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +0 -0
- pyobo/sources/kegg/genes.py +0 -0
- pyobo/sources/kegg/genome.py +0 -0
- pyobo/sources/kegg/pathway.py +0 -0
- pyobo/sources/mesh.py +0 -0
- pyobo/sources/mgi.py +0 -0
- pyobo/sources/mirbase/__init__.py +0 -0
- pyobo/sources/mirbase/mirbase.py +0 -0
- pyobo/sources/mirbase/mirbase_constants.py +0 -0
- pyobo/sources/mirbase/mirbase_family.py +0 -0
- pyobo/sources/mirbase/mirbase_mature.py +0 -0
- pyobo/sources/msigdb.py +0 -0
- pyobo/sources/ncbi/__init__.py +0 -0
- pyobo/sources/ncbi/ncbi_gc.py +0 -0
- pyobo/sources/ncbi/ncbigene.py +0 -0
- pyobo/sources/nih_reporter.py +0 -0
- pyobo/sources/nlm/__init__.py +0 -0
- pyobo/sources/nlm/nlm_catalog.py +0 -0
- pyobo/sources/nlm/nlm_publisher.py +0 -0
- pyobo/sources/nlm/utils.py +0 -0
- pyobo/sources/npass.py +0 -0
- pyobo/sources/omim_ps.py +0 -0
- pyobo/sources/pathbank.py +0 -0
- pyobo/sources/pfam/__init__.py +0 -0
- pyobo/sources/pfam/pfam.py +0 -0
- pyobo/sources/pfam/pfam_clan.py +0 -0
- pyobo/sources/pharmgkb/__init__.py +0 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +0 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +0 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +0 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +0 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +0 -0
- pyobo/sources/pharmgkb/utils.py +0 -0
- pyobo/sources/pid.py +0 -0
- pyobo/sources/pombase.py +0 -0
- pyobo/sources/pubchem.py +0 -0
- pyobo/sources/reactome.py +0 -0
- pyobo/sources/rgd.py +0 -0
- pyobo/sources/rhea.py +0 -0
- pyobo/sources/ror.py +0 -0
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +0 -0
- pyobo/sources/selventa/scomp.py +0 -0
- pyobo/sources/selventa/sdis.py +0 -0
- pyobo/sources/selventa/sfam.py +0 -0
- pyobo/sources/sgd.py +0 -0
- pyobo/sources/signor/__init__.py +0 -0
- pyobo/sources/signor/download.py +0 -0
- pyobo/sources/signor/signor_complexes.py +0 -0
- pyobo/sources/slm.py +0 -0
- pyobo/sources/spdx.py +0 -0
- pyobo/sources/umls/__init__.py +0 -0
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +0 -0
- pyobo/sources/umls/sty.py +0 -0
- pyobo/sources/umls/synonym_types.tsv +0 -0
- pyobo/sources/umls/umls.py +0 -0
- pyobo/sources/unimod.py +0 -0
- pyobo/sources/uniprot/__init__.py +0 -0
- pyobo/sources/uniprot/uniprot.py +0 -0
- pyobo/sources/uniprot/uniprot_ptm.py +0 -0
- pyobo/sources/utils.py +0 -0
- pyobo/sources/wikipathways.py +0 -0
- pyobo/sources/zfin.py +0 -0
- pyobo/ssg/__init__.py +0 -0
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +0 -0
- pyobo/ssg/term.html +0 -0
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +0 -0
- pyobo/struct/functional/__init__.py +0 -0
- pyobo/struct/functional/dsl.py +0 -0
- pyobo/struct/functional/macros.py +12 -12
- pyobo/struct/functional/obo_to_functional.py +0 -0
- pyobo/struct/functional/ontology.py +0 -0
- pyobo/struct/functional/utils.py +0 -0
- pyobo/struct/obo/__init__.py +0 -0
- pyobo/struct/obo/reader.py +0 -0
- pyobo/struct/obo/reader_utils.py +0 -0
- pyobo/struct/obograph/__init__.py +0 -0
- pyobo/struct/obograph/export.py +0 -0
- pyobo/struct/obograph/reader.py +0 -0
- pyobo/struct/obograph/utils.py +0 -0
- pyobo/struct/reference.py +3 -1
- pyobo/struct/struct.py +22 -14
- pyobo/struct/struct_utils.py +0 -0
- pyobo/struct/typedef.py +0 -0
- pyobo/struct/utils.py +0 -0
- pyobo/struct/vocabulary.py +0 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +0 -0
- pyobo/utils/io.py +0 -0
- pyobo/utils/iter.py +0 -0
- pyobo/utils/misc.py +0 -0
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +0 -0
- pyobo/version.py +1 -1
- {pyobo-0.12.7.dist-info → pyobo-0.12.9.dist-info}/METADATA +5 -1
- pyobo-0.12.9.dist-info/RECORD +210 -0
- {pyobo-0.12.7.dist-info → pyobo-0.12.9.dist-info}/WHEEL +1 -1
- {pyobo-0.12.7.dist-info → pyobo-0.12.9.dist-info}/licenses/LICENSE +0 -0
- pyobo-0.12.7.dist-info/RECORD +0 -208
- {pyobo-0.12.7.dist-info → pyobo-0.12.9.dist-info}/entry_points.txt +0 -0
pyobo/.DS_Store
CHANGED
|
File without changes
|
pyobo/__init__.py
CHANGED
|
@@ -50,6 +50,7 @@ from .api import (
|
|
|
50
50
|
get_synonyms,
|
|
51
51
|
get_text_embedding,
|
|
52
52
|
get_text_embedding_similarity,
|
|
53
|
+
get_text_embeddings_df,
|
|
53
54
|
get_typedef_df,
|
|
54
55
|
get_xref,
|
|
55
56
|
get_xrefs,
|
|
@@ -58,7 +59,13 @@ from .api import (
|
|
|
58
59
|
is_descendent,
|
|
59
60
|
)
|
|
60
61
|
from .getters import get_ontology
|
|
61
|
-
from .ner import
|
|
62
|
+
from .ner import (
|
|
63
|
+
get_grounder,
|
|
64
|
+
get_scispacy_entities,
|
|
65
|
+
get_scispacy_entity_linker,
|
|
66
|
+
get_scispacy_knowledgebase,
|
|
67
|
+
ground,
|
|
68
|
+
)
|
|
62
69
|
from .plugins import (
|
|
63
70
|
has_nomenclature_plugin,
|
|
64
71
|
iter_nomenclature_plugins,
|
|
@@ -139,12 +146,16 @@ __all__ = [
|
|
|
139
146
|
"get_relation",
|
|
140
147
|
"get_relation_mapping",
|
|
141
148
|
"get_relations_df",
|
|
149
|
+
"get_scispacy_entities",
|
|
150
|
+
"get_scispacy_entity_linker",
|
|
151
|
+
"get_scispacy_knowledgebase",
|
|
142
152
|
"get_species",
|
|
143
153
|
"get_sssom_df",
|
|
144
154
|
"get_subhierarchy",
|
|
145
155
|
"get_synonyms",
|
|
146
156
|
"get_text_embedding",
|
|
147
157
|
"get_text_embedding_similarity",
|
|
158
|
+
"get_text_embeddings_df",
|
|
148
159
|
"get_typedef_df",
|
|
149
160
|
"get_version",
|
|
150
161
|
"get_xref",
|
pyobo/__main__.py
CHANGED
|
File without changes
|
pyobo/api/__init__.py
CHANGED
|
@@ -8,7 +8,7 @@ from .alts import (
|
|
|
8
8
|
)
|
|
9
9
|
from .combine import get_literal_mappings_subset
|
|
10
10
|
from .edges import get_edges, get_edges_df, get_graph
|
|
11
|
-
from .embedding import get_text_embedding, get_text_embedding_similarity
|
|
11
|
+
from .embedding import get_text_embedding, get_text_embedding_similarity, get_text_embeddings_df
|
|
12
12
|
from .hierarchy import (
|
|
13
13
|
get_ancestors,
|
|
14
14
|
get_children,
|
|
@@ -119,6 +119,7 @@ __all__ = [
|
|
|
119
119
|
"get_synonyms",
|
|
120
120
|
"get_text_embedding",
|
|
121
121
|
"get_text_embedding_similarity",
|
|
122
|
+
"get_text_embeddings_df",
|
|
122
123
|
"get_typedef_df",
|
|
123
124
|
"get_version",
|
|
124
125
|
"get_xref",
|
pyobo/api/alts.py
CHANGED
|
File without changes
|
pyobo/api/combine.py
CHANGED
|
File without changes
|
pyobo/api/edges.py
CHANGED
|
File without changes
|
pyobo/api/embedding.py
CHANGED
|
@@ -6,8 +6,9 @@ from typing import TYPE_CHECKING
|
|
|
6
6
|
|
|
7
7
|
import curies
|
|
8
8
|
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
9
10
|
|
|
10
|
-
from pyobo.api.names import get_definition, get_name
|
|
11
|
+
from pyobo.api.names import get_definition, get_name, get_references
|
|
11
12
|
|
|
12
13
|
if TYPE_CHECKING:
|
|
13
14
|
import sentence_transformers
|
|
@@ -16,6 +17,7 @@ __all__ = [
|
|
|
16
17
|
"get_text_embedding",
|
|
17
18
|
"get_text_embedding_model",
|
|
18
19
|
"get_text_embedding_similarity",
|
|
20
|
+
"get_text_embeddings_df",
|
|
19
21
|
]
|
|
20
22
|
|
|
21
23
|
|
|
@@ -39,6 +41,30 @@ def _get_text(
|
|
|
39
41
|
return name
|
|
40
42
|
|
|
41
43
|
|
|
44
|
+
def get_text_embeddings_df(
|
|
45
|
+
prefix: str,
|
|
46
|
+
*,
|
|
47
|
+
model: sentence_transformers.SentenceTransformer | None = None,
|
|
48
|
+
) -> pd.DataFrame:
|
|
49
|
+
"""Get embeddings for all entities in the resource.
|
|
50
|
+
|
|
51
|
+
:param prefix: The prefix of the resource whose entities should be embedded
|
|
52
|
+
:param model: A sentence transformer model. Defaults to ``all-MiniLM-L6-v2`` if not
|
|
53
|
+
given.
|
|
54
|
+
"""
|
|
55
|
+
luids, texts = [], []
|
|
56
|
+
for reference in get_references(prefix):
|
|
57
|
+
text = _get_text(reference)
|
|
58
|
+
if text is None:
|
|
59
|
+
continue
|
|
60
|
+
luids.append(reference.identifier)
|
|
61
|
+
texts.append(text)
|
|
62
|
+
if model is None:
|
|
63
|
+
model = get_text_embedding_model()
|
|
64
|
+
res = model.encode(texts)
|
|
65
|
+
return pd.DataFrame(res, index=luids)
|
|
66
|
+
|
|
67
|
+
|
|
42
68
|
def get_text_embedding(
|
|
43
69
|
reference: str | curies.Reference | curies.ReferenceTuple,
|
|
44
70
|
*,
|
|
@@ -47,8 +73,10 @@ def get_text_embedding(
|
|
|
47
73
|
"""Get a text embedding for an entity, or return none if no text is available.
|
|
48
74
|
|
|
49
75
|
:param reference: A reference, either as a string or Reference object
|
|
50
|
-
:param model: A sentence transformer model. Defaults to ``all-MiniLM-L6-v2`` if not
|
|
51
|
-
|
|
76
|
+
:param model: A sentence transformer model. Defaults to ``all-MiniLM-L6-v2`` if not
|
|
77
|
+
given.
|
|
78
|
+
|
|
79
|
+
:returns: A 1D numpy float array of embeddings from :class:`sentence_transformers`
|
|
52
80
|
|
|
53
81
|
.. code-block:: python
|
|
54
82
|
|
|
@@ -87,9 +115,11 @@ def get_text_embedding_similarity(
|
|
|
87
115
|
|
|
88
116
|
:param reference_1: A reference, given as a string or Reference object
|
|
89
117
|
:param reference_2: A second reference
|
|
90
|
-
:param model: A sentence transformer model. Defaults to ``all-MiniLM-L6-v2`` if not
|
|
91
|
-
|
|
92
|
-
|
|
118
|
+
:param model: A sentence transformer model. Defaults to ``all-MiniLM-L6-v2`` if not
|
|
119
|
+
given.
|
|
120
|
+
|
|
121
|
+
:returns: A floating point similarity, if text is available for both references,
|
|
122
|
+
otherwise none
|
|
93
123
|
|
|
94
124
|
.. code-block:: python
|
|
95
125
|
|
pyobo/api/hierarchy.py
CHANGED
|
@@ -163,7 +163,8 @@ def is_descendent(
|
|
|
163
163
|
:param ancestor_prefix: The prefix for the ancestor
|
|
164
164
|
:param ancestor_identifier: The local unique identifier for the ancestor
|
|
165
165
|
:param kwargs: Keyword arguments for :func:`get_hierarchy`
|
|
166
|
-
|
|
166
|
+
|
|
167
|
+
:returns: If the descendant has the given ancestor
|
|
167
168
|
|
|
168
169
|
Check that ``GO:0070246`` (natural killer cell apoptotic process) is a descendant of
|
|
169
170
|
``GO:0006915`` (apoptotic process)
|
|
@@ -254,10 +255,11 @@ def has_ancestor(
|
|
|
254
255
|
:param ancestor_prefix: The prefix for the ancestor
|
|
255
256
|
:param ancestor_identifier: The local unique identifier for the ancestor
|
|
256
257
|
:param kwargs: Keyword arguments for :func:`get_hierarchy`
|
|
257
|
-
:return: If the decendant has the given ancestor
|
|
258
258
|
|
|
259
|
-
|
|
260
|
-
|
|
259
|
+
:returns: If the descendant has the given ancestor
|
|
260
|
+
|
|
261
|
+
Check that ``GO:0008219`` (cell death) is an ancestor of ``GO:0006915`` (apoptotic
|
|
262
|
+
process):
|
|
261
263
|
|
|
262
264
|
>>> apoptosis = Reference.from_curie("GO:0006915", name="apoptotic process")
|
|
263
265
|
>>> cell_death = Reference.from_curie("GO:0008219", name="cell death")
|
pyobo/api/metadata.py
CHANGED
|
File without changes
|
pyobo/api/names.py
CHANGED
|
File without changes
|
pyobo/api/properties.py
CHANGED
|
@@ -111,6 +111,7 @@ def get_properties_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.Da
|
|
|
111
111
|
"""Extract properties.
|
|
112
112
|
|
|
113
113
|
:param prefix: the resource to load
|
|
114
|
+
|
|
114
115
|
:returns: A dataframe with the properties
|
|
115
116
|
"""
|
|
116
117
|
df1 = get_literal_properties_df(prefix, **kwargs)
|
|
@@ -131,6 +132,7 @@ def get_filtered_properties_mapping(
|
|
|
131
132
|
|
|
132
133
|
:param prefix: the resource to load
|
|
133
134
|
:param prop: the property to extract
|
|
135
|
+
|
|
134
136
|
:returns: A mapping from identifier to property value
|
|
135
137
|
"""
|
|
136
138
|
df = get_filtered_properties_df(prefix, prop, **kwargs)
|
|
@@ -145,6 +147,7 @@ def get_filtered_properties_multimapping(
|
|
|
145
147
|
|
|
146
148
|
:param prefix: the resource to load
|
|
147
149
|
:param prop: the property to extract
|
|
150
|
+
|
|
148
151
|
:returns: A mapping from identifier to property values
|
|
149
152
|
"""
|
|
150
153
|
df = get_filtered_properties_df(prefix, prop, **kwargs)
|
|
@@ -159,7 +162,9 @@ def get_property(
|
|
|
159
162
|
:param prefix: the resource to load
|
|
160
163
|
:param identifier: the identifier within the resource
|
|
161
164
|
:param prop: the property to extract
|
|
162
|
-
|
|
165
|
+
|
|
166
|
+
:returns: The single value for the property. If multiple are expected, use
|
|
167
|
+
:func:`get_properties`
|
|
163
168
|
|
|
164
169
|
>>> import pyobo
|
|
165
170
|
>>> pyobo.get_property("chebi", "132964", "http://purl.obolibrary.org/obo/chebi/smiles")
|
|
@@ -182,7 +187,9 @@ def get_properties(
|
|
|
182
187
|
:param prefix: the resource to load
|
|
183
188
|
:param identifier: the identifier within the resource
|
|
184
189
|
:param prop: the property to extract
|
|
185
|
-
|
|
190
|
+
|
|
191
|
+
:returns: Multiple values for the property. If only one is expected, use
|
|
192
|
+
:func:`get_property`
|
|
186
193
|
"""
|
|
187
194
|
filtered_properties_multimapping = get_filtered_properties_multimapping(
|
|
188
195
|
prefix=prefix, prop=prop, **kwargs
|
|
@@ -198,7 +205,9 @@ def get_filtered_properties_df(
|
|
|
198
205
|
|
|
199
206
|
:param prefix: the resource to load
|
|
200
207
|
:param prop: the property to extract
|
|
201
|
-
|
|
208
|
+
|
|
209
|
+
:returns: A dataframe from identifier to property value. Columns are [<prefix>_id,
|
|
210
|
+
value].
|
|
202
211
|
"""
|
|
203
212
|
prop = _ensure_ref(prop, ontology_prefix=prefix)
|
|
204
213
|
df = get_properties_df(prefix, **kwargs)
|
pyobo/api/relations.py
CHANGED
|
@@ -135,9 +135,11 @@ def get_relation_mapping(
|
|
|
135
135
|
) -> Mapping[str, str]:
|
|
136
136
|
"""Get relations from identifiers in the source prefix to target prefix with the given relation.
|
|
137
137
|
|
|
138
|
-
.. warning::
|
|
138
|
+
.. warning::
|
|
139
139
|
|
|
140
|
-
|
|
140
|
+
Assumes there's only one version of the property for each term.
|
|
141
|
+
|
|
142
|
+
Example usage: get homology between HGNC and MGI:
|
|
141
143
|
|
|
142
144
|
>>> import pyobo
|
|
143
145
|
>>> human_mapt_hgnc_id = "6893"
|
|
@@ -161,16 +163,18 @@ def get_relation(
|
|
|
161
163
|
) -> str | None:
|
|
162
164
|
"""Get the target identifier corresponding to the given relationship from the source prefix/identifier pair.
|
|
163
165
|
|
|
164
|
-
.. warning::
|
|
166
|
+
.. warning::
|
|
167
|
+
|
|
168
|
+
Assumes there's only one version of the property for each term.
|
|
165
169
|
|
|
166
|
-
|
|
170
|
+
Example usage: get homology between MAPT in HGNC and MGI:
|
|
167
171
|
|
|
168
172
|
>>> import pyobo
|
|
169
173
|
>>> human_mapt_hgnc_id = "6893"
|
|
170
174
|
>>> mouse_mapt_mgi_id = "97180"
|
|
171
175
|
>>> assert mouse_mapt_mgi_id == pyobo.get_relation(
|
|
172
176
|
... "hgnc", human_mapt_hgnc_id, "ro:HOM0000017", "mgi"
|
|
173
|
-
|
|
177
|
+
>>> )
|
|
174
178
|
"""
|
|
175
179
|
relation_mapping = get_relation_mapping(
|
|
176
180
|
prefix=prefix,
|
pyobo/api/species.py
CHANGED
|
File without changes
|
pyobo/api/typedefs.py
CHANGED
|
File without changes
|
pyobo/api/utils.py
CHANGED
|
File without changes
|
pyobo/api/xrefs.py
CHANGED
|
File without changes
|
pyobo/cli/__init__.py
CHANGED
|
File without changes
|
pyobo/cli/cli.py
CHANGED
|
File without changes
|
pyobo/cli/database.py
CHANGED
|
File without changes
|
pyobo/cli/database_utils.py
CHANGED
|
File without changes
|
pyobo/cli/lookup.py
CHANGED
|
File without changes
|
pyobo/cli/utils.py
CHANGED
|
File without changes
|
pyobo/constants.py
CHANGED
|
File without changes
|
pyobo/getters.py
CHANGED
|
File without changes
|
pyobo/gilda_utils.py
CHANGED
|
File without changes
|
|
File without changes
|
pyobo/identifier_utils/api.py
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
pyobo/mocks.py
CHANGED
|
File without changes
|
pyobo/ner/__init__.py
CHANGED
|
@@ -2,8 +2,16 @@
|
|
|
2
2
|
|
|
3
3
|
from .api import get_grounder
|
|
4
4
|
from .normalizer import ground
|
|
5
|
+
from .scispacy_utils import (
|
|
6
|
+
get_scispacy_entities,
|
|
7
|
+
get_scispacy_entity_linker,
|
|
8
|
+
get_scispacy_knowledgebase,
|
|
9
|
+
)
|
|
5
10
|
|
|
6
11
|
__all__ = [
|
|
7
12
|
"get_grounder",
|
|
13
|
+
"get_scispacy_entities",
|
|
14
|
+
"get_scispacy_entity_linker",
|
|
15
|
+
"get_scispacy_knowledgebase",
|
|
8
16
|
"ground",
|
|
9
17
|
]
|
pyobo/ner/api.py
CHANGED
|
File without changes
|
pyobo/ner/normalizer.py
CHANGED
|
@@ -20,8 +20,8 @@ def ground(
|
|
|
20
20
|
) -> Reference | None:
|
|
21
21
|
"""Normalize a string given the prefix's labels and synonyms.
|
|
22
22
|
|
|
23
|
-
:param prefix: If a string, only grounds against that namespace. If a list, will try
|
|
24
|
-
against all in that order
|
|
23
|
+
:param prefix: If a string, only grounds against that namespace. If a list, will try
|
|
24
|
+
grounding against all in that order
|
|
25
25
|
:param query: The string to try grounding
|
|
26
26
|
"""
|
|
27
27
|
grounder = get_grounder(prefix, **kwargs)
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
"""A bridge between PyOBO and :mod:`scispacy`.
|
|
2
|
+
|
|
3
|
+
:mod:`scispacy` implements a lexical index in
|
|
4
|
+
:class:`scispacy.linking_utils.KnowledgeBase` which keeps track of labels, synonyms, and
|
|
5
|
+
definitions for entities. These are used to construct a TF-IDF index and implement
|
|
6
|
+
entity linking (also called named entity normalization (NEN) or grounding) in
|
|
7
|
+
:class:`scispacy.linking.EntityLinker`.
|
|
8
|
+
|
|
9
|
+
Constructing a Lexical Index
|
|
10
|
+
============================
|
|
11
|
+
|
|
12
|
+
An *ad hoc* SciSpacy lexical index can be constructed on-the-fly by passing a
|
|
13
|
+
Bioregistry prefix to :func:`pyobo.get_scispacy_knowledgebase`. In the following
|
|
14
|
+
example, the prefix ``to`` is used to construct a lexical index for the `Plant Trait
|
|
15
|
+
Ontology <https://bioregistry.io/to>`_.
|
|
16
|
+
|
|
17
|
+
.. code-block:: python
|
|
18
|
+
|
|
19
|
+
import pyobo
|
|
20
|
+
from scispacy.linking_utils import KnowledgeBase
|
|
21
|
+
|
|
22
|
+
kb: KnowledgeBase = pyobo.get_scispacy_knowledgebase("to")
|
|
23
|
+
|
|
24
|
+
The high-level PyOBO interface abstracts the differences between external ontologies
|
|
25
|
+
like the Plant Trait Ontology and databases that are converted to ontologies in
|
|
26
|
+
:mod:`pyobo.sources` like the `HUGO Gene Nomenclature Committee
|
|
27
|
+
<https://bioregistry.io/hgnc>`_. Therefore, you can also do
|
|
28
|
+
|
|
29
|
+
.. code-block:: python
|
|
30
|
+
|
|
31
|
+
import pyobo
|
|
32
|
+
from scispacy.linking_utils import KnowledgeBase
|
|
33
|
+
|
|
34
|
+
kb: KnowledgeBase = pyobo.get_scispacy_knowledgebase("hgnc")
|
|
35
|
+
|
|
36
|
+
Alternatively, a reusable class can be defined like in the following:
|
|
37
|
+
|
|
38
|
+
.. code-block:: python
|
|
39
|
+
|
|
40
|
+
import pyobo
|
|
41
|
+
from scispacy.linking_utils import KnowledgeBase
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class HGNCKnowledgeBase(KnowledgeBase):
|
|
45
|
+
def __init__(self) -> None:
|
|
46
|
+
super().__init__(pyobo.get_scispacy_entities("hgnc"))
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
kb = HGNCKnowledgeBase()
|
|
50
|
+
|
|
51
|
+
Constructing an Entity Linker
|
|
52
|
+
=============================
|
|
53
|
+
|
|
54
|
+
An entity linker can be constructed from a :class:`scispacy.linking_utils.KnowledgeBase`
|
|
55
|
+
like in:
|
|
56
|
+
|
|
57
|
+
.. code-block:: python
|
|
58
|
+
|
|
59
|
+
import pyobo
|
|
60
|
+
from scispacy.linking import EntityLinker
|
|
61
|
+
|
|
62
|
+
kb = pyobo.get_scispacy_knowledgebase("hgnc")
|
|
63
|
+
linker = EntityLinker.from_kb(kb, filter_for_definitions=False)
|
|
64
|
+
|
|
65
|
+
Where ``filter_for_definitions`` is set to ``False`` to retain entities that don't have
|
|
66
|
+
a definition.
|
|
67
|
+
|
|
68
|
+
PyOBO provides a convenience function :func:`pyobo.get_scispacy_entity_linker` that
|
|
69
|
+
wraps this workflow and also automatically caches the TF-IDF index constructed in the
|
|
70
|
+
process in the correctly versioned folder in the PyOBO cache.
|
|
71
|
+
|
|
72
|
+
.. code-block:: python
|
|
73
|
+
|
|
74
|
+
import pyobo
|
|
75
|
+
from scispacy.linking import EntityLinker
|
|
76
|
+
|
|
77
|
+
linker = pyobo.get_scispacy_entity_linker("hgnc", filter_for_definitions=False)
|
|
78
|
+
|
|
79
|
+
Full Workflow
|
|
80
|
+
=============
|
|
81
|
+
|
|
82
|
+
Once an entity linker has been constructed, it can be used in series with a
|
|
83
|
+
:class:`spacy.Language` object instantiated with :func:`spacy.load` to ground named
|
|
84
|
+
entities that were recognized by a model like ``en_core_web_sm``
|
|
85
|
+
|
|
86
|
+
.. code-block:: python
|
|
87
|
+
|
|
88
|
+
import pyobo
|
|
89
|
+
import spacy
|
|
90
|
+
from scispacy.linking import EntityLinker
|
|
91
|
+
from tabulate import tabulate
|
|
92
|
+
|
|
93
|
+
linker: EntityLinker = pyobo.get_scispacy_entity_linker("hgnc", filter_for_definitions=False)
|
|
94
|
+
|
|
95
|
+
# now, put it all together with a NER model
|
|
96
|
+
nlp = spacy.load("en_core_web_sm")
|
|
97
|
+
|
|
98
|
+
text = (
|
|
99
|
+
"RAC(Rho family)-alpha serine/threonine-protein kinase "
|
|
100
|
+
"is an enzyme that in humans is encoded by the AKT1 gene."
|
|
101
|
+
)
|
|
102
|
+
doc = linker(nlp(text))
|
|
103
|
+
|
|
104
|
+
rows = [
|
|
105
|
+
(
|
|
106
|
+
span,
|
|
107
|
+
span.start_char,
|
|
108
|
+
span.end_char,
|
|
109
|
+
f"`{curie} <https://bioregistry.io/{curie}>`_",
|
|
110
|
+
score,
|
|
111
|
+
)
|
|
112
|
+
for span in doc.ents
|
|
113
|
+
for curie, score in span._.kb_ents
|
|
114
|
+
]
|
|
115
|
+
print(tabulate(rows, headers=["text", "start", "end", "curie", "score"], tablefmt="rst"))
|
|
116
|
+
|
|
117
|
+
==== ===== === ============================================= ========
|
|
118
|
+
text start end curie score
|
|
119
|
+
==== ===== === ============================================= ========
|
|
120
|
+
AKT1 100 104 `hgnc:391 <https://bioregistry.io/hgnc:391>`_ 1
|
|
121
|
+
AKT1 100 104 `hgnc:392 <https://bioregistry.io/hgnc:392>`_ 0.776504
|
|
122
|
+
AKT1 100 104 `hgnc:393 <https://bioregistry.io/hgnc:393>`_ 0.764049
|
|
123
|
+
==== ===== === ============================================= ========
|
|
124
|
+
|
|
125
|
+
This example recognizes the AKT serine/threonine kinase 1 (AKT1) gene and provides three
|
|
126
|
+
highly scored groundings, the best of which, `hgnc:391
|
|
127
|
+
<https://bioregistry.io/hgnc:391>`_, is correct.
|
|
128
|
+
|
|
129
|
+
.. note::
|
|
130
|
+
|
|
131
|
+
The groundings and scores are stored by SciSpacy in the hidden attribute
|
|
132
|
+
``span._.kb_ents``.
|
|
133
|
+
"""
|
|
134
|
+
|
|
135
|
+
from __future__ import annotations
|
|
136
|
+
|
|
137
|
+
from collections.abc import Iterable
|
|
138
|
+
from typing import TYPE_CHECKING, Any
|
|
139
|
+
|
|
140
|
+
from typing_extensions import Unpack
|
|
141
|
+
|
|
142
|
+
from ..api.utils import get_version_from_kwargs
|
|
143
|
+
from ..constants import GetOntologyKwargs
|
|
144
|
+
from ..getters import get_ontology
|
|
145
|
+
from ..utils.path import prefix_directory_join
|
|
146
|
+
|
|
147
|
+
if TYPE_CHECKING:
|
|
148
|
+
from scispacy.linking import EntityLinker
|
|
149
|
+
from scispacy.linking_utils import Entity, KnowledgeBase
|
|
150
|
+
|
|
151
|
+
__all__ = [
|
|
152
|
+
"get_scispacy_entities",
|
|
153
|
+
"get_scispacy_entity_linker",
|
|
154
|
+
"get_scispacy_knowledgebase",
|
|
155
|
+
]
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def get_scispacy_entity_linker(
|
|
159
|
+
prefix: str,
|
|
160
|
+
*,
|
|
161
|
+
ontology_kwargs: GetOntologyKwargs | None = None,
|
|
162
|
+
candidate_generator_kwargs: dict[str, Any] | None = None,
|
|
163
|
+
**entity_linker_kwargs: Any,
|
|
164
|
+
) -> EntityLinker:
|
|
165
|
+
"""Get an entity linker object for usage with :mod:`scispacy`.
|
|
166
|
+
|
|
167
|
+
:param prefix :
|
|
168
|
+
The ontology's prefix, such as ``go`` for Gene Ontology, ``doid`` for the Disease
|
|
169
|
+
Ontology, or more.
|
|
170
|
+
|
|
171
|
+
:param ontology_kwargs: keyword arguments to pass to :func:`pyobo.get_ontology`,
|
|
172
|
+
such as ``version``.
|
|
173
|
+
:param candidate_generator_kwargs: keyword arguments to pass to
|
|
174
|
+
:class:`scispacy.candidate_generation.CandidateGenerator`, such as ``ef_search``
|
|
175
|
+
:param entity_linker_kwargs: keyword arguments to pass to
|
|
176
|
+
:class:`scispacy.linking.EntityLinker`, such as ``filter_for_definitions``
|
|
177
|
+
|
|
178
|
+
:returns: An object that can be applied in a :mod:`spacy` natural language
|
|
179
|
+
processing workflow, namely to apply grounding/named entity normalization to
|
|
180
|
+
recognized named entities.
|
|
181
|
+
"""
|
|
182
|
+
from scispacy.linking import EntityLinker
|
|
183
|
+
|
|
184
|
+
if ontology_kwargs is None:
|
|
185
|
+
ontology_kwargs = {}
|
|
186
|
+
|
|
187
|
+
version = get_version_from_kwargs(prefix, ontology_kwargs)
|
|
188
|
+
scispacy_cache_directory = prefix_directory_join(prefix, "scispacy", version=version)
|
|
189
|
+
|
|
190
|
+
# TODO see if we can skip loading the KB
|
|
191
|
+
kb = get_scispacy_knowledgebase(prefix, **ontology_kwargs)
|
|
192
|
+
linker = EntityLinker.from_kb(
|
|
193
|
+
kb,
|
|
194
|
+
ann_index_out_dir=scispacy_cache_directory.as_posix(),
|
|
195
|
+
candidate_generator_kwargs=candidate_generator_kwargs,
|
|
196
|
+
**(entity_linker_kwargs or {}),
|
|
197
|
+
)
|
|
198
|
+
return linker
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def get_scispacy_knowledgebase(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> KnowledgeBase:
|
|
202
|
+
"""Get a knowledgebase object for usage with :mod:`scispacy`.
|
|
203
|
+
|
|
204
|
+
:param prefix :
|
|
205
|
+
The ontology's prefix, such as ``go`` for Gene Ontology, ``doid`` for the Disease
|
|
206
|
+
Ontology, or more.
|
|
207
|
+
|
|
208
|
+
:param kwargs :
|
|
209
|
+
keyword arguments to pass to :func:`pyobo.get_ontology`, such as ``version``.
|
|
210
|
+
|
|
211
|
+
:returns: An object that represents a lexical index over name, synonym, and
|
|
212
|
+
definition strings from the ontology.
|
|
213
|
+
"""
|
|
214
|
+
from scispacy.linking_utils import KnowledgeBase
|
|
215
|
+
|
|
216
|
+
return KnowledgeBase(get_scispacy_entities(prefix, **kwargs))
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def get_scispacy_entities(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> Iterable[Entity]:
|
|
220
|
+
"""Iterate over entities in a given ontology via :mod:`pyobo`.
|
|
221
|
+
|
|
222
|
+
:param prefix :
|
|
223
|
+
The ontology's prefix, such as ``go`` for Gene Ontology, ``doid`` for the Disease
|
|
224
|
+
Ontology, or more.
|
|
225
|
+
|
|
226
|
+
:param kwargs :
|
|
227
|
+
keyword arguments to pass to :func:`pyobo.get_ontology`, such as ``version``.
|
|
228
|
+
|
|
229
|
+
:yields: Entity objects for all terms in the ontology
|
|
230
|
+
"""
|
|
231
|
+
from scispacy.linking_utils import Entity
|
|
232
|
+
|
|
233
|
+
# TODO reuse labels, synonyms, and definitions cache
|
|
234
|
+
ontology = get_ontology(prefix, **kwargs)
|
|
235
|
+
for term in ontology:
|
|
236
|
+
yield Entity(
|
|
237
|
+
concept_id=term.curie,
|
|
238
|
+
canonical_name=term.name,
|
|
239
|
+
aliases=[s.name for s in term.synonyms],
|
|
240
|
+
definition=term.definition,
|
|
241
|
+
)
|
pyobo/plugins.py
CHANGED
|
File without changes
|
pyobo/py.typed
CHANGED
|
File without changes
|
pyobo/resource_utils.py
CHANGED
|
File without changes
|
pyobo/resources/__init__.py
CHANGED
|
File without changes
|
pyobo/resources/ncbitaxon.py
CHANGED
|
File without changes
|
pyobo/resources/ncbitaxon.tsv.gz
CHANGED
|
File without changes
|
pyobo/resources/ro.py
CHANGED
|
File without changes
|
pyobo/resources/ro.tsv
CHANGED
|
File without changes
|
pyobo/resources/so.py
CHANGED
|
File without changes
|
pyobo/resources/so.tsv
CHANGED
|
File without changes
|
pyobo/sources/README.md
CHANGED
|
File without changes
|
pyobo/sources/__init__.py
CHANGED
|
@@ -34,6 +34,7 @@ from .gwascentral import GWASCentralPhenotypeGetter, GWASCentralStudyGetter
|
|
|
34
34
|
from .hgnc import HGNCGetter, HGNCGroupGetter
|
|
35
35
|
from .iana_media_type import IANAGetter
|
|
36
36
|
from .icd import ICD10Getter, ICD11Getter
|
|
37
|
+
from .iconclass import IconclassGetter
|
|
37
38
|
from .intact import IntactGetter
|
|
38
39
|
from .interpro import InterProGetter
|
|
39
40
|
from .itis import ITISGetter
|
|
@@ -115,6 +116,7 @@ __all__ = [
|
|
|
115
116
|
"ICD10Getter",
|
|
116
117
|
"ICD11Getter",
|
|
117
118
|
"ITISGetter",
|
|
119
|
+
"IconclassGetter",
|
|
118
120
|
"IntactGetter",
|
|
119
121
|
"InterProGetter",
|
|
120
122
|
"KEGGGeneGetter",
|
pyobo/sources/agrovoc.py
CHANGED
|
File without changes
|
|
File without changes
|
pyobo/sources/bigg/__init__.py
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
pyobo/sources/bigg/bigg_model.py
CHANGED
|
File without changes
|
|
File without changes
|
pyobo/sources/biogrid.py
CHANGED
|
File without changes
|
pyobo/sources/ccle.py
CHANGED
|
File without changes
|
pyobo/sources/cgnc.py
CHANGED
|
File without changes
|