pyobo 0.12.7__py3-none-any.whl → 0.12.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +12 -1
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +2 -1
  5. pyobo/api/alts.py +0 -0
  6. pyobo/api/combine.py +0 -0
  7. pyobo/api/edges.py +0 -0
  8. pyobo/api/embedding.py +36 -6
  9. pyobo/api/hierarchy.py +6 -4
  10. pyobo/api/metadata.py +0 -0
  11. pyobo/api/names.py +0 -0
  12. pyobo/api/properties.py +12 -3
  13. pyobo/api/relations.py +9 -5
  14. pyobo/api/species.py +0 -0
  15. pyobo/api/typedefs.py +0 -0
  16. pyobo/api/utils.py +0 -0
  17. pyobo/api/xrefs.py +0 -0
  18. pyobo/cli/__init__.py +0 -0
  19. pyobo/cli/cli.py +0 -0
  20. pyobo/cli/database.py +0 -0
  21. pyobo/cli/database_utils.py +0 -0
  22. pyobo/cli/lookup.py +0 -0
  23. pyobo/cli/utils.py +0 -0
  24. pyobo/constants.py +0 -0
  25. pyobo/getters.py +0 -0
  26. pyobo/gilda_utils.py +0 -0
  27. pyobo/identifier_utils/__init__.py +0 -0
  28. pyobo/identifier_utils/api.py +0 -0
  29. pyobo/identifier_utils/relations/__init__.py +0 -0
  30. pyobo/identifier_utils/relations/api.py +0 -0
  31. pyobo/identifier_utils/relations/data.json +0 -0
  32. pyobo/identifier_utils/relations/data_owl.json +0 -0
  33. pyobo/identifier_utils/relations/data_rdf.json +0 -0
  34. pyobo/identifier_utils/relations/data_rdfs.json +0 -0
  35. pyobo/mocks.py +0 -0
  36. pyobo/ner/__init__.py +8 -0
  37. pyobo/ner/api.py +0 -0
  38. pyobo/ner/normalizer.py +2 -2
  39. pyobo/ner/scispacy_utils.py +241 -0
  40. pyobo/plugins.py +0 -0
  41. pyobo/py.typed +0 -0
  42. pyobo/resource_utils.py +0 -0
  43. pyobo/resources/__init__.py +0 -0
  44. pyobo/resources/ncbitaxon.py +0 -0
  45. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  46. pyobo/resources/ro.py +0 -0
  47. pyobo/resources/ro.tsv +0 -0
  48. pyobo/resources/so.py +0 -0
  49. pyobo/resources/so.tsv +0 -0
  50. pyobo/sources/README.md +0 -0
  51. pyobo/sources/__init__.py +2 -0
  52. pyobo/sources/agrovoc.py +0 -0
  53. pyobo/sources/antibodyregistry.py +0 -0
  54. pyobo/sources/bigg/__init__.py +0 -0
  55. pyobo/sources/bigg/bigg_compartment.py +0 -0
  56. pyobo/sources/bigg/bigg_metabolite.py +0 -0
  57. pyobo/sources/bigg/bigg_model.py +0 -0
  58. pyobo/sources/bigg/bigg_reaction.py +0 -0
  59. pyobo/sources/biogrid.py +0 -0
  60. pyobo/sources/ccle.py +0 -0
  61. pyobo/sources/cgnc.py +0 -0
  62. pyobo/sources/chebi.py +0 -0
  63. pyobo/sources/chembl/__init__.py +0 -0
  64. pyobo/sources/chembl/chembl_cell.py +0 -0
  65. pyobo/sources/chembl/chembl_compound.py +0 -0
  66. pyobo/sources/chembl/chembl_mechanism.py +0 -0
  67. pyobo/sources/chembl/chembl_target.py +1 -1
  68. pyobo/sources/chembl/chembl_tissue.py +0 -0
  69. pyobo/sources/civic_gene.py +0 -0
  70. pyobo/sources/clinicaltrials.py +0 -0
  71. pyobo/sources/complexportal.py +0 -0
  72. pyobo/sources/conso.py +0 -0
  73. pyobo/sources/cpt.py +0 -0
  74. pyobo/sources/credit.py +0 -0
  75. pyobo/sources/cvx.py +0 -0
  76. pyobo/sources/depmap.py +0 -0
  77. pyobo/sources/dictybase_gene.py +0 -0
  78. pyobo/sources/drugbank/__init__.py +0 -0
  79. pyobo/sources/drugbank/drugbank.py +0 -0
  80. pyobo/sources/drugbank/drugbank_salt.py +0 -0
  81. pyobo/sources/drugcentral.py +0 -0
  82. pyobo/sources/expasy.py +4 -1
  83. pyobo/sources/famplex.py +0 -0
  84. pyobo/sources/flybase.py +0 -0
  85. pyobo/sources/gard.py +0 -0
  86. pyobo/sources/geonames/__init__.py +0 -0
  87. pyobo/sources/geonames/features.py +0 -0
  88. pyobo/sources/geonames/geonames.py +0 -0
  89. pyobo/sources/geonames/utils.py +0 -0
  90. pyobo/sources/gmt_utils.py +0 -0
  91. pyobo/sources/go.py +6 -3
  92. pyobo/sources/gtdb.py +1 -0
  93. pyobo/sources/gwascentral/__init__.py +0 -0
  94. pyobo/sources/gwascentral/gwascentral_phenotype.py +0 -0
  95. pyobo/sources/gwascentral/gwascentral_study.py +0 -0
  96. pyobo/sources/hgnc/__init__.py +0 -0
  97. pyobo/sources/hgnc/hgnc.py +0 -0
  98. pyobo/sources/hgnc/hgncgenefamily.py +0 -0
  99. pyobo/sources/iana_media_type.py +3 -1
  100. pyobo/sources/icd/__init__.py +0 -0
  101. pyobo/sources/icd/icd10.py +0 -0
  102. pyobo/sources/icd/icd11.py +0 -0
  103. pyobo/sources/icd/icd_utils.py +0 -0
  104. pyobo/sources/iconclass.py +55 -0
  105. pyobo/sources/intact.py +0 -0
  106. pyobo/sources/interpro.py +0 -0
  107. pyobo/sources/itis.py +0 -0
  108. pyobo/sources/kegg/__init__.py +0 -0
  109. pyobo/sources/kegg/api.py +0 -0
  110. pyobo/sources/kegg/genes.py +0 -0
  111. pyobo/sources/kegg/genome.py +0 -0
  112. pyobo/sources/kegg/pathway.py +0 -0
  113. pyobo/sources/mesh.py +0 -0
  114. pyobo/sources/mgi.py +0 -0
  115. pyobo/sources/mirbase/__init__.py +0 -0
  116. pyobo/sources/mirbase/mirbase.py +0 -0
  117. pyobo/sources/mirbase/mirbase_constants.py +0 -0
  118. pyobo/sources/mirbase/mirbase_family.py +0 -0
  119. pyobo/sources/mirbase/mirbase_mature.py +0 -0
  120. pyobo/sources/msigdb.py +0 -0
  121. pyobo/sources/ncbi/__init__.py +0 -0
  122. pyobo/sources/ncbi/ncbi_gc.py +0 -0
  123. pyobo/sources/ncbi/ncbigene.py +0 -0
  124. pyobo/sources/nih_reporter.py +0 -0
  125. pyobo/sources/nlm/__init__.py +0 -0
  126. pyobo/sources/nlm/nlm_catalog.py +0 -0
  127. pyobo/sources/nlm/nlm_publisher.py +0 -0
  128. pyobo/sources/nlm/utils.py +0 -0
  129. pyobo/sources/npass.py +0 -0
  130. pyobo/sources/omim_ps.py +0 -0
  131. pyobo/sources/pathbank.py +0 -0
  132. pyobo/sources/pfam/__init__.py +0 -0
  133. pyobo/sources/pfam/pfam.py +0 -0
  134. pyobo/sources/pfam/pfam_clan.py +0 -0
  135. pyobo/sources/pharmgkb/__init__.py +0 -0
  136. pyobo/sources/pharmgkb/pharmgkb_chemical.py +0 -0
  137. pyobo/sources/pharmgkb/pharmgkb_disease.py +0 -0
  138. pyobo/sources/pharmgkb/pharmgkb_gene.py +0 -0
  139. pyobo/sources/pharmgkb/pharmgkb_pathway.py +0 -0
  140. pyobo/sources/pharmgkb/pharmgkb_variant.py +0 -0
  141. pyobo/sources/pharmgkb/utils.py +0 -0
  142. pyobo/sources/pid.py +0 -0
  143. pyobo/sources/pombase.py +0 -0
  144. pyobo/sources/pubchem.py +0 -0
  145. pyobo/sources/reactome.py +0 -0
  146. pyobo/sources/rgd.py +0 -0
  147. pyobo/sources/rhea.py +0 -0
  148. pyobo/sources/ror.py +0 -0
  149. pyobo/sources/selventa/__init__.py +0 -0
  150. pyobo/sources/selventa/schem.py +0 -0
  151. pyobo/sources/selventa/scomp.py +0 -0
  152. pyobo/sources/selventa/sdis.py +0 -0
  153. pyobo/sources/selventa/sfam.py +0 -0
  154. pyobo/sources/sgd.py +0 -0
  155. pyobo/sources/signor/__init__.py +0 -0
  156. pyobo/sources/signor/download.py +0 -0
  157. pyobo/sources/signor/signor_complexes.py +0 -0
  158. pyobo/sources/slm.py +0 -0
  159. pyobo/sources/spdx.py +0 -0
  160. pyobo/sources/umls/__init__.py +0 -0
  161. pyobo/sources/umls/__main__.py +0 -0
  162. pyobo/sources/umls/get_synonym_types.py +0 -0
  163. pyobo/sources/umls/sty.py +0 -0
  164. pyobo/sources/umls/synonym_types.tsv +0 -0
  165. pyobo/sources/umls/umls.py +0 -0
  166. pyobo/sources/unimod.py +0 -0
  167. pyobo/sources/uniprot/__init__.py +0 -0
  168. pyobo/sources/uniprot/uniprot.py +0 -0
  169. pyobo/sources/uniprot/uniprot_ptm.py +0 -0
  170. pyobo/sources/utils.py +0 -0
  171. pyobo/sources/wikipathways.py +0 -0
  172. pyobo/sources/zfin.py +0 -0
  173. pyobo/ssg/__init__.py +0 -0
  174. pyobo/ssg/base.html +0 -0
  175. pyobo/ssg/index.html +0 -0
  176. pyobo/ssg/term.html +0 -0
  177. pyobo/ssg/typedef.html +0 -0
  178. pyobo/struct/__init__.py +0 -0
  179. pyobo/struct/functional/__init__.py +0 -0
  180. pyobo/struct/functional/dsl.py +0 -0
  181. pyobo/struct/functional/macros.py +12 -12
  182. pyobo/struct/functional/obo_to_functional.py +0 -0
  183. pyobo/struct/functional/ontology.py +0 -0
  184. pyobo/struct/functional/utils.py +0 -0
  185. pyobo/struct/obo/__init__.py +0 -0
  186. pyobo/struct/obo/reader.py +0 -0
  187. pyobo/struct/obo/reader_utils.py +0 -0
  188. pyobo/struct/obograph/__init__.py +0 -0
  189. pyobo/struct/obograph/export.py +0 -0
  190. pyobo/struct/obograph/reader.py +0 -0
  191. pyobo/struct/obograph/utils.py +0 -0
  192. pyobo/struct/reference.py +3 -1
  193. pyobo/struct/struct.py +22 -14
  194. pyobo/struct/struct_utils.py +0 -0
  195. pyobo/struct/typedef.py +0 -0
  196. pyobo/struct/utils.py +0 -0
  197. pyobo/struct/vocabulary.py +0 -0
  198. pyobo/utils/__init__.py +0 -0
  199. pyobo/utils/cache.py +0 -0
  200. pyobo/utils/io.py +0 -0
  201. pyobo/utils/iter.py +0 -0
  202. pyobo/utils/misc.py +0 -0
  203. pyobo/utils/ndex_utils.py +0 -0
  204. pyobo/utils/path.py +0 -0
  205. pyobo/version.py +1 -1
  206. {pyobo-0.12.7.dist-info → pyobo-0.12.9.dist-info}/METADATA +5 -1
  207. pyobo-0.12.9.dist-info/RECORD +210 -0
  208. {pyobo-0.12.7.dist-info → pyobo-0.12.9.dist-info}/WHEEL +1 -1
  209. {pyobo-0.12.7.dist-info → pyobo-0.12.9.dist-info}/licenses/LICENSE +0 -0
  210. pyobo-0.12.7.dist-info/RECORD +0 -208
  211. {pyobo-0.12.7.dist-info → pyobo-0.12.9.dist-info}/entry_points.txt +0 -0
pyobo/.DS_Store CHANGED
File without changes
pyobo/__init__.py CHANGED
@@ -50,6 +50,7 @@ from .api import (
50
50
  get_synonyms,
51
51
  get_text_embedding,
52
52
  get_text_embedding_similarity,
53
+ get_text_embeddings_df,
53
54
  get_typedef_df,
54
55
  get_xref,
55
56
  get_xrefs,
@@ -58,7 +59,13 @@ from .api import (
58
59
  is_descendent,
59
60
  )
60
61
  from .getters import get_ontology
61
- from .ner import get_grounder, ground
62
+ from .ner import (
63
+ get_grounder,
64
+ get_scispacy_entities,
65
+ get_scispacy_entity_linker,
66
+ get_scispacy_knowledgebase,
67
+ ground,
68
+ )
62
69
  from .plugins import (
63
70
  has_nomenclature_plugin,
64
71
  iter_nomenclature_plugins,
@@ -139,12 +146,16 @@ __all__ = [
139
146
  "get_relation",
140
147
  "get_relation_mapping",
141
148
  "get_relations_df",
149
+ "get_scispacy_entities",
150
+ "get_scispacy_entity_linker",
151
+ "get_scispacy_knowledgebase",
142
152
  "get_species",
143
153
  "get_sssom_df",
144
154
  "get_subhierarchy",
145
155
  "get_synonyms",
146
156
  "get_text_embedding",
147
157
  "get_text_embedding_similarity",
158
+ "get_text_embeddings_df",
148
159
  "get_typedef_df",
149
160
  "get_version",
150
161
  "get_xref",
pyobo/__main__.py CHANGED
File without changes
pyobo/api/__init__.py CHANGED
@@ -8,7 +8,7 @@ from .alts import (
8
8
  )
9
9
  from .combine import get_literal_mappings_subset
10
10
  from .edges import get_edges, get_edges_df, get_graph
11
- from .embedding import get_text_embedding, get_text_embedding_similarity
11
+ from .embedding import get_text_embedding, get_text_embedding_similarity, get_text_embeddings_df
12
12
  from .hierarchy import (
13
13
  get_ancestors,
14
14
  get_children,
@@ -119,6 +119,7 @@ __all__ = [
119
119
  "get_synonyms",
120
120
  "get_text_embedding",
121
121
  "get_text_embedding_similarity",
122
+ "get_text_embeddings_df",
122
123
  "get_typedef_df",
123
124
  "get_version",
124
125
  "get_xref",
pyobo/api/alts.py CHANGED
File without changes
pyobo/api/combine.py CHANGED
File without changes
pyobo/api/edges.py CHANGED
File without changes
pyobo/api/embedding.py CHANGED
@@ -6,8 +6,9 @@ from typing import TYPE_CHECKING
6
6
 
7
7
  import curies
8
8
  import numpy as np
9
+ import pandas as pd
9
10
 
10
- from pyobo.api.names import get_definition, get_name
11
+ from pyobo.api.names import get_definition, get_name, get_references
11
12
 
12
13
  if TYPE_CHECKING:
13
14
  import sentence_transformers
@@ -16,6 +17,7 @@ __all__ = [
16
17
  "get_text_embedding",
17
18
  "get_text_embedding_model",
18
19
  "get_text_embedding_similarity",
20
+ "get_text_embeddings_df",
19
21
  ]
20
22
 
21
23
 
@@ -39,6 +41,30 @@ def _get_text(
39
41
  return name
40
42
 
41
43
 
44
+ def get_text_embeddings_df(
45
+ prefix: str,
46
+ *,
47
+ model: sentence_transformers.SentenceTransformer | None = None,
48
+ ) -> pd.DataFrame:
49
+ """Get embeddings for all entities in the resource.
50
+
51
+ :param prefix: The prefix of the resource whose entities should be embedded
52
+ :param model: A sentence transformer model. Defaults to ``all-MiniLM-L6-v2`` if not
53
+ given.
54
+ """
55
+ luids, texts = [], []
56
+ for reference in get_references(prefix):
57
+ text = _get_text(reference)
58
+ if text is None:
59
+ continue
60
+ luids.append(reference.identifier)
61
+ texts.append(text)
62
+ if model is None:
63
+ model = get_text_embedding_model()
64
+ res = model.encode(texts)
65
+ return pd.DataFrame(res, index=luids)
66
+
67
+
42
68
  def get_text_embedding(
43
69
  reference: str | curies.Reference | curies.ReferenceTuple,
44
70
  *,
@@ -47,8 +73,10 @@ def get_text_embedding(
47
73
  """Get a text embedding for an entity, or return none if no text is available.
48
74
 
49
75
  :param reference: A reference, either as a string or Reference object
50
- :param model: A sentence transformer model. Defaults to ``all-MiniLM-L6-v2`` if not given.
51
- :return: A 1D numpy float array of embeddings from :class:`sentence_transformers`
76
+ :param model: A sentence transformer model. Defaults to ``all-MiniLM-L6-v2`` if not
77
+ given.
78
+
79
+ :returns: A 1D numpy float array of embeddings from :class:`sentence_transformers`
52
80
 
53
81
  .. code-block:: python
54
82
 
@@ -87,9 +115,11 @@ def get_text_embedding_similarity(
87
115
 
88
116
  :param reference_1: A reference, given as a string or Reference object
89
117
  :param reference_2: A second reference
90
- :param model: A sentence transformer model. Defaults to ``all-MiniLM-L6-v2`` if not given.
91
- :returns:
92
- A floating point similarity, if text is available for both references, otherwise none
118
+ :param model: A sentence transformer model. Defaults to ``all-MiniLM-L6-v2`` if not
119
+ given.
120
+
121
+ :returns: A floating point similarity, if text is available for both references,
122
+ otherwise none
93
123
 
94
124
  .. code-block:: python
95
125
 
pyobo/api/hierarchy.py CHANGED
@@ -163,7 +163,8 @@ def is_descendent(
163
163
  :param ancestor_prefix: The prefix for the ancestor
164
164
  :param ancestor_identifier: The local unique identifier for the ancestor
165
165
  :param kwargs: Keyword arguments for :func:`get_hierarchy`
166
- :return: If the decendant has the given ancestor
166
+
167
+ :returns: If the descendant has the given ancestor
167
168
 
168
169
  Check that ``GO:0070246`` (natural killer cell apoptotic process) is a descendant of
169
170
  ``GO:0006915`` (apoptotic process)
@@ -254,10 +255,11 @@ def has_ancestor(
254
255
  :param ancestor_prefix: The prefix for the ancestor
255
256
  :param ancestor_identifier: The local unique identifier for the ancestor
256
257
  :param kwargs: Keyword arguments for :func:`get_hierarchy`
257
- :return: If the decendant has the given ancestor
258
258
 
259
- Check that ``GO:0008219`` (cell death) is an ancestor of ``GO:0006915``
260
- (apoptotic process):
259
+ :returns: If the descendant has the given ancestor
260
+
261
+ Check that ``GO:0008219`` (cell death) is an ancestor of ``GO:0006915`` (apoptotic
262
+ process):
261
263
 
262
264
  >>> apoptosis = Reference.from_curie("GO:0006915", name="apoptotic process")
263
265
  >>> cell_death = Reference.from_curie("GO:0008219", name="cell death")
pyobo/api/metadata.py CHANGED
File without changes
pyobo/api/names.py CHANGED
File without changes
pyobo/api/properties.py CHANGED
@@ -111,6 +111,7 @@ def get_properties_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.Da
111
111
  """Extract properties.
112
112
 
113
113
  :param prefix: the resource to load
114
+
114
115
  :returns: A dataframe with the properties
115
116
  """
116
117
  df1 = get_literal_properties_df(prefix, **kwargs)
@@ -131,6 +132,7 @@ def get_filtered_properties_mapping(
131
132
 
132
133
  :param prefix: the resource to load
133
134
  :param prop: the property to extract
135
+
134
136
  :returns: A mapping from identifier to property value
135
137
  """
136
138
  df = get_filtered_properties_df(prefix, prop, **kwargs)
@@ -145,6 +147,7 @@ def get_filtered_properties_multimapping(
145
147
 
146
148
  :param prefix: the resource to load
147
149
  :param prop: the property to extract
150
+
148
151
  :returns: A mapping from identifier to property values
149
152
  """
150
153
  df = get_filtered_properties_df(prefix, prop, **kwargs)
@@ -159,7 +162,9 @@ def get_property(
159
162
  :param prefix: the resource to load
160
163
  :param identifier: the identifier within the resource
161
164
  :param prop: the property to extract
162
- :returns: The single value for the property. If multiple are expected, use :func:`get_properties`
165
+
166
+ :returns: The single value for the property. If multiple are expected, use
167
+ :func:`get_properties`
163
168
 
164
169
  >>> import pyobo
165
170
  >>> pyobo.get_property("chebi", "132964", "http://purl.obolibrary.org/obo/chebi/smiles")
@@ -182,7 +187,9 @@ def get_properties(
182
187
  :param prefix: the resource to load
183
188
  :param identifier: the identifier within the resource
184
189
  :param prop: the property to extract
185
- :returns: Multiple values for the property. If only one is expected, use :func:`get_property`
190
+
191
+ :returns: Multiple values for the property. If only one is expected, use
192
+ :func:`get_property`
186
193
  """
187
194
  filtered_properties_multimapping = get_filtered_properties_multimapping(
188
195
  prefix=prefix, prop=prop, **kwargs
@@ -198,7 +205,9 @@ def get_filtered_properties_df(
198
205
 
199
206
  :param prefix: the resource to load
200
207
  :param prop: the property to extract
201
- :returns: A dataframe from identifier to property value. Columns are [<prefix>_id, value].
208
+
209
+ :returns: A dataframe from identifier to property value. Columns are [<prefix>_id,
210
+ value].
202
211
  """
203
212
  prop = _ensure_ref(prop, ontology_prefix=prefix)
204
213
  df = get_properties_df(prefix, **kwargs)
pyobo/api/relations.py CHANGED
@@ -135,9 +135,11 @@ def get_relation_mapping(
135
135
  ) -> Mapping[str, str]:
136
136
  """Get relations from identifiers in the source prefix to target prefix with the given relation.
137
137
 
138
- .. warning:: Assumes there's only one version of the property for each term.
138
+ .. warning::
139
139
 
140
- Example usage: get homology between HGNC and MGI:
140
+ Assumes there's only one version of the property for each term.
141
+
142
+ Example usage: get homology between HGNC and MGI:
141
143
 
142
144
  >>> import pyobo
143
145
  >>> human_mapt_hgnc_id = "6893"
@@ -161,16 +163,18 @@ def get_relation(
161
163
  ) -> str | None:
162
164
  """Get the target identifier corresponding to the given relationship from the source prefix/identifier pair.
163
165
 
164
- .. warning:: Assumes there's only one version of the property for each term.
166
+ .. warning::
167
+
168
+ Assumes there's only one version of the property for each term.
165
169
 
166
- Example usage: get homology between MAPT in HGNC and MGI:
170
+ Example usage: get homology between MAPT in HGNC and MGI:
167
171
 
168
172
  >>> import pyobo
169
173
  >>> human_mapt_hgnc_id = "6893"
170
174
  >>> mouse_mapt_mgi_id = "97180"
171
175
  >>> assert mouse_mapt_mgi_id == pyobo.get_relation(
172
176
  ... "hgnc", human_mapt_hgnc_id, "ro:HOM0000017", "mgi"
173
- ... )
177
+ ... )
174
178
  """
175
179
  relation_mapping = get_relation_mapping(
176
180
  prefix=prefix,
pyobo/api/species.py CHANGED
File without changes
pyobo/api/typedefs.py CHANGED
File without changes
pyobo/api/utils.py CHANGED
File without changes
pyobo/api/xrefs.py CHANGED
File without changes
pyobo/cli/__init__.py CHANGED
File without changes
pyobo/cli/cli.py CHANGED
File without changes
pyobo/cli/database.py CHANGED
File without changes
File without changes
pyobo/cli/lookup.py CHANGED
File without changes
pyobo/cli/utils.py CHANGED
File without changes
pyobo/constants.py CHANGED
File without changes
pyobo/getters.py CHANGED
File without changes
pyobo/gilda_utils.py CHANGED
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
pyobo/mocks.py CHANGED
File without changes
pyobo/ner/__init__.py CHANGED
@@ -2,8 +2,16 @@
2
2
 
3
3
  from .api import get_grounder
4
4
  from .normalizer import ground
5
+ from .scispacy_utils import (
6
+ get_scispacy_entities,
7
+ get_scispacy_entity_linker,
8
+ get_scispacy_knowledgebase,
9
+ )
5
10
 
6
11
  __all__ = [
7
12
  "get_grounder",
13
+ "get_scispacy_entities",
14
+ "get_scispacy_entity_linker",
15
+ "get_scispacy_knowledgebase",
8
16
  "ground",
9
17
  ]
pyobo/ner/api.py CHANGED
File without changes
pyobo/ner/normalizer.py CHANGED
@@ -20,8 +20,8 @@ def ground(
20
20
  ) -> Reference | None:
21
21
  """Normalize a string given the prefix's labels and synonyms.
22
22
 
23
- :param prefix: If a string, only grounds against that namespace. If a list, will try grounding
24
- against all in that order
23
+ :param prefix: If a string, only grounds against that namespace. If a list, will try
24
+ grounding against all in that order
25
25
  :param query: The string to try grounding
26
26
  """
27
27
  grounder = get_grounder(prefix, **kwargs)
@@ -0,0 +1,241 @@
1
+ """A bridge between PyOBO and :mod:`scispacy`.
2
+
3
+ :mod:`scispacy` implements a lexical index in
4
+ :class:`scispacy.linking_utils.KnowledgeBase` which keeps track of labels, synonyms, and
5
+ definitions for entities. These are used to construct a TF-IDF index and implement
6
+ entity linking (also called named entity normalization (NEN) or grounding) in
7
+ :class:`scispacy.linking.EntityLinker`.
8
+
9
+ Constructing a Lexical Index
10
+ ============================
11
+
12
+ An *ad hoc* SciSpacy lexical index can be constructed on-the-fly by passing a
13
+ Bioregistry prefix to :func:`pyobo.get_scispacy_knowledgebase`. In the following
14
+ example, the prefix ``to`` is used to construct a lexical index for the `Plant Trait
15
+ Ontology <https://bioregistry.io/to>`_.
16
+
17
+ .. code-block:: python
18
+
19
+ import pyobo
20
+ from scispacy.linking_utils import KnowledgeBase
21
+
22
+ kb: KnowledgeBase = pyobo.get_scispacy_knowledgebase("to")
23
+
24
+ The high-level PyOBO interface abstracts the differences between external ontologies
25
+ like the Plant Trait Ontology and databases that are converted to ontologies in
26
+ :mod:`pyobo.sources` like the `HUGO Gene Nomenclature Committee
27
+ <https://bioregistry.io/hgnc>`_. Therefore, you can also do
28
+
29
+ .. code-block:: python
30
+
31
+ import pyobo
32
+ from scispacy.linking_utils import KnowledgeBase
33
+
34
+ kb: KnowledgeBase = pyobo.get_scispacy_knowledgebase("hgnc")
35
+
36
+ Alternatively, a reusable class can be defined like in the following:
37
+
38
+ .. code-block:: python
39
+
40
+ import pyobo
41
+ from scispacy.linking_utils import KnowledgeBase
42
+
43
+
44
+ class HGNCKnowledgeBase(KnowledgeBase):
45
+ def __init__(self) -> None:
46
+ super().__init__(pyobo.get_scispacy_entities("hgnc"))
47
+
48
+
49
+ kb = HGNCKnowledgeBase()
50
+
51
+ Constructing an Entity Linker
52
+ =============================
53
+
54
+ An entity linker can be constructed from a :class:`scispacy.linking_utils.KnowledgeBase`
55
+ like in:
56
+
57
+ .. code-block:: python
58
+
59
+ import pyobo
60
+ from scispacy.linking import EntityLinker
61
+
62
+ kb = pyobo.get_scispacy_knowledgebase("hgnc")
63
+ linker = EntityLinker.from_kb(kb, filter_for_definitions=False)
64
+
65
+ Where ``filter_for_definitions`` is set to ``False`` to retain entities that don't have
66
+ a definition.
67
+
68
+ PyOBO provides a convenience function :func:`pyobo.get_scispacy_entity_linker` that
69
+ wraps this workflow and also automatically caches the TF-IDF index constructed in the
70
+ process in the correctly versioned folder in the PyOBO cache.
71
+
72
+ .. code-block:: python
73
+
74
+ import pyobo
75
+ from scispacy.linking import EntityLinker
76
+
77
+ linker = pyobo.get_scispacy_entity_linker("hgnc", filter_for_definitions=False)
78
+
79
+ Full Workflow
80
+ =============
81
+
82
+ Once an entity linker has been constructed, it can be used in series with a
83
+ :class:`spacy.Language` object instantiated with :func:`spacy.load` to ground named
84
+ entities that were recognized by a model like ``en_core_web_sm``:
85
+
86
+ .. code-block:: python
87
+
88
+ import pyobo
89
+ import spacy
90
+ from scispacy.linking import EntityLinker
91
+ from tabulate import tabulate
92
+
93
+ linker: EntityLinker = pyobo.get_scispacy_entity_linker("hgnc", filter_for_definitions=False)
94
+
95
+ # now, put it all together with a NER model
96
+ nlp = spacy.load("en_core_web_sm")
97
+
98
+ text = (
99
+ "RAC(Rho family)-alpha serine/threonine-protein kinase "
100
+ "is an enzyme that in humans is encoded by the AKT1 gene."
101
+ )
102
+ doc = linker(nlp(text))
103
+
104
+ rows = [
105
+ (
106
+ span,
107
+ span.start_char,
108
+ span.end_char,
109
+ f"`{curie} <https://bioregistry.io/{curie}>`_",
110
+ score,
111
+ )
112
+ for span in doc.ents
113
+ for curie, score in span._.kb_ents
114
+ ]
115
+ print(tabulate(rows, headers=["text", "start", "end", "curie", "score"], tablefmt="rst"))
116
+
117
+ ==== ===== === ============================================= ========
118
+ text start end curie score
119
+ ==== ===== === ============================================= ========
120
+ AKT1 100 104 `hgnc:391 <https://bioregistry.io/hgnc:391>`_ 1
121
+ AKT1 100 104 `hgnc:392 <https://bioregistry.io/hgnc:392>`_ 0.776504
122
+ AKT1 100 104 `hgnc:393 <https://bioregistry.io/hgnc:393>`_ 0.764049
123
+ ==== ===== === ============================================= ========
124
+
125
+ This example recognizes the AKT serine/threonine kinase 1 (AKT1) gene and provides three
126
+ highly scored groundings, the best of which, `hgnc:391
127
+ <https://bioregistry.io/hgnc:391>`_, is correct.
128
+
129
+ .. note::
130
+
131
+ The groundings and scores are stored by SciSpacy in the hidden attribute
132
+ ``span._.kb_ents``.
133
+ """
134
+
135
+ from __future__ import annotations
136
+
137
+ from collections.abc import Iterable
138
+ from typing import TYPE_CHECKING, Any
139
+
140
+ from typing_extensions import Unpack
141
+
142
+ from ..api.utils import get_version_from_kwargs
143
+ from ..constants import GetOntologyKwargs
144
+ from ..getters import get_ontology
145
+ from ..utils.path import prefix_directory_join
146
+
147
+ if TYPE_CHECKING:
148
+ from scispacy.linking import EntityLinker
149
+ from scispacy.linking_utils import Entity, KnowledgeBase
150
+
151
+ __all__ = [
152
+ "get_scispacy_entities",
153
+ "get_scispacy_entity_linker",
154
+ "get_scispacy_knowledgebase",
155
+ ]
156
+
157
+
158
+ def get_scispacy_entity_linker(
159
+ prefix: str,
160
+ *,
161
+ ontology_kwargs: GetOntologyKwargs | None = None,
162
+ candidate_generator_kwargs: dict[str, Any] | None = None,
163
+ **entity_linker_kwargs: Any,
164
+ ) -> EntityLinker:
165
+ """Get an entity linker object for usage with :mod:`scispacy`.
166
+
167
+ :param prefix:
168
+ The ontology's prefix, such as ``go`` for Gene Ontology, ``doid`` for the Disease
169
+ Ontology, or more.
170
+
171
+ :param ontology_kwargs: keyword arguments to pass to :func:`pyobo.get_ontology`,
172
+ such as ``version``.
173
+ :param candidate_generator_kwargs: keyword arguments to pass to
174
+ :class:`scispacy.candidate_generation.CandidateGenerator`, such as ``ef_search``
175
+ :param entity_linker_kwargs: keyword arguments to pass to
176
+ :class:`scispacy.linking.EntityLinker`, such as ``filter_for_definitions``
177
+
178
+ :returns: An object that can be applied in a :mod:`spacy` natural language
179
+ processing workflow, namely to apply grounding/named entity normalization to
180
+ recognized named entities.
181
+ """
182
+ from scispacy.linking import EntityLinker
183
+
184
+ if ontology_kwargs is None:
185
+ ontology_kwargs = {}
186
+
187
+ version = get_version_from_kwargs(prefix, ontology_kwargs)
188
+ scispacy_cache_directory = prefix_directory_join(prefix, "scispacy", version=version)
189
+
190
+ # TODO see if we can skip loading the KB
191
+ kb = get_scispacy_knowledgebase(prefix, **ontology_kwargs)
192
+ linker = EntityLinker.from_kb(
193
+ kb,
194
+ ann_index_out_dir=scispacy_cache_directory.as_posix(),
195
+ candidate_generator_kwargs=candidate_generator_kwargs,
196
+ **(entity_linker_kwargs or {}),
197
+ )
198
+ return linker
199
+
200
+
201
+ def get_scispacy_knowledgebase(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> KnowledgeBase:
202
+ """Get a knowledgebase object for usage with :mod:`scispacy`.
203
+
204
+ :param prefix:
204
+ The ontology's prefix, such as ``go`` for Gene Ontology, ``doid`` for the Disease
205
+ Ontology, or more.
206
+
207
+ :param kwargs:
209
+ keyword arguments to pass to :func:`pyobo.get_ontology`, such as ``version``.
210
+
211
+ :returns: An object that represents a lexical index over name, synonym, and
212
+ definition strings from the ontology.
213
+ """
214
+ from scispacy.linking_utils import KnowledgeBase
215
+
216
+ return KnowledgeBase(get_scispacy_entities(prefix, **kwargs))
217
+
218
+
219
+ def get_scispacy_entities(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> Iterable[Entity]:
220
+ """Iterate over entities in a given ontology via :mod:`pyobo`.
221
+
222
+ :param prefix:
222
+ The ontology's prefix, such as ``go`` for Gene Ontology, ``doid`` for the Disease
223
+ Ontology, or more.
224
+
225
+ :param kwargs:
227
+ keyword arguments to pass to :func:`pyobo.get_ontology`, such as ``version``.
228
+
229
+ :yields: Entity objects for all terms in the ontology
230
+ """
231
+ from scispacy.linking_utils import Entity
232
+
233
+ # TODO reuse labels, synonyms, and definitions cache
234
+ ontology = get_ontology(prefix, **kwargs)
235
+ for term in ontology:
236
+ yield Entity(
237
+ concept_id=term.curie,
238
+ canonical_name=term.name,
239
+ aliases=[s.name for s in term.synonyms],
240
+ definition=term.definition,
241
+ )
pyobo/plugins.py CHANGED
File without changes
pyobo/py.typed CHANGED
File without changes
pyobo/resource_utils.py CHANGED
File without changes
File without changes
File without changes
File without changes
pyobo/resources/ro.py CHANGED
File without changes
pyobo/resources/ro.tsv CHANGED
File without changes
pyobo/resources/so.py CHANGED
File without changes
pyobo/resources/so.tsv CHANGED
File without changes
pyobo/sources/README.md CHANGED
File without changes
pyobo/sources/__init__.py CHANGED
@@ -34,6 +34,7 @@ from .gwascentral import GWASCentralPhenotypeGetter, GWASCentralStudyGetter
34
34
  from .hgnc import HGNCGetter, HGNCGroupGetter
35
35
  from .iana_media_type import IANAGetter
36
36
  from .icd import ICD10Getter, ICD11Getter
37
+ from .iconclass import IconclassGetter
37
38
  from .intact import IntactGetter
38
39
  from .interpro import InterProGetter
39
40
  from .itis import ITISGetter
@@ -115,6 +116,7 @@ __all__ = [
115
116
  "ICD10Getter",
116
117
  "ICD11Getter",
117
118
  "ITISGetter",
119
+ "IconclassGetter",
118
120
  "IntactGetter",
119
121
  "InterProGetter",
120
122
  "KEGGGeneGetter",
pyobo/sources/agrovoc.py CHANGED
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
pyobo/sources/biogrid.py CHANGED
File without changes
pyobo/sources/ccle.py CHANGED
File without changes
pyobo/sources/cgnc.py CHANGED
File without changes