pyobo 0.12.9__py3-none-any.whl → 0.12.11__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (41)
  1. pyobo/__init__.py +6 -0
  2. pyobo/api/__init__.py +11 -1
  3. pyobo/api/alts.py +18 -4
  4. pyobo/api/embedding.py +108 -9
  5. pyobo/api/names.py +28 -6
  6. pyobo/api/xrefs.py +26 -1
  7. pyobo/constants.py +38 -2
  8. pyobo/getters.py +8 -3
  9. pyobo/ner/api.py +14 -10
  10. pyobo/ner/scispacy_utils.py +15 -21
  11. pyobo/sources/__init__.py +2 -0
  12. pyobo/sources/antibodyregistry.py +3 -3
  13. pyobo/sources/bigg/bigg_compartment.py +1 -1
  14. pyobo/sources/complexportal.py +3 -3
  15. pyobo/sources/conso.py +3 -3
  16. pyobo/sources/famplex.py +3 -3
  17. pyobo/sources/goldbook.py +86 -0
  18. pyobo/sources/hgnc/hgnc.py +157 -96
  19. pyobo/sources/hgnc/hgncgenefamily.py +14 -13
  20. pyobo/sources/msigdb.py +3 -3
  21. pyobo/sources/omim_ps.py +8 -2
  22. pyobo/sources/reactome.py +3 -3
  23. pyobo/sources/rgd.py +7 -11
  24. pyobo/sources/slm.py +3 -3
  25. pyobo/sources/uniprot/uniprot.py +3 -3
  26. pyobo/sources/wikipathways.py +7 -2
  27. pyobo/struct/__init__.py +2 -2
  28. pyobo/struct/functional/macros.py +1 -1
  29. pyobo/struct/functional/obo_to_functional.py +7 -3
  30. pyobo/struct/obo/reader.py +1 -1
  31. pyobo/struct/struct.py +88 -18
  32. pyobo/struct/struct_utils.py +19 -5
  33. pyobo/struct/typedef.py +16 -3
  34. pyobo/struct/vocabulary.py +4 -3
  35. pyobo/utils/path.py +5 -4
  36. pyobo/version.py +1 -1
  37. {pyobo-0.12.9.dist-info → pyobo-0.12.11.dist-info}/METADATA +8 -1
  38. {pyobo-0.12.9.dist-info → pyobo-0.12.11.dist-info}/RECORD +41 -40
  39. {pyobo-0.12.9.dist-info → pyobo-0.12.11.dist-info}/WHEEL +0 -0
  40. {pyobo-0.12.9.dist-info → pyobo-0.12.11.dist-info}/entry_points.txt +0 -0
  41. {pyobo-0.12.9.dist-info → pyobo-0.12.11.dist-info}/licenses/LICENSE +0 -0
pyobo/__init__.py CHANGED
@@ -14,6 +14,7 @@ from .api import (
     get_filtered_relations_df,
     get_filtered_xrefs,
     get_graph,
+    get_graph_embeddings_df,
     get_hierarchy,
     get_id_definition_mapping,
     get_id_multirelations_mapping,
@@ -37,6 +38,7 @@ from .api import (
     get_obsolete,
     get_primary_curie,
     get_primary_identifier,
+    get_primary_reference,
     get_properties,
     get_properties_df,
     get_property,
@@ -44,6 +46,7 @@ from .api import (
     get_relation,
     get_relation_mapping,
     get_relations_df,
+    get_semantic_mappings,
     get_species,
     get_sssom_df,
     get_subhierarchy,
@@ -114,6 +117,7 @@ __all__ = [
     "get_filtered_relations_df",
     "get_filtered_xrefs",
     "get_graph",
+    "get_graph_embeddings_df",
     "get_grounder",
     "get_hierarchy",
     "get_id_definition_mapping",
@@ -139,6 +143,7 @@ __all__ = [
     "get_ontology",
     "get_primary_curie",
     "get_primary_identifier",
+    "get_primary_reference",
     "get_properties",
     "get_properties_df",
     "get_property",
@@ -149,6 +154,7 @@ __all__ = [
     "get_scispacy_entities",
     "get_scispacy_entity_linker",
     "get_scispacy_knowledgebase",
+    "get_semantic_mappings",
     "get_species",
     "get_sssom_df",
     "get_subhierarchy",
pyobo/api/__init__.py CHANGED
@@ -5,10 +5,16 @@ from .alts import (
     get_id_to_alts,
     get_primary_curie,
     get_primary_identifier,
+    get_primary_reference,
 )
 from .combine import get_literal_mappings_subset
 from .edges import get_edges, get_edges_df, get_graph
-from .embedding import get_text_embedding, get_text_embedding_similarity, get_text_embeddings_df
+from .embedding import (
+    get_graph_embeddings_df,
+    get_text_embedding,
+    get_text_embedding_similarity,
+    get_text_embeddings_df,
+)
 from .hierarchy import (
     get_ancestors,
     get_children,
@@ -59,6 +65,7 @@ from .typedefs import get_typedef_df
 from .xrefs import (
     get_filtered_xrefs,
     get_mappings_df,
+    get_semantic_mappings,
     get_sssom_df,
     get_xref,
     get_xrefs,
@@ -80,6 +87,7 @@ __all__ = [
     "get_filtered_relations_df",
     "get_filtered_xrefs",
     "get_graph",
+    "get_graph_embeddings_df",
     "get_hierarchy",
     "get_id_definition_mapping",
     "get_id_multirelations_mapping",
@@ -105,6 +113,7 @@ __all__ = [
     "get_ontology",
     "get_primary_curie",
     "get_primary_identifier",
+    "get_primary_reference",
     "get_priority_curie",
     "get_properties",
     "get_properties_df",
@@ -113,6 +122,7 @@ __all__ = [
     "get_relation",
     "get_relation_mapping",
     "get_relations_df",
+    "get_semantic_mappings",
     "get_species",
     "get_sssom_df",
     "get_subhierarchy",
pyobo/api/alts.py CHANGED
@@ -20,6 +20,7 @@ __all__ = [
     "get_id_to_alts",
     "get_primary_curie",
     "get_primary_identifier",
+    "get_primary_reference",
 ]

 logger = logging.getLogger(__name__)
@@ -61,13 +62,13 @@ def get_alts_to_id(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> Mapping[
     }


-def get_primary_curie(
+def get_primary_reference(
     prefix: str | curies.Reference | curies.ReferenceTuple,
     identifier: str | None = None,
     /,
     **kwargs: Unpack[GetOntologyKwargs],
-) -> str | None:
-    """Get the primary curie for an entity."""
+) -> curies.ReferenceTuple | None:
+    """Get the primary reference for an entity."""
     reference = _get_pi(prefix, identifier)
     try:
         primary_identifier = get_primary_identifier(reference, **kwargs)
@@ -76,7 +77,20 @@ def get_primary_curie(
            raise
        # this happens on invalid prefix. maybe revise?
        return None
-    return f"{reference.prefix}:{primary_identifier}"
+    return curies.ReferenceTuple(reference.prefix, primary_identifier)
+
+
+def get_primary_curie(
+    prefix: str | curies.Reference | curies.ReferenceTuple,
+    identifier: str | None = None,
+    /,
+    **kwargs: Unpack[GetOntologyKwargs],
+) -> str | None:
+    """Get the primary curie for an entity."""
+    reference = get_primary_reference(prefix, identifier, **kwargs)
+    if reference is None:
+        return None
+    return reference.curie


 def get_primary_identifier(
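
In short, `get_primary_curie` is now a thin wrapper around the new `get_primary_reference`, which returns a structured `curies.ReferenceTuple` instead of a pre-formatted string. A minimal usage sketch (the CURIE below is an arbitrary illustration, not taken from this diff):

    import pyobo

    # resolve a possibly-alternate (e.g., merged) identifier to its primary
    # form; returns a curies.ReferenceTuple, or None if the lookup fails
    ref = pyobo.get_primary_reference("go", "0001071")
    if ref is not None:
        print(ref.prefix, ref.identifier)  # structured access to both parts
        print(ref.curie)                   # the string get_primary_curie returns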
pyobo/api/embedding.py CHANGED
@@ -2,18 +2,29 @@

 from __future__ import annotations

-from typing import TYPE_CHECKING
+import tempfile
+from pathlib import Path
+from typing import TYPE_CHECKING, Literal

+import bioregistry
 import curies
 import numpy as np
 import pandas as pd
+from tqdm import tqdm
+from typing_extensions import Unpack

-from pyobo.api.names import get_definition, get_name, get_references
+from pyobo.api.edges import get_edges_df
+from pyobo.api.names import get_definition, get_id_name_mapping, get_name
+from pyobo.api.utils import get_version_from_kwargs
+from pyobo.constants import GetOntologyKwargs, check_should_force
+from pyobo.identifier_utils import wrap_norm_prefix
+from pyobo.utils.path import CacheArtifact, get_cache_path

 if TYPE_CHECKING:
     import sentence_transformers

 __all__ = [
+    "get_graph_embeddings_df",
     "get_text_embedding",
     "get_text_embedding_model",
     "get_text_embedding_similarity",
@@ -31,38 +42,126 @@ def get_text_embedding_model() -> sentence_transformers.SentenceTransformer:

 def _get_text(
     reference: str | curies.Reference | curies.ReferenceTuple,
+    /,
+    *,
+    name: str | None = None,
+    **kwargs: Unpack[GetOntologyKwargs],
 ) -> str | None:
-    name = get_name(reference)
+    if name is None:
+        name = get_name(reference, **kwargs)
     if name is None:
         return None
-    description = get_definition(reference)
+    description = get_definition(reference, **kwargs)
     if description:
         name += " " + description
     return name


+def get_graph_embeddings_df(
+    prefix: str,
+    *,
+    method: Literal["pykeen", "grape"] | None = None,
+    epochs: int = 30,
+    dimension: int = 32,
+    **kwargs: Unpack[GetOntologyKwargs],
+) -> pd.DataFrame:
+    """Get graph machine learning embeddings."""
+    if method == "pykeen" or method is None:
+        from pykeen.models import PairRE
+        from pykeen.training import SLCWATrainingLoop
+        from pykeen.triples import TriplesFactory
+        from torch.optim import Adam
+
+        triples_df = get_edges_df(prefix, **kwargs)
+        training = TriplesFactory.from_labeled_triples(triples_df.values)
+        model = PairRE(triples_factory=training, embedding_dim=dimension)
+        optimizer = Adam(params=model.get_grad_params())
+        training_loop = SLCWATrainingLoop(
+            model=model, triples_factory=training, optimizer=optimizer
+        )
+        # can also set batch size here
+        training_loop.train(triples_factory=training, num_epochs=epochs)
+        embeddings = model.entity_representations[0]()
+        df = pd.DataFrame(
+            embeddings.detach().numpy(),
+            index=[training.entity_id_to_label[i] for i in range(embeddings.shape[0])],
+        )
+
+    elif method == "grape":
+        from ensmallen import Graph
+
+        edges_df = get_edges_df(prefix, **kwargs)
+        with tempfile.TemporaryDirectory() as d:
+            path = Path(d).joinpath("test.tsv")
+            edges_df[[":START_ID", ":END_ID"]].to_csv(path, header=None, sep="\t", index=False)
+            graph = Graph.from_csv(
+                edge_path=str(path),
+                edge_list_separator="\t",
+                sources_column_number=0,
+                destinations_column_number=1,
+                edge_list_numeric_node_ids=False,
+                directed=True,
+                name=bioregistry.get_name(prefix, strict=True),
+                verbose=True,
+            )
+            graph = graph.remove_disconnected_nodes()
+
+        from embiggen.embedders.ensmallen_embedders.second_order_line import (
+            SecondOrderLINEEnsmallen,
+        )
+
+        embedding = SecondOrderLINEEnsmallen(embedding_size=dimension, epochs=epochs).fit_transform(
+            graph
+        )
+        df = embedding.get_all_node_embedding()[0].sort_index()
+        # df.columns = [str(c) for c in df.columns]
+    else:
+        raise ValueError(f"invalid graph machine learning method: {method}")

+    df.index.name = "curie"
+    return df
+
+
+@wrap_norm_prefix
 def get_text_embeddings_df(
     prefix: str,
     *,
     model: sentence_transformers.SentenceTransformer | None = None,
+    **kwargs: Unpack[GetOntologyKwargs],
 ) -> pd.DataFrame:
     """Get embeddings for all entities in the resource.

     :param prefix: A reference, either as a string or Reference object
     :param model: A sentence transformer model. Defaults to ``all-MiniLM-L6-v2`` if not
         given.
+    :param kwargs: The keyword arguments to forward to ontology getter functions for
+        names, definitions, and version
+
+    :returns: A pandas dataframe with an index representing local unique identifiers and
+        columns for the values of the model returned vectors
     """
+    path = get_cache_path(
+        prefix, CacheArtifact.embeddings, version=get_version_from_kwargs(prefix, kwargs)
+    )
+    if path.is_file() and not check_should_force(kwargs):
+        df = pd.read_csv(path, sep="\t").set_index(0)
+        return df
+
+    id_to_name = get_id_name_mapping(prefix, **kwargs)
+
     luids, texts = [], []
-    for reference in get_references(prefix):
-        text = _get_text(reference)
+    for identifier, name in tqdm(id_to_name.items(), desc=f"[{prefix}] constructing text"):
+        text = _get_text(curies.ReferenceTuple(prefix, identifier), name=name, **kwargs)
         if text is None:
             continue
-        luids.append(reference.identifier)
+        luids.append(identifier)
         texts.append(text)
     if model is None:
         model = get_text_embedding_model()
-    res = model.encode(texts)
-    return pd.DataFrame(res, index=luids)
+    res = model.encode(texts, show_progress_bar=True)
+    df = pd.DataFrame(res, index=luids)
+    df.to_csv(path, sep="\t")  # index is important here!
+    return df


 def get_text_embedding(
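
With these changes, the module covers two flavors of embedding: `get_text_embeddings_df` encodes each entity's name plus definition with a sentence transformer and caches the result per version, while the new `get_graph_embeddings_df` trains either a PyKEEN PairRE model or a GRAPE second-order LINE model on the ontology's edge list. A rough usage sketch, assuming the optional `pykeen`/`torch` and `sentence-transformers` dependencies are installed (the prefix is an arbitrary example):

    from pyobo.api.embedding import get_graph_embeddings_df, get_text_embeddings_df

    # text embeddings: rows are local unique identifiers, columns are vector dims;
    # results are cached under the resource's version-specific cache directory
    text_df = get_text_embeddings_df("doid")

    # graph embeddings: PairRE trained for 30 epochs at dimension 32 by default
    graph_df = get_graph_embeddings_df("doid", method="pykeen")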
pyobo/api/names.py CHANGED
@@ -6,7 +6,7 @@ import logging
 import subprocess
 from collections.abc import Callable, Mapping
 from functools import lru_cache
-from typing import Any, TypeVar
+from typing import TypeVar

 import curies
 import pandas as pd
@@ -49,9 +49,15 @@ __all__ = [
 logger = logging.getLogger(__name__)


-def get_name_by_curie(curie: str, **kwargs: Any) -> str | None:
+def get_name_by_curie(
+    curie: str,
+    /,
+    *,
+    upgrade_identifier: bool | None = None,
+    **kwargs: Unpack[GetOntologyKwargs],
+) -> str | None:
     """Get the name for a CURIE, if possible."""
-    return get_name(curie, **kwargs)
+    return get_name(curie, upgrade_identifier=upgrade_identifier, **kwargs)


 X = TypeVar("X")
@@ -63,6 +69,8 @@ NO_BUILD_LOGGED: set = set()
 def _help_get(
     f: Callable[[str, Unpack[GetOntologyKwargs]], Mapping[str, X]],
     reference: Reference,
+    *,
+    upgrade_identifier: bool | None = None,
     **kwargs: Unpack[GetOntologyKwargs],
 ) -> X | None:
     """Get the result for an entity based on a mapping maker function ``f``."""
@@ -87,19 +95,32 @@ def _help_get(
         NO_BUILD_PREFIXES.add(reference.prefix)
         return None

-    primary_id = get_primary_identifier(reference, **kwargs)
-    return mapping.get(primary_id)
+    if upgrade_identifier is None:
+        if reference.identifier in mapping:
+            return mapping[reference.identifier]
+        else:
+            primary_id = get_primary_identifier(reference, **kwargs)
+            return mapping.get(primary_id)
+    elif upgrade_identifier is True:
+        primary_id = get_primary_identifier(reference, **kwargs)
+        return mapping.get(primary_id)
+    else:
+        return mapping.get(reference.identifier)


 def get_name(
     prefix: str | curies.Reference | curies.ReferenceTuple,
     identifier: str | None = None,
     /,
+    *,
+    upgrade_identifier: bool | None = None,
     **kwargs: Unpack[GetOntologyKwargs],
 ) -> str | None:
     """Get the name for an entity."""
     reference = _get_pi(prefix, identifier)
-    return _help_get(get_id_name_mapping, reference, **kwargs)
+    return _help_get(
+        get_id_name_mapping, reference, upgrade_identifier=upgrade_identifier, **kwargs
+    )


 @lru_cache
@@ -325,6 +346,7 @@ def get_literal_mappings(
     return rv


+@wrap_norm_prefix
 def get_literal_mappings_df(
     prefix: str,
     **kwargs: Unpack[GetOntologyKwargs],
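
The new `upgrade_identifier` flag makes the alt-identifier fallback in `_help_get` explicit: `None` (the default) tries the identifier as given and only then upgrades to the primary identifier, `True` always upgrades first, and `False` looks up the identifier exactly as given. A minimal sketch (the identifier is an arbitrary illustration):

    from pyobo import get_name

    # default: direct lookup first, then fall back to the primary identifier
    get_name("hgnc", "5")

    # always resolve through get_primary_identifier before the lookup
    get_name("hgnc", "5", upgrade_identifier=True)

    # never upgrade; misses if the identifier is only an alt identifier
    get_name("hgnc", "5", upgrade_identifier=False)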
pyobo/api/xrefs.py CHANGED
@@ -5,8 +5,11 @@ import warnings
 from collections.abc import Mapping
 from functools import lru_cache

+import curies
 import pandas as pd
 from curies import ReferenceTuple
+from sssom_pydantic import SemanticMapping
+from sssom_pydantic.io import parse_record, parse_row
 from typing_extensions import Unpack

 from .utils import get_version_from_kwargs
@@ -19,7 +22,7 @@ from ..constants import (
     check_should_use_tqdm,
 )
 from ..getters import get_ontology
-from ..identifier_utils import wrap_norm_prefix
+from ..identifier_utils import get_converter, wrap_norm_prefix
 from ..struct import Obo
 from ..utils.cache import cached_df
 from ..utils.path import CacheArtifact, get_cache_path
@@ -27,6 +30,7 @@ from ..utils.path import CacheArtifact, get_cache_path
 __all__ = [
     "get_filtered_xrefs",
     "get_mappings_df",
+    "get_semantic_mappings",
     "get_sssom_df",
     "get_xref",
     "get_xrefs",
@@ -107,6 +111,27 @@ def get_sssom_df(
     return get_mappings_df(prefix=prefix, names=names, **kwargs)


+def get_semantic_mappings(
+    prefix: str,
+    converter: curies.Converter | None = None,
+    names: bool = True,
+    include_mapping_source_column: bool = False,
+    **kwargs: Unpack[GetOntologyKwargs],
+) -> list[SemanticMapping]:
+    """Get semantic mapping objects."""
+    df = get_mappings_df(
+        prefix, names=names, include_mapping_source_column=include_mapping_source_column, **kwargs
+    )
+    if converter is None:
+        converter = get_converter()
+    rv = []
+    for _, row in df.iterrows():
+        record = parse_row(row.to_dict())
+        mapping = parse_record(record, converter=converter)
+        rv.append(mapping)
+    return rv
+
+
 def get_mappings_df(
     prefix: str | Obo,
     *,
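
The new `get_semantic_mappings` builds on `get_mappings_df` by parsing each SSSOM row into an `sssom_pydantic.SemanticMapping` object, falling back to the package's default converter when none is given. A minimal sketch (the prefix is chosen arbitrarily):

    from pyobo import get_semantic_mappings

    # each row of the SSSOM mappings dataframe becomes a SemanticMapping object
    mappings = get_semantic_mappings("doid")
    for mapping in mappings[:3]:
        print(mapping)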
pyobo/constants.py CHANGED
@@ -14,8 +14,21 @@ from typing_extensions import NotRequired, TypedDict

 __all__ = [
     "DATABASE_DIRECTORY",
+    "DEFAULT_PREFIX_MAP",
+    "ONTOLOGY_GETTERS",
+    "PROVENANCE_PREFIXES",
     "RAW_DIRECTORY",
     "SPECIES_REMAPPING",
+    "DatabaseKwargs",
+    "GetOntologyKwargs",
+    "IterHelperHelperDict",
+    "LookupKwargs",
+    "OntologyFormat",
+    "OntologyPathPack",
+    "SlimGetOntologyKwargs",
+    "check_should_cache",
+    "check_should_force",
+    "check_should_use_tqdm",
 ]

 logger = logging.getLogger(__name__)
@@ -96,6 +109,8 @@ SPECIES_FILE = "species.tsv.gz"

 NCBITAXON_PREFIX = "ncbitaxon"
 DATE_FORMAT = "%d:%m:%Y %H:%M"
+
+#: Prefixes for resources that are considered as provenance
 PROVENANCE_PREFIXES = {
     "pubmed",
     "pmc",
@@ -117,13 +132,21 @@ PROVENANCE_PREFIXES = {
 class DatabaseKwargs(TypedDict):
     """Keyword arguments for database CLI functions."""

+    #: Should strict identifier parsing be enabled?
     strict: bool
+    #: Should re-download and re-processing be forced?
     force: bool
+    #: Should re-processing be forced?
     force_process: bool
-    skip_pyobo: bool
+
+    #: Should a progress bar be used?
+    use_tqdm: bool
+    #: Skip all prefixes lexicographically sorted below the given prefix
     skip_below: str | None
+    #: If true, skips prefixes that are ontologized as sources in PyOBO
+    skip_pyobo: bool
+    #: An enumerated set of prefixes to skip
     skip_set: set[str] | None
-    use_tqdm: bool


 class SlimGetOntologyKwargs(TypedDict):
@@ -134,8 +157,11 @@ class SlimGetOntologyKwargs(TypedDict):
     only a single ontology is requested.
     """

+    #: Should strict identifier parsing be enabled?
     strict: NotRequired[bool]
+    #: Should re-download and re-processing be forced?
     force: NotRequired[bool]
+    #: Should re-processing be forced?
     force_process: NotRequired[bool]


@@ -145,8 +171,11 @@ class GetOntologyKwargs(SlimGetOntologyKwargs):
     This dictionary doesn't contain ``prefix`` since this is always explicitly handled.
     """

+    #: The version of the ontology to get
     version: NotRequired[str | None]
+    #: Should the cache be used?
     cache: NotRequired[bool]
+    #: Should a progress bar be used?
     use_tqdm: NotRequired[bool]


@@ -186,12 +215,17 @@ class IterHelperHelperDict(SlimGetOntologyKwargs):
     :func:`pyobo.get_ontology` in each iteration.
     """

+    #: Should a progress bar be used?
     use_tqdm: bool
+    #: Skip all prefixes lexicographically sorted below the given prefix
     skip_below: str | None
+    #: If true, skips prefixes that are ontologized as sources in PyOBO
     skip_pyobo: bool
+    #: An enumerated set of prefixes to skip
     skip_set: set[str] | None


+#: The ontology format
 OntologyFormat: TypeAlias = Literal["obo", "owl", "json", "rdf"]

 #: from table 2 of the Functional OWL syntax definition
@@ -207,7 +241,9 @@ DEFAULT_PREFIX_MAP = {
 class OntologyPathPack(NamedTuple):
     """A format and path tuple."""

+    #: The ontology format
     format: OntologyFormat
+    #: The path to the ontology file
     path: Path

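These `#:` comments document the TypedDicts that thread keyword arguments through the lookup functions. A minimal sketch of how such typed kwargs are consumed downstream, using the now-exported `check_should_force` helper (the function itself is hypothetical):

    from typing_extensions import Unpack

    from pyobo.constants import GetOntologyKwargs, check_should_force


    def lookup_something(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> None:
        """A hypothetical lookup honoring strict/force/force_process/version/cache/use_tqdm."""
        if check_should_force(kwargs):
            ...  # bypass cached artifacts and rebuild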
 
pyobo/getters.py CHANGED
@@ -45,8 +45,14 @@ from .utils.path import ensure_path, prefix_directory_join
 from .version import get_git_hash, get_version

 __all__ = [
+    "REQUIRES_NO_ROBOT_CHECK",
+    "SKIP",
     "NoBuildError",
+    "UnhandledFormatError",
+    "db_output_helper",
     "get_ontology",
+    "iter_helper",
+    "iter_helper_helper",
 ]

 logger = logging.getLogger(__name__)
@@ -112,8 +118,6 @@ def get_ontology(

     :returns: An OBO object

-    :raises OnlyOWLError: If the OBO foundry only has an OWL document for this resource.
-
     Alternate usage if you have a custom url

     .. code-block:: python
@@ -220,7 +224,8 @@ def _ensure_ontology_path(
     return None


-SKIP = {
+#: A dictioanry of prefixes to skip during full build with reasons as values
+SKIP: dict[str, str] = {
     "ncbigene": "too big, refs acquired from other dbs",
     "pubchem.compound": "top big, can't deal with this now",
     "gaz": "Gazetteer is irrelevant for biology",
pyobo/ner/api.py CHANGED
@@ -2,6 +2,7 @@

 from __future__ import annotations

+import logging
 from collections.abc import Iterable
 from subprocess import CalledProcessError
 from typing import TYPE_CHECKING
@@ -22,6 +23,8 @@ __all__ = [
     "get_grounder",
 ]

+logger = logging.getLogger(__name__)
+

 def get_grounder(
     prefixes: str | Iterable[str],
@@ -32,22 +35,23 @@
     **kwargs: Unpack[GetOntologyKwargs],
 ) -> ssslm.Grounder:
     """Get a grounder for the given prefix(es)."""
-    literal_mappings: list[LiteralMapping] = []
+    all_literal_mappings: list[LiteralMapping] = []
     it = _clean_prefix_versions(prefixes, versions=versions)
     disable = len(it) == 1 or not check_should_use_tqdm(kwargs)
     for prefix, kwargs["version"] in tqdm(it, leave=False, disable=disable):
         try:
-            literal_mappings.extend(
-                get_literal_mappings(
-                    prefix,
-                    skip_obsolete=skip_obsolete,
-                    **kwargs,
-                )
-            )
-        except (NoBuildError, CalledProcessError):
+            literal_mappings = get_literal_mappings(prefix, skip_obsolete=skip_obsolete, **kwargs)
+        except (NoBuildError, CalledProcessError) as e:
+            logger.warning("[%s] unable to get literal mappings: %s", prefix, e)
             continue
+        else:
+            if not literal_mappings:
+                logger.warning("[%s] no literal mappings loaded", prefix)
+            all_literal_mappings.extend(literal_mappings)

-    return ssslm.make_grounder(literal_mappings, implementation="gilda", grounder_cls=grounder_cls)
+    return ssslm.make_grounder(
+        all_literal_mappings, implementation="gilda", grounder_cls=grounder_cls
+    )


 def _clean_prefix_versions(
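
The reworked loop means one failing resource no longer aborts grounder construction: build failures are logged as warnings and skipped, and prefixes that yield no mappings are flagged. A usage sketch (prefixes arbitrary; `get_matches` is assumed here to be the matching method on ssslm's grounder interface):

    import pyobo

    # prefixes that fail to build are logged and skipped rather than
    # short-circuiting the whole grounder
    grounder = pyobo.get_grounder(["doid", "mesh"])
    matches = grounder.get_matches("Alzheimer disease")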