pyobo-0.11.2-py3-none-any.whl → pyobo-0.12.0-py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (227)
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -113
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +108 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +183 -161
  20. pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +196 -118
  25. pyobo/gilda_utils.py +79 -200
  26. pyobo/identifier_utils/__init__.py +41 -0
  27. pyobo/identifier_utils/api.py +296 -0
  28. pyobo/identifier_utils/model.py +130 -0
  29. pyobo/identifier_utils/preprocessing.json +812 -0
  30. pyobo/identifier_utils/preprocessing.py +61 -0
  31. pyobo/identifier_utils/relations/__init__.py +8 -0
  32. pyobo/identifier_utils/relations/api.py +162 -0
  33. pyobo/identifier_utils/relations/data.json +5824 -0
  34. pyobo/identifier_utils/relations/data_owl.json +57 -0
  35. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  36. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  37. pyobo/mocks.py +9 -6
  38. pyobo/ner/__init__.py +9 -0
  39. pyobo/ner/api.py +72 -0
  40. pyobo/ner/normalizer.py +33 -0
  41. pyobo/obographs.py +43 -39
  42. pyobo/plugins.py +5 -4
  43. pyobo/py.typed +0 -0
  44. pyobo/reader.py +1358 -395
  45. pyobo/reader_utils.py +155 -0
  46. pyobo/resource_utils.py +42 -22
  47. pyobo/resources/__init__.py +0 -0
  48. pyobo/resources/goc.py +75 -0
  49. pyobo/resources/goc.tsv +188 -0
  50. pyobo/resources/ncbitaxon.py +4 -5
  51. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  52. pyobo/resources/ro.py +3 -2
  53. pyobo/resources/ro.tsv +0 -0
  54. pyobo/resources/so.py +0 -0
  55. pyobo/resources/so.tsv +0 -0
  56. pyobo/sources/README.md +12 -8
  57. pyobo/sources/__init__.py +52 -29
  58. pyobo/sources/agrovoc.py +0 -0
  59. pyobo/sources/antibodyregistry.py +11 -12
  60. pyobo/sources/bigg/__init__.py +13 -0
  61. pyobo/sources/bigg/bigg_compartment.py +81 -0
  62. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  63. pyobo/sources/bigg/bigg_model.py +46 -0
  64. pyobo/sources/bigg/bigg_reaction.py +77 -0
  65. pyobo/sources/biogrid.py +1 -2
  66. pyobo/sources/ccle.py +7 -12
  67. pyobo/sources/cgnc.py +0 -5
  68. pyobo/sources/chebi.py +1 -1
  69. pyobo/sources/chembl/__init__.py +9 -0
  70. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  71. pyobo/sources/chembl/chembl_target.py +160 -0
  72. pyobo/sources/civic_gene.py +55 -15
  73. pyobo/sources/clinicaltrials.py +160 -0
  74. pyobo/sources/complexportal.py +24 -24
  75. pyobo/sources/conso.py +14 -22
  76. pyobo/sources/cpt.py +0 -0
  77. pyobo/sources/credit.py +1 -9
  78. pyobo/sources/cvx.py +27 -5
  79. pyobo/sources/depmap.py +9 -12
  80. pyobo/sources/dictybase_gene.py +2 -7
  81. pyobo/sources/drugbank/__init__.py +9 -0
  82. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  83. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  84. pyobo/sources/drugcentral.py +17 -13
  85. pyobo/sources/expasy.py +31 -34
  86. pyobo/sources/famplex.py +13 -18
  87. pyobo/sources/flybase.py +3 -8
  88. pyobo/sources/gard.py +62 -0
  89. pyobo/sources/geonames/__init__.py +9 -0
  90. pyobo/sources/geonames/features.py +28 -0
  91. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  92. pyobo/sources/geonames/utils.py +115 -0
  93. pyobo/sources/gmt_utils.py +6 -7
  94. pyobo/sources/go.py +20 -13
  95. pyobo/sources/gtdb.py +154 -0
  96. pyobo/sources/gwascentral/__init__.py +9 -0
  97. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  98. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  99. pyobo/sources/hgnc/__init__.py +9 -0
  100. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  101. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  102. pyobo/sources/icd/__init__.py +9 -0
  103. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  104. pyobo/sources/icd/icd11.py +148 -0
  105. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  106. pyobo/sources/interpro.py +4 -9
  107. pyobo/sources/itis.py +0 -5
  108. pyobo/sources/kegg/__init__.py +0 -0
  109. pyobo/sources/kegg/api.py +16 -38
  110. pyobo/sources/kegg/genes.py +9 -20
  111. pyobo/sources/kegg/genome.py +1 -7
  112. pyobo/sources/kegg/pathway.py +9 -21
  113. pyobo/sources/mesh.py +58 -24
  114. pyobo/sources/mgi.py +3 -10
  115. pyobo/sources/mirbase/__init__.py +11 -0
  116. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  117. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  118. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  119. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  120. pyobo/sources/msigdb.py +74 -39
  121. pyobo/sources/ncbi/__init__.py +9 -0
  122. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  123. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  124. pyobo/sources/nih_reporter.py +60 -0
  125. pyobo/sources/nlm/__init__.py +9 -0
  126. pyobo/sources/nlm/nlm_catalog.py +48 -0
  127. pyobo/sources/nlm/nlm_publisher.py +36 -0
  128. pyobo/sources/nlm/utils.py +116 -0
  129. pyobo/sources/npass.py +6 -8
  130. pyobo/sources/omim_ps.py +10 -3
  131. pyobo/sources/pathbank.py +4 -8
  132. pyobo/sources/pfam/__init__.py +9 -0
  133. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  134. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  135. pyobo/sources/pharmgkb/__init__.py +15 -0
  136. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  137. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  138. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  139. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  140. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  141. pyobo/sources/pharmgkb/utils.py +86 -0
  142. pyobo/sources/pid.py +1 -6
  143. pyobo/sources/pombase.py +6 -10
  144. pyobo/sources/pubchem.py +4 -9
  145. pyobo/sources/reactome.py +5 -11
  146. pyobo/sources/rgd.py +11 -16
  147. pyobo/sources/rhea.py +37 -36
  148. pyobo/sources/ror.py +69 -42
  149. pyobo/sources/selventa/__init__.py +0 -0
  150. pyobo/sources/selventa/schem.py +4 -7
  151. pyobo/sources/selventa/scomp.py +1 -6
  152. pyobo/sources/selventa/sdis.py +4 -7
  153. pyobo/sources/selventa/sfam.py +1 -6
  154. pyobo/sources/sgd.py +6 -11
  155. pyobo/sources/signor/__init__.py +7 -0
  156. pyobo/sources/signor/download.py +41 -0
  157. pyobo/sources/signor/signor_complexes.py +105 -0
  158. pyobo/sources/slm.py +12 -15
  159. pyobo/sources/umls/__init__.py +7 -1
  160. pyobo/sources/umls/__main__.py +0 -0
  161. pyobo/sources/umls/get_synonym_types.py +20 -4
  162. pyobo/sources/umls/sty.py +57 -0
  163. pyobo/sources/umls/synonym_types.tsv +1 -1
  164. pyobo/sources/umls/umls.py +18 -22
  165. pyobo/sources/unimod.py +46 -0
  166. pyobo/sources/uniprot/__init__.py +1 -1
  167. pyobo/sources/uniprot/uniprot.py +40 -32
  168. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  169. pyobo/sources/utils.py +3 -2
  170. pyobo/sources/wikipathways.py +7 -10
  171. pyobo/sources/zfin.py +5 -10
  172. pyobo/ssg/__init__.py +12 -16
  173. pyobo/ssg/base.html +0 -0
  174. pyobo/ssg/index.html +26 -13
  175. pyobo/ssg/term.html +12 -2
  176. pyobo/ssg/typedef.html +0 -0
  177. pyobo/struct/__init__.py +54 -8
  178. pyobo/struct/functional/__init__.py +1 -0
  179. pyobo/struct/functional/dsl.py +2572 -0
  180. pyobo/struct/functional/macros.py +423 -0
  181. pyobo/struct/functional/obo_to_functional.py +385 -0
  182. pyobo/struct/functional/ontology.py +270 -0
  183. pyobo/struct/functional/utils.py +112 -0
  184. pyobo/struct/reference.py +331 -136
  185. pyobo/struct/struct.py +1413 -643
  186. pyobo/struct/struct_utils.py +1078 -0
  187. pyobo/struct/typedef.py +162 -210
  188. pyobo/struct/utils.py +12 -5
  189. pyobo/struct/vocabulary.py +138 -0
  190. pyobo/utils/__init__.py +0 -0
  191. pyobo/utils/cache.py +13 -11
  192. pyobo/utils/io.py +17 -31
  193. pyobo/utils/iter.py +5 -5
  194. pyobo/utils/misc.py +41 -53
  195. pyobo/utils/ndex_utils.py +0 -0
  196. pyobo/utils/path.py +76 -70
  197. pyobo/version.py +3 -3
  198. {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
  199. pyobo-0.12.0.dist-info/RECORD +202 -0
  200. pyobo-0.12.0.dist-info/WHEEL +4 -0
  201. {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
  202. pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
  203. pyobo/aws.py +0 -162
  204. pyobo/cli/aws.py +0 -47
  205. pyobo/identifier_utils.py +0 -142
  206. pyobo/normalizer.py +0 -232
  207. pyobo/registries/__init__.py +0 -16
  208. pyobo/registries/metaregistry.json +0 -507
  209. pyobo/registries/metaregistry.py +0 -135
  210. pyobo/sources/icd11.py +0 -105
  211. pyobo/xrefdb/__init__.py +0 -1
  212. pyobo/xrefdb/canonicalizer.py +0 -214
  213. pyobo/xrefdb/priority.py +0 -59
  214. pyobo/xrefdb/sources/__init__.py +0 -60
  215. pyobo/xrefdb/sources/biomappings.py +0 -36
  216. pyobo/xrefdb/sources/cbms2019.py +0 -91
  217. pyobo/xrefdb/sources/chembl.py +0 -83
  218. pyobo/xrefdb/sources/compath.py +0 -82
  219. pyobo/xrefdb/sources/famplex.py +0 -64
  220. pyobo/xrefdb/sources/gilda.py +0 -50
  221. pyobo/xrefdb/sources/intact.py +0 -113
  222. pyobo/xrefdb/sources/ncit.py +0 -133
  223. pyobo/xrefdb/sources/pubchem.py +0 -27
  224. pyobo/xrefdb/sources/wikidata.py +0 -116
  225. pyobo-0.11.2.dist-info/RECORD +0 -157
  226. pyobo-0.11.2.dist-info/WHEEL +0 -5
  227. pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/api/typedefs.py CHANGED
@@ -2,15 +2,16 @@
2
2
 
3
3
  import logging
4
4
  from functools import lru_cache
5
- from typing import Optional
6
5
 
7
6
  import pandas as pd
7
+ from typing_extensions import Unpack
8
8
 
9
- from .utils import get_version
9
+ from .utils import get_version_from_kwargs
10
+ from ..constants import GetOntologyKwargs, check_should_cache, check_should_force
10
11
  from ..getters import get_ontology
11
12
  from ..identifier_utils import wrap_norm_prefix
12
13
  from ..utils.cache import cached_df
13
- from ..utils.path import prefix_cache_join
14
+ from ..utils.path import CacheArtifact, get_cache_path
14
15
 
15
16
  __all__ = [
16
17
  "get_typedef_df",
@@ -21,18 +22,17 @@ logger = logging.getLogger(__name__)
21
22
 
22
23
  @lru_cache
23
24
  @wrap_norm_prefix
24
- def get_typedef_df(
25
- prefix: str, *, force: bool = False, version: Optional[str] = None
26
- ) -> pd.DataFrame:
25
+ def get_typedef_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
27
26
  """Get an identifier to name mapping for the typedefs in an OBO file."""
28
- if version is None:
29
- version = get_version(prefix)
30
- path = prefix_cache_join(prefix, name="typedefs.tsv", version=version)
27
+ version = get_version_from_kwargs(prefix, kwargs)
28
+ path = get_cache_path(prefix, CacheArtifact.typedefs, version=version)
31
29
 
32
- @cached_df(path=path, dtype=str, force=force)
30
+ @cached_df(
31
+ path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
32
+ )
33
33
  def _df_getter() -> pd.DataFrame:
34
34
  logger.debug("[%s] no cached typedefs found. getting from OBO loader", prefix)
35
- ontology = get_ontology(prefix, force=force, version=version)
35
+ ontology = get_ontology(prefix, **kwargs)
36
36
  logger.debug("[%s] loading typedef mappings", prefix)
37
37
  return ontology.get_typedef_df()
38
38
 
pyobo/api/utils.py CHANGED
@@ -3,18 +3,23 @@
3
3
  import json
4
4
  import logging
5
5
  import os
6
+ import warnings
6
7
  from functools import lru_cache
7
- from typing import Optional
8
+ from typing import Literal, overload
8
9
 
9
10
  import bioversions
11
+ import curies
12
+ from bioregistry import NormalizedNamableReference as Reference
13
+ from curies import ReferenceTuple
10
14
 
15
+ from ..constants import GetOntologyKwargs
11
16
  from ..utils.path import prefix_directory_join
12
17
 
13
18
  __all__ = [
14
- "safe_get_version",
19
+ "VersionError",
15
20
  "get_version",
16
21
  "get_version_pins",
17
- "VersionError",
22
+ "safe_get_version",
18
23
  ]
19
24
 
20
25
  logger = logging.getLogger(__name__)
@@ -24,11 +29,25 @@ class VersionError(ValueError):
24
29
  """A catch-all for version getting failure."""
25
30
 
26
31
 
27
- def get_version(prefix: str) -> Optional[str]:
32
+ # docstr-coverage:excused `overload`
33
+ @overload
34
+ def get_version(prefix: str, *, strict: Literal[True] = True) -> str: ...
35
+
36
+
37
+ # docstr-coverage:excused `overload`
38
+ @overload
39
+ def get_version(prefix: str, *, strict: Literal[False] = False) -> str | None: ...
40
+
41
+
42
+ def get_version(prefix: str, *, strict: bool = False) -> str | None:
28
43
  """Get the version for the resource, if available.
29
44
 
30
45
  :param prefix: the resource name
31
- :return: The version if available else None
46
+ :param strict: Should an error be raised if no version is available?
47
+
48
+ :returns: The version if available else None
49
+
50
+ :raises VersionError: if the version is not available and strict mode is enabled
32
51
  """
33
52
  # Prioritize loaded environment variable PYOBO_VERSION_PINS dictionary
34
53
  version = get_version_pins().get(prefix)
@@ -47,13 +66,27 @@ def get_version(prefix: str) -> Optional[str]:
47
66
  metadata_json_path = prefix_directory_join(prefix, name="metadata.json", ensure_exists=False)
48
67
  if metadata_json_path.exists():
49
68
  data = json.loads(metadata_json_path.read_text())
50
- return data["version"]
69
+ version = data["version"]
70
+ if version:
71
+ return version
72
+
73
+ if strict:
74
+ raise ValueError
51
75
 
52
76
  return None
53
77
 
54
78
 
79
+ def get_version_from_kwargs(prefix: str, kwargs: GetOntologyKwargs) -> str | None:
80
+ """Get the version for the resource based on generic keyword arguments."""
81
+ if version := kwargs.get("version"):
82
+ return version
83
+ # it's okay if none gets returned after getting this far, we at least tried
84
+ return get_version(prefix, strict=False)
85
+
86
+
55
87
  def safe_get_version(prefix: str) -> str:
56
88
  """Get the version."""
89
+ # FIXME replace with get_version(prefix, strict=True)
57
90
  v = get_version(prefix)
58
91
  if v is None:
59
92
  raise ValueError
@@ -65,13 +98,12 @@ def get_version_pins() -> dict[str, str]:
65
98
  """Retrieve user-defined resource version pins.
66
99
 
67
100
  To set your own resource pins, set your machine's environmental variable
68
- "PYOBO_VERSION_PINS" to a JSON string containing string resource prefixes
69
- as keys and string versions of their respective resource as values.
70
- Constraining version pins will make PyOBO rely on cached versions of a resource.
71
- A user might want to pin resource versions that are used by PyOBO due to
72
- the fact that PyOBO will download the latest version of a resource if it is
73
- not pinned. This downloading process can lead to a slow-down in downstream
74
- applications that rely on PyOBO.
101
+ "PYOBO_VERSION_PINS" to a JSON string containing string resource prefixes as keys
102
+ and string versions of their respective resource as values. Constraining version
103
+ pins will make PyOBO rely on cached versions of a resource. A user might want to pin
104
+ resource versions that are used by PyOBO due to the fact that PyOBO will download
105
+ the latest version of a resource if it is not pinned. This downloading process can
106
+ lead to a slow-down in downstream applications that rely on PyOBO.
75
107
  """
76
108
  version_pins_str = os.getenv("PYOBO_VERSION_PINS")
77
109
  if not version_pins_str:
@@ -102,3 +134,24 @@ def get_version_pins() -> dict[str, str]:
102
134
  f"name."
103
135
  )
104
136
  return version_pins
137
+
138
+
139
+ def _get_pi(
140
+ prefix: str | curies.Reference | ReferenceTuple, identifier: str | None = None, /
141
+ ) -> Reference:
142
+ if isinstance(prefix, ReferenceTuple | curies.Reference):
143
+ if identifier is not None:
144
+ raise ValueError("unexpected non-none value passed as second positional argument")
145
+ return Reference(prefix=prefix.prefix, identifier=prefix.identifier)
146
+ if isinstance(prefix, str) and identifier is None:
147
+ return Reference.from_curie(prefix)
148
+ if identifier is None:
149
+ raise ValueError(
150
+ "prefix was given as a string, so an identifier was expected to be passed as a string as well"
151
+ )
152
+ warnings.warn(
153
+ "Passing a prefix and identifier as seperate arguments is deprecated. Please pass a curies.Reference or curies.ReferenceTuple in the first positional-only argument instead.",
154
+ DeprecationWarning,
155
+ stacklevel=4, # this is 4 since this is (always?) called from inside a decorator
156
+ )
157
+ return Reference(prefix=prefix, identifier=identifier)
pyobo/api/xrefs.py CHANGED
@@ -1,28 +1,36 @@
1
1
  """High-level API for synonyms."""
2
2
 
3
3
  import logging
4
+ import warnings
4
5
  from collections.abc import Mapping
5
6
  from functools import lru_cache
6
- from typing import Optional, Union
7
7
 
8
8
  import pandas as pd
9
- from tqdm.auto import tqdm
10
- from tqdm.contrib.logging import logging_redirect_tqdm
11
-
12
- from .utils import get_version
13
- from ..constants import TARGET_ID, TARGET_PREFIX
9
+ from curies import ReferenceTuple
10
+ from typing_extensions import Unpack
11
+
12
+ from .utils import get_version_from_kwargs
13
+ from ..constants import (
14
+ TARGET_ID,
15
+ TARGET_PREFIX,
16
+ GetOntologyKwargs,
17
+ check_should_cache,
18
+ check_should_force,
19
+ check_should_use_tqdm,
20
+ )
14
21
  from ..getters import get_ontology
15
22
  from ..identifier_utils import wrap_norm_prefix
16
- from ..struct import Obo, Reference
17
- from ..utils.cache import cached_df, cached_mapping
18
- from ..utils.path import prefix_cache_join
23
+ from ..struct import Obo
24
+ from ..utils.cache import cached_df
25
+ from ..utils.path import CacheArtifact, get_cache_path
19
26
 
20
27
  __all__ = [
21
- "get_xrefs_df",
22
28
  "get_filtered_xrefs",
29
+ "get_mappings_df",
30
+ "get_sssom_df",
23
31
  "get_xref",
24
32
  "get_xrefs",
25
- "get_sssom_df",
33
+ "get_xrefs_df",
26
34
  ]
27
35
 
28
36
  logger = logging.getLogger(__name__)
@@ -35,10 +43,10 @@ def get_xref(
35
43
  new_prefix: str,
36
44
  *,
37
45
  flip: bool = False,
38
- version: Optional[str] = None,
39
- ) -> Optional[str]:
46
+ **kwargs: Unpack[GetOntologyKwargs],
47
+ ) -> str | None:
40
48
  """Get the xref with the new prefix if a direct path exists."""
41
- filtered_xrefs = get_filtered_xrefs(prefix, new_prefix, flip=flip, version=version)
49
+ filtered_xrefs = get_filtered_xrefs(prefix, new_prefix, flip=flip, **kwargs)
42
50
  return filtered_xrefs.get(identifier)
43
51
 
44
52
 
@@ -49,32 +57,18 @@ def get_filtered_xrefs(
49
57
  xref_prefix: str,
50
58
  *,
51
59
  flip: bool = False,
52
- use_tqdm: bool = False,
53
- force: bool = False,
54
- strict: bool = False,
55
- version: Optional[str] = None,
60
+ **kwargs: Unpack[GetOntologyKwargs],
56
61
  ) -> Mapping[str, str]:
57
62
  """Get xrefs to a given target."""
58
- if version is None:
59
- version = get_version(prefix)
60
- path = prefix_cache_join(prefix, "xrefs", name=f"{xref_prefix}.tsv", version=version)
61
- all_xrefs_path = prefix_cache_join(prefix, name="xrefs.tsv", version=version)
62
- header = [f"{prefix}_id", f"{xref_prefix}_id"]
63
-
64
- @cached_mapping(path=path, header=header, use_tqdm=use_tqdm, force=force)
65
- def _get_mapping() -> Mapping[str, str]:
66
- if all_xrefs_path.is_file():
67
- logger.info("[%s] loading pre-cached xrefs", prefix)
68
- df = pd.read_csv(all_xrefs_path, sep="\t", dtype=str)
69
- logger.info("[%s] filtering pre-cached xrefs", prefix)
70
- df = df.loc[df[TARGET_PREFIX] == xref_prefix, [f"{prefix}_id", TARGET_ID]]
71
- return dict(df.values)
72
-
73
- logger.info("[%s] no cached xrefs found. getting from OBO loader", prefix)
74
- ontology = get_ontology(prefix, force=force, strict=strict, version=version)
75
- return ontology.get_filtered_xrefs_mapping(xref_prefix, use_tqdm=use_tqdm)
76
-
77
- rv = _get_mapping()
63
+ mappings_df = get_mappings_df(prefix, **kwargs)
64
+
65
+ rv = {}
66
+ for subject_curie, object_curie in mappings_df[["subject_id", "object_id"]].values:
67
+ subject_pair = ReferenceTuple.from_curie(subject_curie)
68
+ object_pair = ReferenceTuple.from_curie(object_curie)
69
+ if object_pair.prefix == xref_prefix:
70
+ rv[subject_pair.identifier] = object_pair.identifier
71
+
78
72
  if flip:
79
73
  return {v: k for k, v in rv.items()}
80
74
  return rv
@@ -84,104 +78,104 @@ get_xrefs = get_filtered_xrefs
84
78
 
85
79
 
86
80
  @wrap_norm_prefix
87
- def get_xrefs_df(
88
- prefix: str,
89
- *,
90
- use_tqdm: bool = False,
91
- force: bool = False,
92
- strict: bool = False,
93
- version: Optional[str] = None,
94
- ) -> pd.DataFrame:
81
+ def get_xrefs_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
95
82
  """Get all xrefs."""
96
- if version is None:
97
- version = get_version(prefix)
98
- path = prefix_cache_join(prefix, name="xrefs.tsv", version=version)
83
+ warnings.warn(
84
+ f"use pyobo.get_mappings_df instead of pyobo.get_xrefs_df."
85
+ f"Not using cache artifact path to {CacheArtifact.xrefs}",
86
+ DeprecationWarning,
87
+ stacklevel=2,
88
+ )
99
89
 
100
- @cached_df(path=path, dtype=str, force=force)
101
- def _df_getter() -> pd.DataFrame:
102
- logger.info("[%s] no cached xrefs found. getting from OBO loader", prefix)
103
- ontology = get_ontology(prefix, force=force, strict=strict, version=version)
104
- return ontology.get_xrefs_df(use_tqdm=use_tqdm)
90
+ mappings_df = get_mappings_df(prefix, **kwargs)
105
91
 
106
- return _df_getter()
92
+ rows = []
93
+ for subject_curie, object_curie in mappings_df[["subject_id", "object_id"]].values:
94
+ subject_pair = ReferenceTuple.from_curie(subject_curie)
95
+ object_pair = ReferenceTuple.from_curie(object_curie)
96
+ rows.append((subject_pair.identifier, object_pair.prefix, object_pair.identifier))
97
+
98
+ df = pd.DataFrame(rows, columns=[f"{prefix}_id", TARGET_PREFIX, TARGET_ID])
99
+ df = df.drop_duplicates()
100
+ return df
107
101
 
108
102
 
109
103
  def get_sssom_df(
110
- prefix: Union[str, Obo],
104
+ prefix: str | Obo, *, names: bool = True, **kwargs: Unpack[GetOntologyKwargs]
105
+ ) -> pd.DataFrame:
106
+ """Get an SSSOM dataframe, replaced by :func:`get_mappings_df`."""
107
+ warnings.warn("get_sssom_df was renamed to get_mappings_df", DeprecationWarning, stacklevel=2)
108
+ return get_mappings_df(prefix=prefix, names=names, **kwargs)
109
+
110
+
111
+ def get_mappings_df(
112
+ prefix: str | Obo,
111
113
  *,
112
- predicate_id: str = "oboinowl:hasDbXref",
113
- justification: str = "sempav:UnspecifiedMatching",
114
114
  names: bool = True,
115
- **kwargs,
115
+ include_mapping_source_column: bool = False,
116
+ **kwargs: Unpack[GetOntologyKwargs],
116
117
  ) -> pd.DataFrame:
117
- r"""Get xrefs from a source as an SSSOM dataframe.
118
+ r"""Get semantic mappings from a source as an SSSOM dataframe.
118
119
 
119
120
  :param prefix: The ontology to look in for xrefs
120
- :param predicate_id: The predicate used in the SSSOM document. By default, ontologies
121
- don't typically ascribe semantics to xrefs so ``oboinowl:hasDbXref`` is used
122
- :param justification: The justification for the mapping. By default, ontologies
123
- don't typically ascribe semantics, so this is left with `sempav:UnspecifiedMatching`
124
121
  :param names: Add name columns (``subject_label`` and ``object_label``)
122
+
125
123
  :returns: A SSSOM-compliant dataframe of xrefs
126
124
 
127
125
  For example, if you want to get UMLS as an SSSOM dataframe, you can do
128
126
 
129
- >>> import pyobo
130
- >>> df = pyobo.get_sssom_df("umls")
131
- >>> df.to_csv("umls.sssom.tsv", sep="\t", index=False)
127
+ .. code-block:: python
132
128
 
133
- If you don't want to get all of the many resources required to add
134
- names, you can pass ``names=False``
129
+ import pyobo
135
130
 
136
- >>> import pyobo
137
- >>> df = pyobo.get_sssom_df("umls", names=False)
138
- >>> df.to_csv("umls.sssom.tsv", sep="\t", index=False)
131
+ df = pyobo.get_mappings_df("umls")
132
+ df.to_csv("umls.sssom.tsv", sep="\t", index=False)
139
133
 
140
- .. note:: This assumes the Bioregistry as the prefix map
141
- """
142
- from .names import get_name
134
+ If you don't want to get all of the many resources required to add names, you can
135
+ pass ``names=False``
136
+
137
+ .. code-block:: python
138
+
139
+ import pyobo
140
+
141
+ df = pyobo.get_mappings_df("umls", names=False)
142
+ df.to_csv("umls.sssom.tsv", sep="\t", index=False)
143
143
 
144
+ .. note::
145
+
146
+ This assumes the Bioregistry as the prefix map
147
+ """
144
148
  if isinstance(prefix, Obo):
145
- df = prefix.get_xrefs_df()
149
+ df = prefix.get_mappings_df(
150
+ include_subject_labels=names,
151
+ include_mapping_source_column=include_mapping_source_column,
152
+ use_tqdm=check_should_use_tqdm(kwargs),
153
+ )
146
154
  prefix = prefix.ontology
155
+
147
156
  else:
148
- df = get_xrefs_df(prefix=prefix, **kwargs)
149
- rows: list[tuple[str, ...]] = []
150
- with logging_redirect_tqdm():
151
- for source_id, target_prefix, target_id in tqdm(
152
- df.values, unit="mapping", unit_scale=True, desc=f"[{prefix}] SSSOM"
153
- ):
154
- source = Reference(prefix=prefix, identifier=source_id)
155
- target = Reference(prefix=target_prefix, identifier=target_id)
156
-
157
- if names:
158
- rows.append(
159
- (
160
- source.curie,
161
- get_name(prefix, source_id) or "",
162
- target.curie,
163
- get_name(target_prefix, target_id),
164
- predicate_id,
165
- justification,
166
- )
167
- )
168
- else:
169
- rows.append((source.curie, target.curie, predicate_id, justification))
157
+ version = get_version_from_kwargs(prefix, kwargs)
158
+ path = get_cache_path(prefix, CacheArtifact.mappings, version=version)
159
+
160
+ @cached_df(
161
+ path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
162
+ )
163
+ def _df_getter() -> pd.DataFrame:
164
+ logger.info("[%s] rebuilding SSSOM", prefix)
165
+ ontology = get_ontology(prefix, **kwargs)
166
+ return ontology.get_mappings_df(
167
+ use_tqdm=check_should_use_tqdm(kwargs),
168
+ include_subject_labels=True,
169
+ include_mapping_source_column=include_mapping_source_column,
170
+ )
171
+
172
+ df = _df_getter()
170
173
 
171
174
  if names:
172
- columns = [
173
- "subject_id",
174
- "subject_label",
175
- "object_id",
176
- "object_label",
177
- "predicate_id",
178
- "mapping_justification",
179
- ]
180
- else:
181
- columns = [
182
- "subject_id",
183
- "object_id",
184
- "predicate_id",
185
- "mapping_justification",
186
- ]
187
- return pd.DataFrame(rows, columns=columns)
175
+ from .names import get_name_by_curie
176
+
177
+ df["object_label"] = df["object_id"].map(get_name_by_curie)
178
+ elif "subject_label" in df.columns:
179
+ del df["subject_label"]
180
+
181
+ return df
pyobo/cli/__init__.py CHANGED
File without changes
pyobo/cli/cli.py CHANGED
@@ -2,23 +2,19 @@
2
2
 
3
3
  import logging
4
4
  import os
5
- import sys
5
+ from collections.abc import Iterable
6
+ from functools import lru_cache
6
7
  from operator import itemgetter
7
8
 
9
+ import bioregistry
8
10
  import click
9
11
  import humanize
10
- from more_click import verbose_option
11
12
  from tabulate import tabulate
12
13
 
13
- from .aws import main as aws_main
14
14
  from .database import main as database_main
15
15
  from .lookup import lookup
16
- from ..constants import RAW_DIRECTORY
17
- from ..plugins import has_nomenclature_plugin, iter_nomenclature_plugins
18
- from ..registries import iter_cached_obo
19
- from ..utils.io import get_writer
20
- from ..xrefdb.canonicalizer import Canonicalizer, get_priority_curie, remap_file_stream
21
- from ..xrefdb.priority import DEFAULT_PRIORITY_LIST
16
+ from ..constants import GLOBAL_SKIP, RAW_DIRECTORY
17
+ from ..plugins import has_nomenclature_plugin
22
18
 
23
19
  __all__ = ["main"]
24
20
 
@@ -31,36 +27,6 @@ def main():
31
27
  """CLI for PyOBO."""
32
28
 
33
29
 
34
- _ORDERING_TEXT = ", ".join(f"{i}) {x}" for i, x in enumerate(DEFAULT_PRIORITY_LIST, start=1))
35
-
36
-
37
- @main.command(help=f"Prioritize a CURIE from ordering: {_ORDERING_TEXT}")
38
- @click.argument("curie")
39
- def prioritize(curie: str):
40
- """Prioritize a CURIE."""
41
- priority_curie = get_priority_curie(curie)
42
- click.secho(priority_curie)
43
-
44
-
45
- @main.command()
46
- @click.option("-i", "--file-in", type=click.File("r"), default=sys.stdin)
47
- @click.option("-o", "--file-out", type=click.File("w"), default=sys.stdout)
48
- @click.option("--column", type=int, default=0, show_default=True)
49
- @click.option("--sep", default="\t", show_default=True)
50
- def recurify(file_in, file_out, column: int, sep: str):
51
- """Remap a column in a given file stream."""
52
- remap_file_stream(file_in=file_in, file_out=file_out, column=column, sep=sep)
53
-
54
-
55
- @main.command()
56
- @verbose_option
57
- def cache():
58
- """Cache all resources."""
59
- for obo in iter_nomenclature_plugins():
60
- click.secho(f"Caching {obo.ontology}", bold=True, fg="green")
61
- obo.write_default()
62
-
63
-
64
30
  @main.command()
65
31
  @click.option("--remove-obo", is_flag=True)
66
32
  def clean(remove_obo: bool):
@@ -93,7 +59,7 @@ def clean(remove_obo: bool):
93
59
  @main.command()
94
60
  def ls():
95
61
  """List how big all of the OBO files are."""
96
- entries = [(prefix, os.path.getsize(path)) for prefix, path in iter_cached_obo()]
62
+ entries = [(prefix, os.path.getsize(path)) for prefix, path in _iter_cached_obo()]
97
63
  entries = [
98
64
  (prefix, humanize.naturalsize(size), "✅" if not has_nomenclature_plugin(prefix) else "❌")
99
65
  for prefix, size in sorted(entries, key=itemgetter(1), reverse=True)
@@ -101,19 +67,38 @@ def ls():
101
67
  click.echo(tabulate(entries, headers=["Source", "Size", "OBO"]))
102
68
 
103
69
 
104
- @main.command()
105
- @verbose_option
106
- @click.option("-f", "--file", type=click.File("w"))
107
- def remapping(file):
108
- """Make a canonical remapping."""
109
- canonicalizer = Canonicalizer.get_default()
110
- writer = get_writer(file)
111
- writer.writerow(["input", "canonical"])
112
- writer.writerows(canonicalizer.iterate_flat_mapping())
70
+ def _iter_cached_obo() -> Iterable[tuple[str, str]]:
71
+ """Iterate over cached OBO paths."""
72
+ for prefix in os.listdir(RAW_DIRECTORY):
73
+ if prefix in GLOBAL_SKIP or _has_no_download(prefix) or bioregistry.is_deprecated(prefix):
74
+ continue
75
+ d = RAW_DIRECTORY.joinpath(prefix)
76
+ if not os.path.isdir(d):
77
+ continue
78
+ for x in os.listdir(d):
79
+ if x.endswith(".obo"):
80
+ p = os.path.join(d, x)
81
+ yield prefix, p
82
+
83
+
84
+ def _has_no_download(prefix: str) -> bool:
85
+ """Return if the prefix is not available."""
86
+ prefix_norm = bioregistry.normalize_prefix(prefix)
87
+ return prefix_norm is not None and prefix_norm in _no_download()
88
+
89
+
90
+ @lru_cache(maxsize=1)
91
+ def _no_download() -> set[str]:
92
+ """Get the list of prefixes not available as OBO."""
93
+ return {
94
+ prefix
95
+ for prefix in bioregistry.read_registry()
96
+ if bioregistry.get_obo_download(prefix) is None
97
+ and bioregistry.get_owl_download(prefix) is None
98
+ }
113
99
 
114
100
 
115
101
  main.add_command(lookup)
116
- main.add_command(aws_main)
117
102
  main.add_command(database_main)
118
103
 
119
104
  if __name__ == "__main__":