pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227)
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -113
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +108 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +183 -161
  20. pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +196 -118
  25. pyobo/gilda_utils.py +79 -200
  26. pyobo/identifier_utils/__init__.py +41 -0
  27. pyobo/identifier_utils/api.py +296 -0
  28. pyobo/identifier_utils/model.py +130 -0
  29. pyobo/identifier_utils/preprocessing.json +812 -0
  30. pyobo/identifier_utils/preprocessing.py +61 -0
  31. pyobo/identifier_utils/relations/__init__.py +8 -0
  32. pyobo/identifier_utils/relations/api.py +162 -0
  33. pyobo/identifier_utils/relations/data.json +5824 -0
  34. pyobo/identifier_utils/relations/data_owl.json +57 -0
  35. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  36. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  37. pyobo/mocks.py +9 -6
  38. pyobo/ner/__init__.py +9 -0
  39. pyobo/ner/api.py +72 -0
  40. pyobo/ner/normalizer.py +33 -0
  41. pyobo/obographs.py +43 -39
  42. pyobo/plugins.py +5 -4
  43. pyobo/py.typed +0 -0
  44. pyobo/reader.py +1358 -395
  45. pyobo/reader_utils.py +155 -0
  46. pyobo/resource_utils.py +42 -22
  47. pyobo/resources/__init__.py +0 -0
  48. pyobo/resources/goc.py +75 -0
  49. pyobo/resources/goc.tsv +188 -0
  50. pyobo/resources/ncbitaxon.py +4 -5
  51. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  52. pyobo/resources/ro.py +3 -2
  53. pyobo/resources/ro.tsv +0 -0
  54. pyobo/resources/so.py +0 -0
  55. pyobo/resources/so.tsv +0 -0
  56. pyobo/sources/README.md +12 -8
  57. pyobo/sources/__init__.py +52 -29
  58. pyobo/sources/agrovoc.py +0 -0
  59. pyobo/sources/antibodyregistry.py +11 -12
  60. pyobo/sources/bigg/__init__.py +13 -0
  61. pyobo/sources/bigg/bigg_compartment.py +81 -0
  62. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  63. pyobo/sources/bigg/bigg_model.py +46 -0
  64. pyobo/sources/bigg/bigg_reaction.py +77 -0
  65. pyobo/sources/biogrid.py +1 -2
  66. pyobo/sources/ccle.py +7 -12
  67. pyobo/sources/cgnc.py +0 -5
  68. pyobo/sources/chebi.py +1 -1
  69. pyobo/sources/chembl/__init__.py +9 -0
  70. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  71. pyobo/sources/chembl/chembl_target.py +160 -0
  72. pyobo/sources/civic_gene.py +55 -15
  73. pyobo/sources/clinicaltrials.py +160 -0
  74. pyobo/sources/complexportal.py +24 -24
  75. pyobo/sources/conso.py +14 -22
  76. pyobo/sources/cpt.py +0 -0
  77. pyobo/sources/credit.py +1 -9
  78. pyobo/sources/cvx.py +27 -5
  79. pyobo/sources/depmap.py +9 -12
  80. pyobo/sources/dictybase_gene.py +2 -7
  81. pyobo/sources/drugbank/__init__.py +9 -0
  82. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  83. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  84. pyobo/sources/drugcentral.py +17 -13
  85. pyobo/sources/expasy.py +31 -34
  86. pyobo/sources/famplex.py +13 -18
  87. pyobo/sources/flybase.py +3 -8
  88. pyobo/sources/gard.py +62 -0
  89. pyobo/sources/geonames/__init__.py +9 -0
  90. pyobo/sources/geonames/features.py +28 -0
  91. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  92. pyobo/sources/geonames/utils.py +115 -0
  93. pyobo/sources/gmt_utils.py +6 -7
  94. pyobo/sources/go.py +20 -13
  95. pyobo/sources/gtdb.py +154 -0
  96. pyobo/sources/gwascentral/__init__.py +9 -0
  97. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  98. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  99. pyobo/sources/hgnc/__init__.py +9 -0
  100. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  101. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  102. pyobo/sources/icd/__init__.py +9 -0
  103. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  104. pyobo/sources/icd/icd11.py +148 -0
  105. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  106. pyobo/sources/interpro.py +4 -9
  107. pyobo/sources/itis.py +0 -5
  108. pyobo/sources/kegg/__init__.py +0 -0
  109. pyobo/sources/kegg/api.py +16 -38
  110. pyobo/sources/kegg/genes.py +9 -20
  111. pyobo/sources/kegg/genome.py +1 -7
  112. pyobo/sources/kegg/pathway.py +9 -21
  113. pyobo/sources/mesh.py +58 -24
  114. pyobo/sources/mgi.py +3 -10
  115. pyobo/sources/mirbase/__init__.py +11 -0
  116. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  117. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  118. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  119. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  120. pyobo/sources/msigdb.py +74 -39
  121. pyobo/sources/ncbi/__init__.py +9 -0
  122. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  123. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  124. pyobo/sources/nih_reporter.py +60 -0
  125. pyobo/sources/nlm/__init__.py +9 -0
  126. pyobo/sources/nlm/nlm_catalog.py +48 -0
  127. pyobo/sources/nlm/nlm_publisher.py +36 -0
  128. pyobo/sources/nlm/utils.py +116 -0
  129. pyobo/sources/npass.py +6 -8
  130. pyobo/sources/omim_ps.py +10 -3
  131. pyobo/sources/pathbank.py +4 -8
  132. pyobo/sources/pfam/__init__.py +9 -0
  133. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  134. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  135. pyobo/sources/pharmgkb/__init__.py +15 -0
  136. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  137. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  138. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  139. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  140. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  141. pyobo/sources/pharmgkb/utils.py +86 -0
  142. pyobo/sources/pid.py +1 -6
  143. pyobo/sources/pombase.py +6 -10
  144. pyobo/sources/pubchem.py +4 -9
  145. pyobo/sources/reactome.py +5 -11
  146. pyobo/sources/rgd.py +11 -16
  147. pyobo/sources/rhea.py +37 -36
  148. pyobo/sources/ror.py +69 -42
  149. pyobo/sources/selventa/__init__.py +0 -0
  150. pyobo/sources/selventa/schem.py +4 -7
  151. pyobo/sources/selventa/scomp.py +1 -6
  152. pyobo/sources/selventa/sdis.py +4 -7
  153. pyobo/sources/selventa/sfam.py +1 -6
  154. pyobo/sources/sgd.py +6 -11
  155. pyobo/sources/signor/__init__.py +7 -0
  156. pyobo/sources/signor/download.py +41 -0
  157. pyobo/sources/signor/signor_complexes.py +105 -0
  158. pyobo/sources/slm.py +12 -15
  159. pyobo/sources/umls/__init__.py +7 -1
  160. pyobo/sources/umls/__main__.py +0 -0
  161. pyobo/sources/umls/get_synonym_types.py +20 -4
  162. pyobo/sources/umls/sty.py +57 -0
  163. pyobo/sources/umls/synonym_types.tsv +1 -1
  164. pyobo/sources/umls/umls.py +18 -22
  165. pyobo/sources/unimod.py +46 -0
  166. pyobo/sources/uniprot/__init__.py +1 -1
  167. pyobo/sources/uniprot/uniprot.py +40 -32
  168. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  169. pyobo/sources/utils.py +3 -2
  170. pyobo/sources/wikipathways.py +7 -10
  171. pyobo/sources/zfin.py +5 -10
  172. pyobo/ssg/__init__.py +12 -16
  173. pyobo/ssg/base.html +0 -0
  174. pyobo/ssg/index.html +26 -13
  175. pyobo/ssg/term.html +12 -2
  176. pyobo/ssg/typedef.html +0 -0
  177. pyobo/struct/__init__.py +54 -8
  178. pyobo/struct/functional/__init__.py +1 -0
  179. pyobo/struct/functional/dsl.py +2572 -0
  180. pyobo/struct/functional/macros.py +423 -0
  181. pyobo/struct/functional/obo_to_functional.py +385 -0
  182. pyobo/struct/functional/ontology.py +270 -0
  183. pyobo/struct/functional/utils.py +112 -0
  184. pyobo/struct/reference.py +331 -136
  185. pyobo/struct/struct.py +1413 -643
  186. pyobo/struct/struct_utils.py +1078 -0
  187. pyobo/struct/typedef.py +162 -210
  188. pyobo/struct/utils.py +12 -5
  189. pyobo/struct/vocabulary.py +138 -0
  190. pyobo/utils/__init__.py +0 -0
  191. pyobo/utils/cache.py +13 -11
  192. pyobo/utils/io.py +17 -31
  193. pyobo/utils/iter.py +5 -5
  194. pyobo/utils/misc.py +41 -53
  195. pyobo/utils/ndex_utils.py +0 -0
  196. pyobo/utils/path.py +76 -70
  197. pyobo/version.py +3 -3
  198. {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
  199. pyobo-0.12.0.dist-info/RECORD +202 -0
  200. pyobo-0.12.0.dist-info/WHEEL +4 -0
  201. {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
  202. pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
  203. pyobo/aws.py +0 -162
  204. pyobo/cli/aws.py +0 -47
  205. pyobo/identifier_utils.py +0 -142
  206. pyobo/normalizer.py +0 -232
  207. pyobo/registries/__init__.py +0 -16
  208. pyobo/registries/metaregistry.json +0 -507
  209. pyobo/registries/metaregistry.py +0 -135
  210. pyobo/sources/icd11.py +0 -105
  211. pyobo/xrefdb/__init__.py +0 -1
  212. pyobo/xrefdb/canonicalizer.py +0 -214
  213. pyobo/xrefdb/priority.py +0 -59
  214. pyobo/xrefdb/sources/__init__.py +0 -60
  215. pyobo/xrefdb/sources/biomappings.py +0 -36
  216. pyobo/xrefdb/sources/cbms2019.py +0 -91
  217. pyobo/xrefdb/sources/chembl.py +0 -83
  218. pyobo/xrefdb/sources/compath.py +0 -82
  219. pyobo/xrefdb/sources/famplex.py +0 -64
  220. pyobo/xrefdb/sources/gilda.py +0 -50
  221. pyobo/xrefdb/sources/intact.py +0 -113
  222. pyobo/xrefdb/sources/ncit.py +0 -133
  223. pyobo/xrefdb/sources/pubchem.py +0 -27
  224. pyobo/xrefdb/sources/wikidata.py +0 -116
  225. pyobo-0.11.2.dist-info/RECORD +0 -157
  226. pyobo-0.11.2.dist-info/WHEEL +0 -5
  227. pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/api/properties.py CHANGED
@@ -1,140 +1,163 @@
 """High-level API for properties."""
 
 import logging
-import os
 from collections.abc import Mapping
-from typing import Optional
 
 import pandas as pd
-
-from .utils import get_version
+from tqdm import tqdm
+from typing_extensions import Unpack
+
+from .utils import get_version_from_kwargs
+from ..constants import (
+    GetOntologyKwargs,
+    check_should_cache,
+    check_should_force,
+    check_should_use_tqdm,
+)
 from ..getters import get_ontology
 from ..identifier_utils import wrap_norm_prefix
-from ..utils.cache import cached_df, cached_mapping, cached_multidict
+from ..struct import Reference
+from ..struct.struct_utils import OBOLiteral, ReferenceHint, _ensure_ref
+from ..utils.cache import cached_df
 from ..utils.io import multidict
-from ..utils.path import prefix_cache_join
+from ..utils.path import CacheArtifact, get_cache_path
 
 __all__ = [
-    "get_properties_df",
     "get_filtered_properties_df",
     "get_filtered_properties_mapping",
     "get_filtered_properties_multimapping",
-    "get_property",
+    "get_literal_properties",
+    "get_literal_properties_df",
+    "get_object_properties",
+    "get_object_properties_df",
     "get_properties",
+    "get_properties_df",
+    "get_property",
 ]
 
 logger = logging.getLogger(__name__)
 
 
+def get_object_properties_df(prefix, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
+    """Get a dataframe of object property triples."""
+    version = get_version_from_kwargs(prefix, kwargs)
+    path = get_cache_path(prefix, CacheArtifact.object_properties, version=version)
+
+    @cached_df(
+        path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
+    )
+    def _df_getter() -> pd.DataFrame:
+        return get_ontology(prefix, **kwargs).get_object_properties_df(
+            use_tqdm=check_should_use_tqdm(kwargs)
+        )
+
+    return _df_getter()
+
+
+def get_object_properties(
+    prefix, **kwargs: Unpack[GetOntologyKwargs]
+) -> list[tuple[Reference, Reference, Reference]]:
+    """Get a list of object property triples."""
+    df = get_object_properties_df(prefix, **kwargs)
+    return [
+        (Reference.from_curie(s), Reference.from_curie(p), Reference.from_curie(o))
+        for s, p, o in df.values
+    ]
+
+
+def get_literal_properties(
+    prefix: str, **kwargs: Unpack[GetOntologyKwargs]
+) -> list[tuple[Reference, Reference, OBOLiteral]]:
+    """Get a list of literal property triples."""
+    df = get_literal_properties_df(prefix, **kwargs)
+    return [
+        (
+            Reference.from_curie(s),
+            Reference.from_curie(p),
+            OBOLiteral(
+                value,
+                Reference.from_curie(datatype),
+                language if language and pd.notna(language) else None,
+            ),
+        )
+        for s, p, value, datatype, language in tqdm(
+            df.values,
+            desc=f"[{prefix}] parsing properties",
+            unit_scale=True,
+            unit="triple",
+            disable=not check_should_use_tqdm(kwargs),
+        )
+    ]
+
+
+def get_literal_properties_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
+    """Get a dataframe of literal property quads."""
+    version = get_version_from_kwargs(prefix, kwargs)
+    path = get_cache_path(prefix, CacheArtifact.literal_properties, version=version)
+
+    @cached_df(
+        path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
+    )
+    def _df_getter() -> pd.DataFrame:
+        return get_ontology(prefix, **kwargs).get_literal_properties_df(
+            use_tqdm=check_should_use_tqdm(kwargs)
+        )
+
+    return _df_getter()
+
+
 @wrap_norm_prefix
-def get_properties_df(
-    prefix: str, *, force: bool = False, version: Optional[str] = None
-) -> pd.DataFrame:
+def get_properties_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
     """Extract properties.
 
     :param prefix: the resource to load
-    :param force: should the resource be re-downloaded, re-parsed, and re-cached?
     :returns: A dataframe with the properties
     """
-    if version is None:
-        version = get_version(prefix)
-    path = prefix_cache_join(prefix, name="properties.tsv", version=version)
+    version = get_version_from_kwargs(prefix, kwargs)
+    path = get_cache_path(prefix, CacheArtifact.properties, version=version)
 
-    @cached_df(path=path, dtype=str, force=force)
+    @cached_df(
+        path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
+    )
     def _df_getter() -> pd.DataFrame:
-        if force:
-            logger.info("[%s] forcing reload for properties", prefix)
-        else:
-            logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
-        ontology = get_ontology(prefix, force=force, version=version)
-        df = ontology.get_properties_df()
-        df.dropna(inplace=True)
-        return df
+        return get_ontology(prefix, **kwargs).get_properties_df(
+            use_tqdm=check_should_use_tqdm(kwargs)
+        )
 
     return _df_getter()
 
 
 @wrap_norm_prefix
 def get_filtered_properties_mapping(
-    prefix: str,
-    prop: str,
-    *,
-    use_tqdm: bool = False,
-    force: bool = False,
-    version: Optional[str] = None,
+    prefix: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs]
 ) -> Mapping[str, str]:
     """Extract a single property for each term as a dictionary.
 
     :param prefix: the resource to load
     :param prop: the property to extract
-    :param use_tqdm: should a progress bar be shown?
-    :param force: should the resource be re-downloaded, re-parsed, and re-cached?
     :returns: A mapping from identifier to property value
     """
-    df = get_properties_df(prefix=prefix, force=force, version=version)
-    df = df[df["property"] == prop]
-    return dict(df[[f"{prefix}_id", "value"]].values)
-
-    if version is None:
-        version = get_version(prefix)
-    path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
-    all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
-
-    @cached_mapping(path=path, header=[f"{prefix}_id", prop], force=force)
-    def _mapping_getter() -> Mapping[str, str]:
-        if os.path.exists(all_properties_path):
-            logger.info("[%s] loading pre-cached properties", prefix)
-            df = pd.read_csv(all_properties_path, sep="\t")
-            logger.info("[%s] filtering pre-cached properties", prefix)
-            df = df.loc[df["property"] == prop, [f"{prefix}_id", "value"]]
-            return dict(df.values)
-
-        logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
-        ontology = get_ontology(prefix, force=force, version=version)
-        return ontology.get_filtered_properties_mapping(prop, use_tqdm=use_tqdm)
-
-    return _mapping_getter()
+    df = get_filtered_properties_df(prefix, prop, **kwargs)
+    return dict(df.values)
 
 
 @wrap_norm_prefix
 def get_filtered_properties_multimapping(
-    prefix: str,
-    prop: str,
-    *,
-    use_tqdm: bool = False,
-    force: bool = False,
-    version: Optional[str] = None,
+    prefix: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs]
 ) -> Mapping[str, list[str]]:
     """Extract multiple properties for each term as a dictionary.
 
     :param prefix: the resource to load
     :param prop: the property to extract
-    :param use_tqdm: should a progress bar be shown?
-    :param force: should the resource be re-downloaded, re-parsed, and re-cached?
     :returns: A mapping from identifier to property values
     """
-    if version is None:
-        version = get_version(prefix)
-    path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
-    all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
-
-    @cached_multidict(path=path, header=[f"{prefix}_id", prop], force=force)
-    def _mapping_getter() -> Mapping[str, list[str]]:
-        if os.path.exists(all_properties_path):
-            logger.info("[%s] loading pre-cached properties", prefix)
-            df = pd.read_csv(all_properties_path, sep="\t")
-            logger.info("[%s] filtering pre-cached properties", prefix)
-            df = df.loc[df["property"] == prop, [f"{prefix}_id", "value"]]
-            return multidict(df.values)
+    df = get_filtered_properties_df(prefix, prop, **kwargs)
+    return multidict(df.values)
 
-        logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
-        ontology = get_ontology(prefix, force=force, version=version)
-        return ontology.get_filtered_properties_multimapping(prop, use_tqdm=use_tqdm)
 
-    return _mapping_getter()
-
-
-def get_property(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[str]:
+def get_property(
+    prefix: str, identifier: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs]
+) -> str | None:
     """Extract a single property for the given entity.
 
     :param prefix: the resource to load
@@ -152,7 +175,12 @@ def get_property(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[
     return filtered_properties_mapping.get(identifier)
 
 
-def get_properties(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[list[str]]:
+def get_properties(
+    prefix: str,
+    identifier: str,
+    prop: ReferenceHint,
+    **kwargs: Unpack[GetOntologyKwargs],
+) -> list[str] | None:
     """Extract a set of properties for the given entity.
 
     :param prefix: the resource to load
@@ -168,39 +196,15 @@ def get_properties(prefix: str, identifier: str, prop: str, **kwargs) -> Optiona
 
 @wrap_norm_prefix
 def get_filtered_properties_df(
-    prefix: str,
-    prop: str,
-    *,
-    use_tqdm: bool = False,
-    force: bool = False,
-    version: Optional[str] = None,
+    prefix: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs]
 ) -> pd.DataFrame:
     """Extract a single property for each term.
 
     :param prefix: the resource to load
     :param prop: the property to extract
-    :param use_tqdm: should a progress bar be shown?
-    :param force: should the resource be re-downloaded, re-parsed, and re-cached?
     :returns: A dataframe from identifier to property value. Columns are [<prefix>_id, value].
     """
-    if version is None:
-        version = get_version(prefix)
-    path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
-    all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
-
-    @cached_df(path=path, dtype=str, force=force)
-    def _df_getter() -> pd.DataFrame:
-        if os.path.exists(all_properties_path):
-            logger.info("[%s] loading pre-cached properties", prefix)
-            df = pd.read_csv(all_properties_path, sep="\t")
-            logger.info("[%s] filtering pre-cached properties", prefix)
-            return df.loc[df["property"] == prop, [f"{prefix}_id", "value"]]
-
-        if force:
-            logger.info("[%s] forcing reload for properties", prefix)
-        else:
-            logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
-        ontology = get_ontology(prefix, force=force, version=version)
-        return ontology.get_filtered_properties_df(prop, use_tqdm=use_tqdm)
-
-    return _df_getter()
+    prop = _ensure_ref(prop, ontology_prefix=prefix)
+    df = get_properties_df(prefix, **kwargs)
+    df = df.loc[df["property"] == prop.curie, [f"{prefix}_id", "value"]]
+    return df
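As the diff above shows, the 0.12.0 property getters drop the explicit force/version/use_tqdm keyword arguments in favor of a shared **kwargs mapping typed as GetOntologyKwargs, and property arguments become ReferenceHint values resolved via _ensure_ref. A minimal usage sketch (not from the package itself), assuming GetOntologyKwargs accepts force, version, and use_tqdm keys as the check_should_force, get_version_from_kwargs, and check_should_use_tqdm helpers suggest, and using a placeholder property CURIE:

>>> from pyobo.api.properties import get_filtered_properties_mapping, get_properties_df
>>> df = get_properties_df("chebi", force=False, use_tqdm=False)  # columns: chebi_id, property, value
>>> id_to_value = get_filtered_properties_mapping("chebi", "example:property")  # placeholder CURIE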
pyobo/api/relations.py CHANGED
@@ -1,15 +1,13 @@
 """High-level API for relations."""
 
 import logging
-import os
 from collections.abc import Mapping
 from functools import lru_cache
-from typing import Optional
 
-import networkx as nx
 import pandas as pd
+from typing_extensions import Unpack
 
-from .utils import get_version
+from .utils import get_version_from_kwargs
 from ..constants import (
     RELATION_COLUMNS,
     RELATION_ID,
@@ -18,50 +16,60 @@ from ..constants import (
     SOURCE_PREFIX,
     TARGET_ID,
     TARGET_PREFIX,
+    GetOntologyKwargs,
+    check_should_cache,
+    check_should_force,
+    check_should_use_tqdm,
 )
 from ..getters import get_ontology
 from ..identifier_utils import wrap_norm_prefix
-from ..struct import Reference, RelationHint, TypeDef, get_reference_tuple
+from ..struct.reference import Reference
+from ..struct.struct_utils import ReferenceHint, _ensure_ref
 from ..utils.cache import cached_df
-from ..utils.path import prefix_cache_join
+from ..utils.path import CacheArtifact, get_cache_path, get_relation_cache_path
 
 __all__ = [
-    "get_relations_df",
     "get_filtered_relations_df",
     "get_id_multirelations_mapping",
-    "get_relation_mapping",
     "get_relation",
-    "get_graph",
+    "get_relation_mapping",
+    "get_relations",
+    "get_relations_df",
 ]
 
-# TODO get_relation, get_relations
-
 logger = logging.getLogger(__name__)
 
 
+@wrap_norm_prefix
+def get_relations(
+    prefix: str, **kwargs: Unpack[GetOntologyKwargs]
+) -> list[tuple[Reference, Reference, Reference]]:
+    """Get relations."""
+    df = get_relations_df(prefix, wide=False, **kwargs)
+    return [
+        (
+            Reference(prefix=prefix, identifier=source_id),
+            Reference(prefix=relation_prefix, identifier=relation_id),
+            Reference(prefix=target_prefix, identifier=target_id),
+        )
+        for source_id, relation_prefix, relation_id, target_prefix, target_id in df.values
+    ]
+
+
 @wrap_norm_prefix
 def get_relations_df(
-    prefix: str,
-    *,
-    use_tqdm: bool = False,
-    force: bool = False,
-    wide: bool = False,
-    strict: bool = True,
-    version: Optional[str] = None,
+    prefix: str, *, wide: bool = False, **kwargs: Unpack[GetOntologyKwargs]
 ) -> pd.DataFrame:
     """Get all relations from the OBO."""
-    if version is None:
-        version = get_version(prefix)
-    path = prefix_cache_join(prefix, name="relations.tsv", version=version)
+    version = get_version_from_kwargs(prefix, kwargs)
+    path = get_cache_path(prefix, CacheArtifact.relations, version=version)
 
-    @cached_df(path=path, dtype=str, force=force)
+    @cached_df(
+        path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
+    )
     def _df_getter() -> pd.DataFrame:
-        if force:
-            logger.info("[%s] forcing reload for relations", prefix)
-        else:
-            logger.info("[%s] no cached relations found. getting from OBO loader", prefix)
-        ontology = get_ontology(prefix, force=force, version=version, strict=strict)
-        return ontology.get_relations_df(use_tqdm=use_tqdm)
+        ontology = get_ontology(prefix, **kwargs)
+        return ontology.get_relations_df(use_tqdm=check_should_use_tqdm(kwargs))
 
     rv = _df_getter()
 
@@ -76,38 +84,29 @@ def get_relations_df(
 @wrap_norm_prefix
 def get_filtered_relations_df(
     prefix: str,
-    relation: RelationHint,
-    *,
-    use_tqdm: bool = False,
-    force: bool = False,
-    version: Optional[str] = None,
+    relation: ReferenceHint,
+    **kwargs: Unpack[GetOntologyKwargs],
 ) -> pd.DataFrame:
     """Get all the given relation."""
-    relation_prefix, relation_identifier = relation = get_reference_tuple(relation)
-    if version is None:
-        version = get_version(prefix)
-    path = prefix_cache_join(
-        prefix,
-        "relations",
-        name=f"{relation_prefix}:{relation_identifier}.tsv",
-        version=version,
+    relation = _ensure_ref(relation, ontology_prefix=prefix)
+    version = get_version_from_kwargs(prefix, kwargs)
+    all_relations_path = get_cache_path(prefix, CacheArtifact.relations, version=version)
+    if all_relations_path.is_file():
+        logger.debug("[%] loading all relations from %s", prefix, all_relations_path)
+        df = pd.read_csv(all_relations_path, sep="\t", dtype=str)
+        idx = (df[RELATION_PREFIX] == relation.prefix) & (df[RELATION_ID] == relation.identifier)
+        columns = [f"{prefix}_id", TARGET_PREFIX, TARGET_ID]
+        return df.loc[idx, columns]
+
+    path = get_relation_cache_path(prefix, relation, version=version)
+
+    @cached_df(
+        path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
     )
-    all_relations_path = prefix_cache_join(prefix, name="relations.tsv", version=version)
-
-    @cached_df(path=path, dtype=str, force=force)
     def _df_getter() -> pd.DataFrame:
-        if os.path.exists(all_relations_path):
-            logger.debug("[%] loading all relations from %s", prefix, all_relations_path)
-            df = pd.read_csv(all_relations_path, sep="\t", dtype=str)
-            idx = (df[RELATION_PREFIX] == relation_prefix) & (
-                df[RELATION_ID] == relation_identifier
-            )
-            columns = [f"{prefix}_id", TARGET_PREFIX, TARGET_ID]
-            return df.loc[idx, columns]
-
         logger.info("[%s] no cached relations found. getting from OBO loader", prefix)
-        ontology = get_ontology(prefix, force=force, version=version)
-        return ontology.get_filtered_relations_df(relation, use_tqdm=use_tqdm)
+        ontology = get_ontology(prefix, **kwargs)
+        return ontology.get_filtered_relations_df(relation, use_tqdm=check_should_use_tqdm(kwargs))
 
     return _df_getter()
 
@@ -115,29 +114,24 @@ def get_filtered_relations_df(
 @wrap_norm_prefix
 def get_id_multirelations_mapping(
     prefix: str,
-    typedef: TypeDef,
-    *,
-    use_tqdm: bool = False,
-    force: bool = False,
-    version: Optional[str] = None,
+    typedef: ReferenceHint,
+    **kwargs: Unpack[GetOntologyKwargs],
 ) -> Mapping[str, list[Reference]]:
     """Get the OBO file and output a synonym dictionary."""
-    if version is None:
-        version = get_version(prefix)
-    ontology = get_ontology(prefix, force=force, version=version)
-    return ontology.get_id_multirelations_mapping(typedef=typedef, use_tqdm=use_tqdm)
+    kwargs["version"] = get_version_from_kwargs(prefix, kwargs)
+    ontology = get_ontology(prefix, **kwargs)
+    return ontology.get_id_multirelations_mapping(
+        typedef=typedef, use_tqdm=check_should_use_tqdm(kwargs)
+    )
 
 
 @lru_cache
 @wrap_norm_prefix
 def get_relation_mapping(
     prefix: str,
-    relation: RelationHint,
+    relation: ReferenceHint,
     target_prefix: str,
-    *,
-    use_tqdm: bool = False,
-    force: bool = False,
-    version: Optional[str] = None,
+    **kwargs: Unpack[GetOntologyKwargs],
 ) -> Mapping[str, str]:
     """Get relations from identifiers in the source prefix to target prefix with the given relation.
 
@@ -151,11 +145,9 @@ def get_relation_mapping(
     >>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping("hgnc", "ro:HOM0000017", "mgi")
     >>> assert mouse_mapt_mgi_id == hgnc_mgi_orthology_mapping[human_mapt_hgnc_id]
     """
-    if version is None:
-        version = get_version(prefix)
-    ontology = get_ontology(prefix, force=force, version=version)
+    ontology = get_ontology(prefix, **kwargs)
     return ontology.get_relation_mapping(
-        relation=relation, target_prefix=target_prefix, use_tqdm=use_tqdm
+        relation=relation, target_prefix=target_prefix, use_tqdm=check_should_use_tqdm(kwargs)
     )
 
 
@@ -163,13 +155,10 @@ def get_relation_mapping(
 def get_relation(
     prefix: str,
     source_identifier: str,
-    relation: RelationHint,
+    relation: ReferenceHint,
     target_prefix: str,
-    *,
-    use_tqdm: bool = False,
-    force: bool = False,
-    **kwargs,
-) -> Optional[str]:
+    **kwargs: Unpack[GetOntologyKwargs],
+) -> str | None:
     """Get the target identifier corresponding to the given relationship from the source prefix/identifier pair.
 
     .. warning:: Assumes there's only one version of the property for each term.
@@ -187,21 +176,6 @@
         prefix=prefix,
         relation=relation,
         target_prefix=target_prefix,
-        use_tqdm=use_tqdm,
-        force=force,
         **kwargs,
     )
     return relation_mapping.get(source_identifier)
-
-
-def get_graph(prefix: str, **kwargs) -> nx.DiGraph:
-    """Get the relation graph."""
-    rv = nx.MultiDiGraph()
-    df = get_relations_df(prefix=prefix, **kwargs)
-    for source_id, relation_prefix, relation_id, target_ns, target_id in df.values:
-        rv.add_edge(
-            f"{prefix}:{source_id}",
-            f"{target_ns}:{target_id}",
-            key=f"{relation_prefix}:{relation_id}",
-        )
-    return rv
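The relations API follows the same keyword-argument pattern, and the new get_relations function returns (source, relation, target) Reference triples built from the relations dataframe. A sketch under the same assumptions about GetOntologyKwargs; the first call repeats the doctest preserved in get_relation_mapping above:

>>> import pyobo
>>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping("hgnc", "ro:HOM0000017", "mgi")
>>> from pyobo.api.relations import get_relations
>>> triples = get_relations("hgnc")  # list of (source, relation, target) Reference triples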
pyobo/api/species.py CHANGED
@@ -3,14 +3,17 @@
 import logging
 from collections.abc import Mapping
 from functools import lru_cache
-from typing import Optional
+
+import curies
+from typing_extensions import Unpack
 
 from .alts import get_primary_identifier
-from .utils import get_version
+from .utils import _get_pi, get_version_from_kwargs
+from ..constants import GetOntologyKwargs, check_should_force
 from ..getters import NoBuildError, get_ontology
 from ..identifier_utils import wrap_norm_prefix
 from ..utils.cache import cached_mapping
-from ..utils.path import prefix_cache_join
+from ..utils.path import CacheArtifact, get_cache_path
 
 __all__ = [
     "get_id_species_mapping",
@@ -20,34 +23,35 @@ __all__ = [
 logger = logging.getLogger(__name__)
 
 
-@wrap_norm_prefix
-def get_species(prefix: str, identifier: str, *, version: Optional[str] = None) -> Optional[str]:
+def get_species(
+    prefix: str | curies.Reference | curies.ReferenceTuple,
+    identifier: str | None = None,
+    /,
+    **kwargs: Unpack[GetOntologyKwargs],
+) -> str | None:
     """Get the species."""
-    if prefix == "uniprot":
+    t = _get_pi(prefix, identifier)
+
+    if t.prefix == "uniprot":
         raise NotImplementedError
 
     try:
-        id_species = get_id_species_mapping(prefix, version=version)
+        id_species = get_id_species_mapping(t.prefix, **kwargs)
     except NoBuildError:
-        logger.warning("unable to look up species for prefix %s", prefix)
+        logger.warning("unable to look up species for prefix %s", t.prefix)
        return None
 
     if not id_species:
-        logger.warning("no results produced for prefix %s", prefix)
+        logger.warning("no results produced for prefix %s", t.prefix)
         return None
 
-    primary_id = get_primary_identifier(prefix, identifier, version=version)
+    primary_id = get_primary_identifier(t, **kwargs)
     return id_species.get(primary_id)
 
 
 @lru_cache
 @wrap_norm_prefix
-def get_id_species_mapping(
-    prefix: str,
-    force: bool = False,
-    strict: bool = True,
-    version: Optional[str] = None,
-) -> Mapping[str, str]:
+def get_id_species_mapping(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> Mapping[str, str]:
     """Get an identifier to species mapping."""
     if prefix == "ncbigene":
         from ..sources.ncbigene import get_ncbigene_id_to_species_mapping
@@ -57,14 +61,13 @@ def get_id_species_mapping(
         logger.info("[%s] done loading species mappings", prefix)
         return rv
 
-    if version is None:
-        version = get_version(prefix)
-    path = prefix_cache_join(prefix, name="species.tsv", version=version)
+    version = get_version_from_kwargs(prefix, kwargs)
+    path = get_cache_path(prefix, CacheArtifact.species, version=version)
 
-    @cached_mapping(path=path, header=[f"{prefix}_id", "species"], force=force)
+    @cached_mapping(path=path, header=[f"{prefix}_id", "species"], force=check_should_force(kwargs))
     def _get_id_species_mapping() -> Mapping[str, str]:
         logger.info("[%s] no cached species found. getting from OBO loader", prefix)
-        ontology = get_ontology(prefix, force=force, strict=strict, version=version)
+        ontology = get_ontology(prefix, **kwargs)
         logger.info("[%s] loading species mappings", prefix)
         return ontology.get_id_species_mapping()
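get_species now takes its subject positionally, either as a prefix/identifier pair or as a single curies.Reference or curies.ReferenceTuple, which _get_pi normalizes before the cache lookup. A sketch with an illustrative identifier, assuming get_species remains re-exported from the top-level pyobo package:

>>> import curies, pyobo
>>> species = pyobo.get_species("hgnc", "12345")  # hypothetical identifier
>>> same = pyobo.get_species(curies.ReferenceTuple("hgnc", "12345"))  # single-argument form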