pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (228)
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -117
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +107 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +210 -160
  20. pyobo/cli/database_utils.py +155 -0
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +209 -191
  25. pyobo/gilda_utils.py +52 -250
  26. pyobo/identifier_utils/__init__.py +33 -0
  27. pyobo/identifier_utils/api.py +305 -0
  28. pyobo/identifier_utils/preprocessing.json +873 -0
  29. pyobo/identifier_utils/preprocessing.py +27 -0
  30. pyobo/identifier_utils/relations/__init__.py +8 -0
  31. pyobo/identifier_utils/relations/api.py +162 -0
  32. pyobo/identifier_utils/relations/data.json +5824 -0
  33. pyobo/identifier_utils/relations/data_owl.json +57 -0
  34. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  35. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  36. pyobo/mocks.py +9 -6
  37. pyobo/ner/__init__.py +9 -0
  38. pyobo/ner/api.py +72 -0
  39. pyobo/ner/normalizer.py +33 -0
  40. pyobo/obographs.py +48 -40
  41. pyobo/plugins.py +5 -4
  42. pyobo/py.typed +0 -0
  43. pyobo/reader.py +1354 -395
  44. pyobo/reader_utils.py +155 -0
  45. pyobo/resource_utils.py +42 -22
  46. pyobo/resources/__init__.py +0 -0
  47. pyobo/resources/goc.py +75 -0
  48. pyobo/resources/goc.tsv +188 -0
  49. pyobo/resources/ncbitaxon.py +4 -5
  50. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  51. pyobo/resources/ro.py +3 -2
  52. pyobo/resources/ro.tsv +0 -0
  53. pyobo/resources/so.py +0 -0
  54. pyobo/resources/so.tsv +0 -0
  55. pyobo/sources/README.md +12 -8
  56. pyobo/sources/__init__.py +52 -29
  57. pyobo/sources/agrovoc.py +0 -0
  58. pyobo/sources/antibodyregistry.py +11 -12
  59. pyobo/sources/bigg/__init__.py +13 -0
  60. pyobo/sources/bigg/bigg_compartment.py +81 -0
  61. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  62. pyobo/sources/bigg/bigg_model.py +46 -0
  63. pyobo/sources/bigg/bigg_reaction.py +77 -0
  64. pyobo/sources/biogrid.py +1 -2
  65. pyobo/sources/ccle.py +7 -12
  66. pyobo/sources/cgnc.py +9 -6
  67. pyobo/sources/chebi.py +1 -1
  68. pyobo/sources/chembl/__init__.py +9 -0
  69. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  70. pyobo/sources/chembl/chembl_target.py +160 -0
  71. pyobo/sources/civic_gene.py +55 -15
  72. pyobo/sources/clinicaltrials.py +160 -0
  73. pyobo/sources/complexportal.py +24 -24
  74. pyobo/sources/conso.py +14 -22
  75. pyobo/sources/cpt.py +0 -0
  76. pyobo/sources/credit.py +1 -9
  77. pyobo/sources/cvx.py +27 -5
  78. pyobo/sources/depmap.py +9 -12
  79. pyobo/sources/dictybase_gene.py +2 -7
  80. pyobo/sources/drugbank/__init__.py +9 -0
  81. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  82. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  83. pyobo/sources/drugcentral.py +17 -13
  84. pyobo/sources/expasy.py +31 -34
  85. pyobo/sources/famplex.py +13 -18
  86. pyobo/sources/flybase.py +8 -13
  87. pyobo/sources/gard.py +62 -0
  88. pyobo/sources/geonames/__init__.py +9 -0
  89. pyobo/sources/geonames/features.py +28 -0
  90. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  91. pyobo/sources/geonames/utils.py +115 -0
  92. pyobo/sources/gmt_utils.py +6 -7
  93. pyobo/sources/go.py +20 -13
  94. pyobo/sources/gtdb.py +154 -0
  95. pyobo/sources/gwascentral/__init__.py +9 -0
  96. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  97. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  98. pyobo/sources/hgnc/__init__.py +9 -0
  99. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  100. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  101. pyobo/sources/icd/__init__.py +9 -0
  102. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  103. pyobo/sources/icd/icd11.py +148 -0
  104. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  105. pyobo/sources/interpro.py +4 -9
  106. pyobo/sources/itis.py +0 -5
  107. pyobo/sources/kegg/__init__.py +0 -0
  108. pyobo/sources/kegg/api.py +16 -38
  109. pyobo/sources/kegg/genes.py +9 -20
  110. pyobo/sources/kegg/genome.py +1 -7
  111. pyobo/sources/kegg/pathway.py +9 -21
  112. pyobo/sources/mesh.py +58 -24
  113. pyobo/sources/mgi.py +3 -10
  114. pyobo/sources/mirbase/__init__.py +11 -0
  115. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  116. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  117. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  118. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  119. pyobo/sources/msigdb.py +74 -39
  120. pyobo/sources/ncbi/__init__.py +9 -0
  121. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  122. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  123. pyobo/sources/nih_reporter.py +60 -0
  124. pyobo/sources/nlm/__init__.py +9 -0
  125. pyobo/sources/nlm/nlm_catalog.py +48 -0
  126. pyobo/sources/nlm/nlm_publisher.py +36 -0
  127. pyobo/sources/nlm/utils.py +116 -0
  128. pyobo/sources/npass.py +6 -8
  129. pyobo/sources/omim_ps.py +11 -4
  130. pyobo/sources/pathbank.py +4 -8
  131. pyobo/sources/pfam/__init__.py +9 -0
  132. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  133. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  134. pyobo/sources/pharmgkb/__init__.py +15 -0
  135. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  136. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  137. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  138. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  139. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  140. pyobo/sources/pharmgkb/utils.py +86 -0
  141. pyobo/sources/pid.py +1 -6
  142. pyobo/sources/pombase.py +6 -10
  143. pyobo/sources/pubchem.py +4 -9
  144. pyobo/sources/reactome.py +5 -11
  145. pyobo/sources/rgd.py +11 -16
  146. pyobo/sources/rhea.py +37 -36
  147. pyobo/sources/ror.py +69 -42
  148. pyobo/sources/selventa/__init__.py +0 -0
  149. pyobo/sources/selventa/schem.py +4 -7
  150. pyobo/sources/selventa/scomp.py +1 -6
  151. pyobo/sources/selventa/sdis.py +4 -7
  152. pyobo/sources/selventa/sfam.py +1 -6
  153. pyobo/sources/sgd.py +6 -11
  154. pyobo/sources/signor/__init__.py +7 -0
  155. pyobo/sources/signor/download.py +41 -0
  156. pyobo/sources/signor/signor_complexes.py +105 -0
  157. pyobo/sources/slm.py +12 -15
  158. pyobo/sources/umls/__init__.py +7 -1
  159. pyobo/sources/umls/__main__.py +0 -0
  160. pyobo/sources/umls/get_synonym_types.py +20 -4
  161. pyobo/sources/umls/sty.py +57 -0
  162. pyobo/sources/umls/synonym_types.tsv +1 -1
  163. pyobo/sources/umls/umls.py +18 -22
  164. pyobo/sources/unimod.py +46 -0
  165. pyobo/sources/uniprot/__init__.py +1 -1
  166. pyobo/sources/uniprot/uniprot.py +40 -32
  167. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  168. pyobo/sources/utils.py +3 -2
  169. pyobo/sources/wikipathways.py +7 -10
  170. pyobo/sources/zfin.py +5 -10
  171. pyobo/ssg/__init__.py +12 -16
  172. pyobo/ssg/base.html +0 -0
  173. pyobo/ssg/index.html +26 -13
  174. pyobo/ssg/term.html +12 -2
  175. pyobo/ssg/typedef.html +0 -0
  176. pyobo/struct/__init__.py +54 -8
  177. pyobo/struct/functional/__init__.py +1 -0
  178. pyobo/struct/functional/dsl.py +2572 -0
  179. pyobo/struct/functional/macros.py +423 -0
  180. pyobo/struct/functional/obo_to_functional.py +385 -0
  181. pyobo/struct/functional/ontology.py +272 -0
  182. pyobo/struct/functional/utils.py +112 -0
  183. pyobo/struct/reference.py +331 -136
  184. pyobo/struct/struct.py +1484 -657
  185. pyobo/struct/struct_utils.py +1078 -0
  186. pyobo/struct/typedef.py +162 -210
  187. pyobo/struct/utils.py +12 -5
  188. pyobo/struct/vocabulary.py +138 -0
  189. pyobo/utils/__init__.py +0 -0
  190. pyobo/utils/cache.py +16 -15
  191. pyobo/utils/io.py +51 -41
  192. pyobo/utils/iter.py +5 -5
  193. pyobo/utils/misc.py +41 -53
  194. pyobo/utils/ndex_utils.py +0 -0
  195. pyobo/utils/path.py +73 -70
  196. pyobo/version.py +3 -3
  197. pyobo-0.12.1.dist-info/METADATA +671 -0
  198. pyobo-0.12.1.dist-info/RECORD +201 -0
  199. pyobo-0.12.1.dist-info/WHEEL +4 -0
  200. {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
  201. pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
  202. pyobo/aws.py +0 -162
  203. pyobo/cli/aws.py +0 -47
  204. pyobo/identifier_utils.py +0 -142
  205. pyobo/normalizer.py +0 -232
  206. pyobo/registries/__init__.py +0 -16
  207. pyobo/registries/metaregistry.json +0 -507
  208. pyobo/registries/metaregistry.py +0 -135
  209. pyobo/sources/icd11.py +0 -105
  210. pyobo/xrefdb/__init__.py +0 -1
  211. pyobo/xrefdb/canonicalizer.py +0 -214
  212. pyobo/xrefdb/priority.py +0 -59
  213. pyobo/xrefdb/sources/__init__.py +0 -60
  214. pyobo/xrefdb/sources/biomappings.py +0 -36
  215. pyobo/xrefdb/sources/cbms2019.py +0 -91
  216. pyobo/xrefdb/sources/chembl.py +0 -83
  217. pyobo/xrefdb/sources/compath.py +0 -82
  218. pyobo/xrefdb/sources/famplex.py +0 -64
  219. pyobo/xrefdb/sources/gilda.py +0 -50
  220. pyobo/xrefdb/sources/intact.py +0 -113
  221. pyobo/xrefdb/sources/ncit.py +0 -133
  222. pyobo/xrefdb/sources/pubchem.py +0 -27
  223. pyobo/xrefdb/sources/wikidata.py +0 -116
  224. pyobo/xrefdb/xrefs_pipeline.py +0 -180
  225. pyobo-0.11.2.dist-info/METADATA +0 -711
  226. pyobo-0.11.2.dist-info/RECORD +0 -157
  227. pyobo-0.11.2.dist-info/WHEEL +0 -5
  228. pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/api/properties.py CHANGED
@@ -1,140 +1,159 @@
  """High-level API for properties."""

  import logging
- import os
  from collections.abc import Mapping
- from typing import Optional

  import pandas as pd
-
- from .utils import get_version
+ from tqdm import tqdm
+ from typing_extensions import Unpack
+
+ from .utils import get_version_from_kwargs
+ from ..constants import (
+     GetOntologyKwargs,
+     check_should_cache,
+     check_should_force,
+     check_should_use_tqdm,
+ )
  from ..getters import get_ontology
  from ..identifier_utils import wrap_norm_prefix
- from ..utils.cache import cached_df, cached_mapping, cached_multidict
+ from ..struct import Reference
+ from ..struct.struct_utils import OBOLiteral, ReferenceHint, _ensure_ref
+ from ..utils.cache import cached_df
  from ..utils.io import multidict
- from ..utils.path import prefix_cache_join
+ from ..utils.path import CacheArtifact, get_cache_path

  __all__ = [
-     "get_properties_df",
      "get_filtered_properties_df",
      "get_filtered_properties_mapping",
      "get_filtered_properties_multimapping",
-     "get_property",
+     "get_literal_properties",
+     "get_literal_properties_df",
+     "get_object_properties",
+     "get_object_properties_df",
      "get_properties",
+     "get_properties_df",
+     "get_property",
  ]

  logger = logging.getLogger(__name__)


+ def get_object_properties_df(prefix, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
+     """Get a dataframe of object property triples."""
+     version = get_version_from_kwargs(prefix, kwargs)
+     path = get_cache_path(prefix, CacheArtifact.object_properties, version=version)
+
+     @cached_df(
+         path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
+     )
+     def _df_getter() -> pd.DataFrame:
+         return get_ontology(prefix, **kwargs).get_object_properties_df(
+             use_tqdm=check_should_use_tqdm(kwargs)
+         )
+
+     return _df_getter()
+
+
+ def get_object_properties(
+     prefix, **kwargs: Unpack[GetOntologyKwargs]
+ ) -> list[tuple[Reference, Reference, Reference]]:
+     """Get a list of object property triples."""
+     df = get_object_properties_df(prefix, **kwargs)
+     return [
+         (Reference.from_curie(s), Reference.from_curie(p), Reference.from_curie(o))
+         for s, p, o in df.values
+     ]
+
+
+ def get_literal_properties(
+     prefix: str, **kwargs: Unpack[GetOntologyKwargs]
+ ) -> list[tuple[Reference, Reference, OBOLiteral]]:
+     """Get a list of literal property triples."""
+     df = get_literal_properties_df(prefix, **kwargs)
+     return [
+         (
+             Reference.from_curie(s),
+             Reference.from_curie(p),
+             OBOLiteral(
+                 value,
+                 Reference.from_curie(datatype),
+                 language if language and pd.notna(language) else None,
+             ),
+         )
+         for s, p, value, datatype, language in tqdm(
+             df.values,
+             desc=f"[{prefix}] parsing properties",
+             unit_scale=True,
+             unit="triple",
+             disable=not check_should_use_tqdm(kwargs),
+         )
+     ]
+
+
+ def get_literal_properties_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
+     """Get a dataframe of literal property quads."""
+     version = get_version_from_kwargs(prefix, kwargs)
+     path = get_cache_path(prefix, CacheArtifact.literal_properties, version=version)
+
+     @cached_df(
+         path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
+     )
+     def _df_getter() -> pd.DataFrame:
+         return get_ontology(prefix, **kwargs).get_literal_properties_df(
+             use_tqdm=check_should_use_tqdm(kwargs)
+         )
+
+     return _df_getter()
+
+
  @wrap_norm_prefix
- def get_properties_df(
-     prefix: str, *, force: bool = False, version: Optional[str] = None
- ) -> pd.DataFrame:
+ def get_properties_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
      """Extract properties.

      :param prefix: the resource to load
-     :param force: should the resource be re-downloaded, re-parsed, and re-cached?
      :returns: A dataframe with the properties
      """
-     if version is None:
-         version = get_version(prefix)
-     path = prefix_cache_join(prefix, name="properties.tsv", version=version)
-
-     @cached_df(path=path, dtype=str, force=force)
-     def _df_getter() -> pd.DataFrame:
-         if force:
-             logger.info("[%s] forcing reload for properties", prefix)
-         else:
-             logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
-         ontology = get_ontology(prefix, force=force, version=version)
-         df = ontology.get_properties_df()
-         df.dropna(inplace=True)
-         return df
-
-     return _df_getter()
+     df1 = get_literal_properties_df(prefix, **kwargs)
+     df2 = get_object_properties_df(prefix, **kwargs)
+     df = pd.concat([df1[["source", "predicate", "target"]], df2])
+     ll = len(prefix) + 1
+     df[f"{prefix}_id"] = df["source"].map(lambda x: x[ll:])
+     df = df.rename(columns={"predicate": "property", "target": "value"})
+     del df["source"]
+     return df[[f"{prefix}_id", "property", "value"]]


  @wrap_norm_prefix
  def get_filtered_properties_mapping(
-     prefix: str,
-     prop: str,
-     *,
-     use_tqdm: bool = False,
-     force: bool = False,
-     version: Optional[str] = None,
+     prefix: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs]
  ) -> Mapping[str, str]:
      """Extract a single property for each term as a dictionary.

      :param prefix: the resource to load
      :param prop: the property to extract
-     :param use_tqdm: should a progress bar be shown?
-     :param force: should the resource be re-downloaded, re-parsed, and re-cached?
      :returns: A mapping from identifier to property value
      """
-     df = get_properties_df(prefix=prefix, force=force, version=version)
-     df = df[df["property"] == prop]
-     return dict(df[[f"{prefix}_id", "value"]].values)
-
-     if version is None:
-         version = get_version(prefix)
-     path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
-     all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
-
-     @cached_mapping(path=path, header=[f"{prefix}_id", prop], force=force)
-     def _mapping_getter() -> Mapping[str, str]:
-         if os.path.exists(all_properties_path):
-             logger.info("[%s] loading pre-cached properties", prefix)
-             df = pd.read_csv(all_properties_path, sep="\t")
-             logger.info("[%s] filtering pre-cached properties", prefix)
-             df = df.loc[df["property"] == prop, [f"{prefix}_id", "value"]]
-             return dict(df.values)
-
-         logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
-         ontology = get_ontology(prefix, force=force, version=version)
-         return ontology.get_filtered_properties_mapping(prop, use_tqdm=use_tqdm)
-
-     return _mapping_getter()
+     df = get_filtered_properties_df(prefix, prop, **kwargs)
+     return dict(df.values)


  @wrap_norm_prefix
  def get_filtered_properties_multimapping(
-     prefix: str,
-     prop: str,
-     *,
-     use_tqdm: bool = False,
-     force: bool = False,
-     version: Optional[str] = None,
+     prefix: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs]
  ) -> Mapping[str, list[str]]:
      """Extract multiple properties for each term as a dictionary.

      :param prefix: the resource to load
      :param prop: the property to extract
-     :param use_tqdm: should a progress bar be shown?
-     :param force: should the resource be re-downloaded, re-parsed, and re-cached?
      :returns: A mapping from identifier to property values
      """
-     if version is None:
-         version = get_version(prefix)
-     path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
-     all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
+     df = get_filtered_properties_df(prefix, prop, **kwargs)
+     return multidict(df.values)

-     @cached_multidict(path=path, header=[f"{prefix}_id", prop], force=force)
-     def _mapping_getter() -> Mapping[str, list[str]]:
-         if os.path.exists(all_properties_path):
-             logger.info("[%s] loading pre-cached properties", prefix)
-             df = pd.read_csv(all_properties_path, sep="\t")
-             logger.info("[%s] filtering pre-cached properties", prefix)
-             df = df.loc[df["property"] == prop, [f"{prefix}_id", "value"]]
-             return multidict(df.values)

-         logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
-         ontology = get_ontology(prefix, force=force, version=version)
-         return ontology.get_filtered_properties_multimapping(prop, use_tqdm=use_tqdm)
-
-     return _mapping_getter()
-
-
- def get_property(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[str]:
+ def get_property(
+     prefix: str, identifier: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs]
+ ) -> str | None:
      """Extract a single property for the given entity.

      :param prefix: the resource to load
@@ -152,7 +171,12 @@ def get_property(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[
      return filtered_properties_mapping.get(identifier)


- def get_properties(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[list[str]]:
+ def get_properties(
+     prefix: str,
+     identifier: str,
+     prop: ReferenceHint,
+     **kwargs: Unpack[GetOntologyKwargs],
+ ) -> list[str] | None:
      """Extract a set of properties for the given entity.

      :param prefix: the resource to load
@@ -168,39 +192,15 @@ def get_properties(prefix: str, identifier: str, prop: str, **kwargs) -> Optiona

  @wrap_norm_prefix
  def get_filtered_properties_df(
-     prefix: str,
-     prop: str,
-     *,
-     use_tqdm: bool = False,
-     force: bool = False,
-     version: Optional[str] = None,
+     prefix: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs]
  ) -> pd.DataFrame:
      """Extract a single property for each term.

      :param prefix: the resource to load
      :param prop: the property to extract
-     :param use_tqdm: should a progress bar be shown?
-     :param force: should the resource be re-downloaded, re-parsed, and re-cached?
      :returns: A dataframe from identifier to property value. Columns are [<prefix>_id, value].
      """
-     if version is None:
-         version = get_version(prefix)
-     path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
-     all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
-
-     @cached_df(path=path, dtype=str, force=force)
-     def _df_getter() -> pd.DataFrame:
-         if os.path.exists(all_properties_path):
-             logger.info("[%s] loading pre-cached properties", prefix)
-             df = pd.read_csv(all_properties_path, sep="\t")
-             logger.info("[%s] filtering pre-cached properties", prefix)
-             return df.loc[df["property"] == prop, [f"{prefix}_id", "value"]]
-
-         if force:
-             logger.info("[%s] forcing reload for properties", prefix)
-         else:
-             logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
-         ontology = get_ontology(prefix, force=force, version=version)
-         return ontology.get_filtered_properties_df(prop, use_tqdm=use_tqdm)
-
-     return _df_getter()
+     prop = _ensure_ref(prop, ontology_prefix=prefix)
+     df = get_properties_df(prefix, **kwargs)
+     df = df.loc[df["property"] == prop.curie, [f"{prefix}_id", "value"]]
+     return df
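
For orientation, a minimal usage sketch (not part of the diff) of the reworked properties API, based on the signatures above: explicit force=/version=/use_tqdm= parameters are replaced by the shared GetOntologyKwargs keyword bundle, and object vs. literal properties are now retrieved separately. The "chebi" prefix and the property CURIE are illustrative.

    from pyobo.api.properties import (
        get_filtered_properties_mapping,
        get_literal_properties,
        get_object_properties,
    )

    # Object and literal property triples now have separate getters; both accept
    # the shared GetOntologyKwargs keyword arguments (e.g., version=, force=).
    object_triples = get_object_properties("chebi")    # list of (subject, predicate, object) References
    literal_triples = get_literal_properties("chebi")  # list of (subject, predicate, OBOLiteral)

    # Filtered helpers take a ReferenceHint (e.g., a CURIE string) instead of a raw
    # property string; the CURIE below is purely illustrative.
    xrefs = get_filtered_properties_mapping("chebi", "oboInOwl:hasDbXref")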
pyobo/api/relations.py CHANGED
@@ -1,15 +1,13 @@
  """High-level API for relations."""

  import logging
- import os
  from collections.abc import Mapping
  from functools import lru_cache
- from typing import Optional

- import networkx as nx
  import pandas as pd
+ from typing_extensions import Unpack

- from .utils import get_version
+ from .utils import get_version_from_kwargs
  from ..constants import (
      RELATION_COLUMNS,
      RELATION_ID,
@@ -18,50 +16,60 @@ from ..constants import (
      SOURCE_PREFIX,
      TARGET_ID,
      TARGET_PREFIX,
+     GetOntologyKwargs,
+     check_should_cache,
+     check_should_force,
+     check_should_use_tqdm,
  )
  from ..getters import get_ontology
  from ..identifier_utils import wrap_norm_prefix
- from ..struct import Reference, RelationHint, TypeDef, get_reference_tuple
+ from ..struct.reference import Reference
+ from ..struct.struct_utils import ReferenceHint, _ensure_ref
  from ..utils.cache import cached_df
- from ..utils.path import prefix_cache_join
+ from ..utils.path import CacheArtifact, get_cache_path, get_relation_cache_path

  __all__ = [
-     "get_relations_df",
      "get_filtered_relations_df",
      "get_id_multirelations_mapping",
-     "get_relation_mapping",
      "get_relation",
-     "get_graph",
+     "get_relation_mapping",
+     "get_relations",
+     "get_relations_df",
  ]

- # TODO get_relation, get_relations
-
  logger = logging.getLogger(__name__)


+ @wrap_norm_prefix
+ def get_relations(
+     prefix: str, **kwargs: Unpack[GetOntologyKwargs]
+ ) -> list[tuple[Reference, Reference, Reference]]:
+     """Get relations."""
+     df = get_relations_df(prefix, wide=False, **kwargs)
+     return [
+         (
+             Reference(prefix=prefix, identifier=source_id),
+             Reference(prefix=relation_prefix, identifier=relation_id),
+             Reference(prefix=target_prefix, identifier=target_id),
+         )
+         for source_id, relation_prefix, relation_id, target_prefix, target_id in df.values
+     ]
+
+
  @wrap_norm_prefix
  def get_relations_df(
-     prefix: str,
-     *,
-     use_tqdm: bool = False,
-     force: bool = False,
-     wide: bool = False,
-     strict: bool = True,
-     version: Optional[str] = None,
+     prefix: str, *, wide: bool = False, **kwargs: Unpack[GetOntologyKwargs]
  ) -> pd.DataFrame:
      """Get all relations from the OBO."""
-     if version is None:
-         version = get_version(prefix)
-     path = prefix_cache_join(prefix, name="relations.tsv", version=version)
+     version = get_version_from_kwargs(prefix, kwargs)
+     path = get_cache_path(prefix, CacheArtifact.relations, version=version)

-     @cached_df(path=path, dtype=str, force=force)
+     @cached_df(
+         path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
+     )
      def _df_getter() -> pd.DataFrame:
-         if force:
-             logger.info("[%s] forcing reload for relations", prefix)
-         else:
-             logger.info("[%s] no cached relations found. getting from OBO loader", prefix)
-         ontology = get_ontology(prefix, force=force, version=version, strict=strict)
-         return ontology.get_relations_df(use_tqdm=use_tqdm)
+         ontology = get_ontology(prefix, **kwargs)
+         return ontology.get_relations_df(use_tqdm=check_should_use_tqdm(kwargs))

      rv = _df_getter()

@@ -76,38 +84,29 @@ def get_relations_df(
  @wrap_norm_prefix
  def get_filtered_relations_df(
      prefix: str,
-     relation: RelationHint,
-     *,
-     use_tqdm: bool = False,
-     force: bool = False,
-     version: Optional[str] = None,
+     relation: ReferenceHint,
+     **kwargs: Unpack[GetOntologyKwargs],
  ) -> pd.DataFrame:
      """Get all the given relation."""
-     relation_prefix, relation_identifier = relation = get_reference_tuple(relation)
-     if version is None:
-         version = get_version(prefix)
-     path = prefix_cache_join(
-         prefix,
-         "relations",
-         name=f"{relation_prefix}:{relation_identifier}.tsv",
-         version=version,
+     relation = _ensure_ref(relation, ontology_prefix=prefix)
+     version = get_version_from_kwargs(prefix, kwargs)
+     all_relations_path = get_cache_path(prefix, CacheArtifact.relations, version=version)
+     if all_relations_path.is_file():
+         logger.debug("[%] loading all relations from %s", prefix, all_relations_path)
+         df = pd.read_csv(all_relations_path, sep="\t", dtype=str)
+         idx = (df[RELATION_PREFIX] == relation.prefix) & (df[RELATION_ID] == relation.identifier)
+         columns = [f"{prefix}_id", TARGET_PREFIX, TARGET_ID]
+         return df.loc[idx, columns]
+
+     path = get_relation_cache_path(prefix, relation, version=version)
+
+     @cached_df(
+         path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
      )
-     all_relations_path = prefix_cache_join(prefix, name="relations.tsv", version=version)
-
-     @cached_df(path=path, dtype=str, force=force)
      def _df_getter() -> pd.DataFrame:
-         if os.path.exists(all_relations_path):
-             logger.debug("[%] loading all relations from %s", prefix, all_relations_path)
-             df = pd.read_csv(all_relations_path, sep="\t", dtype=str)
-             idx = (df[RELATION_PREFIX] == relation_prefix) & (
-                 df[RELATION_ID] == relation_identifier
-             )
-             columns = [f"{prefix}_id", TARGET_PREFIX, TARGET_ID]
-             return df.loc[idx, columns]
-
          logger.info("[%s] no cached relations found. getting from OBO loader", prefix)
-         ontology = get_ontology(prefix, force=force, version=version)
-         return ontology.get_filtered_relations_df(relation, use_tqdm=use_tqdm)
+         ontology = get_ontology(prefix, **kwargs)
+         return ontology.get_filtered_relations_df(relation, use_tqdm=check_should_use_tqdm(kwargs))

      return _df_getter()

@@ -115,29 +114,24 @@ def get_filtered_relations_df(
  @wrap_norm_prefix
  def get_id_multirelations_mapping(
      prefix: str,
-     typedef: TypeDef,
-     *,
-     use_tqdm: bool = False,
-     force: bool = False,
-     version: Optional[str] = None,
+     typedef: ReferenceHint,
+     **kwargs: Unpack[GetOntologyKwargs],
  ) -> Mapping[str, list[Reference]]:
      """Get the OBO file and output a synonym dictionary."""
-     if version is None:
-         version = get_version(prefix)
-     ontology = get_ontology(prefix, force=force, version=version)
-     return ontology.get_id_multirelations_mapping(typedef=typedef, use_tqdm=use_tqdm)
+     kwargs["version"] = get_version_from_kwargs(prefix, kwargs)
+     ontology = get_ontology(prefix, **kwargs)
+     return ontology.get_id_multirelations_mapping(
+         typedef=typedef, use_tqdm=check_should_use_tqdm(kwargs)
+     )


  @lru_cache
  @wrap_norm_prefix
  def get_relation_mapping(
      prefix: str,
-     relation: RelationHint,
+     relation: ReferenceHint,
      target_prefix: str,
-     *,
-     use_tqdm: bool = False,
-     force: bool = False,
-     version: Optional[str] = None,
+     **kwargs: Unpack[GetOntologyKwargs],
  ) -> Mapping[str, str]:
      """Get relations from identifiers in the source prefix to target prefix with the given relation.

@@ -151,11 +145,9 @@ def get_relation_mapping(
      >>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping("hgnc", "ro:HOM0000017", "mgi")
      >>> assert mouse_mapt_mgi_id == hgnc_mgi_orthology_mapping[human_mapt_hgnc_id]
      """
-     if version is None:
-         version = get_version(prefix)
-     ontology = get_ontology(prefix, force=force, version=version)
+     ontology = get_ontology(prefix, **kwargs)
      return ontology.get_relation_mapping(
-         relation=relation, target_prefix=target_prefix, use_tqdm=use_tqdm
+         relation=relation, target_prefix=target_prefix, use_tqdm=check_should_use_tqdm(kwargs)
      )


@@ -163,13 +155,10 @@ def get_relation_mapping(
  def get_relation(
      prefix: str,
      source_identifier: str,
-     relation: RelationHint,
+     relation: ReferenceHint,
      target_prefix: str,
-     *,
-     use_tqdm: bool = False,
-     force: bool = False,
-     **kwargs,
- ) -> Optional[str]:
+     **kwargs: Unpack[GetOntologyKwargs],
+ ) -> str | None:
      """Get the target identifier corresponding to the given relationship from the source prefix/identifier pair.

      .. warning:: Assumes there's only one version of the property for each term.
@@ -187,21 +176,6 @@ def get_relation(
          prefix=prefix,
          relation=relation,
          target_prefix=target_prefix,
-         use_tqdm=use_tqdm,
-         force=force,
          **kwargs,
      )
      return relation_mapping.get(source_identifier)
-
-
- def get_graph(prefix: str, **kwargs) -> nx.DiGraph:
-     """Get the relation graph."""
-     rv = nx.MultiDiGraph()
-     df = get_relations_df(prefix=prefix, **kwargs)
-     for source_id, relation_prefix, relation_id, target_ns, target_id in df.values:
-         rv.add_edge(
-             f"{prefix}:{source_id}",
-             f"{target_ns}:{target_id}",
-             key=f"{relation_prefix}:{relation_id}",
-         )
-     return rv
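
The `get_graph` helper (and this module's networkx import) was removed. If a graph view is still needed, a caller can rebuild it on top of the new keyword-argument `get_relations_df`; a minimal sketch under that assumption, adapted from the deleted function:

    import networkx as nx

    from pyobo.api.relations import get_relations_df


    def build_relation_graph(prefix: str, **kwargs) -> nx.MultiDiGraph:
        """Rebuild the graph formerly returned by pyobo.api.relations.get_graph."""
        rv = nx.MultiDiGraph()
        # wide=False keeps the long format: source_id, relation_prefix, relation_id, target_prefix, target_id
        df = get_relations_df(prefix, wide=False, **kwargs)
        for source_id, relation_prefix, relation_id, target_prefix, target_id in df.values:
            rv.add_edge(
                f"{prefix}:{source_id}",
                f"{target_prefix}:{target_id}",
                key=f"{relation_prefix}:{relation_id}",
            )
        return rv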
pyobo/api/species.py CHANGED
@@ -3,14 +3,17 @@
  import logging
  from collections.abc import Mapping
  from functools import lru_cache
- from typing import Optional
+
+ import curies
+ from typing_extensions import Unpack

  from .alts import get_primary_identifier
- from .utils import get_version
+ from .utils import _get_pi, get_version_from_kwargs
+ from ..constants import GetOntologyKwargs, check_should_force
  from ..getters import NoBuildError, get_ontology
  from ..identifier_utils import wrap_norm_prefix
  from ..utils.cache import cached_mapping
- from ..utils.path import prefix_cache_join
+ from ..utils.path import CacheArtifact, get_cache_path

  __all__ = [
      "get_id_species_mapping",
@@ -20,34 +23,35 @@ __all__ = [
  logger = logging.getLogger(__name__)


- @wrap_norm_prefix
- def get_species(prefix: str, identifier: str, *, version: Optional[str] = None) -> Optional[str]:
+ def get_species(
+     prefix: str | curies.Reference | curies.ReferenceTuple,
+     identifier: str | None = None,
+     /,
+     **kwargs: Unpack[GetOntologyKwargs],
+ ) -> str | None:
      """Get the species."""
-     if prefix == "uniprot":
+     t = _get_pi(prefix, identifier)
+
+     if t.prefix == "uniprot":
          raise NotImplementedError

      try:
-         id_species = get_id_species_mapping(prefix, version=version)
+         id_species = get_id_species_mapping(t.prefix, **kwargs)
      except NoBuildError:
-         logger.warning("unable to look up species for prefix %s", prefix)
+         logger.warning("unable to look up species for prefix %s", t.prefix)
          return None

      if not id_species:
-         logger.warning("no results produced for prefix %s", prefix)
+         logger.warning("no results produced for prefix %s", t.prefix)
          return None

-     primary_id = get_primary_identifier(prefix, identifier, version=version)
+     primary_id = get_primary_identifier(t, **kwargs)
      return id_species.get(primary_id)


  @lru_cache
  @wrap_norm_prefix
- def get_id_species_mapping(
-     prefix: str,
-     force: bool = False,
-     strict: bool = True,
-     version: Optional[str] = None,
- ) -> Mapping[str, str]:
+ def get_id_species_mapping(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> Mapping[str, str]:
      """Get an identifier to species mapping."""
      if prefix == "ncbigene":
          from ..sources.ncbigene import get_ncbigene_id_to_species_mapping
@@ -57,14 +61,13 @@ def get_id_species_mapping(
          logger.info("[%s] done loading species mappings", prefix)
          return rv

-     if version is None:
-         version = get_version(prefix)
-     path = prefix_cache_join(prefix, name="species.tsv", version=version)
+     version = get_version_from_kwargs(prefix, kwargs)
+     path = get_cache_path(prefix, CacheArtifact.species, version=version)

-     @cached_mapping(path=path, header=[f"{prefix}_id", "species"], force=force)
+     @cached_mapping(path=path, header=[f"{prefix}_id", "species"], force=check_should_force(kwargs))
      def _get_id_species_mapping() -> Mapping[str, str]:
          logger.info("[%s] no cached species found. getting from OBO loader", prefix)
-         ontology = get_ontology(prefix, force=force, strict=strict, version=version)
+         ontology = get_ontology(prefix, **kwargs)
          logger.info("[%s] loading species mappings", prefix)
          return ontology.get_id_species_mapping()
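
A short sketch (not from the diff) of the broadened `get_species` signature: the first positional argument may now be a plain prefix paired with an identifier, or a `curies` reference object. The ncbigene identifier below is purely illustrative.

    from curies import ReferenceTuple

    from pyobo.api.species import get_species

    # Classic (prefix, identifier) positional call
    taxonomy_id = get_species("ncbigene", "6801")

    # The first argument may now also be a curies.Reference or curies.ReferenceTuple
    taxonomy_id_alt = get_species(ReferenceTuple("ncbigene", "6801"))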