pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -113
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +108 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +183 -161
  20. pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +196 -118
  25. pyobo/gilda_utils.py +79 -200
  26. pyobo/identifier_utils/__init__.py +41 -0
  27. pyobo/identifier_utils/api.py +296 -0
  28. pyobo/identifier_utils/model.py +130 -0
  29. pyobo/identifier_utils/preprocessing.json +812 -0
  30. pyobo/identifier_utils/preprocessing.py +61 -0
  31. pyobo/identifier_utils/relations/__init__.py +8 -0
  32. pyobo/identifier_utils/relations/api.py +162 -0
  33. pyobo/identifier_utils/relations/data.json +5824 -0
  34. pyobo/identifier_utils/relations/data_owl.json +57 -0
  35. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  36. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  37. pyobo/mocks.py +9 -6
  38. pyobo/ner/__init__.py +9 -0
  39. pyobo/ner/api.py +72 -0
  40. pyobo/ner/normalizer.py +33 -0
  41. pyobo/obographs.py +43 -39
  42. pyobo/plugins.py +5 -4
  43. pyobo/py.typed +0 -0
  44. pyobo/reader.py +1358 -395
  45. pyobo/reader_utils.py +155 -0
  46. pyobo/resource_utils.py +42 -22
  47. pyobo/resources/__init__.py +0 -0
  48. pyobo/resources/goc.py +75 -0
  49. pyobo/resources/goc.tsv +188 -0
  50. pyobo/resources/ncbitaxon.py +4 -5
  51. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  52. pyobo/resources/ro.py +3 -2
  53. pyobo/resources/ro.tsv +0 -0
  54. pyobo/resources/so.py +0 -0
  55. pyobo/resources/so.tsv +0 -0
  56. pyobo/sources/README.md +12 -8
  57. pyobo/sources/__init__.py +52 -29
  58. pyobo/sources/agrovoc.py +0 -0
  59. pyobo/sources/antibodyregistry.py +11 -12
  60. pyobo/sources/bigg/__init__.py +13 -0
  61. pyobo/sources/bigg/bigg_compartment.py +81 -0
  62. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  63. pyobo/sources/bigg/bigg_model.py +46 -0
  64. pyobo/sources/bigg/bigg_reaction.py +77 -0
  65. pyobo/sources/biogrid.py +1 -2
  66. pyobo/sources/ccle.py +7 -12
  67. pyobo/sources/cgnc.py +0 -5
  68. pyobo/sources/chebi.py +1 -1
  69. pyobo/sources/chembl/__init__.py +9 -0
  70. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  71. pyobo/sources/chembl/chembl_target.py +160 -0
  72. pyobo/sources/civic_gene.py +55 -15
  73. pyobo/sources/clinicaltrials.py +160 -0
  74. pyobo/sources/complexportal.py +24 -24
  75. pyobo/sources/conso.py +14 -22
  76. pyobo/sources/cpt.py +0 -0
  77. pyobo/sources/credit.py +1 -9
  78. pyobo/sources/cvx.py +27 -5
  79. pyobo/sources/depmap.py +9 -12
  80. pyobo/sources/dictybase_gene.py +2 -7
  81. pyobo/sources/drugbank/__init__.py +9 -0
  82. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  83. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  84. pyobo/sources/drugcentral.py +17 -13
  85. pyobo/sources/expasy.py +31 -34
  86. pyobo/sources/famplex.py +13 -18
  87. pyobo/sources/flybase.py +3 -8
  88. pyobo/sources/gard.py +62 -0
  89. pyobo/sources/geonames/__init__.py +9 -0
  90. pyobo/sources/geonames/features.py +28 -0
  91. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  92. pyobo/sources/geonames/utils.py +115 -0
  93. pyobo/sources/gmt_utils.py +6 -7
  94. pyobo/sources/go.py +20 -13
  95. pyobo/sources/gtdb.py +154 -0
  96. pyobo/sources/gwascentral/__init__.py +9 -0
  97. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  98. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  99. pyobo/sources/hgnc/__init__.py +9 -0
  100. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  101. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  102. pyobo/sources/icd/__init__.py +9 -0
  103. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  104. pyobo/sources/icd/icd11.py +148 -0
  105. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  106. pyobo/sources/interpro.py +4 -9
  107. pyobo/sources/itis.py +0 -5
  108. pyobo/sources/kegg/__init__.py +0 -0
  109. pyobo/sources/kegg/api.py +16 -38
  110. pyobo/sources/kegg/genes.py +9 -20
  111. pyobo/sources/kegg/genome.py +1 -7
  112. pyobo/sources/kegg/pathway.py +9 -21
  113. pyobo/sources/mesh.py +58 -24
  114. pyobo/sources/mgi.py +3 -10
  115. pyobo/sources/mirbase/__init__.py +11 -0
  116. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  117. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  118. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  119. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  120. pyobo/sources/msigdb.py +74 -39
  121. pyobo/sources/ncbi/__init__.py +9 -0
  122. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  123. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  124. pyobo/sources/nih_reporter.py +60 -0
  125. pyobo/sources/nlm/__init__.py +9 -0
  126. pyobo/sources/nlm/nlm_catalog.py +48 -0
  127. pyobo/sources/nlm/nlm_publisher.py +36 -0
  128. pyobo/sources/nlm/utils.py +116 -0
  129. pyobo/sources/npass.py +6 -8
  130. pyobo/sources/omim_ps.py +10 -3
  131. pyobo/sources/pathbank.py +4 -8
  132. pyobo/sources/pfam/__init__.py +9 -0
  133. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  134. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  135. pyobo/sources/pharmgkb/__init__.py +15 -0
  136. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  137. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  138. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  139. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  140. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  141. pyobo/sources/pharmgkb/utils.py +86 -0
  142. pyobo/sources/pid.py +1 -6
  143. pyobo/sources/pombase.py +6 -10
  144. pyobo/sources/pubchem.py +4 -9
  145. pyobo/sources/reactome.py +5 -11
  146. pyobo/sources/rgd.py +11 -16
  147. pyobo/sources/rhea.py +37 -36
  148. pyobo/sources/ror.py +69 -42
  149. pyobo/sources/selventa/__init__.py +0 -0
  150. pyobo/sources/selventa/schem.py +4 -7
  151. pyobo/sources/selventa/scomp.py +1 -6
  152. pyobo/sources/selventa/sdis.py +4 -7
  153. pyobo/sources/selventa/sfam.py +1 -6
  154. pyobo/sources/sgd.py +6 -11
  155. pyobo/sources/signor/__init__.py +7 -0
  156. pyobo/sources/signor/download.py +41 -0
  157. pyobo/sources/signor/signor_complexes.py +105 -0
  158. pyobo/sources/slm.py +12 -15
  159. pyobo/sources/umls/__init__.py +7 -1
  160. pyobo/sources/umls/__main__.py +0 -0
  161. pyobo/sources/umls/get_synonym_types.py +20 -4
  162. pyobo/sources/umls/sty.py +57 -0
  163. pyobo/sources/umls/synonym_types.tsv +1 -1
  164. pyobo/sources/umls/umls.py +18 -22
  165. pyobo/sources/unimod.py +46 -0
  166. pyobo/sources/uniprot/__init__.py +1 -1
  167. pyobo/sources/uniprot/uniprot.py +40 -32
  168. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  169. pyobo/sources/utils.py +3 -2
  170. pyobo/sources/wikipathways.py +7 -10
  171. pyobo/sources/zfin.py +5 -10
  172. pyobo/ssg/__init__.py +12 -16
  173. pyobo/ssg/base.html +0 -0
  174. pyobo/ssg/index.html +26 -13
  175. pyobo/ssg/term.html +12 -2
  176. pyobo/ssg/typedef.html +0 -0
  177. pyobo/struct/__init__.py +54 -8
  178. pyobo/struct/functional/__init__.py +1 -0
  179. pyobo/struct/functional/dsl.py +2572 -0
  180. pyobo/struct/functional/macros.py +423 -0
  181. pyobo/struct/functional/obo_to_functional.py +385 -0
  182. pyobo/struct/functional/ontology.py +270 -0
  183. pyobo/struct/functional/utils.py +112 -0
  184. pyobo/struct/reference.py +331 -136
  185. pyobo/struct/struct.py +1413 -643
  186. pyobo/struct/struct_utils.py +1078 -0
  187. pyobo/struct/typedef.py +162 -210
  188. pyobo/struct/utils.py +12 -5
  189. pyobo/struct/vocabulary.py +138 -0
  190. pyobo/utils/__init__.py +0 -0
  191. pyobo/utils/cache.py +13 -11
  192. pyobo/utils/io.py +17 -31
  193. pyobo/utils/iter.py +5 -5
  194. pyobo/utils/misc.py +41 -53
  195. pyobo/utils/ndex_utils.py +0 -0
  196. pyobo/utils/path.py +76 -70
  197. pyobo/version.py +3 -3
  198. {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/METADATA +224 -225
  199. pyobo-0.12.0.dist-info/RECORD +202 -0
  200. pyobo-0.12.0.dist-info/WHEEL +4 -0
  201. {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
  202. {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info/licenses}/LICENSE +0 -0
  203. pyobo/apps/__init__.py +0 -3
  204. pyobo/apps/cli.py +0 -24
  205. pyobo/apps/gilda/__init__.py +0 -3
  206. pyobo/apps/gilda/__main__.py +0 -8
  207. pyobo/apps/gilda/app.py +0 -48
  208. pyobo/apps/gilda/cli.py +0 -36
  209. pyobo/apps/gilda/templates/base.html +0 -33
  210. pyobo/apps/gilda/templates/home.html +0 -11
  211. pyobo/apps/gilda/templates/matches.html +0 -32
  212. pyobo/apps/mapper/__init__.py +0 -3
  213. pyobo/apps/mapper/__main__.py +0 -11
  214. pyobo/apps/mapper/cli.py +0 -37
  215. pyobo/apps/mapper/mapper.py +0 -187
  216. pyobo/apps/mapper/templates/base.html +0 -35
  217. pyobo/apps/mapper/templates/mapper_home.html +0 -64
  218. pyobo/aws.py +0 -162
  219. pyobo/cli/aws.py +0 -47
  220. pyobo/identifier_utils.py +0 -142
  221. pyobo/normalizer.py +0 -232
  222. pyobo/registries/__init__.py +0 -16
  223. pyobo/registries/metaregistry.json +0 -507
  224. pyobo/registries/metaregistry.py +0 -135
  225. pyobo/sources/icd11.py +0 -105
  226. pyobo/xrefdb/__init__.py +0 -1
  227. pyobo/xrefdb/canonicalizer.py +0 -214
  228. pyobo/xrefdb/priority.py +0 -59
  229. pyobo/xrefdb/sources/__init__.py +0 -60
  230. pyobo/xrefdb/sources/biomappings.py +0 -36
  231. pyobo/xrefdb/sources/cbms2019.py +0 -91
  232. pyobo/xrefdb/sources/chembl.py +0 -83
  233. pyobo/xrefdb/sources/compath.py +0 -82
  234. pyobo/xrefdb/sources/famplex.py +0 -64
  235. pyobo/xrefdb/sources/gilda.py +0 -50
  236. pyobo/xrefdb/sources/intact.py +0 -113
  237. pyobo/xrefdb/sources/ncit.py +0 -133
  238. pyobo/xrefdb/sources/pubchem.py +0 -27
  239. pyobo/xrefdb/sources/wikidata.py +0 -116
  240. pyobo-0.11.1.dist-info/RECORD +0 -173
  241. pyobo-0.11.1.dist-info/WHEEL +0 -5
  242. pyobo-0.11.1.dist-info/top_level.txt +0 -1
pyobo/utils/cache.py CHANGED
@@ -3,10 +3,9 @@
3
3
  import gzip
4
4
  import json
5
5
  import logging
6
- import os
7
6
  from collections.abc import Iterable, Mapping
8
7
  from pathlib import Path
9
- from typing import Generic, TypeVar, Union
8
+ from typing import Generic, TypeVar
10
9
 
11
10
  import networkx as nx
12
11
  from pystow.cache import Cached
@@ -18,15 +17,15 @@ from pystow.cache import CachedPickle as cached_pickle # noqa:N813
18
17
  from .io import open_map_tsv, open_multimap_tsv, write_map_tsv, write_multimap_tsv
19
18
 
20
19
  __all__ = [
21
- # from pystow
22
- "cached_json",
23
20
  "cached_collection",
24
21
  "cached_df",
25
- "cached_pickle",
26
22
  # implemented here
27
23
  "cached_graph",
24
+ # from pystow
25
+ "cached_json",
28
26
  "cached_mapping",
29
27
  "cached_multidict",
28
+ "cached_pickle",
30
29
  ]
31
30
 
32
31
  logger = logging.getLogger(__name__)
@@ -39,14 +38,15 @@ class _CachedMapping(Cached[X], Generic[X]):
39
38
 
40
39
  def __init__(
41
40
  self,
42
- path: Union[str, Path, os.PathLike],
41
+ path: str | Path,
43
42
  header: Iterable[str],
44
43
  *,
45
44
  use_tqdm: bool = False,
46
45
  force: bool = False,
46
+ cache: bool = True,
47
47
  ):
48
48
  """Initialize the mapping cache."""
49
- super().__init__(path=path, force=force)
49
+ super().__init__(path=path, cache=cache, force=force)
50
50
  self.header = header
51
51
  self.use_tqdm = use_tqdm
52
52
 
@@ -65,17 +65,19 @@ class CachedMapping(_CachedMapping[Mapping[str, str]]):
65
65
 
66
66
  cached_mapping = CachedMapping
67
67
 
68
+ NODE_LINK_STYLE = "links" # TODO update to "edges"
69
+
68
70
 
69
- def get_gzipped_graph(path: Union[str, Path]) -> nx.MultiDiGraph:
71
+ def get_gzipped_graph(path: str | Path) -> nx.MultiDiGraph:
70
72
  """Read a graph that's gzipped nodelink."""
71
73
  with gzip.open(path, "rt") as file:
72
- return nx.node_link_graph(json.load(file))
74
+ return nx.node_link_graph(json.load(file), edges=NODE_LINK_STYLE)
73
75
 
74
76
 
75
- def write_gzipped_graph(graph: nx.MultiDiGraph, path: Union[str, Path]) -> None:
77
+ def write_gzipped_graph(graph: nx.MultiDiGraph, path: str | Path) -> None:
76
78
  """Write a graph as gzipped nodelink."""
77
79
  with gzip.open(path, "wt") as file:
78
- json.dump(nx.node_link_data(graph), file)
80
+ json.dump(nx.node_link_data(graph, edges=NODE_LINK_STYLE), file)
79
81
 
80
82
 
81
83
  class CachedGraph(Cached[nx.MultiDiGraph]):
pyobo/utils/io.py CHANGED
@@ -4,30 +4,26 @@ import collections.abc
4
4
  import csv
5
5
  import gzip
6
6
  import logging
7
- import time
8
7
  from collections import defaultdict
9
8
  from collections.abc import Iterable, Mapping
10
9
  from contextlib import contextmanager
11
10
  from pathlib import Path
12
- from typing import Optional, TypeVar, Union
13
- from xml.etree.ElementTree import Element
11
+ from typing import TypeVar
14
12
 
15
13
  import pandas as pd
16
- from lxml import etree
17
14
  from tqdm.auto import tqdm
18
15
 
19
16
  __all__ = [
20
- "open_map_tsv",
21
- "open_multimap_tsv",
17
+ "get_reader",
18
+ "get_writer",
22
19
  "multidict",
23
20
  "multisetdict",
21
+ "open_map_tsv",
22
+ "open_multimap_tsv",
23
+ "open_reader",
24
+ "write_iterable_tsv",
24
25
  "write_map_tsv",
25
26
  "write_multimap_tsv",
26
- "write_iterable_tsv",
27
- "parse_xml_gz",
28
- "get_writer",
29
- "open_reader",
30
- "get_reader",
31
27
  ]
32
28
 
33
29
  logger = logging.getLogger(__name__)
@@ -37,7 +33,7 @@ Y = TypeVar("Y")
37
33
 
38
34
 
39
35
  @contextmanager
40
- def open_reader(path: Union[str, Path], sep: str = "\t"):
36
+ def open_reader(path: str | Path, sep: str = "\t"):
41
37
  """Open a file and get a reader for it."""
42
38
  path = Path(path)
43
39
  with gzip.open(path, "rt") if path.suffix == ".gz" else open(path) as file:
@@ -55,7 +51,7 @@ def get_writer(x, sep: str = "\t"):
55
51
 
56
52
 
57
53
  def open_map_tsv(
58
- path: Union[str, Path], *, use_tqdm: bool = False, has_header: bool = True
54
+ path: str | Path, *, use_tqdm: bool = False, has_header: bool = True
59
55
  ) -> Mapping[str, str]:
60
56
  """Load a mapping TSV file into a dictionary."""
61
57
  with open(path) as file:
@@ -73,7 +69,7 @@ def open_map_tsv(
73
69
 
74
70
 
75
71
  def open_multimap_tsv(
76
- path: Union[str, Path],
72
+ path: str | Path,
77
73
  *,
78
74
  use_tqdm: bool = False,
79
75
  has_header: bool = True,
@@ -83,7 +79,7 @@ def open_multimap_tsv(
83
79
 
84
80
 
85
81
  def _help_multimap_tsv(
86
- path: Union[str, Path],
82
+ path: str | Path,
87
83
  *,
88
84
  use_tqdm: bool = False,
89
85
  has_header: bool = True,
@@ -115,9 +111,9 @@ def multisetdict(pairs: Iterable[tuple[X, Y]]) -> dict[X, set[Y]]:
115
111
 
116
112
  def write_map_tsv(
117
113
  *,
118
- path: Union[str, Path],
119
- header: Optional[Iterable[str]] = None,
120
- rv: Union[Iterable[tuple[str, str]], Mapping[str, str]],
114
+ path: str | Path,
115
+ header: Iterable[str] | None = None,
116
+ rv: Iterable[tuple[str, str]] | Mapping[str, str],
121
117
  sep: str = "\t",
122
118
  ) -> None:
123
119
  """Write a mapping dictionary to a TSV file."""
@@ -129,7 +125,7 @@ def write_map_tsv(
129
125
 
130
126
  def write_multimap_tsv(
131
127
  *,
132
- path: Union[str, Path],
128
+ path: str | Path,
133
129
  header: Iterable[str],
134
130
  rv: Mapping[str, list[str]],
135
131
  sep: str = "\t",
@@ -141,8 +137,8 @@ def write_multimap_tsv(
141
137
 
142
138
  def write_iterable_tsv(
143
139
  *,
144
- path: Union[str, Path],
145
- header: Optional[Iterable[str]] = None,
140
+ path: str | Path,
141
+ header: Iterable[str] | None = None,
146
142
  it: Iterable[tuple[str, ...]],
147
143
  sep: str = "\t",
148
144
  ) -> None:
@@ -154,13 +150,3 @@ def write_iterable_tsv(
154
150
  if header is not None:
155
151
  writer.writerow(header)
156
152
  writer.writerows(it)
157
-
158
-
159
- def parse_xml_gz(path: Union[str, Path]) -> Element:
160
- """Parse an XML file from a path to a GZIP file."""
161
- path = Path(path).resolve()
162
- t = time.time()
163
- logger.info("parsing xml from %s", path)
164
- tree = etree.parse(path.as_posix()) # type:ignore
165
- logger.info("parsed xml in %.2f seconds", time.time() - t)
166
- return tree.getroot()
pyobo/utils/iter.py CHANGED
@@ -8,8 +8,8 @@ from typing import TypeVar
8
8
  from more_itertools import peekable
9
9
 
10
10
  __all__ = [
11
- "iterate_together",
12
11
  "iterate_gzips_together",
12
+ "iterate_together",
13
13
  ]
14
14
 
15
15
  X = TypeVar("X")
@@ -20,9 +20,9 @@ Y = TypeVar("Y")
20
20
  def iterate_gzips_together(a_path, b_path) -> Iterable[tuple[str, str, list[str]]]:
21
21
  """Iterate over two gzipped files together."""
22
22
  with gzip.open(a_path, mode="rt", errors="ignore") as a, gzip.open(b_path, mode="rt") as b:
23
- a = csv.reader(a, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
24
- b = csv.reader(b, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
25
- yield from iterate_together(a, b)
23
+ a_reader = csv.reader(a, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
24
+ b_reader = csv.reader(b, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
25
+ yield from iterate_together(a_reader, b_reader) # type:ignore
26
26
 
27
27
 
28
28
  def iterate_together(
@@ -38,7 +38,7 @@ def iterate_together(
38
38
  - Each key in the index is present within both files
39
39
  """
40
40
  b_peekable = peekable(b)
41
- b_index, _ = b_peekable.peek()
41
+ b_index: X | type[_Done] = b_peekable.peek()[0]
42
42
 
43
43
  for a_index, a_value in a:
44
44
  zs = []
pyobo/utils/misc.py CHANGED
@@ -1,79 +1,67 @@
1
1
  """Miscellaneous utilities."""
2
2
 
3
- import gzip
4
3
  import logging
5
- import os
6
4
  from datetime import datetime
7
- from subprocess import check_output
8
- from typing import Optional
9
5
 
10
6
  __all__ = [
11
- "obo_to_obograph",
12
- "obo_to_owl",
13
7
  "cleanup_version",
14
8
  ]
15
9
 
16
-
17
10
  logger = logging.getLogger(__name__)
18
11
 
19
-
20
- def obo_to_obograph(obo_path, obograph_path) -> None:
21
- """Convert an OBO file to OBO Graph file with pronto."""
22
- import pronto
23
-
24
- ontology = pronto.Ontology(obo_path)
25
- with gzip.open(obograph_path, "wb") as file:
26
- ontology.dump(file, format="json")
27
-
28
-
29
- def obo_to_owl(obo_path, owl_path, owl_format: str = "ofn"):
30
- """Convert an OBO file to an OWL file with ROBOT."""
31
- args = ["robot", "convert", "-i", obo_path, "-o", owl_path, "--format", owl_format]
32
- ret = check_output( # noqa:S603
33
- args,
34
- cwd=os.path.dirname(__file__),
35
- )
36
- return ret.decode()
37
-
38
-
39
12
  BIZARRE_LOGGED = set()
40
13
 
14
+ #: Rewrites for mostly static resources that have weird quirks
15
+ VERSION_REWRITES = {
16
+ "$Date: 2009/11/15 10:54:12 $": "2009-11-15", # for owl
17
+ "http://www.w3.org/2006/time#2016": "2016", # for time
18
+ }
19
+ STATIC_VERSION_REWRITES = {"orth": "2"}
20
+ VERSION_PREFIXES = [
21
+ "http://www.orpha.net/version",
22
+ "https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_",
23
+ "http://humanbehaviourchange.org/ontology/bcio.owl/",
24
+ "http://purl.org/pav/",
25
+ "http://identifiers.org/combine.specifications/teddy.rel-",
26
+ "https://purl.dataone.org/odo/MOSAIC/",
27
+ "http://purl.dataone.org/odo/SASAP/", # like in http://purl.dataone.org/odo/SASAP/0.3.1
28
+ "http://purl.dataone.org/odo/SENSO/", # like in http://purl.dataone.org/odo/SENSO/0.1.0
29
+ "https://purl.dataone.org/odo/ADCAD/",
30
+ ]
31
+ VERSION_PREFIX_SPLITS = [
32
+ "http://www.ebi.ac.uk/efo/releases/v",
33
+ "http://www.ebi.ac.uk/swo/swo.owl/",
34
+ "http://semanticscience.org/ontology/sio/v",
35
+ "http://ontology.neuinfo.org/NIF/ttl/nif/version/",
36
+ ]
37
+
41
38
 
42
- def cleanup_version(data_version: str, prefix: str) -> Optional[str]:
39
+ def cleanup_version(data_version: str, prefix: str) -> str:
43
40
  """Clean the version information."""
44
- if data_version.endswith(".owl"):
45
- data_version = data_version[: -len(".owl")]
41
+ if data_version in VERSION_REWRITES:
42
+ return VERSION_REWRITES[data_version]
43
+
44
+ data_version = data_version.removesuffix(".owl")
46
45
  if data_version.endswith(prefix):
47
46
  data_version = data_version[: -len(prefix)]
48
- if data_version.startswith("releases/"):
49
- data_version = data_version[len("releases/") :]
50
- if prefix == "orth":
51
- # TODO add bioversions for this
52
- return "2"
47
+ data_version = data_version.removesuffix("/")
48
+
49
+ data_version = data_version.removeprefix("releases/")
50
+ data_version = data_version.removeprefix("release/")
53
51
 
54
- version_prefixes = [
55
- "http://www.orpha.net/version",
56
- "https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_",
57
- "http://humanbehaviourchange.org/ontology/bcio.owl/",
58
- "http://purl.org/pav/",
59
- "http://identifiers.org/combine.specifications/teddy.rel-",
60
- ]
61
- for version_prefix in version_prefixes:
52
+ for version_prefix in VERSION_PREFIXES:
62
53
  if data_version.startswith(version_prefix):
63
- return data_version[len(version_prefix) :]
54
+ return data_version.removeprefix(version_prefix)
64
55
 
65
- version_prefixes_split = [
66
- "http://www.ebi.ac.uk/efo/releases/v",
67
- "http://www.ebi.ac.uk/swo/swo.owl/",
68
- "http://semanticscience.org/ontology/sio/v",
69
- "http://ontology.neuinfo.org/NIF/ttl/nif/version/",
70
- ]
71
- for version_prefix_split in version_prefixes_split:
56
+ for version_prefix_split in VERSION_PREFIX_SPLITS:
72
57
  if data_version.startswith(version_prefix_split):
73
- return data_version[len(version_prefix_split) :].split("/")[0]
58
+ return data_version.removeprefix(version_prefix_split).split("/")[0]
74
59
 
60
+ # use a heuristic to determine if the version is one of
61
+ # consecutive, major.minor, or semantic versioning (i.e., major.minor.patch)
75
62
  if data_version.replace(".", "").isnumeric():
76
- return data_version # consecutive, major.minor, or semantic versioning
63
+ return data_version
64
+
77
65
  for v in reversed(data_version.split("/")):
78
66
  v = v.strip()
79
67
  try:
pyobo/utils/ndex_utils.py CHANGED
File without changes
pyobo/utils/path.py CHANGED
@@ -1,60 +1,42 @@
1
1
  """Utilities for building paths."""
2
2
 
3
+ import enum
3
4
  import logging
4
5
  from pathlib import Path
5
- from typing import Any, Callable, Literal, Optional, Union
6
+ from typing import Any, Literal
6
7
 
7
8
  import pandas as pd
8
- import requests_ftp
9
- from pystow.utils import download, name_from_url, read_tarfile_csv
9
+ from curies import Reference
10
+ from pystow import VersionHint
10
11
 
11
- from .misc import cleanup_version
12
- from ..constants import RAW_MODULE
12
+ from ..constants import CACHE_SUBDIRECTORY_NAME, RAW_MODULE, RELATION_SUBDIRECTORY_NAME
13
13
 
14
14
  __all__ = [
15
- "prefix_directory_join",
16
- "prefix_directory_join",
17
- "prefix_cache_join",
18
- "get_prefix_obo_path",
19
- "ensure_path",
15
+ "CacheArtifact",
20
16
  "ensure_df",
21
- "ensure_tar_df",
17
+ "ensure_path",
18
+ "get_cache_path",
19
+ "get_relation_cache_path",
20
+ "prefix_directory_join",
22
21
  ]
23
22
 
24
23
  logger = logging.getLogger(__name__)
25
24
 
26
- VersionHint = Union[None, str, Callable[[], Optional[str]]]
27
-
28
- requests_ftp.monkeypatch_session()
29
-
30
25
 
31
26
  def prefix_directory_join(
32
27
  prefix: str,
33
28
  *parts: str,
34
- name: Optional[str] = None,
29
+ name: str | None = None,
35
30
  version: VersionHint = None,
36
31
  ensure_exists: bool = True,
37
32
  ) -> Path:
38
33
  """Join in the prefix directory."""
39
- if version is None:
40
- return RAW_MODULE.join(prefix, *parts, name=name, ensure_exists=ensure_exists)
41
- if callable(version):
42
- logger.info("[%s] looking up version", prefix)
43
- version = version()
44
- logger.info("[%s] got version %s", prefix, version)
45
- elif not isinstance(version, str):
46
- raise TypeError(f"Invalid type: {version} ({type(version)})")
47
- if version is None:
48
- raise AssertionError
49
- version = cleanup_version(version, prefix=prefix)
50
- if version is not None and "/" in version:
51
- raise ValueError(f"[{prefix}] Can not have slash in version: {version}")
52
- return RAW_MODULE.join(prefix, version, *parts, name=name, ensure_exists=ensure_exists)
53
-
54
-
55
- def get_prefix_obo_path(prefix: str, version: VersionHint = None, ext: str = "obo") -> Path:
56
- """Get the canonical path to the OBO file."""
57
- return prefix_directory_join(prefix, name=f"{prefix}.{ext}", version=version)
34
+ return RAW_MODULE.module(prefix).join(
35
+ *parts,
36
+ name=name,
37
+ ensure_exists=ensure_exists,
38
+ version=version,
39
+ )
58
40
 
59
41
 
60
42
  def ensure_path(
@@ -62,36 +44,29 @@ def ensure_path(
62
44
  *parts: str,
63
45
  url: str,
64
46
  version: VersionHint = None,
65
- name: Optional[str] = None,
47
+ name: str | None = None,
66
48
  force: bool = False,
67
- error_on_missing: bool = False,
68
49
  backend: Literal["requests", "urllib"] = "urllib",
69
50
  verify: bool = True,
70
- ) -> str:
51
+ **download_kwargs: Any,
52
+ ) -> Path:
71
53
  """Download a file if it doesn't exist."""
72
- if name is None:
73
- name = name_from_url(url)
74
-
75
- path = prefix_directory_join(prefix, *parts, name=name, version=version)
76
-
77
- if not path.exists() and error_on_missing:
78
- raise FileNotFoundError
79
-
80
- kwargs: dict[str, Any]
81
54
  if verify:
82
- kwargs = {"backend": backend}
55
+ download_kwargs = {"backend": backend}
83
56
  else:
84
57
  if backend != "requests":
85
58
  logger.warning("using requests since verify=False")
86
- kwargs = {"backend": "requests", "verify": False}
59
+ download_kwargs = {"backend": "requests", "verify": False}
87
60
 
88
- download(
61
+ path = RAW_MODULE.module(prefix).ensure(
62
+ *parts,
89
63
  url=url,
90
- path=path,
64
+ name=name,
91
65
  force=force,
92
- **kwargs,
66
+ version=version,
67
+ download_kwargs=download_kwargs,
93
68
  )
94
- return path.as_posix()
69
+ return path
95
70
 
96
71
 
97
72
  def ensure_df(
@@ -99,7 +74,7 @@ def ensure_df(
99
74
  *parts: str,
100
75
  url: str,
101
76
  version: VersionHint = None,
102
- name: Optional[str] = None,
77
+ name: str | None = None,
103
78
  force: bool = False,
104
79
  sep: str = "\t",
105
80
  dtype=str,
@@ -121,21 +96,52 @@ def ensure_df(
121
96
  return pd.read_csv(_path, sep=sep, dtype=dtype, **kwargs)
122
97
 
123
98
 
124
- def ensure_tar_df(
125
- prefix: str,
126
- *parts: str,
127
- url: str,
128
- inner_path: str,
129
- version: VersionHint = None,
130
- path: Optional[str] = None,
131
- force: bool = False,
132
- **kwargs,
133
- ) -> pd.DataFrame:
134
- """Download a tar file and open as a dataframe."""
135
- path = ensure_path(prefix, *parts, url=url, version=version, name=path, force=force)
136
- return read_tarfile_csv(path, inner_path=inner_path, **kwargs)
99
+ class CacheArtifact(enum.Enum):
100
+ """An enumeration for."""
101
+
102
+ names = "names.tsv"
103
+ definitions = "definitions.tsv"
104
+ species = "species.tsv"
105
+ synonyms = "synonyms.tsv" # deprecated
106
+ xrefs = "xrefs.tsv" # deprecated
107
+ mappings = "mappings.tsv"
108
+ relations = "relations.tsv"
109
+ alts = "alt_ids.tsv"
110
+ typedefs = "typedefs.tsv"
111
+ literal_mappings = "literal_mappings.tsv"
112
+ references = "references.tsv"
113
+ obsoletes = "obsolete.tsv"
114
+
115
+ properties = "properties.tsv" # deprecated
116
+ literal_properties = "literal_properties.tsv"
117
+ object_properties = "object_properties.tsv"
137
118
 
119
+ nodes = "nodes.tsv"
120
+ edges = "edges.tsv"
138
121
 
139
- def prefix_cache_join(prefix: str, *parts, name: Optional[str], version: VersionHint) -> Path:
140
- """Ensure the prefix cache is available."""
141
- return prefix_directory_join(prefix, "cache", *parts, name=name, version=version)
122
+ prefixes = "prefixes.json"
123
+ metadata = "metadata.json"
124
+
125
+
126
+ def get_cache_path(
127
+ ontology: str,
128
+ name: CacheArtifact,
129
+ *,
130
+ version: str | None = None,
131
+ ) -> Path:
132
+ """Get a cache path."""
133
+ return prefix_directory_join(
134
+ ontology, CACHE_SUBDIRECTORY_NAME, name=name.value, version=version
135
+ )
136
+
137
+
138
+ def get_relation_cache_path(
139
+ ontology: str,
140
+ reference: Reference,
141
+ *,
142
+ version: str | None = None,
143
+ ) -> Path:
144
+ """Get a relation cache path."""
145
+ return prefix_directory_join(
146
+ ontology, RELATION_SUBDIRECTORY_NAME, name=f"{reference.curie}.tsv", version=version
147
+ )
pyobo/version.py CHANGED
@@ -8,11 +8,11 @@ from subprocess import CalledProcessError, check_output
8
8
 
9
9
  __all__ = [
10
10
  "VERSION",
11
- "get_version",
12
11
  "get_git_hash",
12
+ "get_version",
13
13
  ]
14
14
 
15
- VERSION = "0.11.1"
15
+ VERSION = "0.12.0"
16
16
 
17
17
 
18
18
  def get_git_hash() -> str:
@@ -30,7 +30,7 @@ def get_git_hash() -> str:
30
30
  return ret.strip().decode("utf-8")[:8]
31
31
 
32
32
 
33
- def get_version(with_git_hash: bool = False):
33
+ def get_version(with_git_hash: bool = False) -> str:
34
34
  """Get the PyOBO version string, including a git hash."""
35
35
  return f"{VERSION}-{get_git_hash()}" if with_git_hash else VERSION
36
36