pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (228)
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -117
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +107 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +210 -160
  20. pyobo/cli/database_utils.py +155 -0
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +209 -191
  25. pyobo/gilda_utils.py +52 -250
  26. pyobo/identifier_utils/__init__.py +33 -0
  27. pyobo/identifier_utils/api.py +305 -0
  28. pyobo/identifier_utils/preprocessing.json +873 -0
  29. pyobo/identifier_utils/preprocessing.py +27 -0
  30. pyobo/identifier_utils/relations/__init__.py +8 -0
  31. pyobo/identifier_utils/relations/api.py +162 -0
  32. pyobo/identifier_utils/relations/data.json +5824 -0
  33. pyobo/identifier_utils/relations/data_owl.json +57 -0
  34. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  35. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  36. pyobo/mocks.py +9 -6
  37. pyobo/ner/__init__.py +9 -0
  38. pyobo/ner/api.py +72 -0
  39. pyobo/ner/normalizer.py +33 -0
  40. pyobo/obographs.py +48 -40
  41. pyobo/plugins.py +5 -4
  42. pyobo/py.typed +0 -0
  43. pyobo/reader.py +1354 -395
  44. pyobo/reader_utils.py +155 -0
  45. pyobo/resource_utils.py +42 -22
  46. pyobo/resources/__init__.py +0 -0
  47. pyobo/resources/goc.py +75 -0
  48. pyobo/resources/goc.tsv +188 -0
  49. pyobo/resources/ncbitaxon.py +4 -5
  50. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  51. pyobo/resources/ro.py +3 -2
  52. pyobo/resources/ro.tsv +0 -0
  53. pyobo/resources/so.py +0 -0
  54. pyobo/resources/so.tsv +0 -0
  55. pyobo/sources/README.md +12 -8
  56. pyobo/sources/__init__.py +52 -29
  57. pyobo/sources/agrovoc.py +0 -0
  58. pyobo/sources/antibodyregistry.py +11 -12
  59. pyobo/sources/bigg/__init__.py +13 -0
  60. pyobo/sources/bigg/bigg_compartment.py +81 -0
  61. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  62. pyobo/sources/bigg/bigg_model.py +46 -0
  63. pyobo/sources/bigg/bigg_reaction.py +77 -0
  64. pyobo/sources/biogrid.py +1 -2
  65. pyobo/sources/ccle.py +7 -12
  66. pyobo/sources/cgnc.py +9 -6
  67. pyobo/sources/chebi.py +1 -1
  68. pyobo/sources/chembl/__init__.py +9 -0
  69. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  70. pyobo/sources/chembl/chembl_target.py +160 -0
  71. pyobo/sources/civic_gene.py +55 -15
  72. pyobo/sources/clinicaltrials.py +160 -0
  73. pyobo/sources/complexportal.py +24 -24
  74. pyobo/sources/conso.py +14 -22
  75. pyobo/sources/cpt.py +0 -0
  76. pyobo/sources/credit.py +1 -9
  77. pyobo/sources/cvx.py +27 -5
  78. pyobo/sources/depmap.py +9 -12
  79. pyobo/sources/dictybase_gene.py +2 -7
  80. pyobo/sources/drugbank/__init__.py +9 -0
  81. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  82. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  83. pyobo/sources/drugcentral.py +17 -13
  84. pyobo/sources/expasy.py +31 -34
  85. pyobo/sources/famplex.py +13 -18
  86. pyobo/sources/flybase.py +8 -13
  87. pyobo/sources/gard.py +62 -0
  88. pyobo/sources/geonames/__init__.py +9 -0
  89. pyobo/sources/geonames/features.py +28 -0
  90. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  91. pyobo/sources/geonames/utils.py +115 -0
  92. pyobo/sources/gmt_utils.py +6 -7
  93. pyobo/sources/go.py +20 -13
  94. pyobo/sources/gtdb.py +154 -0
  95. pyobo/sources/gwascentral/__init__.py +9 -0
  96. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  97. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  98. pyobo/sources/hgnc/__init__.py +9 -0
  99. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  100. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  101. pyobo/sources/icd/__init__.py +9 -0
  102. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  103. pyobo/sources/icd/icd11.py +148 -0
  104. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  105. pyobo/sources/interpro.py +4 -9
  106. pyobo/sources/itis.py +0 -5
  107. pyobo/sources/kegg/__init__.py +0 -0
  108. pyobo/sources/kegg/api.py +16 -38
  109. pyobo/sources/kegg/genes.py +9 -20
  110. pyobo/sources/kegg/genome.py +1 -7
  111. pyobo/sources/kegg/pathway.py +9 -21
  112. pyobo/sources/mesh.py +58 -24
  113. pyobo/sources/mgi.py +3 -10
  114. pyobo/sources/mirbase/__init__.py +11 -0
  115. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  116. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  117. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  118. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  119. pyobo/sources/msigdb.py +74 -39
  120. pyobo/sources/ncbi/__init__.py +9 -0
  121. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  122. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  123. pyobo/sources/nih_reporter.py +60 -0
  124. pyobo/sources/nlm/__init__.py +9 -0
  125. pyobo/sources/nlm/nlm_catalog.py +48 -0
  126. pyobo/sources/nlm/nlm_publisher.py +36 -0
  127. pyobo/sources/nlm/utils.py +116 -0
  128. pyobo/sources/npass.py +6 -8
  129. pyobo/sources/omim_ps.py +11 -4
  130. pyobo/sources/pathbank.py +4 -8
  131. pyobo/sources/pfam/__init__.py +9 -0
  132. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  133. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  134. pyobo/sources/pharmgkb/__init__.py +15 -0
  135. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  136. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  137. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  138. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  139. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  140. pyobo/sources/pharmgkb/utils.py +86 -0
  141. pyobo/sources/pid.py +1 -6
  142. pyobo/sources/pombase.py +6 -10
  143. pyobo/sources/pubchem.py +4 -9
  144. pyobo/sources/reactome.py +5 -11
  145. pyobo/sources/rgd.py +11 -16
  146. pyobo/sources/rhea.py +37 -36
  147. pyobo/sources/ror.py +69 -42
  148. pyobo/sources/selventa/__init__.py +0 -0
  149. pyobo/sources/selventa/schem.py +4 -7
  150. pyobo/sources/selventa/scomp.py +1 -6
  151. pyobo/sources/selventa/sdis.py +4 -7
  152. pyobo/sources/selventa/sfam.py +1 -6
  153. pyobo/sources/sgd.py +6 -11
  154. pyobo/sources/signor/__init__.py +7 -0
  155. pyobo/sources/signor/download.py +41 -0
  156. pyobo/sources/signor/signor_complexes.py +105 -0
  157. pyobo/sources/slm.py +12 -15
  158. pyobo/sources/umls/__init__.py +7 -1
  159. pyobo/sources/umls/__main__.py +0 -0
  160. pyobo/sources/umls/get_synonym_types.py +20 -4
  161. pyobo/sources/umls/sty.py +57 -0
  162. pyobo/sources/umls/synonym_types.tsv +1 -1
  163. pyobo/sources/umls/umls.py +18 -22
  164. pyobo/sources/unimod.py +46 -0
  165. pyobo/sources/uniprot/__init__.py +1 -1
  166. pyobo/sources/uniprot/uniprot.py +40 -32
  167. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  168. pyobo/sources/utils.py +3 -2
  169. pyobo/sources/wikipathways.py +7 -10
  170. pyobo/sources/zfin.py +5 -10
  171. pyobo/ssg/__init__.py +12 -16
  172. pyobo/ssg/base.html +0 -0
  173. pyobo/ssg/index.html +26 -13
  174. pyobo/ssg/term.html +12 -2
  175. pyobo/ssg/typedef.html +0 -0
  176. pyobo/struct/__init__.py +54 -8
  177. pyobo/struct/functional/__init__.py +1 -0
  178. pyobo/struct/functional/dsl.py +2572 -0
  179. pyobo/struct/functional/macros.py +423 -0
  180. pyobo/struct/functional/obo_to_functional.py +385 -0
  181. pyobo/struct/functional/ontology.py +272 -0
  182. pyobo/struct/functional/utils.py +112 -0
  183. pyobo/struct/reference.py +331 -136
  184. pyobo/struct/struct.py +1484 -657
  185. pyobo/struct/struct_utils.py +1078 -0
  186. pyobo/struct/typedef.py +162 -210
  187. pyobo/struct/utils.py +12 -5
  188. pyobo/struct/vocabulary.py +138 -0
  189. pyobo/utils/__init__.py +0 -0
  190. pyobo/utils/cache.py +16 -15
  191. pyobo/utils/io.py +51 -41
  192. pyobo/utils/iter.py +5 -5
  193. pyobo/utils/misc.py +41 -53
  194. pyobo/utils/ndex_utils.py +0 -0
  195. pyobo/utils/path.py +73 -70
  196. pyobo/version.py +3 -3
  197. pyobo-0.12.1.dist-info/METADATA +671 -0
  198. pyobo-0.12.1.dist-info/RECORD +201 -0
  199. pyobo-0.12.1.dist-info/WHEEL +4 -0
  200. {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
  201. pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
  202. pyobo/aws.py +0 -162
  203. pyobo/cli/aws.py +0 -47
  204. pyobo/identifier_utils.py +0 -142
  205. pyobo/normalizer.py +0 -232
  206. pyobo/registries/__init__.py +0 -16
  207. pyobo/registries/metaregistry.json +0 -507
  208. pyobo/registries/metaregistry.py +0 -135
  209. pyobo/sources/icd11.py +0 -105
  210. pyobo/xrefdb/__init__.py +0 -1
  211. pyobo/xrefdb/canonicalizer.py +0 -214
  212. pyobo/xrefdb/priority.py +0 -59
  213. pyobo/xrefdb/sources/__init__.py +0 -60
  214. pyobo/xrefdb/sources/biomappings.py +0 -36
  215. pyobo/xrefdb/sources/cbms2019.py +0 -91
  216. pyobo/xrefdb/sources/chembl.py +0 -83
  217. pyobo/xrefdb/sources/compath.py +0 -82
  218. pyobo/xrefdb/sources/famplex.py +0 -64
  219. pyobo/xrefdb/sources/gilda.py +0 -50
  220. pyobo/xrefdb/sources/intact.py +0 -113
  221. pyobo/xrefdb/sources/ncit.py +0 -133
  222. pyobo/xrefdb/sources/pubchem.py +0 -27
  223. pyobo/xrefdb/sources/wikidata.py +0 -116
  224. pyobo/xrefdb/xrefs_pipeline.py +0 -180
  225. pyobo-0.11.2.dist-info/METADATA +0 -711
  226. pyobo-0.11.2.dist-info/RECORD +0 -157
  227. pyobo-0.11.2.dist-info/WHEEL +0 -5
  228. pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/utils/cache.py CHANGED
@@ -1,12 +1,10 @@
 """Utilities for caching files."""
 
-import gzip
 import json
 import logging
-import os
 from collections.abc import Iterable, Mapping
 from pathlib import Path
-from typing import Generic, TypeVar, Union
+from typing import Generic, TypeVar
 
 import networkx as nx
 from pystow.cache import Cached
@@ -15,18 +13,18 @@ from pystow.cache import CachedDataFrame as cached_df  # noqa:N813
 from pystow.cache import CachedJSON as cached_json  # noqa:N813
 from pystow.cache import CachedPickle as cached_pickle  # noqa:N813
 
-from .io import open_map_tsv, open_multimap_tsv, write_map_tsv, write_multimap_tsv
+from .io import open_map_tsv, open_multimap_tsv, safe_open, write_map_tsv, write_multimap_tsv
 
 __all__ = [
-    # from pystow
-    "cached_json",
     "cached_collection",
     "cached_df",
-    "cached_pickle",
     # implemented here
     "cached_graph",
+    # from pystow
+    "cached_json",
     "cached_mapping",
     "cached_multidict",
+    "cached_pickle",
 ]
 
 logger = logging.getLogger(__name__)
@@ -39,14 +37,15 @@ class _CachedMapping(Cached[X], Generic[X]):
 
     def __init__(
         self,
-        path: Union[str, Path, os.PathLike],
+        path: str | Path,
         header: Iterable[str],
         *,
         use_tqdm: bool = False,
         force: bool = False,
+        cache: bool = True,
    ):
         """Initialize the mapping cache."""
-        super().__init__(path=path, force=force)
+        super().__init__(path=path, cache=cache, force=force)
         self.header = header
         self.use_tqdm = use_tqdm
 
@@ -65,17 +64,19 @@ class CachedMapping(_CachedMapping[Mapping[str, str]]):
 
 cached_mapping = CachedMapping
 
+NODE_LINK_STYLE = "links"  # TODO update to "edges"
+
 
-def get_gzipped_graph(path: Union[str, Path]) -> nx.MultiDiGraph:
+def get_gzipped_graph(path: str | Path) -> nx.MultiDiGraph:
     """Read a graph that's gzipped nodelink."""
-    with gzip.open(path, "rt") as file:
-        return nx.node_link_graph(json.load(file))
+    with safe_open(path, read=True) as file:
+        return nx.node_link_graph(json.load(file), edges=NODE_LINK_STYLE)
 
 
-def write_gzipped_graph(graph: nx.MultiDiGraph, path: Union[str, Path]) -> None:
+def write_gzipped_graph(graph: nx.MultiDiGraph, path: str | Path) -> None:
     """Write a graph as gzipped nodelink."""
-    with gzip.open(path, "wt") as file:
-        json.dump(nx.node_link_data(graph), file)
+    with safe_open(path, read=False) as file:
+        json.dump(nx.node_link_data(graph, edges=NODE_LINK_STYLE), file)
 
 
 class CachedGraph(Cached[nx.MultiDiGraph]):
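With these changes, the graph helpers route all I/O through safe_open and pin the node-link flavor via NODE_LINK_STYLE. A minimal round-trip sketch (the file name and toy graph are hypothetical):

import networkx as nx

from pyobo.utils.cache import get_gzipped_graph, write_gzipped_graph

# build a toy graph and serialize it as gzipped node-link JSON
graph = nx.MultiDiGraph()
graph.add_edge("CHEBI:1234", "CHEBI:5678", key="is_a")
write_gzipped_graph(graph, "example.json.gz")

# reading uses the same NODE_LINK_STYLE, so edge keys survive the round trip
roundtrip = get_gzipped_graph("example.json.gz")
assert set(roundtrip.edges(keys=True)) == set(graph.edges(keys=True))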
pyobo/utils/io.py CHANGED
@@ -1,33 +1,31 @@
 """I/O utilities."""
 
 import collections.abc
+import contextlib
 import csv
 import gzip
 import logging
-import time
 from collections import defaultdict
-from collections.abc import Iterable, Mapping
+from collections.abc import Generator, Iterable, Mapping
 from contextlib import contextmanager
 from pathlib import Path
-from typing import Optional, TypeVar, Union
-from xml.etree.ElementTree import Element
+from typing import Literal, TextIO, TypeVar
 
 import pandas as pd
-from lxml import etree
 from tqdm.auto import tqdm
 
 __all__ = [
-    "open_map_tsv",
-    "open_multimap_tsv",
+    "get_reader",
     "multidict",
     "multisetdict",
+    "open_map_tsv",
+    "open_multimap_tsv",
+    "open_reader",
+    "safe_open",
+    "safe_open_writer",
+    "write_iterable_tsv",
     "write_map_tsv",
     "write_multimap_tsv",
-    "write_iterable_tsv",
-    "parse_xml_gz",
-    "get_writer",
-    "open_reader",
-    "get_reader",
 ]
 
 logger = logging.getLogger(__name__)
@@ -37,10 +35,10 @@ Y = TypeVar("Y")
 
 
 @contextmanager
-def open_reader(path: Union[str, Path], sep: str = "\t"):
+def open_reader(path: str | Path, sep: str = "\t"):
     """Open a file and get a reader for it."""
     path = Path(path)
-    with gzip.open(path, "rt") if path.suffix == ".gz" else open(path) as file:
+    with safe_open(path, read=True) as file:
         yield get_reader(file, sep=sep)
 
 
@@ -49,16 +47,11 @@ def get_reader(x, sep: str = "\t"):
     return csv.reader(x, delimiter=sep, quoting=csv.QUOTE_MINIMAL)
 
 
-def get_writer(x, sep: str = "\t"):
-    """Get a :func:`csv.writer` with PyOBO default settings."""
-    return csv.writer(x, delimiter=sep, quoting=csv.QUOTE_MINIMAL)
-
-
 def open_map_tsv(
-    path: Union[str, Path], *, use_tqdm: bool = False, has_header: bool = True
+    path: str | Path, *, use_tqdm: bool = False, has_header: bool = True
 ) -> Mapping[str, str]:
     """Load a mapping TSV file into a dictionary."""
-    with open(path) as file:
+    with safe_open(path, read=True) as file:
         if has_header:
             next(file)  # throw away header
         if use_tqdm:
@@ -73,7 +66,7 @@ def open_map_tsv(
 
 
 def open_multimap_tsv(
-    path: Union[str, Path],
+    path: str | Path,
     *,
     use_tqdm: bool = False,
     has_header: bool = True,
@@ -83,14 +76,17 @@
 
 
 def _help_multimap_tsv(
-    path: Union[str, Path],
+    path: str | Path,
     *,
     use_tqdm: bool = False,
     has_header: bool = True,
 ) -> Iterable[tuple[str, str]]:
-    with open(path) as file:
+    with safe_open(path, read=True) as file:
         if has_header:
-            next(file)  # throw away header
+            try:
+                next(file)  # throw away header
+            except gzip.BadGzipFile as e:
+                raise ValueError(f"could not open file {path}") from e
         if use_tqdm:
             file = tqdm(file, desc=f"loading TSV from {path}")
         yield from get_reader(file)
@@ -115,9 +111,9 @@ def multisetdict(pairs: Iterable[tuple[X, Y]]) -> dict[X, set[Y]]:
 
 def write_map_tsv(
     *,
-    path: Union[str, Path],
-    header: Optional[Iterable[str]] = None,
-    rv: Union[Iterable[tuple[str, str]], Mapping[str, str]],
+    path: str | Path,
+    header: Iterable[str] | None = None,
+    rv: Iterable[tuple[str, str]] | Mapping[str, str],
     sep: str = "\t",
 ) -> None:
     """Write a mapping dictionary to a TSV file."""
@@ -129,7 +125,7 @@ def write_map_tsv(
 
 def write_multimap_tsv(
     *,
-    path: Union[str, Path],
+    path: str | Path,
     header: Iterable[str],
     rv: Mapping[str, list[str]],
     sep: str = "\t",
@@ -141,26 +137,40 @@
 
 def write_iterable_tsv(
     *,
-    path: Union[str, Path],
-    header: Optional[Iterable[str]] = None,
+    path: str | Path,
+    header: Iterable[str] | None = None,
     it: Iterable[tuple[str, ...]],
     sep: str = "\t",
 ) -> None:
     """Write a mapping dictionary to a TSV file."""
     it = (row for row in it if all(cell is not None for cell in row))
     it = sorted(it)
-    with open(path, "w") as file:
-        writer = get_writer(file, sep=sep)
+    with safe_open_writer(path, delimiter=sep) as writer:
         if header is not None:
             writer.writerow(header)
         writer.writerows(it)
 
 
-def parse_xml_gz(path: Union[str, Path]) -> Element:
-    """Parse an XML file from a path to a GZIP file."""
-    path = Path(path).resolve()
-    t = time.time()
-    logger.info("parsing xml from %s", path)
-    tree = etree.parse(path.as_posix())  # type:ignore
-    logger.info("parsed xml in %.2f seconds", time.time() - t)
-    return tree.getroot()
+@contextlib.contextmanager
+def safe_open(
+    path: str | Path, read: bool, encoding: str | None = None
+) -> Generator[TextIO, None, None]:
+    """Safely open a file for reading or writing text."""
+    path = Path(path).expanduser().resolve()
+    mode: Literal["rt", "wt"] = "rt" if read else "wt"
+    if path.suffix.endswith(".gz"):
+        with gzip.open(path, mode=mode, encoding=encoding) as file:
+            yield file
+    else:
+        with open(path, mode=mode) as file:
+            yield file
+
+
+@contextlib.contextmanager
+def safe_open_writer(f: str | Path | TextIO, *, delimiter: str = "\t"):  # type:ignore
+    """Open a CSV writer, wrapping :func:`csv.writer`."""
+    if isinstance(f, str | Path):
+        with safe_open(f, read=False) as file:
+            yield csv.writer(file, delimiter=delimiter)
+    else:
+        yield csv.writer(f, delimiter=delimiter)
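Together, safe_open and safe_open_writer replace the removed get_writer and the ad-hoc gzip.open calls, choosing gzip or plain text from the .gz suffix. A minimal sketch of writing and reading a mapping (the file name and rows are hypothetical):

from pyobo.utils.io import open_map_tsv, safe_open_writer

# the .gz suffix makes safe_open_writer compress transparently
with safe_open_writer("species.tsv.gz") as writer:
    writer.writerow(("ncbitaxon_id", "name"))
    writer.writerow(("9606", "Homo sapiens"))

# open_map_tsv goes through safe_open, so it reads the gzip back directly,
# discarding the header row by default
mapping = open_map_tsv("species.tsv.gz")
assert mapping == {"9606": "Homo sapiens"}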
pyobo/utils/iter.py CHANGED
@@ -8,8 +8,8 @@ from typing import TypeVar
 from more_itertools import peekable
 
 __all__ = [
-    "iterate_together",
     "iterate_gzips_together",
+    "iterate_together",
 ]
 
 X = TypeVar("X")
@@ -20,9 +20,9 @@ Y = TypeVar("Y")
 def iterate_gzips_together(a_path, b_path) -> Iterable[tuple[str, str, list[str]]]:
     """Iterate over two gzipped files together."""
     with gzip.open(a_path, mode="rt", errors="ignore") as a, gzip.open(b_path, mode="rt") as b:
-        a = csv.reader(a, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
-        b = csv.reader(b, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
-        yield from iterate_together(a, b)
+        a_reader = csv.reader(a, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
+        b_reader = csv.reader(b, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
+        yield from iterate_together(a_reader, b_reader)  # type:ignore
 
 
 def iterate_together(
@@ -38,7 +38,7 @@ def iterate_together(
     - Each key in the index is present within both files
     """
     b_peekable = peekable(b)
-    b_index, _ = b_peekable.peek()
+    b_index: X | type[_Done] = b_peekable.peek()[0]
 
     for a_index, a_value in a:
         zs = []
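iterate_together is a merge-join over two key-sorted (key, value) streams: each left-hand pair is yielded with the list of right-hand values that share its key. A small in-memory sketch with toy data, assuming the sortedness contract from the docstring holds:

from pyobo.utils.iter import iterate_together

genes = iter([("hgnc:1", "gene A"), ("hgnc:2", "gene B")])
annotations = iter([("hgnc:1", "go:1"), ("hgnc:1", "go:2"), ("hgnc:2", "go:3")])

for key, gene, gos in iterate_together(genes, annotations):
    print(key, gene, gos)
# expected:
#   hgnc:1 gene A ['go:1', 'go:2']
#   hgnc:2 gene B ['go:3']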
pyobo/utils/misc.py CHANGED
@@ -1,79 +1,67 @@
 """Miscellaneous utilities."""
 
-import gzip
 import logging
-import os
 from datetime import datetime
-from subprocess import check_output
-from typing import Optional
 
 __all__ = [
-    "obo_to_obograph",
-    "obo_to_owl",
     "cleanup_version",
 ]
 
-
 logger = logging.getLogger(__name__)
 
-
-def obo_to_obograph(obo_path, obograph_path) -> None:
-    """Convert an OBO file to OBO Graph file with pronto."""
-    import pronto
-
-    ontology = pronto.Ontology(obo_path)
-    with gzip.open(obograph_path, "wb") as file:
-        ontology.dump(file, format="json")
-
-
-def obo_to_owl(obo_path, owl_path, owl_format: str = "ofn"):
-    """Convert an OBO file to an OWL file with ROBOT."""
-    args = ["robot", "convert", "-i", obo_path, "-o", owl_path, "--format", owl_format]
-    ret = check_output(  # noqa:S603
-        args,
-        cwd=os.path.dirname(__file__),
-    )
-    return ret.decode()
-
-
 BIZARRE_LOGGED = set()
 
+#: Rewrites for mostly static resources that have weird quirks
+VERSION_REWRITES = {
+    "$Date: 2009/11/15 10:54:12 $": "2009-11-15",  # for owl
+    "http://www.w3.org/2006/time#2016": "2016",  # for time
+}
+STATIC_VERSION_REWRITES = {"orth": "2"}
+VERSION_PREFIXES = [
+    "http://www.orpha.net/version",
+    "https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_",
+    "http://humanbehaviourchange.org/ontology/bcio.owl/",
+    "http://purl.org/pav/",
+    "http://identifiers.org/combine.specifications/teddy.rel-",
+    "https://purl.dataone.org/odo/MOSAIC/",
+    "http://purl.dataone.org/odo/SASAP/",  # like in http://purl.dataone.org/odo/SASAP/0.3.1
+    "http://purl.dataone.org/odo/SENSO/",  # like in http://purl.dataone.org/odo/SENSO/0.1.0
+    "https://purl.dataone.org/odo/ADCAD/",
+]
+VERSION_PREFIX_SPLITS = [
+    "http://www.ebi.ac.uk/efo/releases/v",
+    "http://www.ebi.ac.uk/swo/swo.owl/",
+    "http://semanticscience.org/ontology/sio/v",
+    "http://ontology.neuinfo.org/NIF/ttl/nif/version/",
+]
+
 
-def cleanup_version(data_version: str, prefix: str) -> Optional[str]:
+def cleanup_version(data_version: str, prefix: str) -> str:
     """Clean the version information."""
-    if data_version.endswith(".owl"):
-        data_version = data_version[: -len(".owl")]
+    if data_version in VERSION_REWRITES:
+        return VERSION_REWRITES[data_version]
+
+    data_version = data_version.removesuffix(".owl")
     if data_version.endswith(prefix):
         data_version = data_version[: -len(prefix)]
-    if data_version.startswith("releases/"):
-        data_version = data_version[len("releases/") :]
-    if prefix == "orth":
-        # TODO add bioversions for this
-        return "2"
+    data_version = data_version.removesuffix("/")
+
+    data_version = data_version.removeprefix("releases/")
+    data_version = data_version.removeprefix("release/")
 
-    version_prefixes = [
-        "http://www.orpha.net/version",
-        "https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_",
-        "http://humanbehaviourchange.org/ontology/bcio.owl/",
-        "http://purl.org/pav/",
-        "http://identifiers.org/combine.specifications/teddy.rel-",
-    ]
-    for version_prefix in version_prefixes:
+    for version_prefix in VERSION_PREFIXES:
         if data_version.startswith(version_prefix):
-            return data_version[len(version_prefix) :]
+            return data_version.removeprefix(version_prefix)
 
-    version_prefixes_split = [
-        "http://www.ebi.ac.uk/efo/releases/v",
-        "http://www.ebi.ac.uk/swo/swo.owl/",
-        "http://semanticscience.org/ontology/sio/v",
-        "http://ontology.neuinfo.org/NIF/ttl/nif/version/",
-    ]
-    for version_prefix_split in version_prefixes_split:
+    for version_prefix_split in VERSION_PREFIX_SPLITS:
         if data_version.startswith(version_prefix_split):
-            return data_version[len(version_prefix_split) :].split("/")[0]
+            return data_version.removeprefix(version_prefix_split).split("/")[0]
 
+    # use a heuristic to determine if the version is one of
+    # consecutive, major.minor, or semantic versioning (i.e., major.minor.patch)
     if data_version.replace(".", "").isnumeric():
-        return data_version  # consecutive, major.minor, or semantic versioning
+        return data_version
+
     for v in reversed(data_version.split("/")):
         v = v.strip()
         try:
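With the prefix tables hoisted to module-level constants, the cleanup behavior can be checked directly. A few illustrative calls traced through the logic above (the prefix arguments are arbitrary examples):

from pyobo.utils.misc import cleanup_version

# purely numeric versions pass the isnumeric() heuristic unchanged
assert cleanup_version("1.2.3", prefix="example") == "1.2.3"

# static rewrites are checked before anything else
assert cleanup_version("http://www.w3.org/2006/time#2016", prefix="time") == "2016"

# split-style prefixes keep only the first path segment after the prefix
version = cleanup_version("http://www.ebi.ac.uk/efo/releases/v3.62.0/efo.owl", prefix="efo")
assert version == "3.62.0"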
pyobo/utils/ndex_utils.py CHANGED
File without changes
pyobo/utils/path.py CHANGED
@@ -1,60 +1,42 @@
 """Utilities for building paths."""
 
+import enum
 import logging
 from pathlib import Path
-from typing import Any, Callable, Literal, Optional, Union
+from typing import Any, Literal
 
 import pandas as pd
-import requests_ftp
-from pystow.utils import download, name_from_url, read_tarfile_csv
+from curies import Reference
+from pystow import VersionHint
 
-from .misc import cleanup_version
-from ..constants import RAW_MODULE
+from ..constants import CACHE_SUBDIRECTORY_NAME, RAW_MODULE, RELATION_SUBDIRECTORY_NAME
 
 __all__ = [
-    "prefix_directory_join",
-    "prefix_directory_join",
-    "prefix_cache_join",
-    "get_prefix_obo_path",
-    "ensure_path",
+    "CacheArtifact",
     "ensure_df",
-    "ensure_tar_df",
+    "ensure_path",
+    "get_cache_path",
+    "get_relation_cache_path",
+    "prefix_directory_join",
 ]
 
 logger = logging.getLogger(__name__)
 
-VersionHint = Union[None, str, Callable[[], Optional[str]]]
-
-requests_ftp.monkeypatch_session()
-
 
 def prefix_directory_join(
     prefix: str,
     *parts: str,
-    name: Optional[str] = None,
+    name: str | None = None,
     version: VersionHint = None,
     ensure_exists: bool = True,
 ) -> Path:
     """Join in the prefix directory."""
-    if version is None:
-        return RAW_MODULE.join(prefix, *parts, name=name, ensure_exists=ensure_exists)
-    if callable(version):
-        logger.info("[%s] looking up version", prefix)
-        version = version()
-        logger.info("[%s] got version %s", prefix, version)
-    elif not isinstance(version, str):
-        raise TypeError(f"Invalid type: {version} ({type(version)})")
-    if version is None:
-        raise AssertionError
-    version = cleanup_version(version, prefix=prefix)
-    if version is not None and "/" in version:
-        raise ValueError(f"[{prefix}] Can not have slash in version: {version}")
-    return RAW_MODULE.join(prefix, version, *parts, name=name, ensure_exists=ensure_exists)
-
-
-def get_prefix_obo_path(prefix: str, version: VersionHint = None, ext: str = "obo") -> Path:
-    """Get the canonical path to the OBO file."""
-    return prefix_directory_join(prefix, name=f"{prefix}.{ext}", version=version)
+    return RAW_MODULE.module(prefix).join(
+        *parts,
+        name=name,
+        ensure_exists=ensure_exists,
+        version=version,
+    )
 
 
 def ensure_path(
@@ -62,36 +44,29 @@ def ensure_path(
     *parts: str,
     url: str,
     version: VersionHint = None,
-    name: Optional[str] = None,
+    name: str | None = None,
     force: bool = False,
-    error_on_missing: bool = False,
     backend: Literal["requests", "urllib"] = "urllib",
     verify: bool = True,
-) -> str:
+    **download_kwargs: Any,
+) -> Path:
     """Download a file if it doesn't exist."""
-    if name is None:
-        name = name_from_url(url)
-
-    path = prefix_directory_join(prefix, *parts, name=name, version=version)
-
-    if not path.exists() and error_on_missing:
-        raise FileNotFoundError
-
-    kwargs: dict[str, Any]
     if verify:
-        kwargs = {"backend": backend}
+        download_kwargs = {"backend": backend}
     else:
         if backend != "requests":
             logger.warning("using requests since verify=False")
-        kwargs = {"backend": "requests", "verify": False}
+        download_kwargs = {"backend": "requests", "verify": False}
 
-    download(
+    path = RAW_MODULE.module(prefix).ensure(
+        *parts,
         url=url,
-        path=path,
+        name=name,
         force=force,
-        **kwargs,
+        version=version,
+        download_kwargs=download_kwargs,
     )
-    return path.as_posix()
+    return path
 
 
 def ensure_df(
@@ -99,7 +74,7 @@ def ensure_df(
     *parts: str,
     url: str,
     version: VersionHint = None,
-    name: Optional[str] = None,
+    name: str | None = None,
     force: bool = False,
     sep: str = "\t",
     dtype=str,
@@ -121,21 +96,49 @@
     return pd.read_csv(_path, sep=sep, dtype=dtype, **kwargs)
 
 
-def ensure_tar_df(
-    prefix: str,
-    *parts: str,
-    url: str,
-    inner_path: str,
-    version: VersionHint = None,
-    path: Optional[str] = None,
-    force: bool = False,
-    **kwargs,
-) -> pd.DataFrame:
-    """Download a tar file and open as a dataframe."""
-    path = ensure_path(prefix, *parts, url=url, version=version, name=path, force=force)
-    return read_tarfile_csv(path, inner_path=inner_path, **kwargs)
+class CacheArtifact(enum.Enum):
+    """An enumeration for."""
+
+    names = "names.tsv.gz"
+    definitions = "definitions.tsv.gz"
+    species = "species.tsv.gz"
+    mappings = "mappings.tsv.gz"
+    relations = "relations.tsv.gz"
+    alts = "alt_ids.tsv.gz"
+    typedefs = "typedefs.tsv.gz"
+    literal_mappings = "literal_mappings.tsv.gz"
+    references = "references.tsv.gz"
+    obsoletes = "obsolete.tsv.gz"
+
+    literal_properties = "literal_properties.tsv.gz"
+    object_properties = "object_properties.tsv.gz"
 
+    nodes = "nodes.tsv.gz"
+    edges = "edges.tsv.gz"
 
-def prefix_cache_join(prefix: str, *parts, name: Optional[str], version: VersionHint) -> Path:
-    """Ensure the prefix cache is available."""
-    return prefix_directory_join(prefix, "cache", *parts, name=name, version=version)
+    prefixes = "prefixes.json"
+    metadata = "metadata.json"
+
+
+def get_cache_path(
+    ontology: str,
+    name: CacheArtifact,
+    *,
+    version: str | None = None,
+) -> Path:
+    """Get a cache path."""
+    return prefix_directory_join(
+        ontology, CACHE_SUBDIRECTORY_NAME, name=name.value, version=version
+    )
+
+
+def get_relation_cache_path(
+    ontology: str,
+    reference: Reference,
+    *,
+    version: str | None = None,
+) -> Path:
+    """Get a relation cache path."""
+    return prefix_directory_join(
+        ontology, RELATION_SUBDIRECTORY_NAME, name=f"{reference.curie}.tsv", version=version
+    )
pyobo/version.py CHANGED
@@ -8,11 +8,11 @@ from subprocess import CalledProcessError, check_output
 
 __all__ = [
     "VERSION",
-    "get_version",
     "get_git_hash",
+    "get_version",
 ]
 
-VERSION = "0.11.2"
+VERSION = "0.12.1"
 
 
 def get_git_hash() -> str:
@@ -30,7 +30,7 @@ def get_git_hash() -> str:
     return ret.strip().decode("utf-8")[:8]
 
 
-def get_version(with_git_hash: bool = False):
+def get_version(with_git_hash: bool = False) -> str:
     """Get the PyOBO version string, including a git hash."""
     return f"{VERSION}-{get_git_hash()}" if with_git_hash else VERSION
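Aside from the version bump, the only change here is the explicit return annotation; usage is unchanged:

from pyobo.version import get_version

print(get_version())                    # '0.12.1'
print(get_version(with_git_hash=True))  # e.g., '0.12.1-<8-char hash>' in a git checkout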