pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -117
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +107 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +210 -160
  20. pyobo/cli/database_utils.py +155 -0
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +209 -191
  25. pyobo/gilda_utils.py +52 -250
  26. pyobo/identifier_utils/__init__.py +33 -0
  27. pyobo/identifier_utils/api.py +305 -0
  28. pyobo/identifier_utils/preprocessing.json +873 -0
  29. pyobo/identifier_utils/preprocessing.py +27 -0
  30. pyobo/identifier_utils/relations/__init__.py +8 -0
  31. pyobo/identifier_utils/relations/api.py +162 -0
  32. pyobo/identifier_utils/relations/data.json +5824 -0
  33. pyobo/identifier_utils/relations/data_owl.json +57 -0
  34. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  35. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  36. pyobo/mocks.py +9 -6
  37. pyobo/ner/__init__.py +9 -0
  38. pyobo/ner/api.py +72 -0
  39. pyobo/ner/normalizer.py +33 -0
  40. pyobo/obographs.py +48 -40
  41. pyobo/plugins.py +5 -4
  42. pyobo/py.typed +0 -0
  43. pyobo/reader.py +1354 -395
  44. pyobo/reader_utils.py +155 -0
  45. pyobo/resource_utils.py +42 -22
  46. pyobo/resources/__init__.py +0 -0
  47. pyobo/resources/goc.py +75 -0
  48. pyobo/resources/goc.tsv +188 -0
  49. pyobo/resources/ncbitaxon.py +4 -5
  50. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  51. pyobo/resources/ro.py +3 -2
  52. pyobo/resources/ro.tsv +0 -0
  53. pyobo/resources/so.py +0 -0
  54. pyobo/resources/so.tsv +0 -0
  55. pyobo/sources/README.md +12 -8
  56. pyobo/sources/__init__.py +52 -29
  57. pyobo/sources/agrovoc.py +0 -0
  58. pyobo/sources/antibodyregistry.py +11 -12
  59. pyobo/sources/bigg/__init__.py +13 -0
  60. pyobo/sources/bigg/bigg_compartment.py +81 -0
  61. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  62. pyobo/sources/bigg/bigg_model.py +46 -0
  63. pyobo/sources/bigg/bigg_reaction.py +77 -0
  64. pyobo/sources/biogrid.py +1 -2
  65. pyobo/sources/ccle.py +7 -12
  66. pyobo/sources/cgnc.py +9 -6
  67. pyobo/sources/chebi.py +1 -1
  68. pyobo/sources/chembl/__init__.py +9 -0
  69. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  70. pyobo/sources/chembl/chembl_target.py +160 -0
  71. pyobo/sources/civic_gene.py +55 -15
  72. pyobo/sources/clinicaltrials.py +160 -0
  73. pyobo/sources/complexportal.py +24 -24
  74. pyobo/sources/conso.py +14 -22
  75. pyobo/sources/cpt.py +0 -0
  76. pyobo/sources/credit.py +1 -9
  77. pyobo/sources/cvx.py +27 -5
  78. pyobo/sources/depmap.py +9 -12
  79. pyobo/sources/dictybase_gene.py +2 -7
  80. pyobo/sources/drugbank/__init__.py +9 -0
  81. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  82. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  83. pyobo/sources/drugcentral.py +17 -13
  84. pyobo/sources/expasy.py +31 -34
  85. pyobo/sources/famplex.py +13 -18
  86. pyobo/sources/flybase.py +8 -13
  87. pyobo/sources/gard.py +62 -0
  88. pyobo/sources/geonames/__init__.py +9 -0
  89. pyobo/sources/geonames/features.py +28 -0
  90. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  91. pyobo/sources/geonames/utils.py +115 -0
  92. pyobo/sources/gmt_utils.py +6 -7
  93. pyobo/sources/go.py +20 -13
  94. pyobo/sources/gtdb.py +154 -0
  95. pyobo/sources/gwascentral/__init__.py +9 -0
  96. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  97. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  98. pyobo/sources/hgnc/__init__.py +9 -0
  99. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  100. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  101. pyobo/sources/icd/__init__.py +9 -0
  102. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  103. pyobo/sources/icd/icd11.py +148 -0
  104. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  105. pyobo/sources/interpro.py +4 -9
  106. pyobo/sources/itis.py +0 -5
  107. pyobo/sources/kegg/__init__.py +0 -0
  108. pyobo/sources/kegg/api.py +16 -38
  109. pyobo/sources/kegg/genes.py +9 -20
  110. pyobo/sources/kegg/genome.py +1 -7
  111. pyobo/sources/kegg/pathway.py +9 -21
  112. pyobo/sources/mesh.py +58 -24
  113. pyobo/sources/mgi.py +3 -10
  114. pyobo/sources/mirbase/__init__.py +11 -0
  115. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  116. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  117. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  118. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  119. pyobo/sources/msigdb.py +74 -39
  120. pyobo/sources/ncbi/__init__.py +9 -0
  121. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  122. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  123. pyobo/sources/nih_reporter.py +60 -0
  124. pyobo/sources/nlm/__init__.py +9 -0
  125. pyobo/sources/nlm/nlm_catalog.py +48 -0
  126. pyobo/sources/nlm/nlm_publisher.py +36 -0
  127. pyobo/sources/nlm/utils.py +116 -0
  128. pyobo/sources/npass.py +6 -8
  129. pyobo/sources/omim_ps.py +11 -4
  130. pyobo/sources/pathbank.py +4 -8
  131. pyobo/sources/pfam/__init__.py +9 -0
  132. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  133. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  134. pyobo/sources/pharmgkb/__init__.py +15 -0
  135. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  136. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  137. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  138. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  139. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  140. pyobo/sources/pharmgkb/utils.py +86 -0
  141. pyobo/sources/pid.py +1 -6
  142. pyobo/sources/pombase.py +6 -10
  143. pyobo/sources/pubchem.py +4 -9
  144. pyobo/sources/reactome.py +5 -11
  145. pyobo/sources/rgd.py +11 -16
  146. pyobo/sources/rhea.py +37 -36
  147. pyobo/sources/ror.py +69 -42
  148. pyobo/sources/selventa/__init__.py +0 -0
  149. pyobo/sources/selventa/schem.py +4 -7
  150. pyobo/sources/selventa/scomp.py +1 -6
  151. pyobo/sources/selventa/sdis.py +4 -7
  152. pyobo/sources/selventa/sfam.py +1 -6
  153. pyobo/sources/sgd.py +6 -11
  154. pyobo/sources/signor/__init__.py +7 -0
  155. pyobo/sources/signor/download.py +41 -0
  156. pyobo/sources/signor/signor_complexes.py +105 -0
  157. pyobo/sources/slm.py +12 -15
  158. pyobo/sources/umls/__init__.py +7 -1
  159. pyobo/sources/umls/__main__.py +0 -0
  160. pyobo/sources/umls/get_synonym_types.py +20 -4
  161. pyobo/sources/umls/sty.py +57 -0
  162. pyobo/sources/umls/synonym_types.tsv +1 -1
  163. pyobo/sources/umls/umls.py +18 -22
  164. pyobo/sources/unimod.py +46 -0
  165. pyobo/sources/uniprot/__init__.py +1 -1
  166. pyobo/sources/uniprot/uniprot.py +40 -32
  167. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  168. pyobo/sources/utils.py +3 -2
  169. pyobo/sources/wikipathways.py +7 -10
  170. pyobo/sources/zfin.py +5 -10
  171. pyobo/ssg/__init__.py +12 -16
  172. pyobo/ssg/base.html +0 -0
  173. pyobo/ssg/index.html +26 -13
  174. pyobo/ssg/term.html +12 -2
  175. pyobo/ssg/typedef.html +0 -0
  176. pyobo/struct/__init__.py +54 -8
  177. pyobo/struct/functional/__init__.py +1 -0
  178. pyobo/struct/functional/dsl.py +2572 -0
  179. pyobo/struct/functional/macros.py +423 -0
  180. pyobo/struct/functional/obo_to_functional.py +385 -0
  181. pyobo/struct/functional/ontology.py +272 -0
  182. pyobo/struct/functional/utils.py +112 -0
  183. pyobo/struct/reference.py +331 -136
  184. pyobo/struct/struct.py +1484 -657
  185. pyobo/struct/struct_utils.py +1078 -0
  186. pyobo/struct/typedef.py +162 -210
  187. pyobo/struct/utils.py +12 -5
  188. pyobo/struct/vocabulary.py +138 -0
  189. pyobo/utils/__init__.py +0 -0
  190. pyobo/utils/cache.py +16 -15
  191. pyobo/utils/io.py +51 -41
  192. pyobo/utils/iter.py +5 -5
  193. pyobo/utils/misc.py +41 -53
  194. pyobo/utils/ndex_utils.py +0 -0
  195. pyobo/utils/path.py +73 -70
  196. pyobo/version.py +3 -3
  197. pyobo-0.12.1.dist-info/METADATA +671 -0
  198. pyobo-0.12.1.dist-info/RECORD +201 -0
  199. pyobo-0.12.1.dist-info/WHEEL +4 -0
  200. {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
  201. pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
  202. pyobo/aws.py +0 -162
  203. pyobo/cli/aws.py +0 -47
  204. pyobo/identifier_utils.py +0 -142
  205. pyobo/normalizer.py +0 -232
  206. pyobo/registries/__init__.py +0 -16
  207. pyobo/registries/metaregistry.json +0 -507
  208. pyobo/registries/metaregistry.py +0 -135
  209. pyobo/sources/icd11.py +0 -105
  210. pyobo/xrefdb/__init__.py +0 -1
  211. pyobo/xrefdb/canonicalizer.py +0 -214
  212. pyobo/xrefdb/priority.py +0 -59
  213. pyobo/xrefdb/sources/__init__.py +0 -60
  214. pyobo/xrefdb/sources/biomappings.py +0 -36
  215. pyobo/xrefdb/sources/cbms2019.py +0 -91
  216. pyobo/xrefdb/sources/chembl.py +0 -83
  217. pyobo/xrefdb/sources/compath.py +0 -82
  218. pyobo/xrefdb/sources/famplex.py +0 -64
  219. pyobo/xrefdb/sources/gilda.py +0 -50
  220. pyobo/xrefdb/sources/intact.py +0 -113
  221. pyobo/xrefdb/sources/ncit.py +0 -133
  222. pyobo/xrefdb/sources/pubchem.py +0 -27
  223. pyobo/xrefdb/sources/wikidata.py +0 -116
  224. pyobo/xrefdb/xrefs_pipeline.py +0 -180
  225. pyobo-0.11.2.dist-info/METADATA +0 -711
  226. pyobo-0.11.2.dist-info/RECORD +0 -157
  227. pyobo-0.11.2.dist-info/WHEEL +0 -5
  228. pyobo-0.11.2.dist-info/top_level.txt +0 -1
@@ -1,180 +0,0 @@
1
- """Pipeline for extracting all xrefs from OBO documents available."""
2
-
3
- import gzip
4
- import itertools as itt
5
- import logging
6
- from collections.abc import Iterable
7
- from typing import Optional, cast
8
-
9
- import bioregistry
10
- import networkx as nx
11
- import pandas as pd
12
- from tqdm.auto import tqdm
13
-
14
- from .sources import iter_xref_plugins
15
- from .. import get_xrefs_df
16
- from ..api import (
17
- get_id_definition_mapping,
18
- get_id_name_mapping,
19
- get_id_species_mapping,
20
- get_id_synonyms_mapping,
21
- get_id_to_alts,
22
- get_metadata,
23
- get_properties_df,
24
- get_relations_df,
25
- get_typedef_df,
26
- )
27
- from ..constants import SOURCE_ID, SOURCE_PREFIX, TARGET_ID, TARGET_PREFIX
28
- from ..getters import iter_helper, iter_helper_helper
29
- from ..sources import ncbigene, pubchem
30
- from ..utils.path import ensure_path
31
-
32
- logger = logging.getLogger(__name__)
33
-
34
-
35
- # TODO a normal graph can easily be turned into a directed graph where each
36
- # edge points from low priority to higher priority, then the graph can
37
- # be reduced to a set of star graphs and ultimately to a single dictionary
38
-
39
-
40
- def get_graph_from_xref_df(df: pd.DataFrame) -> nx.Graph:
41
- """Generate a graph from the mappings dataframe."""
42
- rv = nx.Graph()
43
-
44
- it = itt.chain(
45
- df[[SOURCE_PREFIX, SOURCE_ID]].drop_duplicates().values,
46
- df[[TARGET_PREFIX, TARGET_ID]].drop_duplicates().values,
47
- )
48
- it = tqdm(it, desc="loading curies", unit_scale=True)
49
- for prefix, identifier in it:
50
- rv.add_node(_to_curie(prefix, identifier), prefix=prefix, identifier=identifier)
51
-
52
- it = tqdm(df.values, total=len(df.index), desc="loading xrefs", unit_scale=True)
53
- for source_ns, source_id, target_ns, target_id, provenance in it:
54
- rv.add_edge(
55
- _to_curie(source_ns, source_id),
56
- _to_curie(target_ns, target_id),
57
- provenance=provenance,
58
- )
59
-
60
- return rv
61
-
62
-
63
- def _to_curie(prefix: str, identifier: str) -> str:
64
- return f"{prefix}:{identifier}"
65
-
66
-
67
- def _iter_ncbigene(left, right):
68
- ncbi_path = ensure_path(ncbigene.PREFIX, url=ncbigene.GENE_INFO_URL)
69
- with gzip.open(ncbi_path, "rt") as file:
70
- next(file) # throw away the header
71
- for line in tqdm(
72
- file, desc=f"extracting {ncbigene.PREFIX}", unit_scale=True, total=27_000_000
73
- ):
74
- line = line.strip().split("\t")
75
- yield ncbigene.PREFIX, line[left], line[right]
76
-
77
-
78
- def _iter_metadata(**kwargs):
79
- for prefix, data in iter_helper_helper(get_metadata, **kwargs):
80
- version = data["version"]
81
- tqdm.write(f"[{prefix}] using version {version}")
82
- yield prefix, version, data["date"], bioregistry.is_deprecated(prefix)
83
-
84
-
85
- def _iter_names(leave: bool = False, **kwargs) -> Iterable[tuple[str, str, str]]:
86
- """Iterate over all prefix-identifier-name triples we can get.
87
-
88
- :param leave: should the tqdm be left behind?
89
- """
90
- yield from iter_helper(get_id_name_mapping, leave=leave, **kwargs)
91
- yield from _iter_ncbigene(1, 2)
92
-
93
- pcc_path = pubchem._ensure_cid_name_path()
94
- with gzip.open(pcc_path, mode="rt", encoding="ISO-8859-1") as file:
95
- for line in tqdm(
96
- file, desc=f"extracting {pubchem.PREFIX}", unit_scale=True, total=103_000_000
97
- ):
98
- identifier, name = line.strip().split("\t", 1)
99
- yield pubchem.PREFIX, identifier, name
100
-
101
-
102
- def _iter_species(leave: bool = False, **kwargs) -> Iterable[tuple[str, str, str]]:
103
- """Iterate over all prefix-identifier-species triples we can get."""
104
- yield from iter_helper(get_id_species_mapping, leave=leave, **kwargs)
105
- # TODO ncbigene
106
-
107
-
108
- def _iter_definitions(leave: bool = False, **kwargs) -> Iterable[tuple[str, str, str]]:
109
- """Iterate over all prefix-identifier-descriptions triples we can get."""
110
- yield from iter_helper(get_id_definition_mapping, leave=leave, **kwargs)
111
- yield from _iter_ncbigene(1, 8)
112
-
113
-
114
- def _iter_alts(
115
- leave: bool = False, strict: bool = True, **kwargs
116
- ) -> Iterable[tuple[str, str, str]]:
117
- for prefix, identifier, alts in iter_helper(
118
- get_id_to_alts, leave=leave, strict=strict, **kwargs
119
- ):
120
- for alt in alts:
121
- yield prefix, identifier, alt
122
-
123
-
124
- def _iter_synonyms(leave: bool = False, **kwargs) -> Iterable[tuple[str, str, str]]:
125
- """Iterate over all prefix-identifier-synonym triples we can get.
126
-
127
- :param leave: should the tqdm be left behind?
128
- """
129
- for prefix, identifier, synonyms in iter_helper(get_id_synonyms_mapping, leave=leave, **kwargs):
130
- for synonym in synonyms:
131
- yield prefix, identifier, synonym
132
-
133
-
134
- def _iter_typedefs(**kwargs) -> Iterable[tuple[str, str, str, str]]:
135
- """Iterate over all prefix-identifier-name triples we can get."""
136
- for prefix, df in iter_helper_helper(get_typedef_df, **kwargs):
137
- for t in df.values:
138
- if all(t):
139
- yield cast(tuple[str, str, str, str], (prefix, *t))
140
-
141
-
142
- def _iter_relations(**kwargs) -> Iterable[tuple[str, str, str, str, str, str]]:
143
- for prefix, df in iter_helper_helper(get_relations_df, **kwargs):
144
- for t in df.values:
145
- if all(t):
146
- yield cast(tuple[str, str, str, str, str, str], (prefix, *t))
147
-
148
-
149
- def _iter_properties(**kwargs) -> Iterable[tuple[str, str, str, str]]:
150
- for prefix, df in iter_helper_helper(get_properties_df, **kwargs):
151
- for t in df.values:
152
- if all(t):
153
- yield cast(tuple[str, str, str, str], (prefix, *t))
154
-
155
-
156
- def _iter_xrefs(
157
- *,
158
- force: bool = False,
159
- use_tqdm: bool = True,
160
- skip_below: Optional[str] = None,
161
- strict: bool = True,
162
- **kwargs,
163
- ) -> Iterable[tuple[str, str, str, str, str]]:
164
- it = iter_helper_helper(
165
- get_xrefs_df,
166
- use_tqdm=use_tqdm,
167
- force=force,
168
- skip_below=skip_below,
169
- strict=strict,
170
- **kwargs,
171
- )
172
- for prefix, df in it:
173
- df.dropna(inplace=True)
174
- for row in df.values:
175
- if any(not element for element in row):
176
- continue
177
- yield cast(tuple[str, str, str, str, str], (prefix, *row, prefix))
178
- for df in iter_xref_plugins(skip_below=skip_below):
179
- df.dropna(inplace=True)
180
- yield from tqdm(df.values, leave=False, total=len(df.index), unit_scale=True)