pymetadata 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pymetadata might be problematic. Click here for more details.
- pymetadata/__init__.py +1 -1
- pymetadata/chebi.py +21 -29
- pymetadata/core/annotation.py +114 -53
- pymetadata/identifiers/registry.py +0 -202
- pymetadata/metadata/eco.py +882 -301
- {pymetadata-0.5.3.dist-info → pymetadata-0.5.5.dist-info}/METADATA +7 -8
- {pymetadata-0.5.3.dist-info → pymetadata-0.5.5.dist-info}/RECORD +9 -10
- pymetadata/resources/chebi_webservice_wsdl.xml +0 -509
- {pymetadata-0.5.3.dist-info → pymetadata-0.5.5.dist-info}/WHEEL +0 -0
- {pymetadata-0.5.3.dist-info → pymetadata-0.5.5.dist-info}/licenses/LICENSE +0 -0
pymetadata/__init__.py
CHANGED
pymetadata/chebi.py
CHANGED
|
@@ -1,28 +1,19 @@
|
|
|
1
1
|
"""Module for working with chebi."""
|
|
2
2
|
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from pprint import pprint
|
|
5
4
|
from typing import Any, Dict, Optional
|
|
6
|
-
|
|
7
|
-
from zeep import Client
|
|
8
|
-
|
|
5
|
+
import requests
|
|
9
6
|
|
|
10
7
|
import pymetadata
|
|
11
8
|
from pymetadata import log
|
|
12
9
|
from pymetadata.cache import DataclassJSONEncoder, read_json_cache, write_json_cache
|
|
10
|
+
from pymetadata.console import console
|
|
13
11
|
|
|
14
12
|
logger = log.get_logger(__name__)
|
|
15
13
|
|
|
16
|
-
# FIXME: copy the file to the cache dir
|
|
17
|
-
client = Client(str(pymetadata.RESOURCES_DIR / "chebi_webservice_wsdl.xml"))
|
|
18
|
-
|
|
19
14
|
|
|
20
15
|
class ChebiQuery:
|
|
21
|
-
"""Class to query information from ChEBI.
|
|
22
|
-
|
|
23
|
-
An overview over available methods:
|
|
24
|
-
python -mzeep https://www.ebi.ac.uk/webservices/chebi/2.0/webservice?wsdl
|
|
25
|
-
"""
|
|
16
|
+
"""Class to query information from ChEBI."""
|
|
26
17
|
|
|
27
18
|
@staticmethod
|
|
28
19
|
def query(
|
|
@@ -52,27 +43,28 @@ class ChebiQuery:
|
|
|
52
43
|
|
|
53
44
|
# fetch and cache data
|
|
54
45
|
if not data:
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
46
|
+
response = requests.get(
|
|
47
|
+
url=f"https://www.ebi.ac.uk/chebi/backend/api/public/compounds/?chebi_ids={chebi}"
|
|
48
|
+
)
|
|
49
|
+
if response.status_code == 200:
|
|
50
|
+
result = response.json()
|
|
51
|
+
else:
|
|
59
52
|
logger.error(f"CHEBI information could not be retrieved for: {chebi}")
|
|
60
53
|
return dict()
|
|
61
54
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
if formulae:
|
|
66
|
-
formula = formulae[0]["data"]
|
|
67
|
-
|
|
55
|
+
result = result[chebi]["data"]
|
|
56
|
+
chemical_data = result["chemical_data"]
|
|
57
|
+
default_structure = result["default_structure"]
|
|
68
58
|
data = {
|
|
69
59
|
"chebi": chebi,
|
|
70
|
-
"name": result["
|
|
60
|
+
"name": result["ascii_name"],
|
|
71
61
|
"definition": result["definition"],
|
|
72
|
-
"formula": formula,
|
|
73
|
-
"charge":
|
|
74
|
-
"mass":
|
|
75
|
-
"inchikey":
|
|
62
|
+
"formula": chemical_data["formula"] if chemical_data else None,
|
|
63
|
+
"charge": chemical_data["charge"] if chemical_data else None,
|
|
64
|
+
"mass": chemical_data["mass"] if chemical_data else None,
|
|
65
|
+
"inchikey": default_structure["standard_inchi_key"]
|
|
66
|
+
if default_structure
|
|
67
|
+
else None,
|
|
76
68
|
}
|
|
77
69
|
|
|
78
70
|
logger.info(f"Write chebi: {chebi_path}")
|
|
@@ -86,7 +78,7 @@ class ChebiQuery:
|
|
|
86
78
|
if __name__ == "__main__":
|
|
87
79
|
chebis = ["CHEBI:2668", "CHEBI:138366", "CHEBI:9637", "CHEBI:155897"]
|
|
88
80
|
for chebi in chebis:
|
|
89
|
-
|
|
81
|
+
console.rule(chebi, align="left", style="bold white")
|
|
90
82
|
d = ChebiQuery.query(chebi=chebi, cache=False)
|
|
91
|
-
|
|
83
|
+
console.print(d)
|
|
92
84
|
d = ChebiQuery.query(chebi=chebi, cache=True)
|
pymetadata/core/annotation.py
CHANGED
|
@@ -5,12 +5,14 @@ Core data structure to store annotations.
|
|
|
5
5
|
|
|
6
6
|
import re
|
|
7
7
|
import urllib
|
|
8
|
+
from enum import Enum
|
|
8
9
|
from pprint import pprint
|
|
9
10
|
from typing import Any, Dict, Final, List, Optional, Tuple, Union
|
|
10
11
|
|
|
11
12
|
import requests
|
|
12
13
|
|
|
13
14
|
from pymetadata import log
|
|
15
|
+
from pymetadata.console import console
|
|
14
16
|
from pymetadata.core.xref import CrossReference, is_url
|
|
15
17
|
from pymetadata.identifiers.miriam import BQB, BQM
|
|
16
18
|
from pymetadata.identifiers.registry import REGISTRY
|
|
@@ -19,19 +21,31 @@ from pymetadata.ontologies.ols import ONTOLOGIES, OLSQuery
|
|
|
19
21
|
|
|
20
22
|
OLS_QUERY = OLSQuery(ontologies=ONTOLOGIES)
|
|
21
23
|
|
|
22
|
-
IDENTIFIERS_ORG_PREFIX: Final = "
|
|
24
|
+
IDENTIFIERS_ORG_PREFIX: Final = "https://identifiers.org"
|
|
23
25
|
IDENTIFIERS_ORG_PATTERN1: Final = re.compile(r"^https?://identifiers.org/(.+?)/(.+)")
|
|
24
26
|
IDENTIFIERS_ORG_PATTERN2: Final = re.compile(r"^https?://identifiers.org/(.+)")
|
|
27
|
+
|
|
28
|
+
BIOREGISTRY_PREFIX: Final = "https://bioregistry.io"
|
|
29
|
+
BIOREGISTRY_PATTERN: Final = re.compile(r"^https?://bioregistry.io/(.+)")
|
|
30
|
+
|
|
25
31
|
MIRIAM_URN_PATTERN: Final = re.compile(r"^urn:miriam:(.+)")
|
|
26
32
|
|
|
27
33
|
logger = log.get_logger(__name__)
|
|
28
34
|
|
|
29
35
|
|
|
36
|
+
class ProviderType(str, Enum):
|
|
37
|
+
"""Provider type."""
|
|
38
|
+
|
|
39
|
+
IDENTIFIERS_ORG = "identifiers.org"
|
|
40
|
+
BIOREGISTRY_IO = "bioregistry.io"
|
|
41
|
+
NONE = "none"
|
|
42
|
+
|
|
43
|
+
|
|
30
44
|
class RDFAnnotation:
|
|
31
45
|
"""RDFAnnotation class.
|
|
32
46
|
|
|
33
47
|
Basic storage of annotation information. This consists of the relation
|
|
34
|
-
and the
|
|
48
|
+
and the resource.
|
|
35
49
|
The annotations can be attached to other objects thereby forming
|
|
36
50
|
triples which can be converted to RDF.
|
|
37
51
|
|
|
@@ -40,6 +54,7 @@ class RDFAnnotation:
|
|
|
40
54
|
- `collection/term`, i.e., the combination of collection and term
|
|
41
55
|
- `http(s)://arbitrary.url`, an arbitrary URL
|
|
42
56
|
- urn:miriam:uniprot:P03023
|
|
57
|
+
- https://bioregistry.io/chebi:15996, i.e., URLs resolved via the bioregistry provider
|
|
43
58
|
"""
|
|
44
59
|
|
|
45
60
|
replaced_collections: Dict[str, str] = {
|
|
@@ -53,6 +68,7 @@ class RDFAnnotation:
|
|
|
53
68
|
self.collection: Optional[str] = None
|
|
54
69
|
self.term: Optional[str] = None
|
|
55
70
|
self.resource: str = resource
|
|
71
|
+
self.provider: ProviderType = ProviderType.IDENTIFIERS_ORG
|
|
56
72
|
|
|
57
73
|
if not qualifier:
|
|
58
74
|
raise ValueError(
|
|
@@ -75,15 +91,19 @@ class RDFAnnotation:
|
|
|
75
91
|
if match1:
|
|
76
92
|
# handle identifiers.org pattern
|
|
77
93
|
self.collection, self.term = match1.group(1), match1.group(2)
|
|
94
|
+
self.provider = ProviderType.IDENTIFIERS_ORG
|
|
78
95
|
|
|
79
96
|
if not self.collection:
|
|
80
|
-
# tests new
|
|
97
|
+
# tests new compact patterns
|
|
81
98
|
match2 = IDENTIFIERS_ORG_PATTERN2.match(resource)
|
|
82
99
|
if match2:
|
|
83
100
|
tokens = match2.group(1).split(":")
|
|
84
101
|
if len(tokens) == 2:
|
|
85
102
|
self.collection = tokens[0].lower()
|
|
103
|
+
|
|
104
|
+
# check if the namespace is embedded
|
|
86
105
|
self.term = match2.group(1)
|
|
106
|
+
self.provider = ProviderType.IDENTIFIERS_ORG
|
|
87
107
|
else:
|
|
88
108
|
logger.warning(
|
|
89
109
|
f"Identifiers.org URL does not conform to new"
|
|
@@ -94,16 +114,25 @@ class RDFAnnotation:
|
|
|
94
114
|
# other urls are directly stored as resources without collection
|
|
95
115
|
self.collection = None
|
|
96
116
|
self.term = resource
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
117
|
+
if BIOREGISTRY_PATTERN.match(resource):
|
|
118
|
+
self.provider = ProviderType.BIOREGISTRY_IO
|
|
119
|
+
console.print(self.provider)
|
|
120
|
+
else:
|
|
121
|
+
self.provider = ProviderType.NONE
|
|
122
|
+
logger.warning(
|
|
123
|
+
f"{resource} does not conform to "
|
|
124
|
+
f"http(s)://identifiers.org/collection/id or http(s)://identifiers.org/id or "
|
|
125
|
+
f"https://bioregistry.io/id .",
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
# handle urns
|
|
101
129
|
elif resource.startswith("urn:miriam:"):
|
|
102
130
|
match3 = MIRIAM_URN_PATTERN.match(resource)
|
|
103
131
|
if match3:
|
|
104
132
|
tokens = match3.group(1).split(":")
|
|
105
133
|
self.collection = tokens[0]
|
|
106
134
|
self.term = ":".join(tokens[1:]).replace("%3A", ":")
|
|
135
|
+
self.provider = ProviderType.IDENTIFIERS_ORG
|
|
107
136
|
|
|
108
137
|
logger.warning(
|
|
109
138
|
f"Deprecated urn pattern `{resource}` updated: "
|
|
@@ -113,13 +142,16 @@ class RDFAnnotation:
|
|
|
113
142
|
else:
|
|
114
143
|
# handle short notation
|
|
115
144
|
tokens = resource.split("/")
|
|
116
|
-
if len(tokens)
|
|
145
|
+
if len(tokens) > 1:
|
|
117
146
|
self.collection = tokens[0]
|
|
118
147
|
self.term = "/".join(tokens[1:])
|
|
148
|
+
self.provider = ProviderType.IDENTIFIERS_ORG
|
|
119
149
|
elif len(tokens) == 1 and ":" in tokens[0]:
|
|
120
150
|
self.collection = tokens[0].split(":")[0].lower()
|
|
121
151
|
self.term = tokens[0]
|
|
152
|
+
self.provider = ProviderType.IDENTIFIERS_ORG
|
|
122
153
|
|
|
154
|
+
# validation
|
|
123
155
|
if len(tokens) < 2 and not self.collection:
|
|
124
156
|
logger.error(
|
|
125
157
|
f"Resource `{resource}` could not be split in collection and term. "
|
|
@@ -129,6 +161,13 @@ class RDFAnnotation:
|
|
|
129
161
|
)
|
|
130
162
|
self.collection = None
|
|
131
163
|
self.term = resource
|
|
164
|
+
self.provider = ProviderType.NONE
|
|
165
|
+
|
|
166
|
+
# shorten compact terms
|
|
167
|
+
if self.term and self.collection:
|
|
168
|
+
self.term = self.shorten_compact_term(
|
|
169
|
+
term=self.term, collection=self.collection
|
|
170
|
+
)
|
|
132
171
|
|
|
133
172
|
# clean legacy collections
|
|
134
173
|
if self.collection in self.replaced_collections:
|
|
@@ -136,6 +175,21 @@ class RDFAnnotation:
|
|
|
136
175
|
|
|
137
176
|
self.validate()
|
|
138
177
|
|
|
178
|
+
@staticmethod
|
|
179
|
+
def shorten_compact_term(term: str, collection: str) -> str:
|
|
180
|
+
"""Shorten the compact terms and return term.
|
|
181
|
+
|
|
182
|
+
If the namespace is not embedded in the term, return the shortened term.
|
|
183
|
+
"""
|
|
184
|
+
namespace = REGISTRY.ns_dict.get(collection, None)
|
|
185
|
+
if namespace and not namespace.namespaceEmbeddedInLui:
|
|
186
|
+
# shorter term
|
|
187
|
+
if term.lower().startswith(collection):
|
|
188
|
+
tokens = term.split(":")
|
|
189
|
+
term = ":".join(tokens[1:])
|
|
190
|
+
|
|
191
|
+
return term
|
|
192
|
+
|
|
139
193
|
@staticmethod
|
|
140
194
|
def from_tuple(t: Tuple[Union[BQB, BQM], str]) -> "RDFAnnotation":
|
|
141
195
|
"""Construct from tuple."""
|
|
@@ -161,12 +215,12 @@ class RDFAnnotation:
|
|
|
161
215
|
|
|
162
216
|
def __repr__(self) -> str:
|
|
163
217
|
"""Get representation string."""
|
|
164
|
-
return f"RDFAnnotation({self.qualifier}|{self.collection}|{self.term})"
|
|
218
|
+
return f"RDFAnnotation({self.qualifier}|{self.collection}|{self.term}|{self.provider.value})"
|
|
165
219
|
|
|
166
220
|
def to_dict(self) -> Dict:
|
|
167
221
|
"""Convert to dict."""
|
|
168
222
|
return {
|
|
169
|
-
"qualifier": self.qualifier.value,
|
|
223
|
+
"qualifier": self.qualifier.value,
|
|
170
224
|
"collection": self.collection,
|
|
171
225
|
"term": self.term,
|
|
172
226
|
}
|
|
@@ -343,52 +397,59 @@ class RDFAnnotationData(RDFAnnotation):
|
|
|
343
397
|
|
|
344
398
|
if __name__ == "__main__":
|
|
345
399
|
for annotation in [
|
|
346
|
-
# FIXME: support this
|
|
347
400
|
RDFAnnotation(
|
|
348
401
|
qualifier=BQB.IS_VERSION_OF,
|
|
349
|
-
resource="
|
|
350
|
-
),
|
|
351
|
-
RDFAnnotation(
|
|
352
|
-
qualifier=BQB.IS_VERSION_OF,
|
|
353
|
-
resource="taxonomy/562",
|
|
354
|
-
),
|
|
355
|
-
RDFAnnotation(
|
|
356
|
-
qualifier=BQB.IS_VERSION_OF,
|
|
357
|
-
resource="http://identifiers.org/taxonomy/9606",
|
|
358
|
-
),
|
|
359
|
-
RDFAnnotation(
|
|
360
|
-
qualifier=BQB.IS_VERSION_OF,
|
|
361
|
-
resource="http://identifiers.org/biomodels.sbo/SBO:0000247",
|
|
362
|
-
),
|
|
363
|
-
RDFAnnotation(
|
|
364
|
-
qualifier=BQB.IS_VERSION_OF, resource="urn:miriam:obo.go:GO%3A0005623"
|
|
365
|
-
),
|
|
366
|
-
RDFAnnotation(
|
|
367
|
-
qualifier=BQB.IS_VERSION_OF, resource="urn:miriam:chebi:CHEBI%3A33699"
|
|
368
|
-
),
|
|
369
|
-
RDFAnnotation(qualifier=BQB.IS_VERSION_OF, resource="chebi/CHEBI:456215"),
|
|
370
|
-
RDFAnnotation(
|
|
371
|
-
qualifier=BQB.IS, resource="https://en.wikipedia.org/wiki/Cytosol"
|
|
372
|
-
),
|
|
373
|
-
RDFAnnotation(
|
|
374
|
-
qualifier=BQB.IS_VERSION_OF, resource="urn:miriam:uniprot:P03023"
|
|
375
|
-
),
|
|
376
|
-
RDFAnnotation(
|
|
377
|
-
qualifier=BQB.IS_VERSION_OF,
|
|
378
|
-
resource="http://identifiers.org/go/GO:0005829",
|
|
379
|
-
),
|
|
380
|
-
RDFAnnotation(
|
|
381
|
-
qualifier=BQB.IS_VERSION_OF, resource="http://identifiers.org/go/GO:0005829"
|
|
382
|
-
),
|
|
383
|
-
RDFAnnotation(
|
|
384
|
-
qualifier=BQB.IS_VERSION_OF, resource="http://identifiers.org/GO:0005829"
|
|
385
|
-
),
|
|
386
|
-
RDFAnnotation(
|
|
387
|
-
qualifier=BQB.IS_VERSION_OF, resource="http://identifiers.org/GO:0005829"
|
|
402
|
+
resource="https://bioregistry.io/chebi:15996",
|
|
388
403
|
),
|
|
389
|
-
RDFAnnotation(
|
|
390
|
-
|
|
391
|
-
|
|
404
|
+
# RDFAnnotation(
|
|
405
|
+
# qualifier=BQB.IS_VERSION_OF,
|
|
406
|
+
# resource="NCIT:C75913",
|
|
407
|
+
# ),
|
|
408
|
+
# RDFAnnotation(
|
|
409
|
+
# qualifier=BQB.IS_VERSION_OF,
|
|
410
|
+
# resource="ncit:C75913",
|
|
411
|
+
# ),
|
|
412
|
+
# RDFAnnotation(
|
|
413
|
+
# qualifier=BQB.IS_VERSION_OF,
|
|
414
|
+
# resource="taxonomy/562",
|
|
415
|
+
# ),
|
|
416
|
+
# RDFAnnotation(
|
|
417
|
+
# qualifier=BQB.IS_VERSION_OF,
|
|
418
|
+
# resource="http://identifiers.org/taxonomy/9606",
|
|
419
|
+
# ),
|
|
420
|
+
# RDFAnnotation(
|
|
421
|
+
# qualifier=BQB.IS_VERSION_OF,
|
|
422
|
+
# resource="http://identifiers.org/biomodels.sbo/SBO:0000247",
|
|
423
|
+
# ),
|
|
424
|
+
# RDFAnnotation(
|
|
425
|
+
# qualifier=BQB.IS_VERSION_OF, resource="urn:miriam:obo.go:GO%3A0005623"
|
|
426
|
+
# ),
|
|
427
|
+
# RDFAnnotation(
|
|
428
|
+
# qualifier=BQB.IS_VERSION_OF, resource="urn:miriam:chebi:CHEBI%3A33699"
|
|
429
|
+
# ),
|
|
430
|
+
# RDFAnnotation(qualifier=BQB.IS_VERSION_OF, resource="chebi/CHEBI:456215"),
|
|
431
|
+
# RDFAnnotation(
|
|
432
|
+
# qualifier=BQB.IS, resource="https://en.wikipedia.org/wiki/Cytosol"
|
|
433
|
+
# ),
|
|
434
|
+
# RDFAnnotation(
|
|
435
|
+
# qualifier=BQB.IS_VERSION_OF, resource="urn:miriam:uniprot:P03023"
|
|
436
|
+
# ),
|
|
437
|
+
# RDFAnnotation(
|
|
438
|
+
# qualifier=BQB.IS_VERSION_OF,
|
|
439
|
+
# resource="http://identifiers.org/go/GO:0005829",
|
|
440
|
+
# ),
|
|
441
|
+
# RDFAnnotation(
|
|
442
|
+
# qualifier=BQB.IS_VERSION_OF, resource="http://identifiers.org/go/GO:0005829"
|
|
443
|
+
# ),
|
|
444
|
+
# RDFAnnotation(
|
|
445
|
+
# qualifier=BQB.IS_VERSION_OF, resource="http://identifiers.org/GO:0005829"
|
|
446
|
+
# ),
|
|
447
|
+
# RDFAnnotation(
|
|
448
|
+
# qualifier=BQB.IS_VERSION_OF, resource="http://identifiers.org/GO:0005829"
|
|
449
|
+
# ),
|
|
450
|
+
# RDFAnnotation(qualifier=BQB.IS_VERSION_OF, resource="bto/BTO:0000089"),
|
|
451
|
+
# RDFAnnotation(qualifier=BQB.IS_VERSION_OF, resource="BTO:0000089"),
|
|
452
|
+
# RDFAnnotation(qualifier=BQB.IS_VERSION_OF, resource="chebi/CHEBI:000012"),
|
|
392
453
|
]:
|
|
393
454
|
print("-" * 80)
|
|
394
455
|
data = RDFAnnotationData(annotation)
|
|
@@ -88,195 +88,6 @@ class Namespace:
|
|
|
88
88
|
self.resources = list()
|
|
89
89
|
|
|
90
90
|
|
|
91
|
-
def ols_namespaces() -> Dict[str, Namespace]:
|
|
92
|
-
"""Define Ontologies available from OLS but not in identifiers.org."""
|
|
93
|
-
ols_info: Dict = {
|
|
94
|
-
"deprecated": False,
|
|
95
|
-
"deprecationDate": None,
|
|
96
|
-
"institution": {
|
|
97
|
-
"description": "At EMBL-EBI, we make the "
|
|
98
|
-
"world’s public biological data "
|
|
99
|
-
"freely available to the "
|
|
100
|
-
"scientific community via a "
|
|
101
|
-
"range of services and tools, "
|
|
102
|
-
"perform basic research and "
|
|
103
|
-
"provide professional training "
|
|
104
|
-
"in bioinformatics. \n"
|
|
105
|
-
"We are part of the European "
|
|
106
|
-
"Molecular Biology Laboratory "
|
|
107
|
-
"(EMBL), an international, "
|
|
108
|
-
"innovative and "
|
|
109
|
-
"interdisciplinary research "
|
|
110
|
-
"organisation funded by 26 "
|
|
111
|
-
"member states and two "
|
|
112
|
-
"associate member states.",
|
|
113
|
-
"homeUrl": "https://www.ebi.ac.uk",
|
|
114
|
-
"id": 2,
|
|
115
|
-
"location": {"countryCode": "GB", "countryName": "United Kingdom"},
|
|
116
|
-
"name": "European Bioinformatics Institute",
|
|
117
|
-
"rorId": "https://ror.org/02catss52",
|
|
118
|
-
},
|
|
119
|
-
"location": {"countryCode": "GB", "countryName": "United Kingdom"},
|
|
120
|
-
"official": False,
|
|
121
|
-
"providerCode": "ols",
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
# Custom namespaces for OLS ontology, for simple support
|
|
125
|
-
namespaces = [
|
|
126
|
-
Namespace(
|
|
127
|
-
id=None,
|
|
128
|
-
prefix="omim",
|
|
129
|
-
pattern=r"^MI:\d+$",
|
|
130
|
-
name="OMIM",
|
|
131
|
-
description="Molecular Interactions Controlled Vocabulary",
|
|
132
|
-
namespaceEmbeddedInLui=True,
|
|
133
|
-
),
|
|
134
|
-
Namespace(
|
|
135
|
-
id=None,
|
|
136
|
-
prefix="dron",
|
|
137
|
-
pattern=r"^DRON:\d+$",
|
|
138
|
-
name="DRON",
|
|
139
|
-
description="The drug ontology",
|
|
140
|
-
namespaceEmbeddedInLui=True,
|
|
141
|
-
),
|
|
142
|
-
Namespace(
|
|
143
|
-
id=None,
|
|
144
|
-
prefix="cmo",
|
|
145
|
-
pattern=r"^CMO:\d+$",
|
|
146
|
-
name="Chemical methods ontology",
|
|
147
|
-
description="Morphological and physiological measurement records "
|
|
148
|
-
"generated from clinical and model organism research and health programs.",
|
|
149
|
-
namespaceEmbeddedInLui=True,
|
|
150
|
-
),
|
|
151
|
-
Namespace(
|
|
152
|
-
id=None,
|
|
153
|
-
prefix="chmo",
|
|
154
|
-
pattern=r"^CHMO:\d+$",
|
|
155
|
-
name="Chemical methods ontology",
|
|
156
|
-
description="CHMO, the chemical methods ontology",
|
|
157
|
-
namespaceEmbeddedInLui=True,
|
|
158
|
-
),
|
|
159
|
-
Namespace(
|
|
160
|
-
id=None,
|
|
161
|
-
prefix="vto",
|
|
162
|
-
pattern=r"^VTO:\d+$",
|
|
163
|
-
name="Vertebrate Taxonomy Ontology",
|
|
164
|
-
description="VTO Vertebrate Taxonomy Ontology",
|
|
165
|
-
namespaceEmbeddedInLui=True,
|
|
166
|
-
),
|
|
167
|
-
Namespace(
|
|
168
|
-
id=None,
|
|
169
|
-
prefix="opmi",
|
|
170
|
-
pattern=r"^OPMI:\d+$",
|
|
171
|
-
name="Ontology of Precision Medicine and Investigation",
|
|
172
|
-
description="OPMI: Ontology of Precision Medicine and Investigation",
|
|
173
|
-
namespaceEmbeddedInLui=True,
|
|
174
|
-
),
|
|
175
|
-
Namespace(
|
|
176
|
-
id=None,
|
|
177
|
-
prefix="atol",
|
|
178
|
-
pattern=r"^ATOL:\d+$",
|
|
179
|
-
name="ATOL",
|
|
180
|
-
description="Animal Trait Ontology for Livestock",
|
|
181
|
-
namespaceEmbeddedInLui=True,
|
|
182
|
-
),
|
|
183
|
-
Namespace(
|
|
184
|
-
id=None,
|
|
185
|
-
prefix="nbo",
|
|
186
|
-
pattern=r"^NBO:\d+$",
|
|
187
|
-
name="NBO",
|
|
188
|
-
description="Neuro Behavior Ontology",
|
|
189
|
-
namespaceEmbeddedInLui=True,
|
|
190
|
-
),
|
|
191
|
-
Namespace(
|
|
192
|
-
id=None,
|
|
193
|
-
prefix="scdo",
|
|
194
|
-
pattern=r"^SCDO:\d+$",
|
|
195
|
-
name="Sickle Cell Disease Ontology",
|
|
196
|
-
description="Sickle Cell Disease Ontology",
|
|
197
|
-
namespaceEmbeddedInLui=True,
|
|
198
|
-
),
|
|
199
|
-
Namespace(
|
|
200
|
-
id=None,
|
|
201
|
-
prefix="fix",
|
|
202
|
-
pattern=r"^FIX:\d+$",
|
|
203
|
-
name="Physico-chemical methods and properties Ontology",
|
|
204
|
-
description="Physico-chemical methods and properties Ontology",
|
|
205
|
-
namespaceEmbeddedInLui=True,
|
|
206
|
-
),
|
|
207
|
-
Namespace(
|
|
208
|
-
id=None,
|
|
209
|
-
prefix="oba",
|
|
210
|
-
pattern=r"^OBA:\d+$",
|
|
211
|
-
name="Ontology of Biological Attributes",
|
|
212
|
-
description="PubChem is an open chemistry database at the National "
|
|
213
|
-
"Institutes of Health (NIH).",
|
|
214
|
-
namespaceEmbeddedInLui=True,
|
|
215
|
-
),
|
|
216
|
-
Namespace(
|
|
217
|
-
id=None,
|
|
218
|
-
prefix="mmo",
|
|
219
|
-
pattern=r"^MMO:\d+$",
|
|
220
|
-
name="Measurement method ontology",
|
|
221
|
-
description="Measurement method ontology",
|
|
222
|
-
namespaceEmbeddedInLui=True,
|
|
223
|
-
),
|
|
224
|
-
Namespace(
|
|
225
|
-
id=None,
|
|
226
|
-
prefix="symp",
|
|
227
|
-
pattern=r"^SYMP:\d+$",
|
|
228
|
-
name="Symptom ontology",
|
|
229
|
-
description="The Symptom Ontology has been developed as a standardized ontology for symptoms of human diseases.",
|
|
230
|
-
namespaceEmbeddedInLui=True,
|
|
231
|
-
),
|
|
232
|
-
]
|
|
233
|
-
|
|
234
|
-
for ns in namespaces:
|
|
235
|
-
if not ns.resources:
|
|
236
|
-
ns.resources = []
|
|
237
|
-
if not ns.prefix:
|
|
238
|
-
continue
|
|
239
|
-
ns.resources.append(
|
|
240
|
-
Resource(
|
|
241
|
-
id=None,
|
|
242
|
-
name=f"{ns.prefix} through OLS",
|
|
243
|
-
description=f"{ns.prefix} through OLS",
|
|
244
|
-
mirId=None,
|
|
245
|
-
sampleId=None,
|
|
246
|
-
resourceHomeUrl=None,
|
|
247
|
-
urlPattern=f"https://www.ebi.ac.uk/ols4/ontologies/{ns.prefix}/terms?obo_id={ns.prefix.upper()}"
|
|
248
|
-
+ ":{$id}",
|
|
249
|
-
**ols_info,
|
|
250
|
-
)
|
|
251
|
-
)
|
|
252
|
-
|
|
253
|
-
return {ns.prefix: ns for ns in namespaces} # type: ignore
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
def misc_namespaces() -> Dict[str, Namespace]:
|
|
257
|
-
"""Define misc namespaces."""
|
|
258
|
-
namespaces = [
|
|
259
|
-
Namespace(
|
|
260
|
-
id="brenda.ligand",
|
|
261
|
-
pattern=r"^\d+$",
|
|
262
|
-
name="BRENDA Ligand",
|
|
263
|
-
prefix=None,
|
|
264
|
-
description="BRENDA Ligand Information",
|
|
265
|
-
namespaceEmbeddedInLui=False,
|
|
266
|
-
),
|
|
267
|
-
Namespace(
|
|
268
|
-
id="metabolights.compound",
|
|
269
|
-
pattern=r"^MTBLC\d+$",
|
|
270
|
-
name="Metabolights compound",
|
|
271
|
-
prefix=None,
|
|
272
|
-
description="metabolights compound",
|
|
273
|
-
namespaceEmbeddedInLui=False,
|
|
274
|
-
),
|
|
275
|
-
]
|
|
276
|
-
|
|
277
|
-
return {ns.id: ns for ns in namespaces} # type: ignore
|
|
278
|
-
|
|
279
|
-
|
|
280
91
|
class Registry:
|
|
281
92
|
"""Managing the available annotation information.
|
|
282
93
|
|
|
@@ -284,10 +95,6 @@ class Registry:
|
|
|
284
95
|
"""
|
|
285
96
|
|
|
286
97
|
URL = "https://registry.api.identifiers.org/resolutionApi/getResolverDataset"
|
|
287
|
-
CUSTOM_NAMESPACES = {
|
|
288
|
-
**ols_namespaces(),
|
|
289
|
-
**misc_namespaces(),
|
|
290
|
-
}
|
|
291
98
|
|
|
292
99
|
def __init__(
|
|
293
100
|
self,
|
|
@@ -325,7 +132,6 @@ class Registry:
|
|
|
325
132
|
|
|
326
133
|
@staticmethod
|
|
327
134
|
def update_registry(
|
|
328
|
-
custom_namespaces: Dict[str, Namespace] = CUSTOM_NAMESPACES,
|
|
329
135
|
registry_path: Optional[Path] = None,
|
|
330
136
|
) -> Dict[str, Namespace]:
|
|
331
137
|
"""Update registry from identifiers.org webservice."""
|
|
@@ -338,14 +144,6 @@ class Registry:
|
|
|
338
144
|
ns = Namespace.from_dict(data)
|
|
339
145
|
ns_dict[ns.prefix] = ns
|
|
340
146
|
|
|
341
|
-
if custom_namespaces is not None:
|
|
342
|
-
for key, ns in custom_namespaces.items():
|
|
343
|
-
if key in ns_dict:
|
|
344
|
-
logger.error(
|
|
345
|
-
f"Namespace with key '{key}' exists in MIRIAM. Overwrite namespace!"
|
|
346
|
-
)
|
|
347
|
-
ns_dict[key] = ns
|
|
348
|
-
|
|
349
147
|
if registry_path is not None:
|
|
350
148
|
write_json_cache(
|
|
351
149
|
data=ns_dict,
|