pymetadata 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pymetadata might be problematic. Click here for more details.

Files changed (42) hide show
  1. pymetadata/__init__.py +14 -0
  2. pymetadata/cache.py +52 -0
  3. pymetadata/chebi.py +92 -0
  4. pymetadata/console.py +18 -0
  5. pymetadata/core/__init__.py +1 -0
  6. pymetadata/core/annotation.py +396 -0
  7. pymetadata/core/creator.py +46 -0
  8. pymetadata/core/synonym.py +12 -0
  9. pymetadata/core/xref.py +66 -0
  10. pymetadata/examples/__init__.py +1 -0
  11. pymetadata/examples/cache_path_example.py +15 -0
  12. pymetadata/examples/omex_example.py +46 -0
  13. pymetadata/examples/results/test_from_files.omex +0 -0
  14. pymetadata/examples/results/test_from_omex.omex +0 -0
  15. pymetadata/examples/results/testomex/README.md +3 -0
  16. pymetadata/examples/results/testomex/manifest.xml +9 -0
  17. pymetadata/examples/results/testomex/models/omex_comp.xml +174 -0
  18. pymetadata/examples/results/testomex/models/omex_comp_flat.xml +215 -0
  19. pymetadata/examples/results/testomex/models/omex_minimal.xml +99 -0
  20. pymetadata/examples/test.omex +0 -0
  21. pymetadata/identifiers/__init__.py +1 -0
  22. pymetadata/identifiers/miriam.py +43 -0
  23. pymetadata/identifiers/registry.py +397 -0
  24. pymetadata/log.py +29 -0
  25. pymetadata/metadata/__init__.py +6 -0
  26. pymetadata/metadata/eco.py +15918 -0
  27. pymetadata/metadata/kisao.py +2731 -0
  28. pymetadata/metadata/sbo.py +3754 -0
  29. pymetadata/omex.py +771 -0
  30. pymetadata/omex_v2.py +30 -0
  31. pymetadata/ontologies/__init__.py +1 -0
  32. pymetadata/ontologies/ols.py +214 -0
  33. pymetadata/ontologies/ontology.py +312 -0
  34. pymetadata/py.typed +0 -0
  35. pymetadata/resources/chebi_webservice_wsdl.xml +509 -0
  36. pymetadata/resources/ontologies/README.md +4 -0
  37. pymetadata/resources/templates/ontology_enum.pytemplate +61 -0
  38. pymetadata/unichem.py +190 -0
  39. pymetadata-0.5.0.dist-info/METADATA +154 -0
  40. pymetadata-0.5.0.dist-info/RECORD +42 -0
  41. pymetadata-0.5.0.dist-info/WHEEL +4 -0
  42. pymetadata-0.5.0.dist-info/licenses/LICENSE +7 -0
pymetadata/omex_v2.py ADDED
@@ -0,0 +1,30 @@
1
+ """COMBINE archive version 2.
2
+
3
+ See https://docs.google.com/document/d/1-UDgY5lQ6tv4mZILZzol-PvCoAYW8yr2Ydn1OxcHMjM/edit#
4
+ """
5
+
6
+ from typing import List, Optional
7
+
8
+ from pydantic import BaseModel
9
+
10
+
11
class Creator(BaseModel):
    """Creator version 2.

    A person contributing to the COMBINE archive.
    """

    # full name of the creator
    name: str
    # FIX: explicit `= None` defaults; under pydantic v2 a plain
    # `Optional[str]` annotation is a *required* field (pydantic v1
    # implicitly defaulted it to None).
    affiliation: Optional[str] = None
    orcid: Optional[str] = None
17
+
18
+
19
class Manifest(BaseModel):
    """Manifest version 2.

    Top-level metadata of a COMBINE archive version 2.
    """

    title: str
    description: str
    access_right: str
    access_conditions: str
    license: str
    creators: List[Creator]
    # FIX: explicit `= None` defaults; under pydantic v2 a plain
    # `Optional[str]` annotation is a *required* field (pydantic v1
    # implicitly defaulted it to None).
    version: Optional[str] = None
    doi: Optional[str] = None
    keywords: List[str]
@@ -0,0 +1 @@
1
+ """Ontologies."""
@@ -0,0 +1,214 @@
1
+ """Lookup of ontology information from the ontology lookup service (OLS).
2
+
3
+ This uses the EMBL-EBI Ontology Lookup Service
4
+ https://www.ebi.ac.uk/ols4
5
+
6
+ """
7
+
8
+ import urllib.parse
9
+ from dataclasses import dataclass, field
10
+ from pathlib import Path
11
+ from typing import Any, Dict, List, Optional
12
+
13
+ import requests
14
+
15
+ import pymetadata
16
+ from pymetadata import log
17
+ from pymetadata.cache import read_json_cache, write_json_cache
18
+ from pymetadata.identifiers.registry import Registry
19
+
20
+
21
# module-wide Identifiers.org registry, used to check OLS providers
registry = Registry()
# module-level logger
logger = log.get_logger(__name__)
23
+
24
+
25
@dataclass
class OLSOntology:
    """An ontology available on the Ontology Lookup Service.

    If no `iri_pattern` is provided, the default OBO PURL pattern
    `http://purl.obolibrary.org/obo/<NAME>_{$Id}` is used.
    """

    name: str
    iri_pattern: Optional[str] = None

    def __post_init__(self) -> None:
        """Fill in the default OBO IRI pattern when none was given."""
        if self.iri_pattern is None:
            prefix = self.name.upper()
            self.iri_pattern = f"http://purl.obolibrary.org/obo/{prefix}_" + "{$Id}"
38
+
39
+
40
# Ontologies supported for OLS term lookup. Entries without an explicit
# `iri_pattern` fall back to the default OBO PURL pattern set in
# OLSOntology.__post_init__.
ONTOLOGIES = [
    # ontologies which are used in most projects
    OLSOntology(name="sbo", iri_pattern="http://biomodels.net/SBO/SBO_{$Id}"),
    OLSOntology(
        name="ncbitaxon", iri_pattern="http://purl.obolibrary.org/obo/NCBITaxon_{$Id}"
    ),
    OLSOntology(name="bto"),
    OLSOntology(name="chebi"),
    OLSOntology(name="cmo"),
    OLSOntology(name="chmo"),
    OLSOntology(name="doid"),
    OLSOntology(name="efo", iri_pattern="http://www.ebi.ac.uk/efo/EFO_{$Id}"),
    OLSOntology(name="fix"),
    OLSOntology(name="fma"),
    OLSOntology(name="foodon"),
    OLSOntology(name="go"),
    OLSOntology(name="hp"),
    OLSOntology(name="nbo"),
    OLSOntology(name="obi"),
    OLSOntology(name="mondo"),
    OLSOntology(name="ncit"),
    OLSOntology(name="mp"),
    OLSOntology(name="oba"),
    OLSOntology(name="opmi"),
    OLSOntology(name="omit"),
    OLSOntology(
        name="sio", iri_pattern="http://semanticscience.org/resource/SIO_{$Id}"
    ),
    OLSOntology(name="vto"),
]
70
+
71
+
72
class OLSQuery:
    """Handling OLS queries.

    Resolves ontology terms via the EMBL-EBI Ontology Lookup Service
    (OLS4). Successful JSON responses are optionally cached on disk
    under `<cache_path>/ols`.
    """

    url_term_query = "https://www.ebi.ac.uk/ols4/api/ontologies/{}/terms/{}"

    def __init__(
        self,
        ontologies: List[OLSOntology],
        cache_path: Optional[Path] = None,
        cache: Optional[bool] = None,
    ):
        """Initialize OLSQuery.

        :param ontologies: ontologies available for IRI resolution
        :param cache_path: base cache directory; defaults to
            `pymetadata.CACHE_PATH`
        :param cache: enable response caching; defaults to
            `pymetadata.CACHE_USE`
        """
        self.ontologies: Dict[str, OLSOntology] = {
            ontology.name: ontology for ontology in ontologies
        }
        # FIX: compare against None; `if not cache:` also overrode an
        # explicitly passed `cache=False` with the global default.
        if cache_path is None:
            cache_path = pymetadata.CACHE_PATH
        if cache is None:
            cache = pymetadata.CACHE_USE

        self.cache_path = cache_path / "ols"
        self.cache = cache

        if cache and not self.cache_path.exists():
            self.cache_path.mkdir(parents=True)

    def get_iri(self, ontology: str, term: str) -> str:
        """Get IRI for a term in the given ontology.

        Unregistered ontologies fall back to the default OBO PURL
        pattern with a warning.

        :param ontology: lower case ontology key, e.g. 'chebi'
        :param term: term id, optionally prefixed, e.g. 'CHEBI:1234'
        :return: resolved IRI
        :raises ValueError: if the registered ontology has no iri pattern
        """
        ols_ontology: Optional[OLSOntology] = self.ontologies.get(ontology, None)
        # remove prefix if existing, e.g. 'CHEBI:1234' -> '1234'
        if term.startswith(ontology.upper()):
            term = term.replace(f"{ontology.upper()}:", "")

        if ols_ontology is None:
            logger.warning(
                f"Ontology '{ontology}' is not registered, using default iri."
            )
            iri = f"http://purl.obolibrary.org/obo/{ontology.upper()}_{term}"
        else:
            if not ols_ontology.iri_pattern:
                # FIX: closed the unbalanced backtick in the message
                raise ValueError(f"No iri pattern for `{ols_ontology}`")
            iri = ols_ontology.iri_pattern.replace("{$Id}", term)

        return iri

    def query_ols(self, ontology: Optional[str], term: Optional[str]) -> Dict:
        """Query the ontology lookup service for a term.

        :param ontology: collection/ontology key; may be None/empty
        :param term: term id; may be None/empty
        :return: the OLS term document extended with 'errors' and
            'warnings' lists; on failure a dict with only those lists
        """
        if not ontology:
            return {"errors": [], "warnings": ["No collection."]}
        if not term:
            return {"errors": [], "warnings": [f"No term: '{ontology}'"]}

        # check via the Identifiers.org registry that the ontology has
        # an OLS provider at all
        namespace = registry.ns_dict.get(ontology)
        ols_pattern = None
        if namespace and namespace.resources:
            for ns_resource in namespace.resources:
                if ns_resource.providerCode == "ols":
                    ols_pattern = ns_resource.urlPattern
                    break

        if not ols_pattern:
            return {
                "errors": [],
                "warnings": [f"'{ontology}' is not on OLS."],
            }

        # OLS names the taxonomy collection 'ncbitaxon'
        if ontology == "taxonomy":
            ontology = "ncbitaxon"

        iri = self.get_iri(ontology=ontology, term=term)

        # double urlencode iri for OLS
        urliri = urllib.parse.quote(iri, safe="")
        urliri = urllib.parse.quote(urliri, safe="")
        cache_path = self.cache_path / f"{urliri}.json"
        data: Dict[str, Any] = {}
        if self.cache:
            try:
                data = read_json_cache(cache_path=cache_path)
            except IOError:
                # cache does not exist yet
                pass

        if not data:
            url = self.url_term_query.format(ontology, urliri)
            logger.info(f"Query: {url}")
            response = requests.get(url)

            if response.status_code != 200:
                # FIX: return early so transient HTTP failures are not
                # persisted in the cache (previously the error dict was
                # written to the cache file).
                return {
                    "errors": [f"{response.status_code} response for: '{url}'"],
                    "warnings": [],
                }

            data = response.json()
            if not data or "error" in data:
                error_msg = (
                    f"Error in OLS query <{ontology}|{term}> at {url}: {data}"
                )
                logger.error(error_msg)
                return {
                    "errors": [error_msg],
                    "warnings": [],
                }

            data["errors"] = []
            data["warnings"] = []
            # only successful responses are cached
            if self.cache:
                write_json_cache(data=data, cache_path=cache_path)  # type: ignore

        return data

    def process_response(self, term: Dict) -> Dict[str, Any]:
        """Process the response dictionary into a flat annotation dict.

        :param term: OLS term document as returned by `query_ols`
        :return: dict with 'errors', 'warnings', 'label', 'description',
            'synonyms' and 'xrefs' keys
        """
        data = {
            "errors": term["errors"],
            "warnings": term["warnings"],
        }

        label = term.get("label", None)
        description = term.get("description", None)
        # fallback description from the annotation definition
        if description is None:
            annotation = term.get("annotation")
            if annotation:
                definition = annotation.get("definition")
                if definition:
                    description = definition[0]

        # OLS sometimes returns the description as a list of strings
        if description and isinstance(description, list):
            description = description[0]
        synonyms = term.get("obo_synonym", [])
        xrefs = term.get("obo_xref", [])

        return {
            **data,
            "label": label,
            "description": description,
            "synonyms": synonyms,
            "xrefs": xrefs,
        }
@@ -0,0 +1,312 @@
1
+ """Ontology support.
2
+
3
+ This file allows to download the ontologies for local use.
4
+ Special ontologies are provided as enums.
5
+
6
+ Uses the OWL links provided on OLS4 to download the ontologies.
7
+ """
8
+
9
+ import gzip
10
+ import importlib
11
+ import re
12
+ import shutil
13
+ import tempfile
14
+ import warnings
15
+ from concurrent.futures import ThreadPoolExecutor
16
+ from dataclasses import dataclass
17
+ from enum import Enum
18
+ from pathlib import Path
19
+ from typing import Any, Dict, List, Optional, Union
20
+
21
+ import pronto
22
+ import requests
23
+ from jinja2 import Template
24
+ from pronto.ontology import Ontology as ProntoOntology
25
+ from pronto.relationship import Relationship as ProntoRelationship
26
+ from pronto.term import Term as ProntoTerm
27
+
28
+ from pymetadata import ENUM_DIR, RESOURCES_DIR, log
29
+
30
+ logger = log.get_logger(__name__)
31
+
32
+
33
class OntologyFormat(str, Enum):
    """Formats for ontologies.

    The value doubles as the file extension of stored ontology files.
    """

    OBO = "obo"  # OBO flat file format
    OWL = "owl"  # Web Ontology Language
38
+
39
+
40
@dataclass
class OntologyFile:
    """Definition file for ontology.

    Describes where an ontology can be downloaded from and where its
    gzipped copy is stored in the package resources.
    """

    id: str  # upper case ontology id, e.g. 'CHEBI'
    name: str  # human readable ontology name
    format: OntologyFormat
    source: str  # download URL
    bioportal: bool  # available on BioPortal
    ols: bool  # available on OLS

    @property
    def path(self) -> Path:
        """Path of the gzipped ontology file in the package resources."""
        return RESOURCES_DIR / "ontologies" / f"{self.id.lower()}.{self.format}.gz"

    @property
    def filename(self) -> str:
        """Filename of ontology file.

        :return: ontology filename
        :rtype: str
        """
        # FIX: removed stray debug `print` of the path
        return str(self.path)
66
+
67
+
68
# Ontologies bundled with the package; downloaded/updated via
# `update_ontology_files` and stored gzipped under the resources dir.
_ontology_files: List[OntologyFile] = [
    OntologyFile(
        "BTO",
        name="The BRENDA Tissue Ontology (BTO)",
        format=OntologyFormat.OWL,
        source="http://purl.obolibrary.org/obo/bto.owl",
        bioportal=False,
        ols=True,
    ),
    OntologyFile(
        "CHEBI",
        name="Chemical Entities of Biological Interest Ontology",
        format=OntologyFormat.OWL,
        source="http://purl.obolibrary.org/obo/chebi.owl",
        bioportal=True,
        ols=True,
    ),
    OntologyFile(
        "FMA",
        name="Foundational Model of Anatomy",
        format=OntologyFormat.OWL,
        source="http://purl.obolibrary.org/obo/fma.owl",
        bioportal=True,
        ols=True,
    ),
    OntologyFile(
        "ECO",
        name="Evidence & Conclusion Ontology (ECO)",
        format=OntologyFormat.OWL,
        source="http://purl.obolibrary.org/obo/eco.owl",
        bioportal=True,
        ols=True,
    ),
    OntologyFile(
        "GO",
        name="Gene Ontology",
        format=OntologyFormat.OWL,
        source="http://purl.obolibrary.org/obo/go/extensions/go-plus.owl",
        bioportal=True,
        ols=True,
    ),
    OntologyFile(
        "KISAO",
        name="Kinetic Simulation Algorithm Ontology",
        format=OntologyFormat.OWL,
        # source="https://raw.githubusercontent.com/SED-ML/KiSAO/deploy/kisao.owl",
        source="https://raw.githubusercontent.com/SED-ML/KiSAO/dev/kisao.owl",
        bioportal=True,
        ols=True,
    ),
    OntologyFile(
        "SBO",
        name="Systems Biology Ontology",
        format=OntologyFormat.OWL,
        source="https://raw.githubusercontent.com/EBI-BioModels/SBO/master/SBO_OWL.owl",
        bioportal=True,
        ols=True,
    ),
    OntologyFile(
        "NCIT",
        name="National Cancer Institute Thesaurus",
        format=OntologyFormat.OWL,
        source="http://purl.obolibrary.org/obo/ncit.owl",
        bioportal=True,
        ols=True,
    ),
    # OntologyFile(
    #     "NCBITAXON",
    #     name="NCBI organismal classification",
    #     format=OntologyFormat.OWL,
    #     source=OLS_BASE_URL + "ncbitaxon",
    #     bioportal=False,
    #     ols=True,
    # ),
]
143
+
144
+
145
# lookup of ontology files by upper case ontology id, e.g. 'CHEBI'
ontology_files: Dict[str, OntologyFile] = {
    ontology.id: ontology for ontology in _ontology_files
}
148
+
149
+
150
def update_ontology_file(ofile: OntologyFile) -> None:
    """Download the latest version of a single ontology.

    The OWL file is streamed from `ofile.source` into a temporary
    directory; only a gzip-compressed copy is stored under the package
    resources.

    :param ofile: the ontology file definition to update
    """
    oid = ofile.id
    logger.info(f"Update ontology: `{oid}`")

    with tempfile.TemporaryDirectory() as tmp_dir:
        # stream the download into a temporary location
        owl_path = Path(tmp_dir) / f"{oid.lower()}.owl"
        with requests.get(ofile.source, stream=True) as response:
            response.raise_for_status()
            with open(owl_path, "wb") as f_owl:
                for chunk in response.iter_content(chunk_size=8192):
                    f_owl.write(chunk)

        # only the gzip compressed version is kept in the resources
        gzip_path = RESOURCES_DIR / "ontologies" / f"{oid.lower()}.owl.gz"
        with open(owl_path, "rb") as f_in:
            with gzip.open(gzip_path, "wb") as f_out:
                shutil.copyfileobj(f_in, f_out)
172
+
173
+
174
def update_ontology_files() -> None:
    """Download latest ontology files in parallel.

    FIX: the futures returned by `pool.submit` were previously
    discarded, so any download exception was silently swallowed.
    Results are now awaited and failures are logged per ontology.
    """
    with ThreadPoolExecutor(max_workers=4) as pool:
        futures = {
            pool.submit(update_ontology_file, ofile): oid
            for oid, ofile in ontology_files.items()
        }
        for future, oid in futures.items():
            try:
                future.result()
            except Exception:
                logger.exception(f"Update failed for ontology `{oid}`")
179
+
180
+
181
class Ontology:
    """Ontology.

    Wraps a `pronto.Ontology` parsed from the bundled gzipped OWL file.

    NOTE(review): the parsed ontology is stored on the CLASS attribute
    `_ontology`, so constructing a second `Ontology` instance replaces
    the ontology seen by all existing instances — confirm this sharing
    is intentional.
    """

    # class-level storage, shared by all instances (see NOTE above)
    _ontology: Optional[ProntoOntology] = None

    def __init__(self, ontology_id: str):
        """Construct ontology.

        :param ontology_id: key into `ontology_files`, e.g. 'SBO'
        """
        ontology_file = ontology_files[ontology_id]
        logger.info(f"Read ontology: `{ontology_id}`")

        # read ontology with pronto; pronto emits noisy syntax and
        # not-implemented warnings for some OWL files, suppressed here
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", pronto.utils.warnings.SyntaxWarning)
            warnings.simplefilter("ignore", pronto.utils.warnings.NotImplementedWarning)
            self.__class__._ontology = pronto.Ontology(ontology_file.filename)

    def get_pronto_ontology(self) -> Optional[ProntoOntology]:
        """Get a pronto object for the ontology.

        :return: `pronto.Ontology`: pronto object for the ontology,
            or None if no ontology has been read yet
        :rtype: [type]
        """
        return self._ontology
204
+
205
+
206
def create_ontology_enum(ontology_id: str, pattern: str) -> None:
    """Create enum of the ontology.

    Reads the ontology, collects all named terms, and renders them into
    a python module (one Enum) using the `ontology_enum.pytemplate`
    jinja2 template.

    :param ontology_id: key into `ontology_files`, e.g. 'SBO'
    :param pattern: regex pattern for valid term ids in the enum
    """
    logger.info(f"Create enum: `{ontology_id}`")

    def name_to_variable(name: str) -> Optional[str]:
        """Clean string to python variable name."""
        if name is None:
            return None
        return re.sub(r"\W|^(?=\d)", "_", name).upper()

    # load ontology
    ontology: Ontology = Ontology(ontology_id=ontology_id)
    if not ontology._ontology:
        raise ValueError(f"No Pronto Ontology for `{ontology_id}`")

    terms: Dict[str, Dict] = {}
    names = set()
    pronto_term: Union[ProntoTerm, ProntoRelationship]

    for key in ontology._ontology:
        pronto_term = ontology._ontology[key]

        pronto_name: Union[str, None, Any] = pronto_term.name
        if not isinstance(pronto_name, str):
            logger.warning(f"Pronto name is none: `{pronto_term}`")
            continue

        var_name: Optional[str] = name_to_variable(pronto_name)
        if var_name in names:
            logger.error(f"Duplicate name in ontology: `{var_name}`")
            continue
        names.add(var_name)

        term_id = pronto_term.id
        # fix the ids
        if ontology_id == "KISAO":
            term_id = term_id.replace("http://www.biomodels.net/kisao/KISAO#", "")
        if ontology_id == "SBO":
            term_id = term_id.replace("http://biomodels.net/SBO/", "")
        if ":" in term_id:
            term_id = term_id.replace(":", "_")

        terms[term_id] = {
            "id": term_id,
            "var_name": var_name,
            "name": pronto_name.replace('"', "'"),
            "definition": pronto_term.definition,
        }

    # terms sorted by id for a stable, diff-friendly module
    terms_sorted = {key: terms[key] for key in sorted(terms)}

    # render the module from the template
    template_path = RESOURCES_DIR / "templates" / "ontology_enum.pytemplate"
    with open(template_path, "r") as f_template:
        template = Template(
            f_template.read(),
            trim_blocks=True,
            lstrip_blocks=True,
        )

    module_str = template.render(
        ontology_id=ontology_id,
        terms=terms_sorted,
        pattern=pattern,
    )
    path_module = ENUM_DIR / f"{ontology_id.lower()}.py"
    print(path_module)
    with open(path_module, "w") as f_py:
        f_py.write(module_str)
282
+
283
+
284
def try_ontology_import(ontology_id: str) -> None:
    """Try import of created module.

    Raises ImportError if the generated `pymetadata.metadata.<id>`
    module cannot be imported.
    """
    module_name = f"pymetadata.metadata.{ontology_id.lower()}"
    importlib.import_module(module_name)
288
+
289
+
290
if __name__ == "__main__":
    # download latest versions of all registered ontologies
    update_ontology_files()

    # test loading of OWL files
    # ofile: OntologyFile
    # for oid, ofile in ontology_files.items():
    #     console.rule(style="white")
    #     ontology = Ontology(ontology_id=oid)
    #     console.print(ontology)
    # ontology = Ontology(ontology_id="CHEBI")

    # convert to python module (enum per ontology)
    create_ontology_enum("SBO", r"^SBO_\d{7}$")
    create_ontology_enum("KISAO", r"^KISAO_\d{7}$")
    create_ontology_enum("ECO", r"^ECO_\d{7}$")

    # smoke-test that the generated modules import cleanly
    try_ontology_import("SBO")
    try_ontology_import("KISAO")
    try_ontology_import("ECO")

    # for ontology_id in ontology_files:
    #     create_ontology_enum(ontology_id)
pymetadata/py.typed ADDED
File without changes