pymetadata 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pymetadata might be problematic. Click here for more details.

Files changed (42) hide show
  1. pymetadata/__init__.py +14 -0
  2. pymetadata/cache.py +52 -0
  3. pymetadata/chebi.py +92 -0
  4. pymetadata/console.py +18 -0
  5. pymetadata/core/__init__.py +1 -0
  6. pymetadata/core/annotation.py +396 -0
  7. pymetadata/core/creator.py +46 -0
  8. pymetadata/core/synonym.py +12 -0
  9. pymetadata/core/xref.py +66 -0
  10. pymetadata/examples/__init__.py +1 -0
  11. pymetadata/examples/cache_path_example.py +15 -0
  12. pymetadata/examples/omex_example.py +46 -0
  13. pymetadata/examples/results/test_from_files.omex +0 -0
  14. pymetadata/examples/results/test_from_omex.omex +0 -0
  15. pymetadata/examples/results/testomex/README.md +3 -0
  16. pymetadata/examples/results/testomex/manifest.xml +9 -0
  17. pymetadata/examples/results/testomex/models/omex_comp.xml +174 -0
  18. pymetadata/examples/results/testomex/models/omex_comp_flat.xml +215 -0
  19. pymetadata/examples/results/testomex/models/omex_minimal.xml +99 -0
  20. pymetadata/examples/test.omex +0 -0
  21. pymetadata/identifiers/__init__.py +1 -0
  22. pymetadata/identifiers/miriam.py +43 -0
  23. pymetadata/identifiers/registry.py +397 -0
  24. pymetadata/log.py +29 -0
  25. pymetadata/metadata/__init__.py +6 -0
  26. pymetadata/metadata/eco.py +15918 -0
  27. pymetadata/metadata/kisao.py +2731 -0
  28. pymetadata/metadata/sbo.py +3754 -0
  29. pymetadata/omex.py +771 -0
  30. pymetadata/omex_v2.py +30 -0
  31. pymetadata/ontologies/__init__.py +1 -0
  32. pymetadata/ontologies/ols.py +214 -0
  33. pymetadata/ontologies/ontology.py +312 -0
  34. pymetadata/py.typed +0 -0
  35. pymetadata/resources/chebi_webservice_wsdl.xml +509 -0
  36. pymetadata/resources/ontologies/README.md +4 -0
  37. pymetadata/resources/templates/ontology_enum.pytemplate +61 -0
  38. pymetadata/unichem.py +190 -0
  39. pymetadata-0.5.0.dist-info/METADATA +154 -0
  40. pymetadata-0.5.0.dist-info/RECORD +42 -0
  41. pymetadata-0.5.0.dist-info/WHEEL +4 -0
  42. pymetadata-0.5.0.dist-info/licenses/LICENSE +7 -0
@@ -0,0 +1,43 @@
1
+ """Module for working with MIRIAM qualifiers."""
2
+
3
+ from enum import Enum
4
+
5
+ from pymetadata import log
6
+
7
+
8
# Module-level logger, created through the package's own logging helper.
logger = log.get_logger(__name__)

# Public API of this module: the two qualifier enumerations defined below.
__all__ = [
    "BQM",
    "BQB",
]
14
+
15
+
16
class BQM(Enum):
    """MIRIAM model qualifier.

    Every member's value is a string made of the ``BQM_`` prefix followed by
    the member's own name, so ``BQM.IS.value == "BQM_IS"``.
    """

    IS = "BQM_IS"
    IS_DESCRIBED_BY = "BQM_IS_DESCRIBED_BY"
    IS_DERIVED_FROM = "BQM_IS_DERIVED_FROM"
    IS_INSTANCE_OF = "BQM_IS_INSTANCE_OF"
    HAS_INSTANCE = "BQM_HAS_INSTANCE"
    # NOTE(review): presumably the fallback for unrecognised qualifiers - confirm with callers.
    UNKNOWN = "BQM_UNKNOWN"
25
+
26
+
27
class BQB(Enum):
    """MIRIAM biological qualifier.

    Every member's value is a string made of the ``BQB_`` prefix followed by
    the member's own name, so ``BQB.HAS_PART.value == "BQB_HAS_PART"``.
    """

    IS = "BQB_IS"
    HAS_PART = "BQB_HAS_PART"
    IS_PART_OF = "BQB_IS_PART_OF"
    IS_VERSION_OF = "BQB_IS_VERSION_OF"
    HAS_VERSION = "BQB_HAS_VERSION"
    IS_HOMOLOG_TO = "BQB_IS_HOMOLOG_TO"
    IS_DESCRIBED_BY = "BQB_IS_DESCRIBED_BY"
    IS_ENCODED_BY = "BQB_IS_ENCODED_BY"
    ENCODES = "BQB_ENCODES"
    OCCURS_IN = "BQB_OCCURS_IN"
    HAS_PROPERTY = "BQB_HAS_PROPERTY"
    IS_PROPERTY_OF = "BQB_IS_PROPERTY_OF"
    HAS_TAXON = "BQB_HAS_TAXON"
    # NOTE(review): presumably the fallback for unrecognised qualifiers - confirm with callers.
    UNKNOWN = "BQB_UNKNOWN"
@@ -0,0 +1,397 @@
1
+ """
2
+ Helper tools to work with identifiers registry.
3
+
4
+ https://identifiers.org/
5
+ https://docs.identifiers.org/articles/api.html
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import inspect
11
+ import os
12
+ import time
13
+ from dataclasses import dataclass, field
14
+ from pathlib import Path
15
+ from typing import Any, Dict, List, Optional
16
+
17
+ import requests
18
+
19
+ import pymetadata
20
+ from pymetadata import log
21
+ from pymetadata.cache import DataclassJSONEncoder, read_json_cache, write_json_cache
22
+
23
+
24
# Module-level logger, created through the package's own logging helper.
logger = log.get_logger(__name__)
25
+
26
+
27
@dataclass
class Resource:
    """Single resource (provider) entry belonging to a namespace.

    Instances are normally created through :meth:`from_dict` from the
    dictionaries found in a namespace's ``resources`` list. Only ``id``,
    ``providerCode``, ``name`` and ``urlPattern`` are shown in ``repr``; all
    other fields are hidden to keep the representation short.
    """

    id: Optional[int]
    providerCode: str
    name: str
    urlPattern: str
    mirId: Optional[str] = field(repr=False)
    description: str = field(repr=False)
    official: bool = field(repr=False)

    sampleId: Optional[str] = field(repr=False)
    resourceHomeUrl: Optional[str] = field(repr=False)
    institution: dict = field(repr=False)
    location: dict = field(repr=False)
    deprecated: bool = field(repr=False)
    # Optional: custom resources in this module are created with ``None`` here.
    deprecationDate: Optional[str] = field(repr=False)
    protectedUrls: bool = field(repr=False, default=False)
    renderProtectedLanding: bool = field(repr=False, default=False)
    authHelpUrl: Optional[str] = field(repr=False, default=None)
    authHelpDescription: Optional[str] = field(repr=False, default=None)

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "Resource":
        """Create a resource from a dictionary, ignoring unknown keys.

        :param d: mapping of field names to values; keys that are not
            constructor parameters are silently dropped.
        :return: new instance built from the accepted keys.
        :raises TypeError: if a field without a default is missing from ``d``.
        """
        # Resolve the constructor signature once instead of once per key.
        accepted = inspect.signature(cls).parameters
        return cls(**{key: value for key, value in d.items() if key in accepted})
56
+
57
+
58
@dataclass
class Namespace:
    """Namespace entry together with the resources that resolve it.

    After construction ``resources`` is always a list: ``None`` becomes an
    empty list and dictionary entries are converted to :class:`Resource`.
    """

    id: Optional[str]
    prefix: Optional[str]
    name: str
    pattern: str
    namespaceEmbeddedInLui: bool
    description: str = field(repr=False)
    mirId: Optional[str] = field(repr=False, default=None)
    resources: Optional[List] = field(repr=False, default=None)
    created: Optional[str] = field(repr=False, default=None)
    modified: Optional[str] = field(repr=False, default=None)
    sampleId: Optional[str] = field(repr=False, default=None)
    deprecated: bool = field(repr=False, default=False)
    deprecationDate: Optional[str] = field(repr=False, default=None)

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "Namespace":
        """Create a namespace from a dictionary, ignoring unknown keys.

        :param d: mapping of field names to values; keys that are not
            constructor parameters are silently dropped.
        :return: new instance built from the accepted keys.
        :raises TypeError: if a field without a default is missing from ``d``.
        """
        # Resolve the constructor signature once instead of once per key.
        accepted = inspect.signature(cls).parameters
        return cls(**{key: value for key, value in d.items() if key in accepted})

    def __post_init__(self) -> None:
        """Normalise ``resources`` to a list of :class:`Resource` objects."""
        if self.resources is None:
            self.resources = []
            return

        # Entries that are already Resource instances are kept as they are, so
        # a namespace can be rebuilt from another namespace's resource list.
        self.resources = [
            entry if isinstance(entry, Resource) else Resource.from_dict(entry)
            for entry in self.resources
        ]
89
+
90
+
91
def ols_namespaces() -> Dict[str, Namespace]:
    """Define ontologies available from OLS but not in identifiers.org.

    Each ontology becomes a :class:`Namespace` with ``id=None`` and
    ``namespaceEmbeddedInLui=True`` that carries exactly one OLS
    :class:`Resource`.

    :return: dictionary mapping the namespace prefix to its namespace.
    """
    # Provider information shared by all OLS resources. The nested dictionaries
    # are passed on by reference, i.e. all resources share the same objects.
    ols_info: Dict = {
        "deprecated": False,
        "deprecationDate": None,
        "institution": {
            "description": "At EMBL-EBI, we make the "
            "world’s public biological data "
            "freely available to the "
            "scientific community via a "
            "range of services and tools, "
            "perform basic research and "
            "provide professional training "
            "in bioinformatics. \n"
            "We are part of the European "
            "Molecular Biology Laboratory "
            "(EMBL), an international, "
            "innovative and "
            "interdisciplinary research "
            "organisation funded by 26 "
            "member states and two "
            "associate member states.",
            "homeUrl": "https://www.ebi.ac.uk",
            "id": 2,
            "location": {"countryCode": "GB", "countryName": "United Kingdom"},
            "name": "European Bioinformatics Institute",
            "rorId": "https://ror.org/02catss52",
        },
        "location": {"countryCode": "GB", "countryName": "United Kingdom"},
        "official": False,
        "providerCode": "ols",
    }

    # Custom namespaces for OLS ontology, for simple support.
    # Columns: (prefix, pattern, name, description)
    definitions = [
        (
            "snomed",
            r"^\d+$",
            "SNOMED",
            "SNOMED CT or SNOMED Clinical Terms is a systematically organized computer processable collection of medical terms providing codes, terms, synonyms and definitions used in clinical documentation and reporting.",
        ),
        # NOTE(review): prefix/name say OMIM but pattern and description are
        # those of the Molecular Interactions vocabulary (MI) - confirm which
        # ontology is intended before changing; kept as is for compatibility.
        (
            "omim",
            r"^MI:\d+$",
            "OMIM",
            "Molecular Interactions Controlled Vocabulary",
        ),
        ("dron", r"^DRON:\d+$", "DRON", "The drug ontology"),
        (
            "cmo",
            r"^CMO:\d+$",
            # Fixed: was "Chemical methods ontology" (copied from chmo).
            "Clinical measurement ontology",
            "Morphological and physiological measurement records "
            "generated from clinical and model organism research and health programs.",
        ),
        (
            "chmo",
            r"^CHMO:\d+$",
            "Chemical methods ontology",
            "CHMO, the chemical methods ontology",
        ),
        (
            "vto",
            r"^VTO:\d+$",
            "Vertebrate Taxonomy Ontology",
            "VTO Vertebrate Taxonomy Ontology",
        ),
        (
            "opmi",
            r"^OPMI:\d+$",
            "Ontology of Precision Medicine and Investigation",
            "OPMI: Ontology of Precision Medicine and Investigation",
        ),
        ("mondo", r"^MONDO:\d+$", "MONDO", "MONDO"),
        ("atol", r"^ATOL:\d+$", "ATOL", "Animal Trait Ontology for Livestock"),
        ("nbo", r"^NBO:\d+$", "NBO", "Neuro Behavior Ontology"),
        (
            "scdo",
            r"^SCDO:\d+$",
            "Sickle Cell Disease Ontology",
            "Sickle Cell Disease Ontology",
        ),
        (
            "fix",
            r"^FIX:\d+$",
            "Physico-chemical methods and properties Ontology",
            "Physico-chemical methods and properties Ontology",
        ),
        (
            "oba",
            r"^OBA:\d+$",
            "Ontology of Biological Attributes",
            # Fixed: previously carried the PubChem description by mistake.
            "A collection of biological attributes (traits) covering all "
            "kingdoms of life.",
        ),
        (
            "mmo",
            r"^MMO:\d+$",
            "Measurement method ontology",
            "Measurement method ontology",
        ),
    ]

    namespaces: Dict[str, Namespace] = {}
    for prefix, pattern, name, description in definitions:
        ns = Namespace(
            id=None,
            prefix=prefix,
            pattern=pattern,
            name=name,
            description=description,
            namespaceEmbeddedInLui=True,
        )
        # Namespace.__post_init__ already turns a missing list into an empty
        # one; the guard only protects the append below.
        if not ns.resources:
            ns.resources = []
        ns.resources.append(
            Resource(
                id=None,
                name=f"{prefix} through OLS",
                description=f"{prefix} through OLS",
                mirId=None,
                sampleId=None,
                resourceHomeUrl=None,
                # "{$id}" is a literal placeholder and therefore kept outside
                # of the f-string.
                urlPattern=f"https://www.ebi.ac.uk/ols4/ontologies/{prefix}/terms?obo_id={prefix.upper()}"
                + ":{$id}",
                **ols_info,
            )
        )
        namespaces[prefix] = ns

    return namespaces
262
+
263
+
264
def misc_namespaces() -> Dict[str, Namespace]:
    """Build the additional namespaces that have an id but no prefix.

    :return: dictionary mapping the namespace id to its namespace.
    """
    brenda_ligand = Namespace(
        id="brenda.ligand",
        prefix=None,
        name="BRENDA Ligand",
        pattern=r"^\d+$",
        description="BRENDA Ligand Information",
        namespaceEmbeddedInLui=False,
    )
    metabolights_compound = Namespace(
        id="metabolights.compound",
        prefix=None,
        name="Metabolights compound",
        pattern=r"^MTBLC\d+$",
        description="metabolights compound",
        namespaceEmbeddedInLui=False,
    )

    # These namespaces are keyed by id because their prefix is None.
    by_id = {}
    for entry in (brenda_ligand, metabolights_compound):
        by_id[entry.id] = entry
    return by_id  # type: ignore
286
+
287
+
288
class Registry:
    """Managing the available annotation information.

    Registry of meta information. The namespaces are fetched from the
    identifiers.org resolver webservice, merged with the custom namespaces
    and stored as JSON file in the pymetadata cache directory.
    """

    URL = "https://registry.api.identifiers.org/resolutionApi/getResolverDataset"
    # Seconds to wait for the webservice before the request is aborted.
    TIMEOUT = 60
    CUSTOM_NAMESPACES = {
        **ols_namespaces(),
        **misc_namespaces(),
    }

    def __init__(
        self,
        cache_duration: int = 24,
        cache: bool = True,
    ):
        """Initialize registry.

        The cache file is ``identifiers_registry.json`` inside
        ``pymetadata.CACHE_PATH``.

        :param cache_duration: Duration of caching in hours.
        :param cache: boolean flag to stop caching; if False the registry is
            always refreshed from the webservice.
        """
        self.registry_path = pymetadata.CACHE_PATH / "identifiers_registry.json"

        # An update is needed unless a sufficiently fresh cache file may be used.
        update = True
        if cache and os.path.exists(self.registry_path):
            registry_age = (
                time.time() - os.path.getmtime(self.registry_path)
            ) / 3600  # [hr]
            update = registry_age > cache_duration

        self.ns_dict: Dict[str, Namespace] = (
            self.update() if update else Registry.load_registry(self.registry_path)
        )

    def update(self) -> Dict[str, Namespace]:
        """Update the cache file from the webservice and load it again.

        :return: dictionary of namespaces read back from the cache file.
        """
        Registry.update_registry(registry_path=self.registry_path)
        return Registry.load_registry(registry_path=self.registry_path)

    @staticmethod
    def _ols4_url(url: Optional[str]) -> Optional[str]:
        """Rewrite a legacy OLS url to OLS4; other values are returned as is."""
        if url and url.startswith("https://www.ebi.ac.uk/ols/"):
            return url.replace("/ols/", "/ols4/")
        return url

    @staticmethod
    def update_registry(
        custom_namespaces: Dict[str, Namespace] = CUSTOM_NAMESPACES,
        registry_path: Optional[Path] = None,
    ) -> Dict[str, Namespace]:
        """Update registry from identifiers.org webservice.

        :param custom_namespaces: additional namespaces which are added after
            the downloaded ones and overwrite entries with the same key.
        :param registry_path: if given, the merged registry is written to this
            JSON cache file.
        :return: dictionary of namespaces.
        :raises requests.RequestException: if the request fails, times out or
            the webservice answers with an HTTP error status.
        """
        logger.info(f"Update registry from '{Registry.URL}'")
        # Without a timeout a stalled connection would block forever.
        response = requests.get(Registry.URL, timeout=Registry.TIMEOUT)
        # Fail with a clear HTTP error instead of a JSON/Key error further down.
        response.raise_for_status()
        namespaces = response.json()["payload"]["namespaces"]

        ns_dict = {}
        for data in namespaces:
            ns = Namespace.from_dict(data)

            # bugfix OLS4 (https://github.com/identifiers-org/identifiers-org.github.io/issues/231)
            # resourceHomeUrl is optional, so the helper has to tolerate None.
            for resource in ns.resources or []:
                resource.urlPattern = Registry._ols4_url(resource.urlPattern)
                resource.resourceHomeUrl = Registry._ols4_url(
                    resource.resourceHomeUrl
                )

            ns_dict[ns.prefix] = ns

        if custom_namespaces is not None:
            for key, ns in custom_namespaces.items():
                if key in ns_dict:
                    logger.error(
                        f"Namespace with key '{key}' exists in MIRIAM. Overwrite namespace!"
                    )
                ns_dict[key] = ns

        if registry_path is not None:
            write_json_cache(
                data=ns_dict,
                cache_path=registry_path,
                json_encoder=DataclassJSONEncoder,
            )

        return ns_dict  # type: ignore

    @staticmethod
    def load_registry(registry_path: Path) -> Dict[str, Namespace]:
        """Load namespaces with resources from path.

        If the cache file does not exist it is created first via
        :meth:`update_registry`.

        :param registry_path: path of the JSON cache file.
        :return: dictionary of namespaces.
        :raises ValueError: if the cache could not be read or is empty.
        """
        if not registry_path.exists():
            Registry.update_registry(registry_path=registry_path)

        d = read_json_cache(cache_path=registry_path)
        if not d:
            raise ValueError("Registry could not be loaded from cache.")

        # from_dict ignores unknown keys, so a cache file containing fields
        # this version does not know can still be loaded.
        return {k: Namespace.from_dict(v) for k, v in d.items()}
392
+
393
+
394
# Shared module-level registry. It is constructed when this module is imported,
# so the import runs Registry.__init__, which either loads the cache file or
# refreshes it from the webservice.
REGISTRY = Registry()

if __name__ == "__main__":
    # Running the module as a script constructs one more Registry instance.
    registry = Registry()
pymetadata/log.py ADDED
@@ -0,0 +1,29 @@
1
+ """Module for logging.
2
+
3
+ Using rich for output formatting.
4
+ """
5
+
6
+ import logging
7
+
8
+ from rich.logging import RichHandler
9
+
10
+ from pymetadata.console import console
11
+
12
+
13
def get_logger(name: str, level: int = logging.INFO) -> logging.Logger:
    """Get custom logger for name.

    The logger writes through a rich handler bound to the package console.
    ``logging.getLogger`` returns the same object for the same name, so the
    handler is attached only if the logger has no handler yet; otherwise
    repeated calls would emit every record multiple times.

    :param name: name of the logger, typically ``__name__`` of the caller.
    :param level: logging level which is set on the logger on every call.
    :return: configured logger.
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)

    if logger.handlers:
        # Already configured, do not stack a second handler.
        return logger

    formatter = logging.Formatter(
        fmt="%(message)s",
        datefmt="[%X]",
    )
    handler = RichHandler(
        markup=False, rich_tracebacks=True, show_time=False, console=console
    )
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    return logger
@@ -0,0 +1,6 @@
1
+ """Package handling ontologies."""
2
+
3
+ from .sbo import SBO, SBOType
4
+ from .kisao import KISAO, KISAOType
5
+
6
# Public API of the package: the names re-exported from the sbo and kisao modules.
__all__ = ["SBO", "SBOType", "KISAO", "KISAOType"]