pymetadata 0.5.2__py3-none-any.whl → 0.5.4__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pymetadata might be problematic. Click here for more details.

pymetadata/__init__.py CHANGED
@@ -3,7 +3,7 @@
3
3
  from pathlib import Path
4
4
 
5
5
  __author__ = "Matthias Koenig"
6
- __version__ = "0.5.2"
6
+ __version__ = "0.5.4"
7
7
 
8
8
 
9
9
  program_name: str = "pymetadata"
pymetadata/chebi.py CHANGED
@@ -1,28 +1,19 @@
1
1
  """Module for working with chebi."""
2
2
 
3
3
  from pathlib import Path
4
- from pprint import pprint
5
4
  from typing import Any, Dict, Optional
6
-
7
- from zeep import Client
8
-
5
+ import requests
9
6
 
10
7
  import pymetadata
11
8
  from pymetadata import log
12
9
  from pymetadata.cache import DataclassJSONEncoder, read_json_cache, write_json_cache
10
+ from pymetadata.console import console
13
11
 
14
12
  logger = log.get_logger(__name__)
15
13
 
16
- # FIXME: copy the file to the cache dir
17
- client = Client(str(pymetadata.RESOURCES_DIR / "chebi_webservice_wsdl.xml"))
18
-
19
14
 
20
15
  class ChebiQuery:
21
- """Class to query information from ChEBI.
22
-
23
- An overview over available methods:
24
- python -mzeep https://www.ebi.ac.uk/webservices/chebi/2.0/webservice?wsdl
25
- """
16
+ """Class to query information from ChEBI."""
26
17
 
27
18
  @staticmethod
28
19
  def query(
@@ -52,27 +43,28 @@ class ChebiQuery:
52
43
 
53
44
  # fetch and cache data
54
45
  if not data:
55
- try:
56
- result = client.service.getCompleteEntity(chebi)
57
- # print(result)
58
- except Exception:
46
+ response = requests.get(
47
+ url=f"https://www.ebi.ac.uk/chebi/backend/api/public/compounds/?chebi_ids={chebi}"
48
+ )
49
+ if response.status_code == 200:
50
+ result = response.json()
51
+ else:
59
52
  logger.error(f"CHEBI information could not be retrieved for: {chebi}")
60
53
  return dict()
61
54
 
62
- # parse formula
63
- formula = None
64
- formulae = result["Formulae"]
65
- if formulae:
66
- formula = formulae[0]["data"]
67
-
55
+ result = result[chebi]["data"]
56
+ chemical_data = result["chemical_data"]
57
+ default_structure = result["default_structure"]
68
58
  data = {
69
59
  "chebi": chebi,
70
- "name": result["chebiAsciiName"],
60
+ "name": result["ascii_name"],
71
61
  "definition": result["definition"],
72
- "formula": formula,
73
- "charge": result["charge"],
74
- "mass": result["mass"],
75
- "inchikey": result["inchiKey"],
62
+ "formula": chemical_data["formula"] if chemical_data else None,
63
+ "charge": chemical_data["charge"] if chemical_data else None,
64
+ "mass": chemical_data["mass"] if chemical_data else None,
65
+ "inchikey": default_structure["standard_inchi_key"]
66
+ if default_structure
67
+ else None,
76
68
  }
77
69
 
78
70
  logger.info(f"Write chebi: {chebi_path}")
@@ -86,7 +78,7 @@ class ChebiQuery:
86
78
  if __name__ == "__main__":
87
79
  chebis = ["CHEBI:2668", "CHEBI:138366", "CHEBI:9637", "CHEBI:155897"]
88
80
  for chebi in chebis:
89
- print(chebi)
81
+ console.rule(chebi, align="left", style="bold white")
90
82
  d = ChebiQuery.query(chebi=chebi, cache=False)
91
- pprint(d)
83
+ console.print(d)
92
84
  d = ChebiQuery.query(chebi=chebi, cache=True)
@@ -98,6 +98,8 @@ class RDFAnnotation:
98
98
  f"{resource} does not conform to "
99
99
  f"http(s)://identifiers.org/collection/id or http(s)://identifiers.org/id",
100
100
  )
101
+
102
+ # handle urns
101
103
  elif resource.startswith("urn:miriam:"):
102
104
  match3 = MIRIAM_URN_PATTERN.match(resource)
103
105
  if match3:
@@ -113,13 +115,14 @@ class RDFAnnotation:
113
115
  else:
114
116
  # handle short notation
115
117
  tokens = resource.split("/")
116
- if len(tokens) == 2:
118
+ if len(tokens) > 1:
117
119
  self.collection = tokens[0]
118
120
  self.term = "/".join(tokens[1:])
119
121
  elif len(tokens) == 1 and ":" in tokens[0]:
120
122
  self.collection = tokens[0].split(":")[0].lower()
121
123
  self.term = tokens[0]
122
124
 
125
+ # validation
123
126
  if len(tokens) < 2 and not self.collection:
124
127
  logger.error(
125
128
  f"Resource `{resource}` could not be split in collection and term. "
@@ -9,7 +9,6 @@ from pymetadata.chebi import ChebiQuery
9
9
  pymetadata.CACHE_PATH = Path.home() / ".cache" / "pymetadata"
10
10
 
11
11
  if __name__ == "__main__":
12
-
13
12
  chebis = ["CHEBI:2668", "CHEBI:138366", "CHEBI:9637", "CHEBI:155897"]
14
13
  for chebi in chebis:
15
14
  d = ChebiQuery.query(chebi=chebi, cache=True)
@@ -19,7 +19,7 @@ import requests
19
19
  import pymetadata
20
20
  from pymetadata import log
21
21
  from pymetadata.cache import DataclassJSONEncoder, read_json_cache, write_json_cache
22
-
22
+ from pymetadata.console import console
23
23
 
24
24
  logger = log.get_logger(__name__)
25
25
 
@@ -123,14 +123,6 @@ def ols_namespaces() -> Dict[str, Namespace]:
123
123
 
124
124
  # Custom namespaces for OLS ontology, for simple support
125
125
  namespaces = [
126
- Namespace(
127
- id=None,
128
- prefix="snomed",
129
- pattern=r"^\d+$",
130
- name="SNOMED",
131
- description="SNOMED CT or SNOMED Clinical Terms is a systematically organized computer processable collection of medical terms providing codes, terms, synonyms and definitions used in clinical documentation and reporting.",
132
- namespaceEmbeddedInLui=True,
133
- ),
134
126
  Namespace(
135
127
  id=None,
136
128
  prefix="omim",
@@ -180,14 +172,6 @@ def ols_namespaces() -> Dict[str, Namespace]:
180
172
  description="OPMI: Ontology of Precision Medicine and Investigation",
181
173
  namespaceEmbeddedInLui=True,
182
174
  ),
183
- Namespace(
184
- id=None,
185
- prefix="mondo",
186
- pattern=r"^MONDO:\d+$",
187
- name="MONDO",
188
- description="MONDO",
189
- namespaceEmbeddedInLui=True,
190
- ),
191
175
  Namespace(
192
176
  id=None,
193
177
  prefix="atol",
@@ -237,6 +221,14 @@ def ols_namespaces() -> Dict[str, Namespace]:
237
221
  description="Measurement method ontology",
238
222
  namespaceEmbeddedInLui=True,
239
223
  ),
224
+ Namespace(
225
+ id=None,
226
+ prefix="symp",
227
+ pattern=r"^SYMP:\d+$",
228
+ name="Symptom ontology",
229
+ description="The Symptom Ontology has been developed as a standardized ontology for symptoms of human diseases.",
230
+ namespaceEmbeddedInLui=True,
231
+ ),
240
232
  ]
241
233
 
242
234
  for ns in namespaces:
@@ -344,21 +336,6 @@ class Registry:
344
336
  ns_dict = {}
345
337
  for _, data in enumerate(namespaces):
346
338
  ns = Namespace.from_dict(data)
347
-
348
- # bugfix OLS4 (https://github.com/identifiers-org/identifiers-org.github.io/issues/231)
349
- if ns.resources:
350
- for resource in ns.resources:
351
- if resource.urlPattern.startswith("https://www.ebi.ac.uk/ols/"):
352
- resource.urlPattern = resource.urlPattern.replace(
353
- "/ols/", "/ols4/"
354
- )
355
- if resource.resourceHomeUrl.startswith(
356
- "https://www.ebi.ac.uk/ols/"
357
- ):
358
- resource.resourceHomeUrl = resource.resourceHomeUrl.replace(
359
- "/ols/", "/ols4/"
360
- )
361
-
362
339
  ns_dict[ns.prefix] = ns
363
340
 
364
341
  if custom_namespaces is not None:
@@ -394,4 +371,5 @@ class Registry:
394
371
  REGISTRY = Registry()
395
372
 
396
373
  if __name__ == "__main__":
397
- registry = Registry()
374
+ registry = Registry(cache=False)
375
+ console.print(registry.ns_dict)