metahq-core 0.1.2__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metahq_core/__init__.py +1 -1
- metahq_core/curations/annotation_converter.py +5 -5
- metahq_core/curations/annotations.py +361 -151
- metahq_core/curations/index.py +104 -43
- metahq_core/curations/labels.py +259 -128
- metahq_core/curations/propagator.py +62 -85
- metahq_core/export/__init__.py +0 -0
- metahq_core/export/annotations.py +125 -59
- metahq_core/export/labels.py +128 -70
- metahq_core/logger.py +11 -18
- metahq_core/query.py +346 -241
- metahq_core/{ontology/loader.py → relations_loader.py} +2 -1
- metahq_core/search.py +37 -14
- metahq_core/util/io.py +109 -46
- metahq_core/util/supported.py +16 -5
- {metahq_core-0.1.2.dist-info → metahq_core-1.0.0rc1.dist-info}/METADATA +13 -6
- metahq_core-1.0.0rc1.dist-info/RECORD +30 -0
- {metahq_core-0.1.2.dist-info → metahq_core-1.0.0rc1.dist-info}/WHEEL +1 -1
- metahq_core-1.0.0rc1.dist-info/licenses/LICENSE +28 -0
- metahq_core/ontology/base.py +0 -376
- metahq_core/ontology/graph.py +0 -252
- metahq_core-0.1.2.dist-info/RECORD +0 -30
- /metahq_core/{ontology → curations}/__init__.py +0 -0
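Note: 1.0.0rc1 removes the metahq_core.ontology subpackage (base.py and graph.py are deleted below) and moves ontology/loader.py to relations_loader.py. If downstream code needs to run against both versions, the import can be guarded as in the minimal sketch below; this is illustrative only, and the imported name load_relations is an assumed placeholder rather than a documented symbol of either release.

    # Hypothetical compatibility shim across the 0.1.2 -> 1.0.0rc1 layout change.
    # `load_relations` is an assumed name used purely for illustration.
    try:
        from metahq_core.relations_loader import load_relations  # 1.0.0rc1 layout
    except ImportError:
        from metahq_core.ontology.loader import load_relations  # 0.1.2 layout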
metahq_core/ontology/base.py
DELETED
@@ -1,376 +0,0 @@
"""
Operations for ontologies. Currently, only UBERON, CL, and MONDO are supported.

Authors: Parker Hicks, Hao Yuan
Date: 2025-01-16

Last updated: 2025-09-01 by Parker Hicks
"""

import numpy as np
import polars as pl

from metahq_core.util.alltypes import FilePath
from metahq_core.util.helpers import reverse_dict


class Ontology:
    """
    This class contains functionalities for working with ontologies. Currently only supports
    ontologies stored in obo files.

    Attributes
    ----------
    ontology: str
        Name of the ontology (e.g., mondo, MONDO, uberon, CL).

    entries: list
        Entries from the ontology that begin with the pattern [Term].

    _class_dict: dict
        Term ID to term name mapping (e.g., {MONDO:0006858: 'mouth disorder'}).

    Methods
    -------
    get_class_dict()
        Retrieves ID: name pairs for terms in the ontology.

    xref()
        Retrives cross referenced terms from another ontology that are equivalent to terms
        in self.ontology.

    mesh_to_mondo()
        Given a MESH ID and an xref map, will find the corresponding MONDO ID.

    read()
        Opens and reads the ontology file.

    Static Methods
    --------------
    get_entries()
        Finds ontology term entries from lines in the ontology file.

    map_terms()
        Given a list of terms from another ontology, will map those to self.ontology terms.

    obo_reader()
        Opens and read the .obo file.

    Class Methods
    -------------
    mapping_func()
        Assigns the correct mapping function for `map_terms`.

    from_obo()
        Initializes the class by loading the ontology from an .obo file.

    Example
    -------
    >>> from ontology.ontology import Ontology
    >>> op = Ontology.from_obo("mondo.obo", ontology="mondo")

    """

    def __init__(self, ontology: str):
        self.ontology = ontology
        self.entries = []
        self._class_dict = {}

    def get_class_dict(self, verbose=False) -> None:
        """
        Fills the _class_dict attribute with id: name pairs.

        :param verbose: If True, will print redundant terms.
        """
        for entry in self.entries:
            lines = entry.split("\n")
            for line in lines:
                line = line.rstrip()
                if line.startswith("id:"):
                    _id = line.split("id: ")[1]
                elif line.startswith("name:"):
                    name = line.split("name: ")[1]
                    if _id not in self._class_dict:
                        self._class_dict[_id] = name.lower()
                    elif verbose:
                        print(f"{_id} showing up more than once")

    def xref(self, ref: str, reverse=False, verbose=False) -> dict:
        """
        Finds cross references to other ontology terms within self.ontology.

        If there are cross references, this function will just choose the first
        one it comes across.

        :param ref: The cross referenced ontology (e.g., MESH).
        :param reverse: If True, will return {xref: term} instead of {term: xref}.
        :param verbose: If True, will print redundant cross references.
        :returns _map: Mapping between terms from self.ontology and the xref ontology.

        Example
        -------
        >>> from ontology.ontology import Ontology
        >>> op = Ontology.from_obo("mondo.obo", ontology="mondo")
        >>> op.xref("MESH").pop("MONDO:0100340")
        MESH:C565561

        >>> op.xref("MESH", verbose=True)
        MESH:D006966 showing up more than once (duplicate: MONDO:0024305)
        MESH:C537897 showing up more than once (duplicate: MONDO:0043176)
        ...
        MESH:D000086382 showing up more than once (duplicate: MONDO:0100096)

        """
        _map = {}
        for entry in self.entries:
            lines = entry.split("\n")
            for line in lines:
                line = line.rstrip()
                if line.startswith("id:"):
                    _id = line.split("id: ")[1]
                elif line.startswith(f"xref: {ref}:"):
                    __id = line.split(" ")[1]
                    if _id not in _map:
                        _map[_id] = __id
                    elif verbose:
                        print(f"{__id} showing up more than once (duplicate: {_id})")
        if reverse:
            _map = reverse_dict(_map)

        return _map

    def map_terms(self, terms: np.ndarray, _from: str, _to: str) -> dict:
        """
        Maps term IDs from a list of terms to another ontology.

        :param terms: Array of term IDs to map.
        :param _from: The ontology type of the IDs in terms.
        :param _to: The ontology type you want to map those terms to.
        :returns mapped: Mapping between terms in terms to _to.

        Example
        -------
        >>> from ontology.ontology import Ontology
        >>> op = Ontology.from_obo("mondo.obo", ontology="mondo")
        >>> op.map_terms(mesh, _from="MESH", _to="MONDO").pop("MESH:D007680")
        MONDO:0002367

        >>> op.map_terms(mesh, _from="MONDO", _to="MESH").pop("MONDO:0002367")
        MESH:D007680

        """
        if _to == self.ontology:
            reverse = True
            ref = _from
        else:
            reverse = False
            ref = _to

        _map = self.xref(ref=ref, reverse=reverse)

        mapped = {}
        mapper = self.mapping_func(_from, _to, self.ontology)
        for term in terms:
            mapped[term] = mapper(term, _map)

        return mapped

    def read(self, file: FilePath, reader="obo") -> None:
        """
        Loads and reads an ontology file.

        :param file: Path to ontology file.
        :param reader: File type to read from.

        Example
        -------
        >>> from ontology.ontology import Ontology
        >>> op = Ontology(ontology="mondo")
        >>> op.read("mondo.obo", reader="obo")
        >>> op.entries[0]
        [Term]
        id: MONDO:0000001
        name: disease
        def: "A diease is a disposition to ..."
        ...
        property_value: exactMatch Orphannet:377788

        """
        if isinstance(reader, str):
            if reader == "obo":
                _reader = self.obo_reader
                loaded = _reader(file)
                self.entries = self.get_entries(loaded)
            else:
                raise ValueError(
                    f"Unknown reader {reader!r}, available options are [obo]",
                )

    def id_map(self, fmt: str = "polars") -> dict[str, str] | pl.DataFrame:
        """Returns class_dict as specified data structure."""
        supported = ["polars", "dict"]

        if fmt not in supported:
            raise ValueError(f"Expected struct in {supported}, got {fmt}.")

        if fmt == "polars":
            return self._id_map_to_polars()

        return self.class_dict

    def _id_map_to_polars(self):
        """Convert self.class_dict to polars DataFrame."""
        d = {"id": list(self.class_dict.keys()), "name": list(self.class_dict.values())}
        return pl.DataFrame(d)

    @property
    def class_dict(self) -> dict:
        """Returns the dictionary storing terms IDs and their names."""
        if len(self._class_dict) == 0:
            self.get_class_dict()

        return self._class_dict

    @property
    def entries(self) -> list:
        """Returns entries from the ontology."""
        return self._entries

    @entries.setter
    def entries(self, val):
        """Sets self.entries value."""
        if not isinstance(val, list):
            raise TypeError(f"Expected list, not {type(val)}.")
        self._entries = val

    @staticmethod
    def get_entries(obo_text: str) -> list:
        """Returns a list of entries from entries combined by \n\n"""
        entries = [
            entry
            for entry in obo_text.split("\n\n")
            if (entry.startswith("[Term]")) and ("is_obsolete: true" not in entry)
        ]

        return entries

    @staticmethod
    def doid_to_mondo(doid: str, _map: dict[str, str]) -> str:
        """
        Maps a single DOID to MONDO id.

        Args
        ----
        doid: str
            DOID id (e.g. D000324)
        _map: dict[str, str]
            Dict with DOID keys and MONDO values

        Returns
        -------
        _id: str
            Mapped id

        Example
        -------
        >>> from ontology_parser import Ontology
        >>> op = Ontology.from_obo("mondo.obo", ontology="mondo")
        >>> _map = op.xref("DOID")
        >>> op.mesh_to_mondo("DOID:299", _map)
        MONDO:0004970

        """
        if doid == "DOID:0000000":
            _id = "control"
        elif doid in _map.keys():
            _id = _map[doid]
        else:
            _id = "NA"

        return _id

    @staticmethod
    def mesh_to_mondo(mesh: str, _map: dict) -> str:
        """
        Maps a single MESH to MONDO id.
        :param mesh: MESH id (e.g. D000324)
        :param _map: Dict with MESH keys and MONDO values
        :return _id: Mapped id
        Example
        -------
        >>> from ontology_parser import Ontology
        >>> op = Ontology.from_obo("mondo.obo", ontology="mondo")
        >>> _map = op.xref("MESH")
        >>> op.mesh_to_mondo("MESH:D007680", _map)
        MONDO:0002367
        """
        if mesh == "MESH:D000000":
            _id = "control"
        elif mesh in _map.keys():
            _id = _map[mesh]
        else:
            _id = "NA"

        return _id

    @staticmethod
    def umls_to_mondo(umls: str, _map: dict) -> str:
        """
        Maps a single UMLS to MONDO id.
        :param mesh: UMLS id (e.g. D000324)
        :param _map: Dict with UMLS keys and MONDO values
        :return _id: Mapped id
        Example
        -------
        >>> from ontology_parser import Ontology
        >>> op = Ontology.from_obo("mondo.obo", ontology="mondo")
        >>> _map = op.xref("UMLS")
        >>> op.mesh_to_mondo("UMLS:C2673913", _map)
        MONDO:0000104
        """
        if umls == "UMLS:C0000000":
            _id = "MONDO:0000000"
        elif umls in _map.keys():
            _id = _map[umls]
        else:
            _id = "NA"

        return _id

    @staticmethod
    def obo_reader(obo: FilePath) -> str:
        """Reads text from an obo file."""
        with open(obo, "r", encoding="utf-8") as f:
            text = f.read()
        return text

    @staticmethod
    def select_from_xref(term: str, _map: dict) -> str:
        """Pulls the xref for a query term."""
        if term in _map.keys():
            _id = _map[term]
        else:
            _id = "NA"

        return _id

    @classmethod
    def from_obo(cls, obo: FilePath, ontology: str):
        """Create Ontology class from an obo file."""
        parser = cls(ontology=ontology.upper())
        parser.read(obo, reader="obo")
        return parser

    @classmethod
    def mapping_func(cls, _from: str, _to: str, ontology: str) -> object:
        """Assigns the correct mapping function for mapping xref terms."""
        if _from == "MESH" and _to == "MONDO" and ontology == "MONDO":
            return cls.mesh_to_mondo
        if _from == "UMLS" and _to == "MONDO" and ontology == "MONDO":
            return cls.umls_to_mondo
        if _from == "MONDO" and _to == "MESH" and ontology == "MONDO":
            return cls.select_from_xref
        if _from == "DOID" and _to == "MONDO" and ontology == "MONDO":
            return cls.doid_to_mondo

        raise NotImplementedError()
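For context on what this deletion removes: the Ontology class was driven entirely by a local .obo release. The minimal sketch below is assembled from the docstring examples above, assumes a local mondo.obo file, and only applies to 0.1.2, since the module no longer exists in 1.0.0rc1.

    from metahq_core.ontology.base import Ontology  # 0.1.2 only; deleted in 1.0.0rc1

    # Parse the non-obsolete [Term] entries from a local MONDO release
    op = Ontology.from_obo("mondo.obo", ontology="mondo")

    # Lazily built term-ID -> term-name lookup
    name = op.class_dict["MONDO:0002367"]

    # Map MESH identifiers onto MONDO terms via the xref lines
    mapped = op.map_terms(["MESH:D007680"], _from="MESH", _to="MONDO")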
metahq_core/ontology/graph.py
DELETED
@@ -1,252 +0,0 @@
"""
Operations for creating and using directed acyclic graphs (DAGs) constructed from ontologies.

Authors: Parker Hicks, Hao Yuan
Date: 2025-01-18

Last updated: 2025-09-01 by Parker Hicks
"""

import re

import networkx as nx
import numpy as np

from metahq_core.ontology.base import Ontology
from metahq_core.util.alltypes import IdArray


class Graph(Ontology):
    """
    This class provides functionalities for creating and operating on ontology knowledge graphs.
    See Ontology documentation for inherited attributes and methods.

    Methods
    -------
    ancestors()
        Returns the ancestors of a single node.

    ancestors_from()
        Returns ancestors for each node in a list of nodes.

    construct_graph()
        Constructs the ontology directed acyclic graph.

    descendants()
        Returns the descendants of a single node.

    descendants_from()
        Returns descendants from a list of nodes.

    propagate_term()
        Will return a -1,0,1 label given a query term and reference term.

    Example
    -------
    >>> from ontology.graph import Graph
    >>> ontograph = Graph.from_obo("mondo.obo", ontology="mondo")
    >>> ontograph.graph
    DiGraph with 23314 nodes and 35351 edges

    >>> ontograph.nodes
    ['MONDO:0002816' 'MONDO:0000004' 'MONDO:0021034' ... 'MONDO:8000019'
     'MONDO:8000023' 'MONDO:8000024']

    >>> ontograph.leaves
    ['MONDO:0000082' 'MONDO:0000138' 'MONDO:0000208' ... 'MONDO:8000019'
     'MONDO:8000023' 'MONDO:8000024']

    >>> ontograph.class_dict["MONDO:0021054"]
    bone sarcoma

    """

    def __init__(self, ontology: str):
        """Initialize Graph object as a child of Ontology"""
        super().__init__(ontology=ontology)
        self._graph = nx.DiGraph()

    def construct_graph(self):
        """
        Constructs an ontology graph from entries from an ontology file.

        A simple cycle occurs between 2 nodes UBERON:8000009 and UBERON:0002354
        (cardiac Purkinje fiber network and cardiac Purkinje fiber)
        They are both parents and children of eachother, so to preserve the
        directed acyclic structure of the edgelist, we intentionally keep only one
        edge (fiber network is parent of fiber) on Line 100.
        """
        # ID entries have at least 1 capital letter, a colon, and at least 1 digit
        id_pattern = re.compile(r"[A-Za-z]+:\S+")

        for entry in self.entries:
            if "is_obsolete: true" in entry:
                continue  # skip obsolete entries

            lines = entry.split("\n")
            for line in lines:

                # Get ID of the term
                if line.startswith("id:"):
                    _id = line.split("id: ")[1]
                    if ("UBERON" in _id) or ("CL" in _id) or ("MONDO" in _id):
                        pass
                    else:
                        break

                # Get is_a connection from the reference term to another
                elif line.startswith("is_a:"):
                    parent = id_pattern.search(line).group(0)
                    if ("UBERON" in parent) or ("CL" in parent) or ("MONDO" in _id):
                        self._graph.add_edge(parent, _id)

                # Get part_of connections
                # Ignoring 'develops from' and 'related to'
                elif line.startswith("relationship: part_of"):
                    parent = id_pattern.search(line).group(0)
                    if ("UBERON" in parent) or ("CL" in parent) or ("MONDO" in _id):
                        # If parent is the fiber and child is the fiber network, then leave that edge out
                        if _id == "UBERON:8000009" and parent == "UBERON:0002354":
                            continue
                        self._graph.add_edge(parent, _id)

    def descendants_from(self, nodes: np.ndarray | list, verbose=False) -> dict:
        """
        Retrieves descendants from an array of parent nodes.

        :param nodes: IDs in self.nodes for which to find desendants.
        :param verbose: If True, will print nodes not in the graph
        :returns _map: Mapping between parents (keys) and their children (values).

        Example
        -------
        >>> from ontology.graph import Graph
        >>> ontograph = Graph.from_obo("mondo.obo", ontology="mondo")
        >>> ontograph.descendants_from(['MONDO:0005071', 'MONDO:0043543'])
        {'MONDO:0005071': ['MONDO:0019438' ... 'MONDO:0100070'],
         'MONDO:0043543': ['MONDO:0043544' ... 'MONDO:0005188']}

        """
        _map = {}
        for node in nodes:
            if node in self.nodes:
                _map[node] = list(nx.descendants(self.graph, node))
            elif verbose:
                print(f"{node} not in graph.")

        return _map

    def ancestors_from(self, nodes: np.ndarray | list, verbose=False) -> dict:
        """
        Retrieves ancestors from an array of parent nodes.

        :param nodes: IDs in self.nodes for which to find desendants.
        :param verbose: If True, will print nodes not in the graph
        :returns _map: Mapping between parents (keys) and their children (values).

        Example
        -------
        >>> from ontology.graph import Graph
        >>> ontograph = Graph.from_obo("mondo.obo", ontology="mondo")
        >>> ontograph.ancestors_from(['MONDO:0008791', 'MONDO:0043209'])
        {'MONDO:0008791': ['MONDO:0019042' ... 'MONDO:0021147'],
         'MONDO:0043209': ['MONDO:0700096' ... 'MONDO:0004736']}

        """

        _map = {}
        for node in nodes:
            if node in self.nodes:
                _map[node] = list(nx.ancestors(self.graph, node))
            elif verbose:
                print(f"{node} not in graph.")

        return _map

    def deepest_node(self, query: IdArray) -> str:
        """
        Find the deepest node using breadth first search from root nodes.

        Args
        ----
        query: (IdArray)
            An array of nodes for which to find the deepest node.

        Returns
        -------
        deepest: (str)
            The deepest node out of all query nodes.

        """
        subset_nodes = set(node for node in query if node in self.graph)

        # Find roots
        roots = [node for node in self.graph if self.graph.in_degree(node) == 0]
        if not roots:
            roots = list(self.graph.nodes())[0:1]

        # Track depths from all roots
        depths = {node: 0 for node in self.graph}

        # Run BFS from each root
        for root in roots:
            visited = {node: False for node in self.graph}
            visited[root] = True
            queue = [(root, 0)]  # (node, depth)

            while queue:
                current, depth = queue.pop(0)
                depths[current] = max(depths[current], depth)

                for neighbor in self.graph.successors(current):
                    if not visited[neighbor]:
                        visited[neighbor] = True
                        queue.append((neighbor, depth + 1))

        # Find deepest node from the subset
        max_depth = -1
        deepest = None

        for node in subset_nodes:
            if depths[node] > max_depth:
                max_depth = depths[node]
                deepest = node

        return deepest

    def propagate_term(self, query: str, ref_term: str) -> int:
        """Sets label for ontology terms"""
        if query in self.descendants(ref_term):
            return 1
        elif query in self.ancestors(ref_term):
            return 0
        else:
            return -1

    def ancestors(self, term: str) -> list:
        """Gets ancestors of a single term"""
        return list(nx.ancestors(self.graph, term))

    def descendants(self, term: str) -> list:
        """Gets descendants of a single term"""
        return list(nx.descendants(self.graph, term))

    @property
    def graph(self) -> nx.DiGraph:
        """Return the ontology DiGraph"""
        if self._graph.number_of_nodes() == 0:
            self.construct_graph()

        return self._graph

    @property
    def nodes(self) -> np.ndarray:
        """Return the IDs of the graph nodes"""
        return np.array(self.graph.nodes())

    @property
    def leaves(self) -> np.ndarray:
        """Return leaf nodes of the ontology"""
        return np.array(
            [node for node in self.nodes if self.graph.out_degree(node) == 0]
        )
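Likewise, the Graph subclass wrapped the parsed ontology in a networkx DiGraph for ancestor/descendant queries and label propagation. The sketch below follows the docstrings above under the same assumption of a local mondo.obo file and applies only to 0.1.2.

    from metahq_core.ontology.graph import Graph  # 0.1.2 only; deleted in 1.0.0rc1

    ontograph = Graph.from_obo("mondo.obo", ontology="mondo")

    # Descendants of a single term, and the -1/0/1 propagation label for a query term
    children = ontograph.descendants("MONDO:0005071")
    label = ontograph.propagate_term("MONDO:0008791", ref_term="MONDO:0005071")

    # Leaf terms (nodes with no out-edges)
    leaf_ids = ontograph.leaves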
metahq_core-0.1.2.dist-info/RECORD
DELETED
@@ -1,30 +0,0 @@
metahq_core/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
metahq_core/__init__.py,sha256=YvuYzWnKtqBb-IqG8HAu-nhIYAsgj9Vmc_b9o7vO-js,22
metahq_core/logger.py,sha256=06QrC5msnehVLZ40T8oBFIUrlmBB-ysNiF5zo7Ms4fw,1319
metahq_core/query.py,sha256=B8oyh_vKps02QXW3MgBQmZgCK20tdy2bUB4tTmNfBXk,18526
metahq_core/search.py,sha256=XEyZqqAIE5GPK-kUw2YQFnntvcsjMmZkAR_xjRJjuYI,9549
metahq_core/curations/_multiprocess_propagator.py,sha256=KUiTgcOSLJv0VskzEk2kOpSFHZsUCyupdpOCVdN8vwM,3510
metahq_core/curations/annotation_converter.py,sha256=RjJs0HiLuScYma4yxo1mB6suDpUayNPopvi_lym_OSA,8879
metahq_core/curations/annotations.py,sha256=vp3omRRN1VCdKHvhug1zq_nVjoHXH5OpM2bBPshiOuA,13576
metahq_core/curations/base.py,sha256=3Q3gd9G7K4J9rewg7Cn9rAfml1N2Xgor-V2C3OHWeuI,1829
metahq_core/curations/index.py,sha256=iKJaBD5fZ_4BmgPFl-yRaxc50AgMucRsX3ghOaIVN0I,2334
metahq_core/curations/labels.py,sha256=h1Pf5DMqw0BECHsxd580hh3N6k_70vILIr5ZWhsi-fA,9207
metahq_core/curations/propagator.py,sha256=fYnH77-VAjaJbxaN1OzfP1wNJFNGh9Z3T6kSnUpl3a0,9577
metahq_core/export/annotations.py,sha256=PygcnCx1BPiIuhooaeiRehxz5_-WUDGirws_pwP4ruo,12702
metahq_core/export/base.py,sha256=j7cKc-ru0ggAXEmGFYiaxBn_OROahQ9HPiR1tK-VX94,1394
metahq_core/export/labels.py,sha256=pvS5_WIa2Aau7s4rpnSAm7OlD57Ht6Qln8-91GbNNao,13060
metahq_core/ontology/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
metahq_core/ontology/base.py,sha256=DtdIb18aerwi3sX_B6opmkaoSuO_sVEKFTWbEY3MZHg,11545
metahq_core/ontology/graph.py,sha256=47S_kfO6h6Np6i2g90Zizehq9yZiE4sXbuZzukyoDVE,8430
metahq_core/ontology/loader.py,sha256=epSiBAjiJa2KFe4Tfm3dxtjPcPgxTIHwOpN3VV03iTI,4687
metahq_core/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
metahq_core/util/alltypes.py,sha256=zYrBKvtkAhR7lmqDlGp7BOh2bUNI-toQfd7o7isDCxw,721
metahq_core/util/checkers.py,sha256=_7Aw2bO1PNg-9TPYOwKwvTqFtmb5vMyYU-omfUID_DE,508
metahq_core/util/exceptions.py,sha256=FlYUN8g4lNB3OSwQ20dfnSDmgX8NKZV5Yx7xERurrWc,115
metahq_core/util/helpers.py,sha256=sak1A7gp2qvnbhCBYHNQ5IUxIZeljgyRSu5U7C8a7bU,949
metahq_core/util/io.py,sha256=3JY04VjxuWA1TSx59iIQaON1LG-ZZp2zxSLkuFDoHWg,3075
metahq_core/util/progress.py,sha256=NF158KVFO9pO3vrsl7OrPAhYtd-cXaXRavL0iOV5XeM,2128
metahq_core/util/supported.py,sha256=RRMCaNh5xKBL8zNDW_7oR7v0bWRXyhI4FgWmiUH4_i4,10995
metahq_core-0.1.2.dist-info/METADATA,sha256=lcjV5sxKOWdTV69O1FSgWbVN-fa4DJu7Cy1YcTCT5RI,1836
metahq_core-0.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
metahq_core-0.1.2.dist-info/RECORD,,
/metahq_core/{ontology → curations}/__init__.py
File without changes