biocypher 0.5.19__py3-none-any.whl → 0.5.21__py3-none-any.whl
This diff shows the content of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of biocypher might be problematic.
- biocypher/_connect.py +6 -12
- biocypher/_core.py +135 -23
- biocypher/_deduplicate.py +72 -30
- biocypher/_get.py +299 -0
- biocypher/_metadata.py +1 -1
- biocypher/_ontology.py +11 -9
- biocypher/_pandas.py +32 -7
- biocypher/_translate.py +29 -26
- biocypher/_write.py +75 -57
- {biocypher-0.5.19.dist-info → biocypher-0.5.21.dist-info}/METADATA +6 -2
- {biocypher-0.5.19.dist-info → biocypher-0.5.21.dist-info}/RECORD +13 -12
- {biocypher-0.5.19.dist-info → biocypher-0.5.21.dist-info}/LICENSE +0 -0
- {biocypher-0.5.19.dist-info → biocypher-0.5.21.dist-info}/WHEEL +0 -0
biocypher/_get.py
ADDED
@@ -0,0 +1,299 @@
+#!/usr/bin/env python
+
+#
+# Copyright 2021, Heidelberg University Clinic
+#
+# File author(s): Sebastian Lobentanzer
+# ...
+#
+# Distributed under MIT licence, see the file `LICENSE`.
+#
+"""
+BioCypher get module. Used to download and cache data from external sources.
+"""
+
+from __future__ import annotations
+
+from ._logger import logger
+
+logger.debug(f"Loading module {__name__}.")
+
+from datetime import datetime, timedelta
+from tempfile import TemporaryDirectory
+import os
+import json
+import ftplib
+
+import pooch
+
+from ._misc import to_list
+
+
+class Resource:
+    def __init__(
+        self,
+        name: str,
+        url_s: str | list[str],
+        lifetime: int = 0,
+        is_dir: bool = False,
+    ):
+        """
+        A resource is a file that can be downloaded from a URL and cached
+        locally. This class implements checks of the minimum requirements for
+        a resource, to be implemented by a biocypher adapter.
+
+        Args:
+            name (str): The name of the resource.
+
+            url_s (str | list[str]): The URL or URLs of the resource.
+
+            lifetime (int): The lifetime of the resource in days. If 0, the
+                resource is considered to be permanent.
+        """
+        self.name = name
+        self.url_s = url_s
+        self.lifetime = lifetime
+        self.is_dir = is_dir
+
+
+class Downloader:
+    def __init__(self, cache_dir: str):
+        """
+        A downloader is a collection of resources that can be downloaded
+        and cached locally. It manages the lifetime of downloaded resources by
+        keeping a JSON record of the download date of each resource.
+
+        Args:
+            cache_dir (str): The directory where the resources are cached. If
+                not given, a temporary directory is created.
+        """
+        self.cache_dir = cache_dir or TemporaryDirectory().name
+        self.cache_file = os.path.join(self.cache_dir, "cache.json")
+        self.cache_dict = self._load_cache_dict()
+
+    # download function that accepts a resource or a list of resources
+    def download(self, *resources: Resource):
+        """
+        Download one or multiple resources.
+
+        Args:
+            resources (Resource): The resource or resources to download.
+
+        Returns:
+            str or list: The path or paths to the downloaded resource(s).
+        """
+        paths = []
+        for resource in resources:
+            paths.append(self._download_or_cache(resource))
+
+        # flatten list if it is nested
+        if is_nested(paths):
+            paths = [path for sublist in paths for path in sublist]
+
+        return paths
+
+    def _download_or_cache(self, resource: Resource, cache: bool = True):
+        """
+        Download a resource if it is not cached or exceeded its lifetime.
+
+        Args:
+            resource (Resource): The resource to download.
+
+        Returns:
+            str or list: The path or paths to the downloaded resource(s).
+        """
+        # check if resource is cached
+        cache_record = self._get_cache_record(resource)
+
+        if cache_record:
+            # check if resource is expired (formatted in days)
+            dl = cache_record.get("date_downloaded")
+            lt = timedelta(days=resource.lifetime)
+            expired = dl + lt < datetime.now()
+        else:
+            expired = True
+
+        # download resource
+        if expired or not cache:
+            logger.info(f"Downloading resource {resource.name}.")
+
+            if resource.is_dir:
+                files = self._get_files(resource)
+                resource.url_s = [resource.url_s + "/" + file for file in files]
+                resource.is_dir = False
+                paths = self._download_or_cache(resource, cache)
+            elif isinstance(resource.url_s, list):
+                paths = []
+                for url in resource.url_s:
+                    fname = url[url.rfind("/") + 1 :]
+                    paths.append(
+                        self._retrieve(
+                            url=url,
+                            fname=fname,
+                            path=os.path.join(self.cache_dir, resource.name),
+                        )
+                    )
+            else:
+                fname = resource.url_s[resource.url_s.rfind("/") + 1 :]
+                paths = self._retrieve(
+                    url=resource.url_s,
+                    fname=fname,
+                    path=os.path.join(self.cache_dir, resource.name),
+                )
+
+            # sometimes a compressed file contains multiple files
+            # TODO ask for a list of files in the archive to be used from the
+            # adapter
+
+            # update cache record
+            self._update_cache_record(resource)
+
+        return paths
+
+    def _retrieve(
+        self,
+        url: str,
+        fname: str,
+        path: str,
+        known_hash: str = None,
+    ):
+        """
+        Retrieve a file from a URL using Pooch. Infer type of file from
+        extension and use appropriate processor.
+
+        Args:
+            url (str): The URL to retrieve the file from.
+
+            fname (str): The name of the file.
+
+            path (str): The path to the file.
+        """
+        if fname.endswith(".zip"):
+            return pooch.retrieve(
+                url=url,
+                known_hash=known_hash,
+                fname=fname,
+                path=path,
+                processor=pooch.Unzip(),
+                progressbar=True,
+            )
+
+        elif fname.endswith(".tar.gz"):
+            return pooch.retrieve(
+                url=url,
+                known_hash=known_hash,
+                fname=fname,
+                path=path,
+                processor=pooch.Untar(),
+                progressbar=True,
+            )
+
+        elif fname.endswith(".gz"):
+            return pooch.retrieve(
+                url=url,
+                known_hash=known_hash,
+                fname=fname,
+                path=path,
+                processor=pooch.Decompress(),
+                progressbar=True,
+            )
+
+        else:
+            return pooch.retrieve(
+                url=url,
+                known_hash=known_hash,
+                fname=fname,
+                path=path,
+                progressbar=True,
+            )
+
+    def _get_files(self, resource: Resource):
+        """
+        Get the files contained in a directory resource.
+
+        Args:
+            resource (Resource): The directory resource.
+
+        Returns:
+            list: The files contained in the directory.
+        """
+        if resource.url_s.startswith("ftp://"):
+            # remove protocol
+            url = resource.url_s[6:]
+            # get base url
+            url = url[: url.find("/")]
+            # get directory (remove initial slash as well)
+            dir = resource.url_s[7 + len(url) :]
+            # get files
+            ftp = ftplib.FTP(url)
+            ftp.login()
+            ftp.cwd(dir)
+            files = ftp.nlst()
+            ftp.quit()
+        else:
+            raise NotImplementedError(
+                "Only FTP directories are supported at the moment."
+            )
+
+        return files
+
+    def _load_cache_dict(self):
+        """
+        Load the cache dictionary from the cache file. Create an empty cache
+        file if it does not exist.
+        """
+        if not os.path.exists(self.cache_dir):
+            logger.info(f"Creating cache directory {self.cache_dir}.")
+            os.makedirs(self.cache_dir)
+
+        if not os.path.exists(self.cache_file):
+            logger.info(f"Creating cache file {self.cache_file}.")
+            with open(self.cache_file, "w") as f:
+                json.dump({}, f)
+
+        with open(self.cache_file, "r") as f:
+            logger.info(f"Loading cache file {self.cache_file}.")
+            return json.load(f)
+
+    def _get_cache_record(self, resource: Resource):
+        """
+        Get the cache record of a resource.
+
+        Args:
+            resource (Resource): The resource to get the cache record of.
+
+        Returns:
+            The cache record of the resource.
+        """
+        return self.cache_dict.get(resource.name, {})
+
+    def _update_cache_record(self, resource: Resource):
+        """
+        Update the cache record of a resource.
+
+        Args:
+            resource (Resource): The resource to update the cache record of.
+        """
+        cache_record = {}
+        cache_record["url"] = to_list(resource.url_s)
+        cache_record["date_downloaded"] = datetime.now()
+        cache_record["lifetime"] = resource.lifetime
+        self.cache_dict[resource.name] = cache_record
+        with open(self.cache_file, "w") as f:
+            json.dump(self.cache_dict, f, default=str)
+
+
+def is_nested(lst):
+    """
+    Check if a list is nested.
+
+    Args:
+        lst (list): The list to check.
+
+    Returns:
+        bool: True if the list is nested, False otherwise.
+    """
+    for item in lst:
+        if isinstance(item, list):
+            return True
+    return False
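
For orientation, a minimal usage sketch of the new module (not part of the diff; the resource name, URL, and cache directory below are invented for illustration):

```python
# Hedged example of the Resource/Downloader flow added above.
# The URL and cache directory are placeholders, not part of the release.
from biocypher._get import Downloader, Resource

downloader = Downloader(cache_dir="./.cache")

resource = Resource(
    name="example-dataset",  # cache key, also the cache subdirectory name
    url_s="https://example.org/data.tsv.gz",  # placeholder URL
    lifetime=7,  # re-download after seven days; 0 means never expire
)

# First call downloads and decompresses (".gz" selects pooch.Decompress);
# later calls within the lifetime are served from the JSON-backed cache.
paths = downloader.download(resource)
print(paths)
```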
biocypher/_metadata.py
CHANGED
biocypher/_ontology.py
CHANGED
@@ -269,7 +269,7 @@ class Ontology:
         """
 
         self._head_ontology_meta = head_ontology
-        self.
+        self.mapping = ontology_mapping
         self._tail_ontology_meta = tail_ontologies
 
         self._tail_ontologies = None
@@ -403,7 +403,7 @@ class Ontology:
         if not self._nx_graph:
             self._nx_graph = self._head_ontology.get_nx_graph().copy()
 
-        for key, value in self.extended_schema.items():
+        for key, value in self.mapping.extended_schema.items():
             if not value.get("is_a"):
                 if self._nx_graph.has_node(value.get("synonym_for")):
                     continue
@@ -485,7 +485,7 @@ class Ontology:
         setting the synonym as the primary node label.
         """
 
-        for key, value in self.extended_schema.items():
+        for key, value in self.mapping.extended_schema.items():
             if key in self._nx_graph.nodes:
                 self._nx_graph.nodes[key].update(value)
 
@@ -541,9 +541,9 @@ class Ontology:
 
         if not full:
             # set of leaves and their intermediate parents up to the root
-            filter_nodes = set(self.extended_schema.keys())
+            filter_nodes = set(self.mapping.extended_schema.keys())
 
-            for node in self.extended_schema.keys():
+            for node in self.mapping.extended_schema.keys():
                 filter_nodes.update(self.get_ancestors(node).nodes)
 
             # filter graph
@@ -557,11 +557,13 @@ class Ontology:
         tree = _misc.create_tree_visualisation(G)
 
         # add synonym information
-        for node in self.extended_schema:
-            if self.extended_schema[node]
+        for node in self.mapping.extended_schema:
+            if not isinstance(self.mapping.extended_schema[node], dict):
+                continue
+            if self.mapping.extended_schema[node].get("synonym_for"):
                 tree.nodes[node].tag = (
                     f"{node} = "
-                    f"{self.extended_schema[node].get('synonym_for')}"
+                    f"{self.mapping.extended_schema[node].get('synonym_for')}"
                 )
 
         tree.show()
@@ -602,7 +604,7 @@ class Ontology:
             "node_id": self._get_current_id(),
             "node_label": "BioCypher",
            "properties": {
-                "schema": "self.extended_schema",
+                "schema": "self.ontology_mapping.extended_schema",
            },
        }
 
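
The net effect of this change is that Ontology no longer reads a bare extended_schema attribute; schema access is routed through the stored OntologyMapping. A schematic sketch of the new access pattern (class bodies reduced to the relevant attributes; the schema entry is invented):

```python
# Sketch only: attribute names follow the diff above, everything else
# is simplified, and the example schema entry is made up.
class OntologyMapping:
    def __init__(self, extended_schema: dict):
        self.extended_schema = extended_schema


class Ontology:
    def __init__(self, ontology_mapping: OntologyMapping):
        self.mapping = ontology_mapping  # holds the mapping, not a bare dict

    def schema_classes(self) -> list:
        # 0.5.19: self.extended_schema.keys()
        # 0.5.21: self.mapping.extended_schema.keys()
        return list(self.mapping.extended_schema.keys())


ontology = Ontology(OntologyMapping({"protein": {"represented_as": "node"}}))
print(ontology.schema_classes())  # ['protein']
```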
biocypher/_pandas.py
CHANGED
@@ -1,11 +1,10 @@
 import pandas as pd
 
-from ._create import BioCypherEdge, BioCypherNode
+from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
 
 
 class Pandas:
-    def __init__(self,
-        self.ontology = ontology
+    def __init__(self, translator, deduplicator):
         self.translator = translator
         self.deduplicator = deduplicator
 
@@ -18,22 +17,48 @@ class Pandas:
         """
         lists = {}
         for entity in entities:
-            if
-                entity,
+            if (
+                not isinstance(entity, BioCypherNode)
+                and not isinstance(entity, BioCypherEdge)
+                and not isinstance(entity, BioCypherRelAsNode)
             ):
                 raise TypeError(
-
+                    "Expected a BioCypherNode / BioCypherEdge / "
+                    f"BioCypherRelAsNode, got {type(entity)}."
                 )
 
             if isinstance(entity, BioCypherNode):
                 seen = self.deduplicator.node_seen(entity)
             elif isinstance(entity, BioCypherEdge):
                 seen = self.deduplicator.edge_seen(entity)
+            elif isinstance(entity, BioCypherRelAsNode):
+                seen = self.deduplicator.rel_as_node_seen(entity)
 
             if seen:
                 continue
 
-
+            if isinstance(entity, BioCypherRelAsNode):
+                node = entity.get_node()
+                source_edge = entity.get_source_edge()
+                target_edge = entity.get_target_edge()
+
+                _type = node.get_type()
+                if not _type in lists:
+                    lists[_type] = []
+                lists[_type].append(node)
+
+                _source_type = source_edge.get_type()
+                if not _source_type in lists:
+                    lists[_source_type] = []
+                lists[_source_type].append(source_edge)
+
+                _target_type = target_edge.get_type()
+                if not _target_type in lists:
+                    lists[_target_type] = []
+                lists[_target_type].append(target_edge)
+                continue
+
+            _type = entity.get_type()
             if not _type in lists:
                 lists[_type] = []
             lists[_type].append(entity)
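
The new branch unpacks a BioCypherRelAsNode into its constituent node and two connecting edges before binning by type, so a single rel-as-node entity ends up in three dataframes. A reduced sketch of that decomposition (constructor keywords assumed from biocypher._create; IDs and labels are invented):

```python
# Invented IDs and labels; shows how one BioCypherRelAsNode contributes
# to three type bins, mirroring the logic in the diff above.
from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode

rel = BioCypherRelAsNode(
    BioCypherNode(node_id="i1", node_label="interaction"),
    BioCypherEdge(source_id="p1", target_id="i1",
                  relationship_label="interaction_source"),
    BioCypherEdge(source_id="i1", target_id="p2",
                  relationship_label="interaction_target"),
)

lists = {}
for part in (rel.get_node(), rel.get_source_edge(), rel.get_target_edge()):
    lists.setdefault(part.get_type(), []).append(part)

print(sorted(lists))  # three bins: the node type plus the two edge types
```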
biocypher/_translate.py
CHANGED
@@ -23,7 +23,7 @@ from more_itertools import peekable
 
 from . import _misc
 from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
-from .
+from ._ontology import Ontology
 
 __all__ = ["BiolinkAdapter", "Translator"]
 
@@ -41,9 +41,7 @@ class Translator:
     and cypher queries.
     """
 
-    def __init__(
-        self, ontology_mapping: "OntologyMapping", strict_mode: bool = False
-    ):
+    def __init__(self, ontology: "Ontology", strict_mode: bool = False):
         """
         Args:
             leaves:
@@ -57,7 +55,7 @@ class Translator:
             carry source, licence, and version information.
         """
 
-        self.
+        self.ontology = ontology
         self.strict_mode = strict_mode
 
         # record nodes without biolink type configured in schema_config.yaml
@@ -71,7 +69,7 @@ class Translator:
 
     def translate_nodes(
         self,
-
+        node_tuples: Iterable,
     ) -> Generator[BioCypherNode, None, None]:
         """
         Translates input node representation to a representation that
@@ -79,16 +77,16 @@ class Translator:
         requires explicit statement of node type on pass.
 
         Args:
-
+            node_tuples (list of tuples): collection of tuples
                 representing individual nodes by their unique id and a type
                 that is translated from the original database notation to
                 the corresponding BioCypher notation.
 
         """
 
-        self._log_begin_translate(
+        self._log_begin_translate(node_tuples, "nodes")
 
-        for _id, _type, _props in
+        for _id, _type, _props in node_tuples:
             # check for strict mode requirements
             required_props = ["source", "licence", "version"]
 
@@ -132,8 +130,9 @@ class Translator:
         """
 
         return (
-            self.extended_schema[_bl_type]["preferred_id"]
-            if "preferred_id"
+            self.ontology.mapping.extended_schema[_bl_type]["preferred_id"]
+            if "preferred_id"
+            in self.ontology.mapping.extended_schema.get(_bl_type, {})
             else "id"
         )
 
@@ -142,7 +141,9 @@ class Translator:
         Filters properties for those specified in schema_config if any.
         """
 
-        filter_props = self.extended_schema[bl_type].get(
+        filter_props = self.ontology.mapping.extended_schema[bl_type].get(
+            "properties", {}
+        )
 
         # strict mode: add required properties (only if there is a whitelist)
         if self.strict_mode and filter_props:
@@ -150,7 +151,7 @@ class Translator:
             {"source": "str", "licence": "str", "version": "str"},
         )
 
-        exclude_props = self.extended_schema[bl_type].get(
+        exclude_props = self.ontology.mapping.extended_schema[bl_type].get(
             "exclude_properties", []
         )
 
@@ -188,7 +189,7 @@ class Translator:
 
     def translate_edges(
         self,
-
+        edge_tuples: Iterable,
     ) -> Generator[Union[BioCypherEdge, BioCypherRelAsNode], None, None]:
         """
         Translates input edge representation to a representation that
@@ -197,7 +198,7 @@ class Translator:
 
         Args:
 
-
+            edge_tuples (list of tuples):
 
                 collection of tuples representing source and target of
                 an interaction via their unique ids as well as the type
@@ -206,18 +207,18 @@ class Translator:
                 Can optionally possess its own ID.
         """
 
-        self._log_begin_translate(
+        self._log_begin_translate(edge_tuples, "edges")
 
         # legacy: deal with 4-tuples (no edge id)
         # TODO remove for performance reasons once safe
-
-        if len(
-
+        edge_tuples = peekable(edge_tuples)
+        if len(edge_tuples.peek()) == 4:
+            edge_tuples = [
                 (None, src, tar, typ, props)
-                for src, tar, typ, props in
+                for src, tar, typ, props in edge_tuples
             ]
 
-        for _id, _src, _tar, _type, _props in
+        for _id, _src, _tar, _type, _props in edge_tuples:
            # check for strict mode requirements
            if self.strict_mode:
                if not "source" in _props:
@@ -239,7 +240,9 @@ class Translator:
             # filter properties for those specified in schema_config if any
             _filtered_props = self._filter_props(bl_type, _props)
 
-            rep = self.extended_schema[bl_type][
+            rep = self.ontology.mapping.extended_schema[bl_type][
+                "represented_as"
+            ]
 
             if rep == "node":
                 if _id:
@@ -295,9 +298,9 @@ class Translator:
                 yield BioCypherRelAsNode(n, e_s, e_t)
 
             else:
-                edge_label = self.extended_schema[
-
-                )
+                edge_label = self.ontology.mapping.extended_schema[
+                    bl_type
+                ].get("label_as_edge")
 
                 if edge_label is None:
                     edge_label = bl_type
@@ -356,7 +359,7 @@ class Translator:
 
         self._ontology_mapping = {}
 
-        for key, value in self.extended_schema.items():
+        for key, value in self.ontology.mapping.extended_schema.items():
             labels = value.get("input_label") or value.get("label_in_input")
 
             if isinstance(labels, str):
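
The legacy branch in translate_edges normalises 4-tuples (no edge id) to the 5-tuple form by peeking at the first element without consuming the iterator. The same idiom in isolation (the example tuple is invented):

```python
# The peekable idiom from the diff above, shown standalone.
# The edge tuple is an invented example.
from more_itertools import peekable

edge_tuples = peekable(
    [("p1", "p2", "interacts_with", {"source": "example"})]  # 4-tuple, no id
)

# peek() inspects the first tuple without advancing the iterator
if len(edge_tuples.peek()) == 4:
    edge_tuples = [
        (None, src, tar, typ, props) for src, tar, typ, props in edge_tuples
    ]

for _id, _src, _tar, _type, _props in edge_tuples:
    print(_id, _src, _tar, _type)  # None p1 p2 interacts_with
```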
|