biocypher 0.5.17__py3-none-any.whl → 0.5.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biocypher might be problematic. Click here for more details.
- biocypher/__init__.py +10 -11
- biocypher/_config/__init__.py +25 -27
- biocypher/_config/biocypher_config.yaml +1 -2
- biocypher/_connect.py +59 -79
- biocypher/_core.py +146 -78
- biocypher/_create.py +55 -52
- biocypher/_deduplicate.py +81 -36
- biocypher/_logger.py +12 -13
- biocypher/_mapping.py +69 -83
- biocypher/_metadata.py +12 -17
- biocypher/_misc.py +17 -28
- biocypher/_ontology.py +85 -101
- biocypher/_pandas.py +46 -11
- biocypher/_translate.py +93 -113
- biocypher/_write.py +457 -404
- {biocypher-0.5.17.dist-info → biocypher-0.5.20.dist-info}/METADATA +16 -6
- biocypher-0.5.20.dist-info/RECORD +23 -0
- biocypher-0.5.17.dist-info/RECORD +0 -23
- {biocypher-0.5.17.dist-info → biocypher-0.5.20.dist-info}/LICENSE +0 -0
- {biocypher-0.5.17.dist-info → biocypher-0.5.20.dist-info}/WHEEL +0 -0
biocypher/__init__.py
CHANGED
|
@@ -13,14 +13,14 @@ BioCypher: a unifying framework for biomedical knowledge graphs.
|
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
15
|
__all__ = [
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
16
|
+
"__version__",
|
|
17
|
+
"__author__",
|
|
18
|
+
"module_data",
|
|
19
|
+
"config",
|
|
20
|
+
"logfile",
|
|
21
|
+
"log",
|
|
22
|
+
"Driver",
|
|
23
|
+
"BioCypher",
|
|
24
24
|
]
|
|
25
25
|
|
|
26
26
|
from ._core import BioCypher
|
|
@@ -30,11 +30,10 @@ from ._metadata import __author__, __version__
|
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
class Driver(BioCypher):
|
|
33
|
-
|
|
34
33
|
# initialise parent class but log a warning
|
|
35
34
|
def __init__(self, *args, **kwargs):
|
|
36
35
|
logger.warning(
|
|
37
|
-
|
|
38
|
-
|
|
36
|
+
"The class `Driver` is deprecated and will be removed in a future "
|
|
37
|
+
"release. Please use `BioCypher` instead."
|
|
39
38
|
)
|
|
40
39
|
super().__init__(*args, **kwargs)
|
biocypher/_config/__init__.py
CHANGED
|
@@ -23,10 +23,10 @@ import warnings
|
|
|
23
23
|
import yaml
|
|
24
24
|
import appdirs
|
|
25
25
|
|
|
26
|
-
__all__ = [
|
|
26
|
+
__all__ = ["module_data", "module_data_path", "read_config", "config", "reset"]
|
|
27
27
|
|
|
28
|
-
_USER_CONFIG_DIR = appdirs.user_config_dir(
|
|
29
|
-
_USER_CONFIG_FILE = os.path.join(_USER_CONFIG_DIR,
|
|
28
|
+
_USER_CONFIG_DIR = appdirs.user_config_dir("biocypher", "saezlab")
|
|
29
|
+
_USER_CONFIG_FILE = os.path.join(_USER_CONFIG_DIR, "conf.yaml")
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
class MyLoader(yaml.SafeLoader):
|
|
@@ -34,18 +34,18 @@ class MyLoader(yaml.SafeLoader):
|
|
|
34
34
|
# Check if the scalar contains double quotes and an escape sequence
|
|
35
35
|
value = super().construct_scalar(node)
|
|
36
36
|
q = bool(node.style == '"')
|
|
37
|
-
b = bool(
|
|
37
|
+
b = bool("\\" in value.encode("unicode_escape").decode("utf-8"))
|
|
38
38
|
if q and b:
|
|
39
39
|
warnings.warn(
|
|
40
40
|
(
|
|
41
|
-
|
|
41
|
+
"Double quotes detected in YAML configuration scalar: "
|
|
42
42
|
f"{value.encode('unicode_escape')}. "
|
|
43
|
-
|
|
43
|
+
"These allow escape sequences and may cause problems, for "
|
|
44
44
|
"instance with the Neo4j admin import files (e.g. '\\t'). "
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
"Make sure you wanted to do this, and use single quotes "
|
|
46
|
+
"whenever possible."
|
|
47
47
|
),
|
|
48
|
-
category=UserWarning
|
|
48
|
+
category=UserWarning,
|
|
49
49
|
)
|
|
50
50
|
return value
|
|
51
51
|
|
|
@@ -57,7 +57,7 @@ def module_data_path(name: str) -> str:
|
|
|
57
57
|
|
|
58
58
|
here = os.path.dirname(os.path.abspath(__file__))
|
|
59
59
|
|
|
60
|
-
return os.path.join(here, f
|
|
60
|
+
return os.path.join(here, f"{name}.yaml")
|
|
61
61
|
|
|
62
62
|
|
|
63
63
|
def module_data(name: str) -> Any:
|
|
@@ -71,11 +71,8 @@ def module_data(name: str) -> Any:
|
|
|
71
71
|
|
|
72
72
|
|
|
73
73
|
def _read_yaml(path: str) -> Optional[dict]:
|
|
74
|
-
|
|
75
74
|
if os.path.exists(path):
|
|
76
|
-
|
|
77
|
-
with open(path, 'r') as fp:
|
|
78
|
-
|
|
75
|
+
with open(path, "r") as fp:
|
|
79
76
|
return yaml.load(fp.read(), Loader=MyLoader)
|
|
80
77
|
|
|
81
78
|
|
|
@@ -89,18 +86,22 @@ def read_config() -> dict:
|
|
|
89
86
|
TODO explain path configuration
|
|
90
87
|
"""
|
|
91
88
|
|
|
92
|
-
defaults = module_data(
|
|
89
|
+
defaults = module_data("biocypher_config")
|
|
93
90
|
user = _read_yaml(_USER_CONFIG_FILE) or {}
|
|
94
91
|
# TODO account for .yml?
|
|
95
|
-
local =
|
|
96
|
-
|
|
92
|
+
local = (
|
|
93
|
+
_read_yaml("biocypher_config.yaml")
|
|
94
|
+
or _read_yaml("config/biocypher_config.yaml")
|
|
95
|
+
or {}
|
|
96
|
+
)
|
|
97
97
|
|
|
98
98
|
for key in defaults:
|
|
99
|
-
|
|
100
|
-
|
|
99
|
+
value = (
|
|
100
|
+
local[key] if key in local else user[key] if key in user else None
|
|
101
|
+
)
|
|
101
102
|
|
|
102
103
|
if value is not None:
|
|
103
|
-
if type(defaults[key]) == str:
|
|
104
|
+
if type(defaults[key]) == str: # first level config (like title)
|
|
104
105
|
defaults[key] = value
|
|
105
106
|
else:
|
|
106
107
|
defaults[key].update(value)
|
|
@@ -114,20 +115,17 @@ def config(*args, **kwargs) -> Optional[Any]:
|
|
|
114
115
|
"""
|
|
115
116
|
|
|
116
117
|
if args and kwargs:
|
|
117
|
-
|
|
118
118
|
raise ValueError(
|
|
119
|
-
|
|
119
|
+
"Setting and getting values in the same call is not allowed.",
|
|
120
120
|
)
|
|
121
121
|
|
|
122
122
|
if args:
|
|
123
|
-
|
|
124
|
-
result = tuple(globals()['_config'].get(key, None) for key in args)
|
|
123
|
+
result = tuple(globals()["_config"].get(key, None) for key in args)
|
|
125
124
|
|
|
126
125
|
return result[0] if len(result) == 1 else result
|
|
127
126
|
|
|
128
127
|
for key, value in kwargs.items():
|
|
129
|
-
|
|
130
|
-
globals()['_config'][key].update(value)
|
|
128
|
+
globals()["_config"][key].update(value)
|
|
131
129
|
|
|
132
130
|
|
|
133
131
|
def reset():
|
|
@@ -135,7 +133,7 @@ def reset():
|
|
|
135
133
|
Reload configuration from the config files.
|
|
136
134
|
"""
|
|
137
135
|
|
|
138
|
-
globals()[
|
|
136
|
+
globals()["_config"] = read_config()
|
|
139
137
|
|
|
140
138
|
|
|
141
139
|
reset()
|
|
@@ -102,6 +102,7 @@ postgresql:
|
|
|
102
102
|
database_name: postgres # DB name
|
|
103
103
|
user: postgres # user name
|
|
104
104
|
password: postgres # password
|
|
105
|
+
host: localhost # host
|
|
105
106
|
port: 5432 # port
|
|
106
107
|
|
|
107
108
|
# PostgreSQL import batch writer settings
|
|
@@ -109,5 +110,3 @@ postgresql:
|
|
|
109
110
|
delimiter: '\t'
|
|
110
111
|
# import_call_bin_prefix: '' # path to "psql"
|
|
111
112
|
# import_call_file_prefix: '/path/to/files'
|
|
112
|
-
|
|
113
|
-
|
biocypher/_connect.py
CHANGED
|
@@ -13,7 +13,7 @@ BioCypher 'online' mode. Handles connection and manipulation of a running DBMS.
|
|
|
13
13
|
"""
|
|
14
14
|
from ._logger import logger
|
|
15
15
|
|
|
16
|
-
logger.debug(f
|
|
16
|
+
logger.debug(f"Loading module {__name__}.")
|
|
17
17
|
|
|
18
18
|
from typing import Optional
|
|
19
19
|
from collections.abc import Iterable
|
|
@@ -27,10 +27,10 @@ from ._create import BioCypherEdge, BioCypherNode
|
|
|
27
27
|
from ._ontology import Ontology
|
|
28
28
|
from ._translate import Translator
|
|
29
29
|
|
|
30
|
-
__all__ = [
|
|
30
|
+
__all__ = ["_Neo4jDriver"]
|
|
31
31
|
|
|
32
32
|
|
|
33
|
-
class _Neo4jDriver
|
|
33
|
+
class _Neo4jDriver:
|
|
34
34
|
"""
|
|
35
35
|
Manages a BioCypher connection to a Neo4j database using the
|
|
36
36
|
``neo4j_utils.Driver`` class.
|
|
@@ -53,11 +53,10 @@ class _Neo4jDriver():
|
|
|
53
53
|
|
|
54
54
|
increment_version (bool): Whether to increment the version number.
|
|
55
55
|
|
|
56
|
-
ontology (Ontology): The ontology to use for mapping.
|
|
57
|
-
|
|
58
56
|
translator (Translator): The translator to use for mapping.
|
|
59
57
|
|
|
60
58
|
"""
|
|
59
|
+
|
|
61
60
|
def __init__(
|
|
62
61
|
self,
|
|
63
62
|
database_name: str,
|
|
@@ -65,15 +64,12 @@ class _Neo4jDriver():
|
|
|
65
64
|
user: str,
|
|
66
65
|
password: str,
|
|
67
66
|
multi_db: bool,
|
|
68
|
-
ontology: Ontology,
|
|
69
67
|
translator: Translator,
|
|
70
68
|
wipe: bool = False,
|
|
71
69
|
fetch_size: int = 1000,
|
|
72
70
|
increment_version: bool = True,
|
|
73
71
|
):
|
|
74
|
-
|
|
75
|
-
self._ontology = ontology
|
|
76
|
-
self._translator = translator
|
|
72
|
+
self.translator = translator
|
|
77
73
|
|
|
78
74
|
self._driver = neo4j_utils.Driver(
|
|
79
75
|
db_name=database_name,
|
|
@@ -89,35 +85,30 @@ class _Neo4jDriver():
|
|
|
89
85
|
# check for biocypher config in connected graph
|
|
90
86
|
|
|
91
87
|
if wipe:
|
|
92
|
-
|
|
93
88
|
self.init_db()
|
|
94
89
|
|
|
95
90
|
if increment_version:
|
|
96
|
-
|
|
97
91
|
# set new current version node
|
|
98
92
|
self._update_meta_graph()
|
|
99
93
|
|
|
100
94
|
def _update_meta_graph(self):
|
|
101
|
-
|
|
102
|
-
logger.info('Updating Neo4j meta graph.')
|
|
95
|
+
logger.info("Updating Neo4j meta graph.")
|
|
103
96
|
|
|
104
97
|
# find current version node
|
|
105
98
|
db_version = self._driver.query(
|
|
106
|
-
|
|
107
|
-
'WHERE NOT (v)-[:PRECEDES]->() '
|
|
108
|
-
'RETURN v',
|
|
99
|
+
"MATCH (v:BioCypher) " "WHERE NOT (v)-[:PRECEDES]->() " "RETURN v",
|
|
109
100
|
)
|
|
110
101
|
# add version node
|
|
111
|
-
self.add_biocypher_nodes(self.
|
|
102
|
+
self.add_biocypher_nodes(self.translator.ontology)
|
|
112
103
|
|
|
113
104
|
# connect version node to previous
|
|
114
105
|
if db_version[0]:
|
|
115
106
|
previous = db_version[0][0]
|
|
116
|
-
previous_id = previous[
|
|
107
|
+
previous_id = previous["v"]["id"]
|
|
117
108
|
e_meta = BioCypherEdge(
|
|
118
109
|
previous_id,
|
|
119
|
-
self.
|
|
120
|
-
|
|
110
|
+
self.translator.ontology.get_dict().get("node_id"),
|
|
111
|
+
"PRECEDES",
|
|
121
112
|
)
|
|
122
113
|
self.add_biocypher_edges(e_meta)
|
|
123
114
|
|
|
@@ -132,7 +123,7 @@ class _Neo4jDriver():
|
|
|
132
123
|
need of the database
|
|
133
124
|
"""
|
|
134
125
|
|
|
135
|
-
logger.info(
|
|
126
|
+
logger.info("Initialising database.")
|
|
136
127
|
self._create_constraints()
|
|
137
128
|
|
|
138
129
|
def _create_constraints(self):
|
|
@@ -144,17 +135,16 @@ class _Neo4jDriver():
|
|
|
144
135
|
constraints on the id of all entities represented as nodes.
|
|
145
136
|
"""
|
|
146
137
|
|
|
147
|
-
logger.info(
|
|
138
|
+
logger.info("Creating constraints for node types in config.")
|
|
148
139
|
|
|
149
140
|
# get structure
|
|
150
|
-
for leaf in self.
|
|
141
|
+
for leaf in self.translator.ontology.mapping.extended_schema.items():
|
|
151
142
|
label = _misc.sentencecase_to_pascalcase(leaf[0])
|
|
152
|
-
if leaf[1][
|
|
153
|
-
|
|
143
|
+
if leaf[1]["represented_as"] == "node":
|
|
154
144
|
s = (
|
|
155
|
-
f
|
|
156
|
-
f
|
|
157
|
-
|
|
145
|
+
f"CREATE CONSTRAINT `{label}_id` "
|
|
146
|
+
f"IF NOT EXISTS ON (n:`{label}`) "
|
|
147
|
+
"ASSERT n.id IS UNIQUE"
|
|
158
148
|
)
|
|
159
149
|
self._driver.query(s)
|
|
160
150
|
|
|
@@ -178,7 +168,7 @@ class _Neo4jDriver():
|
|
|
178
168
|
- second entry: Neo4j summary.
|
|
179
169
|
"""
|
|
180
170
|
|
|
181
|
-
bn = self.
|
|
171
|
+
bn = self.translator.translate_nodes(id_type_tuples)
|
|
182
172
|
return self.add_biocypher_nodes(bn)
|
|
183
173
|
|
|
184
174
|
def add_edges(self, id_src_tar_type_tuples: Iterable[tuple]) -> tuple:
|
|
@@ -210,7 +200,7 @@ class _Neo4jDriver():
|
|
|
210
200
|
- second entry: Neo4j summary.
|
|
211
201
|
"""
|
|
212
202
|
|
|
213
|
-
bn = self.
|
|
203
|
+
bn = self.translator.translate_edges(id_src_tar_type_tuples)
|
|
214
204
|
return self.add_biocypher_edges(bn)
|
|
215
205
|
|
|
216
206
|
def add_biocypher_nodes(
|
|
@@ -246,38 +236,36 @@ class _Neo4jDriver():
|
|
|
246
236
|
"""
|
|
247
237
|
|
|
248
238
|
try:
|
|
249
|
-
|
|
250
239
|
nodes = _misc.to_list(nodes)
|
|
251
240
|
|
|
252
241
|
entities = [node.get_dict() for node in nodes]
|
|
253
242
|
|
|
254
243
|
except AttributeError:
|
|
255
|
-
|
|
256
|
-
msg = 'Nodes must have a `get_dict` method.'
|
|
244
|
+
msg = "Nodes must have a `get_dict` method."
|
|
257
245
|
logger.error(msg)
|
|
258
246
|
|
|
259
247
|
raise ValueError(msg)
|
|
260
248
|
|
|
261
|
-
logger.info(f
|
|
249
|
+
logger.info(f"Merging {len(entities)} nodes.")
|
|
262
250
|
|
|
263
251
|
entity_query = (
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
252
|
+
"UNWIND $entities AS ent "
|
|
253
|
+
"CALL apoc.merge.node([ent.node_label], "
|
|
254
|
+
"{id: ent.node_id}, ent.properties, ent.properties) "
|
|
255
|
+
"YIELD node "
|
|
256
|
+
"RETURN node"
|
|
269
257
|
)
|
|
270
258
|
|
|
271
|
-
method =
|
|
259
|
+
method = "explain" if explain else "profile" if profile else "query"
|
|
272
260
|
|
|
273
261
|
result = getattr(self._driver, method)(
|
|
274
262
|
entity_query,
|
|
275
263
|
parameters={
|
|
276
|
-
|
|
264
|
+
"entities": entities,
|
|
277
265
|
},
|
|
278
266
|
)
|
|
279
267
|
|
|
280
|
-
logger.info(
|
|
268
|
+
logger.info("Finished merging nodes.")
|
|
281
269
|
|
|
282
270
|
return result
|
|
283
271
|
|
|
@@ -326,28 +314,23 @@ class _Neo4jDriver():
|
|
|
326
314
|
rels = []
|
|
327
315
|
|
|
328
316
|
try:
|
|
329
|
-
|
|
330
317
|
for e in edges:
|
|
331
|
-
|
|
332
|
-
if hasattr(e, 'get_node'):
|
|
333
|
-
|
|
318
|
+
if hasattr(e, "get_node"):
|
|
334
319
|
nodes.append(e.get_node())
|
|
335
320
|
rels.append(e.get_source_edge().get_dict())
|
|
336
321
|
rels.append(e.get_target_edge().get_dict())
|
|
337
322
|
|
|
338
323
|
else:
|
|
339
|
-
|
|
340
324
|
rels.append(e.get_dict())
|
|
341
325
|
|
|
342
326
|
except AttributeError:
|
|
343
|
-
|
|
344
|
-
msg = 'Edges and nodes must have a `get_dict` method.'
|
|
327
|
+
msg = "Edges and nodes must have a `get_dict` method."
|
|
345
328
|
logger.error(msg)
|
|
346
329
|
|
|
347
330
|
raise ValueError(msg)
|
|
348
331
|
|
|
349
332
|
self.add_biocypher_nodes(nodes)
|
|
350
|
-
logger.info(f
|
|
333
|
+
logger.info(f"Merging {len(rels)} edges.")
|
|
351
334
|
|
|
352
335
|
# cypher query
|
|
353
336
|
|
|
@@ -355,41 +338,39 @@ class _Neo4jDriver():
|
|
|
355
338
|
# properties on match and on create;
|
|
356
339
|
# TODO add node labels?
|
|
357
340
|
node_query = (
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
341
|
+
"UNWIND $rels AS r "
|
|
342
|
+
"MERGE (src {id: r.source_id}) "
|
|
343
|
+
"MERGE (tar {id: r.target_id}) "
|
|
361
344
|
)
|
|
362
345
|
|
|
363
|
-
self._driver.query(node_query, parameters={
|
|
346
|
+
self._driver.query(node_query, parameters={"rels": rels})
|
|
364
347
|
|
|
365
348
|
edge_query = (
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
349
|
+
"UNWIND $rels AS r "
|
|
350
|
+
"MATCH (src {id: r.source_id}) "
|
|
351
|
+
"MATCH (tar {id: r.target_id}) "
|
|
352
|
+
"WITH src, tar, r "
|
|
353
|
+
"CALL apoc.merge.relationship"
|
|
354
|
+
"(src, r.relationship_label, NULL, "
|
|
355
|
+
"r.properties, tar, r.properties) "
|
|
356
|
+
"YIELD rel "
|
|
357
|
+
"RETURN rel"
|
|
375
358
|
)
|
|
376
359
|
|
|
377
|
-
method =
|
|
360
|
+
method = "explain" if explain else "profile" if profile else "query"
|
|
378
361
|
|
|
379
|
-
result = getattr(self._driver,
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
})
|
|
362
|
+
result = getattr(self._driver, method)(
|
|
363
|
+
edge_query, parameters={"rels": rels}
|
|
364
|
+
)
|
|
383
365
|
|
|
384
|
-
logger.info(
|
|
366
|
+
logger.info("Finished merging edges.")
|
|
385
367
|
|
|
386
368
|
return result
|
|
387
369
|
|
|
388
370
|
|
|
389
371
|
def get_driver(
|
|
390
372
|
dbms: str,
|
|
391
|
-
translator:
|
|
392
|
-
ontology: 'Ontology',
|
|
373
|
+
translator: "Translator",
|
|
393
374
|
):
|
|
394
375
|
"""
|
|
395
376
|
Function to return the writer class.
|
|
@@ -400,15 +381,14 @@ def get_driver(
|
|
|
400
381
|
|
|
401
382
|
dbms_config = _config(dbms)
|
|
402
383
|
|
|
403
|
-
if dbms ==
|
|
384
|
+
if dbms == "neo4j":
|
|
404
385
|
return _Neo4jDriver(
|
|
405
|
-
database_name=dbms_config[
|
|
406
|
-
wipe=dbms_config[
|
|
407
|
-
uri=dbms_config[
|
|
408
|
-
user=dbms_config[
|
|
409
|
-
password=dbms_config[
|
|
410
|
-
multi_db=dbms_config[
|
|
411
|
-
ontology=ontology,
|
|
386
|
+
database_name=dbms_config["database_name"],
|
|
387
|
+
wipe=dbms_config["wipe"],
|
|
388
|
+
uri=dbms_config["uri"],
|
|
389
|
+
user=dbms_config["user"],
|
|
390
|
+
password=dbms_config["password"],
|
|
391
|
+
multi_db=dbms_config["multi_db"],
|
|
412
392
|
translator=translator,
|
|
413
393
|
)
|
|
414
394
|
|