biocypher 0.5.17__py3-none-any.whl → 0.5.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biocypher might be problematic. Click here for more details.
- biocypher/__init__.py +10 -11
- biocypher/_config/__init__.py +25 -27
- biocypher/_config/biocypher_config.yaml +1 -2
- biocypher/_connect.py +59 -79
- biocypher/_core.py +146 -78
- biocypher/_create.py +55 -52
- biocypher/_deduplicate.py +81 -36
- biocypher/_logger.py +12 -13
- biocypher/_mapping.py +69 -83
- biocypher/_metadata.py +12 -17
- biocypher/_misc.py +17 -28
- biocypher/_ontology.py +85 -101
- biocypher/_pandas.py +46 -11
- biocypher/_translate.py +93 -113
- biocypher/_write.py +457 -404
- {biocypher-0.5.17.dist-info → biocypher-0.5.20.dist-info}/METADATA +16 -6
- biocypher-0.5.20.dist-info/RECORD +23 -0
- biocypher-0.5.17.dist-info/RECORD +0 -23
- {biocypher-0.5.17.dist-info → biocypher-0.5.20.dist-info}/LICENSE +0 -0
- {biocypher-0.5.17.dist-info → biocypher-0.5.20.dist-info}/WHEEL +0 -0
biocypher/_core.py
CHANGED
|
@@ -12,34 +12,38 @@
|
|
|
12
12
|
BioCypher core module. Interfaces with the user and distributes tasks to
|
|
13
13
|
submodules.
|
|
14
14
|
"""
|
|
15
|
-
from typing import
|
|
15
|
+
from typing import Optional
|
|
16
|
+
import os
|
|
17
|
+
|
|
16
18
|
from more_itertools import peekable
|
|
19
|
+
import yaml
|
|
20
|
+
|
|
17
21
|
import pandas as pd
|
|
18
22
|
|
|
19
23
|
from ._logger import logger
|
|
20
24
|
|
|
21
|
-
logger.debug(f
|
|
25
|
+
logger.debug(f"Loading module {__name__}.")
|
|
22
26
|
|
|
23
27
|
from ._write import get_writer
|
|
24
|
-
from ._pandas import Pandas
|
|
25
28
|
from ._config import config as _config
|
|
26
29
|
from ._config import update_from_file as _file_update
|
|
27
|
-
from ._create import BioCypherEdge, BioCypherNode
|
|
30
|
+
from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
|
|
31
|
+
from ._pandas import Pandas
|
|
28
32
|
from ._connect import get_driver
|
|
29
33
|
from ._mapping import OntologyMapping
|
|
30
34
|
from ._ontology import Ontology
|
|
31
35
|
from ._translate import Translator
|
|
32
36
|
from ._deduplicate import Deduplicator
|
|
33
37
|
|
|
34
|
-
__all__ = [
|
|
38
|
+
__all__ = ["BioCypher"]
|
|
35
39
|
|
|
36
|
-
SUPPORTED_DBMS = [
|
|
40
|
+
SUPPORTED_DBMS = ["neo4j", "postgresql"]
|
|
37
41
|
|
|
38
42
|
REQUIRED_CONFIG = [
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
+
"dbms",
|
|
44
|
+
"offline",
|
|
45
|
+
"strict_mode",
|
|
46
|
+
"head_ontology",
|
|
43
47
|
]
|
|
44
48
|
|
|
45
49
|
|
|
@@ -75,6 +79,7 @@ class BioCypher:
|
|
|
75
79
|
provided, the default value 'biocypher-out' will be used.
|
|
76
80
|
|
|
77
81
|
"""
|
|
82
|
+
|
|
78
83
|
def __init__(
|
|
79
84
|
self,
|
|
80
85
|
dbms: str = None,
|
|
@@ -88,65 +93,64 @@ class BioCypher:
|
|
|
88
93
|
# legacy params
|
|
89
94
|
db_name: str = None,
|
|
90
95
|
):
|
|
91
|
-
|
|
92
96
|
# Update configuration if custom path is provided
|
|
93
97
|
if biocypher_config_path:
|
|
94
98
|
_file_update(biocypher_config_path)
|
|
95
99
|
|
|
96
100
|
if db_name:
|
|
97
101
|
logger.warning(
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
102
|
+
"The parameter `db_name` is deprecated. Please set the "
|
|
103
|
+
"`database_name` setting in the `biocypher_config.yaml` file "
|
|
104
|
+
"instead."
|
|
101
105
|
)
|
|
102
|
-
_config(**{db_name: {
|
|
106
|
+
_config(**{db_name: {"database_name": db_name}})
|
|
103
107
|
|
|
104
108
|
# Load configuration
|
|
105
|
-
self.base_config = _config(
|
|
109
|
+
self.base_config = _config("biocypher")
|
|
106
110
|
|
|
107
111
|
# Check for required configuration
|
|
108
112
|
for key in REQUIRED_CONFIG:
|
|
109
113
|
if key not in self.base_config:
|
|
110
|
-
raise ValueError(f
|
|
114
|
+
raise ValueError(f"Configuration key {key} is required.")
|
|
111
115
|
|
|
112
116
|
# Set configuration - mandatory
|
|
113
|
-
self._dbms = dbms or self.base_config[
|
|
117
|
+
self._dbms = dbms or self.base_config["dbms"]
|
|
114
118
|
|
|
115
119
|
if offline is None:
|
|
116
|
-
self._offline = self.base_config[
|
|
120
|
+
self._offline = self.base_config["offline"]
|
|
117
121
|
else:
|
|
118
122
|
self._offline = offline
|
|
119
123
|
|
|
120
124
|
if strict_mode is None:
|
|
121
|
-
self._strict_mode = self.base_config[
|
|
125
|
+
self._strict_mode = self.base_config["strict_mode"]
|
|
122
126
|
else:
|
|
123
127
|
self._strict_mode = strict_mode
|
|
124
128
|
|
|
125
129
|
self._schema_config_path = schema_config_path or self.base_config.get(
|
|
126
|
-
|
|
130
|
+
"schema_config_path"
|
|
127
131
|
)
|
|
128
132
|
|
|
129
133
|
if not self._schema_config_path:
|
|
130
134
|
raise ValueError(
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
135
|
+
"BioCypher requires a schema configuration; please provide a "
|
|
136
|
+
"path to the schema configuration YAML file via "
|
|
137
|
+
"`biocypher_config.yaml` or `BioCypher` class parameter."
|
|
134
138
|
)
|
|
135
139
|
|
|
136
|
-
self._head_ontology = head_ontology or self.base_config[
|
|
140
|
+
self._head_ontology = head_ontology or self.base_config["head_ontology"]
|
|
137
141
|
|
|
138
142
|
# Set configuration - optional
|
|
139
143
|
self._output_directory = output_directory or self.base_config.get(
|
|
140
|
-
|
|
144
|
+
"output_directory"
|
|
141
145
|
)
|
|
142
146
|
self._tail_ontologies = tail_ontologies or self.base_config.get(
|
|
143
|
-
|
|
147
|
+
"tail_ontologies"
|
|
144
148
|
)
|
|
145
149
|
|
|
146
150
|
if self._dbms not in SUPPORTED_DBMS:
|
|
147
151
|
raise ValueError(
|
|
148
|
-
f
|
|
149
|
-
f
|
|
152
|
+
f"DBMS {self._dbms} not supported. "
|
|
153
|
+
f"Please select from {SUPPORTED_DBMS}."
|
|
150
154
|
)
|
|
151
155
|
|
|
152
156
|
# Initialize
|
|
@@ -156,7 +160,7 @@ class BioCypher:
|
|
|
156
160
|
self._ontology = None
|
|
157
161
|
self._writer = None
|
|
158
162
|
self._pd = None
|
|
159
|
-
|
|
163
|
+
|
|
160
164
|
def _get_deduplicator(self) -> Deduplicator:
|
|
161
165
|
"""
|
|
162
166
|
Create deduplicator if not exists and return.
|
|
@@ -179,19 +183,6 @@ class BioCypher:
|
|
|
179
183
|
|
|
180
184
|
return self._ontology_mapping
|
|
181
185
|
|
|
182
|
-
def _get_translator(self) -> Translator:
|
|
183
|
-
"""
|
|
184
|
-
Create translator if not exists and return.
|
|
185
|
-
"""
|
|
186
|
-
|
|
187
|
-
if not self._translator:
|
|
188
|
-
self._translator = Translator(
|
|
189
|
-
ontology_mapping=self._get_ontology_mapping(),
|
|
190
|
-
strict_mode=self._strict_mode,
|
|
191
|
-
)
|
|
192
|
-
|
|
193
|
-
return self._translator
|
|
194
|
-
|
|
195
186
|
def _get_ontology(self) -> Ontology:
|
|
196
187
|
"""
|
|
197
188
|
Create ontology if not exists and return.
|
|
@@ -206,23 +197,34 @@ class BioCypher:
|
|
|
206
197
|
|
|
207
198
|
return self._ontology
|
|
208
199
|
|
|
200
|
+
def _get_translator(self) -> Translator:
|
|
201
|
+
"""
|
|
202
|
+
Create translator if not exists and return.
|
|
203
|
+
"""
|
|
204
|
+
|
|
205
|
+
if not self._translator:
|
|
206
|
+
self._translator = Translator(
|
|
207
|
+
ontology=self._get_ontology(),
|
|
208
|
+
strict_mode=self._strict_mode,
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
return self._translator
|
|
212
|
+
|
|
209
213
|
def _get_writer(self):
|
|
210
214
|
"""
|
|
211
215
|
Create writer if not online. Set as instance variable `self._writer`.
|
|
212
216
|
"""
|
|
213
217
|
|
|
214
|
-
# Get worker
|
|
215
218
|
if self._offline:
|
|
216
219
|
self._writer = get_writer(
|
|
217
220
|
dbms=self._dbms,
|
|
218
221
|
translator=self._get_translator(),
|
|
219
|
-
ontology=self._get_ontology(),
|
|
220
222
|
deduplicator=self._get_deduplicator(),
|
|
221
223
|
output_directory=self._output_directory,
|
|
222
224
|
strict_mode=self._strict_mode,
|
|
223
225
|
)
|
|
224
226
|
else:
|
|
225
|
-
raise NotImplementedError(
|
|
227
|
+
raise NotImplementedError("Cannot get writer in online mode.")
|
|
226
228
|
|
|
227
229
|
def _get_driver(self):
|
|
228
230
|
"""
|
|
@@ -233,16 +235,15 @@ class BioCypher:
|
|
|
233
235
|
self._driver = get_driver(
|
|
234
236
|
dbms=self._dbms,
|
|
235
237
|
translator=self._get_translator(),
|
|
236
|
-
ontology=self._get_ontology(),
|
|
237
238
|
deduplicator=self._get_deduplicator(),
|
|
238
239
|
)
|
|
239
240
|
else:
|
|
240
|
-
raise NotImplementedError(
|
|
241
|
+
raise NotImplementedError("Cannot get driver in offline mode.")
|
|
241
242
|
|
|
242
243
|
def write_nodes(self, nodes, batch_size: int = int(1e6)) -> bool:
|
|
243
244
|
"""
|
|
244
245
|
Write nodes to database. Either takes an iterable of tuples (if given,
|
|
245
|
-
translates to ``BioCypherNode`` objects) or an iterable of
|
|
246
|
+
translates to ``BioCypherNode`` objects) or an iterable of
|
|
246
247
|
``BioCypherNode`` objects.
|
|
247
248
|
|
|
248
249
|
Args:
|
|
@@ -287,7 +288,7 @@ class BioCypher:
|
|
|
287
288
|
# write edge files
|
|
288
289
|
return self._writer.write_edges(tedges, batch_size=batch_size)
|
|
289
290
|
|
|
290
|
-
def to_df(self) ->
|
|
291
|
+
def to_df(self) -> list[pd.DataFrame]:
|
|
291
292
|
"""
|
|
292
293
|
Convert entities to a pandas DataFrame for each entity type and return
|
|
293
294
|
a list.
|
|
@@ -303,9 +304,8 @@ class BioCypher:
|
|
|
303
304
|
raise ValueError(
|
|
304
305
|
"No pandas instance found. Please call `add()` first."
|
|
305
306
|
)
|
|
306
|
-
|
|
307
|
+
|
|
307
308
|
return self._pd.dfs
|
|
308
|
-
|
|
309
309
|
|
|
310
310
|
def add(self, entities):
|
|
311
311
|
"""
|
|
@@ -317,13 +317,16 @@ class BioCypher:
|
|
|
317
317
|
if not self._pd:
|
|
318
318
|
self._pd = Pandas(
|
|
319
319
|
translator=self._get_translator(),
|
|
320
|
-
ontology=self._get_ontology(),
|
|
321
320
|
deduplicator=self._get_deduplicator(),
|
|
322
321
|
)
|
|
323
322
|
|
|
324
323
|
entities = peekable(entities)
|
|
325
324
|
|
|
326
|
-
if
|
|
325
|
+
if (
|
|
326
|
+
isinstance(entities.peek(), BioCypherNode)
|
|
327
|
+
or isinstance(entities.peek(), BioCypherEdge)
|
|
328
|
+
or isinstance(entities.peek(), BioCypherRelAsNode)
|
|
329
|
+
):
|
|
327
330
|
tentities = entities
|
|
328
331
|
elif len(entities.peek()) < 4:
|
|
329
332
|
tentities = self._translator.translate_nodes(entities)
|
|
@@ -367,11 +370,11 @@ class BioCypher:
|
|
|
367
370
|
Merge edges into database. Either takes an iterable of tuples (if given,
|
|
368
371
|
translates to ``BioCypherEdge`` objects) or an iterable of
|
|
369
372
|
``BioCypherEdge`` objects.
|
|
370
|
-
|
|
373
|
+
|
|
371
374
|
Args:
|
|
372
|
-
edges (iterable): An iterable of edges to merge into the database.
|
|
375
|
+
edges (iterable): An iterable of edges to merge into the database.
|
|
373
376
|
|
|
374
|
-
Returns:
|
|
377
|
+
Returns:
|
|
375
378
|
bool: True if successful.
|
|
376
379
|
"""
|
|
377
380
|
|
|
@@ -388,7 +391,7 @@ class BioCypher:
|
|
|
388
391
|
|
|
389
392
|
# OVERVIEW AND CONVENIENCE METHODS ###
|
|
390
393
|
|
|
391
|
-
def log_missing_input_labels(self) -> Optional[
|
|
394
|
+
def log_missing_input_labels(self) -> Optional[dict[str, list[str]]]:
|
|
392
395
|
"""
|
|
393
396
|
|
|
394
397
|
Get the set of input labels encountered without an entry in the
|
|
@@ -405,19 +408,19 @@ class BioCypher:
|
|
|
405
408
|
|
|
406
409
|
if mt:
|
|
407
410
|
msg = (
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
411
|
+
"Input entities not accounted for due to them not being "
|
|
412
|
+
"present in the `schema_config.yaml` configuration file "
|
|
413
|
+
"(this is not necessarily a problem, if you did not intend "
|
|
414
|
+
"to include them in the database; see the log for details): \n"
|
|
412
415
|
)
|
|
413
416
|
for k, v in mt.items():
|
|
414
|
-
msg += f
|
|
417
|
+
msg += f" {k}: {v} \n"
|
|
415
418
|
|
|
416
419
|
logger.info(msg)
|
|
417
420
|
return mt
|
|
418
421
|
|
|
419
422
|
else:
|
|
420
|
-
logger.info(
|
|
423
|
+
logger.info("No missing labels in input.")
|
|
421
424
|
return None
|
|
422
425
|
|
|
423
426
|
def log_duplicates(self) -> None:
|
|
@@ -429,46 +432,44 @@ class BioCypher:
|
|
|
429
432
|
dn = self._deduplicator.get_duplicate_nodes()
|
|
430
433
|
|
|
431
434
|
if dn:
|
|
432
|
-
|
|
433
435
|
ntypes = dn[0]
|
|
434
436
|
nids = dn[1]
|
|
435
437
|
|
|
436
|
-
msg =
|
|
438
|
+
msg = "Duplicate node types encountered (IDs in log): \n"
|
|
437
439
|
for typ in ntypes:
|
|
438
|
-
msg += f
|
|
440
|
+
msg += f" {typ}\n"
|
|
439
441
|
|
|
440
442
|
logger.info(msg)
|
|
441
443
|
|
|
442
|
-
idmsg =
|
|
444
|
+
idmsg = "Duplicate node IDs encountered: \n"
|
|
443
445
|
for _id in nids:
|
|
444
|
-
idmsg += f
|
|
446
|
+
idmsg += f" {_id}\n"
|
|
445
447
|
|
|
446
448
|
logger.debug(idmsg)
|
|
447
449
|
|
|
448
450
|
else:
|
|
449
|
-
logger.info(
|
|
451
|
+
logger.info("No duplicate nodes in input.")
|
|
450
452
|
|
|
451
453
|
de = self._deduplicator.get_duplicate_edges()
|
|
452
454
|
|
|
453
455
|
if de:
|
|
454
|
-
|
|
455
456
|
etypes = de[0]
|
|
456
457
|
eids = de[1]
|
|
457
458
|
|
|
458
|
-
msg =
|
|
459
|
+
msg = "Duplicate edge types encountered (IDs in log): \n"
|
|
459
460
|
for typ in etypes:
|
|
460
|
-
msg += f
|
|
461
|
+
msg += f" {typ}\n"
|
|
461
462
|
|
|
462
463
|
logger.info(msg)
|
|
463
464
|
|
|
464
|
-
idmsg =
|
|
465
|
+
idmsg = "Duplicate edge IDs encountered: \n"
|
|
465
466
|
for _id in eids:
|
|
466
|
-
idmsg += f
|
|
467
|
+
idmsg += f" {_id}\n"
|
|
467
468
|
|
|
468
469
|
logger.debug(idmsg)
|
|
469
470
|
|
|
470
471
|
else:
|
|
471
|
-
logger.info(
|
|
472
|
+
logger.info("No duplicate edges in input.")
|
|
472
473
|
|
|
473
474
|
def show_ontology_structure(self, **kwargs) -> None:
|
|
474
475
|
"""
|
|
@@ -498,11 +499,78 @@ class BioCypher:
|
|
|
498
499
|
|
|
499
500
|
if not self._offline:
|
|
500
501
|
raise NotImplementedError(
|
|
501
|
-
|
|
502
|
+
"Cannot write import call in online mode."
|
|
502
503
|
)
|
|
503
504
|
|
|
504
505
|
self._writer.write_import_call()
|
|
505
506
|
|
|
507
|
+
def write_schema_info(self) -> None:
|
|
508
|
+
"""
|
|
509
|
+
Write an extended schema info YAML file that extends the
|
|
510
|
+
`schema_config.yaml` with run-time information of the built KG. For
|
|
511
|
+
instance, include information on whether something is present in the actual
|
|
512
|
+
knowledge graph, whether it is a relationship (which is important in the
|
|
513
|
+
case of representing relationships as nodes) and the actual sources and
|
|
514
|
+
targets of edges. Since this file can be used in place of the original
|
|
515
|
+
`schema_config.yaml` file, it indicates that it is the extended schema
|
|
516
|
+
by setting `is_schema_info` to `true`.
|
|
517
|
+
|
|
518
|
+
We start by using the `extended_schema` dictionary from the ontology
|
|
519
|
+
class instance, which contains all expanded entities and relationships.
|
|
520
|
+
The information of whether something is a relationship can be gathered
|
|
521
|
+
from the deduplicator instance, which keeps track of all entities that
|
|
522
|
+
have been seen.
|
|
523
|
+
"""
|
|
524
|
+
|
|
525
|
+
if not self._offline:
|
|
526
|
+
raise NotImplementedError(
|
|
527
|
+
"Cannot write schema info in online mode."
|
|
528
|
+
)
|
|
529
|
+
|
|
530
|
+
ontology = self._get_ontology()
|
|
531
|
+
schema = ontology.mapping.extended_schema
|
|
532
|
+
schema["is_schema_info"] = True
|
|
533
|
+
|
|
534
|
+
deduplicator = self._get_deduplicator()
|
|
535
|
+
for node in deduplicator.entity_types:
|
|
536
|
+
if node in schema.keys():
|
|
537
|
+
schema[node]["present_in_knowledge_graph"] = True
|
|
538
|
+
schema[node]["is_relationship"] = False
|
|
539
|
+
else:
|
|
540
|
+
logger.info(
|
|
541
|
+
f"Node {node} not present in extended schema. "
|
|
542
|
+
"Skipping schema info."
|
|
543
|
+
)
|
|
544
|
+
|
|
545
|
+
# find 'label_as_edge' cases in schema entries
|
|
546
|
+
changed_labels = {}
|
|
547
|
+
for k, v in schema.items():
|
|
548
|
+
if not isinstance(v, dict):
|
|
549
|
+
continue
|
|
550
|
+
if "label_as_edge" in v.keys():
|
|
551
|
+
if v["label_as_edge"] in deduplicator.seen_relationships.keys():
|
|
552
|
+
changed_labels[v["label_as_edge"]] = k
|
|
553
|
+
|
|
554
|
+
for edge in deduplicator.seen_relationships.keys():
|
|
555
|
+
if edge in changed_labels.keys():
|
|
556
|
+
edge = changed_labels[edge]
|
|
557
|
+
if edge in schema.keys():
|
|
558
|
+
schema[edge]["present_in_knowledge_graph"] = True
|
|
559
|
+
schema[edge]["is_relationship"] = True
|
|
560
|
+
# TODO information about source and target nodes
|
|
561
|
+
else:
|
|
562
|
+
logger.info(
|
|
563
|
+
f"Edge {edge} not present in extended schema. "
|
|
564
|
+
"Skipping schema info."
|
|
565
|
+
)
|
|
566
|
+
|
|
567
|
+
# write to output directory as YAML file
|
|
568
|
+
path = os.path.join(self._output_directory, "schema_info.yaml")
|
|
569
|
+
with open(path, "w") as f:
|
|
570
|
+
f.write(yaml.dump(schema))
|
|
571
|
+
|
|
572
|
+
return schema
|
|
573
|
+
|
|
506
574
|
# TRANSLATION METHODS ###
|
|
507
575
|
|
|
508
576
|
def translate_term(self, term: str) -> str:
|
|
@@ -520,7 +588,7 @@ class BioCypher:
|
|
|
520
588
|
self.start_ontology()
|
|
521
589
|
|
|
522
590
|
return self._translator.translate_term(term)
|
|
523
|
-
|
|
591
|
+
|
|
524
592
|
def summary(self) -> None:
|
|
525
593
|
"""
|
|
526
594
|
Wrapper for showing ontology structure and logging duplicates and
|
biocypher/_create.py
CHANGED
|
@@ -13,16 +13,16 @@ dataclasses.
|
|
|
13
13
|
"""
|
|
14
14
|
from ._logger import logger
|
|
15
15
|
|
|
16
|
-
logger.debug(f
|
|
16
|
+
logger.debug(f"Loading module {__name__}.")
|
|
17
17
|
|
|
18
18
|
from typing import Union
|
|
19
19
|
from dataclasses import field, dataclass
|
|
20
20
|
import os
|
|
21
21
|
|
|
22
22
|
__all__ = [
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
23
|
+
"BioCypherEdge",
|
|
24
|
+
"BioCypherNode",
|
|
25
|
+
"BioCypherRelAsNode",
|
|
26
26
|
]
|
|
27
27
|
|
|
28
28
|
|
|
@@ -53,7 +53,7 @@ class BioCypherNode:
|
|
|
53
53
|
|
|
54
54
|
node_id: str
|
|
55
55
|
node_label: str
|
|
56
|
-
preferred_id: str =
|
|
56
|
+
preferred_id: str = "id"
|
|
57
57
|
properties: dict = field(default_factory=dict)
|
|
58
58
|
|
|
59
59
|
def __post_init__(self):
|
|
@@ -64,47 +64,50 @@ class BioCypherNode:
|
|
|
64
64
|
|
|
65
65
|
Replace unwanted characters in properties.
|
|
66
66
|
"""
|
|
67
|
-
self.properties[
|
|
68
|
-
self.properties[
|
|
67
|
+
self.properties["id"] = self.node_id
|
|
68
|
+
self.properties["preferred_id"] = self.preferred_id or None
|
|
69
69
|
# TODO actually make None possible here; as is, "id" is the default in
|
|
70
70
|
# the dataclass as well as in the configuration file
|
|
71
71
|
|
|
72
|
-
if
|
|
72
|
+
if ":TYPE" in self.properties.keys():
|
|
73
73
|
logger.warning(
|
|
74
74
|
"Keyword ':TYPE' is reserved for Neo4j. "
|
|
75
|
-
|
|
75
|
+
"Removing from properties.",
|
|
76
76
|
# "Renaming to 'type'."
|
|
77
77
|
)
|
|
78
78
|
# self.properties["type"] = self.properties[":TYPE"]
|
|
79
|
-
del self.properties[
|
|
79
|
+
del self.properties[":TYPE"]
|
|
80
80
|
|
|
81
81
|
for k, v in self.properties.items():
|
|
82
82
|
if isinstance(v, str):
|
|
83
83
|
self.properties[k] = (
|
|
84
84
|
v.replace(
|
|
85
85
|
os.linesep,
|
|
86
|
-
|
|
87
|
-
)
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
86
|
+
" ",
|
|
87
|
+
)
|
|
88
|
+
.replace(
|
|
89
|
+
"\n",
|
|
90
|
+
" ",
|
|
91
|
+
)
|
|
92
|
+
.replace(
|
|
93
|
+
"\r",
|
|
94
|
+
" ",
|
|
93
95
|
)
|
|
94
96
|
)
|
|
95
97
|
|
|
96
98
|
elif isinstance(v, list):
|
|
97
|
-
self.properties[k] =
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
99
|
+
self.properties[k] = [
|
|
100
|
+
val.replace(
|
|
101
|
+
os.linesep,
|
|
102
|
+
" ",
|
|
103
|
+
)
|
|
104
|
+
.replace(
|
|
105
|
+
"\n",
|
|
106
|
+
" ",
|
|
107
|
+
)
|
|
108
|
+
.replace("\r", " ")
|
|
109
|
+
for val in v
|
|
110
|
+
]
|
|
108
111
|
|
|
109
112
|
def get_id(self) -> str:
|
|
110
113
|
"""
|
|
@@ -123,7 +126,7 @@ class BioCypherNode:
|
|
|
123
126
|
str: node_label
|
|
124
127
|
"""
|
|
125
128
|
return self.node_label
|
|
126
|
-
|
|
129
|
+
|
|
127
130
|
def get_type(self) -> str:
|
|
128
131
|
"""
|
|
129
132
|
Returns primary node label.
|
|
@@ -161,9 +164,9 @@ class BioCypherNode:
|
|
|
161
164
|
properties as second-level dict.
|
|
162
165
|
"""
|
|
163
166
|
return {
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
+
"node_id": self.node_id,
|
|
168
|
+
"node_label": self.node_label,
|
|
169
|
+
"properties": self.properties,
|
|
167
170
|
}
|
|
168
171
|
|
|
169
172
|
|
|
@@ -204,30 +207,30 @@ class BioCypherEdge:
|
|
|
204
207
|
Check for reserved keywords.
|
|
205
208
|
"""
|
|
206
209
|
|
|
207
|
-
if
|
|
210
|
+
if ":TYPE" in self.properties.keys():
|
|
208
211
|
logger.debug(
|
|
209
212
|
"Keyword ':TYPE' is reserved for Neo4j. "
|
|
210
|
-
|
|
213
|
+
"Removing from properties.",
|
|
211
214
|
# "Renaming to 'type'."
|
|
212
215
|
)
|
|
213
216
|
# self.properties["type"] = self.properties[":TYPE"]
|
|
214
|
-
del self.properties[
|
|
215
|
-
elif
|
|
217
|
+
del self.properties[":TYPE"]
|
|
218
|
+
elif "id" in self.properties.keys():
|
|
216
219
|
logger.debug(
|
|
217
220
|
"Keyword 'id' is reserved for Neo4j. "
|
|
218
|
-
|
|
221
|
+
"Removing from properties.",
|
|
219
222
|
# "Renaming to 'type'."
|
|
220
223
|
)
|
|
221
224
|
# self.properties["type"] = self.properties[":TYPE"]
|
|
222
|
-
del self.properties[
|
|
223
|
-
elif
|
|
225
|
+
del self.properties["id"]
|
|
226
|
+
elif "_ID" in self.properties.keys():
|
|
224
227
|
logger.debug(
|
|
225
228
|
"Keyword '_ID' is reserved for Postgres. "
|
|
226
|
-
|
|
229
|
+
"Removing from properties.",
|
|
227
230
|
# "Renaming to 'type'."
|
|
228
231
|
)
|
|
229
232
|
# self.properties["type"] = self.properties[":TYPE"]
|
|
230
|
-
del self.properties[
|
|
233
|
+
del self.properties["_ID"]
|
|
231
234
|
|
|
232
235
|
def get_id(self) -> Union[str, None]:
|
|
233
236
|
"""
|
|
@@ -295,11 +298,11 @@ class BioCypherEdge:
|
|
|
295
298
|
dict.
|
|
296
299
|
"""
|
|
297
300
|
return {
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
301
|
+
"relationship_id": self.relationship_id or None,
|
|
302
|
+
"source_id": self.source_id,
|
|
303
|
+
"target_id": self.target_id,
|
|
304
|
+
"relationship_label": self.relationship_label,
|
|
305
|
+
"properties": self.properties,
|
|
303
306
|
}
|
|
304
307
|
|
|
305
308
|
|
|
@@ -331,20 +334,20 @@ class BioCypherRelAsNode:
|
|
|
331
334
|
def __post_init__(self):
|
|
332
335
|
if not isinstance(self.node, BioCypherNode):
|
|
333
336
|
raise TypeError(
|
|
334
|
-
f
|
|
335
|
-
f
|
|
337
|
+
f"BioCypherRelAsNode.node must be a BioCypherNode, "
|
|
338
|
+
f"not {type(self.node)}.",
|
|
336
339
|
)
|
|
337
340
|
|
|
338
341
|
if not isinstance(self.source_edge, BioCypherEdge):
|
|
339
342
|
raise TypeError(
|
|
340
|
-
f
|
|
341
|
-
f
|
|
343
|
+
f"BioCypherRelAsNode.source_edge must be a BioCypherEdge, "
|
|
344
|
+
f"not {type(self.source_edge)}.",
|
|
342
345
|
)
|
|
343
346
|
|
|
344
347
|
if not isinstance(self.target_edge, BioCypherEdge):
|
|
345
348
|
raise TypeError(
|
|
346
|
-
f
|
|
347
|
-
f
|
|
349
|
+
f"BioCypherRelAsNode.target_edge must be a BioCypherEdge, "
|
|
350
|
+
f"not {type(self.target_edge)}.",
|
|
348
351
|
)
|
|
349
352
|
|
|
350
353
|
def get_node(self) -> BioCypherNode:
|