biocypher 0.8.0__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biocypher might be problematic. Click here for more details.
- biocypher/_config/biocypher_config.yaml +7 -1
- biocypher/_core.py +25 -4
- biocypher/_metadata.py +1 -1
- biocypher/_ontology.py +144 -51
- biocypher/_translate.py +84 -79
- biocypher/output/write/_batch_writer.py +99 -50
- biocypher/output/write/_get_writer.py +29 -12
- biocypher/output/write/graph/_arangodb.py +44 -32
- biocypher/output/write/graph/_neo4j.py +3 -4
- biocypher/output/write/graph/_owl.py +569 -0
- biocypher/output/write/graph/_rdf.py +234 -97
- {biocypher-0.8.0.dist-info → biocypher-0.9.1.dist-info}/METADATA +1 -1
- {biocypher-0.8.0.dist-info → biocypher-0.9.1.dist-info}/RECORD +15 -14
- {biocypher-0.8.0.dist-info → biocypher-0.9.1.dist-info}/LICENSE +0 -0
- {biocypher-0.8.0.dist-info → biocypher-0.9.1.dist-info}/WHEEL +0 -0
|
@@ -1,33 +1,113 @@
|
|
|
1
|
-
"""
|
|
2
|
-
suitable for import into a DBMS.
|
|
3
|
-
"""
|
|
1
|
+
"""Module to provide the RDF writer class."""
|
|
4
2
|
|
|
5
3
|
import os
|
|
6
4
|
|
|
7
5
|
from types import GeneratorType
|
|
8
6
|
|
|
9
|
-
from rdflib import
|
|
7
|
+
from rdflib import (
|
|
8
|
+
DC,
|
|
9
|
+
DCTERMS,
|
|
10
|
+
RDF,
|
|
11
|
+
RDFS,
|
|
12
|
+
SKOS,
|
|
13
|
+
Graph,
|
|
14
|
+
Literal,
|
|
15
|
+
Namespace,
|
|
16
|
+
URIRef,
|
|
17
|
+
)
|
|
10
18
|
from rdflib.namespace import (
|
|
11
19
|
_NAMESPACE_PREFIXES_CORE,
|
|
12
20
|
_NAMESPACE_PREFIXES_RDFLIB,
|
|
13
21
|
)
|
|
14
22
|
|
|
15
23
|
from biocypher._create import BioCypherEdge, BioCypherNode
|
|
24
|
+
from biocypher._deduplicate import Deduplicator
|
|
16
25
|
from biocypher._logger import logger
|
|
26
|
+
from biocypher._translate import Translator
|
|
17
27
|
from biocypher.output.write._batch_writer import _BatchWriter
|
|
18
28
|
|
|
19
29
|
|
|
20
30
|
class _RDFWriter(_BatchWriter):
|
|
21
|
-
"""
|
|
22
|
-
|
|
23
|
-
|
|
31
|
+
"""Write BioCypher's property graph into an RDF format.
|
|
32
|
+
|
|
33
|
+
Uses `rdflib` and all the extensions it supports (RDF/XML, N3, NTriples,
|
|
34
|
+
N-Quads, Turtle, TriX, Trig and JSON-LD). By default, the conversion
|
|
24
35
|
is done keeping only the minimum information about node and edges,
|
|
25
36
|
skipping all properties.
|
|
26
37
|
"""
|
|
27
38
|
|
|
39
|
+
def __init__(
|
|
40
|
+
self,
|
|
41
|
+
translator: Translator,
|
|
42
|
+
deduplicator: Deduplicator,
|
|
43
|
+
delimiter: str,
|
|
44
|
+
array_delimiter: str = ",",
|
|
45
|
+
quote: str = '"',
|
|
46
|
+
output_directory: str | None = None,
|
|
47
|
+
db_name: str = "neo4j",
|
|
48
|
+
import_call_bin_prefix: str | None = None,
|
|
49
|
+
import_call_file_prefix: str | None = None,
|
|
50
|
+
wipe: bool = True,
|
|
51
|
+
strict_mode: bool = False,
|
|
52
|
+
skip_bad_relationships: bool = False,
|
|
53
|
+
skip_duplicate_nodes: bool = False,
|
|
54
|
+
db_user: str = None,
|
|
55
|
+
db_password: str = None,
|
|
56
|
+
db_host: str = None,
|
|
57
|
+
db_port: str = None,
|
|
58
|
+
file_format: str = None,
|
|
59
|
+
rdf_namespaces: dict = {},
|
|
60
|
+
labels_order: str = "Ascending",
|
|
61
|
+
**kwargs,
|
|
62
|
+
):
|
|
63
|
+
super().__init__(
|
|
64
|
+
translator=translator,
|
|
65
|
+
deduplicator=deduplicator,
|
|
66
|
+
delimiter=delimiter,
|
|
67
|
+
array_delimiter=array_delimiter,
|
|
68
|
+
quote=quote,
|
|
69
|
+
output_directory=output_directory,
|
|
70
|
+
db_name=db_name,
|
|
71
|
+
import_call_bin_prefix=import_call_bin_prefix,
|
|
72
|
+
import_call_file_prefix=import_call_file_prefix,
|
|
73
|
+
wipe=wipe,
|
|
74
|
+
strict_mode=strict_mode,
|
|
75
|
+
skip_bad_relationships=skip_bad_relationships,
|
|
76
|
+
skip_duplicate_nodes=skip_duplicate_nodes,
|
|
77
|
+
db_user=db_user,
|
|
78
|
+
db_password=db_password,
|
|
79
|
+
db_host=db_host,
|
|
80
|
+
db_port=db_port,
|
|
81
|
+
file_format=file_format,
|
|
82
|
+
rdf_namespaces=rdf_namespaces,
|
|
83
|
+
labels_order=labels_order,
|
|
84
|
+
**kwargs,
|
|
85
|
+
)
|
|
86
|
+
if not self.rdf_namespaces:
|
|
87
|
+
# For some reason, the config can pass
|
|
88
|
+
# the None object.
|
|
89
|
+
self.rdf_namespaces = {}
|
|
90
|
+
|
|
91
|
+
if "rdf_format" in kwargs:
|
|
92
|
+
logger.warning("The 'rdf_format' config option is deprecated, use 'file_format' instead.")
|
|
93
|
+
if not file_format:
|
|
94
|
+
format = kwargs["rdf_format"]
|
|
95
|
+
logger.warning(f"I will set 'file_format: {format}' for you.")
|
|
96
|
+
self.file_format = format
|
|
97
|
+
kwargs.pop("rdf_format")
|
|
98
|
+
logger.warning("NOTE: this warning will become an error in next versions.")
|
|
99
|
+
|
|
100
|
+
if not file_format:
|
|
101
|
+
msg = "You need to indicate a 'file_format'."
|
|
102
|
+
logger.error(msg)
|
|
103
|
+
raise RuntimeError(msg)
|
|
104
|
+
|
|
105
|
+
self.namespaces = {}
|
|
106
|
+
|
|
28
107
|
def _get_import_script_name(self) -> str:
|
|
29
|
-
"""
|
|
30
|
-
|
|
108
|
+
"""Return the name of the RDF admin import script.
|
|
109
|
+
|
|
110
|
+
This function is used for RDF export.
|
|
31
111
|
|
|
32
112
|
Returns
|
|
33
113
|
-------
|
|
@@ -37,7 +117,7 @@ class _RDFWriter(_BatchWriter):
|
|
|
37
117
|
return "rdf-import-call.sh"
|
|
38
118
|
|
|
39
119
|
def _get_default_import_call_bin_prefix(self):
|
|
40
|
-
"""
|
|
120
|
+
"""Provide the default string for the import call bin prefix.
|
|
41
121
|
|
|
42
122
|
Returns
|
|
43
123
|
-------
|
|
@@ -46,12 +126,12 @@ class _RDFWriter(_BatchWriter):
|
|
|
46
126
|
"""
|
|
47
127
|
return "bin/"
|
|
48
128
|
|
|
49
|
-
def _is_rdf_format_supported(self,
|
|
50
|
-
"""
|
|
129
|
+
def _is_rdf_format_supported(self, file_format: str) -> bool:
|
|
130
|
+
"""Check if the specified RDF format is supported.
|
|
51
131
|
|
|
52
132
|
Args:
|
|
53
133
|
----
|
|
54
|
-
|
|
134
|
+
file_format (str): The RDF format to check.
|
|
55
135
|
|
|
56
136
|
Returns:
|
|
57
137
|
-------
|
|
@@ -62,6 +142,7 @@ class _RDFWriter(_BatchWriter):
|
|
|
62
142
|
"xml",
|
|
63
143
|
"n3",
|
|
64
144
|
"turtle",
|
|
145
|
+
"ttl",
|
|
65
146
|
"nt",
|
|
66
147
|
"pretty-xml",
|
|
67
148
|
"trix",
|
|
@@ -69,22 +150,18 @@ class _RDFWriter(_BatchWriter):
|
|
|
69
150
|
"nquads",
|
|
70
151
|
"json-ld",
|
|
71
152
|
]
|
|
72
|
-
if
|
|
153
|
+
if file_format not in supported_formats:
|
|
73
154
|
logger.error(
|
|
74
|
-
f"
|
|
75
|
-
f
|
|
155
|
+
f"Incorrect or unsupported RDF format: '{file_format}',"
|
|
156
|
+
f"use one of the following: {', '.join(supported_formats)}.",
|
|
76
157
|
)
|
|
77
158
|
return False
|
|
78
159
|
else:
|
|
79
|
-
#
|
|
80
|
-
|
|
81
|
-
if self.rdf_format == "turtle":
|
|
82
|
-
self.extension = "ttl"
|
|
83
|
-
elif self.rdf_format == "ttl":
|
|
84
|
-
self.rdf_format = "turtle"
|
|
160
|
+
# Set the file extension to match the format
|
|
161
|
+
if self.file_format == "turtle":
|
|
85
162
|
self.extension = "ttl"
|
|
86
163
|
else:
|
|
87
|
-
self.extension = self.
|
|
164
|
+
self.extension = self.file_format
|
|
88
165
|
return True
|
|
89
166
|
|
|
90
167
|
def _write_single_edge_list_to_file(
|
|
@@ -93,8 +170,7 @@ class _RDFWriter(_BatchWriter):
|
|
|
93
170
|
label: str,
|
|
94
171
|
prop_dict: dict,
|
|
95
172
|
):
|
|
96
|
-
"""
|
|
97
|
-
to an RDF file with the given format.
|
|
173
|
+
"""Write a list of BioCypherEdges to an RDF file.
|
|
98
174
|
|
|
99
175
|
Args:
|
|
100
176
|
----
|
|
@@ -110,6 +186,8 @@ class _RDFWriter(_BatchWriter):
|
|
|
110
186
|
bool: The return value. True for success, False otherwise.
|
|
111
187
|
|
|
112
188
|
"""
|
|
189
|
+
# NOTE: prop_dict is not used. Remove in next refactor.
|
|
190
|
+
|
|
113
191
|
if not all(isinstance(n, BioCypherEdge) for n in edge_list):
|
|
114
192
|
logger.error("Edges must be passed as type BioCypherEdge.")
|
|
115
193
|
return False
|
|
@@ -133,27 +211,27 @@ class _RDFWriter(_BatchWriter):
|
|
|
133
211
|
rdf_predicate = rdf_subject + rdf_object
|
|
134
212
|
|
|
135
213
|
edge_label = self.translator.name_sentence_to_pascal(edge.get_label())
|
|
136
|
-
edge_uri = self.
|
|
214
|
+
edge_uri = self.as_uri(edge_label, "biocypher")
|
|
137
215
|
graph.add((edge_uri, RDF.type, RDFS.Class))
|
|
138
216
|
graph.add(
|
|
139
217
|
(
|
|
140
|
-
self.
|
|
218
|
+
self.as_uri(rdf_predicate, "biocypher"),
|
|
141
219
|
RDF.type,
|
|
142
220
|
edge_uri,
|
|
143
221
|
),
|
|
144
222
|
)
|
|
145
223
|
graph.add(
|
|
146
224
|
(
|
|
147
|
-
self.
|
|
148
|
-
self.
|
|
149
|
-
self.
|
|
225
|
+
self.as_uri(rdf_predicate, "biocypher"),
|
|
226
|
+
self.as_uri("subject", "biocypher"),
|
|
227
|
+
self.to_uri(rdf_subject),
|
|
150
228
|
),
|
|
151
229
|
)
|
|
152
230
|
graph.add(
|
|
153
231
|
(
|
|
154
|
-
self.
|
|
155
|
-
self.
|
|
156
|
-
self.
|
|
232
|
+
self.as_uri(rdf_predicate, "biocypher"),
|
|
233
|
+
self.as_uri("object", "biocypher"),
|
|
234
|
+
self.to_uri(rdf_object),
|
|
157
235
|
),
|
|
158
236
|
)
|
|
159
237
|
|
|
@@ -163,10 +241,10 @@ class _RDFWriter(_BatchWriter):
|
|
|
163
241
|
if value:
|
|
164
242
|
self.add_property_to_graph(graph, rdf_predicate, value, key)
|
|
165
243
|
|
|
166
|
-
graph.serialize(destination=file_name, format=self.
|
|
244
|
+
graph.serialize(destination=file_name, format=self.file_format)
|
|
167
245
|
|
|
168
246
|
logger.info(
|
|
169
|
-
f"Writing {len(edge_list)} entries to {label_pascal}.{self.
|
|
247
|
+
f"Writing {len(edge_list)} entries to {label_pascal}.{self.file_format}",
|
|
170
248
|
)
|
|
171
249
|
|
|
172
250
|
return True
|
|
@@ -206,7 +284,7 @@ class _RDFWriter(_BatchWriter):
|
|
|
206
284
|
for obj in rdf_object:
|
|
207
285
|
graph.add(
|
|
208
286
|
(
|
|
209
|
-
self.
|
|
287
|
+
self.to_uri(rdf_subject),
|
|
210
288
|
self.property_to_uri(rdf_predicate),
|
|
211
289
|
Literal(obj),
|
|
212
290
|
),
|
|
@@ -222,7 +300,7 @@ class _RDFWriter(_BatchWriter):
|
|
|
222
300
|
else:
|
|
223
301
|
graph.add(
|
|
224
302
|
(
|
|
225
|
-
self.
|
|
303
|
+
self.to_uri(rdf_subject),
|
|
226
304
|
self.property_to_uri(rdf_predicate),
|
|
227
305
|
Literal(rdf_object),
|
|
228
306
|
),
|
|
@@ -230,14 +308,14 @@ class _RDFWriter(_BatchWriter):
|
|
|
230
308
|
else:
|
|
231
309
|
graph.add(
|
|
232
310
|
(
|
|
233
|
-
self.
|
|
311
|
+
self.to_uri(rdf_subject),
|
|
234
312
|
self.property_to_uri(rdf_predicate),
|
|
235
313
|
Literal(rdf_object),
|
|
236
314
|
),
|
|
237
315
|
)
|
|
238
316
|
|
|
239
317
|
def transform_string_to_list(self, string_list: str) -> list:
|
|
240
|
-
"""
|
|
318
|
+
"""Transform a string representation of a list into a list.
|
|
241
319
|
|
|
242
320
|
Args:
|
|
243
321
|
----
|
|
@@ -257,8 +335,7 @@ class _RDFWriter(_BatchWriter):
|
|
|
257
335
|
prop_dict: dict,
|
|
258
336
|
labels: str,
|
|
259
337
|
):
|
|
260
|
-
"""
|
|
261
|
-
to an RDF file in the specified format.
|
|
338
|
+
"""Write a list of BioCypherNodes to an RDF file.
|
|
262
339
|
|
|
263
340
|
Args:
|
|
264
341
|
----
|
|
@@ -268,11 +345,15 @@ class _RDFWriter(_BatchWriter):
|
|
|
268
345
|
|
|
269
346
|
prop_dict (dict): A dictionary of properties and their types for the node class.
|
|
270
347
|
|
|
348
|
+
labels (str): string of one or several concatenated labels
|
|
349
|
+
|
|
271
350
|
Returns:
|
|
272
351
|
-------
|
|
273
352
|
bool: True if the writing is successful, False otherwise.
|
|
274
353
|
|
|
275
354
|
"""
|
|
355
|
+
# NOTE: labels and prop_dict are not used.
|
|
356
|
+
|
|
276
357
|
if not all(isinstance(n, BioCypherNode) for n in node_list):
|
|
277
358
|
logger.error("Nodes must be passed as type BioCypherNode.")
|
|
278
359
|
return False
|
|
@@ -294,16 +375,16 @@ class _RDFWriter(_BatchWriter):
|
|
|
294
375
|
class_name = self.translator.name_sentence_to_pascal(rdf_object)
|
|
295
376
|
graph.add(
|
|
296
377
|
(
|
|
297
|
-
self.
|
|
378
|
+
self.as_uri(class_name, "biocypher"),
|
|
298
379
|
RDF.type,
|
|
299
380
|
RDFS.Class,
|
|
300
381
|
),
|
|
301
382
|
)
|
|
302
383
|
graph.add(
|
|
303
384
|
(
|
|
304
|
-
self.
|
|
385
|
+
self.to_uri(rdf_subject),
|
|
305
386
|
RDF.type,
|
|
306
|
-
self.
|
|
387
|
+
self.as_uri(class_name, "biocypher"),
|
|
307
388
|
),
|
|
308
389
|
)
|
|
309
390
|
for key, value in properties.items():
|
|
@@ -311,22 +392,24 @@ class _RDFWriter(_BatchWriter):
|
|
|
311
392
|
if value:
|
|
312
393
|
self.add_property_to_graph(graph, rdf_subject, value, key)
|
|
313
394
|
|
|
314
|
-
graph.serialize(destination=file_name, format=self.
|
|
395
|
+
graph.serialize(destination=file_name, format=self.file_format)
|
|
315
396
|
|
|
316
397
|
logger.info(
|
|
317
|
-
f"Writing {len(node_list)} entries to {label_pascal}.{self.
|
|
398
|
+
f"Writing {len(node_list)} entries to {label_pascal}.{self.file_format}",
|
|
318
399
|
)
|
|
319
400
|
|
|
320
401
|
return True
|
|
321
402
|
|
|
322
403
|
def write_nodes(self, nodes, batch_size: int = int(1e6), force: bool = False) -> bool:
|
|
323
|
-
"""
|
|
404
|
+
"""Write nodes in RDF format.
|
|
324
405
|
|
|
325
406
|
Args:
|
|
326
407
|
----
|
|
327
|
-
nodes (list or generator): A list or generator of nodes in
|
|
408
|
+
nodes (list or generator): A list or generator of nodes in
|
|
409
|
+
BioCypherNode format.
|
|
328
410
|
batch_size (int): The number of nodes to write in each batch.
|
|
329
|
-
force (bool): Flag to force the writing even if the output file
|
|
411
|
+
force (bool): Flag to force the writing even if the output file
|
|
412
|
+
already exists.
|
|
330
413
|
|
|
331
414
|
Returns:
|
|
332
415
|
-------
|
|
@@ -334,7 +417,7 @@ class _RDFWriter(_BatchWriter):
|
|
|
334
417
|
|
|
335
418
|
"""
|
|
336
419
|
# check if specified output format is correct
|
|
337
|
-
passed = self._is_rdf_format_supported(self.
|
|
420
|
+
passed = self._is_rdf_format_supported(self.file_format)
|
|
338
421
|
if not passed:
|
|
339
422
|
logger.error("Error while writing node data, wrong RDF format")
|
|
340
423
|
return False
|
|
@@ -350,8 +433,7 @@ class _RDFWriter(_BatchWriter):
|
|
|
350
433
|
edges: list | GeneratorType,
|
|
351
434
|
batch_size: int = int(1e6),
|
|
352
435
|
) -> bool:
|
|
353
|
-
"""
|
|
354
|
-
functions specifying it's edge data.
|
|
436
|
+
"""Write edges in RDF format.
|
|
355
437
|
|
|
356
438
|
Args:
|
|
357
439
|
----
|
|
@@ -365,7 +447,7 @@ class _RDFWriter(_BatchWriter):
|
|
|
365
447
|
|
|
366
448
|
"""
|
|
367
449
|
# check if specified output format is correct
|
|
368
|
-
passed = self._is_rdf_format_supported(self.
|
|
450
|
+
passed = self._is_rdf_format_supported(self.file_format)
|
|
369
451
|
if not passed:
|
|
370
452
|
logger.error("Error while writing edge data, wrong RDF format")
|
|
371
453
|
return False
|
|
@@ -378,7 +460,8 @@ class _RDFWriter(_BatchWriter):
|
|
|
378
460
|
return True
|
|
379
461
|
|
|
380
462
|
def _construct_import_call(self) -> bool:
|
|
381
|
-
"""
|
|
463
|
+
"""Write the import call.
|
|
464
|
+
|
|
382
465
|
This function is not applicable for RDF.
|
|
383
466
|
|
|
384
467
|
Returns
|
|
@@ -389,15 +472,12 @@ class _RDFWriter(_BatchWriter):
|
|
|
389
472
|
return ""
|
|
390
473
|
|
|
391
474
|
def _quote_string(self, value: str) -> str:
|
|
392
|
-
"""
|
|
393
|
-
Quote a string.
|
|
394
|
-
"""
|
|
395
|
-
|
|
475
|
+
"""Quote a string."""
|
|
396
476
|
return f"{self.quote}{value}{self.quote}"
|
|
397
477
|
|
|
398
478
|
def _write_array_string(self, string_list):
|
|
399
|
-
"""
|
|
400
|
-
|
|
479
|
+
"""Write the string representation of an array into a .csv file.
|
|
480
|
+
|
|
401
481
|
This function is not applicable for RDF.
|
|
402
482
|
|
|
403
483
|
Args:
|
|
@@ -412,8 +492,8 @@ class _RDFWriter(_BatchWriter):
|
|
|
412
492
|
return True
|
|
413
493
|
|
|
414
494
|
def _write_node_headers(self):
|
|
415
|
-
"""
|
|
416
|
-
|
|
495
|
+
"""Import properties of a graph entity.
|
|
496
|
+
|
|
417
497
|
This function is not applicable for RDF.
|
|
418
498
|
|
|
419
499
|
Returns
|
|
@@ -424,9 +504,8 @@ class _RDFWriter(_BatchWriter):
|
|
|
424
504
|
return True
|
|
425
505
|
|
|
426
506
|
def _write_edge_headers(self):
|
|
427
|
-
"""
|
|
428
|
-
|
|
429
|
-
containing only the header for this type of edge.
|
|
507
|
+
"""Write a database import-file for a graph entity.
|
|
508
|
+
|
|
430
509
|
This function is not applicable for RDF.
|
|
431
510
|
|
|
432
511
|
Returns
|
|
@@ -436,9 +515,38 @@ class _RDFWriter(_BatchWriter):
|
|
|
436
515
|
"""
|
|
437
516
|
return True
|
|
438
517
|
|
|
439
|
-
def
|
|
440
|
-
"""
|
|
441
|
-
|
|
518
|
+
def as_uri(self, name: str, namespace: str = "") -> str:
|
|
519
|
+
"""Return an RDFlib object with the given namespace as a URI.
|
|
520
|
+
|
|
521
|
+
There is often a default for empty namespaces, which would have been
|
|
522
|
+
loaded with the ontology, and put in `self.namespace` by
|
|
523
|
+
`self._init_namespaces`.
|
|
524
|
+
|
|
525
|
+
Args:
|
|
526
|
+
----
|
|
527
|
+
name (str): The name to be transformed.
|
|
528
|
+
namespace (str): The namespace to be used.
|
|
529
|
+
|
|
530
|
+
Returns:
|
|
531
|
+
-------
|
|
532
|
+
str: The URI for the given name and namespace.
|
|
533
|
+
|
|
534
|
+
"""
|
|
535
|
+
if namespace in self.namespaces:
|
|
536
|
+
return URIRef(self.namespaces[namespace][name])
|
|
537
|
+
else:
|
|
538
|
+
assert "biocypher" in self.namespaces
|
|
539
|
+
# If no default empty NS, use the biocypher one,
|
|
540
|
+
# which is always there.
|
|
541
|
+
logger.debug(f"I'll consider '{name}' as part of 'biocypher' namespace.")
|
|
542
|
+
return URIRef(self.namespaces["biocypher"][name])
|
|
543
|
+
|
|
544
|
+
def to_uri(self, subject: str) -> str:
|
|
545
|
+
"""Extract the namespace from the given subject.
|
|
546
|
+
|
|
547
|
+
Split the subject's string on ":". Then convert the subject to a
|
|
548
|
+
proper URI, if the namespace is known. If namespace is unknown,
|
|
549
|
+
defaults to the default prefix of the ontology.
|
|
442
550
|
|
|
443
551
|
Args:
|
|
444
552
|
----
|
|
@@ -449,21 +557,37 @@ class _RDFWriter(_BatchWriter):
|
|
|
449
557
|
str: The corresponding URI for the subject.
|
|
450
558
|
|
|
451
559
|
"""
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
560
|
+
pref_id = subject.split(":")
|
|
561
|
+
if len(pref_id) == 2:
|
|
562
|
+
pref, id = pref_id
|
|
563
|
+
return self.as_uri(id, pref)
|
|
564
|
+
else:
|
|
565
|
+
return self.as_uri(subject)
|
|
566
|
+
|
|
567
|
+
def find_uri(self, regexp: str) -> str:
|
|
568
|
+
query = f'SELECT DISTINCT ?s WHERE {{ ?s ?p ?o . FILTER regex(str(?s), "{regexp}")}}'
|
|
569
|
+
gen = self.graph.query(query)
|
|
570
|
+
uris = list(gen)
|
|
571
|
+
if len(uris) > 1:
|
|
572
|
+
logger.warning(
|
|
573
|
+
f"Found several terms matching `{regexp}`, I will consider only the first one: `{uris[0][0]}`",
|
|
574
|
+
)
|
|
575
|
+
logger.debug("\tothers:")
|
|
576
|
+
for u in uris[1:]:
|
|
577
|
+
logger.debug(f"\t{u[0]}")
|
|
578
|
+
if uris:
|
|
579
|
+
logger.debug(f"Found {len(uris)} terms, returning: `{uris[0][0]}`")
|
|
580
|
+
return uris[0][0]
|
|
581
|
+
else:
|
|
582
|
+
logger.debug(f"Found no term matching: `{query}`")
|
|
583
|
+
return None
|
|
461
584
|
|
|
462
585
|
def property_to_uri(self, property_name: str) -> dict[str, str]:
|
|
463
|
-
"""
|
|
586
|
+
"""Convert a property name to its corresponding URI.
|
|
464
587
|
|
|
465
|
-
This function takes a property name and searches for its corresponding
|
|
466
|
-
It first checks the core namespaces for
|
|
588
|
+
This function takes a property name and searches for its corresponding
|
|
589
|
+
URI in various namespaces. It first checks the core namespaces for
|
|
590
|
+
rdflib, including owl, rdf, rdfs, xsd, and xml.
|
|
467
591
|
|
|
468
592
|
Args:
|
|
469
593
|
----
|
|
@@ -501,16 +625,16 @@ class _RDFWriter(_BatchWriter):
|
|
|
501
625
|
# If the input is not found in any of the namespaces, it returns
|
|
502
626
|
# the corresponding URI from the biocypher namespace.
|
|
503
627
|
# TODO: give a warning and try to prevent this option altogether
|
|
504
|
-
return self.
|
|
628
|
+
return self.as_uri(property_name, "biocypher")
|
|
505
629
|
|
|
506
630
|
def _init_namespaces(self, graph: Graph):
|
|
507
631
|
"""Initialise the namespaces for the RDF graph.
|
|
508
632
|
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
standard namespace
|
|
513
|
-
|
|
633
|
+
This function adds the biocypher standard namespace to the `namespaces`
|
|
634
|
+
attribute of the class. If `namespaces` is empty, it sets it to the
|
|
635
|
+
biocypher standard namespace. Otherwise, it merges the biocypher
|
|
636
|
+
standard namespace with the namespaces defined in the
|
|
637
|
+
biocypher_config.yaml.
|
|
514
638
|
|
|
515
639
|
Args:
|
|
516
640
|
----
|
|
@@ -521,14 +645,27 @@ class _RDFWriter(_BatchWriter):
|
|
|
521
645
|
None
|
|
522
646
|
|
|
523
647
|
"""
|
|
524
|
-
#
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
self.
|
|
534
|
-
|
|
648
|
+
# Bind and keep the biocypher namespace.
|
|
649
|
+
bcns = Namespace("https://biocypher.org/biocypher#")
|
|
650
|
+
bck = "biocypher"
|
|
651
|
+
self.namespaces = {bck: bcns}
|
|
652
|
+
graph.bind(bck, bcns)
|
|
653
|
+
|
|
654
|
+
# Keep track of namespaces loaded with the ontologies in the given graph.
|
|
655
|
+
logger.debug("Bind namespaces:")
|
|
656
|
+
for prefix, ns in graph.namespaces():
|
|
657
|
+
if prefix in self.namespaces and str(ns) != str(self.namespaces[prefix]):
|
|
658
|
+
logger.warning(
|
|
659
|
+
f"Namespace '{prefix}' was already loaded"
|
|
660
|
+
f"as '{self.namespaces[prefix]}',"
|
|
661
|
+
f"I will overwrite it with '{ns}'.",
|
|
662
|
+
)
|
|
663
|
+
logger.debug(f"\t'{prefix}'\t=>\t'{ns}'")
|
|
664
|
+
self.namespaces[prefix] = Namespace(ns)
|
|
665
|
+
|
|
666
|
+
# Bind and keep the namespaces given in the config.
|
|
667
|
+
for prefix, ns in self.rdf_namespaces.items():
|
|
668
|
+
assert prefix not in self.namespaces
|
|
669
|
+
self.namespaces[prefix] = Namespace(ns)
|
|
670
|
+
logger.debug(f"\t'{prefix}'\t->\t{ns}")
|
|
671
|
+
graph.bind(prefix, self.namespaces[prefix])
|
|
@@ -1,20 +1,20 @@
|
|
|
1
1
|
biocypher/__init__.py,sha256=-iq15Q3LhB8WBQjnD15ghszEdaqJ1XvQIF5IaF88KNQ,783
|
|
2
2
|
biocypher/_config/__init__.py,sha256=ku86JZaRrzW5NEz5EXsCRktFwxS0Yo2dwX4UuazdfA4,3416
|
|
3
|
-
biocypher/_config/biocypher_config.yaml,sha256=
|
|
3
|
+
biocypher/_config/biocypher_config.yaml,sha256=nrlwSW7YP8qTZYm-HvWdTCZy81jnd2FkIGWH8Se2MzI,3651
|
|
4
4
|
biocypher/_config/test_config.yaml,sha256=Np8jeS5_EP6HHOvMKb7B_Tkyqd5YaYlYz_DVsXypt-A,119
|
|
5
5
|
biocypher/_config/test_schema_config.yaml,sha256=D1600WgEj3iTXrumVU9LIivJHJO36iaxfkOgyam9zVU,3129
|
|
6
6
|
biocypher/_config/test_schema_config_disconnected.yaml,sha256=Qm8FLxEn2spHcyj_5F859KjcDvKSxNhxDvi4b4LLkvQ,68
|
|
7
7
|
biocypher/_config/test_schema_config_extended.yaml,sha256=wn3A76142hhjnImhMF6RODbCFESTJ2TtPvcFdIFsAT0,3309
|
|
8
|
-
biocypher/_core.py,sha256=
|
|
8
|
+
biocypher/_core.py,sha256=S8frW62bc0V9M6gwU5l_D2ESa-7xwbOTffRuPqbIbT0,27382
|
|
9
9
|
biocypher/_create.py,sha256=QsvXrwEQ8k0uNXvCG06UKejvw-QsJwzSaumrBjx9n1k,9884
|
|
10
10
|
biocypher/_deduplicate.py,sha256=rtglcaLRaVzNjLtaPwTGP8VvCM4PHYQ5CZ-cm32CrKQ,4840
|
|
11
11
|
biocypher/_get.py,sha256=2kjiBFu_onGUhgOpV7IMfGX67jyY1rpHg_GWncEUdrc,13278
|
|
12
12
|
biocypher/_logger.py,sha256=y9dh3SPJOCWXnkFSYSK7aj_-pB7zlAkNCf43Dp1lt74,2941
|
|
13
13
|
biocypher/_mapping.py,sha256=ntspG2C_NaQODhWTBFk0CDvolkOCjtqlQ9E-NkJAuTg,9030
|
|
14
|
-
biocypher/_metadata.py,sha256=
|
|
14
|
+
biocypher/_metadata.py,sha256=Eop3cijNQBsHWeOO8zbBmN_2ICJutZziDPvWppV474M,1415
|
|
15
15
|
biocypher/_misc.py,sha256=N8aDg8j3EEcKf9ZRqzSNruUEUK4RixCy1vQ1V4maGxk,6079
|
|
16
|
-
biocypher/_ontology.py,sha256=
|
|
17
|
-
biocypher/_translate.py,sha256=
|
|
16
|
+
biocypher/_ontology.py,sha256=lipZxU3aj6zrTbBrJZmCW6IRCuz-KQG3AfbYCVq6aFE,33133
|
|
17
|
+
biocypher/_translate.py,sha256=9E19eLRL0VnxxDuiNhZ5vu54XyKXnfLuBhCgNcL9yAE,17000
|
|
18
18
|
biocypher/output/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
19
|
biocypher/output/connect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
20
|
biocypher/output/connect/_get_connector.py,sha256=r-DeDnvGyFEWOd9bXaXQOgVws9rgWz5kwwdzbmt5eKk,1038
|
|
@@ -25,19 +25,20 @@ biocypher/output/in_memory/_in_memory_kg.py,sha256=g1TPN8PkeAyXbrRuTAjshqC8voI6E
|
|
|
25
25
|
biocypher/output/in_memory/_networkx.py,sha256=cSOSAreP7S3oeGT6noZ1kAIvSnkVnU3NUp1OY4yqzn0,1515
|
|
26
26
|
biocypher/output/in_memory/_pandas.py,sha256=Ot2jbK5t_YLHqw0BUv9Z_qWNy9r6IX1LYEyejOSJzos,3288
|
|
27
27
|
biocypher/output/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
28
|
-
biocypher/output/write/_batch_writer.py,sha256=
|
|
29
|
-
biocypher/output/write/_get_writer.py,sha256=
|
|
28
|
+
biocypher/output/write/_batch_writer.py,sha256=_Dao7z4KN0Uhr86oOOWYEDrIUikR7T0v1SJC2Btd8Y4,38745
|
|
29
|
+
biocypher/output/write/_get_writer.py,sha256=JozRWCMhvh65aQAlcGiiD5x3Nl1HSW8mK1Zf2nTSOzI,4385
|
|
30
30
|
biocypher/output/write/_writer.py,sha256=y0dWI-RyQdrBLr9Fs91Y9KcCMjnlCaKJT0eWsIS2hG4,7158
|
|
31
31
|
biocypher/output/write/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
32
|
-
biocypher/output/write/graph/_arangodb.py,sha256=
|
|
33
|
-
biocypher/output/write/graph/_neo4j.py,sha256=
|
|
32
|
+
biocypher/output/write/graph/_arangodb.py,sha256=xue3hm_DVB5pMR5qqfGXlXll3RpILA0tXos2J-as1-E,7906
|
|
33
|
+
biocypher/output/write/graph/_neo4j.py,sha256=EK5gqQNyVMYfpH1DaDTtGfRKiiq4jx5DLtYCCjY-jbY,12081
|
|
34
34
|
biocypher/output/write/graph/_networkx.py,sha256=2WYkw5ZM3Bp236iwAxEAp3A1DxHKT4_hEPNMUKvPHp4,2320
|
|
35
|
-
biocypher/output/write/graph/
|
|
35
|
+
biocypher/output/write/graph/_owl.py,sha256=2DlxQuAXGnCW068N8bPgADjk_LFhU9D_LJV5lWQeu4A,21333
|
|
36
|
+
biocypher/output/write/graph/_rdf.py,sha256=1TgECkoTHFX0eXtWc_-pr9G55AesReGlh7rg5zvUj5w,21925
|
|
36
37
|
biocypher/output/write/relational/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
38
|
biocypher/output/write/relational/_csv.py,sha256=m0BSQXts88Qu5AEvoIgnwRz54ia38g4VN3PaA3LCYM8,2807
|
|
38
39
|
biocypher/output/write/relational/_postgresql.py,sha256=RckQJBiuwvDmHAyXxS8zCavYqDecHHWW_piofurokfQ,11965
|
|
39
40
|
biocypher/output/write/relational/_sqlite.py,sha256=BuGWOeeNA83lbUvjpkzqcR9_baWLsbfmLXBKe4O1EPE,2105
|
|
40
|
-
biocypher-0.
|
|
41
|
-
biocypher-0.
|
|
42
|
-
biocypher-0.
|
|
43
|
-
biocypher-0.
|
|
41
|
+
biocypher-0.9.1.dist-info/LICENSE,sha256=oejgxuxyjSnyPw3YPloz6-dCBB_nYizJ4jDQnr-xZUU,1082
|
|
42
|
+
biocypher-0.9.1.dist-info/METADATA,sha256=Qjy-T-iNXAn0wye7-NbLivn7ZPz80ZrOu8utum7_4vk,10643
|
|
43
|
+
biocypher-0.9.1.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
|
44
|
+
biocypher-0.9.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|