biocypher 0.5.19__py3-none-any.whl → 0.5.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biocypher might be problematic. Click here for more details.
- biocypher/_connect.py +6 -12
- biocypher/_core.py +135 -23
- biocypher/_deduplicate.py +72 -30
- biocypher/_get.py +299 -0
- biocypher/_metadata.py +1 -1
- biocypher/_ontology.py +11 -9
- biocypher/_pandas.py +32 -7
- biocypher/_translate.py +29 -26
- biocypher/_write.py +75 -57
- {biocypher-0.5.19.dist-info → biocypher-0.5.21.dist-info}/METADATA +6 -2
- {biocypher-0.5.19.dist-info → biocypher-0.5.21.dist-info}/RECORD +13 -12
- {biocypher-0.5.19.dist-info → biocypher-0.5.21.dist-info}/LICENSE +0 -0
- {biocypher-0.5.19.dist-info → biocypher-0.5.21.dist-info}/WHEEL +0 -0
biocypher/_write.py
CHANGED
|
@@ -125,7 +125,6 @@ class _BatchWriter(ABC):
|
|
|
125
125
|
|
|
126
126
|
def __init__(
|
|
127
127
|
self,
|
|
128
|
-
ontology: "Ontology",
|
|
129
128
|
translator: "Translator",
|
|
130
129
|
deduplicator: "Deduplicator",
|
|
131
130
|
delimiter: str,
|
|
@@ -167,10 +166,6 @@ class _BatchWriter(ABC):
|
|
|
167
166
|
- _get_import_script_name
|
|
168
167
|
|
|
169
168
|
Args:
|
|
170
|
-
ontology:
|
|
171
|
-
Instance of :py:class:`Ontology` to enable translation and
|
|
172
|
-
ontology queries
|
|
173
|
-
|
|
174
169
|
translator:
|
|
175
170
|
Instance of :py:class:`Translator` to enable translation of
|
|
176
171
|
nodes and manipulation of properties.
|
|
@@ -251,8 +246,6 @@ class _BatchWriter(ABC):
|
|
|
251
246
|
self.wipe = wipe
|
|
252
247
|
self.strict_mode = strict_mode
|
|
253
248
|
|
|
254
|
-
self.extended_schema = ontology.extended_schema
|
|
255
|
-
self.ontology = ontology
|
|
256
249
|
self.translator = translator
|
|
257
250
|
self.deduplicator = deduplicator
|
|
258
251
|
self.node_property_dict = {}
|
|
@@ -352,34 +345,34 @@ class _BatchWriter(ABC):
|
|
|
352
345
|
bool: The return value. True for success, False otherwise.
|
|
353
346
|
"""
|
|
354
347
|
passed = False
|
|
355
|
-
# unwrap generator in one step
|
|
356
348
|
edges = list(edges) # force evaluation to handle empty generator
|
|
357
349
|
if edges:
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
],
|
|
366
|
-
)
|
|
367
|
-
if isinstance(e, BioCypherRelAsNode)
|
|
368
|
-
else (None, [e])
|
|
369
|
-
for e in edges
|
|
370
|
-
)
|
|
371
|
-
)
|
|
372
|
-
nod, edg = (list(a) for a in z)
|
|
373
|
-
nod = [n for n in nod if n]
|
|
374
|
-
edg = [val for sublist in edg for val in sublist] # flatten
|
|
350
|
+
nodes_flat = []
|
|
351
|
+
edges_flat = []
|
|
352
|
+
for edge in edges:
|
|
353
|
+
if isinstance(edge, BioCypherRelAsNode):
|
|
354
|
+
# check if relationship has already been written, if so skip
|
|
355
|
+
if self.deduplicator.rel_as_node_seen(edge):
|
|
356
|
+
continue
|
|
375
357
|
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
358
|
+
nodes_flat.append(edge.get_node())
|
|
359
|
+
edges_flat.append(edge.get_source_edge())
|
|
360
|
+
edges_flat.append(edge.get_target_edge())
|
|
361
|
+
|
|
362
|
+
else:
|
|
363
|
+
# check if relationship has already been written, if so skip
|
|
364
|
+
if self.deduplicator.edge_seen(edge):
|
|
365
|
+
continue
|
|
366
|
+
|
|
367
|
+
edges_flat.append(edge)
|
|
368
|
+
|
|
369
|
+
if nodes_flat and edges_flat:
|
|
370
|
+
passed = self.write_nodes(nodes_flat) and self._write_edge_data(
|
|
371
|
+
edges_flat,
|
|
379
372
|
batch_size,
|
|
380
373
|
)
|
|
381
374
|
else:
|
|
382
|
-
passed = self._write_edge_data(
|
|
375
|
+
passed = self._write_edge_data(edges_flat, batch_size)
|
|
383
376
|
|
|
384
377
|
else:
|
|
385
378
|
# is this a problem? if the generator or list is empty, we
|
|
@@ -451,8 +444,12 @@ class _BatchWriter(ABC):
|
|
|
451
444
|
bin_l[label] = 1
|
|
452
445
|
|
|
453
446
|
# get properties from config if present
|
|
454
|
-
cprops =
|
|
455
|
-
|
|
447
|
+
cprops = (
|
|
448
|
+
self.translator.ontology.mapping.extended_schema.get(
|
|
449
|
+
label
|
|
450
|
+
).get(
|
|
451
|
+
"properties",
|
|
452
|
+
)
|
|
456
453
|
)
|
|
457
454
|
if cprops:
|
|
458
455
|
d = dict(cprops)
|
|
@@ -486,7 +483,7 @@ class _BatchWriter(ABC):
|
|
|
486
483
|
|
|
487
484
|
# get label hierarchy
|
|
488
485
|
# multiple labels:
|
|
489
|
-
all_labels = self.ontology.get_ancestors(label)
|
|
486
|
+
all_labels = self.translator.ontology.get_ancestors(label)
|
|
490
487
|
|
|
491
488
|
if all_labels:
|
|
492
489
|
# convert to pascal case
|
|
@@ -682,10 +679,6 @@ class _BatchWriter(ABC):
|
|
|
682
679
|
# for each label to check for consistency and their type
|
|
683
680
|
# for now, relevant for `int`
|
|
684
681
|
for edge in edges:
|
|
685
|
-
# check for duplicates
|
|
686
|
-
if self.deduplicator.edge_seen(edge):
|
|
687
|
-
continue
|
|
688
|
-
|
|
689
682
|
if not (edge.get_source_id() and edge.get_target_id()):
|
|
690
683
|
logger.error(
|
|
691
684
|
"Edge must have source and target node. "
|
|
@@ -706,13 +699,23 @@ class _BatchWriter(ABC):
|
|
|
706
699
|
# (may not be if it is an edge that carries the
|
|
707
700
|
# "label_as_edge" property)
|
|
708
701
|
cprops = None
|
|
709
|
-
if
|
|
710
|
-
|
|
702
|
+
if (
|
|
703
|
+
label
|
|
704
|
+
in self.translator.ontology.mapping.extended_schema
|
|
705
|
+
):
|
|
706
|
+
cprops = self.translator.ontology.mapping.extended_schema.get(
|
|
707
|
+
label
|
|
708
|
+
).get(
|
|
711
709
|
"properties",
|
|
712
710
|
)
|
|
713
711
|
else:
|
|
714
712
|
# try via "label_as_edge"
|
|
715
|
-
for
|
|
713
|
+
for (
|
|
714
|
+
k,
|
|
715
|
+
v,
|
|
716
|
+
) in (
|
|
717
|
+
self.translator.ontology.mapping.extended_schema.items()
|
|
718
|
+
):
|
|
716
719
|
if isinstance(v, dict):
|
|
717
720
|
if v.get("label_as_edge") == label:
|
|
718
721
|
cprops = v.get("properties")
|
|
@@ -873,9 +876,14 @@ class _BatchWriter(ABC):
|
|
|
873
876
|
|
|
874
877
|
if label in ["IS_SOURCE_OF", "IS_TARGET_OF", "IS_PART_OF"]:
|
|
875
878
|
skip_id = True
|
|
876
|
-
elif not self.extended_schema.get(
|
|
879
|
+
elif not self.translator.ontology.mapping.extended_schema.get(
|
|
880
|
+
label
|
|
881
|
+
):
|
|
877
882
|
# find label in schema by label_as_edge
|
|
878
|
-
for
|
|
883
|
+
for (
|
|
884
|
+
k,
|
|
885
|
+
v,
|
|
886
|
+
) in self.translator.ontology.mapping.extended_schema.items():
|
|
879
887
|
if v.get("label_as_edge") == label:
|
|
880
888
|
schema_label = k
|
|
881
889
|
break
|
|
@@ -884,7 +892,9 @@ class _BatchWriter(ABC):
|
|
|
884
892
|
|
|
885
893
|
if schema_label:
|
|
886
894
|
if (
|
|
887
|
-
self.extended_schema.get(
|
|
895
|
+
self.translator.ontology.mapping.extended_schema.get(
|
|
896
|
+
schema_label
|
|
897
|
+
).get("use_id")
|
|
888
898
|
== False
|
|
889
899
|
):
|
|
890
900
|
skip_id = True
|
|
@@ -1009,6 +1019,7 @@ class _Neo4jBatchWriter(_BatchWriter):
|
|
|
1009
1019
|
|
|
1010
1020
|
This class inherits from the abstract class "_BatchWriter" and implements the
|
|
1011
1021
|
Neo4j-specific methods:
|
|
1022
|
+
|
|
1012
1023
|
- _write_node_headers
|
|
1013
1024
|
- _write_edge_headers
|
|
1014
1025
|
- _construct_import_call
|
|
@@ -1181,9 +1192,14 @@ class _Neo4jBatchWriter(_BatchWriter):
|
|
|
1181
1192
|
|
|
1182
1193
|
if label in ["IS_SOURCE_OF", "IS_TARGET_OF", "IS_PART_OF"]:
|
|
1183
1194
|
skip_id = True
|
|
1184
|
-
elif not self.extended_schema.get(
|
|
1195
|
+
elif not self.translator.ontology.mapping.extended_schema.get(
|
|
1196
|
+
label
|
|
1197
|
+
):
|
|
1185
1198
|
# find label in schema by label_as_edge
|
|
1186
|
-
for
|
|
1199
|
+
for (
|
|
1200
|
+
k,
|
|
1201
|
+
v,
|
|
1202
|
+
) in self.translator.ontology.mapping.extended_schema.items():
|
|
1187
1203
|
if v.get("label_as_edge") == label:
|
|
1188
1204
|
schema_label = k
|
|
1189
1205
|
break
|
|
@@ -1194,7 +1210,9 @@ class _Neo4jBatchWriter(_BatchWriter):
|
|
|
1194
1210
|
|
|
1195
1211
|
if schema_label:
|
|
1196
1212
|
if (
|
|
1197
|
-
self.extended_schema.get(
|
|
1213
|
+
self.translator.ontology.mapping.extended_schema.get(
|
|
1214
|
+
schema_label
|
|
1215
|
+
).get("use_id")
|
|
1198
1216
|
== False
|
|
1199
1217
|
):
|
|
1200
1218
|
skip_id = True
|
|
@@ -1352,9 +1370,9 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
|
|
|
1352
1370
|
f.write(row)
|
|
1353
1371
|
|
|
1354
1372
|
# add collection from schema config
|
|
1355
|
-
collection = self.extended_schema[
|
|
1356
|
-
|
|
1357
|
-
)
|
|
1373
|
+
collection = self.translator.ontology.mapping.extended_schema[
|
|
1374
|
+
label
|
|
1375
|
+
].get("db_collection_name", None)
|
|
1358
1376
|
|
|
1359
1377
|
# add file path to neo4 admin import statement
|
|
1360
1378
|
# do once for each part file
|
|
@@ -1433,16 +1451,19 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
|
|
|
1433
1451
|
f.write(row)
|
|
1434
1452
|
|
|
1435
1453
|
# add collection from schema config
|
|
1436
|
-
if not self.extended_schema.get(label):
|
|
1437
|
-
for
|
|
1454
|
+
if not self.translator.ontology.mapping.extended_schema.get(label):
|
|
1455
|
+
for (
|
|
1456
|
+
_,
|
|
1457
|
+
v,
|
|
1458
|
+
) in self.translator.ontology.mapping.extended_schema.items():
|
|
1438
1459
|
if v.get("label_as_edge") == label:
|
|
1439
1460
|
collection = v.get("db_collection_name", None)
|
|
1440
1461
|
break
|
|
1441
1462
|
|
|
1442
1463
|
else:
|
|
1443
|
-
collection = self.extended_schema[
|
|
1444
|
-
|
|
1445
|
-
)
|
|
1464
|
+
collection = self.translator.ontology.mapping.extended_schema[
|
|
1465
|
+
label
|
|
1466
|
+
].get("db_collection_name", None)
|
|
1446
1467
|
|
|
1447
1468
|
# add file path to neo4 admin import statement (import call path
|
|
1448
1469
|
# may be different from actual output path)
|
|
@@ -1520,6 +1541,7 @@ class _PostgreSQLBatchWriter(_BatchWriter):
|
|
|
1520
1541
|
|
|
1521
1542
|
This class inherits from the abstract class "_BatchWriter" and implements the
|
|
1522
1543
|
PostgreSQL-specific methods:
|
|
1544
|
+
|
|
1523
1545
|
- _write_node_headers
|
|
1524
1546
|
- _write_edge_headers
|
|
1525
1547
|
- _construct_import_call
|
|
@@ -1839,7 +1861,6 @@ DBMS_TO_CLASS = {
|
|
|
1839
1861
|
def get_writer(
|
|
1840
1862
|
dbms: str,
|
|
1841
1863
|
translator: "Translator",
|
|
1842
|
-
ontology: "Ontology",
|
|
1843
1864
|
deduplicator: "Deduplicator",
|
|
1844
1865
|
output_directory: str,
|
|
1845
1866
|
strict_mode: bool,
|
|
@@ -1854,8 +1875,6 @@ def get_writer(
|
|
|
1854
1875
|
|
|
1855
1876
|
translator: the Translator object.
|
|
1856
1877
|
|
|
1857
|
-
ontology: the Ontology object.
|
|
1858
|
-
|
|
1859
1878
|
output_directory: the directory to write the output files to.
|
|
1860
1879
|
|
|
1861
1880
|
strict_mode: whether to use strict mode.
|
|
@@ -1879,7 +1898,6 @@ def get_writer(
|
|
|
1879
1898
|
|
|
1880
1899
|
if writer is not None:
|
|
1881
1900
|
return writer(
|
|
1882
|
-
ontology=ontology,
|
|
1883
1901
|
translator=translator,
|
|
1884
1902
|
deduplicator=deduplicator,
|
|
1885
1903
|
delimiter=dbms_config.get("delimiter"),
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: biocypher
|
|
3
|
-
Version: 0.5.19
|
|
3
|
+
Version: 0.5.21
|
|
4
4
|
Summary: A unifying framework for biomedical research knowledge graphs
|
|
5
5
|
Home-page: https://github.com/biocypher/biocypher
|
|
6
6
|
License: MIT
|
|
@@ -25,8 +25,10 @@ Requires-Dist: more_itertools
|
|
|
25
25
|
Requires-Dist: neo4j-utils (==0.0.7)
|
|
26
26
|
Requires-Dist: networkx (>=3.0,<4.0)
|
|
27
27
|
Requires-Dist: pandas (>=2.0.1,<3.0.0)
|
|
28
|
+
Requires-Dist: pooch (>=1.7.0,<2.0.0)
|
|
28
29
|
Requires-Dist: rdflib (>=6.2.0,<7.0.0)
|
|
29
30
|
Requires-Dist: stringcase (>=1.2.0,<2.0.0)
|
|
31
|
+
Requires-Dist: tqdm (>=4.65.0,<5.0.0)
|
|
30
32
|
Requires-Dist: treelib (>=1.6.1,<2.0.0)
|
|
31
33
|
Project-URL: Bug Tracker, https://github.com/biocypher/biocypher/issues
|
|
32
34
|
Project-URL: Repository, https://github.com/biocypher/biocypher
|
|
@@ -38,7 +40,9 @@ Description-Content-Type: text/markdown
|
|
|
38
40
|

|
|
39
41
|
[](https://badge.fury.io/py/biocypher)
|
|
40
42
|
[](https://www.repostatus.org/#active)
|
|
41
|
-
](https://github.com/biocypher/biocypher/actions/workflows/ci_cd.yaml)
|
|
44
|
+

|
|
45
|
+
[](https://github.com/biocypher/biocypher/actions/workflows/sphinx_autodoc.yaml)
|
|
42
46
|
[](https://pepy.tech/project/biocypher)
|
|
43
47
|
[](https://github.com/pre-commit/pre-commit)
|
|
44
48
|
[](http://makeapullrequest.com)
|
|
@@ -5,19 +5,20 @@ biocypher/_config/test_config.yaml,sha256=Np8jeS5_EP6HHOvMKb7B_Tkyqd5YaYlYz_DVsX
|
|
|
5
5
|
biocypher/_config/test_schema_config.yaml,sha256=D1600WgEj3iTXrumVU9LIivJHJO36iaxfkOgyam9zVU,3129
|
|
6
6
|
biocypher/_config/test_schema_config_disconnected.yaml,sha256=Qm8FLxEn2spHcyj_5F859KjcDvKSxNhxDvi4b4LLkvQ,68
|
|
7
7
|
biocypher/_config/test_schema_config_extended.yaml,sha256=wn3A76142hhjnImhMF6RODbCFESTJ2TtPvcFdIFsAT0,3309
|
|
8
|
-
biocypher/_connect.py,sha256=
|
|
9
|
-
biocypher/_core.py,sha256=
|
|
8
|
+
biocypher/_connect.py,sha256=0oSyO6CEIlKL8rHo-HHE7y0FzGfSi4vnEXSDy1TnIUE,12456
|
|
9
|
+
biocypher/_core.py,sha256=cc8iOOAhaByobN6zOwdUm1hZFAJ5CpGpKmQnBIIQrbY,21090
|
|
10
10
|
biocypher/_create.py,sha256=vpUchUdEpWupZi1LgFLxAWMtqoBwnWbP7PwEDUCBS4A,10202
|
|
11
|
-
biocypher/_deduplicate.py,sha256=
|
|
11
|
+
biocypher/_deduplicate.py,sha256=BBvfpXzu6L5YDY5FdtXxnf8YlsbJpbCE8RdUoKsm0n0,4949
|
|
12
|
+
biocypher/_get.py,sha256=MHjHEqvPr4Z7Ud05qBcUJkR--iZ1SgUvUoft8MfwUic,8996
|
|
12
13
|
biocypher/_logger.py,sha256=soYtz1DiduLFw3XrMnphWWUxeuJqvSof4AYhlafxl08,2933
|
|
13
14
|
biocypher/_mapping.py,sha256=XJZjmXTPnXVkyub1ZU0h3EKXQ2YROaGaJOaGyPMqgy4,9338
|
|
14
|
-
biocypher/_metadata.py,sha256=
|
|
15
|
+
biocypher/_metadata.py,sha256=CHGBWJ8qYrb7QNQO-Fk0ROkRDXHvtFECSoex9GytJ4A,1658
|
|
15
16
|
biocypher/_misc.py,sha256=wsjGVOqBDVM5hxbE_TEaZ69u1kJc8HXwRAtQHUgE8XQ,4545
|
|
16
|
-
biocypher/_ontology.py,sha256=
|
|
17
|
-
biocypher/_pandas.py,sha256=
|
|
18
|
-
biocypher/_translate.py,sha256=
|
|
19
|
-
biocypher/_write.py,sha256=
|
|
20
|
-
biocypher-0.5.19.dist-info/LICENSE,sha256=SjUaQkq671iQUZOxEUpC4jvJxXOlfSiHTTueyz9kXJM,1065
|
|
21
|
-
biocypher-0.5.19.dist-info/WHEEL,sha256=vxFmldFsRN_Hx10GDvsdv1wroKq8r5Lzvjp6GZ4OO8c,88
|
|
22
|
-
biocypher-0.5.
|
|
23
|
-
biocypher-0.5.19.dist-info/RECORD,,
|
|
17
|
+
biocypher/_ontology.py,sha256=pHc4hO8iZx-yg9gzqfBR9khoIni-lKAxWgnRFyNP91E,21530
|
|
18
|
+
biocypher/_pandas.py,sha256=GVCFM68J7yBjh40MpkNVgD8qT1RFMrrIjMOtD3iKsf4,3040
|
|
19
|
+
biocypher/_translate.py,sha256=nj4Y60F0U3JBH36N2dh5pFcC8Ot86rskJ2ChJwje9dI,16494
|
|
20
|
+
biocypher/_write.py,sha256=2ynF-VkvTr8WT2qPt2wji3iupP3WON94TlT6NpfDvCs,67738
|
|
21
|
+
biocypher-0.5.21.dist-info/LICENSE,sha256=SjUaQkq671iQUZOxEUpC4jvJxXOlfSiHTTueyz9kXJM,1065
|
|
22
|
+
biocypher-0.5.21.dist-info/WHEEL,sha256=vxFmldFsRN_Hx10GDvsdv1wroKq8r5Lzvjp6GZ4OO8c,88
|
|
23
|
+
biocypher-0.5.21.dist-info/METADATA,sha256=wJ1Hnuq_erwEJRMCKA3e7VeUF7cLibnZdcnSCryynx0,9505
|
|
24
|
+
biocypher-0.5.21.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|