biocypher 0.9.2__py3-none-any.whl → 0.12.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biocypher/__init__.py +8 -0
- biocypher/_core.py +10 -4
- biocypher/_get.py +129 -46
- biocypher/_graph.py +819 -0
- biocypher/_metadata.py +32 -12
- biocypher/_translate.py +5 -8
- biocypher/_workflow.py +798 -0
- biocypher/output/connect/_get_connector.py +24 -7
- biocypher/output/connect/_neo4j_driver.py +55 -9
- biocypher/output/connect/_neo4j_driver_wrapper.py +1317 -0
- biocypher/output/in_memory/_airr.py +499 -0
- biocypher/output/in_memory/_get_in_memory_kg.py +25 -13
- biocypher/output/in_memory/_in_memory_kg.py +57 -0
- biocypher/output/in_memory/_pandas.py +1 -59
- biocypher/output/templates/powershell_template.ps1 +60 -0
- biocypher/output/write/_batch_writer.py +4 -4
- biocypher/output/write/_get_writer.py +5 -0
- biocypher/output/write/graph/_airr.py +32 -0
- biocypher/output/write/graph/_neo4j.py +91 -5
- biocypher/output/write/relational/_postgresql.py +37 -27
- {biocypher-0.9.2.dist-info → biocypher-0.12.3.dist-info}/METADATA +32 -28
- {biocypher-0.9.2.dist-info → biocypher-0.12.3.dist-info}/RECORD +31 -24
- {biocypher-0.9.2.dist-info → biocypher-0.12.3.dist-info}/WHEEL +1 -1
- biocypher-0.12.3.dist-info/licenses/LICENSE +202 -0
- biocypher-0.12.3.dist-info/licenses/NOTICE +9 -0
- biocypher-0.9.2.dist-info/LICENSE +0 -21
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
2
|
|
|
3
|
-
from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
|
|
4
3
|
from biocypher.output.in_memory._in_memory_kg import _InMemoryKG
|
|
5
4
|
|
|
6
5
|
|
|
@@ -20,65 +19,8 @@ class PandasKG(_InMemoryKG):
|
|
|
20
19
|
def add_edges(self, edges):
|
|
21
20
|
self.add_tables(edges)
|
|
22
21
|
|
|
23
|
-
def _separate_entity_types(self, entities):
|
|
24
|
-
"""
|
|
25
|
-
Given mixed iterable of BioCypher objects, separate them into lists by
|
|
26
|
-
type. Also deduplicates using the `Deduplicator` instance.
|
|
27
|
-
"""
|
|
28
|
-
lists = {}
|
|
29
|
-
for entity in entities:
|
|
30
|
-
if (
|
|
31
|
-
not isinstance(entity, BioCypherNode)
|
|
32
|
-
and not isinstance(entity, BioCypherEdge)
|
|
33
|
-
and not isinstance(entity, BioCypherRelAsNode)
|
|
34
|
-
):
|
|
35
|
-
raise TypeError(
|
|
36
|
-
"Expected a BioCypherNode / BioCypherEdge / " f"BioCypherRelAsNode, got {type(entity)}."
|
|
37
|
-
)
|
|
38
|
-
|
|
39
|
-
if isinstance(entity, BioCypherNode):
|
|
40
|
-
seen = self.deduplicator.node_seen(entity)
|
|
41
|
-
elif isinstance(entity, BioCypherEdge):
|
|
42
|
-
seen = self.deduplicator.edge_seen(entity)
|
|
43
|
-
elif isinstance(entity, BioCypherRelAsNode):
|
|
44
|
-
seen = self.deduplicator.rel_as_node_seen(entity)
|
|
45
|
-
|
|
46
|
-
if seen:
|
|
47
|
-
continue
|
|
48
|
-
|
|
49
|
-
if isinstance(entity, BioCypherRelAsNode):
|
|
50
|
-
node = entity.get_node()
|
|
51
|
-
source_edge = entity.get_source_edge()
|
|
52
|
-
target_edge = entity.get_target_edge()
|
|
53
|
-
|
|
54
|
-
_type = node.get_type()
|
|
55
|
-
if _type not in lists:
|
|
56
|
-
lists[_type] = []
|
|
57
|
-
lists[_type].append(node)
|
|
58
|
-
|
|
59
|
-
_source_type = source_edge.get_type()
|
|
60
|
-
if _source_type not in lists:
|
|
61
|
-
lists[_source_type] = []
|
|
62
|
-
lists[_source_type].append(source_edge)
|
|
63
|
-
|
|
64
|
-
_target_type = target_edge.get_type()
|
|
65
|
-
if _target_type not in lists:
|
|
66
|
-
lists[_target_type] = []
|
|
67
|
-
lists[_target_type].append(target_edge)
|
|
68
|
-
continue
|
|
69
|
-
|
|
70
|
-
_type = entity.get_type()
|
|
71
|
-
if _type not in lists:
|
|
72
|
-
lists[_type] = []
|
|
73
|
-
lists[_type].append(entity)
|
|
74
|
-
|
|
75
|
-
return lists
|
|
76
|
-
|
|
77
22
|
def add_tables(self, entities):
|
|
78
|
-
"""
|
|
79
|
-
Add Pandas dataframes for each node and edge type in the input.
|
|
80
|
-
"""
|
|
81
|
-
|
|
23
|
+
"""Add Pandas dataframes for each node and edge type in the input."""
|
|
82
24
|
lists = self._separate_entity_types(entities)
|
|
83
25
|
|
|
84
26
|
for _type, _entities in lists.items():
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
$banner = @"
|
|
2
|
+
# ============================================================================== #
|
|
3
|
+
# ======== Import Script for Powershell ======== #
|
|
4
|
+
# ============================================================================== #
|
|
5
|
+
"@
|
|
6
|
+
Write-Host $banner -ForegroundColor Cyan
|
|
7
|
+
Write-Host "[$(Get-Date -Format 'u')] Starting Neo4j import process..." -ForegroundColor Cyan
|
|
8
|
+
|
|
9
|
+
# ================================ #
|
|
10
|
+
# Neo4j Binary Settings #
|
|
11
|
+
# ================================ #
|
|
12
|
+
{neo4j_bin_path}
|
|
13
|
+
Write-Host "[$(Get-Date -Format 'u')] Neo4j bin path set to: $NEO4J_BIN_PATH_WINDOWS"
|
|
14
|
+
|
|
15
|
+
{neo4j_version_check}
|
|
16
|
+
Write-Host "[$(Get-Date -Format 'u')] Detected Neo4j version: $version"
|
|
17
|
+
|
|
18
|
+
$major_version = $version.Trim().Split('.')[0]
|
|
19
|
+
$major = [int]$major_version
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# ================================ #
|
|
23
|
+
# Neo4j import arguments #
|
|
24
|
+
# ================================ #
|
|
25
|
+
|
|
26
|
+
if ( $major -lt 5 )
|
|
27
|
+
{{
|
|
28
|
+
$args_neo4j=@(
|
|
29
|
+
@'
|
|
30
|
+
{args_neo4j_v4}
|
|
31
|
+
'@
|
|
32
|
+
)
|
|
33
|
+
Write-Host "[$(Get-Date -Format 'u')] Detected Neo4j v4 - using legacy import command." -ForegroundColor Yellow
|
|
34
|
+
Write-Host "[$(Get-Date -Format 'u')] Args for Neo4j v4:"
|
|
35
|
+
$args_neo4j -split ' ' | ForEach-Object {{ Write-Host "`t$_" }}
|
|
36
|
+
}}
|
|
37
|
+
else
|
|
38
|
+
{{
|
|
39
|
+
$args_neo4j = @(
|
|
40
|
+
@'
|
|
41
|
+
{args_neo4j_v5}
|
|
42
|
+
'@
|
|
43
|
+
)
|
|
44
|
+
Write-Host "[$(Get-Date -Format 'u')] Detected Neo4j v5 or newer - using modern import command." -ForegroundColor Yellow
|
|
45
|
+
Write-Host "[$(Get-Date -Format 'u')] Args for Neo4j >= v5:"
|
|
46
|
+
$args_neo4j -split ' ' | ForEach-Object {{ Write-Host "`t$_" }}
|
|
47
|
+
}}
|
|
48
|
+
|
|
49
|
+
# ================================ #
|
|
50
|
+
# Neo4j-admin import call #
|
|
51
|
+
# ================================ #
|
|
52
|
+
Write-Host "[$(Get-Date -Format 'u')] Running import command..." -ForegroundColor Cyan
|
|
53
|
+
|
|
54
|
+
Invoke-Expression "$NEO4J_BIN_PATH_WINDOWS $args_neo4j"
|
|
55
|
+
if ($LASTEXITCODE -eq 0) {{
|
|
56
|
+
Write-Host "[$(Get-Date -Format 'u')] Import completed successfully!" -ForegroundColor Green
|
|
57
|
+
}} else {{
|
|
58
|
+
Write-Host "[$(Get-Date -Format 'u')] Import failed with exit code $LASTEXITCODE." -ForegroundColor Red
|
|
59
|
+
}}
|
|
60
|
+
Write-Host "[$(Get-Date -Format 'u')] Script finished." -ForegroundColor Cyan
|
|
@@ -162,8 +162,8 @@ class _BatchWriter(_Writer, ABC):
|
|
|
162
162
|
which need to be overwritten by the child classes.
|
|
163
163
|
|
|
164
164
|
Each batch writer instance has a fixed representation that needs to be
|
|
165
|
-
passed at instantiation via the
|
|
166
|
-
also expects an ontology adapter via
|
|
165
|
+
passed at instantiation via the `schema` argument. The instance
|
|
166
|
+
also expects an ontology adapter via `ontology_adapter` to be
|
|
167
167
|
able to convert and extend the hierarchy.
|
|
168
168
|
|
|
169
169
|
Requires the following methods to be overwritten by database-specific
|
|
@@ -289,7 +289,7 @@ class _BatchWriter(_Writer, ABC):
|
|
|
289
289
|
self._labels_orders = ["Alphabetical", "Ascending", "Descending", "Leaves"]
|
|
290
290
|
if labels_order not in self._labels_orders:
|
|
291
291
|
msg = (
|
|
292
|
-
f"
|
|
292
|
+
f"A batch writer 'labels_order' parameter cannot be '{labels_order}',"
|
|
293
293
|
"must be one of: {' ,'.join(self._labels_orders)}",
|
|
294
294
|
)
|
|
295
295
|
raise ValueError(msg)
|
|
@@ -545,7 +545,7 @@ class _BatchWriter(_Writer, ABC):
|
|
|
545
545
|
f"Must be one of {self._labels_orders}"
|
|
546
546
|
)
|
|
547
547
|
raise ValueError(msg)
|
|
548
|
-
# concatenate with array
|
|
548
|
+
# concatenate with array delimiters
|
|
549
549
|
all_labels = self._write_array_string(all_labels)
|
|
550
550
|
else:
|
|
551
551
|
all_labels = self.translator.name_sentence_to_pascal(label)
|
|
@@ -9,6 +9,7 @@ from typing import TYPE_CHECKING
|
|
|
9
9
|
from biocypher._config import config as _config
|
|
10
10
|
from biocypher._logger import logger
|
|
11
11
|
from biocypher.output.write._batch_writer import _BatchWriter
|
|
12
|
+
from biocypher.output.write.graph._airr import _AirrWriter
|
|
12
13
|
from biocypher.output.write.graph._arangodb import _ArangoDBBatchWriter
|
|
13
14
|
from biocypher.output.write.graph._neo4j import _Neo4jBatchWriter
|
|
14
15
|
from biocypher.output.write.graph._networkx import _NetworkXWriter
|
|
@@ -50,6 +51,7 @@ DBMS_TO_CLASS = {
|
|
|
50
51
|
"Tabular": _PandasCSVWriter,
|
|
51
52
|
"networkx": _NetworkXWriter,
|
|
52
53
|
"NetworkX": _NetworkXWriter,
|
|
54
|
+
"airr": _AirrWriter,
|
|
53
55
|
}
|
|
54
56
|
|
|
55
57
|
|
|
@@ -93,6 +95,8 @@ def get_writer(
|
|
|
93
95
|
raise ValueError(msg)
|
|
94
96
|
|
|
95
97
|
if writer is not None:
|
|
98
|
+
# FIXME: passing dbms_config as kwargs would ensure that we pass all config by default.
|
|
99
|
+
# TODO: to do that, config options names need to be aligned first.
|
|
96
100
|
return writer(
|
|
97
101
|
translator=translator,
|
|
98
102
|
deduplicator=deduplicator,
|
|
@@ -105,6 +109,7 @@ def get_writer(
|
|
|
105
109
|
import_call_file_prefix=dbms_config.get("import_call_file_prefix"),
|
|
106
110
|
wipe=dbms_config.get("wipe"),
|
|
107
111
|
strict_mode=strict_mode,
|
|
112
|
+
labels_order=dbms_config.get("labels_order"), # batch writer
|
|
108
113
|
skip_bad_relationships=dbms_config.get("skip_bad_relationships"), # neo4j
|
|
109
114
|
skip_duplicate_nodes=dbms_config.get("skip_duplicate_nodes"), # neo4j
|
|
110
115
|
db_user=dbms_config.get("user"), # psql
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Module to provide the AnnData writer class for BioCypher."""
|
|
2
|
+
|
|
3
|
+
from biocypher._logger import logger
|
|
4
|
+
from biocypher.output.write._writer import _Writer
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class _AirrWriter(_Writer):
|
|
8
|
+
"""A minimal placeholder writer class that implements the required methods
|
|
9
|
+
but performs no actual writing operations, since there is an existing anndata native writer functionality
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
def __init__(self, *args, **kwargs):
|
|
13
|
+
super().__init__(*args, **kwargs)
|
|
14
|
+
logger.info("Placeholder writer initialized")
|
|
15
|
+
|
|
16
|
+
def _write_node_data(self, nodes) -> bool:
|
|
17
|
+
"""Required implementation that does nothing with nodes."""
|
|
18
|
+
logger.info("Placeholder: Node data received but not processed")
|
|
19
|
+
return True
|
|
20
|
+
|
|
21
|
+
def _write_edge_data(self, edges) -> bool:
|
|
22
|
+
"""Required implementation that does nothing with edges."""
|
|
23
|
+
logger.info("Placeholder: Edge data received but not processed")
|
|
24
|
+
return True
|
|
25
|
+
|
|
26
|
+
def _construct_import_call(self) -> str:
|
|
27
|
+
"""Return a placeholder import script."""
|
|
28
|
+
return "# This is a placeholder import script\nprint('No actual import functionality implemented')"
|
|
29
|
+
|
|
30
|
+
def _get_import_script_name(self) -> str:
|
|
31
|
+
"""Return a placeholder script name."""
|
|
32
|
+
return "placeholder_import.py"
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Module to provide the Neo4j writer class."""
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
|
+
import sys
|
|
4
5
|
|
|
5
6
|
from biocypher._logger import logger
|
|
6
7
|
from biocypher.output.write._batch_writer import _BatchWriter, parse_label
|
|
@@ -226,7 +227,7 @@ class _Neo4jBatchWriter(_BatchWriter):
|
|
|
226
227
|
self.translator.ontology.mapping.extended_schema.get( # (seems to not work with 'not')
|
|
227
228
|
schema_label,
|
|
228
229
|
).get("use_id")
|
|
229
|
-
|
|
230
|
+
is False
|
|
230
231
|
):
|
|
231
232
|
skip_id = True
|
|
232
233
|
|
|
@@ -260,12 +261,26 @@ class _Neo4jBatchWriter(_BatchWriter):
|
|
|
260
261
|
|
|
261
262
|
Returns
|
|
262
263
|
-------
|
|
263
|
-
str: The name of the import script (ending in .sh)
|
|
264
|
+
str: The name of the import script (ending in .sh or .ps1 depending on OS)
|
|
264
265
|
|
|
265
266
|
"""
|
|
267
|
+
if sys.platform.startswith("win"):
|
|
268
|
+
return "neo4j-admin-import-call.ps1"
|
|
266
269
|
return "neo4j-admin-import-call.sh"
|
|
267
270
|
|
|
268
271
|
def _construct_import_call(self) -> str:
|
|
272
|
+
"""Construct the import call script for Neo4j admin import.
|
|
273
|
+
|
|
274
|
+
Returns
|
|
275
|
+
-------
|
|
276
|
+
str: The import call script.
|
|
277
|
+
|
|
278
|
+
"""
|
|
279
|
+
if sys.platform.startswith("win"):
|
|
280
|
+
return self._construct_import_call_powershell()
|
|
281
|
+
return self._construct_import_call_bash()
|
|
282
|
+
|
|
283
|
+
def _construct_import_call_bash(self) -> str:
|
|
269
284
|
"""Function to construct the import call detailing folder and
|
|
270
285
|
individual node and edge headers and data files, as well as
|
|
271
286
|
delimiters and database name. Built after all data has been
|
|
@@ -278,9 +293,7 @@ class _Neo4jBatchWriter(_BatchWriter):
|
|
|
278
293
|
"""
|
|
279
294
|
import_call_neo4j_v4 = self._get_import_call("import", "--database=", "--force=")
|
|
280
295
|
import_call_neo4j_v5 = self._get_import_call("database import full", "", "--overwrite-destination=")
|
|
281
|
-
neo4j_version_check = (
|
|
282
|
-
f"version=$({self._get_default_import_call_bin_prefix()}neo4j-admin --version | cut -d '.' -f 1)"
|
|
283
|
-
)
|
|
296
|
+
neo4j_version_check = f"version=$({self.import_call_bin_prefix}neo4j-admin --version | cut -d '.' -f 1)"
|
|
284
297
|
|
|
285
298
|
import_script = (
|
|
286
299
|
f"#!/bin/bash\n{neo4j_version_check}\nif [[ $version -ge 5 ]]; "
|
|
@@ -288,6 +301,47 @@ class _Neo4jBatchWriter(_BatchWriter):
|
|
|
288
301
|
)
|
|
289
302
|
return import_script
|
|
290
303
|
|
|
304
|
+
def _construct_import_call_powershell(self) -> str:
|
|
305
|
+
"""Construct the import call script for Neo4j admin import (PowerShell).
|
|
306
|
+
|
|
307
|
+
Returns
|
|
308
|
+
-------
|
|
309
|
+
str: PowerShell script for Neo4j admin import.
|
|
310
|
+
|
|
311
|
+
"""
|
|
312
|
+
# Path to the PowerShell template
|
|
313
|
+
template_path = os.path.join(
|
|
314
|
+
os.path.dirname(os.path.abspath(__file__)),
|
|
315
|
+
"..",
|
|
316
|
+
"..",
|
|
317
|
+
"templates",
|
|
318
|
+
"powershell_template.ps1",
|
|
319
|
+
)
|
|
320
|
+
|
|
321
|
+
# Read the template file
|
|
322
|
+
with open(template_path, encoding="utf-8") as f:
|
|
323
|
+
template = f.read()
|
|
324
|
+
|
|
325
|
+
# Prepare the dynamic components for the template
|
|
326
|
+
import_call_neo4j_v4 = self._get_import_call_windows("import", "--database=", "--force=")
|
|
327
|
+
import_call_neo4j_v5 = self._get_import_call_windows("database import full", "", "--overwrite-destination=")
|
|
328
|
+
|
|
329
|
+
# Prepare the version check command
|
|
330
|
+
neo4j_version_check = (
|
|
331
|
+
f"$version = & powershell -NoProfile -ExecutionPolicy Bypass -File "
|
|
332
|
+
f'"{self.import_call_bin_prefix}neo4j-admin.ps1" --version'
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
# Fill in the template with the dynamic components
|
|
336
|
+
import_script = template.format(
|
|
337
|
+
neo4j_bin_path=f'$NEO4J_BIN_PATH_WINDOWS = "{self.import_call_bin_prefix}neo4j-admin.ps1"',
|
|
338
|
+
neo4j_version_check=neo4j_version_check,
|
|
339
|
+
args_neo4j_v4=import_call_neo4j_v4,
|
|
340
|
+
args_neo4j_v5=import_call_neo4j_v5,
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
return import_script
|
|
344
|
+
|
|
291
345
|
def _get_import_call(self, import_cmd: str, database_cmd: str, wipe_cmd: str) -> str:
|
|
292
346
|
"""Get parametrized import call for Neo4j 4 or 5+.
|
|
293
347
|
|
|
@@ -331,3 +385,35 @@ class _Neo4jBatchWriter(_BatchWriter):
|
|
|
331
385
|
import_call += f'--relationships="{header_path},{parts_path}" '
|
|
332
386
|
|
|
333
387
|
return import_call
|
|
388
|
+
|
|
389
|
+
def _get_import_call_windows(self, import_cmd: str, database_cmd: str, wipe_cmd: str) -> str:
|
|
390
|
+
"""Get parametrized import call for Neo4j 4 or 5+ (Windows).
|
|
391
|
+
|
|
392
|
+
Args:
|
|
393
|
+
----
|
|
394
|
+
import_cmd (str): The import command to use.
|
|
395
|
+
database_cmd (str): The database command to use.
|
|
396
|
+
wipe_cmd (str): The wipe command to use.
|
|
397
|
+
|
|
398
|
+
Returns:
|
|
399
|
+
-------
|
|
400
|
+
str: The import call for Windows.
|
|
401
|
+
|
|
402
|
+
"""
|
|
403
|
+
import_call = []
|
|
404
|
+
import_call.append(f"{import_cmd} ")
|
|
405
|
+
import_call.append(f"{database_cmd}{self.db_name} ")
|
|
406
|
+
import_call.append(f'--delimiter="{self.escaped_delim}" ')
|
|
407
|
+
import_call.append(f'--array-delimiter="{self.escaped_adelim}" ')
|
|
408
|
+
import_call.append(f'--quote="{self.quote}" ' if self.quote == "'" else f"--quote='{self.quote}' ")
|
|
409
|
+
import_call.append(f"{wipe_cmd}true " if self.wipe else "")
|
|
410
|
+
import_call.append("--skip-bad-relationships=true " if self.skip_bad_relationships else "")
|
|
411
|
+
import_call.append("--skip-duplicate-nodes=true " if self.skip_duplicate_nodes else "")
|
|
412
|
+
import_call.extend(
|
|
413
|
+
f'--nodes="{header_path},{parts_path}" ' for header_path, parts_path in self.import_call_nodes
|
|
414
|
+
)
|
|
415
|
+
import_call.extend(
|
|
416
|
+
f'--relationships="{header_path},{parts_path}" ' for header_path, parts_path in self.import_call_edges
|
|
417
|
+
)
|
|
418
|
+
|
|
419
|
+
return "".join(import_call)
|
|
@@ -6,7 +6,8 @@ from biocypher.output.write._batch_writer import _BatchWriter
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class _PostgreSQLBatchWriter(_BatchWriter):
|
|
9
|
-
"""
|
|
9
|
+
"""Write node and edge representations for PostgreSQL.
|
|
10
|
+
|
|
10
11
|
Class for writing node and edge representations to disk using the
|
|
11
12
|
format specified by PostgreSQL for the use of "COPY FROM...". Each batch
|
|
12
13
|
writer instance has a fixed representation that needs to be passed
|
|
@@ -39,12 +40,13 @@ class _PostgreSQLBatchWriter(_BatchWriter):
|
|
|
39
40
|
self._copy_from_csv_commands = set()
|
|
40
41
|
super().__init__(*args, **kwargs)
|
|
41
42
|
|
|
42
|
-
def _get_default_import_call_bin_prefix(self):
|
|
43
|
-
"""
|
|
44
|
-
Method to provide the default string for the import call bin prefix.
|
|
43
|
+
def _get_default_import_call_bin_prefix(self) -> str:
|
|
44
|
+
"""Provide the default string for the import call bin prefix.
|
|
45
45
|
|
|
46
|
-
Returns
|
|
46
|
+
Returns
|
|
47
|
+
-------
|
|
47
48
|
str: The default location for the psql command
|
|
49
|
+
|
|
48
50
|
"""
|
|
49
51
|
return ""
|
|
50
52
|
|
|
@@ -56,33 +58,36 @@ class _PostgreSQLBatchWriter(_BatchWriter):
|
|
|
56
58
|
return "VARCHAR"
|
|
57
59
|
|
|
58
60
|
def _quote_string(self, value: str) -> str:
|
|
59
|
-
"""
|
|
60
|
-
Quote a string.
|
|
61
|
-
"""
|
|
62
|
-
|
|
61
|
+
"""Quote a string."""
|
|
63
62
|
return f"{self.quote}{value}{self.quote}"
|
|
64
63
|
|
|
65
64
|
def _write_array_string(self, string_list) -> str:
|
|
66
|
-
"""
|
|
67
|
-
|
|
68
|
-
|
|
65
|
+
"""Write the string representation of an array into a .csv file.
|
|
66
|
+
|
|
67
|
+
Abstract method to output.write the string representation of an array
|
|
68
|
+
into a .csv file as required by the postgresql COPY command, with
|
|
69
|
+
'{','}' brackets and ',' separation.
|
|
69
70
|
|
|
70
71
|
Args:
|
|
72
|
+
----
|
|
71
73
|
string_list (list): list of ontology strings
|
|
72
74
|
|
|
73
75
|
Returns:
|
|
76
|
+
-------
|
|
74
77
|
str: The string representation of an array for postgres COPY
|
|
78
|
+
|
|
75
79
|
"""
|
|
76
80
|
string = ",".join(string_list)
|
|
77
81
|
string = f'"{{{string}}}"'
|
|
78
82
|
return string
|
|
79
83
|
|
|
80
84
|
def _get_import_script_name(self) -> str:
|
|
81
|
-
"""
|
|
82
|
-
Returns the name of the psql import script
|
|
85
|
+
"""Return the name of the psql import script.
|
|
83
86
|
|
|
84
|
-
Returns
|
|
87
|
+
Returns
|
|
88
|
+
-------
|
|
85
89
|
str: The name of the import script (ending in .sh)
|
|
90
|
+
|
|
86
91
|
"""
|
|
87
92
|
return f"{self.db_name}-import-call.sh"
|
|
88
93
|
|
|
@@ -91,14 +96,17 @@ class _PostgreSQLBatchWriter(_BatchWriter):
|
|
|
91
96
|
string = string.lower()
|
|
92
97
|
return string
|
|
93
98
|
|
|
94
|
-
def _write_node_headers(self):
|
|
95
|
-
"""
|
|
99
|
+
def _write_node_headers(self) -> bool:
|
|
100
|
+
"""Write node header files for PostgreSQL.
|
|
101
|
+
|
|
96
102
|
Writes single CSV file for a graph entity that is represented
|
|
97
103
|
as a node as per the definition in the `schema_config.yaml`,
|
|
98
104
|
containing only the header for this type of node.
|
|
99
105
|
|
|
100
|
-
Returns
|
|
106
|
+
Returns
|
|
107
|
+
-------
|
|
101
108
|
bool: The return value. True for success, False otherwise.
|
|
109
|
+
|
|
102
110
|
"""
|
|
103
111
|
# load headers from data parse
|
|
104
112
|
if not self.node_property_dict:
|
|
@@ -158,7 +166,7 @@ class _PostgreSQLBatchWriter(_BatchWriter):
|
|
|
158
166
|
)
|
|
159
167
|
|
|
160
168
|
self._copy_from_csv_commands.add(
|
|
161
|
-
f"\\copy {pascal_label} FROM '{parts_path}' DELIMITER E'{self.delim}' CSV;"
|
|
169
|
+
f"\\copy {pascal_label} FROM '{parts_path}' DELIMITER E'{self.delim}' CSV;",
|
|
162
170
|
)
|
|
163
171
|
|
|
164
172
|
# add file path to import statement
|
|
@@ -175,13 +183,14 @@ class _PostgreSQLBatchWriter(_BatchWriter):
|
|
|
175
183
|
return True
|
|
176
184
|
|
|
177
185
|
def _write_edge_headers(self):
|
|
178
|
-
"""
|
|
179
|
-
Writes single CSV file for a graph entity that is represented
|
|
186
|
+
"""Writes single CSV file for a graph entity that is represented
|
|
180
187
|
as an edge as per the definition in the `schema_config.yaml`,
|
|
181
188
|
containing only the header for this type of edge.
|
|
182
189
|
|
|
183
|
-
Returns
|
|
190
|
+
Returns
|
|
191
|
+
-------
|
|
184
192
|
bool: The return value. True for success, False otherwise.
|
|
193
|
+
|
|
185
194
|
"""
|
|
186
195
|
# load headers from data parse
|
|
187
196
|
if not self.edge_property_dict:
|
|
@@ -221,7 +230,7 @@ class _PostgreSQLBatchWriter(_BatchWriter):
|
|
|
221
230
|
raise ValueError(
|
|
222
231
|
"Column name '_ID' is reserved for internal use, "
|
|
223
232
|
"denoting the relationship ID. Please choose a "
|
|
224
|
-
"different name for your column."
|
|
233
|
+
"different name for your column.",
|
|
225
234
|
)
|
|
226
235
|
|
|
227
236
|
columns.append(f"{col_name} {col_type}")
|
|
@@ -255,7 +264,7 @@ class _PostgreSQLBatchWriter(_BatchWriter):
|
|
|
255
264
|
)
|
|
256
265
|
|
|
257
266
|
self._copy_from_csv_commands.add(
|
|
258
|
-
f"\\copy {pascal_label} FROM '{parts_path}' DELIMITER E'{self.delim}' CSV;"
|
|
267
|
+
f"\\copy {pascal_label} FROM '{parts_path}' DELIMITER E'{self.delim}' CSV;",
|
|
259
268
|
)
|
|
260
269
|
|
|
261
270
|
# add file path to import statement
|
|
@@ -272,14 +281,15 @@ class _PostgreSQLBatchWriter(_BatchWriter):
|
|
|
272
281
|
return True
|
|
273
282
|
|
|
274
283
|
def _construct_import_call(self) -> str:
|
|
275
|
-
"""
|
|
276
|
-
Function to construct the import call detailing folder and
|
|
284
|
+
"""Function to construct the import call detailing folder and
|
|
277
285
|
individual node and edge headers and data files, as well as
|
|
278
286
|
delimiters and database name. Built after all data has been
|
|
279
287
|
processed to ensure that nodes are called before any edges.
|
|
280
288
|
|
|
281
|
-
Returns
|
|
289
|
+
Returns
|
|
290
|
+
-------
|
|
282
291
|
str: a bash command for postgresql import
|
|
292
|
+
|
|
283
293
|
"""
|
|
284
294
|
import_call = ""
|
|
285
295
|
|
|
@@ -1,43 +1,47 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: biocypher
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.12.3
|
|
4
4
|
Summary: A unifying framework for biomedical research knowledge graphs
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
5
|
+
Project-URL: Homepage, https://biocypher.org
|
|
6
|
+
Project-URL: Documentation, https://biocypher.org
|
|
7
|
+
Project-URL: Repository, https://github.com/biocypher/biocypher
|
|
8
|
+
Project-URL: Issues, https://github.com/biocypher/biocypher/issues
|
|
9
|
+
Project-URL: Bug Tracker, https://github.com/biocypher/biocypher/issues
|
|
10
|
+
Author-email: Sebastian Lobentanzer <sebastian.lobentanzer@gmail.com>, Denes Turei <turei.denes@gmail.com>
|
|
11
|
+
License: Apache-2.0
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
License-File: NOTICE
|
|
14
|
+
Keywords: bioinformatics,biomedical-data,graph-database,knowledge-graph
|
|
10
15
|
Classifier: Development Status :: 3 - Alpha
|
|
11
16
|
Classifier: Intended Audience :: Developers
|
|
12
17
|
Classifier: Intended Audience :: Science/Research
|
|
13
|
-
Classifier: License :: OSI Approved ::
|
|
18
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
14
19
|
Classifier: Natural Language :: English
|
|
15
20
|
Classifier: Operating System :: OS Independent
|
|
16
21
|
Classifier: Programming Language :: Python
|
|
17
|
-
Classifier: Programming Language :: Python :: 3
|
|
18
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
21
22
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
22
|
-
Requires-
|
|
23
|
+
Requires-Python: >=3.10
|
|
23
24
|
Requires-Dist: appdirs
|
|
24
|
-
Requires-Dist:
|
|
25
|
-
Requires-Dist:
|
|
26
|
-
Requires-Dist:
|
|
27
|
-
Requires-Dist:
|
|
28
|
-
Requires-Dist:
|
|
29
|
-
Requires-Dist: rdflib
|
|
30
|
-
Requires-Dist:
|
|
31
|
-
Requires-Dist:
|
|
32
|
-
|
|
33
|
-
|
|
25
|
+
Requires-Dist: more-itertools
|
|
26
|
+
Requires-Dist: networkx>=3.0
|
|
27
|
+
Requires-Dist: pandas>=2.0.1
|
|
28
|
+
Requires-Dist: pooch>=1.7.0
|
|
29
|
+
Requires-Dist: pyyaml>=5.0
|
|
30
|
+
Requires-Dist: rdflib>=6.2.0
|
|
31
|
+
Requires-Dist: toml>=0.10.2
|
|
32
|
+
Requires-Dist: tqdm>=4.65.0
|
|
33
|
+
Requires-Dist: treelib==1.6.4
|
|
34
|
+
Provides-Extra: neo4j
|
|
35
|
+
Requires-Dist: neo4j>=5.0; extra == 'neo4j'
|
|
36
|
+
Provides-Extra: scirpy
|
|
37
|
+
Requires-Dist: scirpy>=0.22.0; extra == 'scirpy'
|
|
34
38
|
Description-Content-Type: text/markdown
|
|
35
39
|
|
|
36
40
|
# BioCypher
|
|
37
41
|
|
|
38
42
|
| | | | |
|
|
39
43
|
| --- | --- | --- | --- |
|
|
40
|
-
| __License__ | [](https://opensource.org/license/apache-2-0) | __Python__ | [](https://www.python.org) |
|
|
41
45
|
| __Package__ | [](https://pypi.org/project/biocypher/) [](https://pepy.tech/project/biocypher) [](https://zenodo.org/doi/10.5281/zenodo.10158203) | __Build status__ | [](https://github.com/biocypher/biocypher/actions/workflows/tests_and_code_quality.yaml) [](https://github.com/biocypher/biocypher/actions/workflows/docs.yaml) |
|
|
42
46
|
| __Tests__ | [](https://github.com/biocypher/biocypher/actions/workflows/tests_and_code_quality.yaml) | __Docker__ | [](https://hub.docker.com/repository/docker/biocypher/base/general) [](https://hub.docker.com/repository/docker/biocypher/base/general) |
|
|
43
47
|
| __Development__ | [](https://github.com/pyOpenSci/software-review/issues/110) [](https://www.repostatus.org/#active) [](https://github.com/pre-commit/pre-commit) [](https://black.readthedocs.io/en/stable/) | __Contributions__ | [](http://makeapullrequest.com) [](CONTRIBUTING.md) [](https://github.com/biopragmatics/bioregistry) |
|
|
@@ -62,7 +66,7 @@ the docs [here](https://biocypher.org).
|
|
|
62
66
|
margin-left: auto;
|
|
63
67
|
margin-right: auto;
|
|
64
68
|
width: 70%;"
|
|
65
|
-
src="docs/
|
|
69
|
+
src="docs/assets/img/graphical-abstract-biocypher.png"
|
|
66
70
|
alt="Graphical Abstract">
|
|
67
71
|
</img>
|
|
68
72
|
|
|
@@ -77,9 +81,10 @@ Board](https://github.com/orgs/biocypher/projects/3/views/2).
|
|
|
77
81
|
|
|
78
82
|
## ⚙️ Installation / Usage
|
|
79
83
|
|
|
80
|
-
Install the package from PyPI using `pip install biocypher`.
|
|
84
|
+
Install the package from PyPI using `pip install biocypher`. For Neo4j online mode
|
|
85
|
+
support, install with `pip install biocypher[neo4j]`. More comprehensive
|
|
81
86
|
installation and configuration instructions can be found
|
|
82
|
-
[here](https://biocypher.org/installation
|
|
87
|
+
[here](https://biocypher.org/BioCypher/installation/).
|
|
83
88
|
|
|
84
89
|
Exemplary usage of BioCypher to build a graph database is shown in our tutorial
|
|
85
90
|
and the various pipelines we have created. You can find these on the [Components
|
|
@@ -112,4 +117,3 @@ as a preprint at https://arxiv.org/abs/2212.13543.
|
|
|
112
117
|
This project has received funding from the European Union’s Horizon 2020
|
|
113
118
|
research and innovation programme under grant agreement No 965193 for DECIDER
|
|
114
119
|
and No 116030 for TransQST.
|
|
115
|
-
|