biocypher 0.7.0__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biocypher might be problematic. Click here for more details.
- {biocypher-0.7.0 → biocypher-0.8.0}/PKG-INFO +1 -1
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/_config/biocypher_config.yaml +14 -3
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/_metadata.py +1 -1
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/write/_batch_writer.py +34 -2
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/write/_writer.py +32 -14
- {biocypher-0.7.0 → biocypher-0.8.0}/pyproject.toml +1 -1
- {biocypher-0.7.0 → biocypher-0.8.0}/LICENSE +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/README.md +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/__init__.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/_config/__init__.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/_config/test_config.yaml +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/_config/test_schema_config.yaml +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/_config/test_schema_config_disconnected.yaml +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/_config/test_schema_config_extended.yaml +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/_core.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/_create.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/_deduplicate.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/_get.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/_logger.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/_mapping.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/_misc.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/_ontology.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/_translate.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/__init__.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/connect/__init__.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/connect/_get_connector.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/connect/_neo4j_driver.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/in_memory/__init__.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/in_memory/_get_in_memory_kg.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/in_memory/_in_memory_kg.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/in_memory/_networkx.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/in_memory/_pandas.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/write/__init__.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/write/_get_writer.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/write/graph/__init__.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/write/graph/_arangodb.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/write/graph/_neo4j.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/write/graph/_networkx.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/write/graph/_rdf.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/write/relational/__init__.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/write/relational/_csv.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/write/relational/_postgresql.py +0 -0
- {biocypher-0.7.0 → biocypher-0.8.0}/biocypher/output/write/relational/_sqlite.py +0 -0
|
@@ -30,12 +30,13 @@ biocypher:
|
|
|
30
30
|
# switch_label_and_id: true
|
|
31
31
|
|
|
32
32
|
### Optional parameters ###
|
|
33
|
-
|
|
34
33
|
## Logging
|
|
35
34
|
# Write log to disk
|
|
35
|
+
|
|
36
36
|
log_to_disk: true
|
|
37
37
|
|
|
38
38
|
# Activate more granular logging
|
|
39
|
+
|
|
39
40
|
debug: true
|
|
40
41
|
|
|
41
42
|
# Change the log directory
|
|
@@ -85,6 +86,14 @@ neo4j:
|
|
|
85
86
|
array_delimiter: "|"
|
|
86
87
|
quote_character: "'"
|
|
87
88
|
|
|
89
|
+
# How to write the labels in the export files.
|
|
90
|
+
|
|
91
|
+
labels_order: "Ascending" # Default: From more specific to more generic.
|
|
92
|
+
# Or:
|
|
93
|
+
# labels_order: "Descending" # From more generic to more specific.
|
|
94
|
+
# labels_order: "Alphabetical" # Alphabetically. Legacy option.
|
|
95
|
+
# labels_order: "Leaves" # Only the most specific label.
|
|
96
|
+
|
|
88
97
|
## MultiDB functionality
|
|
89
98
|
## Set to false for using community edition or older versions of Neo4j
|
|
90
99
|
|
|
@@ -102,8 +111,8 @@ neo4j:
|
|
|
102
111
|
|
|
103
112
|
postgresql:
|
|
104
113
|
### PostgreSQL configuration ###
|
|
105
|
-
|
|
106
114
|
# PostgreSQL connection credentials
|
|
115
|
+
|
|
107
116
|
database_name: postgres # DB name
|
|
108
117
|
user: postgres # user name
|
|
109
118
|
password: postgres # password
|
|
@@ -111,6 +120,7 @@ postgresql:
|
|
|
111
120
|
port: 5432 # port
|
|
112
121
|
|
|
113
122
|
# PostgreSQL import batch writer settings
|
|
123
|
+
|
|
114
124
|
quote_character: '"'
|
|
115
125
|
delimiter: '\t'
|
|
116
126
|
# import_call_bin_prefix: '' # path to "psql"
|
|
@@ -122,11 +132,12 @@ rdf:
|
|
|
122
132
|
|
|
123
133
|
sqlite:
|
|
124
134
|
### SQLite configuration ###
|
|
125
|
-
|
|
126
135
|
# SQLite connection credentials
|
|
136
|
+
|
|
127
137
|
database_name: sqlite.db # DB name
|
|
128
138
|
|
|
129
139
|
# SQLite import batch writer settings
|
|
140
|
+
|
|
130
141
|
quote_character: '"'
|
|
131
142
|
delimiter: '\t'
|
|
132
143
|
# import_call_bin_prefix: '' # path to "sqlite3"
|
|
@@ -124,6 +124,7 @@ class _BatchWriter(_Writer, ABC):
|
|
|
124
124
|
db_port: str = None,
|
|
125
125
|
rdf_format: str = None,
|
|
126
126
|
rdf_namespaces: dict = {},
|
|
127
|
+
labels_order: str = "Ascending",
|
|
127
128
|
):
|
|
128
129
|
"""Abtract parent class for writing node and edge representations to disk
|
|
129
130
|
using the format specified by each database type. The database-specific
|
|
@@ -209,6 +210,10 @@ class _BatchWriter(_Writer, ABC):
|
|
|
209
210
|
rdf_namespaces:
|
|
210
211
|
The namespaces for RDF.
|
|
211
212
|
|
|
213
|
+
labels_order:
|
|
214
|
+
The order of labels, to reflect the hierarchy (or not).
|
|
215
|
+
Default: "Ascending" (from more specific to more generic).
|
|
216
|
+
|
|
212
217
|
"""
|
|
213
218
|
super().__init__(
|
|
214
219
|
translator=translator,
|
|
@@ -251,6 +256,15 @@ class _BatchWriter(_Writer, ABC):
|
|
|
251
256
|
|
|
252
257
|
self.parts = {} # dict to store the paths of part files for each label
|
|
253
258
|
|
|
259
|
+
self._labels_orders = ["Alphabetical", "Ascending", "Descending", "Leaves"]
|
|
260
|
+
if labels_order not in self._labels_orders:
|
|
261
|
+
msg = (
|
|
262
|
+
f"neo4j's 'labels_order' parameter cannot be '{labels_order}',"
|
|
263
|
+
"must be one of: {' ,'.join(self._labels_orders)}",
|
|
264
|
+
)
|
|
265
|
+
raise ValueError(msg)
|
|
266
|
+
self.labels_order = labels_order
|
|
267
|
+
|
|
254
268
|
# TODO not memory efficient, but should be fine for most cases; is
|
|
255
269
|
# there a more elegant solution?
|
|
256
270
|
|
|
@@ -472,8 +486,26 @@ class _BatchWriter(_Writer, ABC):
|
|
|
472
486
|
all_labels = [self.translator.name_sentence_to_pascal(label) for label in all_labels]
|
|
473
487
|
# remove duplicates
|
|
474
488
|
all_labels = list(OrderedDict.fromkeys(all_labels))
|
|
475
|
-
|
|
476
|
-
|
|
489
|
+
match self.labels_order:
|
|
490
|
+
case "Ascending":
|
|
491
|
+
pass # Default from get_ancestors.
|
|
492
|
+
case "Alphabetical":
|
|
493
|
+
all_labels.sort()
|
|
494
|
+
case "Descending":
|
|
495
|
+
all_labels.reverse()
|
|
496
|
+
case "Leaves":
|
|
497
|
+
if len(all_labels) < 1:
|
|
498
|
+
msg = "Labels list cannot be empty when using 'Leaves' order."
|
|
499
|
+
raise ValueError(msg)
|
|
500
|
+
all_labels = [all_labels[0]]
|
|
501
|
+
case _:
|
|
502
|
+
# In case someone touched _labels_orders after constructor.
|
|
503
|
+
if self.labels_order not in self._labels_orders:
|
|
504
|
+
msg = (
|
|
505
|
+
f"Invalid labels_order: {self.labels_order}. "
|
|
506
|
+
f"Must be one of {self._labels_orders}"
|
|
507
|
+
)
|
|
508
|
+
raise ValueError(msg)
|
|
477
509
|
# concatenate with array delimiter
|
|
478
510
|
all_labels = self._write_array_string(all_labels)
|
|
479
511
|
else:
|
|
@@ -2,7 +2,6 @@ import os
|
|
|
2
2
|
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
4
|
from collections.abc import Iterable
|
|
5
|
-
from typing import Optional, Union
|
|
6
5
|
|
|
7
6
|
from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
|
|
8
7
|
from biocypher._deduplicate import Deduplicator
|
|
@@ -23,26 +22,28 @@ class _Writer(ABC):
|
|
|
23
22
|
- _get_import_script_name
|
|
24
23
|
|
|
25
24
|
Args:
|
|
25
|
+
----
|
|
26
26
|
translator (Translator): Instance of :py:class:`Translator` to enable translation of
|
|
27
27
|
nodes and manipulation of properties.
|
|
28
28
|
deduplicator (Deduplicator): Instance of :py:class:`Deduplicator` to enable deduplication
|
|
29
29
|
of nodes and edges.
|
|
30
30
|
output_directory (str, optional): Path for exporting CSV files. Defaults to None.
|
|
31
31
|
strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
|
|
32
|
-
strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
|
|
33
32
|
|
|
34
33
|
Raises:
|
|
34
|
+
------
|
|
35
35
|
NotImplementedError: Writer implementation must override '_write_node_data'
|
|
36
36
|
NotImplementedError: Writer implementation must override '_write_edge_data'
|
|
37
37
|
NotImplementedError: Writer implementation must override '_construct_import_call'
|
|
38
38
|
NotImplementedError: Writer implementation must override '_get_import_script_name'
|
|
39
|
+
|
|
39
40
|
"""
|
|
40
41
|
|
|
41
42
|
def __init__(
|
|
42
43
|
self,
|
|
43
44
|
translator: Translator,
|
|
44
45
|
deduplicator: Deduplicator,
|
|
45
|
-
output_directory:
|
|
46
|
+
output_directory: str | None = None,
|
|
46
47
|
strict_mode: bool = False,
|
|
47
48
|
*args,
|
|
48
49
|
**kwargs,
|
|
@@ -50,13 +51,14 @@ class _Writer(ABC):
|
|
|
50
51
|
"""Abstract class for writing node and edge representations to disk.
|
|
51
52
|
|
|
52
53
|
Args:
|
|
54
|
+
----
|
|
53
55
|
translator (Translator): Instance of :py:class:`Translator` to enable translation of
|
|
54
56
|
nodes and manipulation of properties.
|
|
55
57
|
deduplicator (Deduplicator): Instance of :py:class:`Deduplicator` to enable deduplication
|
|
56
58
|
of nodes and edges.
|
|
57
59
|
output_directory (str, optional): Path for exporting CSV files. Defaults to None.
|
|
58
60
|
strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
|
|
59
|
-
|
|
61
|
+
|
|
60
62
|
"""
|
|
61
63
|
self.translator = translator
|
|
62
64
|
self.deduplicator = deduplicator
|
|
@@ -67,7 +69,7 @@ class _Writer(ABC):
|
|
|
67
69
|
if kwargs.get("write_to_file", True):
|
|
68
70
|
logger.warning(
|
|
69
71
|
f"Output directory `{self.output_directory}` already exists. "
|
|
70
|
-
"If this is not planned, file consistency may be compromised."
|
|
72
|
+
"If this is not planned, file consistency may be compromised.",
|
|
71
73
|
)
|
|
72
74
|
else:
|
|
73
75
|
logger.info(f"Creating output directory `{self.output_directory}`.")
|
|
@@ -76,43 +78,50 @@ class _Writer(ABC):
|
|
|
76
78
|
@abstractmethod
|
|
77
79
|
def _write_node_data(
|
|
78
80
|
self,
|
|
79
|
-
nodes: Iterable[
|
|
81
|
+
nodes: Iterable[BioCypherNode | BioCypherEdge | BioCypherRelAsNode],
|
|
80
82
|
) -> bool:
|
|
81
83
|
"""Implement how to output.write nodes to disk.
|
|
82
84
|
|
|
83
85
|
Args:
|
|
86
|
+
----
|
|
84
87
|
nodes (Iterable): An iterable of BioCypherNode / BioCypherEdge / BioCypherRelAsNode objects.
|
|
85
88
|
|
|
86
89
|
Returns:
|
|
90
|
+
-------
|
|
87
91
|
bool: The return value. True for success, False otherwise.
|
|
92
|
+
|
|
88
93
|
"""
|
|
89
94
|
raise NotImplementedError("Writer implementation must override 'write_nodes'")
|
|
90
95
|
|
|
91
96
|
@abstractmethod
|
|
92
97
|
def _write_edge_data(
|
|
93
98
|
self,
|
|
94
|
-
edges: Iterable[
|
|
99
|
+
edges: Iterable[BioCypherNode | BioCypherEdge | BioCypherRelAsNode],
|
|
95
100
|
) -> bool:
|
|
96
101
|
"""Implement how to output.write edges to disk.
|
|
97
102
|
|
|
98
103
|
Args:
|
|
104
|
+
----
|
|
99
105
|
edges (Iterable): An iterable of BioCypherNode / BioCypherEdge / BioCypherRelAsNode objects.
|
|
100
106
|
|
|
101
107
|
Returns:
|
|
108
|
+
-------
|
|
102
109
|
bool: The return value. True for success, False otherwise.
|
|
110
|
+
|
|
103
111
|
"""
|
|
104
112
|
raise NotImplementedError("Writer implementation must override 'write_edges'")
|
|
105
113
|
|
|
106
114
|
@abstractmethod
|
|
107
115
|
def _construct_import_call(self) -> str:
|
|
108
|
-
"""
|
|
109
|
-
Function to construct the import call detailing folder and
|
|
116
|
+
"""Function to construct the import call detailing folder and
|
|
110
117
|
individual node and edge headers and data files, as well as
|
|
111
118
|
delimiters and database name. Built after all data has been
|
|
112
119
|
processed to ensure that nodes are called before any edges.
|
|
113
120
|
|
|
114
|
-
Returns
|
|
121
|
+
Returns
|
|
122
|
+
-------
|
|
115
123
|
str: command for importing the output files into a DBMS.
|
|
124
|
+
|
|
116
125
|
"""
|
|
117
126
|
raise NotImplementedError("Writer implementation must override '_construct_import_call'")
|
|
118
127
|
|
|
@@ -120,8 +129,10 @@ class _Writer(ABC):
|
|
|
120
129
|
def _get_import_script_name(self) -> str:
|
|
121
130
|
"""Returns the name of the import script.
|
|
122
131
|
|
|
123
|
-
Returns
|
|
132
|
+
Returns
|
|
133
|
+
-------
|
|
124
134
|
str: The name of the import script (ending in .sh)
|
|
135
|
+
|
|
125
136
|
"""
|
|
126
137
|
raise NotImplementedError("Writer implementation must override '_get_import_script_name'")
|
|
127
138
|
|
|
@@ -129,6 +140,7 @@ class _Writer(ABC):
|
|
|
129
140
|
"""Wrapper for writing nodes.
|
|
130
141
|
|
|
131
142
|
Args:
|
|
143
|
+
----
|
|
132
144
|
nodes (BioCypherNode): a list or generator of nodes in
|
|
133
145
|
:py:class:`BioCypherNode` format
|
|
134
146
|
batch_size (int): The batch size for writing nodes.
|
|
@@ -136,7 +148,9 @@ class _Writer(ABC):
|
|
|
136
148
|
not present in the schema.
|
|
137
149
|
|
|
138
150
|
Returns:
|
|
151
|
+
-------
|
|
139
152
|
bool: The return value. True for success, False otherwise.
|
|
153
|
+
|
|
140
154
|
"""
|
|
141
155
|
passed = self._write_node_data(nodes)
|
|
142
156
|
if not passed:
|
|
@@ -148,6 +162,7 @@ class _Writer(ABC):
|
|
|
148
162
|
"""Wrapper for writing edges.
|
|
149
163
|
|
|
150
164
|
Args:
|
|
165
|
+
----
|
|
151
166
|
edges (BioCypherEdge): a list or generator of edges in
|
|
152
167
|
:py:class:`BioCypherEdge` format
|
|
153
168
|
batch_size (int): The batch size for writing nodes.
|
|
@@ -155,7 +170,9 @@ class _Writer(ABC):
|
|
|
155
170
|
not present in the schema.
|
|
156
171
|
|
|
157
172
|
Returns:
|
|
173
|
+
-------
|
|
158
174
|
bool: The return value. True for success, False otherwise.
|
|
175
|
+
|
|
159
176
|
"""
|
|
160
177
|
passed = self._write_edge_data(edges)
|
|
161
178
|
if not passed:
|
|
@@ -164,13 +181,14 @@ class _Writer(ABC):
|
|
|
164
181
|
return True
|
|
165
182
|
|
|
166
183
|
def write_import_call(self):
|
|
167
|
-
"""
|
|
168
|
-
Function to output.write the import call detailing folder and
|
|
184
|
+
"""Function to output.write the import call detailing folder and
|
|
169
185
|
individual node and edge headers and data files, as well as
|
|
170
186
|
delimiters and database name, to the export folder as txt.
|
|
171
187
|
|
|
172
|
-
Returns
|
|
188
|
+
Returns
|
|
189
|
+
-------
|
|
173
190
|
str: The path of the file holding the import call.
|
|
191
|
+
|
|
174
192
|
"""
|
|
175
193
|
file_path = os.path.join(self.output_directory, self._get_import_script_name())
|
|
176
194
|
logger.info(f"Writing {self.__class__.__name__} import call to `{file_path}`.")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|