biocypher 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biocypher might be problematic. Click here for more details.

@@ -30,12 +30,13 @@ biocypher:
30
30
  # switch_label_and_id: true
31
31
 
32
32
  ### Optional parameters ###
33
-
34
33
  ## Logging
35
34
  # Write log to disk
35
+
36
36
  log_to_disk: true
37
37
 
38
38
  # Activate more granular logging
39
+
39
40
  debug: true
40
41
 
41
42
  # Change the log directory
@@ -85,6 +86,14 @@ neo4j:
85
86
  array_delimiter: "|"
86
87
  quote_character: "'"
87
88
 
89
+ # How to write the labels in the export files.
90
+
91
+ labels_order: "Ascending" # Default: From more specific to more generic.
92
+ # Or:
93
+ # labels_order: "Descending" # From more generic to more specific.
94
+ # labels_order: "Alphabetical" # Alphabetically. Legacy option.
95
+ # labels_order: "Leaves" # Only the most specific label.
96
+
88
97
  ## MultiDB functionality
89
98
  ## Set to false for using community edition or older versions of Neo4j
90
99
 
@@ -102,8 +111,8 @@ neo4j:
102
111
 
103
112
  postgresql:
104
113
  ### PostgreSQL configuration ###
105
-
106
114
  # PostgreSQL connection credentials
115
+
107
116
  database_name: postgres # DB name
108
117
  user: postgres # user name
109
118
  password: postgres # password
@@ -111,6 +120,7 @@ postgresql:
111
120
  port: 5432 # port
112
121
 
113
122
  # PostgreSQL import batch writer settings
123
+
114
124
  quote_character: '"'
115
125
  delimiter: '\t'
116
126
  # import_call_bin_prefix: '' # path to "psql"
@@ -122,11 +132,12 @@ rdf:
122
132
 
123
133
  sqlite:
124
134
  ### SQLite configuration ###
125
-
126
135
  # SQLite connection credentials
136
+
127
137
  database_name: sqlite.db # DB name
128
138
 
129
139
  # SQLite import batch writer settings
140
+
130
141
  quote_character: '"'
131
142
  delimiter: '\t'
132
143
  # import_call_bin_prefix: '' # path to "sqlite3"
biocypher/_metadata.py CHANGED
@@ -10,7 +10,7 @@ import pathlib
10
10
 
11
11
  import toml
12
12
 
13
- _VERSION = "0.7.0"
13
+ _VERSION = "0.8.0"
14
14
 
15
15
 
16
16
  def get_metadata():
@@ -124,6 +124,7 @@ class _BatchWriter(_Writer, ABC):
124
124
  db_port: str = None,
125
125
  rdf_format: str = None,
126
126
  rdf_namespaces: dict = {},
127
+ labels_order: str = "Ascending",
127
128
  ):
128
129
  """Abstract parent class for writing node and edge representations to disk
129
130
  using the format specified by each database type. The database-specific
@@ -209,6 +210,10 @@ class _BatchWriter(_Writer, ABC):
209
210
  rdf_namespaces:
210
211
  The namespaces for RDF.
211
212
 
213
+ labels_order:
214
+ The order of labels, to reflect the hierarchy (or not).
215
+ Default: "Ascending" (from more specific to more generic).
216
+
212
217
  """
213
218
  super().__init__(
214
219
  translator=translator,
@@ -251,6 +256,15 @@ class _BatchWriter(_Writer, ABC):
251
256
 
252
257
  self.parts = {} # dict to store the paths of part files for each label
253
258
 
259
+ self._labels_orders = ["Alphabetical", "Ascending", "Descending", "Leaves"]
260
+ if labels_order not in self._labels_orders:
261
+ msg = (
262
+ f"neo4j's 'labels_order' parameter cannot be '{labels_order}',"
263
+ "must be one of: {' ,'.join(self._labels_orders)}",
264
+ )
265
+ raise ValueError(msg)
266
+ self.labels_order = labels_order
267
+
254
268
  # TODO not memory efficient, but should be fine for most cases; is
255
269
  # there a more elegant solution?
256
270
 
@@ -472,8 +486,26 @@ class _BatchWriter(_Writer, ABC):
472
486
  all_labels = [self.translator.name_sentence_to_pascal(label) for label in all_labels]
473
487
  # remove duplicates
474
488
  all_labels = list(OrderedDict.fromkeys(all_labels))
475
- # order alphabetically
476
- all_labels.sort()
489
+ match self.labels_order:
490
+ case "Ascending":
491
+ pass # Default from get_ancestors.
492
+ case "Alphabetical":
493
+ all_labels.sort()
494
+ case "Descending":
495
+ all_labels.reverse()
496
+ case "Leaves":
497
+ if len(all_labels) < 1:
498
+ msg = "Labels list cannot be empty when using 'Leaves' order."
499
+ raise ValueError(msg)
500
+ all_labels = [all_labels[0]]
501
+ case _:
502
+ # In case someone modified _labels_orders after the constructor ran.
503
+ if self.labels_order not in self._labels_orders:
504
+ msg = (
505
+ f"Invalid labels_order: {self.labels_order}. "
506
+ f"Must be one of {self._labels_orders}"
507
+ )
508
+ raise ValueError(msg)
477
509
  # concatenate with array delimiter
478
510
  all_labels = self._write_array_string(all_labels)
479
511
  else:
@@ -2,7 +2,6 @@ import os
2
2
 
3
3
  from abc import ABC, abstractmethod
4
4
  from collections.abc import Iterable
5
- from typing import Optional, Union
6
5
 
7
6
  from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
8
7
  from biocypher._deduplicate import Deduplicator
@@ -23,26 +22,28 @@ class _Writer(ABC):
23
22
  - _get_import_script_name
24
23
 
25
24
  Args:
25
+ ----
26
26
  translator (Translator): Instance of :py:class:`Translator` to enable translation of
27
27
  nodes and manipulation of properties.
28
28
  deduplicator (Deduplicator): Instance of :py:class:`Deduplicator` to enable deduplication
29
29
  of nodes and edges.
30
30
  output_directory (str, optional): Path for exporting CSV files. Defaults to None.
31
31
  strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
32
- strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
33
32
 
34
33
  Raises:
34
+ ------
35
35
  NotImplementedError: Writer implementation must override '_write_node_data'
36
36
  NotImplementedError: Writer implementation must override '_write_edge_data'
37
37
  NotImplementedError: Writer implementation must override '_construct_import_call'
38
38
  NotImplementedError: Writer implementation must override '_get_import_script_name'
39
+
39
40
  """
40
41
 
41
42
  def __init__(
42
43
  self,
43
44
  translator: Translator,
44
45
  deduplicator: Deduplicator,
45
- output_directory: Optional[str] = None,
46
+ output_directory: str | None = None,
46
47
  strict_mode: bool = False,
47
48
  *args,
48
49
  **kwargs,
@@ -50,13 +51,14 @@ class _Writer(ABC):
50
51
  """Abstract class for writing node and edge representations to disk.
51
52
 
52
53
  Args:
54
+ ----
53
55
  translator (Translator): Instance of :py:class:`Translator` to enable translation of
54
56
  nodes and manipulation of properties.
55
57
  deduplicator (Deduplicator): Instance of :py:class:`Deduplicator` to enable deduplication
56
58
  of nodes and edges.
57
59
  output_directory (str, optional): Path for exporting CSV files. Defaults to None.
58
60
  strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
59
- strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
61
+
60
62
  """
61
63
  self.translator = translator
62
64
  self.deduplicator = deduplicator
@@ -67,7 +69,7 @@ class _Writer(ABC):
67
69
  if kwargs.get("write_to_file", True):
68
70
  logger.warning(
69
71
  f"Output directory `{self.output_directory}` already exists. "
70
- "If this is not planned, file consistency may be compromised."
72
+ "If this is not planned, file consistency may be compromised.",
71
73
  )
72
74
  else:
73
75
  logger.info(f"Creating output directory `{self.output_directory}`.")
@@ -76,43 +78,50 @@ class _Writer(ABC):
76
78
  @abstractmethod
77
79
  def _write_node_data(
78
80
  self,
79
- nodes: Iterable[Union[BioCypherNode, BioCypherEdge, BioCypherRelAsNode]],
81
+ nodes: Iterable[BioCypherNode | BioCypherEdge | BioCypherRelAsNode],
80
82
  ) -> bool:
81
83
  """Implement how to output.write nodes to disk.
82
84
 
83
85
  Args:
86
+ ----
84
87
  nodes (Iterable): An iterable of BioCypherNode / BioCypherEdge / BioCypherRelAsNode objects.
85
88
 
86
89
  Returns:
90
+ -------
87
91
  bool: The return value. True for success, False otherwise.
92
+
88
93
  """
89
94
  raise NotImplementedError("Writer implementation must override 'write_nodes'")
90
95
 
91
96
  @abstractmethod
92
97
  def _write_edge_data(
93
98
  self,
94
- edges: Iterable[Union[BioCypherNode, BioCypherEdge, BioCypherRelAsNode]],
99
+ edges: Iterable[BioCypherNode | BioCypherEdge | BioCypherRelAsNode],
95
100
  ) -> bool:
96
101
  """Implement how to output.write edges to disk.
97
102
 
98
103
  Args:
104
+ ----
99
105
  edges (Iterable): An iterable of BioCypherNode / BioCypherEdge / BioCypherRelAsNode objects.
100
106
 
101
107
  Returns:
108
+ -------
102
109
  bool: The return value. True for success, False otherwise.
110
+
103
111
  """
104
112
  raise NotImplementedError("Writer implementation must override 'write_edges'")
105
113
 
106
114
  @abstractmethod
107
115
  def _construct_import_call(self) -> str:
108
- """
109
- Function to construct the import call detailing folder and
116
+ """Function to construct the import call detailing folder and
110
117
  individual node and edge headers and data files, as well as
111
118
  delimiters and database name. Built after all data has been
112
119
  processed to ensure that nodes are called before any edges.
113
120
 
114
- Returns:
121
+ Returns
122
+ -------
115
123
  str: command for importing the output files into a DBMS.
124
+
116
125
  """
117
126
  raise NotImplementedError("Writer implementation must override '_construct_import_call'")
118
127
 
@@ -120,8 +129,10 @@ class _Writer(ABC):
120
129
  def _get_import_script_name(self) -> str:
121
130
  """Returns the name of the import script.
122
131
 
123
- Returns:
132
+ Returns
133
+ -------
124
134
  str: The name of the import script (ending in .sh)
135
+
125
136
  """
126
137
  raise NotImplementedError("Writer implementation must override '_get_import_script_name'")
127
138
 
@@ -129,6 +140,7 @@ class _Writer(ABC):
129
140
  """Wrapper for writing nodes.
130
141
 
131
142
  Args:
143
+ ----
132
144
  nodes (BioCypherNode): a list or generator of nodes in
133
145
  :py:class:`BioCypherNode` format
134
146
  batch_size (int): The batch size for writing nodes.
@@ -136,7 +148,9 @@ class _Writer(ABC):
136
148
  not present in the schema.
137
149
 
138
150
  Returns:
151
+ -------
139
152
  bool: The return value. True for success, False otherwise.
153
+
140
154
  """
141
155
  passed = self._write_node_data(nodes)
142
156
  if not passed:
@@ -148,6 +162,7 @@ class _Writer(ABC):
148
162
  """Wrapper for writing edges.
149
163
 
150
164
  Args:
165
+ ----
151
166
  edges (BioCypherEdge): a list or generator of edges in
151
166
  :py:class:`BioCypherEdge` format
152
167
  batch_size (int): The batch size for writing edges.
@@ -155,7 +170,9 @@ class _Writer(ABC):
155
170
  not present in the schema.
156
171
 
157
172
  Returns:
173
+ -------
158
174
  bool: The return value. True for success, False otherwise.
175
+
159
176
  """
160
177
  passed = self._write_edge_data(edges)
161
178
  if not passed:
@@ -164,13 +181,14 @@ class _Writer(ABC):
164
181
  return True
165
182
 
166
183
  def write_import_call(self):
167
- """
168
- Function to output.write the import call detailing folder and
184
+ """Function to output.write the import call detailing folder and
169
185
  individual node and edge headers and data files, as well as
170
186
  delimiters and database name, to the export folder as txt.
171
187
 
172
- Returns:
188
+ Returns
189
+ -------
173
190
  str: The path of the file holding the import call.
191
+
174
192
  """
175
193
  file_path = os.path.join(self.output_directory, self._get_import_script_name())
176
194
  logger.info(f"Writing {self.__class__.__name__} import call to `{file_path}`.")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: biocypher
3
- Version: 0.7.0
3
+ Version: 0.8.0
4
4
  Summary: A unifying framework for biomedical research knowledge graphs
5
5
  Home-page: https://github.com/biocypher/biocypher
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  biocypher/__init__.py,sha256=-iq15Q3LhB8WBQjnD15ghszEdaqJ1XvQIF5IaF88KNQ,783
2
2
  biocypher/_config/__init__.py,sha256=ku86JZaRrzW5NEz5EXsCRktFwxS0Yo2dwX4UuazdfA4,3416
3
- biocypher/_config/biocypher_config.yaml,sha256=pusj0IjJM3uWRcm0N7U7mb1IX257HCV2reZV3YKFCk0,3037
3
+ biocypher/_config/biocypher_config.yaml,sha256=KdeqgY27YIyvZICNVBCI3h9y7duqF-buynXF_1G5dRY,3368
4
4
  biocypher/_config/test_config.yaml,sha256=Np8jeS5_EP6HHOvMKb7B_Tkyqd5YaYlYz_DVsXypt-A,119
5
5
  biocypher/_config/test_schema_config.yaml,sha256=D1600WgEj3iTXrumVU9LIivJHJO36iaxfkOgyam9zVU,3129
6
6
  biocypher/_config/test_schema_config_disconnected.yaml,sha256=Qm8FLxEn2spHcyj_5F859KjcDvKSxNhxDvi4b4LLkvQ,68
@@ -11,7 +11,7 @@ biocypher/_deduplicate.py,sha256=rtglcaLRaVzNjLtaPwTGP8VvCM4PHYQ5CZ-cm32CrKQ,484
11
11
  biocypher/_get.py,sha256=2kjiBFu_onGUhgOpV7IMfGX67jyY1rpHg_GWncEUdrc,13278
12
12
  biocypher/_logger.py,sha256=y9dh3SPJOCWXnkFSYSK7aj_-pB7zlAkNCf43Dp1lt74,2941
13
13
  biocypher/_mapping.py,sha256=ntspG2C_NaQODhWTBFk0CDvolkOCjtqlQ9E-NkJAuTg,9030
14
- biocypher/_metadata.py,sha256=yjVIaKnrerDo3ZQAYRQ67Dg3hlWQnwlOg8xwpFlSNQI,1415
14
+ biocypher/_metadata.py,sha256=xVVYPjLr00BmeV7okCdmxRgsd1clEQkQz4R93HOA8uk,1415
15
15
  biocypher/_misc.py,sha256=N8aDg8j3EEcKf9ZRqzSNruUEUK4RixCy1vQ1V4maGxk,6079
16
16
  biocypher/_ontology.py,sha256=McN50IAjmlVUUOc-7kpIdskCeatsmVpJ1a90ca6FQ7Y,30638
17
17
  biocypher/_translate.py,sha256=sXyXIg6hpR6Y-dp8NtoAN-H4mzrGI4t1ivlBGxaeqWM,16379
@@ -25,9 +25,9 @@ biocypher/output/in_memory/_in_memory_kg.py,sha256=g1TPN8PkeAyXbrRuTAjshqC8voI6E
25
25
  biocypher/output/in_memory/_networkx.py,sha256=cSOSAreP7S3oeGT6noZ1kAIvSnkVnU3NUp1OY4yqzn0,1515
26
26
  biocypher/output/in_memory/_pandas.py,sha256=Ot2jbK5t_YLHqw0BUv9Z_qWNy9r6IX1LYEyejOSJzos,3288
27
27
  biocypher/output/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
- biocypher/output/write/_batch_writer.py,sha256=oyO3eG5-tsWpkFMdc-LChcFdBYsK5dYWBfHZRja0i2Y,36330
28
+ biocypher/output/write/_batch_writer.py,sha256=4w1YTXF1iAvzZyqcJLhirmKkU12J8NJqb4SiNjM91cY,38044
29
29
  biocypher/output/write/_get_writer.py,sha256=PCF-JMCGwlh5hLl1SFLseNsBH1Nt49XTKcqximKwfDY,3532
30
- biocypher/output/write/_writer.py,sha256=LLplYc2T9UVFkZlrwPypZ7XiLhSg5yzDE42NWuppdtk,7250
30
+ biocypher/output/write/_writer.py,sha256=y0dWI-RyQdrBLr9Fs91Y9KcCMjnlCaKJT0eWsIS2hG4,7158
31
31
  biocypher/output/write/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
32
  biocypher/output/write/graph/_arangodb.py,sha256=NBnVJa4WuK7WNTIAXOg62YjAMAQfEZtcJX97erNLgSw,7794
33
33
  biocypher/output/write/graph/_neo4j.py,sha256=aMq8XKZ_UMbDI1gZmZx0D4T3pY2E4l1jMqX78M3TJK0,12051
@@ -37,7 +37,7 @@ biocypher/output/write/relational/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
37
37
  biocypher/output/write/relational/_csv.py,sha256=m0BSQXts88Qu5AEvoIgnwRz54ia38g4VN3PaA3LCYM8,2807
38
38
  biocypher/output/write/relational/_postgresql.py,sha256=RckQJBiuwvDmHAyXxS8zCavYqDecHHWW_piofurokfQ,11965
39
39
  biocypher/output/write/relational/_sqlite.py,sha256=BuGWOeeNA83lbUvjpkzqcR9_baWLsbfmLXBKe4O1EPE,2105
40
- biocypher-0.7.0.dist-info/LICENSE,sha256=oejgxuxyjSnyPw3YPloz6-dCBB_nYizJ4jDQnr-xZUU,1082
41
- biocypher-0.7.0.dist-info/METADATA,sha256=HTZXVo7kcxmwJFlv913SN6dh_LmdiRsNllNQZ9xr5-E,10643
42
- biocypher-0.7.0.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
43
- biocypher-0.7.0.dist-info/RECORD,,
40
+ biocypher-0.8.0.dist-info/LICENSE,sha256=oejgxuxyjSnyPw3YPloz6-dCBB_nYizJ4jDQnr-xZUU,1082
41
+ biocypher-0.8.0.dist-info/METADATA,sha256=VByGBm1JcK0yiXCq2aNWSAbguf5_5Lv8rCDTyG1uOLs,10643
42
+ biocypher-0.8.0.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
43
+ biocypher-0.8.0.dist-info/RECORD,,