biocypher 0.6.2__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of biocypher has been flagged as possibly problematic in its registry.

Files changed (34)
  1. biocypher/__init__.py +3 -13
  2. biocypher/_config/__init__.py +6 -23
  3. biocypher/_config/biocypher_config.yaml +14 -3
  4. biocypher/_core.py +360 -262
  5. biocypher/_create.py +13 -27
  6. biocypher/_deduplicate.py +4 -11
  7. biocypher/_get.py +21 -60
  8. biocypher/_logger.py +4 -16
  9. biocypher/_mapping.py +4 -17
  10. biocypher/_metadata.py +3 -15
  11. biocypher/_misc.py +14 -28
  12. biocypher/_ontology.py +127 -212
  13. biocypher/_translate.py +34 -58
  14. biocypher/output/connect/_get_connector.py +40 -0
  15. biocypher/output/connect/_neo4j_driver.py +9 -65
  16. biocypher/output/in_memory/_get_in_memory_kg.py +34 -0
  17. biocypher/output/in_memory/_in_memory_kg.py +40 -0
  18. biocypher/output/in_memory/_networkx.py +44 -0
  19. biocypher/output/in_memory/_pandas.py +20 -15
  20. biocypher/output/write/_batch_writer.py +166 -179
  21. biocypher/output/write/_get_writer.py +11 -24
  22. biocypher/output/write/_writer.py +43 -44
  23. biocypher/output/write/graph/_arangodb.py +7 -24
  24. biocypher/output/write/graph/_neo4j.py +51 -56
  25. biocypher/output/write/graph/_networkx.py +36 -43
  26. biocypher/output/write/graph/_rdf.py +107 -95
  27. biocypher/output/write/relational/_csv.py +6 -11
  28. biocypher/output/write/relational/_postgresql.py +5 -13
  29. biocypher/output/write/relational/_sqlite.py +3 -1
  30. {biocypher-0.6.2.dist-info → biocypher-0.8.0.dist-info}/LICENSE +1 -1
  31. {biocypher-0.6.2.dist-info → biocypher-0.8.0.dist-info}/METADATA +3 -3
  32. biocypher-0.8.0.dist-info/RECORD +43 -0
  33. {biocypher-0.6.2.dist-info → biocypher-0.8.0.dist-info}/WHEEL +1 -1
  34. biocypher-0.6.2.dist-info/RECORD +0 -39
Of the 34 files, the diff shown below covers biocypher/output/write/_batch_writer.py (file 20 above, +166 -179).

--- a/biocypher/output/write/_batch_writer.py
+++ b/biocypher/output/write/_batch_writer.py
@@ -1,17 +1,17 @@
-from abc import ABC, abstractmethod
-from types import GeneratorType
-from typing import Union, Optional
-from collections import OrderedDict, defaultdict
+import glob
 import os
 import re
-import glob
+
+from abc import ABC, abstractmethod
+from collections import OrderedDict, defaultdict
+from types import GeneratorType
 
 from more_itertools import peekable
 
 from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
+from biocypher._deduplicate import Deduplicator
 from biocypher._logger import logger
 from biocypher._translate import Translator
-from biocypher._deduplicate import Deduplicator
 from biocypher.output.write._writer import _Writer
 
 
@@ -20,96 +20,88 @@ class _BatchWriter(_Writer, ABC):
 
     @abstractmethod
     def _quote_string(self, value: str) -> str:
-        """
-        Abstract method to quote a string. Escaping is handled by the database-specific writer.
-        """
-
+        """Abstract method to quote a string. Escaping is handled by the database-specific writer."""
         raise NotImplementedError(
-            "Database writer must override '_quote_string'"
+            "Database writer must override '_quote_string'",
         )
 
     @abstractmethod
     def _get_default_import_call_bin_prefix(self):
-        """
-        Abstract method to provide the default string for the import call bin prefix.
+        """Abstract method to provide the default string for the import call bin prefix.
 
-        Returns:
+        Returns
+        -------
             str: The database-specific string for the path to the import call bin prefix
+
         """
-        raise NotImplementedError(
-            "Database writer must override '_get_default_import_call_bin_prefix'"
-        )
+        raise NotImplementedError("Database writer must override '_get_default_import_call_bin_prefix'")
 
     @abstractmethod
     def _write_array_string(self, string_list):
-        """
-        Abstract method to write the string representation of an array into a .csv file.
+        """Abstract method to write the string representation of an array into a .csv file.
         Different databases require different formats of array to optimize import speed.
 
         Args:
+        ----
             string_list (list): list of ontology strings
 
         Returns:
+        -------
             str: The database-specific string representation of an array
+
         """
-        raise NotImplementedError(
-            "Database writer must override '_write_array_string'"
-        )
+        raise NotImplementedError("Database writer must override '_write_array_string'")
 
     @abstractmethod
     def _write_node_headers(self):
-        """
-        Abstract method that takes care of importing properties of a graph entity that is represented
+        """Abstract method that takes care of importing properties of a graph entity that is represented
         as a node as per the definition in the `schema_config.yaml`
 
-        Returns:
+        Returns
+        -------
             bool: The return value. True for success, False otherwise.
+
         """
-        raise NotImplementedError(
-            "Database writer must override '_write_node_headers'"
-        )
+        raise NotImplementedError("Database writer must override '_write_node_headers'")
 
     @abstractmethod
     def _write_edge_headers(self):
-        """
-        Abstract method to write a database import-file for a graph entity that is represented
+        """Abstract method to write a database import-file for a graph entity that is represented
         as an edge as per the definition in the `schema_config.yaml`,
         containing only the header for this type of edge.
 
-        Returns:
+        Returns
+        -------
             bool: The return value. True for success, False otherwise.
+
         """
-        raise NotImplementedError(
-            "Database writer must override '_write_edge_headers'"
-        )
+        raise NotImplementedError("Database writer must override '_write_edge_headers'")
 
     @abstractmethod
     def _construct_import_call(self) -> str:
-        """
-        Function to construct the import call detailing folder and
+        """Function to construct the import call detailing folder and
         individual node and edge headers and data files, as well as
         delimiters and database name. Built after all data has been
         processed to ensure that nodes are called before any edges.
 
-        Returns:
+        Returns
+        -------
             str: A bash command for csv import.
+
         """
-        raise NotImplementedError(
-            "Database writer must override '_construct_import_call'"
-        )
+        raise NotImplementedError("Database writer must override '_construct_import_call'")
 
     @abstractmethod
     def _get_import_script_name(self) -> str:
-        """
-        Returns the name of the import script.
+        """Returns the name of the import script.
         The name will be chosen based on the used database.
 
-        Returns:
+        Returns
+        -------
             str: The name of the import script (ending in .sh)
+
         """
-        raise NotImplementedError(
-            "Database writer must override '_get_import_script_name'"
-        )
+        raise NotImplementedError("Database writer must override '_get_import_script_name'")
 
     def __init__(
         self,
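For orientation, the hunk above defines the full abstract contract of _BatchWriter. Below is a minimal sketch of what a concrete subclass must supply; the class name _ToyCsvWriter and all method bodies are hypothetical and not part of the biocypher package, and only self.quote and self.adelim come from the constructor shown further down.

    class _ToyCsvWriter(_BatchWriter):
        # Hypothetical illustration of the _BatchWriter contract.

        def _quote_string(self, value: str) -> str:
            # Escape embedded quotes by doubling them, then wrap the value.
            return f"{self.quote}{value.replace(self.quote, self.quote * 2)}{self.quote}"

        def _get_default_import_call_bin_prefix(self):
            return ""  # assume the import binary is on PATH

        def _write_array_string(self, string_list):
            # Join array members with the configured array delimiter.
            return f"{self.quote}{self.adelim.join(string_list)}{self.quote}"

        def _write_node_headers(self):
            return True  # header-file writing omitted in this sketch

        def _write_edge_headers(self):
            return True

        def _construct_import_call(self) -> str:
            return "echo 'no import tool configured'"

        def _get_import_script_name(self) -> str:
            return "toy-import-call.sh"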
@@ -118,10 +110,10 @@ class _BatchWriter(_Writer, ABC):
         delimiter: str,
         array_delimiter: str = ",",
         quote: str = '"',
-        output_directory: Optional[str] = None,
+        output_directory: str | None = None,
         db_name: str = "neo4j",
-        import_call_bin_prefix: Optional[str] = None,
-        import_call_file_prefix: Optional[str] = None,
+        import_call_bin_prefix: str | None = None,
+        import_call_file_prefix: str | None = None,
         wipe: bool = True,
         strict_mode: bool = False,
         skip_bad_relationships: bool = False,
@@ -132,10 +124,9 @@ class _BatchWriter(_Writer, ABC):
         db_port: str = None,
         rdf_format: str = None,
         rdf_namespaces: dict = {},
+        labels_order: str = "Ascending",
     ):
-        """
-
-        Abtract parent class for writing node and edge representations to disk
+        """Abtract parent class for writing node and edge representations to disk
         using the format specified by each database type. The database-specific
         functions are implemented by the respective child-classes. This abstract
         class contains all methods expected by a bach writer instance, some of
@@ -156,6 +147,7 @@ class _BatchWriter(_Writer, ABC):
             - _get_import_script_name
 
         Args:
+        ----
             translator:
                 Instance of :py:class:`Translator` to enable translation of
                 nodes and manipulation of properties.
@@ -217,6 +209,11 @@ class _BatchWriter(_Writer, ABC):
 
             rdf_namespaces:
                 The namespaces for RDF.
+
+            labels_order:
+                The order of labels, to reflect the hierarchy (or not).
+                Default: "Ascending" (from more specific to more generic).
+
         """
         super().__init__(
             translator=translator,
@@ -233,17 +230,13 @@ class _BatchWriter(_Writer, ABC):
         self.rdf_namespaces = rdf_namespaces
 
         self.delim, self.escaped_delim = self._process_delimiter(delimiter)
-        self.adelim, self.escaped_adelim = self._process_delimiter(
-            array_delimiter
-        )
+        self.adelim, self.escaped_adelim = self._process_delimiter(array_delimiter)
         self.quote = quote
         self.skip_bad_relationships = skip_bad_relationships
         self.skip_duplicate_nodes = skip_duplicate_nodes
 
         if import_call_bin_prefix is None:
-            self.import_call_bin_prefix = (
-                self._get_default_import_call_bin_prefix()
-            )
+            self.import_call_bin_prefix = self._get_default_import_call_bin_prefix()
         else:
             self.import_call_bin_prefix = import_call_bin_prefix
 
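The hunks above modernize the constructor signature (PEP 604 `str | None` optionals, which require Python 3.10+) and add the new labels_order parameter. A hedged usage sketch follows; it assumes a concrete subclass such as the Neo4j batch writer, since only the abstract base class appears in this diff, and the exact subclass signature is not shown here:

    writer = _Neo4jBatchWriter(            # concrete subclass assumed, not shown in this diff
        translator=translator,             # Translator instance, per the Args section above
        delimiter="\\t",                   # unescaped to a real tab by _process_delimiter
        array_delimiter="|",
        quote='"',
        output_directory="biocypher-out/run-1",
        db_name="neo4j",
        labels_order="Ascending",          # new in 0.8.0
    )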
@@ -263,39 +256,41 @@ class _BatchWriter(_Writer, ABC):
 
         self.parts = {}  # dict to store the paths of part files for each label
 
+        self._labels_orders = ["Alphabetical", "Ascending", "Descending", "Leaves"]
+        if labels_order not in self._labels_orders:
+            msg = (
+                f"neo4j's 'labels_order' parameter cannot be '{labels_order}',"
+                "must be one of: {' ,'.join(self._labels_orders)}",
+            )
+            raise ValueError(msg)
+        self.labels_order = labels_order
+
         # TODO not memory efficient, but should be fine for most cases; is
         # there a more elegant solution?
 
     @property
     def import_call_file_prefix(self):
-        """
-        Property for output directory path.
-        """
-
+        """Property for output directory path."""
         if self._import_call_file_prefix is None:
             return self.outdir
         else:
             return self._import_call_file_prefix
 
     def _process_delimiter(self, delimiter: str) -> str:
-        """
-        Return escaped characters in case of receiving their string
+        """Return escaped characters in case of receiving their string
         representation (e.g. tab for '\t').
         """
-
         if delimiter == "\\t":
             return "\t", "\\t"
 
         else:
             return delimiter, delimiter
 
-    def write_nodes(
-        self, nodes, batch_size: int = int(1e6), force: bool = False
-    ):
-        """
-        Wrapper for writing nodes and their headers.
+    def write_nodes(self, nodes, batch_size: int = int(1e6), force: bool = False):
+        """Wrapper for writing nodes and their headers.
 
         Args:
+        ----
             nodes (BioCypherNode): a list or generator of nodes in
                 :py:class:`BioCypherNode` format
 
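Two details in this hunk are worth noting. First, _process_delimiter only unescapes the literal two-character string "\\t" into a real tab (returning the character and its escaped form); every other delimiter passes through unchanged. Second, the new labels_order validation as released appears to have two small defects: the trailing comma inside the parentheses makes msg a one-element tuple, and the second string literal is missing its f-prefix, so the list of allowed values is never interpolated. A corrected sketch of the same check (an editorial reading, not code from the package):

    self._labels_orders = ["Alphabetical", "Ascending", "Descending", "Leaves"]
    if labels_order not in self._labels_orders:
        msg = (
            f"neo4j's 'labels_order' parameter cannot be '{labels_order}', "
            f"must be one of: {', '.join(self._labels_orders)}"  # f-prefix added, tuple comma removed
        )
        raise ValueError(msg)
    self.labels_order = labels_order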
@@ -306,7 +301,9 @@ class _BatchWriter(_Writer, ABC):
 
 
         Returns:
+        -------
             bool: The return value. True for success, False otherwise.
+
         """
         # TODO check represented_as
 
@@ -325,19 +322,21 @@ class _BatchWriter(_Writer, ABC):
 
     def write_edges(
         self,
-        edges: Union[list, GeneratorType],
+        edges: list | GeneratorType,
         batch_size: int = int(1e6),
     ) -> bool:
-        """
-        Wrapper for writing edges and their headers.
+        """Wrapper for writing edges and their headers.
 
         Args:
+        ----
             edges (BioCypherEdge): a list or generator of edges in
                 :py:class:`BioCypherEdge` or :py:class:`BioCypherRelAsNode`
                 format
 
         Returns:
+        -------
             bool: The return value. True for success, False otherwise.
+
         """
         passed = False
         edges = list(edges)  # force evaluation to handle empty generator
@@ -375,7 +374,6 @@ class _BatchWriter(_Writer, ABC):
             logger.debug(
                 "No edges to write, possibly due to no matched Biolink classes.",
             )
-            pass
 
         if not passed:
             logger.error("Error while writing edge data.")
@@ -389,8 +387,7 @@ class _BatchWriter(_Writer, ABC):
         return True
 
     def _write_node_data(self, nodes, batch_size, force: bool = False):
-        """
-        Writes biocypher nodes to CSV conforming to the headers created
+        """Writes biocypher nodes to CSV conforming to the headers created
         with `_write_node_headers()`, and is actually required to be run
         before calling `_write_node_headers()` to set the
         :py:attr:`self.node_property_dict` for passing the node properties
@@ -398,14 +395,16 @@ class _BatchWriter(_Writer, ABC):
         :py:class:`BioCypherNode` class.
 
         Args:
+        ----
             nodes (BioCypherNode): a list or generator of nodes in
                 :py:class:`BioCypherNode` format
 
         Returns:
+        -------
             bool: The return value. True for success, False otherwise.
-        """
 
-        if isinstance(nodes, GeneratorType) or isinstance(nodes, peekable):
+        """
+        if isinstance(nodes, GeneratorType | peekable):
             logger.debug("Writing node CSV from generator.")
 
             bins = defaultdict(list)  # dict to store a list for each
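The rewritten type check relies on isinstance accepting PEP 604 unions (X | Y), available since Python 3.10; on older interpreters the equivalent tuple form still works. A quick demonstration:

    from types import GeneratorType
    from more_itertools import peekable

    gen = (x for x in range(3))
    assert isinstance(gen, GeneratorType | peekable)                   # Python >= 3.10
    assert isinstance(peekable(iter([])), (GeneratorType, peekable))   # tuple form, any version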
@@ -432,20 +431,15 @@ class _BatchWriter(_Writer, ABC):
                     logger.warning(f"Node {label} has no id; skipping.")
                     continue
 
-                if not label in bins.keys():
+                if label not in bins.keys():
                     # start new list
                     all_labels = None
                     bins[label].append(node)
                     bin_l[label] = 1
 
                     # get properties from config if present
-                    if (
-                        label
-                        in self.translator.ontology.mapping.extended_schema
-                    ):
-                        cprops = self.translator.ontology.mapping.extended_schema.get(
-                            label
-                        ).get(
+                    if label in self.translator.ontology.mapping.extended_schema:
+                        cprops = self.translator.ontology.mapping.extended_schema.get(label).get(
                             "properties",
                         )
                     else:
@@ -483,28 +477,39 @@ class _BatchWriter(_Writer, ABC):
                     # get label hierarchy
                     # multiple labels:
                     if not force:
-                        all_labels = self.translator.ontology.get_ancestors(
-                            label
-                        )
+                        all_labels = self.translator.ontology.get_ancestors(label)
                     else:
                         all_labels = None
 
                     if all_labels:
                         # convert to pascal case
-                        all_labels = [
-                            self.translator.name_sentence_to_pascal(label)
-                            for label in all_labels
-                        ]
+                        all_labels = [self.translator.name_sentence_to_pascal(label) for label in all_labels]
                         # remove duplicates
                         all_labels = list(OrderedDict.fromkeys(all_labels))
-                        # order alphabetically
-                        all_labels.sort()
+                        match self.labels_order:
+                            case "Ascending":
+                                pass  # Default from get_ancestors.
+                            case "Alphabetical":
+                                all_labels.sort()
+                            case "Descending":
+                                all_labels.reverse()
+                            case "Leaves":
+                                if len(all_labels) < 1:
+                                    msg = "Labels list cannot be empty when using 'Leaves' order."
+                                    raise ValueError(msg)
+                                all_labels = [all_labels[0]]
+                            case _:
+                                # In case someone touched _label_orders after constructor.
+                                if self.labels_order not in self._labels_orders:
+                                    msg = (
+                                        f"Invalid labels_order: {self.labels_order}. "
+                                        f"Must be one of {self._labels_orders}"
+                                    )
+                                    raise ValueError(msg)
                         # concatenate with array delimiter
                         all_labels = self._write_array_string(all_labels)
                     else:
-                        all_labels = self.translator.name_sentence_to_pascal(
-                            label
-                        )
+                        all_labels = self.translator.name_sentence_to_pascal(label)
 
                     labels[label] = all_labels
 
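The match statement replaces the unconditional alphabetical sort of 0.6.2. As a standalone sketch of its effect (the labels are illustrative examples; per the docstring above, get_ancestors returns the most specific label first, which is what "Ascending" preserves, and the error branch is omitted here):

    def order_labels(all_labels: list[str], labels_order: str) -> list[str]:
        # Mirrors the match block above on an already-deduplicated list.
        match labels_order:
            case "Ascending":
                pass                          # keep the get_ancestors order
            case "Alphabetical":
                all_labels.sort()
            case "Descending":
                all_labels.reverse()
            case "Leaves":
                all_labels = [all_labels[0]]  # most specific label only
        return all_labels

    labels = ["Protein", "Polypeptide", "BiologicalEntity"]   # illustrative hierarchy
    print(order_labels(labels[:], "Descending"))  # ['BiologicalEntity', 'Polypeptide', 'Protein']
    print(order_labels(labels[:], "Leaves"))      # ['Protein']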
@@ -549,16 +554,15 @@ class _BatchWriter(_Writer, ABC):
                 self.node_property_dict[label] = reference_props[label]
 
             return True
+        elif not isinstance(nodes, list):
+            logger.error("Nodes must be passed as list or generator.")
+            return False
         else:
-            if type(nodes) is not list:
-                logger.error("Nodes must be passed as list or generator.")
-                return False
-            else:
 
-                def gen(nodes):
-                    yield from nodes
+            def gen(nodes):
+                yield from nodes
 
-                return self._write_node_data(gen(nodes), batch_size=batch_size)
+            return self._write_node_data(gen(nodes), batch_size=batch_size)
 
     def _write_single_node_list_to_file(
         self,
@@ -567,11 +571,11 @@ class _BatchWriter(_Writer, ABC):
         prop_dict: dict,
         labels: str,
     ):
-        """
-        This function takes one list of biocypher nodes and writes them
+        """This function takes one list of biocypher nodes and writes them
         to a Neo4j admin import compatible CSV file.
 
         Args:
+        ----
             node_list (list): list of BioCypherNodes to be written
             label (str): the primary label of the node
             prop_dict (dict): properties of node class passed from parsing
@@ -580,7 +584,9 @@ class _BatchWriter(_Writer, ABC):
                 for the node class
 
         Returns:
+        -------
             bool: The return value. True for success, False otherwise.
+
         """
         if not all(isinstance(n, BioCypherNode) for n in node_list):
             logger.error("Nodes must be passed as type BioCypherNode.")
@@ -598,7 +604,7 @@ class _BatchWriter(_Writer, ABC):
             ref_props = list(prop_dict.keys())
 
             # compare lists order invariant
-            if not set(ref_props) == set(n_keys):
+            if set(ref_props) != set(n_keys):
                 onode = n.get_id()
                 oprop1 = set(ref_props).difference(n_keys)
                 oprop2 = set(n_keys).difference(ref_props)
@@ -632,11 +638,10 @@ class _BatchWriter(_Writer, ABC):
                     "boolean",
                 ]:
                     plist.append(str(p))
+                elif isinstance(p, list):
+                    plist.append(self._write_array_string(p))
                 else:
-                    if isinstance(p, list):
-                        plist.append(self._write_array_string(p))
-                    else:
-                        plist.append(self._quote_string(str(p)))
+                    plist.append(f"{self.quote}{p!s}{self.quote}")
 
             line.append(self.delim.join(plist))
             line.append(labels)
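Note the behavioral change in the final branch: 0.6.2 routed string properties through the database-specific _quote_string (whose abstract docstring above says escaping is handled by each writer), while 0.8.0 wraps the value in self.quote directly, without escaping embedded quote characters. A contrast, with the 0.6.2-style escaping assumed to double quotes, since the concrete _quote_string implementations are not part of this diff:

    quote = '"'
    p = 'a "quoted" value'

    # 0.8.0 branch shown above: wrap only, no escaping
    wrapped = f"{quote}{p!s}{quote}"                          # "a "quoted" value"

    # 0.6.2 delegated to _quote_string; a typical CSV-style
    # implementation (assumed) doubles embedded quotes:
    escaped = f"{quote}{p.replace(quote, quote * 2)}{quote}"  # "a ""quoted"" value"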
@@ -650,8 +655,7 @@ class _BatchWriter(_Writer, ABC):
         return True
 
     def _write_edge_data(self, edges, batch_size):
-        """
-        Writes biocypher edges to CSV conforming to the headers created
+        """Writes biocypher edges to CSV conforming to the headers created
         with `_write_edge_headers()`, and is actually required to be run
         before calling `_write_node_headers()` to set the
         :py:attr:`self.edge_property_dict` for passing the edge
@@ -659,17 +663,20 @@ class _BatchWriter(_Writer, ABC):
         from the :py:class:`BioCypherEdge` class.
 
         Args:
+        ----
             edges (BioCypherEdge): a list or generator of edges in
                 :py:class:`BioCypherEdge` format
 
         Returns:
+        -------
             bool: The return value. True for success, False otherwise.
 
         Todo:
+        ----
             - currently works for mixed edges but in practice often is
               called on one iterable containing one type of edge only
-        """
 
+        """
         if isinstance(edges, GeneratorType):
             logger.debug("Writing edge CSV from generator.")
 
@@ -685,14 +692,13 @@ class _BatchWriter(_Writer, ABC):
             for edge in edges:
                 if not (edge.get_source_id() and edge.get_target_id()):
                     logger.error(
-                        "Edge must have source and target node. "
-                        f"Caused by: {edge}",
+                        f"Edge must have source and target node. Caused by: {edge}",
                     )
                     continue
 
                 label = edge.get_label()
 
-                if not label in bins.keys():
+                if label not in bins.keys():
                     # start new list
                     bins[label].append(edge)
                     bin_l[label] = 1
@@ -703,13 +709,8 @@ class _BatchWriter(_Writer, ABC):
                     # (may not be if it is an edge that carries the
                     # "label_as_edge" property)
                     cprops = None
-                    if (
-                        label
-                        in self.translator.ontology.mapping.extended_schema
-                    ):
-                        cprops = self.translator.ontology.mapping.extended_schema.get(
-                            label
-                        ).get(
+                    if label in self.translator.ontology.mapping.extended_schema:
+                        cprops = self.translator.ontology.mapping.extended_schema.get(label).get(
                             "properties",
                         )
                     else:
@@ -717,9 +718,7 @@ class _BatchWriter(_Writer, ABC):
                         for (
                             k,
                             v,
-                        ) in (
-                            self.translator.ontology.mapping.extended_schema.items()
-                        ):
+                        ) in self.translator.ontology.mapping.extended_schema.items():
                             if isinstance(v, dict):
                                 if v.get("label_as_edge") == label:
                                     cprops = v.get("properties")
@@ -789,16 +788,15 @@ class _BatchWriter(_Writer, ABC):
                 self.edge_property_dict[label] = reference_props[label]
 
             return True
+        elif not isinstance(edges, list):
+            logger.error("Edges must be passed as list or generator.")
+            return False
         else:
-            if type(edges) is not list:
-                logger.error("Edges must be passed as list or generator.")
-                return False
-            else:
 
-                def gen(edges):
-                    yield from edges
+            def gen(edges):
+                yield from edges
 
-                return self._write_edge_data(gen(edges), batch_size=batch_size)
+            return self._write_edge_data(gen(edges), batch_size=batch_size)
 
     def _write_single_edge_list_to_file(
         self,
@@ -806,11 +804,11 @@ class _BatchWriter(_Writer, ABC):
         label: str,
         prop_dict: dict,
     ):
-        """
-        This function takes one list of biocypher edges and writes them
+        """This function takes one list of biocypher edges and writes them
         to a Neo4j admin import compatible CSV file.
 
         Args:
+        ----
             edge_list (list): list of BioCypherEdges to be written
 
             label (str): the label (type) of the edge
@@ -819,9 +817,10 @@ class _BatchWriter(_Writer, ABC):
                 function and their types
 
         Returns:
+        -------
             bool: The return value. True for success, False otherwise.
-        """
 
+        """
         if not all(isinstance(n, BioCypherEdge) for n in edge_list):
             logger.error("Edges must be passed as type BioCypherEdge.")
             return False
@@ -836,7 +835,7 @@ class _BatchWriter(_Writer, ABC):
             ref_props = list(prop_dict.keys())
 
             # compare list order invariant
-            if not set(ref_props) == set(e_keys):
+            if set(ref_props) != set(e_keys):
                 oedge = f"{e.get_source_id()}-{e.get_target_id()}"
                 oprop1 = set(ref_props).difference(e_keys)
                 oprop2 = set(e_keys).difference(ref_props)
@@ -867,11 +866,10 @@ class _BatchWriter(_Writer, ABC):
                     "boolean",
                 ]:
                     plist.append(str(p))
+                elif isinstance(p, list):
+                    plist.append(self._write_array_string(p))
                 else:
-                    if isinstance(p, list):
-                        plist.append(self._write_array_string(p))
-                    else:
-                        plist.append(self._quote_string(str(p)))
+                    plist.append(self.quote + str(p) + self.quote)
 
             entries = [e.get_source_id()]
 
@@ -880,9 +878,7 @@ class _BatchWriter(_Writer, ABC):
 
             if label in ["IS_SOURCE_OF", "IS_TARGET_OF", "IS_PART_OF"]:
                 skip_id = True
-            elif not self.translator.ontology.mapping.extended_schema.get(
-                label
-            ):
+            elif not self.translator.ontology.mapping.extended_schema.get(label):
                 # find label in schema by label_as_edge
                 for (
                     k,
@@ -897,9 +893,9 @@ class _BatchWriter(_Writer, ABC):
             if schema_label:
                 if (
                     self.translator.ontology.mapping.extended_schema.get(
-                        schema_label
+                        schema_label,
                     ).get("use_id")
-                    == False
+                    == False  # noqa: E712 (seems to not work with 'not')
                 ):
                     skip_id = True
 
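The noqa comment on == False is explained by the surrounding .get("use_id"): when the key is absent, get returns None, and not None is truthy, so a not-based check would skip ids for every schema entry that simply omits use_id. The explicit equality only matches a literal False:

    schema = {}                         # "use_id" not set
    value = schema.get("use_id")        # -> None
    assert not value                    # truthy check would wrongly trigger
    assert (value == False) is False    # equality triggers only on an explicit False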
@@ -913,7 +909,7 @@ class _BatchWriter(_Writer, ABC):
                 entries.append(
                     self.translator.name_sentence_to_pascal(
                         e.get_label(),
-                    )
+                    ),
                 )
 
             lines.append(
@@ -927,10 +923,10 @@ class _BatchWriter(_Writer, ABC):
         return True
 
     def _write_next_part(self, label: str, lines: list):
-        """
-        This function writes a list of strings to a new part file.
+        """This function writes a list of strings to a new part file.
 
         Args:
+        ----
             label (str): the label (type) of the edge; internal
                 representation sentence case -> needs to become PascalCase
                 for disk representation
@@ -938,17 +934,15 @@ class _BatchWriter(_Writer, ABC):
             lines (list): list of strings to be written
 
         Returns:
+        -------
             bool: The return value. True for success, False otherwise.
+
         """
         # translate label to PascalCase
-        label_pascal = self.translator.name_sentence_to_pascal(
-            parse_label(label)
-        )
+        label_pascal = self.translator.name_sentence_to_pascal(parse_label(label))
 
         # list files in self.outdir
-        files = glob.glob(
-            os.path.join(self.outdir, f"{label_pascal}-part*.csv")
-        )
+        files = glob.glob(os.path.join(self.outdir, f"{label_pascal}-part*.csv"))
         # find file with highest part number
         if not files:
             next_part = 0
@@ -956,10 +950,7 @@ class _BatchWriter(_Writer, ABC):
         else:
             next_part = (
                 max(
-                    [
-                        int(f.split(".")[-2].split("-")[-1].replace("part", ""))
-                        for f in files
-                    ],
+                    [int(f.split(".")[-2].split("-")[-1].replace("part", "")) for f in files],
                )
                + 1
            )
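The condensed comprehension above recovers the running part index from file names of the form <LabelPascal>-part<N>.csv produced by this writer. Checking the expression on a sample path:

    f = "biocypher-out/Protein-part007.csv"      # sample file name
    part = int(f.split(".")[-2].split("-")[-1].replace("part", ""))
    assert part == 7                             # the next part file gets index 8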
@@ -984,31 +975,29 @@ class _BatchWriter(_Writer, ABC):
         self.parts[label].append(part)
 
     def get_import_call(self) -> str:
-        """
-        Function to return the import call detailing folder and
+        """Function to return the import call detailing folder and
         individual node and edge headers and data files, as well as
         delimiters and database name.
 
-        Returns:
+        Returns
+        -------
             str: a bash command for the database import
-        """
 
+        """
         return self._construct_import_call()
 
     def write_import_call(self) -> str:
-        """
-        Function to write the import call detailing folder and
+        """Function to write the import call detailing folder and
         individual node and edge headers and data files, as well as
         delimiters and database name, to the export folder as txt.
 
-        Returns:
+        Returns
+        -------
             str: The path of the file holding the import call.
-        """
 
+        """
         file_path = os.path.join(self.outdir, self._get_import_script_name())
-        logger.info(
-            f"Writing {self.db_name + ' ' if self.db_name else ''}import call to `{file_path}`."
-        )
+        logger.info(f"Writing {self.db_name + ' ' if self.db_name else ''}import call to `{file_path}`.")
 
         with open(file_path, "w", encoding="utf-8") as f:
             f.write(self._construct_import_call())
@@ -1017,16 +1006,16 @@
 
 
 def parse_label(label: str) -> str:
-    """
-
-    Check if the label is compliant with Neo4j naming conventions,
+    """Check if the label is compliant with Neo4j naming conventions,
     https://neo4j.com/docs/cypher-manual/current/syntax/naming/, and if not,
     remove non-compliant characters.
 
     Args:
+    ----
        label (str): The label to check
    Returns:
        str: The compliant label
+
    """
    # Check if the name contains only alphanumeric characters, underscore, or dollar sign
    # and dot (for class hierarchy of BioCypher)
@@ -1036,7 +1025,7 @@ def parse_label(label: str) -> str:
    if non_matches:
        non_matches = list(set(non_matches))
        logger.warning(
-            f"Label is not compliant with Neo4j naming rules. Removed non compliant characters: {non_matches}"
+            f"Label is not compliant with Neo4j naming rules. Removed non compliant characters: {non_matches}",
        )
 
    def first_character_compliant(character: str) -> bool:
@@ -1047,7 +1036,5 @@ def parse_label(label: str) -> str:
        if first_character_compliant(c):
            matches = matches[matches.index(c) :]
            break
-        logger.warning(
-            "Label does not start with an alphabetic character or with $. Removed non compliant characters."
-        )
+        logger.warning("Label does not start with an alphabetic character or with $. Removed non compliant characters.")
    return "".join(matches).strip()