biocypher 0.6.2__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biocypher might be problematic. Click here for more details.

@@ -1,17 +1,17 @@
1
- from abc import ABC, abstractmethod
2
- from types import GeneratorType
3
- from typing import Union, Optional
4
- from collections import OrderedDict, defaultdict
1
+ import glob
5
2
  import os
6
3
  import re
7
- import glob
4
+
5
+ from abc import ABC, abstractmethod
6
+ from collections import OrderedDict, defaultdict
7
+ from types import GeneratorType
8
8
 
9
9
  from more_itertools import peekable
10
10
 
11
11
  from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
12
+ from biocypher._deduplicate import Deduplicator
12
13
  from biocypher._logger import logger
13
14
  from biocypher._translate import Translator
14
- from biocypher._deduplicate import Deduplicator
15
15
  from biocypher.output.write._writer import _Writer
16
16
 
17
17
 
@@ -20,96 +20,88 @@ class _BatchWriter(_Writer, ABC):
20
20
 
21
21
  @abstractmethod
22
22
  def _quote_string(self, value: str) -> str:
23
- """
24
- Abstract method to quote a string. Escaping is handled by the database-specific writer.
25
- """
26
-
23
+ """Abstract method to quote a string. Escaping is handled by the database-specific writer."""
27
24
  raise NotImplementedError(
28
- "Database writer must override '_quote_string'"
25
+ "Database writer must override '_quote_string'",
29
26
  )
30
27
 
31
28
  @abstractmethod
32
29
  def _get_default_import_call_bin_prefix(self):
33
- """
34
- Abstract method to provide the default string for the import call bin prefix.
30
+ """Abstract method to provide the default string for the import call bin prefix.
35
31
 
36
- Returns:
32
+ Returns
33
+ -------
37
34
  str: The database-specific string for the path to the import call bin prefix
35
+
38
36
  """
39
- raise NotImplementedError(
40
- "Database writer must override '_get_default_import_call_bin_prefix'"
41
- )
37
+ raise NotImplementedError("Database writer must override '_get_default_import_call_bin_prefix'")
42
38
 
43
39
  @abstractmethod
44
40
  def _write_array_string(self, string_list):
45
- """
46
- Abstract method to write the string representation of an array into a .csv file.
41
+ """Abstract method to write the string representation of an array into a .csv file.
47
42
  Different databases require different formats of array to optimize import speed.
48
43
 
49
44
  Args:
45
+ ----
50
46
  string_list (list): list of ontology strings
51
47
 
52
48
  Returns:
49
+ -------
53
50
  str: The database-specific string representation of an array
51
+
54
52
  """
55
- raise NotImplementedError(
56
- "Database writer must override '_write_array_string'"
57
- )
53
+ raise NotImplementedError("Database writer must override '_write_array_string'")
58
54
 
59
55
  @abstractmethod
60
56
  def _write_node_headers(self):
61
- """
62
- Abstract method that takes care of importing properties of a graph entity that is represented
57
+ """Abstract method that takes care of importing properties of a graph entity that is represented
63
58
  as a node as per the definition in the `schema_config.yaml`
64
59
 
65
- Returns:
60
+ Returns
61
+ -------
66
62
  bool: The return value. True for success, False otherwise.
63
+
67
64
  """
68
- raise NotImplementedError(
69
- "Database writer must override '_write_node_headers'"
70
- )
65
+ raise NotImplementedError("Database writer must override '_write_node_headers'")
71
66
 
72
67
  @abstractmethod
73
68
  def _write_edge_headers(self):
74
- """
75
- Abstract method to write a database import-file for a graph entity that is represented
69
+ """Abstract method to write a database import-file for a graph entity that is represented
76
70
  as an edge as per the definition in the `schema_config.yaml`,
77
71
  containing only the header for this type of edge.
78
72
 
79
- Returns:
73
+ Returns
74
+ -------
80
75
  bool: The return value. True for success, False otherwise.
76
+
81
77
  """
82
- raise NotImplementedError(
83
- "Database writer must override '_write_edge_headers'"
84
- )
78
+ raise NotImplementedError("Database writer must override '_write_edge_headers'")
85
79
 
86
80
  @abstractmethod
87
81
  def _construct_import_call(self) -> str:
88
- """
89
- Function to construct the import call detailing folder and
82
+ """Function to construct the import call detailing folder and
90
83
  individual node and edge headers and data files, as well as
91
84
  delimiters and database name. Built after all data has been
92
85
  processed to ensure that nodes are called before any edges.
93
86
 
94
- Returns:
87
+ Returns
88
+ -------
95
89
  str: A bash command for csv import.
90
+
96
91
  """
97
- raise NotImplementedError(
98
- "Database writer must override '_construct_import_call'"
99
- )
92
+ raise NotImplementedError("Database writer must override '_construct_import_call'")
100
93
 
101
94
  @abstractmethod
102
95
  def _get_import_script_name(self) -> str:
103
- """
104
- Returns the name of the import script.
96
+ """Returns the name of the import script.
105
97
  The name will be chosen based on the used database.
106
98
 
107
- Returns:
99
+ Returns
100
+ -------
108
101
  str: The name of the import script (ending in .sh)
102
+
109
103
  """
110
- raise NotImplementedError(
111
- "Database writer must override '_get_import_script_name'"
112
- )
104
+ raise NotImplementedError("Database writer must override '_get_import_script_name'")
113
105
 
114
106
  def __init__(
115
107
  self,
@@ -118,10 +110,10 @@ class _BatchWriter(_Writer, ABC):
118
110
  delimiter: str,
119
111
  array_delimiter: str = ",",
120
112
  quote: str = '"',
121
- output_directory: Optional[str] = None,
113
+ output_directory: str | None = None,
122
114
  db_name: str = "neo4j",
123
- import_call_bin_prefix: Optional[str] = None,
124
- import_call_file_prefix: Optional[str] = None,
115
+ import_call_bin_prefix: str | None = None,
116
+ import_call_file_prefix: str | None = None,
125
117
  wipe: bool = True,
126
118
  strict_mode: bool = False,
127
119
  skip_bad_relationships: bool = False,
@@ -133,9 +125,7 @@ class _BatchWriter(_Writer, ABC):
133
125
  rdf_format: str = None,
134
126
  rdf_namespaces: dict = {},
135
127
  ):
136
- """
137
-
138
- Abstract parent class for writing node and edge representations to disk
128
+ """Abstract parent class for writing node and edge representations to disk
139
129
  using the format specified by each database type. The database-specific
140
130
  functions are implemented by the respective child-classes. This abstract
141
131
  class contains all methods expected by a batch writer instance, some of
@@ -156,6 +146,7 @@ class _BatchWriter(_Writer, ABC):
156
146
  - _get_import_script_name
157
147
 
158
148
  Args:
149
+ ----
159
150
  translator:
160
151
  Instance of :py:class:`Translator` to enable translation of
161
152
  nodes and manipulation of properties.
@@ -217,6 +208,7 @@ class _BatchWriter(_Writer, ABC):
217
208
 
218
209
  rdf_namespaces:
219
210
  The namespaces for RDF.
211
+
220
212
  """
221
213
  super().__init__(
222
214
  translator=translator,
@@ -233,17 +225,13 @@ class _BatchWriter(_Writer, ABC):
233
225
  self.rdf_namespaces = rdf_namespaces
234
226
 
235
227
  self.delim, self.escaped_delim = self._process_delimiter(delimiter)
236
- self.adelim, self.escaped_adelim = self._process_delimiter(
237
- array_delimiter
238
- )
228
+ self.adelim, self.escaped_adelim = self._process_delimiter(array_delimiter)
239
229
  self.quote = quote
240
230
  self.skip_bad_relationships = skip_bad_relationships
241
231
  self.skip_duplicate_nodes = skip_duplicate_nodes
242
232
 
243
233
  if import_call_bin_prefix is None:
244
- self.import_call_bin_prefix = (
245
- self._get_default_import_call_bin_prefix()
246
- )
234
+ self.import_call_bin_prefix = self._get_default_import_call_bin_prefix()
247
235
  else:
248
236
  self.import_call_bin_prefix = import_call_bin_prefix
249
237
 
@@ -268,34 +256,27 @@ class _BatchWriter(_Writer, ABC):
268
256
 
269
257
  @property
270
258
  def import_call_file_prefix(self):
271
- """
272
- Property for output directory path.
273
- """
274
-
259
+ """Property for output directory path."""
275
260
  if self._import_call_file_prefix is None:
276
261
  return self.outdir
277
262
  else:
278
263
  return self._import_call_file_prefix
279
264
 
280
265
  def _process_delimiter(self, delimiter: str) -> str:
281
- """
282
- Return escaped characters in case of receiving their string
266
+ """Return escaped characters in case of receiving their string
283
267
  representation (e.g. tab for '\t').
284
268
  """
285
-
286
269
  if delimiter == "\\t":
287
270
  return "\t", "\\t"
288
271
 
289
272
  else:
290
273
  return delimiter, delimiter
291
274
 
292
- def write_nodes(
293
- self, nodes, batch_size: int = int(1e6), force: bool = False
294
- ):
295
- """
296
- Wrapper for writing nodes and their headers.
275
+ def write_nodes(self, nodes, batch_size: int = int(1e6), force: bool = False):
276
+ """Wrapper for writing nodes and their headers.
297
277
 
298
278
  Args:
279
+ ----
299
280
  nodes (BioCypherNode): a list or generator of nodes in
300
281
  :py:class:`BioCypherNode` format
301
282
 
@@ -306,7 +287,9 @@ class _BatchWriter(_Writer, ABC):
306
287
 
307
288
 
308
289
  Returns:
290
+ -------
309
291
  bool: The return value. True for success, False otherwise.
292
+
310
293
  """
311
294
  # TODO check represented_as
312
295
 
@@ -325,19 +308,21 @@ class _BatchWriter(_Writer, ABC):
325
308
 
326
309
  def write_edges(
327
310
  self,
328
- edges: Union[list, GeneratorType],
311
+ edges: list | GeneratorType,
329
312
  batch_size: int = int(1e6),
330
313
  ) -> bool:
331
- """
332
- Wrapper for writing edges and their headers.
314
+ """Wrapper for writing edges and their headers.
333
315
 
334
316
  Args:
317
+ ----
335
318
  edges (BioCypherEdge): a list or generator of edges in
336
319
  :py:class:`BioCypherEdge` or :py:class:`BioCypherRelAsNode`
337
320
  format
338
321
 
339
322
  Returns:
323
+ -------
340
324
  bool: The return value. True for success, False otherwise.
325
+
341
326
  """
342
327
  passed = False
343
328
  edges = list(edges) # force evaluation to handle empty generator
@@ -375,7 +360,6 @@ class _BatchWriter(_Writer, ABC):
375
360
  logger.debug(
376
361
  "No edges to write, possibly due to no matched Biolink classes.",
377
362
  )
378
- pass
379
363
 
380
364
  if not passed:
381
365
  logger.error("Error while writing edge data.")
@@ -389,8 +373,7 @@ class _BatchWriter(_Writer, ABC):
389
373
  return True
390
374
 
391
375
  def _write_node_data(self, nodes, batch_size, force: bool = False):
392
- """
393
- Writes biocypher nodes to CSV conforming to the headers created
376
+ """Writes biocypher nodes to CSV conforming to the headers created
394
377
  with `_write_node_headers()`, and is actually required to be run
395
378
  before calling `_write_node_headers()` to set the
396
379
  :py:attr:`self.node_property_dict` for passing the node properties
@@ -398,14 +381,16 @@ class _BatchWriter(_Writer, ABC):
398
381
  :py:class:`BioCypherNode` class.
399
382
 
400
383
  Args:
384
+ ----
401
385
  nodes (BioCypherNode): a list or generator of nodes in
402
386
  :py:class:`BioCypherNode` format
403
387
 
404
388
  Returns:
389
+ -------
405
390
  bool: The return value. True for success, False otherwise.
406
- """
407
391
 
408
- if isinstance(nodes, GeneratorType) or isinstance(nodes, peekable):
392
+ """
393
+ if isinstance(nodes, GeneratorType | peekable):
409
394
  logger.debug("Writing node CSV from generator.")
410
395
 
411
396
  bins = defaultdict(list) # dict to store a list for each
@@ -432,20 +417,15 @@ class _BatchWriter(_Writer, ABC):
432
417
  logger.warning(f"Node {label} has no id; skipping.")
433
418
  continue
434
419
 
435
- if not label in bins.keys():
420
+ if label not in bins.keys():
436
421
  # start new list
437
422
  all_labels = None
438
423
  bins[label].append(node)
439
424
  bin_l[label] = 1
440
425
 
441
426
  # get properties from config if present
442
- if (
443
- label
444
- in self.translator.ontology.mapping.extended_schema
445
- ):
446
- cprops = self.translator.ontology.mapping.extended_schema.get(
447
- label
448
- ).get(
427
+ if label in self.translator.ontology.mapping.extended_schema:
428
+ cprops = self.translator.ontology.mapping.extended_schema.get(label).get(
449
429
  "properties",
450
430
  )
451
431
  else:
@@ -483,18 +463,13 @@ class _BatchWriter(_Writer, ABC):
483
463
  # get label hierarchy
484
464
  # multiple labels:
485
465
  if not force:
486
- all_labels = self.translator.ontology.get_ancestors(
487
- label
488
- )
466
+ all_labels = self.translator.ontology.get_ancestors(label)
489
467
  else:
490
468
  all_labels = None
491
469
 
492
470
  if all_labels:
493
471
  # convert to pascal case
494
- all_labels = [
495
- self.translator.name_sentence_to_pascal(label)
496
- for label in all_labels
497
- ]
472
+ all_labels = [self.translator.name_sentence_to_pascal(label) for label in all_labels]
498
473
  # remove duplicates
499
474
  all_labels = list(OrderedDict.fromkeys(all_labels))
500
475
  # order alphabetically
@@ -502,9 +477,7 @@ class _BatchWriter(_Writer, ABC):
502
477
  # concatenate with array delimiter
503
478
  all_labels = self._write_array_string(all_labels)
504
479
  else:
505
- all_labels = self.translator.name_sentence_to_pascal(
506
- label
507
- )
480
+ all_labels = self.translator.name_sentence_to_pascal(label)
508
481
 
509
482
  labels[label] = all_labels
510
483
 
@@ -549,16 +522,15 @@ class _BatchWriter(_Writer, ABC):
549
522
  self.node_property_dict[label] = reference_props[label]
550
523
 
551
524
  return True
525
+ elif not isinstance(nodes, list):
526
+ logger.error("Nodes must be passed as list or generator.")
527
+ return False
552
528
  else:
553
- if type(nodes) is not list:
554
- logger.error("Nodes must be passed as list or generator.")
555
- return False
556
- else:
557
529
 
558
- def gen(nodes):
559
- yield from nodes
530
+ def gen(nodes):
531
+ yield from nodes
560
532
 
561
- return self._write_node_data(gen(nodes), batch_size=batch_size)
533
+ return self._write_node_data(gen(nodes), batch_size=batch_size)
562
534
 
563
535
  def _write_single_node_list_to_file(
564
536
  self,
@@ -567,11 +539,11 @@ class _BatchWriter(_Writer, ABC):
567
539
  prop_dict: dict,
568
540
  labels: str,
569
541
  ):
570
- """
571
- This function takes one list of biocypher nodes and writes them
542
+ """This function takes one list of biocypher nodes and writes them
572
543
  to a Neo4j admin import compatible CSV file.
573
544
 
574
545
  Args:
546
+ ----
575
547
  node_list (list): list of BioCypherNodes to be written
576
548
  label (str): the primary label of the node
577
549
  prop_dict (dict): properties of node class passed from parsing
@@ -580,7 +552,9 @@ class _BatchWriter(_Writer, ABC):
580
552
  for the node class
581
553
 
582
554
  Returns:
555
+ -------
583
556
  bool: The return value. True for success, False otherwise.
557
+
584
558
  """
585
559
  if not all(isinstance(n, BioCypherNode) for n in node_list):
586
560
  logger.error("Nodes must be passed as type BioCypherNode.")
@@ -598,7 +572,7 @@ class _BatchWriter(_Writer, ABC):
598
572
  ref_props = list(prop_dict.keys())
599
573
 
600
574
  # compare lists order invariant
601
- if not set(ref_props) == set(n_keys):
575
+ if set(ref_props) != set(n_keys):
602
576
  onode = n.get_id()
603
577
  oprop1 = set(ref_props).difference(n_keys)
604
578
  oprop2 = set(n_keys).difference(ref_props)
@@ -632,11 +606,10 @@ class _BatchWriter(_Writer, ABC):
632
606
  "boolean",
633
607
  ]:
634
608
  plist.append(str(p))
609
+ elif isinstance(p, list):
610
+ plist.append(self._write_array_string(p))
635
611
  else:
636
- if isinstance(p, list):
637
- plist.append(self._write_array_string(p))
638
- else:
639
- plist.append(self._quote_string(str(p)))
612
+ plist.append(f"{self.quote}{p!s}{self.quote}")
640
613
 
641
614
  line.append(self.delim.join(plist))
642
615
  line.append(labels)
@@ -650,8 +623,7 @@ class _BatchWriter(_Writer, ABC):
650
623
  return True
651
624
 
652
625
  def _write_edge_data(self, edges, batch_size):
653
- """
654
- Writes biocypher edges to CSV conforming to the headers created
626
+ """Writes biocypher edges to CSV conforming to the headers created
655
627
  with `_write_edge_headers()`, and is actually required to be run
656
628
  before calling `_write_edge_headers()` to set the
657
629
  :py:attr:`self.edge_property_dict` for passing the edge
@@ -659,17 +631,20 @@ class _BatchWriter(_Writer, ABC):
659
631
  from the :py:class:`BioCypherEdge` class.
660
632
 
661
633
  Args:
634
+ ----
662
635
  edges (BioCypherEdge): a list or generator of edges in
663
636
  :py:class:`BioCypherEdge` format
664
637
 
665
638
  Returns:
639
+ -------
666
640
  bool: The return value. True for success, False otherwise.
667
641
 
668
642
  Todo:
643
+ ----
669
644
  - currently works for mixed edges but in practice often is
670
645
  called on one iterable containing one type of edge only
671
- """
672
646
 
647
+ """
673
648
  if isinstance(edges, GeneratorType):
674
649
  logger.debug("Writing edge CSV from generator.")
675
650
 
@@ -685,14 +660,13 @@ class _BatchWriter(_Writer, ABC):
685
660
  for edge in edges:
686
661
  if not (edge.get_source_id() and edge.get_target_id()):
687
662
  logger.error(
688
- "Edge must have source and target node. "
689
- f"Caused by: {edge}",
663
+ f"Edge must have source and target node. Caused by: {edge}",
690
664
  )
691
665
  continue
692
666
 
693
667
  label = edge.get_label()
694
668
 
695
- if not label in bins.keys():
669
+ if label not in bins.keys():
696
670
  # start new list
697
671
  bins[label].append(edge)
698
672
  bin_l[label] = 1
@@ -703,13 +677,8 @@ class _BatchWriter(_Writer, ABC):
703
677
  # (may not be if it is an edge that carries the
704
678
  # "label_as_edge" property)
705
679
  cprops = None
706
- if (
707
- label
708
- in self.translator.ontology.mapping.extended_schema
709
- ):
710
- cprops = self.translator.ontology.mapping.extended_schema.get(
711
- label
712
- ).get(
680
+ if label in self.translator.ontology.mapping.extended_schema:
681
+ cprops = self.translator.ontology.mapping.extended_schema.get(label).get(
713
682
  "properties",
714
683
  )
715
684
  else:
@@ -717,9 +686,7 @@ class _BatchWriter(_Writer, ABC):
717
686
  for (
718
687
  k,
719
688
  v,
720
- ) in (
721
- self.translator.ontology.mapping.extended_schema.items()
722
- ):
689
+ ) in self.translator.ontology.mapping.extended_schema.items():
723
690
  if isinstance(v, dict):
724
691
  if v.get("label_as_edge") == label:
725
692
  cprops = v.get("properties")
@@ -789,16 +756,15 @@ class _BatchWriter(_Writer, ABC):
789
756
  self.edge_property_dict[label] = reference_props[label]
790
757
 
791
758
  return True
759
+ elif not isinstance(edges, list):
760
+ logger.error("Edges must be passed as list or generator.")
761
+ return False
792
762
  else:
793
- if type(edges) is not list:
794
- logger.error("Edges must be passed as list or generator.")
795
- return False
796
- else:
797
763
 
798
- def gen(edges):
799
- yield from edges
764
+ def gen(edges):
765
+ yield from edges
800
766
 
801
- return self._write_edge_data(gen(edges), batch_size=batch_size)
767
+ return self._write_edge_data(gen(edges), batch_size=batch_size)
802
768
 
803
769
  def _write_single_edge_list_to_file(
804
770
  self,
@@ -806,11 +772,11 @@ class _BatchWriter(_Writer, ABC):
806
772
  label: str,
807
773
  prop_dict: dict,
808
774
  ):
809
- """
810
- This function takes one list of biocypher edges and writes them
775
+ """This function takes one list of biocypher edges and writes them
811
776
  to a Neo4j admin import compatible CSV file.
812
777
 
813
778
  Args:
779
+ ----
814
780
  edge_list (list): list of BioCypherEdges to be written
815
781
 
816
782
  label (str): the label (type) of the edge
@@ -819,9 +785,10 @@ class _BatchWriter(_Writer, ABC):
819
785
  function and their types
820
786
 
821
787
  Returns:
788
+ -------
822
789
  bool: The return value. True for success, False otherwise.
823
- """
824
790
 
791
+ """
825
792
  if not all(isinstance(n, BioCypherEdge) for n in edge_list):
826
793
  logger.error("Edges must be passed as type BioCypherEdge.")
827
794
  return False
@@ -836,7 +803,7 @@ class _BatchWriter(_Writer, ABC):
836
803
  ref_props = list(prop_dict.keys())
837
804
 
838
805
  # compare list order invariant
839
- if not set(ref_props) == set(e_keys):
806
+ if set(ref_props) != set(e_keys):
840
807
  oedge = f"{e.get_source_id()}-{e.get_target_id()}"
841
808
  oprop1 = set(ref_props).difference(e_keys)
842
809
  oprop2 = set(e_keys).difference(ref_props)
@@ -867,11 +834,10 @@ class _BatchWriter(_Writer, ABC):
867
834
  "boolean",
868
835
  ]:
869
836
  plist.append(str(p))
837
+ elif isinstance(p, list):
838
+ plist.append(self._write_array_string(p))
870
839
  else:
871
- if isinstance(p, list):
872
- plist.append(self._write_array_string(p))
873
- else:
874
- plist.append(self._quote_string(str(p)))
840
+ plist.append(self.quote + str(p) + self.quote)
875
841
 
876
842
  entries = [e.get_source_id()]
877
843
 
@@ -880,9 +846,7 @@ class _BatchWriter(_Writer, ABC):
880
846
 
881
847
  if label in ["IS_SOURCE_OF", "IS_TARGET_OF", "IS_PART_OF"]:
882
848
  skip_id = True
883
- elif not self.translator.ontology.mapping.extended_schema.get(
884
- label
885
- ):
849
+ elif not self.translator.ontology.mapping.extended_schema.get(label):
886
850
  # find label in schema by label_as_edge
887
851
  for (
888
852
  k,
@@ -897,9 +861,9 @@ class _BatchWriter(_Writer, ABC):
897
861
  if schema_label:
898
862
  if (
899
863
  self.translator.ontology.mapping.extended_schema.get(
900
- schema_label
864
+ schema_label,
901
865
  ).get("use_id")
902
- == False
866
+ == False # noqa: E712 (seems to not work with 'not')
903
867
  ):
904
868
  skip_id = True
905
869
 
@@ -913,7 +877,7 @@ class _BatchWriter(_Writer, ABC):
913
877
  entries.append(
914
878
  self.translator.name_sentence_to_pascal(
915
879
  e.get_label(),
916
- )
880
+ ),
917
881
  )
918
882
 
919
883
  lines.append(
@@ -927,10 +891,10 @@ class _BatchWriter(_Writer, ABC):
927
891
  return True
928
892
 
929
893
  def _write_next_part(self, label: str, lines: list):
930
- """
931
- This function writes a list of strings to a new part file.
894
+ """This function writes a list of strings to a new part file.
932
895
 
933
896
  Args:
897
+ ----
934
898
  label (str): the label (type) of the edge; internal
935
899
  representation sentence case -> needs to become PascalCase
936
900
  for disk representation
@@ -938,17 +902,15 @@ class _BatchWriter(_Writer, ABC):
938
902
  lines (list): list of strings to be written
939
903
 
940
904
  Returns:
905
+ -------
941
906
  bool: The return value. True for success, False otherwise.
907
+
942
908
  """
943
909
  # translate label to PascalCase
944
- label_pascal = self.translator.name_sentence_to_pascal(
945
- parse_label(label)
946
- )
910
+ label_pascal = self.translator.name_sentence_to_pascal(parse_label(label))
947
911
 
948
912
  # list files in self.outdir
949
- files = glob.glob(
950
- os.path.join(self.outdir, f"{label_pascal}-part*.csv")
951
- )
913
+ files = glob.glob(os.path.join(self.outdir, f"{label_pascal}-part*.csv"))
952
914
  # find file with highest part number
953
915
  if not files:
954
916
  next_part = 0
@@ -956,10 +918,7 @@ class _BatchWriter(_Writer, ABC):
956
918
  else:
957
919
  next_part = (
958
920
  max(
959
- [
960
- int(f.split(".")[-2].split("-")[-1].replace("part", ""))
961
- for f in files
962
- ],
921
+ [int(f.split(".")[-2].split("-")[-1].replace("part", "")) for f in files],
963
922
  )
964
923
  + 1
965
924
  )
@@ -984,31 +943,29 @@ class _BatchWriter(_Writer, ABC):
984
943
  self.parts[label].append(part)
985
944
 
986
945
  def get_import_call(self) -> str:
987
- """
988
- Function to return the import call detailing folder and
946
+ """Function to return the import call detailing folder and
989
947
  individual node and edge headers and data files, as well as
990
948
  delimiters and database name.
991
949
 
992
- Returns:
950
+ Returns
951
+ -------
993
952
  str: a bash command for the database import
994
- """
995
953
 
954
+ """
996
955
  return self._construct_import_call()
997
956
 
998
957
  def write_import_call(self) -> str:
999
- """
1000
- Function to write the import call detailing folder and
958
+ """Function to write the import call detailing folder and
1001
959
  individual node and edge headers and data files, as well as
1002
960
  delimiters and database name, to the export folder as txt.
1003
961
 
1004
- Returns:
962
+ Returns
963
+ -------
1005
964
  str: The path of the file holding the import call.
1006
- """
1007
965
 
966
+ """
1008
967
  file_path = os.path.join(self.outdir, self._get_import_script_name())
1009
- logger.info(
1010
- f"Writing {self.db_name + ' ' if self.db_name else ''}import call to `{file_path}`."
1011
- )
968
+ logger.info(f"Writing {self.db_name + ' ' if self.db_name else ''}import call to `{file_path}`.")
1012
969
 
1013
970
  with open(file_path, "w", encoding="utf-8") as f:
1014
971
  f.write(self._construct_import_call())
@@ -1017,16 +974,16 @@ class _BatchWriter(_Writer, ABC):
1017
974
 
1018
975
 
1019
976
  def parse_label(label: str) -> str:
1020
- """
1021
-
1022
- Check if the label is compliant with Neo4j naming conventions,
977
+ """Check if the label is compliant with Neo4j naming conventions,
1023
978
  https://neo4j.com/docs/cypher-manual/current/syntax/naming/, and if not,
1024
979
  remove non-compliant characters.
1025
980
 
1026
981
  Args:
982
+ ----
1027
983
  label (str): The label to check
1028
984
  Returns:
1029
985
  str: The compliant label
986
+
1030
987
  """
1031
988
  # Check if the name contains only alphanumeric characters, underscore, or dollar sign
1032
989
  # and dot (for class hierarchy of BioCypher)
@@ -1036,7 +993,7 @@ def parse_label(label: str) -> str:
1036
993
  if non_matches:
1037
994
  non_matches = list(set(non_matches))
1038
995
  logger.warning(
1039
- f"Label is not compliant with Neo4j naming rules. Removed non compliant characters: {non_matches}"
996
+ f"Label is not compliant with Neo4j naming rules. Removed non compliant characters: {non_matches}",
1040
997
  )
1041
998
 
1042
999
  def first_character_compliant(character: str) -> bool:
@@ -1047,7 +1004,5 @@ def parse_label(label: str) -> str:
1047
1004
  if first_character_compliant(c):
1048
1005
  matches = matches[matches.index(c) :]
1049
1006
  break
1050
- logger.warning(
1051
- "Label does not start with an alphabetic character or with $. Removed non compliant characters."
1052
- )
1007
+ logger.warning("Label does not start with an alphabetic character or with $. Removed non compliant characters.")
1053
1008
  return "".join(matches).strip()