biocypher 0.6.1__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of biocypher has been flagged as potentially problematic.

@@ -1,105 +1,107 @@
-from abc import ABC, abstractmethod
-from types import GeneratorType
-from typing import Union, Optional
-from collections import OrderedDict, defaultdict
+import glob
 import os
 import re
-import glob
+
+from abc import ABC, abstractmethod
+from collections import OrderedDict, defaultdict
+from types import GeneratorType
 
 from more_itertools import peekable
 
 from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
+from biocypher._deduplicate import Deduplicator
 from biocypher._logger import logger
 from biocypher._translate import Translator
-from biocypher._deduplicate import Deduplicator
 from biocypher.output.write._writer import _Writer
 
 
 class _BatchWriter(_Writer, ABC):
     """Abstract batch writer class"""
 
+    @abstractmethod
+    def _quote_string(self, value: str) -> str:
+        """Abstract method to quote a string. Escaping is handled by the database-specific writer."""
+        raise NotImplementedError(
+            "Database writer must override '_quote_string'",
+        )
+
     @abstractmethod
     def _get_default_import_call_bin_prefix(self):
-        """
-        Abstract method to provide the default string for the import call bin prefix.
+        """Abstract method to provide the default string for the import call bin prefix.
 
-        Returns:
+        Returns
+        -------
             str: The database-specific string for the path to the import call bin prefix
+
         """
-        raise NotImplementedError(
-            "Database writer must override '_get_default_import_call_bin_prefix'"
-        )
+        raise NotImplementedError("Database writer must override '_get_default_import_call_bin_prefix'")
 
     @abstractmethod
     def _write_array_string(self, string_list):
-        """
-        Abstract method to write the string representation of an array into a .csv file.
+        """Abstract method to write the string representation of an array into a .csv file.
         Different databases require different formats of array to optimize import speed.
 
         Args:
+        ----
             string_list (list): list of ontology strings
 
         Returns:
+        -------
             str: The database-specific string representation of an array
+
         """
-        raise NotImplementedError(
-            "Database writer must override '_write_array_string'"
-        )
+        raise NotImplementedError("Database writer must override '_write_array_string'")
 
     @abstractmethod
     def _write_node_headers(self):
-        """
-        Abstract method that takes care of importing properties of a graph entity that is represented
+        """Abstract method that takes care of importing properties of a graph entity that is represented
         as a node as per the definition in the `schema_config.yaml`
 
-        Returns:
+        Returns
+        -------
             bool: The return value. True for success, False otherwise.
+
         """
-        raise NotImplementedError(
-            "Database writer must override '_write_node_headers'"
-        )
+        raise NotImplementedError("Database writer must override '_write_node_headers'")
 
     @abstractmethod
     def _write_edge_headers(self):
-        """
-        Abstract method to write a database import-file for a graph entity that is represented
+        """Abstract method to write a database import-file for a graph entity that is represented
         as an edge as per the definition in the `schema_config.yaml`,
         containing only the header for this type of edge.
 
-        Returns:
+        Returns
+        -------
             bool: The return value. True for success, False otherwise.
+
         """
-        raise NotImplementedError(
-            "Database writer must override '_write_edge_headers'"
-        )
+        raise NotImplementedError("Database writer must override '_write_edge_headers'")
 
     @abstractmethod
     def _construct_import_call(self) -> str:
-        """
-        Function to construct the import call detailing folder and
+        """Function to construct the import call detailing folder and
         individual node and edge headers and data files, as well as
         delimiters and database name. Built after all data has been
         processed to ensure that nodes are called before any edges.
 
-        Returns:
+        Returns
+        -------
             str: A bash command for csv import.
+
         """
-        raise NotImplementedError(
-            "Database writer must override '_construct_import_call'"
-        )
+        raise NotImplementedError("Database writer must override '_construct_import_call'")
 
     @abstractmethod
     def _get_import_script_name(self) -> str:
-        """
-        Returns the name of the import script.
+        """Returns the name of the import script.
         The name will be chosen based on the used database.
 
-        Returns:
+        Returns
+        -------
             str: The name of the import script (ending in .sh)
+
         """
-        raise NotImplementedError(
-            "Database writer must override '_get_import_script_name'"
-        )
+        raise NotImplementedError("Database writer must override '_get_import_script_name'")
 
     def __init__(
         self,
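Note: the only API addition in this first hunk is the new abstract `_quote_string` method, which delegates quoting and escaping to each database-specific writer; the rest is import reordering and docstring reflow. A minimal sketch of the kind of implementation a concrete subclass might provide (the quote-doubling escape rule is an illustrative assumption, not biocypher's actual logic):

    def quote_string(value: str, quote: str = '"') -> str:
        # Double any embedded quote character, then wrap the value.
        escaped = value.replace(quote, quote * 2)
        return f"{quote}{escaped}{quote}"

    assert quote_string('say "hi"') == '"say ""hi"""'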
@@ -108,10 +110,10 @@ class _BatchWriter(_Writer, ABC):
         delimiter: str,
         array_delimiter: str = ",",
         quote: str = '"',
-        output_directory: Optional[str] = None,
+        output_directory: str | None = None,
         db_name: str = "neo4j",
-        import_call_bin_prefix: Optional[str] = None,
-        import_call_file_prefix: Optional[str] = None,
+        import_call_bin_prefix: str | None = None,
+        import_call_file_prefix: str | None = None,
         wipe: bool = True,
         strict_mode: bool = False,
         skip_bad_relationships: bool = False,
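Note: the `Optional[str]` -> `str | None` rewrites above are behavior-neutral. PEP 604 union syntax in annotations requires Python 3.10+ at runtime, which this release evidently targets:

    from typing import Optional

    def f(x: Optional[str] = None) -> str | None:  # equivalent spellings
        return x

    # On Python >= 3.10 the two forms compare equal.
    assert (str | None) == Optional[str]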
@@ -123,9 +125,7 @@ class _BatchWriter(_Writer, ABC):
         rdf_format: str = None,
         rdf_namespaces: dict = {},
     ):
-        """
-
-        Abtract parent class for writing node and edge representations to disk
+        """Abtract parent class for writing node and edge representations to disk
         using the format specified by each database type. The database-specific
         functions are implemented by the respective child-classes. This abstract
         class contains all methods expected by a bach writer instance, some of
@@ -146,6 +146,7 @@ class _BatchWriter(_Writer, ABC):
         - _get_import_script_name
 
         Args:
+        ----
             translator:
                 Instance of :py:class:`Translator` to enable translation of
                 nodes and manipulation of properties.
@@ -207,6 +208,7 @@ class _BatchWriter(_Writer, ABC):
 
             rdf_namespaces:
                 The namespaces for RDF.
+
         """
         super().__init__(
             translator=translator,
@@ -223,17 +225,13 @@ class _BatchWriter(_Writer, ABC):
         self.rdf_namespaces = rdf_namespaces
 
         self.delim, self.escaped_delim = self._process_delimiter(delimiter)
-        self.adelim, self.escaped_adelim = self._process_delimiter(
-            array_delimiter
-        )
+        self.adelim, self.escaped_adelim = self._process_delimiter(array_delimiter)
         self.quote = quote
         self.skip_bad_relationships = skip_bad_relationships
         self.skip_duplicate_nodes = skip_duplicate_nodes
 
         if import_call_bin_prefix is None:
-            self.import_call_bin_prefix = (
-                self._get_default_import_call_bin_prefix()
-            )
+            self.import_call_bin_prefix = self._get_default_import_call_bin_prefix()
         else:
             self.import_call_bin_prefix = import_call_bin_prefix
 
@@ -258,34 +256,27 @@ class _BatchWriter(_Writer, ABC):
 
     @property
     def import_call_file_prefix(self):
-        """
-        Property for output directory path.
-        """
-
+        """Property for output directory path."""
         if self._import_call_file_prefix is None:
             return self.outdir
         else:
             return self._import_call_file_prefix
 
     def _process_delimiter(self, delimiter: str) -> str:
-        """
-        Return escaped characters in case of receiving their string
+        """Return escaped characters in case of receiving their string
         representation (e.g. tab for '\t').
         """
-
         if delimiter == "\\t":
             return "\t", "\\t"
 
         else:
             return delimiter, delimiter
 
-    def write_nodes(
-        self, nodes, batch_size: int = int(1e6), force: bool = False
-    ):
-        """
-        Wrapper for writing nodes and their headers.
+    def write_nodes(self, nodes, batch_size: int = int(1e6), force: bool = False):
+        """Wrapper for writing nodes and their headers.
 
         Args:
+        ----
             nodes (BioCypherNode): a list or generator of nodes in
                 :py:class:`BioCypherNode` format
 
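Note: `_process_delimiter` (context above) maps the two-character string `\t`, as it typically arrives from a config file, to a real tab plus its escaped form, and passes any other delimiter through unchanged. The same logic in isolation:

    def process_delimiter(delimiter: str) -> tuple[str, str]:
        if delimiter == "\\t":  # literal backslash-t from config
            return "\t", "\\t"
        return delimiter, delimiter

    assert process_delimiter("\\t") == ("\t", "\\t")
    assert process_delimiter(";") == (";", ";")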
@@ -296,7 +287,9 @@ class _BatchWriter(_Writer, ABC):
 
 
         Returns:
+        -------
             bool: The return value. True for success, False otherwise.
+
         """
         # TODO check represented_as
 
@@ -315,19 +308,21 @@ class _BatchWriter(_Writer, ABC):
 
     def write_edges(
         self,
-        edges: Union[list, GeneratorType],
+        edges: list | GeneratorType,
         batch_size: int = int(1e6),
     ) -> bool:
-        """
-        Wrapper for writing edges and their headers.
+        """Wrapper for writing edges and their headers.
 
         Args:
+        ----
             edges (BioCypherEdge): a list or generator of edges in
                 :py:class:`BioCypherEdge` or :py:class:`BioCypherRelAsNode`
                 format
 
         Returns:
+        -------
             bool: The return value. True for success, False otherwise.
+
         """
         passed = False
         edges = list(edges)  # force evaluation to handle empty generator
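Note: `write_edges` accepts a list or a generator and forces evaluation with `list(edges)` so an empty generator is detected before any files are written. A hedged usage sketch; the `BioCypherEdge` keyword names follow `biocypher._create` as shown in the imports above, and `writer` stands for a concrete `_BatchWriter` subclass instance that this snippet does not construct:

    from biocypher._create import BioCypherEdge

    def edge_gen():
        # keyword names assumed from biocypher._create; adjust if the
        # installed version differs
        yield BioCypherEdge(
            source_id="protein:P12345",
            target_id="protein:Q67890",
            relationship_label="interacts with",
        )

    # writer.write_edges(edge_gen(), batch_size=int(1e6))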
@@ -365,7 +360,6 @@ class _BatchWriter(_Writer, ABC):
             logger.debug(
                 "No edges to write, possibly due to no matched Biolink classes.",
             )
-            pass
 
         if not passed:
             logger.error("Error while writing edge data.")
@@ -379,8 +373,7 @@ class _BatchWriter(_Writer, ABC):
         return True
 
     def _write_node_data(self, nodes, batch_size, force: bool = False):
-        """
-        Writes biocypher nodes to CSV conforming to the headers created
+        """Writes biocypher nodes to CSV conforming to the headers created
         with `_write_node_headers()`, and is actually required to be run
         before calling `_write_node_headers()` to set the
         :py:attr:`self.node_property_dict` for passing the node properties
@@ -388,14 +381,16 @@ class _BatchWriter(_Writer, ABC):
         :py:class:`BioCypherNode` class.
 
         Args:
+        ----
             nodes (BioCypherNode): a list or generator of nodes in
                 :py:class:`BioCypherNode` format
 
         Returns:
+        -------
             bool: The return value. True for success, False otherwise.
-        """
 
-        if isinstance(nodes, GeneratorType) or isinstance(nodes, peekable):
+        """
+        if isinstance(nodes, GeneratorType | peekable):
             logger.debug("Writing node CSV from generator.")
 
             bins = defaultdict(list)  # dict to store a list for each
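Note: `isinstance(nodes, GeneratorType | peekable)` relies on Python 3.10's support for passing `X | Y` union objects to `isinstance`; it is equivalent to the previous two-call form and to the tuple form:

    from types import GeneratorType

    from more_itertools import peekable

    g = (x for x in range(3))
    assert isinstance(g, GeneratorType | peekable)   # Python >= 3.10
    assert isinstance(g, (GeneratorType, peekable))  # works on any version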
@@ -422,20 +417,15 @@ class _BatchWriter(_Writer, ABC):
                     logger.warning(f"Node {label} has no id; skipping.")
                     continue
 
-                if not label in bins.keys():
+                if label not in bins.keys():
                     # start new list
                     all_labels = None
                     bins[label].append(node)
                     bin_l[label] = 1
 
                     # get properties from config if present
-                    if (
-                        label
-                        in self.translator.ontology.mapping.extended_schema
-                    ):
-                        cprops = self.translator.ontology.mapping.extended_schema.get(
-                            label
-                        ).get(
+                    if label in self.translator.ontology.mapping.extended_schema:
+                        cprops = self.translator.ontology.mapping.extended_schema.get(label).get(
                             "properties",
                         )
                     else:
@@ -473,18 +463,13 @@ class _BatchWriter(_Writer, ABC):
                     # get label hierarchy
                     # multiple labels:
                     if not force:
-                        all_labels = self.translator.ontology.get_ancestors(
-                            label
-                        )
+                        all_labels = self.translator.ontology.get_ancestors(label)
                     else:
                         all_labels = None
 
                     if all_labels:
                         # convert to pascal case
-                        all_labels = [
-                            self.translator.name_sentence_to_pascal(label)
-                            for label in all_labels
-                        ]
+                        all_labels = [self.translator.name_sentence_to_pascal(label) for label in all_labels]
                         # remove duplicates
                         all_labels = list(OrderedDict.fromkeys(all_labels))
                         # order alphabetically
@@ -492,9 +477,7 @@ class _BatchWriter(_Writer, ABC):
                         # concatenate with array delimiter
                         all_labels = self._write_array_string(all_labels)
                     else:
-                        all_labels = self.translator.name_sentence_to_pascal(
-                            label
-                        )
+                        all_labels = self.translator.name_sentence_to_pascal(label)
 
                     labels[label] = all_labels
 
@@ -539,16 +522,15 @@ class _BatchWriter(_Writer, ABC):
                 self.node_property_dict[label] = reference_props[label]
 
             return True
+        elif not isinstance(nodes, list):
+            logger.error("Nodes must be passed as list or generator.")
+            return False
         else:
-            if type(nodes) is not list:
-                logger.error("Nodes must be passed as list or generator.")
-                return False
-            else:
 
-                def gen(nodes):
-                    yield from nodes
+            def gen(nodes):
+                yield from nodes
 
-                return self._write_node_data(gen(nodes), batch_size=batch_size)
+            return self._write_node_data(gen(nodes), batch_size=batch_size)
 
     def _write_single_node_list_to_file(
         self,
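Note: the restructured tail of `_write_node_data` keeps a single code path: a plain list is wrapped in a trivial generator and the method recurses exactly once, so the main branch only ever sees a generator. The pattern in isolation, with a counter standing in for the real CSV logic:

    def write(items):
        if hasattr(items, "__next__"):  # already an iterator/generator
            return sum(1 for _ in items)
        if not isinstance(items, list):
            raise TypeError("items must be a list or generator")

        def gen(items):
            yield from items

        return write(gen(items))  # recurse once with a generator

    assert write([1, 2, 3]) == write(iter([1, 2, 3])) == 3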
@@ -557,11 +539,11 @@ class _BatchWriter(_Writer, ABC):
         prop_dict: dict,
         labels: str,
     ):
-        """
-        This function takes one list of biocypher nodes and writes them
+        """This function takes one list of biocypher nodes and writes them
         to a Neo4j admin import compatible CSV file.
 
         Args:
+        ----
             node_list (list): list of BioCypherNodes to be written
             label (str): the primary label of the node
             prop_dict (dict): properties of node class passed from parsing
@@ -570,7 +552,9 @@ class _BatchWriter(_Writer, ABC):
                 for the node class
 
         Returns:
+        -------
             bool: The return value. True for success, False otherwise.
+
         """
         if not all(isinstance(n, BioCypherNode) for n in node_list):
             logger.error("Nodes must be passed as type BioCypherNode.")
@@ -588,7 +572,7 @@ class _BatchWriter(_Writer, ABC):
             ref_props = list(prop_dict.keys())
 
             # compare lists order invariant
-            if not set(ref_props) == set(n_keys):
+            if set(ref_props) != set(n_keys):
                 onode = n.get_id()
                 oprop1 = set(ref_props).difference(n_keys)
                 oprop2 = set(n_keys).difference(ref_props)
@@ -622,11 +606,10 @@ class _BatchWriter(_Writer, ABC):
                     "boolean",
                 ]:
                     plist.append(str(p))
+                elif isinstance(p, list):
+                    plist.append(self._write_array_string(p))
                 else:
-                    if isinstance(p, list):
-                        plist.append(self._write_array_string(p))
-                    else:
-                        plist.append(f"{self.quote}{str(p)}{self.quote}")
+                    plist.append(f"{self.quote}{p!s}{self.quote}")
 
             line.append(self.delim.join(plist))
             line.append(labels)
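Note: the `f"{self.quote}{p!s}{self.quote}"` form is a pure style change; the `!s` conversion applies `str()` to the value inside the f-string, matching the previous explicit `str(p)` call:

    p = 3.14
    quote = '"'
    assert f"{quote}{p!s}{quote}" == f"{quote}{str(p)}{quote}" == '"3.14"'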
@@ -640,8 +623,7 @@ class _BatchWriter(_Writer, ABC):
         return True
 
     def _write_edge_data(self, edges, batch_size):
-        """
-        Writes biocypher edges to CSV conforming to the headers created
+        """Writes biocypher edges to CSV conforming to the headers created
         with `_write_edge_headers()`, and is actually required to be run
         before calling `_write_node_headers()` to set the
         :py:attr:`self.edge_property_dict` for passing the edge
@@ -649,17 +631,20 @@ class _BatchWriter(_Writer, ABC):
         from the :py:class:`BioCypherEdge` class.
 
         Args:
+        ----
             edges (BioCypherEdge): a list or generator of edges in
                 :py:class:`BioCypherEdge` format
 
         Returns:
+        -------
             bool: The return value. True for success, False otherwise.
 
         Todo:
+        ----
             - currently works for mixed edges but in practice often is
               called on one iterable containing one type of edge only
-        """
 
+        """
         if isinstance(edges, GeneratorType):
             logger.debug("Writing edge CSV from generator.")
 
@@ -675,14 +660,13 @@ class _BatchWriter(_Writer, ABC):
             for edge in edges:
                 if not (edge.get_source_id() and edge.get_target_id()):
                     logger.error(
-                        "Edge must have source and target node. "
-                        f"Caused by: {edge}",
+                        f"Edge must have source and target node. Caused by: {edge}",
                     )
                     continue
 
                 label = edge.get_label()
 
-                if not label in bins.keys():
+                if label not in bins.keys():
                     # start new list
                     bins[label].append(edge)
                     bin_l[label] = 1
@@ -693,13 +677,8 @@ class _BatchWriter(_Writer, ABC):
                     # (may not be if it is an edge that carries the
                     # "label_as_edge" property)
                     cprops = None
-                    if (
-                        label
-                        in self.translator.ontology.mapping.extended_schema
-                    ):
-                        cprops = self.translator.ontology.mapping.extended_schema.get(
-                            label
-                        ).get(
+                    if label in self.translator.ontology.mapping.extended_schema:
+                        cprops = self.translator.ontology.mapping.extended_schema.get(label).get(
                             "properties",
                         )
                     else:
@@ -707,9 +686,7 @@ class _BatchWriter(_Writer, ABC):
                         for (
                             k,
                             v,
-                        ) in (
-                            self.translator.ontology.mapping.extended_schema.items()
-                        ):
+                        ) in self.translator.ontology.mapping.extended_schema.items():
                             if isinstance(v, dict):
                                 if v.get("label_as_edge") == label:
                                     cprops = v.get("properties")
@@ -779,16 +756,15 @@ class _BatchWriter(_Writer, ABC):
                 self.edge_property_dict[label] = reference_props[label]
 
             return True
+        elif not isinstance(edges, list):
+            logger.error("Edges must be passed as list or generator.")
+            return False
         else:
-            if type(edges) is not list:
-                logger.error("Edges must be passed as list or generator.")
-                return False
-            else:
 
-                def gen(edges):
-                    yield from edges
+            def gen(edges):
+                yield from edges
 
-                return self._write_edge_data(gen(edges), batch_size=batch_size)
+            return self._write_edge_data(gen(edges), batch_size=batch_size)
 
     def _write_single_edge_list_to_file(
         self,
@@ -796,11 +772,11 @@ class _BatchWriter(_Writer, ABC):
         label: str,
         prop_dict: dict,
     ):
-        """
-        This function takes one list of biocypher edges and writes them
+        """This function takes one list of biocypher edges and writes them
         to a Neo4j admin import compatible CSV file.
 
         Args:
+        ----
             edge_list (list): list of BioCypherEdges to be written
 
             label (str): the label (type) of the edge
@@ -809,9 +785,10 @@ class _BatchWriter(_Writer, ABC):
                 function and their types
 
         Returns:
+        -------
             bool: The return value. True for success, False otherwise.
-        """
 
+        """
         if not all(isinstance(n, BioCypherEdge) for n in edge_list):
             logger.error("Edges must be passed as type BioCypherEdge.")
             return False
@@ -826,7 +803,7 @@ class _BatchWriter(_Writer, ABC):
             ref_props = list(prop_dict.keys())
 
             # compare list order invariant
-            if not set(ref_props) == set(e_keys):
+            if set(ref_props) != set(e_keys):
                 oedge = f"{e.get_source_id()}-{e.get_target_id()}"
                 oprop1 = set(ref_props).difference(e_keys)
                 oprop2 = set(e_keys).difference(ref_props)
@@ -857,11 +834,10 @@ class _BatchWriter(_Writer, ABC):
                     "boolean",
                 ]:
                     plist.append(str(p))
+                elif isinstance(p, list):
+                    plist.append(self._write_array_string(p))
                 else:
-                    if isinstance(p, list):
-                        plist.append(self._write_array_string(p))
-                    else:
-                        plist.append(self.quote + str(p) + self.quote)
+                    plist.append(self.quote + str(p) + self.quote)
 
             entries = [e.get_source_id()]
 
@@ -870,9 +846,7 @@ class _BatchWriter(_Writer, ABC):
 
             if label in ["IS_SOURCE_OF", "IS_TARGET_OF", "IS_PART_OF"]:
                 skip_id = True
-            elif not self.translator.ontology.mapping.extended_schema.get(
-                label
-            ):
+            elif not self.translator.ontology.mapping.extended_schema.get(label):
                 # find label in schema by label_as_edge
                 for (
                     k,
@@ -887,9 +861,9 @@ class _BatchWriter(_Writer, ABC):
             if schema_label:
                 if (
                     self.translator.ontology.mapping.extended_schema.get(
-                        schema_label
+                        schema_label,
                     ).get("use_id")
-                    == False
+                    == False  # noqa: E712 (seems to not work with 'not')
                 ):
                     skip_id = True
 
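Note: the retained `== False` comparison, with its new `noqa: E712` remark, is deliberate: `.get("use_id")` returns `None` when the key is absent, and `not None` is truthy while `None == False` is not, so rewriting it as `not ...` would also trigger for a missing key. A small demonstration of the distinction:

    schema = {"some label": {}}  # no 'use_id' key set
    use_id = schema["some label"].get("use_id")  # -> None

    assert not use_id                  # None is falsy
    assert (use_id == False) is False  # but None == False is False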
@@ -903,7 +877,7 @@ class _BatchWriter(_Writer, ABC):
                 entries.append(
                     self.translator.name_sentence_to_pascal(
                         e.get_label(),
-                    )
+                    ),
                 )
 
             lines.append(
@@ -917,10 +891,10 @@ class _BatchWriter(_Writer, ABC):
         return True
 
     def _write_next_part(self, label: str, lines: list):
-        """
-        This function writes a list of strings to a new part file.
+        """This function writes a list of strings to a new part file.
 
         Args:
+        ----
             label (str): the label (type) of the edge; internal
                 representation sentence case -> needs to become PascalCase
                 for disk representation
@@ -928,17 +902,15 @@ class _BatchWriter(_Writer, ABC):
 
             lines (list): list of strings to be written
 
         Returns:
+        -------
             bool: The return value. True for success, False otherwise.
+
         """
         # translate label to PascalCase
-        label_pascal = self.translator.name_sentence_to_pascal(
-            parse_label(label)
-        )
+        label_pascal = self.translator.name_sentence_to_pascal(parse_label(label))
 
         # list files in self.outdir
-        files = glob.glob(
-            os.path.join(self.outdir, f"{label_pascal}-part*.csv")
-        )
+        files = glob.glob(os.path.join(self.outdir, f"{label_pascal}-part*.csv"))
         # find file with highest part number
         if not files:
             next_part = 0
@@ -946,10 +918,7 @@ class _BatchWriter(_Writer, ABC):
         else:
             next_part = (
                 max(
-                    [
-                        int(f.split(".")[-2].split("-")[-1].replace("part", ""))
-                        for f in files
-                    ],
+                    [int(f.split(".")[-2].split("-")[-1].replace("part", "")) for f in files],
                 )
                 + 1
             )
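Note: the collapsed comprehension recovers the part number from file names such as `Protein-part000.csv`: split off the extension, take the trailing `-part...` token, strip the `part` prefix, and increment the maximum. Standalone, with illustrative file names:

    files = ["Protein-part000.csv", "Protein-part001.csv"]
    next_part = max([int(f.split(".")[-2].split("-")[-1].replace("part", "")) for f in files]) + 1
    assert next_part == 2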
@@ -974,31 +943,29 @@ class _BatchWriter(_Writer, ABC):
             self.parts[label].append(part)
 
     def get_import_call(self) -> str:
-        """
-        Function to return the import call detailing folder and
+        """Function to return the import call detailing folder and
         individual node and edge headers and data files, as well as
         delimiters and database name.
 
-        Returns:
+        Returns
+        -------
             str: a bash command for the database import
-        """
 
+        """
         return self._construct_import_call()
 
     def write_import_call(self) -> str:
-        """
-        Function to write the import call detailing folder and
+        """Function to write the import call detailing folder and
         individual node and edge headers and data files, as well as
         delimiters and database name, to the export folder as txt.
 
-        Returns:
+        Returns
+        -------
             str: The path of the file holding the import call.
-        """
 
+        """
         file_path = os.path.join(self.outdir, self._get_import_script_name())
-        logger.info(
-            f"Writing {self.db_name + ' ' if self.db_name else ''}import call to `{file_path}`."
-        )
+        logger.info(f"Writing {self.db_name + ' ' if self.db_name else ''}import call to `{file_path}`.")
 
         with open(file_path, "w", encoding="utf-8") as f:
             f.write(self._construct_import_call())
@@ -1007,16 +974,16 @@ class _BatchWriter(_Writer, ABC):
 
 
 def parse_label(label: str) -> str:
-    """
-
-    Check if the label is compliant with Neo4j naming conventions,
+    """Check if the label is compliant with Neo4j naming conventions,
     https://neo4j.com/docs/cypher-manual/current/syntax/naming/, and if not,
     remove non-compliant characters.
 
     Args:
+    ----
         label (str): The label to check
     Returns:
         str: The compliant label
+
     """
     # Check if the name contains only alphanumeric characters, underscore, or dollar sign
    # and dot (for class hierarchy of BioCypher)
@@ -1026,7 +993,7 @@ def parse_label(label: str) -> str:
     if non_matches:
         non_matches = list(set(non_matches))
         logger.warning(
-            f"Label is not compliant with Neo4j naming rules. Removed non compliant characters: {non_matches}"
+            f"Label is not compliant with Neo4j naming rules. Removed non compliant characters: {non_matches}",
         )
 
     def first_character_compliant(character: str) -> bool:
@@ -1037,7 +1004,5 @@ def parse_label(label: str) -> str:
         if first_character_compliant(c):
             matches = matches[matches.index(c) :]
             break
-        logger.warning(
-            "Label does not start with an alphabetic character or with $. Removed non compliant characters."
-        )
+        logger.warning("Label does not start with an alphabetic character or with $. Removed non compliant characters.")
     return "".join(matches).strip()
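Note: based on the regex described in the final hunks, `parse_label` drops characters outside `[A-Za-z0-9_$.]` (logging a warning) and trims a non-compliant leading run. A hedged usage sketch; the module path is inferred from the imports at the top of the diff and may differ:

    from biocypher.output.write._batch_writer import parse_label

    label = parse_label("gene<->disease")
    # expected: 'genedisease', with a warning naming '<', '>' and '-'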