biocypher 0.5.19__py3-none-any.whl → 0.5.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biocypher might be problematic. Click here for more details.

biocypher/_connect.py CHANGED
@@ -53,8 +53,6 @@ class _Neo4jDriver:
53
53
 
54
54
  increment_version (bool): Whether to increment the version number.
55
55
 
56
- ontology (Ontology): The ontology to use for mapping.
57
-
58
56
  translator (Translator): The translator to use for mapping.
59
57
 
60
58
  """
@@ -66,14 +64,12 @@ class _Neo4jDriver:
66
64
  user: str,
67
65
  password: str,
68
66
  multi_db: bool,
69
- ontology: Ontology,
70
67
  translator: Translator,
71
68
  wipe: bool = False,
72
69
  fetch_size: int = 1000,
73
70
  increment_version: bool = True,
74
71
  ):
75
- self._ontology = ontology
76
- self._translator = translator
72
+ self.translator = translator
77
73
 
78
74
  self._driver = neo4j_utils.Driver(
79
75
  db_name=database_name,
@@ -103,7 +99,7 @@ class _Neo4jDriver:
103
99
  "MATCH (v:BioCypher) " "WHERE NOT (v)-[:PRECEDES]->() " "RETURN v",
104
100
  )
105
101
  # add version node
106
- self.add_biocypher_nodes(self._ontology)
102
+ self.add_biocypher_nodes(self.translator.ontology)
107
103
 
108
104
  # connect version node to previous
109
105
  if db_version[0]:
@@ -111,7 +107,7 @@ class _Neo4jDriver:
111
107
  previous_id = previous["v"]["id"]
112
108
  e_meta = BioCypherEdge(
113
109
  previous_id,
114
- self._ontology.get_dict().get("node_id"),
110
+ self.translator.ontology.get_dict().get("node_id"),
115
111
  "PRECEDES",
116
112
  )
117
113
  self.add_biocypher_edges(e_meta)
@@ -142,7 +138,7 @@ class _Neo4jDriver:
142
138
  logger.info("Creating constraints for node types in config.")
143
139
 
144
140
  # get structure
145
- for leaf in self._ontology.extended_schema.items():
141
+ for leaf in self.translator.ontology.mapping.extended_schema.items():
146
142
  label = _misc.sentencecase_to_pascalcase(leaf[0])
147
143
  if leaf[1]["represented_as"] == "node":
148
144
  s = (
@@ -172,7 +168,7 @@ class _Neo4jDriver:
172
168
  - second entry: Neo4j summary.
173
169
  """
174
170
 
175
- bn = self._translator.translate_nodes(id_type_tuples)
171
+ bn = self.translator.translate_nodes(id_type_tuples)
176
172
  return self.add_biocypher_nodes(bn)
177
173
 
178
174
  def add_edges(self, id_src_tar_type_tuples: Iterable[tuple]) -> tuple:
@@ -204,7 +200,7 @@ class _Neo4jDriver:
204
200
  - second entry: Neo4j summary.
205
201
  """
206
202
 
207
- bn = self._translator.translate_edges(id_src_tar_type_tuples)
203
+ bn = self.translator.translate_edges(id_src_tar_type_tuples)
208
204
  return self.add_biocypher_edges(bn)
209
205
 
210
206
  def add_biocypher_nodes(
@@ -375,7 +371,6 @@ class _Neo4jDriver:
375
371
  def get_driver(
376
372
  dbms: str,
377
373
  translator: "Translator",
378
- ontology: "Ontology",
379
374
  ):
380
375
  """
381
376
  Function to return the writer class.
@@ -394,7 +389,6 @@ def get_driver(
394
389
  user=dbms_config["user"],
395
390
  password=dbms_config["password"],
396
391
  multi_db=dbms_config["multi_db"],
397
- ontology=ontology,
398
392
  translator=translator,
399
393
  )
400
394
 
biocypher/_core.py CHANGED
@@ -13,8 +13,10 @@ BioCypher core module. Interfaces with the user and distributes tasks to
13
13
  submodules.
14
14
  """
15
15
  from typing import Optional
16
+ import os
16
17
 
17
18
  from more_itertools import peekable
19
+ import yaml
18
20
 
19
21
  import pandas as pd
20
22
 
@@ -25,7 +27,7 @@ logger.debug(f"Loading module {__name__}.")
25
27
  from ._write import get_writer
26
28
  from ._config import config as _config
27
29
  from ._config import update_from_file as _file_update
28
- from ._create import BioCypherEdge, BioCypherNode
30
+ from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
29
31
  from ._pandas import Pandas
30
32
  from ._connect import get_driver
31
33
  from ._mapping import OntologyMapping
@@ -181,19 +183,6 @@ class BioCypher:
181
183
 
182
184
  return self._ontology_mapping
183
185
 
184
- def _get_translator(self) -> Translator:
185
- """
186
- Create translator if not exists and return.
187
- """
188
-
189
- if not self._translator:
190
- self._translator = Translator(
191
- ontology_mapping=self._get_ontology_mapping(),
192
- strict_mode=self._strict_mode,
193
- )
194
-
195
- return self._translator
196
-
197
186
  def _get_ontology(self) -> Ontology:
198
187
  """
199
188
  Create ontology if not exists and return.
@@ -208,17 +197,28 @@ class BioCypher:
208
197
 
209
198
  return self._ontology
210
199
 
200
+ def _get_translator(self) -> Translator:
201
+ """
202
+ Create translator if not exists and return.
203
+ """
204
+
205
+ if not self._translator:
206
+ self._translator = Translator(
207
+ ontology=self._get_ontology(),
208
+ strict_mode=self._strict_mode,
209
+ )
210
+
211
+ return self._translator
212
+
211
213
  def _get_writer(self):
212
214
  """
213
215
  Create writer if not online. Set as instance variable `self._writer`.
214
216
  """
215
217
 
216
- # Get worker
217
218
  if self._offline:
218
219
  self._writer = get_writer(
219
220
  dbms=self._dbms,
220
221
  translator=self._get_translator(),
221
- ontology=self._get_ontology(),
222
222
  deduplicator=self._get_deduplicator(),
223
223
  output_directory=self._output_directory,
224
224
  strict_mode=self._strict_mode,
@@ -235,7 +235,6 @@ class BioCypher:
235
235
  self._driver = get_driver(
236
236
  dbms=self._dbms,
237
237
  translator=self._get_translator(),
238
- ontology=self._get_ontology(),
239
238
  deduplicator=self._get_deduplicator(),
240
239
  )
241
240
  else:
@@ -318,14 +317,15 @@ class BioCypher:
318
317
  if not self._pd:
319
318
  self._pd = Pandas(
320
319
  translator=self._get_translator(),
321
- ontology=self._get_ontology(),
322
320
  deduplicator=self._get_deduplicator(),
323
321
  )
324
322
 
325
323
  entities = peekable(entities)
326
324
 
327
- if isinstance(entities.peek(), BioCypherNode) or isinstance(
328
- entities.peek(), BioCypherEdge
325
+ if (
326
+ isinstance(entities.peek(), BioCypherNode)
327
+ or isinstance(entities.peek(), BioCypherEdge)
328
+ or isinstance(entities.peek(), BioCypherRelAsNode)
329
329
  ):
330
330
  tentities = entities
331
331
  elif len(entities.peek()) < 4:
@@ -504,6 +504,73 @@ class BioCypher:
504
504
 
505
505
  self._writer.write_import_call()
506
506
 
507
+ def write_schema_info(self) -> None:
508
+ """
509
+ Write an extended schema info YAML file that extends the
510
+ `schema_config.yaml` with run-time information of the built KG. For
511
+ instance, include information on whether something present in the actual
512
+ knowledge graph, whether it is a relationship (which is important in the
513
+ case of representing relationships as nodes) and the actual sources and
514
+ targets of edges. Since this file can be used in place of the original
515
+ `schema_config.yaml` file, it indicates that it is the extended schema
516
+ by setting `is_schema_info` to `true`.
517
+
518
+ We start by using the `extended_schema` dictionary from the ontology
519
+ class instance, which contains all expanded entities and relationships.
520
+ The information of whether something is a relationship can be gathered
521
+ from the deduplicator instance, which keeps track of all entities that
522
+ have been seen.
523
+ """
524
+
525
+ if not self._offline:
526
+ raise NotImplementedError(
527
+ "Cannot write schema info in online mode."
528
+ )
529
+
530
+ ontology = self._get_ontology()
531
+ schema = ontology.mapping.extended_schema
532
+ schema["is_schema_info"] = True
533
+
534
+ deduplicator = self._get_deduplicator()
535
+ for node in deduplicator.entity_types:
536
+ if node in schema.keys():
537
+ schema[node]["present_in_knowledge_graph"] = True
538
+ schema[node]["is_relationship"] = False
539
+ else:
540
+ logger.info(
541
+ f"Node {node} not present in extended schema. "
542
+ "Skipping schema info."
543
+ )
544
+
545
+ # find 'label_as_edge' cases in schema entries
546
+ changed_labels = {}
547
+ for k, v in schema.items():
548
+ if not isinstance(v, dict):
549
+ continue
550
+ if "label_as_edge" in v.keys():
551
+ if v["label_as_edge"] in deduplicator.seen_relationships.keys():
552
+ changed_labels[v["label_as_edge"]] = k
553
+
554
+ for edge in deduplicator.seen_relationships.keys():
555
+ if edge in changed_labels.keys():
556
+ edge = changed_labels[edge]
557
+ if edge in schema.keys():
558
+ schema[edge]["present_in_knowledge_graph"] = True
559
+ schema[edge]["is_relationship"] = True
560
+ # TODO information about source and target nodes
561
+ else:
562
+ logger.info(
563
+ f"Edge {edge} not present in extended schema. "
564
+ "Skipping schema info."
565
+ )
566
+
567
+ # write to output directory as YAML file
568
+ path = os.path.join(self._output_directory, "schema_info.yaml")
569
+ with open(path, "w") as f:
570
+ f.write(yaml.dump(schema))
571
+
572
+ return schema
573
+
507
574
  # TRANSLATION METHODS ###
508
575
 
509
576
  def translate_term(self, term: str) -> str:
biocypher/_deduplicate.py CHANGED
@@ -2,7 +2,7 @@ from ._logger import logger
2
2
 
3
3
  logger.debug(f"Loading module {__name__}.")
4
4
 
5
- from ._create import BioCypherEdge, BioCypherNode
5
+ from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
6
6
 
7
7
 
8
8
  class Deduplicator:
@@ -19,15 +19,17 @@ class Deduplicator:
19
19
  """
20
20
 
21
21
  def __init__(self):
22
- self.seen_node_ids = set()
23
- self.duplicate_node_ids = set()
24
- self.duplicate_node_types = set()
22
+ self.seen_entity_ids = set()
23
+ self.duplicate_entity_ids = set()
25
24
 
26
- self.seen_edges = {}
27
- self.duplicate_edge_ids = set()
28
- self.duplicate_edge_types = set()
25
+ self.entity_types = set()
26
+ self.duplicate_entity_types = set()
29
27
 
30
- def node_seen(self, node: BioCypherNode) -> bool:
28
+ self.seen_relationships = {}
29
+ self.duplicate_relationship_ids = set()
30
+ self.duplicate_relationship_types = set()
31
+
32
+ def node_seen(self, entity: BioCypherNode) -> bool:
31
33
  """
32
34
  Adds a node to the instance and checks if it has been seen before.
33
35
 
@@ -37,19 +39,22 @@ class Deduplicator:
37
39
  Returns:
38
40
  True if the node has been seen before, False otherwise.
39
41
  """
40
- if node.get_id() in self.seen_node_ids:
41
- self.duplicate_node_ids.add(node.get_id())
42
- if node.get_label() not in self.duplicate_node_types:
42
+ if entity.get_label() not in self.entity_types:
43
+ self.entity_types.add(entity.get_label())
44
+
45
+ if entity.get_id() in self.seen_entity_ids:
46
+ self.duplicate_entity_ids.add(entity.get_id())
47
+ if entity.get_label() not in self.duplicate_entity_types:
43
48
  logger.warning(
44
- f"Duplicate node type {node.get_label()} found. "
49
+ f"Duplicate node type {entity.get_label()} found. "
45
50
  )
46
- self.duplicate_node_types.add(node.get_label())
51
+ self.duplicate_entity_types.add(entity.get_label())
47
52
  return True
48
53
 
49
- self.seen_node_ids.add(node.get_id())
54
+ self.seen_entity_ids.add(entity.get_id())
50
55
  return False
51
56
 
52
- def edge_seen(self, edge: BioCypherEdge) -> bool:
57
+ def edge_seen(self, relationship: BioCypherEdge) -> bool:
53
58
  """
54
59
  Adds an edge to the instance and checks if it has been seen before.
55
60
 
@@ -59,23 +64,57 @@ class Deduplicator:
59
64
  Returns:
60
65
  True if the edge has been seen before, False otherwise.
61
66
  """
62
- if edge.get_type() not in self.seen_edges:
63
- self.seen_edges[edge.get_type()] = set()
67
+ if relationship.get_type() not in self.seen_relationships:
68
+ self.seen_relationships[relationship.get_type()] = set()
64
69
 
65
70
  # concatenate source and target if no id is present
66
- if not edge.get_id():
67
- _id = f"{edge.get_source_id()}_{edge.get_target_id()}"
71
+ if not relationship.get_id():
72
+ _id = (
73
+ f"{relationship.get_source_id()}_{relationship.get_target_id()}"
74
+ )
68
75
  else:
69
- _id = edge.get_id()
76
+ _id = relationship.get_id()
77
+
78
+ if _id in self.seen_relationships[relationship.get_type()]:
79
+ self.duplicate_relationship_ids.add(_id)
80
+ if relationship.get_type() not in self.duplicate_relationship_types:
81
+ logger.warning(
82
+ f"Duplicate edge type {relationship.get_type()} found. "
83
+ )
84
+ self.duplicate_relationship_types.add(relationship.get_type())
85
+ return True
86
+
87
+ self.seen_relationships[relationship.get_type()].add(_id)
88
+ return False
89
+
90
+ def rel_as_node_seen(self, rel_as_node: BioCypherRelAsNode) -> bool:
91
+ """
92
+ Adds a rel_as_node to the instance (one entity and two relationships)
93
+ and checks if it has been seen before. Only the node is relevant for
94
+ identifying the rel_as_node as a duplicate.
95
+
96
+ Args:
97
+ rel_as_node: BioCypherRelAsNode to be added.
98
+
99
+ Returns:
100
+ True if the rel_as_node has been seen before, False otherwise.
101
+ """
102
+ node = rel_as_node.get_node()
103
+
104
+ if node.get_label() not in self.seen_relationships:
105
+ self.seen_relationships[node.get_label()] = set()
106
+
107
+ # rel as node always has an id
108
+ _id = node.get_id()
70
109
 
71
- if _id in self.seen_edges[edge.get_type()]:
72
- self.duplicate_edge_ids.add(_id)
73
- if edge.get_type() not in self.duplicate_edge_types:
74
- logger.warning(f"Duplicate edge type {edge.get_type()} found. ")
75
- self.duplicate_edge_types.add(edge.get_type())
110
+ if _id in self.seen_relationships[node.get_type()]:
111
+ self.duplicate_relationship_ids.add(_id)
112
+ if node.get_type() not in self.duplicate_relationship_types:
113
+ logger.warning(f"Duplicate edge type {node.get_type()} found. ")
114
+ self.duplicate_relationship_types.add(node.get_type())
76
115
  return True
77
116
 
78
- self.seen_edges[edge.get_type()].add(_id)
117
+ self.seen_relationships[node.get_type()].add(_id)
79
118
  return False
80
119
 
81
120
  def get_duplicate_nodes(self):
@@ -86,8 +125,8 @@ class Deduplicator:
86
125
  list: list of duplicate nodes
87
126
  """
88
127
 
89
- if self.duplicate_node_types:
90
- return (self.duplicate_node_types, self.duplicate_node_ids)
128
+ if self.duplicate_entity_types:
129
+ return (self.duplicate_entity_types, self.duplicate_entity_ids)
91
130
  else:
92
131
  return None
93
132
 
@@ -99,7 +138,10 @@ class Deduplicator:
99
138
  list: list of duplicate edges
100
139
  """
101
140
 
102
- if self.duplicate_edge_types:
103
- return (self.duplicate_edge_types, self.duplicate_edge_ids)
141
+ if self.duplicate_relationship_types:
142
+ return (
143
+ self.duplicate_relationship_types,
144
+ self.duplicate_relationship_ids,
145
+ )
104
146
  else:
105
147
  return None
biocypher/_metadata.py CHANGED
@@ -19,7 +19,7 @@ import importlib.metadata
19
19
 
20
20
  import toml
21
21
 
22
- _VERSION = "0.5.19"
22
+ _VERSION = "0.5.20"
23
23
 
24
24
 
25
25
  def get_metadata():
biocypher/_ontology.py CHANGED
@@ -269,7 +269,7 @@ class Ontology:
269
269
  """
270
270
 
271
271
  self._head_ontology_meta = head_ontology
272
- self.extended_schema = ontology_mapping.extended_schema
272
+ self.mapping = ontology_mapping
273
273
  self._tail_ontology_meta = tail_ontologies
274
274
 
275
275
  self._tail_ontologies = None
@@ -403,7 +403,7 @@ class Ontology:
403
403
  if not self._nx_graph:
404
404
  self._nx_graph = self._head_ontology.get_nx_graph().copy()
405
405
 
406
- for key, value in self.extended_schema.items():
406
+ for key, value in self.mapping.extended_schema.items():
407
407
  if not value.get("is_a"):
408
408
  if self._nx_graph.has_node(value.get("synonym_for")):
409
409
  continue
@@ -485,7 +485,7 @@ class Ontology:
485
485
  setting the synonym as the primary node label.
486
486
  """
487
487
 
488
- for key, value in self.extended_schema.items():
488
+ for key, value in self.mapping.extended_schema.items():
489
489
  if key in self._nx_graph.nodes:
490
490
  self._nx_graph.nodes[key].update(value)
491
491
 
@@ -541,9 +541,9 @@ class Ontology:
541
541
 
542
542
  if not full:
543
543
  # set of leaves and their intermediate parents up to the root
544
- filter_nodes = set(self.extended_schema.keys())
544
+ filter_nodes = set(self.mapping.extended_schema.keys())
545
545
 
546
- for node in self.extended_schema.keys():
546
+ for node in self.mapping.extended_schema.keys():
547
547
  filter_nodes.update(self.get_ancestors(node).nodes)
548
548
 
549
549
  # filter graph
@@ -557,11 +557,13 @@ class Ontology:
557
557
  tree = _misc.create_tree_visualisation(G)
558
558
 
559
559
  # add synonym information
560
- for node in self.extended_schema:
561
- if self.extended_schema[node].get("synonym_for"):
560
+ for node in self.mapping.extended_schema:
561
+ if not isinstance(self.mapping.extended_schema[node], dict):
562
+ continue
563
+ if self.mapping.extended_schema[node].get("synonym_for"):
562
564
  tree.nodes[node].tag = (
563
565
  f"{node} = "
564
- f"{self.extended_schema[node].get('synonym_for')}"
566
+ f"{self.mapping.extended_schema[node].get('synonym_for')}"
565
567
  )
566
568
 
567
569
  tree.show()
@@ -602,7 +604,7 @@ class Ontology:
602
604
  "node_id": self._get_current_id(),
603
605
  "node_label": "BioCypher",
604
606
  "properties": {
605
- "schema": "self.extended_schema",
607
+ "schema": "self.ontology_mapping.extended_schema",
606
608
  },
607
609
  }
608
610
 
biocypher/_pandas.py CHANGED
@@ -1,11 +1,10 @@
1
1
  import pandas as pd
2
2
 
3
- from ._create import BioCypherEdge, BioCypherNode
3
+ from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
4
4
 
5
5
 
6
6
  class Pandas:
7
- def __init__(self, ontology, translator, deduplicator):
8
- self.ontology = ontology
7
+ def __init__(self, translator, deduplicator):
9
8
  self.translator = translator
10
9
  self.deduplicator = deduplicator
11
10
 
@@ -18,22 +17,48 @@ class Pandas:
18
17
  """
19
18
  lists = {}
20
19
  for entity in entities:
21
- if not isinstance(entity, BioCypherNode) and not isinstance(
22
- entity, BioCypherEdge
20
+ if (
21
+ not isinstance(entity, BioCypherNode)
22
+ and not isinstance(entity, BioCypherEdge)
23
+ and not isinstance(entity, BioCypherRelAsNode)
23
24
  ):
24
25
  raise TypeError(
25
- f"Expected a BioCypherNode or BioCypherEdge, got {type(entity)}."
26
+ "Expected a BioCypherNode / BioCypherEdge / "
27
+ f"BioCypherRelAsNode, got {type(entity)}."
26
28
  )
27
29
 
28
30
  if isinstance(entity, BioCypherNode):
29
31
  seen = self.deduplicator.node_seen(entity)
30
32
  elif isinstance(entity, BioCypherEdge):
31
33
  seen = self.deduplicator.edge_seen(entity)
34
+ elif isinstance(entity, BioCypherRelAsNode):
35
+ seen = self.deduplicator.rel_as_node_seen(entity)
32
36
 
33
37
  if seen:
34
38
  continue
35
39
 
36
- _type = entity.get_label()
40
+ if isinstance(entity, BioCypherRelAsNode):
41
+ node = entity.get_node()
42
+ source_edge = entity.get_source_edge()
43
+ target_edge = entity.get_target_edge()
44
+
45
+ _type = node.get_type()
46
+ if not _type in lists:
47
+ lists[_type] = []
48
+ lists[_type].append(node)
49
+
50
+ _source_type = source_edge.get_type()
51
+ if not _source_type in lists:
52
+ lists[_source_type] = []
53
+ lists[_source_type].append(source_edge)
54
+
55
+ _target_type = target_edge.get_type()
56
+ if not _target_type in lists:
57
+ lists[_target_type] = []
58
+ lists[_target_type].append(target_edge)
59
+ continue
60
+
61
+ _type = entity.get_type()
37
62
  if not _type in lists:
38
63
  lists[_type] = []
39
64
  lists[_type].append(entity)
biocypher/_translate.py CHANGED
@@ -23,7 +23,7 @@ from more_itertools import peekable
23
23
 
24
24
  from . import _misc
25
25
  from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
26
- from ._mapping import OntologyMapping
26
+ from ._ontology import Ontology
27
27
 
28
28
  __all__ = ["BiolinkAdapter", "Translator"]
29
29
 
@@ -41,9 +41,7 @@ class Translator:
41
41
  and cypher queries.
42
42
  """
43
43
 
44
- def __init__(
45
- self, ontology_mapping: "OntologyMapping", strict_mode: bool = False
46
- ):
44
+ def __init__(self, ontology: "Ontology", strict_mode: bool = False):
47
45
  """
48
46
  Args:
49
47
  leaves:
@@ -57,7 +55,7 @@ class Translator:
57
55
  carry source, licence, and version information.
58
56
  """
59
57
 
60
- self.extended_schema = ontology_mapping.extended_schema
58
+ self.ontology = ontology
61
59
  self.strict_mode = strict_mode
62
60
 
63
61
  # record nodes without biolink type configured in schema_config.yaml
@@ -71,7 +69,7 @@ class Translator:
71
69
 
72
70
  def translate_nodes(
73
71
  self,
74
- id_type_prop_tuples: Iterable,
72
+ node_tuples: Iterable,
75
73
  ) -> Generator[BioCypherNode, None, None]:
76
74
  """
77
75
  Translates input node representation to a representation that
@@ -79,16 +77,16 @@ class Translator:
79
77
  requires explicit statement of node type on pass.
80
78
 
81
79
  Args:
82
- id_type_tuples (list of tuples): collection of tuples
80
+ node_tuples (list of tuples): collection of tuples
83
81
  representing individual nodes by their unique id and a type
84
82
  that is translated from the original database notation to
85
83
  the corresponding BioCypher notation.
86
84
 
87
85
  """
88
86
 
89
- self._log_begin_translate(id_type_prop_tuples, "nodes")
87
+ self._log_begin_translate(node_tuples, "nodes")
90
88
 
91
- for _id, _type, _props in id_type_prop_tuples:
89
+ for _id, _type, _props in node_tuples:
92
90
  # check for strict mode requirements
93
91
  required_props = ["source", "licence", "version"]
94
92
 
@@ -132,8 +130,9 @@ class Translator:
132
130
  """
133
131
 
134
132
  return (
135
- self.extended_schema[_bl_type]["preferred_id"]
136
- if "preferred_id" in self.extended_schema.get(_bl_type, {})
133
+ self.ontology.mapping.extended_schema[_bl_type]["preferred_id"]
134
+ if "preferred_id"
135
+ in self.ontology.mapping.extended_schema.get(_bl_type, {})
137
136
  else "id"
138
137
  )
139
138
 
@@ -142,7 +141,9 @@ class Translator:
142
141
  Filters properties for those specified in schema_config if any.
143
142
  """
144
143
 
145
- filter_props = self.extended_schema[bl_type].get("properties", {})
144
+ filter_props = self.ontology.mapping.extended_schema[bl_type].get(
145
+ "properties", {}
146
+ )
146
147
 
147
148
  # strict mode: add required properties (only if there is a whitelist)
148
149
  if self.strict_mode and filter_props:
@@ -150,7 +151,7 @@ class Translator:
150
151
  {"source": "str", "licence": "str", "version": "str"},
151
152
  )
152
153
 
153
- exclude_props = self.extended_schema[bl_type].get(
154
+ exclude_props = self.ontology.mapping.extended_schema[bl_type].get(
154
155
  "exclude_properties", []
155
156
  )
156
157
 
@@ -188,7 +189,7 @@ class Translator:
188
189
 
189
190
  def translate_edges(
190
191
  self,
191
- id_src_tar_type_prop_tuples: Iterable,
192
+ edge_tuples: Iterable,
192
193
  ) -> Generator[Union[BioCypherEdge, BioCypherRelAsNode], None, None]:
193
194
  """
194
195
  Translates input edge representation to a representation that
@@ -197,7 +198,7 @@ class Translator:
197
198
 
198
199
  Args:
199
200
 
200
- id_src_tar_type_prop_tuples (list of tuples):
201
+ edge_tuples (list of tuples):
201
202
 
202
203
  collection of tuples representing source and target of
203
204
  an interaction via their unique ids as well as the type
@@ -206,18 +207,18 @@ class Translator:
206
207
  Can optionally possess its own ID.
207
208
  """
208
209
 
209
- self._log_begin_translate(id_src_tar_type_prop_tuples, "edges")
210
+ self._log_begin_translate(edge_tuples, "edges")
210
211
 
211
212
  # legacy: deal with 4-tuples (no edge id)
212
213
  # TODO remove for performance reasons once safe
213
- id_src_tar_type_prop_tuples = peekable(id_src_tar_type_prop_tuples)
214
- if len(id_src_tar_type_prop_tuples.peek()) == 4:
215
- id_src_tar_type_prop_tuples = [
214
+ edge_tuples = peekable(edge_tuples)
215
+ if len(edge_tuples.peek()) == 4:
216
+ edge_tuples = [
216
217
  (None, src, tar, typ, props)
217
- for src, tar, typ, props in id_src_tar_type_prop_tuples
218
+ for src, tar, typ, props in edge_tuples
218
219
  ]
219
220
 
220
- for _id, _src, _tar, _type, _props in id_src_tar_type_prop_tuples:
221
+ for _id, _src, _tar, _type, _props in edge_tuples:
221
222
  # check for strict mode requirements
222
223
  if self.strict_mode:
223
224
  if not "source" in _props:
@@ -239,7 +240,9 @@ class Translator:
239
240
  # filter properties for those specified in schema_config if any
240
241
  _filtered_props = self._filter_props(bl_type, _props)
241
242
 
242
- rep = self.extended_schema[bl_type]["represented_as"]
243
+ rep = self.ontology.mapping.extended_schema[bl_type][
244
+ "represented_as"
245
+ ]
243
246
 
244
247
  if rep == "node":
245
248
  if _id:
@@ -295,9 +298,9 @@ class Translator:
295
298
  yield BioCypherRelAsNode(n, e_s, e_t)
296
299
 
297
300
  else:
298
- edge_label = self.extended_schema[bl_type].get(
299
- "label_as_edge"
300
- )
301
+ edge_label = self.ontology.mapping.extended_schema[
302
+ bl_type
303
+ ].get("label_as_edge")
301
304
 
302
305
  if edge_label is None:
303
306
  edge_label = bl_type
@@ -356,7 +359,7 @@ class Translator:
356
359
 
357
360
  self._ontology_mapping = {}
358
361
 
359
- for key, value in self.extended_schema.items():
362
+ for key, value in self.ontology.mapping.extended_schema.items():
360
363
  labels = value.get("input_label") or value.get("label_in_input")
361
364
 
362
365
  if isinstance(labels, str):
biocypher/_write.py CHANGED
@@ -125,7 +125,6 @@ class _BatchWriter(ABC):
125
125
 
126
126
  def __init__(
127
127
  self,
128
- ontology: "Ontology",
129
128
  translator: "Translator",
130
129
  deduplicator: "Deduplicator",
131
130
  delimiter: str,
@@ -167,10 +166,6 @@ class _BatchWriter(ABC):
167
166
  - _get_import_script_name
168
167
 
169
168
  Args:
170
- ontology:
171
- Instance of :py:class:`Ontology` to enable translation and
172
- ontology queries
173
-
174
169
  translator:
175
170
  Instance of :py:class:`Translator` to enable translation of
176
171
  nodes and manipulation of properties.
@@ -251,8 +246,6 @@ class _BatchWriter(ABC):
251
246
  self.wipe = wipe
252
247
  self.strict_mode = strict_mode
253
248
 
254
- self.extended_schema = ontology.extended_schema
255
- self.ontology = ontology
256
249
  self.translator = translator
257
250
  self.deduplicator = deduplicator
258
251
  self.node_property_dict = {}
@@ -352,34 +345,34 @@ class _BatchWriter(ABC):
352
345
  bool: The return value. True for success, False otherwise.
353
346
  """
354
347
  passed = False
355
- # unwrap generator in one step
356
348
  edges = list(edges) # force evaluation to handle empty generator
357
349
  if edges:
358
- z = zip(
359
- *(
360
- (
361
- e.get_node(),
362
- [
363
- e.get_source_edge(),
364
- e.get_target_edge(),
365
- ],
366
- )
367
- if isinstance(e, BioCypherRelAsNode)
368
- else (None, [e])
369
- for e in edges
370
- )
371
- )
372
- nod, edg = (list(a) for a in z)
373
- nod = [n for n in nod if n]
374
- edg = [val for sublist in edg for val in sublist] # flatten
350
+ nodes_flat = []
351
+ edges_flat = []
352
+ for edge in edges:
353
+ if isinstance(edge, BioCypherRelAsNode):
354
+ # check if relationship has already been written, if so skip
355
+ if self.deduplicator.rel_as_node_seen(edge):
356
+ continue
375
357
 
376
- if nod and edg:
377
- passed = self.write_nodes(nod) and self._write_edge_data(
378
- edg,
358
+ nodes_flat.append(edge.get_node())
359
+ edges_flat.append(edge.get_source_edge())
360
+ edges_flat.append(edge.get_target_edge())
361
+
362
+ else:
363
+ # check if relationship has already been written, if so skip
364
+ if self.deduplicator.edge_seen(edge):
365
+ continue
366
+
367
+ edges_flat.append(edge)
368
+
369
+ if nodes_flat and edges_flat:
370
+ passed = self.write_nodes(nodes_flat) and self._write_edge_data(
371
+ edges_flat,
379
372
  batch_size,
380
373
  )
381
374
  else:
382
- passed = self._write_edge_data(edg, batch_size)
375
+ passed = self._write_edge_data(edges_flat, batch_size)
383
376
 
384
377
  else:
385
378
  # is this a problem? if the generator or list is empty, we
@@ -451,8 +444,12 @@ class _BatchWriter(ABC):
451
444
  bin_l[label] = 1
452
445
 
453
446
  # get properties from config if present
454
- cprops = self.extended_schema.get(label).get(
455
- "properties",
447
+ cprops = (
448
+ self.translator.ontology.mapping.extended_schema.get(
449
+ label
450
+ ).get(
451
+ "properties",
452
+ )
456
453
  )
457
454
  if cprops:
458
455
  d = dict(cprops)
@@ -486,7 +483,7 @@ class _BatchWriter(ABC):
486
483
 
487
484
  # get label hierarchy
488
485
  # multiple labels:
489
- all_labels = self.ontology.get_ancestors(label)
486
+ all_labels = self.translator.ontology.get_ancestors(label)
490
487
 
491
488
  if all_labels:
492
489
  # convert to pascal case
@@ -682,10 +679,6 @@ class _BatchWriter(ABC):
682
679
  # for each label to check for consistency and their type
683
680
  # for now, relevant for `int`
684
681
  for edge in edges:
685
- # check for duplicates
686
- if self.deduplicator.edge_seen(edge):
687
- continue
688
-
689
682
  if not (edge.get_source_id() and edge.get_target_id()):
690
683
  logger.error(
691
684
  "Edge must have source and target node. "
@@ -706,13 +699,23 @@ class _BatchWriter(ABC):
706
699
  # (may not be if it is an edge that carries the
707
700
  # "label_as_edge" property)
708
701
  cprops = None
709
- if label in self.extended_schema:
710
- cprops = self.extended_schema.get(label).get(
702
+ if (
703
+ label
704
+ in self.translator.ontology.mapping.extended_schema
705
+ ):
706
+ cprops = self.translator.ontology.mapping.extended_schema.get(
707
+ label
708
+ ).get(
711
709
  "properties",
712
710
  )
713
711
  else:
714
712
  # try via "label_as_edge"
715
- for k, v in self.extended_schema.items():
713
+ for (
714
+ k,
715
+ v,
716
+ ) in (
717
+ self.translator.ontology.mapping.extended_schema.items()
718
+ ):
716
719
  if isinstance(v, dict):
717
720
  if v.get("label_as_edge") == label:
718
721
  cprops = v.get("properties")
@@ -873,9 +876,14 @@ class _BatchWriter(ABC):
873
876
 
874
877
  if label in ["IS_SOURCE_OF", "IS_TARGET_OF", "IS_PART_OF"]:
875
878
  skip_id = True
876
- elif not self.extended_schema.get(label):
879
+ elif not self.translator.ontology.mapping.extended_schema.get(
880
+ label
881
+ ):
877
882
  # find label in schema by label_as_edge
878
- for k, v in self.extended_schema.items():
883
+ for (
884
+ k,
885
+ v,
886
+ ) in self.translator.ontology.mapping.extended_schema.items():
879
887
  if v.get("label_as_edge") == label:
880
888
  schema_label = k
881
889
  break
@@ -884,7 +892,9 @@ class _BatchWriter(ABC):
884
892
 
885
893
  if schema_label:
886
894
  if (
887
- self.extended_schema.get(schema_label).get("use_id")
895
+ self.translator.ontology.mapping.extended_schema.get(
896
+ schema_label
897
+ ).get("use_id")
888
898
  == False
889
899
  ):
890
900
  skip_id = True
@@ -1009,6 +1019,7 @@ class _Neo4jBatchWriter(_BatchWriter):
1009
1019
 
1010
1020
  This class inherits from the abstract class "_BatchWriter" and implements the
1011
1021
  Neo4j-specific methods:
1022
+
1012
1023
  - _write_node_headers
1013
1024
  - _write_edge_headers
1014
1025
  - _construct_import_call
@@ -1181,9 +1192,14 @@ class _Neo4jBatchWriter(_BatchWriter):
1181
1192
 
1182
1193
  if label in ["IS_SOURCE_OF", "IS_TARGET_OF", "IS_PART_OF"]:
1183
1194
  skip_id = True
1184
- elif not self.extended_schema.get(label):
1195
+ elif not self.translator.ontology.mapping.extended_schema.get(
1196
+ label
1197
+ ):
1185
1198
  # find label in schema by label_as_edge
1186
- for k, v in self.extended_schema.items():
1199
+ for (
1200
+ k,
1201
+ v,
1202
+ ) in self.translator.ontology.mapping.extended_schema.items():
1187
1203
  if v.get("label_as_edge") == label:
1188
1204
  schema_label = k
1189
1205
  break
@@ -1194,7 +1210,9 @@ class _Neo4jBatchWriter(_BatchWriter):
1194
1210
 
1195
1211
  if schema_label:
1196
1212
  if (
1197
- self.extended_schema.get(schema_label).get("use_id")
1213
+ self.translator.ontology.mapping.extended_schema.get(
1214
+ schema_label
1215
+ ).get("use_id")
1198
1216
  == False
1199
1217
  ):
1200
1218
  skip_id = True
@@ -1352,9 +1370,9 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
1352
1370
  f.write(row)
1353
1371
 
1354
1372
  # add collection from schema config
1355
- collection = self.extended_schema[label].get(
1356
- "db_collection_name", None
1357
- )
1373
+ collection = self.translator.ontology.mapping.extended_schema[
1374
+ label
1375
+ ].get("db_collection_name", None)
1358
1376
 
1359
1377
  # add file path to neo4 admin import statement
1360
1378
  # do once for each part file
@@ -1433,16 +1451,19 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
1433
1451
  f.write(row)
1434
1452
 
1435
1453
  # add collection from schema config
1436
- if not self.extended_schema.get(label):
1437
- for _, v in self.extended_schema.items():
1454
+ if not self.translator.ontology.mapping.extended_schema.get(label):
1455
+ for (
1456
+ _,
1457
+ v,
1458
+ ) in self.translator.ontology.mapping.extended_schema.items():
1438
1459
  if v.get("label_as_edge") == label:
1439
1460
  collection = v.get("db_collection_name", None)
1440
1461
  break
1441
1462
 
1442
1463
  else:
1443
- collection = self.extended_schema[label].get(
1444
- "db_collection_name", None
1445
- )
1464
+ collection = self.translator.ontology.mapping.extended_schema[
1465
+ label
1466
+ ].get("db_collection_name", None)
1446
1467
 
1447
1468
  # add file path to neo4 admin import statement (import call path
1448
1469
  # may be different from actual output path)
@@ -1520,6 +1541,7 @@ class _PostgreSQLBatchWriter(_BatchWriter):
1520
1541
 
1521
1542
  This class inherits from the abstract class "_BatchWriter" and implements the
1522
1543
  PostgreSQL-specific methods:
1544
+
1523
1545
  - _write_node_headers
1524
1546
  - _write_edge_headers
1525
1547
  - _construct_import_call
@@ -1839,7 +1861,6 @@ DBMS_TO_CLASS = {
1839
1861
  def get_writer(
1840
1862
  dbms: str,
1841
1863
  translator: "Translator",
1842
- ontology: "Ontology",
1843
1864
  deduplicator: "Deduplicator",
1844
1865
  output_directory: str,
1845
1866
  strict_mode: bool,
@@ -1854,8 +1875,6 @@ def get_writer(
1854
1875
 
1855
1876
  translator: the Translator object.
1856
1877
 
1857
- ontology: the Ontology object.
1858
-
1859
1878
  output_directory: the directory to write the output files to.
1860
1879
 
1861
1880
  strict_mode: whether to use strict mode.
@@ -1879,7 +1898,6 @@ def get_writer(
1879
1898
 
1880
1899
  if writer is not None:
1881
1900
  return writer(
1882
- ontology=ontology,
1883
1901
  translator=translator,
1884
1902
  deduplicator=deduplicator,
1885
1903
  delimiter=dbms_config.get("delimiter"),
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: biocypher
3
- Version: 0.5.19
3
+ Version: 0.5.20
4
4
  Summary: A unifying framework for biomedical research knowledge graphs
5
5
  Home-page: https://github.com/biocypher/biocypher
6
6
  License: MIT
@@ -38,7 +38,9 @@ Description-Content-Type: text/markdown
38
38
  ![Python](https://img.shields.io/badge/python-3.10-blue.svg)
39
39
  [![PyPI version](https://badge.fury.io/py/biocypher.svg)](https://badge.fury.io/py/biocypher)
40
40
  [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
41
- ![Docs build](https://github.com/biocypher/biocypher/actions/workflows/sphinx_autodoc.yml/badge.svg)
41
+ [![CI](https://github.com/biocypher/biocypher/actions/workflows/ci_cd.yaml/badge.svg)](https://github.com/biocypher/biocypher/actions/workflows/ci_cd.yaml)
42
+ ![Coverage](https://raw.githubusercontent.com/biocypher/biocypher/coverage/coverage.svg)
43
+ [![Docs build](https://github.com/biocypher/biocypher/actions/workflows/sphinx_autodoc.yaml/badge.svg)](https://github.com/biocypher/biocypher/actions/workflows/sphinx_autodoc.yaml)
42
44
  [![Downloads](https://static.pepy.tech/badge/biocypher)](https://pepy.tech/project/biocypher)
43
45
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
44
46
  [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square)](http://makeapullrequest.com)
@@ -5,19 +5,19 @@ biocypher/_config/test_config.yaml,sha256=Np8jeS5_EP6HHOvMKb7B_Tkyqd5YaYlYz_DVsX
5
5
  biocypher/_config/test_schema_config.yaml,sha256=D1600WgEj3iTXrumVU9LIivJHJO36iaxfkOgyam9zVU,3129
6
6
  biocypher/_config/test_schema_config_disconnected.yaml,sha256=Qm8FLxEn2spHcyj_5F859KjcDvKSxNhxDvi4b4LLkvQ,68
7
7
  biocypher/_config/test_schema_config_extended.yaml,sha256=wn3A76142hhjnImhMF6RODbCFESTJ2TtPvcFdIFsAT0,3309
8
- biocypher/_connect.py,sha256=i62424Cbdnm2oI4ECLkcMF2V2A6aShCK2eSSwaGLbVE,12603
9
- biocypher/_core.py,sha256=Sg7ESentsTsqp9KbzPC_jh1fRAqOGzyy98Xzma7BBkw,17100
8
+ biocypher/_connect.py,sha256=0oSyO6CEIlKL8rHo-HHE7y0FzGfSi4vnEXSDy1TnIUE,12456
9
+ biocypher/_core.py,sha256=fA0tRorzy3R1mgzzT77mFk-l6oQ01ZAfjg8l6KbPQYM,19882
10
10
  biocypher/_create.py,sha256=vpUchUdEpWupZi1LgFLxAWMtqoBwnWbP7PwEDUCBS4A,10202
11
- biocypher/_deduplicate.py,sha256=ah2i6ONx6ml4MbQMXIe6NfbVzf1bjav0l3gLj1xGDE0,3288
11
+ biocypher/_deduplicate.py,sha256=BBvfpXzu6L5YDY5FdtXxnf8YlsbJpbCE8RdUoKsm0n0,4949
12
12
  biocypher/_logger.py,sha256=soYtz1DiduLFw3XrMnphWWUxeuJqvSof4AYhlafxl08,2933
13
13
  biocypher/_mapping.py,sha256=XJZjmXTPnXVkyub1ZU0h3EKXQ2YROaGaJOaGyPMqgy4,9338
14
- biocypher/_metadata.py,sha256=24UdhQ8vslHBfHf0S6oF9A5asyiM9SVOjfVqvRPnFvY,1658
14
+ biocypher/_metadata.py,sha256=Hmz4g_CSuqikUJ6EtLEq2GS7Z0BawtAsL0Wk-7AiE8c,1658
15
15
  biocypher/_misc.py,sha256=wsjGVOqBDVM5hxbE_TEaZ69u1kJc8HXwRAtQHUgE8XQ,4545
16
- biocypher/_ontology.py,sha256=vCGIHJn_IH5bmOMTA6GJQZB-eNVOlyjYnMzwmwfni0Q,21375
17
- biocypher/_pandas.py,sha256=2qaCtUCk_nhr8dCqXqUr8zgMhCetPh9EDq-3z-8Qxi0,2021
18
- biocypher/_translate.py,sha256=e5XhPxbPArd0aK-Zk7F533ECV12jMR_ZzoAlGD3TAzc,16540
19
- biocypher/_write.py,sha256=kOb_l1LMu_weu5RLxEDLvSrpgdU1PZZe7ObaNhJRkdU,66943
20
- biocypher-0.5.19.dist-info/LICENSE,sha256=SjUaQkq671iQUZOxEUpC4jvJxXOlfSiHTTueyz9kXJM,1065
21
- biocypher-0.5.19.dist-info/WHEEL,sha256=vxFmldFsRN_Hx10GDvsdv1wroKq8r5Lzvjp6GZ4OO8c,88
22
- biocypher-0.5.19.dist-info/METADATA,sha256=_7DNxOzmag2EO1vxTpjE7dcsX7YclymnoIVEkoMXlJ4,9103
23
- biocypher-0.5.19.dist-info/RECORD,,
16
+ biocypher/_ontology.py,sha256=pHc4hO8iZx-yg9gzqfBR9khoIni-lKAxWgnRFyNP91E,21530
17
+ biocypher/_pandas.py,sha256=GVCFM68J7yBjh40MpkNVgD8qT1RFMrrIjMOtD3iKsf4,3040
18
+ biocypher/_translate.py,sha256=nj4Y60F0U3JBH36N2dh5pFcC8Ot86rskJ2ChJwje9dI,16494
19
+ biocypher/_write.py,sha256=2ynF-VkvTr8WT2qPt2wji3iupP3WON94TlT6NpfDvCs,67738
20
+ biocypher-0.5.20.dist-info/LICENSE,sha256=SjUaQkq671iQUZOxEUpC4jvJxXOlfSiHTTueyz9kXJM,1065
21
+ biocypher-0.5.20.dist-info/WHEEL,sha256=vxFmldFsRN_Hx10GDvsdv1wroKq8r5Lzvjp6GZ4OO8c,88
22
+ biocypher-0.5.20.dist-info/METADATA,sha256=B3VOakjkLgCjusCElMML-neoPoc869g4jNI45Bchibo,9429
23
+ biocypher-0.5.20.dist-info/RECORD,,