biocypher 0.5.35__py3-none-any.whl → 0.5.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biocypher might be problematic. Click here for more details.

biocypher/_logger.py CHANGED
@@ -48,7 +48,7 @@ def get_logger(name: str = "biocypher") -> logging.Logger:
48
48
  # create logger
49
49
  logger = logging.getLogger(name)
50
50
  logger.setLevel(logging.DEBUG)
51
- logger.propagate = False
51
+ logger.propagate = True
52
52
 
53
53
  # formatting
54
54
  file_formatter = logging.Formatter(
biocypher/_metadata.py CHANGED
@@ -19,7 +19,7 @@ import importlib.metadata
19
19
 
20
20
  import toml
21
21
 
22
- _VERSION = "0.5.35"
22
+ _VERSION = "0.5.37"
23
23
 
24
24
 
25
25
  def get_metadata():
biocypher/_misc.py CHANGED
@@ -76,56 +76,80 @@ def ensure_iterable(value: Any) -> Iterable:
76
76
  return value if isinstance(value, LIST_LIKE) else (value,)
77
77
 
78
78
 
79
- def create_tree_visualisation(inheritance_tree: Union[dict, nx.Graph]) -> str:
79
+ def create_tree_visualisation(inheritance_graph: Union[dict, nx.Graph]) -> Tree:
80
80
  """
81
81
  Creates a visualisation of the inheritance tree using treelib.
82
82
  """
83
+ inheritance_tree = _get_inheritance_tree(inheritance_graph)
84
+ classes, root = _find_root_node(inheritance_tree)
85
+
86
+ tree = Tree()
87
+ tree.create_node(root, root)
88
+ while classes:
89
+ for child in classes:
90
+ parent = inheritance_tree[child]
91
+ if parent in tree.nodes.keys() or parent == root:
92
+ tree.create_node(child, child, parent=parent)
93
+
94
+ for node in tree.nodes.keys():
95
+ if node in classes:
96
+ classes.remove(node)
97
+
98
+ return tree
99
+
100
+
101
+ def _get_inheritance_tree(inheritance_graph: Union[dict, nx.Graph]) -> dict:
102
+ """Transforms an inheritance_graph into an inheritance_tree.
103
+
104
+ Args:
105
+ inheritance_graph: A dict or nx.Graph representing the inheritance graph.
106
+
107
+ Returns:
108
+ A dict representing the inheritance tree.
109
+ """
110
+ if isinstance(inheritance_graph, nx.Graph):
111
+ inheritance_tree = nx.to_dict_of_lists(inheritance_graph)
112
+
113
+ multiple_parents_present = _multiple_inheritance_present(
114
+ inheritance_tree
115
+ )
116
+ if multiple_parents_present:
117
+ logger.warning(
118
+ "The ontology contains multiple inheritance (one child node has multiple parent nodes). This is not visualized in the following hierarchy tree (the child node is only added once). If you want to browse all relationships of the parsed ontology write a graphml file to disk and view this file."
119
+ )
83
120
 
84
- if isinstance(inheritance_tree, nx.Graph):
85
- inheritance_tree = nx.to_dict_of_lists(inheritance_tree)
86
121
  # unlist values
87
122
  inheritance_tree = {k: v[0] for k, v in inheritance_tree.items() if v}
123
+ return inheritance_tree
124
+ elif not _multiple_inheritance_present(inheritance_graph):
125
+ return inheritance_graph
126
+
88
127
 
89
- # find root node
128
+ def _multiple_inheritance_present(inheritance_tree: dict) -> bool:
129
+ """Checks if multiple inheritance is present in the inheritance_tree."""
130
+ return any(len(value) > 1 for value in inheritance_tree.values())
131
+
132
+
133
+ def _find_root_node(inheritance_tree: dict) -> tuple[set, str]:
90
134
  classes = set(inheritance_tree.keys())
91
135
  parents = set(inheritance_tree.values())
92
136
  root = list(parents - classes)
93
-
94
137
  if len(root) > 1:
95
138
  if "entity" in root:
96
- root = "entity" # default: good standard? TODO
97
-
139
+ root = "entity" # TODO: default: good standard?
98
140
  else:
99
141
  raise ValueError(
100
142
  "Inheritance tree cannot have more than one root node. "
101
143
  f"Found {len(root)}: {root}."
102
144
  )
103
-
104
145
  else:
105
146
  root = root[0]
106
-
107
147
  if not root:
108
148
  # find key whose value is None
109
149
  root = list(inheritance_tree.keys())[
110
150
  list(inheritance_tree.values()).index(None)
111
151
  ]
112
-
113
- tree = Tree()
114
-
115
- tree.create_node(root, root)
116
-
117
- while classes:
118
- for child in classes:
119
- parent = inheritance_tree[child]
120
-
121
- if parent in tree.nodes.keys() or parent == root:
122
- tree.create_node(child, child, parent=parent)
123
-
124
- for node in tree.nodes.keys():
125
- if node in classes:
126
- classes.remove(node)
127
-
128
- return tree
152
+ return classes, root
129
153
 
130
154
 
131
155
  # string conversion, adapted from Biolink Model Toolkit
biocypher/_ontology.py CHANGED
@@ -93,7 +93,7 @@ class OntologyAdapter:
93
93
  self._reverse_labels = reverse_labels
94
94
  self._remove_prefixes = remove_prefixes
95
95
 
96
- # Load the ontology into an rdflib Graph according to the file extension
96
+ # Load the ontology into a rdflib Graph according to the file extension
97
97
  self._rdf_graph = self._load_rdf_graph(ontology_file)
98
98
 
99
99
  self._nx_graph = self._rdf_to_nx(
@@ -107,56 +107,77 @@ class OntologyAdapter:
107
107
  G = nx.DiGraph()
108
108
 
109
109
  # Define a recursive function to add subclasses to the graph
110
- def add_subclasses(node):
111
- # Only add nodes that have a label
112
- if (node, rdflib.RDFS.label, None) not in g:
110
+ def add_subclasses(parent_node):
111
+ if not has_label(parent_node, g):
113
112
  return
114
113
 
115
- nx_id, nx_label = _get_nx_id_and_label(node)
116
-
117
- if nx_id not in G:
118
- G.add_node(nx_id)
119
- G.nodes[nx_id]["label"] = nx_label
120
-
121
- # Recursively add all subclasses of the node to the graph
122
- for s, _, o in g.triples((None, rdflib.RDFS.subClassOf, node)):
123
- # Only add nodes that have a label
124
- if (s, rdflib.RDFS.label, None) not in g:
125
- continue
126
-
127
- s_id, s_label = _get_nx_id_and_label(s)
128
- G.add_node(s_id)
129
- G.nodes[s_id]["label"] = s_label
114
+ nx_parent_node_id, nx_parent_node_label = _get_nx_id_and_label(
115
+ parent_node
116
+ )
130
117
 
131
- G.add_edge(s_id, nx_id)
132
- add_subclasses(s)
133
- add_parents(s)
118
+ if nx_parent_node_id not in G:
119
+ add_node(nx_parent_node_id, nx_parent_node_label)
120
+
121
+ child_nodes = get_child_nodes(parent_node, g)
122
+
123
+ if child_nodes:
124
+ for child_node in child_nodes:
125
+ if not has_label(child_node, g):
126
+ continue
127
+ (
128
+ nx_child_node_id,
129
+ nx_child_node_label,
130
+ ) = _get_nx_id_and_label(child_node)
131
+ add_node(nx_child_node_id, nx_child_node_label)
132
+ G.add_edge(nx_child_node_id, nx_parent_node_id)
133
+ for child_node in child_nodes:
134
+ add_subclasses(child_node)
135
+ add_parents(child_node)
134
136
 
135
137
  def add_parents(node):
136
- # Only add nodes that have a label
137
- if (node, rdflib.RDFS.label, None) not in g:
138
+ if not has_label(node, g):
138
139
  return
139
140
 
140
141
  nx_id, nx_label = _get_nx_id_and_label(node)
141
142
 
142
143
  # Recursively add all parents of the node to the graph
143
144
  for s, _, o in g.triples((node, rdflib.RDFS.subClassOf, None)):
144
- # Only add nodes that have a label
145
- if (o, rdflib.RDFS.label, None) not in g:
145
+ if not has_label(o, g):
146
146
  continue
147
147
 
148
148
  o_id, o_label = _get_nx_id_and_label(o)
149
149
 
150
- # Skip nodes already in the graph
150
+ # Skip if node already in the graph
151
151
  if o_id in G:
152
152
  continue
153
153
 
154
- G.add_node(o_id)
155
- G.nodes[o_id]["label"] = o_label
154
+ add_node(o_id, o_label)
156
155
 
157
156
  G.add_edge(nx_id, o_id)
158
157
  add_parents(o)
159
158
 
159
+ def has_label(node: rdflib.URIRef, g: rdflib.Graph) -> bool:
160
+ """Does the node have a label in g?
161
+
162
+ Args:
163
+ node (rdflib.URIRef): The node to check
164
+ g (rdflib.Graph): The graph to check in
165
+
166
+ Returns:
167
+ bool: True if the node has a label, False otherwise
168
+ """
169
+ return (node, rdflib.RDFS.label, None) in g
170
+
171
+ def add_node(nx_node_id: str, nx_node_label: str):
172
+ """Add a node to the graph.
173
+
174
+ Args:
175
+ nx_node_id (str): The ID of the node
176
+ nx_node_label (str): The label of the node
177
+ """
178
+ G.add_node(nx_node_id)
179
+ G.nodes[nx_node_id]["label"] = nx_node_label
180
+
160
181
  def _get_nx_id_and_label(node):
161
182
  node_id_str = self._remove_prefix(str(node))
162
183
  node_label_str = str(g.value(node, rdflib.RDFS.label)).replace(
@@ -168,6 +189,79 @@ class OntologyAdapter:
168
189
  nx_label = node_id_str if switch_id_and_label else node_label_str
169
190
  return nx_id, nx_label
170
191
 
192
+ def get_child_nodes(
193
+ parent_node: rdflib.URIRef, g: rdflib.Graph
194
+ ) -> list:
195
+ """Get the child nodes of a node in the ontology.
196
+ Accounts for the case of multiple parents defined in intersectionOf.
197
+
198
+ Args:
199
+ parent_node (rdflib.URIRef): The parent node to get the children of
200
+ g (rdflib.Graph): The graph to get the children from
201
+
202
+ Returns:
203
+ list: A list of the child nodes
204
+ """
205
+ child_nodes = []
206
+ for s, p, o in g.triples((None, rdflib.RDFS.subClassOf, None)):
207
+ if (o, rdflib.RDF.type, rdflib.OWL.Class) in g and (
208
+ o,
209
+ rdflib.OWL.intersectionOf,
210
+ None,
211
+ ) in g:
212
+ # Check if node has multiple parent nodes defined in intersectionOf (one of them = parent_node)
213
+ parent_nodes = get_nodes_in_intersectionof(o)
214
+ if parent_node in parent_nodes:
215
+ child_nodes.append(s)
216
+ for node in parent_nodes:
217
+ add_parents(node)
218
+ elif o == parent_node:
219
+ # only one parent node
220
+ child_nodes.append(s)
221
+ return child_nodes
222
+
223
+ def get_nodes_in_intersectionof(o: rdflib.URIRef) -> list:
224
+ """Get the nodes in an intersectionOf node.
225
+
226
+ Args:
227
+ o (rdflib.URIRef): The intersectionOf node
228
+
229
+ Returns:
230
+ list: A list of the nodes in the intersectionOf node
231
+ """
232
+ anonymous_intersection_nodes = []
233
+ for _, _, anonymous_object in g.triples(
234
+ (o, rdflib.OWL.intersectionOf, None)
235
+ ):
236
+ anonymous_intersection_nodes.append(anonymous_object)
237
+ anonymous_intersection_node = anonymous_intersection_nodes[0]
238
+ nodes_in_intersection = retrieve_rdf_linked_list(
239
+ anonymous_intersection_node
240
+ )
241
+ return nodes_in_intersection
242
+
243
+ def retrieve_rdf_linked_list(subject: rdflib.URIRef) -> list:
244
+ """Recursively retrieves a linked list from RDF.
245
+ Example RDF list with the items [item1, item2]:
246
+ list_node - first -> item1
247
+ list_node - rest -> list_node2
248
+ list_node2 - first -> item2
249
+ list_node2 - rest -> nil
250
+
251
+ Args:
252
+ subject (rdflib.URIRef): One list_node of the RDF list
253
+
254
+ Returns:
255
+ list: The items of the RDF list
256
+ """
257
+ rdf_list = []
258
+ for s, p, o in g.triples((subject, rdflib.RDF.first, None)):
259
+ rdf_list.append(o)
260
+ for s, p, o in g.triples((subject, rdflib.RDF.rest, None)):
261
+ if o != rdflib.RDF.nil:
262
+ rdf_list.extend(retrieve_rdf_linked_list(o))
263
+ return rdf_list
264
+
171
265
  # Add all subclasses of the root node to the graph
172
266
  add_subclasses(root)
173
267
 
biocypher/_write.py CHANGED
@@ -13,6 +13,7 @@ BioCypher 'offline' module. Handles the writing of node and edge representations
13
13
  suitable for import into a DBMS.
14
14
  """
15
15
 
16
+ import re
16
17
  import glob
17
18
 
18
19
  from ._logger import logger
@@ -22,7 +23,6 @@ logger.debug(f"Loading module {__name__}.")
22
23
  from abc import ABC, abstractmethod
23
24
  from types import GeneratorType
24
25
  from typing import TYPE_CHECKING, Union, Optional
25
- from datetime import datetime
26
26
  from collections import OrderedDict, defaultdict
27
27
  import os
28
28
 
@@ -34,7 +34,6 @@ from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
34
34
  __all__ = ["get_writer"]
35
35
 
36
36
  if TYPE_CHECKING:
37
- from ._ontology import Ontology
38
37
  from ._translate import Translator
39
38
  from ._deduplicate import Deduplicator
40
39
 
@@ -954,7 +953,9 @@ class _BatchWriter(ABC):
954
953
  bool: The return value. True for success, False otherwise.
955
954
  """
956
955
  # translate label to PascalCase
957
- label_pascal = self.translator.name_sentence_to_pascal(label)
956
+ label_pascal = self.translator.name_sentence_to_pascal(
957
+ parse_label(label)
958
+ )
958
959
 
959
960
  # list files in self.outdir
960
961
  files = glob.glob(
@@ -1086,7 +1087,9 @@ class _Neo4jBatchWriter(_BatchWriter):
1086
1087
  _id = ":ID"
1087
1088
 
1088
1089
  # translate label to PascalCase
1089
- pascal_label = self.translator.name_sentence_to_pascal(label)
1090
+ pascal_label = self.translator.name_sentence_to_pascal(
1091
+ parse_label(label)
1092
+ )
1090
1093
 
1091
1094
  header = f"{pascal_label}-header.csv"
1092
1095
  header_path = os.path.join(
@@ -1165,7 +1168,9 @@ class _Neo4jBatchWriter(_BatchWriter):
1165
1168
 
1166
1169
  for label, props in self.edge_property_dict.items():
1167
1170
  # translate label to PascalCase
1168
- pascal_label = self.translator.name_sentence_to_pascal(label)
1171
+ pascal_label = self.translator.name_sentence_to_pascal(
1172
+ parse_label(label)
1173
+ )
1169
1174
 
1170
1175
  # paths
1171
1176
  header = f"{pascal_label}-header.csv"
@@ -1310,6 +1315,43 @@ class _Neo4jBatchWriter(_BatchWriter):
1310
1315
  return import_call
1311
1316
 
1312
1317
 
1318
+ def parse_label(label: str) -> str:
1319
+ """
1320
+
1321
+ Check if the label is compliant with Neo4j naming conventions,
1322
+ https://neo4j.com/docs/cypher-manual/current/syntax/naming/, and if not,
1323
+ remove non-compliant characters.
1324
+
1325
+ Args:
1326
+ label (str): The label to check
1327
+ Returns:
1328
+ str: The compliant label
1329
+ """
1330
+ # Check if the name contains only alphanumeric characters, underscore, or dollar sign
1331
+ # and dot (for class hierarchy of BioCypher)
1332
+ allowed_chars = r"a-zA-Z0-9_$ ."
1333
+ matches = re.findall(f"[{allowed_chars}]", label)
1334
+ non_matches = re.findall(f"[^{allowed_chars}]", label)
1335
+ if non_matches:
1336
+ non_matches = list(set(non_matches))
1337
+ logger.warning(
1338
+ f"Label is not compliant with Neo4j naming rules. Removed non compliant characters: {non_matches}"
1339
+ )
1340
+
1341
+ def first_character_compliant(character: str) -> bool:
1342
+ return character.isalpha() or character == "$"
1343
+
1344
+ if not first_character_compliant(matches[0]):
1345
+ for c in matches:
1346
+ if first_character_compliant(c):
1347
+ matches = matches[matches.index(c) :]
1348
+ break
1349
+ logger.warning(
1350
+ "Label does not start with an alphabetic character or with $. Removed non compliant characters."
1351
+ )
1352
+ return "".join(matches).strip()
1353
+
1354
+
1313
1355
  class _ArangoDBBatchWriter(_Neo4jBatchWriter):
1314
1356
  """
1315
1357
  Class for writing node and edge representations to disk using the format
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: biocypher
3
- Version: 0.5.35
3
+ Version: 0.5.37
4
4
  Summary: A unifying framework for biomedical research knowledge graphs
5
5
  Home-page: https://github.com/biocypher/biocypher
6
6
  License: MIT
@@ -10,15 +10,15 @@ biocypher/_core.py,sha256=2o3hhhM6kfaZI6TU3ZmzoBJc-RJgFFGWGe0MZW-oA3U,22301
10
10
  biocypher/_create.py,sha256=vpUchUdEpWupZi1LgFLxAWMtqoBwnWbP7PwEDUCBS4A,10202
11
11
  biocypher/_deduplicate.py,sha256=BBvfpXzu6L5YDY5FdtXxnf8YlsbJpbCE8RdUoKsm0n0,4949
12
12
  biocypher/_get.py,sha256=3Kpky3blfNf1JwxKWLsZxTU2aTP_C4sUe8OpiyYj63I,10810
13
- biocypher/_logger.py,sha256=rEn_-ESh9cPxk6vdWZ1a25escCWHBWX4D1gtpNowyvI,3186
13
+ biocypher/_logger.py,sha256=NGXe3hZA79WSujfOgpcxHBf8N2QAfrmvM1LFDpsGK2U,3185
14
14
  biocypher/_mapping.py,sha256=ERSNH2Bg19145KytxbFE4BInPaiP-LWW7osOBot29Eo,9304
15
- biocypher/_metadata.py,sha256=a3jidpC5ah-BbwXswfG9q21ifIobi_4q6ztctXQbrbg,1658
16
- biocypher/_misc.py,sha256=wsjGVOqBDVM5hxbE_TEaZ69u1kJc8HXwRAtQHUgE8XQ,4545
17
- biocypher/_ontology.py,sha256=di0_v5Z99bmv4iiBybbdwZDKGIP05jof0nRL3OED2pw,23047
15
+ biocypher/_metadata.py,sha256=-yWk0B2JDJXt34dPea_5x4t2YASfP8wc6iRVvRKdB1A,1658
16
+ biocypher/_misc.py,sha256=g5B-PO_XJlYEJC7kEVRdCXeB2NW0ZSVr_5KqTEk2ldk,5877
17
+ biocypher/_ontology.py,sha256=53hHroH4K9MbwueK2pAbdkidMRBVH2adlQ66QbI_BiE,26734
18
18
  biocypher/_pandas.py,sha256=GVCFM68J7yBjh40MpkNVgD8qT1RFMrrIjMOtD3iKsf4,3040
19
19
  biocypher/_translate.py,sha256=nj4Y60F0U3JBH36N2dh5pFcC8Ot86rskJ2ChJwje9dI,16494
20
- biocypher/_write.py,sha256=nvb75OwElu8fLUp0FjEBqQ1VNpx6iRrk-t7v_TOlDhg,68165
21
- biocypher-0.5.35.dist-info/LICENSE,sha256=SjUaQkq671iQUZOxEUpC4jvJxXOlfSiHTTueyz9kXJM,1065
22
- biocypher-0.5.35.dist-info/METADATA,sha256=vYgbyXQI7F633kx5IPZDZQ1wSpw1nyLS07OEUK8dHts,10573
23
- biocypher-0.5.35.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
24
- biocypher-0.5.35.dist-info/RECORD,,
20
+ biocypher/_write.py,sha256=EjAnNzayVKvBuIVLw3gY8T9fTnfIPaTGODu275IaRJ8,69554
21
+ biocypher-0.5.37.dist-info/LICENSE,sha256=SjUaQkq671iQUZOxEUpC4jvJxXOlfSiHTTueyz9kXJM,1065
22
+ biocypher-0.5.37.dist-info/METADATA,sha256=iD1g7a9FH1JRfhVynse5qctOAsazZxa0_YDxr-OCDkc,10573
23
+ biocypher-0.5.37.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
24
+ biocypher-0.5.37.dist-info/RECORD,,