biocypher 0.5.17__py3-none-any.whl → 0.5.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biocypher might be problematic. Click here for more details.

biocypher/_deduplicate.py CHANGED
@@ -1,8 +1,9 @@
1
1
  from ._logger import logger
2
2
 
3
- logger.debug(f'Loading module {__name__}.')
3
+ logger.debug(f"Loading module {__name__}.")
4
+
5
+ from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
4
6
 
5
- from ._create import BioCypherEdge, BioCypherNode
6
7
 
7
8
  class Deduplicator:
8
9
  """
@@ -18,15 +19,17 @@ class Deduplicator:
18
19
  """
19
20
 
20
21
  def __init__(self):
21
- self.seen_node_ids = set()
22
- self.duplicate_node_ids = set()
23
- self.duplicate_node_types = set()
22
+ self.seen_entity_ids = set()
23
+ self.duplicate_entity_ids = set()
24
+
25
+ self.entity_types = set()
26
+ self.duplicate_entity_types = set()
24
27
 
25
- self.seen_edges = {}
26
- self.duplicate_edge_ids = set()
27
- self.duplicate_edge_types = set()
28
+ self.seen_relationships = {}
29
+ self.duplicate_relationship_ids = set()
30
+ self.duplicate_relationship_types = set()
28
31
 
29
- def node_seen(self, node: BioCypherNode) -> bool:
32
+ def node_seen(self, entity: BioCypherNode) -> bool:
30
33
  """
31
34
  Adds a node to the instance and checks if it has been seen before.
32
35
 
@@ -36,17 +39,22 @@ class Deduplicator:
36
39
  Returns:
37
40
  True if the node has been seen before, False otherwise.
38
41
  """
39
- if node.get_id() in self.seen_node_ids:
40
- self.duplicate_node_ids.add(node.get_id())
41
- if node.get_label() not in self.duplicate_node_types:
42
- logger.warning(f"Duplicate node type {node.get_label()} found. ")
43
- self.duplicate_node_types.add(node.get_label())
42
+ if entity.get_label() not in self.entity_types:
43
+ self.entity_types.add(entity.get_label())
44
+
45
+ if entity.get_id() in self.seen_entity_ids:
46
+ self.duplicate_entity_ids.add(entity.get_id())
47
+ if entity.get_label() not in self.duplicate_entity_types:
48
+ logger.warning(
49
+ f"Duplicate node type {entity.get_label()} found. "
50
+ )
51
+ self.duplicate_entity_types.add(entity.get_label())
44
52
  return True
45
-
46
- self.seen_node_ids.add(node.get_id())
53
+
54
+ self.seen_entity_ids.add(entity.get_id())
47
55
  return False
48
-
49
- def edge_seen(self, edge: BioCypherEdge) -> bool:
56
+
57
+ def edge_seen(self, relationship: BioCypherEdge) -> bool:
50
58
  """
51
59
  Adds an edge to the instance and checks if it has been seen before.
52
60
 
@@ -56,25 +64,59 @@ class Deduplicator:
56
64
  Returns:
57
65
  True if the edge has been seen before, False otherwise.
58
66
  """
59
- if edge.get_type() not in self.seen_edges:
60
- self.seen_edges[edge.get_type()] = set()
67
+ if relationship.get_type() not in self.seen_relationships:
68
+ self.seen_relationships[relationship.get_type()] = set()
61
69
 
62
70
  # concatenate source and target if no id is present
63
- if not edge.get_id():
64
- _id = f"{edge.get_source_id()}_{edge.get_target_id()}"
71
+ if not relationship.get_id():
72
+ _id = (
73
+ f"{relationship.get_source_id()}_{relationship.get_target_id()}"
74
+ )
65
75
  else:
66
- _id = edge.get_id()
76
+ _id = relationship.get_id()
77
+
78
+ if _id in self.seen_relationships[relationship.get_type()]:
79
+ self.duplicate_relationship_ids.add(_id)
80
+ if relationship.get_type() not in self.duplicate_relationship_types:
81
+ logger.warning(
82
+ f"Duplicate edge type {relationship.get_type()} found. "
83
+ )
84
+ self.duplicate_relationship_types.add(relationship.get_type())
85
+ return True
67
86
 
68
- if _id in self.seen_edges[edge.get_type()]:
69
- self.duplicate_edge_ids.add(_id)
70
- if edge.get_type() not in self.duplicate_edge_types:
71
- logger.warning(f"Duplicate edge type {edge.get_type()} found. ")
72
- self.duplicate_edge_types.add(edge.get_type())
87
+ self.seen_relationships[relationship.get_type()].add(_id)
88
+ return False
89
+
90
+ def rel_as_node_seen(self, rel_as_node: BioCypherRelAsNode) -> bool:
91
+ """
92
+ Adds a rel_as_node to the instance (one entity and two relationships)
93
+ and checks if it has been seen before. Only the node is relevant for
94
+ identifying the rel_as_node as a duplicate.
95
+
96
+ Args:
97
+ rel_as_node: BioCypherRelAsNode to be added.
98
+
99
+ Returns:
100
+ True if the rel_as_node has been seen before, False otherwise.
101
+ """
102
+ node = rel_as_node.get_node()
103
+
104
+ if node.get_label() not in self.seen_relationships:
105
+ self.seen_relationships[node.get_label()] = set()
106
+
107
+ # rel as node always has an id
108
+ _id = node.get_id()
109
+
110
+ if _id in self.seen_relationships[node.get_type()]:
111
+ self.duplicate_relationship_ids.add(_id)
112
+ if node.get_type() not in self.duplicate_relationship_types:
113
+ logger.warning(f"Duplicate edge type {node.get_type()} found. ")
114
+ self.duplicate_relationship_types.add(node.get_type())
73
115
  return True
74
-
75
- self.seen_edges[edge.get_type()].add(_id)
116
+
117
+ self.seen_relationships[node.get_type()].add(_id)
76
118
  return False
77
-
119
+
78
120
  def get_duplicate_nodes(self):
79
121
  """
80
122
  Function to return a list of duplicate nodes.
@@ -83,8 +125,8 @@ class Deduplicator:
83
125
  list: list of duplicate nodes
84
126
  """
85
127
 
86
- if self.duplicate_node_types:
87
- return (self.duplicate_node_types, self.duplicate_node_ids)
128
+ if self.duplicate_entity_types:
129
+ return (self.duplicate_entity_types, self.duplicate_entity_ids)
88
130
  else:
89
131
  return None
90
132
 
@@ -96,7 +138,10 @@ class Deduplicator:
96
138
  list: list of duplicate edges
97
139
  """
98
140
 
99
- if self.duplicate_edge_types:
100
- return (self.duplicate_edge_types, self.duplicate_edge_ids)
141
+ if self.duplicate_relationship_types:
142
+ return (
143
+ self.duplicate_relationship_types,
144
+ self.duplicate_relationship_ids,
145
+ )
101
146
  else:
102
- return None
147
+ return None
biocypher/_logger.py CHANGED
@@ -12,7 +12,7 @@
12
12
  Configuration of the module logger.
13
13
  """
14
14
 
15
- __all__ = ['get_logger', 'log', 'logfile']
15
+ __all__ = ["get_logger", "log", "logfile"]
16
16
 
17
17
  from datetime import datetime
18
18
  import os
@@ -23,7 +23,7 @@ from biocypher import _config
23
23
  from biocypher._metadata import __version__
24
24
 
25
25
 
26
- def get_logger(name: str = 'biocypher') -> logging.Logger:
26
+ def get_logger(name: str = "biocypher") -> logging.Logger:
27
27
  """
28
28
  Access the module logger, create a new one if does not exist yet.
29
29
 
@@ -45,7 +45,6 @@ def get_logger(name: str = 'biocypher') -> logging.Logger:
45
45
  """
46
46
 
47
47
  if not logging.getLogger(name).hasHandlers():
48
-
49
48
  # create logger
50
49
  logger = logging.getLogger(name)
51
50
  logger.setLevel(logging.DEBUG)
@@ -53,18 +52,19 @@ def get_logger(name: str = 'biocypher') -> logging.Logger:
53
52
 
54
53
  # formatting
55
54
  file_formatter = logging.Formatter(
56
- '%(asctime)s\t%(levelname)s\tmodule:%(module)s\n%(message)s',
55
+ "%(asctime)s\t%(levelname)s\tmodule:%(module)s\n%(message)s",
57
56
  )
58
- stdout_formatter = logging.Formatter('%(levelname)s -- %(message)s')
57
+ stdout_formatter = logging.Formatter("%(levelname)s -- %(message)s")
59
58
 
60
59
  # file name and creation
61
60
  now = datetime.now()
62
- date_time = now.strftime('%Y%m%d-%H%M%S')
61
+ date_time = now.strftime("%Y%m%d-%H%M%S")
63
62
 
64
- logdir = _config.config('biocypher'
65
- ).get('log_directory') or 'biocypher-log'
63
+ logdir = (
64
+ _config.config("biocypher").get("log_directory") or "biocypher-log"
65
+ )
66
66
  os.makedirs(logdir, exist_ok=True)
67
- logfile = os.path.join(logdir, f'biocypher-{date_time}.log')
67
+ logfile = os.path.join(logdir, f"biocypher-{date_time}.log")
68
68
 
69
69
  # handlers
70
70
  # stream handler
@@ -75,7 +75,7 @@ def get_logger(name: str = 'biocypher') -> logging.Logger:
75
75
  # file handler
76
76
  file_handler = logging.FileHandler(logfile)
77
77
 
78
- if _config.config('biocypher').get('debug'):
78
+ if _config.config("biocypher").get("debug"):
79
79
  file_handler.setLevel(logging.DEBUG)
80
80
  else:
81
81
  file_handler.setLevel(logging.INFO)
@@ -87,8 +87,8 @@ def get_logger(name: str = 'biocypher') -> logging.Logger:
87
87
  logger.addHandler(stdout_handler)
88
88
 
89
89
  # startup message
90
- logger.info(f'This is BioCypher v{__version__}.')
91
- logger.info(f'Logging into `{logfile}`.')
90
+ logger.info(f"This is BioCypher v{__version__}.")
91
+ logger.info(f"Logging into `{logfile}`.")
92
92
 
93
93
  return logging.getLogger(name)
94
94
 
@@ -107,7 +107,6 @@ def log():
107
107
  """
108
108
 
109
109
  with open(logfile()) as fp:
110
-
111
110
  pydoc.pager(fp.read())
112
111
 
113
112
 
biocypher/_mapping.py CHANGED
@@ -14,7 +14,7 @@ underlying ontology.
14
14
  """
15
15
  from ._logger import logger
16
16
 
17
- logger.debug(f'Loading module {__name__}.')
17
+ logger.debug(f"Loading module {__name__}.")
18
18
 
19
19
  from typing import Optional
20
20
  from urllib.request import urlopen
@@ -29,8 +29,8 @@ class OntologyMapping:
29
29
  """
30
30
  Class to store the ontology mapping and extensions.
31
31
  """
32
- def __init__(self, config_file: str = None):
33
32
 
33
+ def __init__(self, config_file: str = None):
34
34
  self.schema = self._read_config(config_file)
35
35
 
36
36
  self.extended_schema = self._extend_schema()
@@ -40,21 +40,16 @@ class OntologyMapping:
40
40
  Read the configuration file and store the ontology mapping and extensions.
41
41
  """
42
42
  if config_file is None:
43
-
44
- schema_config = _config.module_data('schema_config')
43
+ schema_config = _config.module_data("schema_config")
45
44
 
46
45
  # load yaml file from web
47
- elif config_file.startswith('http'):
48
-
46
+ elif config_file.startswith("http"):
49
47
  with urlopen(config_file) as f:
50
-
51
48
  schema_config = yaml.safe_load(f)
52
49
 
53
50
  # get graph state from config (assume file is local)
54
51
  else:
55
-
56
- with open(config_file, 'r') as f:
57
-
52
+ with open(config_file, "r") as f:
58
53
  schema_config = yaml.safe_load(f)
59
54
 
60
55
  return schema_config
@@ -78,30 +73,28 @@ class OntologyMapping:
78
73
 
79
74
  # first pass: get parent leaves with direct representation in ontology
80
75
  for k, v in d.items():
81
-
82
76
  # k is not an entity
83
- if 'represented_as' not in v:
77
+ if "represented_as" not in v:
84
78
  continue
85
79
 
86
80
  # preferred_id optional: if not provided, use `id`
87
- if not v.get('preferred_id'):
88
- v['preferred_id'] = 'id'
81
+ if not v.get("preferred_id"):
82
+ v["preferred_id"] = "id"
89
83
 
90
84
  # k is an entity that is present in the ontology
91
- if 'is_a' not in v:
85
+ if "is_a" not in v:
92
86
  extended_schema[k] = v
93
87
 
94
88
  # second pass: "vertical" inheritance
95
89
  d = self._vertical_property_inheritance(d)
96
90
  for k, v in d.items():
97
- if 'is_a' in v:
98
-
91
+ if "is_a" in v:
99
92
  # prevent loops
100
- if k == v['is_a']:
93
+ if k == v["is_a"]:
101
94
  logger.warning(
102
- f'Loop detected in ontology mapping: {k} -> {v}. '
103
- 'Removing item. Please fix the inheritance if you want '
104
- 'to use this item.'
95
+ f"Loop detected in ontology mapping: {k} -> {v}. "
96
+ "Removing item. Please fix the inheritance if you want "
97
+ "to use this item."
105
98
  )
106
99
  continue
107
100
 
@@ -112,16 +105,15 @@ class OntologyMapping:
112
105
  mi_leaves = {}
113
106
  ms_leaves = {}
114
107
  for k, v in d.items():
115
-
116
108
  # k is not an entity
117
- if 'represented_as' not in v:
109
+ if "represented_as" not in v:
118
110
  continue
119
111
 
120
- if isinstance(v.get('preferred_id'), list):
112
+ if isinstance(v.get("preferred_id"), list):
121
113
  mi_leaves = self._horizontal_inheritance_pid(k, v)
122
114
  extended_schema.update(mi_leaves)
123
115
 
124
- elif isinstance(v.get('source'), list):
116
+ elif isinstance(v.get("source"), list):
125
117
  ms_leaves = self._horizontal_inheritance_source(k, v)
126
118
  extended_schema.update(ms_leaves)
127
119
 
@@ -132,40 +124,38 @@ class OntologyMapping:
132
124
  Inherit properties from parents to children and update `d` accordingly.
133
125
  """
134
126
  for k, v in d.items():
135
-
136
127
  # k is not an entity
137
- if 'represented_as' not in v:
128
+ if "represented_as" not in v:
138
129
  continue
139
130
 
140
131
  # k is an entity that is present in the ontology
141
- if 'is_a' not in v:
132
+ if "is_a" not in v:
142
133
  continue
143
134
 
144
135
  # "vertical" inheritance: inherit properties from parent
145
- if v.get('inherit_properties', False):
146
-
136
+ if v.get("inherit_properties", False):
147
137
  # get direct ancestor
148
- if isinstance(v['is_a'], list):
149
- parent = v['is_a'][0]
138
+ if isinstance(v["is_a"], list):
139
+ parent = v["is_a"][0]
150
140
  else:
151
- parent = v['is_a']
141
+ parent = v["is_a"]
152
142
 
153
143
  # ensure child has properties and exclude_properties
154
- if 'properties' not in v:
155
- v['properties'] = {}
156
- if 'exclude_properties' not in v:
157
- v['exclude_properties'] = {}
144
+ if "properties" not in v:
145
+ v["properties"] = {}
146
+ if "exclude_properties" not in v:
147
+ v["exclude_properties"] = {}
158
148
 
159
149
  # update properties of child
160
- parent_props = self.schema[parent].get('properties', {})
150
+ parent_props = self.schema[parent].get("properties", {})
161
151
  if parent_props:
162
- v['properties'].update(parent_props)
152
+ v["properties"].update(parent_props)
163
153
 
164
154
  parent_excl_props = self.schema[parent].get(
165
- 'exclude_properties', {}
155
+ "exclude_properties", {}
166
156
  )
167
157
  if parent_excl_props:
168
- v['exclude_properties'].update(parent_excl_props)
158
+ v["exclude_properties"].update(parent_excl_props)
169
159
 
170
160
  # update schema (d)
171
161
  d[k] = v
@@ -182,9 +172,9 @@ class OntologyMapping:
182
172
 
183
173
  leaves = {}
184
174
 
185
- preferred_id = value['preferred_id']
186
- input_label = value.get('input_label') or value['label_in_input']
187
- represented_as = value['represented_as']
175
+ preferred_id = value["preferred_id"]
176
+ input_label = value.get("input_label") or value["label_in_input"]
177
+ represented_as = value["represented_as"]
188
178
 
189
179
  # adjust lengths
190
180
  max_l = max(
@@ -208,40 +198,38 @@ class OntologyMapping:
208
198
  reps = represented_as
209
199
 
210
200
  for pid, lab, rep in zip(pids, input_label, reps):
211
-
212
- skey = pid + '.' + key
201
+ skey = pid + "." + key
213
202
  svalue = {
214
- 'preferred_id': pid,
215
- 'input_label': lab,
216
- 'represented_as': rep,
203
+ "preferred_id": pid,
204
+ "input_label": lab,
205
+ "represented_as": rep,
217
206
  # mark as virtual
218
- 'virtual': True,
207
+ "virtual": True,
219
208
  }
220
209
 
221
210
  # inherit is_a if exists
222
- if 'is_a' in value.keys():
223
-
211
+ if "is_a" in value.keys():
224
212
  # treat as multiple inheritance
225
- if isinstance(value['is_a'], list):
226
- v = list(value['is_a'])
213
+ if isinstance(value["is_a"], list):
214
+ v = list(value["is_a"])
227
215
  v.insert(0, key)
228
- svalue['is_a'] = v
216
+ svalue["is_a"] = v
229
217
 
230
218
  else:
231
- svalue['is_a'] = [key, value['is_a']]
219
+ svalue["is_a"] = [key, value["is_a"]]
232
220
 
233
221
  else:
234
222
  # set parent as is_a
235
- svalue['is_a'] = key
223
+ svalue["is_a"] = key
236
224
 
237
225
  # inherit everything except core attributes
238
226
  for k, v in value.items():
239
227
  if k not in [
240
- 'is_a',
241
- 'preferred_id',
242
- 'input_label',
243
- 'label_in_input',
244
- 'represented_as',
228
+ "is_a",
229
+ "preferred_id",
230
+ "input_label",
231
+ "label_in_input",
232
+ "represented_as",
245
233
  ]:
246
234
  svalue[k] = v
247
235
 
@@ -259,9 +247,9 @@ class OntologyMapping:
259
247
 
260
248
  leaves = {}
261
249
 
262
- source = value['source']
263
- input_label = value.get('input_label') or value['label_in_input']
264
- represented_as = value['represented_as']
250
+ source = value["source"]
251
+ input_label = value.get("input_label") or value["label_in_input"]
252
+ represented_as = value["represented_as"]
265
253
 
266
254
  # adjust lengths
267
255
  src_l = len(source)
@@ -279,40 +267,38 @@ class OntologyMapping:
279
267
  reps = represented_as
280
268
 
281
269
  for src, lab, rep in zip(source, labels, reps):
282
-
283
- skey = src + '.' + key
270
+ skey = src + "." + key
284
271
  svalue = {
285
- 'source': src,
286
- 'input_label': lab,
287
- 'represented_as': rep,
272
+ "source": src,
273
+ "input_label": lab,
274
+ "represented_as": rep,
288
275
  # mark as virtual
289
- 'virtual': True,
276
+ "virtual": True,
290
277
  }
291
278
 
292
279
  # inherit is_a if exists
293
- if 'is_a' in value.keys():
294
-
280
+ if "is_a" in value.keys():
295
281
  # treat as multiple inheritance
296
- if isinstance(value['is_a'], list):
297
- v = list(value['is_a'])
282
+ if isinstance(value["is_a"], list):
283
+ v = list(value["is_a"])
298
284
  v.insert(0, key)
299
- svalue['is_a'] = v
285
+ svalue["is_a"] = v
300
286
 
301
287
  else:
302
- svalue['is_a'] = [key, value['is_a']]
288
+ svalue["is_a"] = [key, value["is_a"]]
303
289
 
304
290
  else:
305
291
  # set parent as is_a
306
- svalue['is_a'] = key
292
+ svalue["is_a"] = key
307
293
 
308
294
  # inherit everything except core attributes
309
295
  for k, v in value.items():
310
296
  if k not in [
311
- 'is_a',
312
- 'source',
313
- 'input_label',
314
- 'label_in_input',
315
- 'represented_as',
297
+ "is_a",
298
+ "source",
299
+ "input_label",
300
+ "label_in_input",
301
+ "represented_as",
316
302
  ]:
317
303
  svalue[k] = v
318
304
 
biocypher/_metadata.py CHANGED
@@ -11,7 +11,7 @@
11
11
  Package metadata (version, authors, etc).
12
12
  """
13
13
 
14
- __all__ = ['get_metadata']
14
+ __all__ = ["get_metadata"]
15
15
 
16
16
  import os
17
17
  import pathlib
@@ -19,7 +19,7 @@ import importlib.metadata
19
19
 
20
20
  import toml
21
21
 
22
- _VERSION = '0.5.17'
22
+ _VERSION = "0.5.20"
23
23
 
24
24
 
25
25
  def get_metadata():
@@ -31,46 +31,41 @@ def get_metadata():
31
31
  """
32
32
 
33
33
  here = pathlib.Path(__file__).parent
34
- pyproj_toml = 'pyproject.toml'
34
+ pyproj_toml = "pyproject.toml"
35
35
  meta = {}
36
36
 
37
37
  for project_dir in (here, here.parent):
38
-
39
38
  toml_path = str(project_dir.joinpath(pyproj_toml).absolute())
40
39
 
41
40
  if os.path.exists(toml_path):
42
-
43
41
  pyproject = toml.load(toml_path)
44
42
 
45
43
  meta = {
46
- 'name': pyproject['tool']['poetry']['name'],
47
- 'version': pyproject['tool']['poetry']['version'],
48
- 'author': pyproject['tool']['poetry']['authors'],
49
- 'license': pyproject['tool']['poetry']['license'],
50
- 'full_metadata': pyproject,
44
+ "name": pyproject["tool"]["poetry"]["name"],
45
+ "version": pyproject["tool"]["poetry"]["version"],
46
+ "author": pyproject["tool"]["poetry"]["authors"],
47
+ "license": pyproject["tool"]["poetry"]["license"],
48
+ "full_metadata": pyproject,
51
49
  }
52
50
 
53
51
  break
54
52
 
55
53
  if not meta:
56
-
57
54
  try:
58
-
59
55
  meta = {
60
56
  k.lower(): v
61
57
  for k, v in importlib.metadata.metadata(here.name).items()
62
58
  }
63
59
 
64
60
  except importlib.metadata.PackageNotFoundError:
65
-
66
61
  pass
67
62
 
68
- meta['version'] = meta.get('version', None) or _VERSION
63
+ meta["version"] = meta.get("version", None) or _VERSION
69
64
 
70
65
  return meta
71
66
 
72
67
 
73
68
  metadata = get_metadata()
74
- __version__ = metadata.get('version', None)
75
- __author__ = metadata.get('author', None)
76
- __license__ = metadata.get('license', None)
69
+ __version__ = metadata.get("version", None)
70
+ __author__ = metadata.get("author", None)
71
+ __license__ = metadata.get("license", None)