biocypher 0.6.2__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biocypher might be problematic; see the package's registry page for more details.

biocypher/_create.py CHANGED
@@ -1,23 +1,16 @@
1
- #!/usr/bin/env python
2
- #
3
- # Copyright 2021, Heidelberg University Clinic
4
- #
5
- # File author(s): Sebastian Lobentanzer
6
- # ...
7
- #
8
- # Distributed under MIT licence, see the file `LICENSE`.
9
- #
10
1
  """
11
2
  BioCypher 'create' module. Handles the creation of BioCypher node and edge
12
3
  dataclasses.
13
4
  """
14
- from ._logger import logger
15
5
 
16
- logger.debug(f"Loading module {__name__}.")
6
+ import os
17
7
 
8
+ from dataclasses import dataclass, field
18
9
  from typing import Union
19
- from dataclasses import field, dataclass
20
- import os
10
+
11
+ from ._logger import logger
12
+
13
+ logger.debug(f"Loading module {__name__}.")
21
14
 
22
15
  __all__ = [
23
16
  "BioCypherEdge",
@@ -71,8 +64,7 @@ class BioCypherNode:
71
64
 
72
65
  if ":TYPE" in self.properties.keys():
73
66
  logger.warning(
74
- "Keyword ':TYPE' is reserved for Neo4j. "
75
- "Removing from properties.",
67
+ "Keyword ':TYPE' is reserved for Neo4j. Removing from properties.",
76
68
  # "Renaming to 'type'."
77
69
  )
78
70
  # self.properties["type"] = self.properties[":TYPE"]
@@ -209,24 +201,21 @@ class BioCypherEdge:
209
201
 
210
202
  if ":TYPE" in self.properties.keys():
211
203
  logger.debug(
212
- "Keyword ':TYPE' is reserved for Neo4j. "
213
- "Removing from properties.",
204
+ "Keyword ':TYPE' is reserved for Neo4j. Removing from properties.",
214
205
  # "Renaming to 'type'."
215
206
  )
216
207
  # self.properties["type"] = self.properties[":TYPE"]
217
208
  del self.properties[":TYPE"]
218
209
  elif "id" in self.properties.keys():
219
210
  logger.debug(
220
- "Keyword 'id' is reserved for Neo4j. "
221
- "Removing from properties.",
211
+ "Keyword 'id' is reserved for Neo4j. Removing from properties.",
222
212
  # "Renaming to 'type'."
223
213
  )
224
214
  # self.properties["type"] = self.properties[":TYPE"]
225
215
  del self.properties["id"]
226
216
  elif "_ID" in self.properties.keys():
227
217
  logger.debug(
228
- "Keyword '_ID' is reserved for Postgres. "
229
- "Removing from properties.",
218
+ "Keyword '_ID' is reserved for Postgres. Removing from properties.",
230
219
  # "Renaming to 'type'."
231
220
  )
232
221
  # self.properties["type"] = self.properties[":TYPE"]
@@ -334,20 +323,17 @@ class BioCypherRelAsNode:
334
323
  def __post_init__(self):
335
324
  if not isinstance(self.node, BioCypherNode):
336
325
  raise TypeError(
337
- f"BioCypherRelAsNode.node must be a BioCypherNode, "
338
- f"not {type(self.node)}.",
326
+ f"BioCypherRelAsNode.node must be a BioCypherNode, " f"not {type(self.node)}.",
339
327
  )
340
328
 
341
329
  if not isinstance(self.source_edge, BioCypherEdge):
342
330
  raise TypeError(
343
- f"BioCypherRelAsNode.source_edge must be a BioCypherEdge, "
344
- f"not {type(self.source_edge)}.",
331
+ f"BioCypherRelAsNode.source_edge must be a BioCypherEdge, " f"not {type(self.source_edge)}.",
345
332
  )
346
333
 
347
334
  if not isinstance(self.target_edge, BioCypherEdge):
348
335
  raise TypeError(
349
- f"BioCypherRelAsNode.target_edge must be a BioCypherEdge, "
350
- f"not {type(self.target_edge)}.",
336
+ f"BioCypherRelAsNode.target_edge must be a BioCypherEdge, " f"not {type(self.target_edge)}.",
351
337
  )
352
338
 
353
339
  def get_node(self) -> BioCypherNode:
biocypher/_deduplicate.py CHANGED
@@ -1,9 +1,8 @@
1
+ from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
1
2
  from ._logger import logger
2
3
 
3
4
  logger.debug(f"Loading module {__name__}.")
4
5
 
5
- from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
6
-
7
6
 
8
7
  class Deduplicator:
9
8
  """
@@ -45,9 +44,7 @@ class Deduplicator:
45
44
  if entity.get_id() in self.seen_entity_ids:
46
45
  self.duplicate_entity_ids.add(entity.get_id())
47
46
  if entity.get_label() not in self.duplicate_entity_types:
48
- logger.warning(
49
- f"Duplicate node type {entity.get_label()} found. "
50
- )
47
+ logger.warning(f"Duplicate node type {entity.get_label()} found. ")
51
48
  self.duplicate_entity_types.add(entity.get_label())
52
49
  return True
53
50
 
@@ -69,18 +66,14 @@ class Deduplicator:
69
66
 
70
67
  # concatenate source and target if no id is present
71
68
  if not relationship.get_id():
72
- _id = (
73
- f"{relationship.get_source_id()}_{relationship.get_target_id()}"
74
- )
69
+ _id = f"{relationship.get_source_id()}_{relationship.get_target_id()}"
75
70
  else:
76
71
  _id = relationship.get_id()
77
72
 
78
73
  if _id in self.seen_relationships[relationship.get_type()]:
79
74
  self.duplicate_relationship_ids.add(_id)
80
75
  if relationship.get_type() not in self.duplicate_relationship_types:
81
- logger.warning(
82
- f"Duplicate edge type {relationship.get_type()} found. "
83
- )
76
+ logger.warning(f"Duplicate edge type {relationship.get_type()} found. ")
84
77
  self.duplicate_relationship_types.add(relationship.get_type())
85
78
  return True
86
79
 
biocypher/_get.py CHANGED
@@ -1,38 +1,26 @@
1
- #!/usr/bin/env python
2
-
3
- #
4
- # Copyright 2021, Heidelberg University Clinic
5
- #
6
- # File author(s): Sebastian Lobentanzer
7
- # ...
8
- #
9
- # Distributed under MIT licence, see the file `LICENSE`.
10
- #
11
1
  """
12
2
  BioCypher get module. Used to download and cache data from external sources.
13
3
  """
14
4
 
15
5
  from __future__ import annotations
16
6
 
17
- from typing import Optional
7
+ import ftplib
8
+ import json
9
+ import os
18
10
  import shutil
19
11
 
20
- import requests
21
-
22
- from ._logger import logger
23
-
24
- logger.debug(f"Loading module {__name__}.")
25
-
26
12
  from abc import ABC
27
13
  from datetime import datetime, timedelta
28
14
  from tempfile import TemporaryDirectory
29
- import os
30
- import json
31
- import ftplib
15
+ from typing import Optional
32
16
 
33
17
  import pooch
18
+ import requests
34
19
 
35
- from ._misc import to_list, is_nested
20
+ from ._logger import logger
21
+ from ._misc import is_nested, to_list
22
+
23
+ logger.debug(f"Loading module {__name__}.")
36
24
 
37
25
 
38
26
  class Resource(ABC):
@@ -43,7 +31,6 @@ class Resource(ABC):
43
31
  lifetime: int = 0,
44
32
  ):
45
33
  """
46
-
47
34
  A Resource is a file, a list of files, an API request, or a list of API
48
35
  requests, any of which can be downloaded from the given URL(s) and
49
36
  cached locally. This class implements checks of the minimum requirements
@@ -153,7 +140,6 @@ class Downloader:
153
140
  Returns:
154
141
  list[str]: The path or paths to the downloaded resource(s).
155
142
 
156
-
157
143
  """
158
144
  expired = self._is_cache_expired(resource)
159
145
 
@@ -163,14 +149,10 @@ class Downloader:
163
149
  logger.info(f"Asking for download of resource {resource.name}.")
164
150
  paths = self._download_files(cache, resource)
165
151
  elif isinstance(resource, APIRequest):
166
- logger.info(
167
- f"Asking for download of api request {resource.name}."
168
- )
152
+ logger.info(f"Asking for download of api request {resource.name}.")
169
153
  paths = self._download_api_request(resource)
170
-
171
154
  else:
172
155
  raise TypeError(f"Unknown resource type: {type(resource)}")
173
-
174
156
  else:
175
157
  paths = self.get_cached_version(resource)
176
158
  self._update_cache_record(resource)
@@ -181,17 +163,14 @@ class Downloader:
181
163
  Check if resource or API request cache is expired.
182
164
 
183
165
  Args:
184
-
185
- resource (Resource): The resource or API request to download.
166
+ resource (Resource): The resource to download.
186
167
 
187
168
  Returns:
188
- bool: True if cache is expired, False if not.
169
+ bool: cache is expired or not.
189
170
  """
190
171
  cache_record = self._get_cache_record(resource)
191
172
  if cache_record:
192
- download_time = datetime.strptime(
193
- cache_record.get("date_downloaded"), "%Y-%m-%d %H:%M:%S.%f"
194
- )
173
+ download_time = datetime.strptime(cache_record.get("date_downloaded"), "%Y-%m-%d %H:%M:%S.%f")
195
174
  lifetime = timedelta(days=resource.lifetime)
196
175
  expired = download_time + lifetime < datetime.now()
197
176
  else:
@@ -200,9 +179,7 @@ class Downloader:
200
179
 
201
180
  def _delete_expired_cache(self, resource: Resource):
202
181
  cache_resource_path = self.cache_dir + "/" + resource.name
203
- if os.path.exists(cache_resource_path) and os.path.isdir(
204
- cache_resource_path
205
- ):
182
+ if os.path.exists(cache_resource_path) and os.path.isdir(cache_resource_path):
206
183
  shutil.rmtree(cache_resource_path)
207
184
 
208
185
  def _download_files(self, cache, file_download: FileDownload):
@@ -219,9 +196,7 @@ class Downloader:
219
196
  """
220
197
  if file_download.is_dir:
221
198
  files = self._get_files(file_download)
222
- file_download.url_s = [
223
- file_download.url_s + "/" + file for file in files
224
- ]
199
+ file_download.url_s = [file_download.url_s + "/" + file for file in files]
225
200
  file_download.is_dir = False
226
201
  paths = self._download_or_cache(file_download, cache)
227
202
  elif isinstance(file_download.url_s, list):
@@ -236,9 +211,7 @@ class Downloader:
236
211
  paths.append(path)
237
212
  else:
238
213
  paths = []
239
- fname = file_download.url_s[
240
- file_download.url_s.rfind("/") + 1 :
241
- ].split("?")[0]
214
+ fname = file_download.url_s[file_download.url_s.rfind("/") + 1 :].split("?")[0]
242
215
  results = self._retrieve(
243
216
  url=file_download.url_s,
244
217
  fname=fname,
@@ -259,31 +232,22 @@ class Downloader:
259
232
  Download an API request and return the path.
260
233
 
261
234
  Args:
262
- api_request(APIRequest): The API request result that is being
263
- cached.
235
+ api_request(APIRequest): The API request result that is being cached.
264
236
  Returns:
265
237
  list[str]: The path to the cached API request.
266
238
 
267
239
  """
268
- urls = (
269
- api_request.url_s
270
- if isinstance(api_request.url_s, list)
271
- else [api_request.url_s]
272
- )
240
+ urls = api_request.url_s if isinstance(api_request.url_s, list) else [api_request.url_s]
273
241
  paths = []
274
242
  for url in urls:
275
243
  fname = url[url.rfind("/") + 1 :].rsplit(".", 1)[0]
276
- logger.info(
277
- f"Asking for caching API of {api_request.name} {fname}."
278
- )
244
+ logger.info(f"Asking for caching API of {api_request.name} {fname}.")
279
245
  response = requests.get(url=url)
280
246
 
281
247
  if response.status_code != 200:
282
248
  response.raise_for_status()
283
249
  response_data = response.json()
284
- api_path = os.path.join(
285
- self.cache_dir, api_request.name, f"{fname}.json"
286
- )
250
+ api_path = os.path.join(self.cache_dir, api_request.name, f"{fname}.json")
287
251
 
288
252
  os.makedirs(os.path.dirname(api_path), exist_ok=True)
289
253
  with open(api_path, "w") as f:
@@ -300,7 +264,6 @@ class Downloader:
300
264
 
301
265
  Returns:
302
266
  list[str]: The paths to the cached resource(s).
303
-
304
267
  """
305
268
  cached_location = os.path.join(self.cache_dir, resource.name)
306
269
  logger.info(f"Use cached version from {cached_location}.")
@@ -390,9 +353,7 @@ class Downloader:
390
353
  files = ftp.nlst()
391
354
  ftp.quit()
392
355
  else:
393
- raise NotImplementedError(
394
- "Only FTP directories are supported at the moment."
395
- )
356
+ raise NotImplementedError("Only FTP directories are supported at the moment.")
396
357
 
397
358
  return files
398
359
 
biocypher/_logger.py CHANGED
@@ -1,23 +1,14 @@
1
- #!/usr/bin/env python
2
-
3
- #
4
- # Copyright 2021, Heidelberg University Clinic
5
- #
6
- # File author(s): Sebastian Lobentanzer
7
- # ...
8
- #
9
- # Distributed under MIT licence, see the file `LICENSE`.
10
- #
11
1
  """
12
2
  Configuration of the module logger.
13
3
  """
14
4
 
15
5
  __all__ = ["get_logger", "log", "logfile"]
16
6
 
17
- from datetime import datetime
7
+ import logging
18
8
  import os
19
9
  import pydoc
20
- import logging
10
+
11
+ from datetime import datetime
21
12
 
22
13
  from biocypher import _config
23
14
  from biocypher._metadata import __version__
@@ -63,10 +54,7 @@ def get_logger(name: str = "biocypher") -> logging.Logger:
63
54
  log_to_disk = _config.config("biocypher").get("log_to_disk")
64
55
 
65
56
  if log_to_disk:
66
- logdir = (
67
- _config.config("biocypher").get("log_directory")
68
- or "biocypher-log"
69
- )
57
+ logdir = _config.config("biocypher").get("log_directory") or "biocypher-log"
70
58
  os.makedirs(logdir, exist_ok=True)
71
59
  logfile = os.path.join(logdir, f"biocypher-{date_time}.log")
72
60
 
biocypher/_mapping.py CHANGED
@@ -1,20 +1,7 @@
1
- #!/usr/bin/env python
2
-
3
- #
4
- # Copyright 2021, Heidelberg University Clinic
5
- #
6
- # File author(s): Sebastian Lobentanzer
7
- # ...
8
- #
9
- # Distributed under MIT licence, see the file `LICENSE`.
10
- #
11
1
  """
12
2
  BioCypher 'mapping' module. Handles the mapping of user-defined schema to the
13
3
  underlying ontology.
14
4
  """
15
- from ._logger import logger
16
-
17
- logger.debug(f"Loading module {__name__}.")
18
5
 
19
6
  from typing import Optional
20
7
  from urllib.request import urlopen
@@ -22,7 +9,9 @@ from urllib.request import urlopen
22
9
  import yaml
23
10
 
24
11
  from . import _misc
25
- from ._config import config as _config
12
+ from ._logger import logger
13
+
14
+ logger.debug(f"Loading module {__name__}.")
26
15
 
27
16
 
28
17
  class OntologyMapping:
@@ -151,9 +140,7 @@ class OntologyMapping:
151
140
  if parent_props:
152
141
  v["properties"].update(parent_props)
153
142
 
154
- parent_excl_props = self.schema[parent].get(
155
- "exclude_properties", {}
156
- )
143
+ parent_excl_props = self.schema[parent].get("exclude_properties", {})
157
144
  if parent_excl_props:
158
145
  v["exclude_properties"].update(parent_excl_props)
159
146
 
biocypher/_metadata.py CHANGED
@@ -1,25 +1,16 @@
1
- #!/usr/bin/env python
2
- #
3
- # Copyright 2021, Heidelberg University Clinic
4
- #
5
- # File author(s): Sebastian Lobentanzer
6
- # ...
7
- #
8
- # Distributed under MIT licence, see the file `LICENSE`.
9
- #
10
1
  """
11
2
  Package metadata (version, authors, etc).
12
3
  """
13
4
 
14
5
  __all__ = ["get_metadata"]
15
6
 
7
+ import importlib.metadata
16
8
  import os
17
9
  import pathlib
18
- import importlib.metadata
19
10
 
20
11
  import toml
21
12
 
22
- _VERSION = "0.6.2"
13
+ _VERSION = "0.7.0"
23
14
 
24
15
 
25
16
  def get_metadata():
@@ -52,10 +43,7 @@ def get_metadata():
52
43
 
53
44
  if not meta:
54
45
  try:
55
- meta = {
56
- k.lower(): v
57
- for k, v in importlib.metadata.metadata(here.name).items()
58
- }
46
+ meta = {k.lower(): v for k, v in importlib.metadata.metadata(here.name).items()}
59
47
 
60
48
  except importlib.metadata.PackageNotFoundError:
61
49
  pass
biocypher/_misc.py CHANGED
@@ -1,36 +1,29 @@
1
- #!/usr/bin/env python
2
-
3
- #
4
- # Copyright 2021, Heidelberg University Clinic
5
- #
6
- # File author(s): Sebastian Lobentanzer
7
- # ...
8
- #
9
- # Distributed under MIT licence, see the file `LICENSE`.
10
- #
11
1
  """
12
2
  Handy functions for use in various places.
13
3
  """
14
- from ._logger import logger
15
4
 
16
- logger.debug(f"Loading module {__name__}.")
5
+ import re
17
6
 
7
+ from collections.abc import Iterable
18
8
  from typing import (
19
9
  Any,
20
- Union,
21
- Mapping,
22
- KeysView,
23
10
  Generator,
24
11
  ItemsView,
12
+ KeysView,
13
+ Mapping,
14
+ Union,
25
15
  ValuesView,
26
16
  )
27
- from collections.abc import Iterable
28
- import re
29
17
 
30
- from treelib import Tree
31
18
  import networkx as nx
32
19
  import stringcase
33
20
 
21
+ from treelib import Tree
22
+
23
+ from ._logger import logger
24
+
25
+ logger.debug(f"Loading module {__name__}.")
26
+
34
27
  __all__ = ["LIST_LIKE", "SIMPLE_TYPES", "ensure_iterable", "to_list"]
35
28
 
36
29
  SIMPLE_TYPES = (
@@ -110,9 +103,7 @@ def _get_inheritance_tree(inheritance_graph: Union[dict, nx.Graph]) -> dict:
110
103
  if isinstance(inheritance_graph, nx.Graph):
111
104
  inheritance_tree = nx.to_dict_of_lists(inheritance_graph)
112
105
 
113
- multiple_parents_present = _multiple_inheritance_present(
114
- inheritance_tree
115
- )
106
+ multiple_parents_present = _multiple_inheritance_present(inheritance_tree)
116
107
  if multiple_parents_present:
117
108
  logger.warning(
118
109
  "The ontology contains multiple inheritance (one child node "
@@ -143,17 +134,12 @@ def _find_root_node(inheritance_tree: dict) -> tuple[set, str]:
143
134
  if "entity" in root:
144
135
  root = "entity" # TODO: default: good standard?
145
136
  else:
146
- raise ValueError(
147
- "Inheritance tree cannot have more than one root node. "
148
- f"Found {len(root)}: {root}."
149
- )
137
+ raise ValueError("Inheritance tree cannot have more than one root node. " f"Found {len(root)}: {root}.")
150
138
  else:
151
139
  root = root[0]
152
140
  if not root:
153
141
  # find key whose value is None
154
- root = list(inheritance_tree.keys())[
155
- list(inheritance_tree.values()).index(None)
156
- ]
142
+ root = list(inheritance_tree.keys())[list(inheritance_tree.values()).index(None)]
157
143
  return classes, root
158
144
 
159
145