biocypher 0.5.44__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biocypher might be problematic. Click here for more details.

Files changed (39)
  1. {biocypher-0.5.44 → biocypher-0.6.0}/PKG-INFO +1 -1
  2. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_get.py +165 -74
  3. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_metadata.py +1 -1
  4. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_misc.py +16 -0
  5. {biocypher-0.5.44 → biocypher-0.6.0}/pyproject.toml +1 -1
  6. {biocypher-0.5.44 → biocypher-0.6.0}/LICENSE +0 -0
  7. {biocypher-0.5.44 → biocypher-0.6.0}/README.md +0 -0
  8. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/__init__.py +0 -0
  9. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_config/__init__.py +0 -0
  10. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_config/biocypher_config.yaml +0 -0
  11. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_config/test_config.yaml +0 -0
  12. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_config/test_schema_config.yaml +0 -0
  13. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_config/test_schema_config_disconnected.yaml +0 -0
  14. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_config/test_schema_config_extended.yaml +0 -0
  15. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_core.py +0 -0
  16. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_create.py +0 -0
  17. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_deduplicate.py +0 -0
  18. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_logger.py +0 -0
  19. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_mapping.py +0 -0
  20. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_ontology.py +0 -0
  21. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_translate.py +0 -0
  22. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/output/__init__.py +0 -0
  23. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/output/connect/__init__.py +0 -0
  24. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/output/connect/_neo4j_driver.py +0 -0
  25. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/output/in_memory/__init__.py +0 -0
  26. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/output/in_memory/_pandas.py +0 -0
  27. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/output/write/__init__.py +0 -0
  28. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/output/write/_batch_writer.py +0 -0
  29. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/output/write/_get_writer.py +0 -0
  30. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/output/write/_writer.py +0 -0
  31. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/output/write/graph/__init__.py +0 -0
  32. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/output/write/graph/_arangodb.py +0 -0
  33. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/output/write/graph/_neo4j.py +0 -0
  34. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/output/write/graph/_networkx.py +0 -0
  35. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/output/write/graph/_rdf.py +0 -0
  36. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/output/write/relational/__init__.py +0 -0
  37. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/output/write/relational/_csv.py +0 -0
  38. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/output/write/relational/_postgresql.py +0 -0
  39. {biocypher-0.5.44 → biocypher-0.6.0}/biocypher/output/write/relational/_sqlite.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: biocypher
3
- Version: 0.5.44
3
+ Version: 0.6.0
4
4
  Summary: A unifying framework for biomedical research knowledge graphs
5
5
  Home-page: https://github.com/biocypher/biocypher
6
6
  License: MIT
@@ -17,10 +17,13 @@ from __future__ import annotations
17
17
  from typing import Optional
18
18
  import shutil
19
19
 
20
+ import requests
21
+
20
22
  from ._logger import logger
21
23
 
22
24
  logger.debug(f"Loading module {__name__}.")
23
25
 
26
+ from abc import ABC
24
27
  from datetime import datetime, timedelta
25
28
  from tempfile import TemporaryDirectory
26
29
  import os
@@ -29,21 +32,22 @@ import ftplib
29
32
 
30
33
  import pooch
31
34
 
32
- from ._misc import to_list
35
+ from ._misc import to_list, is_nested
33
36
 
34
37
 
35
- class Resource:
38
+ class Resource(ABC):
36
39
  def __init__(
37
40
  self,
38
41
  name: str,
39
42
  url_s: str | list[str],
40
43
  lifetime: int = 0,
41
- is_dir: bool = False,
42
44
  ):
43
45
  """
44
- A resource is a file that can be downloaded from a URL and cached
45
- locally. This class implements checks of the minimum requirements for
46
- a resource, to be implemented by a biocypher adapter.
46
+
47
+ A Resource is a file, a list of files, an API request, or a list of API
48
+ requests, any of which can be downloaded from the given URL(s) and
49
+ cached locally. This class implements checks of the minimum requirements
50
+ for a resource, to be implemented by a biocypher adapter.
47
51
 
48
52
  Args:
49
53
  name (str): The name of the resource.
@@ -52,43 +56,83 @@ class Resource:
52
56
 
53
57
  lifetime (int): The lifetime of the resource in days. If 0, the
54
58
  resource is considered to be permanent.
55
-
56
- is_dir (bool): Whether the resource is a directory or not.
57
59
  """
58
60
  self.name = name
59
61
  self.url_s = url_s
60
62
  self.lifetime = lifetime
63
+
64
+
65
+ class FileDownload(Resource):
66
+ def __init__(
67
+ self,
68
+ name: str,
69
+ url_s: str | list[str],
70
+ lifetime: int = 0,
71
+ is_dir: bool = False,
72
+ ):
73
+ """
74
+ Represents basic information for a File Download.
75
+
76
+ Args:
77
+ name(str): The name of the File Download.
78
+
79
+ url_s(str|list[str]): The URL(s) of the File Download.
80
+
81
+ lifetime(int): The lifetime of the File Download in days. If 0, the
82
+ File Download is cached indefinitely.
83
+
84
+ is_dir (bool): Whether the URL points to a directory or not.
85
+ """
86
+
87
+ super().__init__(name, url_s, lifetime)
61
88
  self.is_dir = is_dir
62
89
 
63
90
 
91
+ class APIRequest(Resource):
92
+ def __init__(self, name: str, url_s: str | list[str], lifetime: int = 0):
93
+ """
94
+ Represents basic information for an API Request.
95
+
96
+ Args:
97
+ name(str): The name of the API Request.
98
+
99
+ url_s(str|list): The URL of the API endpoint.
100
+
101
+ lifetime(int): The lifetime of the API Request in days. If 0, the
102
+ API Request is cached indefinitely.
103
+
104
+ """
105
+ super().__init__(name, url_s, lifetime)
106
+
107
+
64
108
  class Downloader:
65
109
  def __init__(self, cache_dir: Optional[str] = None) -> None:
66
110
  """
67
- A downloader is a collection of resources that can be downloaded
111
+ The Downloader is a class that manages resources that can be downloaded
68
112
  and cached locally. It manages the lifetime of downloaded resources by
69
113
  keeping a JSON record of the download date of each resource.
70
114
 
71
115
  Args:
72
116
  cache_dir (str): The directory where the resources are cached. If
73
117
  not given, a temporary directory is created.
74
-
75
- Returns:
76
- Downloader: The downloader object.
77
118
  """
78
119
  self.cache_dir = cache_dir or TemporaryDirectory().name
79
120
  self.cache_file = os.path.join(self.cache_dir, "cache.json")
80
121
  self.cache_dict = self._load_cache_dict()
81
122
 
82
- # download function that accepts a resource or a list of resources
83
123
  def download(self, *resources: Resource):
84
124
  """
85
- Download one or multiple resources.
125
+ Download one or multiple resources. Load from cache if the resource is
126
+ already downloaded and the cache is not expired.
86
127
 
87
128
  Args:
88
- resources (Resource): The resource or resources to download.
129
+ resources (Resource): The resource(s) to download or load from
130
+ cache.
89
131
 
90
132
  Returns:
91
- str or list: The path or paths to the downloaded resource(s).
133
+ list[str]: The path or paths to the resource(s) that were downloaded
134
+ or loaded from cache.
135
+
92
136
  """
93
137
  paths = []
94
138
  for resource in resources:
@@ -106,16 +150,27 @@ class Downloader:
106
150
 
107
151
  Args:
108
152
  resource (Resource): The resource to download.
109
-
110
153
  Returns:
111
- str or list: The path or paths to the downloaded resource(s).
154
+ list[str]: The path or paths to the downloaded resource(s).
155
+
156
+
112
157
  """
113
158
  expired = self._is_cache_expired(resource)
114
159
 
115
160
  if expired or not cache:
116
- self._delete_expired_resource_cache(resource)
117
- logger.info(f"Asking for download of {resource.name}.")
118
- paths = self._download_resource(cache, resource)
161
+ self._delete_expired_cache(resource)
162
+ if isinstance(resource, FileDownload):
163
+ logger.info(f"Asking for download of resource {resource.name}.")
164
+ paths = self._download_files(cache, resource)
165
+ elif isinstance(resource, APIRequest):
166
+ logger.info(
167
+ f"Asking for download of api request {resource.name}."
168
+ )
169
+ paths = self._download_api_request(resource)
170
+
171
+ else:
172
+ raise TypeError(f"Unknown resource type: {type(resource)}")
173
+
119
174
  else:
120
175
  paths = self.get_cached_version(resource)
121
176
  self._update_cache_record(resource)
@@ -123,13 +178,14 @@ class Downloader:
123
178
 
124
179
  def _is_cache_expired(self, resource: Resource) -> bool:
125
180
  """
126
- Check if resource cache is expired.
181
+ Check if resource or API request cache is expired.
127
182
 
128
183
  Args:
129
- resource (Resource): The resource to download.
184
+
185
+ resource (Resource): The resource or API request to download.
130
186
 
131
187
  Returns:
132
- bool: cache is expired or not.
188
+ bool: True if cache is expired, False if not.
133
189
  """
134
190
  cache_record = self._get_cache_record(resource)
135
191
  if cache_record:
@@ -142,65 +198,116 @@ class Downloader:
142
198
  expired = True
143
199
  return expired
144
200
 
145
- def _delete_expired_resource_cache(self, resource: Resource):
146
- resource_cache_path = self.cache_dir + "/" + resource.name
147
- if os.path.exists(resource_cache_path) and os.path.isdir(
148
- resource_cache_path
201
+ def _delete_expired_cache(self, resource: Resource):
202
+ cache_resource_path = self.cache_dir + "/" + resource.name
203
+ if os.path.exists(cache_resource_path) and os.path.isdir(
204
+ cache_resource_path
149
205
  ):
150
- shutil.rmtree(resource_cache_path)
206
+ shutil.rmtree(cache_resource_path)
151
207
 
152
- def _download_resource(self, cache, resource):
153
- """Download a resource.
208
+ def _download_files(self, cache, file_download: FileDownload):
209
+ """
210
+ Download a resource given it is a file or a directory and return the
211
+ path.
154
212
 
155
213
  Args:
156
214
  cache (bool): Whether to cache the resource or not.
157
- resource (Resource): The resource to download.
215
+ file_download (FileDownload): The resource to download.
158
216
 
159
217
  Returns:
160
- str or list: The path or paths to the downloaded resource(s).
218
+ list[str]: The path or paths to the downloaded resource(s).
161
219
  """
162
- if resource.is_dir:
163
- files = self._get_files(resource)
164
- resource.url_s = [resource.url_s + "/" + file for file in files]
165
- resource.is_dir = False
166
- paths = self._download_or_cache(resource, cache)
167
- elif isinstance(resource.url_s, list):
220
+ if file_download.is_dir:
221
+ files = self._get_files(file_download)
222
+ file_download.url_s = [
223
+ file_download.url_s + "/" + file for file in files
224
+ ]
225
+ file_download.is_dir = False
226
+ paths = self._download_or_cache(file_download, cache)
227
+ elif isinstance(file_download.url_s, list):
168
228
  paths = []
169
- for url in resource.url_s:
170
- fname = url[url.rfind("/") + 1 :]
229
+ for url in file_download.url_s:
230
+ fname = url[url.rfind("/") + 1 :].split("?")[0]
171
231
  paths.append(
172
232
  self._retrieve(
173
233
  url=url,
174
234
  fname=fname,
175
- path=os.path.join(self.cache_dir, resource.name),
235
+ path=os.path.join(self.cache_dir, file_download.name),
176
236
  )
177
237
  )
178
238
  else:
179
- fname = resource.url_s[resource.url_s.rfind("/") + 1 :]
180
- paths = self._retrieve(
181
- url=resource.url_s,
239
+ paths = []
240
+ fname = file_download.url_s[
241
+ file_download.url_s.rfind("/") + 1 :
242
+ ].split("?")[0]
243
+ results = self._retrieve(
244
+ url=file_download.url_s,
182
245
  fname=fname,
183
- path=os.path.join(self.cache_dir, resource.name),
246
+ path=os.path.join(self.cache_dir, file_download.name),
184
247
  )
248
+ if isinstance(results, list):
249
+ paths.extend(results)
250
+ else:
251
+ paths.append(results)
252
+
185
253
  # sometimes a compressed file contains multiple files
186
254
  # TODO ask for a list of files in the archive to be used from the
187
255
  # adapter
188
256
  return paths
189
257
 
190
- def get_cached_version(self, resource) -> list[str]:
258
+ def _download_api_request(self, api_request: APIRequest):
259
+ """
260
+ Download an API request and return the path.
261
+
262
+ Args:
263
+ api_request(APIRequest): The API request result that is being
264
+ cached.
265
+ Returns:
266
+ list[str]: The path to the cached API request.
267
+
268
+ """
269
+ urls = (
270
+ api_request.url_s
271
+ if isinstance(api_request.url_s, list)
272
+ else [api_request.url_s]
273
+ )
274
+ paths = []
275
+ for url in urls:
276
+ fname = url[url.rfind("/") + 1 :].rsplit(".", 1)[0]
277
+ logger.info(
278
+ f"Asking for caching API of {api_request.name} {fname}."
279
+ )
280
+ response = requests.get(url=url)
281
+
282
+ if response.status_code != 200:
283
+ response.raise_for_status()
284
+ response_data = response.json()
285
+ api_path = os.path.join(
286
+ self.cache_dir, api_request.name, f"{fname}.json"
287
+ )
288
+
289
+ os.makedirs(os.path.dirname(api_path), exist_ok=True)
290
+ with open(api_path, "w") as f:
291
+ json.dump(response_data, f)
292
+ logger.info(f"Caching API request to {api_path}.")
293
+ paths.append(api_path)
294
+ return paths
295
+
296
+ def get_cached_version(self, resource: Resource) -> list[str]:
191
297
  """Get the cached version of a resource.
192
298
 
193
299
  Args:
194
- resource (Resource): The resource to get the cached version of.
300
+ resource(Resource): The resource to get the cached version of.
195
301
 
196
302
  Returns:
197
303
  list[str]: The paths to the cached resource(s).
304
+
198
305
  """
199
- cached_resource_location = os.path.join(self.cache_dir, resource.name)
200
- logger.info(f"Use cached version from {cached_resource_location}.")
306
+ cached_location = os.path.join(self.cache_dir, resource.name)
307
+ logger.info(f"Use cached version from {cached_location}.")
201
308
  paths = []
202
- for file in os.listdir(cached_resource_location):
203
- paths.append(os.path.join(cached_resource_location, file))
309
+ for file in os.listdir(cached_location):
310
+ paths.append(os.path.join(cached_location, file))
204
311
  return paths
205
312
 
206
313
  def _retrieve(
@@ -260,23 +367,23 @@ class Downloader:
260
367
  progressbar=True,
261
368
  )
262
369
 
263
- def _get_files(self, resource: Resource):
370
+ def _get_files(self, file_download: FileDownload):
264
371
  """
265
- Get the files contained in a directory resource.
372
+ Get the files contained in a directory file.
266
373
 
267
374
  Args:
268
- resource (Resource): The directory resource.
375
+ file_download (FileDownload): The directory file.
269
376
 
270
377
  Returns:
271
378
  list: The files contained in the directory.
272
379
  """
273
- if resource.url_s.startswith("ftp://"):
380
+ if file_download.url_s.startswith("ftp://"):
274
381
  # remove protocol
275
- url = resource.url_s[6:]
382
+ url = file_download.url_s[6:]
276
383
  # get base url
277
384
  url = url[: url.find("/")]
278
385
  # get directory (remove initial slash as well)
279
- dir = resource.url_s[7 + len(url) :]
386
+ dir = file_download.url_s[7 + len(url) :]
280
387
  # get files
281
388
  ftp = ftplib.FTP(url)
282
389
  ftp.login()
@@ -334,19 +441,3 @@ class Downloader:
334
441
  self.cache_dict[resource.name] = cache_record
335
442
  with open(self.cache_file, "w") as f:
336
443
  json.dump(self.cache_dict, f, default=str)
337
-
338
-
339
- def is_nested(lst):
340
- """
341
- Check if a list is nested.
342
-
343
- Args:
344
- lst (list): The list to check.
345
-
346
- Returns:
347
- bool: True if the list is nested, False otherwise.
348
- """
349
- for item in lst:
350
- if isinstance(item, list):
351
- return True
352
- return False
@@ -19,7 +19,7 @@ import importlib.metadata
19
19
 
20
20
  import toml
21
21
 
22
- _VERSION = "0.5.44"
22
+ _VERSION = "0.6.0"
23
23
 
24
24
 
25
25
  def get_metadata():
@@ -246,3 +246,19 @@ def to_lower_sentence_case(s: str) -> str:
246
246
  return pascalcase_to_sentencecase(s)
247
247
  else:
248
248
  return s
249
+
250
+
251
+ def is_nested(lst) -> bool:
252
+ """
253
+ Check if a list is nested.
254
+
255
+ Args:
256
+ lst (list): The list to check.
257
+
258
+ Returns:
259
+ bool: True if the list is nested, False otherwise.
260
+ """
261
+ for item in lst:
262
+ if isinstance(item, list):
263
+ return True
264
+ return False
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "biocypher"
3
- version = "0.5.44"
3
+ version = "0.6.0"
4
4
  description = "A unifying framework for biomedical research knowledge graphs"
5
5
  authors = [
6
6
  "Sebastian Lobentanzer <sebastian.lobentanzer@gmail.com>",
File without changes
File without changes
File without changes