biocypher 0.5.44__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biocypher might be problematic. Click here for more details.

biocypher/__init__.py CHANGED
@@ -21,10 +21,11 @@ __all__ = [
21
21
  "log",
22
22
  "Driver",
23
23
  "BioCypher",
24
- "Resource",
24
+ "FileDownload",
25
+ "APIRequest",
25
26
  ]
26
27
 
27
- from ._get import Resource
28
+ from ._get import APIRequest, FileDownload
28
29
  from ._core import BioCypher
29
30
  from ._config import config, module_data
30
31
  from ._logger import log, logger, logfile
biocypher/_get.py CHANGED
@@ -17,10 +17,13 @@ from __future__ import annotations
17
17
  from typing import Optional
18
18
  import shutil
19
19
 
20
+ import requests
21
+
20
22
  from ._logger import logger
21
23
 
22
24
  logger.debug(f"Loading module {__name__}.")
23
25
 
26
+ from abc import ABC
24
27
  from datetime import datetime, timedelta
25
28
  from tempfile import TemporaryDirectory
26
29
  import os
@@ -29,21 +32,22 @@ import ftplib
29
32
 
30
33
  import pooch
31
34
 
32
- from ._misc import to_list
35
+ from ._misc import to_list, is_nested
33
36
 
34
37
 
35
- class Resource:
38
+ class Resource(ABC):
36
39
  def __init__(
37
40
  self,
38
41
  name: str,
39
42
  url_s: str | list[str],
40
43
  lifetime: int = 0,
41
- is_dir: bool = False,
42
44
  ):
43
45
  """
44
- A resource is a file that can be downloaded from a URL and cached
45
- locally. This class implements checks of the minimum requirements for
46
- a resource, to be implemented by a biocypher adapter.
46
+
47
+ A Resource is a file, a list of files, an API request, or a list of API
48
+ requests, any of which can be downloaded from the given URL(s) and
49
+ cached locally. This class implements checks of the minimum requirements
50
+ for a resource, to be implemented by a biocypher adapter.
47
51
 
48
52
  Args:
49
53
  name (str): The name of the resource.
@@ -52,43 +56,83 @@ class Resource:
52
56
 
53
57
  lifetime (int): The lifetime of the resource in days. If 0, the
54
58
  resource is considered to be permanent.
55
-
56
- is_dir (bool): Whether the resource is a directory or not.
57
59
  """
58
60
  self.name = name
59
61
  self.url_s = url_s
60
62
  self.lifetime = lifetime
63
+
64
+
65
+ class FileDownload(Resource):
66
+ def __init__(
67
+ self,
68
+ name: str,
69
+ url_s: str | list[str],
70
+ lifetime: int = 0,
71
+ is_dir: bool = False,
72
+ ):
73
+ """
74
+ Represents basic information for a File Download.
75
+
76
+ Args:
77
+ name(str): The name of the File Download.
78
+
79
+ url_s(str|list[str]): The URL(s) of the File Download.
80
+
81
+ lifetime(int): The lifetime of the File Download in days. If 0, the
82
+ File Download is cached indefinitely.
83
+
84
+ is_dir (bool): Whether the URL points to a directory or not.
85
+ """
86
+
87
+ super().__init__(name, url_s, lifetime)
61
88
  self.is_dir = is_dir
62
89
 
63
90
 
91
+ class APIRequest(Resource):
92
+ def __init__(self, name: str, url_s: str | list[str], lifetime: int = 0):
93
+ """
94
+ Represents basic information for an API Request.
95
+
96
+ Args:
97
+ name(str): The name of the API Request.
98
+
99
+ url_s(str|list): The URL of the API endpoint.
100
+
101
+ lifetime(int): The lifetime of the API Request in days. If 0, the
102
+ API Request is cached indefinitely.
103
+
104
+ """
105
+ super().__init__(name, url_s, lifetime)
106
+
107
+
64
108
  class Downloader:
65
109
  def __init__(self, cache_dir: Optional[str] = None) -> None:
66
110
  """
67
- A downloader is a collection of resources that can be downloaded
111
+ The Downloader is a class that manages resources that can be downloaded
68
112
  and cached locally. It manages the lifetime of downloaded resources by
69
113
  keeping a JSON record of the download date of each resource.
70
114
 
71
115
  Args:
72
116
  cache_dir (str): The directory where the resources are cached. If
73
117
  not given, a temporary directory is created.
74
-
75
- Returns:
76
- Downloader: The downloader object.
77
118
  """
78
119
  self.cache_dir = cache_dir or TemporaryDirectory().name
79
120
  self.cache_file = os.path.join(self.cache_dir, "cache.json")
80
121
  self.cache_dict = self._load_cache_dict()
81
122
 
82
- # download function that accepts a resource or a list of resources
83
123
  def download(self, *resources: Resource):
84
124
  """
85
- Download one or multiple resources.
125
+ Download one or multiple resources. Load from cache if the resource is
126
+ already downloaded and the cache is not expired.
86
127
 
87
128
  Args:
88
- resources (Resource): The resource or resources to download.
129
+ resources (Resource): The resource(s) to download or load from
130
+ cache.
89
131
 
90
132
  Returns:
91
- str or list: The path or paths to the downloaded resource(s).
133
+ list[str]: The path or paths to the resource(s) that were downloaded
134
+ or loaded from cache.
135
+
92
136
  """
93
137
  paths = []
94
138
  for resource in resources:
@@ -106,16 +150,27 @@ class Downloader:
106
150
 
107
151
  Args:
108
152
  resource (Resource): The resource to download.
109
-
110
153
  Returns:
111
- str or list: The path or paths to the downloaded resource(s).
154
+ list[str]: The path or paths to the downloaded resource(s).
155
+
156
+
112
157
  """
113
158
  expired = self._is_cache_expired(resource)
114
159
 
115
160
  if expired or not cache:
116
- self._delete_expired_resource_cache(resource)
117
- logger.info(f"Asking for download of {resource.name}.")
118
- paths = self._download_resource(cache, resource)
161
+ self._delete_expired_cache(resource)
162
+ if isinstance(resource, FileDownload):
163
+ logger.info(f"Asking for download of resource {resource.name}.")
164
+ paths = self._download_files(cache, resource)
165
+ elif isinstance(resource, APIRequest):
166
+ logger.info(
167
+ f"Asking for download of api request {resource.name}."
168
+ )
169
+ paths = self._download_api_request(resource)
170
+
171
+ else:
172
+ raise TypeError(f"Unknown resource type: {type(resource)}")
173
+
119
174
  else:
120
175
  paths = self.get_cached_version(resource)
121
176
  self._update_cache_record(resource)
@@ -123,13 +178,14 @@ class Downloader:
123
178
 
124
179
  def _is_cache_expired(self, resource: Resource) -> bool:
125
180
  """
126
- Check if resource cache is expired.
181
+ Check if resource or API request cache is expired.
127
182
 
128
183
  Args:
129
- resource (Resource): The resource to download.
184
+
185
+ resource (Resource): The resource or API request to download.
130
186
 
131
187
  Returns:
132
- bool: cache is expired or not.
188
+ bool: True if cache is expired, False if not.
133
189
  """
134
190
  cache_record = self._get_cache_record(resource)
135
191
  if cache_record:
@@ -142,65 +198,115 @@ class Downloader:
142
198
  expired = True
143
199
  return expired
144
200
 
145
- def _delete_expired_resource_cache(self, resource: Resource):
146
- resource_cache_path = self.cache_dir + "/" + resource.name
147
- if os.path.exists(resource_cache_path) and os.path.isdir(
148
- resource_cache_path
201
+ def _delete_expired_cache(self, resource: Resource):
202
+ cache_resource_path = self.cache_dir + "/" + resource.name
203
+ if os.path.exists(cache_resource_path) and os.path.isdir(
204
+ cache_resource_path
149
205
  ):
150
- shutil.rmtree(resource_cache_path)
206
+ shutil.rmtree(cache_resource_path)
151
207
 
152
- def _download_resource(self, cache, resource):
153
- """Download a resource.
208
+ def _download_files(self, cache, file_download: FileDownload):
209
+ """
210
+ Download a resource given it is a file or a directory and return the
211
+ path.
154
212
 
155
213
  Args:
156
214
  cache (bool): Whether to cache the resource or not.
157
- resource (Resource): The resource to download.
215
+ file_download (FileDownload): The resource to download.
158
216
 
159
217
  Returns:
160
- str or list: The path or paths to the downloaded resource(s).
218
+ list[str]: The path or paths to the downloaded resource(s).
161
219
  """
162
- if resource.is_dir:
163
- files = self._get_files(resource)
164
- resource.url_s = [resource.url_s + "/" + file for file in files]
165
- resource.is_dir = False
166
- paths = self._download_or_cache(resource, cache)
167
- elif isinstance(resource.url_s, list):
220
+ if file_download.is_dir:
221
+ files = self._get_files(file_download)
222
+ file_download.url_s = [
223
+ file_download.url_s + "/" + file for file in files
224
+ ]
225
+ file_download.is_dir = False
226
+ paths = self._download_or_cache(file_download, cache)
227
+ elif isinstance(file_download.url_s, list):
168
228
  paths = []
169
- for url in resource.url_s:
170
- fname = url[url.rfind("/") + 1 :]
171
- paths.append(
172
- self._retrieve(
173
- url=url,
174
- fname=fname,
175
- path=os.path.join(self.cache_dir, resource.name),
176
- )
229
+ for url in file_download.url_s:
230
+ fname = url[url.rfind("/") + 1 :].split("?")[0]
231
+ path = self._retrieve(
232
+ url=url,
233
+ fname=fname,
234
+ path=os.path.join(self.cache_dir, file_download.name),
177
235
  )
236
+ paths.append(path)
178
237
  else:
179
- fname = resource.url_s[resource.url_s.rfind("/") + 1 :]
180
- paths = self._retrieve(
181
- url=resource.url_s,
238
+ paths = []
239
+ fname = file_download.url_s[
240
+ file_download.url_s.rfind("/") + 1 :
241
+ ].split("?")[0]
242
+ results = self._retrieve(
243
+ url=file_download.url_s,
182
244
  fname=fname,
183
- path=os.path.join(self.cache_dir, resource.name),
245
+ path=os.path.join(self.cache_dir, file_download.name),
184
246
  )
247
+ if isinstance(results, list):
248
+ paths.extend(results)
249
+ else:
250
+ paths.append(results)
251
+
185
252
  # sometimes a compressed file contains multiple files
186
253
  # TODO ask for a list of files in the archive to be used from the
187
254
  # adapter
188
255
  return paths
189
256
 
190
- def get_cached_version(self, resource) -> list[str]:
257
+ def _download_api_request(self, api_request: APIRequest):
258
+ """
259
+ Download an API request and return the path.
260
+
261
+ Args:
262
+ api_request(APIRequest): The API request result that is being
263
+ cached.
264
+ Returns:
265
+ list[str]: The path to the cached API request.
266
+
267
+ """
268
+ urls = (
269
+ api_request.url_s
270
+ if isinstance(api_request.url_s, list)
271
+ else [api_request.url_s]
272
+ )
273
+ paths = []
274
+ for url in urls:
275
+ fname = url[url.rfind("/") + 1 :].rsplit(".", 1)[0]
276
+ logger.info(
277
+ f"Asking for caching API of {api_request.name} {fname}."
278
+ )
279
+ response = requests.get(url=url)
280
+
281
+ if response.status_code != 200:
282
+ response.raise_for_status()
283
+ response_data = response.json()
284
+ api_path = os.path.join(
285
+ self.cache_dir, api_request.name, f"{fname}.json"
286
+ )
287
+
288
+ os.makedirs(os.path.dirname(api_path), exist_ok=True)
289
+ with open(api_path, "w") as f:
290
+ json.dump(response_data, f)
291
+ logger.info(f"Caching API request to {api_path}.")
292
+ paths.append(api_path)
293
+ return paths
294
+
295
+ def get_cached_version(self, resource: Resource) -> list[str]:
191
296
  """Get the cached version of a resource.
192
297
 
193
298
  Args:
194
- resource (Resource): The resource to get the cached version of.
299
+ resource(Resource): The resource to get the cached version of.
195
300
 
196
301
  Returns:
197
302
  list[str]: The paths to the cached resource(s).
303
+
198
304
  """
199
- cached_resource_location = os.path.join(self.cache_dir, resource.name)
200
- logger.info(f"Use cached version from {cached_resource_location}.")
305
+ cached_location = os.path.join(self.cache_dir, resource.name)
306
+ logger.info(f"Use cached version from {cached_location}.")
201
307
  paths = []
202
- for file in os.listdir(cached_resource_location):
203
- paths.append(os.path.join(cached_resource_location, file))
308
+ for file in os.listdir(cached_location):
309
+ paths.append(os.path.join(cached_location, file))
204
310
  return paths
205
311
 
206
312
  def _retrieve(
@@ -260,23 +366,23 @@ class Downloader:
260
366
  progressbar=True,
261
367
  )
262
368
 
263
- def _get_files(self, resource: Resource):
369
+ def _get_files(self, file_download: FileDownload):
264
370
  """
265
- Get the files contained in a directory resource.
371
+ Get the files contained in a directory file.
266
372
 
267
373
  Args:
268
- resource (Resource): The directory resource.
374
+ file_download (FileDownload): The directory file.
269
375
 
270
376
  Returns:
271
377
  list: The files contained in the directory.
272
378
  """
273
- if resource.url_s.startswith("ftp://"):
379
+ if file_download.url_s.startswith("ftp://"):
274
380
  # remove protocol
275
- url = resource.url_s[6:]
381
+ url = file_download.url_s[6:]
276
382
  # get base url
277
383
  url = url[: url.find("/")]
278
384
  # get directory (remove initial slash as well)
279
- dir = resource.url_s[7 + len(url) :]
385
+ dir = file_download.url_s[7 + len(url) :]
280
386
  # get files
281
387
  ftp = ftplib.FTP(url)
282
388
  ftp.login()
@@ -334,19 +440,3 @@ class Downloader:
334
440
  self.cache_dict[resource.name] = cache_record
335
441
  with open(self.cache_file, "w") as f:
336
442
  json.dump(self.cache_dict, f, default=str)
337
-
338
-
339
- def is_nested(lst):
340
- """
341
- Check if a list is nested.
342
-
343
- Args:
344
- lst (list): The list to check.
345
-
346
- Returns:
347
- bool: True if the list is nested, False otherwise.
348
- """
349
- for item in lst:
350
- if isinstance(item, list):
351
- return True
352
- return False
biocypher/_metadata.py CHANGED
@@ -19,7 +19,7 @@ import importlib.metadata
19
19
 
20
20
  import toml
21
21
 
22
- _VERSION = "0.5.44"
22
+ _VERSION = "0.6.1"
23
23
 
24
24
 
25
25
  def get_metadata():
biocypher/_misc.py CHANGED
@@ -246,3 +246,19 @@ def to_lower_sentence_case(s: str) -> str:
246
246
  return pascalcase_to_sentencecase(s)
247
247
  else:
248
248
  return s
249
+
250
+
251
+ def is_nested(lst) -> bool:
252
+ """
253
+ Check if a list is nested.
254
+
255
+ Args:
256
+ lst (list): The list to check.
257
+
258
+ Returns:
259
+ bool: True if the list is nested, False otherwise.
260
+ """
261
+ for item in lst:
262
+ if isinstance(item, list):
263
+ return True
264
+ return False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: biocypher
3
- Version: 0.5.44
3
+ Version: 0.6.1
4
4
  Summary: A unifying framework for biomedical research knowledge graphs
5
5
  Home-page: https://github.com/biocypher/biocypher
6
6
  License: MIT
@@ -1,4 +1,4 @@
1
- biocypher/__init__.py,sha256=ejNY53vH_pE3ZbIN8G_ZBYxxPG9aERovRLD0XhDvt4k,942
1
+ biocypher/__init__.py,sha256=-vr7dGUv6QK7f_i9mmUK5WpMsZuFPflUEFC4oElJTCA,980
2
2
  biocypher/_config/__init__.py,sha256=fFHRFYxE2MtDAQWL6upe--MJ1vw3Z8CwIPhF2gW8cRU,3698
3
3
  biocypher/_config/biocypher_config.yaml,sha256=pusj0IjJM3uWRcm0N7U7mb1IX257HCV2reZV3YKFCk0,3037
4
4
  biocypher/_config/test_config.yaml,sha256=Np8jeS5_EP6HHOvMKb7B_Tkyqd5YaYlYz_DVsXypt-A,119
@@ -8,11 +8,11 @@ biocypher/_config/test_schema_config_extended.yaml,sha256=wn3A76142hhjnImhMF6ROD
8
8
  biocypher/_core.py,sha256=m4o4Szv2xY2gl3PnNAA9m7Gg5Sgd8iR9THv3RDyZlQ8,22618
9
9
  biocypher/_create.py,sha256=vpUchUdEpWupZi1LgFLxAWMtqoBwnWbP7PwEDUCBS4A,10202
10
10
  biocypher/_deduplicate.py,sha256=BBvfpXzu6L5YDY5FdtXxnf8YlsbJpbCE8RdUoKsm0n0,4949
11
- biocypher/_get.py,sha256=3Kpky3blfNf1JwxKWLsZxTU2aTP_C4sUe8OpiyYj63I,10810
11
+ biocypher/_get.py,sha256=1FHs4n2R2k7OVWtVe7euF2J5WpsUXsFAmnpdSYuhLvY,13817
12
12
  biocypher/_logger.py,sha256=NGXe3hZA79WSujfOgpcxHBf8N2QAfrmvM1LFDpsGK2U,3185
13
13
  biocypher/_mapping.py,sha256=ERSNH2Bg19145KytxbFE4BInPaiP-LWW7osOBot29Eo,9304
14
- biocypher/_metadata.py,sha256=HTVcugUBYLbM1r3SEl-J9hiNGlPqic1zqXtMlLyTwH8,1658
15
- biocypher/_misc.py,sha256=18EG2Bei3RnyWXDWc3qtZaT3gybvXI8opi0HvSaF7Lg,6066
14
+ biocypher/_metadata.py,sha256=E4Ei48t7ZASflo58HSMUoLe8WlQnNt0d5w-68FantVA,1657
15
+ biocypher/_misc.py,sha256=oKNfmj9mUKDYtmx-R6FCZxRa7AOut3VKZZm16KFimyY,6363
16
16
  biocypher/_ontology.py,sha256=G5k-bnzvPZUqhLPxtoOPFa4OSQ4JpufgozVakLTjwLg,31789
17
17
  biocypher/_translate.py,sha256=JafvhtVaFSpruRfYh9BzjVbvDF1Mhg7LLKMDZHWkRjg,16496
18
18
  biocypher/output/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -33,7 +33,7 @@ biocypher/output/write/relational/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
33
33
  biocypher/output/write/relational/_csv.py,sha256=eyAtmwfCNYnuVbkpd0rUoo9KgG2KPgopZVA3X97tRLU,2919
34
34
  biocypher/output/write/relational/_postgresql.py,sha256=6sABZaELzmV7a2aUy2iRksf28WFsc3EA9mdQ2mShPeM,11959
35
35
  biocypher/output/write/relational/_sqlite.py,sha256=ozElhca1YCYq8R-VFh-LDsnPBaXVJm2cvEboBK2LVVY,2073
36
- biocypher-0.5.44.dist-info/LICENSE,sha256=SjUaQkq671iQUZOxEUpC4jvJxXOlfSiHTTueyz9kXJM,1065
37
- biocypher-0.5.44.dist-info/METADATA,sha256=_a0l4S_OPALMmrpEhbKWuwtPcSzWRursqJKMEAkrZaw,10642
38
- biocypher-0.5.44.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
39
- biocypher-0.5.44.dist-info/RECORD,,
36
+ biocypher-0.6.1.dist-info/LICENSE,sha256=SjUaQkq671iQUZOxEUpC4jvJxXOlfSiHTTueyz9kXJM,1065
37
+ biocypher-0.6.1.dist-info/METADATA,sha256=WR311EH67e8M_0wNaHbo88a7zU2Tl4y681GyI1fC4xY,10641
38
+ biocypher-0.6.1.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
39
+ biocypher-0.6.1.dist-info/RECORD,,