biocypher 0.5.44__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biocypher might be problematic. Click here for more details.
- biocypher/_get.py +165 -74
- biocypher/_metadata.py +1 -1
- biocypher/_misc.py +16 -0
- {biocypher-0.5.44.dist-info → biocypher-0.6.0.dist-info}/METADATA +1 -1
- {biocypher-0.5.44.dist-info → biocypher-0.6.0.dist-info}/RECORD +7 -7
- {biocypher-0.5.44.dist-info → biocypher-0.6.0.dist-info}/LICENSE +0 -0
- {biocypher-0.5.44.dist-info → biocypher-0.6.0.dist-info}/WHEEL +0 -0
biocypher/_get.py
CHANGED
|
@@ -17,10 +17,13 @@ from __future__ import annotations
|
|
|
17
17
|
from typing import Optional
|
|
18
18
|
import shutil
|
|
19
19
|
|
|
20
|
+
import requests
|
|
21
|
+
|
|
20
22
|
from ._logger import logger
|
|
21
23
|
|
|
22
24
|
logger.debug(f"Loading module {__name__}.")
|
|
23
25
|
|
|
26
|
+
from abc import ABC
|
|
24
27
|
from datetime import datetime, timedelta
|
|
25
28
|
from tempfile import TemporaryDirectory
|
|
26
29
|
import os
|
|
@@ -29,21 +32,22 @@ import ftplib
|
|
|
29
32
|
|
|
30
33
|
import pooch
|
|
31
34
|
|
|
32
|
-
from ._misc import to_list
|
|
35
|
+
from ._misc import to_list, is_nested
|
|
33
36
|
|
|
34
37
|
|
|
35
|
-
class Resource:
|
|
38
|
+
class Resource(ABC):
|
|
36
39
|
def __init__(
|
|
37
40
|
self,
|
|
38
41
|
name: str,
|
|
39
42
|
url_s: str | list[str],
|
|
40
43
|
lifetime: int = 0,
|
|
41
|
-
is_dir: bool = False,
|
|
42
44
|
):
|
|
43
45
|
"""
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
46
|
+
|
|
47
|
+
A Resource is a file, a list of files, an API request, or a list of API
|
|
48
|
+
requests, any of which can be downloaded from the given URL(s) and
|
|
49
|
+
cached locally. This class implements checks of the minimum requirements
|
|
50
|
+
for a resource, to be implemented by a biocypher adapter.
|
|
47
51
|
|
|
48
52
|
Args:
|
|
49
53
|
name (str): The name of the resource.
|
|
@@ -52,43 +56,83 @@ class Resource:
|
|
|
52
56
|
|
|
53
57
|
lifetime (int): The lifetime of the resource in days. If 0, the
|
|
54
58
|
resource is considered to be permanent.
|
|
55
|
-
|
|
56
|
-
is_dir (bool): Whether the resource is a directory or not.
|
|
57
59
|
"""
|
|
58
60
|
self.name = name
|
|
59
61
|
self.url_s = url_s
|
|
60
62
|
self.lifetime = lifetime
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class FileDownload(Resource):
|
|
66
|
+
def __init__(
|
|
67
|
+
self,
|
|
68
|
+
name: str,
|
|
69
|
+
url_s: str | list[str],
|
|
70
|
+
lifetime: int = 0,
|
|
71
|
+
is_dir: bool = False,
|
|
72
|
+
):
|
|
73
|
+
"""
|
|
74
|
+
Represents basic information for a File Download.
|
|
75
|
+
|
|
76
|
+
Args:
|
|
77
|
+
name(str): The name of the File Download.
|
|
78
|
+
|
|
79
|
+
url_s(str|list[str]): The URL(s) of the File Download.
|
|
80
|
+
|
|
81
|
+
lifetime(int): The lifetime of the File Download in days. If 0, the
|
|
82
|
+
File Download is cached indefinitely.
|
|
83
|
+
|
|
84
|
+
is_dir (bool): Whether the URL points to a directory or not.
|
|
85
|
+
"""
|
|
86
|
+
|
|
87
|
+
super().__init__(name, url_s, lifetime)
|
|
61
88
|
self.is_dir = is_dir
|
|
62
89
|
|
|
63
90
|
|
|
91
|
+
class APIRequest(Resource):
|
|
92
|
+
def __init__(self, name: str, url_s: str | list[str], lifetime: int = 0):
|
|
93
|
+
"""
|
|
94
|
+
Represents basic information for an API Request.
|
|
95
|
+
|
|
96
|
+
Args:
|
|
97
|
+
name(str): The name of the API Request.
|
|
98
|
+
|
|
99
|
+
url_s(str|list): The URL of the API endpoint.
|
|
100
|
+
|
|
101
|
+
lifetime(int): The lifetime of the API Request in days. If 0, the
|
|
102
|
+
API Request is cached indefinitely.
|
|
103
|
+
|
|
104
|
+
"""
|
|
105
|
+
super().__init__(name, url_s, lifetime)
|
|
106
|
+
|
|
107
|
+
|
|
64
108
|
class Downloader:
|
|
65
109
|
def __init__(self, cache_dir: Optional[str] = None) -> None:
|
|
66
110
|
"""
|
|
67
|
-
|
|
111
|
+
The Downloader is a class that manages resources that can be downloaded
|
|
68
112
|
and cached locally. It manages the lifetime of downloaded resources by
|
|
69
113
|
keeping a JSON record of the download date of each resource.
|
|
70
114
|
|
|
71
115
|
Args:
|
|
72
116
|
cache_dir (str): The directory where the resources are cached. If
|
|
73
117
|
not given, a temporary directory is created.
|
|
74
|
-
|
|
75
|
-
Returns:
|
|
76
|
-
Downloader: The downloader object.
|
|
77
118
|
"""
|
|
78
119
|
self.cache_dir = cache_dir or TemporaryDirectory().name
|
|
79
120
|
self.cache_file = os.path.join(self.cache_dir, "cache.json")
|
|
80
121
|
self.cache_dict = self._load_cache_dict()
|
|
81
122
|
|
|
82
|
-
# download function that accepts a resource or a list of resources
|
|
83
123
|
def download(self, *resources: Resource):
|
|
84
124
|
"""
|
|
85
|
-
Download one or multiple resources.
|
|
125
|
+
Download one or multiple resources. Load from cache if the resource is
|
|
126
|
+
already downloaded and the cache is not expired.
|
|
86
127
|
|
|
87
128
|
Args:
|
|
88
|
-
resources (Resource): The resource or
|
|
129
|
+
resources (Resource): The resource(s) to download or load from
|
|
130
|
+
cache.
|
|
89
131
|
|
|
90
132
|
Returns:
|
|
91
|
-
str
|
|
133
|
+
list[str]: The path or paths to the resource(s) that were downloaded
|
|
134
|
+
or loaded from cache.
|
|
135
|
+
|
|
92
136
|
"""
|
|
93
137
|
paths = []
|
|
94
138
|
for resource in resources:
|
|
@@ -106,16 +150,27 @@ class Downloader:
|
|
|
106
150
|
|
|
107
151
|
Args:
|
|
108
152
|
resource (Resource): The resource to download.
|
|
109
|
-
|
|
110
153
|
Returns:
|
|
111
|
-
str
|
|
154
|
+
list[str]: The path or paths to the downloaded resource(s).
|
|
155
|
+
|
|
156
|
+
|
|
112
157
|
"""
|
|
113
158
|
expired = self._is_cache_expired(resource)
|
|
114
159
|
|
|
115
160
|
if expired or not cache:
|
|
116
|
-
self.
|
|
117
|
-
|
|
118
|
-
|
|
161
|
+
self._delete_expired_cache(resource)
|
|
162
|
+
if isinstance(resource, FileDownload):
|
|
163
|
+
logger.info(f"Asking for download of resource {resource.name}.")
|
|
164
|
+
paths = self._download_files(cache, resource)
|
|
165
|
+
elif isinstance(resource, APIRequest):
|
|
166
|
+
logger.info(
|
|
167
|
+
f"Asking for download of api request {resource.name}."
|
|
168
|
+
)
|
|
169
|
+
paths = self._download_api_request(resource)
|
|
170
|
+
|
|
171
|
+
else:
|
|
172
|
+
raise TypeError(f"Unknown resource type: {type(resource)}")
|
|
173
|
+
|
|
119
174
|
else:
|
|
120
175
|
paths = self.get_cached_version(resource)
|
|
121
176
|
self._update_cache_record(resource)
|
|
@@ -123,13 +178,14 @@ class Downloader:
|
|
|
123
178
|
|
|
124
179
|
def _is_cache_expired(self, resource: Resource) -> bool:
|
|
125
180
|
"""
|
|
126
|
-
Check if resource cache is expired.
|
|
181
|
+
Check if resource or API request cache is expired.
|
|
127
182
|
|
|
128
183
|
Args:
|
|
129
|
-
|
|
184
|
+
|
|
185
|
+
resource (Resource): The resource or API request to download.
|
|
130
186
|
|
|
131
187
|
Returns:
|
|
132
|
-
bool: cache is expired
|
|
188
|
+
bool: True if cache is expired, False if not.
|
|
133
189
|
"""
|
|
134
190
|
cache_record = self._get_cache_record(resource)
|
|
135
191
|
if cache_record:
|
|
@@ -142,65 +198,116 @@ class Downloader:
|
|
|
142
198
|
expired = True
|
|
143
199
|
return expired
|
|
144
200
|
|
|
145
|
-
def
|
|
146
|
-
|
|
147
|
-
if os.path.exists(
|
|
148
|
-
|
|
201
|
+
def _delete_expired_cache(self, resource: Resource):
|
|
202
|
+
cache_resource_path = self.cache_dir + "/" + resource.name
|
|
203
|
+
if os.path.exists(cache_resource_path) and os.path.isdir(
|
|
204
|
+
cache_resource_path
|
|
149
205
|
):
|
|
150
|
-
shutil.rmtree(
|
|
206
|
+
shutil.rmtree(cache_resource_path)
|
|
151
207
|
|
|
152
|
-
def
|
|
153
|
-
"""
|
|
208
|
+
def _download_files(self, cache, file_download: FileDownload):
|
|
209
|
+
"""
|
|
210
|
+
Download a resource given it is a file or a directory and return the
|
|
211
|
+
path.
|
|
154
212
|
|
|
155
213
|
Args:
|
|
156
214
|
cache (bool): Whether to cache the resource or not.
|
|
157
|
-
|
|
215
|
+
file_download (FileDownload): The resource to download.
|
|
158
216
|
|
|
159
217
|
Returns:
|
|
160
|
-
str
|
|
218
|
+
list[str]: The path or paths to the downloaded resource(s).
|
|
161
219
|
"""
|
|
162
|
-
if
|
|
163
|
-
files = self._get_files(
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
220
|
+
if file_download.is_dir:
|
|
221
|
+
files = self._get_files(file_download)
|
|
222
|
+
file_download.url_s = [
|
|
223
|
+
file_download.url_s + "/" + file for file in files
|
|
224
|
+
]
|
|
225
|
+
file_download.is_dir = False
|
|
226
|
+
paths = self._download_or_cache(file_download, cache)
|
|
227
|
+
elif isinstance(file_download.url_s, list):
|
|
168
228
|
paths = []
|
|
169
|
-
for url in
|
|
170
|
-
fname = url[url.rfind("/") + 1 :]
|
|
229
|
+
for url in file_download.url_s:
|
|
230
|
+
fname = url[url.rfind("/") + 1 :].split("?")[0]
|
|
171
231
|
paths.append(
|
|
172
232
|
self._retrieve(
|
|
173
233
|
url=url,
|
|
174
234
|
fname=fname,
|
|
175
|
-
path=os.path.join(self.cache_dir,
|
|
235
|
+
path=os.path.join(self.cache_dir, file_download.name),
|
|
176
236
|
)
|
|
177
237
|
)
|
|
178
238
|
else:
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
239
|
+
paths = []
|
|
240
|
+
fname = file_download.url_s[
|
|
241
|
+
file_download.url_s.rfind("/") + 1 :
|
|
242
|
+
].split("?")[0]
|
|
243
|
+
results = self._retrieve(
|
|
244
|
+
url=file_download.url_s,
|
|
182
245
|
fname=fname,
|
|
183
|
-
path=os.path.join(self.cache_dir,
|
|
246
|
+
path=os.path.join(self.cache_dir, file_download.name),
|
|
184
247
|
)
|
|
248
|
+
if isinstance(results, list):
|
|
249
|
+
paths.extend(results)
|
|
250
|
+
else:
|
|
251
|
+
paths.append(results)
|
|
252
|
+
|
|
185
253
|
# sometimes a compressed file contains multiple files
|
|
186
254
|
# TODO ask for a list of files in the archive to be used from the
|
|
187
255
|
# adapter
|
|
188
256
|
return paths
|
|
189
257
|
|
|
190
|
-
def
|
|
258
|
+
def _download_api_request(self, api_request: APIRequest):
|
|
259
|
+
"""
|
|
260
|
+
Download an API request and return the path.
|
|
261
|
+
|
|
262
|
+
Args:
|
|
263
|
+
api_request(APIRequest): The API request result that is being
|
|
264
|
+
cached.
|
|
265
|
+
Returns:
|
|
266
|
+
list[str]: The path to the cached API request.
|
|
267
|
+
|
|
268
|
+
"""
|
|
269
|
+
urls = (
|
|
270
|
+
api_request.url_s
|
|
271
|
+
if isinstance(api_request.url_s, list)
|
|
272
|
+
else [api_request.url_s]
|
|
273
|
+
)
|
|
274
|
+
paths = []
|
|
275
|
+
for url in urls:
|
|
276
|
+
fname = url[url.rfind("/") + 1 :].rsplit(".", 1)[0]
|
|
277
|
+
logger.info(
|
|
278
|
+
f"Asking for caching API of {api_request.name} {fname}."
|
|
279
|
+
)
|
|
280
|
+
response = requests.get(url=url)
|
|
281
|
+
|
|
282
|
+
if response.status_code != 200:
|
|
283
|
+
response.raise_for_status()
|
|
284
|
+
response_data = response.json()
|
|
285
|
+
api_path = os.path.join(
|
|
286
|
+
self.cache_dir, api_request.name, f"{fname}.json"
|
|
287
|
+
)
|
|
288
|
+
|
|
289
|
+
os.makedirs(os.path.dirname(api_path), exist_ok=True)
|
|
290
|
+
with open(api_path, "w") as f:
|
|
291
|
+
json.dump(response_data, f)
|
|
292
|
+
logger.info(f"Caching API request to {api_path}.")
|
|
293
|
+
paths.append(api_path)
|
|
294
|
+
return paths
|
|
295
|
+
|
|
296
|
+
def get_cached_version(self, resource: Resource) -> list[str]:
|
|
191
297
|
"""Get the cached version of a resource.
|
|
192
298
|
|
|
193
299
|
Args:
|
|
194
|
-
resource
|
|
300
|
+
resource(Resource): The resource to get the cached version of.
|
|
195
301
|
|
|
196
302
|
Returns:
|
|
197
303
|
list[str]: The paths to the cached resource(s).
|
|
304
|
+
|
|
198
305
|
"""
|
|
199
|
-
|
|
200
|
-
logger.info(f"Use cached version from {
|
|
306
|
+
cached_location = os.path.join(self.cache_dir, resource.name)
|
|
307
|
+
logger.info(f"Use cached version from {cached_location}.")
|
|
201
308
|
paths = []
|
|
202
|
-
for file in os.listdir(
|
|
203
|
-
paths.append(os.path.join(
|
|
309
|
+
for file in os.listdir(cached_location):
|
|
310
|
+
paths.append(os.path.join(cached_location, file))
|
|
204
311
|
return paths
|
|
205
312
|
|
|
206
313
|
def _retrieve(
|
|
@@ -260,23 +367,23 @@ class Downloader:
|
|
|
260
367
|
progressbar=True,
|
|
261
368
|
)
|
|
262
369
|
|
|
263
|
-
def _get_files(self,
|
|
370
|
+
def _get_files(self, file_download: FileDownload):
|
|
264
371
|
"""
|
|
265
|
-
Get the files contained in a directory
|
|
372
|
+
Get the files contained in a directory file.
|
|
266
373
|
|
|
267
374
|
Args:
|
|
268
|
-
|
|
375
|
+
file_download (FileDownload): The directory file.
|
|
269
376
|
|
|
270
377
|
Returns:
|
|
271
378
|
list: The files contained in the directory.
|
|
272
379
|
"""
|
|
273
|
-
if
|
|
380
|
+
if file_download.url_s.startswith("ftp://"):
|
|
274
381
|
# remove protocol
|
|
275
|
-
url =
|
|
382
|
+
url = file_download.url_s[6:]
|
|
276
383
|
# get base url
|
|
277
384
|
url = url[: url.find("/")]
|
|
278
385
|
# get directory (remove initial slash as well)
|
|
279
|
-
dir =
|
|
386
|
+
dir = file_download.url_s[7 + len(url) :]
|
|
280
387
|
# get files
|
|
281
388
|
ftp = ftplib.FTP(url)
|
|
282
389
|
ftp.login()
|
|
@@ -334,19 +441,3 @@ class Downloader:
|
|
|
334
441
|
self.cache_dict[resource.name] = cache_record
|
|
335
442
|
with open(self.cache_file, "w") as f:
|
|
336
443
|
json.dump(self.cache_dict, f, default=str)
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
def is_nested(lst):
|
|
340
|
-
"""
|
|
341
|
-
Check if a list is nested.
|
|
342
|
-
|
|
343
|
-
Args:
|
|
344
|
-
lst (list): The list to check.
|
|
345
|
-
|
|
346
|
-
Returns:
|
|
347
|
-
bool: True if the list is nested, False otherwise.
|
|
348
|
-
"""
|
|
349
|
-
for item in lst:
|
|
350
|
-
if isinstance(item, list):
|
|
351
|
-
return True
|
|
352
|
-
return False
|
biocypher/_metadata.py
CHANGED
biocypher/_misc.py
CHANGED
|
@@ -246,3 +246,19 @@ def to_lower_sentence_case(s: str) -> str:
|
|
|
246
246
|
return pascalcase_to_sentencecase(s)
|
|
247
247
|
else:
|
|
248
248
|
return s
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def is_nested(lst) -> bool:
|
|
252
|
+
"""
|
|
253
|
+
Check if a list is nested.
|
|
254
|
+
|
|
255
|
+
Args:
|
|
256
|
+
lst (list): The list to check.
|
|
257
|
+
|
|
258
|
+
Returns:
|
|
259
|
+
bool: True if the list is nested, False otherwise.
|
|
260
|
+
"""
|
|
261
|
+
for item in lst:
|
|
262
|
+
if isinstance(item, list):
|
|
263
|
+
return True
|
|
264
|
+
return False
|
|
@@ -8,11 +8,11 @@ biocypher/_config/test_schema_config_extended.yaml,sha256=wn3A76142hhjnImhMF6ROD
|
|
|
8
8
|
biocypher/_core.py,sha256=m4o4Szv2xY2gl3PnNAA9m7Gg5Sgd8iR9THv3RDyZlQ8,22618
|
|
9
9
|
biocypher/_create.py,sha256=vpUchUdEpWupZi1LgFLxAWMtqoBwnWbP7PwEDUCBS4A,10202
|
|
10
10
|
biocypher/_deduplicate.py,sha256=BBvfpXzu6L5YDY5FdtXxnf8YlsbJpbCE8RdUoKsm0n0,4949
|
|
11
|
-
biocypher/_get.py,sha256=
|
|
11
|
+
biocypher/_get.py,sha256=mOfne16yeAkFQidvoFprrpQjN6CZgS68pQi2BbOJ_-U,13843
|
|
12
12
|
biocypher/_logger.py,sha256=NGXe3hZA79WSujfOgpcxHBf8N2QAfrmvM1LFDpsGK2U,3185
|
|
13
13
|
biocypher/_mapping.py,sha256=ERSNH2Bg19145KytxbFE4BInPaiP-LWW7osOBot29Eo,9304
|
|
14
|
-
biocypher/_metadata.py,sha256=
|
|
15
|
-
biocypher/_misc.py,sha256=
|
|
14
|
+
biocypher/_metadata.py,sha256=A-d_UprHx3ljxyziSQFgk-ts7MsDOnXTGdMnSRERwcw,1657
|
|
15
|
+
biocypher/_misc.py,sha256=oKNfmj9mUKDYtmx-R6FCZxRa7AOut3VKZZm16KFimyY,6363
|
|
16
16
|
biocypher/_ontology.py,sha256=G5k-bnzvPZUqhLPxtoOPFa4OSQ4JpufgozVakLTjwLg,31789
|
|
17
17
|
biocypher/_translate.py,sha256=JafvhtVaFSpruRfYh9BzjVbvDF1Mhg7LLKMDZHWkRjg,16496
|
|
18
18
|
biocypher/output/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -33,7 +33,7 @@ biocypher/output/write/relational/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
|
|
|
33
33
|
biocypher/output/write/relational/_csv.py,sha256=eyAtmwfCNYnuVbkpd0rUoo9KgG2KPgopZVA3X97tRLU,2919
|
|
34
34
|
biocypher/output/write/relational/_postgresql.py,sha256=6sABZaELzmV7a2aUy2iRksf28WFsc3EA9mdQ2mShPeM,11959
|
|
35
35
|
biocypher/output/write/relational/_sqlite.py,sha256=ozElhca1YCYq8R-VFh-LDsnPBaXVJm2cvEboBK2LVVY,2073
|
|
36
|
-
biocypher-0.
|
|
37
|
-
biocypher-0.
|
|
38
|
-
biocypher-0.
|
|
39
|
-
biocypher-0.
|
|
36
|
+
biocypher-0.6.0.dist-info/LICENSE,sha256=SjUaQkq671iQUZOxEUpC4jvJxXOlfSiHTTueyz9kXJM,1065
|
|
37
|
+
biocypher-0.6.0.dist-info/METADATA,sha256=k1r_9Unas2OGWVwcnWQq5Fs6lWDeVU_ApevfEqx1ddM,10641
|
|
38
|
+
biocypher-0.6.0.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
|
39
|
+
biocypher-0.6.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|