geoseeq 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- geoseeq/cli/main.py +1 -1
- geoseeq/result/file_download.py +46 -7
- geoseeq/result/resumable_download_tracker.py +99 -0
- geoseeq/upload_download_manager.py +1 -1
- {geoseeq-0.6.0.dist-info → geoseeq-0.6.1.dist-info}/METADATA +1 -1
- {geoseeq-0.6.0.dist-info → geoseeq-0.6.1.dist-info}/RECORD +10 -9
- {geoseeq-0.6.0.dist-info → geoseeq-0.6.1.dist-info}/LICENSE +0 -0
- {geoseeq-0.6.0.dist-info → geoseeq-0.6.1.dist-info}/WHEEL +0 -0
- {geoseeq-0.6.0.dist-info → geoseeq-0.6.1.dist-info}/entry_points.txt +0 -0
- {geoseeq-0.6.0.dist-info → geoseeq-0.6.1.dist-info}/top_level.txt +0 -0
geoseeq/cli/main.py
CHANGED
@@ -54,7 +54,7 @@ def version():
|
|
54
54
|
Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
|
55
55
|
Run `geoseeq eula show` to view the EULA.
|
56
56
|
"""
|
57
|
-
click.echo('0.6.0')  # remember to update setup
|
57
|
+
click.echo('0.6.1') # remember to update setup
|
58
58
|
|
59
59
|
|
60
60
|
@main.group('advanced')
|
geoseeq/result/file_download.py
CHANGED
@@ -2,15 +2,22 @@
|
|
2
2
|
import urllib.request
|
3
3
|
import logging
|
4
4
|
import requests
|
5
|
-
|
5
|
+
import os
|
6
|
+
from os.path import basename, getsize, join, isfile, getmtime, dirname
|
6
7
|
from pathlib import Path
|
7
8
|
from tempfile import NamedTemporaryFile
|
8
9
|
|
9
10
|
from geoseeq.utils import download_ftp
|
10
11
|
from geoseeq.constants import FIVE_MB
|
12
|
+
from hashlib import md5
|
13
|
+
from .resumable_download_tracker import ResumableDownloadTracker
|
11
14
|
|
12
15
|
logger = logging.getLogger("geoseeq_api") # Same name as calling module
|
13
16
|
|
17
|
+
def url_to_id(url):
    """Return a short, stable identifier for the resource behind `url`.

    Everything from the first "?" onward is dropped before hashing, so two
    URLs that differ only in their query string map to the same identifier.

    Args:
        url: URL string to identify.

    Returns:
        The first 16 hexadecimal characters of the MD5 digest of the URL
        with its query string removed.
    """
    base_url = url.split("?")[0]
    # MD5 only provides a compact key here; it is not used for security.
    return md5(base_url.encode()).hexdigest()[:16]
|
20
|
+
|
14
21
|
|
15
22
|
def _download_head(url, filename, head=None, start=0, progress_tracker=None):
|
16
23
|
headers = None
|
@@ -20,11 +27,43 @@ def _download_head(url, filename, head=None, start=0, progress_tracker=None):
|
|
20
27
|
response.raise_for_status()
|
21
28
|
total_size_in_bytes = int(response.headers.get('content-length', 0))
|
22
29
|
if progress_tracker: progress_tracker.set_num_chunks(total_size_in_bytes)
|
23
|
-
|
30
|
+
if total_size_in_bytes > 10 * FIVE_MB: # Use resumable download
|
31
|
+
print("Using resumable download")
|
32
|
+
return _download_resumable(response, filename, total_size_in_bytes, progress_tracker)
|
33
|
+
else:
|
34
|
+
block_size = FIVE_MB
|
35
|
+
with open(filename, 'wb') as file:
|
36
|
+
for data in response.iter_content(block_size):
|
37
|
+
if progress_tracker: progress_tracker.update(len(data))
|
38
|
+
file.write(data)
|
39
|
+
return filename
|
40
|
+
|
41
|
+
|
42
|
+
def _download_resumable(response, filename, total_size_in_bytes, progress_tracker=None, chunk_size=5 * FIVE_MB, part_prefix=".gs_download_{}_{}."):
    """Download a large file in independently resumable parts.

    The file is split into byte ranges of `chunk_size`. Each range is
    fetched into its own part file next to `filename` and recorded in a
    `ResumableDownloadTracker`, so a later call for the same URL and
    target skips parts that are already on disk. Once every part exists
    they are concatenated into `filename` and the part files and tracker
    are removed.

    Args:
        response: Response for the full file; only `response.url` is used.
        filename: Local path of the final assembled file.
        total_size_in_bytes: Size of the complete remote file.
        progress_tracker: Optional object whose `update(n_bytes)` is called
            once per part, including parts that were already downloaded.
        chunk_size: Size of each part in bytes.
        part_prefix: Format string for part file names; receives the part
            index and the index of the last part.

    Returns:
        `filename`.
    """
    import shutil  # function-scope import: only needed to stream parts together

    url = response.url
    # Every part is fetched with its own request below, so the body of the
    # initial response is never read; close it to release the connection.
    response.close()

    target_id = url_to_id(url)
    tracker = ResumableDownloadTracker(chunk_size, target_id, filename)
    if not tracker.download_started:
        tracker.start_download(url)

    # Round up so the trailing partial chunk is downloaded too; flooring
    # here would silently truncate the assembled file.
    n_chunks = (total_size_in_bytes + chunk_size - 1) // chunk_size
    last_part = n_chunks - 1
    part_filenames = []
    for i in range(n_chunks):
        bytes_start = i * chunk_size
        bytes_end = min(bytes_start + chunk_size, total_size_in_bytes) - 1  # inclusive
        if tracker.part_has_been_downloaded(i):
            logger.debug(f"Part {i} has already been downloaded.")
            part_filename = tracker.get_part_info(i)["part_filename"]
        else:
            logger.debug(f"Downloading part {i} of {last_part}")
            part_filename = join(dirname(filename), part_prefix.format(i, last_part) + basename(filename))
            _download_head(url, part_filename, head=bytes_end, start=bytes_start, progress_tracker=None)
            part_info = dict(part_number=i, start=bytes_start, end=bytes_end, part_filename=part_filename)
            # A closed tracker (its directory could not be created) cannot
            # record parts; the download still proceeds, just not resumably.
            if tracker.open:
                tracker.add_part(part_info)
        part_filenames.append(part_filename)
        if progress_tracker:
            progress_tracker.update(bytes_end - bytes_start + 1)

    # at this point all parts have been downloaded
    with open(filename, 'wb') as file:
        for part_filename in part_filenames:
            with open(part_filename, 'rb') as part_file:
                # Stream instead of read() so a whole part is never held in memory.
                shutil.copyfileobj(part_file, file)

    tracker.cleanup()
    # cleanup() does nothing on a closed tracker, so remove leftovers here.
    for part_filename in part_filenames:
        if isfile(part_filename):
            os.remove(part_filename)
    return filename
|
29
68
|
|
30
69
|
|
@@ -44,7 +83,7 @@ def guess_download_kind(url):
|
|
44
83
|
return 'generic'
|
45
84
|
|
46
85
|
|
47
|
-
def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None):
|
86
|
+
def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None, target_uuid=None):
|
48
87
|
"""Return a local filepath to the downloaded file. Download the file."""
|
49
88
|
if filename and isfile(filename):
|
50
89
|
file_size = getsize(filename)
|
@@ -135,7 +174,7 @@ class ResultFileDownload:
|
|
135
174
|
url = self.get_download_url()
|
136
175
|
filepath = download_url(
|
137
176
|
url, blob_type, filename,
|
138
|
-
head=head, progress_tracker=progress_tracker
|
177
|
+
head=head, progress_tracker=progress_tracker,
|
139
178
|
)
|
140
179
|
if cache and flag_suffix:
|
141
180
|
# create flag file
|
@@ -0,0 +1,99 @@
|
|
1
|
+
|
2
|
+
import time
|
3
|
+
import json
|
4
|
+
import os
|
5
|
+
from os.path import basename, getsize, join, dirname, isfile, getctime
|
6
|
+
from pathlib import Path
|
7
|
+
from random import random
|
8
|
+
import requests
|
9
|
+
|
10
|
+
from geoseeq.knex import GeoseeqGeneralError
|
11
|
+
from geoseeq.constants import FIVE_MB
|
12
|
+
from geoseeq.utils import md5_checksum
|
13
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
14
|
+
from .utils import *
|
15
|
+
from geoseeq.file_system_cache import GEOSEEQ_CACHE_DIR
|
16
|
+
from .file_chunker import FileChunker
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
class ResumableDownloadTracker:
    """Record which parts of a chunked download are already on disk.

    State lives in a tracker file inside `<GEOSEEQ_CACHE_DIR>/download`.
    Its first line is a JSON header (download URL, target id, start time);
    each following line is the JSON description of one downloaded part.

    If the tracker directory cannot be created the tracker is marked closed
    (`self.open` is False): queries then report nothing as downloaded and
    `add_part` refuses to record anything.
    """

    def __init__(self, chunk_size, download_target_id, target_local_path, tracker_file_prefix="gs_resumable_download_tracker"):
        """Set up the tracker and load any state left by an earlier run.

        Args:
            chunk_size: Part size in bytes; included in the tracker file name.
            download_target_id: Identifier of the remote target; included in
                the tracker file name and stored in the header.
            target_local_path: Local path the download will be written to.
            tracker_file_prefix: Leading component of the tracker file name.
        """
        self.open, self.download_started = True, False
        self.download_target_id = download_target_id
        self.target_local_path = target_local_path
        # Defined up front so the attribute exists even before a download starts.
        self.download_url = None
        self.tracker_file_dir = join(GEOSEEQ_CACHE_DIR, 'download')
        self.tracker_file = join(
            self.tracker_file_dir,
            tracker_file_prefix + f".{download_target_id}.{chunk_size}." + basename(target_local_path)
        )
        try:
            os.makedirs(self.tracker_file_dir, exist_ok=True)
        except OSError as e:
            # NOTE(review): `logger` is not defined in the visible part of this
            # module; presumably it arrives via `from .utils import *` - confirm.
            logger.warning(f'Could not create resumable download tracker directory. {e}')
            self.open = False
        self._loaded_parts = {}
        self._load_parts_from_file()

    def start_download(self, download_url):
        """Write a fresh tracker file whose header describes this download.

        Returns:
            `self`, or None when the tracker is closed.

        Raises:
            GeoseeqGeneralError: If the download was already started.
        """
        if not self.open:
            return
        if self.download_started:
            raise GeoseeqGeneralError("Download has already started.")
        self.download_started = True
        blob = dict(download_url=download_url,
                    download_target_id=self.download_target_id,
                    start_time=time.time())
        serialized = json.dumps(blob)
        # Mode "w" deliberately discards any previous (e.g. corrupt) content.
        with open(self.tracker_file, "w") as f:
            f.write(serialized + "\n")
        self.download_url = download_url
        return self

    def add_part(self, part_download_info):
        """Append one downloaded part to the tracker file and the in-memory map.

        Args:
            part_download_info: JSON-serializable dict with at least a
                "part_number" key.

        Raises:
            GeoseeqGeneralError: If the tracker is closed.
        """
        if not self.open:
            # Raised explicitly: an `assert` would vanish under `python -O`.
            raise GeoseeqGeneralError("Cannot add part to closed ResumableDownloadTracker")
        part_id = part_download_info["part_number"]
        serialized = json.dumps(part_download_info)
        with open(self.tracker_file, "a") as f:
            f.write(serialized + "\n")
        self._loaded_parts[part_id] = part_download_info

    def _load_parts_from_file(self):
        """Load the header and part records from the tracker file, if present.

        An interrupted run can leave an empty file or a half-written line.
        An unreadable header is treated as "download not started"; unreadable
        part lines are skipped so those parts are simply downloaded again.
        """
        if not isfile(self.tracker_file):
            return
        with open(self.tracker_file, "r") as f:
            try:
                header_blob = json.loads(f.readline())
                download_url = header_blob["download_url"]
            except (ValueError, KeyError, TypeError):
                return
            # for now we don't expire resumable downloads, so the header's
            # start_time is not consulted
            self.download_url = download_url
            self.download_started = True
            for line in f:
                if not line.strip():
                    continue
                try:
                    part_info = json.loads(line)
                    part_id = part_info["part_number"]
                except (ValueError, KeyError, TypeError):
                    continue
                self._loaded_parts[part_id] = part_info

    def part_has_been_downloaded(self, part_number):
        """Return True if the part is recorded and its part file still exists."""
        if not self.open:
            return False
        if part_number not in self._loaded_parts:
            return False
        part_info = self._loaded_parts[part_number]
        part_path = part_info["part_filename"]
        return isfile(part_path)

    def get_part_info(self, part_number):
        """Return the recorded info dict for a part, or None if unknown or closed."""
        if not self.open:
            return None
        return self._loaded_parts.get(part_number, None)

    def cleanup(self):
        """Delete all recorded part files and the tracker file, then close."""
        if not self.open:
            return
        for part in self._loaded_parts.values():
            part_path = part["part_filename"]
            if isfile(part_path):
                os.remove(part_path)
        # The tracker file may already be gone (never started, or removed by hand).
        if isfile(self.tracker_file):
            os.remove(self.tracker_file)
        self.open = False
|
99
|
+
|
@@ -194,7 +194,7 @@ class GeoSeeqDownloadManager:
|
|
194
194
|
self._convert_result_files_to_urls()
|
195
195
|
download_args = [(
|
196
196
|
url, file_path,
|
197
|
-
self.progress_tracker_factory(
|
197
|
+
self.progress_tracker_factory(file_path),
|
198
198
|
self.ignore_errors, self.head, self.log_level,
|
199
199
|
self.n_parallel_downloads > 1
|
200
200
|
) for url, file_path in self._result_files]
|
@@ -11,7 +11,7 @@ geoseeq/project.py,sha256=-9Y2ik0-BpT3iqh89v8VQBbdadhI58oaUP9oZK8oetc,13741
|
|
11
11
|
geoseeq/remote_object.py,sha256=Es-JlAz8iLRmCpAzh1MOwUh2MqtbuQM-p8wHIBAqNlQ,7131
|
12
12
|
geoseeq/sample.py,sha256=whgEVk6GnDJJLjn5uTOqFqRtVxZD3BgjTo7brAC5noU,7981
|
13
13
|
geoseeq/search.py,sha256=gawad6Cx5FxJBPlYkXWb-UKAO-UC0_yhvyU9Ca1kaNI,3388
|
14
|
-
geoseeq/upload_download_manager.py,sha256=
|
14
|
+
geoseeq/upload_download_manager.py,sha256=FMRqLLg77o1qFbWZc5Yc86a2pjeZrrn1rHJr1iaxKCU,8757
|
15
15
|
geoseeq/user.py,sha256=tol8i1UGLRrbMw5jeJDnna1ikRgrCDd50Jxz0a1lSgg,690
|
16
16
|
geoseeq/utils.py,sha256=PDRiEQIZYTcfEV9AYvloQVvfqs5JaebcFZodAa2SUW8,3577
|
17
17
|
geoseeq/work_orders.py,sha256=5uLVVfdKE8qh4gGaHkdBpXJGRTujuSg59knWCqEET4A,8071
|
@@ -22,7 +22,7 @@ geoseeq/cli/detail.py,sha256=q8Suu-j2k18knfSVFG-SWWGNsKM-n8y9RMA3LcIIi9Y,4132
|
|
22
22
|
geoseeq/cli/download.py,sha256=N_Wrg9d1kY9eJ6C1l0xc_YFjiri8gkXBo9JiuHx9xxE,17766
|
23
23
|
geoseeq/cli/fastq_utils.py,sha256=-bmeQLaiMBm57zWOF0R5OlWTU0_3sh1JBC1RYw2BOFM,3083
|
24
24
|
geoseeq/cli/get_eula.py,sha256=79mbUwyiF7O1r0g6UTxG9kJGQEqKuH805E6eLkPC6Y4,997
|
25
|
-
geoseeq/cli/main.py,sha256=
|
25
|
+
geoseeq/cli/main.py,sha256=omW-_xyCEL-rKyELtNu84Ofi-L4hm_17udQ4X6blr4I,3791
|
26
26
|
geoseeq/cli/manage.py,sha256=wGXAcVaXqE5JQEU8Jh6OlHr02nB396bpS_SFcOZdrEo,5929
|
27
27
|
geoseeq/cli/progress_bar.py,sha256=p1Xl01nkYxSBZCB30ue2verIIi22W93m3ZAMAxipD0g,738
|
28
28
|
geoseeq/cli/project.py,sha256=V5SdXm2Hwo2lxrkpwRDedw-mAE4XnM2uwT-Gj1D90VQ,3030
|
@@ -66,10 +66,11 @@ geoseeq/plotting/map/overlay.py,sha256=4VmxqOESTQra9tPr8b8OLEUhJSit9lNipabeSznEY
|
|
66
66
|
geoseeq/result/__init__.py,sha256=IFHIyRV8ZzuKIfwfze1SXgcKwNMcSgMAknLHMkwjXIU,356
|
67
67
|
geoseeq/result/bioinfo.py,sha256=QQtbyogrdro9avJSN0713sxLVnVeA24mFw3hWtKDKyw,1782
|
68
68
|
geoseeq/result/file_chunker.py,sha256=bXq1csuRtqMB5sbH-AfWo6gdPwrivv5DJPuHVj-h08w,1758
|
69
|
-
geoseeq/result/file_download.py,sha256=
|
69
|
+
geoseeq/result/file_download.py,sha256=2VFy_p20VxAu1ItNNM1PBcDKSp9dhRuyOhcb5UBwYEU,7805
|
70
70
|
geoseeq/result/file_upload.py,sha256=xs1DrI-h4ZP7xN8HPBc3SFpcPAxR5HAolraP1Zu7tvE,10648
|
71
71
|
geoseeq/result/result_file.py,sha256=1Yj9fkZhds3J-tay6eNH2-EHi00MovHGV1M80_ckHD8,8677
|
72
72
|
geoseeq/result/result_folder.py,sha256=6porOXPh7Tpxw3oX5yMRPYQzNCGYqszqmFJd3SwQmTc,11122
|
73
|
+
geoseeq/result/resumable_download_tracker.py,sha256=YEzqHBBnE7L3XokTvlTAhHZ8TcDTIE_pyTQ7YadOfbU,3667
|
73
74
|
geoseeq/result/resumable_upload_tracker.py,sha256=2aI09gYz2yw63jEXqs8lmCRKQ79TIc3YuPETvP0Jeek,3811
|
74
75
|
geoseeq/result/utils.py,sha256=C-CxGzB3WddlnRiqFSkrY78I_m0yFgNqsTBRzGU-y8Q,2772
|
75
76
|
geoseeq/vc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -84,9 +85,9 @@ geoseeq/vc/vc_stub.py,sha256=IQr8dI0zsWKVAeY_5ybDD6n49_3othcgfHS3P0O9tuY,3110
|
|
84
85
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
85
86
|
tests/test_api_client.py,sha256=TS5njc5pcPP_Ycy-ljcfPVT1hQRBsFVdQ0lCqBmoesU,12810
|
86
87
|
tests/test_plotting.py,sha256=TcTu-2ARr8sxZJ7wPQxmbs3-gHw7uRvsgrhhhg0qKik,784
|
87
|
-
geoseeq-0.6.0.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
|
88
|
-
geoseeq-0.6.
|
89
|
-
geoseeq-0.6.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
90
|
-
geoseeq-0.6.0.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
|
91
|
-
geoseeq-0.6.0.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
|
92
|
-
geoseeq-0.6.0.dist-info/RECORD,,
|
88
|
+
geoseeq-0.6.1.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
|
89
|
+
geoseeq-0.6.1.dist-info/METADATA,sha256=aSgyentw6vNb53rZD6IlBy-ZlPSjPRGQK-dq9QnY1no,4803
|
90
|
+
geoseeq-0.6.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
91
|
+
geoseeq-0.6.1.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
|
92
|
+
geoseeq-0.6.1.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
|
93
|
+
geoseeq-0.6.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|