geoseeq 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geoseeq/cli/main.py +1 -1
- geoseeq/result/file_download.py +46 -7
- geoseeq/result/resumable_download_tracker.py +99 -0
- geoseeq/upload_download_manager.py +1 -1
- {geoseeq-0.6.0.dist-info → geoseeq-0.6.1.dist-info}/METADATA +1 -1
- {geoseeq-0.6.0.dist-info → geoseeq-0.6.1.dist-info}/RECORD +10 -9
- {geoseeq-0.6.0.dist-info → geoseeq-0.6.1.dist-info}/LICENSE +0 -0
- {geoseeq-0.6.0.dist-info → geoseeq-0.6.1.dist-info}/WHEEL +0 -0
- {geoseeq-0.6.0.dist-info → geoseeq-0.6.1.dist-info}/entry_points.txt +0 -0
- {geoseeq-0.6.0.dist-info → geoseeq-0.6.1.dist-info}/top_level.txt +0 -0
geoseeq/cli/main.py
CHANGED
@@ -54,7 +54,7 @@ def version():
|
|
54
54
|
Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
|
55
55
|
Run `geoseeq eula show` to view the EULA.
|
56
56
|
"""
|
57
|
-
click.echo('0.6.0')
|
57
|
+
click.echo('0.6.1') # remember to update setup
|
58
58
|
|
59
59
|
|
60
60
|
@main.group('advanced')
|
geoseeq/result/file_download.py
CHANGED
@@ -2,15 +2,22 @@
|
|
2
2
|
import urllib.request
|
3
3
|
import logging
|
4
4
|
import requests
|
5
|
-
|
5
|
+
import os
|
6
|
+
from os.path import basename, getsize, join, isfile, getmtime, dirname
|
6
7
|
from pathlib import Path
|
7
8
|
from tempfile import NamedTemporaryFile
|
8
9
|
|
9
10
|
from geoseeq.utils import download_ftp
|
10
11
|
from geoseeq.constants import FIVE_MB
|
12
|
+
from hashlib import md5
|
13
|
+
from .resumable_download_tracker import ResumableDownloadTracker
|
11
14
|
|
12
15
|
logger = logging.getLogger("geoseeq_api") # Same name as calling module
|
13
16
|
|
17
|
+
def url_to_id(url):
    """Return a short, stable identifier for *url*, ignoring its query string.

    Everything from the first ``?`` onward is discarded before hashing, so two
    URLs that differ only in their query parameters map to the same id. The id
    is the first 16 hex characters of the MD5 digest of the remaining text.
    """
    base_url, _, _ = url.partition("?")
    digest = md5(base_url.encode()).hexdigest()
    return digest[:16]
|
20
|
+
|
14
21
|
|
15
22
|
def _download_head(url, filename, head=None, start=0, progress_tracker=None):
|
16
23
|
headers = None
|
@@ -20,11 +27,43 @@ def _download_head(url, filename, head=None, start=0, progress_tracker=None):
|
|
20
27
|
response.raise_for_status()
|
21
28
|
total_size_in_bytes = int(response.headers.get('content-length', 0))
|
22
29
|
if progress_tracker: progress_tracker.set_num_chunks(total_size_in_bytes)
|
23
|
-
|
30
|
+
if total_size_in_bytes > 10 * FIVE_MB: # Use resumable download
|
31
|
+
print("Using resumable download")
|
32
|
+
return _download_resumable(response, filename, total_size_in_bytes, progress_tracker)
|
33
|
+
else:
|
34
|
+
block_size = FIVE_MB
|
35
|
+
with open(filename, 'wb') as file:
|
36
|
+
for data in response.iter_content(block_size):
|
37
|
+
if progress_tracker: progress_tracker.update(len(data))
|
38
|
+
file.write(data)
|
39
|
+
return filename
|
40
|
+
|
41
|
+
|
42
|
+
def _download_resumable(response, filename, total_size_in_bytes, progress_tracker=None, chunk_size=5 * FIVE_MB, part_prefix=".gs_download_{}_{}."):
    """Download a large file as separate byte-range parts, then join them into `filename`.

    Each part is written to its own file next to `filename` and recorded in a
    `ResumableDownloadTracker`, so a later call for the same target can skip
    parts whose files are already on disk.

    Args:
        response: the already-opened response for the download; only
            `response.url` is used here.
        filename: final local path of the assembled file.
        total_size_in_bytes: full size of the remote file.
        progress_tracker: optional object with an `update(num_bytes)` method,
            called once per part (including parts that were already present).
        chunk_size: size in bytes of every part except possibly the last.
        part_prefix: format string taking (part index, last part index) used
            to build the part file names.

    Returns:
        `filename`, after all parts were concatenated and the tracker cleaned up.
    """
    import shutil  # local import: only needed to stream parts into the final file

    target_id = url_to_id(response.url)
    tracker = ResumableDownloadTracker(chunk_size, target_id, filename)
    if not tracker.download_started:
        tracker.start_download(response.url)

    # Ceiling division. Flooring here would drop the trailing partial chunk and
    # silently truncate every file whose size is not a multiple of chunk_size.
    n_chunks = (total_size_in_bytes + chunk_size - 1) // chunk_size
    last_part = n_chunks - 1
    for i in range(n_chunks):
        bytes_start = i * chunk_size
        # Inclusive end offset, clamped so the final part stops at the last byte.
        bytes_end = min(bytes_start + chunk_size, total_size_in_bytes) - 1
        if tracker.part_has_been_downloaded(i):
            logger.debug(f"Part {i} has already been downloaded.")
        else:
            logger.debug(f"Downloading part {i} of {last_part}")
            part_filename = join(dirname(filename), part_prefix.format(i, last_part) + basename(filename))
            # NOTE(review): _download_head presumably turns start/head into a
            # byte-range request; its header-building lines are not visible here.
            _download_head(response.url, part_filename, head=bytes_end, start=bytes_start, progress_tracker=None)
            part_info = dict(part_number=i, start=bytes_start, end=bytes_end, part_filename=part_filename)
            tracker.add_part(part_info)
        if progress_tracker:
            progress_tracker.update(bytes_end - bytes_start + 1)

    # At this point all parts have been downloaded; join them in order.
    with open(filename, 'wb') as file:
        for i in range(n_chunks):
            part_filename = tracker.get_part_info(i)["part_filename"]
            with open(part_filename, 'rb') as part_file:
                # Stream instead of read() so a whole part is never held in memory.
                shutil.copyfileobj(part_file, file)
    tracker.cleanup()
    return filename
|
29
68
|
|
30
69
|
|
@@ -44,7 +83,7 @@ def guess_download_kind(url):
|
|
44
83
|
return 'generic'
|
45
84
|
|
46
85
|
|
47
|
-
def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None):
|
86
|
+
def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None, target_uuid=None):
|
48
87
|
"""Return a local filepath to the downloaded file. Download the file."""
|
49
88
|
if filename and isfile(filename):
|
50
89
|
file_size = getsize(filename)
|
@@ -135,7 +174,7 @@ class ResultFileDownload:
|
|
135
174
|
url = self.get_download_url()
|
136
175
|
filepath = download_url(
|
137
176
|
url, blob_type, filename,
|
138
|
-
head=head, progress_tracker=progress_tracker
|
177
|
+
head=head, progress_tracker=progress_tracker,
|
139
178
|
)
|
140
179
|
if cache and flag_suffix:
|
141
180
|
# create flag file
|
@@ -0,0 +1,99 @@
|
|
1
|
+
|
2
|
+
import time
|
3
|
+
import json
|
4
|
+
import os
|
5
|
+
from os.path import basename, getsize, join, dirname, isfile, getctime
|
6
|
+
from pathlib import Path
|
7
|
+
from random import random
|
8
|
+
import requests
|
9
|
+
|
10
|
+
from geoseeq.knex import GeoseeqGeneralError
|
11
|
+
from geoseeq.constants import FIVE_MB
|
12
|
+
from geoseeq.utils import md5_checksum
|
13
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
14
|
+
from .utils import *
|
15
|
+
from geoseeq.file_system_cache import GEOSEEQ_CACHE_DIR
|
16
|
+
from .file_chunker import FileChunker
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
class ResumableDownloadTracker:
    """Persist which parts of a chunked download have completed so it can be resumed.

    State lives in a JSON-lines file under ``GEOSEEQ_CACHE_DIR/download``. The
    first line is a header (download URL, target id, start time) and every
    following line is the info dict of one finished part. The file name
    combines the prefix, the target id, the chunk size and the basename of the
    local target path.

    If the tracker directory cannot be created the tracker is marked closed
    (``self.open = False``) and most operations become no-ops.
    """

    def __init__(self, chunk_size, download_target_id, target_local_path, tracker_file_prefix="gs_resumable_download_tracker"):
        """Set up the tracker paths and load any state left by an earlier run."""
        self.open, self.download_started = True, False
        # Defined up front so the attribute exists even before a download starts.
        self.download_url = None
        self.download_target_id = download_target_id
        self.target_local_path = target_local_path
        self.tracker_file_dir = join(GEOSEEQ_CACHE_DIR, 'download')
        self.tracker_file = join(
            self.tracker_file_dir,
            tracker_file_prefix + f".{download_target_id}.{chunk_size}." + basename(target_local_path)
        )
        try:
            os.makedirs(self.tracker_file_dir, exist_ok=True)
        except Exception as e:
            # Best effort: without a tracker directory the tracker is disabled.
            logger.warning(f'Could not create resumable download tracker directory. {e}')
            self.open = False
        self._loaded_parts = {}
        self._load_parts_from_file()

    def start_download(self, download_url):
        """Write the header line of a fresh tracker file for `download_url`.

        Returns `self`, or None when the tracker is closed.

        Raises:
            GeoseeqGeneralError: if a download was already started or loaded.
        """
        if not self.open:
            return
        if self.download_started:
            raise GeoseeqGeneralError("Download has already started.")
        self.download_started = True
        blob = dict(download_url=download_url,
                    download_target_id=self.download_target_id,
                    start_time=time.time())
        serialized = json.dumps(blob)
        # Mode "w" deliberately replaces any stale or unreadable tracker file.
        with open(self.tracker_file, "w") as f:
            f.write(serialized + "\n")
        self.download_url = download_url
        return self

    def add_part(self, part_download_info):
        """Append one finished part to the tracker file and the in-memory index.

        `part_download_info` must contain a "part_number" key; the other
        methods also read its "part_filename" key.

        Raises:
            GeoseeqGeneralError: if the tracker is closed.
        """
        if not self.open:
            # Raised explicitly rather than asserted: asserts vanish under -O.
            raise GeoseeqGeneralError("Cannot add part to closed ResumableDownloadTracker")
        part_id = part_download_info["part_number"]
        serialized = json.dumps(part_download_info)
        with open(self.tracker_file, "a") as f:
            f.write(serialized + "\n")
        self._loaded_parts[part_id] = part_download_info

    def _load_parts_from_file(self):
        """Load the header and part records from an existing tracker file, if any.

        An interrupted run can leave an empty file or a half-written last
        line. An unreadable header is treated as "no download started";
        unreadable part lines are dropped so those parts are downloaded again.
        """
        if not isfile(self.tracker_file):
            return
        with open(self.tracker_file, "r") as f:
            lines = f.read().splitlines()
        try:
            header_blob = json.loads(lines[0])
            download_url = header_blob["download_url"]
        except (IndexError, ValueError, KeyError, TypeError):
            # No usable header: start_download() will overwrite the file.
            return
        self.download_url = download_url
        self.download_started = True  # for now we don't expire resumable downloads
        valid_lines = [lines[0]]
        for line in lines[1:]:
            try:
                part_info = json.loads(line)
                part_id = part_info["part_number"]
                part_info["part_filename"]  # required later; reject records without it
            except (ValueError, KeyError, TypeError):
                continue
            self._loaded_parts[part_id] = part_info
            valid_lines.append(line)
        if len(valid_lines) != len(lines):
            # Rewrite without the damaged lines so later appends start on a
            # clean line instead of being glued onto a truncated record.
            with open(self.tracker_file, "w") as f:
                f.write("".join(line + "\n" for line in valid_lines))

    def part_has_been_downloaded(self, part_number):
        """Return True if the part is recorded and its part file still exists."""
        if not self.open:
            return False
        if part_number not in self._loaded_parts:
            return False
        part_info = self._loaded_parts[part_number]
        part_path = part_info["part_filename"]
        return isfile(part_path)

    def get_part_info(self, part_number):
        """Return the recorded info dict for a part, or None if unknown or closed."""
        if not self.open:
            return None
        return self._loaded_parts.get(part_number, None)

    def cleanup(self):
        """Delete all recorded part files and the tracker file, then close the tracker."""
        if not self.open:
            return
        for part in self._loaded_parts.values():
            part_path = part["part_filename"]
            if isfile(part_path):
                os.remove(part_path)
        # The tracker file may already be gone, e.g. if no download was started.
        if isfile(self.tracker_file):
            os.remove(self.tracker_file)
        self.open = False
|
99
|
+
|
@@ -194,7 +194,7 @@ class GeoSeeqDownloadManager:
|
|
194
194
|
self._convert_result_files_to_urls()
|
195
195
|
download_args = [(
|
196
196
|
url, file_path,
|
197
|
-
self.progress_tracker_factory(
|
197
|
+
self.progress_tracker_factory(file_path),
|
198
198
|
self.ignore_errors, self.head, self.log_level,
|
199
199
|
self.n_parallel_downloads > 1
|
200
200
|
) for url, file_path in self._result_files]
|
@@ -11,7 +11,7 @@ geoseeq/project.py,sha256=-9Y2ik0-BpT3iqh89v8VQBbdadhI58oaUP9oZK8oetc,13741
|
|
11
11
|
geoseeq/remote_object.py,sha256=Es-JlAz8iLRmCpAzh1MOwUh2MqtbuQM-p8wHIBAqNlQ,7131
|
12
12
|
geoseeq/sample.py,sha256=whgEVk6GnDJJLjn5uTOqFqRtVxZD3BgjTo7brAC5noU,7981
|
13
13
|
geoseeq/search.py,sha256=gawad6Cx5FxJBPlYkXWb-UKAO-UC0_yhvyU9Ca1kaNI,3388
|
14
|
-
geoseeq/upload_download_manager.py,sha256=
|
14
|
+
geoseeq/upload_download_manager.py,sha256=FMRqLLg77o1qFbWZc5Yc86a2pjeZrrn1rHJr1iaxKCU,8757
|
15
15
|
geoseeq/user.py,sha256=tol8i1UGLRrbMw5jeJDnna1ikRgrCDd50Jxz0a1lSgg,690
|
16
16
|
geoseeq/utils.py,sha256=PDRiEQIZYTcfEV9AYvloQVvfqs5JaebcFZodAa2SUW8,3577
|
17
17
|
geoseeq/work_orders.py,sha256=5uLVVfdKE8qh4gGaHkdBpXJGRTujuSg59knWCqEET4A,8071
|
@@ -22,7 +22,7 @@ geoseeq/cli/detail.py,sha256=q8Suu-j2k18knfSVFG-SWWGNsKM-n8y9RMA3LcIIi9Y,4132
|
|
22
22
|
geoseeq/cli/download.py,sha256=N_Wrg9d1kY9eJ6C1l0xc_YFjiri8gkXBo9JiuHx9xxE,17766
|
23
23
|
geoseeq/cli/fastq_utils.py,sha256=-bmeQLaiMBm57zWOF0R5OlWTU0_3sh1JBC1RYw2BOFM,3083
|
24
24
|
geoseeq/cli/get_eula.py,sha256=79mbUwyiF7O1r0g6UTxG9kJGQEqKuH805E6eLkPC6Y4,997
|
25
|
-
geoseeq/cli/main.py,sha256=
|
25
|
+
geoseeq/cli/main.py,sha256=omW-_xyCEL-rKyELtNu84Ofi-L4hm_17udQ4X6blr4I,3791
|
26
26
|
geoseeq/cli/manage.py,sha256=wGXAcVaXqE5JQEU8Jh6OlHr02nB396bpS_SFcOZdrEo,5929
|
27
27
|
geoseeq/cli/progress_bar.py,sha256=p1Xl01nkYxSBZCB30ue2verIIi22W93m3ZAMAxipD0g,738
|
28
28
|
geoseeq/cli/project.py,sha256=V5SdXm2Hwo2lxrkpwRDedw-mAE4XnM2uwT-Gj1D90VQ,3030
|
@@ -66,10 +66,11 @@ geoseeq/plotting/map/overlay.py,sha256=4VmxqOESTQra9tPr8b8OLEUhJSit9lNipabeSznEY
|
|
66
66
|
geoseeq/result/__init__.py,sha256=IFHIyRV8ZzuKIfwfze1SXgcKwNMcSgMAknLHMkwjXIU,356
|
67
67
|
geoseeq/result/bioinfo.py,sha256=QQtbyogrdro9avJSN0713sxLVnVeA24mFw3hWtKDKyw,1782
|
68
68
|
geoseeq/result/file_chunker.py,sha256=bXq1csuRtqMB5sbH-AfWo6gdPwrivv5DJPuHVj-h08w,1758
|
69
|
-
geoseeq/result/file_download.py,sha256=
|
69
|
+
geoseeq/result/file_download.py,sha256=2VFy_p20VxAu1ItNNM1PBcDKSp9dhRuyOhcb5UBwYEU,7805
|
70
70
|
geoseeq/result/file_upload.py,sha256=xs1DrI-h4ZP7xN8HPBc3SFpcPAxR5HAolraP1Zu7tvE,10648
|
71
71
|
geoseeq/result/result_file.py,sha256=1Yj9fkZhds3J-tay6eNH2-EHi00MovHGV1M80_ckHD8,8677
|
72
72
|
geoseeq/result/result_folder.py,sha256=6porOXPh7Tpxw3oX5yMRPYQzNCGYqszqmFJd3SwQmTc,11122
|
73
|
+
geoseeq/result/resumable_download_tracker.py,sha256=YEzqHBBnE7L3XokTvlTAhHZ8TcDTIE_pyTQ7YadOfbU,3667
|
73
74
|
geoseeq/result/resumable_upload_tracker.py,sha256=2aI09gYz2yw63jEXqs8lmCRKQ79TIc3YuPETvP0Jeek,3811
|
74
75
|
geoseeq/result/utils.py,sha256=C-CxGzB3WddlnRiqFSkrY78I_m0yFgNqsTBRzGU-y8Q,2772
|
75
76
|
geoseeq/vc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -84,9 +85,9 @@ geoseeq/vc/vc_stub.py,sha256=IQr8dI0zsWKVAeY_5ybDD6n49_3othcgfHS3P0O9tuY,3110
|
|
84
85
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
85
86
|
tests/test_api_client.py,sha256=TS5njc5pcPP_Ycy-ljcfPVT1hQRBsFVdQ0lCqBmoesU,12810
|
86
87
|
tests/test_plotting.py,sha256=TcTu-2ARr8sxZJ7wPQxmbs3-gHw7uRvsgrhhhg0qKik,784
|
87
|
-
geoseeq-0.6.
|
88
|
-
geoseeq-0.6.
|
89
|
-
geoseeq-0.6.
|
90
|
-
geoseeq-0.6.
|
91
|
-
geoseeq-0.6.
|
92
|
-
geoseeq-0.6.
|
88
|
+
geoseeq-0.6.1.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
|
89
|
+
geoseeq-0.6.1.dist-info/METADATA,sha256=aSgyentw6vNb53rZD6IlBy-ZlPSjPRGQK-dq9QnY1no,4803
|
90
|
+
geoseeq-0.6.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
91
|
+
geoseeq-0.6.1.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
|
92
|
+
geoseeq-0.6.1.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
|
93
|
+
geoseeq-0.6.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|