geoseeq 0.6.0__tar.gz → 0.6.1__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {geoseeq-0.6.0 → geoseeq-0.6.1}/PKG-INFO +1 -1
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/main.py +1 -1
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/result/file_download.py +46 -7
- geoseeq-0.6.1/geoseeq/result/resumable_download_tracker.py +99 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/upload_download_manager.py +1 -1
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq.egg-info/PKG-INFO +1 -1
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq.egg-info/SOURCES.txt +1 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/pyproject.toml +1 -1
- {geoseeq-0.6.0 → geoseeq-0.6.1}/LICENSE +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/README.md +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/__init__.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/app.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/blob_constructors.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/bulk_creators.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/__init__.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/constants.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/copy.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/detail.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/download.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/fastq_utils.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/get_eula.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/manage.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/progress_bar.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/project.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/raw.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/run.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/search.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/shared_params/__init__.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/shared_params/common_state.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/shared_params/config.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/shared_params/id_handlers.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/shared_params/obj_getters.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/shared_params/opts_and_args.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/upload/__init__.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/upload/upload.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/upload/upload_advanced.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/upload/upload_reads.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/user.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/utils.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/view.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/constants.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/contrib/__init__.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/contrib/ncbi/__init__.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/contrib/ncbi/api.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/contrib/ncbi/bioproject.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/contrib/ncbi/cli.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/contrib/ncbi/setup_logging.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/file_system_cache.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/id_constructors/__init__.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/id_constructors/from_blobs.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/id_constructors/from_ids.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/id_constructors/from_names.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/id_constructors/from_uuids.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/id_constructors/resolvers.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/id_constructors/utils.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/knex.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/organization.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/pipeline.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/plotting/__init__.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/plotting/constants.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/plotting/highcharts.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/plotting/map/__init__.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/plotting/map/base_layer.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/plotting/map/map.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/plotting/map/overlay.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/plotting/selectable.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/project.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/remote_object.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/result/__init__.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/result/bioinfo.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/result/file_chunker.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/result/file_upload.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/result/result_file.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/result/result_folder.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/result/resumable_upload_tracker.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/result/utils.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/sample.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/search.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/user.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/utils.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/vc/__init__.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/vc/checksum.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/vc/cli.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/vc/clone.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/vc/constants.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/vc/vc_cache.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/vc/vc_dir.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/vc/vc_sample.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/vc/vc_stub.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/work_orders.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq.egg-info/dependency_links.txt +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq.egg-info/entry_points.txt +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq.egg-info/top_level.txt +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/setup.cfg +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/setup.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/tests/__init__.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/tests/test_api_client.py +0 -0
- {geoseeq-0.6.0 → geoseeq-0.6.1}/tests/test_plotting.py +0 -0
@@ -54,7 +54,7 @@ def version():
|
|
54
54
|
Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
|
55
55
|
Run `geoseeq eula show` to view the EULA.
|
56
56
|
"""
|
57
|
-
click.echo('0.6.0')
|
57
|
+
click.echo('0.6.1') # remember to update setup
|
58
58
|
|
59
59
|
|
60
60
|
@main.group('advanced')
|
@@ -2,15 +2,22 @@
|
|
2
2
|
import urllib.request
|
3
3
|
import logging
|
4
4
|
import requests
|
5
|
-
|
5
|
+
import os
|
6
|
+
from os.path import basename, getsize, join, isfile, getmtime, dirname
|
6
7
|
from pathlib import Path
|
7
8
|
from tempfile import NamedTemporaryFile
|
8
9
|
|
9
10
|
from geoseeq.utils import download_ftp
|
10
11
|
from geoseeq.constants import FIVE_MB
|
12
|
+
from hashlib import md5
|
13
|
+
from .resumable_download_tracker import ResumableDownloadTracker
|
11
14
|
|
12
15
|
logger = logging.getLogger("geoseeq_api") # Same name as calling module
|
13
16
|
|
17
|
+
def url_to_id(url):
|
18
|
+
url = url.split("?")[0]
|
19
|
+
return md5(url.encode()).hexdigest()[:16]
|
20
|
+
|
14
21
|
|
15
22
|
def _download_head(url, filename, head=None, start=0, progress_tracker=None):
|
16
23
|
headers = None
|
@@ -20,11 +27,43 @@ def _download_head(url, filename, head=None, start=0, progress_tracker=None):
|
|
20
27
|
response.raise_for_status()
|
21
28
|
total_size_in_bytes = int(response.headers.get('content-length', 0))
|
22
29
|
if progress_tracker: progress_tracker.set_num_chunks(total_size_in_bytes)
|
23
|
-
|
30
|
+
if total_size_in_bytes > 10 * FIVE_MB: # Use resumable download
|
31
|
+
print("Using resumable download")
|
32
|
+
return _download_resumable(response, filename, total_size_in_bytes, progress_tracker)
|
33
|
+
else:
|
34
|
+
block_size = FIVE_MB
|
35
|
+
with open(filename, 'wb') as file:
|
36
|
+
for data in response.iter_content(block_size):
|
37
|
+
if progress_tracker: progress_tracker.update(len(data))
|
38
|
+
file.write(data)
|
39
|
+
return filename
|
40
|
+
|
41
|
+
|
42
|
+
def _download_resumable(response, filename, total_size_in_bytes, progress_tracker=None, chunk_size=5 * FIVE_MB, part_prefix=".gs_download_{}_{}."):
|
43
|
+
target_id = url_to_id(response.url)
|
44
|
+
tracker = ResumableDownloadTracker(chunk_size, target_id, filename)
|
45
|
+
if not tracker.download_started: tracker.start_download(response.url)
|
46
|
+
n_chunks = total_size_in_bytes // chunk_size
|
47
|
+
for i in range(n_chunks):
|
48
|
+
bytes_start, bytes_end = i * chunk_size, min((i + 1) * chunk_size - 1, total_size_in_bytes - 1)
|
49
|
+
if tracker.part_has_been_downloaded(i):
|
50
|
+
logger.debug(f"Part {i} has already been downloaded.")
|
51
|
+
else:
|
52
|
+
logger.debug(f"Downloading part {i} of {n_chunks - 1}")
|
53
|
+
part_filename = join(dirname(filename), part_prefix.format(i, n_chunks - 1) + basename(filename))
|
54
|
+
_download_head(response.url, part_filename, head=bytes_end, start=bytes_start, progress_tracker=None)
|
55
|
+
part_info = dict(part_number=i, start=bytes_start, end=bytes_end, part_filename=part_filename)
|
56
|
+
tracker.add_part(part_info)
|
57
|
+
if progress_tracker: progress_tracker.update(bytes_end - bytes_start + 1)
|
58
|
+
|
59
|
+
# at this point all parts have been downloaded
|
24
60
|
with open(filename, 'wb') as file:
|
25
|
-
for
|
26
|
-
|
27
|
-
|
61
|
+
for i in range(n_chunks):
|
62
|
+
part_info = tracker.get_part_info(i)
|
63
|
+
part_filename = part_info["part_filename"]
|
64
|
+
with open(part_filename, 'rb') as part_file:
|
65
|
+
file.write(part_file.read())
|
66
|
+
tracker.cleanup()
|
28
67
|
return filename
|
29
68
|
|
30
69
|
|
@@ -44,7 +83,7 @@ def guess_download_kind(url):
|
|
44
83
|
return 'generic'
|
45
84
|
|
46
85
|
|
47
|
-
def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None):
|
86
|
+
def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None, target_uuid=None):
|
48
87
|
"""Return a local filepath to the downloaded file. Download the file."""
|
49
88
|
if filename and isfile(filename):
|
50
89
|
file_size = getsize(filename)
|
@@ -135,7 +174,7 @@ class ResultFileDownload:
|
|
135
174
|
url = self.get_download_url()
|
136
175
|
filepath = download_url(
|
137
176
|
url, blob_type, filename,
|
138
|
-
head=head, progress_tracker=progress_tracker
|
177
|
+
head=head, progress_tracker=progress_tracker,
|
139
178
|
)
|
140
179
|
if cache and flag_suffix:
|
141
180
|
# create flag file
|
@@ -0,0 +1,99 @@
|
|
1
|
+
|
2
|
+
import time
|
3
|
+
import json
|
4
|
+
import os
|
5
|
+
from os.path import basename, getsize, join, dirname, isfile, getctime
|
6
|
+
from pathlib import Path
|
7
|
+
from random import random
|
8
|
+
import requests
|
9
|
+
|
10
|
+
from geoseeq.knex import GeoseeqGeneralError
|
11
|
+
from geoseeq.constants import FIVE_MB
|
12
|
+
from geoseeq.utils import md5_checksum
|
13
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
14
|
+
from .utils import *
|
15
|
+
from geoseeq.file_system_cache import GEOSEEQ_CACHE_DIR
|
16
|
+
from .file_chunker import FileChunker
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
class ResumableDownloadTracker:
|
21
|
+
|
22
|
+
def __init__(self, chunk_size, download_target_id, target_local_path, tracker_file_prefix="gs_resumable_download_tracker"):
|
23
|
+
self.open, self.download_started = True, False
|
24
|
+
self.download_target_id = download_target_id
|
25
|
+
self.target_local_path = target_local_path
|
26
|
+
self.tracker_file_dir = join(GEOSEEQ_CACHE_DIR, 'download')
|
27
|
+
self.tracker_file = join(
|
28
|
+
self.tracker_file_dir,
|
29
|
+
tracker_file_prefix + f".{download_target_id}.{chunk_size}." + basename(target_local_path)
|
30
|
+
)
|
31
|
+
try:
|
32
|
+
os.makedirs(self.tracker_file_dir, exist_ok=True)
|
33
|
+
except Exception as e:
|
34
|
+
logger.warning(f'Could not create resumable download tracker directory. {e}')
|
35
|
+
self.open = False
|
36
|
+
self._loaded_parts = {}
|
37
|
+
self._load_parts_from_file()
|
38
|
+
|
39
|
+
def start_download(self, download_url):
|
40
|
+
if not self.open:
|
41
|
+
return
|
42
|
+
if self.download_started:
|
43
|
+
raise GeoseeqGeneralError("Download has already started.")
|
44
|
+
self.download_started = True
|
45
|
+
blob = dict(download_url=download_url,
|
46
|
+
download_target_id=self.download_target_id,
|
47
|
+
start_time=time.time())
|
48
|
+
serialized = json.dumps(blob)
|
49
|
+
with open(self.tracker_file, "w") as f:
|
50
|
+
f.write(serialized + "\n")
|
51
|
+
self.download_url = download_url
|
52
|
+
return self
|
53
|
+
|
54
|
+
def add_part(self, part_download_info):
|
55
|
+
if not self.open:
|
56
|
+
assert False, "Cannot add part to closed ResumableDownloadTracker"
|
57
|
+
part_id = part_download_info["part_number"]
|
58
|
+
serialized = json.dumps(part_download_info)
|
59
|
+
with open(self.tracker_file, "a") as f:
|
60
|
+
f.write(serialized + "\n")
|
61
|
+
self._loaded_parts[part_id] = part_download_info
|
62
|
+
|
63
|
+
def _load_parts_from_file(self):
|
64
|
+
if not isfile(self.tracker_file):
|
65
|
+
return
|
66
|
+
with open(self.tracker_file, "r") as f:
|
67
|
+
header_blob = json.loads(f.readline())
|
68
|
+
self.download_url = header_blob["download_url"]
|
69
|
+
start_time = header_blob["start_time"] # for now we don't expire resumable downloads
|
70
|
+
self.download_started = True
|
71
|
+
for line in f:
|
72
|
+
part_info = json.loads(line)
|
73
|
+
part_id = part_info["part_number"]
|
74
|
+
self._loaded_parts[part_id] = part_info
|
75
|
+
|
76
|
+
def part_has_been_downloaded(self, part_number):
|
77
|
+
if not self.open:
|
78
|
+
return False
|
79
|
+
if part_number not in self._loaded_parts:
|
80
|
+
return False
|
81
|
+
part_info = self._loaded_parts[part_number]
|
82
|
+
part_path = part_info["part_filename"]
|
83
|
+
return isfile(part_path)
|
84
|
+
|
85
|
+
def get_part_info(self, part_number):
|
86
|
+
if not self.open:
|
87
|
+
return None
|
88
|
+
return self._loaded_parts.get(part_number, None)
|
89
|
+
|
90
|
+
def cleanup(self):
|
91
|
+
if not self.open:
|
92
|
+
return
|
93
|
+
for part in self._loaded_parts.values():
|
94
|
+
part_path = part["part_filename"]
|
95
|
+
if isfile(part_path):
|
96
|
+
os.remove(part_path)
|
97
|
+
os.remove(self.tracker_file)
|
98
|
+
self.open = False
|
99
|
+
|
@@ -194,7 +194,7 @@ class GeoSeeqDownloadManager:
|
|
194
194
|
self._convert_result_files_to_urls()
|
195
195
|
download_args = [(
|
196
196
|
url, file_path,
|
197
|
-
self.progress_tracker_factory(
|
197
|
+
self.progress_tracker_factory(file_path),
|
198
198
|
self.ignore_errors, self.head, self.log_level,
|
199
199
|
self.n_parallel_downloads > 1
|
200
200
|
) for url, file_path in self._result_files]
|
@@ -79,6 +79,7 @@ geoseeq/result/file_download.py
|
|
79
79
|
geoseeq/result/file_upload.py
|
80
80
|
geoseeq/result/result_file.py
|
81
81
|
geoseeq/result/result_folder.py
|
82
|
+
geoseeq/result/resumable_download_tracker.py
|
82
83
|
geoseeq/result/resumable_upload_tracker.py
|
83
84
|
geoseeq/result/utils.py
|
84
85
|
geoseeq/vc/__init__.py
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|