geoseeq 0.2.1__tar.gz → 0.2.3__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {geoseeq-0.2.1 → geoseeq-0.2.3}/PKG-INFO +1 -1
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/download.py +38 -8
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/main.py +1 -1
- geoseeq-0.2.3/geoseeq/cli/progress_bar.py +28 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/upload/upload_reads.py +1 -30
- geoseeq-0.2.3/geoseeq/result/file_download.py +102 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/result/file_upload.py +4 -4
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/result/utils.py +0 -16
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq.egg-info/PKG-INFO +1 -1
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq.egg-info/SOURCES.txt +1 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/setup.py +1 -1
- geoseeq-0.2.1/geoseeq/result/file_download.py +0 -95
- {geoseeq-0.2.1 → geoseeq-0.2.3}/LICENSE +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/README.md +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/__init__.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/blob_constructors.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/bulk_creators.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/__init__.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/add.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/constants.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/copy.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/create.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/delete.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/fastq_utils.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/list.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/shared_params/__init__.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/shared_params/common_state.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/shared_params/id_handlers.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/shared_params/id_utils.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/shared_params/obj_getters.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/shared_params/opts_and_args.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/upload/__init__.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/upload/upload.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/user.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/utils.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/view.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/constants.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/contrib/__init__.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/contrib/ncbi/__init__.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/contrib/ncbi/api.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/contrib/ncbi/bioproject.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/contrib/ncbi/cli.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/contrib/ncbi/setup_logging.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/file_system_cache.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/knex.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/organization.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/pipeline.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/project.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/remote_object.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/result/__init__.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/result/bioinfo.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/result/result_file.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/result/result_folder.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/sample.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/user.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/utils.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/vc/__init__.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/vc/checksum.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/vc/cli.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/vc/clone.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/vc/constants.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/vc/vc_cache.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/vc/vc_dir.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/vc/vc_sample.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/vc/vc_stub.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/work_orders.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq.egg-info/dependency_links.txt +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq.egg-info/entry_points.txt +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq.egg-info/requires.txt +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq.egg-info/top_level.txt +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/pyproject.toml +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/setup.cfg +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/tests/__init__.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/tests/test_api_client.py +0 -0
- {geoseeq-0.2.1 → geoseeq-0.2.3}/tests/test_work_orders.py +0 -0
@@ -5,7 +5,7 @@ from os.path import dirname, join
|
|
5
5
|
|
6
6
|
import click
|
7
7
|
import pandas as pd
|
8
|
-
|
8
|
+
from multiprocessing import Pool
|
9
9
|
from .shared_params import (
|
10
10
|
handle_project_id,
|
11
11
|
project_id_arg,
|
@@ -14,13 +14,14 @@ from .shared_params import (
|
|
14
14
|
use_common_state,
|
15
15
|
flatten_list_of_els_and_files
|
16
16
|
)
|
17
|
-
from geoseeq.result.
|
17
|
+
from geoseeq.result.file_download import download_url
|
18
18
|
from geoseeq.utils import download_ftp
|
19
19
|
from geoseeq.blob_constructors import (
|
20
20
|
sample_result_file_from_uuid,
|
21
21
|
project_result_file_from_uuid,
|
22
22
|
)
|
23
23
|
from geoseeq.knex import GeoseeqNotFoundError
|
24
|
+
from .progress_bar import PBarManager
|
24
25
|
from .utils import convert_size
|
25
26
|
|
26
27
|
logger = logging.getLogger('geoseeq_api')
|
@@ -83,8 +84,16 @@ def cli_download_metadata(state, sample_ids):
|
|
83
84
|
click.echo("Metadata successfully downloaded for samples.", err=True)
|
84
85
|
|
85
86
|
|
87
|
+
def _download_one_file(args):
    """Download one file; kept at module level so it can serve as a ``Pool`` worker.

    ``args`` is a tuple ``(target, file_path, pbar)`` optionally followed by a
    fourth element ``head``.  ``target`` may be either

    * a URL string, which is handed to ``download_url``, or
    * an object exposing ``download(filename, progress_tracker=..., head=...)``
      (the ``ids`` command queues result-file objects rather than URLs).

    Returns whatever the underlying download call returns.
    """
    target, file_path, pbar, *extra = args
    head = extra[0] if extra else None
    if isinstance(target, str):
        return download_url(target, filename=file_path, head=head, progress_tracker=pbar)
    # Not a URL: delegate to the object's own download method instead of
    # passing the object itself to download_url as if it were a string.
    return target.download(file_path, progress_tracker=pbar, head=head)
|
90
|
+
|
91
|
+
|
92
|
+
cores_option = click.option('--cores', default=1, help='Number of downloads to run in parallel')
|
93
|
+
|
86
94
|
@cli_download.command("files")
|
87
95
|
@use_common_state
|
96
|
+
@cores_option
|
88
97
|
@click.option("--target-dir", default=".")
|
89
98
|
@click.option('--yes/--confirm', default=False, help='Skip confirmation prompts')
|
90
99
|
@click.option("--download/--urls-only", default=True, help="Download files or just print urls")
|
@@ -98,6 +107,7 @@ def cli_download_metadata(state, sample_ids):
|
|
98
107
|
@sample_ids_arg
|
99
108
|
def cli_download_files(
|
100
109
|
state,
|
110
|
+
cores,
|
101
111
|
sample_name_includes,
|
102
112
|
target_dir,
|
103
113
|
yes,
|
@@ -186,23 +196,32 @@ def cli_download_files(
|
|
186
196
|
if not yes:
|
187
197
|
click.confirm('Do you want to download these files?', abort=True)
|
188
198
|
|
199
|
+
download_args = []
|
200
|
+
pbars = PBarManager()
|
189
201
|
for fname, url in response["links"].items():
|
190
202
|
click.echo(f"Downloading file {fname}")
|
191
203
|
file_path = join(target_dir, fname)
|
192
204
|
makedirs(dirname(file_path), exist_ok=True)
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
205
|
+
pbar = pbars.get_new_bar(file_path)
|
206
|
+
download_args.append((url, file_path, pbar))
|
207
|
+
if cores == 1:
|
208
|
+
download_url(url, filename=file_path, progress_tracker=pbar)
|
209
|
+
|
210
|
+
if cores > 1:
|
211
|
+
with Pool(cores) as p:
|
212
|
+
for _ in p.imap_unordered(_download_one_file, download_args):
|
213
|
+
pass
|
197
214
|
|
198
215
|
|
199
216
|
@cli_download.command("ids")
|
200
217
|
@use_common_state
|
218
|
+
@cores_option
|
201
219
|
@click.option("--target-dir", default=".")
|
202
220
|
@click.option('--yes/--confirm', default=False, help='Skip confirmation prompts')
|
203
221
|
@click.option("--download/--urls-only", default=True, help="Download files or just print urls")
|
222
|
+
@click.option('--head', default=None, type=int, help='Download the first N bytes of each file')
|
204
223
|
@click.argument("ids", nargs=-1)
|
205
|
-
def cli_download_ids(state, target_dir, yes, download, ids):
|
224
|
+
def cli_download_ids(state, cores, target_dir, yes, download, head, ids):
|
206
225
|
"""Download a files from GeoSeeq based on their UUID or GeoSeeq Resource Number (GRN).
|
207
226
|
|
208
227
|
This command downloads files directly based on their ID. This is used for "manual"
|
@@ -228,6 +247,7 @@ def cli_download_ids(state, target_dir, yes, download, ids):
|
|
228
247
|
---
|
229
248
|
"""
|
230
249
|
result_file_ids = flatten_list_of_els_and_files(ids)
|
250
|
+
cores = min(cores, len(result_file_ids))  # don't use more cores than files
|
231
251
|
knex = state.get_knex()
|
232
252
|
result_files = []
|
233
253
|
for result_id in result_file_ids:
|
@@ -249,8 +269,18 @@ def cli_download_ids(state, target_dir, yes, download, ids):
|
|
249
269
|
if not yes:
|
250
270
|
click.confirm('Do you want to download these files?', abort=True)
|
251
271
|
|
272
|
+
download_args = []
|
273
|
+
pbars = PBarManager()
|
252
274
|
for result_file in result_files:
|
253
275
|
click.echo(f"Downloading file {result_file.get_referenced_filename()}")
|
254
276
|
file_path = join(target_dir, result_file.get_referenced_filename())
|
255
277
|
makedirs(dirname(file_path), exist_ok=True)
|
256
|
-
|
278
|
+
pbar = pbars.get_new_bar(file_path)
|
279
|
+
download_args.append((result_file, file_path, pbar))
|
280
|
+
if cores == 1:
|
281
|
+
result_file.download(file_path, progress_tracker=pbar, head=head)
|
282
|
+
|
283
|
+
if cores > 1:
|
284
|
+
with Pool(cores) as p:
|
285
|
+
for _ in p.imap_unordered(_download_one_file, download_args):
|
286
|
+
pass
|
@@ -0,0 +1,28 @@
|
|
1
|
+
from tqdm import tqdm
|
2
|
+
from os.path import basename
|
3
|
+
|
4
|
+
class TQBar:
    """Progress tracker backed by a tqdm bar that is created lazily.

    The tqdm bar is only built once the total is known, i.e. when
    ``set_num_chunks`` is called; until then ``bar`` is ``None``.
    """

    def __init__(self, pos, desc) -> None:
        self.n_bars = 0   # total given to set_num_chunks (0 until it is called)
        self.pos = pos    # forwarded to tqdm as ``position``
        self.desc = desc  # forwarded to tqdm as ``desc``
        self.bar = None

    def set_num_chunks(self, n_chunks):
        """Record the total and create the tqdm bar with that total."""
        self.n_bars = n_chunks
        self.bar = tqdm(total=n_chunks, position=self.pos, desc=self.desc, leave=False)

    def update(self, chunk_num):
        """Advance the bar by ``chunk_num``; a no-op if the bar does not exist yet."""
        if self.bar is None:
            # set_num_chunks has not been called, so there is nothing to advance.
            return
        self.bar.update(chunk_num)
|
18
|
+
|
19
|
+
|
20
|
+
class PBarManager:
    """Hands out ``TQBar`` instances, each with its own display position."""

    def __init__(self):
        self.n_bars = 0  # number of bars handed out so far
        self.pbars = []  # every bar created by get_new_bar, in creation order

    def get_new_bar(self, filepath):
        """Return a new ``TQBar`` labelled with the basename of ``filepath``.

        Positions start at 1 and increase by one per bar.
        """
        self.n_bars += 1
        bar = TQBar(self.n_bars, basename(filepath))
        # Record the bar so the list declared in __init__ reflects what was created.
        self.pbars.append(bar)
        return bar
|
@@ -1,5 +1,4 @@
|
|
1
1
|
import logging
|
2
|
-
from tqdm import tqdm
|
3
2
|
import click
|
4
3
|
import requests
|
5
4
|
from os.path import basename
|
@@ -20,39 +19,11 @@ from geoseeq.cli.shared_params import (
|
|
20
19
|
)
|
21
20
|
|
22
21
|
from geoseeq.constants import FASTQ_MODULE_NAMES
|
23
|
-
|
24
|
-
|
22
|
+
from geoseeq.cli.progress_bar import PBarManager
|
25
23
|
|
26
24
|
logger = logging.getLogger('geoseeq_api')
|
27
25
|
|
28
26
|
|
29
|
-
class TQBar:
|
30
|
-
|
31
|
-
def __init__(self, pos, desc) -> None:
|
32
|
-
self.n_bars = 0
|
33
|
-
self.pos = pos
|
34
|
-
self.desc = desc
|
35
|
-
self.bar = None
|
36
|
-
|
37
|
-
def set_num_chunks(self, n_chunks):
|
38
|
-
self.n_bars = n_chunks
|
39
|
-
self.bar = tqdm(total=n_chunks, position=self.pos, desc=self.desc, leave=False)
|
40
|
-
|
41
|
-
def update(self, chunk_num):
|
42
|
-
self.bar.update(chunk_num)
|
43
|
-
|
44
|
-
|
45
|
-
class PBarManager:
|
46
|
-
|
47
|
-
def __init__(self):
|
48
|
-
self.n_bars = 0
|
49
|
-
self.pbars = []
|
50
|
-
|
51
|
-
def get_new_bar(self, filepath):
|
52
|
-
self.n_bars += 1
|
53
|
-
return TQBar(self.n_bars, basename(filepath))
|
54
|
-
|
55
|
-
|
56
27
|
def _make_in_process_logger(log_level):
|
57
28
|
logger = logging.getLogger('geoseeq_api')
|
58
29
|
logger.setLevel(log_level)
|
@@ -0,0 +1,102 @@
|
|
1
|
+
|
2
|
+
import urllib.request
|
3
|
+
import logging
|
4
|
+
import requests
|
5
|
+
from os.path import basename, getsize, join
|
6
|
+
from pathlib import Path
|
7
|
+
from tempfile import NamedTemporaryFile
|
8
|
+
|
9
|
+
from geoseeq.utils import download_ftp
|
10
|
+
from geoseeq.constants import FIVE_MB
|
11
|
+
|
12
|
+
logger = logging.getLogger("geoseeq_api") # Same name as calling module
|
13
|
+
|
14
|
+
|
15
|
+
def _download_head(url, filename, head=None, progress_tracker=None):
    """Stream ``url`` to ``filename`` over HTTP(S), optionally only its first bytes.

    Args:
        url: address to fetch with ``requests``.
        filename: local path the payload is written to (opened in ``'wb'`` mode).
        head: if a positive integer, request only the first ``head`` bytes.
        progress_tracker: optional object with ``set_num_chunks(total)`` and
            ``update(n)``; it receives the ``content-length`` value (0 when the
            header is absent) and then the size of each block written.

    Returns:
        ``filename``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    headers = None
    if head and head > 0:
        # HTTP byte ranges are inclusive at both ends, so the first `head`
        # bytes are positions 0 .. head-1.
        headers = {"Range": f"bytes=0-{head - 1}"}
    response = requests.get(url, stream=True, headers=headers)
    if headers and response.status_code == 416:
        # Range not satisfiable: fall back to a plain request.
        response.close()
        logger.warning("Range request failed, trying again without a range.")
        response = requests.get(url, stream=True)
    with response:
        # Fail loudly instead of saving an error page as if it were the file.
        response.raise_for_status()
        total_size_in_bytes = int(response.headers.get('content-length', 0))
        if progress_tracker:
            progress_tracker.set_num_chunks(total_size_in_bytes)
        with open(filename, 'wb') as file:
            for data in response.iter_content(FIVE_MB):
                if progress_tracker:
                    progress_tracker.update(len(data))
                file.write(data)
    return filename
|
28
|
+
|
29
|
+
|
30
|
+
def _download_generic(url, filename, head=None):
    """Download ``url`` to ``filename`` with ``urllib``.

    Args:
        url: any URL ``urllib.request`` can open.
        filename: local path to write to.
        head: if a positive integer, write at most the first ``head`` bytes;
            otherwise the whole resource is retrieved.

    Returns:
        ``filename``.
    """
    if not (head and head > 0):
        urllib.request.urlretrieve(url, filename)
        return filename

    from urllib.error import HTTPError

    # Ask for an inclusive byte range 0 .. head-1, and also cap what is written
    # locally in case the range header is ignored.
    request = urllib.request.Request(url, headers={"Range": f"bytes=0-{head - 1}"})
    try:
        with urllib.request.urlopen(request) as response, open(filename, "wb") as out:
            remaining = head
            while remaining > 0:
                chunk = response.read(min(remaining, 1 << 20))
                if not chunk:
                    break
                out.write(chunk)
                remaining -= len(chunk)
    except HTTPError as err:
        if err.code != 416:
            raise
        # Range not satisfiable: retrieve without a range instead.
        urllib.request.urlretrieve(url, filename)
    return filename
|
33
|
+
|
34
|
+
|
35
|
+
def guess_download_kind(url):
    """Guess which downloader should handle ``url``.

    Returns one of ``'ftp'``, ``'azure'``, ``'s3'`` or ``'generic'``.

    An explicit ``ftp://`` scheme always wins.  Otherwise the guess is a
    substring search over the whole URL, tried in the order ``azure``, ``s3``,
    ``ftp``; anything else is ``'generic'``.
    """
    from urllib.parse import urlsplit

    # Check the scheme first so an FTP address whose host or path merely
    # contains "s3" or "azure" is not sent to the HTTP downloader.
    if urlsplit(url).scheme.lower() == 'ftp':
        return 'ftp'
    for marker in ('azure', 's3', 'ftp'):
        if marker in url:
            return marker
    return 'generic'
|
44
|
+
|
45
|
+
|
46
|
+
def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None):
    """Download ``url`` and return the local filepath of the downloaded file.

    Args:
        url: address of the file.
        kind: ``'guess'`` (use ``guess_download_kind``), or one of ``'generic'``,
            ``'sra'``, ``'s3'``, ``'azure'``, ``'ftp'``.
        filename: destination path; a named temporary file (not deleted
            automatically) is created when omitted.
        head: passed through to the downloader as the ``head`` limit.
        progress_tracker: optional tracker forwarded to the S3/Azure downloader.

    Raises:
        ValueError: if ``kind`` is not one of the supported kinds.
    """
    if kind == 'guess':
        kind = guess_download_kind(url)
        logger.info(f"Guessed download kind: {kind} for {url}")
    if kind not in ('generic', 'sra', 's3', 'azure', 'ftp'):
        # Reject unknown kinds before creating any temporary file.
        raise ValueError(f"Unknown download kind: {kind}")
    if not filename:
        tmp = NamedTemporaryFile(delete=False)
        tmp.close()
        filename = tmp.name
    logger.info(f"Downloading {kind} file to {filename}")
    if kind in ('generic', 'sra'):
        # 'sra' entries carry a plain URL, so they use the generic downloader.
        return _download_generic(url, filename, head=head)
    if kind in ('s3', 'azure'):
        # Both go through the same HTTP downloader and both report progress.
        return _download_head(url, filename, head=head, progress_tracker=progress_tracker)
    return download_ftp(url, filename, head=head)
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
class ResultFileDownload:
    """Abstract class that handles download methods for result files.

    Reads ``self.stored_data`` (a mapping describing where the file lives) and
    ``self._cached_filename``; both are expected to be supplied by the class
    this is mixed into.
    """

    def get_download_url(self):
        """Return a URL that can be used to download the file for this result.

        Raises:
            ValueError: if ``stored_data["__type__"]`` is not one of
                ``s3``, ``sra``, ``ftp`` or ``azure`` (case-insensitive).
        """
        blob_type = self.stored_data.get("__type__", "").lower()
        if blob_type not in ["s3", "sra", "ftp", "azure"]:
            raise ValueError(f'Unknown URL type: "{blob_type}"')
        key = 'url' if 'url' in self.stored_data else 'uri'
        if blob_type not in ["s3", "azure"]:
            return self.stored_data[key]
        # Prefer a presigned URL when one is stored.
        try:
            url = self.stored_data["presigned_url"]
        except KeyError:
            url = self.stored_data[key]
        if url.startswith("s3://"):
            # Rewrite s3://bucket/key as <endpoint_url>/bucket/key.
            url = self.stored_data["endpoint_url"] + "/" + url[5:]
        return url

    def download(self, filename=None, cache=True, head=None, progress_tracker=None):
        """Return a local filepath to the file this result points to.

        Args:
            filename: destination path; a named temporary file is created
                (and ``self._temp_filename`` set) when omitted.
            cache: if true, return a previously cached path when there is one
                and remember the path of a completed full download.
            head: forwarded to ``download_url`` as the ``head`` limit.
            progress_tracker: forwarded to ``download_url``.
        """
        # Consult the cache first so a cache hit does not leave behind an
        # unused temporary file.
        cached = getattr(self, "_cached_filename", None)
        if cache and cached:
            return cached
        if not filename:
            self._temp_filename = True
            myfile = NamedTemporaryFile(delete=False)
            myfile.close()
            filename = myfile.name
        blob_type = self.stored_data.get("__type__", "").lower()
        url = self.get_download_url()
        # 'sra' results carry a plain URL; download them as a generic URL.
        kind = 'generic' if blob_type == 'sra' else blob_type
        filepath = download_url(
            url, kind, filename,
            head=head, progress_tracker=progress_tracker
        )
        if cache and not head:
            # Never cache a truncated (head-limited) download as the full file.
            self._cached_filename = filepath
        return filepath
|
@@ -108,7 +108,7 @@ class ResultFileUpload:
|
|
108
108
|
for num, url in enumerate(list(urls.values())):
|
109
109
|
response_part = self._upload_one_part(file_chunker, url, num, max_retries, session)
|
110
110
|
complete_parts.append(response_part)
|
111
|
-
progress_tracker.update(file_chunker.get_chunk_size(num))
|
111
|
+
if progress_tracker: progress_tracker.update(file_chunker.get_chunk_size(num))
|
112
112
|
logger.info(f'Uploaded part {num + 1} of {len(urls)} for "{file_chunker.filepath}"')
|
113
113
|
return complete_parts
|
114
114
|
|
@@ -123,7 +123,7 @@ class ResultFileUpload:
|
|
123
123
|
for future in as_completed(futures):
|
124
124
|
response_part = future.result()
|
125
125
|
complete_parts.append(response_part)
|
126
|
-
progress_tracker.update(file_chunker.get_chunk_size(response_part["PartNumber"] - 1))
|
126
|
+
if progress_tracker: progress_tracker.update(file_chunker.get_chunk_size(response_part["PartNumber"] - 1))
|
127
127
|
logger.info(
|
128
128
|
f'Uploaded part {response_part["PartNumber"]} of {len(urls)} for "{file_chunker.filepath}"'
|
129
129
|
)
|
@@ -137,7 +137,7 @@ class ResultFileUpload:
|
|
137
137
|
chunk_size=FIVE_MB,
|
138
138
|
max_retries=3,
|
139
139
|
session=None,
|
140
|
-
progress_tracker=
|
140
|
+
progress_tracker=None,
|
141
141
|
threads=1,
|
142
142
|
):
|
143
143
|
"""Upload a file to S3 using the multipart upload process."""
|
@@ -146,7 +146,7 @@ class ResultFileUpload:
|
|
146
146
|
logger.info(f'Starting upload for "{filepath}"')
|
147
147
|
complete_parts = []
|
148
148
|
file_chunker = FileChunker(filepath, chunk_size).load_all_chunks()
|
149
|
-
progress_tracker.set_num_chunks(file_chunker.file_size)
|
149
|
+
if progress_tracker: progress_tracker.set_num_chunks(file_chunker.file_size)
|
150
150
|
complete_parts = self._upload_parts(file_chunker, urls, max_retries, session, progress_tracker, threads)
|
151
151
|
self._finish_multipart_upload(upload_id, complete_parts)
|
152
152
|
logger.info(f'Finished Upload for "{filepath}"')
|
@@ -15,22 +15,6 @@ from geoseeq.utils import download_ftp, md5_checksum
|
|
15
15
|
|
16
16
|
logger = logging.getLogger("geoseeq_api") # Same name as calling module
|
17
17
|
logger.addHandler(logging.NullHandler()) # No output unless configured by calling program
|
18
|
-
|
19
|
-
|
20
|
-
def _download_head(url, filename, head=None):
|
21
|
-
if head and head > 0:
|
22
|
-
opener = urllib.request.build_opener()
|
23
|
-
if head:
|
24
|
-
opener.addheaders = [('Range', f'bytes=0-{head}')]
|
25
|
-
urllib.request.install_opener(opener)
|
26
|
-
try:
|
27
|
-
urllib.request.urlretrieve(url, filename) # can throw 416 error if head is too large
|
28
|
-
except urllib.error.HTTPError as e:
|
29
|
-
if e.code == 416:
|
30
|
-
logger.warning(f"HEAD request failed, trying again without HEAD.")
|
31
|
-
_download_head(url, filename, head=None)
|
32
|
-
else:
|
33
|
-
raise e
|
34
18
|
|
35
19
|
|
36
20
|
def diff_dicts(blob1, blob2):
|
@@ -5,7 +5,7 @@ import setuptools
|
|
5
5
|
|
6
6
|
setuptools.setup(
|
7
7
|
name='geoseeq',
|
8
|
-
version='0.2.1',
|
8
|
+
version='0.2.3', # remember to update version string in CLI as well
|
9
9
|
author="David C. Danko",
|
10
10
|
author_email='dcdanko@biotia.io',
|
11
11
|
description=open('README.md').read(),
|
@@ -1,95 +0,0 @@
|
|
1
|
-
|
2
|
-
import urllib.request
|
3
|
-
from os.path import basename, getsize, join
|
4
|
-
from pathlib import Path
|
5
|
-
from tempfile import NamedTemporaryFile
|
6
|
-
|
7
|
-
from geoseeq.utils import download_ftp
|
8
|
-
|
9
|
-
from .utils import *
|
10
|
-
|
11
|
-
|
12
|
-
class ResultFileDownload:
|
13
|
-
"""Abstract class that handles download methods for result files."""
|
14
|
-
|
15
|
-
def get_download_url(self):
|
16
|
-
"""Return a URL that can be used to download the file for this result."""
|
17
|
-
blob_type = self.stored_data.get("__type__", "").lower()
|
18
|
-
if blob_type not in ["s3", "sra"]:
|
19
|
-
raise TypeError("Cannot fetch a file for a BLOB type result field.")
|
20
|
-
if blob_type == "s3":
|
21
|
-
try:
|
22
|
-
url = self.stored_data["presigned_url"]
|
23
|
-
except KeyError:
|
24
|
-
url = self.stored_data["uri"]
|
25
|
-
if url.startswith("s3://"):
|
26
|
-
url = self.stored_data["endpoint_url"] + "/" + url[5:]
|
27
|
-
return url
|
28
|
-
elif blob_type == "sra":
|
29
|
-
url = self.stored_data["url"]
|
30
|
-
return url
|
31
|
-
|
32
|
-
def download_file(self, filename=None, cache=True, head=None):
|
33
|
-
"""Return a local filepath to the file this result points to."""
|
34
|
-
if not filename:
|
35
|
-
self._temp_filename = True
|
36
|
-
myfile = NamedTemporaryFile(delete=False)
|
37
|
-
myfile.close()
|
38
|
-
filename = myfile.name
|
39
|
-
blob_type = self.stored_data.get("__type__", "").lower()
|
40
|
-
if cache and self._cached_filename:
|
41
|
-
return self._cached_filename
|
42
|
-
if blob_type == "s3":
|
43
|
-
return self._download_s3(filename, cache, head=head)
|
44
|
-
elif blob_type == "sra":
|
45
|
-
return self._download_sra(filename, cache)
|
46
|
-
elif blob_type == "ftp":
|
47
|
-
return self._download_ftp(filename, cache)
|
48
|
-
elif blob_type == "azure":
|
49
|
-
return self._download_azure(filename, cache, head=head)
|
50
|
-
else:
|
51
|
-
raise TypeError("Cannot fetch a file for a BLOB type result field.")
|
52
|
-
|
53
|
-
def _download_s3(self, filename, cache, head=None):
|
54
|
-
logger.info(f"Downloading S3 file to {filename}")
|
55
|
-
try:
|
56
|
-
url = self.stored_data["presigned_url"]
|
57
|
-
except KeyError:
|
58
|
-
key = 'uri' if 'uri' in self.stored_data else 'url'
|
59
|
-
url = self.stored_data[key]
|
60
|
-
if url.startswith("s3://"):
|
61
|
-
url = self.stored_data["endpoint_url"] + "/" + url[5:]
|
62
|
-
_download_head(url, filename, head=head)
|
63
|
-
if cache:
|
64
|
-
self._cached_filename = filename
|
65
|
-
return filename
|
66
|
-
|
67
|
-
def _download_azure(self, filename, cache, head=None):
|
68
|
-
logger.info(f"Downloading Azure file to {filename}")
|
69
|
-
try:
|
70
|
-
url = self.stored_data["presigned_url"]
|
71
|
-
except KeyError:
|
72
|
-
key = 'uri' if 'uri' in self.stored_data else 'url'
|
73
|
-
url = self.stored_data[key]
|
74
|
-
_download_head(url, filename, head=head)
|
75
|
-
if cache:
|
76
|
-
self._cached_filename = filename
|
77
|
-
return filename
|
78
|
-
|
79
|
-
def _download_sra(self, filename, cache):
|
80
|
-
return self._download_generic_url(filename, cache)
|
81
|
-
|
82
|
-
def _download_ftp(self, filename, cache, head=None):
|
83
|
-
logger.info(f"Downloading FTP file to {filename}")
|
84
|
-
key = 'url' if 'url' in self.stored_data else 'uri'
|
85
|
-
download_ftp(self.stored_data[key], filename, head=head)
|
86
|
-
return filename
|
87
|
-
|
88
|
-
def _download_generic_url(self, filename, cache):
|
89
|
-
logger.info(f"Downloading generic URL file to {filename}")
|
90
|
-
key = 'url' if 'url' in self.stored_data else 'uri'
|
91
|
-
url = self.stored_data[key]
|
92
|
-
urllib.request.urlretrieve(url, filename)
|
93
|
-
if cache:
|
94
|
-
self._cached_filename = filename
|
95
|
-
return filename
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|