geoseeq 0.5.6a7__tar.gz → 0.5.6a9__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/PKG-INFO +1 -1
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/main.py +1 -1
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/shared_params/opts_and_args.py +1 -1
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/upload/upload.py +10 -4
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/upload/upload_reads.py +7 -3
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/file_system_cache.py +2 -2
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/result/file_upload.py +111 -10
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/result/result_file.py +8 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/upload_download_manager.py +14 -4
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq.egg-info/PKG-INFO +1 -1
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/pyproject.toml +1 -1
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/setup.py +1 -1
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/LICENSE +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/README.md +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/__init__.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/app.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/blob_constructors.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/bulk_creators.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/__init__.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/constants.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/copy.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/detail.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/download.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/fastq_utils.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/get_eula.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/manage.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/progress_bar.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/run.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/search.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/shared_params/__init__.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/shared_params/common_state.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/shared_params/config.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/shared_params/id_handlers.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/shared_params/obj_getters.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/upload/__init__.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/upload/upload_advanced.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/user.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/utils.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/view.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/constants.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/contrib/__init__.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/contrib/ncbi/__init__.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/contrib/ncbi/api.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/contrib/ncbi/bioproject.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/contrib/ncbi/cli.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/contrib/ncbi/setup_logging.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/id_constructors/__init__.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/id_constructors/from_blobs.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/id_constructors/from_ids.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/id_constructors/from_names.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/id_constructors/from_uuids.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/id_constructors/resolvers.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/id_constructors/utils.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/knex.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/organization.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/pipeline.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/plotting/__init__.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/plotting/constants.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/plotting/highcharts.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/plotting/map/__init__.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/plotting/map/base_layer.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/plotting/map/map.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/plotting/map/overlay.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/plotting/selectable.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/project.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/remote_object.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/result/__init__.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/result/bioinfo.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/result/file_download.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/result/result_folder.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/result/utils.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/sample.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/search.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/user.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/utils.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/vc/__init__.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/vc/checksum.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/vc/cli.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/vc/clone.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/vc/constants.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/vc/vc_cache.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/vc/vc_dir.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/vc/vc_sample.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/vc/vc_stub.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/work_orders.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq.egg-info/SOURCES.txt +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq.egg-info/dependency_links.txt +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq.egg-info/entry_points.txt +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq.egg-info/top_level.txt +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/setup.cfg +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/tests/__init__.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/tests/test_api_client.py +0 -0
- {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/tests/test_plotting.py +0 -0
@@ -53,7 +53,7 @@ def version():
|
|
53
53
|
Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
|
54
54
|
Run `geoseeq eula show` to view the EULA.
|
55
55
|
"""
|
56
|
-
click.echo('0.5.
|
56
|
+
click.echo('0.5.6a9') # remember to update setup
|
57
57
|
|
58
58
|
|
59
59
|
@main.group('advanced')
|
@@ -2,7 +2,7 @@ import click
|
|
2
2
|
|
3
3
|
dryrun_option = click.option('--dryrun/--wetrun', default=False, help='Print what will be created without actually creating it')
|
4
4
|
overwrite_option = click.option('--overwrite/--no-overwrite', default=False, help='Overwrite existing samples, files, and data')
|
5
|
-
|
5
|
+
no_new_versions_option = click.option('--no-new-versions/--new-versions', default=False, help='Do not create new versions of the data')
|
6
6
|
def module_option(options, use_default=True, default=None):
|
7
7
|
if use_default:
|
8
8
|
default = default or options[0]
|
@@ -24,6 +24,7 @@ from geoseeq.cli.shared_params import (
|
|
24
24
|
handle_project_id,
|
25
25
|
project_or_sample_id_arg,
|
26
26
|
handle_project_or_sample_id,
|
27
|
+
no_new_versions_option,
|
27
28
|
)
|
28
29
|
from geoseeq.upload_download_manager import GeoSeeqUploadManager
|
29
30
|
|
@@ -41,11 +42,12 @@ hidden_option = click.option('--hidden/--no-hidden', default=False, help='Upload
|
|
41
42
|
@link_option
|
42
43
|
@recursive_option
|
43
44
|
@hidden_option
|
45
|
+
@no_new_versions_option
|
44
46
|
@click.option('-n', '--geoseeq-file-name', default=None, multiple=True,
|
45
47
|
help='Specify a different name for the file on GeoSeeq than the local file name.')
|
46
48
|
@folder_id_arg
|
47
49
|
@click.argument('file_paths', type=click.Path(exists=True), nargs=-1)
|
48
|
-
def cli_upload_file(state, cores, yes, private, link_type, recursive, hidden, geoseeq_file_name, folder_id, file_paths):
|
50
|
+
def cli_upload_file(state, cores, yes, private, link_type, recursive, hidden, no_new_versions, geoseeq_file_name, folder_id, file_paths):
|
49
51
|
"""Upload files to GeoSeeq.
|
50
52
|
|
51
53
|
This command uploads files to either a sample or project on GeoSeeq. It can be used to upload
|
@@ -107,7 +109,8 @@ def cli_upload_file(state, cores, yes, private, link_type, recursive, hidden, ge
|
|
107
109
|
link_type=link_type,
|
108
110
|
progress_tracker_factory=PBarManager().get_new_bar,
|
109
111
|
log_level=state.log_level,
|
110
|
-
|
112
|
+
no_new_versions=no_new_versions,
|
113
|
+
use_cache=state.use_cache,
|
111
114
|
)
|
112
115
|
for geoseeq_file_name, file_path in name_pairs:
|
113
116
|
if isfile(file_path):
|
@@ -130,9 +133,10 @@ def cli_upload_file(state, cores, yes, private, link_type, recursive, hidden, ge
|
|
130
133
|
@private_option
|
131
134
|
@recursive_option
|
132
135
|
@hidden_option
|
136
|
+
@no_new_versions_option
|
133
137
|
@project_or_sample_id_arg
|
134
138
|
@click.argument('folder_names', type=click.Path(exists=True), nargs=-1)
|
135
|
-
def cli_upload_folder(state, cores, yes, private, recursive, hidden, project_or_sample_id, folder_names):
|
139
|
+
def cli_upload_folder(state, cores, yes, private, recursive, hidden, no_new_versions, project_or_sample_id, folder_names):
|
136
140
|
knex = state.get_knex()
|
137
141
|
root_obj = handle_project_or_sample_id(knex, project_or_sample_id, yes=yes, private=private)
|
138
142
|
upload_manager = GeoSeeqUploadManager(
|
@@ -140,7 +144,9 @@ def cli_upload_folder(state, cores, yes, private, recursive, hidden, project_or_
|
|
140
144
|
link_type='upload',
|
141
145
|
progress_tracker_factory=PBarManager().get_new_bar,
|
142
146
|
log_level=logging.INFO,
|
143
|
-
overwrite=True
|
147
|
+
overwrite=True,
|
148
|
+
use_cache=state.use_cache,
|
149
|
+
no_new_versions=no_new_versions,
|
144
150
|
)
|
145
151
|
for folder_name in folder_names:
|
146
152
|
result_folder = root_obj.result_folder(folder_name).idem()
|
@@ -15,6 +15,7 @@ from geoseeq.cli.shared_params import (
|
|
15
15
|
overwrite_option,
|
16
16
|
yes_option,
|
17
17
|
use_common_state,
|
18
|
+
no_new_versions_option
|
18
19
|
)
|
19
20
|
from geoseeq.upload_download_manager import GeoSeeqUploadManager
|
20
21
|
|
@@ -85,7 +86,7 @@ def _group_files(knex, filepaths, module_name, regex, yes):
|
|
85
86
|
return groups
|
86
87
|
|
87
88
|
|
88
|
-
def _do_upload(groups, module_name, link_type, lib, filepaths, overwrite, cores, state):
|
89
|
+
def _do_upload(groups, module_name, link_type, lib, filepaths, overwrite, no_new_versions, cores, state):
|
89
90
|
|
90
91
|
with requests.Session() as session:
|
91
92
|
upload_manager = GeoSeeqUploadManager(
|
@@ -95,6 +96,8 @@ def _do_upload(groups, module_name, link_type, lib, filepaths, overwrite, cores,
|
|
95
96
|
log_level=state.log_level,
|
96
97
|
overwrite=overwrite,
|
97
98
|
progress_tracker_factory=PBarManager().get_new_bar,
|
99
|
+
use_cache=state.use_cache,
|
100
|
+
no_new_versions=no_new_versions,
|
98
101
|
)
|
99
102
|
for group in groups:
|
100
103
|
sample = lib.sample(group['sample_name']).idem()
|
@@ -138,10 +141,11 @@ def flatten_list_of_fastqs(filepaths):
|
|
138
141
|
@click.option('--regex', default=None, help='An optional regex to use to extract sample names from the file names')
|
139
142
|
@private_option
|
140
143
|
@link_option
|
144
|
+
@no_new_versions_option
|
141
145
|
@module_option(FASTQ_MODULE_NAMES)
|
142
146
|
@project_id_arg
|
143
147
|
@click.argument('fastq_files', type=click.Path(exists=True), nargs=-1)
|
144
|
-
def cli_upload_reads_wizard(state, cores, overwrite, yes, regex, private, link_type, module_name, project_id, fastq_files):
|
148
|
+
def cli_upload_reads_wizard(state, cores, overwrite, yes, regex, private, link_type, no_new_versions, module_name, project_id, fastq_files):
|
145
149
|
"""Upload fastq read files to GeoSeeq.
|
146
150
|
|
147
151
|
This command automatically groups files by their sample name, lane number
|
@@ -195,4 +199,4 @@ def cli_upload_reads_wizard(state, cores, overwrite, yes, regex, private, link_t
|
|
195
199
|
click.echo(f'Found {len(filepaths)} files to upload.', err=True)
|
196
200
|
regex = _get_regex(knex, filepaths, module_name, proj, regex)
|
197
201
|
groups = _group_files(knex, filepaths, module_name, regex, yes)
|
198
|
-
_do_upload(groups, module_name, link_type, proj, filepaths, overwrite, cores, state)
|
202
|
+
_do_upload(groups, module_name, link_type, proj, filepaths, overwrite, no_new_versions, cores, state)
|
@@ -15,7 +15,7 @@ CACHE_DIR = join(
|
|
15
15
|
"geoseeq"
|
16
16
|
)
|
17
17
|
USE_GEOSEEQ_CACHE = None
|
18
|
-
|
18
|
+
GEOSEEQ_CACHE_DIR = abspath(f'{CACHE_DIR}/geoseeq_api_cache/v1/')
|
19
19
|
|
20
20
|
def hash_obj(obj):
|
21
21
|
val = obj
|
@@ -41,7 +41,7 @@ class FileSystemCache:
|
|
41
41
|
|
42
42
|
@property
|
43
43
|
def cache_dir_path(self):
|
44
|
-
return
|
44
|
+
return GEOSEEQ_CACHE_DIR
|
45
45
|
|
46
46
|
def setup(self):
|
47
47
|
if self.no_cache:
|
@@ -1,7 +1,8 @@
|
|
1
1
|
|
2
2
|
import time
|
3
3
|
import json
|
4
|
-
|
4
|
+
import os
|
5
|
+
from os.path import basename, getsize, join, dirname, isfile
|
5
6
|
from pathlib import Path
|
6
7
|
|
7
8
|
import requests
|
@@ -11,7 +12,7 @@ from geoseeq.constants import FIVE_MB
|
|
11
12
|
from geoseeq.utils import md5_checksum
|
12
13
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
13
14
|
from .utils import *
|
14
|
-
|
15
|
+
from geoseeq.file_system_cache import GEOSEEQ_CACHE_DIR
|
15
16
|
|
16
17
|
class FileChunker:
|
17
18
|
|
@@ -38,6 +39,77 @@ class FileChunker:
|
|
38
39
|
def get_chunk_size(self, num):
|
39
40
|
self.load_all_chunks()
|
40
41
|
return len(self.loaded_parts[num])
|
42
|
+
|
43
|
+
|
44
|
+
class ResumableUploadTracker:
    """Record the progress of a multipart upload in a local tracker file.

    The tracker file lives under ``GEOSEEQ_CACHE_DIR/upload`` and is named
    from the prefix, the chunk size and the *basename* of the uploaded file.
    Its first line is a JSON header ``{"upload_id": ..., "urls": ...}``; each
    following line is the JSON description of one uploaded part and must
    contain a ``"PartNumber"`` key.

    Attributes:
        open: True while the tracker is usable. It becomes False when the
            tracker directory cannot be created, or once every part has
            been recorded and the tracker file has been removed.
        upload_started: True once a header exists, either written by
            ``start_upload`` or read back from an existing tracker file.
        upload_id, urls: values from the header, or None before the start.

    NOTE(review): the file name only uses ``basename(filepath)``, so two
    files with the same name and chunk size in different directories share
    one tracker file -- confirm whether callers can hit this.
    NOTE(review): no locking is done here; presumably callers that record
    parts from several threads rely on the small appends not interleaving.
    """

    def __init__(self, filepath, chunk_size, tracker_file_prefix="gs_resumable_upload_tracker"):
        """Set up the tracker and load any state left by an earlier run.

        Args:
            filepath: path of the local file being uploaded.
            chunk_size: size of each part; it is part of the tracker file name.
            tracker_file_prefix: leading component of the tracker file name.
        """
        self.open, self.upload_started = True, False
        self.upload_id, self.urls = None, None
        self.filepath = filepath
        self.tracker_file = join(
            GEOSEEQ_CACHE_DIR, 'upload',
            tracker_file_prefix + f".{chunk_size}." + basename(filepath)
        )
        self._loaded_parts = {}
        try:
            os.makedirs(dirname(self.tracker_file), exist_ok=True)
        except Exception as e:
            # Best effort: without a tracker directory the upload still
            # works, it just cannot be resumed.
            logger.warning(f'Could not create resumable upload tracker directory. {e}')
            self.open = False
        if self.open:
            # A disabled tracker must not report a started upload.
            self._load_parts_from_file()

    def start_upload(self, upload_id, urls):
        """Write the header line for a new upload, replacing any old file.

        Does nothing when the tracker is disabled.

        Raises:
            GeoseeqGeneralError: if an upload has already been started.
        """
        if not self.open:
            return
        if self.upload_started:
            raise GeoseeqGeneralError("Upload has already started.")
        blob = dict(upload_id=upload_id, urls=urls)
        serialized = json.dumps(blob)
        with open(self.tracker_file, "w") as f:
            f.write(serialized + "\n")
        self.upload_id, self.urls = upload_id, urls
        self.upload_started = True

    def add_part(self, part_upload_info):
        """Append one uploaded part to the tracker file.

        When the number of recorded parts equals the number of URLs the
        tracker file is removed and the tracker is closed. Does nothing when
        the tracker is disabled.

        Args:
            part_upload_info: JSON-serialisable dict with a ``"PartNumber"`` key.

        Raises:
            GeoseeqGeneralError: if no upload has been started, because
                appending then would create a tracker file with no header.
        """
        if not self.open:
            return
        if not self.upload_started or self.urls is None:
            raise GeoseeqGeneralError("Upload has not been started.")
        part_id = part_upload_info["PartNumber"]
        serialized = json.dumps(part_upload_info)
        with open(self.tracker_file, "a") as f:
            f.write(serialized + "\n")
        self._loaded_parts[part_id] = part_upload_info
        if len(self._loaded_parts) == len(self.urls):
            # cleanup() is a no-op once `open` is False, so remove first.
            self.cleanup()
            self.open = False

    def _load_parts_from_file(self):
        """Restore the upload id, URLs and recorded parts from the tracker file.

        A missing file leaves the tracker in its initial state. An unreadable
        header is ignored, so the next ``start_upload`` overwrites the file.
        Unreadable part lines, such as a line cut short by an interrupted
        write, are skipped, so those parts are reported as not yet uploaded.
        """
        if not isfile(self.tracker_file):
            return
        with open(self.tracker_file, "r") as f:
            try:
                header_blob = json.loads(f.readline())
                upload_id, urls = header_blob["upload_id"], header_blob["urls"]
            except (ValueError, KeyError, TypeError) as e:
                # json.JSONDecodeError is a ValueError subclass.
                logger.warning(f'Ignoring unreadable resumable upload tracker "{self.tracker_file}". {e}')
                return
            self.upload_id, self.urls = upload_id, urls
            self.upload_started = True
            for line in f:
                try:
                    blob = json.loads(line)
                    part_id = blob["PartNumber"]
                except (ValueError, KeyError, TypeError):
                    continue
                self._loaded_parts[part_id] = blob

    def part_has_been_uploaded(self, part_number):
        """Return True if `part_number` is recorded and the tracker is open."""
        if not self.open:
            return False
        return part_number in self._loaded_parts

    def get_part_info(self, part_number):
        """Return the recorded info for `part_number`.

        Raises:
            KeyError: if the part has not been recorded.
        """
        return self._loaded_parts[part_number]

    def cleanup(self):
        """Delete the tracker file; a missing file is not an error.

        Does nothing when the tracker is disabled.
        """
        if not self.open:
            return
        try:
            os.remove(self.tracker_file)
        except FileNotFoundError:
            pass
|
41
113
|
|
42
114
|
|
43
115
|
class ResultFileUpload:
|
@@ -74,7 +146,10 @@ class ResultFileUpload:
|
|
74
146
|
urls = response
|
75
147
|
return upload_id, urls
|
76
148
|
|
77
|
-
def _upload_one_part(self, file_chunker, url, num, max_retries, session=None):
|
149
|
+
def _upload_one_part(self, file_chunker, url, num, max_retries, session=None, resumable_upload_tracker=None):
|
150
|
+
if resumable_upload_tracker and resumable_upload_tracker.part_has_been_uploaded(num + 1):
|
151
|
+
logger.info(f"Part {num + 1} has already been uploaded. Skipping.")
|
152
|
+
return resumable_upload_tracker.get_part_info(num + 1)
|
78
153
|
file_chunk = file_chunker.get_chunk(num)
|
79
154
|
attempts = 0
|
80
155
|
while attempts < max_retries:
|
@@ -94,7 +169,12 @@ class ResultFileUpload:
|
|
94
169
|
if attempts == max_retries:
|
95
170
|
raise
|
96
171
|
time.sleep(10**attempts) # exponential backoff, (10 ** 2)s default max
|
97
|
-
|
172
|
+
etag = http_response.headers["ETag"].replace('"', "")
|
173
|
+
blob = {"ETag": etag, "PartNumber": num + 1}
|
174
|
+
if resumable_upload_tracker:
|
175
|
+
# TODO technically not thread safe, but should be fine for now
|
176
|
+
resumable_upload_tracker.add_part(blob)
|
177
|
+
return blob
|
98
178
|
|
99
179
|
def _finish_multipart_upload(self, upload_id, complete_parts):
|
100
180
|
response = self.knex.post(
|
@@ -108,12 +188,12 @@ class ResultFileUpload:
|
|
108
188
|
)
|
109
189
|
response.raise_for_status()
|
110
190
|
|
111
|
-
def _upload_parts(self, file_chunker, urls, max_retries, session, progress_tracker, threads):
|
191
|
+
def _upload_parts(self, file_chunker, urls, max_retries, session, progress_tracker, threads, resumable_upload_tracker=None):
|
112
192
|
if threads == 1:
|
113
193
|
logger.info(f"Uploading parts in series for {file_chunker.filepath}")
|
114
194
|
complete_parts = []
|
115
195
|
for num, url in enumerate(list(urls.values())):
|
116
|
-
response_part = self._upload_one_part(file_chunker, url, num, max_retries, session)
|
196
|
+
response_part = self._upload_one_part(file_chunker, url, num, max_retries, session, resumable_upload_tracker)
|
117
197
|
complete_parts.append(response_part)
|
118
198
|
if progress_tracker: progress_tracker.update(file_chunker.get_chunk_size(num))
|
119
199
|
logger.info(f'Uploaded part {num + 1} of {len(urls)} for "{file_chunker.filepath}"')
|
@@ -124,7 +204,7 @@ class ResultFileUpload:
|
|
124
204
|
futures = []
|
125
205
|
for num, url in enumerate(list(urls.values())):
|
126
206
|
future = executor.submit(
|
127
|
-
self._upload_one_part, file_chunker, url, num, max_retries, session
|
207
|
+
self._upload_one_part, file_chunker, url, num, max_retries, session, resumable_upload_tracker
|
128
208
|
)
|
129
209
|
futures.append(future)
|
130
210
|
complete_parts = []
|
@@ -148,23 +228,44 @@ class ResultFileUpload:
|
|
148
228
|
session=None,
|
149
229
|
progress_tracker=None,
|
150
230
|
threads=1,
|
231
|
+
use_cache=True,
|
151
232
|
):
|
152
233
|
"""Upload a file to S3 using the multipart upload process."""
|
153
234
|
logger.info(f"Uploading {filepath} to S3 using multipart upload.")
|
154
|
-
|
235
|
+
resumable_upload_tracker = None
|
236
|
+
if use_cache and file_size > 10 * FIVE_MB: # only use resumable upload tracker for larger files
|
237
|
+
resumable_upload_tracker = ResumableUploadTracker(filepath, chunk_size)
|
238
|
+
if resumable_upload_tracker and resumable_upload_tracker.upload_started:
|
239
|
+
upload_id, urls = resumable_upload_tracker.upload_id, resumable_upload_tracker.urls
|
240
|
+
logger.info(f'Resuming upload for "{filepath}", upload_id: "{upload_id}"')
|
241
|
+
else:
|
242
|
+
upload_id, urls = self._prep_multipart_upload(filepath, file_size, chunk_size, optional_fields)
|
243
|
+
if resumable_upload_tracker:
|
244
|
+
logger.info(f'Creating new resumable upload for "{filepath}", upload_id: "{upload_id}"')
|
245
|
+
resumable_upload_tracker.start_upload(upload_id, urls)
|
155
246
|
logger.info(f'Starting upload for "{filepath}"')
|
156
247
|
complete_parts = []
|
157
248
|
file_chunker = FileChunker(filepath, chunk_size).load_all_chunks()
|
158
249
|
if progress_tracker: progress_tracker.set_num_chunks(file_chunker.file_size)
|
159
|
-
complete_parts = self._upload_parts(
|
250
|
+
complete_parts = self._upload_parts(
|
251
|
+
file_chunker,
|
252
|
+
urls,
|
253
|
+
max_retries,
|
254
|
+
session,
|
255
|
+
progress_tracker,
|
256
|
+
threads,
|
257
|
+
resumable_upload_tracker=resumable_upload_tracker
|
258
|
+
)
|
160
259
|
self._finish_multipart_upload(upload_id, complete_parts)
|
161
260
|
logger.info(f'Finished Upload for "{filepath}"')
|
162
261
|
return self
|
163
262
|
|
164
|
-
def upload_file(self, filepath, multipart_thresh=FIVE_MB, overwrite=True, **kwargs):
|
263
|
+
def upload_file(self, filepath, multipart_thresh=FIVE_MB, overwrite=True, no_new_versions=False, **kwargs):
|
165
264
|
if self.exists() and not overwrite:
|
166
265
|
raise GeoseeqGeneralError(f"Overwrite is set to False and file {self.uuid} already exists.")
|
167
266
|
self.idem()
|
267
|
+
if no_new_versions and self.has_downloadable_file():
|
268
|
+
raise GeoseeqGeneralError(f"File {self} already has a downloadable file. Not uploading a new version.")
|
168
269
|
resolved_path = Path(filepath).resolve()
|
169
270
|
file_size = getsize(resolved_path)
|
170
271
|
return self.multipart_upload_file(filepath, file_size, **kwargs)
|
@@ -53,6 +53,14 @@ class ResultFile(RemoteObject, ResultFileUpload, ResultFileDownload):
|
|
53
53
|
obj_type = "sample" if self.canon_url() == "sample_ar_fields" else "project"
|
54
54
|
brn = f"brn:{self.knex.instance_code()}:{obj_type}_result_field:{self.uuid}"
|
55
55
|
|
56
|
+
def has_downloadable_file(self):
    """Return True if this field has a downloadable file.

    The check is a probe: it calls ``self.download(head=10, cache=False)``
    and reports success if that call does not raise.

    NOTE(review): every exception is reported as "no downloadable file",
    so a failure unrelated to the file's existence also returns False --
    confirm this is acceptable for callers that use it to block uploads.
    """
    try:
        self.download(head=10, cache=False)
    except Exception:
        return False
    return True
|
63
|
+
|
56
64
|
def nested_url(self):
|
57
65
|
escaped_name = urllib.parse.quote(self.name, safe="")
|
58
66
|
return self.parent.nested_url() + f"/fields/{escaped_name}"
|
@@ -19,12 +19,18 @@ def _make_in_process_logger(log_level):
|
|
19
19
|
|
20
20
|
|
21
21
|
def _upload_one_file(args):
|
22
|
-
result_file, filepath, session, progress_tracker,
|
22
|
+
(result_file, filepath, session, progress_tracker,
|
23
|
+
link_type, overwrite, log_level, parallel_uploads,
|
24
|
+
use_cache, no_new_versions) = args
|
23
25
|
if parallel_uploads:
|
24
26
|
_make_in_process_logger(log_level)
|
25
27
|
if link_type == 'upload':
|
26
28
|
# TODO: check checksums to see if the file is the same
|
27
|
-
result_file.upload_file(
|
29
|
+
result_file.upload_file(
|
30
|
+
filepath,
|
31
|
+
session=session, overwrite=overwrite, progress_tracker=progress_tracker,
|
32
|
+
threads=4, use_cache=use_cache, no_new_versions=no_new_versions
|
33
|
+
)
|
28
34
|
else:
|
29
35
|
result_file.link_file(link_type, filepath)
|
30
36
|
return result_file
|
@@ -38,7 +44,9 @@ class GeoSeeqUploadManager:
|
|
38
44
|
link_type='upload',
|
39
45
|
progress_tracker_factory=None,
|
40
46
|
log_level=logging.WARNING,
|
41
|
-
overwrite=True
|
47
|
+
overwrite=True,
|
48
|
+
no_new_versions=False,
|
49
|
+
use_cache=True):
|
42
50
|
self.session = session
|
43
51
|
self.n_parallel_uploads = n_parallel_uploads
|
44
52
|
self.progress_tracker_factory = progress_tracker_factory if progress_tracker_factory else lambda x: None
|
@@ -46,6 +54,8 @@ class GeoSeeqUploadManager:
|
|
46
54
|
self.link_type = link_type
|
47
55
|
self.overwrite = overwrite
|
48
56
|
self._result_files = []
|
57
|
+
self.no_new_versions = no_new_versions
|
58
|
+
self.use_cache = use_cache
|
49
59
|
|
50
60
|
def add_result_file(self, result_file, local_path):
|
51
61
|
self._result_files.append((result_file, local_path))
|
@@ -70,7 +80,7 @@ class GeoSeeqUploadManager:
|
|
70
80
|
result_file, local_path,
|
71
81
|
self.session, self.progress_tracker_factory(local_path),
|
72
82
|
self.link_type, self.overwrite, self.log_level,
|
73
|
-
self.n_parallel_uploads > 1
|
83
|
+
self.n_parallel_uploads > 1, self.use_cache, self.no_new_versions
|
74
84
|
) for result_file, local_path in self._result_files
|
75
85
|
]
|
76
86
|
out = []
|
@@ -5,7 +5,7 @@ import setuptools
|
|
5
5
|
|
6
6
|
setuptools.setup(
|
7
7
|
name='geoseeq',
|
8
|
-
version='0.5.6a7', # remember to update version string in CLI as well
|
8
|
+
version='0.5.6a7', # DEPRECATED see pyproject.toml remember to update version string in CLI as well
|
9
9
|
author="David C. Danko",
|
10
10
|
author_email='dcdanko@biotia.io',
|
11
11
|
description=open('README.md').read(),
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|