geoseeq 0.5.6a15__tar.gz → 0.6.0__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/PKG-INFO +1 -1
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/download.py +1 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/main.py +20 -2
- geoseeq-0.6.0/geoseeq/cli/project.py +96 -0
- geoseeq-0.6.0/geoseeq/cli/raw.py +59 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/upload/upload.py +5 -3
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/upload/upload_reads.py +1 -0
- geoseeq-0.6.0/geoseeq/result/file_chunker.py +50 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/result/file_download.py +2 -3
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/result/file_upload.py +63 -144
- geoseeq-0.6.0/geoseeq/result/resumable_upload_tracker.py +100 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/upload_download_manager.py +12 -4
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq.egg-info/PKG-INFO +1 -1
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq.egg-info/SOURCES.txt +4 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/pyproject.toml +1 -1
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/LICENSE +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/README.md +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/__init__.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/app.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/blob_constructors.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/bulk_creators.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/__init__.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/constants.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/copy.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/detail.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/fastq_utils.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/get_eula.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/manage.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/progress_bar.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/run.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/search.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/shared_params/__init__.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/shared_params/common_state.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/shared_params/config.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/shared_params/id_handlers.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/shared_params/obj_getters.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/shared_params/opts_and_args.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/upload/__init__.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/upload/upload_advanced.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/user.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/utils.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/cli/view.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/constants.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/contrib/__init__.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/contrib/ncbi/__init__.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/contrib/ncbi/api.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/contrib/ncbi/bioproject.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/contrib/ncbi/cli.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/contrib/ncbi/setup_logging.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/file_system_cache.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/id_constructors/__init__.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/id_constructors/from_blobs.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/id_constructors/from_ids.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/id_constructors/from_names.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/id_constructors/from_uuids.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/id_constructors/resolvers.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/id_constructors/utils.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/knex.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/organization.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/pipeline.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/plotting/__init__.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/plotting/constants.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/plotting/highcharts.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/plotting/map/__init__.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/plotting/map/base_layer.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/plotting/map/map.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/plotting/map/overlay.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/plotting/selectable.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/project.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/remote_object.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/result/__init__.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/result/bioinfo.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/result/result_file.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/result/result_folder.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/result/utils.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/sample.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/search.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/user.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/utils.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/vc/__init__.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/vc/checksum.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/vc/cli.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/vc/clone.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/vc/constants.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/vc/vc_cache.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/vc/vc_dir.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/vc/vc_sample.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/vc/vc_stub.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq/work_orders.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq.egg-info/dependency_links.txt +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq.egg-info/entry_points.txt +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/geoseeq.egg-info/top_level.txt +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/setup.cfg +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/setup.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/tests/__init__.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/tests/test_api_client.py +0 -0
- {geoseeq-0.5.6a15 → geoseeq-0.6.0}/tests/test_plotting.py +0 -0
@@ -18,6 +18,7 @@ from .shared_params.opts_and_args import overwrite_option, yes_option
|
|
18
18
|
from .detail import cli_detail
|
19
19
|
from .run import cli_app
|
20
20
|
from .get_eula import cli_eula
|
21
|
+
from .project import cli_project
|
21
22
|
|
22
23
|
logger = logging.getLogger('geoseeq_api')
|
23
24
|
handler = logging.StreamHandler()
|
@@ -53,7 +54,7 @@ def version():
|
|
53
54
|
Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
|
54
55
|
Run `geoseeq eula show` to view the EULA.
|
55
56
|
"""
|
56
|
-
click.echo('0.
|
57
|
+
click.echo('0.6.0') # remember to update setup
|
57
58
|
|
58
59
|
|
59
60
|
@main.group('advanced')
|
@@ -65,6 +66,7 @@ cli_advanced.add_command(cli_copy)
|
|
65
66
|
cli_advanced.add_command(cli_user)
|
66
67
|
cli_advanced.add_command(cli_detail)
|
67
68
|
cli_advanced.add_command(cli_upload_advanced)
|
69
|
+
cli_advanced.add_command(cli_project)
|
68
70
|
|
69
71
|
@cli_advanced.group('experimental')
|
70
72
|
def cli_experimental():
|
@@ -101,4 +103,20 @@ def cli_config(yes, api_token, endpoint, profile, overwrite):
|
|
101
103
|
click.echo('You must accept the EULA to use the GeoSeeq API.')
|
102
104
|
return
|
103
105
|
set_profile(api_token, endpoint=endpoint, profile=profile, overwrite=overwrite)
|
104
|
-
click.echo(f'Profile configured.')
|
106
|
+
click.echo(f'Profile configured.')
|
107
|
+
|
108
|
+
|
109
|
+
@main.command('clear-cache')
|
110
|
+
@yes_option
|
111
|
+
def cli_clear_cache(yes):
|
112
|
+
"""Clear the local cache.
|
113
|
+
|
114
|
+
---
|
115
|
+
|
116
|
+
Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
|
117
|
+
Run `geoseeq eula show` to view the EULA.
|
118
|
+
"""
|
119
|
+
from geoseeq.file_system_cache import GEOSEEQ_CACHE_DIR
|
120
|
+
import shutil
|
121
|
+
if yes or click.confirm('Are you sure you want to clear the cache?'):
|
122
|
+
shutil.rmtree(GEOSEEQ_CACHE_DIR, ignore_errors=True)
|
@@ -0,0 +1,96 @@
|
|
1
|
+
import json
|
2
|
+
import logging
|
3
|
+
from os import makedirs, getcwd
|
4
|
+
from os.path import dirname, join
|
5
|
+
|
6
|
+
import click
|
7
|
+
import pandas as pd
|
8
|
+
from multiprocessing import Pool
|
9
|
+
from .shared_params import (
|
10
|
+
handle_project_id,
|
11
|
+
handle_folder_id,
|
12
|
+
project_id_arg,
|
13
|
+
sample_ids_arg,
|
14
|
+
handle_multiple_sample_ids,
|
15
|
+
handle_multiple_result_file_ids,
|
16
|
+
use_common_state,
|
17
|
+
flatten_list_of_els_and_files,
|
18
|
+
yes_option,
|
19
|
+
module_option,
|
20
|
+
ignore_errors_option,
|
21
|
+
folder_ids_arg,
|
22
|
+
)
|
23
|
+
from geoseeq.result.file_download import download_url
|
24
|
+
from geoseeq.utils import download_ftp
|
25
|
+
from geoseeq.id_constructors import (
|
26
|
+
result_file_from_uuid,
|
27
|
+
result_file_from_name,
|
28
|
+
)
|
29
|
+
from geoseeq.knex import GeoseeqNotFoundError
|
30
|
+
from .progress_bar import PBarManager
|
31
|
+
from .utils import convert_size
|
32
|
+
from geoseeq.constants import FASTQ_MODULE_NAMES
|
33
|
+
from geoseeq.result import ResultFile
|
34
|
+
from geoseeq.upload_download_manager import GeoSeeqDownloadManager
|
35
|
+
from geoseeq.file_system.filesystem_download import (
|
36
|
+
ProjectOnFilesystem,
|
37
|
+
FILE_STATUS_MODIFIED_REMOTE,
|
38
|
+
FILE_STATUS_MODIFIED_LOCAL,
|
39
|
+
FILE_STATUS_NEW_LOCAL,
|
40
|
+
FILE_STATUS_NEW_REMOTE,
|
41
|
+
FILE_STATUS_IS_LOCAL_STUB,
|
42
|
+
)
|
43
|
+
|
44
|
+
|
45
|
+
logger = logging.getLogger('geoseeq_api')
|
46
|
+
|
47
|
+
|
48
|
+
@click.group("project")
|
49
|
+
def cli_project():
|
50
|
+
"""Download data from GeoSeeq."""
|
51
|
+
pass
|
52
|
+
|
53
|
+
|
54
|
+
@cli_project.command("clone")
|
55
|
+
@use_common_state
|
56
|
+
@click.option('--use-stubs/--full-files', default=True, help='Download full files or stubs')
|
57
|
+
@click.option('--target-dir', '-d', default=None, help='Directory to download the project to')
|
58
|
+
@project_id_arg
|
59
|
+
def cli_clone_project(state, use_stubs, target_dir, project_id):
|
60
|
+
"""Clone a project to the local filesystem.
|
61
|
+
"""
|
62
|
+
knex = state.get_knex().set_auth_required()
|
63
|
+
proj = handle_project_id(knex, project_id)
|
64
|
+
logger.info(f"Found project \"{proj.name}\"")
|
65
|
+
if target_dir is None:
|
66
|
+
target_dir = proj.name
|
67
|
+
|
68
|
+
project = ProjectOnFilesystem(proj, target_dir)
|
69
|
+
project.download(use_stubs=use_stubs)
|
70
|
+
|
71
|
+
|
72
|
+
@cli_project.command("status")
|
73
|
+
@use_common_state
|
74
|
+
def cli_project_status(state):
|
75
|
+
"""Check the status of a project on the local filesystem.
|
76
|
+
"""
|
77
|
+
project = ProjectOnFilesystem.from_path(getcwd(), recursive=True)
|
78
|
+
|
79
|
+
objs_by_status = {
|
80
|
+
FILE_STATUS_MODIFIED_LOCAL: [],
|
81
|
+
FILE_STATUS_MODIFIED_REMOTE: [],
|
82
|
+
FILE_STATUS_NEW_LOCAL: [],
|
83
|
+
FILE_STATUS_NEW_REMOTE: [],
|
84
|
+
FILE_STATUS_IS_LOCAL_STUB: [],
|
85
|
+
}
|
86
|
+
for obj_type, status, local_path, obj in project.list_abnormal_objects():
|
87
|
+
objs_by_status[status].append((obj_type, local_path, obj))
|
88
|
+
|
89
|
+
print(f"Project: {project.project.name}")
|
90
|
+
for status, objs in objs_by_status.items():
|
91
|
+
print(f"Status: {status}")
|
92
|
+
for obj_type, local_path, obj in objs:
|
93
|
+
if status in (FILE_STATUS_MODIFIED_LOCAL, FILE_STATUS_NEW_LOCAL):
|
94
|
+
print(f" {obj_type}: {project.path_from_project_root(local_path)} -> {obj}")
|
95
|
+
else:
|
96
|
+
print(f" {obj_type}: {obj} -> {project.path_from_project_root(local_path)}")
|
@@ -0,0 +1,59 @@
|
|
1
|
+
import click
|
2
|
+
import json
|
3
|
+
from .shared_params import use_common_state, overwrite_option
|
4
|
+
from geoseeq import GeoseeqNotFoundError
|
5
|
+
from geoseeq.blob_constructors import (
|
6
|
+
sample_result_file_from_uuid,
|
7
|
+
project_result_file_from_uuid,
|
8
|
+
sample_result_folder_from_uuid,
|
9
|
+
project_result_folder_from_uuid,
|
10
|
+
)
|
11
|
+
|
12
|
+
|
13
|
+
@click.group('raw')
|
14
|
+
def cli_raw():
|
15
|
+
"""Low-level commands for interacting with the API."""
|
16
|
+
pass
|
17
|
+
|
18
|
+
|
19
|
+
@cli_raw.command('get-file-data')
|
20
|
+
@use_common_state
|
21
|
+
@click.argument('file_ids', nargs=-1)
|
22
|
+
def cli_get_file_data(state, file_ids):
|
23
|
+
"""Print the raw stored data in a result file object."""
|
24
|
+
knex = state.get_knex()
|
25
|
+
for file_id in file_ids:
|
26
|
+
file_id = file_id.split(':')[-1]
|
27
|
+
try:
|
28
|
+
result_file = sample_result_file_from_uuid(knex, file_id)
|
29
|
+
except GeoseeqNotFoundError:
|
30
|
+
result_file = project_result_file_from_uuid(knex, file_id)
|
31
|
+
print(json.dumps(result_file.stored_data, indent=2), file=state.outfile)
|
32
|
+
|
33
|
+
|
34
|
+
@cli_raw.command('create-raw-file')
|
35
|
+
@use_common_state
|
36
|
+
@overwrite_option
|
37
|
+
@click.argument('folder_id')
|
38
|
+
@click.argument('result_filename')
|
39
|
+
@click.argument('filename', type=click.File('r'))
|
40
|
+
def cli_get_file_data(state, overwrite, folder_id, result_filename, filename):
|
41
|
+
"""Print the raw stored data in a result file object."""
|
42
|
+
knex = state.get_knex()
|
43
|
+
|
44
|
+
folder_id = folder_id.split(':')[-1]
|
45
|
+
try:
|
46
|
+
result_folder = sample_result_folder_from_uuid(knex, folder_id)
|
47
|
+
except GeoseeqNotFoundError:
|
48
|
+
result_folder = project_result_folder_from_uuid(knex, folder_id)
|
49
|
+
blob = json.load(filename)
|
50
|
+
result_file = result_folder.result_file(result_filename)
|
51
|
+
if overwrite:
|
52
|
+
result_file.idem()
|
53
|
+
result_file.stored_data = blob
|
54
|
+
result_file.save()
|
55
|
+
else:
|
56
|
+
result_file.create()
|
57
|
+
click.echo(f'Created file {result_file.uuid}', file=state.outfile)
|
58
|
+
|
59
|
+
|
@@ -40,7 +40,7 @@ hidden_option = click.option('--hidden/--no-hidden', default=False, help='Upload
|
|
40
40
|
@click.option('--cores', default=1, help='Number of uploads to run in parallel', show_default=True)
|
41
41
|
@click.option('--threads-per-upload', default=4, help='Number of threads used to upload each file', show_default=True)
|
42
42
|
@click.option('--num-retries', default=3, help='Number of times to retry a failed upload', show_default=True)
|
43
|
-
@click.option('--chunk-size-mb', default
|
43
|
+
@click.option('--chunk-size-mb', default=-1, help='Size of chunks to upload in MB', show_default=True)
|
44
44
|
@ignore_errors_option
|
45
45
|
@yes_option
|
46
46
|
@private_option
|
@@ -122,8 +122,9 @@ def cli_upload_file(state, cores, threads_per_upload, num_retries, chunk_size_mb
|
|
122
122
|
use_cache=state.use_cache,
|
123
123
|
num_retries=num_retries,
|
124
124
|
ignore_errors=ignore_errors,
|
125
|
-
|
126
|
-
|
125
|
+
use_atomic_upload=True,
|
126
|
+
session=None, #knex.new_session(),
|
127
|
+
chunk_size_mb=chunk_size_mb if chunk_size_mb > 0 else None,
|
127
128
|
)
|
128
129
|
for geoseeq_file_name, file_path in name_pairs:
|
129
130
|
if isfile(file_path):
|
@@ -160,6 +161,7 @@ def cli_upload_folder(state, cores, yes, private, recursive, hidden, no_new_vers
|
|
160
161
|
overwrite=True,
|
161
162
|
use_cache=state.use_cache,
|
162
163
|
no_new_versions=no_new_versions,
|
164
|
+
use_atomic_upload=True,
|
163
165
|
)
|
164
166
|
for folder_name in folder_names:
|
165
167
|
result_folder = root_obj.result_folder(folder_name).idem()
|
@@ -98,6 +98,7 @@ def _do_upload(groups, module_name, link_type, lib, filepaths, overwrite, no_new
|
|
98
98
|
progress_tracker_factory=PBarManager().get_new_bar,
|
99
99
|
use_cache=state.use_cache,
|
100
100
|
no_new_versions=no_new_versions,
|
101
|
+
use_atomic_upload=True,
|
101
102
|
)
|
102
103
|
for group in groups:
|
103
104
|
sample = lib.sample(group['sample_name']).idem()
|
@@ -0,0 +1,50 @@
|
|
1
|
+
|
2
|
+
from os.path import getsize
|
3
|
+
import logging
|
4
|
+
|
5
|
+
logger = logging.getLogger("geoseeq_api") # Same name as calling module
|
6
|
+
logger.addHandler(logging.NullHandler())
|
7
|
+
|
8
|
+
|
9
|
+
class FileChunker:
|
10
|
+
|
11
|
+
def __init__(self, filepath, chunk_size):
|
12
|
+
self.filepath = filepath
|
13
|
+
self.chunk_size = chunk_size
|
14
|
+
self.file_size = getsize(filepath)
|
15
|
+
self.n_parts = int(self.file_size / self.chunk_size) + 1
|
16
|
+
self.loaded_parts = []
|
17
|
+
|
18
|
+
def load_all_chunks(self):
|
19
|
+
if len(self.loaded_parts) != self.n_parts:
|
20
|
+
with open(self.filepath, "rb") as f:
|
21
|
+
f.seek(0)
|
22
|
+
for i in range(self.n_parts):
|
23
|
+
chunk = f.read(self.chunk_size)
|
24
|
+
self.loaded_parts.append(chunk)
|
25
|
+
return self # convenience for chaining
|
26
|
+
|
27
|
+
def chunk_is_preloaded(self, num):
|
28
|
+
return len(self.loaded_parts) > num and self.loaded_parts[num]
|
29
|
+
|
30
|
+
def read_one_chunk(self, num):
|
31
|
+
if not self.chunk_is_preloaded(num):
|
32
|
+
logger.debug(f"Reading chunk {num} from {self.filepath}")
|
33
|
+
with open(self.filepath, "rb") as f:
|
34
|
+
f.seek(num * self.chunk_size)
|
35
|
+
chunk = f.read(self.chunk_size)
|
36
|
+
return chunk
|
37
|
+
return self.loaded_parts[num]
|
38
|
+
|
39
|
+
def get_chunk(self, num):
|
40
|
+
if self.chunk_is_preloaded(num):
|
41
|
+
return self.loaded_parts[num]
|
42
|
+
return self.read_one_chunk(num)
|
43
|
+
|
44
|
+
def get_chunk_size(self, num):
|
45
|
+
if num < (self.n_parts - 1): # all but the last chunk
|
46
|
+
return self.chunk_size
|
47
|
+
if self.chunk_is_preloaded(num): # last chunk, pre-loaded
|
48
|
+
return len(self.loaded_parts[num])
|
49
|
+
return len(self.read_one_chunk(num)) # last chunk, not pre-loaded
|
50
|
+
|
@@ -12,10 +12,10 @@ from geoseeq.constants import FIVE_MB
|
|
12
12
|
logger = logging.getLogger("geoseeq_api") # Same name as calling module
|
13
13
|
|
14
14
|
|
15
|
-
def _download_head(url, filename, head=None, progress_tracker=None):
|
15
|
+
def _download_head(url, filename, head=None, start=0, progress_tracker=None):
|
16
16
|
headers = None
|
17
17
|
if head and head > 0:
|
18
|
-
headers = {"Range": f"bytes=
|
18
|
+
headers = {"Range": f"bytes={start}-{head}"}
|
19
19
|
response = requests.get(url, stream=True, headers=headers)
|
20
20
|
response.raise_for_status()
|
21
21
|
total_size_in_bytes = int(response.headers.get('content-length', 0))
|
@@ -67,7 +67,6 @@ def download_url(url, kind='guess', filename=None, head=None, progress_tracker=N
|
|
67
67
|
raise ValueError(f"Unknown download kind: {kind}")
|
68
68
|
|
69
69
|
|
70
|
-
|
71
70
|
class ResultFileDownload:
|
72
71
|
"""Abstract class that handles download methods for result files."""
|
73
72
|
|
@@ -13,130 +13,21 @@ from geoseeq.utils import md5_checksum
|
|
13
13
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
14
14
|
from .utils import *
|
15
15
|
from geoseeq.file_system_cache import GEOSEEQ_CACHE_DIR
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
def __init__(self, filepath, chunk_size):
|
20
|
-
self.filepath = filepath
|
21
|
-
self.chunk_size = chunk_size
|
22
|
-
self.file_size = getsize(filepath)
|
23
|
-
self.n_parts = int(self.file_size / self.chunk_size) + 1
|
24
|
-
self.loaded_parts = []
|
25
|
-
|
26
|
-
def load_all_chunks(self):
|
27
|
-
if len(self.loaded_parts) != self.n_parts:
|
28
|
-
with open(self.filepath, "rb") as f:
|
29
|
-
f.seek(0)
|
30
|
-
for i in range(self.n_parts):
|
31
|
-
chunk = f.read(self.chunk_size)
|
32
|
-
self.loaded_parts.append(chunk)
|
33
|
-
return self # convenience for chaining
|
34
|
-
|
35
|
-
def chunk_is_preloaded(self, num):
|
36
|
-
return len(self.loaded_parts) > num and self.loaded_parts[num]
|
37
|
-
|
38
|
-
def read_one_chunk(self, num):
|
39
|
-
if not self.chunk_is_preloaded(num):
|
40
|
-
logger.debug(f"Reading chunk {num} from {self.filepath}")
|
41
|
-
with open(self.filepath, "rb") as f:
|
42
|
-
f.seek(num * self.chunk_size)
|
43
|
-
chunk = f.read(self.chunk_size)
|
44
|
-
return chunk
|
45
|
-
return self.loaded_parts[num]
|
46
|
-
|
47
|
-
def get_chunk(self, num):
|
48
|
-
if self.chunk_is_preloaded(num):
|
49
|
-
return self.loaded_parts[num]
|
50
|
-
return self.read_one_chunk(num)
|
51
|
-
|
52
|
-
def get_chunk_size(self, num):
|
53
|
-
if num < (self.n_parts - 1): # all but the last chunk
|
54
|
-
return self.chunk_size
|
55
|
-
if self.chunk_is_preloaded(num): # last chunk, pre-loaded
|
56
|
-
return len(self.loaded_parts[num])
|
57
|
-
return len(self.read_one_chunk(num)) # last chunk, not pre-loaded
|
58
|
-
|
59
|
-
|
60
|
-
class ResumableUploadTracker:
|
61
|
-
|
62
|
-
def __init__(self, filepath, chunk_size, tracker_file_prefix="gs_resumable_upload_tracker"):
|
63
|
-
self.open, self.upload_started = True, False
|
64
|
-
self.upload_id, self.urls = None, None
|
65
|
-
self.filepath = filepath
|
66
|
-
self.tracker_file = join(
|
67
|
-
GEOSEEQ_CACHE_DIR, 'upload',
|
68
|
-
tracker_file_prefix + f".{chunk_size}.{getsize(filepath)}." + basename(filepath)
|
69
|
-
)
|
70
|
-
try:
|
71
|
-
os.makedirs(dirname(self.tracker_file), exist_ok=True)
|
72
|
-
except Exception as e:
|
73
|
-
logger.warning(f'Could not create resumable upload tracker directory. {e}')
|
74
|
-
self.open = False
|
75
|
-
self._loaded_parts = {}
|
76
|
-
self._load_parts_from_file()
|
77
|
-
|
78
|
-
def start_upload(self, upload_id, urls):
|
79
|
-
if not self.open:
|
80
|
-
return
|
81
|
-
if self.upload_started:
|
82
|
-
raise GeoseeqGeneralError("Upload has already started.")
|
83
|
-
blob = dict(upload_id=upload_id, urls=urls, start_time=time.time())
|
84
|
-
serialized = json.dumps(blob)
|
85
|
-
with open(self.tracker_file, "w") as f:
|
86
|
-
f.write(serialized + "\n")
|
87
|
-
self.upload_id, self.urls = upload_id, urls
|
88
|
-
self.upload_started = True
|
89
|
-
|
90
|
-
def add_part(self, part_upload_info):
|
91
|
-
if not self.open:
|
92
|
-
return
|
93
|
-
part_id = part_upload_info["PartNumber"]
|
94
|
-
serialized = json.dumps(part_upload_info)
|
95
|
-
with open(self.tracker_file, "a") as f:
|
96
|
-
f.write(serialized + "\n")
|
97
|
-
self._loaded_parts[part_id] = part_upload_info
|
98
|
-
if len(self._loaded_parts) == len(self.urls):
|
99
|
-
self.cleanup()
|
100
|
-
self.open = False
|
101
|
-
|
102
|
-
def _load_parts_from_file(self):
|
103
|
-
if not isfile(self.tracker_file):
|
104
|
-
return
|
105
|
-
with open(self.tracker_file, "r") as f:
|
106
|
-
header_blob = json.loads(f.readline())
|
107
|
-
self.upload_id, self.urls = header_blob["upload_id"], header_blob["urls"]
|
108
|
-
start_time = header_blob["start_time"]
|
109
|
-
if (time.time() - start_time) > (60 * 60 * 23):
|
110
|
-
logger.warning(f"Tracker file {self.tracker_file} is too old. Deleting.")
|
111
|
-
os.remove(self.tracker_file)
|
112
|
-
return
|
113
|
-
self.upload_started = True
|
114
|
-
for line in f:
|
115
|
-
blob = json.loads(line)
|
116
|
-
part_id = blob["PartNumber"]
|
117
|
-
self._loaded_parts[part_id] = blob
|
118
|
-
|
119
|
-
def part_has_been_uploaded(self, part_number):
|
120
|
-
if not self.open:
|
121
|
-
return False
|
122
|
-
return part_number in self._loaded_parts
|
123
|
-
|
124
|
-
def get_part_info(self, part_number):
|
125
|
-
return self._loaded_parts[part_number]
|
126
|
-
|
127
|
-
def cleanup(self):
|
128
|
-
if not self.open:
|
129
|
-
return
|
130
|
-
try:
|
131
|
-
os.remove(self.tracker_file)
|
132
|
-
except FileNotFoundError:
|
133
|
-
pass
|
16
|
+
from .file_chunker import FileChunker
|
17
|
+
from .resumable_upload_tracker import ResumableUploadTracker
|
134
18
|
|
135
19
|
|
136
20
|
class ResultFileUpload:
|
137
21
|
"""Abstract class that handles upload methods for result files."""
|
138
22
|
|
139
|
-
def
|
23
|
+
def _result_type(self, atomic=False):
|
24
|
+
if self.is_sample_result:
|
25
|
+
return "sample"
|
26
|
+
if atomic:
|
27
|
+
return "project"
|
28
|
+
return "group"
|
29
|
+
|
30
|
+
def _create_multipart_upload(self, filepath, file_size, optional_fields, atomic=False):
|
140
31
|
optional_fields = optional_fields if optional_fields else {}
|
141
32
|
optional_fields.update(
|
142
33
|
{
|
@@ -147,23 +38,31 @@ class ResultFileUpload:
|
|
147
38
|
data = {
|
148
39
|
"filename": basename(filepath),
|
149
40
|
"optional_fields": optional_fields,
|
150
|
-
"result_type":
|
41
|
+
"result_type": self._result_type(atomic),
|
151
42
|
}
|
152
|
-
|
43
|
+
url = f"/ar_fields/{self.uuid}/create_upload"
|
44
|
+
if atomic:
|
45
|
+
data["fieldname"] = self.name
|
46
|
+
url = f"/ars/{self.parent.uuid}/create_atomic_upload"
|
47
|
+
response = self.knex.post(url, json=data)
|
153
48
|
return response
|
154
49
|
|
155
|
-
def _prep_multipart_upload(self, filepath, file_size, chunk_size, optional_fields):
|
50
|
+
def _prep_multipart_upload(self, filepath, file_size, chunk_size, optional_fields, atomic=False):
|
156
51
|
n_parts = int(file_size / chunk_size) + 1
|
157
|
-
response = self._create_multipart_upload(filepath, file_size, optional_fields)
|
52
|
+
response = self._create_multipart_upload(filepath, file_size, optional_fields, atomic=atomic)
|
158
53
|
upload_id = response["upload_id"]
|
159
|
-
parts = list(range(1, n_parts + 1))
|
160
54
|
data = {
|
161
|
-
"parts":
|
55
|
+
"parts": list(range(1, n_parts + 1)),
|
162
56
|
"stance": "upload-multipart",
|
163
57
|
"upload_id": upload_id,
|
164
|
-
"result_type":
|
58
|
+
"result_type": self._result_type(atomic),
|
165
59
|
}
|
166
|
-
|
60
|
+
url = f"/ar_fields/{self.uuid}/create_upload_urls"
|
61
|
+
if atomic:
|
62
|
+
data["uuid"] = response["uuid"]
|
63
|
+
data["fieldname"] = self.name
|
64
|
+
url = f"ars/{self.parent.uuid}/create_atomic_upload_urls"
|
65
|
+
response = self.knex.post(url, json=data)
|
167
66
|
urls = response
|
168
67
|
return upload_id, urls
|
169
68
|
|
@@ -175,6 +74,7 @@ class ResultFileUpload:
|
|
175
74
|
attempts = 0
|
176
75
|
while attempts < max_retries:
|
177
76
|
try:
|
77
|
+
# url = url.replace("s3.wasabisys.com", "s3.us-east-1.wasabisys.com")
|
178
78
|
logger.debug(f"Uploading part {num + 1} to {url}. Size: {len(file_chunk)} bytes.")
|
179
79
|
if session:
|
180
80
|
http_response = session.put(url, data=file_chunk)
|
@@ -192,7 +92,7 @@ class ResultFileUpload:
|
|
192
92
|
raise e
|
193
93
|
|
194
94
|
retry_time = min(8 ** attempts, 120) # exponential backoff, max 120s
|
195
|
-
retry_time *= 0.
|
95
|
+
retry_time *= 0.6 + (random() * 0.8) # randomize to avoid thundering herd
|
196
96
|
logger.debug(f"Retrying upload for part {num + 1} in {retry_time} seconds.")
|
197
97
|
time.sleep(retry_time)
|
198
98
|
|
@@ -203,16 +103,17 @@ class ResultFileUpload:
|
|
203
103
|
resumable_upload_tracker.add_part(blob)
|
204
104
|
return blob
|
205
105
|
|
206
|
-
def _finish_multipart_upload(self, upload_id, complete_parts):
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
106
|
+
def _finish_multipart_upload(self, upload_id, complete_parts, atomic=False):
|
107
|
+
data = {
|
108
|
+
"parts": complete_parts,
|
109
|
+
"upload_id": upload_id,
|
110
|
+
"result_type": self._result_type(atomic),
|
111
|
+
}
|
112
|
+
url = f"/ar_fields/{self.uuid}/complete_upload"
|
113
|
+
if atomic:
|
114
|
+
data["fieldname"] = self.name
|
115
|
+
url = f"/ars/{self.parent.uuid}/complete_atomic_upload"
|
116
|
+
response = self.knex.post(url, json=data, json_response=False)
|
216
117
|
response.raise_for_status()
|
217
118
|
|
218
119
|
def _upload_parts(self, file_chunker, urls, max_retries, session, progress_tracker, threads, resumable_upload_tracker=None):
|
@@ -250,26 +151,38 @@ class ResultFileUpload:
|
|
250
151
|
filepath,
|
251
152
|
file_size,
|
252
153
|
optional_fields=None,
|
253
|
-
chunk_size=
|
154
|
+
chunk_size=None,
|
254
155
|
max_retries=3,
|
255
156
|
session=None,
|
256
157
|
progress_tracker=None,
|
257
158
|
threads=1,
|
258
159
|
use_cache=True,
|
160
|
+
use_atomic_upload=False,
|
259
161
|
):
|
260
162
|
"""Upload a file to S3 using the multipart upload process."""
|
261
163
|
logger.info(f"Uploading {filepath} to S3 using multipart upload.")
|
164
|
+
if not chunk_size:
|
165
|
+
chunk_size = FIVE_MB
|
166
|
+
if file_size >= 10 * FIVE_MB:
|
167
|
+
chunk_size = 5 * FIVE_MB
|
168
|
+
logger.debug(f"Using chunk size of {chunk_size} bytes.")
|
262
169
|
resumable_upload_tracker = None
|
263
170
|
if use_cache and file_size > 10 * FIVE_MB: # only use resumable upload tracker for larger files
|
264
|
-
|
171
|
+
upload_target_uuid = self.parent.uuid if use_atomic_upload else self.uuid
|
172
|
+
resumable_upload_tracker = ResumableUploadTracker(filepath, chunk_size, upload_target_uuid)
|
173
|
+
|
265
174
|
if resumable_upload_tracker and resumable_upload_tracker.upload_started:
|
175
|
+
# a resumable upload for this file has already started
|
176
|
+
resumable_upload_exists_and_is_valid = True
|
266
177
|
upload_id, urls = resumable_upload_tracker.upload_id, resumable_upload_tracker.urls
|
178
|
+
use_atomic_upload = resumable_upload_tracker.is_atomic_upload
|
267
179
|
logger.info(f'Resuming upload for "{filepath}", upload_id: "{upload_id}"')
|
268
180
|
else:
|
269
|
-
upload_id, urls = self._prep_multipart_upload(filepath, file_size, chunk_size, optional_fields)
|
181
|
+
upload_id, urls = self._prep_multipart_upload(filepath, file_size, chunk_size, optional_fields, atomic=use_atomic_upload)
|
270
182
|
if resumable_upload_tracker:
|
271
183
|
logger.info(f'Creating new resumable upload for "{filepath}", upload_id: "{upload_id}"')
|
272
|
-
resumable_upload_tracker.start_upload(upload_id, urls)
|
184
|
+
resumable_upload_tracker.start_upload(upload_id, urls, is_atomic_upload=use_atomic_upload)
|
185
|
+
|
273
186
|
logger.info(f'Starting upload for "{filepath}"')
|
274
187
|
complete_parts = []
|
275
188
|
file_chunker = FileChunker(filepath, chunk_size)
|
@@ -288,14 +201,20 @@ class ResultFileUpload:
|
|
288
201
|
threads,
|
289
202
|
resumable_upload_tracker=resumable_upload_tracker
|
290
203
|
)
|
291
|
-
self._finish_multipart_upload(upload_id, complete_parts)
|
204
|
+
self._finish_multipart_upload(upload_id, complete_parts, atomic=use_atomic_upload)
|
292
205
|
logger.info(f'Finished Upload for "{filepath}"')
|
206
|
+
if use_atomic_upload:
|
207
|
+
# if this was an atomic upload then this result may not have existed on the server before
|
208
|
+
self.get()
|
293
209
|
return self
|
294
210
|
|
295
211
|
def upload_file(self, filepath, multipart_thresh=FIVE_MB, overwrite=True, no_new_versions=False, **kwargs):
|
296
212
|
if self.exists() and not overwrite:
|
297
213
|
raise GeoseeqGeneralError(f"Overwrite is set to False and file {self.uuid} already exists.")
|
298
|
-
|
214
|
+
if not kwargs.get("use_atomic_upload", False):
|
215
|
+
self.idem()
|
216
|
+
else:
|
217
|
+
self.parent.idem()
|
299
218
|
if no_new_versions and self.has_downloadable_file():
|
300
219
|
raise GeoseeqGeneralError(f"File {self} already has a downloadable file. Not uploading a new version.")
|
301
220
|
resolved_path = Path(filepath).resolve()
|
@@ -0,0 +1,100 @@
|
|
1
|
+
|
2
|
+
import time
|
3
|
+
import json
|
4
|
+
import os
|
5
|
+
from os.path import basename, getsize, join, dirname, isfile, getctime
|
6
|
+
from pathlib import Path
|
7
|
+
from random import random
|
8
|
+
import requests
|
9
|
+
|
10
|
+
from geoseeq.knex import GeoseeqGeneralError
|
11
|
+
from geoseeq.constants import FIVE_MB
|
12
|
+
from geoseeq.utils import md5_checksum
|
13
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
14
|
+
from .utils import *
|
15
|
+
from geoseeq.file_system_cache import GEOSEEQ_CACHE_DIR
|
16
|
+
from .file_chunker import FileChunker
|
17
|
+
|
18
|
+
|
19
|
+
class ResumableUploadTracker:
    """Persist multipart-upload progress so an interrupted upload can resume.

    State is stored as JSON lines in a per-upload tracker file: the first
    line is a header (upload id, presigned part URLs, atomic flag) and each
    subsequent line records one completed part. The tracker file name encodes
    the upload target uuid, chunk size, and source file size so a changed
    source file or chunking scheme never resumes a stale upload.
    """

    def __init__(self, filepath, chunk_size, upload_target_uuid, tracker_file_prefix="gs_resumable_upload_tracker"):
        # `open` gates all tracker I/O; it is cleared permanently if the
        # cache directory cannot be created, or once the upload completes.
        self.open, self.upload_started = True, False
        self.upload_id, self.urls, self.is_atomic_upload = None, None, None
        self.upload_target_uuid = upload_target_uuid
        self.filepath = filepath
        self.tracker_file_dir = join(GEOSEEQ_CACHE_DIR, 'upload')
        self.tracker_file = join(
            self.tracker_file_dir,
            tracker_file_prefix + f".{upload_target_uuid}.{chunk_size}.{getsize(filepath)}." + basename(filepath)
        )
        try:
            os.makedirs(self.tracker_file_dir, exist_ok=True)
        except Exception as e:
            logger.warning(f'Could not create resumable upload tracker directory. {e}')
            self.open = False
        self._loaded_parts = {}
        self._load_parts_from_file()

    def start_upload(self, upload_id, urls, is_atomic_upload=False):
        """Record the header for a brand-new multipart upload.

        No-op when tracking is disabled. Raises GeoseeqGeneralError if an
        upload was already started (including one resumed from disk).
        """
        if not self.open:
            return
        if self.upload_started:
            raise GeoseeqGeneralError("Upload has already started.")
        self.upload_started = True
        blob = dict(upload_id=upload_id,
                    urls=urls,
                    is_atomic_upload=is_atomic_upload,
                    upload_target_uuid=self.upload_target_uuid,
                    start_time=time.time())
        serialized = json.dumps(blob)
        with open(self.tracker_file, "w") as f:
            f.write(serialized + "\n")
        self.upload_id, self.urls, self.is_atomic_upload = upload_id, urls, is_atomic_upload

    def add_part(self, part_upload_info):
        """Append one completed-part record to the tracker file.

        When every part (one per presigned URL) is accounted for, the
        tracker file is deleted and the tracker closes itself.
        """
        if not self.open:
            return
        part_id = part_upload_info["PartNumber"]
        serialized = json.dumps(part_upload_info)
        with open(self.tracker_file, "a") as f:
            f.write(serialized + "\n")
        self._loaded_parts[part_id] = part_upload_info
        if len(self._loaded_parts) == len(self.urls):
            self.cleanup()
            self.open = False

    def _load_parts_from_file(self):
        """Restore upload state from an existing tracker file, if present.

        Expired (> ~23h old, i.e. past the presumed presigned-URL lifetime)
        and corrupt tracker files are deleted and ignored so a damaged cache
        can never block a fresh upload. A truncated trailing part record
        (from a crash mid-write) drops only that record; the file is then
        rewritten with the valid records so future appends stay parseable.
        """
        if not isfile(self.tracker_file):
            return
        # Read everything up front so the handle is closed before any
        # os.remove below (removing an open file fails on Windows).
        with open(self.tracker_file, "r") as f:
            lines = f.readlines()
        try:
            header_blob = json.loads(lines[0])
            upload_id, urls, is_atomic_upload = (
                header_blob["upload_id"], header_blob["urls"], header_blob["is_atomic_upload"]
            )
            start_time = header_blob["start_time"]
        except (IndexError, ValueError, KeyError):
            # json.JSONDecodeError is a ValueError subclass.
            logger.warning(f"Tracker file {self.tracker_file} is corrupt. Deleting.")
            self._remove_tracker_file()
            return
        if (time.time() - start_time) > (60 * 60 * 23):
            logger.warning(f"Tracker file {self.tracker_file} is too old. Deleting.")
            self._remove_tracker_file()
            return
        self.upload_id, self.urls, self.is_atomic_upload = upload_id, urls, is_atomic_upload
        self.upload_started = True
        n_bad = 0
        for line in lines[1:]:
            try:
                blob = json.loads(line)
                self._loaded_parts[blob["PartNumber"]] = blob
            except (ValueError, KeyError):
                n_bad += 1
        if n_bad:
            # Rewrite header + valid parts so the file is clean again; a
            # truncated line without a newline would otherwise merge with
            # the next appended record.
            logger.warning(f"Dropped {n_bad} corrupt record(s) from tracker file {self.tracker_file}.")
            with open(self.tracker_file, "w") as f:
                f.write(json.dumps(header_blob) + "\n")
                for blob in self._loaded_parts.values():
                    f.write(json.dumps(blob) + "\n")

    def _remove_tracker_file(self):
        """Best-effort deletion of the on-disk tracker file."""
        try:
            os.remove(self.tracker_file)
        except FileNotFoundError:
            pass

    def part_has_been_uploaded(self, part_number):
        """True if this part was already uploaded in a previous (or the
        current) session. Always False when tracking is disabled."""
        if not self.open:
            return False
        return part_number in self._loaded_parts

    def get_part_info(self, part_number):
        """Return the stored record for an already-uploaded part."""
        return self._loaded_parts[part_number]

    def cleanup(self):
        """Delete the tracker file once the upload has fully completed."""
        if not self.open:
            return
        self._remove_tracker_file()
|
@@ -22,8 +22,8 @@ def _upload_one_file(args):
|
|
22
22
|
(result_file, filepath, session, progress_tracker,
|
23
23
|
link_type, overwrite, log_level, parallel_uploads,
|
24
24
|
use_cache, no_new_versions, threads_per_upload,
|
25
|
-
num_retries, ignore_errors, chunk_size_mb) = args
|
26
|
-
chunk_size = chunk_size_mb * 1024 * 1024
|
25
|
+
num_retries, ignore_errors, chunk_size_mb, use_atomic_upload) = args
|
26
|
+
chunk_size = chunk_size_mb * 1024 * 1024 if chunk_size_mb else None
|
27
27
|
if parallel_uploads:
|
28
28
|
_make_in_process_logger(log_level)
|
29
29
|
try:
|
@@ -34,6 +34,7 @@ def _upload_one_file(args):
|
|
34
34
|
session=session, overwrite=overwrite, progress_tracker=progress_tracker,
|
35
35
|
threads=threads_per_upload, use_cache=use_cache, chunk_size=chunk_size,
|
36
36
|
no_new_versions=no_new_versions, max_retries=num_retries,
|
37
|
+
use_atomic_upload=use_atomic_upload
|
37
38
|
)
|
38
39
|
else:
|
39
40
|
result_file.link_file(link_type, filepath)
|
@@ -59,6 +60,7 @@ class GeoSeeqUploadManager:
|
|
59
60
|
num_retries=3,
|
60
61
|
ignore_errors=False,
|
61
62
|
chunk_size_mb=5,
|
63
|
+
use_atomic_upload=True,
|
62
64
|
use_cache=True):
|
63
65
|
self.session = session
|
64
66
|
self.n_parallel_uploads = n_parallel_uploads
|
@@ -73,12 +75,18 @@ class GeoSeeqUploadManager:
|
|
73
75
|
self.num_retries = num_retries
|
74
76
|
self.ignore_errors = ignore_errors
|
75
77
|
self.chunk_size_mb = chunk_size_mb
|
78
|
+
self.use_atomic_upload = use_atomic_upload
|
76
79
|
|
77
80
|
def add_result_file(self, result_file, local_path):
    """Register a (result_file, local_path) pair for a later upload pass."""
    pair = (result_file, local_path)
    self._result_files.append(pair)
|
79
82
|
|
80
83
|
def add_local_file_to_result_folder(self, result_folder, local_path, geoseeq_file_name=None):
    """Queue a local file for upload into `result_folder`.

    If `geoseeq_file_name` is not given it is derived from `local_path`:
    an absolute path contributes only its basename, while a relative path
    has any *leading* "./" or "../" components stripped so the remote name
    mirrors the local layout.
    """
    if not geoseeq_file_name:
        if local_path.startswith("/"):  # if local path is an absolute path use the basename
            geoseeq_file_name = basename(local_path)
        else:
            # Strip only leading "./" and "../" components. A blanket
            # str.replace would also mangle interior occurrences
            # (e.g. "a/../b" -> "a/.b") or dots inside filenames.
            geoseeq_file_name = local_path
            while geoseeq_file_name.startswith(("./", "../")):
                geoseeq_file_name = geoseeq_file_name.split("/", 1)[1]
    result_file = result_folder.result_file(geoseeq_file_name)
    self.add_result_file(result_file, local_path)
|
84
92
|
|
@@ -99,7 +107,7 @@ class GeoSeeqUploadManager:
|
|
99
107
|
self.link_type, self.overwrite, self.log_level,
|
100
108
|
self.n_parallel_uploads > 1, self.use_cache, self.no_new_versions,
|
101
109
|
self.threads_per_upload, self.num_retries, self.ignore_errors,
|
102
|
-
self.chunk_size_mb,
|
110
|
+
self.chunk_size_mb, self.use_atomic_upload
|
103
111
|
) for result_file, local_path in self._result_files
|
104
112
|
]
|
105
113
|
out = []
|
@@ -34,6 +34,8 @@ geoseeq/cli/get_eula.py
|
|
34
34
|
geoseeq/cli/main.py
|
35
35
|
geoseeq/cli/manage.py
|
36
36
|
geoseeq/cli/progress_bar.py
|
37
|
+
geoseeq/cli/project.py
|
38
|
+
geoseeq/cli/raw.py
|
37
39
|
geoseeq/cli/run.py
|
38
40
|
geoseeq/cli/search.py
|
39
41
|
geoseeq/cli/user.py
|
@@ -72,10 +74,12 @@ geoseeq/plotting/map/map.py
|
|
72
74
|
geoseeq/plotting/map/overlay.py
|
73
75
|
geoseeq/result/__init__.py
|
74
76
|
geoseeq/result/bioinfo.py
|
77
|
+
geoseeq/result/file_chunker.py
|
75
78
|
geoseeq/result/file_download.py
|
76
79
|
geoseeq/result/file_upload.py
|
77
80
|
geoseeq/result/result_file.py
|
78
81
|
geoseeq/result/result_folder.py
|
82
|
+
geoseeq/result/resumable_upload_tracker.py
|
79
83
|
geoseeq/result/utils.py
|
80
84
|
geoseeq/vc/__init__.py
|
81
85
|
geoseeq/vc/checksum.py
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|