geoseeq 0.5.6a9__tar.gz → 0.5.6a11__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/PKG-INFO +1 -1
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/download.py +8 -3
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/main.py +1 -1
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/upload/upload.py +3 -1
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/upload/upload_reads.py +86 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/result/file_download.py +5 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/result/file_upload.py +39 -11
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/upload_download_manager.py +7 -3
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq.egg-info/PKG-INFO +1 -1
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/pyproject.toml +1 -1
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/LICENSE +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/README.md +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/__init__.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/app.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/blob_constructors.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/bulk_creators.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/__init__.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/constants.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/copy.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/detail.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/fastq_utils.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/get_eula.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/manage.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/progress_bar.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/run.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/search.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/shared_params/__init__.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/shared_params/common_state.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/shared_params/config.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/shared_params/id_handlers.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/shared_params/obj_getters.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/shared_params/opts_and_args.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/upload/__init__.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/upload/upload_advanced.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/user.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/utils.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/cli/view.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/constants.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/contrib/__init__.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/contrib/ncbi/__init__.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/contrib/ncbi/api.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/contrib/ncbi/bioproject.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/contrib/ncbi/cli.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/contrib/ncbi/setup_logging.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/file_system_cache.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/id_constructors/__init__.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/id_constructors/from_blobs.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/id_constructors/from_ids.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/id_constructors/from_names.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/id_constructors/from_uuids.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/id_constructors/resolvers.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/id_constructors/utils.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/knex.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/organization.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/pipeline.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/plotting/__init__.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/plotting/constants.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/plotting/highcharts.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/plotting/map/__init__.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/plotting/map/base_layer.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/plotting/map/map.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/plotting/map/overlay.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/plotting/selectable.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/project.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/remote_object.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/result/__init__.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/result/bioinfo.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/result/result_file.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/result/result_folder.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/result/utils.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/sample.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/search.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/user.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/utils.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/vc/__init__.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/vc/checksum.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/vc/cli.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/vc/clone.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/vc/constants.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/vc/vc_cache.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/vc/vc_dir.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/vc/vc_sample.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/vc/vc_stub.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq/work_orders.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq.egg-info/SOURCES.txt +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq.egg-info/dependency_links.txt +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq.egg-info/entry_points.txt +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/geoseeq.egg-info/top_level.txt +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/setup.cfg +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/setup.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/tests/__init__.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/tests/test_api_client.py +0 -0
- {geoseeq-0.5.6a9 → geoseeq-0.5.6a11}/tests/test_plotting.py +0 -0
@@ -97,13 +97,14 @@ def cli_download_metadata(state, sample_ids):
|
|
97
97
|
|
98
98
|
|
99
99
|
cores_option = click.option('--cores', default=1, help='Number of downloads to run in parallel')
|
100
|
-
|
100
|
+
head_option = click.option('--head', default=None, type=int, help='Download the first N bytes of each file')
|
101
101
|
|
102
102
|
@cli_download.command("files")
|
103
103
|
@use_common_state
|
104
104
|
@cores_option
|
105
105
|
@click.option("--target-dir", default=".")
|
106
106
|
@yes_option
|
107
|
+
@head_option
|
107
108
|
@click.option("--download/--urls-only", default=True, help="Download files or just print urls")
|
108
109
|
@click.option("--folder-type", type=click.Choice(['all', 'sample', 'project'], case_sensitive=False), default="all", help='Download files from sample folders, project folders, or both')
|
109
110
|
@click.option("--folder-name", multiple=True, help='Filter folders for names that include this string. Case insensitive.')
|
@@ -120,6 +121,7 @@ def cli_download_files(
|
|
120
121
|
sample_name_includes,
|
121
122
|
target_dir,
|
122
123
|
yes,
|
124
|
+
head,
|
123
125
|
folder_type,
|
124
126
|
folder_name,
|
125
127
|
file_name,
|
@@ -213,6 +215,7 @@ def cli_download_files(
|
|
213
215
|
ignore_errors=ignore_errors,
|
214
216
|
log_level=state.log_level,
|
215
217
|
progress_tracker_factory=PBarManager().get_new_bar,
|
218
|
+
head=head,
|
216
219
|
)
|
217
220
|
for fname, url in response["links"].items():
|
218
221
|
download_manager.add_download(url, join(target_dir, fname))
|
@@ -230,11 +233,12 @@ def cli_download_files(
|
|
230
233
|
@cores_option
|
231
234
|
@click.option("-t", "--target-dir", default=".")
|
232
235
|
@yes_option
|
236
|
+
@head_option
|
233
237
|
@click.option("--download/--urls-only", default=True, help="Download files or just print urls")
|
234
238
|
@ignore_errors_option
|
235
239
|
@click.option('--hidden/--no-hidden', default=True, help='Download hidden files in folder')
|
236
240
|
@folder_ids_arg
|
237
|
-
def cli_download_folders(state, cores, target_dir, yes, download, ignore_errors, hidden, folder_ids):
|
241
|
+
def cli_download_folders(state, cores, target_dir, yes, head, download, ignore_errors, hidden, folder_ids):
|
238
242
|
"""Download entire folders from GeoSeeq.
|
239
243
|
|
240
244
|
This command downloads folders directly based on their ID. This is used for "manual"
|
@@ -267,6 +271,7 @@ def cli_download_folders(state, cores, target_dir, yes, download, ignore_errors,
|
|
267
271
|
ignore_errors=ignore_errors,
|
268
272
|
log_level=state.log_level,
|
269
273
|
progress_tracker_factory=PBarManager().get_new_bar,
|
274
|
+
head=head,
|
270
275
|
)
|
271
276
|
for result_folder in result_folders:
|
272
277
|
download_manager.add_result_folder_download(
|
@@ -286,7 +291,7 @@ def cli_download_folders(state, cores, target_dir, yes, download, ignore_errors,
|
|
286
291
|
@click.option("-n", "--file-name", multiple=True, help="File name to use for downloaded files. If set you must specify once per ID.")
|
287
292
|
@yes_option
|
288
293
|
@click.option("--download/--urls-only", default=True, help="Download files or just print urls")
|
289
|
-
@
|
294
|
+
@head_option
|
290
295
|
@ignore_errors_option
|
291
296
|
@click.argument("ids", nargs=-1)
|
292
297
|
def cli_download_ids(state, cores, target_dir, file_name, yes, download, head, ignore_errors, ids):
|
@@ -53,7 +53,7 @@ def version():
|
|
53
53
|
Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
|
54
54
|
Run `geoseeq eula show` to view the EULA.
|
55
55
|
"""
|
56
|
-
click.echo('0.5.6a9') # remember to update setup
|
56
|
+
click.echo('0.5.6a11') # remember to update setup
|
57
57
|
|
58
58
|
|
59
59
|
@main.group('advanced')
|
@@ -37,6 +37,7 @@ hidden_option = click.option('--hidden/--no-hidden', default=False, help='Upload
|
|
37
37
|
@click.command('files')
|
38
38
|
@use_common_state
|
39
39
|
@click.option('--cores', default=1, help='Number of uploads to run in parallel')
|
40
|
+
@click.option('--threads-per-upload', default=4, help='Number of threads used to upload each file')
|
40
41
|
@yes_option
|
41
42
|
@private_option
|
42
43
|
@link_option
|
@@ -47,7 +48,7 @@ hidden_option = click.option('--hidden/--no-hidden', default=False, help='Upload
|
|
47
48
|
help='Specify a different name for the file on GeoSeeq than the local file name.')
|
48
49
|
@folder_id_arg
|
49
50
|
@click.argument('file_paths', type=click.Path(exists=True), nargs=-1)
|
50
|
-
def cli_upload_file(state, cores, yes, private, link_type, recursive, hidden, no_new_versions, geoseeq_file_name, folder_id, file_paths):
|
51
|
+
def cli_upload_file(state, cores, threads_per_upload, yes, private, link_type, recursive, hidden, no_new_versions, geoseeq_file_name, folder_id, file_paths):
|
51
52
|
"""Upload files to GeoSeeq.
|
52
53
|
|
53
54
|
This command uploads files to either a sample or project on GeoSeeq. It can be used to upload
|
@@ -106,6 +107,7 @@ def cli_upload_file(state, cores, yes, private, link_type, recursive, hidden, no
|
|
106
107
|
|
107
108
|
upload_manager = GeoSeeqUploadManager(
|
108
109
|
n_parallel_uploads=cores,
|
110
|
+
threads_per_upload=threads_per_upload,
|
109
111
|
link_type=link_type,
|
110
112
|
progress_tracker_factory=PBarManager().get_new_bar,
|
111
113
|
log_level=state.log_level,
|
@@ -132,6 +132,28 @@ def flatten_list_of_fastqs(filepaths):
|
|
132
132
|
return flattened
|
133
133
|
|
134
134
|
|
135
|
+
def _is_bam(path):
|
136
|
+
for ext in ['.bam', '.bai']:
|
137
|
+
if path.endswith(ext):
|
138
|
+
return True
|
139
|
+
return False
|
140
|
+
|
141
|
+
|
142
|
+
def flatten_list_of_bams(filepaths):
|
143
|
+
"""Turn a list of bam filepaths and txt files containing bam filepaths into a single list of bam filepaths."""
|
144
|
+
flattened = []
|
145
|
+
for path in filepaths:
|
146
|
+
if _is_bam(path):
|
147
|
+
flattened.append(path)
|
148
|
+
else:
|
149
|
+
with open(path) as f:
|
150
|
+
for line in f:
|
151
|
+
line = line.strip()
|
152
|
+
if line and not line.startswith('#'):
|
153
|
+
flattened.append(line)
|
154
|
+
return flattened
|
155
|
+
|
156
|
+
|
135
157
|
|
136
158
|
@click.command('reads')
|
137
159
|
@use_common_state
|
@@ -200,3 +222,67 @@ def cli_upload_reads_wizard(state, cores, overwrite, yes, regex, private, link_t
|
|
200
222
|
regex = _get_regex(knex, filepaths, module_name, proj, regex)
|
201
223
|
groups = _group_files(knex, filepaths, module_name, regex, yes)
|
202
224
|
_do_upload(groups, module_name, link_type, proj, filepaths, overwrite, no_new_versions, cores, state)
|
225
|
+
|
226
|
+
|
227
|
+
# @click.command('bam')
|
228
|
+
# @use_common_state
|
229
|
+
# @click.option('--genome', default=None, help='The genome aligned to the BAM files. Should be in 2bit format.')
|
230
|
+
# @click.option('--cores', default=1, help='Number of uploads to run in parallel')
|
231
|
+
# @overwrite_option
|
232
|
+
# @yes_option
|
233
|
+
# @click.option('--regex', default=None, help='An optional regex to use to extract sample names from the file names')
|
234
|
+
# @private_option
|
235
|
+
# @link_option
|
236
|
+
# @no_new_versions_option
|
237
|
+
# @project_id_arg
|
238
|
+
# @click.argument('files', type=click.Path(exists=True), nargs=-1)
|
239
|
+
# def cli_upload_bams(state, genome, cores, overwrite, yes, regex, private, link_type, no_new_versions, project_id, files):
|
240
|
+
"""Upload BAM files to GeoSeeq.
|
241
|
+
|
242
|
+
This command automatically groups bams with their index files.
|
243
|
+
|
244
|
+
---
|
245
|
+
|
246
|
+
Example Usage:
|
247
|
+
|
248
|
+
\b
|
249
|
+
# Upload a list of BAM files to a project, useful if you have hundreds of files
|
250
|
+
$ ls -1 path/to/bam/files/*.bam > file_list.txt
|
251
|
+
$ geoseeq upload bams "GeoSeeq/Example CLI Project" file_list.txt
|
252
|
+
|
253
|
+
\b
|
254
|
+
# Upload all the BAM files in a directory to a project with BAM indexes
|
255
|
+
$ geoseeq upload bams ed59b913-91ec-489b-a1b9-4ea137a6e5cf path/to/bam/files/*.bam path/to/bam/files/*.bam.bai
|
256
|
+
|
257
|
+
\b
|
258
|
+
# Upload all the BAM files in a directory to a project, performing 4 uploads in parallel
|
259
|
+
$ geoseeq upload bams --cores 4 ed59b913-91ec-489b-a1b9-4ea137a6e5cf path/to/bam/files/*.bam
|
260
|
+
|
261
|
+
\b
|
262
|
+
# Upload a list of BAM files to a project, automatically creating a new project and overwriting existing files
|
263
|
+
$ ls -1 path/to/bam/files/*.bam > file_list.txt
|
264
|
+
$ geoseeq upload bams --yes --overwrite "GeoSeeq/Example CLI Project" file_list.txt
|
265
|
+
|
266
|
+
---
|
267
|
+
|
268
|
+
Command Arguments:
|
269
|
+
|
270
|
+
[PROJECT_ID] Can be a project UUID, GeoSeeq Resource Number (GRN), or an
|
271
|
+
organization name and project name separated by a slash.
|
272
|
+
|
273
|
+
\b
|
274
|
+
Examples:
|
275
|
+
- Name pair: "GeoSeeq/Example CLI Project"
|
276
|
+
- UUID: "ed59b913-91ec-489b-a1b9-4ea137a6e5cf"
|
277
|
+
- GRN: "grn:gs1:project:ed59b913-91ec-489b-a1b9-4ea137a6e5cf"
|
278
|
+
|
279
|
+
\b
|
280
|
+
[FILES...] can be paths to BAM files or a file containing a list of paths, or a mix of both.
|
281
|
+
Example: "path/to/bam/files
|
282
|
+
"""
|
283
|
+
knex = state.get_knex()
|
284
|
+
proj = handle_project_id(knex, project_id, yes, private)
|
285
|
+
filepaths = {basename(line): line for line in flatten_list_of_bams(files)}
|
286
|
+
click.echo(f'Found {len(filepaths)} files to upload.', err=True)
|
287
|
+
groups = _group_files(knex, filepaths, 'bam::bam', regex, yes)
|
288
|
+
_do_upload(groups, 'bam::bam', link_type, proj, filepaths, overwrite, no_new_versions, cores, state)
|
@@ -46,6 +46,11 @@ def guess_download_kind(url):
|
|
46
46
|
|
47
47
|
def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None):
|
48
48
|
"""Return a local filepath to the downloaded file. Download the file."""
|
49
|
+
if filename and isfile(filename):
|
50
|
+
file_size = getsize(filename)
|
51
|
+
if file_size > 0:
|
52
|
+
logger.info(f"File already exists: {filename}. Not overwriting.")
|
53
|
+
return filename
|
49
54
|
if kind == 'guess':
|
50
55
|
kind = guess_download_kind(url)
|
51
56
|
logger.info(f"Guessed download kind: {kind} for {url}")
|
@@ -2,7 +2,7 @@
|
|
2
2
|
import time
|
3
3
|
import json
|
4
4
|
import os
|
5
|
-
from os.path import basename, getsize, join, dirname, isfile
|
5
|
+
from os.path import basename, getsize, join, dirname, isfile, getctime
|
6
6
|
from pathlib import Path
|
7
7
|
|
8
8
|
import requests
|
@@ -31,14 +31,30 @@ class FileChunker:
|
|
31
31
|
chunk = f.read(self.chunk_size)
|
32
32
|
self.loaded_parts.append(chunk)
|
33
33
|
return self # convenience for chaining
|
34
|
+
|
35
|
+
def chunk_is_preloaded(self, num):
|
36
|
+
return len(self.loaded_parts) > num and self.loaded_parts[num]
|
37
|
+
|
38
|
+
def read_one_chunk(self, num):
|
39
|
+
if not self.chunk_is_preloaded(num):
|
40
|
+
logger.debug(f"Reading chunk {num} from {self.filepath}")
|
41
|
+
with open(self.filepath, "rb") as f:
|
42
|
+
f.seek(num * self.chunk_size)
|
43
|
+
chunk = f.read(self.chunk_size)
|
44
|
+
return chunk
|
45
|
+
return self.loaded_parts[num]
|
34
46
|
|
35
47
|
def get_chunk(self, num):
|
36
|
-
self.
|
37
|
-
|
48
|
+
if self.chunk_is_preloaded(num):
|
49
|
+
return self.loaded_parts[num]
|
50
|
+
return self.read_one_chunk(num)
|
38
51
|
|
39
52
|
def get_chunk_size(self, num):
|
40
|
-
self.
|
41
|
-
|
53
|
+
if num < (self.n_parts - 1): # all but the last chunk
|
54
|
+
return self.chunk_size
|
55
|
+
if self.chunk_is_preloaded(num): # last chunk, pre-loaded
|
56
|
+
return len(self.loaded_parts[num])
|
57
|
+
return len(self.read_one_chunk(num)) # last chunk, not pre-loaded
|
42
58
|
|
43
59
|
|
44
60
|
class ResumableUploadTracker:
|
@@ -49,7 +65,7 @@ class ResumableUploadTracker:
|
|
49
65
|
self.filepath = filepath
|
50
66
|
self.tracker_file = join(
|
51
67
|
GEOSEEQ_CACHE_DIR, 'upload',
|
52
|
-
tracker_file_prefix + f".{chunk_size}." + basename(filepath)
|
68
|
+
tracker_file_prefix + f".{chunk_size}.{getsize(filepath)}." + basename(filepath)
|
53
69
|
)
|
54
70
|
try:
|
55
71
|
os.makedirs(dirname(self.tracker_file), exist_ok=True)
|
@@ -64,7 +80,7 @@ class ResumableUploadTracker:
|
|
64
80
|
return
|
65
81
|
if self.upload_started:
|
66
82
|
raise GeoseeqGeneralError("Upload has already started.")
|
67
|
-
blob = dict(upload_id=upload_id, urls=urls)
|
83
|
+
blob = dict(upload_id=upload_id, urls=urls, start_time=time.time())
|
68
84
|
serialized = json.dumps(blob)
|
69
85
|
with open(self.tracker_file, "w") as f:
|
70
86
|
f.write(serialized + "\n")
|
@@ -89,6 +105,11 @@ class ResumableUploadTracker:
|
|
89
105
|
with open(self.tracker_file, "r") as f:
|
90
106
|
header_blob = json.loads(f.readline())
|
91
107
|
self.upload_id, self.urls = header_blob["upload_id"], header_blob["urls"]
|
108
|
+
start_time = header_blob["start_time"]
|
109
|
+
if (time.time() - start_time) > (60 * 60 * 23):
|
110
|
+
logger.warning(f"Tracker file {self.tracker_file} is too old. Deleting.")
|
111
|
+
os.remove(self.tracker_file)
|
112
|
+
return
|
92
113
|
self.upload_started = True
|
93
114
|
for line in f:
|
94
115
|
blob = json.loads(line)
|
@@ -154,6 +175,7 @@ class ResultFileUpload:
|
|
154
175
|
attempts = 0
|
155
176
|
while attempts < max_retries:
|
156
177
|
try:
|
178
|
+
logger.debug(f"Uploading part {num + 1} to {url}. Size: {len(file_chunk)} bytes.")
|
157
179
|
if session:
|
158
180
|
http_response = session.put(url, data=file_chunk)
|
159
181
|
else:
|
@@ -161,14 +183,15 @@ class ResultFileUpload:
|
|
161
183
|
http_response.raise_for_status()
|
162
184
|
logger.debug(f"Upload for part {num + 1} succeeded.")
|
163
185
|
break
|
164
|
-
except requests.exceptions.HTTPError:
|
165
|
-
logger.
|
186
|
+
except (requests.exceptions.HTTPError, requests.exceptions.SSLError, requests.exceptions.ConnectionError) as e:
|
187
|
+
logger.debug(
|
166
188
|
f"Upload for part {num + 1} failed. Attempt {attempts + 1} of {max_retries}."
|
167
189
|
)
|
168
190
|
attempts += 1
|
169
191
|
if attempts == max_retries:
|
170
|
-
raise
|
192
|
+
raise e
|
171
193
|
time.sleep(10**attempts) # exponential backoff, (10 ** 2)s default max
|
194
|
+
|
172
195
|
etag = http_response.headers["ETag"].replace('"', "")
|
173
196
|
blob = {"ETag": etag, "PartNumber": num + 1}
|
174
197
|
if resumable_upload_tracker:
|
@@ -245,7 +268,12 @@ class ResultFileUpload:
|
|
245
268
|
resumable_upload_tracker.start_upload(upload_id, urls)
|
246
269
|
logger.info(f'Starting upload for "{filepath}"')
|
247
270
|
complete_parts = []
|
248
|
-
file_chunker = FileChunker(filepath, chunk_size)
|
271
|
+
file_chunker = FileChunker(filepath, chunk_size)
|
272
|
+
if file_chunker.file_size < 10 * FIVE_MB:
|
273
|
+
file_chunker.load_all_chunks()
|
274
|
+
logger.debug(f"Preloaded all chunks for {filepath}")
|
275
|
+
else:
|
276
|
+
logger.debug(f"Did not preload chunks for {filepath}")
|
249
277
|
if progress_tracker: progress_tracker.set_num_chunks(file_chunker.file_size)
|
250
278
|
complete_parts = self._upload_parts(
|
251
279
|
file_chunker,
|
@@ -21,7 +21,7 @@ def _make_in_process_logger(log_level):
|
|
21
21
|
def _upload_one_file(args):
|
22
22
|
(result_file, filepath, session, progress_tracker,
|
23
23
|
link_type, overwrite, log_level, parallel_uploads,
|
24
|
-
use_cache, no_new_versions) = args
|
24
|
+
use_cache, no_new_versions, threads_per_upload) = args
|
25
25
|
if parallel_uploads:
|
26
26
|
_make_in_process_logger(log_level)
|
27
27
|
if link_type == 'upload':
|
@@ -29,7 +29,8 @@ def _upload_one_file(args):
|
|
29
29
|
result_file.upload_file(
|
30
30
|
filepath,
|
31
31
|
session=session, overwrite=overwrite, progress_tracker=progress_tracker,
|
32
|
-
threads=
|
32
|
+
threads=threads_per_upload, use_cache=use_cache,
|
33
|
+
no_new_versions=no_new_versions
|
33
34
|
)
|
34
35
|
else:
|
35
36
|
result_file.link_file(link_type, filepath)
|
@@ -40,6 +41,7 @@ class GeoSeeqUploadManager:
|
|
40
41
|
|
41
42
|
def __init__(self,
|
42
43
|
n_parallel_uploads=1,
|
44
|
+
threads_per_upload=4,
|
43
45
|
session=None,
|
44
46
|
link_type='upload',
|
45
47
|
progress_tracker_factory=None,
|
@@ -56,6 +58,7 @@ class GeoSeeqUploadManager:
|
|
56
58
|
self._result_files = []
|
57
59
|
self.no_new_versions = no_new_versions
|
58
60
|
self.use_cache = use_cache
|
61
|
+
self.threads_per_upload = threads_per_upload
|
59
62
|
|
60
63
|
def add_result_file(self, result_file, local_path):
|
61
64
|
self._result_files.append((result_file, local_path))
|
@@ -80,7 +83,8 @@ class GeoSeeqUploadManager:
|
|
80
83
|
result_file, local_path,
|
81
84
|
self.session, self.progress_tracker_factory(local_path),
|
82
85
|
self.link_type, self.overwrite, self.log_level,
|
83
|
-
self.n_parallel_uploads > 1, self.use_cache, self.no_new_versions
|
86
|
+
self.n_parallel_uploads > 1, self.use_cache, self.no_new_versions,
|
87
|
+
self.threads_per_upload
|
84
88
|
) for result_file, local_path in self._result_files
|
85
89
|
]
|
86
90
|
out = []
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|