geoseeq 0.5.6a10__py3-none-any.whl → 0.5.6a11__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- geoseeq/cli/main.py +1 -1
- geoseeq/cli/upload/upload.py +3 -1
- geoseeq/cli/upload/upload_reads.py +86 -0
- geoseeq/result/file_download.py +5 -0
- geoseeq/result/file_upload.py +27 -5
- geoseeq/upload_download_manager.py +7 -3
- {geoseeq-0.5.6a10.dist-info → geoseeq-0.5.6a11.dist-info}/METADATA +1 -1
- {geoseeq-0.5.6a10.dist-info → geoseeq-0.5.6a11.dist-info}/RECORD +12 -12
- {geoseeq-0.5.6a10.dist-info → geoseeq-0.5.6a11.dist-info}/LICENSE +0 -0
- {geoseeq-0.5.6a10.dist-info → geoseeq-0.5.6a11.dist-info}/WHEEL +0 -0
- {geoseeq-0.5.6a10.dist-info → geoseeq-0.5.6a11.dist-info}/entry_points.txt +0 -0
- {geoseeq-0.5.6a10.dist-info → geoseeq-0.5.6a11.dist-info}/top_level.txt +0 -0
geoseeq/cli/main.py
CHANGED
@@ -53,7 +53,7 @@ def version():
|
|
53
53
|
Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
|
54
54
|
Run `geoseeq eula show` to view the EULA.
|
55
55
|
"""
|
56
|
-
click.echo('0.5.
|
56
|
+
click.echo('0.5.6a11') # remember to update setup
|
57
57
|
|
58
58
|
|
59
59
|
@main.group('advanced')
|
geoseeq/cli/upload/upload.py
CHANGED
@@ -37,6 +37,7 @@ hidden_option = click.option('--hidden/--no-hidden', default=False, help='Upload
|
|
37
37
|
@click.command('files')
|
38
38
|
@use_common_state
|
39
39
|
@click.option('--cores', default=1, help='Number of uploads to run in parallel')
|
40
|
+
@click.option('--threads-per-upload', default=4, help='Number of threads used to upload each file')
|
40
41
|
@yes_option
|
41
42
|
@private_option
|
42
43
|
@link_option
|
@@ -47,7 +48,7 @@ hidden_option = click.option('--hidden/--no-hidden', default=False, help='Upload
|
|
47
48
|
help='Specify a different name for the file on GeoSeeq than the local file name.')
|
48
49
|
@folder_id_arg
|
49
50
|
@click.argument('file_paths', type=click.Path(exists=True), nargs=-1)
|
50
|
-
def cli_upload_file(state, cores, yes, private, link_type, recursive, hidden, no_new_versions, geoseeq_file_name, folder_id, file_paths):
|
51
|
+
def cli_upload_file(state, cores, threads_per_upload, yes, private, link_type, recursive, hidden, no_new_versions, geoseeq_file_name, folder_id, file_paths):
|
51
52
|
"""Upload files to GeoSeeq.
|
52
53
|
|
53
54
|
This command uploads files to either a sample or project on GeoSeeq. It can be used to upload
|
@@ -106,6 +107,7 @@ def cli_upload_file(state, cores, yes, private, link_type, recursive, hidden, no
|
|
106
107
|
|
107
108
|
upload_manager = GeoSeeqUploadManager(
|
108
109
|
n_parallel_uploads=cores,
|
110
|
+
threads_per_upload=threads_per_upload,
|
109
111
|
link_type=link_type,
|
110
112
|
progress_tracker_factory=PBarManager().get_new_bar,
|
111
113
|
log_level=state.log_level,
|
@@ -132,6 +132,28 @@ def flatten_list_of_fastqs(filepaths):
|
|
132
132
|
return flattened
|
133
133
|
|
134
134
|
|
135
|
+
def _is_bam(path):
|
136
|
+
for ext in ['.bam', '.bai']:
|
137
|
+
if path.endswith(ext):
|
138
|
+
return True
|
139
|
+
return False
|
140
|
+
|
141
|
+
|
142
|
+
def flatten_list_of_bams(filepaths):
|
143
|
+
"""Turn a list of bam filepaths and txt files containing bam filepaths into a single list of bam filepaths."""
|
144
|
+
flattened = []
|
145
|
+
for path in filepaths:
|
146
|
+
if _is_bam(path):
|
147
|
+
flattened.append(path)
|
148
|
+
else:
|
149
|
+
with open(path) as f:
|
150
|
+
for line in f:
|
151
|
+
line = line.strip()
|
152
|
+
if line and not line.startswith('#'):
|
153
|
+
flattened.append(line)
|
154
|
+
return flattened
|
155
|
+
|
156
|
+
|
135
157
|
|
136
158
|
@click.command('reads')
|
137
159
|
@use_common_state
|
@@ -200,3 +222,67 @@ def cli_upload_reads_wizard(state, cores, overwrite, yes, regex, private, link_t
|
|
200
222
|
regex = _get_regex(knex, filepaths, module_name, proj, regex)
|
201
223
|
groups = _group_files(knex, filepaths, module_name, regex, yes)
|
202
224
|
_do_upload(groups, module_name, link_type, proj, filepaths, overwrite, no_new_versions, cores, state)
|
225
|
+
|
226
|
+
|
227
|
+
# @click.command('bam')
|
228
|
+
# @use_common_state
|
229
|
+
# @click.option('--genome', default=None, help='The genome aligned to the BAM files. Should be in 2bit format.')
|
230
|
+
# @click.option('--cores', default=1, help='Number of uploads to run in parallel')
|
231
|
+
# @overwrite_option
|
232
|
+
# @yes_option
|
233
|
+
# @click.option('--regex', default=None, help='An optional regex to use to extract sample names from the file names')
|
234
|
+
# @private_option
|
235
|
+
# @link_option
|
236
|
+
# @no_new_versions_option
|
237
|
+
# @project_id_arg
|
238
|
+
# @click.argument('files', type=click.Path(exists=True), nargs=-1)
|
239
|
+
# def cli_upload_bams(state, genome, cores, overwrite, yes, regex, private, link_type, no_new_versions, project_id, files):
|
240
|
+
"""Upload BAM files to GeoSeeq.
|
241
|
+
|
242
|
+
This command automatically groups bams with their index files.
|
243
|
+
|
244
|
+
---
|
245
|
+
|
246
|
+
Example Usage:
|
247
|
+
|
248
|
+
\b
|
249
|
+
# Upload a list of BAM files to a project, useful if you have hundreds of files
|
250
|
+
$ ls -1 path/to/bam/files/*.bam > file_list.txt
|
251
|
+
$ geoseeq upload bams "GeoSeeq/Example CLI Project" file_list.txt
|
252
|
+
|
253
|
+
\b
|
254
|
+
# Upload all the BAM files in a directory to a project with BAM indexes
|
255
|
+
$ geoseeq upload bams ed59b913-91ec-489b-a1b9-4ea137a6e5cf path/to/bam/files/*.bam path/to/bam/files/*.bam.bai
|
256
|
+
|
257
|
+
\b
|
258
|
+
# Upload all the BAM files in a directory to a project, performing 4 uploads in parallel
|
259
|
+
$ geoseeq upload bams --cores 4 ed59b913-91ec-489b-a1b9-4ea137a6e5cf path/to/bam/files/*.bam
|
260
|
+
|
261
|
+
\b
|
262
|
+
# Upload a list of BAM files to a project, automatically creating a new project and overwriting existing files
|
263
|
+
$ ls -1 path/to/bam/files/*.bam > file_list.txt
|
264
|
+
$ geoseeq upload bams --yes --overwrite "GeoSeeq/Example CLI Project" file_list.txt
|
265
|
+
|
266
|
+
---
|
267
|
+
|
268
|
+
Command Arguments:
|
269
|
+
|
270
|
+
[PROJECT_ID] Can be a project UUID, GeoSeeq Resource Number (GRN), or an
|
271
|
+
organization name and project name separated by a slash.
|
272
|
+
|
273
|
+
\b
|
274
|
+
Examples:
|
275
|
+
- Name pair: "GeoSeeq/Example CLI Project"
|
276
|
+
- UUID: "ed59b913-91ec-489b-a1b9-4ea137a6e5cf"
|
277
|
+
- GRN: "grn:gs1:project:ed59b913-91ec-489b-a1b9-4ea137a6e5cf"
|
278
|
+
|
279
|
+
\b
|
280
|
+
[FILES...] can be paths to BAM files or a file containing a list of paths, or a mix of both.
|
281
|
+
Example: "path/to/bam/files
|
282
|
+
"""
|
283
|
+
knex = state.get_knex()
|
284
|
+
proj = handle_project_id(knex, project_id, yes, private)
|
285
|
+
filepaths = {basename(line): line for line in flatten_list_of_bams(files)}
|
286
|
+
click.echo(f'Found {len(filepaths)} files to upload.', err=True)
|
287
|
+
groups = _group_files(knex, filepaths, 'bam::bam', regex, yes)
|
288
|
+
_do_upload(groups, 'bam::bam', link_type, proj, filepaths, overwrite, no_new_versions, cores, state)
|
geoseeq/result/file_download.py
CHANGED
@@ -46,6 +46,11 @@ def guess_download_kind(url):
|
|
46
46
|
|
47
47
|
def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None):
|
48
48
|
"""Return a local filepath to the downloaded file. Download the file."""
|
49
|
+
if filename and isfile(filename):
|
50
|
+
file_size = getsize(filename)
|
51
|
+
if file_size > 0:
|
52
|
+
logger.info(f"File already exists: {filename}. Not overwriting.")
|
53
|
+
return filename
|
49
54
|
if kind == 'guess':
|
50
55
|
kind = guess_download_kind(url)
|
51
56
|
logger.info(f"Guessed download kind: {kind} for {url}")
|
geoseeq/result/file_upload.py
CHANGED
@@ -31,14 +31,30 @@ class FileChunker:
|
|
31
31
|
chunk = f.read(self.chunk_size)
|
32
32
|
self.loaded_parts.append(chunk)
|
33
33
|
return self # convenience for chaining
|
34
|
+
|
35
|
+
def chunk_is_preloaded(self, num):
|
36
|
+
return len(self.loaded_parts) > num and self.loaded_parts[num]
|
37
|
+
|
38
|
+
def read_one_chunk(self, num):
|
39
|
+
if not self.chunk_is_preloaded(num):
|
40
|
+
logger.debug(f"Reading chunk {num} from {self.filepath}")
|
41
|
+
with open(self.filepath, "rb") as f:
|
42
|
+
f.seek(num * self.chunk_size)
|
43
|
+
chunk = f.read(self.chunk_size)
|
44
|
+
return chunk
|
45
|
+
return self.loaded_parts[num]
|
34
46
|
|
35
47
|
def get_chunk(self, num):
|
36
|
-
self.
|
37
|
-
|
48
|
+
if self.chunk_is_preloaded(num):
|
49
|
+
return self.loaded_parts[num]
|
50
|
+
return self.read_one_chunk(num)
|
38
51
|
|
39
52
|
def get_chunk_size(self, num):
|
40
|
-
self.
|
41
|
-
|
53
|
+
if num < (self.n_parts - 1): # all but the last chunk
|
54
|
+
return self.chunk_size
|
55
|
+
if self.chunk_is_preloaded(num): # last chunk, pre-loaded
|
56
|
+
return len(self.loaded_parts[num])
|
57
|
+
return len(self.read_one_chunk(num)) # last chunk, not pre-loaded
|
42
58
|
|
43
59
|
|
44
60
|
class ResumableUploadTracker:
|
@@ -159,6 +175,7 @@ class ResultFileUpload:
|
|
159
175
|
attempts = 0
|
160
176
|
while attempts < max_retries:
|
161
177
|
try:
|
178
|
+
logger.debug(f"Uploading part {num + 1} to {url}. Size: {len(file_chunk)} bytes.")
|
162
179
|
if session:
|
163
180
|
http_response = session.put(url, data=file_chunk)
|
164
181
|
else:
|
@@ -251,7 +268,12 @@ class ResultFileUpload:
|
|
251
268
|
resumable_upload_tracker.start_upload(upload_id, urls)
|
252
269
|
logger.info(f'Starting upload for "{filepath}"')
|
253
270
|
complete_parts = []
|
254
|
-
file_chunker = FileChunker(filepath, chunk_size)
|
271
|
+
file_chunker = FileChunker(filepath, chunk_size)
|
272
|
+
if file_chunker.file_size < 10 * FIVE_MB:
|
273
|
+
file_chunker.load_all_chunks()
|
274
|
+
logger.debug(f"Preloaded all chunks for {filepath}")
|
275
|
+
else:
|
276
|
+
logger.debug(f"Did not preload chunks for {filepath}")
|
255
277
|
if progress_tracker: progress_tracker.set_num_chunks(file_chunker.file_size)
|
256
278
|
complete_parts = self._upload_parts(
|
257
279
|
file_chunker,
|
@@ -21,7 +21,7 @@ def _make_in_process_logger(log_level):
|
|
21
21
|
def _upload_one_file(args):
|
22
22
|
(result_file, filepath, session, progress_tracker,
|
23
23
|
link_type, overwrite, log_level, parallel_uploads,
|
24
|
-
use_cache, no_new_versions) = args
|
24
|
+
use_cache, no_new_versions, threads_per_upload) = args
|
25
25
|
if parallel_uploads:
|
26
26
|
_make_in_process_logger(log_level)
|
27
27
|
if link_type == 'upload':
|
@@ -29,7 +29,8 @@ def _upload_one_file(args):
|
|
29
29
|
result_file.upload_file(
|
30
30
|
filepath,
|
31
31
|
session=session, overwrite=overwrite, progress_tracker=progress_tracker,
|
32
|
-
threads=
|
32
|
+
threads=threads_per_upload, use_cache=use_cache,
|
33
|
+
no_new_versions=no_new_versions
|
33
34
|
)
|
34
35
|
else:
|
35
36
|
result_file.link_file(link_type, filepath)
|
@@ -40,6 +41,7 @@ class GeoSeeqUploadManager:
|
|
40
41
|
|
41
42
|
def __init__(self,
|
42
43
|
n_parallel_uploads=1,
|
44
|
+
threads_per_upload=4,
|
43
45
|
session=None,
|
44
46
|
link_type='upload',
|
45
47
|
progress_tracker_factory=None,
|
@@ -56,6 +58,7 @@ class GeoSeeqUploadManager:
|
|
56
58
|
self._result_files = []
|
57
59
|
self.no_new_versions = no_new_versions
|
58
60
|
self.use_cache = use_cache
|
61
|
+
self.threads_per_upload = threads_per_upload
|
59
62
|
|
60
63
|
def add_result_file(self, result_file, local_path):
|
61
64
|
self._result_files.append((result_file, local_path))
|
@@ -80,7 +83,8 @@ class GeoSeeqUploadManager:
|
|
80
83
|
result_file, local_path,
|
81
84
|
self.session, self.progress_tracker_factory(local_path),
|
82
85
|
self.link_type, self.overwrite, self.log_level,
|
83
|
-
self.n_parallel_uploads > 1, self.use_cache, self.no_new_versions
|
86
|
+
self.n_parallel_uploads > 1, self.use_cache, self.no_new_versions,
|
87
|
+
self.threads_per_upload
|
84
88
|
) for result_file, local_path in self._result_files
|
85
89
|
]
|
86
90
|
out = []
|
@@ -11,7 +11,7 @@ geoseeq/project.py,sha256=-9Y2ik0-BpT3iqh89v8VQBbdadhI58oaUP9oZK8oetc,13741
|
|
11
11
|
geoseeq/remote_object.py,sha256=Es-JlAz8iLRmCpAzh1MOwUh2MqtbuQM-p8wHIBAqNlQ,7131
|
12
12
|
geoseeq/sample.py,sha256=whgEVk6GnDJJLjn5uTOqFqRtVxZD3BgjTo7brAC5noU,7981
|
13
13
|
geoseeq/search.py,sha256=gawad6Cx5FxJBPlYkXWb-UKAO-UC0_yhvyU9Ca1kaNI,3388
|
14
|
-
geoseeq/upload_download_manager.py,sha256=
|
14
|
+
geoseeq/upload_download_manager.py,sha256=aydSVTAjyupd4gkqmImtcSTXEPBAAqQ1HFgfAk83Scw,7605
|
15
15
|
geoseeq/user.py,sha256=tol8i1UGLRrbMw5jeJDnna1ikRgrCDd50Jxz0a1lSgg,690
|
16
16
|
geoseeq/utils.py,sha256=PDRiEQIZYTcfEV9AYvloQVvfqs5JaebcFZodAa2SUW8,3577
|
17
17
|
geoseeq/work_orders.py,sha256=5uLVVfdKE8qh4gGaHkdBpXJGRTujuSg59knWCqEET4A,8071
|
@@ -22,7 +22,7 @@ geoseeq/cli/detail.py,sha256=q8Suu-j2k18knfSVFG-SWWGNsKM-n8y9RMA3LcIIi9Y,4132
|
|
22
22
|
geoseeq/cli/download.py,sha256=_upzZo08K0fAPbEsyi1uN0HGNUaY1pl6OoGPcWmvSUY,17765
|
23
23
|
geoseeq/cli/fastq_utils.py,sha256=-bmeQLaiMBm57zWOF0R5OlWTU0_3sh1JBC1RYw2BOFM,3083
|
24
24
|
geoseeq/cli/get_eula.py,sha256=79mbUwyiF7O1r0g6UTxG9kJGQEqKuH805E6eLkPC6Y4,997
|
25
|
-
geoseeq/cli/main.py,sha256=
|
25
|
+
geoseeq/cli/main.py,sha256=zsPFQY__lqMeG_l4GTjonmddbe8p1FHjksouuK2U07c,3260
|
26
26
|
geoseeq/cli/manage.py,sha256=wGXAcVaXqE5JQEU8Jh6OlHr02nB396bpS_SFcOZdrEo,5929
|
27
27
|
geoseeq/cli/progress_bar.py,sha256=p1Xl01nkYxSBZCB30ue2verIIi22W93m3ZAMAxipD0g,738
|
28
28
|
geoseeq/cli/run.py,sha256=bx2AV6VIqOSTlxUda78xl0XxcZ8TXlQx02-e7iLQPwI,3838
|
@@ -37,9 +37,9 @@ geoseeq/cli/shared_params/id_handlers.py,sha256=501K9sCVkI0YGDQ62vXk_DM5lMMDrdB5
|
|
37
37
|
geoseeq/cli/shared_params/obj_getters.py,sha256=ZSkt6LnDkVFlNVYKgLrjzg60-6BthZMr3eeD3HNqzac,2741
|
38
38
|
geoseeq/cli/shared_params/opts_and_args.py,sha256=LrDkv9WtUryM4uUMXPRk04-EBcTQ7q5V6Yu-XRDUvvA,2083
|
39
39
|
geoseeq/cli/upload/__init__.py,sha256=3C9_S9t7chmYU-2ot89NV03x-EtmsjibulErKaU9w1k,627
|
40
|
-
geoseeq/cli/upload/upload.py,sha256=
|
40
|
+
geoseeq/cli/upload/upload.py,sha256=_ZR2tkugaB71rVTJFAwRCZLedqGO58sgTsHILebfvDs,9370
|
41
41
|
geoseeq/cli/upload/upload_advanced.py,sha256=Jq5eGe-wOdrzxGWVwaFPg0BAJcW0YSx_eHEmYjJeKuA,3434
|
42
|
-
geoseeq/cli/upload/upload_reads.py,sha256=
|
42
|
+
geoseeq/cli/upload/upload_reads.py,sha256=EMGqyZf11xwN4v2j8gNxMagTbE4kaOd-_hwupmg5I-8,10670
|
43
43
|
geoseeq/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
44
44
|
geoseeq/contrib/ncbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
45
45
|
geoseeq/contrib/ncbi/api.py,sha256=WQeLoGA_-Zha-QeSO8_i7HpvXyD8UkV0qc5okm11KiA,1056
|
@@ -63,8 +63,8 @@ geoseeq/plotting/map/map.py,sha256=h2QPLGqe-SamhfaTij53S9cQIiO8orCJUAUh0hRicSM,3
|
|
63
63
|
geoseeq/plotting/map/overlay.py,sha256=4VmxqOESTQra9tPr8b8OLEUhJSit9lNipabeSznEYwE,1795
|
64
64
|
geoseeq/result/__init__.py,sha256=IFHIyRV8ZzuKIfwfze1SXgcKwNMcSgMAknLHMkwjXIU,356
|
65
65
|
geoseeq/result/bioinfo.py,sha256=QQtbyogrdro9avJSN0713sxLVnVeA24mFw3hWtKDKyw,1782
|
66
|
-
geoseeq/result/file_download.py,sha256=
|
67
|
-
geoseeq/result/file_upload.py,sha256=
|
66
|
+
geoseeq/result/file_download.py,sha256=vbYo2B4JshTIqLaklcgcBb7NY9cD5pMkas95GuQxW8s,5776
|
67
|
+
geoseeq/result/file_upload.py,sha256=z3ImHlVhli6ZwOHP7GvJqxnVxKYpMyBojqrpdBSBJIs,13176
|
68
68
|
geoseeq/result/result_file.py,sha256=1Yj9fkZhds3J-tay6eNH2-EHi00MovHGV1M80_ckHD8,8677
|
69
69
|
geoseeq/result/result_folder.py,sha256=6porOXPh7Tpxw3oX5yMRPYQzNCGYqszqmFJd3SwQmTc,11122
|
70
70
|
geoseeq/result/utils.py,sha256=C-CxGzB3WddlnRiqFSkrY78I_m0yFgNqsTBRzGU-y8Q,2772
|
@@ -80,9 +80,9 @@ geoseeq/vc/vc_stub.py,sha256=IQr8dI0zsWKVAeY_5ybDD6n49_3othcgfHS3P0O9tuY,3110
|
|
80
80
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
81
81
|
tests/test_api_client.py,sha256=TS5njc5pcPP_Ycy-ljcfPVT1hQRBsFVdQ0lCqBmoesU,12810
|
82
82
|
tests/test_plotting.py,sha256=TcTu-2ARr8sxZJ7wPQxmbs3-gHw7uRvsgrhhhg0qKik,784
|
83
|
-
geoseeq-0.5.
|
84
|
-
geoseeq-0.5.
|
85
|
-
geoseeq-0.5.
|
86
|
-
geoseeq-0.5.
|
87
|
-
geoseeq-0.5.
|
88
|
-
geoseeq-0.5.
|
83
|
+
geoseeq-0.5.6a11.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
|
84
|
+
geoseeq-0.5.6a11.dist-info/METADATA,sha256=sVpz2que_a-pWGG7WNGLz2IFGoNOeofn27ZKxqZdsts,4806
|
85
|
+
geoseeq-0.5.6a11.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
86
|
+
geoseeq-0.5.6a11.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
|
87
|
+
geoseeq-0.5.6a11.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
|
88
|
+
geoseeq-0.5.6a11.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|