geoseeq 0.5.6a10__py3-none-any.whl → 0.5.6a11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geoseeq/cli/main.py +1 -1
- geoseeq/cli/upload/upload.py +3 -1
- geoseeq/cli/upload/upload_reads.py +86 -0
- geoseeq/result/file_download.py +5 -0
- geoseeq/result/file_upload.py +27 -5
- geoseeq/upload_download_manager.py +7 -3
- {geoseeq-0.5.6a10.dist-info → geoseeq-0.5.6a11.dist-info}/METADATA +1 -1
- {geoseeq-0.5.6a10.dist-info → geoseeq-0.5.6a11.dist-info}/RECORD +12 -12
- {geoseeq-0.5.6a10.dist-info → geoseeq-0.5.6a11.dist-info}/LICENSE +0 -0
- {geoseeq-0.5.6a10.dist-info → geoseeq-0.5.6a11.dist-info}/WHEEL +0 -0
- {geoseeq-0.5.6a10.dist-info → geoseeq-0.5.6a11.dist-info}/entry_points.txt +0 -0
- {geoseeq-0.5.6a10.dist-info → geoseeq-0.5.6a11.dist-info}/top_level.txt +0 -0
geoseeq/cli/main.py
CHANGED
@@ -53,7 +53,7 @@ def version():
|
|
53
53
|
Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
|
54
54
|
Run `geoseeq eula show` to view the EULA.
|
55
55
|
"""
|
56
|
-
click.echo('0.5.
|
56
|
+
click.echo('0.5.6a11') # remember to update setup
|
57
57
|
|
58
58
|
|
59
59
|
@main.group('advanced')
|
geoseeq/cli/upload/upload.py
CHANGED
@@ -37,6 +37,7 @@ hidden_option = click.option('--hidden/--no-hidden', default=False, help='Upload
|
|
37
37
|
@click.command('files')
|
38
38
|
@use_common_state
|
39
39
|
@click.option('--cores', default=1, help='Number of uploads to run in parallel')
|
40
|
+
@click.option('--threads-per-upload', default=4, help='Number of threads used to upload each file')
|
40
41
|
@yes_option
|
41
42
|
@private_option
|
42
43
|
@link_option
|
@@ -47,7 +48,7 @@ hidden_option = click.option('--hidden/--no-hidden', default=False, help='Upload
|
|
47
48
|
help='Specify a different name for the file on GeoSeeq than the local file name.')
|
48
49
|
@folder_id_arg
|
49
50
|
@click.argument('file_paths', type=click.Path(exists=True), nargs=-1)
|
50
|
-
def cli_upload_file(state, cores, yes, private, link_type, recursive, hidden, no_new_versions, geoseeq_file_name, folder_id, file_paths):
|
51
|
+
def cli_upload_file(state, cores, threads_per_upload, yes, private, link_type, recursive, hidden, no_new_versions, geoseeq_file_name, folder_id, file_paths):
|
51
52
|
"""Upload files to GeoSeeq.
|
52
53
|
|
53
54
|
This command uploads files to either a sample or project on GeoSeeq. It can be used to upload
|
@@ -106,6 +107,7 @@ def cli_upload_file(state, cores, yes, private, link_type, recursive, hidden, no
|
|
106
107
|
|
107
108
|
upload_manager = GeoSeeqUploadManager(
|
108
109
|
n_parallel_uploads=cores,
|
110
|
+
threads_per_upload=threads_per_upload,
|
109
111
|
link_type=link_type,
|
110
112
|
progress_tracker_factory=PBarManager().get_new_bar,
|
111
113
|
log_level=state.log_level,
|
@@ -132,6 +132,28 @@ def flatten_list_of_fastqs(filepaths):
|
|
132
132
|
return flattened
|
133
133
|
|
134
134
|
|
135
|
+
def _is_bam(path):
|
136
|
+
for ext in ['.bam', '.bai']:
|
137
|
+
if path.endswith(ext):
|
138
|
+
return True
|
139
|
+
return False
|
140
|
+
|
141
|
+
|
142
|
+
def flatten_list_of_bams(filepaths):
|
143
|
+
"""Turn a list of bam filepaths and txt files containing bam filepaths into a single list of bam filepaths."""
|
144
|
+
flattened = []
|
145
|
+
for path in filepaths:
|
146
|
+
if _is_bam(path):
|
147
|
+
flattened.append(path)
|
148
|
+
else:
|
149
|
+
with open(path) as f:
|
150
|
+
for line in f:
|
151
|
+
line = line.strip()
|
152
|
+
if line and not line.startswith('#'):
|
153
|
+
flattened.append(line)
|
154
|
+
return flattened
|
155
|
+
|
156
|
+
|
135
157
|
|
136
158
|
@click.command('reads')
|
137
159
|
@use_common_state
|
@@ -200,3 +222,67 @@ def cli_upload_reads_wizard(state, cores, overwrite, yes, regex, private, link_t
|
|
200
222
|
regex = _get_regex(knex, filepaths, module_name, proj, regex)
|
201
223
|
groups = _group_files(knex, filepaths, module_name, regex, yes)
|
202
224
|
_do_upload(groups, module_name, link_type, proj, filepaths, overwrite, no_new_versions, cores, state)
|
225
|
+
|
226
|
+
|
227
|
+
# @click.command('bam')
|
228
|
+
# @use_common_state
|
229
|
+
# @click.option('--genome', default=None, help='The genome aligned to the BAM files. Should be in 2bit format.')
|
230
|
+
# @click.option('--cores', default=1, help='Number of uploads to run in parallel')
|
231
|
+
# @overwrite_option
|
232
|
+
# @yes_option
|
233
|
+
# @click.option('--regex', default=None, help='An optional regex to use to extract sample names from the file names')
|
234
|
+
# @private_option
|
235
|
+
# @link_option
|
236
|
+
# @no_new_versions_option
|
237
|
+
# @project_id_arg
|
238
|
+
# @click.argument('files', type=click.Path(exists=True), nargs=-1)
|
239
|
+
# def cli_upload_bams(state, genome, cores, overwrite, yes, regex, private, link_type, no_new_versions, project_id, files):
|
240
|
+
"""Upload BAM files to GeoSeeq.
|
241
|
+
|
242
|
+
This command automatically groups bams with their index files.
|
243
|
+
|
244
|
+
---
|
245
|
+
|
246
|
+
Example Usage:
|
247
|
+
|
248
|
+
\b
|
249
|
+
# Upload a list of BAM files to a project, useful if you have hundreds of files
|
250
|
+
$ ls -1 path/to/bam/files/*.bam > file_list.txt
|
251
|
+
$ geoseeq upload bams "GeoSeeq/Example CLI Project" file_list.txt
|
252
|
+
|
253
|
+
\b
|
254
|
+
# Upload all the BAM files in a directory to a project with BAM indexes
|
255
|
+
$ geoseeq upload bams ed59b913-91ec-489b-a1b9-4ea137a6e5cf path/to/bam/files/*.bam path/to/bam/files/*.bam.bai
|
256
|
+
|
257
|
+
\b
|
258
|
+
# Upload all the BAM files in a directory to a project, performing 4 uploads in parallel
|
259
|
+
$ geoseeq upload bams --cores 4 ed59b913-91ec-489b-a1b9-4ea137a6e5cf path/to/bam/files/*.bam
|
260
|
+
|
261
|
+
\b
|
262
|
+
# Upload a list of BAM files to a project, automatically creating a new project and overwriting existing files
|
263
|
+
$ ls -1 path/to/bam/files/*.bam > file_list.txt
|
264
|
+
$ geoseeq upload bams --yes --overwrite "GeoSeeq/Example CLI Project" file_list.txt
|
265
|
+
|
266
|
+
---
|
267
|
+
|
268
|
+
Command Arguments:
|
269
|
+
|
270
|
+
[PROJECT_ID] Can be a project UUID, GeoSeeq Resource Number (GRN), or an
|
271
|
+
organization name and project name separated by a slash.
|
272
|
+
|
273
|
+
\b
|
274
|
+
Examples:
|
275
|
+
- Name pair: "GeoSeeq/Example CLI Project"
|
276
|
+
- UUID: "ed59b913-91ec-489b-a1b9-4ea137a6e5cf"
|
277
|
+
- GRN: "grn:gs1:project:ed59b913-91ec-489b-a1b9-4ea137a6e5cf"
|
278
|
+
|
279
|
+
\b
|
280
|
+
[FILES...] can be paths to BAM files or a file containing a list of paths, or a mix of both.
|
281
|
+
Example: "path/to/bam/files
|
282
|
+
"""
|
283
|
+
knex = state.get_knex()
|
284
|
+
proj = handle_project_id(knex, project_id, yes, private)
|
285
|
+
filepaths = {basename(line): line for line in flatten_list_of_bams(files)}
|
286
|
+
click.echo(f'Found {len(filepaths)} files to upload.', err=True)
|
287
|
+
groups = _group_files(knex, filepaths, 'bam::bam', regex, yes)
|
288
|
+
_do_upload(groups, 'bam::bam', link_type, proj, filepaths, overwrite, no_new_versions, cores, state)
|
geoseeq/result/file_download.py
CHANGED
@@ -46,6 +46,11 @@ def guess_download_kind(url):
|
|
46
46
|
|
47
47
|
def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None):
|
48
48
|
"""Return a local filepath to the downloaded file. Download the file."""
|
49
|
+
if filename and isfile(filename):
|
50
|
+
file_size = getsize(filename)
|
51
|
+
if file_size > 0:
|
52
|
+
logger.info(f"File already exists: {filename}. Not overwriting.")
|
53
|
+
return filename
|
49
54
|
if kind == 'guess':
|
50
55
|
kind = guess_download_kind(url)
|
51
56
|
logger.info(f"Guessed download kind: {kind} for {url}")
|
geoseeq/result/file_upload.py
CHANGED
@@ -31,14 +31,30 @@ class FileChunker:
|
|
31
31
|
chunk = f.read(self.chunk_size)
|
32
32
|
self.loaded_parts.append(chunk)
|
33
33
|
return self # convenience for chaining
|
34
|
+
|
35
|
+
def chunk_is_preloaded(self, num):
|
36
|
+
return len(self.loaded_parts) > num and self.loaded_parts[num]
|
37
|
+
|
38
|
+
def read_one_chunk(self, num):
|
39
|
+
if not self.chunk_is_preloaded(num):
|
40
|
+
logger.debug(f"Reading chunk {num} from {self.filepath}")
|
41
|
+
with open(self.filepath, "rb") as f:
|
42
|
+
f.seek(num * self.chunk_size)
|
43
|
+
chunk = f.read(self.chunk_size)
|
44
|
+
return chunk
|
45
|
+
return self.loaded_parts[num]
|
34
46
|
|
35
47
|
def get_chunk(self, num):
|
36
|
-
self.
|
37
|
-
|
48
|
+
if self.chunk_is_preloaded(num):
|
49
|
+
return self.loaded_parts[num]
|
50
|
+
return self.read_one_chunk(num)
|
38
51
|
|
39
52
|
def get_chunk_size(self, num):
|
40
|
-
self.
|
41
|
-
|
53
|
+
if num < (self.n_parts - 1): # all but the last chunk
|
54
|
+
return self.chunk_size
|
55
|
+
if self.chunk_is_preloaded(num): # last chunk, pre-loaded
|
56
|
+
return len(self.loaded_parts[num])
|
57
|
+
return len(self.read_one_chunk(num)) # last chunk, not pre-loaded
|
42
58
|
|
43
59
|
|
44
60
|
class ResumableUploadTracker:
|
@@ -159,6 +175,7 @@ class ResultFileUpload:
|
|
159
175
|
attempts = 0
|
160
176
|
while attempts < max_retries:
|
161
177
|
try:
|
178
|
+
logger.debug(f"Uploading part {num + 1} to {url}. Size: {len(file_chunk)} bytes.")
|
162
179
|
if session:
|
163
180
|
http_response = session.put(url, data=file_chunk)
|
164
181
|
else:
|
@@ -251,7 +268,12 @@ class ResultFileUpload:
|
|
251
268
|
resumable_upload_tracker.start_upload(upload_id, urls)
|
252
269
|
logger.info(f'Starting upload for "{filepath}"')
|
253
270
|
complete_parts = []
|
254
|
-
file_chunker = FileChunker(filepath, chunk_size)
|
271
|
+
file_chunker = FileChunker(filepath, chunk_size)
|
272
|
+
if file_chunker.file_size < 10 * FIVE_MB:
|
273
|
+
file_chunker.load_all_chunks()
|
274
|
+
logger.debug(f"Preloaded all chunks for {filepath}")
|
275
|
+
else:
|
276
|
+
logger.debug(f"Did not preload chunks for {filepath}")
|
255
277
|
if progress_tracker: progress_tracker.set_num_chunks(file_chunker.file_size)
|
256
278
|
complete_parts = self._upload_parts(
|
257
279
|
file_chunker,
|
@@ -21,7 +21,7 @@ def _make_in_process_logger(log_level):
|
|
21
21
|
def _upload_one_file(args):
|
22
22
|
(result_file, filepath, session, progress_tracker,
|
23
23
|
link_type, overwrite, log_level, parallel_uploads,
|
24
|
-
use_cache, no_new_versions) = args
|
24
|
+
use_cache, no_new_versions, threads_per_upload) = args
|
25
25
|
if parallel_uploads:
|
26
26
|
_make_in_process_logger(log_level)
|
27
27
|
if link_type == 'upload':
|
@@ -29,7 +29,8 @@ def _upload_one_file(args):
|
|
29
29
|
result_file.upload_file(
|
30
30
|
filepath,
|
31
31
|
session=session, overwrite=overwrite, progress_tracker=progress_tracker,
|
32
|
-
threads=
|
32
|
+
threads=threads_per_upload, use_cache=use_cache,
|
33
|
+
no_new_versions=no_new_versions
|
33
34
|
)
|
34
35
|
else:
|
35
36
|
result_file.link_file(link_type, filepath)
|
@@ -40,6 +41,7 @@ class GeoSeeqUploadManager:
|
|
40
41
|
|
41
42
|
def __init__(self,
|
42
43
|
n_parallel_uploads=1,
|
44
|
+
threads_per_upload=4,
|
43
45
|
session=None,
|
44
46
|
link_type='upload',
|
45
47
|
progress_tracker_factory=None,
|
@@ -56,6 +58,7 @@ class GeoSeeqUploadManager:
|
|
56
58
|
self._result_files = []
|
57
59
|
self.no_new_versions = no_new_versions
|
58
60
|
self.use_cache = use_cache
|
61
|
+
self.threads_per_upload = threads_per_upload
|
59
62
|
|
60
63
|
def add_result_file(self, result_file, local_path):
|
61
64
|
self._result_files.append((result_file, local_path))
|
@@ -80,7 +83,8 @@ class GeoSeeqUploadManager:
|
|
80
83
|
result_file, local_path,
|
81
84
|
self.session, self.progress_tracker_factory(local_path),
|
82
85
|
self.link_type, self.overwrite, self.log_level,
|
83
|
-
self.n_parallel_uploads > 1, self.use_cache, self.no_new_versions
|
86
|
+
self.n_parallel_uploads > 1, self.use_cache, self.no_new_versions,
|
87
|
+
self.threads_per_upload
|
84
88
|
) for result_file, local_path in self._result_files
|
85
89
|
]
|
86
90
|
out = []
|
@@ -11,7 +11,7 @@ geoseeq/project.py,sha256=-9Y2ik0-BpT3iqh89v8VQBbdadhI58oaUP9oZK8oetc,13741
|
|
11
11
|
geoseeq/remote_object.py,sha256=Es-JlAz8iLRmCpAzh1MOwUh2MqtbuQM-p8wHIBAqNlQ,7131
|
12
12
|
geoseeq/sample.py,sha256=whgEVk6GnDJJLjn5uTOqFqRtVxZD3BgjTo7brAC5noU,7981
|
13
13
|
geoseeq/search.py,sha256=gawad6Cx5FxJBPlYkXWb-UKAO-UC0_yhvyU9Ca1kaNI,3388
|
14
|
-
geoseeq/upload_download_manager.py,sha256=
|
14
|
+
geoseeq/upload_download_manager.py,sha256=aydSVTAjyupd4gkqmImtcSTXEPBAAqQ1HFgfAk83Scw,7605
|
15
15
|
geoseeq/user.py,sha256=tol8i1UGLRrbMw5jeJDnna1ikRgrCDd50Jxz0a1lSgg,690
|
16
16
|
geoseeq/utils.py,sha256=PDRiEQIZYTcfEV9AYvloQVvfqs5JaebcFZodAa2SUW8,3577
|
17
17
|
geoseeq/work_orders.py,sha256=5uLVVfdKE8qh4gGaHkdBpXJGRTujuSg59knWCqEET4A,8071
|
@@ -22,7 +22,7 @@ geoseeq/cli/detail.py,sha256=q8Suu-j2k18knfSVFG-SWWGNsKM-n8y9RMA3LcIIi9Y,4132
|
|
22
22
|
geoseeq/cli/download.py,sha256=_upzZo08K0fAPbEsyi1uN0HGNUaY1pl6OoGPcWmvSUY,17765
|
23
23
|
geoseeq/cli/fastq_utils.py,sha256=-bmeQLaiMBm57zWOF0R5OlWTU0_3sh1JBC1RYw2BOFM,3083
|
24
24
|
geoseeq/cli/get_eula.py,sha256=79mbUwyiF7O1r0g6UTxG9kJGQEqKuH805E6eLkPC6Y4,997
|
25
|
-
geoseeq/cli/main.py,sha256=
|
25
|
+
geoseeq/cli/main.py,sha256=zsPFQY__lqMeG_l4GTjonmddbe8p1FHjksouuK2U07c,3260
|
26
26
|
geoseeq/cli/manage.py,sha256=wGXAcVaXqE5JQEU8Jh6OlHr02nB396bpS_SFcOZdrEo,5929
|
27
27
|
geoseeq/cli/progress_bar.py,sha256=p1Xl01nkYxSBZCB30ue2verIIi22W93m3ZAMAxipD0g,738
|
28
28
|
geoseeq/cli/run.py,sha256=bx2AV6VIqOSTlxUda78xl0XxcZ8TXlQx02-e7iLQPwI,3838
|
@@ -37,9 +37,9 @@ geoseeq/cli/shared_params/id_handlers.py,sha256=501K9sCVkI0YGDQ62vXk_DM5lMMDrdB5
|
|
37
37
|
geoseeq/cli/shared_params/obj_getters.py,sha256=ZSkt6LnDkVFlNVYKgLrjzg60-6BthZMr3eeD3HNqzac,2741
|
38
38
|
geoseeq/cli/shared_params/opts_and_args.py,sha256=LrDkv9WtUryM4uUMXPRk04-EBcTQ7q5V6Yu-XRDUvvA,2083
|
39
39
|
geoseeq/cli/upload/__init__.py,sha256=3C9_S9t7chmYU-2ot89NV03x-EtmsjibulErKaU9w1k,627
|
40
|
-
geoseeq/cli/upload/upload.py,sha256=
|
40
|
+
geoseeq/cli/upload/upload.py,sha256=_ZR2tkugaB71rVTJFAwRCZLedqGO58sgTsHILebfvDs,9370
|
41
41
|
geoseeq/cli/upload/upload_advanced.py,sha256=Jq5eGe-wOdrzxGWVwaFPg0BAJcW0YSx_eHEmYjJeKuA,3434
|
42
|
-
geoseeq/cli/upload/upload_reads.py,sha256=
|
42
|
+
geoseeq/cli/upload/upload_reads.py,sha256=EMGqyZf11xwN4v2j8gNxMagTbE4kaOd-_hwupmg5I-8,10670
|
43
43
|
geoseeq/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
44
44
|
geoseeq/contrib/ncbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
45
45
|
geoseeq/contrib/ncbi/api.py,sha256=WQeLoGA_-Zha-QeSO8_i7HpvXyD8UkV0qc5okm11KiA,1056
|
@@ -63,8 +63,8 @@ geoseeq/plotting/map/map.py,sha256=h2QPLGqe-SamhfaTij53S9cQIiO8orCJUAUh0hRicSM,3
|
|
63
63
|
geoseeq/plotting/map/overlay.py,sha256=4VmxqOESTQra9tPr8b8OLEUhJSit9lNipabeSznEYwE,1795
|
64
64
|
geoseeq/result/__init__.py,sha256=IFHIyRV8ZzuKIfwfze1SXgcKwNMcSgMAknLHMkwjXIU,356
|
65
65
|
geoseeq/result/bioinfo.py,sha256=QQtbyogrdro9avJSN0713sxLVnVeA24mFw3hWtKDKyw,1782
|
66
|
-
geoseeq/result/file_download.py,sha256=
|
67
|
-
geoseeq/result/file_upload.py,sha256=
|
66
|
+
geoseeq/result/file_download.py,sha256=vbYo2B4JshTIqLaklcgcBb7NY9cD5pMkas95GuQxW8s,5776
|
67
|
+
geoseeq/result/file_upload.py,sha256=z3ImHlVhli6ZwOHP7GvJqxnVxKYpMyBojqrpdBSBJIs,13176
|
68
68
|
geoseeq/result/result_file.py,sha256=1Yj9fkZhds3J-tay6eNH2-EHi00MovHGV1M80_ckHD8,8677
|
69
69
|
geoseeq/result/result_folder.py,sha256=6porOXPh7Tpxw3oX5yMRPYQzNCGYqszqmFJd3SwQmTc,11122
|
70
70
|
geoseeq/result/utils.py,sha256=C-CxGzB3WddlnRiqFSkrY78I_m0yFgNqsTBRzGU-y8Q,2772
|
@@ -80,9 +80,9 @@ geoseeq/vc/vc_stub.py,sha256=IQr8dI0zsWKVAeY_5ybDD6n49_3othcgfHS3P0O9tuY,3110
|
|
80
80
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
81
81
|
tests/test_api_client.py,sha256=TS5njc5pcPP_Ycy-ljcfPVT1hQRBsFVdQ0lCqBmoesU,12810
|
82
82
|
tests/test_plotting.py,sha256=TcTu-2ARr8sxZJ7wPQxmbs3-gHw7uRvsgrhhhg0qKik,784
|
83
|
-
geoseeq-0.5.
|
84
|
-
geoseeq-0.5.
|
85
|
-
geoseeq-0.5.
|
86
|
-
geoseeq-0.5.
|
87
|
-
geoseeq-0.5.
|
88
|
-
geoseeq-0.5.
|
83
|
+
geoseeq-0.5.6a11.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
|
84
|
+
geoseeq-0.5.6a11.dist-info/METADATA,sha256=sVpz2que_a-pWGG7WNGLz2IFGoNOeofn27ZKxqZdsts,4806
|
85
|
+
geoseeq-0.5.6a11.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
86
|
+
geoseeq-0.5.6a11.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
|
87
|
+
geoseeq-0.5.6a11.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
|
88
|
+
geoseeq-0.5.6a11.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|