geoseeq 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- geoseeq/cli/detail.py +39 -0
- geoseeq/cli/main.py +5 -2
- geoseeq/cli/upload/__init__.py +8 -1
- geoseeq/cli/upload/upload_advanced.py +92 -0
- geoseeq/project.py +4 -1
- geoseeq/result/file_upload.py +11 -4
- {geoseeq-0.3.0.dist-info → geoseeq-0.3.1.dist-info}/METADATA +1 -1
- {geoseeq-0.3.0.dist-info → geoseeq-0.3.1.dist-info}/RECORD +12 -10
- {geoseeq-0.3.0.dist-info → geoseeq-0.3.1.dist-info}/LICENSE +0 -0
- {geoseeq-0.3.0.dist-info → geoseeq-0.3.1.dist-info}/WHEEL +0 -0
- {geoseeq-0.3.0.dist-info → geoseeq-0.3.1.dist-info}/entry_points.txt +0 -0
- {geoseeq-0.3.0.dist-info → geoseeq-0.3.1.dist-info}/top_level.txt +0 -0
geoseeq/cli/detail.py
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
import click
|
2
|
+
import json
|
3
|
+
from .shared_params import (
|
4
|
+
use_common_state,
|
5
|
+
project_id_arg,
|
6
|
+
sample_ids_arg,
|
7
|
+
yes_option,
|
8
|
+
private_option,
|
9
|
+
org_id_arg,
|
10
|
+
handle_project_id,
|
11
|
+
handle_multiple_sample_ids,
|
12
|
+
handle_org_id,
|
13
|
+
)
|
14
|
+
from geoseeq.id_constructors import resolve_id
|
15
|
+
|
16
|
+
|
17
|
+
# Parent group for the `detail` sub-commands; the docstring below doubles as
# the help text click prints for the group.
@click.group(name='detail')
def cli_detail():
    """Detail objects on GeoSeeq."""
|
21
|
+
|
22
|
+
|
23
|
+
@cli_detail.command('folder')
@use_common_state
@click.argument('grn')
def detail_folder(state, grn):
    """Print details for a result folder and each file it contains."""
    kind, rfolder = resolve_id(state.get_knex(), grn)
    # Validate explicitly instead of using `assert`: asserts are stripped when
    # Python runs with -O, and a click error gives the user a readable message
    # rather than a bare traceback.
    if kind != 'folder':
        raise click.BadParameter(
            'expected an id that resolves to a folder, got "{}"'.format(kind),
            param_hint='grn',
        )

    click.echo('Folder:')
    click.echo(rfolder)
    click.echo('Created at: {}'.format(rfolder.created_at))
    click.echo('Updated at: {}'.format(rfolder.updated_at))
    click.echo('Files:')
    for rfile in rfolder.get_result_files():
        click.echo(rfile)
        click.echo('Created at: {}'.format(rfile.created_at))
        click.echo('Updated at: {}'.format(rfile.updated_at))
        # stored_data is dumped as indented JSON for readability.
        click.echo(json.dumps(rfile.stored_data, indent=2))
        # Separator line between consecutive files.
        click.echo('--')
|
geoseeq/cli/main.py
CHANGED
@@ -7,7 +7,7 @@ import click
|
|
7
7
|
from .copy import cli_copy
|
8
8
|
from .manage import cli_manage
|
9
9
|
from .download import cli_download
|
10
|
-
from .upload import cli_upload
|
10
|
+
from .upload import cli_upload, cli_upload_advanced
|
11
11
|
from .user import cli_user
|
12
12
|
from .view import cli_view
|
13
13
|
from .search import cli_search
|
@@ -15,6 +15,7 @@ from geoseeq.vc.cli import cli_vc
|
|
15
15
|
from geoseeq.knex import DEFAULT_ENDPOINT
|
16
16
|
from .shared_params.config import set_profile
|
17
17
|
from .shared_params.opts_and_args import overwrite_option
|
18
|
+
from .detail import cli_detail
|
18
19
|
|
19
20
|
logger = logging.getLogger('geoseeq_api')
|
20
21
|
handler = logging.StreamHandler()
|
@@ -35,7 +36,7 @@ main.add_command(cli_search)
|
|
35
36
|
@main.command()
|
36
37
|
def version():
|
37
38
|
"""Print the version of the Geoseeq API being used."""
|
38
|
-
click.echo('0.3.
|
39
|
+
click.echo('0.3.1') # remember to update setup
|
39
40
|
|
40
41
|
|
41
42
|
@main.group('advanced')
|
@@ -45,6 +46,8 @@ def cli_advanced():
|
|
45
46
|
|
46
47
|
cli_advanced.add_command(cli_copy)
|
47
48
|
cli_advanced.add_command(cli_user)
|
49
|
+
cli_advanced.add_command(cli_detail)
|
50
|
+
cli_advanced.add_command(cli_upload_advanced)
|
48
51
|
|
49
52
|
@cli_advanced.group('experimental')
|
50
53
|
def cli_experimental():
|
geoseeq/cli/upload/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from .upload import (
|
|
5
5
|
cli_metadata,
|
6
6
|
)
|
7
7
|
from .upload_reads import cli_upload_reads_wizard
|
8
|
-
|
8
|
+
from .upload_advanced import cli_find_urls_for_reads
|
9
9
|
|
10
10
|
@click.group('upload')
|
11
11
|
def cli_upload():
|
@@ -15,3 +15,10 @@ def cli_upload():
|
|
15
15
|
cli_upload.add_command(cli_upload_reads_wizard)
|
16
16
|
cli_upload.add_command(cli_upload_file)
|
17
17
|
cli_upload.add_command(cli_metadata)
|
18
|
+
|
19
|
+
# Separate group object that reuses the 'upload' name; sub-commands are
# attached to it via add_command after the definition.
@click.group(name='upload')
def cli_upload_advanced():
    """Advanced tools to upload files to GeoSeeq."""
|
23
|
+
|
24
|
+
cli_upload_advanced.add_command(cli_find_urls_for_reads)
|
@@ -0,0 +1,92 @@
|
|
1
|
+
import logging
|
2
|
+
import click
|
3
|
+
import requests
|
4
|
+
from os.path import basename, getsize
|
5
|
+
from .upload_reads import (
|
6
|
+
_make_in_process_logger,
|
7
|
+
_get_regex,
|
8
|
+
_group_files,
|
9
|
+
flatten_list_of_fastqs,
|
10
|
+
)
|
11
|
+
|
12
|
+
from multiprocessing import Pool, current_process
|
13
|
+
|
14
|
+
from geoseeq.cli.constants import *
|
15
|
+
from geoseeq.cli.shared_params import (
|
16
|
+
handle_project_id,
|
17
|
+
private_option,
|
18
|
+
module_option,
|
19
|
+
project_id_arg,
|
20
|
+
overwrite_option,
|
21
|
+
yes_option,
|
22
|
+
use_common_state,
|
23
|
+
)
|
24
|
+
|
25
|
+
from geoseeq.constants import FASTQ_MODULE_NAMES
|
26
|
+
from geoseeq.cli.progress_bar import PBarManager
|
27
|
+
|
28
|
+
logger = logging.getLogger('geoseeq_api')
|
29
|
+
|
30
|
+
|
31
|
+
def _keep_only_authentication_url_args(url):
|
32
|
+
"""Return a url with only the S3 authentication args"""
|
33
|
+
root, args = url.split('?')
|
34
|
+
args = args.split('&')
|
35
|
+
args = [arg for arg in args if arg.startswith('AWSAccessKeyId=') or arg.startswith('Signature=')]
|
36
|
+
return root + '?' + '&'.join(args)
|
37
|
+
|
38
|
+
|
39
|
+
def _get_url_for_one_file(args):
    """Resolve the target upload url for a single local file.

    `args` is one packed tuple ``(result_file, filepath, overwrite, log_level)``
    so that the function can be mapped over a multiprocessing pool.

    Returns ``(filepath, url)``. Returns ``None`` when the remote file already
    exists and `overwrite` is falsy.
    """
    remote_file, local_path, replace_existing, level = args
    _make_in_process_logger(level)
    if not replace_existing and remote_file.exists():
        return None
    remote_file = remote_file.idem()
    n_bytes = getsize(local_path)
    # A chunk size one byte larger than the file makes the multipart prep plan
    # exactly one part, so a single url covers the whole file.
    _upload_id, part_urls = remote_file._prep_multipart_upload(
        local_path, n_bytes, n_bytes + 1, {}
    )
    # The single part's url is looked up under the string key '1'.
    target_url = _keep_only_authentication_url_args(part_urls['1'])
    return local_path, target_url
|
50
|
+
|
51
|
+
|
52
|
+
def _find_target_urls(groups, module_name, lib, filepaths, overwrite, cores, state):
    """Use GeoSeeq to get target urls for a set of files.

    For every group, the sample named ``group['sample_name']`` and its
    `module_name` result folder are fetched or created via ``idem()``. One url
    lookup is then queued per entry in ``group['fields']``. The lookups run in
    a pool of `cores` worker processes.

    Yields ``(file_name, target_url)`` tuples in completion order, which is not
    necessarily input order. Files whose lookup was skipped produce no output:
    the worker returns ``None`` when the remote file already exists and
    `overwrite` is falsy.
    """
    find_url_args = []
    for group in groups:
        sample = lib.sample(group['sample_name']).idem()
        read_folder = sample.result_folder(module_name).idem()
        for field_name, path in group['fields'].items():
            result_file = read_folder.read_file(field_name)
            # `filepaths` maps the key stored in the group to the path on disk.
            filepath = filepaths[path]
            find_url_args.append((result_file, filepath, overwrite, state.log_level))

    with Pool(cores) as p:
        for result in p.imap_unordered(_get_url_for_one_file, find_url_args):
            # Unpacking a skipped file's None result would raise TypeError.
            if result is None:
                continue
            file_name, target_url = result
            yield file_name, target_url
|
70
|
+
|
71
|
+
|
72
|
+
@click.command('read-links')
@use_common_state
@click.option('--cores', default=1, help='Number of uploads to run in parallel')
@overwrite_option
@yes_option
@click.option('--regex', default=None, help='An optional regex to use to extract sample names from the file names')
@private_option
@module_option(FASTQ_MODULE_NAMES)
@project_id_arg
@click.argument('fastq_files', type=click.Path(exists=True), nargs=-1)
def cli_find_urls_for_reads(state, cores, overwrite, yes, regex, private, module_name, project_id, fastq_files):
    """Print a two column list with filenames and a target storage URL
    """
    knex = state.get_knex()
    proj = handle_project_id(knex, project_id, yes, private)

    # Index every fastq path by its basename; a later path with the same
    # basename replaces an earlier one.
    filepaths = {}
    for fastq_path in flatten_list_of_fastqs(fastq_files):
        filepaths[basename(fastq_path)] = fastq_path
    # The count goes to stderr, separate from the listing printed below.
    click.echo(f'Found {len(filepaths)} files to upload.', err=True)

    sample_regex = _get_regex(knex, filepaths, module_name, proj, regex)
    groups = _group_files(knex, filepaths, module_name, sample_regex, yes)

    url_pairs = _find_target_urls(groups, module_name, proj, filepaths, overwrite, cores, state)
    for file_name, target_url in url_pairs:
        # One tab-separated row per file, written to the configured outfile.
        print(f'{file_name}\t{target_url}', file=state.outfile)
|
geoseeq/project.py
CHANGED
@@ -20,6 +20,7 @@ class Project(RemoteObject):
|
|
20
20
|
]
|
21
21
|
optional_remote_fields = [
|
22
22
|
"privacy_level",
|
23
|
+
"samples_count",
|
23
24
|
]
|
24
25
|
parent_field = "org"
|
25
26
|
url_prefix = "sample_groups"
|
@@ -258,7 +259,9 @@ class Project(RemoteObject):
|
|
258
259
|
@property
|
259
260
|
def n_samples(self):
|
260
261
|
"""Return the number of samples in this project."""
|
261
|
-
|
262
|
+
if self.hasattr('samples_count') and self.samples_count is not None:
|
263
|
+
return self.samples_count
|
264
|
+
return len(list(self.get_sample_uuids()))
|
262
265
|
|
263
266
|
def bulk_find_files(self,
|
264
267
|
sample_uuids=[],
|
geoseeq/result/file_upload.py
CHANGED
@@ -41,14 +41,13 @@ class FileChunker:
|
|
41
41
|
|
42
42
|
class ResultFileUpload:
|
43
43
|
"""Abstract class that handles upload methods for result files."""
|
44
|
-
|
45
|
-
def
|
46
|
-
n_parts = int(file_size / chunk_size) + 1
|
44
|
+
|
45
|
+
def _create_multipart_upload(self, filepath, file_size, optional_fields):
|
47
46
|
optional_fields = optional_fields if optional_fields else {}
|
48
47
|
optional_fields.update(
|
49
48
|
{
|
50
49
|
"md5_checksum": md5_checksum(filepath),
|
51
|
-
"file_size_bytes":
|
50
|
+
"file_size_bytes": file_size,
|
52
51
|
}
|
53
52
|
)
|
54
53
|
data = {
|
@@ -57,6 +56,11 @@ class ResultFileUpload:
|
|
57
56
|
"result_type": "sample" if self.is_sample_result else "group",
|
58
57
|
}
|
59
58
|
response = self.knex.post(f"/ar_fields/{self.uuid}/create_upload", json=data)
|
59
|
+
return response
|
60
|
+
|
61
|
+
def _prep_multipart_upload(self, filepath, file_size, chunk_size, optional_fields):
|
62
|
+
n_parts = int(file_size / chunk_size) + 1
|
63
|
+
response = self._create_multipart_upload(filepath, file_size, optional_fields)
|
60
64
|
upload_id = response["upload_id"]
|
61
65
|
parts = list(range(1, n_parts + 1))
|
62
66
|
data = {
|
@@ -105,6 +109,7 @@ class ResultFileUpload:
|
|
105
109
|
|
106
110
|
def _upload_parts(self, file_chunker, urls, max_retries, session, progress_tracker, threads):
|
107
111
|
if threads == 1:
|
112
|
+
logger.info(f"Uploading parts in series for {file_chunker.filepath}")
|
108
113
|
complete_parts = []
|
109
114
|
for num, url in enumerate(list(urls.values())):
|
110
115
|
response_part = self._upload_one_part(file_chunker, url, num, max_retries, session)
|
@@ -114,6 +119,7 @@ class ResultFileUpload:
|
|
114
119
|
return complete_parts
|
115
120
|
|
116
121
|
with ThreadPoolExecutor(max_workers=threads) as executor:
|
122
|
+
logger.info(f"Uploading parts in parallel for {file_chunker.filepath} with {threads} threads.")
|
117
123
|
futures = []
|
118
124
|
for num, url in enumerate(list(urls.values())):
|
119
125
|
future = executor.submit(
|
@@ -128,6 +134,7 @@ class ResultFileUpload:
|
|
128
134
|
logger.info(
|
129
135
|
f'Uploaded part {response_part["PartNumber"]} of {len(urls)} for "{file_chunker.filepath}"'
|
130
136
|
)
|
137
|
+
complete_parts = sorted(complete_parts, key=lambda x: x["PartNumber"])
|
131
138
|
return complete_parts
|
132
139
|
|
133
140
|
def multipart_upload_file(
|
@@ -7,7 +7,7 @@ geoseeq/file_system_cache.py,sha256=7pdM-1VncOvjkYv2pOocPHOXnEWz0jKhH6sm1LaXtoU,
|
|
7
7
|
geoseeq/knex.py,sha256=WWYV3rwGuCdWL6zucgeExbaMmTg-I7IYqRHuvxhc6Q4,6007
|
8
8
|
geoseeq/organization.py,sha256=a9xmGDE0tQsjPJfyFkYnWagxZ8xpdeckkwvkhH6LNIk,2462
|
9
9
|
geoseeq/pipeline.py,sha256=RuAHyJM0wb0Z7WUPT7_8O6wB4UH5VUiBaPmWsLVeIKo,6819
|
10
|
-
geoseeq/project.py,sha256=
|
10
|
+
geoseeq/project.py,sha256=8HrK9JRhhXV03E9i7FQHPNj18vaEn7W81M7Ak0_S0-o,13080
|
11
11
|
geoseeq/remote_object.py,sha256=6owZTIubA2wwbA4AiL0HdfWDMpZjOhUgqv_KxNG2XZg,6613
|
12
12
|
geoseeq/sample.py,sha256=348NgY7wQBmFpoTq24T0ffVQ66iKB-hsCx2YdsBGTlc,4854
|
13
13
|
geoseeq/search.py,sha256=gawad6Cx5FxJBPlYkXWb-UKAO-UC0_yhvyU9Ca1kaNI,3388
|
@@ -17,9 +17,10 @@ geoseeq/work_orders.py,sha256=5uLVVfdKE8qh4gGaHkdBpXJGRTujuSg59knWCqEET4A,8071
|
|
17
17
|
geoseeq/cli/__init__.py,sha256=4WnK87K5seRK3SGJAxNWnQTqyg5uBhdhrOrzB1D4b3M,24
|
18
18
|
geoseeq/cli/constants.py,sha256=Do5AUf9lMO9_P8KpFJ3XwwFBAWsxSjZ6sx9_QEGyC_c,176
|
19
19
|
geoseeq/cli/copy.py,sha256=ocJ-T6vZBTPELUSBA65KndzAugrBWMq-3ux0dbffacE,1997
|
20
|
+
geoseeq/cli/detail.py,sha256=uWxJ3v2GTpfGCxXkFHRqYilEUpZjJTkzES-Mpa2HEe4,1040
|
20
21
|
geoseeq/cli/download.py,sha256=Hn2RhRgUpy1QDZuVkG6JgKlthGLnnfVYXIYrVsYcYL4,11859
|
21
22
|
geoseeq/cli/fastq_utils.py,sha256=-bmeQLaiMBm57zWOF0R5OlWTU0_3sh1JBC1RYw2BOFM,3083
|
22
|
-
geoseeq/cli/main.py,sha256=
|
23
|
+
geoseeq/cli/main.py,sha256=JgaUKKK_rm1CuGdUBzg2m_WJHV5fWSRj5DPTSRlf1xM,2050
|
23
24
|
geoseeq/cli/manage.py,sha256=xy_iMfqSSMaSTQNYiTw2xEWpH4mpTAq4Cf_6QemEMnI,5409
|
24
25
|
geoseeq/cli/progress_bar.py,sha256=p1Xl01nkYxSBZCB30ue2verIIi22W93m3ZAMAxipD0g,738
|
25
26
|
geoseeq/cli/search.py,sha256=KpqKiq4-wbgO6xrx58NxomYZcx451NqzA_MecgiND1g,881
|
@@ -32,8 +33,9 @@ geoseeq/cli/shared_params/config.py,sha256=06KrZhC3zljveo_21YvaZPcKIzyDBGHT_Yr79
|
|
32
33
|
geoseeq/cli/shared_params/id_handlers.py,sha256=7s2g-UfKoTfwmDX_FI96-M_g_U1yYBUEj-8yy8NC_c8,6493
|
33
34
|
geoseeq/cli/shared_params/obj_getters.py,sha256=ZSkt6LnDkVFlNVYKgLrjzg60-6BthZMr3eeD3HNqzac,2741
|
34
35
|
geoseeq/cli/shared_params/opts_and_args.py,sha256=Jy2rVQydlvkgjoA_BYjSNuI-wUSPmE2S1C1rN6bycBI,1399
|
35
|
-
geoseeq/cli/upload/__init__.py,sha256=
|
36
|
+
geoseeq/cli/upload/__init__.py,sha256=Wf30XGLllFFnnfeOnpYsO3SXyaq8yclHcYxGUNdSh6o,562
|
36
37
|
geoseeq/cli/upload/upload.py,sha256=Ikly-vfLzDXpaMpyFTim1CfgqNjtrnRQHgOMi_7JubU,6367
|
38
|
+
geoseeq/cli/upload/upload_advanced.py,sha256=Jq5eGe-wOdrzxGWVwaFPg0BAJcW0YSx_eHEmYjJeKuA,3434
|
37
39
|
geoseeq/cli/upload/upload_reads.py,sha256=f5NPbnlZg93zjDUIX_OsjyOyvvEtkIn_ioYLFfClfjw,7583
|
38
40
|
geoseeq/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
39
41
|
geoseeq/contrib/ncbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -59,7 +61,7 @@ geoseeq/plotting/map/overlay.py,sha256=4VmxqOESTQra9tPr8b8OLEUhJSit9lNipabeSznEY
|
|
59
61
|
geoseeq/result/__init__.py,sha256=IFHIyRV8ZzuKIfwfze1SXgcKwNMcSgMAknLHMkwjXIU,356
|
60
62
|
geoseeq/result/bioinfo.py,sha256=QQtbyogrdro9avJSN0713sxLVnVeA24mFw3hWtKDKyw,1782
|
61
63
|
geoseeq/result/file_download.py,sha256=R087ypOAu2pMZ9vXvkO9babPBNtRXTF3ZhpPldcT_jM,4593
|
62
|
-
geoseeq/result/file_upload.py,sha256=
|
64
|
+
geoseeq/result/file_upload.py,sha256=g29WES3IjV8pmB8g3VRzxUk3wsSNgkCB6HIqZ_4ctuY,7158
|
63
65
|
geoseeq/result/result_file.py,sha256=YMCAivUXgHUNEhH6yTGCHJh9gXHd8OLjjbnccdvW7iw,7138
|
64
66
|
geoseeq/result/result_folder.py,sha256=qmxm-Z6DI1ohGnIJgQfh27AVJ1dbHM_p2XprepwUKEg,7782
|
65
67
|
geoseeq/result/utils.py,sha256=C-CxGzB3WddlnRiqFSkrY78I_m0yFgNqsTBRzGU-y8Q,2772
|
@@ -75,9 +77,9 @@ geoseeq/vc/vc_stub.py,sha256=IQr8dI0zsWKVAeY_5ybDD6n49_3othcgfHS3P0O9tuY,3110
|
|
75
77
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
76
78
|
tests/test_api_client.py,sha256=TS5njc5pcPP_Ycy-ljcfPVT1hQRBsFVdQ0lCqBmoesU,12810
|
77
79
|
tests/test_plotting.py,sha256=TcTu-2ARr8sxZJ7wPQxmbs3-gHw7uRvsgrhhhg0qKik,784
|
78
|
-
geoseeq-0.3.
|
79
|
-
geoseeq-0.3.
|
80
|
-
geoseeq-0.3.
|
81
|
-
geoseeq-0.3.
|
82
|
-
geoseeq-0.3.
|
83
|
-
geoseeq-0.3.
|
80
|
+
geoseeq-0.3.1.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
|
81
|
+
geoseeq-0.3.1.dist-info/METADATA,sha256=10uUQhfABpFPFN4KwAcA8NyABrFKgStOwecU65HlHvI,454
|
82
|
+
geoseeq-0.3.1.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
|
83
|
+
geoseeq-0.3.1.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
|
84
|
+
geoseeq-0.3.1.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
|
85
|
+
geoseeq-0.3.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|