geoseeq 0.2.22__tar.gz → 0.3.1__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {geoseeq-0.2.22 → geoseeq-0.3.1}/PKG-INFO +1 -1
- geoseeq-0.3.1/geoseeq/cli/detail.py +39 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/download.py +11 -11
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/main.py +5 -2
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/upload/__init__.py +8 -1
- geoseeq-0.3.1/geoseeq/cli/upload/upload_advanced.py +92 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/project.py +86 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/result/file_upload.py +11 -4
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq.egg-info/PKG-INFO +1 -1
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq.egg-info/SOURCES.txt +2 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/setup.py +1 -1
- {geoseeq-0.2.22 → geoseeq-0.3.1}/LICENSE +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/README.md +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/__init__.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/app.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/blob_constructors.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/bulk_creators.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/__init__.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/constants.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/copy.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/fastq_utils.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/manage.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/progress_bar.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/search.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/shared_params/__init__.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/shared_params/common_state.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/shared_params/config.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/shared_params/id_handlers.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/shared_params/obj_getters.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/shared_params/opts_and_args.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/upload/upload.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/upload/upload_reads.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/user.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/utils.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/view.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/constants.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/contrib/__init__.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/contrib/ncbi/__init__.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/contrib/ncbi/api.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/contrib/ncbi/bioproject.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/contrib/ncbi/cli.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/contrib/ncbi/setup_logging.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/file_system_cache.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/id_constructors/__init__.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/id_constructors/from_blobs.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/id_constructors/from_ids.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/id_constructors/from_names.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/id_constructors/from_uuids.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/id_constructors/resolvers.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/id_constructors/utils.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/knex.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/organization.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/pipeline.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/plotting/__init__.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/plotting/constants.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/plotting/highcharts.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/plotting/map/__init__.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/plotting/map/base_layer.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/plotting/map/map.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/plotting/map/overlay.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/plotting/selectable.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/remote_object.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/result/__init__.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/result/bioinfo.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/result/file_download.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/result/result_file.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/result/result_folder.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/result/utils.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/sample.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/search.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/user.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/utils.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/vc/__init__.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/vc/checksum.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/vc/cli.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/vc/clone.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/vc/constants.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/vc/vc_cache.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/vc/vc_dir.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/vc/vc_sample.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/vc/vc_stub.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/work_orders.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq.egg-info/dependency_links.txt +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq.egg-info/entry_points.txt +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq.egg-info/requires.txt +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq.egg-info/top_level.txt +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/pyproject.toml +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/setup.cfg +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/tests/__init__.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/tests/test_api_client.py +0 -0
- {geoseeq-0.2.22 → geoseeq-0.3.1}/tests/test_plotting.py +0 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
import click
|
2
|
+
import json
|
3
|
+
from .shared_params import (
|
4
|
+
use_common_state,
|
5
|
+
project_id_arg,
|
6
|
+
sample_ids_arg,
|
7
|
+
yes_option,
|
8
|
+
private_option,
|
9
|
+
org_id_arg,
|
10
|
+
handle_project_id,
|
11
|
+
handle_multiple_sample_ids,
|
12
|
+
handle_org_id,
|
13
|
+
)
|
14
|
+
from geoseeq.id_constructors import resolve_id
|
15
|
+
|
16
|
+
|
17
|
+
@click.group('detail')
def cli_detail():
    """Detail objects on GeoSeeq."""
    # Pure command container: subcommands attach themselves with
    # ``@cli_detail.command(...)``, so the group body has nothing to do.
|
21
|
+
|
22
|
+
|
23
|
+
@cli_detail.command('folder')
@use_common_state
@click.argument('grn')
def detail_folder(state, grn):
    """Print details about a result folder and the files it contains.

    GRN is the identifier of the folder; it is resolved with `resolve_id`.
    For the folder, and for every file returned by `get_result_files()`,
    the object itself and its creation/update timestamps are echoed. Each
    file's `stored_data` is additionally dumped as indented JSON.
    """
    kind, rfolder = resolve_id(state.get_knex(), grn)
    if kind != 'folder':
        # An `assert` would be stripped under `python -O` and would show the
        # user a bare traceback; report a proper CLI usage error instead.
        raise click.UsageError(
            f'Expected an ID that resolves to a folder, but "{grn}" resolved to: {kind}'
        )
    click.echo('Folder:')
    click.echo(rfolder)
    click.echo(f'Created at: {rfolder.created_at}')
    click.echo(f'Updated at: {rfolder.updated_at}')
    click.echo('Files:')
    for rfile in rfolder.get_result_files():
        click.echo(rfile)
        click.echo(f'Created at: {rfile.created_at}')
        click.echo(f'Updated at: {rfile.updated_at}')
        click.echo(json.dumps(rfile.stored_data, indent=2))
        click.echo('--')  # visual separator between files
|
@@ -166,22 +166,22 @@ def cli_download_files(
|
|
166
166
|
"""
|
167
167
|
knex = state.get_knex()
|
168
168
|
proj = handle_project_id(knex, project_id)
|
169
|
+
logger.info(f"Found project \"{proj.name}\"")
|
169
170
|
samples = []
|
170
171
|
if sample_ids:
|
171
172
|
logger.info(f"Fetching info for {len(sample_ids)} samples.")
|
172
173
|
samples = handle_multiple_sample_ids(knex, sample_ids, proj=proj)
|
173
174
|
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
response = knex.post(url, data)
|
175
|
+
response = proj.bulk_find_files(
|
176
|
+
sample_uuids=[s.uuid for s in samples],
|
177
|
+
sample_name_includes=sample_name_includes,
|
178
|
+
folder_types=folder_type,
|
179
|
+
folder_names=folder_name,
|
180
|
+
file_names=file_name,
|
181
|
+
extensions=extension,
|
182
|
+
with_versions=with_versions,
|
183
|
+
)
|
184
|
+
|
185
185
|
|
186
186
|
if not download:
|
187
187
|
data = json.dumps(response["links"])
|
@@ -7,7 +7,7 @@ import click
|
|
7
7
|
from .copy import cli_copy
|
8
8
|
from .manage import cli_manage
|
9
9
|
from .download import cli_download
|
10
|
-
from .upload import cli_upload
|
10
|
+
from .upload import cli_upload, cli_upload_advanced
|
11
11
|
from .user import cli_user
|
12
12
|
from .view import cli_view
|
13
13
|
from .search import cli_search
|
@@ -15,6 +15,7 @@ from geoseeq.vc.cli import cli_vc
|
|
15
15
|
from geoseeq.knex import DEFAULT_ENDPOINT
|
16
16
|
from .shared_params.config import set_profile
|
17
17
|
from .shared_params.opts_and_args import overwrite_option
|
18
|
+
from .detail import cli_detail
|
18
19
|
|
19
20
|
logger = logging.getLogger('geoseeq_api')
|
20
21
|
handler = logging.StreamHandler()
|
@@ -35,7 +36,7 @@ main.add_command(cli_search)
|
|
35
36
|
@main.command()
|
36
37
|
def version():
|
37
38
|
"""Print the version of the Geoseeq API being used."""
|
38
|
-
click.echo('0.
|
39
|
+
click.echo('0.3.1') # remember to update setup
|
39
40
|
|
40
41
|
|
41
42
|
@main.group('advanced')
|
@@ -45,6 +46,8 @@ def cli_advanced():
|
|
45
46
|
|
46
47
|
cli_advanced.add_command(cli_copy)
|
47
48
|
cli_advanced.add_command(cli_user)
|
49
|
+
cli_advanced.add_command(cli_detail)
|
50
|
+
cli_advanced.add_command(cli_upload_advanced)
|
48
51
|
|
49
52
|
@cli_advanced.group('experimental')
|
50
53
|
def cli_experimental():
|
@@ -5,7 +5,7 @@ from .upload import (
|
|
5
5
|
cli_metadata,
|
6
6
|
)
|
7
7
|
from .upload_reads import cli_upload_reads_wizard
|
8
|
-
|
8
|
+
from .upload_advanced import cli_find_urls_for_reads
|
9
9
|
|
10
10
|
@click.group('upload')
|
11
11
|
def cli_upload():
|
@@ -15,3 +15,10 @@ def cli_upload():
|
|
15
15
|
cli_upload.add_command(cli_upload_reads_wizard)
|
16
16
|
cli_upload.add_command(cli_upload_file)
|
17
17
|
cli_upload.add_command(cli_metadata)
|
18
|
+
|
19
|
+
@click.group('upload')
def cli_upload_advanced():
    """Advanced tools to upload files to GeoSeeq."""
    # Pure command container: subcommands are registered on it with
    # ``cli_upload_advanced.add_command(...)``.
|
23
|
+
|
24
|
+
cli_upload_advanced.add_command(cli_find_urls_for_reads)
|
@@ -0,0 +1,92 @@
|
|
1
|
+
import logging
|
2
|
+
import click
|
3
|
+
import requests
|
4
|
+
from os.path import basename, getsize
|
5
|
+
from .upload_reads import (
|
6
|
+
_make_in_process_logger,
|
7
|
+
_get_regex,
|
8
|
+
_group_files,
|
9
|
+
flatten_list_of_fastqs,
|
10
|
+
)
|
11
|
+
|
12
|
+
from multiprocessing import Pool, current_process
|
13
|
+
|
14
|
+
from geoseeq.cli.constants import *
|
15
|
+
from geoseeq.cli.shared_params import (
|
16
|
+
handle_project_id,
|
17
|
+
private_option,
|
18
|
+
module_option,
|
19
|
+
project_id_arg,
|
20
|
+
overwrite_option,
|
21
|
+
yes_option,
|
22
|
+
use_common_state,
|
23
|
+
)
|
24
|
+
|
25
|
+
from geoseeq.constants import FASTQ_MODULE_NAMES
|
26
|
+
from geoseeq.cli.progress_bar import PBarManager
|
27
|
+
|
28
|
+
logger = logging.getLogger('geoseeq_api')
|
29
|
+
|
30
|
+
|
31
|
+
def _keep_only_authentication_url_args(url):
|
32
|
+
"""Return a url with only the S3 authentication args"""
|
33
|
+
root, args = url.split('?')
|
34
|
+
args = args.split('&')
|
35
|
+
args = [arg for arg in args if arg.startswith('AWSAccessKeyId=') or arg.startswith('Signature=')]
|
36
|
+
return root + '?' + '&'.join(args)
|
37
|
+
|
38
|
+
|
39
|
+
def _get_url_for_one_file(args):
    """Return a `(filepath, url)` pair for one file, or None if it is skipped.

    `args` is a single tuple `(result_file, filepath, overwrite, log_level)`
    so the function can be mapped over by a process pool. None is returned
    when the remote file already exists and `overwrite` is falsy.
    """
    result_file, filepath, overwrite, log_level = args
    _make_in_process_logger(log_level)
    already_exists = result_file.exists()
    if already_exists and not overwrite:
        return None
    remote_file = result_file.idem()
    n_bytes = getsize(filepath)
    # A chunk size one byte larger than the file makes the part count
    # int(size / chunk) + 1 == 1, so exactly one upload URL is requested.
    _, part_urls = remote_file._prep_multipart_upload(filepath, n_bytes, n_bytes + 1, {})
    target_url = _keep_only_authentication_url_args(part_urls['1'])
    return filepath, target_url
|
50
|
+
|
51
|
+
|
52
|
+
def _find_target_urls(groups, module_name, lib, filepaths, overwrite, cores, state):
    """Use GeoSeeq to get target urls for a set of files.

    For every group a sample named `group['sample_name']` and a result folder
    named `module_name` are fetched or created (`idem()`), and one read file
    per entry of `group['fields']` is prepared. The URL lookups then run in a
    pool of `cores` worker processes.

    Yields `(file_name, target_url)` pairs in completion order, not input
    order. Files for which the worker returns None are skipped.
    """
    find_url_args = []
    for group in groups:
        sample = lib.sample(group['sample_name']).idem()
        read_folder = sample.result_folder(module_name).idem()

        for field_name, path in group['fields'].items():
            result_file = read_folder.read_file(field_name)
            find_url_args.append((
                result_file, filepaths[path], overwrite, state.log_level
            ))

    with Pool(cores) as p:
        for result in p.imap_unordered(_get_url_for_one_file, find_url_args):
            # The worker returns None for files that already exist when
            # overwrite is off; unpacking that directly raised TypeError.
            if result is None:
                continue
            file_name, target_url = result
            yield file_name, target_url
|
70
|
+
|
71
|
+
|
72
|
+
@click.command('read-links')
@use_common_state
@click.option('--cores', default=1, help='Number of uploads to run in parallel')
@overwrite_option
@yes_option
@click.option('--regex', default=None, help='An optional regex to use to extract sample names from the file names')
@private_option
@module_option(FASTQ_MODULE_NAMES)
@project_id_arg
@click.argument('fastq_files', type=click.Path(exists=True), nargs=-1)
def cli_find_urls_for_reads(state, cores, overwrite, yes, regex, private, module_name, project_id, fastq_files):
    """Print a two column list with filenames and a target storage URL
    """
    knex = state.get_knex()
    proj = handle_project_id(knex, project_id, yes, private)

    # Index the local paths by base name; a later path with the same base
    # name replaces an earlier one.
    filepaths = {}
    for fastq_path in flatten_list_of_fastqs(fastq_files):
        filepaths[basename(fastq_path)] = fastq_path
    click.echo(f'Found {len(filepaths)} files to upload.', err=True)

    sample_regex = _get_regex(knex, filepaths, module_name, proj, regex)
    groups = _group_files(knex, filepaths, module_name, sample_regex, yes)
    url_pairs = _find_target_urls(groups, module_name, proj, filepaths, overwrite, cores, state)
    for file_name, target_url in url_pairs:
        # Tab-separated output goes to the state's outfile, not stderr.
        print(f'{file_name}\t{target_url}', file=state.outfile)
|
@@ -4,7 +4,9 @@ from .sample import Sample
|
|
4
4
|
from .utils import paginated_iterator
|
5
5
|
import json
|
6
6
|
import pandas as pd
|
7
|
+
import logging
|
7
8
|
|
9
|
+
logger = logging.getLogger("geoseeq_api")
|
8
10
|
|
9
11
|
|
10
12
|
class Project(RemoteObject):
|
@@ -14,9 +16,11 @@ class Project(RemoteObject):
|
|
14
16
|
"name",
|
15
17
|
"privacy_level",
|
16
18
|
"description",
|
19
|
+
"samples_count",
|
17
20
|
]
|
18
21
|
optional_remote_fields = [
|
19
22
|
"privacy_level",
|
23
|
+
"samples_count",
|
20
24
|
]
|
21
25
|
parent_field = "org"
|
22
26
|
url_prefix = "sample_groups"
|
@@ -195,6 +199,18 @@ class Project(RemoteObject):
|
|
195
199
|
for sample_blob in paginated_iterator(self.knex, url, error_handler=error_handler):
|
196
200
|
yield sample_blob['uuid']
|
197
201
|
|
202
|
+
def _batch_sample_uuids(self, batch_size, input_sample_uuids=[]):
|
203
|
+
"""Yield batches of sample uuids."""
|
204
|
+
uuids_to_batch = input_sample_uuids if input_sample_uuids else self.get_sample_uuids()
|
205
|
+
sample_uuids = []
|
206
|
+
for sample_uuid in uuids_to_batch:
|
207
|
+
sample_uuids.append(sample_uuid)
|
208
|
+
if len(sample_uuids) == batch_size:
|
209
|
+
yield sample_uuids
|
210
|
+
sample_uuids = []
|
211
|
+
if sample_uuids:
|
212
|
+
yield sample_uuids
|
213
|
+
|
198
214
|
def get_analysis_results(self, cache=True):
|
199
215
|
"""Yield ProjectResultFolder objects for this project fetched from the server.
|
200
216
|
|
@@ -239,6 +255,76 @@ class Project(RemoteObject):
|
|
239
255
|
url = f"sample_groups/{self.uuid}/metadata"
|
240
256
|
blob = self.knex.get(url)
|
241
257
|
return pd.DataFrame.from_dict(blob, orient="index")
|
258
|
+
|
259
|
+
@property
def n_samples(self):
    """Return the number of samples in this project.

    Uses the `samples_count` attribute when it is present and not None;
    otherwise counts the uuids yielded by `self.get_sample_uuids()`.
    """
    # NOTE(review): the previous `self.hasattr('samples_count')` is not the
    # builtin `hasattr` and only works if a base class defines a method with
    # that name - confirm. `getattr` with a default covers both the "missing"
    # and the "None" case without relying on it.
    samples_count = getattr(self, 'samples_count', None)
    if samples_count is not None:
        return samples_count
    # Count lazily instead of materialising every uuid in a list.
    return sum(1 for _ in self.get_sample_uuids())
|
265
|
+
|
266
|
+
def bulk_find_files(self,
                    sample_uuids=None,
                    sample_name_includes=None,
                    folder_types="all",
                    folder_names=None,
                    file_names=None,
                    extensions=None,
                    with_versions=False,
                    use_batches_cutoff=500):
    """Return a dict with links to download files that match the given criteria.

    Options:
    - sample_uuids: list of sample uuids; if blank search all samples in project
    - sample_name_includes: list of strings; finds samples with names that include these strings
    - folder_types: "all", "project", "sample"; finds files in folders of these types
    - folder_names: list of strings; finds files in folders that have these strings in their names
    - file_names: list of strings; finds files that have these strings in their names
    - extensions: list of strings; finds files with these file extensions
    - with_versions: bool; if True, include all versions of files in results
    - use_batches_cutoff: int; searches covering this many samples or more are
      split into several requests of at most `use_batches_cutoff - 1` samples

    When the search is split, the per-batch responses are merged: the
    `file_size_bytes` and `no_size_info_count` values are summed and the
    `links` dicts are combined.
    """
    # None stands in for the former mutable `[]` defaults; empty values are
    # normalised to a list so the request payload is unchanged.
    sample_uuids = sample_uuids or []
    sample_name_includes = sample_name_includes or []
    folder_names = folder_names or []
    file_names = file_names or []
    extensions = extensions or []

    def _my_bulk_find(batch_uuids):  # curry to save typing
        return self._bulk_find_files_batch(sample_uuids=batch_uuids,
                                           sample_name_includes=sample_name_includes,
                                           folder_types=folder_types,
                                           folder_names=folder_names,
                                           file_names=file_names,
                                           extensions=extensions,
                                           with_versions=with_versions)

    n_samples = len(sample_uuids) if sample_uuids else self.n_samples
    if n_samples < use_batches_cutoff:
        logger.debug(f"Using single batch bulk_find for {n_samples} samples")
        # Forward the caller's uuids. The single-batch path used to call the
        # helper with no arguments, silently widening the search to every
        # sample in the project.
        return _my_bulk_find(sample_uuids)

    logger.debug(f"Using multi batch bulk_find for {n_samples} samples")
    merged_response = {'file_size_bytes': 0, 'links': {}, 'no_size_info_count': 0}
    for batch in self._batch_sample_uuids(use_batches_cutoff - 1, input_sample_uuids=sample_uuids):
        response = _my_bulk_find(batch)
        merged_response['file_size_bytes'] += response['file_size_bytes']
        merged_response['links'].update(response['links'])
        merged_response['no_size_info_count'] += response['no_size_info_count']
    return merged_response
|
307
|
+
|
308
|
+
def _bulk_find_files_batch(self,
|
309
|
+
sample_uuids=[],
|
310
|
+
sample_name_includes=[],
|
311
|
+
folder_types=[],
|
312
|
+
folder_names=[],
|
313
|
+
file_names=[],
|
314
|
+
extensions=[],
|
315
|
+
with_versions=False):
|
316
|
+
data = {
|
317
|
+
"sample_uuids": sample_uuids,
|
318
|
+
"sample_names": sample_name_includes,
|
319
|
+
"folder_type": folder_types,
|
320
|
+
"folder_names": folder_names,
|
321
|
+
"file_names": file_names,
|
322
|
+
"extensions": extensions,
|
323
|
+
"with_versions": with_versions
|
324
|
+
}
|
325
|
+
url = f"sample_groups/{self.uuid}/download"
|
326
|
+
response = self.knex.post(url, data)
|
327
|
+
return response
|
242
328
|
|
243
329
|
def __str__(self):
|
244
330
|
return f"<Geoseeq::Project {self.name} {self.uuid} />"
|
@@ -41,14 +41,13 @@ class FileChunker:
|
|
41
41
|
|
42
42
|
class ResultFileUpload:
|
43
43
|
"""Abstract class that handles upload methods for result files."""
|
44
|
-
|
45
|
-
def
|
46
|
-
n_parts = int(file_size / chunk_size) + 1
|
44
|
+
|
45
|
+
def _create_multipart_upload(self, filepath, file_size, optional_fields):
|
47
46
|
optional_fields = optional_fields if optional_fields else {}
|
48
47
|
optional_fields.update(
|
49
48
|
{
|
50
49
|
"md5_checksum": md5_checksum(filepath),
|
51
|
-
"file_size_bytes":
|
50
|
+
"file_size_bytes": file_size,
|
52
51
|
}
|
53
52
|
)
|
54
53
|
data = {
|
@@ -57,6 +56,11 @@ class ResultFileUpload:
|
|
57
56
|
"result_type": "sample" if self.is_sample_result else "group",
|
58
57
|
}
|
59
58
|
response = self.knex.post(f"/ar_fields/{self.uuid}/create_upload", json=data)
|
59
|
+
return response
|
60
|
+
|
61
|
+
def _prep_multipart_upload(self, filepath, file_size, chunk_size, optional_fields):
|
62
|
+
n_parts = int(file_size / chunk_size) + 1
|
63
|
+
response = self._create_multipart_upload(filepath, file_size, optional_fields)
|
60
64
|
upload_id = response["upload_id"]
|
61
65
|
parts = list(range(1, n_parts + 1))
|
62
66
|
data = {
|
@@ -105,6 +109,7 @@ class ResultFileUpload:
|
|
105
109
|
|
106
110
|
def _upload_parts(self, file_chunker, urls, max_retries, session, progress_tracker, threads):
|
107
111
|
if threads == 1:
|
112
|
+
logger.info(f"Uploading parts in series for {file_chunker.filepath}")
|
108
113
|
complete_parts = []
|
109
114
|
for num, url in enumerate(list(urls.values())):
|
110
115
|
response_part = self._upload_one_part(file_chunker, url, num, max_retries, session)
|
@@ -114,6 +119,7 @@ class ResultFileUpload:
|
|
114
119
|
return complete_parts
|
115
120
|
|
116
121
|
with ThreadPoolExecutor(max_workers=threads) as executor:
|
122
|
+
logger.info(f"Uploading parts in parallel for {file_chunker.filepath} with {threads} threads.")
|
117
123
|
futures = []
|
118
124
|
for num, url in enumerate(list(urls.values())):
|
119
125
|
future = executor.submit(
|
@@ -128,6 +134,7 @@ class ResultFileUpload:
|
|
128
134
|
logger.info(
|
129
135
|
f'Uploaded part {response_part["PartNumber"]} of {len(urls)} for "{file_chunker.filepath}"'
|
130
136
|
)
|
137
|
+
complete_parts = sorted(complete_parts, key=lambda x: x["PartNumber"])
|
131
138
|
return complete_parts
|
132
139
|
|
133
140
|
def multipart_upload_file(
|
@@ -27,6 +27,7 @@ geoseeq.egg-info/top_level.txt
|
|
27
27
|
geoseeq/cli/__init__.py
|
28
28
|
geoseeq/cli/constants.py
|
29
29
|
geoseeq/cli/copy.py
|
30
|
+
geoseeq/cli/detail.py
|
30
31
|
geoseeq/cli/download.py
|
31
32
|
geoseeq/cli/fastq_utils.py
|
32
33
|
geoseeq/cli/main.py
|
@@ -44,6 +45,7 @@ geoseeq/cli/shared_params/obj_getters.py
|
|
44
45
|
geoseeq/cli/shared_params/opts_and_args.py
|
45
46
|
geoseeq/cli/upload/__init__.py
|
46
47
|
geoseeq/cli/upload/upload.py
|
48
|
+
geoseeq/cli/upload/upload_advanced.py
|
47
49
|
geoseeq/cli/upload/upload_reads.py
|
48
50
|
geoseeq/contrib/__init__.py
|
49
51
|
geoseeq/contrib/ncbi/__init__.py
|
@@ -5,7 +5,7 @@ import setuptools
|
|
5
5
|
|
6
6
|
setuptools.setup(
|
7
7
|
name='geoseeq',
|
8
|
-
version='0.
|
8
|
+
version='0.3.1', # remember to update version string in CLI as well
|
9
9
|
author="David C. Danko",
|
10
10
|
author_email='dcdanko@biotia.io',
|
11
11
|
description=open('README.md').read(),
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|