geoseeq 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
geoseeq/cli/detail.py ADDED
@@ -0,0 +1,39 @@
1
+ import click
2
+ import json
3
+ from .shared_params import (
4
+ use_common_state,
5
+ project_id_arg,
6
+ sample_ids_arg,
7
+ yes_option,
8
+ private_option,
9
+ org_id_arg,
10
+ handle_project_id,
11
+ handle_multiple_sample_ids,
12
+ handle_org_id,
13
+ )
14
+ from geoseeq.id_constructors import resolve_id
15
+
16
+
17
@click.group('detail')
def cli_detail():
    """Detail objects on GeoSeeq."""
    # Container group only: it does no work itself. Subcommands attach to it
    # through the `@cli_detail.command(...)` decorator.
21
+
22
+
23
@cli_detail.command('folder')
@use_common_state
@click.argument('grn')
def detail_folder(state, grn):
    """Print a result folder and every file stored in it.

    GRN is resolved with `resolve_id` and must identify a result folder. The
    folder and its timestamps are printed first, followed by one section per
    result file showing the file, its timestamps and its stored data as
    indented JSON. Sections are terminated by a `--` line.
    """
    kind, rfolder = resolve_id(state.get_knex(), grn)
    # Validate with an explicit error instead of `assert`: assertions are
    # stripped when Python runs with -O and would otherwise surface to the
    # user as a bare AssertionError traceback.
    if kind != 'folder':
        raise click.UsageError(
            f'Expected the GRN of a folder but "{grn}" resolved to "{kind}".'
        )

    click.echo('Folder:')
    click.echo(rfolder)
    click.echo(f'Created at: {rfolder.created_at}')
    click.echo(f'Updated at: {rfolder.updated_at}')
    click.echo('Files:')
    for rfile in rfolder.get_result_files():
        click.echo(rfile)
        click.echo(f'Created at: {rfile.created_at}')
        click.echo(f'Updated at: {rfile.updated_at}')
        click.echo(json.dumps(rfile.stored_data, indent=2))
        click.echo('--')
geoseeq/cli/main.py CHANGED
@@ -7,7 +7,7 @@ import click
7
7
  from .copy import cli_copy
8
8
  from .manage import cli_manage
9
9
  from .download import cli_download
10
- from .upload import cli_upload
10
+ from .upload import cli_upload, cli_upload_advanced
11
11
  from .user import cli_user
12
12
  from .view import cli_view
13
13
  from .search import cli_search
@@ -15,6 +15,7 @@ from geoseeq.vc.cli import cli_vc
15
15
  from geoseeq.knex import DEFAULT_ENDPOINT
16
16
  from .shared_params.config import set_profile
17
17
  from .shared_params.opts_and_args import overwrite_option
18
+ from .detail import cli_detail
18
19
 
19
20
  logger = logging.getLogger('geoseeq_api')
20
21
  handler = logging.StreamHandler()
@@ -35,7 +36,7 @@ main.add_command(cli_search)
35
36
  @main.command()
36
37
  def version():
37
38
  """Print the version of the Geoseeq API being used."""
38
- click.echo('0.3.0') # remember to update setup
39
+ click.echo('0.3.1') # remember to update setup
39
40
 
40
41
 
41
42
  @main.group('advanced')
@@ -45,6 +46,8 @@ def cli_advanced():
45
46
 
46
47
  cli_advanced.add_command(cli_copy)
47
48
  cli_advanced.add_command(cli_user)
49
+ cli_advanced.add_command(cli_detail)
50
+ cli_advanced.add_command(cli_upload_advanced)
48
51
 
49
52
  @cli_advanced.group('experimental')
50
53
  def cli_experimental():
@@ -5,7 +5,7 @@ from .upload import (
5
5
  cli_metadata,
6
6
  )
7
7
  from .upload_reads import cli_upload_reads_wizard
8
-
8
+ from .upload_advanced import cli_find_urls_for_reads
9
9
 
10
10
  @click.group('upload')
11
11
  def cli_upload():
@@ -15,3 +15,10 @@ def cli_upload():
15
15
  cli_upload.add_command(cli_upload_reads_wizard)
16
16
  cli_upload.add_command(cli_upload_file)
17
17
  cli_upload.add_command(cli_metadata)
18
+
19
@click.group('upload')
def cli_upload_advanced():
    """Advanced tools to upload files to GeoSeeq."""
    # Container group only: commands are attached to it with `add_command`.
23
+
24
+ cli_upload_advanced.add_command(cli_find_urls_for_reads)
@@ -0,0 +1,92 @@
1
+ import logging
2
+ import click
3
+ import requests
4
+ from os.path import basename, getsize
5
+ from .upload_reads import (
6
+ _make_in_process_logger,
7
+ _get_regex,
8
+ _group_files,
9
+ flatten_list_of_fastqs,
10
+ )
11
+
12
+ from multiprocessing import Pool, current_process
13
+
14
+ from geoseeq.cli.constants import *
15
+ from geoseeq.cli.shared_params import (
16
+ handle_project_id,
17
+ private_option,
18
+ module_option,
19
+ project_id_arg,
20
+ overwrite_option,
21
+ yes_option,
22
+ use_common_state,
23
+ )
24
+
25
+ from geoseeq.constants import FASTQ_MODULE_NAMES
26
+ from geoseeq.cli.progress_bar import PBarManager
27
+
28
+ logger = logging.getLogger('geoseeq_api')
29
+
30
+
31
+ def _keep_only_authentication_url_args(url):
32
+ """Return a url with only the S3 authentication args"""
33
+ root, args = url.split('?')
34
+ args = args.split('&')
35
+ args = [arg for arg in args if arg.startswith('AWSAccessKeyId=') or arg.startswith('Signature=')]
36
+ return root + '?' + '&'.join(args)
37
+
38
+
39
def _get_url_for_one_file(args):
    """Look up the storage URL that one local file would be uploaded to.

    `args` is a single 4-tuple `(result_file, filepath, overwrite, log_level)`
    so the function can be mapped over a process pool.

    Returns `(filepath, url)`, or `None` when the remote result file already
    exists and `overwrite` is false.
    """
    remote_file, local_path, may_overwrite, log_level = args
    _make_in_process_logger(log_level)

    already_on_server = remote_file.exists()
    if already_on_server and not may_overwrite:
        return None

    remote_file = remote_file.idem()
    n_bytes = getsize(local_path)
    # The chunk size passed here is one byte larger than the file; presumably
    # this makes the server hand back exactly one part URL (keyed '1').
    _, part_urls = remote_file._prep_multipart_upload(local_path, n_bytes, n_bytes + 1, {})
    stripped_url = _keep_only_authentication_url_args(part_urls['1'])
    return local_path, stripped_url
50
+
51
+
52
def _find_target_urls(groups, module_name, lib, filepaths, overwrite, cores, state):
    """Yield `(filepath, target_url)` pairs for the files described by `groups`.

    For every group the sample named `group['sample_name']` and its
    `module_name` result folder are fetched or created in `lib` (via `idem`).
    Each entry of `group['fields']` maps a field name to a key of `filepaths`.
    URL lookups run in a pool of `cores` worker processes, so pairs are
    yielded in completion order, not input order.

    Files the worker skips (already present on the server and `overwrite` is
    false) produce no pair.
    """
    find_url_args = []
    for group in groups:
        sample = lib.sample(group['sample_name']).idem()
        read_folder = sample.result_folder(module_name).idem()
        for field_name, path in group['fields'].items():
            result_file = read_folder.read_file(field_name)
            find_url_args.append(
                (result_file, filepaths[path], overwrite, state.log_level)
            )

    with Pool(cores) as pool:
        for found in pool.imap_unordered(_get_url_for_one_file, find_url_args):
            # The worker returns None for skipped files; unpacking that into
            # two names would raise TypeError and abort the whole listing.
            if found is None:
                continue
            yield found
70
+
71
+
72
@click.command('read-links')
@use_common_state
@click.option('--cores', default=1, help='Number of uploads to run in parallel')
@overwrite_option
@yes_option
@click.option('--regex', default=None, help='An optional regex to use to extract sample names from the file names')
@private_option
@module_option(FASTQ_MODULE_NAMES)
@project_id_arg
@click.argument('fastq_files', type=click.Path(exists=True), nargs=-1)
def cli_find_urls_for_reads(state, cores, overwrite, yes, regex, private, module_name, project_id, fastq_files):
    """Print a two column list with filenames and a target storage URL
    """
    api_client = state.get_knex()
    project = handle_project_id(api_client, project_id, yes, private)

    # Index every fastq path by its base name; the grouping helpers work on
    # base names and the full path is looked up again when URLs are requested.
    path_by_name = {}
    for fastq_path in flatten_list_of_fastqs(fastq_files):
        path_by_name[basename(fastq_path)] = fastq_path
    click.echo(f'Found {len(path_by_name)} files to upload.', err=True)

    regex = _get_regex(api_client, path_by_name, module_name, project, regex)
    file_groups = _group_files(api_client, path_by_name, module_name, regex, yes)
    url_pairs = _find_target_urls(
        file_groups, module_name, project, path_by_name, overwrite, cores, state
    )
    # One tab-separated "<file>\t<url>" line per file, written to the outfile
    # held by the common state.
    for file_name, target_url in url_pairs:
        print(f'{file_name}\t{target_url}', file=state.outfile)
geoseeq/project.py CHANGED
@@ -20,6 +20,7 @@ class Project(RemoteObject):
20
20
  ]
21
21
  optional_remote_fields = [
22
22
  "privacy_level",
23
+ "samples_count",
23
24
  ]
24
25
  parent_field = "org"
25
26
  url_prefix = "sample_groups"
@@ -258,7 +259,9 @@ class Project(RemoteObject):
258
259
  @property
259
260
  def n_samples(self):
260
261
  """Return the number of samples in this project."""
261
- return self.samples_count
262
+ if self.hasattr('samples_count') and self.samples_count is not None:
263
+ return self.samples_count
264
+ return len(list(self.get_sample_uuids()))
262
265
 
263
266
  def bulk_find_files(self,
264
267
  sample_uuids=[],
@@ -41,14 +41,13 @@ class FileChunker:
41
41
 
42
42
  class ResultFileUpload:
43
43
  """Abstract class that handles upload methods for result files."""
44
-
45
- def _prep_multipart_upload(self, filepath, file_size, chunk_size, optional_fields):
46
- n_parts = int(file_size / chunk_size) + 1
44
+
45
+ def _create_multipart_upload(self, filepath, file_size, optional_fields):
47
46
  optional_fields = optional_fields if optional_fields else {}
48
47
  optional_fields.update(
49
48
  {
50
49
  "md5_checksum": md5_checksum(filepath),
51
- "file_size_bytes": getsize(filepath),
50
+ "file_size_bytes": file_size,
52
51
  }
53
52
  )
54
53
  data = {
@@ -57,6 +56,11 @@ class ResultFileUpload:
57
56
  "result_type": "sample" if self.is_sample_result else "group",
58
57
  }
59
58
  response = self.knex.post(f"/ar_fields/{self.uuid}/create_upload", json=data)
59
+ return response
60
+
61
+ def _prep_multipart_upload(self, filepath, file_size, chunk_size, optional_fields):
62
+ n_parts = int(file_size / chunk_size) + 1
63
+ response = self._create_multipart_upload(filepath, file_size, optional_fields)
60
64
  upload_id = response["upload_id"]
61
65
  parts = list(range(1, n_parts + 1))
62
66
  data = {
@@ -105,6 +109,7 @@ class ResultFileUpload:
105
109
 
106
110
  def _upload_parts(self, file_chunker, urls, max_retries, session, progress_tracker, threads):
107
111
  if threads == 1:
112
+ logger.info(f"Uploading parts in series for {file_chunker.filepath}")
108
113
  complete_parts = []
109
114
  for num, url in enumerate(list(urls.values())):
110
115
  response_part = self._upload_one_part(file_chunker, url, num, max_retries, session)
@@ -114,6 +119,7 @@ class ResultFileUpload:
114
119
  return complete_parts
115
120
 
116
121
  with ThreadPoolExecutor(max_workers=threads) as executor:
122
+ logger.info(f"Uploading parts in parallel for {file_chunker.filepath} with {threads} threads.")
117
123
  futures = []
118
124
  for num, url in enumerate(list(urls.values())):
119
125
  future = executor.submit(
@@ -128,6 +134,7 @@ class ResultFileUpload:
128
134
  logger.info(
129
135
  f'Uploaded part {response_part["PartNumber"]} of {len(urls)} for "{file_chunker.filepath}"'
130
136
  )
137
+ complete_parts = sorted(complete_parts, key=lambda x: x["PartNumber"])
131
138
  return complete_parts
132
139
 
133
140
  def multipart_upload_file(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geoseeq
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: # Geoseeq API Client
5
5
  Author: David C. Danko
6
6
  Author-email: dcdanko@biotia.io
@@ -7,7 +7,7 @@ geoseeq/file_system_cache.py,sha256=7pdM-1VncOvjkYv2pOocPHOXnEWz0jKhH6sm1LaXtoU,
7
7
  geoseeq/knex.py,sha256=WWYV3rwGuCdWL6zucgeExbaMmTg-I7IYqRHuvxhc6Q4,6007
8
8
  geoseeq/organization.py,sha256=a9xmGDE0tQsjPJfyFkYnWagxZ8xpdeckkwvkhH6LNIk,2462
9
9
  geoseeq/pipeline.py,sha256=RuAHyJM0wb0Z7WUPT7_8O6wB4UH5VUiBaPmWsLVeIKo,6819
10
- geoseeq/project.py,sha256=9U_fFFtoableLvsMI2_7zTwJLXJe5C_1oFY7Xuap1Rc,12924
10
+ geoseeq/project.py,sha256=8HrK9JRhhXV03E9i7FQHPNj18vaEn7W81M7Ak0_S0-o,13080
11
11
  geoseeq/remote_object.py,sha256=6owZTIubA2wwbA4AiL0HdfWDMpZjOhUgqv_KxNG2XZg,6613
12
12
  geoseeq/sample.py,sha256=348NgY7wQBmFpoTq24T0ffVQ66iKB-hsCx2YdsBGTlc,4854
13
13
  geoseeq/search.py,sha256=gawad6Cx5FxJBPlYkXWb-UKAO-UC0_yhvyU9Ca1kaNI,3388
@@ -17,9 +17,10 @@ geoseeq/work_orders.py,sha256=5uLVVfdKE8qh4gGaHkdBpXJGRTujuSg59knWCqEET4A,8071
17
17
  geoseeq/cli/__init__.py,sha256=4WnK87K5seRK3SGJAxNWnQTqyg5uBhdhrOrzB1D4b3M,24
18
18
  geoseeq/cli/constants.py,sha256=Do5AUf9lMO9_P8KpFJ3XwwFBAWsxSjZ6sx9_QEGyC_c,176
19
19
  geoseeq/cli/copy.py,sha256=ocJ-T6vZBTPELUSBA65KndzAugrBWMq-3ux0dbffacE,1997
20
+ geoseeq/cli/detail.py,sha256=uWxJ3v2GTpfGCxXkFHRqYilEUpZjJTkzES-Mpa2HEe4,1040
20
21
  geoseeq/cli/download.py,sha256=Hn2RhRgUpy1QDZuVkG6JgKlthGLnnfVYXIYrVsYcYL4,11859
21
22
  geoseeq/cli/fastq_utils.py,sha256=-bmeQLaiMBm57zWOF0R5OlWTU0_3sh1JBC1RYw2BOFM,3083
22
- geoseeq/cli/main.py,sha256=03gd1t0wMSybDKtyubcnmhS6N-J0zZE-k21UCmKKoW4,1915
23
+ geoseeq/cli/main.py,sha256=JgaUKKK_rm1CuGdUBzg2m_WJHV5fWSRj5DPTSRlf1xM,2050
23
24
  geoseeq/cli/manage.py,sha256=xy_iMfqSSMaSTQNYiTw2xEWpH4mpTAq4Cf_6QemEMnI,5409
24
25
  geoseeq/cli/progress_bar.py,sha256=p1Xl01nkYxSBZCB30ue2verIIi22W93m3ZAMAxipD0g,738
25
26
  geoseeq/cli/search.py,sha256=KpqKiq4-wbgO6xrx58NxomYZcx451NqzA_MecgiND1g,881
@@ -32,8 +33,9 @@ geoseeq/cli/shared_params/config.py,sha256=06KrZhC3zljveo_21YvaZPcKIzyDBGHT_Yr79
32
33
  geoseeq/cli/shared_params/id_handlers.py,sha256=7s2g-UfKoTfwmDX_FI96-M_g_U1yYBUEj-8yy8NC_c8,6493
33
34
  geoseeq/cli/shared_params/obj_getters.py,sha256=ZSkt6LnDkVFlNVYKgLrjzg60-6BthZMr3eeD3HNqzac,2741
34
35
  geoseeq/cli/shared_params/opts_and_args.py,sha256=Jy2rVQydlvkgjoA_BYjSNuI-wUSPmE2S1C1rN6bycBI,1399
35
- geoseeq/cli/upload/__init__.py,sha256=QqBh0WXD9BaEDcKsKpb-WgWwQ0WzZdQUgYXYwyowW_c,340
36
+ geoseeq/cli/upload/__init__.py,sha256=Wf30XGLllFFnnfeOnpYsO3SXyaq8yclHcYxGUNdSh6o,562
36
37
  geoseeq/cli/upload/upload.py,sha256=Ikly-vfLzDXpaMpyFTim1CfgqNjtrnRQHgOMi_7JubU,6367
38
+ geoseeq/cli/upload/upload_advanced.py,sha256=Jq5eGe-wOdrzxGWVwaFPg0BAJcW0YSx_eHEmYjJeKuA,3434
37
39
  geoseeq/cli/upload/upload_reads.py,sha256=f5NPbnlZg93zjDUIX_OsjyOyvvEtkIn_ioYLFfClfjw,7583
38
40
  geoseeq/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
41
  geoseeq/contrib/ncbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -59,7 +61,7 @@ geoseeq/plotting/map/overlay.py,sha256=4VmxqOESTQra9tPr8b8OLEUhJSit9lNipabeSznEY
59
61
  geoseeq/result/__init__.py,sha256=IFHIyRV8ZzuKIfwfze1SXgcKwNMcSgMAknLHMkwjXIU,356
60
62
  geoseeq/result/bioinfo.py,sha256=QQtbyogrdro9avJSN0713sxLVnVeA24mFw3hWtKDKyw,1782
61
63
  geoseeq/result/file_download.py,sha256=R087ypOAu2pMZ9vXvkO9babPBNtRXTF3ZhpPldcT_jM,4593
62
- geoseeq/result/file_upload.py,sha256=D5PpmraEnF1sNTJDitKnPiHwo1ak1s1UW_TBwYpd_ko,6707
64
+ geoseeq/result/file_upload.py,sha256=g29WES3IjV8pmB8g3VRzxUk3wsSNgkCB6HIqZ_4ctuY,7158
63
65
  geoseeq/result/result_file.py,sha256=YMCAivUXgHUNEhH6yTGCHJh9gXHd8OLjjbnccdvW7iw,7138
64
66
  geoseeq/result/result_folder.py,sha256=qmxm-Z6DI1ohGnIJgQfh27AVJ1dbHM_p2XprepwUKEg,7782
65
67
  geoseeq/result/utils.py,sha256=C-CxGzB3WddlnRiqFSkrY78I_m0yFgNqsTBRzGU-y8Q,2772
@@ -75,9 +77,9 @@ geoseeq/vc/vc_stub.py,sha256=IQr8dI0zsWKVAeY_5ybDD6n49_3othcgfHS3P0O9tuY,3110
75
77
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
78
  tests/test_api_client.py,sha256=TS5njc5pcPP_Ycy-ljcfPVT1hQRBsFVdQ0lCqBmoesU,12810
77
79
  tests/test_plotting.py,sha256=TcTu-2ARr8sxZJ7wPQxmbs3-gHw7uRvsgrhhhg0qKik,784
78
- geoseeq-0.3.0.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
79
- geoseeq-0.3.0.dist-info/METADATA,sha256=KpwaJ3heZAhI7ptGED_3yevWohc2BS0BKRIcxhwky8M,454
80
- geoseeq-0.3.0.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
81
- geoseeq-0.3.0.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
82
- geoseeq-0.3.0.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
83
- geoseeq-0.3.0.dist-info/RECORD,,
80
+ geoseeq-0.3.1.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
81
+ geoseeq-0.3.1.dist-info/METADATA,sha256=10uUQhfABpFPFN4KwAcA8NyABrFKgStOwecU65HlHvI,454
82
+ geoseeq-0.3.1.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
83
+ geoseeq-0.3.1.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
84
+ geoseeq-0.3.1.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
85
+ geoseeq-0.3.1.dist-info/RECORD,,