geoseeq 0.2.22__tar.gz → 0.3.1__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. {geoseeq-0.2.22 → geoseeq-0.3.1}/PKG-INFO +1 -1
  2. geoseeq-0.3.1/geoseeq/cli/detail.py +39 -0
  3. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/download.py +11 -11
  4. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/main.py +5 -2
  5. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/upload/__init__.py +8 -1
  6. geoseeq-0.3.1/geoseeq/cli/upload/upload_advanced.py +92 -0
  7. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/project.py +86 -0
  8. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/result/file_upload.py +11 -4
  9. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq.egg-info/PKG-INFO +1 -1
  10. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq.egg-info/SOURCES.txt +2 -0
  11. {geoseeq-0.2.22 → geoseeq-0.3.1}/setup.py +1 -1
  12. {geoseeq-0.2.22 → geoseeq-0.3.1}/LICENSE +0 -0
  13. {geoseeq-0.2.22 → geoseeq-0.3.1}/README.md +0 -0
  14. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/__init__.py +0 -0
  15. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/app.py +0 -0
  16. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/blob_constructors.py +0 -0
  17. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/bulk_creators.py +0 -0
  18. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/__init__.py +0 -0
  19. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/constants.py +0 -0
  20. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/copy.py +0 -0
  21. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/fastq_utils.py +0 -0
  22. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/manage.py +0 -0
  23. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/progress_bar.py +0 -0
  24. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/search.py +0 -0
  25. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/shared_params/__init__.py +0 -0
  26. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/shared_params/common_state.py +0 -0
  27. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/shared_params/config.py +0 -0
  28. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/shared_params/id_handlers.py +0 -0
  29. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/shared_params/obj_getters.py +0 -0
  30. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/shared_params/opts_and_args.py +0 -0
  31. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/upload/upload.py +0 -0
  32. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/upload/upload_reads.py +0 -0
  33. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/user.py +0 -0
  34. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/utils.py +0 -0
  35. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/cli/view.py +0 -0
  36. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/constants.py +0 -0
  37. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/contrib/__init__.py +0 -0
  38. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/contrib/ncbi/__init__.py +0 -0
  39. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/contrib/ncbi/api.py +0 -0
  40. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/contrib/ncbi/bioproject.py +0 -0
  41. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/contrib/ncbi/cli.py +0 -0
  42. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/contrib/ncbi/setup_logging.py +0 -0
  43. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/file_system_cache.py +0 -0
  44. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/id_constructors/__init__.py +0 -0
  45. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/id_constructors/from_blobs.py +0 -0
  46. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/id_constructors/from_ids.py +0 -0
  47. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/id_constructors/from_names.py +0 -0
  48. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/id_constructors/from_uuids.py +0 -0
  49. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/id_constructors/resolvers.py +0 -0
  50. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/id_constructors/utils.py +0 -0
  51. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/knex.py +0 -0
  52. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/organization.py +0 -0
  53. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/pipeline.py +0 -0
  54. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/plotting/__init__.py +0 -0
  55. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/plotting/constants.py +0 -0
  56. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/plotting/highcharts.py +0 -0
  57. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/plotting/map/__init__.py +0 -0
  58. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/plotting/map/base_layer.py +0 -0
  59. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/plotting/map/map.py +0 -0
  60. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/plotting/map/overlay.py +0 -0
  61. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/plotting/selectable.py +0 -0
  62. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/remote_object.py +0 -0
  63. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/result/__init__.py +0 -0
  64. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/result/bioinfo.py +0 -0
  65. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/result/file_download.py +0 -0
  66. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/result/result_file.py +0 -0
  67. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/result/result_folder.py +0 -0
  68. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/result/utils.py +0 -0
  69. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/sample.py +0 -0
  70. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/search.py +0 -0
  71. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/user.py +0 -0
  72. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/utils.py +0 -0
  73. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/vc/__init__.py +0 -0
  74. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/vc/checksum.py +0 -0
  75. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/vc/cli.py +0 -0
  76. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/vc/clone.py +0 -0
  77. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/vc/constants.py +0 -0
  78. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/vc/vc_cache.py +0 -0
  79. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/vc/vc_dir.py +0 -0
  80. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/vc/vc_sample.py +0 -0
  81. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/vc/vc_stub.py +0 -0
  82. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq/work_orders.py +0 -0
  83. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq.egg-info/dependency_links.txt +0 -0
  84. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq.egg-info/entry_points.txt +0 -0
  85. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq.egg-info/requires.txt +0 -0
  86. {geoseeq-0.2.22 → geoseeq-0.3.1}/geoseeq.egg-info/top_level.txt +0 -0
  87. {geoseeq-0.2.22 → geoseeq-0.3.1}/pyproject.toml +0 -0
  88. {geoseeq-0.2.22 → geoseeq-0.3.1}/setup.cfg +0 -0
  89. {geoseeq-0.2.22 → geoseeq-0.3.1}/tests/__init__.py +0 -0
  90. {geoseeq-0.2.22 → geoseeq-0.3.1}/tests/test_api_client.py +0 -0
  91. {geoseeq-0.2.22 → geoseeq-0.3.1}/tests/test_plotting.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geoseeq
3
- Version: 0.2.22
3
+ Version: 0.3.1
4
4
  Summary: # Geoseeq API Client
5
5
  Author: David C. Danko
6
6
  Author-email: dcdanko@biotia.io
@@ -0,0 +1,39 @@
1
+ import click
2
+ import json
3
+ from .shared_params import (
4
+ use_common_state,
5
+ project_id_arg,
6
+ sample_ids_arg,
7
+ yes_option,
8
+ private_option,
9
+ org_id_arg,
10
+ handle_project_id,
11
+ handle_multiple_sample_ids,
12
+ handle_org_id,
13
+ )
14
+ from geoseeq.id_constructors import resolve_id
15
+
16
+
17
+ @click.group('detail')
18
+ def cli_detail():
19
+ """Detail objects on GeoSeeq."""
20
+ pass
21
+
22
+
23
+ @cli_detail.command('folder')
24
+ @use_common_state
25
+ @click.argument('grn')
26
+ def detail_folder(state, grn):
27
+ kind, rfolder = resolve_id(state.get_knex(), grn)
28
+ assert kind == 'folder'
29
+ click.echo('Folder:')
30
+ click.echo(rfolder)
31
+ click.echo('Created at: {}'.format(rfolder.created_at))
32
+ click.echo('Updated at: {}'.format(rfolder.updated_at))
33
+ click.echo('Files:')
34
+ for rfile in rfolder.get_result_files():
35
+ click.echo(rfile)
36
+ click.echo('Created at: {}'.format(rfile.created_at))
37
+ click.echo('Updated at: {}'.format(rfile.updated_at))
38
+ click.echo(json.dumps(rfile.stored_data, indent=2))
39
+ click.echo('--')
@@ -166,22 +166,22 @@ def cli_download_files(
166
166
  """
167
167
  knex = state.get_knex()
168
168
  proj = handle_project_id(knex, project_id)
169
+ logger.info(f"Found project \"{proj.name}\"")
169
170
  samples = []
170
171
  if sample_ids:
171
172
  logger.info(f"Fetching info for {len(sample_ids)} samples.")
172
173
  samples = handle_multiple_sample_ids(knex, sample_ids, proj=proj)
173
174
 
174
- data = {
175
- "sample_uuids": [s.uuid for s in samples],
176
- "sample_names": sample_name_includes,
177
- "folder_type": folder_type,
178
- "folder_names": folder_name,
179
- "file_names": file_name,
180
- "extensions": extension,
181
- "with_versions": with_versions
182
- }
183
- url = f"sample_groups/{proj.uuid}/download"
184
- response = knex.post(url, data)
175
+ response = proj.bulk_find_files(
176
+ sample_uuids=[s.uuid for s in samples],
177
+ sample_name_includes=sample_name_includes,
178
+ folder_types=folder_type,
179
+ folder_names=folder_name,
180
+ file_names=file_name,
181
+ extensions=extension,
182
+ with_versions=with_versions,
183
+ )
184
+
185
185
 
186
186
  if not download:
187
187
  data = json.dumps(response["links"])
@@ -7,7 +7,7 @@ import click
7
7
  from .copy import cli_copy
8
8
  from .manage import cli_manage
9
9
  from .download import cli_download
10
- from .upload import cli_upload
10
+ from .upload import cli_upload, cli_upload_advanced
11
11
  from .user import cli_user
12
12
  from .view import cli_view
13
13
  from .search import cli_search
@@ -15,6 +15,7 @@ from geoseeq.vc.cli import cli_vc
15
15
  from geoseeq.knex import DEFAULT_ENDPOINT
16
16
  from .shared_params.config import set_profile
17
17
  from .shared_params.opts_and_args import overwrite_option
18
+ from .detail import cli_detail
18
19
 
19
20
  logger = logging.getLogger('geoseeq_api')
20
21
  handler = logging.StreamHandler()
@@ -35,7 +36,7 @@ main.add_command(cli_search)
35
36
  @main.command()
36
37
  def version():
37
38
  """Print the version of the Geoseeq API being used."""
38
- click.echo('0.2.22') # remember to update setup
39
+ click.echo('0.3.1') # remember to update setup
39
40
 
40
41
 
41
42
  @main.group('advanced')
@@ -45,6 +46,8 @@ def cli_advanced():
45
46
 
46
47
  cli_advanced.add_command(cli_copy)
47
48
  cli_advanced.add_command(cli_user)
49
+ cli_advanced.add_command(cli_detail)
50
+ cli_advanced.add_command(cli_upload_advanced)
48
51
 
49
52
  @cli_advanced.group('experimental')
50
53
  def cli_experimental():
@@ -5,7 +5,7 @@ from .upload import (
5
5
  cli_metadata,
6
6
  )
7
7
  from .upload_reads import cli_upload_reads_wizard
8
-
8
+ from .upload_advanced import cli_find_urls_for_reads
9
9
 
10
10
  @click.group('upload')
11
11
  def cli_upload():
@@ -15,3 +15,10 @@ def cli_upload():
15
15
  cli_upload.add_command(cli_upload_reads_wizard)
16
16
  cli_upload.add_command(cli_upload_file)
17
17
  cli_upload.add_command(cli_metadata)
18
+
19
+ @click.group('upload')
20
+ def cli_upload_advanced():
21
+ """Advanced tools to upload files to GeoSeeq."""
22
+ pass
23
+
24
+ cli_upload_advanced.add_command(cli_find_urls_for_reads)
@@ -0,0 +1,92 @@
1
+ import logging
2
+ import click
3
+ import requests
4
+ from os.path import basename, getsize
5
+ from .upload_reads import (
6
+ _make_in_process_logger,
7
+ _get_regex,
8
+ _group_files,
9
+ flatten_list_of_fastqs,
10
+ )
11
+
12
+ from multiprocessing import Pool, current_process
13
+
14
+ from geoseeq.cli.constants import *
15
+ from geoseeq.cli.shared_params import (
16
+ handle_project_id,
17
+ private_option,
18
+ module_option,
19
+ project_id_arg,
20
+ overwrite_option,
21
+ yes_option,
22
+ use_common_state,
23
+ )
24
+
25
+ from geoseeq.constants import FASTQ_MODULE_NAMES
26
+ from geoseeq.cli.progress_bar import PBarManager
27
+
28
+ logger = logging.getLogger('geoseeq_api')
29
+
30
+
31
+ def _keep_only_authentication_url_args(url):
32
+ """Return a url with only the S3 authentication args"""
33
+ root, args = url.split('?')
34
+ args = args.split('&')
35
+ args = [arg for arg in args if arg.startswith('AWSAccessKeyId=') or arg.startswith('Signature=')]
36
+ return root + '?' + '&'.join(args)
37
+
38
+
39
+ def _get_url_for_one_file(args):
40
+ """Return a tuple of the filepath and the url to upload it to"""
41
+ result_file, filepath, overwrite, log_level = args
42
+ _make_in_process_logger(log_level)
43
+ if result_file.exists() and not overwrite:
44
+ return
45
+ result_file = result_file.idem()
46
+ file_size = getsize(filepath)
47
+ _, urls = result_file._prep_multipart_upload(filepath, file_size, file_size + 1, {})
48
+ url = _keep_only_authentication_url_args(urls['1'])
49
+ return filepath, url
50
+
51
+
52
+ def _find_target_urls(groups, module_name, lib, filepaths, overwrite, cores, state):
53
+ """Use GeoSeeq to get target urls for a set of files"""
54
+ with requests.Session() as session:
55
+ find_url_args = []
56
+ for group in groups:
57
+ sample = lib.sample(group['sample_name']).idem()
58
+ read_folder = sample.result_folder(module_name).idem()
59
+
60
+ for field_name, path in group['fields'].items():
61
+ result_file = read_folder.read_file(field_name)
62
+ filepath = filepaths[path]
63
+ find_url_args.append((
64
+ result_file, filepath, overwrite, state.log_level
65
+ ))
66
+
67
+ with Pool(cores) as p:
68
+ for (file_name, target_url) in p.imap_unordered(_get_url_for_one_file, find_url_args):
69
+ yield file_name, target_url
70
+
71
+
72
+ @click.command('read-links')
73
+ @use_common_state
74
+ @click.option('--cores', default=1, help='Number of uploads to run in parallel')
75
+ @overwrite_option
76
+ @yes_option
77
+ @click.option('--regex', default=None, help='An optional regex to use to extract sample names from the file names')
78
+ @private_option
79
+ @module_option(FASTQ_MODULE_NAMES)
80
+ @project_id_arg
81
+ @click.argument('fastq_files', type=click.Path(exists=True), nargs=-1)
82
+ def cli_find_urls_for_reads(state, cores, overwrite, yes, regex, private, module_name, project_id, fastq_files):
83
+ """Print a two column list with filenames and a target storage URL
84
+ """
85
+ knex = state.get_knex()
86
+ proj = handle_project_id(knex, project_id, yes, private)
87
+ filepaths = {basename(line): line for line in flatten_list_of_fastqs(fastq_files)}
88
+ click.echo(f'Found {len(filepaths)} files to upload.', err=True)
89
+ regex = _get_regex(knex, filepaths, module_name, proj, regex)
90
+ groups = _group_files(knex, filepaths, module_name, regex, yes)
91
+ for file_name, target_url in _find_target_urls(groups, module_name, proj, filepaths, overwrite, cores, state):
92
+ print(f'{file_name}\t{target_url}', file=state.outfile)
@@ -4,7 +4,9 @@ from .sample import Sample
4
4
  from .utils import paginated_iterator
5
5
  import json
6
6
  import pandas as pd
7
+ import logging
7
8
 
9
+ logger = logging.getLogger("geoseeq_api")
8
10
 
9
11
 
10
12
  class Project(RemoteObject):
@@ -14,9 +16,11 @@ class Project(RemoteObject):
14
16
  "name",
15
17
  "privacy_level",
16
18
  "description",
19
+ "samples_count",
17
20
  ]
18
21
  optional_remote_fields = [
19
22
  "privacy_level",
23
+ "samples_count",
20
24
  ]
21
25
  parent_field = "org"
22
26
  url_prefix = "sample_groups"
@@ -195,6 +199,18 @@ class Project(RemoteObject):
195
199
  for sample_blob in paginated_iterator(self.knex, url, error_handler=error_handler):
196
200
  yield sample_blob['uuid']
197
201
 
202
+ def _batch_sample_uuids(self, batch_size, input_sample_uuids=[]):
203
+ """Yield batches of sample uuids."""
204
+ uuids_to_batch = input_sample_uuids if input_sample_uuids else self.get_sample_uuids()
205
+ sample_uuids = []
206
+ for sample_uuid in uuids_to_batch:
207
+ sample_uuids.append(sample_uuid)
208
+ if len(sample_uuids) == batch_size:
209
+ yield sample_uuids
210
+ sample_uuids = []
211
+ if sample_uuids:
212
+ yield sample_uuids
213
+
198
214
  def get_analysis_results(self, cache=True):
199
215
  """Yield ProjectResultFolder objects for this project fetched from the server.
200
216
 
@@ -239,6 +255,76 @@ class Project(RemoteObject):
239
255
  url = f"sample_groups/{self.uuid}/metadata"
240
256
  blob = self.knex.get(url)
241
257
  return pd.DataFrame.from_dict(blob, orient="index")
258
+
259
+ @property
260
+ def n_samples(self):
261
+ """Return the number of samples in this project."""
262
+ if self.hasattr('samples_count') and self.samples_count is not None:
263
+ return self.samples_count
264
+ return len(list(self.get_sample_uuids()))
265
+
266
+ def bulk_find_files(self,
267
+ sample_uuids=[],
268
+ sample_name_includes=[],
269
+ folder_types="all",
270
+ folder_names=[],
271
+ file_names=[],
272
+ extensions=[],
273
+ with_versions=False,
274
+ use_batches_cutoff=500):
275
+ """Return a dict with links to download files that match the given criteria.
276
+
277
+ Options:
278
+ - sample_uuids: list of sample uuids; if blank search all samples in project
279
+ - sample_name_includes: list of strings; finds samples with names that include these strings
280
+ - folder_types: "all", "project", "sample"; finds files in folders of these types
281
+ - folder_names: list of strings; finds files in folders that have these strings in their names
282
+ - file_names: list of strings; finds files that have these strings in their names
283
+ - extensions: list of strings; finds files with these file extensions
284
+ - with_versions: bool; if True, include all versions of files in results
285
+ """
286
+ def _my_bulk_find(sample_uuids=[]): # curry to save typing
287
+ return self._bulk_find_files_batch(sample_uuids=sample_uuids,
288
+ sample_name_includes=sample_name_includes,
289
+ folder_types=folder_types,
290
+ folder_names=folder_names,
291
+ file_names=file_names,
292
+ extensions=extensions,
293
+ with_versions=with_versions)
294
+ n_samples = len(sample_uuids) if sample_uuids else self.n_samples
295
+ if n_samples < use_batches_cutoff:
296
+ logger.debug(f"Using single batch bulk_find for {n_samples} samples")
297
+ return _my_bulk_find()
298
+ else:
299
+ logger.debug(f"Using multi batch bulk_find for {n_samples} samples")
300
+ merged_response = {'file_size_bytes': 0, 'links': {}, 'no_size_info_count': 0}
301
+ for batch in self._batch_sample_uuids(use_batches_cutoff - 1, input_sample_uuids=sample_uuids):
302
+ response = _my_bulk_find(sample_uuids=batch)
303
+ merged_response['file_size_bytes'] += response['file_size_bytes']
304
+ merged_response['links'].update(response['links'])
305
+ merged_response['no_size_info_count'] += response['no_size_info_count']
306
+ return merged_response
307
+
308
+ def _bulk_find_files_batch(self,
309
+ sample_uuids=[],
310
+ sample_name_includes=[],
311
+ folder_types=[],
312
+ folder_names=[],
313
+ file_names=[],
314
+ extensions=[],
315
+ with_versions=False):
316
+ data = {
317
+ "sample_uuids": sample_uuids,
318
+ "sample_names": sample_name_includes,
319
+ "folder_type": folder_types,
320
+ "folder_names": folder_names,
321
+ "file_names": file_names,
322
+ "extensions": extensions,
323
+ "with_versions": with_versions
324
+ }
325
+ url = f"sample_groups/{self.uuid}/download"
326
+ response = self.knex.post(url, data)
327
+ return response
242
328
 
243
329
  def __str__(self):
244
330
  return f"<Geoseeq::Project {self.name} {self.uuid} />"
@@ -41,14 +41,13 @@ class FileChunker:
41
41
 
42
42
  class ResultFileUpload:
43
43
  """Abstract class that handles upload methods for result files."""
44
-
45
- def _prep_multipart_upload(self, filepath, file_size, chunk_size, optional_fields):
46
- n_parts = int(file_size / chunk_size) + 1
44
+
45
+ def _create_multipart_upload(self, filepath, file_size, optional_fields):
47
46
  optional_fields = optional_fields if optional_fields else {}
48
47
  optional_fields.update(
49
48
  {
50
49
  "md5_checksum": md5_checksum(filepath),
51
- "file_size_bytes": getsize(filepath),
50
+ "file_size_bytes": file_size,
52
51
  }
53
52
  )
54
53
  data = {
@@ -57,6 +56,11 @@ class ResultFileUpload:
57
56
  "result_type": "sample" if self.is_sample_result else "group",
58
57
  }
59
58
  response = self.knex.post(f"/ar_fields/{self.uuid}/create_upload", json=data)
59
+ return response
60
+
61
+ def _prep_multipart_upload(self, filepath, file_size, chunk_size, optional_fields):
62
+ n_parts = int(file_size / chunk_size) + 1
63
+ response = self._create_multipart_upload(filepath, file_size, optional_fields)
60
64
  upload_id = response["upload_id"]
61
65
  parts = list(range(1, n_parts + 1))
62
66
  data = {
@@ -105,6 +109,7 @@ class ResultFileUpload:
105
109
 
106
110
  def _upload_parts(self, file_chunker, urls, max_retries, session, progress_tracker, threads):
107
111
  if threads == 1:
112
+ logger.info(f"Uploading parts in series for {file_chunker.filepath}")
108
113
  complete_parts = []
109
114
  for num, url in enumerate(list(urls.values())):
110
115
  response_part = self._upload_one_part(file_chunker, url, num, max_retries, session)
@@ -114,6 +119,7 @@ class ResultFileUpload:
114
119
  return complete_parts
115
120
 
116
121
  with ThreadPoolExecutor(max_workers=threads) as executor:
122
+ logger.info(f"Uploading parts in parallel for {file_chunker.filepath} with {threads} threads.")
117
123
  futures = []
118
124
  for num, url in enumerate(list(urls.values())):
119
125
  future = executor.submit(
@@ -128,6 +134,7 @@ class ResultFileUpload:
128
134
  logger.info(
129
135
  f'Uploaded part {response_part["PartNumber"]} of {len(urls)} for "{file_chunker.filepath}"'
130
136
  )
137
+ complete_parts = sorted(complete_parts, key=lambda x: x["PartNumber"])
131
138
  return complete_parts
132
139
 
133
140
  def multipart_upload_file(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geoseeq
3
- Version: 0.2.22
3
+ Version: 0.3.1
4
4
  Summary: # Geoseeq API Client
5
5
  Author: David C. Danko
6
6
  Author-email: dcdanko@biotia.io
@@ -27,6 +27,7 @@ geoseeq.egg-info/top_level.txt
27
27
  geoseeq/cli/__init__.py
28
28
  geoseeq/cli/constants.py
29
29
  geoseeq/cli/copy.py
30
+ geoseeq/cli/detail.py
30
31
  geoseeq/cli/download.py
31
32
  geoseeq/cli/fastq_utils.py
32
33
  geoseeq/cli/main.py
@@ -44,6 +45,7 @@ geoseeq/cli/shared_params/obj_getters.py
44
45
  geoseeq/cli/shared_params/opts_and_args.py
45
46
  geoseeq/cli/upload/__init__.py
46
47
  geoseeq/cli/upload/upload.py
48
+ geoseeq/cli/upload/upload_advanced.py
47
49
  geoseeq/cli/upload/upload_reads.py
48
50
  geoseeq/contrib/__init__.py
49
51
  geoseeq/contrib/ncbi/__init__.py
@@ -5,7 +5,7 @@ import setuptools
5
5
 
6
6
  setuptools.setup(
7
7
  name='geoseeq',
8
- version='0.2.22', # remember to update version string in CLI as well
8
+ version='0.3.1', # remember to update version string in CLI as well
9
9
  author="David C. Danko",
10
10
  author_email='dcdanko@biotia.io',
11
11
  description=open('README.md').read(),
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes