geoseeq 0.3.0__tar.gz → 0.3.1__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91)
  1. {geoseeq-0.3.0 → geoseeq-0.3.1}/PKG-INFO +1 -1
  2. geoseeq-0.3.1/geoseeq/cli/detail.py +39 -0
  3. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/main.py +5 -2
  4. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/upload/__init__.py +8 -1
  5. geoseeq-0.3.1/geoseeq/cli/upload/upload_advanced.py +92 -0
  6. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/project.py +4 -1
  7. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/result/file_upload.py +11 -4
  8. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq.egg-info/PKG-INFO +1 -1
  9. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq.egg-info/SOURCES.txt +2 -0
  10. {geoseeq-0.3.0 → geoseeq-0.3.1}/setup.py +1 -1
  11. {geoseeq-0.3.0 → geoseeq-0.3.1}/LICENSE +0 -0
  12. {geoseeq-0.3.0 → geoseeq-0.3.1}/README.md +0 -0
  13. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/__init__.py +0 -0
  14. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/app.py +0 -0
  15. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/blob_constructors.py +0 -0
  16. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/bulk_creators.py +0 -0
  17. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/__init__.py +0 -0
  18. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/constants.py +0 -0
  19. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/copy.py +0 -0
  20. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/download.py +0 -0
  21. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/fastq_utils.py +0 -0
  22. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/manage.py +0 -0
  23. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/progress_bar.py +0 -0
  24. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/search.py +0 -0
  25. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/shared_params/__init__.py +0 -0
  26. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/shared_params/common_state.py +0 -0
  27. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/shared_params/config.py +0 -0
  28. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/shared_params/id_handlers.py +0 -0
  29. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/shared_params/obj_getters.py +0 -0
  30. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/shared_params/opts_and_args.py +0 -0
  31. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/upload/upload.py +0 -0
  32. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/upload/upload_reads.py +0 -0
  33. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/user.py +0 -0
  34. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/utils.py +0 -0
  35. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/cli/view.py +0 -0
  36. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/constants.py +0 -0
  37. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/contrib/__init__.py +0 -0
  38. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/contrib/ncbi/__init__.py +0 -0
  39. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/contrib/ncbi/api.py +0 -0
  40. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/contrib/ncbi/bioproject.py +0 -0
  41. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/contrib/ncbi/cli.py +0 -0
  42. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/contrib/ncbi/setup_logging.py +0 -0
  43. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/file_system_cache.py +0 -0
  44. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/id_constructors/__init__.py +0 -0
  45. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/id_constructors/from_blobs.py +0 -0
  46. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/id_constructors/from_ids.py +0 -0
  47. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/id_constructors/from_names.py +0 -0
  48. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/id_constructors/from_uuids.py +0 -0
  49. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/id_constructors/resolvers.py +0 -0
  50. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/id_constructors/utils.py +0 -0
  51. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/knex.py +0 -0
  52. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/organization.py +0 -0
  53. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/pipeline.py +0 -0
  54. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/plotting/__init__.py +0 -0
  55. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/plotting/constants.py +0 -0
  56. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/plotting/highcharts.py +0 -0
  57. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/plotting/map/__init__.py +0 -0
  58. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/plotting/map/base_layer.py +0 -0
  59. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/plotting/map/map.py +0 -0
  60. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/plotting/map/overlay.py +0 -0
  61. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/plotting/selectable.py +0 -0
  62. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/remote_object.py +0 -0
  63. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/result/__init__.py +0 -0
  64. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/result/bioinfo.py +0 -0
  65. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/result/file_download.py +0 -0
  66. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/result/result_file.py +0 -0
  67. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/result/result_folder.py +0 -0
  68. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/result/utils.py +0 -0
  69. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/sample.py +0 -0
  70. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/search.py +0 -0
  71. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/user.py +0 -0
  72. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/utils.py +0 -0
  73. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/vc/__init__.py +0 -0
  74. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/vc/checksum.py +0 -0
  75. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/vc/cli.py +0 -0
  76. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/vc/clone.py +0 -0
  77. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/vc/constants.py +0 -0
  78. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/vc/vc_cache.py +0 -0
  79. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/vc/vc_dir.py +0 -0
  80. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/vc/vc_sample.py +0 -0
  81. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/vc/vc_stub.py +0 -0
  82. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq/work_orders.py +0 -0
  83. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq.egg-info/dependency_links.txt +0 -0
  84. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq.egg-info/entry_points.txt +0 -0
  85. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq.egg-info/requires.txt +0 -0
  86. {geoseeq-0.3.0 → geoseeq-0.3.1}/geoseeq.egg-info/top_level.txt +0 -0
  87. {geoseeq-0.3.0 → geoseeq-0.3.1}/pyproject.toml +0 -0
  88. {geoseeq-0.3.0 → geoseeq-0.3.1}/setup.cfg +0 -0
  89. {geoseeq-0.3.0 → geoseeq-0.3.1}/tests/__init__.py +0 -0
  90. {geoseeq-0.3.0 → geoseeq-0.3.1}/tests/test_api_client.py +0 -0
  91. {geoseeq-0.3.0 → geoseeq-0.3.1}/tests/test_plotting.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geoseeq
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: # Geoseeq API Client
5
5
  Author: David C. Danko
6
6
  Author-email: dcdanko@biotia.io
@@ -0,0 +1,39 @@
1
+ import click
2
+ import json
3
+ from .shared_params import (
4
+ use_common_state,
5
+ project_id_arg,
6
+ sample_ids_arg,
7
+ yes_option,
8
+ private_option,
9
+ org_id_arg,
10
+ handle_project_id,
11
+ handle_multiple_sample_ids,
12
+ handle_org_id,
13
+ )
14
+ from geoseeq.id_constructors import resolve_id
15
+
16
+
17
# Command group named 'detail'; subcommands register themselves on it with
# `@cli_detail.command(...)`.
@click.group(name='detail')
def cli_detail():
    """Detail objects on GeoSeeq."""
21
+
22
+
23
@cli_detail.command('folder')
@use_common_state
@click.argument('grn')
def detail_folder(state, grn):
    """Print a result folder, its timestamps, and every file it contains.

    GRN identifies the folder. For each file in the folder the file itself,
    its timestamps and its stored data (as indented JSON) are printed.
    """
    kind, rfolder = resolve_id(state.get_knex(), grn)
    # Report a wrong object kind as a CLI usage error. An `assert` would be
    # stripped under `python -O` and otherwise shows a bare traceback.
    if kind != 'folder':
        raise click.UsageError(f'"{grn}" refers to a {kind}, not a folder.')

    click.echo('Folder:')
    click.echo(rfolder)
    click.echo(f'Created at: {rfolder.created_at}')
    click.echo(f'Updated at: {rfolder.updated_at}')
    click.echo('Files:')
    for rfile in rfolder.get_result_files():
        click.echo(rfile)
        click.echo(f'Created at: {rfile.created_at}')
        click.echo(f'Updated at: {rfile.updated_at}')
        click.echo(json.dumps(rfile.stored_data, indent=2))
        # Separator line after each file's details.
        click.echo('--')
@@ -7,7 +7,7 @@ import click
7
7
  from .copy import cli_copy
8
8
  from .manage import cli_manage
9
9
  from .download import cli_download
10
- from .upload import cli_upload
10
+ from .upload import cli_upload, cli_upload_advanced
11
11
  from .user import cli_user
12
12
  from .view import cli_view
13
13
  from .search import cli_search
@@ -15,6 +15,7 @@ from geoseeq.vc.cli import cli_vc
15
15
  from geoseeq.knex import DEFAULT_ENDPOINT
16
16
  from .shared_params.config import set_profile
17
17
  from .shared_params.opts_and_args import overwrite_option
18
+ from .detail import cli_detail
18
19
 
19
20
  logger = logging.getLogger('geoseeq_api')
20
21
  handler = logging.StreamHandler()
@@ -35,7 +36,7 @@ main.add_command(cli_search)
35
36
  @main.command()
36
37
  def version():
37
38
  """Print the version of the Geoseeq API being used."""
38
- click.echo('0.3.0') # remember to update setup
39
+ click.echo('0.3.1') # remember to update setup
39
40
 
40
41
 
41
42
  @main.group('advanced')
@@ -45,6 +46,8 @@ def cli_advanced():
45
46
 
46
47
  cli_advanced.add_command(cli_copy)
47
48
  cli_advanced.add_command(cli_user)
49
+ cli_advanced.add_command(cli_detail)
50
+ cli_advanced.add_command(cli_upload_advanced)
48
51
 
49
52
  @cli_advanced.group('experimental')
50
53
  def cli_experimental():
@@ -5,7 +5,7 @@ from .upload import (
5
5
  cli_metadata,
6
6
  )
7
7
  from .upload_reads import cli_upload_reads_wizard
8
-
8
+ from .upload_advanced import cli_find_urls_for_reads
9
9
 
10
10
  @click.group('upload')
11
11
  def cli_upload():
@@ -15,3 +15,10 @@ def cli_upload():
15
15
  cli_upload.add_command(cli_upload_reads_wizard)
16
16
  cli_upload.add_command(cli_upload_file)
17
17
  cli_upload.add_command(cli_metadata)
18
+
19
# Second group registered under the command name 'upload' (the same name the
# `cli_upload` group uses); presumably it is mounted under a different parent
# group so the two do not collide - verify where it is added.
@click.group(name='upload')
def cli_upload_advanced():
    """Advanced tools to upload files to GeoSeeq."""
23
+
24
+ cli_upload_advanced.add_command(cli_find_urls_for_reads)
@@ -0,0 +1,92 @@
1
+ import logging
2
+ import click
3
+ import requests
4
+ from os.path import basename, getsize
5
+ from .upload_reads import (
6
+ _make_in_process_logger,
7
+ _get_regex,
8
+ _group_files,
9
+ flatten_list_of_fastqs,
10
+ )
11
+
12
+ from multiprocessing import Pool, current_process
13
+
14
+ from geoseeq.cli.constants import *
15
+ from geoseeq.cli.shared_params import (
16
+ handle_project_id,
17
+ private_option,
18
+ module_option,
19
+ project_id_arg,
20
+ overwrite_option,
21
+ yes_option,
22
+ use_common_state,
23
+ )
24
+
25
+ from geoseeq.constants import FASTQ_MODULE_NAMES
26
+ from geoseeq.cli.progress_bar import PBarManager
27
+
28
+ logger = logging.getLogger('geoseeq_api')
29
+
30
+
31
+ def _keep_only_authentication_url_args(url):
32
+ """Return a url with only the S3 authentication args"""
33
+ root, args = url.split('?')
34
+ args = args.split('&')
35
+ args = [arg for arg in args if arg.startswith('AWSAccessKeyId=') or arg.startswith('Signature=')]
36
+ return root + '?' + '&'.join(args)
37
+
38
+
39
def _get_url_for_one_file(args):
    """Look up the upload url for one local file.

    `args` is a single 4-tuple `(result_file, filepath, overwrite, log_level)`
    so the function can be handed directly to a pool's map-style methods.

    Returns `(filepath, url)`, or None when the result file already exists
    and `overwrite` is falsy.
    """
    remote_file, local_path, overwrite, log_level = args
    _make_in_process_logger(log_level)
    already_exists = remote_file.exists()
    if already_exists and not overwrite:
        return None
    remote_file = remote_file.idem()
    n_bytes = getsize(local_path)
    # The requested chunk size is one byte larger than the file, so the whole
    # file fits in one chunk; only the url stored under key '1' is used.
    _unused, part_urls = remote_file._prep_multipart_upload(local_path, n_bytes, n_bytes + 1, {})
    return local_path, _keep_only_authentication_url_args(part_urls['1'])
50
+
51
+
52
def _find_target_urls(groups, module_name, lib, filepaths, overwrite, cores, state):
    """Use GeoSeeq to get target urls for a set of files.

    For every group, the sample named `group['sample_name']` and its
    `module_name` result folder are fetched or created via `.idem()`, and one
    lookup job is queued per entry of `group['fields']`. The jobs are run in
    a pool of `cores` worker processes.

    Yields `(filepath, target_url)` pairs in completion order, which is not
    necessarily input order. Files for which the worker returns None are
    skipped.
    """
    find_url_args = []
    for group in groups:
        sample = lib.sample(group['sample_name']).idem()
        read_folder = sample.result_folder(module_name).idem()
        for field_name, path in group['fields'].items():
            result_file = read_folder.read_file(field_name)
            find_url_args.append((result_file, filepaths[path], overwrite, state.log_level))

    with Pool(cores) as pool:
        for result in pool.imap_unordered(_get_url_for_one_file, find_url_args):
            # The worker returns None for files it skips (already present and
            # overwrite is off). Unpacking None would raise TypeError.
            if result is None:
                continue
            file_name, target_url = result
            yield file_name, target_url
70
+
71
+
72
@click.command('read-links')
@use_common_state
@click.option('--cores', default=1, help='Number of processes used to request URLs in parallel')
@overwrite_option
@yes_option
@click.option('--regex', default=None, help='An optional regex to use to extract sample names from the file names')
@private_option
@module_option(FASTQ_MODULE_NAMES)
@project_id_arg
@click.argument('fastq_files', type=click.Path(exists=True), nargs=-1)
def cli_find_urls_for_reads(state, cores, overwrite, yes, regex, private, module_name, project_id, fastq_files):
    """Print a two column list with filenames and a target storage URL

    Each output line holds a local file and its URL separated by a tab.
    """
    knex = state.get_knex()
    proj = handle_project_id(knex, project_id, yes, private)
    # Keyed by base name: two input files in different directories that share
    # a base name collapse into a single entry here.
    filepaths = {basename(path): path for path in flatten_list_of_fastqs(fastq_files)}
    # Status goes to stderr so it does not mix with the tab-separated output.
    click.echo(f'Found {len(filepaths)} files to upload.', err=True)
    regex = _get_regex(knex, filepaths, module_name, proj, regex)
    groups = _group_files(knex, filepaths, module_name, regex, yes)
    targets = _find_target_urls(groups, module_name, proj, filepaths, overwrite, cores, state)
    for file_name, target_url in targets:
        print(f'{file_name}\t{target_url}', file=state.outfile)
@@ -20,6 +20,7 @@ class Project(RemoteObject):
20
20
  ]
21
21
  optional_remote_fields = [
22
22
  "privacy_level",
23
+ "samples_count",
23
24
  ]
24
25
  parent_field = "org"
25
26
  url_prefix = "sample_groups"
@@ -258,7 +259,9 @@ class Project(RemoteObject):
258
259
  @property
259
260
  def n_samples(self):
260
261
  """Return the number of samples in this project."""
261
- return self.samples_count
262
+ if self.hasattr('samples_count') and self.samples_count is not None:
263
+ return self.samples_count
264
+ return len(list(self.get_sample_uuids()))
262
265
 
263
266
  def bulk_find_files(self,
264
267
  sample_uuids=[],
@@ -41,14 +41,13 @@ class FileChunker:
41
41
 
42
42
  class ResultFileUpload:
43
43
  """Abstract class that handles upload methods for result files."""
44
-
45
- def _prep_multipart_upload(self, filepath, file_size, chunk_size, optional_fields):
46
- n_parts = int(file_size / chunk_size) + 1
44
+
45
+ def _create_multipart_upload(self, filepath, file_size, optional_fields):
47
46
  optional_fields = optional_fields if optional_fields else {}
48
47
  optional_fields.update(
49
48
  {
50
49
  "md5_checksum": md5_checksum(filepath),
51
- "file_size_bytes": getsize(filepath),
50
+ "file_size_bytes": file_size,
52
51
  }
53
52
  )
54
53
  data = {
@@ -57,6 +56,11 @@ class ResultFileUpload:
57
56
  "result_type": "sample" if self.is_sample_result else "group",
58
57
  }
59
58
  response = self.knex.post(f"/ar_fields/{self.uuid}/create_upload", json=data)
59
+ return response
60
+
61
+ def _prep_multipart_upload(self, filepath, file_size, chunk_size, optional_fields):
62
+ n_parts = int(file_size / chunk_size) + 1
63
+ response = self._create_multipart_upload(filepath, file_size, optional_fields)
60
64
  upload_id = response["upload_id"]
61
65
  parts = list(range(1, n_parts + 1))
62
66
  data = {
@@ -105,6 +109,7 @@ class ResultFileUpload:
105
109
 
106
110
  def _upload_parts(self, file_chunker, urls, max_retries, session, progress_tracker, threads):
107
111
  if threads == 1:
112
+ logger.info(f"Uploading parts in series for {file_chunker.filepath}")
108
113
  complete_parts = []
109
114
  for num, url in enumerate(list(urls.values())):
110
115
  response_part = self._upload_one_part(file_chunker, url, num, max_retries, session)
@@ -114,6 +119,7 @@ class ResultFileUpload:
114
119
  return complete_parts
115
120
 
116
121
  with ThreadPoolExecutor(max_workers=threads) as executor:
122
+ logger.info(f"Uploading parts in parallel for {file_chunker.filepath} with {threads} threads.")
117
123
  futures = []
118
124
  for num, url in enumerate(list(urls.values())):
119
125
  future = executor.submit(
@@ -128,6 +134,7 @@ class ResultFileUpload:
128
134
  logger.info(
129
135
  f'Uploaded part {response_part["PartNumber"]} of {len(urls)} for "{file_chunker.filepath}"'
130
136
  )
137
+ complete_parts = sorted(complete_parts, key=lambda x: x["PartNumber"])
131
138
  return complete_parts
132
139
 
133
140
  def multipart_upload_file(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geoseeq
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: # Geoseeq API Client
5
5
  Author: David C. Danko
6
6
  Author-email: dcdanko@biotia.io
@@ -27,6 +27,7 @@ geoseeq.egg-info/top_level.txt
27
27
  geoseeq/cli/__init__.py
28
28
  geoseeq/cli/constants.py
29
29
  geoseeq/cli/copy.py
30
+ geoseeq/cli/detail.py
30
31
  geoseeq/cli/download.py
31
32
  geoseeq/cli/fastq_utils.py
32
33
  geoseeq/cli/main.py
@@ -44,6 +45,7 @@ geoseeq/cli/shared_params/obj_getters.py
44
45
  geoseeq/cli/shared_params/opts_and_args.py
45
46
  geoseeq/cli/upload/__init__.py
46
47
  geoseeq/cli/upload/upload.py
48
+ geoseeq/cli/upload/upload_advanced.py
47
49
  geoseeq/cli/upload/upload_reads.py
48
50
  geoseeq/contrib/__init__.py
49
51
  geoseeq/contrib/ncbi/__init__.py
@@ -5,7 +5,7 @@ import setuptools
5
5
 
6
6
  setuptools.setup(
7
7
  name='geoseeq',
8
- version='0.3.0', # remember to update version string in CLI as well
8
+ version='0.3.1', # remember to update version string in CLI as well
9
9
  author="David C. Danko",
10
10
  author_email='dcdanko@biotia.io',
11
11
  description=open('README.md').read(),
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes