geoseeq 0.5.6a7__tar.gz → 0.5.6a9__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (93)
  1. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/PKG-INFO +1 -1
  2. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/main.py +1 -1
  3. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/shared_params/opts_and_args.py +1 -1
  4. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/upload/upload.py +10 -4
  5. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/upload/upload_reads.py +7 -3
  6. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/file_system_cache.py +2 -2
  7. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/result/file_upload.py +111 -10
  8. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/result/result_file.py +8 -0
  9. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/upload_download_manager.py +14 -4
  10. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq.egg-info/PKG-INFO +1 -1
  11. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/pyproject.toml +1 -1
  12. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/setup.py +1 -1
  13. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/LICENSE +0 -0
  14. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/README.md +0 -0
  15. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/__init__.py +0 -0
  16. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/app.py +0 -0
  17. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/blob_constructors.py +0 -0
  18. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/bulk_creators.py +0 -0
  19. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/__init__.py +0 -0
  20. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/constants.py +0 -0
  21. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/copy.py +0 -0
  22. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/detail.py +0 -0
  23. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/download.py +0 -0
  24. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/fastq_utils.py +0 -0
  25. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/get_eula.py +0 -0
  26. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/manage.py +0 -0
  27. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/progress_bar.py +0 -0
  28. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/run.py +0 -0
  29. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/search.py +0 -0
  30. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/shared_params/__init__.py +0 -0
  31. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/shared_params/common_state.py +0 -0
  32. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/shared_params/config.py +0 -0
  33. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/shared_params/id_handlers.py +0 -0
  34. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/shared_params/obj_getters.py +0 -0
  35. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/upload/__init__.py +0 -0
  36. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/upload/upload_advanced.py +0 -0
  37. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/user.py +0 -0
  38. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/utils.py +0 -0
  39. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/cli/view.py +0 -0
  40. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/constants.py +0 -0
  41. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/contrib/__init__.py +0 -0
  42. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/contrib/ncbi/__init__.py +0 -0
  43. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/contrib/ncbi/api.py +0 -0
  44. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/contrib/ncbi/bioproject.py +0 -0
  45. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/contrib/ncbi/cli.py +0 -0
  46. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/contrib/ncbi/setup_logging.py +0 -0
  47. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/id_constructors/__init__.py +0 -0
  48. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/id_constructors/from_blobs.py +0 -0
  49. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/id_constructors/from_ids.py +0 -0
  50. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/id_constructors/from_names.py +0 -0
  51. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/id_constructors/from_uuids.py +0 -0
  52. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/id_constructors/resolvers.py +0 -0
  53. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/id_constructors/utils.py +0 -0
  54. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/knex.py +0 -0
  55. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/organization.py +0 -0
  56. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/pipeline.py +0 -0
  57. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/plotting/__init__.py +0 -0
  58. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/plotting/constants.py +0 -0
  59. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/plotting/highcharts.py +0 -0
  60. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/plotting/map/__init__.py +0 -0
  61. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/plotting/map/base_layer.py +0 -0
  62. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/plotting/map/map.py +0 -0
  63. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/plotting/map/overlay.py +0 -0
  64. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/plotting/selectable.py +0 -0
  65. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/project.py +0 -0
  66. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/remote_object.py +0 -0
  67. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/result/__init__.py +0 -0
  68. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/result/bioinfo.py +0 -0
  69. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/result/file_download.py +0 -0
  70. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/result/result_folder.py +0 -0
  71. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/result/utils.py +0 -0
  72. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/sample.py +0 -0
  73. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/search.py +0 -0
  74. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/user.py +0 -0
  75. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/utils.py +0 -0
  76. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/vc/__init__.py +0 -0
  77. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/vc/checksum.py +0 -0
  78. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/vc/cli.py +0 -0
  79. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/vc/clone.py +0 -0
  80. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/vc/constants.py +0 -0
  81. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/vc/vc_cache.py +0 -0
  82. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/vc/vc_dir.py +0 -0
  83. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/vc/vc_sample.py +0 -0
  84. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/vc/vc_stub.py +0 -0
  85. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq/work_orders.py +0 -0
  86. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq.egg-info/SOURCES.txt +0 -0
  87. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq.egg-info/dependency_links.txt +0 -0
  88. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq.egg-info/entry_points.txt +0 -0
  89. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/geoseeq.egg-info/top_level.txt +0 -0
  90. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/setup.cfg +0 -0
  91. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/tests/__init__.py +0 -0
  92. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/tests/test_api_client.py +0 -0
  93. {geoseeq-0.5.6a7 → geoseeq-0.5.6a9}/tests/test_plotting.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geoseeq
3
- Version: 0.5.6a7
3
+ Version: 0.5.6a9
4
4
  Summary: GeoSeeq command line tools and python API
5
5
  Author: David C. Danko
6
6
  Author-email: "David C. Danko" <dcdanko@biotia.io>
@@ -53,7 +53,7 @@ def version():
53
53
  Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
54
54
  Run `geoseeq eula show` to view the EULA.
55
55
  """
56
- click.echo('0.5.6a7') # remember to update setup
56
+ click.echo('0.5.6a9') # remember to update setup
57
57
 
58
58
 
59
59
  @main.group('advanced')
@@ -2,7 +2,7 @@ import click
2
2
 
3
3
  dryrun_option = click.option('--dryrun/--wetrun', default=False, help='Print what will be created without actually creating it')
4
4
  overwrite_option = click.option('--overwrite/--no-overwrite', default=False, help='Overwrite existing samples, files, and data')
5
-
5
+ no_new_versions_option = click.option('--no-new-versions/--new-versions', default=False, help='Do not create new versions of the data')
6
6
  def module_option(options, use_default=True, default=None):
7
7
  if use_default:
8
8
  default = default or options[0]
@@ -24,6 +24,7 @@ from geoseeq.cli.shared_params import (
24
24
  handle_project_id,
25
25
  project_or_sample_id_arg,
26
26
  handle_project_or_sample_id,
27
+ no_new_versions_option,
27
28
  )
28
29
  from geoseeq.upload_download_manager import GeoSeeqUploadManager
29
30
 
@@ -41,11 +42,12 @@ hidden_option = click.option('--hidden/--no-hidden', default=False, help='Upload
41
42
  @link_option
42
43
  @recursive_option
43
44
  @hidden_option
45
+ @no_new_versions_option
44
46
  @click.option('-n', '--geoseeq-file-name', default=None, multiple=True,
45
47
  help='Specify a different name for the file on GeoSeeq than the local file name.')
46
48
  @folder_id_arg
47
49
  @click.argument('file_paths', type=click.Path(exists=True), nargs=-1)
48
- def cli_upload_file(state, cores, yes, private, link_type, recursive, hidden, geoseeq_file_name, folder_id, file_paths):
50
+ def cli_upload_file(state, cores, yes, private, link_type, recursive, hidden, no_new_versions, geoseeq_file_name, folder_id, file_paths):
49
51
  """Upload files to GeoSeeq.
50
52
 
51
53
  This command uploads files to either a sample or project on GeoSeeq. It can be used to upload
@@ -107,7 +109,8 @@ def cli_upload_file(state, cores, yes, private, link_type, recursive, hidden, ge
107
109
  link_type=link_type,
108
110
  progress_tracker_factory=PBarManager().get_new_bar,
109
111
  log_level=state.log_level,
110
- overwrite=True
112
+ no_new_versions=no_new_versions,
113
+ use_cache=state.use_cache,
111
114
  )
112
115
  for geoseeq_file_name, file_path in name_pairs:
113
116
  if isfile(file_path):
@@ -130,9 +133,10 @@ def cli_upload_file(state, cores, yes, private, link_type, recursive, hidden, ge
130
133
  @private_option
131
134
  @recursive_option
132
135
  @hidden_option
136
+ @no_new_versions_option
133
137
  @project_or_sample_id_arg
134
138
  @click.argument('folder_names', type=click.Path(exists=True), nargs=-1)
135
- def cli_upload_folder(state, cores, yes, private, recursive, hidden, project_or_sample_id, folder_names):
139
+ def cli_upload_folder(state, cores, yes, private, recursive, hidden, no_new_versions, project_or_sample_id, folder_names):
136
140
  knex = state.get_knex()
137
141
  root_obj = handle_project_or_sample_id(knex, project_or_sample_id, yes=yes, private=private)
138
142
  upload_manager = GeoSeeqUploadManager(
@@ -140,7 +144,9 @@ def cli_upload_folder(state, cores, yes, private, recursive, hidden, project_or_
140
144
  link_type='upload',
141
145
  progress_tracker_factory=PBarManager().get_new_bar,
142
146
  log_level=logging.INFO,
143
- overwrite=True
147
+ overwrite=True,
148
+ use_cache=state.use_cache,
149
+ no_new_versions=no_new_versions,
144
150
  )
145
151
  for folder_name in folder_names:
146
152
  result_folder = root_obj.result_folder(folder_name).idem()
@@ -15,6 +15,7 @@ from geoseeq.cli.shared_params import (
15
15
  overwrite_option,
16
16
  yes_option,
17
17
  use_common_state,
18
+ no_new_versions_option
18
19
  )
19
20
  from geoseeq.upload_download_manager import GeoSeeqUploadManager
20
21
 
@@ -85,7 +86,7 @@ def _group_files(knex, filepaths, module_name, regex, yes):
85
86
  return groups
86
87
 
87
88
 
88
- def _do_upload(groups, module_name, link_type, lib, filepaths, overwrite, cores, state):
89
+ def _do_upload(groups, module_name, link_type, lib, filepaths, overwrite, no_new_versions, cores, state):
89
90
 
90
91
  with requests.Session() as session:
91
92
  upload_manager = GeoSeeqUploadManager(
@@ -95,6 +96,8 @@ def _do_upload(groups, module_name, link_type, lib, filepaths, overwrite, cores,
95
96
  log_level=state.log_level,
96
97
  overwrite=overwrite,
97
98
  progress_tracker_factory=PBarManager().get_new_bar,
99
+ use_cache=state.use_cache,
100
+ no_new_versions=no_new_versions,
98
101
  )
99
102
  for group in groups:
100
103
  sample = lib.sample(group['sample_name']).idem()
@@ -138,10 +141,11 @@ def flatten_list_of_fastqs(filepaths):
138
141
  @click.option('--regex', default=None, help='An optional regex to use to extract sample names from the file names')
139
142
  @private_option
140
143
  @link_option
144
+ @no_new_versions_option
141
145
  @module_option(FASTQ_MODULE_NAMES)
142
146
  @project_id_arg
143
147
  @click.argument('fastq_files', type=click.Path(exists=True), nargs=-1)
144
- def cli_upload_reads_wizard(state, cores, overwrite, yes, regex, private, link_type, module_name, project_id, fastq_files):
148
+ def cli_upload_reads_wizard(state, cores, overwrite, yes, regex, private, link_type, no_new_versions, module_name, project_id, fastq_files):
145
149
  """Upload fastq read files to GeoSeeq.
146
150
 
147
151
  This command automatically groups files by their sample name, lane number
@@ -195,4 +199,4 @@ def cli_upload_reads_wizard(state, cores, overwrite, yes, regex, private, link_t
195
199
  click.echo(f'Found {len(filepaths)} files to upload.', err=True)
196
200
  regex = _get_regex(knex, filepaths, module_name, proj, regex)
197
201
  groups = _group_files(knex, filepaths, module_name, regex, yes)
198
- _do_upload(groups, module_name, link_type, proj, filepaths, overwrite, cores, state)
202
+ _do_upload(groups, module_name, link_type, proj, filepaths, overwrite, no_new_versions, cores, state)
@@ -15,7 +15,7 @@ CACHE_DIR = join(
15
15
  "geoseeq"
16
16
  )
17
17
  USE_GEOSEEQ_CACHE = None
18
-
18
+ GEOSEEQ_CACHE_DIR = abspath(f'{CACHE_DIR}/geoseeq_api_cache/v1/')
19
19
 
20
20
  def hash_obj(obj):
21
21
  val = obj
@@ -41,7 +41,7 @@ class FileSystemCache:
41
41
 
42
42
  @property
43
43
  def cache_dir_path(self):
44
- return abspath(f'{CACHE_DIR}/geoseeq_api_cache/v1/')
44
+ return GEOSEEQ_CACHE_DIR
45
45
 
46
46
  def setup(self):
47
47
  if self.no_cache:
@@ -1,7 +1,8 @@
1
1
 
2
2
  import time
3
3
  import json
4
- from os.path import basename, getsize
4
+ import os
5
+ from os.path import basename, getsize, join, dirname, isfile
5
6
  from pathlib import Path
6
7
 
7
8
  import requests
@@ -11,7 +12,7 @@ from geoseeq.constants import FIVE_MB
11
12
  from geoseeq.utils import md5_checksum
12
13
  from concurrent.futures import ThreadPoolExecutor, as_completed
13
14
  from .utils import *
14
-
15
+ from geoseeq.file_system_cache import GEOSEEQ_CACHE_DIR
15
16
 
16
17
  class FileChunker:
17
18
 
@@ -38,6 +39,77 @@ class FileChunker:
38
39
  def get_chunk_size(self, num):
39
40
  self.load_all_chunks()
40
41
  return len(self.loaded_parts[num])
42
+
43
+
44
+ class ResumableUploadTracker:
45
+
46
+ def __init__(self, filepath, chunk_size, tracker_file_prefix="gs_resumable_upload_tracker"):
47
+ self.open, self.upload_started = True, False
48
+ self.upload_id, self.urls = None, None
49
+ self.filepath = filepath
50
+ self.tracker_file = join(
51
+ GEOSEEQ_CACHE_DIR, 'upload',
52
+ tracker_file_prefix + f".{chunk_size}." + basename(filepath)
53
+ )
54
+ try:
55
+ os.makedirs(dirname(self.tracker_file), exist_ok=True)
56
+ except Exception as e:
57
+ logger.warning(f'Could not create resumable upload tracker directory. {e}')
58
+ self.open = False
59
+ self._loaded_parts = {}
60
+ self._load_parts_from_file()
61
+
62
+ def start_upload(self, upload_id, urls):
63
+ if not self.open:
64
+ return
65
+ if self.upload_started:
66
+ raise GeoseeqGeneralError("Upload has already started.")
67
+ blob = dict(upload_id=upload_id, urls=urls)
68
+ serialized = json.dumps(blob)
69
+ with open(self.tracker_file, "w") as f:
70
+ f.write(serialized + "\n")
71
+ self.upload_id, self.urls = upload_id, urls
72
+ self.upload_started = True
73
+
74
+ def add_part(self, part_upload_info):
75
+ if not self.open:
76
+ return
77
+ part_id = part_upload_info["PartNumber"]
78
+ serialized = json.dumps(part_upload_info)
79
+ with open(self.tracker_file, "a") as f:
80
+ f.write(serialized + "\n")
81
+ self._loaded_parts[part_id] = part_upload_info
82
+ if len(self._loaded_parts) == len(self.urls):
83
+ self.cleanup()
84
+ self.open = False
85
+
86
+ def _load_parts_from_file(self):
87
+ if not isfile(self.tracker_file):
88
+ return
89
+ with open(self.tracker_file, "r") as f:
90
+ header_blob = json.loads(f.readline())
91
+ self.upload_id, self.urls = header_blob["upload_id"], header_blob["urls"]
92
+ self.upload_started = True
93
+ for line in f:
94
+ blob = json.loads(line)
95
+ part_id = blob["PartNumber"]
96
+ self._loaded_parts[part_id] = blob
97
+
98
+ def part_has_been_uploaded(self, part_number):
99
+ if not self.open:
100
+ return False
101
+ return part_number in self._loaded_parts
102
+
103
+ def get_part_info(self, part_number):
104
+ return self._loaded_parts[part_number]
105
+
106
+ def cleanup(self):
107
+ if not self.open:
108
+ return
109
+ try:
110
+ os.remove(self.tracker_file)
111
+ except FileNotFoundError:
112
+ pass
41
113
 
42
114
 
43
115
  class ResultFileUpload:
@@ -74,7 +146,10 @@ class ResultFileUpload:
74
146
  urls = response
75
147
  return upload_id, urls
76
148
 
77
- def _upload_one_part(self, file_chunker, url, num, max_retries, session=None):
149
+ def _upload_one_part(self, file_chunker, url, num, max_retries, session=None, resumable_upload_tracker=None):
150
+ if resumable_upload_tracker and resumable_upload_tracker.part_has_been_uploaded(num + 1):
151
+ logger.info(f"Part {num + 1} has already been uploaded. Skipping.")
152
+ return resumable_upload_tracker.get_part_info(num + 1)
78
153
  file_chunk = file_chunker.get_chunk(num)
79
154
  attempts = 0
80
155
  while attempts < max_retries:
@@ -94,7 +169,12 @@ class ResultFileUpload:
94
169
  if attempts == max_retries:
95
170
  raise
96
171
  time.sleep(10**attempts) # exponential backoff, (10 ** 2)s default max
97
- return {"ETag": http_response.headers["ETag"], "PartNumber": num + 1}
172
+ etag = http_response.headers["ETag"].replace('"', "")
173
+ blob = {"ETag": etag, "PartNumber": num + 1}
174
+ if resumable_upload_tracker:
175
+ # TODO technically not thread safe, but should be fine for now
176
+ resumable_upload_tracker.add_part(blob)
177
+ return blob
98
178
 
99
179
  def _finish_multipart_upload(self, upload_id, complete_parts):
100
180
  response = self.knex.post(
@@ -108,12 +188,12 @@ class ResultFileUpload:
108
188
  )
109
189
  response.raise_for_status()
110
190
 
111
- def _upload_parts(self, file_chunker, urls, max_retries, session, progress_tracker, threads):
191
+ def _upload_parts(self, file_chunker, urls, max_retries, session, progress_tracker, threads, resumable_upload_tracker=None):
112
192
  if threads == 1:
113
193
  logger.info(f"Uploading parts in series for {file_chunker.filepath}")
114
194
  complete_parts = []
115
195
  for num, url in enumerate(list(urls.values())):
116
- response_part = self._upload_one_part(file_chunker, url, num, max_retries, session)
196
+ response_part = self._upload_one_part(file_chunker, url, num, max_retries, session, resumable_upload_tracker)
117
197
  complete_parts.append(response_part)
118
198
  if progress_tracker: progress_tracker.update(file_chunker.get_chunk_size(num))
119
199
  logger.info(f'Uploaded part {num + 1} of {len(urls)} for "{file_chunker.filepath}"')
@@ -124,7 +204,7 @@ class ResultFileUpload:
124
204
  futures = []
125
205
  for num, url in enumerate(list(urls.values())):
126
206
  future = executor.submit(
127
- self._upload_one_part, file_chunker, url, num, max_retries, session
207
+ self._upload_one_part, file_chunker, url, num, max_retries, session, resumable_upload_tracker
128
208
  )
129
209
  futures.append(future)
130
210
  complete_parts = []
@@ -148,23 +228,44 @@ class ResultFileUpload:
148
228
  session=None,
149
229
  progress_tracker=None,
150
230
  threads=1,
231
+ use_cache=True,
151
232
  ):
152
233
  """Upload a file to S3 using the multipart upload process."""
153
234
  logger.info(f"Uploading {filepath} to S3 using multipart upload.")
154
- upload_id, urls = self._prep_multipart_upload(filepath, file_size, chunk_size, optional_fields)
235
+ resumable_upload_tracker = None
236
+ if use_cache and file_size > 10 * FIVE_MB: # only use resumable upload tracker for larger files
237
+ resumable_upload_tracker = ResumableUploadTracker(filepath, chunk_size)
238
+ if resumable_upload_tracker and resumable_upload_tracker.upload_started:
239
+ upload_id, urls = resumable_upload_tracker.upload_id, resumable_upload_tracker.urls
240
+ logger.info(f'Resuming upload for "{filepath}", upload_id: "{upload_id}"')
241
+ else:
242
+ upload_id, urls = self._prep_multipart_upload(filepath, file_size, chunk_size, optional_fields)
243
+ if resumable_upload_tracker:
244
+ logger.info(f'Creating new resumable upload for "{filepath}", upload_id: "{upload_id}"')
245
+ resumable_upload_tracker.start_upload(upload_id, urls)
155
246
  logger.info(f'Starting upload for "{filepath}"')
156
247
  complete_parts = []
157
248
  file_chunker = FileChunker(filepath, chunk_size).load_all_chunks()
158
249
  if progress_tracker: progress_tracker.set_num_chunks(file_chunker.file_size)
159
- complete_parts = self._upload_parts(file_chunker, urls, max_retries, session, progress_tracker, threads)
250
+ complete_parts = self._upload_parts(
251
+ file_chunker,
252
+ urls,
253
+ max_retries,
254
+ session,
255
+ progress_tracker,
256
+ threads,
257
+ resumable_upload_tracker=resumable_upload_tracker
258
+ )
160
259
  self._finish_multipart_upload(upload_id, complete_parts)
161
260
  logger.info(f'Finished Upload for "{filepath}"')
162
261
  return self
163
262
 
164
- def upload_file(self, filepath, multipart_thresh=FIVE_MB, overwrite=True, **kwargs):
263
+ def upload_file(self, filepath, multipart_thresh=FIVE_MB, overwrite=True, no_new_versions=False, **kwargs):
165
264
  if self.exists() and not overwrite:
166
265
  raise GeoseeqGeneralError(f"Overwrite is set to False and file {self.uuid} already exists.")
167
266
  self.idem()
267
+ if no_new_versions and self.has_downloadable_file():
268
+ raise GeoseeqGeneralError(f"File {self} already has a downloadable file. Not uploading a new version.")
168
269
  resolved_path = Path(filepath).resolve()
169
270
  file_size = getsize(resolved_path)
170
271
  return self.multipart_upload_file(filepath, file_size, **kwargs)
@@ -53,6 +53,14 @@ class ResultFile(RemoteObject, ResultFileUpload, ResultFileDownload):
53
53
  obj_type = "sample" if self.canon_url() == "sample_ar_fields" else "project"
54
54
  brn = f"brn:{self.knex.instance_code()}:{obj_type}_result_field:{self.uuid}"
55
55
 
56
+ def has_downloadable_file(self):
57
+ """Return True if this field has a downloadable file."""
58
+ try:
59
+ self.download(head=10, cache=False)
60
+ return True
61
+ except Exception as e:
62
+ return False
63
+
56
64
  def nested_url(self):
57
65
  escaped_name = urllib.parse.quote(self.name, safe="")
58
66
  return self.parent.nested_url() + f"/fields/{escaped_name}"
@@ -19,12 +19,18 @@ def _make_in_process_logger(log_level):
19
19
 
20
20
 
21
21
  def _upload_one_file(args):
22
- result_file, filepath, session, progress_tracker, link_type, overwrite, log_level, parallel_uploads = args
22
+ (result_file, filepath, session, progress_tracker,
23
+ link_type, overwrite, log_level, parallel_uploads,
24
+ use_cache, no_new_versions) = args
23
25
  if parallel_uploads:
24
26
  _make_in_process_logger(log_level)
25
27
  if link_type == 'upload':
26
28
  # TODO: check checksums to see if the file is the same
27
- result_file.upload_file(filepath, session=session, overwrite=overwrite, progress_tracker=progress_tracker, threads=4)
29
+ result_file.upload_file(
30
+ filepath,
31
+ session=session, overwrite=overwrite, progress_tracker=progress_tracker,
32
+ threads=4, use_cache=use_cache, no_new_versions=no_new_versions
33
+ )
28
34
  else:
29
35
  result_file.link_file(link_type, filepath)
30
36
  return result_file
@@ -38,7 +44,9 @@ class GeoSeeqUploadManager:
38
44
  link_type='upload',
39
45
  progress_tracker_factory=None,
40
46
  log_level=logging.WARNING,
41
- overwrite=True):
47
+ overwrite=True,
48
+ no_new_versions=False,
49
+ use_cache=True):
42
50
  self.session = session
43
51
  self.n_parallel_uploads = n_parallel_uploads
44
52
  self.progress_tracker_factory = progress_tracker_factory if progress_tracker_factory else lambda x: None
@@ -46,6 +54,8 @@ class GeoSeeqUploadManager:
46
54
  self.link_type = link_type
47
55
  self.overwrite = overwrite
48
56
  self._result_files = []
57
+ self.no_new_versions = no_new_versions
58
+ self.use_cache = use_cache
49
59
 
50
60
  def add_result_file(self, result_file, local_path):
51
61
  self._result_files.append((result_file, local_path))
@@ -70,7 +80,7 @@ class GeoSeeqUploadManager:
70
80
  result_file, local_path,
71
81
  self.session, self.progress_tracker_factory(local_path),
72
82
  self.link_type, self.overwrite, self.log_level,
73
- self.n_parallel_uploads > 1
83
+ self.n_parallel_uploads > 1, self.use_cache, self.no_new_versions
74
84
  ) for result_file, local_path in self._result_files
75
85
  ]
76
86
  out = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geoseeq
3
- Version: 0.5.6a7
3
+ Version: 0.5.6a9
4
4
  Summary: GeoSeeq command line tools and python API
5
5
  Author: David C. Danko
6
6
  Author-email: "David C. Danko" <dcdanko@biotia.io>
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "geoseeq"
7
- version = "0.5.6a7"
7
+ version = "0.5.6a9"
8
8
  authors = [
9
9
  { name="David C. Danko", email="dcdanko@biotia.io" },
10
10
  ]
@@ -5,7 +5,7 @@ import setuptools
5
5
 
6
6
  setuptools.setup(
7
7
  name='geoseeq',
8
- version='0.5.6a7', # remember to update version string in CLI as well
8
+ version='0.5.6a7', # DEPRECATED see pyproject.toml remember to update version string in CLI as well
9
9
  author="David C. Danko",
10
10
  author_email='dcdanko@biotia.io',
11
11
  description=open('README.md').read(),
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes