geoseeq 0.5.6a16__tar.gz → 0.6.1__tar.gz

Files changed (98)
  1. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/PKG-INFO +1 -1
  2. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/download.py +1 -0
  3. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/main.py +3 -1
  4. geoseeq-0.6.1/geoseeq/cli/project.py +96 -0
  5. geoseeq-0.6.1/geoseeq/cli/raw.py +59 -0
  6. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/upload/upload.py +2 -0
  7. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/upload/upload_reads.py +1 -0
  8. geoseeq-0.6.1/geoseeq/result/file_chunker.py +50 -0
  9. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/result/file_download.py +48 -10
  10. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/result/file_upload.py +55 -142
  11. geoseeq-0.6.1/geoseeq/result/resumable_download_tracker.py +99 -0
  12. geoseeq-0.6.1/geoseeq/result/resumable_upload_tracker.py +100 -0
  13. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/upload_download_manager.py +12 -4
  14. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq.egg-info/PKG-INFO +1 -1
  15. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq.egg-info/SOURCES.txt +5 -0
  16. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/pyproject.toml +1 -1
  17. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/LICENSE +0 -0
  18. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/README.md +0 -0
  19. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/__init__.py +0 -0
  20. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/app.py +0 -0
  21. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/blob_constructors.py +0 -0
  22. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/bulk_creators.py +0 -0
  23. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/__init__.py +0 -0
  24. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/constants.py +0 -0
  25. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/copy.py +0 -0
  26. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/detail.py +0 -0
  27. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/fastq_utils.py +0 -0
  28. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/get_eula.py +0 -0
  29. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/manage.py +0 -0
  30. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/progress_bar.py +0 -0
  31. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/run.py +0 -0
  32. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/search.py +0 -0
  33. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/shared_params/__init__.py +0 -0
  34. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/shared_params/common_state.py +0 -0
  35. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/shared_params/config.py +0 -0
  36. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/shared_params/id_handlers.py +0 -0
  37. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/shared_params/obj_getters.py +0 -0
  38. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/shared_params/opts_and_args.py +0 -0
  39. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/upload/__init__.py +0 -0
  40. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/upload/upload_advanced.py +0 -0
  41. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/user.py +0 -0
  42. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/utils.py +0 -0
  43. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/view.py +0 -0
  44. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/constants.py +0 -0
  45. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/contrib/__init__.py +0 -0
  46. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/contrib/ncbi/__init__.py +0 -0
  47. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/contrib/ncbi/api.py +0 -0
  48. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/contrib/ncbi/bioproject.py +0 -0
  49. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/contrib/ncbi/cli.py +0 -0
  50. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/contrib/ncbi/setup_logging.py +0 -0
  51. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/file_system_cache.py +0 -0
  52. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/id_constructors/__init__.py +0 -0
  53. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/id_constructors/from_blobs.py +0 -0
  54. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/id_constructors/from_ids.py +0 -0
  55. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/id_constructors/from_names.py +0 -0
  56. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/id_constructors/from_uuids.py +0 -0
  57. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/id_constructors/resolvers.py +0 -0
  58. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/id_constructors/utils.py +0 -0
  59. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/knex.py +0 -0
  60. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/organization.py +0 -0
  61. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/pipeline.py +0 -0
  62. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/plotting/__init__.py +0 -0
  63. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/plotting/constants.py +0 -0
  64. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/plotting/highcharts.py +0 -0
  65. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/plotting/map/__init__.py +0 -0
  66. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/plotting/map/base_layer.py +0 -0
  67. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/plotting/map/map.py +0 -0
  68. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/plotting/map/overlay.py +0 -0
  69. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/plotting/selectable.py +0 -0
  70. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/project.py +0 -0
  71. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/remote_object.py +0 -0
  72. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/result/__init__.py +0 -0
  73. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/result/bioinfo.py +0 -0
  74. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/result/result_file.py +0 -0
  75. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/result/result_folder.py +0 -0
  76. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/result/utils.py +0 -0
  77. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/sample.py +0 -0
  78. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/search.py +0 -0
  79. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/user.py +0 -0
  80. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/utils.py +0 -0
  81. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/vc/__init__.py +0 -0
  82. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/vc/checksum.py +0 -0
  83. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/vc/cli.py +0 -0
  84. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/vc/clone.py +0 -0
  85. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/vc/constants.py +0 -0
  86. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/vc/vc_cache.py +0 -0
  87. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/vc/vc_dir.py +0 -0
  88. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/vc/vc_sample.py +0 -0
  89. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/vc/vc_stub.py +0 -0
  90. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/work_orders.py +0 -0
  91. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq.egg-info/dependency_links.txt +0 -0
  92. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq.egg-info/entry_points.txt +0 -0
  93. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq.egg-info/top_level.txt +0 -0
  94. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/setup.cfg +0 -0
  95. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/setup.py +0 -0
  96. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/tests/__init__.py +0 -0
  97. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/tests/test_api_client.py +0 -0
  98. {geoseeq-0.5.6a16 → geoseeq-0.6.1}/tests/test_plotting.py +0 -0
{geoseeq-0.5.6a16 → geoseeq-0.6.1}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: geoseeq
- Version: 0.5.6a16
+ Version: 0.6.1
  Summary: GeoSeeq command line tools and python API
  Author: David C. Danko
  Author-email: "David C. Danko" <dcdanko@biotia.io>
{geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/download.py
@@ -468,3 +468,4 @@ def cli_download_fastqs(state, cores, target_dir, yes, first, download, module_n
      click.confirm('Continue?', abort=True)
      logger.info(f'Downloading {len(download_manager)} files to {target_dir}')
      download_manager.download_files()
+
{geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/main.py
@@ -18,6 +18,7 @@ from .shared_params.opts_and_args import overwrite_option, yes_option
  from .detail import cli_detail
  from .run import cli_app
  from .get_eula import cli_eula
+ from .project import cli_project
 
  logger = logging.getLogger('geoseeq_api')
  handler = logging.StreamHandler()
@@ -53,7 +54,7 @@ def version():
      Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
      Run `geoseeq eula show` to view the EULA.
      """
-     click.echo('0.5.6a16') # remember to update setup
+     click.echo('0.6.1') # remember to update setup
 
 
  @main.group('advanced')
@@ -65,6 +66,7 @@ cli_advanced.add_command(cli_copy)
  cli_advanced.add_command(cli_user)
  cli_advanced.add_command(cli_detail)
  cli_advanced.add_command(cli_upload_advanced)
+ cli_advanced.add_command(cli_project)
 
  @cli_advanced.group('experimental')
  def cli_experimental():
geoseeq-0.6.1/geoseeq/cli/project.py (new file)
@@ -0,0 +1,96 @@
+ import json
+ import logging
+ from os import makedirs, getcwd
+ from os.path import dirname, join
+
+ import click
+ import pandas as pd
+ from multiprocessing import Pool
+ from .shared_params import (
+     handle_project_id,
+     handle_folder_id,
+     project_id_arg,
+     sample_ids_arg,
+     handle_multiple_sample_ids,
+     handle_multiple_result_file_ids,
+     use_common_state,
+     flatten_list_of_els_and_files,
+     yes_option,
+     module_option,
+     ignore_errors_option,
+     folder_ids_arg,
+ )
+ from geoseeq.result.file_download import download_url
+ from geoseeq.utils import download_ftp
+ from geoseeq.id_constructors import (
+     result_file_from_uuid,
+     result_file_from_name,
+ )
+ from geoseeq.knex import GeoseeqNotFoundError
+ from .progress_bar import PBarManager
+ from .utils import convert_size
+ from geoseeq.constants import FASTQ_MODULE_NAMES
+ from geoseeq.result import ResultFile
+ from geoseeq.upload_download_manager import GeoSeeqDownloadManager
+ from geoseeq.file_system.filesystem_download import (
+     ProjectOnFilesystem,
+     FILE_STATUS_MODIFIED_REMOTE,
+     FILE_STATUS_MODIFIED_LOCAL,
+     FILE_STATUS_NEW_LOCAL,
+     FILE_STATUS_NEW_REMOTE,
+     FILE_STATUS_IS_LOCAL_STUB,
+ )
+
+
+ logger = logging.getLogger('geoseeq_api')
+
+
+ @click.group("project")
+ def cli_project():
+     """Download data from GeoSeeq."""
+     pass
+
+
+ @cli_project.command("clone")
+ @use_common_state
+ @click.option('--use-stubs/--full-files', default=True, help='Download full files or stubs')
+ @click.option('--target-dir', '-d', default=None, help='Directory to download the project to')
+ @project_id_arg
+ def cli_clone_project(state, use_stubs, target_dir, project_id):
+     """Clone a project to the local filesystem.
+     """
+     knex = state.get_knex().set_auth_required()
+     proj = handle_project_id(knex, project_id)
+     logger.info(f"Found project \"{proj.name}\"")
+     if target_dir is None:
+         target_dir = proj.name
+
+     project = ProjectOnFilesystem(proj, target_dir)
+     project.download(use_stubs=use_stubs)
+
+
+ @cli_project.command("status")
+ @use_common_state
+ def cli_project_status(state):
+     """Check the status of a project on the local filesystem.
+     """
+     project = ProjectOnFilesystem.from_path(getcwd(), recursive=True)
+
+     objs_by_status = {
+         FILE_STATUS_MODIFIED_LOCAL: [],
+         FILE_STATUS_MODIFIED_REMOTE: [],
+         FILE_STATUS_NEW_LOCAL: [],
+         FILE_STATUS_NEW_REMOTE: [],
+         FILE_STATUS_IS_LOCAL_STUB: [],
+     }
+     for obj_type, status, local_path, obj in project.list_abnormal_objects():
+         objs_by_status[status].append((obj_type, local_path, obj))
+
+     print(f"Project: {project.project.name}")
+     for status, objs in objs_by_status.items():
+         print(f"Status: {status}")
+         for obj_type, local_path, obj in objs:
+             if status in (FILE_STATUS_MODIFIED_LOCAL, FILE_STATUS_NEW_LOCAL):
+                 print(f" {obj_type}: {project.path_from_project_root(local_path)} -> {obj}")
+             else:
+                 print(f" {obj_type}: {obj} -> {project.path_from_project_root(local_path)}")
geoseeq-0.6.1/geoseeq/cli/raw.py (new file)
@@ -0,0 +1,59 @@
+ import click
+ import json
+ from .shared_params import use_common_state, overwrite_option
+ from geoseeq import GeoseeqNotFoundError
+ from geoseeq.blob_constructors import (
+     sample_result_file_from_uuid,
+     project_result_file_from_uuid,
+     sample_result_folder_from_uuid,
+     project_result_folder_from_uuid,
+ )
+
+
+ @click.group('raw')
+ def cli_raw():
+     """Low-level commands for interacting with the API."""
+     pass
+
+
+ @cli_raw.command('get-file-data')
+ @use_common_state
+ @click.argument('file_ids', nargs=-1)
+ def cli_get_file_data(state, file_ids):
+     """Print the raw stored data in a result file object."""
+     knex = state.get_knex()
+     for file_id in file_ids:
+         file_id = file_id.split(':')[-1]
+         try:
+             result_file = sample_result_file_from_uuid(knex, file_id)
+         except GeoseeqNotFoundError:
+             result_file = project_result_file_from_uuid(knex, file_id)
+         print(json.dumps(result_file.stored_data, indent=2), file=state.outfile)
+
+
+ @cli_raw.command('create-raw-file')
+ @use_common_state
+ @overwrite_option
+ @click.argument('folder_id')
+ @click.argument('result_filename')
+ @click.argument('filename', type=click.File('r'))
+ def cli_get_file_data(state, overwrite, folder_id, result_filename, filename):
+     """Print the raw stored data in a result file object."""
+     knex = state.get_knex()
+
+     folder_id = folder_id.split(':')[-1]
+     try:
+         result_folder = sample_result_folder_from_uuid(knex, folder_id)
+     except GeoseeqNotFoundError:
+         result_folder = project_result_folder_from_uuid(knex, folder_id)
+     blob = json.load(filename)
+     result_file = result_folder.result_file(result_filename)
+     if overwrite:
+         result_file.idem()
+         result_file.stored_data = blob
+         result_file.save()
+     else:
+         result_file.create()
+     click.echo(f'Created file {result_file.uuid}', file=state.outfile)
+
+
{geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/upload/upload.py
@@ -122,6 +122,7 @@ def cli_upload_file(state, cores, threads_per_upload, num_retries, chunk_size_mb
          use_cache=state.use_cache,
          num_retries=num_retries,
          ignore_errors=ignore_errors,
+         use_atomic_upload=True,
          session=None, #knex.new_session(),
          chunk_size_mb=chunk_size_mb if chunk_size_mb > 0 else None,
      )
@@ -160,6 +161,7 @@ def cli_upload_folder(state, cores, yes, private, recursive, hidden, no_new_vers
          overwrite=True,
          use_cache=state.use_cache,
          no_new_versions=no_new_versions,
+         use_atomic_upload=True,
      )
      for folder_name in folder_names:
          result_folder = root_obj.result_folder(folder_name).idem()
{geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/cli/upload/upload_reads.py
@@ -98,6 +98,7 @@ def _do_upload(groups, module_name, link_type, lib, filepaths, overwrite, no_new
          progress_tracker_factory=PBarManager().get_new_bar,
          use_cache=state.use_cache,
          no_new_versions=no_new_versions,
+         use_atomic_upload=True,
      )
      for group in groups:
          sample = lib.sample(group['sample_name']).idem()
geoseeq-0.6.1/geoseeq/result/file_chunker.py (new file)
@@ -0,0 +1,50 @@
+
+ from os.path import getsize
+ import logging
+
+ logger = logging.getLogger("geoseeq_api") # Same name as calling module
+ logger.addHandler(logging.NullHandler())
+
+
+ class FileChunker:
+
+     def __init__(self, filepath, chunk_size):
+         self.filepath = filepath
+         self.chunk_size = chunk_size
+         self.file_size = getsize(filepath)
+         self.n_parts = int(self.file_size / self.chunk_size) + 1
+         self.loaded_parts = []
+
+     def load_all_chunks(self):
+         if len(self.loaded_parts) != self.n_parts:
+             with open(self.filepath, "rb") as f:
+                 f.seek(0)
+                 for i in range(self.n_parts):
+                     chunk = f.read(self.chunk_size)
+                     self.loaded_parts.append(chunk)
+         return self # convenience for chaining
+
+     def chunk_is_preloaded(self, num):
+         return len(self.loaded_parts) > num and self.loaded_parts[num]
+
+     def read_one_chunk(self, num):
+         if not self.chunk_is_preloaded(num):
+             logger.debug(f"Reading chunk {num} from {self.filepath}")
+             with open(self.filepath, "rb") as f:
+                 f.seek(num * self.chunk_size)
+                 chunk = f.read(self.chunk_size)
+                 return chunk
+         return self.loaded_parts[num]
+
+     def get_chunk(self, num):
+         if self.chunk_is_preloaded(num):
+             return self.loaded_parts[num]
+         return self.read_one_chunk(num)
+
+     def get_chunk_size(self, num):
+         if num < (self.n_parts - 1): # all but the last chunk
+             return self.chunk_size
+         if self.chunk_is_preloaded(num): # last chunk, pre-loaded
+             return len(self.loaded_parts[num])
+         return len(self.read_one_chunk(num)) # last chunk, not pre-loaded
+
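
As a quick illustration of the chunking arithmetic above, a minimal sketch; the file path is a made-up example and FIVE_MB mirrors geoseeq.constants.FIVE_MB:

    from geoseeq.result.file_chunker import FileChunker

    FIVE_MB = 5 * 1024 * 1024
    chunker = FileChunker("reads.fastq.gz", FIVE_MB)    # hypothetical local file
    print(chunker.n_parts)                              # int(file_size / chunk_size) + 1 parts
    first = chunker.get_chunk(0)                        # read on demand; nothing is preloaded
    last = chunker.get_chunk_size(chunker.n_parts - 1)  # only the final part can be short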
{geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/result/file_download.py
@@ -2,29 +2,68 @@
  import urllib.request
  import logging
  import requests
- from os.path import basename, getsize, join, isfile, getmtime
+ import os
+ from os.path import basename, getsize, join, isfile, getmtime, dirname
  from pathlib import Path
  from tempfile import NamedTemporaryFile
 
  from geoseeq.utils import download_ftp
  from geoseeq.constants import FIVE_MB
+ from hashlib import md5
+ from .resumable_download_tracker import ResumableDownloadTracker
 
  logger = logging.getLogger("geoseeq_api") # Same name as calling module
 
+ def url_to_id(url):
+     url = url.split("?")[0]
+     return md5(url.encode()).hexdigest()[:16]
 
- def _download_head(url, filename, head=None, progress_tracker=None):
+
+ def _download_head(url, filename, head=None, start=0, progress_tracker=None):
      headers = None
      if head and head > 0:
-         headers = {"Range": f"bytes=0-{head}"}
+         headers = {"Range": f"bytes={start}-{head}"}
      response = requests.get(url, stream=True, headers=headers)
      response.raise_for_status()
      total_size_in_bytes = int(response.headers.get('content-length', 0))
      if progress_tracker: progress_tracker.set_num_chunks(total_size_in_bytes)
-     block_size = FIVE_MB
+     if total_size_in_bytes > 10 * FIVE_MB: # Use resumable download
+         print("Using resumable download")
+         return _download_resumable(response, filename, total_size_in_bytes, progress_tracker)
+     else:
+         block_size = FIVE_MB
+         with open(filename, 'wb') as file:
+             for data in response.iter_content(block_size):
+                 if progress_tracker: progress_tracker.update(len(data))
+                 file.write(data)
+         return filename
+
+
+ def _download_resumable(response, filename, total_size_in_bytes, progress_tracker=None, chunk_size=5 * FIVE_MB, part_prefix=".gs_download_{}_{}."):
+     target_id = url_to_id(response.url)
+     tracker = ResumableDownloadTracker(chunk_size, target_id, filename)
+     if not tracker.download_started: tracker.start_download(response.url)
+     n_chunks = total_size_in_bytes // chunk_size
+     for i in range(n_chunks):
+         bytes_start, bytes_end = i * chunk_size, min((i + 1) * chunk_size - 1, total_size_in_bytes - 1)
+         if tracker.part_has_been_downloaded(i):
+             logger.debug(f"Part {i} has already been downloaded.")
+         else:
+             logger.debug(f"Downloading part {i} of {n_chunks - 1}")
+             part_filename = join(dirname(filename), part_prefix.format(i, n_chunks - 1) + basename(filename))
+             _download_head(response.url, part_filename, head=bytes_end, start=bytes_start, progress_tracker=None)
+             part_info = dict(part_number=i, start=bytes_start, end=bytes_end, part_filename=part_filename)
+             tracker.add_part(part_info)
+         if progress_tracker: progress_tracker.update(bytes_end - bytes_start + 1)
+
+     # at this point all parts have been downloaded
      with open(filename, 'wb') as file:
-         for data in response.iter_content(block_size):
-             if progress_tracker: progress_tracker.update(len(data))
-             file.write(data)
+         for i in range(n_chunks):
+             part_info = tracker.get_part_info(i)
+             part_filename = part_info["part_filename"]
+             with open(part_filename, 'rb') as part_file:
+                 file.write(part_file.read())
+     tracker.cleanup()
      return filename
 
 
@@ -44,7 +83,7 @@ def guess_download_kind(url):
      return 'generic'
 
 
- def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None):
+ def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None, target_uuid=None):
      """Return a local filepath to the downloaded file. Download the file."""
      if filename and isfile(filename):
          file_size = getsize(filename)
@@ -67,7 +106,6 @@ def download_url(url, kind='guess', filename=None, head=None, progress_tracker=N
      raise ValueError(f"Unknown download kind: {kind}")
 
 
-
  class ResultFileDownload:
      """Abstract class that handles download methods for result files."""
 
@@ -136,7 +174,7 @@ class ResultFileDownload:
          url = self.get_download_url()
          filepath = download_url(
              url, blob_type, filename,
-             head=head, progress_tracker=progress_tracker
+             head=head, progress_tracker=progress_tracker,
          )
          if cache and flag_suffix:
              # create flag file
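
For intuition, the Range arithmetic in the new _download_resumable splits an object into inclusive byte windows. A small worked sketch with a made-up 60 MB object, mirroring n_chunks = total_size_in_bytes // chunk_size above:

    FIVE_MB = 5 * 1024 * 1024
    chunk_size = 5 * FIVE_MB                  # 25 MB parts, as in the diff
    total = 60 * 1024 * 1024                  # hypothetical object size
    n_chunks = total // chunk_size            # floor division, as upstream
    for i in range(n_chunks):
        start = i * chunk_size
        end = min((i + 1) * chunk_size - 1, total - 1)
        print(f"Range: bytes={start}-{end}")  # the header _download_head sends per part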
{geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/result/file_upload.py
@@ -13,130 +13,21 @@ from geoseeq.utils import md5_checksum
  from concurrent.futures import ThreadPoolExecutor, as_completed
  from .utils import *
  from geoseeq.file_system_cache import GEOSEEQ_CACHE_DIR
-
- class FileChunker:
-
-     def __init__(self, filepath, chunk_size):
-         self.filepath = filepath
-         self.chunk_size = chunk_size
-         self.file_size = getsize(filepath)
-         self.n_parts = int(self.file_size / self.chunk_size) + 1
-         self.loaded_parts = []
-
-     def load_all_chunks(self):
-         if len(self.loaded_parts) != self.n_parts:
-             with open(self.filepath, "rb") as f:
-                 f.seek(0)
-                 for i in range(self.n_parts):
-                     chunk = f.read(self.chunk_size)
-                     self.loaded_parts.append(chunk)
-         return self # convenience for chaining
-
-     def chunk_is_preloaded(self, num):
-         return len(self.loaded_parts) > num and self.loaded_parts[num]
-
-     def read_one_chunk(self, num):
-         if not self.chunk_is_preloaded(num):
-             logger.debug(f"Reading chunk {num} from {self.filepath}")
-             with open(self.filepath, "rb") as f:
-                 f.seek(num * self.chunk_size)
-                 chunk = f.read(self.chunk_size)
-                 return chunk
-         return self.loaded_parts[num]
-
-     def get_chunk(self, num):
-         if self.chunk_is_preloaded(num):
-             return self.loaded_parts[num]
-         return self.read_one_chunk(num)
-
-     def get_chunk_size(self, num):
-         if num < (self.n_parts - 1): # all but the last chunk
-             return self.chunk_size
-         if self.chunk_is_preloaded(num): # last chunk, pre-loaded
-             return len(self.loaded_parts[num])
-         return len(self.read_one_chunk(num)) # last chunk, not pre-loaded
-
-
- class ResumableUploadTracker:
-
-     def __init__(self, filepath, chunk_size, tracker_file_prefix="gs_resumable_upload_tracker"):
-         self.open, self.upload_started = True, False
-         self.upload_id, self.urls = None, None
-         self.filepath = filepath
-         self.tracker_file = join(
-             GEOSEEQ_CACHE_DIR, 'upload',
-             tracker_file_prefix + f".{chunk_size}.{getsize(filepath)}." + basename(filepath)
-         )
-         try:
-             os.makedirs(dirname(self.tracker_file), exist_ok=True)
-         except Exception as e:
-             logger.warning(f'Could not create resumable upload tracker directory. {e}')
-             self.open = False
-         self._loaded_parts = {}
-         self._load_parts_from_file()
-
-     def start_upload(self, upload_id, urls):
-         if not self.open:
-             return
-         if self.upload_started:
-             raise GeoseeqGeneralError("Upload has already started.")
-         blob = dict(upload_id=upload_id, urls=urls, start_time=time.time())
-         serialized = json.dumps(blob)
-         with open(self.tracker_file, "w") as f:
-             f.write(serialized + "\n")
-         self.upload_id, self.urls = upload_id, urls
-         self.upload_started = True
-
-     def add_part(self, part_upload_info):
-         if not self.open:
-             return
-         part_id = part_upload_info["PartNumber"]
-         serialized = json.dumps(part_upload_info)
-         with open(self.tracker_file, "a") as f:
-             f.write(serialized + "\n")
-         self._loaded_parts[part_id] = part_upload_info
-         if len(self._loaded_parts) == len(self.urls):
-             self.cleanup()
-             self.open = False
-
-     def _load_parts_from_file(self):
-         if not isfile(self.tracker_file):
-             return
-         with open(self.tracker_file, "r") as f:
-             header_blob = json.loads(f.readline())
-             self.upload_id, self.urls = header_blob["upload_id"], header_blob["urls"]
-             start_time = header_blob["start_time"]
-             if (time.time() - start_time) > (60 * 60 * 23):
-                 logger.warning(f"Tracker file {self.tracker_file} is too old. Deleting.")
-                 os.remove(self.tracker_file)
-                 return
-             self.upload_started = True
-             for line in f:
-                 blob = json.loads(line)
-                 part_id = blob["PartNumber"]
-                 self._loaded_parts[part_id] = blob
-
-     def part_has_been_uploaded(self, part_number):
-         if not self.open:
-             return False
-         return part_number in self._loaded_parts
-
-     def get_part_info(self, part_number):
-         return self._loaded_parts[part_number]
-
-     def cleanup(self):
-         if not self.open:
-             return
-         try:
-             os.remove(self.tracker_file)
-         except FileNotFoundError:
-             pass
+ from .file_chunker import FileChunker
+ from .resumable_upload_tracker import ResumableUploadTracker
 
 
  class ResultFileUpload:
      """Abstract class that handles upload methods for result files."""
 
-     def _create_multipart_upload(self, filepath, file_size, optional_fields):
+     def _result_type(self, atomic=False):
+         if self.is_sample_result:
+             return "sample"
+         if atomic:
+             return "project"
+         return "group"
+
+     def _create_multipart_upload(self, filepath, file_size, optional_fields, atomic=False):
          optional_fields = optional_fields if optional_fields else {}
         optional_fields.update(
              {
@@ -147,23 +38,31 @@ class ResultFileUpload:
          data = {
              "filename": basename(filepath),
              "optional_fields": optional_fields,
-             "result_type": "sample" if self.is_sample_result else "group",
+             "result_type": self._result_type(atomic),
          }
-         response = self.knex.post(f"/ar_fields/{self.uuid}/create_upload", json=data)
+         url = f"/ar_fields/{self.uuid}/create_upload"
+         if atomic:
+             data["fieldname"] = self.name
+             url = f"/ars/{self.parent.uuid}/create_atomic_upload"
+         response = self.knex.post(url, json=data)
          return response
 
-     def _prep_multipart_upload(self, filepath, file_size, chunk_size, optional_fields):
+     def _prep_multipart_upload(self, filepath, file_size, chunk_size, optional_fields, atomic=False):
          n_parts = int(file_size / chunk_size) + 1
-         response = self._create_multipart_upload(filepath, file_size, optional_fields)
+         response = self._create_multipart_upload(filepath, file_size, optional_fields, atomic=atomic)
          upload_id = response["upload_id"]
-         parts = list(range(1, n_parts + 1))
          data = {
-             "parts": parts,
+             "parts": list(range(1, n_parts + 1)),
              "stance": "upload-multipart",
              "upload_id": upload_id,
-             "result_type": "sample" if self.is_sample_result else "group",
+             "result_type": self._result_type(atomic),
          }
-         response = self.knex.post(f"/ar_fields/{self.uuid}/create_upload_urls", json=data)
+         url = f"/ar_fields/{self.uuid}/create_upload_urls"
+         if atomic:
+             data["uuid"] = response["uuid"]
+             data["fieldname"] = self.name
+             url = f"ars/{self.parent.uuid}/create_atomic_upload_urls"
+         response = self.knex.post(url, json=data)
          urls = response
          return upload_id, urls
 
@@ -204,16 +103,17 @@ class ResultFileUpload:
             resumable_upload_tracker.add_part(blob)
          return blob
 
-     def _finish_multipart_upload(self, upload_id, complete_parts):
-         response = self.knex.post(
-             f"/ar_fields/{self.uuid}/complete_upload",
-             json={
-                 "parts": complete_parts,
-                 "upload_id": upload_id,
-                 "result_type": "sample" if self.is_sample_result else "group",
-             },
-             json_response=False,
-         )
+     def _finish_multipart_upload(self, upload_id, complete_parts, atomic=False):
+         data = {
+             "parts": complete_parts,
+             "upload_id": upload_id,
+             "result_type": self._result_type(atomic),
+         }
+         url = f"/ar_fields/{self.uuid}/complete_upload"
+         if atomic:
+             data["fieldname"] = self.name
+             url = f"/ars/{self.parent.uuid}/complete_atomic_upload"
+         response = self.knex.post(url, json=data, json_response=False)
          response.raise_for_status()
 
      def _upload_parts(self, file_chunker, urls, max_retries, session, progress_tracker, threads, resumable_upload_tracker=None):
@@ -257,6 +157,7 @@ class ResultFileUpload:
          progress_tracker=None,
          threads=1,
          use_cache=True,
+         use_atomic_upload=False,
      ):
          """Upload a file to S3 using the multipart upload process."""
          logger.info(f"Uploading {filepath} to S3 using multipart upload.")
@@ -267,15 +168,21 @@ class ResultFileUpload:
          logger.debug(f"Using chunk size of {chunk_size} bytes.")
          resumable_upload_tracker = None
          if use_cache and file_size > 10 * FIVE_MB: # only use resumable upload tracker for larger files
-             resumable_upload_tracker = ResumableUploadTracker(filepath, chunk_size)
+             upload_target_uuid = self.parent.uuid if use_atomic_upload else self.uuid
+             resumable_upload_tracker = ResumableUploadTracker(filepath, chunk_size, upload_target_uuid)
+
          if resumable_upload_tracker and resumable_upload_tracker.upload_started:
+             # a resumable upload for this file has already started
+             resumable_upload_exists_and_is_valid = True
              upload_id, urls = resumable_upload_tracker.upload_id, resumable_upload_tracker.urls
+             use_atomic_upload = resumable_upload_tracker.is_atomic_upload
              logger.info(f'Resuming upload for "{filepath}", upload_id: "{upload_id}"')
          else:
-             upload_id, urls = self._prep_multipart_upload(filepath, file_size, chunk_size, optional_fields)
+             upload_id, urls = self._prep_multipart_upload(filepath, file_size, chunk_size, optional_fields, atomic=use_atomic_upload)
              if resumable_upload_tracker:
                  logger.info(f'Creating new resumable upload for "{filepath}", upload_id: "{upload_id}"')
-                 resumable_upload_tracker.start_upload(upload_id, urls)
+                 resumable_upload_tracker.start_upload(upload_id, urls, is_atomic_upload=use_atomic_upload)
+
          logger.info(f'Starting upload for "{filepath}"')
          complete_parts = []
          file_chunker = FileChunker(filepath, chunk_size)
@@ -294,14 +201,20 @@ class ResultFileUpload:
              threads,
              resumable_upload_tracker=resumable_upload_tracker
          )
-         self._finish_multipart_upload(upload_id, complete_parts)
+         self._finish_multipart_upload(upload_id, complete_parts, atomic=use_atomic_upload)
          logger.info(f'Finished Upload for "{filepath}"')
+         if use_atomic_upload:
+             # if this was an atomic upload then this result may not have existed on the server before
+             self.get()
          return self
 
      def upload_file(self, filepath, multipart_thresh=FIVE_MB, overwrite=True, no_new_versions=False, **kwargs):
          if self.exists() and not overwrite:
              raise GeoseeqGeneralError(f"Overwrite is set to False and file {self.uuid} already exists.")
-         self.idem()
+         if not kwargs.get("use_atomic_upload", False):
+             self.idem()
+         else:
+             self.parent.idem()
          if no_new_versions and self.has_downloadable_file():
              raise GeoseeqGeneralError(f"File {self} already has a downloadable file. Not uploading a new version.")
          resolved_path = Path(filepath).resolve()
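
The net effect of the atomic switches above is endpoint selection. A condensed, hypothetical helper showing the mapping, with paths copied from the diff (the second atomic path has no leading slash upstream):

    def upload_endpoints(field_uuid, parent_uuid, atomic):  # hypothetical helper, not in the package
        if atomic:
            return (f"/ars/{parent_uuid}/create_atomic_upload",
                    f"ars/{parent_uuid}/create_atomic_upload_urls",  # no leading slash, as in the diff
                    f"/ars/{parent_uuid}/complete_atomic_upload")
        return (f"/ar_fields/{field_uuid}/create_upload",
                f"/ar_fields/{field_uuid}/create_upload_urls",
                f"/ar_fields/{field_uuid}/complete_upload")

Atomic requests also carry "fieldname" in the payload, and non-sample results report result_type "project" instead of "group".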
geoseeq-0.6.1/geoseeq/result/resumable_download_tracker.py (new file)
@@ -0,0 +1,99 @@
+
+ import time
+ import json
+ import os
+ from os.path import basename, getsize, join, dirname, isfile, getctime
+ from pathlib import Path
+ from random import random
+ import requests
+
+ from geoseeq.knex import GeoseeqGeneralError
+ from geoseeq.constants import FIVE_MB
+ from geoseeq.utils import md5_checksum
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from .utils import *
+ from geoseeq.file_system_cache import GEOSEEQ_CACHE_DIR
+ from .file_chunker import FileChunker
+
+
+
+ class ResumableDownloadTracker:
+
+     def __init__(self, chunk_size, download_target_id, target_local_path, tracker_file_prefix="gs_resumable_download_tracker"):
+         self.open, self.download_started = True, False
+         self.download_target_id = download_target_id
+         self.target_local_path = target_local_path
+         self.tracker_file_dir = join(GEOSEEQ_CACHE_DIR, 'download')
+         self.tracker_file = join(
+             self.tracker_file_dir,
+             tracker_file_prefix + f".{download_target_id}.{chunk_size}." + basename(target_local_path)
+         )
+         try:
+             os.makedirs(self.tracker_file_dir, exist_ok=True)
+         except Exception as e:
+             logger.warning(f'Could not create resumable download tracker directory. {e}')
+             self.open = False
+         self._loaded_parts = {}
+         self._load_parts_from_file()
+
+     def start_download(self, download_url):
+         if not self.open:
+             return
+         if self.download_started:
+             raise GeoseeqGeneralError("Download has already started.")
+         self.download_started = True
+         blob = dict(download_url=download_url,
+                     download_target_id=self.download_target_id,
+                     start_time=time.time())
+         serialized = json.dumps(blob)
+         with open(self.tracker_file, "w") as f:
+             f.write(serialized + "\n")
+         self.download_url = download_url
+         return self
+
+     def add_part(self, part_download_info):
+         if not self.open:
+             assert False, "Cannot add part to closed ResumableDownloadTracker"
+         part_id = part_download_info["part_number"]
+         serialized = json.dumps(part_download_info)
+         with open(self.tracker_file, "a") as f:
+             f.write(serialized + "\n")
+         self._loaded_parts[part_id] = part_download_info
+
+     def _load_parts_from_file(self):
+         if not isfile(self.tracker_file):
+             return
+         with open(self.tracker_file, "r") as f:
+             header_blob = json.loads(f.readline())
+             self.download_url = header_blob["download_url"]
+             start_time = header_blob["start_time"] # for now we don't expire resumable downloads
+             self.download_started = True
+             for line in f:
+                 part_info = json.loads(line)
+                 part_id = part_info["part_number"]
+                 self._loaded_parts[part_id] = part_info
+
+     def part_has_been_downloaded(self, part_number):
+         if not self.open:
+             return False
+         if part_number not in self._loaded_parts:
+             return False
+         part_info = self._loaded_parts[part_number]
+         part_path = part_info["part_filename"]
+         return isfile(part_path)
+
+     def get_part_info(self, part_number):
+         if not self.open:
+             return None
+         return self._loaded_parts.get(part_number, None)
+
+     def cleanup(self):
+         if not self.open:
+             return
+         for part in self._loaded_parts.values():
+             part_path = part["part_filename"]
+             if isfile(part_path):
+                 os.remove(part_path)
+         os.remove(self.tracker_file)
+         self.open = False
+
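
The tracker file this class writes is plain JSON lines: one header blob, then one blob per completed part. A hypothetical sketch of how such a file comes to exist (all values made up; the real filename includes the prefix, target id, and chunk size):

    import json, time

    header = dict(download_url="https://example.com/presigned",
                  download_target_id="9f2a0c1d2e3f4a5b", start_time=time.time())
    part = dict(part_number=0, start=0, end=26214399,
                part_filename=".gs_download_0_2.reads.fastq.gz")
    with open("tracker.jsonl", "w") as f:   # illustrative name only
        f.write(json.dumps(header) + "\n")  # first line: the header blob
        f.write(json.dumps(part) + "\n")    # following lines: one blob per finished part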
geoseeq-0.6.1/geoseeq/result/resumable_upload_tracker.py (new file)
@@ -0,0 +1,100 @@
+
+ import time
+ import json
+ import os
+ from os.path import basename, getsize, join, dirname, isfile, getctime
+ from pathlib import Path
+ from random import random
+ import requests
+
+ from geoseeq.knex import GeoseeqGeneralError
+ from geoseeq.constants import FIVE_MB
+ from geoseeq.utils import md5_checksum
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from .utils import *
+ from geoseeq.file_system_cache import GEOSEEQ_CACHE_DIR
+ from .file_chunker import FileChunker
+
+
+ class ResumableUploadTracker:
+
+     def __init__(self, filepath, chunk_size, upload_target_uuid, tracker_file_prefix="gs_resumable_upload_tracker"):
+         self.open, self.upload_started = True, False
+         self.upload_id, self.urls, self.is_atomic_upload = None, None, None
+         self.upload_target_uuid = upload_target_uuid
+         self.filepath = filepath
+         self.tracker_file_dir = join(GEOSEEQ_CACHE_DIR, 'upload')
+         self.tracker_file = join(
+             self.tracker_file_dir,
+             tracker_file_prefix + f".{upload_target_uuid}.{chunk_size}.{getsize(filepath)}." + basename(filepath)
+         )
+         try:
+             os.makedirs(self.tracker_file_dir, exist_ok=True)
+         except Exception as e:
+             logger.warning(f'Could not create resumable upload tracker directory. {e}')
+             self.open = False
+         self._loaded_parts = {}
+         self._load_parts_from_file()
+
+     def start_upload(self, upload_id, urls, is_atomic_upload=False):
+         if not self.open:
+             return
+         if self.upload_started:
+             raise GeoseeqGeneralError("Upload has already started.")
+         self.upload_started = True
+         blob = dict(upload_id=upload_id,
+                     urls=urls,
+                     is_atomic_upload=is_atomic_upload,
+                     upload_target_uuid=self.upload_target_uuid,
+                     start_time=time.time())
+         serialized = json.dumps(blob)
+         with open(self.tracker_file, "w") as f:
+             f.write(serialized + "\n")
+         self.upload_id, self.urls, self.is_atomic_upload = upload_id, urls, is_atomic_upload
+
+     def add_part(self, part_upload_info):
+         if not self.open:
+             return
+         part_id = part_upload_info["PartNumber"]
+         serialized = json.dumps(part_upload_info)
+         with open(self.tracker_file, "a") as f:
+             f.write(serialized + "\n")
+         self._loaded_parts[part_id] = part_upload_info
+         if len(self._loaded_parts) == len(self.urls):
+             self.cleanup()
+             self.open = False
+
+     def _load_parts_from_file(self):
+         if not isfile(self.tracker_file):
+             return
+         with open(self.tracker_file, "r") as f:
+             header_blob = json.loads(f.readline())
+             self.upload_id, self.urls, self.is_atomic_upload = (
+                 header_blob["upload_id"], header_blob["urls"], header_blob["is_atomic_upload"]
+             )
+             start_time = header_blob["start_time"]
+             if (time.time() - start_time) > (60 * 60 * 23):
+                 logger.warning(f"Tracker file {self.tracker_file} is too old. Deleting.")
+                 os.remove(self.tracker_file)
+                 return
+             self.upload_started = True
+             for line in f:
+                 blob = json.loads(line)
+                 part_id = blob["PartNumber"]
+                 self._loaded_parts[part_id] = blob
+
+     def part_has_been_uploaded(self, part_number):
+         if not self.open:
+             return False
+         return part_number in self._loaded_parts
+
+     def get_part_info(self, part_number):
+         return self._loaded_parts[part_number]
+
+     def cleanup(self):
+         if not self.open:
+             return
+         try:
+             os.remove(self.tracker_file)
+         except FileNotFoundError:
+             pass
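
Putting the tracker in context, a condensed sketch of the resume handshake as _upload_multipart_file uses it; `prep` stands in for the private _prep_multipart_upload call and the wrapper itself is hypothetical:

    from geoseeq.result.resumable_upload_tracker import ResumableUploadTracker

    def resume_or_start(filepath, chunk_size, upload_target_uuid, prep, n_parts, use_atomic_upload=True):
        tracker = ResumableUploadTracker(filepath, chunk_size, upload_target_uuid)
        if tracker.upload_started:              # a fresh tracker file (< 23 hours old) was found
            upload_id, urls = tracker.upload_id, tracker.urls
            use_atomic_upload = tracker.is_atomic_upload   # the original mode wins on resume
        else:
            upload_id, urls = prep()
            tracker.start_upload(upload_id, urls, is_atomic_upload=use_atomic_upload)
        todo = [n for n in range(1, n_parts + 1) if not tracker.part_has_been_uploaded(n)]
        return upload_id, urls, todo            # only the parts not yet recorded get uploaded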
{geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq/upload_download_manager.py
@@ -22,7 +22,7 @@ def _upload_one_file(args):
      (result_file, filepath, session, progress_tracker,
       link_type, overwrite, log_level, parallel_uploads,
       use_cache, no_new_versions, threads_per_upload,
-      num_retries, ignore_errors, chunk_size_mb) = args
+      num_retries, ignore_errors, chunk_size_mb, use_atomic_upload) = args
      chunk_size = chunk_size_mb * 1024 * 1024 if chunk_size_mb else None
      if parallel_uploads:
          _make_in_process_logger(log_level)
@@ -34,6 +34,7 @@ def _upload_one_file(args):
              session=session, overwrite=overwrite, progress_tracker=progress_tracker,
              threads=threads_per_upload, use_cache=use_cache, chunk_size=chunk_size,
              no_new_versions=no_new_versions, max_retries=num_retries,
+             use_atomic_upload=use_atomic_upload
          )
      else:
          result_file.link_file(link_type, filepath)
@@ -59,6 +60,7 @@ class GeoSeeqUploadManager:
                   num_retries=3,
                   ignore_errors=False,
                   chunk_size_mb=5,
+                  use_atomic_upload=True,
                   use_cache=True):
          self.session = session
          self.n_parallel_uploads = n_parallel_uploads
@@ -73,12 +75,18 @@ class GeoSeeqUploadManager:
          self.num_retries = num_retries
          self.ignore_errors = ignore_errors
          self.chunk_size_mb = chunk_size_mb
+         self.use_atomic_upload = use_atomic_upload
 
      def add_result_file(self, result_file, local_path):
          self._result_files.append((result_file, local_path))
 
      def add_local_file_to_result_folder(self, result_folder, local_path, geoseeq_file_name=None):
-         geoseeq_file_name = geoseeq_file_name if geoseeq_file_name else local_path
+         if not geoseeq_file_name:
+             if local_path.startswith("/"): # if local path is an absolute path use the basename
+                 geoseeq_file_name = basename(local_path)
+             else:
+                 # remove "./" and "../" from local path to get a geoseeq file name
+                 geoseeq_file_name = local_path.replace("./", "").replace("../", "")
          result_file = result_folder.result_file(geoseeq_file_name)
          self.add_result_file(result_file, local_path)
 
@@ -99,7 +107,7 @@ class GeoSeeqUploadManager:
              self.link_type, self.overwrite, self.log_level,
              self.n_parallel_uploads > 1, self.use_cache, self.no_new_versions,
              self.threads_per_upload, self.num_retries, self.ignore_errors,
-             self.chunk_size_mb,
+             self.chunk_size_mb, self.use_atomic_upload
          ) for result_file, local_path in self._result_files
          ]
          out = []
@@ -186,7 +194,7 @@ class GeoSeeqDownloadManager:
          self._convert_result_files_to_urls()
          download_args = [(
              url, file_path,
-             self.progress_tracker_factory(url),
+             self.progress_tracker_factory(file_path),
              self.ignore_errors, self.head, self.log_level,
              self.n_parallel_downloads > 1
          ) for url, file_path in self._result_files]
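
The new name mapping in add_local_file_to_result_folder behaves like this; a standalone mirror of that logic with hypothetical paths:

    from os.path import basename

    def geoseeq_name(local_path):  # illustration only, not a package function
        if local_path.startswith("/"):
            return basename(local_path)        # "/data/runs/s1.fastq" -> "s1.fastq"
        return local_path.replace("./", "").replace("../", "")  # "./b2/s1.fq" -> "b2/s1.fq"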
{geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: geoseeq
- Version: 0.5.6a16
+ Version: 0.6.1
  Summary: GeoSeeq command line tools and python API
  Author: David C. Danko
  Author-email: "David C. Danko" <dcdanko@biotia.io>
{geoseeq-0.5.6a16 → geoseeq-0.6.1}/geoseeq.egg-info/SOURCES.txt
@@ -34,6 +34,8 @@ geoseeq/cli/get_eula.py
  geoseeq/cli/main.py
  geoseeq/cli/manage.py
  geoseeq/cli/progress_bar.py
+ geoseeq/cli/project.py
+ geoseeq/cli/raw.py
  geoseeq/cli/run.py
  geoseeq/cli/search.py
  geoseeq/cli/user.py
@@ -72,10 +74,13 @@ geoseeq/plotting/map/map.py
  geoseeq/plotting/map/overlay.py
  geoseeq/result/__init__.py
  geoseeq/result/bioinfo.py
+ geoseeq/result/file_chunker.py
  geoseeq/result/file_download.py
  geoseeq/result/file_upload.py
  geoseeq/result/result_file.py
  geoseeq/result/result_folder.py
+ geoseeq/result/resumable_download_tracker.py
+ geoseeq/result/resumable_upload_tracker.py
  geoseeq/result/utils.py
  geoseeq/vc/__init__.py
  geoseeq/vc/checksum.py
{geoseeq-0.5.6a16 → geoseeq-0.6.1}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
  [project]
  name = "geoseeq"
- version = "0.5.6a16"
+ version = "0.6.1"
  authors = [
      { name="David C. Danko", email="dcdanko@biotia.io" },
  ]