geoseeq 0.6.0__tar.gz → 0.6.2__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (101) hide show
  1. {geoseeq-0.6.0 → geoseeq-0.6.2}/PKG-INFO +1 -1
  2. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/main.py +1 -1
  3. geoseeq-0.6.2/geoseeq/file_system/filesystem_download.py +434 -0
  4. geoseeq-0.6.2/geoseeq/file_system/main.py +122 -0
  5. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/result/file_download.py +46 -7
  6. geoseeq-0.6.2/geoseeq/result/resumable_download_tracker.py +99 -0
  7. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/upload_download_manager.py +1 -1
  8. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq.egg-info/PKG-INFO +1 -1
  9. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq.egg-info/SOURCES.txt +4 -0
  10. {geoseeq-0.6.0 → geoseeq-0.6.2}/pyproject.toml +1 -1
  11. geoseeq-0.6.2/tests/__init__.py +0 -0
  12. {geoseeq-0.6.0 → geoseeq-0.6.2}/LICENSE +0 -0
  13. {geoseeq-0.6.0 → geoseeq-0.6.2}/README.md +0 -0
  14. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/__init__.py +0 -0
  15. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/app.py +0 -0
  16. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/blob_constructors.py +0 -0
  17. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/bulk_creators.py +0 -0
  18. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/__init__.py +0 -0
  19. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/constants.py +0 -0
  20. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/copy.py +0 -0
  21. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/detail.py +0 -0
  22. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/download.py +0 -0
  23. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/fastq_utils.py +0 -0
  24. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/get_eula.py +0 -0
  25. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/manage.py +0 -0
  26. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/progress_bar.py +0 -0
  27. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/project.py +0 -0
  28. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/raw.py +0 -0
  29. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/run.py +0 -0
  30. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/search.py +0 -0
  31. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/shared_params/__init__.py +0 -0
  32. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/shared_params/common_state.py +0 -0
  33. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/shared_params/config.py +0 -0
  34. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/shared_params/id_handlers.py +0 -0
  35. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/shared_params/obj_getters.py +0 -0
  36. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/shared_params/opts_and_args.py +0 -0
  37. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/upload/__init__.py +0 -0
  38. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/upload/upload.py +0 -0
  39. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/upload/upload_advanced.py +0 -0
  40. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/upload/upload_reads.py +0 -0
  41. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/user.py +0 -0
  42. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/utils.py +0 -0
  43. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/cli/view.py +0 -0
  44. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/constants.py +0 -0
  45. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/contrib/__init__.py +0 -0
  46. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/contrib/ncbi/__init__.py +0 -0
  47. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/contrib/ncbi/api.py +0 -0
  48. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/contrib/ncbi/bioproject.py +0 -0
  49. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/contrib/ncbi/cli.py +0 -0
  50. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/contrib/ncbi/setup_logging.py +0 -0
  51. {geoseeq-0.6.0/geoseeq/vc → geoseeq-0.6.2/geoseeq/file_system}/__init__.py +0 -0
  52. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/file_system_cache.py +0 -0
  53. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/id_constructors/__init__.py +0 -0
  54. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/id_constructors/from_blobs.py +0 -0
  55. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/id_constructors/from_ids.py +0 -0
  56. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/id_constructors/from_names.py +0 -0
  57. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/id_constructors/from_uuids.py +0 -0
  58. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/id_constructors/resolvers.py +0 -0
  59. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/id_constructors/utils.py +0 -0
  60. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/knex.py +0 -0
  61. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/organization.py +0 -0
  62. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/pipeline.py +0 -0
  63. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/plotting/__init__.py +0 -0
  64. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/plotting/constants.py +0 -0
  65. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/plotting/highcharts.py +0 -0
  66. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/plotting/map/__init__.py +0 -0
  67. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/plotting/map/base_layer.py +0 -0
  68. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/plotting/map/map.py +0 -0
  69. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/plotting/map/overlay.py +0 -0
  70. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/plotting/selectable.py +0 -0
  71. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/project.py +0 -0
  72. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/remote_object.py +0 -0
  73. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/result/__init__.py +0 -0
  74. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/result/bioinfo.py +0 -0
  75. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/result/file_chunker.py +0 -0
  76. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/result/file_upload.py +0 -0
  77. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/result/result_file.py +0 -0
  78. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/result/result_folder.py +0 -0
  79. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/result/resumable_upload_tracker.py +0 -0
  80. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/result/utils.py +0 -0
  81. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/sample.py +0 -0
  82. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/search.py +0 -0
  83. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/user.py +0 -0
  84. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/utils.py +0 -0
  85. {geoseeq-0.6.0/tests → geoseeq-0.6.2/geoseeq/vc}/__init__.py +0 -0
  86. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/vc/checksum.py +0 -0
  87. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/vc/cli.py +0 -0
  88. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/vc/clone.py +0 -0
  89. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/vc/constants.py +0 -0
  90. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/vc/vc_cache.py +0 -0
  91. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/vc/vc_dir.py +0 -0
  92. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/vc/vc_sample.py +0 -0
  93. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/vc/vc_stub.py +0 -0
  94. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq/work_orders.py +0 -0
  95. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq.egg-info/dependency_links.txt +0 -0
  96. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq.egg-info/entry_points.txt +0 -0
  97. {geoseeq-0.6.0 → geoseeq-0.6.2}/geoseeq.egg-info/top_level.txt +0 -0
  98. {geoseeq-0.6.0 → geoseeq-0.6.2}/setup.cfg +0 -0
  99. {geoseeq-0.6.0 → geoseeq-0.6.2}/setup.py +0 -0
  100. {geoseeq-0.6.0 → geoseeq-0.6.2}/tests/test_api_client.py +0 -0
  101. {geoseeq-0.6.0 → geoseeq-0.6.2}/tests/test_plotting.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geoseeq
3
- Version: 0.6.0
3
+ Version: 0.6.2
4
4
  Summary: GeoSeeq command line tools and python API
5
5
  Author: David C. Danko
6
6
  Author-email: "David C. Danko" <dcdanko@biotia.io>
@@ -54,7 +54,7 @@ def version():
54
54
  Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
55
55
  Run `geoseeq eula show` to view the EULA.
56
56
  """
57
- click.echo('0.6.0') # remember to update setup
57
+ click.echo('0.6.2') # remember to update setup
58
58
 
59
59
 
60
60
  @main.group('advanced')
@@ -0,0 +1,434 @@
1
+
2
+ import os
3
+ import json
4
+ from geoseeq import (
5
+ result_file_from_id,
6
+ result_folder_from_id,
7
+ sample_from_id,
8
+ project_from_id,
9
+ )
10
+ from geoseeq.utils import md5_checksum
11
+ from time import time
12
+
13
+ FILE_STATUS_MODIFIED_REMOTE = 'MODIFIED_REMOTE'
14
+ FILE_STATUS_MODIFIED_LOCAL = 'MODIFIED_LOCAL'
15
+ FILE_STATUS_NEW_LOCAL = 'NEW_LOCAL'
16
+ FILE_STATUS_NEW_REMOTE = 'NEW_REMOTE'
17
+ FILE_STATUS_IS_LOCAL_STUB = 'IS_LOCAL_STUB'
18
+
19
+
20
def dedupe_modified_files(modified_files):
    """Drop repeated entries that refer to the same path.

    Every entry is a sequence whose element at index 2 is the path of the
    file or folder it describes. Only the first entry seen for a given path
    is kept, and the relative order of the kept entries is preserved.
    """
    first_entry_by_path = {}
    for entry in modified_files:
        # setdefault leaves an existing key alone, so the earliest entry wins.
        first_entry_by_path.setdefault(entry[2], entry)
    return list(first_entry_by_path.values())
34
+
35
+
36
class ResultFileOnFilesystem:
    """Local, on-disk counterpart of a single GeoSeeq result file.

    Note: unlike other filesystem classes the `path` is a file, not
    a directory. This is because the file is downloaded directly to
    the path. Bookkeeping (uuid, kind, checksum) is stored as JSON in a
    hidden sibling file named `.gs_result_file__<basename>`.
    """

    def __init__(self, result_file, path, kind):
        self.result_file = result_file
        self.path = path
        self.kind = kind
        # Checksum recorded in the info file; filled in lazily or by from_path.
        self.stored_checksum = None

    @property
    def info_filepath(self):
        """Path of the hidden JSON info file that sits next to `self.path`."""
        dirpath = os.path.dirname(self.path)
        basename = os.path.basename(self.path)
        return os.path.join(dirpath, f'.gs_result_file__{basename}')

    @property
    def is_stub(self):
        """True when the local file exists and is empty (a placeholder)."""
        return os.path.exists(self.path) and os.path.getsize(self.path) == 0

    def file_is_ok(self, stubs_are_ok=False):
        """Return True when the local file does not need to be downloaded again.

        A stub is acceptable only if `stubs_are_ok` is set. Otherwise the
        answer is the negation of `result_file.download_needs_update(path)`
        (presumably True when the local copy is missing or stale - confirm
        against the ResultFile API).
        """
        if self.is_stub:
            return stubs_are_ok
        # Fixed: the un-negated value made download() skip stale files.
        return not self.result_file.download_needs_update(self.path)

    def download(self, use_stubs=False, exists_ok=False):
        """Download the file (or create an empty stub) and write the info file.

        Raises ValueError if an info file already exists and `exists_ok` is
        False. With `exists_ok=True` an already acceptable local file is
        left untouched.
        """
        if os.path.exists(self.info_filepath):
            if not exists_ok:
                raise ValueError('Result file already exists at path: {}'.format(self.info_filepath))
            if self.file_is_ok(stubs_are_ok=use_stubs):
                return

        # Download the file
        if use_stubs:
            open(self.path, 'w').close()
        else:
            self.result_file.download(self.path)

        self.write_info_file()

    def local_file_checksum(self):
        """Return the md5 checksum of the local file, or "__STUB__" for a stub."""
        if self.is_stub:
            return "__STUB__"
        return md5_checksum(self.path)

    def locally_modified(self):
        """Return True when the local file differs from the recorded checksum.

        The reference checksum is the one written to the info file by
        `write_info_file`. When no checksum is available (no info file, or
        no "checksum" key) there is nothing to compare against and the file
        is reported as unmodified.
        """
        recorded = self.stored_checksum
        if recorded is None:
            try:
                with open(self.info_filepath, 'r') as f:
                    recorded = json.load(f).get('checksum')
            except FileNotFoundError:
                return False
        if recorded is None:
            return False
        return self.local_file_checksum() != recorded

    def status_is_ok(self, stubs_are_ok=False):
        """Return True when an info file exists and the local copy is acceptable."""
        # check for an info file
        if not os.path.exists(self.info_filepath):
            return False
        if stubs_are_ok:
            return True
        return not self.result_file.download_needs_update(self.path)

    def write_info_file(self):
        """Write uuid, kind and current local checksum to the info file."""
        result_file_info = {
            "uuid": self.result_file.uuid,
            "kind": self.kind,
            "checksum": self.local_file_checksum(),
        }
        with open(self.info_filepath, 'w') as f:
            json.dump(result_file_info, f)
        self.stored_checksum = result_file_info["checksum"]

    @classmethod
    def from_path(cls, path):
        """Build an instance for `path` from its info file, if one exists.

        Without an info file the returned object has `result_file`, `kind`
        and `stored_checksum` all set to None.
        """
        obj = cls(None, path, None)
        try:
            with open(obj.info_filepath, 'r') as f:
                result_file_info = json.load(f)
                obj.result_file = result_file_from_id(result_file_info['uuid'])
                obj.kind = result_file_info['kind']
                obj.stored_checksum = result_file_info['checksum']
        except FileNotFoundError:
            pass
        return obj

    def list_abnormal_objects(self):
        """Return a list describing how this file deviates from the remote.

        Since this class is a single file the list will either be empty
        or have one element. Each element is a tuple
        `('FILE', status, path, result_file)`.

        Note that if a file was modified locally then uploaded to the server
        the file will be marked as modified remote.
        """
        if self.result_file is None:
            return [('FILE', FILE_STATUS_NEW_LOCAL, self.path, None)]
        if not os.path.exists(self.path):
            return [('FILE', FILE_STATUS_NEW_REMOTE, self.path, self.result_file)]
        if self.is_stub:
            return [('FILE', FILE_STATUS_IS_LOCAL_STUB, self.path, self.result_file)]
        if self.result_file and self.result_file.download_needs_update(self.path):
            return [('FILE', FILE_STATUS_MODIFIED_REMOTE, self.path, self.result_file)]
        if self.locally_modified():
            return [('FILE', FILE_STATUS_MODIFIED_LOCAL, self.path, self.result_file)]

        return []
147
+
148
+
149
class ResultFolderOnFilesystem:
    """Local directory that mirrors one GeoSeeq result folder.

    Bookkeeping (uuid and kind) is stored as JSON in `<path>/.gs_result_folder`.
    """

    def __init__(self, result_folder, path, kind):
        self.result_folder = result_folder
        self.path = path
        self.kind = kind

    @property
    def info_filepath(self):
        """Path of the hidden JSON info file inside the folder."""
        return os.path.join(self.path, '.gs_result_folder')

    def download(self, use_stubs=False, exists_ok=False):
        """Download every file of the result folder, then write the info file.

        Raises ValueError if the info file already exists and `exists_ok`
        is False.
        """
        if os.path.exists(self.info_filepath) and not exists_ok:
            raise ValueError('Result folder already exists at path: {}'.format(self.info_filepath))

        # Download the files in the result folder
        for result_file in self.result_folder.get_fields():
            result_file_local_path = os.path.join(self.path, result_file.name)
            # File names may contain sub-directories, so create parents first.
            os.makedirs(os.path.dirname(result_file_local_path), exist_ok=True)
            ResultFileOnFilesystem(result_file, result_file_local_path, self.kind)\
                .download(use_stubs=use_stubs, exists_ok=exists_ok)

        # Write the result folder data
        result_folder_info = {
            "uuid": self.result_folder.uuid,
            "kind": self.kind
        }
        with open(self.info_filepath, 'w') as f:
            json.dump(result_folder_info, f)

    def status_is_ok(self):
        """Return True when the info file and every remote file exist locally."""
        # check for an info file
        if not os.path.exists(self.info_filepath):
            return False

        # check that all files are downloaded
        # Uses get_fields() like the rest of this class (was get_files()).
        # NOTE(review): confirm get_fields() is the intended ResultFolder accessor.
        for result_file in self.result_folder.get_fields():
            result_file_path = os.path.join(self.path, result_file.name)
            if not os.path.exists(result_file_path):
                return False

        return True

    @classmethod
    def from_path(cls, path, kind=None):
        """Build an instance for `path` from its info file, if one exists.

        `kind` is optional and only used as a fallback when no info file is
        present; callers in this module pass it positionally. Without an
        info file `result_folder` is None.
        """
        obj = cls(None, path, kind)
        try:
            with open(obj.info_filepath, 'r') as f:
                result_folder_info = json.load(f)
                obj.result_folder = result_folder_from_id(result_folder_info['uuid'])
                obj.kind = result_folder_info['kind']
        except FileNotFoundError:
            pass
        return obj

    def list_abnormal_objects(self):
        """Return a list of objects that differ between local and remote.

        Each element is a tuple `(object_type, status, path, remote_object)`
        where `object_type` is 'FOLDER' or 'FILE'. Entries are deduplicated
        by path, keeping the first one found.
        """
        modified_files = []
        if not self.result_folder:
            modified_files.append(('FOLDER', FILE_STATUS_NEW_LOCAL, self.path, None))
        if not os.path.exists(self.path):
            modified_files.append(('FOLDER', FILE_STATUS_NEW_REMOTE, self.path, self.result_folder))

        # list local files
        if os.path.exists(self.path):
            for local_file in os.listdir(self.path):
                # Skip the hidden bookkeeping files written by this module.
                if local_file.startswith('.gs_'):
                    continue
                local_file_path = os.path.join(self.path, local_file)
                result_file_on_fs = ResultFileOnFilesystem.from_path(local_file_path)
                modified_files.extend(result_file_on_fs.list_abnormal_objects())

        # list remote files
        if self.result_folder:
            for result_file in self.result_folder.get_fields():
                result_file_path = os.path.join(self.path, result_file.name)
                result_file_on_fs = ResultFileOnFilesystem(result_file, result_file_path, self.kind)
                modified_files.extend(result_file_on_fs.list_abnormal_objects())

        return dedupe_modified_files(modified_files)
233
+
234
+
235
class SampleOnFilesystem:
    """Local directory that mirrors one GeoSeeq sample.

    Each result folder of the sample becomes a sub-directory of `path`.
    Bookkeeping (the sample uuid) is stored as JSON in `<path>/.gs_sample`.
    """

    def __init__(self, sample, path):
        self.sample = sample
        # remove a single trailing slash; endswith() is safe for an empty path
        self.path = path[:-1] if path.endswith('/') else path

    @property
    def info_filepath(self):
        """Path of the hidden JSON info file inside the sample directory."""
        return os.path.join(self.path, '.gs_sample')

    def download(self, use_stubs=False, exists_ok=False):
        """Download every result folder of the sample, then write the info file.

        Raises ValueError if the info file already exists and `exists_ok`
        is False.
        """
        if os.path.exists(self.info_filepath) and not exists_ok:
            raise ValueError('Sample already exists at path: {}'.format(self.info_filepath))

        # download result folders
        for result_folder in self.sample.get_result_folders():
            result_folder_local_path = os.path.join(self.path, result_folder.name)
            os.makedirs(result_folder_local_path, exist_ok=True)
            ResultFolderOnFilesystem(result_folder, result_folder_local_path, "sample")\
                .download(use_stubs=use_stubs, exists_ok=exists_ok)

        # Write the sample data
        sample_info = {
            "uuid": self.sample.uuid
        }
        with open(self.info_filepath, 'w') as f:
            json.dump(sample_info, f)

    def status_is_ok(self):
        """Return True when the info file exists and every result folder is ok."""
        # check for an info file
        if not os.path.exists(self.info_filepath):
            return False

        # check that all result folders are downloaded
        for result_folder in self.sample.get_result_folders():
            result_folder_local_path = os.path.join(self.path, result_folder.name)
            # from_path reads the kind from the folder's own info file; passing
            # a second positional argument raised TypeError.
            result_folder_on_fs = ResultFolderOnFilesystem.from_path(result_folder_local_path)
            if not result_folder_on_fs.status_is_ok():
                return False

        return True

    @classmethod
    def from_path(cls, path):
        """Build an instance for `path` from its info file, if one exists.

        Without an info file the returned object has `sample` set to None.
        """
        obj = cls(None, path)
        try:
            with open(os.path.join(path, '.gs_sample'), 'r') as f:
                sample_info = json.load(f)
                obj.sample = sample_from_id(sample_info['uuid'])
        except FileNotFoundError:
            pass
        return obj

    def list_abnormal_objects(self):
        """Return a list of objects that differ between local and remote.

        Each element is a tuple `(object_type, status, path, remote_object)`.
        Entries are deduplicated by path, keeping the first one found.
        """
        modified_files = []
        if not self.sample:
            modified_files.append(('SAMPLE', FILE_STATUS_NEW_LOCAL, self.path, None))
        if not os.path.exists(self.path):
            modified_files.append(('SAMPLE', FILE_STATUS_NEW_REMOTE, self.path, self.sample))

        # list local folders
        if os.path.exists(self.path):
            for local_folder in os.listdir(self.path):
                local_folder_path = os.path.join(self.path, local_folder)
                if not os.path.isdir(local_folder_path):
                    continue
                result_folder_on_fs = ResultFolderOnFilesystem.from_path(local_folder_path)
                modified_files.extend(result_folder_on_fs.list_abnormal_objects())

        # list remote folders
        if self.sample:
            for result_folder in self.sample.get_result_folders():
                result_folder_path = os.path.join(self.path, result_folder.name)
                result_folder_on_fs = ResultFolderOnFilesystem(result_folder, result_folder_path, "sample")
                modified_files.extend(result_folder_on_fs.list_abnormal_objects())

        return dedupe_modified_files(modified_files)
317
+
318
+
319
class ProjectOnFilesystem:
    """Local directory that mirrors one GeoSeeq project.

    Samples are stored under `<path>/sample_results/<sample name>` and
    project-level result folders under `<path>/project_results/<name>`.
    Bookkeeping (the project uuid) is stored as JSON in `<path>/.gs_project`.
    """

    def __init__(self, project, path):
        self.project = project
        self.path = path

    @property
    def info_filepath(self):
        """Path of the hidden JSON info file inside the project directory."""
        return os.path.join(self.path, '.gs_project')

    def download(self, use_stubs=False, exists_ok=False):
        """Download all samples and project result folders, then write the info file.

        Raises ValueError if the info file already exists and `exists_ok`
        is False.
        """
        if os.path.exists(self.info_filepath) and not exists_ok:
            raise ValueError('Project already exists at path: {}'.format(self.info_filepath))

        # download samples
        for sample in self.project.get_samples():
            sample_local_path = os.path.join(self.path, "sample_results", sample.name)
            os.makedirs(sample_local_path, exist_ok=True)
            SampleOnFilesystem(sample, sample_local_path)\
                .download(use_stubs=use_stubs, exists_ok=exists_ok)

        # download project result folders
        for result_folder in self.project.get_result_folders():
            result_folder_local_path = os.path.join(self.path, "project_results", result_folder.name)
            os.makedirs(result_folder_local_path, exist_ok=True)
            ResultFolderOnFilesystem(result_folder, result_folder_local_path, "project")\
                .download(use_stubs=use_stubs, exists_ok=exists_ok)

        # Write the project data
        project_info = {
            "uuid": self.project.uuid
        }
        with open(self.info_filepath, 'w') as f:
            json.dump(project_info, f)

    def status_is_ok(self):
        """Return True when the info file exists and all children are ok."""
        # check for an info file
        if not os.path.exists(self.info_filepath):
            return False

        # check that all samples are downloaded
        for sample in self.project.get_samples():
            sample_local_path = os.path.join(self.path, "sample_results", sample.name)
            sample_on_fs = SampleOnFilesystem.from_path(sample_local_path)
            if not sample_on_fs.status_is_ok():
                return False

        # check that all project result folders are downloaded
        for result_folder in self.project.get_result_folders():
            result_folder_local_path = os.path.join(self.path, "project_results", result_folder.name)
            # from_path reads the kind from the folder's own info file; passing
            # a second positional argument raised TypeError.
            result_folder_on_fs = ResultFolderOnFilesystem.from_path(result_folder_local_path)
            if not result_folder_on_fs.status_is_ok():
                return False

        return True

    @classmethod
    def from_path(cls, path, recursive=False):
        """Load the project whose `.gs_project` file lives in `path`.

        With `recursive=True` parent directories are searched upwards until
        the filesystem root is reached. Raises ValueError when no project
        info file is found.
        """
        try:
            with open(os.path.join(path, '.gs_project'), 'r') as f:
                project_info = json.load(f)
        except FileNotFoundError:
            if not recursive:
                raise ValueError('No project found in path or parent directories')
            # Compare normalised paths so the walk stops at the root even when
            # `path` is relative or has a trailing slash.
            here = os.path.abspath(path)
            updir = os.path.dirname(here)
            if updir == here:
                raise ValueError('No project found in path or parent directories')
            return cls.from_path(updir, recursive=recursive)
        project = project_from_id(project_info['uuid'])
        return cls(project, path)

    def path_from_project_root(self, path):
        """Return `path` relative to the project root.

        Relative paths are returned unchanged. For an absolute path the
        project root prefix is removed; an absolute path outside the project
        just loses its leading slash.
        """
        if not path.startswith("/"):
            return path
        root = self.path.rstrip("/")
        if path == root:
            return ""
        # Strip the root only as a leading prefix; str.replace would also
        # rewrite later occurrences and mishandled a root with a trailing slash.
        if path.startswith(root + "/"):
            return path[len(root) + 1:]
        return path[1:]

    def list_abnormal_objects(self):
        """Return a list of objects that differ between local and remote.

        Each element is a tuple `(object_type, status, path, remote_object)`.
        Entries are deduplicated by path, keeping the first one found.
        """
        modified_files = []

        # list remote samples
        for sample in self.project.get_samples():
            sample_path = os.path.join(self.path, "sample_results", sample.name)
            sample_on_fs = SampleOnFilesystem(sample, sample_path)
            modified_files.extend(sample_on_fs.list_abnormal_objects())

        # list remote project result folders
        for result_folder in self.project.get_result_folders():
            result_folder_path = os.path.join(self.path, "project_results", result_folder.name)

            result_folder_on_fs = ResultFolderOnFilesystem(result_folder, result_folder_path, "project")
            modified_files.extend(result_folder_on_fs.list_abnormal_objects())

        # list local samples (the directory is absent if nothing was downloaded)
        samples_root = os.path.join(self.path, "sample_results")
        if os.path.isdir(samples_root):
            for local_sample in os.listdir(samples_root):
                local_sample_path = os.path.join(samples_root, local_sample)
                if not os.path.isdir(local_sample_path):
                    continue
                sample_on_fs = SampleOnFilesystem.from_path(local_sample_path)
                modified_files.extend(sample_on_fs.list_abnormal_objects())

        # list local project result folders (same caveat as above)
        results_root = os.path.join(self.path, "project_results")
        if os.path.isdir(results_root):
            for local_result_folder in os.listdir(results_root):
                local_result_folder_path = os.path.join(results_root, local_result_folder)
                if not os.path.isdir(local_result_folder_path):
                    continue
                result_folder_on_fs = ResultFolderOnFilesystem.from_path(local_result_folder_path)
                modified_files.extend(result_folder_on_fs.list_abnormal_objects())
        return dedupe_modified_files(modified_files)
432
+
433
+
434
+
@@ -0,0 +1,122 @@
1
+ from fuse import FUSE, Operations
2
+ import os
3
+
4
+
5
class GeoSeeqProjectFileSystem(Operations):
    """Mount a GeoSeeq project as a filesystem.

    The project will automatically have this directory structure:
    - <root>/project_results/<project_result_folder_name>/...
    - <root>/sample_results/<sample_name>/...
    - <root>/metadata/sample_metadata.csv
    - <root>/.config/config.json

    Most operations are placeholders that do nothing yet; only `open`,
    `create` and `read` contain logic.
    """

    def __init__(self, root, project):
        self.root = root
        self.project = project

    def access(self, path, mode):
        pass

    def chmod(self, path, mode):
        pass

    def chown(self, path, uid, gid):
        pass

    def getattr(self, path, fh=None):
        pass

    def readdir(self, path, fh):
        pass

    def readlink(self, path):
        pass

    def mknod(self, path, mode, dev):
        pass

    def rmdir(self, path):
        pass

    def mkdir(self, path, mode):
        pass

    def statfs(self, path):
        pass

    def unlink(self, path):
        pass

    def symlink(self, name, target):
        pass

    def rename(self, old, new):
        pass

    def link(self, target, name):
        pass

    def utimens(self, path, times=None):
        pass

    def open(self, path, flags):
        """Fetch the remote file behind `path` and open the local copy.

        Returns the file descriptor from `os.open` on the local path.
        Raises NotImplementedError for paths under `metadata`.
        """
        tkns = self._path_tokens(path)
        local_path = self._full_local_path(path)
        section = tkns[0] if tkns else None
        if section == 'project_results':
            result_folder_name, result_file_name = tkns[1], '/'.join(tkns[2:])
            result_folder = self.project.get_result_folder(result_folder_name).get()
            result_file = result_folder.get_file(result_file_name).get()
            # Download to the same location that is opened below.
            result_file.download(local_path)
        elif section == 'sample_results':
            sample_name, result_folder_name, result_file_name = tkns[1], tkns[2], '/'.join(tkns[3:])
            sample = self.project.get_sample(sample_name).get()
            result_folder = sample.get_result_folder(result_folder_name).get()
            result_file = result_folder.get_file(result_file_name).get()
            result_file.download(local_path)
        elif section == 'metadata':
            raise NotImplementedError('TODO')

        return os.open(local_path, flags)

    def create(self, path, mode, fi=None):
        """Create the remote objects behind `path`.

        Raises NotImplementedError for paths under `metadata`.
        NOTE(review): nothing is returned; confirm whether the FUSE layer
        expects a file handle from this operation.
        """
        tkns = self._path_tokens(path)
        local_path = self._full_local_path(path)
        section = tkns[0] if tkns else None
        if section == 'project_results':
            result_name, file_name = tkns[1], '/'.join(tkns[2:])
            result_folder = self.project.get_result_folder(result_name).idem()
            result_file = result_folder.get_file(file_name).create()
            result_file.download(local_path)  # nothing to download at this point
        elif section == 'sample_results':
            sample_name, result_folder_name, result_file_name = tkns[1], tkns[2], '/'.join(tkns[3:])
            sample = self.project.get_sample(sample_name).idem()
            result_folder = sample.get_result_folder(result_folder_name).idem()
            result_file = result_folder.get_file(result_file_name).create()
            result_file.download(local_path)  # nothing to download at this point
        elif section == 'metadata':
            raise NotImplementedError('TODO')

    def read(self, path, length, offset, fh):
        """Read `length` bytes at `offset` from the open descriptor `fh`."""
        os.lseek(fh, offset, os.SEEK_SET)
        return os.read(fh, length)

    def write(self, path, buf, offset, fh):
        pass

    def truncate(self, path, length, fh=None):
        pass

    def flush(self, path, fh):
        pass

    def release(self, path, fh):
        pass

    def fsync(self, path, fdatasync, fh):
        pass

    def _path_tokens(self, path):
        """Split a mount-relative path into components, ignoring a leading slash.

        Splitting "/project_results/x" directly yields an empty first token,
        so the section checks in `open`/`create` never matched.
        """
        if path.startswith("/"):
            path = path[1:]
        return path.split('/')

    def _full_local_path(self, partial):
        """Map a mount-relative path to the matching path under `self.root`."""
        if partial.startswith("/"):
            partial = partial[1:]
        return os.path.join(self.root, partial)
121
+
122
+
@@ -2,15 +2,22 @@
2
2
  import urllib.request
3
3
  import logging
4
4
  import requests
5
- from os.path import basename, getsize, join, isfile, getmtime
5
+ import os
6
+ from os.path import basename, getsize, join, isfile, getmtime, dirname
6
7
  from pathlib import Path
7
8
  from tempfile import NamedTemporaryFile
8
9
 
9
10
  from geoseeq.utils import download_ftp
10
11
  from geoseeq.constants import FIVE_MB
12
+ from hashlib import md5
13
+ from .resumable_download_tracker import ResumableDownloadTracker
11
14
 
12
15
  logger = logging.getLogger("geoseeq_api") # Same name as calling module
13
16
 
17
def url_to_id(url):
    """Return a short identifier derived from *url*.

    Everything from the first ``?`` onwards is ignored, so URLs that differ
    only in their query string map to the same id. The id is the first 16
    hexadecimal characters of the MD5 digest of the remaining text.
    """
    base_url, _, _ = url.partition("?")
    digest = md5(base_url.encode())
    return digest.hexdigest()[:16]
20
+
14
21
 
15
22
  def _download_head(url, filename, head=None, start=0, progress_tracker=None):
16
23
  headers = None
@@ -20,11 +27,43 @@ def _download_head(url, filename, head=None, start=0, progress_tracker=None):
20
27
  response.raise_for_status()
21
28
  total_size_in_bytes = int(response.headers.get('content-length', 0))
22
29
  if progress_tracker: progress_tracker.set_num_chunks(total_size_in_bytes)
23
- block_size = FIVE_MB
30
+ if total_size_in_bytes > 10 * FIVE_MB: # Use resumable download
31
+ print("Using resumable download")
32
+ return _download_resumable(response, filename, total_size_in_bytes, progress_tracker)
33
+ else:
34
+ block_size = FIVE_MB
35
+ with open(filename, 'wb') as file:
36
+ for data in response.iter_content(block_size):
37
+ if progress_tracker: progress_tracker.update(len(data))
38
+ file.write(data)
39
+ return filename
40
+
41
+
42
def _download_resumable(response, filename, total_size_in_bytes, progress_tracker=None, chunk_size=5 * FIVE_MB, part_prefix=".gs_download_{}_{}."):
    """Download `response.url` to `filename` in separately tracked parts.

    The byte range [0, total_size_in_bytes) is split into parts of at most
    `chunk_size` bytes. Each part is fetched with `_download_head` into its
    own file next to `filename` and registered with a
    `ResumableDownloadTracker`, so parts the tracker already knows about are
    not fetched again. Once every part is present they are concatenated in
    order into `filename`.

    Returns `filename`.
    """
    target_id = url_to_id(response.url)
    tracker = ResumableDownloadTracker(chunk_size, target_id, filename)
    if not tracker.download_started:
        tracker.start_download(response.url)

    # Round up: floor division dropped the final, shorter part and produced a
    # truncated file whenever the size was not a multiple of chunk_size.
    n_chunks = (total_size_in_bytes + chunk_size - 1) // chunk_size
    last_byte = total_size_in_bytes - 1
    for i in range(n_chunks):
        # Inclusive byte range of this part; the last part may be shorter.
        bytes_start = i * chunk_size
        bytes_end = min((i + 1) * chunk_size - 1, last_byte)
        if tracker.part_has_been_downloaded(i):
            logger.debug(f"Part {i} has already been downloaded.")
        else:
            logger.debug(f"Downloading part {i} of {n_chunks - 1}")
            part_filename = join(dirname(filename), part_prefix.format(i, n_chunks - 1) + basename(filename))
            _download_head(response.url, part_filename, head=bytes_end, start=bytes_start, progress_tracker=None)
            part_info = dict(part_number=i, start=bytes_start, end=bytes_end, part_filename=part_filename)
            tracker.add_part(part_info)
        if progress_tracker:
            progress_tracker.update(bytes_end - bytes_start + 1)

    # at this point all parts have been downloaded
    with open(filename, 'wb') as file:
        for i in range(n_chunks):
            part_info = tracker.get_part_info(i)
            part_filename = part_info["part_filename"]
            with open(part_filename, 'rb') as part_file:
                file.write(part_file.read())
    tracker.cleanup()
    return filename
29
68
 
30
69
 
@@ -44,7 +83,7 @@ def guess_download_kind(url):
44
83
  return 'generic'
45
84
 
46
85
 
47
- def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None):
86
+ def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None, target_uuid=None):
48
87
  """Return a local filepath to the downloaded file. Download the file."""
49
88
  if filename and isfile(filename):
50
89
  file_size = getsize(filename)
@@ -135,7 +174,7 @@ class ResultFileDownload:
135
174
  url = self.get_download_url()
136
175
  filepath = download_url(
137
176
  url, blob_type, filename,
138
- head=head, progress_tracker=progress_tracker
177
+ head=head, progress_tracker=progress_tracker,
139
178
  )
140
179
  if cache and flag_suffix:
141
180
  # create flag file
@@ -0,0 +1,99 @@
1
+
2
+ import time
3
+ import json
4
+ import os
5
+ from os.path import basename, getsize, join, dirname, isfile, getctime
6
+ from pathlib import Path
7
+ from random import random
8
+ import requests
9
+
10
+ from geoseeq.knex import GeoseeqGeneralError
11
+ from geoseeq.constants import FIVE_MB
12
+ from geoseeq.utils import md5_checksum
13
+ from concurrent.futures import ThreadPoolExecutor, as_completed
14
+ from .utils import *
15
+ from geoseeq.file_system_cache import GEOSEEQ_CACHE_DIR
16
+ from .file_chunker import FileChunker
17
+
18
+
19
+
20
+ class ResumableDownloadTracker:
21
+
22
+ def __init__(self, chunk_size, download_target_id, target_local_path, tracker_file_prefix="gs_resumable_download_tracker"):
23
+ self.open, self.download_started = True, False
24
+ self.download_target_id = download_target_id
25
+ self.target_local_path = target_local_path
26
+ self.tracker_file_dir = join(GEOSEEQ_CACHE_DIR, 'download')
27
+ self.tracker_file = join(
28
+ self.tracker_file_dir,
29
+ tracker_file_prefix + f".{download_target_id}.{chunk_size}." + basename(target_local_path)
30
+ )
31
+ try:
32
+ os.makedirs(self.tracker_file_dir, exist_ok=True)
33
+ except Exception as e:
34
+ logger.warning(f'Could not create resumable download tracker directory. {e}')
35
+ self.open = False
36
+ self._loaded_parts = {}
37
+ self._load_parts_from_file()
38
+
39
+ def start_download(self, download_url):
40
+ if not self.open:
41
+ return
42
+ if self.download_started:
43
+ raise GeoseeqGeneralError("Download has already started.")
44
+ self.download_started = True
45
+ blob = dict(download_url=download_url,
46
+ download_target_id=self.download_target_id,
47
+ start_time=time.time())
48
+ serialized = json.dumps(blob)
49
+ with open(self.tracker_file, "w") as f:
50
+ f.write(serialized + "\n")
51
+ self.download_url = download_url
52
+ return self
53
+
54
+ def add_part(self, part_download_info):
55
+ if not self.open:
56
+ assert False, "Cannot add part to closed ResumableDownloadTracker"
57
+ part_id = part_download_info["part_number"]
58
+ serialized = json.dumps(part_download_info)
59
+ with open(self.tracker_file, "a") as f:
60
+ f.write(serialized + "\n")
61
+ self._loaded_parts[part_id] = part_download_info
62
+
63
+ def _load_parts_from_file(self):
64
+ if not isfile(self.tracker_file):
65
+ return
66
+ with open(self.tracker_file, "r") as f:
67
+ header_blob = json.loads(f.readline())
68
+ self.download_url = header_blob["download_url"]
69
+ start_time = header_blob["start_time"] # for now we don't expire resumable downloads
70
+ self.download_started = True
71
+ for line in f:
72
+ part_info = json.loads(line)
73
+ part_id = part_info["part_number"]
74
+ self._loaded_parts[part_id] = part_info
75
+
76
+ def part_has_been_downloaded(self, part_number):
77
+ if not self.open:
78
+ return False
79
+ if part_number not in self._loaded_parts:
80
+ return False
81
+ part_info = self._loaded_parts[part_number]
82
+ part_path = part_info["part_filename"]
83
+ return isfile(part_path)
84
+
85
+ def get_part_info(self, part_number):
86
+ if not self.open:
87
+ return None
88
+ return self._loaded_parts.get(part_number, None)
89
+
90
+ def cleanup(self):
91
+ if not self.open:
92
+ return
93
+ for part in self._loaded_parts.values():
94
+ part_path = part["part_filename"]
95
+ if isfile(part_path):
96
+ os.remove(part_path)
97
+ os.remove(self.tracker_file)
98
+ self.open = False
99
+
@@ -194,7 +194,7 @@ class GeoSeeqDownloadManager:
194
194
  self._convert_result_files_to_urls()
195
195
  download_args = [(
196
196
  url, file_path,
197
- self.progress_tracker_factory(url),
197
+ self.progress_tracker_factory(file_path),
198
198
  self.ignore_errors, self.head, self.log_level,
199
199
  self.n_parallel_downloads > 1
200
200
  ) for url, file_path in self._result_files]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geoseeq
3
- Version: 0.6.0
3
+ Version: 0.6.2
4
4
  Summary: GeoSeeq command line tools and python API
5
5
  Author: David C. Danko
6
6
  Author-email: "David C. Danko" <dcdanko@biotia.io>
@@ -57,6 +57,9 @@ geoseeq/contrib/ncbi/api.py
57
57
  geoseeq/contrib/ncbi/bioproject.py
58
58
  geoseeq/contrib/ncbi/cli.py
59
59
  geoseeq/contrib/ncbi/setup_logging.py
60
+ geoseeq/file_system/__init__.py
61
+ geoseeq/file_system/filesystem_download.py
62
+ geoseeq/file_system/main.py
60
63
  geoseeq/id_constructors/__init__.py
61
64
  geoseeq/id_constructors/from_blobs.py
62
65
  geoseeq/id_constructors/from_ids.py
@@ -79,6 +82,7 @@ geoseeq/result/file_download.py
79
82
  geoseeq/result/file_upload.py
80
83
  geoseeq/result/result_file.py
81
84
  geoseeq/result/result_folder.py
85
+ geoseeq/result/resumable_download_tracker.py
82
86
  geoseeq/result/resumable_upload_tracker.py
83
87
  geoseeq/result/utils.py
84
88
  geoseeq/vc/__init__.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "geoseeq"
7
- version = "0.6.0"
7
+ version = "0.6.2"
8
8
  authors = [
9
9
  { name="David C. Danko", email="dcdanko@biotia.io" },
10
10
  ]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes