geoseeq 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geoseeq/cli/main.py +1 -1
- geoseeq/file_system/__init__.py +0 -0
- geoseeq/file_system/filesystem_download.py +434 -0
- geoseeq/file_system/main.py +122 -0
- geoseeq/result/file_download.py +46 -7
- geoseeq/result/resumable_download_tracker.py +99 -0
- geoseeq/upload_download_manager.py +1 -1
- {geoseeq-0.6.0.dist-info → geoseeq-0.6.2.dist-info}/METADATA +1 -1
- {geoseeq-0.6.0.dist-info → geoseeq-0.6.2.dist-info}/RECORD +13 -9
- {geoseeq-0.6.0.dist-info → geoseeq-0.6.2.dist-info}/LICENSE +0 -0
- {geoseeq-0.6.0.dist-info → geoseeq-0.6.2.dist-info}/WHEEL +0 -0
- {geoseeq-0.6.0.dist-info → geoseeq-0.6.2.dist-info}/entry_points.txt +0 -0
- {geoseeq-0.6.0.dist-info → geoseeq-0.6.2.dist-info}/top_level.txt +0 -0
geoseeq/cli/main.py
CHANGED
@@ -54,7 +54,7 @@ def version():
     Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
     Run `geoseeq eula show` to view the EULA.
     """
-    click.echo('0.6.0')  # remember to update setup
+    click.echo('0.6.2')  # remember to update setup


 @main.group('advanced')
geoseeq/file_system/__init__.py
File without changes
geoseeq/file_system/filesystem_download.py
ADDED
@@ -0,0 +1,434 @@
+
+import os
+import json
+from geoseeq import (
+    result_file_from_id,
+    result_folder_from_id,
+    sample_from_id,
+    project_from_id,
+)
+from geoseeq.utils import md5_checksum
+from time import time
+
+FILE_STATUS_MODIFIED_REMOTE = 'MODIFIED_REMOTE'
+FILE_STATUS_MODIFIED_LOCAL = 'MODIFIED_LOCAL'
+FILE_STATUS_NEW_LOCAL = 'NEW_LOCAL'
+FILE_STATUS_NEW_REMOTE = 'NEW_REMOTE'
+FILE_STATUS_IS_LOCAL_STUB = 'IS_LOCAL_STUB'
+
+
+def dedupe_modified_files(modified_files):
+    """Remove duplicates from a list of modified files.
+
+    This function will remove duplicates from a list of modified files
+    based on the path to the file. The first instance of the file will be
+    kept and all others will be removed.
+    """
+    seen = set()
+    deduped = []
+    for x in modified_files:
+        if x[2] not in seen:
+            deduped.append(x)
+            seen.add(x[2])
+    return deduped
+
+
+class ResultFileOnFilesystem:
+    """
+
+    Note: unlike other filesystem classes the `path` is a file, not
+    a directory. This is because the file is downloaded directly to
+    the path.
+    """
+
+    def __init__(self, result_file, path, kind):
+        self.result_file = result_file
+        self.path = path
+        self.kind = kind
+
+    @property
+    def info_filepath(self):
+        dirpath = os.path.dirname(self.path)
+        basename = os.path.basename(self.path)
+        return os.path.join(dirpath, f'.gs_result_file__{basename}')
+
+    @property
+    def is_stub(self):
+        return os.path.exists(self.path) and os.path.getsize(self.path) == 0
+
+    def file_is_ok(self, stubs_are_ok=False):
+        if self.is_stub:
+            return stubs_are_ok
+        return self.result_file.download_needs_update(self.path)
+
+    def download(self, use_stubs=False, exists_ok=False):
+        if os.path.exists(self.info_filepath):
+            if exists_ok and self.file_is_ok(stubs_are_ok=use_stubs):
+                return
+            elif not exists_ok:
+                raise ValueError('Result file already exists at path: {}'.format(self.info_filepath))
+
+        # Download the file
+        if use_stubs:
+            open(self.path, 'w').close()
+        else:
+            self.result_file.download(self.path)
+
+        self.write_info_file()
+
+    def local_file_checksum(self):
+        if self.is_stub:
+            return "__STUB__"
+        return md5_checksum(self.path)
+
+    def locally_modified(self):
+        raise NotImplementedError('This function is not implemented')
+
+    def status_is_ok(self, stubs_are_ok=False):
+        # check for an info file
+        if not os.path.exists(self.info_filepath):
+            return False
+        if stubs_are_ok:
+            return True
+        return not self.result_file.download_needs_update(self.path)
+
+    def write_info_file(self):
+        result_file_info = {
+            "uuid": self.result_file.uuid,
+            "kind": self.kind,
+            "checksum": self.local_file_checksum(),
+        }
+        with open(self.info_filepath, 'w') as f:
+            json.dump(result_file_info, f)
+
+    @classmethod
+    def from_path(cls, path):
+        obj = cls(None, path, None)
+        try:
+            with open(obj.info_filepath, 'r') as f:
+                result_file_info = json.load(f)
+                obj.result_file = result_file_from_id(result_file_info['uuid'])
+                obj.kind = result_file_info['kind']
+                obj.stored_checksum = result_file_info['checksum']
+        except FileNotFoundError:
+            pass
+        return obj
+
+    def write_info_file(self):
+        result_file_info = {
+            "uuid": self.result_file.uuid,
+            "kind": self.kind,
+            "checksum": self.local_file_checksum(),
+        }
+        with open(self.info_filepath, 'w') as f:
+            json.dump(result_file_info, f)
+
+    def list_abnormal_objects(self):
+        """Return a list of files that have been modified.
+
+        Since this class is a single file the list will either be empty
+        or have one element.
+
+        Note that if a file was modified locally then uploaded to the server
+        the file will be marked as modified remote.
+        """
+        if self.result_file is None:
+            return [('FILE', FILE_STATUS_NEW_LOCAL, self.path, None)]
+        if not os.path.exists(self.path):
+            return [('FILE', FILE_STATUS_NEW_REMOTE, self.path, self.result_file)]
+        if self.is_stub:
+            return [('FILE', FILE_STATUS_IS_LOCAL_STUB, self.path, self.result_file)]
+        if self.result_file and self.result_file.download_needs_update(self.path):
+            return [('FILE', FILE_STATUS_MODIFIED_REMOTE, self.path, self.result_file)]
+        if self.locally_modified():
+            return [('FILE', FILE_STATUS_MODIFIED_LOCAL, self.path, self.result_file)]
+
+        return []
+
+
+class ResultFolderOnFilesystem:
+
+    def __init__(self, result_folder, path, kind):
+        self.result_folder = result_folder
+        self.path = path
+        self.kind = kind
+
+    @property
+    def info_filepath(self):
+        return os.path.join(self.path, '.gs_result_folder')
+
+    def download(self, use_stubs=False, exists_ok=False):
+        if os.path.exists(self.info_filepath) and not exists_ok:
+            raise ValueError('Result folder already exists at path: {}'.format(self.info_filepath))
+
+        # Download the files in the result folder
+        for result_file in self.result_folder.get_fields():
+            result_file_local_path = os.path.join(self.path, result_file.name)
+            os.makedirs(os.path.dirname(result_file_local_path), exist_ok=True)
+            ResultFileOnFilesystem(result_file, result_file_local_path, self.kind)\
+                .download(use_stubs=use_stubs, exists_ok=exists_ok)
+
+        # Write the result folder data
+        result_folder_info = {
+            "uuid": self.result_folder.uuid,
+            "kind": self.kind
+        }
+        with open(self.info_filepath, 'w') as f:
+            json.dump(result_folder_info, f)
+
+    def status_is_ok(self):
+        # check for an info file
+        if not os.path.exists(self.info_filepath):
+            return False
+
+        # check that all files are downloaded
+        for result_file in self.result_folder.get_files():
+            result_file_path = os.path.join(self.path, result_file.name)
+            if not os.path.exists(result_file_path):
+                return False
+
+        return True
+
+    @classmethod
+    def from_path(cls, path):
+        obj = cls(None, path, None)
+        try:
+            with open(os.path.join(path, '.gs_result_folder'), 'r') as f:
+                result_folder_info = json.load(f)
+                obj.result_folder = result_folder_from_id(result_folder_info['uuid'])
+                obj.kind = result_folder_info['kind']
+        except FileNotFoundError:
+            pass
+        return obj
+
+    def list_abnormal_objects(self):
+        """Return a list of files that have been modified.
+
+        This function will return a list of tuples where the first element
+        is the status of the file and the second element is the path to the file.
+        """
+        modified_files = []
+        if not self.result_folder:
+            modified_files.append(('FOLDER', FILE_STATUS_NEW_LOCAL, self.path, None))
+        if not os.path.exists(self.path):
+            modified_files.append(('FOLDER', FILE_STATUS_NEW_REMOTE, self.path, self.result_folder))
+
+        # list local files
+        if os.path.exists(self.path):
+            for local_file in os.listdir(self.path):
+                if local_file.startswith('.gs_'):
+                    continue
+                local_file_path = os.path.join(self.path, local_file)
+                result_file_on_fs = ResultFileOnFilesystem.from_path(local_file_path)
+                modified_files.extend(result_file_on_fs.list_abnormal_objects())
+
+        # list remote files
+        if self.result_folder:
+            for result_file in self.result_folder.get_fields():
+                result_file_path = os.path.join(self.path, result_file.name)
+                result_file_on_fs = ResultFileOnFilesystem(result_file, result_file_path, self.kind)
+                modified_files.extend(result_file_on_fs.list_abnormal_objects())
+
+        return dedupe_modified_files(modified_files)
+
+
+class SampleOnFilesystem:
+
+    def __init__(self, sample, path):
+        self.sample = sample
+        self.path = path if path[-1] != '/' else path[:-1]  # remove trailing slash
+
+    @property
+    def info_filepath(self):
+        return os.path.join(self.path, '.gs_sample')
+
+    def download(self, use_stubs=False, exists_ok=False):
+        if os.path.exists(self.info_filepath) and not exists_ok:
+            raise ValueError('Sample already exists at path: {}'.format(self.info_filepath))
+
+        # download result folders
+        for result_folder in self.sample.get_result_folders():
+            result_folder_local_path = os.path.join(self.path, result_folder.name)
+            os.makedirs(result_folder_local_path, exist_ok=True)
+            ResultFolderOnFilesystem(result_folder, result_folder_local_path, "sample")\
+                .download(use_stubs=use_stubs, exists_ok=exists_ok)
+
+        # Write the sample data
+        sample_info = {
+            "uuid": self.sample.uuid
+        }
+        with open(self.info_filepath, 'w') as f:
+            json.dump(sample_info, f)
+
+    def status_is_ok(self):
+        # check for an info file
+        if not os.path.exists(self.info_filepath):
+            return False
+
+        # check that all result folders are downloaded
+        for result_folder in self.sample.get_result_folders():
+            result_folder_local_path = os.path.join(self.path, result_folder.name)
+            result_folder_on_fs = ResultFolderOnFilesystem.from_path(result_folder_local_path, "sample")
+            if not result_folder_on_fs.status_is_ok():
+                return False
+
+        return True
+
+    @classmethod
+    def from_path(cls, path):
+        obj = cls(None, path)
+        try:
+            with open(os.path.join(path, '.gs_sample'), 'r') as f:
+                sample_info = json.load(f)
+                obj.sample = sample_from_id(sample_info['uuid'])
+        except FileNotFoundError:
+            pass
+        return obj
+
+    def list_abnormal_objects(self):
+        """Return a list of files that have been modified.
+
+        This function will return a list of tuples where the first element
+        is the status of the file and the second element is the path to the file.
+        """
+        modified_files = []
+        if not self.sample:
+            modified_files.append(('SAMPLE', FILE_STATUS_NEW_LOCAL, self.path, None))
+        if not os.path.exists(self.path):
+            modified_files.append(('SAMPLE', FILE_STATUS_NEW_REMOTE, self.path, self.sample))
+
+        # list local folders
+        if os.path.exists(self.path):
+            for local_folder in os.listdir(self.path):
+                local_folder_path = os.path.join(self.path, local_folder)
+                if not os.path.isdir(local_folder_path):
+                    continue
+                result_folder_on_fs = ResultFolderOnFilesystem.from_path(local_folder_path)
+                modified_files.extend(result_folder_on_fs.list_abnormal_objects())
+
+        # list remote folders
+        if self.sample:
+            for result_folder in self.sample.get_result_folders():
+                result_folder_path = os.path.join(self.path, result_folder.name)
+                result_folder_on_fs = ResultFolderOnFilesystem(result_folder, result_folder_path, "sample")
+                modified_files.extend(result_folder_on_fs.list_abnormal_objects())
+
+        return dedupe_modified_files(modified_files)
+
+
+class ProjectOnFilesystem:
+
+    def __init__(self, project, path):
+        self.project = project
+        self.path = path
+
+    @property
+    def info_filepath(self):
+        return os.path.join(self.path, '.gs_project')
+
+    def download(self, use_stubs=False, exists_ok=False):
+        if os.path.exists(self.info_filepath) and not exists_ok:
+            raise ValueError('Project already exists at path: {}'.format(self.info_filepath))
+
+        # download samples
+        for sample in self.project.get_samples():
+            sample_local_path = os.path.join(self.path, "sample_results", sample.name)
+            os.makedirs(sample_local_path, exist_ok=True)
+            SampleOnFilesystem(sample, sample_local_path)\
+                .download(use_stubs=use_stubs, exists_ok=exists_ok)
+
+        # download project result folders
+        for result_folder in self.project.get_result_folders():
+            result_folder_local_path = os.path.join(self.path, "project_results", result_folder.name)
+            os.makedirs(result_folder_local_path, exist_ok=True)
+            ResultFolderOnFilesystem(result_folder, result_folder_local_path, "project")\
+                .download(use_stubs=use_stubs, exists_ok=exists_ok)
+
+        # Write the project data
+        project_info = {
+            "uuid": self.project.uuid
+        }
+        with open(self.info_filepath, 'w') as f:
+            json.dump(project_info, f)
+
+    def status_is_ok(self):
+        # check for an info file
+        if not os.path.exists(self.info_filepath):
+            return False
+
+        # check that all samples are downloaded
+        for sample in self.project.get_samples():
+            sample_local_path = os.path.join(self.path, "sample_results", sample.name)
+            sample_on_fs = SampleOnFilesystem.from_path(sample_local_path)
+            if not sample_on_fs.status_is_ok():
+                return False
+
+        # check that all project result folders are downloaded
+        for result_folder in self.project.get_result_folders():
+            result_folder_local_path = os.path.join(self.path, "project_results", result_folder.name)
+            result_folder_on_fs = ResultFolderOnFilesystem.from_path(result_folder_local_path, "project")
+            if not result_folder_on_fs.status_is_ok():
+                return False
+
+        return True
+
+    @classmethod
+    def from_path(cls, path, recursive=False):
+        try:
+            with open(os.path.join(path, '.gs_project'), 'r') as f:
+                project_info = json.load(f)
+                project = project_from_id(project_info['uuid'])
+                return cls(project, path)
+        except FileNotFoundError:
+            if not recursive:
+                raise ValueError('No project found in path or parent directories')
+            updir = os.path.dirname(os.path.abspath(path))
+            if updir == path:
+                raise ValueError('No project found in path or parent directories')
+            return cls.from_path(updir, recursive=recursive)
+
+    def path_from_project_root(self, path):
+        if path[0] == "/":
+            return path.replace(self.path, "")[1:]
+        return path
+
+    def list_abnormal_objects(self):
+        """Return a list of files that have been modified.
+
+        This function will return a list of tuples where the first element
+        is the status of the file and the second element is the path to the file.
+        """
+        modified_files = []
+
+        # list remote samples
+        for sample in self.project.get_samples():
+            sample_path = os.path.join(self.path, "sample_results", sample.name)
+            sample_on_fs = SampleOnFilesystem(sample, sample_path)
+            modified_files.extend(sample_on_fs.list_abnormal_objects())
+
+        # list remote project result folders
+        for result_folder in self.project.get_result_folders():
+            result_folder_path = os.path.join(self.path, "project_results", result_folder.name)
+
+            result_folder_on_fs = ResultFolderOnFilesystem(result_folder, result_folder_path, "project")
+            modified_files.extend(result_folder_on_fs.list_abnormal_objects())
+
+        # list local samples
+        for local_sample in os.listdir(os.path.join(self.path, "sample_results")):
+            local_sample_path = os.path.join(self.path, "sample_results", local_sample)
+            if not os.path.isdir(local_sample_path):
+                continue
+            sample_on_fs = SampleOnFilesystem.from_path(local_sample_path)
+            modified_files.extend(sample_on_fs.list_abnormal_objects())
+
+        # list local project result folders
+        for local_result_folder in os.listdir(os.path.join(self.path, "project_results")):
+            local_result_folder_path = os.path.join(self.path, "project_results", local_result_folder)
+            if not os.path.isdir(local_result_folder_path):
+                continue
+            result_folder_on_fs = ResultFolderOnFilesystem.from_path(local_result_folder_path)
+            modified_files.extend(result_folder_on_fs.list_abnormal_objects())
+        return dedupe_modified_files(modified_files)
+
+
+
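For orientation, here is a minimal usage sketch of the new classes, assuming a configured GeoSeeq client and a valid project id (the id below is a placeholder). ProjectOnFilesystem.download mirrors a project into a local tree, and list_abnormal_objects reports drift between that tree and the server:

from geoseeq import project_from_id
from geoseeq.file_system.filesystem_download import ProjectOnFilesystem

# Placeholder id; substitute a real GeoSeeq project UUID.
project = project_from_id("00000000-0000-0000-0000-000000000000")

# Mirror the project under ./my_project; use_stubs=True writes empty stub
# files instead of downloading full contents, exists_ok=True tolerates reruns.
fs_project = ProjectOnFilesystem(project, "./my_project")
fs_project.download(use_stubs=True, exists_ok=True)

# Each entry is a (object_type, status, local_path, remote_object) tuple.
for obj_type, status, path, remote in fs_project.list_abnormal_objects():
    print(obj_type, status, path)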
geoseeq/file_system/main.py
ADDED
@@ -0,0 +1,122 @@
+from fuse import FUSE, Operations
+import os
+
+
+class GeoSeeqProjectFileSystem(Operations):
+    """Mount a GeoSeeq project as a filesystem.
+
+    The project will automatically have this directory structure:
+    - <root>/project_results/<project_result_folder_name>/...
+    - <root>/sample_results/<sample_name>/...
+    - <root>/metadata/sample_metadata.csv
+    - <root>/.config/config.json
+    """
+
+    def __init__(self, root, project):
+        self.root = root
+        self.project = project
+
+    def access(self, path, mode):
+        pass
+
+    def chmod(self, path, mode):
+        pass
+
+    def chown(self, path, uid, gid):
+        pass
+
+    def getattr(self, path, fh=None):
+        pass
+
+    def readdir(self, path, fh):
+        pass
+
+    def readlink(self, path):
+        pass
+
+    def mknod(self, path, mode, dev):
+        pass
+
+    def rmdir(self, path):
+        pass
+
+    def mkdir(self, path, mode):
+        pass
+
+    def statfs(self, path):
+        pass
+
+    def unlink(self, path):
+        pass
+
+    def symlink(self, name, target):
+        pass
+
+    def rename(self, old, new):
+        pass
+
+    def link(self, target, name):
+        pass
+
+    def utimens(self, path, times=None):
+        pass
+
+    def open(self, path, flags):
+        tkns = path.split('/')
+        if tkns[0] == 'project_results':
+            result_folder_name, result_file_name = tkns[2], '/'.join(tkns[3:])
+            result_folder = self.project.get_result_folder(result_folder_name).get()
+            result_file = result_folder.get_file(result_file_name).get()
+            result_file.download(path)
+        elif tkns[0] == 'sample_results':
+            sample_name, result_folder_name, result_file_name = tkns[2], tkns[3], '/'.join(tkns[4:])
+            sample = self.project.get_sample(sample_name).get()
+            result_folder = sample.get_result_folder(result_folder_name).get()
+            result_file = result_folder.get_file(result_file_name).get()
+            result_file.download(path)
+        elif tkns[0] == 'metadata':
+            raise NotImplementedError('TODO')
+
+        return os.open(self._full_local_path(path), flags)
+
+    def create(self, path, mode, fi=None):
+        tkns = path.split('/')
+        if tkns[0] == 'project_results':
+            result_name, file_name = tkns[2], '/'.join(tkns[3:])
+            result_folder = self.project.get_result_folder(result_name).idem()
+            result_file = result_folder.get_file(file_name).create()
+            result_file.download(path)  # nothing to download at this point
+        elif tkns[0] == 'sample_results':
+            sample_name, result_folder_name, result_file_name = tkns[2], tkns[3], '/'.join(tkns[4:])
+            sample = self.project.get_sample(sample_name).idem()
+            result_folder = sample.get_result_folder(result_folder_name).idem()
+            result_file = result_folder.get_file(result_file_name).create()
+            result_file.download(path)  # nothing to download at this point
+        elif tkns[0] == 'metadata':
+            raise NotImplementedError('TODO')
+
+    def read(self, path, length, offset, fh):
+        os.lseek(fh, offset, os.SEEK_SET)
+        return os.read(fh, length)
+
+    def write(self, path, buf, offset, fh):
+        pass
+
+    def truncate(self, path, length, fh=None):
+        pass
+
+    def flush(self, path, fh):
+        pass
+
+    def release(self, path, fh):
+        pass
+
+    def fsync(self, path, fdatasync, fh):
+        pass
+
+    def _full_local_path(self, partial):
+        if partial.startswith("/"):
+            partial = partial[1:]
+        return os.path.join(self.root, partial)
+
+
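Most of the FUSE callbacks above are still stubs, so the class is not yet mountable in a useful way. Still, a hedged sketch of how it would be wired up with fusepy (the paths and project id below are placeholders) looks like this:

import os
from fuse import FUSE
from geoseeq import project_from_id
from geoseeq.file_system.main import GeoSeeqProjectFileSystem

project = project_from_id("00000000-0000-0000-0000-000000000000")  # placeholder id
local_cache = "/tmp/geoseeq_cache"   # backing directory for downloaded files
mountpoint = "/mnt/geoseeq_project"  # must exist and be empty
os.makedirs(local_cache, exist_ok=True)

fs = GeoSeeqProjectFileSystem(local_cache, project)
FUSE(fs, mountpoint, foreground=True)  # blocks until the filesystem is unmounted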
geoseeq/result/file_download.py
CHANGED
@@ -2,15 +2,22 @@
 import urllib.request
 import logging
 import requests
-
+import os
+from os.path import basename, getsize, join, isfile, getmtime, dirname
 from pathlib import Path
 from tempfile import NamedTemporaryFile
 
 from geoseeq.utils import download_ftp
 from geoseeq.constants import FIVE_MB
+from hashlib import md5
+from .resumable_download_tracker import ResumableDownloadTracker
 
 logger = logging.getLogger("geoseeq_api")  # Same name as calling module
 
+def url_to_id(url):
+    url = url.split("?")[0]
+    return md5(url.encode()).hexdigest()[:16]
+
 
 def _download_head(url, filename, head=None, start=0, progress_tracker=None):
     headers = None
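The new url_to_id helper gives each download target a stable short identifier by hashing the URL with its query string stripped, so a presigned URL that is re-issued with a different signature still maps to the same resumable download. A small illustration with a hypothetical URL:

from geoseeq.result.file_download import url_to_id

a = url_to_id("https://example.com/results/reads.fastq.gz?X-Amz-Expires=3600")
b = url_to_id("https://example.com/results/reads.fastq.gz?X-Amz-Expires=7200")
assert a == b  # query parameters do not change the download target id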
@@ -20,11 +27,43 @@ def _download_head(url, filename, head=None, start=0, progress_tracker=None):
     response.raise_for_status()
     total_size_in_bytes = int(response.headers.get('content-length', 0))
     if progress_tracker: progress_tracker.set_num_chunks(total_size_in_bytes)
-
+    if total_size_in_bytes > 10 * FIVE_MB: # Use resumable download
+        print("Using resumable download")
+        return _download_resumable(response, filename, total_size_in_bytes, progress_tracker)
+    else:
+        block_size = FIVE_MB
+        with open(filename, 'wb') as file:
+            for data in response.iter_content(block_size):
+                if progress_tracker: progress_tracker.update(len(data))
+                file.write(data)
+        return filename
+
+
+def _download_resumable(response, filename, total_size_in_bytes, progress_tracker=None, chunk_size=5 * FIVE_MB, part_prefix=".gs_download_{}_{}."):
+    target_id = url_to_id(response.url)
+    tracker = ResumableDownloadTracker(chunk_size, target_id, filename)
+    if not tracker.download_started: tracker.start_download(response.url)
+    n_chunks = total_size_in_bytes // chunk_size
+    for i in range(n_chunks):
+        bytes_start, bytes_end = i * chunk_size, min((i + 1) * chunk_size - 1, total_size_in_bytes - 1)
+        if tracker.part_has_been_downloaded(i):
+            logger.debug(f"Part {i} has already been downloaded.")
+        else:
+            logger.debug(f"Downloading part {i} of {n_chunks - 1}")
+            part_filename = join(dirname(filename), part_prefix.format(i, n_chunks - 1) + basename(filename))
+            _download_head(response.url, part_filename, head=bytes_end, start=bytes_start, progress_tracker=None)
+            part_info = dict(part_number=i, start=bytes_start, end=bytes_end, part_filename=part_filename)
+            tracker.add_part(part_info)
+        if progress_tracker: progress_tracker.update(bytes_end - bytes_start + 1)
+
+    # at this point all parts have been downloaded
     with open(filename, 'wb') as file:
-        for
-
-
+        for i in range(n_chunks):
+            part_info = tracker.get_part_info(i)
+            part_filename = part_info["part_filename"]
+            with open(part_filename, 'rb') as part_file:
+                file.write(part_file.read())
+    tracker.cleanup()
     return filename
 
 
@@ -44,7 +83,7 @@ def guess_download_kind(url):
     return 'generic'
 
 
-def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None):
+def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None, target_uuid=None):
     """Return a local filepath to the downloaded file. Download the file."""
     if filename and isfile(filename):
         file_size = getsize(filename)
@@ -135,7 +174,7 @@ class ResultFileDownload:
         url = self.get_download_url()
         filepath = download_url(
             url, blob_type, filename,
-            head=head, progress_tracker=progress_tracker
+            head=head, progress_tracker=progress_tracker,
         )
         if cache and flag_suffix:
             # create flag file
geoseeq/result/resumable_download_tracker.py
ADDED
@@ -0,0 +1,99 @@
+
+import time
+import json
+import os
+from os.path import basename, getsize, join, dirname, isfile, getctime
+from pathlib import Path
+from random import random
+import requests
+
+from geoseeq.knex import GeoseeqGeneralError
+from geoseeq.constants import FIVE_MB
+from geoseeq.utils import md5_checksum
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from .utils import *
+from geoseeq.file_system_cache import GEOSEEQ_CACHE_DIR
+from .file_chunker import FileChunker
+
+
+
+class ResumableDownloadTracker:
+
+    def __init__(self, chunk_size, download_target_id, target_local_path, tracker_file_prefix="gs_resumable_download_tracker"):
+        self.open, self.download_started = True, False
+        self.download_target_id = download_target_id
+        self.target_local_path = target_local_path
+        self.tracker_file_dir = join(GEOSEEQ_CACHE_DIR, 'download')
+        self.tracker_file = join(
+            self.tracker_file_dir,
+            tracker_file_prefix + f".{download_target_id}.{chunk_size}." + basename(target_local_path)
+        )
+        try:
+            os.makedirs(self.tracker_file_dir, exist_ok=True)
+        except Exception as e:
+            logger.warning(f'Could not create resumable download tracker directory. {e}')
+            self.open = False
+        self._loaded_parts = {}
+        self._load_parts_from_file()
+
+    def start_download(self, download_url):
+        if not self.open:
+            return
+        if self.download_started:
+            raise GeoseeqGeneralError("Download has already started.")
+        self.download_started = True
+        blob = dict(download_url=download_url,
+                    download_target_id=self.download_target_id,
+                    start_time=time.time())
+        serialized = json.dumps(blob)
+        with open(self.tracker_file, "w") as f:
+            f.write(serialized + "\n")
+        self.download_url = download_url
+        return self
+
+    def add_part(self, part_download_info):
+        if not self.open:
+            assert False, "Cannot add part to closed ResumableDownloadTracker"
+        part_id = part_download_info["part_number"]
+        serialized = json.dumps(part_download_info)
+        with open(self.tracker_file, "a") as f:
+            f.write(serialized + "\n")
+        self._loaded_parts[part_id] = part_download_info
+
+    def _load_parts_from_file(self):
+        if not isfile(self.tracker_file):
+            return
+        with open(self.tracker_file, "r") as f:
+            header_blob = json.loads(f.readline())
+            self.download_url = header_blob["download_url"]
+            start_time = header_blob["start_time"]  # for now we don't expire resumable downloads
+            self.download_started = True
+            for line in f:
+                part_info = json.loads(line)
+                part_id = part_info["part_number"]
+                self._loaded_parts[part_id] = part_info
+
+    def part_has_been_downloaded(self, part_number):
+        if not self.open:
+            return False
+        if part_number not in self._loaded_parts:
+            return False
+        part_info = self._loaded_parts[part_number]
+        part_path = part_info["part_filename"]
+        return isfile(part_path)
+
+    def get_part_info(self, part_number):
+        if not self.open:
+            return None
+        return self._loaded_parts.get(part_number, None)
+
+    def cleanup(self):
+        if not self.open:
+            return
+        for part in self._loaded_parts.values():
+            part_path = part["part_filename"]
+            if isfile(part_path):
+                os.remove(part_path)
+        os.remove(self.tracker_file)
+        self.open = False
+
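The tracker file that makes downloads resumable is a JSON-lines log under GEOSEEQ_CACHE_DIR/download: start_download writes one header object, and add_part appends one object per finished part. An illustrative (made-up) tracker file for a download split into four 25 MB parts might look like:

{"download_url": "https://example.com/results/reads.fastq.gz?signature=...", "download_target_id": "0123456789abcdef", "start_time": 1700000000.0}
{"part_number": 0, "start": 0, "end": 26214399, "part_filename": "/data/.gs_download_0_3.reads.fastq.gz"}
{"part_number": 1, "start": 26214400, "end": 52428799, "part_filename": "/data/.gs_download_1_3.reads.fastq.gz"}

On a restart, _load_parts_from_file re-reads these lines, part_has_been_downloaded skips parts whose files still exist on disk, and cleanup deletes the part files and the tracker once the final file has been assembled.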
geoseeq/upload_download_manager.py
CHANGED
@@ -194,7 +194,7 @@ class GeoSeeqDownloadManager:
         self._convert_result_files_to_urls()
         download_args = [(
             url, file_path,
-            self.progress_tracker_factory(
+            self.progress_tracker_factory(file_path),
             self.ignore_errors, self.head, self.log_level,
             self.n_parallel_downloads > 1
         ) for url, file_path in self._result_files]
{geoseeq-0.6.0.dist-info → geoseeq-0.6.2.dist-info}/RECORD
CHANGED
@@ -11,7 +11,7 @@ geoseeq/project.py,sha256=-9Y2ik0-BpT3iqh89v8VQBbdadhI58oaUP9oZK8oetc,13741
 geoseeq/remote_object.py,sha256=Es-JlAz8iLRmCpAzh1MOwUh2MqtbuQM-p8wHIBAqNlQ,7131
 geoseeq/sample.py,sha256=whgEVk6GnDJJLjn5uTOqFqRtVxZD3BgjTo7brAC5noU,7981
 geoseeq/search.py,sha256=gawad6Cx5FxJBPlYkXWb-UKAO-UC0_yhvyU9Ca1kaNI,3388
-geoseeq/upload_download_manager.py,sha256=
+geoseeq/upload_download_manager.py,sha256=FMRqLLg77o1qFbWZc5Yc86a2pjeZrrn1rHJr1iaxKCU,8757
 geoseeq/user.py,sha256=tol8i1UGLRrbMw5jeJDnna1ikRgrCDd50Jxz0a1lSgg,690
 geoseeq/utils.py,sha256=PDRiEQIZYTcfEV9AYvloQVvfqs5JaebcFZodAa2SUW8,3577
 geoseeq/work_orders.py,sha256=5uLVVfdKE8qh4gGaHkdBpXJGRTujuSg59knWCqEET4A,8071
@@ -22,7 +22,7 @@ geoseeq/cli/detail.py,sha256=q8Suu-j2k18knfSVFG-SWWGNsKM-n8y9RMA3LcIIi9Y,4132
 geoseeq/cli/download.py,sha256=N_Wrg9d1kY9eJ6C1l0xc_YFjiri8gkXBo9JiuHx9xxE,17766
 geoseeq/cli/fastq_utils.py,sha256=-bmeQLaiMBm57zWOF0R5OlWTU0_3sh1JBC1RYw2BOFM,3083
 geoseeq/cli/get_eula.py,sha256=79mbUwyiF7O1r0g6UTxG9kJGQEqKuH805E6eLkPC6Y4,997
-geoseeq/cli/main.py,sha256=
+geoseeq/cli/main.py,sha256=y6OK6ryYf7TyMtufl0kGESro5Fy5Hu7_xzIc3aYDKCo,3791
 geoseeq/cli/manage.py,sha256=wGXAcVaXqE5JQEU8Jh6OlHr02nB396bpS_SFcOZdrEo,5929
 geoseeq/cli/progress_bar.py,sha256=p1Xl01nkYxSBZCB30ue2verIIi22W93m3ZAMAxipD0g,738
 geoseeq/cli/project.py,sha256=V5SdXm2Hwo2lxrkpwRDedw-mAE4XnM2uwT-Gj1D90VQ,3030
@@ -48,6 +48,9 @@ geoseeq/contrib/ncbi/api.py,sha256=WQeLoGA_-Zha-QeSO8_i7HpvXyD8UkV0qc5okm11KiA,1
 geoseeq/contrib/ncbi/bioproject.py,sha256=_oThTd_iLDOC8cLOlJKAatSr362OBYZCEV3YrqodhFg,4341
 geoseeq/contrib/ncbi/cli.py,sha256=j9zEcaZPTryK3a4xluRxigcJKDhRpRxbp3KZSx-Bfhk,2400
 geoseeq/contrib/ncbi/setup_logging.py,sha256=Tp1bY1U0f-o739aHpvVYriG2qdd1lFvCYBXZeXQgt-w,175
+geoseeq/file_system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+geoseeq/file_system/filesystem_download.py,sha256=8bcnxjWltekmCvb5N0b1guBIjLp4-CL2VtsEok-snv4,16963
+geoseeq/file_system/main.py,sha256=4HgYGq7WhlF96JlVIf16iFBTDujlBpxImmtoh4VCzDA,3627
 geoseeq/id_constructors/__init__.py,sha256=w5E0PNQ9UuAxBeZbDI7KBnUoERd85gGz3nScz45bd2o,126
 geoseeq/id_constructors/from_blobs.py,sha256=aj7M7NRpKGs3u3xUvuFJwmJdFeIcJPmaI2_bhwbFfEs,5702
 geoseeq/id_constructors/from_ids.py,sha256=bbAJX4LnuN70v9bny6N-jAwOudb2-ztHvlMBgRuSDz0,3151
@@ -66,10 +69,11 @@ geoseeq/plotting/map/overlay.py,sha256=4VmxqOESTQra9tPr8b8OLEUhJSit9lNipabeSznEY
 geoseeq/result/__init__.py,sha256=IFHIyRV8ZzuKIfwfze1SXgcKwNMcSgMAknLHMkwjXIU,356
 geoseeq/result/bioinfo.py,sha256=QQtbyogrdro9avJSN0713sxLVnVeA24mFw3hWtKDKyw,1782
 geoseeq/result/file_chunker.py,sha256=bXq1csuRtqMB5sbH-AfWo6gdPwrivv5DJPuHVj-h08w,1758
-geoseeq/result/file_download.py,sha256=
+geoseeq/result/file_download.py,sha256=2VFy_p20VxAu1ItNNM1PBcDKSp9dhRuyOhcb5UBwYEU,7805
 geoseeq/result/file_upload.py,sha256=xs1DrI-h4ZP7xN8HPBc3SFpcPAxR5HAolraP1Zu7tvE,10648
 geoseeq/result/result_file.py,sha256=1Yj9fkZhds3J-tay6eNH2-EHi00MovHGV1M80_ckHD8,8677
 geoseeq/result/result_folder.py,sha256=6porOXPh7Tpxw3oX5yMRPYQzNCGYqszqmFJd3SwQmTc,11122
+geoseeq/result/resumable_download_tracker.py,sha256=YEzqHBBnE7L3XokTvlTAhHZ8TcDTIE_pyTQ7YadOfbU,3667
 geoseeq/result/resumable_upload_tracker.py,sha256=2aI09gYz2yw63jEXqs8lmCRKQ79TIc3YuPETvP0Jeek,3811
 geoseeq/result/utils.py,sha256=C-CxGzB3WddlnRiqFSkrY78I_m0yFgNqsTBRzGU-y8Q,2772
 geoseeq/vc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -84,9 +88,9 @@ geoseeq/vc/vc_stub.py,sha256=IQr8dI0zsWKVAeY_5ybDD6n49_3othcgfHS3P0O9tuY,3110
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/test_api_client.py,sha256=TS5njc5pcPP_Ycy-ljcfPVT1hQRBsFVdQ0lCqBmoesU,12810
 tests/test_plotting.py,sha256=TcTu-2ARr8sxZJ7wPQxmbs3-gHw7uRvsgrhhhg0qKik,784
-geoseeq-0.6.
-geoseeq-0.6.
-geoseeq-0.6.
-geoseeq-0.6.
-geoseeq-0.6.
-geoseeq-0.6.
+geoseeq-0.6.2.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
+geoseeq-0.6.2.dist-info/METADATA,sha256=WSI2kZ4-2pMME7jQCQ3Hzg9dU3Gm7R6tFrKdTj1PCbg,4803
+geoseeq-0.6.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+geoseeq-0.6.2.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
+geoseeq-0.6.2.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
+geoseeq-0.6.2.dist-info/RECORD,,
{geoseeq-0.6.0.dist-info → geoseeq-0.6.2.dist-info}/LICENSE
File without changes
{geoseeq-0.6.0.dist-info → geoseeq-0.6.2.dist-info}/WHEEL
File without changes
{geoseeq-0.6.0.dist-info → geoseeq-0.6.2.dist-info}/entry_points.txt
File without changes
{geoseeq-0.6.0.dist-info → geoseeq-0.6.2.dist-info}/top_level.txt
File without changes