geoseeq 0.6.1__py3-none-any.whl → 0.6.3__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- geoseeq/cli/download.py +2 -2
- geoseeq/cli/main.py +2 -2
- geoseeq/file_system/__init__.py +0 -0
- geoseeq/file_system/filesystem_download.py +434 -0
- geoseeq/file_system/main.py +122 -0
- geoseeq/result/file_download.py +2 -1
- {geoseeq-0.6.1.dist-info → geoseeq-0.6.3.dist-info}/METADATA +6 -1
- {geoseeq-0.6.1.dist-info → geoseeq-0.6.3.dist-info}/RECORD +12 -9
- {geoseeq-0.6.1.dist-info → geoseeq-0.6.3.dist-info}/WHEEL +1 -1
- {geoseeq-0.6.1.dist-info → geoseeq-0.6.3.dist-info}/LICENSE +0 -0
- {geoseeq-0.6.1.dist-info → geoseeq-0.6.3.dist-info}/entry_points.txt +0 -0
- {geoseeq-0.6.1.dist-info → geoseeq-0.6.3.dist-info}/top_level.txt +0 -0
geoseeq/cli/download.py
CHANGED
@@ -160,8 +160,8 @@ def cli_download_files(
|
|
160
160
|
|
161
161
|
\b
|
162
162
|
# Download assembly contigs from two samples in the MetaSUB Consortium CSD16 project
|
163
|
-
$ geoseeq download files "MetaSUB Consortium/CSD16" `# specify the project`
|
164
|
-
haib17CEM4890_H2NYMCCXY_SL254769 haib17CEM4890_H2NYMCCXY_SL254773 `# specify the samples by name`
|
163
|
+
$ geoseeq download files "MetaSUB Consortium/CSD16" `# specify the project` \\
|
164
|
+
haib17CEM4890_H2NYMCCXY_SL254769 haib17CEM4890_H2NYMCCXY_SL254773 `# specify the samples by name` \\
|
165
165
|
--folder-type sample --extension '.contigs.fasta' # filter for contig files
|
166
166
|
|
167
167
|
---
|
geoseeq/cli/main.py
CHANGED
@@ -26,7 +26,7 @@ handler.setFormatter(logging.Formatter('[%(levelname)s] %(name)s :: %(message)s'
|
|
26
26
|
logger.addHandler(handler)
|
27
27
|
|
28
28
|
|
29
|
-
@click.group()
|
29
|
+
@click.group(context_settings={'show_default': True})
|
30
30
|
def main():
|
31
31
|
"""Command line interface for the GeoSeeq API.
|
32
32
|
|
@@ -54,7 +54,7 @@ def version():
|
|
54
54
|
Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
|
55
55
|
Run `geoseeq eula show` to view the EULA.
|
56
56
|
"""
|
57
|
-
click.echo('0.6.
|
57
|
+
click.echo('0.6.3') # remember to update setup
|
58
58
|
|
59
59
|
|
60
60
|
@main.group('advanced')
|
File without changes
|
@@ -0,0 +1,434 @@
|
|
1
|
+
|
2
|
+
import os
|
3
|
+
import json
|
4
|
+
from geoseeq import (
|
5
|
+
result_file_from_id,
|
6
|
+
result_folder_from_id,
|
7
|
+
sample_from_id,
|
8
|
+
project_from_id,
|
9
|
+
)
|
10
|
+
from geoseeq.utils import md5_checksum
|
11
|
+
from time import time
|
12
|
+
|
13
|
+
FILE_STATUS_MODIFIED_REMOTE = 'MODIFIED_REMOTE'
|
14
|
+
FILE_STATUS_MODIFIED_LOCAL = 'MODIFIED_LOCAL'
|
15
|
+
FILE_STATUS_NEW_LOCAL = 'NEW_LOCAL'
|
16
|
+
FILE_STATUS_NEW_REMOTE = 'NEW_REMOTE'
|
17
|
+
FILE_STATUS_IS_LOCAL_STUB = 'IS_LOCAL_STUB'
|
18
|
+
|
19
|
+
|
20
|
+
def dedupe_modified_files(modified_files):
    """Drop repeated entries from a list of modified-file records.

    Records are compared by their path, which is the third element of
    each record (index 2). Only the first record seen for a given path is
    kept; the relative order of the surviving records is unchanged.
    """
    # Dicts preserve insertion order, so `setdefault` keeps the first
    # record per path without disturbing the original ordering.
    first_record_by_path = {}
    for record in modified_files:
        first_record_by_path.setdefault(record[2], record)
    return list(first_record_by_path.values())
|
34
|
+
|
35
|
+
|
36
|
+
class ResultFileOnFilesystem:
    """A single GeoSeeq result file mirrored at a local file path.

    Note: unlike other filesystem classes the `path` is a file, not
    a directory. This is because the file is downloaded directly to
    the path.

    Bookkeeping lives in a hidden JSON "info file" next to the data file
    (see `info_filepath`). It records the remote uuid, the kind, and the
    checksum the local file had when the info file was written.
    """

    def __init__(self, result_file, path, kind):
        # `result_file` is the remote result file object, or None when the
        # local file is not (yet) associated with a remote object.
        self.result_file = result_file
        self.path = path
        self.kind = kind
        # Checksum recorded in the info file; filled in by `from_path`
        # and `write_info_file`.
        self.stored_checksum = None

    @property
    def info_filepath(self):
        """Path of the hidden info file that sits beside the data file."""
        dirpath = os.path.dirname(self.path)
        basename = os.path.basename(self.path)
        return os.path.join(dirpath, f'.gs_result_file__{basename}')

    @property
    def is_stub(self):
        """True when the local file exists and is empty (a placeholder)."""
        return os.path.exists(self.path) and os.path.getsize(self.path) == 0

    def file_is_ok(self, stubs_are_ok=False):
        """Return True when the local file does not need to be downloaded.

        A stub is only acceptable when `stubs_are_ok` is set. Otherwise the
        file is OK exactly when the remote object reports that no update is
        needed, which matches the check in `status_is_ok`.
        """
        if self.is_stub:
            return stubs_are_ok
        return not self.result_file.download_needs_update(self.path)

    def download(self, use_stubs=False, exists_ok=False):
        """Download the file (or create an empty stub) and write the info file.

        Raises:
            ValueError: if an info file already exists and `exists_ok` is False.
        """
        if os.path.exists(self.info_filepath):
            if not exists_ok:
                raise ValueError('Result file already exists at path: {}'.format(self.info_filepath))
            if self.file_is_ok(stubs_are_ok=use_stubs):
                return  # already present and current, nothing to do

        # Download the file
        if use_stubs:
            open(self.path, 'w').close()
        else:
            self.result_file.download(self.path)

        self.write_info_file()

    def local_file_checksum(self):
        """Return the checksum of the local file, or "__STUB__" for a stub."""
        if self.is_stub:
            return "__STUB__"
        return md5_checksum(self.path)

    def locally_modified(self):
        """Return True when the local file differs from the recorded checksum.

        The recorded checksum comes from `from_path` / `write_info_file`, or
        is read from the info file on demand. When no checksum was recorded
        there is nothing to compare against and the file is reported as
        unmodified.
        """
        recorded = getattr(self, 'stored_checksum', None)
        if recorded is None:
            try:
                with open(self.info_filepath, 'r') as f:
                    recorded = json.load(f).get('checksum')
            except FileNotFoundError:
                recorded = None
        if recorded is None:
            return False
        return recorded != self.local_file_checksum()

    def status_is_ok(self, stubs_are_ok=False):
        """Return True when the info file exists and the local copy is usable."""
        # check for an info file
        if not os.path.exists(self.info_filepath):
            return False
        if stubs_are_ok:
            return True
        return not self.result_file.download_needs_update(self.path)

    def write_info_file(self):
        """Write uuid, kind and current local checksum to the info file."""
        checksum = self.local_file_checksum()
        result_file_info = {
            "uuid": self.result_file.uuid,
            "kind": self.kind,
            "checksum": checksum,
        }
        with open(self.info_filepath, 'w') as f:
            json.dump(result_file_info, f)
        self.stored_checksum = checksum

    @classmethod
    def from_path(cls, path):
        """Build an instance from a local path, loading the info file if present.

        When there is no info file the returned object has `result_file`
        and `kind` set to None.
        """
        obj = cls(None, path, None)
        try:
            with open(obj.info_filepath, 'r') as f:
                result_file_info = json.load(f)
        except FileNotFoundError:
            return obj
        obj.result_file = result_file_from_id(result_file_info['uuid'])
        obj.kind = result_file_info['kind']
        obj.stored_checksum = result_file_info['checksum']
        return obj

    def list_abnormal_objects(self):
        """Return a list of files that have been modified.

        Since this class is a single file the list will either be empty
        or have one element. Each element is a tuple of
        ('FILE', status, path, result_file).

        Note that if a file was modified locally then uploaded to the server
        the file will be marked as modified remote.
        """
        if self.result_file is None:
            return [('FILE', FILE_STATUS_NEW_LOCAL, self.path, None)]
        if not os.path.exists(self.path):
            return [('FILE', FILE_STATUS_NEW_REMOTE, self.path, self.result_file)]
        if self.is_stub:
            return [('FILE', FILE_STATUS_IS_LOCAL_STUB, self.path, self.result_file)]
        if self.result_file.download_needs_update(self.path):
            return [('FILE', FILE_STATUS_MODIFIED_REMOTE, self.path, self.result_file)]
        if self.locally_modified():
            return [('FILE', FILE_STATUS_MODIFIED_LOCAL, self.path, self.result_file)]

        return []
|
147
|
+
|
148
|
+
|
149
|
+
class ResultFolderOnFilesystem:
    """A GeoSeeq result folder mirrored as a local directory.

    `path` is the local directory. A hidden `.gs_result_folder` JSON file
    inside it records the remote uuid and the kind the folder was
    downloaded with.
    """

    def __init__(self, result_folder, path, kind):
        # `result_folder` is the remote folder object, or None when the
        # local directory is not associated with a remote folder.
        self.result_folder = result_folder
        self.path = path
        self.kind = kind

    @property
    def info_filepath(self):
        """Path of the hidden info file inside the folder directory."""
        return os.path.join(self.path, '.gs_result_folder')

    def download(self, use_stubs=False, exists_ok=False):
        """Download every file in the folder, then write the folder info file.

        Raises:
            ValueError: if the info file already exists and `exists_ok` is False.
        """
        if os.path.exists(self.info_filepath) and not exists_ok:
            raise ValueError('Result folder already exists at path: {}'.format(self.info_filepath))

        # Download the files in the result folder
        for result_file in self.result_folder.get_fields():
            result_file_local_path = os.path.join(self.path, result_file.name)
            # makedirs on the parent so a file name containing a
            # subdirectory still has somewhere to land.
            os.makedirs(os.path.dirname(result_file_local_path), exist_ok=True)
            file_on_fs = ResultFileOnFilesystem(result_file, result_file_local_path, self.kind)
            file_on_fs.download(use_stubs=use_stubs, exists_ok=exists_ok)

        # Write the result folder data
        result_folder_info = {
            "uuid": self.result_folder.uuid,
            "kind": self.kind
        }
        with open(self.info_filepath, 'w') as f:
            json.dump(result_folder_info, f)

    def status_is_ok(self):
        """Return True when the info file and every remote file exist locally."""
        # check for an info file
        if not os.path.exists(self.info_filepath):
            return False

        # check that all files are downloaded; uses the same `get_fields`
        # listing as `download` and `list_abnormal_objects`.
        for result_file in self.result_folder.get_fields():
            result_file_path = os.path.join(self.path, result_file.name)
            if not os.path.exists(result_file_path):
                return False

        return True

    @classmethod
    def from_path(cls, path, kind=None):
        """Build an instance from a local directory.

        Args:
            path: local directory of the result folder.
            kind: fallback kind used when the directory has no info file.
                When an info file exists, the kind stored there wins.

        When there is no info file the returned object has `result_folder`
        set to None.
        """
        obj = cls(None, path, kind)
        try:
            with open(obj.info_filepath, 'r') as f:
                result_folder_info = json.load(f)
        except FileNotFoundError:
            return obj
        obj.result_folder = result_folder_from_id(result_folder_info['uuid'])
        obj.kind = result_folder_info['kind']
        return obj

    def list_abnormal_objects(self):
        """Return a list of files that have been modified.

        This function will return a list of tuples of the form
        (object type, status, path, remote object), deduplicated by path.
        """
        modified_files = []
        if not self.result_folder:
            modified_files.append(('FOLDER', FILE_STATUS_NEW_LOCAL, self.path, None))
        if not os.path.exists(self.path):
            modified_files.append(('FOLDER', FILE_STATUS_NEW_REMOTE, self.path, self.result_folder))

        # list local files
        if os.path.exists(self.path):
            for local_file in os.listdir(self.path):
                if local_file.startswith('.gs_'):
                    continue  # skip our own bookkeeping files
                local_file_path = os.path.join(self.path, local_file)
                result_file_on_fs = ResultFileOnFilesystem.from_path(local_file_path)
                modified_files.extend(result_file_on_fs.list_abnormal_objects())

        # list remote files
        if self.result_folder:
            for result_file in self.result_folder.get_fields():
                result_file_path = os.path.join(self.path, result_file.name)
                result_file_on_fs = ResultFileOnFilesystem(result_file, result_file_path, self.kind)
                modified_files.extend(result_file_on_fs.list_abnormal_objects())

        return dedupe_modified_files(modified_files)
|
233
|
+
|
234
|
+
|
235
|
+
class SampleOnFilesystem:
    """A GeoSeeq sample mirrored as a local directory of result folders.

    Each result folder of the sample becomes a subdirectory of `path`.
    A hidden `.gs_sample` JSON file in `path` records the sample uuid.
    """

    def __init__(self, sample, path):
        # `sample` is the remote sample object, or None when unknown.
        self.sample = sample
        # remove trailing slash; `endswith` also copes with an empty path
        self.path = path[:-1] if path.endswith('/') else path

    @property
    def info_filepath(self):
        """Path of the hidden info file inside the sample directory."""
        return os.path.join(self.path, '.gs_sample')

    def download(self, use_stubs=False, exists_ok=False):
        """Download every result folder of the sample, then write the info file.

        Raises:
            ValueError: if the info file already exists and `exists_ok` is False.
        """
        if os.path.exists(self.info_filepath) and not exists_ok:
            raise ValueError('Sample already exists at path: {}'.format(self.info_filepath))

        # download result folders
        for result_folder in self.sample.get_result_folders():
            result_folder_local_path = os.path.join(self.path, result_folder.name)
            os.makedirs(result_folder_local_path, exist_ok=True)
            folder_on_fs = ResultFolderOnFilesystem(result_folder, result_folder_local_path, "sample")
            folder_on_fs.download(use_stubs=use_stubs, exists_ok=exists_ok)

        # Write the sample data
        sample_info = {
            "uuid": self.sample.uuid
        }
        with open(self.info_filepath, 'w') as f:
            json.dump(sample_info, f)

    def status_is_ok(self):
        """Return True when the info file exists and every result folder is OK."""
        # check for an info file
        if not os.path.exists(self.info_filepath):
            return False

        # check that all result folders are downloaded
        for result_folder in self.sample.get_result_folders():
            result_folder_local_path = os.path.join(self.path, result_folder.name)
            # `from_path` reads the kind back from the folder's info file,
            # so only the path is passed.
            result_folder_on_fs = ResultFolderOnFilesystem.from_path(result_folder_local_path)
            if not result_folder_on_fs.status_is_ok():
                return False

        return True

    @classmethod
    def from_path(cls, path):
        """Build an instance from a local directory.

        When there is no info file the returned object has `sample` set
        to None.
        """
        obj = cls(None, path)
        try:
            with open(os.path.join(path, '.gs_sample'), 'r') as f:
                sample_info = json.load(f)
        except FileNotFoundError:
            return obj
        obj.sample = sample_from_id(sample_info['uuid'])
        return obj

    def list_abnormal_objects(self):
        """Return a list of files that have been modified.

        This function will return a list of tuples of the form
        (object type, status, path, remote object), deduplicated by path.
        """
        modified_files = []
        if not self.sample:
            modified_files.append(('SAMPLE', FILE_STATUS_NEW_LOCAL, self.path, None))
        if not os.path.exists(self.path):
            modified_files.append(('SAMPLE', FILE_STATUS_NEW_REMOTE, self.path, self.sample))

        # list local folders
        if os.path.exists(self.path):
            for local_folder in os.listdir(self.path):
                local_folder_path = os.path.join(self.path, local_folder)
                if not os.path.isdir(local_folder_path):
                    continue
                result_folder_on_fs = ResultFolderOnFilesystem.from_path(local_folder_path)
                modified_files.extend(result_folder_on_fs.list_abnormal_objects())

        # list remote folders
        if self.sample:
            for result_folder in self.sample.get_result_folders():
                result_folder_path = os.path.join(self.path, result_folder.name)
                result_folder_on_fs = ResultFolderOnFilesystem(result_folder, result_folder_path, "sample")
                modified_files.extend(result_folder_on_fs.list_abnormal_objects())

        return dedupe_modified_files(modified_files)
|
317
|
+
|
318
|
+
|
319
|
+
class ProjectOnFilesystem:
    """A GeoSeeq project mirrored as a local directory tree.

    Samples are written under `<path>/sample_results/<sample name>` and
    project-level result folders under
    `<path>/project_results/<folder name>`. A hidden `.gs_project` JSON
    file in `path` records the project uuid.
    """

    def __init__(self, project, path):
        self.project = project
        self.path = path

    @property
    def info_filepath(self):
        """Path of the hidden info file at the project root."""
        return os.path.join(self.path, '.gs_project')

    def download(self, use_stubs=False, exists_ok=False):
        """Download all samples and project result folders, then write the info file.

        Raises:
            ValueError: if the info file already exists and `exists_ok` is False.
        """
        if os.path.exists(self.info_filepath) and not exists_ok:
            raise ValueError('Project already exists at path: {}'.format(self.info_filepath))

        # download samples
        for sample in self.project.get_samples():
            sample_local_path = os.path.join(self.path, "sample_results", sample.name)
            os.makedirs(sample_local_path, exist_ok=True)
            sample_on_fs = SampleOnFilesystem(sample, sample_local_path)
            sample_on_fs.download(use_stubs=use_stubs, exists_ok=exists_ok)

        # download project result folders
        for result_folder in self.project.get_result_folders():
            result_folder_local_path = os.path.join(self.path, "project_results", result_folder.name)
            os.makedirs(result_folder_local_path, exist_ok=True)
            folder_on_fs = ResultFolderOnFilesystem(result_folder, result_folder_local_path, "project")
            folder_on_fs.download(use_stubs=use_stubs, exists_ok=exists_ok)

        # Write the project data
        project_info = {
            "uuid": self.project.uuid
        }
        with open(self.info_filepath, 'w') as f:
            json.dump(project_info, f)

    def status_is_ok(self):
        """Return True when the info file exists and all children are OK."""
        # check for an info file
        if not os.path.exists(self.info_filepath):
            return False

        # check that all samples are downloaded
        for sample in self.project.get_samples():
            sample_local_path = os.path.join(self.path, "sample_results", sample.name)
            sample_on_fs = SampleOnFilesystem.from_path(sample_local_path)
            if not sample_on_fs.status_is_ok():
                return False

        # check that all project result folders are downloaded
        for result_folder in self.project.get_result_folders():
            result_folder_local_path = os.path.join(self.path, "project_results", result_folder.name)
            # `from_path` reads the kind back from the folder's info file,
            # so only the path is passed.
            result_folder_on_fs = ResultFolderOnFilesystem.from_path(result_folder_local_path)
            if not result_folder_on_fs.status_is_ok():
                return False

        return True

    @classmethod
    def from_path(cls, path, recursive=False):
        """Load the project whose info file is in `path`.

        Args:
            path: directory to look in.
            recursive: when True, walk up through the parent directories
                until an info file is found.

        Raises:
            ValueError: if no info file is found in `path` (or, when
                `recursive` is set, in any parent directory).
        """
        current = path
        while True:
            try:
                with open(os.path.join(current, '.gs_project'), 'r') as f:
                    project_info = json.load(f)
            except FileNotFoundError:
                if not recursive:
                    raise ValueError('No project found in path or parent directories')
            else:
                project = project_from_id(project_info['uuid'])
                return cls(project, current)

            # Compare normalized absolute paths so that the filesystem root
            # is detected even when `path` was given in relative form.
            absolute = os.path.abspath(current)
            updir = os.path.dirname(absolute)
            if updir == absolute:
                raise ValueError('No project found in path or parent directories')
            current = updir

    def path_from_project_root(self, path):
        """Return `path` relative to the project root.

        Paths that do not start with "/" are returned unchanged. For an
        absolute path only the leading project root is removed; an absolute
        path outside the root just loses its leading "/".
        """
        if not path.startswith("/"):
            return path
        root = self.path
        if path == root:
            return ""
        prefix = root if root.endswith("/") else root + "/"
        if path.startswith(prefix):
            # Strip the prefix only, never later occurrences of the root
            # string inside the path.
            return path[len(prefix):]
        return path[1:]

    def list_abnormal_objects(self):
        """Return a list of files that have been modified.

        This function will return a list of tuples of the form
        (object type, status, path, remote object), deduplicated by path.
        """
        modified_files = []

        # list remote samples
        for sample in self.project.get_samples():
            sample_path = os.path.join(self.path, "sample_results", sample.name)
            sample_on_fs = SampleOnFilesystem(sample, sample_path)
            modified_files.extend(sample_on_fs.list_abnormal_objects())

        # list remote project result folders
        for result_folder in self.project.get_result_folders():
            result_folder_path = os.path.join(self.path, "project_results", result_folder.name)

            result_folder_on_fs = ResultFolderOnFilesystem(result_folder, result_folder_path, "project")
            modified_files.extend(result_folder_on_fs.list_abnormal_objects())

        # list local samples; the directory only exists once at least one
        # sample has been downloaded
        samples_root = os.path.join(self.path, "sample_results")
        if os.path.isdir(samples_root):
            for local_sample in os.listdir(samples_root):
                local_sample_path = os.path.join(samples_root, local_sample)
                if not os.path.isdir(local_sample_path):
                    continue
                sample_on_fs = SampleOnFilesystem.from_path(local_sample_path)
                modified_files.extend(sample_on_fs.list_abnormal_objects())

        # list local project result folders; same existence caveat as above
        folders_root = os.path.join(self.path, "project_results")
        if os.path.isdir(folders_root):
            for local_result_folder in os.listdir(folders_root):
                local_result_folder_path = os.path.join(folders_root, local_result_folder)
                if not os.path.isdir(local_result_folder_path):
                    continue
                result_folder_on_fs = ResultFolderOnFilesystem.from_path(local_result_folder_path)
                modified_files.extend(result_folder_on_fs.list_abnormal_objects())
        return dedupe_modified_files(modified_files)
|
432
|
+
|
433
|
+
|
434
|
+
|
@@ -0,0 +1,122 @@
|
|
1
|
+
from fuse import FUSE, Operations
|
2
|
+
import os
|
3
|
+
|
4
|
+
|
5
|
+
class GeoSeeqProjectFileSystem(Operations):
    """Mount a GeoSeeq project as a filesystem.

    The project will automatically have this directory structure:
    - <root>/project_results/<project_result_folder_name>/...
    - <root>/sample_results/<sample_name>/...
    - <root>/metadata/sample_metadata.csv
    - <root>/.config/config.json

    `root` is the local directory that backs the mount. Most operations
    are still unimplemented placeholders; only `open`, `create` and
    `read` do any work.
    """

    def __init__(self, root, project):
        self.root = root
        self.project = project

    def access(self, path, mode):
        pass

    def chmod(self, path, mode):
        pass

    def chown(self, path, uid, gid):
        pass

    def getattr(self, path, fh=None):
        pass

    def readdir(self, path, fh):
        pass

    def readlink(self, path):
        pass

    def mknod(self, path, mode, dev):
        pass

    def rmdir(self, path):
        pass

    def mkdir(self, path, mode):
        pass

    def statfs(self, path):
        pass

    def unlink(self, path):
        pass

    def symlink(self, name, target):
        pass

    def rename(self, old, new):
        pass

    def link(self, target, name):
        pass

    def utimens(self, path, times=None):
        pass

    def open(self, path, flags):
        """Fetch the remote file behind `path` and open the local copy.

        Returns the OS-level file descriptor of the local copy.

        Raises:
            NotImplementedError: for paths under `metadata`.
        """
        tkns = self._path_tokens(path)
        local_path = self._full_local_path(path)
        if tkns[0] == 'project_results':
            result_folder_name, result_file_name = tkns[1], '/'.join(tkns[2:])
            result_folder = self.project.get_result_folder(result_folder_name).get()
            result_file = result_folder.get_file(result_file_name).get()
            self._download_to_local(result_file, local_path)
        elif tkns[0] == 'sample_results':
            sample_name, result_folder_name, result_file_name = tkns[1], tkns[2], '/'.join(tkns[3:])
            sample = self.project.get_sample(sample_name).get()
            result_folder = sample.get_result_folder(result_folder_name).get()
            result_file = result_folder.get_file(result_file_name).get()
            self._download_to_local(result_file, local_path)
        elif tkns[0] == 'metadata':
            raise NotImplementedError('TODO')

        return os.open(local_path, flags)

    def create(self, path, mode, fi=None):
        """Create the remote objects behind `path` and open a local file.

        Returns the OS-level file descriptor of the newly created local
        file, which is what a FUSE `create` handler has to hand back.

        Raises:
            NotImplementedError: for paths under `metadata`.
        """
        tkns = self._path_tokens(path)
        local_path = self._full_local_path(path)
        if tkns[0] == 'project_results':
            result_name, file_name = tkns[1], '/'.join(tkns[2:])
            result_folder = self.project.get_result_folder(result_name).idem()
            result_file = result_folder.get_file(file_name).create()
            self._download_to_local(result_file, local_path)  # nothing to download at this point
        elif tkns[0] == 'sample_results':
            sample_name, result_folder_name, result_file_name = tkns[1], tkns[2], '/'.join(tkns[3:])
            sample = self.project.get_sample(sample_name).idem()
            result_folder = sample.get_result_folder(result_folder_name).idem()
            result_file = result_folder.get_file(result_file_name).create()
            self._download_to_local(result_file, local_path)  # nothing to download at this point
        elif tkns[0] == 'metadata':
            raise NotImplementedError('TODO')

        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        return os.open(local_path, os.O_WRONLY | os.O_CREAT, mode)

    def read(self, path, length, offset, fh):
        """Read `length` bytes at `offset` from the open descriptor `fh`."""
        os.lseek(fh, offset, os.SEEK_SET)
        return os.read(fh, length)

    def write(self, path, buf, offset, fh):
        pass

    def truncate(self, path, length, fh=None):
        pass

    def flush(self, path, fh):
        pass

    def release(self, path, fh):
        pass

    def fsync(self, path, fdatasync, fh):
        pass

    def _full_local_path(self, partial):
        """Map a mount-relative path onto the backing directory `root`."""
        if partial.startswith("/"):
            partial = partial[1:]
        return os.path.join(self.root, partial)

    def _path_tokens(self, path):
        """Split a mount path into components, ignoring any leading slash.

        The first token is then the top-level directory name
        ('project_results', 'sample_results', 'metadata', ...) whether or
        not the incoming path starts with "/".
        """
        return path.lstrip('/').split('/')

    def _download_to_local(self, result_file, local_path):
        """Download `result_file` to `local_path`, creating parent directories."""
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        result_file.download(local_path)
|
121
|
+
|
122
|
+
|
geoseeq/result/file_download.py
CHANGED
@@ -6,6 +6,7 @@ import os
|
|
6
6
|
from os.path import basename, getsize, join, isfile, getmtime, dirname
|
7
7
|
from pathlib import Path
|
8
8
|
from tempfile import NamedTemporaryFile
|
9
|
+
from math import ceil
|
9
10
|
|
10
11
|
from geoseeq.utils import download_ftp
|
11
12
|
from geoseeq.constants import FIVE_MB
|
@@ -43,7 +44,7 @@ def _download_resumable(response, filename, total_size_in_bytes, progress_tracke
|
|
43
44
|
target_id = url_to_id(response.url)
|
44
45
|
tracker = ResumableDownloadTracker(chunk_size, target_id, filename)
|
45
46
|
if not tracker.download_started: tracker.start_download(response.url)
|
46
|
-
n_chunks = total_size_in_bytes
|
47
|
+
n_chunks = ceil(total_size_in_bytes / chunk_size)
|
47
48
|
for i in range(n_chunks):
|
48
49
|
bytes_start, bytes_end = i * chunk_size, min((i + 1) * chunk_size - 1, total_size_in_bytes - 1)
|
49
50
|
if tracker.part_has_been_downloaded(i):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: geoseeq
|
3
|
-
Version: 0.6.
|
3
|
+
Version: 0.6.3
|
4
4
|
Summary: GeoSeeq command line tools and python API
|
5
5
|
Author: David C. Danko
|
6
6
|
Author-email: "David C. Danko" <dcdanko@biotia.io>
|
@@ -12,6 +12,11 @@ Classifier: Operating System :: OS Independent
|
|
12
12
|
Requires-Python: >=3.8
|
13
13
|
Description-Content-Type: text/markdown
|
14
14
|
License-File: LICENSE
|
15
|
+
Requires-Dist: requests
|
16
|
+
Requires-Dist: click
|
17
|
+
Requires-Dist: pandas
|
18
|
+
Requires-Dist: biopython
|
19
|
+
Requires-Dist: tqdm
|
15
20
|
|
16
21
|
# Geoseeq API Client
|
17
22
|
|
@@ -19,10 +19,10 @@ geoseeq/cli/__init__.py,sha256=4WnK87K5seRK3SGJAxNWnQTqyg5uBhdhrOrzB1D4b3M,24
|
|
19
19
|
geoseeq/cli/constants.py,sha256=Do5AUf9lMO9_P8KpFJ3XwwFBAWsxSjZ6sx9_QEGyC_c,176
|
20
20
|
geoseeq/cli/copy.py,sha256=02U9kdrAIbbM8MlRMLL6p-LMYFSuRObE3h5jyvcL__M,2275
|
21
21
|
geoseeq/cli/detail.py,sha256=q8Suu-j2k18knfSVFG-SWWGNsKM-n8y9RMA3LcIIi9Y,4132
|
22
|
-
geoseeq/cli/download.py,sha256=
|
22
|
+
geoseeq/cli/download.py,sha256=QTNA7qFjCdRJg2vKbAm5yH8WGlcF5fb5bSjm5QiI4XE,17768
|
23
23
|
geoseeq/cli/fastq_utils.py,sha256=-bmeQLaiMBm57zWOF0R5OlWTU0_3sh1JBC1RYw2BOFM,3083
|
24
24
|
geoseeq/cli/get_eula.py,sha256=79mbUwyiF7O1r0g6UTxG9kJGQEqKuH805E6eLkPC6Y4,997
|
25
|
-
geoseeq/cli/main.py,sha256=
|
25
|
+
geoseeq/cli/main.py,sha256=9yQRXR8Bs304a0m0eVFnrtmTClChED9S9C3ns2I0szA,3830
|
26
26
|
geoseeq/cli/manage.py,sha256=wGXAcVaXqE5JQEU8Jh6OlHr02nB396bpS_SFcOZdrEo,5929
|
27
27
|
geoseeq/cli/progress_bar.py,sha256=p1Xl01nkYxSBZCB30ue2verIIi22W93m3ZAMAxipD0g,738
|
28
28
|
geoseeq/cli/project.py,sha256=V5SdXm2Hwo2lxrkpwRDedw-mAE4XnM2uwT-Gj1D90VQ,3030
|
@@ -48,6 +48,9 @@ geoseeq/contrib/ncbi/api.py,sha256=WQeLoGA_-Zha-QeSO8_i7HpvXyD8UkV0qc5okm11KiA,1
|
|
48
48
|
geoseeq/contrib/ncbi/bioproject.py,sha256=_oThTd_iLDOC8cLOlJKAatSr362OBYZCEV3YrqodhFg,4341
|
49
49
|
geoseeq/contrib/ncbi/cli.py,sha256=j9zEcaZPTryK3a4xluRxigcJKDhRpRxbp3KZSx-Bfhk,2400
|
50
50
|
geoseeq/contrib/ncbi/setup_logging.py,sha256=Tp1bY1U0f-o739aHpvVYriG2qdd1lFvCYBXZeXQgt-w,175
|
51
|
+
geoseeq/file_system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
52
|
+
geoseeq/file_system/filesystem_download.py,sha256=8bcnxjWltekmCvb5N0b1guBIjLp4-CL2VtsEok-snv4,16963
|
53
|
+
geoseeq/file_system/main.py,sha256=4HgYGq7WhlF96JlVIf16iFBTDujlBpxImmtoh4VCzDA,3627
|
51
54
|
geoseeq/id_constructors/__init__.py,sha256=w5E0PNQ9UuAxBeZbDI7KBnUoERd85gGz3nScz45bd2o,126
|
52
55
|
geoseeq/id_constructors/from_blobs.py,sha256=aj7M7NRpKGs3u3xUvuFJwmJdFeIcJPmaI2_bhwbFfEs,5702
|
53
56
|
geoseeq/id_constructors/from_ids.py,sha256=bbAJX4LnuN70v9bny6N-jAwOudb2-ztHvlMBgRuSDz0,3151
|
@@ -66,7 +69,7 @@ geoseeq/plotting/map/overlay.py,sha256=4VmxqOESTQra9tPr8b8OLEUhJSit9lNipabeSznEY
|
|
66
69
|
geoseeq/result/__init__.py,sha256=IFHIyRV8ZzuKIfwfze1SXgcKwNMcSgMAknLHMkwjXIU,356
|
67
70
|
geoseeq/result/bioinfo.py,sha256=QQtbyogrdro9avJSN0713sxLVnVeA24mFw3hWtKDKyw,1782
|
68
71
|
geoseeq/result/file_chunker.py,sha256=bXq1csuRtqMB5sbH-AfWo6gdPwrivv5DJPuHVj-h08w,1758
|
69
|
-
geoseeq/result/file_download.py,sha256=
|
72
|
+
geoseeq/result/file_download.py,sha256=KalIkwBbFI8xRpbhToixfd1KMAu_0FYwxdKq146NAHw,7832
|
70
73
|
geoseeq/result/file_upload.py,sha256=xs1DrI-h4ZP7xN8HPBc3SFpcPAxR5HAolraP1Zu7tvE,10648
|
71
74
|
geoseeq/result/result_file.py,sha256=1Yj9fkZhds3J-tay6eNH2-EHi00MovHGV1M80_ckHD8,8677
|
72
75
|
geoseeq/result/result_folder.py,sha256=6porOXPh7Tpxw3oX5yMRPYQzNCGYqszqmFJd3SwQmTc,11122
|
@@ -85,9 +88,9 @@ geoseeq/vc/vc_stub.py,sha256=IQr8dI0zsWKVAeY_5ybDD6n49_3othcgfHS3P0O9tuY,3110
|
|
85
88
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
86
89
|
tests/test_api_client.py,sha256=TS5njc5pcPP_Ycy-ljcfPVT1hQRBsFVdQ0lCqBmoesU,12810
|
87
90
|
tests/test_plotting.py,sha256=TcTu-2ARr8sxZJ7wPQxmbs3-gHw7uRvsgrhhhg0qKik,784
|
88
|
-
geoseeq-0.6.
|
89
|
-
geoseeq-0.6.
|
90
|
-
geoseeq-0.6.
|
91
|
-
geoseeq-0.6.
|
92
|
-
geoseeq-0.6.
|
93
|
-
geoseeq-0.6.
|
91
|
+
geoseeq-0.6.3.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
|
92
|
+
geoseeq-0.6.3.dist-info/METADATA,sha256=IH-VB_1NsbGTLGO_j2OuJunReO7L-FSnougvAgZUQkE,4915
|
93
|
+
geoseeq-0.6.3.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
|
94
|
+
geoseeq-0.6.3.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
|
95
|
+
geoseeq-0.6.3.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
|
96
|
+
geoseeq-0.6.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|