geoseeq 0.6.1__tar.gz → 0.6.3__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {geoseeq-0.6.1 → geoseeq-0.6.3}/PKG-INFO +6 -1
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/download.py +2 -2
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/main.py +2 -2
- geoseeq-0.6.3/geoseeq/file_system/filesystem_download.py +434 -0
- geoseeq-0.6.3/geoseeq/file_system/main.py +122 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/result/file_download.py +2 -1
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq.egg-info/PKG-INFO +6 -1
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq.egg-info/SOURCES.txt +4 -0
- geoseeq-0.6.3/geoseeq.egg-info/requires.txt +5 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/pyproject.toml +9 -2
- geoseeq-0.6.3/tests/__init__.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/LICENSE +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/README.md +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/__init__.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/app.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/blob_constructors.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/bulk_creators.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/__init__.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/constants.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/copy.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/detail.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/fastq_utils.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/get_eula.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/manage.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/progress_bar.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/project.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/raw.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/run.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/search.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/shared_params/__init__.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/shared_params/common_state.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/shared_params/config.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/shared_params/id_handlers.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/shared_params/obj_getters.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/shared_params/opts_and_args.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/upload/__init__.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/upload/upload.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/upload/upload_advanced.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/upload/upload_reads.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/user.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/utils.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/cli/view.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/constants.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/contrib/__init__.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/contrib/ncbi/__init__.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/contrib/ncbi/api.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/contrib/ncbi/bioproject.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/contrib/ncbi/cli.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/contrib/ncbi/setup_logging.py +0 -0
- {geoseeq-0.6.1/geoseeq/vc → geoseeq-0.6.3/geoseeq/file_system}/__init__.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/file_system_cache.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/id_constructors/__init__.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/id_constructors/from_blobs.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/id_constructors/from_ids.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/id_constructors/from_names.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/id_constructors/from_uuids.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/id_constructors/resolvers.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/id_constructors/utils.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/knex.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/organization.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/pipeline.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/plotting/__init__.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/plotting/constants.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/plotting/highcharts.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/plotting/map/__init__.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/plotting/map/base_layer.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/plotting/map/map.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/plotting/map/overlay.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/plotting/selectable.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/project.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/remote_object.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/result/__init__.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/result/bioinfo.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/result/file_chunker.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/result/file_upload.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/result/result_file.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/result/result_folder.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/result/resumable_download_tracker.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/result/resumable_upload_tracker.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/result/utils.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/sample.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/search.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/upload_download_manager.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/user.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/utils.py +0 -0
- {geoseeq-0.6.1/tests → geoseeq-0.6.3/geoseeq/vc}/__init__.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/vc/checksum.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/vc/cli.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/vc/clone.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/vc/constants.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/vc/vc_cache.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/vc/vc_dir.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/vc/vc_sample.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/vc/vc_stub.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq/work_orders.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq.egg-info/dependency_links.txt +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq.egg-info/entry_points.txt +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/geoseeq.egg-info/top_level.txt +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/setup.cfg +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/setup.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/tests/test_api_client.py +0 -0
- {geoseeq-0.6.1 → geoseeq-0.6.3}/tests/test_plotting.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: geoseeq
|
3
|
-
Version: 0.6.1
|
3
|
+
Version: 0.6.3
|
4
4
|
Summary: GeoSeeq command line tools and python API
|
5
5
|
Author: David C. Danko
|
6
6
|
Author-email: "David C. Danko" <dcdanko@biotia.io>
|
@@ -12,6 +12,11 @@ Classifier: Operating System :: OS Independent
|
|
12
12
|
Requires-Python: >=3.8
|
13
13
|
Description-Content-Type: text/markdown
|
14
14
|
License-File: LICENSE
|
15
|
+
Requires-Dist: requests
|
16
|
+
Requires-Dist: click
|
17
|
+
Requires-Dist: pandas
|
18
|
+
Requires-Dist: biopython
|
19
|
+
Requires-Dist: tqdm
|
15
20
|
|
16
21
|
# Geoseeq API Client
|
17
22
|
|
@@ -160,8 +160,8 @@ def cli_download_files(
|
|
160
160
|
|
161
161
|
\b
|
162
162
|
# Download assembly contigs from two samples in the MetaSUB Consortium CSD16 project
|
163
|
-
$ geoseeq download files "MetaSUB Consortium/CSD16" `# specify the project`
|
164
|
-
haib17CEM4890_H2NYMCCXY_SL254769 haib17CEM4890_H2NYMCCXY_SL254773 `# specify the samples by name`
|
163
|
+
$ geoseeq download files "MetaSUB Consortium/CSD16" `# specify the project` \\
|
164
|
+
haib17CEM4890_H2NYMCCXY_SL254769 haib17CEM4890_H2NYMCCXY_SL254773 `# specify the samples by name` \\
|
165
165
|
--folder-type sample --extension '.contigs.fasta' # filter for contig files
|
166
166
|
|
167
167
|
---
|
@@ -26,7 +26,7 @@ handler.setFormatter(logging.Formatter('[%(levelname)s] %(name)s :: %(message)s'
|
|
26
26
|
logger.addHandler(handler)
|
27
27
|
|
28
28
|
|
29
|
-
@click.group()
|
29
|
+
@click.group(context_settings={'show_default': True})
|
30
30
|
def main():
|
31
31
|
"""Command line interface for the GeoSeeq API.
|
32
32
|
|
@@ -54,7 +54,7 @@ def version():
|
|
54
54
|
Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
|
55
55
|
Run `geoseeq eula show` to view the EULA.
|
56
56
|
"""
|
57
|
-
click.echo('0.6.1') # remember to update setup
|
57
|
+
click.echo('0.6.3') # remember to update setup
|
58
58
|
|
59
59
|
|
60
60
|
@main.group('advanced')
|
@@ -0,0 +1,434 @@
|
|
1
|
+
|
2
|
+
import os
|
3
|
+
import json
|
4
|
+
from geoseeq import (
|
5
|
+
result_file_from_id,
|
6
|
+
result_folder_from_id,
|
7
|
+
sample_from_id,
|
8
|
+
project_from_id,
|
9
|
+
)
|
10
|
+
from geoseeq.utils import md5_checksum
|
11
|
+
from time import time
|
12
|
+
|
13
|
+
FILE_STATUS_MODIFIED_REMOTE = 'MODIFIED_REMOTE'
|
14
|
+
FILE_STATUS_MODIFIED_LOCAL = 'MODIFIED_LOCAL'
|
15
|
+
FILE_STATUS_NEW_LOCAL = 'NEW_LOCAL'
|
16
|
+
FILE_STATUS_NEW_REMOTE = 'NEW_REMOTE'
|
17
|
+
FILE_STATUS_IS_LOCAL_STUB = 'IS_LOCAL_STUB'
|
18
|
+
|
19
|
+
|
20
|
+
def dedupe_modified_files(modified_files):
    """Drop repeated entries from a list of modified-file records.

    Records are keyed on their third element (index 2), which the callers
    in this module fill with the path of the object. Only the earliest
    record seen for each path survives, and the survivors keep their
    original relative order.
    """
    first_by_path = {}
    for record in modified_files:
        # setdefault leaves an existing entry untouched, so the first record wins.
        first_by_path.setdefault(record[2], record)
    return list(first_by_path.values())
|
34
|
+
|
35
|
+
|
36
|
+
class ResultFileOnFilesystem:
    """A single GeoSeeq result file mirrored at a local path.

    Note: unlike other filesystem classes the `path` is a file, not
    a directory. This is because the file is downloaded directly to
    the path. Bookkeeping data (uuid, kind, checksum) is stored in a
    hidden sibling file, see `info_filepath`.
    """

    # Placeholder checksum recorded for zero-byte stub files.
    _STUB_CHECKSUM = "__STUB__"

    def __init__(self, result_file, path, kind):
        self.result_file = result_file
        self.path = path
        self.kind = kind
        # Checksum recorded in the info file. Set by `from_path`, or read
        # lazily by `locally_modified`; None means "not loaded / unknown".
        self.stored_checksum = None

    @property
    def info_filepath(self):
        """Path of the hidden info file that sits next to `path`."""
        dirpath = os.path.dirname(self.path)
        basename = os.path.basename(self.path)
        return os.path.join(dirpath, f'.gs_result_file__{basename}')

    @property
    def is_stub(self):
        """True when `path` exists and is an empty (zero-byte) placeholder."""
        return os.path.exists(self.path) and os.path.getsize(self.path) == 0

    def file_is_ok(self, stubs_are_ok=False):
        """Return True when the local file does not need to be (re)downloaded.

        A stub is acceptable only if `stubs_are_ok`. Any other file is ok
        when the remote object reports that it does NOT need an update
        (the original returned the un-negated value, inverting the result).
        """
        if self.is_stub:
            return stubs_are_ok
        return not self.result_file.download_needs_update(self.path)

    def download(self, use_stubs=False, exists_ok=False):
        """Download the file (or create an empty stub) and write its info file.

        Raises:
            ValueError: an info file already exists and `exists_ok` is False.
        """
        if os.path.exists(self.info_filepath):
            if not exists_ok:
                raise ValueError('Result file already exists at path: {}'.format(self.info_filepath))
            if self.file_is_ok(stubs_are_ok=use_stubs):
                return  # already present and acceptable, nothing to do

        if use_stubs:
            # Create (or truncate to) an empty placeholder file.
            with open(self.path, 'w'):
                pass
        else:
            self.result_file.download(self.path)

        self.write_info_file()

    def local_file_checksum(self):
        """Return the checksum of the local file, or a marker string for stubs."""
        if self.is_stub:
            return self._STUB_CHECKSUM
        return md5_checksum(self.path)

    def _recorded_checksum(self):
        """Return the checksum stored in the info file, or None if unavailable."""
        if self.stored_checksum is None:
            try:
                with open(self.info_filepath, 'r') as f:
                    self.stored_checksum = json.load(f).get('checksum')
            except FileNotFoundError:
                return None
        return self.stored_checksum

    def locally_modified(self):
        """Return True when the local file differs from the recorded checksum.

        Returns False when there is no recorded checksum to compare against
        (no info file, or the file was recorded as a stub and therefore has
        no content checksum).
        """
        recorded = self._recorded_checksum()
        if recorded is None or recorded == self._STUB_CHECKSUM:
            return False
        return recorded != self.local_file_checksum()

    def status_is_ok(self, stubs_are_ok=False):
        """Return True when an info file exists and the file is acceptable."""
        # check for an info file
        if not os.path.exists(self.info_filepath):
            return False
        if stubs_are_ok:
            return True
        return not self.result_file.download_needs_update(self.path)

    def write_info_file(self):
        """Write uuid, kind and current local checksum to the info file."""
        result_file_info = {
            "uuid": self.result_file.uuid,
            "kind": self.kind,
            "checksum": self.local_file_checksum(),
        }
        with open(self.info_filepath, 'w') as f:
            json.dump(result_file_info, f)

    @classmethod
    def from_path(cls, path):
        """Build an instance from a local path.

        If the info file for `path` exists, the remote result file, kind and
        stored checksum are loaded from it; otherwise they are left as None.
        """
        obj = cls(None, path, None)
        try:
            with open(obj.info_filepath, 'r') as f:
                result_file_info = json.load(f)
        except FileNotFoundError:
            return obj
        obj.result_file = result_file_from_id(result_file_info['uuid'])
        obj.kind = result_file_info['kind']
        obj.stored_checksum = result_file_info['checksum']
        return obj

    def list_abnormal_objects(self):
        """Return a list of files that have been modified.

        Since this class is a single file the list will either be empty
        or have one element. Each element is a tuple
        `('FILE', status, path, result_file)`.

        Note that if a file was modified locally then uploaded to the server
        the file will be marked as modified remote.
        """
        if self.result_file is None:
            return [('FILE', FILE_STATUS_NEW_LOCAL, self.path, None)]
        if not os.path.exists(self.path):
            return [('FILE', FILE_STATUS_NEW_REMOTE, self.path, self.result_file)]
        if self.is_stub:
            return [('FILE', FILE_STATUS_IS_LOCAL_STUB, self.path, self.result_file)]
        if self.result_file.download_needs_update(self.path):
            return [('FILE', FILE_STATUS_MODIFIED_REMOTE, self.path, self.result_file)]
        if self.locally_modified():
            return [('FILE', FILE_STATUS_MODIFIED_LOCAL, self.path, self.result_file)]

        return []
|
147
|
+
|
148
|
+
|
149
|
+
class ResultFolderOnFilesystem:
    """A GeoSeeq result folder mirrored as a local directory.

    `path` is the local directory; `kind` is the label the callers in this
    module pass in ("sample" or "project"). Bookkeeping data is stored in a
    hidden `.gs_result_folder` file inside the directory.
    """

    def __init__(self, result_folder, path, kind):
        self.result_folder = result_folder
        self.path = path
        self.kind = kind

    @property
    def info_filepath(self):
        """Path of the hidden info file inside the folder."""
        return os.path.join(self.path, '.gs_result_folder')

    def download(self, use_stubs=False, exists_ok=False):
        """Download every file in the folder, then write the folder info file.

        Raises:
            ValueError: an info file already exists and `exists_ok` is False.
        """
        if os.path.exists(self.info_filepath) and not exists_ok:
            raise ValueError('Result folder already exists at path: {}'.format(self.info_filepath))

        # Make sure the directory exists even when the folder has no files,
        # otherwise writing the info file below would fail.
        if self.path:
            os.makedirs(self.path, exist_ok=True)

        # Download the files in the result folder
        for result_file in self.result_folder.get_fields():
            result_file_local_path = os.path.join(self.path, result_file.name)
            # The code assumes file names may contain sub-directories.
            os.makedirs(os.path.dirname(result_file_local_path), exist_ok=True)
            ResultFileOnFilesystem(result_file, result_file_local_path, self.kind)\
                .download(use_stubs=use_stubs, exists_ok=exists_ok)

        # Write the result folder data
        result_folder_info = {
            "uuid": self.result_folder.uuid,
            "kind": self.kind
        }
        with open(self.info_filepath, 'w') as f:
            json.dump(result_folder_info, f)

    def status_is_ok(self):
        """Return True when the info file and every remote file exist locally."""
        # check for an info file
        if not os.path.exists(self.info_filepath):
            return False

        # check that all files are downloaded
        # (uses get_fields() like the rest of this class; the original called
        # get_files() here -- NOTE(review): confirm against the ResultFolder API)
        return all(
            os.path.exists(os.path.join(self.path, result_file.name))
            for result_file in self.result_folder.get_fields()
        )

    @classmethod
    def from_path(cls, path, kind=None):
        """Build an instance from a local directory.

        `kind` is an optional fallback used when the directory has no info
        file; callers in this file pass it positionally. When the info file
        exists, the remote folder and kind are loaded from it.
        """
        obj = cls(None, path, kind)
        try:
            with open(obj.info_filepath, 'r') as f:
                result_folder_info = json.load(f)
        except FileNotFoundError:
            return obj
        obj.result_folder = result_folder_from_id(result_folder_info['uuid'])
        obj.kind = result_folder_info['kind']
        return obj

    def list_abnormal_objects(self):
        """Return a list of objects that are new, missing or modified.

        Each element is a tuple `(object_type, status, path, remote_object)`.
        Entries are de-duplicated by path, keeping the first one found.
        """
        modified_files = []
        if not self.result_folder:
            modified_files.append(('FOLDER', FILE_STATUS_NEW_LOCAL, self.path, None))
        if not os.path.exists(self.path):
            modified_files.append(('FOLDER', FILE_STATUS_NEW_REMOTE, self.path, self.result_folder))

        # list local files
        if os.path.exists(self.path):
            for local_file in os.listdir(self.path):
                if local_file.startswith('.gs_'):
                    continue  # skip our own bookkeeping files
                local_file_path = os.path.join(self.path, local_file)
                result_file_on_fs = ResultFileOnFilesystem.from_path(local_file_path)
                modified_files.extend(result_file_on_fs.list_abnormal_objects())

        # list remote files
        if self.result_folder:
            for result_file in self.result_folder.get_fields():
                result_file_path = os.path.join(self.path, result_file.name)
                result_file_on_fs = ResultFileOnFilesystem(result_file, result_file_path, self.kind)
                modified_files.extend(result_file_on_fs.list_abnormal_objects())

        return dedupe_modified_files(modified_files)
|
233
|
+
|
234
|
+
|
235
|
+
class SampleOnFilesystem:
    """A GeoSeeq sample mirrored as a local directory of result folders.

    Each result folder of the sample lives at `<path>/<result_folder.name>`.
    Bookkeeping data is stored in a hidden `.gs_sample` file in `path`.
    """

    def __init__(self, sample, path):
        self.sample = sample
        # remove one trailing slash; endswith() is safe for an empty path
        self.path = path[:-1] if path.endswith('/') else path

    @property
    def info_filepath(self):
        """Path of the hidden info file inside the sample directory."""
        return os.path.join(self.path, '.gs_sample')

    def download(self, use_stubs=False, exists_ok=False):
        """Download every result folder of the sample, then write the info file.

        Raises:
            ValueError: an info file already exists and `exists_ok` is False.
        """
        if os.path.exists(self.info_filepath) and not exists_ok:
            raise ValueError('Sample already exists at path: {}'.format(self.info_filepath))

        # Make sure the directory exists even when the sample has no result
        # folders, otherwise writing the info file below would fail.
        if self.path:
            os.makedirs(self.path, exist_ok=True)

        # download result folders
        for result_folder in self.sample.get_result_folders():
            result_folder_local_path = os.path.join(self.path, result_folder.name)
            os.makedirs(result_folder_local_path, exist_ok=True)
            ResultFolderOnFilesystem(result_folder, result_folder_local_path, "sample")\
                .download(use_stubs=use_stubs, exists_ok=exists_ok)

        # Write the sample data
        sample_info = {
            "uuid": self.sample.uuid
        }
        with open(self.info_filepath, 'w') as f:
            json.dump(sample_info, f)

    def status_is_ok(self):
        """Return True when the info file exists and every result folder is ok."""
        # check for an info file
        if not os.path.exists(self.info_filepath):
            return False

        # check that all result folders are downloaded
        for result_folder in self.sample.get_result_folders():
            result_folder_local_path = os.path.join(self.path, result_folder.name)
            # Wrap the remote folder we already hold. The original called
            # from_path(path, "sample"), which does not match from_path's
            # one-argument signature and raised TypeError.
            result_folder_on_fs = ResultFolderOnFilesystem(result_folder, result_folder_local_path, "sample")
            if not result_folder_on_fs.status_is_ok():
                return False

        return True

    @classmethod
    def from_path(cls, path):
        """Build an instance from a local directory.

        When `<path>/.gs_sample` exists the remote sample is loaded from the
        uuid stored in it; otherwise `sample` is left as None.
        """
        obj = cls(None, path)
        try:
            with open(os.path.join(path, '.gs_sample'), 'r') as f:
                sample_info = json.load(f)
        except FileNotFoundError:
            return obj
        obj.sample = sample_from_id(sample_info['uuid'])
        return obj

    def list_abnormal_objects(self):
        """Return a list of objects that are new, missing or modified.

        Each element is a tuple `(object_type, status, path, remote_object)`.
        Entries are de-duplicated by path, keeping the first one found.
        """
        modified_files = []
        if not self.sample:
            modified_files.append(('SAMPLE', FILE_STATUS_NEW_LOCAL, self.path, None))
        if not os.path.exists(self.path):
            modified_files.append(('SAMPLE', FILE_STATUS_NEW_REMOTE, self.path, self.sample))

        # list local folders
        if os.path.exists(self.path):
            for local_folder in os.listdir(self.path):
                local_folder_path = os.path.join(self.path, local_folder)
                if not os.path.isdir(local_folder_path):
                    continue
                result_folder_on_fs = ResultFolderOnFilesystem.from_path(local_folder_path)
                modified_files.extend(result_folder_on_fs.list_abnormal_objects())

        # list remote folders
        if self.sample:
            for result_folder in self.sample.get_result_folders():
                result_folder_path = os.path.join(self.path, result_folder.name)
                result_folder_on_fs = ResultFolderOnFilesystem(result_folder, result_folder_path, "sample")
                modified_files.extend(result_folder_on_fs.list_abnormal_objects())

        return dedupe_modified_files(modified_files)
|
317
|
+
|
318
|
+
|
319
|
+
class ProjectOnFilesystem:
    """A GeoSeeq project mirrored as a local directory tree.

    Layout used by this class:
      - `<path>/sample_results/<sample.name>/...`
      - `<path>/project_results/<result_folder.name>/...`
      - `<path>/.gs_project` (hidden info file holding the project uuid)
    """

    def __init__(self, project, path):
        self.project = project
        self.path = path

    @property
    def info_filepath(self):
        """Path of the hidden info file in the project root."""
        return os.path.join(self.path, '.gs_project')

    def download(self, use_stubs=False, exists_ok=False):
        """Download all samples and project result folders, then write the info file.

        Raises:
            ValueError: an info file already exists and `exists_ok` is False.
        """
        if os.path.exists(self.info_filepath) and not exists_ok:
            raise ValueError('Project already exists at path: {}'.format(self.info_filepath))

        # Make sure the root exists even for an empty project, otherwise
        # writing the info file below would fail.
        if self.path:
            os.makedirs(self.path, exist_ok=True)

        # download samples
        for sample in self.project.get_samples():
            sample_local_path = os.path.join(self.path, "sample_results", sample.name)
            os.makedirs(sample_local_path, exist_ok=True)
            SampleOnFilesystem(sample, sample_local_path)\
                .download(use_stubs=use_stubs, exists_ok=exists_ok)

        # download project result folders
        for result_folder in self.project.get_result_folders():
            result_folder_local_path = os.path.join(self.path, "project_results", result_folder.name)
            os.makedirs(result_folder_local_path, exist_ok=True)
            ResultFolderOnFilesystem(result_folder, result_folder_local_path, "project")\
                .download(use_stubs=use_stubs, exists_ok=exists_ok)

        # Write the project data
        project_info = {
            "uuid": self.project.uuid
        }
        with open(self.info_filepath, 'w') as f:
            json.dump(project_info, f)

    def status_is_ok(self):
        """Return True when the info file exists and all samples/folders are ok."""
        # check for an info file
        if not os.path.exists(self.info_filepath):
            return False

        # check that all samples are downloaded
        for sample in self.project.get_samples():
            sample_local_path = os.path.join(self.path, "sample_results", sample.name)
            sample_on_fs = SampleOnFilesystem.from_path(sample_local_path)
            if not sample_on_fs.status_is_ok():
                return False

        # check that all project result folders are downloaded
        for result_folder in self.project.get_result_folders():
            result_folder_local_path = os.path.join(self.path, "project_results", result_folder.name)
            # Wrap the remote folder we already hold. The original called
            # from_path(path, "project"), which does not match from_path's
            # one-argument signature and raised TypeError.
            result_folder_on_fs = ResultFolderOnFilesystem(result_folder, result_folder_local_path, "project")
            if not result_folder_on_fs.status_is_ok():
                return False

        return True

    @classmethod
    def from_path(cls, path, recursive=False):
        """Load the project whose `.gs_project` file is in `path`.

        With `recursive=True`, parent directories are searched in turn until
        an info file is found or the filesystem root is reached.

        Raises:
            ValueError: no info file was found.
        """
        current = path
        while True:
            try:
                with open(os.path.join(current, '.gs_project'), 'r') as f:
                    project_info = json.load(f)
            except FileNotFoundError:
                if not recursive:
                    raise ValueError('No project found in path or parent directories') from None
                # Compare normalised absolute paths so a relative or
                # slash-terminated start path cannot defeat the root check.
                here = os.path.abspath(current)
                parent = os.path.dirname(here)
                if parent == here:
                    raise ValueError('No project found in path or parent directories') from None
                current = parent
                continue
            project = project_from_id(project_info['uuid'])
            return cls(project, current)

    def path_from_project_root(self, path):
        """Return `path` relative to the project root.

        Relative paths are returned unchanged. An absolute path under the
        project root has the root prefix removed. Any other absolute path
        only loses its leading slash, as before.
        """
        if not path.startswith("/"):
            return path
        root = self.path.rstrip("/")
        if path == root:
            return ""
        if path.startswith(root + "/"):
            # Strip the prefix only; str.replace would also remove later
            # occurrences of the root string inside the path.
            return path[len(root) + 1:]
        return path[1:]

    def list_abnormal_objects(self):
        """Return a list of objects that are new, missing or modified.

        Each element is a tuple `(object_type, status, path, remote_object)`.
        Remote objects are listed first, then local ones; entries are
        de-duplicated by path, keeping the first one found.
        """
        modified_files = []

        # list remote samples
        for sample in self.project.get_samples():
            sample_path = os.path.join(self.path, "sample_results", sample.name)
            sample_on_fs = SampleOnFilesystem(sample, sample_path)
            modified_files.extend(sample_on_fs.list_abnormal_objects())

        # list remote project result folders
        for result_folder in self.project.get_result_folders():
            result_folder_path = os.path.join(self.path, "project_results", result_folder.name)
            result_folder_on_fs = ResultFolderOnFilesystem(result_folder, result_folder_path, "project")
            modified_files.extend(result_folder_on_fs.list_abnormal_objects())

        # list local samples (the directory is absent for projects without samples)
        samples_root = os.path.join(self.path, "sample_results")
        if os.path.isdir(samples_root):
            for local_sample in os.listdir(samples_root):
                local_sample_path = os.path.join(samples_root, local_sample)
                if not os.path.isdir(local_sample_path):
                    continue
                sample_on_fs = SampleOnFilesystem.from_path(local_sample_path)
                modified_files.extend(sample_on_fs.list_abnormal_objects())

        # list local project result folders (may likewise be absent)
        folders_root = os.path.join(self.path, "project_results")
        if os.path.isdir(folders_root):
            for local_result_folder in os.listdir(folders_root):
                local_result_folder_path = os.path.join(folders_root, local_result_folder)
                if not os.path.isdir(local_result_folder_path):
                    continue
                result_folder_on_fs = ResultFolderOnFilesystem.from_path(local_result_folder_path)
                modified_files.extend(result_folder_on_fs.list_abnormal_objects())

        return dedupe_modified_files(modified_files)
|
432
|
+
|
433
|
+
|
434
|
+
|
@@ -0,0 +1,122 @@
|
|
1
|
+
from fuse import FUSE, Operations
|
2
|
+
import os
|
3
|
+
|
4
|
+
|
5
|
+
class GeoSeeqProjectFileSystem(Operations):
    """Mount a GeoSeeq project as a filesystem.

    The project will automatically have this directory structure:
    - <root>/project_results/<project_result_folder_name>/...
    - <root>/sample_results/<sample_name>/...
    - <root>/metadata/sample_metadata.csv
    - <root>/.config/config.json

    `root` is the local directory used to hold downloaded files. Most
    operations below are still unimplemented placeholders that return None.
    """

    def __init__(self, root, project):
        self.root = root
        self.project = project

    # --- placeholders: not implemented yet ---------------------------------

    def access(self, path, mode):
        pass

    def chmod(self, path, mode):
        pass

    def chown(self, path, uid, gid):
        pass

    def getattr(self, path, fh=None):
        pass

    def readdir(self, path, fh):
        pass

    def readlink(self, path):
        pass

    def mknod(self, path, mode, dev):
        pass

    def rmdir(self, path):
        pass

    def mkdir(self, path, mode):
        pass

    def statfs(self, path):
        pass

    def unlink(self, path):
        pass

    def symlink(self, name, target):
        pass

    def rename(self, old, new):
        pass

    def link(self, target, name):
        pass

    def utimens(self, path, times=None):
        pass

    # --- file access --------------------------------------------------------

    def _remote_result_file(self, path, create):
        """Resolve a mount path to its remote result file, or None.

        `path` is split after dropping the leading slash (the original tested
        `tkns[0]`, which is the empty string for a "/"-prefixed path, so no
        branch could match). With `create=True` the sample / result folder
        are fetched with `.idem()` and the file with `.create()`; otherwise
        everything is fetched with `.get()`.

        Raises:
            NotImplementedError: the path is under `metadata`.
        """
        tkns = path.lstrip('/').split('/')
        top = tkns[0]
        if top == 'project_results':
            result_folder_name, result_file_name = tkns[1], '/'.join(tkns[2:])
            folder_handle = self.project.get_result_folder(result_folder_name)
        elif top == 'sample_results':
            sample_name, result_folder_name, result_file_name = tkns[1], tkns[2], '/'.join(tkns[3:])
            sample_handle = self.project.get_sample(sample_name)
            sample = sample_handle.idem() if create else sample_handle.get()
            folder_handle = sample.get_result_folder(result_folder_name)
        elif top == 'metadata':
            raise NotImplementedError('TODO')
        else:
            return None
        result_folder = folder_handle.idem() if create else folder_handle.get()
        file_handle = result_folder.get_file(result_file_name)
        return file_handle.create() if create else file_handle.get()

    def open(self, path, flags):
        """Download the remote file behind `path` and open the local copy.

        Returns the OS-level file descriptor of the local file under `root`.
        """
        local_path = self._full_local_path(path)
        result_file = self._remote_result_file(path, create=False)
        if result_file is not None:
            # Download to the local backing path (the same file that is
            # opened below), not to the mount-relative path.
            os.makedirs(os.path.dirname(local_path), exist_ok=True)
            result_file.download(local_path)

        return os.open(local_path, flags)

    def create(self, path, mode, fi=None):
        """Create the remote result file for `path` and a local backing file.

        Returns the OS-level file descriptor of the new local file (the
        original returned nothing).
        """
        local_path = self._full_local_path(path)
        result_file = self._remote_result_file(path, create=True)
        if result_file is not None:
            os.makedirs(os.path.dirname(local_path), exist_ok=True)
            # NOTE(review): kept from the original, which remarks there is
            # nothing to download at this point -- confirm this call is wanted.
            result_file.download(local_path)  # nothing to download at this point

        return os.open(local_path, os.O_WRONLY | os.O_CREAT, mode)

    def read(self, path, length, offset, fh):
        """Read `length` bytes at `offset` from the open descriptor `fh`."""
        os.lseek(fh, offset, os.SEEK_SET)
        return os.read(fh, length)

    def write(self, path, buf, offset, fh):
        pass

    def truncate(self, path, length, fh=None):
        pass

    def flush(self, path, fh):
        pass

    def release(self, path, fh):
        """Close the descriptor handed out by `open`/`create` so it is not leaked."""
        return os.close(fh)

    def fsync(self, path, fdatasync, fh):
        pass

    def _full_local_path(self, partial):
        """Map a mount path to the corresponding path under `root`."""
        if partial.startswith("/"):
            partial = partial[1:]
        return os.path.join(self.root, partial)
|
121
|
+
|
122
|
+
|
@@ -6,6 +6,7 @@ import os
|
|
6
6
|
from os.path import basename, getsize, join, isfile, getmtime, dirname
|
7
7
|
from pathlib import Path
|
8
8
|
from tempfile import NamedTemporaryFile
|
9
|
+
from math import ceil
|
9
10
|
|
10
11
|
from geoseeq.utils import download_ftp
|
11
12
|
from geoseeq.constants import FIVE_MB
|
@@ -43,7 +44,7 @@ def _download_resumable(response, filename, total_size_in_bytes, progress_tracke
|
|
43
44
|
target_id = url_to_id(response.url)
|
44
45
|
tracker = ResumableDownloadTracker(chunk_size, target_id, filename)
|
45
46
|
if not tracker.download_started: tracker.start_download(response.url)
|
46
|
-
n_chunks = total_size_in_bytes
|
47
|
+
n_chunks = ceil(total_size_in_bytes / chunk_size)
|
47
48
|
for i in range(n_chunks):
|
48
49
|
bytes_start, bytes_end = i * chunk_size, min((i + 1) * chunk_size - 1, total_size_in_bytes - 1)
|
49
50
|
if tracker.part_has_been_downloaded(i):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: geoseeq
|
3
|
-
Version: 0.6.1
|
3
|
+
Version: 0.6.3
|
4
4
|
Summary: GeoSeeq command line tools and python API
|
5
5
|
Author: David C. Danko
|
6
6
|
Author-email: "David C. Danko" <dcdanko@biotia.io>
|
@@ -12,6 +12,11 @@ Classifier: Operating System :: OS Independent
|
|
12
12
|
Requires-Python: >=3.8
|
13
13
|
Description-Content-Type: text/markdown
|
14
14
|
License-File: LICENSE
|
15
|
+
Requires-Dist: requests
|
16
|
+
Requires-Dist: click
|
17
|
+
Requires-Dist: pandas
|
18
|
+
Requires-Dist: biopython
|
19
|
+
Requires-Dist: tqdm
|
15
20
|
|
16
21
|
# Geoseeq API Client
|
17
22
|
|
@@ -23,6 +23,7 @@ geoseeq.egg-info/PKG-INFO
|
|
23
23
|
geoseeq.egg-info/SOURCES.txt
|
24
24
|
geoseeq.egg-info/dependency_links.txt
|
25
25
|
geoseeq.egg-info/entry_points.txt
|
26
|
+
geoseeq.egg-info/requires.txt
|
26
27
|
geoseeq.egg-info/top_level.txt
|
27
28
|
geoseeq/cli/__init__.py
|
28
29
|
geoseeq/cli/constants.py
|
@@ -57,6 +58,9 @@ geoseeq/contrib/ncbi/api.py
|
|
57
58
|
geoseeq/contrib/ncbi/bioproject.py
|
58
59
|
geoseeq/contrib/ncbi/cli.py
|
59
60
|
geoseeq/contrib/ncbi/setup_logging.py
|
61
|
+
geoseeq/file_system/__init__.py
|
62
|
+
geoseeq/file_system/filesystem_download.py
|
63
|
+
geoseeq/file_system/main.py
|
60
64
|
geoseeq/id_constructors/__init__.py
|
61
65
|
geoseeq/id_constructors/from_blobs.py
|
62
66
|
geoseeq/id_constructors/from_ids.py
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "geoseeq"
|
7
|
-
version = "0.6.1"
|
7
|
+
version = "0.6.3"
|
8
8
|
authors = [
|
9
9
|
{ name="David C. Danko", email="dcdanko@biotia.io" },
|
10
10
|
]
|
@@ -16,10 +16,17 @@ classifiers = [
|
|
16
16
|
"License :: OSI Approved :: MIT License",
|
17
17
|
"Operating System :: OS Independent",
|
18
18
|
]
|
19
|
+
dependencies = [
|
20
|
+
"requests",
|
21
|
+
"click",
|
22
|
+
"pandas",
|
23
|
+
"biopython",
|
24
|
+
"tqdm",
|
25
|
+
]
|
19
26
|
|
20
27
|
[project.urls]
|
21
28
|
Homepage = "https://github.com/biotia/geoseeq_api_client"
|
22
29
|
Issues = "https://github.com/biotia/geoseeq_api_client/issues"
|
23
30
|
|
24
31
|
[project.scripts]
|
25
|
-
geoseeq = "geoseeq.cli:main"
|
32
|
+
geoseeq = "geoseeq.cli:main"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|