geoseeq 0.6.3__py3-none-any.whl → 0.6.5__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- geoseeq/cli/main.py +9 -3
- geoseeq/constants.py +1 -1
- geoseeq/organization.py +5 -2
- geoseeq/project.py +9 -6
- geoseeq/result/result_folder.py +4 -6
- geoseeq/sample.py +16 -3
- {geoseeq-0.6.3.dist-info → geoseeq-0.6.5.dist-info}/METADATA +1 -1
- {geoseeq-0.6.3.dist-info → geoseeq-0.6.5.dist-info}/RECORD +12 -15
- {geoseeq-0.6.3.dist-info → geoseeq-0.6.5.dist-info}/WHEEL +1 -1
- geoseeq/file_system/__init__.py +0 -0
- geoseeq/file_system/filesystem_download.py +0 -434
- geoseeq/file_system/main.py +0 -122
- {geoseeq-0.6.3.dist-info → geoseeq-0.6.5.dist-info}/LICENSE +0 -0
- {geoseeq-0.6.3.dist-info → geoseeq-0.6.5.dist-info}/entry_points.txt +0 -0
- {geoseeq-0.6.3.dist-info → geoseeq-0.6.5.dist-info}/top_level.txt +0 -0
geoseeq/cli/main.py
CHANGED
@@ -11,7 +11,7 @@ from .upload import cli_upload, cli_upload_advanced
|
|
11
11
|
from .user import cli_user
|
12
12
|
from .view import cli_view
|
13
13
|
from .search import cli_search
|
14
|
-
|
14
|
+
|
15
15
|
from geoseeq.knex import DEFAULT_ENDPOINT
|
16
16
|
from geoseeq.utils import set_profile
|
17
17
|
from .shared_params.opts_and_args import overwrite_option, yes_option
|
@@ -54,7 +54,7 @@ def version():
|
|
54
54
|
Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
|
55
55
|
Run `geoseeq eula show` to view the EULA.
|
56
56
|
"""
|
57
|
-
click.echo('0.6.3') # remember to update setup
|
57
|
+
click.echo('0.6.5') # remember to update setup
|
58
58
|
|
59
59
|
|
60
60
|
@main.group('advanced')
|
@@ -73,7 +73,13 @@ def cli_experimental():
|
|
73
73
|
"""Experimental commands."""
|
74
74
|
pass
|
75
75
|
|
76
|
-
|
76
|
+
|
77
|
+
|
78
|
+
try:
|
79
|
+
from geoseeq.vc.cli import cli_vc
|
80
|
+
cli_experimental.add_command(cli_vc)
|
81
|
+
except (ModuleNotFoundError, ImportError):
|
82
|
+
pass
|
77
83
|
|
78
84
|
@main.command('config')
|
79
85
|
@yes_option
|
geoseeq/constants.py
CHANGED
@@ -2,7 +2,7 @@ from os import environ
|
|
2
2
|
from os.path import join
|
3
3
|
|
4
4
|
FIVE_MB = 5 * (1024 ** 2)
|
5
|
-
FASTQ_MODULE_NAMES = ['short_read::paired_end', 'short_read::single_end', 'long_read::nanopore']
|
5
|
+
FASTQ_MODULE_NAMES = ['short_read::paired_end', 'short_read::single_end', 'long_read::nanopore', 'raw::raw_reads']
|
6
6
|
DEFAULT_ENDPOINT = "https://backend.geoseeq.com"
|
7
7
|
|
8
8
|
CONFIG_FOLDER = environ.get("XDG_CONFIG_HOME", join(environ["HOME"], ".config"))
|
geoseeq/organization.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
|
2
|
-
|
2
|
+
import urllib
|
3
|
+
|
3
4
|
from .project import Project
|
5
|
+
from .remote_object import RemoteObject
|
4
6
|
|
5
7
|
|
6
8
|
class Organization(RemoteObject):
|
@@ -19,7 +21,8 @@ class Organization(RemoteObject):
|
|
19
21
|
self.name = name
|
20
22
|
|
21
23
|
def nested_url(self):
|
22
|
-
|
24
|
+
escaped_name = urllib.parse.quote(self.name, safe="")
|
25
|
+
return f'nested/{escaped_name}'
|
23
26
|
|
24
27
|
def _save(self):
|
25
28
|
data = {
|
geoseeq/project.py
CHANGED
@@ -1,11 +1,13 @@
|
|
1
|
-
|
1
|
+
import logging
|
2
|
+
import urllib
|
3
|
+
|
4
|
+
import pandas as pd
|
5
|
+
|
6
|
+
from .pipeline import Pipeline
|
2
7
|
from .remote_object import RemoteObject
|
8
|
+
from .result import ProjectResultFolder
|
3
9
|
from .sample import Sample
|
4
10
|
from .utils import paginated_iterator
|
5
|
-
from .pipeline import Pipeline
|
6
|
-
import json
|
7
|
-
import pandas as pd
|
8
|
-
import logging
|
9
11
|
|
10
12
|
logger = logging.getLogger("geoseeq_api")
|
11
13
|
|
@@ -72,7 +74,8 @@ class Project(RemoteObject):
|
|
72
74
|
return data
|
73
75
|
|
74
76
|
def nested_url(self):
|
75
|
-
|
77
|
+
escaped_name = urllib.parse.quote(self.name, safe="")
|
78
|
+
return self.org.nested_url() + f"/sample_groups/{escaped_name}"
|
76
79
|
|
77
80
|
def _save_group_obj(self):
|
78
81
|
data = self.get_post_data()
|
geoseeq/result/result_folder.py
CHANGED
@@ -1,9 +1,6 @@
|
|
1
|
-
import json
|
2
|
-
import logging
|
3
1
|
import os
|
4
|
-
import
|
5
|
-
import
|
6
|
-
from os.path import basename, getsize, join, isfile, isdir, dirname
|
2
|
+
import urllib
|
3
|
+
from os.path import basename, dirname, getsize, isdir, isfile, join
|
7
4
|
from pathlib import Path
|
8
5
|
from tempfile import NamedTemporaryFile
|
9
6
|
|
@@ -155,7 +152,8 @@ class SampleResultFolder(ResultFolder, SampleBioInfoFolder):
|
|
155
152
|
self.is_private = is_private
|
156
153
|
|
157
154
|
def nested_url(self):
|
158
|
-
|
155
|
+
escaped_name = urllib.parse.quote(self.module_name, safe="")
|
156
|
+
return self.sample.nested_url() + f"/analysis_results/{escaped_name}"
|
159
157
|
|
160
158
|
def _save(self):
|
161
159
|
data = {field: getattr(self, field) for field in self.remote_fields if hasattr(self, field)}
|
geoseeq/sample.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
|
-
|
1
|
+
import urllib
|
2
|
+
|
2
3
|
from .remote_object import RemoteObject
|
4
|
+
from .result import SampleResultFile, SampleResultFolder
|
3
5
|
|
4
6
|
|
5
7
|
class Sample(RemoteObject):
|
@@ -33,7 +35,8 @@ class Sample(RemoteObject):
|
|
33
35
|
return f'brn:{self.knex.instance_code()}:sample:{self.uuid}'
|
34
36
|
|
35
37
|
def nested_url(self):
|
36
|
-
|
38
|
+
escaped_name = urllib.parse.quote(self.name, safe="")
|
39
|
+
return self.lib.nested_url() + f"/samples/{escaped_name}"
|
37
40
|
|
38
41
|
def change_library(self, new_lib):
|
39
42
|
self.new_lib = new_lib
|
@@ -212,9 +215,19 @@ class Sample(RemoteObject):
|
|
212
215
|
)
|
213
216
|
else:
|
214
217
|
files[read_type][folder_name].append(
|
215
|
-
self._grn_to_file(file_grn
|
218
|
+
self._grn_to_file(file_grn)
|
216
219
|
)
|
217
220
|
return files
|
221
|
+
|
222
|
+
def get_one_fasta(self):
|
223
|
+
"""Return a 2-tuple: a fasta ResultFile and a string with the read type.
|
224
|
+
|
225
|
+
Does not download the file.
|
226
|
+
"""
|
227
|
+
url = f"data/samples/{self.uuid}/one-fasta"
|
228
|
+
blob = self.knex.get(url)
|
229
|
+
file = self._grn_to_file(blob["grn"])
|
230
|
+
return file, blob["read_type"]
|
218
231
|
|
219
232
|
def __str__(self):
|
220
233
|
return f"<Geoseeq::Sample {self.name} {self.uuid} />"
|
@@ -2,14 +2,14 @@ geoseeq/__init__.py,sha256=4_5QIXLt29gDwxjDLtyJY6y5Yi5p0usvKnAuJkkmECY,946
|
|
2
2
|
geoseeq/app.py,sha256=Y6d1UzxFLfE3RNccATbFCVi6kH3eFmzwoUbeR2Ry09A,2387
|
3
3
|
geoseeq/blob_constructors.py,sha256=AkWpDQY0EdGMxF1p6eRspyHKubcUdiW4it-_Q7S2QWk,188
|
4
4
|
geoseeq/bulk_creators.py,sha256=pdn-Dv7yv5SFv-PfDuQbuOnw2W4-BfIfRJVRAhM8U6s,2115
|
5
|
-
geoseeq/constants.py,sha256=
|
5
|
+
geoseeq/constants.py,sha256=z_ninEd7WsS5DaLntdR-sqAFib6Ie22jlhPKzLvLerw,449
|
6
6
|
geoseeq/file_system_cache.py,sha256=HzVZWtwLD2fjWWSo_UfWmGeBltm9He4lP_OqzKwNGWg,4138
|
7
7
|
geoseeq/knex.py,sha256=SlK3Z9Y51APecIeJep4eNvFqlwKpQzvtokBnKe0L5Oc,7965
|
8
|
-
geoseeq/organization.py,sha256=
|
8
|
+
geoseeq/organization.py,sha256=bJkYL8_D-k6IYAaii2ZbxjwYnXy6lvu6iLXscxKlA3w,2542
|
9
9
|
geoseeq/pipeline.py,sha256=89mhWaecsKnm6tyRkdkaVp4dmZh62_v42Ze0oXf8OTY,9873
|
10
|
-
geoseeq/project.py,sha256
|
10
|
+
geoseeq/project.py,sha256=pVx4etzkYmYAYwcPJsjN9PrI-7GZEkAaz2Q5GFdng1s,13810
|
11
11
|
geoseeq/remote_object.py,sha256=Es-JlAz8iLRmCpAzh1MOwUh2MqtbuQM-p8wHIBAqNlQ,7131
|
12
|
-
geoseeq/sample.py,sha256=
|
12
|
+
geoseeq/sample.py,sha256=KkN9fUTkjTIvozR5y2pS9c6rt4jwHN2Bap38dU_Lrck,8391
|
13
13
|
geoseeq/search.py,sha256=gawad6Cx5FxJBPlYkXWb-UKAO-UC0_yhvyU9Ca1kaNI,3388
|
14
14
|
geoseeq/upload_download_manager.py,sha256=FMRqLLg77o1qFbWZc5Yc86a2pjeZrrn1rHJr1iaxKCU,8757
|
15
15
|
geoseeq/user.py,sha256=tol8i1UGLRrbMw5jeJDnna1ikRgrCDd50Jxz0a1lSgg,690
|
@@ -22,7 +22,7 @@ geoseeq/cli/detail.py,sha256=q8Suu-j2k18knfSVFG-SWWGNsKM-n8y9RMA3LcIIi9Y,4132
|
|
22
22
|
geoseeq/cli/download.py,sha256=QTNA7qFjCdRJg2vKbAm5yH8WGlcF5fb5bSjm5QiI4XE,17768
|
23
23
|
geoseeq/cli/fastq_utils.py,sha256=-bmeQLaiMBm57zWOF0R5OlWTU0_3sh1JBC1RYw2BOFM,3083
|
24
24
|
geoseeq/cli/get_eula.py,sha256=79mbUwyiF7O1r0g6UTxG9kJGQEqKuH805E6eLkPC6Y4,997
|
25
|
-
geoseeq/cli/main.py,sha256=
|
25
|
+
geoseeq/cli/main.py,sha256=6ctfMtfT9-V5rUquEqeY39xyPrIeelCQw_8WmHep18I,3898
|
26
26
|
geoseeq/cli/manage.py,sha256=wGXAcVaXqE5JQEU8Jh6OlHr02nB396bpS_SFcOZdrEo,5929
|
27
27
|
geoseeq/cli/progress_bar.py,sha256=p1Xl01nkYxSBZCB30ue2verIIi22W93m3ZAMAxipD0g,738
|
28
28
|
geoseeq/cli/project.py,sha256=V5SdXm2Hwo2lxrkpwRDedw-mAE4XnM2uwT-Gj1D90VQ,3030
|
@@ -48,9 +48,6 @@ geoseeq/contrib/ncbi/api.py,sha256=WQeLoGA_-Zha-QeSO8_i7HpvXyD8UkV0qc5okm11KiA,1
|
|
48
48
|
geoseeq/contrib/ncbi/bioproject.py,sha256=_oThTd_iLDOC8cLOlJKAatSr362OBYZCEV3YrqodhFg,4341
|
49
49
|
geoseeq/contrib/ncbi/cli.py,sha256=j9zEcaZPTryK3a4xluRxigcJKDhRpRxbp3KZSx-Bfhk,2400
|
50
50
|
geoseeq/contrib/ncbi/setup_logging.py,sha256=Tp1bY1U0f-o739aHpvVYriG2qdd1lFvCYBXZeXQgt-w,175
|
51
|
-
geoseeq/file_system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
52
|
-
geoseeq/file_system/filesystem_download.py,sha256=8bcnxjWltekmCvb5N0b1guBIjLp4-CL2VtsEok-snv4,16963
|
53
|
-
geoseeq/file_system/main.py,sha256=4HgYGq7WhlF96JlVIf16iFBTDujlBpxImmtoh4VCzDA,3627
|
54
51
|
geoseeq/id_constructors/__init__.py,sha256=w5E0PNQ9UuAxBeZbDI7KBnUoERd85gGz3nScz45bd2o,126
|
55
52
|
geoseeq/id_constructors/from_blobs.py,sha256=aj7M7NRpKGs3u3xUvuFJwmJdFeIcJPmaI2_bhwbFfEs,5702
|
56
53
|
geoseeq/id_constructors/from_ids.py,sha256=bbAJX4LnuN70v9bny6N-jAwOudb2-ztHvlMBgRuSDz0,3151
|
@@ -72,7 +69,7 @@ geoseeq/result/file_chunker.py,sha256=bXq1csuRtqMB5sbH-AfWo6gdPwrivv5DJPuHVj-h08
|
|
72
69
|
geoseeq/result/file_download.py,sha256=KalIkwBbFI8xRpbhToixfd1KMAu_0FYwxdKq146NAHw,7832
|
73
70
|
geoseeq/result/file_upload.py,sha256=xs1DrI-h4ZP7xN8HPBc3SFpcPAxR5HAolraP1Zu7tvE,10648
|
74
71
|
geoseeq/result/result_file.py,sha256=1Yj9fkZhds3J-tay6eNH2-EHi00MovHGV1M80_ckHD8,8677
|
75
|
-
geoseeq/result/result_folder.py,sha256
|
72
|
+
geoseeq/result/result_folder.py,sha256=-m1lDVLpNHKy-JUGihboVzvdMJEnHossyRnxmBe1XLo,11140
|
76
73
|
geoseeq/result/resumable_download_tracker.py,sha256=YEzqHBBnE7L3XokTvlTAhHZ8TcDTIE_pyTQ7YadOfbU,3667
|
77
74
|
geoseeq/result/resumable_upload_tracker.py,sha256=2aI09gYz2yw63jEXqs8lmCRKQ79TIc3YuPETvP0Jeek,3811
|
78
75
|
geoseeq/result/utils.py,sha256=C-CxGzB3WddlnRiqFSkrY78I_m0yFgNqsTBRzGU-y8Q,2772
|
@@ -88,9 +85,9 @@ geoseeq/vc/vc_stub.py,sha256=IQr8dI0zsWKVAeY_5ybDD6n49_3othcgfHS3P0O9tuY,3110
|
|
88
85
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
89
86
|
tests/test_api_client.py,sha256=TS5njc5pcPP_Ycy-ljcfPVT1hQRBsFVdQ0lCqBmoesU,12810
|
90
87
|
tests/test_plotting.py,sha256=TcTu-2ARr8sxZJ7wPQxmbs3-gHw7uRvsgrhhhg0qKik,784
|
91
|
-
geoseeq-0.6.
|
92
|
-
geoseeq-0.6.
|
93
|
-
geoseeq-0.6.
|
94
|
-
geoseeq-0.6.
|
95
|
-
geoseeq-0.6.
|
96
|
-
geoseeq-0.6.
|
88
|
+
geoseeq-0.6.5.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
|
89
|
+
geoseeq-0.6.5.dist-info/METADATA,sha256=C8OwRXrQg0qp_R7g13Y4qheE5XaoFE6a_n3X471ui_k,4915
|
90
|
+
geoseeq-0.6.5.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
|
91
|
+
geoseeq-0.6.5.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
|
92
|
+
geoseeq-0.6.5.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
|
93
|
+
geoseeq-0.6.5.dist-info/RECORD,,
|
geoseeq/file_system/__init__.py
DELETED
File without changes
|
@@ -1,434 +0,0 @@
|
|
1
|
-
|
2
|
-
import os
|
3
|
-
import json
|
4
|
-
from geoseeq import (
|
5
|
-
result_file_from_id,
|
6
|
-
result_folder_from_id,
|
7
|
-
sample_from_id,
|
8
|
-
project_from_id,
|
9
|
-
)
|
10
|
-
from geoseeq.utils import md5_checksum
|
11
|
-
from time import time
|
12
|
-
|
13
|
-
FILE_STATUS_MODIFIED_REMOTE = 'MODIFIED_REMOTE'
|
14
|
-
FILE_STATUS_MODIFIED_LOCAL = 'MODIFIED_LOCAL'
|
15
|
-
FILE_STATUS_NEW_LOCAL = 'NEW_LOCAL'
|
16
|
-
FILE_STATUS_NEW_REMOTE = 'NEW_REMOTE'
|
17
|
-
FILE_STATUS_IS_LOCAL_STUB = 'IS_LOCAL_STUB'
|
18
|
-
|
19
|
-
|
20
|
-
def dedupe_modified_files(modified_files):
|
21
|
-
"""Remove duplicates from a list of modified files.
|
22
|
-
|
23
|
-
This function will remove duplicates from a list of modified files
|
24
|
-
based on the path to the file. The first instance of the file will be
|
25
|
-
kept and all others will be removed.
|
26
|
-
"""
|
27
|
-
seen = set()
|
28
|
-
deduped = []
|
29
|
-
for x in modified_files:
|
30
|
-
if x[2] not in seen:
|
31
|
-
deduped.append(x)
|
32
|
-
seen.add(x[2])
|
33
|
-
return deduped
|
34
|
-
|
35
|
-
|
36
|
-
class ResultFileOnFilesystem:
|
37
|
-
"""
|
38
|
-
|
39
|
-
Note: unlike other filesystem classes the `path` is a file, not
|
40
|
-
a directory. This is because the file is downloaded directly to
|
41
|
-
the path.
|
42
|
-
"""
|
43
|
-
|
44
|
-
def __init__(self, result_file, path, kind):
|
45
|
-
self.result_file = result_file
|
46
|
-
self.path = path
|
47
|
-
self.kind = kind
|
48
|
-
|
49
|
-
@property
|
50
|
-
def info_filepath(self):
|
51
|
-
dirpath = os.path.dirname(self.path)
|
52
|
-
basename = os.path.basename(self.path)
|
53
|
-
return os.path.join(dirpath, f'.gs_result_file__{basename}')
|
54
|
-
|
55
|
-
@property
|
56
|
-
def is_stub(self):
|
57
|
-
return os.path.exists(self.path) and os.path.getsize(self.path) == 0
|
58
|
-
|
59
|
-
def file_is_ok(self, stubs_are_ok=False):
|
60
|
-
if self.is_stub:
|
61
|
-
return stubs_are_ok
|
62
|
-
return self.result_file.download_needs_update(self.path)
|
63
|
-
|
64
|
-
def download(self, use_stubs=False, exists_ok=False):
|
65
|
-
if os.path.exists(self.info_filepath):
|
66
|
-
if exists_ok and self.file_is_ok(stubs_are_ok=use_stubs):
|
67
|
-
return
|
68
|
-
elif not exists_ok:
|
69
|
-
raise ValueError('Result file already exists at path: {}'.format(self.info_filepath))
|
70
|
-
|
71
|
-
# Download the file
|
72
|
-
if use_stubs:
|
73
|
-
open(self.path, 'w').close()
|
74
|
-
else:
|
75
|
-
self.result_file.download(self.path)
|
76
|
-
|
77
|
-
self.write_info_file()
|
78
|
-
|
79
|
-
def local_file_checksum(self):
|
80
|
-
if self.is_stub:
|
81
|
-
return "__STUB__"
|
82
|
-
return md5_checksum(self.path)
|
83
|
-
|
84
|
-
def locally_modified(self):
|
85
|
-
raise NotImplementedError('This function is not implemented')
|
86
|
-
|
87
|
-
def status_is_ok(self, stubs_are_ok=False):
|
88
|
-
# check for an info file
|
89
|
-
if not os.path.exists(self.info_filepath):
|
90
|
-
return False
|
91
|
-
if stubs_are_ok:
|
92
|
-
return True
|
93
|
-
return not self.result_file.download_needs_update(self.path)
|
94
|
-
|
95
|
-
def write_info_file(self):
|
96
|
-
result_file_info = {
|
97
|
-
"uuid": self.result_file.uuid,
|
98
|
-
"kind": self.kind,
|
99
|
-
"checksum": self.local_file_checksum(),
|
100
|
-
}
|
101
|
-
with open(self.info_filepath, 'w') as f:
|
102
|
-
json.dump(result_file_info, f)
|
103
|
-
|
104
|
-
@classmethod
|
105
|
-
def from_path(cls, path):
|
106
|
-
obj = cls(None, path, None)
|
107
|
-
try:
|
108
|
-
with open(obj.info_filepath, 'r') as f:
|
109
|
-
result_file_info = json.load(f)
|
110
|
-
obj.result_file = result_file_from_id(result_file_info['uuid'])
|
111
|
-
obj.kind = result_file_info['kind']
|
112
|
-
obj.stored_checksum = result_file_info['checksum']
|
113
|
-
except FileNotFoundError:
|
114
|
-
pass
|
115
|
-
return obj
|
116
|
-
|
117
|
-
def write_info_file(self):
|
118
|
-
result_file_info = {
|
119
|
-
"uuid": self.result_file.uuid,
|
120
|
-
"kind": self.kind,
|
121
|
-
"checksum": self.local_file_checksum(),
|
122
|
-
}
|
123
|
-
with open(self.info_filepath, 'w') as f:
|
124
|
-
json.dump(result_file_info, f)
|
125
|
-
|
126
|
-
def list_abnormal_objects(self):
|
127
|
-
"""Return a list of files that have been modified.
|
128
|
-
|
129
|
-
Since this class is a single file the list will either be empty
|
130
|
-
or have one element.
|
131
|
-
|
132
|
-
Note that if a file was modified locally then uploaded to the server
|
133
|
-
the file will be marked as modified remote.
|
134
|
-
"""
|
135
|
-
if self.result_file is None:
|
136
|
-
return [('FILE', FILE_STATUS_NEW_LOCAL, self.path, None)]
|
137
|
-
if not os.path.exists(self.path):
|
138
|
-
return [('FILE', FILE_STATUS_NEW_REMOTE, self.path, self.result_file)]
|
139
|
-
if self.is_stub:
|
140
|
-
return [('FILE', FILE_STATUS_IS_LOCAL_STUB, self.path, self.result_file)]
|
141
|
-
if self.result_file and self.result_file.download_needs_update(self.path):
|
142
|
-
return [('FILE', FILE_STATUS_MODIFIED_REMOTE, self.path, self.result_file)]
|
143
|
-
if self.locally_modified():
|
144
|
-
return [('FILE', FILE_STATUS_MODIFIED_LOCAL, self.path, self.result_file)]
|
145
|
-
|
146
|
-
return []
|
147
|
-
|
148
|
-
|
149
|
-
class ResultFolderOnFilesystem:
|
150
|
-
|
151
|
-
def __init__(self, result_folder, path, kind):
|
152
|
-
self.result_folder = result_folder
|
153
|
-
self.path = path
|
154
|
-
self.kind = kind
|
155
|
-
|
156
|
-
@property
|
157
|
-
def info_filepath(self):
|
158
|
-
return os.path.join(self.path, '.gs_result_folder')
|
159
|
-
|
160
|
-
def download(self, use_stubs=False, exists_ok=False):
|
161
|
-
if os.path.exists(self.info_filepath) and not exists_ok:
|
162
|
-
raise ValueError('Result folder already exists at path: {}'.format(self.info_filepath))
|
163
|
-
|
164
|
-
# Download the files in the result folder
|
165
|
-
for result_file in self.result_folder.get_fields():
|
166
|
-
result_file_local_path = os.path.join(self.path, result_file.name)
|
167
|
-
os.makedirs(os.path.dirname(result_file_local_path), exist_ok=True)
|
168
|
-
ResultFileOnFilesystem(result_file, result_file_local_path, self.kind)\
|
169
|
-
.download(use_stubs=use_stubs, exists_ok=exists_ok)
|
170
|
-
|
171
|
-
# Write the result folder data
|
172
|
-
result_folder_info = {
|
173
|
-
"uuid": self.result_folder.uuid,
|
174
|
-
"kind": self.kind
|
175
|
-
}
|
176
|
-
with open(self.info_filepath, 'w') as f:
|
177
|
-
json.dump(result_folder_info, f)
|
178
|
-
|
179
|
-
def status_is_ok(self):
|
180
|
-
# check for an info file
|
181
|
-
if not os.path.exists(self.info_filepath):
|
182
|
-
return False
|
183
|
-
|
184
|
-
# check that all files are downloaded
|
185
|
-
for result_file in self.result_folder.get_files():
|
186
|
-
result_file_path = os.path.join(self.path, result_file.name)
|
187
|
-
if not os.path.exists(result_file_path):
|
188
|
-
return False
|
189
|
-
|
190
|
-
return True
|
191
|
-
|
192
|
-
@classmethod
|
193
|
-
def from_path(cls, path):
|
194
|
-
obj = cls(None, path, None)
|
195
|
-
try:
|
196
|
-
with open(os.path.join(path, '.gs_result_folder'), 'r') as f:
|
197
|
-
result_folder_info = json.load(f)
|
198
|
-
obj.result_folder = result_folder_from_id(result_folder_info['uuid'])
|
199
|
-
obj.kind = result_folder_info['kind']
|
200
|
-
except FileNotFoundError:
|
201
|
-
pass
|
202
|
-
return obj
|
203
|
-
|
204
|
-
def list_abnormal_objects(self):
|
205
|
-
"""Return a list of files that have been modified.
|
206
|
-
|
207
|
-
This function will return a list of 4-tuples: the object kind ('FOLDER' or
|
208
|
-
'FILE'), the status, the local path, and the remote object (or None).
|
209
|
-
"""
|
210
|
-
modified_files = []
|
211
|
-
if not self.result_folder:
|
212
|
-
modified_files.append(('FOLDER', FILE_STATUS_NEW_LOCAL, self.path, None))
|
213
|
-
if not os.path.exists(self.path):
|
214
|
-
modified_files.append(('FOLDER', FILE_STATUS_NEW_REMOTE, self.path, self.result_folder))
|
215
|
-
|
216
|
-
# list local files
|
217
|
-
if os.path.exists(self.path):
|
218
|
-
for local_file in os.listdir(self.path):
|
219
|
-
if local_file.startswith('.gs_'):
|
220
|
-
continue
|
221
|
-
local_file_path = os.path.join(self.path, local_file)
|
222
|
-
result_file_on_fs = ResultFileOnFilesystem.from_path(local_file_path)
|
223
|
-
modified_files.extend(result_file_on_fs.list_abnormal_objects())
|
224
|
-
|
225
|
-
# list remote files
|
226
|
-
if self.result_folder:
|
227
|
-
for result_file in self.result_folder.get_fields():
|
228
|
-
result_file_path = os.path.join(self.path, result_file.name)
|
229
|
-
result_file_on_fs = ResultFileOnFilesystem(result_file, result_file_path, self.kind)
|
230
|
-
modified_files.extend(result_file_on_fs.list_abnormal_objects())
|
231
|
-
|
232
|
-
return dedupe_modified_files(modified_files)
|
233
|
-
|
234
|
-
|
235
|
-
class SampleOnFilesystem:
|
236
|
-
|
237
|
-
def __init__(self, sample, path):
|
238
|
-
self.sample = sample
|
239
|
-
self.path = path if path[-1] != '/' else path[:-1] # remove trailing slash
|
240
|
-
|
241
|
-
@property
|
242
|
-
def info_filepath(self):
|
243
|
-
return os.path.join(self.path, '.gs_sample')
|
244
|
-
|
245
|
-
def download(self, use_stubs=False, exists_ok=False):
|
246
|
-
if os.path.exists(self.info_filepath) and not exists_ok:
|
247
|
-
raise ValueError('Sample already exists at path: {}'.format(self.info_filepath))
|
248
|
-
|
249
|
-
# download result folders
|
250
|
-
for result_folder in self.sample.get_result_folders():
|
251
|
-
result_folder_local_path = os.path.join(self.path, result_folder.name)
|
252
|
-
os.makedirs(result_folder_local_path, exist_ok=True)
|
253
|
-
ResultFolderOnFilesystem(result_folder, result_folder_local_path, "sample")\
|
254
|
-
.download(use_stubs=use_stubs, exists_ok=exists_ok)
|
255
|
-
|
256
|
-
# Write the sample data
|
257
|
-
sample_info = {
|
258
|
-
"uuid": self.sample.uuid
|
259
|
-
}
|
260
|
-
with open(self.info_filepath, 'w') as f:
|
261
|
-
json.dump(sample_info, f)
|
262
|
-
|
263
|
-
def status_is_ok(self):
|
264
|
-
# check for an info file
|
265
|
-
if not os.path.exists(self.info_filepath):
|
266
|
-
return False
|
267
|
-
|
268
|
-
# check that all result folders are downloaded
|
269
|
-
for result_folder in self.sample.get_result_folders():
|
270
|
-
result_folder_local_path = os.path.join(self.path, result_folder.name)
|
271
|
-
result_folder_on_fs = ResultFolderOnFilesystem.from_path(result_folder_local_path, "sample")
|
272
|
-
if not result_folder_on_fs.status_is_ok():
|
273
|
-
return False
|
274
|
-
|
275
|
-
return True
|
276
|
-
|
277
|
-
@classmethod
|
278
|
-
def from_path(cls, path):
|
279
|
-
obj = cls(None, path)
|
280
|
-
try:
|
281
|
-
with open(os.path.join(path, '.gs_sample'), 'r') as f:
|
282
|
-
sample_info = json.load(f)
|
283
|
-
obj.sample = sample_from_id(sample_info['uuid'])
|
284
|
-
except FileNotFoundError:
|
285
|
-
pass
|
286
|
-
return obj
|
287
|
-
|
288
|
-
def list_abnormal_objects(self):
|
289
|
-
"""Return a list of files that have been modified.
|
290
|
-
|
291
|
-
This function will return a list of 4-tuples: the object kind ('SAMPLE',
|
292
|
-
'FOLDER' or 'FILE'), the status, the local path, and the remote object (or None).
|
293
|
-
"""
|
294
|
-
modified_files = []
|
295
|
-
if not self.sample:
|
296
|
-
modified_files.append(('SAMPLE', FILE_STATUS_NEW_LOCAL, self.path, None))
|
297
|
-
if not os.path.exists(self.path):
|
298
|
-
modified_files.append(('SAMPLE', FILE_STATUS_NEW_REMOTE, self.path, self.sample))
|
299
|
-
|
300
|
-
# list local folders
|
301
|
-
if os.path.exists(self.path):
|
302
|
-
for local_folder in os.listdir(self.path):
|
303
|
-
local_folder_path = os.path.join(self.path, local_folder)
|
304
|
-
if not os.path.isdir(local_folder_path):
|
305
|
-
continue
|
306
|
-
result_folder_on_fs = ResultFolderOnFilesystem.from_path(local_folder_path)
|
307
|
-
modified_files.extend(result_folder_on_fs.list_abnormal_objects())
|
308
|
-
|
309
|
-
# list remote folders
|
310
|
-
if self.sample:
|
311
|
-
for result_folder in self.sample.get_result_folders():
|
312
|
-
result_folder_path = os.path.join(self.path, result_folder.name)
|
313
|
-
result_folder_on_fs = ResultFolderOnFilesystem(result_folder, result_folder_path, "sample")
|
314
|
-
modified_files.extend(result_folder_on_fs.list_abnormal_objects())
|
315
|
-
|
316
|
-
return dedupe_modified_files(modified_files)
|
317
|
-
|
318
|
-
|
319
|
-
class ProjectOnFilesystem:
|
320
|
-
|
321
|
-
def __init__(self, project, path):
|
322
|
-
self.project = project
|
323
|
-
self.path = path
|
324
|
-
|
325
|
-
@property
|
326
|
-
def info_filepath(self):
|
327
|
-
return os.path.join(self.path, '.gs_project')
|
328
|
-
|
329
|
-
def download(self, use_stubs=False, exists_ok=False):
|
330
|
-
if os.path.exists(self.info_filepath) and not exists_ok:
|
331
|
-
raise ValueError('Project already exists at path: {}'.format(self.info_filepath))
|
332
|
-
|
333
|
-
# download samples
|
334
|
-
for sample in self.project.get_samples():
|
335
|
-
sample_local_path = os.path.join(self.path, "sample_results", sample.name)
|
336
|
-
os.makedirs(sample_local_path, exist_ok=True)
|
337
|
-
SampleOnFilesystem(sample, sample_local_path)\
|
338
|
-
.download(use_stubs=use_stubs, exists_ok=exists_ok)
|
339
|
-
|
340
|
-
# download project result folders
|
341
|
-
for result_folder in self.project.get_result_folders():
|
342
|
-
result_folder_local_path = os.path.join(self.path, "project_results", result_folder.name)
|
343
|
-
os.makedirs(result_folder_local_path, exist_ok=True)
|
344
|
-
ResultFolderOnFilesystem(result_folder, result_folder_local_path, "project")\
|
345
|
-
.download(use_stubs=use_stubs, exists_ok=exists_ok)
|
346
|
-
|
347
|
-
# Write the project data
|
348
|
-
project_info = {
|
349
|
-
"uuid": self.project.uuid
|
350
|
-
}
|
351
|
-
with open(self.info_filepath, 'w') as f:
|
352
|
-
json.dump(project_info, f)
|
353
|
-
|
354
|
-
def status_is_ok(self):
|
355
|
-
# check for an info file
|
356
|
-
if not os.path.exists(self.info_filepath):
|
357
|
-
return False
|
358
|
-
|
359
|
-
# check that all samples are downloaded
|
360
|
-
for sample in self.project.get_samples():
|
361
|
-
sample_local_path = os.path.join(self.path, "sample_results", sample.name)
|
362
|
-
sample_on_fs = SampleOnFilesystem.from_path(sample_local_path)
|
363
|
-
if not sample_on_fs.status_is_ok():
|
364
|
-
return False
|
365
|
-
|
366
|
-
# check that all project result folders are downloaded
|
367
|
-
for result_folder in self.project.get_result_folders():
|
368
|
-
result_folder_local_path = os.path.join(self.path, "project_results", result_folder.name)
|
369
|
-
result_folder_on_fs = ResultFolderOnFilesystem.from_path(result_folder_local_path, "project")
|
370
|
-
if not result_folder_on_fs.status_is_ok():
|
371
|
-
return False
|
372
|
-
|
373
|
-
return True
|
374
|
-
|
375
|
-
@classmethod
|
376
|
-
def from_path(cls, path, recursive=False):
|
377
|
-
try:
|
378
|
-
with open(os.path.join(path, '.gs_project'), 'r') as f:
|
379
|
-
project_info = json.load(f)
|
380
|
-
project = project_from_id(project_info['uuid'])
|
381
|
-
return cls(project, path)
|
382
|
-
except FileNotFoundError:
|
383
|
-
if not recursive:
|
384
|
-
raise ValueError('No project found in path or parent directories')
|
385
|
-
updir = os.path.dirname(os.path.abspath(path))
|
386
|
-
if updir == path:
|
387
|
-
raise ValueError('No project found in path or parent directories')
|
388
|
-
return cls.from_path(updir, recursive=recursive)
|
389
|
-
|
390
|
-
def path_from_project_root(self, path):
|
391
|
-
if path[0] == "/":
|
392
|
-
return path.replace(self.path, "")[1:]
|
393
|
-
return path
|
394
|
-
|
395
|
-
def list_abnormal_objects(self):
|
396
|
-
"""Return a list of files that have been modified.
|
397
|
-
|
398
|
-
This function will return a list of 4-tuples: the object kind ('SAMPLE',
|
399
|
-
'FOLDER' or 'FILE'), the status, the local path, and the remote object (or None).
|
400
|
-
"""
|
401
|
-
modified_files = []
|
402
|
-
|
403
|
-
# list remote samples
|
404
|
-
for sample in self.project.get_samples():
|
405
|
-
sample_path = os.path.join(self.path, "sample_results", sample.name)
|
406
|
-
sample_on_fs = SampleOnFilesystem(sample, sample_path)
|
407
|
-
modified_files.extend(sample_on_fs.list_abnormal_objects())
|
408
|
-
|
409
|
-
# list remote project result folders
|
410
|
-
for result_folder in self.project.get_result_folders():
|
411
|
-
result_folder_path = os.path.join(self.path, "project_results", result_folder.name)
|
412
|
-
|
413
|
-
result_folder_on_fs = ResultFolderOnFilesystem(result_folder, result_folder_path, "project")
|
414
|
-
modified_files.extend(result_folder_on_fs.list_abnormal_objects())
|
415
|
-
|
416
|
-
# list local samples
|
417
|
-
for local_sample in os.listdir(os.path.join(self.path, "sample_results")):
|
418
|
-
local_sample_path = os.path.join(self.path, "sample_results", local_sample)
|
419
|
-
if not os.path.isdir(local_sample_path):
|
420
|
-
continue
|
421
|
-
sample_on_fs = SampleOnFilesystem.from_path(local_sample_path)
|
422
|
-
modified_files.extend(sample_on_fs.list_abnormal_objects())
|
423
|
-
|
424
|
-
# list local project result folders
|
425
|
-
for local_result_folder in os.listdir(os.path.join(self.path, "project_results")):
|
426
|
-
local_result_folder_path = os.path.join(self.path, "project_results", local_result_folder)
|
427
|
-
if not os.path.isdir(local_result_folder_path):
|
428
|
-
continue
|
429
|
-
result_folder_on_fs = ResultFolderOnFilesystem.from_path(local_result_folder_path)
|
430
|
-
modified_files.extend(result_folder_on_fs.list_abnormal_objects())
|
431
|
-
return dedupe_modified_files(modified_files)
|
432
|
-
|
433
|
-
|
434
|
-
|
geoseeq/file_system/main.py
DELETED
@@ -1,122 +0,0 @@
|
|
1
|
-
from fuse import FUSE, Operations
|
2
|
-
import os
|
3
|
-
|
4
|
-
|
5
|
-
class GeoSeeqProjectFileSystem(Operations):
|
6
|
-
"""Mount a GeoSeeq project as a filesystem.
|
7
|
-
|
8
|
-
The project will automatically have this directory structure:
|
9
|
-
- <root>/project_results/<project_result_folder_name>/...
|
10
|
-
- <root>/sample_results/<sample_name>/...
|
11
|
-
- <root>/metadata/sample_metadata.csv
|
12
|
-
- <root>/.config/config.json
|
13
|
-
"""
|
14
|
-
|
15
|
-
def __init__(self, root, project):
|
16
|
-
self.root = root
|
17
|
-
self.project = project
|
18
|
-
|
19
|
-
def access(self, path, mode):
|
20
|
-
pass
|
21
|
-
|
22
|
-
def chmod(self, path, mode):
|
23
|
-
pass
|
24
|
-
|
25
|
-
def chown(self, path, uid, gid):
|
26
|
-
pass
|
27
|
-
|
28
|
-
def getattr(self, path, fh=None):
|
29
|
-
pass
|
30
|
-
|
31
|
-
def readdir(self, path, fh):
|
32
|
-
pass
|
33
|
-
|
34
|
-
def readlink(self, path):
|
35
|
-
pass
|
36
|
-
|
37
|
-
def mknod(self, path, mode, dev):
|
38
|
-
pass
|
39
|
-
|
40
|
-
def rmdir(self, path):
|
41
|
-
pass
|
42
|
-
|
43
|
-
def mkdir(self, path, mode):
|
44
|
-
pass
|
45
|
-
|
46
|
-
def statfs(self, path):
|
47
|
-
pass
|
48
|
-
|
49
|
-
def unlink(self, path):
|
50
|
-
pass
|
51
|
-
|
52
|
-
def symlink(self, name, target):
|
53
|
-
pass
|
54
|
-
|
55
|
-
def rename(self, old, new):
|
56
|
-
pass
|
57
|
-
|
58
|
-
def link(self, target, name):
|
59
|
-
pass
|
60
|
-
|
61
|
-
def utimens(self, path, times=None):
|
62
|
-
pass
|
63
|
-
|
64
|
-
def open(self, path, flags):
|
65
|
-
tkns = path.split('/')
|
66
|
-
if tkns[0] == 'project_results':
|
67
|
-
result_folder_name, result_file_name = tkns[2], '/'.join(tkns[3:])
|
68
|
-
result_folder = self.project.get_result_folder(result_folder_name).get()
|
69
|
-
result_file = result_folder.get_file(result_file_name).get()
|
70
|
-
result_file.download(path)
|
71
|
-
elif tkns[0] == 'sample_results':
|
72
|
-
sample_name, result_folder_name, result_file_name = tkns[2], tkns[3], '/'.join(tkns[4:])
|
73
|
-
sample = self.project.get_sample(sample_name).get()
|
74
|
-
result_folder = sample.get_result_folder(result_folder_name).get()
|
75
|
-
result_file = result_folder.get_file(result_file_name).get()
|
76
|
-
result_file.download(path)
|
77
|
-
elif tkns[0] == 'metadata':
|
78
|
-
raise NotImplementedError('TODO')
|
79
|
-
|
80
|
-
return os.open(self._full_local_path(path), flags)
|
81
|
-
|
82
|
-
def create(self, path, mode, fi=None):
|
83
|
-
tkns = path.split('/')
|
84
|
-
if tkns[0] == 'project_results':
|
85
|
-
result_name, file_name = tkns[2], '/'.join(tkns[3:])
|
86
|
-
result_folder = self.project.get_result_folder(result_name).idem()
|
87
|
-
result_file = result_folder.get_file(file_name).create()
|
88
|
-
result_file.download(path) # nothing to download at this point
|
89
|
-
elif tkns[0] == 'sample_results':
|
90
|
-
sample_name, result_folder_name, result_file_name = tkns[2], tkns[3], '/'.join(tkns[4:])
|
91
|
-
sample = self.project.get_sample(sample_name).idem()
|
92
|
-
result_folder = sample.get_result_folder(result_folder_name).idem()
|
93
|
-
result_file = result_folder.get_file(result_file_name).create()
|
94
|
-
result_file.download(path) # nothing to download at this point
|
95
|
-
elif tkns[0] == 'metadata':
|
96
|
-
raise NotImplementedError('TODO')
|
97
|
-
|
98
|
-
def read(self, path, length, offset, fh):
|
99
|
-
os.lseek(fh, offset, os.SEEK_SET)
|
100
|
-
return os.read(fh, length)
|
101
|
-
|
102
|
-
def write(self, path, buf, offset, fh):
|
103
|
-
pass
|
104
|
-
|
105
|
-
def truncate(self, path, length, fh=None):
|
106
|
-
pass
|
107
|
-
|
108
|
-
def flush(self, path, fh):
|
109
|
-
pass
|
110
|
-
|
111
|
-
def release(self, path, fh):
|
112
|
-
pass
|
113
|
-
|
114
|
-
def fsync(self, path, fdatasync, fh):
|
115
|
-
pass
|
116
|
-
|
117
|
-
def _full_local_path(self, partial):
|
118
|
-
if partial.startswith("/"):
|
119
|
-
partial = partial[1:]
|
120
|
-
return os.path.join(self.root, partial)
|
121
|
-
|
122
|
-
|
File without changes
|
File without changes
|
File without changes
|