geoseeq 0.6.3__py3-none-any.whl → 0.6.4__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
geoseeq/cli/main.py CHANGED
@@ -54,7 +54,7 @@ def version():
54
54
  Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
55
55
  Run `geoseeq eula show` to view the EULA.
56
56
  """
57
- click.echo('0.6.3') # remember to update setup
57
+ click.echo('0.6.4') # remember to update setup
58
58
 
59
59
 
60
60
  @main.group('advanced')
geoseeq/constants.py CHANGED
@@ -2,7 +2,7 @@ from os import environ
2
2
  from os.path import join
3
3
 
4
4
  FIVE_MB = 5 * (1024 ** 2)
5
- FASTQ_MODULE_NAMES = ['short_read::paired_end', 'short_read::single_end', 'long_read::nanopore']
5
+ FASTQ_MODULE_NAMES = ['short_read::paired_end', 'short_read::single_end', 'long_read::nanopore', 'raw::raw_reads']
6
6
  DEFAULT_ENDPOINT = "https://backend.geoseeq.com"
7
7
 
8
8
  CONFIG_FOLDER = environ.get("XDG_CONFIG_HOME", join(environ["HOME"], ".config"))
geoseeq/organization.py CHANGED
@@ -1,6 +1,8 @@
1
1
 
2
- from .remote_object import RemoteObject
2
+ import urllib
3
+
3
4
  from .project import Project
5
+ from .remote_object import RemoteObject
4
6
 
5
7
 
6
8
  class Organization(RemoteObject):
@@ -19,7 +21,8 @@ class Organization(RemoteObject):
19
21
  self.name = name
20
22
 
21
23
  def nested_url(self):
22
- return f'nested/{self.name}'
24
+ escaped_name = urllib.parse.quote(self.name, safe="")
25
+ return f'nested/{escaped_name}'
23
26
 
24
27
  def _save(self):
25
28
  data = {
geoseeq/project.py CHANGED
@@ -1,11 +1,13 @@
1
- from .result import ProjectResultFolder
1
+ import logging
2
+ import urllib
3
+
4
+ import pandas as pd
5
+
6
+ from .pipeline import Pipeline
2
7
  from .remote_object import RemoteObject
8
+ from .result import ProjectResultFolder
3
9
  from .sample import Sample
4
10
  from .utils import paginated_iterator
5
- from .pipeline import Pipeline
6
- import json
7
- import pandas as pd
8
- import logging
9
11
 
10
12
  logger = logging.getLogger("geoseeq_api")
11
13
 
@@ -72,7 +74,8 @@ class Project(RemoteObject):
72
74
  return data
73
75
 
74
76
  def nested_url(self):
75
- return self.org.nested_url() + f"/sample_groups/{self.name}"
77
+ escaped_name = urllib.parse.quote(self.name, safe="")
78
+ return self.org.nested_url() + f"/sample_groups/{escaped_name}"
76
79
 
77
80
  def _save_group_obj(self):
78
81
  data = self.get_post_data()
@@ -1,9 +1,6 @@
1
- import json
2
- import logging
3
1
  import os
4
- import time
5
- import urllib.request
6
- from os.path import basename, getsize, join, isfile, isdir, dirname
2
+ import urllib
3
+ from os.path import basename, dirname, getsize, isdir, isfile, join
7
4
  from pathlib import Path
8
5
  from tempfile import NamedTemporaryFile
9
6
 
@@ -155,7 +152,8 @@ class SampleResultFolder(ResultFolder, SampleBioInfoFolder):
155
152
  self.is_private = is_private
156
153
 
157
154
  def nested_url(self):
158
- return self.sample.nested_url() + f"/analysis_results/{self.module_name}"
155
+ escaped_name = urllib.parse.quote(self.module_name, safe="")
156
+ return self.sample.nested_url() + f"/analysis_results/{escaped_name}"
159
157
 
160
158
  def _save(self):
161
159
  data = {field: getattr(self, field) for field in self.remote_fields if hasattr(self, field)}
geoseeq/sample.py CHANGED
@@ -1,5 +1,7 @@
1
- from .result import SampleResultFolder, SampleResultFile
1
+ import urllib
2
+
2
3
  from .remote_object import RemoteObject
4
+ from .result import SampleResultFile, SampleResultFolder
3
5
 
4
6
 
5
7
  class Sample(RemoteObject):
@@ -33,7 +35,8 @@ class Sample(RemoteObject):
33
35
  return f'brn:{self.knex.instance_code()}:sample:{self.uuid}'
34
36
 
35
37
  def nested_url(self):
36
- return self.lib.nested_url() + f"/samples/{self.name}"
38
+ escaped_name = urllib.parse.quote(self.name, safe="")
39
+ return self.lib.nested_url() + f"/samples/{escaped_name}"
37
40
 
38
41
  def change_library(self, new_lib):
39
42
  self.new_lib = new_lib
@@ -212,9 +215,19 @@ class Sample(RemoteObject):
212
215
  )
213
216
  else:
214
217
  files[read_type][folder_name].append(
215
- self._grn_to_file(file_grn[0])
218
+ self._grn_to_file(file_grn)
216
219
  )
217
220
  return files
221
+
222
+ def get_one_fasta(self):
223
+ """Return a 2-ple, a fasta ResultFile and a string with the read type.
224
+
225
+ Does not download the file.
226
+ """
227
+ url = f"data/samples/{self.uuid}/one-fasta"
228
+ blob = self.knex.get(url)
229
+ file = self._grn_to_file(blob["grn"])
230
+ return file, blob["read_type"]
218
231
 
219
232
  def __str__(self):
220
233
  return f"<Geoseeq::Sample {self.name} {self.uuid} />"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geoseeq
3
- Version: 0.6.3
3
+ Version: 0.6.4
4
4
  Summary: GeoSeeq command line tools and python API
5
5
  Author: David C. Danko
6
6
  Author-email: "David C. Danko" <dcdanko@biotia.io>
@@ -2,14 +2,14 @@ geoseeq/__init__.py,sha256=4_5QIXLt29gDwxjDLtyJY6y5Yi5p0usvKnAuJkkmECY,946
2
2
  geoseeq/app.py,sha256=Y6d1UzxFLfE3RNccATbFCVi6kH3eFmzwoUbeR2Ry09A,2387
3
3
  geoseeq/blob_constructors.py,sha256=AkWpDQY0EdGMxF1p6eRspyHKubcUdiW4it-_Q7S2QWk,188
4
4
  geoseeq/bulk_creators.py,sha256=pdn-Dv7yv5SFv-PfDuQbuOnw2W4-BfIfRJVRAhM8U6s,2115
5
- geoseeq/constants.py,sha256=h9RURz4xs2bZyDrSGocej7ANJvRLr_H1H7JRxpNUXJM,431
5
+ geoseeq/constants.py,sha256=z_ninEd7WsS5DaLntdR-sqAFib6Ie22jlhPKzLvLerw,449
6
6
  geoseeq/file_system_cache.py,sha256=HzVZWtwLD2fjWWSo_UfWmGeBltm9He4lP_OqzKwNGWg,4138
7
7
  geoseeq/knex.py,sha256=SlK3Z9Y51APecIeJep4eNvFqlwKpQzvtokBnKe0L5Oc,7965
8
- geoseeq/organization.py,sha256=a9xmGDE0tQsjPJfyFkYnWagxZ8xpdeckkwvkhH6LNIk,2462
8
+ geoseeq/organization.py,sha256=bJkYL8_D-k6IYAaii2ZbxjwYnXy6lvu6iLXscxKlA3w,2542
9
9
  geoseeq/pipeline.py,sha256=89mhWaecsKnm6tyRkdkaVp4dmZh62_v42Ze0oXf8OTY,9873
10
- geoseeq/project.py,sha256=-9Y2ik0-BpT3iqh89v8VQBbdadhI58oaUP9oZK8oetc,13741
10
+ geoseeq/project.py,sha256=pVx4etzkYmYAYwcPJsjN9PrI-7GZEkAaz2Q5GFdng1s,13810
11
11
  geoseeq/remote_object.py,sha256=Es-JlAz8iLRmCpAzh1MOwUh2MqtbuQM-p8wHIBAqNlQ,7131
12
- geoseeq/sample.py,sha256=whgEVk6GnDJJLjn5uTOqFqRtVxZD3BgjTo7brAC5noU,7981
12
+ geoseeq/sample.py,sha256=KkN9fUTkjTIvozR5y2pS9c6rt4jwHN2Bap38dU_Lrck,8391
13
13
  geoseeq/search.py,sha256=gawad6Cx5FxJBPlYkXWb-UKAO-UC0_yhvyU9Ca1kaNI,3388
14
14
  geoseeq/upload_download_manager.py,sha256=FMRqLLg77o1qFbWZc5Yc86a2pjeZrrn1rHJr1iaxKCU,8757
15
15
  geoseeq/user.py,sha256=tol8i1UGLRrbMw5jeJDnna1ikRgrCDd50Jxz0a1lSgg,690
@@ -22,7 +22,7 @@ geoseeq/cli/detail.py,sha256=q8Suu-j2k18knfSVFG-SWWGNsKM-n8y9RMA3LcIIi9Y,4132
22
22
  geoseeq/cli/download.py,sha256=QTNA7qFjCdRJg2vKbAm5yH8WGlcF5fb5bSjm5QiI4XE,17768
23
23
  geoseeq/cli/fastq_utils.py,sha256=-bmeQLaiMBm57zWOF0R5OlWTU0_3sh1JBC1RYw2BOFM,3083
24
24
  geoseeq/cli/get_eula.py,sha256=79mbUwyiF7O1r0g6UTxG9kJGQEqKuH805E6eLkPC6Y4,997
25
- geoseeq/cli/main.py,sha256=9yQRXR8Bs304a0m0eVFnrtmTClChED9S9C3ns2I0szA,3830
25
+ geoseeq/cli/main.py,sha256=on3GfFKfGkNFGf1o3EsurGEs3bDhR3h31qSWUu9_H5E,3830
26
26
  geoseeq/cli/manage.py,sha256=wGXAcVaXqE5JQEU8Jh6OlHr02nB396bpS_SFcOZdrEo,5929
27
27
  geoseeq/cli/progress_bar.py,sha256=p1Xl01nkYxSBZCB30ue2verIIi22W93m3ZAMAxipD0g,738
28
28
  geoseeq/cli/project.py,sha256=V5SdXm2Hwo2lxrkpwRDedw-mAE4XnM2uwT-Gj1D90VQ,3030
@@ -48,9 +48,6 @@ geoseeq/contrib/ncbi/api.py,sha256=WQeLoGA_-Zha-QeSO8_i7HpvXyD8UkV0qc5okm11KiA,1
48
48
  geoseeq/contrib/ncbi/bioproject.py,sha256=_oThTd_iLDOC8cLOlJKAatSr362OBYZCEV3YrqodhFg,4341
49
49
  geoseeq/contrib/ncbi/cli.py,sha256=j9zEcaZPTryK3a4xluRxigcJKDhRpRxbp3KZSx-Bfhk,2400
50
50
  geoseeq/contrib/ncbi/setup_logging.py,sha256=Tp1bY1U0f-o739aHpvVYriG2qdd1lFvCYBXZeXQgt-w,175
51
- geoseeq/file_system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
- geoseeq/file_system/filesystem_download.py,sha256=8bcnxjWltekmCvb5N0b1guBIjLp4-CL2VtsEok-snv4,16963
53
- geoseeq/file_system/main.py,sha256=4HgYGq7WhlF96JlVIf16iFBTDujlBpxImmtoh4VCzDA,3627
54
51
  geoseeq/id_constructors/__init__.py,sha256=w5E0PNQ9UuAxBeZbDI7KBnUoERd85gGz3nScz45bd2o,126
55
52
  geoseeq/id_constructors/from_blobs.py,sha256=aj7M7NRpKGs3u3xUvuFJwmJdFeIcJPmaI2_bhwbFfEs,5702
56
53
  geoseeq/id_constructors/from_ids.py,sha256=bbAJX4LnuN70v9bny6N-jAwOudb2-ztHvlMBgRuSDz0,3151
@@ -72,7 +69,7 @@ geoseeq/result/file_chunker.py,sha256=bXq1csuRtqMB5sbH-AfWo6gdPwrivv5DJPuHVj-h08
72
69
  geoseeq/result/file_download.py,sha256=KalIkwBbFI8xRpbhToixfd1KMAu_0FYwxdKq146NAHw,7832
73
70
  geoseeq/result/file_upload.py,sha256=xs1DrI-h4ZP7xN8HPBc3SFpcPAxR5HAolraP1Zu7tvE,10648
74
71
  geoseeq/result/result_file.py,sha256=1Yj9fkZhds3J-tay6eNH2-EHi00MovHGV1M80_ckHD8,8677
75
- geoseeq/result/result_folder.py,sha256=6porOXPh7Tpxw3oX5yMRPYQzNCGYqszqmFJd3SwQmTc,11122
72
+ geoseeq/result/result_folder.py,sha256=-m1lDVLpNHKy-JUGihboVzvdMJEnHossyRnxmBe1XLo,11140
76
73
  geoseeq/result/resumable_download_tracker.py,sha256=YEzqHBBnE7L3XokTvlTAhHZ8TcDTIE_pyTQ7YadOfbU,3667
77
74
  geoseeq/result/resumable_upload_tracker.py,sha256=2aI09gYz2yw63jEXqs8lmCRKQ79TIc3YuPETvP0Jeek,3811
78
75
  geoseeq/result/utils.py,sha256=C-CxGzB3WddlnRiqFSkrY78I_m0yFgNqsTBRzGU-y8Q,2772
@@ -88,9 +85,9 @@ geoseeq/vc/vc_stub.py,sha256=IQr8dI0zsWKVAeY_5ybDD6n49_3othcgfHS3P0O9tuY,3110
88
85
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
89
86
  tests/test_api_client.py,sha256=TS5njc5pcPP_Ycy-ljcfPVT1hQRBsFVdQ0lCqBmoesU,12810
90
87
  tests/test_plotting.py,sha256=TcTu-2ARr8sxZJ7wPQxmbs3-gHw7uRvsgrhhhg0qKik,784
91
- geoseeq-0.6.3.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
92
- geoseeq-0.6.3.dist-info/METADATA,sha256=IH-VB_1NsbGTLGO_j2OuJunReO7L-FSnougvAgZUQkE,4915
93
- geoseeq-0.6.3.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
94
- geoseeq-0.6.3.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
95
- geoseeq-0.6.3.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
96
- geoseeq-0.6.3.dist-info/RECORD,,
88
+ geoseeq-0.6.4.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
89
+ geoseeq-0.6.4.dist-info/METADATA,sha256=5U3KU8JYFEucNZdtiR6u3muN-G4e88qg89MgOBaPm8M,4915
90
+ geoseeq-0.6.4.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
91
+ geoseeq-0.6.4.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
92
+ geoseeq-0.6.4.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
93
+ geoseeq-0.6.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (70.2.0)
2
+ Generator: setuptools (74.1.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
File without changes
@@ -1,434 +0,0 @@
1
-
2
- import os
3
- import json
4
- from geoseeq import (
5
- result_file_from_id,
6
- result_folder_from_id,
7
- sample_from_id,
8
- project_from_id,
9
- )
10
- from geoseeq.utils import md5_checksum
11
- from time import time
12
-
13
- FILE_STATUS_MODIFIED_REMOTE = 'MODIFIED_REMOTE'
14
- FILE_STATUS_MODIFIED_LOCAL = 'MODIFIED_LOCAL'
15
- FILE_STATUS_NEW_LOCAL = 'NEW_LOCAL'
16
- FILE_STATUS_NEW_REMOTE = 'NEW_REMOTE'
17
- FILE_STATUS_IS_LOCAL_STUB = 'IS_LOCAL_STUB'
18
-
19
-
20
- def dedupe_modified_files(modified_files):
21
- """Remove duplicates from a list of modified files.
22
-
23
- This function will remove duplicates from a list of modified files
24
- based on the path to the file. The first instance of the file will be
25
- kept and all others will be removed.
26
- """
27
- seen = set()
28
- deduped = []
29
- for x in modified_files:
30
- if x[2] not in seen:
31
- deduped.append(x)
32
- seen.add(x[2])
33
- return deduped
34
-
35
-
36
- class ResultFileOnFilesystem:
37
- """
38
-
39
- Note: unlike other filesystem classes the `path` is a file, not
40
- a directory. This is because the file is downloaded directly to
41
- the path.
42
- """
43
-
44
- def __init__(self, result_file, path, kind):
45
- self.result_file = result_file
46
- self.path = path
47
- self.kind = kind
48
-
49
- @property
50
- def info_filepath(self):
51
- dirpath = os.path.dirname(self.path)
52
- basename = os.path.basename(self.path)
53
- return os.path.join(dirpath, f'.gs_result_file__{basename}')
54
-
55
- @property
56
- def is_stub(self):
57
- return os.path.exists(self.path) and os.path.getsize(self.path) == 0
58
-
59
- def file_is_ok(self, stubs_are_ok=False):
60
- if self.is_stub:
61
- return stubs_are_ok
62
- return self.result_file.download_needs_update(self.path)
63
-
64
- def download(self, use_stubs=False, exists_ok=False):
65
- if os.path.exists(self.info_filepath):
66
- if exists_ok and self.file_is_ok(stubs_are_ok=use_stubs):
67
- return
68
- elif not exists_ok:
69
- raise ValueError('Result file already exists at path: {}'.format(self.info_filepath))
70
-
71
- # Download the file
72
- if use_stubs:
73
- open(self.path, 'w').close()
74
- else:
75
- self.result_file.download(self.path)
76
-
77
- self.write_info_file()
78
-
79
- def local_file_checksum(self):
80
- if self.is_stub:
81
- return "__STUB__"
82
- return md5_checksum(self.path)
83
-
84
- def locally_modified(self):
85
- raise NotImplementedError('This function is not implemented')
86
-
87
- def status_is_ok(self, stubs_are_ok=False):
88
- # check for an info file
89
- if not os.path.exists(self.info_filepath):
90
- return False
91
- if stubs_are_ok:
92
- return True
93
- return not self.result_file.download_needs_update(self.path)
94
-
95
- def write_info_file(self):
96
- result_file_info = {
97
- "uuid": self.result_file.uuid,
98
- "kind": self.kind,
99
- "checksum": self.local_file_checksum(),
100
- }
101
- with open(self.info_filepath, 'w') as f:
102
- json.dump(result_file_info, f)
103
-
104
- @classmethod
105
- def from_path(cls, path):
106
- obj = cls(None, path, None)
107
- try:
108
- with open(obj.info_filepath, 'r') as f:
109
- result_file_info = json.load(f)
110
- obj.result_file = result_file_from_id(result_file_info['uuid'])
111
- obj.kind = result_file_info['kind']
112
- obj.stored_checksum = result_file_info['checksum']
113
- except FileNotFoundError:
114
- pass
115
- return obj
116
-
117
- def write_info_file(self):
118
- result_file_info = {
119
- "uuid": self.result_file.uuid,
120
- "kind": self.kind,
121
- "checksum": self.local_file_checksum(),
122
- }
123
- with open(self.info_filepath, 'w') as f:
124
- json.dump(result_file_info, f)
125
-
126
- def list_abnormal_objects(self):
127
- """Return a list of files that have been modified.
128
-
129
- Since this class is a single file the list will either be empty
130
- or have one element.
131
-
132
- Note that if a file was modified locally then uploaded to the server
133
- the file will be marked as modified remote.
134
- """
135
- if self.result_file is None:
136
- return [('FILE', FILE_STATUS_NEW_LOCAL, self.path, None)]
137
- if not os.path.exists(self.path):
138
- return [('FILE', FILE_STATUS_NEW_REMOTE, self.path, self.result_file)]
139
- if self.is_stub:
140
- return [('FILE', FILE_STATUS_IS_LOCAL_STUB, self.path, self.result_file)]
141
- if self.result_file and self.result_file.download_needs_update(self.path):
142
- return [('FILE', FILE_STATUS_MODIFIED_REMOTE, self.path, self.result_file)]
143
- if self.locally_modified():
144
- return [('FILE', FILE_STATUS_MODIFIED_LOCAL, self.path, self.result_file)]
145
-
146
- return []
147
-
148
-
149
- class ResultFolderOnFilesystem:
150
-
151
- def __init__(self, result_folder, path, kind):
152
- self.result_folder = result_folder
153
- self.path = path
154
- self.kind = kind
155
-
156
- @property
157
- def info_filepath(self):
158
- return os.path.join(self.path, '.gs_result_folder')
159
-
160
- def download(self, use_stubs=False, exists_ok=False):
161
- if os.path.exists(self.info_filepath) and not exists_ok:
162
- raise ValueError('Result folder already exists at path: {}'.format(self.info_filepath))
163
-
164
- # Download the files in the result folder
165
- for result_file in self.result_folder.get_fields():
166
- result_file_local_path = os.path.join(self.path, result_file.name)
167
- os.makedirs(os.path.dirname(result_file_local_path), exist_ok=True)
168
- ResultFileOnFilesystem(result_file, result_file_local_path, self.kind)\
169
- .download(use_stubs=use_stubs, exists_ok=exists_ok)
170
-
171
- # Write the result folder data
172
- result_folder_info = {
173
- "uuid": self.result_folder.uuid,
174
- "kind": self.kind
175
- }
176
- with open(self.info_filepath, 'w') as f:
177
- json.dump(result_folder_info, f)
178
-
179
- def status_is_ok(self):
180
- # check for an info file
181
- if not os.path.exists(self.info_filepath):
182
- return False
183
-
184
- # check that all files are downloaded
185
- for result_file in self.result_folder.get_files():
186
- result_file_path = os.path.join(self.path, result_file.name)
187
- if not os.path.exists(result_file_path):
188
- return False
189
-
190
- return True
191
-
192
- @classmethod
193
- def from_path(cls, path):
194
- obj = cls(None, path, None)
195
- try:
196
- with open(os.path.join(path, '.gs_result_folder'), 'r') as f:
197
- result_folder_info = json.load(f)
198
- obj.result_folder = result_folder_from_id(result_folder_info['uuid'])
199
- obj.kind = result_folder_info['kind']
200
- except FileNotFoundError:
201
- pass
202
- return obj
203
-
204
- def list_abnormal_objects(self):
205
- """Return a list of files that have been modified.
206
-
207
- This function will return a list of tuples where the first element
208
- is the status of the file and the second element is the path to the file.
209
- """
210
- modified_files = []
211
- if not self.result_folder:
212
- modified_files.append(('FOLDER', FILE_STATUS_NEW_LOCAL, self.path, None))
213
- if not os.path.exists(self.path):
214
- modified_files.append(('FOLDER', FILE_STATUS_NEW_REMOTE, self.path, self.result_folder))
215
-
216
- # list local files
217
- if os.path.exists(self.path):
218
- for local_file in os.listdir(self.path):
219
- if local_file.startswith('.gs_'):
220
- continue
221
- local_file_path = os.path.join(self.path, local_file)
222
- result_file_on_fs = ResultFileOnFilesystem.from_path(local_file_path)
223
- modified_files.extend(result_file_on_fs.list_abnormal_objects())
224
-
225
- # list remote files
226
- if self.result_folder:
227
- for result_file in self.result_folder.get_fields():
228
- result_file_path = os.path.join(self.path, result_file.name)
229
- result_file_on_fs = ResultFileOnFilesystem(result_file, result_file_path, self.kind)
230
- modified_files.extend(result_file_on_fs.list_abnormal_objects())
231
-
232
- return dedupe_modified_files(modified_files)
233
-
234
-
235
- class SampleOnFilesystem:
236
-
237
- def __init__(self, sample, path):
238
- self.sample = sample
239
- self.path = path if path[-1] != '/' else path[:-1] # remove trailing slash
240
-
241
- @property
242
- def info_filepath(self):
243
- return os.path.join(self.path, '.gs_sample')
244
-
245
- def download(self, use_stubs=False, exists_ok=False):
246
- if os.path.exists(self.info_filepath) and not exists_ok:
247
- raise ValueError('Sample already exists at path: {}'.format(self.info_filepath))
248
-
249
- # download result folders
250
- for result_folder in self.sample.get_result_folders():
251
- result_folder_local_path = os.path.join(self.path, result_folder.name)
252
- os.makedirs(result_folder_local_path, exist_ok=True)
253
- ResultFolderOnFilesystem(result_folder, result_folder_local_path, "sample")\
254
- .download(use_stubs=use_stubs, exists_ok=exists_ok)
255
-
256
- # Write the sample data
257
- sample_info = {
258
- "uuid": self.sample.uuid
259
- }
260
- with open(self.info_filepath, 'w') as f:
261
- json.dump(sample_info, f)
262
-
263
- def status_is_ok(self):
264
- # check for an info file
265
- if not os.path.exists(self.info_filepath):
266
- return False
267
-
268
- # check that all result folders are downloaded
269
- for result_folder in self.sample.get_result_folders():
270
- result_folder_local_path = os.path.join(self.path, result_folder.name)
271
- result_folder_on_fs = ResultFolderOnFilesystem.from_path(result_folder_local_path, "sample")
272
- if not result_folder_on_fs.status_is_ok():
273
- return False
274
-
275
- return True
276
-
277
- @classmethod
278
- def from_path(cls, path):
279
- obj = cls(None, path)
280
- try:
281
- with open(os.path.join(path, '.gs_sample'), 'r') as f:
282
- sample_info = json.load(f)
283
- obj.sample = sample_from_id(sample_info['uuid'])
284
- except FileNotFoundError:
285
- pass
286
- return obj
287
-
288
- def list_abnormal_objects(self):
289
- """Return a list of files that have been modified.
290
-
291
- This function will return a list of tuples where the first element
292
- is the status of the file and the second element is the path to the file.
293
- """
294
- modified_files = []
295
- if not self.sample:
296
- modified_files.append(('SAMPLE', FILE_STATUS_NEW_LOCAL, self.path, None))
297
- if not os.path.exists(self.path):
298
- modified_files.append(('SAMPLE', FILE_STATUS_NEW_REMOTE, self.path, self.sample))
299
-
300
- # list local folders
301
- if os.path.exists(self.path):
302
- for local_folder in os.listdir(self.path):
303
- local_folder_path = os.path.join(self.path, local_folder)
304
- if not os.path.isdir(local_folder_path):
305
- continue
306
- result_folder_on_fs = ResultFolderOnFilesystem.from_path(local_folder_path)
307
- modified_files.extend(result_folder_on_fs.list_abnormal_objects())
308
-
309
- # list remote folders
310
- if self.sample:
311
- for result_folder in self.sample.get_result_folders():
312
- result_folder_path = os.path.join(self.path, result_folder.name)
313
- result_folder_on_fs = ResultFolderOnFilesystem(result_folder, result_folder_path, "sample")
314
- modified_files.extend(result_folder_on_fs.list_abnormal_objects())
315
-
316
- return dedupe_modified_files(modified_files)
317
-
318
-
319
- class ProjectOnFilesystem:
320
-
321
- def __init__(self, project, path):
322
- self.project = project
323
- self.path = path
324
-
325
- @property
326
- def info_filepath(self):
327
- return os.path.join(self.path, '.gs_project')
328
-
329
- def download(self, use_stubs=False, exists_ok=False):
330
- if os.path.exists(self.info_filepath) and not exists_ok:
331
- raise ValueError('Project already exists at path: {}'.format(self.info_filepath))
332
-
333
- # download samples
334
- for sample in self.project.get_samples():
335
- sample_local_path = os.path.join(self.path, "sample_results", sample.name)
336
- os.makedirs(sample_local_path, exist_ok=True)
337
- SampleOnFilesystem(sample, sample_local_path)\
338
- .download(use_stubs=use_stubs, exists_ok=exists_ok)
339
-
340
- # download project result folders
341
- for result_folder in self.project.get_result_folders():
342
- result_folder_local_path = os.path.join(self.path, "project_results", result_folder.name)
343
- os.makedirs(result_folder_local_path, exist_ok=True)
344
- ResultFolderOnFilesystem(result_folder, result_folder_local_path, "project")\
345
- .download(use_stubs=use_stubs, exists_ok=exists_ok)
346
-
347
- # Write the project data
348
- project_info = {
349
- "uuid": self.project.uuid
350
- }
351
- with open(self.info_filepath, 'w') as f:
352
- json.dump(project_info, f)
353
-
354
- def status_is_ok(self):
355
- # check for an info file
356
- if not os.path.exists(self.info_filepath):
357
- return False
358
-
359
- # check that all samples are downloaded
360
- for sample in self.project.get_samples():
361
- sample_local_path = os.path.join(self.path, "sample_results", sample.name)
362
- sample_on_fs = SampleOnFilesystem.from_path(sample_local_path)
363
- if not sample_on_fs.status_is_ok():
364
- return False
365
-
366
- # check that all project result folders are downloaded
367
- for result_folder in self.project.get_result_folders():
368
- result_folder_local_path = os.path.join(self.path, "project_results", result_folder.name)
369
- result_folder_on_fs = ResultFolderOnFilesystem.from_path(result_folder_local_path, "project")
370
- if not result_folder_on_fs.status_is_ok():
371
- return False
372
-
373
- return True
374
-
375
- @classmethod
376
- def from_path(cls, path, recursive=False):
377
- try:
378
- with open(os.path.join(path, '.gs_project'), 'r') as f:
379
- project_info = json.load(f)
380
- project = project_from_id(project_info['uuid'])
381
- return cls(project, path)
382
- except FileNotFoundError:
383
- if not recursive:
384
- raise ValueError('No project found in path or parent directories')
385
- updir = os.path.dirname(os.path.abspath(path))
386
- if updir == path:
387
- raise ValueError('No project found in path or parent directories')
388
- return cls.from_path(updir, recursive=recursive)
389
-
390
- def path_from_project_root(self, path):
391
- if path[0] == "/":
392
- return path.replace(self.path, "")[1:]
393
- return path
394
-
395
- def list_abnormal_objects(self):
396
- """Return a list of files that have been modified.
397
-
398
- This function will return a list of tuples where the first element
399
- is the status of the file and the second element is the path to the file.
400
- """
401
- modified_files = []
402
-
403
- # list remote samples
404
- for sample in self.project.get_samples():
405
- sample_path = os.path.join(self.path, "sample_results", sample.name)
406
- sample_on_fs = SampleOnFilesystem(sample, sample_path)
407
- modified_files.extend(sample_on_fs.list_abnormal_objects())
408
-
409
- # list remote project result folders
410
- for result_folder in self.project.get_result_folders():
411
- result_folder_path = os.path.join(self.path, "project_results", result_folder.name)
412
-
413
- result_folder_on_fs = ResultFolderOnFilesystem(result_folder, result_folder_path, "project")
414
- modified_files.extend(result_folder_on_fs.list_abnormal_objects())
415
-
416
- # list local samples
417
- for local_sample in os.listdir(os.path.join(self.path, "sample_results")):
418
- local_sample_path = os.path.join(self.path, "sample_results", local_sample)
419
- if not os.path.isdir(local_sample_path):
420
- continue
421
- sample_on_fs = SampleOnFilesystem.from_path(local_sample_path)
422
- modified_files.extend(sample_on_fs.list_abnormal_objects())
423
-
424
- # list local project result folders
425
- for local_result_folder in os.listdir(os.path.join(self.path, "project_results")):
426
- local_result_folder_path = os.path.join(self.path, "project_results", local_result_folder)
427
- if not os.path.isdir(local_result_folder_path):
428
- continue
429
- result_folder_on_fs = ResultFolderOnFilesystem.from_path(local_result_folder_path)
430
- modified_files.extend(result_folder_on_fs.list_abnormal_objects())
431
- return dedupe_modified_files(modified_files)
432
-
433
-
434
-
@@ -1,122 +0,0 @@
1
- from fuse import FUSE, Operations
2
- import os
3
-
4
-
5
- class GeoSeeqProjectFileSystem(Operations):
6
- """Mount a GeoSeeq project as a filesystem.
7
-
8
- The project will automatically have this directory structure:
9
- - <root>/project_results/<project_result_folder_name>/...
10
- - <root>/sample_results/<sample_name>/...
11
- - <root>/metadata/sample_metadata.csv
12
- - <root>/.config/config.json
13
- """
14
-
15
- def __init__(self, root, project):
16
- self.root = root
17
- self.project = project
18
-
19
- def access(self, path, mode):
20
- pass
21
-
22
- def chmod(self, path, mode):
23
- pass
24
-
25
- def chown(self, path, uid, gid):
26
- pass
27
-
28
- def getattr(self, path, fh=None):
29
- pass
30
-
31
- def readdir(self, path, fh):
32
- pass
33
-
34
- def readlink(self, path):
35
- pass
36
-
37
- def mknod(self, path, mode, dev):
38
- pass
39
-
40
- def rmdir(self, path):
41
- pass
42
-
43
- def mkdir(self, path, mode):
44
- pass
45
-
46
- def statfs(self, path):
47
- pass
48
-
49
- def unlink(self, path):
50
- pass
51
-
52
- def symlink(self, name, target):
53
- pass
54
-
55
- def rename(self, old, new):
56
- pass
57
-
58
- def link(self, target, name):
59
- pass
60
-
61
- def utimens(self, path, times=None):
62
- pass
63
-
64
- def open(self, path, flags):
65
- tkns = path.split('/')
66
- if tkns[0] == 'project_results':
67
- result_folder_name, result_file_name = tkns[2], '/'.join(tkns[3:])
68
- result_folder = self.project.get_result_folder(result_folder_name).get()
69
- result_file = result_folder.get_file(result_file_name).get()
70
- result_file.download(path)
71
- elif tkns[0] == 'sample_results':
72
- sample_name, result_folder_name, result_file_name = tkns[2], tkns[3], '/'.join(tkns[4:])
73
- sample = self.project.get_sample(sample_name).get()
74
- result_folder = sample.get_result_folder(result_folder_name).get()
75
- result_file = result_folder.get_file(result_file_name).get()
76
- result_file.download(path)
77
- elif tkns[0] == 'metadata':
78
- raise NotImplementedError('TODO')
79
-
80
- return os.open(self._full_local_path(path), flags)
81
-
82
- def create(self, path, mode, fi=None):
83
- tkns = path.split('/')
84
- if tkns[0] == 'project_results':
85
- result_name, file_name = tkns[2], '/'.join(tkns[3:])
86
- result_folder = self.project.get_result_folder(result_name).idem()
87
- result_file = result_folder.get_file(file_name).create()
88
- result_file.download(path) # nothing to download at this point
89
- elif tkns[0] == 'sample_results':
90
- sample_name, result_folder_name, result_file_name = tkns[2], tkns[3], '/'.join(tkns[4:])
91
- sample = self.project.get_sample(sample_name).idem()
92
- result_folder = sample.get_result_folder(result_folder_name).idem()
93
- result_file = result_folder.get_file(result_file_name).create()
94
- result_file.download(path) # nothing to download at this point
95
- elif tkns[0] == 'metadata':
96
- raise NotImplementedError('TODO')
97
-
98
- def read(self, path, length, offset, fh):
99
- os.lseek(fh, offset, os.SEEK_SET)
100
- return os.read(fh, length)
101
-
102
- def write(self, path, buf, offset, fh):
103
- pass
104
-
105
- def truncate(self, path, length, fh=None):
106
- pass
107
-
108
- def flush(self, path, fh):
109
- pass
110
-
111
- def release(self, path, fh):
112
- pass
113
-
114
- def fsync(self, path, fdatasync, fh):
115
- pass
116
-
117
- def _full_local_path(self, partial):
118
- if partial.startswith("/"):
119
- partial = partial[1:]
120
- return os.path.join(self.root, partial)
121
-
122
-