geoseeq 0.6.2__tar.gz → 0.6.4__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (102) hide show
  1. {geoseeq-0.6.2 → geoseeq-0.6.4}/PKG-INFO +6 -1
  2. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/download.py +2 -2
  3. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/main.py +2 -2
  4. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/constants.py +1 -1
  5. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/organization.py +5 -2
  6. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/project.py +9 -6
  7. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/result/file_download.py +2 -1
  8. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/result/result_folder.py +4 -6
  9. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/sample.py +16 -3
  10. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq.egg-info/PKG-INFO +6 -1
  11. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq.egg-info/SOURCES.txt +1 -3
  12. geoseeq-0.6.4/geoseeq.egg-info/requires.txt +5 -0
  13. {geoseeq-0.6.2 → geoseeq-0.6.4}/pyproject.toml +9 -2
  14. geoseeq-0.6.2/geoseeq/file_system/filesystem_download.py +0 -434
  15. geoseeq-0.6.2/geoseeq/file_system/main.py +0 -122
  16. geoseeq-0.6.2/tests/__init__.py +0 -0
  17. {geoseeq-0.6.2 → geoseeq-0.6.4}/LICENSE +0 -0
  18. {geoseeq-0.6.2 → geoseeq-0.6.4}/README.md +0 -0
  19. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/__init__.py +0 -0
  20. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/app.py +0 -0
  21. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/blob_constructors.py +0 -0
  22. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/bulk_creators.py +0 -0
  23. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/__init__.py +0 -0
  24. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/constants.py +0 -0
  25. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/copy.py +0 -0
  26. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/detail.py +0 -0
  27. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/fastq_utils.py +0 -0
  28. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/get_eula.py +0 -0
  29. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/manage.py +0 -0
  30. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/progress_bar.py +0 -0
  31. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/project.py +0 -0
  32. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/raw.py +0 -0
  33. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/run.py +0 -0
  34. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/search.py +0 -0
  35. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/shared_params/__init__.py +0 -0
  36. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/shared_params/common_state.py +0 -0
  37. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/shared_params/config.py +0 -0
  38. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/shared_params/id_handlers.py +0 -0
  39. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/shared_params/obj_getters.py +0 -0
  40. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/shared_params/opts_and_args.py +0 -0
  41. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/upload/__init__.py +0 -0
  42. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/upload/upload.py +0 -0
  43. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/upload/upload_advanced.py +0 -0
  44. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/upload/upload_reads.py +0 -0
  45. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/user.py +0 -0
  46. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/utils.py +0 -0
  47. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/cli/view.py +0 -0
  48. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/contrib/__init__.py +0 -0
  49. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/contrib/ncbi/__init__.py +0 -0
  50. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/contrib/ncbi/api.py +0 -0
  51. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/contrib/ncbi/bioproject.py +0 -0
  52. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/contrib/ncbi/cli.py +0 -0
  53. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/contrib/ncbi/setup_logging.py +0 -0
  54. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/file_system_cache.py +0 -0
  55. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/id_constructors/__init__.py +0 -0
  56. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/id_constructors/from_blobs.py +0 -0
  57. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/id_constructors/from_ids.py +0 -0
  58. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/id_constructors/from_names.py +0 -0
  59. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/id_constructors/from_uuids.py +0 -0
  60. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/id_constructors/resolvers.py +0 -0
  61. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/id_constructors/utils.py +0 -0
  62. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/knex.py +0 -0
  63. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/pipeline.py +0 -0
  64. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/plotting/__init__.py +0 -0
  65. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/plotting/constants.py +0 -0
  66. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/plotting/highcharts.py +0 -0
  67. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/plotting/map/__init__.py +0 -0
  68. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/plotting/map/base_layer.py +0 -0
  69. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/plotting/map/map.py +0 -0
  70. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/plotting/map/overlay.py +0 -0
  71. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/plotting/selectable.py +0 -0
  72. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/remote_object.py +0 -0
  73. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/result/__init__.py +0 -0
  74. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/result/bioinfo.py +0 -0
  75. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/result/file_chunker.py +0 -0
  76. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/result/file_upload.py +0 -0
  77. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/result/result_file.py +0 -0
  78. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/result/resumable_download_tracker.py +0 -0
  79. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/result/resumable_upload_tracker.py +0 -0
  80. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/result/utils.py +0 -0
  81. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/search.py +0 -0
  82. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/upload_download_manager.py +0 -0
  83. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/user.py +0 -0
  84. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/utils.py +0 -0
  85. {geoseeq-0.6.2/geoseeq/file_system → geoseeq-0.6.4/geoseeq/vc}/__init__.py +0 -0
  86. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/vc/checksum.py +0 -0
  87. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/vc/cli.py +0 -0
  88. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/vc/clone.py +0 -0
  89. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/vc/constants.py +0 -0
  90. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/vc/vc_cache.py +0 -0
  91. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/vc/vc_dir.py +0 -0
  92. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/vc/vc_sample.py +0 -0
  93. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/vc/vc_stub.py +0 -0
  94. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq/work_orders.py +0 -0
  95. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq.egg-info/dependency_links.txt +0 -0
  96. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq.egg-info/entry_points.txt +0 -0
  97. {geoseeq-0.6.2 → geoseeq-0.6.4}/geoseeq.egg-info/top_level.txt +0 -0
  98. {geoseeq-0.6.2 → geoseeq-0.6.4}/setup.cfg +0 -0
  99. {geoseeq-0.6.2 → geoseeq-0.6.4}/setup.py +0 -0
  100. {geoseeq-0.6.2/geoseeq/vc → geoseeq-0.6.4/tests}/__init__.py +0 -0
  101. {geoseeq-0.6.2 → geoseeq-0.6.4}/tests/test_api_client.py +0 -0
  102. {geoseeq-0.6.2 → geoseeq-0.6.4}/tests/test_plotting.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geoseeq
3
- Version: 0.6.2
3
+ Version: 0.6.4
4
4
  Summary: GeoSeeq command line tools and python API
5
5
  Author: David C. Danko
6
6
  Author-email: "David C. Danko" <dcdanko@biotia.io>
@@ -12,6 +12,11 @@ Classifier: Operating System :: OS Independent
12
12
  Requires-Python: >=3.8
13
13
  Description-Content-Type: text/markdown
14
14
  License-File: LICENSE
15
+ Requires-Dist: requests
16
+ Requires-Dist: click
17
+ Requires-Dist: pandas
18
+ Requires-Dist: biopython
19
+ Requires-Dist: tqdm
15
20
 
16
21
  # Geoseeq API Client
17
22
 
@@ -160,8 +160,8 @@ def cli_download_files(
160
160
 
161
161
  \b
162
162
  # Download assembly contigs from two samples in the MetaSUB Consortium CSD16 project
163
- $ geoseeq download files "MetaSUB Consortium/CSD16" `# specify the project` \
164
- haib17CEM4890_H2NYMCCXY_SL254769 haib17CEM4890_H2NYMCCXY_SL254773 `# specify the samples by name` \
163
+ $ geoseeq download files "MetaSUB Consortium/CSD16" `# specify the project` \\
164
+ haib17CEM4890_H2NYMCCXY_SL254769 haib17CEM4890_H2NYMCCXY_SL254773 `# specify the samples by name` \\
165
165
  --folder-type sample --extension '.contigs.fasta' # filter for contig files
166
166
 
167
167
  ---
@@ -26,7 +26,7 @@ handler.setFormatter(logging.Formatter('[%(levelname)s] %(name)s :: %(message)s'
26
26
  logger.addHandler(handler)
27
27
 
28
28
 
29
- @click.group()
29
+ @click.group(context_settings={'show_default': True})
30
30
  def main():
31
31
  """Command line interface for the GeoSeeq API.
32
32
 
@@ -54,7 +54,7 @@ def version():
54
54
  Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
55
55
  Run `geoseeq eula show` to view the EULA.
56
56
  """
57
- click.echo('0.6.2') # remember to update setup
57
+ click.echo('0.6.4') # remember to update setup
58
58
 
59
59
 
60
60
  @main.group('advanced')
@@ -2,7 +2,7 @@ from os import environ
2
2
  from os.path import join
3
3
 
4
4
  FIVE_MB = 5 * (1024 ** 2)
5
- FASTQ_MODULE_NAMES = ['short_read::paired_end', 'short_read::single_end', 'long_read::nanopore']
5
+ FASTQ_MODULE_NAMES = ['short_read::paired_end', 'short_read::single_end', 'long_read::nanopore', 'raw::raw_reads']
6
6
  DEFAULT_ENDPOINT = "https://backend.geoseeq.com"
7
7
 
8
8
  CONFIG_FOLDER = environ.get("XDG_CONFIG_HOME", join(environ["HOME"], ".config"))
@@ -1,6 +1,8 @@
1
1
 
2
- from .remote_object import RemoteObject
2
+ import urllib
3
+
3
4
  from .project import Project
5
+ from .remote_object import RemoteObject
4
6
 
5
7
 
6
8
  class Organization(RemoteObject):
@@ -19,7 +21,8 @@ class Organization(RemoteObject):
19
21
  self.name = name
20
22
 
21
23
  def nested_url(self):
22
- return f'nested/{self.name}'
24
+ escaped_name = urllib.parse.quote(self.name, safe="")
25
+ return f'nested/{escaped_name}'
23
26
 
24
27
  def _save(self):
25
28
  data = {
@@ -1,11 +1,13 @@
1
- from .result import ProjectResultFolder
1
+ import logging
2
+ import urllib
3
+
4
+ import pandas as pd
5
+
6
+ from .pipeline import Pipeline
2
7
  from .remote_object import RemoteObject
8
+ from .result import ProjectResultFolder
3
9
  from .sample import Sample
4
10
  from .utils import paginated_iterator
5
- from .pipeline import Pipeline
6
- import json
7
- import pandas as pd
8
- import logging
9
11
 
10
12
  logger = logging.getLogger("geoseeq_api")
11
13
 
@@ -72,7 +74,8 @@ class Project(RemoteObject):
72
74
  return data
73
75
 
74
76
  def nested_url(self):
75
- return self.org.nested_url() + f"/sample_groups/{self.name}"
77
+ escaped_name = urllib.parse.quote(self.name, safe="")
78
+ return self.org.nested_url() + f"/sample_groups/{escaped_name}"
76
79
 
77
80
  def _save_group_obj(self):
78
81
  data = self.get_post_data()
@@ -6,6 +6,7 @@ import os
6
6
  from os.path import basename, getsize, join, isfile, getmtime, dirname
7
7
  from pathlib import Path
8
8
  from tempfile import NamedTemporaryFile
9
+ from math import ceil
9
10
 
10
11
  from geoseeq.utils import download_ftp
11
12
  from geoseeq.constants import FIVE_MB
@@ -43,7 +44,7 @@ def _download_resumable(response, filename, total_size_in_bytes, progress_tracke
43
44
  target_id = url_to_id(response.url)
44
45
  tracker = ResumableDownloadTracker(chunk_size, target_id, filename)
45
46
  if not tracker.download_started: tracker.start_download(response.url)
46
- n_chunks = total_size_in_bytes // chunk_size
47
+ n_chunks = ceil(total_size_in_bytes / chunk_size)
47
48
  for i in range(n_chunks):
48
49
  bytes_start, bytes_end = i * chunk_size, min((i + 1) * chunk_size - 1, total_size_in_bytes - 1)
49
50
  if tracker.part_has_been_downloaded(i):
@@ -1,9 +1,6 @@
1
- import json
2
- import logging
3
1
  import os
4
- import time
5
- import urllib.request
6
- from os.path import basename, getsize, join, isfile, isdir, dirname
2
+ import urllib
3
+ from os.path import basename, dirname, getsize, isdir, isfile, join
7
4
  from pathlib import Path
8
5
  from tempfile import NamedTemporaryFile
9
6
 
@@ -155,7 +152,8 @@ class SampleResultFolder(ResultFolder, SampleBioInfoFolder):
155
152
  self.is_private = is_private
156
153
 
157
154
  def nested_url(self):
158
- return self.sample.nested_url() + f"/analysis_results/{self.module_name}"
155
+ escaped_name = urllib.parse.quote(self.module_name, safe="")
156
+ return self.sample.nested_url() + f"/analysis_results/{escaped_name}"
159
157
 
160
158
  def _save(self):
161
159
  data = {field: getattr(self, field) for field in self.remote_fields if hasattr(self, field)}
@@ -1,5 +1,7 @@
1
- from .result import SampleResultFolder, SampleResultFile
1
+ import urllib
2
+
2
3
  from .remote_object import RemoteObject
4
+ from .result import SampleResultFile, SampleResultFolder
3
5
 
4
6
 
5
7
  class Sample(RemoteObject):
@@ -33,7 +35,8 @@ class Sample(RemoteObject):
33
35
  return f'brn:{self.knex.instance_code()}:sample:{self.uuid}'
34
36
 
35
37
  def nested_url(self):
36
- return self.lib.nested_url() + f"/samples/{self.name}"
38
+ escaped_name = urllib.parse.quote(self.name, safe="")
39
+ return self.lib.nested_url() + f"/samples/{escaped_name}"
37
40
 
38
41
  def change_library(self, new_lib):
39
42
  self.new_lib = new_lib
@@ -212,9 +215,19 @@ class Sample(RemoteObject):
212
215
  )
213
216
  else:
214
217
  files[read_type][folder_name].append(
215
- self._grn_to_file(file_grn[0])
218
+ self._grn_to_file(file_grn)
216
219
  )
217
220
  return files
221
+
222
+ def get_one_fasta(self):
223
+ """Return a 2-ple, a fasta ResultFile and a string with the read type.
224
+
225
+ Does not download the file.
226
+ """
227
+ url = f"data/samples/{self.uuid}/one-fasta"
228
+ blob = self.knex.get(url)
229
+ file = self._grn_to_file(blob["grn"])
230
+ return file, blob["read_type"]
218
231
 
219
232
  def __str__(self):
220
233
  return f"<Geoseeq::Sample {self.name} {self.uuid} />"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geoseeq
3
- Version: 0.6.2
3
+ Version: 0.6.4
4
4
  Summary: GeoSeeq command line tools and python API
5
5
  Author: David C. Danko
6
6
  Author-email: "David C. Danko" <dcdanko@biotia.io>
@@ -12,6 +12,11 @@ Classifier: Operating System :: OS Independent
12
12
  Requires-Python: >=3.8
13
13
  Description-Content-Type: text/markdown
14
14
  License-File: LICENSE
15
+ Requires-Dist: requests
16
+ Requires-Dist: click
17
+ Requires-Dist: pandas
18
+ Requires-Dist: biopython
19
+ Requires-Dist: tqdm
15
20
 
16
21
  # Geoseeq API Client
17
22
 
@@ -23,6 +23,7 @@ geoseeq.egg-info/PKG-INFO
23
23
  geoseeq.egg-info/SOURCES.txt
24
24
  geoseeq.egg-info/dependency_links.txt
25
25
  geoseeq.egg-info/entry_points.txt
26
+ geoseeq.egg-info/requires.txt
26
27
  geoseeq.egg-info/top_level.txt
27
28
  geoseeq/cli/__init__.py
28
29
  geoseeq/cli/constants.py
@@ -57,9 +58,6 @@ geoseeq/contrib/ncbi/api.py
57
58
  geoseeq/contrib/ncbi/bioproject.py
58
59
  geoseeq/contrib/ncbi/cli.py
59
60
  geoseeq/contrib/ncbi/setup_logging.py
60
- geoseeq/file_system/__init__.py
61
- geoseeq/file_system/filesystem_download.py
62
- geoseeq/file_system/main.py
63
61
  geoseeq/id_constructors/__init__.py
64
62
  geoseeq/id_constructors/from_blobs.py
65
63
  geoseeq/id_constructors/from_ids.py
@@ -0,0 +1,5 @@
1
+ requests
2
+ click
3
+ pandas
4
+ biopython
5
+ tqdm
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "geoseeq"
7
- version = "0.6.2"
7
+ version = "0.6.4"
8
8
  authors = [
9
9
  { name="David C. Danko", email="dcdanko@biotia.io" },
10
10
  ]
@@ -16,10 +16,17 @@ classifiers = [
16
16
  "License :: OSI Approved :: MIT License",
17
17
  "Operating System :: OS Independent",
18
18
  ]
19
+ dependencies = [
20
+ "requests",
21
+ "click",
22
+ "pandas",
23
+ "biopython",
24
+ "tqdm",
25
+ ]
19
26
 
20
27
  [project.urls]
21
28
  Homepage = "https://github.com/biotia/geoseeq_api_client"
22
29
  Issues = "https://github.com/biotia/geoseeq_api_client/issues"
23
30
 
24
31
  [project.scripts]
25
- geoseeq = "geoseeq.cli:main"
32
+ geoseeq = "geoseeq.cli:main"
@@ -1,434 +0,0 @@
1
-
2
- import os
3
- import json
4
- from geoseeq import (
5
- result_file_from_id,
6
- result_folder_from_id,
7
- sample_from_id,
8
- project_from_id,
9
- )
10
- from geoseeq.utils import md5_checksum
11
- from time import time
12
-
13
- FILE_STATUS_MODIFIED_REMOTE = 'MODIFIED_REMOTE'
14
- FILE_STATUS_MODIFIED_LOCAL = 'MODIFIED_LOCAL'
15
- FILE_STATUS_NEW_LOCAL = 'NEW_LOCAL'
16
- FILE_STATUS_NEW_REMOTE = 'NEW_REMOTE'
17
- FILE_STATUS_IS_LOCAL_STUB = 'IS_LOCAL_STUB'
18
-
19
-
20
- def dedupe_modified_files(modified_files):
21
- """Remove duplicates from a list of modified files.
22
-
23
- This function will remove duplicates from a list of modified files
24
- based on the path to the file. The first instance of the file will be
25
- kept and all others will be removed.
26
- """
27
- seen = set()
28
- deduped = []
29
- for x in modified_files:
30
- if x[2] not in seen:
31
- deduped.append(x)
32
- seen.add(x[2])
33
- return deduped
34
-
35
-
36
- class ResultFileOnFilesystem:
37
- """
38
-
39
- Note: unlike other filesystem classes the `path` is a file, not
40
- a directory. This is because the file is downloaded directly to
41
- the path.
42
- """
43
-
44
- def __init__(self, result_file, path, kind):
45
- self.result_file = result_file
46
- self.path = path
47
- self.kind = kind
48
-
49
- @property
50
- def info_filepath(self):
51
- dirpath = os.path.dirname(self.path)
52
- basename = os.path.basename(self.path)
53
- return os.path.join(dirpath, f'.gs_result_file__{basename}')
54
-
55
- @property
56
- def is_stub(self):
57
- return os.path.exists(self.path) and os.path.getsize(self.path) == 0
58
-
59
- def file_is_ok(self, stubs_are_ok=False):
60
- if self.is_stub:
61
- return stubs_are_ok
62
- return self.result_file.download_needs_update(self.path)
63
-
64
- def download(self, use_stubs=False, exists_ok=False):
65
- if os.path.exists(self.info_filepath):
66
- if exists_ok and self.file_is_ok(stubs_are_ok=use_stubs):
67
- return
68
- elif not exists_ok:
69
- raise ValueError('Result file already exists at path: {}'.format(self.info_filepath))
70
-
71
- # Download the file
72
- if use_stubs:
73
- open(self.path, 'w').close()
74
- else:
75
- self.result_file.download(self.path)
76
-
77
- self.write_info_file()
78
-
79
- def local_file_checksum(self):
80
- if self.is_stub:
81
- return "__STUB__"
82
- return md5_checksum(self.path)
83
-
84
- def locally_modified(self):
85
- raise NotImplementedError('This function is not implemented')
86
-
87
- def status_is_ok(self, stubs_are_ok=False):
88
- # check for an info file
89
- if not os.path.exists(self.info_filepath):
90
- return False
91
- if stubs_are_ok:
92
- return True
93
- return not self.result_file.download_needs_update(self.path)
94
-
95
- def write_info_file(self):
96
- result_file_info = {
97
- "uuid": self.result_file.uuid,
98
- "kind": self.kind,
99
- "checksum": self.local_file_checksum(),
100
- }
101
- with open(self.info_filepath, 'w') as f:
102
- json.dump(result_file_info, f)
103
-
104
- @classmethod
105
- def from_path(cls, path):
106
- obj = cls(None, path, None)
107
- try:
108
- with open(obj.info_filepath, 'r') as f:
109
- result_file_info = json.load(f)
110
- obj.result_file = result_file_from_id(result_file_info['uuid'])
111
- obj.kind = result_file_info['kind']
112
- obj.stored_checksum = result_file_info['checksum']
113
- except FileNotFoundError:
114
- pass
115
- return obj
116
-
117
- def write_info_file(self):
118
- result_file_info = {
119
- "uuid": self.result_file.uuid,
120
- "kind": self.kind,
121
- "checksum": self.local_file_checksum(),
122
- }
123
- with open(self.info_filepath, 'w') as f:
124
- json.dump(result_file_info, f)
125
-
126
- def list_abnormal_objects(self):
127
- """Return a list of files that have been modified.
128
-
129
- Since this class is a single file the list will either be empty
130
- or have one element.
131
-
132
- Note that if a file was modified locally then uploaded to the server
133
- the file will be marked as modified remote.
134
- """
135
- if self.result_file is None:
136
- return [('FILE', FILE_STATUS_NEW_LOCAL, self.path, None)]
137
- if not os.path.exists(self.path):
138
- return [('FILE', FILE_STATUS_NEW_REMOTE, self.path, self.result_file)]
139
- if self.is_stub:
140
- return [('FILE', FILE_STATUS_IS_LOCAL_STUB, self.path, self.result_file)]
141
- if self.result_file and self.result_file.download_needs_update(self.path):
142
- return [('FILE', FILE_STATUS_MODIFIED_REMOTE, self.path, self.result_file)]
143
- if self.locally_modified():
144
- return [('FILE', FILE_STATUS_MODIFIED_LOCAL, self.path, self.result_file)]
145
-
146
- return []
147
-
148
-
149
- class ResultFolderOnFilesystem:
150
-
151
- def __init__(self, result_folder, path, kind):
152
- self.result_folder = result_folder
153
- self.path = path
154
- self.kind = kind
155
-
156
- @property
157
- def info_filepath(self):
158
- return os.path.join(self.path, '.gs_result_folder')
159
-
160
- def download(self, use_stubs=False, exists_ok=False):
161
- if os.path.exists(self.info_filepath) and not exists_ok:
162
- raise ValueError('Result folder already exists at path: {}'.format(self.info_filepath))
163
-
164
- # Download the files in the result folder
165
- for result_file in self.result_folder.get_fields():
166
- result_file_local_path = os.path.join(self.path, result_file.name)
167
- os.makedirs(os.path.dirname(result_file_local_path), exist_ok=True)
168
- ResultFileOnFilesystem(result_file, result_file_local_path, self.kind)\
169
- .download(use_stubs=use_stubs, exists_ok=exists_ok)
170
-
171
- # Write the result folder data
172
- result_folder_info = {
173
- "uuid": self.result_folder.uuid,
174
- "kind": self.kind
175
- }
176
- with open(self.info_filepath, 'w') as f:
177
- json.dump(result_folder_info, f)
178
-
179
- def status_is_ok(self):
180
- # check for an info file
181
- if not os.path.exists(self.info_filepath):
182
- return False
183
-
184
- # check that all files are downloaded
185
- for result_file in self.result_folder.get_files():
186
- result_file_path = os.path.join(self.path, result_file.name)
187
- if not os.path.exists(result_file_path):
188
- return False
189
-
190
- return True
191
-
192
- @classmethod
193
- def from_path(cls, path):
194
- obj = cls(None, path, None)
195
- try:
196
- with open(os.path.join(path, '.gs_result_folder'), 'r') as f:
197
- result_folder_info = json.load(f)
198
- obj.result_folder = result_folder_from_id(result_folder_info['uuid'])
199
- obj.kind = result_folder_info['kind']
200
- except FileNotFoundError:
201
- pass
202
- return obj
203
-
204
- def list_abnormal_objects(self):
205
- """Return a list of files that have been modified.
206
-
207
- This function will return a list of tuples where the first element
208
- is the status of the file and the second element is the path to the file.
209
- """
210
- modified_files = []
211
- if not self.result_folder:
212
- modified_files.append(('FOLDER', FILE_STATUS_NEW_LOCAL, self.path, None))
213
- if not os.path.exists(self.path):
214
- modified_files.append(('FOLDER', FILE_STATUS_NEW_REMOTE, self.path, self.result_folder))
215
-
216
- # list local files
217
- if os.path.exists(self.path):
218
- for local_file in os.listdir(self.path):
219
- if local_file.startswith('.gs_'):
220
- continue
221
- local_file_path = os.path.join(self.path, local_file)
222
- result_file_on_fs = ResultFileOnFilesystem.from_path(local_file_path)
223
- modified_files.extend(result_file_on_fs.list_abnormal_objects())
224
-
225
- # list remote files
226
- if self.result_folder:
227
- for result_file in self.result_folder.get_fields():
228
- result_file_path = os.path.join(self.path, result_file.name)
229
- result_file_on_fs = ResultFileOnFilesystem(result_file, result_file_path, self.kind)
230
- modified_files.extend(result_file_on_fs.list_abnormal_objects())
231
-
232
- return dedupe_modified_files(modified_files)
233
-
234
-
235
- class SampleOnFilesystem:
236
-
237
- def __init__(self, sample, path):
238
- self.sample = sample
239
- self.path = path if path[-1] != '/' else path[:-1] # remove trailing slash
240
-
241
- @property
242
- def info_filepath(self):
243
- return os.path.join(self.path, '.gs_sample')
244
-
245
- def download(self, use_stubs=False, exists_ok=False):
246
- if os.path.exists(self.info_filepath) and not exists_ok:
247
- raise ValueError('Sample already exists at path: {}'.format(self.info_filepath))
248
-
249
- # download result folders
250
- for result_folder in self.sample.get_result_folders():
251
- result_folder_local_path = os.path.join(self.path, result_folder.name)
252
- os.makedirs(result_folder_local_path, exist_ok=True)
253
- ResultFolderOnFilesystem(result_folder, result_folder_local_path, "sample")\
254
- .download(use_stubs=use_stubs, exists_ok=exists_ok)
255
-
256
- # Write the sample data
257
- sample_info = {
258
- "uuid": self.sample.uuid
259
- }
260
- with open(self.info_filepath, 'w') as f:
261
- json.dump(sample_info, f)
262
-
263
- def status_is_ok(self):
264
- # check for an info file
265
- if not os.path.exists(self.info_filepath):
266
- return False
267
-
268
- # check that all result folders are downloaded
269
- for result_folder in self.sample.get_result_folders():
270
- result_folder_local_path = os.path.join(self.path, result_folder.name)
271
- result_folder_on_fs = ResultFolderOnFilesystem.from_path(result_folder_local_path, "sample")
272
- if not result_folder_on_fs.status_is_ok():
273
- return False
274
-
275
- return True
276
-
277
- @classmethod
278
- def from_path(cls, path):
279
- obj = cls(None, path)
280
- try:
281
- with open(os.path.join(path, '.gs_sample'), 'r') as f:
282
- sample_info = json.load(f)
283
- obj.sample = sample_from_id(sample_info['uuid'])
284
- except FileNotFoundError:
285
- pass
286
- return obj
287
-
288
- def list_abnormal_objects(self):
289
- """Return a list of files that have been modified.
290
-
291
- This function will return a list of tuples where the first element
292
- is the status of the file and the second element is the path to the file.
293
- """
294
- modified_files = []
295
- if not self.sample:
296
- modified_files.append(('SAMPLE', FILE_STATUS_NEW_LOCAL, self.path, None))
297
- if not os.path.exists(self.path):
298
- modified_files.append(('SAMPLE', FILE_STATUS_NEW_REMOTE, self.path, self.sample))
299
-
300
- # list local folders
301
- if os.path.exists(self.path):
302
- for local_folder in os.listdir(self.path):
303
- local_folder_path = os.path.join(self.path, local_folder)
304
- if not os.path.isdir(local_folder_path):
305
- continue
306
- result_folder_on_fs = ResultFolderOnFilesystem.from_path(local_folder_path)
307
- modified_files.extend(result_folder_on_fs.list_abnormal_objects())
308
-
309
- # list remote folders
310
- if self.sample:
311
- for result_folder in self.sample.get_result_folders():
312
- result_folder_path = os.path.join(self.path, result_folder.name)
313
- result_folder_on_fs = ResultFolderOnFilesystem(result_folder, result_folder_path, "sample")
314
- modified_files.extend(result_folder_on_fs.list_abnormal_objects())
315
-
316
- return dedupe_modified_files(modified_files)
317
-
318
-
319
- class ProjectOnFilesystem:
320
-
321
- def __init__(self, project, path):
322
- self.project = project
323
- self.path = path
324
-
325
- @property
326
- def info_filepath(self):
327
- return os.path.join(self.path, '.gs_project')
328
-
329
- def download(self, use_stubs=False, exists_ok=False):
330
- if os.path.exists(self.info_filepath) and not exists_ok:
331
- raise ValueError('Project already exists at path: {}'.format(self.info_filepath))
332
-
333
- # download samples
334
- for sample in self.project.get_samples():
335
- sample_local_path = os.path.join(self.path, "sample_results", sample.name)
336
- os.makedirs(sample_local_path, exist_ok=True)
337
- SampleOnFilesystem(sample, sample_local_path)\
338
- .download(use_stubs=use_stubs, exists_ok=exists_ok)
339
-
340
- # download project result folders
341
- for result_folder in self.project.get_result_folders():
342
- result_folder_local_path = os.path.join(self.path, "project_results", result_folder.name)
343
- os.makedirs(result_folder_local_path, exist_ok=True)
344
- ResultFolderOnFilesystem(result_folder, result_folder_local_path, "project")\
345
- .download(use_stubs=use_stubs, exists_ok=exists_ok)
346
-
347
- # Write the project data
348
- project_info = {
349
- "uuid": self.project.uuid
350
- }
351
- with open(self.info_filepath, 'w') as f:
352
- json.dump(project_info, f)
353
-
354
- def status_is_ok(self):
355
- # check for an info file
356
- if not os.path.exists(self.info_filepath):
357
- return False
358
-
359
- # check that all samples are downloaded
360
- for sample in self.project.get_samples():
361
- sample_local_path = os.path.join(self.path, "sample_results", sample.name)
362
- sample_on_fs = SampleOnFilesystem.from_path(sample_local_path)
363
- if not sample_on_fs.status_is_ok():
364
- return False
365
-
366
- # check that all project result folders are downloaded
367
- for result_folder in self.project.get_result_folders():
368
- result_folder_local_path = os.path.join(self.path, "project_results", result_folder.name)
369
- result_folder_on_fs = ResultFolderOnFilesystem.from_path(result_folder_local_path, "project")
370
- if not result_folder_on_fs.status_is_ok():
371
- return False
372
-
373
- return True
374
-
375
- @classmethod
376
- def from_path(cls, path, recursive=False):
377
- try:
378
- with open(os.path.join(path, '.gs_project'), 'r') as f:
379
- project_info = json.load(f)
380
- project = project_from_id(project_info['uuid'])
381
- return cls(project, path)
382
- except FileNotFoundError:
383
- if not recursive:
384
- raise ValueError('No project found in path or parent directories')
385
- updir = os.path.dirname(os.path.abspath(path))
386
- if updir == path:
387
- raise ValueError('No project found in path or parent directories')
388
- return cls.from_path(updir, recursive=recursive)
389
-
390
- def path_from_project_root(self, path):
391
- if path[0] == "/":
392
- return path.replace(self.path, "")[1:]
393
- return path
394
-
395
- def list_abnormal_objects(self):
396
- """Return a list of files that have been modified.
397
-
398
- This function will return a list of tuples where the first element
399
- is the status of the file and the second element is the path to the file.
400
- """
401
- modified_files = []
402
-
403
- # list remote samples
404
- for sample in self.project.get_samples():
405
- sample_path = os.path.join(self.path, "sample_results", sample.name)
406
- sample_on_fs = SampleOnFilesystem(sample, sample_path)
407
- modified_files.extend(sample_on_fs.list_abnormal_objects())
408
-
409
- # list remote project result folders
410
- for result_folder in self.project.get_result_folders():
411
- result_folder_path = os.path.join(self.path, "project_results", result_folder.name)
412
-
413
- result_folder_on_fs = ResultFolderOnFilesystem(result_folder, result_folder_path, "project")
414
- modified_files.extend(result_folder_on_fs.list_abnormal_objects())
415
-
416
- # list local samples
417
- for local_sample in os.listdir(os.path.join(self.path, "sample_results")):
418
- local_sample_path = os.path.join(self.path, "sample_results", local_sample)
419
- if not os.path.isdir(local_sample_path):
420
- continue
421
- sample_on_fs = SampleOnFilesystem.from_path(local_sample_path)
422
- modified_files.extend(sample_on_fs.list_abnormal_objects())
423
-
424
- # list local project result folders
425
- for local_result_folder in os.listdir(os.path.join(self.path, "project_results")):
426
- local_result_folder_path = os.path.join(self.path, "project_results", local_result_folder)
427
- if not os.path.isdir(local_result_folder_path):
428
- continue
429
- result_folder_on_fs = ResultFolderOnFilesystem.from_path(local_result_folder_path)
430
- modified_files.extend(result_folder_on_fs.list_abnormal_objects())
431
- return dedupe_modified_files(modified_files)
432
-
433
-
434
-
@@ -1,122 +0,0 @@
1
- from fuse import FUSE, Operations
2
- import os
3
-
4
-
5
class GeoSeeqProjectFileSystem(Operations):
    """Mount a GeoSeeq project as a filesystem.

    The project will automatically have this directory structure:
     - <root>/project_results/<project_result_folder_name>/...
     - <root>/sample_results/<sample_name>/...
     - <root>/metadata/sample_metadata.csv
     - <root>/.config/config.json

    Only `open`, `create`, `read` and `release` do real work. Every other
    callback is an unimplemented placeholder that returns None.
    """

    def __init__(self, root, project):
        """Store the local backing directory and the GeoSeeq project.

        Args:
            root: Local directory that backs the mounted files.
            project: Project object used to look up samples, result
                folders and files.
        """
        self.root = root
        self.project = project

    # ---- Placeholder callbacks (not implemented yet) ----

    def access(self, path, mode):
        pass

    def chmod(self, path, mode):
        pass

    def chown(self, path, uid, gid):
        pass

    def getattr(self, path, fh=None):
        pass

    def readdir(self, path, fh):
        pass

    def readlink(self, path):
        pass

    def mknod(self, path, mode, dev):
        pass

    def rmdir(self, path):
        pass

    def mkdir(self, path, mode):
        pass

    def statfs(self, path):
        pass

    def unlink(self, path):
        pass

    def symlink(self, name, target):
        pass

    def rename(self, old, new):
        pass

    def link(self, target, name):
        pass

    def utimens(self, path, times=None):
        pass

    # ---- File callbacks ----

    def open(self, path, flags):
        """Download the remote file behind `path` and open the local copy.

        Args:
            path: Path inside the mount, e.g.
                `/project_results/<folder>/<file>` or
                `/sample_results/<sample>/<folder>/<file>`.
            flags: Flags forwarded to `os.open`.

        Returns:
            An OS-level file descriptor for the local copy.

        Raises:
            NotImplementedError: If `path` is under `metadata`.
        """
        tkns = self._path_tokens(path)
        local_path = self._full_local_path(path)
        section = tkns[0] if tkns else None
        if section == 'project_results':
            result_folder_name, result_file_name = tkns[1], '/'.join(tkns[2:])
            result_folder = self.project.get_result_folder(result_folder_name).get()
            result_file = result_folder.get_file(result_file_name).get()
            self._download_to_local(result_file, local_path)
        elif section == 'sample_results':
            sample_name, result_folder_name, result_file_name = tkns[1], tkns[2], '/'.join(tkns[3:])
            sample = self.project.get_sample(sample_name).get()
            result_folder = sample.get_result_folder(result_folder_name).get()
            result_file = result_folder.get_file(result_file_name).get()
            self._download_to_local(result_file, local_path)
        elif section == 'metadata':
            raise NotImplementedError('TODO')

        return os.open(local_path, flags)

    def create(self, path, mode, fi=None):
        """Create the remote file behind `path` and open a local copy.

        Args:
            path: Path inside the mount (same layout as for `open`).
            mode: Permission bits for the newly created local file.
            fi: Unused; accepted for FUSE interface compatibility.

        Returns:
            An OS-level file descriptor for the new local file. FUSE
            expects `create` to return a handle, so one is now returned.

        Raises:
            NotImplementedError: If `path` is under `metadata`.
        """
        tkns = self._path_tokens(path)
        local_path = self._full_local_path(path)
        section = tkns[0] if tkns else None
        if section == 'project_results':
            result_name, file_name = tkns[1], '/'.join(tkns[2:])
            result_folder = self.project.get_result_folder(result_name).idem()
            result_file = result_folder.get_file(file_name).create()
            self._download_to_local(result_file, local_path)  # nothing to download at this point
        elif section == 'sample_results':
            sample_name, result_folder_name, result_file_name = tkns[1], tkns[2], '/'.join(tkns[3:])
            sample = self.project.get_sample(sample_name).idem()
            result_folder = sample.get_result_folder(result_folder_name).idem()
            result_file = result_folder.get_file(result_file_name).create()
            self._download_to_local(result_file, local_path)  # nothing to download at this point
        elif section == 'metadata':
            raise NotImplementedError('TODO')

        # O_CREAT makes sure the local file exists even when the download
        # step produced nothing.
        os.makedirs(os.path.dirname(local_path) or '.', exist_ok=True)
        return os.open(local_path, os.O_WRONLY | os.O_CREAT, mode)

    def read(self, path, length, offset, fh):
        """Read up to `length` bytes at `offset` from descriptor `fh`."""
        os.lseek(fh, offset, os.SEEK_SET)
        return os.read(fh, length)

    def write(self, path, buf, offset, fh):
        pass

    def truncate(self, path, length, fh=None):
        pass

    def flush(self, path, fh):
        pass

    def release(self, path, fh):
        """Close the descriptor handed out by `open`/`create`.

        Without this every opened file would leak its descriptor.
        """
        return os.close(fh)

    def fsync(self, path, fdatasync, fh):
        pass

    # ---- Helpers ----

    @staticmethod
    def _path_tokens(path):
        """Split a mount path into its non-empty components.

        Dropping empty components removes the '' that a leading slash
        produces, so the first token is always the top-level section name.
        """
        return [tkn for tkn in path.split('/') if tkn]

    @staticmethod
    def _download_to_local(result_file, local_path):
        """Download `result_file` to `local_path`, creating parent dirs."""
        os.makedirs(os.path.dirname(local_path) or '.', exist_ok=True)
        result_file.download(local_path)

    def _full_local_path(self, partial):
        """Map a mount path onto the local backing directory `self.root`."""
        # Strip every leading slash: os.path.join would otherwise discard
        # self.root when handed an absolute component.
        return os.path.join(self.root, partial.lstrip("/"))
121
-
122
-
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes