geoseeq 0.6.0__tar.gz → 0.6.1__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (98) hide show
  1. {geoseeq-0.6.0 → geoseeq-0.6.1}/PKG-INFO +1 -1
  2. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/main.py +1 -1
  3. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/result/file_download.py +46 -7
  4. geoseeq-0.6.1/geoseeq/result/resumable_download_tracker.py +99 -0
  5. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/upload_download_manager.py +1 -1
  6. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq.egg-info/PKG-INFO +1 -1
  7. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq.egg-info/SOURCES.txt +1 -0
  8. {geoseeq-0.6.0 → geoseeq-0.6.1}/pyproject.toml +1 -1
  9. {geoseeq-0.6.0 → geoseeq-0.6.1}/LICENSE +0 -0
  10. {geoseeq-0.6.0 → geoseeq-0.6.1}/README.md +0 -0
  11. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/__init__.py +0 -0
  12. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/app.py +0 -0
  13. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/blob_constructors.py +0 -0
  14. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/bulk_creators.py +0 -0
  15. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/__init__.py +0 -0
  16. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/constants.py +0 -0
  17. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/copy.py +0 -0
  18. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/detail.py +0 -0
  19. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/download.py +0 -0
  20. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/fastq_utils.py +0 -0
  21. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/get_eula.py +0 -0
  22. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/manage.py +0 -0
  23. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/progress_bar.py +0 -0
  24. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/project.py +0 -0
  25. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/raw.py +0 -0
  26. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/run.py +0 -0
  27. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/search.py +0 -0
  28. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/shared_params/__init__.py +0 -0
  29. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/shared_params/common_state.py +0 -0
  30. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/shared_params/config.py +0 -0
  31. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/shared_params/id_handlers.py +0 -0
  32. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/shared_params/obj_getters.py +0 -0
  33. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/shared_params/opts_and_args.py +0 -0
  34. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/upload/__init__.py +0 -0
  35. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/upload/upload.py +0 -0
  36. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/upload/upload_advanced.py +0 -0
  37. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/upload/upload_reads.py +0 -0
  38. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/user.py +0 -0
  39. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/utils.py +0 -0
  40. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/cli/view.py +0 -0
  41. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/constants.py +0 -0
  42. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/contrib/__init__.py +0 -0
  43. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/contrib/ncbi/__init__.py +0 -0
  44. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/contrib/ncbi/api.py +0 -0
  45. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/contrib/ncbi/bioproject.py +0 -0
  46. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/contrib/ncbi/cli.py +0 -0
  47. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/contrib/ncbi/setup_logging.py +0 -0
  48. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/file_system_cache.py +0 -0
  49. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/id_constructors/__init__.py +0 -0
  50. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/id_constructors/from_blobs.py +0 -0
  51. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/id_constructors/from_ids.py +0 -0
  52. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/id_constructors/from_names.py +0 -0
  53. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/id_constructors/from_uuids.py +0 -0
  54. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/id_constructors/resolvers.py +0 -0
  55. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/id_constructors/utils.py +0 -0
  56. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/knex.py +0 -0
  57. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/organization.py +0 -0
  58. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/pipeline.py +0 -0
  59. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/plotting/__init__.py +0 -0
  60. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/plotting/constants.py +0 -0
  61. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/plotting/highcharts.py +0 -0
  62. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/plotting/map/__init__.py +0 -0
  63. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/plotting/map/base_layer.py +0 -0
  64. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/plotting/map/map.py +0 -0
  65. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/plotting/map/overlay.py +0 -0
  66. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/plotting/selectable.py +0 -0
  67. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/project.py +0 -0
  68. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/remote_object.py +0 -0
  69. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/result/__init__.py +0 -0
  70. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/result/bioinfo.py +0 -0
  71. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/result/file_chunker.py +0 -0
  72. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/result/file_upload.py +0 -0
  73. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/result/result_file.py +0 -0
  74. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/result/result_folder.py +0 -0
  75. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/result/resumable_upload_tracker.py +0 -0
  76. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/result/utils.py +0 -0
  77. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/sample.py +0 -0
  78. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/search.py +0 -0
  79. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/user.py +0 -0
  80. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/utils.py +0 -0
  81. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/vc/__init__.py +0 -0
  82. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/vc/checksum.py +0 -0
  83. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/vc/cli.py +0 -0
  84. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/vc/clone.py +0 -0
  85. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/vc/constants.py +0 -0
  86. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/vc/vc_cache.py +0 -0
  87. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/vc/vc_dir.py +0 -0
  88. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/vc/vc_sample.py +0 -0
  89. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/vc/vc_stub.py +0 -0
  90. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq/work_orders.py +0 -0
  91. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq.egg-info/dependency_links.txt +0 -0
  92. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq.egg-info/entry_points.txt +0 -0
  93. {geoseeq-0.6.0 → geoseeq-0.6.1}/geoseeq.egg-info/top_level.txt +0 -0
  94. {geoseeq-0.6.0 → geoseeq-0.6.1}/setup.cfg +0 -0
  95. {geoseeq-0.6.0 → geoseeq-0.6.1}/setup.py +0 -0
  96. {geoseeq-0.6.0 → geoseeq-0.6.1}/tests/__init__.py +0 -0
  97. {geoseeq-0.6.0 → geoseeq-0.6.1}/tests/test_api_client.py +0 -0
  98. {geoseeq-0.6.0 → geoseeq-0.6.1}/tests/test_plotting.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geoseeq
3
- Version: 0.6.0
3
+ Version: 0.6.1
4
4
  Summary: GeoSeeq command line tools and python API
5
5
  Author: David C. Danko
6
6
  Author-email: "David C. Danko" <dcdanko@biotia.io>
@@ -54,7 +54,7 @@ def version():
54
54
  Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
55
55
  Run `geoseeq eula show` to view the EULA.
56
56
  """
57
- click.echo('0.6.0') # remember to update setup
57
+ click.echo('0.6.1') # remember to update setup
58
58
 
59
59
 
60
60
  @main.group('advanced')
@@ -2,15 +2,22 @@
2
2
  import urllib.request
3
3
  import logging
4
4
  import requests
5
- from os.path import basename, getsize, join, isfile, getmtime
5
+ import os
6
+ from os.path import basename, getsize, join, isfile, getmtime, dirname
6
7
  from pathlib import Path
7
8
  from tempfile import NamedTemporaryFile
8
9
 
9
10
  from geoseeq.utils import download_ftp
10
11
  from geoseeq.constants import FIVE_MB
12
+ from hashlib import md5
13
+ from .resumable_download_tracker import ResumableDownloadTracker
11
14
 
12
15
  logger = logging.getLogger("geoseeq_api") # Same name as calling module
13
16
 
17
def url_to_id(url):
    """Return a short, stable identifier for `url`.

    Everything from the first "?" onwards is discarded before hashing, so
    two URLs that differ only in their query string map to the same id.
    The id is the first 16 hex characters of the MD5 digest of what remains.
    """
    without_query, _, _ = url.partition("?")
    digest = md5(without_query.encode()).hexdigest()
    return digest[:16]
20
+
14
21
 
15
22
  def _download_head(url, filename, head=None, start=0, progress_tracker=None):
16
23
  headers = None
@@ -20,11 +27,43 @@ def _download_head(url, filename, head=None, start=0, progress_tracker=None):
20
27
  response.raise_for_status()
21
28
  total_size_in_bytes = int(response.headers.get('content-length', 0))
22
29
  if progress_tracker: progress_tracker.set_num_chunks(total_size_in_bytes)
23
- block_size = FIVE_MB
30
+ if total_size_in_bytes > 10 * FIVE_MB: # Use resumable download
31
+ print("Using resumable download")
32
+ return _download_resumable(response, filename, total_size_in_bytes, progress_tracker)
33
+ else:
34
+ block_size = FIVE_MB
35
+ with open(filename, 'wb') as file:
36
+ for data in response.iter_content(block_size):
37
+ if progress_tracker: progress_tracker.update(len(data))
38
+ file.write(data)
39
+ return filename
40
+
41
+
42
def _download_resumable(response, filename, total_size_in_bytes, progress_tracker=None, chunk_size=5 * FIVE_MB, part_prefix=".gs_download_{}_{}."):
    """Download a large file in parts, then concatenate the parts into `filename`.

    Each part is fetched with its own `_download_head` call and written to a
    hidden part file next to `filename`. Completed parts are recorded in a
    `ResumableDownloadTracker` so that an interrupted download can skip the
    parts that are already on disk.

    Args:
        response: the initial response for the file; only `response.url` is
            read here.
        filename: final local path of the assembled file.
        total_size_in_bytes: full size of the remote file.
        progress_tracker: optional object whose `update(n_bytes)` is called
            once per part (including parts that were already on disk).
        chunk_size: size of each part in bytes.
        part_prefix: format string (part index, last part index) prepended to
            the basename of `filename` to name each part file.

    Returns:
        `filename`.

    NOTE(review): parts are fetched through `_download_head`, which itself
    switches to this function above a size threshold; keep `chunk_size` at or
    below that threshold to avoid re-entering the resumable path per part.
    """
    target_id = url_to_id(response.url)
    tracker = ResumableDownloadTracker(chunk_size, target_id, filename)
    if not tracker.download_started:
        tracker.start_download(response.url)

    # Ceiling division: a floor here would drop the trailing partial chunk
    # and silently truncate every file whose size is not a multiple of
    # chunk_size.
    n_chunks = (total_size_in_bytes + chunk_size - 1) // chunk_size

    # Local record of where each part lives, so assembly does not depend on
    # the tracker (which may be closed if its cache directory is unusable).
    part_paths = {}
    for i in range(n_chunks):
        bytes_start = i * chunk_size
        # Inclusive end offset, clamped for the (possibly shorter) last part.
        bytes_end = min(bytes_start + chunk_size, total_size_in_bytes) - 1
        if tracker.part_has_been_downloaded(i):
            logger.debug(f"Part {i} has already been downloaded.")
            part_paths[i] = tracker.get_part_info(i)["part_filename"]
        else:
            logger.debug(f"Downloading part {i} of {n_chunks - 1}")
            part_filename = join(dirname(filename), part_prefix.format(i, n_chunks - 1) + basename(filename))
            _download_head(response.url, part_filename, head=bytes_end, start=bytes_start, progress_tracker=None)
            part_paths[i] = part_filename
            if tracker.open:
                part_info = dict(part_number=i, start=bytes_start, end=bytes_end, part_filename=part_filename)
                tracker.add_part(part_info)
        if progress_tracker:
            progress_tracker.update(bytes_end - bytes_start + 1)

    # At this point all parts have been downloaded; stitch them in order.
    with open(filename, 'wb') as file:
        for i in range(n_chunks):
            with open(part_paths[i], 'rb') as part_file:
                file.write(part_file.read())

    if tracker.open:
        tracker.cleanup()
    else:
        # A closed tracker's cleanup() is a no-op, so remove part files here.
        for part_path in part_paths.values():
            if isfile(part_path):
                os.remove(part_path)
    return filename
29
68
 
30
69
 
@@ -44,7 +83,7 @@ def guess_download_kind(url):
44
83
  return 'generic'
45
84
 
46
85
 
47
- def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None):
86
+ def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None, target_uuid=None):
48
87
  """Return a local filepath to the downloaded file. Download the file."""
49
88
  if filename and isfile(filename):
50
89
  file_size = getsize(filename)
@@ -135,7 +174,7 @@ class ResultFileDownload:
135
174
  url = self.get_download_url()
136
175
  filepath = download_url(
137
176
  url, blob_type, filename,
138
- head=head, progress_tracker=progress_tracker
177
+ head=head, progress_tracker=progress_tracker,
139
178
  )
140
179
  if cache and flag_suffix:
141
180
  # create flag file
@@ -0,0 +1,99 @@
1
+
2
+ import time
3
+ import json
4
+ import os
5
+ from os.path import basename, getsize, join, dirname, isfile, getctime
6
+ from pathlib import Path
7
+ from random import random
8
+ import requests
9
+
10
+ from geoseeq.knex import GeoseeqGeneralError
11
+ from geoseeq.constants import FIVE_MB
12
+ from geoseeq.utils import md5_checksum
13
+ from concurrent.futures import ThreadPoolExecutor, as_completed
14
+ from .utils import *
15
+ from geoseeq.file_system_cache import GEOSEEQ_CACHE_DIR
16
+ from .file_chunker import FileChunker
17
+
18
+
19
+
20
class ResumableDownloadTracker:
    """Persist which parts of a chunked download have already been fetched.

    State lives in a line-delimited JSON file under
    `GEOSEEQ_CACHE_DIR/download`: the first line is a header describing the
    download, and every following line describes one completed part (a dict
    with at least `part_number` and `part_filename`).

    If the tracker directory cannot be created the tracker is "closed"
    (`self.open` is False): `start_download` and `cleanup` become no-ops,
    `part_has_been_downloaded` always returns False, `get_part_info` returns
    None, and `add_part` raises.
    """

    def __init__(self, chunk_size, download_target_id, target_local_path, tracker_file_prefix="gs_resumable_download_tracker"):
        """Set up the tracker and load any state left by an earlier run.

        The tracker file name combines `tracker_file_prefix`,
        `download_target_id`, `chunk_size` and the basename of
        `target_local_path`.
        """
        self.open, self.download_started = True, False
        self.download_target_id = download_target_id
        self.target_local_path = target_local_path
        # Always defined, so readers never hit AttributeError before a
        # download has been started or loaded.
        self.download_url = None
        self.tracker_file_dir = join(GEOSEEQ_CACHE_DIR, 'download')
        self.tracker_file = join(
            self.tracker_file_dir,
            tracker_file_prefix + f".{download_target_id}.{chunk_size}." + basename(target_local_path)
        )
        try:
            os.makedirs(self.tracker_file_dir, exist_ok=True)
        except Exception as e:
            # Best effort: without a cache directory the download still
            # works, it just cannot be resumed.
            logger.warning(f'Could not create resumable download tracker directory. {e}')
            self.open = False
        self._loaded_parts = {}
        self._load_parts_from_file()

    def start_download(self, download_url):
        """Write the header line of a new tracker file.

        Returns `self`, or None when the tracker is closed.

        Raises:
            GeoseeqGeneralError: if a download has already been started.
        """
        if not self.open:
            return
        if self.download_started:
            raise GeoseeqGeneralError("Download has already started.")
        self.download_started = True
        blob = dict(download_url=download_url,
                    download_target_id=self.download_target_id,
                    start_time=time.time())
        serialized = json.dumps(blob)
        # "w" truncates, which also discards any unreadable leftover file.
        with open(self.tracker_file, "w") as f:
            f.write(serialized + "\n")
        self.download_url = download_url
        return self

    def add_part(self, part_download_info):
        """Record one completed part, keyed by its `part_number`.

        Raises:
            AssertionError: if the tracker is closed.
        """
        if not self.open:
            # Raised explicitly rather than via `assert`, which is stripped
            # when Python runs with -O.
            raise AssertionError("Cannot add part to closed ResumableDownloadTracker")
        part_id = part_download_info["part_number"]
        serialized = json.dumps(part_download_info)
        with open(self.tracker_file, "a") as f:
            f.write(serialized + "\n")
        self._loaded_parts[part_id] = part_download_info

    def _load_parts_from_file(self):
        """Load the header and part records from the tracker file, if any.

        A missing file, or one whose header cannot be parsed, leaves the
        tracker in its "not started" state. Part lines that are blank or
        unparseable (for example a line cut short by an interrupted write)
        are skipped; those parts will simply be downloaded again.
        """
        if not isfile(self.tracker_file):
            return
        with open(self.tracker_file, "r") as f:
            try:
                header_blob = json.loads(f.readline())
                download_url = header_blob["download_url"]
            except (ValueError, KeyError, TypeError):
                # Unusable header: behave as if no download was recorded.
                return
            self.download_url = download_url
            # The header also stores "start_time"; for now resumable
            # downloads are never expired, so it is not read.
            self.download_started = True
            for line in f:
                if not line.strip():
                    continue
                try:
                    part_info = json.loads(line)
                    part_id = part_info["part_number"]
                except (ValueError, KeyError, TypeError):
                    continue
                self._loaded_parts[part_id] = part_info

    def part_has_been_downloaded(self, part_number):
        """Return True if the part is recorded and its part file still exists."""
        if not self.open:
            return False
        part_info = self._loaded_parts.get(part_number)
        if part_info is None:
            return False
        return isfile(part_info["part_filename"])

    def get_part_info(self, part_number):
        """Return the recorded dict for a part, or None if unknown or closed."""
        if not self.open:
            return None
        return self._loaded_parts.get(part_number, None)

    def cleanup(self):
        """Delete all recorded part files and the tracker file, then close."""
        if not self.open:
            return
        for part in self._loaded_parts.values():
            part_path = part["part_filename"]
            if isfile(part_path):
                os.remove(part_path)
        # Guarded so a tracker file that is already gone does not abort
        # cleanup after the parts have been removed.
        if isfile(self.tracker_file):
            os.remove(self.tracker_file)
        self.open = False
99
+
@@ -194,7 +194,7 @@ class GeoSeeqDownloadManager:
194
194
  self._convert_result_files_to_urls()
195
195
  download_args = [(
196
196
  url, file_path,
197
- self.progress_tracker_factory(url),
197
+ self.progress_tracker_factory(file_path),
198
198
  self.ignore_errors, self.head, self.log_level,
199
199
  self.n_parallel_downloads > 1
200
200
  ) for url, file_path in self._result_files]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geoseeq
3
- Version: 0.6.0
3
+ Version: 0.6.1
4
4
  Summary: GeoSeeq command line tools and python API
5
5
  Author: David C. Danko
6
6
  Author-email: "David C. Danko" <dcdanko@biotia.io>
@@ -79,6 +79,7 @@ geoseeq/result/file_download.py
79
79
  geoseeq/result/file_upload.py
80
80
  geoseeq/result/result_file.py
81
81
  geoseeq/result/result_folder.py
82
+ geoseeq/result/resumable_download_tracker.py
82
83
  geoseeq/result/resumable_upload_tracker.py
83
84
  geoseeq/result/utils.py
84
85
  geoseeq/vc/__init__.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "geoseeq"
7
- version = "0.6.0"
7
+ version = "0.6.1"
8
8
  authors = [
9
9
  { name="David C. Danko", email="dcdanko@biotia.io" },
10
10
  ]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes