geoseeq 0.2.1__tar.gz → 0.2.3__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. {geoseeq-0.2.1 → geoseeq-0.2.3}/PKG-INFO +1 -1
  2. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/download.py +38 -8
  3. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/main.py +1 -1
  4. geoseeq-0.2.3/geoseeq/cli/progress_bar.py +28 -0
  5. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/upload/upload_reads.py +1 -30
  6. geoseeq-0.2.3/geoseeq/result/file_download.py +102 -0
  7. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/result/file_upload.py +4 -4
  8. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/result/utils.py +0 -16
  9. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq.egg-info/PKG-INFO +1 -1
  10. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq.egg-info/SOURCES.txt +1 -0
  11. {geoseeq-0.2.1 → geoseeq-0.2.3}/setup.py +1 -1
  12. geoseeq-0.2.1/geoseeq/result/file_download.py +0 -95
  13. {geoseeq-0.2.1 → geoseeq-0.2.3}/LICENSE +0 -0
  14. {geoseeq-0.2.1 → geoseeq-0.2.3}/README.md +0 -0
  15. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/__init__.py +0 -0
  16. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/blob_constructors.py +0 -0
  17. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/bulk_creators.py +0 -0
  18. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/__init__.py +0 -0
  19. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/add.py +0 -0
  20. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/constants.py +0 -0
  21. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/copy.py +0 -0
  22. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/create.py +0 -0
  23. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/delete.py +0 -0
  24. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/fastq_utils.py +0 -0
  25. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/list.py +0 -0
  26. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/shared_params/__init__.py +0 -0
  27. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/shared_params/common_state.py +0 -0
  28. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/shared_params/id_handlers.py +0 -0
  29. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/shared_params/id_utils.py +0 -0
  30. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/shared_params/obj_getters.py +0 -0
  31. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/shared_params/opts_and_args.py +0 -0
  32. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/upload/__init__.py +0 -0
  33. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/upload/upload.py +0 -0
  34. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/user.py +0 -0
  35. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/utils.py +0 -0
  36. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/cli/view.py +0 -0
  37. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/constants.py +0 -0
  38. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/contrib/__init__.py +0 -0
  39. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/contrib/ncbi/__init__.py +0 -0
  40. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/contrib/ncbi/api.py +0 -0
  41. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/contrib/ncbi/bioproject.py +0 -0
  42. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/contrib/ncbi/cli.py +0 -0
  43. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/contrib/ncbi/setup_logging.py +0 -0
  44. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/file_system_cache.py +0 -0
  45. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/knex.py +0 -0
  46. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/organization.py +0 -0
  47. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/pipeline.py +0 -0
  48. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/project.py +0 -0
  49. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/remote_object.py +0 -0
  50. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/result/__init__.py +0 -0
  51. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/result/bioinfo.py +0 -0
  52. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/result/result_file.py +0 -0
  53. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/result/result_folder.py +0 -0
  54. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/sample.py +0 -0
  55. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/user.py +0 -0
  56. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/utils.py +0 -0
  57. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/vc/__init__.py +0 -0
  58. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/vc/checksum.py +0 -0
  59. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/vc/cli.py +0 -0
  60. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/vc/clone.py +0 -0
  61. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/vc/constants.py +0 -0
  62. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/vc/vc_cache.py +0 -0
  63. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/vc/vc_dir.py +0 -0
  64. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/vc/vc_sample.py +0 -0
  65. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/vc/vc_stub.py +0 -0
  66. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq/work_orders.py +0 -0
  67. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq.egg-info/dependency_links.txt +0 -0
  68. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq.egg-info/entry_points.txt +0 -0
  69. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq.egg-info/requires.txt +0 -0
  70. {geoseeq-0.2.1 → geoseeq-0.2.3}/geoseeq.egg-info/top_level.txt +0 -0
  71. {geoseeq-0.2.1 → geoseeq-0.2.3}/pyproject.toml +0 -0
  72. {geoseeq-0.2.1 → geoseeq-0.2.3}/setup.cfg +0 -0
  73. {geoseeq-0.2.1 → geoseeq-0.2.3}/tests/__init__.py +0 -0
  74. {geoseeq-0.2.1 → geoseeq-0.2.3}/tests/test_api_client.py +0 -0
  75. {geoseeq-0.2.1 → geoseeq-0.2.3}/tests/test_work_orders.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geoseeq
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: # Geoseeq API Client
5
5
  Author: David C. Danko
6
6
  Author-email: dcdanko@biotia.io
@@ -5,7 +5,7 @@ from os.path import dirname, join
5
5
 
6
6
  import click
7
7
  import pandas as pd
8
-
8
+ from multiprocessing import Pool
9
9
  from .shared_params import (
10
10
  handle_project_id,
11
11
  project_id_arg,
@@ -14,13 +14,14 @@ from .shared_params import (
14
14
  use_common_state,
15
15
  flatten_list_of_els_and_files
16
16
  )
17
- from geoseeq.result.utils import _download_head
17
+ from geoseeq.result.file_download import download_url
18
18
  from geoseeq.utils import download_ftp
19
19
  from geoseeq.blob_constructors import (
20
20
  sample_result_file_from_uuid,
21
21
  project_result_file_from_uuid,
22
22
  )
23
23
  from geoseeq.knex import GeoseeqNotFoundError
24
+ from .progress_bar import PBarManager
24
25
  from .utils import convert_size
25
26
 
26
27
  logger = logging.getLogger('geoseeq_api')
@@ -83,8 +84,16 @@ def cli_download_metadata(state, sample_ids):
83
84
  click.echo("Metadata successfully downloaded for samples.", err=True)
84
85
 
85
86
 
87
+ def _download_one_file(args):
88
+ url, file_path, pbar = args
89
+ return download_url(url, filename=file_path, progress_tracker=pbar)
90
+
91
+
92
+ cores_option = click.option('--cores', default=1, help='Number of downloads to run in parallel')
93
+
86
94
  @cli_download.command("files")
87
95
  @use_common_state
96
+ @cores_option
88
97
  @click.option("--target-dir", default=".")
89
98
  @click.option('--yes/--confirm', default=False, help='Skip confirmation prompts')
90
99
  @click.option("--download/--urls-only", default=True, help="Download files or just print urls")
@@ -98,6 +107,7 @@ def cli_download_metadata(state, sample_ids):
98
107
  @sample_ids_arg
99
108
  def cli_download_files(
100
109
  state,
110
+ cores,
101
111
  sample_name_includes,
102
112
  target_dir,
103
113
  yes,
@@ -186,23 +196,32 @@ def cli_download_files(
186
196
  if not yes:
187
197
  click.confirm('Do you want to download these files?', abort=True)
188
198
 
199
+ download_args = []
200
+ pbars = PBarManager()
189
201
  for fname, url in response["links"].items():
190
202
  click.echo(f"Downloading file {fname}")
191
203
  file_path = join(target_dir, fname)
192
204
  makedirs(dirname(file_path), exist_ok=True)
193
- if url.startswith("ftp"):
194
- download_ftp(url, file_path)
195
- else:
196
- _download_head(url, file_path)
205
+ pbar = pbars.get_new_bar(file_path)
206
+ download_args.append((url, file_path, pbar))
207
+ if cores == 1:
208
+ download_url(url, filename=file_path, progress_tracker=pbar)
209
+
210
+ if cores > 1:
211
+ with Pool(cores) as p:
212
+ for _ in p.imap_unordered(_download_one_file, download_args):
213
+ pass
197
214
 
198
215
 
199
216
  @cli_download.command("ids")
200
217
  @use_common_state
218
+ @cores_option
201
219
  @click.option("--target-dir", default=".")
202
220
  @click.option('--yes/--confirm', default=False, help='Skip confirmation prompts')
203
221
  @click.option("--download/--urls-only", default=True, help="Download files or just print urls")
222
+ @click.option('--head', default=None, type=int, help='Download the first N bytes of each file')
204
223
  @click.argument("ids", nargs=-1)
205
- def cli_download_ids(state, target_dir, yes, download, ids):
224
+ def cli_download_ids(state, cores, target_dir, yes, download, head, ids):
206
225
  """Download a files from GeoSeeq based on their UUID or GeoSeeq Resource Number (GRN).
207
226
 
208
227
  This command downloads files directly based on their ID. This is used for "manual"
@@ -228,6 +247,7 @@ def cli_download_ids(state, target_dir, yes, download, ids):
228
247
  ---
229
248
  """
230
249
  result_file_ids = flatten_list_of_els_and_files(ids)
250
+ cores = max(cores, len(result_file_ids)) # don't use more cores than files
231
251
  knex = state.get_knex()
232
252
  result_files = []
233
253
  for result_id in result_file_ids:
@@ -249,8 +269,18 @@ def cli_download_ids(state, target_dir, yes, download, ids):
249
269
  if not yes:
250
270
  click.confirm('Do you want to download these files?', abort=True)
251
271
 
272
+ download_args = []
273
+ pbars = PBarManager()
252
274
  for result_file in result_files:
253
275
  click.echo(f"Downloading file {result_file.get_referenced_filename()}")
254
276
  file_path = join(target_dir, result_file.get_referenced_filename())
255
277
  makedirs(dirname(file_path), exist_ok=True)
256
- result_file.download(file_path)
278
+ pbar = pbars.get_new_bar(file_path)
279
+ download_args.append((result_file, file_path, pbar))
280
+ if cores == 1:
281
+ result_file.download(file_path, progress_tracker=pbar, head=head)
282
+
283
+ if cores > 1:
284
+ with Pool(cores) as p:
285
+ for _ in p.imap_unordered(_download_one_file, download_args):
286
+ pass
@@ -31,7 +31,7 @@ main.add_command(cli_upload)
31
31
  @main.command()
32
32
  def version():
33
33
  """Print the version of the Geoseeq API being used."""
34
- click.echo('0.2.1') # remember to update setup
34
+ click.echo('0.2.3') # remember to update setup
35
35
 
36
36
 
37
37
  @main.group('advanced')
@@ -0,0 +1,28 @@
1
+ from tqdm import tqdm
2
+ from os.path import basename
3
+
4
+ class TQBar:
5
+
6
+ def __init__(self, pos, desc) -> None:
7
+ self.n_bars = 0
8
+ self.pos = pos
9
+ self.desc = desc
10
+ self.bar = None
11
+
12
+ def set_num_chunks(self, n_chunks):
13
+ self.n_bars = n_chunks
14
+ self.bar = tqdm(total=n_chunks, position=self.pos, desc=self.desc, leave=False)
15
+
16
+ def update(self, chunk_num):
17
+ self.bar.update(chunk_num)
18
+
19
+
20
+ class PBarManager:
21
+
22
+ def __init__(self):
23
+ self.n_bars = 0
24
+ self.pbars = []
25
+
26
+ def get_new_bar(self, filepath):
27
+ self.n_bars += 1
28
+ return TQBar(self.n_bars, basename(filepath))
@@ -1,5 +1,4 @@
1
1
  import logging
2
- from tqdm import tqdm
3
2
  import click
4
3
  import requests
5
4
  from os.path import basename
@@ -20,39 +19,11 @@ from geoseeq.cli.shared_params import (
20
19
  )
21
20
 
22
21
  from geoseeq.constants import FASTQ_MODULE_NAMES
23
-
24
-
22
+ from geoseeq.cli.progress_bar import PBarManager
25
23
 
26
24
  logger = logging.getLogger('geoseeq_api')
27
25
 
28
26
 
29
- class TQBar:
30
-
31
- def __init__(self, pos, desc) -> None:
32
- self.n_bars = 0
33
- self.pos = pos
34
- self.desc = desc
35
- self.bar = None
36
-
37
- def set_num_chunks(self, n_chunks):
38
- self.n_bars = n_chunks
39
- self.bar = tqdm(total=n_chunks, position=self.pos, desc=self.desc, leave=False)
40
-
41
- def update(self, chunk_num):
42
- self.bar.update(chunk_num)
43
-
44
-
45
- class PBarManager:
46
-
47
- def __init__(self):
48
- self.n_bars = 0
49
- self.pbars = []
50
-
51
- def get_new_bar(self, filepath):
52
- self.n_bars += 1
53
- return TQBar(self.n_bars, basename(filepath))
54
-
55
-
56
27
  def _make_in_process_logger(log_level):
57
28
  logger = logging.getLogger('geoseeq_api')
58
29
  logger.setLevel(log_level)
@@ -0,0 +1,102 @@
1
+
2
+ import urllib.request
3
+ import logging
4
+ import requests
5
+ from os.path import basename, getsize, join
6
+ from pathlib import Path
7
+ from tempfile import NamedTemporaryFile
8
+
9
+ from geoseeq.utils import download_ftp
10
+ from geoseeq.constants import FIVE_MB
11
+
12
+ logger = logging.getLogger("geoseeq_api") # Same name as calling module
13
+
14
+
15
+ def _download_head(url, filename, head=None, progress_tracker=None):
16
+ headers = None
17
+ if head and head > 0:
18
+ headers = {"Range": f"bytes=0-{head}"}
19
+ response = requests.get(url, stream=True, headers=headers)
20
+ total_size_in_bytes = int(response.headers.get('content-length', 0))
21
+ if progress_tracker: progress_tracker.set_num_chunks(total_size_in_bytes)
22
+ block_size = FIVE_MB
23
+ with open(filename, 'wb') as file:
24
+ for data in response.iter_content(block_size):
25
+ if progress_tracker: progress_tracker.update(len(data))
26
+ file.write(data)
27
+ return filename
28
+
29
+
30
+ def _download_generic(url, filename, head=None):
31
+ urllib.request.urlretrieve(url, filename)
32
+ return filename
33
+
34
+
35
+ def guess_download_kind(url):
36
+ if 'azure' in url:
37
+ return 'azure'
38
+ elif 's3' in url:
39
+ return 's3'
40
+ elif 'ftp' in url:
41
+ return 'ftp'
42
+ else:
43
+ return 'generic'
44
+
45
+
46
+ def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None):
47
+ """Return a local filepath to the downloaded file. Download the file."""
48
+ if kind == 'guess':
49
+ kind = guess_download_kind(url)
50
+ logger.info(f"Guessed download kind: {kind} for {url}")
51
+ logger.info(f"Downloading {kind} file to {filename}")
52
+ if kind == 'generic':
53
+ return _download_generic(url, filename, head=head)
54
+ elif kind == 's3':
55
+ return _download_head(url, filename, head=head, progress_tracker=progress_tracker)
56
+ elif kind == 'azure':
57
+ return _download_head(url, filename, head=head)
58
+ elif kind == 'ftp':
59
+ return download_ftp(url, filename, head=head)
60
+ else:
61
+ raise ValueError(f"Unknown download kind: {kind}")
62
+
63
+
64
+
65
+ class ResultFileDownload:
66
+ """Abstract class that handles download methods for result files."""
67
+
68
+ def get_download_url(self):
69
+ """Return a URL that can be used to download the file for this result."""
70
+ blob_type = self.stored_data.get("__type__", "").lower()
71
+ if blob_type not in ["s3", "sra", "ftp", "azure"]:
72
+ raise ValueError(f'Unknown URL type: "{blob_type}"')
73
+ key = 'url' if 'url' in self.stored_data else 'uri'
74
+ if blob_type in ["s3", "azure"]:
75
+ try:
76
+ url = self.stored_data["presigned_url"]
77
+ except KeyError:
78
+ url = self.stored_data[key]
79
+ if url.startswith("s3://"):
80
+ url = self.stored_data["endpoint_url"] + "/" + url[5:]
81
+ return url
82
+ else:
83
+ return self.stored_data[key]
84
+
85
+ def download(self, filename=None, cache=True, head=None, progress_tracker=None):
86
+ """Return a local filepath to the file this result points to."""
87
+ if not filename:
88
+ self._temp_filename = True
89
+ myfile = NamedTemporaryFile(delete=False)
90
+ myfile.close()
91
+ filename = myfile.name
92
+ blob_type = self.stored_data.get("__type__", "").lower()
93
+ if cache and self._cached_filename:
94
+ return self._cached_filename
95
+ url = self.get_download_url()
96
+ filepath = download_url(
97
+ url, blob_type, filename,
98
+ head=head, progress_tracker=progress_tracker
99
+ )
100
+ if cache:
101
+ self._cached_filename = filepath
102
+ return filepath
@@ -108,7 +108,7 @@ class ResultFileUpload:
108
108
  for num, url in enumerate(list(urls.values())):
109
109
  response_part = self._upload_one_part(file_chunker, url, num, max_retries, session)
110
110
  complete_parts.append(response_part)
111
- progress_tracker.update(file_chunker.get_chunk_size(num))
111
+ if progress_tracker: progress_tracker.update(file_chunker.get_chunk_size(num))
112
112
  logger.info(f'Uploaded part {num + 1} of {len(urls)} for "{file_chunker.filepath}"')
113
113
  return complete_parts
114
114
 
@@ -123,7 +123,7 @@ class ResultFileUpload:
123
123
  for future in as_completed(futures):
124
124
  response_part = future.result()
125
125
  complete_parts.append(response_part)
126
- progress_tracker.update(file_chunker.get_chunk_size(response_part["PartNumber"] - 1))
126
+ if progress_tracker: progress_tracker.update(file_chunker.get_chunk_size(response_part["PartNumber"] - 1))
127
127
  logger.info(
128
128
  f'Uploaded part {response_part["PartNumber"]} of {len(urls)} for "{file_chunker.filepath}"'
129
129
  )
@@ -137,7 +137,7 @@ class ResultFileUpload:
137
137
  chunk_size=FIVE_MB,
138
138
  max_retries=3,
139
139
  session=None,
140
- progress_tracker=lambda x: None,
140
+ progress_tracker=None,
141
141
  threads=1,
142
142
  ):
143
143
  """Upload a file to S3 using the multipart upload process."""
@@ -146,7 +146,7 @@ class ResultFileUpload:
146
146
  logger.info(f'Starting upload for "{filepath}"')
147
147
  complete_parts = []
148
148
  file_chunker = FileChunker(filepath, chunk_size).load_all_chunks()
149
- progress_tracker.set_num_chunks(file_chunker.file_size)
149
+ if progress_tracker: progress_tracker.set_num_chunks(file_chunker.file_size)
150
150
  complete_parts = self._upload_parts(file_chunker, urls, max_retries, session, progress_tracker, threads)
151
151
  self._finish_multipart_upload(upload_id, complete_parts)
152
152
  logger.info(f'Finished Upload for "{filepath}"')
@@ -15,22 +15,6 @@ from geoseeq.utils import download_ftp, md5_checksum
15
15
 
16
16
  logger = logging.getLogger("geoseeq_api") # Same name as calling module
17
17
  logger.addHandler(logging.NullHandler()) # No output unless configured by calling program
18
-
19
-
20
- def _download_head(url, filename, head=None):
21
- if head and head > 0:
22
- opener = urllib.request.build_opener()
23
- if head:
24
- opener.addheaders = [('Range', f'bytes=0-{head}')]
25
- urllib.request.install_opener(opener)
26
- try:
27
- urllib.request.urlretrieve(url, filename) # can throw 416 error if head is too large
28
- except urllib.error.HTTPError as e:
29
- if e.code == 416:
30
- logger.warning(f"HEAD request failed, trying again without HEAD.")
31
- _download_head(url, filename, head=None)
32
- else:
33
- raise e
34
18
 
35
19
 
36
20
  def diff_dicts(blob1, blob2):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geoseeq
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: # Geoseeq API Client
5
5
  Author: David C. Danko
6
6
  Author-email: dcdanko@biotia.io
@@ -32,6 +32,7 @@ geoseeq/cli/download.py
32
32
  geoseeq/cli/fastq_utils.py
33
33
  geoseeq/cli/list.py
34
34
  geoseeq/cli/main.py
35
+ geoseeq/cli/progress_bar.py
35
36
  geoseeq/cli/user.py
36
37
  geoseeq/cli/utils.py
37
38
  geoseeq/cli/view.py
@@ -5,7 +5,7 @@ import setuptools
5
5
 
6
6
  setuptools.setup(
7
7
  name='geoseeq',
8
- version='0.2.1', # remember to update version string in CLI as well
8
+ version='0.2.3', # remember to update version string in CLI as well
9
9
  author="David C. Danko",
10
10
  author_email='dcdanko@biotia.io',
11
11
  description=open('README.md').read(),
@@ -1,95 +0,0 @@
1
-
2
- import urllib.request
3
- from os.path import basename, getsize, join
4
- from pathlib import Path
5
- from tempfile import NamedTemporaryFile
6
-
7
- from geoseeq.utils import download_ftp
8
-
9
- from .utils import *
10
-
11
-
12
- class ResultFileDownload:
13
- """Abstract class that handles download methods for result files."""
14
-
15
- def get_download_url(self):
16
- """Return a URL that can be used to download the file for this result."""
17
- blob_type = self.stored_data.get("__type__", "").lower()
18
- if blob_type not in ["s3", "sra"]:
19
- raise TypeError("Cannot fetch a file for a BLOB type result field.")
20
- if blob_type == "s3":
21
- try:
22
- url = self.stored_data["presigned_url"]
23
- except KeyError:
24
- url = self.stored_data["uri"]
25
- if url.startswith("s3://"):
26
- url = self.stored_data["endpoint_url"] + "/" + url[5:]
27
- return url
28
- elif blob_type == "sra":
29
- url = self.stored_data["url"]
30
- return url
31
-
32
- def download_file(self, filename=None, cache=True, head=None):
33
- """Return a local filepath to the file this result points to."""
34
- if not filename:
35
- self._temp_filename = True
36
- myfile = NamedTemporaryFile(delete=False)
37
- myfile.close()
38
- filename = myfile.name
39
- blob_type = self.stored_data.get("__type__", "").lower()
40
- if cache and self._cached_filename:
41
- return self._cached_filename
42
- if blob_type == "s3":
43
- return self._download_s3(filename, cache, head=head)
44
- elif blob_type == "sra":
45
- return self._download_sra(filename, cache)
46
- elif blob_type == "ftp":
47
- return self._download_ftp(filename, cache)
48
- elif blob_type == "azure":
49
- return self._download_azure(filename, cache, head=head)
50
- else:
51
- raise TypeError("Cannot fetch a file for a BLOB type result field.")
52
-
53
- def _download_s3(self, filename, cache, head=None):
54
- logger.info(f"Downloading S3 file to {filename}")
55
- try:
56
- url = self.stored_data["presigned_url"]
57
- except KeyError:
58
- key = 'uri' if 'uri' in self.stored_data else 'url'
59
- url = self.stored_data[key]
60
- if url.startswith("s3://"):
61
- url = self.stored_data["endpoint_url"] + "/" + url[5:]
62
- _download_head(url, filename, head=head)
63
- if cache:
64
- self._cached_filename = filename
65
- return filename
66
-
67
- def _download_azure(self, filename, cache, head=None):
68
- logger.info(f"Downloading Azure file to {filename}")
69
- try:
70
- url = self.stored_data["presigned_url"]
71
- except KeyError:
72
- key = 'uri' if 'uri' in self.stored_data else 'url'
73
- url = self.stored_data[key]
74
- _download_head(url, filename, head=head)
75
- if cache:
76
- self._cached_filename = filename
77
- return filename
78
-
79
- def _download_sra(self, filename, cache):
80
- return self._download_generic_url(filename, cache)
81
-
82
- def _download_ftp(self, filename, cache, head=None):
83
- logger.info(f"Downloading FTP file to {filename}")
84
- key = 'url' if 'url' in self.stored_data else 'uri'
85
- download_ftp(self.stored_data[key], filename, head=head)
86
- return filename
87
-
88
- def _download_generic_url(self, filename, cache):
89
- logger.info(f"Downloading generic URL file to {filename}")
90
- key = 'url' if 'url' in self.stored_data else 'uri'
91
- url = self.stored_data[key]
92
- urllib.request.urlretrieve(url, filename)
93
- if cache:
94
- self._cached_filename = filename
95
- return filename
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes