geoseeq 0.5.6a10__py3-none-any.whl → 0.5.6a11__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
geoseeq/cli/main.py CHANGED
@@ -53,7 +53,7 @@ def version():
53
53
  Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
54
54
  Run `geoseeq eula show` to view the EULA.
55
55
  """
56
- click.echo('0.5.6a10') # remember to update setup
56
+ click.echo('0.5.6a11') # remember to update setup
57
57
 
58
58
 
59
59
  @main.group('advanced')
@@ -37,6 +37,7 @@ hidden_option = click.option('--hidden/--no-hidden', default=False, help='Upload
37
37
  @click.command('files')
38
38
  @use_common_state
39
39
  @click.option('--cores', default=1, help='Number of uploads to run in parallel')
40
+ @click.option('--threads-per-upload', default=4, help='Number of threads used to upload each file')
40
41
  @yes_option
41
42
  @private_option
42
43
  @link_option
@@ -47,7 +48,7 @@ hidden_option = click.option('--hidden/--no-hidden', default=False, help='Upload
47
48
  help='Specify a different name for the file on GeoSeeq than the local file name.')
48
49
  @folder_id_arg
49
50
  @click.argument('file_paths', type=click.Path(exists=True), nargs=-1)
50
- def cli_upload_file(state, cores, yes, private, link_type, recursive, hidden, no_new_versions, geoseeq_file_name, folder_id, file_paths):
51
+ def cli_upload_file(state, cores, threads_per_upload, yes, private, link_type, recursive, hidden, no_new_versions, geoseeq_file_name, folder_id, file_paths):
51
52
  """Upload files to GeoSeeq.
52
53
 
53
54
  This command uploads files to either a sample or project on GeoSeeq. It can be used to upload
@@ -106,6 +107,7 @@ def cli_upload_file(state, cores, yes, private, link_type, recursive, hidden, no
106
107
 
107
108
  upload_manager = GeoSeeqUploadManager(
108
109
  n_parallel_uploads=cores,
110
+ threads_per_upload=threads_per_upload,
109
111
  link_type=link_type,
110
112
  progress_tracker_factory=PBarManager().get_new_bar,
111
113
  log_level=state.log_level,
@@ -132,6 +132,28 @@ def flatten_list_of_fastqs(filepaths):
132
132
  return flattened
133
133
 
134
134
 
135
+ def _is_bam(path):
136
+ for ext in ['.bam', '.bai']:
137
+ if path.endswith(ext):
138
+ return True
139
+ return False
140
+
141
+
142
+ def flatten_list_of_bams(filepaths):
143
+ """Turn a list of bam filepaths and txt files containing bam filepaths into a single list of bam filepaths."""
144
+ flattened = []
145
+ for path in filepaths:
146
+ if _is_bam(path):
147
+ flattened.append(path)
148
+ else:
149
+ with open(path) as f:
150
+ for line in f:
151
+ line = line.strip()
152
+ if line and not line.startswith('#'):
153
+ flattened.append(line)
154
+ return flattened
155
+
156
+
135
157
 
136
158
  @click.command('reads')
137
159
  @use_common_state
@@ -200,3 +222,67 @@ def cli_upload_reads_wizard(state, cores, overwrite, yes, regex, private, link_t
200
222
  regex = _get_regex(knex, filepaths, module_name, proj, regex)
201
223
  groups = _group_files(knex, filepaths, module_name, regex, yes)
202
224
  _do_upload(groups, module_name, link_type, proj, filepaths, overwrite, no_new_versions, cores, state)
225
+
226
+
227
+ # @click.command('bam')
228
+ # @use_common_state
229
+ # @click.option('--genome', default=None, help='The genome aligned to the BAM files. Should be in 2bit format.')
230
+ # @click.option('--cores', default=1, help='Number of uploads to run in parallel')
231
+ # @overwrite_option
232
+ # @yes_option
233
+ # @click.option('--regex', default=None, help='An optional regex to use to extract sample names from the file names')
234
+ # @private_option
235
+ # @link_option
236
+ # @no_new_versions_option
237
+ # @project_id_arg
238
+ # @click.argument('files', type=click.Path(exists=True), nargs=-1)
239
+ # def cli_upload_bams(state, genome, cores, overwrite, yes, regex, private, link_type, no_new_versions, project_id, files):
240
+ """Upload BAM files to GeoSeeq.
241
+
242
+ This command automatically groups bams with their index files.
243
+
244
+ ---
245
+
246
+ Example Usage:
247
+
248
+ \b
249
+ # Upload a list of BAM files to a project, useful if you have hundreds of files
250
+ $ ls -1 path/to/bam/files/*.bam > file_list.txt
251
+ $ geoseeq upload bams "GeoSeeq/Example CLI Project" file_list.txt
252
+
253
+ \b
254
+ # Upload all the BAM files in a directory to a project with BAM indexes
255
+ $ geoseeq upload bams ed59b913-91ec-489b-a1b9-4ea137a6e5cf path/to/bam/files/*.bam path/to/bam/files/*.bam.bai
256
+
257
+ \b
258
+ # Upload all the BAM files in a directory to a project, performing 4 uploads in parallel
259
+ $ geoseeq upload bams --cores 4 ed59b913-91ec-489b-a1b9-4ea137a6e5cf path/to/bam/files/*.bam
260
+
261
+ \b
262
+ # Upload a list of BAM files to a project, automatically creating a new project and overwriting existing files
263
+ $ ls -1 path/to/bam/files/*.bam > file_list.txt
264
+ $ geoseeq upload bams --yes --overwrite "GeoSeeq/Example CLI Project" file_list.txt
265
+
266
+ ---
267
+
268
+ Command Arguments:
269
+
270
+ [PROJECT_ID] Can be a project UUID, GeoSeeq Resource Number (GRN), or an
271
+ organization name and project name separated by a slash.
272
+
273
+ \b
274
+ Examples:
275
+ - Name pair: "GeoSeeq/Example CLI Project"
276
+ - UUID: "ed59b913-91ec-489b-a1b9-4ea137a6e5cf"
277
+ - GRN: "grn:gs1:project:ed59b913-91ec-489b-a1b9-4ea137a6e5cf"
278
+
279
+ \b
280
+ [FILES...] can be paths to BAM files or a file containing a list of paths, or a mix of both.
281
+ Example: "path/to/bam/files
282
+ """
283
+ knex = state.get_knex()
284
+ proj = handle_project_id(knex, project_id, yes, private)
285
+ filepaths = {basename(line): line for line in flatten_list_of_bams(files)}
286
+ click.echo(f'Found {len(filepaths)} files to upload.', err=True)
287
+ groups = _group_files(knex, filepaths, 'bam::bam', regex, yes)
288
+ _do_upload(groups, 'bam::bam', link_type, proj, filepaths, overwrite, no_new_versions, cores, state)
@@ -46,6 +46,11 @@ def guess_download_kind(url):
46
46
 
47
47
  def download_url(url, kind='guess', filename=None, head=None, progress_tracker=None):
48
48
  """Return a local filepath to the downloaded file. Download the file."""
49
+ if filename and isfile(filename):
50
+ file_size = getsize(filename)
51
+ if file_size > 0:
52
+ logger.info(f"File already exists: {filename}. Not overwriting.")
53
+ return filename
49
54
  if kind == 'guess':
50
55
  kind = guess_download_kind(url)
51
56
  logger.info(f"Guessed download kind: {kind} for {url}")
@@ -31,14 +31,30 @@ class FileChunker:
31
31
  chunk = f.read(self.chunk_size)
32
32
  self.loaded_parts.append(chunk)
33
33
  return self # convenience for chaining
34
+
35
+ def chunk_is_preloaded(self, num):
36
+ return len(self.loaded_parts) > num and self.loaded_parts[num]
37
+
38
+ def read_one_chunk(self, num):
39
+ if not self.chunk_is_preloaded(num):
40
+ logger.debug(f"Reading chunk {num} from {self.filepath}")
41
+ with open(self.filepath, "rb") as f:
42
+ f.seek(num * self.chunk_size)
43
+ chunk = f.read(self.chunk_size)
44
+ return chunk
45
+ return self.loaded_parts[num]
34
46
 
35
47
  def get_chunk(self, num):
36
- self.load_all_chunks()
37
- return self.loaded_parts[num]
48
+ if self.chunk_is_preloaded(num):
49
+ return self.loaded_parts[num]
50
+ return self.read_one_chunk(num)
38
51
 
39
52
  def get_chunk_size(self, num):
40
- self.load_all_chunks()
41
- return len(self.loaded_parts[num])
53
+ if num < (self.n_parts - 1): # all but the last chunk
54
+ return self.chunk_size
55
+ if self.chunk_is_preloaded(num): # last chunk, pre-loaded
56
+ return len(self.loaded_parts[num])
57
+ return len(self.read_one_chunk(num)) # last chunk, not pre-loaded
42
58
 
43
59
 
44
60
  class ResumableUploadTracker:
@@ -159,6 +175,7 @@ class ResultFileUpload:
159
175
  attempts = 0
160
176
  while attempts < max_retries:
161
177
  try:
178
+ logger.debug(f"Uploading part {num + 1} to {url}. Size: {len(file_chunk)} bytes.")
162
179
  if session:
163
180
  http_response = session.put(url, data=file_chunk)
164
181
  else:
@@ -251,7 +268,12 @@ class ResultFileUpload:
251
268
  resumable_upload_tracker.start_upload(upload_id, urls)
252
269
  logger.info(f'Starting upload for "{filepath}"')
253
270
  complete_parts = []
254
- file_chunker = FileChunker(filepath, chunk_size).load_all_chunks()
271
+ file_chunker = FileChunker(filepath, chunk_size)
272
+ if file_chunker.file_size < 10 * FIVE_MB:
273
+ file_chunker.load_all_chunks()
274
+ logger.debug(f"Preloaded all chunks for {filepath}")
275
+ else:
276
+ logger.debug(f"Did not preload chunks for {filepath}")
255
277
  if progress_tracker: progress_tracker.set_num_chunks(file_chunker.file_size)
256
278
  complete_parts = self._upload_parts(
257
279
  file_chunker,
@@ -21,7 +21,7 @@ def _make_in_process_logger(log_level):
21
21
  def _upload_one_file(args):
22
22
  (result_file, filepath, session, progress_tracker,
23
23
  link_type, overwrite, log_level, parallel_uploads,
24
- use_cache, no_new_versions) = args
24
+ use_cache, no_new_versions, threads_per_upload) = args
25
25
  if parallel_uploads:
26
26
  _make_in_process_logger(log_level)
27
27
  if link_type == 'upload':
@@ -29,7 +29,8 @@ def _upload_one_file(args):
29
29
  result_file.upload_file(
30
30
  filepath,
31
31
  session=session, overwrite=overwrite, progress_tracker=progress_tracker,
32
- threads=4, use_cache=use_cache, no_new_versions=no_new_versions
32
+ threads=threads_per_upload, use_cache=use_cache,
33
+ no_new_versions=no_new_versions
33
34
  )
34
35
  else:
35
36
  result_file.link_file(link_type, filepath)
@@ -40,6 +41,7 @@ class GeoSeeqUploadManager:
40
41
 
41
42
  def __init__(self,
42
43
  n_parallel_uploads=1,
44
+ threads_per_upload=4,
43
45
  session=None,
44
46
  link_type='upload',
45
47
  progress_tracker_factory=None,
@@ -56,6 +58,7 @@ class GeoSeeqUploadManager:
56
58
  self._result_files = []
57
59
  self.no_new_versions = no_new_versions
58
60
  self.use_cache = use_cache
61
+ self.threads_per_upload = threads_per_upload
59
62
 
60
63
  def add_result_file(self, result_file, local_path):
61
64
  self._result_files.append((result_file, local_path))
@@ -80,7 +83,8 @@ class GeoSeeqUploadManager:
80
83
  result_file, local_path,
81
84
  self.session, self.progress_tracker_factory(local_path),
82
85
  self.link_type, self.overwrite, self.log_level,
83
- self.n_parallel_uploads > 1, self.use_cache, self.no_new_versions
86
+ self.n_parallel_uploads > 1, self.use_cache, self.no_new_versions,
87
+ self.threads_per_upload
84
88
  ) for result_file, local_path in self._result_files
85
89
  ]
86
90
  out = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geoseeq
3
- Version: 0.5.6a10
3
+ Version: 0.5.6a11
4
4
  Summary: GeoSeeq command line tools and python API
5
5
  Author: David C. Danko
6
6
  Author-email: "David C. Danko" <dcdanko@biotia.io>
@@ -11,7 +11,7 @@ geoseeq/project.py,sha256=-9Y2ik0-BpT3iqh89v8VQBbdadhI58oaUP9oZK8oetc,13741
11
11
  geoseeq/remote_object.py,sha256=Es-JlAz8iLRmCpAzh1MOwUh2MqtbuQM-p8wHIBAqNlQ,7131
12
12
  geoseeq/sample.py,sha256=whgEVk6GnDJJLjn5uTOqFqRtVxZD3BgjTo7brAC5noU,7981
13
13
  geoseeq/search.py,sha256=gawad6Cx5FxJBPlYkXWb-UKAO-UC0_yhvyU9Ca1kaNI,3388
14
- geoseeq/upload_download_manager.py,sha256=BGaEBAKu05CqftwRu3BjzL6FvcHp_w122yOS3LVVzd4,7423
14
+ geoseeq/upload_download_manager.py,sha256=aydSVTAjyupd4gkqmImtcSTXEPBAAqQ1HFgfAk83Scw,7605
15
15
  geoseeq/user.py,sha256=tol8i1UGLRrbMw5jeJDnna1ikRgrCDd50Jxz0a1lSgg,690
16
16
  geoseeq/utils.py,sha256=PDRiEQIZYTcfEV9AYvloQVvfqs5JaebcFZodAa2SUW8,3577
17
17
  geoseeq/work_orders.py,sha256=5uLVVfdKE8qh4gGaHkdBpXJGRTujuSg59knWCqEET4A,8071
@@ -22,7 +22,7 @@ geoseeq/cli/detail.py,sha256=q8Suu-j2k18knfSVFG-SWWGNsKM-n8y9RMA3LcIIi9Y,4132
22
22
  geoseeq/cli/download.py,sha256=_upzZo08K0fAPbEsyi1uN0HGNUaY1pl6OoGPcWmvSUY,17765
23
23
  geoseeq/cli/fastq_utils.py,sha256=-bmeQLaiMBm57zWOF0R5OlWTU0_3sh1JBC1RYw2BOFM,3083
24
24
  geoseeq/cli/get_eula.py,sha256=79mbUwyiF7O1r0g6UTxG9kJGQEqKuH805E6eLkPC6Y4,997
25
- geoseeq/cli/main.py,sha256=xbSTOVLf1iFdw-aUu6l_UEVeiVrPgi9Liylt5UjNRzU,3260
25
+ geoseeq/cli/main.py,sha256=zsPFQY__lqMeG_l4GTjonmddbe8p1FHjksouuK2U07c,3260
26
26
  geoseeq/cli/manage.py,sha256=wGXAcVaXqE5JQEU8Jh6OlHr02nB396bpS_SFcOZdrEo,5929
27
27
  geoseeq/cli/progress_bar.py,sha256=p1Xl01nkYxSBZCB30ue2verIIi22W93m3ZAMAxipD0g,738
28
28
  geoseeq/cli/run.py,sha256=bx2AV6VIqOSTlxUda78xl0XxcZ8TXlQx02-e7iLQPwI,3838
@@ -37,9 +37,9 @@ geoseeq/cli/shared_params/id_handlers.py,sha256=501K9sCVkI0YGDQ62vXk_DM5lMMDrdB5
37
37
  geoseeq/cli/shared_params/obj_getters.py,sha256=ZSkt6LnDkVFlNVYKgLrjzg60-6BthZMr3eeD3HNqzac,2741
38
38
  geoseeq/cli/shared_params/opts_and_args.py,sha256=LrDkv9WtUryM4uUMXPRk04-EBcTQ7q5V6Yu-XRDUvvA,2083
39
39
  geoseeq/cli/upload/__init__.py,sha256=3C9_S9t7chmYU-2ot89NV03x-EtmsjibulErKaU9w1k,627
40
- geoseeq/cli/upload/upload.py,sha256=vbUslOSPf8EDYFzFxukE4EFK_h8nfMYNbSczR2AJNxk,9203
40
+ geoseeq/cli/upload/upload.py,sha256=_ZR2tkugaB71rVTJFAwRCZLedqGO58sgTsHILebfvDs,9370
41
41
  geoseeq/cli/upload/upload_advanced.py,sha256=Jq5eGe-wOdrzxGWVwaFPg0BAJcW0YSx_eHEmYjJeKuA,3434
42
- geoseeq/cli/upload/upload_reads.py,sha256=iInPDnfgEuLa8E4gfENFaZ_Uhxfv1Zfkc38iPvTsTwg,7450
42
+ geoseeq/cli/upload/upload_reads.py,sha256=EMGqyZf11xwN4v2j8gNxMagTbE4kaOd-_hwupmg5I-8,10670
43
43
  geoseeq/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  geoseeq/contrib/ncbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
45
  geoseeq/contrib/ncbi/api.py,sha256=WQeLoGA_-Zha-QeSO8_i7HpvXyD8UkV0qc5okm11KiA,1056
@@ -63,8 +63,8 @@ geoseeq/plotting/map/map.py,sha256=h2QPLGqe-SamhfaTij53S9cQIiO8orCJUAUh0hRicSM,3
63
63
  geoseeq/plotting/map/overlay.py,sha256=4VmxqOESTQra9tPr8b8OLEUhJSit9lNipabeSznEYwE,1795
64
64
  geoseeq/result/__init__.py,sha256=IFHIyRV8ZzuKIfwfze1SXgcKwNMcSgMAknLHMkwjXIU,356
65
65
  geoseeq/result/bioinfo.py,sha256=QQtbyogrdro9avJSN0713sxLVnVeA24mFw3hWtKDKyw,1782
66
- geoseeq/result/file_download.py,sha256=XQA5bdQJJSZIgbFcN09OvVdLq12fnA98kPCIONAkLk0,5568
67
- geoseeq/result/file_upload.py,sha256=D8gbdsyzw5ztjT7Vmq4InxKhyl7l8n2leH4GYVD-seM,12109
66
+ geoseeq/result/file_download.py,sha256=vbYo2B4JshTIqLaklcgcBb7NY9cD5pMkas95GuQxW8s,5776
67
+ geoseeq/result/file_upload.py,sha256=z3ImHlVhli6ZwOHP7GvJqxnVxKYpMyBojqrpdBSBJIs,13176
68
68
  geoseeq/result/result_file.py,sha256=1Yj9fkZhds3J-tay6eNH2-EHi00MovHGV1M80_ckHD8,8677
69
69
  geoseeq/result/result_folder.py,sha256=6porOXPh7Tpxw3oX5yMRPYQzNCGYqszqmFJd3SwQmTc,11122
70
70
  geoseeq/result/utils.py,sha256=C-CxGzB3WddlnRiqFSkrY78I_m0yFgNqsTBRzGU-y8Q,2772
@@ -80,9 +80,9 @@ geoseeq/vc/vc_stub.py,sha256=IQr8dI0zsWKVAeY_5ybDD6n49_3othcgfHS3P0O9tuY,3110
80
80
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
81
81
  tests/test_api_client.py,sha256=TS5njc5pcPP_Ycy-ljcfPVT1hQRBsFVdQ0lCqBmoesU,12810
82
82
  tests/test_plotting.py,sha256=TcTu-2ARr8sxZJ7wPQxmbs3-gHw7uRvsgrhhhg0qKik,784
83
- geoseeq-0.5.6a10.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
84
- geoseeq-0.5.6a10.dist-info/METADATA,sha256=RKguQ4reNa4rH6Pv20JBMCbyo2inLjKcO5VFkcCgyxg,4806
85
- geoseeq-0.5.6a10.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
86
- geoseeq-0.5.6a10.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
87
- geoseeq-0.5.6a10.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
88
- geoseeq-0.5.6a10.dist-info/RECORD,,
83
+ geoseeq-0.5.6a11.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
84
+ geoseeq-0.5.6a11.dist-info/METADATA,sha256=sVpz2que_a-pWGG7WNGLz2IFGoNOeofn27ZKxqZdsts,4806
85
+ geoseeq-0.5.6a11.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
86
+ geoseeq-0.5.6a11.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
87
+ geoseeq-0.5.6a11.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
88
+ geoseeq-0.5.6a11.dist-info/RECORD,,