geoseeq 0.7.4__py3-none-any.whl → 0.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geoseeq/cli/download.py +95 -15
- geoseeq/cli/main.py +1 -1
- geoseeq/result/result_file.py +12 -0
- {geoseeq-0.7.4.dist-info → geoseeq-0.7.5.dist-info}/METADATA +1 -1
- {geoseeq-0.7.4.dist-info → geoseeq-0.7.5.dist-info}/RECORD +8 -8
- {geoseeq-0.7.4.dist-info → geoseeq-0.7.5.dist-info}/WHEEL +0 -0
- {geoseeq-0.7.4.dist-info → geoseeq-0.7.5.dist-info}/entry_points.txt +0 -0
- {geoseeq-0.7.4.dist-info → geoseeq-0.7.5.dist-info}/licenses/LICENSE +0 -0
geoseeq/cli/download.py
CHANGED
|
@@ -3,6 +3,7 @@ import logging
|
|
|
3
3
|
from os import makedirs
|
|
4
4
|
from os.path import dirname, join
|
|
5
5
|
|
|
6
|
+
import gzip
|
|
6
7
|
import click
|
|
7
8
|
import pandas as pd
|
|
8
9
|
from multiprocessing import Pool
|
|
@@ -32,6 +33,7 @@ from .utils import convert_size
|
|
|
32
33
|
from geoseeq.constants import FASTQ_MODULE_NAMES
|
|
33
34
|
from geoseeq.result import ResultFile
|
|
34
35
|
from geoseeq.upload_download_manager import GeoSeeqDownloadManager
|
|
36
|
+
import os
|
|
35
37
|
|
|
36
38
|
logger = logging.getLogger('geoseeq_api')
|
|
37
39
|
|
|
@@ -378,7 +380,27 @@ def cli_download_ids(state, cores, target_dir, file_name, yes, download, head, i
|
|
|
378
380
|
download_manager.download_files()
|
|
379
381
|
|
|
380
382
|
|
|
381
|
-
def
|
|
383
|
+
def _get_local_filename_for_fastq(sample, result_file, read_type, read_num, lane_num, file_name_mode):
|
|
384
|
+
"""Return a local filename for a fastq file based on the specified naming mode."""
|
|
385
|
+
if file_name_mode == "original":
|
|
386
|
+
return result_file.get_stored_data_filename()
|
|
387
|
+
elif file_name_mode == "geoseeq":
|
|
388
|
+
sname = sample.name.replace(".", "-").replace(" ", "_").lower()
|
|
389
|
+
rtype = read_type.replace("::", "__").replace(".", "-").replace(" ", "_").lower()
|
|
390
|
+
filename = f"{sname}.{rtype}.R{read_num}.L{lane_num}.fastq.gz"
|
|
391
|
+
return filename
|
|
392
|
+
elif file_name_mode == "sample-uuid":
|
|
393
|
+
filename = f"{sample.uuid}.R{read_num}.L{lane_num}.fastq.gz"
|
|
394
|
+
return filename
|
|
395
|
+
elif file_name_mode == "file-uuid":
|
|
396
|
+
filename = f"{result_file.uuid}.fastq.gz"
|
|
397
|
+
return filename
|
|
398
|
+
else:
|
|
399
|
+
raise ValueError(f"Unknown file name mode: {file_name_mode}")
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def _get_sample_result_files_with_names(sample, module_name=None, which_fastqs_mode='all', file_name_mode='original'):
|
|
403
|
+
"""Return list of (result_file, filename, key) tuples for all fastq files in a sample."""
|
|
382
404
|
result_files_with_names = []
|
|
383
405
|
for read_type, folder in sample.get_all_fastqs().items():
|
|
384
406
|
if module_name and module_name != read_type:
|
|
@@ -388,19 +410,18 @@ def _get_sample_result_files_with_names(sample, module_name=None, first=False):
|
|
|
388
410
|
lane_num = lane_num + 1 # 1 indexed
|
|
389
411
|
if read_type in ["short_read::paired_end"]:
|
|
390
412
|
key = (sample, read_type, 1, lane_num) # sample name, read type, read number, lane number
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
413
|
+
fname = _get_local_filename_for_fastq(sample, result_file[0], read_type, 1, lane_num, file_name_mode)
|
|
414
|
+
result_files_with_names.append((result_file[0], fname, key))
|
|
415
|
+
if which_fastqs_mode == "first-r1":
|
|
416
|
+
break
|
|
394
417
|
key = (sample, read_type, 2, lane_num)
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
)
|
|
418
|
+
fname = _get_local_filename_for_fastq(sample, result_file[1], read_type, 2, lane_num, file_name_mode)
|
|
419
|
+
result_files_with_names.append((result_file[1], fname, key))
|
|
398
420
|
else:
|
|
399
421
|
key = (sample, read_type, 1, lane_num)
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
if first:
|
|
422
|
+
fname = _get_local_filename_for_fastq(sample, result_file, read_type, 1, lane_num, file_name_mode)
|
|
423
|
+
result_files_with_names.append((result_file, fname, key))
|
|
424
|
+
if which_fastqs_mode in ["first-all", "first-r1"]:
|
|
404
425
|
break
|
|
405
426
|
|
|
406
427
|
return result_files_with_names
|
|
@@ -442,14 +463,52 @@ def _make_read_configs(download_results, config_dir="."):
|
|
|
442
463
|
with open(config_path, "w") as f:
|
|
443
464
|
json.dump(config_blob, f, indent=4)
|
|
444
465
|
|
|
466
|
+
def _open_maybe_gzip(local_path):
|
|
467
|
+
"""Open a file that may be gzipped. Do not rely on file extension."""
|
|
468
|
+
with open(local_path, "rb") as f:
|
|
469
|
+
magic_number = f.read(2)
|
|
470
|
+
if magic_number == b'\x1f\x8b':
|
|
471
|
+
return gzip.open(local_path, "rt")
|
|
472
|
+
else:
|
|
473
|
+
return open(local_path, "r")
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def _trim_fastq_to_complete_reads(key, local_path):
    """Trim a fastq file to the last complete read at or under the head limit.

    The file at `local_path` is rewritten in place, keeping only whole
    4-line fastq records. The output is always gzip-compressed, regardless
    of whether the input was compressed.

    Args:
        key: download key tuple; `key[4]` is the head size in bytes that the
            download was limited to.
        local_path: path of the downloaded (possibly truncated) fastq file.

    Notes:
        The limit is compared against `infile.tell()`, i.e. the position in
        the text stream returned by `_open_maybe_gzip`.
        NOTE(review): for gzipped input that is presumably a position in the
        decompressed data rather than in the downloaded bytes - confirm this
        is the intended meaning of the limit.
    """
    head_bytes = key[4]
    temp_path = local_path + ".tmp"
    try:
        with _open_maybe_gzip(local_path) as infile, gzip.open(temp_path, "wt") as outfile:
            while True:
                try:
                    # readline() returns "" at end of file, so a short final
                    # record shows up as one or more empty strings.
                    read_lines = [infile.readline() for _ in range(4)]
                except EOFError:
                    # A head-limited download of a gzipped file ends mid-stream;
                    # gzip reports that as EOFError. Treat it as end of data and
                    # keep the complete reads written so far.
                    break
                if not all(read_lines):
                    break  # end of file (incomplete trailing record dropped)
                if infile.tell() > head_bytes:
                    break  # reached head limit
                outfile.writelines(read_lines)
        # Replace original file with trimmed file
        os.replace(temp_path, local_path)
    except BaseException:
        # Do not leave a stray ".tmp" file next to the download on failure.
        try:
            os.remove(temp_path)
        except OSError:
            pass
        raise
|
|
501
|
+
|
|
445
502
|
|
|
446
503
|
@cli_download.command("fastqs")
|
|
447
504
|
@use_common_state
|
|
448
505
|
@cores_option
|
|
449
506
|
@click.option("--target-dir", default=".")
|
|
450
507
|
@yes_option
|
|
451
|
-
@click.option(
|
|
508
|
+
@click.option('--file-name-mode', type=click.Choice(['original', 'geoseeq', 'sample-uuid', 'file-uuid']), help="Choose how the downloaded fastq files are named.", default='original')
|
|
509
|
+
@click.option("--which-fastqs-mode", type=click.Choice(["first-all", "first-r1", "all"]), default="all", help="Choose which fastq files to download per sample. ")
|
|
452
510
|
@click.option("--download/--urls-only", default=True, help="Download files or just print urls")
|
|
511
|
+
@head_option
|
|
453
512
|
@click.option("--config-dir", default=None, help="Directory to write read config files. If unset do not write config files.")
|
|
454
513
|
@module_option(FASTQ_MODULE_NAMES, use_default=False)
|
|
455
514
|
@ignore_errors_option
|
|
@@ -460,8 +519,10 @@ def cli_download_fastqs(state,
|
|
|
460
519
|
cores,
|
|
461
520
|
target_dir,
|
|
462
521
|
yes,
|
|
463
|
-
|
|
522
|
+
file_name_mode,
|
|
523
|
+
which_fastqs_mode,
|
|
464
524
|
download,
|
|
525
|
+
head,
|
|
465
526
|
config_dir,
|
|
466
527
|
module_name,
|
|
467
528
|
ignore_errors,
|
|
@@ -474,6 +535,20 @@ def cli_download_fastqs(state,
|
|
|
474
535
|
This command will download fastq files from a GeoSeeq project. You can filter
|
|
475
536
|
files by sample name and by specific fastq read types.
|
|
476
537
|
|
|
538
|
+
The filenames of the downloaded fastq files can be controlled using the --file-name-mode option:
|
|
539
|
+
- original: Use the original filename as uploaded to GeoSeeq (default)
|
|
540
|
+
- geoseeq: Use a normalized GeoSeeq generated filename that includes the sample name, read type, read number, and lane number.
|
|
541
|
+
- sample-uuid: Use the GeoSeeq UUID of the sample along with lane number and read number.
|
|
542
|
+
- file-uuid: Use the GeoSeeq UUID of the result file only.
|
|
543
|
+
|
|
544
|
+
If the --head option is used to only download the first N bytes of each fastq file, this command
|
|
545
|
+
will automatically clip the fastq files at the nearest complete read boundary to avoid incomplete reads.
|
|
546
|
+
|
|
547
|
+
The --which-fastqs-mode option controls which fastq files are downloaded per sample:
|
|
548
|
+
- first-all: Download all fastq files but from the first fastq folder only.
|
|
549
|
+
- first-r1: Download only the first read (R1) fastq file from the first fastq folder.
|
|
550
|
+
- all: Download all fastq files from all folders.
|
|
551
|
+
|
|
477
552
|
---
|
|
478
553
|
|
|
479
554
|
Example Usage:
|
|
@@ -523,7 +598,7 @@ def cli_download_fastqs(state,
|
|
|
523
598
|
result_files_with_names = []
|
|
524
599
|
for sample in samples:
|
|
525
600
|
try:
|
|
526
|
-
result_files_with_names += _get_sample_result_files_with_names(sample, module_name,
|
|
601
|
+
result_files_with_names += _get_sample_result_files_with_names(sample, module_name, which_fastqs_mode, file_name_mode)
|
|
527
602
|
except Exception as e:
|
|
528
603
|
logger.error(f"Error fetching fastq files for sample {sample.name}: {e}")
|
|
529
604
|
if not ignore_errors:
|
|
@@ -538,9 +613,14 @@ def cli_download_fastqs(state,
|
|
|
538
613
|
ignore_errors=ignore_errors,
|
|
539
614
|
log_level=state.log_level,
|
|
540
615
|
progress_tracker_factory=PBarManager().get_new_bar,
|
|
616
|
+
head=head,
|
|
541
617
|
)
|
|
542
618
|
for result_file, filename, key in result_files_with_names:
|
|
543
|
-
|
|
619
|
+
callback = None
|
|
620
|
+
if head:
|
|
621
|
+
callback = _trim_fastq_to_complete_reads
|
|
622
|
+
key = key + (head,) # append head bytes to key
|
|
623
|
+
download_manager.add_download(result_file, join(target_dir, filename), key=key, callback=callback)
|
|
544
624
|
if not download:
|
|
545
625
|
print(download_manager.get_url_string(), file=state.outfile)
|
|
546
626
|
else:
|
geoseeq/cli/main.py
CHANGED
|
@@ -55,7 +55,7 @@ def version():
|
|
|
55
55
|
Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
|
|
56
56
|
Run `geoseeq eula show` to view the EULA.
|
|
57
57
|
"""
|
|
58
|
-
click.echo("0.7.
|
|
58
|
+
click.echo("0.7.5") # remember to update pyproject.toml
|
|
59
59
|
|
|
60
60
|
|
|
61
61
|
@main.group("advanced")
|
geoseeq/result/result_file.py
CHANGED
|
@@ -103,6 +103,18 @@ class ResultFile(RemoteObject, ResultFileUpload, ResultFileDownload, ResultFileS
|
|
|
103
103
|
# except TypeError:
|
|
104
104
|
# return basename(self.get_blob_filename())
|
|
105
105
|
|
|
106
|
+
def get_stored_data_filename(self):
    """Return the base filename recorded in this result file's stored_data.

    The path is taken from the first of the "filename", "uri" or "url"
    entries present in `self.stored_data`; only its final path component
    is returned. This is typically the filename that was originally
    uploaded to create this result file.

    Raises:
        TypeError: if `stored_data` has none of those entries.
    """
    for candidate in ("filename", "uri", "url"):
        if candidate in self.stored_data:
            return basename(self.stored_data[candidate])
    raise TypeError("Cannot make a reference filename for a BLOB type result field.")
|
|
117
|
+
|
|
106
118
|
def _save(self):
|
|
107
119
|
data = {field: getattr(self, field) for field in self.remote_fields if hasattr(self, field)}
|
|
108
120
|
data["analysis_result"] = self.parent.uuid
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: geoseeq
|
|
3
|
-
Version: 0.7.
|
|
3
|
+
Version: 0.7.5
|
|
4
4
|
Summary: GeoSeeq command line tools and python API
|
|
5
5
|
Project-URL: Homepage, https://github.com/biotia/geoseeq_api_client
|
|
6
6
|
Project-URL: Issues, https://github.com/biotia/geoseeq_api_client/issues
|
|
@@ -21,11 +21,11 @@ geoseeq/cli/__init__.py,sha256=4WnK87K5seRK3SGJAxNWnQTqyg5uBhdhrOrzB1D4b3M,24
|
|
|
21
21
|
geoseeq/cli/constants.py,sha256=NtRSNBuna42605LE0sVywTPfmzYQnG-3yrT_M7Ml5B0,213
|
|
22
22
|
geoseeq/cli/copy.py,sha256=02U9kdrAIbbM8MlRMLL6p-LMYFSuRObE3h5jyvcL__M,2275
|
|
23
23
|
geoseeq/cli/detail.py,sha256=q8Suu-j2k18knfSVFG-SWWGNsKM-n8y9RMA3LcIIi9Y,4132
|
|
24
|
-
geoseeq/cli/download.py,sha256=
|
|
24
|
+
geoseeq/cli/download.py,sha256=U4x-Y5DMkRGcWctVdp6YIFviLZI6tpMMyXEeV2tbzaM,25407
|
|
25
25
|
geoseeq/cli/fastq_utils.py,sha256=-bmeQLaiMBm57zWOF0R5OlWTU0_3sh1JBC1RYw2BOFM,3083
|
|
26
26
|
geoseeq/cli/find_grn.py,sha256=oMDxkzGQBQb2_cCuvmwoeHOsFHqyO9RLeJzrB6bAe5M,439
|
|
27
27
|
geoseeq/cli/get_eula.py,sha256=79mbUwyiF7O1r0g6UTxG9kJGQEqKuH805E6eLkPC6Y4,997
|
|
28
|
-
geoseeq/cli/main.py,sha256=
|
|
28
|
+
geoseeq/cli/main.py,sha256=Gvi5Bh5zbEmYPdShcrY0C3KAwQdRiNxsMUhATVtDWMc,4133
|
|
29
29
|
geoseeq/cli/manage.py,sha256=wGXAcVaXqE5JQEU8Jh6OlHr02nB396bpS_SFcOZdrEo,5929
|
|
30
30
|
geoseeq/cli/progress_bar.py,sha256=p1Xl01nkYxSBZCB30ue2verIIi22W93m3ZAMAxipD0g,738
|
|
31
31
|
geoseeq/cli/project.py,sha256=V5SdXm2Hwo2lxrkpwRDedw-mAE4XnM2uwT-Gj1D90VQ,3030
|
|
@@ -76,7 +76,7 @@ geoseeq/result/bioinfo.py,sha256=QQtbyogrdro9avJSN0713sxLVnVeA24mFw3hWtKDKyw,178
|
|
|
76
76
|
geoseeq/result/file_chunker.py,sha256=bXq1csuRtqMB5sbH-AfWo6gdPwrivv5DJPuHVj-h08w,1758
|
|
77
77
|
geoseeq/result/file_download.py,sha256=5IXg_dIWlrRHBJQssO42da5_bIJOyH0_b8K2KWVAFBE,8210
|
|
78
78
|
geoseeq/result/file_upload.py,sha256=xs1DrI-h4ZP7xN8HPBc3SFpcPAxR5HAolraP1Zu7tvE,10648
|
|
79
|
-
geoseeq/result/result_file.py,sha256=
|
|
79
|
+
geoseeq/result/result_file.py,sha256=Mu_8cJYN3tVlkLYnbd_pyGinBoePlmQxLFIbeF-PWyo,9698
|
|
80
80
|
geoseeq/result/result_folder.py,sha256=iyO0hwZWokrH6oWhBgHlunWMpCMpejKb8v2sHFhecws,11283
|
|
81
81
|
geoseeq/result/resumable_download_tracker.py,sha256=YEzqHBBnE7L3XokTvlTAhHZ8TcDTIE_pyTQ7YadOfbU,3667
|
|
82
82
|
geoseeq/result/resumable_upload_tracker.py,sha256=2aI09gYz2yw63jEXqs8lmCRKQ79TIc3YuPETvP0Jeek,3811
|
|
@@ -92,8 +92,8 @@ geoseeq/vc/vc_cache.py,sha256=P4LXTbq2zOIv1OhP7Iw5MmypR2vXuy29Pq5K6gRvi-M,730
|
|
|
92
92
|
geoseeq/vc/vc_dir.py,sha256=A9CLTh2wWCRzZjiLyqXD1vhtsWZGD3OjaMT5KqlfAXI,457
|
|
93
93
|
geoseeq/vc/vc_sample.py,sha256=qZeioWydXvfu4rGMs20nICfNcp46y_XkND-bHdV6P5M,3850
|
|
94
94
|
geoseeq/vc/vc_stub.py,sha256=IQr8dI0zsWKVAeY_5ybDD6n49_3othcgfHS3P0O9tuY,3110
|
|
95
|
-
geoseeq-0.7.
|
|
96
|
-
geoseeq-0.7.
|
|
97
|
-
geoseeq-0.7.
|
|
98
|
-
geoseeq-0.7.
|
|
99
|
-
geoseeq-0.7.
|
|
95
|
+
geoseeq-0.7.5.dist-info/METADATA,sha256=SzijU4fXQxHJBEgmy8c8Uv8Y2JmdNDOQYD92EwmPTBw,5652
|
|
96
|
+
geoseeq-0.7.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
97
|
+
geoseeq-0.7.5.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
|
|
98
|
+
geoseeq-0.7.5.dist-info/licenses/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
|
|
99
|
+
geoseeq-0.7.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|