rdrpcatch-0.0.2-py3-none-any.whl → rdrpcatch-0.0.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdrpcatch/rdrpcatch_scripts/utils.py +30 -15
- rdrpcatch/rdrpcatch_wrapper.py +6 -6
- {rdrpcatch-0.0.2.dist-info → rdrpcatch-0.0.4.dist-info}/METADATA +1 -1
- {rdrpcatch-0.0.2.dist-info → rdrpcatch-0.0.4.dist-info}/RECORD +7 -7
- {rdrpcatch-0.0.2.dist-info → rdrpcatch-0.0.4.dist-info}/WHEEL +0 -0
- {rdrpcatch-0.0.2.dist-info → rdrpcatch-0.0.4.dist-info}/entry_points.txt +0 -0
- {rdrpcatch-0.0.2.dist-info → rdrpcatch-0.0.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -255,33 +255,48 @@ class fasta:
|
|
|
255
255
|
self.logger.silent_log(f"Processing {len(rdrp_coords_list)} coordinates")
|
|
256
256
|
self.logger.silent_log(f"First few coordinates: {rdrp_coords_list[:3]}")
|
|
257
257
|
|
|
258
|
+
contig_dict = {}
|
|
259
|
+
for contig_name, rdrp_from, rdrp_to in rdrp_coords_list:
|
|
260
|
+
contig_key = str(contig_name).strip()
|
|
261
|
+
if contig_key not in contig_dict:
|
|
262
|
+
contig_dict[contig_key] = []
|
|
263
|
+
contig_dict[contig_key].append((rdrp_from, rdrp_to))
|
|
264
|
+
|
|
258
265
|
reader = needletail.parse_fastx_file(self.fasta_file)
|
|
259
266
|
matches_found = 0
|
|
260
267
|
with open(outfile, 'w') as out_handle:
|
|
261
268
|
for record in reader:
|
|
262
|
-
#
|
|
269
|
+
# Get the record ID
|
|
263
270
|
record_id = record.id.strip().split(" ")[0]
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
contig_name = str(contig_name).strip()
|
|
271
|
+
|
|
272
|
+
# Check if this record matches any of our target contigs
|
|
273
|
+
if record_id in contig_dict:
|
|
268
274
|
if self.logger:
|
|
269
|
-
self.logger.silent_log(f"
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
275
|
+
self.logger.silent_log(f"Match found for record ID: '{record_id}'")
|
|
276
|
+
|
|
277
|
+
# Process all matching coordinates for this contig
|
|
278
|
+
for rdrp_from, rdrp_to in contig_dict[record_id]:
|
|
279
|
+
seq = record.seq[rdrp_from - 1:rdrp_to]
|
|
273
280
|
fasta_header = f"{record_id}_RdRp_{rdrp_from}-{rdrp_to}"
|
|
274
281
|
out_handle.write(f">{fasta_header}\n{seq}\n")
|
|
282
|
+
matches_found += 1
|
|
283
|
+
|
|
284
|
+
# Remove the processed contig to avoid future checks
|
|
285
|
+
del contig_dict[record_id]
|
|
286
|
+
|
|
287
|
+
# If all contigs have been found, exit early
|
|
288
|
+
if not contig_dict:
|
|
275
289
|
if self.logger:
|
|
276
|
-
self.logger.silent_log(
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
self.logger.silent_log(f"No match - lengths: {len(record_id)}|{len(contig_name)}, "
|
|
280
|
-
f"record_id bytes: {record_id.encode()}, contig bytes: {contig_name.encode()}")
|
|
281
|
-
|
|
290
|
+
self.logger.silent_log("All contigs processed. Exiting early.")
|
|
291
|
+
break
|
|
292
|
+
|
|
282
293
|
if self.logger:
|
|
283
294
|
self.logger.silent_log(f"Total matches found: {matches_found}")
|
|
284
295
|
|
|
296
|
+
return matches_found
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
|
|
285
300
|
|
|
286
301
|
class mmseqs_parser:
|
|
287
302
|
|
rdrpcatch/rdrpcatch_wrapper.py
CHANGED
|
@@ -458,7 +458,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
458
458
|
outputs.mmseqs_e_search_output_path, 7, cpus, outputs.mmseqs_e_search_log_path).run_mmseqs_e_search()
|
|
459
459
|
|
|
460
460
|
utils.mmseqs_parser(outputs.mmseqs_tax_output_lca_path, outputs.mmseqs_e_search_output_path).tax_to_rdrpcatch(
|
|
461
|
-
outputs.
|
|
461
|
+
outputs.rdrpcatch_output_tsv, outputs.extended_rdrpcatch_output, seq_type)
|
|
462
462
|
|
|
463
463
|
|
|
464
464
|
elif seq_type == 'prot':
|
|
@@ -581,9 +581,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
581
581
|
utils.fasta(input_file).write_fasta(utils.fasta(input_file).extract_contigs(combined_set), outputs.fasta_prot_out_path)
|
|
582
582
|
|
|
583
583
|
if verbose:
|
|
584
|
-
logger.loud_log(f"
|
|
584
|
+
logger.loud_log(f"Full aminoacid contigs written to: {outputs.fasta_prot_out_path}")
|
|
585
585
|
else:
|
|
586
|
-
logger.silent_log(f"
|
|
586
|
+
logger.silent_log(f" Full aminoacid contigs written to: {outputs.fasta_prot_out_path}")
|
|
587
587
|
|
|
588
588
|
if not os.path.exists(outputs.gff_output_dir):
|
|
589
589
|
outputs.gff_output_dir.mkdir(parents=True)
|
|
@@ -594,9 +594,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
594
594
|
utils.fasta(input_file, logger).write_fasta_coords(rdrp_coords_list,outputs.fasta_trimmed_out_path, seq_type)
|
|
595
595
|
|
|
596
596
|
if verbose:
|
|
597
|
-
logger.loud_log(f"
|
|
597
|
+
logger.loud_log(f"Trimmed contigs written to: {outputs.fasta_trimmed_out_path}")
|
|
598
598
|
else:
|
|
599
|
-
logger.silent_log(f"
|
|
599
|
+
logger.silent_log(f"Trimmed contigs written to: {outputs.fasta_trimmed_out_path}")
|
|
600
600
|
|
|
601
601
|
if not os.path.exists(outputs.mmseqs_tax_output_dir):
|
|
602
602
|
outputs.mmseqs_tax_output_dir.mkdir(parents=True)
|
|
@@ -622,7 +622,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
622
622
|
outputs.mmseqs_e_search_output_path, 7, cpus, outputs.mmseqs_e_search_log_path).run_mmseqs_e_search()
|
|
623
623
|
|
|
624
624
|
utils.mmseqs_parser(outputs.mmseqs_tax_output_lca_path, outputs.mmseqs_e_search_output_path).tax_to_rdrpcatch(
|
|
625
|
-
outputs.
|
|
625
|
+
outputs.rdrpcatch_output_tsv, outputs.extended_rdrpcatch_output, seq_type)
|
|
626
626
|
|
|
627
627
|
|
|
628
628
|
end_time = logger.stop_timer(start_time, verbose)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
rdrpcatch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
rdrpcatch/rdrpcatch_wrapper.py,sha256=
|
|
2
|
+
rdrpcatch/rdrpcatch_wrapper.py,sha256=PUMzQ3tIU0VuBY6XrRXupo-NruszwHreyxWXzEARjM4,30550
|
|
3
3
|
rdrpcatch/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
rdrpcatch/cli/args.py,sha256=2E2gXY42hNasUP94HmPxpgVCA1glk_oN7D5ftbu6W2c,15805
|
|
5
5
|
rdrpcatch/rdrpcatch_scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -11,9 +11,9 @@ rdrpcatch/rdrpcatch_scripts/paths.py,sha256=roTZ2QPF4Fii7jtHkS9I6INJg1Vu78Dc_ieQ
|
|
|
11
11
|
rdrpcatch/rdrpcatch_scripts/plot.py,sha256=Y1mZL7rkKHFKEs2D7T2Qj2kpfiORmFwRLq1LYWqwcJI,5938
|
|
12
12
|
rdrpcatch/rdrpcatch_scripts/run_pyhmmer.py,sha256=9zcMzaIwQ4_-NgYzG9kejxOBaDi-gbzaqpvZti8ZXA4,9008
|
|
13
13
|
rdrpcatch/rdrpcatch_scripts/run_seqkit.py,sha256=5y7DtJ6NLa4sRoBQOcjBfczKlqG_LibNrEqNmKLrHu0,4361
|
|
14
|
-
rdrpcatch/rdrpcatch_scripts/utils.py,sha256=
|
|
15
|
-
rdrpcatch-0.0.
|
|
16
|
-
rdrpcatch-0.0.
|
|
17
|
-
rdrpcatch-0.0.
|
|
18
|
-
rdrpcatch-0.0.
|
|
19
|
-
rdrpcatch-0.0.
|
|
14
|
+
rdrpcatch/rdrpcatch_scripts/utils.py,sha256=jvpyPxchAMn6BeLV7HOFECSY_a3nbkxDBBL8tunmM8A,16938
|
|
15
|
+
rdrpcatch-0.0.4.dist-info/METADATA,sha256=rhb3kvfpy5zj9dUgIB1MbRfv9NNPZeQlOS1YDB7ZkrA,14000
|
|
16
|
+
rdrpcatch-0.0.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
17
|
+
rdrpcatch-0.0.4.dist-info/entry_points.txt,sha256=uiyoPO41jNz_KVOt2JdPak9NbVei-D8WQ6saMeMBFpE,53
|
|
18
|
+
rdrpcatch-0.0.4.dist-info/licenses/LICENSE,sha256=3jm5vKRMIaiETEFfNN34-oyWUShxZtmDmL38PNAwlUI,1120
|
|
19
|
+
rdrpcatch-0.0.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|