rdrpcatch 0.0.3__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
@@ -255,33 +255,48 @@ class fasta:
255
255
  self.logger.silent_log(f"Processing {len(rdrp_coords_list)} coordinates")
256
256
  self.logger.silent_log(f"First few coordinates: {rdrp_coords_list[:3]}")
257
257
 
258
+ contig_dict = {}
259
+ for contig_name, rdrp_from, rdrp_to in rdrp_coords_list:
260
+ contig_key = str(contig_name).strip()
261
+ if contig_key not in contig_dict:
262
+ contig_dict[contig_key] = []
263
+ contig_dict[contig_key].append((rdrp_from, rdrp_to))
264
+
258
265
  reader = needletail.parse_fastx_file(self.fasta_file)
259
266
  matches_found = 0
260
267
  with open(outfile, 'w') as out_handle:
261
268
  for record in reader:
262
- # pyhmmer uses the first word of the header as the ID, so split on whitespace
269
+ # Get the record ID
263
270
  record_id = record.id.strip().split(" ")[0]
264
- if self.logger:
265
- self.logger.silent_log(f"Processing record with ID: '{record_id}'")
266
- for contig_name, rdrp_from, rdrp_to in rdrp_coords_list:
267
- contig_name = str(contig_name).strip()
271
+
272
+ # Check if this record matches any of our target contigs
273
+ if record_id in contig_dict:
268
274
  if self.logger:
269
- self.logger.silent_log(f"Comparing record '{record_id}' with contig '{contig_name}'")
270
- if record_id == contig_name:
271
- matches_found += 1
272
- seq = record.seq[rdrp_from-1:rdrp_to]
275
+ self.logger.silent_log(f"Match found for record ID: '{record_id}'")
276
+
277
+ # Process all matching coordinates for this contig
278
+ for rdrp_from, rdrp_to in contig_dict[record_id]:
279
+ seq = record.seq[rdrp_from - 1:rdrp_to]
273
280
  fasta_header = f"{record_id}_RdRp_{rdrp_from}-{rdrp_to}"
274
281
  out_handle.write(f">{fasta_header}\n{seq}\n")
282
+ matches_found += 1
283
+
284
+ # Remove the processed contig to avoid future checks
285
+ del contig_dict[record_id]
286
+
287
+ # If all contigs have been found, exit early
288
+ if not contig_dict:
275
289
  if self.logger:
276
- self.logger.silent_log(f"Match found! Writing sequence of length {len(seq)}")
277
- else:
278
- if self.logger:
279
- self.logger.silent_log(f"No match - lengths: {len(record_id)}|{len(contig_name)}, "
280
- f"record_id bytes: {record_id.encode()}, contig bytes: {contig_name.encode()}")
281
-
290
+ self.logger.silent_log("All contigs processed. Exiting early.")
291
+ break
292
+
282
293
  if self.logger:
283
294
  self.logger.silent_log(f"Total matches found: {matches_found}")
284
295
 
296
+ return matches_found
297
+
298
+
299
+
285
300
 
286
301
  class mmseqs_parser:
287
302
 
@@ -581,9 +581,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
581
581
  utils.fasta(input_file).write_fasta(utils.fasta(input_file).extract_contigs(combined_set), outputs.fasta_prot_out_path)
582
582
 
583
583
  if verbose:
584
- logger.loud_log(f"Contigs written to: {outputs.fasta_prot_out_path}")
584
+ logger.loud_log(f"Full aminoacid contigs written to: {outputs.fasta_prot_out_path}")
585
585
  else:
586
- logger.silent_log(f"Contigs written to: {outputs.fasta_prot_out_path}")
586
+ logger.silent_log(f" Full aminoacid contigs written to: {outputs.fasta_prot_out_path}")
587
587
 
588
588
  if not os.path.exists(outputs.gff_output_dir):
589
589
  outputs.gff_output_dir.mkdir(parents=True)
@@ -594,9 +594,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
594
594
  utils.fasta(input_file, logger).write_fasta_coords(rdrp_coords_list,outputs.fasta_trimmed_out_path, seq_type)
595
595
 
596
596
  if verbose:
597
- logger.loud_log(f"RdRpCATCH output file written to: {outputs.fasta_prot_out_path}")
597
+ logger.loud_log(f"Trimmed contigs written to: {outputs.fasta_trimmed_out_path}")
598
598
  else:
599
- logger.silent_log(f"RdRpCATCH output file written to: {outputs.fasta_prot_out_path}")
599
+ logger.silent_log(f"Trimmed contigs written to: {outputs.fasta_trimmed_out_path}")
600
600
 
601
601
  if not os.path.exists(outputs.mmseqs_tax_output_dir):
602
602
  outputs.mmseqs_tax_output_dir.mkdir(parents=True)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rdrpcatch
3
- Version: 0.0.3
3
+ Version: 0.0.4
4
4
  Dynamic: Summary
5
5
  Project-URL: Home, https://github.com/dimitris-karapliafis/RdRpCATCH
6
6
  Project-URL: Source, https://github.com/dimitris-karapliafis/RdRpCATCH
@@ -1,5 +1,5 @@
1
1
  rdrpcatch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- rdrpcatch/rdrpcatch_wrapper.py,sha256=skWDoNCTEKc_7eA6HjBIGe8jk-J1xnzU-zyOzCiA_jo,30525
2
+ rdrpcatch/rdrpcatch_wrapper.py,sha256=PUMzQ3tIU0VuBY6XrRXupo-NruszwHreyxWXzEARjM4,30550
3
3
  rdrpcatch/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  rdrpcatch/cli/args.py,sha256=2E2gXY42hNasUP94HmPxpgVCA1glk_oN7D5ftbu6W2c,15805
5
5
  rdrpcatch/rdrpcatch_scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -11,9 +11,9 @@ rdrpcatch/rdrpcatch_scripts/paths.py,sha256=roTZ2QPF4Fii7jtHkS9I6INJg1Vu78Dc_ieQ
11
11
  rdrpcatch/rdrpcatch_scripts/plot.py,sha256=Y1mZL7rkKHFKEs2D7T2Qj2kpfiORmFwRLq1LYWqwcJI,5938
12
12
  rdrpcatch/rdrpcatch_scripts/run_pyhmmer.py,sha256=9zcMzaIwQ4_-NgYzG9kejxOBaDi-gbzaqpvZti8ZXA4,9008
13
13
  rdrpcatch/rdrpcatch_scripts/run_seqkit.py,sha256=5y7DtJ6NLa4sRoBQOcjBfczKlqG_LibNrEqNmKLrHu0,4361
14
- rdrpcatch/rdrpcatch_scripts/utils.py,sha256=Wx1GXhAPBfJw7x67sOu7WclZzMo0N3O-hxNYTVxc3v4,16780
15
- rdrpcatch-0.0.3.dist-info/METADATA,sha256=8tUKJfUQb2uEdha9EQuhI1OyEjXnWD4byUM6lzFBlZE,14000
16
- rdrpcatch-0.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
- rdrpcatch-0.0.3.dist-info/entry_points.txt,sha256=uiyoPO41jNz_KVOt2JdPak9NbVei-D8WQ6saMeMBFpE,53
18
- rdrpcatch-0.0.3.dist-info/licenses/LICENSE,sha256=3jm5vKRMIaiETEFfNN34-oyWUShxZtmDmL38PNAwlUI,1120
19
- rdrpcatch-0.0.3.dist-info/RECORD,,
14
+ rdrpcatch/rdrpcatch_scripts/utils.py,sha256=jvpyPxchAMn6BeLV7HOFECSY_a3nbkxDBBL8tunmM8A,16938
15
+ rdrpcatch-0.0.4.dist-info/METADATA,sha256=rhb3kvfpy5zj9dUgIB1MbRfv9NNPZeQlOS1YDB7ZkrA,14000
16
+ rdrpcatch-0.0.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
+ rdrpcatch-0.0.4.dist-info/entry_points.txt,sha256=uiyoPO41jNz_KVOt2JdPak9NbVei-D8WQ6saMeMBFpE,53
18
+ rdrpcatch-0.0.4.dist-info/licenses/LICENSE,sha256=3jm5vKRMIaiETEFfNN34-oyWUShxZtmDmL38PNAwlUI,1120
19
+ rdrpcatch-0.0.4.dist-info/RECORD,,