rdrpcatch 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py +1 -1
- rdrpcatch/rdrpcatch_scripts/utils.py +30 -15
- rdrpcatch/rdrpcatch_wrapper.py +7 -8
- {rdrpcatch-0.0.3.dist-info → rdrpcatch-0.0.5.dist-info}/METADATA +2 -2
- {rdrpcatch-0.0.3.dist-info → rdrpcatch-0.0.5.dist-info}/RECORD +8 -8
- {rdrpcatch-0.0.3.dist-info → rdrpcatch-0.0.5.dist-info}/WHEEL +0 -0
- {rdrpcatch-0.0.3.dist-info → rdrpcatch-0.0.5.dist-info}/entry_points.txt +0 -0
- {rdrpcatch-0.0.3.dist-info → rdrpcatch-0.0.5.dist-info}/licenses/LICENSE +0 -0
|
@@ -68,7 +68,7 @@ class hmmsearch_formatter:
|
|
|
68
68
|
# Check if the dataframe is empty
|
|
69
69
|
if data_df.is_empty():
|
|
70
70
|
title_line= ['Contig_name', 'Translated_contig_name (frame)', 'Sequence_length(AA)', 'Profile_name',
|
|
71
|
-
'Profile_length', 'E-value', 'score',
|
|
71
|
+
'Profile_length', 'E-value', 'score','norm_bitscore_profile',
|
|
72
72
|
'norm_bitscore_contig', 'ID_score', 'RdRp_from(AA)', 'RdRp_to(AA)', 'profile_coverage',
|
|
73
73
|
'contig_coverage']
|
|
74
74
|
data_df = pl.DataFrame({col: [] for col in title_line})
|
|
@@ -255,33 +255,48 @@ class fasta:
|
|
|
255
255
|
self.logger.silent_log(f"Processing {len(rdrp_coords_list)} coordinates")
|
|
256
256
|
self.logger.silent_log(f"First few coordinates: {rdrp_coords_list[:3]}")
|
|
257
257
|
|
|
258
|
+
contig_dict = {}
|
|
259
|
+
for contig_name, rdrp_from, rdrp_to in rdrp_coords_list:
|
|
260
|
+
contig_key = str(contig_name).strip()
|
|
261
|
+
if contig_key not in contig_dict:
|
|
262
|
+
contig_dict[contig_key] = []
|
|
263
|
+
contig_dict[contig_key].append((rdrp_from, rdrp_to))
|
|
264
|
+
|
|
258
265
|
reader = needletail.parse_fastx_file(self.fasta_file)
|
|
259
266
|
matches_found = 0
|
|
260
267
|
with open(outfile, 'w') as out_handle:
|
|
261
268
|
for record in reader:
|
|
262
|
-
#
|
|
269
|
+
# Get the record ID
|
|
263
270
|
record_id = record.id.strip().split(" ")[0]
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
contig_name = str(contig_name).strip()
|
|
271
|
+
|
|
272
|
+
# Check if this record matches any of our target contigs
|
|
273
|
+
if record_id in contig_dict:
|
|
268
274
|
if self.logger:
|
|
269
|
-
self.logger.silent_log(f"
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
275
|
+
self.logger.silent_log(f"Match found for record ID: '{record_id}'")
|
|
276
|
+
|
|
277
|
+
# Process all matching coordinates for this contig
|
|
278
|
+
for rdrp_from, rdrp_to in contig_dict[record_id]:
|
|
279
|
+
seq = record.seq[rdrp_from - 1:rdrp_to]
|
|
273
280
|
fasta_header = f"{record_id}_RdRp_{rdrp_from}-{rdrp_to}"
|
|
274
281
|
out_handle.write(f">{fasta_header}\n{seq}\n")
|
|
282
|
+
matches_found += 1
|
|
283
|
+
|
|
284
|
+
# Remove the processed contig to avoid future checks
|
|
285
|
+
del contig_dict[record_id]
|
|
286
|
+
|
|
287
|
+
# If all contigs have been found, exit early
|
|
288
|
+
if not contig_dict:
|
|
275
289
|
if self.logger:
|
|
276
|
-
self.logger.silent_log(
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
self.logger.silent_log(f"No match - lengths: {len(record_id)}|{len(contig_name)}, "
|
|
280
|
-
f"record_id bytes: {record_id.encode()}, contig bytes: {contig_name.encode()}")
|
|
281
|
-
|
|
290
|
+
self.logger.silent_log("All contigs processed. Exiting early.")
|
|
291
|
+
break
|
|
292
|
+
|
|
282
293
|
if self.logger:
|
|
283
294
|
self.logger.silent_log(f"Total matches found: {matches_found}")
|
|
284
295
|
|
|
296
|
+
return matches_found
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
|
|
285
300
|
|
|
286
301
|
class mmseqs_parser:
|
|
287
302
|
|
rdrpcatch/rdrpcatch_wrapper.py
CHANGED
|
@@ -111,7 +111,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
111
111
|
if not os.path.exists(outputs.output_dir):
|
|
112
112
|
os.makedirs(outputs.output_dir)
|
|
113
113
|
else:
|
|
114
|
-
raise FileExistsError(f"Output directory already exists: {outputs.output_dir},
|
|
114
|
+
raise FileExistsError(f"Output directory already exists: {outputs.output_dir}, Please choose a different directory.")
|
|
115
115
|
if not os.path.exists(outputs.log_dir):
|
|
116
116
|
os.makedirs(outputs.log_dir)
|
|
117
117
|
|
|
@@ -363,7 +363,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
363
363
|
|
|
364
364
|
|
|
365
365
|
# Combine all the dataframes in the list
|
|
366
|
-
combined_df = pl.concat(df_list, how='
|
|
366
|
+
combined_df = pl.concat(df_list, how='vertical_relaxed')
|
|
367
367
|
# Write the combined dataframe to a tsv file
|
|
368
368
|
for col in ['E-value', 'score', 'norm_bitscore_profile', 'norm_bitscore_contig',
|
|
369
369
|
'ID_score', 'profile_coverage', 'contig_coverage']:
|
|
@@ -526,9 +526,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
526
526
|
outputs.tsv_outdir.mkdir(parents=True)
|
|
527
527
|
|
|
528
528
|
|
|
529
|
-
|
|
530
529
|
# Combine all the dataframes in the list
|
|
531
|
-
combined_df = pl.concat(df_list, how='
|
|
530
|
+
combined_df = pl.concat(df_list, how='vertical_relaxed')
|
|
532
531
|
# Write the combined dataframe to a tsv file
|
|
533
532
|
for col in ['E-value', 'score', 'norm_bitscore_profile', 'norm_bitscore_contig',
|
|
534
533
|
'ID_score', 'profile_coverage', 'contig_coverage']:
|
|
@@ -581,9 +580,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
581
580
|
utils.fasta(input_file).write_fasta(utils.fasta(input_file).extract_contigs(combined_set), outputs.fasta_prot_out_path)
|
|
582
581
|
|
|
583
582
|
if verbose:
|
|
584
|
-
logger.loud_log(f"
|
|
583
|
+
logger.loud_log(f"Full aminoacid contigs written to: {outputs.fasta_prot_out_path}")
|
|
585
584
|
else:
|
|
586
|
-
logger.silent_log(f"
|
|
585
|
+
logger.silent_log(f" Full aminoacid contigs written to: {outputs.fasta_prot_out_path}")
|
|
587
586
|
|
|
588
587
|
if not os.path.exists(outputs.gff_output_dir):
|
|
589
588
|
outputs.gff_output_dir.mkdir(parents=True)
|
|
@@ -594,9 +593,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
|
|
|
594
593
|
utils.fasta(input_file, logger).write_fasta_coords(rdrp_coords_list,outputs.fasta_trimmed_out_path, seq_type)
|
|
595
594
|
|
|
596
595
|
if verbose:
|
|
597
|
-
logger.loud_log(f"
|
|
596
|
+
logger.loud_log(f"Trimmed contigs written to: {outputs.fasta_trimmed_out_path}")
|
|
598
597
|
else:
|
|
599
|
-
logger.silent_log(f"
|
|
598
|
+
logger.silent_log(f"Trimmed contigs written to: {outputs.fasta_trimmed_out_path}")
|
|
600
599
|
|
|
601
600
|
if not os.path.exists(outputs.mmseqs_tax_output_dir):
|
|
602
601
|
outputs.mmseqs_tax_output_dir.mkdir(parents=True)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rdrpcatch
|
|
3
|
-
Version: 0.0.3
|
|
3
|
+
Version: 0.0.5
|
|
4
4
|
Dynamic: Summary
|
|
5
5
|
Project-URL: Home, https://github.com/dimitris-karapliafis/RdRpCATCH
|
|
6
6
|
Project-URL: Source, https://github.com/dimitris-karapliafis/RdRpCATCH
|
|
@@ -81,7 +81,7 @@ The dependencies can be installed using conda or mamba. Follow these steps:
|
|
|
81
81
|
|
|
82
82
|
Create a new conda environment and install the dependencies:
|
|
83
83
|
```bash
|
|
84
|
-
conda create -n rdrpcatch python=3.12
|
|
84
|
+
conda env create -n rdrpcatch python=3.12
|
|
85
85
|
conda activate rdrpcatch
|
|
86
86
|
conda install -c bioconda mmseqs2==17.b804f seqkit==2.10.0
|
|
87
87
|
```
|
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
rdrpcatch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
rdrpcatch/rdrpcatch_wrapper.py,sha256=
|
|
2
|
+
rdrpcatch/rdrpcatch_wrapper.py,sha256=bZ5w4NuTlCSUsCx9baEtJSk7jGiyp-6XthO80IKaMXI,30564
|
|
3
3
|
rdrpcatch/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
rdrpcatch/cli/args.py,sha256=2E2gXY42hNasUP94HmPxpgVCA1glk_oN7D5ftbu6W2c,15805
|
|
5
5
|
rdrpcatch/rdrpcatch_scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
rdrpcatch/rdrpcatch_scripts/fetch_dbs.py,sha256=e9ShColfLgBvWSZpGOvY3zKhEgIg3rw1IIV__KX7N-g,11054
|
|
7
|
-
rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py,sha256=
|
|
7
|
+
rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py,sha256=2_ERXFQK2lpVReWl0jwQdnKIObv_zq07uFJOzGsTHlo,25025
|
|
8
8
|
rdrpcatch/rdrpcatch_scripts/gui.py,sha256=he8kx_4VJWB7SVv9XSQPk0DmkOjEFIg-uGMAtDp3t-w,10576
|
|
9
9
|
rdrpcatch/rdrpcatch_scripts/mmseqs_tax.py,sha256=bwzuCxu8nHQ5OC0Yr5Lyvhcyk9OWjuamInqe0T0lc38,3809
|
|
10
10
|
rdrpcatch/rdrpcatch_scripts/paths.py,sha256=roTZ2QPF4Fii7jtHkS9I6INJg1Vu78Dc_ieQGKjOCP4,4710
|
|
11
11
|
rdrpcatch/rdrpcatch_scripts/plot.py,sha256=Y1mZL7rkKHFKEs2D7T2Qj2kpfiORmFwRLq1LYWqwcJI,5938
|
|
12
12
|
rdrpcatch/rdrpcatch_scripts/run_pyhmmer.py,sha256=9zcMzaIwQ4_-NgYzG9kejxOBaDi-gbzaqpvZti8ZXA4,9008
|
|
13
13
|
rdrpcatch/rdrpcatch_scripts/run_seqkit.py,sha256=5y7DtJ6NLa4sRoBQOcjBfczKlqG_LibNrEqNmKLrHu0,4361
|
|
14
|
-
rdrpcatch/rdrpcatch_scripts/utils.py,sha256=
|
|
15
|
-
rdrpcatch-0.0.
|
|
16
|
-
rdrpcatch-0.0.
|
|
17
|
-
rdrpcatch-0.0.
|
|
18
|
-
rdrpcatch-0.0.
|
|
19
|
-
rdrpcatch-0.0.
|
|
14
|
+
rdrpcatch/rdrpcatch_scripts/utils.py,sha256=jvpyPxchAMn6BeLV7HOFECSY_a3nbkxDBBL8tunmM8A,16938
|
|
15
|
+
rdrpcatch-0.0.5.dist-info/METADATA,sha256=X3wolDh_nUrk7caPG4jFMvsF7FHZCvYuGzjPLZnC4VA,14004
|
|
16
|
+
rdrpcatch-0.0.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
17
|
+
rdrpcatch-0.0.5.dist-info/entry_points.txt,sha256=uiyoPO41jNz_KVOt2JdPak9NbVei-D8WQ6saMeMBFpE,53
|
|
18
|
+
rdrpcatch-0.0.5.dist-info/licenses/LICENSE,sha256=3jm5vKRMIaiETEFfNN34-oyWUShxZtmDmL38PNAwlUI,1120
|
|
19
|
+
rdrpcatch-0.0.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|