PyPI - rdrpcatch - Versions diffs - 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl - Mend

rdrpcatch 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py CHANGED Viewed

@@ -68,7 +68,7 @@ class hmmsearch_formatter:
         # Check if the dataframe is empty
         if data_df.is_empty():
             title_line= ['Contig_name', 'Translated_contig_name (frame)', 'Sequence_length(AA)', 'Profile_name',
-                         'Profile_length', 'E-value', 'score', 'acc', 'norm_bitscore_profile',
+                         'Profile_length', 'E-value', 'score','norm_bitscore_profile',
                          'norm_bitscore_contig', 'ID_score', 'RdRp_from(AA)', 'RdRp_to(AA)', 'profile_coverage',
                          'contig_coverage']
             data_df = pl.DataFrame({col: [] for col in title_line})

rdrpcatch/rdrpcatch_scripts/utils.py CHANGED Viewed

@@ -255,33 +255,48 @@ class fasta:
             self.logger.silent_log(f"Processing {len(rdrp_coords_list)} coordinates")
             self.logger.silent_log(f"First few coordinates: {rdrp_coords_list[:3]}")
+        contig_dict = {}
+        for contig_name, rdrp_from, rdrp_to in rdrp_coords_list:
+            contig_key = str(contig_name).strip()
+            if contig_key not in contig_dict:
+                contig_dict[contig_key] = []
+            contig_dict[contig_key].append((rdrp_from, rdrp_to))
         reader = needletail.parse_fastx_file(self.fasta_file)
         matches_found = 0
         with open(outfile, 'w') as out_handle:
             for record in reader:
-                # pyhmmer uses the first word of the header as the ID, so split on whitespace
+                # Get the record ID
                 record_id = record.id.strip().split(" ")[0]
-                if self.logger:
-                    self.logger.silent_log(f"Processing record with ID: '{record_id}'")
-                for contig_name, rdrp_from, rdrp_to in rdrp_coords_list:
-                    contig_name = str(contig_name).strip()
+                # Check if this record matches any of our target contigs
+                if record_id in contig_dict:
                     if self.logger:
-                        self.logger.silent_log(f"Comparing record '{record_id}' with contig '{contig_name}'")
-                    if record_id == contig_name:
-                        matches_found += 1
-                        seq = record.seq[rdrp_from-1:rdrp_to]
+                        self.logger.silent_log(f"Match found for record ID: '{record_id}'")
+                    # Process all matching coordinates for this contig
+                    for rdrp_from, rdrp_to in contig_dict[record_id]:
+                        seq = record.seq[rdrp_from - 1:rdrp_to]
                         fasta_header = f"{record_id}_RdRp_{rdrp_from}-{rdrp_to}"
                         out_handle.write(f">{fasta_header}\n{seq}\n")
+                        matches_found += 1
+                    # Remove the processed contig to avoid future checks
+                    del contig_dict[record_id]
+                    # If all contigs have been found, exit early
+                    if not contig_dict:
                         if self.logger:
-                            self.logger.silent_log(f"Match found! Writing sequence of length {len(seq)}")
-                    else:
-                        if self.logger:
-                            self.logger.silent_log(f"No match - lengths: {len(record_id)}|{len(contig_name)}, "
-                                                 f"record_id bytes: {record_id.encode()}, contig bytes: {contig_name.encode()}")
+                            self.logger.silent_log("All contigs processed. Exiting early.")
+                        break
         if self.logger:
             self.logger.silent_log(f"Total matches found: {matches_found}")
+        return matches_found
 class mmseqs_parser:

rdrpcatch/rdrpcatch_wrapper.py CHANGED Viewed

@@ -111,7 +111,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
     if not os.path.exists(outputs.output_dir):
         os.makedirs(outputs.output_dir)
     else:
-        raise FileExistsError(f"Output directory already exists: {outputs.output_dir}, please choose a different directory.")
+        raise FileExistsError(f"Output directory already exists: {outputs.output_dir}, Please choose a different directory.")
     if not os.path.exists(outputs.log_dir):
         os.makedirs(outputs.log_dir)
@@ -363,7 +363,7 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
         # Combine all the dataframes in the list
-        combined_df = pl.concat(df_list, how='vertical')
+        combined_df = pl.concat(df_list, how='vertical_relaxed')
         # Write the combined dataframe to a tsv file
         for col in ['E-value', 'score', 'norm_bitscore_profile', 'norm_bitscore_contig',
                     'ID_score', 'profile_coverage', 'contig_coverage']:
@@ -526,9 +526,8 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
             outputs.tsv_outdir.mkdir(parents=True)
         # Combine all the dataframes in the list
-        combined_df = pl.concat(df_list, how='vertical')
+        combined_df = pl.concat(df_list, how='vertical_relaxed')
         # Write the combined dataframe to a tsv file
         for col in ['E-value', 'score', 'norm_bitscore_profile', 'norm_bitscore_contig',
                     'ID_score', 'profile_coverage', 'contig_coverage']:
@@ -581,9 +580,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
         utils.fasta(input_file).write_fasta(utils.fasta(input_file).extract_contigs(combined_set), outputs.fasta_prot_out_path)
         if verbose:
-            logger.loud_log(f"Contigs written to: {outputs.fasta_prot_out_path}")
+            logger.loud_log(f"Full aminoacid contigs written to: {outputs.fasta_prot_out_path}")
         else:
-            logger.silent_log(f"Contigs written to: {outputs.fasta_prot_out_path}")
+            logger.silent_log(f" Full aminoacid contigs written to: {outputs.fasta_prot_out_path}")
         if not os.path.exists(outputs.gff_output_dir):
             outputs.gff_output_dir.mkdir(parents=True)
@@ -594,9 +593,9 @@ def run_scan(input_file, output_dir, db_options, db_dir, seq_type, verbose, e,in
         utils.fasta(input_file, logger).write_fasta_coords(rdrp_coords_list,outputs.fasta_trimmed_out_path, seq_type)
         if verbose:
-            logger.loud_log(f"RdRpCATCH output file written to: {outputs.fasta_prot_out_path}")
+            logger.loud_log(f"Trimmed contigs written to: {outputs.fasta_trimmed_out_path}")
         else:
-            logger.silent_log(f"RdRpCATCH output file written to: {outputs.fasta_prot_out_path}")
+            logger.silent_log(f"Trimmed contigs written to: {outputs.fasta_trimmed_out_path}")
         if not os.path.exists(outputs.mmseqs_tax_output_dir):
             outputs.mmseqs_tax_output_dir.mkdir(parents=True)

{rdrpcatch-0.0.3.dist-info → rdrpcatch-0.0.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rdrpcatch
-Version: 0.0.3
+Version: 0.0.5
 Dynamic: Summary
 Project-URL: Home, https://github.com/dimitris-karapliafis/RdRpCATCH
 Project-URL: Source, https://github.com/dimitris-karapliafis/RdRpCATCH
@@ -81,7 +81,7 @@ The dependencies can be installed using conda or mamba. Follow these steps:
 Create a new conda environment and install the dependencies:
 ```bash
-conda create -n rdrpcatch python=3.12
+conda env create -n rdrpcatch python=3.12
 conda activate rdrpcatch
 conda install -c bioconda mmseqs2==17.b804f seqkit==2.10.0
 ```

{rdrpcatch-0.0.3.dist-info → rdrpcatch-0.0.5.dist-info}/RECORD RENAMED Viewed

@@ -1,19 +1,19 @@
 rdrpcatch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-rdrpcatch/rdrpcatch_wrapper.py,sha256=skWDoNCTEKc_7eA6HjBIGe8jk-J1xnzU-zyOzCiA_jo,30525
+rdrpcatch/rdrpcatch_wrapper.py,sha256=bZ5w4NuTlCSUsCx9baEtJSk7jGiyp-6XthO80IKaMXI,30564
 rdrpcatch/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rdrpcatch/cli/args.py,sha256=2E2gXY42hNasUP94HmPxpgVCA1glk_oN7D5ftbu6W2c,15805
 rdrpcatch/rdrpcatch_scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rdrpcatch/rdrpcatch_scripts/fetch_dbs.py,sha256=e9ShColfLgBvWSZpGOvY3zKhEgIg3rw1IIV__KX7N-g,11054
-rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py,sha256=uah2XPrGNkkeptCv_WWBz_qTn5AtDTfVm6XjbCmNO00,25033
+rdrpcatch/rdrpcatch_scripts/format_pyhmmer_out.py,sha256=2_ERXFQK2lpVReWl0jwQdnKIObv_zq07uFJOzGsTHlo,25025
 rdrpcatch/rdrpcatch_scripts/gui.py,sha256=he8kx_4VJWB7SVv9XSQPk0DmkOjEFIg-uGMAtDp3t-w,10576
 rdrpcatch/rdrpcatch_scripts/mmseqs_tax.py,sha256=bwzuCxu8nHQ5OC0Yr5Lyvhcyk9OWjuamInqe0T0lc38,3809
 rdrpcatch/rdrpcatch_scripts/paths.py,sha256=roTZ2QPF4Fii7jtHkS9I6INJg1Vu78Dc_ieQGKjOCP4,4710
 rdrpcatch/rdrpcatch_scripts/plot.py,sha256=Y1mZL7rkKHFKEs2D7T2Qj2kpfiORmFwRLq1LYWqwcJI,5938
 rdrpcatch/rdrpcatch_scripts/run_pyhmmer.py,sha256=9zcMzaIwQ4_-NgYzG9kejxOBaDi-gbzaqpvZti8ZXA4,9008
 rdrpcatch/rdrpcatch_scripts/run_seqkit.py,sha256=5y7DtJ6NLa4sRoBQOcjBfczKlqG_LibNrEqNmKLrHu0,4361
-rdrpcatch/rdrpcatch_scripts/utils.py,sha256=Wx1GXhAPBfJw7x67sOu7WclZzMo0N3O-hxNYTVxc3v4,16780
-rdrpcatch-0.0.3.dist-info/METADATA,sha256=8tUKJfUQb2uEdha9EQuhI1OyEjXnWD4byUM6lzFBlZE,14000
-rdrpcatch-0.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-rdrpcatch-0.0.3.dist-info/entry_points.txt,sha256=uiyoPO41jNz_KVOt2JdPak9NbVei-D8WQ6saMeMBFpE,53
-rdrpcatch-0.0.3.dist-info/licenses/LICENSE,sha256=3jm5vKRMIaiETEFfNN34-oyWUShxZtmDmL38PNAwlUI,1120
-rdrpcatch-0.0.3.dist-info/RECORD,,
+rdrpcatch/rdrpcatch_scripts/utils.py,sha256=jvpyPxchAMn6BeLV7HOFECSY_a3nbkxDBBL8tunmM8A,16938
+rdrpcatch-0.0.5.dist-info/METADATA,sha256=X3wolDh_nUrk7caPG4jFMvsF7FHZCvYuGzjPLZnC4VA,14004
+rdrpcatch-0.0.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+rdrpcatch-0.0.5.dist-info/entry_points.txt,sha256=uiyoPO41jNz_KVOt2JdPak9NbVei-D8WQ6saMeMBFpE,53
+rdrpcatch-0.0.5.dist-info/licenses/LICENSE,sha256=3jm5vKRMIaiETEFfNN34-oyWUShxZtmDmL38PNAwlUI,1120
+rdrpcatch-0.0.5.dist-info/RECORD,,

{rdrpcatch-0.0.3.dist-info → rdrpcatch-0.0.5.dist-info}/WHEEL RENAMED Viewed

File without changes

{rdrpcatch-0.0.3.dist-info → rdrpcatch-0.0.5.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{rdrpcatch-0.0.3.dist-info → rdrpcatch-0.0.5.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

rdrpcatch 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

rdrpcatch 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl