PyPI - PyamilySeq - Versions diffs - 1.1.2__py3-none-any.whl → 1.3.0__py3-none-any.whl - Mend

PyamilySeq 1.1.2-py3-none-any.whl → 1.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

PyamilySeq/PyamilySeq.py +2 -1
PyamilySeq/PyamilySeq_Species.py +185 -33
PyamilySeq/Seq_Combiner.py +2 -2
PyamilySeq/Seq_Extractor.py +6 -1
PyamilySeq/clusterings.py +2 -1
PyamilySeq/constants.py +1 -1
PyamilySeq/utils.py +1 -3
{pyamilyseq-1.1.2.dist-info → pyamilyseq-1.3.0.dist-info}/METADATA +6 -6
pyamilyseq-1.3.0.dist-info/RECORD +22 -0
{pyamilyseq-1.1.2.dist-info → pyamilyseq-1.3.0.dist-info}/WHEEL +1 -1
{pyamilyseq-1.1.2.dist-info → pyamilyseq-1.3.0.dist-info}/entry_points.txt +3 -0
pyamilyseq-1.1.2.dist-info/RECORD +0 -22
{pyamilyseq-1.1.2.dist-info → pyamilyseq-1.3.0.dist-info}/licenses/LICENSE +0 -0
{pyamilyseq-1.1.2.dist-info → pyamilyseq-1.3.0.dist-info}/top_level.txt +0 -0

PyamilySeq/PyamilySeq.py CHANGED Viewed

@@ -249,7 +249,6 @@ def main():
             run_cd_hit(options, file_to_cluster, clustering_output, clustering_mode)
         elif options.input_type == 'fasta':
             combined_out_file = options.input_fasta
-            ### FIX write code to detect if DNA or AA and if sequence tpye is AA then translate
             # Detect if the input FASTA file contains DNA or AA sequences
             is_dna = detect_sequence_type(options.input_fasta)
             # If the sequence type is AA and the input is DNA, translate the DNA to AA
@@ -315,7 +314,9 @@ def main():
     # Save arguments to a text file
+    from datetime import datetime
     with open(output_path+"/PyamilySeq_params.txt", "w") as outfile:
+        outfile.write(f"Timestamp: {datetime.now().isoformat()}\n")
         for arg, value in vars(options).items():
             outfile.write(f"{arg}: {value}\n")

PyamilySeq/PyamilySeq_Species.py CHANGED Viewed

@@ -9,35 +9,163 @@ except (ModuleNotFoundError, ImportError, NameError, TypeError) as error:
     from utils import *
-def gene_presence_absence_output(options, genome_dict, pangenome_clusters_First_sorted, pangenome_clusters_First_sequences_sorted):
+def gene_presence_absence_output(options, genome_dict, pangenome_clusters_First_sorted,
+                                 pangenome_clusters_First_sequences_sorted,
+                                 combined_pangenome_clusters_First_Second_clustered=None,
+                                 combined_pangenome_clusters_Second_sequences_sorted=None):
     print("Outputting gene_presence_absence file")
     output_dir = os.path.abspath(options.output_dir)
-    #in_name = options.clusters.split('.')[0].split('/')[-1]
     gpa_outfile = os.path.join(output_dir, 'gene_presence_absence.csv')
-    gpa_outfile = open(gpa_outfile, 'w')
     genome_dict = OrderedDict(sorted(genome_dict.items()))
-    gpa_outfile.write('"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment",'
-                     '"Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","')
-    gpa_outfile.write('","'.join(genome_dict.keys()))
-    gpa_outfile.write('"\n')
+    # Build a unified list of all clusters with their data
+    all_clusters = []
+    # Track which Second cluster IDs have sequences that were merged into First clusters
+    merged_second_cluster_ids = set()
+    # Process First clusters and their associated Second sequences
     for cluster, sequences in pangenome_clusters_First_sequences_sorted.items():
-        average_sequences_per_genome = len(sequences) / len(pangenome_clusters_First_sorted[cluster])
-        gpa_outfile.write('"group_'+str(cluster)+'","","","'+str(len(pangenome_clusters_First_sorted[cluster]))+'","'+str(len(sequences))+'","'+str(average_sequences_per_genome)+
-                         '","","","","","","","",""')
-        for genome in genome_dict.keys():
-            full_out = ''
-            tmp_list = []
-            for value in sequences:
-                if value.split('|')[0] == genome:
-                    tmp_list.append(value.split('|')[1])
-            if tmp_list:
-                full_out += ',"'+'  '.join(tmp_list)+'"'
-            else:
-                full_out = ',""'
-            gpa_outfile.write(full_out)
-        gpa_outfile.write('\n')
+        all_sequences = list(sequences)
+        has_second_sequences = False
+        # Add Second sequences that were clustered with this First cluster
+        if combined_pangenome_clusters_First_Second_clustered:
+            for seq in sequences:
+                if seq in combined_pangenome_clusters_First_Second_clustered:
+                    for clustered_seq in combined_pangenome_clusters_First_Second_clustered[seq]:
+                        if clustered_seq not in all_sequences:
+                            all_sequences.append(clustered_seq)
+                            # Check if this is a Second sequence (has the sequence_tag)
+                            if options.sequence_tag in clustered_seq:
+                                has_second_sequences = True
+                            # Track which Second cluster this sequence came from
+                            if combined_pangenome_clusters_Second_sequences_sorted:
+                                for second_cluster_id, second_seqs in combined_pangenome_clusters_Second_sequences_sorted.items():
+                                    if clustered_seq in second_seqs:
+                                        merged_second_cluster_ids.add(second_cluster_id)
+        # Calculate statistics based on number of genomes (not sequences)
+        genomes_in_cluster = set()
+        for seq in all_sequences:
+            genome = seq.split('|')[0]
+            genomes_in_cluster.add(genome)
+        num_isolates = len(genomes_in_cluster)
+        num_sequences = len(all_sequences)
+        # Name the cluster based on whether it has Second sequences
+        cluster_name = 'combined_group_' + str(cluster) if has_second_sequences else 'group_' + str(cluster)
+        all_clusters.append({
+            'name': cluster_name,
+            'num_genomes': num_isolates,
+            'num_sequences': num_sequences,
+            'sequences': all_sequences
+        })
+    # Process Second-only clusters (those not merged with First clusters)
+    if combined_pangenome_clusters_Second_sequences_sorted:
+        for cluster, sequences in combined_pangenome_clusters_Second_sequences_sorted.items():
+            # Only skip if this specific cluster ID had its sequences merged
+            if cluster in merged_second_cluster_ids:
+                continue
+            # Skip empty clusters
+            if not sequences or len(sequences) == 0:
+                continue
+            # This is a genuine Second-only cluster
+            all_sequences = list(sequences)
+            # Calculate statistics
+            genomes_in_cluster = set()
+            for seq in all_sequences:
+                genome = seq.split('|')[0]
+                genomes_in_cluster.add(genome)
+            num_isolates = len(genomes_in_cluster)
+            num_sequences = len(all_sequences)
+            # Skip if no genomes (shouldn't happen, but safety check)
+            if num_isolates == 0 or num_sequences == 0:
+                continue
+            all_clusters.append({
+                'name': 'Second_group_' + str(cluster),
+                'num_genomes': num_isolates,
+                'num_sequences': num_sequences,
+                'sequences': all_sequences
+            })
+    # Sort all clusters by number of genomes (descending), then by number of sequences
+    all_clusters.sort(key=lambda x: (x['num_genomes'], x['num_sequences']), reverse=True)
+    # Write to file
+    with open(gpa_outfile, 'w') as outfile:
+        # Write header
+        outfile.write(
+            '"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment",'
+            '"Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","')
+        outfile.write('","'.join(genome_dict.keys()))
+        outfile.write('"\n')
+        # Write all clusters in sorted order
+        for cluster_data in all_clusters:
+            num_isolates = cluster_data['num_genomes']
+            num_sequences = cluster_data['num_sequences']
+            average_sequences_per_genome = num_sequences / num_isolates if num_isolates > 0 else 0
+            # Write cluster info
+            outfile.write('"' + cluster_data['name'] + '","","","' + str(num_isolates) + '","' +
+                          str(num_sequences) + '","' + str(average_sequences_per_genome) + '","","","","","","","",""')
+            # Write presence/absence for each genome
+            for genome in genome_dict.keys():
+                tmp_list = []
+                for seq in cluster_data['sequences']:
+                    if seq.split('|')[0] == genome:
+                        tmp_list.append(seq.split('|')[1])
+                if tmp_list:
+                    outfile.write(',"' + '  '.join(tmp_list) + '"')
+                else:
+                    outfile.write(',""')
+            outfile.write('\n')
+    print(f"Total clusters written: {len(all_clusters)}")
+    if options.reclustered is not None:
+        print(f"Merged Second cluster IDs: {len(merged_second_cluster_ids)}")
+# def gene_presence_absence_output(options, genome_dict, pangenome_clusters_First_sorted, pangenome_clusters_First_sequences_sorted):
+#     print("Outputting gene_presence_absence file")
+#     output_dir = os.path.abspath(options.output_dir)
+#     #in_name = options.clusters.split('.')[0].split('/')[-1]
+#     gpa_outfile = os.path.join(output_dir, 'gene_presence_absence.csv')
+#     gpa_outfile = open(gpa_outfile, 'w')
+#     genome_dict = OrderedDict(sorted(genome_dict.items()))
+#     gpa_outfile.write('"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment",'
+#                      '"Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","')
+#     gpa_outfile.write('","'.join(genome_dict.keys()))
+#     gpa_outfile.write('"\n')
+#     for cluster, sequences in pangenome_clusters_First_sequences_sorted.items():
+#         average_sequences_per_genome = len(sequences) / len(pangenome_clusters_First_sorted[cluster])
+#         gpa_outfile.write('"group_'+str(cluster)+'","","","'+str(len(pangenome_clusters_First_sorted[cluster]))+'","'+str(len(sequences))+'","'+str(average_sequences_per_genome)+
+#                          '","","","","","","","",""')
+#
+#
+#         for genome in genome_dict.keys():
+#             full_out = ''
+#             tmp_list = []
+#             for value in sequences:
+#                 if value.split('|')[0] == genome:
+#                     tmp_list.append(value.split('|')[1])
+#             if tmp_list:
+#                 full_out += ',"'+'  '.join(tmp_list)+'"'
+#             else:
+#                 full_out = ',""'
+#             gpa_outfile.write(full_out)
+#         gpa_outfile.write('\n')
 ### Below is some unfinished code
     # edge_list_outfile = open(in_name+'_edge_list.csv','w')
@@ -147,22 +275,37 @@ def cluster(options):
     if options.reclustered != None: #FIX
         if options.cluster_format == 'CD-HIT':
-            combined_pangenome_clusters_First_Second_clustered,not_Second_only_cluster_ids,combined_pangenome_clusters_Second, combined_pangenome_clusters_Second_sequences = combined_clustering_CDHIT(options, genome_dict, '|')
+            combined_pangenome_clusters_First_Second_clustered, not_Second_only_cluster_ids, combined_pangenome_clusters_Second, combined_pangenome_clusters_Second_sequences = combined_clustering_CDHIT(options, genome_dict, '|')
         elif 'TSV' in options.cluster_format or 'CSV' in options.cluster_format:
             #Fix
-            combined_pangenome_clusters_First_Second_clustered,not_Second_only_cluster_ids,combined_pangenome_clusters_Second,combined_pangenome_clusters_Second_sequences  = combined_clustering_Edge_List(options, '|')
-        pangenome_clusters_Type = combined_clustering_counting(options, pangenome_clusters_First, reps, combined_pangenome_clusters_First_Second_clustered, pangenome_clusters_First_genomes, '|')
+            combined_pangenome_clusters_First_Second_clustered, not_Second_only_cluster_ids, combined_pangenome_clusters_Second, combined_pangenome_clusters_Second_sequences  = combined_clustering_Edge_List(options, '|')
+        pangenome_clusters_Type = combined_clustering_counting(options, pangenome_clusters_First, reps, combined_pangenome_clusters_First_Second_clustered, pangenome_clusters_First_genomes, combined_pangenome_clusters_Second, combined_pangenome_clusters_Second_sequences,  '|')
+        # Sort First clusters
+        sorted_First_keys = sort_keys_by_values(pangenome_clusters_First, pangenome_clusters_First_sequences)
+        pangenome_clusters_First_sorted = reorder_dict_by_keys(pangenome_clusters_First, sorted_First_keys)
+        pangenome_clusters_First_sequences_sorted = reorder_dict_by_keys(pangenome_clusters_First_sequences, sorted_First_keys)
+        pangenome_clusters_Type_sorted = reorder_dict_by_keys(pangenome_clusters_Type, sorted_First_keys)
+        # Sort Second clusters independently (no need to align with First)
+        sorted_Second_keys = sort_keys_by_values(combined_pangenome_clusters_Second, combined_pangenome_clusters_Second_sequences)
+        #combined_pangenome_clusters_Second_sorted = reorder_dict_by_keys(combined_pangenome_clusters_Second,sorted_Second_keys)
+        combined_pangenome_clusters_Second_sequences_sorted = reorder_dict_by_keys(combined_pangenome_clusters_Second_sequences, sorted_Second_keys)
     else:
         pangenome_clusters_Type = single_clustering_counting(pangenome_clusters_First, reps)
+        sorted_First_keys = sort_keys_by_values(pangenome_clusters_First, pangenome_clusters_First_sequences)
+        pangenome_clusters_First_sorted = reorder_dict_by_keys(pangenome_clusters_First, sorted_First_keys)
+        pangenome_clusters_First_sequences_sorted = reorder_dict_by_keys(pangenome_clusters_First_sequences,
+                                                                         sorted_First_keys)
+        pangenome_clusters_Type_sorted = reorder_dict_by_keys(pangenome_clusters_Type, sorted_First_keys)
     Number_Of_Second_Extending_But_Same_Genomes = 0
-    sorted_first_keys = sort_keys_by_values(pangenome_clusters_First, pangenome_clusters_First_sequences)
-    pangenome_clusters_First_sorted = reorder_dict_by_keys(pangenome_clusters_First, sorted_first_keys)
-    pangenome_clusters_First_sequences_sorted = reorder_dict_by_keys(pangenome_clusters_First_sequences, sorted_first_keys)
-    pangenome_clusters_Type_sorted = reorder_dict_by_keys(pangenome_clusters_Type, sorted_first_keys)
     print("Calculating Groups")
     seen_groupings = []
@@ -228,9 +371,18 @@ def cluster(options):
                 len(combined_pangenome_clusters_Second_sequences)))
             outfile.write("\nTotal Number of First Gene Groups That Had Additional Second Sequences But Not New Genomes: " + str(
                 Number_Of_Second_Extending_But_Same_Genomes))
-        #Report number of first and second clusters and do the ame for genus
     if options.gene_presence_absence_out != False:
-        gene_presence_absence_output(options,genome_dict, pangenome_clusters_First_sorted, pangenome_clusters_First_sequences_sorted)
+        if options.reclustered != None:
+            # Pass both First and Second clustering data
+            gene_presence_absence_output(options, genome_dict, pangenome_clusters_First_sorted,
+                                         pangenome_clusters_First_sequences_sorted,
+                                         combined_pangenome_clusters_First_Second_clustered,
+                                         combined_pangenome_clusters_Second_sequences_sorted)
+        else:
+            # Only First clustering data available
+            gene_presence_absence_output(options, genome_dict, pangenome_clusters_First_sorted,
+                                         pangenome_clusters_First_sequences_sorted)
     ###Need to fix this below. If full/partial the ifs need to be different. If full we first need to output the gfs then align. if -wruite-groups not presented then it needs

PyamilySeq/Seq_Combiner.py CHANGED Viewed

@@ -59,7 +59,7 @@ def main():
         exit(1)
     if options.input_type == 'fasta' and options.name_split_fasta is None:
         print("Please provide a substring to split the filename and extract the genome name.")
-        exit
+        exit(1)
     output_path = os.path.abspath(options.output_dir)
     if not os.path.exists(output_path):
@@ -77,7 +77,7 @@ def main():
     elif options.input_type == 'combined':
         read_combined_files(options.input_dir, options.name_split_gff, options.gene_ident, combined_out_file, options.translate, True)
     elif options.input_type == 'fasta':
-        read_fasta_files(options.input_dir, options.name_split_fasta, combined_out_file, options.translate)
+        read_fasta_files(options.input_dir, options.name_split_fasta, combined_out_file, options.translate, True)
 if __name__ == "__main__":
     main()

PyamilySeq/Seq_Extractor.py CHANGED Viewed

@@ -9,8 +9,13 @@ def find_gene_ids_in_csv(csv_file, group_name):
             cells = line.strip().split(',')
             if cells[0].replace('"','') == group_name:
                 # Collect gene IDs from column 14 onward
+                # for cell in cells[14:]:
+                #     gene_ids.extend(cell.strip().replace('"','').split())  # Splitting by spaces if there are multiple IDs in a cell                break
                 for cell in cells[14:]:
-                    gene_ids.extend(cell.strip().replace('"','').split())  # Splitting by spaces if there are multiple IDs in a cell                break
+                    for gene in cell.strip().replace('"', '').split(';'):
+                        if gene:
+                            gene_ids.append(gene)
     return gene_ids
 def extract_sequences(fasta_file, gene_ids):

PyamilySeq/clusterings.py CHANGED Viewed

@@ -156,7 +156,7 @@ def cluster_MMseqs(options,splitter):
 #@profile
-def combined_clustering_counting(options, pangenome_clusters_First, reps, combined_pangenome_clusters_First_Second_clustered, pangenome_clusters_First_genomes, splitter):
+def combined_clustering_counting(options, pangenome_clusters_First, reps, combined_pangenome_clusters_First_Second_clustered, pangenome_clusters_First_genomes, combined_pangenome_clusters_Second, combined_pangenome_clusters_Second_sequences, splitter):
     num_clustered_First = defaultdict(list)
     pangenome_clusters_Type = copy.deepcopy(pangenome_clusters_First)
     list_of_reps = list(reps.keys())
@@ -336,6 +336,7 @@ def combined_clustering_CDHIT(options, taxa_dict, splitter):
 # def cluster_BLAST(options, splitter):
 #     separator = '\t'
 #     First_in = open(options.clusters, 'r')

PyamilySeq/constants.py CHANGED Viewed

@@ -1,2 +1,2 @@
-PyamilySeq_Version = 'v1.1.2'
+PyamilySeq_Version = 'v1.3.0'

PyamilySeq/utils.py CHANGED Viewed

@@ -7,7 +7,6 @@ from tempfile import NamedTemporaryFile
 import sys
 import re
 import math
-#from config import config_params
 ####
 # Placeholder for the distance function
@@ -15,11 +14,10 @@ levenshtein_distance_cal = None
 # Check for Levenshtein library once
 try:
     import Levenshtein as LV
-    # Assign the optimized function
+    # Assign the optimised function
     def levenshtein_distance_calc(seq1, seq2):
         return LV.distance(seq1, seq2)
 except (ModuleNotFoundError, ImportError):
-    #if config_params.verbose == True: - Not implemented yet
     print("Levenshtein package not installed - Will fallback to slower Python implementation.")
     # Fallback implementation
     def levenshtein_distance_calc(seq1, seq2):

{pyamilyseq-1.1.2.dist-info → pyamilyseq-1.3.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: PyamilySeq
-Version: 1.1.2
+Version: 1.3.0
 Summary: PyamilySeq - A a tool to investigate sequence-based gene groups identified by clustering methods such as CD-HIT, DIAMOND, BLAST or MMseqs2.
 Home-page: https://github.com/NickJD/PyamilySeq
 Author: Nicholas Dimonaco
@@ -46,7 +46,7 @@ To update to the newest version add '-U' to end of the pip install command.
 ```commandline
 usage: PyamilySeq.py [-h] {Full,Partial} ...
-PyamilySeq v1.1.2: A tool for gene clustering and analysis.
+PyamilySeq v1.3.0: A tool for gene clustering and analysis.
 positional arguments:
   {Full,Partial}  Choose a mode: 'Full' or 'Partial'.
@@ -76,7 +76,7 @@ Escherichia_coli_110957|ENSB_TIZS9kbTvShDvyX	Escherichia_coli_110957|ENSB_TIZS9k
 ```
 ### Example output:
 ```
-Running PyamilySeq v1.1.2
+Running PyamilySeq v1.3.0
 Calculating Groups
 Number of Genomes: 10
 Gene Groups
@@ -221,7 +221,7 @@ Seq-Combiner -input_dir .../test_data/genomes -name_split_gff .gff3 -output_dir
 ```
 usage: Seq_Combiner.py [-h] -input_dir INPUT_DIR -input_type {separate,combined,fasta} [-name_split_gff NAME_SPLIT_GFF] [-name_split_fasta NAME_SPLIT_FASTA] -output_dir OUTPUT_DIR -output_name OUTPUT_FILE [-gene_ident GENE_IDENT] [-translate] [-v]
-PyamilySeq v1.1.2: Seq-Combiner - A tool to extract sequences from GFF/FASTA files and prepare them for PyamilySeq.
+PyamilySeq v1.3.0: Seq-Combiner - A tool to extract sequences from GFF/FASTA files and prepare them for PyamilySeq.
 options:
   -h, --help            show this help message and exit
@@ -264,7 +264,7 @@ usage: Group_Splitter.py [-h] -input_fasta INPUT_FASTA -sequence_type {AA,DNA}
                          [-M CLUSTERING_MEMORY] [-no_delete_temp_files]
                          [-verbose] [-v]
-PyamilySeq v1.1.2: Group-Splitter - A tool to split multi-copy gene groups
+PyamilySeq v1.3.0: Group-Splitter - A tool to split multi-copy gene groups
 identified by PyamilySeq.
 options:
@@ -317,7 +317,7 @@ Cluster-Summary -genome_num 10 -input_clstr .../test_data/species/E-coli/E-coli_
 usage: Cluster_Summary.py [-h] -input_clstr INPUT_CLSTR -output OUTPUT -genome_num GENOME_NUM
                           [-output_dir OUTPUT_DIR] [-verbose] [-v]
-PyamilySeq v1.1.2: Cluster-Summary - A tool to summarise CD-HIT clustering files.
+PyamilySeq v1.3.0: Cluster-Summary - A tool to summarise CD-HIT clustering files.
 options:
   -h, --help            show this help message and exit

pyamilyseq-1.3.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,22 @@
+PyamilySeq/Cluster_Compare.py,sha256=2jRXBYN8T9TUDLV9bj3SWFQ2pBUH3BAKW1FYrDYSQBw,4421
+PyamilySeq/Cluster_Summary.py,sha256=efXMfGvATERCTxwaqbauhZwt_5Hrf9KpGKY3EgsHVDk,6720
+PyamilySeq/Group_Extractor.py,sha256=oe2VmOVxdvTmAcy8NKwD1F27IdN2utAfczxsyxg96yc,2898
+PyamilySeq/Group_Sizes.py,sha256=3snkAN19o3Y4IY6IqSim1qy415FfQe1Wb8vzWTKF0Wo,3028
+PyamilySeq/Group_Splitter.py,sha256=OcMj9GnAyybs_DaNKRyvfL_nl2dB2gUI4BD_EQrBbWo,25653
+PyamilySeq/PyamilySeq.py,sha256=vc_mFuK-Cmqa3elusyIg5DksTHxdwCcFv2ki2ACqXIA,17468
+PyamilySeq/PyamilySeq_Genus.py,sha256=KUC0QkCRpKQ9HEgxyTSD7Nc63wSXtriWyIqt_YOy5ys,12470
+PyamilySeq/PyamilySeq_Species.py,sha256=1P_xbdUrZPykyS44KFyRdyxU-XPX_yKLpJhjhB8VTpg,24029
+PyamilySeq/Seq_Combiner.py,sha256=G49zthWtsTfqYX1tqc7op9a9cSia1IL0VTiAtwgdTwc,4746
+PyamilySeq/Seq_Extractor.py,sha256=IQk4Qn6LJkPXD1O4TQesneS3_ZN8hBsTVZQGlZ1c-Dk,3072
+PyamilySeq/Seq_Finder.py,sha256=ht-fSQ_opWKydcoWI9D3nTwLt6Rpgevnf2y0KxVjw4M,1881
+PyamilySeq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+PyamilySeq/clusterings.py,sha256=bfST7_i6qnj9ogclmOSkQhurZaEWBBq0H48FAN7JpOg,22374
+PyamilySeq/config.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+PyamilySeq/constants.py,sha256=cykSvqrDAsDJCo9FJJ7lRvoOho2H7xCGE923X9iOE4U,31
+PyamilySeq/utils.py,sha256=1U794Xd5qzmaIz2VujdnPkND729kr7rKjei0Y57f-QE,32972
+pyamilyseq-1.3.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+pyamilyseq-1.3.0.dist-info/METADATA,sha256=vl5fx_QOahTxZfwDXVzJPgMWIIiLGDnr2VmP-rxWIWo,17979
+pyamilyseq-1.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pyamilyseq-1.3.0.dist-info/entry_points.txt,sha256=5RkUWIneXu-kCnClJhv0u27lWHAoyoVmospZMU5Cs2U,846
+pyamilyseq-1.3.0.dist-info/top_level.txt,sha256=J6JhugUQTq4rq96yibAlQu3o4KCM9WuYfqr3w1r119M,11
+pyamilyseq-1.3.0.dist-info/RECORD,,

{pyamilyseq-1.1.2.dist-info → pyamilyseq-1.3.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.3.1)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{pyamilyseq-1.1.2.dist-info → pyamilyseq-1.3.0.dist-info}/entry_points.txt RENAMED Viewed

@@ -8,6 +8,9 @@ Seq-Extractor = PyamilySeq.Seq_Extractor:main
 Seq-Finder = PyamilySeq.Seq_Finder:main
 cluster-extractor = PyamilySeq.Cluster_Extractor:main
 cluster-summary = PyamilySeq.Cluster_Summary:main
+compare-contree-singletrees = aux_tools.RF.compare_contree_singletrees:main
+compare-rf = aux_tools.RF.compare_RF:main
+compute-singletrees-rf = aux_tools.RF.Compute_SingleTree_RFs:main
 group-splitter = PyamilySeq.Group_Splitter:main
 pyamilyseq = PyamilySeq.PyamilySeq:main
 seq-combiner = PyamilySeq.Seq_Combiner:main

pyamilyseq-1.1.2.dist-info/RECORD DELETED Viewed

@@ -1,22 +0,0 @@
-PyamilySeq/Cluster_Compare.py,sha256=2jRXBYN8T9TUDLV9bj3SWFQ2pBUH3BAKW1FYrDYSQBw,4421
-PyamilySeq/Cluster_Summary.py,sha256=efXMfGvATERCTxwaqbauhZwt_5Hrf9KpGKY3EgsHVDk,6720
-PyamilySeq/Group_Extractor.py,sha256=oe2VmOVxdvTmAcy8NKwD1F27IdN2utAfczxsyxg96yc,2898
-PyamilySeq/Group_Sizes.py,sha256=3snkAN19o3Y4IY6IqSim1qy415FfQe1Wb8vzWTKF0Wo,3028
-PyamilySeq/Group_Splitter.py,sha256=OcMj9GnAyybs_DaNKRyvfL_nl2dB2gUI4BD_EQrBbWo,25653
-PyamilySeq/PyamilySeq.py,sha256=tdmIDB2ZYCRfMFQSuWrN0Psr5ggSaoUcT2wEv54jWos,17462
-PyamilySeq/PyamilySeq_Genus.py,sha256=KUC0QkCRpKQ9HEgxyTSD7Nc63wSXtriWyIqt_YOy5ys,12470
-PyamilySeq/PyamilySeq_Species.py,sha256=gJy8Pn82Za44l6y9tg7bWJri2k_0OwZiplANIEH2o-c,16289
-PyamilySeq/Seq_Combiner.py,sha256=3iJy7LNp7uBa3sU1F5bmov1ghvbcviOYqgkhbrbV1QQ,4737
-PyamilySeq/Seq_Extractor.py,sha256=KMR0KcTJzrh99HcBN4qb76R2FuBvpYCDf4NwkmwhTPU,2870
-PyamilySeq/Seq_Finder.py,sha256=ht-fSQ_opWKydcoWI9D3nTwLt6Rpgevnf2y0KxVjw4M,1881
-PyamilySeq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-PyamilySeq/clusterings.py,sha256=9t9Q7IYb9x9gXxcv_FxsWqgdMQ-MYa-5OpkBzpgbrXc,22291
-PyamilySeq/config.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-PyamilySeq/constants.py,sha256=WVns7PIMu89mNbb_lhu_Hf8fcX4AiUKiMKWAnwEHBvM,31
-PyamilySeq/utils.py,sha256=aebXIUWIXsL3Zb47ONYqVoF1X504lJ4amewhpO1hNWE,33067
-pyamilyseq-1.1.2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-pyamilyseq-1.1.2.dist-info/METADATA,sha256=YlUvYX1GX0Acoh2V28jq0aMC-reFzEwoUWre8W2eK54,17979
-pyamilyseq-1.1.2.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
-pyamilyseq-1.1.2.dist-info/entry_points.txt,sha256=mFq5TNzPI_B9vDRGEaT9pNPRGWFAgf_SE3R-dDNf1pM,662
-pyamilyseq-1.1.2.dist-info/top_level.txt,sha256=J6JhugUQTq4rq96yibAlQu3o4KCM9WuYfqr3w1r119M,11
-pyamilyseq-1.1.2.dist-info/RECORD,,

{pyamilyseq-1.1.2.dist-info → pyamilyseq-1.3.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{pyamilyseq-1.1.2.dist-info → pyamilyseq-1.3.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

PyamilySeq 1.1.2__py3-none-any.whl → 1.3.0__py3-none-any.whl

PyamilySeq 1.1.2-py3-none-any.whl → 1.3.0-py3-none-any.whl