PyamilySeq 1.1.2__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
PyamilySeq/PyamilySeq.py CHANGED
@@ -249,7 +249,6 @@ def main():
249
249
  run_cd_hit(options, file_to_cluster, clustering_output, clustering_mode)
250
250
  elif options.input_type == 'fasta':
251
251
  combined_out_file = options.input_fasta
252
- ### FIX write code to detect if DNA or AA and if sequence tpye is AA then translate
253
252
  # Detect if the input FASTA file contains DNA or AA sequences
254
253
  is_dna = detect_sequence_type(options.input_fasta)
255
254
  # If the sequence type is AA and the input is DNA, translate the DNA to AA
@@ -315,7 +314,9 @@ def main():
315
314
 
316
315
 
317
316
  # Save arguments to a text file
317
+ from datetime import datetime
318
318
  with open(output_path+"/PyamilySeq_params.txt", "w") as outfile:
319
+ outfile.write(f"Timestamp: {datetime.now().isoformat()}\n")
319
320
  for arg, value in vars(options).items():
320
321
  outfile.write(f"{arg}: {value}\n")
321
322
 
@@ -9,35 +9,163 @@ except (ModuleNotFoundError, ImportError, NameError, TypeError) as error:
9
9
  from utils import *
10
10
 
11
11
 
12
- def gene_presence_absence_output(options, genome_dict, pangenome_clusters_First_sorted, pangenome_clusters_First_sequences_sorted):
12
+ def gene_presence_absence_output(options, genome_dict, pangenome_clusters_First_sorted,
13
+ pangenome_clusters_First_sequences_sorted,
14
+ combined_pangenome_clusters_First_Second_clustered=None,
15
+ combined_pangenome_clusters_Second_sequences_sorted=None):
13
16
  print("Outputting gene_presence_absence file")
14
17
  output_dir = os.path.abspath(options.output_dir)
15
- #in_name = options.clusters.split('.')[0].split('/')[-1]
16
18
  gpa_outfile = os.path.join(output_dir, 'gene_presence_absence.csv')
17
- gpa_outfile = open(gpa_outfile, 'w')
18
19
  genome_dict = OrderedDict(sorted(genome_dict.items()))
19
- gpa_outfile.write('"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment",'
20
- '"Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","')
21
- gpa_outfile.write('","'.join(genome_dict.keys()))
22
- gpa_outfile.write('"\n')
20
+
21
+ # Build a unified list of all clusters with their data
22
+ all_clusters = []
23
+
24
+ # Track which Second cluster IDs have sequences that were merged into First clusters
25
+ merged_second_cluster_ids = set()
26
+
27
+ # Process First clusters and their associated Second sequences
23
28
  for cluster, sequences in pangenome_clusters_First_sequences_sorted.items():
24
- average_sequences_per_genome = len(sequences) / len(pangenome_clusters_First_sorted[cluster])
25
- gpa_outfile.write('"group_'+str(cluster)+'","","","'+str(len(pangenome_clusters_First_sorted[cluster]))+'","'+str(len(sequences))+'","'+str(average_sequences_per_genome)+
26
- '","","","","","","","",""')
27
-
28
-
29
- for genome in genome_dict.keys():
30
- full_out = ''
31
- tmp_list = []
32
- for value in sequences:
33
- if value.split('|')[0] == genome:
34
- tmp_list.append(value.split('|')[1])
35
- if tmp_list:
36
- full_out += ',"'+' '.join(tmp_list)+'"'
37
- else:
38
- full_out = ',""'
39
- gpa_outfile.write(full_out)
40
- gpa_outfile.write('\n')
29
+ all_sequences = list(sequences)
30
+ has_second_sequences = False
31
+
32
+ # Add Second sequences that were clustered with this First cluster
33
+ if combined_pangenome_clusters_First_Second_clustered:
34
+ for seq in sequences:
35
+ if seq in combined_pangenome_clusters_First_Second_clustered:
36
+ for clustered_seq in combined_pangenome_clusters_First_Second_clustered[seq]:
37
+ if clustered_seq not in all_sequences:
38
+ all_sequences.append(clustered_seq)
39
+ # Check if this is a Second sequence (has the sequence_tag)
40
+ if options.sequence_tag in clustered_seq:
41
+ has_second_sequences = True
42
+ # Track which Second cluster this sequence came from
43
+ if combined_pangenome_clusters_Second_sequences_sorted:
44
+ for second_cluster_id, second_seqs in combined_pangenome_clusters_Second_sequences_sorted.items():
45
+ if clustered_seq in second_seqs:
46
+ merged_second_cluster_ids.add(second_cluster_id)
47
+
48
+ # Calculate statistics based on number of genomes (not sequences)
49
+ genomes_in_cluster = set()
50
+ for seq in all_sequences:
51
+ genome = seq.split('|')[0]
52
+ genomes_in_cluster.add(genome)
53
+
54
+ num_isolates = len(genomes_in_cluster)
55
+ num_sequences = len(all_sequences)
56
+
57
+ # Name the cluster based on whether it has Second sequences
58
+ cluster_name = 'combined_group_' + str(cluster) if has_second_sequences else 'group_' + str(cluster)
59
+
60
+ all_clusters.append({
61
+ 'name': cluster_name,
62
+ 'num_genomes': num_isolates,
63
+ 'num_sequences': num_sequences,
64
+ 'sequences': all_sequences
65
+ })
66
+
67
+ # Process Second-only clusters (those not merged with First clusters)
68
+ if combined_pangenome_clusters_Second_sequences_sorted:
69
+ for cluster, sequences in combined_pangenome_clusters_Second_sequences_sorted.items():
70
+ # Only skip if this specific cluster ID had its sequences merged
71
+ if cluster in merged_second_cluster_ids:
72
+ continue
73
+
74
+ # Skip empty clusters
75
+ if not sequences or len(sequences) == 0:
76
+ continue
77
+
78
+ # This is a genuine Second-only cluster
79
+ all_sequences = list(sequences)
80
+
81
+ # Calculate statistics
82
+ genomes_in_cluster = set()
83
+ for seq in all_sequences:
84
+ genome = seq.split('|')[0]
85
+ genomes_in_cluster.add(genome)
86
+
87
+ num_isolates = len(genomes_in_cluster)
88
+ num_sequences = len(all_sequences)
89
+
90
+ # Skip if no genomes (shouldn't happen, but safety check)
91
+ if num_isolates == 0 or num_sequences == 0:
92
+ continue
93
+
94
+ all_clusters.append({
95
+ 'name': 'Second_group_' + str(cluster),
96
+ 'num_genomes': num_isolates,
97
+ 'num_sequences': num_sequences,
98
+ 'sequences': all_sequences
99
+ })
100
+
101
+ # Sort all clusters by number of genomes (descending), then by number of sequences
102
+ all_clusters.sort(key=lambda x: (x['num_genomes'], x['num_sequences']), reverse=True)
103
+
104
+ # Write to file
105
+ with open(gpa_outfile, 'w') as outfile:
106
+ # Write header
107
+ outfile.write(
108
+ '"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment",'
109
+ '"Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","')
110
+ outfile.write('","'.join(genome_dict.keys()))
111
+ outfile.write('"\n')
112
+
113
+ # Write all clusters in sorted order
114
+ for cluster_data in all_clusters:
115
+ num_isolates = cluster_data['num_genomes']
116
+ num_sequences = cluster_data['num_sequences']
117
+ average_sequences_per_genome = num_sequences / num_isolates if num_isolates > 0 else 0
118
+
119
+ # Write cluster info
120
+ outfile.write('"' + cluster_data['name'] + '","","","' + str(num_isolates) + '","' +
121
+ str(num_sequences) + '","' + str(average_sequences_per_genome) + '","","","","","","","",""')
122
+
123
+ # Write presence/absence for each genome
124
+ for genome in genome_dict.keys():
125
+ tmp_list = []
126
+ for seq in cluster_data['sequences']:
127
+ if seq.split('|')[0] == genome:
128
+ tmp_list.append(seq.split('|')[1])
129
+
130
+ if tmp_list:
131
+ outfile.write(',"' + ' '.join(tmp_list) + '"')
132
+ else:
133
+ outfile.write(',""')
134
+ outfile.write('\n')
135
+
136
+ print(f"Total clusters written: {len(all_clusters)}")
137
+ if options.reclustered is not None:
138
+ print(f"Merged Second cluster IDs: {len(merged_second_cluster_ids)}")
139
+
140
+ # def gene_presence_absence_output(options, genome_dict, pangenome_clusters_First_sorted, pangenome_clusters_First_sequences_sorted):
141
+ # print("Outputting gene_presence_absence file")
142
+ # output_dir = os.path.abspath(options.output_dir)
143
+ # #in_name = options.clusters.split('.')[0].split('/')[-1]
144
+ # gpa_outfile = os.path.join(output_dir, 'gene_presence_absence.csv')
145
+ # gpa_outfile = open(gpa_outfile, 'w')
146
+ # genome_dict = OrderedDict(sorted(genome_dict.items()))
147
+ # gpa_outfile.write('"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment",'
148
+ # '"Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","')
149
+ # gpa_outfile.write('","'.join(genome_dict.keys()))
150
+ # gpa_outfile.write('"\n')
151
+ # for cluster, sequences in pangenome_clusters_First_sequences_sorted.items():
152
+ # average_sequences_per_genome = len(sequences) / len(pangenome_clusters_First_sorted[cluster])
153
+ # gpa_outfile.write('"group_'+str(cluster)+'","","","'+str(len(pangenome_clusters_First_sorted[cluster]))+'","'+str(len(sequences))+'","'+str(average_sequences_per_genome)+
154
+ # '","","","","","","","",""')
155
+ #
156
+ #
157
+ # for genome in genome_dict.keys():
158
+ # full_out = ''
159
+ # tmp_list = []
160
+ # for value in sequences:
161
+ # if value.split('|')[0] == genome:
162
+ # tmp_list.append(value.split('|')[1])
163
+ # if tmp_list:
164
+ # full_out += ',"'+' '.join(tmp_list)+'"'
165
+ # else:
166
+ # full_out = ',""'
167
+ # gpa_outfile.write(full_out)
168
+ # gpa_outfile.write('\n')
41
169
 
42
170
  ### Below is some unfinished code
43
171
  # edge_list_outfile = open(in_name+'_edge_list.csv','w')
@@ -147,22 +275,37 @@ def cluster(options):
147
275
 
148
276
  if options.reclustered != None: #FIX
149
277
  if options.cluster_format == 'CD-HIT':
150
- combined_pangenome_clusters_First_Second_clustered,not_Second_only_cluster_ids,combined_pangenome_clusters_Second, combined_pangenome_clusters_Second_sequences = combined_clustering_CDHIT(options, genome_dict, '|')
278
+ combined_pangenome_clusters_First_Second_clustered, not_Second_only_cluster_ids, combined_pangenome_clusters_Second, combined_pangenome_clusters_Second_sequences = combined_clustering_CDHIT(options, genome_dict, '|')
151
279
  elif 'TSV' in options.cluster_format or 'CSV' in options.cluster_format:
152
280
  #Fix
153
- combined_pangenome_clusters_First_Second_clustered,not_Second_only_cluster_ids,combined_pangenome_clusters_Second,combined_pangenome_clusters_Second_sequences = combined_clustering_Edge_List(options, '|')
154
- pangenome_clusters_Type = combined_clustering_counting(options, pangenome_clusters_First, reps, combined_pangenome_clusters_First_Second_clustered, pangenome_clusters_First_genomes, '|')
281
+ combined_pangenome_clusters_First_Second_clustered, not_Second_only_cluster_ids, combined_pangenome_clusters_Second, combined_pangenome_clusters_Second_sequences = combined_clustering_Edge_List(options, '|')
282
+
283
+ pangenome_clusters_Type = combined_clustering_counting(options, pangenome_clusters_First, reps, combined_pangenome_clusters_First_Second_clustered, pangenome_clusters_First_genomes, combined_pangenome_clusters_Second, combined_pangenome_clusters_Second_sequences, '|')
284
+
285
+ # Sort First clusters
286
+ sorted_First_keys = sort_keys_by_values(pangenome_clusters_First, pangenome_clusters_First_sequences)
287
+ pangenome_clusters_First_sorted = reorder_dict_by_keys(pangenome_clusters_First, sorted_First_keys)
288
+ pangenome_clusters_First_sequences_sorted = reorder_dict_by_keys(pangenome_clusters_First_sequences, sorted_First_keys)
289
+ pangenome_clusters_Type_sorted = reorder_dict_by_keys(pangenome_clusters_Type, sorted_First_keys)
290
+
291
+ # Sort Second clusters independently (no need to align with First)
292
+ sorted_Second_keys = sort_keys_by_values(combined_pangenome_clusters_Second, combined_pangenome_clusters_Second_sequences)
293
+ #combined_pangenome_clusters_Second_sorted = reorder_dict_by_keys(combined_pangenome_clusters_Second,sorted_Second_keys)
294
+ combined_pangenome_clusters_Second_sequences_sorted = reorder_dict_by_keys(combined_pangenome_clusters_Second_sequences, sorted_Second_keys)
295
+
155
296
  else:
156
297
  pangenome_clusters_Type = single_clustering_counting(pangenome_clusters_First, reps)
298
+ sorted_First_keys = sort_keys_by_values(pangenome_clusters_First, pangenome_clusters_First_sequences)
299
+ pangenome_clusters_First_sorted = reorder_dict_by_keys(pangenome_clusters_First, sorted_First_keys)
300
+ pangenome_clusters_First_sequences_sorted = reorder_dict_by_keys(pangenome_clusters_First_sequences,
301
+ sorted_First_keys)
302
+ pangenome_clusters_Type_sorted = reorder_dict_by_keys(pangenome_clusters_Type, sorted_First_keys)
157
303
 
158
304
 
159
305
 
160
306
  Number_Of_Second_Extending_But_Same_Genomes = 0
161
307
 
162
- sorted_first_keys = sort_keys_by_values(pangenome_clusters_First, pangenome_clusters_First_sequences)
163
- pangenome_clusters_First_sorted = reorder_dict_by_keys(pangenome_clusters_First, sorted_first_keys)
164
- pangenome_clusters_First_sequences_sorted = reorder_dict_by_keys(pangenome_clusters_First_sequences, sorted_first_keys)
165
- pangenome_clusters_Type_sorted = reorder_dict_by_keys(pangenome_clusters_Type, sorted_first_keys)
308
+
166
309
 
167
310
  print("Calculating Groups")
168
311
  seen_groupings = []
@@ -228,9 +371,18 @@ def cluster(options):
228
371
  len(combined_pangenome_clusters_Second_sequences)))
229
372
  outfile.write("\nTotal Number of First Gene Groups That Had Additional Second Sequences But Not New Genomes: " + str(
230
373
  Number_Of_Second_Extending_But_Same_Genomes))
231
- #Report number of first and second clusters and do the ame for genus
374
+
232
375
  if options.gene_presence_absence_out != False:
233
- gene_presence_absence_output(options,genome_dict, pangenome_clusters_First_sorted, pangenome_clusters_First_sequences_sorted)
376
+ if options.reclustered != None:
377
+ # Pass both First and Second clustering data
378
+ gene_presence_absence_output(options, genome_dict, pangenome_clusters_First_sorted,
379
+ pangenome_clusters_First_sequences_sorted,
380
+ combined_pangenome_clusters_First_Second_clustered,
381
+ combined_pangenome_clusters_Second_sequences_sorted)
382
+ else:
383
+ # Only First clustering data available
384
+ gene_presence_absence_output(options, genome_dict, pangenome_clusters_First_sorted,
385
+ pangenome_clusters_First_sequences_sorted)
234
386
 
235
387
 
236
388
  ###Need to fix this below. If full/partial the ifs need to be different. If full we first need to output the gfs then align. if -wruite-groups not presented then it needs
@@ -59,7 +59,7 @@ def main():
59
59
  exit(1)
60
60
  if options.input_type == 'fasta' and options.name_split_fasta is None:
61
61
  print("Please provide a substring to split the filename and extract the genome name.")
62
- exit
62
+ exit(1)
63
63
 
64
64
  output_path = os.path.abspath(options.output_dir)
65
65
  if not os.path.exists(output_path):
@@ -77,7 +77,7 @@ def main():
77
77
  elif options.input_type == 'combined':
78
78
  read_combined_files(options.input_dir, options.name_split_gff, options.gene_ident, combined_out_file, options.translate, True)
79
79
  elif options.input_type == 'fasta':
80
- read_fasta_files(options.input_dir, options.name_split_fasta, combined_out_file, options.translate)
80
+ read_fasta_files(options.input_dir, options.name_split_fasta, combined_out_file, options.translate, True)
81
81
 
82
82
  if __name__ == "__main__":
83
83
  main()
@@ -9,8 +9,13 @@ def find_gene_ids_in_csv(csv_file, group_name):
9
9
  cells = line.strip().split(',')
10
10
  if cells[0].replace('"','') == group_name:
11
11
  # Collect gene IDs from column 14 onward
12
+ # for cell in cells[14:]:
13
+ # gene_ids.extend(cell.strip().replace('"','').split()) # Splitting by spaces if there are multiple IDs in a cell break
12
14
  for cell in cells[14:]:
13
- gene_ids.extend(cell.strip().replace('"','').split()) # Splitting by spaces if there are multiple IDs in a cell break
15
+ for gene in cell.strip().replace('"', '').split(';'):
16
+ if gene:
17
+ gene_ids.append(gene)
18
+
14
19
  return gene_ids
15
20
 
16
21
  def extract_sequences(fasta_file, gene_ids):
PyamilySeq/clusterings.py CHANGED
@@ -156,7 +156,7 @@ def cluster_MMseqs(options,splitter):
156
156
 
157
157
 
158
158
  #@profile
159
- def combined_clustering_counting(options, pangenome_clusters_First, reps, combined_pangenome_clusters_First_Second_clustered, pangenome_clusters_First_genomes, splitter):
159
+ def combined_clustering_counting(options, pangenome_clusters_First, reps, combined_pangenome_clusters_First_Second_clustered, pangenome_clusters_First_genomes, combined_pangenome_clusters_Second, combined_pangenome_clusters_Second_sequences, splitter):
160
160
  num_clustered_First = defaultdict(list)
161
161
  pangenome_clusters_Type = copy.deepcopy(pangenome_clusters_First)
162
162
  list_of_reps = list(reps.keys())
@@ -336,6 +336,7 @@ def combined_clustering_CDHIT(options, taxa_dict, splitter):
336
336
 
337
337
 
338
338
 
339
+
339
340
  # def cluster_BLAST(options, splitter):
340
341
  # separator = '\t'
341
342
  # First_in = open(options.clusters, 'r')
PyamilySeq/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- PyamilySeq_Version = 'v1.1.2'
1
+ PyamilySeq_Version = 'v1.3.0'
2
2
 
PyamilySeq/utils.py CHANGED
@@ -7,7 +7,6 @@ from tempfile import NamedTemporaryFile
7
7
  import sys
8
8
  import re
9
9
  import math
10
- #from config import config_params
11
10
 
12
11
  ####
13
12
  # Placeholder for the distance function
@@ -15,11 +14,10 @@ levenshtein_distance_cal = None
15
14
  # Check for Levenshtein library once
16
15
  try:
17
16
  import Levenshtein as LV
18
- # Assign the optimized function
17
+ # Assign the optimised function
19
18
  def levenshtein_distance_calc(seq1, seq2):
20
19
  return LV.distance(seq1, seq2)
21
20
  except (ModuleNotFoundError, ImportError):
22
- #if config_params.verbose == True: - Not implemented yet
23
21
  print("Levenshtein package not installed - Will fallback to slower Python implementation.")
24
22
  # Fallback implementation
25
23
  def levenshtein_distance_calc(seq1, seq2):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PyamilySeq
3
- Version: 1.1.2
3
+ Version: 1.3.0
4
4
  Summary: PyamilySeq - A a tool to investigate sequence-based gene groups identified by clustering methods such as CD-HIT, DIAMOND, BLAST or MMseqs2.
5
5
  Home-page: https://github.com/NickJD/PyamilySeq
6
6
  Author: Nicholas Dimonaco
@@ -46,7 +46,7 @@ To update to the newest version add '-U' to end of the pip install command.
46
46
  ```commandline
47
47
  usage: PyamilySeq.py [-h] {Full,Partial} ...
48
48
 
49
- PyamilySeq v1.1.2: A tool for gene clustering and analysis.
49
+ PyamilySeq v1.3.0: A tool for gene clustering and analysis.
50
50
 
51
51
  positional arguments:
52
52
  {Full,Partial} Choose a mode: 'Full' or 'Partial'.
@@ -76,7 +76,7 @@ Escherichia_coli_110957|ENSB_TIZS9kbTvShDvyX Escherichia_coli_110957|ENSB_TIZS9k
76
76
  ```
77
77
  ### Example output:
78
78
  ```
79
- Running PyamilySeq v1.1.2
79
+ Running PyamilySeq v1.3.0
80
80
  Calculating Groups
81
81
  Number of Genomes: 10
82
82
  Gene Groups
@@ -221,7 +221,7 @@ Seq-Combiner -input_dir .../test_data/genomes -name_split_gff .gff3 -output_dir
221
221
  ```
222
222
  usage: Seq_Combiner.py [-h] -input_dir INPUT_DIR -input_type {separate,combined,fasta} [-name_split_gff NAME_SPLIT_GFF] [-name_split_fasta NAME_SPLIT_FASTA] -output_dir OUTPUT_DIR -output_name OUTPUT_FILE [-gene_ident GENE_IDENT] [-translate] [-v]
223
223
 
224
- PyamilySeq v1.1.2: Seq-Combiner - A tool to extract sequences from GFF/FASTA files and prepare them for PyamilySeq.
224
+ PyamilySeq v1.3.0: Seq-Combiner - A tool to extract sequences from GFF/FASTA files and prepare them for PyamilySeq.
225
225
 
226
226
  options:
227
227
  -h, --help show this help message and exit
@@ -264,7 +264,7 @@ usage: Group_Splitter.py [-h] -input_fasta INPUT_FASTA -sequence_type {AA,DNA}
264
264
  [-M CLUSTERING_MEMORY] [-no_delete_temp_files]
265
265
  [-verbose] [-v]
266
266
 
267
- PyamilySeq v1.1.2: Group-Splitter - A tool to split multi-copy gene groups
267
+ PyamilySeq v1.3.0: Group-Splitter - A tool to split multi-copy gene groups
268
268
  identified by PyamilySeq.
269
269
 
270
270
  options:
@@ -317,7 +317,7 @@ Cluster-Summary -genome_num 10 -input_clstr .../test_data/species/E-coli/E-coli_
317
317
  usage: Cluster_Summary.py [-h] -input_clstr INPUT_CLSTR -output OUTPUT -genome_num GENOME_NUM
318
318
  [-output_dir OUTPUT_DIR] [-verbose] [-v]
319
319
 
320
- PyamilySeq v1.1.2: Cluster-Summary - A tool to summarise CD-HIT clustering files.
320
+ PyamilySeq v1.3.0: Cluster-Summary - A tool to summarise CD-HIT clustering files.
321
321
 
322
322
  options:
323
323
  -h, --help show this help message and exit
@@ -0,0 +1,22 @@
1
+ PyamilySeq/Cluster_Compare.py,sha256=2jRXBYN8T9TUDLV9bj3SWFQ2pBUH3BAKW1FYrDYSQBw,4421
2
+ PyamilySeq/Cluster_Summary.py,sha256=efXMfGvATERCTxwaqbauhZwt_5Hrf9KpGKY3EgsHVDk,6720
3
+ PyamilySeq/Group_Extractor.py,sha256=oe2VmOVxdvTmAcy8NKwD1F27IdN2utAfczxsyxg96yc,2898
4
+ PyamilySeq/Group_Sizes.py,sha256=3snkAN19o3Y4IY6IqSim1qy415FfQe1Wb8vzWTKF0Wo,3028
5
+ PyamilySeq/Group_Splitter.py,sha256=OcMj9GnAyybs_DaNKRyvfL_nl2dB2gUI4BD_EQrBbWo,25653
6
+ PyamilySeq/PyamilySeq.py,sha256=vc_mFuK-Cmqa3elusyIg5DksTHxdwCcFv2ki2ACqXIA,17468
7
+ PyamilySeq/PyamilySeq_Genus.py,sha256=KUC0QkCRpKQ9HEgxyTSD7Nc63wSXtriWyIqt_YOy5ys,12470
8
+ PyamilySeq/PyamilySeq_Species.py,sha256=1P_xbdUrZPykyS44KFyRdyxU-XPX_yKLpJhjhB8VTpg,24029
9
+ PyamilySeq/Seq_Combiner.py,sha256=G49zthWtsTfqYX1tqc7op9a9cSia1IL0VTiAtwgdTwc,4746
10
+ PyamilySeq/Seq_Extractor.py,sha256=IQk4Qn6LJkPXD1O4TQesneS3_ZN8hBsTVZQGlZ1c-Dk,3072
11
+ PyamilySeq/Seq_Finder.py,sha256=ht-fSQ_opWKydcoWI9D3nTwLt6Rpgevnf2y0KxVjw4M,1881
12
+ PyamilySeq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ PyamilySeq/clusterings.py,sha256=bfST7_i6qnj9ogclmOSkQhurZaEWBBq0H48FAN7JpOg,22374
14
+ PyamilySeq/config.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ PyamilySeq/constants.py,sha256=cykSvqrDAsDJCo9FJJ7lRvoOho2H7xCGE923X9iOE4U,31
16
+ PyamilySeq/utils.py,sha256=1U794Xd5qzmaIz2VujdnPkND729kr7rKjei0Y57f-QE,32972
17
+ pyamilyseq-1.3.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
18
+ pyamilyseq-1.3.0.dist-info/METADATA,sha256=vl5fx_QOahTxZfwDXVzJPgMWIIiLGDnr2VmP-rxWIWo,17979
19
+ pyamilyseq-1.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
20
+ pyamilyseq-1.3.0.dist-info/entry_points.txt,sha256=5RkUWIneXu-kCnClJhv0u27lWHAoyoVmospZMU5Cs2U,846
21
+ pyamilyseq-1.3.0.dist-info/top_level.txt,sha256=J6JhugUQTq4rq96yibAlQu3o4KCM9WuYfqr3w1r119M,11
22
+ pyamilyseq-1.3.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.3.1)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -8,6 +8,9 @@ Seq-Extractor = PyamilySeq.Seq_Extractor:main
8
8
  Seq-Finder = PyamilySeq.Seq_Finder:main
9
9
  cluster-extractor = PyamilySeq.Cluster_Extractor:main
10
10
  cluster-summary = PyamilySeq.Cluster_Summary:main
11
+ compare-contree-singletrees = aux_tools.RF.compare_contree_singletrees:main
12
+ compare-rf = aux_tools.RF.compare_RF:main
13
+ compute-singletrees-rf = aux_tools.RF.Compute_SingleTree_RFs:main
11
14
  group-splitter = PyamilySeq.Group_Splitter:main
12
15
  pyamilyseq = PyamilySeq.PyamilySeq:main
13
16
  seq-combiner = PyamilySeq.Seq_Combiner:main
@@ -1,22 +0,0 @@
1
- PyamilySeq/Cluster_Compare.py,sha256=2jRXBYN8T9TUDLV9bj3SWFQ2pBUH3BAKW1FYrDYSQBw,4421
2
- PyamilySeq/Cluster_Summary.py,sha256=efXMfGvATERCTxwaqbauhZwt_5Hrf9KpGKY3EgsHVDk,6720
3
- PyamilySeq/Group_Extractor.py,sha256=oe2VmOVxdvTmAcy8NKwD1F27IdN2utAfczxsyxg96yc,2898
4
- PyamilySeq/Group_Sizes.py,sha256=3snkAN19o3Y4IY6IqSim1qy415FfQe1Wb8vzWTKF0Wo,3028
5
- PyamilySeq/Group_Splitter.py,sha256=OcMj9GnAyybs_DaNKRyvfL_nl2dB2gUI4BD_EQrBbWo,25653
6
- PyamilySeq/PyamilySeq.py,sha256=tdmIDB2ZYCRfMFQSuWrN0Psr5ggSaoUcT2wEv54jWos,17462
7
- PyamilySeq/PyamilySeq_Genus.py,sha256=KUC0QkCRpKQ9HEgxyTSD7Nc63wSXtriWyIqt_YOy5ys,12470
8
- PyamilySeq/PyamilySeq_Species.py,sha256=gJy8Pn82Za44l6y9tg7bWJri2k_0OwZiplANIEH2o-c,16289
9
- PyamilySeq/Seq_Combiner.py,sha256=3iJy7LNp7uBa3sU1F5bmov1ghvbcviOYqgkhbrbV1QQ,4737
10
- PyamilySeq/Seq_Extractor.py,sha256=KMR0KcTJzrh99HcBN4qb76R2FuBvpYCDf4NwkmwhTPU,2870
11
- PyamilySeq/Seq_Finder.py,sha256=ht-fSQ_opWKydcoWI9D3nTwLt6Rpgevnf2y0KxVjw4M,1881
12
- PyamilySeq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- PyamilySeq/clusterings.py,sha256=9t9Q7IYb9x9gXxcv_FxsWqgdMQ-MYa-5OpkBzpgbrXc,22291
14
- PyamilySeq/config.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- PyamilySeq/constants.py,sha256=WVns7PIMu89mNbb_lhu_Hf8fcX4AiUKiMKWAnwEHBvM,31
16
- PyamilySeq/utils.py,sha256=aebXIUWIXsL3Zb47ONYqVoF1X504lJ4amewhpO1hNWE,33067
17
- pyamilyseq-1.1.2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
18
- pyamilyseq-1.1.2.dist-info/METADATA,sha256=YlUvYX1GX0Acoh2V28jq0aMC-reFzEwoUWre8W2eK54,17979
19
- pyamilyseq-1.1.2.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
20
- pyamilyseq-1.1.2.dist-info/entry_points.txt,sha256=mFq5TNzPI_B9vDRGEaT9pNPRGWFAgf_SE3R-dDNf1pM,662
21
- pyamilyseq-1.1.2.dist-info/top_level.txt,sha256=J6JhugUQTq4rq96yibAlQu3o4KCM9WuYfqr3w1r119M,11
22
- pyamilyseq-1.1.2.dist-info/RECORD,,