PyamilySeq 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PyamilySeq/PyamilySeq.py +2 -1
- PyamilySeq/PyamilySeq_Species.py +185 -33
- PyamilySeq/clusterings.py +2 -1
- PyamilySeq/constants.py +1 -1
- {pyamilyseq-1.2.0.dist-info → pyamilyseq-1.3.0.dist-info}/METADATA +6 -6
- {pyamilyseq-1.2.0.dist-info → pyamilyseq-1.3.0.dist-info}/RECORD +10 -10
- {pyamilyseq-1.2.0.dist-info → pyamilyseq-1.3.0.dist-info}/WHEEL +0 -0
- {pyamilyseq-1.2.0.dist-info → pyamilyseq-1.3.0.dist-info}/entry_points.txt +0 -0
- {pyamilyseq-1.2.0.dist-info → pyamilyseq-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {pyamilyseq-1.2.0.dist-info → pyamilyseq-1.3.0.dist-info}/top_level.txt +0 -0
PyamilySeq/PyamilySeq.py
CHANGED
|
@@ -249,7 +249,6 @@ def main():
|
|
|
249
249
|
run_cd_hit(options, file_to_cluster, clustering_output, clustering_mode)
|
|
250
250
|
elif options.input_type == 'fasta':
|
|
251
251
|
combined_out_file = options.input_fasta
|
|
252
|
-
### FIX write code to detect if DNA or AA and if sequence tpye is AA then translate
|
|
253
252
|
# Detect if the input FASTA file contains DNA or AA sequences
|
|
254
253
|
is_dna = detect_sequence_type(options.input_fasta)
|
|
255
254
|
# If the sequence type is AA and the input is DNA, translate the DNA to AA
|
|
@@ -315,7 +314,9 @@ def main():
|
|
|
315
314
|
|
|
316
315
|
|
|
317
316
|
# Save arguments to a text file
|
|
317
|
+
from datetime import datetime
|
|
318
318
|
with open(output_path+"/PyamilySeq_params.txt", "w") as outfile:
|
|
319
|
+
outfile.write(f"Timestamp: {datetime.now().isoformat()}\n")
|
|
319
320
|
for arg, value in vars(options).items():
|
|
320
321
|
outfile.write(f"{arg}: {value}\n")
|
|
321
322
|
|
PyamilySeq/PyamilySeq_Species.py
CHANGED
|
@@ -9,35 +9,163 @@ except (ModuleNotFoundError, ImportError, NameError, TypeError) as error:
|
|
|
9
9
|
from utils import *
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
def gene_presence_absence_output(options, genome_dict, pangenome_clusters_First_sorted,
|
|
12
|
+
def gene_presence_absence_output(options, genome_dict, pangenome_clusters_First_sorted,
|
|
13
|
+
pangenome_clusters_First_sequences_sorted,
|
|
14
|
+
combined_pangenome_clusters_First_Second_clustered=None,
|
|
15
|
+
combined_pangenome_clusters_Second_sequences_sorted=None):
|
|
13
16
|
print("Outputting gene_presence_absence file")
|
|
14
17
|
output_dir = os.path.abspath(options.output_dir)
|
|
15
|
-
#in_name = options.clusters.split('.')[0].split('/')[-1]
|
|
16
18
|
gpa_outfile = os.path.join(output_dir, 'gene_presence_absence.csv')
|
|
17
|
-
gpa_outfile = open(gpa_outfile, 'w')
|
|
18
19
|
genome_dict = OrderedDict(sorted(genome_dict.items()))
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
20
|
+
|
|
21
|
+
# Build a unified list of all clusters with their data
|
|
22
|
+
all_clusters = []
|
|
23
|
+
|
|
24
|
+
# Track which Second cluster IDs have sequences that were merged into First clusters
|
|
25
|
+
merged_second_cluster_ids = set()
|
|
26
|
+
|
|
27
|
+
# Process First clusters and their associated Second sequences
|
|
23
28
|
for cluster, sequences in pangenome_clusters_First_sequences_sorted.items():
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
29
|
+
all_sequences = list(sequences)
|
|
30
|
+
has_second_sequences = False
|
|
31
|
+
|
|
32
|
+
# Add Second sequences that were clustered with this First cluster
|
|
33
|
+
if combined_pangenome_clusters_First_Second_clustered:
|
|
34
|
+
for seq in sequences:
|
|
35
|
+
if seq in combined_pangenome_clusters_First_Second_clustered:
|
|
36
|
+
for clustered_seq in combined_pangenome_clusters_First_Second_clustered[seq]:
|
|
37
|
+
if clustered_seq not in all_sequences:
|
|
38
|
+
all_sequences.append(clustered_seq)
|
|
39
|
+
# Check if this is a Second sequence (has the sequence_tag)
|
|
40
|
+
if options.sequence_tag in clustered_seq:
|
|
41
|
+
has_second_sequences = True
|
|
42
|
+
# Track which Second cluster this sequence came from
|
|
43
|
+
if combined_pangenome_clusters_Second_sequences_sorted:
|
|
44
|
+
for second_cluster_id, second_seqs in combined_pangenome_clusters_Second_sequences_sorted.items():
|
|
45
|
+
if clustered_seq in second_seqs:
|
|
46
|
+
merged_second_cluster_ids.add(second_cluster_id)
|
|
47
|
+
|
|
48
|
+
# Calculate statistics based on number of genomes (not sequences)
|
|
49
|
+
genomes_in_cluster = set()
|
|
50
|
+
for seq in all_sequences:
|
|
51
|
+
genome = seq.split('|')[0]
|
|
52
|
+
genomes_in_cluster.add(genome)
|
|
53
|
+
|
|
54
|
+
num_isolates = len(genomes_in_cluster)
|
|
55
|
+
num_sequences = len(all_sequences)
|
|
56
|
+
|
|
57
|
+
# Name the cluster based on whether it has Second sequences
|
|
58
|
+
cluster_name = 'combined_group_' + str(cluster) if has_second_sequences else 'group_' + str(cluster)
|
|
59
|
+
|
|
60
|
+
all_clusters.append({
|
|
61
|
+
'name': cluster_name,
|
|
62
|
+
'num_genomes': num_isolates,
|
|
63
|
+
'num_sequences': num_sequences,
|
|
64
|
+
'sequences': all_sequences
|
|
65
|
+
})
|
|
66
|
+
|
|
67
|
+
# Process Second-only clusters (those not merged with First clusters)
|
|
68
|
+
if combined_pangenome_clusters_Second_sequences_sorted:
|
|
69
|
+
for cluster, sequences in combined_pangenome_clusters_Second_sequences_sorted.items():
|
|
70
|
+
# Only skip if this specific cluster ID had its sequences merged
|
|
71
|
+
if cluster in merged_second_cluster_ids:
|
|
72
|
+
continue
|
|
73
|
+
|
|
74
|
+
# Skip empty clusters
|
|
75
|
+
if not sequences or len(sequences) == 0:
|
|
76
|
+
continue
|
|
77
|
+
|
|
78
|
+
# This is a genuine Second-only cluster
|
|
79
|
+
all_sequences = list(sequences)
|
|
80
|
+
|
|
81
|
+
# Calculate statistics
|
|
82
|
+
genomes_in_cluster = set()
|
|
83
|
+
for seq in all_sequences:
|
|
84
|
+
genome = seq.split('|')[0]
|
|
85
|
+
genomes_in_cluster.add(genome)
|
|
86
|
+
|
|
87
|
+
num_isolates = len(genomes_in_cluster)
|
|
88
|
+
num_sequences = len(all_sequences)
|
|
89
|
+
|
|
90
|
+
# Skip if no genomes (shouldn't happen, but safety check)
|
|
91
|
+
if num_isolates == 0 or num_sequences == 0:
|
|
92
|
+
continue
|
|
93
|
+
|
|
94
|
+
all_clusters.append({
|
|
95
|
+
'name': 'Second_group_' + str(cluster),
|
|
96
|
+
'num_genomes': num_isolates,
|
|
97
|
+
'num_sequences': num_sequences,
|
|
98
|
+
'sequences': all_sequences
|
|
99
|
+
})
|
|
100
|
+
|
|
101
|
+
# Sort all clusters by number of genomes (descending), then by number of sequences
|
|
102
|
+
all_clusters.sort(key=lambda x: (x['num_genomes'], x['num_sequences']), reverse=True)
|
|
103
|
+
|
|
104
|
+
# Write to file
|
|
105
|
+
with open(gpa_outfile, 'w') as outfile:
|
|
106
|
+
# Write header
|
|
107
|
+
outfile.write(
|
|
108
|
+
'"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment",'
|
|
109
|
+
'"Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","')
|
|
110
|
+
outfile.write('","'.join(genome_dict.keys()))
|
|
111
|
+
outfile.write('"\n')
|
|
112
|
+
|
|
113
|
+
# Write all clusters in sorted order
|
|
114
|
+
for cluster_data in all_clusters:
|
|
115
|
+
num_isolates = cluster_data['num_genomes']
|
|
116
|
+
num_sequences = cluster_data['num_sequences']
|
|
117
|
+
average_sequences_per_genome = num_sequences / num_isolates if num_isolates > 0 else 0
|
|
118
|
+
|
|
119
|
+
# Write cluster info
|
|
120
|
+
outfile.write('"' + cluster_data['name'] + '","","","' + str(num_isolates) + '","' +
|
|
121
|
+
str(num_sequences) + '","' + str(average_sequences_per_genome) + '","","","","","","","",""')
|
|
122
|
+
|
|
123
|
+
# Write presence/absence for each genome
|
|
124
|
+
for genome in genome_dict.keys():
|
|
125
|
+
tmp_list = []
|
|
126
|
+
for seq in cluster_data['sequences']:
|
|
127
|
+
if seq.split('|')[0] == genome:
|
|
128
|
+
tmp_list.append(seq.split('|')[1])
|
|
129
|
+
|
|
130
|
+
if tmp_list:
|
|
131
|
+
outfile.write(',"' + ' '.join(tmp_list) + '"')
|
|
132
|
+
else:
|
|
133
|
+
outfile.write(',""')
|
|
134
|
+
outfile.write('\n')
|
|
135
|
+
|
|
136
|
+
print(f"Total clusters written: {len(all_clusters)}")
|
|
137
|
+
if options.reclustered is not None:
|
|
138
|
+
print(f"Merged Second cluster IDs: {len(merged_second_cluster_ids)}")
|
|
139
|
+
|
|
140
|
+
# def gene_presence_absence_output(options, genome_dict, pangenome_clusters_First_sorted, pangenome_clusters_First_sequences_sorted):
|
|
141
|
+
# print("Outputting gene_presence_absence file")
|
|
142
|
+
# output_dir = os.path.abspath(options.output_dir)
|
|
143
|
+
# #in_name = options.clusters.split('.')[0].split('/')[-1]
|
|
144
|
+
# gpa_outfile = os.path.join(output_dir, 'gene_presence_absence.csv')
|
|
145
|
+
# gpa_outfile = open(gpa_outfile, 'w')
|
|
146
|
+
# genome_dict = OrderedDict(sorted(genome_dict.items()))
|
|
147
|
+
# gpa_outfile.write('"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment",'
|
|
148
|
+
# '"Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","')
|
|
149
|
+
# gpa_outfile.write('","'.join(genome_dict.keys()))
|
|
150
|
+
# gpa_outfile.write('"\n')
|
|
151
|
+
# for cluster, sequences in pangenome_clusters_First_sequences_sorted.items():
|
|
152
|
+
# average_sequences_per_genome = len(sequences) / len(pangenome_clusters_First_sorted[cluster])
|
|
153
|
+
# gpa_outfile.write('"group_'+str(cluster)+'","","","'+str(len(pangenome_clusters_First_sorted[cluster]))+'","'+str(len(sequences))+'","'+str(average_sequences_per_genome)+
|
|
154
|
+
# '","","","","","","","",""')
|
|
155
|
+
#
|
|
156
|
+
#
|
|
157
|
+
# for genome in genome_dict.keys():
|
|
158
|
+
# full_out = ''
|
|
159
|
+
# tmp_list = []
|
|
160
|
+
# for value in sequences:
|
|
161
|
+
# if value.split('|')[0] == genome:
|
|
162
|
+
# tmp_list.append(value.split('|')[1])
|
|
163
|
+
# if tmp_list:
|
|
164
|
+
# full_out += ',"'+' '.join(tmp_list)+'"'
|
|
165
|
+
# else:
|
|
166
|
+
# full_out = ',""'
|
|
167
|
+
# gpa_outfile.write(full_out)
|
|
168
|
+
# gpa_outfile.write('\n')
|
|
41
169
|
|
|
42
170
|
### Below is some unfinished code
|
|
43
171
|
# edge_list_outfile = open(in_name+'_edge_list.csv','w')
|
|
@@ -147,22 +275,37 @@ def cluster(options):
|
|
|
147
275
|
|
|
148
276
|
if options.reclustered != None: #FIX
|
|
149
277
|
if options.cluster_format == 'CD-HIT':
|
|
150
|
-
combined_pangenome_clusters_First_Second_clustered,not_Second_only_cluster_ids,combined_pangenome_clusters_Second, combined_pangenome_clusters_Second_sequences = combined_clustering_CDHIT(options, genome_dict, '|')
|
|
278
|
+
combined_pangenome_clusters_First_Second_clustered, not_Second_only_cluster_ids, combined_pangenome_clusters_Second, combined_pangenome_clusters_Second_sequences = combined_clustering_CDHIT(options, genome_dict, '|')
|
|
151
279
|
elif 'TSV' in options.cluster_format or 'CSV' in options.cluster_format:
|
|
152
280
|
#Fix
|
|
153
|
-
combined_pangenome_clusters_First_Second_clustered,not_Second_only_cluster_ids,combined_pangenome_clusters_Second,combined_pangenome_clusters_Second_sequences = combined_clustering_Edge_List(options, '|')
|
|
154
|
-
|
|
281
|
+
combined_pangenome_clusters_First_Second_clustered, not_Second_only_cluster_ids, combined_pangenome_clusters_Second, combined_pangenome_clusters_Second_sequences = combined_clustering_Edge_List(options, '|')
|
|
282
|
+
|
|
283
|
+
pangenome_clusters_Type = combined_clustering_counting(options, pangenome_clusters_First, reps, combined_pangenome_clusters_First_Second_clustered, pangenome_clusters_First_genomes, combined_pangenome_clusters_Second, combined_pangenome_clusters_Second_sequences, '|')
|
|
284
|
+
|
|
285
|
+
# Sort First clusters
|
|
286
|
+
sorted_First_keys = sort_keys_by_values(pangenome_clusters_First, pangenome_clusters_First_sequences)
|
|
287
|
+
pangenome_clusters_First_sorted = reorder_dict_by_keys(pangenome_clusters_First, sorted_First_keys)
|
|
288
|
+
pangenome_clusters_First_sequences_sorted = reorder_dict_by_keys(pangenome_clusters_First_sequences, sorted_First_keys)
|
|
289
|
+
pangenome_clusters_Type_sorted = reorder_dict_by_keys(pangenome_clusters_Type, sorted_First_keys)
|
|
290
|
+
|
|
291
|
+
# Sort Second clusters independently (no need to align with First)
|
|
292
|
+
sorted_Second_keys = sort_keys_by_values(combined_pangenome_clusters_Second, combined_pangenome_clusters_Second_sequences)
|
|
293
|
+
#combined_pangenome_clusters_Second_sorted = reorder_dict_by_keys(combined_pangenome_clusters_Second,sorted_Second_keys)
|
|
294
|
+
combined_pangenome_clusters_Second_sequences_sorted = reorder_dict_by_keys(combined_pangenome_clusters_Second_sequences, sorted_Second_keys)
|
|
295
|
+
|
|
155
296
|
else:
|
|
156
297
|
pangenome_clusters_Type = single_clustering_counting(pangenome_clusters_First, reps)
|
|
298
|
+
sorted_First_keys = sort_keys_by_values(pangenome_clusters_First, pangenome_clusters_First_sequences)
|
|
299
|
+
pangenome_clusters_First_sorted = reorder_dict_by_keys(pangenome_clusters_First, sorted_First_keys)
|
|
300
|
+
pangenome_clusters_First_sequences_sorted = reorder_dict_by_keys(pangenome_clusters_First_sequences,
|
|
301
|
+
sorted_First_keys)
|
|
302
|
+
pangenome_clusters_Type_sorted = reorder_dict_by_keys(pangenome_clusters_Type, sorted_First_keys)
|
|
157
303
|
|
|
158
304
|
|
|
159
305
|
|
|
160
306
|
Number_Of_Second_Extending_But_Same_Genomes = 0
|
|
161
307
|
|
|
162
|
-
|
|
163
|
-
pangenome_clusters_First_sorted = reorder_dict_by_keys(pangenome_clusters_First, sorted_first_keys)
|
|
164
|
-
pangenome_clusters_First_sequences_sorted = reorder_dict_by_keys(pangenome_clusters_First_sequences, sorted_first_keys)
|
|
165
|
-
pangenome_clusters_Type_sorted = reorder_dict_by_keys(pangenome_clusters_Type, sorted_first_keys)
|
|
308
|
+
|
|
166
309
|
|
|
167
310
|
print("Calculating Groups")
|
|
168
311
|
seen_groupings = []
|
|
@@ -228,9 +371,18 @@ def cluster(options):
|
|
|
228
371
|
len(combined_pangenome_clusters_Second_sequences)))
|
|
229
372
|
outfile.write("\nTotal Number of First Gene Groups That Had Additional Second Sequences But Not New Genomes: " + str(
|
|
230
373
|
Number_Of_Second_Extending_But_Same_Genomes))
|
|
231
|
-
|
|
374
|
+
|
|
232
375
|
if options.gene_presence_absence_out != False:
|
|
233
|
-
|
|
376
|
+
if options.reclustered != None:
|
|
377
|
+
# Pass both First and Second clustering data
|
|
378
|
+
gene_presence_absence_output(options, genome_dict, pangenome_clusters_First_sorted,
|
|
379
|
+
pangenome_clusters_First_sequences_sorted,
|
|
380
|
+
combined_pangenome_clusters_First_Second_clustered,
|
|
381
|
+
combined_pangenome_clusters_Second_sequences_sorted)
|
|
382
|
+
else:
|
|
383
|
+
# Only First clustering data available
|
|
384
|
+
gene_presence_absence_output(options, genome_dict, pangenome_clusters_First_sorted,
|
|
385
|
+
pangenome_clusters_First_sequences_sorted)
|
|
234
386
|
|
|
235
387
|
|
|
236
388
|
###Need to fix this below. If full/partial the ifs need to be different. If full we first need to output the gfs then align. if -wruite-groups not presented then it needs
|
PyamilySeq/clusterings.py
CHANGED
|
@@ -156,7 +156,7 @@ def cluster_MMseqs(options,splitter):
|
|
|
156
156
|
|
|
157
157
|
|
|
158
158
|
#@profile
|
|
159
|
-
def combined_clustering_counting(options, pangenome_clusters_First, reps, combined_pangenome_clusters_First_Second_clustered, pangenome_clusters_First_genomes, splitter):
|
|
159
|
+
def combined_clustering_counting(options, pangenome_clusters_First, reps, combined_pangenome_clusters_First_Second_clustered, pangenome_clusters_First_genomes, combined_pangenome_clusters_Second, combined_pangenome_clusters_Second_sequences, splitter):
|
|
160
160
|
num_clustered_First = defaultdict(list)
|
|
161
161
|
pangenome_clusters_Type = copy.deepcopy(pangenome_clusters_First)
|
|
162
162
|
list_of_reps = list(reps.keys())
|
|
@@ -336,6 +336,7 @@ def combined_clustering_CDHIT(options, taxa_dict, splitter):
|
|
|
336
336
|
|
|
337
337
|
|
|
338
338
|
|
|
339
|
+
|
|
339
340
|
# def cluster_BLAST(options, splitter):
|
|
340
341
|
# separator = '\t'
|
|
341
342
|
# First_in = open(options.clusters, 'r')
|
PyamilySeq/constants.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
PyamilySeq_Version = 'v1.2.0'
|
|
1
|
+
PyamilySeq_Version = 'v1.3.0'
|
|
2
2
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: PyamilySeq
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: PyamilySeq - A a tool to investigate sequence-based gene groups identified by clustering methods such as CD-HIT, DIAMOND, BLAST or MMseqs2.
|
|
5
5
|
Home-page: https://github.com/NickJD/PyamilySeq
|
|
6
6
|
Author: Nicholas Dimonaco
|
|
@@ -46,7 +46,7 @@ To update to the newest version add '-U' to end of the pip install command.
|
|
|
46
46
|
```commandline
|
|
47
47
|
usage: PyamilySeq.py [-h] {Full,Partial} ...
|
|
48
48
|
|
|
49
|
-
PyamilySeq v1.2.0: A tool for gene clustering and analysis.
|
|
49
|
+
PyamilySeq v1.3.0: A tool for gene clustering and analysis.
|
|
50
50
|
|
|
51
51
|
positional arguments:
|
|
52
52
|
{Full,Partial} Choose a mode: 'Full' or 'Partial'.
|
|
@@ -76,7 +76,7 @@ Escherichia_coli_110957|ENSB_TIZS9kbTvShDvyX Escherichia_coli_110957|ENSB_TIZS9k
|
|
|
76
76
|
```
|
|
77
77
|
### Example output:
|
|
78
78
|
```
|
|
79
|
-
Running PyamilySeq v1.2.0
|
|
79
|
+
Running PyamilySeq v1.3.0
|
|
80
80
|
Calculating Groups
|
|
81
81
|
Number of Genomes: 10
|
|
82
82
|
Gene Groups
|
|
@@ -221,7 +221,7 @@ Seq-Combiner -input_dir .../test_data/genomes -name_split_gff .gff3 -output_dir
|
|
|
221
221
|
```
|
|
222
222
|
usage: Seq_Combiner.py [-h] -input_dir INPUT_DIR -input_type {separate,combined,fasta} [-name_split_gff NAME_SPLIT_GFF] [-name_split_fasta NAME_SPLIT_FASTA] -output_dir OUTPUT_DIR -output_name OUTPUT_FILE [-gene_ident GENE_IDENT] [-translate] [-v]
|
|
223
223
|
|
|
224
|
-
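PyamilySeq v1.2.0: Seq-Combiner - A tool to extract sequences from GFF/FASTA files and prepare them for PyamilySeq.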
|
|
224
|
+
PyamilySeq v1.3.0: Seq-Combiner - A tool to extract sequences from GFF/FASTA files and prepare them for PyamilySeq.
|
|
225
225
|
|
|
226
226
|
options:
|
|
227
227
|
-h, --help show this help message and exit
|
|
@@ -264,7 +264,7 @@ usage: Group_Splitter.py [-h] -input_fasta INPUT_FASTA -sequence_type {AA,DNA}
|
|
|
264
264
|
[-M CLUSTERING_MEMORY] [-no_delete_temp_files]
|
|
265
265
|
[-verbose] [-v]
|
|
266
266
|
|
|
267
|
-
PyamilySeq v1.2.0: Group-Splitter - A tool to split multi-copy gene groups
|
|
267
|
+
PyamilySeq v1.3.0: Group-Splitter - A tool to split multi-copy gene groups
|
|
268
268
|
identified by PyamilySeq.
|
|
269
269
|
|
|
270
270
|
options:
|
|
@@ -317,7 +317,7 @@ Cluster-Summary -genome_num 10 -input_clstr .../test_data/species/E-coli/E-coli_
|
|
|
317
317
|
usage: Cluster_Summary.py [-h] -input_clstr INPUT_CLSTR -output OUTPUT -genome_num GENOME_NUM
|
|
318
318
|
[-output_dir OUTPUT_DIR] [-verbose] [-v]
|
|
319
319
|
|
|
320
|
-
PyamilySeq v1.2.0: Cluster-Summary - A tool to summarise CD-HIT clustering files.
|
|
320
|
+
PyamilySeq v1.3.0: Cluster-Summary - A tool to summarise CD-HIT clustering files.
|
|
321
321
|
|
|
322
322
|
options:
|
|
323
323
|
-h, --help show this help message and exit
|
|
@@ -3,20 +3,20 @@ PyamilySeq/Cluster_Summary.py,sha256=efXMfGvATERCTxwaqbauhZwt_5Hrf9KpGKY3EgsHVDk
|
|
|
3
3
|
PyamilySeq/Group_Extractor.py,sha256=oe2VmOVxdvTmAcy8NKwD1F27IdN2utAfczxsyxg96yc,2898
|
|
4
4
|
PyamilySeq/Group_Sizes.py,sha256=3snkAN19o3Y4IY6IqSim1qy415FfQe1Wb8vzWTKF0Wo,3028
|
|
5
5
|
PyamilySeq/Group_Splitter.py,sha256=OcMj9GnAyybs_DaNKRyvfL_nl2dB2gUI4BD_EQrBbWo,25653
|
|
6
|
-
PyamilySeq/PyamilySeq.py,sha256=
|
|
6
|
+
PyamilySeq/PyamilySeq.py,sha256=vc_mFuK-Cmqa3elusyIg5DksTHxdwCcFv2ki2ACqXIA,17468
|
|
7
7
|
PyamilySeq/PyamilySeq_Genus.py,sha256=KUC0QkCRpKQ9HEgxyTSD7Nc63wSXtriWyIqt_YOy5ys,12470
|
|
8
|
-
PyamilySeq/PyamilySeq_Species.py,sha256=
|
|
8
|
+
PyamilySeq/PyamilySeq_Species.py,sha256=1P_xbdUrZPykyS44KFyRdyxU-XPX_yKLpJhjhB8VTpg,24029
|
|
9
9
|
PyamilySeq/Seq_Combiner.py,sha256=G49zthWtsTfqYX1tqc7op9a9cSia1IL0VTiAtwgdTwc,4746
|
|
10
10
|
PyamilySeq/Seq_Extractor.py,sha256=IQk4Qn6LJkPXD1O4TQesneS3_ZN8hBsTVZQGlZ1c-Dk,3072
|
|
11
11
|
PyamilySeq/Seq_Finder.py,sha256=ht-fSQ_opWKydcoWI9D3nTwLt6Rpgevnf2y0KxVjw4M,1881
|
|
12
12
|
PyamilySeq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
-
PyamilySeq/clusterings.py,sha256=
|
|
13
|
+
PyamilySeq/clusterings.py,sha256=bfST7_i6qnj9ogclmOSkQhurZaEWBBq0H48FAN7JpOg,22374
|
|
14
14
|
PyamilySeq/config.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
-
PyamilySeq/constants.py,sha256=
|
|
15
|
+
PyamilySeq/constants.py,sha256=cykSvqrDAsDJCo9FJJ7lRvoOho2H7xCGE923X9iOE4U,31
|
|
16
16
|
PyamilySeq/utils.py,sha256=1U794Xd5qzmaIz2VujdnPkND729kr7rKjei0Y57f-QE,32972
|
|
17
|
-
pyamilyseq-1.
|
|
18
|
-
pyamilyseq-1.
|
|
19
|
-
pyamilyseq-1.
|
|
20
|
-
pyamilyseq-1.
|
|
21
|
-
pyamilyseq-1.
|
|
22
|
-
pyamilyseq-1.
|
|
17
|
+
pyamilyseq-1.3.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
18
|
+
pyamilyseq-1.3.0.dist-info/METADATA,sha256=vl5fx_QOahTxZfwDXVzJPgMWIIiLGDnr2VmP-rxWIWo,17979
|
|
19
|
+
pyamilyseq-1.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
20
|
+
pyamilyseq-1.3.0.dist-info/entry_points.txt,sha256=5RkUWIneXu-kCnClJhv0u27lWHAoyoVmospZMU5Cs2U,846
|
|
21
|
+
pyamilyseq-1.3.0.dist-info/top_level.txt,sha256=J6JhugUQTq4rq96yibAlQu3o4KCM9WuYfqr3w1r119M,11
|
|
22
|
+
pyamilyseq-1.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|