PyamilySeq 1.1.1__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/PKG-INFO +8 -7
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/README.md +5 -5
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/setup.cfg +7 -1
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq/PyamilySeq.py +13 -11
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq/PyamilySeq_Genus.py +2 -1
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq/PyamilySeq_Species.py +3 -2
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq/Seq_Combiner.py +2 -2
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq/Seq_Extractor.py +6 -1
- pyamilyseq-1.2.0/src/PyamilySeq/config.py +0 -0
- pyamilyseq-1.2.0/src/PyamilySeq/constants.py +2 -0
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq/utils.py +1 -1
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq.egg-info/PKG-INFO +8 -7
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq.egg-info/SOURCES.txt +1 -0
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq.egg-info/entry_points.txt +5 -0
- pyamilyseq-1.1.1/src/PyamilySeq/constants.py +0 -2
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/LICENSE +0 -0
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/pyproject.toml +0 -0
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq/Cluster_Compare.py +0 -0
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq/Cluster_Summary.py +0 -0
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq/Group_Extractor.py +0 -0
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq/Group_Sizes.py +0 -0
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq/Group_Splitter.py +0 -0
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq/Seq_Finder.py +0 -0
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq/__init__.py +0 -0
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq/clusterings.py +0 -0
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq.egg-info/dependency_links.txt +0 -0
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq.egg-info/requires.txt +0 -0
- {pyamilyseq-1.1.1 → pyamilyseq-1.2.0}/src/PyamilySeq.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: PyamilySeq
|
|
3
|
-
Version: 1.1.1
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: PyamilySeq - A a tool to investigate sequence-based gene groups identified by clustering methods such as CD-HIT, DIAMOND, BLAST or MMseqs2.
|
|
5
5
|
Home-page: https://github.com/NickJD/PyamilySeq
|
|
6
6
|
Author: Nicholas Dimonaco
|
|
@@ -13,6 +13,7 @@ Requires-Python: >=3.6
|
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
15
|
Requires-Dist: levenshtein
|
|
16
|
+
Dynamic: license-file
|
|
16
17
|
|
|
17
18
|
# PyamilySeq
|
|
18
19
|
**PyamilySeq** is a Python tool for clustering gene sequences into groups based on sequence similarity identified by tools such as CD-HIT, BLAST, DIAMOND or MMseqs2.
|
|
@@ -45,7 +46,7 @@ To update to the newest version add '-U' to end of the pip install command.
|
|
|
45
46
|
```commandline
|
|
46
47
|
usage: PyamilySeq.py [-h] {Full,Partial} ...
|
|
47
48
|
|
|
48
|
-
PyamilySeq v1.
|
|
49
|
+
PyamilySeq v1.2.0: A tool for gene clustering and analysis.
|
|
49
50
|
|
|
50
51
|
positional arguments:
|
|
51
52
|
{Full,Partial} Choose a mode: 'Full' or 'Partial'.
|
|
@@ -75,7 +76,7 @@ Escherichia_coli_110957|ENSB_TIZS9kbTvShDvyX Escherichia_coli_110957|ENSB_TIZS9k
|
|
|
75
76
|
```
|
|
76
77
|
### Example output:
|
|
77
78
|
```
|
|
78
|
-
Running PyamilySeq v1.
|
|
79
|
+
Running PyamilySeq v1.2.0
|
|
79
80
|
Calculating Groups
|
|
80
81
|
Number of Genomes: 10
|
|
81
82
|
Gene Groups
|
|
@@ -220,7 +221,7 @@ Seq-Combiner -input_dir .../test_data/genomes -name_split_gff .gff3 -output_dir
|
|
|
220
221
|
```
|
|
221
222
|
usage: Seq_Combiner.py [-h] -input_dir INPUT_DIR -input_type {separate,combined,fasta} [-name_split_gff NAME_SPLIT_GFF] [-name_split_fasta NAME_SPLIT_FASTA] -output_dir OUTPUT_DIR -output_name OUTPUT_FILE [-gene_ident GENE_IDENT] [-translate] [-v]
|
|
222
223
|
|
|
223
|
-
PyamilySeq v1.
|
|
224
|
+
PyamilySeq v1.2.0: Seq-Combiner - A tool to extract sequences from GFF/FASTA files and prepare them for PyamilySeq.
|
|
224
225
|
|
|
225
226
|
options:
|
|
226
227
|
-h, --help show this help message and exit
|
|
@@ -263,7 +264,7 @@ usage: Group_Splitter.py [-h] -input_fasta INPUT_FASTA -sequence_type {AA,DNA}
|
|
|
263
264
|
[-M CLUSTERING_MEMORY] [-no_delete_temp_files]
|
|
264
265
|
[-verbose] [-v]
|
|
265
266
|
|
|
266
|
-
PyamilySeq v1.
|
|
267
|
+
PyamilySeq v1.2.0: Group-Splitter - A tool to split multi-copy gene groups
|
|
267
268
|
identified by PyamilySeq.
|
|
268
269
|
|
|
269
270
|
options:
|
|
@@ -316,7 +317,7 @@ Cluster-Summary -genome_num 10 -input_clstr .../test_data/species/E-coli/E-coli_
|
|
|
316
317
|
usage: Cluster_Summary.py [-h] -input_clstr INPUT_CLSTR -output OUTPUT -genome_num GENOME_NUM
|
|
317
318
|
[-output_dir OUTPUT_DIR] [-verbose] [-v]
|
|
318
319
|
|
|
319
|
-
PyamilySeq v1.
|
|
320
|
+
PyamilySeq v1.2.0: Cluster-Summary - A tool to summarise CD-HIT clustering files.
|
|
320
321
|
|
|
321
322
|
options:
|
|
322
323
|
-h, --help show this help message and exit
|
|
@@ -29,7 +29,7 @@ To update to the newest version add '-U' to end of the pip install command.
|
|
|
29
29
|
```commandline
|
|
30
30
|
usage: PyamilySeq.py [-h] {Full,Partial} ...
|
|
31
31
|
|
|
32
|
-
PyamilySeq v1.
|
|
32
|
+
PyamilySeq v1.2.0: A tool for gene clustering and analysis.
|
|
33
33
|
|
|
34
34
|
positional arguments:
|
|
35
35
|
{Full,Partial} Choose a mode: 'Full' or 'Partial'.
|
|
@@ -59,7 +59,7 @@ Escherichia_coli_110957|ENSB_TIZS9kbTvShDvyX Escherichia_coli_110957|ENSB_TIZS9k
|
|
|
59
59
|
```
|
|
60
60
|
### Example output:
|
|
61
61
|
```
|
|
62
|
-
Running PyamilySeq v1.
|
|
62
|
+
Running PyamilySeq v1.2.0
|
|
63
63
|
Calculating Groups
|
|
64
64
|
Number of Genomes: 10
|
|
65
65
|
Gene Groups
|
|
@@ -204,7 +204,7 @@ Seq-Combiner -input_dir .../test_data/genomes -name_split_gff .gff3 -output_dir
|
|
|
204
204
|
```
|
|
205
205
|
usage: Seq_Combiner.py [-h] -input_dir INPUT_DIR -input_type {separate,combined,fasta} [-name_split_gff NAME_SPLIT_GFF] [-name_split_fasta NAME_SPLIT_FASTA] -output_dir OUTPUT_DIR -output_name OUTPUT_FILE [-gene_ident GENE_IDENT] [-translate] [-v]
|
|
206
206
|
|
|
207
|
-
PyamilySeq v1.
|
|
207
|
+
PyamilySeq v1.2.0: Seq-Combiner - A tool to extract sequences from GFF/FASTA files and prepare them for PyamilySeq.
|
|
208
208
|
|
|
209
209
|
options:
|
|
210
210
|
-h, --help show this help message and exit
|
|
@@ -247,7 +247,7 @@ usage: Group_Splitter.py [-h] -input_fasta INPUT_FASTA -sequence_type {AA,DNA}
|
|
|
247
247
|
[-M CLUSTERING_MEMORY] [-no_delete_temp_files]
|
|
248
248
|
[-verbose] [-v]
|
|
249
249
|
|
|
250
|
-
PyamilySeq v1.
|
|
250
|
+
PyamilySeq v1.2.0: Group-Splitter - A tool to split multi-copy gene groups
|
|
251
251
|
identified by PyamilySeq.
|
|
252
252
|
|
|
253
253
|
options:
|
|
@@ -300,7 +300,7 @@ Cluster-Summary -genome_num 10 -input_clstr .../test_data/species/E-coli/E-coli_
|
|
|
300
300
|
usage: Cluster_Summary.py [-h] -input_clstr INPUT_CLSTR -output OUTPUT -genome_num GENOME_NUM
|
|
301
301
|
[-output_dir OUTPUT_DIR] [-verbose] [-v]
|
|
302
302
|
|
|
303
|
-
PyamilySeq v1.
|
|
303
|
+
PyamilySeq v1.2.0: Cluster-Summary - A tool to summarise CD-HIT clustering files.
|
|
304
304
|
|
|
305
305
|
options:
|
|
306
306
|
-h, --help show this help message and exit
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[metadata]
|
|
2
2
|
name = PyamilySeq
|
|
3
|
-
version = v1.1.1
|
|
3
|
+
version = v1.2.0
|
|
4
4
|
license_files = LICENSE
|
|
5
5
|
author = Nicholas Dimonaco
|
|
6
6
|
author_email = nicholas@dimonaco.co.uk
|
|
@@ -37,10 +37,16 @@ console_scripts =
|
|
|
37
37
|
group-splitter = PyamilySeq.Group_Splitter:main
|
|
38
38
|
Cluster-Summary = PyamilySeq.Cluster_Summary:main
|
|
39
39
|
cluster-summary = PyamilySeq.Cluster_Summary:main
|
|
40
|
+
Cluster-Extractor = PyamilySeq.Cluster_Extractor:main
|
|
41
|
+
cluster-extractor = PyamilySeq.Cluster_Extractor:main
|
|
40
42
|
Seq-Finder = PyamilySeq.Seq_Finder:main
|
|
41
43
|
seq-finder = PyamilySeq.Seq_Finder:main
|
|
42
44
|
Seq-Extractor = PyamilySeq.Seq_Extractor:main
|
|
43
45
|
seq-extractor = PyamilySeq.Seq_Extractor:main
|
|
46
|
+
|
|
47
|
+
compute-singletrees-rf = aux_tools.RF.Compute_SingleTree_RFs:main
|
|
48
|
+
compare-rf = aux_tools.RF.compare_RF:main
|
|
49
|
+
compare-contree-singletrees = aux_tools.RF.compare_contree_singletrees:main
|
|
44
50
|
|
|
45
51
|
[egg_info]
|
|
46
52
|
tag_build =
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import argparse
|
|
2
|
-
|
|
2
|
+
#from config import config_params
|
|
3
3
|
|
|
4
4
|
try:
|
|
5
5
|
from .PyamilySeq_Species import cluster as species_cluster
|
|
@@ -67,7 +67,7 @@ def main():
|
|
|
67
67
|
full_parser.add_argument("-s", type=str, dest="len_diff", default="0.80", required=False,
|
|
68
68
|
help="Length difference threshold for clustering (default: 0.80) - CD-HIT parameter '-s'.")
|
|
69
69
|
|
|
70
|
-
full_parser.add_argument("-fast_mode", action="store_true",
|
|
70
|
+
full_parser.add_argument("-fast_mode", action="store_true",
|
|
71
71
|
help="Enable fast mode for CD-HIT (not recommended) - CD-HIT parameter '-g'.")
|
|
72
72
|
|
|
73
73
|
|
|
@@ -95,14 +95,14 @@ def main():
|
|
|
95
95
|
subparser.add_argument("-genus_groups", default="1,2,3,4,5,6,7,8,9,10", required=False,
|
|
96
96
|
help="Gene groupings for 'Genus' mode (default: '1-10').")
|
|
97
97
|
subparser.add_argument("-write_groups", default=None, dest="write_groups", required=False,
|
|
98
|
-
help="Output gene groups as a single FASTA file (
|
|
99
|
-
subparser.add_argument("-write_individual_groups", action="store_true", dest="write_individual_groups",
|
|
98
|
+
help="Output gene groups as a single FASTA file (e.g., '99,95'). Triggers writing individual groups.")
|
|
99
|
+
subparser.add_argument("-write_individual_groups", action="store_true", dest="write_individual_groups",
|
|
100
100
|
help="Output individual FASTA files for each group.")
|
|
101
|
-
subparser.add_argument("-align", action="store_true", dest="align_core",
|
|
102
|
-
help="Align and concatenate sequences for 'core' groups (those in 99-100
|
|
103
|
-
subparser.add_argument("-align_aa", action="store_true",
|
|
101
|
+
subparser.add_argument("-align", action="store_true", dest="align_core",
|
|
102
|
+
help="Align and concatenate sequences for 'core' groups (those in 99-100%% of genomes).")
|
|
103
|
+
subparser.add_argument("-align_aa", action="store_true",
|
|
104
104
|
help="Align sequences as amino acids.")
|
|
105
|
-
subparser.add_argument("-no_gpa", action="store_false", dest="gene_presence_absence_out",
|
|
105
|
+
subparser.add_argument("-no_gpa", action="store_false", dest="gene_presence_absence_out",
|
|
106
106
|
help="Skip creation of gene_presence_absence.csv.")
|
|
107
107
|
subparser.add_argument("-M", type=int, default=4000, dest="mem", required=False,
|
|
108
108
|
help="Memory allocation for clustering (MB) - CD-HIT parameter '-M'.")
|
|
@@ -110,13 +110,15 @@ def main():
|
|
|
110
110
|
help="Number of threads for clustering/alignment - CD-HIT parameter '-T' | MAFFT parameter '--thread'.")
|
|
111
111
|
|
|
112
112
|
# Miscellaneous Arguments
|
|
113
|
-
subparser.add_argument("-verbose", action="store_true",
|
|
113
|
+
subparser.add_argument("-verbose", action="store_true",
|
|
114
114
|
help="Print verbose output.")
|
|
115
115
|
subparser.add_argument("-v", "--version", action="version",
|
|
116
|
-
version=f"PyamilySeq {PyamilySeq_Version}: Exiting."
|
|
116
|
+
version=f"PyamilySeq {PyamilySeq_Version}: Exiting.")
|
|
117
117
|
|
|
118
118
|
# Parse Arguments
|
|
119
119
|
options = parser.parse_args()
|
|
120
|
+
## Configuration
|
|
121
|
+
|
|
120
122
|
|
|
121
123
|
if options.write_groups != None and options.write_individual_groups == False:
|
|
122
124
|
options.write_individual_groups = True
|
|
@@ -147,7 +149,7 @@ def main():
|
|
|
147
149
|
if options.align_core:
|
|
148
150
|
options.write_individual_groups = True
|
|
149
151
|
if options.write_groups == None:
|
|
150
|
-
sys.exit('Must provide "-
|
|
152
|
+
sys.exit('Must provide "-write_groups" to output gene groups before alignment "-align" can be done.')
|
|
151
153
|
elif options.run_mode == 'Partial':
|
|
152
154
|
required_partial_mode = [options.cluster_file, options.original_fasta]
|
|
153
155
|
if all(required_partial_mode):
|
|
@@ -17,7 +17,8 @@ def gene_presence_absence_output(options, genus_dict, pangenome_clusters_First_s
|
|
|
17
17
|
#in_name = options.clusters.split('.')[0].split('/')[-1]
|
|
18
18
|
gpa_outfile = os.path.join(output_dir, 'gene_presence_absence.csv')
|
|
19
19
|
gpa_outfile = open(gpa_outfile, 'w')
|
|
20
|
-
|
|
20
|
+
genus_dict = OrderedDict(sorted(genus_dict.items()))
|
|
21
|
+
gpa_outfile.write('"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment",'
|
|
21
22
|
'"Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","')
|
|
22
23
|
gpa_outfile.write('","'.join(genus_dict.keys()))
|
|
23
24
|
gpa_outfile.write('"\n')
|
|
@@ -15,14 +15,15 @@ def gene_presence_absence_output(options, genome_dict, pangenome_clusters_First_
|
|
|
15
15
|
#in_name = options.clusters.split('.')[0].split('/')[-1]
|
|
16
16
|
gpa_outfile = os.path.join(output_dir, 'gene_presence_absence.csv')
|
|
17
17
|
gpa_outfile = open(gpa_outfile, 'w')
|
|
18
|
-
|
|
18
|
+
genome_dict = OrderedDict(sorted(genome_dict.items()))
|
|
19
|
+
gpa_outfile.write('"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment",'
|
|
19
20
|
'"Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","')
|
|
20
21
|
gpa_outfile.write('","'.join(genome_dict.keys()))
|
|
21
22
|
gpa_outfile.write('"\n')
|
|
22
23
|
for cluster, sequences in pangenome_clusters_First_sequences_sorted.items():
|
|
23
24
|
average_sequences_per_genome = len(sequences) / len(pangenome_clusters_First_sorted[cluster])
|
|
24
25
|
gpa_outfile.write('"group_'+str(cluster)+'","","","'+str(len(pangenome_clusters_First_sorted[cluster]))+'","'+str(len(sequences))+'","'+str(average_sequences_per_genome)+
|
|
25
|
-
'","","","","","","","",""
|
|
26
|
+
'","","","","","","","",""')
|
|
26
27
|
|
|
27
28
|
|
|
28
29
|
for genome in genome_dict.keys():
|
|
@@ -59,7 +59,7 @@ def main():
|
|
|
59
59
|
exit(1)
|
|
60
60
|
if options.input_type == 'fasta' and options.name_split_fasta is None:
|
|
61
61
|
print("Please provide a substring to split the filename and extract the genome name.")
|
|
62
|
-
exit
|
|
62
|
+
exit(1)
|
|
63
63
|
|
|
64
64
|
output_path = os.path.abspath(options.output_dir)
|
|
65
65
|
if not os.path.exists(output_path):
|
|
@@ -77,7 +77,7 @@ def main():
|
|
|
77
77
|
elif options.input_type == 'combined':
|
|
78
78
|
read_combined_files(options.input_dir, options.name_split_gff, options.gene_ident, combined_out_file, options.translate, True)
|
|
79
79
|
elif options.input_type == 'fasta':
|
|
80
|
-
read_fasta_files(options.input_dir, options.name_split_fasta, combined_out_file, options.translate)
|
|
80
|
+
read_fasta_files(options.input_dir, options.name_split_fasta, combined_out_file, options.translate, True)
|
|
81
81
|
|
|
82
82
|
if __name__ == "__main__":
|
|
83
83
|
main()
|
|
@@ -9,8 +9,13 @@ def find_gene_ids_in_csv(csv_file, group_name):
|
|
|
9
9
|
cells = line.strip().split(',')
|
|
10
10
|
if cells[0].replace('"','') == group_name:
|
|
11
11
|
# Collect gene IDs from column 14 onward
|
|
12
|
+
# for cell in cells[14:]:
|
|
13
|
+
# gene_ids.extend(cell.strip().replace('"','').split()) # Splitting by spaces if there are multiple IDs in a cell break
|
|
12
14
|
for cell in cells[14:]:
|
|
13
|
-
|
|
15
|
+
for gene in cell.strip().replace('"', '').split(';'):
|
|
16
|
+
if gene:
|
|
17
|
+
gene_ids.append(gene)
|
|
18
|
+
|
|
14
19
|
return gene_ids
|
|
15
20
|
|
|
16
21
|
def extract_sequences(fasta_file, gene_ids):
|
|
File without changes
|
|
@@ -14,7 +14,7 @@ levenshtein_distance_cal = None
|
|
|
14
14
|
# Check for Levenshtein library once
|
|
15
15
|
try:
|
|
16
16
|
import Levenshtein as LV
|
|
17
|
-
# Assign the
|
|
17
|
+
# Assign the optimised function
|
|
18
18
|
def levenshtein_distance_calc(seq1, seq2):
|
|
19
19
|
return LV.distance(seq1, seq2)
|
|
20
20
|
except (ModuleNotFoundError, ImportError):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: PyamilySeq
|
|
3
|
-
Version: 1.1.1
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: PyamilySeq - A a tool to investigate sequence-based gene groups identified by clustering methods such as CD-HIT, DIAMOND, BLAST or MMseqs2.
|
|
5
5
|
Home-page: https://github.com/NickJD/PyamilySeq
|
|
6
6
|
Author: Nicholas Dimonaco
|
|
@@ -13,6 +13,7 @@ Requires-Python: >=3.6
|
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
15
|
Requires-Dist: levenshtein
|
|
16
|
+
Dynamic: license-file
|
|
16
17
|
|
|
17
18
|
# PyamilySeq
|
|
18
19
|
**PyamilySeq** is a Python tool for clustering gene sequences into groups based on sequence similarity identified by tools such as CD-HIT, BLAST, DIAMOND or MMseqs2.
|
|
@@ -45,7 +46,7 @@ To update to the newest version add '-U' to end of the pip install command.
|
|
|
45
46
|
```commandline
|
|
46
47
|
usage: PyamilySeq.py [-h] {Full,Partial} ...
|
|
47
48
|
|
|
48
|
-
PyamilySeq v1.
|
|
49
|
+
PyamilySeq v1.2.0: A tool for gene clustering and analysis.
|
|
49
50
|
|
|
50
51
|
positional arguments:
|
|
51
52
|
{Full,Partial} Choose a mode: 'Full' or 'Partial'.
|
|
@@ -75,7 +76,7 @@ Escherichia_coli_110957|ENSB_TIZS9kbTvShDvyX Escherichia_coli_110957|ENSB_TIZS9k
|
|
|
75
76
|
```
|
|
76
77
|
### Example output:
|
|
77
78
|
```
|
|
78
|
-
Running PyamilySeq v1.
|
|
79
|
+
Running PyamilySeq v1.2.0
|
|
79
80
|
Calculating Groups
|
|
80
81
|
Number of Genomes: 10
|
|
81
82
|
Gene Groups
|
|
@@ -220,7 +221,7 @@ Seq-Combiner -input_dir .../test_data/genomes -name_split_gff .gff3 -output_dir
|
|
|
220
221
|
```
|
|
221
222
|
usage: Seq_Combiner.py [-h] -input_dir INPUT_DIR -input_type {separate,combined,fasta} [-name_split_gff NAME_SPLIT_GFF] [-name_split_fasta NAME_SPLIT_FASTA] -output_dir OUTPUT_DIR -output_name OUTPUT_FILE [-gene_ident GENE_IDENT] [-translate] [-v]
|
|
222
223
|
|
|
223
|
-
PyamilySeq v1.
|
|
224
|
+
PyamilySeq v1.2.0: Seq-Combiner - A tool to extract sequences from GFF/FASTA files and prepare them for PyamilySeq.
|
|
224
225
|
|
|
225
226
|
options:
|
|
226
227
|
-h, --help show this help message and exit
|
|
@@ -263,7 +264,7 @@ usage: Group_Splitter.py [-h] -input_fasta INPUT_FASTA -sequence_type {AA,DNA}
|
|
|
263
264
|
[-M CLUSTERING_MEMORY] [-no_delete_temp_files]
|
|
264
265
|
[-verbose] [-v]
|
|
265
266
|
|
|
266
|
-
PyamilySeq v1.
|
|
267
|
+
PyamilySeq v1.2.0: Group-Splitter - A tool to split multi-copy gene groups
|
|
267
268
|
identified by PyamilySeq.
|
|
268
269
|
|
|
269
270
|
options:
|
|
@@ -316,7 +317,7 @@ Cluster-Summary -genome_num 10 -input_clstr .../test_data/species/E-coli/E-coli_
|
|
|
316
317
|
usage: Cluster_Summary.py [-h] -input_clstr INPUT_CLSTR -output OUTPUT -genome_num GENOME_NUM
|
|
317
318
|
[-output_dir OUTPUT_DIR] [-verbose] [-v]
|
|
318
319
|
|
|
319
|
-
PyamilySeq v1.
|
|
320
|
+
PyamilySeq v1.2.0: Cluster-Summary - A tool to summarise CD-HIT clustering files.
|
|
320
321
|
|
|
321
322
|
options:
|
|
322
323
|
-h, --help show this help message and exit
|
|
@@ -1,11 +1,16 @@
|
|
|
1
1
|
[console_scripts]
|
|
2
|
+
Cluster-Extractor = PyamilySeq.Cluster_Extractor:main
|
|
2
3
|
Cluster-Summary = PyamilySeq.Cluster_Summary:main
|
|
3
4
|
Group-Splitter = PyamilySeq.Group_Splitter:main
|
|
4
5
|
PyamilySeq = PyamilySeq.PyamilySeq:main
|
|
5
6
|
Seq-Combiner = PyamilySeq.Seq_Combiner:main
|
|
6
7
|
Seq-Extractor = PyamilySeq.Seq_Extractor:main
|
|
7
8
|
Seq-Finder = PyamilySeq.Seq_Finder:main
|
|
9
|
+
cluster-extractor = PyamilySeq.Cluster_Extractor:main
|
|
8
10
|
cluster-summary = PyamilySeq.Cluster_Summary:main
|
|
11
|
+
compare-contree-singletrees = aux_tools.RF.compare_contree_singletrees:main
|
|
12
|
+
compare-rf = aux_tools.RF.compare_RF:main
|
|
13
|
+
compute-singletrees-rf = aux_tools.RF.Compute_SingleTree_RFs:main
|
|
9
14
|
group-splitter = PyamilySeq.Group_Splitter:main
|
|
10
15
|
pyamilyseq = PyamilySeq.PyamilySeq:main
|
|
11
16
|
seq-combiner = PyamilySeq.Seq_Combiner:main
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|