PyamilySeq 0.7.0__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PyamilySeq/Constants.py +1 -1
- PyamilySeq/PyamilySeq.py +13 -2
- {PyamilySeq-0.7.0.dist-info → PyamilySeq-0.7.1.dist-info}/METADATA +32 -33
- {PyamilySeq-0.7.0.dist-info → PyamilySeq-0.7.1.dist-info}/RECORD +8 -8
- {PyamilySeq-0.7.0.dist-info → PyamilySeq-0.7.1.dist-info}/WHEEL +1 -1
- {PyamilySeq-0.7.0.dist-info → PyamilySeq-0.7.1.dist-info}/LICENSE +0 -0
- {PyamilySeq-0.7.0.dist-info → PyamilySeq-0.7.1.dist-info}/entry_points.txt +0 -0
- {PyamilySeq-0.7.0.dist-info → PyamilySeq-0.7.1.dist-info}/top_level.txt +0 -0
PyamilySeq/Constants.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
PyamilySeq_Version = 'v0.7.0'
|
|
1
|
+
PyamilySeq_Version = 'v0.7.1'
|
|
2
2
|
|
PyamilySeq/PyamilySeq.py
CHANGED
|
@@ -41,6 +41,7 @@ def run_cd_hit(options, input_file, clustering_output, clustering_mode):
|
|
|
41
41
|
|
|
42
42
|
def main():
|
|
43
43
|
parser = argparse.ArgumentParser(description='PyamilySeq ' + PyamilySeq_Version + ': A tool that groups genes into unique clusters.')
|
|
44
|
+
vparser = argparse.ArgumentParser()
|
|
44
45
|
### Required Arguments
|
|
45
46
|
required = parser.add_argument_group('Required Arguments')
|
|
46
47
|
required.add_argument('-run_mode', action='store', dest='run_mode', choices=['Full','Partial'],
|
|
@@ -132,10 +133,20 @@ def main():
|
|
|
132
133
|
misc = parser.add_argument_group('Misc')
|
|
133
134
|
misc.add_argument('-verbose', action='store_true', dest='verbose', default=None, help='Default - False: Print out runtime messages',
|
|
134
135
|
required = False)
|
|
135
|
-
|
|
136
|
+
|
|
137
|
+
### Version Arguments
|
|
138
|
+
version = vparser.add_argument_group('Version')
|
|
139
|
+
version.add_argument('-v', action='store_true', dest='version',
|
|
136
140
|
help='Default - False: Print out version number and exit',
|
|
137
141
|
required=False)
|
|
138
142
|
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
args, unknown = vparser.parse_known_args()
|
|
146
|
+
|
|
147
|
+
if args.version == True:
|
|
148
|
+
sys.exit("PyamilySeq version: "+PyamilySeq_Version)
|
|
149
|
+
|
|
139
150
|
options = parser.parse_args()
|
|
140
151
|
|
|
141
152
|
### Checking all required parameters are provided by user #!!# Doesn't seem to work
|
|
@@ -288,5 +299,5 @@ def main():
|
|
|
288
299
|
"Please report any issues to: https://github.com/NickJD/PyamilySeq/issues\n#####")
|
|
289
300
|
|
|
290
301
|
if __name__ == "__main__":
|
|
291
|
-
print("Running PyamilySeq "+PyamilySeq_Version)
|
|
302
|
+
#print("Running PyamilySeq "+PyamilySeq_Version)
|
|
292
303
|
main()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: PyamilySeq
|
|
3
|
-
Version: 0.7.0
|
|
3
|
+
Version: 0.7.1
|
|
4
4
|
Summary: PyamilySeq - A a tool to look for sequence-based gene groups identified by clustering methods such as CD-HIT, DIAMOND, BLAST or MMseqs2.
|
|
5
5
|
Home-page: https://github.com/NickJD/PyamilySeq
|
|
6
6
|
Author: Nicholas Dimonaco
|
|
@@ -58,7 +58,7 @@ Escherichia_coli_110957|ENSB:TIZS9kbTvShDvyX Escherichia_coli_110957|ENSB:TIZS9k
|
|
|
58
58
|
```
|
|
59
59
|
### Example output:
|
|
60
60
|
```
|
|
61
|
-
Running PyamilySeq v0.7.0
|
|
61
|
+
Running PyamilySeq v0.7.1
|
|
62
62
|
Calculating Groups
|
|
63
63
|
Gene Groups:
|
|
64
64
|
First_core_99: 2682
|
|
@@ -80,7 +80,7 @@ PyamilySeq -run_mode Partial -group_mode Genus -clustering_format CD-HIT -output
|
|
|
80
80
|
-cluster_file .../test_data/genus/CD-HIT/combined_cds_cd-hit_80_60.clstr -gpa
|
|
81
81
|
```
|
|
82
82
|
```commandline
|
|
83
|
-
Running PyamilySeq v0.7.0
|
|
83
|
+
Running PyamilySeq v0.7.1
|
|
84
84
|
Calculating Groups
|
|
85
85
|
Genus Groups:
|
|
86
86
|
First_genera_1: 28549
|
|
@@ -98,37 +98,36 @@ Please report any issues to: https://github.com/NickJD/PyamilySeq/issues
|
|
|
98
98
|
### Reclustering can be used to see where additional sequences/genes lay in relation to a contemporary pangenome/gene grouping.
|
|
99
99
|
```
|
|
100
100
|
PyamilySeq -run_mode Partial -group_mode Species -clustering_format CD-HIT -output_dir .../test_data/species/CD-HIT/testing
|
|
101
|
-
-cluster_file .../test_data/species/CD-HIT/E-coli_extracted_cds_cd-
|
|
102
|
-
-reclustered .../test_data/species/CD-HIT/E-coli_extracted_cds_cd-
|
|
101
|
+
-cluster_file .../test_data/species/CD-HIT/E-coli_extracted_cds_cd-hit_80_60.clstr -gpa
|
|
102
|
+
-reclustered .../test_data/species/CD-HIT/E-coli_extracted_cds_cd-hit_80_60_And_StORFs_cds_80_60.clstr
|
|
103
103
|
```
|
|
104
104
|
#### As can be seen below, the additional sequences recovered by the StORF-Reporter annotation tool have 'extended' contemporary or created entirely new gene groups. 'First' corresponds to the groups identified from the first clustering round and 'Second' for the second. In 'reclustering' mode, First_core_# groups are unaffected thus retaining the initial grouping information.
|
|
105
105
|
```commandline
|
|
106
|
-
Running PyamilySeq v0.7.0
|
|
107
106
|
Calculating Groups
|
|
108
107
|
Gene Groups:
|
|
109
|
-
First_core_99:
|
|
110
|
-
First_core_95:
|
|
111
|
-
First_core_15:
|
|
112
|
-
First_core_0:
|
|
113
|
-
extended_core_99:
|
|
114
|
-
extended_core_95:
|
|
115
|
-
extended_core_15:
|
|
116
|
-
extended_core_0:
|
|
117
|
-
combined_core_99:
|
|
118
|
-
combined_core_95:
|
|
119
|
-
combined_core_15:
|
|
120
|
-
combined_core_0:
|
|
108
|
+
First_core_99: 587
|
|
109
|
+
First_core_95: 1529
|
|
110
|
+
First_core_15: 3708
|
|
111
|
+
First_core_0: 29992
|
|
112
|
+
extended_core_99: 29
|
|
113
|
+
extended_core_95: 67
|
|
114
|
+
extended_core_15: 431
|
|
115
|
+
extended_core_0: 1331
|
|
116
|
+
combined_core_99: 2
|
|
117
|
+
combined_core_95: 4
|
|
118
|
+
combined_core_15: 5
|
|
119
|
+
combined_core_0: 4
|
|
121
120
|
Second_core_99: 0
|
|
122
|
-
Second_core_95:
|
|
123
|
-
Second_core_15:
|
|
124
|
-
Second_core_0:
|
|
125
|
-
only_Second_core_99:
|
|
126
|
-
only_Second_core_95:
|
|
127
|
-
only_Second_core_15:
|
|
128
|
-
only_Second_core_0:
|
|
129
|
-
Total Number of First Gene Groups (Including Singletons):
|
|
130
|
-
Total Number of Second Gene Groups (Including Singletons):
|
|
131
|
-
Total Number of First Gene Groups That Had Additional Second Sequences But Not New Genomes:
|
|
121
|
+
Second_core_95: 6
|
|
122
|
+
Second_core_15: 172
|
|
123
|
+
Second_core_0: 1825
|
|
124
|
+
only_Second_core_99: 53
|
|
125
|
+
only_Second_core_95: 493
|
|
126
|
+
only_Second_core_15: 3806
|
|
127
|
+
only_Second_core_0: 27569
|
|
128
|
+
Total Number of First Gene Groups (Including Singletons): 35816
|
|
129
|
+
Total Number of Second Gene Groups (Including Singletons): 67728
|
|
130
|
+
Total Number of First Gene Groups That Had Additional Second Sequences But Not New Genomes: 136
|
|
132
131
|
Outputting gene_presence_absence file
|
|
133
132
|
Thank you for using PyamilySeq -- A detailed user manual can be found at https://github.com/NickJD/PyamilySeq
|
|
134
133
|
Please report any issues to: https://github.com/NickJD/PyamilySeq/issues
|
|
@@ -138,14 +137,14 @@ Please report any issues to: https://github.com/NickJD/PyamilySeq/issues
|
|
|
138
137
|
## PyamilySeq - Menu:
|
|
139
138
|
### PyamilySeq is separated into two main 'run modes', Full and Partial. They each have their own set of required and optional arguments.
|
|
140
139
|
```
|
|
141
|
-
Running PyamilySeq v0.7.0
|
|
140
|
+
Running PyamilySeq v0.7.1
|
|
142
141
|
usage: PyamilySeq.py [-h] -run_mode {Full,Partial} -group_mode {Species,Genus} -clustering_format {CD-HIT,TSV,CSV} -output_dir OUTPUT_DIR
|
|
143
142
|
[-input_type {separate,combined}] [-input_dir INPUT_DIR] [-name_split NAME_SPLIT] [-sequence_type {AA,DNA}] [-gene_ident GENE_IDENT]
|
|
144
143
|
[-pid PIDENT] [-len_diff LEN_DIFF] [-mem CLUSTERING_MEMORY] [-t CLUSTERING_THREADS] [-cluster_file CLUSTER_FILE]
|
|
145
144
|
[-reclustered RECLUSTERED] [-seq_tag SEQUENCE_TAG] [-core_groups CORE_GROUPS] [-genus_groups GENUS_GROUPS] [-w WRITE_GROUPS] [-a]
|
|
146
145
|
[-original_fasta ORIGINAL_FASTA] [-gpa] [-verbose] [-v]
|
|
147
146
|
|
|
148
|
-
PyamilySeq v0.7.0: A tool that groups genes into unique clusters.
|
|
147
|
+
PyamilySeq v0.7.1: A tool that groups genes into unique clusters.
|
|
149
148
|
|
|
150
149
|
options:
|
|
151
150
|
-h, --help show this help message and exit
|
|
@@ -198,9 +197,9 @@ Output Parameters:
|
|
|
198
197
|
-w WRITE_GROUPS Default - No output: Output sequences of identified groups (provide levels at which to output - Species "-w 99,95" Genus "-w 2,3" -
|
|
199
198
|
Must provide FASTA file with -original_fasta if in Partial run mode.
|
|
200
199
|
-a Default - No output: SLOW! (Only works for Species mode) Output aligned and concatinated sequences of identified groups -provide
|
|
201
|
-
group levels at which to output "-w 99,95" - Must provide FASTA file with -original_fasta in
|
|
200
|
+
group levels at which to output "-w 99,95" - Must provide FASTA file with -original_fasta in Partial run mode.
|
|
202
201
|
-original_fasta ORIGINAL_FASTA
|
|
203
|
-
FASTA file to use in conjunction with "-w" or "-
|
|
202
|
+
FASTA file to use in conjunction with "-w" or "-a" when running in Partial Mode.
|
|
204
203
|
-gpa Default - False: If selected, a Roary/Panaroo formatted gene_presence_absence.csv will be created - Required for Coinfinder and
|
|
205
204
|
other downstream tools
|
|
206
205
|
|
|
@@ -222,7 +221,7 @@ Seq-Combiner -input_dir .../test_data/genomes -name_split _combined.gff3 -output
|
|
|
222
221
|
```
|
|
223
222
|
usage: Seq_Combiner.py [-h] -input_dir INPUT_DIR -input_type {separate,combined,fasta} -name_split NAME_SPLIT -output_dir OUTPUT_DIR -output_name OUTPUT_FILE [-gene_ident GENE_IDENT] [-translate] [-v]
|
|
224
223
|
|
|
225
|
-
Seq-Combiner v0.7.0: A tool to extract sequences from GFF/FASTA files.
|
|
224
|
+
Seq-Combiner v0.7.1: A tool to extract sequences from GFF/FASTA files.
|
|
226
225
|
|
|
227
226
|
options:
|
|
228
227
|
-h, --help show this help message and exit
|
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
PyamilySeq/Constants.py,sha256=
|
|
2
|
-
PyamilySeq/PyamilySeq.py,sha256=
|
|
1
|
+
PyamilySeq/Constants.py,sha256=4MNcQLwJguoC9fHBLbreAe-GNgNvtzYrF0MBM6BFY_s,31
|
|
2
|
+
PyamilySeq/PyamilySeq.py,sha256=RbM6G1yU64jlb9r7QRry1vw5mQsxndM6TrvMvq3BVik,15466
|
|
3
3
|
PyamilySeq/PyamilySeq_Genus.py,sha256=ZjD61mTW7NgmsfGfFVEXeIZoSCha9PaLtMPnqdTtacU,12413
|
|
4
4
|
PyamilySeq/PyamilySeq_Species.py,sha256=WL6pu8hlGpnemcpu1tLFmlUlPd4vJpQSW4Om5Hclu_k,14438
|
|
5
5
|
PyamilySeq/Seq_Combiner.py,sha256=dPDu6LlT3B-ZDn3wKZ3AeWraDgv2Tub_16l9CLc3tQ0,3353
|
|
6
6
|
PyamilySeq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
PyamilySeq/clusterings.py,sha256=rcWFv0IiWoS4aUNRjDDwNEL86l1wIKa4vK4htAxy8Hg,18787
|
|
8
8
|
PyamilySeq/utils.py,sha256=-0OZxmX96kOTzms8gnbFBvc5DL6NsqNHNpLpQ4UjNk8,15726
|
|
9
|
-
PyamilySeq-0.7.
|
|
10
|
-
PyamilySeq-0.7.
|
|
11
|
-
PyamilySeq-0.7.
|
|
12
|
-
PyamilySeq-0.7.
|
|
13
|
-
PyamilySeq-0.7.
|
|
14
|
-
PyamilySeq-0.7.
|
|
9
|
+
PyamilySeq-0.7.1.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
10
|
+
PyamilySeq-0.7.1.dist-info/METADATA,sha256=IpbThlfEmO-S8Nl617eQB64Xzu9GJDz19L4Jhx7lwGY,13076
|
|
11
|
+
PyamilySeq-0.7.1.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
|
|
12
|
+
PyamilySeq-0.7.1.dist-info/entry_points.txt,sha256=QtXD1tmnLvRAkIpGWZgXm1lfLH8GGeCwxmgoHZaTp98,102
|
|
13
|
+
PyamilySeq-0.7.1.dist-info/top_level.txt,sha256=J6JhugUQTq4rq96yibAlQu3o4KCM9WuYfqr3w1r119M,11
|
|
14
|
+
PyamilySeq-0.7.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|