PySyntenyViz 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysyntenyviz-0.1.1/LICENSE +21 -0
- pysyntenyviz-0.1.1/PKG-INFO +165 -0
- pysyntenyviz-0.1.1/PySyntenyViz/__init__.py +0 -0
- pysyntenyviz-0.1.1/PySyntenyViz/change_gbk_origin.py +109 -0
- pysyntenyviz-0.1.1/PySyntenyViz/cli.py +39 -0
- pysyntenyviz-0.1.1/PySyntenyViz/gbk_rc.py +72 -0
- pysyntenyviz-0.1.1/PySyntenyViz/reorder_gbk.py +66 -0
- pysyntenyviz-0.1.1/PySyntenyViz/synteny_viz.py +192 -0
- pysyntenyviz-0.1.1/PySyntenyViz.egg-info/PKG-INFO +165 -0
- pysyntenyviz-0.1.1/PySyntenyViz.egg-info/SOURCES.txt +16 -0
- pysyntenyviz-0.1.1/PySyntenyViz.egg-info/dependency_links.txt +1 -0
- pysyntenyviz-0.1.1/PySyntenyViz.egg-info/entry_points.txt +2 -0
- pysyntenyviz-0.1.1/PySyntenyViz.egg-info/requires.txt +4 -0
- pysyntenyviz-0.1.1/PySyntenyViz.egg-info/top_level.txt +1 -0
- pysyntenyviz-0.1.1/README.md +150 -0
- pysyntenyviz-0.1.1/pyproject.toml +27 -0
- pysyntenyviz-0.1.1/setup.cfg +4 -0
- pysyntenyviz-0.1.1/setup.py +3 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Arafat Rahman
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: PySyntenyViz
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: A CLI to create and annotate synteny plots for microbial genomes or plasmids. It uses GenBank files as input and creates alignment on the fly. It provides additional tools to edit the GenBank files to customize the synteny plot.
|
|
5
|
+
Author-email: Arafat Rahman <ac.arafat@gmail.com>
|
|
6
|
+
Project-URL: homepage, https://github.com/acarafat/PySyntenyViz
|
|
7
|
+
Project-URL: Repository, https://github.com/acarafat/PySyntenyViz
|
|
8
|
+
Requires-Python: >=3.6
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: biopython
|
|
12
|
+
Requires-Dist: argparse
|
|
13
|
+
Requires-Dist: pandas
|
|
14
|
+
Requires-Dist: pygenomeviz
|
|
15
|
+
|
|
16
|
+
# PySyntenyViz
|
|
17
|
+
A CLI to create and annotate synteny plots for microbial genomes or plasmids. It uses GenBank files as input and creates alignment on the fly. It provides additional tools to edit the GenBank files to customize the synteny plot.
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Requirement
|
|
21
|
+
`MUMmer` and/or `MMSeqs` should be installed to run the aligner.
|
|
22
|
+
|
|
23
|
+
# Installation
|
|
24
|
+
Install from PyPI
|
|
25
|
+
```
|
|
26
|
+
pip install PySyntenyViz
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Install from source:
|
|
30
|
+
```
|
|
31
|
+
git clone https://github.com/acarafat/PySyntenyViz/
|
|
32
|
+
cd PySyntenyViz
|
|
33
|
+
pip install .
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Alternatively, build a wheel from source and install it:
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
pip install wheel
|
|
40
|
+
git clone https://github.com/acarafat/PySyntenyViz/
|
|
41
|
+
cd PySyntenyViz
|
|
42
|
+
python3 setup.py sdist bdist_wheel
|
|
43
|
+
pip install dist/*.whl --force-reinstall
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Usage
|
|
47
|
+
```
|
|
48
|
+
synviz <command> [<args>]
|
|
49
|
+
```
|
|
50
|
+
Available commands: `synteny`, `revcomp`, `reorder`, `change_origin`
|
|
51
|
+
|
|
52
|
+
## Commands
|
|
53
|
+
- `synteny`: Generate synteny plot
|
|
54
|
+
- `change_origin`: Change origin of a GenBank file
|
|
55
|
+
- `revcomp`: Reverse-complement particular contig or whole GenBank file sequence
|
|
56
|
+
- `reorder`: Reorder contigs of a GenBank file
|
|
57
|
+
|
|
58
|
+
## Getting help
|
|
59
|
+
Use the `-h` or `--help` flag to get details of a command, e.g.: `synviz <command> --help`
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
## Annotation options
|
|
63
|
+
There are two options for annotating the synteny plot. One option is by providing GenBank features, which will annotate the particular feature of interest based on its presence in the GenBank file. Another option is to provide exact coordinates, so that those coordinates will be annotated specifically.
|
|
64
|
+
|
|
65
|
+
## Generating Synteny with two different annotation options
|
|
66
|
+
|
|
67
|
+

|
|
68
|
+
|
|
69
|
+
Input file `strainlist.txt`:
|
|
70
|
+
```
|
|
71
|
+
/path/to/Strain_1.gbk
|
|
72
|
+
/path/to/Strain_2.gbk
|
|
73
|
+
/path/to/Strain_3.gbk
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Coordinate file for annotation using `--coordinate` flag: `coordinates.tsv`
|
|
77
|
+
| gbk | locus | label | start | end | color | strand | plotstyle |
|
|
78
|
+
|-----------|-----------|----------------|--------|-----------|---------|--------|-----------|
|
|
79
|
+
| Strain_1 | Contig_1 | nod | 5413 | 14992 | blue | 1 | box |
|
|
80
|
+
| Strain_1 | Contig_1 | nif/fix | 19407 | 42752 | magenta | 1 | box |
|
|
81
|
+
| Strain_1 | Contig_1 | nif/fix | 175637 | 187210 | magenta | 1 | box |
|
|
82
|
+
| Strain_1 | Contig_1 | T4SS | 357052 | 370816 | brown | 1 | box |
|
|
83
|
+
| Strain_1 | Contig_1 | bio | 377557 | 381609 | black | 1 | box |
|
|
84
|
+
| Strain_1 | Contig_1 | pan | 386274 | 387991 | black | 1 | box |
|
|
85
|
+
| Strain_1 | Contig_1 | nod | 391054 | 393012 | blue | 1 | box |
|
|
86
|
+
| Strain_1 | Contig_1 | nif/fix | 421473 | 429473 | magenta | 1 | box |
|
|
87
|
+
| Strain_1 | Contig_1 | T4SS | 557560 | 569180 | brown | 1 | box |
|
|
88
|
+
| Strain_2 | Contig_1 | nod | 3972 | 9259 | blue | 1 | box |
|
|
89
|
+
| Strain_2 | Contig_1 | nif/fix | 30871 | 44544 | magenta | 1 | box |
|
|
90
|
+
| Strain_2 | Contig_1 | nif/fix | 68132 | 84403 | magenta | 1 | box |
|
|
91
|
+
| Strain_2 | Contig_1 | bio | 297983 | 302023 | black | 1 | box |
|
|
92
|
+
| Strain_2 | Contig_1 | pan | 306814 | 308405 | black | 1 | box |
|
|
93
|
+
| Strain_2 | Contig_1 | nif/fix | 330400 | 338787 | magenta | 1 | box |
|
|
94
|
+
| Strain_2 | Contig_1 | T4SS | 392045 | 403669 | brown | 1 | box |
|
|
95
|
+
| Strain_3 | Contig_1 | nif/fix | 479987 | 488559 | magenta | 1 | box |
|
|
96
|
+
| Strain_3 | Contig_1 | T4SS | 467546 | 479162 | brown | 1 | box |
|
|
97
|
+
| Strain_3 | Contig_1 | T4SS | 429923 | 451808 | brown | 1 | box |
|
|
98
|
+
| Strain_3 | Contig_1 | nod | 369759 | 373679 | blue | 1 | box |
|
|
99
|
+
| Strain_3 | Contig_1 | nod | 357170 | 357766 | blue | 1 | box |
|
|
100
|
+
| Strain_3 | Contig_1 | nif/fix | 157877 | 164992 | magenta | 1 | box |
|
|
101
|
+
| Strain_3 | Contig_1 | nif/fix | 108463 | 122788 | magenta | 1 | box |
|
|
102
|
+
| Strain_3 | Contig_1 | bio | 78637 | 84154 | black | 1 | box |
|
|
103
|
+
| Strain_3 | Contig_1 | nod | 74067 | 77670 | blue | 1 | box |
|
|
104
|
+
|
|
105
|
+
Command for synteny plot:
|
|
106
|
+
```
|
|
107
|
+
synviz synteny --input_list strainlist.txt --output synteny_output.pdf --alignment mmseqs --coordinate coordinates.tsv
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
If you want to use feature types for generic annotation, use the `--annotate` flag and provide an `annotate.tsv` file instead:
|
|
112
|
+
| feature_type | qualifier | value | face_color | label |
|
|
113
|
+
|----------------|---------------------|-------|------------|-------|
|
|
114
|
+
| CDS | product | bio | black | bio |
|
|
115
|
+
| gene | gene | nif | magenta | nif |
|
|
116
|
+
| gene | gene | nod | magenta | nod |
|
|
117
|
+
| gene | gene | fix | magenta | fix |
|
|
118
|
+
| mobile_element | mobile_element_type | T4SS | brown | ICE |
|
|
119
|
+
|
|
120
|
+
Command:
|
|
121
|
+
```
|
|
122
|
+
synviz synteny --input_list strainlist.txt --output synteny_output.pdf --alignment mmseqs --annotate annotate.tsv
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Examples
|
|
126
|
+
Read GenBank files as input from directory, plot Agrobacterium synteny with MMSeqs2 alignment and annotate by GenBank feature:
|
|
127
|
+
```
|
|
128
|
+
synviz synteny --input_dir agrobacterium --output agro_original.png --annotate annotate_synteny.tsv --alignment mmseqs
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Use MUMmer alignment instead:
|
|
132
|
+
```
|
|
133
|
+
synviz synteny --input_dir agrobacterium --output agro_original.mummer.png --annotate annotate_synteny.tsv
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Reverse-complement a contig and plot synteny, take input from a list:
|
|
137
|
+
```
|
|
138
|
+
synviz revcomp -i agrobacterium/47_2_polished_final_renamed.gbk -o agrobacterium/47_2_polished_final_renamed.rc.gbk -c chromosome
|
|
139
|
+
synviz synteny --input_list agrolist.txt --output agro_rc.mmseqs.png --alignment mmseqs --annotate annotate_synteny.tsv
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Reorder contigs by custom-order and plot synteny:
|
|
143
|
+
```
|
|
144
|
+
grep "LOCUS" agrobacterium/47_2_polished_final_renamed.rc.gbk
|
|
145
|
+
synviz reorder --input agrobacterium/47_2_polished_final_renamed.rc.gbk --output agrobacterium/47_2_polished_final_renamed.rc.order.gbk --order chromosome chromid pTi plasmid1 plasmid2
|
|
146
|
+
grep "LOCUS" agrobacterium/47_2_polished_final_renamed.rc.order.gbk
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Reorder contigs by size and plot synteny:
|
|
150
|
+
```
|
|
151
|
+
grep "LOCUS" agrobacterium/47_2_polished_final_renamed.rc.gbk
|
|
152
|
+
synviz reorder --input agrobacterium/47_2_polished_final_renamed.rc.gbk --output agrobacterium/47_2_polished_final_renamed.rc.order.gbk --by_size
|
|
153
|
+
grep "LOCUS" agrobacterium/47_2_polished_final_renamed.rc.order.gbk
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Change contig origin in GenBank file and plot synteny:
|
|
157
|
+
```
|
|
158
|
+
synviz change_origin -i agrobacterium/47_2_polished_final_renamed.rc.order.gbk -o agrobacterium/47_2_polished_final_renamed.rc.order.origin.gbk --origin 346694 --contig chromosome_rc
|
|
159
|
+
synviz synteny --input_list agrolist.txt --output agro_rc.mmseqs.png --alignment mmseqs --annotate annotate_synteny.tsv
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
Annotate synteny with specific feature coordinates:
|
|
163
|
+
```
|
|
164
|
+
synviz synteny --input_list bradylist.txt --output brady_original.mmseqs.png --alignment mmseqs --coordinate coordinates.tsv
|
|
165
|
+
```
|
|
File without changes
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
#!/usr/bin/python
|
|
2
|
+
|
|
3
|
+
import argparse, sys, os.path
|
|
4
|
+
from Bio import SeqIO
|
|
5
|
+
from Bio.SeqFeature import *
|
|
6
|
+
|
|
7
|
+
def open_sequence(arg):
    """Parse a GenBank file and return all of its records.

    Args:
        arg: Path (or handle) of the GenBank file, handed to ``SeqIO.parse``.

    Returns:
        list: The parsed records, in file order.

    If the parser raises ``ValueError`` the error is printed and the process
    exits with status -1.
    """
    try:
        # SeqIO.parse returns a lazy iterator, so a malformed file only raises
        # while the records are being consumed. Materialise the records here
        # so that the ValueError handler below actually sees parse errors.
        return list(SeqIO.parse(arg, 'genbank'))
    except ValueError as e:
        print('Error while parsing \'{}\': {}'.format(arg, e))
        sys.exit(-1)
|
|
13
|
+
|
|
14
|
+
def get_default_output(input_file, origin):
    """Derive an output path from the input path and the new origin.

    Args:
        input_file: Path of the input file.
        origin: Zero-based origin; it is written into the name as ``origin + 1``.

    Returns:
        str: ``<input without extension>_<origin + 1><extension>``.
    """
    stem, suffix = os.path.splitext(input_file)
    one_based_origin = origin + 1
    return f"{stem}_{one_based_origin}{suffix}"
|
|
17
|
+
|
|
18
|
+
def change_feature_location(f, origin, record):
    """Move feature *f* to its coordinates on a record rotated to *origin*.

    Args:
        f: Feature whose ``location`` is rewritten in place.
        origin: Zero-based position of the old sequence that becomes position 0.
        record: Record owning the feature; only ``len(record.seq)`` is used.

    Returns:
        The same feature object *f*, with its ``location`` replaced.

    A feature that ends up spanning the end of the sequence is expressed as a
    compound location of two pieces (``start..L`` followed by ``0..rest``).
    NOTE(review): the order of the pieces is the same for both strands, as in
    the previous implementation - confirm this is what downstream tools expect
    for reverse-strand features.
    """
    L = len(record.seq)

    # Shift every part to the right by the length of the tail (origin..L)
    # that now sits in front of it.
    shifted = [p + (L - origin) for p in f.location.parts]

    # Parts lying entirely at or beyond the end wrap around to the front.
    # `>=` (not `>`): a part starting exactly at L starts at position 0 of the
    # rotated sequence; leaving it unwrapped produced an empty `L..L` piece
    # when it was split further below.
    shifted = [
        FeatureLocation(p.start - L, p.end - L, strand=p.strand)
        if p.start >= L else p
        for p in shifted
    ]

    # Re-join consecutive parts that have become contiguous on the same strand.
    merged = []
    for part in shifted:
        previous = merged[-1] if merged else None
        if (previous is not None
                and previous.end == part.start
                and previous.strand == part.strand):
            merged[-1] = FeatureLocation(previous.start, part.end,
                                         strand=part.strand)
        else:
            merged.append(part)

    # Split parts that run past the end. The two pieces are added to one flat
    # list instead of being wrapped in their own CompoundLocation, so a
    # multi-part feature never nests one compound location inside another.
    parts = []
    for p in merged:
        if p.end > L:
            parts.append(FeatureLocation(p.start, L, strand=p.strand))
            parts.append(FeatureLocation(0, p.end - L, strand=p.strand))
        else:
            parts.append(p)

    if len(parts) == 1:
        f.location = parts[0]
    else:
        f.location = CompoundLocation(parts)

    return f
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def rotate_record_origin(record, origin):
    """Rotate *record* in place so that position *origin* becomes position 0.

    Args:
        record: Object with ``seq`` and ``features`` attributes; both are
            reassigned.
        origin: Zero-based index at which the rotated sequence starts.

    Returns:
        The same *record* object.
    """
    # Sequence: the tail (origin..end) moves in front of the head (0..origin).
    head = record.seq[:origin]
    tail = record.seq[origin:]
    record.seq = tail + head

    # Features: relocate each one against the already rotated sequence.
    relocated = []
    for feature in record.features:
        relocated.append(change_feature_location(feature, origin, record))
    record.features = relocated

    return record
|
|
65
|
+
|
|
66
|
+
def main(args=None):
    """Command-line entry point: rotate one contig of a GenBank file.

    Args:
        args: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    The contig whose ``id`` equals ``--contig`` is rotated so that the 1-based
    position ``--origin`` becomes its first base; every other record is written
    unchanged. When ``--output`` is omitted the name is derived from the input
    file via ``get_default_output``.
    """
    parser = argparse.ArgumentParser(description='Change the origin of a circular DNA GenBank file')

    parser.add_argument('--origin', '-n',
                        type=int,
                        required=True,
                        help='Position where the new rotated contig will start')

    parser.add_argument('--input', '-i',
                        type=str,
                        required=True,
                        help='The GenBank file to process')
    # Not required: the help text and the fallback below both describe a
    # default output name, which was unreachable while the flag was mandatory.
    parser.add_argument('--output', '-o',
                        type=str,
                        required=False,
                        help='File to write to (optional, defaults to input file name with origin appended)')
    parser.add_argument('--contig', '-c',
                        type=str,
                        required=True,
                        help='The specific contig to rotate (by ID)')

    args = parser.parse_args(args)

    # The origin is 1-based on the command line; 0 or a negative value would
    # become a negative slice index and rotate the record incorrectly.
    if args.origin < 1:
        parser.error('--origin must be a 1-based position (>= 1)')

    records = open_sequence(args.input)
    origin = args.origin - 1
    output = args.output if args.output else get_default_output(args.input, origin)

    rotated_records = []
    contig_found = False

    for record in records:
        if record.id == args.contig:
            contig_found = True
            if origin >= len(record.seq):
                print('Error: New origin is larger than sequence length for contig {}'.format(args.contig))
                sys.exit(-1)
            rotated_records.append(rotate_record_origin(record, origin))
        else:
            rotated_records.append(record)

    if not contig_found:
        # The output is still written (unchanged); tell the user why.
        print('Warning: contig {} was not found in {}; nothing was rotated'.format(args.contig, args.input),
              file=sys.stderr)

    SeqIO.write(rotated_records, output, 'genbank')


if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import importlib
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
# Mapping of subcommands to their respective module paths
commands = {
    'change_origin': 'PySyntenyViz.change_gbk_origin',
    'revcomp': 'PySyntenyViz.gbk_rc',
    'reorder': 'PySyntenyViz.reorder_gbk',
    'synteny': 'PySyntenyViz.synteny_viz',
    # Add more commands here
}


def main(args=None):
    """Dispatch ``synviz <command> [<args>]`` to the matching sub-module.

    Args:
        args: Argument list starting with the command name. ``None`` (the
            default) uses ``sys.argv[1:]``.

    Prints a usage message and exits with status 1 when no command is given or
    the command is not a key of ``commands``. Otherwise the module is imported
    and its ``main()`` is called with the remaining arguments.
    """
    # Honour an explicitly passed argument list; previously the parameter was
    # accepted but ignored and sys.argv was always used.
    argv = sys.argv[1:] if args is None else list(args)

    if not argv:
        print("Usage: synviz <command> [<args>]")
        print("")
        print("Available commands: synteny, revcomp, reorder, change_origin")
        print("")
        print("Use `-h` or `--h` flag to get details of the command, i.e.: `synviz <command> --help`")
        sys.exit(1)

    command, command_args = argv[0], argv[1:]

    if command not in commands:
        print(f"Error: Unrecognized command '{command}'")
        print("Available commands:", ", ".join(commands.keys()))
        sys.exit(1)

    # Import lazily so only the dependencies of the selected command are loaded.
    module = importlib.import_module(commands[command])
    module.main(command_args)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from Bio import SeqIO
|
|
2
|
+
from Bio.Seq import Seq
|
|
3
|
+
from Bio.SeqFeature import SeqFeature, FeatureLocation
|
|
4
|
+
import argparse
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Function to reverse complement a specific locus in a GenBank file
|
|
8
|
+
def reverse_complement_genbank(input_file, contig):
    """Reverse-complement one contig of a GenBank file.

    Args:
        input_file: Path of the GenBank file to read.
        contig: ``id`` of the record to reverse-complement.

    Returns:
        list: All records in file order. The matching record is replaced by
        its reverse complement, with ``_rc`` appended to its id and
        "Reversed complement of " prefixed to its description; every other
        record is passed through unchanged.
    """
    reversed_records = []

    for record in SeqIO.parse(input_file, "genbank"):
        if record.id != contig:
            # Keep the record unchanged if it doesn't match the specified contig
            reversed_records.append(record)
            continue

        # Let Biopython flip the sequence together with its features. The
        # previous hand-rolled version rebuilt each location from its outer
        # start/end only, which collapsed multi-part (join) locations into a
        # single span, and negated the strand directly, which fails for
        # features whose strand is None.
        reversed_record = record.reverse_complement(
            id=record.id + "_rc",
            name=record.name,
            description="Reversed complement of " + record.description,
            annotations=True,
        )

        # Same fallback as before: default molecule_type to "DNA" when the
        # source record does not carry one.
        reversed_record.annotations.setdefault("molecule_type", "DNA")

        reversed_records.append(reversed_record)

    return reversed_records
|
|
56
|
+
|
|
57
|
+
def main(args=None):
    """Command-line entry point: reverse-complement one contig.

    Args:
        args: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Requires ``--input/-i``, ``--output/-o`` and ``--contig/-c``; the records
    returned by ``reverse_complement_genbank`` are written to the output file
    in GenBank format.
    """
    cli = argparse.ArgumentParser()

    # All three options are mandatory strings.
    for long_flag, short_flag in (('--input', '-i'), ('--output', '-o'), ('--contig', '-c')):
        cli.add_argument(long_flag, short_flag, type=str, required=True)

    options = cli.parse_args(args)

    SeqIO.write(
        reverse_complement_genbank(options.input, options.contig),
        options.output,
        "genbank",
    )


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
from Bio import SeqIO
|
|
3
|
+
|
|
4
|
+
def reorder_by_custom_order(records, order):
    """
    Reorder records based on a custom order.

    Records are looked up by their ``name``. Names in ``order`` that match no
    record are skipped, and records whose name is not in ``order`` are left
    out of the result; both cases are reported on stderr so that a typo does
    not silently drop a contig.

    Args:
        records (list): List of SeqRecords.
        order (list): List of record names in the desired order.

    Returns:
        list: The records named in ``order``, in that order.
    """
    import sys  # local import: only needed for the warnings below

    record_dict = {record.name: record for record in records}

    unknown = [name for name in order if name not in record_dict]
    if unknown:
        print("Warning: contig name(s) not found and skipped: " + ", ".join(unknown),
              file=sys.stderr)

    requested = set(order)
    omitted = [name for name in record_dict if name not in requested]
    if omitted:
        print("Warning: contig(s) not listed in the custom order are omitted: " + ", ".join(omitted),
              file=sys.stderr)

    return [record_dict[name] for name in order if name in record_dict]
|
|
17
|
+
|
|
18
|
+
def reorder_by_size(records):
    """
    Return the records ordered from longest to shortest sequence.

    Args:
        records (list): List of SeqRecords.

    Returns:
        list: New list sorted by ``len(record.seq)``, largest first. The
        input is not modified.
    """
    by_length = list(records)
    by_length.sort(key=lambda rec: len(rec.seq), reverse=True)
    return by_length
|
|
29
|
+
|
|
30
|
+
def main(args=None):
    """Command-line entry point: reorder the contigs of a GenBank file.

    Args:
        args: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    ``--order`` takes precedence over ``--by_size``; with neither option the
    records are written in their original order.
    """
    parser = argparse.ArgumentParser(description="Reorder contigs in a GenBank file.")
    # Both paths are mandatory: without them the run previously failed later
    # with an unhelpful error on a None path.
    parser.add_argument("--input", "-i", type=str, required=True, help="Path to the input GenBank file.")
    parser.add_argument("--output", "-o", type=str, required=True, help="Path to save the reordered GenBank file.")
    parser.add_argument(
        "--order",
        type=str,
        nargs="+",
        help="Custom order of contig names (space-separated)."
    )
    parser.add_argument(
        "--by_size",
        action="store_true",
        help="Reorder contigs by size in descending order."
    )

    args = parser.parse_args(args)

    # Read all records from the GenBank file
    records = list(SeqIO.parse(args.input, "genbank"))

    if args.order:
        # Reorder by custom order if provided
        reordered_records = reorder_by_custom_order(records, args.order)
    elif args.by_size:
        # Reorder by size if the option is selected
        reordered_records = reorder_by_size(records)
    else:
        # If no option is provided, maintain the original order
        reordered_records = records

    # Save the reordered records to a new GenBank file
    with open(args.output, "w") as out_handle:
        SeqIO.write(reordered_records, out_handle, "genbank")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
#!/bin/python3
|
|
2
|
+
|
|
3
|
+
import os, sys
|
|
4
|
+
import csv
|
|
5
|
+
|
|
6
|
+
import argparse
|
|
7
|
+
|
|
8
|
+
from pygenomeviz import GenomeViz
|
|
9
|
+
from pygenomeviz.parser import Genbank
|
|
10
|
+
from pygenomeviz.align import MUMmer, MMseqs
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Get absolute paths of GBK files in a directory
|
|
14
|
+
def get_gbk_path(path_to_gbk):
    '''
    INPUT: A directory containing GenBank files as input for synteny plotting.
           Only files directly inside the directory are used (sub-directories
           are not searched) and hidden files (names starting with `.`) are
           skipped.
    OUTPUT: List of parsed GenBank files as Genbank objects, sorted by
            `full_genome_length` in ascending order; files of equal length
            keep alphabetical file-name order.
    '''
    # Fail with a readable message instead of the bare StopIteration that
    # next(os.walk(...)) raises for a missing directory.
    if not os.path.isdir(path_to_gbk):
        sys.exit(f"Error: Input directory `{path_to_gbk}` does not exist or is not a directory!")

    root, _, files = next(os.walk(path_to_gbk, topdown=True))

    # sorted(): the directory listing order is arbitrary; sorting first makes
    # the (stable) length sort below deterministic for equal-length genomes.
    gbk_files = [os.path.abspath(os.path.join(root, f))
                 for f in sorted(files)
                 if not f.startswith('.')]

    gbk_list = [Genbank(f) for f in gbk_files]
    gbk_list.sort(key=lambda x: x.full_genome_length)

    return gbk_list
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Given a text file containing gbk paths
|
|
30
|
+
def get_gbk_file(gbk_path_file):
    '''
    INPUT: A text file listing one GenBank file path per line. Surrounding
           whitespace is stripped and blank lines are ignored.
    OUTPUT: List of parsed GenBank files as Genbank objects, in the order
            the paths appear in the file.
    '''
    with open(gbk_path_file) as handle:
        gbk_files = [line.strip() for line in handle]

    # Skip empty entries (e.g. trailing blank lines); they would otherwise be
    # passed to the parser as an empty path.
    gbk_list = [Genbank(f) for f in gbk_files if f]

    return gbk_list
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Parse annotation meta file
|
|
44
|
+
def load_face_colors(file_path):
    '''
    INPUT: Path of a tab-separated file whose first line is a header row
           (e.g. `feature_type`, `qualifier`, `value`, `label`, `face_color`).
    OUTPUT: List with one dictionary per data row, keyed by the header names.
    '''
    with open(file_path, newline='') as tsv_handle:
        return list(csv.DictReader(tsv_handle, delimiter='\t'))
|
|
56
|
+
|
|
57
|
+
# Given a particular feature, define its face color and label based on face_color_dict
|
|
58
|
+
def parse_feature_fc(feature, face_color_dict):
    '''
    INPUT: A feature (with `type` and `qualifiers`) and a list of annotation
           rules, each a dictionary with `feature_type`, `qualifier`, `value`,
           `label`, and `face_color`.
    OUTPUT: Tuple `(face_color, label)` of the first rule whose feature type
            matches and whose qualifier's first value equals the rule value;
            `("ivory", "")` when no rule matches.
    '''
    for rule in face_color_dict:
        # Rule applies to a different feature type.
        if feature.type != rule['feature_type']:
            continue

        # Feature does not carry the qualifier this rule looks at.
        key = rule['qualifier']
        if key not in feature.qualifiers:
            continue

        # Only the first value of the qualifier is compared (exact match).
        if feature.qualifiers[key][0] == rule['value']:
            return rule['face_color'], rule['label']

    # Defaults when nothing matched.
    return "ivory", ''
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# Plot synteny
|
|
79
|
+
def plot_synteny(gbk_list, output_png, annotate_file=None, coordinate_file=None, alignment=None):
    '''
    INPUT: gbk_list        - list of parsed Genbank objects, one track each.
           output_png      - path of the image file to save.
           annotate_file   - optional TSV of feature-based annotation rules.
           coordinate_file - optional TSV of coordinate-based annotations.
           alignment       - `mummer` (also used when None) or `mmseqs`.
    OUTPUT: Saves the synteny figure to `output_png`. Nothing is returned.

    Annotations are drawn only when exactly one of `annotate_file` and
    `coordinate_file` is given. The process exits with an error message when
    a GenBank file lacks a `source` feature or `alignment` is not recognised.
    '''
    ########################
    # Set GenomeViz object #
    ########################
    gv = GenomeViz(fig_track_height=1,
                   feature_track_ratio=0.2,
                   #tick_track_ratio=0.4,
                   #tick_style="bar",
                   #align_type="center",
                   )

    if annotate_file is not None:
        fc_dict = load_face_colors(annotate_file)
    if coordinate_file is not None:
        coord_dict = load_face_colors(coordinate_file)
    if annotate_file is not None and coordinate_file is not None:
        # Each annotation branch below requires the other file to be absent.
        print("Warning: both an annotation file and a coordinate file were given; "
              "no annotation will be drawn. Provide only one of them.", file=sys.stderr)

    #############################
    # Plot contigs and features #
    #############################
    for gbk in gbk_list:
        track = gv.add_feature_track(gbk.name, gbk.get_seqid2size())

        # Make sure all GenBank locus has `source` feature
        # NOTE(review): the set accumulates over records, so a record without
        # `source` is only detected if no earlier record of the file had one.
        gbk_f_types = set()
        for rec in gbk.records:
            gbk_f_types.update(set(f.type for f in rec.features))
            if "source" not in gbk_f_types:
                sys.exit(f"Error: Make sure {rec.id} in {gbk.name} has `source` feature!")

        # Plot individual contigs.
        for seqid, features in gbk.get_seqid2features(feature_type = 'source').items():
            segment = track.get_segment(seqid)
            # Bind seqid as a default argument so the label does not depend on
            # when the handler is invoked (late-binding closure).
            segment.add_features(features, fc="skyblue", lw=0.5,
                                 label_handler=lambda s, seqid=seqid: str(seqid))

            # Plot target features based on the coordinate tsv file, if provided
            if annotate_file is None and coordinate_file is not None:
                for entry in coord_dict:
                    if entry['gbk'] == gbk.name and entry['locus'] == str(seqid):
                        segment.add_feature(int(entry['start']), int(entry['end']), int(entry['strand']),
                                            fc = entry['color'], label = entry['label'], plotstyle=entry['plotstyle'])

        # Remove `source`; discard() so a file without records does not raise KeyError
        gbk_f_types.discard("source")

        ########################
        # Plot target features #
        ########################
        # Plot based on the feature annotation tsv file
        for seqid, features in gbk.get_seqid2features(feature_type = gbk_f_types).items():
            for f in features:
                if annotate_file is not None and coordinate_file is None:
                    face_color, f_lab = parse_feature_fc(f, fc_dict)
                    # Only features matched by a rule (non-empty label) are drawn.
                    if f_lab != '':
                        segment = track.get_segment(seqid)
                        # Add features to the segment with dynamic face color;
                        # f_lab is bound as a default for the same reason as above.
                        segment.add_features(f, fc=face_color, lw=0.5, plotstyle='rbox',
                                             label_handler = lambda s, f_lab=f_lab: f_lab)

    #############
    # Alignment #
    #############
    if alignment in ["mummer", None]:
        print('Creating MUMmer alignment ...')
        align_coords = MUMmer(gbk_list).run()

    elif alignment == "mmseqs":
        print('Creating MMseqs alignment ...')
        align_coords = MMseqs(gbk_list).run()

    else:
        # Previously an unknown value fell through and crashed below with a
        # NameError on align_coords.
        sys.exit(f"Error: Unknown alignment method `{alignment}`. Use `mummer` or `mmseqs`.")

    #######################################
    # Plot MUMmer/MMseqs RBH search links #
    #######################################
    print('Plotting synteny ...')
    if len(align_coords) > 0:
        # Entries with a missing/zero identity are ignored for the minimum;
        # fall back to 0 when none is left so min() is never given an empty list.
        identities = [ac.identity for ac in align_coords if ac.identity]
        min_ident = int(min(identities)) if identities else 0
        color, inverted_color = "chocolate", "limegreen"
        for ac in align_coords:
            gv.add_link(ac.query_link, ac.ref_link, color=color, inverted_color=inverted_color, v=ac.identity, vmin=min_ident, curve=True)
        gv.set_colorbar([color, inverted_color], vmin=min_ident)

    fig = gv.plotfig()
    fig.savefig(f"{output_png}")
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
# Parse input, program logic-flow
|
|
170
|
+
def main(args=None):
    '''
    Command-line entry point for the synteny plot.

    INPUT: args - argument list to parse; None makes argparse read sys.argv[1:].
    OUTPUT: Nothing is returned; the figure is written by `plot_synteny`.

    GenBank files come from `--input_dir` when given, otherwise from
    `--input_list`; one of the two is required.
    '''
    parser = argparse.ArgumentParser()

    parser.add_argument('--input_dir', '-i', type=str, required=False, help="Path to directory containing GenBank files")
    parser.add_argument('--input_list', type=str, required=False, help="Textfile containing paths of GenBank files seperated by lines. The order of GenBank files will be used to plot.")
    parser.add_argument('--output', '-o', type=str, required=True, help="Output image file. Use .png/.pdf etc. extensions for desiarable format.")
    parser.add_argument('--annotate', '-a', type=str, required=False, help="Sequence features from GenBank file to annotate.")
    parser.add_argument('--coordinate', '-c', type=str, required=False, help="Coordinate position from GenBank file to annotate.")
    # The help text used to claim MMSeqs was the default, but an omitted value
    # (None) selects MUMmer in plot_synteny. `choices` rejects unsupported
    # values up front instead of failing after the files have been parsed.
    parser.add_argument('--alignment', '-t', type=str, required=False, choices=['mummer', 'mmseqs'], help="Alignment algorithm to use. Default MUMmer. Options: `mummer` and `mmseqs` (mummer for fast genome level alignment, mmseqs for fast protein level alignment).")

    args = parser.parse_args(args)

    # Without either input option the list reader would be handed None.
    if args.input_dir is None and args.input_list is None:
        parser.error("one of --input_dir or --input_list is required")

    print('Getting all the GenBank files ...')
    if args.input_dir is not None:
        gbk_list = get_gbk_path(args.input_dir)
    else:
        gbk_list = get_gbk_file(args.input_list)

    plot_synteny(gbk_list, args.output, args.annotate, args.coordinate, args.alignment)


# Main
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: PySyntenyViz
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: A CLI to create and annotate synteny plots for microbial genomes or plasmids. It uses GenBank files as input and creates alignment on the fly. It provides additional tools to edit the GenBank files to customize the synteny plot.
|
|
5
|
+
Author-email: Arafat Rahman <ac.arafat@gmail.com>
|
|
6
|
+
Project-URL: homepage, https://github.com/acarafat/PySyntenyViz
|
|
7
|
+
Project-URL: Repository, https://github.com/acarafat/PySyntenyViz
|
|
8
|
+
Requires-Python: >=3.6
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: biopython
|
|
12
|
+
Requires-Dist: argparse
|
|
13
|
+
Requires-Dist: pandas
|
|
14
|
+
Requires-Dist: pygenomeviz
|
|
15
|
+
|
|
16
|
+
# PySyntenyViz
|
|
17
|
+
A CLI to create and annotate synteny plots for microbial genomes or plasmids. It uses GenBank files as input and creates alignment on the fly. It provides additional tools to edit the GenBank files to customize the synteny plot.
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Requirement
|
|
21
|
+
`MUMmer` and/or `MMSeqs` should be installed to run the aligner.
|
|
22
|
+
|
|
23
|
+
# Installation
|
|
24
|
+
Install from PyPI
|
|
25
|
+
```
|
|
26
|
+
pip install PySyntenyViz
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Install from source:
|
|
30
|
+
```
|
|
31
|
+
git clone https://github.com/acarafat/PySyntenyViz/
|
|
32
|
+
cd PySyntenyViz
|
|
33
|
+
pip install .
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Alternatively, build a wheel from source and install it with `pip`:
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
pip install wheel
|
|
40
|
+
git clone https://github.com/acarafat/PySyntenyViz/
|
|
41
|
+
cd PySyntenyViz
|
|
42
|
+
python3 setup.py sdist bdist_wheel
|
|
43
|
+
pip install dist/*.whl --force-reinstall
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Usage
|
|
47
|
+
```
|
|
48
|
+
synviz <command> [<args>]
|
|
49
|
+
```
|
|
50
|
+
Available commands: `synteny`, `revcomp`, `reorder`, `change_origin`
|
|
51
|
+
|
|
52
|
+
## Commands
|
|
53
|
+
- `synteny`: Generate synteny plot
|
|
54
|
+
- `change_origin`: Change origin of a GenBank file
|
|
55
|
+
- `revcomp`: Reverse-complement particular contig or whole GenBank file sequence
|
|
56
|
+
- `reorder`: Reorder contigs of GenBank file
|
|
57
|
+
|
|
58
|
+
## Getting help
|
|
59
|
+
Use the `-h` or `--help` flag to get details of a command, e.g.: `synviz <command> --help`
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
## Annotation options
|
|
63
|
+
There are two options for annotating the synteny plot. One option is by providing GenBank features, which will annotate the particular feature of interest based on its presence in the GenBank file. Another option is to provide exact coordinates, so that those coordinates will be annotated specifically.
|
|
64
|
+
|
|
65
|
+
## Generating Synteny with two different annotation options
|
|
66
|
+
|
|
67
|
+

|
|
68
|
+
|
|
69
|
+
Input file `strainlist.txt`:
|
|
70
|
+
```
|
|
71
|
+
/path/to/Strain_1.gbk
|
|
72
|
+
/path/to/Strain_2.gbk
|
|
73
|
+
/path/to/Strain_3.gbk
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Coordinate file for annotation using `--coordinate` flag: `coordinates.tsv`
|
|
77
|
+
| gbk | locus | label | start | end | color | strand | plotstyle |
|
|
78
|
+
|-----------|-----------|----------------|--------|-----------|---------|--------|-----------|
|
|
79
|
+
| Strain_1 | Contig_1 | nod | 5413 | 14992 | blue | 1 | box |
|
|
80
|
+
| Strain_1 | Contig_1 | nif/fix | 19407 | 42752 | magenta | 1 | box |
|
|
81
|
+
| Strain_1 | Contig_1 | nif/fix | 175637 | 187210 | magenta | 1 | box |
|
|
82
|
+
| Strain_1 | Contig_1 | T4SS | 357052 | 370816 | brown | 1 | box |
|
|
83
|
+
| Strain_1 | Contig_1 | bio | 377557 | 381609 | black | 1 | box |
|
|
84
|
+
| Strain_1 | Contig_1 | pan | 386274 | 387991 | black | 1 | box |
|
|
85
|
+
| Strain_1 | Contig_1 | nod | 391054 | 393012 | blue | 1 | box |
|
|
86
|
+
| Strain_1 | Contig_1 | nif/fix | 421473 | 429473 | magenta | 1 | box |
|
|
87
|
+
| Strain_1 | Contig_1 | T4SS | 557560 | 569180 | brown | 1 | box |
|
|
88
|
+
| Strain_2 | Contig_1 | nod | 3972 | 9259 | blue | 1 | box |
|
|
89
|
+
| Strain_2 | Contig_1 | nif/fix | 30871 | 44544 | magenta | 1 | box |
|
|
90
|
+
| Strain_2 | Contig_1 | nif/fix | 68132 | 84403 | magenta | 1 | box |
|
|
91
|
+
| Strain_2 | Contig_1 | bio | 297983 | 302023 | black | 1 | box |
|
|
92
|
+
| Strain_2 | Contig_1 | pan | 306814 | 308405 | black | 1 | box |
|
|
93
|
+
| Strain_2 | Contig_1 | nif/fix | 330400 | 338787 | magenta | 1 | box |
|
|
94
|
+
| Strain_2 | Contig_1 | T4SS | 392045 | 403669 | brown | 1 | box |
|
|
95
|
+
| Strain_3 | Contig_1 | nif/fix | 479987 | 488559 | magenta | 1 | box |
|
|
96
|
+
| Strain_3 | Contig_1 | T4SS | 467546 | 479162 | brown | 1 | box |
|
|
97
|
+
| Strain_3 | Contig_1 | T4SS | 429923 | 451808 | brown | 1 | box |
|
|
98
|
+
| Strain_3 | Contig_1 | nod | 369759 | 373679 | blue | 1 | box |
|
|
99
|
+
| Strain_3 | Contig_1 | nod | 357170 | 357766 | blue | 1 | box |
|
|
100
|
+
| Strain_3 | Contig_1 | nif/fix | 157877 | 164992 | magenta | 1 | box |
|
|
101
|
+
| Strain_3 | Contig_1 | nif/fix | 108463 | 122788 | magenta | 1 | box |
|
|
102
|
+
| Strain_3 | Contig_1 | bio | 78637 | 84154 | black | 1 | box |
|
|
103
|
+
| Strain_3 | Contig_1 | nod | 74067 | 77670 | blue | 1 | box |
|
|
104
|
+
|
|
105
|
+
Command for synteny plot:
|
|
106
|
+
```
|
|
107
|
+
synviz synteny --input_list strainlist.txt --output synteny_output.pdf --alignment mmseqs --coordinate coordinates.tsv
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
If you want to use feature types for generic annotation, use the `--annotate` flag and provide an `annotate.tsv` file instead:
|
|
112
|
+
| feature_type | qualifier | value | face_color | label |
|
|
113
|
+
|----------------|---------------------|-------|------------|-------|
|
|
114
|
+
| CDS | product | bio | black | bio |
|
|
115
|
+
| gene | gene | nif | magenta | nif |
|
|
116
|
+
| gene | gene | nod | magenta | nod |
|
|
117
|
+
| gene | gene | fix | magenta | fix |
|
|
118
|
+
| mobile_element | mobile_element_type | T4SS | brown | ICE |
|
|
119
|
+
|
|
120
|
+
Command:
|
|
121
|
+
```
|
|
122
|
+
synviz synteny --input_list strainlist.txt --output synteny_output.pdf --alignment mmseqs --annotate annotate.tsv
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Examples
|
|
126
|
+
Read GenBank files as input from directory, plot Agrobacterium synteny with MMSeqs2 alignment and annotate by GenBank feature:
|
|
127
|
+
```
|
|
128
|
+
synviz synteny --input_dir agrobacterium --output agro_original.png --annotate annotate_synteny.tsv --alignment mmseqs
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Use MUMmer alignment instead:
|
|
132
|
+
```
|
|
133
|
+
synviz synteny --input_dir agrobacterium --output agro_original.mummer.png --annotate annotate_synteny.tsv --alignment mummer
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Reverse-complement a contig and plot synteny, take input from a list:
|
|
137
|
+
```
|
|
138
|
+
synviz revcomp -i agrobacterium/47_2_polished_final_renamed.gbk -o agrobacterium/47_2_polished_final_renamed.rc.gbk -c chromosome
|
|
139
|
+
synviz synteny --input_list agrolist.txt --output agro_rc.mmseqs.png --alignment mmseqs --annotate annotate_synteny.tsv
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Reorder contigs by custom-order and plot synteny:
|
|
143
|
+
```
|
|
144
|
+
grep "LOCUS" agrobacterium/47_2_polished_final_renamed.rc.gbk
|
|
145
|
+
synviz reorder --input agrobacterium/47_2_polished_final_renamed.rc.gbk --output agrobacterium/47_2_polished_final_renamed.rc.order.gbk --order chromosome chromid pTi plasmid1 plasmid2
|
|
146
|
+
grep "LOCUS" agrobacterium/47_2_polished_final_renamed.rc.order.gbk
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Reorder contigs by size and plot synteny:
|
|
150
|
+
```
|
|
151
|
+
grep "LOCUS" agrobacterium/47_2_polished_final_renamed.rc.gbk
|
|
152
|
+
synviz reorder --input agrobacterium/47_2_polished_final_renamed.rc.gbk --output agrobacterium/47_2_polished_final_renamed.rc.order.gbk --by_size
|
|
153
|
+
grep "LOCUS" agrobacterium/47_2_polished_final_renamed.rc.order.gbk
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Change contig origin in GenBank file and plot synteny:
|
|
157
|
+
```
|
|
158
|
+
synviz change_origin -i agrobacterium/47_2_polished_final_renamed.rc.order.gbk -o agrobacterium/47_2_polished_final_renamed.rc.order.origin.gbk --origin 346694 --contig chromosome_rc
|
|
159
|
+
synviz synteny --input_list agrolist.txt --output agro_rc.mmseqs.png --alignment mmseqs --annotate annotate_synteny.tsv
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
Annotate synteny with specific feature coordinates:
|
|
163
|
+
```
|
|
164
|
+
synviz synteny --input_list bradylist.txt --output brady_original.mmseqs.png --alignment mmseqs --coordinate coordinates.tsv
|
|
165
|
+
```
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
setup.py
|
|
5
|
+
PySyntenyViz/__init__.py
|
|
6
|
+
PySyntenyViz/change_gbk_origin.py
|
|
7
|
+
PySyntenyViz/cli.py
|
|
8
|
+
PySyntenyViz/gbk_rc.py
|
|
9
|
+
PySyntenyViz/reorder_gbk.py
|
|
10
|
+
PySyntenyViz/synteny_viz.py
|
|
11
|
+
PySyntenyViz.egg-info/PKG-INFO
|
|
12
|
+
PySyntenyViz.egg-info/SOURCES.txt
|
|
13
|
+
PySyntenyViz.egg-info/dependency_links.txt
|
|
14
|
+
PySyntenyViz.egg-info/entry_points.txt
|
|
15
|
+
PySyntenyViz.egg-info/requires.txt
|
|
16
|
+
PySyntenyViz.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
PySyntenyViz
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# PySyntenyViz
|
|
2
|
+
A CLI to create and annotate synteny plots for microbial genomes or plasmids. It uses GenBank files as input and creates alignment on the fly. It provides additional tools to edit the GenBank files to customize the synteny plot.
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
# Requirement
|
|
6
|
+
`MUMmer` and/or `MMSeqs` should be installed to run the aligner.
|
|
7
|
+
|
|
8
|
+
# Installation
|
|
9
|
+
Install from PyPI
|
|
10
|
+
```
|
|
11
|
+
pip install PySyntenyViz
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Install from source:
|
|
15
|
+
```
|
|
16
|
+
git clone https://github.com/acarafat/PySyntenyViz/
|
|
17
|
+
cd PySyntenyViz
|
|
18
|
+
pip install .
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Alternatively, build a wheel from source and install it with `pip`:
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
pip install wheel
|
|
25
|
+
git clone https://github.com/acarafat/PySyntenyViz/
|
|
26
|
+
cd PySyntenyViz
|
|
27
|
+
python3 setup.py sdist bdist_wheel
|
|
28
|
+
pip install dist/*.whl --force-reinstall
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Usage
|
|
32
|
+
```
|
|
33
|
+
synviz <command> [<args>]
|
|
34
|
+
```
|
|
35
|
+
Available commands: `synteny`, `revcomp`, `reorder`, `change_origin`
|
|
36
|
+
|
|
37
|
+
## Commands
|
|
38
|
+
- `synteny`: Generate synteny plot
|
|
39
|
+
- `change_origin`: Change origin of a GenBank file
|
|
40
|
+
- `revcomp`: Reverse-complement particular contig or whole GenBank file sequence
|
|
41
|
+
- `reorder`: Reorder contigs of GenBank file
|
|
42
|
+
|
|
43
|
+
## Getting help
|
|
44
|
+
Use the `-h` or `--help` flag to get details of a command, e.g.: `synviz <command> --help`
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
## Annotation options
|
|
48
|
+
There are two options for annotating the synteny plot. One option is by providing GenBank features, which will annotate the particular feature of interest based on its presence in the GenBank file. Another option is to provide exact coordinates, so that those coordinates will be annotated specifically.
|
|
49
|
+
|
|
50
|
+
## Generating Synteny with two different annotation options
|
|
51
|
+
|
|
52
|
+

|
|
53
|
+
|
|
54
|
+
Input file `strainlist.txt`:
|
|
55
|
+
```
|
|
56
|
+
/path/to/Strain_1.gbk
|
|
57
|
+
/path/to/Strain_2.gbk
|
|
58
|
+
/path/to/Strain_3.gbk
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Coordinate file for annotation using `--coordinate` flag: `coordinates.tsv`
|
|
62
|
+
| gbk | locus | label | start | end | color | strand | plotstyle |
|
|
63
|
+
|-----------|-----------|----------------|--------|-----------|---------|--------|-----------|
|
|
64
|
+
| Strain_1 | Contig_1 | nod | 5413 | 14992 | blue | 1 | box |
|
|
65
|
+
| Strain_1 | Contig_1 | nif/fix | 19407 | 42752 | magenta | 1 | box |
|
|
66
|
+
| Strain_1 | Contig_1 | nif/fix | 175637 | 187210 | magenta | 1 | box |
|
|
67
|
+
| Strain_1 | Contig_1 | T4SS | 357052 | 370816 | brown | 1 | box |
|
|
68
|
+
| Strain_1 | Contig_1 | bio | 377557 | 381609 | black | 1 | box |
|
|
69
|
+
| Strain_1 | Contig_1 | pan | 386274 | 387991 | black | 1 | box |
|
|
70
|
+
| Strain_1 | Contig_1 | nod | 391054 | 393012 | blue | 1 | box |
|
|
71
|
+
| Strain_1 | Contig_1 | nif/fix | 421473 | 429473 | magenta | 1 | box |
|
|
72
|
+
| Strain_1 | Contig_1 | T4SS | 557560 | 569180 | brown | 1 | box |
|
|
73
|
+
| Strain_2 | Contig_1 | nod | 3972 | 9259 | blue | 1 | box |
|
|
74
|
+
| Strain_2 | Contig_1 | nif/fix | 30871 | 44544 | magenta | 1 | box |
|
|
75
|
+
| Strain_2 | Contig_1 | nif/fix | 68132 | 84403 | magenta | 1 | box |
|
|
76
|
+
| Strain_2 | Contig_1 | bio | 297983 | 302023 | black | 1 | box |
|
|
77
|
+
| Strain_2 | Contig_1 | pan | 306814 | 308405 | black | 1 | box |
|
|
78
|
+
| Strain_2 | Contig_1 | nif/fix | 330400 | 338787 | magenta | 1 | box |
|
|
79
|
+
| Strain_2 | Contig_1 | T4SS | 392045 | 403669 | brown | 1 | box |
|
|
80
|
+
| Strain_3 | Contig_1 | nif/fix | 479987 | 488559 | magenta | 1 | box |
|
|
81
|
+
| Strain_3 | Contig_1 | T4SS | 467546 | 479162 | brown | 1 | box |
|
|
82
|
+
| Strain_3 | Contig_1 | T4SS | 429923 | 451808 | brown | 1 | box |
|
|
83
|
+
| Strain_3 | Contig_1 | nod | 369759 | 373679 | blue | 1 | box |
|
|
84
|
+
| Strain_3 | Contig_1 | nod | 357170 | 357766 | blue | 1 | box |
|
|
85
|
+
| Strain_3 | Contig_1 | nif/fix | 157877 | 164992 | magenta | 1 | box |
|
|
86
|
+
| Strain_3 | Contig_1 | nif/fix | 108463 | 122788 | magenta | 1 | box |
|
|
87
|
+
| Strain_3 | Contig_1 | bio | 78637 | 84154 | black | 1 | box |
|
|
88
|
+
| Strain_3 | Contig_1 | nod | 74067 | 77670 | blue | 1 | box |
|
|
89
|
+
|
|
90
|
+
Command for synteny plot:
|
|
91
|
+
```
|
|
92
|
+
synviz synteny --input_list strainlist.txt --output synteny_output.pdf --alignment mmseqs --coordinate coordinates.tsv
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
If you want to use feature types for generic annotation, use the `--annotate` flag and provide an `annotate.tsv` file instead:
|
|
97
|
+
| feature_type | qualifier | value | face_color | label |
|
|
98
|
+
|----------------|---------------------|-------|------------|-------|
|
|
99
|
+
| CDS | product | bio | black | bio |
|
|
100
|
+
| gene | gene | nif | magenta | nif |
|
|
101
|
+
| gene | gene | nod | magenta | nod |
|
|
102
|
+
| gene | gene | fix | magenta | fix |
|
|
103
|
+
| mobile_element | mobile_element_type | T4SS | brown | ICE |
|
|
104
|
+
|
|
105
|
+
Command:
|
|
106
|
+
```
|
|
107
|
+
synviz synteny --input_list strainlist.txt --output synteny_output.pdf --alignment mmseqs --annotate annotate.tsv
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Examples
|
|
111
|
+
Read GenBank files as input from directory, plot Agrobacterium synteny with MMSeqs2 alignment and annotate by GenBank feature:
|
|
112
|
+
```
|
|
113
|
+
synviz synteny --input_dir agrobacterium --output agro_original.png --annotate annotate_synteny.tsv --alignment mmseqs
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Use MUMmer alignment instead:
|
|
117
|
+
```
|
|
118
|
+
synviz synteny --input_dir agrobacterium --output agro_original.mummer.png --annotate annotate_synteny.tsv --alignment mummer
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Reverse-complement a contig and plot synteny, take input from a list:
|
|
122
|
+
```
|
|
123
|
+
synviz revcomp -i agrobacterium/47_2_polished_final_renamed.gbk -o agrobacterium/47_2_polished_final_renamed.rc.gbk -c chromosome
|
|
124
|
+
synviz synteny --input_list agrolist.txt --output agro_rc.mmseqs.png --alignment mmseqs --annotate annotate_synteny.tsv
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Reorder contigs by custom-order and plot synteny:
|
|
128
|
+
```
|
|
129
|
+
grep "LOCUS" agrobacterium/47_2_polished_final_renamed.rc.gbk
|
|
130
|
+
synviz reorder --input agrobacterium/47_2_polished_final_renamed.rc.gbk --output agrobacterium/47_2_polished_final_renamed.rc.order.gbk --order chromosome chromid pTi plasmid1 plasmid2
|
|
131
|
+
grep "LOCUS" agrobacterium/47_2_polished_final_renamed.rc.order.gbk
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Reorder contigs by size and plot synteny:
|
|
135
|
+
```
|
|
136
|
+
grep "LOCUS" agrobacterium/47_2_polished_final_renamed.rc.gbk
|
|
137
|
+
synviz reorder --input agrobacterium/47_2_polished_final_renamed.rc.gbk --output agrobacterium/47_2_polished_final_renamed.rc.order.gbk --by_size
|
|
138
|
+
grep "LOCUS" agrobacterium/47_2_polished_final_renamed.rc.order.gbk
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Change contig origin in GenBank file and plot synteny:
|
|
142
|
+
```
|
|
143
|
+
synviz change_origin -i agrobacterium/47_2_polished_final_renamed.rc.order.gbk -o agrobacterium/47_2_polished_final_renamed.rc.order.origin.gbk --origin 346694 --contig chromosome_rc
|
|
144
|
+
synviz synteny --input_list agrolist.txt --output agro_rc.mmseqs.png --alignment mmseqs --annotate annotate_synteny.tsv
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Annotate synteny with specific feature coordinates:
|
|
148
|
+
```
|
|
149
|
+
synviz synteny --input_list bradylist.txt --output brady_original.mmseqs.png --alignment mmseqs --coordinate coordinates.tsv
|
|
150
|
+
```
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "PySyntenyViz"
|
|
7
|
+
version = "0.1.1"
|
|
8
|
+
description = "A CLI to create and annotate synteny plots for microbial genomes or plasmids. It uses GenBank files as input and creates alignment on the fly. It provides additional tools to edit the GenBank files to customize the synteny plot."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.6"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"biopython",
|
|
13
|
+
"argparse",
|
|
14
|
+
"pandas",
|
|
15
|
+
"pygenomeviz"
|
|
16
|
+
]
|
|
17
|
+
authors = [
|
|
18
|
+
{ name = "Arafat Rahman", email = "ac.arafat@gmail.com" }
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
[project.urls]
|
|
23
|
+
homepage = "https://github.com/acarafat/PySyntenyViz"
|
|
24
|
+
Repository = "https://github.com/acarafat/PySyntenyViz"
|
|
25
|
+
|
|
26
|
+
[project.entry-points.console_scripts]
|
|
27
|
+
synviz = "PySyntenyViz.cli:main"
|