bio-alignment 0.0.9 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -2
- data/LICENSE.txt +1 -1
- data/README.md +18 -22
- data/Rakefile +0 -8
- data/VERSION +1 -1
- data/bin/pal2nal +118 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0d0b2b23668eee191b0d1df61459aaa06cc6c6c6
|
4
|
+
data.tar.gz: 3a8ed0466b5351b791d6bfd40f3556da402e1b84
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a292dc8d7b75289c10f214b88e207634607af7f9b1dbafdccea7e7a34091c6b7140cb3461d73ee708e736b24b39d814fc2b302f2299cdc15a7006436163c7333
|
7
|
+
data.tar.gz: 4abd0eee884870f58c46f19d0c0fe030e6a79a9a3676eaa640c54a8d8fde48f41f3265a2e09be72456d69690acd985de15daad7911003aa00b34613ddd60a9dd
|
data/Gemfile
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
2
|
gem "bio-logger"
|
3
|
+
gem "bio-bigbio"
|
3
4
|
gem "bio", ">= 1.5.0" # for translation tables, BioRuby compat and Newick parser
|
4
5
|
|
5
6
|
# Add dependencies to develop your gem here.
|
@@ -7,6 +8,6 @@ gem "bio", ">= 1.5.0" # for translation tables, BioRuby compat and Newick p
|
|
7
8
|
group :development do
|
8
9
|
gem "rake"
|
9
10
|
gem "bio-bigbio" # for reading FASTA files in tests
|
10
|
-
gem "cucumber"
|
11
|
-
gem "rspec"
|
11
|
+
# gem "cucumber" - disabled tests
|
12
|
+
# gem "rspec"
|
12
13
|
end
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -36,6 +36,13 @@ Bio::BioAlignment
|
|
36
36
|
document](https://github.com/pjotrp/bioruby-alignment/blob/master/doc/bio-alignment-design.md)
|
37
37
|
for Ruby.
|
38
38
|
|
39
|
+
## Installation
|
40
|
+
|
41
|
+
Dependencies are bioruby, bio-bigbio and bio-logger gems. To run the
|
42
|
+
tests you also need to install rspec and cucumber gems.
|
43
|
+
|
44
|
+
Bundler is no longer used.
|
45
|
+
|
39
46
|
## Command line
|
40
47
|
|
41
48
|
bio-alignment comes with a command line interface (CLI), which can apply a number
|
@@ -82,7 +89,7 @@ aligmment (note codon gaps are represented by '---')
|
|
82
89
|
|
83
90
|
include Bio::BioAlignment
|
84
91
|
aln = Alignment.new
|
85
|
-
|
92
|
+
fasta = FastaReader.new('codon-alignment.fa')
|
86
93
|
fasta.each do | rec |
|
87
94
|
aln << CodonSequence.new(rec.id, rec.seq)
|
88
95
|
end
|
@@ -171,23 +178,9 @@ Enumerable).
|
|
171
178
|
|
172
179
|
### Pal2nal
|
173
180
|
|
174
|
-
A protein (amino acid) to nucleotide alignment would first load
|
175
|
-
|
176
|
-
|
177
|
-
```ruby
|
178
|
-
aln1 = Alignment.new
|
179
|
-
fasta1 = FastaWriter.new('aa-aln.fa')
|
180
|
-
aln1.rows.each do | row |
|
181
|
-
fasta1.write(row.id, row.to_aa.to_s)
|
182
|
-
end
|
183
|
-
aln2 = Alignment.new
|
184
|
-
fasta2 = FastaReader.new('nt.fa')
|
185
|
-
fasta2.each do | rec |
|
186
|
-
aln2 << Sequence.new(rec.id, rec.seq)
|
187
|
-
end
|
188
|
-
```
|
189
|
-
|
190
|
-
Writing a (simple) version of pal2nal would be something like
|
181
|
+
A protein (amino acid) to nucleotide alignment would first load the
|
182
|
+
sequences and align them. Writing a (simple) version of pal2nal would
|
183
|
+
be something like
|
191
184
|
|
192
185
|
```ruby
|
193
186
|
fasta3 = FastaWriter.new('nt-aln.fa')
|
@@ -210,14 +203,17 @@ Writing a (simple) version of pal2nal would be something like
|
|
210
203
|
end
|
211
204
|
```
|
212
205
|
|
213
|
-
With amino acid aa_aln and
|
214
|
-
version of pal2nal includes
|
206
|
+
We included a version (of course). With amino acid aa_aln and
|
207
|
+
nucleotide nt_aln loaded, the library version of pal2nal includes
|
208
|
+
validation
|
215
209
|
|
216
210
|
```ruby
|
217
211
|
aln = aa_aln.pal2nal(nt_aln, :codon_table => 3, :do_validate => true)
|
218
212
|
```
|
219
213
|
|
220
|
-
resulting in the codon alignment.
|
214
|
+
resulting in the codon alignment. A command line
|
215
|
+
[pal2nal](./bin/pal2nal) is also available on installing the gem.
|
216
|
+
|
221
217
|
|
222
218
|
### Phylogeny
|
223
219
|
|
@@ -368,4 +364,4 @@ This Biogem is published at [#bio-alignment](http://biogems.info/index.html)
|
|
368
364
|
|
369
365
|
## Copyright
|
370
366
|
|
371
|
-
Copyright (c) 2012-
|
367
|
+
Copyright (c) 2012-2017 Pjotr Prins. See LICENSE.txt for further details.
|
data/Rakefile
CHANGED
@@ -1,14 +1,6 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
require 'rubygems'
|
4
|
-
# require 'bundler'
|
5
|
-
# begin
|
6
|
-
# Bundler.setup(:default, :development)
|
7
|
-
# rescue Bundler::BundlerError => e
|
8
|
-
# $stderr.puts e.message
|
9
|
-
# $stderr.puts "Run `bundle install` to install missing gems"
|
10
|
-
# exit e.status_code
|
11
|
-
# end
|
12
4
|
require 'rake'
|
13
5
|
|
14
6
|
require 'rspec/core'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.9
|
1
|
+
0.1.0
|
data/bin/pal2nal
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
|
4
|
+
USAGE = """
|
5
|
+
Simple Pal2Nal implementation
|
6
|
+
|
7
|
+
Pal2Nal converts a multiple sequence alignment of proteins and the
|
8
|
+
corresponding DNA (or mRNA) sequences into a codon-based DNA
|
9
|
+
alignment. This version implements a simple 1-to-1 matching with use
|
10
|
+
of a codon table and validation(!)
|
11
|
+
|
12
|
+
The resulting codon-based DNA alignment can further be subjected to
|
13
|
+
the calculation of synonymous (Ks) and non-synonymous (Ka)
|
14
|
+
substitution rates and be fed into PAML.
|
15
|
+
|
16
|
+
pep.aln Protein (AA) alignment
|
17
|
+
nuc.fasta Nucleotide sequences
|
18
|
+
|
19
|
+
Example:
|
20
|
+
|
21
|
+
./bin/pal2nal test/data/fasta/codon/aa-alignment.fa test/data/fasta/codon/nt.fa
|
22
|
+
"""
|
23
|
+
|
24
|
+
gempath = File.dirname(File.dirname(__FILE__))
|
25
|
+
$: << File.join(gempath,'lib')
|
26
|
+
|
27
|
+
VERSION_FILENAME=File.join(gempath,'VERSION')
|
28
|
+
version = File.new(VERSION_FILENAME).read.chomp
|
29
|
+
|
30
|
+
if ARGV.size == 0
|
31
|
+
print USAGE
|
32
|
+
end
|
33
|
+
|
34
|
+
require 'optparse'
|
35
|
+
require 'bio-alignment'
|
36
|
+
require 'bigbio'
|
37
|
+
|
38
|
+
include Bio::BioAlignment
|
39
|
+
|
40
|
+
options = {show_help: false, codon_table: 1, validate: true}
|
41
|
+
|
42
|
+
opts = OptionParser.new do |o|
|
43
|
+
o.banner = "Usage: #{File.basename($0)} pep.aln nuc.fasta [options]"
|
44
|
+
|
45
|
+
o.on("--codon-table [int]", Integer, "Codon table (default 1)") do |ct|
|
46
|
+
options[:codon_table] = ct
|
47
|
+
end
|
48
|
+
|
49
|
+
o.on("--no-validate", "Validate codons") do |b|
|
50
|
+
options[:validate] = false
|
51
|
+
end
|
52
|
+
|
53
|
+
o.on("-q", "--quiet", "Run quietly") do |q|
|
54
|
+
options[:quiet] = true
|
55
|
+
end
|
56
|
+
|
57
|
+
o.on("-v","--verbose", "Run verbosely") do |v|
|
58
|
+
options[:verbose] = true
|
59
|
+
end
|
60
|
+
|
61
|
+
o.on("-d", "--debug", "Debug mode") do |v|
|
62
|
+
options[:debug] = true
|
63
|
+
end
|
64
|
+
|
65
|
+
o.separator ""
|
66
|
+
o.on_tail('-h', '--help', 'display this help and exit') do
|
67
|
+
options[:show_help] = true
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
begin
|
72
|
+
opts.parse!(ARGV)
|
73
|
+
|
74
|
+
$stderr.print "Pal2Nal #{version} (biogem Ruby #{RUBY_VERSION}) by Pjotr Prins 2017\n" if !options[:quiet]
|
75
|
+
|
76
|
+
if options[:show_help] or ARGV.size < 2
|
77
|
+
print opts
|
78
|
+
print USAGE
|
79
|
+
exit 1
|
80
|
+
end
|
81
|
+
|
82
|
+
$stderr.print "Options: ",options,"\n" if !options[:quiet]
|
83
|
+
|
84
|
+
rescue OptionParser::InvalidOption => e
|
85
|
+
options[:invalid_argument] = e.message
|
86
|
+
end
|
87
|
+
|
88
|
+
aafn = ARGV.shift
|
89
|
+
ntfn = ARGV.shift
|
90
|
+
|
91
|
+
aa_aln = Alignment.new
|
92
|
+
aa = FastaReader.new(aafn)
|
93
|
+
aa.each do | rec |
|
94
|
+
aa_aln << Sequence.new(rec.id, rec.seq)
|
95
|
+
end
|
96
|
+
nt_aln = Alignment.new
|
97
|
+
nt = FastaReader.new(ntfn)
|
98
|
+
nt.each do | rec |
|
99
|
+
nt_aln << Sequence.new(rec.id, rec.seq)
|
100
|
+
end
|
101
|
+
|
102
|
+
pal2nal = aa_aln.pal2nal(nt_aln, :codon_table => options[:codon_table], :do_validate => options[:validate])
|
103
|
+
|
104
|
+
LINELEN = 60
|
105
|
+
offset = 0
|
106
|
+
size = pal2nal.first.seq.size * 3
|
107
|
+
|
108
|
+
print "CLUSTAL W multiple sequence alignment\n"
|
109
|
+
while size > 0
|
110
|
+
print "\n"
|
111
|
+
pal2nal.each do | seq |
|
112
|
+
print seq.id," "*(18-seq.id.size)
|
113
|
+
print seq.to_nt[offset..offset+LINELEN-1],"\n"
|
114
|
+
end
|
115
|
+
offset += LINELEN
|
116
|
+
size -= LINELEN
|
117
|
+
end
|
118
|
+
print "\n"
|
metadata
CHANGED
@@ -1,19 +1,20 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Pjotr Prins
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-10-10 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: pjotr.public01@thebird.nl
|
15
15
|
executables:
|
16
16
|
- bio-alignment
|
17
|
+
- pal2nal
|
17
18
|
extensions: []
|
18
19
|
extra_rdoc_files:
|
19
20
|
- LICENSE.txt
|
@@ -30,6 +31,7 @@ files:
|
|
30
31
|
- TODO
|
31
32
|
- VERSION
|
32
33
|
- bin/bio-alignment
|
34
|
+
- bin/pal2nal
|
33
35
|
- doc/bio-alignment-design.md
|
34
36
|
- features/bioruby-feature.rb
|
35
37
|
- features/bioruby.feature
|
@@ -109,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
109
111
|
version: '0'
|
110
112
|
requirements: []
|
111
113
|
rubyforge_project:
|
112
|
-
rubygems_version: 2.
|
114
|
+
rubygems_version: 2.6.8
|
113
115
|
signing_key:
|
114
116
|
specification_version: 4
|
115
117
|
summary: Support for multiple sequence alignments (MSA)
|