bio-alignment 0.0.9 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -2
- data/LICENSE.txt +1 -1
- data/README.md +18 -22
- data/Rakefile +0 -8
- data/VERSION +1 -1
- data/bin/pal2nal +118 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0d0b2b23668eee191b0d1df61459aaa06cc6c6c6
|
4
|
+
data.tar.gz: 3a8ed0466b5351b791d6bfd40f3556da402e1b84
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a292dc8d7b75289c10f214b88e207634607af7f9b1dbafdccea7e7a34091c6b7140cb3461d73ee708e736b24b39d814fc2b302f2299cdc15a7006436163c7333
|
7
|
+
data.tar.gz: 4abd0eee884870f58c46f19d0c0fe030e6a79a9a3676eaa640c54a8d8fde48f41f3265a2e09be72456d69690acd985de15daad7911003aa00b34613ddd60a9dd
|
data/Gemfile
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
2
|
gem "bio-logger"
|
3
|
+
gem "bio-bigbio"
|
3
4
|
gem "bio", ">= 1.5.0" # for translation tables, BioRuby compat and Newick parser
|
4
5
|
|
5
6
|
# Add dependencies to develop your gem here.
|
@@ -7,6 +8,6 @@ gem "bio", ">= 1.5.0" # for translation tables, BioRuby compat and Newick p
|
|
7
8
|
group :development do
|
8
9
|
gem "rake"
|
9
10
|
gem "bio-bigbio" # for reading FASTA files in tests
|
10
|
-
gem "cucumber"
|
11
|
-
gem "rspec"
|
11
|
+
# gem "cucumber" - disabled tests
|
12
|
+
# gem "rspec"
|
12
13
|
end
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -36,6 +36,13 @@ Bio::BioAlignment
|
|
36
36
|
document](https://github.com/pjotrp/bioruby-alignment/blob/master/doc/bio-alignment-design.md)
|
37
37
|
for Ruby.
|
38
38
|
|
39
|
+
## Installation
|
40
|
+
|
41
|
+
Dependencies are bioruby, bio-bigbio and bio-logger gems. To run the
|
42
|
+
tests you also need to install rspec and cucumber gems.
|
43
|
+
|
44
|
+
Bundler is no longer used.
|
45
|
+
|
39
46
|
## Command line
|
40
47
|
|
41
48
|
bio-alignment comes with a command line interface (CLI), which can apply a number
|
@@ -82,7 +89,7 @@ aligmment (note codon gaps are represented by '---')
|
|
82
89
|
|
83
90
|
include Bio::BioAlignment
|
84
91
|
aln = Alignment.new
|
85
|
-
|
92
|
+
fasta = FastaReader.new('codon-alignment.fa')
|
86
93
|
fasta.each do | rec |
|
87
94
|
aln << CodonSequence.new(rec.id, rec.seq)
|
88
95
|
end
|
@@ -171,23 +178,9 @@ Enumerable).
|
|
171
178
|
|
172
179
|
### Pal2nal
|
173
180
|
|
174
|
-
A protein (amino acid) to nucleotide alignment would first load
|
175
|
-
|
176
|
-
|
177
|
-
```ruby
|
178
|
-
aln1 = Alignment.new
|
179
|
-
fasta1 = FastaWriter.new('aa-aln.fa')
|
180
|
-
aln1.rows.each do | row |
|
181
|
-
fasta1.write(row.id, row.to_aa.to_s)
|
182
|
-
end
|
183
|
-
aln2 = Alignment.new
|
184
|
-
fasta2 = FastaReader.new('nt.fa')
|
185
|
-
fasta2.each do | rec |
|
186
|
-
aln2 << Sequence.new(rec.id, rec.seq)
|
187
|
-
end
|
188
|
-
```
|
189
|
-
|
190
|
-
Writing a (simple) version of pal2nal would be something like
|
181
|
+
A protein (amino acid) to nucleotide alignment would first load the
|
182
|
+
sequences and align them. Writing a (simple) version of pal2nal would
|
183
|
+
be something like
|
191
184
|
|
192
185
|
```ruby
|
193
186
|
fasta3 = FastaWriter.new('nt-aln.fa')
|
@@ -210,14 +203,17 @@ Writing a (simple) version of pal2nal would be something like
|
|
210
203
|
end
|
211
204
|
```
|
212
205
|
|
213
|
-
With amino acid aa_aln and
|
214
|
-
version of pal2nal includes
|
206
|
+
We included a version (of course). With amino acid aa_aln and
|
207
|
+
nucleotide nt_aln loaded, the library version of pal2nal includes
|
208
|
+
validation
|
215
209
|
|
216
210
|
```ruby
|
217
211
|
aln = aa_aln.pal2nal(nt_aln, :codon_table => 3, :do_validate => true)
|
218
212
|
```
|
219
213
|
|
220
|
-
resulting in the codon alignment.
|
214
|
+
resulting in the codon alignment. A command line
|
215
|
+
[pal2nal](./bin/pal2nal) is also available on installing the gem.
|
216
|
+
|
221
217
|
|
222
218
|
### Phylogeny
|
223
219
|
|
@@ -368,4 +364,4 @@ This Biogem is published at [#bio-alignment](http://biogems.info/index.html)
|
|
368
364
|
|
369
365
|
## Copyright
|
370
366
|
|
371
|
-
Copyright (c) 2012-
|
367
|
+
Copyright (c) 2012-2017 Pjotr Prins. See LICENSE.txt for further details.
|
data/Rakefile
CHANGED
@@ -1,14 +1,6 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
require 'rubygems'
|
4
|
-
# require 'bundler'
|
5
|
-
# begin
|
6
|
-
# Bundler.setup(:default, :development)
|
7
|
-
# rescue Bundler::BundlerError => e
|
8
|
-
# $stderr.puts e.message
|
9
|
-
# $stderr.puts "Run `bundle install` to install missing gems"
|
10
|
-
# exit e.status_code
|
11
|
-
# end
|
12
4
|
require 'rake'
|
13
5
|
|
14
6
|
require 'rspec/core'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.9
|
1
|
+
0.1.0
|
data/bin/pal2nal
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
|
4
|
+
USAGE = """
|
5
|
+
Simple Pal2Nal implementation
|
6
|
+
|
7
|
+
Pal2Nal converts a multiple sequence alignment of proteins and the
|
8
|
+
corresponding DNA (or mRNA) sequences into a codon-based DNA
|
9
|
+
alignment. This version implements a simple 1-to-1 matching with use
|
10
|
+
of a codon table and validation(!)
|
11
|
+
|
12
|
+
The resulting codon-based DNA alignment can further be subjected to
|
13
|
+
the calculation of synonymous (Ks) and non-synonymous (Ka)
|
14
|
+
substitution rates and be fed into PAML.
|
15
|
+
|
16
|
+
pep.aln Protein (AA) alignment
|
17
|
+
nuc.fasta Nucleotide sequences
|
18
|
+
|
19
|
+
Example:
|
20
|
+
|
21
|
+
./bin/pal2nal test/data/fasta/codon/aa-alignment.fa test/data/fasta/codon/nt.fa
|
22
|
+
"""
|
23
|
+
|
24
|
+
gempath = File.dirname(File.dirname(__FILE__))
|
25
|
+
$: << File.join(gempath,'lib')
|
26
|
+
|
27
|
+
VERSION_FILENAME=File.join(gempath,'VERSION')
|
28
|
+
version = File.new(VERSION_FILENAME).read.chomp
|
29
|
+
|
30
|
+
if ARGV.size == 0
|
31
|
+
print USAGE
|
32
|
+
end
|
33
|
+
|
34
|
+
require 'optparse'
|
35
|
+
require 'bio-alignment'
|
36
|
+
require 'bigbio'
|
37
|
+
|
38
|
+
include Bio::BioAlignment
|
39
|
+
|
40
|
+
options = {show_help: false, codon_table: 1, validate: true}
|
41
|
+
|
42
|
+
opts = OptionParser.new do |o|
|
43
|
+
o.banner = "Usage: #{File.basename($0)} pep.aln nuc.fasta [options]"
|
44
|
+
|
45
|
+
o.on("--codon-table [int]", Integer, "Codon table (default 1)") do |ct|
|
46
|
+
options[:codon_table] = ct
|
47
|
+
end
|
48
|
+
|
49
|
+
o.on("--no-validate", "Validate codons") do |b|
|
50
|
+
options[:validate] = false
|
51
|
+
end
|
52
|
+
|
53
|
+
o.on("-q", "--quiet", "Run quietly") do |q|
|
54
|
+
options[:quiet] = true
|
55
|
+
end
|
56
|
+
|
57
|
+
o.on("-v","--verbose", "Run verbosely") do |v|
|
58
|
+
options[:verbose] = true
|
59
|
+
end
|
60
|
+
|
61
|
+
o.on("-d", "--debug", "Debug mode") do |v|
|
62
|
+
options[:debug] = true
|
63
|
+
end
|
64
|
+
|
65
|
+
o.separator ""
|
66
|
+
o.on_tail('-h', '--help', 'display this help and exit') do
|
67
|
+
options[:show_help] = true
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
begin
|
72
|
+
opts.parse!(ARGV)
|
73
|
+
|
74
|
+
$stderr.print "Pal2Nal #{version} (biogem Ruby #{RUBY_VERSION}) by Pjotr Prins 2017\n" if !options[:quiet]
|
75
|
+
|
76
|
+
if options[:show_help] or ARGV.size < 2
|
77
|
+
print opts
|
78
|
+
print USAGE
|
79
|
+
exit 1
|
80
|
+
end
|
81
|
+
|
82
|
+
$stderr.print "Options: ",options,"\n" if !options[:quiet]
|
83
|
+
|
84
|
+
rescue OptionParser::InvalidOption => e
|
85
|
+
options[:invalid_argument] = e.message
|
86
|
+
end
|
87
|
+
|
88
|
+
aafn = ARGV.shift
|
89
|
+
ntfn = ARGV.shift
|
90
|
+
|
91
|
+
aa_aln = Alignment.new
|
92
|
+
aa = FastaReader.new(aafn)
|
93
|
+
aa.each do | rec |
|
94
|
+
aa_aln << Sequence.new(rec.id, rec.seq)
|
95
|
+
end
|
96
|
+
nt_aln = Alignment.new
|
97
|
+
nt = FastaReader.new(ntfn)
|
98
|
+
nt.each do | rec |
|
99
|
+
nt_aln << Sequence.new(rec.id, rec.seq)
|
100
|
+
end
|
101
|
+
|
102
|
+
pal2nal = aa_aln.pal2nal(nt_aln, :codon_table => options[:codon_table], :do_validate => options[:validate])
|
103
|
+
|
104
|
+
LINELEN = 60
|
105
|
+
offset = 0
|
106
|
+
size = pal2nal.first.seq.size * 3
|
107
|
+
|
108
|
+
print "CLUSTAL W multiple sequence alignment\n"
|
109
|
+
while size > 0
|
110
|
+
print "\n"
|
111
|
+
pal2nal.each do | seq |
|
112
|
+
print seq.id," "*(18-seq.id.size)
|
113
|
+
print seq.to_nt[offset..offset+LINELEN-1],"\n"
|
114
|
+
end
|
115
|
+
offset += LINELEN
|
116
|
+
size -= LINELEN
|
117
|
+
end
|
118
|
+
print "\n"
|
metadata
CHANGED
@@ -1,19 +1,20 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Pjotr Prins
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-10-10 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: pjotr.public01@thebird.nl
|
15
15
|
executables:
|
16
16
|
- bio-alignment
|
17
|
+
- pal2nal
|
17
18
|
extensions: []
|
18
19
|
extra_rdoc_files:
|
19
20
|
- LICENSE.txt
|
@@ -30,6 +31,7 @@ files:
|
|
30
31
|
- TODO
|
31
32
|
- VERSION
|
32
33
|
- bin/bio-alignment
|
34
|
+
- bin/pal2nal
|
33
35
|
- doc/bio-alignment-design.md
|
34
36
|
- features/bioruby-feature.rb
|
35
37
|
- features/bioruby.feature
|
@@ -109,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
109
111
|
version: '0'
|
110
112
|
requirements: []
|
111
113
|
rubyforge_project:
|
112
|
-
rubygems_version: 2.
|
114
|
+
rubygems_version: 2.6.8
|
113
115
|
signing_key:
|
114
116
|
specification_version: 4
|
115
117
|
summary: Support for multiple sequence alignments (MSA)
|