bio-alignment 0.0.1.alpha
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +13 -0
- data/LICENSE.txt +20 -0
- data/README.md +37 -0
- data/Rakefile +49 -0
- data/VERSION +1 -0
- data/bin/bio-alignment +76 -0
- data/doc/bio-alignment-design.md +41 -0
- data/lib/bio-alignment.rb +5 -0
- data/spec/bio-alignment_spec.rb +7 -0
- data/spec/spec_helper.rb +12 -0
- metadata +109 -0
data/.document
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
source "http://rubygems.org"
|
2
|
+
# Add dependencies required to use your gem here.
|
3
|
+
# Example:
|
4
|
+
# gem "activesupport", ">= 2.3.5"
|
5
|
+
|
6
|
+
# Add dependencies to develop your gem here.
|
7
|
+
# Include everything needed to run rake, tests, features, etc.
|
8
|
+
group :development do
|
9
|
+
gem "rspec", "~> 2.3.0"
|
10
|
+
gem "bundler", "~> 1.0.0"
|
11
|
+
gem "jeweler", "~> 1.7.0"
|
12
|
+
gem "bio", ">= 1.4.2"
|
13
|
+
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2012 Pjotr Prins
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# bio-alignment
|
2
|
+
|
3
|
+
Alignment handler for multiple sequence alignments (MSA).
|
4
|
+
|
5
|
+
This alignment handler makes no assumptions about the underlying
|
6
|
+
sequence object. Support for any nucleotide, amino acid and codon
|
7
|
+
sequences that are lists. Any list with payload can be used (e.g.
|
8
|
+
nucleotide quality score, codon annotation). The only requirement is
|
9
|
+
that the list is iterable and can be indexed.
|
10
|
+
|
11
|
+
This work is based on Pjotr's experience designing the BioScala
|
12
|
+
Alignment handler and BioRuby's PAML support. See also the
|
13
|
+
[design document](https://github.com/pjotrp/bioruby-alignment/blob/master/doc/bio-alignment-design.md)
|
14
|
+
|
15
|
+
Note: this software is under active development.
|
16
|
+
|
17
|
+
## Developers
|
18
|
+
|
19
|
+
To use the library
|
20
|
+
|
21
|
+
```ruby
|
22
|
+
require 'bio-alignment'
|
23
|
+
```
|
24
|
+
|
25
|
+
The API doc is online. For more code examples see ./spec/*.rb
|
26
|
+
|
27
|
+
## Cite
|
28
|
+
|
29
|
+
If you use this software, please cite http://dx.doi.org/10.1093/bioinformatics/btq475
|
30
|
+
|
31
|
+
## Copyright
|
32
|
+
|
33
|
+
Copyright (c) 2012 Pjotr Prins. See LICENSE.txt for further details.
|
34
|
+
|
35
|
+
## Biogems.info
|
36
|
+
|
37
|
+
This exciting Ruby Biogem is published on http://biogems.info/
|
data/Rakefile
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
+
gem.name = "bio-alignment"
|
18
|
+
gem.homepage = "http://github.com/pjotrp/bioruby-alignment"
|
19
|
+
gem.license = "MIT"
|
20
|
+
gem.summary = %Q{Multiple sequence alignments (MSA)}
|
21
|
+
gem.description = %Q{Alignment handler for multiple sequence alignments. Support for any nucleotide, amino acid and codon sequences that are lists. I.e. any list with payload can be used, as long as it can be indexed}
|
22
|
+
gem.email = "pjotr.public01@thebird.nl"
|
23
|
+
gem.authors = ["Pjotr Prins"]
|
24
|
+
# dependencies defined in Gemfile
|
25
|
+
end
|
26
|
+
Jeweler::RubygemsDotOrgTasks.new
|
27
|
+
|
28
|
+
require 'rspec/core'
|
29
|
+
require 'rspec/core/rake_task'
|
30
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
31
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
32
|
+
end
|
33
|
+
|
34
|
+
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
35
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
36
|
+
spec.rcov = true
|
37
|
+
end
|
38
|
+
|
39
|
+
task :default => :spec
|
40
|
+
|
41
|
+
require 'rdoc/task'
|
42
|
+
Rake::RDocTask.new do |rdoc|
|
43
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
44
|
+
|
45
|
+
rdoc.rdoc_dir = 'rdoc'
|
46
|
+
rdoc.title = "bio-alignment #{version}"
|
47
|
+
rdoc.rdoc_files.include('README*')
|
48
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
49
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1.alpha
|
data/bin/bio-alignment
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# BioRuby bio-alignment Plugin
|
4
|
+
# Version 0.0.0
|
5
|
+
# Author:: Pjotr Prins
|
6
|
+
# Copyright:: 2012
|
7
|
+
# License:: The Ruby License
|
8
|
+
|
9
|
+
USAGE = "Describe bio-alignment"
|
10
|
+
|
11
|
+
if ARGV.size == 0
|
12
|
+
print USAGE
|
13
|
+
end
|
14
|
+
|
15
|
+
require 'bio-alignment'
|
16
|
+
require 'optparse'
|
17
|
+
|
18
|
+
# Uncomment when using the bio-logger
|
19
|
+
# require 'bio-logger'
|
20
|
+
# Bio::Log::CLI.logger('stderr')
|
21
|
+
# Bio::Log::CLI.trace('info')
|
22
|
+
|
23
|
+
options = {:example_switch=>false,:show_help=>false}
|
24
|
+
opts = OptionParser.new do |o|
|
25
|
+
o.banner = "Usage: #{File.basename($0)} [options] reponame\ne.g. #{File.basename($0)} the-perfect-gem"
|
26
|
+
|
27
|
+
o.on('--example_parameter [EXAMPLE_PARAMETER]', 'TODO: put a description for the PARAMETER') do |example_parameter|
|
28
|
+
# TODO: your logic here, below an example
|
29
|
+
options[:example_parameter] = 'this is a parameter'
|
30
|
+
end
|
31
|
+
|
32
|
+
o.separator ""
|
33
|
+
o.on("--switch-example", 'TODO: put a description for the SWITCH') do
|
34
|
+
# TODO: your logic here, below an example
|
35
|
+
self[:example_switch] = true
|
36
|
+
end
|
37
|
+
|
38
|
+
# Uncomment the following when using the bio-logger
|
39
|
+
# o.separator ""
|
40
|
+
# o.on("--logger filename",String,"Log to file (default stderr)") do | name |
|
41
|
+
# Bio::Log::CLI.logger(name)
|
42
|
+
# end
|
43
|
+
#
|
44
|
+
# o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
|
45
|
+
# Bio::Log::CLI.trace(s)
|
46
|
+
# end
|
47
|
+
#
|
48
|
+
# o.on("-q", "--quiet", "Run quietly") do |q|
|
49
|
+
# Bio::Log::CLI.trace('error')
|
50
|
+
# end
|
51
|
+
#
|
52
|
+
# o.on("-v", "--verbose", "Run verbosely") do |v|
|
53
|
+
# Bio::Log::CLI.trace('info')
|
54
|
+
# end
|
55
|
+
#
|
56
|
+
# o.on("--debug", "Show debug messages") do |v|
|
57
|
+
# Bio::Log::CLI.trace('debug')
|
58
|
+
# end
|
59
|
+
|
60
|
+
o.separator ""
|
61
|
+
o.on_tail('-h', '--help', 'display this help and exit') do
|
62
|
+
options[:show_help] = true
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
begin
|
67
|
+
opts.parse!(ARGV)
|
68
|
+
|
69
|
+
# Uncomment the following when using the bio-logger
|
70
|
+
# Bio::Log::CLI.configure('bio-alignment')
|
71
|
+
|
72
|
+
# TODO: your code here
|
73
|
+
# use options for your logic
|
74
|
+
rescue OptionParser::InvalidOption => e
|
75
|
+
options[:invalid_argument] = e.message
|
76
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# Bio-alignment design
|
2
|
+
|
3
|
+
## Introduction
|
4
|
+
|
5
|
+
Biological multi-sequence alignments (MSA) are normally matrices of
|
6
|
+
nucleotide or amino acid sequences, with gaps. Despite this rather
|
7
|
+
simple premise, most software fails to make it simple to access these
|
8
|
+
structures. Also most implementations fail to support a 'pay load' of
|
9
|
+
items in the matrix (mostly because underlying sequences are String
|
10
|
+
based). This means a developer has to track information in multiple
|
11
|
+
places, for example a base pair quality score. This makes code complex
|
12
|
+
and therefore error prone. With bio-alignment elements of the matrix
|
13
|
+
can carry information. This means that when the alignment gets edited,
|
14
|
+
the element moves, and the information migrates along. For example,
|
15
|
+
say we have a nucleotide sequence with pay load
|
16
|
+
|
17
|
+
A G T A
|
18
|
+
| | | |
|
19
|
+
5 9 * 1
|
20
|
+
|
21
|
+
most library implementations will have two strings "AGTA" and "59*1".
|
22
|
+
Removing the third nucleotide would mean removing it twice, first
|
23
|
+
"AGA", next "591". With bio-alignment this is one action because we
|
24
|
+
have one object for each element, that contains both values, e.g. the
|
25
|
+
payload of T is *. Removing T from the list also removes *.
|
26
|
+
|
27
|
+
In addition bio-alignment deals with codons and codon translation.
|
28
|
+
Rather than track multiple matrices, the codon is viewed as an element,
|
29
|
+
and the translated codon as the pay load. When an alignment gets
|
30
|
+
reordered the code only has to do it in one place.
|
31
|
+
|
32
|
+
Likewise, an alignment column can have a pay load (e.g. quality score
|
33
|
+
in a pile up), and an alignment row can have a pay load (e.g. the
|
34
|
+
sequence name). The concept of pay load is handled through generic
|
35
|
+
matrix element, column, or row 'attributes'.
|
36
|
+
|
37
|
+
Many of these ideas came from my work on the [BioScala
|
38
|
+
project](https://github.com/pjotrp/bioscala/blob/master/doc/design.txt).
|
39
|
+
The BioScala library has the advantage of type safety throughout.
|
40
|
+
|
41
|
+
Copyright (C) 2012 Pjotr Prins <pjotr.prins@thebird.nl>
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
|
+
require 'rspec'
|
4
|
+
require 'bio-alignment'
|
5
|
+
|
6
|
+
# Requires supporting files with custom matchers and macros, etc,
|
7
|
+
# in ./support/ and its subdirectories.
|
8
|
+
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
9
|
+
|
10
|
+
RSpec.configure do |config|
|
11
|
+
|
12
|
+
end
|
metadata
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bio-alignment
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1.alpha
|
5
|
+
prerelease: 6
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Pjotr Prins
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-01-25 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: &18984620 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 2.3.0
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *18984620
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: bundler
|
27
|
+
requirement: &18984040 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.0.0
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *18984040
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: jeweler
|
38
|
+
requirement: &18983460 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ~>
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 1.7.0
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *18983460
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: bio
|
49
|
+
requirement: &18982860 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.4.2
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *18982860
|
58
|
+
description: Alignment handler for multiple sequence alignments. Support for any nucleotide,
|
59
|
+
amino acid and codon sequences that are lists. I.e. any list with payload can be
|
60
|
+
used, as long as it can be indexed
|
61
|
+
email: pjotr.public01@thebird.nl
|
62
|
+
executables:
|
63
|
+
- bio-alignment
|
64
|
+
extensions: []
|
65
|
+
extra_rdoc_files:
|
66
|
+
- LICENSE.txt
|
67
|
+
- README.md
|
68
|
+
files:
|
69
|
+
- .document
|
70
|
+
- .rspec
|
71
|
+
- Gemfile
|
72
|
+
- LICENSE.txt
|
73
|
+
- README.md
|
74
|
+
- Rakefile
|
75
|
+
- VERSION
|
76
|
+
- bin/bio-alignment
|
77
|
+
- doc/bio-alignment-design.md
|
78
|
+
- lib/bio-alignment.rb
|
79
|
+
- spec/bio-alignment_spec.rb
|
80
|
+
- spec/spec_helper.rb
|
81
|
+
homepage: http://github.com/pjotrp/bioruby-alignment
|
82
|
+
licenses:
|
83
|
+
- MIT
|
84
|
+
post_install_message:
|
85
|
+
rdoc_options: []
|
86
|
+
require_paths:
|
87
|
+
- lib
|
88
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
segments:
|
95
|
+
- 0
|
96
|
+
hash: 1744402862562390387
|
97
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
98
|
+
none: false
|
99
|
+
requirements:
|
100
|
+
- - ! '>'
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: 1.3.1
|
103
|
+
requirements: []
|
104
|
+
rubyforge_project:
|
105
|
+
rubygems_version: 1.8.10
|
106
|
+
signing_key:
|
107
|
+
specification_version: 3
|
108
|
+
summary: Multiple sequence alignments (MSA)
|
109
|
+
test_files: []
|