needleman_wunsch_aligner 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +24 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +96 -0
- data/Rakefile +14 -0
- data/lib/needleman_wunsch_aligner/example_paragraph_and_sentence_aligner.rb +49 -0
- data/lib/needleman_wunsch_aligner/version.rb +3 -0
- data/lib/needleman_wunsch_aligner.rb +173 -0
- data/needleman_wunsch_aligner.gemspec +22 -0
- data/spec/needleman_wunsch_aligner/example_paragraph_and_sentence_aligner_spec.rb +162 -0
- data/spec/needleman_wunsch_aligner_spec.rb +93 -0
- data/spec/spec_helper.rb +6 -0
- metadata +87 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 525f3aee90fc6e9de7a8de268cfcd7300836b5e7
|
4
|
+
data.tar.gz: 561e1b71b51e6c45cbaf0e6da72bbe9341a9adbb
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b87838a2c2bf5460090d68ea23494aa2a714556b51f558ea17a9ad708612181f0a71ae96b56025a6e1ed5af96512ed25fce8a694f1cb3476876fad85c8d82f88
|
7
|
+
data.tar.gz: 2579acb036d6b4fba976fc3705a1cbc129ef9fc68a30458b699d37ac2909f0607b8a6e65ecde3aab4217816dc556d44e56b342b0b3e54551ac545f4993f1b43c
|
data/.gitignore
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.ruby-gemset
|
6
|
+
.ruby-version
|
7
|
+
.yardoc
|
8
|
+
Gemfile.lock
|
9
|
+
InstalledFiles
|
10
|
+
_yardoc
|
11
|
+
coverage
|
12
|
+
doc/
|
13
|
+
lib/bundler/man
|
14
|
+
pkg
|
15
|
+
rdoc
|
16
|
+
spec/reports
|
17
|
+
test/tmp
|
18
|
+
test/version_tmp
|
19
|
+
tmp
|
20
|
+
*.bundle
|
21
|
+
*.so
|
22
|
+
*.o
|
23
|
+
*.a
|
24
|
+
mkmf.log
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Jo Hund
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
# NeedlemanWunschAligner
|
2
|
+
|
3
|
+
This gem finds the optimal alignment of two sequences of any kind of Ruby Objects. You can implement sophisticated scoring functions, using any of the Objects’ attributes.
|
4
|
+
|
5
|
+
The [Needleman-Wunsch algorithm](https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm) is typically used in bioinformatics to align protein or nucleotide sequences; however, it works really well for any kind of sequence. I have used this gem to align paragraphs and sentences of pairs of bilingual texts.
|
6
|
+
|
7
|
+
Given two sequences
|
8
|
+
|
9
|
+
seq1 = 'GCATGCU'
|
10
|
+
seq2 = 'GATTACA'
|
11
|
+
|
12
|
+
The algorithm will find the optimal alignment based on a scoring function you specify:
|
13
|
+
|
14
|
+
GCATG-CU
|
15
|
+
=+==!-=!
|
16
|
+
G-ATTACA
|
17
|
+
|
18
|
+
Meaning of the symbols:
|
19
|
+
|
20
|
+
= Match
|
21
|
+
! Mismatch
|
22
|
+
+ Insert
|
23
|
+
- Deletion
|
24
|
+
|
25
|
+
Insert and Deletion are usually grouped together as `InDel`.
|
26
|
+
|
27
|
+
## Installation
|
28
|
+
|
29
|
+
Add this line to your application's Gemfile:
|
30
|
+
|
31
|
+
gem 'needleman_wunsch_aligner'
|
32
|
+
|
33
|
+
And then execute:
|
34
|
+
|
35
|
+
$ bundle
|
36
|
+
|
37
|
+
Or install it yourself as:
|
38
|
+
|
39
|
+
$ gem install needleman_wunsch_aligner
|
40
|
+
|
41
|
+
## Usage
|
42
|
+
|
43
|
+
Instantiate a new aligner with the two sequences and compute the optimal alignment:
|
44
|
+
|
45
|
+
require 'needleman_wunsch_aligner'
|
46
|
+
|
47
|
+
aligner = NeedlemanWunschAligner.new([1,2,3], [2,3,4])
|
48
|
+
aligner.get_optimal_alignment
|
49
|
+
# => [[1, 2, 3, nil], [nil, 2, 3, 4]]
|
50
|
+
|
51
|
+
Inspect the alignment:
|
52
|
+
|
53
|
+
aligner.print_alignment
|
54
|
+
|
55
|
+
# => 1 | nil
|
56
|
+
2 | 2
|
57
|
+
3 | 3
|
58
|
+
nil | 4
|
59
|
+
|
60
|
+
Inspect the score table:
|
61
|
+
|
62
|
+
aligner.print_as_table(:score)
|
63
|
+
# => 2 3 4
|
64
|
+
0 -1 -2 -3
|
65
|
+
1 -1 -2 -3 -4
|
66
|
+
2 -2 0 -1 -2
|
67
|
+
3 -3 -1 1 0
|
68
|
+
|
69
|
+
Inspect the traceback table:
|
70
|
+
|
71
|
+
aligner.print_as_table(:traceback)
|
72
|
+
# => 2 3 4
|
73
|
+
x ← ← ←
|
74
|
+
1 ↑ ↑ ↑ ↑
|
75
|
+
2 ↑ ⬉ ← ←
|
76
|
+
3 ↑ ↑ ⬉ ←
|
77
|
+
|
78
|
+
## Customization
|
79
|
+
|
80
|
+
The gem comes with a very basic scoring function. You can implement much more
|
81
|
+
sophisticated ones by subclassing the `NeedlemanWunschAligner` class and overriding the following instance methods:
|
82
|
+
|
83
|
+
* `compute_score`
|
84
|
+
* `default_gap_penalty`
|
85
|
+
* `gap_indicator`
|
86
|
+
|
87
|
+
Please see `NeedlemanWunschAligner::ExampleParagraphAndSentenceAligner` for an
|
88
|
+
example.
|
89
|
+
|
90
|
+
## Contributing
|
91
|
+
|
92
|
+
1. Fork it ( https://github.com/jhund/needleman_wunsch_aligner/fork )
|
93
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
94
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
95
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
96
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Rake tasks for the gem: the Bundler gem-helper tasks plus a test task that
# runs every spec file.

# Load the bundle. A missing Bundler only produces a hint here; execution
# continues afterwards.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

# Register the tasks provided by Bundler's gem helper.
Bundler::GemHelper.install_tasks

require 'rake/testtask'

# Test task: puts `spec` on the load path and runs every *_spec.rb below it.
Rake::TestTask.new do |task|
  task.libs.push('spec')
  task.pattern = 'spec/**/*_spec.rb'
  task.verbose = true
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
class NeedlemanWunschAligner
  # Example of a customized aligner. Sequence elements are Hashes with a
  # :type key (:paragraph or :sentence) and an :id key.
  class ExampleParagraphAndSentenceAligner < NeedlemanWunschAligner

    # Scores a pair of elements. Paragraph alignment is weighted more heavily
    # than sentence alignment:
    #
    #            p/1   p/2  p/nil   s/a   s/b  s/nil
    #     p/1     25   -25    -25  -250  -250   -250
    #     p/2           25    -25  -250  -250   -250
    #     p/nil                25  -250  -250   -250
    #     s/a                        10   -10    -10
    #     s/b                              10    -10
    #     s/nil                                   10
    #
    # @param left_el [Hash]
    # @param top_el [Hash]
    # @return [Integer]
    # @raise [RuntimeError] if the pair of types is not covered by the table
    def compute_score(left_el, top_el)
      left_type = left_el[:type]
      top_type = top_el[:type]

      if left_type != top_type
        # Types differ. A paragraph paired with anything else is heavily
        # penalized; any other mixed pair is unsupported.
        return -250 if :paragraph == left_type || :paragraph == top_type
        raise "Handle this: #{ [left_el, top_el].inspect }"
      end

      # Types match: the magnitude depends on the type, the sign on the ids.
      weight = case left_type
      when :paragraph then 25
      when :sentence then 10
      else
        raise "Handle this: #{ [left_el, top_el].inspect }"
      end

      left_el[:id] == top_el[:id] ? weight : -weight
    end

    # @return [Integer] penalty applied for every gap
    def default_gap_penalty
      -10
    end

    # @return [Hash] element inserted into an aligned sequence to mark a gap
    def gap_indicator
      { type: :gap }
    end

  end
end
|
@@ -0,0 +1,173 @@
|
|
1
|
+
require "needleman_wunsch_aligner/version"
|
2
|
+
|
3
|
+
# Finds the optimal alignment of two sequences using the Needleman-Wunsch
# algorithm.
#
# This basic implementation works with any Ruby object: the default scoring
# function only compares two elements with `==`. Subclasses customize the
# behavior by overriding #compute_score, #default_gap_penalty and
# #gap_indicator.
#
# See ExampleParagraphAndSentenceAligner for an example of a more sophisticated
# scoring algorithm.
class NeedlemanWunschAligner

  # @param left_seq [Array<Object>] sequence drawn at left of matrix
  # @param top_seq [Array<Object>] sequence drawn at top of matrix
  def initialize(left_seq, top_seq)
    @left_seq = left_seq
    @top_seq = top_seq
  end

  # Builds the score and traceback matrices and walks the traceback.
  # The matrices are rebuilt on every call.
  # @return [Array<Array<Object>>] two arrays of equal length, the aligned
  #   left sequence and the aligned top sequence, with #gap_indicator
  #   inserted wherever one sequence has no counterpart in the other.
  def get_optimal_alignment
    construct_score_matrix_and_traceback_matrix
    compute_optimal_alignment
  end

  # This is a basic implementation of the scoring algorithm. See
  # ExampleParagraphAndSentenceAligner for a more complex scoring function.
  # @param left_el [Object]
  # @param top_el [Object]
  # @return [Numeric] 1 if both elements are equal, -3 otherwise
  def compute_score(left_el, top_el)
    left_el == top_el ? 1 : -3
  end

  # Returns the default penalty for a gap.
  # @return [Numeric]
  def default_gap_penalty
    -1
  end

  # Returns a sequence element to indicate a gap. Needs to be compatible with
  # other sequence elements and your scoring function.
  # @return [Object]
  def gap_indicator
    nil
  end

  # Prints the optimal alignment, one aligned pair per line.
  # @param col_width [Integer, optional] max width of each col in chars
  def print_alignment(col_width = 20)
    aligned_left_seq, aligned_top_seq = get_optimal_alignment
    puts
    aligned_left_seq.zip(aligned_top_seq).each do |left_el, top_el|
      # String#[start, length] keeps at most col_width characters. The
      # inclusive range 0..col_width would keep one character too many.
      puts [
        left_el.inspect[0, col_width].rjust(col_width),
        top_el.inspect[0, col_width].ljust(col_width),
      ].join(' | ')
    end
  end

  # Prints either the score or the traceback matrix as table.
  # Computes the alignment first if the matrices do not exist yet.
  # @param which_matrix [Symbol] one of :traceback or :score
  # @param col_width [Integer, optional], defaults to 3
  # @raise [RuntimeError] if which_matrix is neither :traceback nor :score
  def print_as_table(which_matrix, col_width = 3)
    get_optimal_alignment if @score_matrix.nil?
    the_matrix = case which_matrix
    when :traceback
      @traceback_matrix
    when :score
      @score_matrix
    else
      raise "Handle this: #{ which_matrix.inspect }"
    end

    puts
    puts 'left_seq = ' + @left_seq.join
    puts 'top_seq = ' + @top_seq.join
    puts
    # Two blank columns: one for the left_seq labels, one for the gap column.
    print ' ' * 2 * col_width

    # Print header row
    @top_seq.each { |top_el| print(top_el.to_s.rjust(col_width)) }

    puts ''
    traverse_score_matrix do |row, col|
      if 0 == col && 0 == row
        # first column in first row
        print ' '.rjust(col_width)
      elsif 0 == col
        # first col in subsequent rows: label the row with its left_seq element
        print @left_seq[row - 1].to_s.rjust(col_width)
      end
      print the_matrix[row][col].to_s.rjust(col_width)
      puts '' if col == the_matrix[row].length - 1
    end
  end

protected

  # Fills @score_matrix and @traceback_matrix cell by cell. Row 0 and column 0
  # represent gaps, so matrix indexes are one ahead of the sequence indexes.
  def construct_score_matrix_and_traceback_matrix
    initialize_score_matrix_and_traceback_matrix
    traverse_score_matrix do |row, col|
      if 0 == row && 0 == col # top left cell
        @score_matrix[0][0] = 0
        @traceback_matrix[0][0] = 'x'
      elsif 0 == row # first row
        @score_matrix[0][col] = col * default_gap_penalty
        @traceback_matrix[0][col] = '←'
      elsif 0 == col # first col
        @score_matrix[row][0] = row * default_gap_penalty
        @traceback_matrix[row][0] = '↑'
      else # other cells
        # compute scores
        from_top = @score_matrix[row-1][col] + default_gap_penalty
        from_left = @score_matrix[row][col-1] + default_gap_penalty
        # @left_seq and @top_seq are off by 1 because we added cells for gaps in the matrix
        from_top_left = @score_matrix[row-1][col-1] + compute_score(@left_seq[row-1], @top_seq[col-1])

        # find max score and direction. Strict comparisons make ties prefer
        # the diagonal, then top, then left.
        max, direction = [from_top_left, '⬉']
        max, direction = [from_top, '↑'] if from_top > max
        max, direction = [from_left, '←'] if from_left > max

        @score_matrix[row][col] = max
        @traceback_matrix[row][col] = direction
      end
    end
  end

  # Walks the traceback matrix from the bottom right cell to the top left cell
  # and collects the aligned elements.
  # @return [Array<Array<Object>>] aligned left sequence and aligned top sequence
  # @raise [RuntimeError] if a traceback cell holds an unknown direction
  def compute_optimal_alignment
    row = @score_matrix.length-1
    col = @score_matrix[0].length-1
    left = []
    top = []
    while row > 0 || col > 0
      case @traceback_matrix[row][col]
      when '⬉'
        left.push(@left_seq[row-1])
        top.push(@top_seq[col-1])
        row -= 1
        col -= 1
      when '←'
        left.push(gap_indicator)
        top.push(@top_seq[col-1])
        col -= 1
      when '↑'
        left.push(@left_seq[row-1])
        top.push(gap_indicator)
        row -= 1
      else
        # Neither index would advance here, so fail loudly instead of looping
        # forever.
        raise "Invalid traceback direction #{ @traceback_matrix[row][col].inspect } at row #{ row }, col #{ col }"
      end
    end
    # Elements were collected back to front.
    [left.reverse, top.reverse]
  end

  # Yields the row and column index of every matrix cell, row by row.
  def traverse_score_matrix
    @score_matrix.each_index do |row|
      @score_matrix[row].each_index do |col|
        yield(row, col)
      end
    end
  end

  # Allocates both matrices with (left_seq.length + 1) rows and
  # (top_seq.length + 1) columns; every cell starts out as nil.
  def initialize_score_matrix_and_traceback_matrix
    @score_matrix = Array.new(@left_seq.length + 1)
    @traceback_matrix = Array.new(@left_seq.length + 1)

    @score_matrix.each_index do |row|
      @score_matrix[row] = Array.new(@top_seq.length + 1)
      @traceback_matrix[row] = Array.new(@top_seq.length + 1)
    end
  end

end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'needleman_wunsch_aligner/version'
|
5
|
+
|
6
|
+
# Gem specification for needleman_wunsch_aligner.
Gem::Specification.new do |gem|
  # Identity
  gem.name = 'needleman_wunsch_aligner'
  gem.version = NeedlemanWunschAligner::VERSION
  gem.authors = ['Jo Hund']
  gem.email = ['jhund@clearcove.ca']
  gem.summary = 'Find the optimal alignment of two sequences of Ruby Objects.'
  gem.homepage = 'https://github.com/jhund/needleman_wunsch_aligner'
  gem.license = 'MIT'

  # Package every file tracked by git; executables and test files are the
  # subsets of that list living under the conventional directories.
  gem.files = `git ls-files -z`.split("\x0")
  gem.executables = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Development-only dependencies
  gem.add_development_dependency('bundler', '~> 1.6')
  gem.add_development_dependency('rake')
end
|
@@ -0,0 +1,162 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'needleman_wunsch_aligner'
|
3
|
+
require 'needleman_wunsch_aligner/example_paragraph_and_sentence_aligner'
|
4
|
+
require 'pp'
|
5
|
+
|
6
|
+
class NeedlemanWunschAligner
|
7
|
+
|
8
|
+
# Container for test data
|
9
|
+
# Container for test data. Every call builds fresh Hash objects, so callers
# never share element instances.
class TestData

  # @return [Array<Hash>] left sequence: three paragraphs, each followed by
  #   its sentences
  def self.sequence_a
    para = lambda { |id| { type: :paragraph, id: id } }
    sent = lambda { { type: :sentence, id: nil } }
    [
      para.(1), sent.(), sent.(), sent.(),
      para.(2), sent.(), sent.(),
      para.(3), sent.(), sent.(), sent.(), sent.(),
    ]
  end

  # @return [Array<Hash>] top sequence: like sequence_a, but with an extra
  #   leading paragraph without id and different sentence counts
  def self.sequence_b
    para = lambda { |id| { type: :paragraph, id: id } }
    sent = lambda { { type: :sentence, id: nil } }
    [
      para.(nil), sent.(),
      para.(1), sent.(), sent.(),
      para.(2), sent.(), sent.(), sent.(), sent.(),
      para.(3), sent.(), sent.(), sent.(),
    ]
  end

  # @return [Array<Array<Hash>>] expected alignment of sequence_a (first
  #   array) and sequence_b (second array)
  def self.optimal_alignment
    para = lambda { |id| { type: :paragraph, id: id } }
    sent = lambda { { type: :sentence, id: nil } }
    gap = lambda { { type: :gap } }
    [
      [
        gap.(), gap.(),
        para.(1), sent.(), sent.(), sent.(),
        para.(2), gap.(), gap.(), sent.(), sent.(),
        para.(3), sent.(), sent.(), sent.(), sent.(),
      ],
      [
        para.(nil), sent.(),
        para.(1), gap.(), sent.(), sent.(),
        para.(2), sent.(), sent.(), sent.(), sent.(),
        para.(3), gap.(), sent.(), sent.(), sent.(),
      ]
    ]
  end

end
|
89
|
+
|
90
|
+
# Specs for the example aligner: the full alignment of the TestData sequences
# and the individual scoring rules.
describe ExampleParagraphAndSentenceAligner do

  let(:sequence_a) { TestData.sequence_a }
  let(:sequence_b) { TestData.sequence_b }
  let(:optimal_alignment) { TestData.optimal_alignment }
  let(:aligner) { ExampleParagraphAndSentenceAligner.new(sequence_a, sequence_b) }

  describe "#get_optimal_alignment" do

    it "returns the optimal_alignment" do
      aligner.get_optimal_alignment.must_equal(optimal_alignment)
    end

  end

  describe "#compute_score" do

    # Element builders for the score table below.
    para = lambda { |id| { type: :paragraph, id: id } }
    sent = lambda { |id| { type: :sentence, id: id } }

    # Test matrix (upper triangle only):
    #
    #            p/1   p/2  p/nil   s/a   s/b  s/nil
    #     p/1     25   -25    -25  -250  -250   -250
    #     p/2           25    -25  -250  -250   -250
    #     p/nil                25  -250  -250   -250
    #     s/a                        10   -10    -10
    #     s/b                              10    -10
    #     s/nil                                   10
    score_table = [
      [para[1], para[1], 25],
      [para[1], para[2], -25],
      [para[1], para[nil], -25],
      [para[1], sent[:a], -250],
      [para[1], sent[:b], -250],
      [para[1], sent[nil], -250],

      [para[2], para[2], 25],
      [para[2], para[nil], -25],
      [para[2], sent[:a], -250],
      [para[2], sent[:b], -250],
      [para[2], sent[nil], -250],

      [para[nil], para[nil], 25],
      [para[nil], sent[:a], -250],
      [para[nil], sent[:b], -250],
      [para[nil], sent[nil], -250],

      [sent[:a], sent[:a], 10],
      [sent[:a], sent[:b], -10],
      [sent[:a], sent[nil], -10],

      [sent[:b], sent[:b], 10],
      [sent[:b], sent[nil], -10],

      [sent[nil], sent[nil], 10],
    ]

    score_table.each do |(left_el, right_el, xpect)|

      it "handles #{ left_el.inspect }:#{ right_el.inspect }" do
        aligner.send(:compute_score, left_el, right_el).must_equal(xpect)
      end

    end
  end

  describe "#default_gap_penalty" do

    it 'returns the expected value' do
      aligner.send(:default_gap_penalty).must_equal(-10)
    end

  end

end
|
162
|
+
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
# must raise exceptions on overridable methods
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'needleman_wunsch_aligner'
|
4
|
+
|
5
|
+
# Specs for the base aligner with its default scoring function, gap penalty
# and gap indicator.
describe NeedlemanWunschAligner do

  let(:aligner){ NeedlemanWunschAligner.new([], []) }

  describe "#get_optimal_alignment" do

    # Each entry: [left sequence, top sequence, expected alignment]
    [
      [
        [1,2,3],
        [1,2,3],
        [
          [1,2,3],
          [1,2,3]
        ]
      ],
      [
        [1,2,3],
        [2,3,4],
        [
          [  1,2,3,nil],
          [nil,2,3,4]
        ]
      ],
      [
        [2],
        [1,2,3],
        [
          [nil,2,nil],
          [1  ,2,3]
        ]
      ],
      [
        [1,1,1,1],
        [2,2,2,2],
        [
          [nil,nil,nil,nil,1  ,1  ,1  ,1],
          [2  ,2  ,2  ,2  ,nil,nil,nil,nil]
        ]
      ],
      [
        [1,1,1,1],
        [2,2,2,1],
        [
          [nil,nil,nil,1  ,1  ,1  ,1],
          [2  ,2  ,2  ,nil,nil,nil,1]
        ]
      ],
    ].each do |(seq_a, seq_b, optimal_alignment)|

      it "returns the optimal_alignment" do
        a = NeedlemanWunschAligner.new(seq_a, seq_b)
        a.get_optimal_alignment.must_equal(optimal_alignment)
      end

    end

  end

  describe "#compute_score" do

    # Each entry: [left element, top element, expected score]
    [
      [1, 1, 1],
      [1, 2, -3],
    ].each do |(left_el, right_el, xpect)|

      it "handles #{ left_el.inspect }:#{ right_el.inspect }" do
        aligner.send(:compute_score, left_el, right_el).must_equal(xpect)
      end

    end
  end

  describe "#default_gap_penalty" do

    it 'returns the expected value' do
      aligner.send(:default_gap_penalty).must_equal(-1)
    end

  end

  describe "#gap_indicator" do

    it 'returns the expected value' do
      # must_be_nil instead of must_equal(nil): comparing against nil with
      # must_equal is deprecated in Minitest 5 and fails in Minitest 6.
      aligner.send(:gap_indicator).must_be_nil
    end

  end

end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: needleman_wunsch_aligner
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jo Hund
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-07-24 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description:
|
42
|
+
email:
|
43
|
+
- jhund@clearcove.ca
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- .gitignore
|
49
|
+
- Gemfile
|
50
|
+
- LICENSE.txt
|
51
|
+
- README.md
|
52
|
+
- Rakefile
|
53
|
+
- lib/needleman_wunsch_aligner.rb
|
54
|
+
- lib/needleman_wunsch_aligner/example_paragraph_and_sentence_aligner.rb
|
55
|
+
- lib/needleman_wunsch_aligner/version.rb
|
56
|
+
- needleman_wunsch_aligner.gemspec
|
57
|
+
- spec/needleman_wunsch_aligner/example_paragraph_and_sentence_aligner_spec.rb
|
58
|
+
- spec/needleman_wunsch_aligner_spec.rb
|
59
|
+
- spec/spec_helper.rb
|
60
|
+
homepage: https://github.com/jhund/needleman_wunsch_aligner
|
61
|
+
licenses:
|
62
|
+
- MIT
|
63
|
+
metadata: {}
|
64
|
+
post_install_message:
|
65
|
+
rdoc_options: []
|
66
|
+
require_paths:
|
67
|
+
- lib
|
68
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - '>='
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: '0'
|
73
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
requirements: []
|
79
|
+
rubyforge_project:
|
80
|
+
rubygems_version: 2.2.2
|
81
|
+
signing_key:
|
82
|
+
specification_version: 4
|
83
|
+
summary: Find the optimal alignment of two sequences of Ruby Objects.
|
84
|
+
test_files:
|
85
|
+
- spec/needleman_wunsch_aligner/example_paragraph_and_sentence_aligner_spec.rb
|
86
|
+
- spec/needleman_wunsch_aligner_spec.rb
|
87
|
+
- spec/spec_helper.rb
|